summaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/6lowpan/core.c12
-rw-r--r--net/6lowpan/iphc.c57
-rw-r--r--net/8021q/vlan_dev.c16
-rw-r--r--net/8021q/vlan_netlink.c3
-rw-r--r--net/9p/client.c5
-rw-r--r--net/9p/protocol.c2
-rw-r--r--net/Makefile2
-rw-r--r--net/atm/clip.c4
-rw-r--r--net/atm/common.c22
-rw-r--r--net/batman-adv/bat_iv_ogm.c17
-rw-r--r--net/batman-adv/bridge_loop_avoidance.c123
-rw-r--r--net/batman-adv/bridge_loop_avoidance.h11
-rw-r--r--net/batman-adv/distributed-arp-table.c64
-rw-r--r--net/batman-adv/log.h5
-rw-r--r--net/batman-adv/main.c3
-rw-r--r--net/batman-adv/main.h18
-rw-r--r--net/batman-adv/multicast.c12
-rw-r--r--net/batman-adv/routing.c25
-rw-r--r--net/batman-adv/send.c76
-rw-r--r--net/batman-adv/send.h4
-rw-r--r--net/batman-adv/soft-interface.c238
-rw-r--r--net/batman-adv/tp_meter.c7
-rw-r--r--net/batman-adv/translation-table.c42
-rw-r--r--net/batman-adv/types.h6
-rw-r--r--net/bluetooth/6lowpan.c192
-rw-r--r--net/bluetooth/Kconfig1
-rw-r--r--net/bluetooth/Makefile2
-rw-r--r--net/bluetooth/af_bluetooth.c26
-rw-r--r--net/bluetooth/amp.c10
-rw-r--r--net/bluetooth/ecc.c816
-rw-r--r--net/bluetooth/ecc.h54
-rw-r--r--net/bluetooth/ecdh_helper.c231
-rw-r--r--net/bluetooth/ecdh_helper.h27
-rw-r--r--net/bluetooth/hci_core.c4
-rw-r--r--net/bluetooth/hci_sock.c3
-rw-r--r--net/bluetooth/l2cap_core.c30
-rw-r--r--net/bluetooth/rfcomm/core.c4
-rw-r--r--net/bluetooth/selftest.c28
-rw-r--r--net/bluetooth/smp.c46
-rw-r--r--net/bpf/Makefile1
-rw-r--r--net/bpf/test_run.c173
-rw-r--r--net/bridge/br_fdb.c5
-rw-r--r--net/bridge/br_forward.c24
-rw-r--r--net/bridge/br_if.c3
-rw-r--r--net/bridge/br_mdb.c9
-rw-r--r--net/bridge/br_netfilter_hooks.c3
-rw-r--r--net/bridge/br_netlink.c18
-rw-r--r--net/bridge/br_netlink_tunnel.c4
-rw-r--r--net/bridge/br_sysfs_if.c2
-rw-r--r--net/bridge/netfilter/ebt_dnat.c20
-rw-r--r--net/bridge/netfilter/ebt_log.c34
-rw-r--r--net/bridge/netfilter/ebtable_broute.c4
-rw-r--r--net/bridge/netfilter/ebtable_filter.c15
-rw-r--r--net/bridge/netfilter/ebtable_nat.c15
-rw-r--r--net/bridge/netfilter/ebtables.c63
-rw-r--r--net/bridge/netfilter/nft_meta_bridge.c2
-rw-r--r--net/bridge/netfilter/nft_reject_bridge.c6
-rw-r--r--net/can/af_can.c183
-rw-r--r--net/can/af_can.h13
-rw-r--r--net/can/bcm.c98
-rw-r--r--net/can/gw.c80
-rw-r--r--net/can/proc.c285
-rw-r--r--net/can/raw.c92
-rw-r--r--net/ceph/ceph_common.c29
-rw-r--r--net/ceph/cls_lock_client.c51
-rw-r--r--net/ceph/debugfs.c7
-rw-r--r--net/ceph/messenger.c6
-rw-r--r--net/ceph/osd_client.c143
-rw-r--r--net/ceph/pagelist.c2
-rw-r--r--net/ceph/snapshot.c6
-rw-r--r--net/core/datagram.c21
-rw-r--r--net/core/dev.c295
-rw-r--r--net/core/devlink.c862
-rw-r--r--net/core/drop_monitor.c5
-rw-r--r--net/core/ethtool.c4
-rw-r--r--net/core/fib_rules.c30
-rw-r--r--net/core/filter.c167
-rw-r--r--net/core/flow.c29
-rw-r--r--net/core/flow_dissector.c445
-rw-r--r--net/core/gro_cells.c2
-rw-r--r--net/core/lwt_bpf.c5
-rw-r--r--net/core/lwtunnel.c9
-rw-r--r--net/core/neighbour.c63
-rw-r--r--net/core/net_namespace.c13
-rw-r--r--net/core/netprio_cgroup.c1
-rw-r--r--net/core/rtnetlink.c193
-rw-r--r--net/core/secure_seq.c34
-rw-r--r--net/core/skbuff.c3
-rw-r--r--net/core/sock.c184
-rw-r--r--net/core/sock_diag.c15
-rw-r--r--net/core/sock_reuseport.c4
-rw-r--r--net/core/sysctl_net_core.c8
-rw-r--r--net/core/utils.c2
-rw-r--r--net/dcb/dcbnl.c60
-rw-r--r--net/dccp/ipv4.c2
-rw-r--r--net/dccp/ipv6.c8
-rw-r--r--net/decnet/af_decnet.c16
-rw-r--r--net/decnet/dn_dev.c12
-rw-r--r--net/decnet/dn_fib.c12
-rw-r--r--net/decnet/dn_neigh.c12
-rw-r--r--net/decnet/dn_route.c6
-rw-r--r--net/decnet/netfilter/dn_rtmsg.c6
-rw-r--r--net/dsa/Kconfig8
-rw-r--r--net/dsa/Makefile4
-rw-r--r--net/dsa/dsa.c795
-rw-r--r--net/dsa/dsa2.c50
-rw-r--r--net/dsa/dsa_priv.h15
-rw-r--r--net/dsa/legacy.c818
-rw-r--r--net/dsa/slave.c13
-rw-r--r--net/dsa/switch.c15
-rw-r--r--net/dsa/tag_brcm.c27
-rw-r--r--net/dsa/tag_dsa.c27
-rw-r--r--net/dsa/tag_edsa.c27
-rw-r--r--net/dsa/tag_lan9303.c136
-rw-r--r--net/dsa/tag_mtk.c100
-rw-r--r--net/dsa/tag_qca.c27
-rw-r--r--net/dsa/tag_trailer.c26
-rw-r--r--net/hsr/hsr_netlink.c4
-rw-r--r--net/ieee802154/nl802154.c29
-rw-r--r--net/ipv4/Makefile2
-rw-r--r--net/ipv4/af_inet.c8
-rw-r--r--net/ipv4/arp.c22
-rw-r--r--net/ipv4/devinet.c117
-rw-r--r--net/ipv4/esp4.c373
-rw-r--r--net/ipv4/esp4_offload.c231
-rw-r--r--net/ipv4/fib_frontend.c27
-rw-r--r--net/ipv4/fib_notifier.c86
-rw-r--r--net/ipv4/fib_rules.c55
-rw-r--r--net/ipv4/fib_semantics.c11
-rw-r--r--net/ipv4/fib_trie.c134
-rw-r--r--net/ipv4/icmp.c19
-rw-r--r--net/ipv4/inet_connection_sock.c2
-rw-r--r--net/ipv4/inet_hashtables.c6
-rw-r--r--net/ipv4/ip_gre.c24
-rw-r--r--net/ipv4/ip_input.c5
-rw-r--r--net/ipv4/ip_sockglue.c12
-rw-r--r--net/ipv4/ip_tunnel.c27
-rw-r--r--net/ipv4/ip_tunnel_core.c5
-rw-r--r--net/ipv4/ip_vti.c31
-rw-r--r--net/ipv4/ipconfig.c1
-rw-r--r--net/ipv4/ipip.c24
-rw-r--r--net/ipv4/ipmr.c41
-rw-r--r--net/ipv4/netfilter/arp_tables.c23
-rw-r--r--net/ipv4/netfilter/ip_tables.c20
-rw-r--r--net/ipv4/netfilter/ipt_CLUSTERIP.c19
-rw-r--r--net/ipv4/netfilter/ipt_SYNPROXY.c94
-rw-r--r--net/ipv4/netfilter/nf_dup_ipv4.c3
-rw-r--r--net/ipv4/netfilter/nf_nat_l3proto_ipv4.c8
-rw-r--r--net/ipv4/netfilter/nf_nat_masquerade_ipv4.c5
-rw-r--r--net/ipv4/netfilter/nf_nat_pptp.c45
-rw-r--r--net/ipv4/netfilter/nf_nat_snmp_basic.c27
-rw-r--r--net/ipv4/netfilter/nf_reject_ipv4.c3
-rw-r--r--net/ipv4/netfilter/nf_socket_ipv4.c2
-rw-r--r--net/ipv4/netfilter/nft_fib_ipv4.c6
-rw-r--r--net/ipv4/proc.c2
-rw-r--r--net/ipv4/protocol.c2
-rw-r--r--net/ipv4/raw.c3
-rw-r--r--net/ipv4/route.c123
-rw-r--r--net/ipv4/syncookies.c12
-rw-r--r--net/ipv4/sysctl_net_ipv4.c107
-rw-r--r--net/ipv4/tcp.c9
-rw-r--r--net/ipv4/tcp_cubic.c2
-rw-r--r--net/ipv4/tcp_fastopen.c102
-rw-r--r--net/ipv4/tcp_input.c172
-rw-r--r--net/ipv4/tcp_ipv4.c58
-rw-r--r--net/ipv4/tcp_lp.c6
-rw-r--r--net/ipv4/tcp_metrics.c152
-rw-r--r--net/ipv4/tcp_minisocks.c27
-rw-r--r--net/ipv4/tcp_output.c24
-rw-r--r--net/ipv4/tcp_rate.c7
-rw-r--r--net/ipv4/tcp_recovery.c19
-rw-r--r--net/ipv4/tcp_timer.c7
-rw-r--r--net/ipv4/tcp_westwood.c4
-rw-r--r--net/ipv4/udp.c4
-rw-r--r--net/ipv4/udp_impl.h1
-rw-r--r--net/ipv4/xfrm4_mode_transport.c34
-rw-r--r--net/ipv4/xfrm4_mode_tunnel.c28
-rw-r--r--net/ipv4/xfrm4_output.c3
-rw-r--r--net/ipv6/Kconfig1
-rw-r--r--net/ipv6/addrconf.c239
-rw-r--r--net/ipv6/addrlabel.c12
-rw-r--r--net/ipv6/af_inet6.c8
-rw-r--r--net/ipv6/esp6.c294
-rw-r--r--net/ipv6/esp6_offload.c233
-rw-r--r--net/ipv6/exthdrs.c2
-rw-r--r--net/ipv6/ila/ila_lwt.c3
-rw-r--r--net/ipv6/ila/ila_xlat.c8
-rw-r--r--net/ipv6/ip6_gre.c14
-rw-r--r--net/ipv6/ip6_input.c6
-rw-r--r--net/ipv6/ip6_offload.c7
-rw-r--r--net/ipv6/ip6_output.c5
-rw-r--r--net/ipv6/ip6_tunnel.c19
-rw-r--r--net/ipv6/ip6_vti.c10
-rw-r--r--net/ipv6/ip6mr.c9
-rw-r--r--net/ipv6/mcast.c49
-rw-r--r--net/ipv6/ndisc.c6
-rw-r--r--net/ipv6/netfilter/ip6_tables.c29
-rw-r--r--net/ipv6/netfilter/ip6t_SYNPROXY.c93
-rw-r--r--net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c3
-rw-r--r--net/ipv6/netfilter/nf_dup_ipv6.c3
-rw-r--r--net/ipv6/netfilter/nf_nat_l3proto_ipv6.c10
-rw-r--r--net/ipv6/netfilter/nf_nat_masquerade_ipv6.c5
-rw-r--r--net/ipv6/netfilter/nft_fib_ipv6.c4
-rw-r--r--net/ipv6/output_core.c14
-rw-r--r--net/ipv6/protocol.c2
-rw-r--r--net/ipv6/raw.c2
-rw-r--r--net/ipv6/route.c54
-rw-r--r--net/ipv6/seg6_iptunnel.c51
-rw-r--r--net/ipv6/sit.c37
-rw-r--r--net/ipv6/syncookies.c10
-rw-r--r--net/ipv6/tcp_ipv6.c51
-rw-r--r--net/ipv6/udp.c72
-rw-r--r--net/ipv6/udp_impl.h1
-rw-r--r--net/ipv6/udp_offload.c6
-rw-r--r--net/ipv6/xfrm6_mode_transport.c34
-rw-r--r--net/ipv6/xfrm6_mode_tunnel.c27
-rw-r--r--net/ipv6/xfrm6_output.c9
-rw-r--r--net/ipx/af_ipx.c5
-rw-r--r--net/kcm/kcmsock.c4
-rw-r--r--net/key/af_key.c1
-rw-r--r--net/l2tp/l2tp_core.c62
-rw-r--r--net/l2tp/l2tp_core.h12
-rw-r--r--net/l2tp/l2tp_eth.c76
-rw-r--r--net/l2tp/l2tp_netlink.c5
-rw-r--r--net/llc/af_llc.c2
-rw-r--r--net/llc/llc_conn.c4
-rw-r--r--net/llc/llc_sap.c2
-rw-r--r--net/mac80211/agg-rx.c12
-rw-r--r--net/mac80211/agg-tx.c12
-rw-r--r--net/mac80211/cfg.c242
-rw-r--r--net/mac80211/ibss.c16
-rw-r--r--net/mac80211/ieee80211_i.h44
-rw-r--r--net/mac80211/iface.c19
-rw-r--r--net/mac80211/main.c3
-rw-r--r--net/mac80211/mesh.c39
-rw-r--r--net/mac80211/mesh_hwmp.c23
-rw-r--r--net/mac80211/mesh_pathtbl.c8
-rw-r--r--net/mac80211/mesh_plink.c37
-rw-r--r--net/mac80211/mlme.c73
-rw-r--r--net/mac80211/pm.c2
-rw-r--r--net/mac80211/rate.c69
-rw-r--r--net/mac80211/rate.h47
-rw-r--r--net/mac80211/rc80211_minstrel.c6
-rw-r--r--net/mac80211/rc80211_minstrel_ht.c10
-rw-r--r--net/mac80211/rx.c272
-rw-r--r--net/mac80211/scan.c12
-rw-r--r--net/mac80211/spectmgmt.c4
-rw-r--r--net/mac80211/sta_info.c48
-rw-r--r--net/mac80211/sta_info.h87
-rw-r--r--net/mac80211/status.c168
-rw-r--r--net/mac80211/tdls.c29
-rw-r--r--net/mac80211/tx.c13
-rw-r--r--net/mac80211/util.c101
-rw-r--r--net/mac802154/ieee802154_i.h1
-rw-r--r--net/mpls/af_mpls.c372
-rw-r--r--net/mpls/internal.h68
-rw-r--r--net/mpls/mpls_iptunnel.c88
-rw-r--r--net/netfilter/core.c53
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_gen.h5
-rw-r--r--net/netfilter/ipset/ip_set_core.c43
-rw-r--r--net/netfilter/ipvs/ip_vs_conn.c24
-rw-r--r--net/netfilter/ipvs/ip_vs_core.c25
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c58
-rw-r--r--net/netfilter/ipvs/ip_vs_ftp.c20
-rw-r--r--net/netfilter/ipvs/ip_vs_lblc.c2
-rw-r--r--net/netfilter/ipvs/ip_vs_lblcr.c6
-rw-r--r--net/netfilter/ipvs/ip_vs_nfct.c4
-rw-r--r--net/netfilter/ipvs/ip_vs_nq.c2
-rw-r--r--net/netfilter/ipvs/ip_vs_proto.c22
-rw-r--r--net/netfilter/ipvs/ip_vs_proto_sctp.c2
-rw-r--r--net/netfilter/ipvs/ip_vs_proto_tcp.c2
-rw-r--r--net/netfilter/ipvs/ip_vs_rr.c2
-rw-r--r--net/netfilter/ipvs/ip_vs_sed.c2
-rw-r--r--net/netfilter/ipvs/ip_vs_sync.c6
-rw-r--r--net/netfilter/ipvs/ip_vs_wlc.c2
-rw-r--r--net/netfilter/ipvs/ip_vs_wrr.c2
-rw-r--r--net/netfilter/ipvs/ip_vs_xmit.c8
-rw-r--r--net/netfilter/nf_conntrack_acct.c2
-rw-r--r--net/netfilter/nf_conntrack_amanda.c2
-rw-r--r--net/netfilter/nf_conntrack_core.c191
-rw-r--r--net/netfilter/nf_conntrack_ecache.c9
-rw-r--r--net/netfilter/nf_conntrack_expect.c46
-rw-r--r--net/netfilter/nf_conntrack_extend.c114
-rw-r--r--net/netfilter/nf_conntrack_ftp.c8
-rw-r--r--net/netfilter/nf_conntrack_h323_main.c6
-rw-r--r--net/netfilter/nf_conntrack_helper.c44
-rw-r--r--net/netfilter/nf_conntrack_irc.c8
-rw-r--r--net/netfilter/nf_conntrack_labels.c2
-rw-r--r--net/netfilter/nf_conntrack_netbios_ns.c2
-rw-r--r--net/netfilter/nf_conntrack_netlink.c175
-rw-r--r--net/netfilter/nf_conntrack_pptp.c15
-rw-r--r--net/netfilter/nf_conntrack_proto.c5
-rw-r--r--net/netfilter/nf_conntrack_proto_dccp.c18
-rw-r--r--net/netfilter/nf_conntrack_proto_sctp.c22
-rw-r--r--net/netfilter/nf_conntrack_proto_tcp.c28
-rw-r--r--net/netfilter/nf_conntrack_sane.c8
-rw-r--r--net/netfilter/nf_conntrack_seqadj.c2
-rw-r--r--net/netfilter/nf_conntrack_sip.c18
-rw-r--r--net/netfilter/nf_conntrack_standalone.c6
-rw-r--r--net/netfilter/nf_conntrack_tftp.c6
-rw-r--r--net/netfilter/nf_conntrack_timeout.c2
-rw-r--r--net/netfilter/nf_conntrack_timestamp.c2
-rw-r--r--net/netfilter/nf_internals.h2
-rw-r--r--net/netfilter/nf_log.c5
-rw-r--r--net/netfilter/nf_nat_amanda.c11
-rw-r--r--net/netfilter/nf_nat_core.c42
-rw-r--r--net/netfilter/nf_nat_helper.c40
-rw-r--r--net/netfilter/nf_nat_irc.c9
-rw-r--r--net/netfilter/nf_queue.c7
-rw-r--r--net/netfilter/nf_synproxy_core.c10
-rw-r--r--net/netfilter/nf_tables_api.c135
-rw-r--r--net/netfilter/nf_tables_netdev.c2
-rw-r--r--net/netfilter/nf_tables_trace.c3
-rw-r--r--net/netfilter/nfnetlink.c35
-rw-r--r--net/netfilter/nfnetlink_acct.c20
-rw-r--r--net/netfilter/nfnetlink_cthelper.c30
-rw-r--r--net/netfilter/nfnetlink_cttimeout.c19
-rw-r--r--net/netfilter/nfnetlink_log.c20
-rw-r--r--net/netfilter/nfnetlink_queue.c26
-rw-r--r--net/netfilter/nft_compat.c23
-rw-r--r--net/netfilter/nft_counter.c3
-rw-r--r--net/netfilter/nft_ct.c212
-rw-r--r--net/netfilter/nft_dynset.c19
-rw-r--r--net/netfilter/nft_exthdr.c15
-rw-r--r--net/netfilter/nft_fib.c16
-rw-r--r--net/netfilter/nft_hash.c133
-rw-r--r--net/netfilter/nft_limit.c10
-rw-r--r--net/netfilter/nft_lookup.c14
-rw-r--r--net/netfilter/nft_masq.c4
-rw-r--r--net/netfilter/nft_meta.c6
-rw-r--r--net/netfilter/nft_nat.c4
-rw-r--r--net/netfilter/nft_numgen.c2
-rw-r--r--net/netfilter/nft_objref.c14
-rw-r--r--net/netfilter/nft_queue.c2
-rw-r--r--net/netfilter/nft_quota.c3
-rw-r--r--net/netfilter/nft_redir.c4
-rw-r--r--net/netfilter/nft_reject.c5
-rw-r--r--net/netfilter/nft_reject_inet.c6
-rw-r--r--net/netfilter/nft_set_bitmap.c5
-rw-r--r--net/netfilter/nft_set_hash.c2
-rw-r--r--net/netfilter/nft_set_rbtree.c31
-rw-r--r--net/netfilter/x_tables.c28
-rw-r--r--net/netfilter/xt_AUDIT.c126
-rw-r--r--net/netfilter/xt_CT.c27
-rw-r--r--net/netfilter/xt_HMARK.c2
-rw-r--r--net/netfilter/xt_cluster.c3
-rw-r--r--net/netfilter/xt_connlabel.c2
-rw-r--r--net/netfilter/xt_connmark.c4
-rw-r--r--net/netfilter/xt_conntrack.c11
-rw-r--r--net/netfilter/xt_hashlimit.c10
-rw-r--r--net/netfilter/xt_ipvs.c2
-rw-r--r--net/netfilter/xt_limit.c11
-rw-r--r--net/netfilter/xt_recent.c7
-rw-r--r--net/netfilter/xt_socket.c2
-rw-r--r--net/netfilter/xt_state.c13
-rw-r--r--net/netlabel/netlabel_cipso_v4.c19
-rw-r--r--net/netlink/af_netlink.c90
-rw-r--r--net/netlink/af_netlink.h9
-rw-r--r--net/netlink/diag.c25
-rw-r--r--net/netlink/genetlink.c11
-rw-r--r--net/nfc/netlink.c29
-rw-r--r--net/openvswitch/actions.c271
-rw-r--r--net/openvswitch/conntrack.c64
-rw-r--r--net/openvswitch/datapath.c2
-rw-r--r--net/openvswitch/datapath.h2
-rw-r--r--net/openvswitch/flow_netlink.c145
-rw-r--r--net/openvswitch/vport-vxlan.c3
-rw-r--r--net/packet/af_packet.c58
-rw-r--r--net/phonet/pn_netlink.c12
-rw-r--r--net/qrtr/Kconfig2
-rw-r--r--net/qrtr/qrtr.c5
-rw-r--r--net/qrtr/smd.c42
-rw-r--r--net/rds/connection.c10
-rw-r--r--net/rds/ib_cm.c5
-rw-r--r--net/rds/ib_fmr.c38
-rw-r--r--net/rds/ib_mr.h2
-rw-r--r--net/rds/recv.c4
-rw-r--r--net/rds/threads.c2
-rw-r--r--net/rxrpc/ar-internal.h19
-rw-r--r--net/rxrpc/call_accept.c6
-rw-r--r--net/rxrpc/call_event.c2
-rw-r--r--net/rxrpc/call_object.c4
-rw-r--r--net/rxrpc/conn_client.c1
-rw-r--r--net/rxrpc/conn_event.c17
-rw-r--r--net/rxrpc/input.c17
-rw-r--r--net/rxrpc/insecure.c10
-rw-r--r--net/rxrpc/peer_event.c2
-rw-r--r--net/rxrpc/recvmsg.c8
-rw-r--r--net/rxrpc/rxkad.c184
-rw-r--r--net/rxrpc/sendmsg.c17
-rw-r--r--net/sched/Kconfig45
-rw-r--r--net/sched/act_api.c48
-rw-r--r--net/sched/act_bpf.c2
-rw-r--r--net/sched/act_connmark.c3
-rw-r--r--net/sched/act_csum.c14
-rw-r--r--net/sched/act_gact.c2
-rw-r--r--net/sched/act_ife.c8
-rw-r--r--net/sched/act_ipt.c2
-rw-r--r--net/sched/act_mirred.c2
-rw-r--r--net/sched/act_nat.c2
-rw-r--r--net/sched/act_pedit.c4
-rw-r--r--net/sched/act_police.c2
-rw-r--r--net/sched/act_sample.c2
-rw-r--r--net/sched/act_simple.c2
-rw-r--r--net/sched/act_skbedit.c2
-rw-r--r--net/sched/act_skbmod.c2
-rw-r--r--net/sched/act_tunnel_key.c3
-rw-r--r--net/sched/act_vlan.c2
-rw-r--r--net/sched/cls_api.c32
-rw-r--r--net/sched/cls_basic.c12
-rw-r--r--net/sched/cls_bpf.c14
-rw-r--r--net/sched/cls_cgroup.c10
-rw-r--r--net/sched/cls_flow.c17
-rw-r--r--net/sched/cls_flower.c99
-rw-r--r--net/sched/cls_fw.c32
-rw-r--r--net/sched/cls_matchall.c14
-rw-r--r--net/sched/cls_route.c46
-rw-r--r--net/sched/cls_rsvp.h38
-rw-r--r--net/sched/cls_tcindex.c16
-rw-r--r--net/sched/cls_u32.c73
-rw-r--r--net/sched/em_meta.c2
-rw-r--r--net/sched/ematch.c2
-rw-r--r--net/sched/sch_api.c74
-rw-r--r--net/sched/sch_atm.c2
-rw-r--r--net/sched/sch_cbq.c9
-rw-r--r--net/sched/sch_choke.c64
-rw-r--r--net/sched/sch_codel.c2
-rw-r--r--net/sched/sch_drr.c4
-rw-r--r--net/sched/sch_dsmark.c6
-rw-r--r--net/sched/sch_fq.c14
-rw-r--r--net/sched/sch_fq_codel.c31
-rw-r--r--net/sched/sch_generic.c2
-rw-r--r--net/sched/sch_gred.c4
-rw-r--r--net/sched/sch_hfsc.c6
-rw-r--r--net/sched/sch_hhf.c35
-rw-r--r--net/sched/sch_htb.c6
-rw-r--r--net/sched/sch_mq.c2
-rw-r--r--net/sched/sch_mqprio.c41
-rw-r--r--net/sched/sch_multiq.c2
-rw-r--r--net/sched/sch_netem.c34
-rw-r--r--net/sched/sch_pie.c2
-rw-r--r--net/sched/sch_prio.c5
-rw-r--r--net/sched/sch_qfq.c5
-rw-r--r--net/sched/sch_red.c4
-rw-r--r--net/sched/sch_sfb.c4
-rw-r--r--net/sched/sch_sfq.c11
-rw-r--r--net/sched/sch_tbf.c4
-rw-r--r--net/sctp/chunk.c14
-rw-r--r--net/sctp/ipv6.c49
-rw-r--r--net/sctp/outqueue.c10
-rw-r--r--net/sctp/sm_make_chunk.c4
-rw-r--r--net/sctp/sm_statefuns.c15
-rw-r--r--net/sctp/socket.c149
-rw-r--r--net/sctp/stream.c464
-rw-r--r--net/sctp/sysctl.c7
-rw-r--r--net/sctp/ulpevent.c56
-rw-r--r--net/smc/Kconfig4
-rw-r--r--net/smc/af_smc.c25
-rw-r--r--net/smc/smc.h1
-rw-r--r--net/smc/smc_cdc.c11
-rw-r--r--net/smc/smc_clc.c4
-rw-r--r--net/smc/smc_close.c74
-rw-r--r--net/smc/smc_close.h2
-rw-r--r--net/smc/smc_core.c18
-rw-r--r--net/smc/smc_core.h2
-rw-r--r--net/smc/smc_ib.c35
-rw-r--r--net/smc/smc_ib.h3
-rw-r--r--net/smc/smc_pnet.c9
-rw-r--r--net/smc/smc_pnet.h1
-rw-r--r--net/smc/smc_rx.c3
-rw-r--r--net/smc/smc_tx.c6
-rw-r--r--net/smc/smc_wr.c2
-rw-r--r--net/socket.c46
-rw-r--r--net/sunrpc/Kconfig1
-rw-r--r--net/sunrpc/clnt.c8
-rw-r--r--net/sunrpc/sched.c5
-rw-r--r--net/sunrpc/svc.c134
-rw-r--r--net/sunrpc/xdr.c2
-rw-r--r--net/sunrpc/xprt.c1
-rw-r--r--net/sunrpc/xprtrdma/Makefile2
-rw-r--r--net/sunrpc/xprtrdma/rpc_rdma.c12
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma.c8
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_backchannel.c71
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_marshal.c89
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_recvfrom.c79
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_rw.c512
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_sendto.c978
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_transport.c110
-rw-r--r--net/sunrpc/xprtrdma/transport.c57
-rw-r--r--net/sunrpc/xprtrdma/verbs.c323
-rw-r--r--net/sunrpc/xprtrdma/xprt_rdma.h22
-rw-r--r--net/switchdev/switchdev.c2
-rw-r--r--net/sysctl_net.c1
-rw-r--r--net/tipc/bearer.c14
-rw-r--r--net/tipc/link.c2
-rw-r--r--net/tipc/name_table.c2
-rw-r--r--net/tipc/net.c4
-rw-r--r--net/tipc/netlink.c3
-rw-r--r--net/tipc/netlink_compat.c32
-rw-r--r--net/tipc/node.c14
-rw-r--r--net/tipc/socket.c334
-rw-r--r--net/tipc/subscr.c17
-rw-r--r--net/tipc/subscr.h3
-rw-r--r--net/tipc/udp_media.c7
-rw-r--r--net/unix/af_unix.c2
-rw-r--r--net/vmw_vsock/Makefile2
-rw-r--r--net/vmw_vsock/af_vsock_tap.c114
-rw-r--r--net/vmw_vsock/virtio_transport.c9
-rw-r--r--net/vmw_vsock/virtio_transport_common.c64
-rw-r--r--net/vmw_vsock/vmci_transport.c22
-rw-r--r--net/wireless/ap.c5
-rw-r--r--net/wireless/chan.c117
-rw-r--r--net/wireless/core.c121
-rw-r--r--net/wireless/core.h78
-rw-r--r--net/wireless/ibss.c1
-rw-r--r--net/wireless/mesh.c1
-rw-r--r--net/wireless/mlme.c70
-rw-r--r--net/wireless/nl80211.c689
-rw-r--r--net/wireless/nl80211.h15
-rw-r--r--net/wireless/rdev-ops.h29
-rw-r--r--net/wireless/reg.c145
-rw-r--r--net/wireless/reg.h36
-rw-r--r--net/wireless/scan.c161
-rw-r--r--net/wireless/sme.c262
-rw-r--r--net/wireless/trace.h76
-rw-r--r--net/wireless/util.c96
-rw-r--r--net/wireless/wext-compat.c2
-rw-r--r--net/x25/af_x25.c24
-rw-r--r--net/x25/sysctl_net_x25.c5
-rw-r--r--net/xfrm/Makefile1
-rw-r--r--net/xfrm/xfrm_device.c208
-rw-r--r--net/xfrm/xfrm_hash.h4
-rw-r--r--net/xfrm/xfrm_input.c41
-rw-r--r--net/xfrm/xfrm_output.c46
-rw-r--r--net/xfrm/xfrm_policy.c27
-rw-r--r--net/xfrm/xfrm_replay.c162
-rw-r--r--net/xfrm/xfrm_state.c147
-rw-r--r--net/xfrm/xfrm_user.c47
538 files changed, 16195 insertions, 9048 deletions
diff --git a/net/6lowpan/core.c b/net/6lowpan/core.c
index 5945f7e19c67..40d3d72beb53 100644
--- a/net/6lowpan/core.c
+++ b/net/6lowpan/core.c
@@ -23,10 +23,18 @@ int lowpan_register_netdevice(struct net_device *dev,
{
int i, ret;
- dev->addr_len = EUI64_ADDR_LEN;
+ switch (lltype) {
+ case LOWPAN_LLTYPE_IEEE802154:
+ dev->addr_len = EUI64_ADDR_LEN;
+ break;
+
+ case LOWPAN_LLTYPE_BTLE:
+ dev->addr_len = ETH_ALEN;
+ break;
+ }
+
dev->type = ARPHRD_6LOWPAN;
dev->mtu = IPV6_MIN_MTU;
- dev->priv_flags |= IFF_NO_QUEUE;
lowpan_dev(dev)->lltype = lltype;
diff --git a/net/6lowpan/iphc.c b/net/6lowpan/iphc.c
index 79f1fa22509a..6b1042e21656 100644
--- a/net/6lowpan/iphc.c
+++ b/net/6lowpan/iphc.c
@@ -278,6 +278,23 @@ lowpan_iphc_ctx_get_by_mcast_addr(const struct net_device *dev,
return ret;
}
+static void lowpan_iphc_uncompress_lladdr(const struct net_device *dev,
+ struct in6_addr *ipaddr,
+ const void *lladdr)
+{
+ switch (dev->addr_len) {
+ case ETH_ALEN:
+ lowpan_iphc_uncompress_eui48_lladdr(ipaddr, lladdr);
+ break;
+ case EUI64_ADDR_LEN:
+ lowpan_iphc_uncompress_eui64_lladdr(ipaddr, lladdr);
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ break;
+ }
+}
+
/* Uncompress address function for source and
* destination address(non-multicast).
*
@@ -320,7 +337,7 @@ static int lowpan_iphc_uncompress_addr(struct sk_buff *skb,
lowpan_iphc_uncompress_802154_lladdr(ipaddr, lladdr);
break;
default:
- lowpan_iphc_uncompress_eui64_lladdr(ipaddr, lladdr);
+ lowpan_iphc_uncompress_lladdr(dev, ipaddr, lladdr);
break;
}
break;
@@ -381,7 +398,7 @@ static int lowpan_iphc_uncompress_ctx_addr(struct sk_buff *skb,
lowpan_iphc_uncompress_802154_lladdr(ipaddr, lladdr);
break;
default:
- lowpan_iphc_uncompress_eui64_lladdr(ipaddr, lladdr);
+ lowpan_iphc_uncompress_lladdr(dev, ipaddr, lladdr);
break;
}
ipv6_addr_prefix_copy(ipaddr, &ctx->pfx, ctx->plen);
@@ -666,6 +683,8 @@ int lowpan_header_decompress(struct sk_buff *skb, const struct net_device *dev,
switch (iphc1 & (LOWPAN_IPHC_M | LOWPAN_IPHC_DAC)) {
case LOWPAN_IPHC_M | LOWPAN_IPHC_DAC:
+ skb->pkt_type = PACKET_BROADCAST;
+
spin_lock_bh(&lowpan_dev(dev)->ctx.lock);
ci = lowpan_iphc_ctx_get_by_id(dev, LOWPAN_IPHC_CID_DCI(cid));
if (!ci) {
@@ -681,11 +700,15 @@ int lowpan_header_decompress(struct sk_buff *skb, const struct net_device *dev,
spin_unlock_bh(&lowpan_dev(dev)->ctx.lock);
break;
case LOWPAN_IPHC_M:
+ skb->pkt_type = PACKET_BROADCAST;
+
/* multicast */
err = lowpan_uncompress_multicast_daddr(skb, &hdr.daddr,
iphc1 & LOWPAN_IPHC_DAM_MASK);
break;
case LOWPAN_IPHC_DAC:
+ skb->pkt_type = PACKET_HOST;
+
spin_lock_bh(&lowpan_dev(dev)->ctx.lock);
ci = lowpan_iphc_ctx_get_by_id(dev, LOWPAN_IPHC_CID_DCI(cid));
if (!ci) {
@@ -701,6 +724,8 @@ int lowpan_header_decompress(struct sk_buff *skb, const struct net_device *dev,
spin_unlock_bh(&lowpan_dev(dev)->ctx.lock);
break;
default:
+ skb->pkt_type = PACKET_HOST;
+
err = lowpan_iphc_uncompress_addr(skb, dev, &hdr.daddr,
iphc1 & LOWPAN_IPHC_DAM_MASK,
daddr);
@@ -802,6 +827,21 @@ lowpan_iphc_compress_ctx_802154_lladdr(const struct in6_addr *ipaddr,
return lladdr_compress;
}
+static bool lowpan_iphc_addr_equal(const struct net_device *dev,
+ const struct lowpan_iphc_ctx *ctx,
+ const struct in6_addr *ipaddr,
+ const void *lladdr)
+{
+ struct in6_addr tmp = {};
+
+ lowpan_iphc_uncompress_lladdr(dev, &tmp, lladdr);
+
+ if (ctx)
+ ipv6_addr_prefix_copy(&tmp, &ctx->pfx, ctx->plen);
+
+ return ipv6_addr_equal(&tmp, ipaddr);
+}
+
static u8 lowpan_compress_ctx_addr(u8 **hc_ptr, const struct net_device *dev,
const struct in6_addr *ipaddr,
const struct lowpan_iphc_ctx *ctx,
@@ -819,13 +859,7 @@ static u8 lowpan_compress_ctx_addr(u8 **hc_ptr, const struct net_device *dev,
}
break;
default:
- /* check for SAM/DAM = 11 */
- memcpy(&tmp.s6_addr[8], lladdr, EUI64_ADDR_LEN);
- /* second bit-flip (Universe/Local) is done according RFC2464 */
- tmp.s6_addr[8] ^= 0x02;
- /* context information are always used */
- ipv6_addr_prefix_copy(&tmp, &ctx->pfx, ctx->plen);
- if (ipv6_addr_equal(&tmp, ipaddr)) {
+ if (lowpan_iphc_addr_equal(dev, ctx, ipaddr, lladdr)) {
dam = LOWPAN_IPHC_DAM_11;
goto out;
}
@@ -921,11 +955,12 @@ static u8 lowpan_compress_addr_64(u8 **hc_ptr, const struct net_device *dev,
}
break;
default:
- if (is_addr_mac_addr_based(ipaddr, lladdr)) {
- dam = LOWPAN_IPHC_DAM_11; /* 0-bits */
+ if (lowpan_iphc_addr_equal(dev, NULL, ipaddr, lladdr)) {
+ dam = LOWPAN_IPHC_DAM_11;
pr_debug("address compression 0 bits\n");
goto out;
}
+
break;
}
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index e97ab824e368..953b6728bd00 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -562,8 +562,7 @@ static int vlan_dev_init(struct net_device *dev)
NETIF_F_HIGHDMA | NETIF_F_SCTP_CRC |
NETIF_F_ALL_FCOE;
- dev->features |= real_dev->vlan_features | NETIF_F_LLTX |
- NETIF_F_GSO_SOFTWARE;
+ dev->features |= dev->hw_features | NETIF_F_LLTX;
dev->gso_max_size = real_dev->gso_max_size;
dev->gso_max_segs = real_dev->gso_max_segs;
if (dev->features & NETIF_F_VLAN_FEATURES)
@@ -627,11 +626,18 @@ static netdev_features_t vlan_dev_fix_features(struct net_device *dev,
{
struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;
netdev_features_t old_features = features;
+ netdev_features_t lower_features;
- features = netdev_intersect_features(features, real_dev->vlan_features);
- features |= NETIF_F_RXCSUM;
- features = netdev_intersect_features(features, real_dev->features);
+ lower_features = netdev_intersect_features((real_dev->vlan_features |
+ NETIF_F_RXCSUM),
+ real_dev->features);
+ /* Add HW_CSUM setting to preserve user ability to control
+ * checksum offload on the vlan device.
+ */
+ if (lower_features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))
+ lower_features |= NETIF_F_HW_CSUM;
+ features = netdev_intersect_features(features, lower_features);
features |= old_features & (NETIF_F_SOFT_FEATURES | NETIF_F_GSO_SOFTWARE);
features |= NETIF_F_LLTX;
diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c
index 1270207f3d7c..9c94aad153b3 100644
--- a/net/8021q/vlan_netlink.c
+++ b/net/8021q/vlan_netlink.c
@@ -35,7 +35,8 @@ static inline int vlan_validate_qos_map(struct nlattr *attr)
{
if (!attr)
return 0;
- return nla_validate_nested(attr, IFLA_VLAN_QOS_MAX, vlan_map_policy);
+ return nla_validate_nested(attr, IFLA_VLAN_QOS_MAX, vlan_map_policy,
+ NULL);
}
static int vlan_validate(struct nlattr *tb[], struct nlattr *data[])
diff --git a/net/9p/client.c b/net/9p/client.c
index 8e5c6a8d0a37..1218fb3b52da 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -592,9 +592,8 @@ static int p9_check_zc_errors(struct p9_client *c, struct p9_req_t *req,
ename = &req->rc->sdata[req->rc->offset];
if (len > inline_len) {
/* We have error in external buffer */
- err = copy_from_iter(ename + inline_len,
- len - inline_len, uidata);
- if (err != len - inline_len) {
+ if (!copy_from_iter_full(ename + inline_len,
+ len - inline_len, uidata)) {
err = -EFAULT;
goto out_err;
}
diff --git a/net/9p/protocol.c b/net/9p/protocol.c
index 16d287565987..16e10680518c 100644
--- a/net/9p/protocol.c
+++ b/net/9p/protocol.c
@@ -74,7 +74,7 @@ pdu_write_u(struct p9_fcall *pdu, struct iov_iter *from, size_t size)
{
size_t len = min(pdu->capacity - pdu->size, size);
struct iov_iter i = *from;
- if (copy_from_iter(&pdu->sdata[pdu->size], len, &i) != len)
+ if (!copy_from_iter_full(&pdu->sdata[pdu->size], len, &i))
len = 0;
pdu->size += len;
diff --git a/net/Makefile b/net/Makefile
index 9b681550e3a3..9086ffbb5085 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -12,7 +12,7 @@ obj-$(CONFIG_NET) += $(tmp-y)
# LLC has to be linked before the files in net/802/
obj-$(CONFIG_LLC) += llc/
-obj-$(CONFIG_NET) += ethernet/ 802/ sched/ netlink/
+obj-$(CONFIG_NET) += ethernet/ 802/ sched/ netlink/ bpf/
obj-$(CONFIG_NETFILTER) += netfilter/
obj-$(CONFIG_INET) += ipv4/
obj-$(CONFIG_XFRM) += xfrm/
diff --git a/net/atm/clip.c b/net/atm/clip.c
index 53b4ac09e7b7..ec527b62f79d 100644
--- a/net/atm/clip.c
+++ b/net/atm/clip.c
@@ -106,7 +106,7 @@ static void unlink_clip_vcc(struct clip_vcc *clip_vcc)
entry->expires = jiffies - 1;
/* force resolution or expiration */
error = neigh_update(entry->neigh, NULL, NUD_NONE,
- NEIGH_UPDATE_F_ADMIN);
+ NEIGH_UPDATE_F_ADMIN, 0);
if (error)
pr_crit("neigh_update failed with %d\n", error);
goto out;
@@ -481,7 +481,7 @@ static int clip_setentry(struct atm_vcc *vcc, __be32 ip)
link_vcc(clip_vcc, entry);
}
error = neigh_update(neigh, llc_oui, NUD_PERMANENT,
- NEIGH_UPDATE_F_OVERRIDE | NEIGH_UPDATE_F_ADMIN);
+ NEIGH_UPDATE_F_OVERRIDE | NEIGH_UPDATE_F_ADMIN, 0);
neigh_release(neigh);
return error;
}
diff --git a/net/atm/common.c b/net/atm/common.c
index 9613381f5db0..f06422f4108d 100644
--- a/net/atm/common.c
+++ b/net/atm/common.c
@@ -62,21 +62,16 @@ static void vcc_remove_socket(struct sock *sk)
write_unlock_irq(&vcc_sklist_lock);
}
-static struct sk_buff *alloc_tx(struct atm_vcc *vcc, unsigned int size)
+static bool vcc_tx_ready(struct atm_vcc *vcc, unsigned int size)
{
- struct sk_buff *skb;
struct sock *sk = sk_atm(vcc);
if (sk_wmem_alloc_get(sk) && !atm_may_send(vcc, size)) {
pr_debug("Sorry: wmem_alloc = %d, size = %d, sndbuf = %d\n",
sk_wmem_alloc_get(sk), size, sk->sk_sndbuf);
- return NULL;
+ return false;
}
- while (!(skb = alloc_skb(size, GFP_KERNEL)))
- schedule();
- pr_debug("%d += %d\n", sk_wmem_alloc_get(sk), skb->truesize);
- atomic_add(skb->truesize, &sk->sk_wmem_alloc);
- return skb;
+ return true;
}
static void vcc_sock_destruct(struct sock *sk)
@@ -606,7 +601,7 @@ int vcc_sendmsg(struct socket *sock, struct msghdr *m, size_t size)
eff = (size+3) & ~3; /* align to word boundary */
prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
error = 0;
- while (!(skb = alloc_tx(vcc, eff))) {
+ while (!vcc_tx_ready(vcc, eff)) {
if (m->msg_flags & MSG_DONTWAIT) {
error = -EAGAIN;
break;
@@ -628,6 +623,15 @@ int vcc_sendmsg(struct socket *sock, struct msghdr *m, size_t size)
finish_wait(sk_sleep(sk), &wait);
if (error)
goto out;
+
+ skb = alloc_skb(eff, GFP_KERNEL);
+ if (!skb) {
+ error = -ENOMEM;
+ goto out;
+ }
+ pr_debug("%d += %d\n", sk_wmem_alloc_get(sk), skb->truesize);
+ atomic_add(skb->truesize, &sk->sk_wmem_alloc);
+
skb->dev = NULL; /* for paths shared with net_device interfaces */
ATM_SKB(skb)->atm_options = vcc->atm_options;
if (!copy_from_iter_full(skb_put(skb, size), size, &m->msg_iter)) {
diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index 71343d0fec94..495ba7cdcb04 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -679,15 +679,11 @@ static void batadv_iv_ogm_aggregate_new(const unsigned char *packet_buff,
{
struct batadv_priv *bat_priv = netdev_priv(if_incoming->soft_iface);
struct batadv_forw_packet *forw_packet_aggr;
+ struct sk_buff *skb;
unsigned char *skb_buff;
unsigned int skb_size;
atomic_t *queue_left = own_packet ? NULL : &bat_priv->batman_queue_left;
- forw_packet_aggr = batadv_forw_packet_alloc(if_incoming, if_outgoing,
- queue_left, bat_priv);
- if (!forw_packet_aggr)
- return;
-
if (atomic_read(&bat_priv->aggregated_ogms) &&
packet_len < BATADV_MAX_AGGREGATION_BYTES)
skb_size = BATADV_MAX_AGGREGATION_BYTES;
@@ -696,9 +692,14 @@ static void batadv_iv_ogm_aggregate_new(const unsigned char *packet_buff,
skb_size += ETH_HLEN;
- forw_packet_aggr->skb = netdev_alloc_skb_ip_align(NULL, skb_size);
- if (!forw_packet_aggr->skb) {
- batadv_forw_packet_free(forw_packet_aggr, true);
+ skb = netdev_alloc_skb_ip_align(NULL, skb_size);
+ if (!skb)
+ return;
+
+ forw_packet_aggr = batadv_forw_packet_alloc(if_incoming, if_outgoing,
+ queue_left, bat_priv, skb);
+ if (!forw_packet_aggr) {
+ kfree_skb(skb);
return;
}
diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c
index ba8420d8a992..d07e89ec8467 100644
--- a/net/batman-adv/bridge_loop_avoidance.c
+++ b/net/batman-adv/bridge_loop_avoidance.c
@@ -395,7 +395,7 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, u8 *mac,
ether_addr_copy(ethhdr->h_source, mac);
batadv_dbg(BATADV_DBG_BLA, bat_priv,
"bla_send_claim(): CLAIM %pM on vid %d\n", mac,
- BATADV_PRINT_VID(vid));
+ batadv_print_vid(vid));
break;
case BATADV_CLAIM_TYPE_UNCLAIM:
/* unclaim frame
@@ -404,7 +404,7 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, u8 *mac,
ether_addr_copy(hw_src, mac);
batadv_dbg(BATADV_DBG_BLA, bat_priv,
"bla_send_claim(): UNCLAIM %pM on vid %d\n", mac,
- BATADV_PRINT_VID(vid));
+ batadv_print_vid(vid));
break;
case BATADV_CLAIM_TYPE_ANNOUNCE:
/* announcement frame
@@ -413,7 +413,7 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, u8 *mac,
ether_addr_copy(hw_src, mac);
batadv_dbg(BATADV_DBG_BLA, bat_priv,
"bla_send_claim(): ANNOUNCE of %pM on vid %d\n",
- ethhdr->h_source, BATADV_PRINT_VID(vid));
+ ethhdr->h_source, batadv_print_vid(vid));
break;
case BATADV_CLAIM_TYPE_REQUEST:
/* request frame
@@ -425,14 +425,14 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, u8 *mac,
batadv_dbg(BATADV_DBG_BLA, bat_priv,
"bla_send_claim(): REQUEST of %pM to %pM on vid %d\n",
ethhdr->h_source, ethhdr->h_dest,
- BATADV_PRINT_VID(vid));
+ batadv_print_vid(vid));
break;
case BATADV_CLAIM_TYPE_LOOPDETECT:
ether_addr_copy(ethhdr->h_source, mac);
batadv_dbg(BATADV_DBG_BLA, bat_priv,
"bla_send_claim(): LOOPDETECT of %pM to %pM on vid %d\n",
ethhdr->h_source, ethhdr->h_dest,
- BATADV_PRINT_VID(vid));
+ batadv_print_vid(vid));
break;
}
@@ -475,9 +475,9 @@ static void batadv_bla_loopdetect_report(struct work_struct *work)
batadv_info(bat_priv->soft_iface,
"Possible loop on VLAN %d detected which can't be handled by BLA - please check your network setup!\n",
- BATADV_PRINT_VID(backbone_gw->vid));
+ batadv_print_vid(backbone_gw->vid));
snprintf(vid_str, sizeof(vid_str), "%d",
- BATADV_PRINT_VID(backbone_gw->vid));
+ batadv_print_vid(backbone_gw->vid));
vid_str[sizeof(vid_str) - 1] = 0;
batadv_throw_uevent(bat_priv, BATADV_UEV_BLA, BATADV_UEV_LOOPDETECT,
@@ -510,7 +510,7 @@ batadv_bla_get_backbone_gw(struct batadv_priv *bat_priv, u8 *orig,
batadv_dbg(BATADV_DBG_BLA, bat_priv,
"bla_get_backbone_gw(): not found (%pM, %d), creating new entry\n",
- orig, BATADV_PRINT_VID(vid));
+ orig, batadv_print_vid(vid));
entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
if (!entry)
@@ -719,7 +719,7 @@ static void batadv_bla_add_claim(struct batadv_priv *bat_priv,
batadv_dbg(BATADV_DBG_BLA, bat_priv,
"bla_add_claim(): adding new entry %pM, vid %d to hash ...\n",
- mac, BATADV_PRINT_VID(vid));
+ mac, batadv_print_vid(vid));
kref_get(&claim->refcount);
hash_added = batadv_hash_add(bat_priv->bla.claim_hash,
@@ -739,8 +739,8 @@ static void batadv_bla_add_claim(struct batadv_priv *bat_priv,
goto claim_free_ref;
batadv_dbg(BATADV_DBG_BLA, bat_priv,
- "bla_add_claim(): changing ownership for %pM, vid %d\n",
- mac, BATADV_PRINT_VID(vid));
+ "bla_add_claim(): changing ownership for %pM, vid %d to gw %pM\n",
+ mac, batadv_print_vid(vid), backbone_gw->orig);
remove_crc = true;
}
@@ -809,7 +809,7 @@ static void batadv_bla_del_claim(struct batadv_priv *bat_priv,
return;
batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla_del_claim(): %pM, vid %d\n",
- mac, BATADV_PRINT_VID(vid));
+ mac, batadv_print_vid(vid));
batadv_hash_remove(bat_priv->bla.claim_hash, batadv_compare_claim,
batadv_choose_claim, claim);
@@ -849,7 +849,7 @@ static bool batadv_handle_announce(struct batadv_priv *bat_priv, u8 *an_addr,
batadv_dbg(BATADV_DBG_BLA, bat_priv,
"handle_announce(): ANNOUNCE vid %d (sent by %pM)... CRC = %#.4x\n",
- BATADV_PRINT_VID(vid), backbone_gw->orig, crc);
+ batadv_print_vid(vid), backbone_gw->orig, crc);
spin_lock_bh(&backbone_gw->crc_lock);
backbone_crc = backbone_gw->crc;
@@ -859,7 +859,7 @@ static bool batadv_handle_announce(struct batadv_priv *bat_priv, u8 *an_addr,
batadv_dbg(BATADV_DBG_BLA, backbone_gw->bat_priv,
"handle_announce(): CRC FAILED for %pM/%d (my = %#.4x, sent = %#.4x)\n",
backbone_gw->orig,
- BATADV_PRINT_VID(backbone_gw->vid),
+ batadv_print_vid(backbone_gw->vid),
backbone_crc, crc);
batadv_bla_send_request(backbone_gw);
@@ -904,7 +904,7 @@ static bool batadv_handle_request(struct batadv_priv *bat_priv,
batadv_dbg(BATADV_DBG_BLA, bat_priv,
"handle_request(): REQUEST vid %d (sent by %pM)...\n",
- BATADV_PRINT_VID(vid), ethhdr->h_source);
+ batadv_print_vid(vid), ethhdr->h_source);
batadv_bla_answer_request(bat_priv, primary_if, vid);
return true;
@@ -941,7 +941,7 @@ static bool batadv_handle_unclaim(struct batadv_priv *bat_priv,
/* this must be an UNCLAIM frame */
batadv_dbg(BATADV_DBG_BLA, bat_priv,
"handle_unclaim(): UNCLAIM %pM on vid %d (sent by %pM)...\n",
- claim_addr, BATADV_PRINT_VID(vid), backbone_gw->orig);
+ claim_addr, batadv_print_vid(vid), backbone_gw->orig);
batadv_bla_del_claim(bat_priv, claim_addr, vid);
batadv_backbone_gw_put(backbone_gw);
@@ -1161,7 +1161,7 @@ static bool batadv_bla_process_claim(struct batadv_priv *bat_priv,
if (ret == 1)
batadv_dbg(BATADV_DBG_BLA, bat_priv,
"bla_process_claim(): received a claim frame from another group. From: %pM on vid %d ...(hw_src %pM, hw_dst %pM)\n",
- ethhdr->h_source, BATADV_PRINT_VID(vid), hw_src,
+ ethhdr->h_source, batadv_print_vid(vid), hw_src,
hw_dst);
if (ret < 2)
@@ -1197,7 +1197,7 @@ static bool batadv_bla_process_claim(struct batadv_priv *bat_priv,
batadv_dbg(BATADV_DBG_BLA, bat_priv,
"bla_process_claim(): ERROR - this looks like a claim frame, but is useless. eth src %pM on vid %d ...(hw_src %pM, hw_dst %pM)\n",
- ethhdr->h_source, BATADV_PRINT_VID(vid), hw_src, hw_dst);
+ ethhdr->h_source, batadv_print_vid(vid), hw_src, hw_dst);
return true;
}
@@ -1295,10 +1295,13 @@ static void batadv_bla_purge_claims(struct batadv_priv *bat_priv,
goto skip;
batadv_dbg(BATADV_DBG_BLA, bat_priv,
- "bla_purge_claims(): %pM, vid %d, time out\n",
- claim->addr, claim->vid);
+ "bla_purge_claims(): timed out.\n");
purge_now:
+ batadv_dbg(BATADV_DBG_BLA, bat_priv,
+ "bla_purge_claims(): %pM, vid %d\n",
+ claim->addr, claim->vid);
+
batadv_handle_unclaim(bat_priv, primary_if,
backbone_gw->orig,
claim->addr, claim->vid);
@@ -1846,6 +1849,13 @@ bool batadv_bla_rx(struct batadv_priv *bat_priv, struct sk_buff *skb,
/* possible optimization: race for a claim */
/* No claim exists yet, claim it for us!
*/
+
+ batadv_dbg(BATADV_DBG_BLA, bat_priv,
+ "bla_rx(): Unclaimed MAC %pM found. Claim it. Local: %s\n",
+ ethhdr->h_source,
+ batadv_is_my_client(bat_priv,
+ ethhdr->h_source, vid) ?
+ "yes" : "no");
batadv_handle_claim(bat_priv, primary_if,
primary_if->net_dev->dev_addr,
ethhdr->h_source, vid);
@@ -1963,10 +1973,22 @@ bool batadv_bla_tx(struct batadv_priv *bat_priv, struct sk_buff *skb,
/* if yes, the client has roamed and we have
* to unclaim it.
*/
- batadv_handle_unclaim(bat_priv, primary_if,
- primary_if->net_dev->dev_addr,
- ethhdr->h_source, vid);
- goto allow;
+ if (batadv_has_timed_out(claim->lasttime, 100)) {
+ /* only unclaim if the last claim entry is
+ * older than 100 ms to make sure we really
+ * have a roaming client here.
+ */
+ batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla_tx(): Roaming client %pM detected. Unclaim it.\n",
+ ethhdr->h_source);
+ batadv_handle_unclaim(bat_priv, primary_if,
+ primary_if->net_dev->dev_addr,
+ ethhdr->h_source, vid);
+ goto allow;
+ } else {
+ batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla_tx(): Race for claim %pM detected. Drop packet.\n",
+ ethhdr->h_source);
+ goto handled;
+ }
}
/* check if it is a multicast/broadcast frame */
@@ -2042,7 +2064,7 @@ int batadv_bla_claim_table_seq_print_text(struct seq_file *seq, void *offset)
backbone_crc = backbone_gw->crc;
spin_unlock_bh(&backbone_gw->crc_lock);
seq_printf(seq, " * %pM on %5d by %pM [%c] (%#.4x)\n",
- claim->addr, BATADV_PRINT_VID(claim->vid),
+ claim->addr, batadv_print_vid(claim->vid),
backbone_gw->orig,
(is_own ? 'x' : ' '),
backbone_crc);
@@ -2274,7 +2296,7 @@ int batadv_bla_backbone_table_seq_print_text(struct seq_file *seq, void *offset)
seq_printf(seq, " * %pM on %5d %4i.%03is (%#.4x)\n",
backbone_gw->orig,
- BATADV_PRINT_VID(backbone_gw->vid), secs,
+ batadv_print_vid(backbone_gw->vid), secs,
msecs, backbone_crc);
}
rcu_read_unlock();
@@ -2449,3 +2471,52 @@ out:
return ret;
}
+
+#ifdef CONFIG_BATMAN_ADV_DAT
+/**
+ * batadv_bla_check_claim - check if address is claimed
+ *
+ * @bat_priv: the bat priv with all the soft interface information
+ * @addr: mac address of which the claim status is checked
+ * @vid: the VLAN ID
+ *
+ * addr is checked if this address is claimed by the local device itself.
+ *
+ * Return: true if bla is disabled or the mac is claimed by the device,
+ * false if the device addr is already claimed by another gateway
+ */
+bool batadv_bla_check_claim(struct batadv_priv *bat_priv,
+ u8 *addr, unsigned short vid)
+{
+ struct batadv_bla_claim search_claim;
+ struct batadv_bla_claim *claim = NULL;
+ struct batadv_hard_iface *primary_if = NULL;
+ bool ret = true;
+
+ if (!atomic_read(&bat_priv->bridge_loop_avoidance))
+ return ret;
+
+ primary_if = batadv_primary_if_get_selected(bat_priv);
+ if (!primary_if)
+ return ret;
+
+ /* First look if the mac address is claimed */
+ ether_addr_copy(search_claim.addr, addr);
+ search_claim.vid = vid;
+
+ claim = batadv_claim_hash_find(bat_priv, &search_claim);
+
+ /* If there is a claim and we are not owner of the claim,
+ * return false.
+ */
+ if (claim) {
+ if (!batadv_compare_eth(claim->backbone_gw->orig,
+ primary_if->net_dev->dev_addr))
+ ret = false;
+ batadv_claim_put(claim);
+ }
+
+ batadv_hardif_put(primary_if);
+ return ret;
+}
+#endif
diff --git a/net/batman-adv/bridge_loop_avoidance.h b/net/batman-adv/bridge_loop_avoidance.h
index e157986bd01c..234775748b8e 100644
--- a/net/batman-adv/bridge_loop_avoidance.h
+++ b/net/batman-adv/bridge_loop_avoidance.h
@@ -69,6 +69,10 @@ void batadv_bla_status_update(struct net_device *net_dev);
int batadv_bla_init(struct batadv_priv *bat_priv);
void batadv_bla_free(struct batadv_priv *bat_priv);
int batadv_bla_claim_dump(struct sk_buff *msg, struct netlink_callback *cb);
+#ifdef CONFIG_BATMAN_ADV_DAT
+bool batadv_bla_check_claim(struct batadv_priv *bat_priv, u8 *addr,
+ unsigned short vid);
+#endif
#define BATADV_BLA_CRC_INIT 0
#else /* ifdef CONFIG_BATMAN_ADV_BLA */
@@ -145,6 +149,13 @@ static inline int batadv_bla_backbone_dump(struct sk_buff *msg,
return -EOPNOTSUPP;
}
+static inline
+bool batadv_bla_check_claim(struct batadv_priv *bat_priv, u8 *addr,
+ unsigned short vid)
+{
+ return true;
+}
+
#endif /* ifdef CONFIG_BATMAN_ADV_BLA */
#endif /* ifndef _NET_BATMAN_ADV_BLA_H_ */
diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c
index 1bfd1dbc2feb..013e970eff39 100644
--- a/net/batman-adv/distributed-arp-table.c
+++ b/net/batman-adv/distributed-arp-table.c
@@ -43,6 +43,7 @@
#include <linux/workqueue.h>
#include <net/arp.h>
+#include "bridge_loop_avoidance.h"
#include "hard-interface.h"
#include "hash.h"
#include "log.h"
@@ -330,7 +331,7 @@ static void batadv_dat_entry_add(struct batadv_priv *bat_priv, __be32 ip,
batadv_dbg(BATADV_DBG_DAT, bat_priv,
"Entry updated: %pI4 %pM (vid: %d)\n",
&dat_entry->ip, dat_entry->mac_addr,
- BATADV_PRINT_VID(vid));
+ batadv_print_vid(vid));
goto out;
}
@@ -356,7 +357,7 @@ static void batadv_dat_entry_add(struct batadv_priv *bat_priv, __be32 ip,
}
batadv_dbg(BATADV_DBG_DAT, bat_priv, "New entry added: %pI4 %pM (vid: %d)\n",
- &dat_entry->ip, dat_entry->mac_addr, BATADV_PRINT_VID(vid));
+ &dat_entry->ip, dat_entry->mac_addr, batadv_print_vid(vid));
out:
if (dat_entry)
@@ -835,7 +836,7 @@ int batadv_dat_cache_seq_print_text(struct seq_file *seq, void *offset)
seq_printf(seq, " * %15pI4 %14pM %4i %6i:%02i\n",
&dat_entry->ip, dat_entry->mac_addr,
- BATADV_PRINT_VID(dat_entry->vid),
+ batadv_print_vid(dat_entry->vid),
last_seen_mins, last_seen_secs);
}
rcu_read_unlock();
@@ -1002,6 +1003,7 @@ bool batadv_dat_snoop_outgoing_arp_request(struct batadv_priv *bat_priv,
bool ret = false;
struct batadv_dat_entry *dat_entry = NULL;
struct sk_buff *skb_new;
+ struct net_device *soft_iface = bat_priv->soft_iface;
int hdr_size = 0;
unsigned short vid;
@@ -1040,16 +1042,30 @@ bool batadv_dat_snoop_outgoing_arp_request(struct batadv_priv *bat_priv,
goto out;
}
+ /* If BLA is enabled, only send ARP replies if we have claimed
+ * the destination for the ARP request or if no one else of
+ * the backbone gws belonging to our backbone has claimed the
+ * destination.
+ */
+ if (!batadv_bla_check_claim(bat_priv,
+ dat_entry->mac_addr, vid)) {
+ batadv_dbg(BATADV_DBG_DAT, bat_priv,
+ "Device %pM claimed by another backbone gw. Don't send ARP reply!",
+ dat_entry->mac_addr);
+ ret = true;
+ goto out;
+ }
+
skb_new = batadv_dat_arp_create_reply(bat_priv, ip_dst, ip_src,
dat_entry->mac_addr,
hw_src, vid);
if (!skb_new)
goto out;
- skb_new->protocol = eth_type_trans(skb_new,
- bat_priv->soft_iface);
- bat_priv->stats.rx_packets++;
- bat_priv->stats.rx_bytes += skb->len + ETH_HLEN + hdr_size;
+ skb_new->protocol = eth_type_trans(skb_new, soft_iface);
+
+ soft_iface->stats.rx_packets++;
+ soft_iface->stats.rx_bytes += skb->len + ETH_HLEN + hdr_size;
netif_rx(skb_new);
batadv_dbg(BATADV_DBG_DAT, bat_priv, "ARP request replied locally\n");
@@ -1188,6 +1204,7 @@ void batadv_dat_snoop_outgoing_arp_reply(struct batadv_priv *bat_priv,
bool batadv_dat_snoop_incoming_arp_reply(struct batadv_priv *bat_priv,
struct sk_buff *skb, int hdr_size)
{
+ struct batadv_dat_entry *dat_entry = NULL;
u16 type;
__be32 ip_src, ip_dst;
u8 *hw_src, *hw_dst;
@@ -1210,12 +1227,41 @@ bool batadv_dat_snoop_incoming_arp_reply(struct batadv_priv *bat_priv,
hw_dst = batadv_arp_hw_dst(skb, hdr_size);
ip_dst = batadv_arp_ip_dst(skb, hdr_size);
+ /* If ip_dst is already in cache and has the right mac address,
+ * drop this frame if this ARP reply is destined for us because it's
+ * most probably an ARP reply generated by another node of the DHT.
+ * We have most probably received already a reply earlier. Delivering
+ * this frame would lead to doubled receive of an ARP reply.
+ */
+ dat_entry = batadv_dat_entry_hash_find(bat_priv, ip_src, vid);
+ if (dat_entry && batadv_compare_eth(hw_src, dat_entry->mac_addr)) {
+ batadv_dbg(BATADV_DBG_DAT, bat_priv, "Doubled ARP reply removed: ARP MSG = [src: %pM-%pI4 dst: %pM-%pI4]; dat_entry: %pM-%pI4\n",
+ hw_src, &ip_src, hw_dst, &ip_dst,
+ dat_entry->mac_addr, &dat_entry->ip);
+ dropped = true;
+ goto out;
+ }
+
/* Update our internal cache with both the IP addresses the node got
* within the ARP reply
*/
batadv_dat_entry_add(bat_priv, ip_src, hw_src, vid);
batadv_dat_entry_add(bat_priv, ip_dst, hw_dst, vid);
+ /* If BLA is enabled, only forward ARP replies if we have claimed the
+ * source of the ARP reply or if no one else of the same backbone has
+ * already claimed that client. This prevents that different gateways
+ * to the same backbone all forward the ARP reply leading to multiple
+ * replies in the backbone.
+ */
+ if (!batadv_bla_check_claim(bat_priv, hw_src, vid)) {
+ batadv_dbg(BATADV_DBG_DAT, bat_priv,
+ "Device %pM claimed by another backbone gw. Drop ARP reply.\n",
+ hw_src);
+ dropped = true;
+ goto out;
+ }
+
/* if this REPLY is directed to a client of mine, let's deliver the
* packet to the interface
*/
@@ -1228,6 +1274,8 @@ bool batadv_dat_snoop_incoming_arp_reply(struct batadv_priv *bat_priv,
out:
if (dropped)
kfree_skb(skb);
+ if (dat_entry)
+ batadv_dat_entry_put(dat_entry);
/* if dropped == false -> deliver to the interface */
return dropped;
}
@@ -1256,7 +1304,7 @@ bool batadv_dat_drop_broadcast_packet(struct batadv_priv *bat_priv,
/* If this packet is an ARP_REQUEST and the node already has the
* information that it is going to ask, then the packet can be dropped
*/
- if (forw_packet->num_packets)
+ if (batadv_forw_packet_is_rebroadcast(forw_packet))
goto out;
vid = batadv_dat_get_vid(forw_packet->skb, &hdr_size);
diff --git a/net/batman-adv/log.h b/net/batman-adv/log.h
index 7a2b9f4da078..65ce97efa6b5 100644
--- a/net/batman-adv/log.h
+++ b/net/batman-adv/log.h
@@ -73,9 +73,10 @@ __printf(2, 3);
/* possibly ratelimited debug output */
#define _batadv_dbg(type, bat_priv, ratelimited, fmt, arg...) \
do { \
- if (atomic_read(&(bat_priv)->log_level) & (type) && \
+ struct batadv_priv *__batpriv = (bat_priv); \
+ if (atomic_read(&__batpriv->log_level) & (type) && \
(!(ratelimited) || net_ratelimit())) \
- batadv_debug_log(bat_priv, fmt, ## arg); \
+ batadv_debug_log(__batpriv, fmt, ## arg); \
} \
while (0)
#else /* !CONFIG_BATMAN_ADV_DEBUG */
diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c
index 5000c540614d..fb381fb26a66 100644
--- a/net/batman-adv/main.c
+++ b/net/batman-adv/main.c
@@ -516,6 +516,9 @@ static void batadv_recv_handler_init(void)
BUILD_BUG_ON(sizeof(struct batadv_tvlv_tt_change) != 12);
BUILD_BUG_ON(sizeof(struct batadv_tvlv_roam_adv) != 8);
+ i = FIELD_SIZEOF(struct sk_buff, cb);
+ BUILD_BUG_ON(sizeof(struct batadv_skb_cb) > i);
+
/* broadcast packet */
batadv_rx_handler[BATADV_BCAST] = batadv_recv_bcast_packet;
diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h
index 57a8103dbce7..810f7d026f54 100644
--- a/net/batman-adv/main.h
+++ b/net/batman-adv/main.h
@@ -24,7 +24,7 @@
#define BATADV_DRIVER_DEVICE "batman-adv"
#ifndef BATADV_SOURCE_VERSION
-#define BATADV_SOURCE_VERSION "2017.0"
+#define BATADV_SOURCE_VERSION "2017.1"
#endif
/* B.A.T.M.A.N. parameters */
@@ -193,6 +193,7 @@ enum batadv_uev_type {
#include <linux/percpu.h>
#include <linux/types.h>
+#include "packet.h"
#include "types.h"
struct net_device;
@@ -200,8 +201,19 @@ struct packet_type;
struct seq_file;
struct sk_buff;
-#define BATADV_PRINT_VID(vid) (((vid) & BATADV_VLAN_HAS_TAG) ? \
- (int)((vid) & VLAN_VID_MASK) : -1)
+/**
+ * batadv_print_vid - return printable version of vid information
+ * @vid: the VLAN identifier
+ *
+ * Return: -1 when no VLAN is used, VLAN id otherwise
+ */
+static inline int batadv_print_vid(unsigned short vid)
+{
+ if (vid & BATADV_VLAN_HAS_TAG)
+ return (int)(vid & VLAN_VID_MASK);
+ else
+ return -1;
+}
extern struct list_head batadv_hardif_list;
diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c
index 952ba81a565b..d327670641ac 100644
--- a/net/batman-adv/multicast.c
+++ b/net/batman-adv/multicast.c
@@ -494,9 +494,8 @@ static bool batadv_mcast_mla_tvlv_update(struct batadv_priv *bat_priv)
if (!bridged)
goto update;
-#if !IS_ENABLED(CONFIG_BRIDGE_IGMP_SNOOPING)
- pr_warn_once("No bridge IGMP snooping compiled - multicast optimizations disabled\n");
-#endif
+ if (!IS_ENABLED(CONFIG_BRIDGE_IGMP_SNOOPING))
+ pr_warn_once("No bridge IGMP snooping compiled - multicast optimizations disabled\n");
querier4.exists = br_multicast_has_querier_anywhere(dev, ETH_P_IP);
querier4.shadowing = br_multicast_has_querier_adjacent(dev, ETH_P_IP);
@@ -671,7 +670,6 @@ static int batadv_mcast_forw_mode_check_ipv4(struct batadv_priv *bat_priv,
return 0;
}
-#if IS_ENABLED(CONFIG_IPV6)
/**
* batadv_mcast_is_report_ipv6 - check for MLD reports
* @skb: the ethernet frame destined for the mesh
@@ -736,7 +734,6 @@ static int batadv_mcast_forw_mode_check_ipv6(struct batadv_priv *bat_priv,
return 0;
}
-#endif
/**
* batadv_mcast_forw_mode_check - check for optimized forwarding potential
@@ -765,11 +762,12 @@ static int batadv_mcast_forw_mode_check(struct batadv_priv *bat_priv,
case ETH_P_IP:
return batadv_mcast_forw_mode_check_ipv4(bat_priv, skb,
is_unsnoopable);
-#if IS_ENABLED(CONFIG_IPV6)
case ETH_P_IPV6:
+ if (!IS_ENABLED(CONFIG_IPV6))
+ return -EINVAL;
+
return batadv_mcast_forw_mode_check_ipv6(bat_priv, skb,
is_unsnoopable);
-#endif
default:
return -EINVAL;
}
diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c
index 7fd740b6e36d..e1ebe14ee2a6 100644
--- a/net/batman-adv/routing.c
+++ b/net/batman-adv/routing.c
@@ -941,15 +941,17 @@ int batadv_recv_unicast_packet(struct sk_buff *skb,
struct batadv_priv *bat_priv = netdev_priv(recv_if->soft_iface);
struct batadv_unicast_packet *unicast_packet;
struct batadv_unicast_4addr_packet *unicast_4addr_packet;
- u8 *orig_addr;
- struct batadv_orig_node *orig_node = NULL;
+ u8 *orig_addr, *orig_addr_gw;
+ struct batadv_orig_node *orig_node = NULL, *orig_node_gw = NULL;
int check, hdr_size = sizeof(*unicast_packet);
enum batadv_subtype subtype;
- bool is4addr;
+ struct ethhdr *ethhdr;
int ret = NET_RX_DROP;
+ bool is4addr, is_gw;
unicast_packet = (struct batadv_unicast_packet *)skb->data;
unicast_4addr_packet = (struct batadv_unicast_4addr_packet *)skb->data;
+ ethhdr = eth_hdr(skb);
is4addr = unicast_packet->packet_type == BATADV_UNICAST_4ADDR;
/* the caller function should have already pulled 2 bytes */
@@ -972,6 +974,23 @@ int batadv_recv_unicast_packet(struct sk_buff *skb,
/* packet for me */
if (batadv_is_my_mac(bat_priv, unicast_packet->dest)) {
+ /* If this is a unicast packet from another backgone gw,
+ * drop it.
+ */
+ orig_addr_gw = ethhdr->h_source;
+ orig_node_gw = batadv_orig_hash_find(bat_priv, orig_addr_gw);
+ if (orig_node_gw) {
+ is_gw = batadv_bla_is_backbone_gw(skb, orig_node_gw,
+ hdr_size);
+ batadv_orig_node_put(orig_node_gw);
+ if (is_gw) {
+ batadv_dbg(BATADV_DBG_BLA, bat_priv,
+ "recv_unicast_packet(): Dropped unicast pkt received from another backbone gw %pM.\n",
+ orig_addr_gw);
+ return NET_RX_DROP;
+ }
+ }
+
if (is4addr) {
subtype = unicast_4addr_packet->subtype;
batadv_dat_inc_counter(bat_priv, subtype);
diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c
index 1489ec27daff..403df596a73d 100644
--- a/net/batman-adv/send.c
+++ b/net/batman-adv/send.c
@@ -482,6 +482,7 @@ void batadv_forw_packet_free(struct batadv_forw_packet *forw_packet,
* @if_outgoing: The (optional) if_outgoing to be grabbed
* @queue_left: The (optional) queue counter to decrease
* @bat_priv: The bat_priv for the mesh of this forw_packet
+ * @skb: The raw packet this forwarding packet shall contain
*
* Allocates a forwarding packet and tries to get a reference to the
* (optional) if_incoming, if_outgoing and queue_left. If queue_left
@@ -493,7 +494,8 @@ struct batadv_forw_packet *
batadv_forw_packet_alloc(struct batadv_hard_iface *if_incoming,
struct batadv_hard_iface *if_outgoing,
atomic_t *queue_left,
- struct batadv_priv *bat_priv)
+ struct batadv_priv *bat_priv,
+ struct sk_buff *skb)
{
struct batadv_forw_packet *forw_packet;
const char *qname;
@@ -525,7 +527,7 @@ batadv_forw_packet_alloc(struct batadv_hard_iface *if_incoming,
INIT_HLIST_NODE(&forw_packet->list);
INIT_HLIST_NODE(&forw_packet->cleanup_list);
- forw_packet->skb = NULL;
+ forw_packet->skb = skb;
forw_packet->queue_left = queue_left;
forw_packet->if_incoming = if_incoming;
forw_packet->if_outgoing = if_outgoing;
@@ -756,22 +758,23 @@ int batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv,
if (!primary_if)
goto err;
+ newskb = skb_copy(skb, GFP_ATOMIC);
+ if (!newskb) {
+ batadv_hardif_put(primary_if);
+ goto err;
+ }
+
forw_packet = batadv_forw_packet_alloc(primary_if, NULL,
&bat_priv->bcast_queue_left,
- bat_priv);
+ bat_priv, newskb);
batadv_hardif_put(primary_if);
if (!forw_packet)
- goto err;
-
- newskb = skb_copy(skb, GFP_ATOMIC);
- if (!newskb)
goto err_packet_free;
/* as we have a copy now, it is safe to decrease the TTL */
bcast_packet = (struct batadv_bcast_packet *)newskb->data;
bcast_packet->ttl--;
- forw_packet->skb = newskb;
forw_packet->own = own_packet;
INIT_DELAYED_WORK(&forw_packet->delayed_work,
@@ -781,11 +784,60 @@ int batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv,
return NETDEV_TX_OK;
err_packet_free:
- batadv_forw_packet_free(forw_packet, true);
+ kfree_skb(newskb);
err:
return NETDEV_TX_BUSY;
}
+/**
+ * batadv_forw_packet_bcasts_left - check if a retransmission is necessary
+ * @forw_packet: the forwarding packet to check
+ * @hard_iface: the interface to check on
+ *
+ * Checks whether a given packet has any (re)transmissions left on the provided
+ * interface.
+ *
+ * hard_iface may be NULL: In that case the number of transmissions this skb had
+ * so far is compared with the maximum amount of retransmissions independent of
+ * any interface instead.
+ *
+ * Return: True if (re)transmissions are left, false otherwise.
+ */
+static bool
+batadv_forw_packet_bcasts_left(struct batadv_forw_packet *forw_packet,
+ struct batadv_hard_iface *hard_iface)
+{
+ unsigned int max;
+
+ if (hard_iface)
+ max = hard_iface->num_bcasts;
+ else
+ max = BATADV_NUM_BCASTS_MAX;
+
+ return BATADV_SKB_CB(forw_packet->skb)->num_bcasts < max;
+}
+
+/**
+ * batadv_forw_packet_bcasts_inc - increment retransmission counter of a packet
+ * @forw_packet: the packet to increase the counter for
+ */
+static void
+batadv_forw_packet_bcasts_inc(struct batadv_forw_packet *forw_packet)
+{
+ BATADV_SKB_CB(forw_packet->skb)->num_bcasts++;
+}
+
+/**
+ * batadv_forw_packet_is_rebroadcast - check packet for previous transmissions
+ * @forw_packet: the packet to check
+ *
+ * Return: True if this packet was transmitted before, false otherwise.
+ */
+bool batadv_forw_packet_is_rebroadcast(struct batadv_forw_packet *forw_packet)
+{
+ return BATADV_SKB_CB(forw_packet->skb)->num_bcasts > 0;
+}
+
static void batadv_send_outstanding_bcast_packet(struct work_struct *work)
{
struct batadv_hard_iface *hard_iface;
@@ -826,7 +878,7 @@ static void batadv_send_outstanding_bcast_packet(struct work_struct *work)
if (hard_iface->soft_iface != soft_iface)
continue;
- if (forw_packet->num_packets >= hard_iface->num_bcasts)
+ if (!batadv_forw_packet_bcasts_left(forw_packet, hard_iface))
continue;
if (forw_packet->own) {
@@ -884,10 +936,10 @@ static void batadv_send_outstanding_bcast_packet(struct work_struct *work)
}
rcu_read_unlock();
- forw_packet->num_packets++;
+ batadv_forw_packet_bcasts_inc(forw_packet);
/* if we still have some more bcasts to send */
- if (forw_packet->num_packets < BATADV_NUM_BCASTS_MAX) {
+ if (batadv_forw_packet_bcasts_left(forw_packet, NULL)) {
batadv_forw_packet_bcast_queue(bat_priv, forw_packet,
send_time);
return;
diff --git a/net/batman-adv/send.h b/net/batman-adv/send.h
index f21166d10323..a16b34f473ef 100644
--- a/net/batman-adv/send.h
+++ b/net/batman-adv/send.h
@@ -34,11 +34,13 @@ struct batadv_forw_packet *
batadv_forw_packet_alloc(struct batadv_hard_iface *if_incoming,
struct batadv_hard_iface *if_outgoing,
atomic_t *queue_left,
- struct batadv_priv *bat_priv);
+ struct batadv_priv *bat_priv,
+ struct sk_buff *skb);
bool batadv_forw_packet_steal(struct batadv_forw_packet *packet, spinlock_t *l);
void batadv_forw_packet_ogmv1_queue(struct batadv_priv *bat_priv,
struct batadv_forw_packet *forw_packet,
unsigned long send_time);
+bool batadv_forw_packet_is_rebroadcast(struct batadv_forw_packet *forw_packet);
int batadv_send_skb_to_orig(struct sk_buff *skb,
struct batadv_orig_node *orig_node,
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index d042c99af028..b25789abf7b9 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -64,28 +64,6 @@
#include "sysfs.h"
#include "translation-table.h"
-static int batadv_get_settings(struct net_device *dev, struct ethtool_cmd *cmd);
-static void batadv_get_drvinfo(struct net_device *dev,
- struct ethtool_drvinfo *info);
-static u32 batadv_get_msglevel(struct net_device *dev);
-static void batadv_set_msglevel(struct net_device *dev, u32 value);
-static u32 batadv_get_link(struct net_device *dev);
-static void batadv_get_strings(struct net_device *dev, u32 stringset, u8 *data);
-static void batadv_get_ethtool_stats(struct net_device *dev,
- struct ethtool_stats *stats, u64 *data);
-static int batadv_get_sset_count(struct net_device *dev, int stringset);
-
-static const struct ethtool_ops batadv_ethtool_ops = {
- .get_settings = batadv_get_settings,
- .get_drvinfo = batadv_get_drvinfo,
- .get_msglevel = batadv_get_msglevel,
- .set_msglevel = batadv_set_msglevel,
- .get_link = batadv_get_link,
- .get_strings = batadv_get_strings,
- .get_ethtool_stats = batadv_get_ethtool_stats,
- .get_sset_count = batadv_get_sset_count,
-};
-
int batadv_skb_head_push(struct sk_buff *skb, unsigned int len)
{
int result;
@@ -140,7 +118,7 @@ static u64 batadv_sum_counter(struct batadv_priv *bat_priv, size_t idx)
static struct net_device_stats *batadv_interface_stats(struct net_device *dev)
{
struct batadv_priv *bat_priv = netdev_priv(dev);
- struct net_device_stats *stats = &bat_priv->stats;
+ struct net_device_stats *stats = &dev->stats;
stats->tx_packets = batadv_sum_counter(bat_priv, BATADV_CNT_TX);
stats->tx_bytes = batadv_sum_counter(bat_priv, BATADV_CNT_TX_BYTES);
@@ -230,6 +208,9 @@ static int batadv_interface_tx(struct sk_buff *skb,
if (atomic_read(&bat_priv->mesh_state) != BATADV_MESH_ACTIVE)
goto dropped;
+ /* reset control block to avoid left overs from previous users */
+ memset(skb->cb, 0, sizeof(struct batadv_skb_cb));
+
netif_trans_update(soft_iface);
vid = batadv_get_vid(skb, 0);
ethhdr = eth_hdr(skb);
@@ -947,6 +928,98 @@ static const struct net_device_ops batadv_netdev_ops = {
.ndo_del_slave = batadv_softif_slave_del,
};
+static void batadv_get_drvinfo(struct net_device *dev,
+ struct ethtool_drvinfo *info)
+{
+ strlcpy(info->driver, "B.A.T.M.A.N. advanced", sizeof(info->driver));
+ strlcpy(info->version, BATADV_SOURCE_VERSION, sizeof(info->version));
+ strlcpy(info->fw_version, "N/A", sizeof(info->fw_version));
+ strlcpy(info->bus_info, "batman", sizeof(info->bus_info));
+}
+
+/* Inspired by drivers/net/ethernet/dlink/sundance.c:1702
+ * Declare each description string in struct.name[] to get fixed sized buffer
+ * and compile time checking for strings longer than ETH_GSTRING_LEN.
+ */
+static const struct {
+ const char name[ETH_GSTRING_LEN];
+} batadv_counters_strings[] = {
+ { "tx" },
+ { "tx_bytes" },
+ { "tx_dropped" },
+ { "rx" },
+ { "rx_bytes" },
+ { "forward" },
+ { "forward_bytes" },
+ { "mgmt_tx" },
+ { "mgmt_tx_bytes" },
+ { "mgmt_rx" },
+ { "mgmt_rx_bytes" },
+ { "frag_tx" },
+ { "frag_tx_bytes" },
+ { "frag_rx" },
+ { "frag_rx_bytes" },
+ { "frag_fwd" },
+ { "frag_fwd_bytes" },
+ { "tt_request_tx" },
+ { "tt_request_rx" },
+ { "tt_response_tx" },
+ { "tt_response_rx" },
+ { "tt_roam_adv_tx" },
+ { "tt_roam_adv_rx" },
+#ifdef CONFIG_BATMAN_ADV_DAT
+ { "dat_get_tx" },
+ { "dat_get_rx" },
+ { "dat_put_tx" },
+ { "dat_put_rx" },
+ { "dat_cached_reply_tx" },
+#endif
+#ifdef CONFIG_BATMAN_ADV_NC
+ { "nc_code" },
+ { "nc_code_bytes" },
+ { "nc_recode" },
+ { "nc_recode_bytes" },
+ { "nc_buffer" },
+ { "nc_decode" },
+ { "nc_decode_bytes" },
+ { "nc_decode_failed" },
+ { "nc_sniffed" },
+#endif
+};
+
+static void batadv_get_strings(struct net_device *dev, u32 stringset, u8 *data)
+{
+ if (stringset == ETH_SS_STATS)
+ memcpy(data, batadv_counters_strings,
+ sizeof(batadv_counters_strings));
+}
+
+static void batadv_get_ethtool_stats(struct net_device *dev,
+ struct ethtool_stats *stats, u64 *data)
+{
+ struct batadv_priv *bat_priv = netdev_priv(dev);
+ int i;
+
+ for (i = 0; i < BATADV_CNT_NUM; i++)
+ data[i] = batadv_sum_counter(bat_priv, i);
+}
+
+static int batadv_get_sset_count(struct net_device *dev, int stringset)
+{
+ if (stringset == ETH_SS_STATS)
+ return BATADV_CNT_NUM;
+
+ return -EOPNOTSUPP;
+}
+
+static const struct ethtool_ops batadv_ethtool_ops = {
+ .get_drvinfo = batadv_get_drvinfo,
+ .get_link = ethtool_op_get_link,
+ .get_strings = batadv_get_strings,
+ .get_ethtool_stats = batadv_get_ethtool_stats,
+ .get_sset_count = batadv_get_sset_count,
+};
+
/**
* batadv_softif_free - Deconstructor of batadv_soft_interface
* @dev: Device to cleanup and remove
@@ -971,8 +1044,6 @@ static void batadv_softif_free(struct net_device *dev)
*/
static void batadv_softif_init_early(struct net_device *dev)
{
- struct batadv_priv *priv = netdev_priv(dev);
-
ether_setup(dev);
dev->netdev_ops = &batadv_netdev_ops;
@@ -989,8 +1060,6 @@ static void batadv_softif_init_early(struct net_device *dev)
eth_hw_addr_random(dev);
dev->ethtool_ops = &batadv_ethtool_ops;
-
- memset(priv, 0, sizeof(*priv));
}
struct net_device *batadv_softif_create(struct net *net, const char *name)
@@ -1083,118 +1152,3 @@ struct rtnl_link_ops batadv_link_ops __read_mostly = {
.setup = batadv_softif_init_early,
.dellink = batadv_softif_destroy_netlink,
};
-
-/* ethtool */
-static int batadv_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
-{
- cmd->supported = 0;
- cmd->advertising = 0;
- ethtool_cmd_speed_set(cmd, SPEED_10);
- cmd->duplex = DUPLEX_FULL;
- cmd->port = PORT_TP;
- cmd->phy_address = 0;
- cmd->transceiver = XCVR_INTERNAL;
- cmd->autoneg = AUTONEG_DISABLE;
- cmd->maxtxpkt = 0;
- cmd->maxrxpkt = 0;
-
- return 0;
-}
-
-static void batadv_get_drvinfo(struct net_device *dev,
- struct ethtool_drvinfo *info)
-{
- strlcpy(info->driver, "B.A.T.M.A.N. advanced", sizeof(info->driver));
- strlcpy(info->version, BATADV_SOURCE_VERSION, sizeof(info->version));
- strlcpy(info->fw_version, "N/A", sizeof(info->fw_version));
- strlcpy(info->bus_info, "batman", sizeof(info->bus_info));
-}
-
-static u32 batadv_get_msglevel(struct net_device *dev)
-{
- return -EOPNOTSUPP;
-}
-
-static void batadv_set_msglevel(struct net_device *dev, u32 value)
-{
-}
-
-static u32 batadv_get_link(struct net_device *dev)
-{
- return 1;
-}
-
-/* Inspired by drivers/net/ethernet/dlink/sundance.c:1702
- * Declare each description string in struct.name[] to get fixed sized buffer
- * and compile time checking for strings longer than ETH_GSTRING_LEN.
- */
-static const struct {
- const char name[ETH_GSTRING_LEN];
-} batadv_counters_strings[] = {
- { "tx" },
- { "tx_bytes" },
- { "tx_dropped" },
- { "rx" },
- { "rx_bytes" },
- { "forward" },
- { "forward_bytes" },
- { "mgmt_tx" },
- { "mgmt_tx_bytes" },
- { "mgmt_rx" },
- { "mgmt_rx_bytes" },
- { "frag_tx" },
- { "frag_tx_bytes" },
- { "frag_rx" },
- { "frag_rx_bytes" },
- { "frag_fwd" },
- { "frag_fwd_bytes" },
- { "tt_request_tx" },
- { "tt_request_rx" },
- { "tt_response_tx" },
- { "tt_response_rx" },
- { "tt_roam_adv_tx" },
- { "tt_roam_adv_rx" },
-#ifdef CONFIG_BATMAN_ADV_DAT
- { "dat_get_tx" },
- { "dat_get_rx" },
- { "dat_put_tx" },
- { "dat_put_rx" },
- { "dat_cached_reply_tx" },
-#endif
-#ifdef CONFIG_BATMAN_ADV_NC
- { "nc_code" },
- { "nc_code_bytes" },
- { "nc_recode" },
- { "nc_recode_bytes" },
- { "nc_buffer" },
- { "nc_decode" },
- { "nc_decode_bytes" },
- { "nc_decode_failed" },
- { "nc_sniffed" },
-#endif
-};
-
-static void batadv_get_strings(struct net_device *dev, u32 stringset, u8 *data)
-{
- if (stringset == ETH_SS_STATS)
- memcpy(data, batadv_counters_strings,
- sizeof(batadv_counters_strings));
-}
-
-static void batadv_get_ethtool_stats(struct net_device *dev,
- struct ethtool_stats *stats, u64 *data)
-{
- struct batadv_priv *bat_priv = netdev_priv(dev);
- int i;
-
- for (i = 0; i < BATADV_CNT_NUM; i++)
- data[i] = batadv_sum_counter(bat_priv, i);
-}
-
-static int batadv_get_sset_count(struct net_device *dev, int stringset)
-{
- if (stringset == ETH_SS_STATS)
- return BATADV_CNT_NUM;
-
- return -EOPNOTSUPP;
-}
diff --git a/net/batman-adv/tp_meter.c b/net/batman-adv/tp_meter.c
index c94ebdecdc3d..556f9a865ddf 100644
--- a/net/batman-adv/tp_meter.c
+++ b/net/batman-adv/tp_meter.c
@@ -873,8 +873,8 @@ static int batadv_tp_send(void *arg)
/* something went wrong during the preparation/transmission */
if (unlikely(err && err != BATADV_TP_REASON_CANT_SEND)) {
batadv_dbg(BATADV_DBG_TP_METER, bat_priv,
- "Meter: batadv_tp_send() cannot send packets (%d)\n",
- err);
+ "Meter: %s() cannot send packets (%d)\n",
+ __func__, err);
/* ensure nobody else tries to stop the thread now */
if (atomic_dec_and_test(&tp_vars->sending))
tp_vars->reason = err;
@@ -979,7 +979,8 @@ void batadv_tp_start(struct batadv_priv *bat_priv, const u8 *dst,
if (!tp_vars) {
spin_unlock_bh(&bat_priv->tp_list_lock);
batadv_dbg(BATADV_DBG_TP_METER, bat_priv,
- "Meter: batadv_tp_start cannot allocate list elements\n");
+ "Meter: %s cannot allocate list elements\n",
+ __func__);
batadv_tp_batctl_error_notify(BATADV_TP_REASON_MEMORY_ERROR,
dst, bat_priv, session_cookie);
return;
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index 6077a87d46f0..e75b4937b497 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -617,7 +617,7 @@ static void batadv_tt_global_free(struct batadv_priv *bat_priv,
batadv_dbg(BATADV_DBG_TT, bat_priv,
"Deleting global tt entry %pM (vid: %d): %s\n",
tt_global->common.addr,
- BATADV_PRINT_VID(tt_global->common.vid), message);
+ batadv_print_vid(tt_global->common.vid), message);
batadv_hash_remove(bat_priv->tt.global_hash, batadv_compare_tt,
batadv_choose_tt, &tt_global->common);
@@ -671,7 +671,7 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr,
if (tt_local->common.flags & BATADV_TT_CLIENT_PENDING) {
batadv_dbg(BATADV_DBG_TT, bat_priv,
"Re-adding pending client %pM (vid: %d)\n",
- addr, BATADV_PRINT_VID(vid));
+ addr, batadv_print_vid(vid));
/* whatever the reason why the PENDING flag was set,
* this is a client which was enqueued to be removed in
* this orig_interval. Since it popped up again, the
@@ -684,7 +684,7 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr,
if (tt_local->common.flags & BATADV_TT_CLIENT_ROAM) {
batadv_dbg(BATADV_DBG_TT, bat_priv,
"Roaming client %pM (vid: %d) came back to its original location\n",
- addr, BATADV_PRINT_VID(vid));
+ addr, batadv_print_vid(vid));
/* the ROAM flag is set because this client roamed away
* and the node got a roaming_advertisement message. Now
* that the client popped up again at its original
@@ -716,7 +716,7 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr,
if (!vlan) {
net_ratelimited_function(batadv_info, soft_iface,
"adding TT local entry %pM to non-existent VLAN %d\n",
- addr, BATADV_PRINT_VID(vid));
+ addr, batadv_print_vid(vid));
kmem_cache_free(batadv_tl_cache, tt_local);
tt_local = NULL;
goto out;
@@ -724,7 +724,7 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr,
batadv_dbg(BATADV_DBG_TT, bat_priv,
"Creating new local tt entry: %pM (vid: %d, ttvn: %d)\n",
- addr, BATADV_PRINT_VID(vid),
+ addr, batadv_print_vid(vid),
(u8)atomic_read(&bat_priv->tt.vn));
ether_addr_copy(tt_local->common.addr, addr);
@@ -1097,7 +1097,7 @@ int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset)
seq_printf(seq,
" * %pM %4i [%c%c%c%c%c%c] %3u.%03u (%#.8x)\n",
tt_common_entry->addr,
- BATADV_PRINT_VID(tt_common_entry->vid),
+ batadv_print_vid(tt_common_entry->vid),
((tt_common_entry->flags &
BATADV_TT_CLIENT_ROAM) ? 'R' : '.'),
no_purge ? 'P' : '.',
@@ -1296,7 +1296,7 @@ batadv_tt_local_set_pending(struct batadv_priv *bat_priv,
batadv_dbg(BATADV_DBG_TT, bat_priv,
"Local tt entry (%pM, vid: %d) pending to be removed: %s\n",
tt_local_entry->common.addr,
- BATADV_PRINT_VID(tt_local_entry->common.vid), message);
+ batadv_print_vid(tt_local_entry->common.vid), message);
}
/**
@@ -1727,7 +1727,7 @@ add_orig_entry:
batadv_dbg(BATADV_DBG_TT, bat_priv,
"Creating new global tt entry: %pM (vid: %d, via %pM)\n",
- common->addr, BATADV_PRINT_VID(common->vid),
+ common->addr, batadv_print_vid(common->vid),
orig_node->orig);
ret = true;
@@ -1835,7 +1835,7 @@ batadv_tt_global_print_entry(struct batadv_priv *bat_priv,
if (!vlan) {
seq_printf(seq,
" * Cannot retrieve VLAN %d for originator %pM\n",
- BATADV_PRINT_VID(tt_common_entry->vid),
+ batadv_print_vid(tt_common_entry->vid),
best_entry->orig_node->orig);
goto print_list;
}
@@ -1844,7 +1844,7 @@ batadv_tt_global_print_entry(struct batadv_priv *bat_priv,
seq_printf(seq,
" %c %pM %4i (%3u) via %pM (%3u) (%#.8x) [%c%c%c%c]\n",
'*', tt_global_entry->common.addr,
- BATADV_PRINT_VID(tt_global_entry->common.vid),
+ batadv_print_vid(tt_global_entry->common.vid),
best_entry->ttvn, best_entry->orig_node->orig,
last_ttvn, vlan->tt.crc,
((flags & BATADV_TT_CLIENT_ROAM) ? 'R' : '.'),
@@ -1867,7 +1867,7 @@ print_list:
if (!vlan) {
seq_printf(seq,
" + Cannot retrieve VLAN %d for originator %pM\n",
- BATADV_PRINT_VID(tt_common_entry->vid),
+ batadv_print_vid(tt_common_entry->vid),
orig_entry->orig_node->orig);
continue;
}
@@ -1876,7 +1876,7 @@ print_list:
seq_printf(seq,
" %c %pM %4d (%3u) via %pM (%3u) (%#.8x) [%c%c%c%c]\n",
'+', tt_global_entry->common.addr,
- BATADV_PRINT_VID(tt_global_entry->common.vid),
+ batadv_print_vid(tt_global_entry->common.vid),
orig_entry->ttvn, orig_entry->orig_node->orig,
last_ttvn, vlan->tt.crc,
((flags & BATADV_TT_CLIENT_ROAM) ? 'R' : '.'),
@@ -2213,7 +2213,7 @@ batadv_tt_global_del_orig_node(struct batadv_priv *bat_priv,
"Deleting %pM from global tt entry %pM (vid: %d): %s\n",
orig_node->orig,
tt_global_entry->common.addr,
- BATADV_PRINT_VID(vid), message);
+ batadv_print_vid(vid), message);
_batadv_tt_global_del_orig_entry(tt_global_entry,
orig_entry);
}
@@ -2253,12 +2253,13 @@ batadv_tt_global_del_roaming(struct batadv_priv *bat_priv,
/* its the last one, mark for roaming. */
tt_global_entry->common.flags |= BATADV_TT_CLIENT_ROAM;
tt_global_entry->roam_at = jiffies;
- } else
+ } else {
/* there is another entry, we can simply delete this
* one and can still use the other one.
*/
batadv_tt_global_del_orig_node(bat_priv, tt_global_entry,
orig_node, message);
+ }
}
/**
@@ -2314,10 +2315,11 @@ static void batadv_tt_global_del(struct batadv_priv *bat_priv,
/* local entry exists, case 2: client roamed to us. */
batadv_tt_global_del_orig_list(tt_global_entry);
batadv_tt_global_free(bat_priv, tt_global_entry, message);
- } else
+ } else {
/* no local entry exists, case 1: check for roaming */
batadv_tt_global_del_roaming(bat_priv, tt_global_entry,
orig_node, message);
+ }
out:
if (tt_global_entry)
@@ -2375,7 +2377,7 @@ void batadv_tt_global_del_orig(struct batadv_priv *bat_priv,
batadv_dbg(BATADV_DBG_TT, bat_priv,
"Deleting global tt entry %pM (vid: %d): %s\n",
tt_global->common.addr,
- BATADV_PRINT_VID(vid), message);
+ batadv_print_vid(vid), message);
hlist_del_rcu(&tt_common_entry->hash_entry);
batadv_tt_global_entry_put(tt_global);
}
@@ -2435,7 +2437,7 @@ static void batadv_tt_global_purge(struct batadv_priv *bat_priv)
batadv_dbg(BATADV_DBG_TT, bat_priv,
"Deleting global tt entry %pM (vid: %d): %s\n",
tt_global->common.addr,
- BATADV_PRINT_VID(tt_global->common.vid),
+ batadv_print_vid(tt_global->common.vid),
msg);
hlist_del_rcu(&tt_common->hash_entry);
@@ -3650,7 +3652,7 @@ static void batadv_send_roam_adv(struct batadv_priv *bat_priv, u8 *client,
batadv_dbg(BATADV_DBG_TT, bat_priv,
"Sending ROAMING_ADV to %pM (client %pM, vid: %d)\n",
- orig_node->orig, client, BATADV_PRINT_VID(vid));
+ orig_node->orig, client, batadv_print_vid(vid));
batadv_inc_counter(bat_priv, BATADV_CNT_TT_ROAM_ADV_TX);
@@ -3773,7 +3775,7 @@ static void batadv_tt_local_purge_pending_clients(struct batadv_priv *bat_priv)
batadv_dbg(BATADV_DBG_TT, bat_priv,
"Deleting local tt entry (%pM, vid: %d): pending\n",
tt_common->addr,
- BATADV_PRINT_VID(tt_common->vid));
+ batadv_print_vid(tt_common->vid));
batadv_tt_local_size_dec(bat_priv, tt_common->vid);
hlist_del_rcu(&tt_common->hash_entry);
@@ -4017,7 +4019,7 @@ bool batadv_tt_add_temporary_global_entry(struct batadv_priv *bat_priv,
batadv_dbg(BATADV_DBG_TT, bat_priv,
"Added temporary global client (addr: %pM, vid: %d, orig: %pM)\n",
- addr, BATADV_PRINT_VID(vid), orig_node->orig);
+ addr, batadv_print_vid(vid), orig_node->orig);
ret = true;
out:
return ret;
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
index 246f21b4973b..ea43a6449247 100644
--- a/net/batman-adv/types.h
+++ b/net/batman-adv/types.h
@@ -1000,7 +1000,6 @@ struct batadv_priv_bat_v {
* struct batadv_priv - per mesh interface data
* @mesh_state: current status of the mesh (inactive/active/deactivating)
* @soft_iface: net device which holds this struct as private data
- * @stats: structure holding the data for the ndo_get_stats() call
* @bat_counters: mesh internal traffic statistic counters (see batadv_counters)
* @aggregated_ogms: bool indicating whether OGM aggregation is enabled
* @bonding: bool indicating whether traffic bonding is enabled
@@ -1055,7 +1054,6 @@ struct batadv_priv_bat_v {
struct batadv_priv {
atomic_t mesh_state;
struct net_device *soft_iface;
- struct net_device_stats stats;
u64 __percpu *bat_counters; /* Per cpu counters */
atomic_t aggregated_ogms;
atomic_t bonding;
@@ -1377,9 +1375,11 @@ struct batadv_nc_packet {
* relevant to batman-adv in the skb->cb buffer in skbs.
* @decoded: Marks a skb as decoded, which is checked when searching for coding
* opportunities in network-coding.c
+ * @num_bcasts: Counter for broadcast packet retransmissions
*/
struct batadv_skb_cb {
bool decoded;
+ unsigned int num_bcasts;
};
/**
@@ -1392,7 +1392,7 @@ struct batadv_skb_cb {
* @skb: bcast packet's skb buffer
* @packet_len: size of aggregated OGM packet inside the skb buffer
* @direct_link_flags: direct link flags for aggregated OGM packets
- * @num_packets: counter for bcast packet retransmission
+ * @num_packets: counter for aggregated OGMv1 packets
* @delayed_work: work queue callback item for packet sending
* @if_incoming: pointer to incoming hard-iface or primary iface if
* locally generated packet
diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c
index d491529332f4..608959989f8e 100644
--- a/net/bluetooth/6lowpan.c
+++ b/net/bluetooth/6lowpan.c
@@ -20,6 +20,7 @@
#include <net/ipv6.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
+#include <net/pkt_sched.h>
#include <net/bluetooth/bluetooth.h>
#include <net/bluetooth/hci_core.h>
@@ -38,7 +39,6 @@ struct skb_cb {
struct in6_addr addr;
struct in6_addr gw;
struct l2cap_chan *chan;
- int status;
};
#define lowpan_cb(skb) ((struct skb_cb *)((skb)->cb))
@@ -64,7 +64,7 @@ struct lowpan_peer {
struct l2cap_chan *chan;
/* peer addresses in various formats */
- unsigned char eui64_addr[EUI64_ADDR_LEN];
+ unsigned char lladdr[ETH_ALEN];
struct in6_addr peer_addr;
};
@@ -270,28 +270,20 @@ static int give_skb_to_upper(struct sk_buff *skb, struct net_device *dev)
}
static int iphc_decompress(struct sk_buff *skb, struct net_device *netdev,
- struct l2cap_chan *chan)
+ struct lowpan_peer *peer)
{
- const u8 *saddr, *daddr;
+ const u8 *saddr;
struct lowpan_btle_dev *dev;
- struct lowpan_peer *peer;
dev = lowpan_btle_dev(netdev);
- rcu_read_lock();
- peer = __peer_lookup_chan(dev, chan);
- rcu_read_unlock();
- if (!peer)
- return -EINVAL;
-
- saddr = peer->eui64_addr;
- daddr = dev->netdev->dev_addr;
+ saddr = peer->lladdr;
- return lowpan_header_decompress(skb, netdev, daddr, saddr);
+ return lowpan_header_decompress(skb, netdev, netdev->dev_addr, saddr);
}
static int recv_pkt(struct sk_buff *skb, struct net_device *dev,
- struct l2cap_chan *chan)
+ struct lowpan_peer *peer)
{
struct sk_buff *local_skb;
int ret;
@@ -344,8 +336,9 @@ static int recv_pkt(struct sk_buff *skb, struct net_device *dev,
local_skb->dev = dev;
- ret = iphc_decompress(local_skb, dev, chan);
+ ret = iphc_decompress(local_skb, dev, peer);
if (ret < 0) {
+ BT_DBG("iphc_decompress failed: %d", ret);
kfree_skb(local_skb);
goto drop;
}
@@ -365,6 +358,7 @@ static int recv_pkt(struct sk_buff *skb, struct net_device *dev,
consume_skb(local_skb);
consume_skb(skb);
} else {
+ BT_DBG("unknown packet type");
goto drop;
}
@@ -390,7 +384,7 @@ static int chan_recv_cb(struct l2cap_chan *chan, struct sk_buff *skb)
if (!dev || !dev->netdev)
return -ENOENT;
- err = recv_pkt(skb, dev->netdev, chan);
+ err = recv_pkt(skb, dev->netdev, peer);
if (err) {
BT_DBG("recv pkt %d", err);
err = -EAGAIN;
@@ -399,37 +393,6 @@ static int chan_recv_cb(struct l2cap_chan *chan, struct sk_buff *skb)
return err;
}
-static u8 get_addr_type_from_eui64(u8 byte)
-{
- /* Is universal(0) or local(1) bit */
- return ((byte & 0x02) ? BDADDR_LE_RANDOM : BDADDR_LE_PUBLIC);
-}
-
-static void copy_to_bdaddr(struct in6_addr *ip6_daddr, bdaddr_t *addr)
-{
- u8 *eui64 = ip6_daddr->s6_addr + 8;
-
- addr->b[0] = eui64[7];
- addr->b[1] = eui64[6];
- addr->b[2] = eui64[5];
- addr->b[3] = eui64[2];
- addr->b[4] = eui64[1];
- addr->b[5] = eui64[0];
-}
-
-static void convert_dest_bdaddr(struct in6_addr *ip6_daddr,
- bdaddr_t *addr, u8 *addr_type)
-{
- copy_to_bdaddr(ip6_daddr, addr);
-
- /* We need to toggle the U/L bit that we got from IPv6 address
- * so that we get the proper address and type of the BD address.
- */
- addr->b[5] ^= 0x02;
-
- *addr_type = get_addr_type_from_eui64(addr->b[5]);
-}
-
static int setup_header(struct sk_buff *skb, struct net_device *netdev,
bdaddr_t *peer_addr, u8 *peer_addr_type)
{
@@ -437,8 +400,7 @@ static int setup_header(struct sk_buff *skb, struct net_device *netdev,
struct ipv6hdr *hdr;
struct lowpan_btle_dev *dev;
struct lowpan_peer *peer;
- bdaddr_t addr, *any = BDADDR_ANY;
- u8 *daddr = any->b;
+ u8 *daddr;
int err, status = 0;
hdr = ipv6_hdr(skb);
@@ -449,34 +411,24 @@ static int setup_header(struct sk_buff *skb, struct net_device *netdev,
if (ipv6_addr_is_multicast(&ipv6_daddr)) {
lowpan_cb(skb)->chan = NULL;
+ daddr = NULL;
} else {
- u8 addr_type;
+ BT_DBG("dest IP %pI6c", &ipv6_daddr);
- /* Get destination BT device from skb.
- * If there is no such peer then discard the packet.
+ /* The packet might be sent to 6lowpan interface
+ * because of routing (either via default route
+ * or user set route) so get peer according to
+ * the destination address.
*/
- convert_dest_bdaddr(&ipv6_daddr, &addr, &addr_type);
-
- BT_DBG("dest addr %pMR type %d IP %pI6c", &addr,
- addr_type, &ipv6_daddr);
-
- peer = peer_lookup_ba(dev, &addr, addr_type);
+ peer = peer_lookup_dst(dev, &ipv6_daddr, skb);
if (!peer) {
- /* The packet might be sent to 6lowpan interface
- * because of routing (either via default route
- * or user set route) so get peer according to
- * the destination address.
- */
- peer = peer_lookup_dst(dev, &ipv6_daddr, skb);
- if (!peer) {
- BT_DBG("no such peer %pMR found", &addr);
- return -ENOENT;
- }
+ BT_DBG("no such peer");
+ return -ENOENT;
}
- daddr = peer->eui64_addr;
- *peer_addr = addr;
- *peer_addr_type = addr_type;
+ daddr = peer->lladdr;
+ *peer_addr = peer->chan->dst;
+ *peer_addr_type = peer->chan->dst_type;
lowpan_cb(skb)->chan = peer->chan;
status = 1;
@@ -527,15 +479,8 @@ static int send_pkt(struct l2cap_chan *chan, struct sk_buff *skb,
return 0;
}
- if (!err)
- err = lowpan_cb(skb)->status;
-
- if (err < 0) {
- if (err == -EAGAIN)
- netdev->stats.tx_dropped++;
- else
- netdev->stats.tx_errors++;
- }
+ if (err < 0)
+ netdev->stats.tx_errors++;
return err;
}
@@ -647,9 +592,9 @@ static void netdev_setup(struct net_device *dev)
{
dev->hard_header_len = 0;
dev->needed_tailroom = 0;
- dev->flags = IFF_RUNNING | IFF_POINTOPOINT |
- IFF_MULTICAST;
+ dev->flags = IFF_RUNNING | IFF_MULTICAST;
dev->watchdog_timeo = 0;
+ dev->tx_queue_len = DEFAULT_TX_QUEUE_LEN;
dev->netdev_ops = &netdev_ops;
dev->header_ops = &header_ops;
@@ -660,34 +605,6 @@ static struct device_type bt_type = {
.name = "bluetooth",
};
-static void set_addr(u8 *eui, u8 *addr, u8 addr_type)
-{
- /* addr is the BT address in little-endian format */
- eui[0] = addr[5];
- eui[1] = addr[4];
- eui[2] = addr[3];
- eui[3] = 0xFF;
- eui[4] = 0xFE;
- eui[5] = addr[2];
- eui[6] = addr[1];
- eui[7] = addr[0];
-
- /* Universal/local bit set, BT 6lowpan draft ch. 3.2.1 */
- if (addr_type == BDADDR_LE_PUBLIC)
- eui[0] &= ~0x02;
- else
- eui[0] |= 0x02;
-
- BT_DBG("type %d addr %*phC", addr_type, 8, eui);
-}
-
-static void set_dev_addr(struct net_device *netdev, bdaddr_t *addr,
- u8 addr_type)
-{
- netdev->addr_assign_type = NET_ADDR_PERM;
- set_addr(netdev->dev_addr, addr->b, addr_type);
-}
-
static void ifup(struct net_device *netdev)
{
int err;
@@ -746,16 +663,9 @@ static struct l2cap_chan *chan_create(void)
return chan;
}
-static void set_ip_addr_bits(u8 addr_type, u8 *addr)
-{
- if (addr_type == BDADDR_LE_PUBLIC)
- *addr |= 0x02;
- else
- *addr &= ~0x02;
-}
-
static struct l2cap_chan *add_peer_chan(struct l2cap_chan *chan,
- struct lowpan_btle_dev *dev)
+ struct lowpan_btle_dev *dev,
+ bool new_netdev)
{
struct lowpan_peer *peer;
@@ -766,19 +676,9 @@ static struct l2cap_chan *add_peer_chan(struct l2cap_chan *chan,
peer->chan = chan;
memset(&peer->peer_addr, 0, sizeof(struct in6_addr));
- /* RFC 2464 ch. 5 */
- peer->peer_addr.s6_addr[0] = 0xFE;
- peer->peer_addr.s6_addr[1] = 0x80;
- set_addr((u8 *)&peer->peer_addr.s6_addr + 8, chan->dst.b,
- chan->dst_type);
-
- memcpy(&peer->eui64_addr, (u8 *)&peer->peer_addr.s6_addr + 8,
- EUI64_ADDR_LEN);
+ baswap((void *)peer->lladdr, &chan->dst);
- /* IPv6 address needs to have the U/L bit set properly so toggle
- * it back here.
- */
- set_ip_addr_bits(chan->dst_type, (u8 *)&peer->peer_addr.s6_addr + 8);
+ lowpan_iphc_uncompress_eui48_lladdr(&peer->peer_addr, peer->lladdr);
spin_lock(&devices_lock);
INIT_LIST_HEAD(&peer->list);
@@ -786,7 +686,8 @@ static struct l2cap_chan *add_peer_chan(struct l2cap_chan *chan,
spin_unlock(&devices_lock);
/* Notifying peers about us needs to be done without locks held */
- INIT_DELAYED_WORK(&dev->notify_peers, do_notify_peers);
+ if (new_netdev)
+ INIT_DELAYED_WORK(&dev->notify_peers, do_notify_peers);
schedule_delayed_work(&dev->notify_peers, msecs_to_jiffies(100));
return peer->chan;
@@ -803,7 +704,8 @@ static int setup_netdev(struct l2cap_chan *chan, struct lowpan_btle_dev **dev)
if (!netdev)
return -ENOMEM;
- set_dev_addr(netdev, &chan->src, chan->src_type);
+ netdev->addr_assign_type = NET_ADDR_PERM;
+ baswap((void *)netdev->dev_addr, &chan->src);
netdev->netdev_ops = &netdev_ops;
SET_NETDEV_DEV(netdev, &chan->conn->hcon->hdev->dev);
@@ -843,6 +745,7 @@ out:
static inline void chan_ready_cb(struct l2cap_chan *chan)
{
struct lowpan_btle_dev *dev;
+ bool new_netdev = false;
dev = lookup_dev(chan->conn);
@@ -853,12 +756,13 @@ static inline void chan_ready_cb(struct l2cap_chan *chan)
l2cap_chan_del(chan, -ENOENT);
return;
}
+ new_netdev = true;
}
if (!try_module_get(THIS_MODULE))
return;
- add_peer_chan(chan, dev);
+ add_peer_chan(chan, dev, new_netdev);
ifup(dev->netdev);
}
@@ -964,26 +868,28 @@ static struct sk_buff *chan_alloc_skb_cb(struct l2cap_chan *chan,
static void chan_suspend_cb(struct l2cap_chan *chan)
{
- struct sk_buff *skb = chan->data;
+ struct lowpan_btle_dev *dev;
- BT_DBG("chan %p conn %p skb %p", chan, chan->conn, skb);
+ BT_DBG("chan %p suspend", chan);
- if (!skb)
+ dev = lookup_dev(chan->conn);
+ if (!dev || !dev->netdev)
return;
- lowpan_cb(skb)->status = -EAGAIN;
+ netif_stop_queue(dev->netdev);
}
static void chan_resume_cb(struct l2cap_chan *chan)
{
- struct sk_buff *skb = chan->data;
+ struct lowpan_btle_dev *dev;
- BT_DBG("chan %p conn %p skb %p", chan, chan->conn, skb);
+ BT_DBG("chan %p resume", chan);
- if (!skb)
+ dev = lookup_dev(chan->conn);
+ if (!dev || !dev->netdev)
return;
- lowpan_cb(skb)->status = 0;
+ netif_wake_queue(dev->netdev);
}
static long chan_get_sndtimeo_cb(struct l2cap_chan *chan)
diff --git a/net/bluetooth/Kconfig b/net/bluetooth/Kconfig
index 06c31b9a68b0..68f951b3e85a 100644
--- a/net/bluetooth/Kconfig
+++ b/net/bluetooth/Kconfig
@@ -13,6 +13,7 @@ menuconfig BT
select CRYPTO_CMAC
select CRYPTO_ECB
select CRYPTO_SHA256
+ select CRYPTO_ECDH
help
Bluetooth is low-cost, low-power, short-range wireless technology.
It was designed as a replacement for cables and other short-range
diff --git a/net/bluetooth/Makefile b/net/bluetooth/Makefile
index 4bfaa19a5573..5d0a113e2e40 100644
--- a/net/bluetooth/Makefile
+++ b/net/bluetooth/Makefile
@@ -13,7 +13,7 @@ bluetooth_6lowpan-y := 6lowpan.o
bluetooth-y := af_bluetooth.o hci_core.o hci_conn.o hci_event.o mgmt.o \
hci_sock.o hci_sysfs.o l2cap_core.o l2cap_sock.o smp.o lib.o \
- ecc.o hci_request.o mgmt_util.o
+ ecdh_helper.o hci_request.o mgmt_util.o
bluetooth-$(CONFIG_BT_BREDR) += sco.o
bluetooth-$(CONFIG_BT_HS) += a2mp.o amp.o
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index 69e1f7d362a8..42d0997e2fbb 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -159,12 +159,17 @@ void bt_accept_enqueue(struct sock *parent, struct sock *sk)
BT_DBG("parent %p, sk %p", parent, sk);
sock_hold(sk);
+ lock_sock(sk);
list_add_tail(&bt_sk(sk)->accept_q, &bt_sk(parent)->accept_q);
bt_sk(sk)->parent = parent;
+ release_sock(sk);
parent->sk_ack_backlog++;
}
EXPORT_SYMBOL(bt_accept_enqueue);
+/* Calling function must hold the sk lock.
+ * bt_sk(sk)->parent must be non-NULL meaning sk is in the parent list.
+ */
void bt_accept_unlink(struct sock *sk)
{
BT_DBG("sk %p state %d", sk, sk->sk_state);
@@ -183,11 +188,32 @@ struct sock *bt_accept_dequeue(struct sock *parent, struct socket *newsock)
BT_DBG("parent %p", parent);
+restart:
list_for_each_entry_safe(s, n, &bt_sk(parent)->accept_q, accept_q) {
sk = (struct sock *)s;
+ /* Prevent early freeing of sk due to unlink and sock_kill */
+ sock_hold(sk);
lock_sock(sk);
+ /* Check sk has not already been unlinked via
+ * bt_accept_unlink() due to serialisation caused by sk locking
+ */
+ if (!bt_sk(sk)->parent) {
+ BT_DBG("sk %p, already unlinked", sk);
+ release_sock(sk);
+ sock_put(sk);
+
+ /* Restart the loop as sk is no longer in the list
+ * and also avoid a potential infinite loop because
+ * list_for_each_entry_safe() is not thread safe.
+ */
+ goto restart;
+ }
+
+ /* sk is safely in the parent list so reduce reference count */
+ sock_put(sk);
+
/* FIXME: Is this check still needed */
if (sk->sk_state == BT_CLOSED) {
bt_accept_unlink(sk);
diff --git a/net/bluetooth/amp.c b/net/bluetooth/amp.c
index 02a4ccc04e1e..ebcab5bbadd7 100644
--- a/net/bluetooth/amp.c
+++ b/net/bluetooth/amp.c
@@ -263,7 +263,7 @@ void amp_read_loc_assoc_frag(struct hci_dev *hdev, u8 phy_handle)
struct hci_cp_read_local_amp_assoc cp;
struct amp_assoc *loc_assoc = &hdev->loc_assoc;
struct hci_request req;
- int err = 0;
+ int err;
BT_DBG("%s handle %d", hdev->name, phy_handle);
@@ -282,7 +282,7 @@ void amp_read_loc_assoc(struct hci_dev *hdev, struct amp_mgr *mgr)
{
struct hci_cp_read_local_amp_assoc cp;
struct hci_request req;
- int err = 0;
+ int err;
memset(&hdev->loc_assoc, 0, sizeof(struct amp_assoc));
memset(&cp, 0, sizeof(cp));
@@ -292,7 +292,7 @@ void amp_read_loc_assoc(struct hci_dev *hdev, struct amp_mgr *mgr)
set_bit(READ_LOC_AMP_ASSOC, &mgr->state);
hci_req_init(&req, hdev);
hci_req_add(&req, HCI_OP_READ_LOCAL_AMP_ASSOC, sizeof(cp), &cp);
- hci_req_run_skb(&req, read_local_amp_assoc_complete);
+ err = hci_req_run_skb(&req, read_local_amp_assoc_complete);
if (err < 0)
a2mp_send_getampassoc_rsp(hdev, A2MP_STATUS_INVALID_CTRL_ID);
}
@@ -303,7 +303,7 @@ void amp_read_loc_assoc_final_data(struct hci_dev *hdev,
struct hci_cp_read_local_amp_assoc cp;
struct amp_mgr *mgr = hcon->amp_mgr;
struct hci_request req;
- int err = 0;
+ int err;
cp.phy_handle = hcon->handle;
cp.len_so_far = cpu_to_le16(0);
@@ -314,7 +314,7 @@ void amp_read_loc_assoc_final_data(struct hci_dev *hdev,
/* Read Local AMP Assoc final link information data */
hci_req_init(&req, hdev);
hci_req_add(&req, HCI_OP_READ_LOCAL_AMP_ASSOC, sizeof(cp), &cp);
- hci_req_run_skb(&req, read_local_amp_assoc_complete);
+ err = hci_req_run_skb(&req, read_local_amp_assoc_complete);
if (err < 0)
a2mp_send_getampassoc_rsp(hdev, A2MP_STATUS_INVALID_CTRL_ID);
}
diff --git a/net/bluetooth/ecc.c b/net/bluetooth/ecc.c
deleted file mode 100644
index e1709f8467ac..000000000000
--- a/net/bluetooth/ecc.c
+++ /dev/null
@@ -1,816 +0,0 @@
-/*
- * Copyright (c) 2013, Kenneth MacKay
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <linux/random.h>
-
-#include "ecc.h"
-
-/* 256-bit curve */
-#define ECC_BYTES 32
-
-#define MAX_TRIES 16
-
-/* Number of u64's needed */
-#define NUM_ECC_DIGITS (ECC_BYTES / 8)
-
-struct ecc_point {
- u64 x[NUM_ECC_DIGITS];
- u64 y[NUM_ECC_DIGITS];
-};
-
-typedef struct {
- u64 m_low;
- u64 m_high;
-} uint128_t;
-
-#define CURVE_P_32 { 0xFFFFFFFFFFFFFFFFull, 0x00000000FFFFFFFFull, \
- 0x0000000000000000ull, 0xFFFFFFFF00000001ull }
-
-#define CURVE_G_32 { \
- { 0xF4A13945D898C296ull, 0x77037D812DEB33A0ull, \
- 0xF8BCE6E563A440F2ull, 0x6B17D1F2E12C4247ull }, \
- { 0xCBB6406837BF51F5ull, 0x2BCE33576B315ECEull, \
- 0x8EE7EB4A7C0F9E16ull, 0x4FE342E2FE1A7F9Bull } \
-}
-
-#define CURVE_N_32 { 0xF3B9CAC2FC632551ull, 0xBCE6FAADA7179E84ull, \
- 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFF00000000ull }
-
-static u64 curve_p[NUM_ECC_DIGITS] = CURVE_P_32;
-static struct ecc_point curve_g = CURVE_G_32;
-static u64 curve_n[NUM_ECC_DIGITS] = CURVE_N_32;
-
-static void vli_clear(u64 *vli)
-{
- int i;
-
- for (i = 0; i < NUM_ECC_DIGITS; i++)
- vli[i] = 0;
-}
-
-/* Returns true if vli == 0, false otherwise. */
-static bool vli_is_zero(const u64 *vli)
-{
- int i;
-
- for (i = 0; i < NUM_ECC_DIGITS; i++) {
- if (vli[i])
- return false;
- }
-
- return true;
-}
-
-/* Returns nonzero if bit bit of vli is set. */
-static u64 vli_test_bit(const u64 *vli, unsigned int bit)
-{
- return (vli[bit / 64] & ((u64) 1 << (bit % 64)));
-}
-
-/* Counts the number of 64-bit "digits" in vli. */
-static unsigned int vli_num_digits(const u64 *vli)
-{
- int i;
-
- /* Search from the end until we find a non-zero digit.
- * We do it in reverse because we expect that most digits will
- * be nonzero.
- */
- for (i = NUM_ECC_DIGITS - 1; i >= 0 && vli[i] == 0; i--);
-
- return (i + 1);
-}
-
-/* Counts the number of bits required for vli. */
-static unsigned int vli_num_bits(const u64 *vli)
-{
- unsigned int i, num_digits;
- u64 digit;
-
- num_digits = vli_num_digits(vli);
- if (num_digits == 0)
- return 0;
-
- digit = vli[num_digits - 1];
- for (i = 0; digit; i++)
- digit >>= 1;
-
- return ((num_digits - 1) * 64 + i);
-}
-
-/* Sets dest = src. */
-static void vli_set(u64 *dest, const u64 *src)
-{
- int i;
-
- for (i = 0; i < NUM_ECC_DIGITS; i++)
- dest[i] = src[i];
-}
-
-/* Returns sign of left - right. */
-static int vli_cmp(const u64 *left, const u64 *right)
-{
- int i;
-
- for (i = NUM_ECC_DIGITS - 1; i >= 0; i--) {
- if (left[i] > right[i])
- return 1;
- else if (left[i] < right[i])
- return -1;
- }
-
- return 0;
-}
-
-/* Computes result = in << c, returning carry. Can modify in place
- * (if result == in). 0 < shift < 64.
- */
-static u64 vli_lshift(u64 *result, const u64 *in,
- unsigned int shift)
-{
- u64 carry = 0;
- int i;
-
- for (i = 0; i < NUM_ECC_DIGITS; i++) {
- u64 temp = in[i];
-
- result[i] = (temp << shift) | carry;
- carry = temp >> (64 - shift);
- }
-
- return carry;
-}
-
-/* Computes vli = vli >> 1. */
-static void vli_rshift1(u64 *vli)
-{
- u64 *end = vli;
- u64 carry = 0;
-
- vli += NUM_ECC_DIGITS;
-
- while (vli-- > end) {
- u64 temp = *vli;
- *vli = (temp >> 1) | carry;
- carry = temp << 63;
- }
-}
-
-/* Computes result = left + right, returning carry. Can modify in place. */
-static u64 vli_add(u64 *result, const u64 *left,
- const u64 *right)
-{
- u64 carry = 0;
- int i;
-
- for (i = 0; i < NUM_ECC_DIGITS; i++) {
- u64 sum;
-
- sum = left[i] + right[i] + carry;
- if (sum != left[i])
- carry = (sum < left[i]);
-
- result[i] = sum;
- }
-
- return carry;
-}
-
-/* Computes result = left - right, returning borrow. Can modify in place. */
-static u64 vli_sub(u64 *result, const u64 *left, const u64 *right)
-{
- u64 borrow = 0;
- int i;
-
- for (i = 0; i < NUM_ECC_DIGITS; i++) {
- u64 diff;
-
- diff = left[i] - right[i] - borrow;
- if (diff != left[i])
- borrow = (diff > left[i]);
-
- result[i] = diff;
- }
-
- return borrow;
-}
-
-static uint128_t mul_64_64(u64 left, u64 right)
-{
- u64 a0 = left & 0xffffffffull;
- u64 a1 = left >> 32;
- u64 b0 = right & 0xffffffffull;
- u64 b1 = right >> 32;
- u64 m0 = a0 * b0;
- u64 m1 = a0 * b1;
- u64 m2 = a1 * b0;
- u64 m3 = a1 * b1;
- uint128_t result;
-
- m2 += (m0 >> 32);
- m2 += m1;
-
- /* Overflow */
- if (m2 < m1)
- m3 += 0x100000000ull;
-
- result.m_low = (m0 & 0xffffffffull) | (m2 << 32);
- result.m_high = m3 + (m2 >> 32);
-
- return result;
-}
-
-static uint128_t add_128_128(uint128_t a, uint128_t b)
-{
- uint128_t result;
-
- result.m_low = a.m_low + b.m_low;
- result.m_high = a.m_high + b.m_high + (result.m_low < a.m_low);
-
- return result;
-}
-
-static void vli_mult(u64 *result, const u64 *left, const u64 *right)
-{
- uint128_t r01 = { 0, 0 };
- u64 r2 = 0;
- unsigned int i, k;
-
- /* Compute each digit of result in sequence, maintaining the
- * carries.
- */
- for (k = 0; k < NUM_ECC_DIGITS * 2 - 1; k++) {
- unsigned int min;
-
- if (k < NUM_ECC_DIGITS)
- min = 0;
- else
- min = (k + 1) - NUM_ECC_DIGITS;
-
- for (i = min; i <= k && i < NUM_ECC_DIGITS; i++) {
- uint128_t product;
-
- product = mul_64_64(left[i], right[k - i]);
-
- r01 = add_128_128(r01, product);
- r2 += (r01.m_high < product.m_high);
- }
-
- result[k] = r01.m_low;
- r01.m_low = r01.m_high;
- r01.m_high = r2;
- r2 = 0;
- }
-
- result[NUM_ECC_DIGITS * 2 - 1] = r01.m_low;
-}
-
-static void vli_square(u64 *result, const u64 *left)
-{
- uint128_t r01 = { 0, 0 };
- u64 r2 = 0;
- int i, k;
-
- for (k = 0; k < NUM_ECC_DIGITS * 2 - 1; k++) {
- unsigned int min;
-
- if (k < NUM_ECC_DIGITS)
- min = 0;
- else
- min = (k + 1) - NUM_ECC_DIGITS;
-
- for (i = min; i <= k && i <= k - i; i++) {
- uint128_t product;
-
- product = mul_64_64(left[i], left[k - i]);
-
- if (i < k - i) {
- r2 += product.m_high >> 63;
- product.m_high = (product.m_high << 1) |
- (product.m_low >> 63);
- product.m_low <<= 1;
- }
-
- r01 = add_128_128(r01, product);
- r2 += (r01.m_high < product.m_high);
- }
-
- result[k] = r01.m_low;
- r01.m_low = r01.m_high;
- r01.m_high = r2;
- r2 = 0;
- }
-
- result[NUM_ECC_DIGITS * 2 - 1] = r01.m_low;
-}
-
-/* Computes result = (left + right) % mod.
- * Assumes that left < mod and right < mod, result != mod.
- */
-static void vli_mod_add(u64 *result, const u64 *left, const u64 *right,
- const u64 *mod)
-{
- u64 carry;
-
- carry = vli_add(result, left, right);
-
- /* result > mod (result = mod + remainder), so subtract mod to
- * get remainder.
- */
- if (carry || vli_cmp(result, mod) >= 0)
- vli_sub(result, result, mod);
-}
-
-/* Computes result = (left - right) % mod.
- * Assumes that left < mod and right < mod, result != mod.
- */
-static void vli_mod_sub(u64 *result, const u64 *left, const u64 *right,
- const u64 *mod)
-{
- u64 borrow = vli_sub(result, left, right);
-
- /* In this case, p_result == -diff == (max int) - diff.
- * Since -x % d == d - x, we can get the correct result from
- * result + mod (with overflow).
- */
- if (borrow)
- vli_add(result, result, mod);
-}
-
-/* Computes result = product % curve_p
- from http://www.nsa.gov/ia/_files/nist-routines.pdf */
-static void vli_mmod_fast(u64 *result, const u64 *product)
-{
- u64 tmp[NUM_ECC_DIGITS];
- int carry;
-
- /* t */
- vli_set(result, product);
-
- /* s1 */
- tmp[0] = 0;
- tmp[1] = product[5] & 0xffffffff00000000ull;
- tmp[2] = product[6];
- tmp[3] = product[7];
- carry = vli_lshift(tmp, tmp, 1);
- carry += vli_add(result, result, tmp);
-
- /* s2 */
- tmp[1] = product[6] << 32;
- tmp[2] = (product[6] >> 32) | (product[7] << 32);
- tmp[3] = product[7] >> 32;
- carry += vli_lshift(tmp, tmp, 1);
- carry += vli_add(result, result, tmp);
-
- /* s3 */
- tmp[0] = product[4];
- tmp[1] = product[5] & 0xffffffff;
- tmp[2] = 0;
- tmp[3] = product[7];
- carry += vli_add(result, result, tmp);
-
- /* s4 */
- tmp[0] = (product[4] >> 32) | (product[5] << 32);
- tmp[1] = (product[5] >> 32) | (product[6] & 0xffffffff00000000ull);
- tmp[2] = product[7];
- tmp[3] = (product[6] >> 32) | (product[4] << 32);
- carry += vli_add(result, result, tmp);
-
- /* d1 */
- tmp[0] = (product[5] >> 32) | (product[6] << 32);
- tmp[1] = (product[6] >> 32);
- tmp[2] = 0;
- tmp[3] = (product[4] & 0xffffffff) | (product[5] << 32);
- carry -= vli_sub(result, result, tmp);
-
- /* d2 */
- tmp[0] = product[6];
- tmp[1] = product[7];
- tmp[2] = 0;
- tmp[3] = (product[4] >> 32) | (product[5] & 0xffffffff00000000ull);
- carry -= vli_sub(result, result, tmp);
-
- /* d3 */
- tmp[0] = (product[6] >> 32) | (product[7] << 32);
- tmp[1] = (product[7] >> 32) | (product[4] << 32);
- tmp[2] = (product[4] >> 32) | (product[5] << 32);
- tmp[3] = (product[6] << 32);
- carry -= vli_sub(result, result, tmp);
-
- /* d4 */
- tmp[0] = product[7];
- tmp[1] = product[4] & 0xffffffff00000000ull;
- tmp[2] = product[5];
- tmp[3] = product[6] & 0xffffffff00000000ull;
- carry -= vli_sub(result, result, tmp);
-
- if (carry < 0) {
- do {
- carry += vli_add(result, result, curve_p);
- } while (carry < 0);
- } else {
- while (carry || vli_cmp(curve_p, result) != 1)
- carry -= vli_sub(result, result, curve_p);
- }
-}
-
-/* Computes result = (left * right) % curve_p. */
-static void vli_mod_mult_fast(u64 *result, const u64 *left, const u64 *right)
-{
- u64 product[2 * NUM_ECC_DIGITS];
-
- vli_mult(product, left, right);
- vli_mmod_fast(result, product);
-}
-
-/* Computes result = left^2 % curve_p. */
-static void vli_mod_square_fast(u64 *result, const u64 *left)
-{
- u64 product[2 * NUM_ECC_DIGITS];
-
- vli_square(product, left);
- vli_mmod_fast(result, product);
-}
-
-#define EVEN(vli) (!(vli[0] & 1))
-/* Computes result = (1 / p_input) % mod. All VLIs are the same size.
- * See "From Euclid's GCD to Montgomery Multiplication to the Great Divide"
- * https://labs.oracle.com/techrep/2001/smli_tr-2001-95.pdf
- */
-static void vli_mod_inv(u64 *result, const u64 *input, const u64 *mod)
-{
- u64 a[NUM_ECC_DIGITS], b[NUM_ECC_DIGITS];
- u64 u[NUM_ECC_DIGITS], v[NUM_ECC_DIGITS];
- u64 carry;
- int cmp_result;
-
- if (vli_is_zero(input)) {
- vli_clear(result);
- return;
- }
-
- vli_set(a, input);
- vli_set(b, mod);
- vli_clear(u);
- u[0] = 1;
- vli_clear(v);
-
- while ((cmp_result = vli_cmp(a, b)) != 0) {
- carry = 0;
-
- if (EVEN(a)) {
- vli_rshift1(a);
-
- if (!EVEN(u))
- carry = vli_add(u, u, mod);
-
- vli_rshift1(u);
- if (carry)
- u[NUM_ECC_DIGITS - 1] |= 0x8000000000000000ull;
- } else if (EVEN(b)) {
- vli_rshift1(b);
-
- if (!EVEN(v))
- carry = vli_add(v, v, mod);
-
- vli_rshift1(v);
- if (carry)
- v[NUM_ECC_DIGITS - 1] |= 0x8000000000000000ull;
- } else if (cmp_result > 0) {
- vli_sub(a, a, b);
- vli_rshift1(a);
-
- if (vli_cmp(u, v) < 0)
- vli_add(u, u, mod);
-
- vli_sub(u, u, v);
- if (!EVEN(u))
- carry = vli_add(u, u, mod);
-
- vli_rshift1(u);
- if (carry)
- u[NUM_ECC_DIGITS - 1] |= 0x8000000000000000ull;
- } else {
- vli_sub(b, b, a);
- vli_rshift1(b);
-
- if (vli_cmp(v, u) < 0)
- vli_add(v, v, mod);
-
- vli_sub(v, v, u);
- if (!EVEN(v))
- carry = vli_add(v, v, mod);
-
- vli_rshift1(v);
- if (carry)
- v[NUM_ECC_DIGITS - 1] |= 0x8000000000000000ull;
- }
- }
-
- vli_set(result, u);
-}
-
-/* ------ Point operations ------ */
-
-/* Returns true if p_point is the point at infinity, false otherwise. */
-static bool ecc_point_is_zero(const struct ecc_point *point)
-{
- return (vli_is_zero(point->x) && vli_is_zero(point->y));
-}
-
-/* Point multiplication algorithm using Montgomery's ladder with co-Z
- * coordinates. From http://eprint.iacr.org/2011/338.pdf
- */
-
-/* Double in place */
-static void ecc_point_double_jacobian(u64 *x1, u64 *y1, u64 *z1)
-{
- /* t1 = x, t2 = y, t3 = z */
- u64 t4[NUM_ECC_DIGITS];
- u64 t5[NUM_ECC_DIGITS];
-
- if (vli_is_zero(z1))
- return;
-
- vli_mod_square_fast(t4, y1); /* t4 = y1^2 */
- vli_mod_mult_fast(t5, x1, t4); /* t5 = x1*y1^2 = A */
- vli_mod_square_fast(t4, t4); /* t4 = y1^4 */
- vli_mod_mult_fast(y1, y1, z1); /* t2 = y1*z1 = z3 */
- vli_mod_square_fast(z1, z1); /* t3 = z1^2 */
-
- vli_mod_add(x1, x1, z1, curve_p); /* t1 = x1 + z1^2 */
- vli_mod_add(z1, z1, z1, curve_p); /* t3 = 2*z1^2 */
- vli_mod_sub(z1, x1, z1, curve_p); /* t3 = x1 - z1^2 */
- vli_mod_mult_fast(x1, x1, z1); /* t1 = x1^2 - z1^4 */
-
- vli_mod_add(z1, x1, x1, curve_p); /* t3 = 2*(x1^2 - z1^4) */
- vli_mod_add(x1, x1, z1, curve_p); /* t1 = 3*(x1^2 - z1^4) */
- if (vli_test_bit(x1, 0)) {
- u64 carry = vli_add(x1, x1, curve_p);
- vli_rshift1(x1);
- x1[NUM_ECC_DIGITS - 1] |= carry << 63;
- } else {
- vli_rshift1(x1);
- }
- /* t1 = 3/2*(x1^2 - z1^4) = B */
-
- vli_mod_square_fast(z1, x1); /* t3 = B^2 */
- vli_mod_sub(z1, z1, t5, curve_p); /* t3 = B^2 - A */
- vli_mod_sub(z1, z1, t5, curve_p); /* t3 = B^2 - 2A = x3 */
- vli_mod_sub(t5, t5, z1, curve_p); /* t5 = A - x3 */
- vli_mod_mult_fast(x1, x1, t5); /* t1 = B * (A - x3) */
- vli_mod_sub(t4, x1, t4, curve_p); /* t4 = B * (A - x3) - y1^4 = y3 */
-
- vli_set(x1, z1);
- vli_set(z1, y1);
- vli_set(y1, t4);
-}
-
-/* Modify (x1, y1) => (x1 * z^2, y1 * z^3) */
-static void apply_z(u64 *x1, u64 *y1, u64 *z)
-{
- u64 t1[NUM_ECC_DIGITS];
-
- vli_mod_square_fast(t1, z); /* z^2 */
- vli_mod_mult_fast(x1, x1, t1); /* x1 * z^2 */
- vli_mod_mult_fast(t1, t1, z); /* z^3 */
- vli_mod_mult_fast(y1, y1, t1); /* y1 * z^3 */
-}
-
-/* P = (x1, y1) => 2P, (x2, y2) => P' */
-static void xycz_initial_double(u64 *x1, u64 *y1, u64 *x2, u64 *y2,
- u64 *p_initial_z)
-{
- u64 z[NUM_ECC_DIGITS];
-
- vli_set(x2, x1);
- vli_set(y2, y1);
-
- vli_clear(z);
- z[0] = 1;
-
- if (p_initial_z)
- vli_set(z, p_initial_z);
-
- apply_z(x1, y1, z);
-
- ecc_point_double_jacobian(x1, y1, z);
-
- apply_z(x2, y2, z);
-}
-
-/* Input P = (x1, y1, Z), Q = (x2, y2, Z)
- * Output P' = (x1', y1', Z3), P + Q = (x3, y3, Z3)
- * or P => P', Q => P + Q
- */
-static void xycz_add(u64 *x1, u64 *y1, u64 *x2, u64 *y2)
-{
- /* t1 = X1, t2 = Y1, t3 = X2, t4 = Y2 */
- u64 t5[NUM_ECC_DIGITS];
-
- vli_mod_sub(t5, x2, x1, curve_p); /* t5 = x2 - x1 */
- vli_mod_square_fast(t5, t5); /* t5 = (x2 - x1)^2 = A */
- vli_mod_mult_fast(x1, x1, t5); /* t1 = x1*A = B */
- vli_mod_mult_fast(x2, x2, t5); /* t3 = x2*A = C */
- vli_mod_sub(y2, y2, y1, curve_p); /* t4 = y2 - y1 */
- vli_mod_square_fast(t5, y2); /* t5 = (y2 - y1)^2 = D */
-
- vli_mod_sub(t5, t5, x1, curve_p); /* t5 = D - B */
- vli_mod_sub(t5, t5, x2, curve_p); /* t5 = D - B - C = x3 */
- vli_mod_sub(x2, x2, x1, curve_p); /* t3 = C - B */
- vli_mod_mult_fast(y1, y1, x2); /* t2 = y1*(C - B) */
- vli_mod_sub(x2, x1, t5, curve_p); /* t3 = B - x3 */
- vli_mod_mult_fast(y2, y2, x2); /* t4 = (y2 - y1)*(B - x3) */
- vli_mod_sub(y2, y2, y1, curve_p); /* t4 = y3 */
-
- vli_set(x2, t5);
-}
-
-/* Input P = (x1, y1, Z), Q = (x2, y2, Z)
- * Output P + Q = (x3, y3, Z3), P - Q = (x3', y3', Z3)
- * or P => P - Q, Q => P + Q
- */
-static void xycz_add_c(u64 *x1, u64 *y1, u64 *x2, u64 *y2)
-{
- /* t1 = X1, t2 = Y1, t3 = X2, t4 = Y2 */
- u64 t5[NUM_ECC_DIGITS];
- u64 t6[NUM_ECC_DIGITS];
- u64 t7[NUM_ECC_DIGITS];
-
- vli_mod_sub(t5, x2, x1, curve_p); /* t5 = x2 - x1 */
- vli_mod_square_fast(t5, t5); /* t5 = (x2 - x1)^2 = A */
- vli_mod_mult_fast(x1, x1, t5); /* t1 = x1*A = B */
- vli_mod_mult_fast(x2, x2, t5); /* t3 = x2*A = C */
- vli_mod_add(t5, y2, y1, curve_p); /* t4 = y2 + y1 */
- vli_mod_sub(y2, y2, y1, curve_p); /* t4 = y2 - y1 */
-
- vli_mod_sub(t6, x2, x1, curve_p); /* t6 = C - B */
- vli_mod_mult_fast(y1, y1, t6); /* t2 = y1 * (C - B) */
- vli_mod_add(t6, x1, x2, curve_p); /* t6 = B + C */
- vli_mod_square_fast(x2, y2); /* t3 = (y2 - y1)^2 */
- vli_mod_sub(x2, x2, t6, curve_p); /* t3 = x3 */
-
- vli_mod_sub(t7, x1, x2, curve_p); /* t7 = B - x3 */
- vli_mod_mult_fast(y2, y2, t7); /* t4 = (y2 - y1)*(B - x3) */
- vli_mod_sub(y2, y2, y1, curve_p); /* t4 = y3 */
-
- vli_mod_square_fast(t7, t5); /* t7 = (y2 + y1)^2 = F */
- vli_mod_sub(t7, t7, t6, curve_p); /* t7 = x3' */
- vli_mod_sub(t6, t7, x1, curve_p); /* t6 = x3' - B */
- vli_mod_mult_fast(t6, t6, t5); /* t6 = (y2 + y1)*(x3' - B) */
- vli_mod_sub(y1, t6, y1, curve_p); /* t2 = y3' */
-
- vli_set(x1, t7);
-}
-
-static void ecc_point_mult(struct ecc_point *result,
- const struct ecc_point *point, u64 *scalar,
- u64 *initial_z, int num_bits)
-{
- /* R0 and R1 */
- u64 rx[2][NUM_ECC_DIGITS];
- u64 ry[2][NUM_ECC_DIGITS];
- u64 z[NUM_ECC_DIGITS];
- int i, nb;
-
- vli_set(rx[1], point->x);
- vli_set(ry[1], point->y);
-
- xycz_initial_double(rx[1], ry[1], rx[0], ry[0], initial_z);
-
- for (i = num_bits - 2; i > 0; i--) {
- nb = !vli_test_bit(scalar, i);
- xycz_add_c(rx[1 - nb], ry[1 - nb], rx[nb], ry[nb]);
- xycz_add(rx[nb], ry[nb], rx[1 - nb], ry[1 - nb]);
- }
-
- nb = !vli_test_bit(scalar, 0);
- xycz_add_c(rx[1 - nb], ry[1 - nb], rx[nb], ry[nb]);
-
- /* Find final 1/Z value. */
- vli_mod_sub(z, rx[1], rx[0], curve_p); /* X1 - X0 */
- vli_mod_mult_fast(z, z, ry[1 - nb]); /* Yb * (X1 - X0) */
- vli_mod_mult_fast(z, z, point->x); /* xP * Yb * (X1 - X0) */
- vli_mod_inv(z, z, curve_p); /* 1 / (xP * Yb * (X1 - X0)) */
- vli_mod_mult_fast(z, z, point->y); /* yP / (xP * Yb * (X1 - X0)) */
- vli_mod_mult_fast(z, z, rx[1 - nb]); /* Xb * yP / (xP * Yb * (X1 - X0)) */
- /* End 1/Z calculation */
-
- xycz_add(rx[nb], ry[nb], rx[1 - nb], ry[1 - nb]);
-
- apply_z(rx[0], ry[0], z);
-
- vli_set(result->x, rx[0]);
- vli_set(result->y, ry[0]);
-}
-
-static void ecc_bytes2native(const u8 bytes[ECC_BYTES],
- u64 native[NUM_ECC_DIGITS])
-{
- int i;
-
- for (i = 0; i < NUM_ECC_DIGITS; i++) {
- const u8 *digit = bytes + 8 * (NUM_ECC_DIGITS - 1 - i);
-
- native[NUM_ECC_DIGITS - 1 - i] =
- ((u64) digit[0] << 0) |
- ((u64) digit[1] << 8) |
- ((u64) digit[2] << 16) |
- ((u64) digit[3] << 24) |
- ((u64) digit[4] << 32) |
- ((u64) digit[5] << 40) |
- ((u64) digit[6] << 48) |
- ((u64) digit[7] << 56);
- }
-}
-
-static void ecc_native2bytes(const u64 native[NUM_ECC_DIGITS],
- u8 bytes[ECC_BYTES])
-{
- int i;
-
- for (i = 0; i < NUM_ECC_DIGITS; i++) {
- u8 *digit = bytes + 8 * (NUM_ECC_DIGITS - 1 - i);
-
- digit[0] = native[NUM_ECC_DIGITS - 1 - i] >> 0;
- digit[1] = native[NUM_ECC_DIGITS - 1 - i] >> 8;
- digit[2] = native[NUM_ECC_DIGITS - 1 - i] >> 16;
- digit[3] = native[NUM_ECC_DIGITS - 1 - i] >> 24;
- digit[4] = native[NUM_ECC_DIGITS - 1 - i] >> 32;
- digit[5] = native[NUM_ECC_DIGITS - 1 - i] >> 40;
- digit[6] = native[NUM_ECC_DIGITS - 1 - i] >> 48;
- digit[7] = native[NUM_ECC_DIGITS - 1 - i] >> 56;
- }
-}
-
-bool ecc_make_key(u8 public_key[64], u8 private_key[32])
-{
- struct ecc_point pk;
- u64 priv[NUM_ECC_DIGITS];
- unsigned int tries = 0;
-
- do {
- if (tries++ >= MAX_TRIES)
- return false;
-
- get_random_bytes(priv, ECC_BYTES);
-
- if (vli_is_zero(priv))
- continue;
-
- /* Make sure the private key is in the range [1, n-1]. */
- if (vli_cmp(curve_n, priv) != 1)
- continue;
-
- ecc_point_mult(&pk, &curve_g, priv, NULL, vli_num_bits(priv));
- } while (ecc_point_is_zero(&pk));
-
- ecc_native2bytes(priv, private_key);
- ecc_native2bytes(pk.x, public_key);
- ecc_native2bytes(pk.y, &public_key[32]);
-
- return true;
-}
-
-bool ecdh_shared_secret(const u8 public_key[64], const u8 private_key[32],
- u8 secret[32])
-{
- u64 priv[NUM_ECC_DIGITS];
- u64 rand[NUM_ECC_DIGITS];
- struct ecc_point product, pk;
-
- get_random_bytes(rand, ECC_BYTES);
-
- ecc_bytes2native(public_key, pk.x);
- ecc_bytes2native(&public_key[32], pk.y);
- ecc_bytes2native(private_key, priv);
-
- ecc_point_mult(&product, &pk, priv, rand, vli_num_bits(priv));
-
- ecc_native2bytes(product.x, secret);
-
- return !ecc_point_is_zero(&product);
-}
diff --git a/net/bluetooth/ecc.h b/net/bluetooth/ecc.h
deleted file mode 100644
index 8d6a2f4d1905..000000000000
--- a/net/bluetooth/ecc.h
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Copyright (c) 2013, Kenneth MacKay
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-/* Create a public/private key pair.
- * Outputs:
- * public_key - Will be filled in with the public key.
- * private_key - Will be filled in with the private key.
- *
- * Returns true if the key pair was generated successfully, false
- * if an error occurred. The keys are with the LSB first.
- */
-bool ecc_make_key(u8 public_key[64], u8 private_key[32]);
-
-/* Compute a shared secret given your secret key and someone else's
- * public key.
- * Note: It is recommended that you hash the result of ecdh_shared_secret
- * before using it for symmetric encryption or HMAC.
- *
- * Inputs:
- * public_key - The public key of the remote party
- * private_key - Your private key.
- *
- * Outputs:
- * secret - Will be filled in with the shared secret value.
- *
- * Returns true if the shared secret was generated successfully, false
- * if an error occurred. Both input and output parameters are with the
- * LSB first.
- */
-bool ecdh_shared_secret(const u8 public_key[64], const u8 private_key[32],
- u8 secret[32]);
diff --git a/net/bluetooth/ecdh_helper.c b/net/bluetooth/ecdh_helper.c
new file mode 100644
index 000000000000..24d4e60f8c48
--- /dev/null
+++ b/net/bluetooth/ecdh_helper.c
@@ -0,0 +1,231 @@
+/*
+ * ECDH helper functions - KPP wrappings
+ *
+ * Copyright (C) 2017 Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation;
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY
+ * CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ * ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS,
+ * COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS
+ * SOFTWARE IS DISCLAIMED.
+ */
+#include "ecdh_helper.h"
+
+#include <linux/scatterlist.h>
+#include <crypto/kpp.h>
+#include <crypto/ecdh.h>
+
+struct ecdh_completion {
+ struct completion completion;
+ int err;
+};
+
+static void ecdh_complete(struct crypto_async_request *req, int err)
+{
+ struct ecdh_completion *res = req->data;
+
+ if (err == -EINPROGRESS)
+ return;
+
+ res->err = err;
+ complete(&res->completion);
+}
+
+static inline void swap_digits(u64 *in, u64 *out, unsigned int ndigits)
+{
+ int i;
+
+ for (i = 0; i < ndigits; i++)
+ out[i] = __swab64(in[ndigits - 1 - i]);
+}
+
+bool compute_ecdh_secret(const u8 public_key[64], const u8 private_key[32],
+ u8 secret[32])
+{
+ struct crypto_kpp *tfm;
+ struct kpp_request *req;
+ struct ecdh p;
+ struct ecdh_completion result;
+ struct scatterlist src, dst;
+ u8 *tmp, *buf;
+ unsigned int buf_len;
+ int err = -ENOMEM;
+
+ tmp = kmalloc(64, GFP_KERNEL);
+ if (!tmp)
+ return false;
+
+ tfm = crypto_alloc_kpp("ecdh", CRYPTO_ALG_INTERNAL, 0);
+ if (IS_ERR(tfm)) {
+ pr_err("alg: kpp: Failed to load tfm for kpp: %ld\n",
+ PTR_ERR(tfm));
+ goto free_tmp;
+ }
+
+ req = kpp_request_alloc(tfm, GFP_KERNEL);
+ if (!req)
+ goto free_kpp;
+
+ init_completion(&result.completion);
+
+ /* Security Manager Protocol holds digits in litte-endian order
+ * while ECC API expect big-endian data
+ */
+ swap_digits((u64 *)private_key, (u64 *)tmp, 4);
+ p.key = (char *)tmp;
+ p.key_size = 32;
+ /* Set curve_id */
+ p.curve_id = ECC_CURVE_NIST_P256;
+ buf_len = crypto_ecdh_key_len(&p);
+ buf = kmalloc(buf_len, GFP_KERNEL);
+ if (!buf) {
+ pr_err("alg: kpp: Failed to allocate %d bytes for buf\n",
+ buf_len);
+ goto free_req;
+ }
+ crypto_ecdh_encode_key(buf, buf_len, &p);
+
+ /* Set A private Key */
+ err = crypto_kpp_set_secret(tfm, (void *)buf, buf_len);
+ if (err)
+ goto free_all;
+
+ swap_digits((u64 *)public_key, (u64 *)tmp, 4); /* x */
+ swap_digits((u64 *)&public_key[32], (u64 *)&tmp[32], 4); /* y */
+
+ sg_init_one(&src, tmp, 64);
+ sg_init_one(&dst, secret, 32);
+ kpp_request_set_input(req, &src, 64);
+ kpp_request_set_output(req, &dst, 32);
+ kpp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
+ ecdh_complete, &result);
+ err = crypto_kpp_compute_shared_secret(req);
+ if (err == -EINPROGRESS) {
+ wait_for_completion(&result.completion);
+ err = result.err;
+ }
+ if (err < 0) {
+ pr_err("alg: ecdh: compute shared secret failed. err %d\n",
+ err);
+ goto free_all;
+ }
+
+ swap_digits((u64 *)secret, (u64 *)tmp, 4);
+ memcpy(secret, tmp, 32);
+
+free_all:
+ kzfree(buf);
+free_req:
+ kpp_request_free(req);
+free_kpp:
+ crypto_free_kpp(tfm);
+free_tmp:
+ kfree(tmp);
+ return (err == 0);
+}
+
+bool generate_ecdh_keys(u8 public_key[64], u8 private_key[32])
+{
+ struct crypto_kpp *tfm;
+ struct kpp_request *req;
+ struct ecdh p;
+ struct ecdh_completion result;
+ struct scatterlist dst;
+ u8 *tmp, *buf;
+ unsigned int buf_len;
+ int err = -ENOMEM;
+ const unsigned short max_tries = 16;
+ unsigned short tries = 0;
+
+ tmp = kmalloc(64, GFP_KERNEL);
+ if (!tmp)
+ return false;
+
+ tfm = crypto_alloc_kpp("ecdh", CRYPTO_ALG_INTERNAL, 0);
+ if (IS_ERR(tfm)) {
+ pr_err("alg: kpp: Failed to load tfm for kpp: %ld\n",
+ PTR_ERR(tfm));
+ goto free_tmp;
+ }
+
+ req = kpp_request_alloc(tfm, GFP_KERNEL);
+ if (!req)
+ goto free_kpp;
+
+ init_completion(&result.completion);
+
+ /* Set curve_id */
+ p.curve_id = ECC_CURVE_NIST_P256;
+ p.key_size = 32;
+ buf_len = crypto_ecdh_key_len(&p);
+ buf = kmalloc(buf_len, GFP_KERNEL);
+ if (!buf) {
+ pr_err("alg: kpp: Failed to allocate %d bytes for buf\n",
+ buf_len);
+ goto free_req;
+ }
+
+ do {
+ if (tries++ >= max_tries)
+ goto free_all;
+
+ /* Set private Key */
+ p.key = (char *)private_key;
+ crypto_ecdh_encode_key(buf, buf_len, &p);
+ err = crypto_kpp_set_secret(tfm, buf, buf_len);
+ if (err)
+ goto free_all;
+
+ sg_init_one(&dst, tmp, 64);
+ kpp_request_set_input(req, NULL, 0);
+ kpp_request_set_output(req, &dst, 64);
+ kpp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
+ ecdh_complete, &result);
+
+ err = crypto_kpp_generate_public_key(req);
+
+ if (err == -EINPROGRESS) {
+ wait_for_completion(&result.completion);
+ err = result.err;
+ }
+
+ /* Private key is not valid. Regenerate */
+ if (err == -EINVAL)
+ continue;
+
+ if (err < 0)
+ goto free_all;
+ else
+ break;
+
+ } while (true);
+
+ /* Keys are handed back in little endian as expected by Security
+ * Manager Protocol
+ */
+ swap_digits((u64 *)tmp, (u64 *)public_key, 4); /* x */
+ swap_digits((u64 *)&tmp[32], (u64 *)&public_key[32], 4); /* y */
+ swap_digits((u64 *)private_key, (u64 *)tmp, 4);
+ memcpy(private_key, tmp, 32);
+
+free_all:
+ kzfree(buf);
+free_req:
+ kpp_request_free(req);
+free_kpp:
+ crypto_free_kpp(tfm);
+free_tmp:
+ kfree(tmp);
+ return (err == 0);
+}
diff --git a/net/bluetooth/ecdh_helper.h b/net/bluetooth/ecdh_helper.h
new file mode 100644
index 000000000000..7a423faf76e5
--- /dev/null
+++ b/net/bluetooth/ecdh_helper.h
@@ -0,0 +1,27 @@
+/*
+ * ECDH helper functions - KPP wrappings
+ *
+ * Copyright (C) 2017 Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation;
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY
+ * CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ * ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS,
+ * COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS
+ * SOFTWARE IS DISCLAIMED.
+ */
+#include <linux/types.h>
+
+bool compute_ecdh_secret(const u8 pub_a[64], const u8 priv_b[32],
+ u8 secret[32]);
+bool generate_ecdh_keys(u8 public_key[64], u8 private_key[32]);
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 3ac89e9ace71..05686776a5fb 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -2950,8 +2950,8 @@ struct hci_dev *hci_alloc_dev(void)
hdev->le_adv_max_interval = 0x0800;
hdev->le_scan_interval = 0x0060;
hdev->le_scan_window = 0x0030;
- hdev->le_conn_min_interval = 0x0028;
- hdev->le_conn_max_interval = 0x0038;
+ hdev->le_conn_min_interval = 0x0018;
+ hdev->le_conn_max_interval = 0x0028;
hdev->le_conn_latency = 0x0000;
hdev->le_supv_timeout = 0x002a;
hdev->le_def_tx_len = 0x001b;
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index f64d6566021f..638bf0e1a2e3 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -1680,7 +1680,8 @@ static int hci_sock_sendmsg(struct socket *sock, struct msghdr *msg,
if (msg->msg_flags & MSG_OOB)
return -EOPNOTSUPP;
- if (msg->msg_flags & ~(MSG_DONTWAIT|MSG_NOSIGNAL|MSG_ERRQUEUE))
+ if (msg->msg_flags & ~(MSG_DONTWAIT|MSG_NOSIGNAL|MSG_ERRQUEUE|
+ MSG_CMSG_COMPAT))
return -EINVAL;
if (len < 4 || len > HCI_MAX_FRAME_SIZE)
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index fc7f321a3823..f88ac99528ce 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -2425,6 +2425,22 @@ static int l2cap_segment_le_sdu(struct l2cap_chan *chan,
return 0;
}
+static void l2cap_le_flowctl_send(struct l2cap_chan *chan)
+{
+ int sent = 0;
+
+ BT_DBG("chan %p", chan);
+
+ while (chan->tx_credits && !skb_queue_empty(&chan->tx_q)) {
+ l2cap_do_send(chan, skb_dequeue(&chan->tx_q));
+ chan->tx_credits--;
+ sent++;
+ }
+
+ BT_DBG("Sent %d credits %u queued %u", sent, chan->tx_credits,
+ skb_queue_len(&chan->tx_q));
+}
+
int l2cap_chan_send(struct l2cap_chan *chan, struct msghdr *msg, size_t len)
{
struct sk_buff *skb;
@@ -2458,9 +2474,6 @@ int l2cap_chan_send(struct l2cap_chan *chan, struct msghdr *msg, size_t len)
if (len > chan->omtu)
return -EMSGSIZE;
- if (!chan->tx_credits)
- return -EAGAIN;
-
__skb_queue_head_init(&seg_queue);
err = l2cap_segment_le_sdu(chan, &seg_queue, msg, len);
@@ -2475,10 +2488,7 @@ int l2cap_chan_send(struct l2cap_chan *chan, struct msghdr *msg, size_t len)
skb_queue_splice_tail_init(&seg_queue, &chan->tx_q);
- while (chan->tx_credits && !skb_queue_empty(&chan->tx_q)) {
- l2cap_do_send(chan, skb_dequeue(&chan->tx_q));
- chan->tx_credits--;
- }
+ l2cap_le_flowctl_send(chan);
if (!chan->tx_credits)
chan->ops->suspend(chan);
@@ -5570,10 +5580,8 @@ static inline int l2cap_le_credits(struct l2cap_conn *conn,
chan->tx_credits += credits;
- while (chan->tx_credits && !skb_queue_empty(&chan->tx_q)) {
- l2cap_do_send(chan, skb_dequeue(&chan->tx_q));
- chan->tx_credits--;
- }
+ /* Resume sending */
+ l2cap_le_flowctl_send(chan);
if (chan->tx_credits)
chan->ops->resume(chan);
diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c
index f7eb02f09b54..8ebca9033d60 100644
--- a/net/bluetooth/rfcomm/core.c
+++ b/net/bluetooth/rfcomm/core.c
@@ -311,7 +311,7 @@ struct rfcomm_dlc *rfcomm_dlc_alloc(gfp_t prio)
skb_queue_head_init(&d->tx_queue);
mutex_init(&d->lock);
- atomic_set(&d->refcnt, 1);
+ refcount_set(&d->refcnt, 1);
rfcomm_dlc_clear_state(d);
@@ -342,7 +342,7 @@ static void rfcomm_dlc_unlink(struct rfcomm_dlc *d)
{
struct rfcomm_session *s = d->session;
- BT_DBG("dlc %p refcnt %d session %p", d, atomic_read(&d->refcnt), s);
+ BT_DBG("dlc %p refcnt %d session %p", d, refcount_read(&d->refcnt), s);
list_del(&d->list);
d->session = NULL;
diff --git a/net/bluetooth/selftest.c b/net/bluetooth/selftest.c
index dc688f13e496..ee92c925ecc5 100644
--- a/net/bluetooth/selftest.c
+++ b/net/bluetooth/selftest.c
@@ -26,7 +26,7 @@
#include <net/bluetooth/bluetooth.h>
#include <net/bluetooth/hci_core.h>
-#include "ecc.h"
+#include "ecdh_helper.h"
#include "smp.h"
#include "selftest.h"
@@ -142,18 +142,30 @@ static int __init test_ecdh_sample(const u8 priv_a[32], const u8 priv_b[32],
const u8 pub_a[64], const u8 pub_b[64],
const u8 dhkey[32])
{
- u8 dhkey_a[32], dhkey_b[32];
+ u8 *tmp, *dhkey_a, *dhkey_b;
+ int ret = 0;
- ecdh_shared_secret(pub_b, priv_a, dhkey_a);
- ecdh_shared_secret(pub_a, priv_b, dhkey_b);
-
- if (memcmp(dhkey_a, dhkey, 32))
+ tmp = kmalloc(64, GFP_KERNEL);
+ if (!tmp)
return -EINVAL;
+ dhkey_a = &tmp[0];
+ dhkey_b = &tmp[32];
+
+ compute_ecdh_secret(pub_b, priv_a, dhkey_a);
+ compute_ecdh_secret(pub_a, priv_b, dhkey_b);
+
+ if (memcmp(dhkey_a, dhkey, 32)) {
+ ret = -EINVAL;
+ goto out;
+ }
+
if (memcmp(dhkey_b, dhkey, 32))
- return -EINVAL;
+ ret = -EINVAL;
- return 0;
+out:
+ kfree(dhkey_a);
+ return ret;
}
static char test_ecdh_buffer[32];
diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c
index fae391f1871f..14585edc9439 100644
--- a/net/bluetooth/smp.c
+++ b/net/bluetooth/smp.c
@@ -31,7 +31,7 @@
#include <net/bluetooth/l2cap.h>
#include <net/bluetooth/mgmt.h>
-#include "ecc.h"
+#include "ecdh_helper.h"
#include "smp.h"
#define SMP_DEV(hdev) \
@@ -569,8 +569,11 @@ int smp_generate_oob(struct hci_dev *hdev, u8 hash[16], u8 rand[16])
smp->debug_key = true;
} else {
while (true) {
+ /* Seed private key with random number */
+ get_random_bytes(smp->local_sk, 32);
+
/* Generate local key pair for Secure Connections */
- if (!ecc_make_key(smp->local_pk, smp->local_sk))
+ if (!generate_ecdh_keys(smp->local_pk, smp->local_sk))
return -EIO;
/* This is unlikely, but we need to check that
@@ -1895,8 +1898,11 @@ static u8 sc_send_public_key(struct smp_chan *smp)
set_bit(SMP_FLAG_DEBUG_KEY, &smp->flags);
} else {
while (true) {
+ /* Seed private key with random number */
+ get_random_bytes(smp->local_sk, 32);
+
/* Generate local key pair for Secure Connections */
- if (!ecc_make_key(smp->local_pk, smp->local_sk))
+ if (!generate_ecdh_keys(smp->local_pk, smp->local_sk))
return SMP_UNSPECIFIED;
/* This is unlikely, but we need to check that
@@ -2670,7 +2676,7 @@ static int smp_cmd_public_key(struct l2cap_conn *conn, struct sk_buff *skb)
SMP_DBG("Remote Public Key X: %32phN", smp->remote_pk);
SMP_DBG("Remote Public Key Y: %32phN", smp->remote_pk + 32);
- if (!ecdh_shared_secret(smp->remote_pk, smp->local_sk, smp->dhkey))
+ if (!compute_ecdh_secret(smp->remote_pk, smp->local_sk, smp->dhkey))
return SMP_UNSPECIFIED;
SMP_DBG("DHKey %32phN", smp->dhkey);
@@ -3483,6 +3489,32 @@ void smp_unregister(struct hci_dev *hdev)
#if IS_ENABLED(CONFIG_BT_SELFTEST_SMP)
+static inline void swap_digits(u64 *in, u64 *out, unsigned int ndigits)
+{
+ int i;
+
+ for (i = 0; i < ndigits; i++)
+ out[i] = __swab64(in[ndigits - 1 - i]);
+}
+
+static int __init test_debug_key(void)
+{
+ u8 pk[64], sk[32];
+
+ swap_digits((u64 *)debug_sk, (u64 *)sk, 4);
+
+ if (!generate_ecdh_keys(pk, sk))
+ return -EINVAL;
+
+ if (memcmp(sk, debug_sk, 32))
+ return -EINVAL;
+
+ if (memcmp(pk, debug_pk, 64))
+ return -EINVAL;
+
+ return 0;
+}
+
static int __init test_ah(struct crypto_cipher *tfm_aes)
{
const u8 irk[16] = {
@@ -3738,6 +3770,12 @@ static int __init run_selftests(struct crypto_cipher *tfm_aes,
calltime = ktime_get();
+ err = test_debug_key();
+ if (err) {
+ BT_ERR("debug_key test failed");
+ goto done;
+ }
+
err = test_ah(tfm_aes);
if (err) {
BT_ERR("smp_ah test failed");
diff --git a/net/bpf/Makefile b/net/bpf/Makefile
new file mode 100644
index 000000000000..27b2992a0692
--- /dev/null
+++ b/net/bpf/Makefile
@@ -0,0 +1 @@
+obj-y := test_run.o
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
new file mode 100644
index 000000000000..6be41a44d688
--- /dev/null
+++ b/net/bpf/test_run.c
@@ -0,0 +1,173 @@
+/* Copyright (c) 2017 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <linux/bpf.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/etherdevice.h>
+#include <linux/filter.h>
+#include <linux/sched/signal.h>
+
+static __always_inline u32 bpf_test_run_one(struct bpf_prog *prog, void *ctx)
+{
+ u32 ret;
+
+ preempt_disable();
+ rcu_read_lock();
+ ret = BPF_PROG_RUN(prog, ctx);
+ rcu_read_unlock();
+ preempt_enable();
+
+ return ret;
+}
+
+static u32 bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat, u32 *time)
+{
+ u64 time_start, time_spent = 0;
+ u32 ret = 0, i;
+
+ if (!repeat)
+ repeat = 1;
+ time_start = ktime_get_ns();
+ for (i = 0; i < repeat; i++) {
+ ret = bpf_test_run_one(prog, ctx);
+ if (need_resched()) {
+ if (signal_pending(current))
+ break;
+ time_spent += ktime_get_ns() - time_start;
+ cond_resched();
+ time_start = ktime_get_ns();
+ }
+ }
+ time_spent += ktime_get_ns() - time_start;
+ do_div(time_spent, repeat);
+ *time = time_spent > U32_MAX ? U32_MAX : (u32)time_spent;
+
+ return ret;
+}
+
+static int bpf_test_finish(const union bpf_attr *kattr,
+ union bpf_attr __user *uattr, const void *data,
+ u32 size, u32 retval, u32 duration)
+{
+ void __user *data_out = u64_to_user_ptr(kattr->test.data_out);
+ int err = -EFAULT;
+
+ if (data_out && copy_to_user(data_out, data, size))
+ goto out;
+ if (copy_to_user(&uattr->test.data_size_out, &size, sizeof(size)))
+ goto out;
+ if (copy_to_user(&uattr->test.retval, &retval, sizeof(retval)))
+ goto out;
+ if (copy_to_user(&uattr->test.duration, &duration, sizeof(duration)))
+ goto out;
+ err = 0;
+out:
+ return err;
+}
+
+static void *bpf_test_init(const union bpf_attr *kattr, u32 size,
+ u32 headroom, u32 tailroom)
+{
+ void __user *data_in = u64_to_user_ptr(kattr->test.data_in);
+ void *data;
+
+ if (size < ETH_HLEN || size > PAGE_SIZE - headroom - tailroom)
+ return ERR_PTR(-EINVAL);
+
+ data = kzalloc(size + headroom + tailroom, GFP_USER);
+ if (!data)
+ return ERR_PTR(-ENOMEM);
+
+ if (copy_from_user(data + headroom, data_in, size)) {
+ kfree(data);
+ return ERR_PTR(-EFAULT);
+ }
+ return data;
+}
+
+int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
+ union bpf_attr __user *uattr)
+{
+ bool is_l2 = false, is_direct_pkt_access = false;
+ u32 size = kattr->test.data_size_in;
+ u32 repeat = kattr->test.repeat;
+ u32 retval, duration;
+ struct sk_buff *skb;
+ void *data;
+ int ret;
+
+ data = bpf_test_init(kattr, size, NET_SKB_PAD + NET_IP_ALIGN,
+ SKB_DATA_ALIGN(sizeof(struct skb_shared_info)));
+ if (IS_ERR(data))
+ return PTR_ERR(data);
+
+ switch (prog->type) {
+ case BPF_PROG_TYPE_SCHED_CLS:
+ case BPF_PROG_TYPE_SCHED_ACT:
+ is_l2 = true;
+ /* fall through */
+ case BPF_PROG_TYPE_LWT_IN:
+ case BPF_PROG_TYPE_LWT_OUT:
+ case BPF_PROG_TYPE_LWT_XMIT:
+ is_direct_pkt_access = true;
+ break;
+ default:
+ break;
+ }
+
+ skb = build_skb(data, 0);
+ if (!skb) {
+ kfree(data);
+ return -ENOMEM;
+ }
+
+ skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
+ __skb_put(skb, size);
+ skb->protocol = eth_type_trans(skb, current->nsproxy->net_ns->loopback_dev);
+ skb_reset_network_header(skb);
+
+ if (is_l2)
+ __skb_push(skb, ETH_HLEN);
+ if (is_direct_pkt_access)
+ bpf_compute_data_end(skb);
+ retval = bpf_test_run(prog, skb, repeat, &duration);
+ if (!is_l2)
+ __skb_push(skb, ETH_HLEN);
+ size = skb->len;
+ /* bpf program can never convert linear skb to non-linear */
+ if (WARN_ON_ONCE(skb_is_nonlinear(skb)))
+ size = skb_headlen(skb);
+ ret = bpf_test_finish(kattr, uattr, skb->data, size, retval, duration);
+ kfree_skb(skb);
+ return ret;
+}
+
+int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
+ union bpf_attr __user *uattr)
+{
+ u32 size = kattr->test.data_size_in;
+ u32 repeat = kattr->test.repeat;
+ struct xdp_buff xdp = {};
+ u32 retval, duration;
+ void *data;
+ int ret;
+
+ data = bpf_test_init(kattr, size, XDP_PACKET_HEADROOM + NET_IP_ALIGN, 0);
+ if (IS_ERR(data))
+ return PTR_ERR(data);
+
+ xdp.data_hard_start = data;
+ xdp.data = data + XDP_PACKET_HEADROOM + NET_IP_ALIGN;
+ xdp.data_end = xdp.data + size;
+
+ retval = bpf_test_run(prog, &xdp, repeat, &duration);
+ if (xdp.data != data + XDP_PACKET_HEADROOM + NET_IP_ALIGN)
+ size = xdp.data_end - xdp.data;
+ ret = bpf_test_finish(kattr, uattr, xdp.data, size, retval, duration);
+ kfree(data);
+ return ret;
+}
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index 6e08b7199dd7..ab0c7cc8448f 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -589,6 +589,9 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
if (unlikely(source != fdb->dst)) {
fdb->dst = source;
fdb_modified = true;
+ /* Take over HW learned entry */
+ if (unlikely(fdb->added_by_external_learn))
+ fdb->added_by_external_learn = 0;
}
if (now != fdb->updated)
fdb->updated = now;
@@ -854,6 +857,8 @@ static int __br_fdb_add(struct ndmsg *ndm, struct net_bridge *br,
br_fdb_update(br, p, addr, vid, true);
rcu_read_unlock();
local_bh_enable();
+ } else if (ndm->ndm_flags & NTF_EXT_LEARNED) {
+ err = br_fdb_external_learn_add(br, p, addr, vid);
} else {
spin_lock_bh(&br->hash_lock);
err = fdb_add_entry(br, p, addr, ndm->ndm_state,
diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index 902af6ba481c..48fb17417fac 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -183,13 +183,23 @@ void br_flood(struct net_bridge *br, struct sk_buff *skb,
struct net_bridge_port *p;
list_for_each_entry_rcu(p, &br->port_list, list) {
- /* Do not flood unicast traffic to ports that turn it off */
- if (pkt_type == BR_PKT_UNICAST && !(p->flags & BR_FLOOD))
- continue;
- /* Do not flood if mc off, except for traffic we originate */
- if (pkt_type == BR_PKT_MULTICAST &&
- !(p->flags & BR_MCAST_FLOOD) && skb->dev != br->dev)
- continue;
+ /* Do not flood unicast traffic to ports that turn it off, nor
+ * other traffic if flood off, except for traffic we originate
+ */
+ switch (pkt_type) {
+ case BR_PKT_UNICAST:
+ if (!(p->flags & BR_FLOOD))
+ continue;
+ break;
+ case BR_PKT_MULTICAST:
+ if (!(p->flags & BR_MCAST_FLOOD) && skb->dev != br->dev)
+ continue;
+ break;
+ case BR_PKT_BROADCAST:
+ if (!(p->flags & BR_BCAST_FLOOD) && skb->dev != br->dev)
+ continue;
+ break;
+ }
/* Do not flood to ports that enable proxy ARP */
if (p->flags & BR_PROXYARP)
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index a8d0ed282a10..7f8d05cf9065 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -22,6 +22,7 @@
#include <linux/rtnetlink.h>
#include <linux/if_ether.h>
#include <linux/slab.h>
+#include <net/dsa.h>
#include <net/sock.h>
#include <linux/if_vlan.h>
#include <net/switchdev.h>
@@ -360,7 +361,7 @@ static struct net_bridge_port *new_nbp(struct net_bridge *br,
p->path_cost = port_cost(dev);
p->priority = 0x8000 >> BR_PORT_BITS;
p->port_no = index;
- p->flags = BR_LEARNING | BR_FLOOD | BR_MCAST_FLOOD;
+ p->flags = BR_LEARNING | BR_FLOOD | BR_MCAST_FLOOD | BR_BCAST_FLOOD;
br_init_port(p);
br_set_state(p, BR_STATE_DISABLED);
br_stp_port_timer_init(p);
diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index 056e6ac49d8f..b0845480a3ae 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -464,7 +464,8 @@ static int br_mdb_parse(struct sk_buff *skb, struct nlmsghdr *nlh,
struct net_device *dev;
int err;
- err = nlmsg_parse(nlh, sizeof(*bpm), tb, MDBA_SET_ENTRY_MAX, NULL);
+ err = nlmsg_parse(nlh, sizeof(*bpm), tb, MDBA_SET_ENTRY_MAX, NULL,
+ NULL);
if (err < 0)
return err;
@@ -568,7 +569,8 @@ static int __br_mdb_add(struct net *net, struct net_bridge *br,
return ret;
}
-static int br_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int br_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
struct net_bridge_vlan_group *vg;
@@ -662,7 +664,8 @@ unlock:
return err;
}
-static int br_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int br_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
struct net_bridge_vlan_group *vg;
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index 1f1e62095464..067cf0313449 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -997,13 +997,10 @@ int br_nf_hook_thresh(unsigned int hook, struct net *net,
if (!elem)
return okfn(net, sk, skb);
- /* We may already have this, but read-locks nest anyway */
- rcu_read_lock();
nf_hook_state_init(&state, hook, NFPROTO_BRIDGE, indev, outdev,
sk, net, okfn);
ret = nf_hook_slow(skb, &state, elem);
- rcu_read_unlock();
if (ret == 1)
ret = okfn(net, sk, skb);
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 225ef7d53701..574f78824d8a 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -133,6 +133,8 @@ static inline size_t br_port_info_size(void)
+ nla_total_size(1) /* IFLA_BRPORT_MCAST_TO_UCAST */
+ nla_total_size(1) /* IFLA_BRPORT_LEARNING */
+ nla_total_size(1) /* IFLA_BRPORT_UNICAST_FLOOD */
+ + nla_total_size(1) /* IFLA_BRPORT_MCAST_FLOOD */
+ + nla_total_size(1) /* IFLA_BRPORT_BCAST_FLOOD */
+ nla_total_size(1) /* IFLA_BRPORT_PROXYARP */
+ nla_total_size(1) /* IFLA_BRPORT_PROXYARP_WIFI */
+ nla_total_size(1) /* IFLA_BRPORT_VLAN_TUNNEL */
@@ -189,6 +191,8 @@ static int br_port_fill_attrs(struct sk_buff *skb,
!!(p->flags & BR_FLOOD)) ||
nla_put_u8(skb, IFLA_BRPORT_MCAST_FLOOD,
!!(p->flags & BR_MCAST_FLOOD)) ||
+ nla_put_u8(skb, IFLA_BRPORT_BCAST_FLOOD,
+ !!(p->flags & BR_BCAST_FLOOD)) ||
nla_put_u8(skb, IFLA_BRPORT_PROXYARP, !!(p->flags & BR_PROXYARP)) ||
nla_put_u8(skb, IFLA_BRPORT_PROXYARP_WIFI,
!!(p->flags & BR_PROXYARP_WIFI)) ||
@@ -631,6 +635,8 @@ static const struct nla_policy br_port_policy[IFLA_BRPORT_MAX + 1] = {
[IFLA_BRPORT_PROXYARP_WIFI] = { .type = NLA_U8 },
[IFLA_BRPORT_MULTICAST_ROUTER] = { .type = NLA_U8 },
[IFLA_BRPORT_MCAST_TO_UCAST] = { .type = NLA_U8 },
+ [IFLA_BRPORT_MCAST_FLOOD] = { .type = NLA_U8 },
+ [IFLA_BRPORT_BCAST_FLOOD] = { .type = NLA_U8 },
};
/* Change the state of the port and notify spanning tree */
@@ -683,6 +689,7 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[])
br_set_port_flag(p, tb, IFLA_BRPORT_UNICAST_FLOOD, BR_FLOOD);
br_set_port_flag(p, tb, IFLA_BRPORT_MCAST_FLOOD, BR_MCAST_FLOOD);
br_set_port_flag(p, tb, IFLA_BRPORT_MCAST_TO_UCAST, BR_MULTICAST_TO_UNICAST);
+ br_set_port_flag(p, tb, IFLA_BRPORT_BCAST_FLOOD, BR_BCAST_FLOOD);
br_set_port_flag(p, tb, IFLA_BRPORT_PROXYARP, BR_PROXYARP);
br_set_port_flag(p, tb, IFLA_BRPORT_PROXYARP_WIFI, BR_PROXYARP_WIFI);
@@ -748,8 +755,8 @@ int br_setlink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags)
if (p && protinfo) {
if (protinfo->nla_type & NLA_F_NESTED) {
- err = nla_parse_nested(tb, IFLA_BRPORT_MAX,
- protinfo, br_port_policy);
+ err = nla_parse_nested(tb, IFLA_BRPORT_MAX, protinfo,
+ br_port_policy, NULL);
if (err)
return err;
@@ -828,6 +835,13 @@ static int br_validate(struct nlattr *tb[], struct nlattr *data[])
return -EPROTONOSUPPORT;
}
}
+
+ if (data[IFLA_BR_VLAN_DEFAULT_PVID]) {
+ __u16 defpvid = nla_get_u16(data[IFLA_BR_VLAN_DEFAULT_PVID]);
+
+ if (defpvid >= VLAN_VID_MASK)
+ return -EINVAL;
+ }
#endif
return 0;
diff --git a/net/bridge/br_netlink_tunnel.c b/net/bridge/br_netlink_tunnel.c
index c913491495ab..3712c7f0e00c 100644
--- a/net/bridge/br_netlink_tunnel.c
+++ b/net/bridge/br_netlink_tunnel.c
@@ -227,8 +227,8 @@ int br_parse_vlan_tunnel_info(struct nlattr *attr,
memset(tinfo, 0, sizeof(*tinfo));
- err = nla_parse_nested(tb, IFLA_BRIDGE_VLAN_TUNNEL_MAX,
- attr, vlan_tunnel_policy);
+ err = nla_parse_nested(tb, IFLA_BRIDGE_VLAN_TUNNEL_MAX, attr,
+ vlan_tunnel_policy, NULL);
if (err < 0)
return err;
diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c
index 79aee759aba5..5d5d413a6cf8 100644
--- a/net/bridge/br_sysfs_if.c
+++ b/net/bridge/br_sysfs_if.c
@@ -173,6 +173,7 @@ BRPORT_ATTR_FLAG(unicast_flood, BR_FLOOD);
BRPORT_ATTR_FLAG(proxyarp, BR_PROXYARP);
BRPORT_ATTR_FLAG(proxyarp_wifi, BR_PROXYARP_WIFI);
BRPORT_ATTR_FLAG(multicast_flood, BR_MCAST_FLOOD);
+BRPORT_ATTR_FLAG(broadcast_flood, BR_BCAST_FLOOD);
#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
static ssize_t show_multicast_router(struct net_bridge_port *p, char *buf)
@@ -221,6 +222,7 @@ static const struct brport_attribute *brport_attrs[] = {
&brport_attr_proxyarp,
&brport_attr_proxyarp_wifi,
&brport_attr_multicast_flood,
+ &brport_attr_broadcast_flood,
NULL
};
diff --git a/net/bridge/netfilter/ebt_dnat.c b/net/bridge/netfilter/ebt_dnat.c
index 4e0b0c359325..e0bb624c3845 100644
--- a/net/bridge/netfilter/ebt_dnat.c
+++ b/net/bridge/netfilter/ebt_dnat.c
@@ -9,6 +9,7 @@
*/
#include <linux/module.h>
#include <net/sock.h>
+#include "../br_private.h"
#include <linux/netfilter.h>
#include <linux/netfilter/x_tables.h>
#include <linux/netfilter_bridge/ebtables.h>
@@ -18,11 +19,30 @@ static unsigned int
ebt_dnat_tg(struct sk_buff *skb, const struct xt_action_param *par)
{
const struct ebt_nat_info *info = par->targinfo;
+ struct net_device *dev;
if (!skb_make_writable(skb, 0))
return EBT_DROP;
ether_addr_copy(eth_hdr(skb)->h_dest, info->mac);
+
+ if (is_multicast_ether_addr(info->mac)) {
+ if (is_broadcast_ether_addr(info->mac))
+ skb->pkt_type = PACKET_BROADCAST;
+ else
+ skb->pkt_type = PACKET_MULTICAST;
+ } else {
+ if (xt_hooknum(par) != NF_BR_BROUTING)
+ dev = br_port_get_rcu(xt_in(par))->br->dev;
+ else
+ dev = xt_in(par);
+
+ if (ether_addr_equal(info->mac, dev->dev_addr))
+ skb->pkt_type = PACKET_HOST;
+ else
+ skb->pkt_type = PACKET_OTHERHOST;
+ }
+
return info->target;
}
diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c
index 98b9c8e8615e..707caea39743 100644
--- a/net/bridge/netfilter/ebt_log.c
+++ b/net/bridge/netfilter/ebt_log.c
@@ -62,10 +62,10 @@ print_ports(const struct sk_buff *skb, uint8_t protocol, int offset)
pptr = skb_header_pointer(skb, offset,
sizeof(_ports), &_ports);
if (pptr == NULL) {
- printk(" INCOMPLETE TCP/UDP header");
+ pr_cont(" INCOMPLETE TCP/UDP header");
return;
}
- printk(" SPT=%u DPT=%u", ntohs(pptr->src), ntohs(pptr->dst));
+ pr_cont(" SPT=%u DPT=%u", ntohs(pptr->src), ntohs(pptr->dst));
}
}
@@ -100,11 +100,11 @@ ebt_log_packet(struct net *net, u_int8_t pf, unsigned int hooknum,
ih = skb_header_pointer(skb, 0, sizeof(_iph), &_iph);
if (ih == NULL) {
- printk(" INCOMPLETE IP header");
+ pr_cont(" INCOMPLETE IP header");
goto out;
}
- printk(" IP SRC=%pI4 IP DST=%pI4, IP tos=0x%02X, IP proto=%d",
- &ih->saddr, &ih->daddr, ih->tos, ih->protocol);
+ pr_cont(" IP SRC=%pI4 IP DST=%pI4, IP tos=0x%02X, IP proto=%d",
+ &ih->saddr, &ih->daddr, ih->tos, ih->protocol);
print_ports(skb, ih->protocol, ih->ihl*4);
goto out;
}
@@ -120,11 +120,11 @@ ebt_log_packet(struct net *net, u_int8_t pf, unsigned int hooknum,
ih = skb_header_pointer(skb, 0, sizeof(_iph), &_iph);
if (ih == NULL) {
- printk(" INCOMPLETE IPv6 header");
+ pr_cont(" INCOMPLETE IPv6 header");
goto out;
}
- printk(" IPv6 SRC=%pI6 IPv6 DST=%pI6, IPv6 priority=0x%01X, Next Header=%d",
- &ih->saddr, &ih->daddr, ih->priority, ih->nexthdr);
+ pr_cont(" IPv6 SRC=%pI6 IPv6 DST=%pI6, IPv6 priority=0x%01X, Next Header=%d",
+ &ih->saddr, &ih->daddr, ih->priority, ih->nexthdr);
nexthdr = ih->nexthdr;
offset_ph = ipv6_skip_exthdr(skb, sizeof(_iph), &nexthdr, &frag_off);
if (offset_ph == -1)
@@ -142,12 +142,12 @@ ebt_log_packet(struct net *net, u_int8_t pf, unsigned int hooknum,
ah = skb_header_pointer(skb, 0, sizeof(_arph), &_arph);
if (ah == NULL) {
- printk(" INCOMPLETE ARP header");
+ pr_cont(" INCOMPLETE ARP header");
goto out;
}
- printk(" ARP HTYPE=%d, PTYPE=0x%04x, OPCODE=%d",
- ntohs(ah->ar_hrd), ntohs(ah->ar_pro),
- ntohs(ah->ar_op));
+ pr_cont(" ARP HTYPE=%d, PTYPE=0x%04x, OPCODE=%d",
+ ntohs(ah->ar_hrd), ntohs(ah->ar_pro),
+ ntohs(ah->ar_op));
/* If it's for Ethernet and the lengths are OK,
* then log the ARP payload
@@ -161,17 +161,17 @@ ebt_log_packet(struct net *net, u_int8_t pf, unsigned int hooknum,
ap = skb_header_pointer(skb, sizeof(_arph),
sizeof(_arpp), &_arpp);
if (ap == NULL) {
- printk(" INCOMPLETE ARP payload");
+ pr_cont(" INCOMPLETE ARP payload");
goto out;
}
- printk(" ARP MAC SRC=%pM ARP IP SRC=%pI4 ARP MAC DST=%pM ARP IP DST=%pI4",
- ap->mac_src, ap->ip_src, ap->mac_dst, ap->ip_dst);
+ pr_cont(" ARP MAC SRC=%pM ARP IP SRC=%pI4 ARP MAC DST=%pM ARP IP DST=%pI4",
+ ap->mac_src, ap->ip_src,
+ ap->mac_dst, ap->ip_dst);
}
}
out:
- printk("\n");
+ pr_cont("\n");
spin_unlock_bh(&ebt_log_lock);
-
}
static unsigned int
diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c
index 8fe36dc3aab2..2585b100ebbb 100644
--- a/net/bridge/netfilter/ebtable_broute.c
+++ b/net/bridge/netfilter/ebtable_broute.c
@@ -65,13 +65,13 @@ static int ebt_broute(struct sk_buff *skb)
static int __net_init broute_net_init(struct net *net)
{
- net->xt.broute_table = ebt_register_table(net, &broute_table);
+ net->xt.broute_table = ebt_register_table(net, &broute_table, NULL);
return PTR_ERR_OR_ZERO(net->xt.broute_table);
}
static void __net_exit broute_net_exit(struct net *net)
{
- ebt_unregister_table(net, net->xt.broute_table);
+ ebt_unregister_table(net, net->xt.broute_table, NULL);
}
static struct pernet_operations broute_net_ops = {
diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c
index 593a1bdc079e..f22ef7c21913 100644
--- a/net/bridge/netfilter/ebtable_filter.c
+++ b/net/bridge/netfilter/ebtable_filter.c
@@ -93,13 +93,13 @@ static struct nf_hook_ops ebt_ops_filter[] __read_mostly = {
static int __net_init frame_filter_net_init(struct net *net)
{
- net->xt.frame_filter = ebt_register_table(net, &frame_filter);
+ net->xt.frame_filter = ebt_register_table(net, &frame_filter, ebt_ops_filter);
return PTR_ERR_OR_ZERO(net->xt.frame_filter);
}
static void __net_exit frame_filter_net_exit(struct net *net)
{
- ebt_unregister_table(net, net->xt.frame_filter);
+ ebt_unregister_table(net, net->xt.frame_filter, ebt_ops_filter);
}
static struct pernet_operations frame_filter_net_ops = {
@@ -109,20 +109,11 @@ static struct pernet_operations frame_filter_net_ops = {
static int __init ebtable_filter_init(void)
{
- int ret;
-
- ret = register_pernet_subsys(&frame_filter_net_ops);
- if (ret < 0)
- return ret;
- ret = nf_register_hooks(ebt_ops_filter, ARRAY_SIZE(ebt_ops_filter));
- if (ret < 0)
- unregister_pernet_subsys(&frame_filter_net_ops);
- return ret;
+ return register_pernet_subsys(&frame_filter_net_ops);
}
static void __exit ebtable_filter_fini(void)
{
- nf_unregister_hooks(ebt_ops_filter, ARRAY_SIZE(ebt_ops_filter));
unregister_pernet_subsys(&frame_filter_net_ops);
}
diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c
index eb33919821ee..2f7a4f314406 100644
--- a/net/bridge/netfilter/ebtable_nat.c
+++ b/net/bridge/netfilter/ebtable_nat.c
@@ -93,13 +93,13 @@ static struct nf_hook_ops ebt_ops_nat[] __read_mostly = {
static int __net_init frame_nat_net_init(struct net *net)
{
- net->xt.frame_nat = ebt_register_table(net, &frame_nat);
+ net->xt.frame_nat = ebt_register_table(net, &frame_nat, ebt_ops_nat);
return PTR_ERR_OR_ZERO(net->xt.frame_nat);
}
static void __net_exit frame_nat_net_exit(struct net *net)
{
- ebt_unregister_table(net, net->xt.frame_nat);
+ ebt_unregister_table(net, net->xt.frame_nat, ebt_ops_nat);
}
static struct pernet_operations frame_nat_net_ops = {
@@ -109,20 +109,11 @@ static struct pernet_operations frame_nat_net_ops = {
static int __init ebtable_nat_init(void)
{
- int ret;
-
- ret = register_pernet_subsys(&frame_nat_net_ops);
- if (ret < 0)
- return ret;
- ret = nf_register_hooks(ebt_ops_nat, ARRAY_SIZE(ebt_ops_nat));
- if (ret < 0)
- unregister_pernet_subsys(&frame_nat_net_ops);
- return ret;
+ return register_pernet_subsys(&frame_nat_net_ops);
}
static void __exit ebtable_nat_fini(void)
{
- nf_unregister_hooks(ebt_ops_nat, ARRAY_SIZE(ebt_ops_nat));
unregister_pernet_subsys(&frame_nat_net_ops);
}
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 79b69917f521..9ec0c9f908fa 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -1157,8 +1157,30 @@ free_newinfo:
return ret;
}
+static void __ebt_unregister_table(struct net *net, struct ebt_table *table)
+{
+ int i;
+
+ mutex_lock(&ebt_mutex);
+ list_del(&table->list);
+ mutex_unlock(&ebt_mutex);
+ EBT_ENTRY_ITERATE(table->private->entries, table->private->entries_size,
+ ebt_cleanup_entry, net, NULL);
+ if (table->private->nentries)
+ module_put(table->me);
+ vfree(table->private->entries);
+ if (table->private->chainstack) {
+ for_each_possible_cpu(i)
+ vfree(table->private->chainstack[i]);
+ vfree(table->private->chainstack);
+ }
+ vfree(table->private);
+ kfree(table);
+}
+
struct ebt_table *
-ebt_register_table(struct net *net, const struct ebt_table *input_table)
+ebt_register_table(struct net *net, const struct ebt_table *input_table,
+ const struct nf_hook_ops *ops)
{
struct ebt_table_info *newinfo;
struct ebt_table *t, *table;
@@ -1238,6 +1260,16 @@ ebt_register_table(struct net *net, const struct ebt_table *input_table)
}
list_add(&table->list, &net->xt.tables[NFPROTO_BRIDGE]);
mutex_unlock(&ebt_mutex);
+
+ if (!ops)
+ return table;
+
+ ret = nf_register_net_hooks(net, ops, hweight32(table->valid_hooks));
+ if (ret) {
+ __ebt_unregister_table(net, table);
+ return ERR_PTR(ret);
+ }
+
return table;
free_unlock:
mutex_unlock(&ebt_mutex);
@@ -1256,29 +1288,12 @@ out:
return ERR_PTR(ret);
}
-void ebt_unregister_table(struct net *net, struct ebt_table *table)
+void ebt_unregister_table(struct net *net, struct ebt_table *table,
+ const struct nf_hook_ops *ops)
{
- int i;
-
- if (!table) {
- BUGPRINT("Request to unregister NULL table!!!\n");
- return;
- }
- mutex_lock(&ebt_mutex);
- list_del(&table->list);
- mutex_unlock(&ebt_mutex);
- EBT_ENTRY_ITERATE(table->private->entries, table->private->entries_size,
- ebt_cleanup_entry, net, NULL);
- if (table->private->nentries)
- module_put(table->me);
- vfree(table->private->entries);
- if (table->private->chainstack) {
- for_each_possible_cpu(i)
- vfree(table->private->chainstack[i]);
- vfree(table->private->chainstack);
- }
- vfree(table->private);
- kfree(table);
+ if (ops)
+ nf_unregister_net_hooks(net, ops, hweight32(table->valid_hooks));
+ __ebt_unregister_table(net, table);
}
/* userspace just supplied us with counters */
@@ -1713,7 +1728,7 @@ static int compat_copy_entry_to_user(struct ebt_entry *e, void __user **dstptr,
if (*size < sizeof(*ce))
return -EINVAL;
- ce = (struct ebt_entry __user *)*dstptr;
+ ce = *dstptr;
if (copy_to_user(ce, e, sizeof(*ce)))
return -EFAULT;
diff --git a/net/bridge/netfilter/nft_meta_bridge.c b/net/bridge/netfilter/nft_meta_bridge.c
index 5974dbc1ea24..bb63c9aed55d 100644
--- a/net/bridge/netfilter/nft_meta_bridge.c
+++ b/net/bridge/netfilter/nft_meta_bridge.c
@@ -111,7 +111,7 @@ nft_meta_bridge_select_ops(const struct nft_ctx *ctx,
static struct nft_expr_type nft_meta_bridge_type __read_mostly = {
.family = NFPROTO_BRIDGE,
.name = "meta",
- .select_ops = &nft_meta_bridge_select_ops,
+ .select_ops = nft_meta_bridge_select_ops,
.policy = nft_meta_policy,
.maxattr = NFTA_META_MAX,
.owner = THIS_MODULE,
diff --git a/net/bridge/netfilter/nft_reject_bridge.c b/net/bridge/netfilter/nft_reject_bridge.c
index 206dc266ecd2..346ef6b00b8f 100644
--- a/net/bridge/netfilter/nft_reject_bridge.c
+++ b/net/bridge/netfilter/nft_reject_bridge.c
@@ -375,11 +375,7 @@ static int nft_reject_bridge_init(const struct nft_ctx *ctx,
const struct nlattr * const tb[])
{
struct nft_reject *priv = nft_expr_priv(expr);
- int icmp_code, err;
-
- err = nft_reject_bridge_validate(ctx, expr, NULL);
- if (err < 0)
- return err;
+ int icmp_code;
if (tb[NFTA_REJECT_TYPE] == NULL)
return -EINVAL;
diff --git a/net/can/af_can.c b/net/can/af_can.c
index 5488e4a6ccd0..b6406fe33c76 100644
--- a/net/can/af_can.c
+++ b/net/can/af_can.c
@@ -2,7 +2,7 @@
* af_can.c - Protocol family CAN core module
* (used by different CAN protocol modules)
*
- * Copyright (c) 2002-2007 Volkswagen Group Electronic Research
+ * Copyright (c) 2002-2017 Volkswagen Group Electronic Research
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -75,20 +75,12 @@ static int stats_timer __read_mostly = 1;
module_param(stats_timer, int, S_IRUGO);
MODULE_PARM_DESC(stats_timer, "enable timer for statistics (default:on)");
-/* receive filters subscribed for 'all' CAN devices */
-struct dev_rcv_lists can_rx_alldev_list;
-static DEFINE_SPINLOCK(can_rcvlists_lock);
-
static struct kmem_cache *rcv_cache __read_mostly;
/* table of registered CAN protocols */
static const struct can_proto *proto_tab[CAN_NPROTO] __read_mostly;
static DEFINE_MUTEX(proto_tab_lock);
-struct timer_list can_stattimer; /* timer for statistics update */
-struct s_stats can_stats; /* packet statistics */
-struct s_pstats can_pstats; /* receive list statistics */
-
static atomic_t skbcounter = ATOMIC_INIT(0);
/*
@@ -145,9 +137,6 @@ static int can_create(struct net *net, struct socket *sock, int protocol,
if (protocol < 0 || protocol >= CAN_NPROTO)
return -EINVAL;
- if (!net_eq(net, &init_net))
- return -EAFNOSUPPORT;
-
cp = can_get_proto(protocol);
#ifdef CONFIG_MODULES
@@ -228,6 +217,7 @@ int can_send(struct sk_buff *skb, int loop)
{
struct sk_buff *newskb = NULL;
struct canfd_frame *cfd = (struct canfd_frame *)skb->data;
+ struct s_stats *can_stats = dev_net(skb->dev)->can.can_stats;
int err = -EINVAL;
if (skb->len == CAN_MTU) {
@@ -316,8 +306,8 @@ int can_send(struct sk_buff *skb, int loop)
netif_rx_ni(newskb);
/* update statistics */
- can_stats.tx_frames++;
- can_stats.tx_frames_delta++;
+ can_stats->tx_frames++;
+ can_stats->tx_frames_delta++;
return 0;
@@ -331,10 +321,11 @@ EXPORT_SYMBOL(can_send);
* af_can rx path
*/
-static struct dev_rcv_lists *find_dev_rcv_lists(struct net_device *dev)
+static struct dev_rcv_lists *find_dev_rcv_lists(struct net *net,
+ struct net_device *dev)
{
if (!dev)
- return &can_rx_alldev_list;
+ return net->can.can_rx_alldev_list;
else
return (struct dev_rcv_lists *)dev->ml_priv;
}
@@ -467,13 +458,14 @@ static struct hlist_head *find_rcv_list(canid_t *can_id, canid_t *mask,
* -ENOMEM on missing cache mem to create subscription entry
* -ENODEV unknown device
*/
-int can_rx_register(struct net_device *dev, canid_t can_id, canid_t mask,
- void (*func)(struct sk_buff *, void *), void *data,
- char *ident, struct sock *sk)
+int can_rx_register(struct net *net, struct net_device *dev, canid_t can_id,
+ canid_t mask, void (*func)(struct sk_buff *, void *),
+ void *data, char *ident, struct sock *sk)
{
struct receiver *r;
struct hlist_head *rl;
struct dev_rcv_lists *d;
+ struct s_pstats *can_pstats = net->can.can_pstats;
int err = 0;
/* insert new receiver (dev,canid,mask) -> (func,data) */
@@ -481,13 +473,16 @@ int can_rx_register(struct net_device *dev, canid_t can_id, canid_t mask,
if (dev && dev->type != ARPHRD_CAN)
return -ENODEV;
+ if (dev && !net_eq(net, dev_net(dev)))
+ return -ENODEV;
+
r = kmem_cache_alloc(rcv_cache, GFP_KERNEL);
if (!r)
return -ENOMEM;
- spin_lock(&can_rcvlists_lock);
+ spin_lock(&net->can.can_rcvlists_lock);
- d = find_dev_rcv_lists(dev);
+ d = find_dev_rcv_lists(net, dev);
if (d) {
rl = find_rcv_list(&can_id, &mask, d);
@@ -502,15 +497,15 @@ int can_rx_register(struct net_device *dev, canid_t can_id, canid_t mask,
hlist_add_head_rcu(&r->list, rl);
d->entries++;
- can_pstats.rcv_entries++;
- if (can_pstats.rcv_entries_max < can_pstats.rcv_entries)
- can_pstats.rcv_entries_max = can_pstats.rcv_entries;
+ can_pstats->rcv_entries++;
+ if (can_pstats->rcv_entries_max < can_pstats->rcv_entries)
+ can_pstats->rcv_entries_max = can_pstats->rcv_entries;
} else {
kmem_cache_free(rcv_cache, r);
err = -ENODEV;
}
- spin_unlock(&can_rcvlists_lock);
+ spin_unlock(&net->can.can_rcvlists_lock);
return err;
}
@@ -540,19 +535,24 @@ static void can_rx_delete_receiver(struct rcu_head *rp)
* Description:
* Removes subscription entry depending on given (subscription) values.
*/
-void can_rx_unregister(struct net_device *dev, canid_t can_id, canid_t mask,
- void (*func)(struct sk_buff *, void *), void *data)
+void can_rx_unregister(struct net *net, struct net_device *dev, canid_t can_id,
+ canid_t mask, void (*func)(struct sk_buff *, void *),
+ void *data)
{
struct receiver *r = NULL;
struct hlist_head *rl;
+ struct s_pstats *can_pstats = net->can.can_pstats;
struct dev_rcv_lists *d;
if (dev && dev->type != ARPHRD_CAN)
return;
- spin_lock(&can_rcvlists_lock);
+ if (dev && !net_eq(net, dev_net(dev)))
+ return;
+
+ spin_lock(&net->can.can_rcvlists_lock);
- d = find_dev_rcv_lists(dev);
+ d = find_dev_rcv_lists(net, dev);
if (!d) {
pr_err("BUG: receive list not found for "
"dev %s, id %03X, mask %03X\n",
@@ -588,8 +588,8 @@ void can_rx_unregister(struct net_device *dev, canid_t can_id, canid_t mask,
hlist_del_rcu(&r->list);
d->entries--;
- if (can_pstats.rcv_entries > 0)
- can_pstats.rcv_entries--;
+ if (can_pstats->rcv_entries > 0)
+ can_pstats->rcv_entries--;
/* remove device structure requested by NETDEV_UNREGISTER */
if (d->remove_on_zero_entries && !d->entries) {
@@ -598,7 +598,7 @@ void can_rx_unregister(struct net_device *dev, canid_t can_id, canid_t mask,
}
out:
- spin_unlock(&can_rcvlists_lock);
+ spin_unlock(&net->can.can_rcvlists_lock);
/* schedule the receiver item for deletion */
if (r) {
@@ -683,11 +683,13 @@ static int can_rcv_filter(struct dev_rcv_lists *d, struct sk_buff *skb)
static void can_receive(struct sk_buff *skb, struct net_device *dev)
{
struct dev_rcv_lists *d;
+ struct net *net = dev_net(dev);
+ struct s_stats *can_stats = net->can.can_stats;
int matches;
/* update statistics */
- can_stats.rx_frames++;
- can_stats.rx_frames_delta++;
+ can_stats->rx_frames++;
+ can_stats->rx_frames_delta++;
/* create non-zero unique skb identifier together with *skb */
while (!(can_skb_prv(skb)->skbcnt))
@@ -696,10 +698,10 @@ static void can_receive(struct sk_buff *skb, struct net_device *dev)
rcu_read_lock();
/* deliver the packet to sockets listening on all devices */
- matches = can_rcv_filter(&can_rx_alldev_list, skb);
+ matches = can_rcv_filter(net->can.can_rx_alldev_list, skb);
/* find receive list for this device */
- d = find_dev_rcv_lists(dev);
+ d = find_dev_rcv_lists(net, dev);
if (d)
matches += can_rcv_filter(d, skb);
@@ -709,8 +711,8 @@ static void can_receive(struct sk_buff *skb, struct net_device *dev)
consume_skb(skb);
if (matches > 0) {
- can_stats.matches++;
- can_stats.matches_delta++;
+ can_stats->matches++;
+ can_stats->matches_delta++;
}
}
@@ -719,9 +721,6 @@ static int can_rcv(struct sk_buff *skb, struct net_device *dev,
{
struct canfd_frame *cfd = (struct canfd_frame *)skb->data;
- if (unlikely(!net_eq(dev_net(dev), &init_net)))
- goto drop;
-
if (WARN_ONCE(dev->type != ARPHRD_CAN ||
skb->len != CAN_MTU ||
cfd->len > CAN_MAX_DLEN,
@@ -743,9 +742,6 @@ static int canfd_rcv(struct sk_buff *skb, struct net_device *dev,
{
struct canfd_frame *cfd = (struct canfd_frame *)skb->data;
- if (unlikely(!net_eq(dev_net(dev), &init_net)))
- goto drop;
-
if (WARN_ONCE(dev->type != ARPHRD_CAN ||
skb->len != CANFD_MTU ||
cfd->len > CANFD_MAX_DLEN,
@@ -835,9 +831,6 @@ static int can_notifier(struct notifier_block *nb, unsigned long msg,
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct dev_rcv_lists *d;
- if (!net_eq(dev_net(dev), &init_net))
- return NOTIFY_DONE;
-
if (dev->type != ARPHRD_CAN)
return NOTIFY_DONE;
@@ -855,7 +848,7 @@ static int can_notifier(struct notifier_block *nb, unsigned long msg,
break;
case NETDEV_UNREGISTER:
- spin_lock(&can_rcvlists_lock);
+ spin_lock(&dev_net(dev)->can.can_rcvlists_lock);
d = dev->ml_priv;
if (d) {
@@ -869,7 +862,7 @@ static int can_notifier(struct notifier_block *nb, unsigned long msg,
pr_err("can: notifier: receive list not found for dev "
"%s\n", dev->name);
- spin_unlock(&can_rcvlists_lock);
+ spin_unlock(&dev_net(dev)->can.can_rcvlists_lock);
break;
}
@@ -877,6 +870,59 @@ static int can_notifier(struct notifier_block *nb, unsigned long msg,
return NOTIFY_DONE;
}
+static int can_pernet_init(struct net *net)
+{
+ net->can.can_rcvlists_lock =
+ __SPIN_LOCK_UNLOCKED(net->can.can_rcvlists_lock);
+ net->can.can_rx_alldev_list =
+ kzalloc(sizeof(struct dev_rcv_lists), GFP_KERNEL);
+
+ net->can.can_stats = kzalloc(sizeof(struct s_stats), GFP_KERNEL);
+ net->can.can_pstats = kzalloc(sizeof(struct s_pstats), GFP_KERNEL);
+
+ if (IS_ENABLED(CONFIG_PROC_FS)) {
+ /* the statistics are updated every second (timer triggered) */
+ if (stats_timer) {
+ setup_timer(&net->can.can_stattimer, can_stat_update,
+ (unsigned long)net);
+ mod_timer(&net->can.can_stattimer,
+ round_jiffies(jiffies + HZ));
+ }
+ net->can.can_stats->jiffies_init = jiffies;
+ can_init_proc(net);
+ }
+
+ return 0;
+}
+
+static void can_pernet_exit(struct net *net)
+{
+ struct net_device *dev;
+
+ if (IS_ENABLED(CONFIG_PROC_FS)) {
+ can_remove_proc(net);
+ if (stats_timer)
+ del_timer_sync(&net->can.can_stattimer);
+ }
+
+ /* remove created dev_rcv_lists from still registered CAN devices */
+ rcu_read_lock();
+ for_each_netdev_rcu(net, dev) {
+ if (dev->type == ARPHRD_CAN && dev->ml_priv) {
+ struct dev_rcv_lists *d = dev->ml_priv;
+
+ BUG_ON(d->entries);
+ kfree(d);
+ dev->ml_priv = NULL;
+ }
+ }
+ rcu_read_unlock();
+
+ kfree(net->can.can_rx_alldev_list);
+ kfree(net->can.can_stats);
+ kfree(net->can.can_pstats);
+}
+
/*
* af_can module init/exit functions
*/
@@ -902,6 +948,11 @@ static struct notifier_block can_netdev_notifier __read_mostly = {
.notifier_call = can_notifier,
};
+static struct pernet_operations can_pernet_ops __read_mostly = {
+ .init = can_pernet_init,
+ .exit = can_pernet_exit,
+};
+
static __init int can_init(void)
{
/* check for correct padding to be able to use the structs similarly */
@@ -912,21 +963,12 @@ static __init int can_init(void)
pr_info("can: controller area network core (" CAN_VERSION_STRING ")\n");
- memset(&can_rx_alldev_list, 0, sizeof(can_rx_alldev_list));
-
rcv_cache = kmem_cache_create("can_receiver", sizeof(struct receiver),
0, 0, NULL);
if (!rcv_cache)
return -ENOMEM;
- if (IS_ENABLED(CONFIG_PROC_FS)) {
- if (stats_timer) {
- /* the statistics are updated every second (timer triggered) */
- setup_timer(&can_stattimer, can_stat_update, 0);
- mod_timer(&can_stattimer, round_jiffies(jiffies + HZ));
- }
- can_init_proc();
- }
+ register_pernet_subsys(&can_pernet_ops);
/* protocol register */
sock_register(&can_family_ops);
@@ -939,34 +981,13 @@ static __init int can_init(void)
static __exit void can_exit(void)
{
- struct net_device *dev;
-
- if (IS_ENABLED(CONFIG_PROC_FS)) {
- if (stats_timer)
- del_timer_sync(&can_stattimer);
-
- can_remove_proc();
- }
-
/* protocol unregister */
dev_remove_pack(&canfd_packet);
dev_remove_pack(&can_packet);
unregister_netdevice_notifier(&can_netdev_notifier);
sock_unregister(PF_CAN);
- /* remove created dev_rcv_lists from still registered CAN devices */
- rcu_read_lock();
- for_each_netdev_rcu(&init_net, dev) {
- if (dev->type == ARPHRD_CAN && dev->ml_priv) {
-
- struct dev_rcv_lists *d = dev->ml_priv;
-
- BUG_ON(d->entries);
- kfree(d);
- dev->ml_priv = NULL;
- }
- }
- rcu_read_unlock();
+ unregister_pernet_subsys(&can_pernet_ops);
rcu_barrier(); /* Wait for completion of call_rcu()'s */
diff --git a/net/can/af_can.h b/net/can/af_can.h
index b86f5129e838..d0ef45bb2a72 100644
--- a/net/can/af_can.h
+++ b/net/can/af_can.h
@@ -110,18 +110,9 @@ struct s_pstats {
unsigned long rcv_entries_max;
};
-/* receive filters subscribed for 'all' CAN devices */
-extern struct dev_rcv_lists can_rx_alldev_list;
-
/* function prototypes for the CAN networklayer procfs (proc.c) */
-void can_init_proc(void);
-void can_remove_proc(void);
+void can_init_proc(struct net *net);
+void can_remove_proc(struct net *net);
void can_stat_update(unsigned long data);
-/* structures and variables from af_can.c needed in proc.c for reading */
-extern struct timer_list can_stattimer; /* timer for statistics update */
-extern struct s_stats can_stats; /* packet statistics */
-extern struct s_pstats can_pstats; /* receive list statistics */
-extern struct hlist_head can_rx_dev_list; /* rx dispatcher structures */
-
#endif /* AF_CAN_H */
diff --git a/net/can/bcm.c b/net/can/bcm.c
index 95d13b233c65..65432633a250 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -1,7 +1,7 @@
/*
* bcm.c - Broadcast Manager to filter/send (cyclic) CAN content
*
- * Copyright (c) 2002-2016 Volkswagen Group Electronic Research
+ * Copyright (c) 2002-2017 Volkswagen Group Electronic Research
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -77,7 +77,7 @@
(CAN_EFF_MASK | CAN_EFF_FLAG | CAN_RTR_FLAG) : \
(CAN_SFF_MASK | CAN_EFF_FLAG | CAN_RTR_FLAG))
-#define CAN_BCM_VERSION "20161123"
+#define CAN_BCM_VERSION "20170425"
MODULE_DESCRIPTION("PF_CAN broadcast manager protocol");
MODULE_LICENSE("Dual BSD/GPL");
@@ -118,8 +118,6 @@ struct bcm_op {
struct net_device *rx_reg_dev;
};
-static struct proc_dir_entry *proc_dir;
-
struct bcm_sock {
struct sock sk;
int bound;
@@ -149,7 +147,8 @@ static inline ktime_t bcm_timeval_to_ktime(struct bcm_timeval tv)
/*
* procfs functions
*/
-static char *bcm_proc_getifname(char *result, int ifindex)
+#if IS_ENABLED(CONFIG_PROC_FS)
+static char *bcm_proc_getifname(struct net *net, char *result, int ifindex)
{
struct net_device *dev;
@@ -157,7 +156,7 @@ static char *bcm_proc_getifname(char *result, int ifindex)
return "any";
rcu_read_lock();
- dev = dev_get_by_index_rcu(&init_net, ifindex);
+ dev = dev_get_by_index_rcu(net, ifindex);
if (dev)
strcpy(result, dev->name);
else
@@ -170,7 +169,8 @@ static char *bcm_proc_getifname(char *result, int ifindex)
static int bcm_proc_show(struct seq_file *m, void *v)
{
char ifname[IFNAMSIZ];
- struct sock *sk = (struct sock *)m->private;
+ struct net *net = m->private;
+ struct sock *sk = (struct sock *)PDE_DATA(m->file->f_inode);
struct bcm_sock *bo = bcm_sk(sk);
struct bcm_op *op;
@@ -178,7 +178,7 @@ static int bcm_proc_show(struct seq_file *m, void *v)
seq_printf(m, " / sk %pK", sk);
seq_printf(m, " / bo %pK", bo);
seq_printf(m, " / dropped %lu", bo->dropped_usr_msgs);
- seq_printf(m, " / bound %s", bcm_proc_getifname(ifname, bo->ifindex));
+ seq_printf(m, " / bound %s", bcm_proc_getifname(net, ifname, bo->ifindex));
seq_printf(m, " <<<\n");
list_for_each_entry(op, &bo->rx_ops, list) {
@@ -190,7 +190,7 @@ static int bcm_proc_show(struct seq_file *m, void *v)
continue;
seq_printf(m, "rx_op: %03X %-5s ", op->can_id,
- bcm_proc_getifname(ifname, op->ifindex));
+ bcm_proc_getifname(net, ifname, op->ifindex));
if (op->flags & CAN_FD_FRAME)
seq_printf(m, "(%u)", op->nframes);
@@ -219,7 +219,7 @@ static int bcm_proc_show(struct seq_file *m, void *v)
list_for_each_entry(op, &bo->tx_ops, list) {
seq_printf(m, "tx_op: %03X %s ", op->can_id,
- bcm_proc_getifname(ifname, op->ifindex));
+ bcm_proc_getifname(net, ifname, op->ifindex));
if (op->flags & CAN_FD_FRAME)
seq_printf(m, "(%u) ", op->nframes);
@@ -242,7 +242,7 @@ static int bcm_proc_show(struct seq_file *m, void *v)
static int bcm_proc_open(struct inode *inode, struct file *file)
{
- return single_open(file, bcm_proc_show, PDE_DATA(inode));
+ return single_open_net(inode, file, bcm_proc_show);
}
static const struct file_operations bcm_proc_fops = {
@@ -252,6 +252,7 @@ static const struct file_operations bcm_proc_fops = {
.llseek = seq_lseek,
.release = single_release,
};
+#endif /* CONFIG_PROC_FS */
/*
* bcm_can_tx - send the (next) CAN frame to the appropriate CAN interface
@@ -267,7 +268,7 @@ static void bcm_can_tx(struct bcm_op *op)
if (!op->ifindex)
return;
- dev = dev_get_by_index(&init_net, op->ifindex);
+ dev = dev_get_by_index(sock_net(op->sk), op->ifindex);
if (!dev) {
/* RFC: should this bcm_op remove itself here? */
return;
@@ -764,8 +765,8 @@ static void bcm_remove_op(struct bcm_op *op)
static void bcm_rx_unreg(struct net_device *dev, struct bcm_op *op)
{
if (op->rx_reg_dev == dev) {
- can_rx_unregister(dev, op->can_id, REGMASK(op->can_id),
- bcm_rx_handler, op);
+ can_rx_unregister(dev_net(dev), dev, op->can_id,
+ REGMASK(op->can_id), bcm_rx_handler, op);
/* mark as removed subscription */
op->rx_reg_dev = NULL;
@@ -800,7 +801,7 @@ static int bcm_delete_rx_op(struct list_head *ops, struct bcm_msg_head *mh,
if (op->rx_reg_dev) {
struct net_device *dev;
- dev = dev_get_by_index(&init_net,
+ dev = dev_get_by_index(sock_net(op->sk),
op->ifindex);
if (dev) {
bcm_rx_unreg(dev, op);
@@ -808,7 +809,8 @@ static int bcm_delete_rx_op(struct list_head *ops, struct bcm_msg_head *mh,
}
}
} else
- can_rx_unregister(NULL, op->can_id,
+ can_rx_unregister(sock_net(op->sk), NULL,
+ op->can_id,
REGMASK(op->can_id),
bcm_rx_handler, op);
@@ -1220,9 +1222,10 @@ static int bcm_rx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
if (ifindex) {
struct net_device *dev;
- dev = dev_get_by_index(&init_net, ifindex);
+ dev = dev_get_by_index(sock_net(sk), ifindex);
if (dev) {
- err = can_rx_register(dev, op->can_id,
+ err = can_rx_register(sock_net(sk), dev,
+ op->can_id,
REGMASK(op->can_id),
bcm_rx_handler, op,
"bcm", sk);
@@ -1232,7 +1235,7 @@ static int bcm_rx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
}
} else
- err = can_rx_register(NULL, op->can_id,
+ err = can_rx_register(sock_net(sk), NULL, op->can_id,
REGMASK(op->can_id),
bcm_rx_handler, op, "bcm", sk);
if (err) {
@@ -1272,7 +1275,7 @@ static int bcm_tx_send(struct msghdr *msg, int ifindex, struct sock *sk,
return err;
}
- dev = dev_get_by_index(&init_net, ifindex);
+ dev = dev_get_by_index(sock_net(sk), ifindex);
if (!dev) {
kfree_skb(skb);
return -ENODEV;
@@ -1337,7 +1340,7 @@ static int bcm_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
if (ifindex) {
struct net_device *dev;
- dev = dev_get_by_index(&init_net, ifindex);
+ dev = dev_get_by_index(sock_net(sk), ifindex);
if (!dev)
return -ENODEV;
@@ -1418,7 +1421,7 @@ static int bcm_notifier(struct notifier_block *nb, unsigned long msg,
struct bcm_op *op;
int notify_enodev = 0;
- if (!net_eq(dev_net(dev), &init_net))
+ if (!net_eq(dev_net(dev), sock_net(sk)))
return NOTIFY_DONE;
if (dev->type != ARPHRD_CAN)
@@ -1490,6 +1493,7 @@ static int bcm_init(struct sock *sk)
static int bcm_release(struct socket *sock)
{
struct sock *sk = sock->sk;
+ struct net *net = sock_net(sk);
struct bcm_sock *bo;
struct bcm_op *op, *next;
@@ -1521,23 +1525,25 @@ static int bcm_release(struct socket *sock)
if (op->rx_reg_dev) {
struct net_device *dev;
- dev = dev_get_by_index(&init_net, op->ifindex);
+ dev = dev_get_by_index(net, op->ifindex);
if (dev) {
bcm_rx_unreg(dev, op);
dev_put(dev);
}
}
} else
- can_rx_unregister(NULL, op->can_id,
+ can_rx_unregister(net, NULL, op->can_id,
REGMASK(op->can_id),
bcm_rx_handler, op);
bcm_remove_op(op);
}
+#if IS_ENABLED(CONFIG_PROC_FS)
/* remove procfs entry */
- if (proc_dir && bo->bcm_proc_read)
- remove_proc_entry(bo->procname, proc_dir);
+ if (net->can.bcmproc_dir && bo->bcm_proc_read)
+ remove_proc_entry(bo->procname, net->can.bcmproc_dir);
+#endif /* CONFIG_PROC_FS */
/* remove device reference */
if (bo->bound) {
@@ -1560,6 +1566,7 @@ static int bcm_connect(struct socket *sock, struct sockaddr *uaddr, int len,
struct sockaddr_can *addr = (struct sockaddr_can *)uaddr;
struct sock *sk = sock->sk;
struct bcm_sock *bo = bcm_sk(sk);
+ struct net *net = sock_net(sk);
int ret = 0;
if (len < sizeof(*addr))
@@ -1576,7 +1583,7 @@ static int bcm_connect(struct socket *sock, struct sockaddr *uaddr, int len,
if (addr->can_ifindex) {
struct net_device *dev;
- dev = dev_get_by_index(&init_net, addr->can_ifindex);
+ dev = dev_get_by_index(net, addr->can_ifindex);
if (!dev) {
ret = -ENODEV;
goto fail;
@@ -1595,17 +1602,19 @@ static int bcm_connect(struct socket *sock, struct sockaddr *uaddr, int len,
bo->ifindex = 0;
}
- if (proc_dir) {
+#if IS_ENABLED(CONFIG_PROC_FS)
+ if (net->can.bcmproc_dir) {
/* unique socket address as filename */
sprintf(bo->procname, "%lu", sock_i_ino(sk));
bo->bcm_proc_read = proc_create_data(bo->procname, 0644,
- proc_dir,
+ net->can.bcmproc_dir,
&bcm_proc_fops, sk);
if (!bo->bcm_proc_read) {
ret = -ENOMEM;
goto fail;
}
}
+#endif /* CONFIG_PROC_FS */
bo->bound = 1;
@@ -1686,6 +1695,30 @@ static const struct can_proto bcm_can_proto = {
.prot = &bcm_proto,
};
+static int canbcm_pernet_init(struct net *net)
+{
+#if IS_ENABLED(CONFIG_PROC_FS)
+ /* create /proc/net/can-bcm directory */
+ net->can.bcmproc_dir = proc_net_mkdir(net, "can-bcm", net->proc_net);
+#endif /* CONFIG_PROC_FS */
+
+ return 0;
+}
+
+static void canbcm_pernet_exit(struct net *net)
+{
+#if IS_ENABLED(CONFIG_PROC_FS)
+ /* remove /proc/net/can-bcm directory */
+ if (net->can.bcmproc_dir)
+ remove_proc_entry("can-bcm", net->proc_net);
+#endif /* CONFIG_PROC_FS */
+}
+
+static struct pernet_operations canbcm_pernet_ops __read_mostly = {
+ .init = canbcm_pernet_init,
+ .exit = canbcm_pernet_exit,
+};
+
static int __init bcm_module_init(void)
{
int err;
@@ -1698,17 +1731,14 @@ static int __init bcm_module_init(void)
return err;
}
- /* create /proc/net/can-bcm directory */
- proc_dir = proc_mkdir("can-bcm", init_net.proc_net);
+ register_pernet_subsys(&canbcm_pernet_ops);
return 0;
}
static void __exit bcm_module_exit(void)
{
can_proto_unregister(&bcm_can_proto);
-
- if (proc_dir)
- remove_proc_entry("can-bcm", init_net.proc_net);
+ unregister_pernet_subsys(&canbcm_pernet_ops);
}
module_init(bcm_module_init);
diff --git a/net/can/gw.c b/net/can/gw.c
index 7056a1a2bb70..29748d844c3f 100644
--- a/net/can/gw.c
+++ b/net/can/gw.c
@@ -1,7 +1,7 @@
/*
* gw.c - CAN frame Gateway/Router/Bridge with netlink interface
*
- * Copyright (c) 2011 Volkswagen Group Electronic Research
+ * Copyright (c) 2017 Volkswagen Group Electronic Research
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -59,7 +59,7 @@
#include <net/net_namespace.h>
#include <net/sock.h>
-#define CAN_GW_VERSION "20130117"
+#define CAN_GW_VERSION "20170425"
#define CAN_GW_NAME "can-gw"
MODULE_DESCRIPTION("PF_CAN netlink gateway");
@@ -79,9 +79,7 @@ MODULE_PARM_DESC(max_hops,
__stringify(CGW_MAX_HOPS) " hops, "
"default: " __stringify(CGW_DEFAULT_HOPS) ")");
-static HLIST_HEAD(cgw_list);
static struct notifier_block notifier;
-
static struct kmem_cache *cgw_cache __read_mostly;
/* structure that contains the (on-the-fly) CAN frame modifications */
@@ -438,16 +436,16 @@ static void can_can_gw_rcv(struct sk_buff *skb, void *data)
gwj->handled_frames++;
}
-static inline int cgw_register_filter(struct cgw_job *gwj)
+static inline int cgw_register_filter(struct net *net, struct cgw_job *gwj)
{
- return can_rx_register(gwj->src.dev, gwj->ccgw.filter.can_id,
+ return can_rx_register(net, gwj->src.dev, gwj->ccgw.filter.can_id,
gwj->ccgw.filter.can_mask, can_can_gw_rcv,
gwj, "gw", NULL);
}
-static inline void cgw_unregister_filter(struct cgw_job *gwj)
+static inline void cgw_unregister_filter(struct net *net, struct cgw_job *gwj)
{
- can_rx_unregister(gwj->src.dev, gwj->ccgw.filter.can_id,
+ can_rx_unregister(net, gwj->src.dev, gwj->ccgw.filter.can_id,
gwj->ccgw.filter.can_mask, can_can_gw_rcv, gwj);
}
@@ -455,9 +453,8 @@ static int cgw_notifier(struct notifier_block *nb,
unsigned long msg, void *ptr)
{
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+ struct net *net = dev_net(dev);
- if (!net_eq(dev_net(dev), &init_net))
- return NOTIFY_DONE;
if (dev->type != ARPHRD_CAN)
return NOTIFY_DONE;
@@ -468,11 +465,11 @@ static int cgw_notifier(struct notifier_block *nb,
ASSERT_RTNL();
- hlist_for_each_entry_safe(gwj, nx, &cgw_list, list) {
+ hlist_for_each_entry_safe(gwj, nx, &net->can.cgw_list, list) {
if (gwj->src.dev == dev || gwj->dst.dev == dev) {
hlist_del(&gwj->list);
- cgw_unregister_filter(gwj);
+ cgw_unregister_filter(net, gwj);
kmem_cache_free(cgw_cache, gwj);
}
}
@@ -592,12 +589,13 @@ cancel:
/* Dump information about all CAN gateway jobs, in response to RTM_GETROUTE */
static int cgw_dump_jobs(struct sk_buff *skb, struct netlink_callback *cb)
{
+ struct net *net = sock_net(skb->sk);
struct cgw_job *gwj = NULL;
int idx = 0;
int s_idx = cb->args[0];
rcu_read_lock();
- hlist_for_each_entry_rcu(gwj, &cgw_list, list) {
+ hlist_for_each_entry_rcu(gwj, &net->can.cgw_list, list) {
if (idx < s_idx)
goto cont;
@@ -641,7 +639,7 @@ static int cgw_parse_attr(struct nlmsghdr *nlh, struct cf_mod *mod,
memset(mod, 0, sizeof(*mod));
err = nlmsg_parse(nlh, sizeof(struct rtcanmsg), tb, CGW_MAX,
- cgw_policy);
+ cgw_policy, NULL);
if (err < 0)
return err;
@@ -809,8 +807,10 @@ static int cgw_parse_attr(struct nlmsghdr *nlh, struct cf_mod *mod,
return 0;
}
-static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
+ struct net *net = sock_net(skb->sk);
struct rtcanmsg *r;
struct cgw_job *gwj;
struct cf_mod mod;
@@ -841,7 +841,7 @@ static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh)
ASSERT_RTNL();
/* check for updating an existing job with identical uid */
- hlist_for_each_entry(gwj, &cgw_list, list) {
+ hlist_for_each_entry(gwj, &net->can.cgw_list, list) {
if (gwj->mod.uid != mod.uid)
continue;
@@ -879,7 +879,7 @@ static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh)
err = -ENODEV;
- gwj->src.dev = __dev_get_by_index(&init_net, gwj->ccgw.src_idx);
+ gwj->src.dev = __dev_get_by_index(net, gwj->ccgw.src_idx);
if (!gwj->src.dev)
goto out;
@@ -887,7 +887,7 @@ static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh)
if (gwj->src.dev->type != ARPHRD_CAN)
goto out;
- gwj->dst.dev = __dev_get_by_index(&init_net, gwj->ccgw.dst_idx);
+ gwj->dst.dev = __dev_get_by_index(net, gwj->ccgw.dst_idx);
if (!gwj->dst.dev)
goto out;
@@ -897,9 +897,9 @@ static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh)
ASSERT_RTNL();
- err = cgw_register_filter(gwj);
+ err = cgw_register_filter(net, gwj);
if (!err)
- hlist_add_head_rcu(&gwj->list, &cgw_list);
+ hlist_add_head_rcu(&gwj->list, &net->can.cgw_list);
out:
if (err)
kmem_cache_free(cgw_cache, gwj);
@@ -907,22 +907,24 @@ out:
return err;
}
-static void cgw_remove_all_jobs(void)
+static void cgw_remove_all_jobs(struct net *net)
{
struct cgw_job *gwj = NULL;
struct hlist_node *nx;
ASSERT_RTNL();
- hlist_for_each_entry_safe(gwj, nx, &cgw_list, list) {
+ hlist_for_each_entry_safe(gwj, nx, &net->can.cgw_list, list) {
hlist_del(&gwj->list);
- cgw_unregister_filter(gwj);
+ cgw_unregister_filter(net, gwj);
kmem_cache_free(cgw_cache, gwj);
}
}
-static int cgw_remove_job(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int cgw_remove_job(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
+ struct net *net = sock_net(skb->sk);
struct cgw_job *gwj = NULL;
struct hlist_node *nx;
struct rtcanmsg *r;
@@ -951,7 +953,7 @@ static int cgw_remove_job(struct sk_buff *skb, struct nlmsghdr *nlh)
/* two interface indices both set to 0 => remove all entries */
if (!ccgw.src_idx && !ccgw.dst_idx) {
- cgw_remove_all_jobs();
+ cgw_remove_all_jobs(net);
return 0;
}
@@ -960,7 +962,7 @@ static int cgw_remove_job(struct sk_buff *skb, struct nlmsghdr *nlh)
ASSERT_RTNL();
/* remove only the first matching entry */
- hlist_for_each_entry_safe(gwj, nx, &cgw_list, list) {
+ hlist_for_each_entry_safe(gwj, nx, &net->can.cgw_list, list) {
if (gwj->flags != r->flags)
continue;
@@ -983,7 +985,7 @@ static int cgw_remove_job(struct sk_buff *skb, struct nlmsghdr *nlh)
continue;
hlist_del(&gwj->list);
- cgw_unregister_filter(gwj);
+ cgw_unregister_filter(net, gwj);
kmem_cache_free(cgw_cache, gwj);
err = 0;
break;
@@ -992,6 +994,24 @@ static int cgw_remove_job(struct sk_buff *skb, struct nlmsghdr *nlh)
return err;
}
+static int __net_init cangw_pernet_init(struct net *net)
+{
+ INIT_HLIST_HEAD(&net->can.cgw_list);
+ return 0;
+}
+
+static void __net_exit cangw_pernet_exit(struct net *net)
+{
+ rtnl_lock();
+ cgw_remove_all_jobs(net);
+ rtnl_unlock();
+}
+
+static struct pernet_operations cangw_pernet_ops = {
+ .init = cangw_pernet_init,
+ .exit = cangw_pernet_exit,
+};
+
static __init int cgw_module_init(void)
{
/* sanitize given module parameter */
@@ -1000,6 +1020,7 @@ static __init int cgw_module_init(void)
pr_info("can: netlink gateway (rev " CAN_GW_VERSION ") max_hops=%d\n",
max_hops);
+ register_pernet_subsys(&cangw_pernet_ops);
cgw_cache = kmem_cache_create("can_gw", sizeof(struct cgw_job),
0, 0, NULL);
@@ -1029,10 +1050,7 @@ static __exit void cgw_module_exit(void)
unregister_netdevice_notifier(&notifier);
- rtnl_lock();
- cgw_remove_all_jobs();
- rtnl_unlock();
-
+ unregister_pernet_subsys(&cangw_pernet_ops);
rcu_barrier(); /* Wait for completion of call_rcu()'s */
kmem_cache_destroy(cgw_cache);
diff --git a/net/can/proc.c b/net/can/proc.c
index 85ef7bb0f176..83045f00c63c 100644
--- a/net/can/proc.c
+++ b/net/can/proc.c
@@ -62,17 +62,6 @@
#define CAN_PROC_RCVLIST_EFF "rcvlist_eff"
#define CAN_PROC_RCVLIST_ERR "rcvlist_err"
-static struct proc_dir_entry *can_dir;
-static struct proc_dir_entry *pde_version;
-static struct proc_dir_entry *pde_stats;
-static struct proc_dir_entry *pde_reset_stats;
-static struct proc_dir_entry *pde_rcvlist_all;
-static struct proc_dir_entry *pde_rcvlist_fil;
-static struct proc_dir_entry *pde_rcvlist_inv;
-static struct proc_dir_entry *pde_rcvlist_sff;
-static struct proc_dir_entry *pde_rcvlist_eff;
-static struct proc_dir_entry *pde_rcvlist_err;
-
static int user_reset;
static const char rx_list_name[][8] = {
@@ -86,21 +75,23 @@ static const char rx_list_name[][8] = {
* af_can statistics stuff
*/
-static void can_init_stats(void)
+static void can_init_stats(struct net *net)
{
+ struct s_stats *can_stats = net->can.can_stats;
+ struct s_pstats *can_pstats = net->can.can_pstats;
/*
* This memset function is called from a timer context (when
* can_stattimer is active which is the default) OR in a process
* context (reading the proc_fs when can_stattimer is disabled).
*/
- memset(&can_stats, 0, sizeof(can_stats));
- can_stats.jiffies_init = jiffies;
+ memset(can_stats, 0, sizeof(struct s_stats));
+ can_stats->jiffies_init = jiffies;
- can_pstats.stats_reset++;
+ can_pstats->stats_reset++;
if (user_reset) {
user_reset = 0;
- can_pstats.user_reset++;
+ can_pstats->user_reset++;
}
}
@@ -126,64 +117,66 @@ static unsigned long calc_rate(unsigned long oldjif, unsigned long newjif,
void can_stat_update(unsigned long data)
{
+ struct net *net = (struct net *)data;
+ struct s_stats *can_stats = net->can.can_stats;
unsigned long j = jiffies; /* snapshot */
/* restart counting in timer context on user request */
if (user_reset)
- can_init_stats();
+ can_init_stats(net);
/* restart counting on jiffies overflow */
- if (j < can_stats.jiffies_init)
- can_init_stats();
+ if (j < can_stats->jiffies_init)
+ can_init_stats(net);
/* prevent overflow in calc_rate() */
- if (can_stats.rx_frames > (ULONG_MAX / HZ))
- can_init_stats();
+ if (can_stats->rx_frames > (ULONG_MAX / HZ))
+ can_init_stats(net);
/* prevent overflow in calc_rate() */
- if (can_stats.tx_frames > (ULONG_MAX / HZ))
- can_init_stats();
+ if (can_stats->tx_frames > (ULONG_MAX / HZ))
+ can_init_stats(net);
/* matches overflow - very improbable */
- if (can_stats.matches > (ULONG_MAX / 100))
- can_init_stats();
+ if (can_stats->matches > (ULONG_MAX / 100))
+ can_init_stats(net);
/* calc total values */
- if (can_stats.rx_frames)
- can_stats.total_rx_match_ratio = (can_stats.matches * 100) /
- can_stats.rx_frames;
+ if (can_stats->rx_frames)
+ can_stats->total_rx_match_ratio = (can_stats->matches * 100) /
+ can_stats->rx_frames;
- can_stats.total_tx_rate = calc_rate(can_stats.jiffies_init, j,
- can_stats.tx_frames);
- can_stats.total_rx_rate = calc_rate(can_stats.jiffies_init, j,
- can_stats.rx_frames);
+ can_stats->total_tx_rate = calc_rate(can_stats->jiffies_init, j,
+ can_stats->tx_frames);
+ can_stats->total_rx_rate = calc_rate(can_stats->jiffies_init, j,
+ can_stats->rx_frames);
/* calc current values */
- if (can_stats.rx_frames_delta)
- can_stats.current_rx_match_ratio =
- (can_stats.matches_delta * 100) /
- can_stats.rx_frames_delta;
+ if (can_stats->rx_frames_delta)
+ can_stats->current_rx_match_ratio =
+ (can_stats->matches_delta * 100) /
+ can_stats->rx_frames_delta;
- can_stats.current_tx_rate = calc_rate(0, HZ, can_stats.tx_frames_delta);
- can_stats.current_rx_rate = calc_rate(0, HZ, can_stats.rx_frames_delta);
+ can_stats->current_tx_rate = calc_rate(0, HZ, can_stats->tx_frames_delta);
+ can_stats->current_rx_rate = calc_rate(0, HZ, can_stats->rx_frames_delta);
/* check / update maximum values */
- if (can_stats.max_tx_rate < can_stats.current_tx_rate)
- can_stats.max_tx_rate = can_stats.current_tx_rate;
+ if (can_stats->max_tx_rate < can_stats->current_tx_rate)
+ can_stats->max_tx_rate = can_stats->current_tx_rate;
- if (can_stats.max_rx_rate < can_stats.current_rx_rate)
- can_stats.max_rx_rate = can_stats.current_rx_rate;
+ if (can_stats->max_rx_rate < can_stats->current_rx_rate)
+ can_stats->max_rx_rate = can_stats->current_rx_rate;
- if (can_stats.max_rx_match_ratio < can_stats.current_rx_match_ratio)
- can_stats.max_rx_match_ratio = can_stats.current_rx_match_ratio;
+ if (can_stats->max_rx_match_ratio < can_stats->current_rx_match_ratio)
+ can_stats->max_rx_match_ratio = can_stats->current_rx_match_ratio;
/* clear values for 'current rate' calculation */
- can_stats.tx_frames_delta = 0;
- can_stats.rx_frames_delta = 0;
- can_stats.matches_delta = 0;
+ can_stats->tx_frames_delta = 0;
+ can_stats->rx_frames_delta = 0;
+ can_stats->matches_delta = 0;
/* restart timer (one second) */
- mod_timer(&can_stattimer, round_jiffies(jiffies + HZ));
+ mod_timer(&net->can.can_stattimer, round_jiffies(jiffies + HZ));
}
/*
@@ -217,57 +210,61 @@ static void can_print_recv_banner(struct seq_file *m)
static int can_stats_proc_show(struct seq_file *m, void *v)
{
+ struct net *net = m->private;
+ struct s_stats *can_stats = net->can.can_stats;
+ struct s_pstats *can_pstats = net->can.can_pstats;
+
seq_putc(m, '\n');
- seq_printf(m, " %8ld transmitted frames (TXF)\n", can_stats.tx_frames);
- seq_printf(m, " %8ld received frames (RXF)\n", can_stats.rx_frames);
- seq_printf(m, " %8ld matched frames (RXMF)\n", can_stats.matches);
+ seq_printf(m, " %8ld transmitted frames (TXF)\n", can_stats->tx_frames);
+ seq_printf(m, " %8ld received frames (RXF)\n", can_stats->rx_frames);
+ seq_printf(m, " %8ld matched frames (RXMF)\n", can_stats->matches);
seq_putc(m, '\n');
- if (can_stattimer.function == can_stat_update) {
+ if (net->can.can_stattimer.function == can_stat_update) {
seq_printf(m, " %8ld %% total match ratio (RXMR)\n",
- can_stats.total_rx_match_ratio);
+ can_stats->total_rx_match_ratio);
seq_printf(m, " %8ld frames/s total tx rate (TXR)\n",
- can_stats.total_tx_rate);
+ can_stats->total_tx_rate);
seq_printf(m, " %8ld frames/s total rx rate (RXR)\n",
- can_stats.total_rx_rate);
+ can_stats->total_rx_rate);
seq_putc(m, '\n');
seq_printf(m, " %8ld %% current match ratio (CRXMR)\n",
- can_stats.current_rx_match_ratio);
+ can_stats->current_rx_match_ratio);
seq_printf(m, " %8ld frames/s current tx rate (CTXR)\n",
- can_stats.current_tx_rate);
+ can_stats->current_tx_rate);
seq_printf(m, " %8ld frames/s current rx rate (CRXR)\n",
- can_stats.current_rx_rate);
+ can_stats->current_rx_rate);
seq_putc(m, '\n');
seq_printf(m, " %8ld %% max match ratio (MRXMR)\n",
- can_stats.max_rx_match_ratio);
+ can_stats->max_rx_match_ratio);
seq_printf(m, " %8ld frames/s max tx rate (MTXR)\n",
- can_stats.max_tx_rate);
+ can_stats->max_tx_rate);
seq_printf(m, " %8ld frames/s max rx rate (MRXR)\n",
- can_stats.max_rx_rate);
+ can_stats->max_rx_rate);
seq_putc(m, '\n');
}
seq_printf(m, " %8ld current receive list entries (CRCV)\n",
- can_pstats.rcv_entries);
+ can_pstats->rcv_entries);
seq_printf(m, " %8ld maximum receive list entries (MRCV)\n",
- can_pstats.rcv_entries_max);
+ can_pstats->rcv_entries_max);
- if (can_pstats.stats_reset)
+ if (can_pstats->stats_reset)
seq_printf(m, "\n %8ld statistic resets (STR)\n",
- can_pstats.stats_reset);
+ can_pstats->stats_reset);
- if (can_pstats.user_reset)
+ if (can_pstats->user_reset)
seq_printf(m, " %8ld user statistic resets (USTR)\n",
- can_pstats.user_reset);
+ can_pstats->user_reset);
seq_putc(m, '\n');
return 0;
@@ -275,7 +272,7 @@ static int can_stats_proc_show(struct seq_file *m, void *v)
static int can_stats_proc_open(struct inode *inode, struct file *file)
{
- return single_open(file, can_stats_proc_show, NULL);
+ return single_open_net(inode, file, can_stats_proc_show);
}
static const struct file_operations can_stats_proc_fops = {
@@ -288,25 +285,28 @@ static const struct file_operations can_stats_proc_fops = {
static int can_reset_stats_proc_show(struct seq_file *m, void *v)
{
+ struct net *net = m->private;
+ struct s_pstats *can_pstats = net->can.can_pstats;
+ struct s_stats *can_stats = net->can.can_stats;
+
user_reset = 1;
- if (can_stattimer.function == can_stat_update) {
+ if (net->can.can_stattimer.function == can_stat_update) {
seq_printf(m, "Scheduled statistic reset #%ld.\n",
- can_pstats.stats_reset + 1);
-
+ can_pstats->stats_reset + 1);
} else {
- if (can_stats.jiffies_init != jiffies)
- can_init_stats();
+ if (can_stats->jiffies_init != jiffies)
+ can_init_stats(net);
seq_printf(m, "Performed statistic reset #%ld.\n",
- can_pstats.stats_reset);
+ can_pstats->stats_reset);
}
return 0;
}
static int can_reset_stats_proc_open(struct inode *inode, struct file *file)
{
- return single_open(file, can_reset_stats_proc_show, NULL);
+ return single_open_net(inode, file, can_reset_stats_proc_show);
}
static const struct file_operations can_reset_stats_proc_fops = {
@@ -325,7 +325,7 @@ static int can_version_proc_show(struct seq_file *m, void *v)
static int can_version_proc_open(struct inode *inode, struct file *file)
{
- return single_open(file, can_version_proc_show, NULL);
+ return single_open_net(inode, file, can_version_proc_show);
}
static const struct file_operations can_version_proc_fops = {
@@ -351,20 +351,21 @@ static inline void can_rcvlist_proc_show_one(struct seq_file *m, int idx,
static int can_rcvlist_proc_show(struct seq_file *m, void *v)
{
/* double cast to prevent GCC warning */
- int idx = (int)(long)m->private;
+ int idx = (int)(long)PDE_DATA(m->file->f_inode);
struct net_device *dev;
struct dev_rcv_lists *d;
+ struct net *net = m->private;
seq_printf(m, "\nreceive list '%s':\n", rx_list_name[idx]);
rcu_read_lock();
/* receive list for 'all' CAN devices (dev == NULL) */
- d = &can_rx_alldev_list;
+ d = net->can.can_rx_alldev_list;
can_rcvlist_proc_show_one(m, idx, NULL, d);
/* receive list for registered CAN devices */
- for_each_netdev_rcu(&init_net, dev) {
+ for_each_netdev_rcu(net, dev) {
if (dev->type == ARPHRD_CAN && dev->ml_priv)
can_rcvlist_proc_show_one(m, idx, dev, dev->ml_priv);
}
@@ -377,7 +378,7 @@ static int can_rcvlist_proc_show(struct seq_file *m, void *v)
static int can_rcvlist_proc_open(struct inode *inode, struct file *file)
{
- return single_open(file, can_rcvlist_proc_show, PDE_DATA(inode));
+ return single_open_net(inode, file, can_rcvlist_proc_show);
}
static const struct file_operations can_rcvlist_proc_fops = {
@@ -417,6 +418,7 @@ static int can_rcvlist_sff_proc_show(struct seq_file *m, void *v)
{
struct net_device *dev;
struct dev_rcv_lists *d;
+ struct net *net = m->private;
/* RX_SFF */
seq_puts(m, "\nreceive list 'rx_sff':\n");
@@ -424,11 +426,11 @@ static int can_rcvlist_sff_proc_show(struct seq_file *m, void *v)
rcu_read_lock();
/* sff receive list for 'all' CAN devices (dev == NULL) */
- d = &can_rx_alldev_list;
+ d = net->can.can_rx_alldev_list;
can_rcvlist_proc_show_array(m, NULL, d->rx_sff, ARRAY_SIZE(d->rx_sff));
/* sff receive list for registered CAN devices */
- for_each_netdev_rcu(&init_net, dev) {
+ for_each_netdev_rcu(net, dev) {
if (dev->type == ARPHRD_CAN && dev->ml_priv) {
d = dev->ml_priv;
can_rcvlist_proc_show_array(m, dev, d->rx_sff,
@@ -444,7 +446,7 @@ static int can_rcvlist_sff_proc_show(struct seq_file *m, void *v)
static int can_rcvlist_sff_proc_open(struct inode *inode, struct file *file)
{
- return single_open(file, can_rcvlist_sff_proc_show, NULL);
+ return single_open_net(inode, file, can_rcvlist_sff_proc_show);
}
static const struct file_operations can_rcvlist_sff_proc_fops = {
@@ -460,6 +462,7 @@ static int can_rcvlist_eff_proc_show(struct seq_file *m, void *v)
{
struct net_device *dev;
struct dev_rcv_lists *d;
+ struct net *net = m->private;
/* RX_EFF */
seq_puts(m, "\nreceive list 'rx_eff':\n");
@@ -467,11 +470,11 @@ static int can_rcvlist_eff_proc_show(struct seq_file *m, void *v)
rcu_read_lock();
/* eff receive list for 'all' CAN devices (dev == NULL) */
- d = &can_rx_alldev_list;
+ d = net->can.can_rx_alldev_list;
can_rcvlist_proc_show_array(m, NULL, d->rx_eff, ARRAY_SIZE(d->rx_eff));
/* eff receive list for registered CAN devices */
- for_each_netdev_rcu(&init_net, dev) {
+ for_each_netdev_rcu(net, dev) {
if (dev->type == ARPHRD_CAN && dev->ml_priv) {
d = dev->ml_priv;
can_rcvlist_proc_show_array(m, dev, d->rx_eff,
@@ -487,7 +490,7 @@ static int can_rcvlist_eff_proc_show(struct seq_file *m, void *v)
static int can_rcvlist_eff_proc_open(struct inode *inode, struct file *file)
{
- return single_open(file, can_rcvlist_eff_proc_show, NULL);
+ return single_open_net(inode, file, can_rcvlist_eff_proc_show);
}
static const struct file_operations can_rcvlist_eff_proc_fops = {
@@ -499,81 +502,85 @@ static const struct file_operations can_rcvlist_eff_proc_fops = {
};
/*
- * proc utility functions
- */
-
-static void can_remove_proc_readentry(const char *name)
-{
- if (can_dir)
- remove_proc_entry(name, can_dir);
-}
-
-/*
* can_init_proc - create main CAN proc directory and procfs entries
*/
-void can_init_proc(void)
+void can_init_proc(struct net *net)
{
/* create /proc/net/can directory */
- can_dir = proc_mkdir("can", init_net.proc_net);
+ net->can.proc_dir = proc_net_mkdir(net, "can", net->proc_net);
- if (!can_dir) {
- pr_info("can: failed to create /proc/net/can.\n");
+ if (!net->can.proc_dir) {
+ printk(KERN_INFO "can: failed to create /proc/net/can . "
+ "CONFIG_PROC_FS missing?\n");
return;
}
/* own procfs entries from the AF_CAN core */
- pde_version = proc_create(CAN_PROC_VERSION, 0644, can_dir,
- &can_version_proc_fops);
- pde_stats = proc_create(CAN_PROC_STATS, 0644, can_dir,
- &can_stats_proc_fops);
- pde_reset_stats = proc_create(CAN_PROC_RESET_STATS, 0644, can_dir,
- &can_reset_stats_proc_fops);
- pde_rcvlist_err = proc_create_data(CAN_PROC_RCVLIST_ERR, 0644, can_dir,
- &can_rcvlist_proc_fops, (void *)RX_ERR);
- pde_rcvlist_all = proc_create_data(CAN_PROC_RCVLIST_ALL, 0644, can_dir,
- &can_rcvlist_proc_fops, (void *)RX_ALL);
- pde_rcvlist_fil = proc_create_data(CAN_PROC_RCVLIST_FIL, 0644, can_dir,
- &can_rcvlist_proc_fops, (void *)RX_FIL);
- pde_rcvlist_inv = proc_create_data(CAN_PROC_RCVLIST_INV, 0644, can_dir,
- &can_rcvlist_proc_fops, (void *)RX_INV);
- pde_rcvlist_eff = proc_create(CAN_PROC_RCVLIST_EFF, 0644, can_dir,
- &can_rcvlist_eff_proc_fops);
- pde_rcvlist_sff = proc_create(CAN_PROC_RCVLIST_SFF, 0644, can_dir,
- &can_rcvlist_sff_proc_fops);
+ net->can.pde_version = proc_create(CAN_PROC_VERSION, 0644,
+ net->can.proc_dir,
+ &can_version_proc_fops);
+ net->can.pde_stats = proc_create(CAN_PROC_STATS, 0644,
+ net->can.proc_dir,
+ &can_stats_proc_fops);
+ net->can.pde_reset_stats = proc_create(CAN_PROC_RESET_STATS, 0644,
+ net->can.proc_dir,
+ &can_reset_stats_proc_fops);
+ net->can.pde_rcvlist_err = proc_create_data(CAN_PROC_RCVLIST_ERR, 0644,
+ net->can.proc_dir,
+ &can_rcvlist_proc_fops,
+ (void *)RX_ERR);
+ net->can.pde_rcvlist_all = proc_create_data(CAN_PROC_RCVLIST_ALL, 0644,
+ net->can.proc_dir,
+ &can_rcvlist_proc_fops,
+ (void *)RX_ALL);
+ net->can.pde_rcvlist_fil = proc_create_data(CAN_PROC_RCVLIST_FIL, 0644,
+ net->can.proc_dir,
+ &can_rcvlist_proc_fops,
+ (void *)RX_FIL);
+ net->can.pde_rcvlist_inv = proc_create_data(CAN_PROC_RCVLIST_INV, 0644,
+ net->can.proc_dir,
+ &can_rcvlist_proc_fops,
+ (void *)RX_INV);
+ net->can.pde_rcvlist_eff = proc_create(CAN_PROC_RCVLIST_EFF, 0644,
+ net->can.proc_dir,
+ &can_rcvlist_eff_proc_fops);
+ net->can.pde_rcvlist_sff = proc_create(CAN_PROC_RCVLIST_SFF, 0644,
+ net->can.proc_dir,
+ &can_rcvlist_sff_proc_fops);
}
/*
* can_remove_proc - remove procfs entries and main CAN proc directory
*/
-void can_remove_proc(void)
+void can_remove_proc(struct net *net)
{
- if (pde_version)
- can_remove_proc_readentry(CAN_PROC_VERSION);
+ if (net->can.pde_version)
+ remove_proc_entry(CAN_PROC_VERSION, net->can.proc_dir);
- if (pde_stats)
- can_remove_proc_readentry(CAN_PROC_STATS);
+ if (net->can.pde_stats)
+ remove_proc_entry(CAN_PROC_STATS, net->can.proc_dir);
- if (pde_reset_stats)
- can_remove_proc_readentry(CAN_PROC_RESET_STATS);
+ if (net->can.pde_reset_stats)
+ remove_proc_entry(CAN_PROC_RESET_STATS, net->can.proc_dir);
- if (pde_rcvlist_err)
- can_remove_proc_readentry(CAN_PROC_RCVLIST_ERR);
+ if (net->can.pde_rcvlist_err)
+ remove_proc_entry(CAN_PROC_RCVLIST_ERR, net->can.proc_dir);
- if (pde_rcvlist_all)
- can_remove_proc_readentry(CAN_PROC_RCVLIST_ALL);
+ if (net->can.pde_rcvlist_all)
+ remove_proc_entry(CAN_PROC_RCVLIST_ALL, net->can.proc_dir);
- if (pde_rcvlist_fil)
- can_remove_proc_readentry(CAN_PROC_RCVLIST_FIL);
+ if (net->can.pde_rcvlist_fil)
+ remove_proc_entry(CAN_PROC_RCVLIST_FIL, net->can.proc_dir);
- if (pde_rcvlist_inv)
- can_remove_proc_readentry(CAN_PROC_RCVLIST_INV);
+ if (net->can.pde_rcvlist_inv)
+ remove_proc_entry(CAN_PROC_RCVLIST_INV, net->can.proc_dir);
- if (pde_rcvlist_eff)
- can_remove_proc_readentry(CAN_PROC_RCVLIST_EFF);
+ if (net->can.pde_rcvlist_eff)
+ remove_proc_entry(CAN_PROC_RCVLIST_EFF, net->can.proc_dir);
- if (pde_rcvlist_sff)
- can_remove_proc_readentry(CAN_PROC_RCVLIST_SFF);
+ if (net->can.pde_rcvlist_sff)
+ remove_proc_entry(CAN_PROC_RCVLIST_SFF, net->can.proc_dir);
- if (can_dir)
- remove_proc_entry("can", init_net.proc_net);
+ if (net->can.proc_dir)
+ remove_proc_entry("can", net->proc_net);
}
diff --git a/net/can/raw.c b/net/can/raw.c
index 6dc546a06673..864c80dbdb72 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -181,20 +181,21 @@ static void raw_rcv(struct sk_buff *oskb, void *data)
kfree_skb(skb);
}
-static int raw_enable_filters(struct net_device *dev, struct sock *sk,
- struct can_filter *filter, int count)
+static int raw_enable_filters(struct net *net, struct net_device *dev,
+ struct sock *sk, struct can_filter *filter,
+ int count)
{
int err = 0;
int i;
for (i = 0; i < count; i++) {
- err = can_rx_register(dev, filter[i].can_id,
+ err = can_rx_register(net, dev, filter[i].can_id,
filter[i].can_mask,
raw_rcv, sk, "raw", sk);
if (err) {
/* clean up successfully registered filters */
while (--i >= 0)
- can_rx_unregister(dev, filter[i].can_id,
+ can_rx_unregister(net, dev, filter[i].can_id,
filter[i].can_mask,
raw_rcv, sk);
break;
@@ -204,57 +205,62 @@ static int raw_enable_filters(struct net_device *dev, struct sock *sk,
return err;
}
-static int raw_enable_errfilter(struct net_device *dev, struct sock *sk,
- can_err_mask_t err_mask)
+static int raw_enable_errfilter(struct net *net, struct net_device *dev,
+ struct sock *sk, can_err_mask_t err_mask)
{
int err = 0;
if (err_mask)
- err = can_rx_register(dev, 0, err_mask | CAN_ERR_FLAG,
+ err = can_rx_register(net, dev, 0, err_mask | CAN_ERR_FLAG,
raw_rcv, sk, "raw", sk);
return err;
}
-static void raw_disable_filters(struct net_device *dev, struct sock *sk,
- struct can_filter *filter, int count)
+static void raw_disable_filters(struct net *net, struct net_device *dev,
+ struct sock *sk, struct can_filter *filter,
+ int count)
{
int i;
for (i = 0; i < count; i++)
- can_rx_unregister(dev, filter[i].can_id, filter[i].can_mask,
- raw_rcv, sk);
+ can_rx_unregister(net, dev, filter[i].can_id,
+ filter[i].can_mask, raw_rcv, sk);
}
-static inline void raw_disable_errfilter(struct net_device *dev,
+static inline void raw_disable_errfilter(struct net *net,
+ struct net_device *dev,
struct sock *sk,
can_err_mask_t err_mask)
{
if (err_mask)
- can_rx_unregister(dev, 0, err_mask | CAN_ERR_FLAG,
+ can_rx_unregister(net, dev, 0, err_mask | CAN_ERR_FLAG,
raw_rcv, sk);
}
-static inline void raw_disable_allfilters(struct net_device *dev,
+static inline void raw_disable_allfilters(struct net *net,
+ struct net_device *dev,
struct sock *sk)
{
struct raw_sock *ro = raw_sk(sk);
- raw_disable_filters(dev, sk, ro->filter, ro->count);
- raw_disable_errfilter(dev, sk, ro->err_mask);
+ raw_disable_filters(net, dev, sk, ro->filter, ro->count);
+ raw_disable_errfilter(net, dev, sk, ro->err_mask);
}
-static int raw_enable_allfilters(struct net_device *dev, struct sock *sk)
+static int raw_enable_allfilters(struct net *net, struct net_device *dev,
+ struct sock *sk)
{
struct raw_sock *ro = raw_sk(sk);
int err;
- err = raw_enable_filters(dev, sk, ro->filter, ro->count);
+ err = raw_enable_filters(net, dev, sk, ro->filter, ro->count);
if (!err) {
- err = raw_enable_errfilter(dev, sk, ro->err_mask);
+ err = raw_enable_errfilter(net, dev, sk, ro->err_mask);
if (err)
- raw_disable_filters(dev, sk, ro->filter, ro->count);
+ raw_disable_filters(net, dev, sk, ro->filter,
+ ro->count);
}
return err;
@@ -267,7 +273,7 @@ static int raw_notifier(struct notifier_block *nb,
struct raw_sock *ro = container_of(nb, struct raw_sock, notifier);
struct sock *sk = &ro->sk;
- if (!net_eq(dev_net(dev), &init_net))
+ if (!net_eq(dev_net(dev), sock_net(sk)))
return NOTIFY_DONE;
if (dev->type != ARPHRD_CAN)
@@ -282,7 +288,7 @@ static int raw_notifier(struct notifier_block *nb,
lock_sock(sk);
/* remove current filters & unregister */
if (ro->bound)
- raw_disable_allfilters(dev, sk);
+ raw_disable_allfilters(dev_net(dev), dev, sk);
if (ro->count > 1)
kfree(ro->filter);
@@ -358,13 +364,13 @@ static int raw_release(struct socket *sock)
if (ro->ifindex) {
struct net_device *dev;
- dev = dev_get_by_index(&init_net, ro->ifindex);
+ dev = dev_get_by_index(sock_net(sk), ro->ifindex);
if (dev) {
- raw_disable_allfilters(dev, sk);
+ raw_disable_allfilters(dev_net(dev), dev, sk);
dev_put(dev);
}
} else
- raw_disable_allfilters(NULL, sk);
+ raw_disable_allfilters(sock_net(sk), NULL, sk);
}
if (ro->count > 1)
@@ -404,7 +410,7 @@ static int raw_bind(struct socket *sock, struct sockaddr *uaddr, int len)
if (addr->can_ifindex) {
struct net_device *dev;
- dev = dev_get_by_index(&init_net, addr->can_ifindex);
+ dev = dev_get_by_index(sock_net(sk), addr->can_ifindex);
if (!dev) {
err = -ENODEV;
goto out;
@@ -420,13 +426,13 @@ static int raw_bind(struct socket *sock, struct sockaddr *uaddr, int len)
ifindex = dev->ifindex;
/* filters set by default/setsockopt */
- err = raw_enable_allfilters(dev, sk);
+ err = raw_enable_allfilters(sock_net(sk), dev, sk);
dev_put(dev);
} else {
ifindex = 0;
/* filters set by default/setsockopt */
- err = raw_enable_allfilters(NULL, sk);
+ err = raw_enable_allfilters(sock_net(sk), NULL, sk);
}
if (!err) {
@@ -435,13 +441,15 @@ static int raw_bind(struct socket *sock, struct sockaddr *uaddr, int len)
if (ro->ifindex) {
struct net_device *dev;
- dev = dev_get_by_index(&init_net, ro->ifindex);
+ dev = dev_get_by_index(sock_net(sk),
+ ro->ifindex);
if (dev) {
- raw_disable_allfilters(dev, sk);
+ raw_disable_allfilters(dev_net(dev),
+ dev, sk);
dev_put(dev);
}
} else
- raw_disable_allfilters(NULL, sk);
+ raw_disable_allfilters(sock_net(sk), NULL, sk);
}
ro->ifindex = ifindex;
ro->bound = 1;
@@ -517,15 +525,16 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
lock_sock(sk);
if (ro->bound && ro->ifindex)
- dev = dev_get_by_index(&init_net, ro->ifindex);
+ dev = dev_get_by_index(sock_net(sk), ro->ifindex);
if (ro->bound) {
/* (try to) register the new filters */
if (count == 1)
- err = raw_enable_filters(dev, sk, &sfilter, 1);
+ err = raw_enable_filters(sock_net(sk), dev, sk,
+ &sfilter, 1);
else
- err = raw_enable_filters(dev, sk, filter,
- count);
+ err = raw_enable_filters(sock_net(sk), dev, sk,
+ filter, count);
if (err) {
if (count > 1)
kfree(filter);
@@ -533,7 +542,8 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
}
/* remove old filter registrations */
- raw_disable_filters(dev, sk, ro->filter, ro->count);
+ raw_disable_filters(sock_net(sk), dev, sk, ro->filter,
+ ro->count);
}
/* remove old filter space */
@@ -569,18 +579,20 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
lock_sock(sk);
if (ro->bound && ro->ifindex)
- dev = dev_get_by_index(&init_net, ro->ifindex);
+ dev = dev_get_by_index(sock_net(sk), ro->ifindex);
/* remove current error mask */
if (ro->bound) {
/* (try to) register the new err_mask */
- err = raw_enable_errfilter(dev, sk, err_mask);
+ err = raw_enable_errfilter(sock_net(sk), dev, sk,
+ err_mask);
if (err)
goto out_err;
/* remove old err_mask registration */
- raw_disable_errfilter(dev, sk, ro->err_mask);
+ raw_disable_errfilter(sock_net(sk), dev, sk,
+ ro->err_mask);
}
/* link new err_mask to the socket */
@@ -741,7 +753,7 @@ static int raw_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
return -EINVAL;
}
- dev = dev_get_by_index(&init_net, ifindex);
+ dev = dev_get_by_index(sock_net(sk), ifindex);
if (!dev)
return -ENXIO;
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index 108533859a53..4fd02831beed 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -45,6 +45,17 @@ bool libceph_compatible(void *data)
}
EXPORT_SYMBOL(libceph_compatible);
+static int param_get_supported_features(char *buffer,
+ const struct kernel_param *kp)
+{
+ return sprintf(buffer, "0x%llx", CEPH_FEATURES_SUPPORTED_DEFAULT);
+}
+static const struct kernel_param_ops param_ops_supported_features = {
+ .get = param_get_supported_features,
+};
+module_param_cb(supported_features, &param_ops_supported_features, NULL,
+ S_IRUGO);
+
/*
* find filename portion of a path (/foo/bar/baz -> baz)
*/
@@ -187,7 +198,7 @@ void *ceph_kvmalloc(size_t size, gfp_t flags)
return ptr;
}
- return __vmalloc(size, flags | __GFP_HIGHMEM, PAGE_KERNEL);
+ return __vmalloc(size, flags, PAGE_KERNEL);
}
@@ -596,9 +607,7 @@ EXPORT_SYMBOL(ceph_client_gid);
/*
* create a fresh client instance
*/
-struct ceph_client *ceph_create_client(struct ceph_options *opt, void *private,
- u64 supported_features,
- u64 required_features)
+struct ceph_client *ceph_create_client(struct ceph_options *opt, void *private)
{
struct ceph_client *client;
struct ceph_entity_addr *myaddr = NULL;
@@ -615,14 +624,12 @@ struct ceph_client *ceph_create_client(struct ceph_options *opt, void *private,
init_waitqueue_head(&client->auth_wq);
client->auth_err = 0;
- if (!ceph_test_opt(client, NOMSGAUTH))
- required_features |= CEPH_FEATURE_MSG_AUTH;
-
client->extra_mon_dispatch = NULL;
- client->supported_features = CEPH_FEATURES_SUPPORTED_DEFAULT |
- supported_features;
- client->required_features = CEPH_FEATURES_REQUIRED_DEFAULT |
- required_features;
+ client->supported_features = CEPH_FEATURES_SUPPORTED_DEFAULT;
+ client->required_features = CEPH_FEATURES_REQUIRED_DEFAULT;
+
+ if (!ceph_test_opt(client, NOMSGAUTH))
+ client->required_features |= CEPH_FEATURE_MSG_AUTH;
/* msgr */
if (ceph_test_opt(client, MYIP))
diff --git a/net/ceph/cls_lock_client.c b/net/ceph/cls_lock_client.c
index b9233b990399..08ada893f01e 100644
--- a/net/ceph/cls_lock_client.c
+++ b/net/ceph/cls_lock_client.c
@@ -179,6 +179,57 @@ int ceph_cls_break_lock(struct ceph_osd_client *osdc,
}
EXPORT_SYMBOL(ceph_cls_break_lock);
+int ceph_cls_set_cookie(struct ceph_osd_client *osdc,
+ struct ceph_object_id *oid,
+ struct ceph_object_locator *oloc,
+ char *lock_name, u8 type, char *old_cookie,
+ char *tag, char *new_cookie)
+{
+ int cookie_op_buf_size;
+ int name_len = strlen(lock_name);
+ int old_cookie_len = strlen(old_cookie);
+ int tag_len = strlen(tag);
+ int new_cookie_len = strlen(new_cookie);
+ void *p, *end;
+ struct page *cookie_op_page;
+ int ret;
+
+ cookie_op_buf_size = name_len + sizeof(__le32) +
+ old_cookie_len + sizeof(__le32) +
+ tag_len + sizeof(__le32) +
+ new_cookie_len + sizeof(__le32) +
+ sizeof(u8) + CEPH_ENCODING_START_BLK_LEN;
+ if (cookie_op_buf_size > PAGE_SIZE)
+ return -E2BIG;
+
+ cookie_op_page = alloc_page(GFP_NOIO);
+ if (!cookie_op_page)
+ return -ENOMEM;
+
+ p = page_address(cookie_op_page);
+ end = p + cookie_op_buf_size;
+
+ /* encode cls_lock_set_cookie_op struct */
+ ceph_start_encoding(&p, 1, 1,
+ cookie_op_buf_size - CEPH_ENCODING_START_BLK_LEN);
+ ceph_encode_string(&p, end, lock_name, name_len);
+ ceph_encode_8(&p, type);
+ ceph_encode_string(&p, end, old_cookie, old_cookie_len);
+ ceph_encode_string(&p, end, tag, tag_len);
+ ceph_encode_string(&p, end, new_cookie, new_cookie_len);
+
+ dout("%s lock_name %s type %d old_cookie %s tag %s new_cookie %s\n",
+ __func__, lock_name, type, old_cookie, tag, new_cookie);
+ ret = ceph_osdc_call(osdc, oid, oloc, "lock", "set_cookie",
+ CEPH_OSD_FLAG_WRITE, cookie_op_page,
+ cookie_op_buf_size, NULL, NULL);
+
+ dout("%s: status %d\n", __func__, ret);
+ __free_page(cookie_op_page);
+ return ret;
+}
+EXPORT_SYMBOL(ceph_cls_set_cookie);
+
void ceph_free_lockers(struct ceph_locker *lockers, u32 num_lockers)
{
int i;
diff --git a/net/ceph/debugfs.c b/net/ceph/debugfs.c
index c62b2b029a6e..71ba13927b3d 100644
--- a/net/ceph/debugfs.c
+++ b/net/ceph/debugfs.c
@@ -62,7 +62,8 @@ static int osdmap_show(struct seq_file *s, void *p)
return 0;
down_read(&osdc->lock);
- seq_printf(s, "epoch %d flags 0x%x\n", map->epoch, map->flags);
+ seq_printf(s, "epoch %u barrier %u flags 0x%x\n", map->epoch,
+ osdc->epoch_barrier, map->flags);
for (n = rb_first(&map->pg_pools); n; n = rb_next(n)) {
struct ceph_pg_pool_info *pi =
@@ -177,9 +178,7 @@ static void dump_request(struct seq_file *s, struct ceph_osd_request *req)
seq_printf(s, "%llu\t", req->r_tid);
dump_target(s, &req->r_t);
- seq_printf(s, "\t%d\t%u'%llu", req->r_attempts,
- le32_to_cpu(req->r_replay_version.epoch),
- le64_to_cpu(req->r_replay_version.version));
+ seq_printf(s, "\t%d", req->r_attempts);
for (i = 0; i < req->r_num_ops; i++) {
struct ceph_osd_req_op *op = &req->r_ops[i];
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index f76bb3332613..5766a6c896c4 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -1386,8 +1386,9 @@ static void prepare_write_keepalive(struct ceph_connection *con)
dout("prepare_write_keepalive %p\n", con);
con_out_kvec_reset(con);
if (con->peer_features & CEPH_FEATURE_MSGR_KEEPALIVE2) {
- struct timespec now = CURRENT_TIME;
+ struct timespec now;
+ ktime_get_real_ts(&now);
con_out_kvec_add(con, sizeof(tag_keepalive2), &tag_keepalive2);
ceph_encode_timespec(&con->out_temp_keepalive2, &now);
con_out_kvec_add(con, sizeof(con->out_temp_keepalive2),
@@ -3176,8 +3177,9 @@ bool ceph_con_keepalive_expired(struct ceph_connection *con,
{
if (interval > 0 &&
(con->peer_features & CEPH_FEATURE_MSGR_KEEPALIVE2)) {
- struct timespec now = CURRENT_TIME;
+ struct timespec now;
struct timespec ts;
+ ktime_get_real_ts(&now);
jiffies_to_timespec(interval, &ts);
ts = timespec_add(con->last_keepalive_ack, ts);
return timespec_compare(&now, &ts) >= 0;
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index e15ea9e4c495..924f07c36ddb 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -961,6 +961,7 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc,
truncate_size, truncate_seq);
}
+ req->r_abort_on_full = true;
req->r_flags = flags;
req->r_base_oloc.pool = layout->pool_id;
req->r_base_oloc.pool_ns = ceph_try_get_string(layout->pool_ns);
@@ -1005,7 +1006,7 @@ static bool osd_registered(struct ceph_osd *osd)
*/
static void osd_init(struct ceph_osd *osd)
{
- atomic_set(&osd->o_ref, 1);
+ refcount_set(&osd->o_ref, 1);
RB_CLEAR_NODE(&osd->o_node);
osd->o_requests = RB_ROOT;
osd->o_linger_requests = RB_ROOT;
@@ -1050,9 +1051,9 @@ static struct ceph_osd *create_osd(struct ceph_osd_client *osdc, int onum)
static struct ceph_osd *get_osd(struct ceph_osd *osd)
{
- if (atomic_inc_not_zero(&osd->o_ref)) {
- dout("get_osd %p %d -> %d\n", osd, atomic_read(&osd->o_ref)-1,
- atomic_read(&osd->o_ref));
+ if (refcount_inc_not_zero(&osd->o_ref)) {
+ dout("get_osd %p %d -> %d\n", osd, refcount_read(&osd->o_ref)-1,
+ refcount_read(&osd->o_ref));
return osd;
} else {
dout("get_osd %p FAIL\n", osd);
@@ -1062,9 +1063,9 @@ static struct ceph_osd *get_osd(struct ceph_osd *osd)
static void put_osd(struct ceph_osd *osd)
{
- dout("put_osd %p %d -> %d\n", osd, atomic_read(&osd->o_ref),
- atomic_read(&osd->o_ref) - 1);
- if (atomic_dec_and_test(&osd->o_ref)) {
+ dout("put_osd %p %d -> %d\n", osd, refcount_read(&osd->o_ref),
+ refcount_read(&osd->o_ref) - 1);
+ if (refcount_dec_and_test(&osd->o_ref)) {
osd_cleanup(osd);
kfree(osd);
}
@@ -1297,8 +1298,9 @@ static bool target_should_be_paused(struct ceph_osd_client *osdc,
__pool_full(pi);
WARN_ON(pi->id != t->base_oloc.pool);
- return (t->flags & CEPH_OSD_FLAG_READ && pauserd) ||
- (t->flags & CEPH_OSD_FLAG_WRITE && pausewr);
+ return ((t->flags & CEPH_OSD_FLAG_READ) && pauserd) ||
+ ((t->flags & CEPH_OSD_FLAG_WRITE) && pausewr) ||
+ (osdc->osdmap->epoch < osdc->epoch_barrier);
}
enum calc_target_result {
@@ -1503,9 +1505,10 @@ static void encode_request(struct ceph_osd_request *req, struct ceph_msg *msg)
ceph_encode_32(&p, req->r_flags);
ceph_encode_timespec(p, &req->r_mtime);
p += sizeof(struct ceph_timespec);
- /* aka reassert_version */
- memcpy(p, &req->r_replay_version, sizeof(req->r_replay_version));
- p += sizeof(req->r_replay_version);
+
+ /* reassert_version */
+ memset(p, 0, sizeof(struct ceph_eversion));
+ p += sizeof(struct ceph_eversion);
/* oloc */
ceph_start_encoding(&p, 5, 4,
@@ -1626,6 +1629,7 @@ static void maybe_request_map(struct ceph_osd_client *osdc)
ceph_monc_renew_subs(&osdc->client->monc);
}
+static void complete_request(struct ceph_osd_request *req, int err);
static void send_map_check(struct ceph_osd_request *req);
static void __submit_request(struct ceph_osd_request *req, bool wrlocked)
@@ -1635,6 +1639,7 @@ static void __submit_request(struct ceph_osd_request *req, bool wrlocked)
enum calc_target_result ct_res;
bool need_send = false;
bool promoted = false;
+ bool need_abort = false;
WARN_ON(req->r_tid);
dout("%s req %p wrlocked %d\n", __func__, req, wrlocked);
@@ -1650,8 +1655,13 @@ again:
goto promote;
}
- if ((req->r_flags & CEPH_OSD_FLAG_WRITE) &&
- ceph_osdmap_flag(osdc, CEPH_OSDMAP_PAUSEWR)) {
+ if (osdc->osdmap->epoch < osdc->epoch_barrier) {
+ dout("req %p epoch %u barrier %u\n", req, osdc->osdmap->epoch,
+ osdc->epoch_barrier);
+ req->r_t.paused = true;
+ maybe_request_map(osdc);
+ } else if ((req->r_flags & CEPH_OSD_FLAG_WRITE) &&
+ ceph_osdmap_flag(osdc, CEPH_OSDMAP_PAUSEWR)) {
dout("req %p pausewr\n", req);
req->r_t.paused = true;
maybe_request_map(osdc);
@@ -1669,6 +1679,8 @@ again:
pr_warn_ratelimited("FULL or reached pool quota\n");
req->r_t.paused = true;
maybe_request_map(osdc);
+ if (req->r_abort_on_full)
+ need_abort = true;
} else if (!osd_homeless(osd)) {
need_send = true;
} else {
@@ -1685,6 +1697,8 @@ again:
link_request(osd, req);
if (need_send)
send_request(req);
+ else if (need_abort)
+ complete_request(req, -ENOSPC);
mutex_unlock(&osd->lock);
if (ct_res == CALC_TARGET_POOL_DNE)
@@ -1799,6 +1813,97 @@ static void abort_request(struct ceph_osd_request *req, int err)
complete_request(req, err);
}
+static void update_epoch_barrier(struct ceph_osd_client *osdc, u32 eb)
+{
+ if (likely(eb > osdc->epoch_barrier)) {
+ dout("updating epoch_barrier from %u to %u\n",
+ osdc->epoch_barrier, eb);
+ osdc->epoch_barrier = eb;
+ /* Request map if we're not to the barrier yet */
+ if (eb > osdc->osdmap->epoch)
+ maybe_request_map(osdc);
+ }
+}
+
+void ceph_osdc_update_epoch_barrier(struct ceph_osd_client *osdc, u32 eb)
+{
+ down_read(&osdc->lock);
+ if (unlikely(eb > osdc->epoch_barrier)) {
+ up_read(&osdc->lock);
+ down_write(&osdc->lock);
+ update_epoch_barrier(osdc, eb);
+ up_write(&osdc->lock);
+ } else {
+ up_read(&osdc->lock);
+ }
+}
+EXPORT_SYMBOL(ceph_osdc_update_epoch_barrier);
+
+/*
+ * Drop all pending requests that are stalled waiting on a full condition to
+ * clear, and complete them with ENOSPC as the return code. Set the
+ * osdc->epoch_barrier to the latest map epoch that we've seen if any were
+ * cancelled.
+ */
+static void ceph_osdc_abort_on_full(struct ceph_osd_client *osdc)
+{
+ struct rb_node *n;
+ bool victims = false;
+
+ dout("enter abort_on_full\n");
+
+ if (!ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL) && !have_pool_full(osdc))
+ goto out;
+
+ /* Scan list and see if there is anything to abort */
+ for (n = rb_first(&osdc->osds); n; n = rb_next(n)) {
+ struct ceph_osd *osd = rb_entry(n, struct ceph_osd, o_node);
+ struct rb_node *m;
+
+ m = rb_first(&osd->o_requests);
+ while (m) {
+ struct ceph_osd_request *req = rb_entry(m,
+ struct ceph_osd_request, r_node);
+ m = rb_next(m);
+
+ if (req->r_abort_on_full) {
+ victims = true;
+ break;
+ }
+ }
+ if (victims)
+ break;
+ }
+
+ if (!victims)
+ goto out;
+
+ /*
+ * Update the barrier to current epoch if it's behind that point,
+ * since we know we have some calls to be aborted in the tree.
+ */
+ update_epoch_barrier(osdc, osdc->osdmap->epoch);
+
+ for (n = rb_first(&osdc->osds); n; n = rb_next(n)) {
+ struct ceph_osd *osd = rb_entry(n, struct ceph_osd, o_node);
+ struct rb_node *m;
+
+ m = rb_first(&osd->o_requests);
+ while (m) {
+ struct ceph_osd_request *req = rb_entry(m,
+ struct ceph_osd_request, r_node);
+ m = rb_next(m);
+
+ if (req->r_abort_on_full &&
+ (ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL) ||
+ pool_full(osdc, req->r_t.target_oloc.pool)))
+ abort_request(req, -ENOSPC);
+ }
+ }
+out:
+ dout("return abort_on_full barrier=%u\n", osdc->epoch_barrier);
+}
+
static void check_pool_dne(struct ceph_osd_request *req)
{
struct ceph_osd_client *osdc = req->r_osdc;
@@ -3252,11 +3357,13 @@ done:
pausewr = ceph_osdmap_flag(osdc, CEPH_OSDMAP_PAUSEWR) ||
ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL) ||
have_pool_full(osdc);
- if (was_pauserd || was_pausewr || pauserd || pausewr)
+ if (was_pauserd || was_pausewr || pauserd || pausewr ||
+ osdc->osdmap->epoch < osdc->epoch_barrier)
maybe_request_map(osdc);
kick_requests(osdc, &need_resend, &need_resend_linger);
+ ceph_osdc_abort_on_full(osdc);
ceph_monc_got_map(&osdc->client->monc, CEPH_SUB_OSDMAP,
osdc->osdmap->epoch);
up_write(&osdc->lock);
@@ -3574,7 +3681,7 @@ ceph_osdc_watch(struct ceph_osd_client *osdc,
ceph_oid_copy(&lreq->t.base_oid, oid);
ceph_oloc_copy(&lreq->t.base_oloc, oloc);
lreq->t.flags = CEPH_OSD_FLAG_WRITE;
- lreq->mtime = CURRENT_TIME;
+ ktime_get_real_ts(&lreq->mtime);
lreq->reg_req = alloc_linger_request(lreq);
if (!lreq->reg_req) {
@@ -3632,7 +3739,7 @@ int ceph_osdc_unwatch(struct ceph_osd_client *osdc,
ceph_oid_copy(&req->r_base_oid, &lreq->t.base_oid);
ceph_oloc_copy(&req->r_base_oloc, &lreq->t.base_oloc);
req->r_flags = CEPH_OSD_FLAG_WRITE;
- req->r_mtime = CURRENT_TIME;
+ ktime_get_real_ts(&req->r_mtime);
osd_req_op_watch_init(req, 0, lreq->linger_id,
CEPH_OSD_WATCH_OP_UNWATCH);
@@ -4126,7 +4233,7 @@ void ceph_osdc_stop(struct ceph_osd_client *osdc)
close_osd(osd);
}
up_write(&osdc->lock);
- WARN_ON(atomic_read(&osdc->homeless_osd.o_ref) != 1);
+ WARN_ON(refcount_read(&osdc->homeless_osd.o_ref) != 1);
osd_cleanup(&osdc->homeless_osd);
WARN_ON(!list_empty(&osdc->osd_lru));
diff --git a/net/ceph/pagelist.c b/net/ceph/pagelist.c
index 6864007e64fc..ce09f73be759 100644
--- a/net/ceph/pagelist.c
+++ b/net/ceph/pagelist.c
@@ -16,7 +16,7 @@ static void ceph_pagelist_unmap_tail(struct ceph_pagelist *pl)
void ceph_pagelist_release(struct ceph_pagelist *pl)
{
- if (!atomic_dec_and_test(&pl->refcnt))
+ if (!refcount_dec_and_test(&pl->refcnt))
return;
ceph_pagelist_unmap_tail(pl);
while (!list_empty(&pl->head)) {
diff --git a/net/ceph/snapshot.c b/net/ceph/snapshot.c
index 705414e78ae0..e14a5d038656 100644
--- a/net/ceph/snapshot.c
+++ b/net/ceph/snapshot.c
@@ -49,7 +49,7 @@ struct ceph_snap_context *ceph_create_snap_context(u32 snap_count,
if (!snapc)
return NULL;
- atomic_set(&snapc->nref, 1);
+ refcount_set(&snapc->nref, 1);
snapc->num_snaps = snap_count;
return snapc;
@@ -59,7 +59,7 @@ EXPORT_SYMBOL(ceph_create_snap_context);
struct ceph_snap_context *ceph_get_snap_context(struct ceph_snap_context *sc)
{
if (sc)
- atomic_inc(&sc->nref);
+ refcount_inc(&sc->nref);
return sc;
}
EXPORT_SYMBOL(ceph_get_snap_context);
@@ -68,7 +68,7 @@ void ceph_put_snap_context(struct ceph_snap_context *sc)
{
if (!sc)
return;
- if (atomic_dec_and_test(&sc->nref)) {
+ if (refcount_dec_and_test(&sc->nref)) {
/*printk(" deleting snap_context %p\n", sc);*/
kfree(sc);
}
diff --git a/net/core/datagram.c b/net/core/datagram.c
index f4947e737f34..db1866f2ffcf 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -256,8 +256,12 @@ struct sk_buff *__skb_try_recv_datagram(struct sock *sk, unsigned int flags,
}
spin_unlock_irqrestore(&queue->lock, cpu_flags);
- } while (sk_can_busy_loop(sk) &&
- sk_busy_loop(sk, flags & MSG_DONTWAIT));
+
+ if (!sk_can_busy_loop(sk))
+ break;
+
+ sk_busy_loop(sk, flags & MSG_DONTWAIT);
+ } while (!skb_queue_empty(&sk->sk_receive_queue));
error = -EAGAIN;
@@ -760,7 +764,7 @@ int skb_copy_and_csum_datagram_msg(struct sk_buff *skb,
if (msg_data_left(msg) < chunk) {
if (__skb_checksum_complete(skb))
- goto csum_error;
+ return -EINVAL;
if (skb_copy_datagram_msg(skb, hlen, msg, chunk))
goto fault;
} else {
@@ -768,15 +772,16 @@ int skb_copy_and_csum_datagram_msg(struct sk_buff *skb,
if (skb_copy_and_csum_datagram(skb, hlen, &msg->msg_iter,
chunk, &csum))
goto fault;
- if (csum_fold(csum))
- goto csum_error;
+
+ if (csum_fold(csum)) {
+ iov_iter_revert(&msg->msg_iter, chunk);
+ return -EINVAL;
+ }
+
if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE))
netdev_rx_csum_fault(skb->dev);
}
return 0;
-csum_error:
- iov_iter_revert(&msg->msg_iter, chunk);
- return -EINVAL;
fault:
return -EFAULT;
}
diff --git a/net/core/dev.c b/net/core/dev.c
index c57878be09d2..fca407b4a6ea 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -81,6 +81,7 @@
#include <linux/hash.h>
#include <linux/slab.h>
#include <linux/sched.h>
+#include <linux/sched/mm.h>
#include <linux/mutex.h>
#include <linux/string.h>
#include <linux/mm.h>
@@ -95,6 +96,7 @@
#include <linux/notifier.h>
#include <linux/skbuff.h>
#include <linux/bpf.h>
+#include <linux/bpf_trace.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/busy_poll.h>
@@ -2975,6 +2977,9 @@ static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device
__skb_linearize(skb))
goto out_kfree_skb;
+ if (validate_xmit_xfrm(skb, features))
+ goto out_kfree_skb;
+
/* If packet is not checksummed and device does not
* support checksumming for this protocol, complete
* checksumming here.
@@ -3444,6 +3449,7 @@ EXPORT_SYMBOL(netdev_max_backlog);
int netdev_tstamp_prequeue __read_mostly = 1;
int netdev_budget __read_mostly = 300;
+unsigned int __read_mostly netdev_budget_usecs = 2000;
int weight_p __read_mostly = 64; /* old backlog weight */
int dev_weight_rx_bias __read_mostly = 1; /* bias for backlog weight */
int dev_weight_tx_bias __read_mostly = 1; /* bias for output_queue quota */
@@ -4230,7 +4236,7 @@ static int __netif_receive_skb(struct sk_buff *skb)
int ret;
if (sk_memalloc_socks() && skb_pfmemalloc(skb)) {
- unsigned long pflags = current->flags;
+ unsigned int noreclaim_flag;
/*
* PFMEMALLOC skbs are special, they should
@@ -4241,15 +4247,134 @@ static int __netif_receive_skb(struct sk_buff *skb)
* Use PF_MEMALLOC as this saves us from propagating the allocation
* context down to all allocation sites.
*/
- current->flags |= PF_MEMALLOC;
+ noreclaim_flag = memalloc_noreclaim_save();
ret = __netif_receive_skb_core(skb, true);
- current_restore_flags(pflags, PF_MEMALLOC);
+ memalloc_noreclaim_restore(noreclaim_flag);
} else
ret = __netif_receive_skb_core(skb, false);
return ret;
}
+static struct static_key generic_xdp_needed __read_mostly;
+
+static int generic_xdp_install(struct net_device *dev, struct netdev_xdp *xdp)
+{
+ struct bpf_prog *new = xdp->prog;
+ int ret = 0;
+
+ switch (xdp->command) {
+ case XDP_SETUP_PROG: {
+ struct bpf_prog *old = rtnl_dereference(dev->xdp_prog);
+
+ rcu_assign_pointer(dev->xdp_prog, new);
+ if (old)
+ bpf_prog_put(old);
+
+ if (old && !new) {
+ static_key_slow_dec(&generic_xdp_needed);
+ } else if (new && !old) {
+ static_key_slow_inc(&generic_xdp_needed);
+ dev_disable_lro(dev);
+ }
+ break;
+ }
+
+ case XDP_QUERY_PROG:
+ xdp->prog_attached = !!rcu_access_pointer(dev->xdp_prog);
+ break;
+
+ default:
+ ret = -EINVAL;
+ break;
+ }
+
+ return ret;
+}
+
+static u32 netif_receive_generic_xdp(struct sk_buff *skb,
+ struct bpf_prog *xdp_prog)
+{
+ struct xdp_buff xdp;
+ u32 act = XDP_DROP;
+ void *orig_data;
+ int hlen, off;
+ u32 mac_len;
+
+ /* Reinjected packets coming from act_mirred or similar should
+ * not get XDP generic processing.
+ */
+ if (skb_cloned(skb))
+ return XDP_PASS;
+
+ if (skb_linearize(skb))
+ goto do_drop;
+
+ /* The XDP program wants to see the packet starting at the MAC
+ * header.
+ */
+ mac_len = skb->data - skb_mac_header(skb);
+ hlen = skb_headlen(skb) + mac_len;
+ xdp.data = skb->data - mac_len;
+ xdp.data_end = xdp.data + hlen;
+ xdp.data_hard_start = skb->data - skb_headroom(skb);
+ orig_data = xdp.data;
+
+ act = bpf_prog_run_xdp(xdp_prog, &xdp);
+
+ off = xdp.data - orig_data;
+ if (off > 0)
+ __skb_pull(skb, off);
+ else if (off < 0)
+ __skb_push(skb, -off);
+
+ switch (act) {
+ case XDP_TX:
+ __skb_push(skb, mac_len);
+ /* fall through */
+ case XDP_PASS:
+ break;
+
+ default:
+ bpf_warn_invalid_xdp_action(act);
+ /* fall through */
+ case XDP_ABORTED:
+ trace_xdp_exception(skb->dev, xdp_prog, act);
+ /* fall through */
+ case XDP_DROP:
+ do_drop:
+ kfree_skb(skb);
+ break;
+ }
+
+ return act;
+}
+
+/* When doing generic XDP we have to bypass the qdisc layer and the
+ * network taps in order to match in-driver-XDP behavior.
+ */
+static void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog)
+{
+ struct net_device *dev = skb->dev;
+ struct netdev_queue *txq;
+ bool free_skb = true;
+ int cpu, rc;
+
+ txq = netdev_pick_tx(dev, skb, NULL);
+ cpu = smp_processor_id();
+ HARD_TX_LOCK(dev, txq, cpu);
+ if (!netif_xmit_stopped(txq)) {
+ rc = netdev_start_xmit(skb, dev, txq, 0);
+ if (dev_xmit_complete(rc))
+ free_skb = false;
+ }
+ HARD_TX_UNLOCK(dev, txq);
+ if (free_skb) {
+ trace_xdp_exception(dev, xdp_prog, XDP_TX);
+ kfree_skb(skb);
+ }
+}
+
static int netif_receive_skb_internal(struct sk_buff *skb)
{
int ret;
@@ -4261,6 +4386,21 @@ static int netif_receive_skb_internal(struct sk_buff *skb)
rcu_read_lock();
+ if (static_key_false(&generic_xdp_needed)) {
+ struct bpf_prog *xdp_prog = rcu_dereference(skb->dev->xdp_prog);
+
+ if (xdp_prog) {
+ u32 act = netif_receive_generic_xdp(skb, xdp_prog);
+
+ if (act != XDP_PASS) {
+ rcu_read_unlock();
+ if (act == XDP_TX)
+ generic_xdp_tx(skb, xdp_prog);
+ return NET_RX_DROP;
+ }
+ }
+ }
+
#ifdef CONFIG_RPS
if (static_key_false(&rps_needed)) {
struct rps_dev_flow voidflow, *rflow = &voidflow;
@@ -4493,7 +4633,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
enum gro_result ret;
int grow;
- if (!(skb->dev->features & NETIF_F_GRO))
+ if (netif_elide_gro(skb->dev))
goto normal;
if (skb->csum_bad)
@@ -5063,27 +5203,28 @@ static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock)
do_softirq();
}
-bool sk_busy_loop(struct sock *sk, int nonblock)
+void napi_busy_loop(unsigned int napi_id,
+ bool (*loop_end)(void *, unsigned long),
+ void *loop_end_arg)
{
- unsigned long end_time = !nonblock ? sk_busy_loop_end_time(sk) : 0;
+ unsigned long start_time = loop_end ? busy_loop_current_time() : 0;
int (*napi_poll)(struct napi_struct *napi, int budget);
void *have_poll_lock = NULL;
struct napi_struct *napi;
- int rc;
restart:
- rc = false;
napi_poll = NULL;
rcu_read_lock();
- napi = napi_by_id(sk->sk_napi_id);
+ napi = napi_by_id(napi_id);
if (!napi)
goto out;
preempt_disable();
for (;;) {
- rc = 0;
+ int work = 0;
+
local_bh_disable();
if (!napi_poll) {
unsigned long val = READ_ONCE(napi->state);
@@ -5101,16 +5242,15 @@ restart:
have_poll_lock = netpoll_poll_lock(napi);
napi_poll = napi->poll;
}
- rc = napi_poll(napi, BUSY_POLL_BUDGET);
- trace_napi_poll(napi, rc, BUSY_POLL_BUDGET);
+ work = napi_poll(napi, BUSY_POLL_BUDGET);
+ trace_napi_poll(napi, work, BUSY_POLL_BUDGET);
count:
- if (rc > 0)
- __NET_ADD_STATS(sock_net(sk),
- LINUX_MIB_BUSYPOLLRXPACKETS, rc);
+ if (work > 0)
+ __NET_ADD_STATS(dev_net(napi->dev),
+ LINUX_MIB_BUSYPOLLRXPACKETS, work);
local_bh_enable();
- if (nonblock || !skb_queue_empty(&sk->sk_receive_queue) ||
- busy_loop_timeout(end_time))
+ if (!loop_end || loop_end(loop_end_arg, start_time))
break;
if (unlikely(need_resched())) {
@@ -5119,9 +5259,8 @@ count:
preempt_enable();
rcu_read_unlock();
cond_resched();
- rc = !skb_queue_empty(&sk->sk_receive_queue);
- if (rc || busy_loop_timeout(end_time))
- return rc;
+ if (loop_end(loop_end_arg, start_time))
+ return;
goto restart;
}
cpu_relax();
@@ -5129,12 +5268,10 @@ count:
if (napi_poll)
busy_poll_stop(napi, have_poll_lock);
preempt_enable();
- rc = !skb_queue_empty(&sk->sk_receive_queue);
out:
rcu_read_unlock();
- return rc;
}
-EXPORT_SYMBOL(sk_busy_loop);
+EXPORT_SYMBOL(napi_busy_loop);
#endif /* CONFIG_NET_RX_BUSY_POLL */
@@ -5146,10 +5283,10 @@ static void napi_hash_add(struct napi_struct *napi)
spin_lock(&napi_hash_lock);
- /* 0..NR_CPUS+1 range is reserved for sender_cpu use */
+ /* 0..NR_CPUS range is reserved for sender_cpu use */
do {
- if (unlikely(++napi_gen_id < NR_CPUS + 1))
- napi_gen_id = NR_CPUS + 1;
+ if (unlikely(++napi_gen_id < MIN_NAPI_ID))
+ napi_gen_id = MIN_NAPI_ID;
} while (napi_by_id(napi_gen_id));
napi->napi_id = napi_gen_id;
@@ -5313,7 +5450,8 @@ out_unlock:
static __latent_entropy void net_rx_action(struct softirq_action *h)
{
struct softnet_data *sd = this_cpu_ptr(&softnet_data);
- unsigned long time_limit = jiffies + 2;
+ unsigned long time_limit = jiffies +
+ usecs_to_jiffies(netdev_budget_usecs);
int budget = netdev_budget;
LIST_HEAD(list);
LIST_HEAD(repoll);
@@ -6714,47 +6852,72 @@ int dev_change_proto_down(struct net_device *dev, bool proto_down)
}
EXPORT_SYMBOL(dev_change_proto_down);
+bool __dev_xdp_attached(struct net_device *dev, xdp_op_t xdp_op)
+{
+ struct netdev_xdp xdp;
+
+ memset(&xdp, 0, sizeof(xdp));
+ xdp.command = XDP_QUERY_PROG;
+
+ /* Query must always succeed. */
+ WARN_ON(xdp_op(dev, &xdp) < 0);
+ return xdp.prog_attached;
+}
+
+static int dev_xdp_install(struct net_device *dev, xdp_op_t xdp_op,
+ struct netlink_ext_ack *extack,
+ struct bpf_prog *prog)
+{
+ struct netdev_xdp xdp;
+
+ memset(&xdp, 0, sizeof(xdp));
+ xdp.command = XDP_SETUP_PROG;
+ xdp.extack = extack;
+ xdp.prog = prog;
+
+ return xdp_op(dev, &xdp);
+}
+
/**
* dev_change_xdp_fd - set or clear a bpf program for a device rx path
* @dev: device
+ * @extack: netlink extended ack
* @fd: new program fd or negative value to clear
* @flags: xdp-related flags
*
* Set or clear a bpf program for a device
*/
-int dev_change_xdp_fd(struct net_device *dev, int fd, u32 flags)
+int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
+ int fd, u32 flags)
{
const struct net_device_ops *ops = dev->netdev_ops;
struct bpf_prog *prog = NULL;
- struct netdev_xdp xdp;
+ xdp_op_t xdp_op, xdp_chk;
int err;
ASSERT_RTNL();
- if (!ops->ndo_xdp)
+ xdp_op = xdp_chk = ops->ndo_xdp;
+ if (!xdp_op && (flags & XDP_FLAGS_DRV_MODE))
return -EOPNOTSUPP;
+ if (!xdp_op || (flags & XDP_FLAGS_SKB_MODE))
+ xdp_op = generic_xdp_install;
+ if (xdp_op == xdp_chk)
+ xdp_chk = generic_xdp_install;
+
if (fd >= 0) {
- if (flags & XDP_FLAGS_UPDATE_IF_NOEXIST) {
- memset(&xdp, 0, sizeof(xdp));
- xdp.command = XDP_QUERY_PROG;
-
- err = ops->ndo_xdp(dev, &xdp);
- if (err < 0)
- return err;
- if (xdp.prog_attached)
- return -EBUSY;
- }
+ if (xdp_chk && __dev_xdp_attached(dev, xdp_chk))
+ return -EEXIST;
+ if ((flags & XDP_FLAGS_UPDATE_IF_NOEXIST) &&
+ __dev_xdp_attached(dev, xdp_op))
+ return -EBUSY;
prog = bpf_prog_get_type(fd, BPF_PROG_TYPE_XDP);
if (IS_ERR(prog))
return PTR_ERR(prog);
}
- memset(&xdp, 0, sizeof(xdp));
- xdp.command = XDP_SETUP_PROG;
- xdp.prog = prog;
-
- err = ops->ndo_xdp(dev, &xdp);
+ err = dev_xdp_install(dev, xdp_op, extack, prog);
if (err < 0 && prog)
bpf_prog_put(prog);
@@ -7105,13 +7268,10 @@ void netif_stacked_transfer_operstate(const struct net_device *rootdev,
else
netif_dormant_off(dev);
- if (netif_carrier_ok(rootdev)) {
- if (!netif_carrier_ok(dev))
- netif_carrier_on(dev);
- } else {
- if (netif_carrier_ok(dev))
- netif_carrier_off(dev);
- }
+ if (netif_carrier_ok(rootdev))
+ netif_carrier_on(dev);
+ else
+ netif_carrier_off(dev);
}
EXPORT_SYMBOL(netif_stacked_transfer_operstate);
@@ -7124,12 +7284,10 @@ static int netif_alloc_rx_queues(struct net_device *dev)
BUG_ON(count < 1);
- rx = kzalloc(sz, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT);
- if (!rx) {
- rx = vzalloc(sz);
- if (!rx)
- return -ENOMEM;
- }
+ rx = kvzalloc(sz, GFP_KERNEL | __GFP_REPEAT);
+ if (!rx)
+ return -ENOMEM;
+
dev->_rx = rx;
for (i = 0; i < count; i++)
@@ -7166,12 +7324,10 @@ static int netif_alloc_netdev_queues(struct net_device *dev)
if (count < 1 || count > 0xffff)
return -EINVAL;
- tx = kzalloc(sz, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT);
- if (!tx) {
- tx = vzalloc(sz);
- if (!tx)
- return -ENOMEM;
- }
+ tx = kvzalloc(sz, GFP_KERNEL | __GFP_REPEAT);
+ if (!tx)
+ return -ENOMEM;
+
dev->_tx = tx;
netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
@@ -7705,9 +7861,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
/* ensure 32-byte alignment of whole construct */
alloc_size += NETDEV_ALIGN - 1;
- p = kzalloc(alloc_size, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT);
- if (!p)
- p = vzalloc(alloc_size);
+ p = kvzalloc(alloc_size, GFP_KERNEL | __GFP_REPEAT);
if (!p)
return NULL;
@@ -7794,6 +7948,7 @@ EXPORT_SYMBOL(alloc_netdev_mqs);
void free_netdev(struct net_device *dev)
{
struct napi_struct *p, *n;
+ struct bpf_prog *prog;
might_sleep();
netif_free_tx_queues(dev);
@@ -7812,6 +7967,12 @@ void free_netdev(struct net_device *dev)
free_percpu(dev->pcpu_refcnt);
dev->pcpu_refcnt = NULL;
+ prog = rcu_dereference_protected(dev->xdp_prog, 1);
+ if (prog) {
+ bpf_prog_put(prog);
+ static_key_slow_dec(&generic_xdp_needed);
+ }
+
/* Compatibility with error handling in drivers */
if (dev->reg_state == NETREG_UNINITIALIZED) {
netdev_freemem(dev);
diff --git a/net/core/devlink.c b/net/core/devlink.c
index e9c1e6acfb6d..b0b87a292e7c 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -1397,10 +1397,10 @@ static int devlink_nl_eswitch_fill(struct sk_buff *msg, struct devlink *devlink,
u32 seq, int flags)
{
const struct devlink_ops *ops = devlink->ops;
+ u8 inline_mode, encap_mode;
void *hdr;
int err = 0;
u16 mode;
- u8 inline_mode;
hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
if (!hdr)
@@ -1429,6 +1429,15 @@ static int devlink_nl_eswitch_fill(struct sk_buff *msg, struct devlink *devlink,
goto nla_put_failure;
}
+ if (ops->eswitch_encap_mode_get) {
+ err = ops->eswitch_encap_mode_get(devlink, &encap_mode);
+ if (err)
+ goto nla_put_failure;
+ err = nla_put_u8(msg, DEVLINK_ATTR_ESWITCH_ENCAP_MODE, encap_mode);
+ if (err)
+ goto nla_put_failure;
+ }
+
genlmsg_end(msg, hdr);
return 0;
@@ -1468,9 +1477,9 @@ static int devlink_nl_cmd_eswitch_set_doit(struct sk_buff *skb,
{
struct devlink *devlink = info->user_ptr[0];
const struct devlink_ops *ops = devlink->ops;
- u16 mode;
- u8 inline_mode;
+ u8 inline_mode, encap_mode;
int err = 0;
+ u16 mode;
if (!ops)
return -EOPNOTSUPP;
@@ -1494,7 +1503,695 @@ static int devlink_nl_cmd_eswitch_set_doit(struct sk_buff *skb,
return err;
}
+ if (info->attrs[DEVLINK_ATTR_ESWITCH_ENCAP_MODE]) {
+ if (!ops->eswitch_encap_mode_set)
+ return -EOPNOTSUPP;
+ encap_mode = nla_get_u8(info->attrs[DEVLINK_ATTR_ESWITCH_ENCAP_MODE]);
+ err = ops->eswitch_encap_mode_set(devlink, encap_mode);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+int devlink_dpipe_match_put(struct sk_buff *skb,
+ struct devlink_dpipe_match *match)
+{
+ struct devlink_dpipe_header *header = match->header;
+ struct devlink_dpipe_field *field = &header->fields[match->field_id];
+ struct nlattr *match_attr;
+
+ match_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_MATCH);
+ if (!match_attr)
+ return -EMSGSIZE;
+
+ if (nla_put_u32(skb, DEVLINK_ATTR_DPIPE_MATCH_TYPE, match->type) ||
+ nla_put_u32(skb, DEVLINK_ATTR_DPIPE_HEADER_INDEX, match->header_index) ||
+ nla_put_u32(skb, DEVLINK_ATTR_DPIPE_HEADER_ID, header->id) ||
+ nla_put_u32(skb, DEVLINK_ATTR_DPIPE_FIELD_ID, field->id) ||
+ nla_put_u8(skb, DEVLINK_ATTR_DPIPE_HEADER_GLOBAL, header->global))
+ goto nla_put_failure;
+
+ nla_nest_end(skb, match_attr);
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(skb, match_attr);
+ return -EMSGSIZE;
+}
+EXPORT_SYMBOL_GPL(devlink_dpipe_match_put);
+
+static int devlink_dpipe_matches_put(struct devlink_dpipe_table *table,
+ struct sk_buff *skb)
+{
+ struct nlattr *matches_attr;
+
+ matches_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_TABLE_MATCHES);
+ if (!matches_attr)
+ return -EMSGSIZE;
+
+ if (table->table_ops->matches_dump(table->priv, skb))
+ goto nla_put_failure;
+
+ nla_nest_end(skb, matches_attr);
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(skb, matches_attr);
+ return -EMSGSIZE;
+}
+
+int devlink_dpipe_action_put(struct sk_buff *skb,
+ struct devlink_dpipe_action *action)
+{
+ struct devlink_dpipe_header *header = action->header;
+ struct devlink_dpipe_field *field = &header->fields[action->field_id];
+ struct nlattr *action_attr;
+
+ action_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_ACTION);
+ if (!action_attr)
+ return -EMSGSIZE;
+
+ if (nla_put_u32(skb, DEVLINK_ATTR_DPIPE_ACTION_TYPE, action->type) ||
+ nla_put_u32(skb, DEVLINK_ATTR_DPIPE_HEADER_INDEX, action->header_index) ||
+ nla_put_u32(skb, DEVLINK_ATTR_DPIPE_HEADER_ID, header->id) ||
+ nla_put_u32(skb, DEVLINK_ATTR_DPIPE_FIELD_ID, field->id) ||
+ nla_put_u8(skb, DEVLINK_ATTR_DPIPE_HEADER_GLOBAL, header->global))
+ goto nla_put_failure;
+
+ nla_nest_end(skb, action_attr);
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(skb, action_attr);
+ return -EMSGSIZE;
+}
+EXPORT_SYMBOL_GPL(devlink_dpipe_action_put);
+
+static int devlink_dpipe_actions_put(struct devlink_dpipe_table *table,
+ struct sk_buff *skb)
+{
+ struct nlattr *actions_attr;
+
+ actions_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_TABLE_ACTIONS);
+ if (!actions_attr)
+ return -EMSGSIZE;
+
+ if (table->table_ops->actions_dump(table->priv, skb))
+ goto nla_put_failure;
+
+ nla_nest_end(skb, actions_attr);
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(skb, actions_attr);
+ return -EMSGSIZE;
+}
+
+static int devlink_dpipe_table_put(struct sk_buff *skb,
+ struct devlink_dpipe_table *table)
+{
+ struct nlattr *table_attr;
+
+ table_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_TABLE);
+ if (!table_attr)
+ return -EMSGSIZE;
+
+ if (nla_put_string(skb, DEVLINK_ATTR_DPIPE_TABLE_NAME, table->name) ||
+ nla_put_u64_64bit(skb, DEVLINK_ATTR_DPIPE_TABLE_SIZE, table->size,
+ DEVLINK_ATTR_PAD))
+ goto nla_put_failure;
+ if (nla_put_u8(skb, DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED,
+ table->counters_enabled))
+ goto nla_put_failure;
+
+ if (devlink_dpipe_matches_put(table, skb))
+ goto nla_put_failure;
+
+ if (devlink_dpipe_actions_put(table, skb))
+ goto nla_put_failure;
+
+ nla_nest_end(skb, table_attr);
return 0;
+
+nla_put_failure:
+ nla_nest_cancel(skb, table_attr);
+ return -EMSGSIZE;
+}
+
+static int devlink_dpipe_send_and_alloc_skb(struct sk_buff **pskb,
+ struct genl_info *info)
+{
+ int err;
+
+ if (*pskb) {
+ err = genlmsg_reply(*pskb, info);
+ if (err)
+ return err;
+ }
+ *pskb = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!*pskb)
+ return -ENOMEM;
+ return 0;
+}
+
+static int devlink_dpipe_tables_fill(struct genl_info *info,
+ enum devlink_command cmd, int flags,
+ struct list_head *dpipe_tables,
+ const char *table_name)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ struct devlink_dpipe_table *table;
+ struct nlattr *tables_attr;
+ struct sk_buff *skb = NULL;
+ struct nlmsghdr *nlh;
+ bool incomplete;
+ void *hdr;
+ int i;
+ int err;
+
+ table = list_first_entry(dpipe_tables,
+ struct devlink_dpipe_table, list);
+start_again:
+ err = devlink_dpipe_send_and_alloc_skb(&skb, info);
+ if (err)
+ return err;
+
+ hdr = genlmsg_put(skb, info->snd_portid, info->snd_seq,
+ &devlink_nl_family, NLM_F_MULTI, cmd);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ if (devlink_nl_put_handle(skb, devlink))
+ goto nla_put_failure;
+ tables_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_TABLES);
+ if (!tables_attr)
+ goto nla_put_failure;
+
+ i = 0;
+ incomplete = false;
+ list_for_each_entry_from(table, dpipe_tables, list) {
+ if (!table_name) {
+ err = devlink_dpipe_table_put(skb, table);
+ if (err) {
+ if (!i)
+ goto err_table_put;
+ incomplete = true;
+ break;
+ }
+ } else {
+ if (!strcmp(table->name, table_name)) {
+ err = devlink_dpipe_table_put(skb, table);
+ if (err)
+ break;
+ }
+ }
+ i++;
+ }
+
+ nla_nest_end(skb, tables_attr);
+ genlmsg_end(skb, hdr);
+ if (incomplete)
+ goto start_again;
+
+send_done:
+ nlh = nlmsg_put(skb, info->snd_portid, info->snd_seq,
+ NLMSG_DONE, 0, flags | NLM_F_MULTI);
+ if (!nlh) {
+ err = devlink_dpipe_send_and_alloc_skb(&skb, info);
+ if (err)
+ goto err_skb_send_alloc;
+ goto send_done;
+ }
+
+ return genlmsg_reply(skb, info);
+
+nla_put_failure:
+ err = -EMSGSIZE;
+err_table_put:
+err_skb_send_alloc:
+ genlmsg_cancel(skb, hdr);
+ nlmsg_free(skb);
+ return err;
+}
+
+static int devlink_nl_cmd_dpipe_table_get(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ const char *table_name = NULL;
+
+ if (info->attrs[DEVLINK_ATTR_DPIPE_TABLE_NAME])
+ table_name = nla_data(info->attrs[DEVLINK_ATTR_DPIPE_TABLE_NAME]);
+
+ return devlink_dpipe_tables_fill(info, DEVLINK_CMD_DPIPE_TABLE_GET, 0,
+ &devlink->dpipe_table_list,
+ table_name);
+}
+
+static int devlink_dpipe_value_put(struct sk_buff *skb,
+ struct devlink_dpipe_value *value)
+{
+ if (nla_put(skb, DEVLINK_ATTR_DPIPE_VALUE,
+ value->value_size, value->value))
+ return -EMSGSIZE;
+ if (value->mask)
+ if (nla_put(skb, DEVLINK_ATTR_DPIPE_VALUE_MASK,
+ value->value_size, value->mask))
+ return -EMSGSIZE;
+ if (value->mapping_valid)
+ if (nla_put_u32(skb, DEVLINK_ATTR_DPIPE_VALUE_MAPPING,
+ value->mapping_value))
+ return -EMSGSIZE;
+ return 0;
+}
+
+static int devlink_dpipe_action_value_put(struct sk_buff *skb,
+ struct devlink_dpipe_value *value)
+{
+ if (!value->action)
+ return -EINVAL;
+ if (devlink_dpipe_action_put(skb, value->action))
+ return -EMSGSIZE;
+ if (devlink_dpipe_value_put(skb, value))
+ return -EMSGSIZE;
+ return 0;
+}
+
+static int devlink_dpipe_action_values_put(struct sk_buff *skb,
+ struct devlink_dpipe_value *values,
+ unsigned int values_count)
+{
+ struct nlattr *action_attr;
+ int i;
+ int err;
+
+ for (i = 0; i < values_count; i++) {
+ action_attr = nla_nest_start(skb,
+ DEVLINK_ATTR_DPIPE_ACTION_VALUE);
+ if (!action_attr)
+ return -EMSGSIZE;
+ err = devlink_dpipe_action_value_put(skb, &values[i]);
+ if (err)
+ goto err_action_value_put;
+ nla_nest_end(skb, action_attr);
+ }
+ return 0;
+
+err_action_value_put:
+ nla_nest_cancel(skb, action_attr);
+ return err;
+}
+
+static int devlink_dpipe_match_value_put(struct sk_buff *skb,
+ struct devlink_dpipe_value *value)
+{
+ if (!value->match)
+ return -EINVAL;
+ if (devlink_dpipe_match_put(skb, value->match))
+ return -EMSGSIZE;
+ if (devlink_dpipe_value_put(skb, value))
+ return -EMSGSIZE;
+ return 0;
+}
+
+static int devlink_dpipe_match_values_put(struct sk_buff *skb,
+ struct devlink_dpipe_value *values,
+ unsigned int values_count)
+{
+ struct nlattr *match_attr;
+ int i;
+ int err;
+
+ for (i = 0; i < values_count; i++) {
+ match_attr = nla_nest_start(skb,
+ DEVLINK_ATTR_DPIPE_MATCH_VALUE);
+ if (!match_attr)
+ return -EMSGSIZE;
+ err = devlink_dpipe_match_value_put(skb, &values[i]);
+ if (err)
+ goto err_match_value_put;
+ nla_nest_end(skb, match_attr);
+ }
+ return 0;
+
+err_match_value_put:
+ nla_nest_cancel(skb, match_attr);
+ return err;
+}
+
+static int devlink_dpipe_entry_put(struct sk_buff *skb,
+ struct devlink_dpipe_entry *entry)
+{
+ struct nlattr *entry_attr, *matches_attr, *actions_attr;
+ int err;
+
+ entry_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_ENTRY);
+ if (!entry_attr)
+ return -EMSGSIZE;
+
+ if (nla_put_u64_64bit(skb, DEVLINK_ATTR_DPIPE_ENTRY_INDEX, entry->index,
+ DEVLINK_ATTR_PAD))
+ goto nla_put_failure;
+ if (entry->counter_valid)
+ if (nla_put_u64_64bit(skb, DEVLINK_ATTR_DPIPE_ENTRY_COUNTER,
+ entry->counter, DEVLINK_ATTR_PAD))
+ goto nla_put_failure;
+
+ matches_attr = nla_nest_start(skb,
+ DEVLINK_ATTR_DPIPE_ENTRY_MATCH_VALUES);
+ if (!matches_attr)
+ goto nla_put_failure;
+
+ err = devlink_dpipe_match_values_put(skb, entry->match_values,
+ entry->match_values_count);
+ if (err) {
+ nla_nest_cancel(skb, matches_attr);
+ goto err_match_values_put;
+ }
+ nla_nest_end(skb, matches_attr);
+
+ actions_attr = nla_nest_start(skb,
+ DEVLINK_ATTR_DPIPE_ENTRY_ACTION_VALUES);
+ if (!actions_attr)
+ goto nla_put_failure;
+
+ err = devlink_dpipe_action_values_put(skb, entry->action_values,
+ entry->action_values_count);
+ if (err) {
+ nla_nest_cancel(skb, actions_attr);
+ goto err_action_values_put;
+ }
+ nla_nest_end(skb, actions_attr);
+
+ nla_nest_end(skb, entry_attr);
+ return 0;
+
+nla_put_failure:
+ err = -EMSGSIZE;
+err_match_values_put:
+err_action_values_put:
+ nla_nest_cancel(skb, entry_attr);
+ return err;
+}
+
+static struct devlink_dpipe_table *
+devlink_dpipe_table_find(struct list_head *dpipe_tables,
+ const char *table_name)
+{
+ struct devlink_dpipe_table *table;
+
+ list_for_each_entry_rcu(table, dpipe_tables, list) {
+ if (!strcmp(table->name, table_name))
+ return table;
+ }
+ return NULL;
+}
+
+int devlink_dpipe_entry_ctx_prepare(struct devlink_dpipe_dump_ctx *dump_ctx)
+{
+ struct devlink *devlink;
+ int err;
+
+ err = devlink_dpipe_send_and_alloc_skb(&dump_ctx->skb,
+ dump_ctx->info);
+ if (err)
+ return err;
+
+ dump_ctx->hdr = genlmsg_put(dump_ctx->skb,
+ dump_ctx->info->snd_portid,
+ dump_ctx->info->snd_seq,
+ &devlink_nl_family, NLM_F_MULTI,
+ dump_ctx->cmd);
+ if (!dump_ctx->hdr)
+ goto nla_put_failure;
+
+ devlink = dump_ctx->info->user_ptr[0];
+ if (devlink_nl_put_handle(dump_ctx->skb, devlink))
+ goto nla_put_failure;
+ dump_ctx->nest = nla_nest_start(dump_ctx->skb,
+ DEVLINK_ATTR_DPIPE_ENTRIES);
+ if (!dump_ctx->nest)
+ goto nla_put_failure;
+ return 0;
+
+nla_put_failure:
+ genlmsg_cancel(dump_ctx->skb, dump_ctx->hdr);
+ nlmsg_free(dump_ctx->skb);
+ return -EMSGSIZE;
+}
+EXPORT_SYMBOL_GPL(devlink_dpipe_entry_ctx_prepare);
+
+int devlink_dpipe_entry_ctx_append(struct devlink_dpipe_dump_ctx *dump_ctx,
+ struct devlink_dpipe_entry *entry)
+{
+ return devlink_dpipe_entry_put(dump_ctx->skb, entry);
+}
+EXPORT_SYMBOL_GPL(devlink_dpipe_entry_ctx_append);
+
+int devlink_dpipe_entry_ctx_close(struct devlink_dpipe_dump_ctx *dump_ctx)
+{
+ nla_nest_end(dump_ctx->skb, dump_ctx->nest);
+ genlmsg_end(dump_ctx->skb, dump_ctx->hdr);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(devlink_dpipe_entry_ctx_close);
+
+static int devlink_dpipe_entries_fill(struct genl_info *info,
+ enum devlink_command cmd, int flags,
+ struct devlink_dpipe_table *table)
+{
+ struct devlink_dpipe_dump_ctx dump_ctx;
+ struct nlmsghdr *nlh;
+ int err;
+
+ dump_ctx.skb = NULL;
+ dump_ctx.cmd = cmd;
+ dump_ctx.info = info;
+
+ err = table->table_ops->entries_dump(table->priv,
+ table->counters_enabled,
+ &dump_ctx);
+ if (err)
+ goto err_entries_dump;
+
+send_done:
+ nlh = nlmsg_put(dump_ctx.skb, info->snd_portid, info->snd_seq,
+ NLMSG_DONE, 0, flags | NLM_F_MULTI);
+ if (!nlh) {
+ err = devlink_dpipe_send_and_alloc_skb(&dump_ctx.skb, info);
+ if (err)
+ goto err_skb_send_alloc;
+ goto send_done;
+ }
+ return genlmsg_reply(dump_ctx.skb, info);
+
+err_entries_dump:
+err_skb_send_alloc:
+ genlmsg_cancel(dump_ctx.skb, dump_ctx.hdr);
+ nlmsg_free(dump_ctx.skb);
+ return err;
+}
+
+static int devlink_nl_cmd_dpipe_entries_get(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ struct devlink_dpipe_table *table;
+ const char *table_name;
+
+ if (!info->attrs[DEVLINK_ATTR_DPIPE_TABLE_NAME])
+ return -EINVAL;
+
+ table_name = nla_data(info->attrs[DEVLINK_ATTR_DPIPE_TABLE_NAME]);
+ table = devlink_dpipe_table_find(&devlink->dpipe_table_list,
+ table_name);
+ if (!table)
+ return -EINVAL;
+
+ if (!table->table_ops->entries_dump)
+ return -EINVAL;
+
+ return devlink_dpipe_entries_fill(info, DEVLINK_CMD_DPIPE_ENTRIES_GET,
+ 0, table);
+}
+
+static int devlink_dpipe_fields_put(struct sk_buff *skb,
+ const struct devlink_dpipe_header *header)
+{
+ struct devlink_dpipe_field *field;
+ struct nlattr *field_attr;
+ int i;
+
+ for (i = 0; i < header->fields_count; i++) {
+ field = &header->fields[i];
+ field_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_FIELD);
+ if (!field_attr)
+ return -EMSGSIZE;
+ if (nla_put_string(skb, DEVLINK_ATTR_DPIPE_FIELD_NAME, field->name) ||
+ nla_put_u32(skb, DEVLINK_ATTR_DPIPE_FIELD_ID, field->id) ||
+ nla_put_u32(skb, DEVLINK_ATTR_DPIPE_FIELD_BITWIDTH, field->bitwidth) ||
+ nla_put_u32(skb, DEVLINK_ATTR_DPIPE_FIELD_MAPPING_TYPE, field->mapping_type))
+ goto nla_put_failure;
+ nla_nest_end(skb, field_attr);
+ }
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(skb, field_attr);
+ return -EMSGSIZE;
+}
+
+static int devlink_dpipe_header_put(struct sk_buff *skb,
+ struct devlink_dpipe_header *header)
+{
+ struct nlattr *fields_attr, *header_attr;
+ int err;
+
+ header_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_HEADER);
+ if (!header_attr)
+ return -EMSGSIZE;
+
+ if (nla_put_string(skb, DEVLINK_ATTR_DPIPE_HEADER_NAME, header->name) ||
+ nla_put_u32(skb, DEVLINK_ATTR_DPIPE_HEADER_ID, header->id) ||
+ nla_put_u8(skb, DEVLINK_ATTR_DPIPE_HEADER_GLOBAL, header->global))
+ goto nla_put_failure;
+
+ fields_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_HEADER_FIELDS);
+ if (!fields_attr)
+ goto nla_put_failure;
+
+ err = devlink_dpipe_fields_put(skb, header);
+ if (err) {
+ nla_nest_cancel(skb, fields_attr);
+ goto nla_put_failure;
+ }
+ nla_nest_end(skb, fields_attr);
+ nla_nest_end(skb, header_attr);
+ return 0;
+
+nla_put_failure:
+ err = -EMSGSIZE;
+ nla_nest_cancel(skb, header_attr);
+ return err;
+}
+
+static int devlink_dpipe_headers_fill(struct genl_info *info,
+ enum devlink_command cmd, int flags,
+ struct devlink_dpipe_headers *
+ dpipe_headers)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ struct nlattr *headers_attr;
+ struct sk_buff *skb = NULL;
+ struct nlmsghdr *nlh;
+ void *hdr;
+ int i, j;
+ int err;
+
+ i = 0;
+start_again:
+ err = devlink_dpipe_send_and_alloc_skb(&skb, info);
+ if (err)
+ return err;
+
+ hdr = genlmsg_put(skb, info->snd_portid, info->snd_seq,
+ &devlink_nl_family, NLM_F_MULTI, cmd);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ if (devlink_nl_put_handle(skb, devlink))
+ goto nla_put_failure;
+ headers_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_HEADERS);
+ if (!headers_attr)
+ goto nla_put_failure;
+
+ j = 0;
+ for (; i < dpipe_headers->headers_count; i++) {
+ err = devlink_dpipe_header_put(skb, dpipe_headers->headers[i]);
+ if (err) {
+ if (!j)
+ goto err_table_put;
+ break;
+ }
+ j++;
+ }
+ nla_nest_end(skb, headers_attr);
+ genlmsg_end(skb, hdr);
+ if (i != dpipe_headers->headers_count)
+ goto start_again;
+
+send_done:
+ nlh = nlmsg_put(skb, info->snd_portid, info->snd_seq,
+ NLMSG_DONE, 0, flags | NLM_F_MULTI);
+ if (!nlh) {
+ err = devlink_dpipe_send_and_alloc_skb(&skb, info);
+ if (err)
+ goto err_skb_send_alloc;
+ goto send_done;
+ }
+ return genlmsg_reply(skb, info);
+
+nla_put_failure:
+ err = -EMSGSIZE;
+err_table_put:
+err_skb_send_alloc:
+ genlmsg_cancel(skb, hdr);
+ nlmsg_free(skb);
+ return err;
+}
+
+static int devlink_nl_cmd_dpipe_headers_get(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+
+ if (!devlink->dpipe_headers)
+ return -EOPNOTSUPP;
+ return devlink_dpipe_headers_fill(info, DEVLINK_CMD_DPIPE_HEADERS_GET,
+ 0, devlink->dpipe_headers);
+}
+
+static int devlink_dpipe_table_counters_set(struct devlink *devlink,
+ const char *table_name,
+ bool enable)
+{
+ struct devlink_dpipe_table *table;
+
+ table = devlink_dpipe_table_find(&devlink->dpipe_table_list,
+ table_name);
+ if (!table)
+ return -EINVAL;
+
+ if (table->counter_control_extern)
+ return -EOPNOTSUPP;
+
+ if (!(table->counters_enabled ^ enable))
+ return 0;
+
+ table->counters_enabled = enable;
+ if (table->table_ops->counters_set_update)
+ table->table_ops->counters_set_update(table->priv, enable);
+ return 0;
+}
+
+static int devlink_nl_cmd_dpipe_table_counters_set(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ const char *table_name;
+ bool counters_enable;
+
+ if (!info->attrs[DEVLINK_ATTR_DPIPE_TABLE_NAME] ||
+ !info->attrs[DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED])
+ return -EINVAL;
+
+ table_name = nla_data(info->attrs[DEVLINK_ATTR_DPIPE_TABLE_NAME]);
+ counters_enable = !!nla_get_u8(info->attrs[DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED]);
+
+ return devlink_dpipe_table_counters_set(devlink, table_name,
+ counters_enable);
}
static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
@@ -1512,6 +2209,9 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
[DEVLINK_ATTR_SB_TC_INDEX] = { .type = NLA_U16 },
[DEVLINK_ATTR_ESWITCH_MODE] = { .type = NLA_U16 },
[DEVLINK_ATTR_ESWITCH_INLINE_MODE] = { .type = NLA_U8 },
+ [DEVLINK_ATTR_ESWITCH_ENCAP_MODE] = { .type = NLA_U8 },
+ [DEVLINK_ATTR_DPIPE_TABLE_NAME] = { .type = NLA_NUL_STRING },
+ [DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED] = { .type = NLA_U8 },
};
static const struct genl_ops devlink_nl_ops[] = {
@@ -1644,6 +2344,34 @@ static const struct genl_ops devlink_nl_ops[] = {
.flags = GENL_ADMIN_PERM,
.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
},
+ {
+ .cmd = DEVLINK_CMD_DPIPE_TABLE_GET,
+ .doit = devlink_nl_cmd_dpipe_table_get,
+ .policy = devlink_nl_policy,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+ },
+ {
+ .cmd = DEVLINK_CMD_DPIPE_ENTRIES_GET,
+ .doit = devlink_nl_cmd_dpipe_entries_get,
+ .policy = devlink_nl_policy,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+ },
+ {
+ .cmd = DEVLINK_CMD_DPIPE_HEADERS_GET,
+ .doit = devlink_nl_cmd_dpipe_headers_get,
+ .policy = devlink_nl_policy,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+ },
+ {
+ .cmd = DEVLINK_CMD_DPIPE_TABLE_COUNTERS_SET,
+ .doit = devlink_nl_cmd_dpipe_table_counters_set,
+ .policy = devlink_nl_policy,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+ },
};
static struct genl_family devlink_nl_family __ro_after_init = {
@@ -1680,6 +2408,7 @@ struct devlink *devlink_alloc(const struct devlink_ops *ops, size_t priv_size)
devlink_net_set(devlink, &init_net);
INIT_LIST_HEAD(&devlink->port_list);
INIT_LIST_HEAD(&devlink->sb_list);
+ INIT_LIST_HEAD_RCU(&devlink->dpipe_table_list);
return devlink;
}
EXPORT_SYMBOL_GPL(devlink_alloc);
@@ -1880,6 +2609,133 @@ void devlink_sb_unregister(struct devlink *devlink, unsigned int sb_index)
}
EXPORT_SYMBOL_GPL(devlink_sb_unregister);
+/**
+ * devlink_dpipe_headers_register - register dpipe headers
+ *
+ * @devlink: devlink
+ * @dpipe_headers: dpipe header array
+ *
+ * Register the headers supported by hardware.
+ */
+int devlink_dpipe_headers_register(struct devlink *devlink,
+ struct devlink_dpipe_headers *dpipe_headers)
+{
+ mutex_lock(&devlink_mutex);
+ devlink->dpipe_headers = dpipe_headers;
+ mutex_unlock(&devlink_mutex);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(devlink_dpipe_headers_register);
+
+/**
+ * devlink_dpipe_headers_unregister - unregister dpipe headers
+ *
+ * @devlink: devlink
+ *
+ * Unregister the headers supported by hardware.
+ */
+void devlink_dpipe_headers_unregister(struct devlink *devlink)
+{
+ mutex_lock(&devlink_mutex);
+ devlink->dpipe_headers = NULL;
+ mutex_unlock(&devlink_mutex);
+}
+EXPORT_SYMBOL_GPL(devlink_dpipe_headers_unregister);
+
+/**
+ * devlink_dpipe_table_counter_enabled - check if counter allocation
+ * required
+ * @devlink: devlink
+ * @table_name: tables name
+ *
+ * Used by driver to check if counter allocation is required.
+ * After counter allocation is turned on the table entries
+ * are updated to include counter statistics.
+ *
+ * After that point on the driver must respect the counter
+ * state so that each entry added to the table is added
+ * with a counter.
+ */
+bool devlink_dpipe_table_counter_enabled(struct devlink *devlink,
+ const char *table_name)
+{
+ struct devlink_dpipe_table *table;
+ bool enabled;
+
+ rcu_read_lock();
+ table = devlink_dpipe_table_find(&devlink->dpipe_table_list,
+ table_name);
+ enabled = false;
+ if (table)
+ enabled = table->counters_enabled;
+ rcu_read_unlock();
+ return enabled;
+}
+EXPORT_SYMBOL_GPL(devlink_dpipe_table_counter_enabled);
+
+/**
+ * devlink_dpipe_table_register - register dpipe table
+ *
+ * @devlink: devlink
+ * @table_name: table name
+ * @table_ops: table ops
+ * @priv: priv
+ * @size: size
+ * @counter_control_extern: external control for counters
+ */
+int devlink_dpipe_table_register(struct devlink *devlink,
+ const char *table_name,
+ struct devlink_dpipe_table_ops *table_ops,
+ void *priv, u64 size,
+ bool counter_control_extern)
+{
+ struct devlink_dpipe_table *table;
+
+ if (devlink_dpipe_table_find(&devlink->dpipe_table_list, table_name))
+ return -EEXIST;
+
+ table = kzalloc(sizeof(*table), GFP_KERNEL);
+ if (!table)
+ return -ENOMEM;
+
+ table->name = table_name;
+ table->table_ops = table_ops;
+ table->priv = priv;
+ table->size = size;
+ table->counter_control_extern = counter_control_extern;
+
+ mutex_lock(&devlink_mutex);
+ list_add_tail_rcu(&table->list, &devlink->dpipe_table_list);
+ mutex_unlock(&devlink_mutex);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(devlink_dpipe_table_register);
+
+/**
+ * devlink_dpipe_table_unregister - unregister dpipe table
+ *
+ * @devlink: devlink
+ * @table_name: table name
+ */
+void devlink_dpipe_table_unregister(struct devlink *devlink,
+ const char *table_name)
+{
+ struct devlink_dpipe_table *table;
+
+ mutex_lock(&devlink_mutex);
+ table = devlink_dpipe_table_find(&devlink->dpipe_table_list,
+ table_name);
+ if (!table)
+ goto unlock;
+ list_del_rcu(&table->list);
+ mutex_unlock(&devlink_mutex);
+ kfree_rcu(table, rcu);
+ return;
+unlock:
+ mutex_unlock(&devlink_mutex);
+}
+EXPORT_SYMBOL_GPL(devlink_dpipe_table_unregister);
+
static int __init devlink_module_init(void)
{
return genl_register_family(&devlink_nl_family);
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index fb55327dcfea..70ccda233bd1 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -412,9 +412,8 @@ static int __init init_net_drop_monitor(void)
for_each_possible_cpu(cpu) {
data = &per_cpu(dm_cpu_data, cpu);
INIT_WORK(&data->dm_alert_work, send_dm_alert);
- init_timer(&data->send_timer);
- data->send_timer.data = (unsigned long)data;
- data->send_timer.function = sched_send_work;
+ setup_timer(&data->send_timer, sched_send_work,
+ (unsigned long)data);
spin_lock_init(&data->lock);
reset_per_cpu_data(data);
}
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index aecb2c7241b6..03111a2d6653 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -90,6 +90,7 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN]
[NETIF_F_GSO_UDP_TUNNEL_CSUM_BIT] = "tx-udp_tnl-csum-segmentation",
[NETIF_F_GSO_PARTIAL_BIT] = "tx-gso-partial",
[NETIF_F_GSO_SCTP_BIT] = "tx-sctp-segmentation",
+ [NETIF_F_GSO_ESP_BIT] = "tx-esp-segmentation",
[NETIF_F_FCOE_CRC_BIT] = "tx-checksum-fcoe-crc",
[NETIF_F_SCTP_CRC_BIT] = "tx-checksum-sctp",
@@ -103,12 +104,15 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN]
[NETIF_F_RXALL_BIT] = "rx-all",
[NETIF_F_HW_L2FW_DOFFLOAD_BIT] = "l2-fwd-offload",
[NETIF_F_HW_TC_BIT] = "hw-tc-offload",
+ [NETIF_F_HW_ESP_BIT] = "esp-hw-offload",
+ [NETIF_F_HW_ESP_TX_CSUM_BIT] = "esp-tx-csum-hw-offload",
};
static const char
rss_hash_func_strings[ETH_RSS_HASH_FUNCS_COUNT][ETH_GSTRING_LEN] = {
[ETH_RSS_HASH_TOP_BIT] = "toeplitz",
[ETH_RSS_HASH_XOR_BIT] = "xor",
+ [ETH_RSS_HASH_CRC32_BIT] = "crc32",
};
static const char
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index b6791d94841d..f21c4d3aeae0 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -23,6 +23,20 @@ static const struct fib_kuid_range fib_kuid_range_unset = {
KUIDT_INIT(~0),
};
+bool fib_rule_matchall(const struct fib_rule *rule)
+{
+ if (rule->iifindex || rule->oifindex || rule->mark || rule->tun_id ||
+ rule->flags)
+ return false;
+ if (rule->suppress_ifgroup != -1 || rule->suppress_prefixlen != -1)
+ return false;
+ if (!uid_eq(rule->uid_range.start, fib_kuid_range_unset.start) ||
+ !uid_eq(rule->uid_range.end, fib_kuid_range_unset.end))
+ return false;
+ return true;
+}
+EXPORT_SYMBOL_GPL(fib_rule_matchall);
+
int fib_default_rule_add(struct fib_rules_ops *ops,
u32 pref, u32 table, u32 flags)
{
@@ -354,7 +368,8 @@ static int rule_exists(struct fib_rules_ops *ops, struct fib_rule_hdr *frh,
return 0;
}
-int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh)
+int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
struct fib_rule_hdr *frh = nlmsg_data(nlh);
@@ -372,7 +387,7 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh)
goto errout;
}
- err = nlmsg_parse(nlh, sizeof(*frh), tb, FRA_MAX, ops->policy);
+ err = nlmsg_parse(nlh, sizeof(*frh), tb, FRA_MAX, ops->policy, extack);
if (err < 0)
goto errout;
@@ -425,6 +440,7 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh)
if (tb[FRA_TUN_ID])
rule->tun_id = nla_get_be64(tb[FRA_TUN_ID]);
+ err = -EINVAL;
if (tb[FRA_L3MDEV]) {
#ifdef CONFIG_NET_L3_MASTER_DEV
rule->l3mdev = nla_get_u8(tb[FRA_L3MDEV]);
@@ -446,7 +462,6 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh)
else
rule->suppress_ifgroup = -1;
- err = -EINVAL;
if (tb[FRA_GOTO]) {
if (rule->action != FR_ACT_GOTO)
goto errout_free;
@@ -547,7 +562,8 @@ errout:
}
EXPORT_SYMBOL_GPL(fib_nl_newrule);
-int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh)
+int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
struct fib_rule_hdr *frh = nlmsg_data(nlh);
@@ -566,7 +582,7 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh)
goto errout;
}
- err = nlmsg_parse(nlh, sizeof(*frh), tb, FRA_MAX, ops->policy);
+ err = nlmsg_parse(nlh, sizeof(*frh), tb, FRA_MAX, ops->policy, extack);
if (err < 0)
goto errout;
@@ -576,8 +592,10 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh)
if (tb[FRA_UID_RANGE]) {
range = nla_get_kuid_range(tb);
- if (!uid_range_set(&range))
+ if (!uid_range_set(&range)) {
+ err = -EINVAL;
goto errout;
+ }
} else {
range = fib_kuid_range_unset;
}
diff --git a/net/core/filter.c b/net/core/filter.c
index ebaeaf2e46e8..a253a6197e6b 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -26,6 +26,7 @@
#include <linux/mm.h>
#include <linux/fcntl.h>
#include <linux/socket.h>
+#include <linux/sock_diag.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
@@ -52,6 +53,7 @@
#include <net/dst_metadata.h>
#include <net/dst.h>
#include <net/sock_reuseport.h>
+#include <net/busy_poll.h>
/**
* sk_filter_trim_cap - run a packet through a socket filter
@@ -91,7 +93,12 @@ int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap)
rcu_read_lock();
filter = rcu_dereference(sk->sk_filter);
if (filter) {
- unsigned int pkt_len = bpf_prog_run_save_cb(filter->prog, skb);
+ struct sock *save_sk = skb->sk;
+ unsigned int pkt_len;
+
+ skb->sk = sk;
+ pkt_len = bpf_prog_run_save_cb(filter->prog, skb);
+ skb->sk = save_sk;
err = pkt_len ? pskb_trim(skb, max(cap, pkt_len)) : -EPERM;
}
rcu_read_unlock();
@@ -348,7 +355,8 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
* @new_prog: buffer where converted program will be stored
* @new_len: pointer to store length of converted program
*
- * Remap 'sock_filter' style BPF instruction set to 'sock_filter_ext' style.
+ * Remap 'sock_filter' style classic BPF (cBPF) instruction set to 'bpf_insn'
+ * style extended BPF (eBPF).
* Conversion workflow:
*
* 1) First pass for calculating the new program length:
@@ -928,7 +936,7 @@ static void sk_filter_release_rcu(struct rcu_head *rcu)
*/
static void sk_filter_release(struct sk_filter *fp)
{
- if (atomic_dec_and_test(&fp->refcnt))
+ if (refcount_dec_and_test(&fp->refcnt))
call_rcu(&fp->rcu, sk_filter_release_rcu);
}
@@ -943,20 +951,27 @@ void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp)
/* try to charge the socket memory if there is space available
* return true on success
*/
-bool sk_filter_charge(struct sock *sk, struct sk_filter *fp)
+static bool __sk_filter_charge(struct sock *sk, struct sk_filter *fp)
{
u32 filter_size = bpf_prog_size(fp->prog->len);
/* same check as in sock_kmalloc() */
if (filter_size <= sysctl_optmem_max &&
atomic_read(&sk->sk_omem_alloc) + filter_size < sysctl_optmem_max) {
- atomic_inc(&fp->refcnt);
atomic_add(filter_size, &sk->sk_omem_alloc);
return true;
}
return false;
}
+bool sk_filter_charge(struct sock *sk, struct sk_filter *fp)
+{
+ bool ret = __sk_filter_charge(sk, fp);
+ if (ret)
+ refcount_inc(&fp->refcnt);
+ return ret;
+}
+
static struct bpf_prog *bpf_migrate_filter(struct bpf_prog *fp)
{
struct sock_filter *old_prog;
@@ -1179,12 +1194,12 @@ static int __sk_attach_prog(struct bpf_prog *prog, struct sock *sk)
return -ENOMEM;
fp->prog = prog;
- atomic_set(&fp->refcnt, 0);
- if (!sk_filter_charge(sk, fp)) {
+ if (!__sk_filter_charge(sk, fp)) {
kfree(fp);
return -ENOMEM;
}
+ refcount_set(&fp->refcnt, 1);
old_fp = rcu_dereference_protected(sk->sk_filter,
lockdep_sock_is_held(sk));
@@ -2599,6 +2614,36 @@ static const struct bpf_func_proto bpf_xdp_event_output_proto = {
.arg5_type = ARG_CONST_SIZE,
};
+BPF_CALL_1(bpf_get_socket_cookie, struct sk_buff *, skb)
+{
+ return skb->sk ? sock_gen_cookie(skb->sk) : 0;
+}
+
+static const struct bpf_func_proto bpf_get_socket_cookie_proto = {
+ .func = bpf_get_socket_cookie,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+};
+
+BPF_CALL_1(bpf_get_socket_uid, struct sk_buff *, skb)
+{
+ struct sock *sk = sk_to_full_sk(skb->sk);
+ kuid_t kuid;
+
+ if (!sk || !sk_fullsock(sk))
+ return overflowuid;
+ kuid = sock_net_uid(sock_net(sk), sk);
+ return from_kuid_munged(sock_net(sk)->user_ns, kuid);
+}
+
+static const struct bpf_func_proto bpf_get_socket_uid_proto = {
+ .func = bpf_get_socket_uid,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+};
+
static const struct bpf_func_proto *
bpf_base_func_proto(enum bpf_func_id func_id)
{
@@ -2633,6 +2678,10 @@ sk_filter_func_proto(enum bpf_func_id func_id)
switch (func_id) {
case BPF_FUNC_skb_load_bytes:
return &bpf_skb_load_bytes_proto;
+ case BPF_FUNC_get_socket_cookie:
+ return &bpf_get_socket_cookie_proto;
+ case BPF_FUNC_get_socket_uid:
+ return &bpf_get_socket_uid_proto;
default:
return bpf_base_func_proto(func_id);
}
@@ -2692,6 +2741,10 @@ tc_cls_act_func_proto(enum bpf_func_id func_id)
return &bpf_get_smp_processor_id_proto;
case BPF_FUNC_skb_under_cgroup:
return &bpf_skb_under_cgroup_proto;
+ case BPF_FUNC_get_socket_cookie:
+ return &bpf_get_socket_cookie_proto;
+ case BPF_FUNC_get_socket_uid:
+ return &bpf_get_socket_uid_proto;
default:
return bpf_base_func_proto(func_id);
}
@@ -2715,12 +2768,7 @@ xdp_func_proto(enum bpf_func_id func_id)
static const struct bpf_func_proto *
cg_skb_func_proto(enum bpf_func_id func_id)
{
- switch (func_id) {
- case BPF_FUNC_skb_load_bytes:
- return &bpf_skb_load_bytes_proto;
- default:
- return bpf_base_func_proto(func_id);
- }
+ return sk_filter_func_proto(func_id);
}
static const struct bpf_func_proto *
@@ -3156,6 +3204,19 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
*insn++ = BPF_MOV64_IMM(si->dst_reg, 0);
#endif
break;
+
+ case offsetof(struct __sk_buff, napi_id):
+#if defined(CONFIG_NET_RX_BUSY_POLL)
+ BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, napi_id) != 4);
+
+ *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
+ offsetof(struct sk_buff, napi_id));
+ *insn++ = BPF_JMP_IMM(BPF_JGE, si->dst_reg, MIN_NAPI_ID, 1);
+ *insn++ = BPF_MOV64_IMM(si->dst_reg, 0);
+#else
+ *insn++ = BPF_MOV64_IMM(si->dst_reg, 0);
+#endif
+ break;
}
return insn - insn_buf;
@@ -3252,111 +3313,55 @@ static u32 xdp_convert_ctx_access(enum bpf_access_type type,
return insn - insn_buf;
}
-static const struct bpf_verifier_ops sk_filter_ops = {
+const struct bpf_verifier_ops sk_filter_prog_ops = {
.get_func_proto = sk_filter_func_proto,
.is_valid_access = sk_filter_is_valid_access,
.convert_ctx_access = bpf_convert_ctx_access,
};
-static const struct bpf_verifier_ops tc_cls_act_ops = {
+const struct bpf_verifier_ops tc_cls_act_prog_ops = {
.get_func_proto = tc_cls_act_func_proto,
.is_valid_access = tc_cls_act_is_valid_access,
.convert_ctx_access = tc_cls_act_convert_ctx_access,
.gen_prologue = tc_cls_act_prologue,
+ .test_run = bpf_prog_test_run_skb,
};
-static const struct bpf_verifier_ops xdp_ops = {
+const struct bpf_verifier_ops xdp_prog_ops = {
.get_func_proto = xdp_func_proto,
.is_valid_access = xdp_is_valid_access,
.convert_ctx_access = xdp_convert_ctx_access,
+ .test_run = bpf_prog_test_run_xdp,
};
-static const struct bpf_verifier_ops cg_skb_ops = {
+const struct bpf_verifier_ops cg_skb_prog_ops = {
.get_func_proto = cg_skb_func_proto,
.is_valid_access = sk_filter_is_valid_access,
.convert_ctx_access = bpf_convert_ctx_access,
+ .test_run = bpf_prog_test_run_skb,
};
-static const struct bpf_verifier_ops lwt_inout_ops = {
+const struct bpf_verifier_ops lwt_inout_prog_ops = {
.get_func_proto = lwt_inout_func_proto,
.is_valid_access = lwt_is_valid_access,
.convert_ctx_access = bpf_convert_ctx_access,
+ .test_run = bpf_prog_test_run_skb,
};
-static const struct bpf_verifier_ops lwt_xmit_ops = {
+const struct bpf_verifier_ops lwt_xmit_prog_ops = {
.get_func_proto = lwt_xmit_func_proto,
.is_valid_access = lwt_is_valid_access,
.convert_ctx_access = bpf_convert_ctx_access,
.gen_prologue = tc_cls_act_prologue,
+ .test_run = bpf_prog_test_run_skb,
};
-static const struct bpf_verifier_ops cg_sock_ops = {
+const struct bpf_verifier_ops cg_sock_prog_ops = {
.get_func_proto = bpf_base_func_proto,
.is_valid_access = sock_filter_is_valid_access,
.convert_ctx_access = sock_filter_convert_ctx_access,
};
-static struct bpf_prog_type_list sk_filter_type __ro_after_init = {
- .ops = &sk_filter_ops,
- .type = BPF_PROG_TYPE_SOCKET_FILTER,
-};
-
-static struct bpf_prog_type_list sched_cls_type __ro_after_init = {
- .ops = &tc_cls_act_ops,
- .type = BPF_PROG_TYPE_SCHED_CLS,
-};
-
-static struct bpf_prog_type_list sched_act_type __ro_after_init = {
- .ops = &tc_cls_act_ops,
- .type = BPF_PROG_TYPE_SCHED_ACT,
-};
-
-static struct bpf_prog_type_list xdp_type __ro_after_init = {
- .ops = &xdp_ops,
- .type = BPF_PROG_TYPE_XDP,
-};
-
-static struct bpf_prog_type_list cg_skb_type __ro_after_init = {
- .ops = &cg_skb_ops,
- .type = BPF_PROG_TYPE_CGROUP_SKB,
-};
-
-static struct bpf_prog_type_list lwt_in_type __ro_after_init = {
- .ops = &lwt_inout_ops,
- .type = BPF_PROG_TYPE_LWT_IN,
-};
-
-static struct bpf_prog_type_list lwt_out_type __ro_after_init = {
- .ops = &lwt_inout_ops,
- .type = BPF_PROG_TYPE_LWT_OUT,
-};
-
-static struct bpf_prog_type_list lwt_xmit_type __ro_after_init = {
- .ops = &lwt_xmit_ops,
- .type = BPF_PROG_TYPE_LWT_XMIT,
-};
-
-static struct bpf_prog_type_list cg_sock_type __ro_after_init = {
- .ops = &cg_sock_ops,
- .type = BPF_PROG_TYPE_CGROUP_SOCK
-};
-
-static int __init register_sk_filter_ops(void)
-{
- bpf_register_prog_type(&sk_filter_type);
- bpf_register_prog_type(&sched_cls_type);
- bpf_register_prog_type(&sched_act_type);
- bpf_register_prog_type(&xdp_type);
- bpf_register_prog_type(&cg_skb_type);
- bpf_register_prog_type(&cg_sock_type);
- bpf_register_prog_type(&lwt_in_type);
- bpf_register_prog_type(&lwt_out_type);
- bpf_register_prog_type(&lwt_xmit_type);
-
- return 0;
-}
-late_initcall(register_sk_filter_ops);
-
int sk_detach_filter(struct sock *sk)
{
int ret = -ENOENT;
diff --git a/net/core/flow.c b/net/core/flow.c
index f765c11d8df5..f7f5d1932a27 100644
--- a/net/core/flow.c
+++ b/net/core/flow.c
@@ -47,7 +47,7 @@ struct flow_flush_info {
static struct kmem_cache *flow_cachep __read_mostly;
-#define flow_cache_hash_size(cache) (1 << (cache)->hash_shift)
+#define flow_cache_hash_size(cache) (1U << (cache)->hash_shift)
#define FLOW_HASH_RND_PERIOD (10 * 60 * HZ)
static void flow_cache_new_hashrnd(unsigned long arg)
@@ -99,7 +99,8 @@ static void flow_cache_gc_task(struct work_struct *work)
}
static void flow_cache_queue_garbage(struct flow_cache_percpu *fcp,
- int deleted, struct list_head *gc_list,
+ unsigned int deleted,
+ struct list_head *gc_list,
struct netns_xfrm *xfrm)
{
if (deleted) {
@@ -114,17 +115,18 @@ static void flow_cache_queue_garbage(struct flow_cache_percpu *fcp,
static void __flow_cache_shrink(struct flow_cache *fc,
struct flow_cache_percpu *fcp,
- int shrink_to)
+ unsigned int shrink_to)
{
struct flow_cache_entry *fle;
struct hlist_node *tmp;
LIST_HEAD(gc_list);
- int i, deleted = 0;
+ unsigned int deleted = 0;
struct netns_xfrm *xfrm = container_of(fc, struct netns_xfrm,
flow_cache_global);
+ unsigned int i;
for (i = 0; i < flow_cache_hash_size(fc); i++) {
- int saved = 0;
+ unsigned int saved = 0;
hlist_for_each_entry_safe(fle, tmp,
&fcp->hash_table[i], u.hlist) {
@@ -145,7 +147,7 @@ static void __flow_cache_shrink(struct flow_cache *fc,
static void flow_cache_shrink(struct flow_cache *fc,
struct flow_cache_percpu *fcp)
{
- int shrink_to = fc->low_watermark / flow_cache_hash_size(fc);
+ unsigned int shrink_to = fc->low_watermark / flow_cache_hash_size(fc);
__flow_cache_shrink(fc, fcp, shrink_to);
}
@@ -161,7 +163,7 @@ static void flow_new_hash_rnd(struct flow_cache *fc,
static u32 flow_hash_code(struct flow_cache *fc,
struct flow_cache_percpu *fcp,
const struct flowi *key,
- size_t keysize)
+ unsigned int keysize)
{
const u32 *k = (const u32 *) key;
const u32 length = keysize * sizeof(flow_compare_t) / sizeof(u32);
@@ -174,7 +176,7 @@ static u32 flow_hash_code(struct flow_cache *fc,
* important assumptions that we can here, such as alignment.
*/
static int flow_key_compare(const struct flowi *key1, const struct flowi *key2,
- size_t keysize)
+ unsigned int keysize)
{
const flow_compare_t *k1, *k1_lim, *k2;
@@ -199,7 +201,7 @@ flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir,
struct flow_cache_percpu *fcp;
struct flow_cache_entry *fle, *tfle;
struct flow_cache_object *flo;
- size_t keysize;
+ unsigned int keysize;
unsigned int hash;
local_bh_disable();
@@ -295,9 +297,10 @@ static void flow_cache_flush_tasklet(unsigned long data)
struct flow_cache_entry *fle;
struct hlist_node *tmp;
LIST_HEAD(gc_list);
- int i, deleted = 0;
+ unsigned int deleted = 0;
struct netns_xfrm *xfrm = container_of(fc, struct netns_xfrm,
flow_cache_global);
+ unsigned int i;
fcp = this_cpu_ptr(fc->percpu);
for (i = 0; i < flow_cache_hash_size(fc); i++) {
@@ -327,7 +330,7 @@ static void flow_cache_flush_tasklet(unsigned long data)
static int flow_cache_percpu_empty(struct flow_cache *fc, int cpu)
{
struct flow_cache_percpu *fcp;
- int i;
+ unsigned int i;
fcp = per_cpu_ptr(fc->percpu, cpu);
for (i = 0; i < flow_cache_hash_size(fc); i++)
@@ -402,12 +405,12 @@ void flow_cache_flush_deferred(struct net *net)
static int flow_cache_cpu_prepare(struct flow_cache *fc, int cpu)
{
struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, cpu);
- size_t sz = sizeof(struct hlist_head) * flow_cache_hash_size(fc);
+ unsigned int sz = sizeof(struct hlist_head) * flow_cache_hash_size(fc);
if (!fcp->hash_table) {
fcp->hash_table = kzalloc_node(sz, GFP_KERNEL, cpu_to_node(cpu));
if (!fcp->hash_table) {
- pr_err("NET: failed to allocate flow cache sz %zu\n", sz);
+ pr_err("NET: failed to allocate flow cache sz %u\n", sz);
return -ENOMEM;
}
fcp->hash_rnd_recalc = 1;
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index d98d4998213d..28d94bce4df8 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -113,6 +113,235 @@ __be32 __skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto,
}
EXPORT_SYMBOL(__skb_flow_get_ports);
+enum flow_dissect_ret {
+ FLOW_DISSECT_RET_OUT_GOOD,
+ FLOW_DISSECT_RET_OUT_BAD,
+ FLOW_DISSECT_RET_OUT_PROTO_AGAIN,
+};
+
+static enum flow_dissect_ret
+__skb_flow_dissect_mpls(const struct sk_buff *skb,
+ struct flow_dissector *flow_dissector,
+ void *target_container, void *data, int nhoff, int hlen)
+{
+ struct flow_dissector_key_keyid *key_keyid;
+ struct mpls_label *hdr, _hdr[2];
+ u32 entry, label;
+
+ if (!dissector_uses_key(flow_dissector,
+ FLOW_DISSECTOR_KEY_MPLS_ENTROPY) &&
+ !dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_MPLS))
+ return FLOW_DISSECT_RET_OUT_GOOD;
+
+ hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data,
+ hlen, &_hdr);
+ if (!hdr)
+ return FLOW_DISSECT_RET_OUT_BAD;
+
+ entry = ntohl(hdr[0].entry);
+ label = (entry & MPLS_LS_LABEL_MASK) >> MPLS_LS_LABEL_SHIFT;
+
+ if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_MPLS)) {
+ struct flow_dissector_key_mpls *key_mpls;
+
+ key_mpls = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_MPLS,
+ target_container);
+ key_mpls->mpls_label = label;
+ key_mpls->mpls_ttl = (entry & MPLS_LS_TTL_MASK)
+ >> MPLS_LS_TTL_SHIFT;
+ key_mpls->mpls_tc = (entry & MPLS_LS_TC_MASK)
+ >> MPLS_LS_TC_SHIFT;
+ key_mpls->mpls_bos = (entry & MPLS_LS_S_MASK)
+ >> MPLS_LS_S_SHIFT;
+ }
+
+ if (label == MPLS_LABEL_ENTROPY) {
+ key_keyid = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_MPLS_ENTROPY,
+ target_container);
+ key_keyid->keyid = hdr[1].entry & htonl(MPLS_LS_LABEL_MASK);
+ }
+ return FLOW_DISSECT_RET_OUT_GOOD;
+}
+
+static enum flow_dissect_ret
+__skb_flow_dissect_arp(const struct sk_buff *skb,
+ struct flow_dissector *flow_dissector,
+ void *target_container, void *data, int nhoff, int hlen)
+{
+ struct flow_dissector_key_arp *key_arp;
+ struct {
+ unsigned char ar_sha[ETH_ALEN];
+ unsigned char ar_sip[4];
+ unsigned char ar_tha[ETH_ALEN];
+ unsigned char ar_tip[4];
+ } *arp_eth, _arp_eth;
+ const struct arphdr *arp;
+ struct arphdr _arp;
+
+ if (!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ARP))
+ return FLOW_DISSECT_RET_OUT_GOOD;
+
+ arp = __skb_header_pointer(skb, nhoff, sizeof(_arp), data,
+ hlen, &_arp);
+ if (!arp)
+ return FLOW_DISSECT_RET_OUT_BAD;
+
+ if (arp->ar_hrd != htons(ARPHRD_ETHER) ||
+ arp->ar_pro != htons(ETH_P_IP) ||
+ arp->ar_hln != ETH_ALEN ||
+ arp->ar_pln != 4 ||
+ (arp->ar_op != htons(ARPOP_REPLY) &&
+ arp->ar_op != htons(ARPOP_REQUEST)))
+ return FLOW_DISSECT_RET_OUT_BAD;
+
+ arp_eth = __skb_header_pointer(skb, nhoff + sizeof(_arp),
+ sizeof(_arp_eth), data,
+ hlen, &_arp_eth);
+ if (!arp_eth)
+ return FLOW_DISSECT_RET_OUT_BAD;
+
+ key_arp = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_ARP,
+ target_container);
+
+ memcpy(&key_arp->sip, arp_eth->ar_sip, sizeof(key_arp->sip));
+ memcpy(&key_arp->tip, arp_eth->ar_tip, sizeof(key_arp->tip));
+
+ /* Only store the lower byte of the opcode;
+ * this covers ARPOP_REPLY and ARPOP_REQUEST.
+ */
+ key_arp->op = ntohs(arp->ar_op) & 0xff;
+
+ ether_addr_copy(key_arp->sha, arp_eth->ar_sha);
+ ether_addr_copy(key_arp->tha, arp_eth->ar_tha);
+
+ return FLOW_DISSECT_RET_OUT_GOOD;
+}
+
+static enum flow_dissect_ret
+__skb_flow_dissect_gre(const struct sk_buff *skb,
+ struct flow_dissector_key_control *key_control,
+ struct flow_dissector *flow_dissector,
+ void *target_container, void *data,
+ __be16 *p_proto, int *p_nhoff, int *p_hlen,
+ unsigned int flags)
+{
+ struct flow_dissector_key_keyid *key_keyid;
+ struct gre_base_hdr *hdr, _hdr;
+ int offset = 0;
+ u16 gre_ver;
+
+ hdr = __skb_header_pointer(skb, *p_nhoff, sizeof(_hdr),
+ data, *p_hlen, &_hdr);
+ if (!hdr)
+ return FLOW_DISSECT_RET_OUT_BAD;
+
+ /* Only look inside GRE without routing */
+ if (hdr->flags & GRE_ROUTING)
+ return FLOW_DISSECT_RET_OUT_GOOD;
+
+ /* Only look inside GRE for version 0 and 1 */
+ gre_ver = ntohs(hdr->flags & GRE_VERSION);
+ if (gre_ver > 1)
+ return FLOW_DISSECT_RET_OUT_GOOD;
+
+ *p_proto = hdr->protocol;
+ if (gre_ver) {
+ /* Version1 must be PPTP, and check the flags */
+ if (!(*p_proto == GRE_PROTO_PPP && (hdr->flags & GRE_KEY)))
+ return FLOW_DISSECT_RET_OUT_GOOD;
+ }
+
+ offset += sizeof(struct gre_base_hdr);
+
+ if (hdr->flags & GRE_CSUM)
+ offset += sizeof(((struct gre_full_hdr *) 0)->csum) +
+ sizeof(((struct gre_full_hdr *) 0)->reserved1);
+
+ if (hdr->flags & GRE_KEY) {
+ const __be32 *keyid;
+ __be32 _keyid;
+
+ keyid = __skb_header_pointer(skb, *p_nhoff + offset,
+ sizeof(_keyid),
+ data, *p_hlen, &_keyid);
+ if (!keyid)
+ return FLOW_DISSECT_RET_OUT_BAD;
+
+ if (dissector_uses_key(flow_dissector,
+ FLOW_DISSECTOR_KEY_GRE_KEYID)) {
+ key_keyid = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_GRE_KEYID,
+ target_container);
+ if (gre_ver == 0)
+ key_keyid->keyid = *keyid;
+ else
+ key_keyid->keyid = *keyid & GRE_PPTP_KEY_MASK;
+ }
+ offset += sizeof(((struct gre_full_hdr *) 0)->key);
+ }
+
+ if (hdr->flags & GRE_SEQ)
+ offset += sizeof(((struct pptp_gre_header *) 0)->seq);
+
+ if (gre_ver == 0) {
+ if (*p_proto == htons(ETH_P_TEB)) {
+ const struct ethhdr *eth;
+ struct ethhdr _eth;
+
+ eth = __skb_header_pointer(skb, *p_nhoff + offset,
+ sizeof(_eth),
+ data, *p_hlen, &_eth);
+ if (!eth)
+ return FLOW_DISSECT_RET_OUT_BAD;
+ *p_proto = eth->h_proto;
+ offset += sizeof(*eth);
+
+ /* Cap headers that we access via pointers at the
+ * end of the Ethernet header as our maximum alignment
+ * at that point is only 2 bytes.
+ */
+ if (NET_IP_ALIGN)
+ *p_hlen = *p_nhoff + offset;
+ }
+ } else { /* version 1, must be PPTP */
+ u8 _ppp_hdr[PPP_HDRLEN];
+ u8 *ppp_hdr;
+
+ if (hdr->flags & GRE_ACK)
+ offset += sizeof(((struct pptp_gre_header *) 0)->ack);
+
+ ppp_hdr = __skb_header_pointer(skb, *p_nhoff + offset,
+ sizeof(_ppp_hdr),
+ data, *p_hlen, _ppp_hdr);
+ if (!ppp_hdr)
+ return FLOW_DISSECT_RET_OUT_BAD;
+
+ switch (PPP_PROTOCOL(ppp_hdr)) {
+ case PPP_IP:
+ *p_proto = htons(ETH_P_IP);
+ break;
+ case PPP_IPV6:
+ *p_proto = htons(ETH_P_IPV6);
+ break;
+ default:
+ /* Could probably catch some more like MPLS */
+ break;
+ }
+
+ offset += PPP_HDRLEN;
+ }
+
+ *p_nhoff += offset;
+ key_control->flags |= FLOW_DIS_ENCAPSULATION;
+ if (flags & FLOW_DISSECTOR_F_STOP_AT_ENCAP)
+ return FLOW_DISSECT_RET_OUT_GOOD;
+
+ return FLOW_DISSECT_RET_OUT_PROTO_AGAIN;
+}
+
/**
* __skb_flow_dissect - extract the flow_keys struct and return it
* @skb: sk_buff to extract the flow from, can be NULL if the rest are specified
@@ -138,12 +367,10 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
struct flow_dissector_key_control *key_control;
struct flow_dissector_key_basic *key_basic;
struct flow_dissector_key_addrs *key_addrs;
- struct flow_dissector_key_arp *key_arp;
struct flow_dissector_key_ports *key_ports;
struct flow_dissector_key_icmp *key_icmp;
struct flow_dissector_key_tags *key_tags;
struct flow_dissector_key_vlan *key_vlan;
- struct flow_dissector_key_keyid *key_keyid;
bool skip_vlan = false;
u8 ip_proto = 0;
bool ret;
@@ -181,7 +408,7 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
memcpy(key_eth_addrs, &eth->h_dest, sizeof(*key_eth_addrs));
}
-again:
+proto_again:
switch (proto) {
case htons(ETH_P_IP): {
const struct iphdr *iph;
@@ -284,7 +511,7 @@ ipv6:
proto = vlan->h_vlan_encapsulated_proto;
nhoff += sizeof(*vlan);
if (skip_vlan)
- goto again;
+ goto proto_again;
}
skip_vlan = true;
@@ -307,7 +534,7 @@ ipv6:
}
}
- goto again;
+ goto proto_again;
}
case htons(ETH_P_PPP_SES): {
struct {
@@ -349,31 +576,17 @@ ipv6:
}
case htons(ETH_P_MPLS_UC):
- case htons(ETH_P_MPLS_MC): {
- struct mpls_label *hdr, _hdr[2];
+ case htons(ETH_P_MPLS_MC):
mpls:
- hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data,
- hlen, &_hdr);
- if (!hdr)
- goto out_bad;
-
- if ((ntohl(hdr[0].entry) & MPLS_LS_LABEL_MASK) >>
- MPLS_LS_LABEL_SHIFT == MPLS_LABEL_ENTROPY) {
- if (dissector_uses_key(flow_dissector,
- FLOW_DISSECTOR_KEY_MPLS_ENTROPY)) {
- key_keyid = skb_flow_dissector_target(flow_dissector,
- FLOW_DISSECTOR_KEY_MPLS_ENTROPY,
- target_container);
- key_keyid->keyid = hdr[1].entry &
- htonl(MPLS_LS_LABEL_MASK);
- }
-
+ switch (__skb_flow_dissect_mpls(skb, flow_dissector,
+ target_container, data,
+ nhoff, hlen)) {
+ case FLOW_DISSECT_RET_OUT_GOOD:
goto out_good;
+ case FLOW_DISSECT_RET_OUT_BAD:
+ default:
+ goto out_bad;
}
-
- goto out_good;
- }
-
case htons(ETH_P_FCOE):
if ((hlen - nhoff) < FCOE_HEADER_LEN)
goto out_bad;
@@ -382,177 +595,33 @@ mpls:
goto out_good;
case htons(ETH_P_ARP):
- case htons(ETH_P_RARP): {
- struct {
- unsigned char ar_sha[ETH_ALEN];
- unsigned char ar_sip[4];
- unsigned char ar_tha[ETH_ALEN];
- unsigned char ar_tip[4];
- } *arp_eth, _arp_eth;
- const struct arphdr *arp;
- struct arphdr _arp;
-
- arp = __skb_header_pointer(skb, nhoff, sizeof(_arp), data,
- hlen, &_arp);
- if (!arp)
- goto out_bad;
-
- if (arp->ar_hrd != htons(ARPHRD_ETHER) ||
- arp->ar_pro != htons(ETH_P_IP) ||
- arp->ar_hln != ETH_ALEN ||
- arp->ar_pln != 4 ||
- (arp->ar_op != htons(ARPOP_REPLY) &&
- arp->ar_op != htons(ARPOP_REQUEST)))
- goto out_bad;
-
- arp_eth = __skb_header_pointer(skb, nhoff + sizeof(_arp),
- sizeof(_arp_eth), data,
- hlen,
- &_arp_eth);
- if (!arp_eth)
+ case htons(ETH_P_RARP):
+ switch (__skb_flow_dissect_arp(skb, flow_dissector,
+ target_container, data,
+ nhoff, hlen)) {
+ case FLOW_DISSECT_RET_OUT_GOOD:
+ goto out_good;
+ case FLOW_DISSECT_RET_OUT_BAD:
+ default:
goto out_bad;
-
- if (dissector_uses_key(flow_dissector,
- FLOW_DISSECTOR_KEY_ARP)) {
-
- key_arp = skb_flow_dissector_target(flow_dissector,
- FLOW_DISSECTOR_KEY_ARP,
- target_container);
-
- memcpy(&key_arp->sip, arp_eth->ar_sip,
- sizeof(key_arp->sip));
- memcpy(&key_arp->tip, arp_eth->ar_tip,
- sizeof(key_arp->tip));
-
- /* Only store the lower byte of the opcode;
- * this covers ARPOP_REPLY and ARPOP_REQUEST.
- */
- key_arp->op = ntohs(arp->ar_op) & 0xff;
-
- ether_addr_copy(key_arp->sha, arp_eth->ar_sha);
- ether_addr_copy(key_arp->tha, arp_eth->ar_tha);
}
-
- goto out_good;
- }
-
default:
goto out_bad;
}
ip_proto_again:
switch (ip_proto) {
- case IPPROTO_GRE: {
- struct gre_base_hdr *hdr, _hdr;
- u16 gre_ver;
- int offset = 0;
-
- hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr);
- if (!hdr)
+ case IPPROTO_GRE:
+ switch (__skb_flow_dissect_gre(skb, key_control, flow_dissector,
+ target_container, data,
+ &proto, &nhoff, &hlen, flags)) {
+ case FLOW_DISSECT_RET_OUT_GOOD:
+ goto out_good;
+ case FLOW_DISSECT_RET_OUT_BAD:
goto out_bad;
-
- /* Only look inside GRE without routing */
- if (hdr->flags & GRE_ROUTING)
- break;
-
- /* Only look inside GRE for version 0 and 1 */
- gre_ver = ntohs(hdr->flags & GRE_VERSION);
- if (gre_ver > 1)
- break;
-
- proto = hdr->protocol;
- if (gre_ver) {
- /* Version1 must be PPTP, and check the flags */
- if (!(proto == GRE_PROTO_PPP && (hdr->flags & GRE_KEY)))
- break;
- }
-
- offset += sizeof(struct gre_base_hdr);
-
- if (hdr->flags & GRE_CSUM)
- offset += sizeof(((struct gre_full_hdr *)0)->csum) +
- sizeof(((struct gre_full_hdr *)0)->reserved1);
-
- if (hdr->flags & GRE_KEY) {
- const __be32 *keyid;
- __be32 _keyid;
-
- keyid = __skb_header_pointer(skb, nhoff + offset, sizeof(_keyid),
- data, hlen, &_keyid);
- if (!keyid)
- goto out_bad;
-
- if (dissector_uses_key(flow_dissector,
- FLOW_DISSECTOR_KEY_GRE_KEYID)) {
- key_keyid = skb_flow_dissector_target(flow_dissector,
- FLOW_DISSECTOR_KEY_GRE_KEYID,
- target_container);
- if (gre_ver == 0)
- key_keyid->keyid = *keyid;
- else
- key_keyid->keyid = *keyid & GRE_PPTP_KEY_MASK;
- }
- offset += sizeof(((struct gre_full_hdr *)0)->key);
- }
-
- if (hdr->flags & GRE_SEQ)
- offset += sizeof(((struct pptp_gre_header *)0)->seq);
-
- if (gre_ver == 0) {
- if (proto == htons(ETH_P_TEB)) {
- const struct ethhdr *eth;
- struct ethhdr _eth;
-
- eth = __skb_header_pointer(skb, nhoff + offset,
- sizeof(_eth),
- data, hlen, &_eth);
- if (!eth)
- goto out_bad;
- proto = eth->h_proto;
- offset += sizeof(*eth);
-
- /* Cap headers that we access via pointers at the
- * end of the Ethernet header as our maximum alignment
- * at that point is only 2 bytes.
- */
- if (NET_IP_ALIGN)
- hlen = (nhoff + offset);
- }
- } else { /* version 1, must be PPTP */
- u8 _ppp_hdr[PPP_HDRLEN];
- u8 *ppp_hdr;
-
- if (hdr->flags & GRE_ACK)
- offset += sizeof(((struct pptp_gre_header *)0)->ack);
-
- ppp_hdr = __skb_header_pointer(skb, nhoff + offset,
- sizeof(_ppp_hdr),
- data, hlen, _ppp_hdr);
- if (!ppp_hdr)
- goto out_bad;
-
- switch (PPP_PROTOCOL(ppp_hdr)) {
- case PPP_IP:
- proto = htons(ETH_P_IP);
- break;
- case PPP_IPV6:
- proto = htons(ETH_P_IPV6);
- break;
- default:
- /* Could probably catch some more like MPLS */
- break;
- }
-
- offset += PPP_HDRLEN;
+ case FLOW_DISSECT_RET_OUT_PROTO_AGAIN:
+ goto proto_again;
}
-
- nhoff += offset;
- key_control->flags |= FLOW_DIS_ENCAPSULATION;
- if (flags & FLOW_DISSECTOR_F_STOP_AT_ENCAP)
- goto out_good;
-
- goto again;
- }
case NEXTHDR_HOP:
case NEXTHDR_ROUTING:
case NEXTHDR_DEST: {
diff --git a/net/core/gro_cells.c b/net/core/gro_cells.c
index c98bbfbd26b8..814e58a3ce8b 100644
--- a/net/core/gro_cells.c
+++ b/net/core/gro_cells.c
@@ -13,7 +13,7 @@ int gro_cells_receive(struct gro_cells *gcells, struct sk_buff *skb)
struct net_device *dev = skb->dev;
struct gro_cell *cell;
- if (!gcells->cells || skb_cloned(skb) || !(dev->features & NETIF_F_GRO))
+ if (!gcells->cells || skb_cloned(skb) || netif_elide_gro(dev))
return netif_rx(skb);
cell = this_cpu_ptr(gcells->cells);
diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c
index 0cfe7b0216c3..b3bc0a31af9f 100644
--- a/net/core/lwt_bpf.c
+++ b/net/core/lwt_bpf.c
@@ -209,7 +209,8 @@ static int bpf_parse_prog(struct nlattr *attr, struct bpf_lwt_prog *prog,
int ret;
u32 fd;
- ret = nla_parse_nested(tb, LWT_BPF_PROG_MAX, attr, bpf_prog_policy);
+ ret = nla_parse_nested(tb, LWT_BPF_PROG_MAX, attr, bpf_prog_policy,
+ NULL);
if (ret < 0)
return ret;
@@ -249,7 +250,7 @@ static int bpf_build_state(struct nlattr *nla,
if (family != AF_INET && family != AF_INET6)
return -EAFNOSUPPORT;
- ret = nla_parse_nested(tb, LWT_BPF_MAX, nla, bpf_nl_policy);
+ ret = nla_parse_nested(tb, LWT_BPF_MAX, nla, bpf_nl_policy, NULL);
if (ret < 0)
return ret;
diff --git a/net/core/lwtunnel.c b/net/core/lwtunnel.c
index 6df9f8fabf0c..cfae3d5fe11f 100644
--- a/net/core/lwtunnel.c
+++ b/net/core/lwtunnel.c
@@ -162,7 +162,6 @@ int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int remaining)
struct rtnexthop *rtnh = (struct rtnexthop *)attr;
struct nlattr *nla_entype;
struct nlattr *attrs;
- struct nlattr *nla;
u16 encap_type;
int attrlen;
@@ -170,7 +169,6 @@ int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int remaining)
attrlen = rtnh_attrlen(rtnh);
if (attrlen > 0) {
attrs = rtnh_attrs(rtnh);
- nla = nla_find(attrs, attrlen, RTA_ENCAP);
nla_entype = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
if (nla_entype) {
@@ -205,7 +203,7 @@ int lwtunnel_fill_encap(struct sk_buff *skb, struct lwtunnel_state *lwtstate)
{
const struct lwtunnel_encap_ops *ops;
struct nlattr *nest;
- int ret = -EINVAL;
+ int ret;
if (!lwtstate)
return 0;
@@ -214,8 +212,11 @@ int lwtunnel_fill_encap(struct sk_buff *skb, struct lwtunnel_state *lwtstate)
lwtstate->type > LWTUNNEL_ENCAP_MAX)
return 0;
- ret = -EOPNOTSUPP;
nest = nla_nest_start(skb, RTA_ENCAP);
+ if (!nest)
+ return -EMSGSIZE;
+
+ ret = -EOPNOTSUPP;
rcu_read_lock();
ops = rcu_dereference(lwtun_encaps[lwtstate->type]);
if (likely(ops && ops->fill_encap))
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 4526cbd7e28a..d274f81fcc2c 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -52,8 +52,9 @@ do { \
#define PNEIGH_HASHMASK 0xF
static void neigh_timer_handler(unsigned long arg);
-static void __neigh_notify(struct neighbour *n, int type, int flags);
-static void neigh_update_notify(struct neighbour *neigh);
+static void __neigh_notify(struct neighbour *n, int type, int flags,
+ u32 pid);
+static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid);
static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev);
#ifdef CONFIG_PROC_FS
@@ -99,7 +100,7 @@ static void neigh_cleanup_and_release(struct neighbour *neigh)
if (neigh->parms->neigh_cleanup)
neigh->parms->neigh_cleanup(neigh);
- __neigh_notify(neigh, RTM_DELNEIGH, 0);
+ __neigh_notify(neigh, RTM_DELNEIGH, 0, 0);
call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
neigh_release(neigh);
}
@@ -949,7 +950,7 @@ out:
}
if (notify)
- neigh_update_notify(neigh);
+ neigh_update_notify(neigh, 0);
neigh_release(neigh);
}
@@ -1073,7 +1074,7 @@ static void neigh_update_hhs(struct neighbour *neigh)
*/
int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
- u32 flags)
+ u32 flags, u32 nlmsg_pid)
{
u8 old;
int err;
@@ -1131,10 +1132,6 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
lladdr = neigh->ha;
}
- if (new & NUD_CONNECTED)
- neigh->confirmed = jiffies;
- neigh->updated = jiffies;
-
/* If entry was valid and address is not changed,
do not change entry state, if new one is STALE.
*/
@@ -1156,6 +1153,16 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
}
}
+ /* Update timestamps only once we know we will make a change to the
+ * neighbour entry. Otherwise we risk to move the locktime window with
+ * noop updates and ignore relevant ARP updates.
+ */
+ if (new != old || lladdr != neigh->ha) {
+ if (new & NUD_CONNECTED)
+ neigh->confirmed = jiffies;
+ neigh->updated = jiffies;
+ }
+
if (new != old) {
neigh_del_timer(neigh);
if (new & NUD_PROBE)
@@ -1230,7 +1237,7 @@ out:
write_unlock_bh(&neigh->lock);
if (notify)
- neigh_update_notify(neigh);
+ neigh_update_notify(neigh, nlmsg_pid);
return err;
}
@@ -1261,7 +1268,7 @@ struct neighbour *neigh_event_ns(struct neigh_table *tbl,
lladdr || !dev->addr_len);
if (neigh)
neigh_update(neigh, lladdr, NUD_STALE,
- NEIGH_UPDATE_F_OVERRIDE);
+ NEIGH_UPDATE_F_OVERRIDE, 0);
return neigh;
}
EXPORT_SYMBOL(neigh_event_ns);
@@ -1589,7 +1596,8 @@ static struct neigh_table *neigh_find_table(int family)
return tbl;
}
-static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
struct ndmsg *ndm;
@@ -1639,14 +1647,16 @@ static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh)
err = neigh_update(neigh, NULL, NUD_FAILED,
NEIGH_UPDATE_F_OVERRIDE |
- NEIGH_UPDATE_F_ADMIN);
+ NEIGH_UPDATE_F_ADMIN,
+ NETLINK_CB(skb).portid);
neigh_release(neigh);
out:
return err;
}
-static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE;
struct net *net = sock_net(skb->sk);
@@ -1659,7 +1669,7 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh)
int err;
ASSERT_RTNL();
- err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL);
+ err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL, extack);
if (err < 0)
goto out;
@@ -1730,7 +1740,8 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh)
neigh_event_send(neigh, NULL);
err = 0;
} else
- err = neigh_update(neigh, lladdr, ndm->ndm_state, flags);
+ err = neigh_update(neigh, lladdr, ndm->ndm_state, flags,
+ NETLINK_CB(skb).portid);
neigh_release(neigh);
out:
@@ -1933,7 +1944,8 @@ static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
[NDTPA_LOCKTIME] = { .type = NLA_U64 },
};
-static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
struct neigh_table *tbl;
@@ -1943,7 +1955,7 @@ static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh)
int err, tidx;
err = nlmsg_parse(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
- nl_neightbl_policy);
+ nl_neightbl_policy, extack);
if (err < 0)
goto errout;
@@ -1981,7 +1993,7 @@ static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh)
int i, ifindex = 0;
err = nla_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS],
- nl_ntbl_parm_policy);
+ nl_ntbl_parm_policy, extack);
if (err < 0)
goto errout_tbl_lock;
@@ -2230,10 +2242,10 @@ nla_put_failure:
return -EMSGSIZE;
}
-static void neigh_update_notify(struct neighbour *neigh)
+static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid)
{
call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
- __neigh_notify(neigh, RTM_NEWNEIGH, 0);
+ __neigh_notify(neigh, RTM_NEWNEIGH, 0, nlmsg_pid);
}
static bool neigh_master_filtered(struct net_device *dev, int master_idx)
@@ -2272,7 +2284,7 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
unsigned int flags = NLM_F_MULTI;
int err;
- err = nlmsg_parse(nlh, sizeof(struct ndmsg), tb, NDA_MAX, NULL);
+ err = nlmsg_parse(nlh, sizeof(struct ndmsg), tb, NDA_MAX, NULL, NULL);
if (!err) {
if (tb[NDA_IFINDEX])
filter_idx = nla_get_u32(tb[NDA_IFINDEX]);
@@ -2831,7 +2843,8 @@ static inline size_t neigh_nlmsg_size(void)
+ nla_total_size(4); /* NDA_PROBES */
}
-static void __neigh_notify(struct neighbour *n, int type, int flags)
+static void __neigh_notify(struct neighbour *n, int type, int flags,
+ u32 pid)
{
struct net *net = dev_net(n->dev);
struct sk_buff *skb;
@@ -2841,7 +2854,7 @@ static void __neigh_notify(struct neighbour *n, int type, int flags)
if (skb == NULL)
goto errout;
- err = neigh_fill_info(skb, n, 0, 0, type, flags);
+ err = neigh_fill_info(skb, n, pid, 0, type, flags);
if (err < 0) {
/* -EMSGSIZE implies BUG in neigh_nlmsg_size() */
WARN_ON(err == -EMSGSIZE);
@@ -2857,7 +2870,7 @@ errout:
void neigh_app_ns(struct neighbour *n)
{
- __neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST);
+ __neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST, 0);
}
EXPORT_SYMBOL(neigh_app_ns);
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 652468ff65b7..1934efd4a9d4 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -35,7 +35,8 @@ LIST_HEAD(net_namespace_list);
EXPORT_SYMBOL_GPL(net_namespace_list);
struct net init_net = {
- .dev_base_head = LIST_HEAD_INIT(init_net.dev_base_head),
+ .count = ATOMIC_INIT(1),
+ .dev_base_head = LIST_HEAD_INIT(init_net.dev_base_head),
};
EXPORT_SYMBOL(init_net);
@@ -571,7 +572,8 @@ static const struct nla_policy rtnl_net_policy[NETNSA_MAX + 1] = {
[NETNSA_FD] = { .type = NLA_U32 },
};
-static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
struct nlattr *tb[NETNSA_MAX + 1];
@@ -579,7 +581,7 @@ static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh)
int nsid, err;
err = nlmsg_parse(nlh, sizeof(struct rtgenmsg), tb, NETNSA_MAX,
- rtnl_net_policy);
+ rtnl_net_policy, extack);
if (err < 0)
return err;
if (!tb[NETNSA_NSID])
@@ -644,7 +646,8 @@ nla_put_failure:
return -EMSGSIZE;
}
-static int rtnl_net_getid(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int rtnl_net_getid(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
struct nlattr *tb[NETNSA_MAX + 1];
@@ -653,7 +656,7 @@ static int rtnl_net_getid(struct sk_buff *skb, struct nlmsghdr *nlh)
int err, id;
err = nlmsg_parse(nlh, sizeof(struct rtgenmsg), tb, NETNSA_MAX,
- rtnl_net_policy);
+ rtnl_net_policy, extack);
if (err < 0)
return err;
if (tb[NETNSA_PID])
diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c
index 0f9275ee5595..1c4810919a0a 100644
--- a/net/core/netprio_cgroup.c
+++ b/net/core/netprio_cgroup.c
@@ -11,6 +11,7 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/module.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/module.h>
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index c4e84c558240..49a279a7cc15 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -896,15 +896,12 @@ static size_t rtnl_port_size(const struct net_device *dev,
return port_self_size;
}
-static size_t rtnl_xdp_size(const struct net_device *dev)
+static size_t rtnl_xdp_size(void)
{
size_t xdp_size = nla_total_size(0) + /* nest IFLA_XDP */
nla_total_size(1); /* XDP_ATTACHED */
- if (!dev->netdev_ops->ndo_xdp)
- return 0;
- else
- return xdp_size;
+ return xdp_size;
}
static noinline size_t if_nlmsg_size(const struct net_device *dev,
@@ -943,7 +940,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev,
+ nla_total_size(MAX_PHYS_ITEM_ID_LEN) /* IFLA_PHYS_PORT_ID */
+ nla_total_size(MAX_PHYS_ITEM_ID_LEN) /* IFLA_PHYS_SWITCH_ID */
+ nla_total_size(IFNAMSIZ) /* IFLA_PHYS_PORT_NAME */
- + rtnl_xdp_size(dev) /* IFLA_XDP */
+ + rtnl_xdp_size() /* IFLA_XDP */
+ nla_total_size(1); /* IFLA_PROTO_DOWN */
}
@@ -1056,7 +1053,7 @@ static int rtnl_phys_port_name_fill(struct sk_buff *skb, struct net_device *dev)
return err;
}
- if (nla_put(skb, IFLA_PHYS_PORT_NAME, strlen(name), name))
+ if (nla_put_string(skb, IFLA_PHYS_PORT_NAME, name))
return -EMSGSIZE;
return 0;
@@ -1249,22 +1246,31 @@ static int rtnl_fill_link_ifmap(struct sk_buff *skb, struct net_device *dev)
return 0;
}
+static u8 rtnl_xdp_attached_mode(struct net_device *dev)
+{
+ const struct net_device_ops *ops = dev->netdev_ops;
+
+ ASSERT_RTNL();
+
+ if (rcu_access_pointer(dev->xdp_prog))
+ return XDP_ATTACHED_SKB;
+ if (ops->ndo_xdp && __dev_xdp_attached(dev, ops->ndo_xdp))
+ return XDP_ATTACHED_DRV;
+
+ return XDP_ATTACHED_NONE;
+}
+
static int rtnl_xdp_fill(struct sk_buff *skb, struct net_device *dev)
{
- struct netdev_xdp xdp_op = {};
struct nlattr *xdp;
int err;
- if (!dev->netdev_ops->ndo_xdp)
- return 0;
xdp = nla_nest_start(skb, IFLA_XDP);
if (!xdp)
return -EMSGSIZE;
- xdp_op.command = XDP_QUERY_PROG;
- err = dev->netdev_ops->ndo_xdp(dev, &xdp_op);
- if (err)
- goto err_cancel;
- err = nla_put_u8(skb, IFLA_XDP_ATTACHED, xdp_op.prog_attached);
+
+ err = nla_put_u8(skb, IFLA_XDP_ATTACHED,
+ rtnl_xdp_attached_mode(dev));
if (err)
goto err_cancel;
@@ -1515,7 +1521,8 @@ static const struct rtnl_link_ops *linkinfo_to_kind_ops(const struct nlattr *nla
const struct rtnl_link_ops *ops = NULL;
struct nlattr *linfo[IFLA_INFO_MAX + 1];
- if (nla_parse_nested(linfo, IFLA_INFO_MAX, nla, ifla_info_policy) < 0)
+ if (nla_parse_nested(linfo, IFLA_INFO_MAX, nla,
+ ifla_info_policy, NULL) < 0)
return NULL;
if (linfo[IFLA_INFO_KIND]) {
@@ -1592,8 +1599,8 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
hdrlen = nlmsg_len(cb->nlh) < sizeof(struct ifinfomsg) ?
sizeof(struct rtgenmsg) : sizeof(struct ifinfomsg);
- if (nlmsg_parse(cb->nlh, hdrlen, tb, IFLA_MAX, ifla_policy) >= 0) {
-
+ if (nlmsg_parse(cb->nlh, hdrlen, tb, IFLA_MAX,
+ ifla_policy, NULL) >= 0) {
if (tb[IFLA_EXT_MASK])
ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]);
@@ -1620,13 +1627,13 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
cb->nlh->nlmsg_seq, 0,
flags,
ext_filter_mask);
- /* If we ran out of room on the first message,
- * we're in trouble
- */
- WARN_ON((err == -EMSGSIZE) && (skb->len == 0));
- if (err < 0)
- goto out;
+ if (err < 0) {
+ if (likely(skb->len))
+ goto out;
+
+ goto out_err;
+ }
nl_dump_check_consistent(cb, nlmsg_hdr(skb));
cont:
@@ -1634,15 +1641,18 @@ cont:
}
}
out:
+ err = skb->len;
+out_err:
cb->args[1] = idx;
cb->args[0] = h;
- return skb->len;
+ return err;
}
-int rtnl_nla_parse_ifla(struct nlattr **tb, const struct nlattr *head, int len)
+int rtnl_nla_parse_ifla(struct nlattr **tb, const struct nlattr *head, int len,
+ struct netlink_ext_ack *exterr)
{
- return nla_parse(tb, IFLA_MAX, head, len, ifla_policy);
+ return nla_parse(tb, IFLA_MAX, head, len, ifla_policy, exterr);
}
EXPORT_SYMBOL(rtnl_nla_parse_ifla);
@@ -1907,6 +1917,7 @@ static int do_set_master(struct net_device *dev, int ifindex)
#define DO_SETLINK_NOTIFY 0x03
static int do_setlink(const struct sk_buff *skb,
struct net_device *dev, struct ifinfomsg *ifm,
+ struct netlink_ext_ack *extack,
struct nlattr **tb, char *ifname, int status)
{
const struct net_device_ops *ops = dev->netdev_ops;
@@ -2078,7 +2089,7 @@ static int do_setlink(const struct sk_buff *skb,
goto errout;
}
err = nla_parse_nested(vfinfo, IFLA_VF_MAX, attr,
- ifla_vf_policy);
+ ifla_vf_policy, NULL);
if (err < 0)
goto errout;
err = do_setvfinfo(dev, vfinfo);
@@ -2106,7 +2117,7 @@ static int do_setlink(const struct sk_buff *skb,
goto errout;
}
err = nla_parse_nested(port, IFLA_PORT_MAX, attr,
- ifla_port_policy);
+ ifla_port_policy, NULL);
if (err < 0)
goto errout;
if (!port[IFLA_PORT_VF]) {
@@ -2126,7 +2137,8 @@ static int do_setlink(const struct sk_buff *skb,
struct nlattr *port[IFLA_PORT_MAX+1];
err = nla_parse_nested(port, IFLA_PORT_MAX,
- tb[IFLA_PORT_SELF], ifla_port_policy);
+ tb[IFLA_PORT_SELF], ifla_port_policy,
+ NULL);
if (err < 0)
goto errout;
@@ -2170,7 +2182,7 @@ static int do_setlink(const struct sk_buff *skb,
u32 xdp_flags = 0;
err = nla_parse_nested(xdp, IFLA_XDP_MAX, tb[IFLA_XDP],
- ifla_xdp_policy);
+ ifla_xdp_policy, NULL);
if (err < 0)
goto errout;
@@ -2185,10 +2197,15 @@ static int do_setlink(const struct sk_buff *skb,
err = -EINVAL;
goto errout;
}
+ if ((xdp_flags & XDP_FLAGS_SKB_MODE) &&
+ (xdp_flags & XDP_FLAGS_DRV_MODE)) {
+ err = -EINVAL;
+ goto errout;
+ }
}
if (xdp[IFLA_XDP_FD]) {
- err = dev_change_xdp_fd(dev,
+ err = dev_change_xdp_fd(dev, extack,
nla_get_s32(xdp[IFLA_XDP_FD]),
xdp_flags);
if (err)
@@ -2210,7 +2227,8 @@ errout:
return err;
}
-static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
struct ifinfomsg *ifm;
@@ -2219,7 +2237,8 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh)
struct nlattr *tb[IFLA_MAX+1];
char ifname[IFNAMSIZ];
- err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy);
+ err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy,
+ extack);
if (err < 0)
goto errout;
@@ -2246,7 +2265,7 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh)
if (err < 0)
goto errout;
- err = do_setlink(skb, dev, ifm, tb, ifname, 0);
+ err = do_setlink(skb, dev, ifm, extack, tb, ifname, 0);
errout:
return err;
}
@@ -2303,7 +2322,8 @@ int rtnl_delete_link(struct net_device *dev)
}
EXPORT_SYMBOL_GPL(rtnl_delete_link);
-static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
struct net_device *dev;
@@ -2312,7 +2332,7 @@ static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh)
struct nlattr *tb[IFLA_MAX+1];
int err;
- err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy);
+ err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy, extack);
if (err < 0)
return err;
@@ -2407,6 +2427,7 @@ EXPORT_SYMBOL(rtnl_create_link);
static int rtnl_group_changelink(const struct sk_buff *skb,
struct net *net, int group,
struct ifinfomsg *ifm,
+ struct netlink_ext_ack *extack,
struct nlattr **tb)
{
struct net_device *dev, *aux;
@@ -2414,7 +2435,7 @@ static int rtnl_group_changelink(const struct sk_buff *skb,
for_each_netdev_safe(net, dev, aux) {
if (dev->group == group) {
- err = do_setlink(skb, dev, ifm, tb, NULL, 0);
+ err = do_setlink(skb, dev, ifm, extack, tb, NULL, 0);
if (err < 0)
return err;
}
@@ -2423,7 +2444,8 @@ static int rtnl_group_changelink(const struct sk_buff *skb,
return 0;
}
-static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
const struct rtnl_link_ops *ops;
@@ -2441,7 +2463,7 @@ static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh)
#ifdef CONFIG_MODULES
replay:
#endif
- err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy);
+ err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy, extack);
if (err < 0)
return err;
@@ -2472,7 +2494,8 @@ replay:
if (tb[IFLA_LINKINFO]) {
err = nla_parse_nested(linkinfo, IFLA_INFO_MAX,
- tb[IFLA_LINKINFO], ifla_info_policy);
+ tb[IFLA_LINKINFO], ifla_info_policy,
+ NULL);
if (err < 0)
return err;
} else
@@ -2497,7 +2520,7 @@ replay:
if (ops->maxtype && linkinfo[IFLA_INFO_DATA]) {
err = nla_parse_nested(attr, ops->maxtype,
linkinfo[IFLA_INFO_DATA],
- ops->policy);
+ ops->policy, NULL);
if (err < 0)
return err;
data = attr;
@@ -2515,7 +2538,8 @@ replay:
err = nla_parse_nested(slave_attr,
m_ops->slave_maxtype,
linkinfo[IFLA_INFO_SLAVE_DATA],
- m_ops->slave_policy);
+ m_ops->slave_policy,
+ NULL);
if (err < 0)
return err;
slave_data = slave_attr;
@@ -2557,14 +2581,15 @@ replay:
status |= DO_SETLINK_NOTIFY;
}
- return do_setlink(skb, dev, ifm, tb, ifname, status);
+ return do_setlink(skb, dev, ifm, extack, tb, ifname,
+ status);
}
if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
if (ifm->ifi_index == 0 && tb[IFLA_GROUP])
return rtnl_group_changelink(skb, net,
nla_get_u32(tb[IFLA_GROUP]),
- ifm, tb);
+ ifm, extack, tb);
return -ENODEV;
}
@@ -2673,7 +2698,8 @@ out_unregister:
}
}
-static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh)
+static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
struct ifinfomsg *ifm;
@@ -2684,7 +2710,7 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh)
int err;
u32 ext_filter_mask = 0;
- err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy);
+ err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy, extack);
if (err < 0)
return err;
@@ -2734,7 +2760,7 @@ static u16 rtnl_calcit(struct sk_buff *skb, struct nlmsghdr *nlh)
hdrlen = nlmsg_len(nlh) < sizeof(struct ifinfomsg) ?
sizeof(struct rtgenmsg) : sizeof(struct ifinfomsg);
- if (nlmsg_parse(nlh, hdrlen, tb, IFLA_MAX, ifla_policy) >= 0) {
+ if (nlmsg_parse(nlh, hdrlen, tb, IFLA_MAX, ifla_policy, NULL) >= 0) {
if (tb[IFLA_EXT_MASK])
ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]);
}
@@ -2955,7 +2981,8 @@ static int fdb_vid_parse(struct nlattr *vlan_attr, u16 *p_vid)
return 0;
}
-static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
struct ndmsg *ndm;
@@ -2965,7 +2992,7 @@ static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh)
u16 vid;
int err;
- err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL);
+ err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL, extack);
if (err < 0)
return err;
@@ -3055,7 +3082,8 @@ int ndo_dflt_fdb_del(struct ndmsg *ndm,
}
EXPORT_SYMBOL(ndo_dflt_fdb_del);
-static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
struct ndmsg *ndm;
@@ -3068,7 +3096,7 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh)
if (!netlink_capable(skb, CAP_NET_ADMIN))
return -EPERM;
- err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL);
+ err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL, extack);
if (err < 0)
return err;
@@ -3203,8 +3231,8 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb)
int err = 0;
int fidx = 0;
- if (nlmsg_parse(cb->nlh, sizeof(struct ifinfomsg), tb, IFLA_MAX,
- ifla_policy) == 0) {
+ if (nlmsg_parse(cb->nlh, sizeof(struct ifinfomsg), tb,
+ IFLA_MAX, ifla_policy, NULL) == 0) {
if (tb[IFLA_MASTER])
br_idx = nla_get_u32(tb[IFLA_MASTER]);
}
@@ -3427,8 +3455,12 @@ static int rtnl_bridge_getlink(struct sk_buff *skb, struct netlink_callback *cb)
err = br_dev->netdev_ops->ndo_bridge_getlink(
skb, portid, seq, dev,
filter_mask, NLM_F_MULTI);
- if (err < 0 && err != -EOPNOTSUPP)
- break;
+ if (err < 0 && err != -EOPNOTSUPP) {
+ if (likely(skb->len))
+ break;
+
+ goto out_err;
+ }
}
idx++;
}
@@ -3439,16 +3471,22 @@ static int rtnl_bridge_getlink(struct sk_buff *skb, struct netlink_callback *cb)
seq, dev,
filter_mask,
NLM_F_MULTI);
- if (err < 0 && err != -EOPNOTSUPP)
- break;
+ if (err < 0 && err != -EOPNOTSUPP) {
+ if (likely(skb->len))
+ break;
+
+ goto out_err;
+ }
}
idx++;
}
}
+ err = skb->len;
+out_err:
rcu_read_unlock();
cb->args[0] = idx;
- return skb->len;
+ return err;
}
static inline size_t bridge_nlmsg_size(void)
@@ -3498,7 +3536,8 @@ errout:
return err;
}
-static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
struct ifinfomsg *ifm;
@@ -3572,7 +3611,8 @@ out:
return err;
}
-static int rtnl_bridge_dellink(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int rtnl_bridge_dellink(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
struct ifinfomsg *ifm;
@@ -3940,7 +3980,8 @@ static size_t if_nlmsg_stats_size(const struct net_device *dev,
return size;
}
-static int rtnl_stats_get(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int rtnl_stats_get(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
struct net_device *dev = NULL;
@@ -4046,7 +4087,8 @@ out:
/* Process one rtnetlink message. */
-static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
rtnl_doit_func doit;
@@ -4101,7 +4143,7 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
if (doit == NULL)
return -EOPNOTSUPP;
- return doit(skb, nlh);
+ return doit(skb, nlh, extack);
}
static void rtnetlink_rcv(struct sk_buff *skb)
@@ -4116,22 +4158,16 @@ static int rtnetlink_event(struct notifier_block *this, unsigned long event, voi
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
switch (event) {
- case NETDEV_UP:
- case NETDEV_DOWN:
- case NETDEV_PRE_UP:
- case NETDEV_POST_INIT:
- case NETDEV_REGISTER:
- case NETDEV_CHANGE:
- case NETDEV_PRE_TYPE_CHANGE:
- case NETDEV_GOING_DOWN:
- case NETDEV_UNREGISTER:
- case NETDEV_UNREGISTER_FINAL:
- case NETDEV_RELEASE:
- case NETDEV_JOIN:
- case NETDEV_BONDING_INFO:
+ case NETDEV_REBOOT:
+ case NETDEV_CHANGENAME:
+ case NETDEV_FEAT_CHANGE:
+ case NETDEV_BONDING_FAILOVER:
+ case NETDEV_NOTIFY_PEERS:
+ case NETDEV_RESEND_IGMP:
+ case NETDEV_CHANGEINFODATA:
+ rtmsg_ifinfo(RTM_NEWLINK, dev, 0, GFP_KERNEL);
break;
default:
- rtmsg_ifinfo(RTM_NEWLINK, dev, 0, GFP_KERNEL);
break;
}
return NOTIFY_DONE;
@@ -4185,6 +4221,7 @@ void __init rtnetlink_init(void)
rtnl_register(PF_UNSPEC, RTM_GETADDR, NULL, rtnl_dump_all, NULL);
rtnl_register(PF_UNSPEC, RTM_GETROUTE, NULL, rtnl_dump_all, NULL);
+ rtnl_register(PF_UNSPEC, RTM_GETNETCONF, NULL, rtnl_dump_all, NULL);
rtnl_register(PF_BRIDGE, RTM_NEWNEIGH, rtnl_fdb_add, NULL, NULL);
rtnl_register(PF_BRIDGE, RTM_DELNEIGH, rtnl_fdb_del, NULL, NULL);
diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c
index d28da7d363f1..ae35cce3a40d 100644
--- a/net/core/secure_seq.c
+++ b/net/core/secure_seq.c
@@ -24,9 +24,13 @@ static siphash_key_t ts_secret __read_mostly;
static __always_inline void net_secret_init(void)
{
- net_get_random_once(&ts_secret, sizeof(ts_secret));
net_get_random_once(&net_secret, sizeof(net_secret));
}
+
+static __always_inline void ts_secret_init(void)
+{
+ net_get_random_once(&ts_secret, sizeof(ts_secret));
+}
#endif
#ifdef CONFIG_INET
@@ -47,7 +51,7 @@ static u32 seq_scale(u32 seq)
#endif
#if IS_ENABLED(CONFIG_IPV6)
-static u32 secure_tcpv6_ts_off(const __be32 *saddr, const __be32 *daddr)
+u32 secure_tcpv6_ts_off(const __be32 *saddr, const __be32 *daddr)
{
const struct {
struct in6_addr saddr;
@@ -60,12 +64,14 @@ static u32 secure_tcpv6_ts_off(const __be32 *saddr, const __be32 *daddr)
if (sysctl_tcp_timestamps != 1)
return 0;
+ ts_secret_init();
return siphash(&combined, offsetofend(typeof(combined), daddr),
&ts_secret);
}
+EXPORT_SYMBOL(secure_tcpv6_ts_off);
-u32 secure_tcpv6_sequence_number(const __be32 *saddr, const __be32 *daddr,
- __be16 sport, __be16 dport, u32 *tsoff)
+u32 secure_tcpv6_seq(const __be32 *saddr, const __be32 *daddr,
+ __be16 sport, __be16 dport)
{
const struct {
struct in6_addr saddr;
@@ -78,14 +84,14 @@ u32 secure_tcpv6_sequence_number(const __be32 *saddr, const __be32 *daddr,
.sport = sport,
.dport = dport
};
- u64 hash;
+ u32 hash;
+
net_secret_init();
hash = siphash(&combined, offsetofend(typeof(combined), dport),
&net_secret);
- *tsoff = secure_tcpv6_ts_off(saddr, daddr);
return seq_scale(hash);
}
-EXPORT_SYMBOL(secure_tcpv6_sequence_number);
+EXPORT_SYMBOL(secure_tcpv6_seq);
u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr,
__be16 dport)
@@ -107,30 +113,30 @@ EXPORT_SYMBOL(secure_ipv6_port_ephemeral);
#endif
#ifdef CONFIG_INET
-static u32 secure_tcp_ts_off(__be32 saddr, __be32 daddr)
+u32 secure_tcp_ts_off(__be32 saddr, __be32 daddr)
{
if (sysctl_tcp_timestamps != 1)
return 0;
+ ts_secret_init();
return siphash_2u32((__force u32)saddr, (__force u32)daddr,
&ts_secret);
}
-/* secure_tcp_sequence_number(a, b, 0, d) == secure_ipv4_port_ephemeral(a, b, d),
+/* secure_tcp_seq_and_tsoff(a, b, 0, d) == secure_ipv4_port_ephemeral(a, b, d),
* but fortunately, `sport' cannot be 0 in any circumstances. If this changes,
* it would be easy enough to have the former function use siphash_4u32, passing
* the arguments as separate u32.
*/
-
-u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr,
- __be16 sport, __be16 dport, u32 *tsoff)
+u32 secure_tcp_seq(__be32 saddr, __be32 daddr,
+ __be16 sport, __be16 dport)
{
- u64 hash;
+ u32 hash;
+
net_secret_init();
hash = siphash_3u32((__force u32)saddr, (__force u32)daddr,
(__force u32)sport << 16 | (__force u32)dport,
&net_secret);
- *tsoff = secure_tcp_ts_off(saddr, daddr);
return seq_scale(hash);
}
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index f1d04592ace0..346d3e85dfbc 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1982,7 +1982,6 @@ int skb_splice_bits(struct sk_buff *skb, struct sock *sk, unsigned int offset,
.pages = pages,
.partial = partial,
.nr_pages_max = MAX_SKB_FRAGS,
- .flags = flags,
.ops = &nosteal_pipe_buf_ops,
.spd_release = sock_spd_release,
};
@@ -3102,7 +3101,7 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
skb_walk_frags(head_skb, iter) {
if (frag_len != iter->len && iter->next)
goto normal;
- if (skb_headlen(iter))
+ if (skb_headlen(iter) && !iter->head_frag)
goto normal;
len -= iter->len;
diff --git a/net/core/sock.c b/net/core/sock.c
index b416a537bd0a..727f924b7f91 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -102,6 +102,7 @@
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/sched.h>
+#include <linux/sched/mm.h>
#include <linux/timer.h>
#include <linux/string.h>
#include <linux/sockios.h>
@@ -138,10 +139,7 @@
#include <trace/events/sock.h>
-#ifdef CONFIG_INET
#include <net/tcp.h>
-#endif
-
#include <net/busy_poll.h>
static DEFINE_MUTEX(proto_list_mutex);
@@ -247,12 +245,66 @@ static const char *const af_family_kern_slock_key_strings[AF_MAX+1] = {
static const char *const af_family_kern_clock_key_strings[AF_MAX+1] = {
_sock_locks("k-clock-")
};
+static const char *const af_family_rlock_key_strings[AF_MAX+1] = {
+ "rlock-AF_UNSPEC", "rlock-AF_UNIX" , "rlock-AF_INET" ,
+ "rlock-AF_AX25" , "rlock-AF_IPX" , "rlock-AF_APPLETALK",
+ "rlock-AF_NETROM", "rlock-AF_BRIDGE" , "rlock-AF_ATMPVC" ,
+ "rlock-AF_X25" , "rlock-AF_INET6" , "rlock-AF_ROSE" ,
+ "rlock-AF_DECnet", "rlock-AF_NETBEUI" , "rlock-AF_SECURITY" ,
+ "rlock-AF_KEY" , "rlock-AF_NETLINK" , "rlock-AF_PACKET" ,
+ "rlock-AF_ASH" , "rlock-AF_ECONET" , "rlock-AF_ATMSVC" ,
+ "rlock-AF_RDS" , "rlock-AF_SNA" , "rlock-AF_IRDA" ,
+ "rlock-AF_PPPOX" , "rlock-AF_WANPIPE" , "rlock-AF_LLC" ,
+ "rlock-27" , "rlock-28" , "rlock-AF_CAN" ,
+ "rlock-AF_TIPC" , "rlock-AF_BLUETOOTH", "rlock-AF_IUCV" ,
+ "rlock-AF_RXRPC" , "rlock-AF_ISDN" , "rlock-AF_PHONET" ,
+ "rlock-AF_IEEE802154", "rlock-AF_CAIF" , "rlock-AF_ALG" ,
+ "rlock-AF_NFC" , "rlock-AF_VSOCK" , "rlock-AF_KCM" ,
+ "rlock-AF_QIPCRTR", "rlock-AF_SMC" , "rlock-AF_MAX"
+};
+static const char *const af_family_wlock_key_strings[AF_MAX+1] = {
+ "wlock-AF_UNSPEC", "wlock-AF_UNIX" , "wlock-AF_INET" ,
+ "wlock-AF_AX25" , "wlock-AF_IPX" , "wlock-AF_APPLETALK",
+ "wlock-AF_NETROM", "wlock-AF_BRIDGE" , "wlock-AF_ATMPVC" ,
+ "wlock-AF_X25" , "wlock-AF_INET6" , "wlock-AF_ROSE" ,
+ "wlock-AF_DECnet", "wlock-AF_NETBEUI" , "wlock-AF_SECURITY" ,
+ "wlock-AF_KEY" , "wlock-AF_NETLINK" , "wlock-AF_PACKET" ,
+ "wlock-AF_ASH" , "wlock-AF_ECONET" , "wlock-AF_ATMSVC" ,
+ "wlock-AF_RDS" , "wlock-AF_SNA" , "wlock-AF_IRDA" ,
+ "wlock-AF_PPPOX" , "wlock-AF_WANPIPE" , "wlock-AF_LLC" ,
+ "wlock-27" , "wlock-28" , "wlock-AF_CAN" ,
+ "wlock-AF_TIPC" , "wlock-AF_BLUETOOTH", "wlock-AF_IUCV" ,
+ "wlock-AF_RXRPC" , "wlock-AF_ISDN" , "wlock-AF_PHONET" ,
+ "wlock-AF_IEEE802154", "wlock-AF_CAIF" , "wlock-AF_ALG" ,
+ "wlock-AF_NFC" , "wlock-AF_VSOCK" , "wlock-AF_KCM" ,
+ "wlock-AF_QIPCRTR", "wlock-AF_SMC" , "wlock-AF_MAX"
+};
+static const char *const af_family_elock_key_strings[AF_MAX+1] = {
+ "elock-AF_UNSPEC", "elock-AF_UNIX" , "elock-AF_INET" ,
+ "elock-AF_AX25" , "elock-AF_IPX" , "elock-AF_APPLETALK",
+ "elock-AF_NETROM", "elock-AF_BRIDGE" , "elock-AF_ATMPVC" ,
+ "elock-AF_X25" , "elock-AF_INET6" , "elock-AF_ROSE" ,
+ "elock-AF_DECnet", "elock-AF_NETBEUI" , "elock-AF_SECURITY" ,
+ "elock-AF_KEY" , "elock-AF_NETLINK" , "elock-AF_PACKET" ,
+ "elock-AF_ASH" , "elock-AF_ECONET" , "elock-AF_ATMSVC" ,
+ "elock-AF_RDS" , "elock-AF_SNA" , "elock-AF_IRDA" ,
+ "elock-AF_PPPOX" , "elock-AF_WANPIPE" , "elock-AF_LLC" ,
+ "elock-27" , "elock-28" , "elock-AF_CAN" ,
+ "elock-AF_TIPC" , "elock-AF_BLUETOOTH", "elock-AF_IUCV" ,
+ "elock-AF_RXRPC" , "elock-AF_ISDN" , "elock-AF_PHONET" ,
+ "elock-AF_IEEE802154", "elock-AF_CAIF" , "elock-AF_ALG" ,
+ "elock-AF_NFC" , "elock-AF_VSOCK" , "elock-AF_KCM" ,
+ "elock-AF_QIPCRTR", "elock-AF_SMC" , "elock-AF_MAX"
+};
/*
- * sk_callback_lock locking rules are per-address-family,
+ * sk_callback_lock and sk queues locking rules are per-address-family,
* so split the lock classes by using a per-AF key:
*/
static struct lock_class_key af_callback_keys[AF_MAX];
+static struct lock_class_key af_rlock_keys[AF_MAX];
+static struct lock_class_key af_wlock_keys[AF_MAX];
+static struct lock_class_key af_elock_keys[AF_MAX];
static struct lock_class_key af_kern_callback_keys[AF_MAX];
/* Take into consideration the size of the struct sk_buff overhead in the
@@ -318,14 +370,14 @@ EXPORT_SYMBOL_GPL(sk_clear_memalloc);
int __sk_backlog_rcv(struct sock *sk, struct sk_buff *skb)
{
int ret;
- unsigned long pflags = current->flags;
+ unsigned int noreclaim_flag;
/* these should have been dropped before queueing */
BUG_ON(!sock_flag(sk, SOCK_MEMALLOC));
- current->flags |= PF_MEMALLOC;
+ noreclaim_flag = memalloc_noreclaim_save();
ret = sk->sk_backlog_rcv(sk, skb);
- current_restore_flags(pflags, PF_MEMALLOC);
+ memalloc_noreclaim_restore(noreclaim_flag);
return ret;
}
@@ -1029,6 +1081,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
union {
int val;
+ u64 val64;
struct linger ling;
struct timeval tm;
} v;
@@ -1259,6 +1312,40 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
v.val = sk->sk_incoming_cpu;
break;
+ case SO_MEMINFO:
+ {
+ u32 meminfo[SK_MEMINFO_VARS];
+
+ if (get_user(len, optlen))
+ return -EFAULT;
+
+ sk_get_meminfo(sk, meminfo);
+
+ len = min_t(unsigned int, len, sizeof(meminfo));
+ if (copy_to_user(optval, &meminfo, len))
+ return -EFAULT;
+
+ goto lenout;
+ }
+
+#ifdef CONFIG_NET_RX_BUSY_POLL
+ case SO_INCOMING_NAPI_ID:
+ v.val = READ_ONCE(sk->sk_napi_id);
+
+ /* aggregate non-NAPI IDs down to 0 */
+ if (v.val < MIN_NAPI_ID)
+ v.val = 0;
+
+ break;
+#endif
+
+ case SO_COOKIE:
+ lv = sizeof(u64);
+ if (len < lv)
+ return -EINVAL;
+ v.val64 = sock_gen_cookie(sk);
+ break;
+
default:
/* We implement the SO_SNDLOWAT etc to not be settable
* (1003.1g 7).
@@ -1483,6 +1570,27 @@ void sk_free(struct sock *sk)
}
EXPORT_SYMBOL(sk_free);
+static void sk_init_common(struct sock *sk)
+{
+ skb_queue_head_init(&sk->sk_receive_queue);
+ skb_queue_head_init(&sk->sk_write_queue);
+ skb_queue_head_init(&sk->sk_error_queue);
+
+ rwlock_init(&sk->sk_callback_lock);
+ lockdep_set_class_and_name(&sk->sk_receive_queue.lock,
+ af_rlock_keys + sk->sk_family,
+ af_family_rlock_key_strings[sk->sk_family]);
+ lockdep_set_class_and_name(&sk->sk_write_queue.lock,
+ af_wlock_keys + sk->sk_family,
+ af_family_wlock_key_strings[sk->sk_family]);
+ lockdep_set_class_and_name(&sk->sk_error_queue.lock,
+ af_elock_keys + sk->sk_family,
+ af_family_elock_key_strings[sk->sk_family]);
+ lockdep_set_class_and_name(&sk->sk_callback_lock,
+ af_callback_keys + sk->sk_family,
+ af_family_clock_key_strings[sk->sk_family]);
+}
+
/**
* sk_clone_lock - clone a socket, and lock its clone
* @sk: the socket to clone
@@ -1516,13 +1624,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
*/
atomic_set(&newsk->sk_wmem_alloc, 1);
atomic_set(&newsk->sk_omem_alloc, 0);
- skb_queue_head_init(&newsk->sk_receive_queue);
- skb_queue_head_init(&newsk->sk_write_queue);
-
- rwlock_init(&newsk->sk_callback_lock);
- lockdep_set_class_and_name(&newsk->sk_callback_lock,
- af_callback_keys + newsk->sk_family,
- af_family_clock_key_strings[newsk->sk_family]);
+ sk_init_common(newsk);
newsk->sk_dst_cache = NULL;
newsk->sk_dst_pending_confirm = 0;
@@ -1533,7 +1635,6 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
newsk->sk_userlocks = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
sock_reset_flag(newsk, SOCK_DONE);
- skb_queue_head_init(&newsk->sk_error_queue);
filter = rcu_dereference_protected(newsk->sk_filter, 1);
if (filter != NULL)
@@ -1699,28 +1800,24 @@ EXPORT_SYMBOL(skb_set_owner_w);
* delay queue. We want to allow the owner socket to send more
* packets, as if they were already TX completed by a typical driver.
* But we also want to keep skb->sk set because some packet schedulers
- * rely on it (sch_fq for example). So we set skb->truesize to a small
- * amount (1) and decrease sk_wmem_alloc accordingly.
+ * rely on it (sch_fq for example).
*/
void skb_orphan_partial(struct sk_buff *skb)
{
- /* If this skb is a TCP pure ACK or already went here,
- * we have nothing to do. 2 is already a very small truesize.
- */
- if (skb->truesize <= 2)
+ if (skb_is_tcp_pure_ack(skb))
return;
- /* TCP stack sets skb->ooo_okay based on sk_wmem_alloc,
- * so we do not completely orphan skb, but transfert all
- * accounted bytes but one, to avoid unexpected reorders.
- */
if (skb->destructor == sock_wfree
#ifdef CONFIG_INET
|| skb->destructor == tcp_wfree
#endif
) {
- atomic_sub(skb->truesize - 1, &skb->sk->sk_wmem_alloc);
- skb->truesize = 1;
+ struct sock *sk = skb->sk;
+
+ if (atomic_inc_not_zero(&sk->sk_refcnt)) {
+ atomic_sub(skb->truesize, &sk->sk_wmem_alloc);
+ skb->destructor = sock_efree;
+ }
} else {
skb_orphan(skb);
}
@@ -2466,10 +2563,7 @@ EXPORT_SYMBOL(sk_stop_timer);
void sock_init_data(struct socket *sock, struct sock *sk)
{
- skb_queue_head_init(&sk->sk_receive_queue);
- skb_queue_head_init(&sk->sk_write_queue);
- skb_queue_head_init(&sk->sk_error_queue);
-
+ sk_init_common(sk);
sk->sk_send_head = NULL;
init_timer(&sk->sk_timer);
@@ -2521,7 +2615,7 @@ void sock_init_data(struct socket *sock, struct sock *sk)
sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT;
- sk->sk_stamp = ktime_set(-1L, 0);
+ sk->sk_stamp = SK_DEFAULT_STAMP;
#ifdef CONFIG_NET_RX_BUSY_POLL
sk->sk_napi_id = 0;
@@ -2802,6 +2896,21 @@ void sk_common_release(struct sock *sk)
}
EXPORT_SYMBOL(sk_common_release);
+void sk_get_meminfo(const struct sock *sk, u32 *mem)
+{
+ memset(mem, 0, sizeof(*mem) * SK_MEMINFO_VARS);
+
+ mem[SK_MEMINFO_RMEM_ALLOC] = sk_rmem_alloc_get(sk);
+ mem[SK_MEMINFO_RCVBUF] = sk->sk_rcvbuf;
+ mem[SK_MEMINFO_WMEM_ALLOC] = sk_wmem_alloc_get(sk);
+ mem[SK_MEMINFO_SNDBUF] = sk->sk_sndbuf;
+ mem[SK_MEMINFO_FWD_ALLOC] = sk->sk_forward_alloc;
+ mem[SK_MEMINFO_WMEM_QUEUED] = sk->sk_wmem_queued;
+ mem[SK_MEMINFO_OPTMEM] = atomic_read(&sk->sk_omem_alloc);
+ mem[SK_MEMINFO_BACKLOG] = sk->sk_backlog.len;
+ mem[SK_MEMINFO_DROPS] = atomic_read(&sk->sk_drops);
+}
+
#ifdef CONFIG_PROC_FS
#define PROTO_INUSE_NR 64 /* should be enough for the first time */
struct prot_inuse {
@@ -3142,3 +3251,14 @@ static int __init proto_init(void)
subsys_initcall(proto_init);
#endif /* PROC_FS */
+
+#ifdef CONFIG_NET_RX_BUSY_POLL
+bool sk_busy_loop_end(void *p, unsigned long start_time)
+{
+ struct sock *sk = p;
+
+ return !skb_queue_empty(&sk->sk_receive_queue) ||
+ sk_busy_loop_timeout(sk, start_time);
+}
+EXPORT_SYMBOL(sk_busy_loop_end);
+#endif /* CONFIG_NET_RX_BUSY_POLL */
diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c
index 6b10573cc9fa..217f4e3b82f6 100644
--- a/net/core/sock_diag.c
+++ b/net/core/sock_diag.c
@@ -19,7 +19,7 @@ static int (*inet_rcv_compat)(struct sk_buff *skb, struct nlmsghdr *nlh);
static DEFINE_MUTEX(sock_diag_table_mutex);
static struct workqueue_struct *broadcast_wq;
-static u64 sock_gen_cookie(struct sock *sk)
+u64 sock_gen_cookie(struct sock *sk)
{
while (1) {
u64 res = atomic64_read(&sk->sk_cookie);
@@ -59,15 +59,7 @@ int sock_diag_put_meminfo(struct sock *sk, struct sk_buff *skb, int attrtype)
{
u32 mem[SK_MEMINFO_VARS];
- mem[SK_MEMINFO_RMEM_ALLOC] = sk_rmem_alloc_get(sk);
- mem[SK_MEMINFO_RCVBUF] = sk->sk_rcvbuf;
- mem[SK_MEMINFO_WMEM_ALLOC] = sk_wmem_alloc_get(sk);
- mem[SK_MEMINFO_SNDBUF] = sk->sk_sndbuf;
- mem[SK_MEMINFO_FWD_ALLOC] = sk->sk_forward_alloc;
- mem[SK_MEMINFO_WMEM_QUEUED] = sk->sk_wmem_queued;
- mem[SK_MEMINFO_OPTMEM] = atomic_read(&sk->sk_omem_alloc);
- mem[SK_MEMINFO_BACKLOG] = sk->sk_backlog.len;
- mem[SK_MEMINFO_DROPS] = atomic_read(&sk->sk_drops);
+ sk_get_meminfo(sk, mem);
return nla_put(skb, attrtype, sizeof(mem), &mem);
}
@@ -246,7 +238,8 @@ static int __sock_diag_cmd(struct sk_buff *skb, struct nlmsghdr *nlh)
return err;
}
-static int sock_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int sock_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
int ret;
diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c
index 9a1a352fd1eb..eed1ebf7f29d 100644
--- a/net/core/sock_reuseport.c
+++ b/net/core/sock_reuseport.c
@@ -13,9 +13,9 @@
static DEFINE_SPINLOCK(reuseport_lock);
-static struct sock_reuseport *__reuseport_alloc(u16 max_socks)
+static struct sock_reuseport *__reuseport_alloc(unsigned int max_socks)
{
- size_t size = sizeof(struct sock_reuseport) +
+ unsigned int size = sizeof(struct sock_reuseport) +
sizeof(struct sock *) * max_socks;
struct sock_reuseport *reuse = kzalloc(size, GFP_ATOMIC);
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 7f9cc400eca0..ea23254b2457 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -452,6 +452,14 @@ static struct ctl_table net_core_table[] = {
.extra1 = &one,
.extra2 = &max_skb_frags,
},
+ {
+ .procname = "netdev_budget_usecs",
+ .data = &netdev_budget_usecs,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
+ },
{ }
};
diff --git a/net/core/utils.c b/net/core/utils.c
index 32c467cf52d6..93066bd0305a 100644
--- a/net/core/utils.c
+++ b/net/core/utils.c
@@ -53,7 +53,7 @@ EXPORT_SYMBOL(net_ratelimit);
__be32 in_aton(const char *str)
{
- unsigned long l;
+ unsigned int l;
unsigned int val;
int i;
diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c
index 3202d75329b5..93106120f987 100644
--- a/net/dcb/dcbnl.c
+++ b/net/dcb/dcbnl.c
@@ -245,8 +245,7 @@ static int dcbnl_getpfccfg(struct net_device *netdev, struct nlmsghdr *nlh,
return -EOPNOTSUPP;
ret = nla_parse_nested(data, DCB_PFC_UP_ATTR_MAX,
- tb[DCB_ATTR_PFC_CFG],
- dcbnl_pfc_up_nest);
+ tb[DCB_ATTR_PFC_CFG], dcbnl_pfc_up_nest, NULL);
if (ret)
return ret;
@@ -304,7 +303,7 @@ static int dcbnl_getcap(struct net_device *netdev, struct nlmsghdr *nlh,
return -EOPNOTSUPP;
ret = nla_parse_nested(data, DCB_CAP_ATTR_MAX, tb[DCB_ATTR_CAP],
- dcbnl_cap_nest);
+ dcbnl_cap_nest, NULL);
if (ret)
return ret;
@@ -348,7 +347,7 @@ static int dcbnl_getnumtcs(struct net_device *netdev, struct nlmsghdr *nlh,
return -EOPNOTSUPP;
ret = nla_parse_nested(data, DCB_NUMTCS_ATTR_MAX, tb[DCB_ATTR_NUMTCS],
- dcbnl_numtcs_nest);
+ dcbnl_numtcs_nest, NULL);
if (ret)
return ret;
@@ -393,7 +392,7 @@ static int dcbnl_setnumtcs(struct net_device *netdev, struct nlmsghdr *nlh,
return -EOPNOTSUPP;
ret = nla_parse_nested(data, DCB_NUMTCS_ATTR_MAX, tb[DCB_ATTR_NUMTCS],
- dcbnl_numtcs_nest);
+ dcbnl_numtcs_nest, NULL);
if (ret)
return ret;
@@ -452,7 +451,7 @@ static int dcbnl_getapp(struct net_device *netdev, struct nlmsghdr *nlh,
return -EINVAL;
ret = nla_parse_nested(app_tb, DCB_APP_ATTR_MAX, tb[DCB_ATTR_APP],
- dcbnl_app_nest);
+ dcbnl_app_nest, NULL);
if (ret)
return ret;
@@ -520,7 +519,7 @@ static int dcbnl_setapp(struct net_device *netdev, struct nlmsghdr *nlh,
return -EINVAL;
ret = nla_parse_nested(app_tb, DCB_APP_ATTR_MAX, tb[DCB_ATTR_APP],
- dcbnl_app_nest);
+ dcbnl_app_nest, NULL);
if (ret)
return ret;
@@ -577,8 +576,8 @@ static int __dcbnl_pg_getcfg(struct net_device *netdev, struct nlmsghdr *nlh,
!netdev->dcbnl_ops->getpgbwgcfgrx)
return -EOPNOTSUPP;
- ret = nla_parse_nested(pg_tb, DCB_PG_ATTR_MAX,
- tb[DCB_ATTR_PG_CFG], dcbnl_pg_nest);
+ ret = nla_parse_nested(pg_tb, DCB_PG_ATTR_MAX, tb[DCB_ATTR_PG_CFG],
+ dcbnl_pg_nest, NULL);
if (ret)
return ret;
@@ -597,8 +596,8 @@ static int __dcbnl_pg_getcfg(struct net_device *netdev, struct nlmsghdr *nlh,
data = pg_tb[DCB_PG_ATTR_TC_ALL];
else
data = pg_tb[i];
- ret = nla_parse_nested(param_tb, DCB_TC_ATTR_PARAM_MAX,
- data, dcbnl_tc_param_nest);
+ ret = nla_parse_nested(param_tb, DCB_TC_ATTR_PARAM_MAX, data,
+ dcbnl_tc_param_nest, NULL);
if (ret)
goto err_pg;
@@ -735,8 +734,7 @@ static int dcbnl_setpfccfg(struct net_device *netdev, struct nlmsghdr *nlh,
return -EOPNOTSUPP;
ret = nla_parse_nested(data, DCB_PFC_UP_ATTR_MAX,
- tb[DCB_ATTR_PFC_CFG],
- dcbnl_pfc_up_nest);
+ tb[DCB_ATTR_PFC_CFG], dcbnl_pfc_up_nest, NULL);
if (ret)
return ret;
@@ -791,8 +789,8 @@ static int __dcbnl_pg_setcfg(struct net_device *netdev, struct nlmsghdr *nlh,
!netdev->dcbnl_ops->setpgbwgcfgrx)
return -EOPNOTSUPP;
- ret = nla_parse_nested(pg_tb, DCB_PG_ATTR_MAX,
- tb[DCB_ATTR_PG_CFG], dcbnl_pg_nest);
+ ret = nla_parse_nested(pg_tb, DCB_PG_ATTR_MAX, tb[DCB_ATTR_PG_CFG],
+ dcbnl_pg_nest, NULL);
if (ret)
return ret;
@@ -801,7 +799,7 @@ static int __dcbnl_pg_setcfg(struct net_device *netdev, struct nlmsghdr *nlh,
continue;
ret = nla_parse_nested(param_tb, DCB_TC_ATTR_PARAM_MAX,
- pg_tb[i], dcbnl_tc_param_nest);
+ pg_tb[i], dcbnl_tc_param_nest, NULL);
if (ret)
return ret;
@@ -889,8 +887,8 @@ static int dcbnl_bcn_getcfg(struct net_device *netdev, struct nlmsghdr *nlh,
!netdev->dcbnl_ops->getbcncfg)
return -EOPNOTSUPP;
- ret = nla_parse_nested(bcn_tb, DCB_BCN_ATTR_MAX,
- tb[DCB_ATTR_BCN], dcbnl_bcn_nest);
+ ret = nla_parse_nested(bcn_tb, DCB_BCN_ATTR_MAX, tb[DCB_ATTR_BCN],
+ dcbnl_bcn_nest, NULL);
if (ret)
return ret;
@@ -948,9 +946,8 @@ static int dcbnl_bcn_setcfg(struct net_device *netdev, struct nlmsghdr *nlh,
!netdev->dcbnl_ops->setbcnrp)
return -EOPNOTSUPP;
- ret = nla_parse_nested(data, DCB_BCN_ATTR_MAX,
- tb[DCB_ATTR_BCN],
- dcbnl_pfc_up_nest);
+ ret = nla_parse_nested(data, DCB_BCN_ATTR_MAX, tb[DCB_ATTR_BCN],
+ dcbnl_pfc_up_nest, NULL);
if (ret)
return ret;
@@ -1424,8 +1421,8 @@ static int dcbnl_ieee_set(struct net_device *netdev, struct nlmsghdr *nlh,
if (!tb[DCB_ATTR_IEEE])
return -EINVAL;
- err = nla_parse_nested(ieee, DCB_ATTR_IEEE_MAX,
- tb[DCB_ATTR_IEEE], dcbnl_ieee_policy);
+ err = nla_parse_nested(ieee, DCB_ATTR_IEEE_MAX, tb[DCB_ATTR_IEEE],
+ dcbnl_ieee_policy, NULL);
if (err)
return err;
@@ -1508,8 +1505,8 @@ static int dcbnl_ieee_del(struct net_device *netdev, struct nlmsghdr *nlh,
if (!tb[DCB_ATTR_IEEE])
return -EINVAL;
- err = nla_parse_nested(ieee, DCB_ATTR_IEEE_MAX,
- tb[DCB_ATTR_IEEE], dcbnl_ieee_policy);
+ err = nla_parse_nested(ieee, DCB_ATTR_IEEE_MAX, tb[DCB_ATTR_IEEE],
+ dcbnl_ieee_policy, NULL);
if (err)
return err;
@@ -1581,8 +1578,8 @@ static int dcbnl_getfeatcfg(struct net_device *netdev, struct nlmsghdr *nlh,
if (!tb[DCB_ATTR_FEATCFG])
return -EINVAL;
- ret = nla_parse_nested(data, DCB_FEATCFG_ATTR_MAX, tb[DCB_ATTR_FEATCFG],
- dcbnl_featcfg_nest);
+ ret = nla_parse_nested(data, DCB_FEATCFG_ATTR_MAX,
+ tb[DCB_ATTR_FEATCFG], dcbnl_featcfg_nest, NULL);
if (ret)
return ret;
@@ -1625,8 +1622,8 @@ static int dcbnl_setfeatcfg(struct net_device *netdev, struct nlmsghdr *nlh,
if (!tb[DCB_ATTR_FEATCFG])
return -EINVAL;
- ret = nla_parse_nested(data, DCB_FEATCFG_ATTR_MAX, tb[DCB_ATTR_FEATCFG],
- dcbnl_featcfg_nest);
+ ret = nla_parse_nested(data, DCB_FEATCFG_ATTR_MAX,
+ tb[DCB_ATTR_FEATCFG], dcbnl_featcfg_nest, NULL);
if (ret)
goto err;
@@ -1699,7 +1696,8 @@ static const struct reply_func reply_funcs[DCB_CMD_MAX+1] = {
[DCB_CMD_CEE_GET] = { RTM_GETDCB, dcbnl_cee_get },
};
-static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
struct net_device *netdev;
@@ -1715,7 +1713,7 @@ static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh)
return -EPERM;
ret = nlmsg_parse(nlh, sizeof(*dcb), tb, DCB_ATTR_MAX,
- dcbnl_rtnl_policy);
+ dcbnl_rtnl_policy, extack);
if (ret < 0)
return ret;
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index b99168b0fabf..f75482bdee9a 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -951,7 +951,7 @@ static struct proto dccp_v4_prot = {
.orphan_count = &dccp_orphan_count,
.max_header = MAX_DCCP_HEADER,
.obj_size = sizeof(struct dccp_sock),
- .slab_flags = SLAB_DESTROY_BY_RCU,
+ .slab_flags = SLAB_TYPESAFE_BY_RCU,
.rsk_prot = &dccp_request_sock_ops,
.twsk_prot = &dccp_timewait_sock_ops,
.h.hashinfo = &dccp_hashinfo,
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index d9b6a4e403e7..992621172220 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -426,6 +426,9 @@ static struct sock *dccp_v6_request_recv_sock(const struct sock *sk,
newsk->sk_backlog_rcv = dccp_v4_do_rcv;
newnp->pktoptions = NULL;
newnp->opt = NULL;
+ newnp->ipv6_mc_list = NULL;
+ newnp->ipv6_ac_list = NULL;
+ newnp->ipv6_fl_list = NULL;
newnp->mcast_oif = inet6_iif(skb);
newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
@@ -490,6 +493,9 @@ static struct sock *dccp_v6_request_recv_sock(const struct sock *sk,
/* Clone RX bits */
newnp->rxopt.all = np->rxopt.all;
+ newnp->ipv6_mc_list = NULL;
+ newnp->ipv6_ac_list = NULL;
+ newnp->ipv6_fl_list = NULL;
newnp->pktoptions = NULL;
newnp->opt = NULL;
newnp->mcast_oif = inet6_iif(skb);
@@ -1014,7 +1020,7 @@ static struct proto dccp_v6_prot = {
.orphan_count = &dccp_orphan_count,
.max_header = MAX_DCCP_HEADER,
.obj_size = sizeof(struct dccp6_sock),
- .slab_flags = SLAB_DESTROY_BY_RCU,
+ .slab_flags = SLAB_TYPESAFE_BY_RCU,
.rsk_prot = &dccp6_request_sock_ops,
.twsk_prot = &dccp6_timewait_sock_ops,
.h.hashinfo = &dccp_hashinfo,
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index 7de5b40a5d0d..405483a07efc 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -132,6 +132,7 @@ Version 0.0.6 2.1.110 07-aug-98 Eduardo Marcelo Serrat
#include <net/neighbour.h>
#include <net/dst.h>
#include <net/fib_rules.h>
+#include <net/tcp.h>
#include <net/dn.h>
#include <net/dn_nsp.h>
#include <net/dn_dev.h>
@@ -1469,18 +1470,18 @@ static int __dn_setsockopt(struct socket *sock, int level,int optname, char __us
case DSO_NODELAY:
if (optlen != sizeof(int))
return -EINVAL;
- if (scp->nonagle == 2)
+ if (scp->nonagle == TCP_NAGLE_CORK)
return -EINVAL;
- scp->nonagle = (u.val == 0) ? 0 : 1;
+ scp->nonagle = (u.val == 0) ? 0 : TCP_NAGLE_OFF;
/* if (scp->nonagle == 1) { Push pending frames } */
break;
case DSO_CORK:
if (optlen != sizeof(int))
return -EINVAL;
- if (scp->nonagle == 1)
+ if (scp->nonagle == TCP_NAGLE_OFF)
return -EINVAL;
- scp->nonagle = (u.val == 0) ? 0 : 2;
+ scp->nonagle = (u.val == 0) ? 0 : TCP_NAGLE_CORK;
/* if (scp->nonagle == 0) { Push pending frames } */
break;
@@ -1608,14 +1609,14 @@ static int __dn_getsockopt(struct socket *sock, int level,int optname, char __us
case DSO_NODELAY:
if (r_len > sizeof(int))
r_len = sizeof(int);
- val = (scp->nonagle == 1);
+ val = (scp->nonagle == TCP_NAGLE_OFF);
r_data = &val;
break;
case DSO_CORK:
if (r_len > sizeof(int))
r_len = sizeof(int);
- val = (scp->nonagle == 2);
+ val = (scp->nonagle == TCP_NAGLE_CORK);
r_data = &val;
break;
@@ -2360,7 +2361,8 @@ MODULE_AUTHOR("Linux DECnet Project Team");
MODULE_LICENSE("GPL");
MODULE_ALIAS_NETPROTO(PF_DECnet);
-static char banner[] __initdata = KERN_INFO "NET4: DECnet for Linux: V.2.5.68s (C) 1995-2003 Linux DECnet Project Team\n";
+static const char banner[] __initconst = KERN_INFO
+"NET4: DECnet for Linux: V.2.5.68s (C) 1995-2003 Linux DECnet Project Team\n";
static int __init decnet_init(void)
{
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c
index 8fdd9f492b0e..9017a9a73ab5 100644
--- a/net/decnet/dn_dev.c
+++ b/net/decnet/dn_dev.c
@@ -565,7 +565,8 @@ static const struct nla_policy dn_ifa_policy[IFA_MAX+1] = {
[IFA_FLAGS] = { .type = NLA_U32 },
};
-static int dn_nl_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int dn_nl_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
struct nlattr *tb[IFA_MAX+1];
@@ -581,7 +582,8 @@ static int dn_nl_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh)
if (!net_eq(net, &init_net))
goto errout;
- err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, dn_ifa_policy);
+ err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, dn_ifa_policy,
+ extack);
if (err < 0)
goto errout;
@@ -609,7 +611,8 @@ errout:
return err;
}
-static int dn_nl_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int dn_nl_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
struct nlattr *tb[IFA_MAX+1];
@@ -625,7 +628,8 @@ static int dn_nl_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh)
if (!net_eq(net, &init_net))
return -EINVAL;
- err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, dn_ifa_policy);
+ err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, dn_ifa_policy,
+ extack);
if (err < 0)
return err;
diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c
index 7af0ba6157a1..f9058ebeb635 100644
--- a/net/decnet/dn_fib.c
+++ b/net/decnet/dn_fib.c
@@ -501,7 +501,8 @@ static inline u32 rtm_get_table(struct nlattr *attrs[], u8 table)
return table;
}
-static int dn_fib_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int dn_fib_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
struct dn_fib_table *tb;
@@ -515,7 +516,8 @@ static int dn_fib_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh)
if (!net_eq(net, &init_net))
return -EINVAL;
- err = nlmsg_parse(nlh, sizeof(*r), attrs, RTA_MAX, rtm_dn_policy);
+ err = nlmsg_parse(nlh, sizeof(*r), attrs, RTA_MAX, rtm_dn_policy,
+ extack);
if (err < 0)
return err;
@@ -526,7 +528,8 @@ static int dn_fib_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh)
return tb->delete(tb, r, attrs, nlh, &NETLINK_CB(skb));
}
-static int dn_fib_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int dn_fib_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
struct dn_fib_table *tb;
@@ -540,7 +543,8 @@ static int dn_fib_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh)
if (!net_eq(net, &init_net))
return -EINVAL;
- err = nlmsg_parse(nlh, sizeof(*r), attrs, RTA_MAX, rtm_dn_policy);
+ err = nlmsg_parse(nlh, sizeof(*r), attrs, RTA_MAX, rtm_dn_policy,
+ extack);
if (err < 0)
return err;
diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c
index 482730cd8a56..eeb5fc561f80 100644
--- a/net/decnet/dn_neigh.c
+++ b/net/decnet/dn_neigh.c
@@ -110,7 +110,7 @@ struct neigh_table dn_neigh_table = {
static int dn_neigh_construct(struct neighbour *neigh)
{
struct net_device *dev = neigh->dev;
- struct dn_neigh *dn = (struct dn_neigh *)neigh;
+ struct dn_neigh *dn = container_of(neigh, struct dn_neigh, n);
struct dn_dev *dn_db;
struct neigh_parms *parms;
@@ -339,7 +339,7 @@ int dn_to_neigh_output(struct net *net, struct sock *sk, struct sk_buff *skb)
struct dst_entry *dst = skb_dst(skb);
struct dn_route *rt = (struct dn_route *) dst;
struct neighbour *neigh = rt->n;
- struct dn_neigh *dn = (struct dn_neigh *)neigh;
+ struct dn_neigh *dn = container_of(neigh, struct dn_neigh, n);
struct dn_dev *dn_db;
bool use_long;
@@ -391,7 +391,7 @@ int dn_neigh_router_hello(struct net *net, struct sock *sk, struct sk_buff *skb)
neigh = __neigh_lookup(&dn_neigh_table, &src, skb->dev, 1);
- dn = (struct dn_neigh *)neigh;
+ dn = container_of(neigh, struct dn_neigh, n);
if (neigh) {
write_lock(&neigh->lock);
@@ -451,7 +451,7 @@ int dn_neigh_endnode_hello(struct net *net, struct sock *sk, struct sk_buff *skb
neigh = __neigh_lookup(&dn_neigh_table, &src, skb->dev, 1);
- dn = (struct dn_neigh *)neigh;
+ dn = container_of(neigh, struct dn_neigh, n);
if (neigh) {
write_lock(&neigh->lock);
@@ -510,7 +510,7 @@ static void neigh_elist_cb(struct neighbour *neigh, void *_info)
if (neigh->dev != s->dev)
return;
- dn = (struct dn_neigh *) neigh;
+ dn = container_of(neigh, struct dn_neigh, n);
if (!(dn->flags & (DN_NDFLAG_R1|DN_NDFLAG_R2)))
return;
@@ -549,7 +549,7 @@ int dn_neigh_elist(struct net_device *dev, unsigned char *ptr, int n)
static inline void dn_neigh_format_entry(struct seq_file *seq,
struct neighbour *n)
{
- struct dn_neigh *dn = (struct dn_neigh *) n;
+ struct dn_neigh *dn = container_of(n, struct dn_neigh, n);
char buf[DN_ASCBUF_LEN];
read_lock(&n->lock);
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index b1dc096d22f8..4b9518a0d248 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -1640,7 +1640,8 @@ const struct nla_policy rtm_dn_policy[RTA_MAX + 1] = {
/*
* This is called by both endnodes and routers now.
*/
-static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
+static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(in_skb->sk);
struct rtmsg *rtm = nlmsg_data(nlh);
@@ -1654,7 +1655,8 @@ static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
if (!net_eq(net, &init_net))
return -EINVAL;
- err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_dn_policy);
+ err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_dn_policy,
+ extack);
if (err < 0)
return err;
diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c
index 85f2fdc360c2..1ed81ac6dd1a 100644
--- a/net/decnet/netfilter/dn_rtmsg.c
+++ b/net/decnet/netfilter/dn_rtmsg.c
@@ -96,7 +96,7 @@ static unsigned int dnrmg_hook(void *priv,
}
-#define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0)
+#define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err), NULL); return; } while (0)
static inline void dnrmg_receive_user_skb(struct sk_buff *skb)
{
@@ -134,7 +134,7 @@ static int __init dn_rtmsg_init(void)
return -ENOMEM;
}
- rv = nf_register_hook(&dnrmg_ops);
+ rv = nf_register_net_hook(&init_net, &dnrmg_ops);
if (rv) {
netlink_kernel_release(dnrmg);
}
@@ -144,7 +144,7 @@ static int __init dn_rtmsg_init(void)
static void __exit dn_rtmsg_fini(void)
{
- nf_unregister_hook(&dnrmg_ops);
+ nf_unregister_net_hook(&init_net, &dnrmg_ops);
netlink_kernel_release(dnrmg);
}
diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig
index 9649238eef40..81a0868edb1d 100644
--- a/net/dsa/Kconfig
+++ b/net/dsa/Kconfig
@@ -6,7 +6,7 @@ config HAVE_NET_DSA
config NET_DSA
tristate "Distributed Switch Architecture"
- depends on HAVE_NET_DSA
+ depends on HAVE_NET_DSA && MAY_USE_DEVLINK
select NET_SWITCHDEV
select PHYLIB
---help---
@@ -31,4 +31,10 @@ config NET_DSA_TAG_TRAILER
config NET_DSA_TAG_QCA
bool
+config NET_DSA_TAG_MTK
+ bool
+
+config NET_DSA_TAG_LAN9303
+ bool
+
endif
diff --git a/net/dsa/Makefile b/net/dsa/Makefile
index 31d343796251..0b747d75e65a 100644
--- a/net/dsa/Makefile
+++ b/net/dsa/Makefile
@@ -1,6 +1,6 @@
# the core
obj-$(CONFIG_NET_DSA) += dsa_core.o
-dsa_core-y += dsa.o slave.o dsa2.o switch.o
+dsa_core-y += dsa.o slave.o dsa2.o switch.o legacy.o
# tagging formats
dsa_core-$(CONFIG_NET_DSA_TAG_BRCM) += tag_brcm.o
@@ -8,3 +8,5 @@ dsa_core-$(CONFIG_NET_DSA_TAG_DSA) += tag_dsa.o
dsa_core-$(CONFIG_NET_DSA_TAG_EDSA) += tag_edsa.o
dsa_core-$(CONFIG_NET_DSA_TAG_TRAILER) += tag_trailer.o
dsa_core-$(CONFIG_NET_DSA_TAG_QCA) += tag_qca.o
+dsa_core-$(CONFIG_NET_DSA_TAG_MTK) += tag_mtk.o
+dsa_core-$(CONFIG_NET_DSA_TAG_LAN9303) += tag_lan9303.o
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index b6d4f6a23f06..26130ae438da 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -14,15 +14,17 @@
#include <linux/platform_device.h>
#include <linux/slab.h>
#include <linux/module.h>
-#include <net/dsa.h>
#include <linux/of.h>
#include <linux/of_mdio.h>
#include <linux/of_platform.h>
#include <linux/of_net.h>
#include <linux/of_gpio.h>
+#include <linux/netdevice.h>
#include <linux/sysfs.h>
#include <linux/phy_fixed.h>
#include <linux/gpio/consumer.h>
+#include <linux/etherdevice.h>
+#include <net/dsa.h>
#include "dsa_priv.h"
static struct sk_buff *dsa_slave_notag_xmit(struct sk_buff *skb,
@@ -53,62 +55,15 @@ const struct dsa_device_ops *dsa_device_ops[DSA_TAG_LAST] = {
#ifdef CONFIG_NET_DSA_TAG_QCA
[DSA_TAG_PROTO_QCA] = &qca_netdev_ops,
#endif
+#ifdef CONFIG_NET_DSA_TAG_MTK
+ [DSA_TAG_PROTO_MTK] = &mtk_netdev_ops,
+#endif
+#ifdef CONFIG_NET_DSA_TAG_LAN9303
+ [DSA_TAG_PROTO_LAN9303] = &lan9303_netdev_ops,
+#endif
[DSA_TAG_PROTO_NONE] = &none_ops,
};
-/* switch driver registration ***********************************************/
-static DEFINE_MUTEX(dsa_switch_drivers_mutex);
-static LIST_HEAD(dsa_switch_drivers);
-
-void register_switch_driver(struct dsa_switch_driver *drv)
-{
- mutex_lock(&dsa_switch_drivers_mutex);
- list_add_tail(&drv->list, &dsa_switch_drivers);
- mutex_unlock(&dsa_switch_drivers_mutex);
-}
-EXPORT_SYMBOL_GPL(register_switch_driver);
-
-void unregister_switch_driver(struct dsa_switch_driver *drv)
-{
- mutex_lock(&dsa_switch_drivers_mutex);
- list_del_init(&drv->list);
- mutex_unlock(&dsa_switch_drivers_mutex);
-}
-EXPORT_SYMBOL_GPL(unregister_switch_driver);
-
-static const struct dsa_switch_ops *
-dsa_switch_probe(struct device *parent, struct device *host_dev, int sw_addr,
- const char **_name, void **priv)
-{
- const struct dsa_switch_ops *ret;
- struct list_head *list;
- const char *name;
-
- ret = NULL;
- name = NULL;
-
- mutex_lock(&dsa_switch_drivers_mutex);
- list_for_each(list, &dsa_switch_drivers) {
- const struct dsa_switch_ops *ops;
- struct dsa_switch_driver *drv;
-
- drv = list_entry(list, struct dsa_switch_driver, list);
- ops = drv->ops;
-
- name = ops->probe(parent, host_dev, sw_addr, priv);
- if (name != NULL) {
- ret = ops;
- break;
- }
- }
- mutex_unlock(&dsa_switch_drivers_mutex);
-
- *_name = name;
-
- return ret;
-}
-
-/* basic switch operations **************************************************/
int dsa_cpu_dsa_setup(struct dsa_switch *ds, struct device *dev,
struct dsa_port *dport, int port)
{
@@ -140,23 +95,6 @@ int dsa_cpu_dsa_setup(struct dsa_switch *ds, struct device *dev,
return 0;
}
-static int dsa_cpu_dsa_setups(struct dsa_switch *ds, struct device *dev)
-{
- struct dsa_port *dport;
- int ret, port;
-
- for (port = 0; port < ds->num_ports; port++) {
- if (!(dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port)))
- continue;
-
- dport = &ds->ports[port];
- ret = dsa_cpu_dsa_setup(ds, dev, dport, port);
- if (ret)
- return ret;
- }
- return 0;
-}
-
const struct dsa_device_ops *dsa_resolve_tag_protocol(int tag_protocol)
{
const struct dsa_device_ops *ops;
@@ -206,168 +144,6 @@ void dsa_cpu_port_ethtool_restore(struct dsa_switch *ds)
master->ethtool_ops = ds->dst->master_orig_ethtool_ops;
}
-static int dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent)
-{
- const struct dsa_switch_ops *ops = ds->ops;
- struct dsa_switch_tree *dst = ds->dst;
- struct dsa_chip_data *cd = ds->cd;
- bool valid_name_found = false;
- int index = ds->index;
- int i, ret;
-
- /*
- * Validate supplied switch configuration.
- */
- for (i = 0; i < ds->num_ports; i++) {
- char *name;
-
- name = cd->port_names[i];
- if (name == NULL)
- continue;
-
- if (!strcmp(name, "cpu")) {
- if (dst->cpu_switch) {
- netdev_err(dst->master_netdev,
- "multiple cpu ports?!\n");
- return -EINVAL;
- }
- dst->cpu_switch = ds;
- dst->cpu_port = i;
- ds->cpu_port_mask |= 1 << i;
- } else if (!strcmp(name, "dsa")) {
- ds->dsa_port_mask |= 1 << i;
- } else {
- ds->enabled_port_mask |= 1 << i;
- }
- valid_name_found = true;
- }
-
- if (!valid_name_found && i == ds->num_ports)
- return -EINVAL;
-
- /* Make the built-in MII bus mask match the number of ports,
- * switch drivers can override this later
- */
- ds->phys_mii_mask = ds->enabled_port_mask;
-
- /*
- * If the CPU connects to this switch, set the switch tree
- * tagging protocol to the preferred tagging format of this
- * switch.
- */
- if (dst->cpu_switch == ds) {
- enum dsa_tag_protocol tag_protocol;
-
- tag_protocol = ops->get_tag_protocol(ds);
- dst->tag_ops = dsa_resolve_tag_protocol(tag_protocol);
- if (IS_ERR(dst->tag_ops))
- return PTR_ERR(dst->tag_ops);
-
- dst->rcv = dst->tag_ops->rcv;
- }
-
- memcpy(ds->rtable, cd->rtable, sizeof(ds->rtable));
-
- /*
- * Do basic register setup.
- */
- ret = ops->setup(ds);
- if (ret < 0)
- return ret;
-
- ret = dsa_switch_register_notifier(ds);
- if (ret)
- return ret;
-
- if (ops->set_addr) {
- ret = ops->set_addr(ds, dst->master_netdev->dev_addr);
- if (ret < 0)
- return ret;
- }
-
- if (!ds->slave_mii_bus && ops->phy_read) {
- ds->slave_mii_bus = devm_mdiobus_alloc(parent);
- if (!ds->slave_mii_bus)
- return -ENOMEM;
- dsa_slave_mii_bus_init(ds);
-
- ret = mdiobus_register(ds->slave_mii_bus);
- if (ret < 0)
- return ret;
- }
-
- /*
- * Create network devices for physical switch ports.
- */
- for (i = 0; i < ds->num_ports; i++) {
- ds->ports[i].dn = cd->port_dn[i];
-
- if (!(ds->enabled_port_mask & (1 << i)))
- continue;
-
- ret = dsa_slave_create(ds, parent, i, cd->port_names[i]);
- if (ret < 0)
- netdev_err(dst->master_netdev, "[%d]: can't create dsa slave device for port %d(%s): %d\n",
- index, i, cd->port_names[i], ret);
- }
-
- /* Perform configuration of the CPU and DSA ports */
- ret = dsa_cpu_dsa_setups(ds, parent);
- if (ret < 0)
- netdev_err(dst->master_netdev, "[%d] : can't configure CPU and DSA ports\n",
- index);
-
- ret = dsa_cpu_port_ethtool_setup(ds);
- if (ret)
- return ret;
-
- return 0;
-}
-
-static struct dsa_switch *
-dsa_switch_setup(struct dsa_switch_tree *dst, int index,
- struct device *parent, struct device *host_dev)
-{
- struct dsa_chip_data *cd = dst->pd->chip + index;
- const struct dsa_switch_ops *ops;
- struct dsa_switch *ds;
- int ret;
- const char *name;
- void *priv;
-
- /*
- * Probe for switch model.
- */
- ops = dsa_switch_probe(parent, host_dev, cd->sw_addr, &name, &priv);
- if (!ops) {
- netdev_err(dst->master_netdev, "[%d]: could not detect attached switch\n",
- index);
- return ERR_PTR(-EINVAL);
- }
- netdev_info(dst->master_netdev, "[%d]: detected a %s switch\n",
- index, name);
-
-
- /*
- * Allocate and initialise switch state.
- */
- ds = dsa_switch_alloc(parent, DSA_MAX_PORTS);
- if (!ds)
- return ERR_PTR(-ENOMEM);
-
- ds->dst = dst;
- ds->index = index;
- ds->cd = cd;
- ds->ops = ops;
- ds->priv = priv;
-
- ret = dsa_switch_setup_one(ds, parent);
- if (ret)
- return ERR_PTR(ret);
-
- return ds;
-}
-
void dsa_cpu_dsa_destroy(struct dsa_port *port)
{
struct device_node *port_dn = port->dn;
@@ -376,86 +152,6 @@ void dsa_cpu_dsa_destroy(struct dsa_port *port)
of_phy_deregister_fixed_link(port_dn);
}
-static void dsa_switch_destroy(struct dsa_switch *ds)
-{
- int port;
-
- /* Destroy network devices for physical switch ports. */
- for (port = 0; port < ds->num_ports; port++) {
- if (!(ds->enabled_port_mask & (1 << port)))
- continue;
-
- if (!ds->ports[port].netdev)
- continue;
-
- dsa_slave_destroy(ds->ports[port].netdev);
- }
-
- /* Disable configuration of the CPU and DSA ports */
- for (port = 0; port < ds->num_ports; port++) {
- if (!(dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port)))
- continue;
- dsa_cpu_dsa_destroy(&ds->ports[port]);
-
- /* Clearing a bit which is not set does no harm */
- ds->cpu_port_mask |= ~(1 << port);
- ds->dsa_port_mask |= ~(1 << port);
- }
-
- if (ds->slave_mii_bus && ds->ops->phy_read)
- mdiobus_unregister(ds->slave_mii_bus);
-
- dsa_switch_unregister_notifier(ds);
-}
-
-#ifdef CONFIG_PM_SLEEP
-int dsa_switch_suspend(struct dsa_switch *ds)
-{
- int i, ret = 0;
-
- /* Suspend slave network devices */
- for (i = 0; i < ds->num_ports; i++) {
- if (!dsa_is_port_initialized(ds, i))
- continue;
-
- ret = dsa_slave_suspend(ds->ports[i].netdev);
- if (ret)
- return ret;
- }
-
- if (ds->ops->suspend)
- ret = ds->ops->suspend(ds);
-
- return ret;
-}
-EXPORT_SYMBOL_GPL(dsa_switch_suspend);
-
-int dsa_switch_resume(struct dsa_switch *ds)
-{
- int i, ret = 0;
-
- if (ds->ops->resume)
- ret = ds->ops->resume(ds);
-
- if (ret)
- return ret;
-
- /* Resume slave network devices */
- for (i = 0; i < ds->num_ports; i++) {
- if (!dsa_is_port_initialized(ds, i))
- continue;
-
- ret = dsa_slave_resume(ds->ports[i].netdev);
- if (ret)
- return ret;
- }
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(dsa_switch_resume);
-#endif
-
-/* platform driver init and cleanup *****************************************/
static int dev_is_class(struct device *dev, void *class)
{
if (dev->class != NULL && !strcmp(dev->class->name, class))
@@ -474,24 +170,6 @@ static struct device *dev_find_class(struct device *parent, char *class)
return device_find_child(parent, class, dev_is_class);
}
-struct mii_bus *dsa_host_dev_to_mii_bus(struct device *dev)
-{
- struct device *d;
-
- d = dev_find_class(dev, "mdio_bus");
- if (d != NULL) {
- struct mii_bus *bus;
-
- bus = to_mii_bus(d);
- put_device(d);
-
- return bus;
- }
-
- return NULL;
-}
-EXPORT_SYMBOL_GPL(dsa_host_dev_to_mii_bus);
-
struct net_device *dsa_dev_to_net_device(struct device *dev)
{
struct device *d;
@@ -511,456 +189,43 @@ struct net_device *dsa_dev_to_net_device(struct device *dev)
}
EXPORT_SYMBOL_GPL(dsa_dev_to_net_device);
-#ifdef CONFIG_OF
-static int dsa_of_setup_routing_table(struct dsa_platform_data *pd,
- struct dsa_chip_data *cd,
- int chip_index, int port_index,
- struct device_node *link)
-{
- const __be32 *reg;
- int link_sw_addr;
- struct device_node *parent_sw;
- int len;
-
- parent_sw = of_get_parent(link);
- if (!parent_sw)
- return -EINVAL;
-
- reg = of_get_property(parent_sw, "reg", &len);
- if (!reg || (len != sizeof(*reg) * 2))
- return -EINVAL;
-
- /*
- * Get the destination switch number from the second field of its 'reg'
- * property, i.e. for "reg = <0x19 1>" sw_addr is '1'.
- */
- link_sw_addr = be32_to_cpup(reg + 1);
-
- if (link_sw_addr >= pd->nr_chips)
- return -EINVAL;
-
- cd->rtable[link_sw_addr] = port_index;
-
- return 0;
-}
-
-static int dsa_of_probe_links(struct dsa_platform_data *pd,
- struct dsa_chip_data *cd,
- int chip_index, int port_index,
- struct device_node *port,
- const char *port_name)
-{
- struct device_node *link;
- int link_index;
- int ret;
-
- for (link_index = 0;; link_index++) {
- link = of_parse_phandle(port, "link", link_index);
- if (!link)
- break;
-
- if (!strcmp(port_name, "dsa") && pd->nr_chips > 1) {
- ret = dsa_of_setup_routing_table(pd, cd, chip_index,
- port_index, link);
- if (ret)
- return ret;
- }
- }
- return 0;
-}
-
-static void dsa_of_free_platform_data(struct dsa_platform_data *pd)
-{
- int i;
- int port_index;
-
- for (i = 0; i < pd->nr_chips; i++) {
- port_index = 0;
- while (port_index < DSA_MAX_PORTS) {
- kfree(pd->chip[i].port_names[port_index]);
- port_index++;
- }
-
- /* Drop our reference to the MDIO bus device */
- if (pd->chip[i].host_dev)
- put_device(pd->chip[i].host_dev);
- }
- kfree(pd->chip);
-}
-
-static int dsa_of_probe(struct device *dev)
-{
- struct device_node *np = dev->of_node;
- struct device_node *child, *mdio, *ethernet, *port;
- struct mii_bus *mdio_bus, *mdio_bus_switch;
- struct net_device *ethernet_dev;
- struct dsa_platform_data *pd;
- struct dsa_chip_data *cd;
- const char *port_name;
- int chip_index, port_index;
- const unsigned int *sw_addr, *port_reg;
- u32 eeprom_len;
- int ret;
-
- mdio = of_parse_phandle(np, "dsa,mii-bus", 0);
- if (!mdio)
- return -EINVAL;
-
- mdio_bus = of_mdio_find_bus(mdio);
- if (!mdio_bus)
- return -EPROBE_DEFER;
-
- ethernet = of_parse_phandle(np, "dsa,ethernet", 0);
- if (!ethernet) {
- ret = -EINVAL;
- goto out_put_mdio;
- }
-
- ethernet_dev = of_find_net_device_by_node(ethernet);
- if (!ethernet_dev) {
- ret = -EPROBE_DEFER;
- goto out_put_mdio;
- }
-
- pd = kzalloc(sizeof(*pd), GFP_KERNEL);
- if (!pd) {
- ret = -ENOMEM;
- goto out_put_ethernet;
- }
-
- dev->platform_data = pd;
- pd->of_netdev = ethernet_dev;
- pd->nr_chips = of_get_available_child_count(np);
- if (pd->nr_chips > DSA_MAX_SWITCHES)
- pd->nr_chips = DSA_MAX_SWITCHES;
-
- pd->chip = kcalloc(pd->nr_chips, sizeof(struct dsa_chip_data),
- GFP_KERNEL);
- if (!pd->chip) {
- ret = -ENOMEM;
- goto out_free;
- }
-
- chip_index = -1;
- for_each_available_child_of_node(np, child) {
- int i;
-
- chip_index++;
- cd = &pd->chip[chip_index];
-
- cd->of_node = child;
-
- /* Initialize the routing table */
- for (i = 0; i < DSA_MAX_SWITCHES; ++i)
- cd->rtable[i] = DSA_RTABLE_NONE;
-
- /* When assigning the host device, increment its refcount */
- cd->host_dev = get_device(&mdio_bus->dev);
-
- sw_addr = of_get_property(child, "reg", NULL);
- if (!sw_addr)
- continue;
-
- cd->sw_addr = be32_to_cpup(sw_addr);
- if (cd->sw_addr >= PHY_MAX_ADDR)
- continue;
-
- if (!of_property_read_u32(child, "eeprom-length", &eeprom_len))
- cd->eeprom_len = eeprom_len;
-
- mdio = of_parse_phandle(child, "mii-bus", 0);
- if (mdio) {
- mdio_bus_switch = of_mdio_find_bus(mdio);
- if (!mdio_bus_switch) {
- ret = -EPROBE_DEFER;
- goto out_free_chip;
- }
-
- /* Drop the mdio_bus device ref, replacing the host
- * device with the mdio_bus_switch device, keeping
- * the refcount from of_mdio_find_bus() above.
- */
- put_device(cd->host_dev);
- cd->host_dev = &mdio_bus_switch->dev;
- }
-
- for_each_available_child_of_node(child, port) {
- port_reg = of_get_property(port, "reg", NULL);
- if (!port_reg)
- continue;
-
- port_index = be32_to_cpup(port_reg);
- if (port_index >= DSA_MAX_PORTS)
- break;
-
- port_name = of_get_property(port, "label", NULL);
- if (!port_name)
- continue;
-
- cd->port_dn[port_index] = port;
-
- cd->port_names[port_index] = kstrdup(port_name,
- GFP_KERNEL);
- if (!cd->port_names[port_index]) {
- ret = -ENOMEM;
- goto out_free_chip;
- }
-
- ret = dsa_of_probe_links(pd, cd, chip_index,
- port_index, port, port_name);
- if (ret)
- goto out_free_chip;
-
- }
- }
-
- /* The individual chips hold their own refcount on the mdio bus,
- * so drop ours */
- put_device(&mdio_bus->dev);
-
- return 0;
-
-out_free_chip:
- dsa_of_free_platform_data(pd);
-out_free:
- kfree(pd);
- dev->platform_data = NULL;
-out_put_ethernet:
- put_device(&ethernet_dev->dev);
-out_put_mdio:
- put_device(&mdio_bus->dev);
- return ret;
-}
-
-static void dsa_of_remove(struct device *dev)
-{
- struct dsa_platform_data *pd = dev->platform_data;
-
- if (!dev->of_node)
- return;
-
- dsa_of_free_platform_data(pd);
- put_device(&pd->of_netdev->dev);
- kfree(pd);
-}
-#else
-static inline int dsa_of_probe(struct device *dev)
-{
- return 0;
-}
-
-static inline void dsa_of_remove(struct device *dev)
-{
-}
-#endif
-
-static int dsa_setup_dst(struct dsa_switch_tree *dst, struct net_device *dev,
- struct device *parent, struct dsa_platform_data *pd)
-{
- int i;
- unsigned configured = 0;
-
- dst->pd = pd;
- dst->master_netdev = dev;
- dst->cpu_port = -1;
-
- for (i = 0; i < pd->nr_chips; i++) {
- struct dsa_switch *ds;
-
- ds = dsa_switch_setup(dst, i, parent, pd->chip[i].host_dev);
- if (IS_ERR(ds)) {
- netdev_err(dev, "[%d]: couldn't create dsa switch instance (error %ld)\n",
- i, PTR_ERR(ds));
- continue;
- }
-
- dst->ds[i] = ds;
-
- ++configured;
- }
-
- /*
- * If no switch was found, exit cleanly
- */
- if (!configured)
- return -EPROBE_DEFER;
-
- /*
- * If we use a tagging format that doesn't have an ethertype
- * field, make sure that all packets from this point on get
- * sent to the tag format's receive function.
- */
- wmb();
- dev->dsa_ptr = (void *)dst;
-
- return 0;
-}
-
-static int dsa_probe(struct platform_device *pdev)
-{
- struct dsa_platform_data *pd = pdev->dev.platform_data;
- struct net_device *dev;
- struct dsa_switch_tree *dst;
- int ret;
-
- if (pdev->dev.of_node) {
- ret = dsa_of_probe(&pdev->dev);
- if (ret)
- return ret;
-
- pd = pdev->dev.platform_data;
- }
-
- if (pd == NULL || (pd->netdev == NULL && pd->of_netdev == NULL))
- return -EINVAL;
-
- if (pd->of_netdev) {
- dev = pd->of_netdev;
- dev_hold(dev);
- } else {
- dev = dsa_dev_to_net_device(pd->netdev);
- }
- if (dev == NULL) {
- ret = -EPROBE_DEFER;
- goto out;
- }
-
- if (dev->dsa_ptr != NULL) {
- dev_put(dev);
- ret = -EEXIST;
- goto out;
- }
-
- dst = devm_kzalloc(&pdev->dev, sizeof(*dst), GFP_KERNEL);
- if (dst == NULL) {
- dev_put(dev);
- ret = -ENOMEM;
- goto out;
- }
-
- platform_set_drvdata(pdev, dst);
-
- ret = dsa_setup_dst(dst, dev, &pdev->dev, pd);
- if (ret) {
- dev_put(dev);
- goto out;
- }
-
- return 0;
-
-out:
- dsa_of_remove(&pdev->dev);
-
- return ret;
-}
-
-static void dsa_remove_dst(struct dsa_switch_tree *dst)
-{
- int i;
-
- dst->master_netdev->dsa_ptr = NULL;
-
- /* If we used a tagging format that doesn't have an ethertype
- * field, make sure that all packets from this point get sent
- * without the tag and go through the regular receive path.
- */
- wmb();
-
- for (i = 0; i < dst->pd->nr_chips; i++) {
- struct dsa_switch *ds = dst->ds[i];
-
- if (ds)
- dsa_switch_destroy(ds);
- }
-
- dsa_cpu_port_ethtool_restore(dst->cpu_switch);
-
- dev_put(dst->master_netdev);
-}
-
-static int dsa_remove(struct platform_device *pdev)
-{
- struct dsa_switch_tree *dst = platform_get_drvdata(pdev);
-
- dsa_remove_dst(dst);
- dsa_of_remove(&pdev->dev);
-
- return 0;
-}
-
-static void dsa_shutdown(struct platform_device *pdev)
-{
-}
-
static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev,
struct packet_type *pt, struct net_device *orig_dev)
{
struct dsa_switch_tree *dst = dev->dsa_ptr;
+ struct sk_buff *nskb = NULL;
if (unlikely(dst == NULL)) {
kfree_skb(skb);
return 0;
}
- return dst->rcv(skb, dev, pt, orig_dev);
-}
-
-static struct packet_type dsa_pack_type __read_mostly = {
- .type = cpu_to_be16(ETH_P_XDSA),
- .func = dsa_switch_rcv,
-};
-
-#ifdef CONFIG_PM_SLEEP
-static int dsa_suspend(struct device *d)
-{
- struct platform_device *pdev = to_platform_device(d);
- struct dsa_switch_tree *dst = platform_get_drvdata(pdev);
- int i, ret = 0;
-
- for (i = 0; i < dst->pd->nr_chips; i++) {
- struct dsa_switch *ds = dst->ds[i];
+ skb = skb_unshare(skb, GFP_ATOMIC);
+ if (!skb)
+ return 0;
- if (ds != NULL)
- ret = dsa_switch_suspend(ds);
+ nskb = dst->rcv(skb, dev, pt, orig_dev);
+ if (!nskb) {
+ kfree_skb(skb);
+ return 0;
}
- return ret;
-}
-
-static int dsa_resume(struct device *d)
-{
- struct platform_device *pdev = to_platform_device(d);
- struct dsa_switch_tree *dst = platform_get_drvdata(pdev);
- int i, ret = 0;
+ skb = nskb;
+ skb_push(skb, ETH_HLEN);
+ skb->pkt_type = PACKET_HOST;
+ skb->protocol = eth_type_trans(skb, skb->dev);
- for (i = 0; i < dst->pd->nr_chips; i++) {
- struct dsa_switch *ds = dst->ds[i];
+ skb->dev->stats.rx_packets++;
+ skb->dev->stats.rx_bytes += skb->len;
- if (ds != NULL)
- ret = dsa_switch_resume(ds);
- }
+ netif_receive_skb(skb);
- return ret;
+ return 0;
}
-#endif
-
-static SIMPLE_DEV_PM_OPS(dsa_pm_ops, dsa_suspend, dsa_resume);
-static const struct of_device_id dsa_of_match_table[] = {
- { .compatible = "marvell,dsa", },
- {}
-};
-MODULE_DEVICE_TABLE(of, dsa_of_match_table);
-
-static struct platform_driver dsa_driver = {
- .probe = dsa_probe,
- .remove = dsa_remove,
- .shutdown = dsa_shutdown,
- .driver = {
- .name = "dsa",
- .of_match_table = dsa_of_match_table,
- .pm = &dsa_pm_ops,
- },
+static struct packet_type dsa_pack_type __read_mostly = {
+ .type = cpu_to_be16(ETH_P_XDSA),
+ .func = dsa_switch_rcv,
};
static int __init dsa_init_module(void)
@@ -971,7 +236,7 @@ static int __init dsa_init_module(void)
if (rc)
return rc;
- rc = platform_driver_register(&dsa_driver);
+ rc = dsa_legacy_register();
if (rc)
return rc;
@@ -985,7 +250,7 @@ static void __exit dsa_cleanup_module(void)
{
dsa_slave_unregister_notifier();
dev_remove_pack(&dsa_pack_type);
- platform_driver_unregister(&dsa_driver);
+ dsa_legacy_unregister();
}
module_exit(dsa_cleanup_module);
diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c
index 737be6470c7f..033b3bfb63dc 100644
--- a/net/dsa/dsa2.c
+++ b/net/dsa/dsa2.c
@@ -13,16 +13,20 @@
#include <linux/device.h>
#include <linux/err.h>
#include <linux/list.h>
+#include <linux/netdevice.h>
#include <linux/slab.h>
#include <linux/rtnetlink.h>
-#include <net/dsa.h>
#include <linux/of.h>
#include <linux/of_net.h>
+#include <net/dsa.h>
#include "dsa_priv.h"
static LIST_HEAD(dsa_switch_trees);
static DEFINE_MUTEX(dsa2_mutex);
+static const struct devlink_ops dsa_devlink_ops = {
+};
+
static struct dsa_switch_tree *dsa_get_dst(u32 tree)
{
struct dsa_switch_tree *dst;
@@ -222,12 +226,18 @@ static int dsa_dsa_port_apply(struct dsa_port *port, u32 index,
return err;
}
- return 0;
+ memset(&ds->ports[index].devlink_port, 0,
+ sizeof(ds->ports[index].devlink_port));
+
+ return devlink_port_register(ds->devlink,
+ &ds->ports[index].devlink_port,
+ index);
}
static void dsa_dsa_port_unapply(struct dsa_port *port, u32 index,
struct dsa_switch *ds)
{
+ devlink_port_unregister(&ds->ports[index].devlink_port);
dsa_cpu_dsa_destroy(port);
}
@@ -245,12 +255,17 @@ static int dsa_cpu_port_apply(struct dsa_port *port, u32 index,
ds->cpu_port_mask |= BIT(index);
- return 0;
+ memset(&ds->ports[index].devlink_port, 0,
+ sizeof(ds->ports[index].devlink_port));
+ err = devlink_port_register(ds->devlink, &ds->ports[index].devlink_port,
+ index);
+ return err;
}
static void dsa_cpu_port_unapply(struct dsa_port *port, u32 index,
struct dsa_switch *ds)
{
+ devlink_port_unregister(&ds->ports[index].devlink_port);
dsa_cpu_dsa_destroy(port);
ds->cpu_port_mask &= ~BIT(index);
@@ -275,12 +290,23 @@ static int dsa_user_port_apply(struct dsa_port *port, u32 index,
return err;
}
+ memset(&ds->ports[index].devlink_port, 0,
+ sizeof(ds->ports[index].devlink_port));
+ err = devlink_port_register(ds->devlink, &ds->ports[index].devlink_port,
+ index);
+ if (err)
+ return err;
+
+ devlink_port_type_eth_set(&ds->ports[index].devlink_port,
+ ds->ports[index].netdev);
+
return 0;
}
static void dsa_user_port_unapply(struct dsa_port *port, u32 index,
struct dsa_switch *ds)
{
+ devlink_port_unregister(&ds->ports[index].devlink_port);
if (ds->ports[index].netdev) {
dsa_slave_destroy(ds->ports[index].netdev);
ds->ports[index].netdev = NULL;
@@ -301,6 +327,17 @@ static int dsa_ds_apply(struct dsa_switch_tree *dst, struct dsa_switch *ds)
*/
ds->phys_mii_mask = ds->enabled_port_mask;
+ /* Add the switch to devlink before calling setup, so that setup can
+ * add dpipe tables
+ */
+ ds->devlink = devlink_alloc(&dsa_devlink_ops, 0);
+ if (!ds->devlink)
+ return -ENOMEM;
+
+ err = devlink_register(ds->devlink, ds->dev);
+ if (err)
+ return err;
+
err = ds->ops->setup(ds);
if (err < 0)
return err;
@@ -381,6 +418,13 @@ static void dsa_ds_unapply(struct dsa_switch_tree *dst, struct dsa_switch *ds)
mdiobus_unregister(ds->slave_mii_bus);
dsa_switch_unregister_notifier(ds);
+
+ if (ds->devlink) {
+ devlink_unregister(ds->devlink);
+ devlink_free(ds->devlink);
+ ds->devlink = NULL;
+ }
+
}
static int dsa_dst_apply(struct dsa_switch_tree *dst)
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index 0706a511244e..f4a88e485213 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -17,8 +17,9 @@
struct dsa_device_ops {
struct sk_buff *(*xmit)(struct sk_buff *skb, struct net_device *dev);
- int (*rcv)(struct sk_buff *skb, struct net_device *dev,
- struct packet_type *pt, struct net_device *orig_dev);
+ struct sk_buff *(*rcv)(struct sk_buff *skb, struct net_device *dev,
+ struct packet_type *pt,
+ struct net_device *orig_dev);
};
struct dsa_slave_priv {
@@ -54,6 +55,10 @@ const struct dsa_device_ops *dsa_resolve_tag_protocol(int tag_protocol);
int dsa_cpu_port_ethtool_setup(struct dsa_switch *ds);
void dsa_cpu_port_ethtool_restore(struct dsa_switch *ds);
+/* legacy.c */
+int dsa_legacy_register(void);
+void dsa_legacy_unregister(void);
+
/* slave.c */
extern const struct dsa_device_ops notag_netdev_ops;
void dsa_slave_mii_bus_init(struct dsa_switch *ds);
@@ -85,4 +90,10 @@ extern const struct dsa_device_ops brcm_netdev_ops;
/* tag_qca.c */
extern const struct dsa_device_ops qca_netdev_ops;
+/* tag_mtk.c */
+extern const struct dsa_device_ops mtk_netdev_ops;
+
+/* tag_lan9303.c */
+extern const struct dsa_device_ops lan9303_netdev_ops;
+
#endif
diff --git a/net/dsa/legacy.c b/net/dsa/legacy.c
new file mode 100644
index 000000000000..ad345c8b0b06
--- /dev/null
+++ b/net/dsa/legacy.c
@@ -0,0 +1,818 @@
+/*
+ * net/dsa/legacy.c - Hardware switch handling
+ * Copyright (c) 2008-2009 Marvell Semiconductor
+ * Copyright (c) 2013 Florian Fainelli <florian@openwrt.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/device.h>
+#include <linux/list.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_mdio.h>
+#include <linux/of_platform.h>
+#include <linux/of_net.h>
+#include <linux/netdevice.h>
+#include <linux/sysfs.h>
+#include <linux/phy_fixed.h>
+#include <linux/etherdevice.h>
+#include <net/dsa.h>
+#include "dsa_priv.h"
+
+/* switch driver registration ***********************************************/
+static DEFINE_MUTEX(dsa_switch_drivers_mutex);
+static LIST_HEAD(dsa_switch_drivers);
+
+void register_switch_driver(struct dsa_switch_driver *drv)
+{
+ mutex_lock(&dsa_switch_drivers_mutex);
+ list_add_tail(&drv->list, &dsa_switch_drivers);
+ mutex_unlock(&dsa_switch_drivers_mutex);
+}
+EXPORT_SYMBOL_GPL(register_switch_driver);
+
+void unregister_switch_driver(struct dsa_switch_driver *drv)
+{
+ mutex_lock(&dsa_switch_drivers_mutex);
+ list_del_init(&drv->list);
+ mutex_unlock(&dsa_switch_drivers_mutex);
+}
+EXPORT_SYMBOL_GPL(unregister_switch_driver);
+
+static const struct dsa_switch_ops *
+dsa_switch_probe(struct device *parent, struct device *host_dev, int sw_addr,
+ const char **_name, void **priv)
+{
+ const struct dsa_switch_ops *ret;
+ struct list_head *list;
+ const char *name;
+
+ ret = NULL;
+ name = NULL;
+
+ mutex_lock(&dsa_switch_drivers_mutex);
+ list_for_each(list, &dsa_switch_drivers) {
+ const struct dsa_switch_ops *ops;
+ struct dsa_switch_driver *drv;
+
+ drv = list_entry(list, struct dsa_switch_driver, list);
+ ops = drv->ops;
+
+ name = ops->probe(parent, host_dev, sw_addr, priv);
+ if (name != NULL) {
+ ret = ops;
+ break;
+ }
+ }
+ mutex_unlock(&dsa_switch_drivers_mutex);
+
+ *_name = name;
+
+ return ret;
+}
+
+/* basic switch operations **************************************************/
+static int dsa_cpu_dsa_setups(struct dsa_switch *ds, struct device *dev)
+{
+ struct dsa_port *dport;
+ int ret, port;
+
+ for (port = 0; port < ds->num_ports; port++) {
+ if (!(dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port)))
+ continue;
+
+ dport = &ds->ports[port];
+ ret = dsa_cpu_dsa_setup(ds, dev, dport, port);
+ if (ret)
+ return ret;
+ }
+ return 0;
+}
+
+static int dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent)
+{
+ const struct dsa_switch_ops *ops = ds->ops;
+ struct dsa_switch_tree *dst = ds->dst;
+ struct dsa_chip_data *cd = ds->cd;
+ bool valid_name_found = false;
+ int index = ds->index;
+ int i, ret;
+
+ /*
+ * Validate supplied switch configuration.
+ */
+ for (i = 0; i < ds->num_ports; i++) {
+ char *name;
+
+ name = cd->port_names[i];
+ if (name == NULL)
+ continue;
+
+ if (!strcmp(name, "cpu")) {
+ if (dst->cpu_switch) {
+ netdev_err(dst->master_netdev,
+ "multiple cpu ports?!\n");
+ return -EINVAL;
+ }
+ dst->cpu_switch = ds;
+ dst->cpu_port = i;
+ ds->cpu_port_mask |= 1 << i;
+ } else if (!strcmp(name, "dsa")) {
+ ds->dsa_port_mask |= 1 << i;
+ } else {
+ ds->enabled_port_mask |= 1 << i;
+ }
+ valid_name_found = true;
+ }
+
+ if (!valid_name_found && i == ds->num_ports)
+ return -EINVAL;
+
+ /* Make the built-in MII bus mask match the number of ports,
+ * switch drivers can override this later
+ */
+ ds->phys_mii_mask = ds->enabled_port_mask;
+
+ /*
+ * If the CPU connects to this switch, set the switch tree
+ * tagging protocol to the preferred tagging format of this
+ * switch.
+ */
+ if (dst->cpu_switch == ds) {
+ enum dsa_tag_protocol tag_protocol;
+
+ tag_protocol = ops->get_tag_protocol(ds);
+ dst->tag_ops = dsa_resolve_tag_protocol(tag_protocol);
+ if (IS_ERR(dst->tag_ops))
+ return PTR_ERR(dst->tag_ops);
+
+ dst->rcv = dst->tag_ops->rcv;
+ }
+
+ memcpy(ds->rtable, cd->rtable, sizeof(ds->rtable));
+
+ /*
+ * Do basic register setup.
+ */
+ ret = ops->setup(ds);
+ if (ret < 0)
+ return ret;
+
+ ret = dsa_switch_register_notifier(ds);
+ if (ret)
+ return ret;
+
+ if (ops->set_addr) {
+ ret = ops->set_addr(ds, dst->master_netdev->dev_addr);
+ if (ret < 0)
+ return ret;
+ }
+
+ if (!ds->slave_mii_bus && ops->phy_read) {
+ ds->slave_mii_bus = devm_mdiobus_alloc(parent);
+ if (!ds->slave_mii_bus)
+ return -ENOMEM;
+ dsa_slave_mii_bus_init(ds);
+
+ ret = mdiobus_register(ds->slave_mii_bus);
+ if (ret < 0)
+ return ret;
+ }
+
+ /*
+ * Create network devices for physical switch ports.
+ */
+ for (i = 0; i < ds->num_ports; i++) {
+ ds->ports[i].dn = cd->port_dn[i];
+
+ if (!(ds->enabled_port_mask & (1 << i)))
+ continue;
+
+ ret = dsa_slave_create(ds, parent, i, cd->port_names[i]);
+ if (ret < 0)
+ netdev_err(dst->master_netdev, "[%d]: can't create dsa slave device for port %d(%s): %d\n",
+ index, i, cd->port_names[i], ret);
+ }
+
+ /* Perform configuration of the CPU and DSA ports */
+ ret = dsa_cpu_dsa_setups(ds, parent);
+ if (ret < 0)
+ netdev_err(dst->master_netdev, "[%d] : can't configure CPU and DSA ports\n",
+ index);
+
+ ret = dsa_cpu_port_ethtool_setup(ds);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
+static struct dsa_switch *
+dsa_switch_setup(struct dsa_switch_tree *dst, int index,
+ struct device *parent, struct device *host_dev)
+{
+ struct dsa_chip_data *cd = dst->pd->chip + index;
+ const struct dsa_switch_ops *ops;
+ struct dsa_switch *ds;
+ int ret;
+ const char *name;
+ void *priv;
+
+ /*
+ * Probe for switch model.
+ */
+ ops = dsa_switch_probe(parent, host_dev, cd->sw_addr, &name, &priv);
+ if (!ops) {
+ netdev_err(dst->master_netdev, "[%d]: could not detect attached switch\n",
+ index);
+ return ERR_PTR(-EINVAL);
+ }
+ netdev_info(dst->master_netdev, "[%d]: detected a %s switch\n",
+ index, name);
+
+
+ /*
+ * Allocate and initialise switch state.
+ */
+ ds = dsa_switch_alloc(parent, DSA_MAX_PORTS);
+ if (!ds)
+ return ERR_PTR(-ENOMEM);
+
+ ds->dst = dst;
+ ds->index = index;
+ ds->cd = cd;
+ ds->ops = ops;
+ ds->priv = priv;
+
+ ret = dsa_switch_setup_one(ds, parent);
+ if (ret)
+ return ERR_PTR(ret);
+
+ return ds;
+}
+
+static void dsa_switch_destroy(struct dsa_switch *ds)
+{
+ int port;
+
+ /* Destroy network devices for physical switch ports. */
+ for (port = 0; port < ds->num_ports; port++) {
+ if (!(ds->enabled_port_mask & (1 << port)))
+ continue;
+
+ if (!ds->ports[port].netdev)
+ continue;
+
+ dsa_slave_destroy(ds->ports[port].netdev);
+ }
+
+ /* Disable configuration of the CPU and DSA ports */
+ for (port = 0; port < ds->num_ports; port++) {
+ if (!(dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port)))
+ continue;
+ dsa_cpu_dsa_destroy(&ds->ports[port]);
+
+ /* Clearing a bit which is not set does no harm */
+ ds->cpu_port_mask |= ~(1 << port);
+ ds->dsa_port_mask |= ~(1 << port);
+ }
+
+ if (ds->slave_mii_bus && ds->ops->phy_read)
+ mdiobus_unregister(ds->slave_mii_bus);
+
+ dsa_switch_unregister_notifier(ds);
+}
+
+#ifdef CONFIG_PM_SLEEP
+int dsa_switch_suspend(struct dsa_switch *ds)
+{
+ int i, ret = 0;
+
+ /* Suspend slave network devices */
+ for (i = 0; i < ds->num_ports; i++) {
+ if (!dsa_is_port_initialized(ds, i))
+ continue;
+
+ ret = dsa_slave_suspend(ds->ports[i].netdev);
+ if (ret)
+ return ret;
+ }
+
+ if (ds->ops->suspend)
+ ret = ds->ops->suspend(ds);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(dsa_switch_suspend);
+
+int dsa_switch_resume(struct dsa_switch *ds)
+{
+ int i, ret = 0;
+
+ if (ds->ops->resume)
+ ret = ds->ops->resume(ds);
+
+ if (ret)
+ return ret;
+
+ /* Resume slave network devices */
+ for (i = 0; i < ds->num_ports; i++) {
+ if (!dsa_is_port_initialized(ds, i))
+ continue;
+
+ ret = dsa_slave_resume(ds->ports[i].netdev);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(dsa_switch_resume);
+#endif
+
+/* platform driver init and cleanup *****************************************/
+static int dev_is_class(struct device *dev, void *class)
+{
+ if (dev->class != NULL && !strcmp(dev->class->name, class))
+ return 1;
+
+ return 0;
+}
+
+static struct device *dev_find_class(struct device *parent, char *class)
+{
+ if (dev_is_class(parent, class)) {
+ get_device(parent);
+ return parent;
+ }
+
+ return device_find_child(parent, class, dev_is_class);
+}
+
+struct mii_bus *dsa_host_dev_to_mii_bus(struct device *dev)
+{
+ struct device *d;
+
+ d = dev_find_class(dev, "mdio_bus");
+ if (d != NULL) {
+ struct mii_bus *bus;
+
+ bus = to_mii_bus(d);
+ put_device(d);
+
+ return bus;
+ }
+
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(dsa_host_dev_to_mii_bus);
+
+#ifdef CONFIG_OF
+static int dsa_of_setup_routing_table(struct dsa_platform_data *pd,
+ struct dsa_chip_data *cd,
+ int chip_index, int port_index,
+ struct device_node *link)
+{
+ const __be32 *reg;
+ int link_sw_addr;
+ struct device_node *parent_sw;
+ int len;
+
+ parent_sw = of_get_parent(link);
+ if (!parent_sw)
+ return -EINVAL;
+
+ reg = of_get_property(parent_sw, "reg", &len);
+ if (!reg || (len != sizeof(*reg) * 2))
+ return -EINVAL;
+
+ /*
+ * Get the destination switch number from the second field of its 'reg'
+ * property, i.e. for "reg = <0x19 1>" sw_addr is '1'.
+ */
+ link_sw_addr = be32_to_cpup(reg + 1);
+
+ if (link_sw_addr >= pd->nr_chips)
+ return -EINVAL;
+
+ cd->rtable[link_sw_addr] = port_index;
+
+ return 0;
+}
+
+static int dsa_of_probe_links(struct dsa_platform_data *pd,
+ struct dsa_chip_data *cd,
+ int chip_index, int port_index,
+ struct device_node *port,
+ const char *port_name)
+{
+ struct device_node *link;
+ int link_index;
+ int ret;
+
+ for (link_index = 0;; link_index++) {
+ link = of_parse_phandle(port, "link", link_index);
+ if (!link)
+ break;
+
+ if (!strcmp(port_name, "dsa") && pd->nr_chips > 1) {
+ ret = dsa_of_setup_routing_table(pd, cd, chip_index,
+ port_index, link);
+ if (ret)
+ return ret;
+ }
+ }
+ return 0;
+}
+
+static void dsa_of_free_platform_data(struct dsa_platform_data *pd)
+{
+ int i;
+ int port_index;
+
+ for (i = 0; i < pd->nr_chips; i++) {
+ port_index = 0;
+ while (port_index < DSA_MAX_PORTS) {
+ kfree(pd->chip[i].port_names[port_index]);
+ port_index++;
+ }
+
+ /* Drop our reference to the MDIO bus device */
+ if (pd->chip[i].host_dev)
+ put_device(pd->chip[i].host_dev);
+ }
+ kfree(pd->chip);
+}
+
+static int dsa_of_probe(struct device *dev)
+{
+ struct device_node *np = dev->of_node;
+ struct device_node *child, *mdio, *ethernet, *port;
+ struct mii_bus *mdio_bus, *mdio_bus_switch;
+ struct net_device *ethernet_dev;
+ struct dsa_platform_data *pd;
+ struct dsa_chip_data *cd;
+ const char *port_name;
+ int chip_index, port_index;
+ const unsigned int *sw_addr, *port_reg;
+ u32 eeprom_len;
+ int ret;
+
+ mdio = of_parse_phandle(np, "dsa,mii-bus", 0);
+ if (!mdio)
+ return -EINVAL;
+
+ mdio_bus = of_mdio_find_bus(mdio);
+ if (!mdio_bus)
+ return -EPROBE_DEFER;
+
+ ethernet = of_parse_phandle(np, "dsa,ethernet", 0);
+ if (!ethernet) {
+ ret = -EINVAL;
+ goto out_put_mdio;
+ }
+
+ ethernet_dev = of_find_net_device_by_node(ethernet);
+ if (!ethernet_dev) {
+ ret = -EPROBE_DEFER;
+ goto out_put_mdio;
+ }
+
+ pd = kzalloc(sizeof(*pd), GFP_KERNEL);
+ if (!pd) {
+ ret = -ENOMEM;
+ goto out_put_ethernet;
+ }
+
+ dev->platform_data = pd;
+ pd->of_netdev = ethernet_dev;
+ pd->nr_chips = of_get_available_child_count(np);
+ if (pd->nr_chips > DSA_MAX_SWITCHES)
+ pd->nr_chips = DSA_MAX_SWITCHES;
+
+ pd->chip = kcalloc(pd->nr_chips, sizeof(struct dsa_chip_data),
+ GFP_KERNEL);
+ if (!pd->chip) {
+ ret = -ENOMEM;
+ goto out_free;
+ }
+
+ chip_index = -1;
+ for_each_available_child_of_node(np, child) {
+ int i;
+
+ chip_index++;
+ cd = &pd->chip[chip_index];
+
+ cd->of_node = child;
+
+ /* Initialize the routing table */
+ for (i = 0; i < DSA_MAX_SWITCHES; ++i)
+ cd->rtable[i] = DSA_RTABLE_NONE;
+
+ /* When assigning the host device, increment its refcount */
+ cd->host_dev = get_device(&mdio_bus->dev);
+
+ sw_addr = of_get_property(child, "reg", NULL);
+ if (!sw_addr)
+ continue;
+
+ cd->sw_addr = be32_to_cpup(sw_addr);
+ if (cd->sw_addr >= PHY_MAX_ADDR)
+ continue;
+
+ if (!of_property_read_u32(child, "eeprom-length", &eeprom_len))
+ cd->eeprom_len = eeprom_len;
+
+ mdio = of_parse_phandle(child, "mii-bus", 0);
+ if (mdio) {
+ mdio_bus_switch = of_mdio_find_bus(mdio);
+ if (!mdio_bus_switch) {
+ ret = -EPROBE_DEFER;
+ goto out_free_chip;
+ }
+
+ /* Drop the mdio_bus device ref, replacing the host
+ * device with the mdio_bus_switch device, keeping
+ * the refcount from of_mdio_find_bus() above.
+ */
+ put_device(cd->host_dev);
+ cd->host_dev = &mdio_bus_switch->dev;
+ }
+
+ for_each_available_child_of_node(child, port) {
+ port_reg = of_get_property(port, "reg", NULL);
+ if (!port_reg)
+ continue;
+
+ port_index = be32_to_cpup(port_reg);
+ if (port_index >= DSA_MAX_PORTS)
+ break;
+
+ port_name = of_get_property(port, "label", NULL);
+ if (!port_name)
+ continue;
+
+ cd->port_dn[port_index] = port;
+
+ cd->port_names[port_index] = kstrdup(port_name,
+ GFP_KERNEL);
+ if (!cd->port_names[port_index]) {
+ ret = -ENOMEM;
+ goto out_free_chip;
+ }
+
+ ret = dsa_of_probe_links(pd, cd, chip_index,
+ port_index, port, port_name);
+ if (ret)
+ goto out_free_chip;
+
+ }
+ }
+
+ /* The individual chips hold their own refcount on the mdio bus,
+ * so drop ours */
+ put_device(&mdio_bus->dev);
+
+ return 0;
+
+out_free_chip:
+ dsa_of_free_platform_data(pd);
+out_free:
+ kfree(pd);
+ dev->platform_data = NULL;
+out_put_ethernet:
+ put_device(&ethernet_dev->dev);
+out_put_mdio:
+ put_device(&mdio_bus->dev);
+ return ret;
+}
+
+static void dsa_of_remove(struct device *dev)
+{
+ struct dsa_platform_data *pd = dev->platform_data;
+
+ if (!dev->of_node)
+ return;
+
+ dsa_of_free_platform_data(pd);
+ put_device(&pd->of_netdev->dev);
+ kfree(pd);
+}
+#else
+static inline int dsa_of_probe(struct device *dev)
+{
+ return 0;
+}
+
+static inline void dsa_of_remove(struct device *dev)
+{
+}
+#endif
+
+static int dsa_setup_dst(struct dsa_switch_tree *dst, struct net_device *dev,
+ struct device *parent, struct dsa_platform_data *pd)
+{
+ int i;
+ unsigned configured = 0;
+
+ dst->pd = pd;
+ dst->master_netdev = dev;
+ dst->cpu_port = -1;
+
+ for (i = 0; i < pd->nr_chips; i++) {
+ struct dsa_switch *ds;
+
+ ds = dsa_switch_setup(dst, i, parent, pd->chip[i].host_dev);
+ if (IS_ERR(ds)) {
+ netdev_err(dev, "[%d]: couldn't create dsa switch instance (error %ld)\n",
+ i, PTR_ERR(ds));
+ continue;
+ }
+
+ dst->ds[i] = ds;
+
+ ++configured;
+ }
+
+ /*
+ * If no switch was found, exit cleanly
+ */
+ if (!configured)
+ return -EPROBE_DEFER;
+
+ /*
+ * If we use a tagging format that doesn't have an ethertype
+ * field, make sure that all packets from this point on get
+ * sent to the tag format's receive function.
+ */
+ wmb();
+ dev->dsa_ptr = (void *)dst;
+
+ return 0;
+}
+
+static int dsa_probe(struct platform_device *pdev)
+{
+ struct dsa_platform_data *pd = pdev->dev.platform_data;
+ struct net_device *dev;
+ struct dsa_switch_tree *dst;
+ int ret;
+
+ if (pdev->dev.of_node) {
+ ret = dsa_of_probe(&pdev->dev);
+ if (ret)
+ return ret;
+
+ pd = pdev->dev.platform_data;
+ }
+
+ if (pd == NULL || (pd->netdev == NULL && pd->of_netdev == NULL))
+ return -EINVAL;
+
+ if (pd->of_netdev) {
+ dev = pd->of_netdev;
+ dev_hold(dev);
+ } else {
+ dev = dsa_dev_to_net_device(pd->netdev);
+ }
+ if (dev == NULL) {
+ ret = -EPROBE_DEFER;
+ goto out;
+ }
+
+ if (dev->dsa_ptr != NULL) {
+ dev_put(dev);
+ ret = -EEXIST;
+ goto out;
+ }
+
+ dst = devm_kzalloc(&pdev->dev, sizeof(*dst), GFP_KERNEL);
+ if (dst == NULL) {
+ dev_put(dev);
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ platform_set_drvdata(pdev, dst);
+
+ ret = dsa_setup_dst(dst, dev, &pdev->dev, pd);
+ if (ret) {
+ dev_put(dev);
+ goto out;
+ }
+
+ return 0;
+
+out:
+ dsa_of_remove(&pdev->dev);
+
+ return ret;
+}
+
+static void dsa_remove_dst(struct dsa_switch_tree *dst)
+{
+ int i;
+
+ dst->master_netdev->dsa_ptr = NULL;
+
+ /* If we used a tagging format that doesn't have an ethertype
+ * field, make sure that all packets from this point get sent
+ * without the tag and go through the regular receive path.
+ */
+ wmb();
+
+ for (i = 0; i < dst->pd->nr_chips; i++) {
+ struct dsa_switch *ds = dst->ds[i];
+
+ if (ds)
+ dsa_switch_destroy(ds);
+ }
+
+ dsa_cpu_port_ethtool_restore(dst->cpu_switch);
+
+ dev_put(dst->master_netdev);
+}
+
+static int dsa_remove(struct platform_device *pdev)
+{
+ struct dsa_switch_tree *dst = platform_get_drvdata(pdev);
+
+ dsa_remove_dst(dst);
+ dsa_of_remove(&pdev->dev);
+
+ return 0;
+}
+
+static void dsa_shutdown(struct platform_device *pdev)
+{
+}
+
+#ifdef CONFIG_PM_SLEEP
+static int dsa_suspend(struct device *d)
+{
+ struct platform_device *pdev = to_platform_device(d);
+ struct dsa_switch_tree *dst = platform_get_drvdata(pdev);
+ int i, ret = 0;
+
+ for (i = 0; i < dst->pd->nr_chips; i++) {
+ struct dsa_switch *ds = dst->ds[i];
+
+ if (ds != NULL)
+ ret = dsa_switch_suspend(ds);
+ }
+
+ return ret;
+}
+
+static int dsa_resume(struct device *d)
+{
+ struct platform_device *pdev = to_platform_device(d);
+ struct dsa_switch_tree *dst = platform_get_drvdata(pdev);
+ int i, ret = 0;
+
+ for (i = 0; i < dst->pd->nr_chips; i++) {
+ struct dsa_switch *ds = dst->ds[i];
+
+ if (ds != NULL)
+ ret = dsa_switch_resume(ds);
+ }
+
+ return ret;
+}
+#endif
+
+static SIMPLE_DEV_PM_OPS(dsa_pm_ops, dsa_suspend, dsa_resume);
+
+static const struct of_device_id dsa_of_match_table[] = {
+ { .compatible = "marvell,dsa", },
+ {}
+};
+MODULE_DEVICE_TABLE(of, dsa_of_match_table);
+
+static struct platform_driver dsa_driver = {
+ .probe = dsa_probe,
+ .remove = dsa_remove,
+ .shutdown = dsa_shutdown,
+ .driver = {
+ .name = "dsa",
+ .of_match_table = dsa_of_match_table,
+ .pm = &dsa_pm_ops,
+ },
+};
+
+int dsa_legacy_register(void)
+{
+ return platform_driver_register(&dsa_driver);
+}
+
+void dsa_legacy_unregister(void)
+{
+ platform_driver_unregister(&dsa_driver);
+}
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index c34872e1febc..7693182df81e 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -17,6 +17,7 @@
#include <linux/of_mdio.h>
#include <linux/mdio.h>
#include <linux/list.h>
+#include <net/dsa.h>
#include <net/rtnetlink.h>
#include <net/switchdev.h>
#include <net/pkt_cls.h>
@@ -419,8 +420,8 @@ static int dsa_slave_vlan_filtering(struct net_device *dev,
return 0;
}
-static int dsa_fastest_ageing_time(struct dsa_switch *ds,
- unsigned int ageing_time)
+static unsigned int dsa_fastest_ageing_time(struct dsa_switch *ds,
+ unsigned int ageing_time)
{
int i;
@@ -443,9 +444,13 @@ static int dsa_slave_ageing_time(struct net_device *dev,
unsigned long ageing_jiffies = clock_t_to_jiffies(attr->u.ageing_time);
unsigned int ageing_time = jiffies_to_msecs(ageing_jiffies);
- /* bridge skips -EOPNOTSUPP, so skip the prepare phase */
- if (switchdev_trans_ph_prepare(trans))
+ if (switchdev_trans_ph_prepare(trans)) {
+ if (ds->ageing_time_min && ageing_time < ds->ageing_time_min)
+ return -ERANGE;
+ if (ds->ageing_time_max && ageing_time > ds->ageing_time_max)
+ return -ERANGE;
return 0;
+ }
/* Keep the fastest ageing time in case of multiple bridges */
p->dp->ageing_time = ageing_time;
diff --git a/net/dsa/switch.c b/net/dsa/switch.c
index 6456dacf9ae9..ca6e26e514f0 100644
--- a/net/dsa/switch.c
+++ b/net/dsa/switch.c
@@ -1,7 +1,8 @@
/*
* Handling of a single switch chip, part of a switch fabric
*
- * Copyright (c) 2017 Vivien Didelot <vivien.didelot@savoirfairelinux.com>
+ * Copyright (c) 2017 Savoir-faire Linux Inc.
+ * Vivien Didelot <vivien.didelot@savoirfairelinux.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -19,9 +20,9 @@ static int dsa_switch_bridge_join(struct dsa_switch *ds,
if (ds->index == info->sw_index && ds->ops->port_bridge_join)
return ds->ops->port_bridge_join(ds, info->port, info->br);
- if (ds->index != info->sw_index)
- dev_dbg(ds->dev, "crosschip DSA port %d.%d bridged to %s\n",
- info->sw_index, info->port, netdev_name(info->br));
+ if (ds->index != info->sw_index && ds->ops->crosschip_bridge_join)
+ return ds->ops->crosschip_bridge_join(ds, info->sw_index,
+ info->port, info->br);
return 0;
}
@@ -32,9 +33,9 @@ static int dsa_switch_bridge_leave(struct dsa_switch *ds,
if (ds->index == info->sw_index && ds->ops->port_bridge_leave)
ds->ops->port_bridge_leave(ds, info->port, info->br);
- if (ds->index != info->sw_index)
- dev_dbg(ds->dev, "crosschip DSA port %d.%d unbridged from %s\n",
- info->sw_index, info->port, netdev_name(info->br));
+ if (ds->index != info->sw_index && ds->ops->crosschip_bridge_leave)
+ ds->ops->crosschip_bridge_leave(ds, info->sw_index, info->port,
+ info->br);
return 0;
}
diff --git a/net/dsa/tag_brcm.c b/net/dsa/tag_brcm.c
index 5d925b6b2bb1..2a9b52c5af86 100644
--- a/net/dsa/tag_brcm.c
+++ b/net/dsa/tag_brcm.c
@@ -12,6 +12,7 @@
#include <linux/etherdevice.h>
#include <linux/list.h>
#include <linux/slab.h>
+#include <net/dsa.h>
#include "dsa_priv.h"
/* This tag length is 4 bytes, older ones were 6 bytes, we do not
@@ -91,23 +92,17 @@ out_free:
return NULL;
}
-static int brcm_tag_rcv(struct sk_buff *skb, struct net_device *dev,
- struct packet_type *pt, struct net_device *orig_dev)
+static struct sk_buff *brcm_tag_rcv(struct sk_buff *skb, struct net_device *dev,
+ struct packet_type *pt,
+ struct net_device *orig_dev)
{
struct dsa_switch_tree *dst = dev->dsa_ptr;
struct dsa_switch *ds;
int source_port;
u8 *brcm_tag;
- if (unlikely(dst == NULL))
- goto out_drop;
-
ds = dst->cpu_switch;
- skb = skb_unshare(skb, GFP_ATOMIC);
- if (skb == NULL)
- goto out;
-
if (unlikely(!pskb_may_pull(skb, BRCM_TAG_LEN)))
goto out_drop;
@@ -139,22 +134,12 @@ static int brcm_tag_rcv(struct sk_buff *skb, struct net_device *dev,
skb->data - ETH_HLEN - BRCM_TAG_LEN,
2 * ETH_ALEN);
- skb_push(skb, ETH_HLEN);
- skb->pkt_type = PACKET_HOST;
skb->dev = ds->ports[source_port].netdev;
- skb->protocol = eth_type_trans(skb, skb->dev);
-
- skb->dev->stats.rx_packets++;
- skb->dev->stats.rx_bytes += skb->len;
- netif_receive_skb(skb);
-
- return 0;
+ return skb;
out_drop:
- kfree_skb(skb);
-out:
- return 0;
+ return NULL;
}
const struct dsa_device_ops brcm_netdev_ops = {
diff --git a/net/dsa/tag_dsa.c b/net/dsa/tag_dsa.c
index 72579ceea381..1c6633f0de01 100644
--- a/net/dsa/tag_dsa.c
+++ b/net/dsa/tag_dsa.c
@@ -11,6 +11,7 @@
#include <linux/etherdevice.h>
#include <linux/list.h>
#include <linux/slab.h>
+#include <net/dsa.h>
#include "dsa_priv.h"
#define DSA_HLEN 4
@@ -67,8 +68,9 @@ out_free:
return NULL;
}
-static int dsa_rcv(struct sk_buff *skb, struct net_device *dev,
- struct packet_type *pt, struct net_device *orig_dev)
+static struct sk_buff *dsa_rcv(struct sk_buff *skb, struct net_device *dev,
+ struct packet_type *pt,
+ struct net_device *orig_dev)
{
struct dsa_switch_tree *dst = dev->dsa_ptr;
struct dsa_switch *ds;
@@ -76,13 +78,6 @@ static int dsa_rcv(struct sk_buff *skb, struct net_device *dev,
int source_device;
int source_port;
- if (unlikely(dst == NULL))
- goto out_drop;
-
- skb = skb_unshare(skb, GFP_ATOMIC);
- if (skb == NULL)
- goto out;
-
if (unlikely(!pskb_may_pull(skb, DSA_HLEN)))
goto out_drop;
@@ -164,21 +159,11 @@ static int dsa_rcv(struct sk_buff *skb, struct net_device *dev,
}
skb->dev = ds->ports[source_port].netdev;
- skb_push(skb, ETH_HLEN);
- skb->pkt_type = PACKET_HOST;
- skb->protocol = eth_type_trans(skb, skb->dev);
-
- skb->dev->stats.rx_packets++;
- skb->dev->stats.rx_bytes += skb->len;
- netif_receive_skb(skb);
-
- return 0;
+ return skb;
out_drop:
- kfree_skb(skb);
-out:
- return 0;
+ return NULL;
}
const struct dsa_device_ops dsa_netdev_ops = {
diff --git a/net/dsa/tag_edsa.c b/net/dsa/tag_edsa.c
index 648c051817a1..d9c668aa5e54 100644
--- a/net/dsa/tag_edsa.c
+++ b/net/dsa/tag_edsa.c
@@ -11,6 +11,7 @@
#include <linux/etherdevice.h>
#include <linux/list.h>
#include <linux/slab.h>
+#include <net/dsa.h>
#include "dsa_priv.h"
#define DSA_HLEN 4
@@ -80,8 +81,9 @@ out_free:
return NULL;
}
-static int edsa_rcv(struct sk_buff *skb, struct net_device *dev,
- struct packet_type *pt, struct net_device *orig_dev)
+static struct sk_buff *edsa_rcv(struct sk_buff *skb, struct net_device *dev,
+ struct packet_type *pt,
+ struct net_device *orig_dev)
{
struct dsa_switch_tree *dst = dev->dsa_ptr;
struct dsa_switch *ds;
@@ -89,13 +91,6 @@ static int edsa_rcv(struct sk_buff *skb, struct net_device *dev,
int source_device;
int source_port;
- if (unlikely(dst == NULL))
- goto out_drop;
-
- skb = skb_unshare(skb, GFP_ATOMIC);
- if (skb == NULL)
- goto out;
-
if (unlikely(!pskb_may_pull(skb, EDSA_HLEN)))
goto out_drop;
@@ -183,21 +178,11 @@ static int edsa_rcv(struct sk_buff *skb, struct net_device *dev,
}
skb->dev = ds->ports[source_port].netdev;
- skb_push(skb, ETH_HLEN);
- skb->pkt_type = PACKET_HOST;
- skb->protocol = eth_type_trans(skb, skb->dev);
-
- skb->dev->stats.rx_packets++;
- skb->dev->stats.rx_bytes += skb->len;
- netif_receive_skb(skb);
-
- return 0;
+ return skb;
out_drop:
- kfree_skb(skb);
-out:
- return 0;
+ return NULL;
}
const struct dsa_device_ops edsa_netdev_ops = {
diff --git a/net/dsa/tag_lan9303.c b/net/dsa/tag_lan9303.c
new file mode 100644
index 000000000000..70130ed5c21a
--- /dev/null
+++ b/net/dsa/tag_lan9303.c
@@ -0,0 +1,136 @@
+/*
+ * Copyright (C) 2017 Pengutronix, Juergen Borleis <jbe@pengutronix.de>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+#include <linux/etherdevice.h>
+#include <linux/list.h>
+#include <linux/slab.h>
+#include <net/dsa.h>
+#include "dsa_priv.h"
+
+/* To define the outgoing port and to discover the incoming port a regular
+ * VLAN tag is used by the LAN9303. But its VID meaning is 'special':
+ *
+ * Dest MAC Src MAC TAG Type
+ * ...| 1 2 3 4 5 6 | 1 2 3 4 5 6 | 1 2 3 4 | 1 2 |...
+ * |<------->|
+ * TAG:
+ * |<------------->|
+ * | 1 2 | 3 4 |
+ * TPID VID
+ * 0x8100
+ *
+ * VID bit 3 indicates a request for an ALR lookup.
+ *
+ * If VID bit 3 is zero, then bits 0 and 1 specify the destination port
+ * (0, 1, 2) or broadcast (3) or the source port (1, 2).
+ *
+ * VID bit 4 is used to specify if the STP port state should be overridden.
+ * Required when no forwarding between the external ports should happen.
+ */
+
+#define LAN9303_TAG_LEN 4
+#define LAN9303_MAX_PORTS 3
+
+static struct sk_buff *lan9303_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+ struct dsa_slave_priv *p = netdev_priv(dev);
+ u16 *lan9303_tag;
+
+ /* insert a special VLAN tag between the MAC addresses
+ * and the current ethertype field.
+ */
+ if (skb_cow_head(skb, LAN9303_TAG_LEN) < 0) {
+ dev_dbg(&dev->dev,
+ "Cannot make room for the special tag. Dropping packet\n");
+ goto out_free;
+ }
+
+ /* provide 'LAN9303_TAG_LEN' bytes additional space */
+ skb_push(skb, LAN9303_TAG_LEN);
+
+ /* make room between MACs and Ether-Type */
+ memmove(skb->data, skb->data + LAN9303_TAG_LEN, 2 * ETH_ALEN);
+
+ lan9303_tag = (u16 *)(skb->data + 2 * ETH_ALEN);
+ lan9303_tag[0] = htons(ETH_P_8021Q);
+ lan9303_tag[1] = htons(p->dp->index | BIT(4));
+
+ return skb;
+out_free:
+ kfree_skb(skb);
+ return NULL;
+}
+
+static struct sk_buff *lan9303_rcv(struct sk_buff *skb, struct net_device *dev,
+ struct packet_type *pt, struct net_device *orig_dev)
+{
+ u16 *lan9303_tag;
+ struct dsa_switch_tree *dst = dev->dsa_ptr;
+ struct dsa_switch *ds;
+ unsigned int source_port;
+
+ ds = dst->ds[0];
+
+ if (unlikely(!ds)) {
+ dev_warn_ratelimited(&dev->dev, "Dropping packet, due to missing DSA switch device\n");
+ return NULL;
+ }
+
+ if (unlikely(!pskb_may_pull(skb, LAN9303_TAG_LEN))) {
+ dev_warn_ratelimited(&dev->dev,
+ "Dropping packet, cannot pull\n");
+ return NULL;
+ }
+
+ /* '->data' points into the middle of our special VLAN tag information:
+ *
+ * ~ MAC src | 0x81 | 0x00 | 0xyy | 0xzz | ether type
+ * ^
+ * ->data
+ */
+ lan9303_tag = (u16 *)(skb->data - 2);
+
+ if (lan9303_tag[0] != htons(ETH_P_8021Q)) {
+ dev_warn_ratelimited(&dev->dev, "Dropping packet due to invalid VLAN marker\n");
+ return NULL;
+ }
+
+ source_port = ntohs(lan9303_tag[1]) & 0x3;
+
+ if (source_port >= LAN9303_MAX_PORTS) {
+ dev_warn_ratelimited(&dev->dev, "Dropping packet due to invalid source port\n");
+ return NULL;
+ }
+
+ if (!ds->ports[source_port].netdev) {
+ dev_warn_ratelimited(&dev->dev, "Dropping packet due to invalid netdev or device\n");
+ return NULL;
+ }
+
+ /* remove the special VLAN tag between the MAC addresses
+ * and the current ethertype field.
+ */
+ skb_pull_rcsum(skb, 2 + 2);
+ memmove(skb->data - ETH_HLEN, skb->data - (ETH_HLEN + LAN9303_TAG_LEN),
+ 2 * ETH_ALEN);
+
+ /* forward the packet to the dedicated interface */
+ skb->dev = ds->ports[source_port].netdev;
+
+ return skb;
+}
+
+const struct dsa_device_ops lan9303_netdev_ops = {
+ .xmit = lan9303_xmit,
+ .rcv = lan9303_rcv,
+};
diff --git a/net/dsa/tag_mtk.c b/net/dsa/tag_mtk.c
new file mode 100644
index 000000000000..837cdddb53f0
--- /dev/null
+++ b/net/dsa/tag_mtk.c
@@ -0,0 +1,100 @@
+/*
+ * Mediatek DSA Tag support
+ * Copyright (C) 2017 Landen Chao <landen.chao@mediatek.com>
+ * Sean Wang <sean.wang@mediatek.com>
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/etherdevice.h>
+#include <net/dsa.h>
+#include "dsa_priv.h"
+
+#define MTK_HDR_LEN 4
+#define MTK_HDR_RECV_SOURCE_PORT_MASK GENMASK(2, 0)
+#define MTK_HDR_XMIT_DP_BIT_MASK GENMASK(5, 0)
+
+static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb,
+ struct net_device *dev)
+{
+ struct dsa_slave_priv *p = netdev_priv(dev);
+ u8 *mtk_tag;
+
+ if (skb_cow_head(skb, MTK_HDR_LEN) < 0)
+ goto out_free;
+
+ skb_push(skb, MTK_HDR_LEN);
+
+ memmove(skb->data, skb->data + MTK_HDR_LEN, 2 * ETH_ALEN);
+
+ /* Build the tag after the MAC Source Address */
+ mtk_tag = skb->data + 2 * ETH_ALEN;
+ mtk_tag[0] = 0;
+ mtk_tag[1] = (1 << p->dp->index) & MTK_HDR_XMIT_DP_BIT_MASK;
+ mtk_tag[2] = 0;
+ mtk_tag[3] = 0;
+
+ return skb;
+
+out_free:
+ kfree_skb(skb);
+ return NULL;
+}
+
+static struct sk_buff *mtk_tag_rcv(struct sk_buff *skb, struct net_device *dev,
+ struct packet_type *pt,
+ struct net_device *orig_dev)
+{
+ struct dsa_switch_tree *dst = dev->dsa_ptr;
+ struct dsa_switch *ds;
+ int port;
+ __be16 *phdr, hdr;
+
+ if (unlikely(!pskb_may_pull(skb, MTK_HDR_LEN)))
+ goto out_drop;
+
+ /* The MTK header is added by the switch between src addr
+ * and ethertype at this point, skb->data points to 2 bytes
+ * after src addr so header should be 2 bytes right before.
+ */
+ phdr = (__be16 *)(skb->data - 2);
+ hdr = ntohs(*phdr);
+
+ /* Remove MTK tag and recalculate checksum. */
+ skb_pull_rcsum(skb, MTK_HDR_LEN);
+
+ memmove(skb->data - ETH_HLEN,
+ skb->data - ETH_HLEN - MTK_HDR_LEN,
+ 2 * ETH_ALEN);
+
+ /* This protocol doesn't support cascading multiple
+ * switches so it's safe to assume the switch is first
+ * in the tree.
+ */
+ ds = dst->ds[0];
+ if (!ds)
+ goto out_drop;
+
+ /* Get source port information */
+ port = (hdr & MTK_HDR_RECV_SOURCE_PORT_MASK);
+ if (!ds->ports[port].netdev)
+ goto out_drop;
+
+ skb->dev = ds->ports[port].netdev;
+
+ return skb;
+
+out_drop:
+ return NULL;
+}
+
+const struct dsa_device_ops mtk_netdev_ops = {
+ .xmit = mtk_tag_xmit,
+ .rcv = mtk_tag_rcv,
+};
diff --git a/net/dsa/tag_qca.c b/net/dsa/tag_qca.c
index 30240f343aea..3ba3f59f7a34 100644
--- a/net/dsa/tag_qca.c
+++ b/net/dsa/tag_qca.c
@@ -12,6 +12,7 @@
*/
#include <linux/etherdevice.h>
+#include <net/dsa.h>
#include "dsa_priv.h"
#define QCA_HDR_LEN 2
@@ -65,8 +66,9 @@ out_free:
return NULL;
}
-static int qca_tag_rcv(struct sk_buff *skb, struct net_device *dev,
- struct packet_type *pt, struct net_device *orig_dev)
+static struct sk_buff *qca_tag_rcv(struct sk_buff *skb, struct net_device *dev,
+ struct packet_type *pt,
+ struct net_device *orig_dev)
{
struct dsa_switch_tree *dst = dev->dsa_ptr;
struct dsa_switch *ds;
@@ -74,13 +76,6 @@ static int qca_tag_rcv(struct sk_buff *skb, struct net_device *dev,
int port;
__be16 *phdr, hdr;
- if (unlikely(!dst))
- goto out_drop;
-
- skb = skb_unshare(skb, GFP_ATOMIC);
- if (!skb)
- goto out;
-
if (unlikely(!pskb_may_pull(skb, QCA_HDR_LEN)))
goto out_drop;
@@ -114,22 +109,12 @@ static int qca_tag_rcv(struct sk_buff *skb, struct net_device *dev,
goto out_drop;
/* Update skb & forward the frame accordingly */
- skb_push(skb, ETH_HLEN);
- skb->pkt_type = PACKET_HOST;
skb->dev = ds->ports[port].netdev;
- skb->protocol = eth_type_trans(skb, skb->dev);
-
- skb->dev->stats.rx_packets++;
- skb->dev->stats.rx_bytes += skb->len;
- netif_receive_skb(skb);
-
- return 0;
+ return skb;
out_drop:
- kfree_skb(skb);
-out:
- return 0;
+ return NULL;
}
const struct dsa_device_ops qca_netdev_ops = {
diff --git a/net/dsa/tag_trailer.c b/net/dsa/tag_trailer.c
index 26f977176978..aafc2fc74c30 100644
--- a/net/dsa/tag_trailer.c
+++ b/net/dsa/tag_trailer.c
@@ -11,6 +11,7 @@
#include <linux/etherdevice.h>
#include <linux/list.h>
#include <linux/slab.h>
+#include <net/dsa.h>
#include "dsa_priv.h"
static struct sk_buff *trailer_xmit(struct sk_buff *skb, struct net_device *dev)
@@ -57,22 +58,17 @@ static struct sk_buff *trailer_xmit(struct sk_buff *skb, struct net_device *dev)
return nskb;
}
-static int trailer_rcv(struct sk_buff *skb, struct net_device *dev,
- struct packet_type *pt, struct net_device *orig_dev)
+static struct sk_buff *trailer_rcv(struct sk_buff *skb, struct net_device *dev,
+ struct packet_type *pt,
+ struct net_device *orig_dev)
{
struct dsa_switch_tree *dst = dev->dsa_ptr;
struct dsa_switch *ds;
u8 *trailer;
int source_port;
- if (unlikely(dst == NULL))
- goto out_drop;
ds = dst->cpu_switch;
- skb = skb_unshare(skb, GFP_ATOMIC);
- if (skb == NULL)
- goto out;
-
if (skb_linearize(skb))
goto out_drop;
@@ -88,21 +84,11 @@ static int trailer_rcv(struct sk_buff *skb, struct net_device *dev,
pskb_trim_rcsum(skb, skb->len - 4);
skb->dev = ds->ports[source_port].netdev;
- skb_push(skb, ETH_HLEN);
- skb->pkt_type = PACKET_HOST;
- skb->protocol = eth_type_trans(skb, skb->dev);
-
- skb->dev->stats.rx_packets++;
- skb->dev->stats.rx_bytes += skb->len;
- netif_receive_skb(skb);
-
- return 0;
+ return skb;
out_drop:
- kfree_skb(skb);
-out:
- return 0;
+ return NULL;
}
const struct dsa_device_ops trailer_netdev_ops = {
diff --git a/net/hsr/hsr_netlink.c b/net/hsr/hsr_netlink.c
index 1ab30e7d3f99..81dac16933fc 100644
--- a/net/hsr/hsr_netlink.c
+++ b/net/hsr/hsr_netlink.c
@@ -350,7 +350,7 @@ static int hsr_get_node_status(struct sk_buff *skb_in, struct genl_info *info)
return 0;
invalid:
- netlink_ack(skb_in, nlmsg_hdr(skb_in), -EINVAL);
+ netlink_ack(skb_in, nlmsg_hdr(skb_in), -EINVAL, NULL);
return 0;
nla_put_failure:
@@ -432,7 +432,7 @@ static int hsr_get_node_list(struct sk_buff *skb_in, struct genl_info *info)
return 0;
invalid:
- netlink_ack(skb_in, nlmsg_hdr(skb_in), -EINVAL);
+ netlink_ack(skb_in, nlmsg_hdr(skb_in), -EINVAL, NULL);
return 0;
nla_put_failure:
diff --git a/net/ieee802154/nl802154.c b/net/ieee802154/nl802154.c
index fc60cd061f39..99f6c254ea77 100644
--- a/net/ieee802154/nl802154.c
+++ b/net/ieee802154/nl802154.c
@@ -249,8 +249,7 @@ nl802154_prepare_wpan_dev_dump(struct sk_buff *skb,
if (!cb->args[0]) {
err = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl802154_fam.hdrsize,
genl_family_attrbuf(&nl802154_fam),
- nl802154_fam.maxattr,
- nl802154_policy);
+ nl802154_fam.maxattr, nl802154_policy, NULL);
if (err)
goto out_unlock;
@@ -562,8 +561,8 @@ static int nl802154_dump_wpan_phy_parse(struct sk_buff *skb,
struct nl802154_dump_wpan_phy_state *state)
{
struct nlattr **tb = genl_family_attrbuf(&nl802154_fam);
- int ret = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl802154_fam.hdrsize,
- tb, nl802154_fam.maxattr, nl802154_policy);
+ int ret = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl802154_fam.hdrsize, tb,
+ nl802154_fam.maxattr, nl802154_policy, NULL);
/* TODO check if we can handle error here,
* we have no backward compatibility
@@ -1308,7 +1307,7 @@ ieee802154_llsec_parse_dev_addr(struct nlattr *nla,
struct nlattr *attrs[NL802154_DEV_ADDR_ATTR_MAX + 1];
if (!nla || nla_parse_nested(attrs, NL802154_DEV_ADDR_ATTR_MAX, nla,
- nl802154_dev_addr_policy))
+ nl802154_dev_addr_policy, NULL))
return -EINVAL;
if (!attrs[NL802154_DEV_ADDR_ATTR_PAN_ID] ||
@@ -1348,7 +1347,7 @@ ieee802154_llsec_parse_key_id(struct nlattr *nla,
struct nlattr *attrs[NL802154_KEY_ID_ATTR_MAX + 1];
if (!nla || nla_parse_nested(attrs, NL802154_KEY_ID_ATTR_MAX, nla,
- nl802154_key_id_policy))
+ nl802154_key_id_policy, NULL))
return -EINVAL;
if (!attrs[NL802154_KEY_ID_ATTR_MODE])
@@ -1565,7 +1564,7 @@ static int nl802154_add_llsec_key(struct sk_buff *skb, struct genl_info *info)
if (nla_parse_nested(attrs, NL802154_KEY_ATTR_MAX,
info->attrs[NL802154_ATTR_SEC_KEY],
- nl802154_key_policy))
+ nl802154_key_policy, info->extack))
return -EINVAL;
if (!attrs[NL802154_KEY_ATTR_USAGE_FRAMES] ||
@@ -1615,7 +1614,7 @@ static int nl802154_del_llsec_key(struct sk_buff *skb, struct genl_info *info)
if (nla_parse_nested(attrs, NL802154_KEY_ATTR_MAX,
info->attrs[NL802154_ATTR_SEC_KEY],
- nl802154_key_policy))
+ nl802154_key_policy, info->extack))
return -EINVAL;
if (ieee802154_llsec_parse_key_id(attrs[NL802154_KEY_ATTR_ID], &id) < 0)
@@ -1729,8 +1728,8 @@ ieee802154_llsec_parse_device(struct nlattr *nla,
{
struct nlattr *attrs[NL802154_DEV_ATTR_MAX + 1];
- if (!nla || nla_parse_nested(attrs, NL802154_DEV_ATTR_MAX, nla,
- nl802154_dev_policy))
+ if (!nla || nla_parse_nested(attrs, NL802154_DEV_ATTR_MAX,
+ nla, nl802154_dev_policy, NULL))
return -EINVAL;
memset(dev, 0, sizeof(*dev));
@@ -1783,7 +1782,7 @@ static int nl802154_del_llsec_dev(struct sk_buff *skb, struct genl_info *info)
if (nla_parse_nested(attrs, NL802154_DEV_ATTR_MAX,
info->attrs[NL802154_ATTR_SEC_DEVICE],
- nl802154_dev_policy))
+ nl802154_dev_policy, info->extack))
return -EINVAL;
if (!attrs[NL802154_DEV_ATTR_EXTENDED_ADDR])
@@ -1911,7 +1910,7 @@ static int nl802154_add_llsec_devkey(struct sk_buff *skb, struct genl_info *info
if (!info->attrs[NL802154_ATTR_SEC_DEVKEY] ||
nla_parse_nested(attrs, NL802154_DEVKEY_ATTR_MAX,
info->attrs[NL802154_ATTR_SEC_DEVKEY],
- nl802154_devkey_policy) < 0)
+ nl802154_devkey_policy, info->extack) < 0)
return -EINVAL;
if (!attrs[NL802154_DEVKEY_ATTR_FRAME_COUNTER] ||
@@ -1943,7 +1942,7 @@ static int nl802154_del_llsec_devkey(struct sk_buff *skb, struct genl_info *info
if (nla_parse_nested(attrs, NL802154_DEVKEY_ATTR_MAX,
info->attrs[NL802154_ATTR_SEC_DEVKEY],
- nl802154_devkey_policy))
+ nl802154_devkey_policy, info->extack))
return -EINVAL;
if (!attrs[NL802154_DEVKEY_ATTR_EXTENDED_ADDR])
@@ -2063,8 +2062,8 @@ llsec_parse_seclevel(struct nlattr *nla, struct ieee802154_llsec_seclevel *sl)
{
struct nlattr *attrs[NL802154_SECLEVEL_ATTR_MAX + 1];
- if (!nla || nla_parse_nested(attrs, NL802154_SECLEVEL_ATTR_MAX, nla,
- nl802154_seclevel_policy))
+ if (!nla || nla_parse_nested(attrs, NL802154_SECLEVEL_ATTR_MAX,
+ nla, nl802154_seclevel_policy, NULL))
return -EINVAL;
memset(sl, 0, sizeof(*sl));
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index c6d4238ff94a..f83de23a30e7 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -11,7 +11,7 @@ obj-y := route.o inetpeer.o protocol.o \
tcp_rate.o tcp_recovery.o \
tcp_offload.o datagram.o raw.o udp.o udplite.o \
udp_offload.o arp.o icmp.o devinet.o af_inet.o igmp.o \
- fib_frontend.o fib_semantics.o fib_trie.o \
+ fib_frontend.o fib_semantics.o fib_trie.o fib_notifier.o \
inet_fragment.o ping.o ip_tunnel_core.o gre_offload.o
obj-$(CONFIG_NET_IP_TUNNEL) += ip_tunnel.o
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 13a9a3297eae..f3dad1661343 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1602,8 +1602,9 @@ static const struct net_protocol igmp_protocol = {
};
#endif
-static const struct net_protocol tcp_protocol = {
+static struct net_protocol tcp_protocol = {
.early_demux = tcp_v4_early_demux,
+ .early_demux_handler = tcp_v4_early_demux,
.handler = tcp_v4_rcv,
.err_handler = tcp_v4_err,
.no_policy = 1,
@@ -1611,8 +1612,9 @@ static const struct net_protocol tcp_protocol = {
.icmp_strict_tag_validation = 1,
};
-static const struct net_protocol udp_protocol = {
+static struct net_protocol udp_protocol = {
.early_demux = udp_v4_early_demux,
+ .early_demux_handler = udp_v4_early_demux,
.handler = udp_rcv,
.err_handler = udp_err,
.no_policy = 1,
@@ -1723,6 +1725,8 @@ static __net_init int inet_init_net(struct net *net)
net->ipv4.sysctl_ip_default_ttl = IPDEFTTL;
net->ipv4.sysctl_ip_dynaddr = 0;
net->ipv4.sysctl_ip_early_demux = 1;
+ net->ipv4.sysctl_udp_early_demux = 1;
+ net->ipv4.sysctl_tcp_early_demux = 1;
#ifdef CONFIG_SYSCTL
net->ipv4.sysctl_ip_prot_sock = PROT_SOCK;
#endif
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 51b27ae09fbd..d54345a06f72 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -653,6 +653,7 @@ static int arp_process(struct net *net, struct sock *sk, struct sk_buff *skb)
unsigned char *arp_ptr;
struct rtable *rt;
unsigned char *sha;
+ unsigned char *tha = NULL;
__be32 sip, tip;
u16 dev_type = dev->type;
int addr_type;
@@ -724,6 +725,7 @@ static int arp_process(struct net *net, struct sock *sk, struct sk_buff *skb)
break;
#endif
default:
+ tha = arp_ptr;
arp_ptr += dev->addr_len;
}
memcpy(&tip, arp_ptr, 4);
@@ -842,8 +844,18 @@ static int arp_process(struct net *net, struct sock *sk, struct sk_buff *skb)
It is possible, that this option should be enabled for some
devices (strip is candidate)
*/
- is_garp = arp->ar_op == htons(ARPOP_REQUEST) && tip == sip &&
- addr_type == RTN_UNICAST;
+ is_garp = tip == sip && addr_type == RTN_UNICAST;
+
+ /* Unsolicited ARP _replies_ also require target hwaddr to be
+ * the same as source.
+ */
+ if (is_garp && arp->ar_op == htons(ARPOP_REPLY))
+ is_garp =
+ /* IPv4 over IEEE 1394 doesn't provide target
+ * hardware address field in its ARP payload.
+ */
+ tha &&
+ !memcmp(tha, sha, dev->addr_len);
if (!n &&
((arp->ar_op == htons(ARPOP_REPLY) &&
@@ -872,7 +884,7 @@ static int arp_process(struct net *net, struct sock *sk, struct sk_buff *skb)
skb->pkt_type != PACKET_HOST)
state = NUD_STALE;
neigh_update(n, sha, state,
- override ? NEIGH_UPDATE_F_OVERRIDE : 0);
+ override ? NEIGH_UPDATE_F_OVERRIDE : 0, 0);
neigh_release(n);
}
@@ -1033,7 +1045,7 @@ static int arp_req_set(struct net *net, struct arpreq *r,
err = neigh_update(neigh, (r->arp_flags & ATF_COM) ?
r->arp_ha.sa_data : NULL, state,
NEIGH_UPDATE_F_OVERRIDE |
- NEIGH_UPDATE_F_ADMIN);
+ NEIGH_UPDATE_F_ADMIN, 0);
neigh_release(neigh);
}
return err;
@@ -1084,7 +1096,7 @@ static int arp_invalidate(struct net_device *dev, __be32 ip)
if (neigh->nud_state & ~NUD_NOARP)
err = neigh_update(neigh, NULL, NUD_FAILED,
NEIGH_UPDATE_F_OVERRIDE|
- NEIGH_UPDATE_F_ADMIN);
+ NEIGH_UPDATE_F_ADMIN, 0);
neigh_release(neigh);
}
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index cebedd545e5e..df14815a3b8c 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -571,7 +571,8 @@ static int ip_mc_config(struct sock *sk, bool join, const struct in_ifaddr *ifa)
return ret;
}
-static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
struct nlattr *tb[IFA_MAX+1];
@@ -582,7 +583,8 @@ static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh)
ASSERT_RTNL();
- err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
+ err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy,
+ extack);
if (err < 0)
goto errout;
@@ -752,7 +754,8 @@ static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
struct in_device *in_dev;
int err;
- err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
+ err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy,
+ NULL);
if (err < 0)
goto errout;
@@ -843,7 +846,8 @@ static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
return NULL;
}
-static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
struct in_ifaddr *ifa;
@@ -1192,6 +1196,18 @@ out:
return done;
}
+static __be32 in_dev_select_addr(const struct in_device *in_dev,
+ int scope)
+{
+ for_primary_ifa(in_dev) {
+ if (ifa->ifa_scope != RT_SCOPE_LINK &&
+ ifa->ifa_scope <= scope)
+ return ifa->ifa_local;
+ } endfor_ifa(in_dev);
+
+ return 0;
+}
+
__be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
{
__be32 addr = 0;
@@ -1228,13 +1244,9 @@ no_in_dev:
if (master_idx &&
(dev = dev_get_by_index_rcu(net, master_idx)) &&
(in_dev = __in_dev_get_rcu(dev))) {
- for_primary_ifa(in_dev) {
- if (ifa->ifa_scope != RT_SCOPE_LINK &&
- ifa->ifa_scope <= scope) {
- addr = ifa->ifa_local;
- goto out_unlock;
- }
- } endfor_ifa(in_dev);
+ addr = in_dev_select_addr(in_dev, scope);
+ if (addr)
+ goto out_unlock;
}
/* Not loopback addresses on loopback should be preferred
@@ -1249,13 +1261,9 @@ no_in_dev:
if (!in_dev)
continue;
- for_primary_ifa(in_dev) {
- if (ifa->ifa_scope != RT_SCOPE_LINK &&
- ifa->ifa_scope <= scope) {
- addr = ifa->ifa_local;
- goto out_unlock;
- }
- } endfor_ifa(in_dev);
+ addr = in_dev_select_addr(in_dev, scope);
+ if (addr)
+ goto out_unlock;
}
out_unlock:
rcu_read_unlock();
@@ -1713,7 +1721,7 @@ static int inet_validate_link_af(const struct net_device *dev,
if (dev && !__in_dev_get_rtnl(dev))
return -EAFNOSUPPORT;
- err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy);
+ err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy, NULL);
if (err < 0)
return err;
@@ -1741,7 +1749,7 @@ static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
if (!in_dev)
return -EAFNOSUPPORT;
- if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL) < 0)
+ if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL, NULL) < 0)
BUG();
if (tb[IFLA_INET_CONF]) {
@@ -1798,6 +1806,9 @@ static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
goto nla_put_failure;
+ if (!devconf)
+ goto out;
+
if ((all || type == NETCONFA_FORWARDING) &&
nla_put_s32(skb, NETCONFA_FORWARDING,
IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
@@ -1819,6 +1830,7 @@ static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
IPV4_DEVCONF(*devconf, IGNORE_ROUTES_WITH_LINKDOWN)) < 0)
goto nla_put_failure;
+out:
nlmsg_end(skb, nlh);
return 0;
@@ -1827,8 +1839,8 @@ nla_put_failure:
return -EMSGSIZE;
}
-void inet_netconf_notify_devconf(struct net *net, int type, int ifindex,
- struct ipv4_devconf *devconf)
+void inet_netconf_notify_devconf(struct net *net, int event, int type,
+ int ifindex, struct ipv4_devconf *devconf)
{
struct sk_buff *skb;
int err = -ENOBUFS;
@@ -1838,7 +1850,7 @@ void inet_netconf_notify_devconf(struct net *net, int type, int ifindex,
goto errout;
err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
- RTM_NEWNETCONF, 0, type);
+ event, 0, type);
if (err < 0) {
/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
WARN_ON(err == -EMSGSIZE);
@@ -1861,7 +1873,8 @@ static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
};
static int inet_netconf_get_devconf(struct sk_buff *in_skb,
- struct nlmsghdr *nlh)
+ struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(in_skb->sk);
struct nlattr *tb[NETCONFA_MAX+1];
@@ -1874,7 +1887,7 @@ static int inet_netconf_get_devconf(struct sk_buff *in_skb,
int err;
err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
- devconf_ipv4_policy);
+ devconf_ipv4_policy, extack);
if (err < 0)
goto errout;
@@ -2017,10 +2030,12 @@ static void inet_forward_change(struct net *net)
IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
- inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
+ inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
+ NETCONFA_FORWARDING,
NETCONFA_IFINDEX_ALL,
net->ipv4.devconf_all);
- inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
+ inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
+ NETCONFA_FORWARDING,
NETCONFA_IFINDEX_DEFAULT,
net->ipv4.devconf_dflt);
@@ -2033,7 +2048,8 @@ static void inet_forward_change(struct net *net)
in_dev = __in_dev_get_rtnl(dev);
if (in_dev) {
IN_DEV_CONF_SET(in_dev, FORWARDING, on);
- inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
+ inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
+ NETCONFA_FORWARDING,
dev->ifindex, &in_dev->cnf);
}
}
@@ -2078,19 +2094,22 @@ static int devinet_conf_proc(struct ctl_table *ctl, int write,
if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
new_value != old_value) {
ifindex = devinet_conf_ifindex(net, cnf);
- inet_netconf_notify_devconf(net, NETCONFA_RP_FILTER,
+ inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
+ NETCONFA_RP_FILTER,
ifindex, cnf);
}
if (i == IPV4_DEVCONF_PROXY_ARP - 1 &&
new_value != old_value) {
ifindex = devinet_conf_ifindex(net, cnf);
- inet_netconf_notify_devconf(net, NETCONFA_PROXY_NEIGH,
+ inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
+ NETCONFA_PROXY_NEIGH,
ifindex, cnf);
}
if (i == IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN - 1 &&
new_value != old_value) {
ifindex = devinet_conf_ifindex(net, cnf);
- inet_netconf_notify_devconf(net, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
+ inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
+ NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
ifindex, cnf);
}
}
@@ -2125,7 +2144,7 @@ static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
container_of(cnf, struct in_device, cnf);
if (*valp)
dev_disable_lro(idev->dev);
- inet_netconf_notify_devconf(net,
+ inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
NETCONFA_FORWARDING,
idev->dev->ifindex,
cnf);
@@ -2133,7 +2152,8 @@ static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
rtnl_unlock();
rt_cache_flush(net);
} else
- inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
+ inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
+ NETCONFA_FORWARDING,
NETCONFA_IFINDEX_DEFAULT,
net->ipv4.devconf_dflt);
}
@@ -2255,7 +2275,8 @@ static int __devinet_sysctl_register(struct net *net, char *dev_name,
p->sysctl = t;
- inet_netconf_notify_devconf(net, NETCONFA_ALL, ifindex, p);
+ inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_ALL,
+ ifindex, p);
return 0;
free:
@@ -2264,16 +2285,18 @@ out:
return -ENOBUFS;
}
-static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
+static void __devinet_sysctl_unregister(struct net *net,
+ struct ipv4_devconf *cnf, int ifindex)
{
struct devinet_sysctl_table *t = cnf->sysctl;
- if (!t)
- return;
+ if (t) {
+ cnf->sysctl = NULL;
+ unregister_net_sysctl_table(t->sysctl_header);
+ kfree(t);
+ }
- cnf->sysctl = NULL;
- unregister_net_sysctl_table(t->sysctl_header);
- kfree(t);
+ inet_netconf_notify_devconf(net, RTM_DELNETCONF, 0, ifindex, NULL);
}
static int devinet_sysctl_register(struct in_device *idev)
@@ -2295,7 +2318,9 @@ static int devinet_sysctl_register(struct in_device *idev)
static void devinet_sysctl_unregister(struct in_device *idev)
{
- __devinet_sysctl_unregister(&idev->cnf);
+ struct net *net = dev_net(idev->dev);
+
+ __devinet_sysctl_unregister(net, &idev->cnf, idev->dev->ifindex);
neigh_sysctl_unregister(idev->arp_parms);
}
@@ -2370,9 +2395,9 @@ static __net_init int devinet_init_net(struct net *net)
#ifdef CONFIG_SYSCTL
err_reg_ctl:
- __devinet_sysctl_unregister(dflt);
+ __devinet_sysctl_unregister(net, dflt, NETCONFA_IFINDEX_DEFAULT);
err_reg_dflt:
- __devinet_sysctl_unregister(all);
+ __devinet_sysctl_unregister(net, all, NETCONFA_IFINDEX_ALL);
err_reg_all:
if (tbl != ctl_forward_entry)
kfree(tbl);
@@ -2394,8 +2419,10 @@ static __net_exit void devinet_exit_net(struct net *net)
tbl = net->ipv4.forw_hdr->ctl_table_arg;
unregister_net_sysctl_table(net->ipv4.forw_hdr);
- __devinet_sysctl_unregister(net->ipv4.devconf_dflt);
- __devinet_sysctl_unregister(net->ipv4.devconf_all);
+ __devinet_sysctl_unregister(net, net->ipv4.devconf_dflt,
+ NETCONFA_IFINDEX_DEFAULT);
+ __devinet_sysctl_unregister(net, net->ipv4.devconf_all,
+ NETCONFA_IFINDEX_ALL);
kfree(tbl);
#endif
kfree(net->ipv4.devconf_dflt);
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index b1e24446e297..65cc02bd82bc 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -152,21 +152,28 @@ static void esp_output_restore_header(struct sk_buff *skb)
}
static struct ip_esp_hdr *esp_output_set_extra(struct sk_buff *skb,
+ struct xfrm_state *x,
struct ip_esp_hdr *esph,
struct esp_output_extra *extra)
{
- struct xfrm_state *x = skb_dst(skb)->xfrm;
-
/* For ESN we move the header forward by 4 bytes to
* accomodate the high bits. We will move it back after
* encryption.
*/
if ((x->props.flags & XFRM_STATE_ESN)) {
+ __u32 seqhi;
+ struct xfrm_offload *xo = xfrm_offload(skb);
+
+ if (xo)
+ seqhi = xo->seq.hi;
+ else
+ seqhi = XFRM_SKB_CB(skb)->seq.output.hi;
+
extra->esphoff = (unsigned char *)esph -
skb_transport_header(skb);
esph = (struct ip_esp_hdr *)((unsigned char *)esph - 4);
extra->seqhi = esph->spi;
- esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.hi);
+ esph->seq_no = htonl(seqhi);
}
esph->spi = x->id.spi;
@@ -198,98 +205,56 @@ static void esp_output_fill_trailer(u8 *tail, int tfclen, int plen, __u8 proto)
tail[plen - 1] = proto;
}
-static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
+static void esp_output_udp_encap(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *esp)
{
- struct esp_output_extra *extra;
- int err = -ENOMEM;
- struct ip_esp_hdr *esph;
- struct crypto_aead *aead;
- struct aead_request *req;
- struct scatterlist *sg, *dsg;
- struct sk_buff *trailer;
- struct page *page;
- void *tmp;
- u8 *iv;
- u8 *tail;
- u8 *vaddr;
- int blksize;
- int clen;
- int alen;
- int plen;
- int ivlen;
- int tfclen;
- int nfrags;
- int assoclen;
- int extralen;
- int tailen;
- __be64 seqno;
- __u8 proto = *skb_mac_header(skb);
-
- /* skb is pure payload to encrypt */
-
- aead = x->data;
- alen = crypto_aead_authsize(aead);
- ivlen = crypto_aead_ivsize(aead);
-
- tfclen = 0;
- if (x->tfcpad) {
- struct xfrm_dst *dst = (struct xfrm_dst *)skb_dst(skb);
- u32 padto;
-
- padto = min(x->tfcpad, esp4_get_mtu(x, dst->child_mtu_cached));
- if (skb->len < padto)
- tfclen = padto - skb->len;
+ int encap_type;
+ struct udphdr *uh;
+ __be32 *udpdata32;
+ __be16 sport, dport;
+ struct xfrm_encap_tmpl *encap = x->encap;
+ struct ip_esp_hdr *esph = esp->esph;
+
+ spin_lock_bh(&x->lock);
+ sport = encap->encap_sport;
+ dport = encap->encap_dport;
+ encap_type = encap->encap_type;
+ spin_unlock_bh(&x->lock);
+
+ uh = (struct udphdr *)esph;
+ uh->source = sport;
+ uh->dest = dport;
+ uh->len = htons(skb->len + esp->tailen
+ - skb_transport_offset(skb));
+ uh->check = 0;
+
+ switch (encap_type) {
+ default:
+ case UDP_ENCAP_ESPINUDP:
+ esph = (struct ip_esp_hdr *)(uh + 1);
+ break;
+ case UDP_ENCAP_ESPINUDP_NON_IKE:
+ udpdata32 = (__be32 *)(uh + 1);
+ udpdata32[0] = udpdata32[1] = 0;
+ esph = (struct ip_esp_hdr *)(udpdata32 + 2);
+ break;
}
- blksize = ALIGN(crypto_aead_blocksize(aead), 4);
- clen = ALIGN(skb->len + 2 + tfclen, blksize);
- plen = clen - skb->len - tfclen;
- tailen = tfclen + plen + alen;
- assoclen = sizeof(*esph);
- extralen = 0;
- if (x->props.flags & XFRM_STATE_ESN) {
- extralen += sizeof(*extra);
- assoclen += sizeof(__be32);
- }
+ *skb_mac_header(skb) = IPPROTO_UDP;
+ esp->esph = esph;
+}
- *skb_mac_header(skb) = IPPROTO_ESP;
- esph = ip_esp_hdr(skb);
+int esp_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *esp)
+{
+ u8 *tail;
+ u8 *vaddr;
+ int nfrags;
+ struct page *page;
+ struct sk_buff *trailer;
+ int tailen = esp->tailen;
/* this is non-NULL only with UDP Encapsulation */
- if (x->encap) {
- struct xfrm_encap_tmpl *encap = x->encap;
- struct udphdr *uh;
- __be32 *udpdata32;
- __be16 sport, dport;
- int encap_type;
-
- spin_lock_bh(&x->lock);
- sport = encap->encap_sport;
- dport = encap->encap_dport;
- encap_type = encap->encap_type;
- spin_unlock_bh(&x->lock);
-
- uh = (struct udphdr *)esph;
- uh->source = sport;
- uh->dest = dport;
- uh->len = htons(skb->len + tailen
- - skb_transport_offset(skb));
- uh->check = 0;
-
- switch (encap_type) {
- default:
- case UDP_ENCAP_ESPINUDP:
- esph = (struct ip_esp_hdr *)(uh + 1);
- break;
- case UDP_ENCAP_ESPINUDP_NON_IKE:
- udpdata32 = (__be32 *)(uh + 1);
- udpdata32[0] = udpdata32[1] = 0;
- esph = (struct ip_esp_hdr *)(udpdata32 + 2);
- break;
- }
-
- *skb_mac_header(skb) = IPPROTO_UDP;
- }
+ if (x->encap)
+ esp_output_udp_encap(x, skb, esp);
if (!skb_cloned(skb)) {
if (tailen <= skb_availroom(skb)) {
@@ -304,6 +269,8 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
struct sock *sk = skb->sk;
struct page_frag *pfrag = &x->xfrag;
+ esp->inplace = false;
+
allocsize = ALIGN(tailen, L1_CACHE_BYTES);
spin_lock_bh(&x->lock);
@@ -320,10 +287,12 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
tail = vaddr + pfrag->offset;
- esp_output_fill_trailer(tail, tfclen, plen, proto);
+ esp_output_fill_trailer(tail, esp->tfclen, esp->plen, esp->proto);
kunmap_atomic(vaddr);
+ spin_unlock_bh(&x->lock);
+
nfrags = skb_shinfo(skb)->nr_frags;
__skb_fill_page_desc(skb, nfrags, page, pfrag->offset,
@@ -339,107 +308,113 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
if (sk)
atomic_add(tailen, &sk->sk_wmem_alloc);
- skb_push(skb, -skb_network_offset(skb));
-
- esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low);
- esph->spi = x->id.spi;
-
- tmp = esp_alloc_tmp(aead, nfrags + 2, extralen);
- if (!tmp) {
- spin_unlock_bh(&x->lock);
- err = -ENOMEM;
- goto error;
- }
-
- extra = esp_tmp_extra(tmp);
- iv = esp_tmp_iv(aead, tmp, extralen);
- req = esp_tmp_req(aead, iv);
- sg = esp_req_sg(aead, req);
- dsg = &sg[nfrags];
-
- esph = esp_output_set_extra(skb, esph, extra);
-
- sg_init_table(sg, nfrags);
- skb_to_sgvec(skb, sg,
- (unsigned char *)esph - skb->data,
- assoclen + ivlen + clen + alen);
-
- allocsize = ALIGN(skb->data_len, L1_CACHE_BYTES);
-
- if (unlikely(!skb_page_frag_refill(allocsize, pfrag, GFP_ATOMIC))) {
- spin_unlock_bh(&x->lock);
- err = -ENOMEM;
- goto error;
- }
-
- skb_shinfo(skb)->nr_frags = 1;
-
- page = pfrag->page;
- get_page(page);
- /* replace page frags in skb with new page */
- __skb_fill_page_desc(skb, 0, page, pfrag->offset, skb->data_len);
- pfrag->offset = pfrag->offset + allocsize;
-
- sg_init_table(dsg, skb_shinfo(skb)->nr_frags + 1);
- skb_to_sgvec(skb, dsg,
- (unsigned char *)esph - skb->data,
- assoclen + ivlen + clen + alen);
-
- spin_unlock_bh(&x->lock);
-
- goto skip_cow2;
+ goto out;
}
}
cow:
- err = skb_cow_data(skb, tailen, &trailer);
- if (err < 0)
- goto error;
- nfrags = err;
+ nfrags = skb_cow_data(skb, tailen, &trailer);
+ if (nfrags < 0)
+ goto out;
tail = skb_tail_pointer(trailer);
- esph = ip_esp_hdr(skb);
+ esp->esph = ip_esp_hdr(skb);
skip_cow:
- esp_output_fill_trailer(tail, tfclen, plen, proto);
+ esp_output_fill_trailer(tail, esp->tfclen, esp->plen, esp->proto);
+ pskb_put(skb, trailer, tailen);
- pskb_put(skb, trailer, clen - skb->len + alen);
- skb_push(skb, -skb_network_offset(skb));
- esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low);
- esph->spi = x->id.spi;
+out:
+ return nfrags;
+}
+EXPORT_SYMBOL_GPL(esp_output_head);
- tmp = esp_alloc_tmp(aead, nfrags, extralen);
- if (!tmp) {
- err = -ENOMEM;
- goto error;
+int esp_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *esp)
+{
+ u8 *iv;
+ int alen;
+ void *tmp;
+ int ivlen;
+ int assoclen;
+ int extralen;
+ struct page *page;
+ struct ip_esp_hdr *esph;
+ struct crypto_aead *aead;
+ struct aead_request *req;
+ struct scatterlist *sg, *dsg;
+ struct esp_output_extra *extra;
+ int err = -ENOMEM;
+
+ assoclen = sizeof(struct ip_esp_hdr);
+ extralen = 0;
+
+ if (x->props.flags & XFRM_STATE_ESN) {
+ extralen += sizeof(*extra);
+ assoclen += sizeof(__be32);
}
+ aead = x->data;
+ alen = crypto_aead_authsize(aead);
+ ivlen = crypto_aead_ivsize(aead);
+
+ tmp = esp_alloc_tmp(aead, esp->nfrags + 2, extralen);
+ if (!tmp)
+ goto error;
+
extra = esp_tmp_extra(tmp);
iv = esp_tmp_iv(aead, tmp, extralen);
req = esp_tmp_req(aead, iv);
sg = esp_req_sg(aead, req);
- dsg = sg;
- esph = esp_output_set_extra(skb, esph, extra);
+ if (esp->inplace)
+ dsg = sg;
+ else
+ dsg = &sg[esp->nfrags];
- sg_init_table(sg, nfrags);
+ esph = esp_output_set_extra(skb, x, esp->esph, extra);
+ esp->esph = esph;
+
+ sg_init_table(sg, esp->nfrags);
skb_to_sgvec(skb, sg,
(unsigned char *)esph - skb->data,
- assoclen + ivlen + clen + alen);
+ assoclen + ivlen + esp->clen + alen);
+
+ if (!esp->inplace) {
+ int allocsize;
+ struct page_frag *pfrag = &x->xfrag;
+
+ allocsize = ALIGN(skb->data_len, L1_CACHE_BYTES);
+
+ spin_lock_bh(&x->lock);
+ if (unlikely(!skb_page_frag_refill(allocsize, pfrag, GFP_ATOMIC))) {
+ spin_unlock_bh(&x->lock);
+ goto error;
+ }
+
+ skb_shinfo(skb)->nr_frags = 1;
+
+ page = pfrag->page;
+ get_page(page);
+ /* replace page frags in skb with new page */
+ __skb_fill_page_desc(skb, 0, page, pfrag->offset, skb->data_len);
+ pfrag->offset = pfrag->offset + allocsize;
+ spin_unlock_bh(&x->lock);
+
+ sg_init_table(dsg, skb_shinfo(skb)->nr_frags + 1);
+ skb_to_sgvec(skb, dsg,
+ (unsigned char *)esph - skb->data,
+ assoclen + ivlen + esp->clen + alen);
+ }
-skip_cow2:
if ((x->props.flags & XFRM_STATE_ESN))
aead_request_set_callback(req, 0, esp_output_done_esn, skb);
else
aead_request_set_callback(req, 0, esp_output_done, skb);
- aead_request_set_crypt(req, sg, dsg, ivlen + clen, iv);
+ aead_request_set_crypt(req, sg, dsg, ivlen + esp->clen, iv);
aead_request_set_ad(req, assoclen);
- seqno = cpu_to_be64(XFRM_SKB_CB(skb)->seq.output.low +
- ((u64)XFRM_SKB_CB(skb)->seq.output.hi << 32));
-
memset(iv, 0, ivlen);
- memcpy(iv + ivlen - min(ivlen, 8), (u8 *)&seqno + 8 - min(ivlen, 8),
+ memcpy(iv + ivlen - min(ivlen, 8), (u8 *)&esp->seqno + 8 - min(ivlen, 8),
min(ivlen, 8));
ESP_SKB_CB(skb)->tmp = tmp;
@@ -465,11 +440,63 @@ skip_cow2:
error:
return err;
}
+EXPORT_SYMBOL_GPL(esp_output_tail);
-static int esp_input_done2(struct sk_buff *skb, int err)
+static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
+{
+ int alen;
+ int blksize;
+ struct ip_esp_hdr *esph;
+ struct crypto_aead *aead;
+ struct esp_info esp;
+
+ esp.inplace = true;
+
+ esp.proto = *skb_mac_header(skb);
+ *skb_mac_header(skb) = IPPROTO_ESP;
+
+ /* skb is pure payload to encrypt */
+
+ aead = x->data;
+ alen = crypto_aead_authsize(aead);
+
+ esp.tfclen = 0;
+ if (x->tfcpad) {
+ struct xfrm_dst *dst = (struct xfrm_dst *)skb_dst(skb);
+ u32 padto;
+
+ padto = min(x->tfcpad, esp4_get_mtu(x, dst->child_mtu_cached));
+ if (skb->len < padto)
+ esp.tfclen = padto - skb->len;
+ }
+ blksize = ALIGN(crypto_aead_blocksize(aead), 4);
+ esp.clen = ALIGN(skb->len + 2 + esp.tfclen, blksize);
+ esp.plen = esp.clen - skb->len - esp.tfclen;
+ esp.tailen = esp.tfclen + esp.plen + alen;
+
+ esp.esph = ip_esp_hdr(skb);
+
+ esp.nfrags = esp_output_head(x, skb, &esp);
+ if (esp.nfrags < 0)
+ return esp.nfrags;
+
+ esph = esp.esph;
+ esph->spi = x->id.spi;
+
+ esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low);
+ esp.seqno = cpu_to_be64(XFRM_SKB_CB(skb)->seq.output.low +
+ ((u64)XFRM_SKB_CB(skb)->seq.output.hi << 32));
+
+ skb_push(skb, -skb_network_offset(skb));
+
+ return esp_output_tail(x, skb, &esp);
+}
+
+int esp_input_done2(struct sk_buff *skb, int err)
{
const struct iphdr *iph;
struct xfrm_state *x = xfrm_input_state(skb);
+ struct xfrm_offload *xo = xfrm_offload(skb);
struct crypto_aead *aead = x->data;
int alen = crypto_aead_authsize(aead);
int hlen = sizeof(struct ip_esp_hdr) + crypto_aead_ivsize(aead);
@@ -478,7 +505,8 @@ static int esp_input_done2(struct sk_buff *skb, int err)
u8 nexthdr[2];
int padlen;
- kfree(ESP_SKB_CB(skb)->tmp);
+ if (!xo || (xo && !(xo->flags & CRYPTO_DONE)))
+ kfree(ESP_SKB_CB(skb)->tmp);
if (unlikely(err))
goto out;
@@ -549,6 +577,7 @@ static int esp_input_done2(struct sk_buff *skb, int err)
out:
return err;
}
+EXPORT_SYMBOL_GPL(esp_input_done2);
static void esp_input_done(struct crypto_async_request *base, int err)
{
@@ -751,13 +780,17 @@ static int esp_init_aead(struct xfrm_state *x)
char aead_name[CRYPTO_MAX_ALG_NAME];
struct crypto_aead *aead;
int err;
+ u32 mask = 0;
err = -ENAMETOOLONG;
if (snprintf(aead_name, CRYPTO_MAX_ALG_NAME, "%s(%s)",
x->geniv, x->aead->alg_name) >= CRYPTO_MAX_ALG_NAME)
goto error;
- aead = crypto_alloc_aead(aead_name, 0, 0);
+ if (x->xso.offload_handle)
+ mask |= CRYPTO_ALG_ASYNC;
+
+ aead = crypto_alloc_aead(aead_name, 0, mask);
err = PTR_ERR(aead);
if (IS_ERR(aead))
goto error;
@@ -787,6 +820,7 @@ static int esp_init_authenc(struct xfrm_state *x)
char authenc_name[CRYPTO_MAX_ALG_NAME];
unsigned int keylen;
int err;
+ u32 mask = 0;
err = -EINVAL;
if (!x->ealg)
@@ -812,7 +846,10 @@ static int esp_init_authenc(struct xfrm_state *x)
goto error;
}
- aead = crypto_alloc_aead(authenc_name, 0, 0);
+ if (x->xso.offload_handle)
+ mask |= CRYPTO_ALG_ASYNC;
+
+ aead = crypto_alloc_aead(authenc_name, 0, mask);
err = PTR_ERR(aead);
if (IS_ERR(aead))
goto error;
@@ -931,7 +968,7 @@ static const struct xfrm_type esp_type =
.destructor = esp_destroy,
.get_mtu = esp4_get_mtu,
.input = esp_input,
- .output = esp_output
+ .output = esp_output,
};
static struct xfrm4_protocol esp4_protocol = {
diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c
index 1de442632406..e0666016a764 100644
--- a/net/ipv4/esp4_offload.c
+++ b/net/ipv4/esp4_offload.c
@@ -43,27 +43,31 @@ static struct sk_buff **esp4_gro_receive(struct sk_buff **head,
if ((err = xfrm_parse_spi(skb, IPPROTO_ESP, &spi, &seq)) != 0)
goto out;
- err = secpath_set(skb);
- if (err)
- goto out;
+ xo = xfrm_offload(skb);
+ if (!xo || !(xo->flags & CRYPTO_DONE)) {
+ err = secpath_set(skb);
+ if (err)
+ goto out;
- if (skb->sp->len == XFRM_MAX_DEPTH)
- goto out;
+ if (skb->sp->len == XFRM_MAX_DEPTH)
+ goto out;
- x = xfrm_state_lookup(dev_net(skb->dev), skb->mark,
- (xfrm_address_t *)&ip_hdr(skb)->daddr,
- spi, IPPROTO_ESP, AF_INET);
- if (!x)
- goto out;
+ x = xfrm_state_lookup(dev_net(skb->dev), skb->mark,
+ (xfrm_address_t *)&ip_hdr(skb)->daddr,
+ spi, IPPROTO_ESP, AF_INET);
+ if (!x)
+ goto out;
- skb->sp->xvec[skb->sp->len++] = x;
- skb->sp->olen++;
+ skb->sp->xvec[skb->sp->len++] = x;
+ skb->sp->olen++;
- xo = xfrm_offload(skb);
- if (!xo) {
- xfrm_state_put(x);
- goto out;
+ xo = xfrm_offload(skb);
+ if (!xo) {
+ xfrm_state_put(x);
+ goto out;
+ }
}
+
xo->flags |= XFRM_GRO;
XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = NULL;
@@ -84,19 +88,214 @@ out:
return NULL;
}
+static void esp4_gso_encap(struct xfrm_state *x, struct sk_buff *skb)
+{
+ struct ip_esp_hdr *esph;
+ struct iphdr *iph = ip_hdr(skb);
+ struct xfrm_offload *xo = xfrm_offload(skb);
+ int proto = iph->protocol;
+
+ skb_push(skb, -skb_network_offset(skb));
+ esph = ip_esp_hdr(skb);
+ *skb_mac_header(skb) = IPPROTO_ESP;
+
+ esph->spi = x->id.spi;
+ esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low);
+
+ xo->proto = proto;
+}
+
+static struct sk_buff *esp4_gso_segment(struct sk_buff *skb,
+ netdev_features_t features)
+{
+ __u32 seq;
+ int err = 0;
+ struct sk_buff *skb2;
+ struct xfrm_state *x;
+ struct ip_esp_hdr *esph;
+ struct crypto_aead *aead;
+ struct sk_buff *segs = ERR_PTR(-EINVAL);
+ netdev_features_t esp_features = features;
+ struct xfrm_offload *xo = xfrm_offload(skb);
+
+ if (!xo)
+ goto out;
+
+ seq = xo->seq.low;
+
+ x = skb->sp->xvec[skb->sp->len - 1];
+ aead = x->data;
+ esph = ip_esp_hdr(skb);
+
+ if (esph->spi != x->id.spi)
+ goto out;
+
+ if (!pskb_may_pull(skb, sizeof(*esph) + crypto_aead_ivsize(aead)))
+ goto out;
+
+ __skb_pull(skb, sizeof(*esph) + crypto_aead_ivsize(aead));
+
+ skb->encap_hdr_csum = 1;
+
+ if (!(features & NETIF_F_HW_ESP))
+ esp_features = features & ~(NETIF_F_SG | NETIF_F_CSUM_MASK);
+
+ segs = x->outer_mode->gso_segment(x, skb, esp_features);
+ if (IS_ERR_OR_NULL(segs))
+ goto out;
+
+ __skb_pull(skb, skb->data - skb_mac_header(skb));
+
+ skb2 = segs;
+ do {
+ struct sk_buff *nskb = skb2->next;
+
+ xo = xfrm_offload(skb2);
+ xo->flags |= XFRM_GSO_SEGMENT;
+ xo->seq.low = seq;
+ xo->seq.hi = xfrm_replay_seqhi(x, seq);
+
+ if(!(features & NETIF_F_HW_ESP))
+ xo->flags |= CRYPTO_FALLBACK;
+
+ x->outer_mode->xmit(x, skb2);
+
+ err = x->type_offload->xmit(x, skb2, esp_features);
+ if (err) {
+ kfree_skb_list(segs);
+ return ERR_PTR(err);
+ }
+
+ if (!skb_is_gso(skb2))
+ seq++;
+ else
+ seq += skb_shinfo(skb2)->gso_segs;
+
+ skb_push(skb2, skb2->mac_len);
+ skb2 = nskb;
+ } while (skb2);
+
+out:
+ return segs;
+}
+
+static int esp_input_tail(struct xfrm_state *x, struct sk_buff *skb)
+{
+ struct crypto_aead *aead = x->data;
+
+ if (!pskb_may_pull(skb, sizeof(struct ip_esp_hdr) + crypto_aead_ivsize(aead)))
+ return -EINVAL;
+
+ skb->ip_summed = CHECKSUM_NONE;
+
+ return esp_input_done2(skb, 0);
+}
+
+static int esp_xmit(struct xfrm_state *x, struct sk_buff *skb, netdev_features_t features)
+{
+ int err;
+ int alen;
+ int blksize;
+ struct xfrm_offload *xo;
+ struct ip_esp_hdr *esph;
+ struct crypto_aead *aead;
+ struct esp_info esp;
+ bool hw_offload = true;
+
+ esp.inplace = true;
+
+ xo = xfrm_offload(skb);
+
+ if (!xo)
+ return -EINVAL;
+
+ if (!(features & NETIF_F_HW_ESP) || !x->xso.offload_handle ||
+ (x->xso.dev != skb->dev)) {
+ xo->flags |= CRYPTO_FALLBACK;
+ hw_offload = false;
+ }
+
+ esp.proto = xo->proto;
+
+ /* skb is pure payload to encrypt */
+
+ aead = x->data;
+ alen = crypto_aead_authsize(aead);
+
+ esp.tfclen = 0;
+ /* XXX: Add support for tfc padding here. */
+
+ blksize = ALIGN(crypto_aead_blocksize(aead), 4);
+ esp.clen = ALIGN(skb->len + 2 + esp.tfclen, blksize);
+ esp.plen = esp.clen - skb->len - esp.tfclen;
+ esp.tailen = esp.tfclen + esp.plen + alen;
+
+ esp.esph = ip_esp_hdr(skb);
+
+
+ if (!hw_offload || (hw_offload && !skb_is_gso(skb))) {
+ esp.nfrags = esp_output_head(x, skb, &esp);
+ if (esp.nfrags < 0)
+ return esp.nfrags;
+ }
+
+ esph = esp.esph;
+ esph->spi = x->id.spi;
+
+ skb_push(skb, -skb_network_offset(skb));
+
+ if (xo->flags & XFRM_GSO_SEGMENT) {
+ esph->seq_no = htonl(xo->seq.low);
+ } else {
+ ip_hdr(skb)->tot_len = htons(skb->len);
+ ip_send_check(ip_hdr(skb));
+ }
+
+ if (hw_offload)
+ return 0;
+
+ esp.seqno = cpu_to_be64(xo->seq.low + ((u64)xo->seq.hi << 32));
+
+ err = esp_output_tail(x, skb, &esp);
+ if (err < 0)
+ return err;
+
+ secpath_reset(skb);
+
+ return 0;
+}
+
static const struct net_offload esp4_offload = {
.callbacks = {
.gro_receive = esp4_gro_receive,
+ .gso_segment = esp4_gso_segment,
},
};
+static const struct xfrm_type_offload esp_type_offload = {
+ .description = "ESP4 OFFLOAD",
+ .owner = THIS_MODULE,
+ .proto = IPPROTO_ESP,
+ .input_tail = esp_input_tail,
+ .xmit = esp_xmit,
+ .encap = esp4_gso_encap,
+};
+
static int __init esp4_offload_init(void)
{
+ if (xfrm_register_type_offload(&esp_type_offload, AF_INET) < 0) {
+ pr_info("%s: can't add xfrm type offload\n", __func__);
+ return -EAGAIN;
+ }
+
return inet_add_offload(&esp4_offload, IPPROTO_ESP);
}
static void __exit esp4_offload_exit(void)
{
+ if (xfrm_unregister_type_offload(&esp_type_offload, AF_INET) < 0)
+ pr_info("%s: can't remove xfrm type offload\n", __func__);
+
inet_del_offload(&esp4_offload, IPPROTO_ESP);
}
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 8f2133ffc2ff..83e3ed258467 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -632,7 +632,8 @@ static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
int err, remaining;
struct rtmsg *rtm;
- err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy);
+ err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy,
+ NULL);
if (err < 0)
goto errout;
@@ -709,7 +710,8 @@ errout:
return err;
}
-static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
struct fib_config cfg;
@@ -731,7 +733,8 @@ errout:
return err;
}
-static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
struct fib_config cfg;
@@ -760,7 +763,7 @@ static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
unsigned int e = 0, s_e;
struct fib_table *tb;
struct hlist_head *head;
- int dumped = 0;
+ int dumped = 0, err;
if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) &&
((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED)
@@ -780,20 +783,27 @@ static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
if (dumped)
memset(&cb->args[2], 0, sizeof(cb->args) -
2 * sizeof(cb->args[0]));
- if (fib_table_dump(tb, skb, cb) < 0)
- goto out;
+ err = fib_table_dump(tb, skb, cb);
+ if (err < 0) {
+ if (likely(skb->len))
+ goto out;
+
+ goto out_err;
+ }
dumped = 1;
next:
e++;
}
}
out:
+ err = skb->len;
+out_err:
rcu_read_unlock();
cb->args[1] = e;
cb->args[0] = h;
- return skb->len;
+ return err;
}
/* Prepare and feed intra-kernel routing request.
@@ -1127,7 +1137,8 @@ static void fib_disable_ip(struct net_device *dev, unsigned long event,
{
if (fib_sync_down_dev(dev, event, force))
fib_flush(dev_net(dev));
- rt_cache_flush(dev_net(dev));
+ else
+ rt_cache_flush(dev_net(dev));
arp_ifdown(dev);
}
diff --git a/net/ipv4/fib_notifier.c b/net/ipv4/fib_notifier.c
new file mode 100644
index 000000000000..e0714d975947
--- /dev/null
+++ b/net/ipv4/fib_notifier.c
@@ -0,0 +1,86 @@
+#include <linux/rtnetlink.h>
+#include <linux/notifier.h>
+#include <linux/rcupdate.h>
+#include <linux/kernel.h>
+#include <net/net_namespace.h>
+#include <net/netns/ipv4.h>
+#include <net/ip_fib.h>
+
+static ATOMIC_NOTIFIER_HEAD(fib_chain);
+
+int call_fib_notifier(struct notifier_block *nb, struct net *net,
+ enum fib_event_type event_type,
+ struct fib_notifier_info *info)
+{
+ info->net = net;
+ return nb->notifier_call(nb, event_type, info);
+}
+
+int call_fib_notifiers(struct net *net, enum fib_event_type event_type,
+ struct fib_notifier_info *info)
+{
+ net->ipv4.fib_seq++;
+ info->net = net;
+ return atomic_notifier_call_chain(&fib_chain, event_type, info);
+}
+
+static unsigned int fib_seq_sum(void)
+{
+ unsigned int fib_seq = 0;
+ struct net *net;
+
+ rtnl_lock();
+ for_each_net(net)
+ fib_seq += net->ipv4.fib_seq;
+ rtnl_unlock();
+
+ return fib_seq;
+}
+
+static bool fib_dump_is_consistent(struct notifier_block *nb,
+ void (*cb)(struct notifier_block *nb),
+ unsigned int fib_seq)
+{
+ atomic_notifier_chain_register(&fib_chain, nb);
+ if (fib_seq == fib_seq_sum())
+ return true;
+ atomic_notifier_chain_unregister(&fib_chain, nb);
+ if (cb)
+ cb(nb);
+ return false;
+}
+
+#define FIB_DUMP_MAX_RETRIES 5
+int register_fib_notifier(struct notifier_block *nb,
+ void (*cb)(struct notifier_block *nb))
+{
+ int retries = 0;
+
+ do {
+ unsigned int fib_seq = fib_seq_sum();
+ struct net *net;
+
+ /* Mutex semantics guarantee that every change done to
+ * FIB tries before we read the change sequence counter
+ * is now visible to us.
+ */
+ rcu_read_lock();
+ for_each_net_rcu(net) {
+ fib_rules_notify(net, nb);
+ fib_notify(net, nb);
+ }
+ rcu_read_unlock();
+
+ if (fib_dump_is_consistent(nb, cb, fib_seq))
+ return 0;
+ } while (++retries < FIB_DUMP_MAX_RETRIES);
+
+ return -EBUSY;
+}
+EXPORT_SYMBOL(register_fib_notifier);
+
+int unregister_fib_notifier(struct notifier_block *nb)
+{
+ return atomic_notifier_chain_unregister(&fib_chain, nb);
+}
+EXPORT_SYMBOL(unregister_fib_notifier);
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 2e50062f642d..778ecf977eb2 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -47,6 +47,27 @@ struct fib4_rule {
#endif
};
+static bool fib4_rule_matchall(const struct fib_rule *rule)
+{
+ struct fib4_rule *r = container_of(rule, struct fib4_rule, common);
+
+ if (r->dst_len || r->src_len || r->tos)
+ return false;
+ return fib_rule_matchall(rule);
+}
+
+bool fib4_rule_default(const struct fib_rule *rule)
+{
+ if (!fib4_rule_matchall(rule) || rule->action != FR_ACT_TO_TBL ||
+ rule->l3mdev)
+ return false;
+ if (rule->table != RT_TABLE_LOCAL && rule->table != RT_TABLE_MAIN &&
+ rule->table != RT_TABLE_DEFAULT)
+ return false;
+ return true;
+}
+EXPORT_SYMBOL_GPL(fib4_rule_default);
+
int __fib_lookup(struct net *net, struct flowi4 *flp,
struct fib_result *res, unsigned int flags)
{
@@ -164,12 +185,36 @@ static struct fib_table *fib_empty_table(struct net *net)
return NULL;
}
+static int call_fib_rule_notifier(struct notifier_block *nb, struct net *net,
+ enum fib_event_type event_type,
+ struct fib_rule *rule)
+{
+ struct fib_rule_notifier_info info = {
+ .rule = rule,
+ };
+
+ return call_fib_notifier(nb, net, event_type, &info.info);
+}
+
static int call_fib_rule_notifiers(struct net *net,
- enum fib_event_type event_type)
+ enum fib_event_type event_type,
+ struct fib_rule *rule)
+{
+ struct fib_rule_notifier_info info = {
+ .rule = rule,
+ };
+
+ return call_fib_notifiers(net, event_type, &info.info);
+}
+
+/* Called with rcu_read_lock() */
+void fib_rules_notify(struct net *net, struct notifier_block *nb)
{
- struct fib_notifier_info info;
+ struct fib_rules_ops *ops = net->ipv4.rules_ops;
+ struct fib_rule *rule;
- return call_fib_notifiers(net, event_type, &info);
+ list_for_each_entry_rcu(rule, &ops->rules_list, list)
+ call_fib_rule_notifier(nb, net, FIB_EVENT_RULE_ADD, rule);
}
static const struct nla_policy fib4_rule_policy[FRA_MAX+1] = {
@@ -228,7 +273,7 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
rule4->tos = frh->tos;
net->ipv4.fib_has_custom_rules = true;
- call_fib_rule_notifiers(net, FIB_EVENT_RULE_ADD);
+ call_fib_rule_notifiers(net, FIB_EVENT_RULE_ADD, rule);
err = 0;
errout:
@@ -250,7 +295,7 @@ static int fib4_rule_delete(struct fib_rule *rule)
net->ipv4.fib_num_tclassid_users--;
#endif
net->ipv4.fib_has_custom_rules = true;
- call_fib_rule_notifiers(net, FIB_EVENT_RULE_DEL);
+ call_fib_rule_notifiers(net, FIB_EVENT_RULE_DEL, rule);
errout:
return err;
}
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 317026a39cfa..da449ddb8cc1 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -57,7 +57,6 @@ static unsigned int fib_info_cnt;
static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE];
#ifdef CONFIG_IP_ROUTE_MULTIPATH
-u32 fib_multipath_secret __read_mostly;
#define for_nexthops(fi) { \
int nhsel; const struct fib_nh *nh; \
@@ -576,9 +575,6 @@ static void fib_rebalance(struct fib_info *fi)
atomic_set(&nexthop_nh->nh_upper_bound, upper_bound);
} endfor_nexthops(fi);
-
- net_get_random_once(&fib_multipath_secret,
- sizeof(fib_multipath_secret));
}
static inline void fib_add_weight(struct fib_info *fi,
@@ -1641,7 +1637,7 @@ void fib_select_multipath(struct fib_result *res, int hash)
#endif
void fib_select_path(struct net *net, struct fib_result *res,
- struct flowi4 *fl4, int mp_hash)
+ struct flowi4 *fl4, const struct sk_buff *skb)
{
bool oif_check;
@@ -1650,10 +1646,9 @@ void fib_select_path(struct net *net, struct fib_result *res,
#ifdef CONFIG_IP_ROUTE_MULTIPATH
if (res->fi->fib_nhs > 1 && oif_check) {
- if (mp_hash < 0)
- mp_hash = get_hash_from_flowi4(fl4) >> 1;
+ int h = fib_multipath_hash(res->fi, fl4, skb);
- fib_select_multipath(res, mp_hash);
+ fib_select_multipath(res, h);
}
else
#endif
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 2f0d8233950f..51182ff2b441 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -84,43 +84,6 @@
#include <trace/events/fib.h>
#include "fib_lookup.h"
-static unsigned int fib_seq_sum(void)
-{
- unsigned int fib_seq = 0;
- struct net *net;
-
- rtnl_lock();
- for_each_net(net)
- fib_seq += net->ipv4.fib_seq;
- rtnl_unlock();
-
- return fib_seq;
-}
-
-static ATOMIC_NOTIFIER_HEAD(fib_chain);
-
-static int call_fib_notifier(struct notifier_block *nb, struct net *net,
- enum fib_event_type event_type,
- struct fib_notifier_info *info)
-{
- info->net = net;
- return nb->notifier_call(nb, event_type, info);
-}
-
-static void fib_rules_notify(struct net *net, struct notifier_block *nb,
- enum fib_event_type event_type)
-{
-#ifdef CONFIG_IP_MULTIPLE_TABLES
- struct fib_notifier_info info;
-
- if (net->ipv4.fib_has_custom_rules)
- call_fib_notifier(nb, net, event_type, &info);
-#endif
-}
-
-static void fib_notify(struct net *net, struct notifier_block *nb,
- enum fib_event_type event_type);
-
static int call_fib_entry_notifier(struct notifier_block *nb, struct net *net,
enum fib_event_type event_type, u32 dst,
int dst_len, struct fib_info *fi,
@@ -137,62 +100,6 @@ static int call_fib_entry_notifier(struct notifier_block *nb, struct net *net,
return call_fib_notifier(nb, net, event_type, &info.info);
}
-static bool fib_dump_is_consistent(struct notifier_block *nb,
- void (*cb)(struct notifier_block *nb),
- unsigned int fib_seq)
-{
- atomic_notifier_chain_register(&fib_chain, nb);
- if (fib_seq == fib_seq_sum())
- return true;
- atomic_notifier_chain_unregister(&fib_chain, nb);
- if (cb)
- cb(nb);
- return false;
-}
-
-#define FIB_DUMP_MAX_RETRIES 5
-int register_fib_notifier(struct notifier_block *nb,
- void (*cb)(struct notifier_block *nb))
-{
- int retries = 0;
-
- do {
- unsigned int fib_seq = fib_seq_sum();
- struct net *net;
-
- /* Mutex semantics guarantee that every change done to
- * FIB tries before we read the change sequence counter
- * is now visible to us.
- */
- rcu_read_lock();
- for_each_net_rcu(net) {
- fib_rules_notify(net, nb, FIB_EVENT_RULE_ADD);
- fib_notify(net, nb, FIB_EVENT_ENTRY_ADD);
- }
- rcu_read_unlock();
-
- if (fib_dump_is_consistent(nb, cb, fib_seq))
- return 0;
- } while (++retries < FIB_DUMP_MAX_RETRIES);
-
- return -EBUSY;
-}
-EXPORT_SYMBOL(register_fib_notifier);
-
-int unregister_fib_notifier(struct notifier_block *nb)
-{
- return atomic_notifier_chain_unregister(&fib_chain, nb);
-}
-EXPORT_SYMBOL(unregister_fib_notifier);
-
-int call_fib_notifiers(struct net *net, enum fib_event_type event_type,
- struct fib_notifier_info *info)
-{
- net->ipv4.fib_seq++;
- info->net = net;
- return atomic_notifier_call_chain(&fib_chain, event_type, info);
-}
-
static int call_fib_entry_notifiers(struct net *net,
enum fib_event_type event_type, u32 dst,
int dst_len, struct fib_info *fi,
@@ -1995,8 +1902,7 @@ int fib_table_flush(struct net *net, struct fib_table *tb)
}
static void fib_leaf_notify(struct net *net, struct key_vector *l,
- struct fib_table *tb, struct notifier_block *nb,
- enum fib_event_type event_type)
+ struct fib_table *tb, struct notifier_block *nb)
{
struct fib_alias *fa;
@@ -2012,22 +1918,21 @@ static void fib_leaf_notify(struct net *net, struct key_vector *l,
if (tb->tb_id != fa->tb_id)
continue;
- call_fib_entry_notifier(nb, net, event_type, l->key,
+ call_fib_entry_notifier(nb, net, FIB_EVENT_ENTRY_ADD, l->key,
KEYLENGTH - fa->fa_slen, fi, fa->fa_tos,
fa->fa_type, fa->tb_id);
}
}
static void fib_table_notify(struct net *net, struct fib_table *tb,
- struct notifier_block *nb,
- enum fib_event_type event_type)
+ struct notifier_block *nb)
{
struct trie *t = (struct trie *)tb->tb_data;
struct key_vector *l, *tp = t->kv;
t_key key = 0;
while ((l = leaf_walk_rcu(&tp, key)) != NULL) {
- fib_leaf_notify(net, l, tb, nb, event_type);
+ fib_leaf_notify(net, l, tb, nb);
key = l->key + 1;
/* stop in case of wrap around */
@@ -2036,8 +1941,7 @@ static void fib_table_notify(struct net *net, struct fib_table *tb,
}
}
-static void fib_notify(struct net *net, struct notifier_block *nb,
- enum fib_event_type event_type)
+void fib_notify(struct net *net, struct notifier_block *nb)
{
unsigned int h;
@@ -2046,7 +1950,7 @@ static void fib_notify(struct net *net, struct notifier_block *nb,
struct fib_table *tb;
hlist_for_each_entry_rcu(tb, head, tb_hlist)
- fib_table_notify(net, tb, nb, event_type);
+ fib_table_notify(net, tb, nb);
}
}
@@ -2079,6 +1983,8 @@ static int fn_trie_dump_leaf(struct key_vector *l, struct fib_table *tb,
/* rcu_read_lock is hold by caller */
hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) {
+ int err;
+
if (i < s_i) {
i++;
continue;
@@ -2089,17 +1995,14 @@ static int fn_trie_dump_leaf(struct key_vector *l, struct fib_table *tb,
continue;
}
- if (fib_dump_info(skb, NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- RTM_NEWROUTE,
- tb->tb_id,
- fa->fa_type,
- xkey,
- KEYLENGTH - fa->fa_slen,
- fa->fa_tos,
- fa->fa_info, NLM_F_MULTI) < 0) {
+ err = fib_dump_info(skb, NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, RTM_NEWROUTE,
+ tb->tb_id, fa->fa_type,
+ xkey, KEYLENGTH - fa->fa_slen,
+ fa->fa_tos, fa->fa_info, NLM_F_MULTI);
+ if (err < 0) {
cb->args[4] = i;
- return -1;
+ return err;
}
i++;
}
@@ -2121,10 +2024,13 @@ int fib_table_dump(struct fib_table *tb, struct sk_buff *skb,
t_key key = cb->args[3];
while ((l = leaf_walk_rcu(&tp, key)) != NULL) {
- if (fn_trie_dump_leaf(l, tb, skb, cb) < 0) {
+ int err;
+
+ err = fn_trie_dump_leaf(l, tb, skb, cb);
+ if (err < 0) {
cb->args[3] = key;
cb->args[2] = count;
- return -1;
+ return err;
}
++count;
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index fc310db2708b..43318b5f5647 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -464,22 +464,6 @@ out_bh_enable:
local_bh_enable();
}
-#ifdef CONFIG_IP_ROUTE_MULTIPATH
-
-/* Source and destination is swapped. See ip_multipath_icmp_hash */
-static int icmp_multipath_hash_skb(const struct sk_buff *skb)
-{
- const struct iphdr *iph = ip_hdr(skb);
-
- return fib_multipath_hash(iph->daddr, iph->saddr);
-}
-
-#else
-
-#define icmp_multipath_hash_skb(skb) (-1)
-
-#endif
-
static struct rtable *icmp_route_lookup(struct net *net,
struct flowi4 *fl4,
struct sk_buff *skb_in,
@@ -505,8 +489,7 @@ static struct rtable *icmp_route_lookup(struct net *net,
fl4->flowi4_oif = l3mdev_master_ifindex(skb_dst(skb_in)->dev);
security_skb_classify_flow(skb_in, flowi4_to_flowi(fl4));
- rt = __ip_route_output_key_hash(net, fl4,
- icmp_multipath_hash_skb(skb_in));
+ rt = __ip_route_output_key_hash(net, fl4, skb_in);
if (IS_ERR(rt))
return rt;
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 5e313c1ac94f..1054d330bf9d 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -794,6 +794,8 @@ struct sock *inet_csk_clone_lock(const struct sock *sk,
/* listeners have SOCK_RCU_FREE, not the children */
sock_reset_flag(newsk, SOCK_RCU_FREE);
+ inet_sk(newsk)->mc_list = NULL;
+
newsk->sk_mark = inet_rsk(req)->ir_mark;
atomic64_set(&newsk->sk_cookie,
atomic64_read(&inet_rsk(req)->ir_cookie));
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 8bea74298173..e9a59d2d91d4 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -678,11 +678,7 @@ int inet_ehash_locks_alloc(struct inet_hashinfo *hashinfo)
/* no more locks than number of hash buckets */
nblocks = min(nblocks, hashinfo->ehash_mask + 1);
- hashinfo->ehash_locks = kmalloc_array(nblocks, locksz,
- GFP_KERNEL | __GFP_NOWARN);
- if (!hashinfo->ehash_locks)
- hashinfo->ehash_locks = vmalloc(nblocks * locksz);
-
+ hashinfo->ehash_locks = kvmalloc_array(nblocks, locksz, GFP_KERNEL);
if (!hashinfo->ehash_locks)
return -ENOMEM;
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index c9c1cb635d9a..e90c80a548ad 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -829,7 +829,8 @@ out:
static int ipgre_netlink_parms(struct net_device *dev,
struct nlattr *data[],
struct nlattr *tb[],
- struct ip_tunnel_parm *parms)
+ struct ip_tunnel_parm *parms,
+ __u32 *fwmark)
{
struct ip_tunnel *t = netdev_priv(dev);
@@ -886,6 +887,9 @@ static int ipgre_netlink_parms(struct net_device *dev,
t->ignore_df = !!nla_get_u8(data[IFLA_GRE_IGNORE_DF]);
}
+ if (data[IFLA_GRE_FWMARK])
+ *fwmark = nla_get_u32(data[IFLA_GRE_FWMARK]);
+
return 0;
}
@@ -957,6 +961,7 @@ static int ipgre_newlink(struct net *src_net, struct net_device *dev,
{
struct ip_tunnel_parm p;
struct ip_tunnel_encap ipencap;
+ __u32 fwmark = 0;
int err;
if (ipgre_netlink_encap_parms(data, &ipencap)) {
@@ -967,31 +972,32 @@ static int ipgre_newlink(struct net *src_net, struct net_device *dev,
return err;
}
- err = ipgre_netlink_parms(dev, data, tb, &p);
+ err = ipgre_netlink_parms(dev, data, tb, &p, &fwmark);
if (err < 0)
return err;
- return ip_tunnel_newlink(dev, tb, &p);
+ return ip_tunnel_newlink(dev, tb, &p, fwmark);
}
static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
struct nlattr *data[])
{
+ struct ip_tunnel *t = netdev_priv(dev);
struct ip_tunnel_parm p;
struct ip_tunnel_encap ipencap;
+ __u32 fwmark = t->fwmark;
int err;
if (ipgre_netlink_encap_parms(data, &ipencap)) {
- struct ip_tunnel *t = netdev_priv(dev);
err = ip_tunnel_encap_setup(t, &ipencap);
if (err < 0)
return err;
}
- err = ipgre_netlink_parms(dev, data, tb, &p);
+ err = ipgre_netlink_parms(dev, data, tb, &p, &fwmark);
if (err < 0)
return err;
- return ip_tunnel_changelink(dev, tb, &p);
+ return ip_tunnel_changelink(dev, tb, &p, fwmark);
}
static size_t ipgre_get_size(const struct net_device *dev)
@@ -1029,6 +1035,8 @@ static size_t ipgre_get_size(const struct net_device *dev)
nla_total_size(0) +
/* IFLA_GRE_IGNORE_DF */
nla_total_size(1) +
+ /* IFLA_GRE_FWMARK */
+ nla_total_size(4) +
0;
}
@@ -1049,7 +1057,8 @@ static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
nla_put_u8(skb, IFLA_GRE_TTL, p->iph.ttl) ||
nla_put_u8(skb, IFLA_GRE_TOS, p->iph.tos) ||
nla_put_u8(skb, IFLA_GRE_PMTUDISC,
- !!(p->iph.frag_off & htons(IP_DF))))
+ !!(p->iph.frag_off & htons(IP_DF))) ||
+ nla_put_u32(skb, IFLA_GRE_FWMARK, t->fwmark))
goto nla_put_failure;
if (nla_put_u16(skb, IFLA_GRE_ENCAP_TYPE,
@@ -1093,6 +1102,7 @@ static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
[IFLA_GRE_ENCAP_DPORT] = { .type = NLA_U16 },
[IFLA_GRE_COLLECT_METADATA] = { .type = NLA_FLAG },
[IFLA_GRE_IGNORE_DF] = { .type = NLA_U8 },
+ [IFLA_GRE_FWMARK] = { .type = NLA_U32 },
};
static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index d6feabb03516..fa2dc8f692c6 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -313,6 +313,7 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
const struct iphdr *iph = ip_hdr(skb);
struct rtable *rt;
struct net_device *dev = skb->dev;
+ void (*edemux)(struct sk_buff *skb);
/* if ingress device is enslaved to an L3 master device pass the
* skb to its handler for processing
@@ -329,8 +330,8 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
int protocol = iph->protocol;
ipprot = rcu_dereference(inet_protos[protocol]);
- if (ipprot && ipprot->early_demux) {
- ipprot->early_demux(skb);
+ if (ipprot && (edemux = READ_ONCE(ipprot->early_demux))) {
+ edemux(skb);
/* must reload iph, skb->head might have changed */
iph = ip_hdr(skb);
}
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 1d46d05efb0f..ec4fe3d4b5c9 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -330,7 +330,6 @@ int ip_cmsg_send(struct sock *sk, struct msghdr *msg, struct ipcm_cookie *ipc,
sent to multicast group to reach destination designated router.
*/
struct ip_ra_chain __rcu *ip_ra_chain;
-static DEFINE_SPINLOCK(ip_ra_lock);
static void ip_ra_destroy_rcu(struct rcu_head *head)
@@ -352,21 +351,17 @@ int ip_ra_control(struct sock *sk, unsigned char on,
new_ra = on ? kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL;
- spin_lock_bh(&ip_ra_lock);
for (rap = &ip_ra_chain;
- (ra = rcu_dereference_protected(*rap,
- lockdep_is_held(&ip_ra_lock))) != NULL;
+ (ra = rtnl_dereference(*rap)) != NULL;
rap = &ra->next) {
if (ra->sk == sk) {
if (on) {
- spin_unlock_bh(&ip_ra_lock);
kfree(new_ra);
return -EADDRINUSE;
}
/* dont let ip_call_ra_chain() use sk again */
ra->sk = NULL;
RCU_INIT_POINTER(*rap, ra->next);
- spin_unlock_bh(&ip_ra_lock);
if (ra->destructor)
ra->destructor(sk);
@@ -380,17 +375,14 @@ int ip_ra_control(struct sock *sk, unsigned char on,
return 0;
}
}
- if (!new_ra) {
- spin_unlock_bh(&ip_ra_lock);
+ if (!new_ra)
return -ENOBUFS;
- }
new_ra->sk = sk;
new_ra->destructor = destructor;
RCU_INIT_POINTER(new_ra->next, ra);
rcu_assign_pointer(*rap, new_ra);
sock_hold(sk);
- spin_unlock_bh(&ip_ra_lock);
return 0;
}
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index 823abaef006b..b878ecbc0608 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -293,7 +293,8 @@ failed:
static inline void init_tunnel_flow(struct flowi4 *fl4,
int proto,
__be32 daddr, __be32 saddr,
- __be32 key, __u8 tos, int oif)
+ __be32 key, __u8 tos, int oif,
+ __u32 mark)
{
memset(fl4, 0, sizeof(*fl4));
fl4->flowi4_oif = oif;
@@ -302,6 +303,7 @@ static inline void init_tunnel_flow(struct flowi4 *fl4,
fl4->flowi4_tos = tos;
fl4->flowi4_proto = proto;
fl4->fl4_gre_key = key;
+ fl4->flowi4_mark = mark;
}
static int ip_tunnel_bind_dev(struct net_device *dev)
@@ -322,7 +324,8 @@ static int ip_tunnel_bind_dev(struct net_device *dev)
init_tunnel_flow(&fl4, iph->protocol, iph->daddr,
iph->saddr, tunnel->parms.o_key,
- RT_TOS(iph->tos), tunnel->parms.link);
+ RT_TOS(iph->tos), tunnel->parms.link,
+ tunnel->fwmark);
rt = ip_route_output_key(tunnel->net, &fl4);
if (!IS_ERR(rt)) {
@@ -578,7 +581,7 @@ void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, u8 proto)
tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
}
init_tunnel_flow(&fl4, proto, key->u.ipv4.dst, key->u.ipv4.src, 0,
- RT_TOS(tos), tunnel->parms.link);
+ RT_TOS(tos), tunnel->parms.link, tunnel->fwmark);
if (tunnel->encap.type != TUNNEL_ENCAP_NONE)
goto tx_error;
rt = ip_route_output_key(tunnel->net, &fl4);
@@ -707,7 +710,8 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
}
init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
- tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link);
+ tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link,
+ tunnel->fwmark);
if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0)
goto tx_error;
@@ -795,7 +799,8 @@ static void ip_tunnel_update(struct ip_tunnel_net *itn,
struct ip_tunnel *t,
struct net_device *dev,
struct ip_tunnel_parm *p,
- bool set_mtu)
+ bool set_mtu,
+ __u32 fwmark)
{
ip_tunnel_del(itn, t);
t->parms.iph.saddr = p->iph.saddr;
@@ -812,10 +817,11 @@ static void ip_tunnel_update(struct ip_tunnel_net *itn,
t->parms.iph.tos = p->iph.tos;
t->parms.iph.frag_off = p->iph.frag_off;
- if (t->parms.link != p->link) {
+ if (t->parms.link != p->link || t->fwmark != fwmark) {
int mtu;
t->parms.link = p->link;
+ t->fwmark = fwmark;
mtu = ip_tunnel_bind_dev(dev);
if (set_mtu)
dev->mtu = mtu;
@@ -893,7 +899,7 @@ int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
if (t) {
err = 0;
- ip_tunnel_update(itn, t, dev, p, true);
+ ip_tunnel_update(itn, t, dev, p, true, 0);
} else {
err = -ENOENT;
}
@@ -1066,7 +1072,7 @@ void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops)
EXPORT_SYMBOL_GPL(ip_tunnel_delete_net);
int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
- struct ip_tunnel_parm *p)
+ struct ip_tunnel_parm *p, __u32 fwmark)
{
struct ip_tunnel *nt;
struct net *net = dev_net(dev);
@@ -1087,6 +1093,7 @@ int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
nt->net = net;
nt->parms = *p;
+ nt->fwmark = fwmark;
err = register_netdevice(dev);
if (err)
goto out;
@@ -1105,7 +1112,7 @@ out:
EXPORT_SYMBOL_GPL(ip_tunnel_newlink);
int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
- struct ip_tunnel_parm *p)
+ struct ip_tunnel_parm *p, __u32 fwmark)
{
struct ip_tunnel *t;
struct ip_tunnel *tunnel = netdev_priv(dev);
@@ -1137,7 +1144,7 @@ int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
}
}
- ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU]);
+ ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU], fwmark);
return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_changelink);
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index a31f47ccaad9..baf196eaf1d8 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -235,7 +235,7 @@ static int ip_tun_build_state(struct nlattr *attr,
struct nlattr *tb[LWTUNNEL_IP_MAX + 1];
int err;
- err = nla_parse_nested(tb, LWTUNNEL_IP_MAX, attr, ip_tun_policy);
+ err = nla_parse_nested(tb, LWTUNNEL_IP_MAX, attr, ip_tun_policy, NULL);
if (err < 0)
return err;
@@ -332,7 +332,8 @@ static int ip6_tun_build_state(struct nlattr *attr,
struct nlattr *tb[LWTUNNEL_IP6_MAX + 1];
int err;
- err = nla_parse_nested(tb, LWTUNNEL_IP6_MAX, attr, ip6_tun_policy);
+ err = nla_parse_nested(tb, LWTUNNEL_IP6_MAX, attr, ip6_tun_policy,
+ NULL);
if (err < 0)
return err;
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index 8b14f1404c8f..4ec9affb2252 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -471,7 +471,8 @@ static int vti_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
}
static void vti_netlink_parms(struct nlattr *data[],
- struct ip_tunnel_parm *parms)
+ struct ip_tunnel_parm *parms,
+ __u32 *fwmark)
{
memset(parms, 0, sizeof(*parms));
@@ -497,24 +498,29 @@ static void vti_netlink_parms(struct nlattr *data[],
if (data[IFLA_VTI_REMOTE])
parms->iph.daddr = nla_get_in_addr(data[IFLA_VTI_REMOTE]);
+ if (data[IFLA_VTI_FWMARK])
+ *fwmark = nla_get_u32(data[IFLA_VTI_FWMARK]);
}
static int vti_newlink(struct net *src_net, struct net_device *dev,
struct nlattr *tb[], struct nlattr *data[])
{
struct ip_tunnel_parm parms;
+ __u32 fwmark = 0;
- vti_netlink_parms(data, &parms);
- return ip_tunnel_newlink(dev, tb, &parms);
+ vti_netlink_parms(data, &parms, &fwmark);
+ return ip_tunnel_newlink(dev, tb, &parms, fwmark);
}
static int vti_changelink(struct net_device *dev, struct nlattr *tb[],
struct nlattr *data[])
{
+ struct ip_tunnel *t = netdev_priv(dev);
+ __u32 fwmark = t->fwmark;
struct ip_tunnel_parm p;
- vti_netlink_parms(data, &p);
- return ip_tunnel_changelink(dev, tb, &p);
+ vti_netlink_parms(data, &p, &fwmark);
+ return ip_tunnel_changelink(dev, tb, &p, fwmark);
}
static size_t vti_get_size(const struct net_device *dev)
@@ -530,6 +536,8 @@ static size_t vti_get_size(const struct net_device *dev)
nla_total_size(4) +
/* IFLA_VTI_REMOTE */
nla_total_size(4) +
+ /* IFLA_VTI_FWMARK */
+ nla_total_size(4) +
0;
}
@@ -538,11 +546,13 @@ static int vti_fill_info(struct sk_buff *skb, const struct net_device *dev)
struct ip_tunnel *t = netdev_priv(dev);
struct ip_tunnel_parm *p = &t->parms;
- nla_put_u32(skb, IFLA_VTI_LINK, p->link);
- nla_put_be32(skb, IFLA_VTI_IKEY, p->i_key);
- nla_put_be32(skb, IFLA_VTI_OKEY, p->o_key);
- nla_put_in_addr(skb, IFLA_VTI_LOCAL, p->iph.saddr);
- nla_put_in_addr(skb, IFLA_VTI_REMOTE, p->iph.daddr);
+ if (nla_put_u32(skb, IFLA_VTI_LINK, p->link) ||
+ nla_put_be32(skb, IFLA_VTI_IKEY, p->i_key) ||
+ nla_put_be32(skb, IFLA_VTI_OKEY, p->o_key) ||
+ nla_put_in_addr(skb, IFLA_VTI_LOCAL, p->iph.saddr) ||
+ nla_put_in_addr(skb, IFLA_VTI_REMOTE, p->iph.daddr) ||
+ nla_put_u32(skb, IFLA_VTI_FWMARK, t->fwmark))
+ return -EMSGSIZE;
return 0;
}
@@ -553,6 +563,7 @@ static const struct nla_policy vti_policy[IFLA_VTI_MAX + 1] = {
[IFLA_VTI_OKEY] = { .type = NLA_U32 },
[IFLA_VTI_LOCAL] = { .len = FIELD_SIZEOF(struct iphdr, saddr) },
[IFLA_VTI_REMOTE] = { .len = FIELD_SIZEOF(struct iphdr, daddr) },
+ [IFLA_VTI_FWMARK] = { .type = NLA_U32 },
};
static struct rtnl_link_ops vti_link_ops __read_mostly = {
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index dfb2ab2dd3c8..c3b12b1c7162 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -57,6 +57,7 @@
#include <linux/export.h>
#include <net/net_namespace.h>
#include <net/arp.h>
+#include <net/dsa.h>
#include <net/ip.h>
#include <net/ipconfig.h>
#include <net/route.h>
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 00d4229b6954..1e441c6f2160 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -390,7 +390,8 @@ static int ipip_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
}
static void ipip_netlink_parms(struct nlattr *data[],
- struct ip_tunnel_parm *parms, bool *collect_md)
+ struct ip_tunnel_parm *parms, bool *collect_md,
+ __u32 *fwmark)
{
memset(parms, 0, sizeof(*parms));
@@ -428,6 +429,9 @@ static void ipip_netlink_parms(struct nlattr *data[],
if (data[IFLA_IPTUN_COLLECT_METADATA])
*collect_md = true;
+
+ if (data[IFLA_IPTUN_FWMARK])
+ *fwmark = nla_get_u32(data[IFLA_IPTUN_FWMARK]);
}
/* This function returns true when ENCAP attributes are present in the nl msg */
@@ -470,6 +474,7 @@ static int ipip_newlink(struct net *src_net, struct net_device *dev,
struct ip_tunnel *t = netdev_priv(dev);
struct ip_tunnel_parm p;
struct ip_tunnel_encap ipencap;
+ __u32 fwmark = 0;
if (ipip_netlink_encap_parms(data, &ipencap)) {
int err = ip_tunnel_encap_setup(t, &ipencap);
@@ -478,26 +483,27 @@ static int ipip_newlink(struct net *src_net, struct net_device *dev,
return err;
}
- ipip_netlink_parms(data, &p, &t->collect_md);
- return ip_tunnel_newlink(dev, tb, &p);
+ ipip_netlink_parms(data, &p, &t->collect_md, &fwmark);
+ return ip_tunnel_newlink(dev, tb, &p, fwmark);
}
static int ipip_changelink(struct net_device *dev, struct nlattr *tb[],
struct nlattr *data[])
{
+ struct ip_tunnel *t = netdev_priv(dev);
struct ip_tunnel_parm p;
struct ip_tunnel_encap ipencap;
bool collect_md;
+ __u32 fwmark = t->fwmark;
if (ipip_netlink_encap_parms(data, &ipencap)) {
- struct ip_tunnel *t = netdev_priv(dev);
int err = ip_tunnel_encap_setup(t, &ipencap);
if (err < 0)
return err;
}
- ipip_netlink_parms(data, &p, &collect_md);
+ ipip_netlink_parms(data, &p, &collect_md, &fwmark);
if (collect_md)
return -EINVAL;
@@ -505,7 +511,7 @@ static int ipip_changelink(struct net_device *dev, struct nlattr *tb[],
(!(dev->flags & IFF_POINTOPOINT) && p.iph.daddr))
return -EINVAL;
- return ip_tunnel_changelink(dev, tb, &p);
+ return ip_tunnel_changelink(dev, tb, &p, fwmark);
}
static size_t ipip_get_size(const struct net_device *dev)
@@ -535,6 +541,8 @@ static size_t ipip_get_size(const struct net_device *dev)
nla_total_size(2) +
/* IFLA_IPTUN_COLLECT_METADATA */
nla_total_size(0) +
+ /* IFLA_IPTUN_FWMARK */
+ nla_total_size(4) +
0;
}
@@ -550,7 +558,8 @@ static int ipip_fill_info(struct sk_buff *skb, const struct net_device *dev)
nla_put_u8(skb, IFLA_IPTUN_TOS, parm->iph.tos) ||
nla_put_u8(skb, IFLA_IPTUN_PROTO, parm->iph.protocol) ||
nla_put_u8(skb, IFLA_IPTUN_PMTUDISC,
- !!(parm->iph.frag_off & htons(IP_DF))))
+ !!(parm->iph.frag_off & htons(IP_DF))) ||
+ nla_put_u32(skb, IFLA_IPTUN_FWMARK, tunnel->fwmark))
goto nla_put_failure;
if (nla_put_u16(skb, IFLA_IPTUN_ENCAP_TYPE,
@@ -585,6 +594,7 @@ static const struct nla_policy ipip_policy[IFLA_IPTUN_MAX + 1] = {
[IFLA_IPTUN_ENCAP_SPORT] = { .type = NLA_U16 },
[IFLA_IPTUN_ENCAP_DPORT] = { .type = NLA_U16 },
[IFLA_IPTUN_COLLECT_METADATA] = { .type = NLA_FLAG },
+ [IFLA_IPTUN_FWMARK] = { .type = NLA_U32 },
};
static struct rtnl_link_ops ipip_link_ops __read_mostly = {
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index b036e85e093b..551de4d023a8 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -631,7 +631,7 @@ static int vif_delete(struct mr_table *mrt, int vifi, int notify,
in_dev = __in_dev_get_rtnl(dev);
if (in_dev) {
IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--;
- inet_netconf_notify_devconf(dev_net(dev),
+ inet_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
NETCONFA_MC_FORWARDING,
dev->ifindex, &in_dev->cnf);
ip_rt_multicast_event(in_dev);
@@ -820,8 +820,8 @@ static int vif_add(struct net *net, struct mr_table *mrt,
return -EADDRNOTAVAIL;
}
IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++;
- inet_netconf_notify_devconf(net, NETCONFA_MC_FORWARDING, dev->ifindex,
- &in_dev->cnf);
+ inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_MC_FORWARDING,
+ dev->ifindex, &in_dev->cnf);
ip_rt_multicast_event(in_dev);
/* Fill in the VIF structures */
@@ -1282,7 +1282,8 @@ static void mrtsock_destruct(struct sock *sk)
ipmr_for_each_table(mrt, net) {
if (sk == rtnl_dereference(mrt->mroute_sk)) {
IPV4_DEVCONF_ALL(net, MC_FORWARDING)--;
- inet_netconf_notify_devconf(net, NETCONFA_MC_FORWARDING,
+ inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
+ NETCONFA_MC_FORWARDING,
NETCONFA_IFINDEX_ALL,
net->ipv4.devconf_all);
RCU_INIT_POINTER(mrt->mroute_sk, NULL);
@@ -1343,7 +1344,8 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval,
if (ret == 0) {
rcu_assign_pointer(mrt->mroute_sk, sk);
IPV4_DEVCONF_ALL(net, MC_FORWARDING)++;
- inet_netconf_notify_devconf(net, NETCONFA_MC_FORWARDING,
+ inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
+ NETCONFA_MC_FORWARDING,
NETCONFA_IFINDEX_ALL,
net->ipv4.devconf_all);
}
@@ -1978,6 +1980,20 @@ int ip_mr_input(struct sk_buff *skb)
struct net *net = dev_net(skb->dev);
int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL;
struct mr_table *mrt;
+ struct net_device *dev;
+
+ /* skb->dev passed in is the loX master dev for vrfs.
+ * As there are no vifs associated with loopback devices,
+ * get the proper interface that does have a vif associated with it.
+ */
+ dev = skb->dev;
+ if (netif_is_l3_master(skb->dev)) {
+ dev = dev_get_by_index_rcu(net, IPCB(skb)->iif);
+ if (!dev) {
+ kfree_skb(skb);
+ return -ENODEV;
+ }
+ }
/* Packet is looped back after forward, it should not be
* forwarded second time, but still can be delivered locally.
@@ -2015,7 +2031,7 @@ int ip_mr_input(struct sk_buff *skb)
/* already under rcu_read_lock() */
cache = ipmr_cache_find(mrt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);
if (!cache) {
- int vif = ipmr_find_vif(mrt, skb->dev);
+ int vif = ipmr_find_vif(mrt, dev);
if (vif >= 0)
cache = ipmr_cache_find_any(mrt, ip_hdr(skb)->daddr,
@@ -2035,7 +2051,7 @@ int ip_mr_input(struct sk_buff *skb)
}
read_lock(&mrt_lock);
- vif = ipmr_find_vif(mrt, skb->dev);
+ vif = ipmr_find_vif(mrt, dev);
if (vif >= 0) {
int err2 = ipmr_cache_unresolved(mrt, vif, skb);
read_unlock(&mrt_lock);
@@ -2421,7 +2437,8 @@ static int ipmr_nla_get_ttls(const struct nlattr *nla, struct mfcctl *mfcc)
/* returns < 0 on error, 0 for ADD_MFC and 1 for ADD_MFC_PROXY */
static int rtm_to_ipmr_mfcc(struct net *net, struct nlmsghdr *nlh,
struct mfcctl *mfcc, int *mrtsock,
- struct mr_table **mrtret)
+ struct mr_table **mrtret,
+ struct netlink_ext_ack *extack)
{
struct net_device *dev = NULL;
u32 tblid = RT_TABLE_DEFAULT;
@@ -2430,7 +2447,8 @@ static int rtm_to_ipmr_mfcc(struct net *net, struct nlmsghdr *nlh,
struct rtmsg *rtm;
int ret, rem;
- ret = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipmr_policy);
+ ret = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipmr_policy,
+ extack);
if (ret < 0)
goto out;
rtm = nlmsg_data(nlh);
@@ -2489,7 +2507,8 @@ out:
}
/* takes care of both newroute and delroute */
-static int ipmr_rtm_route(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int ipmr_rtm_route(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
int ret, mrtsock, parent;
@@ -2498,7 +2517,7 @@ static int ipmr_rtm_route(struct sk_buff *skb, struct nlmsghdr *nlh)
mrtsock = 0;
tbl = NULL;
- ret = rtm_to_ipmr_mfcc(net, nlh, &mfcc, &mrtsock, &tbl);
+ ret = rtm_to_ipmr_mfcc(net, nlh, &mfcc, &mrtsock, &tbl, extack);
if (ret < 0)
return ret;
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 6241a81fd7f5..0bc3c3d73e61 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -309,8 +309,7 @@ static int mark_source_chains(const struct xt_table_info *newinfo,
*/
for (hook = 0; hook < NF_ARP_NUMHOOKS; hook++) {
unsigned int pos = newinfo->hook_entry[hook];
- struct arpt_entry *e
- = (struct arpt_entry *)(entry0 + pos);
+ struct arpt_entry *e = entry0 + pos;
if (!(valid_hooks & (1 << hook)))
continue;
@@ -354,14 +353,12 @@ static int mark_source_chains(const struct xt_table_info *newinfo,
if (pos == oldpos)
goto next;
- e = (struct arpt_entry *)
- (entry0 + pos);
+ e = entry0 + pos;
} while (oldpos == pos + e->next_offset);
/* Move along one */
size = e->next_offset;
- e = (struct arpt_entry *)
- (entry0 + pos + size);
+ e = entry0 + pos + size;
if (pos + size >= newinfo->size)
return 0;
e->counters.pcnt = pos;
@@ -376,16 +373,14 @@ static int mark_source_chains(const struct xt_table_info *newinfo,
if (!xt_find_jump_offset(offsets, newpos,
newinfo->number))
return 0;
- e = (struct arpt_entry *)
- (entry0 + newpos);
+ e = entry0 + newpos;
} else {
/* ... this is a fallthru */
newpos = pos + e->next_offset;
if (newpos >= newinfo->size)
return 0;
}
- e = (struct arpt_entry *)
- (entry0 + newpos);
+ e = entry0 + newpos;
e->counters.pcnt = pos;
pos = newpos;
}
@@ -562,8 +557,6 @@ static int translate_table(struct xt_table_info *newinfo, void *entry0,
XT_ERROR_TARGET) == 0)
++newinfo->stacksize;
}
- if (ret != 0)
- goto out_free;
ret = -EINVAL;
if (i != repl->num_entries)
@@ -683,7 +676,7 @@ static int copy_entries_to_user(unsigned int total_size,
for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){
const struct xt_entry_target *t;
- e = (struct arpt_entry *)(loc_cpu_entry + off);
+ e = loc_cpu_entry + off;
if (copy_to_user(userptr + off, e, sizeof(*e))) {
ret = -EFAULT;
goto free_counters;
@@ -1130,7 +1123,7 @@ compat_copy_entry_from_user(struct compat_arpt_entry *e, void **dstptr,
int h;
origsize = *size;
- de = (struct arpt_entry *)*dstptr;
+ de = *dstptr;
memcpy(de, e, sizeof(struct arpt_entry));
memcpy(&de->counters, &e->counters, sizeof(e->counters));
@@ -1324,7 +1317,7 @@ static int compat_copy_entry_to_user(struct arpt_entry *e, void __user **dstptr,
int ret;
origsize = *size;
- ce = (struct compat_arpt_entry __user *)*dstptr;
+ ce = *dstptr;
if (copy_to_user(ce, e, sizeof(struct arpt_entry)) != 0 ||
copy_to_user(&ce->counters, &counters[i],
sizeof(counters[i])) != 0)
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 384b85713e06..2a55a40211cb 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -382,7 +382,7 @@ mark_source_chains(const struct xt_table_info *newinfo,
to 0 as we leave), and comefrom to save source hook bitmask */
for (hook = 0; hook < NF_INET_NUMHOOKS; hook++) {
unsigned int pos = newinfo->hook_entry[hook];
- struct ipt_entry *e = (struct ipt_entry *)(entry0 + pos);
+ struct ipt_entry *e = entry0 + pos;
if (!(valid_hooks & (1 << hook)))
continue;
@@ -424,14 +424,12 @@ mark_source_chains(const struct xt_table_info *newinfo,
if (pos == oldpos)
goto next;
- e = (struct ipt_entry *)
- (entry0 + pos);
+ e = entry0 + pos;
} while (oldpos == pos + e->next_offset);
/* Move along one */
size = e->next_offset;
- e = (struct ipt_entry *)
- (entry0 + pos + size);
+ e = entry0 + pos + size;
if (pos + size >= newinfo->size)
return 0;
e->counters.pcnt = pos;
@@ -446,16 +444,14 @@ mark_source_chains(const struct xt_table_info *newinfo,
if (!xt_find_jump_offset(offsets, newpos,
newinfo->number))
return 0;
- e = (struct ipt_entry *)
- (entry0 + newpos);
+ e = entry0 + newpos;
} else {
/* ... this is a fallthru */
newpos = pos + e->next_offset;
if (newpos >= newinfo->size)
return 0;
}
- e = (struct ipt_entry *)
- (entry0 + newpos);
+ e = entry0 + newpos;
e->counters.pcnt = pos;
pos = newpos;
}
@@ -834,7 +830,7 @@ copy_entries_to_user(unsigned int total_size,
const struct xt_entry_match *m;
const struct xt_entry_target *t;
- e = (struct ipt_entry *)(loc_cpu_entry + off);
+ e = loc_cpu_entry + off;
if (copy_to_user(userptr + off, e, sizeof(*e))) {
ret = -EFAULT;
goto free_counters;
@@ -1229,7 +1225,7 @@ compat_copy_entry_to_user(struct ipt_entry *e, void __user **dstptr,
int ret = 0;
origsize = *size;
- ce = (struct compat_ipt_entry __user *)*dstptr;
+ ce = *dstptr;
if (copy_to_user(ce, e, sizeof(struct ipt_entry)) != 0 ||
copy_to_user(&ce->counters, &counters[i],
sizeof(counters[i])) != 0)
@@ -1366,7 +1362,7 @@ compat_copy_entry_from_user(struct compat_ipt_entry *e, void **dstptr,
struct xt_entry_match *ematch;
origsize = *size;
- de = (struct ipt_entry *)*dstptr;
+ de = *dstptr;
memcpy(de, e, sizeof(struct ipt_entry));
memcpy(&de->counters, &e->counters, sizeof(e->counters));
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 9b8841316e7b..038f293c2376 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -22,6 +22,7 @@
#include <linux/icmp.h>
#include <linux/if_arp.h>
#include <linux/seq_file.h>
+#include <linux/refcount.h>
#include <linux/netfilter_arp.h>
#include <linux/netfilter/x_tables.h>
#include <linux/netfilter_ipv4/ip_tables.h>
@@ -40,8 +41,8 @@ MODULE_DESCRIPTION("Xtables: CLUSTERIP target");
struct clusterip_config {
struct list_head list; /* list of all configs */
- atomic_t refcount; /* reference count */
- atomic_t entries; /* number of entries/rules
+ refcount_t refcount; /* reference count */
+ refcount_t entries; /* number of entries/rules
* referencing us */
__be32 clusterip; /* the IP address */
@@ -77,7 +78,7 @@ struct clusterip_net {
static inline void
clusterip_config_get(struct clusterip_config *c)
{
- atomic_inc(&c->refcount);
+ refcount_inc(&c->refcount);
}
@@ -89,7 +90,7 @@ static void clusterip_config_rcu_free(struct rcu_head *head)
static inline void
clusterip_config_put(struct clusterip_config *c)
{
- if (atomic_dec_and_test(&c->refcount))
+ if (refcount_dec_and_test(&c->refcount))
call_rcu_bh(&c->rcu, clusterip_config_rcu_free);
}
@@ -103,7 +104,7 @@ clusterip_config_entry_put(struct clusterip_config *c)
struct clusterip_net *cn = net_generic(net, clusterip_net_id);
local_bh_disable();
- if (atomic_dec_and_lock(&c->entries, &cn->lock)) {
+ if (refcount_dec_and_lock(&c->entries, &cn->lock)) {
list_del_rcu(&c->list);
spin_unlock(&cn->lock);
local_bh_enable();
@@ -149,10 +150,10 @@ clusterip_config_find_get(struct net *net, __be32 clusterip, int entry)
c = NULL;
else
#endif
- if (unlikely(!atomic_inc_not_zero(&c->refcount)))
+ if (unlikely(!refcount_inc_not_zero(&c->refcount)))
c = NULL;
else if (entry)
- atomic_inc(&c->entries);
+ refcount_inc(&c->entries);
}
rcu_read_unlock_bh();
@@ -188,8 +189,8 @@ clusterip_config_init(const struct ipt_clusterip_tgt_info *i, __be32 ip,
clusterip_config_init_nodelist(c, i);
c->hash_mode = i->hash_mode;
c->hash_initval = i->hash_initval;
- atomic_set(&c->refcount, 1);
- atomic_set(&c->entries, 1);
+ refcount_set(&c->refcount, 1);
+ refcount_set(&c->entries, 1);
spin_lock_bh(&cn->lock);
if (__clusterip_config_find(net, ip)) {
diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c
index 3240a2614e82..af2b69b6895f 100644
--- a/net/ipv4/netfilter/ipt_SYNPROXY.c
+++ b/net/ipv4/netfilter/ipt_SYNPROXY.c
@@ -293,12 +293,16 @@ synproxy_tg4(struct sk_buff *skb, const struct xt_action_param *par)
XT_SYNPROXY_OPT_ECN);
synproxy_send_client_synack(net, skb, th, &opts);
- return NF_DROP;
-
+ consume_skb(skb);
+ return NF_STOLEN;
} else if (th->ack && !(th->fin || th->rst || th->syn)) {
/* ACK from client */
- synproxy_recv_client_ack(net, skb, th, &opts, ntohl(th->seq));
- return NF_DROP;
+ if (synproxy_recv_client_ack(net, skb, th, &opts, ntohl(th->seq))) {
+ consume_skb(skb);
+ return NF_STOLEN;
+ } else {
+ return NF_DROP;
+ }
}
return XT_CONTINUE;
@@ -367,10 +371,13 @@ static unsigned int ipv4_synproxy_hook(void *priv,
* number match the one of first SYN.
*/
if (synproxy_recv_client_ack(net, skb, th, &opts,
- ntohl(th->seq) + 1))
+ ntohl(th->seq) + 1)) {
this_cpu_inc(snet->stats->cookie_retrans);
-
- return NF_DROP;
+ consume_skb(skb);
+ return NF_STOLEN;
+ } else {
+ return NF_DROP;
+ }
}
synproxy->isn = ntohl(th->ack_seq);
@@ -409,19 +416,56 @@ static unsigned int ipv4_synproxy_hook(void *priv,
return NF_ACCEPT;
}
+static struct nf_hook_ops ipv4_synproxy_ops[] __read_mostly = {
+ {
+ .hook = ipv4_synproxy_hook,
+ .pf = NFPROTO_IPV4,
+ .hooknum = NF_INET_LOCAL_IN,
+ .priority = NF_IP_PRI_CONNTRACK_CONFIRM - 1,
+ },
+ {
+ .hook = ipv4_synproxy_hook,
+ .pf = NFPROTO_IPV4,
+ .hooknum = NF_INET_POST_ROUTING,
+ .priority = NF_IP_PRI_CONNTRACK_CONFIRM - 1,
+ },
+};
+
static int synproxy_tg4_check(const struct xt_tgchk_param *par)
{
+ struct synproxy_net *snet = synproxy_pernet(par->net);
const struct ipt_entry *e = par->entryinfo;
+ int err;
if (e->ip.proto != IPPROTO_TCP ||
e->ip.invflags & XT_INV_PROTO)
return -EINVAL;
- return nf_ct_netns_get(par->net, par->family);
+ err = nf_ct_netns_get(par->net, par->family);
+ if (err)
+ return err;
+
+ if (snet->hook_ref4 == 0) {
+ err = nf_register_net_hooks(par->net, ipv4_synproxy_ops,
+ ARRAY_SIZE(ipv4_synproxy_ops));
+ if (err) {
+ nf_ct_netns_put(par->net, par->family);
+ return err;
+ }
+ }
+
+ snet->hook_ref4++;
+ return err;
}
static void synproxy_tg4_destroy(const struct xt_tgdtor_param *par)
{
+ struct synproxy_net *snet = synproxy_pernet(par->net);
+
+ snet->hook_ref4--;
+ if (snet->hook_ref4 == 0)
+ nf_unregister_net_hooks(par->net, ipv4_synproxy_ops,
+ ARRAY_SIZE(ipv4_synproxy_ops));
nf_ct_netns_put(par->net, par->family);
}
@@ -436,46 +480,14 @@ static struct xt_target synproxy_tg4_reg __read_mostly = {
.me = THIS_MODULE,
};
-static struct nf_hook_ops ipv4_synproxy_ops[] __read_mostly = {
- {
- .hook = ipv4_synproxy_hook,
- .pf = NFPROTO_IPV4,
- .hooknum = NF_INET_LOCAL_IN,
- .priority = NF_IP_PRI_CONNTRACK_CONFIRM - 1,
- },
- {
- .hook = ipv4_synproxy_hook,
- .pf = NFPROTO_IPV4,
- .hooknum = NF_INET_POST_ROUTING,
- .priority = NF_IP_PRI_CONNTRACK_CONFIRM - 1,
- },
-};
-
static int __init synproxy_tg4_init(void)
{
- int err;
-
- err = nf_register_hooks(ipv4_synproxy_ops,
- ARRAY_SIZE(ipv4_synproxy_ops));
- if (err < 0)
- goto err1;
-
- err = xt_register_target(&synproxy_tg4_reg);
- if (err < 0)
- goto err2;
-
- return 0;
-
-err2:
- nf_unregister_hooks(ipv4_synproxy_ops, ARRAY_SIZE(ipv4_synproxy_ops));
-err1:
- return err;
+ return xt_register_target(&synproxy_tg4_reg);
}
static void __exit synproxy_tg4_exit(void)
{
xt_unregister_target(&synproxy_tg4_reg);
- nf_unregister_hooks(ipv4_synproxy_ops, ARRAY_SIZE(ipv4_synproxy_ops));
}
module_init(synproxy_tg4_init);
diff --git a/net/ipv4/netfilter/nf_dup_ipv4.c b/net/ipv4/netfilter/nf_dup_ipv4.c
index f0dbff05fc28..39895b9ddeb9 100644
--- a/net/ipv4/netfilter/nf_dup_ipv4.c
+++ b/net/ipv4/netfilter/nf_dup_ipv4.c
@@ -69,8 +69,7 @@ void nf_dup_ipv4(struct net *net, struct sk_buff *skb, unsigned int hooknum,
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
/* Avoid counting cloned packets towards the original connection. */
nf_reset(skb);
- nf_ct_set(skb, nf_ct_untracked_get(), IP_CT_NEW);
- nf_conntrack_get(skb_nfct(skb));
+ nf_ct_set(skb, NULL, IP_CT_UNTRACKED);
#endif
/*
* If we are in PREROUTING/INPUT, decrease the TTL to mitigate potential
diff --git a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
index 6f5e8d01b876..feedd759ca80 100644
--- a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
@@ -264,13 +264,7 @@ nf_nat_ipv4_fn(void *priv, struct sk_buff *skb,
if (!ct)
return NF_ACCEPT;
- /* Don't try to NAT if this packet is not conntracked */
- if (nf_ct_is_untracked(ct))
- return NF_ACCEPT;
-
- nat = nf_ct_nat_ext_add(ct);
- if (nat == NULL)
- return NF_ACCEPT;
+ nat = nfct_nat(ct);
switch (ctinfo) {
case IP_CT_RELATED:
diff --git a/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c b/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c
index ea91058b5f6f..dc1dea15c1b4 100644
--- a/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c
+++ b/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c
@@ -37,7 +37,6 @@ nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum,
NF_CT_ASSERT(hooknum == NF_INET_POST_ROUTING);
ct = nf_ct_get(skb, &ctinfo);
- nat = nfct_nat(ct);
NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
ctinfo == IP_CT_RELATED_REPLY));
@@ -56,7 +55,9 @@ nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum,
return NF_DROP;
}
- nat->masq_index = out->ifindex;
+ nat = nf_ct_nat_ext_add(ct);
+ if (nat)
+ nat->masq_index = out->ifindex;
/* Transfer from original range. */
memset(&newrange.min_addr, 0, sizeof(newrange.min_addr));
diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c
index b3ca21b2ba9b..8a69363b4884 100644
--- a/net/ipv4/netfilter/nf_nat_pptp.c
+++ b/net/ipv4/netfilter/nf_nat_pptp.c
@@ -49,9 +49,14 @@ static void pptp_nat_expected(struct nf_conn *ct,
const struct nf_ct_pptp_master *ct_pptp_info;
const struct nf_nat_pptp *nat_pptp_info;
struct nf_nat_range range;
+ struct nf_conn_nat *nat;
+ nat = nf_ct_nat_ext_add(ct);
+ if (WARN_ON_ONCE(!nat))
+ return;
+
+ nat_pptp_info = &nat->help.nat_pptp_info;
ct_pptp_info = nfct_help_data(master);
- nat_pptp_info = &nfct_nat(master)->help.nat_pptp_info;
/* And here goes the grand finale of corrosion... */
if (exp->dir == IP_CT_DIR_ORIGINAL) {
@@ -120,13 +125,17 @@ pptp_outbound_pkt(struct sk_buff *skb,
{
struct nf_ct_pptp_master *ct_pptp_info;
+ struct nf_conn_nat *nat = nfct_nat(ct);
struct nf_nat_pptp *nat_pptp_info;
u_int16_t msg;
__be16 new_callid;
unsigned int cid_off;
+ if (WARN_ON_ONCE(!nat))
+ return NF_DROP;
+
+ nat_pptp_info = &nat->help.nat_pptp_info;
ct_pptp_info = nfct_help_data(ct);
- nat_pptp_info = &nfct_nat(ct)->help.nat_pptp_info;
new_callid = ct_pptp_info->pns_call_id;
@@ -177,11 +186,11 @@ pptp_outbound_pkt(struct sk_buff *skb,
ntohs(REQ_CID(pptpReq, cid_off)), ntohs(new_callid));
/* mangle packet */
- if (nf_nat_mangle_tcp_packet(skb, ct, ctinfo, protoff,
- cid_off + sizeof(struct pptp_pkt_hdr) +
- sizeof(struct PptpControlHeader),
- sizeof(new_callid), (char *)&new_callid,
- sizeof(new_callid)) == 0)
+ if (!nf_nat_mangle_tcp_packet(skb, ct, ctinfo, protoff,
+ cid_off + sizeof(struct pptp_pkt_hdr) +
+ sizeof(struct PptpControlHeader),
+ sizeof(new_callid), (char *)&new_callid,
+ sizeof(new_callid)))
return NF_DROP;
return NF_ACCEPT;
}
@@ -191,11 +200,15 @@ pptp_exp_gre(struct nf_conntrack_expect *expect_orig,
struct nf_conntrack_expect *expect_reply)
{
const struct nf_conn *ct = expect_orig->master;
+ struct nf_conn_nat *nat = nfct_nat(ct);
struct nf_ct_pptp_master *ct_pptp_info;
struct nf_nat_pptp *nat_pptp_info;
+ if (WARN_ON_ONCE(!nat))
+ return;
+
+ nat_pptp_info = &nat->help.nat_pptp_info;
ct_pptp_info = nfct_help_data(ct);
- nat_pptp_info = &nfct_nat(ct)->help.nat_pptp_info;
/* save original PAC call ID in nat_info */
nat_pptp_info->pac_call_id = ct_pptp_info->pac_call_id;
@@ -223,11 +236,15 @@ pptp_inbound_pkt(struct sk_buff *skb,
union pptp_ctrl_union *pptpReq)
{
const struct nf_nat_pptp *nat_pptp_info;
+ struct nf_conn_nat *nat = nfct_nat(ct);
u_int16_t msg;
__be16 new_pcid;
unsigned int pcid_off;
- nat_pptp_info = &nfct_nat(ct)->help.nat_pptp_info;
+ if (WARN_ON_ONCE(!nat))
+ return NF_DROP;
+
+ nat_pptp_info = &nat->help.nat_pptp_info;
new_pcid = nat_pptp_info->pns_call_id;
switch (msg = ntohs(ctlh->messageType)) {
@@ -271,11 +288,11 @@ pptp_inbound_pkt(struct sk_buff *skb,
pr_debug("altering peer call id from 0x%04x to 0x%04x\n",
ntohs(REQ_CID(pptpReq, pcid_off)), ntohs(new_pcid));
- if (nf_nat_mangle_tcp_packet(skb, ct, ctinfo, protoff,
- pcid_off + sizeof(struct pptp_pkt_hdr) +
- sizeof(struct PptpControlHeader),
- sizeof(new_pcid), (char *)&new_pcid,
- sizeof(new_pcid)) == 0)
+ if (!nf_nat_mangle_tcp_packet(skb, ct, ctinfo, protoff,
+ pcid_off + sizeof(struct pptp_pkt_hdr) +
+ sizeof(struct PptpControlHeader),
+ sizeof(new_pcid), (char *)&new_pcid,
+ sizeof(new_pcid)))
return NF_DROP;
return NF_ACCEPT;
}
diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c
index 53e49f5011d3..d5b1e0b3f687 100644
--- a/net/ipv4/netfilter/nf_nat_snmp_basic.c
+++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c
@@ -827,8 +827,8 @@ static unsigned char snmp_object_decode(struct asn1_ctx *ctx,
return 1;
}
-static unsigned char snmp_request_decode(struct asn1_ctx *ctx,
- struct snmp_request *request)
+static unsigned char noinline_for_stack
+snmp_request_decode(struct asn1_ctx *ctx, struct snmp_request *request)
{
unsigned int cls, con, tag;
unsigned char *end;
@@ -920,10 +920,10 @@ static inline void mangle_address(unsigned char *begin,
}
}
-static unsigned char snmp_trap_decode(struct asn1_ctx *ctx,
- struct snmp_v1_trap *trap,
- const struct oct1_map *map,
- __sum16 *check)
+static unsigned char noinline_for_stack
+snmp_trap_decode(struct asn1_ctx *ctx, struct snmp_v1_trap *trap,
+ const struct oct1_map *map,
+ __sum16 *check)
{
unsigned int cls, con, tag, len;
unsigned char *end;
@@ -998,18 +998,6 @@ err_id_free:
*
*****************************************************************************/
-static void hex_dump(const unsigned char *buf, size_t len)
-{
- size_t i;
-
- for (i = 0; i < len; i++) {
- if (i && !(i % 16))
- printk("\n");
- printk("%02x ", *(buf + i));
- }
- printk("\n");
-}
-
/*
* Parse and mangle SNMP message according to mapping.
* (And this is the fucking 'basic' method).
@@ -1026,7 +1014,8 @@ static int snmp_parse_mangle(unsigned char *msg,
struct snmp_object *obj;
if (debug > 1)
- hex_dump(msg, len);
+ print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_NONE, 16, 1,
+ msg, len, 0);
asn1_open(&ctx, msg, len);
diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c
index 146d86105183..7cd8d0d918f8 100644
--- a/net/ipv4/netfilter/nf_reject_ipv4.c
+++ b/net/ipv4/netfilter/nf_reject_ipv4.c
@@ -104,7 +104,6 @@ EXPORT_SYMBOL_GPL(nf_reject_ip_tcphdr_put);
void nf_send_reset(struct net *net, struct sk_buff *oldskb, int hook)
{
struct sk_buff *nskb;
- const struct iphdr *oiph;
struct iphdr *niph;
const struct tcphdr *oth;
struct tcphdr _oth;
@@ -116,8 +115,6 @@ void nf_send_reset(struct net *net, struct sk_buff *oldskb, int hook)
if (skb_rtable(oldskb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
return;
- oiph = ip_hdr(oldskb);
-
nskb = alloc_skb(sizeof(struct iphdr) + sizeof(struct tcphdr) +
LL_MAX_HEADER, GFP_ATOMIC);
if (!nskb)
diff --git a/net/ipv4/netfilter/nf_socket_ipv4.c b/net/ipv4/netfilter/nf_socket_ipv4.c
index a83d558e1aae..e9293bdebba0 100644
--- a/net/ipv4/netfilter/nf_socket_ipv4.c
+++ b/net/ipv4/netfilter/nf_socket_ipv4.c
@@ -139,7 +139,7 @@ struct sock *nf_sk_lookup_slow_v4(struct net *net, const struct sk_buff *skb,
* SNAT-ted connection.
*/
ct = nf_ct_get(skb, &ctinfo);
- if (ct && !nf_ct_is_untracked(ct) &&
+ if (ct &&
((iph->protocol != IPPROTO_ICMP &&
ctinfo == IP_CT_ESTABLISHED_REPLY) ||
(iph->protocol == IPPROTO_ICMP &&
diff --git a/net/ipv4/netfilter/nft_fib_ipv4.c b/net/ipv4/netfilter/nft_fib_ipv4.c
index 2981291910dd..de3681df2ce7 100644
--- a/net/ipv4/netfilter/nft_fib_ipv4.c
+++ b/net/ipv4/netfilter/nft_fib_ipv4.c
@@ -90,7 +90,7 @@ void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs,
if (nft_hook(pkt) == NF_INET_PRE_ROUTING &&
nft_fib_is_loopback(pkt->skb, nft_in(pkt))) {
- nft_fib_store_result(dest, priv->result, pkt,
+ nft_fib_store_result(dest, priv, pkt,
nft_in(pkt)->ifindex);
return;
}
@@ -99,7 +99,7 @@ void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs,
if (ipv4_is_zeronet(iph->saddr)) {
if (ipv4_is_lbcast(iph->daddr) ||
ipv4_is_local_multicast(iph->daddr)) {
- nft_fib_store_result(dest, priv->result, pkt,
+ nft_fib_store_result(dest, priv, pkt,
get_ifindex(pkt->skb->dev));
return;
}
@@ -212,7 +212,7 @@ nft_fib4_select_ops(const struct nft_ctx *ctx,
static struct nft_expr_type nft_fib4_type __read_mostly = {
.name = "fib",
- .select_ops = &nft_fib4_select_ops,
+ .select_ops = nft_fib4_select_ops,
.policy = nft_fib_policy,
.maxattr = NFTA_FIB_MAX,
.family = NFPROTO_IPV4,
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 69cf49e8356d..fa44e752a9a3 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -199,7 +199,6 @@ static const struct snmp_mib snmp4_net_list[] = {
SNMP_MIB_ITEM("TW", LINUX_MIB_TIMEWAITED),
SNMP_MIB_ITEM("TWRecycled", LINUX_MIB_TIMEWAITRECYCLED),
SNMP_MIB_ITEM("TWKilled", LINUX_MIB_TIMEWAITKILLED),
- SNMP_MIB_ITEM("PAWSPassive", LINUX_MIB_PAWSPASSIVEREJECTED),
SNMP_MIB_ITEM("PAWSActive", LINUX_MIB_PAWSACTIVEREJECTED),
SNMP_MIB_ITEM("PAWSEstab", LINUX_MIB_PAWSESTABREJECTED),
SNMP_MIB_ITEM("DelayedACKs", LINUX_MIB_DELAYEDACKS),
@@ -282,6 +281,7 @@ static const struct snmp_mib snmp4_net_list[] = {
SNMP_MIB_ITEM("TCPFastOpenPassiveFail", LINUX_MIB_TCPFASTOPENPASSIVEFAIL),
SNMP_MIB_ITEM("TCPFastOpenListenOverflow", LINUX_MIB_TCPFASTOPENLISTENOVERFLOW),
SNMP_MIB_ITEM("TCPFastOpenCookieReqd", LINUX_MIB_TCPFASTOPENCOOKIEREQD),
+ SNMP_MIB_ITEM("TCPFastOpenBlackhole", LINUX_MIB_TCPFASTOPENBLACKHOLE),
SNMP_MIB_ITEM("TCPSpuriousRtxHostQueues", LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES),
SNMP_MIB_ITEM("BusyPollRxPackets", LINUX_MIB_BUSYPOLLRXPACKETS),
SNMP_MIB_ITEM("TCPAutoCorking", LINUX_MIB_TCPAUTOCORKING),
diff --git a/net/ipv4/protocol.c b/net/ipv4/protocol.c
index 4b7c0ec65251..32a691b7ce2c 100644
--- a/net/ipv4/protocol.c
+++ b/net/ipv4/protocol.c
@@ -28,7 +28,7 @@
#include <linux/spinlock.h>
#include <net/protocol.h>
-const struct net_protocol __rcu *inet_protos[MAX_INET_PROTOS] __read_mostly;
+struct net_protocol __rcu *inet_protos[MAX_INET_PROTOS] __read_mostly;
const struct net_offload __rcu *inet_offloads[MAX_INET_PROTOS] __read_mostly;
EXPORT_SYMBOL(inet_offloads);
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 9d943974de2b..bdffad875691 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -358,6 +358,9 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4,
rt->dst.dev->mtu);
return -EMSGSIZE;
}
+ if (length < sizeof(struct iphdr))
+ return -EINVAL;
+
if (flags&MSG_PROBE)
goto out;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index d9724889ff09..655d9eebe43e 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1250,15 +1250,11 @@ static void set_class_tag(struct rtable *rt, u32 tag)
static unsigned int ipv4_default_advmss(const struct dst_entry *dst)
{
- unsigned int advmss = dst_metric_raw(dst, RTAX_ADVMSS);
+ unsigned int header_size = sizeof(struct tcphdr) + sizeof(struct iphdr);
+ unsigned int advmss = max_t(unsigned int, dst->dev->mtu - header_size,
+ ip_rt_min_advmss);
- if (advmss == 0) {
- advmss = max_t(unsigned int, dst->dev->mtu - 40,
- ip_rt_min_advmss);
- if (advmss > 65535 - 40)
- advmss = 65535 - 40;
- }
- return advmss;
+ return min(advmss, IPV4_MAX_PMTU - header_size);
}
static unsigned int ipv4_mtu(const struct dst_entry *dst)
@@ -1734,45 +1730,97 @@ out:
}
#ifdef CONFIG_IP_ROUTE_MULTIPATH
-
/* To make ICMP packets follow the right flow, the multipath hash is
- * calculated from the inner IP addresses in reverse order.
+ * calculated from the inner IP addresses.
*/
-static int ip_multipath_icmp_hash(struct sk_buff *skb)
+static void ip_multipath_l3_keys(const struct sk_buff *skb,
+ struct flow_keys *hash_keys)
{
const struct iphdr *outer_iph = ip_hdr(skb);
- struct icmphdr _icmph;
+ const struct iphdr *inner_iph;
const struct icmphdr *icmph;
struct iphdr _inner_iph;
- const struct iphdr *inner_iph;
+ struct icmphdr _icmph;
+
+ hash_keys->addrs.v4addrs.src = outer_iph->saddr;
+ hash_keys->addrs.v4addrs.dst = outer_iph->daddr;
+ if (likely(outer_iph->protocol != IPPROTO_ICMP))
+ return;
if (unlikely((outer_iph->frag_off & htons(IP_OFFSET)) != 0))
- goto standard_hash;
+ return;
icmph = skb_header_pointer(skb, outer_iph->ihl * 4, sizeof(_icmph),
&_icmph);
if (!icmph)
- goto standard_hash;
+ return;
if (icmph->type != ICMP_DEST_UNREACH &&
icmph->type != ICMP_REDIRECT &&
icmph->type != ICMP_TIME_EXCEEDED &&
- icmph->type != ICMP_PARAMETERPROB) {
- goto standard_hash;
- }
+ icmph->type != ICMP_PARAMETERPROB)
+ return;
inner_iph = skb_header_pointer(skb,
outer_iph->ihl * 4 + sizeof(_icmph),
sizeof(_inner_iph), &_inner_iph);
if (!inner_iph)
- goto standard_hash;
+ return;
+ hash_keys->addrs.v4addrs.src = inner_iph->saddr;
+ hash_keys->addrs.v4addrs.dst = inner_iph->daddr;
+}
- return fib_multipath_hash(inner_iph->daddr, inner_iph->saddr);
+/* if skb is set it will be used and fl4 can be NULL */
+int fib_multipath_hash(const struct fib_info *fi, const struct flowi4 *fl4,
+ const struct sk_buff *skb)
+{
+ struct net *net = fi->fib_net;
+ struct flow_keys hash_keys;
+ u32 mhash;
-standard_hash:
- return fib_multipath_hash(outer_iph->saddr, outer_iph->daddr);
-}
+ switch (net->ipv4.sysctl_fib_multipath_hash_policy) {
+ case 0:
+ memset(&hash_keys, 0, sizeof(hash_keys));
+ hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
+ if (skb) {
+ ip_multipath_l3_keys(skb, &hash_keys);
+ } else {
+ hash_keys.addrs.v4addrs.src = fl4->saddr;
+ hash_keys.addrs.v4addrs.dst = fl4->daddr;
+ }
+ break;
+ case 1:
+ /* skb is currently provided only when forwarding */
+ if (skb) {
+ unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP;
+ struct flow_keys keys;
+
+ /* short-circuit if we already have L4 hash present */
+ if (skb->l4_hash)
+ return skb_get_hash_raw(skb) >> 1;
+ memset(&hash_keys, 0, sizeof(hash_keys));
+ skb_flow_dissect_flow_keys(skb, &keys, flag);
+ hash_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src;
+ hash_keys.addrs.v4addrs.dst = keys.addrs.v4addrs.dst;
+ hash_keys.ports.src = keys.ports.src;
+ hash_keys.ports.dst = keys.ports.dst;
+ hash_keys.basic.ip_proto = keys.basic.ip_proto;
+ } else {
+ memset(&hash_keys, 0, sizeof(hash_keys));
+ hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
+ hash_keys.addrs.v4addrs.src = fl4->saddr;
+ hash_keys.addrs.v4addrs.dst = fl4->daddr;
+ hash_keys.ports.src = fl4->fl4_sport;
+ hash_keys.ports.dst = fl4->fl4_dport;
+ hash_keys.basic.ip_proto = fl4->flowi4_proto;
+ }
+ break;
+ }
+ mhash = flow_hash_from_keys(&hash_keys);
+ return mhash >> 1;
+}
+EXPORT_SYMBOL_GPL(fib_multipath_hash);
#endif /* CONFIG_IP_ROUTE_MULTIPATH */
static int ip_mkroute_input(struct sk_buff *skb,
@@ -1782,12 +1830,8 @@ static int ip_mkroute_input(struct sk_buff *skb,
{
#ifdef CONFIG_IP_ROUTE_MULTIPATH
if (res->fi && res->fi->fib_nhs > 1) {
- int h;
+ int h = fib_multipath_hash(res->fi, NULL, skb);
- if (unlikely(ip_hdr(skb)->protocol == IPPROTO_ICMP))
- h = ip_multipath_icmp_hash(skb);
- else
- h = fib_multipath_hash(saddr, daddr);
fib_select_multipath(res, h);
}
#endif
@@ -2203,7 +2247,7 @@ add:
*/
struct rtable *__ip_route_output_key_hash(struct net *net, struct flowi4 *fl4,
- int mp_hash)
+ const struct sk_buff *skb)
{
struct net_device *dev_out = NULL;
__u8 tos = RT_FL_TOS(fl4);
@@ -2366,7 +2410,7 @@ struct rtable *__ip_route_output_key_hash(struct net *net, struct flowi4 *fl4,
goto make_route;
}
- fib_select_path(net, &res, fl4, mp_hash);
+ fib_select_path(net, &res, fl4, skb);
dev_out = FIB_RES_DEV(res);
fl4->flowi4_oif = dev_out->ifindex;
@@ -2586,7 +2630,8 @@ nla_put_failure:
return -EMSGSIZE;
}
-static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
+static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(in_skb->sk);
struct rtmsg *rtm;
@@ -2602,7 +2647,8 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
u32 table_id = RT_TABLE_MAIN;
kuid_t uid;
- err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy);
+ err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy,
+ extack);
if (err < 0)
goto errout;
@@ -2620,10 +2666,6 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
skb_reset_mac_header(skb);
skb_reset_network_header(skb);
- /* Bugfix: need to give ip_route_input enough of an IP header to not gag. */
- ip_hdr(skb)->protocol = IPPROTO_UDP;
- skb_reserve(skb, MAX_HEADER + sizeof(struct iphdr));
-
src = tb[RTA_SRC] ? nla_get_in_addr(tb[RTA_SRC]) : 0;
dst = tb[RTA_DST] ? nla_get_in_addr(tb[RTA_DST]) : 0;
iif = tb[RTA_IIF] ? nla_get_u32(tb[RTA_IIF]) : 0;
@@ -2633,6 +2675,15 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
else
uid = (iif ? INVALID_UID : current_uid());
+ /* Bugfix: need to give ip_route_input enough of an IP header to
+ * not gag.
+ */
+ ip_hdr(skb)->protocol = IPPROTO_UDP;
+ ip_hdr(skb)->saddr = src;
+ ip_hdr(skb)->daddr = dst;
+
+ skb_reserve(skb, MAX_HEADER + sizeof(struct iphdr));
+
memset(&fl4, 0, sizeof(fl4));
fl4.daddr = dst;
fl4.saddr = src;
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 496b97e17aaf..0257d965f111 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -16,6 +16,7 @@
#include <linux/siphash.h>
#include <linux/kernel.h>
#include <linux/export.h>
+#include <net/secure_seq.h>
#include <net/tcp.h>
#include <net/route.h>
@@ -203,7 +204,7 @@ EXPORT_SYMBOL_GPL(__cookie_v4_check);
struct sock *tcp_get_cookie_sock(struct sock *sk, struct sk_buff *skb,
struct request_sock *req,
- struct dst_entry *dst)
+ struct dst_entry *dst, u32 tsoff)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct sock *child;
@@ -213,6 +214,7 @@ struct sock *tcp_get_cookie_sock(struct sock *sk, struct sk_buff *skb,
NULL, &own_req);
if (child) {
atomic_set(&req->rsk_refcnt, 1);
+ tcp_sk(child)->tsoffset = tsoff;
sock_rps_save_rxhash(child, skb);
inet_csk_reqsk_queue_add(sk, req, child);
} else {
@@ -292,6 +294,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
struct rtable *rt;
__u8 rcv_wscale;
struct flowi4 fl4;
+ u32 tsoff = 0;
if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies || !th->ack || th->rst)
goto out;
@@ -311,6 +314,11 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
memset(&tcp_opt, 0, sizeof(tcp_opt));
tcp_parse_options(skb, &tcp_opt, 0, NULL);
+ if (tcp_opt.saw_tstamp && tcp_opt.rcv_tsecr) {
+ tsoff = secure_tcp_ts_off(ip_hdr(skb)->daddr, ip_hdr(skb)->saddr);
+ tcp_opt.rcv_tsecr -= tsoff;
+ }
+
if (!cookie_timestamp_decode(&tcp_opt))
goto out;
@@ -381,7 +389,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
ireq->rcv_wscale = rcv_wscale;
ireq->ecn_ok = cookie_ecn_ok(&tcp_opt, sock_net(sk), &rt->dst);
- ret = tcp_get_cookie_sock(sk, skb, req, &rt->dst);
+ ret = tcp_get_cookie_sock(sk, skb, req, &rt->dst, tsoff);
/* ip_queue_xmit() depends on our flow being setup
* Normal sockets get it right from inet_csk_route_child_sock()
*/
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index d6880a6149ee..86957e9cd6c6 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -24,6 +24,7 @@
#include <net/cipso_ipv4.h>
#include <net/inet_frag.h>
#include <net/ping.h>
+#include <net/protocol.h>
static int zero;
static int one = 1;
@@ -294,6 +295,74 @@ bad_key:
return ret;
}
+static void proc_configure_early_demux(int enabled, int protocol)
+{
+ struct net_protocol *ipprot;
+#if IS_ENABLED(CONFIG_IPV6)
+ struct inet6_protocol *ip6prot;
+#endif
+
+ rcu_read_lock();
+
+ ipprot = rcu_dereference(inet_protos[protocol]);
+ if (ipprot)
+ ipprot->early_demux = enabled ? ipprot->early_demux_handler :
+ NULL;
+
+#if IS_ENABLED(CONFIG_IPV6)
+ ip6prot = rcu_dereference(inet6_protos[protocol]);
+ if (ip6prot)
+ ip6prot->early_demux = enabled ? ip6prot->early_demux_handler :
+ NULL;
+#endif
+ rcu_read_unlock();
+}
+
+static int proc_tcp_early_demux(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+ int ret = 0;
+
+ ret = proc_dointvec(table, write, buffer, lenp, ppos);
+
+ if (write && !ret) {
+ int enabled = init_net.ipv4.sysctl_tcp_early_demux;
+
+ proc_configure_early_demux(enabled, IPPROTO_TCP);
+ }
+
+ return ret;
+}
+
+static int proc_udp_early_demux(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+ int ret = 0;
+
+ ret = proc_dointvec(table, write, buffer, lenp, ppos);
+
+ if (write && !ret) {
+ int enabled = init_net.ipv4.sysctl_udp_early_demux;
+
+ proc_configure_early_demux(enabled, IPPROTO_UDP);
+ }
+
+ return ret;
+}
+
+static int proc_tfo_blackhole_detect_timeout(struct ctl_table *table,
+ int write,
+ void __user *buffer,
+ size_t *lenp, loff_t *ppos)
+{
+ int ret;
+
+ ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+ if (write && ret == 0)
+ tcp_fastopen_active_timeout_reset();
+ return ret;
+}
+
static struct ctl_table ipv4_table[] = {
{
.procname = "tcp_timestamps",
@@ -344,6 +413,14 @@ static struct ctl_table ipv4_table[] = {
.proc_handler = proc_tcp_fastopen_key,
},
{
+ .procname = "tcp_fastopen_blackhole_timeout_sec",
+ .data = &sysctl_tcp_fastopen_blackhole_timeout,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_tfo_blackhole_detect_timeout,
+ .extra1 = &zero,
+ },
+ {
.procname = "tcp_abort_on_overflow",
.data = &sysctl_tcp_abort_on_overflow,
.maxlen = sizeof(int),
@@ -750,6 +827,20 @@ static struct ctl_table ipv4_net_table[] = {
.proc_handler = proc_dointvec
},
{
+ .procname = "udp_early_demux",
+ .data = &init_net.ipv4.sysctl_udp_early_demux,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_udp_early_demux
+ },
+ {
+ .procname = "tcp_early_demux",
+ .data = &init_net.ipv4.sysctl_tcp_early_demux,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_tcp_early_demux
+ },
+ {
.procname = "ip_default_ttl",
.data = &init_net.ipv4.sysctl_ip_default_ttl,
.maxlen = sizeof(int),
@@ -981,13 +1072,6 @@ static struct ctl_table ipv4_net_table[] = {
.proc_handler = proc_dointvec
},
{
- .procname = "tcp_tw_recycle",
- .data = &init_net.ipv4.tcp_death_row.sysctl_tw_recycle,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec
- },
- {
.procname = "tcp_max_syn_backlog",
.data = &init_net.ipv4.sysctl_max_syn_backlog,
.maxlen = sizeof(int),
@@ -1004,6 +1088,15 @@ static struct ctl_table ipv4_net_table[] = {
.extra1 = &zero,
.extra2 = &one,
},
+ {
+ .procname = "fib_multipath_hash_policy",
+ .data = &init_net.ipv4.sysctl_fib_multipath_hash_policy,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
+ .extra2 = &one,
+ },
#endif
{
.procname = "ip_unprivileged_port_start",
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 40ba4249a586..1e4c76d2b827 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -533,7 +533,7 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
if (tp->urg_data & TCP_URG_VALID)
mask |= POLLPRI;
- } else if (sk->sk_state == TCP_SYN_SENT && inet_sk(sk)->defer_connect) {
+ } else if (state == TCP_SYN_SENT && inet_sk(sk)->defer_connect) {
/* Active TCP fastopen socket with defer_connect
* Return POLLOUT so application can call write()
* in order for kernel to generate SYN+data
@@ -2296,6 +2296,7 @@ int tcp_disconnect(struct sock *sk, int flags)
tcp_clear_xmit_timers(sk);
__skb_queue_purge(&sk->sk_receive_queue);
tcp_write_queue_purge(sk);
+ tcp_fastopen_active_disable_ofo_check(sk);
skb_rbtree_purge(&tp->out_of_order_queue);
inet->inet_dport = 0;
@@ -2394,7 +2395,7 @@ static int tcp_repair_options_est(struct tcp_sock *tp,
u16 snd_wscale = opt.opt_val & 0xFFFF;
u16 rcv_wscale = opt.opt_val >> 16;
- if (snd_wscale > 14 || rcv_wscale > 14)
+ if (snd_wscale > TCP_MAX_WSCALE || rcv_wscale > TCP_MAX_WSCALE)
return -EFBIG;
tp->rx_opt.snd_wscale = snd_wscale;
@@ -2471,7 +2472,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
/* Values greater than interface MTU won't take effect. However
* at the point when this call is done we typically don't yet
* know which interface is going to be used */
- if (val < TCP_MIN_MSS || val > MAX_TCP_WINDOW) {
+ if (val && (val < TCP_MIN_MSS || val > MAX_TCP_WINDOW)) {
err = -EINVAL;
break;
}
@@ -2852,7 +2853,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
info->tcpi_snd_ssthresh = tp->snd_ssthresh;
info->tcpi_advmss = tp->advmss;
- info->tcpi_rcv_rtt = jiffies_to_usecs(tp->rcv_rtt_est.rtt)>>3;
+ info->tcpi_rcv_rtt = tp->rcv_rtt_est.rtt_us >> 3;
info->tcpi_rcv_space = tp->rcvq_space.space;
info->tcpi_total_retrans = tp->total_retrans;
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index c99230efcd52..0683ba447d77 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -72,7 +72,7 @@ MODULE_PARM_DESC(tcp_friendliness, "turn on/off tcp friendliness");
module_param(hystart, int, 0644);
MODULE_PARM_DESC(hystart, "turn on/off hybrid slow start algorithm");
module_param(hystart_detect, int, 0644);
-MODULE_PARM_DESC(hystart_detect, "hyrbrid slow start detection mechanisms"
+MODULE_PARM_DESC(hystart_detect, "hybrid slow start detection mechanisms"
" 1: packet-train 2: delay 3: both packet-train and delay");
module_param(hystart_low_window, int, 0644);
MODULE_PARM_DESC(hystart_low_window, "lower bound cwnd for hybrid slow start");
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index 8ea4e9787f82..4af82b914dd4 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -341,6 +341,13 @@ bool tcp_fastopen_cookie_check(struct sock *sk, u16 *mss,
cookie->len = -1;
return false;
}
+
+ /* Firewall blackhole issue check */
+ if (tcp_fastopen_active_should_disable(sk)) {
+ cookie->len = -1;
+ return false;
+ }
+
if (sysctl_tcp_fastopen & TFO_CLIENT_NO_COOKIE) {
cookie->len = -1;
return true;
@@ -380,3 +387,98 @@ bool tcp_fastopen_defer_connect(struct sock *sk, int *err)
return false;
}
EXPORT_SYMBOL(tcp_fastopen_defer_connect);
+
+/*
+ * The following code block is to deal with middle box issues with TFO:
+ * Middlebox firewall issues can potentially cause server's data being
+ * blackholed after a successful 3WHS using TFO.
+ * The proposed solution is to disable active TFO globally under the
+ * following circumstances:
+ * 1. client side TFO socket receives out of order FIN
+ * 2. client side TFO socket receives out of order RST
+ * We disable active side TFO globally for 1hr at first. Then if it
+ * happens again, we disable it for 2h, then 4h, 8h, ...
+ * And we reset the timeout back to 1hr when we see a successful active
+ * TFO connection with data exchanges.
+ */
+
+/* Default to 1hr */
+unsigned int sysctl_tcp_fastopen_blackhole_timeout __read_mostly = 60 * 60;
+static atomic_t tfo_active_disable_times __read_mostly = ATOMIC_INIT(0);
+static unsigned long tfo_active_disable_stamp __read_mostly;
+
+/* Disable active TFO and record current jiffies and
+ * tfo_active_disable_times
+ */
+void tcp_fastopen_active_disable(struct sock *sk)
+{
+ atomic_inc(&tfo_active_disable_times);
+ tfo_active_disable_stamp = jiffies;
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENBLACKHOLE);
+}
+
+/* Reset tfo_active_disable_times to 0 */
+void tcp_fastopen_active_timeout_reset(void)
+{
+ atomic_set(&tfo_active_disable_times, 0);
+}
+
+/* Calculate timeout for tfo active disable
+ * Return true if we are still in the active TFO disable period
+ * Return false if timeout already expired and we should use active TFO
+ */
+bool tcp_fastopen_active_should_disable(struct sock *sk)
+{
+ int tfo_da_times = atomic_read(&tfo_active_disable_times);
+ int multiplier;
+ unsigned long timeout;
+
+ if (!tfo_da_times)
+ return false;
+
+ /* Limit timout to max: 2^6 * initial timeout */
+ multiplier = 1 << min(tfo_da_times - 1, 6);
+ timeout = multiplier * sysctl_tcp_fastopen_blackhole_timeout * HZ;
+ if (time_before(jiffies, tfo_active_disable_stamp + timeout))
+ return true;
+
+ /* Mark check bit so we can check for successful active TFO
+ * condition and reset tfo_active_disable_times
+ */
+ tcp_sk(sk)->syn_fastopen_ch = 1;
+ return false;
+}
+
+/* Disable active TFO if FIN is the only packet in the ofo queue
+ * and no data is received.
+ * Also check if we can reset tfo_active_disable_times if data is
+ * received successfully on a marked active TFO sockets opened on
+ * a non-loopback interface
+ */
+void tcp_fastopen_active_disable_ofo_check(struct sock *sk)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+ struct rb_node *p;
+ struct sk_buff *skb;
+ struct dst_entry *dst;
+
+ if (!tp->syn_fastopen)
+ return;
+
+ if (!tp->data_segs_in) {
+ p = rb_first(&tp->out_of_order_queue);
+ if (p && !rb_next(p)) {
+ skb = rb_entry(p, struct sk_buff, rbnode);
+ if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) {
+ tcp_fastopen_active_disable(sk);
+ return;
+ }
+ }
+ } else if (tp->syn_fastopen_ch &&
+ atomic_read(&tfo_active_disable_times)) {
+ dst = sk_dst_get(sk);
+ if (!(dst && dst->dev && (dst->dev->flags & IFF_LOOPBACK)))
+ tcp_fastopen_active_timeout_reset();
+ dst_release(dst);
+ }
+}
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 659d1baefb2b..174d4376baa5 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -85,7 +85,6 @@ int sysctl_tcp_dsack __read_mostly = 1;
int sysctl_tcp_app_win __read_mostly = 31;
int sysctl_tcp_adv_win_scale __read_mostly = 1;
EXPORT_SYMBOL(sysctl_tcp_adv_win_scale);
-EXPORT_SYMBOL(sysctl_tcp_timestamps);
/* rfc5961 challenge ack rate limiting */
int sysctl_tcp_challenge_ack_limit = 1000;
@@ -442,7 +441,8 @@ void tcp_init_buffer_space(struct sock *sk)
tcp_sndbuf_expand(sk);
tp->rcvq_space.space = tp->rcv_wnd;
- tp->rcvq_space.time = tcp_time_stamp;
+ skb_mstamp_get(&tp->tcp_mstamp);
+ tp->rcvq_space.time = tp->tcp_mstamp;
tp->rcvq_space.seq = tp->copied_seq;
maxwin = tcp_full_space(sk);
@@ -518,7 +518,7 @@ EXPORT_SYMBOL(tcp_initialize_rcv_mss);
*/
static void tcp_rcv_rtt_update(struct tcp_sock *tp, u32 sample, int win_dep)
{
- u32 new_sample = tp->rcv_rtt_est.rtt;
+ u32 new_sample = tp->rcv_rtt_est.rtt_us;
long m = sample;
if (m == 0)
@@ -548,21 +548,23 @@ static void tcp_rcv_rtt_update(struct tcp_sock *tp, u32 sample, int win_dep)
new_sample = m << 3;
}
- if (tp->rcv_rtt_est.rtt != new_sample)
- tp->rcv_rtt_est.rtt = new_sample;
+ tp->rcv_rtt_est.rtt_us = new_sample;
}
static inline void tcp_rcv_rtt_measure(struct tcp_sock *tp)
{
- if (tp->rcv_rtt_est.time == 0)
+ u32 delta_us;
+
+ if (tp->rcv_rtt_est.time.v64 == 0)
goto new_measure;
if (before(tp->rcv_nxt, tp->rcv_rtt_est.seq))
return;
- tcp_rcv_rtt_update(tp, tcp_time_stamp - tp->rcv_rtt_est.time, 1);
+ delta_us = skb_mstamp_us_delta(&tp->tcp_mstamp, &tp->rcv_rtt_est.time);
+ tcp_rcv_rtt_update(tp, delta_us, 1);
new_measure:
tp->rcv_rtt_est.seq = tp->rcv_nxt + tp->rcv_wnd;
- tp->rcv_rtt_est.time = tcp_time_stamp;
+ tp->rcv_rtt_est.time = tp->tcp_mstamp;
}
static inline void tcp_rcv_rtt_measure_ts(struct sock *sk,
@@ -572,7 +574,10 @@ static inline void tcp_rcv_rtt_measure_ts(struct sock *sk,
if (tp->rx_opt.rcv_tsecr &&
(TCP_SKB_CB(skb)->end_seq -
TCP_SKB_CB(skb)->seq >= inet_csk(sk)->icsk_ack.rcv_mss))
- tcp_rcv_rtt_update(tp, tcp_time_stamp - tp->rx_opt.rcv_tsecr, 0);
+ tcp_rcv_rtt_update(tp,
+ jiffies_to_usecs(tcp_time_stamp -
+ tp->rx_opt.rcv_tsecr),
+ 0);
}
/*
@@ -585,8 +590,8 @@ void tcp_rcv_space_adjust(struct sock *sk)
int time;
int copied;
- time = tcp_time_stamp - tp->rcvq_space.time;
- if (time < (tp->rcv_rtt_est.rtt >> 3) || tp->rcv_rtt_est.rtt == 0)
+ time = skb_mstamp_us_delta(&tp->tcp_mstamp, &tp->rcvq_space.time);
+ if (time < (tp->rcv_rtt_est.rtt_us >> 3) || tp->rcv_rtt_est.rtt_us == 0)
return;
/* Number of bytes copied to user in last RTT */
@@ -642,7 +647,7 @@ void tcp_rcv_space_adjust(struct sock *sk)
new_measure:
tp->rcvq_space.seq = tp->copied_seq;
- tp->rcvq_space.time = tcp_time_stamp;
+ tp->rcvq_space.time = tp->tcp_mstamp;
}
/* There is something which you must keep in mind when you analyze the
@@ -1131,7 +1136,6 @@ struct tcp_sacktag_state {
*/
struct skb_mstamp first_sackt;
struct skb_mstamp last_sackt;
- struct skb_mstamp ack_time; /* Timestamp when the S/ACK was received */
struct rate_sample *rate;
int flag;
};
@@ -1175,13 +1179,14 @@ static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb,
*/
if (pkt_len > mss) {
unsigned int new_len = (pkt_len / mss) * mss;
- if (!in_sack && new_len < pkt_len) {
+ if (!in_sack && new_len < pkt_len)
new_len += mss;
- if (new_len >= skb->len)
- return 0;
- }
pkt_len = new_len;
}
+
+ if (pkt_len >= skb->len && !in_sack)
+ return 0;
+
err = tcp_fragment(sk, skb, pkt_len, mss, GFP_ATOMIC);
if (err < 0)
return err;
@@ -1214,8 +1219,7 @@ static u8 tcp_sacktag_one(struct sock *sk,
return sacked;
if (!(sacked & TCPCB_SACKED_ACKED)) {
- tcp_rack_advance(tp, sacked, end_seq,
- xmit_time, &state->ack_time);
+ tcp_rack_advance(tp, sacked, end_seq, xmit_time);
if (sacked & TCPCB_SACKED_RETRANS) {
/* If the segment is not tagged as lost,
@@ -2760,8 +2764,7 @@ static bool tcp_try_undo_partial(struct sock *sk, const int acked)
return false;
}
-static void tcp_rack_identify_loss(struct sock *sk, int *ack_flag,
- const struct skb_mstamp *ack_time)
+static void tcp_rack_identify_loss(struct sock *sk, int *ack_flag)
{
struct tcp_sock *tp = tcp_sk(sk);
@@ -2769,7 +2772,7 @@ static void tcp_rack_identify_loss(struct sock *sk, int *ack_flag,
if (sysctl_tcp_recovery & TCP_RACK_LOSS_DETECTION) {
u32 prior_retrans = tp->retrans_out;
- tcp_rack_mark_lost(sk, ack_time);
+ tcp_rack_mark_lost(sk);
if (prior_retrans > tp->retrans_out)
*ack_flag |= FLAG_LOST_RETRANS;
}
@@ -2788,8 +2791,7 @@ static void tcp_rack_identify_loss(struct sock *sk, int *ack_flag,
* tcp_xmit_retransmit_queue().
*/
static void tcp_fastretrans_alert(struct sock *sk, const int acked,
- bool is_dupack, int *ack_flag, int *rexmit,
- const struct skb_mstamp *ack_time)
+ bool is_dupack, int *ack_flag, int *rexmit)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
@@ -2857,11 +2859,11 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked,
tcp_try_keep_open(sk);
return;
}
- tcp_rack_identify_loss(sk, ack_flag, ack_time);
+ tcp_rack_identify_loss(sk, ack_flag);
break;
case TCP_CA_Loss:
tcp_process_loss(sk, flag, is_dupack, rexmit);
- tcp_rack_identify_loss(sk, ack_flag, ack_time);
+ tcp_rack_identify_loss(sk, ack_flag);
if (!(icsk->icsk_ca_state == TCP_CA_Open ||
(*ack_flag & FLAG_LOST_RETRANS)))
return;
@@ -2877,7 +2879,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked,
if (icsk->icsk_ca_state <= TCP_CA_Disorder)
tcp_try_undo_dsack(sk);
- tcp_rack_identify_loss(sk, ack_flag, ack_time);
+ tcp_rack_identify_loss(sk, ack_flag);
if (!tcp_time_to_recover(sk, flag)) {
tcp_try_to_open(sk, flag);
return;
@@ -3059,8 +3061,8 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
{
const struct inet_connection_sock *icsk = inet_csk(sk);
struct skb_mstamp first_ackt, last_ackt;
- struct skb_mstamp *now = &sack->ack_time;
struct tcp_sock *tp = tcp_sk(sk);
+ struct skb_mstamp *now = &tp->tcp_mstamp;
u32 prior_sacked = tp->sacked_out;
u32 reord = tp->packets_out;
bool fully_acked = true;
@@ -3120,8 +3122,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
tp->delivered += acked_pcount;
if (!tcp_skb_spurious_retrans(tp, skb))
tcp_rack_advance(tp, sacked, scb->end_seq,
- &skb->skb_mstamp,
- &sack->ack_time);
+ &skb->skb_mstamp);
}
if (sacked & TCPCB_LOST)
tp->lost_out -= acked_pcount;
@@ -3189,7 +3190,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
int delta;
/* Non-retransmitted hole got filled? That's reordering */
- if (reord < prior_fackets)
+ if (reord < prior_fackets && reord <= tp->fackets_out)
tcp_update_reordering(sk, tp->fackets_out - reord, 0);
delta = tcp_is_fack(tp) ? pkts_acked :
@@ -3576,8 +3577,6 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
if (after(ack, tp->snd_nxt))
goto invalid_ack;
- skb_mstamp_get(&sack_state.ack_time);
-
if (icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)
tcp_rearm_rto(sk);
@@ -3647,8 +3646,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
if (tcp_ack_is_dubious(sk, flag)) {
is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP));
- tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit,
- &sack_state.ack_time);
+ tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit);
}
if (tp->tlp_high_seq)
tcp_process_tlp_ack(sk, ack, flag);
@@ -3660,8 +3658,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
tcp_schedule_loss_probe(sk);
delivered = tp->delivered - delivered; /* freshly ACKed or SACKed */
lost = tp->lost - lost; /* freshly marked lost */
- tcp_rate_gen(sk, delivered, lost, &sack_state.ack_time,
- sack_state.rate);
+ tcp_rate_gen(sk, delivered, lost, sack_state.rate);
tcp_cong_control(sk, ack, delivered, flag, sack_state.rate);
tcp_xmit_recovery(sk, rexmit);
return 1;
@@ -3669,8 +3666,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
no_queue:
/* If data was DSACKed, see if we can undo a cwnd reduction. */
if (flag & FLAG_DSACKING_ACK)
- tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit,
- &sack_state.ack_time);
+ tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit);
/* If this ack opens up a zero window, clear backoff. It was
* being used to time the probes, and is probably far higher than
* it needs to be for normal retransmission.
@@ -3691,11 +3687,9 @@ old_ack:
* If data was DSACKed, see if we can undo a cwnd reduction.
*/
if (TCP_SKB_CB(skb)->sacked) {
- skb_mstamp_get(&sack_state.ack_time);
flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
&sack_state);
- tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit,
- &sack_state.ack_time);
+ tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit);
tcp_xmit_recovery(sk, rexmit);
}
@@ -3768,11 +3762,12 @@ void tcp_parse_options(const struct sk_buff *skb,
!estab && sysctl_tcp_window_scaling) {
__u8 snd_wscale = *(__u8 *)ptr;
opt_rx->wscale_ok = 1;
- if (snd_wscale > 14) {
- net_info_ratelimited("%s: Illegal window scaling value %d >14 received\n",
+ if (snd_wscale > TCP_MAX_WSCALE) {
+ net_info_ratelimited("%s: Illegal window scaling value %d > %u received\n",
__func__,
- snd_wscale);
- snd_wscale = 14;
+ snd_wscale,
+ TCP_MAX_WSCALE);
+ snd_wscale = TCP_MAX_WSCALE;
}
opt_rx->snd_wscale = snd_wscale;
}
@@ -4007,10 +4002,10 @@ void tcp_reset(struct sock *sk)
/* This barrier is coupled with smp_rmb() in tcp_poll() */
smp_wmb();
+ tcp_done(sk);
+
if (!sock_flag(sk, SOCK_DEAD))
sk->sk_error_report(sk);
-
- tcp_done(sk);
}
/*
@@ -5299,8 +5294,16 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
if (rst_seq_match)
tcp_reset(sk);
- else
+ else {
+ /* Disable TFO if RST is out-of-order
+ * and no data has been received
+ * for current active TFO socket
+ */
+ if (tp->syn_fastopen && !tp->data_segs_in &&
+ sk->sk_state == TCP_ESTABLISHED)
+ tcp_fastopen_active_disable(sk);
tcp_send_challenge_ack(sk, skb);
+ }
goto discard;
}
@@ -5353,6 +5356,7 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
{
struct tcp_sock *tp = tcp_sk(sk);
+ skb_mstamp_get(&tp->tcp_mstamp);
if (unlikely(!sk->sk_rx_dst))
inet_csk(sk)->icsk_af_ops->sk_rx_dst_set(sk, skb);
/*
@@ -5579,10 +5583,6 @@ void tcp_finish_connect(struct sock *sk, struct sk_buff *skb)
else
tp->pred_flags = 0;
- if (!sock_flag(sk, SOCK_DEAD)) {
- sk->sk_state_change(sk);
- sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT);
- }
}
static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
@@ -5651,6 +5651,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
struct tcp_sock *tp = tcp_sk(sk);
struct tcp_fastopen_cookie foc = { .len = -1 };
int saved_clamp = tp->rx_opt.mss_clamp;
+ bool fastopen_fail;
tcp_parse_options(skb, &tp->rx_opt, 0, &foc);
if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr)
@@ -5754,10 +5755,15 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
tcp_finish_connect(sk, skb);
- if ((tp->syn_fastopen || tp->syn_data) &&
- tcp_rcv_fastopen_synack(sk, skb, &foc))
- return -1;
+ fastopen_fail = (tp->syn_fastopen || tp->syn_data) &&
+ tcp_rcv_fastopen_synack(sk, skb, &foc);
+ if (!sock_flag(sk, SOCK_DEAD)) {
+ sk->sk_state_change(sk);
+ sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT);
+ }
+ if (fastopen_fail)
+ return -1;
if (sk->sk_write_pending ||
icsk->icsk_accept_queue.rskq_defer_accept ||
icsk->icsk_ack.pingpong) {
@@ -5911,6 +5917,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
case TCP_SYN_SENT:
tp->rx_opt.saw_tstamp = 0;
+ skb_mstamp_get(&tp->tcp_mstamp);
queued = tcp_rcv_synsent_state_process(sk, skb, th);
if (queued >= 0)
return queued;
@@ -5922,6 +5929,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
return 0;
}
+ skb_mstamp_get(&tp->tcp_mstamp);
tp->rx_opt.saw_tstamp = 0;
req = tp->fastopen_rsk;
if (req) {
@@ -6041,9 +6049,16 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
break;
}
- if (tp->linger2 < 0 ||
- (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
- after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt))) {
+ if (tp->linger2 < 0) {
+ tcp_done(sk);
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
+ return 1;
+ }
+ if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
+ after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt)) {
+ /* Receive out of order FIN after close() */
+ if (tp->syn_fastopen && th->fin)
+ tcp_fastopen_active_disable(sk);
tcp_done(sk);
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
return 1;
@@ -6332,37 +6347,15 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
if (security_inet_conn_request(sk, skb, req))
goto drop_and_free;
- if (isn && tmp_opt.tstamp_ok)
- af_ops->init_seq(skb, &tcp_rsk(req)->ts_off);
+ if (tmp_opt.tstamp_ok)
+ tcp_rsk(req)->ts_off = af_ops->init_ts_off(skb);
if (!want_cookie && !isn) {
- /* VJ's idea. We save last timestamp seen
- * from the destination in peer table, when entering
- * state TIME-WAIT, and check against it before
- * accepting new connection request.
- *
- * If "isn" is not zero, this request hit alive
- * timewait bucket, so that all the necessary checks
- * are made in the function processing timewait state.
- */
- if (net->ipv4.tcp_death_row.sysctl_tw_recycle) {
- bool strict;
-
- dst = af_ops->route_req(sk, &fl, req, &strict);
-
- if (dst && strict &&
- !tcp_peer_is_proven(req, dst, true,
- tmp_opt.saw_tstamp)) {
- NET_INC_STATS(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
- goto drop_and_release;
- }
- }
/* Kill the following clause, if you dislike this way. */
- else if (!net->ipv4.sysctl_tcp_syncookies &&
- (net->ipv4.sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
- (net->ipv4.sysctl_max_syn_backlog >> 2)) &&
- !tcp_peer_is_proven(req, dst, false,
- tmp_opt.saw_tstamp)) {
+ if (!net->ipv4.sysctl_tcp_syncookies &&
+ (net->ipv4.sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
+ (net->ipv4.sysctl_max_syn_backlog >> 2)) &&
+ !tcp_peer_is_proven(req, dst)) {
/* Without syncookies last quarter of
* backlog is filled with destinations,
* proven to be alive.
@@ -6375,10 +6368,10 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
goto drop_and_release;
}
- isn = af_ops->init_seq(skb, &tcp_rsk(req)->ts_off);
+ isn = af_ops->init_seq(skb);
}
if (!dst) {
- dst = af_ops->route_req(sk, &fl, req, NULL);
+ dst = af_ops->route_req(sk, &fl, req);
if (!dst)
goto drop_and_free;
}
@@ -6387,7 +6380,6 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
if (want_cookie) {
isn = cookie_init_sequence(af_ops, sk, skb, &req->mss);
- tcp_rsk(req)->ts_off = 0;
req->cookie_ts = tmp_opt.tstamp_ok;
if (!tmp_opt.tstamp_ok)
inet_rsk(req)->ecn_ok = 0;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 575e19dcc017..5ab2aac5ca19 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -94,12 +94,18 @@ static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
struct inet_hashinfo tcp_hashinfo;
EXPORT_SYMBOL(tcp_hashinfo);
-static u32 tcp_v4_init_sequence(const struct sk_buff *skb, u32 *tsoff)
+static u32 tcp_v4_init_seq(const struct sk_buff *skb)
{
- return secure_tcp_sequence_number(ip_hdr(skb)->daddr,
- ip_hdr(skb)->saddr,
- tcp_hdr(skb)->dest,
- tcp_hdr(skb)->source, tsoff);
+ return secure_tcp_seq(ip_hdr(skb)->daddr,
+ ip_hdr(skb)->saddr,
+ tcp_hdr(skb)->dest,
+ tcp_hdr(skb)->source);
+}
+
+static u32 tcp_v4_init_ts_off(const struct sk_buff *skb)
+{
+ return secure_tcp_ts_off(ip_hdr(skb)->daddr,
+ ip_hdr(skb)->saddr);
}
int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
@@ -145,7 +151,6 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
struct flowi4 *fl4;
struct rtable *rt;
int err;
- u32 seq;
struct ip_options_rcu *inet_opt;
struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
@@ -198,10 +203,6 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
tp->write_seq = 0;
}
- if (tcp_death_row->sysctl_tw_recycle &&
- !tp->rx_opt.ts_recent_stamp && fl4->daddr == daddr)
- tcp_fetch_timewait_stamp(sk, &rt->dst);
-
inet->inet_dport = usin->sin_port;
sk_daddr_set(sk, daddr);
@@ -236,13 +237,13 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
rt = NULL;
if (likely(!tp->repair)) {
- seq = secure_tcp_sequence_number(inet->inet_saddr,
- inet->inet_daddr,
- inet->inet_sport,
- usin->sin_port,
- &tp->tsoffset);
if (!tp->write_seq)
- tp->write_seq = seq;
+ tp->write_seq = secure_tcp_seq(inet->inet_saddr,
+ inet->inet_daddr,
+ inet->inet_sport,
+ usin->sin_port);
+ tp->tsoffset = secure_tcp_ts_off(inet->inet_saddr,
+ inet->inet_daddr);
}
inet->inet_id = tp->write_seq ^ jiffies;
@@ -1217,19 +1218,9 @@ static void tcp_v4_init_req(struct request_sock *req,
static struct dst_entry *tcp_v4_route_req(const struct sock *sk,
struct flowi *fl,
- const struct request_sock *req,
- bool *strict)
+ const struct request_sock *req)
{
- struct dst_entry *dst = inet_csk_route_req(sk, &fl->u.ip4, req);
-
- if (strict) {
- if (fl->u.ip4.daddr == inet_rsk(req)->ir_rmt_addr)
- *strict = true;
- else
- *strict = false;
- }
-
- return dst;
+ return inet_csk_route_req(sk, &fl->u.ip4, req);
}
struct request_sock_ops tcp_request_sock_ops __read_mostly = {
@@ -1253,7 +1244,8 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
.cookie_init_seq = cookie_v4_init_sequence,
#endif
.route_req = tcp_v4_route_req,
- .init_seq = tcp_v4_init_sequence,
+ .init_seq = tcp_v4_init_seq,
+ .init_ts_off = tcp_v4_init_ts_off,
.send_synack = tcp_v4_send_synack,
};
@@ -1423,8 +1415,6 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
if (!nsk)
goto discard;
if (nsk != sk) {
- sock_rps_save_rxhash(nsk, skb);
- sk_mark_napi_id(nsk, skb);
if (tcp_child_process(sk, nsk, skb)) {
rsk = nsk;
goto reset;
@@ -1871,6 +1861,9 @@ void tcp_v4_destroy_sock(struct sock *sk)
/* Cleanup up the write buffer. */
tcp_write_queue_purge(sk);
+ /* Check if we want to disable active TFO */
+ tcp_fastopen_active_disable_ofo_check(sk);
+
/* Cleans up our, hopefully empty, out_of_order_queue. */
skb_rbtree_purge(&tp->out_of_order_queue);
@@ -2402,7 +2395,7 @@ struct proto tcp_prot = {
.sysctl_rmem = sysctl_tcp_rmem,
.max_header = MAX_TCP_HEADER,
.obj_size = sizeof(struct tcp_sock),
- .slab_flags = SLAB_DESTROY_BY_RCU,
+ .slab_flags = SLAB_TYPESAFE_BY_RCU,
.twsk_prot = &tcp_timewait_sock_ops,
.rsk_prot = &tcp_request_sock_ops,
.h.hashinfo = &tcp_hashinfo,
@@ -2466,7 +2459,6 @@ static int __net_init tcp_sk_init(struct net *net)
net->ipv4.sysctl_tcp_tw_reuse = 0;
cnt = tcp_hashinfo.ehash_mask + 1;
- net->ipv4.tcp_death_row.sysctl_tw_recycle = 0;
net->ipv4.tcp_death_row.sysctl_max_tw_buckets = (cnt + 1) / 2;
net->ipv4.tcp_death_row.hashinfo = &tcp_hashinfo;
diff --git a/net/ipv4/tcp_lp.c b/net/ipv4/tcp_lp.c
index 046fd3910873..d6fb6c067af4 100644
--- a/net/ipv4/tcp_lp.c
+++ b/net/ipv4/tcp_lp.c
@@ -264,13 +264,15 @@ static void tcp_lp_pkts_acked(struct sock *sk, const struct ack_sample *sample)
{
struct tcp_sock *tp = tcp_sk(sk);
struct lp *lp = inet_csk_ca(sk);
+ u32 delta;
if (sample->rtt_us > 0)
tcp_lp_rtt_sample(sk, sample->rtt_us);
/* calc inference */
- if (tcp_time_stamp > tp->rx_opt.rcv_tsecr)
- lp->inference = 3 * (tcp_time_stamp - tp->rx_opt.rcv_tsecr);
+ delta = tcp_time_stamp - tp->rx_opt.rcv_tsecr;
+ if ((s32)delta > 0)
+ lp->inference = 3 * delta;
/* test if within inference */
if (lp->last_drop && (tcp_time_stamp - lp->last_drop < lp->inference))
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index 0f46e5fe31ad..653bbd67e3a3 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -45,8 +45,6 @@ struct tcp_metrics_block {
struct inetpeer_addr tcpm_saddr;
struct inetpeer_addr tcpm_daddr;
unsigned long tcpm_stamp;
- u32 tcpm_ts;
- u32 tcpm_ts_stamp;
u32 tcpm_lock;
u32 tcpm_vals[TCP_METRIC_MAX_KERNEL + 1];
struct tcp_fastopen_metrics tcpm_fastopen;
@@ -123,8 +121,6 @@ static void tcpm_suck_dst(struct tcp_metrics_block *tm,
tm->tcpm_vals[TCP_METRIC_SSTHRESH] = dst_metric_raw(dst, RTAX_SSTHRESH);
tm->tcpm_vals[TCP_METRIC_CWND] = dst_metric_raw(dst, RTAX_CWND);
tm->tcpm_vals[TCP_METRIC_REORDERING] = dst_metric_raw(dst, RTAX_REORDERING);
- tm->tcpm_ts = 0;
- tm->tcpm_ts_stamp = 0;
if (fastopen_clear) {
tm->tcpm_fastopen.mss = 0;
tm->tcpm_fastopen.syn_loss = 0;
@@ -273,48 +269,6 @@ static struct tcp_metrics_block *__tcp_get_metrics_req(struct request_sock *req,
return tm;
}
-static struct tcp_metrics_block *__tcp_get_metrics_tw(struct inet_timewait_sock *tw)
-{
- struct tcp_metrics_block *tm;
- struct inetpeer_addr saddr, daddr;
- unsigned int hash;
- struct net *net;
-
- if (tw->tw_family == AF_INET) {
- inetpeer_set_addr_v4(&saddr, tw->tw_rcv_saddr);
- inetpeer_set_addr_v4(&daddr, tw->tw_daddr);
- hash = ipv4_addr_hash(tw->tw_daddr);
- }
-#if IS_ENABLED(CONFIG_IPV6)
- else if (tw->tw_family == AF_INET6) {
- if (ipv6_addr_v4mapped(&tw->tw_v6_daddr)) {
- inetpeer_set_addr_v4(&saddr, tw->tw_rcv_saddr);
- inetpeer_set_addr_v4(&daddr, tw->tw_daddr);
- hash = ipv4_addr_hash(tw->tw_daddr);
- } else {
- inetpeer_set_addr_v6(&saddr, &tw->tw_v6_rcv_saddr);
- inetpeer_set_addr_v6(&daddr, &tw->tw_v6_daddr);
- hash = ipv6_addr_hash(&tw->tw_v6_daddr);
- }
- }
-#endif
- else
- return NULL;
-
- net = twsk_net(tw);
- hash ^= net_hash_mix(net);
- hash = hash_32(hash, tcp_metrics_hash_log);
-
- for (tm = rcu_dereference(tcp_metrics_hash[hash].chain); tm;
- tm = rcu_dereference(tm->tcpm_next)) {
- if (addr_same(&tm->tcpm_saddr, &saddr) &&
- addr_same(&tm->tcpm_daddr, &daddr) &&
- net_eq(tm_net(tm), net))
- break;
- }
- return tm;
-}
-
static struct tcp_metrics_block *tcp_get_metrics(struct sock *sk,
struct dst_entry *dst,
bool create)
@@ -573,8 +527,7 @@ reset:
tp->snd_cwnd_stamp = tcp_time_stamp;
}
-bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst,
- bool paws_check, bool timestamps)
+bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst)
{
struct tcp_metrics_block *tm;
bool ret;
@@ -584,94 +537,10 @@ bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst,
rcu_read_lock();
tm = __tcp_get_metrics_req(req, dst);
- if (paws_check) {
- if (tm &&
- (u32)get_seconds() - tm->tcpm_ts_stamp < TCP_PAWS_MSL &&
- ((s32)(tm->tcpm_ts - req->ts_recent) > TCP_PAWS_WINDOW ||
- !timestamps))
- ret = false;
- else
- ret = true;
- } else {
- if (tm && tcp_metric_get(tm, TCP_METRIC_RTT) && tm->tcpm_ts_stamp)
- ret = true;
- else
- ret = false;
- }
- rcu_read_unlock();
-
- return ret;
-}
-
-void tcp_fetch_timewait_stamp(struct sock *sk, struct dst_entry *dst)
-{
- struct tcp_metrics_block *tm;
-
- rcu_read_lock();
- tm = tcp_get_metrics(sk, dst, true);
- if (tm) {
- struct tcp_sock *tp = tcp_sk(sk);
-
- if ((u32)get_seconds() - tm->tcpm_ts_stamp <= TCP_PAWS_MSL) {
- tp->rx_opt.ts_recent_stamp = tm->tcpm_ts_stamp;
- tp->rx_opt.ts_recent = tm->tcpm_ts;
- }
- }
- rcu_read_unlock();
-}
-EXPORT_SYMBOL_GPL(tcp_fetch_timewait_stamp);
-
-/* VJ's idea. Save last timestamp seen from this destination and hold
- * it at least for normal timewait interval to use for duplicate
- * segment detection in subsequent connections, before they enter
- * synchronized state.
- */
-bool tcp_remember_stamp(struct sock *sk)
-{
- struct dst_entry *dst = __sk_dst_get(sk);
- bool ret = false;
-
- if (dst) {
- struct tcp_metrics_block *tm;
-
- rcu_read_lock();
- tm = tcp_get_metrics(sk, dst, true);
- if (tm) {
- struct tcp_sock *tp = tcp_sk(sk);
-
- if ((s32)(tm->tcpm_ts - tp->rx_opt.ts_recent) <= 0 ||
- ((u32)get_seconds() - tm->tcpm_ts_stamp > TCP_PAWS_MSL &&
- tm->tcpm_ts_stamp <= (u32)tp->rx_opt.ts_recent_stamp)) {
- tm->tcpm_ts_stamp = (u32)tp->rx_opt.ts_recent_stamp;
- tm->tcpm_ts = tp->rx_opt.ts_recent;
- }
- ret = true;
- }
- rcu_read_unlock();
- }
- return ret;
-}
-
-bool tcp_tw_remember_stamp(struct inet_timewait_sock *tw)
-{
- struct tcp_metrics_block *tm;
- bool ret = false;
-
- rcu_read_lock();
- tm = __tcp_get_metrics_tw(tw);
- if (tm) {
- const struct tcp_timewait_sock *tcptw;
- struct sock *sk = (struct sock *) tw;
-
- tcptw = tcp_twsk(sk);
- if ((s32)(tm->tcpm_ts - tcptw->tw_ts_recent) <= 0 ||
- ((u32)get_seconds() - tm->tcpm_ts_stamp > TCP_PAWS_MSL &&
- tm->tcpm_ts_stamp <= (u32)tcptw->tw_ts_recent_stamp)) {
- tm->tcpm_ts_stamp = (u32)tcptw->tw_ts_recent_stamp;
- tm->tcpm_ts = tcptw->tw_ts_recent;
- }
+ if (tm && tcp_metric_get(tm, TCP_METRIC_RTT))
ret = true;
- }
+ else
+ ret = false;
rcu_read_unlock();
return ret;
@@ -791,14 +660,6 @@ static int tcp_metrics_fill_info(struct sk_buff *msg,
jiffies - tm->tcpm_stamp,
TCP_METRICS_ATTR_PAD) < 0)
goto nla_put_failure;
- if (tm->tcpm_ts_stamp) {
- if (nla_put_s32(msg, TCP_METRICS_ATTR_TW_TS_STAMP,
- (s32) (get_seconds() - tm->tcpm_ts_stamp)) < 0)
- goto nla_put_failure;
- if (nla_put_u32(msg, TCP_METRICS_ATTR_TW_TSVAL,
- tm->tcpm_ts) < 0)
- goto nla_put_failure;
- }
{
int n = 0;
@@ -1150,10 +1011,7 @@ static int __net_init tcp_net_metrics_init(struct net *net)
tcp_metrics_hash_log = order_base_2(slots);
size = sizeof(struct tcpm_hash_bucket) << tcp_metrics_hash_log;
- tcp_metrics_hash = kzalloc(size, GFP_KERNEL | __GFP_NOWARN);
- if (!tcp_metrics_hash)
- tcp_metrics_hash = vzalloc(size);
-
+ tcp_metrics_hash = kvzalloc(size, GFP_KERNEL);
if (!tcp_metrics_hash)
return -ENOMEM;
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 65c0f3d13eca..717be4de5324 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -26,6 +26,7 @@
#include <net/tcp.h>
#include <net/inet_common.h>
#include <net/xfrm.h>
+#include <net/busy_poll.h>
int sysctl_tcp_abort_on_overflow __read_mostly;
@@ -94,7 +95,6 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
struct tcp_options_received tmp_opt;
struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
bool paws_reject = false;
- struct inet_timewait_death_row *tcp_death_row = &sock_net((struct sock*)tw)->ipv4.tcp_death_row;
tmp_opt.saw_tstamp = 0;
if (th->doff > (sizeof(*th) >> 2) && tcptw->tw_ts_recent_stamp) {
@@ -149,12 +149,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
tcptw->tw_ts_recent = tmp_opt.rcv_tsval;
}
- if (tcp_death_row->sysctl_tw_recycle &&
- tcptw->tw_ts_recent_stamp &&
- tcp_tw_remember_stamp(tw))
- inet_twsk_reschedule(tw, tw->tw_timeout);
- else
- inet_twsk_reschedule(tw, TCP_TIMEWAIT_LEN);
+ inet_twsk_reschedule(tw, TCP_TIMEWAIT_LEN);
return TCP_TW_ACK;
}
@@ -259,12 +254,8 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
const struct inet_connection_sock *icsk = inet_csk(sk);
const struct tcp_sock *tp = tcp_sk(sk);
struct inet_timewait_sock *tw;
- bool recycle_ok = false;
struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
- if (tcp_death_row->sysctl_tw_recycle && tp->rx_opt.ts_recent_stamp)
- recycle_ok = tcp_remember_stamp(sk);
-
tw = inet_twsk_alloc(sk, tcp_death_row, state);
if (tw) {
@@ -317,13 +308,9 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
if (timeo < rto)
timeo = rto;
- if (recycle_ok) {
- tw->tw_timeout = rto;
- } else {
- tw->tw_timeout = TCP_TIMEWAIT_LEN;
- if (state == TCP_TIME_WAIT)
- timeo = TCP_TIMEWAIT_LEN;
- }
+ tw->tw_timeout = TCP_TIMEWAIT_LEN;
+ if (state == TCP_TIME_WAIT)
+ timeo = TCP_TIMEWAIT_LEN;
inet_twsk_schedule(tw, timeo);
/* Linkage updates. */
@@ -536,6 +523,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
newicsk->icsk_ack.last_seg_size = skb->len - newtp->tcp_header_len;
newtp->rx_opt.mss_clamp = req->mss;
tcp_ecn_openreq_child(newtp, req);
+ newtp->fastopen_req = NULL;
newtp->fastopen_rsk = NULL;
newtp->syn_data_acked = 0;
newtp->rack.mstamp.v64 = 0;
@@ -813,6 +801,9 @@ int tcp_child_process(struct sock *parent, struct sock *child,
int ret = 0;
int state = child->sk_state;
+ /* record NAPI ID of child */
+ sk_mark_napi_id(child, skb);
+
tcp_segs_in(tcp_sk(child), skb);
if (!sock_owned_by_user(child)) {
ret = tcp_rcv_state_process(child, skb);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index a85d863c4419..4858e190f6ac 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -212,12 +212,12 @@ void tcp_select_initial_window(int __space, __u32 mss,
/* If no clamp set the clamp to the max possible scaled window */
if (*window_clamp == 0)
- (*window_clamp) = (65535 << 14);
+ (*window_clamp) = (U16_MAX << TCP_MAX_WSCALE);
space = min(*window_clamp, space);
/* Quantize space offering to a multiple of mss if possible. */
if (space > mss)
- space = (space / mss) * mss;
+ space = rounddown(space, mss);
/* NOTE: offering an initial window larger than 32767
* will break some buggy TCP stacks. If the admin tells us
@@ -234,13 +234,11 @@ void tcp_select_initial_window(int __space, __u32 mss,
(*rcv_wscale) = 0;
if (wscale_ok) {
- /* Set window scaling on max possible window
- * See RFC1323 for an explanation of the limit to 14
- */
+ /* Set window scaling on max possible window */
space = max_t(u32, space, sysctl_tcp_rmem[2]);
space = max_t(u32, space, sysctl_rmem_max);
space = min_t(u32, space, *window_clamp);
- while (space > 65535 && (*rcv_wscale) < 14) {
+ while (space > U16_MAX && (*rcv_wscale) < TCP_MAX_WSCALE) {
space >>= 1;
(*rcv_wscale)++;
}
@@ -253,7 +251,7 @@ void tcp_select_initial_window(int __space, __u32 mss,
}
/* Set the clamp no higher than max representable value */
- (*window_clamp) = min(65535U << (*rcv_wscale), *window_clamp);
+ (*window_clamp) = min_t(__u32, U16_MAX << (*rcv_wscale), *window_clamp);
}
EXPORT_SYMBOL(tcp_select_initial_window);
@@ -1516,6 +1514,7 @@ static void tcp_cwnd_application_limited(struct sock *sk)
static void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited)
{
+ const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops;
struct tcp_sock *tp = tcp_sk(sk);
/* Track the maximum number of outstanding packets in each
@@ -1538,7 +1537,8 @@ static void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited)
tp->snd_cwnd_used = tp->packets_out;
if (sysctl_tcp_slow_start_after_idle &&
- (s32)(tcp_time_stamp - tp->snd_cwnd_stamp) >= inet_csk(sk)->icsk_rto)
+ (s32)(tcp_time_stamp - tp->snd_cwnd_stamp) >= inet_csk(sk)->icsk_rto &&
+ !ca_ops->cong_control)
tcp_cwnd_application_limited(sk);
/* The following conditions together indicate the starvation
@@ -2566,7 +2566,6 @@ u32 __tcp_select_window(struct sock *sk)
/* Don't do rounding if we are using window scaling, since the
* scaled window will not line up with the MSS boundary anyway.
*/
- window = tp->rcv_wnd;
if (tp->rx_opt.rcv_wscale) {
window = free_space;
@@ -2574,10 +2573,9 @@ u32 __tcp_select_window(struct sock *sk)
* Import case: prevent zero window announcement if
* 1<<rcv_wscale > mss.
*/
- if (((window >> tp->rx_opt.rcv_wscale) << tp->rx_opt.rcv_wscale) != window)
- window = (((window >> tp->rx_opt.rcv_wscale) + 1)
- << tp->rx_opt.rcv_wscale);
+ window = ALIGN(window, (1 << tp->rx_opt.rcv_wscale));
} else {
+ window = tp->rcv_wnd;
/* Get the largest window that is a nice multiple of mss.
* Window clamp already applied above.
* If our current window offering is within 1 mss of the
@@ -2587,7 +2585,7 @@ u32 __tcp_select_window(struct sock *sk)
* is too small.
*/
if (window <= free_space - mss || window > free_space)
- window = (free_space / mss) * mss;
+ window = rounddown(free_space, mss);
else if (mss == full_space &&
free_space > window + (full_space >> 1))
window = free_space;
diff --git a/net/ipv4/tcp_rate.c b/net/ipv4/tcp_rate.c
index 9be1581a5a08..c6a9fa894646 100644
--- a/net/ipv4/tcp_rate.c
+++ b/net/ipv4/tcp_rate.c
@@ -106,7 +106,7 @@ void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb,
/* Update the connection delivery information and generate a rate sample. */
void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost,
- struct skb_mstamp *now, struct rate_sample *rs)
+ struct rate_sample *rs)
{
struct tcp_sock *tp = tcp_sk(sk);
u32 snd_us, ack_us;
@@ -120,7 +120,7 @@ void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost,
* to carry current time, flags, stats like "tcp_sacktag_state".
*/
if (delivered)
- tp->delivered_mstamp = *now;
+ tp->delivered_mstamp = tp->tcp_mstamp;
rs->acked_sacked = delivered; /* freshly ACKed or SACKed */
rs->losses = lost; /* freshly marked lost */
@@ -138,7 +138,8 @@ void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost,
* longer phase.
*/
snd_us = rs->interval_us; /* send phase */
- ack_us = skb_mstamp_us_delta(now, &rs->prior_mstamp); /* ack phase */
+ ack_us = skb_mstamp_us_delta(&tp->tcp_mstamp,
+ &rs->prior_mstamp); /* ack phase */
rs->interval_us = max(snd_us, ack_us);
/* Normally we expect interval_us >= min-rtt.
diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c
index d8acbd9f477a..362b8c75bfab 100644
--- a/net/ipv4/tcp_recovery.c
+++ b/net/ipv4/tcp_recovery.c
@@ -45,8 +45,7 @@ static bool tcp_rack_sent_after(const struct skb_mstamp *t1,
* or tcp_time_to_recover()'s "Trick#1: the loss is proven" code path will
* make us enter the CA_Recovery state.
*/
-static void tcp_rack_detect_loss(struct sock *sk, const struct skb_mstamp *now,
- u32 *reo_timeout)
+static void tcp_rack_detect_loss(struct sock *sk, u32 *reo_timeout)
{
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *skb;
@@ -79,7 +78,7 @@ static void tcp_rack_detect_loss(struct sock *sk, const struct skb_mstamp *now,
* A packet is lost if its elapsed time is beyond
* the recent RTT plus the reordering window.
*/
- u32 elapsed = skb_mstamp_us_delta(now,
+ u32 elapsed = skb_mstamp_us_delta(&tp->tcp_mstamp,
&skb->skb_mstamp);
s32 remaining = tp->rack.rtt_us + reo_wnd - elapsed;
@@ -105,7 +104,7 @@ static void tcp_rack_detect_loss(struct sock *sk, const struct skb_mstamp *now,
}
}
-void tcp_rack_mark_lost(struct sock *sk, const struct skb_mstamp *now)
+void tcp_rack_mark_lost(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
u32 timeout;
@@ -115,7 +114,7 @@ void tcp_rack_mark_lost(struct sock *sk, const struct skb_mstamp *now)
/* Reset the advanced flag to avoid unnecessary queue scanning */
tp->rack.advanced = 0;
- tcp_rack_detect_loss(sk, now, &timeout);
+ tcp_rack_detect_loss(sk, &timeout);
if (timeout) {
timeout = usecs_to_jiffies(timeout + TCP_REO_TIMEOUT_MIN);
inet_csk_reset_xmit_timer(sk, ICSK_TIME_REO_TIMEOUT,
@@ -128,8 +127,7 @@ void tcp_rack_mark_lost(struct sock *sk, const struct skb_mstamp *now)
* draft-cheng-tcpm-rack-00.txt
*/
void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, u32 end_seq,
- const struct skb_mstamp *xmit_time,
- const struct skb_mstamp *ack_time)
+ const struct skb_mstamp *xmit_time)
{
u32 rtt_us;
@@ -138,7 +136,7 @@ void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, u32 end_seq,
end_seq, tp->rack.end_seq))
return;
- rtt_us = skb_mstamp_us_delta(ack_time, xmit_time);
+ rtt_us = skb_mstamp_us_delta(&tp->tcp_mstamp, xmit_time);
if (sacked & TCPCB_RETRANS) {
/* If the sacked packet was retransmitted, it's ambiguous
* whether the retransmission or the original (or the prior
@@ -165,12 +163,11 @@ void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, u32 end_seq,
void tcp_rack_reo_timeout(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
- struct skb_mstamp now;
u32 timeout, prior_inflight;
- skb_mstamp_get(&now);
prior_inflight = tcp_packets_in_flight(tp);
- tcp_rack_detect_loss(sk, &now, &timeout);
+ skb_mstamp_get(&tp->tcp_mstamp);
+ tcp_rack_detect_loss(sk, &timeout);
if (prior_inflight != tcp_packets_in_flight(tp)) {
if (inet_csk(sk)->icsk_ca_state != TCP_CA_Recovery) {
tcp_enter_recovery(sk, false);
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index b2ab411c6d37..14672543cf0b 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -201,11 +201,10 @@ static int tcp_write_timeout(struct sock *sk)
if (retransmits_timed_out(sk, net->ipv4.sysctl_tcp_retries1, 0, 0)) {
/* Some middle-boxes may black-hole Fast Open _after_
* the handshake. Therefore we conservatively disable
- * Fast Open on this path on recurring timeouts with
- * few or zero bytes acked after Fast Open.
+ * Fast Open on this path on recurring timeouts after
+ * successful Fast Open.
*/
- if (tp->syn_data_acked &&
- tp->bytes_acked <= tp->rx_opt.mss_clamp) {
+ if (tp->syn_data_acked) {
tcp_fastopen_cache_set(sk, 0, NULL, true, 0);
if (icsk->icsk_retransmits == net->ipv4.sysctl_tcp_retries1)
NET_INC_STATS(sock_net(sk),
diff --git a/net/ipv4/tcp_westwood.c b/net/ipv4/tcp_westwood.c
index fed66dc0e0f5..9775453b8d17 100644
--- a/net/ipv4/tcp_westwood.c
+++ b/net/ipv4/tcp_westwood.c
@@ -265,8 +265,8 @@ static size_t tcp_westwood_info(struct sock *sk, u32 ext, int *attr,
if (ext & (1 << (INET_DIAG_VEGASINFO - 1))) {
info->vegas.tcpv_enabled = 1;
info->vegas.tcpv_rttcnt = 0;
- info->vegas.tcpv_rtt = jiffies_to_usecs(ca->rtt),
- info->vegas.tcpv_minrtt = jiffies_to_usecs(ca->rtt_min),
+ info->vegas.tcpv_rtt = jiffies_to_usecs(ca->rtt);
+ info->vegas.tcpv_minrtt = jiffies_to_usecs(ca->rtt_min);
*attr = INET_DIAG_VEGASINFO;
return sizeof(struct tcpvegas_info);
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index ea6e4cff9faf..1d6219bf2d6b 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1612,7 +1612,7 @@ static void udp_v4_rehash(struct sock *sk)
udp_lib_rehash(sk, new_hash);
}
-int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
+static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
{
int rc;
@@ -1657,7 +1657,7 @@ EXPORT_SYMBOL(udp_encap_enable);
* Note that in the success and error cases, the skb is assumed to
* have either been requeued or freed.
*/
-int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
+static int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
{
struct udp_sock *up = udp_sk(sk);
int is_udplite = IS_UDPLITE(sk);
diff --git a/net/ipv4/udp_impl.h b/net/ipv4/udp_impl.h
index feb50a16398d..a8cf8c6fb60c 100644
--- a/net/ipv4/udp_impl.h
+++ b/net/ipv4/udp_impl.h
@@ -25,7 +25,6 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock,
int flags, int *addr_len);
int udp_sendpage(struct sock *sk, struct page *page, int offset, size_t size,
int flags);
-int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
void udp_destroy_sock(struct sock *sk);
#ifdef CONFIG_PROC_FS
diff --git a/net/ipv4/xfrm4_mode_transport.c b/net/ipv4/xfrm4_mode_transport.c
index 4acc0508c5eb..3d36644890bb 100644
--- a/net/ipv4/xfrm4_mode_transport.c
+++ b/net/ipv4/xfrm4_mode_transport.c
@@ -12,6 +12,7 @@
#include <net/dst.h>
#include <net/ip.h>
#include <net/xfrm.h>
+#include <net/protocol.h>
/* Add encapsulation header.
*
@@ -23,6 +24,8 @@ static int xfrm4_transport_output(struct xfrm_state *x, struct sk_buff *skb)
struct iphdr *iph = ip_hdr(skb);
int ihl = iph->ihl * 4;
+ skb_set_inner_transport_header(skb, skb_transport_offset(skb));
+
skb_set_network_header(skb, -x->props.header_len);
skb->mac_header = skb->network_header +
offsetof(struct iphdr, protocol);
@@ -56,9 +59,40 @@ static int xfrm4_transport_input(struct xfrm_state *x, struct sk_buff *skb)
return 0;
}
+static struct sk_buff *xfrm4_transport_gso_segment(struct xfrm_state *x,
+ struct sk_buff *skb,
+ netdev_features_t features)
+{
+ const struct net_offload *ops;
+ struct sk_buff *segs = ERR_PTR(-EINVAL);
+ struct xfrm_offload *xo = xfrm_offload(skb);
+
+ skb->transport_header += x->props.header_len;
+ ops = rcu_dereference(inet_offloads[xo->proto]);
+ if (likely(ops && ops->callbacks.gso_segment))
+ segs = ops->callbacks.gso_segment(skb, features);
+
+ return segs;
+}
+
+static void xfrm4_transport_xmit(struct xfrm_state *x, struct sk_buff *skb)
+{
+ struct xfrm_offload *xo = xfrm_offload(skb);
+
+ skb_reset_mac_len(skb);
+ pskb_pull(skb, skb->mac_len + sizeof(struct iphdr) + x->props.header_len);
+
+ if (xo->flags & XFRM_GSO_SEGMENT) {
+ skb_reset_transport_header(skb);
+ skb->transport_header -= x->props.header_len;
+ }
+}
+
static struct xfrm_mode xfrm4_transport_mode = {
.input = xfrm4_transport_input,
.output = xfrm4_transport_output,
+ .gso_segment = xfrm4_transport_gso_segment,
+ .xmit = xfrm4_transport_xmit,
.owner = THIS_MODULE,
.encap = XFRM_MODE_TRANSPORT,
};
diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c
index 35feda676464..e6265e2c274e 100644
--- a/net/ipv4/xfrm4_mode_tunnel.c
+++ b/net/ipv4/xfrm4_mode_tunnel.c
@@ -33,6 +33,9 @@ static int xfrm4_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
struct iphdr *top_iph;
int flags;
+ skb_set_inner_network_header(skb, skb_network_offset(skb));
+ skb_set_inner_transport_header(skb, skb_transport_offset(skb));
+
skb_set_network_header(skb, -x->props.header_len);
skb->mac_header = skb->network_header +
offsetof(struct iphdr, protocol);
@@ -96,11 +99,36 @@ out:
return err;
}
+static struct sk_buff *xfrm4_mode_tunnel_gso_segment(struct xfrm_state *x,
+ struct sk_buff *skb,
+ netdev_features_t features)
+{
+ __skb_push(skb, skb->mac_len);
+ return skb_mac_gso_segment(skb, features);
+
+}
+
+static void xfrm4_mode_tunnel_xmit(struct xfrm_state *x, struct sk_buff *skb)
+{
+ struct xfrm_offload *xo = xfrm_offload(skb);
+
+ if (xo->flags & XFRM_GSO_SEGMENT) {
+ skb->network_header = skb->network_header - x->props.header_len;
+ skb->transport_header = skb->network_header +
+ sizeof(struct iphdr);
+ }
+
+ skb_reset_mac_len(skb);
+ pskb_pull(skb, skb->mac_len + x->props.header_len);
+}
+
static struct xfrm_mode xfrm4_tunnel_mode = {
.input2 = xfrm4_mode_tunnel_input,
.input = xfrm_prepare_input,
.output2 = xfrm4_mode_tunnel_output,
.output = xfrm4_prepare_output,
+ .gso_segment = xfrm4_mode_tunnel_gso_segment,
+ .xmit = xfrm4_mode_tunnel_xmit,
.owner = THIS_MODULE,
.encap = XFRM_MODE_TUNNEL,
.flags = XFRM_MODE_FLAG_TUNNEL,
diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c
index 7ee6518afa86..94b8702603bc 100644
--- a/net/ipv4/xfrm4_output.c
+++ b/net/ipv4/xfrm4_output.c
@@ -29,7 +29,8 @@ static int xfrm4_tunnel_check_size(struct sk_buff *skb)
goto out;
mtu = dst_mtu(skb_dst(skb));
- if (skb->len > mtu) {
+ if ((!skb_is_gso(skb) && skb->len > mtu) ||
+ (skb_is_gso(skb) && skb_gso_network_seglen(skb) > ip_skb_dst_mtu(skb->sk, skb))) {
skb->protocol = htons(ETH_P_IP);
if (skb->sk)
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index e2afe677a9d9..48c452959d2c 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -307,6 +307,7 @@ config IPV6_SEG6_LWTUNNEL
bool "IPv6: Segment Routing Header encapsulation support"
depends on IPV6
select LWTUNNEL
+ select DST_CACHE
---help---
Support for encapsulation of packets within an outer IPv6
header and a Segment Routing Header using the lightweight
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 0ea96c4d334d..6a4fb1e629fb 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -224,6 +224,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = {
.accept_ra_rtr_pref = 1,
.rtr_probe_interval = 60 * HZ,
#ifdef CONFIG_IPV6_ROUTE_INFO
+ .accept_ra_rt_info_min_plen = 0,
.accept_ra_rt_info_max_plen = 0,
#endif
#endif
@@ -245,6 +246,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = {
#endif
.enhanced_dad = 1,
.addr_gen_mode = IN6_ADDR_GEN_MODE_EUI64,
+ .disable_policy = 0,
};
static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
@@ -276,6 +278,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
.accept_ra_rtr_pref = 1,
.rtr_probe_interval = 60 * HZ,
#ifdef CONFIG_IPV6_ROUTE_INFO
+ .accept_ra_rt_info_min_plen = 0,
.accept_ra_rt_info_max_plen = 0,
#endif
#endif
@@ -297,6 +300,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
#endif
.enhanced_dad = 1,
.addr_gen_mode = IN6_ADDR_GEN_MODE_EUI64,
+ .disable_policy = 0,
};
/* Check if a valid qdisc is available */
@@ -545,6 +549,9 @@ static int inet6_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
goto nla_put_failure;
+ if (!devconf)
+ goto out;
+
if ((all || type == NETCONFA_FORWARDING) &&
nla_put_s32(skb, NETCONFA_FORWARDING, devconf->forwarding) < 0)
goto nla_put_failure;
@@ -563,6 +570,7 @@ static int inet6_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
devconf->ignore_routes_with_linkdown) < 0)
goto nla_put_failure;
+out:
nlmsg_end(skb, nlh);
return 0;
@@ -571,8 +579,8 @@ nla_put_failure:
return -EMSGSIZE;
}
-void inet6_netconf_notify_devconf(struct net *net, int type, int ifindex,
- struct ipv6_devconf *devconf)
+void inet6_netconf_notify_devconf(struct net *net, int event, int type,
+ int ifindex, struct ipv6_devconf *devconf)
{
struct sk_buff *skb;
int err = -ENOBUFS;
@@ -582,7 +590,7 @@ void inet6_netconf_notify_devconf(struct net *net, int type, int ifindex,
goto errout;
err = inet6_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
- RTM_NEWNETCONF, 0, type);
+ event, 0, type);
if (err < 0) {
/* -EMSGSIZE implies BUG in inet6_netconf_msgsize_devconf() */
WARN_ON(err == -EMSGSIZE);
@@ -603,7 +611,8 @@ static const struct nla_policy devconf_ipv6_policy[NETCONFA_MAX+1] = {
};
static int inet6_netconf_get_devconf(struct sk_buff *in_skb,
- struct nlmsghdr *nlh)
+ struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(in_skb->sk);
struct nlattr *tb[NETCONFA_MAX+1];
@@ -616,7 +625,7 @@ static int inet6_netconf_get_devconf(struct sk_buff *in_skb,
int err;
err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
- devconf_ipv6_policy);
+ devconf_ipv6_policy, extack);
if (err < 0)
goto errout;
@@ -765,7 +774,8 @@ static void dev_forward_change(struct inet6_dev *idev)
else
addrconf_leave_anycast(ifa);
}
- inet6_netconf_notify_devconf(dev_net(dev), NETCONFA_FORWARDING,
+ inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
+ NETCONFA_FORWARDING,
dev->ifindex, &idev->cnf);
}
@@ -800,7 +810,8 @@ static int addrconf_fixup_forwarding(struct ctl_table *table, int *p, int newf)
if (p == &net->ipv6.devconf_dflt->forwarding) {
if ((!newf) ^ (!old))
- inet6_netconf_notify_devconf(net, NETCONFA_FORWARDING,
+ inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
+ NETCONFA_FORWARDING,
NETCONFA_IFINDEX_DEFAULT,
net->ipv6.devconf_dflt);
rtnl_unlock();
@@ -812,13 +823,15 @@ static int addrconf_fixup_forwarding(struct ctl_table *table, int *p, int newf)
net->ipv6.devconf_dflt->forwarding = newf;
if ((!newf) ^ (!old_dflt))
- inet6_netconf_notify_devconf(net, NETCONFA_FORWARDING,
+ inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
+ NETCONFA_FORWARDING,
NETCONFA_IFINDEX_DEFAULT,
net->ipv6.devconf_dflt);
addrconf_forward_change(net, newf);
if ((!newf) ^ (!old))
- inet6_netconf_notify_devconf(net, NETCONFA_FORWARDING,
+ inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
+ NETCONFA_FORWARDING,
NETCONFA_IFINDEX_ALL,
net->ipv6.devconf_all);
} else if ((!newf) ^ (!old))
@@ -843,6 +856,7 @@ static void addrconf_linkdown_change(struct net *net, __s32 newf)
idev->cnf.ignore_routes_with_linkdown = newf;
if (changed)
inet6_netconf_notify_devconf(dev_net(dev),
+ RTM_NEWNETCONF,
NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
dev->ifindex,
&idev->cnf);
@@ -865,6 +879,7 @@ static int addrconf_fixup_linkdown(struct ctl_table *table, int *p, int newf)
if (p == &net->ipv6.devconf_dflt->ignore_routes_with_linkdown) {
if ((!newf) ^ (!old))
inet6_netconf_notify_devconf(net,
+ RTM_NEWNETCONF,
NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
NETCONFA_IFINDEX_DEFAULT,
net->ipv6.devconf_dflt);
@@ -877,6 +892,7 @@ static int addrconf_fixup_linkdown(struct ctl_table *table, int *p, int newf)
addrconf_linkdown_change(net, newf);
if ((!newf) ^ (!old))
inet6_netconf_notify_devconf(net,
+ RTM_NEWNETCONF,
NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
NETCONFA_IFINDEX_ALL,
net->ipv6.devconf_all);
@@ -944,6 +960,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
const struct in6_addr *peer_addr, int pfxlen,
int scope, u32 flags, u32 valid_lft, u32 prefered_lft)
{
+ struct net *net = dev_net(idev->dev);
struct inet6_ifaddr *ifa = NULL;
struct rt6_info *rt;
unsigned int hash;
@@ -990,6 +1007,10 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
goto out;
}
+ if (net->ipv6.devconf_all->disable_policy ||
+ idev->cnf.disable_policy)
+ rt->dst.flags |= DST_NOPOLICY;
+
neigh_parms_data_state_setall(idev->nd_parms);
ifa->addr = *addr;
@@ -1001,7 +1022,10 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
INIT_HLIST_NODE(&ifa->addr_lst);
ifa->scope = scope;
ifa->prefix_len = pfxlen;
- ifa->flags = flags | IFA_F_TENTATIVE;
+ ifa->flags = flags;
+ /* No need to add the TENTATIVE flag for addresses with NODAD */
+ if (!(flags & IFA_F_NODAD))
+ ifa->flags |= IFA_F_TENTATIVE;
ifa->valid_lft = valid_lft;
ifa->prefered_lft = prefered_lft;
ifa->cstamp = ifa->tstamp = jiffies;
@@ -2053,12 +2077,23 @@ static void addrconf_leave_anycast(struct inet6_ifaddr *ifp)
__ipv6_dev_ac_dec(ifp->idev, &addr);
}
-static int addrconf_ifid_eui64(u8 *eui, struct net_device *dev)
+static int addrconf_ifid_6lowpan(u8 *eui, struct net_device *dev)
{
- if (dev->addr_len != EUI64_ADDR_LEN)
+ switch (dev->addr_len) {
+ case ETH_ALEN:
+ memcpy(eui, dev->dev_addr, 3);
+ eui[3] = 0xFF;
+ eui[4] = 0xFE;
+ memcpy(eui + 5, dev->dev_addr + 3, 3);
+ break;
+ case EUI64_ADDR_LEN:
+ memcpy(eui, dev->dev_addr, EUI64_ADDR_LEN);
+ eui[0] ^= 2;
+ break;
+ default:
return -1;
- memcpy(eui, dev->dev_addr, EUI64_ADDR_LEN);
- eui[0] ^= 2;
+ }
+
return 0;
}
@@ -2150,7 +2185,7 @@ static int ipv6_generate_eui64(u8 *eui, struct net_device *dev)
case ARPHRD_TUNNEL:
return addrconf_ifid_gre(eui, dev);
case ARPHRD_6LOWPAN:
- return addrconf_ifid_eui64(eui, dev);
+ return addrconf_ifid_6lowpan(eui, dev);
case ARPHRD_IEEE1394:
return addrconf_ifid_ieee1394(eui, dev);
case ARPHRD_TUNNEL6:
@@ -3296,7 +3331,8 @@ static int fixup_permanent_addr(struct inet6_dev *idev,
idev->dev, 0, 0);
}
- addrconf_dad_start(ifp);
+ if (ifp->state == INET6_IFADDR_STATE_PREDAD)
+ addrconf_dad_start(ifp);
return 0;
}
@@ -3515,6 +3551,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
*/
static struct notifier_block ipv6_dev_notf = {
.notifier_call = addrconf_notify,
+ .priority = ADDRCONF_NOTIFY_PRIORITY,
};
static void addrconf_type_change(struct net_device *dev, unsigned long event)
@@ -3651,7 +3688,7 @@ restart:
if (keep) {
/* set state to skip the notifier below */
state = INET6_IFADDR_STATE_DEAD;
- ifa->state = 0;
+ ifa->state = INET6_IFADDR_STATE_PREDAD;
if (!(ifa->flags & IFA_F_NODAD))
ifa->flags |= IFA_F_TENTATIVE;
@@ -4392,7 +4429,8 @@ static const struct nla_policy ifa_ipv6_policy[IFA_MAX+1] = {
};
static int
-inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh)
+inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
struct ifaddrmsg *ifm;
@@ -4401,7 +4439,8 @@ inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh)
u32 ifa_flags;
int err;
- err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy);
+ err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy,
+ extack);
if (err < 0)
return err;
@@ -4501,7 +4540,8 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, u32 ifa_flags,
}
static int
-inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh)
+inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
struct ifaddrmsg *ifm;
@@ -4513,7 +4553,8 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh)
u32 ifa_flags;
int err;
- err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy);
+ err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy,
+ extack);
if (err < 0)
return err;
@@ -4863,7 +4904,8 @@ static int inet6_dump_ifacaddr(struct sk_buff *skb, struct netlink_callback *cb)
return inet6_dump_addr(skb, cb, type);
}
-static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh)
+static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(in_skb->sk);
struct ifaddrmsg *ifm;
@@ -4874,7 +4916,8 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh)
struct sk_buff *skb;
int err;
- err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy);
+ err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy,
+ extack);
if (err < 0)
goto errout;
@@ -4985,6 +5028,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
array[DEVCONF_RTR_PROBE_INTERVAL] =
jiffies_to_msecs(cnf->rtr_probe_interval);
#ifdef CONFIG_IPV6_ROUTE_INFO
+ array[DEVCONF_ACCEPT_RA_RT_INFO_MIN_PLEN] = cnf->accept_ra_rt_info_min_plen;
array[DEVCONF_ACCEPT_RA_RT_INFO_MAX_PLEN] = cnf->accept_ra_rt_info_max_plen;
#endif
#endif
@@ -5016,6 +5060,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
#endif
array[DEVCONF_ENHANCED_DAD] = cnf->enhanced_dad;
array[DEVCONF_ADDR_GEN_MODE] = cnf->addr_gen_mode;
+ array[DEVCONF_DISABLE_POLICY] = cnf->disable_policy;
}
static inline size_t inet6_ifla6_size(void)
@@ -5242,7 +5287,8 @@ static int inet6_validate_link_af(const struct net_device *dev,
if (dev && !__in6_dev_get(dev))
return -EAFNOSUPPORT;
- return nla_parse_nested(tb, IFLA_INET6_MAX, nla, inet6_af_policy);
+ return nla_parse_nested(tb, IFLA_INET6_MAX, nla, inet6_af_policy,
+ NULL);
}
static int check_addr_gen_mode(int mode)
@@ -5274,7 +5320,7 @@ static int inet6_set_link_af(struct net_device *dev, const struct nlattr *nla)
if (!idev)
return -EAFNOSUPPORT;
- if (nla_parse_nested(tb, IFLA_INET6_MAX, nla, NULL) < 0)
+ if (nla_parse_nested(tb, IFLA_INET6_MAX, nla, NULL, NULL) < 0)
BUG();
if (tb[IFLA_INET6_TOKEN]) {
@@ -5677,17 +5723,20 @@ int addrconf_sysctl_proxy_ndp(struct ctl_table *ctl, int write,
return restart_syscall();
if (valp == &net->ipv6.devconf_dflt->proxy_ndp)
- inet6_netconf_notify_devconf(net, NETCONFA_PROXY_NEIGH,
+ inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
+ NETCONFA_PROXY_NEIGH,
NETCONFA_IFINDEX_DEFAULT,
net->ipv6.devconf_dflt);
else if (valp == &net->ipv6.devconf_all->proxy_ndp)
- inet6_netconf_notify_devconf(net, NETCONFA_PROXY_NEIGH,
+ inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
+ NETCONFA_PROXY_NEIGH,
NETCONFA_IFINDEX_ALL,
net->ipv6.devconf_all);
else {
struct inet6_dev *idev = ctl->extra1;
- inet6_netconf_notify_devconf(net, NETCONFA_PROXY_NEIGH,
+ inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
+ NETCONFA_PROXY_NEIGH,
idev->dev->ifindex,
&idev->cnf);
}
@@ -5840,6 +5889,105 @@ int addrconf_sysctl_ignore_routes_with_linkdown(struct ctl_table *ctl,
return ret;
}
+static
+void addrconf_set_nopolicy(struct rt6_info *rt, int action)
+{
+ if (rt) {
+ if (action)
+ rt->dst.flags |= DST_NOPOLICY;
+ else
+ rt->dst.flags &= ~DST_NOPOLICY;
+ }
+}
+
+static
+void addrconf_disable_policy_idev(struct inet6_dev *idev, int val)
+{
+ struct inet6_ifaddr *ifa;
+
+ read_lock_bh(&idev->lock);
+ list_for_each_entry(ifa, &idev->addr_list, if_list) {
+ spin_lock(&ifa->lock);
+ if (ifa->rt) {
+ struct rt6_info *rt = ifa->rt;
+ struct fib6_table *table = rt->rt6i_table;
+ int cpu;
+
+ read_lock(&table->tb6_lock);
+ addrconf_set_nopolicy(ifa->rt, val);
+ if (rt->rt6i_pcpu) {
+ for_each_possible_cpu(cpu) {
+ struct rt6_info **rtp;
+
+ rtp = per_cpu_ptr(rt->rt6i_pcpu, cpu);
+ addrconf_set_nopolicy(*rtp, val);
+ }
+ }
+ read_unlock(&table->tb6_lock);
+ }
+ spin_unlock(&ifa->lock);
+ }
+ read_unlock_bh(&idev->lock);
+}
+
+static
+int addrconf_disable_policy(struct ctl_table *ctl, int *valp, int val)
+{
+ struct inet6_dev *idev;
+ struct net *net;
+
+ if (!rtnl_trylock())
+ return restart_syscall();
+
+ *valp = val;
+
+ net = (struct net *)ctl->extra2;
+ if (valp == &net->ipv6.devconf_dflt->disable_policy) {
+ rtnl_unlock();
+ return 0;
+ }
+
+ if (valp == &net->ipv6.devconf_all->disable_policy) {
+ struct net_device *dev;
+
+ for_each_netdev(net, dev) {
+ idev = __in6_dev_get(dev);
+ if (idev)
+ addrconf_disable_policy_idev(idev, val);
+ }
+ } else {
+ idev = (struct inet6_dev *)ctl->extra1;
+ addrconf_disable_policy_idev(idev, val);
+ }
+
+ rtnl_unlock();
+ return 0;
+}
+
+static
+int addrconf_sysctl_disable_policy(struct ctl_table *ctl, int write,
+ void __user *buffer, size_t *lenp,
+ loff_t *ppos)
+{
+ int *valp = ctl->data;
+ int val = *valp;
+ loff_t pos = *ppos;
+ struct ctl_table lctl;
+ int ret;
+
+ lctl = *ctl;
+ lctl.data = &val;
+ ret = proc_dointvec(&lctl, write, buffer, lenp, ppos);
+
+ if (write && (*valp != val))
+ ret = addrconf_disable_policy(ctl, valp, val);
+
+ if (ret)
+ *ppos = pos;
+
+ return ret;
+}
+
static int minus_one = -1;
static const int one = 1;
static const int two_five_five = 255;
@@ -6028,6 +6176,13 @@ static const struct ctl_table addrconf_sysctl[] = {
},
#ifdef CONFIG_IPV6_ROUTE_INFO
{
+ .procname = "accept_ra_rt_info_min_plen",
+ .data = &ipv6_devconf.accept_ra_rt_info_min_plen,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
.procname = "accept_ra_rt_info_max_plen",
.data = &ipv6_devconf.accept_ra_rt_info_max_plen,
.maxlen = sizeof(int),
@@ -6198,6 +6353,13 @@ static const struct ctl_table addrconf_sysctl[] = {
.proc_handler = addrconf_sysctl_addr_gen_mode,
},
{
+ .procname = "disable_policy",
+ .data = &ipv6_devconf.disable_policy,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = addrconf_sysctl_disable_policy,
+ },
+ {
/* sentinel */
}
};
@@ -6237,7 +6399,8 @@ static int __addrconf_sysctl_register(struct net *net, char *dev_name,
ifindex = NETCONFA_IFINDEX_DEFAULT;
else
ifindex = idev->dev->ifindex;
- inet6_netconf_notify_devconf(net, NETCONFA_ALL, ifindex, p);
+ inet6_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_ALL,
+ ifindex, p);
return 0;
free:
@@ -6246,7 +6409,8 @@ out:
return -ENOBUFS;
}
-static void __addrconf_sysctl_unregister(struct ipv6_devconf *p)
+static void __addrconf_sysctl_unregister(struct net *net,
+ struct ipv6_devconf *p, int ifindex)
{
struct ctl_table *table;
@@ -6257,6 +6421,8 @@ static void __addrconf_sysctl_unregister(struct ipv6_devconf *p)
unregister_net_sysctl_table(p->sysctl_header);
p->sysctl_header = NULL;
kfree(table);
+
+ inet6_netconf_notify_devconf(net, RTM_DELNETCONF, 0, ifindex, NULL);
}
static int addrconf_sysctl_register(struct inet6_dev *idev)
@@ -6280,7 +6446,8 @@ static int addrconf_sysctl_register(struct inet6_dev *idev)
static void addrconf_sysctl_unregister(struct inet6_dev *idev)
{
- __addrconf_sysctl_unregister(&idev->cnf);
+ __addrconf_sysctl_unregister(dev_net(idev->dev), &idev->cnf,
+ idev->dev->ifindex);
neigh_sysctl_unregister(idev->nd_parms);
}
@@ -6323,7 +6490,7 @@ static int __net_init addrconf_init_net(struct net *net)
#ifdef CONFIG_SYSCTL
err_reg_dflt:
- __addrconf_sysctl_unregister(all);
+ __addrconf_sysctl_unregister(net, all, NETCONFA_IFINDEX_ALL);
err_reg_all:
kfree(dflt);
#endif
@@ -6336,8 +6503,10 @@ err_alloc_all:
static void __net_exit addrconf_exit_net(struct net *net)
{
#ifdef CONFIG_SYSCTL
- __addrconf_sysctl_unregister(net->ipv6.devconf_dflt);
- __addrconf_sysctl_unregister(net->ipv6.devconf_all);
+ __addrconf_sysctl_unregister(net, net->ipv6.devconf_dflt,
+ NETCONFA_IFINDEX_DEFAULT);
+ __addrconf_sysctl_unregister(net, net->ipv6.devconf_all,
+ NETCONFA_IFINDEX_ALL);
#endif
kfree(net->ipv6.devconf_dflt);
kfree(net->ipv6.devconf_all);
@@ -6408,6 +6577,8 @@ int __init addrconf_init(void)
goto errlo;
}
+ ip6_route_init_special_entries();
+
for (i = 0; i < IN6_ADDR_HSIZE; i++)
INIT_HLIST_HEAD(&inet6_addr_lst[i]);
diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c
index a8f6986dcbe5..07cd7d248bb6 100644
--- a/net/ipv6/addrlabel.c
+++ b/net/ipv6/addrlabel.c
@@ -404,7 +404,8 @@ static const struct nla_policy ifal_policy[IFAL_MAX+1] = {
[IFAL_LABEL] = { .len = sizeof(u32), },
};
-static int ip6addrlbl_newdel(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int ip6addrlbl_newdel(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
struct ifaddrlblmsg *ifal;
@@ -413,7 +414,8 @@ static int ip6addrlbl_newdel(struct sk_buff *skb, struct nlmsghdr *nlh)
u32 label;
int err = 0;
- err = nlmsg_parse(nlh, sizeof(*ifal), tb, IFAL_MAX, ifal_policy);
+ err = nlmsg_parse(nlh, sizeof(*ifal), tb, IFAL_MAX, ifal_policy,
+ extack);
if (err < 0)
return err;
@@ -521,7 +523,8 @@ static inline int ip6addrlbl_msgsize(void)
+ nla_total_size(4); /* IFAL_LABEL */
}
-static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr *nlh)
+static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(in_skb->sk);
struct ifaddrlblmsg *ifal;
@@ -532,7 +535,8 @@ static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr *nlh)
struct ip6addrlbl_entry *p;
struct sk_buff *skb;
- err = nlmsg_parse(nlh, sizeof(*ifal), tb, IFAL_MAX, ifal_policy);
+ err = nlmsg_parse(nlh, sizeof(*ifal), tb, IFAL_MAX, ifal_policy,
+ extack);
if (err < 0)
return err;
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index e82e59f22dfc..a88b5b5b7955 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -1003,6 +1003,10 @@ static int __init inet6_init(void)
if (err)
goto seg6_fail;
+ err = igmp6_late_init();
+ if (err)
+ goto igmp6_late_err;
+
#ifdef CONFIG_SYSCTL
err = ipv6_sysctl_register();
if (err)
@@ -1017,8 +1021,10 @@ out:
#ifdef CONFIG_SYSCTL
sysctl_fail:
- seg6_exit();
+ igmp6_late_cleanup();
#endif
+igmp6_late_err:
+ seg6_exit();
seg6_fail:
calipso_exit();
calipso_fail:
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index ff54faa75631..1fe99ba8066c 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -170,19 +170,23 @@ static void esp_output_restore_header(struct sk_buff *skb)
}
static struct ip_esp_hdr *esp_output_set_esn(struct sk_buff *skb,
+ struct xfrm_state *x,
struct ip_esp_hdr *esph,
__be32 *seqhi)
{
- struct xfrm_state *x = skb_dst(skb)->xfrm;
-
/* For ESN we move the header forward by 4 bytes to
* accomodate the high bits. We will move it back after
* encryption.
*/
if ((x->props.flags & XFRM_STATE_ESN)) {
+ struct xfrm_offload *xo = xfrm_offload(skb);
+
esph = (void *)(skb_transport_header(skb) - sizeof(__be32));
*seqhi = esph->spi;
- esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.hi);
+ if (xo)
+ esph->seq_no = htonl(xo->seq.hi);
+ else
+ esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.hi);
}
esph->spi = x->id.spi;
@@ -214,61 +218,16 @@ static void esp_output_fill_trailer(u8 *tail, int tfclen, int plen, __u8 proto)
tail[plen - 1] = proto;
}
-static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
+int esp6_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *esp)
{
- int err;
- struct ip_esp_hdr *esph;
- struct crypto_aead *aead;
- struct aead_request *req;
- struct scatterlist *sg, *dsg;
- struct sk_buff *trailer;
- struct page *page;
- void *tmp;
- int blksize;
- int clen;
- int alen;
- int plen;
- int ivlen;
- int tfclen;
- int nfrags;
- int assoclen;
- int seqhilen;
- int tailen;
- u8 *iv;
u8 *tail;
u8 *vaddr;
- __be32 *seqhi;
- __be64 seqno;
- __u8 proto = *skb_mac_header(skb);
-
- /* skb is pure payload to encrypt */
- aead = x->data;
- alen = crypto_aead_authsize(aead);
- ivlen = crypto_aead_ivsize(aead);
-
- tfclen = 0;
- if (x->tfcpad) {
- struct xfrm_dst *dst = (struct xfrm_dst *)skb_dst(skb);
- u32 padto;
-
- padto = min(x->tfcpad, esp6_get_mtu(x, dst->child_mtu_cached));
- if (skb->len < padto)
- tfclen = padto - skb->len;
- }
- blksize = ALIGN(crypto_aead_blocksize(aead), 4);
- clen = ALIGN(skb->len + 2 + tfclen, blksize);
- plen = clen - skb->len - tfclen;
- tailen = tfclen + plen + alen;
-
- assoclen = sizeof(*esph);
- seqhilen = 0;
-
- if (x->props.flags & XFRM_STATE_ESN) {
- seqhilen += sizeof(__be32);
- assoclen += seqhilen;
- }
+ int nfrags;
+ struct page *page;
+ struct ip_esp_hdr *esph;
+ struct sk_buff *trailer;
+ int tailen = esp->tailen;
- *skb_mac_header(skb) = IPPROTO_ESP;
esph = ip_esp_hdr(skb);
if (!skb_cloned(skb)) {
@@ -284,6 +243,8 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
struct sock *sk = skb->sk;
struct page_frag *pfrag = &x->xfrag;
+ esp->inplace = false;
+
allocsize = ALIGN(tailen, L1_CACHE_BYTES);
spin_lock_bh(&x->lock);
@@ -300,10 +261,12 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
tail = vaddr + pfrag->offset;
- esp_output_fill_trailer(tail, tfclen, plen, proto);
+ esp_output_fill_trailer(tail, esp->tfclen, esp->plen, esp->proto);
kunmap_atomic(vaddr);
+ spin_unlock_bh(&x->lock);
+
nfrags = skb_shinfo(skb)->nr_frags;
__skb_fill_page_desc(skb, nfrags, page, pfrag->offset,
@@ -319,108 +282,111 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
if (sk)
atomic_add(tailen, &sk->sk_wmem_alloc);
- skb_push(skb, -skb_network_offset(skb));
-
- esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low);
- esph->spi = x->id.spi;
-
- tmp = esp_alloc_tmp(aead, nfrags + 2, seqhilen);
- if (!tmp) {
- spin_unlock_bh(&x->lock);
- err = -ENOMEM;
- goto error;
- }
- seqhi = esp_tmp_seqhi(tmp);
- iv = esp_tmp_iv(aead, tmp, seqhilen);
- req = esp_tmp_req(aead, iv);
- sg = esp_req_sg(aead, req);
- dsg = &sg[nfrags];
-
- esph = esp_output_set_esn(skb, esph, seqhi);
-
- sg_init_table(sg, nfrags);
- skb_to_sgvec(skb, sg,
- (unsigned char *)esph - skb->data,
- assoclen + ivlen + clen + alen);
-
- allocsize = ALIGN(skb->data_len, L1_CACHE_BYTES);
-
- if (unlikely(!skb_page_frag_refill(allocsize, pfrag, GFP_ATOMIC))) {
- spin_unlock_bh(&x->lock);
- err = -ENOMEM;
- goto error;
- }
-
- skb_shinfo(skb)->nr_frags = 1;
-
- page = pfrag->page;
- get_page(page);
- /* replace page frags in skb with new page */
- __skb_fill_page_desc(skb, 0, page, pfrag->offset, skb->data_len);
- pfrag->offset = pfrag->offset + allocsize;
-
- sg_init_table(dsg, skb_shinfo(skb)->nr_frags + 1);
- skb_to_sgvec(skb, dsg,
- (unsigned char *)esph - skb->data,
- assoclen + ivlen + clen + alen);
-
- spin_unlock_bh(&x->lock);
-
- goto skip_cow2;
+ goto out;
}
}
cow:
- err = skb_cow_data(skb, tailen, &trailer);
- if (err < 0)
- goto error;
- nfrags = err;
-
+ nfrags = skb_cow_data(skb, tailen, &trailer);
+ if (nfrags < 0)
+ goto out;
tail = skb_tail_pointer(trailer);
- esph = ip_esp_hdr(skb);
skip_cow:
- esp_output_fill_trailer(tail, tfclen, plen, proto);
+ esp_output_fill_trailer(tail, esp->tfclen, esp->plen, esp->proto);
+ pskb_put(skb, trailer, tailen);
- pskb_put(skb, trailer, clen - skb->len + alen);
- skb_push(skb, -skb_network_offset(skb));
+out:
+ return nfrags;
+}
+EXPORT_SYMBOL_GPL(esp6_output_head);
- esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low);
- esph->spi = x->id.spi;
+int esp6_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *esp)
+{
+ u8 *iv;
+ int alen;
+ void *tmp;
+ int ivlen;
+ int assoclen;
+ int seqhilen;
+ __be32 *seqhi;
+ struct page *page;
+ struct ip_esp_hdr *esph;
+ struct aead_request *req;
+ struct crypto_aead *aead;
+ struct scatterlist *sg, *dsg;
+ int err = -ENOMEM;
- tmp = esp_alloc_tmp(aead, nfrags, seqhilen);
- if (!tmp) {
- err = -ENOMEM;
- goto error;
+ assoclen = sizeof(struct ip_esp_hdr);
+ seqhilen = 0;
+
+ if (x->props.flags & XFRM_STATE_ESN) {
+ seqhilen += sizeof(__be32);
+ assoclen += sizeof(__be32);
}
+ aead = x->data;
+ alen = crypto_aead_authsize(aead);
+ ivlen = crypto_aead_ivsize(aead);
+
+ tmp = esp_alloc_tmp(aead, esp->nfrags + 2, seqhilen);
+ if (!tmp)
+ goto error;
+
seqhi = esp_tmp_seqhi(tmp);
iv = esp_tmp_iv(aead, tmp, seqhilen);
req = esp_tmp_req(aead, iv);
sg = esp_req_sg(aead, req);
- dsg = sg;
- esph = esp_output_set_esn(skb, esph, seqhi);
+ if (esp->inplace)
+ dsg = sg;
+ else
+ dsg = &sg[esp->nfrags];
- sg_init_table(sg, nfrags);
+ esph = esp_output_set_esn(skb, x, ip_esp_hdr(skb), seqhi);
+
+ sg_init_table(sg, esp->nfrags);
skb_to_sgvec(skb, sg,
(unsigned char *)esph - skb->data,
- assoclen + ivlen + clen + alen);
+ assoclen + ivlen + esp->clen + alen);
+
+ if (!esp->inplace) {
+ int allocsize;
+ struct page_frag *pfrag = &x->xfrag;
+
+ allocsize = ALIGN(skb->data_len, L1_CACHE_BYTES);
+
+ spin_lock_bh(&x->lock);
+ if (unlikely(!skb_page_frag_refill(allocsize, pfrag, GFP_ATOMIC))) {
+ spin_unlock_bh(&x->lock);
+ goto error;
+ }
+
+ skb_shinfo(skb)->nr_frags = 1;
+
+ page = pfrag->page;
+ get_page(page);
+ /* replace page frags in skb with new page */
+ __skb_fill_page_desc(skb, 0, page, pfrag->offset, skb->data_len);
+ pfrag->offset = pfrag->offset + allocsize;
+ spin_unlock_bh(&x->lock);
+
+ sg_init_table(dsg, skb_shinfo(skb)->nr_frags + 1);
+ skb_to_sgvec(skb, dsg,
+ (unsigned char *)esph - skb->data,
+ assoclen + ivlen + esp->clen + alen);
+ }
-skip_cow2:
if ((x->props.flags & XFRM_STATE_ESN))
aead_request_set_callback(req, 0, esp_output_done_esn, skb);
else
aead_request_set_callback(req, 0, esp_output_done, skb);
- aead_request_set_crypt(req, sg, dsg, ivlen + clen, iv);
+ aead_request_set_crypt(req, sg, dsg, ivlen + esp->clen, iv);
aead_request_set_ad(req, assoclen);
- seqno = cpu_to_be64(XFRM_SKB_CB(skb)->seq.output.low +
- ((u64)XFRM_SKB_CB(skb)->seq.output.hi << 32));
-
memset(iv, 0, ivlen);
- memcpy(iv + ivlen - min(ivlen, 8), (u8 *)&seqno + 8 - min(ivlen, 8),
+ memcpy(iv + ivlen - min(ivlen, 8), (u8 *)&esp->seqno + 8 - min(ivlen, 8),
min(ivlen, 8));
ESP_SKB_CB(skb)->tmp = tmp;
@@ -446,10 +412,60 @@ skip_cow2:
error:
return err;
}
+EXPORT_SYMBOL_GPL(esp6_output_tail);
-static int esp_input_done2(struct sk_buff *skb, int err)
+static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
+{
+ int alen;
+ int blksize;
+ struct ip_esp_hdr *esph;
+ struct crypto_aead *aead;
+ struct esp_info esp;
+
+ esp.inplace = true;
+
+ esp.proto = *skb_mac_header(skb);
+ *skb_mac_header(skb) = IPPROTO_ESP;
+
+ /* skb is pure payload to encrypt */
+
+ aead = x->data;
+ alen = crypto_aead_authsize(aead);
+
+ esp.tfclen = 0;
+ if (x->tfcpad) {
+ struct xfrm_dst *dst = (struct xfrm_dst *)skb_dst(skb);
+ u32 padto;
+
+ padto = min(x->tfcpad, esp6_get_mtu(x, dst->child_mtu_cached));
+ if (skb->len < padto)
+ esp.tfclen = padto - skb->len;
+ }
+ blksize = ALIGN(crypto_aead_blocksize(aead), 4);
+ esp.clen = ALIGN(skb->len + 2 + esp.tfclen, blksize);
+ esp.plen = esp.clen - skb->len - esp.tfclen;
+ esp.tailen = esp.tfclen + esp.plen + alen;
+
+ esp.nfrags = esp6_output_head(x, skb, &esp);
+ if (esp.nfrags < 0)
+ return esp.nfrags;
+
+ esph = ip_esp_hdr(skb);
+ esph->spi = x->id.spi;
+
+ esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low);
+ esp.seqno = cpu_to_be64(XFRM_SKB_CB(skb)->seq.output.low +
+ ((u64)XFRM_SKB_CB(skb)->seq.output.hi << 32));
+
+ skb_push(skb, -skb_network_offset(skb));
+
+ return esp6_output_tail(x, skb, &esp);
+}
+
+int esp6_input_done2(struct sk_buff *skb, int err)
{
struct xfrm_state *x = xfrm_input_state(skb);
+ struct xfrm_offload *xo = xfrm_offload(skb);
struct crypto_aead *aead = x->data;
int alen = crypto_aead_authsize(aead);
int hlen = sizeof(struct ip_esp_hdr) + crypto_aead_ivsize(aead);
@@ -458,7 +474,8 @@ static int esp_input_done2(struct sk_buff *skb, int err)
int padlen;
u8 nexthdr[2];
- kfree(ESP_SKB_CB(skb)->tmp);
+ if (!xo || (xo && !(xo->flags & CRYPTO_DONE)))
+ kfree(ESP_SKB_CB(skb)->tmp);
if (unlikely(err))
goto out;
@@ -492,12 +509,13 @@ static int esp_input_done2(struct sk_buff *skb, int err)
out:
return err;
}
+EXPORT_SYMBOL_GPL(esp6_input_done2);
static void esp_input_done(struct crypto_async_request *base, int err)
{
struct sk_buff *skb = base->data;
- xfrm_input_resume(skb, esp_input_done2(skb, err));
+ xfrm_input_resume(skb, esp6_input_done2(skb, err));
}
static void esp_input_restore_header(struct sk_buff *skb)
@@ -619,7 +637,7 @@ skip_cow:
if ((x->props.flags & XFRM_STATE_ESN))
esp_input_restore_header(skb);
- ret = esp_input_done2(skb, ret);
+ ret = esp6_input_done2(skb, ret);
out:
return ret;
@@ -682,13 +700,17 @@ static int esp_init_aead(struct xfrm_state *x)
char aead_name[CRYPTO_MAX_ALG_NAME];
struct crypto_aead *aead;
int err;
+ u32 mask = 0;
err = -ENAMETOOLONG;
if (snprintf(aead_name, CRYPTO_MAX_ALG_NAME, "%s(%s)",
x->geniv, x->aead->alg_name) >= CRYPTO_MAX_ALG_NAME)
goto error;
- aead = crypto_alloc_aead(aead_name, 0, 0);
+ if (x->xso.offload_handle)
+ mask |= CRYPTO_ALG_ASYNC;
+
+ aead = crypto_alloc_aead(aead_name, 0, mask);
err = PTR_ERR(aead);
if (IS_ERR(aead))
goto error;
@@ -718,6 +740,7 @@ static int esp_init_authenc(struct xfrm_state *x)
char authenc_name[CRYPTO_MAX_ALG_NAME];
unsigned int keylen;
int err;
+ u32 mask = 0;
err = -EINVAL;
if (!x->ealg)
@@ -743,7 +766,10 @@ static int esp_init_authenc(struct xfrm_state *x)
goto error;
}
- aead = crypto_alloc_aead(authenc_name, 0, 0);
+ if (x->xso.offload_handle)
+ mask |= CRYPTO_ALG_ASYNC;
+
+ aead = crypto_alloc_aead(authenc_name, 0, mask);
err = PTR_ERR(aead);
if (IS_ERR(aead))
goto error;
diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c
index d914eb93204a..d950d43ba255 100644
--- a/net/ipv6/esp6_offload.c
+++ b/net/ipv6/esp6_offload.c
@@ -45,27 +45,31 @@ static struct sk_buff **esp6_gro_receive(struct sk_buff **head,
if ((err = xfrm_parse_spi(skb, IPPROTO_ESP, &spi, &seq)) != 0)
goto out;
- err = secpath_set(skb);
- if (err)
- goto out;
+ xo = xfrm_offload(skb);
+ if (!xo || !(xo->flags & CRYPTO_DONE)) {
+ err = secpath_set(skb);
+ if (err)
+ goto out;
- if (skb->sp->len == XFRM_MAX_DEPTH)
- goto out;
+ if (skb->sp->len == XFRM_MAX_DEPTH)
+ goto out;
- x = xfrm_state_lookup(dev_net(skb->dev), skb->mark,
- (xfrm_address_t *)&ipv6_hdr(skb)->daddr,
- spi, IPPROTO_ESP, AF_INET6);
- if (!x)
- goto out;
+ x = xfrm_state_lookup(dev_net(skb->dev), skb->mark,
+ (xfrm_address_t *)&ipv6_hdr(skb)->daddr,
+ spi, IPPROTO_ESP, AF_INET6);
+ if (!x)
+ goto out;
- skb->sp->xvec[skb->sp->len++] = x;
- skb->sp->olen++;
+ skb->sp->xvec[skb->sp->len++] = x;
+ skb->sp->olen++;
- xo = xfrm_offload(skb);
- if (!xo) {
- xfrm_state_put(x);
- goto out;
+ xo = xfrm_offload(skb);
+ if (!xo) {
+ xfrm_state_put(x);
+ goto out;
+ }
}
+
xo->flags |= XFRM_GRO;
XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = NULL;
@@ -86,19 +90,216 @@ out:
return NULL;
}
+static void esp6_gso_encap(struct xfrm_state *x, struct sk_buff *skb)
+{
+ struct ip_esp_hdr *esph;
+ struct ipv6hdr *iph = ipv6_hdr(skb);
+ struct xfrm_offload *xo = xfrm_offload(skb);
+ int proto = iph->nexthdr;
+
+ skb_push(skb, -skb_network_offset(skb));
+ esph = ip_esp_hdr(skb);
+ *skb_mac_header(skb) = IPPROTO_ESP;
+
+ esph->spi = x->id.spi;
+ esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low);
+
+ xo->proto = proto;
+}
+
+static struct sk_buff *esp6_gso_segment(struct sk_buff *skb,
+ netdev_features_t features)
+{
+ __u32 seq;
+ int err = 0;
+ struct sk_buff *skb2;
+ struct xfrm_state *x;
+ struct ip_esp_hdr *esph;
+ struct crypto_aead *aead;
+ struct sk_buff *segs = ERR_PTR(-EINVAL);
+ netdev_features_t esp_features = features;
+ struct xfrm_offload *xo = xfrm_offload(skb);
+
+ if (!xo)
+ goto out;
+
+ seq = xo->seq.low;
+
+ x = skb->sp->xvec[skb->sp->len - 1];
+ aead = x->data;
+ esph = ip_esp_hdr(skb);
+
+ if (esph->spi != x->id.spi)
+ goto out;
+
+ if (!pskb_may_pull(skb, sizeof(*esph) + crypto_aead_ivsize(aead)))
+ goto out;
+
+ __skb_pull(skb, sizeof(*esph) + crypto_aead_ivsize(aead));
+
+ skb->encap_hdr_csum = 1;
+
+ if (!(features & NETIF_F_HW_ESP))
+ esp_features = features & ~(NETIF_F_SG | NETIF_F_CSUM_MASK);
+
+ segs = x->outer_mode->gso_segment(x, skb, esp_features);
+ if (IS_ERR_OR_NULL(segs))
+ goto out;
+
+ __skb_pull(skb, skb->data - skb_mac_header(skb));
+
+ skb2 = segs;
+ do {
+ struct sk_buff *nskb = skb2->next;
+
+ xo = xfrm_offload(skb2);
+ xo->flags |= XFRM_GSO_SEGMENT;
+ xo->seq.low = seq;
+ xo->seq.hi = xfrm_replay_seqhi(x, seq);
+
+ if(!(features & NETIF_F_HW_ESP))
+ xo->flags |= CRYPTO_FALLBACK;
+
+ x->outer_mode->xmit(x, skb2);
+
+ err = x->type_offload->xmit(x, skb2, esp_features);
+ if (err) {
+ kfree_skb_list(segs);
+ return ERR_PTR(err);
+ }
+
+ if (!skb_is_gso(skb2))
+ seq++;
+ else
+ seq += skb_shinfo(skb2)->gso_segs;
+
+ skb_push(skb2, skb2->mac_len);
+ skb2 = nskb;
+ } while (skb2);
+
+out:
+ return segs;
+}
+
+static int esp6_input_tail(struct xfrm_state *x, struct sk_buff *skb)
+{
+ struct crypto_aead *aead = x->data;
+
+ if (!pskb_may_pull(skb, sizeof(struct ip_esp_hdr) + crypto_aead_ivsize(aead)))
+ return -EINVAL;
+
+ skb->ip_summed = CHECKSUM_NONE;
+
+ return esp6_input_done2(skb, 0);
+}
+
+static int esp6_xmit(struct xfrm_state *x, struct sk_buff *skb, netdev_features_t features)
+{
+ int err;
+ int alen;
+ int blksize;
+ struct xfrm_offload *xo;
+ struct ip_esp_hdr *esph;
+ struct crypto_aead *aead;
+ struct esp_info esp;
+ bool hw_offload = true;
+
+ esp.inplace = true;
+
+ xo = xfrm_offload(skb);
+
+ if (!xo)
+ return -EINVAL;
+
+ if (!(features & NETIF_F_HW_ESP) || !x->xso.offload_handle ||
+ (x->xso.dev != skb->dev)) {
+ xo->flags |= CRYPTO_FALLBACK;
+ hw_offload = false;
+ }
+
+ esp.proto = xo->proto;
+
+ /* skb is pure payload to encrypt */
+
+ aead = x->data;
+ alen = crypto_aead_authsize(aead);
+
+ esp.tfclen = 0;
+ /* XXX: Add support for tfc padding here. */
+
+ blksize = ALIGN(crypto_aead_blocksize(aead), 4);
+ esp.clen = ALIGN(skb->len + 2 + esp.tfclen, blksize);
+ esp.plen = esp.clen - skb->len - esp.tfclen;
+ esp.tailen = esp.tfclen + esp.plen + alen;
+
+ if (!hw_offload || (hw_offload && !skb_is_gso(skb))) {
+ esp.nfrags = esp6_output_head(x, skb, &esp);
+ if (esp.nfrags < 0)
+ return esp.nfrags;
+ }
+
+ esph = ip_esp_hdr(skb);
+ esph->spi = x->id.spi;
+
+ skb_push(skb, -skb_network_offset(skb));
+
+ if (xo->flags & XFRM_GSO_SEGMENT) {
+ esph->seq_no = htonl(xo->seq.low);
+ } else {
+ int len;
+
+ len = skb->len - sizeof(struct ipv6hdr);
+ if (len > IPV6_MAXPLEN)
+ len = 0;
+
+ ipv6_hdr(skb)->payload_len = htons(len);
+ }
+
+ if (hw_offload)
+ return 0;
+
+ esp.seqno = cpu_to_be64(xo->seq.low + ((u64)xo->seq.hi << 32));
+
+ err = esp6_output_tail(x, skb, &esp);
+ if (err < 0)
+ return err;
+
+ secpath_reset(skb);
+
+ return 0;
+}
+
static const struct net_offload esp6_offload = {
.callbacks = {
.gro_receive = esp6_gro_receive,
+ .gso_segment = esp6_gso_segment,
},
};
+static const struct xfrm_type_offload esp6_type_offload = {
+ .description = "ESP6 OFFLOAD",
+ .owner = THIS_MODULE,
+ .proto = IPPROTO_ESP,
+ .input_tail = esp6_input_tail,
+ .xmit = esp6_xmit,
+ .encap = esp6_gso_encap,
+};
+
static int __init esp6_offload_init(void)
{
+ if (xfrm_register_type_offload(&esp6_type_offload, AF_INET6) < 0) {
+ pr_info("%s: can't add xfrm type offload\n", __func__);
+ return -EAGAIN;
+ }
+
return inet6_add_offload(&esp6_offload, IPPROTO_ESP);
}
static void __exit esp6_offload_exit(void)
{
+ if (xfrm_unregister_type_offload(&esp6_type_offload, AF_INET6) < 0)
+ pr_info("%s: can't remove xfrm type offload\n", __func__);
+
inet6_del_offload(&esp6_offload, IPPROTO_ESP);
}
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index d32e2110aff2..b636f1da9aec 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -946,13 +946,13 @@ void ipv6_push_nfrag_opts(struct sk_buff *skb, struct ipv6_txoptions *opt,
if (opt->hopopt)
ipv6_push_exthdr(skb, proto, NEXTHDR_HOP, opt->hopopt);
}
-EXPORT_SYMBOL(ipv6_push_nfrag_opts);
void ipv6_push_frag_opts(struct sk_buff *skb, struct ipv6_txoptions *opt, u8 *proto)
{
if (opt->dst1opt)
ipv6_push_exthdr(skb, proto, NEXTHDR_DEST, opt->dst1opt);
}
+EXPORT_SYMBOL(ipv6_push_frag_opts);
struct ipv6_txoptions *
ipv6_dup_options(struct sock *sk, struct ipv6_txoptions *opt)
diff --git a/net/ipv6/ila/ila_lwt.c b/net/ipv6/ila/ila_lwt.c
index ce1aae4a7fc8..b3df03e3faa0 100644
--- a/net/ipv6/ila/ila_lwt.c
+++ b/net/ipv6/ila/ila_lwt.c
@@ -146,8 +146,7 @@ static int ila_build_state(struct nlattr *nla,
return -EINVAL;
}
- ret = nla_parse_nested(tb, ILA_ATTR_MAX, nla,
- ila_nl_policy);
+ ret = nla_parse_nested(tb, ILA_ATTR_MAX, nla, ila_nl_policy, NULL);
if (ret < 0)
return ret;
diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c
index af8f52ee7180..2fd5ca151dcf 100644
--- a/net/ipv6/ila/ila_xlat.c
+++ b/net/ipv6/ila/ila_xlat.c
@@ -41,13 +41,7 @@ static int alloc_ila_locks(struct ila_net *ilan)
size = roundup_pow_of_two(nr_pcpus * LOCKS_PER_CPU);
if (sizeof(spinlock_t) != 0) {
-#ifdef CONFIG_NUMA
- if (size * sizeof(spinlock_t) > PAGE_SIZE)
- ilan->locks = vmalloc(size * sizeof(spinlock_t));
- else
-#endif
- ilan->locks = kmalloc_array(size, sizeof(spinlock_t),
- GFP_KERNEL);
+ ilan->locks = kvmalloc(size * sizeof(spinlock_t), GFP_KERNEL);
if (!ilan->locks)
return -ENOMEM;
for (i = 0; i < size; i++)
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 6fcb7cb49bb2..8d128ba79b66 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -544,6 +544,8 @@ static inline int ip6gre_xmit_ipv4(struct sk_buff *skb, struct net_device *dev)
& IPV6_TCLASS_MASK;
if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
fl6.flowi6_mark = skb->mark;
+ else
+ fl6.flowi6_mark = t->parms.fwmark;
fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL);
@@ -603,6 +605,8 @@ static inline int ip6gre_xmit_ipv6(struct sk_buff *skb, struct net_device *dev)
fl6.flowlabel |= ip6_flowlabel(ipv6h);
if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
fl6.flowi6_mark = skb->mark;
+ else
+ fl6.flowi6_mark = t->parms.fwmark;
fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL);
@@ -780,6 +784,7 @@ static int ip6gre_tnl_change(struct ip6_tnl *t,
t->parms.o_key = p->o_key;
t->parms.i_flags = p->i_flags;
t->parms.o_flags = p->o_flags;
+ t->parms.fwmark = p->fwmark;
dst_cache_reset(&t->dst_cache);
ip6gre_tnl_link_config(t, set_mtu);
return 0;
@@ -1249,6 +1254,9 @@ static void ip6gre_netlink_parms(struct nlattr *data[],
if (data[IFLA_GRE_FLAGS])
parms->flags = nla_get_u32(data[IFLA_GRE_FLAGS]);
+
+ if (data[IFLA_GRE_FWMARK])
+ parms->fwmark = nla_get_u32(data[IFLA_GRE_FWMARK]);
}
static int ip6gre_tap_init(struct net_device *dev)
@@ -1470,6 +1478,8 @@ static size_t ip6gre_get_size(const struct net_device *dev)
nla_total_size(2) +
/* IFLA_GRE_ENCAP_DPORT */
nla_total_size(2) +
+ /* IFLA_GRE_FWMARK */
+ nla_total_size(4) +
0;
}
@@ -1490,7 +1500,8 @@ static int ip6gre_fill_info(struct sk_buff *skb, const struct net_device *dev)
nla_put_u8(skb, IFLA_GRE_TTL, p->hop_limit) ||
nla_put_u8(skb, IFLA_GRE_ENCAP_LIMIT, p->encap_limit) ||
nla_put_be32(skb, IFLA_GRE_FLOWINFO, p->flowinfo) ||
- nla_put_u32(skb, IFLA_GRE_FLAGS, p->flags))
+ nla_put_u32(skb, IFLA_GRE_FLAGS, p->flags) ||
+ nla_put_u32(skb, IFLA_GRE_FWMARK, p->fwmark))
goto nla_put_failure;
if (nla_put_u16(skb, IFLA_GRE_ENCAP_TYPE,
@@ -1525,6 +1536,7 @@ static const struct nla_policy ip6gre_policy[IFLA_GRE_MAX + 1] = {
[IFLA_GRE_ENCAP_FLAGS] = { .type = NLA_U16 },
[IFLA_GRE_ENCAP_SPORT] = { .type = NLA_U16 },
[IFLA_GRE_ENCAP_DPORT] = { .type = NLA_U16 },
+ [IFLA_GRE_FWMARK] = { .type = NLA_U32 },
};
static struct rtnl_link_ops ip6gre_link_ops __read_mostly = {
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index c45b12b4431c..9ee208a348f5 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -49,6 +49,8 @@
int ip6_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
+ void (*edemux)(struct sk_buff *skb);
+
/* if ingress device is enslaved to an L3 master device pass the
* skb to its handler for processing
*/
@@ -60,8 +62,8 @@ int ip6_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
const struct inet6_protocol *ipprot;
ipprot = rcu_dereference(inet6_protos[ipv6_hdr(skb)->nexthdr]);
- if (ipprot && ipprot->early_demux)
- ipprot->early_demux(skb);
+ if (ipprot && (edemux = READ_ONCE(ipprot->early_demux)))
+ edemux(skb);
}
if (!skb_valid_dst(skb))
ip6_route_input(skb);
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index 93e58a5e1837..280268f1dd7b 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -63,7 +63,6 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
const struct net_offload *ops;
int proto;
struct frag_hdr *fptr;
- unsigned int unfrag_ip6hlen;
unsigned int payload_len;
u8 *prevhdr;
int offset = 0;
@@ -116,8 +115,10 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
skb->network_header = (u8 *)ipv6h - skb->head;
if (udpfrag) {
- unfrag_ip6hlen = ip6_find_1stfragopt(skb, &prevhdr);
- fptr = (struct frag_hdr *)((u8 *)ipv6h + unfrag_ip6hlen);
+ int err = ip6_find_1stfragopt(skb, &prevhdr);
+ if (err < 0)
+ return ERR_PTR(err);
+ fptr = (struct frag_hdr *)((u8 *)ipv6h + err);
fptr->frag_off = htons(offset);
if (skb->next)
fptr->frag_off |= htons(IP6_MF);
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 58f6288e9ba5..d4a31becbd25 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -597,7 +597,10 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
int ptr, offset = 0, err = 0;
u8 *prevhdr, nexthdr = 0;
- hlen = ip6_find_1stfragopt(skb, &prevhdr);
+ err = ip6_find_1stfragopt(skb, &prevhdr);
+ if (err < 0)
+ goto fail;
+ hlen = err;
nexthdr = *prevhdr;
mtu = ip6_skb_dst_mtu(skb);
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index a9692ec0cd6d..6eb2ae507500 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -954,7 +954,7 @@ static void init_tel_txopt(struct ipv6_tel_txoption *opt, __u8 encap_limit)
opt->dst_opt[5] = IPV6_TLV_PADN;
opt->dst_opt[6] = 1;
- opt->ops.dst0opt = (struct ipv6_opt_hdr *) opt->dst_opt;
+ opt->ops.dst1opt = (struct ipv6_opt_hdr *) opt->dst_opt;
opt->ops.opt_nflen = 8;
}
@@ -1178,7 +1178,7 @@ route_lookup:
if (encap_limit >= 0) {
init_tel_txopt(&opt, encap_limit);
- ipv6_push_nfrag_opts(skb, &opt.ops, &proto, NULL, NULL);
+ ipv6_push_frag_opts(skb, &opt.ops, &proto);
}
/* Calculate max headroom for all the headers and adjust
@@ -1258,6 +1258,8 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
& IPV6_TCLASS_MASK;
if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
fl6.flowi6_mark = skb->mark;
+ else
+ fl6.flowi6_mark = t->parms.fwmark;
}
fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL);
@@ -1340,6 +1342,8 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
fl6.flowlabel |= ip6_flowlabel(ipv6h);
if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
fl6.flowi6_mark = skb->mark;
+ else
+ fl6.flowi6_mark = t->parms.fwmark;
}
fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL);
@@ -1469,6 +1473,7 @@ ip6_tnl_change(struct ip6_tnl *t, const struct __ip6_tnl_parm *p)
t->parms.flowinfo = p->flowinfo;
t->parms.link = p->link;
t->parms.proto = p->proto;
+ t->parms.fwmark = p->fwmark;
dst_cache_reset(&t->dst_cache);
ip6_tnl_link_config(t);
return 0;
@@ -1920,6 +1925,9 @@ static void ip6_tnl_netlink_parms(struct nlattr *data[],
if (data[IFLA_IPTUN_COLLECT_METADATA])
parms->collect_md = true;
+
+ if (data[IFLA_IPTUN_FWMARK])
+ parms->fwmark = nla_get_u32(data[IFLA_IPTUN_FWMARK]);
}
static bool ip6_tnl_netlink_encap_parms(struct nlattr *data[],
@@ -2056,6 +2064,8 @@ static size_t ip6_tnl_get_size(const struct net_device *dev)
nla_total_size(2) +
/* IFLA_IPTUN_COLLECT_METADATA */
nla_total_size(0) +
+ /* IFLA_IPTUN_FWMARK */
+ nla_total_size(4) +
0;
}
@@ -2071,7 +2081,8 @@ static int ip6_tnl_fill_info(struct sk_buff *skb, const struct net_device *dev)
nla_put_u8(skb, IFLA_IPTUN_ENCAP_LIMIT, parm->encap_limit) ||
nla_put_be32(skb, IFLA_IPTUN_FLOWINFO, parm->flowinfo) ||
nla_put_u32(skb, IFLA_IPTUN_FLAGS, parm->flags) ||
- nla_put_u8(skb, IFLA_IPTUN_PROTO, parm->proto))
+ nla_put_u8(skb, IFLA_IPTUN_PROTO, parm->proto) ||
+ nla_put_u32(skb, IFLA_IPTUN_FWMARK, parm->fwmark))
goto nla_put_failure;
if (nla_put_u16(skb, IFLA_IPTUN_ENCAP_TYPE, tunnel->encap.type) ||
@@ -2083,6 +2094,7 @@ static int ip6_tnl_fill_info(struct sk_buff *skb, const struct net_device *dev)
if (parm->collect_md)
if (nla_put_flag(skb, IFLA_IPTUN_COLLECT_METADATA))
goto nla_put_failure;
+
return 0;
nla_put_failure:
@@ -2111,6 +2123,7 @@ static const struct nla_policy ip6_tnl_policy[IFLA_IPTUN_MAX + 1] = {
[IFLA_IPTUN_ENCAP_SPORT] = { .type = NLA_U16 },
[IFLA_IPTUN_ENCAP_DPORT] = { .type = NLA_U16 },
[IFLA_IPTUN_COLLECT_METADATA] = { .type = NLA_FLAG },
+ [IFLA_IPTUN_FWMARK] = { .type = NLA_U32 },
};
static struct rtnl_link_ops ip6_link_ops __read_mostly = {
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index 3d8a3b63b4fd..d67ef56454b2 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -657,6 +657,7 @@ vti6_tnl_change(struct ip6_tnl *t, const struct __ip6_tnl_parm *p)
t->parms.i_key = p->i_key;
t->parms.o_key = p->o_key;
t->parms.proto = p->proto;
+ t->parms.fwmark = p->fwmark;
dst_cache_reset(&t->dst_cache);
vti6_link_config(t);
return 0;
@@ -933,6 +934,9 @@ static void vti6_netlink_parms(struct nlattr *data[],
if (data[IFLA_VTI_OKEY])
parms->o_key = nla_get_be32(data[IFLA_VTI_OKEY]);
+
+ if (data[IFLA_VTI_FWMARK])
+ parms->fwmark = nla_get_u32(data[IFLA_VTI_FWMARK]);
}
static int vti6_newlink(struct net *src_net, struct net_device *dev,
@@ -998,6 +1002,8 @@ static size_t vti6_get_size(const struct net_device *dev)
nla_total_size(4) +
/* IFLA_VTI_OKEY */
nla_total_size(4) +
+ /* IFLA_VTI_FWMARK */
+ nla_total_size(4) +
0;
}
@@ -1010,7 +1016,8 @@ static int vti6_fill_info(struct sk_buff *skb, const struct net_device *dev)
nla_put_in6_addr(skb, IFLA_VTI_LOCAL, &parm->laddr) ||
nla_put_in6_addr(skb, IFLA_VTI_REMOTE, &parm->raddr) ||
nla_put_be32(skb, IFLA_VTI_IKEY, parm->i_key) ||
- nla_put_be32(skb, IFLA_VTI_OKEY, parm->o_key))
+ nla_put_be32(skb, IFLA_VTI_OKEY, parm->o_key) ||
+ nla_put_u32(skb, IFLA_VTI_FWMARK, parm->fwmark))
goto nla_put_failure;
return 0;
@@ -1024,6 +1031,7 @@ static const struct nla_policy vti6_policy[IFLA_VTI_MAX + 1] = {
[IFLA_VTI_REMOTE] = { .len = sizeof(struct in6_addr) },
[IFLA_VTI_IKEY] = { .type = NLA_U32 },
[IFLA_VTI_OKEY] = { .type = NLA_U32 },
+ [IFLA_VTI_FWMARK] = { .type = NLA_U32 },
};
static struct rtnl_link_ops vti6_link_ops __read_mostly = {
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index bf34d0950752..374997d26488 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -816,7 +816,7 @@ static int mif6_delete(struct mr6_table *mrt, int vifi, int notify,
in6_dev = __in6_dev_get(dev);
if (in6_dev) {
in6_dev->cnf.mc_forwarding--;
- inet6_netconf_notify_devconf(dev_net(dev),
+ inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
NETCONFA_MC_FORWARDING,
dev->ifindex, &in6_dev->cnf);
}
@@ -975,7 +975,7 @@ static int mif6_add(struct net *net, struct mr6_table *mrt,
in6_dev = __in6_dev_get(dev);
if (in6_dev) {
in6_dev->cnf.mc_forwarding++;
- inet6_netconf_notify_devconf(dev_net(dev),
+ inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
NETCONFA_MC_FORWARDING,
dev->ifindex, &in6_dev->cnf);
}
@@ -1598,7 +1598,8 @@ static int ip6mr_sk_init(struct mr6_table *mrt, struct sock *sk)
write_unlock_bh(&mrt_lock);
if (!err)
- inet6_netconf_notify_devconf(net, NETCONFA_MC_FORWARDING,
+ inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
+ NETCONFA_MC_FORWARDING,
NETCONFA_IFINDEX_ALL,
net->ipv6.devconf_all);
rtnl_unlock();
@@ -1619,7 +1620,7 @@ int ip6mr_sk_done(struct sock *sk)
mrt->mroute6_sk = NULL;
net->ipv6.devconf_all->mc_forwarding--;
write_unlock_bh(&mrt_lock);
- inet6_netconf_notify_devconf(net,
+ inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
NETCONFA_MC_FORWARDING,
NETCONFA_IFINDEX_ALL,
net->ipv6.devconf_all);
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 1bdc703cb966..07403fa164e1 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -2463,7 +2463,6 @@ static void mld_ifc_event(struct inet6_dev *idev)
mld_ifc_start_timer(idev, 1);
}
-
static void igmp6_timer_handler(unsigned long data)
{
struct ifmcaddr6 *ma = (struct ifmcaddr6 *) data;
@@ -2599,6 +2598,44 @@ void ipv6_mc_destroy_dev(struct inet6_dev *idev)
write_unlock_bh(&idev->lock);
}
+static void ipv6_mc_rejoin_groups(struct inet6_dev *idev)
+{
+ struct ifmcaddr6 *pmc;
+
+ ASSERT_RTNL();
+
+ if (mld_in_v1_mode(idev)) {
+ read_lock_bh(&idev->lock);
+ for (pmc = idev->mc_list; pmc; pmc = pmc->next)
+ igmp6_join_group(pmc);
+ read_unlock_bh(&idev->lock);
+ } else
+ mld_send_report(idev, NULL);
+}
+
+static int ipv6_mc_netdev_event(struct notifier_block *this,
+ unsigned long event,
+ void *ptr)
+{
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+ struct inet6_dev *idev = __in6_dev_get(dev);
+
+ switch (event) {
+ case NETDEV_RESEND_IGMP:
+ if (idev)
+ ipv6_mc_rejoin_groups(idev);
+ break;
+ default:
+ break;
+ }
+
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block igmp6_netdev_notifier = {
+ .notifier_call = ipv6_mc_netdev_event,
+};
+
#ifdef CONFIG_PROC_FS
struct igmp6_mc_iter_state {
struct seq_net_private p;
@@ -2970,7 +3007,17 @@ int __init igmp6_init(void)
return register_pernet_subsys(&igmp6_net_ops);
}
+int __init igmp6_late_init(void)
+{
+ return register_netdevice_notifier(&igmp6_netdev_notifier);
+}
+
void igmp6_cleanup(void)
{
unregister_pernet_subsys(&igmp6_net_ops);
}
+
+void igmp6_late_cleanup(void)
+{
+ unregister_netdevice_notifier(&igmp6_netdev_notifier);
+}
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index cb1766724a4c..d310dc41209a 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -732,7 +732,7 @@ void ndisc_update(const struct net_device *dev, struct neighbour *neigh,
const u8 *lladdr, u8 new, u32 flags, u8 icmp6_type,
struct ndisc_options *ndopts)
{
- neigh_update(neigh, lladdr, new, flags);
+ neigh_update(neigh, lladdr, new, flags, 0);
/* report ndisc ops about neighbour update */
ndisc_ops_update(dev, neigh, flags, icmp6_type, ndopts);
}
@@ -1418,6 +1418,8 @@ skip_linkparms:
if (ri->prefix_len == 0 &&
!in6_dev->cnf.accept_ra_defrtr)
continue;
+ if (ri->prefix_len < in6_dev->cnf.accept_ra_rt_info_min_plen)
+ continue;
if (ri->prefix_len > in6_dev->cnf.accept_ra_rt_info_max_plen)
continue;
rt6_route_rcv(skb->dev, (u8 *)p, (p->nd_opt_len) << 3,
@@ -1746,6 +1748,8 @@ static int ndisc_netdev_event(struct notifier_block *this, unsigned long event,
case NETDEV_CHANGEADDR:
neigh_changeaddr(&nd_tbl, dev);
fib6_run_gc(0, net, false);
+ /* fallthrough */
+ case NETDEV_UP:
idev = in6_dev_get(dev);
if (!idev)
break;
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 1e15c54fd5e2..1f90644056ac 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -51,15 +51,6 @@ void *ip6t_alloc_initial_table(const struct xt_table *info)
}
EXPORT_SYMBOL_GPL(ip6t_alloc_initial_table);
-/*
- We keep a set of rules for each CPU, so we can avoid write-locking
- them in the softirq when updating the counters and therefore
- only need to read-lock in the softirq; doing a write_lock_bh() in user
- context stops packets coming through and allows user context to read
- the counters or update the rules.
-
- Hence the start of any table is given by get_table() below. */
-
/* Returns whether matches rule or not. */
/* Performance critical - called for every packet */
static inline bool
@@ -411,7 +402,7 @@ mark_source_chains(const struct xt_table_info *newinfo,
to 0 as we leave), and comefrom to save source hook bitmask */
for (hook = 0; hook < NF_INET_NUMHOOKS; hook++) {
unsigned int pos = newinfo->hook_entry[hook];
- struct ip6t_entry *e = (struct ip6t_entry *)(entry0 + pos);
+ struct ip6t_entry *e = entry0 + pos;
if (!(valid_hooks & (1 << hook)))
continue;
@@ -453,14 +444,12 @@ mark_source_chains(const struct xt_table_info *newinfo,
if (pos == oldpos)
goto next;
- e = (struct ip6t_entry *)
- (entry0 + pos);
+ e = entry0 + pos;
} while (oldpos == pos + e->next_offset);
/* Move along one */
size = e->next_offset;
- e = (struct ip6t_entry *)
- (entry0 + pos + size);
+ e = entry0 + pos + size;
if (pos + size >= newinfo->size)
return 0;
e->counters.pcnt = pos;
@@ -475,16 +464,14 @@ mark_source_chains(const struct xt_table_info *newinfo,
if (!xt_find_jump_offset(offsets, newpos,
newinfo->number))
return 0;
- e = (struct ip6t_entry *)
- (entry0 + newpos);
+ e = entry0 + newpos;
} else {
/* ... this is a fallthru */
newpos = pos + e->next_offset;
if (newpos >= newinfo->size)
return 0;
}
- e = (struct ip6t_entry *)
- (entry0 + newpos);
+ e = entry0 + newpos;
e->counters.pcnt = pos;
pos = newpos;
}
@@ -863,7 +850,7 @@ copy_entries_to_user(unsigned int total_size,
const struct xt_entry_match *m;
const struct xt_entry_target *t;
- e = (struct ip6t_entry *)(loc_cpu_entry + off);
+ e = loc_cpu_entry + off;
if (copy_to_user(userptr + off, e, sizeof(*e))) {
ret = -EFAULT;
goto free_counters;
@@ -1258,7 +1245,7 @@ compat_copy_entry_to_user(struct ip6t_entry *e, void __user **dstptr,
int ret = 0;
origsize = *size;
- ce = (struct compat_ip6t_entry __user *)*dstptr;
+ ce = *dstptr;
if (copy_to_user(ce, e, sizeof(struct ip6t_entry)) != 0 ||
copy_to_user(&ce->counters, &counters[i],
sizeof(counters[i])) != 0)
@@ -1394,7 +1381,7 @@ compat_copy_entry_from_user(struct compat_ip6t_entry *e, void **dstptr,
struct xt_entry_match *ematch;
origsize = *size;
- de = (struct ip6t_entry *)*dstptr;
+ de = *dstptr;
memcpy(de, e, sizeof(struct ip6t_entry));
memcpy(&de->counters, &e->counters, sizeof(e->counters));
diff --git a/net/ipv6/netfilter/ip6t_SYNPROXY.c b/net/ipv6/netfilter/ip6t_SYNPROXY.c
index 4ef1ddd4bbbd..d3c4daa708b9 100644
--- a/net/ipv6/netfilter/ip6t_SYNPROXY.c
+++ b/net/ipv6/netfilter/ip6t_SYNPROXY.c
@@ -307,12 +307,17 @@ synproxy_tg6(struct sk_buff *skb, const struct xt_action_param *par)
XT_SYNPROXY_OPT_ECN);
synproxy_send_client_synack(net, skb, th, &opts);
- return NF_DROP;
+ consume_skb(skb);
+ return NF_STOLEN;
} else if (th->ack && !(th->fin || th->rst || th->syn)) {
/* ACK from client */
- synproxy_recv_client_ack(net, skb, th, &opts, ntohl(th->seq));
- return NF_DROP;
+ if (synproxy_recv_client_ack(net, skb, th, &opts, ntohl(th->seq))) {
+ consume_skb(skb);
+ return NF_STOLEN;
+ } else {
+ return NF_DROP;
+ }
}
return XT_CONTINUE;
@@ -388,10 +393,13 @@ static unsigned int ipv6_synproxy_hook(void *priv,
* number match the one of first SYN.
*/
if (synproxy_recv_client_ack(net, skb, th, &opts,
- ntohl(th->seq) + 1))
+ ntohl(th->seq) + 1)) {
this_cpu_inc(snet->stats->cookie_retrans);
-
- return NF_DROP;
+ consume_skb(skb);
+ return NF_STOLEN;
+ } else {
+ return NF_DROP;
+ }
}
synproxy->isn = ntohl(th->ack_seq);
@@ -430,20 +438,57 @@ static unsigned int ipv6_synproxy_hook(void *priv,
return NF_ACCEPT;
}
+static struct nf_hook_ops ipv6_synproxy_ops[] __read_mostly = {
+ {
+ .hook = ipv6_synproxy_hook,
+ .pf = NFPROTO_IPV6,
+ .hooknum = NF_INET_LOCAL_IN,
+ .priority = NF_IP_PRI_CONNTRACK_CONFIRM - 1,
+ },
+ {
+ .hook = ipv6_synproxy_hook,
+ .pf = NFPROTO_IPV6,
+ .hooknum = NF_INET_POST_ROUTING,
+ .priority = NF_IP_PRI_CONNTRACK_CONFIRM - 1,
+ },
+};
+
static int synproxy_tg6_check(const struct xt_tgchk_param *par)
{
+ struct synproxy_net *snet = synproxy_pernet(par->net);
const struct ip6t_entry *e = par->entryinfo;
+ int err;
if (!(e->ipv6.flags & IP6T_F_PROTO) ||
e->ipv6.proto != IPPROTO_TCP ||
e->ipv6.invflags & XT_INV_PROTO)
return -EINVAL;
- return nf_ct_netns_get(par->net, par->family);
+ err = nf_ct_netns_get(par->net, par->family);
+ if (err)
+ return err;
+
+ if (snet->hook_ref6 == 0) {
+ err = nf_register_net_hooks(par->net, ipv6_synproxy_ops,
+ ARRAY_SIZE(ipv6_synproxy_ops));
+ if (err) {
+ nf_ct_netns_put(par->net, par->family);
+ return err;
+ }
+ }
+
+ snet->hook_ref6++;
+ return err;
}
static void synproxy_tg6_destroy(const struct xt_tgdtor_param *par)
{
+ struct synproxy_net *snet = synproxy_pernet(par->net);
+
+ snet->hook_ref6--;
+ if (snet->hook_ref6 == 0)
+ nf_unregister_net_hooks(par->net, ipv6_synproxy_ops,
+ ARRAY_SIZE(ipv6_synproxy_ops));
nf_ct_netns_put(par->net, par->family);
}
@@ -458,46 +503,14 @@ static struct xt_target synproxy_tg6_reg __read_mostly = {
.me = THIS_MODULE,
};
-static struct nf_hook_ops ipv6_synproxy_ops[] __read_mostly = {
- {
- .hook = ipv6_synproxy_hook,
- .pf = NFPROTO_IPV6,
- .hooknum = NF_INET_LOCAL_IN,
- .priority = NF_IP_PRI_CONNTRACK_CONFIRM - 1,
- },
- {
- .hook = ipv6_synproxy_hook,
- .pf = NFPROTO_IPV6,
- .hooknum = NF_INET_POST_ROUTING,
- .priority = NF_IP_PRI_CONNTRACK_CONFIRM - 1,
- },
-};
-
static int __init synproxy_tg6_init(void)
{
- int err;
-
- err = nf_register_hooks(ipv6_synproxy_ops,
- ARRAY_SIZE(ipv6_synproxy_ops));
- if (err < 0)
- goto err1;
-
- err = xt_register_target(&synproxy_tg6_reg);
- if (err < 0)
- goto err2;
-
- return 0;
-
-err2:
- nf_unregister_hooks(ipv6_synproxy_ops, ARRAY_SIZE(ipv6_synproxy_ops));
-err1:
- return err;
+ return xt_register_target(&synproxy_tg6_reg);
}
static void __exit synproxy_tg6_exit(void)
{
xt_unregister_target(&synproxy_tg6_reg);
- nf_unregister_hooks(ipv6_synproxy_ops, ARRAY_SIZE(ipv6_synproxy_ops));
}
module_init(synproxy_tg6_init);
diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
index d2c2ccbfbe72..d5f028e33f65 100644
--- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
@@ -221,8 +221,7 @@ icmpv6_error(struct net *net, struct nf_conn *tmpl,
type = icmp6h->icmp6_type - 130;
if (type >= 0 && type < sizeof(noct_valid_new) &&
noct_valid_new[type]) {
- nf_ct_set(skb, nf_ct_untracked_get(), IP_CT_NEW);
- nf_conntrack_get(skb_nfct(skb));
+ nf_ct_set(skb, NULL, IP_CT_UNTRACKED);
return NF_ACCEPT;
}
diff --git a/net/ipv6/netfilter/nf_dup_ipv6.c b/net/ipv6/netfilter/nf_dup_ipv6.c
index 888ecd106e5f..4a7ddeddbaab 100644
--- a/net/ipv6/netfilter/nf_dup_ipv6.c
+++ b/net/ipv6/netfilter/nf_dup_ipv6.c
@@ -58,8 +58,7 @@ void nf_dup_ipv6(struct net *net, struct sk_buff *skb, unsigned int hooknum,
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
nf_reset(skb);
- nf_ct_set(skb, nf_ct_untracked_get(), IP_CT_NEW);
- nf_conntrack_get(skb_nfct(skb));
+ nf_ct_set(skb, NULL, IP_CT_UNTRACKED);
#endif
if (hooknum == NF_INET_PRE_ROUTING ||
hooknum == NF_INET_LOCAL_IN) {
diff --git a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
index e0be97e636a4..b2b4f031b3a1 100644
--- a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
@@ -235,7 +235,7 @@ int nf_nat_icmpv6_reply_translation(struct sk_buff *skb,
inside->icmp6.icmp6_cksum =
csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr,
skb->len - hdrlen, IPPROTO_ICMPV6,
- csum_partial(&inside->icmp6,
+ skb_checksum(skb, hdrlen,
skb->len - hdrlen, 0));
}
@@ -273,13 +273,7 @@ nf_nat_ipv6_fn(void *priv, struct sk_buff *skb,
if (!ct)
return NF_ACCEPT;
- /* Don't try to NAT if this packet is not conntracked */
- if (nf_ct_is_untracked(ct))
- return NF_ACCEPT;
-
- nat = nf_ct_nat_ext_add(ct);
- if (nat == NULL)
- return NF_ACCEPT;
+ nat = nfct_nat(ct);
switch (ctinfo) {
case IP_CT_RELATED:
diff --git a/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c b/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c
index 051b6a6bfff6..2297c9f073ba 100644
--- a/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c
+++ b/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c
@@ -30,6 +30,7 @@ nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range *range,
const struct net_device *out)
{
enum ip_conntrack_info ctinfo;
+ struct nf_conn_nat *nat;
struct in6_addr src;
struct nf_conn *ct;
struct nf_nat_range newrange;
@@ -42,7 +43,9 @@ nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range *range,
&ipv6_hdr(skb)->daddr, 0, &src) < 0)
return NF_DROP;
- nfct_nat(ct)->masq_index = out->ifindex;
+ nat = nf_ct_nat_ext_add(ct);
+ if (nat)
+ nat->masq_index = out->ifindex;
newrange.flags = range->flags | NF_NAT_RANGE_MAP_IPS;
newrange.min_addr.in6 = src;
diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c
index 765facf03d45..43f91d9b086c 100644
--- a/net/ipv6/netfilter/nft_fib_ipv6.c
+++ b/net/ipv6/netfilter/nft_fib_ipv6.c
@@ -159,7 +159,7 @@ void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs,
if (nft_hook(pkt) == NF_INET_PRE_ROUTING &&
nft_fib_is_loopback(pkt->skb, nft_in(pkt))) {
- nft_fib_store_result(dest, priv->result, pkt,
+ nft_fib_store_result(dest, priv, pkt,
nft_in(pkt)->ifindex);
return;
}
@@ -246,7 +246,7 @@ nft_fib6_select_ops(const struct nft_ctx *ctx,
static struct nft_expr_type nft_fib6_type __read_mostly = {
.name = "fib",
- .select_ops = &nft_fib6_select_ops,
+ .select_ops = nft_fib6_select_ops,
.policy = nft_fib_policy,
.maxattr = NFTA_FIB_MAX,
.family = NFPROTO_IPV6,
diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c
index cd4252346a32..e9065b8d3af8 100644
--- a/net/ipv6/output_core.c
+++ b/net/ipv6/output_core.c
@@ -79,14 +79,13 @@ EXPORT_SYMBOL(ipv6_select_ident);
int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
{
u16 offset = sizeof(struct ipv6hdr);
- struct ipv6_opt_hdr *exthdr =
- (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
unsigned int packet_len = skb_tail_pointer(skb) -
skb_network_header(skb);
int found_rhdr = 0;
*nexthdr = &ipv6_hdr(skb)->nexthdr;
- while (offset + 1 <= packet_len) {
+ while (offset <= packet_len) {
+ struct ipv6_opt_hdr *exthdr;
switch (**nexthdr) {
@@ -107,13 +106,16 @@ int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
return offset;
}
- offset += ipv6_optlen(exthdr);
- *nexthdr = &exthdr->nexthdr;
+ if (offset + sizeof(struct ipv6_opt_hdr) > packet_len)
+ return -EINVAL;
+
exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
offset);
+ offset += ipv6_optlen(exthdr);
+ *nexthdr = &exthdr->nexthdr;
}
- return offset;
+ return -EINVAL;
}
EXPORT_SYMBOL(ip6_find_1stfragopt);
diff --git a/net/ipv6/protocol.c b/net/ipv6/protocol.c
index e3770abe688a..b5d54d4f995c 100644
--- a/net/ipv6/protocol.c
+++ b/net/ipv6/protocol.c
@@ -26,7 +26,7 @@
#include <net/protocol.h>
#if IS_ENABLED(CONFIG_IPV6)
-const struct inet6_protocol __rcu *inet6_protos[MAX_INET_PROTOS] __read_mostly;
+struct inet6_protocol __rcu *inet6_protos[MAX_INET_PROTOS] __read_mostly;
EXPORT_SYMBOL(inet6_protos);
int inet6_add_protocol(const struct inet6_protocol *prot, unsigned char protocol)
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 0da6a12b5472..1f992d9e261d 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -632,6 +632,8 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length,
ipv6_local_error(sk, EMSGSIZE, fl6, rt->dst.dev->mtu);
return -EMSGSIZE;
}
+ if (length < sizeof(struct ipv6hdr))
+ return -EINVAL;
if (flags&MSG_PROBE)
goto out;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index fb174b590fd3..dc61b0b5e64e 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2910,7 +2910,8 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
unsigned int pref;
int err;
- err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
+ err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
+ NULL);
if (err < 0)
goto errout;
@@ -3263,7 +3264,8 @@ static int ip6_route_multipath_del(struct fib6_config *cfg)
return last_err;
}
-static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
struct fib6_config cfg;
int err;
@@ -3280,7 +3282,8 @@ static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh)
}
}
-static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
struct fib6_config cfg;
int err;
@@ -3568,7 +3571,8 @@ int rt6_dump_route(struct rt6_info *rt, void *p_arg)
NLM_F_MULTI);
}
-static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
+static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(in_skb->sk);
struct nlattr *tb[RTA_MAX+1];
@@ -3578,7 +3582,8 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
struct flowi6 fl6;
int err, iif = 0, oif = 0;
- err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
+ err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
+ extack);
if (err < 0)
goto errout;
@@ -3704,7 +3709,10 @@ static int ip6_route_dev_notify(struct notifier_block *this,
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct net *net = dev_net(dev);
- if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
+ if (!(dev->flags & IFF_LOOPBACK))
+ return NOTIFY_OK;
+
+ if (event == NETDEV_REGISTER) {
net->ipv6.ip6_null_entry->dst.dev = dev;
net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
@@ -3713,6 +3721,12 @@ static int ip6_route_dev_notify(struct notifier_block *this,
net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
#endif
+ } else if (event == NETDEV_UNREGISTER) {
+ in6_dev_put(net->ipv6.ip6_null_entry->rt6i_idev);
+#ifdef CONFIG_IPV6_MULTIPLE_TABLES
+ in6_dev_put(net->ipv6.ip6_prohibit_entry->rt6i_idev);
+ in6_dev_put(net->ipv6.ip6_blk_hole_entry->rt6i_idev);
+#endif
}
return NOTIFY_OK;
@@ -4019,9 +4033,24 @@ static struct pernet_operations ip6_route_net_late_ops = {
static struct notifier_block ip6_route_dev_notifier = {
.notifier_call = ip6_route_dev_notify,
- .priority = 0,
+ .priority = ADDRCONF_NOTIFY_PRIORITY - 10,
};
+void __init ip6_route_init_special_entries(void)
+{
+ /* Registering of the loopback is done before this portion of code,
+ * the loopback reference in rt6_info will not be taken, do it
+ * manually for init_net */
+ init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
+ init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
+ #ifdef CONFIG_IPV6_MULTIPLE_TABLES
+ init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
+ init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
+ init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
+ init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
+ #endif
+}
+
int __init ip6_route_init(void)
{
int ret;
@@ -4048,17 +4077,6 @@ int __init ip6_route_init(void)
ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
- /* Registering of the loopback is done before this portion of code,
- * the loopback reference in rt6_info will not be taken, do it
- * manually for init_net */
- init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
- init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
- #ifdef CONFIG_IPV6_MULTIPLE_TABLES
- init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
- init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
- init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
- init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
- #endif
ret = fib6_init();
if (ret)
goto out_register_subsys;
diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c
index 85582257d3af..6a495490d43e 100644
--- a/net/ipv6/seg6_iptunnel.c
+++ b/net/ipv6/seg6_iptunnel.c
@@ -26,17 +26,13 @@
#include <linux/seg6_iptunnel.h>
#include <net/addrconf.h>
#include <net/ip6_route.h>
-#ifdef CONFIG_DST_CACHE
#include <net/dst_cache.h>
-#endif
#ifdef CONFIG_IPV6_SEG6_HMAC
#include <net/seg6_hmac.h>
#endif
struct seg6_lwt {
-#ifdef CONFIG_DST_CACHE
struct dst_cache cache;
-#endif
struct seg6_iptunnel_encap tuninfo[0];
};
@@ -105,7 +101,7 @@ static int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh)
hdrlen = (osrh->hdrlen + 1) << 3;
tot_len = hdrlen + sizeof(*hdr);
- err = pskb_expand_head(skb, tot_len, 0, GFP_ATOMIC);
+ err = skb_cow_head(skb, tot_len);
if (unlikely(err))
return err;
@@ -156,7 +152,7 @@ static int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh)
hdrlen = (osrh->hdrlen + 1) << 3;
- err = pskb_expand_head(skb, hdrlen, 0, GFP_ATOMIC);
+ err = skb_cow_head(skb, hdrlen);
if (unlikely(err))
return err;
@@ -237,6 +233,9 @@ static int seg6_do_srh(struct sk_buff *skb)
static int seg6_input(struct sk_buff *skb)
{
+ struct dst_entry *orig_dst = skb_dst(skb);
+ struct dst_entry *dst = NULL;
+ struct seg6_lwt *slwt;
int err;
err = seg6_do_srh(skb);
@@ -245,8 +244,30 @@ static int seg6_input(struct sk_buff *skb)
return err;
}
+ slwt = seg6_lwt_lwtunnel(orig_dst->lwtstate);
+
+ preempt_disable();
+ dst = dst_cache_get(&slwt->cache);
+ preempt_enable();
+
skb_dst_drop(skb);
- ip6_route_input(skb);
+
+ if (!dst) {
+ ip6_route_input(skb);
+ dst = skb_dst(skb);
+ if (!dst->error) {
+ preempt_disable();
+ dst_cache_set_ip6(&slwt->cache, dst,
+ &ipv6_hdr(skb)->saddr);
+ preempt_enable();
+ }
+ } else {
+ skb_dst_set(skb, dst);
+ }
+
+ err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
+ if (unlikely(err))
+ return err;
return dst_input(skb);
}
@@ -264,11 +285,9 @@ static int seg6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
slwt = seg6_lwt_lwtunnel(orig_dst->lwtstate);
-#ifdef CONFIG_DST_CACHE
preempt_disable();
dst = dst_cache_get(&slwt->cache);
preempt_enable();
-#endif
if (unlikely(!dst)) {
struct ipv6hdr *hdr = ipv6_hdr(skb);
@@ -287,16 +306,18 @@ static int seg6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
goto drop;
}
-#ifdef CONFIG_DST_CACHE
preempt_disable();
dst_cache_set_ip6(&slwt->cache, dst, &fl6.saddr);
preempt_enable();
-#endif
}
skb_dst_drop(skb);
skb_dst_set(skb, dst);
+ err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
+ if (unlikely(err))
+ goto drop;
+
return dst_output(net, sk, skb);
drop:
kfree_skb(skb);
@@ -315,7 +336,7 @@ static int seg6_build_state(struct nlattr *nla,
int err;
err = nla_parse_nested(tb, SEG6_IPTUNNEL_MAX, nla,
- seg6_iptunnel_policy);
+ seg6_iptunnel_policy, NULL);
if (err < 0)
return err;
@@ -355,13 +376,11 @@ static int seg6_build_state(struct nlattr *nla,
slwt = seg6_lwt_lwtunnel(newts);
-#ifdef CONFIG_DST_CACHE
err = dst_cache_init(&slwt->cache, GFP_KERNEL);
if (err) {
kfree(newts);
return err;
}
-#endif
memcpy(&slwt->tuninfo, tuninfo, tuninfo_len);
@@ -375,12 +394,10 @@ static int seg6_build_state(struct nlattr *nla,
return 0;
}
-#ifdef CONFIG_DST_CACHE
static void seg6_destroy_state(struct lwtunnel_state *lwt)
{
dst_cache_destroy(&seg6_lwt_lwtunnel(lwt)->cache);
}
-#endif
static int seg6_fill_encap_info(struct sk_buff *skb,
struct lwtunnel_state *lwtstate)
@@ -414,9 +431,7 @@ static int seg6_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b)
static const struct lwtunnel_encap_ops seg6_iptun_ops = {
.build_state = seg6_build_state,
-#ifdef CONFIG_DST_CACHE
.destroy_state = seg6_destroy_state,
-#endif
.output = seg6_output,
.input = seg6_input,
.fill_encap = seg6_fill_encap_info,
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 99853c6e33a8..61e5902f0687 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -881,11 +881,12 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
goto tx_error;
}
- rt = ip_route_output_ports(tunnel->net, &fl4, NULL,
- dst, tiph->saddr,
- 0, 0,
- IPPROTO_IPV6, RT_TOS(tos),
- tunnel->parms.link);
+ flowi4_init_output(&fl4, tunnel->parms.link, tunnel->fwmark,
+ RT_TOS(tos), RT_SCOPE_UNIVERSE, IPPROTO_IPV6,
+ 0, dst, tiph->saddr, 0, 0,
+ sock_net_uid(tunnel->net, NULL));
+ rt = ip_route_output_flow(tunnel->net, &fl4, NULL);
+
if (IS_ERR(rt)) {
dev->stats.tx_carrier_errors++;
goto tx_error_icmp;
@@ -1071,7 +1072,8 @@ static void ipip6_tunnel_bind_dev(struct net_device *dev)
}
}
-static void ipip6_tunnel_update(struct ip_tunnel *t, struct ip_tunnel_parm *p)
+static void ipip6_tunnel_update(struct ip_tunnel *t, struct ip_tunnel_parm *p,
+ __u32 fwmark)
{
struct net *net = t->net;
struct sit_net *sitn = net_generic(net, sit_net_id);
@@ -1085,8 +1087,9 @@ static void ipip6_tunnel_update(struct ip_tunnel *t, struct ip_tunnel_parm *p)
ipip6_tunnel_link(sitn, t);
t->parms.iph.ttl = p->iph.ttl;
t->parms.iph.tos = p->iph.tos;
- if (t->parms.link != p->link) {
+ if (t->parms.link != p->link || t->fwmark != fwmark) {
t->parms.link = p->link;
+ t->fwmark = fwmark;
ipip6_tunnel_bind_dev(t->dev);
}
dst_cache_reset(&t->dst_cache);
@@ -1220,7 +1223,7 @@ ipip6_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
t = netdev_priv(dev);
}
- ipip6_tunnel_update(t, &p);
+ ipip6_tunnel_update(t, &p, t->fwmark);
}
if (t) {
@@ -1418,7 +1421,8 @@ static int ipip6_validate(struct nlattr *tb[], struct nlattr *data[])
}
static void ipip6_netlink_parms(struct nlattr *data[],
- struct ip_tunnel_parm *parms)
+ struct ip_tunnel_parm *parms,
+ __u32 *fwmark)
{
memset(parms, 0, sizeof(*parms));
@@ -1457,6 +1461,8 @@ static void ipip6_netlink_parms(struct nlattr *data[],
if (data[IFLA_IPTUN_PROTO])
parms->iph.protocol = nla_get_u8(data[IFLA_IPTUN_PROTO]);
+ if (data[IFLA_IPTUN_FWMARK])
+ *fwmark = nla_get_u32(data[IFLA_IPTUN_FWMARK]);
}
/* This function returns true when ENCAP attributes are present in the nl msg */
@@ -1549,7 +1555,7 @@ static int ipip6_newlink(struct net *src_net, struct net_device *dev,
return err;
}
- ipip6_netlink_parms(data, &nt->parms);
+ ipip6_netlink_parms(data, &nt->parms, &nt->fwmark);
if (ipip6_tunnel_locate(net, &nt->parms, 0))
return -EEXIST;
@@ -1577,6 +1583,7 @@ static int ipip6_changelink(struct net_device *dev, struct nlattr *tb[],
#ifdef CONFIG_IPV6_SIT_6RD
struct ip_tunnel_6rd ip6rd;
#endif
+ __u32 fwmark = t->fwmark;
int err;
if (dev == sitn->fb_tunnel_dev)
@@ -1588,7 +1595,7 @@ static int ipip6_changelink(struct net_device *dev, struct nlattr *tb[],
return err;
}
- ipip6_netlink_parms(data, &p);
+ ipip6_netlink_parms(data, &p, &fwmark);
if (((dev->flags & IFF_POINTOPOINT) && !p.iph.daddr) ||
(!(dev->flags & IFF_POINTOPOINT) && p.iph.daddr))
@@ -1602,7 +1609,7 @@ static int ipip6_changelink(struct net_device *dev, struct nlattr *tb[],
} else
t = netdev_priv(dev);
- ipip6_tunnel_update(t, &p);
+ ipip6_tunnel_update(t, &p, fwmark);
#ifdef CONFIG_IPV6_SIT_6RD
if (ipip6_netlink_6rd_parms(data, &ip6rd))
@@ -1649,6 +1656,8 @@ static size_t ipip6_get_size(const struct net_device *dev)
nla_total_size(2) +
/* IFLA_IPTUN_ENCAP_DPORT */
nla_total_size(2) +
+ /* IFLA_IPTUN_FWMARK */
+ nla_total_size(4) +
0;
}
@@ -1665,7 +1674,8 @@ static int ipip6_fill_info(struct sk_buff *skb, const struct net_device *dev)
nla_put_u8(skb, IFLA_IPTUN_PMTUDISC,
!!(parm->iph.frag_off & htons(IP_DF))) ||
nla_put_u8(skb, IFLA_IPTUN_PROTO, parm->iph.protocol) ||
- nla_put_be16(skb, IFLA_IPTUN_FLAGS, parm->i_flags))
+ nla_put_be16(skb, IFLA_IPTUN_FLAGS, parm->i_flags) ||
+ nla_put_u32(skb, IFLA_IPTUN_FWMARK, tunnel->fwmark))
goto nla_put_failure;
#ifdef CONFIG_IPV6_SIT_6RD
@@ -1715,6 +1725,7 @@ static const struct nla_policy ipip6_policy[IFLA_IPTUN_MAX + 1] = {
[IFLA_IPTUN_ENCAP_FLAGS] = { .type = NLA_U16 },
[IFLA_IPTUN_ENCAP_SPORT] = { .type = NLA_U16 },
[IFLA_IPTUN_ENCAP_DPORT] = { .type = NLA_U16 },
+ [IFLA_IPTUN_FWMARK] = { .type = NLA_U32 },
};
static void ipip6_dellink(struct net_device *dev, struct list_head *head)
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 895ff650db43..5abc3692b901 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -18,6 +18,7 @@
#include <linux/random.h>
#include <linux/siphash.h>
#include <linux/kernel.h>
+#include <net/secure_seq.h>
#include <net/ipv6.h>
#include <net/tcp.h>
@@ -143,6 +144,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
int mss;
struct dst_entry *dst;
__u8 rcv_wscale;
+ u32 tsoff = 0;
if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies || !th->ack || th->rst)
goto out;
@@ -162,6 +164,12 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
memset(&tcp_opt, 0, sizeof(tcp_opt));
tcp_parse_options(skb, &tcp_opt, 0, NULL);
+ if (tcp_opt.saw_tstamp && tcp_opt.rcv_tsecr) {
+ tsoff = secure_tcpv6_ts_off(ipv6_hdr(skb)->daddr.s6_addr32,
+ ipv6_hdr(skb)->saddr.s6_addr32);
+ tcp_opt.rcv_tsecr -= tsoff;
+ }
+
if (!cookie_timestamp_decode(&tcp_opt))
goto out;
@@ -242,7 +250,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
ireq->rcv_wscale = rcv_wscale;
ireq->ecn_ok = cookie_ecn_ok(&tcp_opt, sock_net(sk), dst);
- ret = tcp_get_cookie_sock(sk, skb, req, dst);
+ ret = tcp_get_cookie_sock(sk, skb, req, dst, tsoff);
out:
return ret;
out_free:
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 49fa2e8c3fa9..4f4310a36a04 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -101,12 +101,18 @@ static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
}
}
-static u32 tcp_v6_init_sequence(const struct sk_buff *skb, u32 *tsoff)
+static u32 tcp_v6_init_seq(const struct sk_buff *skb)
{
- return secure_tcpv6_sequence_number(ipv6_hdr(skb)->daddr.s6_addr32,
- ipv6_hdr(skb)->saddr.s6_addr32,
- tcp_hdr(skb)->dest,
- tcp_hdr(skb)->source, tsoff);
+ return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32,
+ ipv6_hdr(skb)->saddr.s6_addr32,
+ tcp_hdr(skb)->dest,
+ tcp_hdr(skb)->source);
+}
+
+static u32 tcp_v6_init_ts_off(const struct sk_buff *skb)
+{
+ return secure_tcpv6_ts_off(ipv6_hdr(skb)->daddr.s6_addr32,
+ ipv6_hdr(skb)->saddr.s6_addr32);
}
static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
@@ -122,7 +128,6 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
struct flowi6 fl6;
struct dst_entry *dst;
int addr_type;
- u32 seq;
int err;
struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
@@ -265,11 +270,6 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
sk->sk_gso_type = SKB_GSO_TCPV6;
ip6_dst_store(sk, dst, NULL, NULL);
- if (tcp_death_row->sysctl_tw_recycle &&
- !tp->rx_opt.ts_recent_stamp &&
- ipv6_addr_equal(&fl6.daddr, &sk->sk_v6_daddr))
- tcp_fetch_timewait_stamp(sk, dst);
-
icsk->icsk_ext_hdr_len = 0;
if (opt)
icsk->icsk_ext_hdr_len = opt->opt_flen +
@@ -287,13 +287,13 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
sk_set_txhash(sk);
if (likely(!tp->repair)) {
- seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32,
- sk->sk_v6_daddr.s6_addr32,
- inet->inet_sport,
- inet->inet_dport,
- &tp->tsoffset);
if (!tp->write_seq)
- tp->write_seq = seq;
+ tp->write_seq = secure_tcpv6_seq(np->saddr.s6_addr32,
+ sk->sk_v6_daddr.s6_addr32,
+ inet->inet_sport,
+ inet->inet_dport);
+ tp->tsoffset = secure_tcpv6_ts_off(np->saddr.s6_addr32,
+ sk->sk_v6_daddr.s6_addr32);
}
if (tcp_fastopen_defer_connect(sk, &err))
@@ -727,11 +727,8 @@ static void tcp_v6_init_req(struct request_sock *req,
static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
struct flowi *fl,
- const struct request_sock *req,
- bool *strict)
+ const struct request_sock *req)
{
- if (strict)
- *strict = true;
return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP);
}
@@ -757,7 +754,8 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
.cookie_init_seq = cookie_v6_init_sequence,
#endif
.route_req = tcp_v6_route_req,
- .init_seq = tcp_v6_init_sequence,
+ .init_seq = tcp_v6_init_seq,
+ .init_ts_off = tcp_v6_init_ts_off,
.send_synack = tcp_v6_send_synack,
};
@@ -1064,6 +1062,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
#endif
+ newnp->ipv6_mc_list = NULL;
newnp->ipv6_ac_list = NULL;
newnp->ipv6_fl_list = NULL;
newnp->pktoptions = NULL;
@@ -1133,6 +1132,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
First: no IPv4 options.
*/
newinet->inet_opt = NULL;
+ newnp->ipv6_mc_list = NULL;
newnp->ipv6_ac_list = NULL;
newnp->ipv6_fl_list = NULL;
@@ -1301,8 +1301,6 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
goto discard;
if (nsk != sk) {
- sock_rps_save_rxhash(nsk, skb);
- sk_mark_napi_id(nsk, skb);
if (tcp_child_process(sk, nsk, skb))
goto reset;
if (opt_skb)
@@ -1921,7 +1919,7 @@ struct proto tcpv6_prot = {
.sysctl_rmem = sysctl_tcp_rmem,
.max_header = MAX_TCP_HEADER,
.obj_size = sizeof(struct tcp6_sock),
- .slab_flags = SLAB_DESTROY_BY_RCU,
+ .slab_flags = SLAB_TYPESAFE_BY_RCU,
.twsk_prot = &tcp6_timewait_sock_ops,
.rsk_prot = &tcp6_request_sock_ops,
.h.hashinfo = &tcp_hashinfo,
@@ -1933,8 +1931,9 @@ struct proto tcpv6_prot = {
.diag_destroy = tcp_abort,
};
-static const struct inet6_protocol tcpv6_protocol = {
+static struct inet6_protocol tcpv6_protocol = {
.early_demux = tcp_v6_early_demux,
+ .early_demux_handler = tcp_v6_early_demux,
.handler = tcp_v6_rcv,
.err_handler = tcp_v6_err,
.flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index e28082f0a307..06ec39b79609 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -46,6 +46,7 @@
#include <net/tcp_states.h>
#include <net/ip6_checksum.h>
#include <net/xfrm.h>
+#include <net/inet_hashtables.h>
#include <net/inet6_hashtables.h>
#include <net/busy_poll.h>
#include <net/sock_reuseport.h>
@@ -525,7 +526,7 @@ out:
return;
}
-int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
+static int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
{
int rc;
@@ -568,7 +569,7 @@ void udpv6_encap_enable(void)
}
EXPORT_SYMBOL(udpv6_encap_enable);
-int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
+static int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
{
struct udp_sock *up = udp_sk(sk);
int is_udplite = IS_UDPLITE(sk);
@@ -864,6 +865,69 @@ discard:
return 0;
}
+
+static struct sock *__udp6_lib_demux_lookup(struct net *net,
+ __be16 loc_port, const struct in6_addr *loc_addr,
+ __be16 rmt_port, const struct in6_addr *rmt_addr,
+ int dif)
+{
+ unsigned short hnum = ntohs(loc_port);
+ unsigned int hash2 = udp6_portaddr_hash(net, loc_addr, hnum);
+ unsigned int slot2 = hash2 & udp_table.mask;
+ struct udp_hslot *hslot2 = &udp_table.hash2[slot2];
+ const __portpair ports = INET_COMBINED_PORTS(rmt_port, hnum);
+ struct sock *sk;
+
+ udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
+ if (INET6_MATCH(sk, net, rmt_addr, loc_addr, ports, dif))
+ return sk;
+ /* Only check first socket in chain */
+ break;
+ }
+ return NULL;
+}
+
+static void udp_v6_early_demux(struct sk_buff *skb)
+{
+ struct net *net = dev_net(skb->dev);
+ const struct udphdr *uh;
+ struct sock *sk;
+ struct dst_entry *dst;
+ int dif = skb->dev->ifindex;
+
+ if (!pskb_may_pull(skb, skb_transport_offset(skb) +
+ sizeof(struct udphdr)))
+ return;
+
+ uh = udp_hdr(skb);
+
+ if (skb->pkt_type == PACKET_HOST)
+ sk = __udp6_lib_demux_lookup(net, uh->dest,
+ &ipv6_hdr(skb)->daddr,
+ uh->source, &ipv6_hdr(skb)->saddr,
+ dif);
+ else
+ return;
+
+ if (!sk || !atomic_inc_not_zero_hint(&sk->sk_refcnt, 2))
+ return;
+
+ skb->sk = sk;
+ skb->destructor = sock_efree;
+ dst = READ_ONCE(sk->sk_rx_dst);
+
+ if (dst)
+ dst = dst_check(dst, inet6_sk(sk)->rx_dst_cookie);
+ if (dst) {
+ if (dst->flags & DST_NOCACHE) {
+ if (likely(atomic_inc_not_zero(&dst->__refcnt)))
+ skb_dst_set(skb, dst);
+ } else {
+ skb_dst_set_noref(skb, dst);
+ }
+ }
+}
+
static __inline__ int udpv6_rcv(struct sk_buff *skb)
{
return __udp6_lib_rcv(skb, &udp_table, IPPROTO_UDP);
@@ -1378,7 +1442,9 @@ int compat_udpv6_getsockopt(struct sock *sk, int level, int optname,
}
#endif
-static const struct inet6_protocol udpv6_protocol = {
+static struct inet6_protocol udpv6_protocol = {
+ .early_demux = udp_v6_early_demux,
+ .early_demux_handler = udp_v6_early_demux,
.handler = udpv6_rcv,
.err_handler = udpv6_err,
.flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
diff --git a/net/ipv6/udp_impl.h b/net/ipv6/udp_impl.h
index e78bdc76dcc3..f180b3d85e31 100644
--- a/net/ipv6/udp_impl.h
+++ b/net/ipv6/udp_impl.h
@@ -26,7 +26,6 @@ int compat_udpv6_getsockopt(struct sock *sk, int level, int optname,
int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len);
int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock,
int flags, int *addr_len);
-int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
void udpv6_destroy_sock(struct sock *sk);
#ifdef CONFIG_PROC_FS
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index ac858c480f2f..a2267f80febb 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -29,6 +29,7 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
u8 frag_hdr_sz = sizeof(struct frag_hdr);
__wsum csum;
int tnl_hlen;
+ int err;
mss = skb_shinfo(skb)->gso_size;
if (unlikely(skb->len <= mss))
@@ -90,7 +91,10 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
/* Find the unfragmentable header and shift it left by frag_hdr_sz
* bytes to insert fragment header.
*/
- unfrag_ip6hlen = ip6_find_1stfragopt(skb, &prevhdr);
+ err = ip6_find_1stfragopt(skb, &prevhdr);
+ if (err < 0)
+ return ERR_PTR(err);
+ unfrag_ip6hlen = err;
nexthdr = *prevhdr;
*prevhdr = NEXTHDR_FRAGMENT;
unfrag_len = (skb_network_header(skb) - skb_mac_header(skb)) +
diff --git a/net/ipv6/xfrm6_mode_transport.c b/net/ipv6/xfrm6_mode_transport.c
index 4439ee44c8b0..7a92c0f31912 100644
--- a/net/ipv6/xfrm6_mode_transport.c
+++ b/net/ipv6/xfrm6_mode_transport.c
@@ -13,6 +13,7 @@
#include <net/dst.h>
#include <net/ipv6.h>
#include <net/xfrm.h>
+#include <net/protocol.h>
/* Add encapsulation header.
*
@@ -26,6 +27,7 @@ static int xfrm6_transport_output(struct xfrm_state *x, struct sk_buff *skb)
int hdr_len;
iph = ipv6_hdr(skb);
+ skb_set_inner_transport_header(skb, skb_transport_offset(skb));
hdr_len = x->type->hdr_offset(x, skb, &prevhdr);
skb_set_mac_header(skb, (prevhdr - x->props.header_len) - skb->data);
@@ -61,9 +63,41 @@ static int xfrm6_transport_input(struct xfrm_state *x, struct sk_buff *skb)
return 0;
}
+static struct sk_buff *xfrm4_transport_gso_segment(struct xfrm_state *x,
+ struct sk_buff *skb,
+ netdev_features_t features)
+{
+ const struct net_offload *ops;
+ struct sk_buff *segs = ERR_PTR(-EINVAL);
+ struct xfrm_offload *xo = xfrm_offload(skb);
+
+ skb->transport_header += x->props.header_len;
+ ops = rcu_dereference(inet6_offloads[xo->proto]);
+ if (likely(ops && ops->callbacks.gso_segment))
+ segs = ops->callbacks.gso_segment(skb, features);
+
+ return segs;
+}
+
+static void xfrm6_transport_xmit(struct xfrm_state *x, struct sk_buff *skb)
+{
+ struct xfrm_offload *xo = xfrm_offload(skb);
+
+ skb_reset_mac_len(skb);
+ pskb_pull(skb, skb->mac_len + sizeof(struct ipv6hdr) + x->props.header_len);
+
+ if (xo->flags & XFRM_GSO_SEGMENT) {
+ skb_reset_transport_header(skb);
+ skb->transport_header -= x->props.header_len;
+ }
+}
+
+
static struct xfrm_mode xfrm6_transport_mode = {
.input = xfrm6_transport_input,
.output = xfrm6_transport_output,
+ .gso_segment = xfrm4_transport_gso_segment,
+ .xmit = xfrm6_transport_xmit,
.owner = THIS_MODULE,
.encap = XFRM_MODE_TRANSPORT,
};
diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c
index 372855eeaf42..02556e356f87 100644
--- a/net/ipv6/xfrm6_mode_tunnel.c
+++ b/net/ipv6/xfrm6_mode_tunnel.c
@@ -36,6 +36,9 @@ static int xfrm6_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
struct ipv6hdr *top_iph;
int dsfield;
+ skb_set_inner_network_header(skb, skb_network_offset(skb));
+ skb_set_inner_transport_header(skb, skb_transport_offset(skb));
+
skb_set_network_header(skb, -x->props.header_len);
skb->mac_header = skb->network_header +
offsetof(struct ipv6hdr, nexthdr);
@@ -96,11 +99,35 @@ out:
return err;
}
+static struct sk_buff *xfrm6_mode_tunnel_gso_segment(struct xfrm_state *x,
+ struct sk_buff *skb,
+ netdev_features_t features)
+{
+ __skb_push(skb, skb->mac_len);
+ return skb_mac_gso_segment(skb, features);
+
+}
+
+static void xfrm6_mode_tunnel_xmit(struct xfrm_state *x, struct sk_buff *skb)
+{
+ struct xfrm_offload *xo = xfrm_offload(skb);
+
+ if (xo->flags & XFRM_GSO_SEGMENT) {
+ skb->network_header = skb->network_header - x->props.header_len;
+ skb->transport_header = skb->network_header + sizeof(struct ipv6hdr);
+ }
+
+ skb_reset_mac_len(skb);
+ pskb_pull(skb, skb->mac_len + x->props.header_len);
+}
+
static struct xfrm_mode xfrm6_tunnel_mode = {
.input2 = xfrm6_mode_tunnel_input,
.input = xfrm_prepare_input,
.output2 = xfrm6_mode_tunnel_output,
.output = xfrm6_prepare_output,
+ .gso_segment = xfrm6_mode_tunnel_gso_segment,
+ .xmit = xfrm6_mode_tunnel_xmit,
.owner = THIS_MODULE,
.encap = XFRM_MODE_TUNNEL,
.flags = XFRM_MODE_FLAG_TUNNEL,
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index 4d09ce6fa90e..8ae87d4ec5ff 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -73,11 +73,16 @@ static int xfrm6_tunnel_check_size(struct sk_buff *skb)
int mtu, ret = 0;
struct dst_entry *dst = skb_dst(skb);
+ if (skb->ignore_df)
+ goto out;
+
mtu = dst_mtu(dst);
if (mtu < IPV6_MIN_MTU)
mtu = IPV6_MIN_MTU;
- if (!skb->ignore_df && skb->len > mtu) {
+ if ((!skb_is_gso(skb) && skb->len > mtu) ||
+ (skb_is_gso(skb) &&
+ skb_gso_network_seglen(skb) > ip6_skb_dst_mtu(skb))) {
skb->dev = dst->dev;
skb->protocol = htons(ETH_P_IPV6);
@@ -89,7 +94,7 @@ static int xfrm6_tunnel_check_size(struct sk_buff *skb)
icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
ret = -EMSGSIZE;
}
-
+out:
return ret;
}
diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c
index 8a9219ff2e77..fa31ef29e3fa 100644
--- a/net/ipx/af_ipx.c
+++ b/net/ipx/af_ipx.c
@@ -1168,11 +1168,10 @@ static int ipxitf_ioctl(unsigned int cmd, void __user *arg)
sipx->sipx_network = ipxif->if_netnum;
memcpy(sipx->sipx_node, ipxif->if_node,
sizeof(sipx->sipx_node));
- rc = -EFAULT;
+ rc = 0;
if (copy_to_user(arg, &ifr, sizeof(ifr)))
- break;
+ rc = -EFAULT;
ipxitf_put(ipxif);
- rc = 0;
break;
}
case SIOCAIPXITFCRT:
diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c
index 31762f76cdb5..deca20fb2ce2 100644
--- a/net/kcm/kcmsock.c
+++ b/net/kcm/kcmsock.c
@@ -1707,11 +1707,7 @@ static int kcm_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
struct kcm_clone info;
struct socket *newsock = NULL;
- if (copy_from_user(&info, (void __user *)arg, sizeof(info)))
- return -EFAULT;
-
err = kcm_clone(sock, &info, &newsock);
-
if (!err) {
if (copy_to_user((void __user *)arg, &info,
sizeof(info))) {
diff --git a/net/key/af_key.c b/net/key/af_key.c
index be8cecc65002..c1950bb14735 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -3827,7 +3827,6 @@ static inline void pfkey_exit_proc(struct net *net)
static struct xfrm_mgr pfkeyv2_mgr =
{
- .id = "pfkeyv2",
.notify = pfkey_send_notify,
.acquire = pfkey_send_acquire,
.compile_policy = pfkey_compile_policy,
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index e37d9554da7b..fa0342574b89 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -120,7 +120,7 @@ static inline struct l2tp_tunnel *l2tp_tunnel(struct sock *sk)
return sk->sk_user_data;
}
-static inline struct l2tp_net *l2tp_pernet(struct net *net)
+static inline struct l2tp_net *l2tp_pernet(const struct net *net)
{
BUG_ON(!net);
@@ -217,27 +217,6 @@ static void l2tp_tunnel_sock_put(struct sock *sk)
sock_put(sk);
}
-/* Lookup a session by id in the global session list
- */
-static struct l2tp_session *l2tp_session_find_2(struct net *net, u32 session_id)
-{
- struct l2tp_net *pn = l2tp_pernet(net);
- struct hlist_head *session_list =
- l2tp_session_id_hash_2(pn, session_id);
- struct l2tp_session *session;
-
- rcu_read_lock_bh();
- hlist_for_each_entry_rcu(session, session_list, global_hlist) {
- if (session->session_id == session_id) {
- rcu_read_unlock_bh();
- return session;
- }
- }
- rcu_read_unlock_bh();
-
- return NULL;
-}
-
/* Session hash list.
* The session_id SHOULD be random according to RFC2661, but several
* L2TP implementations (Cisco and Microsoft) use incrementing
@@ -250,38 +229,10 @@ l2tp_session_id_hash(struct l2tp_tunnel *tunnel, u32 session_id)
return &tunnel->session_hlist[hash_32(session_id, L2TP_HASH_BITS)];
}
-/* Lookup a session by id
- */
-struct l2tp_session *l2tp_session_find(struct net *net, struct l2tp_tunnel *tunnel, u32 session_id)
-{
- struct hlist_head *session_list;
- struct l2tp_session *session;
-
- /* In L2TPv3, session_ids are unique over all tunnels and we
- * sometimes need to look them up before we know the
- * tunnel.
- */
- if (tunnel == NULL)
- return l2tp_session_find_2(net, session_id);
-
- session_list = l2tp_session_id_hash(tunnel, session_id);
- read_lock_bh(&tunnel->hlist_lock);
- hlist_for_each_entry(session, session_list, hlist) {
- if (session->session_id == session_id) {
- read_unlock_bh(&tunnel->hlist_lock);
- return session;
- }
- }
- read_unlock_bh(&tunnel->hlist_lock);
-
- return NULL;
-}
-EXPORT_SYMBOL_GPL(l2tp_session_find);
-
-/* Like l2tp_session_find() but takes a reference on the returned session.
+/* Lookup a session. A new reference is held on the returned session.
* Optionally calls session->ref() too if do_ref is true.
*/
-struct l2tp_session *l2tp_session_get(struct net *net,
+struct l2tp_session *l2tp_session_get(const struct net *net,
struct l2tp_tunnel *tunnel,
u32 session_id, bool do_ref)
{
@@ -356,7 +307,8 @@ EXPORT_SYMBOL_GPL(l2tp_session_get_nth);
/* Lookup a session by interface name.
* This is very inefficient but is only used by management interfaces.
*/
-struct l2tp_session *l2tp_session_get_by_ifname(struct net *net, char *ifname,
+struct l2tp_session *l2tp_session_get_by_ifname(const struct net *net,
+ const char *ifname,
bool do_ref)
{
struct l2tp_net *pn = l2tp_pernet(net);
@@ -427,7 +379,7 @@ exist:
/* Lookup a tunnel by id
*/
-struct l2tp_tunnel *l2tp_tunnel_find(struct net *net, u32 tunnel_id)
+struct l2tp_tunnel *l2tp_tunnel_find(const struct net *net, u32 tunnel_id)
{
struct l2tp_tunnel *tunnel;
struct l2tp_net *pn = l2tp_pernet(net);
@@ -445,7 +397,7 @@ struct l2tp_tunnel *l2tp_tunnel_find(struct net *net, u32 tunnel_id)
}
EXPORT_SYMBOL_GPL(l2tp_tunnel_find);
-struct l2tp_tunnel *l2tp_tunnel_find_nth(struct net *net, int nth)
+struct l2tp_tunnel *l2tp_tunnel_find_nth(const struct net *net, int nth)
{
struct l2tp_net *pn = l2tp_pernet(net);
struct l2tp_tunnel *tunnel;
diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h
index 8ce7818c7a9d..eec5ad2ebb93 100644
--- a/net/l2tp/l2tp_core.h
+++ b/net/l2tp/l2tp_core.h
@@ -230,18 +230,16 @@ out:
return tunnel;
}
-struct l2tp_session *l2tp_session_get(struct net *net,
+struct l2tp_session *l2tp_session_get(const struct net *net,
struct l2tp_tunnel *tunnel,
u32 session_id, bool do_ref);
-struct l2tp_session *l2tp_session_find(struct net *net,
- struct l2tp_tunnel *tunnel,
- u32 session_id);
struct l2tp_session *l2tp_session_get_nth(struct l2tp_tunnel *tunnel, int nth,
bool do_ref);
-struct l2tp_session *l2tp_session_get_by_ifname(struct net *net, char *ifname,
+struct l2tp_session *l2tp_session_get_by_ifname(const struct net *net,
+ const char *ifname,
bool do_ref);
-struct l2tp_tunnel *l2tp_tunnel_find(struct net *net, u32 tunnel_id);
-struct l2tp_tunnel *l2tp_tunnel_find_nth(struct net *net, int nth);
+struct l2tp_tunnel *l2tp_tunnel_find(const struct net *net, u32 tunnel_id);
+struct l2tp_tunnel *l2tp_tunnel_find_nth(const struct net *net, int nth);
int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id,
u32 peer_tunnel_id, struct l2tp_tunnel_cfg *cfg,
diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c
index 6fd41d7afe1e..8b21af7321b9 100644
--- a/net/l2tp/l2tp_eth.c
+++ b/net/l2tp/l2tp_eth.c
@@ -30,6 +30,9 @@
#include <net/xfrm.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/udp.h>
#include "l2tp_core.h"
@@ -127,8 +130,13 @@ static const struct net_device_ops l2tp_eth_netdev_ops = {
.ndo_set_mac_address = eth_mac_addr,
};
+static struct device_type l2tpeth_type = {
+ .name = "l2tpeth",
+};
+
static void l2tp_eth_dev_setup(struct net_device *dev)
{
+ SET_NETDEV_DEVTYPE(dev, &l2tpeth_type);
ether_setup(dev);
dev->priv_flags &= ~IFF_TX_SKB_SHARING;
dev->features |= NETIF_F_LLTX;
@@ -204,8 +212,58 @@ static void l2tp_eth_show(struct seq_file *m, void *arg)
}
#endif
+static void l2tp_eth_adjust_mtu(struct l2tp_tunnel *tunnel,
+ struct l2tp_session *session,
+ struct net_device *dev)
+{
+ unsigned int overhead = 0;
+ struct dst_entry *dst;
+ u32 l3_overhead = 0;
+
+ /* if the encap is UDP, account for UDP header size */
+ if (tunnel->encap == L2TP_ENCAPTYPE_UDP) {
+ overhead += sizeof(struct udphdr);
+ dev->needed_headroom += sizeof(struct udphdr);
+ }
+ if (session->mtu != 0) {
+ dev->mtu = session->mtu;
+ dev->needed_headroom += session->hdr_len;
+ return;
+ }
+ lock_sock(tunnel->sock);
+ l3_overhead = kernel_sock_ip_overhead(tunnel->sock);
+ release_sock(tunnel->sock);
+ if (l3_overhead == 0) {
+ /* L3 Overhead couldn't be identified, this could be
+ * because tunnel->sock was NULL or the socket's
+ * address family was not IPv4 or IPv6,
+ * dev mtu stays at 1500.
+ */
+ return;
+ }
+ /* Adjust MTU, factor overhead - underlay L3, overlay L2 hdr
+ * UDP overhead, if any, was already factored in above.
+ */
+ overhead += session->hdr_len + ETH_HLEN + l3_overhead;
+
+ /* If PMTU discovery was enabled, use discovered MTU on L2TP device */
+ dst = sk_dst_get(tunnel->sock);
+ if (dst) {
+ /* dst_mtu will use PMTU if found, else fallback to intf MTU */
+ u32 pmtu = dst_mtu(dst);
+
+ if (pmtu != 0)
+ dev->mtu = pmtu;
+ dst_release(dst);
+ }
+ session->mtu = dev->mtu - overhead;
+ dev->mtu = session->mtu;
+ dev->needed_headroom += session->hdr_len;
+}
+
static int l2tp_eth_create(struct net *net, u32 tunnel_id, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg)
{
+ unsigned char name_assign_type;
struct net_device *dev;
char name[IFNAMSIZ];
struct l2tp_tunnel *tunnel;
@@ -222,15 +280,12 @@ static int l2tp_eth_create(struct net *net, u32 tunnel_id, u32 session_id, u32 p
}
if (cfg->ifname) {
- dev = dev_get_by_name(net, cfg->ifname);
- if (dev) {
- dev_put(dev);
- rc = -EEXIST;
- goto out;
- }
strlcpy(name, cfg->ifname, IFNAMSIZ);
- } else
+ name_assign_type = NET_NAME_USER;
+ } else {
strcpy(name, L2TP_ETH_DEV_NAME);
+ name_assign_type = NET_NAME_ENUM;
+ }
session = l2tp_session_create(sizeof(*spriv), tunnel, session_id,
peer_session_id, cfg);
@@ -239,7 +294,7 @@ static int l2tp_eth_create(struct net *net, u32 tunnel_id, u32 session_id, u32 p
goto out;
}
- dev = alloc_netdev(sizeof(*priv), name, NET_NAME_UNKNOWN,
+ dev = alloc_netdev(sizeof(*priv), name, name_assign_type,
l2tp_eth_dev_setup);
if (!dev) {
rc = -ENOMEM;
@@ -247,12 +302,9 @@ static int l2tp_eth_create(struct net *net, u32 tunnel_id, u32 session_id, u32 p
}
dev_net_set(dev, net);
- if (session->mtu == 0)
- session->mtu = dev->mtu - session->hdr_len;
- dev->mtu = session->mtu;
- dev->needed_headroom += session->hdr_len;
dev->min_mtu = 0;
dev->max_mtu = ETH_MAX_MTU;
+ l2tp_eth_adjust_mtu(tunnel, session, dev);
priv = netdev_priv(dev);
priv->dev = dev;
diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c
index 7e3e669baac4..12cfcd0ca807 100644
--- a/net/l2tp/l2tp_netlink.c
+++ b/net/l2tp/l2tp_netlink.c
@@ -521,11 +521,6 @@ static int l2tp_nl_cmd_session_create(struct sk_buff *skb, struct genl_info *inf
goto out;
}
session_id = nla_get_u32(info->attrs[L2TP_ATTR_SESSION_ID]);
- session = l2tp_session_find(net, tunnel, session_id);
- if (session) {
- ret = -EEXIST;
- goto out;
- }
if (!info->attrs[L2TP_ATTR_PEER_SESSION_ID]) {
ret = -EINVAL;
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index cb4fff785cbf..8364fe5b59e4 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -142,7 +142,7 @@ static struct proto llc_proto = {
.name = "LLC",
.owner = THIS_MODULE,
.obj_size = sizeof(struct llc_sock),
- .slab_flags = SLAB_DESTROY_BY_RCU,
+ .slab_flags = SLAB_TYPESAFE_BY_RCU,
};
/**
diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c
index 8bc5a1bd2d45..9b02c13d258b 100644
--- a/net/llc/llc_conn.c
+++ b/net/llc/llc_conn.c
@@ -506,7 +506,7 @@ static struct sock *__llc_lookup_established(struct llc_sap *sap,
again:
sk_nulls_for_each_rcu(rc, node, laddr_hb) {
if (llc_estab_match(sap, daddr, laddr, rc)) {
- /* Extra checks required by SLAB_DESTROY_BY_RCU */
+ /* Extra checks required by SLAB_TYPESAFE_BY_RCU */
if (unlikely(!atomic_inc_not_zero(&rc->sk_refcnt)))
goto again;
if (unlikely(llc_sk(rc)->sap != sap ||
@@ -565,7 +565,7 @@ static struct sock *__llc_lookup_listener(struct llc_sap *sap,
again:
sk_nulls_for_each_rcu(rc, node, laddr_hb) {
if (llc_listener_match(sap, laddr, rc)) {
- /* Extra checks required by SLAB_DESTROY_BY_RCU */
+ /* Extra checks required by SLAB_TYPESAFE_BY_RCU */
if (unlikely(!atomic_inc_not_zero(&rc->sk_refcnt)))
goto again;
if (unlikely(llc_sk(rc)->sap != sap ||
diff --git a/net/llc/llc_sap.c b/net/llc/llc_sap.c
index 5404d0d195cc..63b6ab056370 100644
--- a/net/llc/llc_sap.c
+++ b/net/llc/llc_sap.c
@@ -328,7 +328,7 @@ static struct sock *llc_lookup_dgram(struct llc_sap *sap,
again:
sk_nulls_for_each_rcu(rc, node, laddr_hb) {
if (llc_dgram_match(sap, laddr, rc)) {
- /* Extra checks required by SLAB_DESTROY_BY_RCU */
+ /* Extra checks required by SLAB_TYPESAFE_BY_RCU */
if (unlikely(!atomic_inc_not_zero(&rc->sk_refcnt)))
goto again;
if (unlikely(llc_sk(rc)->sap != sap ||
diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c
index 4456559cb056..1b7a4daf283c 100644
--- a/net/mac80211/agg-rx.c
+++ b/net/mac80211/agg-rx.c
@@ -357,14 +357,14 @@ void __ieee80211_start_rx_ba_session(struct sta_info *sta,
spin_lock_init(&tid_agg_rx->reorder_lock);
/* rx timer */
- tid_agg_rx->session_timer.function = sta_rx_agg_session_timer_expired;
- tid_agg_rx->session_timer.data = (unsigned long)&sta->timer_to_tid[tid];
- init_timer_deferrable(&tid_agg_rx->session_timer);
+ setup_deferrable_timer(&tid_agg_rx->session_timer,
+ sta_rx_agg_session_timer_expired,
+ (unsigned long)&sta->timer_to_tid[tid]);
/* rx reorder timer */
- tid_agg_rx->reorder_timer.function = sta_rx_agg_reorder_timer_expired;
- tid_agg_rx->reorder_timer.data = (unsigned long)&sta->timer_to_tid[tid];
- init_timer(&tid_agg_rx->reorder_timer);
+ setup_timer(&tid_agg_rx->reorder_timer,
+ sta_rx_agg_reorder_timer_expired,
+ (unsigned long)&sta->timer_to_tid[tid]);
/* prepare reordering buffer */
tid_agg_rx->reorder_buf =
diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c
index 45319cc01121..60e2a62f7bef 100644
--- a/net/mac80211/agg-tx.c
+++ b/net/mac80211/agg-tx.c
@@ -670,14 +670,14 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid,
tid_tx->timeout = timeout;
/* response timer */
- tid_tx->addba_resp_timer.function = sta_addba_resp_timer_expired;
- tid_tx->addba_resp_timer.data = (unsigned long)&sta->timer_to_tid[tid];
- init_timer(&tid_tx->addba_resp_timer);
+ setup_timer(&tid_tx->addba_resp_timer,
+ sta_addba_resp_timer_expired,
+ (unsigned long)&sta->timer_to_tid[tid]);
/* tx timer */
- tid_tx->session_timer.function = sta_tx_agg_session_timer_expired;
- tid_tx->session_timer.data = (unsigned long)&sta->timer_to_tid[tid];
- init_timer_deferrable(&tid_tx->session_timer);
+ setup_deferrable_timer(&tid_tx->session_timer,
+ sta_tx_agg_session_timer_expired,
+ (unsigned long)&sta->timer_to_tid[tid]);
/* assign a dialog token */
sta->ampdu_mlme.dialog_token_allocator++;
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index ac879bb17870..6c2e6060cd54 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -3,7 +3,7 @@
*
* Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2015 Intel Mobile Communications GmbH
- * Copyright (C) 2015-2016 Intel Deutschland GmbH
+ * Copyright (C) 2015-2017 Intel Deutschland GmbH
*
* This file is GPLv2 as found in COPYING.
*/
@@ -22,11 +22,98 @@
#include "mesh.h"
#include "wme.h"
+static void ieee80211_set_mu_mimo_follow(struct ieee80211_sub_if_data *sdata,
+ struct vif_params *params)
+{
+ bool mu_mimo_groups = false;
+ bool mu_mimo_follow = false;
+
+ if (params->vht_mumimo_groups) {
+ u64 membership;
+
+ BUILD_BUG_ON(sizeof(membership) != WLAN_MEMBERSHIP_LEN);
+
+ memcpy(sdata->vif.bss_conf.mu_group.membership,
+ params->vht_mumimo_groups, WLAN_MEMBERSHIP_LEN);
+ memcpy(sdata->vif.bss_conf.mu_group.position,
+ params->vht_mumimo_groups + WLAN_MEMBERSHIP_LEN,
+ WLAN_USER_POSITION_LEN);
+ ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_MU_GROUPS);
+ /* don't care about endianness - just check for 0 */
+ memcpy(&membership, params->vht_mumimo_groups,
+ WLAN_MEMBERSHIP_LEN);
+ mu_mimo_groups = membership != 0;
+ }
+
+ if (params->vht_mumimo_follow_addr) {
+ mu_mimo_follow =
+ is_valid_ether_addr(params->vht_mumimo_follow_addr);
+ ether_addr_copy(sdata->u.mntr.mu_follow_addr,
+ params->vht_mumimo_follow_addr);
+ }
+
+ sdata->vif.mu_mimo_owner = mu_mimo_groups || mu_mimo_follow;
+}
+
+static int ieee80211_set_mon_options(struct ieee80211_sub_if_data *sdata,
+ struct vif_params *params)
+{
+ struct ieee80211_local *local = sdata->local;
+ struct ieee80211_sub_if_data *monitor_sdata;
+
+ /* check flags first */
+ if (params->flags && ieee80211_sdata_running(sdata)) {
+ u32 mask = MONITOR_FLAG_COOK_FRAMES | MONITOR_FLAG_ACTIVE;
+
+ /*
+ * Prohibit MONITOR_FLAG_COOK_FRAMES and
+ * MONITOR_FLAG_ACTIVE to be changed while the
+ * interface is up.
+ * Else we would need to add a lot of cruft
+ * to update everything:
+ * cooked_mntrs, monitor and all fif_* counters
+ * reconfigure hardware
+ */
+ if ((params->flags & mask) != (sdata->u.mntr.flags & mask))
+ return -EBUSY;
+ }
+
+ /* also validate MU-MIMO change */
+ monitor_sdata = rtnl_dereference(local->monitor_sdata);
+
+ if (!monitor_sdata &&
+ (params->vht_mumimo_groups || params->vht_mumimo_follow_addr))
+ return -EOPNOTSUPP;
+
+ /* apply all changes now - no failures allowed */
+
+ if (monitor_sdata)
+ ieee80211_set_mu_mimo_follow(monitor_sdata, params);
+
+ if (params->flags) {
+ if (ieee80211_sdata_running(sdata)) {
+ ieee80211_adjust_monitor_flags(sdata, -1);
+ sdata->u.mntr.flags = params->flags;
+ ieee80211_adjust_monitor_flags(sdata, 1);
+
+ ieee80211_configure_filter(local);
+ } else {
+ /*
+ * Because the interface is down, ieee80211_do_stop
+ * and ieee80211_do_open take care of "everything"
+ * mentioned in the comment above.
+ */
+ sdata->u.mntr.flags = params->flags;
+ }
+ }
+
+ return 0;
+}
+
static struct wireless_dev *ieee80211_add_iface(struct wiphy *wiphy,
const char *name,
unsigned char name_assign_type,
enum nl80211_iftype type,
- u32 *flags,
struct vif_params *params)
{
struct ieee80211_local *local = wiphy_priv(wiphy);
@@ -38,9 +125,14 @@ static struct wireless_dev *ieee80211_add_iface(struct wiphy *wiphy,
if (err)
return ERR_PTR(err);
- if (type == NL80211_IFTYPE_MONITOR && flags) {
- sdata = IEEE80211_WDEV_TO_SUB_IF(wdev);
- sdata->u.mntr.flags = *flags;
+ sdata = IEEE80211_WDEV_TO_SUB_IF(wdev);
+
+ if (type == NL80211_IFTYPE_MONITOR) {
+ err = ieee80211_set_mon_options(sdata, params);
+ if (err) {
+ ieee80211_if_remove(sdata);
+ return NULL;
+ }
}
return wdev;
@@ -55,7 +147,7 @@ static int ieee80211_del_iface(struct wiphy *wiphy, struct wireless_dev *wdev)
static int ieee80211_change_iface(struct wiphy *wiphy,
struct net_device *dev,
- enum nl80211_iftype type, u32 *flags,
+ enum nl80211_iftype type,
struct vif_params *params)
{
struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
@@ -75,58 +167,9 @@ static int ieee80211_change_iface(struct wiphy *wiphy,
}
if (sdata->vif.type == NL80211_IFTYPE_MONITOR) {
- struct ieee80211_local *local = sdata->local;
- struct ieee80211_sub_if_data *monitor_sdata;
- u32 mu_mntr_cap_flag = NL80211_EXT_FEATURE_MU_MIMO_AIR_SNIFFER;
-
- monitor_sdata = rtnl_dereference(local->monitor_sdata);
- if (monitor_sdata &&
- wiphy_ext_feature_isset(wiphy, mu_mntr_cap_flag)) {
- memcpy(monitor_sdata->vif.bss_conf.mu_group.membership,
- params->vht_mumimo_groups, WLAN_MEMBERSHIP_LEN);
- memcpy(monitor_sdata->vif.bss_conf.mu_group.position,
- params->vht_mumimo_groups + WLAN_MEMBERSHIP_LEN,
- WLAN_USER_POSITION_LEN);
- monitor_sdata->vif.mu_mimo_owner = true;
- ieee80211_bss_info_change_notify(monitor_sdata,
- BSS_CHANGED_MU_GROUPS);
-
- ether_addr_copy(monitor_sdata->u.mntr.mu_follow_addr,
- params->macaddr);
- }
-
- if (!flags)
- return 0;
-
- if (ieee80211_sdata_running(sdata)) {
- u32 mask = MONITOR_FLAG_COOK_FRAMES |
- MONITOR_FLAG_ACTIVE;
-
- /*
- * Prohibit MONITOR_FLAG_COOK_FRAMES and
- * MONITOR_FLAG_ACTIVE to be changed while the
- * interface is up.
- * Else we would need to add a lot of cruft
- * to update everything:
- * cooked_mntrs, monitor and all fif_* counters
- * reconfigure hardware
- */
- if ((*flags & mask) != (sdata->u.mntr.flags & mask))
- return -EBUSY;
-
- ieee80211_adjust_monitor_flags(sdata, -1);
- sdata->u.mntr.flags = *flags;
- ieee80211_adjust_monitor_flags(sdata, 1);
-
- ieee80211_configure_filter(local);
- } else {
- /*
- * Because the interface is down, ieee80211_do_stop
- * and ieee80211_do_open take care of "everything"
- * mentioned in the comment above.
- */
- sdata->u.mntr.flags = *flags;
- }
+ ret = ieee80211_set_mon_options(sdata, params);
+ if (ret)
+ return ret;
}
return 0;
@@ -617,10 +660,11 @@ void sta_set_rate_info_tx(struct sta_info *sta,
int shift = ieee80211_vif_get_shift(&sta->sdata->vif);
u16 brate;
- sband = sta->local->hw.wiphy->bands[
- ieee80211_get_sdata_band(sta->sdata)];
- brate = sband->bitrates[rate->idx].bitrate;
- rinfo->legacy = DIV_ROUND_UP(brate, 1 << shift);
+ sband = ieee80211_get_sband(sta->sdata);
+ if (sband) {
+ brate = sband->bitrates[rate->idx].bitrate;
+ rinfo->legacy = DIV_ROUND_UP(brate, 1 << shift);
+ }
}
if (rate->flags & IEEE80211_TX_RC_40_MHZ_WIDTH)
rinfo->bw = RATE_INFO_BW_40;
@@ -696,11 +740,8 @@ static int ieee80211_set_monitor_channel(struct wiphy *wiphy,
return 0;
mutex_lock(&local->mtx);
- mutex_lock(&local->iflist_mtx);
if (local->use_chanctx) {
- sdata = rcu_dereference_protected(
- local->monitor_sdata,
- lockdep_is_held(&local->iflist_mtx));
+ sdata = rtnl_dereference(local->monitor_sdata);
if (sdata) {
ieee80211_vif_release_channel(sdata);
ret = ieee80211_vif_use_channel(sdata, chandef,
@@ -713,7 +754,6 @@ static int ieee80211_set_monitor_channel(struct wiphy *wiphy,
if (ret == 0)
local->monitor_chandef = *chandef;
- mutex_unlock(&local->iflist_mtx);
mutex_unlock(&local->mtx);
return ret;
@@ -1214,10 +1254,11 @@ static int sta_apply_parameters(struct ieee80211_local *local,
int ret = 0;
struct ieee80211_supported_band *sband;
struct ieee80211_sub_if_data *sdata = sta->sdata;
- enum nl80211_band band = ieee80211_get_sdata_band(sdata);
u32 mask, set;
- sband = local->hw.wiphy->bands[band];
+ sband = ieee80211_get_sband(sdata);
+ if (!sband)
+ return -EINVAL;
mask = params->sta_flags_mask;
set = params->sta_flags_set;
@@ -1350,7 +1391,7 @@ static int sta_apply_parameters(struct ieee80211_local *local,
ieee80211_parse_bitrates(&sdata->vif.bss_conf.chandef,
sband, params->supported_rates,
params->supported_rates_len,
- &sta->sta.supp_rates[band]);
+ &sta->sta.supp_rates[sband->band]);
}
if (params->ht_capa)
@@ -1366,8 +1407,8 @@ static int sta_apply_parameters(struct ieee80211_local *local,
/* returned value is only needed for rc update, but the
* rc isn't initialized here yet, so ignore it
*/
- __ieee80211_vht_handle_opmode(sdata, sta,
- params->opmode_notif, band);
+ __ieee80211_vht_handle_opmode(sdata, sta, params->opmode_notif,
+ sband->band);
}
if (params->support_p2p_ps >= 0)
@@ -2005,13 +2046,15 @@ static int ieee80211_change_bss(struct wiphy *wiphy,
struct bss_parameters *params)
{
struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
- enum nl80211_band band;
+ struct ieee80211_supported_band *sband;
u32 changed = 0;
if (!sdata_dereference(sdata->u.ap.beacon, sdata))
return -ENOENT;
- band = ieee80211_get_sdata_band(sdata);
+ sband = ieee80211_get_sband(sdata);
+ if (!sband)
+ return -EINVAL;
if (params->use_cts_prot >= 0) {
sdata->vif.bss_conf.use_cts_prot = params->use_cts_prot;
@@ -2024,7 +2067,7 @@ static int ieee80211_change_bss(struct wiphy *wiphy,
}
if (!sdata->vif.bss_conf.use_short_slot &&
- band == NL80211_BAND_5GHZ) {
+ sband->band == NL80211_BAND_5GHZ) {
sdata->vif.bss_conf.use_short_slot = true;
changed |= BSS_CHANGED_ERP_SLOT;
}
@@ -2037,11 +2080,12 @@ static int ieee80211_change_bss(struct wiphy *wiphy,
if (params->basic_rates) {
ieee80211_parse_bitrates(&sdata->vif.bss_conf.chandef,
- wiphy->bands[band],
+ wiphy->bands[sband->band],
params->basic_rates,
params->basic_rates_len,
&sdata->vif.bss_conf.basic_rates);
changed |= BSS_CHANGED_BASIC_RATES;
+ ieee80211_check_rate_mask(sdata);
}
if (params->ap_isolate >= 0) {
@@ -2198,7 +2242,8 @@ ieee80211_sched_scan_start(struct wiphy *wiphy,
}
static int
-ieee80211_sched_scan_stop(struct wiphy *wiphy, struct net_device *dev)
+ieee80211_sched_scan_stop(struct wiphy *wiphy, struct net_device *dev,
+ u64 reqid)
{
struct ieee80211_local *local = wiphy_priv(wiphy);
@@ -2630,6 +2675,33 @@ static int ieee80211_set_cqm_rssi_config(struct wiphy *wiphy,
bss_conf->cqm_rssi_thold = rssi_thold;
bss_conf->cqm_rssi_hyst = rssi_hyst;
+ bss_conf->cqm_rssi_low = 0;
+ bss_conf->cqm_rssi_high = 0;
+ sdata->u.mgd.last_cqm_event_signal = 0;
+
+ /* tell the driver upon association, unless already associated */
+ if (sdata->u.mgd.associated &&
+ sdata->vif.driver_flags & IEEE80211_VIF_SUPPORTS_CQM_RSSI)
+ ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_CQM);
+
+ return 0;
+}
+
+static int ieee80211_set_cqm_rssi_range_config(struct wiphy *wiphy,
+ struct net_device *dev,
+ s32 rssi_low, s32 rssi_high)
+{
+ struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+ struct ieee80211_vif *vif = &sdata->vif;
+ struct ieee80211_bss_conf *bss_conf = &vif->bss_conf;
+
+ if (sdata->vif.driver_flags & IEEE80211_VIF_BEACON_FILTER)
+ return -EOPNOTSUPP;
+
+ bss_conf->cqm_rssi_low = rssi_low;
+ bss_conf->cqm_rssi_high = rssi_high;
+ bss_conf->cqm_rssi_thold = 0;
+ bss_conf->cqm_rssi_hyst = 0;
sdata->u.mgd.last_cqm_event_signal = 0;
/* tell the driver upon association, unless already associated */
@@ -2658,6 +2730,21 @@ static int ieee80211_set_bitrate_mask(struct wiphy *wiphy,
return ret;
}
+ /*
+ * If active validate the setting and reject it if it doesn't leave
+ * at least one basic rate usable, since we really have to be able
+ * to send something, and if we're an AP we have to be able to do
+ * so at a basic rate so that all clients can receive it.
+ */
+ if (rcu_access_pointer(sdata->vif.chanctx_conf) &&
+ sdata->vif.bss_conf.chandef.chan) {
+ u32 basic_rates = sdata->vif.bss_conf.basic_rates;
+ enum nl80211_band band = sdata->vif.bss_conf.chandef.chan->band;
+
+ if (!(mask->control[band].legacy & basic_rates))
+ return -EINVAL;
+ }
+
for (i = 0; i < NUM_NL80211_BANDS; i++) {
struct ieee80211_supported_band *sband = wiphy->bands[i];
int j;
@@ -3639,6 +3726,7 @@ const struct cfg80211_ops mac80211_config_ops = {
.mgmt_tx = ieee80211_mgmt_tx,
.mgmt_tx_cancel_wait = ieee80211_mgmt_tx_cancel_wait,
.set_cqm_rssi_config = ieee80211_set_cqm_rssi_config,
+ .set_cqm_rssi_range_config = ieee80211_set_cqm_rssi_range_config,
.mgmt_frame_register = ieee80211_mgmt_frame_register,
.set_antenna = ieee80211_set_antenna,
.get_antenna = ieee80211_get_antenna,
diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
index 98999d3d5262..364d4e137649 100644
--- a/net/mac80211/ibss.c
+++ b/net/mac80211/ibss.c
@@ -66,6 +66,8 @@ ieee80211_ibss_build_presp(struct ieee80211_sub_if_data *sdata,
2 + (IEEE80211_MAX_SUPP_RATES - 8) +
2 + sizeof(struct ieee80211_ht_cap) +
2 + sizeof(struct ieee80211_ht_operation) +
+ 2 + sizeof(struct ieee80211_vht_cap) +
+ 2 + sizeof(struct ieee80211_vht_operation) +
ifibss->ie_len;
presp = kzalloc(sizeof(*presp) + frame_len, GFP_KERNEL);
if (!presp)
@@ -425,7 +427,7 @@ static void ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
case NL80211_CHAN_WIDTH_5:
case NL80211_CHAN_WIDTH_10:
cfg80211_chandef_create(&chandef, cbss->channel,
- NL80211_CHAN_WIDTH_20_NOHT);
+ NL80211_CHAN_NO_HT);
chandef.width = sdata->u.ibss.chandef.width;
break;
case NL80211_CHAN_WIDTH_80:
@@ -437,7 +439,7 @@ static void ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
default:
/* fall back to 20 MHz for unsupported modes */
cfg80211_chandef_create(&chandef, cbss->channel,
- NL80211_CHAN_WIDTH_20_NOHT);
+ NL80211_CHAN_NO_HT);
break;
}
@@ -992,7 +994,7 @@ static void ieee80211_update_sta_info(struct ieee80211_sub_if_data *sdata,
enum nl80211_band band = rx_status->band;
enum nl80211_bss_scan_width scan_width;
struct ieee80211_local *local = sdata->local;
- struct ieee80211_supported_band *sband = local->hw.wiphy->bands[band];
+ struct ieee80211_supported_band *sband;
bool rates_updated = false;
u32 supp_rates = 0;
@@ -1002,6 +1004,10 @@ static void ieee80211_update_sta_info(struct ieee80211_sub_if_data *sdata,
if (!ether_addr_equal(mgmt->bssid, sdata->u.ibss.bssid))
return;
+ sband = local->hw.wiphy->bands[band];
+ if (WARN_ON(!sband))
+ return;
+
rcu_read_lock();
sta = sta_info_get(sdata, mgmt->sa);
@@ -1014,9 +1020,9 @@ static void ieee80211_update_sta_info(struct ieee80211_sub_if_data *sdata,
prev_rates = sta->sta.supp_rates[band];
/* make sure mandatory rates are always added */
scan_width = NL80211_BSS_CHAN_WIDTH_20;
- if (rx_status->flag & RX_FLAG_5MHZ)
+ if (rx_status->bw == RATE_INFO_BW_5)
scan_width = NL80211_BSS_CHAN_WIDTH_5;
- if (rx_status->flag & RX_FLAG_10MHZ)
+ else if (rx_status->bw == RATE_INFO_BW_10)
scan_width = NL80211_BSS_CHAN_WIDTH_10;
sta->sta.supp_rates[band] = supp_rates |
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 0e718437d080..f8f6c148f554 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -839,6 +839,8 @@ struct txq_info {
struct ieee80211_if_mntr {
u32 flags;
u8 mu_follow_addr[ETH_ALEN] __aligned(2);
+
+ struct list_head list;
};
/**
@@ -999,21 +1001,6 @@ sdata_assert_lock(struct ieee80211_sub_if_data *sdata)
lockdep_assert_held(&sdata->wdev.mtx);
}
-static inline enum nl80211_band
-ieee80211_get_sdata_band(struct ieee80211_sub_if_data *sdata)
-{
- enum nl80211_band band = NL80211_BAND_2GHZ;
- struct ieee80211_chanctx_conf *chanctx_conf;
-
- rcu_read_lock();
- chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf);
- if (!WARN_ON(!chanctx_conf))
- band = chanctx_conf->def.chan->band;
- rcu_read_unlock();
-
- return band;
-}
-
static inline int
ieee80211_chandef_get_shift(struct cfg80211_chan_def *chandef)
{
@@ -1259,6 +1246,7 @@ struct ieee80211_local {
/* see iface.c */
struct list_head interfaces;
+ struct list_head mon_list; /* only that are IFF_UP && !cooked */
struct mutex iflist_mtx;
/*
@@ -1418,6 +1406,27 @@ IEEE80211_WDEV_TO_SUB_IF(struct wireless_dev *wdev)
return container_of(wdev, struct ieee80211_sub_if_data, wdev);
}
+static inline struct ieee80211_supported_band *
+ieee80211_get_sband(struct ieee80211_sub_if_data *sdata)
+{
+ struct ieee80211_local *local = sdata->local;
+ struct ieee80211_chanctx_conf *chanctx_conf;
+ enum nl80211_band band;
+
+ rcu_read_lock();
+ chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf);
+
+ if (WARN_ON(!chanctx_conf)) {
+ rcu_read_unlock();
+ return NULL;
+ }
+
+ band = chanctx_conf->def.chan->band;
+ rcu_read_unlock();
+
+ return local->hw.wiphy->bands[band];
+}
+
/* this struct represents 802.11n's RA/TID combination */
struct ieee80211_ra_tid {
u8 ra[ETH_ALEN];
@@ -1474,6 +1483,7 @@ struct ieee802_11_elems {
const u8 *opmode_notif;
const struct ieee80211_sec_chan_offs_ie *sec_chan_offs;
const struct ieee80211_mesh_chansw_params_ie *mesh_chansw_params_ie;
+ const struct ieee80211_bss_max_idle_period_ie *max_idle_period_ie;
/* length of them, respectively */
u8 ext_capab_len;
@@ -1527,9 +1537,9 @@ ieee80211_have_rx_timestamp(struct ieee80211_rx_status *status)
status->flag & RX_FLAG_MACTIME_END);
if (status->flag & (RX_FLAG_MACTIME_START | RX_FLAG_MACTIME_END))
return true;
- /* can't handle HT/VHT preamble yet */
+ /* can't handle non-legacy preamble yet */
if (status->flag & RX_FLAG_MACTIME_PLCP_START &&
- !(status->flag & (RX_FLAG_HT | RX_FLAG_VHT)))
+ status->encoding != RX_ENC_LEGACY)
return true;
return false;
}
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 5bb0c5012819..3bd5b81f5d81 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -676,7 +676,8 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up)
set_bit(SDATA_STATE_RUNNING, &sdata->state);
- if (sdata->vif.type == NL80211_IFTYPE_WDS) {
+ switch (sdata->vif.type) {
+ case NL80211_IFTYPE_WDS:
/* Create STA entry for the WDS peer */
sta = sta_info_alloc(sdata, sdata->u.wds.remote_addr,
GFP_KERNEL);
@@ -697,8 +698,17 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up)
rate_control_rate_init(sta);
netif_carrier_on(dev);
- } else if (sdata->vif.type == NL80211_IFTYPE_P2P_DEVICE) {
+ break;
+ case NL80211_IFTYPE_P2P_DEVICE:
rcu_assign_pointer(local->p2p_sdata, sdata);
+ break;
+ case NL80211_IFTYPE_MONITOR:
+ if (sdata->u.mntr.flags & MONITOR_FLAG_COOK_FRAMES)
+ break;
+ list_add_tail_rcu(&sdata->u.mntr.list, &local->mon_list);
+ break;
+ default:
+ break;
}
/*
@@ -817,6 +827,11 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
case NL80211_IFTYPE_AP:
cancel_work_sync(&sdata->u.ap.request_smps_work);
break;
+ case NL80211_IFTYPE_MONITOR:
+ if (sdata->u.mntr.flags & MONITOR_FLAG_COOK_FRAMES)
+ break;
+ list_del_rcu(&sdata->u.mntr.list);
+ break;
default:
break;
}
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 56fb47953b72..8aa1f5b6a051 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -253,6 +253,7 @@ static void ieee80211_restart_work(struct work_struct *work)
WARN(test_bit(SCAN_HW_SCANNING, &local->scanning),
"%s called with hardware scan in progress\n", __func__);
+ flush_work(&local->radar_detected_work);
rtnl_lock();
list_for_each_entry(sdata, &local->interfaces, list)
flush_delayed_work(&sdata->dec_tailroom_needed_wk);
@@ -603,6 +604,7 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len,
ARRAY_SIZE(local->ext_capa);
INIT_LIST_HEAD(&local->interfaces);
+ INIT_LIST_HEAD(&local->mon_list);
__hw_addr_init(&local->mc_list);
@@ -1186,6 +1188,7 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw)
cancel_work_sync(&local->reconfig_filter);
cancel_work_sync(&local->tdls_chsw_work);
flush_work(&local->sched_scan_stopped_work);
+ flush_work(&local->radar_detected_work);
ieee80211_clear_tx_pending(local);
rate_control_deinitialize(local);
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index 6e7b6a07b7d5..737e1f082b0d 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -63,6 +63,7 @@ bool mesh_matches_local(struct ieee80211_sub_if_data *sdata,
struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
u32 basic_rates = 0;
struct cfg80211_chan_def sta_chan_def;
+ struct ieee80211_supported_band *sband;
/*
* As support for each feature is added, check for matching
@@ -83,7 +84,11 @@ bool mesh_matches_local(struct ieee80211_sub_if_data *sdata,
(ifmsh->mesh_auth_id == ie->mesh_config->meshconf_auth)))
return false;
- ieee80211_sta_get_rates(sdata, ie, ieee80211_get_sdata_band(sdata),
+ sband = ieee80211_get_sband(sdata);
+ if (!sband)
+ return false;
+
+ ieee80211_sta_get_rates(sdata, ie, sband->band,
&basic_rates);
if (sdata->vif.bss_conf.basic_rates != basic_rates)
@@ -399,12 +404,13 @@ static int mesh_add_ds_params_ie(struct ieee80211_sub_if_data *sdata,
int mesh_add_ht_cap_ie(struct ieee80211_sub_if_data *sdata,
struct sk_buff *skb)
{
- struct ieee80211_local *local = sdata->local;
- enum nl80211_band band = ieee80211_get_sdata_band(sdata);
struct ieee80211_supported_band *sband;
u8 *pos;
- sband = local->hw.wiphy->bands[band];
+ sband = ieee80211_get_sband(sdata);
+ if (!sband)
+ return -EINVAL;
+
if (!sband->ht_cap.ht_supported ||
sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_20_NOHT ||
sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_5 ||
@@ -462,12 +468,13 @@ int mesh_add_ht_oper_ie(struct ieee80211_sub_if_data *sdata,
int mesh_add_vht_cap_ie(struct ieee80211_sub_if_data *sdata,
struct sk_buff *skb)
{
- struct ieee80211_local *local = sdata->local;
- enum nl80211_band band = ieee80211_get_sdata_band(sdata);
struct ieee80211_supported_band *sband;
u8 *pos;
- sband = local->hw.wiphy->bands[band];
+ sband = ieee80211_get_sband(sdata);
+ if (!sband)
+ return -EINVAL;
+
if (!sband->vht_cap.vht_supported ||
sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_20_NOHT ||
sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_5 ||
@@ -916,12 +923,16 @@ ieee80211_mesh_process_chnswitch(struct ieee80211_sub_if_data *sdata,
struct cfg80211_csa_settings params;
struct ieee80211_csa_ie csa_ie;
struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
- enum nl80211_band band = ieee80211_get_sdata_band(sdata);
+ struct ieee80211_supported_band *sband;
int err;
u32 sta_flags;
sdata_assert_lock(sdata);
+ sband = ieee80211_get_sband(sdata);
+ if (!sband)
+ return false;
+
sta_flags = IEEE80211_STA_DISABLE_VHT;
switch (sdata->vif.bss_conf.chandef.width) {
case NL80211_CHAN_WIDTH_20_NOHT:
@@ -935,7 +946,7 @@ ieee80211_mesh_process_chnswitch(struct ieee80211_sub_if_data *sdata,
memset(&params, 0, sizeof(params));
memset(&csa_ie, 0, sizeof(csa_ie));
- err = ieee80211_parse_ch_switch_ie(sdata, elems, band,
+ err = ieee80211_parse_ch_switch_ie(sdata, elems, sband->band,
sta_flags, sdata->vif.addr,
&csa_ie);
if (err < 0)
@@ -1100,8 +1111,14 @@ static void ieee80211_mesh_rx_bcn_presp(struct ieee80211_sub_if_data *sdata,
if (!channel || channel->flags & IEEE80211_CHAN_DISABLED)
return;
- if (mesh_matches_local(sdata, &elems))
- mesh_neighbour_update(sdata, mgmt->sa, &elems);
+ if (mesh_matches_local(sdata, &elems)) {
+ mpl_dbg(sdata, "rssi_threshold=%d,rx_status->signal=%d\n",
+ sdata->u.mesh.mshcfg.rssi_threshold, rx_status->signal);
+ if (!sdata->u.mesh.user_mpm ||
+ sdata->u.mesh.mshcfg.rssi_threshold == 0 ||
+ sdata->u.mesh.mshcfg.rssi_threshold < rx_status->signal)
+ mesh_neighbour_update(sdata, mgmt->sa, &elems);
+ }
if (ifmsh->sync_ops)
ifmsh->sync_ops->rx_bcn_presp(sdata,
diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c
index b747c9645e43..4005edd71fe8 100644
--- a/net/mac80211/mesh_hwmp.c
+++ b/net/mac80211/mesh_hwmp.c
@@ -16,6 +16,7 @@
#define TEST_FRAME_LEN 8192
#define MAX_METRIC 0xffffffff
#define ARITH_SHIFT 8
+#define LINK_FAIL_THRESH 95
#define MAX_PREQ_QUEUE_LEN 64
@@ -307,10 +308,12 @@ void ieee80211s_update_metric(struct ieee80211_local *local,
failed = !(txinfo->flags & IEEE80211_TX_STAT_ACK);
- /* moving average, scaled to 100 */
- sta->mesh->fail_avg =
- ((80 * sta->mesh->fail_avg + 5) / 100 + 20 * failed);
- if (sta->mesh->fail_avg > 95)
+ /* moving average, scaled to 100.
+ * feed failure as 100 and success as 0
+ */
+ ewma_mesh_fail_avg_add(&sta->mesh->fail_avg, failed * 100);
+ if (ewma_mesh_fail_avg_read(&sta->mesh->fail_avg) >
+ LINK_FAIL_THRESH)
mesh_plink_broken(sta);
}
@@ -325,6 +328,8 @@ static u32 airtime_link_metric_get(struct ieee80211_local *local,
int rate, err;
u32 tx_time, estimated_retx;
u64 result;
+ unsigned long fail_avg =
+ ewma_mesh_fail_avg_read(&sta->mesh->fail_avg);
/* Try to get rate based on HW/SW RC algorithm.
* Rate is returned in units of Kbps, correct this
@@ -336,7 +341,7 @@ static u32 airtime_link_metric_get(struct ieee80211_local *local,
if (rate) {
err = 0;
} else {
- if (sta->mesh->fail_avg >= 100)
+ if (fail_avg > LINK_FAIL_THRESH)
return MAX_METRIC;
sta_set_rate_info_tx(sta, &sta->tx_stats.last_rate, &rinfo);
@@ -344,7 +349,7 @@ static u32 airtime_link_metric_get(struct ieee80211_local *local,
if (WARN_ON(!rate))
return MAX_METRIC;
- err = (sta->mesh->fail_avg << ARITH_SHIFT) / 100;
+ err = (fail_avg << ARITH_SHIFT) / 100;
}
/* bitrate is in units of 100 Kbps, while we need rate in units of
@@ -484,6 +489,9 @@ static u32 hwmp_route_info_get(struct ieee80211_sub_if_data *sdata,
? mpath->exp_time : exp_time;
mesh_path_activate(mpath);
spin_unlock_bh(&mpath->state_lock);
+ ewma_mesh_fail_avg_init(&sta->mesh->fail_avg);
+ /* init it at a low value - 0 start is tricky */
+ ewma_mesh_fail_avg_add(&sta->mesh->fail_avg, 1);
mesh_path_tx_pending(mpath);
/* draft says preq_id should be saved to, but there does
* not seem to be any use for it, skipping by now
@@ -522,6 +530,9 @@ static u32 hwmp_route_info_get(struct ieee80211_sub_if_data *sdata,
? mpath->exp_time : exp_time;
mesh_path_activate(mpath);
spin_unlock_bh(&mpath->state_lock);
+ ewma_mesh_fail_avg_init(&sta->mesh->fail_avg);
+ /* init it at a low value - 0 start is tricky */
+ ewma_mesh_fail_avg_add(&sta->mesh->fail_avg, 1);
mesh_path_tx_pending(mpath);
} else
spin_unlock_bh(&mpath->state_lock);
diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c
index f0e6175a9821..97269caafecd 100644
--- a/net/mac80211/mesh_pathtbl.c
+++ b/net/mac80211/mesh_pathtbl.c
@@ -397,11 +397,10 @@ struct mesh_path *mesh_path_new(struct ieee80211_sub_if_data *sdata,
new_mpath->sdata = sdata;
new_mpath->flags = 0;
skb_queue_head_init(&new_mpath->frame_queue);
- new_mpath->timer.data = (unsigned long) new_mpath;
- new_mpath->timer.function = mesh_path_timer;
new_mpath->exp_time = jiffies;
spin_lock_init(&new_mpath->state_lock);
- init_timer(&new_mpath->timer);
+ setup_timer(&new_mpath->timer, mesh_path_timer,
+ (unsigned long) new_mpath);
return new_mpath;
}
@@ -829,6 +828,9 @@ void mesh_path_fix_nexthop(struct mesh_path *mpath, struct sta_info *next_hop)
mpath->flags = MESH_PATH_FIXED | MESH_PATH_SN_VALID;
mesh_path_activate(mpath);
spin_unlock_bh(&mpath->state_lock);
+ ewma_mesh_fail_avg_init(&next_hop->mesh->fail_avg);
+ /* init it at a low value - 0 start is tricky */
+ ewma_mesh_fail_avg_add(&next_hop->mesh->fail_avg, 1);
mesh_path_tx_pending(mpath);
}
diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c
index 953d71e784a9..1131cd504a15 100644
--- a/net/mac80211/mesh_plink.c
+++ b/net/mac80211/mesh_plink.c
@@ -95,19 +95,23 @@ static inline void mesh_plink_fsm_restart(struct sta_info *sta)
static u32 mesh_set_short_slot_time(struct ieee80211_sub_if_data *sdata)
{
struct ieee80211_local *local = sdata->local;
- enum nl80211_band band = ieee80211_get_sdata_band(sdata);
- struct ieee80211_supported_band *sband = local->hw.wiphy->bands[band];
+ struct ieee80211_supported_band *sband;
struct sta_info *sta;
u32 erp_rates = 0, changed = 0;
int i;
bool short_slot = false;
- if (band == NL80211_BAND_5GHZ) {
+ sband = ieee80211_get_sband(sdata);
+ if (!sband)
+ return changed;
+
+ if (sband->band == NL80211_BAND_5GHZ) {
/* (IEEE 802.11-2012 19.4.5) */
short_slot = true;
goto out;
- } else if (band != NL80211_BAND_2GHZ)
+ } else if (sband->band != NL80211_BAND_2GHZ) {
goto out;
+ }
for (i = 0; i < sband->n_bitrates; i++)
if (sband->bitrates[i].flags & IEEE80211_RATE_ERP_G)
@@ -123,7 +127,7 @@ static u32 mesh_set_short_slot_time(struct ieee80211_sub_if_data *sdata)
continue;
short_slot = false;
- if (erp_rates & sta->sta.supp_rates[band])
+ if (erp_rates & sta->sta.supp_rates[sband->band])
short_slot = true;
else
break;
@@ -249,7 +253,15 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata,
mgmt->u.action.u.self_prot.action_code = action;
if (action != WLAN_SP_MESH_PEERING_CLOSE) {
- enum nl80211_band band = ieee80211_get_sdata_band(sdata);
+ struct ieee80211_supported_band *sband;
+ enum nl80211_band band;
+
+ sband = ieee80211_get_sband(sdata);
+ if (!sband) {
+ err = -EINVAL;
+ goto free;
+ }
+ band = sband->band;
/* capability info */
pos = skb_put(skb, 2);
@@ -395,13 +407,16 @@ static void mesh_sta_info_init(struct ieee80211_sub_if_data *sdata,
struct ieee802_11_elems *elems, bool insert)
{
struct ieee80211_local *local = sdata->local;
- enum nl80211_band band = ieee80211_get_sdata_band(sdata);
struct ieee80211_supported_band *sband;
u32 rates, basic_rates = 0, changed = 0;
enum ieee80211_sta_rx_bandwidth bw = sta->sta.bandwidth;
- sband = local->hw.wiphy->bands[band];
- rates = ieee80211_sta_get_rates(sdata, elems, band, &basic_rates);
+ sband = ieee80211_get_sband(sdata);
+ if (!sband)
+ return;
+
+ rates = ieee80211_sta_get_rates(sdata, elems, sband->band,
+ &basic_rates);
spin_lock_bh(&sta->mesh->plink_lock);
sta->rx_stats.last_rx = jiffies;
@@ -412,9 +427,9 @@ static void mesh_sta_info_init(struct ieee80211_sub_if_data *sdata,
goto out;
sta->mesh->processed_beacon = true;
- if (sta->sta.supp_rates[band] != rates)
+ if (sta->sta.supp_rates[sband->band] != rates)
changed |= IEEE80211_RC_SUPP_RATES_CHANGED;
- sta->sta.supp_rates[band] = rates;
+ sta->sta.supp_rates[sband->band] = rates;
if (ieee80211_ht_cap_ie_to_sta_ht_cap(sdata, sband,
elems->ht_cap_elem, sta))
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 6e90301154d5..0ea9712bd99e 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -6,7 +6,7 @@
* Copyright 2006-2007 Jiri Benc <jbenc@suse.cz>
* Copyright 2007, Michael Wu <flamingice@sourmilk.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
- * Copyright (C) 2015 - 2016 Intel Deutschland GmbH
+ * Copyright (C) 2015 - 2017 Intel Deutschland GmbH
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -1855,11 +1855,16 @@ static u32 ieee80211_handle_bss_capability(struct ieee80211_sub_if_data *sdata,
u16 capab, bool erp_valid, u8 erp)
{
struct ieee80211_bss_conf *bss_conf = &sdata->vif.bss_conf;
+ struct ieee80211_supported_band *sband;
u32 changed = 0;
bool use_protection;
bool use_short_preamble;
bool use_short_slot;
+ sband = ieee80211_get_sband(sdata);
+ if (!sband)
+ return changed;
+
if (erp_valid) {
use_protection = (erp & WLAN_ERP_USE_PROTECTION) != 0;
use_short_preamble = (erp & WLAN_ERP_BARKER_PREAMBLE) == 0;
@@ -1869,7 +1874,7 @@ static u32 ieee80211_handle_bss_capability(struct ieee80211_sub_if_data *sdata,
}
use_short_slot = !!(capab & WLAN_CAPABILITY_SHORT_SLOT_TIME);
- if (ieee80211_get_sdata_band(sdata) == NL80211_BAND_5GHZ)
+ if (sband->band == NL80211_BAND_5GHZ)
use_short_slot = true;
if (use_protection != bss_conf->use_cts_prot) {
@@ -1908,6 +1913,8 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata,
sdata->u.mgd.associated = cbss;
memcpy(sdata->u.mgd.bssid, cbss->bssid, ETH_ALEN);
+ ieee80211_check_rate_mask(sdata);
+
sdata->u.mgd.flags |= IEEE80211_STA_RESET_SIGNAL_AVE;
if (sdata->vif.p2p ||
@@ -2797,8 +2804,9 @@ static void ieee80211_rx_mgmt_disassoc(struct ieee80211_sub_if_data *sdata,
reason_code = le16_to_cpu(mgmt->u.disassoc.reason_code);
- sdata_info(sdata, "disassociated from %pM (Reason: %u)\n",
- mgmt->sa, reason_code);
+ sdata_info(sdata, "disassociated from %pM (Reason: %u=%s)\n",
+ mgmt->sa, reason_code,
+ ieee80211_get_reason_code_string(reason_code));
ieee80211_set_disassoc(sdata, 0, 0, false, NULL);
@@ -2822,15 +2830,15 @@ static void ieee80211_get_rates(struct ieee80211_supported_band *sband,
*have_higher_than_11mbit = true;
/*
- * BSS_MEMBERSHIP_SELECTOR_HT_PHY is defined in 802.11n-2009
- * 7.3.2.2 as a magic value instead of a rate. Hence, skip it.
+ * Skip HT and VHT BSS membership selectors since they're not
+ * rates.
*
- * Note: Even through the membership selector and the basic
+ * Note: Even though the membership selector and the basic
* rate flag share the same bit, they are not exactly
* the same.
*/
- if (!!(supp_rates[i] & 0x80) &&
- (supp_rates[i] & 0x7f) == BSS_MEMBERSHIP_SELECTOR_HT_PHY)
+ if (supp_rates[i] == (0x80 | BSS_MEMBERSHIP_SELECTOR_HT_PHY) ||
+ supp_rates[i] == (0x80 | BSS_MEMBERSHIP_SELECTOR_VHT_PHY))
continue;
for (j = 0; j < sband->n_bitrates; j++) {
@@ -3001,7 +3009,12 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
goto out;
}
- sband = local->hw.wiphy->bands[ieee80211_get_sdata_band(sdata)];
+ sband = ieee80211_get_sband(sdata);
+ if (!sband) {
+ mutex_unlock(&sdata->local->sta_mtx);
+ ret = false;
+ goto out;
+ }
/* Set up internal HT/VHT capabilities */
if (elems.ht_cap_elem && !(ifmgd->flags & IEEE80211_STA_DISABLE_HT))
@@ -3085,6 +3098,18 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
}
changed |= BSS_CHANGED_QOS;
+ if (elems.max_idle_period_ie) {
+ bss_conf->max_idle_period =
+ le16_to_cpu(elems.max_idle_period_ie->max_idle_period);
+ bss_conf->protected_keep_alive =
+ !!(elems.max_idle_period_ie->idle_options &
+ WLAN_IDLE_OPTIONS_PROTECTED_KEEP_ALIVE);
+ changed |= BSS_CHANGED_KEEP_ALIVE;
+ } else {
+ bss_conf->max_idle_period = 0;
+ bss_conf->protected_keep_alive = false;
+ }
+
/* set AID and assoc capability,
* ieee80211_set_associated() will tell the driver */
bss_conf->aid = aid;
@@ -3430,6 +3455,30 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
}
}
+ if (bss_conf->cqm_rssi_low &&
+ ifmgd->count_beacon_signal >= IEEE80211_SIGNAL_AVE_MIN_COUNT) {
+ int sig = -ewma_beacon_signal_read(&ifmgd->ave_beacon_signal);
+ int last_event = ifmgd->last_cqm_event_signal;
+ int low = bss_conf->cqm_rssi_low;
+ int high = bss_conf->cqm_rssi_high;
+
+ if (sig < low &&
+ (last_event == 0 || last_event >= low)) {
+ ifmgd->last_cqm_event_signal = sig;
+ ieee80211_cqm_rssi_notify(
+ &sdata->vif,
+ NL80211_CQM_RSSI_THRESHOLD_EVENT_LOW,
+ sig, GFP_KERNEL);
+ } else if (sig > high &&
+ (last_event == 0 || last_event <= high)) {
+ ifmgd->last_cqm_event_signal = sig;
+ ieee80211_cqm_rssi_notify(
+ &sdata->vif,
+ NL80211_CQM_RSSI_THRESHOLD_EVENT_HIGH,
+ sig, GFP_KERNEL);
+ }
+ }
+
if (ifmgd->flags & IEEE80211_STA_CONNECTION_POLL) {
mlme_dbg_ratelimited(sdata,
"cancelling AP probe due to a received beacon\n");
@@ -4333,6 +4382,10 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata,
if (WARN_ON(!ifmgd->auth_data && !ifmgd->assoc_data))
return -EINVAL;
+ /* If a reconfig is happening, bail out */
+ if (local->in_reconfig)
+ return -EBUSY;
+
if (assoc) {
rcu_read_lock();
have_sta = sta_info_get(sdata, cbss->bssid);
diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c
index 76a8bcd8ef11..a87d195c4a61 100644
--- a/net/mac80211/pm.c
+++ b/net/mac80211/pm.c
@@ -10,7 +10,7 @@ static void ieee80211_sched_scan_cancel(struct ieee80211_local *local)
{
if (ieee80211_request_sched_scan_stop(local))
return;
- cfg80211_sched_scan_stopped_rtnl(local->hw.wiphy);
+ cfg80211_sched_scan_stopped_rtnl(local->hw.wiphy, 0);
}
int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan)
diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c
index 206698bc93f4..ea1f4315c521 100644
--- a/net/mac80211/rate.c
+++ b/net/mac80211/rate.c
@@ -2,6 +2,7 @@
* Copyright 2002-2005, Instant802 Networks, Inc.
* Copyright 2005-2006, Devicescape Software, Inc.
* Copyright (c) 2006 Jiri Benc <jbenc@suse.cz>
+ * Copyright 2017 Intel Deutschland GmbH
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -61,6 +62,28 @@ void rate_control_rate_init(struct sta_info *sta)
set_sta_flag(sta, WLAN_STA_RATE_CONTROL);
}
+void rate_control_tx_status(struct ieee80211_local *local,
+ struct ieee80211_supported_band *sband,
+ struct ieee80211_tx_status *st)
+{
+ struct rate_control_ref *ref = local->rate_ctrl;
+ struct sta_info *sta = container_of(st->sta, struct sta_info, sta);
+ void *priv_sta = sta->rate_ctrl_priv;
+
+ if (!ref || !test_sta_flag(sta, WLAN_STA_RATE_CONTROL))
+ return;
+
+ spin_lock_bh(&sta->rate_ctrl_lock);
+ if (ref->ops->tx_status_ext)
+ ref->ops->tx_status_ext(ref->priv, sband, priv_sta, st);
+ else if (st->skb)
+ ref->ops->tx_status(ref->priv, sband, st->sta, priv_sta, st->skb);
+ else
+ WARN_ON_ONCE(1);
+
+ spin_unlock_bh(&sta->rate_ctrl_lock);
+}
+
void rate_control_rate_update(struct ieee80211_local *local,
struct ieee80211_supported_band *sband,
struct sta_info *sta, u32 changed)
@@ -173,9 +196,11 @@ ieee80211_rate_control_ops_get(const char *name)
/* try default if specific alg requested but not found */
ops = ieee80211_try_rate_control_ops_get(ieee80211_default_rc_algo);
- /* try built-in one if specific alg requested but not found */
- if (!ops && strlen(CONFIG_MAC80211_RC_DEFAULT))
+ /* Note: check for > 0 is intentional to avoid clang warning */
+ if (!ops && (strlen(CONFIG_MAC80211_RC_DEFAULT) > 0))
+ /* try built-in one if specific alg requested but not found */
ops = ieee80211_try_rate_control_ops_get(CONFIG_MAC80211_RC_DEFAULT);
+
kernel_param_unlock(THIS_MODULE);
return ops;
@@ -208,7 +233,6 @@ static struct rate_control_ref *rate_control_alloc(const char *name,
ref = kmalloc(sizeof(struct rate_control_ref), GFP_KERNEL);
if (!ref)
return NULL;
- ref->local = local;
ref->ops = ieee80211_rate_control_ops_get(name);
if (!ref->ops)
goto free;
@@ -229,18 +253,45 @@ free:
return NULL;
}
-static void rate_control_free(struct rate_control_ref *ctrl_ref)
+static void rate_control_free(struct ieee80211_local *local,
+ struct rate_control_ref *ctrl_ref)
{
ctrl_ref->ops->free(ctrl_ref->priv);
#ifdef CONFIG_MAC80211_DEBUGFS
- debugfs_remove_recursive(ctrl_ref->local->debugfs.rcdir);
- ctrl_ref->local->debugfs.rcdir = NULL;
+ debugfs_remove_recursive(local->debugfs.rcdir);
+ local->debugfs.rcdir = NULL;
#endif
kfree(ctrl_ref);
}
+void ieee80211_check_rate_mask(struct ieee80211_sub_if_data *sdata)
+{
+ struct ieee80211_local *local = sdata->local;
+ struct ieee80211_supported_band *sband;
+ u32 user_mask, basic_rates = sdata->vif.bss_conf.basic_rates;
+ enum nl80211_band band;
+
+ if (WARN_ON(!sdata->vif.bss_conf.chandef.chan))
+ return;
+
+ if (WARN_ON_ONCE(!basic_rates))
+ return;
+
+ band = sdata->vif.bss_conf.chandef.chan->band;
+ user_mask = sdata->rc_rateidx_mask[band];
+ sband = local->hw.wiphy->bands[band];
+
+ if (user_mask & basic_rates)
+ return;
+
+ sdata_dbg(sdata,
+ "no overlap between basic rates (0x%x) and user mask (0x%x on band %d) - clearing the latter",
+ basic_rates, user_mask, band);
+ sdata->rc_rateidx_mask[band] = (1 << sband->n_bitrates) - 1;
+}
+
static bool rc_no_data_or_no_ack_use_min(struct ieee80211_tx_rate_control *txrc)
{
struct sk_buff *skb = txrc->skb;
@@ -875,7 +926,9 @@ int rate_control_set_rates(struct ieee80211_hw *hw,
struct ieee80211_sta_rates *old;
struct ieee80211_supported_band *sband;
- sband = hw->wiphy->bands[ieee80211_get_sdata_band(sta->sdata)];
+ sband = ieee80211_get_sband(sta->sdata);
+ if (!sband)
+ return -EINVAL;
rate_control_apply_mask_ratetbl(sta, sband, rates);
/*
* mac80211 guarantees that this function will not be called
@@ -936,6 +989,6 @@ void rate_control_deinitialize(struct ieee80211_local *local)
return;
local->rate_ctrl = NULL;
- rate_control_free(ref);
+ rate_control_free(local, ref);
}
diff --git a/net/mac80211/rate.h b/net/mac80211/rate.h
index 8d3260785b94..8212bfeb71d6 100644
--- a/net/mac80211/rate.h
+++ b/net/mac80211/rate.h
@@ -20,7 +20,6 @@
#include "driver-ops.h"
struct rate_control_ref {
- struct ieee80211_local *local;
const struct rate_control_ops *ops;
void *priv;
};
@@ -29,47 +28,9 @@ void rate_control_get_rate(struct ieee80211_sub_if_data *sdata,
struct sta_info *sta,
struct ieee80211_tx_rate_control *txrc);
-static inline void rate_control_tx_status(struct ieee80211_local *local,
- struct ieee80211_supported_band *sband,
- struct sta_info *sta,
- struct sk_buff *skb)
-{
- struct rate_control_ref *ref = local->rate_ctrl;
- struct ieee80211_sta *ista = &sta->sta;
- void *priv_sta = sta->rate_ctrl_priv;
- struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
-
- if (!ref || !test_sta_flag(sta, WLAN_STA_RATE_CONTROL))
- return;
-
- spin_lock_bh(&sta->rate_ctrl_lock);
- if (ref->ops->tx_status)
- ref->ops->tx_status(ref->priv, sband, ista, priv_sta, skb);
- else
- ref->ops->tx_status_noskb(ref->priv, sband, ista, priv_sta, info);
- spin_unlock_bh(&sta->rate_ctrl_lock);
-}
-
-static inline void
-rate_control_tx_status_noskb(struct ieee80211_local *local,
- struct ieee80211_supported_band *sband,
- struct sta_info *sta,
- struct ieee80211_tx_info *info)
-{
- struct rate_control_ref *ref = local->rate_ctrl;
- struct ieee80211_sta *ista = &sta->sta;
- void *priv_sta = sta->rate_ctrl_priv;
-
- if (!ref || !test_sta_flag(sta, WLAN_STA_RATE_CONTROL))
- return;
-
- if (WARN_ON_ONCE(!ref->ops->tx_status_noskb))
- return;
-
- spin_lock_bh(&sta->rate_ctrl_lock);
- ref->ops->tx_status_noskb(ref->priv, sband, ista, priv_sta, info);
- spin_unlock_bh(&sta->rate_ctrl_lock);
-}
+void rate_control_tx_status(struct ieee80211_local *local,
+ struct ieee80211_supported_band *sband,
+ struct ieee80211_tx_status *st);
void rate_control_rate_init(struct sta_info *sta);
void rate_control_rate_update(struct ieee80211_local *local,
@@ -111,6 +72,8 @@ static inline void rate_control_remove_sta_debugfs(struct sta_info *sta)
#endif
}
+void ieee80211_check_rate_mask(struct ieee80211_sub_if_data *sdata);
+
/* Get a reference to the rate control algorithm. If `name' is NULL, get the
* first available algorithm. */
int ieee80211_init_rate_ctrl_alg(struct ieee80211_local *local,
diff --git a/net/mac80211/rc80211_minstrel.c b/net/mac80211/rc80211_minstrel.c
index 3ebe4405a2d4..9766c1cc4b0a 100644
--- a/net/mac80211/rc80211_minstrel.c
+++ b/net/mac80211/rc80211_minstrel.c
@@ -264,9 +264,9 @@ minstrel_update_stats(struct minstrel_priv *mp, struct minstrel_sta_info *mi)
static void
minstrel_tx_status(void *priv, struct ieee80211_supported_band *sband,
- struct ieee80211_sta *sta, void *priv_sta,
- struct ieee80211_tx_info *info)
+ void *priv_sta, struct ieee80211_tx_status *st)
{
+ struct ieee80211_tx_info *info = st->info;
struct minstrel_priv *mp = priv;
struct minstrel_sta_info *mi = priv_sta;
struct ieee80211_tx_rate *ar = info->status.rates;
@@ -726,7 +726,7 @@ static u32 minstrel_get_expected_throughput(void *priv_sta)
const struct rate_control_ops mac80211_minstrel = {
.name = "minstrel",
- .tx_status_noskb = minstrel_tx_status,
+ .tx_status_ext = minstrel_tx_status,
.get_rate = minstrel_get_rate,
.rate_init = minstrel_rate_init,
.alloc = minstrel_alloc,
diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c
index 8e783e197e93..4a5bdad9f303 100644
--- a/net/mac80211/rc80211_minstrel_ht.c
+++ b/net/mac80211/rc80211_minstrel_ht.c
@@ -678,9 +678,9 @@ minstrel_aggr_check(struct ieee80211_sta *pubsta, struct sk_buff *skb)
static void
minstrel_ht_tx_status(void *priv, struct ieee80211_supported_band *sband,
- struct ieee80211_sta *sta, void *priv_sta,
- struct ieee80211_tx_info *info)
+ void *priv_sta, struct ieee80211_tx_status *st)
{
+ struct ieee80211_tx_info *info = st->info;
struct minstrel_ht_sta_priv *msp = priv_sta;
struct minstrel_ht_sta *mi = &msp->ht;
struct ieee80211_tx_rate *ar = info->status.rates;
@@ -690,8 +690,8 @@ minstrel_ht_tx_status(void *priv, struct ieee80211_supported_band *sband,
int i;
if (!msp->is_ht)
- return mac80211_minstrel.tx_status_noskb(priv, sband, sta,
- &msp->legacy, info);
+ return mac80211_minstrel.tx_status_ext(priv, sband,
+ &msp->legacy, st);
/* This packet was aggregated but doesn't carry status info */
if ((info->flags & IEEE80211_TX_CTL_AMPDU) &&
@@ -1374,7 +1374,7 @@ static u32 minstrel_ht_get_expected_throughput(void *priv_sta)
static const struct rate_control_ops mac80211_minstrel_ht = {
.name = "minstrel_ht",
- .tx_status_noskb = minstrel_ht_tx_status,
+ .tx_status_ext = minstrel_ht_tx_status,
.get_rate = minstrel_ht_get_rate,
.rate_init = minstrel_ht_rate_init,
.rate_update = minstrel_ht_rate_update,
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 4d7543d1a62c..35f4c7d7a500 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -95,24 +95,13 @@ static u8 *ieee80211_get_bssid(struct ieee80211_hdr *hdr, size_t len,
* This function cleans up the SKB, i.e. it removes all the stuff
* only useful for monitoring.
*/
-static struct sk_buff *remove_monitor_info(struct ieee80211_local *local,
- struct sk_buff *skb,
- unsigned int rtap_vendor_space)
+static void remove_monitor_info(struct sk_buff *skb,
+ unsigned int present_fcs_len,
+ unsigned int rtap_vendor_space)
{
- if (ieee80211_hw_check(&local->hw, RX_INCLUDES_FCS)) {
- if (likely(skb->len > FCS_LEN))
- __pskb_trim(skb, skb->len - FCS_LEN);
- else {
- /* driver bug */
- WARN_ON(1);
- dev_kfree_skb(skb);
- return NULL;
- }
- }
-
+ if (present_fcs_len)
+ __pskb_trim(skb, skb->len - present_fcs_len);
__pskb_pull(skb, rtap_vendor_space);
-
- return skb;
}
static inline bool should_drop_frame(struct sk_buff *skb, int present_fcs_len,
@@ -167,7 +156,7 @@ ieee80211_rx_radiotap_hdrlen(struct ieee80211_local *local,
/* padding for RX_FLAGS if necessary */
len = ALIGN(len, 2);
- if (status->flag & RX_FLAG_HT) /* HT info */
+ if (status->encoding == RX_ENC_HT) /* HT info */
len += 3;
if (status->flag & RX_FLAG_AMPDU_DETAILS) {
@@ -175,7 +164,7 @@ ieee80211_rx_radiotap_hdrlen(struct ieee80211_local *local,
len += 8;
}
- if (status->flag & RX_FLAG_VHT) {
+ if (status->encoding == RX_ENC_VHT) {
len = ALIGN(len, 2);
len += 12;
}
@@ -340,12 +329,12 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
*pos |= IEEE80211_RADIOTAP_F_FCS;
if (status->flag & (RX_FLAG_FAILED_FCS_CRC | RX_FLAG_FAILED_PLCP_CRC))
*pos |= IEEE80211_RADIOTAP_F_BADFCS;
- if (status->flag & RX_FLAG_SHORTPRE)
+ if (status->enc_flags & RX_ENC_FLAG_SHORTPRE)
*pos |= IEEE80211_RADIOTAP_F_SHORTPRE;
pos++;
/* IEEE80211_RADIOTAP_RATE */
- if (!rate || status->flag & (RX_FLAG_HT | RX_FLAG_VHT)) {
+ if (!rate || status->encoding != RX_ENC_LEGACY) {
/*
* Without rate information don't add it. If we have,
* MCS information is a separate field in radiotap,
@@ -356,9 +345,9 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
} else {
int shift = 0;
rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_RATE);
- if (status->flag & RX_FLAG_10MHZ)
+ if (status->bw == RATE_INFO_BW_10)
shift = 1;
- else if (status->flag & RX_FLAG_5MHZ)
+ else if (status->bw == RATE_INFO_BW_5)
shift = 2;
*pos = DIV_ROUND_UP(rate->bitrate, 5 * (1 << shift));
}
@@ -367,14 +356,14 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
/* IEEE80211_RADIOTAP_CHANNEL */
put_unaligned_le16(status->freq, pos);
pos += 2;
- if (status->flag & RX_FLAG_10MHZ)
+ if (status->bw == RATE_INFO_BW_10)
channel_flags |= IEEE80211_CHAN_HALF;
- else if (status->flag & RX_FLAG_5MHZ)
+ else if (status->bw == RATE_INFO_BW_5)
channel_flags |= IEEE80211_CHAN_QUARTER;
if (status->band == NL80211_BAND_5GHZ)
channel_flags |= IEEE80211_CHAN_OFDM | IEEE80211_CHAN_5GHZ;
- else if (status->flag & (RX_FLAG_HT | RX_FLAG_VHT))
+ else if (status->encoding != RX_ENC_LEGACY)
channel_flags |= IEEE80211_CHAN_DYN | IEEE80211_CHAN_2GHZ;
else if (rate && rate->flags & IEEE80211_RATE_ERP_G)
channel_flags |= IEEE80211_CHAN_OFDM | IEEE80211_CHAN_2GHZ;
@@ -413,21 +402,21 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
put_unaligned_le16(rx_flags, pos);
pos += 2;
- if (status->flag & RX_FLAG_HT) {
+ if (status->encoding == RX_ENC_HT) {
unsigned int stbc;
rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_MCS);
*pos++ = local->hw.radiotap_mcs_details;
*pos = 0;
- if (status->flag & RX_FLAG_SHORT_GI)
+ if (status->enc_flags & RX_ENC_FLAG_SHORT_GI)
*pos |= IEEE80211_RADIOTAP_MCS_SGI;
- if (status->flag & RX_FLAG_40MHZ)
+ if (status->bw == RATE_INFO_BW_40)
*pos |= IEEE80211_RADIOTAP_MCS_BW_40;
- if (status->flag & RX_FLAG_HT_GF)
+ if (status->enc_flags & RX_ENC_FLAG_HT_GF)
*pos |= IEEE80211_RADIOTAP_MCS_FMT_GF;
- if (status->flag & RX_FLAG_LDPC)
+ if (status->enc_flags & RX_ENC_FLAG_LDPC)
*pos |= IEEE80211_RADIOTAP_MCS_FEC_LDPC;
- stbc = (status->flag & RX_FLAG_STBC_MASK) >> RX_FLAG_STBC_SHIFT;
+ stbc = (status->enc_flags & RX_ENC_FLAG_STBC_MASK) >> RX_ENC_FLAG_STBC_SHIFT;
*pos |= stbc << IEEE80211_RADIOTAP_MCS_STBC_SHIFT;
pos++;
*pos++ = status->rate_idx;
@@ -460,35 +449,40 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
*pos++ = 0;
}
- if (status->flag & RX_FLAG_VHT) {
+ if (status->encoding == RX_ENC_VHT) {
u16 known = local->hw.radiotap_vht_details;
rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_VHT);
put_unaligned_le16(known, pos);
pos += 2;
/* flags */
- if (status->flag & RX_FLAG_SHORT_GI)
+ if (status->enc_flags & RX_ENC_FLAG_SHORT_GI)
*pos |= IEEE80211_RADIOTAP_VHT_FLAG_SGI;
/* in VHT, STBC is binary */
- if (status->flag & RX_FLAG_STBC_MASK)
+ if (status->enc_flags & RX_ENC_FLAG_STBC_MASK)
*pos |= IEEE80211_RADIOTAP_VHT_FLAG_STBC;
- if (status->vht_flag & RX_VHT_FLAG_BF)
+ if (status->enc_flags & RX_ENC_FLAG_BF)
*pos |= IEEE80211_RADIOTAP_VHT_FLAG_BEAMFORMED;
pos++;
/* bandwidth */
- if (status->vht_flag & RX_VHT_FLAG_80MHZ)
+ switch (status->bw) {
+ case RATE_INFO_BW_80:
*pos++ = 4;
- else if (status->vht_flag & RX_VHT_FLAG_160MHZ)
+ break;
+ case RATE_INFO_BW_160:
*pos++ = 11;
- else if (status->flag & RX_FLAG_40MHZ)
+ break;
+ case RATE_INFO_BW_40:
*pos++ = 1;
- else /* 20 MHz */
+ break;
+ default:
*pos++ = 0;
+ }
/* MCS/NSS */
- *pos = (status->rate_idx << 4) | status->vht_nss;
+ *pos = (status->rate_idx << 4) | status->nss;
pos += 4;
/* coding field */
- if (status->flag & RX_FLAG_LDPC)
+ if (status->enc_flags & RX_ENC_FLAG_LDPC)
*pos |= IEEE80211_RADIOTAP_CODING_LDPC_USER0;
pos++;
/* group ID */
@@ -544,6 +538,59 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
}
}
+static struct sk_buff *
+ieee80211_make_monitor_skb(struct ieee80211_local *local,
+ struct sk_buff **origskb,
+ struct ieee80211_rate *rate,
+ int rtap_vendor_space, bool use_origskb)
+{
+ struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(*origskb);
+ int rt_hdrlen, needed_headroom;
+ struct sk_buff *skb;
+
+ /* room for the radiotap header based on driver features */
+ rt_hdrlen = ieee80211_rx_radiotap_hdrlen(local, status, *origskb);
+ needed_headroom = rt_hdrlen - rtap_vendor_space;
+
+ if (use_origskb) {
+ /* only need to expand headroom if necessary */
+ skb = *origskb;
+ *origskb = NULL;
+
+ /*
+ * This shouldn't trigger often because most devices have an
+ * RX header they pull before we get here, and that should
+ * be big enough for our radiotap information. We should
+ * probably export the length to drivers so that we can have
+ * them allocate enough headroom to start with.
+ */
+ if (skb_headroom(skb) < needed_headroom &&
+ pskb_expand_head(skb, needed_headroom, 0, GFP_ATOMIC)) {
+ dev_kfree_skb(skb);
+ return NULL;
+ }
+ } else {
+ /*
+ * Need to make a copy and possibly remove radiotap header
+ * and FCS from the original.
+ */
+ skb = skb_copy_expand(*origskb, needed_headroom, 0, GFP_ATOMIC);
+
+ if (!skb)
+ return NULL;
+ }
+
+ /* prepend radiotap information */
+ ieee80211_add_rx_radiotap_header(local, skb, rate, rt_hdrlen, true);
+
+ skb_reset_mac_header(skb);
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ skb->pkt_type = PACKET_OTHERHOST;
+ skb->protocol = htons(ETH_P_802_2);
+
+ return skb;
+}
+
/*
* This function copies a received frame to all monitor interfaces and
* returns a cleaned-up SKB that no longer includes the FCS nor the
@@ -555,13 +602,12 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb,
{
struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(origskb);
struct ieee80211_sub_if_data *sdata;
- int rt_hdrlen, needed_headroom;
- struct sk_buff *skb, *skb2;
- struct net_device *prev_dev = NULL;
+ struct sk_buff *monskb = NULL;
int present_fcs_len = 0;
unsigned int rtap_vendor_space = 0;
struct ieee80211_sub_if_data *monitor_sdata =
rcu_dereference(local->monitor_sdata);
+ bool only_monitor = false;
if (unlikely(status->flag & RX_FLAG_RADIOTAP_VENDOR_DATA)) {
struct ieee80211_vendor_radiotap *rtap = (void *)origskb->data;
@@ -578,8 +624,15 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb,
* the SKB because it has a bad FCS/PLCP checksum.
*/
- if (ieee80211_hw_check(&local->hw, RX_INCLUDES_FCS))
+ if (ieee80211_hw_check(&local->hw, RX_INCLUDES_FCS)) {
+ if (unlikely(origskb->len <= FCS_LEN)) {
+ /* driver bug */
+ WARN_ON(1);
+ dev_kfree_skb(origskb);
+ return NULL;
+ }
present_fcs_len = FCS_LEN;
+ }
/* ensure hdr->frame_control and vendor radiotap data are in skb head */
if (!pskb_may_pull(origskb, 2 + rtap_vendor_space)) {
@@ -587,89 +640,62 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb,
return NULL;
}
+ only_monitor = should_drop_frame(origskb, present_fcs_len,
+ rtap_vendor_space);
+
if (!local->monitors || (status->flag & RX_FLAG_SKIP_MONITOR)) {
- if (should_drop_frame(origskb, present_fcs_len,
- rtap_vendor_space)) {
+ if (only_monitor) {
dev_kfree_skb(origskb);
return NULL;
}
- return remove_monitor_info(local, origskb, rtap_vendor_space);
+ remove_monitor_info(origskb, present_fcs_len,
+ rtap_vendor_space);
+ return origskb;
}
ieee80211_handle_mu_mimo_mon(monitor_sdata, origskb, rtap_vendor_space);
- /* room for the radiotap header based on driver features */
- rt_hdrlen = ieee80211_rx_radiotap_hdrlen(local, status, origskb);
- needed_headroom = rt_hdrlen - rtap_vendor_space;
-
- if (should_drop_frame(origskb, present_fcs_len, rtap_vendor_space)) {
- /* only need to expand headroom if necessary */
- skb = origskb;
- origskb = NULL;
-
- /*
- * This shouldn't trigger often because most devices have an
- * RX header they pull before we get here, and that should
- * be big enough for our radiotap information. We should
- * probably export the length to drivers so that we can have
- * them allocate enough headroom to start with.
- */
- if (skb_headroom(skb) < needed_headroom &&
- pskb_expand_head(skb, needed_headroom, 0, GFP_ATOMIC)) {
- dev_kfree_skb(skb);
- return NULL;
- }
- } else {
- /*
- * Need to make a copy and possibly remove radiotap header
- * and FCS from the original.
- */
- skb = skb_copy_expand(origskb, needed_headroom, 0, GFP_ATOMIC);
-
- origskb = remove_monitor_info(local, origskb,
- rtap_vendor_space);
-
- if (!skb)
- return origskb;
- }
-
- /* prepend radiotap information */
- ieee80211_add_rx_radiotap_header(local, skb, rate, rt_hdrlen, true);
-
- skb_reset_mac_header(skb);
- skb->ip_summed = CHECKSUM_UNNECESSARY;
- skb->pkt_type = PACKET_OTHERHOST;
- skb->protocol = htons(ETH_P_802_2);
+ list_for_each_entry_rcu(sdata, &local->mon_list, u.mntr.list) {
+ bool last_monitor = list_is_last(&sdata->u.mntr.list,
+ &local->mon_list);
- list_for_each_entry_rcu(sdata, &local->interfaces, list) {
- if (sdata->vif.type != NL80211_IFTYPE_MONITOR)
- continue;
+ if (!monskb)
+ monskb = ieee80211_make_monitor_skb(local, &origskb,
+ rate,
+ rtap_vendor_space,
+ only_monitor &&
+ last_monitor);
- if (sdata->u.mntr.flags & MONITOR_FLAG_COOK_FRAMES)
- continue;
+ if (monskb) {
+ struct sk_buff *skb;
- if (!ieee80211_sdata_running(sdata))
- continue;
+ if (last_monitor) {
+ skb = monskb;
+ monskb = NULL;
+ } else {
+ skb = skb_clone(monskb, GFP_ATOMIC);
+ }
- if (prev_dev) {
- skb2 = skb_clone(skb, GFP_ATOMIC);
- if (skb2) {
- skb2->dev = prev_dev;
- netif_receive_skb(skb2);
+ if (skb) {
+ skb->dev = sdata->dev;
+ ieee80211_rx_stats(skb->dev, skb->len);
+ netif_receive_skb(skb);
}
}
- prev_dev = sdata->dev;
- ieee80211_rx_stats(sdata->dev, skb->len);
+ if (last_monitor)
+ break;
}
- if (prev_dev) {
- skb->dev = prev_dev;
- netif_receive_skb(skb);
- } else
- dev_kfree_skb(skb);
+ /* this happens if last_monitor was erroneously false */
+ dev_kfree_skb(monskb);
+
+ /* ditto */
+ if (!origskb)
+ return NULL;
+ remove_monitor_info(origskb, present_fcs_len, rtap_vendor_space);
return origskb;
}
@@ -3315,8 +3341,8 @@ static void ieee80211_rx_handlers_result(struct ieee80211_rx_data *rx,
status = IEEE80211_SKB_RXCB((rx->skb));
sband = rx->local->hw.wiphy->bands[status->band];
- if (!(status->flag & RX_FLAG_HT) &&
- !(status->flag & RX_FLAG_VHT))
+ if (!(status->encoding == RX_ENC_HT) &&
+ !(status->encoding == RX_ENC_VHT))
rate = &sband->bitrates[status->rate_idx];
ieee80211_rx_cooked_monitor(rx, rate);
@@ -3553,7 +3579,7 @@ static bool ieee80211_accept_frame(struct ieee80211_rx_data *rx)
struct ieee80211_hdr *hdr = (void *)skb->data;
struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
u8 *bssid = ieee80211_get_bssid(hdr, skb->len, sdata->vif.type);
- int multicast = is_multicast_ether_addr(hdr->addr1);
+ bool multicast = is_multicast_ether_addr(hdr->addr1);
switch (sdata->vif.type) {
case NL80211_IFTYPE_STATION:
@@ -3577,7 +3603,7 @@ static bool ieee80211_accept_frame(struct ieee80211_rx_data *rx)
return false;
if (!rx->sta) {
int rate_idx;
- if (status->flag & (RX_FLAG_HT | RX_FLAG_VHT))
+ if (status->encoding != RX_ENC_LEGACY)
rate_idx = 0; /* TODO: HT/VHT rates */
else
rate_idx = status->rate_idx;
@@ -3597,7 +3623,7 @@ static bool ieee80211_accept_frame(struct ieee80211_rx_data *rx)
return false;
if (!rx->sta) {
int rate_idx;
- if (status->flag & RX_FLAG_HT)
+ if (status->encoding != RX_ENC_LEGACY)
rate_idx = 0; /* TODO: HT rates */
else
rate_idx = status->rate_idx;
@@ -4260,7 +4286,8 @@ void ieee80211_rx_napi(struct ieee80211_hw *hw, struct ieee80211_sta *pubsta,
* we probably can't have a valid rate here anyway.
*/
- if (status->flag & RX_FLAG_HT) {
+ switch (status->encoding) {
+ case RX_ENC_HT:
/*
* rate_idx is MCS index, which can be [0-76]
* as documented on:
@@ -4278,14 +4305,19 @@ void ieee80211_rx_napi(struct ieee80211_hw *hw, struct ieee80211_sta *pubsta,
status->rate_idx,
status->rate_idx))
goto drop;
- } else if (status->flag & RX_FLAG_VHT) {
+ break;
+ case RX_ENC_VHT:
if (WARN_ONCE(status->rate_idx > 9 ||
- !status->vht_nss ||
- status->vht_nss > 8,
+ !status->nss ||
+ status->nss > 8,
"Rate marked as a VHT rate but data is invalid: MCS: %d, NSS: %d\n",
- status->rate_idx, status->vht_nss))
+ status->rate_idx, status->nss))
goto drop;
- } else {
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ /* fall through */
+ case RX_ENC_LEGACY:
if (WARN_ON(status->rate_idx >= sband->n_bitrates))
goto drop;
rate = &sband->bitrates[status->rate_idx];
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index faab3c490d2b..47d2ed570470 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -79,9 +79,9 @@ ieee80211_bss_info_update(struct ieee80211_local *local,
bss_meta.signal = (rx_status->signal * 100) / local->hw.max_signal;
bss_meta.scan_width = NL80211_BSS_CHAN_WIDTH_20;
- if (rx_status->flag & RX_FLAG_5MHZ)
+ if (rx_status->bw == RATE_INFO_BW_5)
bss_meta.scan_width = NL80211_BSS_CHAN_WIDTH_5;
- if (rx_status->flag & RX_FLAG_10MHZ)
+ else if (rx_status->bw == RATE_INFO_BW_10)
bss_meta.scan_width = NL80211_BSS_CHAN_WIDTH_10;
bss_meta.chan = channel;
@@ -174,8 +174,8 @@ ieee80211_bss_info_update(struct ieee80211_local *local,
if (beacon) {
struct ieee80211_supported_band *sband =
local->hw.wiphy->bands[rx_status->band];
- if (!(rx_status->flag & RX_FLAG_HT) &&
- !(rx_status->flag & RX_FLAG_VHT))
+ if (!(rx_status->encoding == RX_ENC_HT) &&
+ !(rx_status->encoding == RX_ENC_VHT))
bss->beacon_rate =
&sband->bitrates[rx_status->rate_idx];
}
@@ -1219,7 +1219,7 @@ void ieee80211_sched_scan_results(struct ieee80211_hw *hw)
trace_api_sched_scan_results(local);
- cfg80211_sched_scan_results(hw->wiphy);
+ cfg80211_sched_scan_results(hw->wiphy, 0);
}
EXPORT_SYMBOL(ieee80211_sched_scan_results);
@@ -1239,7 +1239,7 @@ void ieee80211_sched_scan_end(struct ieee80211_local *local)
mutex_unlock(&local->mtx);
- cfg80211_sched_scan_stopped(local->hw.wiphy);
+ cfg80211_sched_scan_stopped(local->hw.wiphy, 0);
}
void ieee80211_sched_scan_stopped_work(struct work_struct *work)
diff --git a/net/mac80211/spectmgmt.c b/net/mac80211/spectmgmt.c
index 97f4c9d6b54c..0782e486fe89 100644
--- a/net/mac80211/spectmgmt.c
+++ b/net/mac80211/spectmgmt.c
@@ -132,9 +132,9 @@ int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata,
struct ieee80211_vht_operation vht_oper = {
.chan_width =
wide_bw_chansw_ie->new_channel_width,
- .center_freq_seg1_idx =
+ .center_freq_seg0_idx =
wide_bw_chansw_ie->new_center_freq_seg0,
- .center_freq_seg2_idx =
+ .center_freq_seg1_idx =
wide_bw_chansw_ie->new_center_freq_seg1,
/* .basic_mcs_set doesn't matter */
};
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index 3323a2fb289b..7cdf7a835bb0 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -2,7 +2,7 @@
* Copyright 2002-2005, Instant802 Networks, Inc.
* Copyright 2006-2007 Jiri Benc <jbenc@suse.cz>
* Copyright 2013-2014 Intel Mobile Communications GmbH
- * Copyright (C) 2015 - 2016 Intel Deutschland GmbH
+ * Copyright (C) 2015 - 2017 Intel Deutschland GmbH
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -395,10 +395,15 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata,
sta->sta.smps_mode = IEEE80211_SMPS_OFF;
if (sdata->vif.type == NL80211_IFTYPE_AP ||
sdata->vif.type == NL80211_IFTYPE_AP_VLAN) {
- struct ieee80211_supported_band *sband =
- hw->wiphy->bands[ieee80211_get_sdata_band(sdata)];
- u8 smps = (sband->ht_cap.cap & IEEE80211_HT_CAP_SM_PS) >>
- IEEE80211_HT_CAP_SM_PS_SHIFT;
+ struct ieee80211_supported_band *sband;
+ u8 smps;
+
+ sband = ieee80211_get_sband(sdata);
+ if (!sband)
+ goto free_txq;
+
+ smps = (sband->ht_cap.cap & IEEE80211_HT_CAP_SM_PS) >>
+ IEEE80211_HT_CAP_SM_PS_SHIFT;
/*
* Assume that hostapd advertises our caps in the beacon and
* this is the known_smps_mode for a station that just assciated
@@ -1957,24 +1962,32 @@ sta_get_last_rx_stats(struct sta_info *sta)
static void sta_stats_decode_rate(struct ieee80211_local *local, u16 rate,
struct rate_info *rinfo)
{
- rinfo->bw = (rate & STA_STATS_RATE_BW_MASK) >>
- STA_STATS_RATE_BW_SHIFT;
+ rinfo->bw = STA_STATS_GET(BW, rate);
- if (rate & STA_STATS_RATE_VHT) {
+ switch (STA_STATS_GET(TYPE, rate)) {
+ case STA_STATS_RATE_TYPE_VHT:
rinfo->flags = RATE_INFO_FLAGS_VHT_MCS;
- rinfo->mcs = rate & 0xf;
- rinfo->nss = (rate & 0xf0) >> 4;
- } else if (rate & STA_STATS_RATE_HT) {
+ rinfo->mcs = STA_STATS_GET(VHT_MCS, rate);
+ rinfo->nss = STA_STATS_GET(VHT_NSS, rate);
+ if (STA_STATS_GET(SGI, rate))
+ rinfo->flags |= RATE_INFO_FLAGS_SHORT_GI;
+ break;
+ case STA_STATS_RATE_TYPE_HT:
rinfo->flags = RATE_INFO_FLAGS_MCS;
- rinfo->mcs = rate & 0xff;
- } else if (rate & STA_STATS_RATE_LEGACY) {
+ rinfo->mcs = STA_STATS_GET(HT_MCS, rate);
+ if (STA_STATS_GET(SGI, rate))
+ rinfo->flags |= RATE_INFO_FLAGS_SHORT_GI;
+ break;
+ case STA_STATS_RATE_TYPE_LEGACY: {
struct ieee80211_supported_band *sband;
u16 brate;
unsigned int shift;
+ int band = STA_STATS_GET(LEGACY_BAND, rate);
+ int rate_idx = STA_STATS_GET(LEGACY_IDX, rate);
rinfo->flags = 0;
- sband = local->hw.wiphy->bands[(rate >> 4) & 0xf];
- brate = sband->bitrates[rate & 0xf].bitrate;
+ sband = local->hw.wiphy->bands[band];
+ brate = sband->bitrates[rate_idx].bitrate;
if (rinfo->bw == RATE_INFO_BW_5)
shift = 2;
else if (rinfo->bw == RATE_INFO_BW_10)
@@ -1982,10 +1995,9 @@ static void sta_stats_decode_rate(struct ieee80211_local *local, u16 rate,
else
shift = 0;
rinfo->legacy = DIV_ROUND_UP(brate, 1 << shift);
+ break;
+ }
}
-
- if (rate & STA_STATS_RATE_SGI)
- rinfo->flags |= RATE_INFO_FLAGS_SHORT_GI;
}
static int sta_set_rate_info_rx(struct sta_info *sta, struct rate_info *rinfo)
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index e65cda34d2bc..5609cacb20d5 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -1,7 +1,7 @@
/*
* Copyright 2002-2005, Devicescape Software, Inc.
* Copyright 2013-2014 Intel Mobile Communications GmbH
- * Copyright(c) 2015-2016 Intel Deutschland GmbH
+ * Copyright(c) 2015-2017 Intel Deutschland GmbH
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -16,6 +16,7 @@
#include <linux/if_ether.h>
#include <linux/workqueue.h>
#include <linux/average.h>
+#include <linux/bitfield.h>
#include <linux/etherdevice.h>
#include <linux/rhashtable.h>
#include <linux/u64_stats_sync.h>
@@ -324,6 +325,9 @@ struct ieee80211_fast_rx {
struct rcu_head rcu_head;
};
+/* we use only values in the range 0-100, so pick a large precision */
+DECLARE_EWMA(mesh_fail_avg, 20, 8)
+
/**
* struct mesh_sta - mesh STA information
* @plink_lock: serialize access to plink fields
@@ -369,7 +373,7 @@ struct mesh_sta {
enum nl80211_mesh_power_mode nonpeer_pm;
/* moving percentage of failed MSDUs */
- unsigned int fail_avg;
+ struct ewma_mesh_fail_avg fail_avg;
};
DECLARE_EWMA(signal, 10, 8)
@@ -724,40 +728,55 @@ void ieee80211_sta_ps_deliver_uapsd(struct sta_info *sta);
unsigned long ieee80211_sta_last_active(struct sta_info *sta);
+enum sta_stats_type {
+ STA_STATS_RATE_TYPE_INVALID = 0,
+ STA_STATS_RATE_TYPE_LEGACY,
+ STA_STATS_RATE_TYPE_HT,
+ STA_STATS_RATE_TYPE_VHT,
+};
+
+#define STA_STATS_FIELD_HT_MCS GENMASK( 7, 0)
+#define STA_STATS_FIELD_LEGACY_IDX GENMASK( 3, 0)
+#define STA_STATS_FIELD_LEGACY_BAND GENMASK( 7, 4)
+#define STA_STATS_FIELD_VHT_MCS GENMASK( 3, 0)
+#define STA_STATS_FIELD_VHT_NSS GENMASK( 7, 4)
+#define STA_STATS_FIELD_BW GENMASK(11, 8)
+#define STA_STATS_FIELD_SGI GENMASK(12, 12)
+#define STA_STATS_FIELD_TYPE GENMASK(15, 13)
+
+#define STA_STATS_FIELD(_n, _v) FIELD_PREP(STA_STATS_FIELD_ ## _n, _v)
+#define STA_STATS_GET(_n, _v) FIELD_GET(STA_STATS_FIELD_ ## _n, _v)
+
#define STA_STATS_RATE_INVALID 0
-#define STA_STATS_RATE_VHT 0x8000
-#define STA_STATS_RATE_HT 0x4000
-#define STA_STATS_RATE_LEGACY 0x2000
-#define STA_STATS_RATE_SGI 0x1000
-#define STA_STATS_RATE_BW_SHIFT 9
-#define STA_STATS_RATE_BW_MASK (0x7 << STA_STATS_RATE_BW_SHIFT)
-
-static inline u16 sta_stats_encode_rate(struct ieee80211_rx_status *s)
+
+static inline u32 sta_stats_encode_rate(struct ieee80211_rx_status *s)
{
- u16 r = s->rate_idx;
-
- if (s->vht_flag & RX_VHT_FLAG_80MHZ)
- r |= RATE_INFO_BW_80 << STA_STATS_RATE_BW_SHIFT;
- else if (s->vht_flag & RX_VHT_FLAG_160MHZ)
- r |= RATE_INFO_BW_160 << STA_STATS_RATE_BW_SHIFT;
- else if (s->flag & RX_FLAG_40MHZ)
- r |= RATE_INFO_BW_40 << STA_STATS_RATE_BW_SHIFT;
- else if (s->flag & RX_FLAG_10MHZ)
- r |= RATE_INFO_BW_10 << STA_STATS_RATE_BW_SHIFT;
- else if (s->flag & RX_FLAG_5MHZ)
- r |= RATE_INFO_BW_5 << STA_STATS_RATE_BW_SHIFT;
- else
- r |= RATE_INFO_BW_20 << STA_STATS_RATE_BW_SHIFT;
-
- if (s->flag & RX_FLAG_SHORT_GI)
- r |= STA_STATS_RATE_SGI;
-
- if (s->flag & RX_FLAG_VHT)
- r |= STA_STATS_RATE_VHT | (s->vht_nss << 4);
- else if (s->flag & RX_FLAG_HT)
- r |= STA_STATS_RATE_HT;
- else
- r |= STA_STATS_RATE_LEGACY | (s->band << 4);
+ u16 r;
+
+ r = STA_STATS_FIELD(BW, s->bw);
+
+ if (s->enc_flags & RX_ENC_FLAG_SHORT_GI)
+ r |= STA_STATS_FIELD(SGI, 1);
+
+ switch (s->encoding) {
+ case RX_ENC_VHT:
+ r |= STA_STATS_FIELD(TYPE, STA_STATS_RATE_TYPE_VHT);
+ r |= STA_STATS_FIELD(VHT_NSS, s->nss);
+ r |= STA_STATS_FIELD(VHT_MCS, s->rate_idx);
+ break;
+ case RX_ENC_HT:
+ r |= STA_STATS_FIELD(TYPE, STA_STATS_RATE_TYPE_HT);
+ r |= STA_STATS_FIELD(HT_MCS, s->rate_idx);
+ break;
+ case RX_ENC_LEGACY:
+ r |= STA_STATS_FIELD(TYPE, STA_STATS_RATE_TYPE_LEGACY);
+ r |= STA_STATS_FIELD(LEGACY_BAND, s->band);
+ r |= STA_STATS_FIELD(LEGACY_IDX, s->rate_idx);
+ break;
+ default:
+ WARN_ON(1);
+ return STA_STATS_RATE_INVALID;
+ }
return r;
}
diff --git a/net/mac80211/status.c b/net/mac80211/status.c
index 83b8b11f24ea..be47ac5cd8c8 100644
--- a/net/mac80211/status.c
+++ b/net/mac80211/status.c
@@ -200,6 +200,7 @@ static void ieee80211_frame_acked(struct sta_info *sta, struct sk_buff *skb)
}
if (ieee80211_is_action(mgmt->frame_control) &&
+ !ieee80211_has_protected(mgmt->frame_control) &&
mgmt->u.action.category == WLAN_CATEGORY_HT &&
mgmt->u.action.u.ht_smps.action == WLAN_HT_ACTION_SMPS &&
ieee80211_sdata_running(sdata)) {
@@ -630,61 +631,6 @@ static int ieee80211_tx_get_rates(struct ieee80211_hw *hw,
return rates_idx;
}
-void ieee80211_tx_status_noskb(struct ieee80211_hw *hw,
- struct ieee80211_sta *pubsta,
- struct ieee80211_tx_info *info)
-{
- struct ieee80211_local *local = hw_to_local(hw);
- struct ieee80211_supported_band *sband;
- int retry_count;
- bool acked, noack_success;
-
- ieee80211_tx_get_rates(hw, info, &retry_count);
-
- sband = hw->wiphy->bands[info->band];
-
- acked = !!(info->flags & IEEE80211_TX_STAT_ACK);
- noack_success = !!(info->flags & IEEE80211_TX_STAT_NOACK_TRANSMITTED);
-
- if (pubsta) {
- struct sta_info *sta;
-
- sta = container_of(pubsta, struct sta_info, sta);
-
- if (!acked)
- sta->status_stats.retry_failed++;
- sta->status_stats.retry_count += retry_count;
-
- if (acked) {
- sta->status_stats.last_ack = jiffies;
-
- if (sta->status_stats.lost_packets)
- sta->status_stats.lost_packets = 0;
-
- /* Track when last TDLS packet was ACKed */
- if (test_sta_flag(sta, WLAN_STA_TDLS_PEER_AUTH))
- sta->status_stats.last_tdls_pkt_time = jiffies;
- } else {
- ieee80211_lost_packet(sta, info);
- }
-
- rate_control_tx_status_noskb(local, sband, sta, info);
- }
-
- if (acked || noack_success) {
- I802_DEBUG_INC(local->dot11TransmittedFrameCount);
- if (!pubsta)
- I802_DEBUG_INC(local->dot11MulticastTransmittedFrameCount);
- if (retry_count > 0)
- I802_DEBUG_INC(local->dot11RetryCount);
- if (retry_count > 1)
- I802_DEBUG_INC(local->dot11MultipleRetryCount);
- } else {
- I802_DEBUG_INC(local->dot11FailedCount);
- }
-}
-EXPORT_SYMBOL(ieee80211_tx_status_noskb);
-
void ieee80211_tx_monitor(struct ieee80211_local *local, struct sk_buff *skb,
struct ieee80211_supported_band *sband,
int retry_count, int shift, bool send_to_cooked)
@@ -742,15 +688,16 @@ void ieee80211_tx_monitor(struct ieee80211_local *local, struct sk_buff *skb,
dev_kfree_skb(skb);
}
-void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
+static void __ieee80211_tx_status(struct ieee80211_hw *hw,
+ struct ieee80211_tx_status *status)
{
+ struct sk_buff *skb = status->skb;
struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
struct ieee80211_local *local = hw_to_local(hw);
- struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
+ struct ieee80211_tx_info *info = status->info;
+ struct sta_info *sta;
__le16 fc;
struct ieee80211_supported_band *sband;
- struct rhlist_head *tmp;
- struct sta_info *sta;
int retry_count;
int rates_idx;
bool send_to_cooked;
@@ -761,16 +708,11 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
rates_idx = ieee80211_tx_get_rates(hw, info, &retry_count);
- rcu_read_lock();
-
sband = local->hw.wiphy->bands[info->band];
fc = hdr->frame_control;
- for_each_sta_info(local, hdr->addr1, sta, tmp) {
- /* skip wrong virtual interface */
- if (!ether_addr_equal(hdr->addr2, sta->sdata->vif.addr))
- continue;
-
+ if (status->sta) {
+ sta = container_of(status->sta, struct sta_info, sta);
shift = ieee80211_vif_get_shift(&sta->sdata->vif);
if (info->flags & IEEE80211_TX_STATUS_EOSP)
@@ -790,7 +732,6 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
* that this TX packet failed because of that.
*/
ieee80211_handle_filtered_frame(local, sta, skb);
- rcu_read_unlock();
return;
}
@@ -840,7 +781,6 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
if (info->flags & IEEE80211_TX_STAT_TX_FILTERED) {
ieee80211_handle_filtered_frame(local, sta, skb);
- rcu_read_unlock();
return;
} else {
if (!acked)
@@ -856,7 +796,7 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
}
}
- rate_control_tx_status(local, sband, sta, skb);
+ rate_control_tx_status(local, sband, status);
if (ieee80211_vif_is_mesh(&sta->sdata->vif))
ieee80211s_update_metric(local, sta, skb);
@@ -883,8 +823,6 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
}
}
- rcu_read_unlock();
-
ieee80211_led_tx(local);
/* SNMP counters
@@ -949,8 +887,96 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
/* send to monitor interfaces */
ieee80211_tx_monitor(local, skb, sband, retry_count, shift, send_to_cooked);
}
+
+void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
+{
+ struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
+ struct ieee80211_local *local = hw_to_local(hw);
+ struct ieee80211_tx_status status = {
+ .skb = skb,
+ .info = IEEE80211_SKB_CB(skb),
+ };
+ struct rhlist_head *tmp;
+ struct sta_info *sta;
+
+ rcu_read_lock();
+
+ for_each_sta_info(local, hdr->addr1, sta, tmp) {
+ /* skip wrong virtual interface */
+ if (!ether_addr_equal(hdr->addr2, sta->sdata->vif.addr))
+ continue;
+
+ status.sta = &sta->sta;
+ break;
+ }
+
+ __ieee80211_tx_status(hw, &status);
+ rcu_read_unlock();
+}
EXPORT_SYMBOL(ieee80211_tx_status);
+void ieee80211_tx_status_ext(struct ieee80211_hw *hw,
+ struct ieee80211_tx_status *status)
+{
+ struct ieee80211_local *local = hw_to_local(hw);
+ struct ieee80211_tx_info *info = status->info;
+ struct ieee80211_sta *pubsta = status->sta;
+ struct ieee80211_supported_band *sband;
+ int retry_count;
+ bool acked, noack_success;
+
+ if (status->skb)
+ return __ieee80211_tx_status(hw, status);
+
+ if (!status->sta)
+ return;
+
+ ieee80211_tx_get_rates(hw, info, &retry_count);
+
+ sband = hw->wiphy->bands[info->band];
+
+ acked = !!(info->flags & IEEE80211_TX_STAT_ACK);
+ noack_success = !!(info->flags & IEEE80211_TX_STAT_NOACK_TRANSMITTED);
+
+ if (pubsta) {
+ struct sta_info *sta;
+
+ sta = container_of(pubsta, struct sta_info, sta);
+
+ if (!acked)
+ sta->status_stats.retry_failed++;
+ sta->status_stats.retry_count += retry_count;
+
+ if (acked) {
+ sta->status_stats.last_ack = jiffies;
+
+ if (sta->status_stats.lost_packets)
+ sta->status_stats.lost_packets = 0;
+
+ /* Track when last TDLS packet was ACKed */
+ if (test_sta_flag(sta, WLAN_STA_TDLS_PEER_AUTH))
+ sta->status_stats.last_tdls_pkt_time = jiffies;
+ } else {
+ ieee80211_lost_packet(sta, info);
+ }
+
+ rate_control_tx_status(local, sband, status);
+ }
+
+ if (acked || noack_success) {
+ I802_DEBUG_INC(local->dot11TransmittedFrameCount);
+ if (!pubsta)
+ I802_DEBUG_INC(local->dot11MulticastTransmittedFrameCount);
+ if (retry_count > 0)
+ I802_DEBUG_INC(local->dot11RetryCount);
+ if (retry_count > 1)
+ I802_DEBUG_INC(local->dot11MultipleRetryCount);
+ } else {
+ I802_DEBUG_INC(local->dot11FailedCount);
+ }
+}
+EXPORT_SYMBOL(ieee80211_tx_status_ext);
+
void ieee80211_report_low_ack(struct ieee80211_sta *pubsta, u32 num_packets)
{
struct sta_info *sta = container_of(pubsta, struct sta_info, sta);
diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c
index afca7d103684..f20dcf1b1830 100644
--- a/net/mac80211/tdls.c
+++ b/net/mac80211/tdls.c
@@ -47,8 +47,7 @@ static void ieee80211_tdls_add_ext_capab(struct ieee80211_sub_if_data *sdata,
NL80211_FEATURE_TDLS_CHANNEL_SWITCH;
bool wider_band = ieee80211_hw_check(&local->hw, TDLS_WIDER_BW) &&
!ifmgd->tdls_wider_bw_prohibited;
- enum nl80211_band band = ieee80211_get_sdata_band(sdata);
- struct ieee80211_supported_band *sband = local->hw.wiphy->bands[band];
+ struct ieee80211_supported_band *sband = ieee80211_get_sband(sdata);
bool vht = sband && sband->vht_cap.vht_supported;
u8 *pos = (void *)skb_put(skb, 10);
@@ -180,11 +179,14 @@ static void ieee80211_tdls_add_bss_coex_ie(struct sk_buff *skb)
static u16 ieee80211_get_tdls_sta_capab(struct ieee80211_sub_if_data *sdata,
u16 status_code)
{
+ struct ieee80211_supported_band *sband;
+
/* The capability will be 0 when sending a failure code */
if (status_code != 0)
return 0;
- if (ieee80211_get_sdata_band(sdata) == NL80211_BAND_2GHZ) {
+ sband = ieee80211_get_sband(sdata);
+ if (sband && sband->band == NL80211_BAND_2GHZ) {
return WLAN_CAPABILITY_SHORT_SLOT_TIME |
WLAN_CAPABILITY_SHORT_PREAMBLE;
}
@@ -358,17 +360,20 @@ ieee80211_tdls_add_setup_start_ies(struct ieee80211_sub_if_data *sdata,
u8 action_code, bool initiator,
const u8 *extra_ies, size_t extra_ies_len)
{
- enum nl80211_band band = ieee80211_get_sdata_band(sdata);
- struct ieee80211_local *local = sdata->local;
struct ieee80211_supported_band *sband;
+ struct ieee80211_local *local = sdata->local;
struct ieee80211_sta_ht_cap ht_cap;
struct ieee80211_sta_vht_cap vht_cap;
struct sta_info *sta = NULL;
size_t offset = 0, noffset;
u8 *pos;
- ieee80211_add_srates_ie(sdata, skb, false, band);
- ieee80211_add_ext_srates_ie(sdata, skb, false, band);
+ sband = ieee80211_get_sband(sdata);
+ if (!sband)
+ return;
+
+ ieee80211_add_srates_ie(sdata, skb, false, sband->band);
+ ieee80211_add_ext_srates_ie(sdata, skb, false, sband->band);
ieee80211_tdls_add_supp_channels(sdata, skb);
/* add any custom IEs that go before Extended Capabilities */
@@ -439,7 +444,6 @@ ieee80211_tdls_add_setup_start_ies(struct ieee80211_sub_if_data *sdata,
* the same on all bands. The specification limits the setup to a
* single HT-cap, so use the current band for now.
*/
- sband = local->hw.wiphy->bands[band];
memcpy(&ht_cap, &sband->ht_cap, sizeof(ht_cap));
if ((action_code == WLAN_TDLS_SETUP_REQUEST ||
@@ -545,9 +549,13 @@ ieee80211_tdls_add_setup_cfm_ies(struct ieee80211_sub_if_data *sdata,
struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
size_t offset = 0, noffset;
struct sta_info *sta, *ap_sta;
- enum nl80211_band band = ieee80211_get_sdata_band(sdata);
+ struct ieee80211_supported_band *sband;
u8 *pos;
+ sband = ieee80211_get_sband(sdata);
+ if (!sband)
+ return;
+
mutex_lock(&local->sta_mtx);
sta = sta_info_get(sdata, peer);
@@ -612,7 +620,8 @@ ieee80211_tdls_add_setup_cfm_ies(struct ieee80211_sub_if_data *sdata,
ieee80211_tdls_add_link_ie(sdata, skb, peer, initiator);
/* only include VHT-operation if not on the 2.4GHz band */
- if (band != NL80211_BAND_2GHZ && sta->sta.vht_cap.vht_supported) {
+ if (sband->band != NL80211_BAND_2GHZ &&
+ sta->sta.vht_cap.vht_supported) {
/*
* if both peers support WIDER_BW, we can expand the chandef to
* a wider compatible one, up to 80MHz
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index ba8d7db0a071..04b22f8982fe 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -682,10 +682,6 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx)
txrc.skb = tx->skb;
txrc.reported_rate.idx = -1;
txrc.rate_idx_mask = tx->sdata->rc_rateidx_mask[info->band];
- if (txrc.rate_idx_mask == (1 << sband->n_bitrates) - 1)
- txrc.max_rate_idx = -1;
- else
- txrc.max_rate_idx = fls(txrc.rate_idx_mask) - 1;
if (tx->sdata->rc_has_mcs_mask[info->band])
txrc.rate_idx_mcs_mask =
@@ -4249,10 +4245,6 @@ __ieee80211_beacon_get(struct ieee80211_hw *hw,
txrc.skb = skb;
txrc.reported_rate.idx = -1;
txrc.rate_idx_mask = sdata->rc_rateidx_mask[band];
- if (txrc.rate_idx_mask == (1 << txrc.sband->n_bitrates) - 1)
- txrc.max_rate_idx = -1;
- else
- txrc.max_rate_idx = fls(txrc.rate_idx_mask) - 1;
txrc.bss = true;
rate_control_get_rate(sdata, NULL, &txrc);
@@ -4305,7 +4297,10 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw,
return bcn;
shift = ieee80211_vif_get_shift(vif);
- sband = hw->wiphy->bands[ieee80211_get_sdata_band(vif_to_sdata(vif))];
+ sband = ieee80211_get_sband(vif_to_sdata(vif));
+ if (!sband)
+ return bcn;
+
ieee80211_tx_monitor(hw_to_local(hw), copy, sband, 1, shift, false);
return bcn;
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index ac59fbd280df..ac9ac6c35594 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -4,7 +4,7 @@
* Copyright 2006-2007 Jiri Benc <jbenc@suse.cz>
* Copyright 2007 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
- * Copyright (C) 2015-2016 Intel Deutschland GmbH
+ * Copyright (C) 2015-2017 Intel Deutschland GmbH
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -828,6 +828,7 @@ u32 ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action,
case WLAN_EID_EXT_CAPABILITY:
case WLAN_EID_CHAN_SWITCH_TIMING:
case WLAN_EID_LINK_ID:
+ case WLAN_EID_BSS_MAX_IDLE_PERIOD:
/*
* not listing WLAN_EID_CHANNEL_SWITCH_WRAPPER -- it seems possible
* that if the content gets bigger it might be needed more than once
@@ -1089,6 +1090,10 @@ u32 ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action,
else
elem_parse_failed = true;
break;
+ case WLAN_EID_BSS_MAX_IDLE_PERIOD:
+ if (elen >= sizeof(*elems->max_idle_period_ie))
+ elems->max_idle_period_ie = (void *)pos;
+ break;
default:
break;
}
@@ -1590,14 +1595,14 @@ u32 ieee80211_sta_get_rates(struct ieee80211_sub_if_data *sdata,
size_t num_rates;
u32 supp_rates, rate_flags;
int i, j, shift;
+
sband = sdata->local->hw.wiphy->bands[band];
+ if (WARN_ON(!sband))
+ return 1;
rate_flags = ieee80211_chandef_rate_flags(&sdata->vif.bss_conf.chandef);
shift = ieee80211_vif_get_shift(&sdata->vif);
- if (WARN_ON(!sband))
- return 1;
-
num_rates = sband->n_bitrates;
supp_rates = 0;
for (i = 0; i < elems->supp_rates_len +
@@ -1983,6 +1988,10 @@ int ieee80211_reconfig(struct ieee80211_local *local)
if (sdata->u.mgd.have_beacon)
changed |= BSS_CHANGED_BEACON_INFO;
+ if (sdata->vif.bss_conf.max_idle_period ||
+ sdata->vif.bss_conf.protected_keep_alive)
+ changed |= BSS_CHANGED_KEEP_ALIVE;
+
sdata_lock(sdata);
ieee80211_bss_info_change_notify(sdata, changed);
sdata_unlock(sdata);
@@ -2103,7 +2112,7 @@ int ieee80211_reconfig(struct ieee80211_local *local)
mutex_unlock(&local->mtx);
if (sched_scan_stopped)
- cfg80211_sched_scan_stopped_rtnl(local->hw.wiphy);
+ cfg80211_sched_scan_stopped_rtnl(local->hw.wiphy, 0);
wake_up:
if (local->in_reconfig) {
@@ -2413,13 +2422,13 @@ u8 *ieee80211_ie_build_vht_oper(u8 *pos, struct ieee80211_sta_vht_cap *vht_cap,
*pos++ = WLAN_EID_VHT_OPERATION;
*pos++ = sizeof(struct ieee80211_vht_operation);
vht_oper = (struct ieee80211_vht_operation *)pos;
- vht_oper->center_freq_seg1_idx = ieee80211_frequency_to_channel(
+ vht_oper->center_freq_seg0_idx = ieee80211_frequency_to_channel(
chandef->center_freq1);
if (chandef->center_freq2)
- vht_oper->center_freq_seg2_idx =
+ vht_oper->center_freq_seg1_idx =
ieee80211_frequency_to_channel(chandef->center_freq2);
else
- vht_oper->center_freq_seg2_idx = 0x00;
+ vht_oper->center_freq_seg1_idx = 0x00;
switch (chandef->width) {
case NL80211_CHAN_WIDTH_160:
@@ -2428,11 +2437,11 @@ u8 *ieee80211_ie_build_vht_oper(u8 *pos, struct ieee80211_sta_vht_cap *vht_cap,
* workaround.
*/
vht_oper->chan_width = IEEE80211_VHT_CHANWIDTH_80MHZ;
- vht_oper->center_freq_seg2_idx = vht_oper->center_freq_seg1_idx;
+ vht_oper->center_freq_seg1_idx = vht_oper->center_freq_seg0_idx;
if (chandef->chan->center_freq < chandef->center_freq1)
- vht_oper->center_freq_seg1_idx -= 8;
+ vht_oper->center_freq_seg0_idx -= 8;
else
- vht_oper->center_freq_seg1_idx += 8;
+ vht_oper->center_freq_seg0_idx += 8;
break;
case NL80211_CHAN_WIDTH_80P80:
/*
@@ -2491,9 +2500,9 @@ bool ieee80211_chandef_vht_oper(const struct ieee80211_vht_operation *oper,
if (!oper)
return false;
- cf1 = ieee80211_channel_to_frequency(oper->center_freq_seg1_idx,
+ cf1 = ieee80211_channel_to_frequency(oper->center_freq_seg0_idx,
chandef->chan->band);
- cf2 = ieee80211_channel_to_frequency(oper->center_freq_seg2_idx,
+ cf2 = ieee80211_channel_to_frequency(oper->center_freq_seg1_idx,
chandef->chan->band);
switch (oper->chan_width) {
@@ -2503,11 +2512,11 @@ bool ieee80211_chandef_vht_oper(const struct ieee80211_vht_operation *oper,
new.width = NL80211_CHAN_WIDTH_80;
new.center_freq1 = cf1;
/* If needed, adjust based on the newer interop workaround. */
- if (oper->center_freq_seg2_idx) {
+ if (oper->center_freq_seg1_idx) {
unsigned int diff;
- diff = abs(oper->center_freq_seg2_idx -
- oper->center_freq_seg1_idx);
+ diff = abs(oper->center_freq_seg1_idx -
+ oper->center_freq_seg0_idx);
if (diff == 8) {
new.width = NL80211_CHAN_WIDTH_160;
new.center_freq1 = cf2;
@@ -2715,42 +2724,39 @@ u64 ieee80211_calculate_rx_timestamp(struct ieee80211_local *local,
memset(&ri, 0, sizeof(ri));
/* Fill cfg80211 rate info */
- if (status->flag & RX_FLAG_HT) {
+ switch (status->encoding) {
+ case RX_ENC_HT:
ri.mcs = status->rate_idx;
ri.flags |= RATE_INFO_FLAGS_MCS;
- if (status->flag & RX_FLAG_40MHZ)
- ri.bw = RATE_INFO_BW_40;
- else
- ri.bw = RATE_INFO_BW_20;
- if (status->flag & RX_FLAG_SHORT_GI)
+ ri.bw = status->bw;
+ if (status->enc_flags & RX_ENC_FLAG_SHORT_GI)
ri.flags |= RATE_INFO_FLAGS_SHORT_GI;
- } else if (status->flag & RX_FLAG_VHT) {
+ break;
+ case RX_ENC_VHT:
ri.flags |= RATE_INFO_FLAGS_VHT_MCS;
ri.mcs = status->rate_idx;
- ri.nss = status->vht_nss;
- if (status->flag & RX_FLAG_40MHZ)
- ri.bw = RATE_INFO_BW_40;
- else if (status->vht_flag & RX_VHT_FLAG_80MHZ)
- ri.bw = RATE_INFO_BW_80;
- else if (status->vht_flag & RX_VHT_FLAG_160MHZ)
- ri.bw = RATE_INFO_BW_160;
- else
- ri.bw = RATE_INFO_BW_20;
- if (status->flag & RX_FLAG_SHORT_GI)
+ ri.nss = status->nss;
+ ri.bw = status->bw;
+ if (status->enc_flags & RX_ENC_FLAG_SHORT_GI)
ri.flags |= RATE_INFO_FLAGS_SHORT_GI;
- } else {
+ break;
+ default:
+ WARN_ON(1);
+ /* fall through */
+ case RX_ENC_LEGACY: {
struct ieee80211_supported_band *sband;
int shift = 0;
int bitrate;
- if (status->flag & RX_FLAG_10MHZ) {
+ ri.bw = status->bw;
+
+ switch (status->bw) {
+ case RATE_INFO_BW_10:
shift = 1;
- ri.bw = RATE_INFO_BW_10;
- } else if (status->flag & RX_FLAG_5MHZ) {
+ break;
+ case RATE_INFO_BW_5:
shift = 2;
- ri.bw = RATE_INFO_BW_5;
- } else {
- ri.bw = RATE_INFO_BW_20;
+ break;
}
sband = local->hw.wiphy->bands[status->band];
@@ -2762,19 +2768,21 @@ u64 ieee80211_calculate_rx_timestamp(struct ieee80211_local *local,
if (status->band == NL80211_BAND_5GHZ) {
ts += 20 << shift;
mpdu_offset += 2;
- } else if (status->flag & RX_FLAG_SHORTPRE) {
+ } else if (status->enc_flags & RX_ENC_FLAG_SHORTPRE) {
ts += 96;
} else {
ts += 192;
}
}
+ break;
+ }
}
rate = cfg80211_calculate_bitrate(&ri);
if (WARN_ONCE(!rate,
"Invalid bitrate: flags=0x%llx, idx=%d, vht_nss=%d\n",
(unsigned long long)status->flag, status->rate_idx,
- status->vht_nss))
+ status->nss))
return 0;
/* rewind from end of MPDU */
@@ -2791,8 +2799,10 @@ void ieee80211_dfs_cac_cancel(struct ieee80211_local *local)
struct ieee80211_sub_if_data *sdata;
struct cfg80211_chan_def chandef;
+ /* for interface list, to avoid linking iflist_mtx and chanctx_mtx */
+ ASSERT_RTNL();
+
mutex_lock(&local->mtx);
- mutex_lock(&local->iflist_mtx);
list_for_each_entry(sdata, &local->interfaces, list) {
/* it might be waiting for the local->mtx, but then
* by the time it gets it, sdata->wdev.cac_started
@@ -2809,7 +2819,6 @@ void ieee80211_dfs_cac_cancel(struct ieee80211_local *local)
GFP_KERNEL);
}
}
- mutex_unlock(&local->iflist_mtx);
mutex_unlock(&local->mtx);
}
@@ -2831,7 +2840,9 @@ void ieee80211_dfs_radar_detected_work(struct work_struct *work)
}
mutex_unlock(&local->chanctx_mtx);
+ rtnl_lock();
ieee80211_dfs_cac_cancel(local);
+ rtnl_unlock();
if (num_chanctx > 1)
/* XXX: multi-channel is not supported yet */
@@ -2846,7 +2857,7 @@ void ieee80211_radar_detected(struct ieee80211_hw *hw)
trace_api_radar_detected(local);
- ieee80211_queue_work(hw, &local->radar_detected_work);
+ schedule_work(&local->radar_detected_work);
}
EXPORT_SYMBOL(ieee80211_radar_detected);
diff --git a/net/mac802154/ieee802154_i.h b/net/mac802154/ieee802154_i.h
index 56ccffa3f2bf..62141dcec2d6 100644
--- a/net/mac802154/ieee802154_i.h
+++ b/net/mac802154/ieee802154_i.h
@@ -19,6 +19,7 @@
#ifndef __IEEE802154_I_H
#define __IEEE802154_I_H
+#include <linux/interrupt.h>
#include <linux/mutex.h>
#include <linux/hrtimer.h>
#include <net/cfg802154.h>
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index 6414079aa729..257ec66009da 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -24,6 +24,9 @@
#include <net/nexthop.h>
#include "internal.h"
+/* max memory we will use for mpls_route */
+#define MAX_MPLS_ROUTE_MEM 4096
+
/* Maximum number of labels to look ahead at when selecting a path of
* a multipath route
*/
@@ -32,7 +35,9 @@
#define MPLS_NEIGH_TABLE_UNSPEC (NEIGH_LINK_TABLE + 1)
static int zero = 0;
+static int one = 1;
static int label_limit = (1 << 20) - 1;
+static int ttl_max = 255;
static void rtmsg_lfib(int event, u32 label, struct mpls_route *rt,
struct nlmsghdr *nlh, struct net *net, u32 portid,
@@ -58,10 +63,7 @@ EXPORT_SYMBOL_GPL(mpls_output_possible);
static u8 *__mpls_nh_via(struct mpls_route *rt, struct mpls_nh *nh)
{
- u8 *nh0_via = PTR_ALIGN((u8 *)&rt->rt_nh[rt->rt_nhn], VIA_ALEN_ALIGN);
- int nh_index = nh - rt->rt_nh;
-
- return nh0_via + rt->rt_max_alen * nh_index;
+ return (u8 *)nh + rt->rt_via_offset;
}
static const u8 *mpls_nh_via(const struct mpls_route *rt,
@@ -187,21 +189,32 @@ static u32 mpls_multipath_hash(struct mpls_route *rt, struct sk_buff *skb)
return hash;
}
+static struct mpls_nh *mpls_get_nexthop(struct mpls_route *rt, u8 index)
+{
+ return (struct mpls_nh *)((u8 *)rt->rt_nh + index * rt->rt_nh_size);
+}
+
+/* number of alive nexthops (rt->rt_nhn_alive) and the flags for
+ * a next hop (nh->nh_flags) are modified by netdev event handlers.
+ * Since those fields can change at any moment, use READ_ONCE to
+ * access both.
+ */
static struct mpls_nh *mpls_select_multipath(struct mpls_route *rt,
struct sk_buff *skb)
{
- int alive = ACCESS_ONCE(rt->rt_nhn_alive);
u32 hash = 0;
int nh_index = 0;
int n = 0;
+ u8 alive;
/* No need to look further into packet if there's only
* one path
*/
if (rt->rt_nhn == 1)
- goto out;
+ return rt->rt_nh;
- if (alive <= 0)
+ alive = READ_ONCE(rt->rt_nhn_alive);
+ if (alive == 0)
return NULL;
hash = mpls_multipath_hash(rt, skb);
@@ -209,7 +222,9 @@ static struct mpls_nh *mpls_select_multipath(struct mpls_route *rt,
if (alive == rt->rt_nhn)
goto out;
for_nexthops(rt) {
- if (nh->nh_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN))
+ unsigned int nh_flags = READ_ONCE(nh->nh_flags);
+
+ if (nh_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN))
continue;
if (n == nh_index)
return nh;
@@ -217,11 +232,11 @@ static struct mpls_nh *mpls_select_multipath(struct mpls_route *rt,
} endfor_nexthops(rt);
out:
- return &rt->rt_nh[nh_index];
+ return mpls_get_nexthop(rt, nh_index);
}
-static bool mpls_egress(struct mpls_route *rt, struct sk_buff *skb,
- struct mpls_entry_decoded dec)
+static bool mpls_egress(struct net *net, struct mpls_route *rt,
+ struct sk_buff *skb, struct mpls_entry_decoded dec)
{
enum mpls_payload_type payload_type;
bool success = false;
@@ -246,22 +261,46 @@ static bool mpls_egress(struct mpls_route *rt, struct sk_buff *skb,
switch (payload_type) {
case MPT_IPV4: {
struct iphdr *hdr4 = ip_hdr(skb);
+ u8 new_ttl;
skb->protocol = htons(ETH_P_IP);
+
+ /* If propagating TTL, take the decremented TTL from
+ * the incoming MPLS header, otherwise decrement the
+ * TTL, but only if not 0 to avoid underflow.
+ */
+ if (rt->rt_ttl_propagate == MPLS_TTL_PROP_ENABLED ||
+ (rt->rt_ttl_propagate == MPLS_TTL_PROP_DEFAULT &&
+ net->mpls.ip_ttl_propagate))
+ new_ttl = dec.ttl;
+ else
+ new_ttl = hdr4->ttl ? hdr4->ttl - 1 : 0;
+
csum_replace2(&hdr4->check,
htons(hdr4->ttl << 8),
- htons(dec.ttl << 8));
- hdr4->ttl = dec.ttl;
+ htons(new_ttl << 8));
+ hdr4->ttl = new_ttl;
success = true;
break;
}
case MPT_IPV6: {
struct ipv6hdr *hdr6 = ipv6_hdr(skb);
skb->protocol = htons(ETH_P_IPV6);
- hdr6->hop_limit = dec.ttl;
+
+ /* If propagating TTL, take the decremented TTL from
+ * the incoming MPLS header, otherwise decrement the
+ * hop limit, but only if not 0 to avoid underflow.
+ */
+ if (rt->rt_ttl_propagate == MPLS_TTL_PROP_ENABLED ||
+ (rt->rt_ttl_propagate == MPLS_TTL_PROP_DEFAULT &&
+ net->mpls.ip_ttl_propagate))
+ hdr6->hop_limit = dec.ttl;
+ else if (hdr6->hop_limit)
+ hdr6->hop_limit = hdr6->hop_limit - 1;
success = true;
break;
}
case MPT_UNSPEC:
+ /* Should have decided which protocol it is by now */
break;
}
@@ -361,7 +400,7 @@ static int mpls_forward(struct sk_buff *skb, struct net_device *dev,
if (unlikely(!new_header_size && dec.bos)) {
/* Penultimate hop popping */
- if (!mpls_egress(rt, skb, dec))
+ if (!mpls_egress(dev_net(out_dev), rt, skb, dec))
goto err;
} else {
bool bos;
@@ -412,6 +451,7 @@ static struct packet_type mpls_packet_type __read_mostly = {
static const struct nla_policy rtm_mpls_policy[RTA_MAX+1] = {
[RTA_DST] = { .type = NLA_U32 },
[RTA_OIF] = { .type = NLA_U32 },
+ [RTA_TTL_PROPAGATE] = { .type = NLA_U8 },
};
struct mpls_route_config {
@@ -421,6 +461,7 @@ struct mpls_route_config {
u8 rc_via_alen;
u8 rc_via[MAX_VIA_ALEN];
u32 rc_label;
+ u8 rc_ttl_propagate;
u8 rc_output_labels;
u32 rc_output_label[MAX_NEW_LABELS];
u32 rc_nlflags;
@@ -430,20 +471,27 @@ struct mpls_route_config {
int rc_mp_len;
};
-static struct mpls_route *mpls_rt_alloc(int num_nh, u8 max_alen)
+/* all nexthops within a route have the same size based on max
+ * number of labels and max via length for a hop
+ */
+static struct mpls_route *mpls_rt_alloc(u8 num_nh, u8 max_alen, u8 max_labels)
{
- u8 max_alen_aligned = ALIGN(max_alen, VIA_ALEN_ALIGN);
+ u8 nh_size = MPLS_NH_SIZE(max_labels, max_alen);
struct mpls_route *rt;
+ size_t size;
- rt = kzalloc(ALIGN(sizeof(*rt) + num_nh * sizeof(*rt->rt_nh),
- VIA_ALEN_ALIGN) +
- num_nh * max_alen_aligned,
- GFP_KERNEL);
- if (rt) {
- rt->rt_nhn = num_nh;
- rt->rt_nhn_alive = num_nh;
- rt->rt_max_alen = max_alen_aligned;
- }
+ size = sizeof(*rt) + num_nh * nh_size;
+ if (size > MAX_MPLS_ROUTE_MEM)
+ return ERR_PTR(-EINVAL);
+
+ rt = kzalloc(size, GFP_KERNEL);
+ if (!rt)
+ return ERR_PTR(-ENOMEM);
+
+ rt->rt_nhn = num_nh;
+ rt->rt_nhn_alive = num_nh;
+ rt->rt_nh_size = nh_size;
+ rt->rt_via_offset = MPLS_NH_VIA_OFF(max_labels);
return rt;
}
@@ -648,9 +696,6 @@ static int mpls_nh_build_from_cfg(struct mpls_route_config *cfg,
return -ENOMEM;
err = -EINVAL;
- /* Ensure only a supported number of labels are present */
- if (cfg->rc_output_labels > MAX_NEW_LABELS)
- goto errout;
nh->nh_labels = cfg->rc_output_labels;
for (i = 0; i < nh->nh_labels; i++)
@@ -675,7 +720,7 @@ errout:
static int mpls_nh_build(struct net *net, struct mpls_route *rt,
struct mpls_nh *nh, int oif, struct nlattr *via,
- struct nlattr *newdst)
+ struct nlattr *newdst, u8 max_labels)
{
int err = -ENOMEM;
@@ -683,7 +728,7 @@ static int mpls_nh_build(struct net *net, struct mpls_route *rt,
goto errout;
if (newdst) {
- err = nla_get_labels(newdst, MAX_NEW_LABELS,
+ err = nla_get_labels(newdst, max_labels,
&nh->nh_labels, nh->nh_label);
if (err)
goto errout;
@@ -708,22 +753,20 @@ errout:
return err;
}
-static int mpls_count_nexthops(struct rtnexthop *rtnh, int len,
- u8 cfg_via_alen, u8 *max_via_alen)
+static u8 mpls_count_nexthops(struct rtnexthop *rtnh, int len,
+ u8 cfg_via_alen, u8 *max_via_alen,
+ u8 *max_labels)
{
- int nhs = 0;
int remaining = len;
-
- if (!rtnh) {
- *max_via_alen = cfg_via_alen;
- return 1;
- }
+ u8 nhs = 0;
*max_via_alen = 0;
+ *max_labels = 0;
while (rtnh_ok(rtnh, remaining)) {
struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
int attrlen;
+ u8 n_labels = 0;
attrlen = rtnh_attrlen(rtnh);
nla = nla_find(attrs, attrlen, RTA_VIA);
@@ -737,7 +780,20 @@ static int mpls_count_nexthops(struct rtnexthop *rtnh, int len,
via_alen);
}
+ nla = nla_find(attrs, attrlen, RTA_NEWDST);
+ if (nla &&
+ nla_get_labels(nla, MAX_NEW_LABELS, &n_labels, NULL) != 0)
+ return 0;
+
+ *max_labels = max_t(u8, *max_labels, n_labels);
+
+ /* number of nexthops is tracked by a u8.
+ * Check for overflow.
+ */
+ if (nhs == 255)
+ return 0;
nhs++;
+
rtnh = rtnh_next(rtnh, &remaining);
}
@@ -746,13 +802,13 @@ static int mpls_count_nexthops(struct rtnexthop *rtnh, int len,
}
static int mpls_nh_build_multi(struct mpls_route_config *cfg,
- struct mpls_route *rt)
+ struct mpls_route *rt, u8 max_labels)
{
struct rtnexthop *rtnh = cfg->rc_mp;
struct nlattr *nla_via, *nla_newdst;
int remaining = cfg->rc_mp_len;
- int nhs = 0;
int err = 0;
+ u8 nhs = 0;
change_nexthops(rt) {
int attrlen;
@@ -779,7 +835,8 @@ static int mpls_nh_build_multi(struct mpls_route_config *cfg,
}
err = mpls_nh_build(cfg->rc_nlinfo.nl_net, rt, nh,
- rtnh->rtnh_ifindex, nla_via, nla_newdst);
+ rtnh->rtnh_ifindex, nla_via, nla_newdst,
+ max_labels);
if (err)
goto errout;
@@ -806,7 +863,8 @@ static int mpls_route_add(struct mpls_route_config *cfg)
int err = -EINVAL;
u8 max_via_alen;
unsigned index;
- int nhs;
+ u8 max_labels;
+ u8 nhs;
index = cfg->rc_label;
@@ -844,21 +902,32 @@ static int mpls_route_add(struct mpls_route_config *cfg)
goto errout;
err = -EINVAL;
- nhs = mpls_count_nexthops(cfg->rc_mp, cfg->rc_mp_len,
- cfg->rc_via_alen, &max_via_alen);
+ if (cfg->rc_mp) {
+ nhs = mpls_count_nexthops(cfg->rc_mp, cfg->rc_mp_len,
+ cfg->rc_via_alen, &max_via_alen,
+ &max_labels);
+ } else {
+ max_via_alen = cfg->rc_via_alen;
+ max_labels = cfg->rc_output_labels;
+ nhs = 1;
+ }
+
if (nhs == 0)
goto errout;
err = -ENOMEM;
- rt = mpls_rt_alloc(nhs, max_via_alen);
- if (!rt)
+ rt = mpls_rt_alloc(nhs, max_via_alen, max_labels);
+ if (IS_ERR(rt)) {
+ err = PTR_ERR(rt);
goto errout;
+ }
rt->rt_protocol = cfg->rc_protocol;
rt->rt_payload_type = cfg->rc_payload_type;
+ rt->rt_ttl_propagate = cfg->rc_ttl_propagate;
if (cfg->rc_mp)
- err = mpls_nh_build_multi(cfg, rt);
+ err = mpls_nh_build_multi(cfg, rt, max_labels);
else
err = mpls_nh_build_from_cfg(cfg, rt);
if (err)
@@ -1011,8 +1080,8 @@ static int mpls_netconf_msgsize_devconf(int type)
return size;
}
-static void mpls_netconf_notify_devconf(struct net *net, int type,
- struct mpls_dev *mdev)
+static void mpls_netconf_notify_devconf(struct net *net, int event,
+ int type, struct mpls_dev *mdev)
{
struct sk_buff *skb;
int err = -ENOBUFS;
@@ -1021,8 +1090,7 @@ static void mpls_netconf_notify_devconf(struct net *net, int type,
if (!skb)
goto errout;
- err = mpls_netconf_fill_devconf(skb, mdev, 0, 0, RTM_NEWNETCONF,
- 0, type);
+ err = mpls_netconf_fill_devconf(skb, mdev, 0, 0, event, 0, type);
if (err < 0) {
/* -EMSGSIZE implies BUG in mpls_netconf_msgsize_devconf() */
WARN_ON(err == -EMSGSIZE);
@@ -1042,7 +1110,8 @@ static const struct nla_policy devconf_mpls_policy[NETCONFA_MAX + 1] = {
};
static int mpls_netconf_get_devconf(struct sk_buff *in_skb,
- struct nlmsghdr *nlh)
+ struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(in_skb->sk);
struct nlattr *tb[NETCONFA_MAX + 1];
@@ -1054,7 +1123,7 @@ static int mpls_netconf_get_devconf(struct sk_buff *in_skb,
int err;
err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
- devconf_mpls_policy);
+ devconf_mpls_policy, NULL);
if (err < 0)
goto errout;
@@ -1155,9 +1224,8 @@ static int mpls_conf_proc(struct ctl_table *ctl, int write,
if (i == offsetof(struct mpls_dev, input_enabled) &&
val != oval) {
- mpls_netconf_notify_devconf(net,
- NETCONFA_INPUT,
- mdev);
+ mpls_netconf_notify_devconf(net, RTM_NEWNETCONF,
+ NETCONFA_INPUT, mdev);
}
}
@@ -1198,10 +1266,11 @@ static int mpls_dev_sysctl_register(struct net_device *dev,
snprintf(path, sizeof(path), "net/mpls/conf/%s", dev->name);
- mdev->sysctl = register_net_sysctl(dev_net(dev), path, table);
+ mdev->sysctl = register_net_sysctl(net, path, table);
if (!mdev->sysctl)
goto free;
+ mpls_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_ALL, mdev);
return 0;
free:
@@ -1210,13 +1279,17 @@ out:
return -ENOBUFS;
}
-static void mpls_dev_sysctl_unregister(struct mpls_dev *mdev)
+static void mpls_dev_sysctl_unregister(struct net_device *dev,
+ struct mpls_dev *mdev)
{
+ struct net *net = dev_net(dev);
struct ctl_table *table;
table = mdev->sysctl->ctl_table_arg;
unregister_net_sysctl_table(mdev->sysctl);
kfree(table);
+
+ mpls_netconf_notify_devconf(net, RTM_DELNETCONF, 0, mdev);
}
static struct mpls_dev *mpls_add_dev(struct net_device *dev)
@@ -1242,11 +1315,12 @@ static struct mpls_dev *mpls_add_dev(struct net_device *dev)
u64_stats_init(&mpls_stats->syncp);
}
+ mdev->dev = dev;
+
err = mpls_dev_sysctl_register(dev, mdev);
if (err)
goto free;
- mdev->dev = dev;
rcu_assign_pointer(dev->mpls_ptr, mdev);
return mdev;
@@ -1269,8 +1343,7 @@ static void mpls_ifdown(struct net_device *dev, int event)
{
struct mpls_route __rcu **platform_label;
struct net *net = dev_net(dev);
- unsigned int nh_flags = RTNH_F_DEAD | RTNH_F_LINKDOWN;
- unsigned int alive;
+ u8 alive, deleted;
unsigned index;
platform_label = rtnl_dereference(net->mpls.platform_label);
@@ -1281,36 +1354,48 @@ static void mpls_ifdown(struct net_device *dev, int event)
continue;
alive = 0;
+ deleted = 0;
change_nexthops(rt) {
+ unsigned int nh_flags = nh->nh_flags;
+
if (rtnl_dereference(nh->nh_dev) != dev)
goto next;
switch (event) {
case NETDEV_DOWN:
case NETDEV_UNREGISTER:
- nh->nh_flags |= RTNH_F_DEAD;
+ nh_flags |= RTNH_F_DEAD;
/* fall through */
case NETDEV_CHANGE:
- nh->nh_flags |= RTNH_F_LINKDOWN;
+ nh_flags |= RTNH_F_LINKDOWN;
break;
}
if (event == NETDEV_UNREGISTER)
RCU_INIT_POINTER(nh->nh_dev, NULL);
+
+ if (nh->nh_flags != nh_flags)
+ WRITE_ONCE(nh->nh_flags, nh_flags);
next:
- if (!(nh->nh_flags & nh_flags))
+ if (!(nh_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN)))
alive++;
+ if (!rtnl_dereference(nh->nh_dev))
+ deleted++;
} endfor_nexthops(rt);
WRITE_ONCE(rt->rt_nhn_alive, alive);
+
+ /* if there are no more nexthops, delete the route */
+ if (event == NETDEV_UNREGISTER && deleted == rt->rt_nhn)
+ mpls_route_update(net, index, NULL, NULL);
}
}
-static void mpls_ifup(struct net_device *dev, unsigned int nh_flags)
+static void mpls_ifup(struct net_device *dev, unsigned int flags)
{
struct mpls_route __rcu **platform_label;
struct net *net = dev_net(dev);
unsigned index;
- int alive;
+ u8 alive;
platform_label = rtnl_dereference(net->mpls.platform_label);
for (index = 0; index < net->mpls.platform_labels; index++) {
@@ -1321,20 +1406,22 @@ static void mpls_ifup(struct net_device *dev, unsigned int nh_flags)
alive = 0;
change_nexthops(rt) {
+ unsigned int nh_flags = nh->nh_flags;
struct net_device *nh_dev =
rtnl_dereference(nh->nh_dev);
- if (!(nh->nh_flags & nh_flags)) {
+ if (!(nh_flags & flags)) {
alive++;
continue;
}
if (nh_dev != dev)
continue;
alive++;
- nh->nh_flags &= ~nh_flags;
+ nh_flags &= ~flags;
+ WRITE_ONCE(nh->nh_flags, flags);
} endfor_nexthops(rt);
- ACCESS_ONCE(rt->rt_nhn_alive) = alive;
+ WRITE_ONCE(rt->rt_nhn_alive, alive);
}
}
@@ -1385,7 +1472,7 @@ static int mpls_dev_notify(struct notifier_block *this, unsigned long event,
mpls_ifdown(dev, event);
mdev = mpls_dev_get(dev);
if (mdev) {
- mpls_dev_sysctl_unregister(mdev);
+ mpls_dev_sysctl_unregister(dev, mdev);
RCU_INIT_POINTER(dev->mpls_ptr, NULL);
call_rcu(&mdev->rcu, mpls_dev_destroy_rcu);
}
@@ -1395,7 +1482,7 @@ static int mpls_dev_notify(struct notifier_block *this, unsigned long event,
if (mdev) {
int err;
- mpls_dev_sysctl_unregister(mdev);
+ mpls_dev_sysctl_unregister(dev, mdev);
err = mpls_dev_sysctl_register(dev, mdev);
if (err)
return notifier_from_errno(err);
@@ -1455,16 +1542,18 @@ int nla_put_labels(struct sk_buff *skb, int attrtype,
EXPORT_SYMBOL_GPL(nla_put_labels);
int nla_get_labels(const struct nlattr *nla,
- u32 max_labels, u8 *labels, u32 label[])
+ u8 max_labels, u8 *labels, u32 label[])
{
unsigned len = nla_len(nla);
- unsigned nla_labels;
struct mpls_shim_hdr *nla_label;
+ u8 nla_labels;
bool bos;
int i;
- /* len needs to be an even multiple of 4 (the label size) */
- if (len & 3)
+ /* len needs to be an even multiple of 4 (the label size). Number
+ * of labels is a u8 so check for overflow.
+ */
+ if (len & 3 || len / 4 > 255)
return -EINVAL;
/* Limit the number of new labels allowed */
@@ -1472,6 +1561,10 @@ int nla_get_labels(const struct nlattr *nla,
if (nla_labels > max_labels)
return -EINVAL;
+ /* when label == NULL, caller wants number of labels */
+ if (!label)
+ goto out;
+
nla_label = nla_data(nla);
bos = true;
for (i = nla_labels - 1; i >= 0; i--, bos = false) {
@@ -1495,6 +1588,7 @@ int nla_get_labels(const struct nlattr *nla,
label[i] = dec.label;
}
+out:
*labels = nla_labels;
return 0;
}
@@ -1550,13 +1644,13 @@ static int rtm_to_route_config(struct sk_buff *skb, struct nlmsghdr *nlh,
int index;
int err;
- err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_mpls_policy);
+ err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_mpls_policy,
+ NULL);
if (err < 0)
goto errout;
err = -EINVAL;
rtm = nlmsg_data(nlh);
- memset(cfg, 0, sizeof(*cfg));
if (rtm->rtm_family != AF_MPLS)
goto errout;
@@ -1584,6 +1678,7 @@ static int rtm_to_route_config(struct sk_buff *skb, struct nlmsghdr *nlh,
cfg->rc_label = LABEL_NOT_SPECIFIED;
cfg->rc_protocol = rtm->rtm_protocol;
cfg->rc_via_table = MPLS_NEIGH_TABLE_UNSPEC;
+ cfg->rc_ttl_propagate = MPLS_TTL_PROP_DEFAULT;
cfg->rc_nlflags = nlh->nlmsg_flags;
cfg->rc_nlinfo.portid = NETLINK_CB(skb).portid;
cfg->rc_nlinfo.nlh = nlh;
@@ -1630,6 +1725,17 @@ static int rtm_to_route_config(struct sk_buff *skb, struct nlmsghdr *nlh,
cfg->rc_mp_len = nla_len(nla);
break;
}
+ case RTA_TTL_PROPAGATE:
+ {
+ u8 ttl_propagate = nla_get_u8(nla);
+
+ if (ttl_propagate > 1)
+ goto errout;
+ cfg->rc_ttl_propagate = ttl_propagate ?
+ MPLS_TTL_PROP_ENABLED :
+ MPLS_TTL_PROP_DISABLED;
+ break;
+ }
default:
/* Unsupported attribute */
goto errout;
@@ -1641,29 +1747,47 @@ errout:
return err;
}
-static int mpls_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int mpls_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
- struct mpls_route_config cfg;
+ struct mpls_route_config *cfg;
int err;
- err = rtm_to_route_config(skb, nlh, &cfg);
+ cfg = kzalloc(sizeof(*cfg), GFP_KERNEL);
+ if (!cfg)
+ return -ENOMEM;
+
+ err = rtm_to_route_config(skb, nlh, cfg);
if (err < 0)
- return err;
+ goto out;
+
+ err = mpls_route_del(cfg);
+out:
+ kfree(cfg);
- return mpls_route_del(&cfg);
+ return err;
}
-static int mpls_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int mpls_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
- struct mpls_route_config cfg;
+ struct mpls_route_config *cfg;
int err;
- err = rtm_to_route_config(skb, nlh, &cfg);
+ cfg = kzalloc(sizeof(*cfg), GFP_KERNEL);
+ if (!cfg)
+ return -ENOMEM;
+
+ err = rtm_to_route_config(skb, nlh, cfg);
if (err < 0)
- return err;
+ goto out;
+
+ err = mpls_route_add(cfg);
+out:
+ kfree(cfg);
- return mpls_route_add(&cfg);
+ return err;
}
static int mpls_dump_route(struct sk_buff *skb, u32 portid, u32 seq, int event,
@@ -1690,6 +1814,15 @@ static int mpls_dump_route(struct sk_buff *skb, u32 portid, u32 seq, int event,
if (nla_put_labels(skb, RTA_DST, 1, &label))
goto nla_put_failure;
+
+ if (rt->rt_ttl_propagate != MPLS_TTL_PROP_DEFAULT) {
+ bool ttl_propagate =
+ rt->rt_ttl_propagate == MPLS_TTL_PROP_ENABLED;
+
+ if (nla_put_u8(skb, RTA_TTL_PROPAGATE,
+ ttl_propagate))
+ goto nla_put_failure;
+ }
if (rt->rt_nhn == 1) {
const struct mpls_nh *nh = rt->rt_nh;
@@ -1711,21 +1844,23 @@ static int mpls_dump_route(struct sk_buff *skb, u32 portid, u32 seq, int event,
} else {
struct rtnexthop *rtnh;
struct nlattr *mp;
- int dead = 0;
- int linkdown = 0;
+ u8 linkdown = 0;
+ u8 dead = 0;
mp = nla_nest_start(skb, RTA_MULTIPATH);
if (!mp)
goto nla_put_failure;
for_nexthops(rt) {
+ dev = rtnl_dereference(nh->nh_dev);
+ if (!dev)
+ continue;
+
rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
if (!rtnh)
goto nla_put_failure;
- dev = rtnl_dereference(nh->nh_dev);
- if (dev)
- rtnh->rtnh_ifindex = dev->ifindex;
+ rtnh->rtnh_ifindex = dev->ifindex;
if (nh->nh_flags & RTNH_F_LINKDOWN) {
rtnh->rtnh_flags |= RTNH_F_LINKDOWN;
linkdown++;
@@ -1800,7 +1935,8 @@ static inline size_t lfib_nlmsg_size(struct mpls_route *rt)
{
size_t payload =
NLMSG_ALIGN(sizeof(struct rtmsg))
- + nla_total_size(4); /* RTA_DST */
+ + nla_total_size(4) /* RTA_DST */
+ + nla_total_size(1); /* RTA_TTL_PROPAGATE */
if (rt->rt_nhn == 1) {
struct mpls_nh *nh = rt->rt_nh;
@@ -1816,6 +1952,8 @@ static inline size_t lfib_nlmsg_size(struct mpls_route *rt)
size_t nhsize = 0;
for_nexthops(rt) {
+ if (!rtnl_dereference(nh->nh_dev))
+ continue;
nhsize += nla_total_size(sizeof(struct rtnexthop));
/* RTA_VIA */
if (nh->nh_via_table != MPLS_NEIGH_TABLE_UNSPEC)
@@ -1867,10 +2005,7 @@ static int resize_platform_label_table(struct net *net, size_t limit)
unsigned index;
if (size) {
- labels = kzalloc(size, GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY);
- if (!labels)
- labels = vzalloc(size);
-
+ labels = kvzalloc(size, GFP_KERNEL);
if (!labels)
goto nolabels;
}
@@ -1878,12 +2013,13 @@ static int resize_platform_label_table(struct net *net, size_t limit)
/* In case the predefined labels need to be populated */
if (limit > MPLS_LABEL_IPV4NULL) {
struct net_device *lo = net->loopback_dev;
- rt0 = mpls_rt_alloc(1, lo->addr_len);
- if (!rt0)
+ rt0 = mpls_rt_alloc(1, lo->addr_len, 0);
+ if (IS_ERR(rt0))
goto nort0;
RCU_INIT_POINTER(rt0->rt_nh->nh_dev, lo);
rt0->rt_protocol = RTPROT_KERNEL;
rt0->rt_payload_type = MPT_IPV4;
+ rt0->rt_ttl_propagate = MPLS_TTL_PROP_DEFAULT;
rt0->rt_nh->nh_via_table = NEIGH_LINK_TABLE;
rt0->rt_nh->nh_via_alen = lo->addr_len;
memcpy(__mpls_nh_via(rt0, rt0->rt_nh), lo->dev_addr,
@@ -1891,12 +2027,13 @@ static int resize_platform_label_table(struct net *net, size_t limit)
}
if (limit > MPLS_LABEL_IPV6NULL) {
struct net_device *lo = net->loopback_dev;
- rt2 = mpls_rt_alloc(1, lo->addr_len);
- if (!rt2)
+ rt2 = mpls_rt_alloc(1, lo->addr_len, 0);
+ if (IS_ERR(rt2))
goto nort2;
RCU_INIT_POINTER(rt2->rt_nh->nh_dev, lo);
rt2->rt_protocol = RTPROT_KERNEL;
rt2->rt_payload_type = MPT_IPV6;
+ rt2->rt_ttl_propagate = MPLS_TTL_PROP_DEFAULT;
rt2->rt_nh->nh_via_table = NEIGH_LINK_TABLE;
rt2->rt_nh->nh_via_alen = lo->addr_len;
memcpy(__mpls_nh_via(rt2, rt2->rt_nh), lo->dev_addr,
@@ -1978,6 +2115,9 @@ static int mpls_platform_labels(struct ctl_table *table, int write,
return ret;
}
+#define MPLS_NS_SYSCTL_OFFSET(field) \
+ (&((struct net *)0)->field)
+
static const struct ctl_table mpls_table[] = {
{
.procname = "platform_labels",
@@ -1986,21 +2126,47 @@ static const struct ctl_table mpls_table[] = {
.mode = 0644,
.proc_handler = mpls_platform_labels,
},
+ {
+ .procname = "ip_ttl_propagate",
+ .data = MPLS_NS_SYSCTL_OFFSET(mpls.ip_ttl_propagate),
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
+ .extra2 = &one,
+ },
+ {
+ .procname = "default_ttl",
+ .data = MPLS_NS_SYSCTL_OFFSET(mpls.default_ttl),
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &one,
+ .extra2 = &ttl_max,
+ },
{ }
};
static int mpls_net_init(struct net *net)
{
struct ctl_table *table;
+ int i;
net->mpls.platform_labels = 0;
net->mpls.platform_label = NULL;
+ net->mpls.ip_ttl_propagate = 1;
+ net->mpls.default_ttl = 255;
table = kmemdup(mpls_table, sizeof(mpls_table), GFP_KERNEL);
if (table == NULL)
return -ENOMEM;
- table[0].data = net;
+ /* Table data contains only offsets relative to the base of
+ * the mdev at this point, so make them absolute.
+ */
+ for (i = 0; i < ARRAY_SIZE(mpls_table) - 1; i++)
+ table[i].data = (char *)net + (uintptr_t)table[i].data;
+
net->mpls.ctl = register_net_sysctl(net, "net/mpls", table);
if (net->mpls.ctl == NULL) {
kfree(table);
diff --git a/net/mpls/internal.h b/net/mpls/internal.h
index 76360d8b9579..4db6a5971322 100644
--- a/net/mpls/internal.h
+++ b/net/mpls/internal.h
@@ -2,6 +2,11 @@
#define MPLS_INTERNAL_H
#include <net/mpls.h>
+/* put a reasonable limit on the number of labels
+ * we will accept from userspace
+ */
+#define MAX_NEW_LABELS 30
+
struct mpls_entry_decoded {
u32 label;
u8 ttl;
@@ -64,7 +69,6 @@ struct mpls_dev {
struct sk_buff;
#define LABEL_NOT_SPECIFIED (1 << 20)
-#define MAX_NEW_LABELS 2
/* This maximum ha length copied from the definition of struct neighbour */
#define VIA_ALEN_ALIGN sizeof(unsigned long)
@@ -83,11 +87,35 @@ enum mpls_payload_type {
struct mpls_nh { /* next hop label forwarding entry */
struct net_device __rcu *nh_dev;
+
+ /* nh_flags is accessed under RCU in the packet path; it is
+ * modified handling netdev events with rtnl lock held
+ */
unsigned int nh_flags;
- u32 nh_label[MAX_NEW_LABELS];
u8 nh_labels;
u8 nh_via_alen;
u8 nh_via_table;
+ u8 nh_reserved1;
+
+ u32 nh_label[0];
+};
+
+/* offset of via from beginning of mpls_nh */
+#define MPLS_NH_VIA_OFF(num_labels) \
+ ALIGN(sizeof(struct mpls_nh) + (num_labels) * sizeof(u32), \
+ VIA_ALEN_ALIGN)
+
+/* all nexthops within a route have the same size based on the
+ * max number of labels and max via length across all nexthops
+ */
+#define MPLS_NH_SIZE(num_labels, max_via_alen) \
+ (MPLS_NH_VIA_OFF((num_labels)) + \
+ ALIGN((max_via_alen), VIA_ALEN_ALIGN))
+
+enum mpls_ttl_propagation {
+ MPLS_TTL_PROP_DEFAULT,
+ MPLS_TTL_PROP_ENABLED,
+ MPLS_TTL_PROP_DISABLED,
};
/* The route, nexthops and vias are stored together in the same memory
@@ -98,16 +126,16 @@ struct mpls_nh { /* next hop label forwarding entry */
* +----------------------+
* | mpls_nh 0 |
* +----------------------+
- * | ... |
- * +----------------------+
- * | mpls_nh n-1 |
- * +----------------------+
- * | alignment padding |
+ * | alignment padding | 4 bytes for odd number of labels
* +----------------------+
* | via[rt_max_alen] 0 |
* +----------------------+
+ * | alignment padding | via's aligned on sizeof(unsigned long)
+ * +----------------------+
* | ... |
* +----------------------+
+ * | mpls_nh n-1 |
+ * +----------------------+
* | via[rt_max_alen] n-1 |
* +----------------------+
*/
@@ -116,22 +144,30 @@ struct mpls_route { /* next hop label forwarding entry */
u8 rt_protocol;
u8 rt_payload_type;
u8 rt_max_alen;
- unsigned int rt_nhn;
- unsigned int rt_nhn_alive;
+ u8 rt_ttl_propagate;
+ u8 rt_nhn;
+ /* rt_nhn_alive is accessed under RCU in the packet path; it
+ * is modified handling netdev events with rtnl lock held
+ */
+ u8 rt_nhn_alive;
+ u8 rt_nh_size;
+ u8 rt_via_offset;
+ u8 rt_reserved1;
struct mpls_nh rt_nh[0];
};
#define for_nexthops(rt) { \
- int nhsel; struct mpls_nh *nh; \
- for (nhsel = 0, nh = (rt)->rt_nh; \
+ int nhsel; struct mpls_nh *nh; u8 *__nh; \
+ for (nhsel = 0, nh = (rt)->rt_nh, __nh = (u8 *)((rt)->rt_nh); \
nhsel < (rt)->rt_nhn; \
- nh++, nhsel++)
+ __nh += rt->rt_nh_size, nh = (struct mpls_nh *)__nh, nhsel++)
#define change_nexthops(rt) { \
- int nhsel; struct mpls_nh *nh; \
- for (nhsel = 0, nh = (struct mpls_nh *)((rt)->rt_nh); \
+ int nhsel; struct mpls_nh *nh; u8 *__nh; \
+ for (nhsel = 0, nh = (struct mpls_nh *)((rt)->rt_nh), \
+ __nh = (u8 *)((rt)->rt_nh); \
nhsel < (rt)->rt_nhn; \
- nh++, nhsel++)
+ __nh += rt->rt_nh_size, nh = (struct mpls_nh *)__nh, nhsel++)
#define endfor_nexthops(rt) }
@@ -166,7 +202,7 @@ static inline struct mpls_dev *mpls_dev_get(const struct net_device *dev)
int nla_put_labels(struct sk_buff *skb, int attrtype, u8 labels,
const u32 label[]);
-int nla_get_labels(const struct nlattr *nla, u32 max_labels, u8 *labels,
+int nla_get_labels(const struct nlattr *nla, u8 max_labels, u8 *labels,
u32 label[]);
int nla_get_via(const struct nlattr *nla, u8 *via_alen, u8 *via_table,
u8 via[]);
diff --git a/net/mpls/mpls_iptunnel.c b/net/mpls/mpls_iptunnel.c
index e4e4424f9eb1..369c7a23c86c 100644
--- a/net/mpls/mpls_iptunnel.c
+++ b/net/mpls/mpls_iptunnel.c
@@ -29,6 +29,7 @@
static const struct nla_policy mpls_iptunnel_policy[MPLS_IPTUNNEL_MAX + 1] = {
[MPLS_IPTUNNEL_DST] = { .type = NLA_U32 },
+ [MPLS_IPTUNNEL_TTL] = { .type = NLA_U8 },
};
static unsigned int mpls_encap_size(struct mpls_iptunnel_encap *en)
@@ -49,6 +50,7 @@ static int mpls_xmit(struct sk_buff *skb)
struct rtable *rt = NULL;
struct rt6_info *rt6 = NULL;
struct mpls_dev *out_mdev;
+ struct net *net;
int err = 0;
bool bos;
int i;
@@ -56,17 +58,7 @@ static int mpls_xmit(struct sk_buff *skb)
/* Find the output device */
out_dev = dst->dev;
-
- /* Obtain the ttl */
- if (dst->ops->family == AF_INET) {
- ttl = ip_hdr(skb)->ttl;
- rt = (struct rtable *)dst;
- } else if (dst->ops->family == AF_INET6) {
- ttl = ipv6_hdr(skb)->hop_limit;
- rt6 = (struct rt6_info *)dst;
- } else {
- goto drop;
- }
+ net = dev_net(out_dev);
skb_orphan(skb);
@@ -78,6 +70,38 @@ static int mpls_xmit(struct sk_buff *skb)
tun_encap_info = mpls_lwtunnel_encap(dst->lwtstate);
+ /* Obtain the ttl using the following set of rules.
+ *
+ * LWT ttl propagation setting:
+ * - disabled => use default TTL value from LWT
+ * - enabled => use TTL value from IPv4/IPv6 header
+ * - default =>
+ * Global ttl propagation setting:
+ * - disabled => use default TTL value from global setting
+ * - enabled => use TTL value from IPv4/IPv6 header
+ */
+ if (dst->ops->family == AF_INET) {
+ if (tun_encap_info->ttl_propagate == MPLS_TTL_PROP_DISABLED)
+ ttl = tun_encap_info->default_ttl;
+ else if (tun_encap_info->ttl_propagate == MPLS_TTL_PROP_DEFAULT &&
+ !net->mpls.ip_ttl_propagate)
+ ttl = net->mpls.default_ttl;
+ else
+ ttl = ip_hdr(skb)->ttl;
+ rt = (struct rtable *)dst;
+ } else if (dst->ops->family == AF_INET6) {
+ if (tun_encap_info->ttl_propagate == MPLS_TTL_PROP_DISABLED)
+ ttl = tun_encap_info->default_ttl;
+ else if (tun_encap_info->ttl_propagate == MPLS_TTL_PROP_DEFAULT &&
+ !net->mpls.ip_ttl_propagate)
+ ttl = net->mpls.default_ttl;
+ else
+ ttl = ipv6_hdr(skb)->hop_limit;
+ rt6 = (struct rt6_info *)dst;
+ } else {
+ goto drop;
+ }
+
/* Verify the destination can hold the packet */
new_header_size = mpls_encap_size(tun_encap_info);
mtu = mpls_dev_mtu(out_dev);
@@ -140,10 +164,11 @@ static int mpls_build_state(struct nlattr *nla,
struct mpls_iptunnel_encap *tun_encap_info;
struct nlattr *tb[MPLS_IPTUNNEL_MAX + 1];
struct lwtunnel_state *newts;
+ u8 n_labels;
int ret;
ret = nla_parse_nested(tb, MPLS_IPTUNNEL_MAX, nla,
- mpls_iptunnel_policy);
+ mpls_iptunnel_policy, NULL);
if (ret < 0)
return ret;
@@ -151,15 +176,32 @@ static int mpls_build_state(struct nlattr *nla,
return -EINVAL;
- newts = lwtunnel_state_alloc(sizeof(*tun_encap_info));
+ /* determine number of labels */
+ if (nla_get_labels(tb[MPLS_IPTUNNEL_DST],
+ MAX_NEW_LABELS, &n_labels, NULL))
+ return -EINVAL;
+
+ newts = lwtunnel_state_alloc(sizeof(*tun_encap_info) +
+ n_labels * sizeof(u32));
if (!newts)
return -ENOMEM;
tun_encap_info = mpls_lwtunnel_encap(newts);
- ret = nla_get_labels(tb[MPLS_IPTUNNEL_DST], MAX_NEW_LABELS,
+ ret = nla_get_labels(tb[MPLS_IPTUNNEL_DST], n_labels,
&tun_encap_info->labels, tun_encap_info->label);
if (ret)
goto errout;
+
+ tun_encap_info->ttl_propagate = MPLS_TTL_PROP_DEFAULT;
+
+ if (tb[MPLS_IPTUNNEL_TTL]) {
+ tun_encap_info->default_ttl = nla_get_u8(tb[MPLS_IPTUNNEL_TTL]);
+ /* TTL 0 implies propagate from IP header */
+ tun_encap_info->ttl_propagate = tun_encap_info->default_ttl ?
+ MPLS_TTL_PROP_DISABLED :
+ MPLS_TTL_PROP_ENABLED;
+ }
+
newts->type = LWTUNNEL_ENCAP_MPLS;
newts->flags |= LWTUNNEL_STATE_XMIT_REDIRECT;
newts->headroom = mpls_encap_size(tun_encap_info);
@@ -186,6 +228,10 @@ static int mpls_fill_encap_info(struct sk_buff *skb,
tun_encap_info->label))
goto nla_put_failure;
+ if (tun_encap_info->ttl_propagate != MPLS_TTL_PROP_DEFAULT &&
+ nla_put_u8(skb, MPLS_IPTUNNEL_TTL, tun_encap_info->default_ttl))
+ goto nla_put_failure;
+
return 0;
nla_put_failure:
@@ -195,10 +241,16 @@ nla_put_failure:
static int mpls_encap_nlsize(struct lwtunnel_state *lwtstate)
{
struct mpls_iptunnel_encap *tun_encap_info;
+ int nlsize;
tun_encap_info = mpls_lwtunnel_encap(lwtstate);
- return nla_total_size(tun_encap_info->labels * 4);
+ nlsize = nla_total_size(tun_encap_info->labels * 4);
+
+ if (tun_encap_info->ttl_propagate != MPLS_TTL_PROP_DEFAULT)
+ nlsize += nla_total_size(1);
+
+ return nlsize;
}
static int mpls_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b)
@@ -207,10 +259,12 @@ static int mpls_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b)
struct mpls_iptunnel_encap *b_hdr = mpls_lwtunnel_encap(b);
int l;
- if (a_hdr->labels != b_hdr->labels)
+ if (a_hdr->labels != b_hdr->labels ||
+ a_hdr->ttl_propagate != b_hdr->ttl_propagate ||
+ a_hdr->default_ttl != b_hdr->default_ttl)
return 1;
- for (l = 0; l < MAX_NEW_LABELS; l++)
+ for (l = 0; l < a_hdr->labels; l++)
if (a_hdr->label[l] != b_hdr->label[l])
return 1;
return 0;
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index a87a6f8a74d8..552d606e57ca 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -126,14 +126,15 @@ int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg)
}
EXPORT_SYMBOL(nf_register_net_hook);
-void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg)
+static struct nf_hook_entry *
+__nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg)
{
struct nf_hook_entry __rcu **pp;
struct nf_hook_entry *p;
pp = nf_hook_entry_head(net, reg);
if (WARN_ON_ONCE(!pp))
- return;
+ return NULL;
mutex_lock(&nf_hook_mutex);
for (; (p = nf_entry_dereference(*pp)) != NULL; pp = &p->next) {
@@ -145,7 +146,7 @@ void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg)
mutex_unlock(&nf_hook_mutex);
if (!p) {
WARN(1, "nf_unregister_net_hook: hook not found!\n");
- return;
+ return NULL;
}
#ifdef CONFIG_NETFILTER_INGRESS
if (reg->pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_INGRESS)
@@ -154,10 +155,24 @@ void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg)
#ifdef HAVE_JUMP_LABEL
static_key_slow_dec(&nf_hooks_needed[reg->pf][reg->hooknum]);
#endif
+
+ return p;
+}
+
+void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg)
+{
+ struct nf_hook_entry *p = __nf_unregister_net_hook(net, reg);
+ unsigned int nfq;
+
+ if (!p)
+ return;
+
synchronize_net();
- nf_queue_nf_hook_drop(net, p);
+
/* other cpu might still process nfqueue verdict that used reg */
- synchronize_net();
+ nfq = nf_queue_nf_hook_drop(net);
+ if (nfq)
+ synchronize_net();
kfree(p);
}
EXPORT_SYMBOL(nf_unregister_net_hook);
@@ -183,10 +198,32 @@ err:
EXPORT_SYMBOL(nf_register_net_hooks);
void nf_unregister_net_hooks(struct net *net, const struct nf_hook_ops *reg,
- unsigned int n)
+ unsigned int hookcount)
{
- while (n-- > 0)
- nf_unregister_net_hook(net, &reg[n]);
+ struct nf_hook_entry *to_free[16];
+ unsigned int i, n, nfq;
+
+ do {
+ n = min_t(unsigned int, hookcount, ARRAY_SIZE(to_free));
+
+ for (i = 0; i < n; i++)
+ to_free[i] = __nf_unregister_net_hook(net, &reg[i]);
+
+ synchronize_net();
+
+ /* need 2nd synchronize_net() if nfqueue is used, skb
+ * can get reinjected right before nf_queue_hook_drop()
+ */
+ nfq = nf_queue_nf_hook_drop(net);
+ if (nfq)
+ synchronize_net();
+
+ for (i = 0; i < n; i++)
+ kfree(to_free[i]);
+
+ reg += n;
+ hookcount -= n;
+ } while (hookcount > 0);
}
EXPORT_SYMBOL(nf_unregister_net_hooks);
diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h
index 6f09a99298cd..8ad2b52a0b32 100644
--- a/net/netfilter/ipset/ip_set_bitmap_gen.h
+++ b/net/netfilter/ipset/ip_set_bitmap_gen.h
@@ -232,7 +232,7 @@ mtype_list(const struct ip_set *set,
if (!test_bit(id, map->members) ||
(SET_WITH_TIMEOUT(set) &&
#ifdef IP_SET_BITMAP_STORED_TIMEOUT
- mtype_is_filled((const struct mtype_elem *)x) &&
+ mtype_is_filled(x) &&
#endif
ip_set_timeout_expired(ext_timeout(x, set))))
continue;
@@ -248,8 +248,7 @@ mtype_list(const struct ip_set *set,
}
if (mtype_do_list(skb, map, id, set->dsize))
goto nla_put_failure;
- if (ip_set_put_extensions(skb, set, x,
- mtype_is_filled((const struct mtype_elem *)x)))
+ if (ip_set_put_extensions(skb, set, x, mtype_is_filled(x)))
goto nla_put_failure;
ipset_nest_end(skb, nested);
}
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index c296f9b606d4..ba6a5516dc7c 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -295,7 +295,8 @@ ip_set_get_ipaddr4(struct nlattr *nla, __be32 *ipaddr)
if (unlikely(!flag_nested(nla)))
return -IPSET_ERR_PROTOCOL;
- if (nla_parse_nested(tb, IPSET_ATTR_IPADDR_MAX, nla, ipaddr_policy))
+ if (nla_parse_nested(tb, IPSET_ATTR_IPADDR_MAX, nla,
+ ipaddr_policy, NULL))
return -IPSET_ERR_PROTOCOL;
if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_IPADDR_IPV4)))
return -IPSET_ERR_PROTOCOL;
@@ -313,7 +314,8 @@ ip_set_get_ipaddr6(struct nlattr *nla, union nf_inet_addr *ipaddr)
if (unlikely(!flag_nested(nla)))
return -IPSET_ERR_PROTOCOL;
- if (nla_parse_nested(tb, IPSET_ATTR_IPADDR_MAX, nla, ipaddr_policy))
+ if (nla_parse_nested(tb, IPSET_ATTR_IPADDR_MAX, nla,
+ ipaddr_policy, NULL))
return -IPSET_ERR_PROTOCOL;
if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_IPADDR_IPV6)))
return -IPSET_ERR_PROTOCOL;
@@ -501,14 +503,6 @@ __ip_set_put(struct ip_set *set)
* a separate reference counter
*/
static inline void
-__ip_set_get_netlink(struct ip_set *set)
-{
- write_lock_bh(&ip_set_ref_lock);
- set->ref_netlink++;
- write_unlock_bh(&ip_set_ref_lock);
-}
-
-static inline void
__ip_set_put_netlink(struct ip_set *set)
{
write_lock_bh(&ip_set_ref_lock);
@@ -769,7 +763,7 @@ start_msg(struct sk_buff *skb, u32 portid, u32 seq, unsigned int flags,
struct nlmsghdr *nlh;
struct nfgenmsg *nfmsg;
- nlh = nlmsg_put(skb, portid, seq, cmd | (NFNL_SUBSYS_IPSET << 8),
+ nlh = nlmsg_put(skb, portid, seq, nfnl_msg_type(NFNL_SUBSYS_IPSET, cmd),
sizeof(*nfmsg), flags);
if (!nlh)
return NULL;
@@ -906,7 +900,7 @@ static int ip_set_create(struct net *net, struct sock *ctnl,
/* Without holding any locks, create private part. */
if (attr[IPSET_ATTR_DATA] &&
nla_parse_nested(tb, IPSET_ATTR_CREATE_MAX, attr[IPSET_ATTR_DATA],
- set->type->create_policy)) {
+ set->type->create_policy, NULL)) {
ret = -IPSET_ERR_PROTOCOL;
goto put_out;
}
@@ -1257,8 +1251,8 @@ dump_init(struct netlink_callback *cb, struct ip_set_net *inst)
ip_set_id_t index;
/* Second pass, so parser can't fail */
- nla_parse(cda, IPSET_ATTR_CMD_MAX,
- attr, nlh->nlmsg_len - min_len, ip_set_setname_policy);
+ nla_parse(cda, IPSET_ATTR_CMD_MAX, attr, nlh->nlmsg_len - min_len,
+ ip_set_setname_policy, NULL);
if (cda[IPSET_ATTR_SETNAME]) {
struct ip_set *set;
@@ -1305,7 +1299,7 @@ ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb)
* manually :-(
*/
if (nlh->nlmsg_flags & NLM_F_ACK)
- netlink_ack(cb->skb, nlh, ret);
+ netlink_ack(cb->skb, nlh, ret, NULL);
return ret;
}
}
@@ -1501,9 +1495,8 @@ call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set,
memcpy(&errmsg->msg, nlh, nlh->nlmsg_len);
cmdattr = (void *)&errmsg->msg + min_len;
- nla_parse(cda, IPSET_ATTR_CMD_MAX,
- cmdattr, nlh->nlmsg_len - min_len,
- ip_set_adt_policy);
+ nla_parse(cda, IPSET_ATTR_CMD_MAX, cmdattr,
+ nlh->nlmsg_len - min_len, ip_set_adt_policy, NULL);
errline = nla_data(cda[IPSET_ATTR_LINENO]);
@@ -1549,7 +1542,7 @@ static int ip_set_uadd(struct net *net, struct sock *ctnl, struct sk_buff *skb,
if (attr[IPSET_ATTR_DATA]) {
if (nla_parse_nested(tb, IPSET_ATTR_ADT_MAX,
attr[IPSET_ATTR_DATA],
- set->type->adt_policy))
+ set->type->adt_policy, NULL))
return -IPSET_ERR_PROTOCOL;
ret = call_ad(ctnl, skb, set, tb, IPSET_ADD, flags,
use_lineno);
@@ -1561,7 +1554,7 @@ static int ip_set_uadd(struct net *net, struct sock *ctnl, struct sk_buff *skb,
if (nla_type(nla) != IPSET_ATTR_DATA ||
!flag_nested(nla) ||
nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, nla,
- set->type->adt_policy))
+ set->type->adt_policy, NULL))
return -IPSET_ERR_PROTOCOL;
ret = call_ad(ctnl, skb, set, tb, IPSET_ADD,
flags, use_lineno);
@@ -1603,7 +1596,7 @@ static int ip_set_udel(struct net *net, struct sock *ctnl, struct sk_buff *skb,
if (attr[IPSET_ATTR_DATA]) {
if (nla_parse_nested(tb, IPSET_ATTR_ADT_MAX,
attr[IPSET_ATTR_DATA],
- set->type->adt_policy))
+ set->type->adt_policy, NULL))
return -IPSET_ERR_PROTOCOL;
ret = call_ad(ctnl, skb, set, tb, IPSET_DEL, flags,
use_lineno);
@@ -1615,7 +1608,7 @@ static int ip_set_udel(struct net *net, struct sock *ctnl, struct sk_buff *skb,
if (nla_type(nla) != IPSET_ATTR_DATA ||
!flag_nested(nla) ||
nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, nla,
- set->type->adt_policy))
+ set->type->adt_policy, NULL))
return -IPSET_ERR_PROTOCOL;
ret = call_ad(ctnl, skb, set, tb, IPSET_DEL,
flags, use_lineno);
@@ -1646,7 +1639,7 @@ static int ip_set_utest(struct net *net, struct sock *ctnl, struct sk_buff *skb,
return -ENOENT;
if (nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, attr[IPSET_ATTR_DATA],
- set->type->adt_policy))
+ set->type->adt_policy, NULL))
return -IPSET_ERR_PROTOCOL;
rcu_read_lock_bh();
@@ -1915,7 +1908,7 @@ ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len)
ret = -EFAULT;
goto done;
}
- op = (unsigned int *)data;
+ op = data;
if (*op < IP_SET_OP_VERSION) {
/* Check the version at the beginning of operations */
@@ -2013,7 +2006,7 @@ static struct nf_sockopt_ops so_set __read_mostly = {
.pf = PF_INET,
.get_optmin = SO_IP_SET,
.get_optmax = SO_IP_SET + 1,
- .get = &ip_set_sockfn_get,
+ .get = ip_set_sockfn_get,
.owner = THIS_MODULE,
};
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index e6a2753dff9e..3d2ac71a83ec 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -181,7 +181,7 @@ static inline int ip_vs_conn_hash(struct ip_vs_conn *cp)
if (!(cp->flags & IP_VS_CONN_F_HASHED)) {
cp->flags |= IP_VS_CONN_F_HASHED;
- atomic_inc(&cp->refcnt);
+ refcount_inc(&cp->refcnt);
hlist_add_head_rcu(&cp->c_list, &ip_vs_conn_tab[hash]);
ret = 1;
} else {
@@ -215,7 +215,7 @@ static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp)
if (cp->flags & IP_VS_CONN_F_HASHED) {
hlist_del_rcu(&cp->c_list);
cp->flags &= ~IP_VS_CONN_F_HASHED;
- atomic_dec(&cp->refcnt);
+ refcount_dec(&cp->refcnt);
ret = 1;
} else
ret = 0;
@@ -242,13 +242,13 @@ static inline bool ip_vs_conn_unlink(struct ip_vs_conn *cp)
if (cp->flags & IP_VS_CONN_F_HASHED) {
ret = false;
/* Decrease refcnt and unlink conn only if we are last user */
- if (atomic_cmpxchg(&cp->refcnt, 1, 0) == 1) {
+ if (refcount_dec_if_one(&cp->refcnt)) {
hlist_del_rcu(&cp->c_list);
cp->flags &= ~IP_VS_CONN_F_HASHED;
ret = true;
}
} else
- ret = atomic_read(&cp->refcnt) ? false : true;
+ ret = refcount_read(&cp->refcnt) ? false : true;
spin_unlock(&cp->lock);
ct_write_unlock_bh(hash);
@@ -475,7 +475,7 @@ static void __ip_vs_conn_put_timer(struct ip_vs_conn *cp)
void ip_vs_conn_put(struct ip_vs_conn *cp)
{
if ((cp->flags & IP_VS_CONN_F_ONE_PACKET) &&
- (atomic_read(&cp->refcnt) == 1) &&
+ (refcount_read(&cp->refcnt) == 1) &&
!timer_pending(&cp->timer))
/* expire connection immediately */
__ip_vs_conn_put_notimer(cp);
@@ -617,8 +617,8 @@ ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest)
IP_VS_DBG_ADDR(cp->af, &cp->vaddr), ntohs(cp->vport),
IP_VS_DBG_ADDR(cp->daf, &cp->daddr), ntohs(cp->dport),
ip_vs_fwd_tag(cp), cp->state,
- cp->flags, atomic_read(&cp->refcnt),
- atomic_read(&dest->refcnt));
+ cp->flags, refcount_read(&cp->refcnt),
+ refcount_read(&dest->refcnt));
/* Update the connection counters */
if (!(flags & IP_VS_CONN_F_TEMPLATE)) {
@@ -714,8 +714,8 @@ static inline void ip_vs_unbind_dest(struct ip_vs_conn *cp)
IP_VS_DBG_ADDR(cp->af, &cp->vaddr), ntohs(cp->vport),
IP_VS_DBG_ADDR(cp->daf, &cp->daddr), ntohs(cp->dport),
ip_vs_fwd_tag(cp), cp->state,
- cp->flags, atomic_read(&cp->refcnt),
- atomic_read(&dest->refcnt));
+ cp->flags, refcount_read(&cp->refcnt),
+ refcount_read(&dest->refcnt));
/* Update the connection counters */
if (!(cp->flags & IP_VS_CONN_F_TEMPLATE)) {
@@ -863,10 +863,10 @@ static void ip_vs_conn_expire(unsigned long data)
expire_later:
IP_VS_DBG(7, "delayed: conn->refcnt=%d conn->n_control=%d\n",
- atomic_read(&cp->refcnt),
+ refcount_read(&cp->refcnt),
atomic_read(&cp->n_control));
- atomic_inc(&cp->refcnt);
+ refcount_inc(&cp->refcnt);
cp->timeout = 60*HZ;
if (ipvs->sync_state & IP_VS_STATE_MASTER)
@@ -941,7 +941,7 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, int dest_af,
* it in the table, so that other thread run ip_vs_random_dropentry
* but cannot drop this entry.
*/
- atomic_set(&cp->refcnt, 1);
+ refcount_set(&cp->refcnt, 1);
cp->control = NULL;
atomic_set(&cp->n_control, 0);
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index db40050f8785..d2d7bdf1d510 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -542,7 +542,7 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
IP_VS_DBG_ADDR(cp->af, &cp->caddr), ntohs(cp->cport),
IP_VS_DBG_ADDR(cp->af, &cp->vaddr), ntohs(cp->vport),
IP_VS_DBG_ADDR(cp->daf, &cp->daddr), ntohs(cp->dport),
- cp->flags, atomic_read(&cp->refcnt));
+ cp->flags, refcount_read(&cp->refcnt));
ip_vs_conn_stats(cp, svc);
return cp;
@@ -1193,7 +1193,7 @@ struct ip_vs_conn *ip_vs_new_conn_out(struct ip_vs_service *svc,
IP_VS_DBG_ADDR(cp->af, &cp->caddr), ntohs(cp->cport),
IP_VS_DBG_ADDR(cp->af, &cp->vaddr), ntohs(cp->vport),
IP_VS_DBG_ADDR(cp->af, &cp->daddr), ntohs(cp->dport),
- cp->flags, atomic_read(&cp->refcnt));
+ cp->flags, refcount_read(&cp->refcnt));
LeaveFunction(12);
return cp;
}
@@ -2200,6 +2200,7 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
static int __net_init __ip_vs_init(struct net *net)
{
struct netns_ipvs *ipvs;
+ int ret;
ipvs = net_generic(net, ip_vs_net_id);
if (ipvs == NULL)
@@ -2231,13 +2232,17 @@ static int __net_init __ip_vs_init(struct net *net)
if (ip_vs_sync_net_init(ipvs) < 0)
goto sync_fail;
- printk(KERN_INFO "IPVS: Creating netns size=%zu id=%d\n",
- sizeof(struct netns_ipvs), ipvs->gen);
+ ret = nf_register_net_hooks(net, ip_vs_ops, ARRAY_SIZE(ip_vs_ops));
+ if (ret < 0)
+ goto hook_fail;
+
return 0;
/*
* Error handling
*/
+hook_fail:
+ ip_vs_sync_net_cleanup(ipvs);
sync_fail:
ip_vs_conn_net_cleanup(ipvs);
conn_fail:
@@ -2257,6 +2262,7 @@ static void __net_exit __ip_vs_cleanup(struct net *net)
{
struct netns_ipvs *ipvs = net_ipvs(net);
+ nf_unregister_net_hooks(net, ip_vs_ops, ARRAY_SIZE(ip_vs_ops));
ip_vs_service_net_cleanup(ipvs); /* ip_vs_flush() with locks */
ip_vs_conn_net_cleanup(ipvs);
ip_vs_app_net_cleanup(ipvs);
@@ -2317,24 +2323,16 @@ static int __init ip_vs_init(void)
if (ret < 0)
goto cleanup_sub;
- ret = nf_register_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops));
- if (ret < 0) {
- pr_err("can't register hooks.\n");
- goto cleanup_dev;
- }
-
ret = ip_vs_register_nl_ioctl();
if (ret < 0) {
pr_err("can't register netlink/ioctl.\n");
- goto cleanup_hooks;
+ goto cleanup_dev;
}
pr_info("ipvs loaded.\n");
return ret;
-cleanup_hooks:
- nf_unregister_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops));
cleanup_dev:
unregister_pernet_device(&ipvs_core_dev_ops);
cleanup_sub:
@@ -2351,7 +2349,6 @@ exit:
static void __exit ip_vs_cleanup(void)
{
ip_vs_unregister_nl_ioctl();
- nf_unregister_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops));
unregister_pernet_device(&ipvs_core_dev_ops);
unregister_pernet_subsys(&ipvs_core_ops); /* free ip_vs struct */
ip_vs_conn_cleanup();
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 5aeb0dde6ccc..1fa3c2307b6e 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -699,7 +699,7 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, int dest_af,
dest->vfwmark,
IP_VS_DBG_ADDR(dest->af, &dest->addr),
ntohs(dest->port),
- atomic_read(&dest->refcnt));
+ refcount_read(&dest->refcnt));
if (dest->af == dest_af &&
ip_vs_addr_equal(dest_af, &dest->addr, daddr) &&
dest->port == dport &&
@@ -934,7 +934,7 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
atomic_set(&dest->activeconns, 0);
atomic_set(&dest->inactconns, 0);
atomic_set(&dest->persistconns, 0);
- atomic_set(&dest->refcnt, 1);
+ refcount_set(&dest->refcnt, 1);
INIT_HLIST_NODE(&dest->d_list);
spin_lock_init(&dest->dst_lock);
@@ -998,7 +998,7 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, "
"dest->refcnt=%d, service %u/%s:%u\n",
IP_VS_DBG_ADDR(udest->af, &daddr), ntohs(dport),
- atomic_read(&dest->refcnt),
+ refcount_read(&dest->refcnt),
dest->vfwmark,
IP_VS_DBG_ADDR(svc->af, &dest->vaddr),
ntohs(dest->vport));
@@ -1074,7 +1074,7 @@ static void __ip_vs_del_dest(struct netns_ipvs *ipvs, struct ip_vs_dest *dest,
spin_lock_bh(&ipvs->dest_trash_lock);
IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, dest->refcnt=%d\n",
IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port),
- atomic_read(&dest->refcnt));
+ refcount_read(&dest->refcnt));
if (list_empty(&ipvs->dest_trash) && !cleanup)
mod_timer(&ipvs->dest_trash_timer,
jiffies + (IP_VS_DEST_TRASH_PERIOD >> 1));
@@ -1157,7 +1157,7 @@ static void ip_vs_dest_trash_expire(unsigned long data)
spin_lock(&ipvs->dest_trash_lock);
list_for_each_entry_safe(dest, next, &ipvs->dest_trash, t_list) {
- if (atomic_read(&dest->refcnt) > 1)
+ if (refcount_read(&dest->refcnt) > 1)
continue;
if (dest->idle_start) {
if (time_before(now, dest->idle_start +
@@ -1545,7 +1545,7 @@ ip_vs_forget_dev(struct ip_vs_dest *dest, struct net_device *dev)
dev->name,
IP_VS_DBG_ADDR(dest->af, &dest->addr),
ntohs(dest->port),
- atomic_read(&dest->refcnt));
+ refcount_read(&dest->refcnt));
__ip_vs_dst_cache_reset(dest);
}
spin_unlock_bh(&dest->dst_lock);
@@ -1774,13 +1774,13 @@ static struct ctl_table vs_vars[] = {
.procname = "sync_version",
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_do_sync_mode,
+ .proc_handler = proc_do_sync_mode,
},
{
.procname = "sync_ports",
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_do_sync_ports,
+ .proc_handler = proc_do_sync_ports,
},
{
.procname = "sync_persist_mode",
@@ -2130,8 +2130,8 @@ static int ip_vs_stats_show(struct seq_file *seq, void *v)
/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
seq_puts(seq,
" Total Incoming Outgoing Incoming Outgoing\n");
- seq_printf(seq,
- " Conns Packets Packets Bytes Bytes\n");
+ seq_puts(seq,
+ " Conns Packets Packets Bytes Bytes\n");
ip_vs_copy_stats(&show, &net_ipvs(net)->tot_stats);
seq_printf(seq, "%8LX %8LX %8LX %16LX %16LX\n\n",
@@ -2178,8 +2178,8 @@ static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v)
/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
seq_puts(seq,
" Total Incoming Outgoing Incoming Outgoing\n");
- seq_printf(seq,
- "CPU Conns Packets Packets Bytes Bytes\n");
+ seq_puts(seq,
+ "CPU Conns Packets Packets Bytes Bytes\n");
for_each_possible_cpu(i) {
struct ip_vs_cpu_stats *u = per_cpu_ptr(cpustats, i);
@@ -3078,6 +3078,17 @@ nla_put_failure:
return skb->len;
}
+static bool ip_vs_is_af_valid(int af)
+{
+ if (af == AF_INET)
+ return true;
+#ifdef CONFIG_IP_VS_IPV6
+ if (af == AF_INET6 && ipv6_mod_enabled())
+ return true;
+#endif
+ return false;
+}
+
static int ip_vs_genl_parse_service(struct netns_ipvs *ipvs,
struct ip_vs_service_user_kern *usvc,
struct nlattr *nla, int full_entry,
@@ -3089,7 +3100,8 @@ static int ip_vs_genl_parse_service(struct netns_ipvs *ipvs,
/* Parse mandatory identifying service fields first */
if (nla == NULL ||
- nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy))
+ nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla,
+ ip_vs_svc_policy, NULL))
return -EINVAL;
nla_af = attrs[IPVS_SVC_ATTR_AF];
@@ -3104,11 +3116,7 @@ static int ip_vs_genl_parse_service(struct netns_ipvs *ipvs,
memset(usvc, 0, sizeof(*usvc));
usvc->af = nla_get_u16(nla_af);
-#ifdef CONFIG_IP_VS_IPV6
- if (usvc->af != AF_INET && usvc->af != AF_INET6)
-#else
- if (usvc->af != AF_INET)
-#endif
+ if (!ip_vs_is_af_valid(usvc->af))
return -EAFNOSUPPORT;
if (nla_fwmark) {
@@ -3251,8 +3259,8 @@ static int ip_vs_genl_dump_dests(struct sk_buff *skb,
mutex_lock(&__ip_vs_mutex);
/* Try to find the service for which to dump destinations */
- if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs,
- IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
+ if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs, IPVS_CMD_ATTR_MAX,
+ ip_vs_cmd_policy, NULL))
goto out_err;
@@ -3288,7 +3296,8 @@ static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
/* Parse mandatory identifying destination fields first */
if (nla == NULL ||
- nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy))
+ nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla,
+ ip_vs_dest_policy, NULL))
return -EINVAL;
nla_addr = attrs[IPVS_DEST_ATTR_ADDR];
@@ -3530,7 +3539,7 @@ static int ip_vs_genl_set_daemon(struct sk_buff *skb, struct genl_info *info)
if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX,
info->attrs[IPVS_CMD_ATTR_DAEMON],
- ip_vs_daemon_policy))
+ ip_vs_daemon_policy, info->extack))
goto out;
if (cmd == IPVS_CMD_NEW_DAEMON)
@@ -3610,6 +3619,11 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
if (udest.af == 0)
udest.af = svc->af;
+ if (!ip_vs_is_af_valid(udest.af)) {
+ ret = -EAFNOSUPPORT;
+ goto out;
+ }
+
if (udest.af != svc->af && cmd != IPVS_CMD_DEL_DEST) {
/* The synchronization protocol is incompatible
* with mixed family services
diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c
index d30c327bb578..fb780be76d15 100644
--- a/net/netfilter/ipvs/ip_vs_ftp.c
+++ b/net/netfilter/ipvs/ip_vs_ftp.c
@@ -260,7 +260,9 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
buf_len = strlen(buf);
ct = nf_ct_get(skb, &ctinfo);
- if (ct && !nf_ct_is_untracked(ct) && nfct_nat(ct)) {
+ if (ct && (ct->status & IPS_NAT_MASK)) {
+ bool mangled;
+
/* If mangling fails this function will return 0
* which will cause the packet to be dropped.
* Mangling can only fail under memory pressure,
@@ -268,12 +270,13 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
* packet.
*/
rcu_read_lock();
- ret = nf_nat_mangle_tcp_packet(skb, ct, ctinfo,
- iph->ihl * 4,
- start-data, end-start,
- buf, buf_len);
+ mangled = nf_nat_mangle_tcp_packet(skb, ct, ctinfo,
+ iph->ihl * 4,
+ start - data,
+ end - start,
+ buf, buf_len);
rcu_read_unlock();
- if (ret) {
+ if (mangled) {
ip_vs_nfct_expect_related(skb, ct, n_cp,
IPPROTO_TCP, 0, 0);
if (skb->ip_summed == CHECKSUM_COMPLETE)
@@ -482,11 +485,8 @@ static struct pernet_operations ip_vs_ftp_ops = {
static int __init ip_vs_ftp_init(void)
{
- int rv;
-
- rv = register_pernet_subsys(&ip_vs_ftp_ops);
/* rcu_barrier() is called by netns on error */
- return rv;
+ return register_pernet_subsys(&ip_vs_ftp_ops);
}
/*
diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c
index 5824927cf8e0..b6aa4a970c6e 100644
--- a/net/netfilter/ipvs/ip_vs_lblc.c
+++ b/net/netfilter/ipvs/ip_vs_lblc.c
@@ -448,7 +448,7 @@ __ip_vs_lblc_schedule(struct ip_vs_service *svc)
IP_VS_DBG_ADDR(least->af, &least->addr),
ntohs(least->port),
atomic_read(&least->activeconns),
- atomic_read(&least->refcnt),
+ refcount_read(&least->refcnt),
atomic_read(&least->weight), loh);
return least;
diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c
index 703f11877bee..c13ff575f9f7 100644
--- a/net/netfilter/ipvs/ip_vs_lblcr.c
+++ b/net/netfilter/ipvs/ip_vs_lblcr.c
@@ -204,7 +204,7 @@ static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set)
IP_VS_DBG_ADDR(least->af, &least->addr),
ntohs(least->port),
atomic_read(&least->activeconns),
- atomic_read(&least->refcnt),
+ refcount_read(&least->refcnt),
atomic_read(&least->weight), loh);
return least;
}
@@ -249,7 +249,7 @@ static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set)
__func__,
IP_VS_DBG_ADDR(most->af, &most->addr), ntohs(most->port),
atomic_read(&most->activeconns),
- atomic_read(&most->refcnt),
+ refcount_read(&most->refcnt),
atomic_read(&most->weight), moh);
return most;
}
@@ -612,7 +612,7 @@ __ip_vs_lblcr_schedule(struct ip_vs_service *svc)
IP_VS_DBG_ADDR(least->af, &least->addr),
ntohs(least->port),
atomic_read(&least->activeconns),
- atomic_read(&least->refcnt),
+ refcount_read(&least->refcnt),
atomic_read(&least->weight), loh);
return least;
diff --git a/net/netfilter/ipvs/ip_vs_nfct.c b/net/netfilter/ipvs/ip_vs_nfct.c
index fc230d99aa3b..6cf3fd81a5ec 100644
--- a/net/netfilter/ipvs/ip_vs_nfct.c
+++ b/net/netfilter/ipvs/ip_vs_nfct.c
@@ -85,7 +85,7 @@ ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp, int outin)
struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
struct nf_conntrack_tuple new_tuple;
- if (ct == NULL || nf_ct_is_confirmed(ct) || nf_ct_is_untracked(ct) ||
+ if (ct == NULL || nf_ct_is_confirmed(ct) ||
nf_ct_is_dying(ct))
return;
@@ -232,7 +232,7 @@ void ip_vs_nfct_expect_related(struct sk_buff *skb, struct nf_conn *ct,
{
struct nf_conntrack_expect *exp;
- if (ct == NULL || nf_ct_is_untracked(ct))
+ if (ct == NULL)
return;
exp = nf_ct_expect_alloc(ct);
diff --git a/net/netfilter/ipvs/ip_vs_nq.c b/net/netfilter/ipvs/ip_vs_nq.c
index a8b63401e773..7d9d4ac596ca 100644
--- a/net/netfilter/ipvs/ip_vs_nq.c
+++ b/net/netfilter/ipvs/ip_vs_nq.c
@@ -110,7 +110,7 @@ ip_vs_nq_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
IP_VS_DBG_ADDR(least->af, &least->addr),
ntohs(least->port),
atomic_read(&least->activeconns),
- atomic_read(&least->refcnt),
+ refcount_read(&least->refcnt),
atomic_read(&least->weight), loh);
return least;
diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c
index 8ae480715cea..ca880a3ad033 100644
--- a/net/netfilter/ipvs/ip_vs_proto.c
+++ b/net/netfilter/ipvs/ip_vs_proto.c
@@ -193,28 +193,6 @@ ip_vs_create_timeout_table(int *table, int size)
}
-/*
- * Set timeout value for state specified by name
- */
-int
-ip_vs_set_state_timeout(int *table, int num, const char *const *names,
- const char *name, int to)
-{
- int i;
-
- if (!table || !name || !to)
- return -EINVAL;
-
- for (i = 0; i < num; i++) {
- if (strcmp(names[i], name))
- continue;
- table[i] = to * HZ;
- return 0;
- }
- return -ENOENT;
-}
-
-
const char * ip_vs_state_name(__u16 proto, int state)
{
struct ip_vs_protocol *pp = ip_vs_proto_get(proto);
diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c
index d952d67f904d..56f8e4b204ff 100644
--- a/net/netfilter/ipvs/ip_vs_proto_sctp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c
@@ -447,7 +447,7 @@ set_sctp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp,
ntohs(cp->cport),
sctp_state_name(cp->state),
sctp_state_name(next_state),
- atomic_read(&cp->refcnt));
+ refcount_read(&cp->refcnt));
if (dest) {
if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
(next_state != IP_VS_SCTP_S_ESTABLISHED)) {
diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c
index 5117bcb7d2f0..12dc8d5bc37d 100644
--- a/net/netfilter/ipvs/ip_vs_proto_tcp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c
@@ -557,7 +557,7 @@ set_tcp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp,
ntohs(cp->cport),
tcp_state_name(cp->state),
tcp_state_name(new_state),
- atomic_read(&cp->refcnt));
+ refcount_read(&cp->refcnt));
if (dest) {
if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
diff --git a/net/netfilter/ipvs/ip_vs_rr.c b/net/netfilter/ipvs/ip_vs_rr.c
index 58bacfc461ee..ee0530d14c5f 100644
--- a/net/netfilter/ipvs/ip_vs_rr.c
+++ b/net/netfilter/ipvs/ip_vs_rr.c
@@ -97,7 +97,7 @@ stop:
"activeconns %d refcnt %d weight %d\n",
IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port),
atomic_read(&dest->activeconns),
- atomic_read(&dest->refcnt), atomic_read(&dest->weight));
+ refcount_read(&dest->refcnt), atomic_read(&dest->weight));
return dest;
}
diff --git a/net/netfilter/ipvs/ip_vs_sed.c b/net/netfilter/ipvs/ip_vs_sed.c
index f8e2d00f528b..ab23cf203437 100644
--- a/net/netfilter/ipvs/ip_vs_sed.c
+++ b/net/netfilter/ipvs/ip_vs_sed.c
@@ -111,7 +111,7 @@ ip_vs_sed_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
IP_VS_DBG_ADDR(least->af, &least->addr),
ntohs(least->port),
atomic_read(&least->activeconns),
- atomic_read(&least->refcnt),
+ refcount_read(&least->refcnt),
atomic_read(&least->weight), loh);
return least;
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index b03c28084f81..0e5b64a75da0 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -520,7 +520,7 @@ static int ip_vs_sync_conn_needed(struct netns_ipvs *ipvs,
if (!(cp->flags & IP_VS_CONN_F_TEMPLATE) &&
pkts % sync_period != sysctl_sync_threshold(ipvs))
return 0;
- } else if (sync_refresh_period <= 0 &&
+ } else if (!sync_refresh_period &&
pkts != sysctl_sync_threshold(ipvs))
return 0;
@@ -1849,7 +1849,7 @@ int start_sync_thread(struct netns_ipvs *ipvs, struct ipvs_sync_daemon_cfg *c,
if (state == IP_VS_STATE_MASTER) {
struct ipvs_master_sync_state *ms;
- ipvs->ms = kzalloc(count * sizeof(ipvs->ms[0]), GFP_KERNEL);
+ ipvs->ms = kcalloc(count, sizeof(ipvs->ms[0]), GFP_KERNEL);
if (!ipvs->ms)
goto out;
ms = ipvs->ms;
@@ -1862,7 +1862,7 @@ int start_sync_thread(struct netns_ipvs *ipvs, struct ipvs_sync_daemon_cfg *c,
ms->ipvs = ipvs;
}
} else {
- array = kzalloc(count * sizeof(struct task_struct *),
+ array = kcalloc(count, sizeof(struct task_struct *),
GFP_KERNEL);
if (!array)
goto out;
diff --git a/net/netfilter/ipvs/ip_vs_wlc.c b/net/netfilter/ipvs/ip_vs_wlc.c
index 6b366fd90554..6add39e0ec20 100644
--- a/net/netfilter/ipvs/ip_vs_wlc.c
+++ b/net/netfilter/ipvs/ip_vs_wlc.c
@@ -83,7 +83,7 @@ ip_vs_wlc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
IP_VS_DBG_ADDR(least->af, &least->addr),
ntohs(least->port),
atomic_read(&least->activeconns),
- atomic_read(&least->refcnt),
+ refcount_read(&least->refcnt),
atomic_read(&least->weight), loh);
return least;
diff --git a/net/netfilter/ipvs/ip_vs_wrr.c b/net/netfilter/ipvs/ip_vs_wrr.c
index 17e6d4406ca7..62258dd457ac 100644
--- a/net/netfilter/ipvs/ip_vs_wrr.c
+++ b/net/netfilter/ipvs/ip_vs_wrr.c
@@ -218,7 +218,7 @@ found:
"activeconns %d refcnt %d weight %d\n",
IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port),
atomic_read(&dest->activeconns),
- atomic_read(&dest->refcnt),
+ refcount_read(&dest->refcnt),
atomic_read(&dest->weight));
mark->cl = dest;
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index 4e1a98fcc8c3..2eab1e0400f4 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -775,7 +775,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
enum ip_conntrack_info ctinfo;
struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
- if (ct && !nf_ct_is_untracked(ct)) {
+ if (ct) {
IP_VS_DBG_RL_PKT(10, AF_INET, pp, skb, ipvsh->off,
"ip_vs_nat_xmit(): "
"stopping DNAT to local address");
@@ -866,7 +866,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
enum ip_conntrack_info ctinfo;
struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
- if (ct && !nf_ct_is_untracked(ct)) {
+ if (ct) {
IP_VS_DBG_RL_PKT(10, AF_INET6, pp, skb, ipvsh->off,
"ip_vs_nat_xmit_v6(): "
"stopping DNAT to local address");
@@ -1338,7 +1338,7 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
enum ip_conntrack_info ctinfo;
struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
- if (ct && !nf_ct_is_untracked(ct)) {
+ if (ct) {
IP_VS_DBG(10, "%s(): "
"stopping DNAT to local address %pI4\n",
__func__, &cp->daddr.ip);
@@ -1429,7 +1429,7 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
enum ip_conntrack_info ctinfo;
struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
- if (ct && !nf_ct_is_untracked(ct)) {
+ if (ct) {
IP_VS_DBG(10, "%s(): "
"stopping DNAT to local address %pI6\n",
__func__, &cp->daddr.in6);
diff --git a/net/netfilter/nf_conntrack_acct.c b/net/netfilter/nf_conntrack_acct.c
index 45da11afa785..866916712905 100644
--- a/net/netfilter/nf_conntrack_acct.c
+++ b/net/netfilter/nf_conntrack_acct.c
@@ -55,7 +55,7 @@ seq_print_acct(struct seq_file *s, const struct nf_conn *ct, int dir)
};
EXPORT_SYMBOL_GPL(seq_print_acct);
-static struct nf_ct_ext_type acct_extend __read_mostly = {
+static const struct nf_ct_ext_type acct_extend = {
.len = sizeof(struct nf_conn_acct),
.align = __alignof__(struct nf_conn_acct),
.id = NF_CT_EXT_ACCT,
diff --git a/net/netfilter/nf_conntrack_amanda.c b/net/netfilter/nf_conntrack_amanda.c
index 57a26cc90c9f..03d2ccffa9fa 100644
--- a/net/netfilter/nf_conntrack_amanda.c
+++ b/net/netfilter/nf_conntrack_amanda.c
@@ -207,6 +207,8 @@ static int __init nf_conntrack_amanda_init(void)
{
int ret, i;
+ NF_CT_HELPER_BUILD_BUG_ON(0);
+
for (i = 0; i < ARRAY_SIZE(search); i++) {
search[i].ts = textsearch_prepare(ts_algo, search[i].string,
search[i].len,
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index ffb78e5f7b70..e847dbaa0c6b 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -76,6 +76,7 @@ struct conntrack_gc_work {
struct delayed_work dwork;
u32 last_bucket;
bool exiting;
+ bool early_drop;
long next_gc_run;
};
@@ -180,14 +181,6 @@ EXPORT_SYMBOL_GPL(nf_conntrack_htable_size);
unsigned int nf_conntrack_max __read_mostly;
seqcount_t nf_conntrack_generation __read_mostly;
-
-/* nf_conn must be 8 bytes aligned, as the 3 LSB bits are used
- * for the nfctinfo. We cheat by (ab)using the PER CPU cache line
- * alignment to enforce this.
- */
-DEFINE_PER_CPU_ALIGNED(struct nf_conn, nf_conntrack_untracked);
-EXPORT_PER_CPU_SYMBOL(nf_conntrack_untracked);
-
static unsigned int nf_conntrack_hash_rnd __read_mostly;
static u32 hash_conntrack_raw(const struct nf_conntrack_tuple *tuple,
@@ -706,7 +699,7 @@ static int nf_ct_resolve_clash(struct net *net, struct sk_buff *skb,
l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
if (l4proto->allow_clash &&
- !nfct_nat(ct) &&
+ ((ct->status & IPS_NAT_DONE_MASK) == 0) &&
!nf_ct_is_dying(ct) &&
atomic_inc_not_zero(&ct->ct_general.use)) {
enum ip_conntrack_info oldinfo;
@@ -918,7 +911,7 @@ static unsigned int early_drop_list(struct net *net,
continue;
/* kill only if still in same netns -- might have moved due to
- * SLAB_DESTROY_BY_RCU rules.
+ * SLAB_TYPESAFE_BY_RCU rules.
*
* We steal the timer reference. If that fails timer has
* already fired or someone else deleted it. Just drop ref
@@ -959,10 +952,30 @@ static noinline int early_drop(struct net *net, unsigned int _hash)
return false;
}
+static bool gc_worker_skip_ct(const struct nf_conn *ct)
+{
+ return !nf_ct_is_confirmed(ct) || nf_ct_is_dying(ct);
+}
+
+static bool gc_worker_can_early_drop(const struct nf_conn *ct)
+{
+ const struct nf_conntrack_l4proto *l4proto;
+
+ if (!test_bit(IPS_ASSURED_BIT, &ct->status))
+ return true;
+
+ l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
+ if (l4proto->can_early_drop && l4proto->can_early_drop(ct))
+ return true;
+
+ return false;
+}
+
static void gc_worker(struct work_struct *work)
{
unsigned int min_interval = max(HZ / GC_MAX_BUCKETS_DIV, 1u);
unsigned int i, goal, buckets = 0, expired_count = 0;
+ unsigned int nf_conntrack_max95 = 0;
struct conntrack_gc_work *gc_work;
unsigned int ratio, scanned = 0;
unsigned long next_run;
@@ -971,6 +984,8 @@ static void gc_worker(struct work_struct *work)
goal = nf_conntrack_htable_size / GC_MAX_BUCKETS_DIV;
i = gc_work->last_bucket;
+ if (gc_work->early_drop)
+ nf_conntrack_max95 = nf_conntrack_max / 100u * 95u;
do {
struct nf_conntrack_tuple_hash *h;
@@ -987,6 +1002,8 @@ static void gc_worker(struct work_struct *work)
i = 0;
hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[i], hnnode) {
+ struct net *net;
+
tmp = nf_ct_tuplehash_to_ctrack(h);
scanned++;
@@ -995,6 +1012,27 @@ static void gc_worker(struct work_struct *work)
expired_count++;
continue;
}
+
+ if (nf_conntrack_max95 == 0 || gc_worker_skip_ct(tmp))
+ continue;
+
+ net = nf_ct_net(tmp);
+ if (atomic_read(&net->ct.count) < nf_conntrack_max95)
+ continue;
+
+ /* need to take reference to avoid possible races */
+ if (!atomic_inc_not_zero(&tmp->ct_general.use))
+ continue;
+
+ if (gc_worker_skip_ct(tmp)) {
+ nf_ct_put(tmp);
+ continue;
+ }
+
+ if (gc_worker_can_early_drop(tmp))
+ nf_ct_kill(tmp);
+
+ nf_ct_put(tmp);
}
/* could check get_nulls_value() here and restart if ct
@@ -1040,6 +1078,7 @@ static void gc_worker(struct work_struct *work)
next_run = gc_work->next_gc_run;
gc_work->last_bucket = i;
+ gc_work->early_drop = false;
queue_delayed_work(system_long_wq, &gc_work->dwork, next_run);
}
@@ -1065,6 +1104,8 @@ __nf_conntrack_alloc(struct net *net,
if (nf_conntrack_max &&
unlikely(atomic_read(&net->ct.count) > nf_conntrack_max)) {
if (!early_drop(net, hash)) {
+ if (!conntrack_gc_work.early_drop)
+ conntrack_gc_work.early_drop = true;
atomic_dec(&net->ct.count);
net_warn_ratelimited("nf_conntrack: table full, dropping packet\n");
return ERR_PTR(-ENOMEM);
@@ -1073,7 +1114,7 @@ __nf_conntrack_alloc(struct net *net,
/*
* Do not use kmem_cache_zalloc(), as this cache uses
- * SLAB_DESTROY_BY_RCU.
+ * SLAB_TYPESAFE_BY_RCU.
*/
ct = kmem_cache_alloc(nf_conntrack_cachep, gfp);
if (ct == NULL)
@@ -1118,7 +1159,7 @@ void nf_conntrack_free(struct nf_conn *ct)
struct net *net = nf_ct_net(ct);
/* A freed object has refcnt == 0, that's
- * the golden rule for SLAB_DESTROY_BY_RCU
+ * the golden rule for SLAB_TYPESAFE_BY_RCU
*/
NF_CT_ASSERT(atomic_read(&ct->ct_general.use) == 0);
@@ -1133,7 +1174,7 @@ EXPORT_SYMBOL_GPL(nf_conntrack_free);
/* Allocate a new conntrack: we return -ENOMEM if classification
failed due to stress. Otherwise it really is unclassifiable. */
-static struct nf_conntrack_tuple_hash *
+static noinline struct nf_conntrack_tuple_hash *
init_conntrack(struct net *net, struct nf_conn *tmpl,
const struct nf_conntrack_tuple *tuple,
struct nf_conntrack_l3proto *l3proto,
@@ -1241,21 +1282,20 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
return &ct->tuplehash[IP_CT_DIR_ORIGINAL];
}
-/* On success, returns conntrack ptr, sets skb->_nfct | ctinfo */
-static inline struct nf_conn *
+/* On success, returns 0, sets skb->_nfct | ctinfo */
+static int
resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
struct sk_buff *skb,
unsigned int dataoff,
u_int16_t l3num,
u_int8_t protonum,
struct nf_conntrack_l3proto *l3proto,
- struct nf_conntrack_l4proto *l4proto,
- int *set_reply,
- enum ip_conntrack_info *ctinfo)
+ struct nf_conntrack_l4proto *l4proto)
{
const struct nf_conntrack_zone *zone;
struct nf_conntrack_tuple tuple;
struct nf_conntrack_tuple_hash *h;
+ enum ip_conntrack_info ctinfo;
struct nf_conntrack_zone tmp;
struct nf_conn *ct;
u32 hash;
@@ -1264,7 +1304,7 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
dataoff, l3num, protonum, net, &tuple, l3proto,
l4proto)) {
pr_debug("Can't get tuple\n");
- return NULL;
+ return 0;
}
/* look for tuple match */
@@ -1275,33 +1315,30 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
h = init_conntrack(net, tmpl, &tuple, l3proto, l4proto,
skb, dataoff, hash);
if (!h)
- return NULL;
+ return 0;
if (IS_ERR(h))
- return (void *)h;
+ return PTR_ERR(h);
}
ct = nf_ct_tuplehash_to_ctrack(h);
/* It exists; we have (non-exclusive) reference. */
if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY) {
- *ctinfo = IP_CT_ESTABLISHED_REPLY;
- /* Please set reply bit if this packet OK */
- *set_reply = 1;
+ ctinfo = IP_CT_ESTABLISHED_REPLY;
} else {
/* Once we've had two way comms, always ESTABLISHED. */
if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
pr_debug("normal packet for %p\n", ct);
- *ctinfo = IP_CT_ESTABLISHED;
+ ctinfo = IP_CT_ESTABLISHED;
} else if (test_bit(IPS_EXPECTED_BIT, &ct->status)) {
pr_debug("related packet for %p\n", ct);
- *ctinfo = IP_CT_RELATED;
+ ctinfo = IP_CT_RELATED;
} else {
pr_debug("new packet for %p\n", ct);
- *ctinfo = IP_CT_NEW;
+ ctinfo = IP_CT_NEW;
}
- *set_reply = 0;
}
- nf_ct_set(skb, ct, *ctinfo);
- return ct;
+ nf_ct_set(skb, ct, ctinfo);
+ return 0;
}
unsigned int
@@ -1315,13 +1352,13 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
unsigned int *timeouts;
unsigned int dataoff;
u_int8_t protonum;
- int set_reply = 0;
int ret;
tmpl = nf_ct_get(skb, &ctinfo);
- if (tmpl) {
+ if (tmpl || ctinfo == IP_CT_UNTRACKED) {
/* Previously seen (loopback or untracked)? Ignore. */
- if (!nf_ct_is_template(tmpl)) {
+ if ((tmpl && !nf_ct_is_template(tmpl)) ||
+ ctinfo == IP_CT_UNTRACKED) {
NF_CT_STAT_INC_ATOMIC(net, ignore);
return NF_ACCEPT;
}
@@ -1358,23 +1395,22 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
goto out;
}
repeat:
- ct = resolve_normal_ct(net, tmpl, skb, dataoff, pf, protonum,
- l3proto, l4proto, &set_reply, &ctinfo);
- if (!ct) {
- /* Not valid part of a connection */
- NF_CT_STAT_INC_ATOMIC(net, invalid);
- ret = NF_ACCEPT;
- goto out;
- }
-
- if (IS_ERR(ct)) {
+ ret = resolve_normal_ct(net, tmpl, skb, dataoff, pf, protonum,
+ l3proto, l4proto);
+ if (ret < 0) {
/* Too stressed to deal. */
NF_CT_STAT_INC_ATOMIC(net, drop);
ret = NF_DROP;
goto out;
}
- NF_CT_ASSERT(skb_nfct(skb));
+ ct = nf_ct_get(skb, &ctinfo);
+ if (!ct) {
+ /* Not valid part of a connection */
+ NF_CT_STAT_INC_ATOMIC(net, invalid);
+ ret = NF_ACCEPT;
+ goto out;
+ }
/* Decide what timeout policy we want to apply to this flow. */
timeouts = nf_ct_timeout_lookup(net, ct, l4proto);
@@ -1399,7 +1435,8 @@ repeat:
goto out;
}
- if (set_reply && !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status))
+ if (ctinfo == IP_CT_ESTABLISHED_REPLY &&
+ !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status))
nf_conntrack_event_cache(IPCT_REPLY, ct);
out:
if (tmpl)
@@ -1634,18 +1671,6 @@ void nf_ct_free_hashtable(void *hash, unsigned int size)
}
EXPORT_SYMBOL_GPL(nf_ct_free_hashtable);
-static int untrack_refs(void)
-{
- int cnt = 0, cpu;
-
- for_each_possible_cpu(cpu) {
- struct nf_conn *ct = &per_cpu(nf_conntrack_untracked, cpu);
-
- cnt += atomic_read(&ct->ct_general.use) - 1;
- }
- return cnt;
-}
-
void nf_conntrack_cleanup_start(void)
{
conntrack_gc_work.exiting = true;
@@ -1655,8 +1680,6 @@ void nf_conntrack_cleanup_start(void)
void nf_conntrack_cleanup_end(void)
{
RCU_INIT_POINTER(nf_ct_destroy, NULL);
- while (untrack_refs() > 0)
- schedule();
cancel_delayed_work_sync(&conntrack_gc_work.dwork);
nf_ct_free_hashtable(nf_conntrack_hash, nf_conntrack_htable_size);
@@ -1830,20 +1853,44 @@ EXPORT_SYMBOL_GPL(nf_conntrack_set_hashsize);
module_param_call(hashsize, nf_conntrack_set_hashsize, param_get_uint,
&nf_conntrack_htable_size, 0600);
-void nf_ct_untracked_status_or(unsigned long bits)
+static __always_inline unsigned int total_extension_size(void)
{
- int cpu;
+ /* remember to add new extensions below */
+ BUILD_BUG_ON(NF_CT_EXT_NUM > 9);
- for_each_possible_cpu(cpu)
- per_cpu(nf_conntrack_untracked, cpu).status |= bits;
-}
-EXPORT_SYMBOL_GPL(nf_ct_untracked_status_or);
+ return sizeof(struct nf_ct_ext) +
+ sizeof(struct nf_conn_help)
+#if IS_ENABLED(CONFIG_NF_NAT)
+ + sizeof(struct nf_conn_nat)
+#endif
+ + sizeof(struct nf_conn_seqadj)
+ + sizeof(struct nf_conn_acct)
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
+ + sizeof(struct nf_conntrack_ecache)
+#endif
+#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
+ + sizeof(struct nf_conn_tstamp)
+#endif
+#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
+ + sizeof(struct nf_conn_timeout)
+#endif
+#ifdef CONFIG_NF_CONNTRACK_LABELS
+ + sizeof(struct nf_conn_labels)
+#endif
+#if IS_ENABLED(CONFIG_NETFILTER_SYNPROXY)
+ + sizeof(struct nf_conn_synproxy)
+#endif
+ ;
+};
int nf_conntrack_init_start(void)
{
int max_factor = 8;
int ret = -ENOMEM;
- int i, cpu;
+ int i;
+
+ /* struct nf_ct_ext uses u8 to store offsets/size */
+ BUILD_BUG_ON(total_extension_size() > 255u);
seqcount_init(&nf_conntrack_generation);
@@ -1882,7 +1929,7 @@ int nf_conntrack_init_start(void)
nf_conntrack_cachep = kmem_cache_create("nf_conntrack",
sizeof(struct nf_conn),
NFCT_INFOMASK + 1,
- SLAB_DESTROY_BY_RCU | SLAB_HWCACHE_ALIGN, NULL);
+ SLAB_TYPESAFE_BY_RCU | SLAB_HWCACHE_ALIGN, NULL);
if (!nf_conntrack_cachep)
goto err_cachep;
@@ -1926,15 +1973,6 @@ int nf_conntrack_init_start(void)
if (ret < 0)
goto err_proto;
- /* Set up fake conntrack: to never be deleted, not in any hashes */
- for_each_possible_cpu(cpu) {
- struct nf_conn *ct = &per_cpu(nf_conntrack_untracked, cpu);
- write_pnet(&ct->ct_net, &init_net);
- atomic_set(&ct->ct_general.use, 1);
- }
- /* - and look it like as a confirmed connection */
- nf_ct_untracked_status_or(IPS_CONFIRMED | IPS_UNTRACKED);
-
conntrack_gc_work_init(&conntrack_gc_work);
queue_delayed_work(system_long_wq, &conntrack_gc_work.dwork, HZ);
@@ -1982,6 +2020,7 @@ int nf_conntrack_init_net(struct net *net)
int ret = -ENOMEM;
int cpu;
+ BUILD_BUG_ON(IP_CT_UNTRACKED == IP_CT_NUMBER);
atomic_set(&net->ct.count, 0);
net->ct.pcpu_lists = alloc_percpu(struct ct_pcpu);
diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c
index 22fc32143e9c..caac41ad9483 100644
--- a/net/netfilter/nf_conntrack_ecache.c
+++ b/net/netfilter/nf_conntrack_ecache.c
@@ -195,7 +195,7 @@ void nf_ct_deliver_cached_events(struct nf_conn *ct)
events = xchg(&e->cache, 0);
- if (!nf_ct_is_confirmed(ct) || nf_ct_is_dying(ct) || !events)
+ if (!nf_ct_is_confirmed(ct) || nf_ct_is_dying(ct))
goto out_unlock;
/* We make a copy of the missed event cache without taking
@@ -212,7 +212,7 @@ void nf_ct_deliver_cached_events(struct nf_conn *ct)
ret = notify->fcn(events | missed, &item);
- if (likely(ret >= 0 && !missed))
+ if (likely(ret == 0 && !missed))
goto out_unlock;
spin_lock_bh(&ct->lock);
@@ -347,7 +347,7 @@ static struct ctl_table event_sysctl_table[] = {
};
#endif /* CONFIG_SYSCTL */
-static struct nf_ct_ext_type event_extend __read_mostly = {
+static const struct nf_ct_ext_type event_extend = {
.len = sizeof(struct nf_conntrack_ecache),
.align = __alignof__(struct nf_conntrack_ecache),
.id = NF_CT_EXT_ECACHE,
@@ -420,6 +420,9 @@ int nf_conntrack_ecache_init(void)
int ret = nf_ct_extend_register(&event_extend);
if (ret < 0)
pr_err("nf_ct_event: Unable to register event extension.\n");
+
+ BUILD_BUG_ON(__IPCT_MAX >= 16); /* ctmask, missed use u16 */
+
return ret;
}
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index d80073037856..e03d16ed550d 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -103,6 +103,17 @@ nf_ct_exp_equal(const struct nf_conntrack_tuple *tuple,
nf_ct_zone_equal_any(i->master, zone);
}
+bool nf_ct_remove_expect(struct nf_conntrack_expect *exp)
+{
+ if (del_timer(&exp->timeout)) {
+ nf_ct_unlink_expect(exp);
+ nf_ct_expect_put(exp);
+ return true;
+ }
+ return false;
+}
+EXPORT_SYMBOL_GPL(nf_ct_remove_expect);
+
struct nf_conntrack_expect *
__nf_ct_expect_find(struct net *net,
const struct nf_conntrack_zone *zone,
@@ -133,7 +144,7 @@ nf_ct_expect_find_get(struct net *net,
rcu_read_lock();
i = __nf_ct_expect_find(net, zone, tuple);
- if (i && !atomic_inc_not_zero(&i->use))
+ if (i && !refcount_inc_not_zero(&i->use))
i = NULL;
rcu_read_unlock();
@@ -186,7 +197,7 @@ nf_ct_find_expectation(struct net *net,
return NULL;
if (exp->flags & NF_CT_EXPECT_PERMANENT) {
- atomic_inc(&exp->use);
+ refcount_inc(&exp->use);
return exp;
} else if (del_timer(&exp->timeout)) {
nf_ct_unlink_expect(exp);
@@ -211,10 +222,7 @@ void nf_ct_remove_expectations(struct nf_conn *ct)
spin_lock_bh(&nf_conntrack_expect_lock);
hlist_for_each_entry_safe(exp, next, &help->expectations, lnode) {
- if (del_timer(&exp->timeout)) {
- nf_ct_unlink_expect(exp);
- nf_ct_expect_put(exp);
- }
+ nf_ct_remove_expect(exp);
}
spin_unlock_bh(&nf_conntrack_expect_lock);
}
@@ -255,10 +263,7 @@ static inline int expect_matches(const struct nf_conntrack_expect *a,
void nf_ct_unexpect_related(struct nf_conntrack_expect *exp)
{
spin_lock_bh(&nf_conntrack_expect_lock);
- if (del_timer(&exp->timeout)) {
- nf_ct_unlink_expect(exp);
- nf_ct_expect_put(exp);
- }
+ nf_ct_remove_expect(exp);
spin_unlock_bh(&nf_conntrack_expect_lock);
}
EXPORT_SYMBOL_GPL(nf_ct_unexpect_related);
@@ -275,7 +280,7 @@ struct nf_conntrack_expect *nf_ct_expect_alloc(struct nf_conn *me)
return NULL;
new->master = me;
- atomic_set(&new->use, 1);
+ refcount_set(&new->use, 1);
return new;
}
EXPORT_SYMBOL_GPL(nf_ct_expect_alloc);
@@ -348,7 +353,7 @@ static void nf_ct_expect_free_rcu(struct rcu_head *head)
void nf_ct_expect_put(struct nf_conntrack_expect *exp)
{
- if (atomic_dec_and_test(&exp->use))
+ if (refcount_dec_and_test(&exp->use))
call_rcu(&exp->rcu, nf_ct_expect_free_rcu);
}
EXPORT_SYMBOL_GPL(nf_ct_expect_put);
@@ -361,7 +366,7 @@ static void nf_ct_expect_insert(struct nf_conntrack_expect *exp)
unsigned int h = nf_ct_expect_dst_hash(net, &exp->tuple);
/* two references : one for hash insert, one for the timer */
- atomic_add(2, &exp->use);
+ refcount_add(2, &exp->use);
hlist_add_head_rcu(&exp->lnode, &master_help->expectations);
master_help->expecting[exp->class]++;
@@ -394,10 +399,8 @@ static void evict_oldest_expect(struct nf_conn *master,
last = exp;
}
- if (last && del_timer(&last->timeout)) {
- nf_ct_unlink_expect(last);
- nf_ct_expect_put(last);
- }
+ if (last)
+ nf_ct_remove_expect(last);
}
static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect)
@@ -419,11 +422,8 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect)
h = nf_ct_expect_dst_hash(net, &expect->tuple);
hlist_for_each_entry_safe(i, next, &nf_ct_expect_hash[h], hnode) {
if (expect_matches(i, expect)) {
- if (del_timer(&i->timeout)) {
- nf_ct_unlink_expect(i);
- nf_ct_expect_put(i);
+ if (nf_ct_remove_expect(expect))
break;
- }
} else if (expect_clash(i, expect)) {
ret = -EBUSY;
goto out;
@@ -549,7 +549,7 @@ static int exp_seq_show(struct seq_file *s, void *v)
seq_printf(s, "%ld ", timer_pending(&expect->timeout)
? (long)(expect->timeout.expires - jiffies)/HZ : 0);
else
- seq_printf(s, "- ");
+ seq_puts(s, "- ");
seq_printf(s, "l3proto = %u proto=%u ",
expect->tuple.src.l3num,
expect->tuple.dst.protonum);
@@ -559,7 +559,7 @@ static int exp_seq_show(struct seq_file *s, void *v)
expect->tuple.dst.protonum));
if (expect->flags & NF_CT_EXPECT_PERMANENT) {
- seq_printf(s, "PERMANENT");
+ seq_puts(s, "PERMANENT");
delim = ",";
}
if (expect->flags & NF_CT_EXPECT_INACTIVE) {
diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c
index 008299b7f78f..6c605e88ebae 100644
--- a/net/netfilter/nf_conntrack_extend.c
+++ b/net/netfilter/nf_conntrack_extend.c
@@ -18,17 +18,14 @@
static struct nf_ct_ext_type __rcu *nf_ct_ext_types[NF_CT_EXT_NUM];
static DEFINE_MUTEX(nf_ct_ext_type_mutex);
+#define NF_CT_EXT_PREALLOC 128u /* conntrack events are on by default */
-void __nf_ct_ext_destroy(struct nf_conn *ct)
+void nf_ct_ext_destroy(struct nf_conn *ct)
{
unsigned int i;
struct nf_ct_ext_type *t;
- struct nf_ct_ext *ext = ct->ext;
for (i = 0; i < NF_CT_EXT_NUM; i++) {
- if (!__nf_ct_ext_exist(ext, i))
- continue;
-
rcu_read_lock();
t = rcu_dereference(nf_ct_ext_types[i]);
@@ -41,54 +38,26 @@ void __nf_ct_ext_destroy(struct nf_conn *ct)
rcu_read_unlock();
}
}
-EXPORT_SYMBOL(__nf_ct_ext_destroy);
-
-static void *
-nf_ct_ext_create(struct nf_ct_ext **ext, enum nf_ct_ext_id id,
- size_t var_alloc_len, gfp_t gfp)
-{
- unsigned int off, len;
- struct nf_ct_ext_type *t;
- size_t alloc_size;
-
- rcu_read_lock();
- t = rcu_dereference(nf_ct_ext_types[id]);
- if (!t) {
- rcu_read_unlock();
- return NULL;
- }
-
- off = ALIGN(sizeof(struct nf_ct_ext), t->align);
- len = off + t->len + var_alloc_len;
- alloc_size = t->alloc_size + var_alloc_len;
- rcu_read_unlock();
-
- *ext = kzalloc(alloc_size, gfp);
- if (!*ext)
- return NULL;
-
- (*ext)->offset[id] = off;
- (*ext)->len = len;
-
- return (void *)(*ext) + off;
-}
+EXPORT_SYMBOL(nf_ct_ext_destroy);
-void *__nf_ct_ext_add_length(struct nf_conn *ct, enum nf_ct_ext_id id,
- size_t var_alloc_len, gfp_t gfp)
+void *nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp)
{
+ unsigned int newlen, newoff, oldlen, alloc;
struct nf_ct_ext *old, *new;
- int newlen, newoff;
struct nf_ct_ext_type *t;
/* Conntrack must not be confirmed to avoid races on reallocation. */
NF_CT_ASSERT(!nf_ct_is_confirmed(ct));
old = ct->ext;
- if (!old)
- return nf_ct_ext_create(&ct->ext, id, var_alloc_len, gfp);
- if (__nf_ct_ext_exist(old, id))
- return NULL;
+ if (old) {
+ if (__nf_ct_ext_exist(old, id))
+ return NULL;
+ oldlen = old->len;
+ } else {
+ oldlen = sizeof(*new);
+ }
rcu_read_lock();
t = rcu_dereference(nf_ct_ext_types[id]);
@@ -97,15 +66,19 @@ void *__nf_ct_ext_add_length(struct nf_conn *ct, enum nf_ct_ext_id id,
return NULL;
}
- newoff = ALIGN(old->len, t->align);
- newlen = newoff + t->len + var_alloc_len;
+ newoff = ALIGN(oldlen, t->align);
+ newlen = newoff + t->len;
rcu_read_unlock();
- new = __krealloc(old, newlen, gfp);
+ alloc = max(newlen, NF_CT_EXT_PREALLOC);
+ new = __krealloc(old, alloc, gfp);
if (!new)
return NULL;
- if (new != old) {
+ if (!old) {
+ memset(new->offset, 0, sizeof(new->offset));
+ ct->ext = new;
+ } else if (new != old) {
kfree_rcu(old, rcu);
rcu_assign_pointer(ct->ext, new);
}
@@ -115,45 +88,10 @@ void *__nf_ct_ext_add_length(struct nf_conn *ct, enum nf_ct_ext_id id,
memset((void *)new + newoff, 0, newlen - newoff);
return (void *)new + newoff;
}
-EXPORT_SYMBOL(__nf_ct_ext_add_length);
-
-static void update_alloc_size(struct nf_ct_ext_type *type)
-{
- int i, j;
- struct nf_ct_ext_type *t1, *t2;
- enum nf_ct_ext_id min = 0, max = NF_CT_EXT_NUM - 1;
-
- /* unnecessary to update all types */
- if ((type->flags & NF_CT_EXT_F_PREALLOC) == 0) {
- min = type->id;
- max = type->id;
- }
-
- /* This assumes that extended areas in conntrack for the types
- whose NF_CT_EXT_F_PREALLOC bit set are allocated in order */
- for (i = min; i <= max; i++) {
- t1 = rcu_dereference_protected(nf_ct_ext_types[i],
- lockdep_is_held(&nf_ct_ext_type_mutex));
- if (!t1)
- continue;
-
- t1->alloc_size = ALIGN(sizeof(struct nf_ct_ext), t1->align) +
- t1->len;
- for (j = 0; j < NF_CT_EXT_NUM; j++) {
- t2 = rcu_dereference_protected(nf_ct_ext_types[j],
- lockdep_is_held(&nf_ct_ext_type_mutex));
- if (t2 == NULL || t2 == t1 ||
- (t2->flags & NF_CT_EXT_F_PREALLOC) == 0)
- continue;
-
- t1->alloc_size = ALIGN(t1->alloc_size, t2->align)
- + t2->len;
- }
- }
-}
+EXPORT_SYMBOL(nf_ct_ext_add);
/* This MUST be called in process context. */
-int nf_ct_extend_register(struct nf_ct_ext_type *type)
+int nf_ct_extend_register(const struct nf_ct_ext_type *type)
{
int ret = 0;
@@ -163,12 +101,7 @@ int nf_ct_extend_register(struct nf_ct_ext_type *type)
goto out;
}
- /* This ensures that nf_ct_ext_create() can allocate enough area
- before updating alloc_size */
- type->alloc_size = ALIGN(sizeof(struct nf_ct_ext), type->align)
- + type->len;
rcu_assign_pointer(nf_ct_ext_types[type->id], type);
- update_alloc_size(type);
out:
mutex_unlock(&nf_ct_ext_type_mutex);
return ret;
@@ -176,11 +109,10 @@ out:
EXPORT_SYMBOL_GPL(nf_ct_extend_register);
/* This MUST be called in process context. */
-void nf_ct_extend_unregister(struct nf_ct_ext_type *type)
+void nf_ct_extend_unregister(const struct nf_ct_ext_type *type)
{
mutex_lock(&nf_ct_ext_type_mutex);
RCU_INIT_POINTER(nf_ct_ext_types[type->id], NULL);
- update_alloc_size(type);
mutex_unlock(&nf_ct_ext_type_mutex);
synchronize_rcu();
}
diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c
index 4aecef4a89fb..f0e9a7511e1a 100644
--- a/net/netfilter/nf_conntrack_ftp.c
+++ b/net/netfilter/nf_conntrack_ftp.c
@@ -577,6 +577,8 @@ static int __init nf_conntrack_ftp_init(void)
{
int i, ret = 0;
+ NF_CT_HELPER_BUILD_BUG_ON(sizeof(struct nf_ct_ftp_master));
+
ftp_buffer = kmalloc(65536, GFP_KERNEL);
if (!ftp_buffer)
return -ENOMEM;
@@ -589,12 +591,10 @@ static int __init nf_conntrack_ftp_init(void)
for (i = 0; i < ports_c; i++) {
nf_ct_helper_init(&ftp[2 * i], AF_INET, IPPROTO_TCP, "ftp",
FTP_PORT, ports[i], ports[i], &ftp_exp_policy,
- 0, sizeof(struct nf_ct_ftp_master), help,
- nf_ct_ftp_from_nlattr, THIS_MODULE);
+ 0, help, nf_ct_ftp_from_nlattr, THIS_MODULE);
nf_ct_helper_init(&ftp[2 * i + 1], AF_INET6, IPPROTO_TCP, "ftp",
FTP_PORT, ports[i], ports[i], &ftp_exp_policy,
- 0, sizeof(struct nf_ct_ftp_master), help,
- nf_ct_ftp_from_nlattr, THIS_MODULE);
+ 0, help, nf_ct_ftp_from_nlattr, THIS_MODULE);
}
ret = nf_conntrack_helpers_register(ftp, ports_c * 2);
diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c
index f65d93639d12..3bcdc718484e 100644
--- a/net/netfilter/nf_conntrack_h323_main.c
+++ b/net/netfilter/nf_conntrack_h323_main.c
@@ -637,7 +637,6 @@ static const struct nf_conntrack_expect_policy h245_exp_policy = {
static struct nf_conntrack_helper nf_conntrack_helper_h245 __read_mostly = {
.name = "H.245",
.me = THIS_MODULE,
- .data_len = sizeof(struct nf_ct_h323_master),
.tuple.src.l3num = AF_UNSPEC,
.tuple.dst.protonum = IPPROTO_UDP,
.help = h245_help,
@@ -1215,7 +1214,6 @@ static struct nf_conntrack_helper nf_conntrack_helper_q931[] __read_mostly = {
{
.name = "Q.931",
.me = THIS_MODULE,
- .data_len = sizeof(struct nf_ct_h323_master),
.tuple.src.l3num = AF_INET,
.tuple.src.u.tcp.port = cpu_to_be16(Q931_PORT),
.tuple.dst.protonum = IPPROTO_TCP,
@@ -1800,7 +1798,6 @@ static struct nf_conntrack_helper nf_conntrack_helper_ras[] __read_mostly = {
{
.name = "RAS",
.me = THIS_MODULE,
- .data_len = sizeof(struct nf_ct_h323_master),
.tuple.src.l3num = AF_INET,
.tuple.src.u.udp.port = cpu_to_be16(RAS_PORT),
.tuple.dst.protonum = IPPROTO_UDP,
@@ -1810,7 +1807,6 @@ static struct nf_conntrack_helper nf_conntrack_helper_ras[] __read_mostly = {
{
.name = "RAS",
.me = THIS_MODULE,
- .data_len = sizeof(struct nf_ct_h323_master),
.tuple.src.l3num = AF_INET6,
.tuple.src.u.udp.port = cpu_to_be16(RAS_PORT),
.tuple.dst.protonum = IPPROTO_UDP,
@@ -1836,6 +1832,8 @@ static int __init nf_conntrack_h323_init(void)
{
int ret;
+ NF_CT_HELPER_BUILD_BUG_ON(sizeof(struct nf_ct_h323_master));
+
h323_buffer = kmalloc(65536, GFP_KERNEL);
if (!h323_buffer)
return -ENOMEM;
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index 4eeb3418366a..3a60efa7799b 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -187,8 +187,7 @@ nf_ct_helper_ext_add(struct nf_conn *ct,
{
struct nf_conn_help *help;
- help = nf_ct_ext_add_length(ct, NF_CT_EXT_HELPER,
- helper->data_len, gfp);
+ help = nf_ct_ext_add(ct, NF_CT_EXT_HELPER, gfp);
if (help)
INIT_HLIST_HEAD(&help->expectations);
else
@@ -386,17 +385,36 @@ int nf_conntrack_helper_register(struct nf_conntrack_helper *me)
struct nf_conntrack_tuple_mask mask = { .src.u.all = htons(0xFFFF) };
unsigned int h = helper_hash(&me->tuple);
struct nf_conntrack_helper *cur;
- int ret = 0;
+ int ret = 0, i;
BUG_ON(me->expect_policy == NULL);
BUG_ON(me->expect_class_max >= NF_CT_MAX_EXPECT_CLASSES);
BUG_ON(strlen(me->name) > NF_CT_HELPER_NAME_LEN - 1);
+ if (me->expect_policy->max_expected > NF_CT_EXPECT_MAX_CNT)
+ return -EINVAL;
+
mutex_lock(&nf_ct_helper_mutex);
- hlist_for_each_entry(cur, &nf_ct_helper_hash[h], hnode) {
- if (nf_ct_tuple_src_mask_cmp(&cur->tuple, &me->tuple, &mask)) {
- ret = -EEXIST;
- goto out;
+ for (i = 0; i < nf_ct_helper_hsize; i++) {
+ hlist_for_each_entry(cur, &nf_ct_helper_hash[i], hnode) {
+ if (!strcmp(cur->name, me->name) &&
+ (cur->tuple.src.l3num == NFPROTO_UNSPEC ||
+ cur->tuple.src.l3num == me->tuple.src.l3num) &&
+ cur->tuple.dst.protonum == me->tuple.dst.protonum) {
+ ret = -EEXIST;
+ goto out;
+ }
+ }
+ }
+
+ /* avoid unpredictable behaviour for auto_assign_helper */
+ if (!(me->flags & NF_CT_HELPER_F_USERSPACE)) {
+ hlist_for_each_entry(cur, &nf_ct_helper_hash[h], hnode) {
+ if (nf_ct_tuple_src_mask_cmp(&cur->tuple, &me->tuple,
+ &mask)) {
+ ret = -EEXIST;
+ goto out;
+ }
}
}
hlist_add_head_rcu(&me->hnode, &nf_ct_helper_hash[h]);
@@ -455,11 +473,8 @@ void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me)
if ((rcu_dereference_protected(
help->helper,
lockdep_is_held(&nf_conntrack_expect_lock)
- ) == me || exp->helper == me) &&
- del_timer(&exp->timeout)) {
- nf_ct_unlink_expect(exp);
- nf_ct_expect_put(exp);
- }
+ ) == me || exp->helper == me))
+ nf_ct_remove_expect(exp);
}
}
spin_unlock_bh(&nf_conntrack_expect_lock);
@@ -491,7 +506,7 @@ void nf_ct_helper_init(struct nf_conntrack_helper *helper,
u16 l3num, u16 protonum, const char *name,
u16 default_port, u16 spec_port, u32 id,
const struct nf_conntrack_expect_policy *exp_pol,
- u32 expect_class_max, u32 data_len,
+ u32 expect_class_max,
int (*help)(struct sk_buff *skb, unsigned int protoff,
struct nf_conn *ct,
enum ip_conntrack_info ctinfo),
@@ -504,7 +519,6 @@ void nf_ct_helper_init(struct nf_conntrack_helper *helper,
helper->tuple.src.u.all = htons(spec_port);
helper->expect_policy = exp_pol;
helper->expect_class_max = expect_class_max;
- helper->data_len = data_len;
helper->help = help;
helper->from_nlattr = from_nlattr;
helper->me = module;
@@ -544,7 +558,7 @@ void nf_conntrack_helpers_unregister(struct nf_conntrack_helper *helper,
}
EXPORT_SYMBOL_GPL(nf_conntrack_helpers_unregister);
-static struct nf_ct_ext_type helper_extend __read_mostly = {
+static const struct nf_ct_ext_type helper_extend = {
.len = sizeof(struct nf_conn_help),
.align = __alignof__(struct nf_conn_help),
.id = NF_CT_EXT_HELPER,
diff --git a/net/netfilter/nf_conntrack_irc.c b/net/netfilter/nf_conntrack_irc.c
index 1972a149f958..5523acce9d69 100644
--- a/net/netfilter/nf_conntrack_irc.c
+++ b/net/netfilter/nf_conntrack_irc.c
@@ -243,6 +243,12 @@ static int __init nf_conntrack_irc_init(void)
return -EINVAL;
}
+ if (max_dcc_channels > NF_CT_EXPECT_MAX_CNT) {
+ pr_err("max_dcc_channels must not be more than %u\n",
+ NF_CT_EXPECT_MAX_CNT);
+ return -EINVAL;
+ }
+
irc_exp_policy.max_expected = max_dcc_channels;
irc_exp_policy.timeout = dcc_timeout;
@@ -257,7 +263,7 @@ static int __init nf_conntrack_irc_init(void)
for (i = 0; i < ports_c; i++) {
nf_ct_helper_init(&irc[i], AF_INET, IPPROTO_TCP, "irc",
IRC_PORT, ports[i], i, &irc_exp_policy,
- 0, 0, help, NULL, THIS_MODULE);
+ 0, help, NULL, THIS_MODULE);
}
ret = nf_conntrack_helpers_register(&irc[0], ports_c);
diff --git a/net/netfilter/nf_conntrack_labels.c b/net/netfilter/nf_conntrack_labels.c
index bcab8bde7312..adf219859901 100644
--- a/net/netfilter/nf_conntrack_labels.c
+++ b/net/netfilter/nf_conntrack_labels.c
@@ -82,7 +82,7 @@ void nf_connlabels_put(struct net *net)
}
EXPORT_SYMBOL_GPL(nf_connlabels_put);
-static struct nf_ct_ext_type labels_extend __read_mostly = {
+static const struct nf_ct_ext_type labels_extend = {
.len = sizeof(struct nf_conn_labels),
.align = __alignof__(struct nf_conn_labels),
.id = NF_CT_EXT_LABELS,
diff --git a/net/netfilter/nf_conntrack_netbios_ns.c b/net/netfilter/nf_conntrack_netbios_ns.c
index 4c8f30a3d6d2..496ce173f0c1 100644
--- a/net/netfilter/nf_conntrack_netbios_ns.c
+++ b/net/netfilter/nf_conntrack_netbios_ns.c
@@ -58,6 +58,8 @@ static struct nf_conntrack_helper helper __read_mostly = {
static int __init nf_conntrack_netbios_ns_init(void)
{
+ NF_CT_HELPER_BUILD_BUG_ON(0);
+
exp_policy.timeout = timeout;
return nf_conntrack_helper_register(&helper);
}
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index dc7dfd68fafe..dcf561b5c97a 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -417,8 +417,7 @@ nla_put_failure:
return -1;
}
-static int ctnetlink_dump_ct_seq_adj(struct sk_buff *skb,
- const struct nf_conn *ct)
+static int ctnetlink_dump_ct_seq_adj(struct sk_buff *skb, struct nf_conn *ct)
{
struct nf_conn_seqadj *seqadj = nfct_seqadj(ct);
struct nf_ct_seqadj *seq;
@@ -426,15 +425,20 @@ static int ctnetlink_dump_ct_seq_adj(struct sk_buff *skb,
if (!(ct->status & IPS_SEQ_ADJUST) || !seqadj)
return 0;
+ spin_lock_bh(&ct->lock);
seq = &seqadj->seq[IP_CT_DIR_ORIGINAL];
if (dump_ct_seq_adj(skb, seq, CTA_SEQ_ADJ_ORIG) == -1)
- return -1;
+ goto err;
seq = &seqadj->seq[IP_CT_DIR_REPLY];
if (dump_ct_seq_adj(skb, seq, CTA_SEQ_ADJ_REPLY) == -1)
- return -1;
+ goto err;
+ spin_unlock_bh(&ct->lock);
return 0;
+err:
+ spin_unlock_bh(&ct->lock);
+ return -1;
}
static int ctnetlink_dump_id(struct sk_buff *skb, const struct nf_conn *ct)
@@ -467,7 +471,7 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
struct nlattr *nest_parms;
unsigned int flags = portid ? NLM_F_MULTI : 0, event;
- event = (NFNL_SUBSYS_CTNETLINK << 8 | IPCTNL_MSG_CT_NEW);
+ event = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK, IPCTNL_MSG_CT_NEW);
nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags);
if (nlh == NULL)
goto nlmsg_failure;
@@ -627,10 +631,6 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item)
unsigned int flags = 0, group;
int err;
- /* ignore our fake conntrack entry */
- if (nf_ct_is_untracked(ct))
- return 0;
-
if (events & (1 << IPCT_DESTROY)) {
type = IPCTNL_MSG_CT_DELETE;
group = NFNLGRP_CONNTRACK_DESTROY;
@@ -652,7 +652,7 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item)
if (skb == NULL)
goto errout;
- type |= NFNL_SUBSYS_CTNETLINK << 8;
+ type = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK, type);
nlh = nlmsg_put(skb, item->portid, 0, type, sizeof(*nfmsg), flags);
if (nlh == NULL)
goto nlmsg_failure;
@@ -908,7 +908,7 @@ static int ctnetlink_parse_tuple_ip(struct nlattr *attr,
struct nf_conntrack_l3proto *l3proto;
int ret = 0;
- ret = nla_parse_nested(tb, CTA_IP_MAX, attr, NULL);
+ ret = nla_parse_nested(tb, CTA_IP_MAX, attr, NULL, NULL);
if (ret < 0)
return ret;
@@ -917,7 +917,7 @@ static int ctnetlink_parse_tuple_ip(struct nlattr *attr,
if (likely(l3proto->nlattr_to_tuple)) {
ret = nla_validate_nested(attr, CTA_IP_MAX,
- l3proto->nla_policy);
+ l3proto->nla_policy, NULL);
if (ret == 0)
ret = l3proto->nlattr_to_tuple(tb, tuple);
}
@@ -938,7 +938,8 @@ static int ctnetlink_parse_tuple_proto(struct nlattr *attr,
struct nf_conntrack_l4proto *l4proto;
int ret = 0;
- ret = nla_parse_nested(tb, CTA_PROTO_MAX, attr, proto_nla_policy);
+ ret = nla_parse_nested(tb, CTA_PROTO_MAX, attr, proto_nla_policy,
+ NULL);
if (ret < 0)
return ret;
@@ -951,7 +952,7 @@ static int ctnetlink_parse_tuple_proto(struct nlattr *attr,
if (likely(l4proto->nlattr_to_tuple)) {
ret = nla_validate_nested(attr, CTA_PROTO_MAX,
- l4proto->nla_policy);
+ l4proto->nla_policy, NULL);
if (ret == 0)
ret = l4proto->nlattr_to_tuple(tb, tuple);
}
@@ -1015,7 +1016,8 @@ ctnetlink_parse_tuple(const struct nlattr * const cda[],
memset(tuple, 0, sizeof(*tuple));
- err = nla_parse_nested(tb, CTA_TUPLE_MAX, cda[type], tuple_nla_policy);
+ err = nla_parse_nested(tb, CTA_TUPLE_MAX, cda[type], tuple_nla_policy,
+ NULL);
if (err < 0)
return err;
@@ -1065,7 +1067,7 @@ static int ctnetlink_parse_help(const struct nlattr *attr, char **helper_name,
int err;
struct nlattr *tb[CTA_HELP_MAX+1];
- err = nla_parse_nested(tb, CTA_HELP_MAX, attr, help_nla_policy);
+ err = nla_parse_nested(tb, CTA_HELP_MAX, attr, help_nla_policy, NULL);
if (err < 0)
return err;
@@ -1419,6 +1421,24 @@ ctnetlink_parse_nat_setup(struct nf_conn *ct,
}
#endif
+static void
+__ctnetlink_change_status(struct nf_conn *ct, unsigned long on,
+ unsigned long off)
+{
+ unsigned int bit;
+
+ /* Ignore these unchangable bits */
+ on &= ~IPS_UNCHANGEABLE_MASK;
+ off &= ~IPS_UNCHANGEABLE_MASK;
+
+ for (bit = 0; bit < __IPS_MAX_BIT; bit++) {
+ if (on & (1 << bit))
+ set_bit(bit, &ct->status);
+ else if (off & (1 << bit))
+ clear_bit(bit, &ct->status);
+ }
+}
+
static int
ctnetlink_change_status(struct nf_conn *ct, const struct nlattr * const cda[])
{
@@ -1438,10 +1458,7 @@ ctnetlink_change_status(struct nf_conn *ct, const struct nlattr * const cda[])
/* ASSURED bit can only be set */
return -EBUSY;
- /* Be careful here, modifying NAT bits can screw up things,
- * so don't let users modify them directly if they don't pass
- * nf_nat_range. */
- ct->status |= status & ~(IPS_NAT_DONE_MASK | IPS_NAT_MASK);
+ __ctnetlink_change_status(ct, status, 0);
return 0;
}
@@ -1510,23 +1527,11 @@ static int ctnetlink_change_helper(struct nf_conn *ct,
return 0;
}
+ rcu_read_lock();
helper = __nf_conntrack_helper_find(helpname, nf_ct_l3num(ct),
nf_ct_protonum(ct));
if (helper == NULL) {
-#ifdef CONFIG_MODULES
- spin_unlock_bh(&nf_conntrack_expect_lock);
-
- if (request_module("nfct-helper-%s", helpname) < 0) {
- spin_lock_bh(&nf_conntrack_expect_lock);
- return -EOPNOTSUPP;
- }
-
- spin_lock_bh(&nf_conntrack_expect_lock);
- helper = __nf_conntrack_helper_find(helpname, nf_ct_l3num(ct),
- nf_ct_protonum(ct));
- if (helper)
- return -EAGAIN;
-#endif
+ rcu_read_unlock();
return -EOPNOTSUPP;
}
@@ -1535,13 +1540,16 @@ static int ctnetlink_change_helper(struct nf_conn *ct,
/* update private helper data if allowed. */
if (helper->from_nlattr)
helper->from_nlattr(helpinfo, ct);
- return 0;
+ err = 0;
} else
- return -EBUSY;
+ err = -EBUSY;
+ } else {
+ /* we cannot set a helper for an existing conntrack */
+ err = -EOPNOTSUPP;
}
- /* we cannot set a helper for an existing conntrack */
- return -EOPNOTSUPP;
+ rcu_read_unlock();
+ return err;
}
static int ctnetlink_change_timeout(struct nf_conn *ct,
@@ -1571,7 +1579,8 @@ static int ctnetlink_change_protoinfo(struct nf_conn *ct,
struct nf_conntrack_l4proto *l4proto;
int err = 0;
- err = nla_parse_nested(tb, CTA_PROTOINFO_MAX, attr, protoinfo_policy);
+ err = nla_parse_nested(tb, CTA_PROTOINFO_MAX, attr, protoinfo_policy,
+ NULL);
if (err < 0)
return err;
@@ -1596,7 +1605,7 @@ static int change_seq_adj(struct nf_ct_seqadj *seq,
int err;
struct nlattr *cda[CTA_SEQADJ_MAX+1];
- err = nla_parse_nested(cda, CTA_SEQADJ_MAX, attr, seqadj_policy);
+ err = nla_parse_nested(cda, CTA_SEQADJ_MAX, attr, seqadj_policy, NULL);
if (err < 0)
return err;
@@ -1631,25 +1640,30 @@ ctnetlink_change_seq_adj(struct nf_conn *ct,
if (!seqadj)
return 0;
+ spin_lock_bh(&ct->lock);
if (cda[CTA_SEQ_ADJ_ORIG]) {
ret = change_seq_adj(&seqadj->seq[IP_CT_DIR_ORIGINAL],
cda[CTA_SEQ_ADJ_ORIG]);
if (ret < 0)
- return ret;
+ goto err;
- ct->status |= IPS_SEQ_ADJUST;
+ set_bit(IPS_SEQ_ADJUST_BIT, &ct->status);
}
if (cda[CTA_SEQ_ADJ_REPLY]) {
ret = change_seq_adj(&seqadj->seq[IP_CT_DIR_REPLY],
cda[CTA_SEQ_ADJ_REPLY]);
if (ret < 0)
- return ret;
+ goto err;
- ct->status |= IPS_SEQ_ADJUST;
+ set_bit(IPS_SEQ_ADJUST_BIT, &ct->status);
}
+ spin_unlock_bh(&ct->lock);
return 0;
+err:
+ spin_unlock_bh(&ct->lock);
+ return ret;
}
static int
@@ -1960,9 +1974,7 @@ static int ctnetlink_new_conntrack(struct net *net, struct sock *ctnl,
err = -EEXIST;
ct = nf_ct_tuplehash_to_ctrack(h);
if (!(nlh->nlmsg_flags & NLM_F_EXCL)) {
- spin_lock_bh(&nf_conntrack_expect_lock);
err = ctnetlink_change_conntrack(ct, cda);
- spin_unlock_bh(&nf_conntrack_expect_lock);
if (err == 0) {
nf_conntrack_eventmask_report((1 << IPCT_REPLY) |
(1 << IPCT_ASSURED) |
@@ -1988,7 +2000,8 @@ ctnetlink_ct_stat_cpu_fill_info(struct sk_buff *skb, u32 portid, u32 seq,
struct nfgenmsg *nfmsg;
unsigned int flags = portid ? NLM_F_MULTI : 0, event;
- event = (NFNL_SUBSYS_CTNETLINK << 8 | IPCTNL_MSG_CT_GET_STATS_CPU);
+ event = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK,
+ IPCTNL_MSG_CT_GET_STATS_CPU);
nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags);
if (nlh == NULL)
goto nlmsg_failure;
@@ -2071,7 +2084,7 @@ ctnetlink_stat_ct_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
unsigned int flags = portid ? NLM_F_MULTI : 0, event;
unsigned int nr_conntracks = atomic_read(&net->ct.count);
- event = (NFNL_SUBSYS_CTNETLINK << 8 | IPCTNL_MSG_CT_GET_STATS);
+ event = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK, IPCTNL_MSG_CT_GET_STATS);
nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags);
if (nlh == NULL)
goto nlmsg_failure;
@@ -2177,13 +2190,7 @@ ctnetlink_glue_build_size(const struct nf_conn *ct)
static struct nf_conn *ctnetlink_glue_get_ct(const struct sk_buff *skb,
enum ip_conntrack_info *ctinfo)
{
- struct nf_conn *ct;
-
- ct = nf_ct_get(skb, ctinfo);
- if (ct && nf_ct_is_untracked(ct))
- ct = NULL;
-
- return ct;
+ return nf_ct_get(skb, ctinfo);
}
static int __ctnetlink_glue_build(struct sk_buff *skb, struct nf_conn *ct)
@@ -2300,10 +2307,10 @@ ctnetlink_update_status(struct nf_conn *ct, const struct nlattr * const cda[])
/* This check is less strict than ctnetlink_change_status()
* because callers often flip IPS_EXPECTED bits when sending
* an NFQA_CT attribute to the kernel. So ignore the
- * unchangeable bits but do not error out.
+ * unchangeable bits but do not error out. Also user programs
+ * are allowed to clear the bits that they are allowed to change.
*/
- ct->status = (status & ~IPS_UNCHANGEABLE_MASK) |
- (ct->status & IPS_UNCHANGEABLE_MASK);
+ __ctnetlink_change_status(ct, status, ~status);
return 0;
}
@@ -2353,15 +2360,11 @@ ctnetlink_glue_parse(const struct nlattr *attr, struct nf_conn *ct)
struct nlattr *cda[CTA_MAX+1];
int ret;
- ret = nla_parse_nested(cda, CTA_MAX, attr, ct_nla_policy);
+ ret = nla_parse_nested(cda, CTA_MAX, attr, ct_nla_policy, NULL);
if (ret < 0)
return ret;
- spin_lock_bh(&nf_conntrack_expect_lock);
- ret = ctnetlink_glue_parse_ct((const struct nlattr **)cda, ct);
- spin_unlock_bh(&nf_conntrack_expect_lock);
-
- return ret;
+ return ctnetlink_glue_parse_ct((const struct nlattr **)cda, ct);
}
static int ctnetlink_glue_exp_parse(const struct nlattr * const *cda,
@@ -2390,7 +2393,8 @@ ctnetlink_glue_attach_expect(const struct nlattr *attr, struct nf_conn *ct,
struct nf_conntrack_expect *exp;
int err;
- err = nla_parse_nested(cda, CTA_EXPECT_MAX, attr, exp_nla_policy);
+ err = nla_parse_nested(cda, CTA_EXPECT_MAX, attr, exp_nla_policy,
+ NULL);
if (err < 0)
return err;
@@ -2581,7 +2585,7 @@ ctnetlink_exp_fill_info(struct sk_buff *skb, u32 portid, u32 seq,
struct nfgenmsg *nfmsg;
unsigned int flags = portid ? NLM_F_MULTI : 0;
- event |= NFNL_SUBSYS_CTNETLINK_EXP << 8;
+ event = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK_EXP, event);
nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags);
if (nlh == NULL)
goto nlmsg_failure;
@@ -2632,7 +2636,7 @@ ctnetlink_expect_event(unsigned int events, struct nf_exp_event *item)
if (skb == NULL)
goto errout;
- type |= NFNL_SUBSYS_CTNETLINK_EXP << 8;
+ type = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK_EXP, type);
nlh = nlmsg_put(skb, item->portid, 0, type, sizeof(*nfmsg), flags);
if (nlh == NULL)
goto nlmsg_failure;
@@ -2698,7 +2702,7 @@ restart:
cb->nlh->nlmsg_seq,
IPCTNL_MSG_EXP_NEW,
exp) < 0) {
- if (!atomic_inc_not_zero(&exp->use))
+ if (!refcount_inc_not_zero(&exp->use))
continue;
cb->args[1] = (unsigned long)exp;
goto out;
@@ -2744,7 +2748,7 @@ restart:
cb->nlh->nlmsg_seq,
IPCTNL_MSG_EXP_NEW,
exp) < 0) {
- if (!atomic_inc_not_zero(&exp->use))
+ if (!refcount_inc_not_zero(&exp->use))
continue;
cb->args[1] = (unsigned long)exp;
goto out;
@@ -3015,7 +3019,8 @@ ctnetlink_parse_expect_nat(const struct nlattr *attr,
struct nf_conntrack_tuple nat_tuple = {};
int err;
- err = nla_parse_nested(tb, CTA_EXPECT_NAT_MAX, attr, exp_nat_nla_policy);
+ err = nla_parse_nested(tb, CTA_EXPECT_NAT_MAX, attr,
+ exp_nat_nla_policy, NULL);
if (err < 0)
return err;
@@ -3049,6 +3054,10 @@ ctnetlink_alloc_expect(const struct nlattr * const cda[], struct nf_conn *ct,
struct nf_conn_help *help;
int err;
+ help = nfct_help(ct);
+ if (!help)
+ return ERR_PTR(-EOPNOTSUPP);
+
if (cda[CTA_EXPECT_CLASS] && helper) {
class = ntohl(nla_get_be32(cda[CTA_EXPECT_CLASS]));
if (class > helper->expect_class_max)
@@ -3058,26 +3067,11 @@ ctnetlink_alloc_expect(const struct nlattr * const cda[], struct nf_conn *ct,
if (!exp)
return ERR_PTR(-ENOMEM);
- help = nfct_help(ct);
- if (!help) {
- if (!cda[CTA_EXPECT_TIMEOUT]) {
- err = -EINVAL;
- goto err_out;
- }
- exp->timeout.expires =
- jiffies + ntohl(nla_get_be32(cda[CTA_EXPECT_TIMEOUT])) * HZ;
-
- exp->flags = NF_CT_EXPECT_USERSPACE;
- if (cda[CTA_EXPECT_FLAGS]) {
- exp->flags |=
- ntohl(nla_get_be32(cda[CTA_EXPECT_FLAGS]));
- }
+ if (cda[CTA_EXPECT_FLAGS]) {
+ exp->flags = ntohl(nla_get_be32(cda[CTA_EXPECT_FLAGS]));
+ exp->flags &= ~NF_CT_EXPECT_USERSPACE;
} else {
- if (cda[CTA_EXPECT_FLAGS]) {
- exp->flags = ntohl(nla_get_be32(cda[CTA_EXPECT_FLAGS]));
- exp->flags &= ~NF_CT_EXPECT_USERSPACE;
- } else
- exp->flags = 0;
+ exp->flags = 0;
}
if (cda[CTA_EXPECT_FN]) {
const char *name = nla_data(cda[CTA_EXPECT_FN]);
@@ -3240,7 +3234,8 @@ ctnetlink_exp_stat_fill_info(struct sk_buff *skb, u32 portid, u32 seq, int cpu,
struct nfgenmsg *nfmsg;
unsigned int flags = portid ? NLM_F_MULTI : 0, event;
- event = (NFNL_SUBSYS_CTNETLINK << 8 | IPCTNL_MSG_EXP_GET_STATS_CPU);
+ event = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK,
+ IPCTNL_MSG_EXP_GET_STATS_CPU);
nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags);
if (nlh == NULL)
goto nlmsg_failure;
diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c
index f60a4755d71e..6959e93063d4 100644
--- a/net/netfilter/nf_conntrack_pptp.c
+++ b/net/netfilter/nf_conntrack_pptp.c
@@ -263,7 +263,7 @@ out_unexpect_orig:
goto out_put_both;
}
-static inline int
+static int
pptp_inbound_pkt(struct sk_buff *skb, unsigned int protoff,
struct PptpControlHeader *ctlh,
union pptp_ctrl_union *pptpReq,
@@ -391,7 +391,7 @@ invalid:
return NF_ACCEPT;
}
-static inline int
+static int
pptp_outbound_pkt(struct sk_buff *skb, unsigned int protoff,
struct PptpControlHeader *ctlh,
union pptp_ctrl_union *pptpReq,
@@ -523,6 +523,14 @@ conntrack_pptp_help(struct sk_buff *skb, unsigned int protoff,
int ret;
u_int16_t msg;
+#if IS_ENABLED(CONFIG_NF_NAT)
+ if (!nf_ct_is_confirmed(ct) && (ct->status & IPS_NAT_MASK)) {
+ struct nf_conn_nat *nat = nf_ct_ext_find(ct, NF_CT_EXT_NAT);
+
+ if (!nat && !nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC))
+ return NF_DROP;
+ }
+#endif
/* don't do any tracking before tcp handshake complete */
if (ctinfo != IP_CT_ESTABLISHED && ctinfo != IP_CT_ESTABLISHED_REPLY)
return NF_ACCEPT;
@@ -596,7 +604,6 @@ static const struct nf_conntrack_expect_policy pptp_exp_policy = {
static struct nf_conntrack_helper pptp __read_mostly = {
.name = "pptp",
.me = THIS_MODULE,
- .data_len = sizeof(struct nf_ct_pptp_master),
.tuple.src.l3num = AF_INET,
.tuple.src.u.tcp.port = cpu_to_be16(PPTP_CONTROL_PORT),
.tuple.dst.protonum = IPPROTO_TCP,
@@ -607,6 +614,8 @@ static struct nf_conntrack_helper pptp __read_mostly = {
static int __init nf_conntrack_pptp_init(void)
{
+ NF_CT_HELPER_BUILD_BUG_ON(sizeof(struct nf_ct_pptp_master));
+
return nf_conntrack_helper_register(&pptp);
}
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index 2d6ee1803415..2de6c1fe3261 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -202,7 +202,7 @@ static int kill_l3proto(struct nf_conn *i, void *data)
static int kill_l4proto(struct nf_conn *i, void *data)
{
struct nf_conntrack_l4proto *l4proto;
- l4proto = (struct nf_conntrack_l4proto *)data;
+ l4proto = data;
return nf_ct_protonum(i) == l4proto->l4proto &&
nf_ct_l3num(i) == l4proto->l3proto;
}
@@ -441,9 +441,8 @@ EXPORT_SYMBOL_GPL(nf_ct_l4proto_unregister_one);
void nf_ct_l4proto_pernet_unregister_one(struct net *net,
struct nf_conntrack_l4proto *l4proto)
{
- struct nf_proto_net *pn = NULL;
+ struct nf_proto_net *pn = nf_ct_l4proto_net(net, l4proto);
- pn = nf_ct_l4proto_net(net, l4proto);
if (pn == NULL)
return;
diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c
index 93dd1c5b7bff..b553fdd68816 100644
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -609,6 +609,20 @@ out_invalid:
return -NF_ACCEPT;
}
+static bool dccp_can_early_drop(const struct nf_conn *ct)
+{
+ switch (ct->proto.dccp.state) {
+ case CT_DCCP_CLOSEREQ:
+ case CT_DCCP_CLOSING:
+ case CT_DCCP_TIMEWAIT:
+ return true;
+ default:
+ break;
+ }
+
+ return false;
+}
+
static void dccp_print_tuple(struct seq_file *s,
const struct nf_conntrack_tuple *tuple)
{
@@ -665,7 +679,7 @@ static int nlattr_to_dccp(struct nlattr *cda[], struct nf_conn *ct)
return 0;
err = nla_parse_nested(tb, CTA_PROTOINFO_DCCP_MAX, attr,
- dccp_nla_policy);
+ dccp_nla_policy, NULL);
if (err < 0)
return err;
@@ -868,6 +882,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp4 __read_mostly = {
.packet = dccp_packet,
.get_timeouts = dccp_get_timeouts,
.error = dccp_error,
+ .can_early_drop = dccp_can_early_drop,
.print_tuple = dccp_print_tuple,
.print_conntrack = dccp_print_conntrack,
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
@@ -902,6 +917,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp6 __read_mostly = {
.packet = dccp_packet,
.get_timeouts = dccp_get_timeouts,
.error = dccp_error,
+ .can_early_drop = dccp_can_early_drop,
.print_tuple = dccp_print_tuple,
.print_conntrack = dccp_print_conntrack,
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index 33279aab583d..13875d599a85 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -535,6 +535,20 @@ out_invalid:
return -NF_ACCEPT;
}
+static bool sctp_can_early_drop(const struct nf_conn *ct)
+{
+ switch (ct->proto.sctp.state) {
+ case SCTP_CONNTRACK_SHUTDOWN_SENT:
+ case SCTP_CONNTRACK_SHUTDOWN_RECD:
+ case SCTP_CONNTRACK_SHUTDOWN_ACK_SENT:
+ return true;
+ default:
+ break;
+ }
+
+ return false;
+}
+
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
#include <linux/netfilter/nfnetlink.h>
@@ -584,10 +598,8 @@ static int nlattr_to_sctp(struct nlattr *cda[], struct nf_conn *ct)
if (!attr)
return 0;
- err = nla_parse_nested(tb,
- CTA_PROTOINFO_SCTP_MAX,
- attr,
- sctp_nla_policy);
+ err = nla_parse_nested(tb, CTA_PROTOINFO_SCTP_MAX, attr,
+ sctp_nla_policy, NULL);
if (err < 0)
return err;
@@ -783,6 +795,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 __read_mostly = {
.get_timeouts = sctp_get_timeouts,
.new = sctp_new,
.error = sctp_error,
+ .can_early_drop = sctp_can_early_drop,
.me = THIS_MODULE,
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
.to_nlattr = sctp_to_nlattr,
@@ -818,6 +831,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6 __read_mostly = {
.get_timeouts = sctp_get_timeouts,
.new = sctp_new,
.error = sctp_error,
+ .can_early_drop = sctp_can_early_drop,
.me = THIS_MODULE,
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
.to_nlattr = sctp_to_nlattr,
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index b122e9dacfed..9758a7dfd83e 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -419,10 +419,9 @@ static void tcp_options(const struct sk_buff *skb,
&& opsize == TCPOLEN_WINDOW) {
state->td_scale = *(u_int8_t *)ptr;
- if (state->td_scale > 14) {
- /* See RFC1323 */
- state->td_scale = 14;
- }
+ if (state->td_scale > TCP_MAX_WSCALE)
+ state->td_scale = TCP_MAX_WSCALE;
+
state->flags |=
IP_CT_TCP_FLAG_WINDOW_SCALE;
}
@@ -1172,6 +1171,22 @@ static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
return true;
}
+static bool tcp_can_early_drop(const struct nf_conn *ct)
+{
+ switch (ct->proto.tcp.state) {
+ case TCP_CONNTRACK_FIN_WAIT:
+ case TCP_CONNTRACK_LAST_ACK:
+ case TCP_CONNTRACK_TIME_WAIT:
+ case TCP_CONNTRACK_CLOSE:
+ case TCP_CONNTRACK_CLOSE_WAIT:
+ return true;
+ default:
+ break;
+ }
+
+ return false;
+}
+
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
#include <linux/netfilter/nfnetlink.h>
@@ -1234,7 +1249,8 @@ static int nlattr_to_tcp(struct nlattr *cda[], struct nf_conn *ct)
if (!pattr)
return 0;
- err = nla_parse_nested(tb, CTA_PROTOINFO_TCP_MAX, pattr, tcp_nla_policy);
+ err = nla_parse_nested(tb, CTA_PROTOINFO_TCP_MAX, pattr,
+ tcp_nla_policy, NULL);
if (err < 0)
return err;
@@ -1549,6 +1565,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 __read_mostly =
.get_timeouts = tcp_get_timeouts,
.new = tcp_new,
.error = tcp_error,
+ .can_early_drop = tcp_can_early_drop,
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
.to_nlattr = tcp_to_nlattr,
.nlattr_size = tcp_nlattr_size,
@@ -1586,6 +1603,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6 __read_mostly =
.get_timeouts = tcp_get_timeouts,
.new = tcp_new,
.error = tcp_error,
+ .can_early_drop = tcp_can_early_drop,
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
.to_nlattr = tcp_to_nlattr,
.nlattr_size = tcp_nlattr_size,
diff --git a/net/netfilter/nf_conntrack_sane.c b/net/netfilter/nf_conntrack_sane.c
index 9dcb9ee9b97d..ae457f39d5ce 100644
--- a/net/netfilter/nf_conntrack_sane.c
+++ b/net/netfilter/nf_conntrack_sane.c
@@ -184,6 +184,8 @@ static int __init nf_conntrack_sane_init(void)
{
int i, ret = 0;
+ NF_CT_HELPER_BUILD_BUG_ON(sizeof(struct nf_ct_sane_master));
+
sane_buffer = kmalloc(65536, GFP_KERNEL);
if (!sane_buffer)
return -ENOMEM;
@@ -196,13 +198,11 @@ static int __init nf_conntrack_sane_init(void)
for (i = 0; i < ports_c; i++) {
nf_ct_helper_init(&sane[2 * i], AF_INET, IPPROTO_TCP, "sane",
SANE_PORT, ports[i], ports[i],
- &sane_exp_policy, 0,
- sizeof(struct nf_ct_sane_master), help, NULL,
+ &sane_exp_policy, 0, help, NULL,
THIS_MODULE);
nf_ct_helper_init(&sane[2 * i + 1], AF_INET6, IPPROTO_TCP, "sane",
SANE_PORT, ports[i], ports[i],
- &sane_exp_policy, 0,
- sizeof(struct nf_ct_sane_master), help, NULL,
+ &sane_exp_policy, 0, help, NULL,
THIS_MODULE);
}
diff --git a/net/netfilter/nf_conntrack_seqadj.c b/net/netfilter/nf_conntrack_seqadj.c
index ef7063eced7c..a975efd6b8c3 100644
--- a/net/netfilter/nf_conntrack_seqadj.c
+++ b/net/netfilter/nf_conntrack_seqadj.c
@@ -231,7 +231,7 @@ s32 nf_ct_seq_offset(const struct nf_conn *ct,
}
EXPORT_SYMBOL_GPL(nf_ct_seq_offset);
-static struct nf_ct_ext_type nf_ct_seqadj_extend __read_mostly = {
+static const struct nf_ct_ext_type nf_ct_seqadj_extend = {
.len = sizeof(struct nf_conn_seqadj),
.align = __alignof__(struct nf_conn_seqadj),
.id = NF_CT_EXT_SEQADJ,
diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c
index 0d17894798b5..d38af4274335 100644
--- a/net/netfilter/nf_conntrack_sip.c
+++ b/net/netfilter/nf_conntrack_sip.c
@@ -829,10 +829,8 @@ static void flush_expectations(struct nf_conn *ct, bool media)
hlist_for_each_entry_safe(exp, next, &help->expectations, lnode) {
if ((exp->class != SIP_EXPECT_SIGNALLING) ^ media)
continue;
- if (!del_timer(&exp->timeout))
+ if (!nf_ct_remove_expect(exp))
continue;
- nf_ct_unlink_expect(exp);
- nf_ct_expect_put(exp);
if (!media)
break;
}
@@ -1624,29 +1622,27 @@ static int __init nf_conntrack_sip_init(void)
{
int i, ret;
+ NF_CT_HELPER_BUILD_BUG_ON(sizeof(struct nf_ct_sip_master));
+
if (ports_c == 0)
ports[ports_c++] = SIP_PORT;
for (i = 0; i < ports_c; i++) {
nf_ct_helper_init(&sip[4 * i], AF_INET, IPPROTO_UDP, "sip",
SIP_PORT, ports[i], i, sip_exp_policy,
- SIP_EXPECT_MAX,
- sizeof(struct nf_ct_sip_master), sip_help_udp,
+ SIP_EXPECT_MAX, sip_help_udp,
NULL, THIS_MODULE);
nf_ct_helper_init(&sip[4 * i + 1], AF_INET, IPPROTO_TCP, "sip",
SIP_PORT, ports[i], i, sip_exp_policy,
- SIP_EXPECT_MAX,
- sizeof(struct nf_ct_sip_master), sip_help_tcp,
+ SIP_EXPECT_MAX, sip_help_tcp,
NULL, THIS_MODULE);
nf_ct_helper_init(&sip[4 * i + 2], AF_INET6, IPPROTO_UDP, "sip",
SIP_PORT, ports[i], i, sip_exp_policy,
- SIP_EXPECT_MAX,
- sizeof(struct nf_ct_sip_master), sip_help_udp,
+ SIP_EXPECT_MAX, sip_help_udp,
NULL, THIS_MODULE);
nf_ct_helper_init(&sip[4 * i + 3], AF_INET6, IPPROTO_TCP, "sip",
SIP_PORT, ports[i], i, sip_exp_policy,
- SIP_EXPECT_MAX,
- sizeof(struct nf_ct_sip_master), sip_help_tcp,
+ SIP_EXPECT_MAX, sip_help_tcp,
NULL, THIS_MODULE);
}
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 2256147dcaad..ccb5cb9043e0 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -250,7 +250,7 @@ static int ct_seq_show(struct seq_file *s, void *v)
goto release;
if (!(test_bit(IPS_SEEN_REPLY_BIT, &ct->status)))
- seq_printf(s, "[UNREPLIED] ");
+ seq_puts(s, "[UNREPLIED] ");
print_tuple(s, &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
l3proto, l4proto);
@@ -261,7 +261,7 @@ static int ct_seq_show(struct seq_file *s, void *v)
goto release;
if (test_bit(IPS_ASSURED_BIT, &ct->status))
- seq_printf(s, "[ASSURED] ");
+ seq_puts(s, "[ASSURED] ");
if (seq_has_overflowed(s))
goto release;
@@ -350,7 +350,7 @@ static int ct_cpu_seq_show(struct seq_file *seq, void *v)
const struct ip_conntrack_stat *st = v;
if (v == SEQ_START_TOKEN) {
- seq_printf(seq, "entries searched found new invalid ignore delete delete_list insert insert_failed drop early_drop icmp_error expect_new expect_create expect_delete search_restart\n");
+ seq_puts(seq, "entries searched found new invalid ignore delete delete_list insert insert_failed drop early_drop icmp_error expect_new expect_create expect_delete search_restart\n");
return 0;
}
diff --git a/net/netfilter/nf_conntrack_tftp.c b/net/netfilter/nf_conntrack_tftp.c
index b1227dc6f75e..0ec6779fd5d9 100644
--- a/net/netfilter/nf_conntrack_tftp.c
+++ b/net/netfilter/nf_conntrack_tftp.c
@@ -113,16 +113,18 @@ static int __init nf_conntrack_tftp_init(void)
{
int i, ret;
+ NF_CT_HELPER_BUILD_BUG_ON(0);
+
if (ports_c == 0)
ports[ports_c++] = TFTP_PORT;
for (i = 0; i < ports_c; i++) {
nf_ct_helper_init(&tftp[2 * i], AF_INET, IPPROTO_UDP, "tftp",
TFTP_PORT, ports[i], i, &tftp_exp_policy,
- 0, 0, tftp_help, NULL, THIS_MODULE);
+ 0, tftp_help, NULL, THIS_MODULE);
nf_ct_helper_init(&tftp[2 * i + 1], AF_INET6, IPPROTO_UDP, "tftp",
TFTP_PORT, ports[i], i, &tftp_exp_policy,
- 0, 0, tftp_help, NULL, THIS_MODULE);
+ 0, tftp_help, NULL, THIS_MODULE);
}
ret = nf_conntrack_helpers_register(tftp, ports_c * 2);
diff --git a/net/netfilter/nf_conntrack_timeout.c b/net/netfilter/nf_conntrack_timeout.c
index 26e742006c48..46aee65f339b 100644
--- a/net/netfilter/nf_conntrack_timeout.c
+++ b/net/netfilter/nf_conntrack_timeout.c
@@ -31,7 +31,7 @@ EXPORT_SYMBOL_GPL(nf_ct_timeout_find_get_hook);
void (*nf_ct_timeout_put_hook)(struct ctnl_timeout *timeout) __read_mostly;
EXPORT_SYMBOL_GPL(nf_ct_timeout_put_hook);
-static struct nf_ct_ext_type timeout_extend __read_mostly = {
+static const struct nf_ct_ext_type timeout_extend = {
.len = sizeof(struct nf_conn_timeout),
.align = __alignof__(struct nf_conn_timeout),
.id = NF_CT_EXT_TIMEOUT,
diff --git a/net/netfilter/nf_conntrack_timestamp.c b/net/netfilter/nf_conntrack_timestamp.c
index 7a394df0deb7..4c4734b78318 100644
--- a/net/netfilter/nf_conntrack_timestamp.c
+++ b/net/netfilter/nf_conntrack_timestamp.c
@@ -33,7 +33,7 @@ static struct ctl_table tstamp_sysctl_table[] = {
};
#endif /* CONFIG_SYSCTL */
-static struct nf_ct_ext_type tstamp_extend __read_mostly = {
+static const struct nf_ct_ext_type tstamp_extend = {
.len = sizeof(struct nf_conn_tstamp),
.align = __alignof__(struct nf_conn_tstamp),
.id = NF_CT_EXT_TSTAMP,
diff --git a/net/netfilter/nf_internals.h b/net/netfilter/nf_internals.h
index c46d214d5323..bfa742da83af 100644
--- a/net/netfilter/nf_internals.h
+++ b/net/netfilter/nf_internals.h
@@ -14,7 +14,7 @@
/* nf_queue.c */
int nf_queue(struct sk_buff *skb, struct nf_hook_state *state,
struct nf_hook_entry **entryp, unsigned int verdict);
-void nf_queue_nf_hook_drop(struct net *net, const struct nf_hook_entry *entry);
+unsigned int nf_queue_nf_hook_drop(struct net *net);
int __init netfilter_queue_init(void);
/* nf_log.c */
diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c
index 8d85a0598b60..8bb152a7cca4 100644
--- a/net/netfilter/nf_log.c
+++ b/net/netfilter/nf_log.c
@@ -71,7 +71,6 @@ void nf_log_unset(struct net *net, const struct nf_logger *logger)
RCU_INIT_POINTER(net->nf.nf_loggers[i], NULL);
}
mutex_unlock(&nf_log_mutex);
- synchronize_rcu();
}
EXPORT_SYMBOL(nf_log_unset);
@@ -376,13 +375,13 @@ static int seq_show(struct seq_file *s, void *v)
logger = nft_log_dereference(loggers[*pos][i]);
seq_printf(s, "%s", logger->name);
if (i == 0 && loggers[*pos][i + 1] != NULL)
- seq_printf(s, ",");
+ seq_puts(s, ",");
if (seq_has_overflowed(s))
return -ENOSPC;
}
- seq_printf(s, ")\n");
+ seq_puts(s, ")\n");
if (seq_has_overflowed(s))
return -ENOSPC;
diff --git a/net/netfilter/nf_nat_amanda.c b/net/netfilter/nf_nat_amanda.c
index eb772380a202..e4d61a7a5258 100644
--- a/net/netfilter/nf_nat_amanda.c
+++ b/net/netfilter/nf_nat_amanda.c
@@ -33,7 +33,6 @@ static unsigned int help(struct sk_buff *skb,
{
char buffer[sizeof("65535")];
u_int16_t port;
- unsigned int ret;
/* Connection comes from client. */
exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port;
@@ -63,14 +62,14 @@ static unsigned int help(struct sk_buff *skb,
}
sprintf(buffer, "%u", port);
- ret = nf_nat_mangle_udp_packet(skb, exp->master, ctinfo,
- protoff, matchoff, matchlen,
- buffer, strlen(buffer));
- if (ret != NF_ACCEPT) {
+ if (!nf_nat_mangle_udp_packet(skb, exp->master, ctinfo,
+ protoff, matchoff, matchlen,
+ buffer, strlen(buffer))) {
nf_ct_helper_log(skb, exp->master, "cannot mangle packet");
nf_ct_unexpect_related(exp);
+ return NF_DROP;
}
- return ret;
+ return NF_ACCEPT;
}
static void __exit nf_nat_amanda_fini(void)
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 82802e4a6640..b48d6b5aae8a 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -71,11 +71,10 @@ static void __nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl)
if (ct == NULL)
return;
- family = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num;
- rcu_read_lock();
+ family = nf_ct_l3num(ct);
l3proto = __nf_nat_l3proto_find(family);
if (l3proto == NULL)
- goto out;
+ return;
dir = CTINFO2DIR(ctinfo);
if (dir == IP_CT_DIR_ORIGINAL)
@@ -84,8 +83,6 @@ static void __nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl)
statusbit = IPS_SRC_NAT;
l3proto->decode_session(skb, ct, dir, statusbit, fl);
-out:
- rcu_read_unlock();
}
int nf_xfrm_me_harder(struct net *net, struct sk_buff *skb, unsigned int family)
@@ -411,12 +408,6 @@ nf_nat_setup_info(struct nf_conn *ct,
enum nf_nat_manip_type maniptype)
{
struct nf_conntrack_tuple curr_tuple, new_tuple;
- struct nf_conn_nat *nat;
-
- /* nat helper or nfctnetlink also setup binding */
- nat = nf_ct_nat_ext_add(ct);
- if (nat == NULL)
- return NF_ACCEPT;
NF_CT_ASSERT(maniptype == NF_NAT_MANIP_SRC ||
maniptype == NF_NAT_MANIP_DST);
@@ -549,10 +540,6 @@ struct nf_nat_proto_clean {
static int nf_nat_proto_remove(struct nf_conn *i, void *data)
{
const struct nf_nat_proto_clean *clean = data;
- struct nf_conn_nat *nat = nfct_nat(i);
-
- if (!nat)
- return 0;
if ((clean->l3proto && nf_ct_l3num(i) != clean->l3proto) ||
(clean->l4proto && nf_ct_protonum(i) != clean->l4proto))
@@ -563,12 +550,10 @@ static int nf_nat_proto_remove(struct nf_conn *i, void *data)
static int nf_nat_proto_clean(struct nf_conn *ct, void *data)
{
- struct nf_conn_nat *nat = nfct_nat(ct);
-
if (nf_nat_proto_remove(ct, data))
return 1;
- if (!nat)
+ if ((ct->status & IPS_SRC_NAT_DONE) == 0)
return 0;
/* This netns is being destroyed, and conntrack has nat null binding.
@@ -716,13 +701,9 @@ EXPORT_SYMBOL_GPL(nf_nat_l3proto_unregister);
/* No one using conntrack by the time this called. */
static void nf_nat_cleanup_conntrack(struct nf_conn *ct)
{
- struct nf_conn_nat *nat = nf_ct_ext_find(ct, NF_CT_EXT_NAT);
-
- if (!nat)
- return;
-
- rhltable_remove(&nf_nat_bysource_table, &ct->nat_bysource,
- nf_nat_bysource_params);
+ if (ct->status & IPS_SRC_NAT_DONE)
+ rhltable_remove(&nf_nat_bysource_table, &ct->nat_bysource,
+ nf_nat_bysource_params);
}
static struct nf_ct_ext_type nat_extend __read_mostly = {
@@ -730,7 +711,6 @@ static struct nf_ct_ext_type nat_extend __read_mostly = {
.align = __alignof__(struct nf_conn_nat),
.destroy = nf_nat_cleanup_conntrack,
.id = NF_CT_EXT_NAT,
- .flags = NF_CT_EXT_F_PREALLOC,
};
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
@@ -751,7 +731,8 @@ static int nfnetlink_parse_nat_proto(struct nlattr *attr,
const struct nf_nat_l4proto *l4proto;
int err;
- err = nla_parse_nested(tb, CTA_PROTONAT_MAX, attr, protonat_nla_policy);
+ err = nla_parse_nested(tb, CTA_PROTONAT_MAX, attr,
+ protonat_nla_policy, NULL);
if (err < 0)
return err;
@@ -780,7 +761,7 @@ nfnetlink_parse_nat(const struct nlattr *nat,
memset(range, 0, sizeof(*range));
- err = nla_parse_nested(tb, CTA_NAT_MAX, nat, nat_nla_policy);
+ err = nla_parse_nested(tb, CTA_NAT_MAX, nat, nat_nla_policy, NULL);
if (err < 0)
return err;
@@ -819,7 +800,7 @@ nfnetlink_parse_nat_setup(struct nf_conn *ct,
/* No NAT information has been passed, allocate the null-binding */
if (attr == NULL)
- return __nf_nat_alloc_null_binding(ct, manip);
+ return __nf_nat_alloc_null_binding(ct, manip) == NF_DROP ? -ENOMEM : 0;
err = nfnetlink_parse_nat(attr, ct, &range, l3proto);
if (err < 0)
@@ -874,9 +855,6 @@ static int __init nf_nat_init(void)
nf_ct_helper_expectfn_register(&follow_master_nat);
- /* Initialize fake conntrack so that NAT will skip it */
- nf_ct_untracked_status_or(IPS_NAT_DONE_MASK);
-
BUG_ON(nfnetlink_parse_nat_setup_hook != NULL);
RCU_INIT_POINTER(nfnetlink_parse_nat_setup_hook,
nfnetlink_parse_nat_setup);
diff --git a/net/netfilter/nf_nat_helper.c b/net/netfilter/nf_nat_helper.c
index 211661cb2c90..607a373379b4 100644
--- a/net/netfilter/nf_nat_helper.c
+++ b/net/netfilter/nf_nat_helper.c
@@ -70,15 +70,15 @@ static void mangle_contents(struct sk_buff *skb,
}
/* Unusual, but possible case. */
-static int enlarge_skb(struct sk_buff *skb, unsigned int extra)
+static bool enlarge_skb(struct sk_buff *skb, unsigned int extra)
{
if (skb->len + extra > 65535)
- return 0;
+ return false;
if (pskb_expand_head(skb, 0, extra - skb_tailroom(skb), GFP_ATOMIC))
- return 0;
+ return false;
- return 1;
+ return true;
}
/* Generic function for mangling variable-length address changes inside
@@ -89,26 +89,26 @@ static int enlarge_skb(struct sk_buff *skb, unsigned int extra)
* skb enlargement, ...
*
* */
-int __nf_nat_mangle_tcp_packet(struct sk_buff *skb,
- struct nf_conn *ct,
- enum ip_conntrack_info ctinfo,
- unsigned int protoff,
- unsigned int match_offset,
- unsigned int match_len,
- const char *rep_buffer,
- unsigned int rep_len, bool adjust)
+bool __nf_nat_mangle_tcp_packet(struct sk_buff *skb,
+ struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned int protoff,
+ unsigned int match_offset,
+ unsigned int match_len,
+ const char *rep_buffer,
+ unsigned int rep_len, bool adjust)
{
const struct nf_nat_l3proto *l3proto;
struct tcphdr *tcph;
int oldlen, datalen;
if (!skb_make_writable(skb, skb->len))
- return 0;
+ return false;
if (rep_len > match_len &&
rep_len - match_len > skb_tailroom(skb) &&
!enlarge_skb(skb, rep_len - match_len))
- return 0;
+ return false;
SKB_LINEAR_ASSERT(skb);
@@ -128,7 +128,7 @@ int __nf_nat_mangle_tcp_packet(struct sk_buff *skb,
nf_ct_seqadj_set(ct, ctinfo, tcph->seq,
(int)rep_len - (int)match_len);
- return 1;
+ return true;
}
EXPORT_SYMBOL(__nf_nat_mangle_tcp_packet);
@@ -142,7 +142,7 @@ EXPORT_SYMBOL(__nf_nat_mangle_tcp_packet);
* XXX - This function could be merged with nf_nat_mangle_tcp_packet which
* should be fairly easy to do.
*/
-int
+bool
nf_nat_mangle_udp_packet(struct sk_buff *skb,
struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
@@ -157,12 +157,12 @@ nf_nat_mangle_udp_packet(struct sk_buff *skb,
int datalen, oldlen;
if (!skb_make_writable(skb, skb->len))
- return 0;
+ return false;
if (rep_len > match_len &&
rep_len - match_len > skb_tailroom(skb) &&
!enlarge_skb(skb, rep_len - match_len))
- return 0;
+ return false;
udph = (void *)skb->data + protoff;
@@ -176,13 +176,13 @@ nf_nat_mangle_udp_packet(struct sk_buff *skb,
/* fix udp checksum if udp checksum was previously calculated */
if (!udph->check && skb->ip_summed != CHECKSUM_PARTIAL)
- return 1;
+ return true;
l3proto = __nf_nat_l3proto_find(nf_ct_l3num(ct));
l3proto->csum_recalc(skb, IPPROTO_UDP, udph, &udph->check,
datalen, oldlen);
- return 1;
+ return true;
}
EXPORT_SYMBOL(nf_nat_mangle_udp_packet);
diff --git a/net/netfilter/nf_nat_irc.c b/net/netfilter/nf_nat_irc.c
index 1fb2258c3535..0648cb096bd8 100644
--- a/net/netfilter/nf_nat_irc.c
+++ b/net/netfilter/nf_nat_irc.c
@@ -37,7 +37,6 @@ static unsigned int help(struct sk_buff *skb,
struct nf_conn *ct = exp->master;
union nf_inet_addr newaddr;
u_int16_t port;
- unsigned int ret;
/* Reply comes from server. */
newaddr = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3;
@@ -83,14 +82,14 @@ static unsigned int help(struct sk_buff *skb,
pr_debug("nf_nat_irc: inserting '%s' == %pI4, port %u\n",
buffer, &newaddr.ip, port);
- ret = nf_nat_mangle_tcp_packet(skb, ct, ctinfo, protoff, matchoff,
- matchlen, buffer, strlen(buffer));
- if (ret != NF_ACCEPT) {
+ if (!nf_nat_mangle_tcp_packet(skb, ct, ctinfo, protoff, matchoff,
+ matchlen, buffer, strlen(buffer))) {
nf_ct_helper_log(skb, ct, "cannot mangle packet");
nf_ct_unexpect_related(exp);
+ return NF_DROP;
}
- return ret;
+ return NF_ACCEPT;
}
static void __exit nf_nat_irc_fini(void)
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index 4a7662486f44..043850c9d154 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -96,15 +96,18 @@ void nf_queue_entry_get_refs(struct nf_queue_entry *entry)
}
EXPORT_SYMBOL_GPL(nf_queue_entry_get_refs);
-void nf_queue_nf_hook_drop(struct net *net, const struct nf_hook_entry *entry)
+unsigned int nf_queue_nf_hook_drop(struct net *net)
{
const struct nf_queue_handler *qh;
+ unsigned int count = 0;
rcu_read_lock();
qh = rcu_dereference(net->nf.queue_handler);
if (qh)
- qh->nf_hook_drop(net, entry);
+ count = qh->nf_hook_drop(net);
rcu_read_unlock();
+
+ return count;
}
static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state,
diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c
index 7c6d1fbe38b9..a504e87c6ddf 100644
--- a/net/netfilter/nf_synproxy_core.c
+++ b/net/netfilter/nf_synproxy_core.c
@@ -66,8 +66,8 @@ synproxy_parse_options(const struct sk_buff *skb, unsigned int doff,
case TCPOPT_WINDOW:
if (opsize == TCPOLEN_WINDOW) {
opts->wscale = *ptr;
- if (opts->wscale > 14)
- opts->wscale = 14;
+ if (opts->wscale > TCP_MAX_WSCALE)
+ opts->wscale = TCP_MAX_WSCALE;
opts->options |= XT_SYNPROXY_OPT_WSCALE;
}
break;
@@ -287,9 +287,9 @@ static int synproxy_cpu_seq_show(struct seq_file *seq, void *v)
struct synproxy_stats *stats = v;
if (v == SEQ_START_TOKEN) {
- seq_printf(seq, "entries\t\tsyn_received\t"
- "cookie_invalid\tcookie_valid\t"
- "cookie_retrans\tconn_reopened\n");
+ seq_puts(seq, "entries\t\tsyn_received\t"
+ "cookie_invalid\tcookie_valid\t"
+ "cookie_retrans\tconn_reopened\n");
return 0;
}
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 434c739dfeca..559225029740 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -144,7 +144,7 @@ static int nf_tables_register_hooks(struct net *net,
unsigned int hook_nops)
{
if (table->flags & NFT_TABLE_F_DORMANT ||
- !(chain->flags & NFT_BASE_CHAIN))
+ !nft_is_base_chain(chain))
return 0;
return nf_register_net_hooks(net, nft_base_chain(chain)->ops,
@@ -157,7 +157,7 @@ static void nf_tables_unregister_hooks(struct net *net,
unsigned int hook_nops)
{
if (table->flags & NFT_TABLE_F_DORMANT ||
- !(chain->flags & NFT_BASE_CHAIN))
+ !nft_is_base_chain(chain))
return;
nf_unregister_net_hooks(net, nft_base_chain(chain)->ops, hook_nops);
@@ -438,7 +438,7 @@ static int nf_tables_fill_table_info(struct sk_buff *skb, struct net *net,
struct nlmsghdr *nlh;
struct nfgenmsg *nfmsg;
- event |= NFNL_SUBSYS_NFTABLES << 8;
+ event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event);
nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), flags);
if (nlh == NULL)
goto nla_put_failure;
@@ -587,7 +587,7 @@ static void _nf_tables_table_disable(struct net *net,
list_for_each_entry(chain, &table->chains, list) {
if (!nft_is_active_next(net, chain))
continue;
- if (!(chain->flags & NFT_BASE_CHAIN))
+ if (!nft_is_base_chain(chain))
continue;
if (cnt && i++ == cnt)
@@ -608,7 +608,7 @@ static int nf_tables_table_enable(struct net *net,
list_for_each_entry(chain, &table->chains, list) {
if (!nft_is_active_next(net, chain))
continue;
- if (!(chain->flags & NFT_BASE_CHAIN))
+ if (!nft_is_base_chain(chain))
continue;
err = nf_register_net_hooks(net, nft_base_chain(chain)->ops,
@@ -989,7 +989,7 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net,
struct nlmsghdr *nlh;
struct nfgenmsg *nfmsg;
- event |= NFNL_SUBSYS_NFTABLES << 8;
+ event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event);
nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), flags);
if (nlh == NULL)
goto nla_put_failure;
@@ -1007,7 +1007,7 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net,
if (nla_put_string(skb, NFTA_CHAIN_NAME, chain->name))
goto nla_put_failure;
- if (chain->flags & NFT_BASE_CHAIN) {
+ if (nft_is_base_chain(chain)) {
const struct nft_base_chain *basechain = nft_base_chain(chain);
const struct nf_hook_ops *ops = &basechain->ops[0];
struct nlattr *nest;
@@ -1182,7 +1182,8 @@ static struct nft_stats __percpu *nft_stats_alloc(const struct nlattr *attr)
struct nft_stats *stats;
int err;
- err = nla_parse_nested(tb, NFTA_COUNTER_MAX, attr, nft_counter_policy);
+ err = nla_parse_nested(tb, NFTA_COUNTER_MAX, attr, nft_counter_policy,
+ NULL);
if (err < 0)
return ERR_PTR(err);
@@ -1226,7 +1227,7 @@ static void nf_tables_chain_destroy(struct nft_chain *chain)
{
BUG_ON(chain->use > 0);
- if (chain->flags & NFT_BASE_CHAIN) {
+ if (nft_is_base_chain(chain)) {
struct nft_base_chain *basechain = nft_base_chain(chain);
module_put(basechain->type->owner);
@@ -1257,7 +1258,7 @@ static int nft_chain_parse_hook(struct net *net,
int err;
err = nla_parse_nested(ha, NFTA_HOOK_MAX, nla[NFTA_CHAIN_HOOK],
- nft_hook_policy);
+ nft_hook_policy, NULL);
if (err < 0)
return err;
@@ -1364,8 +1365,8 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk,
}
if (nla[NFTA_CHAIN_POLICY]) {
- if ((chain != NULL &&
- !(chain->flags & NFT_BASE_CHAIN)))
+ if (chain != NULL &&
+ !nft_is_base_chain(chain))
return -EOPNOTSUPP;
if (chain == NULL &&
@@ -1396,7 +1397,7 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk,
struct nft_chain_hook hook;
struct nf_hook_ops *ops;
- if (!(chain->flags & NFT_BASE_CHAIN))
+ if (!nft_is_base_chain(chain))
return -EBUSY;
err = nft_chain_parse_hook(net, nla, afi, &hook,
@@ -1433,7 +1434,7 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk,
}
if (nla[NFTA_CHAIN_COUNTERS]) {
- if (!(chain->flags & NFT_BASE_CHAIN))
+ if (!nft_is_base_chain(chain))
return -EOPNOTSUPP;
stats = nft_stats_alloc(nla[NFTA_CHAIN_COUNTERS]);
@@ -1724,7 +1725,7 @@ static int nf_tables_expr_parse(const struct nft_ctx *ctx,
struct nlattr *tb[NFTA_EXPR_MAX + 1];
int err;
- err = nla_parse_nested(tb, NFTA_EXPR_MAX, nla, nft_expr_policy);
+ err = nla_parse_nested(tb, NFTA_EXPR_MAX, nla, nft_expr_policy, NULL);
if (err < 0)
return err;
@@ -1734,7 +1735,7 @@ static int nf_tables_expr_parse(const struct nft_ctx *ctx,
if (tb[NFTA_EXPR_DATA]) {
err = nla_parse_nested(info->tb, type->maxattr,
- tb[NFTA_EXPR_DATA], type->policy);
+ tb[NFTA_EXPR_DATA], type->policy, NULL);
if (err < 0)
goto err1;
} else
@@ -1772,8 +1773,19 @@ static int nf_tables_newexpr(const struct nft_ctx *ctx,
goto err1;
}
+ if (ops->validate) {
+ const struct nft_data *data = NULL;
+
+ err = ops->validate(ctx, expr, &data);
+ if (err < 0)
+ goto err2;
+ }
+
return 0;
+err2:
+ if (ops->destroy)
+ ops->destroy(ctx, expr);
err1:
expr->ops = NULL;
return err;
@@ -1874,10 +1886,9 @@ static int nf_tables_fill_rule_info(struct sk_buff *skb, struct net *net,
const struct nft_expr *expr, *next;
struct nlattr *list;
const struct nft_rule *prule;
- int type = event | NFNL_SUBSYS_NFTABLES << 8;
+ u16 type = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event);
- nlh = nlmsg_put(skb, portid, seq, type, sizeof(struct nfgenmsg),
- flags);
+ nlh = nlmsg_put(skb, portid, seq, type, sizeof(struct nfgenmsg), flags);
if (nlh == NULL)
goto nla_put_failure;
@@ -1895,7 +1906,7 @@ static int nf_tables_fill_rule_info(struct sk_buff *skb, struct net *net,
goto nla_put_failure;
if ((event != NFT_MSG_DELRULE) && (rule->list.prev != &chain->rules)) {
- prule = list_entry(rule->list.prev, struct nft_rule, list);
+ prule = list_prev_entry(rule, list);
if (nla_put_be64(skb, NFTA_RULE_POSITION,
cpu_to_be64(prule->handle),
NFTA_RULE_PAD))
@@ -2523,8 +2534,8 @@ static int nft_ctx_init_from_setattr(struct nft_ctx *ctx, struct net *net,
return 0;
}
-struct nft_set *nf_tables_set_lookup(const struct nft_table *table,
- const struct nlattr *nla, u8 genmask)
+static struct nft_set *nf_tables_set_lookup(const struct nft_table *table,
+ const struct nlattr *nla, u8 genmask)
{
struct nft_set *set;
@@ -2538,11 +2549,10 @@ struct nft_set *nf_tables_set_lookup(const struct nft_table *table,
}
return ERR_PTR(-ENOENT);
}
-EXPORT_SYMBOL_GPL(nf_tables_set_lookup);
-struct nft_set *nf_tables_set_lookup_byid(const struct net *net,
- const struct nlattr *nla,
- u8 genmask)
+static struct nft_set *nf_tables_set_lookup_byid(const struct net *net,
+ const struct nlattr *nla,
+ u8 genmask)
{
struct nft_trans *trans;
u32 id = ntohl(nla_get_be32(nla));
@@ -2557,7 +2567,25 @@ struct nft_set *nf_tables_set_lookup_byid(const struct net *net,
}
return ERR_PTR(-ENOENT);
}
-EXPORT_SYMBOL_GPL(nf_tables_set_lookup_byid);
+
+struct nft_set *nft_set_lookup(const struct net *net,
+ const struct nft_table *table,
+ const struct nlattr *nla_set_name,
+ const struct nlattr *nla_set_id,
+ u8 genmask)
+{
+ struct nft_set *set;
+
+ set = nf_tables_set_lookup(table, nla_set_name, genmask);
+ if (IS_ERR(set)) {
+ if (!nla_set_id)
+ return set;
+
+ set = nf_tables_set_lookup_byid(net, nla_set_id, genmask);
+ }
+ return set;
+}
+EXPORT_SYMBOL_GPL(nft_set_lookup);
static int nf_tables_set_alloc_name(struct nft_ctx *ctx, struct nft_set *set,
const char *name)
@@ -2617,7 +2645,7 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx,
u32 portid = ctx->portid;
u32 seq = ctx->seq;
- event |= NFNL_SUBSYS_NFTABLES << 8;
+ event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event);
nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg),
flags);
if (nlh == NULL)
@@ -2851,7 +2879,8 @@ static int nf_tables_set_desc_parse(const struct nft_ctx *ctx,
struct nlattr *da[NFTA_SET_DESC_MAX + 1];
int err;
- err = nla_parse_nested(da, NFTA_SET_DESC_MAX, nla, nft_set_desc_policy);
+ err = nla_parse_nested(da, NFTA_SET_DESC_MAX, nla,
+ nft_set_desc_policy, NULL);
if (err < 0)
return err;
@@ -3353,7 +3382,8 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
int event, err;
err = nlmsg_parse(cb->nlh, sizeof(struct nfgenmsg), nla,
- NFTA_SET_ELEM_LIST_MAX, nft_set_elem_list_policy);
+ NFTA_SET_ELEM_LIST_MAX, nft_set_elem_list_policy,
+ NULL);
if (err < 0)
return err;
@@ -3367,8 +3397,7 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
if (IS_ERR(set))
return PTR_ERR(set);
- event = NFT_MSG_NEWSETELEM;
- event |= NFNL_SUBSYS_NFTABLES << 8;
+ event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, NFT_MSG_NEWSETELEM);
portid = NETLINK_CB(cb->skb).portid;
seq = cb->nlh->nlmsg_seq;
@@ -3453,7 +3482,7 @@ static int nf_tables_fill_setelem_info(struct sk_buff *skb,
struct nlattr *nest;
int err;
- event |= NFNL_SUBSYS_NFTABLES << 8;
+ event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event);
nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg),
flags);
if (nlh == NULL)
@@ -3612,7 +3641,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
int err;
err = nla_parse_nested(nla, NFTA_SET_ELEM_MAX, attr,
- nft_set_elem_policy);
+ nft_set_elem_policy, NULL);
if (err < 0)
return err;
@@ -3749,6 +3778,11 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
err = set->ops->insert(ctx->net, set, &elem, &ext2);
if (err) {
if (err == -EEXIST) {
+ if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA) ^
+ nft_set_ext_exists(ext2, NFT_SET_EXT_DATA) ||
+ nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF) ^
+ nft_set_ext_exists(ext2, NFT_SET_EXT_OBJREF))
+ return -EBUSY;
if ((nft_set_ext_exists(ext, NFT_SET_EXT_DATA) &&
nft_set_ext_exists(ext2, NFT_SET_EXT_DATA) &&
memcmp(nft_set_ext_data(ext),
@@ -3842,7 +3876,7 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set,
int err;
err = nla_parse_nested(nla, NFTA_SET_ELEM_MAX, attr,
- nft_set_elem_policy);
+ nft_set_elem_policy, NULL);
if (err < 0)
goto err1;
@@ -4064,7 +4098,8 @@ static const struct nla_policy nft_obj_policy[NFTA_OBJ_MAX + 1] = {
[NFTA_OBJ_DATA] = { .type = NLA_NESTED },
};
-static struct nft_object *nft_obj_init(const struct nft_object_type *type,
+static struct nft_object *nft_obj_init(const struct nft_ctx *ctx,
+ const struct nft_object_type *type,
const struct nlattr *attr)
{
struct nlattr *tb[type->maxattr + 1];
@@ -4072,7 +4107,8 @@ static struct nft_object *nft_obj_init(const struct nft_object_type *type,
int err;
if (attr) {
- err = nla_parse_nested(tb, type->maxattr, attr, type->policy);
+ err = nla_parse_nested(tb, type->maxattr, attr, type->policy,
+ NULL);
if (err < 0)
goto err1;
} else {
@@ -4084,7 +4120,7 @@ static struct nft_object *nft_obj_init(const struct nft_object_type *type,
if (obj == NULL)
goto err1;
- err = type->init((const struct nlattr * const *)tb, obj);
+ err = type->init(ctx, (const struct nlattr * const *)tb, obj);
if (err < 0)
goto err2;
@@ -4192,7 +4228,7 @@ static int nf_tables_newobj(struct net *net, struct sock *nlsk,
if (IS_ERR(type))
return PTR_ERR(type);
- obj = nft_obj_init(type, nla[NFTA_OBJ_DATA]);
+ obj = nft_obj_init(&ctx, type, nla[NFTA_OBJ_DATA]);
if (IS_ERR(obj)) {
err = PTR_ERR(obj);
goto err1;
@@ -4224,7 +4260,7 @@ static int nf_tables_fill_obj_info(struct sk_buff *skb, struct net *net,
struct nfgenmsg *nfmsg;
struct nlmsghdr *nlh;
- event |= NFNL_SUBSYS_NFTABLES << 8;
+ event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event);
nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), flags);
if (nlh == NULL)
goto nla_put_failure;
@@ -4406,8 +4442,6 @@ static int nf_tables_getobj(struct net *net, struct sock *nlsk,
err:
kfree_skb(skb2);
return err;
-
- return 0;
}
static void nft_obj_destroy(struct nft_object *obj)
@@ -4497,7 +4531,7 @@ static int nf_tables_fill_gen_info(struct sk_buff *skb, struct net *net,
{
struct nlmsghdr *nlh;
struct nfgenmsg *nfmsg;
- int event = (NFNL_SUBSYS_NFTABLES << 8) | NFT_MSG_NEWGEN;
+ int event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, NFT_MSG_NEWGEN);
nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), 0);
if (nlh == NULL)
@@ -4679,7 +4713,7 @@ static void nft_chain_commit_update(struct nft_trans *trans)
if (nft_trans_chain_name(trans)[0])
strcpy(trans->ctx.chain->name, nft_trans_chain_name(trans));
- if (!(trans->ctx.chain->flags & NFT_BASE_CHAIN))
+ if (!nft_is_base_chain(trans->ctx.chain))
return;
basechain = nft_base_chain(trans->ctx.chain);
@@ -4993,7 +5027,7 @@ int nft_chain_validate_dependency(const struct nft_chain *chain,
{
const struct nft_base_chain *basechain;
- if (chain->flags & NFT_BASE_CHAIN) {
+ if (nft_is_base_chain(chain)) {
basechain = nft_base_chain(chain);
if (basechain->type->type != type)
return -EOPNOTSUPP;
@@ -5007,7 +5041,7 @@ int nft_chain_validate_hooks(const struct nft_chain *chain,
{
struct nft_base_chain *basechain;
- if (chain->flags & NFT_BASE_CHAIN) {
+ if (nft_is_base_chain(chain)) {
basechain = nft_base_chain(chain);
if ((1 << basechain->ops[0].hooknum) & hook_flags)
@@ -5285,7 +5319,8 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data,
struct nft_chain *chain;
int err;
- err = nla_parse_nested(tb, NFTA_VERDICT_MAX, nla, nft_verdict_policy);
+ err = nla_parse_nested(tb, NFTA_VERDICT_MAX, nla, nft_verdict_policy,
+ NULL);
if (err < 0)
return err;
@@ -5316,7 +5351,7 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data,
tb[NFTA_VERDICT_CHAIN], genmask);
if (IS_ERR(chain))
return PTR_ERR(chain);
- if (chain->flags & NFT_BASE_CHAIN)
+ if (nft_is_base_chain(chain))
return -EOPNOTSUPP;
chain->use++;
@@ -5415,7 +5450,7 @@ int nft_data_init(const struct nft_ctx *ctx,
struct nlattr *tb[NFTA_DATA_MAX + 1];
int err;
- err = nla_parse_nested(tb, NFTA_DATA_MAX, nla, nft_data_policy);
+ err = nla_parse_nested(tb, NFTA_DATA_MAX, nla, nft_data_policy, NULL);
if (err < 0)
return err;
@@ -5489,7 +5524,7 @@ int __nft_release_basechain(struct nft_ctx *ctx)
{
struct nft_rule *rule, *nr;
- BUG_ON(!(ctx->chain->flags & NFT_BASE_CHAIN));
+ BUG_ON(!nft_is_base_chain(ctx->chain));
nf_tables_unregister_hooks(ctx->net, ctx->chain->table, ctx->chain,
ctx->afi->nops);
diff --git a/net/netfilter/nf_tables_netdev.c b/net/netfilter/nf_tables_netdev.c
index 9e2ae424b640..403432988313 100644
--- a/net/netfilter/nf_tables_netdev.c
+++ b/net/netfilter/nf_tables_netdev.c
@@ -128,7 +128,7 @@ static int nf_tables_netdev_event(struct notifier_block *this,
list_for_each_entry(table, &afi->tables, list) {
ctx.table = table;
list_for_each_entry_safe(chain, nr, &table->chains, list) {
- if (!(chain->flags & NFT_BASE_CHAIN))
+ if (!nft_is_base_chain(chain))
continue;
ctx.chain = chain;
diff --git a/net/netfilter/nf_tables_trace.c b/net/netfilter/nf_tables_trace.c
index 12eb9041dca2..e1b15e7a5793 100644
--- a/net/netfilter/nf_tables_trace.c
+++ b/net/netfilter/nf_tables_trace.c
@@ -169,7 +169,7 @@ void nft_trace_notify(struct nft_traceinfo *info)
struct nlmsghdr *nlh;
struct sk_buff *skb;
unsigned int size;
- int event = (NFNL_SUBSYS_NFTABLES << 8) | NFT_MSG_TRACE;
+ u16 event;
if (!nfnetlink_has_listeners(nft_net(pkt), NFNLGRP_NFTRACE))
return;
@@ -198,6 +198,7 @@ void nft_trace_notify(struct nft_traceinfo *info)
if (!skb)
return;
+ event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, NFT_MSG_TRACE);
nlh = nlmsg_put(skb, 0, 0, event, sizeof(struct nfgenmsg), 0);
if (!nlh)
goto nla_put_failure;
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index 68eda920160e..80f5ecf2c3d7 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -148,7 +148,8 @@ int nfnetlink_unicast(struct sk_buff *skb, struct net *net, u32 portid,
EXPORT_SYMBOL_GPL(nfnetlink_unicast);
/* Process one complete nfnetlink message. */
-static int nfnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int nfnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
const struct nfnl_callback *nc;
@@ -191,8 +192,8 @@ replay:
int attrlen = nlh->nlmsg_len - min_len;
__u8 subsys_id = NFNL_SUBSYS_ID(type);
- err = nla_parse(cda, ss->cb[cb_id].attr_count,
- attr, attrlen, ss->cb[cb_id].policy);
+ err = nla_parse(cda, ss->cb[cb_id].attr_count, attr, attrlen,
+ ss->cb[cb_id].policy, extack);
if (err < 0) {
rcu_read_unlock();
return err;
@@ -261,7 +262,7 @@ static void nfnl_err_deliver(struct list_head *err_list, struct sk_buff *skb)
struct nfnl_err *nfnl_err, *next;
list_for_each_entry_safe(nfnl_err, next, err_list, head) {
- netlink_ack(skb, nfnl_err->nlh, nfnl_err->err);
+ netlink_ack(skb, nfnl_err->nlh, nfnl_err->err, NULL);
nfnl_err_del(nfnl_err);
}
}
@@ -284,13 +285,13 @@ static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh,
int err;
if (subsys_id >= NFNL_SUBSYS_COUNT)
- return netlink_ack(skb, nlh, -EINVAL);
+ return netlink_ack(skb, nlh, -EINVAL, NULL);
replay:
status = 0;
skb = netlink_skb_clone(oskb, GFP_KERNEL);
if (!skb)
- return netlink_ack(oskb, nlh, -ENOMEM);
+ return netlink_ack(oskb, nlh, -ENOMEM, NULL);
nfnl_lock(subsys_id);
ss = nfnl_dereference_protected(subsys_id);
@@ -304,20 +305,20 @@ replay:
#endif
{
nfnl_unlock(subsys_id);
- netlink_ack(oskb, nlh, -EOPNOTSUPP);
+ netlink_ack(oskb, nlh, -EOPNOTSUPP, NULL);
return kfree_skb(skb);
}
}
if (!ss->commit || !ss->abort) {
nfnl_unlock(subsys_id);
- netlink_ack(oskb, nlh, -EOPNOTSUPP);
+ netlink_ack(oskb, nlh, -EOPNOTSUPP, NULL);
return kfree_skb(skb);
}
if (genid && ss->valid_genid && !ss->valid_genid(net, genid)) {
nfnl_unlock(subsys_id);
- netlink_ack(oskb, nlh, -ERESTART);
+ netlink_ack(oskb, nlh, -ERESTART, NULL);
return kfree_skb(skb);
}
@@ -376,8 +377,8 @@ replay:
struct nlattr *attr = (void *)nlh + min_len;
int attrlen = nlh->nlmsg_len - min_len;
- err = nla_parse(cda, ss->cb[cb_id].attr_count,
- attr, attrlen, ss->cb[cb_id].policy);
+ err = nla_parse(cda, ss->cb[cb_id].attr_count, attr,
+ attrlen, ss->cb[cb_id].policy, NULL);
if (err < 0)
goto ack;
@@ -407,7 +408,8 @@ ack:
* pointing to the batch header.
*/
nfnl_err_reset(&err_list);
- netlink_ack(oskb, nlmsg_hdr(oskb), -ENOMEM);
+ netlink_ack(oskb, nlmsg_hdr(oskb), -ENOMEM,
+ NULL);
status |= NFNL_BATCH_FAILURE;
goto done;
}
@@ -465,9 +467,10 @@ static void nfnetlink_rcv_skb_batch(struct sk_buff *skb, struct nlmsghdr *nlh)
skb->len < NLMSG_HDRLEN + sizeof(struct nfgenmsg))
return;
- err = nla_parse(cda, NFNL_BATCH_MAX, attr, attrlen, nfnl_batch_policy);
+ err = nla_parse(cda, NFNL_BATCH_MAX, attr, attrlen, nfnl_batch_policy,
+ NULL);
if (err < 0) {
- netlink_ack(skb, nlh, err);
+ netlink_ack(skb, nlh, err, NULL);
return;
}
if (cda[NFNL_BATCH_GENID])
@@ -493,14 +496,14 @@ static void nfnetlink_rcv(struct sk_buff *skb)
return;
if (!netlink_net_capable(skb, CAP_NET_ADMIN)) {
- netlink_ack(skb, nlh, -EPERM);
+ netlink_ack(skb, nlh, -EPERM, NULL);
return;
}
if (nlh->nlmsg_type == NFNL_MSG_BATCH_BEGIN)
nfnetlink_rcv_skb_batch(skb, nlh);
else
- netlink_rcv_skb(skb, &nfnetlink_rcv_msg);
+ netlink_rcv_skb(skb, nfnetlink_rcv_msg);
}
#ifdef CONFIG_MODULES
diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c
index d44d89b56127..9898fb4d0512 100644
--- a/net/netfilter/nfnetlink_acct.c
+++ b/net/netfilter/nfnetlink_acct.c
@@ -11,6 +11,7 @@
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/atomic.h>
+#include <linux/refcount.h>
#include <linux/netlink.h>
#include <linux/rculist.h>
#include <linux/slab.h>
@@ -32,7 +33,7 @@ struct nf_acct {
atomic64_t bytes;
unsigned long flags;
struct list_head head;
- atomic_t refcnt;
+ refcount_t refcnt;
char name[NFACCT_NAME_MAX];
struct rcu_head rcu_head;
char data[0];
@@ -123,7 +124,7 @@ static int nfnl_acct_new(struct net *net, struct sock *nfnl,
atomic64_set(&nfacct->pkts,
be64_to_cpu(nla_get_be64(tb[NFACCT_PKTS])));
}
- atomic_set(&nfacct->refcnt, 1);
+ refcount_set(&nfacct->refcnt, 1);
list_add_tail_rcu(&nfacct->head, &net->nfnl_acct_list);
return 0;
}
@@ -138,7 +139,7 @@ nfnl_acct_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
u64 pkts, bytes;
u32 old_flags;
- event |= NFNL_SUBSYS_ACCT << 8;
+ event = nfnl_msg_type(NFNL_SUBSYS_ACCT, event);
nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags);
if (nlh == NULL)
goto nlmsg_failure;
@@ -166,7 +167,7 @@ nfnl_acct_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
NFACCT_PAD) ||
nla_put_be64(skb, NFACCT_BYTES, cpu_to_be64(bytes),
NFACCT_PAD) ||
- nla_put_be32(skb, NFACCT_USE, htonl(atomic_read(&acct->refcnt))))
+ nla_put_be32(skb, NFACCT_USE, htonl(refcount_read(&acct->refcnt))))
goto nla_put_failure;
if (acct->flags & NFACCT_F_QUOTA) {
u64 *quota = (u64 *)acct->data;
@@ -243,7 +244,8 @@ nfacct_filter_alloc(const struct nlattr * const attr)
struct nlattr *tb[NFACCT_FILTER_MAX + 1];
int err;
- err = nla_parse_nested(tb, NFACCT_FILTER_MAX, attr, filter_policy);
+ err = nla_parse_nested(tb, NFACCT_FILTER_MAX, attr, filter_policy,
+ NULL);
if (err < 0)
return ERR_PTR(err);
@@ -329,7 +331,7 @@ static int nfnl_acct_try_del(struct nf_acct *cur)
/* We want to avoid races with nfnl_acct_put. So only when the current
* refcnt is 1, we decrease it to 0.
*/
- if (atomic_cmpxchg(&cur->refcnt, 1, 0) == 1) {
+ if (refcount_dec_if_one(&cur->refcnt)) {
/* We are protected by nfnl mutex. */
list_del_rcu(&cur->head);
kfree_rcu(cur, rcu_head);
@@ -413,7 +415,7 @@ struct nf_acct *nfnl_acct_find_get(struct net *net, const char *acct_name)
if (!try_module_get(THIS_MODULE))
goto err;
- if (!atomic_inc_not_zero(&cur->refcnt)) {
+ if (!refcount_inc_not_zero(&cur->refcnt)) {
module_put(THIS_MODULE);
goto err;
}
@@ -429,7 +431,7 @@ EXPORT_SYMBOL_GPL(nfnl_acct_find_get);
void nfnl_acct_put(struct nf_acct *acct)
{
- if (atomic_dec_and_test(&acct->refcnt))
+ if (refcount_dec_and_test(&acct->refcnt))
kfree_rcu(acct, rcu_head);
module_put(THIS_MODULE);
@@ -502,7 +504,7 @@ static void __net_exit nfnl_acct_net_exit(struct net *net)
list_for_each_entry_safe(cur, tmp, &net->nfnl_acct_list, head) {
list_del_rcu(&cur->head);
- if (atomic_dec_and_test(&cur->refcnt))
+ if (refcount_dec_and_test(&cur->refcnt))
kfree_rcu(cur, rcu_head);
}
}
diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c
index d45558178da5..950bf6eadc65 100644
--- a/net/netfilter/nfnetlink_cthelper.c
+++ b/net/netfilter/nfnetlink_cthelper.c
@@ -77,7 +77,8 @@ nfnl_cthelper_parse_tuple(struct nf_conntrack_tuple *tuple,
int err;
struct nlattr *tb[NFCTH_TUPLE_MAX+1];
- err = nla_parse_nested(tb, NFCTH_TUPLE_MAX, attr, nfnl_cthelper_tuple_pol);
+ err = nla_parse_nested(tb, NFCTH_TUPLE_MAX, attr,
+ nfnl_cthelper_tuple_pol, NULL);
if (err < 0)
return err;
@@ -104,7 +105,7 @@ nfnl_cthelper_from_nlattr(struct nlattr *attr, struct nf_conn *ct)
if (help->helper->data_len == 0)
return -EINVAL;
- memcpy(help->data, nla_data(attr), help->helper->data_len);
+ nla_memcpy(help->data, nla_data(attr), sizeof(help->data));
return 0;
}
@@ -137,7 +138,8 @@ nfnl_cthelper_expect_policy(struct nf_conntrack_expect_policy *expect_policy,
int err;
struct nlattr *tb[NFCTH_POLICY_MAX+1];
- err = nla_parse_nested(tb, NFCTH_POLICY_MAX, attr, nfnl_cthelper_expect_pol);
+ err = nla_parse_nested(tb, NFCTH_POLICY_MAX, attr,
+ nfnl_cthelper_expect_pol, NULL);
if (err < 0)
return err;
@@ -150,6 +152,9 @@ nfnl_cthelper_expect_policy(struct nf_conntrack_expect_policy *expect_policy,
nla_data(tb[NFCTH_POLICY_NAME]), NF_CT_HELPER_NAME_LEN);
expect_policy->max_expected =
ntohl(nla_get_be32(tb[NFCTH_POLICY_EXPECT_MAX]));
+ if (expect_policy->max_expected > NF_CT_EXPECT_MAX_CNT)
+ return -EINVAL;
+
expect_policy->timeout =
ntohl(nla_get_be32(tb[NFCTH_POLICY_EXPECT_TIMEOUT]));
@@ -171,7 +176,7 @@ nfnl_cthelper_parse_expect_policy(struct nf_conntrack_helper *helper,
unsigned int class_max;
ret = nla_parse_nested(tb, NFCTH_POLICY_SET_MAX, attr,
- nfnl_cthelper_expect_policy_set);
+ nfnl_cthelper_expect_policy_set, NULL);
if (ret < 0)
return ret;
@@ -213,6 +218,7 @@ nfnl_cthelper_create(const struct nlattr * const tb[],
{
struct nf_conntrack_helper *helper;
struct nfnl_cthelper *nfcth;
+ unsigned int size;
int ret;
if (!tb[NFCTH_TUPLE] || !tb[NFCTH_POLICY] || !tb[NFCTH_PRIV_DATA_LEN])
@@ -228,7 +234,12 @@ nfnl_cthelper_create(const struct nlattr * const tb[],
goto err1;
strncpy(helper->name, nla_data(tb[NFCTH_NAME]), NF_CT_HELPER_NAME_LEN);
- helper->data_len = ntohl(nla_get_be32(tb[NFCTH_PRIV_DATA_LEN]));
+ size = ntohl(nla_get_be32(tb[NFCTH_PRIV_DATA_LEN]));
+ if (size > FIELD_SIZEOF(struct nf_conn_help, data)) {
+ ret = -ENOMEM;
+ goto err2;
+ }
+
helper->flags |= NF_CT_HELPER_F_USERSPACE;
memcpy(&helper->tuple, tuple, sizeof(struct nf_conntrack_tuple));
@@ -276,7 +287,7 @@ nfnl_cthelper_update_policy_one(const struct nf_conntrack_expect_policy *policy,
int err;
err = nla_parse_nested(tb, NFCTH_POLICY_MAX, attr,
- nfnl_cthelper_expect_pol);
+ nfnl_cthelper_expect_pol, NULL);
if (err < 0)
return err;
@@ -290,6 +301,9 @@ nfnl_cthelper_update_policy_one(const struct nf_conntrack_expect_policy *policy,
new_policy->max_expected =
ntohl(nla_get_be32(tb[NFCTH_POLICY_EXPECT_MAX]));
+ if (new_policy->max_expected > NF_CT_EXPECT_MAX_CNT)
+ return -EINVAL;
+
new_policy->timeout =
ntohl(nla_get_be32(tb[NFCTH_POLICY_EXPECT_TIMEOUT]));
@@ -336,7 +350,7 @@ static int nfnl_cthelper_update_policy(struct nf_conntrack_helper *helper,
int err;
err = nla_parse_nested(tb, NFCTH_POLICY_SET_MAX, attr,
- nfnl_cthelper_expect_policy_set);
+ nfnl_cthelper_expect_policy_set, NULL);
if (err < 0)
return err;
@@ -501,7 +515,7 @@ nfnl_cthelper_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
unsigned int flags = portid ? NLM_F_MULTI : 0;
int status;
- event |= NFNL_SUBSYS_CTHELPER << 8;
+ event = nfnl_msg_type(NFNL_SUBSYS_CTHELPER, event);
nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags);
if (nlh == NULL)
goto nlmsg_failure;
diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c
index 47d6656c9119..a3e7bb54d96a 100644
--- a/net/netfilter/nfnetlink_cttimeout.c
+++ b/net/netfilter/nfnetlink_cttimeout.c
@@ -56,7 +56,8 @@ ctnl_timeout_parse_policy(void *timeouts, struct nf_conntrack_l4proto *l4proto,
struct nlattr *tb[l4proto->ctnl_timeout.nlattr_max+1];
ret = nla_parse_nested(tb, l4proto->ctnl_timeout.nlattr_max,
- attr, l4proto->ctnl_timeout.nla_policy);
+ attr, l4proto->ctnl_timeout.nla_policy,
+ NULL);
if (ret < 0)
return ret;
@@ -138,7 +139,7 @@ static int cttimeout_new_timeout(struct net *net, struct sock *ctnl,
strcpy(timeout->name, nla_data(cda[CTA_TIMEOUT_NAME]));
timeout->l3num = l3num;
timeout->l4proto = l4proto;
- atomic_set(&timeout->refcnt, 1);
+ refcount_set(&timeout->refcnt, 1);
list_add_tail_rcu(&timeout->head, &net->nfct_timeout_list);
return 0;
@@ -158,7 +159,7 @@ ctnl_timeout_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
unsigned int flags = portid ? NLM_F_MULTI : 0;
struct nf_conntrack_l4proto *l4proto = timeout->l4proto;
- event |= NFNL_SUBSYS_CTNETLINK_TIMEOUT << 8;
+ event = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK_TIMEOUT, event);
nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags);
if (nlh == NULL)
goto nlmsg_failure;
@@ -172,7 +173,7 @@ ctnl_timeout_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
nla_put_be16(skb, CTA_TIMEOUT_L3PROTO, htons(timeout->l3num)) ||
nla_put_u8(skb, CTA_TIMEOUT_L4PROTO, timeout->l4proto->l4proto) ||
nla_put_be32(skb, CTA_TIMEOUT_USE,
- htonl(atomic_read(&timeout->refcnt))))
+ htonl(refcount_read(&timeout->refcnt))))
goto nla_put_failure;
if (likely(l4proto->ctnl_timeout.obj_to_nlattr)) {
@@ -339,7 +340,7 @@ static int ctnl_timeout_try_del(struct net *net, struct ctnl_timeout *timeout)
/* We want to avoid races with ctnl_timeout_put. So only when the
* current refcnt is 1, we decrease it to 0.
*/
- if (atomic_cmpxchg(&timeout->refcnt, 1, 0) == 1) {
+ if (refcount_dec_if_one(&timeout->refcnt)) {
/* We are protected by nfnl mutex. */
list_del_rcu(&timeout->head);
nf_ct_l4proto_put(timeout->l4proto);
@@ -431,7 +432,7 @@ cttimeout_default_fill_info(struct net *net, struct sk_buff *skb, u32 portid,
struct nfgenmsg *nfmsg;
unsigned int flags = portid ? NLM_F_MULTI : 0;
- event |= NFNL_SUBSYS_CTNETLINK_TIMEOUT << 8;
+ event = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK_TIMEOUT, event);
nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags);
if (nlh == NULL)
goto nlmsg_failure;
@@ -536,7 +537,7 @@ ctnl_timeout_find_get(struct net *net, const char *name)
if (!try_module_get(THIS_MODULE))
goto err;
- if (!atomic_inc_not_zero(&timeout->refcnt)) {
+ if (!refcount_inc_not_zero(&timeout->refcnt)) {
module_put(THIS_MODULE);
goto err;
}
@@ -550,7 +551,7 @@ err:
static void ctnl_timeout_put(struct ctnl_timeout *timeout)
{
- if (atomic_dec_and_test(&timeout->refcnt))
+ if (refcount_dec_and_test(&timeout->refcnt))
kfree_rcu(timeout, rcu_head);
module_put(THIS_MODULE);
@@ -601,7 +602,7 @@ static void __net_exit cttimeout_net_exit(struct net *net)
list_del_rcu(&cur->head);
nf_ct_l4proto_put(cur->l4proto);
- if (atomic_dec_and_test(&cur->refcnt))
+ if (refcount_dec_and_test(&cur->refcnt))
kfree_rcu(cur, rcu_head);
}
}
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 08247bf7d7b8..da9704971a83 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -40,6 +40,8 @@
#include <net/netfilter/nfnetlink_log.h>
#include <linux/atomic.h>
+#include <linux/refcount.h>
+
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
#include "../bridge/br_private.h"
@@ -57,7 +59,7 @@
struct nfulnl_instance {
struct hlist_node hlist; /* global list of instances */
spinlock_t lock;
- atomic_t use; /* use count */
+ refcount_t use; /* use count */
unsigned int qlen; /* number of nlmsgs in skb */
struct sk_buff *skb; /* pre-allocatd skb */
@@ -115,7 +117,7 @@ __instance_lookup(struct nfnl_log_net *log, u_int16_t group_num)
static inline void
instance_get(struct nfulnl_instance *inst)
{
- atomic_inc(&inst->use);
+ refcount_inc(&inst->use);
}
static struct nfulnl_instance *
@@ -125,7 +127,7 @@ instance_lookup_get(struct nfnl_log_net *log, u_int16_t group_num)
rcu_read_lock_bh();
inst = __instance_lookup(log, group_num);
- if (inst && !atomic_inc_not_zero(&inst->use))
+ if (inst && !refcount_inc_not_zero(&inst->use))
inst = NULL;
rcu_read_unlock_bh();
@@ -145,7 +147,7 @@ static void nfulnl_instance_free_rcu(struct rcu_head *head)
static void
instance_put(struct nfulnl_instance *inst)
{
- if (inst && atomic_dec_and_test(&inst->use))
+ if (inst && refcount_dec_and_test(&inst->use))
call_rcu_bh(&inst->rcu, nfulnl_instance_free_rcu);
}
@@ -180,7 +182,7 @@ instance_create(struct net *net, u_int16_t group_num,
INIT_HLIST_NODE(&inst->hlist);
spin_lock_init(&inst->lock);
/* needs to be two, since we _put() after creation */
- atomic_set(&inst->use, 2);
+ refcount_set(&inst->use, 2);
setup_timer(&inst->timer, nfulnl_timer, (unsigned long)inst);
@@ -409,7 +411,7 @@ __build_packet_message(struct nfnl_log_net *log,
const unsigned char *hwhdrp;
nlh = nlmsg_put(inst->skb, 0, 0,
- NFNL_SUBSYS_ULOG << 8 | NFULNL_MSG_PACKET,
+ nfnl_msg_type(NFNL_SUBSYS_ULOG, NFULNL_MSG_PACKET),
sizeof(struct nfgenmsg), 0);
if (!nlh)
return -1;
@@ -801,7 +803,7 @@ static int nfulnl_recv_unsupp(struct net *net, struct sock *ctnl,
static struct nf_logger nfulnl_logger __read_mostly = {
.name = "nfnetlink_log",
.type = NF_LOG_TYPE_ULOG,
- .logfn = &nfulnl_log_packet,
+ .logfn = nfulnl_log_packet,
.me = THIS_MODULE,
};
@@ -1031,7 +1033,7 @@ static int seq_show(struct seq_file *s, void *v)
inst->group_num,
inst->peer_portid, inst->qlen,
inst->copy_mode, inst->copy_range,
- inst->flushtimeout, atomic_read(&inst->use));
+ inst->flushtimeout, refcount_read(&inst->use));
return 0;
}
@@ -1138,10 +1140,10 @@ out:
static void __exit nfnetlink_log_fini(void)
{
- nf_log_unregister(&nfulnl_logger);
nfnetlink_subsys_unregister(&nfulnl_subsys);
netlink_unregister_notifier(&nfulnl_rtnl_notifier);
unregister_pernet_subsys(&nfnl_log_net_ops);
+ nf_log_unregister(&nfulnl_logger);
}
MODULE_DESCRIPTION("netfilter userspace logging");
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 933509ebf3d3..8a0f218b7938 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -447,7 +447,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
}
nlh = nlmsg_put(skb, 0, 0,
- NFNL_SUBSYS_QUEUE << 8 | NFQNL_MSG_PACKET,
+ nfnl_msg_type(NFNL_SUBSYS_QUEUE, NFQNL_MSG_PACKET),
sizeof(struct nfgenmsg), 0);
if (!nlh) {
skb_tx_error(entskb);
@@ -922,16 +922,10 @@ static struct notifier_block nfqnl_dev_notifier = {
.notifier_call = nfqnl_rcv_dev_event,
};
-static int nf_hook_cmp(struct nf_queue_entry *entry, unsigned long entry_ptr)
-{
- return rcu_access_pointer(entry->hook) ==
- (struct nf_hook_entry *)entry_ptr;
-}
-
-static void nfqnl_nf_hook_drop(struct net *net,
- const struct nf_hook_entry *hook)
+static unsigned int nfqnl_nf_hook_drop(struct net *net)
{
struct nfnl_queue_net *q = nfnl_queue_pernet(net);
+ unsigned int instances = 0;
int i;
rcu_read_lock();
@@ -939,10 +933,14 @@ static void nfqnl_nf_hook_drop(struct net *net,
struct nfqnl_instance *inst;
struct hlist_head *head = &q->instance_table[i];
- hlist_for_each_entry_rcu(inst, head, hlist)
- nfqnl_flush(inst, nf_hook_cmp, (unsigned long)hook);
+ hlist_for_each_entry_rcu(inst, head, hlist) {
+ nfqnl_flush(inst, NULL, 0);
+ instances++;
+ }
}
rcu_read_unlock();
+
+ return instances;
}
static int
@@ -1109,7 +1107,7 @@ static int nfqa_parse_bridge(struct nf_queue_entry *entry,
int err;
err = nla_parse_nested(tb, NFQA_VLAN_MAX, nfqa[NFQA_VLAN],
- nfqa_vlan_policy);
+ nfqa_vlan_policy, NULL);
if (err < 0)
return err;
@@ -1213,8 +1211,8 @@ static const struct nla_policy nfqa_cfg_policy[NFQA_CFG_MAX+1] = {
};
static const struct nf_queue_handler nfqh = {
- .outfn = &nfqnl_enqueue_packet,
- .nf_hook_drop = &nfqnl_nf_hook_drop,
+ .outfn = nfqnl_enqueue_packet,
+ .nf_hook_drop = nfqnl_nf_hook_drop,
};
static int nfqnl_recv_config(struct net *net, struct sock *ctnl,
diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c
index c21e7eb8dce0..f753ec69f790 100644
--- a/net/netfilter/nft_compat.c
+++ b/net/netfilter/nft_compat.c
@@ -42,7 +42,8 @@ static int nft_compat_chain_validate_dependency(const char *tablename,
{
const struct nft_base_chain *basechain;
- if (!tablename || !(chain->flags & NFT_BASE_CHAIN))
+ if (!tablename ||
+ !nft_is_base_chain(chain))
return 0;
basechain = nft_base_chain(chain);
@@ -165,7 +166,7 @@ nft_target_set_tgchk_param(struct xt_tgchk_param *par,
par->entryinfo = entry;
par->target = target;
par->targinfo = info;
- if (ctx->chain->flags & NFT_BASE_CHAIN) {
+ if (nft_is_base_chain(ctx->chain)) {
const struct nft_base_chain *basechain =
nft_base_chain(ctx->chain);
const struct nf_hook_ops *ops = &basechain->ops[0];
@@ -200,7 +201,7 @@ static int nft_parse_compat(const struct nlattr *attr, u16 *proto, bool *inv)
int err;
err = nla_parse_nested(tb, NFTA_RULE_COMPAT_MAX, attr,
- nft_rule_compat_policy);
+ nft_rule_compat_policy, NULL);
if (err < 0)
return err;
@@ -230,10 +231,6 @@ nft_target_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
union nft_entry e = {};
int ret;
- ret = nft_compat_chain_validate_dependency(target->table, ctx->chain);
- if (ret < 0)
- goto err;
-
target_compat_from_user(target, nla_data(tb[NFTA_TARGET_INFO]), info);
if (ctx->nla[NFTA_RULE_COMPAT]) {
@@ -302,7 +299,7 @@ static int nft_target_validate(const struct nft_ctx *ctx,
unsigned int hook_mask = 0;
int ret;
- if (ctx->chain->flags & NFT_BASE_CHAIN) {
+ if (nft_is_base_chain(ctx->chain)) {
const struct nft_base_chain *basechain =
nft_base_chain(ctx->chain);
const struct nf_hook_ops *ops = &basechain->ops[0];
@@ -383,7 +380,7 @@ nft_match_set_mtchk_param(struct xt_mtchk_param *par, const struct nft_ctx *ctx,
par->entryinfo = entry;
par->match = match;
par->matchinfo = info;
- if (ctx->chain->flags & NFT_BASE_CHAIN) {
+ if (nft_is_base_chain(ctx->chain)) {
const struct nft_base_chain *basechain =
nft_base_chain(ctx->chain);
const struct nf_hook_ops *ops = &basechain->ops[0];
@@ -419,10 +416,6 @@ nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
union nft_entry e = {};
int ret;
- ret = nft_compat_chain_validate_dependency(match->table, ctx->chain);
- if (ret < 0)
- goto err;
-
match_compat_from_user(match, nla_data(tb[NFTA_MATCH_INFO]), info);
if (ctx->nla[NFTA_RULE_COMPAT]) {
@@ -485,7 +478,7 @@ static int nft_match_validate(const struct nft_ctx *ctx,
unsigned int hook_mask = 0;
int ret;
- if (ctx->chain->flags & NFT_BASE_CHAIN) {
+ if (nft_is_base_chain(ctx->chain)) {
const struct nft_base_chain *basechain =
nft_base_chain(ctx->chain);
const struct nf_hook_ops *ops = &basechain->ops[0];
@@ -511,7 +504,7 @@ nfnl_compat_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
struct nfgenmsg *nfmsg;
unsigned int flags = portid ? NLM_F_MULTI : 0;
- event |= NFNL_SUBSYS_NFT_COMPAT << 8;
+ event = nfnl_msg_type(NFNL_SUBSYS_NFT_COMPAT, event);
nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags);
if (nlh == NULL)
goto nlmsg_failure;
diff --git a/net/netfilter/nft_counter.c b/net/netfilter/nft_counter.c
index 7f8422213341..67a710ebde09 100644
--- a/net/netfilter/nft_counter.c
+++ b/net/netfilter/nft_counter.c
@@ -82,7 +82,8 @@ static int nft_counter_do_init(const struct nlattr * const tb[],
return 0;
}
-static int nft_counter_obj_init(const struct nlattr * const tb[],
+static int nft_counter_obj_init(const struct nft_ctx *ctx,
+ const struct nlattr * const tb[],
struct nft_object *obj)
{
struct nft_counter_percpu_priv *priv = nft_obj_data(obj);
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index 0264258c46fe..a34ceb38fc55 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -32,6 +32,12 @@ struct nft_ct {
};
};
+struct nft_ct_helper_obj {
+ struct nf_conntrack_helper *helper4;
+ struct nf_conntrack_helper *helper6;
+ u8 l4proto;
+};
+
#ifdef CONFIG_NF_CONNTRACK_ZONES
static DEFINE_PER_CPU(struct nf_conn *, nft_ct_pcpu_template);
static unsigned int nft_ct_pcpu_template_refcnt __read_mostly;
@@ -66,12 +72,12 @@ static void nft_ct_get_eval(const struct nft_expr *expr,
switch (priv->key) {
case NFT_CT_STATE:
- if (ct == NULL)
- state = NF_CT_STATE_INVALID_BIT;
- else if (nf_ct_is_untracked(ct))
+ if (ct)
+ state = NF_CT_STATE_BIT(ctinfo);
+ else if (ctinfo == IP_CT_UNTRACKED)
state = NF_CT_STATE_UNTRACKED_BIT;
else
- state = NF_CT_STATE_BIT(ctinfo);
+ state = NF_CT_STATE_INVALID_BIT;
*dest = state;
return;
default:
@@ -258,7 +264,7 @@ static void nft_ct_set_eval(const struct nft_expr *expr,
struct nf_conn *ct;
ct = nf_ct_get(skb, &ctinfo);
- if (ct == NULL)
+ if (ct == NULL || nf_ct_is_template(ct))
return;
switch (priv->key) {
@@ -278,6 +284,22 @@ static void nft_ct_set_eval(const struct nft_expr *expr,
NF_CT_LABELS_MAX_SIZE / sizeof(u32));
break;
#endif
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
+ case NFT_CT_EVENTMASK: {
+ struct nf_conntrack_ecache *e = nf_ct_ecache_find(ct);
+ u32 ctmask = regs->data[priv->sreg];
+
+ if (e) {
+ if (e->ctmask != ctmask)
+ e->ctmask = ctmask;
+ break;
+ }
+
+ if (ctmask && !nf_ct_is_confirmed(ct))
+ nf_ct_ecache_ext_add(ct, ctmask, 0, GFP_ATOMIC);
+ break;
+ }
+#endif
default:
break;
}
@@ -533,6 +555,13 @@ static int nft_ct_set_init(const struct nft_ctx *ctx,
len = sizeof(u16);
break;
#endif
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
+ case NFT_CT_EVENTMASK:
+ if (tb[NFTA_CT_DIRECTION])
+ return -EINVAL;
+ len = sizeof(u32);
+ break;
+#endif
default:
return -EOPNOTSUPP;
}
@@ -696,7 +725,7 @@ nft_ct_select_ops(const struct nft_ctx *ctx,
static struct nft_expr_type nft_ct_type __read_mostly = {
.name = "ct",
- .select_ops = &nft_ct_select_ops,
+ .select_ops = nft_ct_select_ops,
.policy = nft_ct_policy,
.maxattr = NFTA_CT_MAX,
.owner = THIS_MODULE,
@@ -712,12 +741,10 @@ static void nft_notrack_eval(const struct nft_expr *expr,
ct = nf_ct_get(pkt->skb, &ctinfo);
/* Previously seen (loopback or untracked)? Ignore. */
- if (ct)
+ if (ct || ctinfo == IP_CT_UNTRACKED)
return;
- ct = nf_ct_untracked_get();
- atomic_inc(&ct->ct_general.use);
- nf_ct_set(skb, ct, IP_CT_NEW);
+ nf_ct_set(skb, ct, IP_CT_UNTRACKED);
}
static struct nft_expr_type nft_notrack_type;
@@ -733,6 +760,162 @@ static struct nft_expr_type nft_notrack_type __read_mostly = {
.owner = THIS_MODULE,
};
+static int nft_ct_helper_obj_init(const struct nft_ctx *ctx,
+ const struct nlattr * const tb[],
+ struct nft_object *obj)
+{
+ struct nft_ct_helper_obj *priv = nft_obj_data(obj);
+ struct nf_conntrack_helper *help4, *help6;
+ char name[NF_CT_HELPER_NAME_LEN];
+ int family = ctx->afi->family;
+
+ if (!tb[NFTA_CT_HELPER_NAME] || !tb[NFTA_CT_HELPER_L4PROTO])
+ return -EINVAL;
+
+ priv->l4proto = nla_get_u8(tb[NFTA_CT_HELPER_L4PROTO]);
+ if (!priv->l4proto)
+ return -ENOENT;
+
+ nla_strlcpy(name, tb[NFTA_CT_HELPER_NAME], sizeof(name));
+
+ if (tb[NFTA_CT_HELPER_L3PROTO])
+ family = ntohs(nla_get_be16(tb[NFTA_CT_HELPER_L3PROTO]));
+
+ help4 = NULL;
+ help6 = NULL;
+
+ switch (family) {
+ case NFPROTO_IPV4:
+ if (ctx->afi->family == NFPROTO_IPV6)
+ return -EINVAL;
+
+ help4 = nf_conntrack_helper_try_module_get(name, family,
+ priv->l4proto);
+ break;
+ case NFPROTO_IPV6:
+ if (ctx->afi->family == NFPROTO_IPV4)
+ return -EINVAL;
+
+ help6 = nf_conntrack_helper_try_module_get(name, family,
+ priv->l4proto);
+ break;
+ case NFPROTO_NETDEV: /* fallthrough */
+ case NFPROTO_BRIDGE: /* same */
+ case NFPROTO_INET:
+ help4 = nf_conntrack_helper_try_module_get(name, NFPROTO_IPV4,
+ priv->l4proto);
+ help6 = nf_conntrack_helper_try_module_get(name, NFPROTO_IPV6,
+ priv->l4proto);
+ break;
+ default:
+ return -EAFNOSUPPORT;
+ }
+
+ /* && is intentional; only error if INET found neither ipv4 or ipv6 */
+ if (!help4 && !help6)
+ return -ENOENT;
+
+ priv->helper4 = help4;
+ priv->helper6 = help6;
+
+ return 0;
+}
+
+static void nft_ct_helper_obj_destroy(struct nft_object *obj)
+{
+ struct nft_ct_helper_obj *priv = nft_obj_data(obj);
+
+ if (priv->helper4)
+ module_put(priv->helper4->me);
+ if (priv->helper6)
+ module_put(priv->helper6->me);
+}
+
+static void nft_ct_helper_obj_eval(struct nft_object *obj,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ const struct nft_ct_helper_obj *priv = nft_obj_data(obj);
+ struct nf_conn *ct = (struct nf_conn *)skb_nfct(pkt->skb);
+ struct nf_conntrack_helper *to_assign = NULL;
+ struct nf_conn_help *help;
+
+ if (!ct ||
+ nf_ct_is_confirmed(ct) ||
+ nf_ct_is_template(ct) ||
+ priv->l4proto != nf_ct_protonum(ct))
+ return;
+
+ switch (nf_ct_l3num(ct)) {
+ case NFPROTO_IPV4:
+ to_assign = priv->helper4;
+ break;
+ case NFPROTO_IPV6:
+ to_assign = priv->helper6;
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ return;
+ }
+
+ if (!to_assign)
+ return;
+
+ if (test_bit(IPS_HELPER_BIT, &ct->status))
+ return;
+
+ help = nf_ct_helper_ext_add(ct, to_assign, GFP_ATOMIC);
+ if (help) {
+ rcu_assign_pointer(help->helper, to_assign);
+ set_bit(IPS_HELPER_BIT, &ct->status);
+ }
+}
+
+static int nft_ct_helper_obj_dump(struct sk_buff *skb,
+ struct nft_object *obj, bool reset)
+{
+ const struct nft_ct_helper_obj *priv = nft_obj_data(obj);
+ const struct nf_conntrack_helper *helper = priv->helper4;
+ u16 family;
+
+ if (nla_put_string(skb, NFTA_CT_HELPER_NAME, helper->name))
+ return -1;
+
+ if (nla_put_u8(skb, NFTA_CT_HELPER_L4PROTO, priv->l4proto))
+ return -1;
+
+ if (priv->helper4 && priv->helper6)
+ family = NFPROTO_INET;
+ else if (priv->helper6)
+ family = NFPROTO_IPV6;
+ else
+ family = NFPROTO_IPV4;
+
+ if (nla_put_be16(skb, NFTA_CT_HELPER_L3PROTO, htons(family)))
+ return -1;
+
+ return 0;
+}
+
+static const struct nla_policy nft_ct_helper_policy[NFTA_CT_HELPER_MAX + 1] = {
+ [NFTA_CT_HELPER_NAME] = { .type = NLA_STRING,
+ .len = NF_CT_HELPER_NAME_LEN - 1 },
+ [NFTA_CT_HELPER_L3PROTO] = { .type = NLA_U16 },
+ [NFTA_CT_HELPER_L4PROTO] = { .type = NLA_U8 },
+};
+
+static struct nft_object_type nft_ct_helper_obj __read_mostly = {
+ .type = NFT_OBJECT_CT_HELPER,
+ .size = sizeof(struct nft_ct_helper_obj),
+ .maxattr = NFTA_CT_HELPER_MAX,
+ .policy = nft_ct_helper_policy,
+ .eval = nft_ct_helper_obj_eval,
+ .init = nft_ct_helper_obj_init,
+ .destroy = nft_ct_helper_obj_destroy,
+ .dump = nft_ct_helper_obj_dump,
+ .owner = THIS_MODULE,
+};
+
static int __init nft_ct_module_init(void)
{
int err;
@@ -747,7 +930,14 @@ static int __init nft_ct_module_init(void)
if (err < 0)
goto err1;
+ err = nft_register_obj(&nft_ct_helper_obj);
+ if (err < 0)
+ goto err2;
+
return 0;
+
+err2:
+ nft_unregister_expr(&nft_notrack_type);
err1:
nft_unregister_expr(&nft_ct_type);
return err;
@@ -755,6 +945,7 @@ err1:
static void __exit nft_ct_module_exit(void)
{
+ nft_unregister_obj(&nft_ct_helper_obj);
nft_unregister_expr(&nft_notrack_type);
nft_unregister_expr(&nft_ct_type);
}
@@ -766,3 +957,4 @@ MODULE_LICENSE("GPL");
MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
MODULE_ALIAS_NFT_EXPR("ct");
MODULE_ALIAS_NFT_EXPR("notrack");
+MODULE_ALIAS_NFT_OBJ(NFT_OBJECT_CT_HELPER);
diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c
index 049ad2d9ee66..66221ad891a9 100644
--- a/net/netfilter/nft_dynset.c
+++ b/net/netfilter/nft_dynset.c
@@ -82,8 +82,7 @@ static void nft_dynset_eval(const struct nft_expr *expr,
nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION)) {
timeout = priv->timeout ? : set->timeout;
*nft_set_ext_expiration(ext) = jiffies + timeout;
- } else if (sexpr == NULL)
- goto out;
+ }
if (sexpr != NULL)
sexpr->ops->eval(sexpr, regs, pkt);
@@ -92,7 +91,7 @@ static void nft_dynset_eval(const struct nft_expr *expr,
regs->verdict.code = NFT_BREAK;
return;
}
-out:
+
if (!priv->invert)
regs->verdict.code = NFT_BREAK;
}
@@ -133,16 +132,10 @@ static int nft_dynset_init(const struct nft_ctx *ctx,
priv->invert = true;
}
- set = nf_tables_set_lookup(ctx->table, tb[NFTA_DYNSET_SET_NAME],
- genmask);
- if (IS_ERR(set)) {
- if (tb[NFTA_DYNSET_SET_ID])
- set = nf_tables_set_lookup_byid(ctx->net,
- tb[NFTA_DYNSET_SET_ID],
- genmask);
- if (IS_ERR(set))
- return PTR_ERR(set);
- }
+ set = nft_set_lookup(ctx->net, ctx->table, tb[NFTA_DYNSET_SET_NAME],
+ tb[NFTA_DYNSET_SET_ID], genmask);
+ if (IS_ERR(set))
+ return PTR_ERR(set);
if (set->ops->update == NULL)
return -EOPNOTSUPP;
diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c
index c308920b194c..1ec49fe5845f 100644
--- a/net/netfilter/nft_exthdr.c
+++ b/net/netfilter/nft_exthdr.c
@@ -98,14 +98,21 @@ static void nft_exthdr_tcp_eval(const struct nft_expr *expr,
goto err;
offset = i + priv->offset;
- dest[priv->len / NFT_REG32_SIZE] = 0;
- memcpy(dest, opt + offset, priv->len);
+ if (priv->flags & NFT_EXTHDR_F_PRESENT) {
+ *dest = 1;
+ } else {
+ dest[priv->len / NFT_REG32_SIZE] = 0;
+ memcpy(dest, opt + offset, priv->len);
+ }
return;
}
err:
- regs->verdict.code = NFT_BREAK;
+ if (priv->flags & NFT_EXTHDR_F_PRESENT)
+ *dest = 0;
+ else
+ regs->verdict.code = NFT_BREAK;
}
static const struct nla_policy nft_exthdr_policy[NFTA_EXTHDR_MAX + 1] = {
@@ -225,7 +232,7 @@ nft_exthdr_select_ops(const struct nft_ctx *ctx,
static struct nft_expr_type nft_exthdr_type __read_mostly = {
.name = "exthdr",
- .select_ops = &nft_exthdr_select_ops,
+ .select_ops = nft_exthdr_select_ops,
.policy = nft_exthdr_policy,
.maxattr = NFTA_EXTHDR_MAX,
.owner = THIS_MODULE,
diff --git a/net/netfilter/nft_fib.c b/net/netfilter/nft_fib.c
index 29a4906adc27..21df8cccea65 100644
--- a/net/netfilter/nft_fib.c
+++ b/net/netfilter/nft_fib.c
@@ -24,7 +24,8 @@ const struct nla_policy nft_fib_policy[NFTA_FIB_MAX + 1] = {
EXPORT_SYMBOL(nft_fib_policy);
#define NFTA_FIB_F_ALL (NFTA_FIB_F_SADDR | NFTA_FIB_F_DADDR | \
- NFTA_FIB_F_MARK | NFTA_FIB_F_IIF | NFTA_FIB_F_OIF)
+ NFTA_FIB_F_MARK | NFTA_FIB_F_IIF | NFTA_FIB_F_OIF | \
+ NFTA_FIB_F_PRESENT)
int nft_fib_validate(const struct nft_ctx *ctx, const struct nft_expr *expr,
const struct nft_data **data)
@@ -112,7 +113,7 @@ int nft_fib_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
if (err < 0)
return err;
- return nft_fib_validate(ctx, expr, NULL);
+ return 0;
}
EXPORT_SYMBOL_GPL(nft_fib_init);
@@ -133,19 +134,22 @@ int nft_fib_dump(struct sk_buff *skb, const struct nft_expr *expr)
}
EXPORT_SYMBOL_GPL(nft_fib_dump);
-void nft_fib_store_result(void *reg, enum nft_fib_result r,
+void nft_fib_store_result(void *reg, const struct nft_fib *priv,
const struct nft_pktinfo *pkt, int index)
{
struct net_device *dev;
u32 *dreg = reg;
- switch (r) {
+ switch (priv->result) {
case NFT_FIB_RESULT_OIF:
- *dreg = index;
+ *dreg = (priv->flags & NFTA_FIB_F_PRESENT) ? !!index : index;
break;
case NFT_FIB_RESULT_OIFNAME:
dev = dev_get_by_index_rcu(nft_net(pkt), index);
- strncpy(reg, dev ? dev->name : "", IFNAMSIZ);
+ if (priv->flags & NFTA_FIB_F_PRESENT)
+ *dreg = !!dev;
+ else
+ strncpy(reg, dev ? dev->name : "", IFNAMSIZ);
break;
default:
WARN_ON_ONCE(1);
diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c
index c4dad1254ead..24f2f7567ddb 100644
--- a/net/netfilter/nft_hash.c
+++ b/net/netfilter/nft_hash.c
@@ -17,7 +17,7 @@
#include <net/netfilter/nf_tables_core.h>
#include <linux/jhash.h>
-struct nft_hash {
+struct nft_jhash {
enum nft_registers sreg:8;
enum nft_registers dreg:8;
u8 len;
@@ -27,11 +27,11 @@ struct nft_hash {
u32 offset;
};
-static void nft_hash_eval(const struct nft_expr *expr,
- struct nft_regs *regs,
- const struct nft_pktinfo *pkt)
+static void nft_jhash_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
{
- struct nft_hash *priv = nft_expr_priv(expr);
+ struct nft_jhash *priv = nft_expr_priv(expr);
const void *data = &regs->data[priv->sreg];
u32 h;
@@ -39,6 +39,25 @@ static void nft_hash_eval(const struct nft_expr *expr,
regs->data[priv->dreg] = h + priv->offset;
}
+struct nft_symhash {
+ enum nft_registers dreg:8;
+ u32 modulus;
+ u32 offset;
+};
+
+static void nft_symhash_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ struct nft_symhash *priv = nft_expr_priv(expr);
+ struct sk_buff *skb = pkt->skb;
+ u32 h;
+
+ h = reciprocal_scale(__skb_get_hash_symmetric(skb), priv->modulus);
+
+ regs->data[priv->dreg] = h + priv->offset;
+}
+
static const struct nla_policy nft_hash_policy[NFTA_HASH_MAX + 1] = {
[NFTA_HASH_SREG] = { .type = NLA_U32 },
[NFTA_HASH_DREG] = { .type = NLA_U32 },
@@ -46,13 +65,14 @@ static const struct nla_policy nft_hash_policy[NFTA_HASH_MAX + 1] = {
[NFTA_HASH_MODULUS] = { .type = NLA_U32 },
[NFTA_HASH_SEED] = { .type = NLA_U32 },
[NFTA_HASH_OFFSET] = { .type = NLA_U32 },
+ [NFTA_HASH_TYPE] = { .type = NLA_U32 },
};
-static int nft_hash_init(const struct nft_ctx *ctx,
- const struct nft_expr *expr,
- const struct nlattr * const tb[])
+static int nft_jhash_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
{
- struct nft_hash *priv = nft_expr_priv(expr);
+ struct nft_jhash *priv = nft_expr_priv(expr);
u32 len;
int err;
@@ -95,10 +115,36 @@ static int nft_hash_init(const struct nft_ctx *ctx,
NFT_DATA_VALUE, sizeof(u32));
}
-static int nft_hash_dump(struct sk_buff *skb,
- const struct nft_expr *expr)
+static int nft_symhash_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_symhash *priv = nft_expr_priv(expr);
+
+ if (!tb[NFTA_HASH_DREG] ||
+ !tb[NFTA_HASH_MODULUS])
+ return -EINVAL;
+
+ if (tb[NFTA_HASH_OFFSET])
+ priv->offset = ntohl(nla_get_be32(tb[NFTA_HASH_OFFSET]));
+
+ priv->dreg = nft_parse_register(tb[NFTA_HASH_DREG]);
+
+ priv->modulus = ntohl(nla_get_be32(tb[NFTA_HASH_MODULUS]));
+ if (priv->modulus <= 1)
+ return -ERANGE;
+
+ if (priv->offset + priv->modulus - 1 < priv->offset)
+ return -EOVERFLOW;
+
+ return nft_validate_register_store(ctx, priv->dreg, NULL,
+ NFT_DATA_VALUE, sizeof(u32));
+}
+
+static int nft_jhash_dump(struct sk_buff *skb,
+ const struct nft_expr *expr)
{
- const struct nft_hash *priv = nft_expr_priv(expr);
+ const struct nft_jhash *priv = nft_expr_priv(expr);
if (nft_dump_register(skb, NFTA_HASH_SREG, priv->sreg))
goto nla_put_failure;
@@ -114,6 +160,28 @@ static int nft_hash_dump(struct sk_buff *skb,
if (priv->offset != 0)
if (nla_put_be32(skb, NFTA_HASH_OFFSET, htonl(priv->offset)))
goto nla_put_failure;
+ if (nla_put_be32(skb, NFTA_HASH_TYPE, htonl(NFT_HASH_JENKINS)))
+ goto nla_put_failure;
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+static int nft_symhash_dump(struct sk_buff *skb,
+ const struct nft_expr *expr)
+{
+ const struct nft_symhash *priv = nft_expr_priv(expr);
+
+ if (nft_dump_register(skb, NFTA_HASH_DREG, priv->dreg))
+ goto nla_put_failure;
+ if (nla_put_be32(skb, NFTA_HASH_MODULUS, htonl(priv->modulus)))
+ goto nla_put_failure;
+ if (priv->offset != 0)
+ if (nla_put_be32(skb, NFTA_HASH_OFFSET, htonl(priv->offset)))
+ goto nla_put_failure;
+ if (nla_put_be32(skb, NFTA_HASH_TYPE, htonl(NFT_HASH_SYM)))
+ goto nla_put_failure;
return 0;
nla_put_failure:
@@ -121,17 +189,46 @@ nla_put_failure:
}
static struct nft_expr_type nft_hash_type;
-static const struct nft_expr_ops nft_hash_ops = {
+static const struct nft_expr_ops nft_jhash_ops = {
.type = &nft_hash_type,
- .size = NFT_EXPR_SIZE(sizeof(struct nft_hash)),
- .eval = nft_hash_eval,
- .init = nft_hash_init,
- .dump = nft_hash_dump,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_jhash)),
+ .eval = nft_jhash_eval,
+ .init = nft_jhash_init,
+ .dump = nft_jhash_dump,
};
+static const struct nft_expr_ops nft_symhash_ops = {
+ .type = &nft_hash_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_symhash)),
+ .eval = nft_symhash_eval,
+ .init = nft_symhash_init,
+ .dump = nft_symhash_dump,
+};
+
+static const struct nft_expr_ops *
+nft_hash_select_ops(const struct nft_ctx *ctx,
+ const struct nlattr * const tb[])
+{
+ u32 type;
+
+ if (!tb[NFTA_HASH_TYPE])
+ return &nft_jhash_ops;
+
+ type = ntohl(nla_get_be32(tb[NFTA_HASH_TYPE]));
+ switch (type) {
+ case NFT_HASH_SYM:
+ return &nft_symhash_ops;
+ case NFT_HASH_JENKINS:
+ return &nft_jhash_ops;
+ default:
+ break;
+ }
+ return ERR_PTR(-EOPNOTSUPP);
+}
+
static struct nft_expr_type nft_hash_type __read_mostly = {
.name = "hash",
- .ops = &nft_hash_ops,
+ .select_ops = nft_hash_select_ops,
.policy = nft_hash_policy,
.maxattr = NFTA_HASH_MAX,
.owner = THIS_MODULE,
diff --git a/net/netfilter/nft_limit.c b/net/netfilter/nft_limit.c
index c6baf412236d..18dd57a52651 100644
--- a/net/netfilter/nft_limit.c
+++ b/net/netfilter/nft_limit.c
@@ -17,9 +17,8 @@
#include <linux/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables.h>
-static DEFINE_SPINLOCK(limit_lock);
-
struct nft_limit {
+ spinlock_t lock;
u64 last;
u64 tokens;
u64 tokens_max;
@@ -34,7 +33,7 @@ static inline bool nft_limit_eval(struct nft_limit *limit, u64 cost)
u64 now, tokens;
s64 delta;
- spin_lock_bh(&limit_lock);
+ spin_lock_bh(&limit->lock);
now = ktime_get_ns();
tokens = limit->tokens + now - limit->last;
if (tokens > limit->tokens_max)
@@ -44,11 +43,11 @@ static inline bool nft_limit_eval(struct nft_limit *limit, u64 cost)
delta = tokens - cost;
if (delta >= 0) {
limit->tokens = delta;
- spin_unlock_bh(&limit_lock);
+ spin_unlock_bh(&limit->lock);
return limit->invert;
}
limit->tokens = tokens;
- spin_unlock_bh(&limit_lock);
+ spin_unlock_bh(&limit->lock);
return !limit->invert;
}
@@ -86,6 +85,7 @@ static int nft_limit_init(struct nft_limit *limit,
limit->invert = true;
}
limit->last = ktime_get_ns();
+ spin_lock_init(&limit->lock);
return 0;
}
diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c
index e21aea7e5ec8..475570e89ede 100644
--- a/net/netfilter/nft_lookup.c
+++ b/net/netfilter/nft_lookup.c
@@ -71,16 +71,10 @@ static int nft_lookup_init(const struct nft_ctx *ctx,
tb[NFTA_LOOKUP_SREG] == NULL)
return -EINVAL;
- set = nf_tables_set_lookup(ctx->table, tb[NFTA_LOOKUP_SET], genmask);
- if (IS_ERR(set)) {
- if (tb[NFTA_LOOKUP_SET_ID]) {
- set = nf_tables_set_lookup_byid(ctx->net,
- tb[NFTA_LOOKUP_SET_ID],
- genmask);
- }
- if (IS_ERR(set))
- return PTR_ERR(set);
- }
+ set = nft_set_lookup(ctx->net, ctx->table, tb[NFTA_LOOKUP_SET],
+ tb[NFTA_LOOKUP_SET_ID], genmask);
+ if (IS_ERR(set))
+ return PTR_ERR(set);
if (set->flags & NFT_SET_EVAL)
return -EOPNOTSUPP;
diff --git a/net/netfilter/nft_masq.c b/net/netfilter/nft_masq.c
index 11ce016cd479..6ac03d4266c9 100644
--- a/net/netfilter/nft_masq.c
+++ b/net/netfilter/nft_masq.c
@@ -46,10 +46,6 @@ int nft_masq_init(const struct nft_ctx *ctx,
struct nft_masq *priv = nft_expr_priv(expr);
int err;
- err = nft_masq_validate(ctx, expr, NULL);
- if (err)
- return err;
-
if (tb[NFTA_MASQ_FLAGS]) {
priv->flags = ntohl(nla_get_be32(tb[NFTA_MASQ_FLAGS]));
if (priv->flags & ~NF_NAT_RANGE_MASK)
diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c
index 7b60e01f38ff..5a60eb23a7ed 100644
--- a/net/netfilter/nft_meta.c
+++ b/net/netfilter/nft_meta.c
@@ -372,10 +372,6 @@ int nft_meta_set_init(const struct nft_ctx *ctx,
return -EOPNOTSUPP;
}
- err = nft_meta_set_validate(ctx, expr, NULL);
- if (err < 0)
- return err;
-
priv->sreg = nft_parse_register(tb[NFTA_META_SREG]);
err = nft_validate_register_load(priv->sreg, len);
if (err < 0)
@@ -471,7 +467,7 @@ nft_meta_select_ops(const struct nft_ctx *ctx,
static struct nft_expr_type nft_meta_type __read_mostly = {
.name = "meta",
- .select_ops = &nft_meta_select_ops,
+ .select_ops = nft_meta_select_ops,
.policy = nft_meta_policy,
.maxattr = NFTA_META_MAX,
.owner = THIS_MODULE,
diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c
index 439e0bd152a0..ed548d06b6dd 100644
--- a/net/netfilter/nft_nat.c
+++ b/net/netfilter/nft_nat.c
@@ -138,10 +138,6 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
return -EINVAL;
}
- err = nft_nat_validate(ctx, expr, NULL);
- if (err < 0)
- return err;
-
if (tb[NFTA_NAT_FAMILY] == NULL)
return -EINVAL;
diff --git a/net/netfilter/nft_numgen.c b/net/netfilter/nft_numgen.c
index a66b36097b8f..5a3a52c71545 100644
--- a/net/netfilter/nft_numgen.c
+++ b/net/netfilter/nft_numgen.c
@@ -188,7 +188,7 @@ nft_ng_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[])
static struct nft_expr_type nft_ng_type __read_mostly = {
.name = "numgen",
- .select_ops = &nft_ng_select_ops,
+ .select_ops = nft_ng_select_ops,
.policy = nft_ng_policy,
.maxattr = NFTA_NG_MAX,
.owner = THIS_MODULE,
diff --git a/net/netfilter/nft_objref.c b/net/netfilter/nft_objref.c
index 1ae8c49ca4a1..1dd428fbaaa3 100644
--- a/net/netfilter/nft_objref.c
+++ b/net/netfilter/nft_objref.c
@@ -116,16 +116,10 @@ static int nft_objref_map_init(const struct nft_ctx *ctx,
struct nft_set *set;
int err;
- set = nf_tables_set_lookup(ctx->table, tb[NFTA_OBJREF_SET_NAME], genmask);
- if (IS_ERR(set)) {
- if (tb[NFTA_OBJREF_SET_ID]) {
- set = nf_tables_set_lookup_byid(ctx->net,
- tb[NFTA_OBJREF_SET_ID],
- genmask);
- }
- if (IS_ERR(set))
- return PTR_ERR(set);
- }
+ set = nft_set_lookup(ctx->net, ctx->table, tb[NFTA_OBJREF_SET_NAME],
+ tb[NFTA_OBJREF_SET_ID], genmask);
+ if (IS_ERR(set))
+ return PTR_ERR(set);
if (!(set->flags & NFT_SET_OBJECT))
return -EINVAL;
diff --git a/net/netfilter/nft_queue.c b/net/netfilter/nft_queue.c
index dbb6aaff67ec..98613658d4ac 100644
--- a/net/netfilter/nft_queue.c
+++ b/net/netfilter/nft_queue.c
@@ -197,7 +197,7 @@ nft_queue_select_ops(const struct nft_ctx *ctx,
static struct nft_expr_type nft_queue_type __read_mostly = {
.name = "queue",
- .select_ops = &nft_queue_select_ops,
+ .select_ops = nft_queue_select_ops,
.policy = nft_queue_policy,
.maxattr = NFTA_QUEUE_MAX,
.owner = THIS_MODULE,
diff --git a/net/netfilter/nft_quota.c b/net/netfilter/nft_quota.c
index 2d6fe3559912..25e33159be57 100644
--- a/net/netfilter/nft_quota.c
+++ b/net/netfilter/nft_quota.c
@@ -99,7 +99,8 @@ static int nft_quota_do_init(const struct nlattr * const tb[],
return 0;
}
-static int nft_quota_obj_init(const struct nlattr * const tb[],
+static int nft_quota_obj_init(const struct nft_ctx *ctx,
+ const struct nlattr * const tb[],
struct nft_object *obj)
{
struct nft_quota *priv = nft_obj_data(obj);
diff --git a/net/netfilter/nft_redir.c b/net/netfilter/nft_redir.c
index 40dcd05146d5..1e66538bf0ff 100644
--- a/net/netfilter/nft_redir.c
+++ b/net/netfilter/nft_redir.c
@@ -47,10 +47,6 @@ int nft_redir_init(const struct nft_ctx *ctx,
unsigned int plen;
int err;
- err = nft_redir_validate(ctx, expr, NULL);
- if (err < 0)
- return err;
-
plen = FIELD_SIZEOF(struct nf_nat_range, min_addr.all);
if (tb[NFTA_REDIR_REG_PROTO_MIN]) {
priv->sreg_proto_min =
diff --git a/net/netfilter/nft_reject.c b/net/netfilter/nft_reject.c
index c64de3f7379d..29f5bd2377b0 100644
--- a/net/netfilter/nft_reject.c
+++ b/net/netfilter/nft_reject.c
@@ -42,11 +42,6 @@ int nft_reject_init(const struct nft_ctx *ctx,
const struct nlattr * const tb[])
{
struct nft_reject *priv = nft_expr_priv(expr);
- int err;
-
- err = nft_reject_validate(ctx, expr, NULL);
- if (err < 0)
- return err;
if (tb[NFTA_REJECT_TYPE] == NULL)
return -EINVAL;
diff --git a/net/netfilter/nft_reject_inet.c b/net/netfilter/nft_reject_inet.c
index 9e90a02cb104..5a7fb5ff867d 100644
--- a/net/netfilter/nft_reject_inet.c
+++ b/net/netfilter/nft_reject_inet.c
@@ -66,11 +66,7 @@ static int nft_reject_inet_init(const struct nft_ctx *ctx,
const struct nlattr * const tb[])
{
struct nft_reject *priv = nft_expr_priv(expr);
- int icmp_code, err;
-
- err = nft_reject_validate(ctx, expr, NULL);
- if (err < 0)
- return err;
+ int icmp_code;
if (tb[NFTA_REJECT_TYPE] == NULL)
return -EINVAL;
diff --git a/net/netfilter/nft_set_bitmap.c b/net/netfilter/nft_set_bitmap.c
index 8ebbc2940f4c..b988162b5b15 100644
--- a/net/netfilter/nft_set_bitmap.c
+++ b/net/netfilter/nft_set_bitmap.c
@@ -257,6 +257,11 @@ static int nft_bitmap_init(const struct nft_set *set,
static void nft_bitmap_destroy(const struct nft_set *set)
{
+ struct nft_bitmap *priv = nft_set_priv(set);
+ struct nft_bitmap_elem *be, *n;
+
+ list_for_each_entry_safe(be, n, &priv->list, head)
+ nft_set_elem_destroy(set, be, true);
}
static bool nft_bitmap_estimate(const struct nft_set_desc *desc, u32 features,
diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c
index 5f652720fc78..8ec086b6b56b 100644
--- a/net/netfilter/nft_set_hash.c
+++ b/net/netfilter/nft_set_hash.c
@@ -352,7 +352,7 @@ static int nft_hash_init(const struct nft_set *set,
static void nft_hash_elem_destroy(void *ptr, void *arg)
{
- nft_set_elem_destroy((const struct nft_set *)arg, ptr, true);
+ nft_set_elem_destroy(arg, ptr, true);
}
static void nft_hash_destroy(const struct nft_set *set)
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index 78dfbf9588b3..e97e2fb53f0a 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -18,9 +18,8 @@
#include <linux/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables.h>
-static DEFINE_SPINLOCK(nft_rbtree_lock);
-
struct nft_rbtree {
+ rwlock_t lock;
struct rb_root root;
};
@@ -44,14 +43,14 @@ static bool nft_rbtree_equal(const struct nft_set *set, const void *this,
static bool nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
const u32 *key, const struct nft_set_ext **ext)
{
- const struct nft_rbtree *priv = nft_set_priv(set);
+ struct nft_rbtree *priv = nft_set_priv(set);
const struct nft_rbtree_elem *rbe, *interval = NULL;
u8 genmask = nft_genmask_cur(net);
const struct rb_node *parent;
const void *this;
int d;
- spin_lock_bh(&nft_rbtree_lock);
+ read_lock_bh(&priv->lock);
parent = priv->root.rb_node;
while (parent != NULL) {
rbe = rb_entry(parent, struct nft_rbtree_elem, node);
@@ -75,7 +74,7 @@ static bool nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
}
if (nft_rbtree_interval_end(rbe))
goto out;
- spin_unlock_bh(&nft_rbtree_lock);
+ read_unlock_bh(&priv->lock);
*ext = &rbe->ext;
return true;
@@ -85,12 +84,12 @@ static bool nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
if (set->flags & NFT_SET_INTERVAL && interval != NULL &&
nft_set_elem_active(&interval->ext, genmask) &&
!nft_rbtree_interval_end(interval)) {
- spin_unlock_bh(&nft_rbtree_lock);
+ read_unlock_bh(&priv->lock);
*ext = &interval->ext;
return true;
}
out:
- spin_unlock_bh(&nft_rbtree_lock);
+ read_unlock_bh(&priv->lock);
return false;
}
@@ -140,12 +139,13 @@ static int nft_rbtree_insert(const struct net *net, const struct nft_set *set,
const struct nft_set_elem *elem,
struct nft_set_ext **ext)
{
+ struct nft_rbtree *priv = nft_set_priv(set);
struct nft_rbtree_elem *rbe = elem->priv;
int err;
- spin_lock_bh(&nft_rbtree_lock);
+ write_lock_bh(&priv->lock);
err = __nft_rbtree_insert(net, set, rbe, ext);
- spin_unlock_bh(&nft_rbtree_lock);
+ write_unlock_bh(&priv->lock);
return err;
}
@@ -157,9 +157,9 @@ static void nft_rbtree_remove(const struct net *net,
struct nft_rbtree *priv = nft_set_priv(set);
struct nft_rbtree_elem *rbe = elem->priv;
- spin_lock_bh(&nft_rbtree_lock);
+ write_lock_bh(&priv->lock);
rb_erase(&rbe->node, &priv->root);
- spin_unlock_bh(&nft_rbtree_lock);
+ write_unlock_bh(&priv->lock);
}
static void nft_rbtree_activate(const struct net *net,
@@ -224,12 +224,12 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx,
struct nft_set *set,
struct nft_set_iter *iter)
{
- const struct nft_rbtree *priv = nft_set_priv(set);
+ struct nft_rbtree *priv = nft_set_priv(set);
struct nft_rbtree_elem *rbe;
struct nft_set_elem elem;
struct rb_node *node;
- spin_lock_bh(&nft_rbtree_lock);
+ read_lock_bh(&priv->lock);
for (node = rb_first(&priv->root); node != NULL; node = rb_next(node)) {
rbe = rb_entry(node, struct nft_rbtree_elem, node);
@@ -242,13 +242,13 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx,
iter->err = iter->fn(ctx, set, iter, &elem);
if (iter->err < 0) {
- spin_unlock_bh(&nft_rbtree_lock);
+ read_unlock_bh(&priv->lock);
return;
}
cont:
iter->count++;
}
- spin_unlock_bh(&nft_rbtree_lock);
+ read_unlock_bh(&priv->lock);
}
static unsigned int nft_rbtree_privsize(const struct nlattr * const nla[])
@@ -262,6 +262,7 @@ static int nft_rbtree_init(const struct nft_set *set,
{
struct nft_rbtree *priv = nft_set_priv(set);
+ rwlock_init(&priv->lock);
priv->root = RB_ROOT;
return 0;
}
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 14857afc9937..8876b7da6884 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -763,17 +763,8 @@ EXPORT_SYMBOL(xt_check_entry_offsets);
*/
unsigned int *xt_alloc_entry_offsets(unsigned int size)
{
- unsigned int *off;
+ return kvmalloc_array(size, sizeof(unsigned int), GFP_KERNEL | __GFP_ZERO);
- off = kcalloc(size, sizeof(unsigned int), GFP_KERNEL | __GFP_NOWARN);
-
- if (off)
- return off;
-
- if (size < (SIZE_MAX / sizeof(unsigned int)))
- off = vmalloc(size * sizeof(unsigned int));
-
- return off;
}
EXPORT_SYMBOL(xt_alloc_entry_offsets);
@@ -1007,8 +998,7 @@ struct xt_table_info *xt_alloc_table_info(unsigned int size)
if (sz <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER))
info = kmalloc(sz, GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY);
if (!info) {
- info = __vmalloc(sz, GFP_KERNEL | __GFP_NOWARN |
- __GFP_NORETRY | __GFP_HIGHMEM,
+ info = __vmalloc(sz, GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY,
PAGE_KERNEL);
if (!info)
return NULL;
@@ -1051,8 +1041,10 @@ struct xt_table *xt_find_table_lock(struct net *net, u_int8_t af,
list_for_each_entry(t, &init_net.xt.tables[af], list) {
if (strcmp(t->name, name))
continue;
- if (!try_module_get(t->me))
+ if (!try_module_get(t->me)) {
+ mutex_unlock(&xt[af].mutex);
return NULL;
+ }
mutex_unlock(&xt[af].mutex);
if (t->table_init(net) != 0) {
@@ -1114,7 +1106,7 @@ static int xt_jumpstack_alloc(struct xt_table_info *i)
size = sizeof(void **) * nr_cpu_ids;
if (size > PAGE_SIZE)
- i->jumpstack = vzalloc(size);
+ i->jumpstack = kvzalloc(size, GFP_KERNEL);
else
i->jumpstack = kzalloc(size, GFP_KERNEL);
if (i->jumpstack == NULL)
@@ -1136,12 +1128,8 @@ static int xt_jumpstack_alloc(struct xt_table_info *i)
*/
size = sizeof(void *) * i->stacksize * 2u;
for_each_possible_cpu(cpu) {
- if (size > PAGE_SIZE)
- i->jumpstack[cpu] = vmalloc_node(size,
- cpu_to_node(cpu));
- else
- i->jumpstack[cpu] = kmalloc_node(size,
- GFP_KERNEL, cpu_to_node(cpu));
+ i->jumpstack[cpu] = kvmalloc_node(size, GFP_KERNEL,
+ cpu_to_node(cpu));
if (i->jumpstack[cpu] == NULL)
/*
* Freeing will be done later on by the callers. The
diff --git a/net/netfilter/xt_AUDIT.c b/net/netfilter/xt_AUDIT.c
index 19247a17e511..c502419d6306 100644
--- a/net/netfilter/xt_AUDIT.c
+++ b/net/netfilter/xt_AUDIT.c
@@ -31,146 +31,76 @@ MODULE_ALIAS("ip6t_AUDIT");
MODULE_ALIAS("ebt_AUDIT");
MODULE_ALIAS("arpt_AUDIT");
-static void audit_proto(struct audit_buffer *ab, struct sk_buff *skb,
- unsigned int proto, unsigned int offset)
-{
- switch (proto) {
- case IPPROTO_TCP:
- case IPPROTO_UDP:
- case IPPROTO_UDPLITE: {
- const __be16 *pptr;
- __be16 _ports[2];
-
- pptr = skb_header_pointer(skb, offset, sizeof(_ports), _ports);
- if (pptr == NULL) {
- audit_log_format(ab, " truncated=1");
- return;
- }
-
- audit_log_format(ab, " sport=%hu dport=%hu",
- ntohs(pptr[0]), ntohs(pptr[1]));
- }
- break;
-
- case IPPROTO_ICMP:
- case IPPROTO_ICMPV6: {
- const u8 *iptr;
- u8 _ih[2];
-
- iptr = skb_header_pointer(skb, offset, sizeof(_ih), &_ih);
- if (iptr == NULL) {
- audit_log_format(ab, " truncated=1");
- return;
- }
-
- audit_log_format(ab, " icmptype=%hhu icmpcode=%hhu",
- iptr[0], iptr[1]);
-
- }
- break;
- }
-}
-
-static void audit_ip4(struct audit_buffer *ab, struct sk_buff *skb)
+static bool audit_ip4(struct audit_buffer *ab, struct sk_buff *skb)
{
struct iphdr _iph;
const struct iphdr *ih;
- ih = skb_header_pointer(skb, 0, sizeof(_iph), &_iph);
- if (!ih) {
- audit_log_format(ab, " truncated=1");
- return;
- }
+ ih = skb_header_pointer(skb, skb_network_offset(skb), sizeof(_iph), &_iph);
+ if (!ih)
+ return false;
- audit_log_format(ab, " saddr=%pI4 daddr=%pI4 ipid=%hu proto=%hhu",
- &ih->saddr, &ih->daddr, ntohs(ih->id), ih->protocol);
+ audit_log_format(ab, " saddr=%pI4 daddr=%pI4 proto=%hhu",
+ &ih->saddr, &ih->daddr, ih->protocol);
- if (ntohs(ih->frag_off) & IP_OFFSET) {
- audit_log_format(ab, " frag=1");
- return;
- }
-
- audit_proto(ab, skb, ih->protocol, ih->ihl * 4);
+ return true;
}
-static void audit_ip6(struct audit_buffer *ab, struct sk_buff *skb)
+static bool audit_ip6(struct audit_buffer *ab, struct sk_buff *skb)
{
struct ipv6hdr _ip6h;
const struct ipv6hdr *ih;
u8 nexthdr;
__be16 frag_off;
- int offset;
ih = skb_header_pointer(skb, skb_network_offset(skb), sizeof(_ip6h), &_ip6h);
- if (!ih) {
- audit_log_format(ab, " truncated=1");
- return;
- }
+ if (!ih)
+ return false;
nexthdr = ih->nexthdr;
- offset = ipv6_skip_exthdr(skb, skb_network_offset(skb) + sizeof(_ip6h),
- &nexthdr, &frag_off);
+ ipv6_skip_exthdr(skb, skb_network_offset(skb) + sizeof(_ip6h), &nexthdr, &frag_off);
audit_log_format(ab, " saddr=%pI6c daddr=%pI6c proto=%hhu",
&ih->saddr, &ih->daddr, nexthdr);
- if (offset)
- audit_proto(ab, skb, nexthdr, offset);
+ return true;
}
static unsigned int
audit_tg(struct sk_buff *skb, const struct xt_action_param *par)
{
- const struct xt_audit_info *info = par->targinfo;
struct audit_buffer *ab;
+ int fam = -1;
if (audit_enabled == 0)
goto errout;
-
ab = audit_log_start(NULL, GFP_ATOMIC, AUDIT_NETFILTER_PKT);
if (ab == NULL)
goto errout;
- audit_log_format(ab, "action=%hhu hook=%u len=%u inif=%s outif=%s",
- info->type, xt_hooknum(par), skb->len,
- xt_in(par) ? xt_inname(par) : "?",
- xt_out(par) ? xt_outname(par) : "?");
-
- if (skb->mark)
- audit_log_format(ab, " mark=%#x", skb->mark);
-
- if (skb->dev && skb->dev->type == ARPHRD_ETHER) {
- audit_log_format(ab, " smac=%pM dmac=%pM macproto=0x%04x",
- eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest,
- ntohs(eth_hdr(skb)->h_proto));
-
- if (xt_family(par) == NFPROTO_BRIDGE) {
- switch (eth_hdr(skb)->h_proto) {
- case htons(ETH_P_IP):
- audit_ip4(ab, skb);
- break;
-
- case htons(ETH_P_IPV6):
- audit_ip6(ab, skb);
- break;
- }
- }
- }
+ audit_log_format(ab, "mark=%#x", skb->mark);
switch (xt_family(par)) {
+ case NFPROTO_BRIDGE:
+ switch (eth_hdr(skb)->h_proto) {
+ case htons(ETH_P_IP):
+ fam = audit_ip4(ab, skb) ? NFPROTO_IPV4 : -1;
+ break;
+ case htons(ETH_P_IPV6):
+ fam = audit_ip6(ab, skb) ? NFPROTO_IPV6 : -1;
+ break;
+ }
+ break;
case NFPROTO_IPV4:
- audit_ip4(ab, skb);
+ fam = audit_ip4(ab, skb) ? NFPROTO_IPV4 : -1;
break;
-
case NFPROTO_IPV6:
- audit_ip6(ab, skb);
+ fam = audit_ip6(ab, skb) ? NFPROTO_IPV6 : -1;
break;
}
-#ifdef CONFIG_NETWORK_SECMARK
- if (skb->secmark)
- audit_log_secctx(ab, skb->secmark);
-#endif
+ if (fam == -1)
+ audit_log_format(ab, " saddr=? daddr=? proto=-1");
audit_log_end(ab);
diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c
index b008db0184b8..bb7ad82dcd56 100644
--- a/net/netfilter/xt_CT.c
+++ b/net/netfilter/xt_CT.c
@@ -26,11 +26,12 @@ static inline int xt_ct_target(struct sk_buff *skb, struct nf_conn *ct)
if (skb->_nfct != 0)
return XT_CONTINUE;
- /* special case the untracked ct : we want the percpu object */
- if (!ct)
- ct = nf_ct_untracked_get();
- atomic_inc(&ct->ct_general.use);
- nf_ct_set(skb, ct, IP_CT_NEW);
+ if (ct) {
+ atomic_inc(&ct->ct_general.use);
+ nf_ct_set(skb, ct, IP_CT_NEW);
+ } else {
+ nf_ct_set(skb, ct, IP_CT_UNTRACKED);
+ }
return XT_CONTINUE;
}
@@ -167,8 +168,10 @@ xt_ct_set_timeout(struct nf_conn *ct, const struct xt_tgchk_param *par,
goto err_put_timeout;
}
timeout_ext = nf_ct_timeout_ext_add(ct, timeout, GFP_ATOMIC);
- if (timeout_ext == NULL)
+ if (!timeout_ext) {
ret = -ENOMEM;
+ goto err_put_timeout;
+ }
rcu_read_unlock();
return ret;
@@ -200,6 +203,7 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par,
struct xt_ct_target_info_v1 *info)
{
struct nf_conntrack_zone zone;
+ struct nf_conn_help *help;
struct nf_conn *ct;
int ret = -EOPNOTSUPP;
@@ -248,7 +252,7 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par,
if (info->timeout[0]) {
ret = xt_ct_set_timeout(ct, par, info->timeout);
if (ret < 0)
- goto err3;
+ goto err4;
}
__set_bit(IPS_CONFIRMED_BIT, &ct->status);
nf_conntrack_get(&ct->ct_general);
@@ -256,6 +260,10 @@ out:
info->ct = ct;
return 0;
+err4:
+ help = nfct_help(ct);
+ if (help)
+ module_put(help->helper->me);
err3:
nf_ct_tmpl_free(ct);
err2:
@@ -335,7 +343,7 @@ static void xt_ct_tg_destroy(const struct xt_tgdtor_param *par,
struct nf_conn *ct = info->ct;
struct nf_conn_help *help;
- if (ct && !nf_ct_is_untracked(ct)) {
+ if (ct) {
help = nfct_help(ct);
if (help)
module_put(help->helper->me);
@@ -412,8 +420,7 @@ notrack_tg(struct sk_buff *skb, const struct xt_action_param *par)
if (skb->_nfct != 0)
return XT_CONTINUE;
- nf_ct_set(skb, nf_ct_untracked_get(), IP_CT_NEW);
- nf_conntrack_get(skb_nfct(skb));
+ nf_ct_set(skb, NULL, IP_CT_UNTRACKED);
return XT_CONTINUE;
}
diff --git a/net/netfilter/xt_HMARK.c b/net/netfilter/xt_HMARK.c
index 02afaf48a729..60e6dbe12460 100644
--- a/net/netfilter/xt_HMARK.c
+++ b/net/netfilter/xt_HMARK.c
@@ -84,7 +84,7 @@ hmark_ct_set_htuple(const struct sk_buff *skb, struct hmark_tuple *t,
struct nf_conntrack_tuple *otuple;
struct nf_conntrack_tuple *rtuple;
- if (ct == NULL || nf_ct_is_untracked(ct))
+ if (ct == NULL)
return -1;
otuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
diff --git a/net/netfilter/xt_cluster.c b/net/netfilter/xt_cluster.c
index 9a9884a39c0e..57ef175dfbfa 100644
--- a/net/netfilter/xt_cluster.c
+++ b/net/netfilter/xt_cluster.c
@@ -121,9 +121,6 @@ xt_cluster_mt(const struct sk_buff *skb, struct xt_action_param *par)
if (ct == NULL)
return false;
- if (nf_ct_is_untracked(ct))
- return false;
-
if (ct->master)
hash = xt_cluster_hash(ct->master, info);
else
diff --git a/net/netfilter/xt_connlabel.c b/net/netfilter/xt_connlabel.c
index 7827128d5a95..23372879e6e3 100644
--- a/net/netfilter/xt_connlabel.c
+++ b/net/netfilter/xt_connlabel.c
@@ -29,7 +29,7 @@ connlabel_mt(const struct sk_buff *skb, struct xt_action_param *par)
bool invert = info->options & XT_CONNLABEL_OP_INVERT;
ct = nf_ct_get(skb, &ctinfo);
- if (ct == NULL || nf_ct_is_untracked(ct))
+ if (ct == NULL)
return invert;
labels = nf_ct_labels_find(ct);
diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c
index 9935d5029b0e..ec377cc6a369 100644
--- a/net/netfilter/xt_connmark.c
+++ b/net/netfilter/xt_connmark.c
@@ -44,7 +44,7 @@ connmark_tg(struct sk_buff *skb, const struct xt_action_param *par)
u_int32_t newmark;
ct = nf_ct_get(skb, &ctinfo);
- if (ct == NULL || nf_ct_is_untracked(ct))
+ if (ct == NULL)
return XT_CONTINUE;
switch (info->mode) {
@@ -97,7 +97,7 @@ connmark_mt(const struct sk_buff *skb, struct xt_action_param *par)
const struct nf_conn *ct;
ct = nf_ct_get(skb, &ctinfo);
- if (ct == NULL || nf_ct_is_untracked(ct))
+ if (ct == NULL)
return false;
return ((ct->mark & info->mask) == info->mark) ^ info->invert;
diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c
index c0fb217bc649..39cf1d019240 100644
--- a/net/netfilter/xt_conntrack.c
+++ b/net/netfilter/xt_conntrack.c
@@ -172,12 +172,11 @@ conntrack_mt(const struct sk_buff *skb, struct xt_action_param *par,
ct = nf_ct_get(skb, &ctinfo);
- if (ct) {
- if (nf_ct_is_untracked(ct))
- statebit = XT_CONNTRACK_STATE_UNTRACKED;
- else
- statebit = XT_CONNTRACK_STATE_BIT(ctinfo);
- } else
+ if (ct)
+ statebit = XT_CONNTRACK_STATE_BIT(ctinfo);
+ else if (ctinfo == IP_CT_UNTRACKED)
+ statebit = XT_CONNTRACK_STATE_UNTRACKED;
+ else
statebit = XT_CONNTRACK_STATE_INVALID;
if (info->match_flags & XT_CONNTRACK_STATE) {
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index 2a6dfe8b74d3..762e1874f28b 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -119,7 +119,7 @@ static int
cfg_copy(struct hashlimit_cfg2 *to, void *from, int revision)
{
if (revision == 1) {
- struct hashlimit_cfg1 *cfg = (struct hashlimit_cfg1 *)from;
+ struct hashlimit_cfg1 *cfg = from;
to->mode = cfg->mode;
to->avg = cfg->avg;
@@ -895,7 +895,7 @@ static void *dl_seq_start(struct seq_file *s, loff_t *pos)
static void *dl_seq_next(struct seq_file *s, void *v, loff_t *pos)
{
struct xt_hashlimit_htable *htable = s->private;
- unsigned int *bucket = (unsigned int *)v;
+ unsigned int *bucket = v;
*pos = ++(*bucket);
if (*pos >= htable->cfg.size) {
@@ -909,7 +909,7 @@ static void dl_seq_stop(struct seq_file *s, void *v)
__releases(htable->lock)
{
struct xt_hashlimit_htable *htable = s->private;
- unsigned int *bucket = (unsigned int *)v;
+ unsigned int *bucket = v;
if (!IS_ERR(bucket))
kfree(bucket);
@@ -980,7 +980,7 @@ static int dl_seq_real_show(struct dsthash_ent *ent, u_int8_t family,
static int dl_seq_show_v1(struct seq_file *s, void *v)
{
struct xt_hashlimit_htable *htable = s->private;
- unsigned int *bucket = (unsigned int *)v;
+ unsigned int *bucket = v;
struct dsthash_ent *ent;
if (!hlist_empty(&htable->hash[*bucket])) {
@@ -994,7 +994,7 @@ static int dl_seq_show_v1(struct seq_file *s, void *v)
static int dl_seq_show(struct seq_file *s, void *v)
{
struct xt_hashlimit_htable *htable = s->private;
- unsigned int *bucket = (unsigned int *)v;
+ unsigned int *bucket = v;
struct dsthash_ent *ent;
if (!hlist_empty(&htable->hash[*bucket])) {
diff --git a/net/netfilter/xt_ipvs.c b/net/netfilter/xt_ipvs.c
index 0fdc89064488..42540d26c2b8 100644
--- a/net/netfilter/xt_ipvs.c
+++ b/net/netfilter/xt_ipvs.c
@@ -116,7 +116,7 @@ ipvs_mt(const struct sk_buff *skb, struct xt_action_param *par)
enum ip_conntrack_info ctinfo;
struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
- if (ct == NULL || nf_ct_is_untracked(ct)) {
+ if (ct == NULL) {
match = false;
goto out_put_cp;
}
diff --git a/net/netfilter/xt_limit.c b/net/netfilter/xt_limit.c
index dab962df1787..d27b5f1ea619 100644
--- a/net/netfilter/xt_limit.c
+++ b/net/netfilter/xt_limit.c
@@ -18,6 +18,7 @@
#include <linux/netfilter/xt_limit.h>
struct xt_limit_priv {
+ spinlock_t lock;
unsigned long prev;
uint32_t credit;
};
@@ -32,8 +33,6 @@ MODULE_ALIAS("ip6t_limit");
* see net/sched/sch_tbf.c in the linux source tree
*/
-static DEFINE_SPINLOCK(limit_lock);
-
/* Rusty: This is my (non-mathematically-inclined) understanding of
this algorithm. The `average rate' in jiffies becomes your initial
amount of credit `credit' and the most credit you can ever have
@@ -72,7 +71,7 @@ limit_mt(const struct sk_buff *skb, struct xt_action_param *par)
struct xt_limit_priv *priv = r->master;
unsigned long now = jiffies;
- spin_lock_bh(&limit_lock);
+ spin_lock_bh(&priv->lock);
priv->credit += (now - xchg(&priv->prev, now)) * CREDITS_PER_JIFFY;
if (priv->credit > r->credit_cap)
priv->credit = r->credit_cap;
@@ -80,11 +79,11 @@ limit_mt(const struct sk_buff *skb, struct xt_action_param *par)
if (priv->credit >= r->cost) {
/* We're not limited. */
priv->credit -= r->cost;
- spin_unlock_bh(&limit_lock);
+ spin_unlock_bh(&priv->lock);
return true;
}
- spin_unlock_bh(&limit_lock);
+ spin_unlock_bh(&priv->lock);
return false;
}
@@ -126,6 +125,8 @@ static int limit_mt_check(const struct xt_mtchk_param *par)
r->credit_cap = priv->credit; /* Credits full. */
r->cost = user2credits(r->avg);
}
+ spin_lock_init(&priv->lock);
+
return 0;
}
diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c
index 1d89a4eaf841..3f6c4fa78bdb 100644
--- a/net/netfilter/xt_recent.c
+++ b/net/netfilter/xt_recent.c
@@ -388,10 +388,7 @@ static int recent_mt_check(const struct xt_mtchk_param *par,
}
sz = sizeof(*t) + sizeof(t->iphash[0]) * ip_list_hash_size;
- if (sz <= PAGE_SIZE)
- t = kzalloc(sz, GFP_KERNEL);
- else
- t = vzalloc(sz);
+ t = kvzalloc(sz, GFP_KERNEL);
if (t == NULL) {
ret = -ENOMEM;
goto out;
@@ -532,7 +529,7 @@ static int recent_seq_show(struct seq_file *seq, void *v)
&e->addr.in6, e->ttl, e->stamps[i], e->index);
for (i = 0; i < e->nstamps; i++)
seq_printf(seq, "%s %lu", i ? "," : "", e->stamps[i]);
- seq_printf(seq, "\n");
+ seq_putc(seq, '\n');
return 0;
}
diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c
index 770bbec878f1..e75ef39669c5 100644
--- a/net/netfilter/xt_socket.c
+++ b/net/netfilter/xt_socket.c
@@ -152,7 +152,7 @@ static int socket_mt_enable_defrag(struct net *net, int family)
switch (family) {
case NFPROTO_IPV4:
return nf_defrag_ipv4_enable(net);
-#ifdef XT_SOCKET_HAVE_IPV6
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
case NFPROTO_IPV6:
return nf_defrag_ipv6_enable(net);
#endif
diff --git a/net/netfilter/xt_state.c b/net/netfilter/xt_state.c
index 5746a33789a5..5fbd79194d21 100644
--- a/net/netfilter/xt_state.c
+++ b/net/netfilter/xt_state.c
@@ -28,14 +28,13 @@ state_mt(const struct sk_buff *skb, struct xt_action_param *par)
unsigned int statebit;
struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
- if (!ct)
+ if (ct)
+ statebit = XT_STATE_BIT(ctinfo);
+ else if (ctinfo == IP_CT_UNTRACKED)
+ statebit = XT_STATE_UNTRACKED;
+ else
statebit = XT_STATE_INVALID;
- else {
- if (nf_ct_is_untracked(ct))
- statebit = XT_STATE_UNTRACKED;
- else
- statebit = XT_STATE_BIT(ctinfo);
- }
+
return (sinfo->statemask & statebit);
}
diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c
index 4149d3e63589..9aacf2da3d98 100644
--- a/net/netlabel/netlabel_cipso_v4.c
+++ b/net/netlabel/netlabel_cipso_v4.c
@@ -101,7 +101,7 @@ static int netlbl_cipsov4_add_common(struct genl_info *info,
if (nla_validate_nested(info->attrs[NLBL_CIPSOV4_A_TAGLST],
NLBL_CIPSOV4_A_MAX,
- netlbl_cipsov4_genl_policy) != 0)
+ netlbl_cipsov4_genl_policy, NULL) != 0)
return -EINVAL;
nla_for_each_nested(nla, info->attrs[NLBL_CIPSOV4_A_TAGLST], nla_rem)
@@ -148,7 +148,7 @@ static int netlbl_cipsov4_add_std(struct genl_info *info,
if (nla_validate_nested(info->attrs[NLBL_CIPSOV4_A_MLSLVLLST],
NLBL_CIPSOV4_A_MAX,
- netlbl_cipsov4_genl_policy) != 0)
+ netlbl_cipsov4_genl_policy, NULL) != 0)
return -EINVAL;
doi_def = kmalloc(sizeof(*doi_def), GFP_KERNEL);
@@ -170,10 +170,10 @@ static int netlbl_cipsov4_add_std(struct genl_info *info,
info->attrs[NLBL_CIPSOV4_A_MLSLVLLST],
nla_a_rem)
if (nla_type(nla_a) == NLBL_CIPSOV4_A_MLSLVL) {
- if (nla_validate_nested(nla_a,
- NLBL_CIPSOV4_A_MAX,
- netlbl_cipsov4_genl_policy) != 0)
- goto add_std_failure;
+ if (nla_validate_nested(nla_a, NLBL_CIPSOV4_A_MAX,
+ netlbl_cipsov4_genl_policy,
+ NULL) != 0)
+ goto add_std_failure;
nla_for_each_nested(nla_b, nla_a, nla_b_rem)
switch (nla_type(nla_b)) {
case NLBL_CIPSOV4_A_MLSLVLLOC:
@@ -236,7 +236,7 @@ static int netlbl_cipsov4_add_std(struct genl_info *info,
if (info->attrs[NLBL_CIPSOV4_A_MLSCATLST]) {
if (nla_validate_nested(info->attrs[NLBL_CIPSOV4_A_MLSCATLST],
NLBL_CIPSOV4_A_MAX,
- netlbl_cipsov4_genl_policy) != 0)
+ netlbl_cipsov4_genl_policy, NULL) != 0)
goto add_std_failure;
nla_for_each_nested(nla_a,
@@ -244,8 +244,9 @@ static int netlbl_cipsov4_add_std(struct genl_info *info,
nla_a_rem)
if (nla_type(nla_a) == NLBL_CIPSOV4_A_MLSCAT) {
if (nla_validate_nested(nla_a,
- NLBL_CIPSOV4_A_MAX,
- netlbl_cipsov4_genl_policy) != 0)
+ NLBL_CIPSOV4_A_MAX,
+ netlbl_cipsov4_genl_policy,
+ NULL) != 0)
goto add_std_failure;
nla_for_each_nested(nla_b, nla_a, nla_b_rem)
switch (nla_type(nla_b)) {
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 596eaff66649..ee841f00a6ec 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -78,14 +78,6 @@ struct listeners {
/* state bits */
#define NETLINK_S_CONGESTED 0x0
-/* flags */
-#define NETLINK_F_KERNEL_SOCKET 0x1
-#define NETLINK_F_RECV_PKTINFO 0x2
-#define NETLINK_F_BROADCAST_SEND_ERROR 0x4
-#define NETLINK_F_RECV_NO_ENOBUFS 0x8
-#define NETLINK_F_LISTEN_ALL_NSID 0x10
-#define NETLINK_F_CAP_ACK 0x20
-
static inline int netlink_is_kernel(struct sock *sk)
{
return nlk_sk(sk)->flags & NETLINK_F_KERNEL_SOCKET;
@@ -1660,6 +1652,13 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname,
nlk->flags &= ~NETLINK_F_CAP_ACK;
err = 0;
break;
+ case NETLINK_EXT_ACK:
+ if (val)
+ nlk->flags |= NETLINK_F_EXT_ACK;
+ else
+ nlk->flags &= ~NETLINK_F_EXT_ACK;
+ err = 0;
+ break;
default:
err = -ENOPROTOOPT;
}
@@ -1744,6 +1743,15 @@ static int netlink_getsockopt(struct socket *sock, int level, int optname,
return -EFAULT;
err = 0;
break;
+ case NETLINK_EXT_ACK:
+ if (len < sizeof(int))
+ return -EINVAL;
+ len = sizeof(int);
+ val = nlk->flags & NETLINK_F_EXT_ACK ? 1 : 0;
+ if (put_user(len, optlen) || put_user(val, optval))
+ return -EFAULT;
+ err = 0;
+ break;
default:
err = -ENOPROTOOPT;
}
@@ -2275,21 +2283,44 @@ error_free:
}
EXPORT_SYMBOL(__netlink_dump_start);
-void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err)
+void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err,
+ const struct netlink_ext_ack *extack)
{
struct sk_buff *skb;
struct nlmsghdr *rep;
struct nlmsgerr *errmsg;
size_t payload = sizeof(*errmsg);
+ size_t tlvlen = 0;
struct netlink_sock *nlk = nlk_sk(NETLINK_CB(in_skb).sk);
+ unsigned int flags = 0;
/* Error messages get the original request appened, unless the user
- * requests to cap the error message.
+ * requests to cap the error message, and get extra error data if
+ * requested.
*/
- if (!(nlk->flags & NETLINK_F_CAP_ACK) && err)
- payload += nlmsg_len(nlh);
+ if (err) {
+ if (!(nlk->flags & NETLINK_F_CAP_ACK))
+ payload += nlmsg_len(nlh);
+ else
+ flags |= NLM_F_CAPPED;
+ if (nlk->flags & NETLINK_F_EXT_ACK && extack) {
+ if (extack->_msg)
+ tlvlen += nla_total_size(strlen(extack->_msg) + 1);
+ if (extack->bad_attr)
+ tlvlen += nla_total_size(sizeof(u32));
+ }
+ } else {
+ flags |= NLM_F_CAPPED;
+
+ if (nlk->flags & NETLINK_F_EXT_ACK &&
+ extack && extack->cookie_len)
+ tlvlen += nla_total_size(extack->cookie_len);
+ }
+
+ if (tlvlen)
+ flags |= NLM_F_ACK_TLVS;
- skb = nlmsg_new(payload, GFP_KERNEL);
+ skb = nlmsg_new(payload + tlvlen, GFP_KERNEL);
if (!skb) {
struct sock *sk;
@@ -2305,17 +2336,42 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err)
}
rep = __nlmsg_put(skb, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq,
- NLMSG_ERROR, payload, 0);
+ NLMSG_ERROR, payload, flags);
errmsg = nlmsg_data(rep);
errmsg->error = err;
memcpy(&errmsg->msg, nlh, payload > sizeof(*errmsg) ? nlh->nlmsg_len : sizeof(*nlh));
+
+ if (nlk->flags & NETLINK_F_EXT_ACK && extack) {
+ if (err) {
+ if (extack->_msg)
+ WARN_ON(nla_put_string(skb, NLMSGERR_ATTR_MSG,
+ extack->_msg));
+ if (extack->bad_attr &&
+ !WARN_ON((u8 *)extack->bad_attr < in_skb->data ||
+ (u8 *)extack->bad_attr >= in_skb->data +
+ in_skb->len))
+ WARN_ON(nla_put_u32(skb, NLMSGERR_ATTR_OFFS,
+ (u8 *)extack->bad_attr -
+ in_skb->data));
+ } else {
+ if (extack->cookie_len)
+ WARN_ON(nla_put(skb, NLMSGERR_ATTR_COOKIE,
+ extack->cookie_len,
+ extack->cookie));
+ }
+ }
+
+ nlmsg_end(skb, rep);
+
netlink_unicast(in_skb->sk, skb, NETLINK_CB(in_skb).portid, MSG_DONTWAIT);
}
EXPORT_SYMBOL(netlink_ack);
int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *,
- struct nlmsghdr *))
+ struct nlmsghdr *,
+ struct netlink_ext_ack *))
{
+ struct netlink_ext_ack extack = {};
struct nlmsghdr *nlh;
int err;
@@ -2336,13 +2392,13 @@ int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *,
if (nlh->nlmsg_type < NLMSG_MIN_TYPE)
goto ack;
- err = cb(skb, nlh);
+ err = cb(skb, nlh, &extack);
if (err == -EINTR)
goto skip;
ack:
if (nlh->nlmsg_flags & NLM_F_ACK || err)
- netlink_ack(skb, nlh, err);
+ netlink_ack(skb, nlh, err, &extack);
skip:
msglen = NLMSG_ALIGN(nlh->nlmsg_len);
diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h
index 4fdb38318977..3490f2430532 100644
--- a/net/netlink/af_netlink.h
+++ b/net/netlink/af_netlink.h
@@ -6,6 +6,15 @@
#include <linux/workqueue.h>
#include <net/sock.h>
+/* flags */
+#define NETLINK_F_KERNEL_SOCKET 0x1
+#define NETLINK_F_RECV_PKTINFO 0x2
+#define NETLINK_F_BROADCAST_SEND_ERROR 0x4
+#define NETLINK_F_RECV_NO_ENOBUFS 0x8
+#define NETLINK_F_LISTEN_ALL_NSID 0x10
+#define NETLINK_F_CAP_ACK 0x20
+#define NETLINK_F_EXT_ACK 0x40
+
#define NLGRPSZ(x) (ALIGN(x, sizeof(unsigned long) * 8) / 8)
#define NLGRPLONGS(x) (NLGRPSZ(x)/sizeof(unsigned long))
diff --git a/net/netlink/diag.c b/net/netlink/diag.c
index a5546249fb10..8faa20b4d457 100644
--- a/net/netlink/diag.c
+++ b/net/netlink/diag.c
@@ -19,6 +19,27 @@ static int sk_diag_dump_groups(struct sock *sk, struct sk_buff *nlskb)
nlk->groups);
}
+static int sk_diag_put_flags(struct sock *sk, struct sk_buff *skb)
+{
+ struct netlink_sock *nlk = nlk_sk(sk);
+ u32 flags = 0;
+
+ if (nlk->cb_running)
+ flags |= NDIAG_FLAG_CB_RUNNING;
+ if (nlk->flags & NETLINK_F_RECV_PKTINFO)
+ flags |= NDIAG_FLAG_PKTINFO;
+ if (nlk->flags & NETLINK_F_BROADCAST_SEND_ERROR)
+ flags |= NDIAG_FLAG_BROADCAST_ERROR;
+ if (nlk->flags & NETLINK_F_RECV_NO_ENOBUFS)
+ flags |= NDIAG_FLAG_NO_ENOBUFS;
+ if (nlk->flags & NETLINK_F_LISTEN_ALL_NSID)
+ flags |= NDIAG_FLAG_LISTEN_ALL_NSID;
+ if (nlk->flags & NETLINK_F_CAP_ACK)
+ flags |= NDIAG_FLAG_CAP_ACK;
+
+ return nla_put_u32(skb, NETLINK_DIAG_FLAGS, flags);
+}
+
static int sk_diag_fill(struct sock *sk, struct sk_buff *skb,
struct netlink_diag_req *req,
u32 portid, u32 seq, u32 flags, int sk_ino)
@@ -52,6 +73,10 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb,
sock_diag_put_meminfo(sk, skb, NETLINK_DIAG_MEMINFO))
goto out_nlmsg_trim;
+ if ((req->ndiag_show & NDIAG_SHOW_FLAGS) &&
+ sk_diag_put_flags(sk, skb))
+ goto out_nlmsg_trim;
+
nlmsg_end(skb, nlh);
return 0;
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index 92e0981f7404..10f8b4cff40a 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -497,7 +497,8 @@ static int genl_lock_done(struct netlink_callback *cb)
static int genl_family_rcv_msg(const struct genl_family *family,
struct sk_buff *skb,
- struct nlmsghdr *nlh)
+ struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
const struct genl_ops *ops;
struct net *net = sock_net(skb->sk);
@@ -573,7 +574,7 @@ static int genl_family_rcv_msg(const struct genl_family *family,
if (attrbuf) {
err = nlmsg_parse(nlh, hdrlen, attrbuf, family->maxattr,
- ops->policy);
+ ops->policy, extack);
if (err < 0)
goto out;
}
@@ -584,6 +585,7 @@ static int genl_family_rcv_msg(const struct genl_family *family,
info.genlhdr = nlmsg_data(nlh);
info.userhdr = nlmsg_data(nlh) + GENL_HDRLEN;
info.attrs = attrbuf;
+ info.extack = extack;
genl_info_net_set(&info, net);
memset(&info.user_ptr, 0, sizeof(info.user_ptr));
@@ -605,7 +607,8 @@ out:
return err;
}
-static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
const struct genl_family *family;
int err;
@@ -617,7 +620,7 @@ static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
if (!family->parallel_ops)
genl_lock();
- err = genl_family_rcv_msg(family, skb, nlh);
+ err = genl_family_rcv_msg(family, skb, nlh, extack);
if (!family->parallel_ops)
genl_unlock();
diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c
index 03f3d5c7beb8..6b0850e63e09 100644
--- a/net/nfc/netlink.c
+++ b/net/nfc/netlink.c
@@ -119,7 +119,8 @@ static struct nfc_dev *__get_device_from_cb(struct netlink_callback *cb)
u32 idx;
rc = nlmsg_parse(cb->nlh, GENL_HDRLEN + nfc_genl_family.hdrsize,
- attrbuf, nfc_genl_family.maxattr, nfc_genl_policy);
+ attrbuf, nfc_genl_family.maxattr, nfc_genl_policy,
+ NULL);
if (rc < 0)
return ERR_PTR(rc);
@@ -303,6 +304,17 @@ free_msg:
return -EMSGSIZE;
}
+static int nfc_genl_setup_device_added(struct nfc_dev *dev, struct sk_buff *msg)
+{
+ if (nla_put_string(msg, NFC_ATTR_DEVICE_NAME, nfc_device_name(dev)) ||
+ nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx) ||
+ nla_put_u32(msg, NFC_ATTR_PROTOCOLS, dev->supported_protocols) ||
+ nla_put_u8(msg, NFC_ATTR_DEVICE_POWERED, dev->dev_up) ||
+ nla_put_u8(msg, NFC_ATTR_RF_MODE, dev->rf_mode))
+ return -1;
+ return 0;
+}
+
int nfc_genl_device_added(struct nfc_dev *dev)
{
struct sk_buff *msg;
@@ -317,10 +329,7 @@ int nfc_genl_device_added(struct nfc_dev *dev)
if (!hdr)
goto free_msg;
- if (nla_put_string(msg, NFC_ATTR_DEVICE_NAME, nfc_device_name(dev)) ||
- nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx) ||
- nla_put_u32(msg, NFC_ATTR_PROTOCOLS, dev->supported_protocols) ||
- nla_put_u8(msg, NFC_ATTR_DEVICE_POWERED, dev->dev_up))
+ if (nfc_genl_setup_device_added(dev, msg))
goto nla_put_failure;
genlmsg_end(msg, hdr);
@@ -596,11 +605,7 @@ static int nfc_genl_send_device(struct sk_buff *msg, struct nfc_dev *dev,
if (cb)
genl_dump_check_consistent(cb, hdr, &nfc_genl_family);
- if (nla_put_string(msg, NFC_ATTR_DEVICE_NAME, nfc_device_name(dev)) ||
- nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx) ||
- nla_put_u32(msg, NFC_ATTR_PROTOCOLS, dev->supported_protocols) ||
- nla_put_u8(msg, NFC_ATTR_DEVICE_POWERED, dev->dev_up) ||
- nla_put_u8(msg, NFC_ATTR_RF_MODE, dev->rf_mode))
+ if (nfc_genl_setup_device_added(dev, msg))
goto nla_put_failure;
genlmsg_end(msg, hdr);
@@ -918,7 +923,7 @@ static int nfc_genl_activate_target(struct sk_buff *skb, struct genl_info *info)
rc = nfc_activate_target(dev, target_idx, protocol);
nfc_put_device(dev);
- return 0;
+ return rc;
}
static int nfc_genl_dep_link_up(struct sk_buff *skb, struct genl_info *info)
@@ -1161,7 +1166,7 @@ static int nfc_genl_llc_sdreq(struct sk_buff *skb, struct genl_info *info)
nla_for_each_nested(attr, info->attrs[NFC_ATTR_LLC_SDP], rem) {
rc = nla_parse_nested(sdp_attrs, NFC_SDP_ATTR_MAX, attr,
- nfc_sdp_genl_policy);
+ nfc_sdp_genl_policy, info->extack);
if (rc != 0) {
rc = -EINVAL;
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index c82301ce3fff..e4610676299b 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2007-2014 Nicira, Inc.
+ * Copyright (c) 2007-2017 Nicira, Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
@@ -44,13 +44,10 @@
#include "conntrack.h"
#include "vport.h"
-static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
- struct sw_flow_key *key,
- const struct nlattr *attr, int len);
-
struct deferred_action {
struct sk_buff *skb;
const struct nlattr *actions;
+ int actions_len;
/* Store pkt_key clone when creating deferred action. */
struct sw_flow_key pkt_key;
@@ -82,14 +79,31 @@ struct action_fifo {
struct deferred_action fifo[DEFERRED_ACTION_FIFO_SIZE];
};
-struct recirc_keys {
+struct action_flow_keys {
struct sw_flow_key key[OVS_DEFERRED_ACTION_THRESHOLD];
};
static struct action_fifo __percpu *action_fifos;
-static struct recirc_keys __percpu *recirc_keys;
+static struct action_flow_keys __percpu *flow_keys;
static DEFINE_PER_CPU(int, exec_actions_level);
+/* Make a clone of the 'key', using the pre-allocated percpu 'flow_keys'
+ * space. Return NULL if out of key spaces.
+ */
+static struct sw_flow_key *clone_key(const struct sw_flow_key *key_)
+{
+ struct action_flow_keys *keys = this_cpu_ptr(flow_keys);
+ int level = this_cpu_read(exec_actions_level);
+ struct sw_flow_key *key = NULL;
+
+ if (level <= OVS_DEFERRED_ACTION_THRESHOLD) {
+ key = &keys->key[level - 1];
+ *key = *key_;
+ }
+
+ return key;
+}
+
static void action_fifo_init(struct action_fifo *fifo)
{
fifo->head = 0;
@@ -119,8 +133,9 @@ static struct deferred_action *action_fifo_put(struct action_fifo *fifo)
/* Return true if fifo is not full */
static struct deferred_action *add_deferred_actions(struct sk_buff *skb,
- const struct sw_flow_key *key,
- const struct nlattr *attr)
+ const struct sw_flow_key *key,
+ const struct nlattr *actions,
+ const int actions_len)
{
struct action_fifo *fifo;
struct deferred_action *da;
@@ -129,7 +144,8 @@ static struct deferred_action *add_deferred_actions(struct sk_buff *skb,
da = action_fifo_put(fifo);
if (da) {
da->skb = skb;
- da->actions = attr;
+ da->actions = actions;
+ da->actions_len = actions_len;
da->pkt_key = *key;
}
@@ -146,6 +162,12 @@ static bool is_flow_key_valid(const struct sw_flow_key *key)
return !(key->mac_proto & SW_FLOW_KEY_INVALID);
}
+static int clone_execute(struct datapath *dp, struct sk_buff *skb,
+ struct sw_flow_key *key,
+ u32 recirc_id,
+ const struct nlattr *actions, int len,
+ bool last, bool clone_flow_key);
+
static void update_ethertype(struct sk_buff *skb, struct ethhdr *hdr,
__be16 ethertype)
{
@@ -908,72 +930,35 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb,
return ovs_dp_upcall(dp, skb, key, &upcall, cutlen);
}
+/* When 'last' is true, sample() should always consume the 'skb'.
+ * Otherwise, sample() should keep 'skb' intact regardless what
+ * actions are executed within sample().
+ */
static int sample(struct datapath *dp, struct sk_buff *skb,
struct sw_flow_key *key, const struct nlattr *attr,
- const struct nlattr *actions, int actions_len)
+ bool last)
{
- const struct nlattr *acts_list = NULL;
- const struct nlattr *a;
- int rem;
- u32 cutlen = 0;
-
- for (a = nla_data(attr), rem = nla_len(attr); rem > 0;
- a = nla_next(a, &rem)) {
- u32 probability;
-
- switch (nla_type(a)) {
- case OVS_SAMPLE_ATTR_PROBABILITY:
- probability = nla_get_u32(a);
- if (!probability || prandom_u32() > probability)
- return 0;
- break;
-
- case OVS_SAMPLE_ATTR_ACTIONS:
- acts_list = a;
- break;
- }
- }
-
- rem = nla_len(acts_list);
- a = nla_data(acts_list);
-
- /* Actions list is empty, do nothing */
- if (unlikely(!rem))
+ struct nlattr *actions;
+ struct nlattr *sample_arg;
+ int rem = nla_len(attr);
+ const struct sample_arg *arg;
+ bool clone_flow_key;
+
+ /* The first action is always 'OVS_SAMPLE_ATTR_ARG'. */
+ sample_arg = nla_data(attr);
+ arg = nla_data(sample_arg);
+ actions = nla_next(sample_arg, &rem);
+
+ if ((arg->probability != U32_MAX) &&
+ (!arg->probability || prandom_u32() > arg->probability)) {
+ if (last)
+ consume_skb(skb);
return 0;
-
- /* The only known usage of sample action is having a single user-space
- * action, or having a truncate action followed by a single user-space
- * action. Treat this usage as a special case.
- * The output_userspace() should clone the skb to be sent to the
- * user space. This skb will be consumed by its caller.
- */
- if (unlikely(nla_type(a) == OVS_ACTION_ATTR_TRUNC)) {
- struct ovs_action_trunc *trunc = nla_data(a);
-
- if (skb->len > trunc->max_len)
- cutlen = skb->len - trunc->max_len;
-
- a = nla_next(a, &rem);
}
- if (likely(nla_type(a) == OVS_ACTION_ATTR_USERSPACE &&
- nla_is_last(a, rem)))
- return output_userspace(dp, skb, key, a, actions,
- actions_len, cutlen);
-
- skb = skb_clone(skb, GFP_ATOMIC);
- if (!skb)
- /* Skip the sample action when out of memory. */
- return 0;
-
- if (!add_deferred_actions(skb, key, a)) {
- if (net_ratelimit())
- pr_warn("%s: deferred actions limit reached, dropping sample action\n",
- ovs_dp_name(dp));
-
- kfree_skb(skb);
- }
- return 0;
+ clone_flow_key = !arg->exec;
+ return clone_execute(dp, skb, key, 0, actions, rem, last,
+ clone_flow_key);
}
static void execute_hash(struct sk_buff *skb, struct sw_flow_key *key,
@@ -1084,10 +1069,9 @@ static int execute_masked_set_action(struct sk_buff *skb,
static int execute_recirc(struct datapath *dp, struct sk_buff *skb,
struct sw_flow_key *key,
- const struct nlattr *a, int rem)
+ const struct nlattr *a, bool last)
{
- struct deferred_action *da;
- int level;
+ u32 recirc_id;
if (!is_flow_key_valid(key)) {
int err;
@@ -1098,43 +1082,8 @@ static int execute_recirc(struct datapath *dp, struct sk_buff *skb,
}
BUG_ON(!is_flow_key_valid(key));
- if (!nla_is_last(a, rem)) {
- /* Recirc action is the not the last action
- * of the action list, need to clone the skb.
- */
- skb = skb_clone(skb, GFP_ATOMIC);
-
- /* Skip the recirc action when out of memory, but
- * continue on with the rest of the action list.
- */
- if (!skb)
- return 0;
- }
-
- level = this_cpu_read(exec_actions_level);
- if (level <= OVS_DEFERRED_ACTION_THRESHOLD) {
- struct recirc_keys *rks = this_cpu_ptr(recirc_keys);
- struct sw_flow_key *recirc_key = &rks->key[level - 1];
-
- *recirc_key = *key;
- recirc_key->recirc_id = nla_get_u32(a);
- ovs_dp_process_packet(skb, recirc_key);
-
- return 0;
- }
-
- da = add_deferred_actions(skb, key, NULL);
- if (da) {
- da->pkt_key.recirc_id = nla_get_u32(a);
- } else {
- kfree_skb(skb);
-
- if (net_ratelimit())
- pr_warn("%s: deferred action limit reached, drop recirc action\n",
- ovs_dp_name(dp));
- }
-
- return 0;
+ recirc_id = nla_get_u32(a);
+ return clone_execute(dp, skb, key, recirc_id, NULL, 0, last, true);
}
/* Execute a list of actions against 'skb'. */
@@ -1206,9 +1155,11 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
err = pop_vlan(skb, key);
break;
- case OVS_ACTION_ATTR_RECIRC:
- err = execute_recirc(dp, skb, key, a, rem);
- if (nla_is_last(a, rem)) {
+ case OVS_ACTION_ATTR_RECIRC: {
+ bool last = nla_is_last(a, rem);
+
+ err = execute_recirc(dp, skb, key, a, last);
+ if (last) {
/* If this is the last action, the skb has
* been consumed or freed.
* Return immediately.
@@ -1216,6 +1167,7 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
return err;
}
break;
+ }
case OVS_ACTION_ATTR_SET:
err = execute_set_action(skb, key, nla_data(a));
@@ -1226,9 +1178,15 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
err = execute_masked_set_action(skb, key, nla_data(a));
break;
- case OVS_ACTION_ATTR_SAMPLE:
- err = sample(dp, skb, key, a, attr, len);
+ case OVS_ACTION_ATTR_SAMPLE: {
+ bool last = nla_is_last(a, rem);
+
+ err = sample(dp, skb, key, a, last);
+ if (last)
+ return err;
+
break;
+ }
case OVS_ACTION_ATTR_CT:
if (!is_flow_key_valid(key)) {
@@ -1264,6 +1222,79 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
return 0;
}
+/* Execute the actions on the clone of the packet. The effect of the
+ * execution does not affect the original 'skb' nor the original 'key'.
+ *
+ * The execution may be deferred in case the actions can not be executed
+ * immediately.
+ */
+static int clone_execute(struct datapath *dp, struct sk_buff *skb,
+ struct sw_flow_key *key, u32 recirc_id,
+ const struct nlattr *actions, int len,
+ bool last, bool clone_flow_key)
+{
+ struct deferred_action *da;
+ struct sw_flow_key *clone;
+
+ skb = last ? skb : skb_clone(skb, GFP_ATOMIC);
+ if (!skb) {
+ /* Out of memory, skip this action.
+ */
+ return 0;
+ }
+
+ /* When clone_flow_key is false, the 'key' will not be change
+ * by the actions, then the 'key' can be used directly.
+ * Otherwise, try to clone key from the next recursion level of
+ * 'flow_keys'. If clone is successful, execute the actions
+ * without deferring.
+ */
+ clone = clone_flow_key ? clone_key(key) : key;
+ if (clone) {
+ int err = 0;
+
+ if (actions) { /* Sample action */
+ if (clone_flow_key)
+ __this_cpu_inc(exec_actions_level);
+
+ err = do_execute_actions(dp, skb, clone,
+ actions, len);
+
+ if (clone_flow_key)
+ __this_cpu_dec(exec_actions_level);
+ } else { /* Recirc action */
+ clone->recirc_id = recirc_id;
+ ovs_dp_process_packet(skb, clone);
+ }
+ return err;
+ }
+
+ /* Out of 'flow_keys' space. Defer actions */
+ da = add_deferred_actions(skb, key, actions, len);
+ if (da) {
+ if (!actions) { /* Recirc action */
+ key = &da->pkt_key;
+ key->recirc_id = recirc_id;
+ }
+ } else {
+ /* Out of per CPU action FIFO space. Drop the 'skb' and
+ * log an error.
+ */
+ kfree_skb(skb);
+
+ if (net_ratelimit()) {
+ if (actions) { /* Sample action */
+ pr_warn("%s: deferred action limit reached, drop sample action\n",
+ ovs_dp_name(dp));
+ } else { /* Recirc action */
+ pr_warn("%s: deferred action limit reached, drop recirc action\n",
+ ovs_dp_name(dp));
+ }
+ }
+ }
+ return 0;
+}
+
static void process_deferred_actions(struct datapath *dp)
{
struct action_fifo *fifo = this_cpu_ptr(action_fifos);
@@ -1278,10 +1309,10 @@ static void process_deferred_actions(struct datapath *dp)
struct sk_buff *skb = da->skb;
struct sw_flow_key *key = &da->pkt_key;
const struct nlattr *actions = da->actions;
+ int actions_len = da->actions_len;
if (actions)
- do_execute_actions(dp, skb, key, actions,
- nla_len(actions));
+ do_execute_actions(dp, skb, key, actions, actions_len);
else
ovs_dp_process_packet(skb, key);
} while (!action_fifo_is_empty(fifo));
@@ -1323,8 +1354,8 @@ int action_fifos_init(void)
if (!action_fifos)
return -ENOMEM;
- recirc_keys = alloc_percpu(struct recirc_keys);
- if (!recirc_keys) {
+ flow_keys = alloc_percpu(struct action_flow_keys);
+ if (!flow_keys) {
free_percpu(action_fifos);
return -ENOMEM;
}
@@ -1335,5 +1366,5 @@ int action_fifos_init(void)
void action_fifos_exit(void)
{
free_percpu(action_fifos);
- free_percpu(recirc_keys);
+ free_percpu(flow_keys);
}
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index 7b2c2fce408a..bf602e33c40a 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -66,7 +66,9 @@ struct ovs_conntrack_info {
u8 commit : 1;
u8 nat : 3; /* enum ovs_ct_nat */
u8 force : 1;
+ u8 have_eventmask : 1;
u16 family;
+ u32 eventmask; /* Mask of 1 << IPCT_*. */
struct md_mark mark;
struct md_labels labels;
#ifdef CONFIG_NF_NAT_NEEDED
@@ -373,7 +375,7 @@ static int ovs_ct_init_labels(struct nf_conn *ct, struct sw_flow_key *key,
}
/* Labels are included in the IPCTNL_MSG_CT_NEW event only if the
- * IPCT_LABEL bit it set in the event cache.
+ * IPCT_LABEL bit is set in the event cache.
*/
nf_conntrack_event_cache(IPCT_LABEL, ct);
@@ -514,10 +516,38 @@ ovs_ct_expect_find(struct net *net, const struct nf_conntrack_zone *zone,
u16 proto, const struct sk_buff *skb)
{
struct nf_conntrack_tuple tuple;
+ struct nf_conntrack_expect *exp;
if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), proto, net, &tuple))
return NULL;
- return __nf_ct_expect_find(net, zone, &tuple);
+
+ exp = __nf_ct_expect_find(net, zone, &tuple);
+ if (exp) {
+ struct nf_conntrack_tuple_hash *h;
+
+ /* Delete existing conntrack entry, if it clashes with the
+ * expectation. This can happen since conntrack ALGs do not
+ * check for clashes between (new) expectations and existing
+ * conntrack entries. nf_conntrack_in() will check the
+ * expectations only if a conntrack entry can not be found,
+ * which can lead to OVS finding the expectation (here) in the
+ * init direction, but which will not be removed by the
+ * nf_conntrack_in() call, if a matching conntrack entry is
+ * found instead. In this case all init direction packets
+ * would be reported as new related packets, while reply
+ * direction packets would be reported as un-related
+ * established packets.
+ */
+ h = nf_conntrack_find_get(net, zone, &tuple);
+ if (h) {
+ struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
+
+ nf_ct_delete(ct, 0, 0);
+ nf_conntrack_put(&ct->ct_general);
+ }
+ }
+
+ return exp;
}
/* This replicates logic from nf_conntrack_core.c that is not exported. */
@@ -795,11 +825,6 @@ static int ovs_ct_nat(struct net *net, struct sw_flow_key *key,
enum nf_nat_manip_type maniptype;
int err;
- if (nf_ct_is_untracked(ct)) {
- /* A NAT action may only be performed on tracked packets. */
- return NF_ACCEPT;
- }
-
/* Add NAT extension if not confirmed yet. */
if (!nf_ct_is_confirmed(ct) && !nf_ct_nat_ext_add(ct))
return NF_ACCEPT; /* Can't NAT. */
@@ -1007,6 +1032,20 @@ static int ovs_ct_commit(struct net *net, struct sw_flow_key *key,
if (!ct)
return 0;
+ /* Set the conntrack event mask if given. NEW and DELETE events have
+ * their own groups, but the NFNLGRP_CONNTRACK_UPDATE group listener
+ * typically would receive many kinds of updates. Setting the event
+ * mask allows those events to be filtered. The set event mask will
+ * remain in effect for the lifetime of the connection unless changed
+ * by a further CT action with both the commit flag and the eventmask
+ * option. */
+ if (info->have_eventmask) {
+ struct nf_conntrack_ecache *cache = nf_ct_ecache_find(ct);
+
+ if (cache)
+ cache->ctmask = info->eventmask;
+ }
+
/* Apply changes before confirming the connection so that the initial
* conntrack NEW netlink event carries the values given in the CT
* action.
@@ -1238,6 +1277,8 @@ static const struct ovs_ct_len_tbl ovs_ct_attr_lens[OVS_CT_ATTR_MAX + 1] = {
/* NAT length is checked when parsing the nested attributes. */
[OVS_CT_ATTR_NAT] = { .minlen = 0, .maxlen = INT_MAX },
#endif
+ [OVS_CT_ATTR_EVENTMASK] = { .minlen = sizeof(u32),
+ .maxlen = sizeof(u32) },
};
static int parse_ct(const struct nlattr *attr, struct ovs_conntrack_info *info,
@@ -1316,6 +1357,11 @@ static int parse_ct(const struct nlattr *attr, struct ovs_conntrack_info *info,
break;
}
#endif
+ case OVS_CT_ATTR_EVENTMASK:
+ info->have_eventmask = true;
+ info->eventmask = nla_get_u32(a);
+ break;
+
default:
OVS_NLERR(log, "Unknown conntrack attr (%d)",
type);
@@ -1515,6 +1561,10 @@ int ovs_ct_action_to_attr(const struct ovs_conntrack_info *ct_info,
ct_info->helper->name))
return -EMSGSIZE;
}
+ if (ct_info->have_eventmask &&
+ nla_put_u32(skb, OVS_CT_ATTR_EVENTMASK, ct_info->eventmask))
+ return -EMSGSIZE;
+
#ifdef CONFIG_NF_NAT_NEEDED
if (ct_info->nat && !ovs_ct_nat_to_attr(ct_info, skb))
return -EMSGSIZE;
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 9c62b6325f7a..7b17da9a94a0 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -1353,7 +1353,7 @@ static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
int err;
err = genlmsg_parse(cb->nlh, &dp_flow_genl_family, a,
- OVS_FLOW_ATTR_MAX, flow_policy);
+ OVS_FLOW_ATTR_MAX, flow_policy, NULL);
if (err)
return err;
ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
index 1c6e9377436d..da931bdef8a7 100644
--- a/net/openvswitch/datapath.h
+++ b/net/openvswitch/datapath.h
@@ -34,8 +34,6 @@
#define DP_MAX_PORTS USHRT_MAX
#define DP_VPORT_HASH_BUCKETS 1024
-#define SAMPLE_ACTION_DEPTH 3
-
/**
* struct dp_stats_percpu - per-cpu packet processing statistics for a given
* datapath.
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index 1105a838bab8..7e1d8a2afa63 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2007-2014 Nicira, Inc.
+ * Copyright (c) 2007-2017 Nicira, Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
@@ -59,6 +59,39 @@ struct ovs_len_tbl {
#define OVS_ATTR_NESTED -1
#define OVS_ATTR_VARIABLE -2
+static bool actions_may_change_flow(const struct nlattr *actions)
+{
+ struct nlattr *nla;
+ int rem;
+
+ nla_for_each_nested(nla, actions, rem) {
+ u16 action = nla_type(nla);
+
+ switch (action) {
+ case OVS_ACTION_ATTR_OUTPUT:
+ case OVS_ACTION_ATTR_RECIRC:
+ case OVS_ACTION_ATTR_TRUNC:
+ case OVS_ACTION_ATTR_USERSPACE:
+ break;
+
+ case OVS_ACTION_ATTR_CT:
+ case OVS_ACTION_ATTR_HASH:
+ case OVS_ACTION_ATTR_POP_ETH:
+ case OVS_ACTION_ATTR_POP_MPLS:
+ case OVS_ACTION_ATTR_POP_VLAN:
+ case OVS_ACTION_ATTR_PUSH_ETH:
+ case OVS_ACTION_ATTR_PUSH_MPLS:
+ case OVS_ACTION_ATTR_PUSH_VLAN:
+ case OVS_ACTION_ATTR_SAMPLE:
+ case OVS_ACTION_ATTR_SET:
+ case OVS_ACTION_ATTR_SET_MASKED:
+ default:
+ return true;
+ }
+ }
+ return false;
+}
+
static void update_range(struct sw_flow_match *match,
size_t offset, size_t size, bool is_mask)
{
@@ -2023,18 +2056,20 @@ static inline void add_nested_action_end(struct sw_flow_actions *sfa,
static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
const struct sw_flow_key *key,
- int depth, struct sw_flow_actions **sfa,
+ struct sw_flow_actions **sfa,
__be16 eth_type, __be16 vlan_tci, bool log);
static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
- const struct sw_flow_key *key, int depth,
+ const struct sw_flow_key *key,
struct sw_flow_actions **sfa,
- __be16 eth_type, __be16 vlan_tci, bool log)
+ __be16 eth_type, __be16 vlan_tci,
+ bool log, bool last)
{
const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1];
const struct nlattr *probability, *actions;
const struct nlattr *a;
- int rem, start, err, st_acts;
+ int rem, start, err;
+ struct sample_arg arg;
memset(attrs, 0, sizeof(attrs));
nla_for_each_nested(a, attr, rem) {
@@ -2058,20 +2093,32 @@ static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SAMPLE, log);
if (start < 0)
return start;
- err = ovs_nla_add_action(sfa, OVS_SAMPLE_ATTR_PROBABILITY,
- nla_data(probability), sizeof(u32), log);
+
+ /* When both skb and flow may be changed, put the sample
+ * into a deferred fifo. On the other hand, if only skb
+ * may be modified, the actions can be executed in place.
+ *
+ * Do this analysis at the flow installation time.
+ * Set 'clone_action->exec' to true if the actions can be
+ * executed without being deferred.
+ *
+ * If the sample is the last action, it can always be excuted
+ * rather than deferred.
+ */
+ arg.exec = last || !actions_may_change_flow(actions);
+ arg.probability = nla_get_u32(probability);
+
+ err = ovs_nla_add_action(sfa, OVS_SAMPLE_ATTR_ARG, &arg, sizeof(arg),
+ log);
if (err)
return err;
- st_acts = add_nested_action_start(sfa, OVS_SAMPLE_ATTR_ACTIONS, log);
- if (st_acts < 0)
- return st_acts;
- err = __ovs_nla_copy_actions(net, actions, key, depth + 1, sfa,
+ err = __ovs_nla_copy_actions(net, actions, key, sfa,
eth_type, vlan_tci, log);
+
if (err)
return err;
- add_nested_action_end(*sfa, st_acts);
add_nested_action_end(*sfa, start);
return 0;
@@ -2380,8 +2427,8 @@ static int validate_userspace(const struct nlattr *attr)
struct nlattr *a[OVS_USERSPACE_ATTR_MAX + 1];
int error;
- error = nla_parse_nested(a, OVS_USERSPACE_ATTR_MAX,
- attr, userspace_policy);
+ error = nla_parse_nested(a, OVS_USERSPACE_ATTR_MAX, attr,
+ userspace_policy, NULL);
if (error)
return error;
@@ -2408,16 +2455,13 @@ static int copy_action(const struct nlattr *from,
static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
const struct sw_flow_key *key,
- int depth, struct sw_flow_actions **sfa,
+ struct sw_flow_actions **sfa,
__be16 eth_type, __be16 vlan_tci, bool log)
{
u8 mac_proto = ovs_key_mac_proto(key);
const struct nlattr *a;
int rem, err;
- if (depth >= SAMPLE_ACTION_DEPTH)
- return -EOVERFLOW;
-
nla_for_each_nested(a, attr, rem) {
/* Expected argument lengths, (u32)-1 for variable length. */
static const u32 action_lens[OVS_ACTION_ATTR_MAX + 1] = {
@@ -2555,13 +2599,17 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
return err;
break;
- case OVS_ACTION_ATTR_SAMPLE:
- err = validate_and_copy_sample(net, a, key, depth, sfa,
- eth_type, vlan_tci, log);
+ case OVS_ACTION_ATTR_SAMPLE: {
+ bool last = nla_is_last(a, rem);
+
+ err = validate_and_copy_sample(net, a, key, sfa,
+ eth_type, vlan_tci,
+ log, last);
if (err)
return err;
skip_copy = true;
break;
+ }
case OVS_ACTION_ATTR_CT:
err = ovs_ct_copy_action(net, a, key, sfa, log);
@@ -2615,7 +2663,7 @@ int ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
return PTR_ERR(*sfa);
(*sfa)->orig_len = nla_len(attr);
- err = __ovs_nla_copy_actions(net, attr, key, 0, sfa, key->eth.type,
+ err = __ovs_nla_copy_actions(net, attr, key, sfa, key->eth.type,
key->eth.vlan.tci, log);
if (err)
ovs_nla_free_flow_actions(*sfa);
@@ -2623,39 +2671,44 @@ int ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
return err;
}
-static int sample_action_to_attr(const struct nlattr *attr, struct sk_buff *skb)
+static int sample_action_to_attr(const struct nlattr *attr,
+ struct sk_buff *skb)
{
- const struct nlattr *a;
- struct nlattr *start;
- int err = 0, rem;
+ struct nlattr *start, *ac_start = NULL, *sample_arg;
+ int err = 0, rem = nla_len(attr);
+ const struct sample_arg *arg;
+ struct nlattr *actions;
start = nla_nest_start(skb, OVS_ACTION_ATTR_SAMPLE);
if (!start)
return -EMSGSIZE;
- nla_for_each_nested(a, attr, rem) {
- int type = nla_type(a);
- struct nlattr *st_sample;
+ sample_arg = nla_data(attr);
+ arg = nla_data(sample_arg);
+ actions = nla_next(sample_arg, &rem);
- switch (type) {
- case OVS_SAMPLE_ATTR_PROBABILITY:
- if (nla_put(skb, OVS_SAMPLE_ATTR_PROBABILITY,
- sizeof(u32), nla_data(a)))
- return -EMSGSIZE;
- break;
- case OVS_SAMPLE_ATTR_ACTIONS:
- st_sample = nla_nest_start(skb, OVS_SAMPLE_ATTR_ACTIONS);
- if (!st_sample)
- return -EMSGSIZE;
- err = ovs_nla_put_actions(nla_data(a), nla_len(a), skb);
- if (err)
- return err;
- nla_nest_end(skb, st_sample);
- break;
- }
+ if (nla_put_u32(skb, OVS_SAMPLE_ATTR_PROBABILITY, arg->probability)) {
+ err = -EMSGSIZE;
+ goto out;
+ }
+
+ ac_start = nla_nest_start(skb, OVS_SAMPLE_ATTR_ACTIONS);
+ if (!ac_start) {
+ err = -EMSGSIZE;
+ goto out;
+ }
+
+ err = ovs_nla_put_actions(actions, rem, skb);
+
+out:
+ if (err) {
+ nla_nest_cancel(skb, ac_start);
+ nla_nest_cancel(skb, start);
+ } else {
+ nla_nest_end(skb, ac_start);
+ nla_nest_end(skb, start);
}
- nla_nest_end(skb, start);
return err;
}
diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c
index 7eb955e453e6..869acb3b3d3f 100644
--- a/net/openvswitch/vport-vxlan.c
+++ b/net/openvswitch/vport-vxlan.c
@@ -70,7 +70,8 @@ static int vxlan_configure_exts(struct vport *vport, struct nlattr *attr,
if (nla_len(attr) < sizeof(struct nlattr))
return -EINVAL;
- err = nla_parse_nested(exts, OVS_VXLAN_EXT_MAX, attr, exts_policy);
+ err = nla_parse_nested(exts, OVS_VXLAN_EXT_MAX, attr, exts_policy,
+ NULL);
if (err < 0)
return err;
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index ea81ccf3c7d6..e3eeed19cc7a 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1496,6 +1496,7 @@ static int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev,
DEFINE_MUTEX(fanout_mutex);
EXPORT_SYMBOL_GPL(fanout_mutex);
static LIST_HEAD(fanout_list);
+static u16 fanout_next_id;
static void __fanout_link(struct sock *sk, struct packet_sock *po)
{
@@ -1629,6 +1630,36 @@ static void fanout_release_data(struct packet_fanout *f)
};
}
+static bool __fanout_id_is_free(struct sock *sk, u16 candidate_id)
+{
+ struct packet_fanout *f;
+
+ list_for_each_entry(f, &fanout_list, list) {
+ if (f->id == candidate_id &&
+ read_pnet(&f->net) == sock_net(sk)) {
+ return false;
+ }
+ }
+ return true;
+}
+
+static bool fanout_find_new_id(struct sock *sk, u16 *new_id)
+{
+ u16 id = fanout_next_id;
+
+ do {
+ if (__fanout_id_is_free(sk, id)) {
+ *new_id = id;
+ fanout_next_id = id + 1;
+ return true;
+ }
+
+ id++;
+ } while (id != fanout_next_id);
+
+ return false;
+}
+
static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
{
struct packet_rollover *rollover = NULL;
@@ -1676,6 +1707,19 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
po->rollover = rollover;
}
+ if (type_flags & PACKET_FANOUT_FLAG_UNIQUEID) {
+ if (id != 0) {
+ err = -EINVAL;
+ goto out;
+ }
+ if (!fanout_find_new_id(sk, &id)) {
+ err = -ENOMEM;
+ goto out;
+ }
+ /* ephemeral flag for the first socket in the group: drop it */
+ flags &= ~(PACKET_FANOUT_FLAG_UNIQUEID >> 8);
+ }
+
match = NULL;
list_for_each_entry(f, &fanout_list, list) {
if (f->id == id &&
@@ -2614,13 +2658,6 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
dev = dev_get_by_index(sock_net(&po->sk), saddr->sll_ifindex);
}
- sockc.tsflags = po->sk.sk_tsflags;
- if (msg->msg_controllen) {
- err = sock_cmsg_send(&po->sk, msg, &sockc);
- if (unlikely(err))
- goto out;
- }
-
err = -ENXIO;
if (unlikely(dev == NULL))
goto out;
@@ -2628,6 +2665,13 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
if (unlikely(!(dev->flags & IFF_UP)))
goto out_put;
+ sockc.tsflags = po->sk.sk_tsflags;
+ if (msg->msg_controllen) {
+ err = sock_cmsg_send(&po->sk, msg, &sockc);
+ if (unlikely(err))
+ goto out_put;
+ }
+
if (po->sk.sk_socket->type == SOCK_RAW)
reserve = dev->hard_header_len;
size_max = po->tx_ring.frame_size
diff --git a/net/phonet/pn_netlink.c b/net/phonet/pn_netlink.c
index bc5ee5fbe6ae..45b3af3080d8 100644
--- a/net/phonet/pn_netlink.c
+++ b/net/phonet/pn_netlink.c
@@ -61,7 +61,8 @@ static const struct nla_policy ifa_phonet_policy[IFA_MAX+1] = {
[IFA_LOCAL] = { .type = NLA_U8 },
};
-static int addr_doit(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int addr_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
struct nlattr *tb[IFA_MAX+1];
@@ -78,7 +79,8 @@ static int addr_doit(struct sk_buff *skb, struct nlmsghdr *nlh)
ASSERT_RTNL();
- err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_phonet_policy);
+ err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_phonet_policy,
+ extack);
if (err < 0)
return err;
@@ -226,7 +228,8 @@ static const struct nla_policy rtm_phonet_policy[RTA_MAX+1] = {
[RTA_OIF] = { .type = NLA_U32 },
};
-static int route_doit(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int route_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
struct nlattr *tb[RTA_MAX+1];
@@ -243,7 +246,8 @@ static int route_doit(struct sk_buff *skb, struct nlmsghdr *nlh)
ASSERT_RTNL();
- err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_phonet_policy);
+ err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_phonet_policy,
+ extack);
if (err < 0)
return err;
diff --git a/net/qrtr/Kconfig b/net/qrtr/Kconfig
index b83c6807a5ae..326fd97444f5 100644
--- a/net/qrtr/Kconfig
+++ b/net/qrtr/Kconfig
@@ -16,7 +16,7 @@ if QRTR
config QRTR_SMD
tristate "SMD IPC Router channels"
- depends on QCOM_SMD || (COMPILE_TEST && QCOM_SMD=n)
+ depends on RPMSG || (COMPILE_TEST && RPMSG=n)
---help---
Say Y here to support SMD based ipcrouter channels. SMD is the
most common transport for IPC Router.
diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c
index 9da7368b0140..a9a8c7d5a4a9 100644
--- a/net/qrtr/qrtr.c
+++ b/net/qrtr/qrtr.c
@@ -945,7 +945,8 @@ static const struct nla_policy qrtr_policy[IFA_MAX + 1] = {
[IFA_LOCAL] = { .type = NLA_U32 },
};
-static int qrtr_addr_doit(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int qrtr_addr_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
struct nlattr *tb[IFA_MAX + 1];
struct ifaddrmsg *ifm;
@@ -959,7 +960,7 @@ static int qrtr_addr_doit(struct sk_buff *skb, struct nlmsghdr *nlh)
ASSERT_RTNL();
- rc = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, qrtr_policy);
+ rc = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, qrtr_policy, extack);
if (rc < 0)
return rc;
diff --git a/net/qrtr/smd.c b/net/qrtr/smd.c
index 0d11132b3370..50615d5efac1 100644
--- a/net/qrtr/smd.c
+++ b/net/qrtr/smd.c
@@ -14,21 +14,21 @@
#include <linux/module.h>
#include <linux/skbuff.h>
-#include <linux/soc/qcom/smd.h>
+#include <linux/rpmsg.h>
#include "qrtr.h"
struct qrtr_smd_dev {
struct qrtr_endpoint ep;
- struct qcom_smd_channel *channel;
+ struct rpmsg_endpoint *channel;
struct device *dev;
};
/* from smd to qrtr */
-static int qcom_smd_qrtr_callback(struct qcom_smd_channel *channel,
- const void *data, size_t len)
+static int qcom_smd_qrtr_callback(struct rpmsg_device *rpdev,
+ void *data, int len, void *priv, u32 addr)
{
- struct qrtr_smd_dev *qdev = qcom_smd_get_drvdata(channel);
+ struct qrtr_smd_dev *qdev = dev_get_drvdata(&rpdev->dev);
int rc;
if (!qdev)
@@ -54,7 +54,7 @@ static int qcom_smd_qrtr_send(struct qrtr_endpoint *ep, struct sk_buff *skb)
if (rc)
goto out;
- rc = qcom_smd_send(qdev->channel, skb->data, skb->len);
+ rc = rpmsg_send(qdev->channel, skb->data, skb->len);
out:
if (rc)
@@ -64,57 +64,55 @@ out:
return rc;
}
-static int qcom_smd_qrtr_probe(struct qcom_smd_device *sdev)
+static int qcom_smd_qrtr_probe(struct rpmsg_device *rpdev)
{
struct qrtr_smd_dev *qdev;
int rc;
- qdev = devm_kzalloc(&sdev->dev, sizeof(*qdev), GFP_KERNEL);
+ qdev = devm_kzalloc(&rpdev->dev, sizeof(*qdev), GFP_KERNEL);
if (!qdev)
return -ENOMEM;
- qdev->channel = sdev->channel;
- qdev->dev = &sdev->dev;
+ qdev->channel = rpdev->ept;
+ qdev->dev = &rpdev->dev;
qdev->ep.xmit = qcom_smd_qrtr_send;
rc = qrtr_endpoint_register(&qdev->ep, QRTR_EP_NID_AUTO);
if (rc)
return rc;
- qcom_smd_set_drvdata(sdev->channel, qdev);
- dev_set_drvdata(&sdev->dev, qdev);
+ dev_set_drvdata(&rpdev->dev, qdev);
- dev_dbg(&sdev->dev, "Qualcomm SMD QRTR driver probed\n");
+ dev_dbg(&rpdev->dev, "Qualcomm SMD QRTR driver probed\n");
return 0;
}
-static void qcom_smd_qrtr_remove(struct qcom_smd_device *sdev)
+static void qcom_smd_qrtr_remove(struct rpmsg_device *rpdev)
{
- struct qrtr_smd_dev *qdev = dev_get_drvdata(&sdev->dev);
+ struct qrtr_smd_dev *qdev = dev_get_drvdata(&rpdev->dev);
qrtr_endpoint_unregister(&qdev->ep);
- dev_set_drvdata(&sdev->dev, NULL);
+ dev_set_drvdata(&rpdev->dev, NULL);
}
-static const struct qcom_smd_id qcom_smd_qrtr_smd_match[] = {
+static const struct rpmsg_device_id qcom_smd_qrtr_smd_match[] = {
{ "IPCRTR" },
{}
};
-static struct qcom_smd_driver qcom_smd_qrtr_driver = {
+static struct rpmsg_driver qcom_smd_qrtr_driver = {
.probe = qcom_smd_qrtr_probe,
.remove = qcom_smd_qrtr_remove,
.callback = qcom_smd_qrtr_callback,
- .smd_match_table = qcom_smd_qrtr_smd_match,
- .driver = {
+ .id_table = qcom_smd_qrtr_smd_match,
+ .drv = {
.name = "qcom_smd_qrtr",
- .owner = THIS_MODULE,
},
};
-module_qcom_smd_driver(qcom_smd_qrtr_driver);
+module_rpmsg_driver(qcom_smd_qrtr_driver);
MODULE_DESCRIPTION("Qualcomm IPC-Router SMD interface driver");
MODULE_LICENSE("GPL v2");
diff --git a/net/rds/connection.c b/net/rds/connection.c
index 1fa75ab7b733..6a5ebdea7d2e 100644
--- a/net/rds/connection.c
+++ b/net/rds/connection.c
@@ -333,11 +333,19 @@ void rds_conn_shutdown(struct rds_conn_path *cp)
rds_conn_path_reset(cp);
if (!rds_conn_path_transition(cp, RDS_CONN_DISCONNECTING,
+ RDS_CONN_DOWN) &&
+ !rds_conn_path_transition(cp, RDS_CONN_ERROR,
RDS_CONN_DOWN)) {
/* This can happen - eg when we're in the middle of tearing
* down the connection, and someone unloads the rds module.
- * Quite reproduceable with loopback connections.
+ * Quite reproducible with loopback connections.
* Mostly harmless.
+ *
+ * Note that this also happens with rds-tcp because
+ * we could have triggered rds_conn_path_drop in irq
+ * mode from rds_tcp_state change on the receipt of
+ * a FIN, thus we need to recheck for RDS_CONN_ERROR
+ * here.
*/
rds_conn_path_error(cp, "%s: failed to transition "
"to state DOWN, current state "
diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c
index 1c38d2c7caa8..80fb6f63e768 100644
--- a/net/rds/ib_cm.c
+++ b/net/rds/ib_cm.c
@@ -702,9 +702,8 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id,
event->param.conn.initiator_depth);
/* rdma_accept() calls rdma_reject() internally if it fails */
- err = rdma_accept(cm_id, &conn_param);
- if (err)
- rds_ib_conn_error(conn, "rdma_accept failed (%d)\n", err);
+ if (rdma_accept(cm_id, &conn_param))
+ rds_ib_conn_error(conn, "rdma_accept failed\n");
out:
if (conn)
diff --git a/net/rds/ib_fmr.c b/net/rds/ib_fmr.c
index 4fe8f4fec4ee..86ef907067bb 100644
--- a/net/rds/ib_fmr.c
+++ b/net/rds/ib_fmr.c
@@ -78,17 +78,15 @@ struct rds_ib_mr *rds_ib_alloc_fmr(struct rds_ib_device *rds_ibdev, int npages)
return ibmr;
out_no_cigar:
- if (ibmr) {
- if (fmr->fmr)
- ib_dealloc_fmr(fmr->fmr);
- kfree(ibmr);
- }
+ kfree(ibmr);
atomic_dec(&pool->item_count);
+
return ERR_PTR(err);
}
-int rds_ib_map_fmr(struct rds_ib_device *rds_ibdev, struct rds_ib_mr *ibmr,
- struct scatterlist *sg, unsigned int nents)
+static int rds_ib_map_fmr(struct rds_ib_device *rds_ibdev,
+ struct rds_ib_mr *ibmr, struct scatterlist *sg,
+ unsigned int nents)
{
struct ib_device *dev = rds_ibdev->dev;
struct rds_ib_fmr *fmr = &ibmr->u.fmr;
@@ -114,29 +112,39 @@ int rds_ib_map_fmr(struct rds_ib_device *rds_ibdev, struct rds_ib_mr *ibmr,
u64 dma_addr = ib_sg_dma_address(dev, &scat[i]);
if (dma_addr & ~PAGE_MASK) {
- if (i > 0)
+ if (i > 0) {
+ ib_dma_unmap_sg(dev, sg, nents,
+ DMA_BIDIRECTIONAL);
return -EINVAL;
- else
+ } else {
++page_cnt;
+ }
}
if ((dma_addr + dma_len) & ~PAGE_MASK) {
- if (i < sg_dma_len - 1)
+ if (i < sg_dma_len - 1) {
+ ib_dma_unmap_sg(dev, sg, nents,
+ DMA_BIDIRECTIONAL);
return -EINVAL;
- else
+ } else {
++page_cnt;
+ }
}
len += dma_len;
}
page_cnt += len >> PAGE_SHIFT;
- if (page_cnt > ibmr->pool->fmr_attr.max_pages)
+ if (page_cnt > ibmr->pool->fmr_attr.max_pages) {
+ ib_dma_unmap_sg(dev, sg, nents, DMA_BIDIRECTIONAL);
return -EINVAL;
+ }
dma_pages = kmalloc_node(sizeof(u64) * page_cnt, GFP_ATOMIC,
rdsibdev_to_node(rds_ibdev));
- if (!dma_pages)
+ if (!dma_pages) {
+ ib_dma_unmap_sg(dev, sg, nents, DMA_BIDIRECTIONAL);
return -ENOMEM;
+ }
page_cnt = 0;
for (i = 0; i < sg_dma_len; ++i) {
@@ -149,8 +157,10 @@ int rds_ib_map_fmr(struct rds_ib_device *rds_ibdev, struct rds_ib_mr *ibmr,
}
ret = ib_map_phys_fmr(fmr->fmr, dma_pages, page_cnt, io_addr);
- if (ret)
+ if (ret) {
+ ib_dma_unmap_sg(dev, sg, nents, DMA_BIDIRECTIONAL);
goto out;
+ }
/* Success - we successfully remapped the MR, so we can
* safely tear down the old mapping.
diff --git a/net/rds/ib_mr.h b/net/rds/ib_mr.h
index 5d6e98a79a5e..0ea4ab017a8c 100644
--- a/net/rds/ib_mr.h
+++ b/net/rds/ib_mr.h
@@ -125,8 +125,6 @@ void rds_ib_mr_exit(void);
void __rds_ib_teardown_mr(struct rds_ib_mr *);
void rds_ib_teardown_mr(struct rds_ib_mr *);
struct rds_ib_mr *rds_ib_alloc_fmr(struct rds_ib_device *, int);
-int rds_ib_map_fmr(struct rds_ib_device *, struct rds_ib_mr *,
- struct scatterlist *, unsigned int);
struct rds_ib_mr *rds_ib_reuse_mr(struct rds_ib_mr_pool *);
int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *, int, struct rds_ib_mr **);
struct rds_ib_mr *rds_ib_reg_fmr(struct rds_ib_device *, struct scatterlist *,
diff --git a/net/rds/recv.c b/net/rds/recv.c
index 8b7e7b7f2c2d..c70c32cb05f5 100644
--- a/net/rds/recv.c
+++ b/net/rds/recv.c
@@ -594,7 +594,6 @@ int rds_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
goto out;
while (1) {
- struct iov_iter save;
/* If there are pending notifications, do those - and nothing else */
if (!list_empty(&rs->rs_notify_queue)) {
ret = rds_notify_queue_get(rs, msg);
@@ -630,7 +629,6 @@ int rds_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
rdsdebug("copying inc %p from %pI4:%u to user\n", inc,
&inc->i_conn->c_faddr,
ntohs(inc->i_hdr.h_sport));
- save = msg->msg_iter;
ret = inc->i_conn->c_trans->inc_copy_to_user(inc, &msg->msg_iter);
if (ret < 0)
break;
@@ -644,7 +642,7 @@ int rds_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
rds_inc_put(inc);
inc = NULL;
rds_stats_inc(s_recv_deliver_raced);
- msg->msg_iter = save;
+ iov_iter_revert(&msg->msg_iter, ret);
continue;
}
diff --git a/net/rds/threads.c b/net/rds/threads.c
index e36e333a0aa0..3e447d056d09 100644
--- a/net/rds/threads.c
+++ b/net/rds/threads.c
@@ -156,7 +156,7 @@ void rds_connect_worker(struct work_struct *work)
struct rds_connection *conn = cp->cp_conn;
int ret;
- if (cp->cp_index > 1 && cp->cp_conn->c_laddr > cp->cp_conn->c_faddr)
+ if (cp->cp_index > 0 && cp->cp_conn->c_laddr > cp->cp_conn->c_faddr)
return;
clear_bit(RDS_RECONNECT_PENDING, &cp->cp_flags);
ret = rds_conn_path_transition(cp, RDS_CONN_DOWN, RDS_CONN_CONNECTING);
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index 26a7b1db1361..7486926e60a8 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -740,6 +740,25 @@ static inline bool rxrpc_abort_call(const char *why, struct rxrpc_call *call,
}
/*
+ * Abort a call due to a protocol error.
+ */
+static inline bool __rxrpc_abort_eproto(struct rxrpc_call *call,
+ struct sk_buff *skb,
+ const char *eproto_why,
+ const char *why,
+ u32 abort_code)
+{
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+
+ trace_rxrpc_rx_eproto(call, sp->hdr.serial, eproto_why);
+ return rxrpc_abort_call(why, call, sp->hdr.seq, abort_code, -EPROTO);
+}
+
+#define rxrpc_abort_eproto(call, skb, eproto_why, abort_why, abort_code) \
+ __rxrpc_abort_eproto((call), (skb), tracepoint_string(eproto_why), \
+ (abort_why), (abort_code))
+
+/*
* conn_client.c
*/
extern unsigned int rxrpc_max_client_connections;
diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c
index 0ed181f53f32..1752fcf8e8f1 100644
--- a/net/rxrpc/call_accept.c
+++ b/net/rxrpc/call_accept.c
@@ -413,11 +413,11 @@ found_service:
case RXRPC_CONN_REMOTELY_ABORTED:
rxrpc_set_call_completion(call, RXRPC_CALL_REMOTELY_ABORTED,
- conn->remote_abort, ECONNABORTED);
+ conn->remote_abort, -ECONNABORTED);
break;
case RXRPC_CONN_LOCALLY_ABORTED:
rxrpc_abort_call("CON", call, sp->hdr.seq,
- conn->local_abort, ECONNABORTED);
+ conn->local_abort, -ECONNABORTED);
break;
default:
BUG();
@@ -600,7 +600,7 @@ int rxrpc_reject_call(struct rxrpc_sock *rx)
write_lock_bh(&call->state_lock);
switch (call->state) {
case RXRPC_CALL_SERVER_ACCEPTING:
- __rxrpc_abort_call("REJ", call, 1, RX_USER_ABORT, ECONNABORTED);
+ __rxrpc_abort_call("REJ", call, 1, RX_USER_ABORT, -ECONNABORTED);
abort = true;
/* fall through */
case RXRPC_CALL_COMPLETE:
diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c
index 97a17ada4431..7a77844aab16 100644
--- a/net/rxrpc/call_event.c
+++ b/net/rxrpc/call_event.c
@@ -386,7 +386,7 @@ recheck_state:
now = ktime_get_real();
if (ktime_before(call->expire_at, now)) {
- rxrpc_abort_call("EXP", call, 0, RX_CALL_TIMEOUT, ETIME);
+ rxrpc_abort_call("EXP", call, 0, RX_CALL_TIMEOUT, -ETIME);
set_bit(RXRPC_CALL_EV_ABORT, &call->events);
goto recheck_state;
}
diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c
index d79cd36987a9..47f7f4205653 100644
--- a/net/rxrpc/call_object.c
+++ b/net/rxrpc/call_object.c
@@ -486,7 +486,7 @@ void rxrpc_release_calls_on_socket(struct rxrpc_sock *rx)
call = list_entry(rx->to_be_accepted.next,
struct rxrpc_call, accept_link);
list_del(&call->accept_link);
- rxrpc_abort_call("SKR", call, 0, RX_CALL_DEAD, ECONNRESET);
+ rxrpc_abort_call("SKR", call, 0, RX_CALL_DEAD, -ECONNRESET);
rxrpc_put_call(call, rxrpc_call_put);
}
@@ -494,7 +494,7 @@ void rxrpc_release_calls_on_socket(struct rxrpc_sock *rx)
call = list_entry(rx->sock_calls.next,
struct rxrpc_call, sock_link);
rxrpc_get_call(call, rxrpc_call_got);
- rxrpc_abort_call("SKT", call, 0, RX_CALL_DEAD, ECONNRESET);
+ rxrpc_abort_call("SKT", call, 0, RX_CALL_DEAD, -ECONNRESET);
rxrpc_send_abort_packet(call);
rxrpc_release_call(rx, call);
rxrpc_put_call(call, rxrpc_call_put);
diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c
index c3be03e8d098..e8dea0d49e7f 100644
--- a/net/rxrpc/conn_client.c
+++ b/net/rxrpc/conn_client.c
@@ -550,6 +550,7 @@ static void rxrpc_activate_one_channel(struct rxrpc_connection *conn,
call->cid = conn->proto.cid | channel;
call->call_id = call_id;
+ trace_rxrpc_connect_call(call);
_net("CONNECT call %08x:%08x as call %d on conn %d",
call->cid, call->call_id, call->debug_id, conn->debug_id);
diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c
index b099b64366f3..46babcf82ce8 100644
--- a/net/rxrpc/conn_event.c
+++ b/net/rxrpc/conn_event.c
@@ -168,7 +168,7 @@ static void rxrpc_abort_calls(struct rxrpc_connection *conn,
* generate a connection-level abort
*/
static int rxrpc_abort_connection(struct rxrpc_connection *conn,
- u32 error, u32 abort_code)
+ int error, u32 abort_code)
{
struct rxrpc_wire_header whdr;
struct msghdr msg;
@@ -281,14 +281,17 @@ static int rxrpc_process_event(struct rxrpc_connection *conn,
case RXRPC_PACKET_TYPE_ABORT:
if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header),
- &wtmp, sizeof(wtmp)) < 0)
+ &wtmp, sizeof(wtmp)) < 0) {
+ trace_rxrpc_rx_eproto(NULL, sp->hdr.serial,
+ tracepoint_string("bad_abort"));
return -EPROTO;
+ }
abort_code = ntohl(wtmp);
_proto("Rx ABORT %%%u { ac=%d }", sp->hdr.serial, abort_code);
conn->state = RXRPC_CONN_REMOTELY_ABORTED;
rxrpc_abort_calls(conn, RXRPC_CALL_REMOTELY_ABORTED,
- abort_code, ECONNABORTED);
+ abort_code, -ECONNABORTED);
return -ECONNABORTED;
case RXRPC_PACKET_TYPE_CHALLENGE:
@@ -327,7 +330,8 @@ static int rxrpc_process_event(struct rxrpc_connection *conn,
return 0;
default:
- _leave(" = -EPROTO [%u]", sp->hdr.type);
+ trace_rxrpc_rx_eproto(NULL, sp->hdr.serial,
+ tracepoint_string("bad_conn_pkt"));
return -EPROTO;
}
}
@@ -370,7 +374,7 @@ static void rxrpc_secure_connection(struct rxrpc_connection *conn)
abort:
_debug("abort %d, %d", ret, abort_code);
- rxrpc_abort_connection(conn, -ret, abort_code);
+ rxrpc_abort_connection(conn, ret, abort_code);
_leave(" [aborted]");
}
@@ -419,9 +423,8 @@ requeue_and_leave:
goto out;
protocol_error:
- if (rxrpc_abort_connection(conn, -ret, abort_code) < 0)
+ if (rxrpc_abort_connection(conn, ret, abort_code) < 0)
goto requeue_and_leave;
rxrpc_free_skb(skb, rxrpc_skb_rx_freed);
- _leave(" [EPROTO]");
goto out;
}
diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
index 18b2ad8be8e2..45dba732a3b4 100644
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -30,7 +30,7 @@
static void rxrpc_proto_abort(const char *why,
struct rxrpc_call *call, rxrpc_seq_t seq)
{
- if (rxrpc_abort_call(why, call, seq, RX_PROTOCOL_ERROR, EBADMSG)) {
+ if (rxrpc_abort_call(why, call, seq, RX_PROTOCOL_ERROR, -EBADMSG)) {
set_bit(RXRPC_CALL_EV_ABORT, &call->events);
rxrpc_queue_call(call);
}
@@ -665,6 +665,8 @@ static void rxrpc_input_ackinfo(struct rxrpc_call *call, struct sk_buff *skb,
rwind = RXRPC_RXTX_BUFF_SIZE - 1;
if (rwind > call->tx_winsize)
wake = true;
+ trace_rxrpc_rx_rwind_change(call, sp->hdr.serial,
+ ntohl(ackinfo->rwind), wake);
call->tx_winsize = rwind;
}
@@ -877,7 +879,7 @@ static void rxrpc_input_ackall(struct rxrpc_call *call, struct sk_buff *skb)
}
/*
- * Process an ABORT packet.
+ * Process an ABORT packet directed at a call.
*/
static void rxrpc_input_abort(struct rxrpc_call *call, struct sk_buff *skb)
{
@@ -892,10 +894,12 @@ static void rxrpc_input_abort(struct rxrpc_call *call, struct sk_buff *skb)
&wtmp, sizeof(wtmp)) >= 0)
abort_code = ntohl(wtmp);
+ trace_rxrpc_rx_abort(call, sp->hdr.serial, abort_code);
+
_proto("Rx ABORT %%%u { %x }", sp->hdr.serial, abort_code);
if (rxrpc_set_call_completion(call, RXRPC_CALL_REMOTELY_ABORTED,
- abort_code, ECONNABORTED))
+ abort_code, -ECONNABORTED))
rxrpc_notify_socket(call);
}
@@ -958,7 +962,7 @@ static void rxrpc_input_implicit_end_call(struct rxrpc_connection *conn,
case RXRPC_CALL_COMPLETE:
break;
default:
- if (rxrpc_abort_call("IMP", call, 0, RX_CALL_DEAD, ESHUTDOWN)) {
+ if (rxrpc_abort_call("IMP", call, 0, RX_CALL_DEAD, -ESHUTDOWN)) {
set_bit(RXRPC_CALL_EV_ABORT, &call->events);
rxrpc_queue_call(call);
}
@@ -1017,8 +1021,11 @@ int rxrpc_extract_header(struct rxrpc_skb_priv *sp, struct sk_buff *skb)
struct rxrpc_wire_header whdr;
/* dig out the RxRPC connection details */
- if (skb_copy_bits(skb, 0, &whdr, sizeof(whdr)) < 0)
+ if (skb_copy_bits(skb, 0, &whdr, sizeof(whdr)) < 0) {
+ trace_rxrpc_rx_eproto(NULL, sp->hdr.serial,
+ tracepoint_string("bad_hdr"));
return -EBADMSG;
+ }
memset(sp, 0, sizeof(*sp));
sp->hdr.epoch = ntohl(whdr.epoch);
diff --git a/net/rxrpc/insecure.c b/net/rxrpc/insecure.c
index 7d4375e557e6..af276f173b10 100644
--- a/net/rxrpc/insecure.c
+++ b/net/rxrpc/insecure.c
@@ -46,7 +46,10 @@ static int none_respond_to_challenge(struct rxrpc_connection *conn,
struct sk_buff *skb,
u32 *_abort_code)
{
- *_abort_code = RX_PROTOCOL_ERROR;
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+
+ trace_rxrpc_rx_eproto(NULL, sp->hdr.serial,
+ tracepoint_string("chall_none"));
return -EPROTO;
}
@@ -54,7 +57,10 @@ static int none_verify_response(struct rxrpc_connection *conn,
struct sk_buff *skb,
u32 *_abort_code)
{
- *_abort_code = RX_PROTOCOL_ERROR;
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+
+ trace_rxrpc_rx_eproto(NULL, sp->hdr.serial,
+ tracepoint_string("resp_none"));
return -EPROTO;
}
diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c
index bf13b8470c9a..1ed9c0c2e94f 100644
--- a/net/rxrpc/peer_event.c
+++ b/net/rxrpc/peer_event.c
@@ -296,7 +296,7 @@ void rxrpc_peer_error_distributor(struct work_struct *work)
hlist_del_init(&call->error_link);
rxrpc_see_call(call);
- if (rxrpc_set_call_completion(call, compl, 0, error))
+ if (rxrpc_set_call_completion(call, compl, 0, -error))
rxrpc_notify_socket(call);
}
diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c
index 3e2f1a8e9c5b..f9caf3b77509 100644
--- a/net/rxrpc/recvmsg.c
+++ b/net/rxrpc/recvmsg.c
@@ -83,11 +83,11 @@ static int rxrpc_recvmsg_term(struct rxrpc_call *call, struct msghdr *msg)
ret = put_cmsg(msg, SOL_RXRPC, RXRPC_ABORT, 4, &tmp);
break;
case RXRPC_CALL_NETWORK_ERROR:
- tmp = call->error;
+ tmp = -call->error;
ret = put_cmsg(msg, SOL_RXRPC, RXRPC_NET_ERROR, 4, &tmp);
break;
case RXRPC_CALL_LOCAL_ERROR:
- tmp = call->error;
+ tmp = -call->error;
ret = put_cmsg(msg, SOL_RXRPC, RXRPC_LOCAL_ERROR, 4, &tmp);
break;
default:
@@ -682,14 +682,16 @@ out:
return ret;
short_data:
+ trace_rxrpc_rx_eproto(call, 0, tracepoint_string("short_data"));
ret = -EBADMSG;
goto out;
excess_data:
+ trace_rxrpc_rx_eproto(call, 0, tracepoint_string("excess_data"));
ret = -EMSGSIZE;
goto out;
call_complete:
*_abort = call->abort_code;
- ret = -call->error;
+ ret = call->error;
if (call->completion == RXRPC_CALL_SUCCEEDED) {
ret = 1;
if (size > 0)
diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c
index 4374e7b9c7bf..1bb9b2ccc267 100644
--- a/net/rxrpc/rxkad.c
+++ b/net/rxrpc/rxkad.c
@@ -148,15 +148,13 @@ static int rxkad_secure_packet_auth(const struct rxrpc_call *call,
u32 data_size,
void *sechdr)
{
- struct rxrpc_skb_priv *sp;
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
SKCIPHER_REQUEST_ON_STACK(req, call->conn->cipher);
struct rxkad_level1_hdr hdr;
struct rxrpc_crypt iv;
struct scatterlist sg;
u16 check;
- sp = rxrpc_skb(skb);
-
_enter("");
check = sp->hdr.seq ^ call->call_id;
@@ -323,6 +321,7 @@ static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb,
struct rxrpc_crypt iv;
struct scatterlist sg[16];
struct sk_buff *trailer;
+ bool aborted;
u32 data_size, buf;
u16 check;
int nsg;
@@ -330,7 +329,8 @@ static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb,
_enter("");
if (len < 8) {
- rxrpc_abort_call("V1H", call, seq, RXKADSEALEDINCON, EPROTO);
+ aborted = rxrpc_abort_eproto(call, skb, "rxkad_1_hdr", "V1H",
+ RXKADSEALEDINCON);
goto protocol_error;
}
@@ -355,7 +355,8 @@ static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb,
/* Extract the decrypted packet length */
if (skb_copy_bits(skb, offset, &sechdr, sizeof(sechdr)) < 0) {
- rxrpc_abort_call("XV1", call, seq, RXKADDATALEN, EPROTO);
+ aborted = rxrpc_abort_eproto(call, skb, "rxkad_1_len", "XV1",
+ RXKADDATALEN);
goto protocol_error;
}
offset += sizeof(sechdr);
@@ -368,12 +369,14 @@ static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb,
check ^= seq ^ call->call_id;
check &= 0xffff;
if (check != 0) {
- rxrpc_abort_call("V1C", call, seq, RXKADSEALEDINCON, EPROTO);
+ aborted = rxrpc_abort_eproto(call, skb, "rxkad_1_check", "V1C",
+ RXKADSEALEDINCON);
goto protocol_error;
}
if (data_size > len) {
- rxrpc_abort_call("V1L", call, seq, RXKADDATALEN, EPROTO);
+ aborted = rxrpc_abort_eproto(call, skb, "rxkad_1_datalen", "V1L",
+ RXKADDATALEN);
goto protocol_error;
}
@@ -381,8 +384,8 @@ static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb,
return 0;
protocol_error:
- rxrpc_send_abort_packet(call);
- _leave(" = -EPROTO");
+ if (aborted)
+ rxrpc_send_abort_packet(call);
return -EPROTO;
nomem:
@@ -403,6 +406,7 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb,
struct rxrpc_crypt iv;
struct scatterlist _sg[4], *sg;
struct sk_buff *trailer;
+ bool aborted;
u32 data_size, buf;
u16 check;
int nsg;
@@ -410,7 +414,8 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb,
_enter(",{%d}", skb->len);
if (len < 8) {
- rxrpc_abort_call("V2H", call, seq, RXKADSEALEDINCON, EPROTO);
+ aborted = rxrpc_abort_eproto(call, skb, "rxkad_2_hdr", "V2H",
+ RXKADSEALEDINCON);
goto protocol_error;
}
@@ -445,7 +450,8 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb,
/* Extract the decrypted packet length */
if (skb_copy_bits(skb, offset, &sechdr, sizeof(sechdr)) < 0) {
- rxrpc_abort_call("XV2", call, seq, RXKADDATALEN, EPROTO);
+ aborted = rxrpc_abort_eproto(call, skb, "rxkad_2_len", "XV2",
+ RXKADDATALEN);
goto protocol_error;
}
offset += sizeof(sechdr);
@@ -458,12 +464,14 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb,
check ^= seq ^ call->call_id;
check &= 0xffff;
if (check != 0) {
- rxrpc_abort_call("V2C", call, seq, RXKADSEALEDINCON, EPROTO);
+ aborted = rxrpc_abort_eproto(call, skb, "rxkad_2_check", "V2C",
+ RXKADSEALEDINCON);
goto protocol_error;
}
if (data_size > len) {
- rxrpc_abort_call("V2L", call, seq, RXKADDATALEN, EPROTO);
+ aborted = rxrpc_abort_eproto(call, skb, "rxkad_2_datalen", "V2L",
+ RXKADDATALEN);
goto protocol_error;
}
@@ -471,8 +479,8 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb,
return 0;
protocol_error:
- rxrpc_send_abort_packet(call);
- _leave(" = -EPROTO");
+ if (aborted)
+ rxrpc_send_abort_packet(call);
return -EPROTO;
nomem:
@@ -491,6 +499,7 @@ static int rxkad_verify_packet(struct rxrpc_call *call, struct sk_buff *skb,
SKCIPHER_REQUEST_ON_STACK(req, call->conn->cipher);
struct rxrpc_crypt iv;
struct scatterlist sg;
+ bool aborted;
u16 cksum;
u32 x, y;
@@ -522,10 +531,9 @@ static int rxkad_verify_packet(struct rxrpc_call *call, struct sk_buff *skb,
cksum = 1; /* zero checksums are not permitted */
if (cksum != expected_cksum) {
- rxrpc_abort_call("VCK", call, seq, RXKADSEALEDINCON, EPROTO);
- rxrpc_send_abort_packet(call);
- _leave(" = -EPROTO [csum failed]");
- return -EPROTO;
+ aborted = rxrpc_abort_eproto(call, skb, "rxkad_csum", "VCK",
+ RXKADSEALEDINCON);
+ goto protocol_error;
}
switch (call->conn->params.security_level) {
@@ -538,6 +546,11 @@ static int rxkad_verify_packet(struct rxrpc_call *call, struct sk_buff *skb,
default:
return -ENOANO;
}
+
+protocol_error:
+ if (aborted)
+ rxrpc_send_abort_packet(call);
+ return -EPROTO;
}
/*
@@ -754,22 +767,23 @@ static int rxkad_respond_to_challenge(struct rxrpc_connection *conn,
struct rxkad_response resp
__attribute__((aligned(8))); /* must be aligned for crypto */
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+ const char *eproto;
u32 version, nonce, min_level, abort_code;
int ret;
_enter("{%d,%x}", conn->debug_id, key_serial(conn->params.key));
- if (!conn->params.key) {
- _leave(" = -EPROTO [no key]");
- return -EPROTO;
- }
+ eproto = tracepoint_string("chall_no_key");
+ abort_code = RX_PROTOCOL_ERROR;
+ if (!conn->params.key)
+ goto protocol_error;
+ abort_code = RXKADEXPIRED;
ret = key_validate(conn->params.key);
- if (ret < 0) {
- *_abort_code = RXKADEXPIRED;
- return ret;
- }
+ if (ret < 0)
+ goto other_error;
+ eproto = tracepoint_string("chall_short");
abort_code = RXKADPACKETSHORT;
if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header),
&challenge, sizeof(challenge)) < 0)
@@ -782,13 +796,15 @@ static int rxkad_respond_to_challenge(struct rxrpc_connection *conn,
_proto("Rx CHALLENGE %%%u { v=%u n=%u ml=%u }",
sp->hdr.serial, version, nonce, min_level);
+ eproto = tracepoint_string("chall_ver");
abort_code = RXKADINCONSISTENCY;
if (version != RXKAD_VERSION)
goto protocol_error;
abort_code = RXKADLEVELFAIL;
+ ret = -EACCES;
if (conn->params.security_level < min_level)
- goto protocol_error;
+ goto other_error;
token = conn->params.key->payload.data[0];
@@ -815,28 +831,34 @@ static int rxkad_respond_to_challenge(struct rxrpc_connection *conn,
return rxkad_send_response(conn, &sp->hdr, &resp, token->kad);
protocol_error:
+ trace_rxrpc_rx_eproto(NULL, sp->hdr.serial, eproto);
+ ret = -EPROTO;
+other_error:
*_abort_code = abort_code;
- _leave(" = -EPROTO [%d]", abort_code);
- return -EPROTO;
+ return ret;
}
/*
* decrypt the kerberos IV ticket in the response
*/
static int rxkad_decrypt_ticket(struct rxrpc_connection *conn,
+ struct sk_buff *skb,
void *ticket, size_t ticket_len,
struct rxrpc_crypt *_session_key,
time_t *_expiry,
u32 *_abort_code)
{
struct skcipher_request *req;
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
struct rxrpc_crypt iv, key;
struct scatterlist sg[1];
struct in_addr addr;
unsigned int life;
+ const char *eproto;
time_t issue, now;
bool little_endian;
int ret;
+ u32 abort_code;
u8 *p, *q, *name, *end;
_enter("{%d},{%x}", conn->debug_id, key_serial(conn->server_key));
@@ -847,11 +869,11 @@ static int rxkad_decrypt_ticket(struct rxrpc_connection *conn,
if (ret < 0) {
switch (ret) {
case -EKEYEXPIRED:
- *_abort_code = RXKADEXPIRED;
- goto error;
+ abort_code = RXKADEXPIRED;
+ goto other_error;
default:
- *_abort_code = RXKADNOAUTH;
- goto error;
+ abort_code = RXKADNOAUTH;
+ goto other_error;
}
}
@@ -860,13 +882,11 @@ static int rxkad_decrypt_ticket(struct rxrpc_connection *conn,
memcpy(&iv, &conn->server_key->payload.data[2], sizeof(iv));
+ ret = -ENOMEM;
req = skcipher_request_alloc(conn->server_key->payload.data[0],
GFP_NOFS);
- if (!req) {
- *_abort_code = RXKADNOAUTH;
- ret = -ENOMEM;
- goto error;
- }
+ if (!req)
+ goto temporary_error;
sg_init_one(&sg[0], ticket, ticket_len);
skcipher_request_set_callback(req, 0, NULL, NULL);
@@ -877,11 +897,12 @@ static int rxkad_decrypt_ticket(struct rxrpc_connection *conn,
p = ticket;
end = p + ticket_len;
-#define Z(size) \
+#define Z(field) \
({ \
u8 *__str = p; \
+ eproto = tracepoint_string("rxkad_bad_"#field); \
q = memchr(p, 0, end - p); \
- if (!q || q - p > (size)) \
+ if (!q || q - p > (field##_SZ)) \
goto bad_ticket; \
for (; p < q; p++) \
if (!isprint(*p)) \
@@ -896,17 +917,18 @@ static int rxkad_decrypt_ticket(struct rxrpc_connection *conn,
p++;
/* extract the authentication name */
- name = Z(ANAME_SZ);
+ name = Z(ANAME);
_debug("KIV ANAME: %s", name);
/* extract the principal's instance */
- name = Z(INST_SZ);
+ name = Z(INST);
_debug("KIV INST : %s", name);
/* extract the principal's authentication domain */
- name = Z(REALM_SZ);
+ name = Z(REALM);
_debug("KIV REALM: %s", name);
+ eproto = tracepoint_string("rxkad_bad_len");
if (end - p < 4 + 8 + 4 + 2)
goto bad_ticket;
@@ -941,36 +963,37 @@ static int rxkad_decrypt_ticket(struct rxrpc_connection *conn,
/* check the ticket is in date */
if (issue > now) {
- *_abort_code = RXKADNOAUTH;
+ abort_code = RXKADNOAUTH;
ret = -EKEYREJECTED;
- goto error;
+ goto other_error;
}
if (issue < now - life) {
- *_abort_code = RXKADEXPIRED;
+ abort_code = RXKADEXPIRED;
ret = -EKEYEXPIRED;
- goto error;
+ goto other_error;
}
*_expiry = issue + life;
/* get the service name */
- name = Z(SNAME_SZ);
+ name = Z(SNAME);
_debug("KIV SNAME: %s", name);
/* get the service instance name */
- name = Z(INST_SZ);
+ name = Z(INST);
_debug("KIV SINST: %s", name);
-
- ret = 0;
-error:
- _leave(" = %d", ret);
- return ret;
+ return 0;
bad_ticket:
- *_abort_code = RXKADBADTICKET;
- ret = -EBADMSG;
- goto error;
+ trace_rxrpc_rx_eproto(NULL, sp->hdr.serial, eproto);
+ abort_code = RXKADBADTICKET;
+ ret = -EPROTO;
+other_error:
+ *_abort_code = abort_code;
+ return ret;
+temporary_error:
+ return ret;
}
/*
@@ -1020,6 +1043,7 @@ static int rxkad_verify_response(struct rxrpc_connection *conn,
__attribute__((aligned(8))); /* must be aligned for crypto */
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
struct rxrpc_crypt session_key;
+ const char *eproto;
time_t expiry;
void *ticket;
u32 abort_code, version, kvno, ticket_len, level;
@@ -1028,6 +1052,7 @@ static int rxkad_verify_response(struct rxrpc_connection *conn,
_enter("{%d,%x}", conn->debug_id, key_serial(conn->server_key));
+ eproto = tracepoint_string("rxkad_rsp_short");
abort_code = RXKADPACKETSHORT;
if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header),
&response, sizeof(response)) < 0)
@@ -1041,40 +1066,43 @@ static int rxkad_verify_response(struct rxrpc_connection *conn,
_proto("Rx RESPONSE %%%u { v=%u kv=%u tl=%u }",
sp->hdr.serial, version, kvno, ticket_len);
+ eproto = tracepoint_string("rxkad_rsp_ver");
abort_code = RXKADINCONSISTENCY;
if (version != RXKAD_VERSION)
goto protocol_error;
+ eproto = tracepoint_string("rxkad_rsp_tktlen");
abort_code = RXKADTICKETLEN;
if (ticket_len < 4 || ticket_len > MAXKRB5TICKETLEN)
goto protocol_error;
+ eproto = tracepoint_string("rxkad_rsp_unkkey");
abort_code = RXKADUNKNOWNKEY;
if (kvno >= RXKAD_TKT_TYPE_KERBEROS_V5)
goto protocol_error;
/* extract the kerberos ticket and decrypt and decode it */
+ ret = -ENOMEM;
ticket = kmalloc(ticket_len, GFP_NOFS);
if (!ticket)
- return -ENOMEM;
+ goto temporary_error;
+ eproto = tracepoint_string("rxkad_tkt_short");
abort_code = RXKADPACKETSHORT;
if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header),
ticket, ticket_len) < 0)
goto protocol_error_free;
- ret = rxkad_decrypt_ticket(conn, ticket, ticket_len, &session_key,
- &expiry, &abort_code);
- if (ret < 0) {
- *_abort_code = abort_code;
- kfree(ticket);
- return ret;
- }
+ ret = rxkad_decrypt_ticket(conn, skb, ticket, ticket_len, &session_key,
+ &expiry, _abort_code);
+ if (ret < 0)
+ goto temporary_error_free;
/* use the session key from inside the ticket to decrypt the
* response */
rxkad_decrypt_response(conn, &response, &session_key);
+ eproto = tracepoint_string("rxkad_rsp_param");
abort_code = RXKADSEALEDINCON;
if (ntohl(response.encrypted.epoch) != conn->proto.epoch)
goto protocol_error_free;
@@ -1085,6 +1113,7 @@ static int rxkad_verify_response(struct rxrpc_connection *conn,
csum = response.encrypted.checksum;
response.encrypted.checksum = 0;
rxkad_calc_response_checksum(&response);
+ eproto = tracepoint_string("rxkad_rsp_csum");
if (response.encrypted.checksum != csum)
goto protocol_error_free;
@@ -1093,11 +1122,15 @@ static int rxkad_verify_response(struct rxrpc_connection *conn,
struct rxrpc_call *call;
u32 call_id = ntohl(response.encrypted.call_id[i]);
+ eproto = tracepoint_string("rxkad_rsp_callid");
if (call_id > INT_MAX)
goto protocol_error_unlock;
+ eproto = tracepoint_string("rxkad_rsp_callctr");
if (call_id < conn->channels[i].call_counter)
goto protocol_error_unlock;
+
+ eproto = tracepoint_string("rxkad_rsp_callst");
if (call_id > conn->channels[i].call_counter) {
call = rcu_dereference_protected(
conn->channels[i].call,
@@ -1109,10 +1142,12 @@ static int rxkad_verify_response(struct rxrpc_connection *conn,
}
spin_unlock(&conn->channel_lock);
+ eproto = tracepoint_string("rxkad_rsp_seq");
abort_code = RXKADOUTOFSEQUENCE;
if (ntohl(response.encrypted.inc_nonce) != conn->security_nonce + 1)
goto protocol_error_free;
+ eproto = tracepoint_string("rxkad_rsp_level");
abort_code = RXKADLEVELFAIL;
level = ntohl(response.encrypted.level);
if (level > RXRPC_SECURITY_ENCRYPT)
@@ -1123,10 +1158,8 @@ static int rxkad_verify_response(struct rxrpc_connection *conn,
* this the connection security can be handled in exactly the same way
* as for a client connection */
ret = rxrpc_get_server_data_key(conn, &session_key, expiry, kvno);
- if (ret < 0) {
- kfree(ticket);
- return ret;
- }
+ if (ret < 0)
+ goto temporary_error_free;
kfree(ticket);
_leave(" = 0");
@@ -1137,9 +1170,18 @@ protocol_error_unlock:
protocol_error_free:
kfree(ticket);
protocol_error:
+ trace_rxrpc_rx_eproto(NULL, sp->hdr.serial, eproto);
*_abort_code = abort_code;
- _leave(" = -EPROTO [%d]", abort_code);
return -EPROTO;
+
+temporary_error_free:
+ kfree(ticket);
+temporary_error:
+ /* Ignore the response packet if we got a temporary error such as
+ * ENOMEM. We just want to send the challenge again. Note that we
+ * also come out this way if the ticket decryption fails.
+ */
+ return ret;
}
/*
diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c
index 97ab214ca411..96ffa5d5733b 100644
--- a/net/rxrpc/sendmsg.c
+++ b/net/rxrpc/sendmsg.c
@@ -556,7 +556,7 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
ret = -ESHUTDOWN;
} else if (cmd == RXRPC_CMD_SEND_ABORT) {
ret = 0;
- if (rxrpc_abort_call("CMD", call, 0, abort_code, ECONNABORTED))
+ if (rxrpc_abort_call("CMD", call, 0, abort_code, -ECONNABORTED))
ret = rxrpc_send_abort_packet(call);
} else if (cmd != RXRPC_CMD_SEND_DATA) {
ret = -EINVAL;
@@ -623,7 +623,8 @@ int rxrpc_kernel_send_data(struct socket *sock, struct rxrpc_call *call,
read_unlock_bh(&call->state_lock);
break;
default:
- /* Request phase complete for this client call */
+ /* Request phase complete for this client call */
+ trace_rxrpc_rx_eproto(call, 0, tracepoint_string("late_send"));
ret = -EPROTO;
break;
}
@@ -642,20 +643,24 @@ EXPORT_SYMBOL(rxrpc_kernel_send_data);
* @error: Local error value
* @why: 3-char string indicating why.
*
- * Allow a kernel service to abort a call, if it's still in an abortable state.
+ * Allow a kernel service to abort a call, if it's still in an abortable state
+ * and return true if the call was aborted, false if it was already complete.
*/
-void rxrpc_kernel_abort_call(struct socket *sock, struct rxrpc_call *call,
+bool rxrpc_kernel_abort_call(struct socket *sock, struct rxrpc_call *call,
u32 abort_code, int error, const char *why)
{
+ bool aborted;
+
_enter("{%d},%d,%d,%s", call->debug_id, abort_code, error, why);
mutex_lock(&call->user_mutex);
- if (rxrpc_abort_call(why, call, 0, abort_code, error))
+ aborted = rxrpc_abort_call(why, call, 0, abort_code, error);
+ if (aborted)
rxrpc_send_abort_packet(call);
mutex_unlock(&call->user_mutex);
- _leave("");
+ return aborted;
}
EXPORT_SYMBOL(rxrpc_kernel_abort_call);
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 403790cce7d2..9fb84f0de6af 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -352,6 +352,51 @@ config NET_SCH_PLUG
To compile this code as a module, choose M here: the
module will be called sch_plug.
+menuconfig NET_SCH_DEFAULT
+ bool "Allow override default queue discipline"
+ ---help---
+ Support for selection of default queuing discipline.
+
+ Nearly all users can safely say no here, and the default
+ of pfifo_fast will be used. Many distributions already set
+ the default value via /proc/sys/net/core/default_qdisc.
+
+ If unsure, say N.
+
+if NET_SCH_DEFAULT
+
+choice
+ prompt "Default queuing discipline"
+ default DEFAULT_PFIFO_FAST
+ help
+ Select the queueing discipline that will be used by default
+ for all network devices.
+
+ config DEFAULT_FQ
+ bool "Fair Queue" if NET_SCH_FQ
+
+ config DEFAULT_CODEL
+ bool "Controlled Delay" if NET_SCH_CODEL
+
+ config DEFAULT_FQ_CODEL
+ bool "Fair Queue Controlled Delay" if NET_SCH_FQ_CODEL
+
+ config DEFAULT_SFQ
+ bool "Stochastic Fair Queue" if NET_SCH_SFQ
+
+ config DEFAULT_PFIFO_FAST
+ bool "Priority FIFO Fast"
+endchoice
+
+config DEFAULT_NET_SCH
+ string
+ default "pfifo_fast" if DEFAULT_PFIFO_FAST
+ default "fq" if DEFAULT_FQ
+ default "fq_codel" if DEFAULT_FQ_CODEL
+ default "sfq" if DEFAULT_SFQ
+ default "pfifo_fast"
+endif
+
comment "Classification"
config NET_CLS
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index e05b924618a0..a90e8f355c00 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -428,24 +428,49 @@ static struct tc_action_ops *tc_lookup_action(struct nlattr *kind)
return res;
}
+/*TCA_ACT_MAX_PRIO is 32, there count upto 32 */
+#define TCA_ACT_MAX_PRIO_MASK 0x1FF
int tcf_action_exec(struct sk_buff *skb, struct tc_action **actions,
int nr_actions, struct tcf_result *res)
{
int ret = -1, i;
+ u32 jmp_prgcnt = 0;
+ u32 jmp_ttl = TCA_ACT_MAX_PRIO; /*matches actions per filter */
if (skb_skip_tc_classify(skb))
return TC_ACT_OK;
+restart_act_graph:
for (i = 0; i < nr_actions; i++) {
const struct tc_action *a = actions[i];
+ if (jmp_prgcnt > 0) {
+ jmp_prgcnt -= 1;
+ continue;
+ }
repeat:
ret = a->ops->act(skb, a, res);
if (ret == TC_ACT_REPEAT)
goto repeat; /* we need a ttl - JHS */
+
+ if (TC_ACT_EXT_CMP(ret, TC_ACT_JUMP)) {
+ jmp_prgcnt = ret & TCA_ACT_MAX_PRIO_MASK;
+ if (!jmp_prgcnt || (jmp_prgcnt > nr_actions)) {
+ /* faulty opcode, stop pipeline */
+ return TC_ACT_OK;
+ } else {
+ jmp_ttl -= 1;
+ if (jmp_ttl > 0)
+ goto restart_act_graph;
+ else /* faulty graph, stop pipeline */
+ return TC_ACT_OK;
+ }
+ }
+
if (ret != TC_ACT_PIPE)
break;
}
+
return ret;
}
EXPORT_SYMBOL(tcf_action_exec);
@@ -558,7 +583,7 @@ struct tc_action *tcf_action_init_1(struct net *net, struct nlattr *nla,
int err;
if (name == NULL) {
- err = nla_parse_nested(tb, TCA_ACT_MAX, nla, NULL);
+ err = nla_parse_nested(tb, TCA_ACT_MAX, nla, NULL, NULL);
if (err < 0)
goto err_out;
err = -EINVAL;
@@ -663,7 +688,7 @@ int tcf_action_init(struct net *net, struct nlattr *nla, struct nlattr *est,
int err;
int i;
- err = nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL);
+ err = nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL, NULL);
if (err < 0)
return err;
@@ -795,7 +820,7 @@ static struct tc_action *tcf_action_get_1(struct net *net, struct nlattr *nla,
int index;
int err;
- err = nla_parse_nested(tb, TCA_ACT_MAX, nla, NULL);
+ err = nla_parse_nested(tb, TCA_ACT_MAX, nla, NULL, NULL);
if (err < 0)
goto err_out;
@@ -844,7 +869,7 @@ static int tca_action_flush(struct net *net, struct nlattr *nla,
b = skb_tail_pointer(skb);
- err = nla_parse_nested(tb, TCA_ACT_MAX, nla, NULL);
+ err = nla_parse_nested(tb, TCA_ACT_MAX, nla, NULL, NULL);
if (err < 0)
goto err_out;
@@ -930,7 +955,7 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
struct tc_action *act;
LIST_HEAD(actions);
- ret = nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL);
+ ret = nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL, NULL);
if (ret < 0)
return ret;
@@ -1002,7 +1027,8 @@ static int tcf_action_add(struct net *net, struct nlattr *nla,
return tcf_add_notify(net, n, &actions, portid);
}
-static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n)
+static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
struct nlattr *tca[TCA_ACT_MAX + 1];
@@ -1013,7 +1039,8 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n)
!netlink_capable(skb, CAP_NET_ADMIN))
return -EPERM;
- ret = nlmsg_parse(n, sizeof(struct tcamsg), tca, TCA_ACT_MAX, NULL);
+ ret = nlmsg_parse(n, sizeof(struct tcamsg), tca, TCA_ACT_MAX, NULL,
+ extack);
if (ret < 0)
return ret;
@@ -1060,19 +1087,20 @@ static struct nlattr *find_dump_kind(const struct nlmsghdr *n)
struct nlattr *nla[TCAA_MAX + 1];
struct nlattr *kind;
- if (nlmsg_parse(n, sizeof(struct tcamsg), nla, TCAA_MAX, NULL) < 0)
+ if (nlmsg_parse(n, sizeof(struct tcamsg), nla, TCAA_MAX,
+ NULL, NULL) < 0)
return NULL;
tb1 = nla[TCA_ACT_TAB];
if (tb1 == NULL)
return NULL;
if (nla_parse(tb, TCA_ACT_MAX_PRIO, nla_data(tb1),
- NLMSG_ALIGN(nla_len(tb1)), NULL) < 0)
+ NLMSG_ALIGN(nla_len(tb1)), NULL, NULL) < 0)
return NULL;
if (tb[1] == NULL)
return NULL;
- if (nla_parse_nested(tb2, TCA_ACT_MAX, tb[1], NULL) < 0)
+ if (nla_parse_nested(tb2, TCA_ACT_MAX, tb[1], NULL, NULL) < 0)
return NULL;
kind = tb2[TCA_ACT_KIND];
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index 520baa41cba3..d33947d6e9d0 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -283,7 +283,7 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
if (!nla)
return -EINVAL;
- ret = nla_parse_nested(tb, TCA_ACT_BPF_MAX, nla, act_bpf_policy);
+ ret = nla_parse_nested(tb, TCA_ACT_BPF_MAX, nla, act_bpf_policy, NULL);
if (ret < 0)
return ret;
diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c
index f9bb43c25697..2155bc6c6a1e 100644
--- a/net/sched/act_connmark.c
+++ b/net/sched/act_connmark.c
@@ -109,7 +109,8 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla,
if (!nla)
return -EINVAL;
- ret = nla_parse_nested(tb, TCA_CONNMARK_MAX, nla, connmark_policy);
+ ret = nla_parse_nested(tb, TCA_CONNMARK_MAX, nla, connmark_policy,
+ NULL);
if (ret < 0)
return ret;
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index e978ccd4402c..ab6fdbd34db7 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -59,7 +59,7 @@ static int tcf_csum_init(struct net *net, struct nlattr *nla,
if (nla == NULL)
return -EINVAL;
- err = nla_parse_nested(tb, TCA_CSUM_MAX, nla, csum_policy);
+ err = nla_parse_nested(tb, TCA_CSUM_MAX, nla, csum_policy, NULL);
if (err < 0)
return err;
@@ -181,6 +181,9 @@ static int tcf_csum_ipv4_tcp(struct sk_buff *skb, unsigned int ihl,
struct tcphdr *tcph;
const struct iphdr *iph;
+ if (skb_is_gso(skb) && skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4)
+ return 1;
+
tcph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*tcph));
if (tcph == NULL)
return 0;
@@ -202,6 +205,9 @@ static int tcf_csum_ipv6_tcp(struct sk_buff *skb, unsigned int ihl,
struct tcphdr *tcph;
const struct ipv6hdr *ip6h;
+ if (skb_is_gso(skb) && skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6)
+ return 1;
+
tcph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*tcph));
if (tcph == NULL)
return 0;
@@ -225,6 +231,9 @@ static int tcf_csum_ipv4_udp(struct sk_buff *skb, unsigned int ihl,
const struct iphdr *iph;
u16 ul;
+ if (skb_is_gso(skb) && skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
+ return 1;
+
/*
* Support both UDP and UDPLITE checksum algorithms, Don't use
* udph->len to get the real length without any protocol check,
@@ -278,6 +287,9 @@ static int tcf_csum_ipv6_udp(struct sk_buff *skb, unsigned int ihl,
const struct ipv6hdr *ip6h;
u16 ul;
+ if (skb_is_gso(skb) && skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
+ return 1;
+
/*
* Support both UDP and UDPLITE checksum algorithms, Don't use
* udph->len to get the real length without any protocol check,
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index e6c874a2b283..99afe8b1f1fb 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -73,7 +73,7 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla,
if (nla == NULL)
return -EINVAL;
- err = nla_parse_nested(tb, TCA_GACT_MAX, nla, gact_policy);
+ err = nla_parse_nested(tb, TCA_GACT_MAX, nla, gact_policy, NULL);
if (err < 0)
return err;
diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index 71e7ff22f7c9..c5dec308b8b1 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -443,7 +443,7 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
int ret = 0;
int err;
- err = nla_parse_nested(tb, TCA_IFE_MAX, nla, ife_policy);
+ err = nla_parse_nested(tb, TCA_IFE_MAX, nla, ife_policy, NULL);
if (err < 0)
return err;
@@ -514,7 +514,7 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
if (tb[TCA_IFE_METALST]) {
err = nla_parse_nested(tb2, IFE_META_MAX, tb[TCA_IFE_METALST],
- NULL);
+ NULL, NULL);
if (err) {
metadata_parse_err:
if (exists)
@@ -603,8 +603,8 @@ nla_put_failure:
return -1;
}
-int find_decode_metaid(struct sk_buff *skb, struct tcf_ife_info *ife,
- u16 metaid, u16 mlen, void *mdata)
+static int find_decode_metaid(struct sk_buff *skb, struct tcf_ife_info *ife,
+ u16 metaid, u16 mlen, void *mdata)
{
struct tcf_meta_info *e;
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index 992ef8d624f1..36f0ced9e60c 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -107,7 +107,7 @@ static int __tcf_ipt_init(struct tc_action_net *tn, struct nlattr *nla,
if (nla == NULL)
return -EINVAL;
- err = nla_parse_nested(tb, TCA_IPT_MAX, nla, ipt_policy);
+ err = nla_parse_nested(tb, TCA_IPT_MAX, nla, ipt_policy, NULL);
if (err < 0)
return err;
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index af49c7dca860..1b5549ababd4 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -87,7 +87,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
if (nla == NULL)
return -EINVAL;
- ret = nla_parse_nested(tb, TCA_MIRRED_MAX, nla, mirred_policy);
+ ret = nla_parse_nested(tb, TCA_MIRRED_MAX, nla, mirred_policy, NULL);
if (ret < 0)
return ret;
if (tb[TCA_MIRRED_PARMS] == NULL)
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index 9b6aec665495..9016ab8a0649 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -50,7 +50,7 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est,
if (nla == NULL)
return -EINVAL;
- err = nla_parse_nested(tb, TCA_NAT_MAX, nla, nat_policy);
+ err = nla_parse_nested(tb, TCA_NAT_MAX, nla, nat_policy, NULL);
if (err < 0)
return err;
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index c1310472f620..164b5ac094be 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -72,7 +72,7 @@ static struct tcf_pedit_key_ex *tcf_pedit_keys_ex_parse(struct nlattr *nla,
}
err = nla_parse_nested(tb, TCA_PEDIT_KEY_EX_MAX, ka,
- pedit_key_ex_policy);
+ pedit_key_ex_policy, NULL);
if (err)
goto err_out;
@@ -147,7 +147,7 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
if (nla == NULL)
return -EINVAL;
- err = nla_parse_nested(tb, TCA_PEDIT_MAX, nla, pedit_policy);
+ err = nla_parse_nested(tb, TCA_PEDIT_MAX, nla, pedit_policy, NULL);
if (err < 0)
return err;
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 0ba91d1ce994..f42008b29311 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -90,7 +90,7 @@ static int tcf_act_police_init(struct net *net, struct nlattr *nla,
if (nla == NULL)
return -EINVAL;
- err = nla_parse_nested(tb, TCA_POLICE_MAX, nla, police_policy);
+ err = nla_parse_nested(tb, TCA_POLICE_MAX, nla, police_policy, NULL);
if (err < 0)
return err;
diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c
index 0b8217b4763f..59d6645a4007 100644
--- a/net/sched/act_sample.c
+++ b/net/sched/act_sample.c
@@ -50,7 +50,7 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla,
if (!nla)
return -EINVAL;
- ret = nla_parse_nested(tb, TCA_SAMPLE_MAX, nla, sample_policy);
+ ret = nla_parse_nested(tb, TCA_SAMPLE_MAX, nla, sample_policy, NULL);
if (ret < 0)
return ret;
if (!tb[TCA_SAMPLE_PARMS] || !tb[TCA_SAMPLE_RATE] ||
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index 823a73ad0c60..43605e7ce051 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -94,7 +94,7 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla,
if (nla == NULL)
return -EINVAL;
- err = nla_parse_nested(tb, TCA_DEF_MAX, nla, simple_policy);
+ err = nla_parse_nested(tb, TCA_DEF_MAX, nla, simple_policy, NULL);
if (err < 0)
return err;
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index 06ccae3c12ee..6b3e65d7de0c 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -82,7 +82,7 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
if (nla == NULL)
return -EINVAL;
- err = nla_parse_nested(tb, TCA_SKBEDIT_MAX, nla, skbedit_policy);
+ err = nla_parse_nested(tb, TCA_SKBEDIT_MAX, nla, skbedit_policy, NULL);
if (err < 0)
return err;
diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c
index c736627f8f4a..a73c4bbcada2 100644
--- a/net/sched/act_skbmod.c
+++ b/net/sched/act_skbmod.c
@@ -103,7 +103,7 @@ static int tcf_skbmod_init(struct net *net, struct nlattr *nla,
if (!nla)
return -EINVAL;
- err = nla_parse_nested(tb, TCA_SKBMOD_MAX, nla, skbmod_policy);
+ err = nla_parse_nested(tb, TCA_SKBMOD_MAX, nla, skbmod_policy, NULL);
if (err < 0)
return err;
diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c
index e3a58e021198..b9a2f241a5b3 100644
--- a/net/sched/act_tunnel_key.c
+++ b/net/sched/act_tunnel_key.c
@@ -89,7 +89,8 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
if (!nla)
return -EINVAL;
- err = nla_parse_nested(tb, TCA_TUNNEL_KEY_MAX, nla, tunnel_key_policy);
+ err = nla_parse_nested(tb, TCA_TUNNEL_KEY_MAX, nla, tunnel_key_policy,
+ NULL);
if (err < 0)
return err;
diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
index 19e0dba305ce..13ba3a89f675 100644
--- a/net/sched/act_vlan.c
+++ b/net/sched/act_vlan.c
@@ -121,7 +121,7 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
if (!nla)
return -EINVAL;
- err = nla_parse_nested(tb, TCA_VLAN_MAX, nla, vlan_policy);
+ err = nla_parse_nested(tb, TCA_VLAN_MAX, nla, vlan_policy, NULL);
if (err < 0)
return err;
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 732f7cae459d..22f88b35a546 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -178,14 +178,11 @@ errout:
return ERR_PTR(err);
}
-static bool tcf_proto_destroy(struct tcf_proto *tp, bool force)
+static void tcf_proto_destroy(struct tcf_proto *tp)
{
- if (tp->ops->destroy(tp, force)) {
- module_put(tp->ops->owner);
- kfree_rcu(tp, rcu);
- return true;
- }
- return false;
+ tp->ops->destroy(tp);
+ module_put(tp->ops->owner);
+ kfree_rcu(tp, rcu);
}
void tcf_destroy_chain(struct tcf_proto __rcu **fl)
@@ -194,14 +191,15 @@ void tcf_destroy_chain(struct tcf_proto __rcu **fl)
while ((tp = rtnl_dereference(*fl)) != NULL) {
RCU_INIT_POINTER(*fl, tp->next);
- tcf_proto_destroy(tp, true);
+ tcf_proto_destroy(tp);
}
}
EXPORT_SYMBOL(tcf_destroy_chain);
/* Add/change/delete/get a filter node */
-static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n)
+static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
struct nlattr *tca[TCA_MAX + 1];
@@ -229,7 +227,7 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n)
replay:
tp_created = 0;
- err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, NULL);
+ err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, NULL, extack);
if (err < 0)
return err;
@@ -360,7 +358,7 @@ replay:
RCU_INIT_POINTER(*back, next);
tfilter_notify(net, skb, n, tp, fh,
RTM_DELTFILTER, false);
- tcf_proto_destroy(tp, true);
+ tcf_proto_destroy(tp);
err = 0;
goto errout;
}
@@ -371,24 +369,28 @@ replay:
goto errout;
}
} else {
+ bool last;
+
switch (n->nlmsg_type) {
case RTM_NEWTFILTER:
if (n->nlmsg_flags & NLM_F_EXCL) {
if (tp_created)
- tcf_proto_destroy(tp, true);
+ tcf_proto_destroy(tp);
err = -EEXIST;
goto errout;
}
break;
case RTM_DELTFILTER:
- err = tp->ops->delete(tp, fh);
+ err = tp->ops->delete(tp, fh, &last);
if (err)
goto errout;
next = rtnl_dereference(tp->next);
tfilter_notify(net, skb, n, tp, t->tcm_handle,
RTM_DELTFILTER, false);
- if (tcf_proto_destroy(tp, false))
+ if (last) {
RCU_INIT_POINTER(*back, next);
+ tcf_proto_destroy(tp);
+ }
goto errout;
case RTM_GETTFILTER:
err = tfilter_notify(net, skb, n, tp, fh,
@@ -410,7 +412,7 @@ replay:
tfilter_notify(net, skb, n, tp, fh, RTM_NEWTFILTER, false);
} else {
if (tp_created)
- tcf_proto_destroy(tp, true);
+ tcf_proto_destroy(tp);
}
errout:
diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
index 5877f6061b57..c4fd63a068f9 100644
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c
@@ -93,30 +93,28 @@ static void basic_delete_filter(struct rcu_head *head)
kfree(f);
}
-static bool basic_destroy(struct tcf_proto *tp, bool force)
+static void basic_destroy(struct tcf_proto *tp)
{
struct basic_head *head = rtnl_dereference(tp->root);
struct basic_filter *f, *n;
- if (!force && !list_empty(&head->flist))
- return false;
-
list_for_each_entry_safe(f, n, &head->flist, link) {
list_del_rcu(&f->link);
tcf_unbind_filter(tp, &f->res);
call_rcu(&f->rcu, basic_delete_filter);
}
kfree_rcu(head, rcu);
- return true;
}
-static int basic_delete(struct tcf_proto *tp, unsigned long arg)
+static int basic_delete(struct tcf_proto *tp, unsigned long arg, bool *last)
{
+ struct basic_head *head = rtnl_dereference(tp->root);
struct basic_filter *f = (struct basic_filter *) arg;
list_del_rcu(&f->link);
tcf_unbind_filter(tp, &f->res);
call_rcu(&f->rcu, basic_delete_filter);
+ *last = list_empty(&head->flist);
return 0;
}
@@ -174,7 +172,7 @@ static int basic_change(struct net *net, struct sk_buff *in_skb,
return -EINVAL;
err = nla_parse_nested(tb, TCA_BASIC_MAX, tca[TCA_OPTIONS],
- basic_policy);
+ basic_policy, NULL);
if (err < 0)
return err;
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index 80f688436dd7..5ebeae996e63 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -274,25 +274,24 @@ static void __cls_bpf_delete(struct tcf_proto *tp, struct cls_bpf_prog *prog)
call_rcu(&prog->rcu, cls_bpf_delete_prog_rcu);
}
-static int cls_bpf_delete(struct tcf_proto *tp, unsigned long arg)
+static int cls_bpf_delete(struct tcf_proto *tp, unsigned long arg, bool *last)
{
+ struct cls_bpf_head *head = rtnl_dereference(tp->root);
+
__cls_bpf_delete(tp, (struct cls_bpf_prog *) arg);
+ *last = list_empty(&head->plist);
return 0;
}
-static bool cls_bpf_destroy(struct tcf_proto *tp, bool force)
+static void cls_bpf_destroy(struct tcf_proto *tp)
{
struct cls_bpf_head *head = rtnl_dereference(tp->root);
struct cls_bpf_prog *prog, *tmp;
- if (!force && !list_empty(&head->plist))
- return false;
-
list_for_each_entry_safe(prog, tmp, &head->plist, link)
__cls_bpf_delete(tp, prog);
kfree_rcu(head, rcu);
- return true;
}
static unsigned long cls_bpf_get(struct tcf_proto *tp, u32 handle)
@@ -478,7 +477,8 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
if (tca[TCA_OPTIONS] == NULL)
return -EINVAL;
- ret = nla_parse_nested(tb, TCA_BPF_MAX, tca[TCA_OPTIONS], bpf_policy);
+ ret = nla_parse_nested(tb, TCA_BPF_MAX, tca[TCA_OPTIONS], bpf_policy,
+ NULL);
if (ret < 0)
return ret;
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
index c1f20077837f..12ce547eea04 100644
--- a/net/sched/cls_cgroup.c
+++ b/net/sched/cls_cgroup.c
@@ -99,7 +99,7 @@ static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb,
new->handle = handle;
new->tp = tp;
err = nla_parse_nested(tb, TCA_CGROUP_MAX, tca[TCA_OPTIONS],
- cgroup_policy);
+ cgroup_policy, NULL);
if (err < 0)
goto errout;
@@ -131,20 +131,16 @@ errout:
return err;
}
-static bool cls_cgroup_destroy(struct tcf_proto *tp, bool force)
+static void cls_cgroup_destroy(struct tcf_proto *tp)
{
struct cls_cgroup_head *head = rtnl_dereference(tp->root);
- if (!force)
- return false;
/* Head can still be NULL due to cls_cgroup_init(). */
if (head)
call_rcu(&head->rcu, cls_cgroup_destroy_rcu);
-
- return true;
}
-static int cls_cgroup_delete(struct tcf_proto *tp, unsigned long arg)
+static int cls_cgroup_delete(struct tcf_proto *tp, unsigned long arg, bool *last)
{
return -EOPNOTSUPP;
}
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index 3d6b9286c203..3065752b9cda 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -400,7 +400,7 @@ static int flow_change(struct net *net, struct sk_buff *in_skb,
if (opt == NULL)
return -EINVAL;
- err = nla_parse_nested(tb, TCA_FLOW_MAX, opt, flow_policy);
+ err = nla_parse_nested(tb, TCA_FLOW_MAX, opt, flow_policy, NULL);
if (err < 0)
return err;
@@ -508,9 +508,8 @@ static int flow_change(struct net *net, struct sk_buff *in_skb,
get_random_bytes(&fnew->hashrnd, 4);
}
- fnew->perturb_timer.function = flow_perturbation;
- fnew->perturb_timer.data = (unsigned long)fnew;
- init_timer_deferrable(&fnew->perturb_timer);
+ setup_deferrable_timer(&fnew->perturb_timer, flow_perturbation,
+ (unsigned long)fnew);
tcf_exts_change(tp, &fnew->exts, &e);
tcf_em_tree_change(tp, &fnew->ematches, &t);
@@ -563,12 +562,14 @@ err1:
return err;
}
-static int flow_delete(struct tcf_proto *tp, unsigned long arg)
+static int flow_delete(struct tcf_proto *tp, unsigned long arg, bool *last)
{
+ struct flow_head *head = rtnl_dereference(tp->root);
struct flow_filter *f = (struct flow_filter *)arg;
list_del_rcu(&f->list);
call_rcu(&f->rcu, flow_destroy_filter);
+ *last = list_empty(&head->filters);
return 0;
}
@@ -584,20 +585,16 @@ static int flow_init(struct tcf_proto *tp)
return 0;
}
-static bool flow_destroy(struct tcf_proto *tp, bool force)
+static void flow_destroy(struct tcf_proto *tp)
{
struct flow_head *head = rtnl_dereference(tp->root);
struct flow_filter *f, *next;
- if (!force && !list_empty(&head->filters))
- return false;
-
list_for_each_entry_safe(f, next, &head->filters, list) {
list_del_rcu(&f->list);
call_rcu(&f->rcu, flow_destroy_filter);
}
kfree_rcu(head, rcu);
- return true;
}
static unsigned long flow_get(struct tcf_proto *tp, u32 handle)
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 9d0c99d2e9fb..ca526c0881bd 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -18,6 +18,7 @@
#include <linux/if_ether.h>
#include <linux/in6.h>
#include <linux/ip.h>
+#include <linux/mpls.h>
#include <net/sch_generic.h>
#include <net/pkt_cls.h>
@@ -47,6 +48,7 @@ struct fl_flow_key {
struct flow_dissector_key_ipv6_addrs enc_ipv6;
};
struct flow_dissector_key_ports enc_tp;
+ struct flow_dissector_key_mpls mpls;
} __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */
struct fl_flow_mask_range {
@@ -328,21 +330,16 @@ static void fl_destroy_rcu(struct rcu_head *rcu)
schedule_work(&head->work);
}
-static bool fl_destroy(struct tcf_proto *tp, bool force)
+static void fl_destroy(struct tcf_proto *tp)
{
struct cls_fl_head *head = rtnl_dereference(tp->root);
struct cls_fl_filter *f, *next;
- if (!force && !list_empty(&head->filters))
- return false;
-
list_for_each_entry_safe(f, next, &head->filters, list)
__fl_delete(tp, f);
__module_get(THIS_MODULE);
call_rcu(&head->rcu, fl_destroy_rcu);
-
- return true;
}
static unsigned long fl_get(struct tcf_proto *tp, u32 handle)
@@ -423,6 +420,10 @@ static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = {
[TCA_FLOWER_KEY_ARP_SHA_MASK] = { .len = ETH_ALEN },
[TCA_FLOWER_KEY_ARP_THA] = { .len = ETH_ALEN },
[TCA_FLOWER_KEY_ARP_THA_MASK] = { .len = ETH_ALEN },
+ [TCA_FLOWER_KEY_MPLS_TTL] = { .type = NLA_U8 },
+ [TCA_FLOWER_KEY_MPLS_BOS] = { .type = NLA_U8 },
+ [TCA_FLOWER_KEY_MPLS_TC] = { .type = NLA_U8 },
+ [TCA_FLOWER_KEY_MPLS_LABEL] = { .type = NLA_U32 },
};
static void fl_set_key_val(struct nlattr **tb,
@@ -438,6 +439,41 @@ static void fl_set_key_val(struct nlattr **tb,
memcpy(mask, nla_data(tb[mask_type]), len);
}
+static int fl_set_key_mpls(struct nlattr **tb,
+ struct flow_dissector_key_mpls *key_val,
+ struct flow_dissector_key_mpls *key_mask)
+{
+ if (tb[TCA_FLOWER_KEY_MPLS_TTL]) {
+ key_val->mpls_ttl = nla_get_u8(tb[TCA_FLOWER_KEY_MPLS_TTL]);
+ key_mask->mpls_ttl = MPLS_TTL_MASK;
+ }
+ if (tb[TCA_FLOWER_KEY_MPLS_BOS]) {
+ u8 bos = nla_get_u8(tb[TCA_FLOWER_KEY_MPLS_BOS]);
+
+ if (bos & ~MPLS_BOS_MASK)
+ return -EINVAL;
+ key_val->mpls_bos = bos;
+ key_mask->mpls_bos = MPLS_BOS_MASK;
+ }
+ if (tb[TCA_FLOWER_KEY_MPLS_TC]) {
+ u8 tc = nla_get_u8(tb[TCA_FLOWER_KEY_MPLS_TC]);
+
+ if (tc & ~MPLS_TC_MASK)
+ return -EINVAL;
+ key_val->mpls_tc = tc;
+ key_mask->mpls_tc = MPLS_TC_MASK;
+ }
+ if (tb[TCA_FLOWER_KEY_MPLS_LABEL]) {
+ u32 label = nla_get_u32(tb[TCA_FLOWER_KEY_MPLS_LABEL]);
+
+ if (label & ~MPLS_LABEL_MASK)
+ return -EINVAL;
+ key_val->mpls_label = label;
+ key_mask->mpls_label = MPLS_LABEL_MASK;
+ }
+ return 0;
+}
+
static void fl_set_key_vlan(struct nlattr **tb,
struct flow_dissector_key_vlan *key_val,
struct flow_dissector_key_vlan *key_mask)
@@ -594,6 +630,11 @@ static int fl_set_key(struct net *net, struct nlattr **tb,
&mask->icmp.code,
TCA_FLOWER_KEY_ICMPV6_CODE_MASK,
sizeof(key->icmp.code));
+ } else if (key->basic.n_proto == htons(ETH_P_MPLS_UC) ||
+ key->basic.n_proto == htons(ETH_P_MPLS_MC)) {
+ ret = fl_set_key_mpls(tb, &key->mpls, &mask->mpls);
+ if (ret)
+ return ret;
} else if (key->basic.n_proto == htons(ETH_P_ARP) ||
key->basic.n_proto == htons(ETH_P_RARP)) {
fl_set_key_val(tb, &key->arp.sip, TCA_FLOWER_KEY_ARP_SIP,
@@ -730,6 +771,8 @@ static void fl_init_dissector(struct cls_fl_head *head,
FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
FLOW_DISSECTOR_KEY_ARP, arp);
FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
+ FLOW_DISSECTOR_KEY_MPLS, mpls);
+ FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
FLOW_DISSECTOR_KEY_VLAN, vlan);
FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
FLOW_DISSECTOR_KEY_ENC_KEYID, enc_key_id);
@@ -848,7 +891,8 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
if (!tb)
return -ENOBUFS;
- err = nla_parse_nested(tb, TCA_FLOWER_MAX, tca[TCA_OPTIONS], fl_policy);
+ err = nla_parse_nested(tb, TCA_FLOWER_MAX, tca[TCA_OPTIONS],
+ fl_policy, NULL);
if (err < 0)
goto errout_tb;
@@ -946,7 +990,7 @@ errout_tb:
return err;
}
-static int fl_delete(struct tcf_proto *tp, unsigned long arg)
+static int fl_delete(struct tcf_proto *tp, unsigned long arg, bool *last)
{
struct cls_fl_head *head = rtnl_dereference(tp->root);
struct cls_fl_filter *f = (struct cls_fl_filter *) arg;
@@ -955,6 +999,7 @@ static int fl_delete(struct tcf_proto *tp, unsigned long arg)
rhashtable_remove_fast(&head->ht, &f->ht_node,
head->ht_params);
__fl_delete(tp, f);
+ *last = list_empty(&head->filters);
return 0;
}
@@ -994,6 +1039,41 @@ static int fl_dump_key_val(struct sk_buff *skb,
return 0;
}
+static int fl_dump_key_mpls(struct sk_buff *skb,
+ struct flow_dissector_key_mpls *mpls_key,
+ struct flow_dissector_key_mpls *mpls_mask)
+{
+ int err;
+
+ if (!memchr_inv(mpls_mask, 0, sizeof(*mpls_mask)))
+ return 0;
+ if (mpls_mask->mpls_ttl) {
+ err = nla_put_u8(skb, TCA_FLOWER_KEY_MPLS_TTL,
+ mpls_key->mpls_ttl);
+ if (err)
+ return err;
+ }
+ if (mpls_mask->mpls_tc) {
+ err = nla_put_u8(skb, TCA_FLOWER_KEY_MPLS_TC,
+ mpls_key->mpls_tc);
+ if (err)
+ return err;
+ }
+ if (mpls_mask->mpls_label) {
+ err = nla_put_u32(skb, TCA_FLOWER_KEY_MPLS_LABEL,
+ mpls_key->mpls_label);
+ if (err)
+ return err;
+ }
+ if (mpls_mask->mpls_bos) {
+ err = nla_put_u8(skb, TCA_FLOWER_KEY_MPLS_BOS,
+ mpls_key->mpls_bos);
+ if (err)
+ return err;
+ }
+ return 0;
+}
+
static int fl_dump_key_vlan(struct sk_buff *skb,
struct flow_dissector_key_vlan *vlan_key,
struct flow_dissector_key_vlan *vlan_mask)
@@ -1099,6 +1179,9 @@ static int fl_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
sizeof(key->basic.n_proto)))
goto nla_put_failure;
+ if (fl_dump_key_mpls(skb, &key->mpls, &mask->mpls))
+ goto nla_put_failure;
+
if (fl_dump_key_vlan(skb, &key->vlan, &mask->vlan))
goto nla_put_failure;
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
index 9dc63d54e167..d3885362e017 100644
--- a/net/sched/cls_fw.c
+++ b/net/sched/cls_fw.c
@@ -127,20 +127,14 @@ static void fw_delete_filter(struct rcu_head *head)
kfree(f);
}
-static bool fw_destroy(struct tcf_proto *tp, bool force)
+static void fw_destroy(struct tcf_proto *tp)
{
struct fw_head *head = rtnl_dereference(tp->root);
struct fw_filter *f;
int h;
if (head == NULL)
- return true;
-
- if (!force) {
- for (h = 0; h < HTSIZE; h++)
- if (rcu_access_pointer(head->ht[h]))
- return false;
- }
+ return;
for (h = 0; h < HTSIZE; h++) {
while ((f = rtnl_dereference(head->ht[h])) != NULL) {
@@ -150,17 +144,17 @@ static bool fw_destroy(struct tcf_proto *tp, bool force)
call_rcu(&f->rcu, fw_delete_filter);
}
}
- RCU_INIT_POINTER(tp->root, NULL);
kfree_rcu(head, rcu);
- return true;
}
-static int fw_delete(struct tcf_proto *tp, unsigned long arg)
+static int fw_delete(struct tcf_proto *tp, unsigned long arg, bool *last)
{
struct fw_head *head = rtnl_dereference(tp->root);
struct fw_filter *f = (struct fw_filter *)arg;
struct fw_filter __rcu **fp;
struct fw_filter *pfp;
+ int ret = -EINVAL;
+ int h;
if (head == NULL || f == NULL)
goto out;
@@ -173,11 +167,21 @@ static int fw_delete(struct tcf_proto *tp, unsigned long arg)
RCU_INIT_POINTER(*fp, rtnl_dereference(f->next));
tcf_unbind_filter(tp, &f->res);
call_rcu(&f->rcu, fw_delete_filter);
- return 0;
+ ret = 0;
+ break;
}
}
+
+ *last = true;
+ for (h = 0; h < HTSIZE; h++) {
+ if (rcu_access_pointer(head->ht[h])) {
+ *last = false;
+ break;
+ }
+ }
+
out:
- return -EINVAL;
+ return ret;
}
static const struct nla_policy fw_policy[TCA_FW_MAX + 1] = {
@@ -250,7 +254,7 @@ static int fw_change(struct net *net, struct sk_buff *in_skb,
if (!opt)
return handle ? -EINVAL : 0; /* Succeed if it is old method. */
- err = nla_parse_nested(tb, TCA_FW_MAX, opt, fw_policy);
+ err = nla_parse_nested(tb, TCA_FW_MAX, opt, fw_policy, NULL);
if (err < 0)
return err;
diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c
index 224eb2c14346..dee469fed967 100644
--- a/net/sched/cls_matchall.c
+++ b/net/sched/cls_matchall.c
@@ -90,19 +90,18 @@ static void mall_destroy_hw_filter(struct tcf_proto *tp,
&offload);
}
-static bool mall_destroy(struct tcf_proto *tp, bool force)
+static void mall_destroy(struct tcf_proto *tp)
{
struct cls_mall_head *head = rtnl_dereference(tp->root);
struct net_device *dev = tp->q->dev_queue->dev;
if (!head)
- return true;
+ return;
if (tc_should_offload(dev, tp, head->flags))
mall_destroy_hw_filter(tp, head, (unsigned long) head);
call_rcu(&head->rcu, mall_destroy_rcu);
- return true;
}
static unsigned long mall_get(struct tcf_proto *tp, u32 handle)
@@ -161,8 +160,8 @@ static int mall_change(struct net *net, struct sk_buff *in_skb,
if (head)
return -EEXIST;
- err = nla_parse_nested(tb, TCA_MATCHALL_MAX,
- tca[TCA_OPTIONS], mall_policy);
+ err = nla_parse_nested(tb, TCA_MATCHALL_MAX, tca[TCA_OPTIONS],
+ mall_policy, NULL);
if (err < 0)
return err;
@@ -204,8 +203,7 @@ static int mall_change(struct net *net, struct sk_buff *in_skb,
*arg = (unsigned long) head;
rcu_assign_pointer(tp->root, new);
- if (head)
- call_rcu(&head->rcu, mall_destroy_rcu);
+ call_rcu(&head->rcu, mall_destroy_rcu);
return 0;
err_replace_hw_filter:
@@ -216,7 +214,7 @@ err_exts_init:
return err;
}
-static int mall_delete(struct tcf_proto *tp, unsigned long arg)
+static int mall_delete(struct tcf_proto *tp, unsigned long arg, bool *last)
{
return -EOPNOTSUPP;
}
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index 455fc8f83d0a..d63d5502ee02 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -140,8 +140,6 @@ static int route4_classify(struct sk_buff *skb, const struct tcf_proto *tp,
goto failure;
id = dst->tclassid;
- if (head == NULL)
- goto old_method;
iif = inet_iif(skb);
@@ -194,15 +192,6 @@ restart:
route4_set_fastmap(head, id, iif, ROUTE4_FAILURE);
failure:
return -1;
-
-old_method:
- if (id && (TC_H_MAJ(id) == 0 ||
- !(TC_H_MAJ(id^tp->q->handle)))) {
- res->classid = id;
- res->class = 0;
- return 0;
- }
- return -1;
}
static inline u32 to_hash(u32 id)
@@ -234,9 +223,6 @@ static unsigned long route4_get(struct tcf_proto *tp, u32 handle)
struct route4_filter *f;
unsigned int h1, h2;
- if (!head)
- return 0;
-
h1 = to_hash(handle);
if (h1 > 256)
return 0;
@@ -276,20 +262,13 @@ static void route4_delete_filter(struct rcu_head *head)
kfree(f);
}
-static bool route4_destroy(struct tcf_proto *tp, bool force)
+static void route4_destroy(struct tcf_proto *tp)
{
struct route4_head *head = rtnl_dereference(tp->root);
int h1, h2;
if (head == NULL)
- return true;
-
- if (!force) {
- for (h1 = 0; h1 <= 256; h1++) {
- if (rcu_access_pointer(head->table[h1]))
- return false;
- }
- }
+ return;
for (h1 = 0; h1 <= 256; h1++) {
struct route4_bucket *b;
@@ -312,12 +291,10 @@ static bool route4_destroy(struct tcf_proto *tp, bool force)
kfree_rcu(b, rcu);
}
}
- RCU_INIT_POINTER(tp->root, NULL);
kfree_rcu(head, rcu);
- return true;
}
-static int route4_delete(struct tcf_proto *tp, unsigned long arg)
+static int route4_delete(struct tcf_proto *tp, unsigned long arg, bool *last)
{
struct route4_head *head = rtnl_dereference(tp->root);
struct route4_filter *f = (struct route4_filter *)arg;
@@ -325,7 +302,7 @@ static int route4_delete(struct tcf_proto *tp, unsigned long arg)
struct route4_filter *nf;
struct route4_bucket *b;
unsigned int h = 0;
- int i;
+ int i, h1;
if (!head || !f)
return -EINVAL;
@@ -356,16 +333,25 @@ static int route4_delete(struct tcf_proto *tp, unsigned long arg)
rt = rtnl_dereference(b->ht[i]);
if (rt)
- return 0;
+ goto out;
}
/* OK, session has no flows */
RCU_INIT_POINTER(head->table[to_hash(h)], NULL);
kfree_rcu(b, rcu);
+ break;
+ }
+ }
- return 0;
+out:
+ *last = true;
+ for (h1 = 0; h1 <= 256; h1++) {
+ if (rcu_access_pointer(head->table[h1])) {
+ *last = false;
+ break;
}
}
+
return 0;
}
@@ -489,7 +475,7 @@ static int route4_change(struct net *net, struct sk_buff *in_skb,
if (opt == NULL)
return handle ? -EINVAL : 0;
- err = nla_parse_nested(tb, TCA_ROUTE4_MAX, opt, route4_policy);
+ err = nla_parse_nested(tb, TCA_ROUTE4_MAX, opt, route4_policy, NULL);
if (err < 0)
return err;
diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h
index 322438fb3ffc..0d9d07798699 100644
--- a/net/sched/cls_rsvp.h
+++ b/net/sched/cls_rsvp.h
@@ -152,8 +152,6 @@ static int rsvp_classify(struct sk_buff *skb, const struct tcf_proto *tp,
return -1;
nhptr = ip_hdr(skb);
#endif
- if (unlikely(!head))
- return -1;
restart:
#if RSVP_DST_LEN == 4
@@ -302,22 +300,13 @@ static void rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f)
call_rcu(&f->rcu, rsvp_delete_filter_rcu);
}
-static bool rsvp_destroy(struct tcf_proto *tp, bool force)
+static void rsvp_destroy(struct tcf_proto *tp)
{
struct rsvp_head *data = rtnl_dereference(tp->root);
int h1, h2;
if (data == NULL)
- return true;
-
- if (!force) {
- for (h1 = 0; h1 < 256; h1++) {
- if (rcu_access_pointer(data->ht[h1]))
- return false;
- }
- }
-
- RCU_INIT_POINTER(tp->root, NULL);
+ return;
for (h1 = 0; h1 < 256; h1++) {
struct rsvp_session *s;
@@ -337,10 +326,9 @@ static bool rsvp_destroy(struct tcf_proto *tp, bool force)
}
}
kfree_rcu(data, rcu);
- return true;
}
-static int rsvp_delete(struct tcf_proto *tp, unsigned long arg)
+static int rsvp_delete(struct tcf_proto *tp, unsigned long arg, bool *last)
{
struct rsvp_head *head = rtnl_dereference(tp->root);
struct rsvp_filter *nfp, *f = (struct rsvp_filter *)arg;
@@ -348,7 +336,7 @@ static int rsvp_delete(struct tcf_proto *tp, unsigned long arg)
unsigned int h = f->handle;
struct rsvp_session __rcu **sp;
struct rsvp_session *nsp, *s = f->sess;
- int i;
+ int i, h1;
fp = &s->ht[(h >> 8) & 0xFF];
for (nfp = rtnl_dereference(*fp); nfp;
@@ -361,7 +349,7 @@ static int rsvp_delete(struct tcf_proto *tp, unsigned long arg)
for (i = 0; i <= 16; i++)
if (s->ht[i])
- return 0;
+ goto out;
/* OK, session has no flows */
sp = &head->ht[h & 0xFF];
@@ -370,13 +358,23 @@ static int rsvp_delete(struct tcf_proto *tp, unsigned long arg)
if (nsp == s) {
RCU_INIT_POINTER(*sp, s->next);
kfree_rcu(s, rcu);
- return 0;
+ goto out;
}
}
- return 0;
+ break;
}
}
+
+out:
+ *last = true;
+ for (h1 = 0; h1 < 256; h1++) {
+ if (rcu_access_pointer(head->ht[h1])) {
+ *last = false;
+ break;
+ }
+ }
+
return 0;
}
@@ -484,7 +482,7 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb,
if (opt == NULL)
return handle ? -EINVAL : 0;
- err = nla_parse_nested(tb, TCA_RSVP_MAX, opt, rsvp_policy);
+ err = nla_parse_nested(tb, TCA_RSVP_MAX, opt, rsvp_policy, NULL);
if (err < 0)
return err;
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index 0751245a6ace..8a8a58357c39 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -150,7 +150,7 @@ static void tcindex_destroy_fexts(struct rcu_head *head)
kfree(f);
}
-static int tcindex_delete(struct tcf_proto *tp, unsigned long arg)
+static int tcindex_delete(struct tcf_proto *tp, unsigned long arg, bool *last)
{
struct tcindex_data *p = rtnl_dereference(tp->root);
struct tcindex_filter_result *r = (struct tcindex_filter_result *) arg;
@@ -186,6 +186,8 @@ found:
call_rcu(&f->rcu, tcindex_destroy_fexts);
else
call_rcu(&r->rcu, tcindex_destroy_rexts);
+
+ *last = false;
return 0;
}
@@ -193,7 +195,9 @@ static int tcindex_destroy_element(struct tcf_proto *tp,
unsigned long arg,
struct tcf_walker *walker)
{
- return tcindex_delete(tp, arg);
+ bool last;
+
+ return tcindex_delete(tp, arg, &last);
}
static void __tcindex_destroy(struct rcu_head *head)
@@ -482,7 +486,7 @@ tcindex_change(struct net *net, struct sk_buff *in_skb,
if (!opt)
return 0;
- err = nla_parse_nested(tb, TCA_TCINDEX_MAX, opt, tcindex_policy);
+ err = nla_parse_nested(tb, TCA_TCINDEX_MAX, opt, tcindex_policy, NULL);
if (err < 0)
return err;
@@ -529,14 +533,11 @@ static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker)
}
}
-static bool tcindex_destroy(struct tcf_proto *tp, bool force)
+static void tcindex_destroy(struct tcf_proto *tp)
{
struct tcindex_data *p = rtnl_dereference(tp->root);
struct tcf_walker walker;
- if (!force)
- return false;
-
pr_debug("tcindex_destroy(tp %p),p %p\n", tp, p);
walker.count = 0;
walker.skip = 0;
@@ -544,7 +545,6 @@ static bool tcindex_destroy(struct tcf_proto *tp, bool force)
tcindex_walk(tp, &walker);
call_rcu(&p->rcu, __tcindex_destroy);
- return true;
}
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index 4dbe0c680fe6..d20e72a095d5 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -585,37 +585,13 @@ static bool ht_empty(struct tc_u_hnode *ht)
return true;
}
-static bool u32_destroy(struct tcf_proto *tp, bool force)
+static void u32_destroy(struct tcf_proto *tp)
{
struct tc_u_common *tp_c = tp->data;
struct tc_u_hnode *root_ht = rtnl_dereference(tp->root);
WARN_ON(root_ht == NULL);
- if (!force) {
- if (root_ht) {
- if (root_ht->refcnt > 1)
- return false;
- if (root_ht->refcnt == 1) {
- if (!ht_empty(root_ht))
- return false;
- }
- }
-
- if (tp_c->refcnt > 1)
- return false;
-
- if (tp_c->refcnt == 1) {
- struct tc_u_hnode *ht;
-
- for (ht = rtnl_dereference(tp_c->hlist);
- ht;
- ht = rtnl_dereference(ht->next))
- if (!ht_empty(ht))
- return false;
- }
- }
-
if (root_ht && --root_ht->refcnt == 0)
u32_destroy_hnode(tp, root_ht);
@@ -640,20 +616,22 @@ static bool u32_destroy(struct tcf_proto *tp, bool force)
}
tp->data = NULL;
- return true;
}
-static int u32_delete(struct tcf_proto *tp, unsigned long arg)
+static int u32_delete(struct tcf_proto *tp, unsigned long arg, bool *last)
{
struct tc_u_hnode *ht = (struct tc_u_hnode *)arg;
struct tc_u_hnode *root_ht = rtnl_dereference(tp->root);
+ struct tc_u_common *tp_c = tp->data;
+ int ret = 0;
if (ht == NULL)
- return 0;
+ goto out;
if (TC_U32_KEY(ht->handle)) {
u32_remove_hw_knode(tp, ht->handle);
- return u32_delete_key(tp, (struct tc_u_knode *)ht);
+ ret = u32_delete_key(tp, (struct tc_u_knode *)ht);
+ goto out;
}
if (root_ht == ht)
@@ -666,7 +644,40 @@ static int u32_delete(struct tcf_proto *tp, unsigned long arg)
return -EBUSY;
}
- return 0;
+out:
+ *last = true;
+ if (root_ht) {
+ if (root_ht->refcnt > 1) {
+ *last = false;
+ goto ret;
+ }
+ if (root_ht->refcnt == 1) {
+ if (!ht_empty(root_ht)) {
+ *last = false;
+ goto ret;
+ }
+ }
+ }
+
+ if (tp_c->refcnt > 1) {
+ *last = false;
+ goto ret;
+ }
+
+ if (tp_c->refcnt == 1) {
+ struct tc_u_hnode *ht;
+
+ for (ht = rtnl_dereference(tp_c->hlist);
+ ht;
+ ht = rtnl_dereference(ht->next))
+ if (!ht_empty(ht)) {
+ *last = false;
+ break;
+ }
+ }
+
+ret:
+ return ret;
}
#define NR_U32_NODE (1<<12)
@@ -860,7 +871,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
if (opt == NULL)
return handle ? -EINVAL : 0;
- err = nla_parse_nested(tb, TCA_U32_MAX, opt, u32_policy);
+ err = nla_parse_nested(tb, TCA_U32_MAX, opt, u32_policy, NULL);
if (err < 0)
return err;
diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c
index ae7e4f5b348b..eb0e9bab54c1 100644
--- a/net/sched/em_meta.c
+++ b/net/sched/em_meta.c
@@ -912,7 +912,7 @@ static int em_meta_change(struct net *net, void *data, int len,
struct tcf_meta_hdr *hdr;
struct meta_match *meta = NULL;
- err = nla_parse(tb, TCA_EM_META_MAX, data, len, meta_policy);
+ err = nla_parse(tb, TCA_EM_META_MAX, data, len, meta_policy, NULL);
if (err < 0)
goto errout;
diff --git a/net/sched/ematch.c b/net/sched/ematch.c
index fbb7ebfc58c6..03b677bc0700 100644
--- a/net/sched/ematch.c
+++ b/net/sched/ematch.c
@@ -314,7 +314,7 @@ int tcf_em_tree_validate(struct tcf_proto *tp, struct nlattr *nla,
if (!nla)
return 0;
- err = nla_parse_nested(tb, TCA_EMATCH_TREE_MAX, nla, em_policy);
+ err = nla_parse_nested(tb, TCA_EMATCH_TREE_MAX, nla, em_policy, NULL);
if (err < 0)
goto errout;
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index bcf49cd22786..e88342fde1bc 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -251,6 +251,15 @@ int qdisc_set_default(const char *name)
return ops ? 0 : -ENOENT;
}
+#ifdef CONFIG_NET_SCH_DEFAULT
+/* Set default value from kernel config */
+static int __init sch_default_qdisc(void)
+{
+ return qdisc_set_default(CONFIG_DEFAULT_NET_SCH);
+}
+late_initcall(sch_default_qdisc);
+#endif
+
/* We know handle. Find qdisc among all qdisc's attached to device
* (root qdisc, all its children, children of children etc.)
* Note: caller either uses rtnl or rcu_read_lock()
@@ -274,7 +283,7 @@ static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle)
return NULL;
}
-void qdisc_hash_add(struct Qdisc *q)
+void qdisc_hash_add(struct Qdisc *q, bool invisible)
{
if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
struct Qdisc *root = qdisc_dev(q)->qdisc;
@@ -282,6 +291,8 @@ void qdisc_hash_add(struct Qdisc *q)
WARN_ON_ONCE(root == &noop_qdisc);
ASSERT_RTNL();
hash_add_rcu(qdisc_dev(q)->qdisc_hash, &q->hash, q->handle);
+ if (invisible)
+ q->flags |= TCQ_F_INVISIBLE;
}
}
EXPORT_SYMBOL(qdisc_hash_add);
@@ -455,7 +466,7 @@ static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt)
u16 *tab = NULL;
int err;
- err = nla_parse_nested(tb, TCA_STAB_MAX, opt, stab_policy);
+ err = nla_parse_nested(tb, TCA_STAB_MAX, opt, stab_policy, NULL);
if (err < 0)
return ERR_PTR(err);
if (!tb[TCA_STAB_BASE])
@@ -1003,7 +1014,7 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
goto err_out4;
}
- qdisc_hash_add(sch);
+ qdisc_hash_add(sch, false);
return sch;
}
@@ -1114,7 +1125,8 @@ check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w)
* Delete/get qdisc.
*/
-static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n)
+static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
struct tcmsg *tcm = nlmsg_data(n);
@@ -1129,7 +1141,7 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n)
!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
return -EPERM;
- err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
+ err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL, extack);
if (err < 0)
return err;
@@ -1183,7 +1195,8 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n)
* Create/change qdisc.
*/
-static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n)
+static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
struct tcmsg *tcm;
@@ -1198,7 +1211,7 @@ static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n)
replay:
/* Reinit, just in case something touches this. */
- err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
+ err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL, extack);
if (err < 0)
return err;
@@ -1401,9 +1414,14 @@ nla_put_failure:
return -1;
}
-static bool tc_qdisc_dump_ignore(struct Qdisc *q)
+static bool tc_qdisc_dump_ignore(struct Qdisc *q, bool dump_invisible)
{
- return (q->flags & TCQ_F_BUILTIN) ? true : false;
+ if (q->flags & TCQ_F_BUILTIN)
+ return true;
+ if ((q->flags & TCQ_F_INVISIBLE) && !dump_invisible)
+ return true;
+
+ return false;
}
static int qdisc_notify(struct net *net, struct sk_buff *oskb,
@@ -1417,12 +1435,12 @@ static int qdisc_notify(struct net *net, struct sk_buff *oskb,
if (!skb)
return -ENOBUFS;
- if (old && !tc_qdisc_dump_ignore(old)) {
+ if (old && !tc_qdisc_dump_ignore(old, false)) {
if (tc_fill_qdisc(skb, old, clid, portid, n->nlmsg_seq,
0, RTM_DELQDISC) < 0)
goto err_out;
}
- if (new && !tc_qdisc_dump_ignore(new)) {
+ if (new && !tc_qdisc_dump_ignore(new, false)) {
if (tc_fill_qdisc(skb, new, clid, portid, n->nlmsg_seq,
old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
goto err_out;
@@ -1439,7 +1457,8 @@ err_out:
static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
struct netlink_callback *cb,
- int *q_idx_p, int s_q_idx, bool recur)
+ int *q_idx_p, int s_q_idx, bool recur,
+ bool dump_invisible)
{
int ret = 0, q_idx = *q_idx_p;
struct Qdisc *q;
@@ -1452,7 +1471,7 @@ static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
if (q_idx < s_q_idx) {
q_idx++;
} else {
- if (!tc_qdisc_dump_ignore(q) &&
+ if (!tc_qdisc_dump_ignore(q, dump_invisible) &&
tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, NLM_F_MULTI,
RTM_NEWQDISC) <= 0)
@@ -1474,7 +1493,7 @@ static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
q_idx++;
continue;
}
- if (!tc_qdisc_dump_ignore(q) &&
+ if (!tc_qdisc_dump_ignore(q, dump_invisible) &&
tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, NLM_F_MULTI,
RTM_NEWQDISC) <= 0)
@@ -1496,12 +1515,21 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
int idx, q_idx;
int s_idx, s_q_idx;
struct net_device *dev;
+ const struct nlmsghdr *nlh = cb->nlh;
+ struct tcmsg *tcm = nlmsg_data(nlh);
+ struct nlattr *tca[TCA_MAX + 1];
+ int err;
s_idx = cb->args[0];
s_q_idx = q_idx = cb->args[1];
idx = 0;
ASSERT_RTNL();
+
+ err = nlmsg_parse(nlh, sizeof(*tcm), tca, TCA_MAX, NULL, NULL);
+ if (err < 0)
+ return err;
+
for_each_netdev(net, dev) {
struct netdev_queue *dev_queue;
@@ -1512,13 +1540,14 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
q_idx = 0;
if (tc_dump_qdisc_root(dev->qdisc, skb, cb, &q_idx, s_q_idx,
- true) < 0)
+ true, tca[TCA_DUMP_INVISIBLE]) < 0)
goto done;
dev_queue = dev_ingress_queue(dev);
if (dev_queue &&
tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb,
- &q_idx, s_q_idx, false) < 0)
+ &q_idx, s_q_idx, false,
+ tca[TCA_DUMP_INVISIBLE]) < 0)
goto done;
cont:
@@ -1540,7 +1569,8 @@ done:
-static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n)
+static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
struct tcmsg *tcm = nlmsg_data(n);
@@ -1559,7 +1589,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n)
!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
return -EPERM;
- err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
+ err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL, extack);
if (err < 0)
return err;
@@ -1762,7 +1792,7 @@ static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb,
{
struct qdisc_dump_args arg;
- if (tc_qdisc_dump_ignore(q) ||
+ if (tc_qdisc_dump_ignore(q, false) ||
*t_p < s_t || !q->ops->cl_ops ||
(tcm->tcm_parent &&
TC_H_MAJ(tcm->tcm_parent) != q->handle)) {
@@ -1801,6 +1831,12 @@ static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb,
if (!qdisc_dev(root))
return 0;
+ if (tcm->tcm_parent) {
+ q = qdisc_match_from_root(root, TC_H_MAJ(tcm->tcm_parent));
+ if (q && tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
+ return -1;
+ return 0;
+ }
hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) {
if (tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
return -1;
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index 2209c2ddacbf..40cbceed4de8 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -214,7 +214,7 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent,
if (opt == NULL)
return -EINVAL;
- error = nla_parse_nested(tb, TCA_ATM_MAX, opt, atm_policy);
+ error = nla_parse_nested(tb, TCA_ATM_MAX, opt, atm_policy, NULL);
if (error < 0)
return error;
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index d6ca18dc04c3..7415859fd4c3 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -1137,7 +1137,7 @@ static int cbq_init(struct Qdisc *sch, struct nlattr *opt)
struct tc_ratespec *r;
int err;
- err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy);
+ err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy, NULL);
if (err < 0)
return err;
@@ -1161,6 +1161,8 @@ static int cbq_init(struct Qdisc *sch, struct nlattr *opt)
sch->handle);
if (!q->link.q)
q->link.q = &noop_qdisc;
+ else
+ qdisc_hash_add(q->link.q, true);
q->link.priority = TC_CBQ_MAXPRIO - 1;
q->link.priority2 = TC_CBQ_MAXPRIO - 1;
@@ -1472,7 +1474,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
if (opt == NULL)
return -EINVAL;
- err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy);
+ err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy, NULL);
if (err < 0)
return err;
@@ -1600,6 +1602,9 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
cl->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, classid);
if (!cl->q)
cl->q = &noop_qdisc;
+ else
+ qdisc_hash_add(cl->q, true);
+
cl->common.classid = classid;
cl->tparent = parent;
cl->qdisc = sch;
diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c
index 3b86a97bc67c..b30a2c70bd48 100644
--- a/net/sched/sch_choke.c
+++ b/net/sched/sch_choke.c
@@ -58,7 +58,6 @@ struct choke_sched_data {
/* Variables */
struct red_vars vars;
- struct tcf_proto __rcu *filter_list;
struct {
u32 prob_drop; /* Early probability drops */
u32 prob_mark; /* Early probability marks */
@@ -152,11 +151,6 @@ static inline void choke_set_classid(struct sk_buff *skb, u16 classid)
choke_skb_cb(skb)->classid = classid;
}
-static u16 choke_get_classid(const struct sk_buff *skb)
-{
- return choke_skb_cb(skb)->classid;
-}
-
/*
* Compare flow of two packets
* Returns true only if source and destination address and port match.
@@ -188,40 +182,6 @@ static bool choke_match_flow(struct sk_buff *skb1,
}
/*
- * Classify flow using either:
- * 1. pre-existing classification result in skb
- * 2. fast internal classification
- * 3. use TC filter based classification
- */
-static bool choke_classify(struct sk_buff *skb,
- struct Qdisc *sch, int *qerr)
-
-{
- struct choke_sched_data *q = qdisc_priv(sch);
- struct tcf_result res;
- struct tcf_proto *fl;
- int result;
-
- fl = rcu_dereference_bh(q->filter_list);
- result = tc_classify(skb, fl, &res, false);
- if (result >= 0) {
-#ifdef CONFIG_NET_CLS_ACT
- switch (result) {
- case TC_ACT_STOLEN:
- case TC_ACT_QUEUED:
- *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
- case TC_ACT_SHOT:
- return false;
- }
-#endif
- choke_set_classid(skb, TC_H_MIN(res.classid));
- return true;
- }
-
- return false;
-}
-
-/*
* Select a packet at random from queue
* HACK: since queue can have holes from previous deletion; retry several
* times to find a random skb but then just give up and return the head
@@ -257,25 +217,15 @@ static bool choke_match_random(const struct choke_sched_data *q,
return false;
oskb = choke_peek_random(q, pidx);
- if (rcu_access_pointer(q->filter_list))
- return choke_get_classid(nskb) == choke_get_classid(oskb);
-
return choke_match_flow(oskb, nskb);
}
static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch,
struct sk_buff **to_free)
{
- int ret = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
struct choke_sched_data *q = qdisc_priv(sch);
const struct red_parms *p = &q->parms;
- if (rcu_access_pointer(q->filter_list)) {
- /* If using external classifiers, get result and record it. */
- if (!choke_classify(skb, sch, &ret))
- goto other_drop; /* Packet was eaten by filter */
- }
-
choke_skb_cb(skb)->keys_valid = 0;
/* Compute average queue usage (see RED) */
q->vars.qavg = red_calc_qavg(p, &q->vars, sch->q.qlen);
@@ -339,12 +289,6 @@ static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch,
congestion_drop:
qdisc_drop(skb, sch, to_free);
return NET_XMIT_CN;
-
-other_drop:
- if (ret & __NET_XMIT_BYPASS)
- qdisc_qstats_drop(sch);
- __qdisc_drop(skb, to_free);
- return ret;
}
static struct sk_buff *choke_dequeue(struct Qdisc *sch)
@@ -413,7 +357,7 @@ static int choke_change(struct Qdisc *sch, struct nlattr *opt)
if (opt == NULL)
return -EINVAL;
- err = nla_parse_nested(tb, TCA_CHOKE_MAX, opt, choke_policy);
+ err = nla_parse_nested(tb, TCA_CHOKE_MAX, opt, choke_policy, NULL);
if (err < 0)
return err;
@@ -432,10 +376,7 @@ static int choke_change(struct Qdisc *sch, struct nlattr *opt)
if (mask != q->tab_mask) {
struct sk_buff **ntab;
- ntab = kcalloc(mask + 1, sizeof(struct sk_buff *),
- GFP_KERNEL | __GFP_NOWARN);
- if (!ntab)
- ntab = vzalloc((mask + 1) * sizeof(struct sk_buff *));
+ ntab = kvmalloc_array((mask + 1), sizeof(struct sk_buff *), GFP_KERNEL | __GFP_ZERO);
if (!ntab)
return -ENOMEM;
@@ -538,7 +479,6 @@ static void choke_destroy(struct Qdisc *sch)
{
struct choke_sched_data *q = qdisc_priv(sch);
- tcf_destroy_chain(&q->filter_list);
choke_free(q->tab);
}
diff --git a/net/sched/sch_codel.c b/net/sched/sch_codel.c
index 5bfa79ee657c..c518a1efcb9d 100644
--- a/net/sched/sch_codel.c
+++ b/net/sched/sch_codel.c
@@ -140,7 +140,7 @@ static int codel_change(struct Qdisc *sch, struct nlattr *opt)
if (!opt)
return -EINVAL;
- err = nla_parse_nested(tb, TCA_CODEL_MAX, opt, codel_policy);
+ err = nla_parse_nested(tb, TCA_CODEL_MAX, opt, codel_policy, NULL);
if (err < 0)
return err;
diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c
index bb4cbdf75004..58a8c32eab23 100644
--- a/net/sched/sch_drr.c
+++ b/net/sched/sch_drr.c
@@ -76,7 +76,7 @@ static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
if (!opt)
return -EINVAL;
- err = nla_parse_nested(tb, TCA_DRR_MAX, opt, drr_policy);
+ err = nla_parse_nested(tb, TCA_DRR_MAX, opt, drr_policy, NULL);
if (err < 0)
return err;
@@ -117,6 +117,8 @@ static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
&pfifo_qdisc_ops, classid);
if (cl->qdisc == NULL)
cl->qdisc = &noop_qdisc;
+ else
+ qdisc_hash_add(cl->qdisc, true);
if (tca[TCA_RATE]) {
err = gen_replace_estimator(&cl->bstats, NULL, &cl->rate_est,
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index 5334e309f17f..1c0f877f673a 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -129,7 +129,7 @@ static int dsmark_change(struct Qdisc *sch, u32 classid, u32 parent,
if (!opt)
goto errout;
- err = nla_parse_nested(tb, TCA_DSMARK_MAX, opt, dsmark_policy);
+ err = nla_parse_nested(tb, TCA_DSMARK_MAX, opt, dsmark_policy, NULL);
if (err < 0)
goto errout;
@@ -342,7 +342,7 @@ static int dsmark_init(struct Qdisc *sch, struct nlattr *opt)
if (!opt)
goto errout;
- err = nla_parse_nested(tb, TCA_DSMARK_MAX, opt, dsmark_policy);
+ err = nla_parse_nested(tb, TCA_DSMARK_MAX, opt, dsmark_policy, NULL);
if (err < 0)
goto errout;
@@ -374,6 +374,8 @@ static int dsmark_init(struct Qdisc *sch, struct nlattr *opt)
p->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, sch->handle);
if (p->q == NULL)
p->q = &noop_qdisc;
+ else
+ qdisc_hash_add(p->q, true);
pr_debug("%s: qdisc %p\n", __func__, p->q);
diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index a4f738ac7728..b488721a0059 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -624,16 +624,6 @@ static void fq_rehash(struct fq_sched_data *q,
q->stat_gc_flows += fcnt;
}
-static void *fq_alloc_node(size_t sz, int node)
-{
- void *ptr;
-
- ptr = kmalloc_node(sz, GFP_KERNEL | __GFP_REPEAT | __GFP_NOWARN, node);
- if (!ptr)
- ptr = vmalloc_node(sz, node);
- return ptr;
-}
-
static void fq_free(void *addr)
{
kvfree(addr);
@@ -650,7 +640,7 @@ static int fq_resize(struct Qdisc *sch, u32 log)
return 0;
/* If XPS was setup, we can allocate memory on right NUMA node */
- array = fq_alloc_node(sizeof(struct rb_root) << log,
+ array = kvmalloc_node(sizeof(struct rb_root) << log, GFP_KERNEL | __GFP_REPEAT,
netdev_queue_numa_node_read(sch->dev_queue));
if (!array)
return -ENOMEM;
@@ -698,7 +688,7 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt)
if (!opt)
return -EINVAL;
- err = nla_parse_nested(tb, TCA_FQ_MAX, opt, fq_policy);
+ err = nla_parse_nested(tb, TCA_FQ_MAX, opt, fq_policy, NULL);
if (err < 0)
return err;
diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
index 9f3a884d1590..9201abce928c 100644
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c
@@ -288,7 +288,6 @@ static struct sk_buff *fq_codel_dequeue(struct Qdisc *sch)
struct fq_codel_flow *flow;
struct list_head *head;
u32 prev_drop_count, prev_ecn_mark;
- unsigned int prev_backlog;
begin:
head = &q->new_flows;
@@ -307,7 +306,6 @@ begin:
prev_drop_count = q->cstats.drop_count;
prev_ecn_mark = q->cstats.ecn_mark;
- prev_backlog = sch->qstats.backlog;
skb = codel_dequeue(sch, &sch->qstats.backlog, &q->cparams,
&flow->cvars, &q->cstats, qdisc_pkt_len,
@@ -385,7 +383,8 @@ static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt)
if (!opt)
return -EINVAL;
- err = nla_parse_nested(tb, TCA_FQ_CODEL_MAX, opt, fq_codel_policy);
+ err = nla_parse_nested(tb, TCA_FQ_CODEL_MAX, opt, fq_codel_policy,
+ NULL);
if (err < 0)
return err;
if (tb[TCA_FQ_CODEL_FLOWS]) {
@@ -447,27 +446,13 @@ static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt)
return 0;
}
-static void *fq_codel_zalloc(size_t sz)
-{
- void *ptr = kzalloc(sz, GFP_KERNEL | __GFP_NOWARN);
-
- if (!ptr)
- ptr = vzalloc(sz);
- return ptr;
-}
-
-static void fq_codel_free(void *addr)
-{
- kvfree(addr);
-}
-
static void fq_codel_destroy(struct Qdisc *sch)
{
struct fq_codel_sched_data *q = qdisc_priv(sch);
tcf_destroy_chain(&q->filter_list);
- fq_codel_free(q->backlogs);
- fq_codel_free(q->flows);
+ kvfree(q->backlogs);
+ kvfree(q->flows);
}
static int fq_codel_init(struct Qdisc *sch, struct nlattr *opt)
@@ -494,13 +479,13 @@ static int fq_codel_init(struct Qdisc *sch, struct nlattr *opt)
}
if (!q->flows) {
- q->flows = fq_codel_zalloc(q->flows_cnt *
- sizeof(struct fq_codel_flow));
+ q->flows = kvzalloc(q->flows_cnt *
+ sizeof(struct fq_codel_flow), GFP_KERNEL);
if (!q->flows)
return -ENOMEM;
- q->backlogs = fq_codel_zalloc(q->flows_cnt * sizeof(u32));
+ q->backlogs = kvzalloc(q->flows_cnt * sizeof(u32), GFP_KERNEL);
if (!q->backlogs) {
- fq_codel_free(q->flows);
+ kvfree(q->flows);
return -ENOMEM;
}
for (i = 0; i < q->flows_cnt; i++) {
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 1a2f9e964330..52a2c55f6d9e 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -795,7 +795,7 @@ static void attach_default_qdiscs(struct net_device *dev)
}
#ifdef CONFIG_NET_SCHED
if (dev->qdisc != &noop_qdisc)
- qdisc_hash_add(dev->qdisc);
+ qdisc_hash_add(dev->qdisc, false);
#endif
}
diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c
index c78a093c551a..17c7130454bd 100644
--- a/net/sched/sch_gred.c
+++ b/net/sched/sch_gred.c
@@ -401,7 +401,7 @@ static int gred_change(struct Qdisc *sch, struct nlattr *opt)
if (opt == NULL)
return -EINVAL;
- err = nla_parse_nested(tb, TCA_GRED_MAX, opt, gred_policy);
+ err = nla_parse_nested(tb, TCA_GRED_MAX, opt, gred_policy, NULL);
if (err < 0)
return err;
@@ -470,7 +470,7 @@ static int gred_init(struct Qdisc *sch, struct nlattr *opt)
if (opt == NULL)
return -EINVAL;
- err = nla_parse_nested(tb, TCA_GRED_MAX, opt, gred_policy);
+ err = nla_parse_nested(tb, TCA_GRED_MAX, opt, gred_policy, NULL);
if (err < 0)
return err;
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 3ffaa6fb0990..5cb82f6c1b06 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -957,7 +957,7 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
if (opt == NULL)
return -EINVAL;
- err = nla_parse_nested(tb, TCA_HFSC_MAX, opt, hfsc_policy);
+ err = nla_parse_nested(tb, TCA_HFSC_MAX, opt, hfsc_policy, NULL);
if (err < 0)
return err;
@@ -1066,6 +1066,8 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
&pfifo_qdisc_ops, classid);
if (cl->qdisc == NULL)
cl->qdisc = &noop_qdisc;
+ else
+ qdisc_hash_add(cl->qdisc, true);
INIT_LIST_HEAD(&cl->children);
cl->vt_tree = RB_ROOT;
cl->cf_tree = RB_ROOT;
@@ -1425,6 +1427,8 @@ hfsc_init_qdisc(struct Qdisc *sch, struct nlattr *opt)
sch->handle);
if (q->root.qdisc == NULL)
q->root.qdisc = &noop_qdisc;
+ else
+ qdisc_hash_add(q->root.qdisc, true);
INIT_LIST_HEAD(&q->root.children);
q->root.vt_tree = RB_ROOT;
q->root.cf_tree = RB_ROOT;
diff --git a/net/sched/sch_hhf.c b/net/sched/sch_hhf.c
index 2fae8b5f1b80..51d3ba682af9 100644
--- a/net/sched/sch_hhf.c
+++ b/net/sched/sch_hhf.c
@@ -467,29 +467,14 @@ static void hhf_reset(struct Qdisc *sch)
rtnl_kfree_skbs(skb, skb);
}
-static void *hhf_zalloc(size_t sz)
-{
- void *ptr = kzalloc(sz, GFP_KERNEL | __GFP_NOWARN);
-
- if (!ptr)
- ptr = vzalloc(sz);
-
- return ptr;
-}
-
-static void hhf_free(void *addr)
-{
- kvfree(addr);
-}
-
static void hhf_destroy(struct Qdisc *sch)
{
int i;
struct hhf_sched_data *q = qdisc_priv(sch);
for (i = 0; i < HHF_ARRAYS_CNT; i++) {
- hhf_free(q->hhf_arrays[i]);
- hhf_free(q->hhf_valid_bits[i]);
+ kvfree(q->hhf_arrays[i]);
+ kvfree(q->hhf_valid_bits[i]);
}
for (i = 0; i < HH_FLOWS_CNT; i++) {
@@ -503,7 +488,7 @@ static void hhf_destroy(struct Qdisc *sch)
kfree(flow);
}
}
- hhf_free(q->hh_flows);
+ kvfree(q->hh_flows);
}
static const struct nla_policy hhf_policy[TCA_HHF_MAX + 1] = {
@@ -529,7 +514,7 @@ static int hhf_change(struct Qdisc *sch, struct nlattr *opt)
if (!opt)
return -EINVAL;
- err = nla_parse_nested(tb, TCA_HHF_MAX, opt, hhf_policy);
+ err = nla_parse_nested(tb, TCA_HHF_MAX, opt, hhf_policy, NULL);
if (err < 0)
return err;
@@ -609,8 +594,8 @@ static int hhf_init(struct Qdisc *sch, struct nlattr *opt)
if (!q->hh_flows) {
/* Initialize heavy-hitter flow table. */
- q->hh_flows = hhf_zalloc(HH_FLOWS_CNT *
- sizeof(struct list_head));
+ q->hh_flows = kvzalloc(HH_FLOWS_CNT *
+ sizeof(struct list_head), GFP_KERNEL);
if (!q->hh_flows)
return -ENOMEM;
for (i = 0; i < HH_FLOWS_CNT; i++)
@@ -624,8 +609,8 @@ static int hhf_init(struct Qdisc *sch, struct nlattr *opt)
/* Initialize heavy-hitter filter arrays. */
for (i = 0; i < HHF_ARRAYS_CNT; i++) {
- q->hhf_arrays[i] = hhf_zalloc(HHF_ARRAYS_LEN *
- sizeof(u32));
+ q->hhf_arrays[i] = kvzalloc(HHF_ARRAYS_LEN *
+ sizeof(u32), GFP_KERNEL);
if (!q->hhf_arrays[i]) {
/* Note: hhf_destroy() will be called
* by our caller.
@@ -637,8 +622,8 @@ static int hhf_init(struct Qdisc *sch, struct nlattr *opt)
/* Initialize valid bits of heavy-hitter filter arrays. */
for (i = 0; i < HHF_ARRAYS_CNT; i++) {
- q->hhf_valid_bits[i] = hhf_zalloc(HHF_ARRAYS_LEN /
- BITS_PER_BYTE);
+ q->hhf_valid_bits[i] = kvzalloc(HHF_ARRAYS_LEN /
+ BITS_PER_BYTE, GFP_KERNEL);
if (!q->hhf_valid_bits[i]) {
/* Note: hhf_destroy() will be called
* by our caller.
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 4cd5fb134bc9..570ef3b0c09b 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -1017,7 +1017,7 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt)
if (!opt)
return -EINVAL;
- err = nla_parse_nested(tb, TCA_HTB_MAX, opt, htb_policy);
+ err = nla_parse_nested(tb, TCA_HTB_MAX, opt, htb_policy, NULL);
if (err < 0)
return err;
@@ -1342,7 +1342,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
if (!opt)
goto failure;
- err = nla_parse_nested(tb, TCA_HTB_MAX, opt, htb_policy);
+ err = nla_parse_nested(tb, TCA_HTB_MAX, opt, htb_policy, NULL);
if (err < 0)
goto failure;
@@ -1460,6 +1460,8 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
qdisc_class_hash_insert(&q->clhash, &cl->common);
if (parent)
parent->children++;
+ if (cl->un.leaf.q != &noop_qdisc)
+ qdisc_hash_add(cl->un.leaf.q, true);
} else {
if (tca[TCA_RATE]) {
err = gen_replace_estimator(&cl->bstats, NULL,
diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c
index 20b7f1646f69..cadfdd4f1e52 100644
--- a/net/sched/sch_mq.c
+++ b/net/sched/sch_mq.c
@@ -84,7 +84,7 @@ static void mq_attach(struct Qdisc *sch)
qdisc_destroy(old);
#ifdef CONFIG_NET_SCHED
if (ntx < dev->real_num_tx_queues)
- qdisc_hash_add(qdisc);
+ qdisc_hash_add(qdisc, false);
#endif
}
diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c
index 922683418e53..0a4cf27ea54b 100644
--- a/net/sched/sch_mqprio.c
+++ b/net/sched/sch_mqprio.c
@@ -21,14 +21,13 @@
struct mqprio_sched {
struct Qdisc **qdiscs;
- int hw_owned;
+ int hw_offload;
};
static void mqprio_destroy(struct Qdisc *sch)
{
struct net_device *dev = qdisc_dev(sch);
struct mqprio_sched *priv = qdisc_priv(sch);
- struct tc_to_netdev tc = {.type = TC_SETUP_MQPRIO};
unsigned int ntx;
if (priv->qdiscs) {
@@ -39,10 +38,15 @@ static void mqprio_destroy(struct Qdisc *sch)
kfree(priv->qdiscs);
}
- if (priv->hw_owned && dev->netdev_ops->ndo_setup_tc)
+ if (priv->hw_offload && dev->netdev_ops->ndo_setup_tc) {
+ struct tc_mqprio_qopt offload = { 0 };
+ struct tc_to_netdev tc = { .type = TC_SETUP_MQPRIO,
+ { .mqprio = &offload } };
+
dev->netdev_ops->ndo_setup_tc(dev, sch->handle, 0, &tc);
- else
+ } else {
netdev_set_num_tc(dev, 0);
+ }
}
static int mqprio_parse_opt(struct net_device *dev, struct tc_mqprio_qopt *qopt)
@@ -59,15 +63,20 @@ static int mqprio_parse_opt(struct net_device *dev, struct tc_mqprio_qopt *qopt)
return -EINVAL;
}
- /* net_device does not support requested operation */
- if (qopt->hw && !dev->netdev_ops->ndo_setup_tc)
- return -EINVAL;
+ /* Limit qopt->hw to maximum supported offload value. Drivers have
+ * the option of overriding this later if they don't support the a
+ * given offload type.
+ */
+ if (qopt->hw > TC_MQPRIO_HW_OFFLOAD_MAX)
+ qopt->hw = TC_MQPRIO_HW_OFFLOAD_MAX;
- /* if hw owned qcount and qoffset are taken from LLD so
- * no reason to verify them here
+ /* If hardware offload is requested we will leave it to the device
+ * to either populate the queue counts itself or to validate the
+ * provided queue counts. If ndo_setup_tc is not present then
+ * hardware doesn't support offload and we should return an error.
*/
if (qopt->hw)
- return 0;
+ return dev->netdev_ops->ndo_setup_tc ? 0 : -EINVAL;
for (i = 0; i < qopt->num_tc; i++) {
unsigned int last = qopt->offset[i] + qopt->count[i];
@@ -139,13 +148,15 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt)
* supplied and verified mapping
*/
if (qopt->hw) {
- struct tc_to_netdev tc = {.type = TC_SETUP_MQPRIO,
- { .tc = qopt->num_tc }};
+ struct tc_mqprio_qopt offload = *qopt;
+ struct tc_to_netdev tc = { .type = TC_SETUP_MQPRIO,
+ { .mqprio = &offload } };
- priv->hw_owned = 1;
err = dev->netdev_ops->ndo_setup_tc(dev, sch->handle, 0, &tc);
if (err)
return err;
+
+ priv->hw_offload = offload.hw;
} else {
netdev_set_num_tc(dev, qopt->num_tc);
for (i = 0; i < qopt->num_tc; i++)
@@ -175,7 +186,7 @@ static void mqprio_attach(struct Qdisc *sch)
if (old)
qdisc_destroy(old);
if (ntx < dev->real_num_tx_queues)
- qdisc_hash_add(qdisc);
+ qdisc_hash_add(qdisc, false);
}
kfree(priv->qdiscs);
priv->qdiscs = NULL;
@@ -243,7 +254,7 @@ static int mqprio_dump(struct Qdisc *sch, struct sk_buff *skb)
opt.num_tc = netdev_get_num_tc(dev);
memcpy(opt.prio_tc_map, dev->prio_tc_map, sizeof(opt.prio_tc_map));
- opt.hw = priv->hw_owned;
+ opt.hw = priv->hw_offload;
for (i = 0; i < netdev_get_num_tc(dev); i++) {
opt.count[i] = dev->tc_to_txq[i].count;
diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c
index e7839a0d0eaa..43a3a10b3c81 100644
--- a/net/sched/sch_multiq.c
+++ b/net/sched/sch_multiq.c
@@ -217,6 +217,8 @@ static int multiq_tune(struct Qdisc *sch, struct nlattr *opt)
sch_tree_lock(sch);
old = q->queues[i];
q->queues[i] = child;
+ if (child != &noop_qdisc)
+ qdisc_hash_add(child, true);
if (old != &noop_qdisc) {
qdisc_tree_reduce_backlog(old,
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index c8bb62a1e744..1b3dd6190e93 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -462,7 +462,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
/* If a delay is expected, orphan the skb. (orphaning usually takes
* place at TX completion time, so _before_ the link transit delay)
*/
- if (q->latency || q->jitter)
+ if (q->latency || q->jitter || q->rate)
skb_orphan_partial(skb);
/*
@@ -530,21 +530,31 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
now = psched_get_time();
if (q->rate) {
- struct sk_buff *last;
+ struct netem_skb_cb *last = NULL;
+
+ if (sch->q.tail)
+ last = netem_skb_cb(sch->q.tail);
+ if (q->t_root.rb_node) {
+ struct sk_buff *t_skb;
+ struct netem_skb_cb *t_last;
+
+ t_skb = netem_rb_to_skb(rb_last(&q->t_root));
+ t_last = netem_skb_cb(t_skb);
+ if (!last ||
+ t_last->time_to_send > last->time_to_send) {
+ last = t_last;
+ }
+ }
- if (sch->q.qlen)
- last = sch->q.tail;
- else
- last = netem_rb_to_skb(rb_last(&q->t_root));
if (last) {
/*
* Last packet in queue is reference point (now),
* calculate this time bonus and subtract
* from delay.
*/
- delay -= netem_skb_cb(last)->time_to_send - now;
+ delay -= last->time_to_send - now;
delay = max_t(psched_tdiff_t, 0, delay);
- now = netem_skb_cb(last)->time_to_send;
+ now = last->time_to_send;
}
delay += packet_len_2_sched_time(qdisc_pkt_len(skb), q);
@@ -692,15 +702,11 @@ static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr)
spinlock_t *root_lock;
struct disttable *d;
int i;
- size_t s;
if (n > NETEM_DIST_MAX)
return -EINVAL;
- s = sizeof(struct disttable) + n * sizeof(s16);
- d = kmalloc(s, GFP_KERNEL | __GFP_NOWARN);
- if (!d)
- d = vmalloc(s);
+ d = kvmalloc(sizeof(struct disttable) + n * sizeof(s16), GFP_KERNEL);
if (!d)
return -ENOMEM;
@@ -833,7 +839,7 @@ static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla,
if (nested_len >= nla_attr_size(0))
return nla_parse(tb, maxtype, nla_data(nla) + NLA_ALIGN(len),
- nested_len, policy);
+ nested_len, policy, NULL);
memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1));
return 0;
diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c
index 5c3a99d6aa82..6c2791d6102d 100644
--- a/net/sched/sch_pie.c
+++ b/net/sched/sch_pie.c
@@ -190,7 +190,7 @@ static int pie_change(struct Qdisc *sch, struct nlattr *opt)
if (!opt)
return -EINVAL;
- err = nla_parse_nested(tb, TCA_PIE_MAX, opt, pie_policy);
+ err = nla_parse_nested(tb, TCA_PIE_MAX, opt, pie_policy, NULL);
if (err < 0)
return err;
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index d4d7db267b6e..92c2e6d448d7 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -192,8 +192,11 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt)
qdisc_destroy(child);
}
- for (i = oldbands; i < q->bands; i++)
+ for (i = oldbands; i < q->bands; i++) {
q->queues[i] = queues[i];
+ if (q->queues[i] != &noop_qdisc)
+ qdisc_hash_add(q->queues[i], true);
+ }
sch_tree_unlock(sch);
return 0;
diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
index f9e712ce2d15..041eba3006cc 100644
--- a/net/sched/sch_qfq.c
+++ b/net/sched/sch_qfq.c
@@ -418,7 +418,8 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
return -EINVAL;
}
- err = nla_parse_nested(tb, TCA_QFQ_MAX, tca[TCA_OPTIONS], qfq_policy);
+ err = nla_parse_nested(tb, TCA_QFQ_MAX, tca[TCA_OPTIONS], qfq_policy,
+ NULL);
if (err < 0)
return err;
@@ -494,6 +495,8 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
goto destroy_class;
}
+ if (cl->qdisc != &noop_qdisc)
+ qdisc_hash_add(cl->qdisc, true);
sch_tree_lock(sch);
qdisc_class_hash_insert(&q->clhash, &cl->common);
sch_tree_unlock(sch);
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index 249b2a18acbd..11292adce412 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -173,7 +173,7 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt)
if (opt == NULL)
return -EINVAL;
- err = nla_parse_nested(tb, TCA_RED_MAX, opt, red_policy);
+ err = nla_parse_nested(tb, TCA_RED_MAX, opt, red_policy, NULL);
if (err < 0)
return err;
@@ -191,6 +191,8 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt)
return PTR_ERR(child);
}
+ if (child != &noop_qdisc)
+ qdisc_hash_add(child, true);
sch_tree_lock(sch);
q->flags = ctl->flags;
q->limit = ctl->limit;
diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c
index fe6963d21519..0f777273ba29 100644
--- a/net/sched/sch_sfb.c
+++ b/net/sched/sch_sfb.c
@@ -495,7 +495,7 @@ static int sfb_change(struct Qdisc *sch, struct nlattr *opt)
int err;
if (opt) {
- err = nla_parse_nested(tb, TCA_SFB_MAX, opt, sfb_policy);
+ err = nla_parse_nested(tb, TCA_SFB_MAX, opt, sfb_policy, NULL);
if (err < 0)
return -EINVAL;
@@ -513,6 +513,8 @@ static int sfb_change(struct Qdisc *sch, struct nlattr *opt)
if (IS_ERR(child))
return PTR_ERR(child);
+ if (child != &noop_qdisc)
+ qdisc_hash_add(child, true);
sch_tree_lock(sch);
qdisc_tree_reduce_backlog(q->qdisc, q->qdisc->q.qlen,
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 42e8c8615e65..332d94be6e1c 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -685,11 +685,7 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
static void *sfq_alloc(size_t sz)
{
- void *ptr = kmalloc(sz, GFP_KERNEL | __GFP_NOWARN);
-
- if (!ptr)
- ptr = vmalloc(sz);
- return ptr;
+ return kvmalloc(sz, GFP_KERNEL);
}
static void sfq_free(void *addr)
@@ -714,9 +710,8 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt)
struct sfq_sched_data *q = qdisc_priv(sch);
int i;
- q->perturb_timer.function = sfq_perturbation;
- q->perturb_timer.data = (unsigned long)sch;
- init_timer_deferrable(&q->perturb_timer);
+ setup_deferrable_timer(&q->perturb_timer, sfq_perturbation,
+ (unsigned long)sch);
for (i = 0; i < SFQ_MAX_DEPTH + 1; i++) {
q->dep[i].next = i + SFQ_MAX_FLOWS;
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index 303355c449ab..b2e4b6ad241a 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -315,7 +315,7 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt)
s64 buffer, mtu;
u64 rate64 = 0, prate64 = 0;
- err = nla_parse_nested(tb, TCA_TBF_MAX, opt, tbf_policy);
+ err = nla_parse_nested(tb, TCA_TBF_MAX, opt, tbf_policy, NULL);
if (err < 0)
return err;
@@ -396,6 +396,8 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt)
q->qdisc->qstats.backlog);
qdisc_destroy(q->qdisc);
q->qdisc = child;
+ if (child != &noop_qdisc)
+ qdisc_hash_add(child, true);
}
q->limit = qopt->limit;
if (tb[TCA_TBF_PBURST])
diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c
index e3621cb4827f..697721a7a3f1 100644
--- a/net/sctp/chunk.c
+++ b/net/sctp/chunk.c
@@ -306,14 +306,24 @@ int sctp_chunk_abandoned(struct sctp_chunk *chunk)
if (SCTP_PR_TTL_ENABLED(chunk->sinfo.sinfo_flags) &&
time_after(jiffies, chunk->msg->expires_at)) {
- if (chunk->sent_count)
+ struct sctp_stream_out *streamout =
+ &chunk->asoc->stream->out[chunk->sinfo.sinfo_stream];
+
+ if (chunk->sent_count) {
chunk->asoc->abandoned_sent[SCTP_PR_INDEX(TTL)]++;
- else
+ streamout->abandoned_sent[SCTP_PR_INDEX(TTL)]++;
+ } else {
chunk->asoc->abandoned_unsent[SCTP_PR_INDEX(TTL)]++;
+ streamout->abandoned_unsent[SCTP_PR_INDEX(TTL)]++;
+ }
return 1;
} else if (SCTP_PR_RTX_ENABLED(chunk->sinfo.sinfo_flags) &&
chunk->sent_count > chunk->sinfo.sinfo_timetolive) {
+ struct sctp_stream_out *streamout =
+ &chunk->asoc->stream->out[chunk->sinfo.sinfo_stream];
+
chunk->asoc->abandoned_sent[SCTP_PR_INDEX(RTX)]++;
+ streamout->abandoned_sent[SCTP_PR_INDEX(RTX)]++;
return 1;
} else if (!SCTP_PR_POLICY(chunk->sinfo.sinfo_flags) &&
chunk->msg->expires_at &&
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 961ee59f696a..f5b45b8b8b16 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -240,12 +240,10 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr,
struct sctp_bind_addr *bp;
struct ipv6_pinfo *np = inet6_sk(sk);
struct sctp_sockaddr_entry *laddr;
- union sctp_addr *baddr = NULL;
union sctp_addr *daddr = &t->ipaddr;
union sctp_addr dst_saddr;
struct in6_addr *final_p, final;
__u8 matchlen = 0;
- __u8 bmatchlen;
sctp_scope_t scope;
memset(fl6, 0, sizeof(struct flowi6));
@@ -312,23 +310,37 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr,
*/
rcu_read_lock();
list_for_each_entry_rcu(laddr, &bp->address_list, list) {
- if (!laddr->valid)
+ struct dst_entry *bdst;
+ __u8 bmatchlen;
+
+ if (!laddr->valid ||
+ laddr->state != SCTP_ADDR_SRC ||
+ laddr->a.sa.sa_family != AF_INET6 ||
+ scope > sctp_scope(&laddr->a))
continue;
- if ((laddr->state == SCTP_ADDR_SRC) &&
- (laddr->a.sa.sa_family == AF_INET6) &&
- (scope <= sctp_scope(&laddr->a))) {
- bmatchlen = sctp_v6_addr_match_len(daddr, &laddr->a);
- if (!baddr || (matchlen < bmatchlen)) {
- baddr = &laddr->a;
- matchlen = bmatchlen;
- }
- }
- }
- if (baddr) {
- fl6->saddr = baddr->v6.sin6_addr;
- fl6->fl6_sport = baddr->v6.sin6_port;
+
+ fl6->saddr = laddr->a.v6.sin6_addr;
+ fl6->fl6_sport = laddr->a.v6.sin6_port;
final_p = fl6_update_dst(fl6, rcu_dereference(np->opt), &final);
- dst = ip6_dst_lookup_flow(sk, fl6, final_p);
+ bdst = ip6_dst_lookup_flow(sk, fl6, final_p);
+
+ if (!IS_ERR(bdst) &&
+ ipv6_chk_addr(dev_net(bdst->dev),
+ &laddr->a.v6.sin6_addr, bdst->dev, 1)) {
+ if (!IS_ERR_OR_NULL(dst))
+ dst_release(dst);
+ dst = bdst;
+ break;
+ }
+
+ bmatchlen = sctp_v6_addr_match_len(daddr, &laddr->a);
+ if (matchlen > bmatchlen)
+ continue;
+
+ if (!IS_ERR_OR_NULL(dst))
+ dst_release(dst);
+ dst = bdst;
+ matchlen = bmatchlen;
}
rcu_read_unlock();
@@ -665,6 +677,9 @@ static struct sock *sctp_v6_create_accept_sk(struct sock *sk,
newnp = inet6_sk(newsk);
memcpy(newnp, np, sizeof(struct ipv6_pinfo));
+ newnp->ipv6_mc_list = NULL;
+ newnp->ipv6_ac_list = NULL;
+ newnp->ipv6_fl_list = NULL;
rcu_read_lock();
opt = rcu_dereference(np->opt);
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index 8081476ed313..fe4c3d462f6e 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -353,6 +353,8 @@ static int sctp_prsctp_prune_sent(struct sctp_association *asoc,
struct sctp_chunk *chk, *temp;
list_for_each_entry_safe(chk, temp, queue, transmitted_list) {
+ struct sctp_stream_out *streamout;
+
if (!SCTP_PR_PRIO_ENABLED(chk->sinfo.sinfo_flags) ||
chk->sinfo.sinfo_timetolive <= sinfo->sinfo_timetolive)
continue;
@@ -361,8 +363,10 @@ static int sctp_prsctp_prune_sent(struct sctp_association *asoc,
sctp_insert_list(&asoc->outqueue.abandoned,
&chk->transmitted_list);
+ streamout = &asoc->stream->out[chk->sinfo.sinfo_stream];
asoc->sent_cnt_removable--;
asoc->abandoned_sent[SCTP_PR_INDEX(PRIO)]++;
+ streamout->abandoned_sent[SCTP_PR_INDEX(PRIO)]++;
if (!chk->tsn_gap_acked) {
if (chk->transport)
@@ -396,6 +400,12 @@ static int sctp_prsctp_prune_unsent(struct sctp_association *asoc,
q->out_qlen -= chk->skb->len;
asoc->sent_cnt_removable--;
asoc->abandoned_unsent[SCTP_PR_INDEX(PRIO)]++;
+ if (chk->sinfo.sinfo_stream < asoc->stream->outcnt) {
+ struct sctp_stream_out *streamout =
+ &asoc->stream->out[chk->sinfo.sinfo_stream];
+
+ streamout->abandoned_unsent[SCTP_PR_INDEX(PRIO)]++;
+ }
msg_len -= SCTP_DATA_SNDSIZE(chk) +
sizeof(struct sk_buff) +
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 118faff6a332..8a08f13469c4 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -1512,14 +1512,12 @@ int sctp_user_addto_chunk(struct sctp_chunk *chunk, int len,
struct iov_iter *from)
{
void *target;
- ssize_t copied;
/* Make room in chunk for data. */
target = skb_put(chunk->skb, len);
/* Copy data (whole iovec) into chunk */
- copied = copy_from_iter(target, len, from);
- if (copied != len)
+ if (!copy_from_iter_full(target, len, from))
return -EFAULT;
/* Adjust the chunk length field. */
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 24c6ccce7539..4f5e6cfc7f60 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -3872,9 +3872,18 @@ sctp_disposition_t sctp_sf_do_reconf(struct net *net,
else if (param.p->type == SCTP_PARAM_RESET_IN_REQUEST)
reply = sctp_process_strreset_inreq(
(struct sctp_association *)asoc, param, &ev);
- /* More handles for other types will be added here, by now it
- * just ignores other types.
- */
+ else if (param.p->type == SCTP_PARAM_RESET_TSN_REQUEST)
+ reply = sctp_process_strreset_tsnreq(
+ (struct sctp_association *)asoc, param, &ev);
+ else if (param.p->type == SCTP_PARAM_RESET_ADD_OUT_STREAMS)
+ reply = sctp_process_strreset_addstrm_out(
+ (struct sctp_association *)asoc, param, &ev);
+ else if (param.p->type == SCTP_PARAM_RESET_ADD_IN_STREAMS)
+ reply = sctp_process_strreset_addstrm_in(
+ (struct sctp_association *)asoc, param, &ev);
+ else if (param.p->type == SCTP_PARAM_RESET_RESPONSE)
+ reply = sctp_process_strreset_resp(
+ (struct sctp_association *)asoc, param, &ev);
if (ev)
sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP,
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index d9d4c92e06b3..f16c8d97b7f3 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -3758,6 +3758,39 @@ out:
return retval;
}
+static int sctp_setsockopt_reconfig_supported(struct sock *sk,
+ char __user *optval,
+ unsigned int optlen)
+{
+ struct sctp_assoc_value params;
+ struct sctp_association *asoc;
+ int retval = -EINVAL;
+
+ if (optlen != sizeof(params))
+ goto out;
+
+ if (copy_from_user(&params, optval, optlen)) {
+ retval = -EFAULT;
+ goto out;
+ }
+
+ asoc = sctp_id2assoc(sk, params.assoc_id);
+ if (asoc) {
+ asoc->reconf_enable = !!params.assoc_value;
+ } else if (!params.assoc_id) {
+ struct sctp_sock *sp = sctp_sk(sk);
+
+ sp->ep->reconf_enable = !!params.assoc_value;
+ } else {
+ goto out;
+ }
+
+ retval = 0;
+
+out:
+ return retval;
+}
+
static int sctp_setsockopt_enable_strreset(struct sock *sk,
char __user *optval,
unsigned int optlen)
@@ -4038,6 +4071,9 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname,
case SCTP_DEFAULT_PRINFO:
retval = sctp_setsockopt_default_prinfo(sk, optval, optlen);
break;
+ case SCTP_RECONFIG_SUPPORTED:
+ retval = sctp_setsockopt_reconfig_supported(sk, optval, optlen);
+ break;
case SCTP_ENABLE_STREAM_RESET:
retval = sctp_setsockopt_enable_strreset(sk, optval, optlen);
break;
@@ -6540,6 +6576,102 @@ out:
return retval;
}
+static int sctp_getsockopt_pr_streamstatus(struct sock *sk, int len,
+ char __user *optval,
+ int __user *optlen)
+{
+ struct sctp_stream_out *streamout;
+ struct sctp_association *asoc;
+ struct sctp_prstatus params;
+ int retval = -EINVAL;
+ int policy;
+
+ if (len < sizeof(params))
+ goto out;
+
+ len = sizeof(params);
+ if (copy_from_user(&params, optval, len)) {
+ retval = -EFAULT;
+ goto out;
+ }
+
+ policy = params.sprstat_policy;
+ if (policy & ~SCTP_PR_SCTP_MASK)
+ goto out;
+
+ asoc = sctp_id2assoc(sk, params.sprstat_assoc_id);
+ if (!asoc || params.sprstat_sid >= asoc->stream->outcnt)
+ goto out;
+
+ streamout = &asoc->stream->out[params.sprstat_sid];
+ if (policy == SCTP_PR_SCTP_NONE) {
+ params.sprstat_abandoned_unsent = 0;
+ params.sprstat_abandoned_sent = 0;
+ for (policy = 0; policy <= SCTP_PR_INDEX(MAX); policy++) {
+ params.sprstat_abandoned_unsent +=
+ streamout->abandoned_unsent[policy];
+ params.sprstat_abandoned_sent +=
+ streamout->abandoned_sent[policy];
+ }
+ } else {
+ params.sprstat_abandoned_unsent =
+ streamout->abandoned_unsent[__SCTP_PR_INDEX(policy)];
+ params.sprstat_abandoned_sent =
+ streamout->abandoned_sent[__SCTP_PR_INDEX(policy)];
+ }
+
+ if (put_user(len, optlen) || copy_to_user(optval, &params, len)) {
+ retval = -EFAULT;
+ goto out;
+ }
+
+ retval = 0;
+
+out:
+ return retval;
+}
+
+static int sctp_getsockopt_reconfig_supported(struct sock *sk, int len,
+ char __user *optval,
+ int __user *optlen)
+{
+ struct sctp_assoc_value params;
+ struct sctp_association *asoc;
+ int retval = -EFAULT;
+
+ if (len < sizeof(params)) {
+ retval = -EINVAL;
+ goto out;
+ }
+
+ len = sizeof(params);
+ if (copy_from_user(&params, optval, len))
+ goto out;
+
+ asoc = sctp_id2assoc(sk, params.assoc_id);
+ if (asoc) {
+ params.assoc_value = asoc->reconf_enable;
+ } else if (!params.assoc_id) {
+ struct sctp_sock *sp = sctp_sk(sk);
+
+ params.assoc_value = sp->ep->reconf_enable;
+ } else {
+ retval = -EINVAL;
+ goto out;
+ }
+
+ if (put_user(len, optlen))
+ goto out;
+
+ if (copy_to_user(optval, &params, len))
+ goto out;
+
+ retval = 0;
+
+out:
+ return retval;
+}
+
static int sctp_getsockopt_enable_strreset(struct sock *sk, int len,
char __user *optval,
int __user *optlen)
@@ -6748,6 +6880,14 @@ static int sctp_getsockopt(struct sock *sk, int level, int optname,
retval = sctp_getsockopt_pr_assocstatus(sk, len, optval,
optlen);
break;
+ case SCTP_PR_STREAM_STATUS:
+ retval = sctp_getsockopt_pr_streamstatus(sk, len, optval,
+ optlen);
+ break;
+ case SCTP_RECONFIG_SUPPORTED:
+ retval = sctp_getsockopt_reconfig_supported(sk, len, optval,
+ optlen);
+ break;
case SCTP_ENABLE_STREAM_RESET:
retval = sctp_getsockopt_enable_strreset(sk, len, optval,
optlen);
@@ -7440,9 +7580,12 @@ struct sk_buff *sctp_skb_recv_datagram(struct sock *sk, int flags,
if (sk->sk_shutdown & RCV_SHUTDOWN)
break;
- if (sk_can_busy_loop(sk) &&
- sk_busy_loop(sk, noblock))
- continue;
+ if (sk_can_busy_loop(sk)) {
+ sk_busy_loop(sk, noblock);
+
+ if (!skb_queue_empty(&sk->sk_receive_queue))
+ continue;
+ }
/* User doesn't want to wait. */
error = -EAGAIN;
diff --git a/net/sctp/stream.c b/net/sctp/stream.c
index bbed997e1c5f..dda53a293986 100644
--- a/net/sctp/stream.c
+++ b/net/sctp/stream.c
@@ -294,18 +294,6 @@ int sctp_send_add_streams(struct sctp_association *asoc,
stream->out = streamout;
}
- if (in) {
- struct sctp_stream_in *streamin;
-
- streamin = krealloc(stream->in, incnt * sizeof(*streamin),
- GFP_KERNEL);
- if (!streamin)
- goto out;
-
- memset(streamin + stream->incnt, 0, in * sizeof(*streamin));
- stream->in = streamin;
- }
-
chunk = sctp_make_strreset_addstrm(asoc, out, in);
if (!chunk)
goto out;
@@ -330,13 +318,14 @@ out:
}
static sctp_paramhdr_t *sctp_chunk_lookup_strreset_param(
- struct sctp_association *asoc, __u32 resp_seq)
+ struct sctp_association *asoc, __u32 resp_seq,
+ __be16 type)
{
struct sctp_chunk *chunk = asoc->strreset_chunk;
struct sctp_reconf_chunk *hdr;
union sctp_params param;
- if (ntohl(resp_seq) != asoc->strreset_outseq || !chunk)
+ if (!chunk)
return NULL;
hdr = (struct sctp_reconf_chunk *)chunk->chunk_hdr;
@@ -347,13 +336,21 @@ static sctp_paramhdr_t *sctp_chunk_lookup_strreset_param(
*/
struct sctp_strreset_tsnreq *req = param.v;
- if (req->request_seq == resp_seq)
+ if ((!resp_seq || req->request_seq == resp_seq) &&
+ (!type || type == req->param_hdr.type))
return param.v;
}
return NULL;
}
+static void sctp_update_strreset_result(struct sctp_association *asoc,
+ __u32 result)
+{
+ asoc->strreset_result[1] = asoc->strreset_result[0];
+ asoc->strreset_result[0] = result;
+}
+
struct sctp_chunk *sctp_process_strreset_outreq(
struct sctp_association *asoc,
union sctp_params param,
@@ -370,15 +367,19 @@ struct sctp_chunk *sctp_process_strreset_outreq(
if (ntohl(outreq->send_reset_at_tsn) >
sctp_tsnmap_get_ctsn(&asoc->peer.tsn_map)) {
result = SCTP_STRRESET_IN_PROGRESS;
- goto out;
+ goto err;
}
- if (request_seq > asoc->strreset_inseq) {
+ if (TSN_lt(asoc->strreset_inseq, request_seq) ||
+ TSN_lt(request_seq, asoc->strreset_inseq - 2)) {
result = SCTP_STRRESET_ERR_BAD_SEQNO;
- goto out;
- } else if (request_seq == asoc->strreset_inseq) {
- asoc->strreset_inseq++;
+ goto err;
+ } else if (TSN_lt(request_seq, asoc->strreset_inseq)) {
+ i = asoc->strreset_inseq - request_seq - 1;
+ result = asoc->strreset_result[i];
+ goto err;
}
+ asoc->strreset_inseq++;
/* Check strreset_enable after inseq inc, as sender cannot tell
* the peer doesn't enable strreset after receiving response with
@@ -388,13 +389,9 @@ struct sctp_chunk *sctp_process_strreset_outreq(
goto out;
if (asoc->strreset_chunk) {
- sctp_paramhdr_t *param_hdr;
- struct sctp_transport *t;
-
- param_hdr = sctp_chunk_lookup_strreset_param(
- asoc, outreq->response_seq);
- if (!param_hdr || param_hdr->type !=
- SCTP_PARAM_RESET_IN_REQUEST) {
+ if (!sctp_chunk_lookup_strreset_param(
+ asoc, outreq->response_seq,
+ SCTP_PARAM_RESET_IN_REQUEST)) {
/* same process with outstanding isn't 0 */
result = SCTP_STRRESET_ERR_IN_PROGRESS;
goto out;
@@ -404,6 +401,8 @@ struct sctp_chunk *sctp_process_strreset_outreq(
asoc->strreset_outseq++;
if (!asoc->strreset_outstanding) {
+ struct sctp_transport *t;
+
t = asoc->strreset_chunk->transport;
if (del_timer(&t->reconf_timer))
sctp_transport_put(t);
@@ -439,6 +438,8 @@ struct sctp_chunk *sctp_process_strreset_outreq(
GFP_ATOMIC);
out:
+ sctp_update_strreset_result(asoc, result);
+err:
return sctp_make_strreset_resp(asoc, result, request_seq);
}
@@ -455,12 +456,18 @@ struct sctp_chunk *sctp_process_strreset_inreq(
__u32 request_seq;
request_seq = ntohl(inreq->request_seq);
- if (request_seq > asoc->strreset_inseq) {
+ if (TSN_lt(asoc->strreset_inseq, request_seq) ||
+ TSN_lt(request_seq, asoc->strreset_inseq - 2)) {
result = SCTP_STRRESET_ERR_BAD_SEQNO;
- goto out;
- } else if (request_seq == asoc->strreset_inseq) {
- asoc->strreset_inseq++;
+ goto err;
+ } else if (TSN_lt(request_seq, asoc->strreset_inseq)) {
+ i = asoc->strreset_inseq - request_seq - 1;
+ result = asoc->strreset_result[i];
+ if (result == SCTP_STRRESET_PERFORMED)
+ return NULL;
+ goto err;
}
+ asoc->strreset_inseq++;
if (!(asoc->strreset_enable & SCTP_ENABLE_RESET_STREAM_REQ))
goto out;
@@ -495,12 +502,407 @@ struct sctp_chunk *sctp_process_strreset_inreq(
asoc->strreset_outstanding = 1;
sctp_chunk_hold(asoc->strreset_chunk);
+ result = SCTP_STRRESET_PERFORMED;
+
*evp = sctp_ulpevent_make_stream_reset_event(asoc,
SCTP_STREAM_RESET_INCOMING_SSN, nums, str_p, GFP_ATOMIC);
out:
+ sctp_update_strreset_result(asoc, result);
+err:
if (!chunk)
chunk = sctp_make_strreset_resp(asoc, result, request_seq);
return chunk;
}
+
+struct sctp_chunk *sctp_process_strreset_tsnreq(
+ struct sctp_association *asoc,
+ union sctp_params param,
+ struct sctp_ulpevent **evp)
+{
+ __u32 init_tsn = 0, next_tsn = 0, max_tsn_seen;
+ struct sctp_strreset_tsnreq *tsnreq = param.v;
+ struct sctp_stream *stream = asoc->stream;
+ __u32 result = SCTP_STRRESET_DENIED;
+ __u32 request_seq;
+ __u16 i;
+
+ request_seq = ntohl(tsnreq->request_seq);
+ if (TSN_lt(asoc->strreset_inseq, request_seq) ||
+ TSN_lt(request_seq, asoc->strreset_inseq - 2)) {
+ result = SCTP_STRRESET_ERR_BAD_SEQNO;
+ goto err;
+ } else if (TSN_lt(request_seq, asoc->strreset_inseq)) {
+ i = asoc->strreset_inseq - request_seq - 1;
+ result = asoc->strreset_result[i];
+ if (result == SCTP_STRRESET_PERFORMED) {
+ next_tsn = asoc->next_tsn;
+ init_tsn =
+ sctp_tsnmap_get_ctsn(&asoc->peer.tsn_map) + 1;
+ }
+ goto err;
+ }
+ asoc->strreset_inseq++;
+
+ if (!(asoc->strreset_enable & SCTP_ENABLE_RESET_ASSOC_REQ))
+ goto out;
+
+ if (asoc->strreset_outstanding) {
+ result = SCTP_STRRESET_ERR_IN_PROGRESS;
+ goto out;
+ }
+
+ /* G3: The same processing as though a SACK chunk with no gap report
+ * and a cumulative TSN ACK of the Sender's Next TSN minus 1 were
+ * received MUST be performed.
+ */
+ max_tsn_seen = sctp_tsnmap_get_max_tsn_seen(&asoc->peer.tsn_map);
+ sctp_ulpq_reasm_flushtsn(&asoc->ulpq, max_tsn_seen);
+ sctp_ulpq_abort_pd(&asoc->ulpq, GFP_ATOMIC);
+
+ /* G1: Compute an appropriate value for the Receiver's Next TSN -- the
+ * TSN that the peer should use to send the next DATA chunk. The
+ * value SHOULD be the smallest TSN not acknowledged by the
+ * receiver of the request plus 2^31.
+ */
+ init_tsn = sctp_tsnmap_get_ctsn(&asoc->peer.tsn_map) + (1 << 31);
+ sctp_tsnmap_init(&asoc->peer.tsn_map, SCTP_TSN_MAP_INITIAL,
+ init_tsn, GFP_ATOMIC);
+
+ /* G4: The same processing as though a FWD-TSN chunk (as defined in
+ * [RFC3758]) with all streams affected and a new cumulative TSN
+ * ACK of the Receiver's Next TSN minus 1 were received MUST be
+ * performed.
+ */
+ sctp_outq_free(&asoc->outqueue);
+
+ /* G2: Compute an appropriate value for the local endpoint's next TSN,
+ * i.e., the next TSN assigned by the receiver of the SSN/TSN reset
+ * chunk. The value SHOULD be the highest TSN sent by the receiver
+ * of the request plus 1.
+ */
+ next_tsn = asoc->next_tsn;
+ asoc->ctsn_ack_point = next_tsn - 1;
+ asoc->adv_peer_ack_point = asoc->ctsn_ack_point;
+
+ /* G5: The next expected and outgoing SSNs MUST be reset to 0 for all
+ * incoming and outgoing streams.
+ */
+ for (i = 0; i < stream->outcnt; i++)
+ stream->out[i].ssn = 0;
+ for (i = 0; i < stream->incnt; i++)
+ stream->in[i].ssn = 0;
+
+ result = SCTP_STRRESET_PERFORMED;
+
+ *evp = sctp_ulpevent_make_assoc_reset_event(asoc, 0, init_tsn,
+ next_tsn, GFP_ATOMIC);
+
+out:
+ sctp_update_strreset_result(asoc, result);
+err:
+ return sctp_make_strreset_tsnresp(asoc, result, request_seq,
+ next_tsn, init_tsn);
+}
+
+struct sctp_chunk *sctp_process_strreset_addstrm_out(
+ struct sctp_association *asoc,
+ union sctp_params param,
+ struct sctp_ulpevent **evp)
+{
+ struct sctp_strreset_addstrm *addstrm = param.v;
+ struct sctp_stream *stream = asoc->stream;
+ __u32 result = SCTP_STRRESET_DENIED;
+ struct sctp_stream_in *streamin;
+ __u32 request_seq, incnt;
+ __u16 in, i;
+
+ request_seq = ntohl(addstrm->request_seq);
+ if (TSN_lt(asoc->strreset_inseq, request_seq) ||
+ TSN_lt(request_seq, asoc->strreset_inseq - 2)) {
+ result = SCTP_STRRESET_ERR_BAD_SEQNO;
+ goto err;
+ } else if (TSN_lt(request_seq, asoc->strreset_inseq)) {
+ i = asoc->strreset_inseq - request_seq - 1;
+ result = asoc->strreset_result[i];
+ goto err;
+ }
+ asoc->strreset_inseq++;
+
+ if (!(asoc->strreset_enable & SCTP_ENABLE_CHANGE_ASSOC_REQ))
+ goto out;
+
+ if (asoc->strreset_chunk) {
+ if (!sctp_chunk_lookup_strreset_param(
+ asoc, 0, SCTP_PARAM_RESET_ADD_IN_STREAMS)) {
+ /* same process with outstanding isn't 0 */
+ result = SCTP_STRRESET_ERR_IN_PROGRESS;
+ goto out;
+ }
+
+ asoc->strreset_outstanding--;
+ asoc->strreset_outseq++;
+
+ if (!asoc->strreset_outstanding) {
+ struct sctp_transport *t;
+
+ t = asoc->strreset_chunk->transport;
+ if (del_timer(&t->reconf_timer))
+ sctp_transport_put(t);
+
+ sctp_chunk_put(asoc->strreset_chunk);
+ asoc->strreset_chunk = NULL;
+ }
+ }
+
+ in = ntohs(addstrm->number_of_streams);
+ incnt = stream->incnt + in;
+ if (!in || incnt > SCTP_MAX_STREAM)
+ goto out;
+
+ streamin = krealloc(stream->in, incnt * sizeof(*streamin),
+ GFP_ATOMIC);
+ if (!streamin)
+ goto out;
+
+ memset(streamin + stream->incnt, 0, in * sizeof(*streamin));
+ stream->in = streamin;
+ stream->incnt = incnt;
+
+ result = SCTP_STRRESET_PERFORMED;
+
+ *evp = sctp_ulpevent_make_stream_change_event(asoc,
+ 0, ntohs(addstrm->number_of_streams), 0, GFP_ATOMIC);
+
+out:
+ sctp_update_strreset_result(asoc, result);
+err:
+ return sctp_make_strreset_resp(asoc, result, request_seq);
+}
+
+struct sctp_chunk *sctp_process_strreset_addstrm_in(
+ struct sctp_association *asoc,
+ union sctp_params param,
+ struct sctp_ulpevent **evp)
+{
+ struct sctp_strreset_addstrm *addstrm = param.v;
+ struct sctp_stream *stream = asoc->stream;
+ __u32 result = SCTP_STRRESET_DENIED;
+ struct sctp_stream_out *streamout;
+ struct sctp_chunk *chunk = NULL;
+ __u32 request_seq, outcnt;
+ __u16 out, i;
+
+ request_seq = ntohl(addstrm->request_seq);
+ if (TSN_lt(asoc->strreset_inseq, request_seq) ||
+ TSN_lt(request_seq, asoc->strreset_inseq - 2)) {
+ result = SCTP_STRRESET_ERR_BAD_SEQNO;
+ goto err;
+ } else if (TSN_lt(request_seq, asoc->strreset_inseq)) {
+ i = asoc->strreset_inseq - request_seq - 1;
+ result = asoc->strreset_result[i];
+ if (result == SCTP_STRRESET_PERFORMED)
+ return NULL;
+ goto err;
+ }
+ asoc->strreset_inseq++;
+
+ if (!(asoc->strreset_enable & SCTP_ENABLE_CHANGE_ASSOC_REQ))
+ goto out;
+
+ if (asoc->strreset_outstanding) {
+ result = SCTP_STRRESET_ERR_IN_PROGRESS;
+ goto out;
+ }
+
+ out = ntohs(addstrm->number_of_streams);
+ outcnt = stream->outcnt + out;
+ if (!out || outcnt > SCTP_MAX_STREAM)
+ goto out;
+
+ streamout = krealloc(stream->out, outcnt * sizeof(*streamout),
+ GFP_ATOMIC);
+ if (!streamout)
+ goto out;
+
+ memset(streamout + stream->outcnt, 0, out * sizeof(*streamout));
+ stream->out = streamout;
+
+ chunk = sctp_make_strreset_addstrm(asoc, out, 0);
+ if (!chunk)
+ goto out;
+
+ asoc->strreset_chunk = chunk;
+ asoc->strreset_outstanding = 1;
+ sctp_chunk_hold(asoc->strreset_chunk);
+
+ stream->outcnt = outcnt;
+
+ result = SCTP_STRRESET_PERFORMED;
+
+ *evp = sctp_ulpevent_make_stream_change_event(asoc,
+ 0, 0, ntohs(addstrm->number_of_streams), GFP_ATOMIC);
+
+out:
+ sctp_update_strreset_result(asoc, result);
+err:
+ if (!chunk)
+ chunk = sctp_make_strreset_resp(asoc, result, request_seq);
+
+ return chunk;
+}
+
+struct sctp_chunk *sctp_process_strreset_resp(
+ struct sctp_association *asoc,
+ union sctp_params param,
+ struct sctp_ulpevent **evp)
+{
+ struct sctp_strreset_resp *resp = param.v;
+ struct sctp_stream *stream = asoc->stream;
+ struct sctp_transport *t;
+ __u16 i, nums, flags = 0;
+ sctp_paramhdr_t *req;
+ __u32 result;
+
+ req = sctp_chunk_lookup_strreset_param(asoc, resp->response_seq, 0);
+ if (!req)
+ return NULL;
+
+ result = ntohl(resp->result);
+ if (result != SCTP_STRRESET_PERFORMED) {
+ /* if in progress, do nothing but retransmit */
+ if (result == SCTP_STRRESET_IN_PROGRESS)
+ return NULL;
+ else if (result == SCTP_STRRESET_DENIED)
+ flags = SCTP_STREAM_RESET_DENIED;
+ else
+ flags = SCTP_STREAM_RESET_FAILED;
+ }
+
+ if (req->type == SCTP_PARAM_RESET_OUT_REQUEST) {
+ struct sctp_strreset_outreq *outreq;
+ __u16 *str_p;
+
+ outreq = (struct sctp_strreset_outreq *)req;
+ str_p = outreq->list_of_streams;
+ nums = (ntohs(outreq->param_hdr.length) - sizeof(*outreq)) / 2;
+
+ if (result == SCTP_STRRESET_PERFORMED) {
+ if (nums) {
+ for (i = 0; i < nums; i++)
+ stream->out[ntohs(str_p[i])].ssn = 0;
+ } else {
+ for (i = 0; i < stream->outcnt; i++)
+ stream->out[i].ssn = 0;
+ }
+
+ flags = SCTP_STREAM_RESET_OUTGOING_SSN;
+ }
+
+ for (i = 0; i < stream->outcnt; i++)
+ stream->out[i].state = SCTP_STREAM_OPEN;
+
+ *evp = sctp_ulpevent_make_stream_reset_event(asoc, flags,
+ nums, str_p, GFP_ATOMIC);
+ } else if (req->type == SCTP_PARAM_RESET_IN_REQUEST) {
+ struct sctp_strreset_inreq *inreq;
+ __u16 *str_p;
+
+ /* if the result is performed, it's impossible for inreq */
+ if (result == SCTP_STRRESET_PERFORMED)
+ return NULL;
+
+ inreq = (struct sctp_strreset_inreq *)req;
+ str_p = inreq->list_of_streams;
+ nums = (ntohs(inreq->param_hdr.length) - sizeof(*inreq)) / 2;
+
+ *evp = sctp_ulpevent_make_stream_reset_event(asoc, flags,
+ nums, str_p, GFP_ATOMIC);
+ } else if (req->type == SCTP_PARAM_RESET_TSN_REQUEST) {
+ struct sctp_strreset_resptsn *resptsn;
+ __u32 stsn, rtsn;
+
+ /* check for resptsn, as sctp_verify_reconf didn't do it*/
+ if (ntohs(param.p->length) != sizeof(*resptsn))
+ return NULL;
+
+ resptsn = (struct sctp_strreset_resptsn *)resp;
+ stsn = ntohl(resptsn->senders_next_tsn);
+ rtsn = ntohl(resptsn->receivers_next_tsn);
+
+ if (result == SCTP_STRRESET_PERFORMED) {
+ __u32 mtsn = sctp_tsnmap_get_max_tsn_seen(
+ &asoc->peer.tsn_map);
+
+ sctp_ulpq_reasm_flushtsn(&asoc->ulpq, mtsn);
+ sctp_ulpq_abort_pd(&asoc->ulpq, GFP_ATOMIC);
+
+ sctp_tsnmap_init(&asoc->peer.tsn_map,
+ SCTP_TSN_MAP_INITIAL,
+ stsn, GFP_ATOMIC);
+
+ sctp_outq_free(&asoc->outqueue);
+
+ asoc->next_tsn = rtsn;
+ asoc->ctsn_ack_point = asoc->next_tsn - 1;
+ asoc->adv_peer_ack_point = asoc->ctsn_ack_point;
+
+ for (i = 0; i < stream->outcnt; i++)
+ stream->out[i].ssn = 0;
+ for (i = 0; i < stream->incnt; i++)
+ stream->in[i].ssn = 0;
+ }
+
+ for (i = 0; i < stream->outcnt; i++)
+ stream->out[i].state = SCTP_STREAM_OPEN;
+
+ *evp = sctp_ulpevent_make_assoc_reset_event(asoc, flags,
+ stsn, rtsn, GFP_ATOMIC);
+ } else if (req->type == SCTP_PARAM_RESET_ADD_OUT_STREAMS) {
+ struct sctp_strreset_addstrm *addstrm;
+ __u16 number;
+
+ addstrm = (struct sctp_strreset_addstrm *)req;
+ nums = ntohs(addstrm->number_of_streams);
+ number = stream->outcnt - nums;
+
+ if (result == SCTP_STRRESET_PERFORMED)
+ for (i = number; i < stream->outcnt; i++)
+ stream->out[i].state = SCTP_STREAM_OPEN;
+ else
+ stream->outcnt = number;
+
+ *evp = sctp_ulpevent_make_stream_change_event(asoc, flags,
+ 0, nums, GFP_ATOMIC);
+ } else if (req->type == SCTP_PARAM_RESET_ADD_IN_STREAMS) {
+ struct sctp_strreset_addstrm *addstrm;
+
+ /* if the result is performed, it's impossible for addstrm in
+ * request.
+ */
+ if (result == SCTP_STRRESET_PERFORMED)
+ return NULL;
+
+ addstrm = (struct sctp_strreset_addstrm *)req;
+ nums = ntohs(addstrm->number_of_streams);
+
+ *evp = sctp_ulpevent_make_stream_change_event(asoc, flags,
+ nums, 0, GFP_ATOMIC);
+ }
+
+ asoc->strreset_outstanding--;
+ asoc->strreset_outseq++;
+
+ /* remove everything for this reconf request */
+ if (!asoc->strreset_outstanding) {
+ t = asoc->strreset_chunk->transport;
+ if (del_timer(&t->reconf_timer))
+ sctp_transport_put(t);
+
+ sctp_chunk_put(asoc->strreset_chunk);
+ asoc->strreset_chunk = NULL;
+ }
+
+ return NULL;
+}
diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c
index daf8554fd42a..0e732f68c2bf 100644
--- a/net/sctp/sysctl.c
+++ b/net/sctp/sysctl.c
@@ -275,6 +275,13 @@ static struct ctl_table sctp_net_table[] = {
.proc_handler = proc_dointvec,
},
{
+ .procname = "reconf_enable",
+ .data = &init_net.sctp.reconf_enable,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
.procname = "auth_enable",
.data = &init_net.sctp.auth_enable,
.maxlen = sizeof(int),
diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c
index c8881bc542a0..ec2b3e013c2f 100644
--- a/net/sctp/ulpevent.c
+++ b/net/sctp/ulpevent.c
@@ -883,6 +883,62 @@ struct sctp_ulpevent *sctp_ulpevent_make_stream_reset_event(
return event;
}
+struct sctp_ulpevent *sctp_ulpevent_make_assoc_reset_event(
+ const struct sctp_association *asoc, __u16 flags, __u32 local_tsn,
+ __u32 remote_tsn, gfp_t gfp)
+{
+ struct sctp_assoc_reset_event *areset;
+ struct sctp_ulpevent *event;
+ struct sk_buff *skb;
+
+ event = sctp_ulpevent_new(sizeof(struct sctp_assoc_reset_event),
+ MSG_NOTIFICATION, gfp);
+ if (!event)
+ return NULL;
+
+ skb = sctp_event2skb(event);
+ areset = (struct sctp_assoc_reset_event *)
+ skb_put(skb, sizeof(struct sctp_assoc_reset_event));
+
+ areset->assocreset_type = SCTP_ASSOC_RESET_EVENT;
+ areset->assocreset_flags = flags;
+ areset->assocreset_length = sizeof(struct sctp_assoc_reset_event);
+ sctp_ulpevent_set_owner(event, asoc);
+ areset->assocreset_assoc_id = sctp_assoc2id(asoc);
+ areset->assocreset_local_tsn = local_tsn;
+ areset->assocreset_remote_tsn = remote_tsn;
+
+ return event;
+}
+
+struct sctp_ulpevent *sctp_ulpevent_make_stream_change_event(
+ const struct sctp_association *asoc, __u16 flags,
+ __u32 strchange_instrms, __u32 strchange_outstrms, gfp_t gfp)
+{
+ struct sctp_stream_change_event *schange;
+ struct sctp_ulpevent *event;
+ struct sk_buff *skb;
+
+ event = sctp_ulpevent_new(sizeof(struct sctp_stream_change_event),
+ MSG_NOTIFICATION, gfp);
+ if (!event)
+ return NULL;
+
+ skb = sctp_event2skb(event);
+ schange = (struct sctp_stream_change_event *)
+ skb_put(skb, sizeof(struct sctp_stream_change_event));
+
+ schange->strchange_type = SCTP_STREAM_CHANGE_EVENT;
+ schange->strchange_flags = flags;
+ schange->strchange_length = sizeof(struct sctp_stream_change_event);
+ sctp_ulpevent_set_owner(event, asoc);
+ schange->strchange_assoc_id = sctp_assoc2id(asoc);
+ schange->strchange_instrms = strchange_instrms;
+ schange->strchange_outstrms = strchange_outstrms;
+
+ return event;
+}
+
/* Return the notification type, assuming this is a notification
* event.
*/
diff --git a/net/smc/Kconfig b/net/smc/Kconfig
index c717ef0896aa..33954852f3f8 100644
--- a/net/smc/Kconfig
+++ b/net/smc/Kconfig
@@ -8,6 +8,10 @@ config SMC
The Linux implementation of the SMC-R solution is designed as
a separate socket family SMC.
+ Warning: SMC will expose all memory for remote reads and writes
+ once a connection is established. Don't enable this option except
+ for tightly controlled lab environment.
+
Select this option if you want to run SMC socket applications
config SMC_DIAG
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 093803786eac..6793d7348cc8 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -101,7 +101,7 @@ struct proto smc_proto = {
.unhash = smc_unhash_sk,
.obj_size = sizeof(struct smc_sock),
.h.smc_hash = &smc_v4_hashinfo,
- .slab_flags = SLAB_DESTROY_BY_RCU,
+ .slab_flags = SLAB_TYPESAFE_BY_RCU,
};
EXPORT_SYMBOL_GPL(smc_proto);
@@ -147,7 +147,6 @@ static int smc_release(struct socket *sock)
schedule_delayed_work(&smc->sock_put_work,
SMC_CLOSE_SOCK_PUT_DELAY);
}
- sk->sk_prot->unhash(sk);
release_sock(sk);
sock_put(sk);
@@ -451,6 +450,9 @@ static int smc_connect_rdma(struct smc_sock *smc)
goto decline_rdma_unlock;
}
+ smc_close_init(smc);
+ smc_rx_init(smc);
+
if (local_contact == SMC_FIRST_CONTACT) {
rc = smc_ib_ready_link(link);
if (rc) {
@@ -477,7 +479,6 @@ static int smc_connect_rdma(struct smc_sock *smc)
mutex_unlock(&smc_create_lgr_pending);
smc_tx_init(smc);
- smc_rx_init(smc);
out_connected:
smc_copy_sock_settings_to_clc(smc);
@@ -637,7 +638,8 @@ struct sock *smc_accept_dequeue(struct sock *parent,
smc_accept_unlink(new_sk);
if (new_sk->sk_state == SMC_CLOSED) {
- /* tbd in follow-on patch: close this sock */
+ new_sk->sk_prot->unhash(new_sk);
+ sock_put(new_sk);
continue;
}
if (new_sock)
@@ -657,8 +659,13 @@ void smc_close_non_accepted(struct sock *sk)
if (!sk->sk_lingertime)
/* wait for peer closing */
sk->sk_lingertime = SMC_MAX_STREAM_WAIT_TIMEOUT;
- if (!smc->use_fallback)
+ if (smc->use_fallback) {
+ sk->sk_state = SMC_CLOSED;
+ } else {
smc_close_active(smc);
+ sock_set_flag(sk, SOCK_DEAD);
+ sk->sk_shutdown |= SHUTDOWN_MASK;
+ }
if (smc->clcsock) {
struct socket *tcp;
@@ -666,11 +673,9 @@ void smc_close_non_accepted(struct sock *sk)
smc->clcsock = NULL;
sock_release(tcp);
}
- sock_set_flag(sk, SOCK_DEAD);
- sk->sk_shutdown |= SHUTDOWN_MASK;
if (smc->use_fallback) {
schedule_delayed_work(&smc->sock_put_work, TCP_TIMEWAIT_LEN);
- } else {
+ } else if (sk->sk_state == SMC_CLOSED) {
smc_conn_free(&smc->conn);
schedule_delayed_work(&smc->sock_put_work,
SMC_CLOSE_SOCK_PUT_DELAY);
@@ -800,6 +805,9 @@ static void smc_listen_work(struct work_struct *work)
goto decline_rdma;
}
+ smc_close_init(new_smc);
+ smc_rx_init(new_smc);
+
rc = smc_clc_send_accept(new_smc, local_contact);
if (rc)
goto out_err;
@@ -839,7 +847,6 @@ static void smc_listen_work(struct work_struct *work)
}
smc_tx_init(new_smc);
- smc_rx_init(new_smc);
out_connected:
sk_refcnt_debug_inc(newsmcsk);
diff --git a/net/smc/smc.h b/net/smc/smc.h
index ee5fbea24549..6e44313e4467 100644
--- a/net/smc/smc.h
+++ b/net/smc/smc.h
@@ -164,6 +164,7 @@ struct smc_connection {
#ifndef KERNEL_HAS_ATOMIC64
spinlock_t acurs_lock; /* protect cursors */
#endif
+ struct work_struct close_work; /* peer sent some closing */
};
struct smc_sock { /* smc sock container */
diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c
index 5a339493872e..a7294edbc221 100644
--- a/net/smc/smc_cdc.c
+++ b/net/smc/smc_cdc.c
@@ -217,8 +217,13 @@ static void smc_cdc_msg_recv_action(struct smc_sock *smc,
smc->sk.sk_err = ECONNRESET;
conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
}
- if (smc_cdc_rxed_any_close_or_senddone(conn))
- smc_close_passive_received(smc);
+ if (smc_cdc_rxed_any_close_or_senddone(conn)) {
+ smc->sk.sk_shutdown |= RCV_SHUTDOWN;
+ if (smc->clcsock && smc->clcsock->sk)
+ smc->clcsock->sk->sk_shutdown |= RCV_SHUTDOWN;
+ sock_set_flag(&smc->sk, SOCK_DONE);
+ schedule_work(&conn->close_work);
+ }
/* piggy backed tx info */
/* trigger sndbuf consumer: RDMA write into peer RMBE and CDC */
@@ -228,8 +233,6 @@ static void smc_cdc_msg_recv_action(struct smc_sock *smc,
smc_close_wake_tx_prepared(smc);
}
- /* subsequent patch: trigger socket release if connection closed */
-
/* socket connected but not accepted */
if (!smc->sk.sk_socket)
return;
diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c
index e41f594a1e1d..03ec058d18df 100644
--- a/net/smc/smc_clc.c
+++ b/net/smc/smc_clc.c
@@ -204,7 +204,7 @@ int smc_clc_send_confirm(struct smc_sock *smc)
memcpy(&cclc.lcl.mac, &link->smcibdev->mac[link->ibport - 1], ETH_ALEN);
hton24(cclc.qpn, link->roce_qp->qp_num);
cclc.rmb_rkey =
- htonl(conn->rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey);
+ htonl(conn->rmb_desc->rkey[SMC_SINGLE_LINK]);
cclc.conn_idx = 1; /* for now: 1 RMB = 1 RMBE */
cclc.rmbe_alert_token = htonl(conn->alert_token_local);
cclc.qp_mtu = min(link->path_mtu, link->peer_mtu);
@@ -256,7 +256,7 @@ int smc_clc_send_accept(struct smc_sock *new_smc, int srv_first_contact)
memcpy(&aclc.lcl.mac, link->smcibdev->mac[link->ibport - 1], ETH_ALEN);
hton24(aclc.qpn, link->roce_qp->qp_num);
aclc.rmb_rkey =
- htonl(conn->rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey);
+ htonl(conn->rmb_desc->rkey[SMC_SINGLE_LINK]);
aclc.conn_idx = 1; /* as long as 1 RMB = 1 RMBE */
aclc.rmbe_alert_token = htonl(conn->alert_token_local);
aclc.qp_mtu = link->path_mtu;
diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c
index 67a71d170bed..3c2e166b5d22 100644
--- a/net/smc/smc_close.c
+++ b/net/smc/smc_close.c
@@ -117,7 +117,6 @@ void smc_close_active_abort(struct smc_sock *smc)
struct smc_cdc_conn_state_flags *txflags =
&smc->conn.local_tx_ctrl.conn_state_flags;
- bh_lock_sock(&smc->sk);
smc->sk.sk_err = ECONNABORTED;
if (smc->clcsock && smc->clcsock->sk) {
smc->clcsock->sk->sk_err = ECONNABORTED;
@@ -125,6 +124,7 @@ void smc_close_active_abort(struct smc_sock *smc)
}
switch (smc->sk.sk_state) {
case SMC_INIT:
+ case SMC_ACTIVE:
smc->sk.sk_state = SMC_PEERABORTWAIT;
break;
case SMC_APPCLOSEWAIT1:
@@ -161,10 +161,15 @@ void smc_close_active_abort(struct smc_sock *smc)
}
sock_set_flag(&smc->sk, SOCK_DEAD);
- bh_unlock_sock(&smc->sk);
smc->sk.sk_state_change(&smc->sk);
}
+static inline bool smc_close_sent_any_close(struct smc_connection *conn)
+{
+ return conn->local_tx_ctrl.conn_state_flags.peer_conn_abort ||
+ conn->local_tx_ctrl.conn_state_flags.peer_conn_closed;
+}
+
int smc_close_active(struct smc_sock *smc)
{
struct smc_cdc_conn_state_flags *txflags =
@@ -185,8 +190,7 @@ again:
case SMC_INIT:
sk->sk_state = SMC_CLOSED;
if (smc->smc_listen_work.func)
- flush_work(&smc->smc_listen_work);
- sock_put(sk);
+ cancel_work_sync(&smc->smc_listen_work);
break;
case SMC_LISTEN:
sk->sk_state = SMC_CLOSED;
@@ -198,7 +202,7 @@ again:
}
release_sock(sk);
smc_close_cleanup_listen(sk);
- flush_work(&smc->tcp_listen_work);
+ cancel_work_sync(&smc->smc_listen_work);
lock_sock(sk);
break;
case SMC_ACTIVE:
@@ -218,7 +222,7 @@ again:
case SMC_APPFINCLOSEWAIT:
/* socket already shutdown wr or both (active close) */
if (txflags->peer_done_writing &&
- !txflags->peer_conn_closed) {
+ !smc_close_sent_any_close(conn)) {
/* just shutdown wr done, send close request */
rc = smc_close_final(conn);
}
@@ -248,6 +252,13 @@ again:
break;
case SMC_PEERCLOSEWAIT1:
case SMC_PEERCLOSEWAIT2:
+ if (txflags->peer_done_writing &&
+ !smc_close_sent_any_close(conn)) {
+ /* just shutdown wr done, send close request */
+ rc = smc_close_final(conn);
+ }
+ /* peer sending PeerConnectionClosed will cause transition */
+ break;
case SMC_PEERFINCLOSEWAIT:
/* peer sending PeerConnectionClosed will cause transition */
break;
@@ -285,7 +296,7 @@ static void smc_close_passive_abort_received(struct smc_sock *smc)
case SMC_PEERCLOSEWAIT1:
case SMC_PEERCLOSEWAIT2:
if (txflags->peer_done_writing &&
- !txflags->peer_conn_closed) {
+ !smc_close_sent_any_close(&smc->conn)) {
/* just shutdown, but not yet closed locally */
smc_close_abort(&smc->conn);
sk->sk_state = SMC_PROCESSABORT;
@@ -306,22 +317,27 @@ static void smc_close_passive_abort_received(struct smc_sock *smc)
/* Some kind of closing has been received: peer_conn_closed, peer_conn_abort,
* or peer_done_writing.
- * Called under tasklet context.
*/
-void smc_close_passive_received(struct smc_sock *smc)
+static void smc_close_passive_work(struct work_struct *work)
{
- struct smc_cdc_conn_state_flags *rxflags =
- &smc->conn.local_rx_ctrl.conn_state_flags;
+ struct smc_connection *conn = container_of(work,
+ struct smc_connection,
+ close_work);
+ struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
+ struct smc_cdc_conn_state_flags *rxflags;
struct sock *sk = &smc->sk;
int old_state;
- sk->sk_shutdown |= RCV_SHUTDOWN;
- if (smc->clcsock && smc->clcsock->sk)
- smc->clcsock->sk->sk_shutdown |= RCV_SHUTDOWN;
- sock_set_flag(&smc->sk, SOCK_DONE);
-
+ lock_sock(&smc->sk);
old_state = sk->sk_state;
+ if (!conn->alert_token_local) {
+ /* abnormal termination */
+ smc_close_active_abort(smc);
+ goto wakeup;
+ }
+
+ rxflags = &smc->conn.local_rx_ctrl.conn_state_flags;
if (rxflags->peer_conn_abort) {
smc_close_passive_abort_received(smc);
goto wakeup;
@@ -331,7 +347,7 @@ void smc_close_passive_received(struct smc_sock *smc)
case SMC_INIT:
if (atomic_read(&smc->conn.bytes_to_rcv) ||
(rxflags->peer_done_writing &&
- !rxflags->peer_conn_closed))
+ !smc_cdc_rxed_any_close(conn)))
sk->sk_state = SMC_APPCLOSEWAIT1;
else
sk->sk_state = SMC_CLOSED;
@@ -348,7 +364,7 @@ void smc_close_passive_received(struct smc_sock *smc)
if (!smc_cdc_rxed_any_close(&smc->conn))
break;
if (sock_flag(sk, SOCK_DEAD) &&
- (sk->sk_shutdown == SHUTDOWN_MASK)) {
+ smc_close_sent_any_close(conn)) {
/* smc_release has already been called locally */
sk->sk_state = SMC_CLOSED;
} else {
@@ -367,17 +383,19 @@ void smc_close_passive_received(struct smc_sock *smc)
}
wakeup:
- if (old_state != sk->sk_state)
- sk->sk_state_change(sk);
sk->sk_data_ready(sk); /* wakeup blocked rcvbuf consumers */
sk->sk_write_space(sk); /* wakeup blocked sndbuf producers */
- if ((sk->sk_state == SMC_CLOSED) &&
- (sock_flag(sk, SOCK_DEAD) || (old_state == SMC_INIT))) {
- smc_conn_free(&smc->conn);
- schedule_delayed_work(&smc->sock_put_work,
- SMC_CLOSE_SOCK_PUT_DELAY);
+ if (old_state != sk->sk_state) {
+ sk->sk_state_change(sk);
+ if ((sk->sk_state == SMC_CLOSED) &&
+ (sock_flag(sk, SOCK_DEAD) || !sk->sk_socket)) {
+ smc_conn_free(&smc->conn);
+ schedule_delayed_work(&smc->sock_put_work,
+ SMC_CLOSE_SOCK_PUT_DELAY);
+ }
}
+ release_sock(&smc->sk);
}
void smc_close_sock_put_work(struct work_struct *work)
@@ -442,3 +460,9 @@ again:
sk->sk_state_change(&smc->sk);
return rc;
}
+
+/* Initialize close properties on connection establishment. */
+void smc_close_init(struct smc_sock *smc)
+{
+ INIT_WORK(&smc->conn.close_work, smc_close_passive_work);
+}
diff --git a/net/smc/smc_close.h b/net/smc/smc_close.h
index bc9a2df3633c..4a3d99a8d7cb 100644
--- a/net/smc/smc_close.h
+++ b/net/smc/smc_close.h
@@ -21,8 +21,8 @@
void smc_close_wake_tx_prepared(struct smc_sock *smc);
void smc_close_active_abort(struct smc_sock *smc);
int smc_close_active(struct smc_sock *smc);
-void smc_close_passive_received(struct smc_sock *smc);
void smc_close_sock_put_work(struct work_struct *work);
int smc_close_shutdown_write(struct smc_sock *smc);
+void smc_close_init(struct smc_sock *smc);
#endif /* SMC_CLOSE_H */
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 0eac633fb354..3ac09a629ea1 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -316,7 +316,7 @@ void smc_lgr_terminate(struct smc_link_group *lgr)
smc = container_of(conn, struct smc_sock, conn);
sock_hold(&smc->sk);
__smc_lgr_unregister_conn(conn);
- smc_close_active_abort(smc);
+ schedule_work(&conn->close_work);
sock_put(&smc->sk);
node = rb_first(&lgr->conns_all);
}
@@ -613,19 +613,8 @@ int smc_rmb_create(struct smc_sock *smc)
rmb_desc = NULL;
continue; /* if mapping failed, try smaller one */
}
- rc = smc_ib_get_memory_region(lgr->lnk[SMC_SINGLE_LINK].roce_pd,
- IB_ACCESS_REMOTE_WRITE |
- IB_ACCESS_LOCAL_WRITE,
- &rmb_desc->mr_rx[SMC_SINGLE_LINK]);
- if (rc) {
- smc_ib_buf_unmap(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
- tmp_bufsize, rmb_desc,
- DMA_FROM_DEVICE);
- kfree(rmb_desc->cpu_addr);
- kfree(rmb_desc);
- rmb_desc = NULL;
- continue;
- }
+ rmb_desc->rkey[SMC_SINGLE_LINK] =
+ lgr->lnk[SMC_SINGLE_LINK].roce_pd->unsafe_global_rkey;
rmb_desc->used = 1;
write_lock_bh(&lgr->rmbs_lock);
list_add(&rmb_desc->list,
@@ -668,6 +657,7 @@ int smc_rmb_rtoken_handling(struct smc_connection *conn,
for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
if ((lgr->rtokens[i][SMC_SINGLE_LINK].rkey == rkey) &&
+ (lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr == dma_addr) &&
test_bit(i, lgr->rtokens_used_mask)) {
conn->rtoken_idx = i;
return 0;
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index 27eb38056a27..b013cb43a327 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -93,7 +93,7 @@ struct smc_buf_desc {
u64 dma_addr[SMC_LINKS_PER_LGR_MAX];
/* mapped address of buffer */
void *cpu_addr; /* virtual address of buffer */
- struct ib_mr *mr_rx[SMC_LINKS_PER_LGR_MAX];
+ u32 rkey[SMC_LINKS_PER_LGR_MAX];
/* for rmb only:
* rkey provided to peer
*/
diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c
index e6743c008ac5..b31715505a35 100644
--- a/net/smc/smc_ib.c
+++ b/net/smc/smc_ib.c
@@ -37,24 +37,6 @@ u8 local_systemid[SMC_SYSTEMID_LEN] = SMC_LOCAL_SYSTEMID_RESET; /* unique system
* identifier
*/
-int smc_ib_get_memory_region(struct ib_pd *pd, int access_flags,
- struct ib_mr **mr)
-{
- int rc;
-
- if (*mr)
- return 0; /* already done */
-
- /* obtain unique key -
- * next invocation of get_dma_mr returns a different key!
- */
- *mr = pd->device->get_dma_mr(pd, access_flags);
- rc = PTR_ERR_OR_ZERO(*mr);
- if (IS_ERR(*mr))
- *mr = NULL;
- return rc;
-}
-
static int smc_ib_modify_qp_init(struct smc_link *lnk)
{
struct ib_qp_attr qp_attr;
@@ -80,12 +62,11 @@ static int smc_ib_modify_qp_rtr(struct smc_link *lnk)
memset(&qp_attr, 0, sizeof(qp_attr));
qp_attr.qp_state = IB_QPS_RTR;
qp_attr.path_mtu = min(lnk->path_mtu, lnk->peer_mtu);
- qp_attr.ah_attr.port_num = lnk->ibport;
- qp_attr.ah_attr.ah_flags = IB_AH_GRH;
- qp_attr.ah_attr.grh.hop_limit = 1;
- memcpy(&qp_attr.ah_attr.grh.dgid, lnk->peer_gid,
- sizeof(lnk->peer_gid));
- memcpy(&qp_attr.ah_attr.dmac, lnk->peer_mac,
+ qp_attr.ah_attr.type = RDMA_AH_ATTR_TYPE_ROCE;
+ rdma_ah_set_port_num(&qp_attr.ah_attr, lnk->ibport);
+ rdma_ah_set_grh(&qp_attr.ah_attr, NULL, 0, 0, 1, 0);
+ rdma_ah_set_dgid_raw(&qp_attr.ah_attr, lnk->peer_gid);
+ memcpy(&qp_attr.ah_attr.roce.dmac, lnk->peer_mac,
sizeof(lnk->peer_mac));
qp_attr.dest_qp_num = lnk->peer_qpn;
qp_attr.rq_psn = lnk->peer_psn; /* starting receive packet seq # */
@@ -179,8 +160,6 @@ static void smc_ib_global_event_handler(struct ib_event_handler *handler,
u8 port_idx;
smcibdev = container_of(handler, struct smc_ib_device, event_handler);
- if (!smc_pnet_find_ib(smcibdev->ibdev->name))
- return;
switch (ibevent->event) {
case IB_EVENT_PORT_ERR:
@@ -213,7 +192,8 @@ int smc_ib_create_protection_domain(struct smc_link *lnk)
{
int rc;
- lnk->roce_pd = ib_alloc_pd(lnk->smcibdev->ibdev, 0);
+ lnk->roce_pd = ib_alloc_pd(lnk->smcibdev->ibdev,
+ IB_PD_UNSAFE_GLOBAL_RKEY);
rc = PTR_ERR_OR_ZERO(lnk->roce_pd);
if (IS_ERR(lnk->roce_pd))
lnk->roce_pd = NULL;
@@ -259,7 +239,6 @@ int smc_ib_create_queue_pair(struct smc_link *lnk)
.max_recv_wr = SMC_WR_BUF_CNT * 3,
.max_send_sge = SMC_IB_MAX_SEND_SGE,
.max_recv_sge = 1,
- .max_inline_data = SMC_WR_TX_SIZE,
},
.sq_sig_type = IB_SIGNAL_REQ_WR,
.qp_type = IB_QPT_RC,
diff --git a/net/smc/smc_ib.h b/net/smc/smc_ib.h
index a95f74bb5569..b567152a526d 100644
--- a/net/smc/smc_ib.h
+++ b/net/smc/smc_ib.h
@@ -11,6 +11,7 @@
#ifndef _SMC_IB_H
#define _SMC_IB_H
+#include <linux/interrupt.h>
#include <linux/if_ether.h>
#include <rdma/ib_verbs.h>
@@ -60,8 +61,6 @@ void smc_ib_dealloc_protection_domain(struct smc_link *lnk);
int smc_ib_create_protection_domain(struct smc_link *lnk);
void smc_ib_destroy_queue_pair(struct smc_link *lnk);
int smc_ib_create_queue_pair(struct smc_link *lnk);
-int smc_ib_get_memory_region(struct ib_pd *pd, int access_flags,
- struct ib_mr **mr);
int smc_ib_ready_link(struct smc_link *lnk);
int smc_ib_modify_qp_rts(struct smc_link *lnk);
int smc_ib_modify_qp_reset(struct smc_link *lnk);
diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c
index 9d3e7fb8348d..78f7af28ae4f 100644
--- a/net/smc/smc_pnet.c
+++ b/net/smc/smc_pnet.c
@@ -219,7 +219,7 @@ static bool smc_pnetid_valid(const char *pnet_name, char *pnetid)
}
/* Find an infiniband device by a given name. The device might not exist. */
-struct smc_ib_device *smc_pnet_find_ib(char *ib_name)
+static struct smc_ib_device *smc_pnet_find_ib(char *ib_name)
{
struct smc_ib_device *ibdev;
@@ -523,8 +523,11 @@ void smc_pnet_find_roce_resource(struct sock *sk,
read_lock(&smc_pnettable.lock);
list_for_each_entry(pnetelem, &smc_pnettable.pnetlist, list) {
if (dst->dev == pnetelem->ndev) {
- *smcibdev = pnetelem->smcibdev;
- *ibport = pnetelem->ib_port;
+ if (smc_ib_port_active(pnetelem->smcibdev,
+ pnetelem->ib_port)) {
+ *smcibdev = pnetelem->smcibdev;
+ *ibport = pnetelem->ib_port;
+ }
break;
}
}
diff --git a/net/smc/smc_pnet.h b/net/smc/smc_pnet.h
index 32ab3df928ca..c4f1bccd4358 100644
--- a/net/smc/smc_pnet.h
+++ b/net/smc/smc_pnet.h
@@ -16,7 +16,6 @@ struct smc_ib_device;
int smc_pnet_init(void) __init;
void smc_pnet_exit(void);
int smc_pnet_remove_by_ibdev(struct smc_ib_device *ibdev);
-struct smc_ib_device *smc_pnet_find_ib(char *ib_name);
void smc_pnet_find_roce_resource(struct sock *sk,
struct smc_ib_device **smcibdev, u8 *ibport);
diff --git a/net/smc/smc_rx.c b/net/smc/smc_rx.c
index c4ef9a4ec569..f0c8b089f770 100644
--- a/net/smc/smc_rx.c
+++ b/net/smc/smc_rx.c
@@ -36,11 +36,10 @@ static void smc_rx_data_ready(struct sock *sk)
if (skwq_has_sleeper(wq))
wake_up_interruptible_sync_poll(&wq->wait, POLLIN | POLLPRI |
POLLRDNORM | POLLRDBAND);
+ sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
if ((sk->sk_shutdown == SHUTDOWN_MASK) ||
(sk->sk_state == SMC_CLOSED))
sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_HUP);
- else
- sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
rcu_read_unlock();
}
diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c
index 69a0013dd25c..21ec1832ab51 100644
--- a/net/smc/smc_tx.c
+++ b/net/smc/smc_tx.c
@@ -431,9 +431,13 @@ static void smc_tx_work(struct work_struct *work)
struct smc_connection,
tx_work);
struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
+ int rc;
lock_sock(&smc->sk);
- smc_tx_sndbuf_nonempty(conn);
+ rc = smc_tx_sndbuf_nonempty(conn);
+ if (!rc && conn->local_rx_ctrl.prod_flags.write_blocked &&
+ !atomic_read(&conn->bytes_to_rcv))
+ conn->local_rx_ctrl.prod_flags.write_blocked = 0;
release_sock(&smc->sk);
}
diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c
index eadf157418dc..874ee9f9d796 100644
--- a/net/smc/smc_wr.c
+++ b/net/smc/smc_wr.c
@@ -447,7 +447,7 @@ static void smc_wr_init_sge(struct smc_link *lnk)
lnk->wr_tx_ibs[i].num_sge = 1;
lnk->wr_tx_ibs[i].opcode = IB_WR_SEND;
lnk->wr_tx_ibs[i].send_flags =
- IB_SEND_SIGNALED | IB_SEND_SOLICITED | IB_SEND_INLINE;
+ IB_SEND_SIGNALED | IB_SEND_SOLICITED;
}
for (i = 0; i < lnk->wr_rx_cnt; i++) {
lnk->wr_rx_sges[i].addr =
diff --git a/net/socket.c b/net/socket.c
index 985ef06792d6..c2564eb25c6b 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -3356,3 +3356,49 @@ int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
return sock->ops->shutdown(sock, how);
}
EXPORT_SYMBOL(kernel_sock_shutdown);
+
+/* This routine returns the IP overhead imposed by a socket i.e.
+ * the length of the underlying IP header, depending on whether
+ * this is an IPv4 or IPv6 socket and the length from IP options turned
+ * on at the socket. Assumes that the caller has a lock on the socket.
+ */
+u32 kernel_sock_ip_overhead(struct sock *sk)
+{
+ struct inet_sock *inet;
+ struct ip_options_rcu *opt;
+ u32 overhead = 0;
+ bool owned_by_user;
+#if IS_ENABLED(CONFIG_IPV6)
+ struct ipv6_pinfo *np;
+ struct ipv6_txoptions *optv6 = NULL;
+#endif /* IS_ENABLED(CONFIG_IPV6) */
+
+ if (!sk)
+ return overhead;
+
+ owned_by_user = sock_owned_by_user(sk);
+ switch (sk->sk_family) {
+ case AF_INET:
+ inet = inet_sk(sk);
+ overhead += sizeof(struct iphdr);
+ opt = rcu_dereference_protected(inet->inet_opt,
+ owned_by_user);
+ if (opt)
+ overhead += opt->opt.optlen;
+ return overhead;
+#if IS_ENABLED(CONFIG_IPV6)
+ case AF_INET6:
+ np = inet6_sk(sk);
+ overhead += sizeof(struct ipv6hdr);
+ if (np)
+ optv6 = rcu_dereference_protected(np->opt,
+ owned_by_user);
+ if (optv6)
+ overhead += (optv6->opt_flen + optv6->opt_nflen);
+ return overhead;
+#endif /* IS_ENABLED(CONFIG_IPV6) */
+ default: /* Returns 0 overhead if the socket is not ipv4 or ipv6 */
+ return overhead;
+ }
+}
+EXPORT_SYMBOL(kernel_sock_ip_overhead);
diff --git a/net/sunrpc/Kconfig b/net/sunrpc/Kconfig
index 04ce2c0b660e..ac09ca803296 100644
--- a/net/sunrpc/Kconfig
+++ b/net/sunrpc/Kconfig
@@ -52,6 +52,7 @@ config SUNRPC_XPRT_RDMA
tristate "RPC-over-RDMA transport"
depends on SUNRPC && INFINIBAND && INFINIBAND_ADDR_TRANS
default SUNRPC && INFINIBAND
+ select SG_POOL
help
This option allows the NFS client and server to use RDMA
transports (InfiniBand, iWARP, or RoCE).
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 52da3ce54bb5..b5cb921775a0 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1042,8 +1042,6 @@ struct rpc_task *rpc_run_task(const struct rpc_task_setup *task_setup_data)
struct rpc_task *task;
task = rpc_new_task(task_setup_data);
- if (IS_ERR(task))
- goto out;
rpc_task_set_client(task, task_setup_data->rpc_client);
rpc_task_set_rpc_message(task, task_setup_data->rpc_message);
@@ -1053,7 +1051,6 @@ struct rpc_task *rpc_run_task(const struct rpc_task_setup *task_setup_data)
atomic_inc(&task->tk_count);
rpc_execute(task);
-out:
return task;
}
EXPORT_SYMBOL_GPL(rpc_run_task);
@@ -1140,10 +1137,6 @@ struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req)
* Create an rpc_task to send the data
*/
task = rpc_new_task(&task_setup_data);
- if (IS_ERR(task)) {
- xprt_free_bc_request(req);
- goto out;
- }
task->tk_rqstp = req;
/*
@@ -1158,7 +1151,6 @@ struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req)
WARN_ON_ONCE(atomic_read(&task->tk_count) != 2);
rpc_execute(task);
-out:
dprintk("RPC: rpc_run_bc_task: task= %p\n", task);
return task;
}
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 5db68b371db2..0cc83839c13c 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -965,11 +965,6 @@ struct rpc_task *rpc_new_task(const struct rpc_task_setup *setup_data)
if (task == NULL) {
task = rpc_alloc_task();
- if (task == NULL) {
- rpc_release_calldata(setup_data->callback_ops,
- setup_data->callback_data);
- return ERR_PTR(-ENOMEM);
- }
flags = RPC_TASK_DYNAMIC;
}
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index a08aeb56b8e4..bc0f5a0ecbdc 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -702,59 +702,32 @@ found_pool:
return task;
}
-/*
- * Create or destroy enough new threads to make the number
- * of threads the given number. If `pool' is non-NULL, applies
- * only to threads in that pool, otherwise round-robins between
- * all pools. Caller must ensure that mutual exclusion between this and
- * server startup or shutdown.
- *
- * Destroying threads relies on the service threads filling in
- * rqstp->rq_task, which only the nfs ones do. Assumes the serv
- * has been created using svc_create_pooled().
- *
- * Based on code that used to be in nfsd_svc() but tweaked
- * to be pool-aware.
- */
-int
-svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
+/* create new threads */
+static int
+svc_start_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
{
struct svc_rqst *rqstp;
struct task_struct *task;
struct svc_pool *chosen_pool;
- int error = 0;
unsigned int state = serv->sv_nrthreads-1;
int node;
- if (pool == NULL) {
- /* The -1 assumes caller has done a svc_get() */
- nrservs -= (serv->sv_nrthreads-1);
- } else {
- spin_lock_bh(&pool->sp_lock);
- nrservs -= pool->sp_nrthreads;
- spin_unlock_bh(&pool->sp_lock);
- }
-
- /* create new threads */
- while (nrservs > 0) {
+ do {
nrservs--;
chosen_pool = choose_pool(serv, pool, &state);
node = svc_pool_map_get_node(chosen_pool->sp_id);
rqstp = svc_prepare_thread(serv, chosen_pool, node);
- if (IS_ERR(rqstp)) {
- error = PTR_ERR(rqstp);
- break;
- }
+ if (IS_ERR(rqstp))
+ return PTR_ERR(rqstp);
__module_get(serv->sv_ops->svo_module);
task = kthread_create_on_node(serv->sv_ops->svo_function, rqstp,
node, "%s", serv->sv_name);
if (IS_ERR(task)) {
- error = PTR_ERR(task);
module_put(serv->sv_ops->svo_module);
svc_exit_thread(rqstp);
- break;
+ return PTR_ERR(task);
}
rqstp->rq_task = task;
@@ -763,18 +736,103 @@ svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
svc_sock_update_bufs(serv);
wake_up_process(task);
- }
+ } while (nrservs > 0);
+
+ return 0;
+}
+
+
+/* destroy old threads */
+static int
+svc_signal_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
+{
+ struct task_struct *task;
+ unsigned int state = serv->sv_nrthreads-1;
+
/* destroy old threads */
- while (nrservs < 0 &&
- (task = choose_victim(serv, pool, &state)) != NULL) {
+ do {
+ task = choose_victim(serv, pool, &state);
+ if (task == NULL)
+ break;
send_sig(SIGINT, task, 1);
nrservs++;
+ } while (nrservs < 0);
+
+ return 0;
+}
+
+/*
+ * Create or destroy enough new threads to make the number
+ * of threads the given number. If `pool' is non-NULL, applies
+ * only to threads in that pool, otherwise round-robins between
+ * all pools. Caller must ensure that mutual exclusion between this and
+ * server startup or shutdown.
+ *
+ * Destroying threads relies on the service threads filling in
+ * rqstp->rq_task, which only the nfs ones do. Assumes the serv
+ * has been created using svc_create_pooled().
+ *
+ * Based on code that used to be in nfsd_svc() but tweaked
+ * to be pool-aware.
+ */
+int
+svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
+{
+ if (pool == NULL) {
+ /* The -1 assumes caller has done a svc_get() */
+ nrservs -= (serv->sv_nrthreads-1);
+ } else {
+ spin_lock_bh(&pool->sp_lock);
+ nrservs -= pool->sp_nrthreads;
+ spin_unlock_bh(&pool->sp_lock);
}
- return error;
+ if (nrservs > 0)
+ return svc_start_kthreads(serv, pool, nrservs);
+ if (nrservs < 0)
+ return svc_signal_kthreads(serv, pool, nrservs);
+ return 0;
}
EXPORT_SYMBOL_GPL(svc_set_num_threads);
+/* destroy old threads */
+static int
+svc_stop_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
+{
+ struct task_struct *task;
+ unsigned int state = serv->sv_nrthreads-1;
+
+ /* destroy old threads */
+ do {
+ task = choose_victim(serv, pool, &state);
+ if (task == NULL)
+ break;
+ kthread_stop(task);
+ nrservs++;
+ } while (nrservs < 0);
+ return 0;
+}
+
+int
+svc_set_num_threads_sync(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
+{
+ if (pool == NULL) {
+ /* The -1 assumes caller has done a svc_get() */
+ nrservs -= (serv->sv_nrthreads-1);
+ } else {
+ spin_lock_bh(&pool->sp_lock);
+ nrservs -= pool->sp_nrthreads;
+ spin_unlock_bh(&pool->sp_lock);
+ }
+
+ if (nrservs > 0)
+ return svc_start_kthreads(serv, pool, nrservs);
+ if (nrservs < 0)
+ return svc_stop_kthreads(serv, pool, nrservs);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(svc_set_num_threads_sync);
+
/*
* Called from a server thread as it's exiting. Caller must hold the "service
* mutex" for the service.
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index 1f7082144e01..e34f4ee7f2b6 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -807,7 +807,7 @@ void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p)
EXPORT_SYMBOL_GPL(xdr_init_decode);
/**
- * xdr_init_decode - Initialize an xdr_stream for decoding data.
+ * xdr_init_decode_pages - Initialize an xdr_stream for decoding into pages
* @xdr: pointer to xdr_stream struct
* @buf: pointer to XDR buffer from which to decode data
* @pages: list of pages to decode into
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index b530a2852ba8..3e63c5e97ebe 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -651,6 +651,7 @@ void xprt_force_disconnect(struct rpc_xprt *xprt)
xprt_wake_pending_tasks(xprt, -EAGAIN);
spin_unlock_bh(&xprt->transport_lock);
}
+EXPORT_SYMBOL_GPL(xprt_force_disconnect);
/**
* xprt_conditional_disconnect - force a transport to disconnect
diff --git a/net/sunrpc/xprtrdma/Makefile b/net/sunrpc/xprtrdma/Makefile
index ef19fa42c50f..c1ae8142ab73 100644
--- a/net/sunrpc/xprtrdma/Makefile
+++ b/net/sunrpc/xprtrdma/Makefile
@@ -4,5 +4,5 @@ rpcrdma-y := transport.o rpc_rdma.o verbs.o \
fmr_ops.o frwr_ops.o \
svc_rdma.o svc_rdma_backchannel.o svc_rdma_transport.o \
svc_rdma_marshal.o svc_rdma_sendto.o svc_rdma_recvfrom.o \
- module.o
+ svc_rdma_rw.o module.o
rpcrdma-$(CONFIG_SUNRPC_BACKCHANNEL) += backchannel.o
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index a044be2d6ad7..694e9b13ecf0 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -494,7 +494,7 @@ rpcrdma_prepare_hdr_sge(struct rpcrdma_ia *ia, struct rpcrdma_req *req,
}
sge->length = len;
- ib_dma_sync_single_for_device(ia->ri_device, sge->addr,
+ ib_dma_sync_single_for_device(rdmab_device(rb), sge->addr,
sge->length, DMA_TO_DEVICE);
req->rl_send_wr.num_sge++;
return true;
@@ -523,7 +523,7 @@ rpcrdma_prepare_msg_sges(struct rpcrdma_ia *ia, struct rpcrdma_req *req,
sge[sge_no].addr = rdmab_addr(rb);
sge[sge_no].length = xdr->head[0].iov_len;
sge[sge_no].lkey = rdmab_lkey(rb);
- ib_dma_sync_single_for_device(device, sge[sge_no].addr,
+ ib_dma_sync_single_for_device(rdmab_device(rb), sge[sge_no].addr,
sge[sge_no].length, DMA_TO_DEVICE);
/* If there is a Read chunk, the page list is being handled
@@ -781,9 +781,11 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
return 0;
out_err:
- pr_err("rpcrdma: rpcrdma_marshal_req failed, status %ld\n",
- PTR_ERR(iptr));
- r_xprt->rx_stats.failed_marshal_count++;
+ if (PTR_ERR(iptr) != -ENOBUFS) {
+ pr_err("rpcrdma: rpcrdma_marshal_req failed, status %ld\n",
+ PTR_ERR(iptr));
+ r_xprt->rx_stats.failed_marshal_count++;
+ }
return PTR_ERR(iptr);
}
diff --git a/net/sunrpc/xprtrdma/svc_rdma.c b/net/sunrpc/xprtrdma/svc_rdma.c
index c846ca9f1eba..a4a8f6989ee7 100644
--- a/net/sunrpc/xprtrdma/svc_rdma.c
+++ b/net/sunrpc/xprtrdma/svc_rdma.c
@@ -58,9 +58,9 @@ unsigned int svcrdma_max_requests = RPCRDMA_MAX_REQUESTS;
unsigned int svcrdma_max_bc_requests = RPCRDMA_MAX_BC_REQUESTS;
static unsigned int min_max_requests = 4;
static unsigned int max_max_requests = 16384;
-unsigned int svcrdma_max_req_size = RPCRDMA_MAX_REQ_SIZE;
-static unsigned int min_max_inline = 4096;
-static unsigned int max_max_inline = 65536;
+unsigned int svcrdma_max_req_size = RPCRDMA_DEF_INLINE_THRESH;
+static unsigned int min_max_inline = RPCRDMA_DEF_INLINE_THRESH;
+static unsigned int max_max_inline = RPCRDMA_MAX_INLINE_THRESH;
atomic_t rdma_stat_recv;
atomic_t rdma_stat_read;
@@ -247,8 +247,6 @@ int svc_rdma_init(void)
dprintk("SVCRDMA Module Init, register RPC RDMA transport\n");
dprintk("\tsvcrdma_ord : %d\n", svcrdma_ord);
dprintk("\tmax_requests : %u\n", svcrdma_max_requests);
- dprintk("\tsq_depth : %u\n",
- svcrdma_max_requests * RPCRDMA_SQ_DEPTH_MULT);
dprintk("\tmax_bc_requests : %u\n", svcrdma_max_bc_requests);
dprintk("\tmax_inline : %d\n", svcrdma_max_req_size);
diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
index ff1df40f0d26..c676ed0efb5a 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
@@ -12,7 +12,17 @@
#undef SVCRDMA_BACKCHANNEL_DEBUG
-int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt, struct rpcrdma_msg *rmsgp,
+/**
+ * svc_rdma_handle_bc_reply - Process incoming backchannel reply
+ * @xprt: controlling backchannel transport
+ * @rdma_resp: pointer to incoming transport header
+ * @rcvbuf: XDR buffer into which to decode the reply
+ *
+ * Returns:
+ * %0 if @rcvbuf is filled in, xprt_complete_rqst called,
+ * %-EAGAIN if server should call ->recvfrom again.
+ */
+int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt, __be32 *rdma_resp,
struct xdr_buf *rcvbuf)
{
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
@@ -27,13 +37,13 @@ int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt, struct rpcrdma_msg *rmsgp,
p = (__be32 *)src->iov_base;
len = src->iov_len;
- xid = rmsgp->rm_xid;
+ xid = *rdma_resp;
#ifdef SVCRDMA_BACKCHANNEL_DEBUG
pr_info("%s: xid=%08x, length=%zu\n",
__func__, be32_to_cpu(xid), len);
pr_info("%s: RPC/RDMA: %*ph\n",
- __func__, (int)RPCRDMA_HDRLEN_MIN, rmsgp);
+ __func__, (int)RPCRDMA_HDRLEN_MIN, rdma_resp);
pr_info("%s: RPC: %*ph\n",
__func__, (int)len, p);
#endif
@@ -53,7 +63,7 @@ int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt, struct rpcrdma_msg *rmsgp,
goto out_unlock;
memcpy(dst->iov_base, p, len);
- credits = be32_to_cpu(rmsgp->rm_credit);
+ credits = be32_to_cpup(rdma_resp + 2);
if (credits == 0)
credits = 1; /* don't deadlock */
else if (credits > r_xprt->rx_buf.rb_bc_max_requests)
@@ -90,9 +100,9 @@ out_notfound:
* Caller holds the connection's mutex and has already marshaled
* the RPC/RDMA request.
*
- * This is similar to svc_rdma_reply, but takes an rpc_rqst
- * instead, does not support chunks, and avoids blocking memory
- * allocation.
+ * This is similar to svc_rdma_send_reply_msg, but takes a struct
+ * rpc_rqst instead, does not support chunks, and avoids blocking
+ * memory allocation.
*
* XXX: There is still an opportunity to block in svc_rdma_send()
* if there are no SQ entries to post the Send. This may occur if
@@ -101,59 +111,36 @@ out_notfound:
static int svc_rdma_bc_sendto(struct svcxprt_rdma *rdma,
struct rpc_rqst *rqst)
{
- struct xdr_buf *sndbuf = &rqst->rq_snd_buf;
struct svc_rdma_op_ctxt *ctxt;
- struct svc_rdma_req_map *vec;
- struct ib_send_wr send_wr;
int ret;
- vec = svc_rdma_get_req_map(rdma);
- ret = svc_rdma_map_xdr(rdma, sndbuf, vec, false);
- if (ret)
+ ctxt = svc_rdma_get_context(rdma);
+
+ /* rpcrdma_bc_send_request builds the transport header and
+ * the backchannel RPC message in the same buffer. Thus only
+ * one SGE is needed to send both.
+ */
+ ret = svc_rdma_map_reply_hdr(rdma, ctxt, rqst->rq_buffer,
+ rqst->rq_snd_buf.len);
+ if (ret < 0)
goto out_err;
ret = svc_rdma_repost_recv(rdma, GFP_NOIO);
if (ret)
goto out_err;
- ctxt = svc_rdma_get_context(rdma);
- ctxt->pages[0] = virt_to_page(rqst->rq_buffer);
- ctxt->count = 1;
-
- ctxt->direction = DMA_TO_DEVICE;
- ctxt->sge[0].lkey = rdma->sc_pd->local_dma_lkey;
- ctxt->sge[0].length = sndbuf->len;
- ctxt->sge[0].addr =
- ib_dma_map_page(rdma->sc_cm_id->device, ctxt->pages[0], 0,
- sndbuf->len, DMA_TO_DEVICE);
- if (ib_dma_mapping_error(rdma->sc_cm_id->device, ctxt->sge[0].addr)) {
- ret = -EIO;
- goto out_unmap;
- }
- svc_rdma_count_mappings(rdma, ctxt);
-
- memset(&send_wr, 0, sizeof(send_wr));
- ctxt->cqe.done = svc_rdma_wc_send;
- send_wr.wr_cqe = &ctxt->cqe;
- send_wr.sg_list = ctxt->sge;
- send_wr.num_sge = 1;
- send_wr.opcode = IB_WR_SEND;
- send_wr.send_flags = IB_SEND_SIGNALED;
-
- ret = svc_rdma_send(rdma, &send_wr);
- if (ret) {
- ret = -EIO;
+ ret = svc_rdma_post_send_wr(rdma, ctxt, 1, 0);
+ if (ret)
goto out_unmap;
- }
out_err:
- svc_rdma_put_req_map(rdma, vec);
dprintk("svcrdma: %s returns %d\n", __func__, ret);
return ret;
out_unmap:
svc_rdma_unmap_dma(ctxt);
svc_rdma_put_context(ctxt, 1);
+ ret = -EIO;
goto out_err;
}
diff --git a/net/sunrpc/xprtrdma/svc_rdma_marshal.c b/net/sunrpc/xprtrdma/svc_rdma_marshal.c
index 1c4aabf0f657..bdcf7d85a3dc 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_marshal.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_marshal.c
@@ -166,92 +166,3 @@ out_inval:
dprintk("svcrdma: failed to parse transport header\n");
return -EINVAL;
}
-
-int svc_rdma_xdr_encode_error(struct svcxprt_rdma *xprt,
- struct rpcrdma_msg *rmsgp,
- enum rpcrdma_errcode err, __be32 *va)
-{
- __be32 *startp = va;
-
- *va++ = rmsgp->rm_xid;
- *va++ = rmsgp->rm_vers;
- *va++ = xprt->sc_fc_credits;
- *va++ = rdma_error;
- *va++ = cpu_to_be32(err);
- if (err == ERR_VERS) {
- *va++ = rpcrdma_version;
- *va++ = rpcrdma_version;
- }
-
- return (int)((unsigned long)va - (unsigned long)startp);
-}
-
-/**
- * svc_rdma_xdr_get_reply_hdr_length - Get length of Reply transport header
- * @rdma_resp: buffer containing Reply transport header
- *
- * Returns length of transport header, in bytes.
- */
-unsigned int svc_rdma_xdr_get_reply_hdr_len(__be32 *rdma_resp)
-{
- unsigned int nsegs;
- __be32 *p;
-
- p = rdma_resp;
-
- /* RPC-over-RDMA V1 replies never have a Read list. */
- p += rpcrdma_fixed_maxsz + 1;
-
- /* Skip Write list. */
- while (*p++ != xdr_zero) {
- nsegs = be32_to_cpup(p++);
- p += nsegs * rpcrdma_segment_maxsz;
- }
-
- /* Skip Reply chunk. */
- if (*p++ != xdr_zero) {
- nsegs = be32_to_cpup(p++);
- p += nsegs * rpcrdma_segment_maxsz;
- }
-
- return (unsigned long)p - (unsigned long)rdma_resp;
-}
-
-void svc_rdma_xdr_encode_write_list(struct rpcrdma_msg *rmsgp, int chunks)
-{
- struct rpcrdma_write_array *ary;
-
- /* no read-list */
- rmsgp->rm_body.rm_chunks[0] = xdr_zero;
-
- /* write-array discrim */
- ary = (struct rpcrdma_write_array *)
- &rmsgp->rm_body.rm_chunks[1];
- ary->wc_discrim = xdr_one;
- ary->wc_nchunks = cpu_to_be32(chunks);
-
- /* write-list terminator */
- ary->wc_array[chunks].wc_target.rs_handle = xdr_zero;
-
- /* reply-array discriminator */
- ary->wc_array[chunks].wc_target.rs_length = xdr_zero;
-}
-
-void svc_rdma_xdr_encode_reply_array(struct rpcrdma_write_array *ary,
- int chunks)
-{
- ary->wc_discrim = xdr_one;
- ary->wc_nchunks = cpu_to_be32(chunks);
-}
-
-void svc_rdma_xdr_encode_array_chunk(struct rpcrdma_write_array *ary,
- int chunk_no,
- __be32 rs_handle,
- __be64 rs_offset,
- u32 write_len)
-{
- struct rpcrdma_segment *seg = &ary->wc_array[chunk_no].wc_target;
- seg->rs_handle = rs_handle;
- seg->rs_offset = rs_offset;
- seg->rs_length = cpu_to_be32(write_len);
-}
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index f7b2daf72a86..27a99bf5b1a6 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -558,33 +558,85 @@ static void rdma_read_complete(struct svc_rqst *rqstp,
rqstp->rq_arg.buflen = head->arg.buflen;
}
+static void svc_rdma_send_error(struct svcxprt_rdma *xprt,
+ __be32 *rdma_argp, int status)
+{
+ struct svc_rdma_op_ctxt *ctxt;
+ __be32 *p, *err_msgp;
+ unsigned int length;
+ struct page *page;
+ int ret;
+
+ ret = svc_rdma_repost_recv(xprt, GFP_KERNEL);
+ if (ret)
+ return;
+
+ page = alloc_page(GFP_KERNEL);
+ if (!page)
+ return;
+ err_msgp = page_address(page);
+
+ p = err_msgp;
+ *p++ = *rdma_argp;
+ *p++ = *(rdma_argp + 1);
+ *p++ = xprt->sc_fc_credits;
+ *p++ = rdma_error;
+ if (status == -EPROTONOSUPPORT) {
+ *p++ = err_vers;
+ *p++ = rpcrdma_version;
+ *p++ = rpcrdma_version;
+ } else {
+ *p++ = err_chunk;
+ }
+ length = (unsigned long)p - (unsigned long)err_msgp;
+
+ /* Map transport header; no RPC message payload */
+ ctxt = svc_rdma_get_context(xprt);
+ ret = svc_rdma_map_reply_hdr(xprt, ctxt, err_msgp, length);
+ if (ret) {
+ dprintk("svcrdma: Error %d mapping send for protocol error\n",
+ ret);
+ return;
+ }
+
+ ret = svc_rdma_post_send_wr(xprt, ctxt, 1, 0);
+ if (ret) {
+ dprintk("svcrdma: Error %d posting send for protocol error\n",
+ ret);
+ svc_rdma_unmap_dma(ctxt);
+ svc_rdma_put_context(ctxt, 1);
+ }
+}
+
/* By convention, backchannel calls arrive via rdma_msg type
* messages, and never populate the chunk lists. This makes
* the RPC/RDMA header small and fixed in size, so it is
* straightforward to check the RPC header's direction field.
*/
-static bool
-svc_rdma_is_backchannel_reply(struct svc_xprt *xprt, struct rpcrdma_msg *rmsgp)
+static bool svc_rdma_is_backchannel_reply(struct svc_xprt *xprt,
+ __be32 *rdma_resp)
{
- __be32 *p = (__be32 *)rmsgp;
+ __be32 *p;
if (!xprt->xpt_bc_xprt)
return false;
- if (rmsgp->rm_type != rdma_msg)
+ p = rdma_resp + 3;
+ if (*p++ != rdma_msg)
return false;
- if (rmsgp->rm_body.rm_chunks[0] != xdr_zero)
+
+ if (*p++ != xdr_zero)
return false;
- if (rmsgp->rm_body.rm_chunks[1] != xdr_zero)
+ if (*p++ != xdr_zero)
return false;
- if (rmsgp->rm_body.rm_chunks[2] != xdr_zero)
+ if (*p++ != xdr_zero)
return false;
- /* sanity */
- if (p[7] != rmsgp->rm_xid)
+ /* XID sanity */
+ if (*p++ != *rdma_resp)
return false;
/* call direction */
- if (p[8] == cpu_to_be32(RPC_CALL))
+ if (*p == cpu_to_be32(RPC_CALL))
return false;
return true;
@@ -650,8 +702,9 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
goto out_drop;
rqstp->rq_xprt_hlen = ret;
- if (svc_rdma_is_backchannel_reply(xprt, rmsgp)) {
- ret = svc_rdma_handle_bc_reply(xprt->xpt_bc_xprt, rmsgp,
+ if (svc_rdma_is_backchannel_reply(xprt, &rmsgp->rm_xid)) {
+ ret = svc_rdma_handle_bc_reply(xprt->xpt_bc_xprt,
+ &rmsgp->rm_xid,
&rqstp->rq_arg);
svc_rdma_put_context(ctxt, 0);
if (ret)
@@ -686,7 +739,7 @@ complete:
return ret;
out_err:
- svc_rdma_send_error(rdma_xprt, rmsgp, ret);
+ svc_rdma_send_error(rdma_xprt, &rmsgp->rm_xid, ret);
svc_rdma_put_context(ctxt, 0);
return 0;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c
new file mode 100644
index 000000000000..0cf620277693
--- /dev/null
+++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c
@@ -0,0 +1,512 @@
+/*
+ * Copyright (c) 2016 Oracle. All rights reserved.
+ *
+ * Use the core R/W API to move RPC-over-RDMA Read and Write chunks.
+ */
+
+#include <linux/sunrpc/rpc_rdma.h>
+#include <linux/sunrpc/svc_rdma.h>
+#include <linux/sunrpc/debug.h>
+
+#include <rdma/rw.h>
+
+#define RPCDBG_FACILITY RPCDBG_SVCXPRT
+
+/* Each R/W context contains state for one chain of RDMA Read or
+ * Write Work Requests.
+ *
+ * Each WR chain handles a single contiguous server-side buffer,
+ * because scatterlist entries after the first have to start on
+ * page alignment. xdr_buf iovecs cannot guarantee alignment.
+ *
+ * Each WR chain handles only one R_key. Each RPC-over-RDMA segment
+ * from a client may contain a unique R_key, so each WR chain moves
+ * up to one segment at a time.
+ *
+ * The scatterlist makes this data structure over 4KB in size. To
+ * make it less likely to fail, and to handle the allocation for
+ * smaller I/O requests without disabling bottom-halves, these
+ * contexts are created on demand, but cached and reused until the
+ * controlling svcxprt_rdma is destroyed.
+ */
+struct svc_rdma_rw_ctxt {
+ struct list_head rw_list;
+ struct rdma_rw_ctx rw_ctx;
+ int rw_nents;
+ struct sg_table rw_sg_table;
+ struct scatterlist rw_first_sgl[0];
+};
+
+static inline struct svc_rdma_rw_ctxt *
+svc_rdma_next_ctxt(struct list_head *list)
+{
+ return list_first_entry_or_null(list, struct svc_rdma_rw_ctxt,
+ rw_list);
+}
+
+static struct svc_rdma_rw_ctxt *
+svc_rdma_get_rw_ctxt(struct svcxprt_rdma *rdma, unsigned int sges)
+{
+ struct svc_rdma_rw_ctxt *ctxt;
+
+ spin_lock(&rdma->sc_rw_ctxt_lock);
+
+ ctxt = svc_rdma_next_ctxt(&rdma->sc_rw_ctxts);
+ if (ctxt) {
+ list_del(&ctxt->rw_list);
+ spin_unlock(&rdma->sc_rw_ctxt_lock);
+ } else {
+ spin_unlock(&rdma->sc_rw_ctxt_lock);
+ ctxt = kmalloc(sizeof(*ctxt) +
+ SG_CHUNK_SIZE * sizeof(struct scatterlist),
+ GFP_KERNEL);
+ if (!ctxt)
+ goto out;
+ INIT_LIST_HEAD(&ctxt->rw_list);
+ }
+
+ ctxt->rw_sg_table.sgl = ctxt->rw_first_sgl;
+ if (sg_alloc_table_chained(&ctxt->rw_sg_table, sges,
+ ctxt->rw_sg_table.sgl)) {
+ kfree(ctxt);
+ ctxt = NULL;
+ }
+out:
+ return ctxt;
+}
+
+static void svc_rdma_put_rw_ctxt(struct svcxprt_rdma *rdma,
+ struct svc_rdma_rw_ctxt *ctxt)
+{
+ sg_free_table_chained(&ctxt->rw_sg_table, true);
+
+ spin_lock(&rdma->sc_rw_ctxt_lock);
+ list_add(&ctxt->rw_list, &rdma->sc_rw_ctxts);
+ spin_unlock(&rdma->sc_rw_ctxt_lock);
+}
+
+/**
+ * svc_rdma_destroy_rw_ctxts - Free accumulated R/W contexts
+ * @rdma: transport about to be destroyed
+ *
+ */
+void svc_rdma_destroy_rw_ctxts(struct svcxprt_rdma *rdma)
+{
+ struct svc_rdma_rw_ctxt *ctxt;
+
+ while ((ctxt = svc_rdma_next_ctxt(&rdma->sc_rw_ctxts)) != NULL) {
+ list_del(&ctxt->rw_list);
+ kfree(ctxt);
+ }
+}
+
+/* A chunk context tracks all I/O for moving one Read or Write
+ * chunk. This is a a set of rdma_rw's that handle data movement
+ * for all segments of one chunk.
+ *
+ * These are small, acquired with a single allocator call, and
+ * no more than one is needed per chunk. They are allocated on
+ * demand, and not cached.
+ */
+struct svc_rdma_chunk_ctxt {
+ struct ib_cqe cc_cqe;
+ struct svcxprt_rdma *cc_rdma;
+ struct list_head cc_rwctxts;
+ int cc_sqecount;
+ enum dma_data_direction cc_dir;
+};
+
+static void svc_rdma_cc_init(struct svcxprt_rdma *rdma,
+ struct svc_rdma_chunk_ctxt *cc,
+ enum dma_data_direction dir)
+{
+ cc->cc_rdma = rdma;
+ svc_xprt_get(&rdma->sc_xprt);
+
+ INIT_LIST_HEAD(&cc->cc_rwctxts);
+ cc->cc_sqecount = 0;
+ cc->cc_dir = dir;
+}
+
+static void svc_rdma_cc_release(struct svc_rdma_chunk_ctxt *cc)
+{
+ struct svcxprt_rdma *rdma = cc->cc_rdma;
+ struct svc_rdma_rw_ctxt *ctxt;
+
+ while ((ctxt = svc_rdma_next_ctxt(&cc->cc_rwctxts)) != NULL) {
+ list_del(&ctxt->rw_list);
+
+ rdma_rw_ctx_destroy(&ctxt->rw_ctx, rdma->sc_qp,
+ rdma->sc_port_num, ctxt->rw_sg_table.sgl,
+ ctxt->rw_nents, cc->cc_dir);
+ svc_rdma_put_rw_ctxt(rdma, ctxt);
+ }
+ svc_xprt_put(&rdma->sc_xprt);
+}
+
+/* State for sending a Write or Reply chunk.
+ * - Tracks progress of writing one chunk over all its segments
+ * - Stores arguments for the SGL constructor functions
+ */
+struct svc_rdma_write_info {
+ /* write state of this chunk */
+ unsigned int wi_seg_off;
+ unsigned int wi_seg_no;
+ unsigned int wi_nsegs;
+ __be32 *wi_segs;
+
+ /* SGL constructor arguments */
+ struct xdr_buf *wi_xdr;
+ unsigned char *wi_base;
+ unsigned int wi_next_off;
+
+ struct svc_rdma_chunk_ctxt wi_cc;
+};
+
+static struct svc_rdma_write_info *
+svc_rdma_write_info_alloc(struct svcxprt_rdma *rdma, __be32 *chunk)
+{
+ struct svc_rdma_write_info *info;
+
+ info = kmalloc(sizeof(*info), GFP_KERNEL);
+ if (!info)
+ return info;
+
+ info->wi_seg_off = 0;
+ info->wi_seg_no = 0;
+ info->wi_nsegs = be32_to_cpup(++chunk);
+ info->wi_segs = ++chunk;
+ svc_rdma_cc_init(rdma, &info->wi_cc, DMA_TO_DEVICE);
+ return info;
+}
+
+static void svc_rdma_write_info_free(struct svc_rdma_write_info *info)
+{
+ svc_rdma_cc_release(&info->wi_cc);
+ kfree(info);
+}
+
+/**
+ * svc_rdma_write_done - Write chunk completion
+ * @cq: controlling Completion Queue
+ * @wc: Work Completion
+ *
+ * Pages under I/O are freed by a subsequent Send completion.
+ */
+static void svc_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc)
+{
+ struct ib_cqe *cqe = wc->wr_cqe;
+ struct svc_rdma_chunk_ctxt *cc =
+ container_of(cqe, struct svc_rdma_chunk_ctxt, cc_cqe);
+ struct svcxprt_rdma *rdma = cc->cc_rdma;
+ struct svc_rdma_write_info *info =
+ container_of(cc, struct svc_rdma_write_info, wi_cc);
+
+ atomic_add(cc->cc_sqecount, &rdma->sc_sq_avail);
+ wake_up(&rdma->sc_send_wait);
+
+ if (unlikely(wc->status != IB_WC_SUCCESS)) {
+ set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags);
+ if (wc->status != IB_WC_WR_FLUSH_ERR)
+ pr_err("svcrdma: write ctx: %s (%u/0x%x)\n",
+ ib_wc_status_msg(wc->status),
+ wc->status, wc->vendor_err);
+ }
+
+ svc_rdma_write_info_free(info);
+}
+
+/* This function sleeps when the transport's Send Queue is congested.
+ *
+ * Assumptions:
+ * - If ib_post_send() succeeds, only one completion is expected,
+ * even if one or more WRs are flushed. This is true when posting
+ * an rdma_rw_ctx or when posting a single signaled WR.
+ */
+static int svc_rdma_post_chunk_ctxt(struct svc_rdma_chunk_ctxt *cc)
+{
+ struct svcxprt_rdma *rdma = cc->cc_rdma;
+ struct svc_xprt *xprt = &rdma->sc_xprt;
+ struct ib_send_wr *first_wr, *bad_wr;
+ struct list_head *tmp;
+ struct ib_cqe *cqe;
+ int ret;
+
+ first_wr = NULL;
+ cqe = &cc->cc_cqe;
+ list_for_each(tmp, &cc->cc_rwctxts) {
+ struct svc_rdma_rw_ctxt *ctxt;
+
+ ctxt = list_entry(tmp, struct svc_rdma_rw_ctxt, rw_list);
+ first_wr = rdma_rw_ctx_wrs(&ctxt->rw_ctx, rdma->sc_qp,
+ rdma->sc_port_num, cqe, first_wr);
+ cqe = NULL;
+ }
+
+ do {
+ if (atomic_sub_return(cc->cc_sqecount,
+ &rdma->sc_sq_avail) > 0) {
+ ret = ib_post_send(rdma->sc_qp, first_wr, &bad_wr);
+ if (ret)
+ break;
+ return 0;
+ }
+
+ atomic_inc(&rdma_stat_sq_starve);
+ atomic_add(cc->cc_sqecount, &rdma->sc_sq_avail);
+ wait_event(rdma->sc_send_wait,
+ atomic_read(&rdma->sc_sq_avail) > cc->cc_sqecount);
+ } while (1);
+
+ pr_err("svcrdma: ib_post_send failed (%d)\n", ret);
+ set_bit(XPT_CLOSE, &xprt->xpt_flags);
+
+ /* If even one was posted, there will be a completion. */
+ if (bad_wr != first_wr)
+ return 0;
+
+ atomic_add(cc->cc_sqecount, &rdma->sc_sq_avail);
+ wake_up(&rdma->sc_send_wait);
+ return -ENOTCONN;
+}
+
+/* Build and DMA-map an SGL that covers one kvec in an xdr_buf
+ */
+static void svc_rdma_vec_to_sg(struct svc_rdma_write_info *info,
+ unsigned int len,
+ struct svc_rdma_rw_ctxt *ctxt)
+{
+ struct scatterlist *sg = ctxt->rw_sg_table.sgl;
+
+ sg_set_buf(&sg[0], info->wi_base, len);
+ info->wi_base += len;
+
+ ctxt->rw_nents = 1;
+}
+
+/* Build and DMA-map an SGL that covers part of an xdr_buf's pagelist.
+ */
+static void svc_rdma_pagelist_to_sg(struct svc_rdma_write_info *info,
+ unsigned int remaining,
+ struct svc_rdma_rw_ctxt *ctxt)
+{
+ unsigned int sge_no, sge_bytes, page_off, page_no;
+ struct xdr_buf *xdr = info->wi_xdr;
+ struct scatterlist *sg;
+ struct page **page;
+
+ page_off = (info->wi_next_off + xdr->page_base) & ~PAGE_MASK;
+ page_no = (info->wi_next_off + xdr->page_base) >> PAGE_SHIFT;
+ page = xdr->pages + page_no;
+ info->wi_next_off += remaining;
+ sg = ctxt->rw_sg_table.sgl;
+ sge_no = 0;
+ do {
+ sge_bytes = min_t(unsigned int, remaining,
+ PAGE_SIZE - page_off);
+ sg_set_page(sg, *page, sge_bytes, page_off);
+
+ remaining -= sge_bytes;
+ sg = sg_next(sg);
+ page_off = 0;
+ sge_no++;
+ page++;
+ } while (remaining);
+
+ ctxt->rw_nents = sge_no;
+}
+
+/* Construct RDMA Write WRs to send a portion of an xdr_buf containing
+ * an RPC Reply.
+ */
+static int
+svc_rdma_build_writes(struct svc_rdma_write_info *info,
+ void (*constructor)(struct svc_rdma_write_info *info,
+ unsigned int len,
+ struct svc_rdma_rw_ctxt *ctxt),
+ unsigned int remaining)
+{
+ struct svc_rdma_chunk_ctxt *cc = &info->wi_cc;
+ struct svcxprt_rdma *rdma = cc->cc_rdma;
+ struct svc_rdma_rw_ctxt *ctxt;
+ __be32 *seg;
+ int ret;
+
+ cc->cc_cqe.done = svc_rdma_write_done;
+ seg = info->wi_segs + info->wi_seg_no * rpcrdma_segment_maxsz;
+ do {
+ unsigned int write_len;
+ u32 seg_length, seg_handle;
+ u64 seg_offset;
+
+ if (info->wi_seg_no >= info->wi_nsegs)
+ goto out_overflow;
+
+ seg_handle = be32_to_cpup(seg);
+ seg_length = be32_to_cpup(seg + 1);
+ xdr_decode_hyper(seg + 2, &seg_offset);
+ seg_offset += info->wi_seg_off;
+
+ write_len = min(remaining, seg_length - info->wi_seg_off);
+ ctxt = svc_rdma_get_rw_ctxt(rdma,
+ (write_len >> PAGE_SHIFT) + 2);
+ if (!ctxt)
+ goto out_noctx;
+
+ constructor(info, write_len, ctxt);
+ ret = rdma_rw_ctx_init(&ctxt->rw_ctx, rdma->sc_qp,
+ rdma->sc_port_num, ctxt->rw_sg_table.sgl,
+ ctxt->rw_nents, 0, seg_offset,
+ seg_handle, DMA_TO_DEVICE);
+ if (ret < 0)
+ goto out_initerr;
+
+ list_add(&ctxt->rw_list, &cc->cc_rwctxts);
+ cc->cc_sqecount += ret;
+ if (write_len == seg_length - info->wi_seg_off) {
+ seg += 4;
+ info->wi_seg_no++;
+ info->wi_seg_off = 0;
+ } else {
+ info->wi_seg_off += write_len;
+ }
+ remaining -= write_len;
+ } while (remaining);
+
+ return 0;
+
+out_overflow:
+ dprintk("svcrdma: inadequate space in Write chunk (%u)\n",
+ info->wi_nsegs);
+ return -E2BIG;
+
+out_noctx:
+ dprintk("svcrdma: no R/W ctxs available\n");
+ return -ENOMEM;
+
+out_initerr:
+ svc_rdma_put_rw_ctxt(rdma, ctxt);
+ pr_err("svcrdma: failed to map pagelist (%d)\n", ret);
+ return -EIO;
+}
+
+/* Send one of an xdr_buf's kvecs by itself. To send a Reply
+ * chunk, the whole RPC Reply is written back to the client.
+ * This function writes either the head or tail of the xdr_buf
+ * containing the Reply.
+ */
+static int svc_rdma_send_xdr_kvec(struct svc_rdma_write_info *info,
+ struct kvec *vec)
+{
+ info->wi_base = vec->iov_base;
+ return svc_rdma_build_writes(info, svc_rdma_vec_to_sg,
+ vec->iov_len);
+}
+
+/* Send an xdr_buf's page list by itself. A Write chunk is
+ * just the page list. a Reply chunk is the head, page list,
+ * and tail. This function is shared between the two types
+ * of chunk.
+ */
+static int svc_rdma_send_xdr_pagelist(struct svc_rdma_write_info *info,
+ struct xdr_buf *xdr)
+{
+ info->wi_xdr = xdr;
+ info->wi_next_off = 0;
+ return svc_rdma_build_writes(info, svc_rdma_pagelist_to_sg,
+ xdr->page_len);
+}
+
+/**
+ * svc_rdma_send_write_chunk - Write all segments in a Write chunk
+ * @rdma: controlling RDMA transport
+ * @wr_ch: Write chunk provided by client
+ * @xdr: xdr_buf containing the data payload
+ *
+ * Returns a non-negative number of bytes the chunk consumed, or
+ * %-E2BIG if the payload was larger than the Write chunk,
+ * %-ENOMEM if rdma_rw context pool was exhausted,
+ * %-ENOTCONN if posting failed (connection is lost),
+ * %-EIO if rdma_rw initialization failed (DMA mapping, etc).
+ */
+int svc_rdma_send_write_chunk(struct svcxprt_rdma *rdma, __be32 *wr_ch,
+ struct xdr_buf *xdr)
+{
+ struct svc_rdma_write_info *info;
+ int ret;
+
+ if (!xdr->page_len)
+ return 0;
+
+ info = svc_rdma_write_info_alloc(rdma, wr_ch);
+ if (!info)
+ return -ENOMEM;
+
+ ret = svc_rdma_send_xdr_pagelist(info, xdr);
+ if (ret < 0)
+ goto out_err;
+
+ ret = svc_rdma_post_chunk_ctxt(&info->wi_cc);
+ if (ret < 0)
+ goto out_err;
+ return xdr->page_len;
+
+out_err:
+ svc_rdma_write_info_free(info);
+ return ret;
+}
+
+/**
+ * svc_rdma_send_reply_chunk - Write all segments in the Reply chunk
+ * @rdma: controlling RDMA transport
+ * @rp_ch: Reply chunk provided by client
+ * @writelist: true if client provided a Write list
+ * @xdr: xdr_buf containing an RPC Reply
+ *
+ * Returns a non-negative number of bytes the chunk consumed, or
+ * %-E2BIG if the payload was larger than the Reply chunk,
+ * %-ENOMEM if rdma_rw context pool was exhausted,
+ * %-ENOTCONN if posting failed (connection is lost),
+ * %-EIO if rdma_rw initialization failed (DMA mapping, etc).
+ */
+int svc_rdma_send_reply_chunk(struct svcxprt_rdma *rdma, __be32 *rp_ch,
+ bool writelist, struct xdr_buf *xdr)
+{
+ struct svc_rdma_write_info *info;
+ int consumed, ret;
+
+ info = svc_rdma_write_info_alloc(rdma, rp_ch);
+ if (!info)
+ return -ENOMEM;
+
+ ret = svc_rdma_send_xdr_kvec(info, &xdr->head[0]);
+ if (ret < 0)
+ goto out_err;
+ consumed = xdr->head[0].iov_len;
+
+ /* Send the page list in the Reply chunk only if the
+ * client did not provide Write chunks.
+ */
+ if (!writelist && xdr->page_len) {
+ ret = svc_rdma_send_xdr_pagelist(info, xdr);
+ if (ret < 0)
+ goto out_err;
+ consumed += xdr->page_len;
+ }
+
+ if (xdr->tail[0].iov_len) {
+ ret = svc_rdma_send_xdr_kvec(info, &xdr->tail[0]);
+ if (ret < 0)
+ goto out_err;
+ consumed += xdr->tail[0].iov_len;
+ }
+
+ ret = svc_rdma_post_chunk_ctxt(&info->wi_cc);
+ if (ret < 0)
+ goto out_err;
+ return consumed;
+
+out_err:
+ svc_rdma_write_info_free(info);
+ return ret;
+}
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index 515221b16d09..1736337f3a55 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -1,4 +1,5 @@
/*
+ * Copyright (c) 2016 Oracle. All rights reserved.
* Copyright (c) 2014 Open Grid Computing, Inc. All rights reserved.
* Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved.
*
@@ -40,6 +41,63 @@
* Author: Tom Tucker <tom@opengridcomputing.com>
*/
+/* Operation
+ *
+ * The main entry point is svc_rdma_sendto. This is called by the
+ * RPC server when an RPC Reply is ready to be transmitted to a client.
+ *
+ * The passed-in svc_rqst contains a struct xdr_buf which holds an
+ * XDR-encoded RPC Reply message. sendto must construct the RPC-over-RDMA
+ * transport header, post all Write WRs needed for this Reply, then post
+ * a Send WR conveying the transport header and the RPC message itself to
+ * the client.
+ *
+ * svc_rdma_sendto must fully transmit the Reply before returning, as
+ * the svc_rqst will be recycled as soon as sendto returns. Remaining
+ * resources referred to by the svc_rqst are also recycled at that time.
+ * Therefore any resources that must remain longer must be detached
+ * from the svc_rqst and released later.
+ *
+ * Page Management
+ *
+ * The I/O that performs Reply transmission is asynchronous, and may
+ * complete well after sendto returns. Thus pages under I/O must be
+ * removed from the svc_rqst before sendto returns.
+ *
+ * The logic here depends on Send Queue and completion ordering. Since
+ * the Send WR is always posted last, it will always complete last. Thus
+ * when it completes, it is guaranteed that all previous Write WRs have
+ * also completed.
+ *
+ * Write WRs are constructed and posted. Each Write segment gets its own
+ * svc_rdma_rw_ctxt, allowing the Write completion handler to find and
+ * DMA-unmap the pages under I/O for that Write segment. The Write
+ * completion handler does not release any pages.
+ *
+ * When the Send WR is constructed, it also gets its own svc_rdma_op_ctxt.
+ * The ownership of all of the Reply's pages are transferred into that
+ * ctxt, the Send WR is posted, and sendto returns.
+ *
+ * The svc_rdma_op_ctxt is presented when the Send WR completes. The
+ * Send completion handler finally releases the Reply's pages.
+ *
+ * This mechanism also assumes that completions on the transport's Send
+ * Completion Queue do not run in parallel. Otherwise a Write completion
+ * and Send completion running at the same time could release pages that
+ * are still DMA-mapped.
+ *
+ * Error Handling
+ *
+ * - If the Send WR is posted successfully, it will either complete
+ * successfully, or get flushed. Either way, the Send completion
+ * handler releases the Reply's pages.
+ * - If the Send WR cannot be not posted, the forward path releases
+ * the Reply's pages.
+ *
+ * This handles the case, without the use of page reference counting,
+ * where two different Write segments send portions of the same page.
+ */
+
#include <linux/sunrpc/debug.h>
#include <linux/sunrpc/rpc_rdma.h>
#include <linux/spinlock.h>
@@ -55,113 +113,141 @@ static u32 xdr_padsize(u32 len)
return (len & 3) ? (4 - (len & 3)) : 0;
}
-int svc_rdma_map_xdr(struct svcxprt_rdma *xprt,
- struct xdr_buf *xdr,
- struct svc_rdma_req_map *vec,
- bool write_chunk_present)
+/* Returns length of transport header, in bytes.
+ */
+static unsigned int svc_rdma_reply_hdr_len(__be32 *rdma_resp)
{
- int sge_no;
- u32 sge_bytes;
- u32 page_bytes;
- u32 page_off;
- int page_no;
-
- if (xdr->len !=
- (xdr->head[0].iov_len + xdr->page_len + xdr->tail[0].iov_len)) {
- pr_err("svcrdma: %s: XDR buffer length error\n", __func__);
- return -EIO;
- }
+ unsigned int nsegs;
+ __be32 *p;
- /* Skip the first sge, this is for the RPCRDMA header */
- sge_no = 1;
+ p = rdma_resp;
+
+ /* RPC-over-RDMA V1 replies never have a Read list. */
+ p += rpcrdma_fixed_maxsz + 1;
- /* Head SGE */
- vec->sge[sge_no].iov_base = xdr->head[0].iov_base;
- vec->sge[sge_no].iov_len = xdr->head[0].iov_len;
- sge_no++;
-
- /* pages SGE */
- page_no = 0;
- page_bytes = xdr->page_len;
- page_off = xdr->page_base;
- while (page_bytes) {
- vec->sge[sge_no].iov_base =
- page_address(xdr->pages[page_no]) + page_off;
- sge_bytes = min_t(u32, page_bytes, (PAGE_SIZE - page_off));
- page_bytes -= sge_bytes;
- vec->sge[sge_no].iov_len = sge_bytes;
-
- sge_no++;
- page_no++;
- page_off = 0; /* reset for next time through loop */
+ /* Skip Write list. */
+ while (*p++ != xdr_zero) {
+ nsegs = be32_to_cpup(p++);
+ p += nsegs * rpcrdma_segment_maxsz;
}
- /* Tail SGE */
- if (xdr->tail[0].iov_len) {
- unsigned char *base = xdr->tail[0].iov_base;
- size_t len = xdr->tail[0].iov_len;
- u32 xdr_pad = xdr_padsize(xdr->page_len);
+ /* Skip Reply chunk. */
+ if (*p++ != xdr_zero) {
+ nsegs = be32_to_cpup(p++);
+ p += nsegs * rpcrdma_segment_maxsz;
+ }
- if (write_chunk_present && xdr_pad) {
- base += xdr_pad;
- len -= xdr_pad;
- }
+ return (unsigned long)p - (unsigned long)rdma_resp;
+}
- if (len) {
- vec->sge[sge_no].iov_base = base;
- vec->sge[sge_no].iov_len = len;
- sge_no++;
+/* One Write chunk is copied from Call transport header to Reply
+ * transport header. Each segment's length field is updated to
+ * reflect number of bytes consumed in the segment.
+ *
+ * Returns number of segments in this chunk.
+ */
+static unsigned int xdr_encode_write_chunk(__be32 *dst, __be32 *src,
+ unsigned int remaining)
+{
+ unsigned int i, nsegs;
+ u32 seg_len;
+
+ /* Write list discriminator */
+ *dst++ = *src++;
+
+ /* number of segments in this chunk */
+ nsegs = be32_to_cpup(src);
+ *dst++ = *src++;
+
+ for (i = nsegs; i; i--) {
+ /* segment's RDMA handle */
+ *dst++ = *src++;
+
+ /* bytes returned in this segment */
+ seg_len = be32_to_cpu(*src);
+ if (remaining >= seg_len) {
+ /* entire segment was consumed */
+ *dst = *src;
+ remaining -= seg_len;
+ } else {
+ /* segment only partly filled */
+ *dst = cpu_to_be32(remaining);
+ remaining = 0;
}
- }
+ dst++; src++;
- dprintk("svcrdma: %s: sge_no %d page_no %d "
- "page_base %u page_len %u head_len %zu tail_len %zu\n",
- __func__, sge_no, page_no, xdr->page_base, xdr->page_len,
- xdr->head[0].iov_len, xdr->tail[0].iov_len);
+ /* segment's RDMA offset */
+ *dst++ = *src++;
+ *dst++ = *src++;
+ }
- vec->count = sge_no;
- return 0;
+ return nsegs;
}
-static dma_addr_t dma_map_xdr(struct svcxprt_rdma *xprt,
- struct xdr_buf *xdr,
- u32 xdr_off, size_t len, int dir)
+/* The client provided a Write list in the Call message. Fill in
+ * the segments in the first Write chunk in the Reply's transport
+ * header with the number of bytes consumed in each segment.
+ * Remaining chunks are returned unused.
+ *
+ * Assumptions:
+ * - Client has provided only one Write chunk
+ */
+static void svc_rdma_xdr_encode_write_list(__be32 *rdma_resp, __be32 *wr_ch,
+ unsigned int consumed)
{
- struct page *page;
- dma_addr_t dma_addr;
- if (xdr_off < xdr->head[0].iov_len) {
- /* This offset is in the head */
- xdr_off += (unsigned long)xdr->head[0].iov_base & ~PAGE_MASK;
- page = virt_to_page(xdr->head[0].iov_base);
- } else {
- xdr_off -= xdr->head[0].iov_len;
- if (xdr_off < xdr->page_len) {
- /* This offset is in the page list */
- xdr_off += xdr->page_base;
- page = xdr->pages[xdr_off >> PAGE_SHIFT];
- xdr_off &= ~PAGE_MASK;
- } else {
- /* This offset is in the tail */
- xdr_off -= xdr->page_len;
- xdr_off += (unsigned long)
- xdr->tail[0].iov_base & ~PAGE_MASK;
- page = virt_to_page(xdr->tail[0].iov_base);
- }
+ unsigned int nsegs;
+ __be32 *p, *q;
+
+ /* RPC-over-RDMA V1 replies never have a Read list. */
+ p = rdma_resp + rpcrdma_fixed_maxsz + 1;
+
+ q = wr_ch;
+ while (*q != xdr_zero) {
+ nsegs = xdr_encode_write_chunk(p, q, consumed);
+ q += 2 + nsegs * rpcrdma_segment_maxsz;
+ p += 2 + nsegs * rpcrdma_segment_maxsz;
+ consumed = 0;
}
- dma_addr = ib_dma_map_page(xprt->sc_cm_id->device, page, xdr_off,
- min_t(size_t, PAGE_SIZE, len), dir);
- return dma_addr;
+
+ /* Terminate Write list */
+ *p++ = xdr_zero;
+
+ /* Reply chunk discriminator; may be replaced later */
+ *p = xdr_zero;
+}
+
+/* The client provided a Reply chunk in the Call message. Fill in
+ * the segments in the Reply chunk in the Reply message with the
+ * number of bytes consumed in each segment.
+ *
+ * Assumptions:
+ * - Reply can always fit in the provided Reply chunk
+ */
+static void svc_rdma_xdr_encode_reply_chunk(__be32 *rdma_resp, __be32 *rp_ch,
+ unsigned int consumed)
+{
+ __be32 *p;
+
+ /* Find the Reply chunk in the Reply's xprt header.
+ * RPC-over-RDMA V1 replies never have a Read list.
+ */
+ p = rdma_resp + rpcrdma_fixed_maxsz + 1;
+
+ /* Skip past Write list */
+ while (*p++ != xdr_zero)
+ p += 1 + be32_to_cpup(p) * rpcrdma_segment_maxsz;
+
+ xdr_encode_write_chunk(p, rp_ch, consumed);
}
/* Parse the RPC Call's transport header.
*/
-static void svc_rdma_get_write_arrays(struct rpcrdma_msg *rmsgp,
- struct rpcrdma_write_array **write,
- struct rpcrdma_write_array **reply)
+static void svc_rdma_get_write_arrays(__be32 *rdma_argp,
+ __be32 **write, __be32 **reply)
{
__be32 *p;
- p = (__be32 *)&rmsgp->rm_body.rm_chunks[0];
+ p = rdma_argp + rpcrdma_fixed_maxsz;
/* Read list */
while (*p++ != xdr_zero)
@@ -169,7 +255,7 @@ static void svc_rdma_get_write_arrays(struct rpcrdma_msg *rmsgp,
/* Write list */
if (*p != xdr_zero) {
- *write = (struct rpcrdma_write_array *)p;
+ *write = p;
while (*p++ != xdr_zero)
p += 1 + be32_to_cpu(*p) * 4;
} else {
@@ -179,7 +265,7 @@ static void svc_rdma_get_write_arrays(struct rpcrdma_msg *rmsgp,
/* Reply chunk */
if (*p != xdr_zero)
- *reply = (struct rpcrdma_write_array *)p;
+ *reply = p;
else
*reply = NULL;
}
@@ -189,360 +275,321 @@ static void svc_rdma_get_write_arrays(struct rpcrdma_msg *rmsgp,
* Invalidate, and responder chooses one rkey to invalidate.
*
* Find a candidate rkey to invalidate when sending a reply. Picks the
- * first rkey it finds in the chunks lists.
+ * first R_key it finds in the chunk lists.
*
* Returns zero if RPC's chunk lists are empty.
*/
-static u32 svc_rdma_get_inv_rkey(struct rpcrdma_msg *rdma_argp,
- struct rpcrdma_write_array *wr_ary,
- struct rpcrdma_write_array *rp_ary)
+static u32 svc_rdma_get_inv_rkey(__be32 *rdma_argp,
+ __be32 *wr_lst, __be32 *rp_ch)
{
- struct rpcrdma_read_chunk *rd_ary;
- struct rpcrdma_segment *arg_ch;
+ __be32 *p;
- rd_ary = (struct rpcrdma_read_chunk *)&rdma_argp->rm_body.rm_chunks[0];
- if (rd_ary->rc_discrim != xdr_zero)
- return be32_to_cpu(rd_ary->rc_target.rs_handle);
+ p = rdma_argp + rpcrdma_fixed_maxsz;
+ if (*p != xdr_zero)
+ p += 2;
+ else if (wr_lst && be32_to_cpup(wr_lst + 1))
+ p = wr_lst + 2;
+ else if (rp_ch && be32_to_cpup(rp_ch + 1))
+ p = rp_ch + 2;
+ else
+ return 0;
+ return be32_to_cpup(p);
+}
- if (wr_ary && be32_to_cpu(wr_ary->wc_nchunks)) {
- arg_ch = &wr_ary->wc_array[0].wc_target;
- return be32_to_cpu(arg_ch->rs_handle);
- }
+/* ib_dma_map_page() is used here because svc_rdma_dma_unmap()
+ * is used during completion to DMA-unmap this memory, and
+ * it uses ib_dma_unmap_page() exclusively.
+ */
+static int svc_rdma_dma_map_buf(struct svcxprt_rdma *rdma,
+ struct svc_rdma_op_ctxt *ctxt,
+ unsigned int sge_no,
+ unsigned char *base,
+ unsigned int len)
+{
+ unsigned long offset = (unsigned long)base & ~PAGE_MASK;
+ struct ib_device *dev = rdma->sc_cm_id->device;
+ dma_addr_t dma_addr;
- if (rp_ary && be32_to_cpu(rp_ary->wc_nchunks)) {
- arg_ch = &rp_ary->wc_array[0].wc_target;
- return be32_to_cpu(arg_ch->rs_handle);
- }
+ dma_addr = ib_dma_map_page(dev, virt_to_page(base),
+ offset, len, DMA_TO_DEVICE);
+ if (ib_dma_mapping_error(dev, dma_addr))
+ return -EIO;
+ ctxt->sge[sge_no].addr = dma_addr;
+ ctxt->sge[sge_no].length = len;
+ ctxt->sge[sge_no].lkey = rdma->sc_pd->local_dma_lkey;
+ svc_rdma_count_mappings(rdma, ctxt);
return 0;
}
-/* Assumptions:
- * - The specified write_len can be represented in sc_max_sge * PAGE_SIZE
- */
-static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
- u32 rmr, u64 to,
- u32 xdr_off, int write_len,
- struct svc_rdma_req_map *vec)
+static int svc_rdma_dma_map_page(struct svcxprt_rdma *rdma,
+ struct svc_rdma_op_ctxt *ctxt,
+ unsigned int sge_no,
+ struct page *page,
+ unsigned int offset,
+ unsigned int len)
{
- struct ib_rdma_wr write_wr;
- struct ib_sge *sge;
- int xdr_sge_no;
- int sge_no;
- int sge_bytes;
- int sge_off;
- int bc;
- struct svc_rdma_op_ctxt *ctxt;
+ struct ib_device *dev = rdma->sc_cm_id->device;
+ dma_addr_t dma_addr;
- if (vec->count > RPCSVC_MAXPAGES) {
- pr_err("svcrdma: Too many pages (%lu)\n", vec->count);
+ dma_addr = ib_dma_map_page(dev, page, offset, len, DMA_TO_DEVICE);
+ if (ib_dma_mapping_error(dev, dma_addr))
return -EIO;
- }
- dprintk("svcrdma: RDMA_WRITE rmr=%x, to=%llx, xdr_off=%d, "
- "write_len=%d, vec->sge=%p, vec->count=%lu\n",
- rmr, (unsigned long long)to, xdr_off,
- write_len, vec->sge, vec->count);
+ ctxt->sge[sge_no].addr = dma_addr;
+ ctxt->sge[sge_no].length = len;
+ ctxt->sge[sge_no].lkey = rdma->sc_pd->local_dma_lkey;
+ svc_rdma_count_mappings(rdma, ctxt);
+ return 0;
+}
- ctxt = svc_rdma_get_context(xprt);
+/**
+ * svc_rdma_map_reply_hdr - DMA map the transport header buffer
+ * @rdma: controlling transport
+ * @ctxt: op_ctxt for the Send WR
+ * @rdma_resp: buffer containing transport header
+ * @len: length of transport header
+ *
+ * Returns:
+ * %0 if the header is DMA mapped,
+ * %-EIO if DMA mapping failed.
+ */
+int svc_rdma_map_reply_hdr(struct svcxprt_rdma *rdma,
+ struct svc_rdma_op_ctxt *ctxt,
+ __be32 *rdma_resp,
+ unsigned int len)
+{
ctxt->direction = DMA_TO_DEVICE;
- sge = ctxt->sge;
-
- /* Find the SGE associated with xdr_off */
- for (bc = xdr_off, xdr_sge_no = 1; bc && xdr_sge_no < vec->count;
- xdr_sge_no++) {
- if (vec->sge[xdr_sge_no].iov_len > bc)
- break;
- bc -= vec->sge[xdr_sge_no].iov_len;
- }
-
- sge_off = bc;
- bc = write_len;
- sge_no = 0;
-
- /* Copy the remaining SGE */
- while (bc != 0) {
- sge_bytes = min_t(size_t,
- bc, vec->sge[xdr_sge_no].iov_len-sge_off);
- sge[sge_no].length = sge_bytes;
- sge[sge_no].addr =
- dma_map_xdr(xprt, &rqstp->rq_res, xdr_off,
- sge_bytes, DMA_TO_DEVICE);
- xdr_off += sge_bytes;
- if (ib_dma_mapping_error(xprt->sc_cm_id->device,
- sge[sge_no].addr))
- goto err;
- svc_rdma_count_mappings(xprt, ctxt);
- sge[sge_no].lkey = xprt->sc_pd->local_dma_lkey;
- ctxt->count++;
- sge_off = 0;
- sge_no++;
- xdr_sge_no++;
- if (xdr_sge_no > vec->count) {
- pr_err("svcrdma: Too many sges (%d)\n", xdr_sge_no);
- goto err;
- }
- bc -= sge_bytes;
- if (sge_no == xprt->sc_max_sge)
- break;
- }
-
- /* Prepare WRITE WR */
- memset(&write_wr, 0, sizeof write_wr);
- ctxt->cqe.done = svc_rdma_wc_write;
- write_wr.wr.wr_cqe = &ctxt->cqe;
- write_wr.wr.sg_list = &sge[0];
- write_wr.wr.num_sge = sge_no;
- write_wr.wr.opcode = IB_WR_RDMA_WRITE;
- write_wr.wr.send_flags = IB_SEND_SIGNALED;
- write_wr.rkey = rmr;
- write_wr.remote_addr = to;
-
- /* Post It */
- atomic_inc(&rdma_stat_write);
- if (svc_rdma_send(xprt, &write_wr.wr))
- goto err;
- return write_len - bc;
- err:
- svc_rdma_unmap_dma(ctxt);
- svc_rdma_put_context(ctxt, 0);
- return -EIO;
+ ctxt->pages[0] = virt_to_page(rdma_resp);
+ ctxt->count = 1;
+ return svc_rdma_dma_map_page(rdma, ctxt, 0, ctxt->pages[0], 0, len);
}
-noinline
-static int send_write_chunks(struct svcxprt_rdma *xprt,
- struct rpcrdma_write_array *wr_ary,
- struct rpcrdma_msg *rdma_resp,
- struct svc_rqst *rqstp,
- struct svc_rdma_req_map *vec)
+/* Load the xdr_buf into the ctxt's sge array, and DMA map each
+ * element as it is added.
+ *
+ * Returns the number of sge elements loaded on success, or
+ * a negative errno on failure.
+ */
+static int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma,
+ struct svc_rdma_op_ctxt *ctxt,
+ struct xdr_buf *xdr, __be32 *wr_lst)
{
- u32 xfer_len = rqstp->rq_res.page_len;
- int write_len;
- u32 xdr_off;
- int chunk_off;
- int chunk_no;
- int nchunks;
- struct rpcrdma_write_array *res_ary;
+ unsigned int len, sge_no, remaining, page_off;
+ struct page **ppages;
+ unsigned char *base;
+ u32 xdr_pad;
int ret;
- res_ary = (struct rpcrdma_write_array *)
- &rdma_resp->rm_body.rm_chunks[1];
-
- /* Write chunks start at the pagelist */
- nchunks = be32_to_cpu(wr_ary->wc_nchunks);
- for (xdr_off = rqstp->rq_res.head[0].iov_len, chunk_no = 0;
- xfer_len && chunk_no < nchunks;
- chunk_no++) {
- struct rpcrdma_segment *arg_ch;
- u64 rs_offset;
-
- arg_ch = &wr_ary->wc_array[chunk_no].wc_target;
- write_len = min(xfer_len, be32_to_cpu(arg_ch->rs_length));
-
- /* Prepare the response chunk given the length actually
- * written */
- xdr_decode_hyper((__be32 *)&arg_ch->rs_offset, &rs_offset);
- svc_rdma_xdr_encode_array_chunk(res_ary, chunk_no,
- arg_ch->rs_handle,
- arg_ch->rs_offset,
- write_len);
- chunk_off = 0;
- while (write_len) {
- ret = send_write(xprt, rqstp,
- be32_to_cpu(arg_ch->rs_handle),
- rs_offset + chunk_off,
- xdr_off,
- write_len,
- vec);
- if (ret <= 0)
- goto out_err;
- chunk_off += ret;
- xdr_off += ret;
- xfer_len -= ret;
- write_len -= ret;
+ sge_no = 1;
+
+ ret = svc_rdma_dma_map_buf(rdma, ctxt, sge_no++,
+ xdr->head[0].iov_base,
+ xdr->head[0].iov_len);
+ if (ret < 0)
+ return ret;
+
+ /* If a Write chunk is present, the xdr_buf's page list
+ * is not included inline. However the Upper Layer may
+ * have added XDR padding in the tail buffer, and that
+ * should not be included inline.
+ */
+ if (wr_lst) {
+ base = xdr->tail[0].iov_base;
+ len = xdr->tail[0].iov_len;
+ xdr_pad = xdr_padsize(xdr->page_len);
+
+ if (len && xdr_pad) {
+ base += xdr_pad;
+ len -= xdr_pad;
}
+
+ goto tail;
+ }
+
+ ppages = xdr->pages + (xdr->page_base >> PAGE_SHIFT);
+ page_off = xdr->page_base & ~PAGE_MASK;
+ remaining = xdr->page_len;
+ while (remaining) {
+ len = min_t(u32, PAGE_SIZE - page_off, remaining);
+
+ ret = svc_rdma_dma_map_page(rdma, ctxt, sge_no++,
+ *ppages++, page_off, len);
+ if (ret < 0)
+ return ret;
+
+ remaining -= len;
+ page_off = 0;
}
- /* Update the req with the number of chunks actually used */
- svc_rdma_xdr_encode_write_list(rdma_resp, chunk_no);
- return rqstp->rq_res.page_len;
+ base = xdr->tail[0].iov_base;
+ len = xdr->tail[0].iov_len;
+tail:
+ if (len) {
+ ret = svc_rdma_dma_map_buf(rdma, ctxt, sge_no++, base, len);
+ if (ret < 0)
+ return ret;
+ }
-out_err:
- pr_err("svcrdma: failed to send write chunks, rc=%d\n", ret);
- return -EIO;
+ return sge_no - 1;
}
-noinline
-static int send_reply_chunks(struct svcxprt_rdma *xprt,
- struct rpcrdma_write_array *rp_ary,
- struct rpcrdma_msg *rdma_resp,
- struct svc_rqst *rqstp,
- struct svc_rdma_req_map *vec)
+/* The svc_rqst and all resources it owns are released as soon as
+ * svc_rdma_sendto returns. Transfer pages under I/O to the ctxt
+ * so they are released by the Send completion handler.
+ */
+static void svc_rdma_save_io_pages(struct svc_rqst *rqstp,
+ struct svc_rdma_op_ctxt *ctxt)
{
- u32 xfer_len = rqstp->rq_res.len;
- int write_len;
- u32 xdr_off;
- int chunk_no;
- int chunk_off;
- int nchunks;
- struct rpcrdma_segment *ch;
- struct rpcrdma_write_array *res_ary;
- int ret;
+ int i, pages = rqstp->rq_next_page - rqstp->rq_respages;
- /* XXX: need to fix when reply lists occur with read-list and or
- * write-list */
- res_ary = (struct rpcrdma_write_array *)
- &rdma_resp->rm_body.rm_chunks[2];
-
- /* xdr offset starts at RPC message */
- nchunks = be32_to_cpu(rp_ary->wc_nchunks);
- for (xdr_off = 0, chunk_no = 0;
- xfer_len && chunk_no < nchunks;
- chunk_no++) {
- u64 rs_offset;
- ch = &rp_ary->wc_array[chunk_no].wc_target;
- write_len = min(xfer_len, be32_to_cpu(ch->rs_length));
-
- /* Prepare the reply chunk given the length actually
- * written */
- xdr_decode_hyper((__be32 *)&ch->rs_offset, &rs_offset);
- svc_rdma_xdr_encode_array_chunk(res_ary, chunk_no,
- ch->rs_handle, ch->rs_offset,
- write_len);
- chunk_off = 0;
- while (write_len) {
- ret = send_write(xprt, rqstp,
- be32_to_cpu(ch->rs_handle),
- rs_offset + chunk_off,
- xdr_off,
- write_len,
- vec);
- if (ret <= 0)
- goto out_err;
- chunk_off += ret;
- xdr_off += ret;
- xfer_len -= ret;
- write_len -= ret;
- }
+ ctxt->count += pages;
+ for (i = 0; i < pages; i++) {
+ ctxt->pages[i + 1] = rqstp->rq_respages[i];
+ rqstp->rq_respages[i] = NULL;
}
- /* Update the req with the number of chunks actually used */
- svc_rdma_xdr_encode_reply_array(res_ary, chunk_no);
+ rqstp->rq_next_page = rqstp->rq_respages + 1;
+}
- return rqstp->rq_res.len;
+/**
+ * svc_rdma_post_send_wr - Set up and post one Send Work Request
+ * @rdma: controlling transport
+ * @ctxt: op_ctxt for transmitting the Send WR
+ * @num_sge: number of SGEs to send
+ * @inv_rkey: R_key argument to Send With Invalidate, or zero
+ *
+ * Returns:
+ * %0 if the Send* was posted successfully,
+ * %-ENOTCONN if the connection was lost or dropped,
+ * %-EINVAL if there was a problem with the Send we built,
+ * %-ENOMEM if ib_post_send failed.
+ */
+int svc_rdma_post_send_wr(struct svcxprt_rdma *rdma,
+ struct svc_rdma_op_ctxt *ctxt, int num_sge,
+ u32 inv_rkey)
+{
+ struct ib_send_wr *send_wr = &ctxt->send_wr;
-out_err:
- pr_err("svcrdma: failed to send reply chunks, rc=%d\n", ret);
- return -EIO;
+ dprintk("svcrdma: posting Send WR with %u sge(s)\n", num_sge);
+
+ send_wr->next = NULL;
+ ctxt->cqe.done = svc_rdma_wc_send;
+ send_wr->wr_cqe = &ctxt->cqe;
+ send_wr->sg_list = ctxt->sge;
+ send_wr->num_sge = num_sge;
+ send_wr->send_flags = IB_SEND_SIGNALED;
+ if (inv_rkey) {
+ send_wr->opcode = IB_WR_SEND_WITH_INV;
+ send_wr->ex.invalidate_rkey = inv_rkey;
+ } else {
+ send_wr->opcode = IB_WR_SEND;
+ }
+
+ return svc_rdma_send(rdma, send_wr);
}
-/* This function prepares the portion of the RPCRDMA message to be
- * sent in the RDMA_SEND. This function is called after data sent via
- * RDMA has already been transmitted. There are three cases:
- * - The RPCRDMA header, RPC header, and payload are all sent in a
- * single RDMA_SEND. This is the "inline" case.
- * - The RPCRDMA header and some portion of the RPC header and data
- * are sent via this RDMA_SEND and another portion of the data is
- * sent via RDMA.
- * - The RPCRDMA header [NOMSG] is sent in this RDMA_SEND and the RPC
- * header and data are all transmitted via RDMA.
- * In all three cases, this function prepares the RPCRDMA header in
- * sge[0], the 'type' parameter indicates the type to place in the
- * RPCRDMA header, and the 'byte_count' field indicates how much of
- * the XDR to include in this RDMA_SEND. NB: The offset of the payload
- * to send is zero in the XDR.
+/* Prepare the portion of the RPC Reply that will be transmitted
+ * via RDMA Send. The RPC-over-RDMA transport header is prepared
+ * in sge[0], and the RPC xdr_buf is prepared in following sges.
+ *
+ * Depending on whether a Write list or Reply chunk is present,
+ * the server may send all, a portion of, or none of the xdr_buf.
+ * In the latter case, only the transport header (sge[0]) is
+ * transmitted.
+ *
+ * RDMA Send is the last step of transmitting an RPC reply. Pages
+ * involved in the earlier RDMA Writes are here transferred out
+ * of the rqstp and into the ctxt's page array. These pages are
+ * DMA unmapped by each Write completion, but the subsequent Send
+ * completion finally releases these pages.
+ *
+ * Assumptions:
+ * - The Reply's transport header will never be larger than a page.
*/
-static int send_reply(struct svcxprt_rdma *rdma,
- struct svc_rqst *rqstp,
- struct page *page,
- struct rpcrdma_msg *rdma_resp,
- struct svc_rdma_req_map *vec,
- int byte_count,
- u32 inv_rkey)
+static int svc_rdma_send_reply_msg(struct svcxprt_rdma *rdma,
+ __be32 *rdma_argp, __be32 *rdma_resp,
+ struct svc_rqst *rqstp,
+ __be32 *wr_lst, __be32 *rp_ch)
{
struct svc_rdma_op_ctxt *ctxt;
- struct ib_send_wr send_wr;
- u32 xdr_off;
- int sge_no;
- int sge_bytes;
- int page_no;
- int pages;
- int ret = -EIO;
-
- /* Prepare the context */
+ u32 inv_rkey;
+ int ret;
+
+ dprintk("svcrdma: sending %s reply: head=%zu, pagelen=%u, tail=%zu\n",
+ (rp_ch ? "RDMA_NOMSG" : "RDMA_MSG"),
+ rqstp->rq_res.head[0].iov_len,
+ rqstp->rq_res.page_len,
+ rqstp->rq_res.tail[0].iov_len);
+
ctxt = svc_rdma_get_context(rdma);
- ctxt->direction = DMA_TO_DEVICE;
- ctxt->pages[0] = page;
- ctxt->count = 1;
- /* Prepare the SGE for the RPCRDMA Header */
- ctxt->sge[0].lkey = rdma->sc_pd->local_dma_lkey;
- ctxt->sge[0].length =
- svc_rdma_xdr_get_reply_hdr_len((__be32 *)rdma_resp);
- ctxt->sge[0].addr =
- ib_dma_map_page(rdma->sc_cm_id->device, page, 0,
- ctxt->sge[0].length, DMA_TO_DEVICE);
- if (ib_dma_mapping_error(rdma->sc_cm_id->device, ctxt->sge[0].addr))
+ ret = svc_rdma_map_reply_hdr(rdma, ctxt, rdma_resp,
+ svc_rdma_reply_hdr_len(rdma_resp));
+ if (ret < 0)
goto err;
- svc_rdma_count_mappings(rdma, ctxt);
-
- ctxt->direction = DMA_TO_DEVICE;
- /* Map the payload indicated by 'byte_count' */
- xdr_off = 0;
- for (sge_no = 1; byte_count && sge_no < vec->count; sge_no++) {
- sge_bytes = min_t(size_t, vec->sge[sge_no].iov_len, byte_count);
- byte_count -= sge_bytes;
- ctxt->sge[sge_no].addr =
- dma_map_xdr(rdma, &rqstp->rq_res, xdr_off,
- sge_bytes, DMA_TO_DEVICE);
- xdr_off += sge_bytes;
- if (ib_dma_mapping_error(rdma->sc_cm_id->device,
- ctxt->sge[sge_no].addr))
+ if (!rp_ch) {
+ ret = svc_rdma_map_reply_msg(rdma, ctxt,
+ &rqstp->rq_res, wr_lst);
+ if (ret < 0)
goto err;
- svc_rdma_count_mappings(rdma, ctxt);
- ctxt->sge[sge_no].lkey = rdma->sc_pd->local_dma_lkey;
- ctxt->sge[sge_no].length = sge_bytes;
}
- if (byte_count != 0) {
- pr_err("svcrdma: Could not map %d bytes\n", byte_count);
+
+ svc_rdma_save_io_pages(rqstp, ctxt);
+
+ inv_rkey = 0;
+ if (rdma->sc_snd_w_inv)
+ inv_rkey = svc_rdma_get_inv_rkey(rdma_argp, wr_lst, rp_ch);
+ ret = svc_rdma_post_send_wr(rdma, ctxt, 1 + ret, inv_rkey);
+ if (ret)
goto err;
- }
- /* Save all respages in the ctxt and remove them from the
- * respages array. They are our pages until the I/O
- * completes.
+ return 0;
+
+err:
+ pr_err("svcrdma: failed to post Send WR (%d)\n", ret);
+ svc_rdma_unmap_dma(ctxt);
+ svc_rdma_put_context(ctxt, 1);
+ return ret;
+}
+
+/* Given the client-provided Write and Reply chunks, the server was not
+ * able to form a complete reply. Return an RDMA_ERROR message so the
+ * client can retire this RPC transaction. As above, the Send completion
+ * routine releases payload pages that were part of a previous RDMA Write.
+ *
+ * Remote Invalidation is skipped for simplicity.
+ */
+static int svc_rdma_send_error_msg(struct svcxprt_rdma *rdma,
+ __be32 *rdma_resp, struct svc_rqst *rqstp)
+{
+ struct svc_rdma_op_ctxt *ctxt;
+ __be32 *p;
+ int ret;
+
+ ctxt = svc_rdma_get_context(rdma);
+
+ /* Replace the original transport header with an
+ * RDMA_ERROR response. XID etc are preserved.
*/
- pages = rqstp->rq_next_page - rqstp->rq_respages;
- for (page_no = 0; page_no < pages; page_no++) {
- ctxt->pages[page_no+1] = rqstp->rq_respages[page_no];
- ctxt->count++;
- rqstp->rq_respages[page_no] = NULL;
- }
- rqstp->rq_next_page = rqstp->rq_respages + 1;
+ p = rdma_resp + 3;
+ *p++ = rdma_error;
+ *p = err_chunk;
- if (sge_no > rdma->sc_max_sge) {
- pr_err("svcrdma: Too many sges (%d)\n", sge_no);
+ ret = svc_rdma_map_reply_hdr(rdma, ctxt, rdma_resp, 20);
+ if (ret < 0)
goto err;
- }
- memset(&send_wr, 0, sizeof send_wr);
- ctxt->cqe.done = svc_rdma_wc_send;
- send_wr.wr_cqe = &ctxt->cqe;
- send_wr.sg_list = ctxt->sge;
- send_wr.num_sge = sge_no;
- if (inv_rkey) {
- send_wr.opcode = IB_WR_SEND_WITH_INV;
- send_wr.ex.invalidate_rkey = inv_rkey;
- } else
- send_wr.opcode = IB_WR_SEND;
- send_wr.send_flags = IB_SEND_SIGNALED;
- ret = svc_rdma_send(rdma, &send_wr);
+ svc_rdma_save_io_pages(rqstp, ctxt);
+
+ ret = svc_rdma_post_send_wr(rdma, ctxt, 1 + ret, 0);
if (ret)
goto err;
return 0;
- err:
+err:
+ pr_err("svcrdma: failed to post Send WR (%d)\n", ret);
svc_rdma_unmap_dma(ctxt);
svc_rdma_put_context(ctxt, 1);
return ret;
@@ -552,39 +599,36 @@ void svc_rdma_prep_reply_hdr(struct svc_rqst *rqstp)
{
}
+/**
+ * svc_rdma_sendto - Transmit an RPC reply
+ * @rqstp: processed RPC request, reply XDR already in ::rq_res
+ *
+ * Any resources still associated with @rqstp are released upon return.
+ * If no reply message was possible, the connection is closed.
+ *
+ * Returns:
+ * %0 if an RPC reply has been successfully posted,
+ * %-ENOMEM if a resource shortage occurred (connection is lost),
+ * %-ENOTCONN if posting failed (connection is lost).
+ */
int svc_rdma_sendto(struct svc_rqst *rqstp)
{
struct svc_xprt *xprt = rqstp->rq_xprt;
struct svcxprt_rdma *rdma =
container_of(xprt, struct svcxprt_rdma, sc_xprt);
- struct rpcrdma_msg *rdma_argp;
- struct rpcrdma_msg *rdma_resp;
- struct rpcrdma_write_array *wr_ary, *rp_ary;
- int ret;
- int inline_bytes;
+ __be32 *p, *rdma_argp, *rdma_resp, *wr_lst, *rp_ch;
+ struct xdr_buf *xdr = &rqstp->rq_res;
struct page *res_page;
- struct svc_rdma_req_map *vec;
- u32 inv_rkey;
- __be32 *p;
-
- dprintk("svcrdma: sending response for rqstp=%p\n", rqstp);
+ int ret;
- /* Get the RDMA request header. The receive logic always
- * places this at the start of page 0.
+ /* Find the call's chunk lists to decide how to send the reply.
+ * Receive places the Call's xprt header at the start of page 0.
*/
rdma_argp = page_address(rqstp->rq_pages[0]);
- svc_rdma_get_write_arrays(rdma_argp, &wr_ary, &rp_ary);
-
- inv_rkey = 0;
- if (rdma->sc_snd_w_inv)
- inv_rkey = svc_rdma_get_inv_rkey(rdma_argp, wr_ary, rp_ary);
+ svc_rdma_get_write_arrays(rdma_argp, &wr_lst, &rp_ch);
- /* Build an req vec for the XDR */
- vec = svc_rdma_get_req_map(rdma);
- ret = svc_rdma_map_xdr(rdma, &rqstp->rq_res, vec, wr_ary != NULL);
- if (ret)
- goto err0;
- inline_bytes = rqstp->rq_res.len;
+ dprintk("svcrdma: preparing response for XID 0x%08x\n",
+ be32_to_cpup(rdma_argp));
/* Create the RDMA response header. xprt->xpt_mutex,
* acquired in svc_send(), serializes RPC replies. The
@@ -598,115 +642,57 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
goto err0;
rdma_resp = page_address(res_page);
- p = &rdma_resp->rm_xid;
- *p++ = rdma_argp->rm_xid;
- *p++ = rdma_argp->rm_vers;
+ p = rdma_resp;
+ *p++ = *rdma_argp;
+ *p++ = *(rdma_argp + 1);
*p++ = rdma->sc_fc_credits;
- *p++ = rp_ary ? rdma_nomsg : rdma_msg;
+ *p++ = rp_ch ? rdma_nomsg : rdma_msg;
/* Start with empty chunks */
*p++ = xdr_zero;
*p++ = xdr_zero;
*p = xdr_zero;
- /* Send any write-chunk data and build resp write-list */
- if (wr_ary) {
- ret = send_write_chunks(rdma, wr_ary, rdma_resp, rqstp, vec);
+ if (wr_lst) {
+ /* XXX: Presume the client sent only one Write chunk */
+ ret = svc_rdma_send_write_chunk(rdma, wr_lst, xdr);
if (ret < 0)
- goto err1;
- inline_bytes -= ret + xdr_padsize(ret);
+ goto err2;
+ svc_rdma_xdr_encode_write_list(rdma_resp, wr_lst, ret);
}
-
- /* Send any reply-list data and update resp reply-list */
- if (rp_ary) {
- ret = send_reply_chunks(rdma, rp_ary, rdma_resp, rqstp, vec);
+ if (rp_ch) {
+ ret = svc_rdma_send_reply_chunk(rdma, rp_ch, wr_lst, xdr);
if (ret < 0)
- goto err1;
- inline_bytes -= ret;
+ goto err2;
+ svc_rdma_xdr_encode_reply_chunk(rdma_resp, rp_ch, ret);
}
- /* Post a fresh Receive buffer _before_ sending the reply */
ret = svc_rdma_post_recv(rdma, GFP_KERNEL);
if (ret)
goto err1;
-
- ret = send_reply(rdma, rqstp, res_page, rdma_resp, vec,
- inline_bytes, inv_rkey);
+ ret = svc_rdma_send_reply_msg(rdma, rdma_argp, rdma_resp, rqstp,
+ wr_lst, rp_ch);
if (ret < 0)
goto err0;
+ return 0;
- svc_rdma_put_req_map(rdma, vec);
- dprintk("svcrdma: send_reply returns %d\n", ret);
- return ret;
+ err2:
+ if (ret != -E2BIG)
+ goto err1;
+
+ ret = svc_rdma_post_recv(rdma, GFP_KERNEL);
+ if (ret)
+ goto err1;
+ ret = svc_rdma_send_error_msg(rdma, rdma_resp, rqstp);
+ if (ret < 0)
+ goto err0;
+ return 0;
err1:
put_page(res_page);
err0:
- svc_rdma_put_req_map(rdma, vec);
pr_err("svcrdma: Could not send reply, err=%d. Closing transport.\n",
ret);
- set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags);
+ set_bit(XPT_CLOSE, &xprt->xpt_flags);
return -ENOTCONN;
}
-
-void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
- int status)
-{
- struct ib_send_wr err_wr;
- struct page *p;
- struct svc_rdma_op_ctxt *ctxt;
- enum rpcrdma_errcode err;
- __be32 *va;
- int length;
- int ret;
-
- ret = svc_rdma_repost_recv(xprt, GFP_KERNEL);
- if (ret)
- return;
-
- p = alloc_page(GFP_KERNEL);
- if (!p)
- return;
- va = page_address(p);
-
- /* XDR encode an error reply */
- err = ERR_CHUNK;
- if (status == -EPROTONOSUPPORT)
- err = ERR_VERS;
- length = svc_rdma_xdr_encode_error(xprt, rmsgp, err, va);
-
- ctxt = svc_rdma_get_context(xprt);
- ctxt->direction = DMA_TO_DEVICE;
- ctxt->count = 1;
- ctxt->pages[0] = p;
-
- /* Prepare SGE for local address */
- ctxt->sge[0].lkey = xprt->sc_pd->local_dma_lkey;
- ctxt->sge[0].length = length;
- ctxt->sge[0].addr = ib_dma_map_page(xprt->sc_cm_id->device,
- p, 0, length, DMA_TO_DEVICE);
- if (ib_dma_mapping_error(xprt->sc_cm_id->device, ctxt->sge[0].addr)) {
- dprintk("svcrdma: Error mapping buffer for protocol error\n");
- svc_rdma_put_context(ctxt, 1);
- return;
- }
- svc_rdma_count_mappings(xprt, ctxt);
-
- /* Prepare SEND WR */
- memset(&err_wr, 0, sizeof(err_wr));
- ctxt->cqe.done = svc_rdma_wc_send;
- err_wr.wr_cqe = &ctxt->cqe;
- err_wr.sg_list = ctxt->sge;
- err_wr.num_sge = 1;
- err_wr.opcode = IB_WR_SEND;
- err_wr.send_flags = IB_SEND_SIGNALED;
-
- /* Post It */
- ret = svc_rdma_send(xprt, &err_wr);
- if (ret) {
- dprintk("svcrdma: Error %d posting send for protocol error\n",
- ret);
- svc_rdma_unmap_dma(ctxt);
- svc_rdma_put_context(ctxt, 1);
- }
-}
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index fc8f14c7bfec..a9d9cb1ba4c6 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -272,85 +272,6 @@ static void svc_rdma_destroy_ctxts(struct svcxprt_rdma *xprt)
}
}
-static struct svc_rdma_req_map *alloc_req_map(gfp_t flags)
-{
- struct svc_rdma_req_map *map;
-
- map = kmalloc(sizeof(*map), flags);
- if (map)
- INIT_LIST_HEAD(&map->free);
- return map;
-}
-
-static bool svc_rdma_prealloc_maps(struct svcxprt_rdma *xprt)
-{
- unsigned int i;
-
- /* One for each receive buffer on this connection. */
- i = xprt->sc_max_requests;
-
- while (i--) {
- struct svc_rdma_req_map *map;
-
- map = alloc_req_map(GFP_KERNEL);
- if (!map) {
- dprintk("svcrdma: No memory for request map\n");
- return false;
- }
- list_add(&map->free, &xprt->sc_maps);
- }
- return true;
-}
-
-struct svc_rdma_req_map *svc_rdma_get_req_map(struct svcxprt_rdma *xprt)
-{
- struct svc_rdma_req_map *map = NULL;
-
- spin_lock(&xprt->sc_map_lock);
- if (list_empty(&xprt->sc_maps))
- goto out_empty;
-
- map = list_first_entry(&xprt->sc_maps,
- struct svc_rdma_req_map, free);
- list_del_init(&map->free);
- spin_unlock(&xprt->sc_map_lock);
-
-out:
- map->count = 0;
- return map;
-
-out_empty:
- spin_unlock(&xprt->sc_map_lock);
-
- /* Pre-allocation amount was incorrect */
- map = alloc_req_map(GFP_NOIO);
- if (map)
- goto out;
-
- WARN_ONCE(1, "svcrdma: empty request map list?\n");
- return NULL;
-}
-
-void svc_rdma_put_req_map(struct svcxprt_rdma *xprt,
- struct svc_rdma_req_map *map)
-{
- spin_lock(&xprt->sc_map_lock);
- list_add(&map->free, &xprt->sc_maps);
- spin_unlock(&xprt->sc_map_lock);
-}
-
-static void svc_rdma_destroy_maps(struct svcxprt_rdma *xprt)
-{
- while (!list_empty(&xprt->sc_maps)) {
- struct svc_rdma_req_map *map;
-
- map = list_first_entry(&xprt->sc_maps,
- struct svc_rdma_req_map, free);
- list_del(&map->free);
- kfree(map);
- }
-}
-
/* QP event handler */
static void qp_event_handler(struct ib_event *event, void *context)
{
@@ -474,24 +395,6 @@ void svc_rdma_wc_send(struct ib_cq *cq, struct ib_wc *wc)
}
/**
- * svc_rdma_wc_write - Invoked by RDMA provider for each polled Write WC
- * @cq: completion queue
- * @wc: completed WR
- *
- */
-void svc_rdma_wc_write(struct ib_cq *cq, struct ib_wc *wc)
-{
- struct ib_cqe *cqe = wc->wr_cqe;
- struct svc_rdma_op_ctxt *ctxt;
-
- svc_rdma_send_wc_common_put(cq, wc, "write");
-
- ctxt = container_of(cqe, struct svc_rdma_op_ctxt, cqe);
- svc_rdma_unmap_dma(ctxt);
- svc_rdma_put_context(ctxt, 0);
-}
-
-/**
* svc_rdma_wc_reg - Invoked by RDMA provider for each polled FASTREG WC
* @cq: completion queue
* @wc: completed WR
@@ -561,14 +464,14 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv,
INIT_LIST_HEAD(&cma_xprt->sc_read_complete_q);
INIT_LIST_HEAD(&cma_xprt->sc_frmr_q);
INIT_LIST_HEAD(&cma_xprt->sc_ctxts);
- INIT_LIST_HEAD(&cma_xprt->sc_maps);
+ INIT_LIST_HEAD(&cma_xprt->sc_rw_ctxts);
init_waitqueue_head(&cma_xprt->sc_send_wait);
spin_lock_init(&cma_xprt->sc_lock);
spin_lock_init(&cma_xprt->sc_rq_dto_lock);
spin_lock_init(&cma_xprt->sc_frmr_q_lock);
spin_lock_init(&cma_xprt->sc_ctxt_lock);
- spin_lock_init(&cma_xprt->sc_map_lock);
+ spin_lock_init(&cma_xprt->sc_rw_ctxt_lock);
/*
* Note that this implies that the underlying transport support
@@ -999,6 +902,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
newxprt, newxprt->sc_cm_id);
dev = newxprt->sc_cm_id->device;
+ newxprt->sc_port_num = newxprt->sc_cm_id->port_num;
/* Qualify the transport resource defaults with the
* capabilities of this particular device */
@@ -1014,13 +918,11 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
svcrdma_max_bc_requests);
newxprt->sc_rq_depth = newxprt->sc_max_requests +
newxprt->sc_max_bc_requests;
- newxprt->sc_sq_depth = RPCRDMA_SQ_DEPTH_MULT * newxprt->sc_rq_depth;
+ newxprt->sc_sq_depth = newxprt->sc_rq_depth;
atomic_set(&newxprt->sc_sq_avail, newxprt->sc_sq_depth);
if (!svc_rdma_prealloc_ctxts(newxprt))
goto errout;
- if (!svc_rdma_prealloc_maps(newxprt))
- goto errout;
/*
* Limit ORD based on client limit, local device limit, and
@@ -1050,6 +952,8 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
memset(&qp_attr, 0, sizeof qp_attr);
qp_attr.event_handler = qp_event_handler;
qp_attr.qp_context = &newxprt->sc_xprt;
+ qp_attr.port_num = newxprt->sc_cm_id->port_num;
+ qp_attr.cap.max_rdma_ctxs = newxprt->sc_max_requests;
qp_attr.cap.max_send_wr = newxprt->sc_sq_depth;
qp_attr.cap.max_recv_wr = newxprt->sc_rq_depth;
qp_attr.cap.max_send_sge = newxprt->sc_max_sge;
@@ -1248,8 +1152,8 @@ static void __svc_rdma_free(struct work_struct *work)
}
rdma_dealloc_frmr_q(rdma);
+ svc_rdma_destroy_rw_ctxts(rdma);
svc_rdma_destroy_ctxts(rdma);
- svc_rdma_destroy_maps(rdma);
/* Destroy the QP if present (not a listener) */
if (rdma->sc_qp && !IS_ERR(rdma->sc_qp))
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index c717f5410776..62ecbccd9748 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -66,8 +66,8 @@ static unsigned int xprt_rdma_slot_table_entries = RPCRDMA_DEF_SLOT_TABLE;
unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE;
static unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE;
static unsigned int xprt_rdma_inline_write_padding;
-static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_FRMR;
- int xprt_rdma_pad_optimize = 0;
+unsigned int xprt_rdma_memreg_strategy = RPCRDMA_FRMR;
+int xprt_rdma_pad_optimize;
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
@@ -396,7 +396,7 @@ xprt_setup_rdma(struct xprt_create *args)
new_xprt = rpcx_to_rdmax(xprt);
- rc = rpcrdma_ia_open(new_xprt, sap, xprt_rdma_memreg_strategy);
+ rc = rpcrdma_ia_open(new_xprt, sap);
if (rc)
goto out1;
@@ -457,19 +457,33 @@ out1:
return ERR_PTR(rc);
}
-/*
- * Close a connection, during shutdown or timeout/reconnect
+/**
+ * xprt_rdma_close - Close down RDMA connection
+ * @xprt: generic transport to be closed
+ *
+ * Called during transport shutdown reconnect, or device
+ * removal. Caller holds the transport's write lock.
*/
static void
xprt_rdma_close(struct rpc_xprt *xprt)
{
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
+ struct rpcrdma_ep *ep = &r_xprt->rx_ep;
+ struct rpcrdma_ia *ia = &r_xprt->rx_ia;
+
+ dprintk("RPC: %s: closing xprt %p\n", __func__, xprt);
- dprintk("RPC: %s: closing\n", __func__);
- if (r_xprt->rx_ep.rep_connected > 0)
+ if (test_and_clear_bit(RPCRDMA_IAF_REMOVING, &ia->ri_flags)) {
+ xprt_clear_connected(xprt);
+ rpcrdma_ia_remove(ia);
+ return;
+ }
+ if (ep->rep_connected == -ENODEV)
+ return;
+ if (ep->rep_connected > 0)
xprt->reestablish_timeout = 0;
xprt_disconnect_done(xprt);
- rpcrdma_ep_disconnect(&r_xprt->rx_ep, &r_xprt->rx_ia);
+ rpcrdma_ep_disconnect(ep, ia);
}
static void
@@ -484,6 +498,27 @@ xprt_rdma_set_port(struct rpc_xprt *xprt, u16 port)
dprintk("RPC: %s: %u\n", __func__, port);
}
+/**
+ * xprt_rdma_timer - invoked when an RPC times out
+ * @xprt: controlling RPC transport
+ * @task: RPC task that timed out
+ *
+ * Invoked when the transport is still connected, but an RPC
+ * retransmit timeout occurs.
+ *
+ * Since RDMA connections don't have a keep-alive, forcibly
+ * disconnect and retry to connect. This drives full
+ * detection of the network path, and retransmissions of
+ * all pending RPCs.
+ */
+static void
+xprt_rdma_timer(struct rpc_xprt *xprt, struct rpc_task *task)
+{
+ dprintk("RPC: %5u %s: xprt = %p\n", task->tk_pid, __func__, xprt);
+
+ xprt_force_disconnect(xprt);
+}
+
static void
xprt_rdma_connect(struct rpc_xprt *xprt, struct rpc_task *task)
{
@@ -659,6 +694,8 @@ xprt_rdma_free(struct rpc_task *task)
* xprt_rdma_send_request - marshal and send an RPC request
* @task: RPC task with an RPC message in rq_snd_buf
*
+ * Caller holds the transport's write lock.
+ *
* Return values:
* 0: The request has been sent
* ENOTCONN: Caller needs to invoke connect logic then call again
@@ -685,6 +722,9 @@ xprt_rdma_send_request(struct rpc_task *task)
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
int rc = 0;
+ if (!xprt_connected(xprt))
+ goto drop_connection;
+
/* On retransmit, remove any previously registered chunks */
if (unlikely(!list_empty(&req->rl_registered)))
r_xprt->rx_ia.ri_ops->ro_unmap_safe(r_xprt, req, false);
@@ -776,6 +816,7 @@ static struct rpc_xprt_ops xprt_rdma_procs = {
.alloc_slot = xprt_alloc_slot,
.release_request = xprt_release_rqst_cong, /* ditto */
.set_retrans_timeout = xprt_set_retrans_timeout_def, /* ditto */
+ .timer = xprt_rdma_timer,
.rpcbind = rpcb_getport_async, /* sunrpc/rpcb_clnt.c */
.set_port = xprt_rdma_set_port,
.connect = xprt_rdma_connect,
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 3b332b395045..3dbce9ac4327 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -53,7 +53,7 @@
#include <linux/sunrpc/addr.h>
#include <linux/sunrpc/svc_rdma.h>
#include <asm/bitops.h>
-#include <linux/module.h> /* try_module_get()/module_put() */
+
#include <rdma/ib_cm.h>
#include "xprt_rdma.h"
@@ -69,8 +69,11 @@
/*
* internal functions
*/
+static void rpcrdma_create_mrs(struct rpcrdma_xprt *r_xprt);
+static void rpcrdma_destroy_mrs(struct rpcrdma_buffer *buf);
+static void rpcrdma_dma_unmap_regbuf(struct rpcrdma_regbuf *rb);
-static struct workqueue_struct *rpcrdma_receive_wq;
+static struct workqueue_struct *rpcrdma_receive_wq __read_mostly;
int
rpcrdma_alloc_wq(void)
@@ -180,7 +183,7 @@ rpcrdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
rep->rr_wc_flags = wc->wc_flags;
rep->rr_inv_rkey = wc->ex.invalidate_rkey;
- ib_dma_sync_single_for_cpu(rep->rr_device,
+ ib_dma_sync_single_for_cpu(rdmab_device(rep->rr_rdmabuf),
rdmab_addr(rep->rr_rdmabuf),
rep->rr_len, DMA_FROM_DEVICE);
@@ -262,6 +265,21 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
__func__, ep);
complete(&ia->ri_done);
break;
+ case RDMA_CM_EVENT_DEVICE_REMOVAL:
+#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
+ pr_info("rpcrdma: removing device for %pIS:%u\n",
+ sap, rpc_get_port(sap));
+#endif
+ set_bit(RPCRDMA_IAF_REMOVING, &ia->ri_flags);
+ ep->rep_connected = -ENODEV;
+ xprt_force_disconnect(&xprt->rx_xprt);
+ wait_for_completion(&ia->ri_remove_done);
+
+ ia->ri_id = NULL;
+ ia->ri_pd = NULL;
+ ia->ri_device = NULL;
+ /* Return 1 to ensure the core destroys the id. */
+ return 1;
case RDMA_CM_EVENT_ESTABLISHED:
connstate = 1;
ib_query_qp(ia->ri_id->qp, attr,
@@ -291,9 +309,6 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
goto connected;
case RDMA_CM_EVENT_DISCONNECTED:
connstate = -ECONNABORTED;
- goto connected;
- case RDMA_CM_EVENT_DEVICE_REMOVAL:
- connstate = -ENODEV;
connected:
dprintk("RPC: %s: %sconnected\n",
__func__, connstate > 0 ? "" : "dis");
@@ -329,14 +344,6 @@ connected:
return 0;
}
-static void rpcrdma_destroy_id(struct rdma_cm_id *id)
-{
- if (id) {
- module_put(id->device->owner);
- rdma_destroy_id(id);
- }
-}
-
static struct rdma_cm_id *
rpcrdma_create_id(struct rpcrdma_xprt *xprt,
struct rpcrdma_ia *ia, struct sockaddr *addr)
@@ -346,6 +353,7 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt,
int rc;
init_completion(&ia->ri_done);
+ init_completion(&ia->ri_remove_done);
id = rdma_create_id(&init_net, rpcrdma_conn_upcall, xprt, RDMA_PS_TCP,
IB_QPT_RC);
@@ -370,16 +378,6 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt,
goto out;
}
- /* FIXME:
- * Until xprtrdma supports DEVICE_REMOVAL, the provider must
- * be pinned while there are active NFS/RDMA mounts to prevent
- * hangs and crashes at umount time.
- */
- if (!ia->ri_async_rc && !try_module_get(id->device->owner)) {
- dprintk("RPC: %s: Failed to get device module\n",
- __func__);
- ia->ri_async_rc = -ENODEV;
- }
rc = ia->ri_async_rc;
if (rc)
goto out;
@@ -389,21 +387,20 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt,
if (rc) {
dprintk("RPC: %s: rdma_resolve_route() failed %i\n",
__func__, rc);
- goto put;
+ goto out;
}
rc = wait_for_completion_interruptible_timeout(&ia->ri_done, wtimeout);
if (rc < 0) {
dprintk("RPC: %s: wait() exited: %i\n",
__func__, rc);
- goto put;
+ goto out;
}
rc = ia->ri_async_rc;
if (rc)
- goto put;
+ goto out;
return id;
-put:
- module_put(id->device->owner);
+
out:
rdma_destroy_id(id);
return ERR_PTR(rc);
@@ -413,13 +410,16 @@ out:
* Exported functions.
*/
-/*
- * Open and initialize an Interface Adapter.
- * o initializes fields of struct rpcrdma_ia, including
- * interface and provider attributes and protection zone.
+/**
+ * rpcrdma_ia_open - Open and initialize an Interface Adapter.
+ * @xprt: controlling transport
+ * @addr: IP address of remote peer
+ *
+ * Returns 0 on success, negative errno if an appropriate
+ * Interface Adapter could not be found and opened.
*/
int
-rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
+rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr)
{
struct rpcrdma_ia *ia = &xprt->rx_ia;
int rc;
@@ -427,7 +427,7 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
ia->ri_id = rpcrdma_create_id(xprt, ia, addr);
if (IS_ERR(ia->ri_id)) {
rc = PTR_ERR(ia->ri_id);
- goto out1;
+ goto out_err;
}
ia->ri_device = ia->ri_id->device;
@@ -435,10 +435,10 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
if (IS_ERR(ia->ri_pd)) {
rc = PTR_ERR(ia->ri_pd);
pr_err("rpcrdma: ib_alloc_pd() returned %d\n", rc);
- goto out2;
+ goto out_err;
}
- switch (memreg) {
+ switch (xprt_rdma_memreg_strategy) {
case RPCRDMA_FRMR:
if (frwr_is_supported(ia)) {
ia->ri_ops = &rpcrdma_frwr_memreg_ops;
@@ -452,28 +452,73 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
}
/*FALLTHROUGH*/
default:
- pr_err("rpcrdma: Unsupported memory registration mode: %d\n",
- memreg);
+ pr_err("rpcrdma: Device %s does not support memreg mode %d\n",
+ ia->ri_device->name, xprt_rdma_memreg_strategy);
rc = -EINVAL;
- goto out3;
+ goto out_err;
}
return 0;
-out3:
- ib_dealloc_pd(ia->ri_pd);
- ia->ri_pd = NULL;
-out2:
- rpcrdma_destroy_id(ia->ri_id);
- ia->ri_id = NULL;
-out1:
+out_err:
+ rpcrdma_ia_close(ia);
return rc;
}
-/*
- * Clean up/close an IA.
- * o if event handles and PD have been initialized, free them.
- * o close the IA
+/**
+ * rpcrdma_ia_remove - Handle device driver unload
+ * @ia: interface adapter being removed
+ *
+ * Divest transport H/W resources associated with this adapter,
+ * but allow it to be restored later.
+ */
+void
+rpcrdma_ia_remove(struct rpcrdma_ia *ia)
+{
+ struct rpcrdma_xprt *r_xprt = container_of(ia, struct rpcrdma_xprt,
+ rx_ia);
+ struct rpcrdma_ep *ep = &r_xprt->rx_ep;
+ struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
+ struct rpcrdma_req *req;
+ struct rpcrdma_rep *rep;
+
+ cancel_delayed_work_sync(&buf->rb_refresh_worker);
+
+ /* This is similar to rpcrdma_ep_destroy, but:
+ * - Don't cancel the connect worker.
+ * - Don't call rpcrdma_ep_disconnect, which waits
+ * for another conn upcall, which will deadlock.
+ * - rdma_disconnect is unneeded, the underlying
+ * connection is already gone.
+ */
+ if (ia->ri_id->qp) {
+ ib_drain_qp(ia->ri_id->qp);
+ rdma_destroy_qp(ia->ri_id);
+ ia->ri_id->qp = NULL;
+ }
+ ib_free_cq(ep->rep_attr.recv_cq);
+ ib_free_cq(ep->rep_attr.send_cq);
+
+ /* The ULP is responsible for ensuring all DMA
+ * mappings and MRs are gone.
+ */
+ list_for_each_entry(rep, &buf->rb_recv_bufs, rr_list)
+ rpcrdma_dma_unmap_regbuf(rep->rr_rdmabuf);
+ list_for_each_entry(req, &buf->rb_allreqs, rl_all) {
+ rpcrdma_dma_unmap_regbuf(req->rl_rdmabuf);
+ rpcrdma_dma_unmap_regbuf(req->rl_sendbuf);
+ rpcrdma_dma_unmap_regbuf(req->rl_recvbuf);
+ }
+ rpcrdma_destroy_mrs(buf);
+
+ /* Allow waiters to continue */
+ complete(&ia->ri_remove_done);
+}
+
+/**
+ * rpcrdma_ia_close - Clean up/close an IA.
+ * @ia: interface adapter to close
+ *
*/
void
rpcrdma_ia_close(struct rpcrdma_ia *ia)
@@ -482,13 +527,15 @@ rpcrdma_ia_close(struct rpcrdma_ia *ia)
if (ia->ri_id != NULL && !IS_ERR(ia->ri_id)) {
if (ia->ri_id->qp)
rdma_destroy_qp(ia->ri_id);
- rpcrdma_destroy_id(ia->ri_id);
- ia->ri_id = NULL;
+ rdma_destroy_id(ia->ri_id);
}
+ ia->ri_id = NULL;
+ ia->ri_device = NULL;
/* If the pd is still busy, xprtrdma missed freeing a resource */
if (ia->ri_pd && !IS_ERR(ia->ri_pd))
ib_dealloc_pd(ia->ri_pd);
+ ia->ri_pd = NULL;
}
/*
@@ -646,6 +693,99 @@ rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
ib_free_cq(ep->rep_attr.send_cq);
}
+/* Re-establish a connection after a device removal event.
+ * Unlike a normal reconnection, a fresh PD and a new set
+ * of MRs and buffers is needed.
+ */
+static int
+rpcrdma_ep_recreate_xprt(struct rpcrdma_xprt *r_xprt,
+ struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
+{
+ struct sockaddr *sap = (struct sockaddr *)&r_xprt->rx_data.addr;
+ int rc, err;
+
+ pr_info("%s: r_xprt = %p\n", __func__, r_xprt);
+
+ rc = -EHOSTUNREACH;
+ if (rpcrdma_ia_open(r_xprt, sap))
+ goto out1;
+
+ rc = -ENOMEM;
+ err = rpcrdma_ep_create(ep, ia, &r_xprt->rx_data);
+ if (err) {
+ pr_err("rpcrdma: rpcrdma_ep_create returned %d\n", err);
+ goto out2;
+ }
+
+ rc = -ENETUNREACH;
+ err = rdma_create_qp(ia->ri_id, ia->ri_pd, &ep->rep_attr);
+ if (err) {
+ pr_err("rpcrdma: rdma_create_qp returned %d\n", err);
+ goto out3;
+ }
+
+ rpcrdma_create_mrs(r_xprt);
+ return 0;
+
+out3:
+ rpcrdma_ep_destroy(ep, ia);
+out2:
+ rpcrdma_ia_close(ia);
+out1:
+ return rc;
+}
+
+static int
+rpcrdma_ep_reconnect(struct rpcrdma_xprt *r_xprt, struct rpcrdma_ep *ep,
+ struct rpcrdma_ia *ia)
+{
+ struct sockaddr *sap = (struct sockaddr *)&r_xprt->rx_data.addr;
+ struct rdma_cm_id *id, *old;
+ int err, rc;
+
+ dprintk("RPC: %s: reconnecting...\n", __func__);
+
+ rpcrdma_ep_disconnect(ep, ia);
+
+ rc = -EHOSTUNREACH;
+ id = rpcrdma_create_id(r_xprt, ia, sap);
+ if (IS_ERR(id))
+ goto out;
+
+ /* As long as the new ID points to the same device as the
+ * old ID, we can reuse the transport's existing PD and all
+ * previously allocated MRs. Also, the same device means
+ * the transport's previous DMA mappings are still valid.
+ *
+ * This is a sanity check only. There should be no way these
+ * point to two different devices here.
+ */
+ old = id;
+ rc = -ENETUNREACH;
+ if (ia->ri_device != id->device) {
+ pr_err("rpcrdma: can't reconnect on different device!\n");
+ goto out_destroy;
+ }
+
+ err = rdma_create_qp(id, ia->ri_pd, &ep->rep_attr);
+ if (err) {
+ dprintk("RPC: %s: rdma_create_qp returned %d\n",
+ __func__, err);
+ goto out_destroy;
+ }
+
+ /* Atomically replace the transport's ID and QP. */
+ rc = 0;
+ old = ia->ri_id;
+ ia->ri_id = id;
+ rdma_destroy_qp(old);
+
+out_destroy:
+ rdma_destroy_id(old);
+out:
+ return rc;
+}
+
/*
* Connect unconnected endpoint.
*/
@@ -654,61 +794,30 @@ rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
struct rpcrdma_xprt *r_xprt = container_of(ia, struct rpcrdma_xprt,
rx_ia);
- struct rdma_cm_id *id, *old;
- struct sockaddr *sap;
unsigned int extras;
- int rc = 0;
+ int rc;
- if (ep->rep_connected != 0) {
retry:
- dprintk("RPC: %s: reconnecting...\n", __func__);
-
- rpcrdma_ep_disconnect(ep, ia);
-
- sap = (struct sockaddr *)&r_xprt->rx_data.addr;
- id = rpcrdma_create_id(r_xprt, ia, sap);
- if (IS_ERR(id)) {
- rc = -EHOSTUNREACH;
- goto out;
- }
- /* TEMP TEMP TEMP - fail if new device:
- * Deregister/remarshal *all* requests!
- * Close and recreate adapter, pd, etc!
- * Re-determine all attributes still sane!
- * More stuff I haven't thought of!
- * Rrrgh!
- */
- if (ia->ri_device != id->device) {
- printk("RPC: %s: can't reconnect on "
- "different device!\n", __func__);
- rpcrdma_destroy_id(id);
- rc = -ENETUNREACH;
- goto out;
- }
- /* END TEMP */
- rc = rdma_create_qp(id, ia->ri_pd, &ep->rep_attr);
- if (rc) {
- dprintk("RPC: %s: rdma_create_qp failed %i\n",
- __func__, rc);
- rpcrdma_destroy_id(id);
- rc = -ENETUNREACH;
- goto out;
- }
-
- old = ia->ri_id;
- ia->ri_id = id;
-
- rdma_destroy_qp(old);
- rpcrdma_destroy_id(old);
- } else {
+ switch (ep->rep_connected) {
+ case 0:
dprintk("RPC: %s: connecting...\n", __func__);
rc = rdma_create_qp(ia->ri_id, ia->ri_pd, &ep->rep_attr);
if (rc) {
dprintk("RPC: %s: rdma_create_qp failed %i\n",
__func__, rc);
- /* do not update ep->rep_connected */
- return -ENETUNREACH;
+ rc = -ENETUNREACH;
+ goto out_noupdate;
}
+ break;
+ case -ENODEV:
+ rc = rpcrdma_ep_recreate_xprt(r_xprt, ep, ia);
+ if (rc)
+ goto out_noupdate;
+ break;
+ default:
+ rc = rpcrdma_ep_reconnect(r_xprt, ep, ia);
+ if (rc)
+ goto out;
}
ep->rep_connected = 0;
@@ -736,6 +845,8 @@ retry:
out:
if (rc)
ep->rep_connected = rc;
+
+out_noupdate:
return rc;
}
@@ -878,7 +989,6 @@ struct rpcrdma_rep *
rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt)
{
struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data;
- struct rpcrdma_ia *ia = &r_xprt->rx_ia;
struct rpcrdma_rep *rep;
int rc;
@@ -894,7 +1004,6 @@ rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt)
goto out_free;
}
- rep->rr_device = ia->ri_device;
rep->rr_cqe.done = rpcrdma_wc_receive;
rep->rr_rxprt = r_xprt;
INIT_WORK(&rep->rr_work, rpcrdma_reply_handler);
@@ -1037,6 +1146,7 @@ void
rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
{
cancel_delayed_work_sync(&buf->rb_recovery_worker);
+ cancel_delayed_work_sync(&buf->rb_refresh_worker);
while (!list_empty(&buf->rb_recv_bufs)) {
struct rpcrdma_rep *rep;
@@ -1081,7 +1191,8 @@ rpcrdma_get_mw(struct rpcrdma_xprt *r_xprt)
out_nomws:
dprintk("RPC: %s: no MWs available\n", __func__);
- schedule_delayed_work(&buf->rb_refresh_worker, 0);
+ if (r_xprt->rx_ep.rep_connected != -ENODEV)
+ schedule_delayed_work(&buf->rb_refresh_worker, 0);
/* Allow the reply handler and refresh worker to run */
cond_resched();
@@ -1231,17 +1342,19 @@ rpcrdma_alloc_regbuf(size_t size, enum dma_data_direction direction,
bool
__rpcrdma_dma_map_regbuf(struct rpcrdma_ia *ia, struct rpcrdma_regbuf *rb)
{
+ struct ib_device *device = ia->ri_device;
+
if (rb->rg_direction == DMA_NONE)
return false;
- rb->rg_iov.addr = ib_dma_map_single(ia->ri_device,
+ rb->rg_iov.addr = ib_dma_map_single(device,
(void *)rb->rg_base,
rdmab_length(rb),
rb->rg_direction);
- if (ib_dma_mapping_error(ia->ri_device, rdmab_addr(rb)))
+ if (ib_dma_mapping_error(device, rdmab_addr(rb)))
return false;
- rb->rg_device = ia->ri_device;
+ rb->rg_device = device;
rb->rg_iov.lkey = ia->ri_pd->local_dma_lkey;
return true;
}
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index 171a35116de9..1d66acf1a723 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -69,6 +69,7 @@ struct rpcrdma_ia {
struct rdma_cm_id *ri_id;
struct ib_pd *ri_pd;
struct completion ri_done;
+ struct completion ri_remove_done;
int ri_async_rc;
unsigned int ri_max_segs;
unsigned int ri_max_frmr_depth;
@@ -78,10 +79,15 @@ struct rpcrdma_ia {
bool ri_reminv_expected;
bool ri_implicit_roundup;
enum ib_mr_type ri_mrtype;
+ unsigned long ri_flags;
struct ib_qp_attr ri_qp_attr;
struct ib_qp_init_attr ri_qp_init_attr;
};
+enum {
+ RPCRDMA_IAF_REMOVING = 0,
+};
+
/*
* RDMA Endpoint -- one per transport instance
*/
@@ -164,6 +170,12 @@ rdmab_to_msg(struct rpcrdma_regbuf *rb)
return (struct rpcrdma_msg *)rb->rg_base;
}
+static inline struct ib_device *
+rdmab_device(struct rpcrdma_regbuf *rb)
+{
+ return rb->rg_device;
+}
+
#define RPCRDMA_DEF_GFP (GFP_NOIO | __GFP_NOWARN)
/* To ensure a transport can always make forward progress,
@@ -209,7 +221,6 @@ struct rpcrdma_rep {
unsigned int rr_len;
int rr_wc_flags;
u32 rr_inv_rkey;
- struct ib_device *rr_device;
struct rpcrdma_xprt *rr_rxprt;
struct work_struct rr_work;
struct list_head rr_list;
@@ -380,7 +391,6 @@ struct rpcrdma_buffer {
spinlock_t rb_mwlock; /* protect rb_mws list */
struct list_head rb_mws;
struct list_head rb_all;
- char *rb_pool;
spinlock_t rb_lock; /* protect buf lists */
int rb_send_count, rb_recv_count;
@@ -497,10 +507,16 @@ struct rpcrdma_xprt {
* Default is 0, see sysctl entry and rpc_rdma.c rpcrdma_convert_iovs() */
extern int xprt_rdma_pad_optimize;
+/* This setting controls the hunt for a supported memory
+ * registration strategy.
+ */
+extern unsigned int xprt_rdma_memreg_strategy;
+
/*
* Interface Adapter calls - xprtrdma/verbs.c
*/
-int rpcrdma_ia_open(struct rpcrdma_xprt *, struct sockaddr *, int);
+int rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr);
+void rpcrdma_ia_remove(struct rpcrdma_ia *ia);
void rpcrdma_ia_close(struct rpcrdma_ia *);
bool frwr_is_supported(struct rpcrdma_ia *);
bool fmr_is_supported(struct rpcrdma_ia *);
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index 017801f9dbaa..8d40a7d31c99 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -826,7 +826,7 @@ static int switchdev_port_br_setlink_protinfo(struct net_device *dev,
int err;
err = nla_validate_nested(protinfo, IFLA_BRPORT_MAX,
- switchdev_port_bridge_policy);
+ switchdev_port_bridge_policy, NULL);
if (err)
return err;
diff --git a/net/sysctl_net.c b/net/sysctl_net.c
index 919981324171..9aed6fe1bf1a 100644
--- a/net/sysctl_net.c
+++ b/net/sysctl_net.c
@@ -106,7 +106,6 @@ __init int net_sysctl_init(void)
ret = register_pernet_subsys(&sysctl_pernet_ops);
if (ret)
goto out1;
- register_sysctl_root(&net_sysctl_root);
out:
return ret;
out1:
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index 33a5bdfbef76..d174ee3254ee 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -802,7 +802,7 @@ int tipc_nl_bearer_get(struct sk_buff *skb, struct genl_info *info)
err = nla_parse_nested(attrs, TIPC_NLA_BEARER_MAX,
info->attrs[TIPC_NLA_BEARER],
- tipc_nl_bearer_policy);
+ tipc_nl_bearer_policy, info->extack);
if (err)
return err;
@@ -851,7 +851,7 @@ int tipc_nl_bearer_disable(struct sk_buff *skb, struct genl_info *info)
err = nla_parse_nested(attrs, TIPC_NLA_BEARER_MAX,
info->attrs[TIPC_NLA_BEARER],
- tipc_nl_bearer_policy);
+ tipc_nl_bearer_policy, info->extack);
if (err)
return err;
@@ -891,7 +891,7 @@ int tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info)
err = nla_parse_nested(attrs, TIPC_NLA_BEARER_MAX,
info->attrs[TIPC_NLA_BEARER],
- tipc_nl_bearer_policy);
+ tipc_nl_bearer_policy, info->extack);
if (err)
return err;
@@ -939,7 +939,7 @@ int tipc_nl_bearer_add(struct sk_buff *skb, struct genl_info *info)
err = nla_parse_nested(attrs, TIPC_NLA_BEARER_MAX,
info->attrs[TIPC_NLA_BEARER],
- tipc_nl_bearer_policy);
+ tipc_nl_bearer_policy, info->extack);
if (err)
return err;
@@ -982,7 +982,7 @@ int tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info)
err = nla_parse_nested(attrs, TIPC_NLA_BEARER_MAX,
info->attrs[TIPC_NLA_BEARER],
- tipc_nl_bearer_policy);
+ tipc_nl_bearer_policy, info->extack);
if (err)
return err;
@@ -1104,7 +1104,7 @@ int tipc_nl_media_get(struct sk_buff *skb, struct genl_info *info)
err = nla_parse_nested(attrs, TIPC_NLA_MEDIA_MAX,
info->attrs[TIPC_NLA_MEDIA],
- tipc_nl_media_policy);
+ tipc_nl_media_policy, info->extack);
if (err)
return err;
@@ -1152,7 +1152,7 @@ int tipc_nl_media_set(struct sk_buff *skb, struct genl_info *info)
err = nla_parse_nested(attrs, TIPC_NLA_MEDIA_MAX,
info->attrs[TIPC_NLA_MEDIA],
- tipc_nl_media_policy);
+ tipc_nl_media_policy, info->extack);
if (!attrs[TIPC_NLA_MEDIA_NAME])
return -EINVAL;
diff --git a/net/tipc/link.c b/net/tipc/link.c
index ddd2dd6f77aa..60820dc35a08 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -1827,7 +1827,7 @@ int tipc_nl_parse_link_prop(struct nlattr *prop, struct nlattr *props[])
int err;
err = nla_parse_nested(props, TIPC_NLA_PROP_MAX, prop,
- tipc_nl_prop_policy);
+ tipc_nl_prop_policy, NULL);
if (err)
return err;
diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index 9be6592e4a6f..bd0aac87b41a 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -416,6 +416,7 @@ static void tipc_nameseq_subscribe(struct name_seq *nseq,
tipc_subscrp_convert_seq(&s->evt.s.seq, s->swap, &ns);
+ tipc_subscrp_get(s);
list_add(&s->nameseq_list, &nseq->subscriptions);
if (!sseq)
@@ -787,6 +788,7 @@ void tipc_nametbl_unsubscribe(struct tipc_subscription *s)
if (seq != NULL) {
spin_lock_bh(&seq->lock);
list_del_init(&s->nameseq_list);
+ tipc_subscrp_put(s);
if (!seq->first_free && list_empty(&seq->subscriptions)) {
hlist_del_init_rcu(&seq->ns_list);
kfree(seq->sseqs);
diff --git a/net/tipc/net.c b/net/tipc/net.c
index ab8a2d5d1e32..719c5924b638 100644
--- a/net/tipc/net.c
+++ b/net/tipc/net.c
@@ -211,8 +211,8 @@ int tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info)
return -EINVAL;
err = nla_parse_nested(attrs, TIPC_NLA_NET_MAX,
- info->attrs[TIPC_NLA_NET],
- tipc_nl_net_policy);
+ info->attrs[TIPC_NLA_NET], tipc_nl_net_policy,
+ info->extack);
if (err)
return err;
diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c
index 26ca8dd64ded..b76f13f6fea1 100644
--- a/net/tipc/netlink.c
+++ b/net/tipc/netlink.c
@@ -268,7 +268,8 @@ int tipc_nlmsg_parse(const struct nlmsghdr *nlh, struct nlattr ***attr)
if (!*attr)
return -EOPNOTSUPP;
- return nlmsg_parse(nlh, GENL_HDRLEN, *attr, maxattr, tipc_nl_policy);
+ return nlmsg_parse(nlh, GENL_HDRLEN, *attr, maxattr, tipc_nl_policy,
+ NULL);
}
int __init tipc_netlink_start(void)
diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c
index e1ae8a8a2b8e..9bfe886ab330 100644
--- a/net/tipc/netlink_compat.c
+++ b/net/tipc/netlink_compat.c
@@ -296,7 +296,7 @@ static int __tipc_nl_compat_doit(struct tipc_nl_compat_cmd_doit *cmd,
err = nla_parse(attrbuf, tipc_genl_family.maxattr,
(const struct nlattr *)trans_buf->data,
- trans_buf->len, NULL);
+ trans_buf->len, NULL, NULL);
if (err)
goto parse_out;
@@ -352,7 +352,7 @@ static int tipc_nl_compat_bearer_dump(struct tipc_nl_compat_msg *msg,
return -EINVAL;
err = nla_parse_nested(bearer, TIPC_NLA_BEARER_MAX,
- attrs[TIPC_NLA_BEARER], NULL);
+ attrs[TIPC_NLA_BEARER], NULL, NULL);
if (err)
return err;
@@ -472,7 +472,7 @@ static int tipc_nl_compat_link_stat_dump(struct tipc_nl_compat_msg *msg,
return -EINVAL;
err = nla_parse_nested(link, TIPC_NLA_LINK_MAX, attrs[TIPC_NLA_LINK],
- NULL);
+ NULL, NULL);
if (err)
return err;
@@ -480,7 +480,7 @@ static int tipc_nl_compat_link_stat_dump(struct tipc_nl_compat_msg *msg,
return -EINVAL;
err = nla_parse_nested(prop, TIPC_NLA_PROP_MAX,
- link[TIPC_NLA_LINK_PROP], NULL);
+ link[TIPC_NLA_LINK_PROP], NULL, NULL);
if (err)
return err;
@@ -488,7 +488,7 @@ static int tipc_nl_compat_link_stat_dump(struct tipc_nl_compat_msg *msg,
return -EINVAL;
err = nla_parse_nested(stats, TIPC_NLA_STATS_MAX,
- link[TIPC_NLA_LINK_STATS], NULL);
+ link[TIPC_NLA_LINK_STATS], NULL, NULL);
if (err)
return err;
@@ -598,7 +598,7 @@ static int tipc_nl_compat_link_dump(struct tipc_nl_compat_msg *msg,
return -EINVAL;
err = nla_parse_nested(link, TIPC_NLA_LINK_MAX, attrs[TIPC_NLA_LINK],
- NULL);
+ NULL, NULL);
if (err)
return err;
@@ -795,7 +795,7 @@ static int tipc_nl_compat_name_table_dump(struct tipc_nl_compat_msg *msg,
return -EINVAL;
err = nla_parse_nested(nt, TIPC_NLA_NAME_TABLE_MAX,
- attrs[TIPC_NLA_NAME_TABLE], NULL);
+ attrs[TIPC_NLA_NAME_TABLE], NULL, NULL);
if (err)
return err;
@@ -803,7 +803,7 @@ static int tipc_nl_compat_name_table_dump(struct tipc_nl_compat_msg *msg,
return -EINVAL;
err = nla_parse_nested(publ, TIPC_NLA_PUBL_MAX,
- nt[TIPC_NLA_NAME_TABLE_PUBL], NULL);
+ nt[TIPC_NLA_NAME_TABLE_PUBL], NULL, NULL);
if (err)
return err;
@@ -863,7 +863,7 @@ static int __tipc_nl_compat_publ_dump(struct tipc_nl_compat_msg *msg,
return -EINVAL;
err = nla_parse_nested(publ, TIPC_NLA_PUBL_MAX, attrs[TIPC_NLA_PUBL],
- NULL);
+ NULL, NULL);
if (err)
return err;
@@ -929,7 +929,7 @@ static int tipc_nl_compat_sk_dump(struct tipc_nl_compat_msg *msg,
return -EINVAL;
err = nla_parse_nested(sock, TIPC_NLA_SOCK_MAX, attrs[TIPC_NLA_SOCK],
- NULL);
+ NULL, NULL);
if (err)
return err;
@@ -940,8 +940,8 @@ static int tipc_nl_compat_sk_dump(struct tipc_nl_compat_msg *msg,
u32 node;
struct nlattr *con[TIPC_NLA_CON_MAX + 1];
- nla_parse_nested(con, TIPC_NLA_CON_MAX, sock[TIPC_NLA_SOCK_CON],
- NULL);
+ nla_parse_nested(con, TIPC_NLA_CON_MAX,
+ sock[TIPC_NLA_SOCK_CON], NULL, NULL);
node = nla_get_u32(con[TIPC_NLA_CON_NODE]);
tipc_tlv_sprintf(msg->rep, " connected to <%u.%u.%u:%u>",
@@ -977,8 +977,8 @@ static int tipc_nl_compat_media_dump(struct tipc_nl_compat_msg *msg,
if (!attrs[TIPC_NLA_MEDIA])
return -EINVAL;
- err = nla_parse_nested(media, TIPC_NLA_MEDIA_MAX, attrs[TIPC_NLA_MEDIA],
- NULL);
+ err = nla_parse_nested(media, TIPC_NLA_MEDIA_MAX,
+ attrs[TIPC_NLA_MEDIA], NULL, NULL);
if (err)
return err;
@@ -998,7 +998,7 @@ static int tipc_nl_compat_node_dump(struct tipc_nl_compat_msg *msg,
return -EINVAL;
err = nla_parse_nested(node, TIPC_NLA_NODE_MAX, attrs[TIPC_NLA_NODE],
- NULL);
+ NULL, NULL);
if (err)
return err;
@@ -1045,7 +1045,7 @@ static int tipc_nl_compat_net_dump(struct tipc_nl_compat_msg *msg,
return -EINVAL;
err = nla_parse_nested(net, TIPC_NLA_NET_MAX, attrs[TIPC_NLA_NET],
- NULL);
+ NULL, NULL);
if (err)
return err;
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 4512e83652b1..aeef8011ac7d 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -1607,8 +1607,8 @@ int tipc_nl_peer_rm(struct sk_buff *skb, struct genl_info *info)
return -EINVAL;
err = nla_parse_nested(attrs, TIPC_NLA_NET_MAX,
- info->attrs[TIPC_NLA_NET],
- tipc_nl_net_policy);
+ info->attrs[TIPC_NLA_NET], tipc_nl_net_policy,
+ info->extack);
if (err)
return err;
@@ -1774,7 +1774,7 @@ int tipc_nl_node_set_link(struct sk_buff *skb, struct genl_info *info)
err = nla_parse_nested(attrs, TIPC_NLA_LINK_MAX,
info->attrs[TIPC_NLA_LINK],
- tipc_nl_link_policy);
+ tipc_nl_link_policy, info->extack);
if (err)
return err;
@@ -1902,7 +1902,7 @@ int tipc_nl_node_reset_link_stats(struct sk_buff *skb, struct genl_info *info)
err = nla_parse_nested(attrs, TIPC_NLA_LINK_MAX,
info->attrs[TIPC_NLA_LINK],
- tipc_nl_link_policy);
+ tipc_nl_link_policy, info->extack);
if (err)
return err;
@@ -2042,7 +2042,7 @@ int tipc_nl_node_set_monitor(struct sk_buff *skb, struct genl_info *info)
err = nla_parse_nested(attrs, TIPC_NLA_MON_MAX,
info->attrs[TIPC_NLA_MON],
- tipc_nl_monitor_policy);
+ tipc_nl_monitor_policy, info->extack);
if (err)
return err;
@@ -2098,6 +2098,8 @@ int tipc_nl_node_get_monitor(struct sk_buff *skb, struct genl_info *info)
int err;
msg.skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+ if (!msg.skb)
+ return -ENOMEM;
msg.portid = info->snd_portid;
msg.seq = info->snd_seq;
@@ -2163,7 +2165,7 @@ int tipc_nl_node_dump_monitor_peer(struct sk_buff *skb,
err = nla_parse_nested(mon, TIPC_NLA_MON_MAX,
attrs[TIPC_NLA_MON],
- tipc_nl_monitor_policy);
+ tipc_nl_monitor_policy, NULL);
if (err)
return err;
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index bdce99f9407a..1b92b72e812f 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -51,6 +51,7 @@
#define TIPC_FWD_MSG 1
#define TIPC_MAX_PORT 0xffffffff
#define TIPC_MIN_PORT 1
+#define TIPC_ACK_RATE 4 /* ACK at 1/4 of of rcv window size */
enum {
TIPC_LISTEN = TCP_LISTEN,
@@ -361,25 +362,25 @@ static int tipc_sk_sock_err(struct socket *sock, long *timeout)
return 0;
}
-#define tipc_wait_for_cond(sock_, timeout_, condition_) \
-({ \
- int rc_ = 0; \
- int done_ = 0; \
- \
- while (!(condition_) && !done_) { \
- struct sock *sk_ = sock->sk; \
- DEFINE_WAIT_FUNC(wait_, woken_wake_function); \
- \
- rc_ = tipc_sk_sock_err(sock_, timeout_); \
- if (rc_) \
- break; \
- prepare_to_wait(sk_sleep(sk_), &wait_, \
- TASK_INTERRUPTIBLE); \
- done_ = sk_wait_event(sk_, timeout_, \
- (condition_), &wait_); \
- remove_wait_queue(sk_sleep(sk_), &wait_); \
- } \
- rc_; \
+#define tipc_wait_for_cond(sock_, timeo_, condition_) \
+({ \
+ struct sock *sk_; \
+ int rc_; \
+ \
+ while ((rc_ = !(condition_))) { \
+ DEFINE_WAIT_FUNC(wait_, woken_wake_function); \
+ sk_ = (sock_)->sk; \
+ rc_ = tipc_sk_sock_err((sock_), timeo_); \
+ if (rc_) \
+ break; \
+ prepare_to_wait(sk_sleep(sk_), &wait_, TASK_INTERRUPTIBLE); \
+ release_sock(sk_); \
+ *(timeo_) = wait_woken(&wait_, TASK_INTERRUPTIBLE, *(timeo_)); \
+ sched_annotate_sleep(); \
+ lock_sock(sk_); \
+ remove_wait_queue(sk_sleep(sk_), &wait_); \
+ } \
+ rc_; \
})
/**
@@ -1305,7 +1306,7 @@ static int tipc_wait_for_rcvmsg(struct socket *sock, long *timeop)
/**
* tipc_recvmsg - receive packet-oriented message
* @m: descriptor for message info
- * @buf_len: total size of user buffer area
+ * @buflen: length of user buffer area
* @flags: receive flags
*
* Used for SOCK_DGRAM, SOCK_RDM, and SOCK_SEQPACKET messages.
@@ -1313,95 +1314,85 @@ static int tipc_wait_for_rcvmsg(struct socket *sock, long *timeop)
*
* Returns size of returned message data, errno otherwise
*/
-static int tipc_recvmsg(struct socket *sock, struct msghdr *m, size_t buf_len,
- int flags)
+static int tipc_recvmsg(struct socket *sock, struct msghdr *m,
+ size_t buflen, int flags)
{
struct sock *sk = sock->sk;
struct tipc_sock *tsk = tipc_sk(sk);
- struct sk_buff *buf;
- struct tipc_msg *msg;
- bool is_connectionless = tipc_sk_type_connectionless(sk);
- long timeo;
- unsigned int sz;
- u32 err;
- int res, hlen;
+ struct sk_buff *skb;
+ struct tipc_msg *hdr;
+ bool connected = !tipc_sk_type_connectionless(sk);
+ int rc, err, hlen, dlen, copy;
+ long timeout;
/* Catch invalid receive requests */
- if (unlikely(!buf_len))
+ if (unlikely(!buflen))
return -EINVAL;
lock_sock(sk);
-
- if (!is_connectionless && unlikely(sk->sk_state == TIPC_OPEN)) {
- res = -ENOTCONN;
+ if (unlikely(connected && sk->sk_state == TIPC_OPEN)) {
+ rc = -ENOTCONN;
goto exit;
}
+ timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
- timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
-restart:
-
- /* Look for a message in receive queue; wait if necessary */
- res = tipc_wait_for_rcvmsg(sock, &timeo);
- if (res)
- goto exit;
-
- /* Look at first message in receive queue */
- buf = skb_peek(&sk->sk_receive_queue);
- msg = buf_msg(buf);
- sz = msg_data_sz(msg);
- hlen = msg_hdr_sz(msg);
- err = msg_errcode(msg);
-
- /* Discard an empty non-errored message & try again */
- if ((!sz) && (!err)) {
+ do {
+ /* Look at first msg in receive queue; wait if necessary */
+ rc = tipc_wait_for_rcvmsg(sock, &timeout);
+ if (unlikely(rc))
+ goto exit;
+ skb = skb_peek(&sk->sk_receive_queue);
+ hdr = buf_msg(skb);
+ dlen = msg_data_sz(hdr);
+ hlen = msg_hdr_sz(hdr);
+ err = msg_errcode(hdr);
+ if (likely(dlen || err))
+ break;
tsk_advance_rx_queue(sk);
- goto restart;
- }
-
- /* Capture sender's address (optional) */
- set_orig_addr(m, msg);
+ } while (1);
- /* Capture ancillary data (optional) */
- res = tipc_sk_anc_data_recv(m, msg, tsk);
- if (res)
+ /* Collect msg meta data, including error code and rejected data */
+ set_orig_addr(m, hdr);
+ rc = tipc_sk_anc_data_recv(m, hdr, tsk);
+ if (unlikely(rc))
goto exit;
- /* Capture message data (if valid) & compute return value (always) */
- if (!err) {
- if (unlikely(buf_len < sz)) {
- sz = buf_len;
+ /* Capture data if non-error msg, otherwise just set return value */
+ if (likely(!err)) {
+ copy = min_t(int, dlen, buflen);
+ if (unlikely(copy != dlen))
m->msg_flags |= MSG_TRUNC;
- }
- res = skb_copy_datagram_msg(buf, hlen, m, sz);
- if (res)
- goto exit;
- res = sz;
+ rc = skb_copy_datagram_msg(skb, hlen, m, copy);
} else {
- if (is_connectionless || err == TIPC_CONN_SHUTDOWN ||
- m->msg_control)
- res = 0;
- else
- res = -ECONNRESET;
+ copy = 0;
+ rc = 0;
+ if (err != TIPC_CONN_SHUTDOWN && connected && !m->msg_control)
+ rc = -ECONNRESET;
}
+ if (unlikely(rc))
+ goto exit;
+ /* Caption of data or error code/rejected data was successful */
if (unlikely(flags & MSG_PEEK))
goto exit;
- if (likely(!is_connectionless)) {
- tsk->rcv_unacked += tsk_inc(tsk, hlen + sz);
- if (unlikely(tsk->rcv_unacked >= (tsk->rcv_win / 4)))
- tipc_sk_send_ack(tsk);
- }
tsk_advance_rx_queue(sk);
+ if (likely(!connected))
+ goto exit;
+
+ /* Send connection flow control ack when applicable */
+ tsk->rcv_unacked += tsk_inc(tsk, hlen + dlen);
+ if (tsk->rcv_unacked >= tsk->rcv_win / TIPC_ACK_RATE)
+ tipc_sk_send_ack(tsk);
exit:
release_sock(sk);
- return res;
+ return rc ? rc : copy;
}
/**
- * tipc_recv_stream - receive stream-oriented data
+ * tipc_recvstream - receive stream-oriented data
* @m: descriptor for message info
- * @buf_len: total size of user buffer area
+ * @buflen: total size of user buffer area
* @flags: receive flags
*
* Used for SOCK_STREAM messages only. If not enough data is available
@@ -1409,111 +1400,98 @@ exit:
*
* Returns size of returned message data, errno otherwise
*/
-static int tipc_recv_stream(struct socket *sock, struct msghdr *m,
- size_t buf_len, int flags)
+static int tipc_recvstream(struct socket *sock, struct msghdr *m,
+ size_t buflen, int flags)
{
struct sock *sk = sock->sk;
struct tipc_sock *tsk = tipc_sk(sk);
- struct sk_buff *buf;
- struct tipc_msg *msg;
- long timeo;
- unsigned int sz;
- int target;
- int sz_copied = 0;
- u32 err;
- int res = 0, hlen;
+ struct sk_buff *skb;
+ struct tipc_msg *hdr;
+ struct tipc_skb_cb *skb_cb;
+ bool peek = flags & MSG_PEEK;
+ int offset, required, copy, copied = 0;
+ int hlen, dlen, err, rc;
+ long timeout;
/* Catch invalid receive attempts */
- if (unlikely(!buf_len))
+ if (unlikely(!buflen))
return -EINVAL;
lock_sock(sk);
if (unlikely(sk->sk_state == TIPC_OPEN)) {
- res = -ENOTCONN;
- goto exit;
- }
-
- target = sock_rcvlowat(sk, flags & MSG_WAITALL, buf_len);
- timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
-
-restart:
- /* Look for a message in receive queue; wait if necessary */
- res = tipc_wait_for_rcvmsg(sock, &timeo);
- if (res)
+ rc = -ENOTCONN;
goto exit;
-
- /* Look at first message in receive queue */
- buf = skb_peek(&sk->sk_receive_queue);
- msg = buf_msg(buf);
- sz = msg_data_sz(msg);
- hlen = msg_hdr_sz(msg);
- err = msg_errcode(msg);
-
- /* Discard an empty non-errored message & try again */
- if ((!sz) && (!err)) {
- tsk_advance_rx_queue(sk);
- goto restart;
}
+ required = sock_rcvlowat(sk, flags & MSG_WAITALL, buflen);
+ timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
- /* Optionally capture sender's address & ancillary data of first msg */
- if (sz_copied == 0) {
- set_orig_addr(m, msg);
- res = tipc_sk_anc_data_recv(m, msg, tsk);
- if (res)
- goto exit;
- }
-
- /* Capture message data (if valid) & compute return value (always) */
- if (!err) {
- u32 offset = TIPC_SKB_CB(buf)->bytes_read;
- u32 needed;
- int sz_to_copy;
-
- sz -= offset;
- needed = (buf_len - sz_copied);
- sz_to_copy = min(sz, needed);
-
- res = skb_copy_datagram_msg(buf, hlen + offset, m, sz_to_copy);
- if (res)
- goto exit;
+ do {
+ /* Look at first msg in receive queue; wait if necessary */
+ rc = tipc_wait_for_rcvmsg(sock, &timeout);
+ if (unlikely(rc))
+ break;
+ skb = skb_peek(&sk->sk_receive_queue);
+ skb_cb = TIPC_SKB_CB(skb);
+ hdr = buf_msg(skb);
+ dlen = msg_data_sz(hdr);
+ hlen = msg_hdr_sz(hdr);
+ err = msg_errcode(hdr);
+
+ /* Discard any empty non-errored (SYN-) message */
+ if (unlikely(!dlen && !err)) {
+ tsk_advance_rx_queue(sk);
+ continue;
+ }
- sz_copied += sz_to_copy;
+ /* Collect msg meta data, incl. error code and rejected data */
+ if (!copied) {
+ set_orig_addr(m, hdr);
+ rc = tipc_sk_anc_data_recv(m, hdr, tsk);
+ if (rc)
+ break;
+ }
- if (sz_to_copy < sz) {
- if (!(flags & MSG_PEEK))
- TIPC_SKB_CB(buf)->bytes_read =
- offset + sz_to_copy;
- goto exit;
+ /* Copy data if msg ok, otherwise return error/partial data */
+ if (likely(!err)) {
+ offset = skb_cb->bytes_read;
+ copy = min_t(int, dlen - offset, buflen - copied);
+ rc = skb_copy_datagram_msg(skb, hlen + offset, m, copy);
+ if (unlikely(rc))
+ break;
+ copied += copy;
+ offset += copy;
+ if (unlikely(offset < dlen)) {
+ if (!peek)
+ skb_cb->bytes_read = offset;
+ break;
+ }
+ } else {
+ rc = 0;
+ if ((err != TIPC_CONN_SHUTDOWN) && !m->msg_control)
+ rc = -ECONNRESET;
+ if (copied || rc)
+ break;
}
- } else {
- if (sz_copied != 0)
- goto exit; /* can't add error msg to valid data */
- if ((err == TIPC_CONN_SHUTDOWN) || m->msg_control)
- res = 0;
- else
- res = -ECONNRESET;
- }
+ if (unlikely(peek))
+ break;
- if (unlikely(flags & MSG_PEEK))
- goto exit;
+ tsk_advance_rx_queue(sk);
- tsk->rcv_unacked += tsk_inc(tsk, hlen + msg_data_sz(msg));
- if (unlikely(tsk->rcv_unacked >= (tsk->rcv_win / 4)))
- tipc_sk_send_ack(tsk);
- tsk_advance_rx_queue(sk);
+ /* Send connection flow control advertisement when applicable */
+ tsk->rcv_unacked += tsk_inc(tsk, hlen + dlen);
+ if (unlikely(tsk->rcv_unacked >= tsk->rcv_win / TIPC_ACK_RATE))
+ tipc_sk_send_ack(tsk);
- /* Loop around if more data is required */
- if ((sz_copied < buf_len) && /* didn't get all requested data */
- (!skb_queue_empty(&sk->sk_receive_queue) ||
- (sz_copied < target)) && /* and more is ready or required */
- (!err)) /* and haven't reached a FIN */
- goto restart;
+ /* Exit if all requested data or FIN/error received */
+ if (copied == buflen || err)
+ break;
+ } while (!skb_queue_empty(&sk->sk_receive_queue) || copied < required);
exit:
release_sock(sk);
- return sz_copied ? sz_copied : res;
+ return copied ? copied : rc;
}
/**
@@ -2537,6 +2515,28 @@ static int tipc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
}
}
+static int tipc_socketpair(struct socket *sock1, struct socket *sock2)
+{
+ struct tipc_sock *tsk2 = tipc_sk(sock2->sk);
+ struct tipc_sock *tsk1 = tipc_sk(sock1->sk);
+ u32 onode = tipc_own_addr(sock_net(sock1->sk));
+
+ tsk1->peer.family = AF_TIPC;
+ tsk1->peer.addrtype = TIPC_ADDR_ID;
+ tsk1->peer.scope = TIPC_NODE_SCOPE;
+ tsk1->peer.addr.id.ref = tsk2->portid;
+ tsk1->peer.addr.id.node = onode;
+ tsk2->peer.family = AF_TIPC;
+ tsk2->peer.addrtype = TIPC_ADDR_ID;
+ tsk2->peer.scope = TIPC_NODE_SCOPE;
+ tsk2->peer.addr.id.ref = tsk1->portid;
+ tsk2->peer.addr.id.node = onode;
+
+ tipc_sk_finish_conn(tsk1, tsk2->portid, onode);
+ tipc_sk_finish_conn(tsk2, tsk1->portid, onode);
+ return 0;
+}
+
/* Protocol switches for the various types of TIPC sockets */
static const struct proto_ops msg_ops = {
@@ -2545,7 +2545,7 @@ static const struct proto_ops msg_ops = {
.release = tipc_release,
.bind = tipc_bind,
.connect = tipc_connect,
- .socketpair = sock_no_socketpair,
+ .socketpair = tipc_socketpair,
.accept = sock_no_accept,
.getname = tipc_getname,
.poll = tipc_poll,
@@ -2566,7 +2566,7 @@ static const struct proto_ops packet_ops = {
.release = tipc_release,
.bind = tipc_bind,
.connect = tipc_connect,
- .socketpair = sock_no_socketpair,
+ .socketpair = tipc_socketpair,
.accept = tipc_accept,
.getname = tipc_getname,
.poll = tipc_poll,
@@ -2587,7 +2587,7 @@ static const struct proto_ops stream_ops = {
.release = tipc_release,
.bind = tipc_bind,
.connect = tipc_connect,
- .socketpair = sock_no_socketpair,
+ .socketpair = tipc_socketpair,
.accept = tipc_accept,
.getname = tipc_getname,
.poll = tipc_poll,
@@ -2597,7 +2597,7 @@ static const struct proto_ops stream_ops = {
.setsockopt = tipc_setsockopt,
.getsockopt = tipc_getsockopt,
.sendmsg = tipc_sendstream,
- .recvmsg = tipc_recv_stream,
+ .recvmsg = tipc_recvstream,
.mmap = sock_no_mmap,
.sendpage = sock_no_sendpage
};
@@ -2870,7 +2870,7 @@ int tipc_nl_publ_dump(struct sk_buff *skb, struct netlink_callback *cb)
err = nla_parse_nested(sock, TIPC_NLA_SOCK_MAX,
attrs[TIPC_NLA_SOCK],
- tipc_nl_sock_policy);
+ tipc_nl_sock_policy, NULL);
if (err)
return err;
diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c
index 271cd66e4b3b..0bf91cd3733c 100644
--- a/net/tipc/subscr.c
+++ b/net/tipc/subscr.c
@@ -54,8 +54,6 @@ struct tipc_subscriber {
static void tipc_subscrp_delete(struct tipc_subscription *sub);
static void tipc_subscrb_put(struct tipc_subscriber *subscriber);
-static void tipc_subscrp_put(struct tipc_subscription *subscription);
-static void tipc_subscrp_get(struct tipc_subscription *subscription);
/**
* htohl - convert value to endianness used by destination
@@ -125,7 +123,6 @@ void tipc_subscrp_report_overlap(struct tipc_subscription *sub, u32 found_lower,
{
struct tipc_name_seq seq;
- tipc_subscrp_get(sub);
tipc_subscrp_convert_seq(&sub->evt.s.seq, sub->swap, &seq);
if (!tipc_subscrp_check_overlap(&seq, found_lower, found_upper))
return;
@@ -135,7 +132,6 @@ void tipc_subscrp_report_overlap(struct tipc_subscription *sub, u32 found_lower,
tipc_subscrp_send_event(sub, found_lower, found_upper, event, port_ref,
node);
- tipc_subscrp_put(sub);
}
static void tipc_subscrp_timeout(unsigned long data)
@@ -145,6 +141,7 @@ static void tipc_subscrp_timeout(unsigned long data)
spin_lock_bh(&subscriber->lock);
tipc_nametbl_unsubscribe(sub);
+ list_del(&sub->subscrp_list);
spin_unlock_bh(&subscriber->lock);
/* Notify subscriber of timeout */
@@ -177,20 +174,17 @@ static void tipc_subscrp_kref_release(struct kref *kref)
struct tipc_net *tn = net_generic(sub->net, tipc_net_id);
struct tipc_subscriber *subscriber = sub->subscriber;
- spin_lock_bh(&subscriber->lock);
- list_del(&sub->subscrp_list);
atomic_dec(&tn->subscription_count);
- spin_unlock_bh(&subscriber->lock);
kfree(sub);
tipc_subscrb_put(subscriber);
}
-static void tipc_subscrp_put(struct tipc_subscription *subscription)
+void tipc_subscrp_put(struct tipc_subscription *subscription)
{
kref_put(&subscription->kref, tipc_subscrp_kref_release);
}
-static void tipc_subscrp_get(struct tipc_subscription *subscription)
+void tipc_subscrp_get(struct tipc_subscription *subscription)
{
kref_get(&subscription->kref);
}
@@ -210,11 +204,8 @@ static void tipc_subscrb_subscrp_delete(struct tipc_subscriber *subscriber,
continue;
tipc_nametbl_unsubscribe(sub);
- tipc_subscrp_get(sub);
- spin_unlock_bh(&subscriber->lock);
+ list_del(&sub->subscrp_list);
tipc_subscrp_delete(sub);
- tipc_subscrp_put(sub);
- spin_lock_bh(&subscriber->lock);
if (s)
break;
diff --git a/net/tipc/subscr.h b/net/tipc/subscr.h
index ffdc214c117a..ee52957dc952 100644
--- a/net/tipc/subscr.h
+++ b/net/tipc/subscr.h
@@ -78,4 +78,7 @@ u32 tipc_subscrp_convert_seq_type(u32 type, int swap);
int tipc_topsrv_start(struct net *net);
void tipc_topsrv_stop(struct net *net);
+void tipc_subscrp_put(struct tipc_subscription *subscription);
+void tipc_subscrp_get(struct tipc_subscription *subscription);
+
#endif
diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c
index 46061cf48cd1..ecca64fc6a6f 100644
--- a/net/tipc/udp_media.c
+++ b/net/tipc/udp_media.c
@@ -457,7 +457,7 @@ int tipc_udp_nl_dump_remoteip(struct sk_buff *skb, struct netlink_callback *cb)
err = nla_parse_nested(battrs, TIPC_NLA_BEARER_MAX,
attrs[TIPC_NLA_BEARER],
- tipc_nl_bearer_policy);
+ tipc_nl_bearer_policy, NULL);
if (err)
return err;
@@ -609,7 +609,8 @@ int tipc_udp_nl_bearer_add(struct tipc_bearer *b, struct nlattr *attr)
struct nlattr *opts[TIPC_NLA_UDP_MAX + 1];
struct udp_media_addr *dst;
- if (nla_parse_nested(opts, TIPC_NLA_UDP_MAX, attr, tipc_nl_udp_policy))
+ if (nla_parse_nested(opts, TIPC_NLA_UDP_MAX, attr,
+ tipc_nl_udp_policy, NULL))
return -EINVAL;
if (!opts[TIPC_NLA_UDP_REMOTE])
@@ -662,7 +663,7 @@ static int tipc_udp_enable(struct net *net, struct tipc_bearer *b,
if (nla_parse_nested(opts, TIPC_NLA_UDP_MAX,
attrs[TIPC_NLA_BEARER_UDP_OPTS],
- tipc_nl_udp_policy))
+ tipc_nl_udp_policy, NULL))
goto err;
if (!opts[TIPC_NLA_UDP_LOCAL] || !opts[TIPC_NLA_UDP_REMOTE]) {
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 928691c43408..6a7fe7660551 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -996,7 +996,7 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
unsigned int hash;
struct unix_address *addr;
struct hlist_head *list;
- struct path path = { NULL, NULL };
+ struct path path = { };
err = -EINVAL;
if (sunaddr->sun_family != AF_UNIX)
diff --git a/net/vmw_vsock/Makefile b/net/vmw_vsock/Makefile
index bc27c70e0e59..09fc2eb29dc8 100644
--- a/net/vmw_vsock/Makefile
+++ b/net/vmw_vsock/Makefile
@@ -3,7 +3,7 @@ obj-$(CONFIG_VMWARE_VMCI_VSOCKETS) += vmw_vsock_vmci_transport.o
obj-$(CONFIG_VIRTIO_VSOCKETS) += vmw_vsock_virtio_transport.o
obj-$(CONFIG_VIRTIO_VSOCKETS_COMMON) += vmw_vsock_virtio_transport_common.o
-vsock-y += af_vsock.o vsock_addr.o
+vsock-y += af_vsock.o af_vsock_tap.o vsock_addr.o
vmw_vsock_vmci_transport-y += vmci_transport.o vmci_transport_notify.o \
vmci_transport_notify_qstate.o
diff --git a/net/vmw_vsock/af_vsock_tap.c b/net/vmw_vsock/af_vsock_tap.c
new file mode 100644
index 000000000000..98f09b539366
--- /dev/null
+++ b/net/vmw_vsock/af_vsock_tap.c
@@ -0,0 +1,114 @@
+/*
+ * Tap functions for AF_VSOCK sockets.
+ *
+ * Code based on net/netlink/af_netlink.c tap functions.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <net/sock.h>
+#include <net/af_vsock.h>
+#include <linux/if_arp.h>
+
+static DEFINE_SPINLOCK(vsock_tap_lock);
+static struct list_head vsock_tap_all __read_mostly =
+ LIST_HEAD_INIT(vsock_tap_all);
+
+int vsock_add_tap(struct vsock_tap *vt)
+{
+ if (unlikely(vt->dev->type != ARPHRD_VSOCKMON))
+ return -EINVAL;
+
+ __module_get(vt->module);
+
+ spin_lock(&vsock_tap_lock);
+ list_add_rcu(&vt->list, &vsock_tap_all);
+ spin_unlock(&vsock_tap_lock);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(vsock_add_tap);
+
+int vsock_remove_tap(struct vsock_tap *vt)
+{
+ struct vsock_tap *tmp;
+ bool found = false;
+
+ spin_lock(&vsock_tap_lock);
+
+ list_for_each_entry(tmp, &vsock_tap_all, list) {
+ if (vt == tmp) {
+ list_del_rcu(&vt->list);
+ found = true;
+ goto out;
+ }
+ }
+
+ pr_warn("vsock_remove_tap: %p not found\n", vt);
+out:
+ spin_unlock(&vsock_tap_lock);
+
+ synchronize_net();
+
+ if (found)
+ module_put(vt->module);
+
+ return found ? 0 : -ENODEV;
+}
+EXPORT_SYMBOL_GPL(vsock_remove_tap);
+
+static int __vsock_deliver_tap_skb(struct sk_buff *skb,
+ struct net_device *dev)
+{
+ int ret = 0;
+ struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
+
+ if (nskb) {
+ dev_hold(dev);
+
+ nskb->dev = dev;
+ ret = dev_queue_xmit(nskb);
+ if (unlikely(ret > 0))
+ ret = net_xmit_errno(ret);
+
+ dev_put(dev);
+ }
+
+ return ret;
+}
+
+static void __vsock_deliver_tap(struct sk_buff *skb)
+{
+ int ret;
+ struct vsock_tap *tmp;
+
+ list_for_each_entry_rcu(tmp, &vsock_tap_all, list) {
+ ret = __vsock_deliver_tap_skb(skb, tmp->dev);
+ if (unlikely(ret))
+ break;
+ }
+}
+
+void vsock_deliver_tap(struct sk_buff *build_skb(void *opaque), void *opaque)
+{
+ struct sk_buff *skb;
+
+ rcu_read_lock();
+
+ if (likely(list_empty(&vsock_tap_all)))
+ goto out;
+
+ skb = build_skb(opaque);
+ if (skb) {
+ __vsock_deliver_tap(skb);
+ consume_skb(skb);
+ }
+
+out:
+ rcu_read_unlock();
+}
+EXPORT_SYMBOL_GPL(vsock_deliver_tap);
diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
index 68675a151f22..403d86e80162 100644
--- a/net/vmw_vsock/virtio_transport.c
+++ b/net/vmw_vsock/virtio_transport.c
@@ -144,6 +144,8 @@ virtio_transport_send_pkt_work(struct work_struct *work)
list_del_init(&pkt->list);
spin_unlock_bh(&vsock->send_pkt_list_lock);
+ virtio_transport_deliver_tap_pkt(pkt);
+
reply = pkt->reply;
sg_init_one(&hdr, &pkt->hdr, sizeof(pkt->hdr));
@@ -370,6 +372,7 @@ static void virtio_transport_rx_work(struct work_struct *work)
}
pkt->len = len - sizeof(pkt->hdr);
+ virtio_transport_deliver_tap_pkt(pkt);
virtio_transport_recv_pkt(pkt);
}
} while (!virtqueue_enable_cb(vq));
@@ -573,9 +576,9 @@ static int virtio_vsock_probe(struct virtio_device *vdev)
vsock->vdev = vdev;
- ret = vsock->vdev->config->find_vqs(vsock->vdev, VSOCK_VQ_MAX,
- vsock->vqs, callbacks, names,
- NULL);
+ ret = virtio_find_vqs(vsock->vdev, VSOCK_VQ_MAX,
+ vsock->vqs, callbacks, names,
+ NULL);
if (ret < 0)
goto out;
diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index af087b44ceea..18e24793659f 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -16,6 +16,7 @@
#include <linux/virtio_ids.h>
#include <linux/virtio_config.h>
#include <linux/virtio_vsock.h>
+#include <uapi/linux/vsockmon.h>
#include <net/sock.h>
#include <net/af_vsock.h>
@@ -85,6 +86,69 @@ out_pkt:
return NULL;
}
+/* Packet capture */
+static struct sk_buff *virtio_transport_build_skb(void *opaque)
+{
+ struct virtio_vsock_pkt *pkt = opaque;
+ unsigned char *t_hdr, *payload;
+ struct af_vsockmon_hdr *hdr;
+ struct sk_buff *skb;
+
+ skb = alloc_skb(sizeof(*hdr) + sizeof(pkt->hdr) + pkt->len,
+ GFP_ATOMIC);
+ if (!skb)
+ return NULL;
+
+ hdr = (struct af_vsockmon_hdr *)skb_put(skb, sizeof(*hdr));
+
+ /* pkt->hdr is little-endian so no need to byteswap here */
+ hdr->src_cid = pkt->hdr.src_cid;
+ hdr->src_port = pkt->hdr.src_port;
+ hdr->dst_cid = pkt->hdr.dst_cid;
+ hdr->dst_port = pkt->hdr.dst_port;
+
+ hdr->transport = cpu_to_le16(AF_VSOCK_TRANSPORT_VIRTIO);
+ hdr->len = cpu_to_le16(sizeof(pkt->hdr));
+ memset(hdr->reserved, 0, sizeof(hdr->reserved));
+
+ switch (le16_to_cpu(pkt->hdr.op)) {
+ case VIRTIO_VSOCK_OP_REQUEST:
+ case VIRTIO_VSOCK_OP_RESPONSE:
+ hdr->op = cpu_to_le16(AF_VSOCK_OP_CONNECT);
+ break;
+ case VIRTIO_VSOCK_OP_RST:
+ case VIRTIO_VSOCK_OP_SHUTDOWN:
+ hdr->op = cpu_to_le16(AF_VSOCK_OP_DISCONNECT);
+ break;
+ case VIRTIO_VSOCK_OP_RW:
+ hdr->op = cpu_to_le16(AF_VSOCK_OP_PAYLOAD);
+ break;
+ case VIRTIO_VSOCK_OP_CREDIT_UPDATE:
+ case VIRTIO_VSOCK_OP_CREDIT_REQUEST:
+ hdr->op = cpu_to_le16(AF_VSOCK_OP_CONTROL);
+ break;
+ default:
+ hdr->op = cpu_to_le16(AF_VSOCK_OP_UNKNOWN);
+ break;
+ }
+
+ t_hdr = skb_put(skb, sizeof(pkt->hdr));
+ memcpy(t_hdr, &pkt->hdr, sizeof(pkt->hdr));
+
+ if (pkt->len) {
+ payload = skb_put(skb, pkt->len);
+ memcpy(payload, pkt->buf, pkt->len);
+ }
+
+ return skb;
+}
+
+void virtio_transport_deliver_tap_pkt(struct virtio_vsock_pkt *pkt)
+{
+ vsock_deliver_tap(virtio_transport_build_skb, pkt);
+}
+EXPORT_SYMBOL_GPL(virtio_transport_deliver_tap_pkt);
+
static int virtio_transport_send_pkt_info(struct vsock_sock *vsk,
struct virtio_vsock_pkt_info *info)
{
diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c
index 4be4fbbc0b50..10ae7823a19d 100644
--- a/net/vmw_vsock/vmci_transport.c
+++ b/net/vmw_vsock/vmci_transport.c
@@ -96,31 +96,23 @@ static int PROTOCOL_OVERRIDE = -1;
static s32 vmci_transport_error_to_vsock_error(s32 vmci_error)
{
- int err;
-
switch (vmci_error) {
case VMCI_ERROR_NO_MEM:
- err = ENOMEM;
- break;
+ return -ENOMEM;
case VMCI_ERROR_DUPLICATE_ENTRY:
case VMCI_ERROR_ALREADY_EXISTS:
- err = EADDRINUSE;
- break;
+ return -EADDRINUSE;
case VMCI_ERROR_NO_ACCESS:
- err = EPERM;
- break;
+ return -EPERM;
case VMCI_ERROR_NO_RESOURCES:
- err = ENOBUFS;
- break;
+ return -ENOBUFS;
case VMCI_ERROR_INVALID_RESOURCE:
- err = EHOSTUNREACH;
- break;
+ return -EHOSTUNREACH;
case VMCI_ERROR_INVALID_ARGS:
default:
- err = EINVAL;
+ break;
}
-
- return err > 0 ? -err : err;
+ return -EINVAL;
}
static u32 vmci_transport_peer_rid(u32 peer_cid)
diff --git a/net/wireless/ap.c b/net/wireless/ap.c
index bdad1f951561..25666d3009be 100644
--- a/net/wireless/ap.c
+++ b/net/wireless/ap.c
@@ -32,6 +32,11 @@ int __cfg80211_stop_ap(struct cfg80211_registered_device *rdev,
rdev_set_qos_map(rdev, dev, NULL);
if (notify)
nl80211_send_ap_stopped(wdev);
+
+ /* Should we apply the grace period during beaconing interface
+ * shutdown also?
+ */
+ cfg80211_sched_dfs_chan_update(rdev);
}
return err;
diff --git a/net/wireless/chan.c b/net/wireless/chan.c
index 5497d022fada..b8aa5a7d5c77 100644
--- a/net/wireless/chan.c
+++ b/net/wireless/chan.c
@@ -456,6 +456,123 @@ bool cfg80211_chandef_dfs_usable(struct wiphy *wiphy,
return (r1 + r2 > 0);
}
+/*
+ * Checks if center frequency of chan falls with in the bandwidth
+ * range of chandef.
+ */
+bool cfg80211_is_sub_chan(struct cfg80211_chan_def *chandef,
+ struct ieee80211_channel *chan)
+{
+ int width;
+ u32 cf_offset, freq;
+
+ if (chandef->chan->center_freq == chan->center_freq)
+ return true;
+
+ width = cfg80211_chandef_get_width(chandef);
+ if (width <= 20)
+ return false;
+
+ cf_offset = width / 2 - 10;
+
+ for (freq = chandef->center_freq1 - width / 2 + 10;
+ freq <= chandef->center_freq1 + width / 2 - 10; freq += 20) {
+ if (chan->center_freq == freq)
+ return true;
+ }
+
+ if (!chandef->center_freq2)
+ return false;
+
+ for (freq = chandef->center_freq2 - width / 2 + 10;
+ freq <= chandef->center_freq2 + width / 2 - 10; freq += 20) {
+ if (chan->center_freq == freq)
+ return true;
+ }
+
+ return false;
+}
+
+bool cfg80211_beaconing_iface_active(struct wireless_dev *wdev)
+{
+ bool active = false;
+
+ ASSERT_WDEV_LOCK(wdev);
+
+ if (!wdev->chandef.chan)
+ return false;
+
+ switch (wdev->iftype) {
+ case NL80211_IFTYPE_AP:
+ case NL80211_IFTYPE_P2P_GO:
+ active = wdev->beacon_interval != 0;
+ break;
+ case NL80211_IFTYPE_ADHOC:
+ active = wdev->ssid_len != 0;
+ break;
+ case NL80211_IFTYPE_MESH_POINT:
+ active = wdev->mesh_id_len != 0;
+ break;
+ case NL80211_IFTYPE_STATION:
+ case NL80211_IFTYPE_OCB:
+ case NL80211_IFTYPE_P2P_CLIENT:
+ case NL80211_IFTYPE_MONITOR:
+ case NL80211_IFTYPE_AP_VLAN:
+ case NL80211_IFTYPE_WDS:
+ case NL80211_IFTYPE_P2P_DEVICE:
+ /* Can NAN type be considered as beaconing interface? */
+ case NL80211_IFTYPE_NAN:
+ break;
+ case NL80211_IFTYPE_UNSPECIFIED:
+ case NUM_NL80211_IFTYPES:
+ WARN_ON(1);
+ }
+
+ return active;
+}
+
+static bool cfg80211_is_wiphy_oper_chan(struct wiphy *wiphy,
+ struct ieee80211_channel *chan)
+{
+ struct wireless_dev *wdev;
+
+ list_for_each_entry(wdev, &wiphy->wdev_list, list) {
+ wdev_lock(wdev);
+ if (!cfg80211_beaconing_iface_active(wdev)) {
+ wdev_unlock(wdev);
+ continue;
+ }
+
+ if (cfg80211_is_sub_chan(&wdev->chandef, chan)) {
+ wdev_unlock(wdev);
+ return true;
+ }
+ wdev_unlock(wdev);
+ }
+
+ return false;
+}
+
+bool cfg80211_any_wiphy_oper_chan(struct wiphy *wiphy,
+ struct ieee80211_channel *chan)
+{
+ struct cfg80211_registered_device *rdev;
+
+ ASSERT_RTNL();
+
+ if (!(chan->flags & IEEE80211_CHAN_RADAR))
+ return false;
+
+ list_for_each_entry(rdev, &cfg80211_rdev_list, list) {
+ if (!reg_dfs_domain_same(wiphy, &rdev->wiphy))
+ continue;
+
+ if (cfg80211_is_wiphy_oper_chan(&rdev->wiphy, chan))
+ return true;
+ }
+
+ return false;
+}
static bool cfg80211_get_chans_dfs_available(struct wiphy *wiphy,
u32 center_freq,
diff --git a/net/wireless/core.c b/net/wireless/core.c
index e55e05bc4805..83ea164f16b3 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -305,30 +305,14 @@ static void cfg80211_event_work(struct work_struct *work)
void cfg80211_destroy_ifaces(struct cfg80211_registered_device *rdev)
{
- struct cfg80211_iface_destroy *item;
+ struct wireless_dev *wdev, *tmp;
ASSERT_RTNL();
- spin_lock_irq(&rdev->destroy_list_lock);
- while ((item = list_first_entry_or_null(&rdev->destroy_list,
- struct cfg80211_iface_destroy,
- list))) {
- struct wireless_dev *wdev, *tmp;
- u32 nlportid = item->nlportid;
-
- list_del(&item->list);
- kfree(item);
- spin_unlock_irq(&rdev->destroy_list_lock);
-
- list_for_each_entry_safe(wdev, tmp,
- &rdev->wiphy.wdev_list, list) {
- if (nlportid == wdev->owner_nlportid)
- rdev_del_virtual_intf(rdev, wdev);
- }
-
- spin_lock_irq(&rdev->destroy_list_lock);
+ list_for_each_entry_safe(wdev, tmp, &rdev->wiphy.wdev_list, list) {
+ if (wdev->nl_owner_dead)
+ rdev_del_virtual_intf(rdev, wdev);
}
- spin_unlock_irq(&rdev->destroy_list_lock);
}
static void cfg80211_destroy_iface_wk(struct work_struct *work)
@@ -346,13 +330,47 @@ static void cfg80211_destroy_iface_wk(struct work_struct *work)
static void cfg80211_sched_scan_stop_wk(struct work_struct *work)
{
struct cfg80211_registered_device *rdev;
+ struct cfg80211_sched_scan_request *req, *tmp;
rdev = container_of(work, struct cfg80211_registered_device,
sched_scan_stop_wk);
rtnl_lock();
+ list_for_each_entry_safe(req, tmp, &rdev->sched_scan_req_list, list) {
+ if (req->nl_owner_dead)
+ cfg80211_stop_sched_scan_req(rdev, req, false);
+ }
+ rtnl_unlock();
+}
- __cfg80211_stop_sched_scan(rdev, false);
+static void cfg80211_propagate_radar_detect_wk(struct work_struct *work)
+{
+ struct cfg80211_registered_device *rdev;
+
+ rdev = container_of(work, struct cfg80211_registered_device,
+ propagate_radar_detect_wk);
+
+ rtnl_lock();
+
+ regulatory_propagate_dfs_state(&rdev->wiphy, &rdev->radar_chandef,
+ NL80211_DFS_UNAVAILABLE,
+ NL80211_RADAR_DETECTED);
+
+ rtnl_unlock();
+}
+
+static void cfg80211_propagate_cac_done_wk(struct work_struct *work)
+{
+ struct cfg80211_registered_device *rdev;
+
+ rdev = container_of(work, struct cfg80211_registered_device,
+ propagate_cac_done_wk);
+
+ rtnl_lock();
+
+ regulatory_propagate_dfs_state(&rdev->wiphy, &rdev->cac_done_chandef,
+ NL80211_DFS_AVAILABLE,
+ NL80211_RADAR_CAC_FINISHED);
rtnl_unlock();
}
@@ -436,8 +454,8 @@ use_default_name:
spin_lock_init(&rdev->beacon_registrations_lock);
spin_lock_init(&rdev->bss_lock);
INIT_LIST_HEAD(&rdev->bss_list);
+ INIT_LIST_HEAD(&rdev->sched_scan_req_list);
INIT_WORK(&rdev->scan_done_wk, __cfg80211_scan_done);
- INIT_WORK(&rdev->sched_scan_results_wk, __cfg80211_sched_scan_results);
INIT_LIST_HEAD(&rdev->mlme_unreg);
spin_lock_init(&rdev->mlme_unreg_lock);
INIT_WORK(&rdev->mlme_unreg_wk, cfg80211_mlme_unreg_wk);
@@ -452,10 +470,12 @@ use_default_name:
rdev->wiphy.dev.platform_data = rdev;
device_enable_async_suspend(&rdev->wiphy.dev);
- INIT_LIST_HEAD(&rdev->destroy_list);
- spin_lock_init(&rdev->destroy_list_lock);
INIT_WORK(&rdev->destroy_work, cfg80211_destroy_iface_wk);
INIT_WORK(&rdev->sched_scan_stop_wk, cfg80211_sched_scan_stop_wk);
+ INIT_WORK(&rdev->sched_scan_res_wk, cfg80211_sched_scan_results_wk);
+ INIT_WORK(&rdev->propagate_radar_detect_wk,
+ cfg80211_propagate_radar_detect_wk);
+ INIT_WORK(&rdev->propagate_cac_done_wk, cfg80211_propagate_cac_done_wk);
#ifdef CONFIG_CFG80211_DEFAULT_PS
rdev->wiphy.flags |= WIPHY_FLAG_PS_ON_BY_DEFAULT;
@@ -915,6 +935,8 @@ void wiphy_unregister(struct wiphy *wiphy)
flush_work(&rdev->destroy_work);
flush_work(&rdev->sched_scan_stop_wk);
flush_work(&rdev->mlme_unreg_wk);
+ flush_work(&rdev->propagate_radar_detect_wk);
+ flush_work(&rdev->propagate_cac_done_wk);
#ifdef CONFIG_PM
if (rdev->wiphy.wowlan_config && rdev->ops->set_wakeup)
@@ -954,6 +976,12 @@ void wiphy_rfkill_set_hw_state(struct wiphy *wiphy, bool blocked)
}
EXPORT_SYMBOL(wiphy_rfkill_set_hw_state);
+void cfg80211_cqm_config_free(struct wireless_dev *wdev)
+{
+ kfree(wdev->cqm_config);
+ wdev->cqm_config = NULL;
+}
+
void cfg80211_unregister_wdev(struct wireless_dev *wdev)
{
struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
@@ -980,6 +1008,8 @@ void cfg80211_unregister_wdev(struct wireless_dev *wdev)
WARN_ON_ONCE(1);
break;
}
+
+ cfg80211_cqm_config_free(wdev);
}
EXPORT_SYMBOL(cfg80211_unregister_wdev);
@@ -1001,7 +1031,7 @@ void __cfg80211_leave(struct cfg80211_registered_device *rdev,
struct wireless_dev *wdev)
{
struct net_device *dev = wdev->netdev;
- struct cfg80211_sched_scan_request *sched_scan_req;
+ struct cfg80211_sched_scan_request *pos, *tmp;
ASSERT_RTNL();
ASSERT_WDEV_LOCK(wdev);
@@ -1012,9 +1042,11 @@ void __cfg80211_leave(struct cfg80211_registered_device *rdev,
break;
case NL80211_IFTYPE_P2P_CLIENT:
case NL80211_IFTYPE_STATION:
- sched_scan_req = rtnl_dereference(rdev->sched_scan_req);
- if (sched_scan_req && dev == sched_scan_req->dev)
- __cfg80211_stop_sched_scan(rdev, false);
+ list_for_each_entry_safe(pos, tmp, &rdev->sched_scan_req_list,
+ list) {
+ if (dev == pos->dev)
+ cfg80211_stop_sched_scan_req(rdev, pos, false);
+ }
#ifdef CONFIG_CFG80211_WEXT
kfree(wdev->wext.ie);
@@ -1089,7 +1121,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb,
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct wireless_dev *wdev = dev->ieee80211_ptr;
struct cfg80211_registered_device *rdev;
- struct cfg80211_sched_scan_request *sched_scan_req;
+ struct cfg80211_sched_scan_request *pos, *tmp;
if (!wdev)
return NOTIFY_DONE;
@@ -1114,7 +1146,15 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb,
INIT_LIST_HEAD(&wdev->mgmt_registrations);
spin_lock_init(&wdev->mgmt_registrations_lock);
- wdev->identifier = ++rdev->wdev_id;
+ /*
+ * We get here also when the interface changes network namespaces,
+ * as it's registered into the new one, but we don't want it to
+ * change ID in that case. Checking if the ID is already assigned
+ * works, because 0 isn't considered a valid ID and the memory is
+ * 0-initialized.
+ */
+ if (!wdev->identifier)
+ wdev->identifier = ++rdev->wdev_id;
list_add_rcu(&wdev->list, &rdev->wiphy.wdev_list);
rdev->devlist_generation++;
/* can only change netns with wiphy */
@@ -1158,10 +1198,10 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb,
___cfg80211_scan_done(rdev, false);
}
- sched_scan_req = rtnl_dereference(rdev->sched_scan_req);
- if (WARN_ON(sched_scan_req &&
- sched_scan_req->dev == wdev->netdev)) {
- __cfg80211_stop_sched_scan(rdev, false);
+ list_for_each_entry_safe(pos, tmp,
+ &rdev->sched_scan_req_list, list) {
+ if (WARN_ON(pos && pos->dev == wdev->netdev))
+ cfg80211_stop_sched_scan_req(rdev, pos, false);
}
rdev->opencount--;
@@ -1208,12 +1248,12 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb,
*/
if ((wdev->iftype == NL80211_IFTYPE_STATION ||
wdev->iftype == NL80211_IFTYPE_P2P_CLIENT) &&
- rdev->ops->set_power_mgmt)
- if (rdev_set_power_mgmt(rdev, dev, wdev->ps,
- wdev->ps_timeout)) {
- /* assume this means it's off */
- wdev->ps = false;
- }
+ rdev->ops->set_power_mgmt &&
+ rdev_set_power_mgmt(rdev, dev, wdev->ps,
+ wdev->ps_timeout)) {
+ /* assume this means it's off */
+ wdev->ps = false;
+ }
break;
case NETDEV_UNREGISTER:
/*
@@ -1234,6 +1274,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb,
kzfree(wdev->wext.keys);
#endif
flush_work(&wdev->disconnect_wk);
+ cfg80211_cqm_config_free(wdev);
}
/*
* synchronise (so that we won't find this netdev
diff --git a/net/wireless/core.h b/net/wireless/core.h
index 58ca206982fe..6e809325af3b 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -74,10 +74,9 @@ struct cfg80211_registered_device {
u32 bss_entries;
struct cfg80211_scan_request *scan_req; /* protected by RTNL */
struct sk_buff *scan_msg;
- struct cfg80211_sched_scan_request __rcu *sched_scan_req;
+ struct list_head sched_scan_req_list;
unsigned long suspend_at;
struct work_struct scan_done_wk;
- struct work_struct sched_scan_results_wk;
struct genl_info *cur_cmd_info;
@@ -91,11 +90,15 @@ struct cfg80211_registered_device {
struct cfg80211_coalesce *coalesce;
- spinlock_t destroy_list_lock;
- struct list_head destroy_list;
struct work_struct destroy_work;
-
struct work_struct sched_scan_stop_wk;
+ struct work_struct sched_scan_res_wk;
+
+ struct cfg80211_chan_def radar_chandef;
+ struct work_struct propagate_radar_detect_wk;
+
+ struct cfg80211_chan_def cac_done_chandef;
+ struct work_struct propagate_cac_done_wk;
/* must be last because of the way we do wiphy_priv(),
* and it should at least be aligned to NETDEV_ALIGN */
@@ -220,23 +223,8 @@ struct cfg80211_event {
enum cfg80211_event_type type;
union {
- struct {
- u8 bssid[ETH_ALEN];
- const u8 *req_ie;
- const u8 *resp_ie;
- size_t req_ie_len;
- size_t resp_ie_len;
- struct cfg80211_bss *bss;
- int status; /* -1 = failed; 0..65535 = status code */
- enum nl80211_timeout_reason timeout_reason;
- } cr;
- struct {
- const u8 *req_ie;
- const u8 *resp_ie;
- size_t req_ie_len;
- size_t resp_ie_len;
- struct cfg80211_bss *bss;
- } rm;
+ struct cfg80211_connect_resp_params cr;
+ struct cfg80211_roam_info rm;
struct {
const u8 *ie;
size_t ie_len;
@@ -267,9 +255,11 @@ struct cfg80211_beacon_registration {
u32 nlportid;
};
-struct cfg80211_iface_destroy {
- struct list_head list;
- u32 nlportid;
+struct cfg80211_cqm_config {
+ u32 rssi_hyst;
+ s32 last_rssi_event_value;
+ int n_rssi_thresholds;
+ s32 rssi_thresholds[0];
};
void cfg80211_destroy_ifaces(struct cfg80211_registered_device *rdev);
@@ -385,21 +375,16 @@ int cfg80211_connect(struct cfg80211_registered_device *rdev,
struct cfg80211_connect_params *connect,
struct cfg80211_cached_keys *connkeys,
const u8 *prev_bssid);
-void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid,
- const u8 *req_ie, size_t req_ie_len,
- const u8 *resp_ie, size_t resp_ie_len,
- int status, bool wextev,
- struct cfg80211_bss *bss,
- enum nl80211_timeout_reason timeout_reason);
+void __cfg80211_connect_result(struct net_device *dev,
+ struct cfg80211_connect_resp_params *params,
+ bool wextev);
void __cfg80211_disconnected(struct net_device *dev, const u8 *ie,
size_t ie_len, u16 reason, bool from_ap);
int cfg80211_disconnect(struct cfg80211_registered_device *rdev,
struct net_device *dev, u16 reason,
bool wextev);
void __cfg80211_roamed(struct wireless_dev *wdev,
- struct cfg80211_bss *bss,
- const u8 *req_ie, size_t req_ie_len,
- const u8 *resp_ie, size_t resp_ie_len);
+ struct cfg80211_roam_info *info);
int cfg80211_mgd_wext_connect(struct cfg80211_registered_device *rdev,
struct wireless_dev *wdev);
void cfg80211_autodisconnect_wk(struct work_struct *work);
@@ -423,13 +408,20 @@ int cfg80211_validate_key_settings(struct cfg80211_registered_device *rdev,
void __cfg80211_scan_done(struct work_struct *wk);
void ___cfg80211_scan_done(struct cfg80211_registered_device *rdev,
bool send_message);
-void __cfg80211_sched_scan_results(struct work_struct *wk);
+void cfg80211_add_sched_scan_req(struct cfg80211_registered_device *rdev,
+ struct cfg80211_sched_scan_request *req);
+int cfg80211_sched_scan_req_possible(struct cfg80211_registered_device *rdev,
+ bool want_multi);
+void cfg80211_sched_scan_results_wk(struct work_struct *work);
+int cfg80211_stop_sched_scan_req(struct cfg80211_registered_device *rdev,
+ struct cfg80211_sched_scan_request *req,
+ bool driver_initiated);
int __cfg80211_stop_sched_scan(struct cfg80211_registered_device *rdev,
- bool driver_initiated);
+ u64 reqid, bool driver_initiated);
void cfg80211_upload_connect_keys(struct wireless_dev *wdev);
int cfg80211_change_iface(struct cfg80211_registered_device *rdev,
struct net_device *dev, enum nl80211_iftype ntype,
- u32 *flags, struct vif_params *params);
+ struct vif_params *params);
void cfg80211_process_rdev_events(struct cfg80211_registered_device *rdev);
void cfg80211_process_wdev_events(struct wireless_dev *wdev);
@@ -459,6 +451,16 @@ unsigned int
cfg80211_chandef_dfs_cac_time(struct wiphy *wiphy,
const struct cfg80211_chan_def *chandef);
+void cfg80211_sched_dfs_chan_update(struct cfg80211_registered_device *rdev);
+
+bool cfg80211_any_wiphy_oper_chan(struct wiphy *wiphy,
+ struct ieee80211_channel *chan);
+
+bool cfg80211_beaconing_iface_active(struct wireless_dev *wdev);
+
+bool cfg80211_is_sub_chan(struct cfg80211_chan_def *chandef,
+ struct ieee80211_channel *chan);
+
static inline unsigned int elapsed_jiffies_msecs(unsigned long start)
{
unsigned long end = jiffies;
@@ -512,4 +514,6 @@ void cfg80211_stop_nan(struct cfg80211_registered_device *rdev,
#define CFG80211_DEV_WARN_ON(cond) ({bool __r = (cond); __r; })
#endif
+void cfg80211_cqm_config_free(struct wireless_dev *wdev);
+
#endif /* __NET_WIRELESS_CORE_H */
diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c
index 364f900a3dc4..10bf040a0982 100644
--- a/net/wireless/ibss.c
+++ b/net/wireless/ibss.c
@@ -190,6 +190,7 @@ static void __cfg80211_clear_ibss(struct net_device *dev, bool nowext)
if (!nowext)
wdev->wext.ibss.ssid_len = 0;
#endif
+ cfg80211_sched_dfs_chan_update(rdev);
}
void cfg80211_clear_ibss(struct net_device *dev, bool nowext)
diff --git a/net/wireless/mesh.c b/net/wireless/mesh.c
index 2d8518a37eab..ec0b1c20ac99 100644
--- a/net/wireless/mesh.c
+++ b/net/wireless/mesh.c
@@ -262,6 +262,7 @@ int __cfg80211_leave_mesh(struct cfg80211_registered_device *rdev,
wdev->beacon_interval = 0;
memset(&wdev->chandef, 0, sizeof(wdev->chandef));
rdev_set_qos_map(rdev, dev, NULL);
+ cfg80211_sched_dfs_chan_update(rdev);
}
return err;
diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c
index 22b3d9990065..d8df7a5180a0 100644
--- a/net/wireless/mlme.c
+++ b/net/wireless/mlme.c
@@ -26,9 +26,16 @@ void cfg80211_rx_assoc_resp(struct net_device *dev, struct cfg80211_bss *bss,
struct wiphy *wiphy = wdev->wiphy;
struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)buf;
- u8 *ie = mgmt->u.assoc_resp.variable;
- int ieoffs = offsetof(struct ieee80211_mgmt, u.assoc_resp.variable);
- u16 status_code = le16_to_cpu(mgmt->u.assoc_resp.status_code);
+ struct cfg80211_connect_resp_params cr;
+
+ memset(&cr, 0, sizeof(cr));
+ cr.status = (int)le16_to_cpu(mgmt->u.assoc_resp.status_code);
+ cr.bssid = mgmt->bssid;
+ cr.bss = bss;
+ cr.resp_ie = mgmt->u.assoc_resp.variable;
+ cr.resp_ie_len =
+ len - offsetof(struct ieee80211_mgmt, u.assoc_resp.variable);
+ cr.timeout_reason = NL80211_TIMEOUT_UNSPECIFIED;
trace_cfg80211_send_rx_assoc(dev, bss);
@@ -38,7 +45,7 @@ void cfg80211_rx_assoc_resp(struct net_device *dev, struct cfg80211_bss *bss,
* and got a reject -- we only try again with an assoc
* frame instead of reassoc.
*/
- if (cfg80211_sme_rx_assoc_resp(wdev, status_code)) {
+ if (cfg80211_sme_rx_assoc_resp(wdev, cr.status)) {
cfg80211_unhold_bss(bss_from_pub(bss));
cfg80211_put_bss(wiphy, bss);
return;
@@ -46,10 +53,7 @@ void cfg80211_rx_assoc_resp(struct net_device *dev, struct cfg80211_bss *bss,
nl80211_send_rx_assoc(rdev, dev, buf, len, GFP_KERNEL, uapsd_queues);
/* update current_bss etc., consumes the bss reference */
- __cfg80211_connect_result(dev, mgmt->bssid, NULL, 0, ie, len - ieoffs,
- status_code,
- status_code == WLAN_STATUS_SUCCESS, bss,
- NL80211_TIMEOUT_UNSPECIFIED);
+ __cfg80211_connect_result(dev, &cr, cr.status == WLAN_STATUS_SUCCESS);
}
EXPORT_SYMBOL(cfg80211_rx_assoc_resp);
@@ -745,6 +749,12 @@ bool cfg80211_rx_mgmt(struct wireless_dev *wdev, int freq, int sig_mbm,
}
EXPORT_SYMBOL(cfg80211_rx_mgmt);
+void cfg80211_sched_dfs_chan_update(struct cfg80211_registered_device *rdev)
+{
+ cancel_delayed_work(&rdev->dfs_update_channels_wk);
+ queue_delayed_work(cfg80211_wq, &rdev->dfs_update_channels_wk, 0);
+}
+
void cfg80211_dfs_channels_update_work(struct work_struct *work)
{
struct delayed_work *delayed_work = to_delayed_work(work);
@@ -755,6 +765,8 @@ void cfg80211_dfs_channels_update_work(struct work_struct *work)
struct wiphy *wiphy;
bool check_again = false;
unsigned long timeout, next_time = 0;
+ unsigned long time_dfs_update;
+ enum nl80211_radar_event radar_event;
int bandid, i;
rdev = container_of(delayed_work, struct cfg80211_registered_device,
@@ -770,11 +782,27 @@ void cfg80211_dfs_channels_update_work(struct work_struct *work)
for (i = 0; i < sband->n_channels; i++) {
c = &sband->channels[i];
- if (c->dfs_state != NL80211_DFS_UNAVAILABLE)
+ if (!(c->flags & IEEE80211_CHAN_RADAR))
+ continue;
+
+ if (c->dfs_state != NL80211_DFS_UNAVAILABLE &&
+ c->dfs_state != NL80211_DFS_AVAILABLE)
continue;
- timeout = c->dfs_state_entered + msecs_to_jiffies(
- IEEE80211_DFS_MIN_NOP_TIME_MS);
+ if (c->dfs_state == NL80211_DFS_UNAVAILABLE) {
+ time_dfs_update = IEEE80211_DFS_MIN_NOP_TIME_MS;
+ radar_event = NL80211_RADAR_NOP_FINISHED;
+ } else {
+ if (regulatory_pre_cac_allowed(wiphy) ||
+ cfg80211_any_wiphy_oper_chan(wiphy, c))
+ continue;
+
+ time_dfs_update = REG_PRE_CAC_EXPIRY_GRACE_MS;
+ radar_event = NL80211_RADAR_PRE_CAC_EXPIRED;
+ }
+
+ timeout = c->dfs_state_entered +
+ msecs_to_jiffies(time_dfs_update);
if (time_after_eq(jiffies, timeout)) {
c->dfs_state = NL80211_DFS_USABLE;
@@ -784,8 +812,12 @@ void cfg80211_dfs_channels_update_work(struct work_struct *work)
NL80211_CHAN_NO_HT);
nl80211_radar_notify(rdev, &chandef,
- NL80211_RADAR_NOP_FINISHED,
- NULL, GFP_ATOMIC);
+ radar_event, NULL,
+ GFP_ATOMIC);
+
+ regulatory_propagate_dfs_state(wiphy, &chandef,
+ c->dfs_state,
+ radar_event);
continue;
}
@@ -810,7 +842,6 @@ void cfg80211_radar_event(struct wiphy *wiphy,
gfp_t gfp)
{
struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
- unsigned long timeout;
trace_cfg80211_radar_event(wiphy, chandef);
@@ -820,11 +851,12 @@ void cfg80211_radar_event(struct wiphy *wiphy,
*/
cfg80211_set_dfs_state(wiphy, chandef, NL80211_DFS_UNAVAILABLE);
- timeout = msecs_to_jiffies(IEEE80211_DFS_MIN_NOP_TIME_MS);
- queue_delayed_work(cfg80211_wq, &rdev->dfs_update_channels_wk,
- timeout);
+ cfg80211_sched_dfs_chan_update(rdev);
nl80211_radar_notify(rdev, chandef, NL80211_RADAR_DETECTED, NULL, gfp);
+
+ memcpy(&rdev->radar_chandef, chandef, sizeof(struct cfg80211_chan_def));
+ queue_work(cfg80211_wq, &rdev->propagate_radar_detect_wk);
}
EXPORT_SYMBOL(cfg80211_radar_event);
@@ -851,6 +883,10 @@ void cfg80211_cac_event(struct net_device *netdev,
msecs_to_jiffies(wdev->cac_time_ms);
WARN_ON(!time_after_eq(jiffies, timeout));
cfg80211_set_dfs_state(wiphy, chandef, NL80211_DFS_AVAILABLE);
+ memcpy(&rdev->cac_done_chandef, chandef,
+ sizeof(struct cfg80211_chan_def));
+ queue_work(cfg80211_wq, &rdev->propagate_cac_done_wk);
+ cfg80211_sched_dfs_chan_update(rdev);
break;
case NL80211_RADAR_CAC_ABORTED:
break;
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 2312dc2ffdb9..c3bc9da30cff 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -410,6 +410,16 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
.len = sizeof(struct nl80211_bss_select_rssi_adjust)
},
[NL80211_ATTR_TIMEOUT_REASON] = { .type = NLA_U32 },
+ [NL80211_ATTR_FILS_ERP_USERNAME] = { .type = NLA_BINARY,
+ .len = FILS_ERP_MAX_USERNAME_LEN },
+ [NL80211_ATTR_FILS_ERP_REALM] = { .type = NLA_BINARY,
+ .len = FILS_ERP_MAX_REALM_LEN },
+ [NL80211_ATTR_FILS_ERP_NEXT_SEQ_NUM] = { .type = NLA_U16 },
+ [NL80211_ATTR_FILS_ERP_RRK] = { .type = NLA_BINARY,
+ .len = FILS_ERP_MAX_RRK_LEN },
+ [NL80211_ATTR_FILS_CACHE_ID] = { .len = 2 },
+ [NL80211_ATTR_PMK] = { .type = NLA_BINARY, .len = PMK_MAX_LEN },
+ [NL80211_ATTR_SCHED_SCAN_MULTI] = { .type = NLA_FLAG },
};
/* policy for the key attributes */
@@ -487,6 +497,7 @@ static const struct nla_policy
nl80211_match_policy[NL80211_SCHED_SCAN_MATCH_ATTR_MAX + 1] = {
[NL80211_SCHED_SCAN_MATCH_ATTR_SSID] = { .type = NLA_BINARY,
.len = IEEE80211_MAX_SSID_LEN },
+ [NL80211_SCHED_SCAN_MATCH_ATTR_BSSID] = { .len = ETH_ALEN },
[NL80211_SCHED_SCAN_MATCH_ATTR_RSSI] = { .type = NLA_U32 },
};
@@ -548,7 +559,7 @@ static int nl80211_prepare_wdev_dump(struct sk_buff *skb,
if (!cb->args[0]) {
err = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize,
genl_family_attrbuf(&nl80211_fam),
- nl80211_fam.maxattr, nl80211_policy);
+ nl80211_fam.maxattr, nl80211_policy, NULL);
if (err)
return err;
@@ -719,7 +730,7 @@ static int nl80211_parse_key_new(struct nlattr *key, struct key_parse *k)
{
struct nlattr *tb[NL80211_KEY_MAX + 1];
int err = nla_parse_nested(tb, NL80211_KEY_MAX, key,
- nl80211_key_policy);
+ nl80211_key_policy, NULL);
if (err)
return err;
@@ -760,7 +771,7 @@ static int nl80211_parse_key_new(struct nlattr *key, struct key_parse *k)
err = nla_parse_nested(kdt, NUM_NL80211_KEY_DEFAULT_TYPES - 1,
tb[NL80211_KEY_DEFAULT_TYPES],
- nl80211_key_default_policy);
+ nl80211_key_default_policy, NULL);
if (err)
return err;
@@ -807,10 +818,11 @@ static int nl80211_parse_key_old(struct genl_info *info, struct key_parse *k)
if (info->attrs[NL80211_ATTR_KEY_DEFAULT_TYPES]) {
struct nlattr *kdt[NUM_NL80211_KEY_DEFAULT_TYPES];
- int err = nla_parse_nested(
- kdt, NUM_NL80211_KEY_DEFAULT_TYPES - 1,
- info->attrs[NL80211_ATTR_KEY_DEFAULT_TYPES],
- nl80211_key_default_policy);
+ int err = nla_parse_nested(kdt,
+ NUM_NL80211_KEY_DEFAULT_TYPES - 1,
+ info->attrs[NL80211_ATTR_KEY_DEFAULT_TYPES],
+ nl80211_key_default_policy,
+ info->extack);
if (err)
return err;
@@ -1366,7 +1378,7 @@ static int nl80211_add_commands_unsplit(struct cfg80211_registered_device *rdev,
CMD(tdls_mgmt, TDLS_MGMT);
CMD(tdls_oper, TDLS_OPER);
}
- if (rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_SCHED_SCAN)
+ if (rdev->wiphy.max_sched_scan_reqs)
CMD(sched_scan_start, START_SCHED_SCAN);
CMD(probe_client, PROBE_CLIENT);
CMD(set_noack_map, SET_NOACK_MAP);
@@ -1805,6 +1817,11 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev,
nla_put_flag(msg, NL80211_ATTR_WIPHY_SELF_MANAGED_REG))
goto nla_put_failure;
+ if (rdev->wiphy.max_sched_scan_reqs &&
+ nla_put_u32(msg, NL80211_ATTR_SCHED_SCAN_MAX_REQS,
+ rdev->wiphy.max_sched_scan_reqs))
+ goto nla_put_failure;
+
if (nla_put(msg, NL80211_ATTR_EXT_FEATURES,
sizeof(rdev->wiphy.ext_features),
rdev->wiphy.ext_features))
@@ -1892,8 +1909,8 @@ static int nl80211_dump_wiphy_parse(struct sk_buff *skb,
struct nl80211_dump_wiphy_state *state)
{
struct nlattr **tb = genl_family_attrbuf(&nl80211_fam);
- int ret = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize,
- tb, nl80211_fam.maxattr, nl80211_policy);
+ int ret = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize, tb,
+ nl80211_fam.maxattr, nl80211_policy, NULL);
/* ignore parse errors for backward compatibility */
if (ret)
return 0;
@@ -2308,7 +2325,8 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info)
rem_txq_params) {
result = nla_parse_nested(tb, NL80211_TXQ_ATTR_MAX,
nl_txq_params,
- txq_params_policy);
+ txq_params_policy,
+ info->extack);
if (result)
return result;
result = parse_txq_params(tb, &txq_params);
@@ -2695,17 +2713,82 @@ static int parse_monitor_flags(struct nlattr *nla, u32 *mntrflags)
if (!nla)
return -EINVAL;
- if (nla_parse_nested(flags, NL80211_MNTR_FLAG_MAX,
- nla, mntr_flags_policy))
+ if (nla_parse_nested(flags, NL80211_MNTR_FLAG_MAX, nla,
+ mntr_flags_policy, NULL))
return -EINVAL;
for (flag = 1; flag <= NL80211_MNTR_FLAG_MAX; flag++)
if (flags[flag])
*mntrflags |= (1<<flag);
+ *mntrflags |= MONITOR_FLAG_CHANGED;
+
return 0;
}
+static int nl80211_parse_mon_options(struct cfg80211_registered_device *rdev,
+ enum nl80211_iftype type,
+ struct genl_info *info,
+ struct vif_params *params)
+{
+ bool change = false;
+ int err;
+
+ if (info->attrs[NL80211_ATTR_MNTR_FLAGS]) {
+ if (type != NL80211_IFTYPE_MONITOR)
+ return -EINVAL;
+
+ err = parse_monitor_flags(info->attrs[NL80211_ATTR_MNTR_FLAGS],
+ &params->flags);
+ if (err)
+ return err;
+
+ change = true;
+ }
+
+ if (params->flags & MONITOR_FLAG_ACTIVE &&
+ !(rdev->wiphy.features & NL80211_FEATURE_ACTIVE_MONITOR))
+ return -EOPNOTSUPP;
+
+ if (info->attrs[NL80211_ATTR_MU_MIMO_GROUP_DATA]) {
+ const u8 *mumimo_groups;
+ u32 cap_flag = NL80211_EXT_FEATURE_MU_MIMO_AIR_SNIFFER;
+
+ if (type != NL80211_IFTYPE_MONITOR)
+ return -EINVAL;
+
+ if (!wiphy_ext_feature_isset(&rdev->wiphy, cap_flag))
+ return -EOPNOTSUPP;
+
+ mumimo_groups =
+ nla_data(info->attrs[NL80211_ATTR_MU_MIMO_GROUP_DATA]);
+
+ /* bits 0 and 63 are reserved and must be zero */
+ if ((mumimo_groups[0] & BIT(0)) ||
+ (mumimo_groups[VHT_MUMIMO_GROUPS_DATA_LEN - 1] & BIT(7)))
+ return -EINVAL;
+
+ params->vht_mumimo_groups = mumimo_groups;
+ change = true;
+ }
+
+ if (info->attrs[NL80211_ATTR_MU_MIMO_FOLLOW_MAC_ADDR]) {
+ u32 cap_flag = NL80211_EXT_FEATURE_MU_MIMO_AIR_SNIFFER;
+
+ if (type != NL80211_IFTYPE_MONITOR)
+ return -EINVAL;
+
+ if (!wiphy_ext_feature_isset(&rdev->wiphy, cap_flag))
+ return -EOPNOTSUPP;
+
+ params->vht_mumimo_follow_addr =
+ nla_data(info->attrs[NL80211_ATTR_MU_MIMO_FOLLOW_MAC_ADDR]);
+ change = true;
+ }
+
+ return change ? 1 : 0;
+}
+
static int nl80211_valid_4addr(struct cfg80211_registered_device *rdev,
struct net_device *netdev, u8 use_4addr,
enum nl80211_iftype iftype)
@@ -2739,7 +2822,6 @@ static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info)
int err;
enum nl80211_iftype otype, ntype;
struct net_device *dev = info->user_ptr[1];
- u32 _flags, *flags = NULL;
bool change = false;
memset(&params, 0, sizeof(params));
@@ -2782,56 +2864,14 @@ static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info)
params.use_4addr = -1;
}
- if (info->attrs[NL80211_ATTR_MNTR_FLAGS]) {
- if (ntype != NL80211_IFTYPE_MONITOR)
- return -EINVAL;
- err = parse_monitor_flags(info->attrs[NL80211_ATTR_MNTR_FLAGS],
- &_flags);
- if (err)
- return err;
-
- flags = &_flags;
- change = true;
- }
-
- if (info->attrs[NL80211_ATTR_MU_MIMO_GROUP_DATA]) {
- const u8 *mumimo_groups;
- u32 cap_flag = NL80211_EXT_FEATURE_MU_MIMO_AIR_SNIFFER;
-
- if (!wiphy_ext_feature_isset(&rdev->wiphy, cap_flag))
- return -EOPNOTSUPP;
-
- mumimo_groups =
- nla_data(info->attrs[NL80211_ATTR_MU_MIMO_GROUP_DATA]);
-
- /* bits 0 and 63 are reserved and must be zero */
- if ((mumimo_groups[0] & BIT(7)) ||
- (mumimo_groups[VHT_MUMIMO_GROUPS_DATA_LEN - 1] & BIT(0)))
- return -EINVAL;
-
- memcpy(params.vht_mumimo_groups, mumimo_groups,
- VHT_MUMIMO_GROUPS_DATA_LEN);
- change = true;
- }
-
- if (info->attrs[NL80211_ATTR_MU_MIMO_FOLLOW_MAC_ADDR]) {
- u32 cap_flag = NL80211_EXT_FEATURE_MU_MIMO_AIR_SNIFFER;
-
- if (!wiphy_ext_feature_isset(&rdev->wiphy, cap_flag))
- return -EOPNOTSUPP;
-
- nla_memcpy(params.macaddr,
- info->attrs[NL80211_ATTR_MU_MIMO_FOLLOW_MAC_ADDR],
- ETH_ALEN);
+ err = nl80211_parse_mon_options(rdev, ntype, info, &params);
+ if (err < 0)
+ return err;
+ if (err > 0)
change = true;
- }
-
- if (flags && (*flags & MONITOR_FLAG_ACTIVE) &&
- !(rdev->wiphy.features & NL80211_FEATURE_ACTIVE_MONITOR))
- return -EOPNOTSUPP;
if (change)
- err = cfg80211_change_iface(rdev, dev, ntype, flags, &params);
+ err = cfg80211_change_iface(rdev, dev, ntype, &params);
else
err = 0;
@@ -2849,7 +2889,6 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info)
struct sk_buff *msg;
int err;
enum nl80211_iftype type = NL80211_IFTYPE_UNSPECIFIED;
- u32 flags;
/* to avoid failing a new interface creation due to pending removal */
cfg80211_destroy_ifaces(rdev);
@@ -2885,13 +2924,9 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info)
return err;
}
- err = parse_monitor_flags(type == NL80211_IFTYPE_MONITOR ?
- info->attrs[NL80211_ATTR_MNTR_FLAGS] : NULL,
- &flags);
-
- if (!err && (flags & MONITOR_FLAG_ACTIVE) &&
- !(rdev->wiphy.features & NL80211_FEATURE_ACTIVE_MONITOR))
- return -EOPNOTSUPP;
+ err = nl80211_parse_mon_options(rdev, type, info, &params);
+ if (err < 0)
+ return err;
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (!msg)
@@ -2899,8 +2934,7 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info)
wdev = rdev_add_virtual_intf(rdev,
nla_data(info->attrs[NL80211_ATTR_IFNAME]),
- NET_NAME_USER, type, err ? NULL : &flags,
- &params);
+ NET_NAME_USER, type, &params);
if (WARN_ON(!wdev)) {
nlmsg_free(msg);
return -EPROTO;
@@ -3561,7 +3595,7 @@ static int nl80211_parse_tx_bitrate_mask(struct genl_info *info,
if (sband == NULL)
return -EINVAL;
err = nla_parse_nested(tb, NL80211_TXRATE_MAX, tx_rates,
- nl80211_txattr_policy);
+ nl80211_txattr_policy, info->extack);
if (err)
return err;
if (tb[NL80211_TXRATE_LEGACY]) {
@@ -3818,6 +3852,19 @@ static bool nl80211_valid_auth_type(struct cfg80211_registered_device *rdev,
return false;
return true;
case NL80211_CMD_CONNECT:
+ /* SAE not supported yet */
+ if (auth_type == NL80211_AUTHTYPE_SAE)
+ return false;
+ /* FILS with SK PFS or PK not supported yet */
+ if (auth_type == NL80211_AUTHTYPE_FILS_SK_PFS ||
+ auth_type == NL80211_AUTHTYPE_FILS_PK)
+ return false;
+ if (!wiphy_ext_feature_isset(
+ &rdev->wiphy,
+ NL80211_EXT_FEATURE_FILS_SK_OFFLOAD) &&
+ auth_type == NL80211_AUTHTYPE_FILS_SK)
+ return false;
+ return true;
case NL80211_CMD_START_AP:
/* SAE not supported yet */
if (auth_type == NL80211_AUTHTYPE_SAE)
@@ -4100,8 +4147,8 @@ static int parse_station_flags(struct genl_info *info,
if (!nla)
return 0;
- if (nla_parse_nested(flags, NL80211_STA_FLAG_MAX,
- nla, sta_flags_policy))
+ if (nla_parse_nested(flags, NL80211_STA_FLAG_MAX, nla,
+ sta_flags_policy, info->extack))
return -EINVAL;
/*
@@ -4151,7 +4198,7 @@ static bool nl80211_put_sta_rate(struct sk_buff *msg, struct rate_info *info,
struct nlattr *rate;
u32 bitrate;
u16 bitrate_compat;
- enum nl80211_attrs rate_flg;
+ enum nl80211_rate_info rate_flg;
rate = nla_nest_start(msg, attr);
if (!rate)
@@ -4728,7 +4775,7 @@ static int nl80211_parse_sta_wme(struct genl_info *info,
nla = info->attrs[NL80211_ATTR_STA_WME];
err = nla_parse_nested(tb, NL80211_STA_WME_MAX, nla,
- nl80211_sta_wme_policy);
+ nl80211_sta_wme_policy, info->extack);
if (err)
return err;
@@ -5703,7 +5750,7 @@ static int nl80211_get_mesh_config(struct sk_buff *skb,
cur_params.dot11MeshGateAnnouncementProtocol) ||
nla_put_u8(msg, NL80211_MESHCONF_FORWARDING,
cur_params.dot11MeshForwarding) ||
- nla_put_u32(msg, NL80211_MESHCONF_RSSI_THRESHOLD,
+ nla_put_s32(msg, NL80211_MESHCONF_RSSI_THRESHOLD,
cur_params.rssi_threshold) ||
nla_put_u32(msg, NL80211_MESHCONF_HT_OPMODE,
cur_params.ht_opmode) ||
@@ -5853,7 +5900,7 @@ do { \
return -EINVAL;
if (nla_parse_nested(tb, NL80211_MESHCONF_ATTR_MAX,
info->attrs[NL80211_ATTR_MESH_CONFIG],
- nl80211_meshconf_params_policy))
+ nl80211_meshconf_params_policy, info->extack))
return -EINVAL;
/* This makes sure that there aren't more than 32 mesh config
@@ -6002,7 +6049,7 @@ static int nl80211_parse_mesh_setup(struct genl_info *info,
return -EINVAL;
if (nla_parse_nested(tb, NL80211_MESH_SETUP_ATTR_MAX,
info->attrs[NL80211_ATTR_MESH_SETUP],
- nl80211_mesh_setup_params_policy))
+ nl80211_mesh_setup_params_policy, info->extack))
return -EINVAL;
if (tb[NL80211_MESH_SETUP_ENABLE_VENDOR_SYNC])
@@ -6393,7 +6440,8 @@ static int nl80211_set_reg(struct sk_buff *skb, struct genl_info *info)
nla_for_each_nested(nl_reg_rule, info->attrs[NL80211_ATTR_REG_RULES],
rem_reg_rules) {
r = nla_parse_nested(tb, NL80211_REG_RULE_ATTR_MAX,
- nl_reg_rule, reg_rule_policy);
+ nl_reg_rule, reg_rule_policy,
+ info->extack);
if (r)
goto bad_reg;
r = parse_reg_rule(tb, &rd->reg_rules[rule_idx]);
@@ -6461,7 +6509,7 @@ static int parse_bss_select(struct nlattr *nla, struct wiphy *wiphy,
return -EINVAL;
err = nla_parse_nested(attr, NL80211_BSS_SELECT_ATTR_MAX, nest,
- nl80211_bss_select_policy);
+ nl80211_bss_select_policy, NULL);
if (err)
return err;
@@ -6545,6 +6593,19 @@ static int nl80211_parse_random_mac(struct nlattr **attrs,
return 0;
}
+static bool cfg80211_off_channel_oper_allowed(struct wireless_dev *wdev)
+{
+ ASSERT_WDEV_LOCK(wdev);
+
+ if (!cfg80211_beaconing_iface_active(wdev))
+ return true;
+
+ if (!(wdev->chandef.chan->flags & IEEE80211_CHAN_RADAR))
+ return true;
+
+ return regulatory_pre_cac_allowed(wdev->wiphy);
+}
+
static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info)
{
struct cfg80211_registered_device *rdev = info->user_ptr[0];
@@ -6670,6 +6731,25 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info)
request->n_channels = i;
+ wdev_lock(wdev);
+ if (!cfg80211_off_channel_oper_allowed(wdev)) {
+ struct ieee80211_channel *chan;
+
+ if (request->n_channels != 1) {
+ wdev_unlock(wdev);
+ err = -EBUSY;
+ goto out_free;
+ }
+
+ chan = request->channels[0];
+ if (chan->center_freq != wdev->chandef.chan->center_freq) {
+ wdev_unlock(wdev);
+ err = -EBUSY;
+ goto out_free;
+ }
+ }
+ wdev_unlock(wdev);
+
i = 0;
if (n_ssids) {
nla_for_each_nested(attr, info->attrs[NL80211_ATTR_SCAN_SSIDS], tmp) {
@@ -6862,7 +6942,7 @@ nl80211_parse_sched_scan_plans(struct wiphy *wiphy, int n_plans,
return -EINVAL;
err = nla_parse_nested(plan, NL80211_SCHED_SCAN_PLAN_MAX,
- attr, nl80211_plan_policy);
+ attr, nl80211_plan_policy, NULL);
if (err)
return err;
@@ -6953,11 +7033,19 @@ nl80211_parse_sched_scan(struct wiphy *wiphy, struct wireless_dev *wdev,
err = nla_parse_nested(tb,
NL80211_SCHED_SCAN_MATCH_ATTR_MAX,
- attr, nl80211_match_policy);
+ attr, nl80211_match_policy,
+ NULL);
if (err)
return ERR_PTR(err);
+
+ /* SSID and BSSID are mutually exclusive */
+ if (tb[NL80211_SCHED_SCAN_MATCH_ATTR_SSID] &&
+ tb[NL80211_SCHED_SCAN_MATCH_ATTR_BSSID])
+ return ERR_PTR(-EINVAL);
+
/* add other standalone attributes here */
- if (tb[NL80211_SCHED_SCAN_MATCH_ATTR_SSID]) {
+ if (tb[NL80211_SCHED_SCAN_MATCH_ATTR_SSID] ||
+ tb[NL80211_SCHED_SCAN_MATCH_ATTR_BSSID]) {
n_match_sets++;
continue;
}
@@ -7128,15 +7216,17 @@ nl80211_parse_sched_scan(struct wiphy *wiphy, struct wireless_dev *wdev,
nla_for_each_nested(attr,
attrs[NL80211_ATTR_SCHED_SCAN_MATCH],
tmp) {
- struct nlattr *ssid, *rssi;
+ struct nlattr *ssid, *bssid, *rssi;
err = nla_parse_nested(tb,
NL80211_SCHED_SCAN_MATCH_ATTR_MAX,
- attr, nl80211_match_policy);
+ attr, nl80211_match_policy,
+ NULL);
if (err)
goto out_free;
ssid = tb[NL80211_SCHED_SCAN_MATCH_ATTR_SSID];
- if (ssid) {
+ bssid = tb[NL80211_SCHED_SCAN_MATCH_ATTR_BSSID];
+ if (ssid || bssid) {
if (WARN_ON(i >= n_match_sets)) {
/* this indicates a programming error,
* the loop above should have verified
@@ -7146,14 +7236,25 @@ nl80211_parse_sched_scan(struct wiphy *wiphy, struct wireless_dev *wdev,
goto out_free;
}
- if (nla_len(ssid) > IEEE80211_MAX_SSID_LEN) {
- err = -EINVAL;
- goto out_free;
+ if (ssid) {
+ if (nla_len(ssid) > IEEE80211_MAX_SSID_LEN) {
+ err = -EINVAL;
+ goto out_free;
+ }
+ memcpy(request->match_sets[i].ssid.ssid,
+ nla_data(ssid), nla_len(ssid));
+ request->match_sets[i].ssid.ssid_len =
+ nla_len(ssid);
}
- memcpy(request->match_sets[i].ssid.ssid,
- nla_data(ssid), nla_len(ssid));
- request->match_sets[i].ssid.ssid_len =
- nla_len(ssid);
+ if (bssid) {
+ if (nla_len(bssid) != ETH_ALEN) {
+ err = -EINVAL;
+ goto out_free;
+ }
+ memcpy(request->match_sets[i].bssid,
+ nla_data(bssid), ETH_ALEN);
+ }
+
/* special attribute - old implementation w/a */
request->match_sets[i].rssi_thold =
default_match_rssi;
@@ -7261,14 +7362,16 @@ static int nl80211_start_sched_scan(struct sk_buff *skb,
struct net_device *dev = info->user_ptr[1];
struct wireless_dev *wdev = dev->ieee80211_ptr;
struct cfg80211_sched_scan_request *sched_scan_req;
+ bool want_multi;
int err;
- if (!(rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_SCHED_SCAN) ||
- !rdev->ops->sched_scan_start)
+ if (!rdev->wiphy.max_sched_scan_reqs || !rdev->ops->sched_scan_start)
return -EOPNOTSUPP;
- if (rdev->sched_scan_req)
- return -EINPROGRESS;
+ want_multi = info->attrs[NL80211_ATTR_SCHED_SCAN_MULTI];
+ err = cfg80211_sched_scan_req_possible(rdev, want_multi);
+ if (err)
+ return err;
sched_scan_req = nl80211_parse_sched_scan(&rdev->wiphy, wdev,
info->attrs,
@@ -7278,6 +7381,14 @@ static int nl80211_start_sched_scan(struct sk_buff *skb,
if (err)
goto out_err;
+ /* leave request id zero for legacy request
+ * or if driver does not support multi-scheduled scan
+ */
+ if (want_multi && rdev->wiphy.max_sched_scan_reqs > 1) {
+ while (!sched_scan_req->reqid)
+ sched_scan_req->reqid = rdev->wiphy.cookie_counter++;
+ }
+
err = rdev_sched_scan_start(rdev, dev, sched_scan_req);
if (err)
goto out_free;
@@ -7288,10 +7399,9 @@ static int nl80211_start_sched_scan(struct sk_buff *skb,
if (info->attrs[NL80211_ATTR_SOCKET_OWNER])
sched_scan_req->owner_nlportid = info->snd_portid;
- rcu_assign_pointer(rdev->sched_scan_req, sched_scan_req);
+ cfg80211_add_sched_scan_req(rdev, sched_scan_req);
- nl80211_send_sched_scan(rdev, dev,
- NL80211_CMD_START_SCHED_SCAN);
+ nl80211_send_sched_scan(sched_scan_req, NL80211_CMD_START_SCHED_SCAN);
return 0;
out_free:
@@ -7303,13 +7413,27 @@ out_err:
static int nl80211_stop_sched_scan(struct sk_buff *skb,
struct genl_info *info)
{
+ struct cfg80211_sched_scan_request *req;
struct cfg80211_registered_device *rdev = info->user_ptr[0];
+ u64 cookie;
- if (!(rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_SCHED_SCAN) ||
- !rdev->ops->sched_scan_stop)
+ if (!rdev->wiphy.max_sched_scan_reqs || !rdev->ops->sched_scan_stop)
return -EOPNOTSUPP;
- return __cfg80211_stop_sched_scan(rdev, false);
+ if (info->attrs[NL80211_ATTR_COOKIE]) {
+ cookie = nla_get_u64(info->attrs[NL80211_ATTR_COOKIE]);
+ return __cfg80211_stop_sched_scan(rdev, cookie, false);
+ }
+
+ req = list_first_or_null_rcu(&rdev->sched_scan_req_list,
+ struct cfg80211_sched_scan_request,
+ list);
+ if (!req || req->reqid ||
+ (req->owner_nlportid &&
+ req->owner_nlportid != info->snd_portid))
+ return -ENOENT;
+
+ return cfg80211_stop_sched_scan_req(rdev, req, false);
}
static int nl80211_start_radar_detection(struct sk_buff *skb,
@@ -7433,7 +7557,7 @@ static int nl80211_channel_switch(struct sk_buff *skb, struct genl_info *info)
err = nla_parse_nested(csa_attrs, NL80211_ATTR_MAX,
info->attrs[NL80211_ATTR_CSA_IES],
- nl80211_policy);
+ nl80211_policy, info->extack);
if (err)
return err;
@@ -8639,7 +8763,8 @@ static int nl80211_testmode_dump(struct sk_buff *skb,
struct nlattr **attrbuf = genl_family_attrbuf(&nl80211_fam);
err = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize,
- attrbuf, nl80211_fam.maxattr, nl80211_policy);
+ attrbuf, nl80211_fam.maxattr,
+ nl80211_policy, NULL);
if (err)
goto out_err;
@@ -8867,6 +8992,35 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info)
}
}
+ if (wiphy_ext_feature_isset(&rdev->wiphy,
+ NL80211_EXT_FEATURE_FILS_SK_OFFLOAD) &&
+ info->attrs[NL80211_ATTR_FILS_ERP_USERNAME] &&
+ info->attrs[NL80211_ATTR_FILS_ERP_REALM] &&
+ info->attrs[NL80211_ATTR_FILS_ERP_NEXT_SEQ_NUM] &&
+ info->attrs[NL80211_ATTR_FILS_ERP_RRK]) {
+ connect.fils_erp_username =
+ nla_data(info->attrs[NL80211_ATTR_FILS_ERP_USERNAME]);
+ connect.fils_erp_username_len =
+ nla_len(info->attrs[NL80211_ATTR_FILS_ERP_USERNAME]);
+ connect.fils_erp_realm =
+ nla_data(info->attrs[NL80211_ATTR_FILS_ERP_REALM]);
+ connect.fils_erp_realm_len =
+ nla_len(info->attrs[NL80211_ATTR_FILS_ERP_REALM]);
+ connect.fils_erp_next_seq_num =
+ nla_get_u16(
+ info->attrs[NL80211_ATTR_FILS_ERP_NEXT_SEQ_NUM]);
+ connect.fils_erp_rrk =
+ nla_data(info->attrs[NL80211_ATTR_FILS_ERP_RRK]);
+ connect.fils_erp_rrk_len =
+ nla_len(info->attrs[NL80211_ATTR_FILS_ERP_RRK]);
+ } else if (info->attrs[NL80211_ATTR_FILS_ERP_USERNAME] ||
+ info->attrs[NL80211_ATTR_FILS_ERP_REALM] ||
+ info->attrs[NL80211_ATTR_FILS_ERP_NEXT_SEQ_NUM] ||
+ info->attrs[NL80211_ATTR_FILS_ERP_RRK]) {
+ kzfree(connkeys);
+ return -EINVAL;
+ }
+
wdev_lock(dev->ieee80211_ptr);
err = cfg80211_connect(rdev, dev, &connect, connkeys,
@@ -8986,14 +9140,28 @@ static int nl80211_setdel_pmksa(struct sk_buff *skb, struct genl_info *info)
memset(&pmksa, 0, sizeof(struct cfg80211_pmksa));
- if (!info->attrs[NL80211_ATTR_MAC])
- return -EINVAL;
-
if (!info->attrs[NL80211_ATTR_PMKID])
return -EINVAL;
pmksa.pmkid = nla_data(info->attrs[NL80211_ATTR_PMKID]);
- pmksa.bssid = nla_data(info->attrs[NL80211_ATTR_MAC]);
+
+ if (info->attrs[NL80211_ATTR_MAC]) {
+ pmksa.bssid = nla_data(info->attrs[NL80211_ATTR_MAC]);
+ } else if (info->attrs[NL80211_ATTR_SSID] &&
+ info->attrs[NL80211_ATTR_FILS_CACHE_ID] &&
+ (info->genlhdr->cmd == NL80211_CMD_DEL_PMKSA ||
+ info->attrs[NL80211_ATTR_PMK])) {
+ pmksa.ssid = nla_data(info->attrs[NL80211_ATTR_SSID]);
+ pmksa.ssid_len = nla_len(info->attrs[NL80211_ATTR_SSID]);
+ pmksa.cache_id =
+ nla_data(info->attrs[NL80211_ATTR_FILS_CACHE_ID]);
+ } else {
+ return -EINVAL;
+ }
+ if (info->attrs[NL80211_ATTR_PMK]) {
+ pmksa.pmk = nla_data(info->attrs[NL80211_ATTR_PMK]);
+ pmksa.pmk_len = nla_len(info->attrs[NL80211_ATTR_PMK]);
+ }
if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION &&
dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_CLIENT)
@@ -9096,6 +9264,7 @@ static int nl80211_remain_on_channel(struct sk_buff *skb,
struct cfg80211_registered_device *rdev = info->user_ptr[0];
struct wireless_dev *wdev = info->user_ptr[1];
struct cfg80211_chan_def chandef;
+ const struct cfg80211_chan_def *compat_chandef;
struct sk_buff *msg;
void *hdr;
u64 cookie;
@@ -9124,6 +9293,18 @@ static int nl80211_remain_on_channel(struct sk_buff *skb,
if (err)
return err;
+ wdev_lock(wdev);
+ if (!cfg80211_off_channel_oper_allowed(wdev) &&
+ !cfg80211_chandef_identical(&wdev->chandef, &chandef)) {
+ compat_chandef = cfg80211_chandef_compatible(&wdev->chandef,
+ &chandef);
+ if (compat_chandef != &chandef) {
+ wdev_unlock(wdev);
+ return -EBUSY;
+ }
+ }
+ wdev_unlock(wdev);
+
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (!msg)
return -ENOMEM;
@@ -9299,6 +9480,13 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info)
if (!chandef.chan && params.offchan)
return -EINVAL;
+ wdev_lock(wdev);
+ if (params.offchan && !cfg80211_off_channel_oper_allowed(wdev)) {
+ wdev_unlock(wdev);
+ return -EBUSY;
+ }
+ wdev_unlock(wdev);
+
params.buf = nla_data(info->attrs[NL80211_ATTR_FRAME]);
params.len = nla_len(info->attrs[NL80211_ATTR_FRAME]);
@@ -9466,7 +9654,7 @@ static int nl80211_get_power_save(struct sk_buff *skb, struct genl_info *info)
static const struct nla_policy
nl80211_attr_cqm_policy[NL80211_ATTR_CQM_MAX + 1] = {
- [NL80211_ATTR_CQM_RSSI_THOLD] = { .type = NLA_U32 },
+ [NL80211_ATTR_CQM_RSSI_THOLD] = { .type = NLA_BINARY },
[NL80211_ATTR_CQM_RSSI_HYST] = { .type = NLA_U32 },
[NL80211_ATTR_CQM_RSSI_THRESHOLD_EVENT] = { .type = NLA_U32 },
[NL80211_ATTR_CQM_TXE_RATE] = { .type = NLA_U32 },
@@ -9495,28 +9683,123 @@ static int nl80211_set_cqm_txe(struct genl_info *info,
return rdev_set_cqm_txe_config(rdev, dev, rate, pkts, intvl);
}
+static int cfg80211_cqm_rssi_update(struct cfg80211_registered_device *rdev,
+ struct net_device *dev)
+{
+ struct wireless_dev *wdev = dev->ieee80211_ptr;
+ s32 last, low, high;
+ u32 hyst;
+ int i, n;
+ int err;
+
+ /* RSSI reporting disabled? */
+ if (!wdev->cqm_config)
+ return rdev_set_cqm_rssi_range_config(rdev, dev, 0, 0);
+
+ /*
+ * Obtain current RSSI value if possible, if not and no RSSI threshold
+ * event has been received yet, we should receive an event after a
+ * connection is established and enough beacons received to calculate
+ * the average.
+ */
+ if (!wdev->cqm_config->last_rssi_event_value && wdev->current_bss &&
+ rdev->ops->get_station) {
+ struct station_info sinfo;
+ u8 *mac_addr;
+
+ mac_addr = wdev->current_bss->pub.bssid;
+
+ err = rdev_get_station(rdev, dev, mac_addr, &sinfo);
+ if (err)
+ return err;
+
+ if (sinfo.filled & BIT(NL80211_STA_INFO_BEACON_SIGNAL_AVG))
+ wdev->cqm_config->last_rssi_event_value =
+ (s8) sinfo.rx_beacon_signal_avg;
+ }
+
+ last = wdev->cqm_config->last_rssi_event_value;
+ hyst = wdev->cqm_config->rssi_hyst;
+ n = wdev->cqm_config->n_rssi_thresholds;
+
+ for (i = 0; i < n; i++)
+ if (last < wdev->cqm_config->rssi_thresholds[i])
+ break;
+
+ low = i > 0 ?
+ (wdev->cqm_config->rssi_thresholds[i - 1] - hyst) : S32_MIN;
+ high = i < n ?
+ (wdev->cqm_config->rssi_thresholds[i] + hyst - 1) : S32_MAX;
+
+ return rdev_set_cqm_rssi_range_config(rdev, dev, low, high);
+}
+
static int nl80211_set_cqm_rssi(struct genl_info *info,
- s32 threshold, u32 hysteresis)
+ const s32 *thresholds, int n_thresholds,
+ u32 hysteresis)
{
struct cfg80211_registered_device *rdev = info->user_ptr[0];
struct net_device *dev = info->user_ptr[1];
struct wireless_dev *wdev = dev->ieee80211_ptr;
+ int i, err;
+ s32 prev = S32_MIN;
- if (threshold > 0)
- return -EINVAL;
-
- /* disabling - hysteresis should also be zero then */
- if (threshold == 0)
- hysteresis = 0;
+ /* Check all values negative and sorted */
+ for (i = 0; i < n_thresholds; i++) {
+ if (thresholds[i] > 0 || thresholds[i] <= prev)
+ return -EINVAL;
- if (!rdev->ops->set_cqm_rssi_config)
- return -EOPNOTSUPP;
+ prev = thresholds[i];
+ }
if (wdev->iftype != NL80211_IFTYPE_STATION &&
wdev->iftype != NL80211_IFTYPE_P2P_CLIENT)
return -EOPNOTSUPP;
- return rdev_set_cqm_rssi_config(rdev, dev, threshold, hysteresis);
+ wdev_lock(wdev);
+ cfg80211_cqm_config_free(wdev);
+ wdev_unlock(wdev);
+
+ if (n_thresholds <= 1 && rdev->ops->set_cqm_rssi_config) {
+ if (n_thresholds == 0 || thresholds[0] == 0) /* Disabling */
+ return rdev_set_cqm_rssi_config(rdev, dev, 0, 0);
+
+ return rdev_set_cqm_rssi_config(rdev, dev,
+ thresholds[0], hysteresis);
+ }
+
+ if (!wiphy_ext_feature_isset(&rdev->wiphy,
+ NL80211_EXT_FEATURE_CQM_RSSI_LIST))
+ return -EOPNOTSUPP;
+
+ if (n_thresholds == 1 && thresholds[0] == 0) /* Disabling */
+ n_thresholds = 0;
+
+ wdev_lock(wdev);
+ if (n_thresholds) {
+ struct cfg80211_cqm_config *cqm_config;
+
+ cqm_config = kzalloc(sizeof(struct cfg80211_cqm_config) +
+ n_thresholds * sizeof(s32), GFP_KERNEL);
+ if (!cqm_config) {
+ err = -ENOMEM;
+ goto unlock;
+ }
+
+ cqm_config->rssi_hyst = hysteresis;
+ cqm_config->n_rssi_thresholds = n_thresholds;
+ memcpy(cqm_config->rssi_thresholds, thresholds,
+ n_thresholds * sizeof(s32));
+
+ wdev->cqm_config = cqm_config;
+ }
+
+ err = cfg80211_cqm_rssi_update(rdev, dev);
+
+unlock:
+ wdev_unlock(wdev);
+
+ return err;
}
static int nl80211_set_cqm(struct sk_buff *skb, struct genl_info *info)
@@ -9530,16 +9813,22 @@ static int nl80211_set_cqm(struct sk_buff *skb, struct genl_info *info)
return -EINVAL;
err = nla_parse_nested(attrs, NL80211_ATTR_CQM_MAX, cqm,
- nl80211_attr_cqm_policy);
+ nl80211_attr_cqm_policy, info->extack);
if (err)
return err;
if (attrs[NL80211_ATTR_CQM_RSSI_THOLD] &&
attrs[NL80211_ATTR_CQM_RSSI_HYST]) {
- s32 threshold = nla_get_s32(attrs[NL80211_ATTR_CQM_RSSI_THOLD]);
+ const s32 *thresholds =
+ nla_data(attrs[NL80211_ATTR_CQM_RSSI_THOLD]);
+ int len = nla_len(attrs[NL80211_ATTR_CQM_RSSI_THOLD]);
u32 hysteresis = nla_get_u32(attrs[NL80211_ATTR_CQM_RSSI_HYST]);
- return nl80211_set_cqm_rssi(info, threshold, hysteresis);
+ if (len % 4)
+ return -EINVAL;
+
+ return nl80211_set_cqm_rssi(info, thresholds, len / 4,
+ hysteresis);
}
if (attrs[NL80211_ATTR_CQM_TXE_RATE] &&
@@ -9940,7 +10229,7 @@ static int nl80211_parse_wowlan_tcp(struct cfg80211_registered_device *rdev,
return -EINVAL;
err = nla_parse_nested(tb, MAX_NL80211_WOWLAN_TCP, attr,
- nl80211_wowlan_tcp_policy);
+ nl80211_wowlan_tcp_policy, NULL);
if (err)
return err;
@@ -10085,7 +10374,8 @@ static int nl80211_parse_wowlan_nd(struct cfg80211_registered_device *rdev,
goto out;
}
- err = nla_parse_nested(tb, NL80211_ATTR_MAX, attr, nl80211_policy);
+ err = nla_parse_nested(tb, NL80211_ATTR_MAX, attr, nl80211_policy,
+ NULL);
if (err)
goto out;
@@ -10122,7 +10412,7 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info)
err = nla_parse_nested(tb, MAX_NL80211_WOWLAN_TRIG,
info->attrs[NL80211_ATTR_WOWLAN_TRIGGERS],
- nl80211_wowlan_policy);
+ nl80211_wowlan_policy, info->extack);
if (err)
return err;
@@ -10205,7 +10495,7 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info)
u8 *mask_pat;
nla_parse_nested(pat_tb, MAX_NL80211_PKTPAT, pat,
- NULL);
+ NULL, info->extack);
err = -EINVAL;
if (!pat_tb[NL80211_PKTPAT_MASK] ||
!pat_tb[NL80211_PKTPAT_PATTERN])
@@ -10416,7 +10706,7 @@ static int nl80211_parse_coalesce_rule(struct cfg80211_registered_device *rdev,
struct nlattr *pat_tb[NUM_NL80211_PKTPAT];
err = nla_parse_nested(tb, NL80211_ATTR_COALESCE_RULE_MAX, rule,
- nl80211_coalesce_policy);
+ nl80211_coalesce_policy, NULL);
if (err)
return err;
@@ -10454,7 +10744,7 @@ static int nl80211_parse_coalesce_rule(struct cfg80211_registered_device *rdev,
rem) {
u8 *mask_pat;
- nla_parse_nested(pat_tb, MAX_NL80211_PKTPAT, pat, NULL);
+ nla_parse_nested(pat_tb, MAX_NL80211_PKTPAT, pat, NULL, NULL);
if (!pat_tb[NL80211_PKTPAT_MASK] ||
!pat_tb[NL80211_PKTPAT_PATTERN])
return -EINVAL;
@@ -10575,7 +10865,7 @@ static int nl80211_set_rekey_data(struct sk_buff *skb, struct genl_info *info)
err = nla_parse_nested(tb, MAX_NL80211_REKEY_DATA,
info->attrs[NL80211_ATTR_REKEY_DATA],
- nl80211_rekey_policy);
+ nl80211_rekey_policy, info->extack);
if (err)
return err;
@@ -10892,7 +11182,7 @@ static int nl80211_nan_add_func(struct sk_buff *skb,
err = nla_parse_nested(tb, NL80211_NAN_FUNC_ATTR_MAX,
info->attrs[NL80211_ATTR_NAN_FUNC],
- nl80211_nan_func_policy);
+ nl80211_nan_func_policy, info->extack);
if (err)
return err;
@@ -10989,7 +11279,7 @@ static int nl80211_nan_add_func(struct sk_buff *skb,
err = nla_parse_nested(srf_tb, NL80211_NAN_SRF_ATTR_MAX,
tb[NL80211_NAN_FUNC_SRF],
- nl80211_nan_srf_policy);
+ nl80211_nan_srf_policy, info->extack);
if (err)
goto out;
@@ -11524,8 +11814,8 @@ static int nl80211_prepare_vendor_dump(struct sk_buff *skb,
return 0;
}
- err = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize,
- attrbuf, nl80211_fam.maxattr, nl80211_policy);
+ err = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize, attrbuf,
+ nl80211_fam.maxattr, nl80211_policy, NULL);
if (err)
return err;
@@ -12970,18 +13260,19 @@ static int nl80211_prep_scan_msg(struct sk_buff *msg,
static int
nl80211_prep_sched_scan_msg(struct sk_buff *msg,
- struct cfg80211_registered_device *rdev,
- struct net_device *netdev,
- u32 portid, u32 seq, int flags, u32 cmd)
+ struct cfg80211_sched_scan_request *req, u32 cmd)
{
void *hdr;
- hdr = nl80211hdr_put(msg, portid, seq, flags, cmd);
+ hdr = nl80211hdr_put(msg, 0, 0, 0, cmd);
if (!hdr)
return -1;
- if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) ||
- nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex))
+ if (nla_put_u32(msg, NL80211_ATTR_WIPHY,
+ wiphy_to_rdev(req->wiphy)->wiphy_idx) ||
+ nla_put_u32(msg, NL80211_ATTR_IFINDEX, req->dev->ifindex) ||
+ nla_put_u64_64bit(msg, NL80211_ATTR_COOKIE, req->reqid,
+ NL80211_ATTR_PAD))
goto nla_put_failure;
genlmsg_end(msg, hdr);
@@ -13041,8 +13332,7 @@ void nl80211_send_scan_msg(struct cfg80211_registered_device *rdev,
NL80211_MCGRP_SCAN, GFP_KERNEL);
}
-void nl80211_send_sched_scan(struct cfg80211_registered_device *rdev,
- struct net_device *netdev, u32 cmd)
+void nl80211_send_sched_scan(struct cfg80211_sched_scan_request *req, u32 cmd)
{
struct sk_buff *msg;
@@ -13050,12 +13340,12 @@ void nl80211_send_sched_scan(struct cfg80211_registered_device *rdev,
if (!msg)
return;
- if (nl80211_prep_sched_scan_msg(msg, rdev, netdev, 0, 0, 0, cmd) < 0) {
+ if (nl80211_prep_sched_scan_msg(msg, req, cmd) < 0) {
nlmsg_free(msg);
return;
}
- genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
+ genlmsg_multicast_netns(&nl80211_fam, wiphy_net(req->wiphy), msg, 0,
NL80211_MCGRP_SCAN, GFP_KERNEL);
}
@@ -13296,17 +13586,16 @@ void nl80211_send_assoc_timeout(struct cfg80211_registered_device *rdev,
}
void nl80211_send_connect_result(struct cfg80211_registered_device *rdev,
- struct net_device *netdev, const u8 *bssid,
- const u8 *req_ie, size_t req_ie_len,
- const u8 *resp_ie, size_t resp_ie_len,
- int status,
- enum nl80211_timeout_reason timeout_reason,
+ struct net_device *netdev,
+ struct cfg80211_connect_resp_params *cr,
gfp_t gfp)
{
struct sk_buff *msg;
void *hdr;
- msg = nlmsg_new(100 + req_ie_len + resp_ie_len, gfp);
+ msg = nlmsg_new(100 + cr->req_ie_len + cr->resp_ie_len +
+ cr->fils_kek_len + cr->pmk_len +
+ (cr->pmkid ? WLAN_PMKID_LEN : 0), gfp);
if (!msg)
return;
@@ -13318,17 +13607,31 @@ void nl80211_send_connect_result(struct cfg80211_registered_device *rdev,
if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) ||
nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex) ||
- (bssid && nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, bssid)) ||
+ (cr->bssid &&
+ nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, cr->bssid)) ||
nla_put_u16(msg, NL80211_ATTR_STATUS_CODE,
- status < 0 ? WLAN_STATUS_UNSPECIFIED_FAILURE :
- status) ||
- (status < 0 &&
+ cr->status < 0 ? WLAN_STATUS_UNSPECIFIED_FAILURE :
+ cr->status) ||
+ (cr->status < 0 &&
(nla_put_flag(msg, NL80211_ATTR_TIMED_OUT) ||
- nla_put_u32(msg, NL80211_ATTR_TIMEOUT_REASON, timeout_reason))) ||
- (req_ie &&
- nla_put(msg, NL80211_ATTR_REQ_IE, req_ie_len, req_ie)) ||
- (resp_ie &&
- nla_put(msg, NL80211_ATTR_RESP_IE, resp_ie_len, resp_ie)))
+ nla_put_u32(msg, NL80211_ATTR_TIMEOUT_REASON,
+ cr->timeout_reason))) ||
+ (cr->req_ie &&
+ nla_put(msg, NL80211_ATTR_REQ_IE, cr->req_ie_len, cr->req_ie)) ||
+ (cr->resp_ie &&
+ nla_put(msg, NL80211_ATTR_RESP_IE, cr->resp_ie_len,
+ cr->resp_ie)) ||
+ (cr->update_erp_next_seq_num &&
+ nla_put_u16(msg, NL80211_ATTR_FILS_ERP_NEXT_SEQ_NUM,
+ cr->fils_erp_next_seq_num)) ||
+ (cr->status == WLAN_STATUS_SUCCESS &&
+ ((cr->fils_kek &&
+ nla_put(msg, NL80211_ATTR_FILS_KEK, cr->fils_kek_len,
+ cr->fils_kek)) ||
+ (cr->pmk &&
+ nla_put(msg, NL80211_ATTR_PMK, cr->pmk_len, cr->pmk)) ||
+ (cr->pmkid &&
+ nla_put(msg, NL80211_ATTR_PMKID, WLAN_PMKID_LEN, cr->pmkid)))))
goto nla_put_failure;
genlmsg_end(msg, hdr);
@@ -13343,14 +13646,14 @@ void nl80211_send_connect_result(struct cfg80211_registered_device *rdev,
}
void nl80211_send_roamed(struct cfg80211_registered_device *rdev,
- struct net_device *netdev, const u8 *bssid,
- const u8 *req_ie, size_t req_ie_len,
- const u8 *resp_ie, size_t resp_ie_len, gfp_t gfp)
+ struct net_device *netdev,
+ struct cfg80211_roam_info *info, gfp_t gfp)
{
struct sk_buff *msg;
void *hdr;
+ const u8 *bssid = info->bss ? info->bss->bssid : info->bssid;
- msg = nlmsg_new(100 + req_ie_len + resp_ie_len, gfp);
+ msg = nlmsg_new(100 + info->req_ie_len + info->resp_ie_len, gfp);
if (!msg)
return;
@@ -13363,10 +13666,12 @@ void nl80211_send_roamed(struct cfg80211_registered_device *rdev,
if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) ||
nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex) ||
nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, bssid) ||
- (req_ie &&
- nla_put(msg, NL80211_ATTR_REQ_IE, req_ie_len, req_ie)) ||
- (resp_ie &&
- nla_put(msg, NL80211_ATTR_RESP_IE, resp_ie_len, resp_ie)))
+ (info->req_ie &&
+ nla_put(msg, NL80211_ATTR_REQ_IE, info->req_ie_len,
+ info->req_ie)) ||
+ (info->resp_ie &&
+ nla_put(msg, NL80211_ATTR_RESP_IE, info->resp_ie_len,
+ info->resp_ie)))
goto nla_put_failure;
genlmsg_end(msg, hdr);
@@ -13968,6 +14273,8 @@ void cfg80211_cqm_rssi_notify(struct net_device *dev,
s32 rssi_level, gfp_t gfp)
{
struct sk_buff *msg;
+ struct wireless_dev *wdev = dev->ieee80211_ptr;
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
trace_cfg80211_cqm_rssi_notify(dev, rssi_event, rssi_level);
@@ -13975,6 +14282,15 @@ void cfg80211_cqm_rssi_notify(struct net_device *dev,
rssi_event != NL80211_CQM_RSSI_THRESHOLD_EVENT_HIGH))
return;
+ if (wdev->cqm_config) {
+ wdev->cqm_config->last_rssi_event_value = rssi_level;
+
+ cfg80211_cqm_rssi_update(rdev, dev);
+
+ if (rssi_level == 0)
+ rssi_level = wdev->cqm_config->last_rssi_event_value;
+ }
+
msg = cfg80211_prepare_cqm(dev, NULL, gfp);
if (!msg)
return;
@@ -14619,26 +14935,26 @@ static int nl80211_netlink_notify(struct notifier_block * nb,
rcu_read_lock();
list_for_each_entry_rcu(rdev, &cfg80211_rdev_list, list) {
- bool schedule_destroy_work = false;
- struct cfg80211_sched_scan_request *sched_scan_req =
- rcu_dereference(rdev->sched_scan_req);
-
- if (sched_scan_req && notify->portid &&
- sched_scan_req->owner_nlportid == notify->portid) {
- sched_scan_req->owner_nlportid = 0;
+ struct cfg80211_sched_scan_request *sched_scan_req;
- if (rdev->ops->sched_scan_stop &&
- rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_SCHED_SCAN)
+ list_for_each_entry_rcu(sched_scan_req,
+ &rdev->sched_scan_req_list,
+ list) {
+ if (sched_scan_req->owner_nlportid == notify->portid) {
+ sched_scan_req->nl_owner_dead = true;
schedule_work(&rdev->sched_scan_stop_wk);
+ }
}
list_for_each_entry_rcu(wdev, &rdev->wiphy.wdev_list, list) {
cfg80211_mlme_unregister_socket(wdev, notify->portid);
- if (wdev->owner_nlportid == notify->portid)
- schedule_destroy_work = true;
- else if (wdev->conn_owner_nlportid == notify->portid)
+ if (wdev->owner_nlportid == notify->portid) {
+ wdev->nl_owner_dead = true;
+ schedule_work(&rdev->destroy_work);
+ } else if (wdev->conn_owner_nlportid == notify->portid) {
schedule_work(&wdev->disconnect_wk);
+ }
}
spin_lock_bh(&rdev->beacon_registrations_lock);
@@ -14651,19 +14967,6 @@ static int nl80211_netlink_notify(struct notifier_block * nb,
}
}
spin_unlock_bh(&rdev->beacon_registrations_lock);
-
- if (schedule_destroy_work) {
- struct cfg80211_iface_destroy *destroy;
-
- destroy = kzalloc(sizeof(*destroy), GFP_ATOMIC);
- if (destroy) {
- destroy->nlportid = notify->portid;
- spin_lock(&rdev->destroy_list_lock);
- list_add(&destroy->list, &rdev->destroy_list);
- spin_unlock(&rdev->destroy_list_lock);
- schedule_work(&rdev->destroy_work);
- }
- }
}
rcu_read_unlock();
diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h
index e488dca87423..b96933322077 100644
--- a/net/wireless/nl80211.h
+++ b/net/wireless/nl80211.h
@@ -16,8 +16,7 @@ struct sk_buff *nl80211_build_scan_msg(struct cfg80211_registered_device *rdev,
struct wireless_dev *wdev, bool aborted);
void nl80211_send_scan_msg(struct cfg80211_registered_device *rdev,
struct sk_buff *msg);
-void nl80211_send_sched_scan(struct cfg80211_registered_device *rdev,
- struct net_device *netdev, u32 cmd);
+void nl80211_send_sched_scan(struct cfg80211_sched_scan_request *req, u32 cmd);
void nl80211_common_reg_change_event(enum nl80211_commands cmd_id,
struct regulatory_request *request);
@@ -53,16 +52,12 @@ void nl80211_send_assoc_timeout(struct cfg80211_registered_device *rdev,
struct net_device *netdev,
const u8 *addr, gfp_t gfp);
void nl80211_send_connect_result(struct cfg80211_registered_device *rdev,
- struct net_device *netdev, const u8 *bssid,
- const u8 *req_ie, size_t req_ie_len,
- const u8 *resp_ie, size_t resp_ie_len,
- int status,
- enum nl80211_timeout_reason timeout_reason,
+ struct net_device *netdev,
+ struct cfg80211_connect_resp_params *params,
gfp_t gfp);
void nl80211_send_roamed(struct cfg80211_registered_device *rdev,
- struct net_device *netdev, const u8 *bssid,
- const u8 *req_ie, size_t req_ie_len,
- const u8 *resp_ie, size_t resp_ie_len, gfp_t gfp);
+ struct net_device *netdev,
+ struct cfg80211_roam_info *info, gfp_t gfp);
void nl80211_send_disconnected(struct cfg80211_registered_device *rdev,
struct net_device *netdev, u16 reason,
const u8 *ie, size_t ie_len, bool from_ap);
diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h
index 2f425075ada8..0598c1e5d0ad 100644
--- a/net/wireless/rdev-ops.h
+++ b/net/wireless/rdev-ops.h
@@ -36,13 +36,13 @@ static inline void rdev_set_wakeup(struct cfg80211_registered_device *rdev,
static inline struct wireless_dev
*rdev_add_virtual_intf(struct cfg80211_registered_device *rdev, char *name,
unsigned char name_assign_type,
- enum nl80211_iftype type, u32 *flags,
+ enum nl80211_iftype type,
struct vif_params *params)
{
struct wireless_dev *ret;
trace_rdev_add_virtual_intf(&rdev->wiphy, name, type);
ret = rdev->ops->add_virtual_intf(&rdev->wiphy, name, name_assign_type,
- type, flags, params);
+ type, params);
trace_rdev_return_wdev(&rdev->wiphy, ret);
return ret;
}
@@ -61,12 +61,11 @@ rdev_del_virtual_intf(struct cfg80211_registered_device *rdev,
static inline int
rdev_change_virtual_intf(struct cfg80211_registered_device *rdev,
struct net_device *dev, enum nl80211_iftype type,
- u32 *flags, struct vif_params *params)
+ struct vif_params *params)
{
int ret;
trace_rdev_change_virtual_intf(&rdev->wiphy, dev, type);
- ret = rdev->ops->change_virtual_intf(&rdev->wiphy, dev, type, flags,
- params);
+ ret = rdev->ops->change_virtual_intf(&rdev->wiphy, dev, type, params);
trace_rdev_return_int(&rdev->wiphy, ret);
return ret;
}
@@ -750,6 +749,18 @@ rdev_set_cqm_rssi_config(struct cfg80211_registered_device *rdev,
}
static inline int
+rdev_set_cqm_rssi_range_config(struct cfg80211_registered_device *rdev,
+ struct net_device *dev, s32 low, s32 high)
+{
+ int ret;
+ trace_rdev_set_cqm_rssi_range_config(&rdev->wiphy, dev, low, high);
+ ret = rdev->ops->set_cqm_rssi_range_config(&rdev->wiphy, dev,
+ low, high);
+ trace_rdev_return_int(&rdev->wiphy, ret);
+ return ret;
+}
+
+static inline int
rdev_set_cqm_txe_config(struct cfg80211_registered_device *rdev,
struct net_device *dev, u32 rate, u32 pkts, u32 intvl)
{
@@ -802,18 +813,18 @@ rdev_sched_scan_start(struct cfg80211_registered_device *rdev,
struct cfg80211_sched_scan_request *request)
{
int ret;
- trace_rdev_sched_scan_start(&rdev->wiphy, dev, request);
+ trace_rdev_sched_scan_start(&rdev->wiphy, dev, request->reqid);
ret = rdev->ops->sched_scan_start(&rdev->wiphy, dev, request);
trace_rdev_return_int(&rdev->wiphy, ret);
return ret;
}
static inline int rdev_sched_scan_stop(struct cfg80211_registered_device *rdev,
- struct net_device *dev)
+ struct net_device *dev, u64 reqid)
{
int ret;
- trace_rdev_sched_scan_stop(&rdev->wiphy, dev);
- ret = rdev->ops->sched_scan_stop(&rdev->wiphy, dev);
+ trace_rdev_sched_scan_stop(&rdev->wiphy, dev, reqid);
+ ret = rdev->ops->sched_scan_stop(&rdev->wiphy, dev, reqid);
trace_rdev_return_int(&rdev->wiphy, ret);
return ret;
}
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 753efcd51fa3..5fae296a6a58 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -2067,6 +2067,88 @@ reg_process_hint_country_ie(struct wiphy *wiphy,
return REG_REQ_IGNORE;
}
+bool reg_dfs_domain_same(struct wiphy *wiphy1, struct wiphy *wiphy2)
+{
+ const struct ieee80211_regdomain *wiphy1_regd = NULL;
+ const struct ieee80211_regdomain *wiphy2_regd = NULL;
+ const struct ieee80211_regdomain *cfg80211_regd = NULL;
+ bool dfs_domain_same;
+
+ rcu_read_lock();
+
+ cfg80211_regd = rcu_dereference(cfg80211_regdomain);
+ wiphy1_regd = rcu_dereference(wiphy1->regd);
+ if (!wiphy1_regd)
+ wiphy1_regd = cfg80211_regd;
+
+ wiphy2_regd = rcu_dereference(wiphy2->regd);
+ if (!wiphy2_regd)
+ wiphy2_regd = cfg80211_regd;
+
+ dfs_domain_same = wiphy1_regd->dfs_region == wiphy2_regd->dfs_region;
+
+ rcu_read_unlock();
+
+ return dfs_domain_same;
+}
+
+static void reg_copy_dfs_chan_state(struct ieee80211_channel *dst_chan,
+ struct ieee80211_channel *src_chan)
+{
+ if (!(dst_chan->flags & IEEE80211_CHAN_RADAR) ||
+ !(src_chan->flags & IEEE80211_CHAN_RADAR))
+ return;
+
+ if (dst_chan->flags & IEEE80211_CHAN_DISABLED ||
+ src_chan->flags & IEEE80211_CHAN_DISABLED)
+ return;
+
+ if (src_chan->center_freq == dst_chan->center_freq &&
+ dst_chan->dfs_state == NL80211_DFS_USABLE) {
+ dst_chan->dfs_state = src_chan->dfs_state;
+ dst_chan->dfs_state_entered = src_chan->dfs_state_entered;
+ }
+}
+
+static void wiphy_share_dfs_chan_state(struct wiphy *dst_wiphy,
+ struct wiphy *src_wiphy)
+{
+ struct ieee80211_supported_band *src_sband, *dst_sband;
+ struct ieee80211_channel *src_chan, *dst_chan;
+ int i, j, band;
+
+ if (!reg_dfs_domain_same(dst_wiphy, src_wiphy))
+ return;
+
+ for (band = 0; band < NUM_NL80211_BANDS; band++) {
+ dst_sband = dst_wiphy->bands[band];
+ src_sband = src_wiphy->bands[band];
+ if (!dst_sband || !src_sband)
+ continue;
+
+ for (i = 0; i < dst_sband->n_channels; i++) {
+ dst_chan = &dst_sband->channels[i];
+ for (j = 0; j < src_sband->n_channels; j++) {
+ src_chan = &src_sband->channels[j];
+ reg_copy_dfs_chan_state(dst_chan, src_chan);
+ }
+ }
+ }
+}
+
+static void wiphy_all_share_dfs_chan_state(struct wiphy *wiphy)
+{
+ struct cfg80211_registered_device *rdev;
+
+ ASSERT_RTNL();
+
+ list_for_each_entry(rdev, &cfg80211_rdev_list, list) {
+ if (wiphy == &rdev->wiphy)
+ continue;
+ wiphy_share_dfs_chan_state(wiphy, &rdev->wiphy);
+ }
+}
+
/* This processes *all* regulatory hints */
static void reg_process_hint(struct regulatory_request *reg_request)
{
@@ -2110,6 +2192,7 @@ static void reg_process_hint(struct regulatory_request *reg_request)
if (treatment == REG_REQ_ALREADY_SET && wiphy &&
wiphy->regulatory_flags & REGULATORY_STRICT_REG) {
wiphy_update_regulatory(wiphy, reg_request->initiator);
+ wiphy_all_share_dfs_chan_state(wiphy);
reg_check_channels();
}
@@ -3061,6 +3144,7 @@ void wiphy_regulatory_register(struct wiphy *wiphy)
lr = get_last_request();
wiphy_update_regulatory(wiphy, lr->initiator);
+ wiphy_all_share_dfs_chan_state(wiphy);
}
void wiphy_regulatory_deregister(struct wiphy *wiphy)
@@ -3120,6 +3204,67 @@ bool regulatory_indoor_allowed(void)
return reg_is_indoor;
}
+bool regulatory_pre_cac_allowed(struct wiphy *wiphy)
+{
+ const struct ieee80211_regdomain *regd = NULL;
+ const struct ieee80211_regdomain *wiphy_regd = NULL;
+ bool pre_cac_allowed = false;
+
+ rcu_read_lock();
+
+ regd = rcu_dereference(cfg80211_regdomain);
+ wiphy_regd = rcu_dereference(wiphy->regd);
+ if (!wiphy_regd) {
+ if (regd->dfs_region == NL80211_DFS_ETSI)
+ pre_cac_allowed = true;
+
+ rcu_read_unlock();
+
+ return pre_cac_allowed;
+ }
+
+ if (regd->dfs_region == wiphy_regd->dfs_region &&
+ wiphy_regd->dfs_region == NL80211_DFS_ETSI)
+ pre_cac_allowed = true;
+
+ rcu_read_unlock();
+
+ return pre_cac_allowed;
+}
+
+void regulatory_propagate_dfs_state(struct wiphy *wiphy,
+ struct cfg80211_chan_def *chandef,
+ enum nl80211_dfs_state dfs_state,
+ enum nl80211_radar_event event)
+{
+ struct cfg80211_registered_device *rdev;
+
+ ASSERT_RTNL();
+
+ if (WARN_ON(!cfg80211_chandef_valid(chandef)))
+ return;
+
+ list_for_each_entry(rdev, &cfg80211_rdev_list, list) {
+ if (wiphy == &rdev->wiphy)
+ continue;
+
+ if (!reg_dfs_domain_same(wiphy, &rdev->wiphy))
+ continue;
+
+ if (!ieee80211_get_channel(&rdev->wiphy,
+ chandef->chan->center_freq))
+ continue;
+
+ cfg80211_set_dfs_state(&rdev->wiphy, chandef, dfs_state);
+
+ if (event == NL80211_RADAR_DETECTED ||
+ event == NL80211_RADAR_CAC_FINISHED)
+ cfg80211_sched_dfs_chan_update(rdev);
+
+ nl80211_radar_notify(rdev, chandef, event, NULL, GFP_KERNEL);
+ }
+}
+
int __init regulatory_init(void)
{
int err = 0;
diff --git a/net/wireless/reg.h b/net/wireless/reg.h
index f6ced316b5a4..ca7fedf2e7a1 100644
--- a/net/wireless/reg.h
+++ b/net/wireless/reg.h
@@ -143,4 +143,40 @@ int cfg80211_get_unii(int freq);
*/
bool regulatory_indoor_allowed(void);
+/*
+ * Grace period to timeout pre-CAC results on the dfs channels. This timeout
+ * value is used for Non-ETSI domain.
+ * TODO: May be make this timeout available through regdb?
+ */
+#define REG_PRE_CAC_EXPIRY_GRACE_MS 2000
+
+/**
+ * regulatory_pre_cac_allowed - if pre-CAC allowed in the current dfs domain
+ * @wiphy: wiphy for which pre-CAC capability is checked.
+
+ * Pre-CAC is allowed only in ETSI domain.
+ */
+bool regulatory_pre_cac_allowed(struct wiphy *wiphy);
+
+/**
+ * regulatory_propagate_dfs_state - Propagate DFS channel state to other wiphys
+ * @wiphy - wiphy on which radar is detected and the event will be propagated
+ * to other available wiphys having the same DFS domain
+ * @chandef - Channel definition of radar detected channel
+ * @dfs_state - DFS channel state to be set
+ * @event - Type of radar event which triggered this DFS state change
+ *
+ * This function should be called with rtnl lock held.
+ */
+void regulatory_propagate_dfs_state(struct wiphy *wiphy,
+ struct cfg80211_chan_def *chandef,
+ enum nl80211_dfs_state dfs_state,
+ enum nl80211_radar_event event);
+
+/**
+ * reg_dfs_domain_same - Checks if both wiphy have same DFS domain configured
+ * @wiphy1 - wiphy it's dfs_region to be checked against that of wiphy2
+ * @wiphy2 - wiphy it's dfs_region to be checked against that of wiphy1
+ */
+bool reg_dfs_domain_same(struct wiphy *wiphy1, struct wiphy *wiphy2);
#endif /* __NET_WIRELESS_REG_H */
diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index 21be56b3128e..14d5f0c8c45f 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -300,93 +300,168 @@ void cfg80211_scan_done(struct cfg80211_scan_request *request,
}
EXPORT_SYMBOL(cfg80211_scan_done);
-void __cfg80211_sched_scan_results(struct work_struct *wk)
+void cfg80211_add_sched_scan_req(struct cfg80211_registered_device *rdev,
+ struct cfg80211_sched_scan_request *req)
{
- struct cfg80211_registered_device *rdev;
- struct cfg80211_sched_scan_request *request;
+ ASSERT_RTNL();
- rdev = container_of(wk, struct cfg80211_registered_device,
- sched_scan_results_wk);
+ list_add_rcu(&req->list, &rdev->sched_scan_req_list);
+}
- rtnl_lock();
+static void cfg80211_del_sched_scan_req(struct cfg80211_registered_device *rdev,
+ struct cfg80211_sched_scan_request *req)
+{
+ ASSERT_RTNL();
- request = rtnl_dereference(rdev->sched_scan_req);
+ list_del_rcu(&req->list);
+ kfree_rcu(req, rcu_head);
+}
- /* we don't have sched_scan_req anymore if the scan is stopping */
- if (request) {
- if (request->flags & NL80211_SCAN_FLAG_FLUSH) {
- /* flush entries from previous scans */
- spin_lock_bh(&rdev->bss_lock);
- __cfg80211_bss_expire(rdev, request->scan_start);
- spin_unlock_bh(&rdev->bss_lock);
- request->scan_start = jiffies;
- }
- nl80211_send_sched_scan(rdev, request->dev,
- NL80211_CMD_SCHED_SCAN_RESULTS);
+static struct cfg80211_sched_scan_request *
+cfg80211_find_sched_scan_req(struct cfg80211_registered_device *rdev, u64 reqid)
+{
+ struct cfg80211_sched_scan_request *pos;
+
+ ASSERT_RTNL();
+
+ list_for_each_entry(pos, &rdev->sched_scan_req_list, list) {
+ if (pos->reqid == reqid)
+ return pos;
}
+ return NULL;
+}
+
+/*
+ * Determines if a scheduled scan request can be handled. When a legacy
+ * scheduled scan is running no other scheduled scan is allowed regardless
+ * whether the request is for legacy or multi-support scan. When a multi-support
+ * scheduled scan is running a request for legacy scan is not allowed. In this
+ * case a request for multi-support scan can be handled if resources are
+ * available, ie. struct wiphy::max_sched_scan_reqs limit is not yet reached.
+ */
+int cfg80211_sched_scan_req_possible(struct cfg80211_registered_device *rdev,
+ bool want_multi)
+{
+ struct cfg80211_sched_scan_request *pos;
+ int i = 0;
+
+ list_for_each_entry(pos, &rdev->sched_scan_req_list, list) {
+ /* request id zero means legacy in progress */
+ if (!i && !pos->reqid)
+ return -EINPROGRESS;
+ i++;
+ }
+
+ if (i) {
+ /* no legacy allowed when multi request(s) are active */
+ if (!want_multi)
+ return -EINPROGRESS;
+
+ /* resource limit reached */
+ if (i == rdev->wiphy.max_sched_scan_reqs)
+ return -ENOSPC;
+ }
+ return 0;
+}
+
+void cfg80211_sched_scan_results_wk(struct work_struct *work)
+{
+ struct cfg80211_registered_device *rdev;
+ struct cfg80211_sched_scan_request *req, *tmp;
+ rdev = container_of(work, struct cfg80211_registered_device,
+ sched_scan_res_wk);
+
+ rtnl_lock();
+ list_for_each_entry_safe(req, tmp, &rdev->sched_scan_req_list, list) {
+ if (req->report_results) {
+ req->report_results = false;
+ if (req->flags & NL80211_SCAN_FLAG_FLUSH) {
+ /* flush entries from previous scans */
+ spin_lock_bh(&rdev->bss_lock);
+ __cfg80211_bss_expire(rdev, req->scan_start);
+ spin_unlock_bh(&rdev->bss_lock);
+ req->scan_start = jiffies;
+ }
+ nl80211_send_sched_scan(req,
+ NL80211_CMD_SCHED_SCAN_RESULTS);
+ }
+ }
rtnl_unlock();
}
-void cfg80211_sched_scan_results(struct wiphy *wiphy)
+void cfg80211_sched_scan_results(struct wiphy *wiphy, u64 reqid)
{
- trace_cfg80211_sched_scan_results(wiphy);
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
+ struct cfg80211_sched_scan_request *request;
+
+ trace_cfg80211_sched_scan_results(wiphy, reqid);
/* ignore if we're not scanning */
- if (rcu_access_pointer(wiphy_to_rdev(wiphy)->sched_scan_req))
- queue_work(cfg80211_wq,
- &wiphy_to_rdev(wiphy)->sched_scan_results_wk);
+ rtnl_lock();
+ request = cfg80211_find_sched_scan_req(rdev, reqid);
+ if (request) {
+ request->report_results = true;
+ queue_work(cfg80211_wq, &rdev->sched_scan_res_wk);
+ }
+ rtnl_unlock();
}
EXPORT_SYMBOL(cfg80211_sched_scan_results);
-void cfg80211_sched_scan_stopped_rtnl(struct wiphy *wiphy)
+void cfg80211_sched_scan_stopped_rtnl(struct wiphy *wiphy, u64 reqid)
{
struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
ASSERT_RTNL();
- trace_cfg80211_sched_scan_stopped(wiphy);
+ trace_cfg80211_sched_scan_stopped(wiphy, reqid);
- __cfg80211_stop_sched_scan(rdev, true);
+ __cfg80211_stop_sched_scan(rdev, reqid, true);
}
EXPORT_SYMBOL(cfg80211_sched_scan_stopped_rtnl);
-void cfg80211_sched_scan_stopped(struct wiphy *wiphy)
+void cfg80211_sched_scan_stopped(struct wiphy *wiphy, u64 reqid)
{
rtnl_lock();
- cfg80211_sched_scan_stopped_rtnl(wiphy);
+ cfg80211_sched_scan_stopped_rtnl(wiphy, reqid);
rtnl_unlock();
}
EXPORT_SYMBOL(cfg80211_sched_scan_stopped);
-int __cfg80211_stop_sched_scan(struct cfg80211_registered_device *rdev,
- bool driver_initiated)
+int cfg80211_stop_sched_scan_req(struct cfg80211_registered_device *rdev,
+ struct cfg80211_sched_scan_request *req,
+ bool driver_initiated)
{
- struct cfg80211_sched_scan_request *sched_scan_req;
- struct net_device *dev;
-
ASSERT_RTNL();
- if (!rdev->sched_scan_req)
- return -ENOENT;
-
- sched_scan_req = rtnl_dereference(rdev->sched_scan_req);
- dev = sched_scan_req->dev;
-
if (!driver_initiated) {
- int err = rdev_sched_scan_stop(rdev, dev);
+ int err = rdev_sched_scan_stop(rdev, req->dev, req->reqid);
if (err)
return err;
}
- nl80211_send_sched_scan(rdev, dev, NL80211_CMD_SCHED_SCAN_STOPPED);
+ nl80211_send_sched_scan(req, NL80211_CMD_SCHED_SCAN_STOPPED);
- RCU_INIT_POINTER(rdev->sched_scan_req, NULL);
- kfree_rcu(sched_scan_req, rcu_head);
+ cfg80211_del_sched_scan_req(rdev, req);
return 0;
}
+int __cfg80211_stop_sched_scan(struct cfg80211_registered_device *rdev,
+ u64 reqid, bool driver_initiated)
+{
+ struct cfg80211_sched_scan_request *sched_scan_req;
+
+ ASSERT_RTNL();
+
+ sched_scan_req = cfg80211_find_sched_scan_req(rdev, reqid);
+ if (!sched_scan_req)
+ return -ENOENT;
+
+ return cfg80211_stop_sched_scan_req(rdev, sched_scan_req,
+ driver_initiated);
+}
+
void cfg80211_bss_age(struct cfg80211_registered_device *rdev,
unsigned long age_secs)
{
diff --git a/net/wireless/sme.c b/net/wireless/sme.c
index b347e63d7aaa..532a0007ce82 100644
--- a/net/wireless/sme.c
+++ b/net/wireless/sme.c
@@ -5,6 +5,7 @@
*
* Copyright 2009 Johannes Berg <johannes@sipsolutions.net>
* Copyright (C) 2009 Intel Corporation. All rights reserved.
+ * Copyright 2017 Intel Deutschland GmbH
*/
#include <linux/etherdevice.h>
@@ -253,10 +254,13 @@ void cfg80211_conn_work(struct work_struct *work)
}
treason = NL80211_TIMEOUT_UNSPECIFIED;
if (cfg80211_conn_do_work(wdev, &treason)) {
- __cfg80211_connect_result(
- wdev->netdev, bssid,
- NULL, 0, NULL, 0, -1, false, NULL,
- treason);
+ struct cfg80211_connect_resp_params cr;
+
+ memset(&cr, 0, sizeof(cr));
+ cr.status = -1;
+ cr.bssid = bssid;
+ cr.timeout_reason = treason;
+ __cfg80211_connect_result(wdev->netdev, &cr, false);
}
wdev_unlock(wdev);
}
@@ -359,10 +363,13 @@ void cfg80211_sme_rx_auth(struct wireless_dev *wdev, const u8 *buf, size_t len)
wdev->conn->state = CFG80211_CONN_AUTHENTICATE_NEXT;
schedule_work(&rdev->conn_work);
} else if (status_code != WLAN_STATUS_SUCCESS) {
- __cfg80211_connect_result(wdev->netdev, mgmt->bssid,
- NULL, 0, NULL, 0,
- status_code, false, NULL,
- NL80211_TIMEOUT_UNSPECIFIED);
+ struct cfg80211_connect_resp_params cr;
+
+ memset(&cr, 0, sizeof(cr));
+ cr.status = status_code;
+ cr.bssid = mgmt->bssid;
+ cr.timeout_reason = NL80211_TIMEOUT_UNSPECIFIED;
+ __cfg80211_connect_result(wdev->netdev, &cr, false);
} else if (wdev->conn->state == CFG80211_CONN_AUTHENTICATING) {
wdev->conn->state = CFG80211_CONN_ASSOCIATE_NEXT;
schedule_work(&rdev->conn_work);
@@ -669,12 +676,9 @@ static DECLARE_WORK(cfg80211_disconnect_work, disconnect_work);
*/
/* This method must consume bss one way or another */
-void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid,
- const u8 *req_ie, size_t req_ie_len,
- const u8 *resp_ie, size_t resp_ie_len,
- int status, bool wextev,
- struct cfg80211_bss *bss,
- enum nl80211_timeout_reason timeout_reason)
+void __cfg80211_connect_result(struct net_device *dev,
+ struct cfg80211_connect_resp_params *cr,
+ bool wextev)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
const u8 *country_ie;
@@ -686,48 +690,48 @@ void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid,
if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION &&
wdev->iftype != NL80211_IFTYPE_P2P_CLIENT)) {
- cfg80211_put_bss(wdev->wiphy, bss);
+ cfg80211_put_bss(wdev->wiphy, cr->bss);
return;
}
- nl80211_send_connect_result(wiphy_to_rdev(wdev->wiphy), dev,
- bssid, req_ie, req_ie_len,
- resp_ie, resp_ie_len,
- status, timeout_reason, GFP_KERNEL);
+ nl80211_send_connect_result(wiphy_to_rdev(wdev->wiphy), dev, cr,
+ GFP_KERNEL);
#ifdef CONFIG_CFG80211_WEXT
if (wextev) {
- if (req_ie && status == WLAN_STATUS_SUCCESS) {
+ if (cr->req_ie && cr->status == WLAN_STATUS_SUCCESS) {
memset(&wrqu, 0, sizeof(wrqu));
- wrqu.data.length = req_ie_len;
- wireless_send_event(dev, IWEVASSOCREQIE, &wrqu, req_ie);
+ wrqu.data.length = cr->req_ie_len;
+ wireless_send_event(dev, IWEVASSOCREQIE, &wrqu,
+ cr->req_ie);
}
- if (resp_ie && status == WLAN_STATUS_SUCCESS) {
+ if (cr->resp_ie && cr->status == WLAN_STATUS_SUCCESS) {
memset(&wrqu, 0, sizeof(wrqu));
- wrqu.data.length = resp_ie_len;
- wireless_send_event(dev, IWEVASSOCRESPIE, &wrqu, resp_ie);
+ wrqu.data.length = cr->resp_ie_len;
+ wireless_send_event(dev, IWEVASSOCRESPIE, &wrqu,
+ cr->resp_ie);
}
memset(&wrqu, 0, sizeof(wrqu));
wrqu.ap_addr.sa_family = ARPHRD_ETHER;
- if (bssid && status == WLAN_STATUS_SUCCESS) {
- memcpy(wrqu.ap_addr.sa_data, bssid, ETH_ALEN);
- memcpy(wdev->wext.prev_bssid, bssid, ETH_ALEN);
+ if (cr->bssid && cr->status == WLAN_STATUS_SUCCESS) {
+ memcpy(wrqu.ap_addr.sa_data, cr->bssid, ETH_ALEN);
+ memcpy(wdev->wext.prev_bssid, cr->bssid, ETH_ALEN);
wdev->wext.prev_bssid_valid = true;
}
wireless_send_event(dev, SIOCGIWAP, &wrqu, NULL);
}
#endif
- if (!bss && (status == WLAN_STATUS_SUCCESS)) {
+ if (!cr->bss && (cr->status == WLAN_STATUS_SUCCESS)) {
WARN_ON_ONCE(!wiphy_to_rdev(wdev->wiphy)->ops->connect);
- bss = cfg80211_get_bss(wdev->wiphy, NULL, bssid,
- wdev->ssid, wdev->ssid_len,
- wdev->conn_bss_type,
- IEEE80211_PRIVACY_ANY);
- if (bss)
- cfg80211_hold_bss(bss_from_pub(bss));
+ cr->bss = cfg80211_get_bss(wdev->wiphy, NULL, cr->bssid,
+ wdev->ssid, wdev->ssid_len,
+ wdev->conn_bss_type,
+ IEEE80211_PRIVACY_ANY);
+ if (cr->bss)
+ cfg80211_hold_bss(bss_from_pub(cr->bss));
}
if (wdev->current_bss) {
@@ -736,29 +740,29 @@ void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid,
wdev->current_bss = NULL;
}
- if (status != WLAN_STATUS_SUCCESS) {
+ if (cr->status != WLAN_STATUS_SUCCESS) {
kzfree(wdev->connect_keys);
wdev->connect_keys = NULL;
wdev->ssid_len = 0;
wdev->conn_owner_nlportid = 0;
- if (bss) {
- cfg80211_unhold_bss(bss_from_pub(bss));
- cfg80211_put_bss(wdev->wiphy, bss);
+ if (cr->bss) {
+ cfg80211_unhold_bss(bss_from_pub(cr->bss));
+ cfg80211_put_bss(wdev->wiphy, cr->bss);
}
cfg80211_sme_free(wdev);
return;
}
- if (WARN_ON(!bss))
+ if (WARN_ON(!cr->bss))
return;
- wdev->current_bss = bss_from_pub(bss);
+ wdev->current_bss = bss_from_pub(cr->bss);
if (!(wdev->wiphy->flags & WIPHY_FLAG_HAS_STATIC_WEP))
cfg80211_upload_connect_keys(wdev);
rcu_read_lock();
- country_ie = ieee80211_bss_get_ie(bss, WLAN_EID_COUNTRY);
+ country_ie = ieee80211_bss_get_ie(cr->bss, WLAN_EID_COUNTRY);
if (!country_ie) {
rcu_read_unlock();
return;
@@ -775,70 +779,99 @@ void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid,
* - country_ie + 2, the start of the country ie data, and
* - and country_ie[1] which is the IE length
*/
- regulatory_hint_country_ie(wdev->wiphy, bss->channel->band,
+ regulatory_hint_country_ie(wdev->wiphy, cr->bss->channel->band,
country_ie + 2, country_ie[1]);
kfree(country_ie);
}
/* Consumes bss object one way or another */
-void cfg80211_connect_bss(struct net_device *dev, const u8 *bssid,
- struct cfg80211_bss *bss, const u8 *req_ie,
- size_t req_ie_len, const u8 *resp_ie,
- size_t resp_ie_len, int status, gfp_t gfp,
- enum nl80211_timeout_reason timeout_reason)
+void cfg80211_connect_done(struct net_device *dev,
+ struct cfg80211_connect_resp_params *params,
+ gfp_t gfp)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
struct cfg80211_event *ev;
unsigned long flags;
+ u8 *next;
- if (bss) {
+ if (params->bss) {
/* Make sure the bss entry provided by the driver is valid. */
- struct cfg80211_internal_bss *ibss = bss_from_pub(bss);
+ struct cfg80211_internal_bss *ibss = bss_from_pub(params->bss);
if (WARN_ON(list_empty(&ibss->list))) {
- cfg80211_put_bss(wdev->wiphy, bss);
+ cfg80211_put_bss(wdev->wiphy, params->bss);
return;
}
}
- ev = kzalloc(sizeof(*ev) + req_ie_len + resp_ie_len, gfp);
+ ev = kzalloc(sizeof(*ev) + (params->bssid ? ETH_ALEN : 0) +
+ params->req_ie_len + params->resp_ie_len +
+ params->fils_kek_len + params->pmk_len +
+ (params->pmkid ? WLAN_PMKID_LEN : 0), gfp);
if (!ev) {
- cfg80211_put_bss(wdev->wiphy, bss);
+ cfg80211_put_bss(wdev->wiphy, params->bss);
return;
}
ev->type = EVENT_CONNECT_RESULT;
- if (bssid)
- memcpy(ev->cr.bssid, bssid, ETH_ALEN);
- if (req_ie_len) {
- ev->cr.req_ie = ((u8 *)ev) + sizeof(*ev);
- ev->cr.req_ie_len = req_ie_len;
- memcpy((void *)ev->cr.req_ie, req_ie, req_ie_len);
+ next = ((u8 *)ev) + sizeof(*ev);
+ if (params->bssid) {
+ ev->cr.bssid = next;
+ memcpy((void *)ev->cr.bssid, params->bssid, ETH_ALEN);
+ next += ETH_ALEN;
}
- if (resp_ie_len) {
- ev->cr.resp_ie = ((u8 *)ev) + sizeof(*ev) + req_ie_len;
- ev->cr.resp_ie_len = resp_ie_len;
- memcpy((void *)ev->cr.resp_ie, resp_ie, resp_ie_len);
+ if (params->req_ie_len) {
+ ev->cr.req_ie = next;
+ ev->cr.req_ie_len = params->req_ie_len;
+ memcpy((void *)ev->cr.req_ie, params->req_ie,
+ params->req_ie_len);
+ next += params->req_ie_len;
}
- if (bss)
- cfg80211_hold_bss(bss_from_pub(bss));
- ev->cr.bss = bss;
- ev->cr.status = status;
- ev->cr.timeout_reason = timeout_reason;
+ if (params->resp_ie_len) {
+ ev->cr.resp_ie = next;
+ ev->cr.resp_ie_len = params->resp_ie_len;
+ memcpy((void *)ev->cr.resp_ie, params->resp_ie,
+ params->resp_ie_len);
+ next += params->resp_ie_len;
+ }
+ if (params->fils_kek_len) {
+ ev->cr.fils_kek = next;
+ ev->cr.fils_kek_len = params->fils_kek_len;
+ memcpy((void *)ev->cr.fils_kek, params->fils_kek,
+ params->fils_kek_len);
+ next += params->fils_kek_len;
+ }
+ if (params->pmk_len) {
+ ev->cr.pmk = next;
+ ev->cr.pmk_len = params->pmk_len;
+ memcpy((void *)ev->cr.pmk, params->pmk, params->pmk_len);
+ next += params->pmk_len;
+ }
+ if (params->pmkid) {
+ ev->cr.pmkid = next;
+ memcpy((void *)ev->cr.pmkid, params->pmkid, WLAN_PMKID_LEN);
+ next += WLAN_PMKID_LEN;
+ }
+ ev->cr.update_erp_next_seq_num = params->update_erp_next_seq_num;
+ if (params->update_erp_next_seq_num)
+ ev->cr.fils_erp_next_seq_num = params->fils_erp_next_seq_num;
+ if (params->bss)
+ cfg80211_hold_bss(bss_from_pub(params->bss));
+ ev->cr.bss = params->bss;
+ ev->cr.status = params->status;
+ ev->cr.timeout_reason = params->timeout_reason;
spin_lock_irqsave(&wdev->event_lock, flags);
list_add_tail(&ev->list, &wdev->event_list);
spin_unlock_irqrestore(&wdev->event_lock, flags);
queue_work(cfg80211_wq, &rdev->event_work);
}
-EXPORT_SYMBOL(cfg80211_connect_bss);
+EXPORT_SYMBOL(cfg80211_connect_done);
/* Consumes bss object one way or another */
void __cfg80211_roamed(struct wireless_dev *wdev,
- struct cfg80211_bss *bss,
- const u8 *req_ie, size_t req_ie_len,
- const u8 *resp_ie, size_t resp_ie_len)
+ struct cfg80211_roam_info *info)
{
#ifdef CONFIG_CFG80211_WEXT
union iwreq_data wrqu;
@@ -856,97 +889,84 @@ void __cfg80211_roamed(struct wireless_dev *wdev,
cfg80211_put_bss(wdev->wiphy, &wdev->current_bss->pub);
wdev->current_bss = NULL;
- cfg80211_hold_bss(bss_from_pub(bss));
- wdev->current_bss = bss_from_pub(bss);
+ if (WARN_ON(!info->bss))
+ return;
+
+ cfg80211_hold_bss(bss_from_pub(info->bss));
+ wdev->current_bss = bss_from_pub(info->bss);
nl80211_send_roamed(wiphy_to_rdev(wdev->wiphy),
- wdev->netdev, bss->bssid,
- req_ie, req_ie_len, resp_ie, resp_ie_len,
- GFP_KERNEL);
+ wdev->netdev, info, GFP_KERNEL);
#ifdef CONFIG_CFG80211_WEXT
- if (req_ie) {
+ if (info->req_ie) {
memset(&wrqu, 0, sizeof(wrqu));
- wrqu.data.length = req_ie_len;
+ wrqu.data.length = info->req_ie_len;
wireless_send_event(wdev->netdev, IWEVASSOCREQIE,
- &wrqu, req_ie);
+ &wrqu, info->req_ie);
}
- if (resp_ie) {
+ if (info->resp_ie) {
memset(&wrqu, 0, sizeof(wrqu));
- wrqu.data.length = resp_ie_len;
+ wrqu.data.length = info->resp_ie_len;
wireless_send_event(wdev->netdev, IWEVASSOCRESPIE,
- &wrqu, resp_ie);
+ &wrqu, info->resp_ie);
}
memset(&wrqu, 0, sizeof(wrqu));
wrqu.ap_addr.sa_family = ARPHRD_ETHER;
- memcpy(wrqu.ap_addr.sa_data, bss->bssid, ETH_ALEN);
- memcpy(wdev->wext.prev_bssid, bss->bssid, ETH_ALEN);
+ memcpy(wrqu.ap_addr.sa_data, info->bss->bssid, ETH_ALEN);
+ memcpy(wdev->wext.prev_bssid, info->bss->bssid, ETH_ALEN);
wdev->wext.prev_bssid_valid = true;
wireless_send_event(wdev->netdev, SIOCGIWAP, &wrqu, NULL);
#endif
return;
out:
- cfg80211_put_bss(wdev->wiphy, bss);
-}
-
-void cfg80211_roamed(struct net_device *dev,
- struct ieee80211_channel *channel,
- const u8 *bssid,
- const u8 *req_ie, size_t req_ie_len,
- const u8 *resp_ie, size_t resp_ie_len, gfp_t gfp)
-{
- struct wireless_dev *wdev = dev->ieee80211_ptr;
- struct cfg80211_bss *bss;
-
- bss = cfg80211_get_bss(wdev->wiphy, channel, bssid, wdev->ssid,
- wdev->ssid_len,
- wdev->conn_bss_type, IEEE80211_PRIVACY_ANY);
- if (WARN_ON(!bss))
- return;
-
- cfg80211_roamed_bss(dev, bss, req_ie, req_ie_len, resp_ie,
- resp_ie_len, gfp);
+ cfg80211_put_bss(wdev->wiphy, info->bss);
}
-EXPORT_SYMBOL(cfg80211_roamed);
-/* Consumes bss object one way or another */
-void cfg80211_roamed_bss(struct net_device *dev,
- struct cfg80211_bss *bss, const u8 *req_ie,
- size_t req_ie_len, const u8 *resp_ie,
- size_t resp_ie_len, gfp_t gfp)
+/* Consumes info->bss object one way or another */
+void cfg80211_roamed(struct net_device *dev, struct cfg80211_roam_info *info,
+ gfp_t gfp)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
struct cfg80211_event *ev;
unsigned long flags;
- if (WARN_ON(!bss))
+ if (!info->bss) {
+ info->bss = cfg80211_get_bss(wdev->wiphy, info->channel,
+ info->bssid, wdev->ssid,
+ wdev->ssid_len,
+ wdev->conn_bss_type,
+ IEEE80211_PRIVACY_ANY);
+ }
+
+ if (WARN_ON(!info->bss))
return;
- ev = kzalloc(sizeof(*ev) + req_ie_len + resp_ie_len, gfp);
+ ev = kzalloc(sizeof(*ev) + info->req_ie_len + info->resp_ie_len, gfp);
if (!ev) {
- cfg80211_put_bss(wdev->wiphy, bss);
+ cfg80211_put_bss(wdev->wiphy, info->bss);
return;
}
ev->type = EVENT_ROAMED;
ev->rm.req_ie = ((u8 *)ev) + sizeof(*ev);
- ev->rm.req_ie_len = req_ie_len;
- memcpy((void *)ev->rm.req_ie, req_ie, req_ie_len);
- ev->rm.resp_ie = ((u8 *)ev) + sizeof(*ev) + req_ie_len;
- ev->rm.resp_ie_len = resp_ie_len;
- memcpy((void *)ev->rm.resp_ie, resp_ie, resp_ie_len);
- ev->rm.bss = bss;
+ ev->rm.req_ie_len = info->req_ie_len;
+ memcpy((void *)ev->rm.req_ie, info->req_ie, info->req_ie_len);
+ ev->rm.resp_ie = ((u8 *)ev) + sizeof(*ev) + info->req_ie_len;
+ ev->rm.resp_ie_len = info->resp_ie_len;
+ memcpy((void *)ev->rm.resp_ie, info->resp_ie, info->resp_ie_len);
+ ev->rm.bss = info->bss;
spin_lock_irqsave(&wdev->event_lock, flags);
list_add_tail(&ev->list, &wdev->event_list);
spin_unlock_irqrestore(&wdev->event_lock, flags);
queue_work(cfg80211_wq, &rdev->event_work);
}
-EXPORT_SYMBOL(cfg80211_roamed_bss);
+EXPORT_SYMBOL(cfg80211_roamed);
void __cfg80211_disconnected(struct net_device *dev, const u8 *ie,
size_t ie_len, u16 reason, bool from_ap)
diff --git a/net/wireless/trace.h b/net/wireless/trace.h
index 776e80cef9b4..ca8b2059f92c 100644
--- a/net/wireless/trace.h
+++ b/net/wireless/trace.h
@@ -576,11 +576,6 @@ DEFINE_EVENT(wiphy_netdev_evt, rdev_stop_ap,
TP_ARGS(wiphy, netdev)
);
-DEFINE_EVENT(wiphy_netdev_evt, rdev_sched_scan_stop,
- TP_PROTO(struct wiphy *wiphy, struct net_device *netdev),
- TP_ARGS(wiphy, netdev)
-);
-
DEFINE_EVENT(wiphy_netdev_evt, rdev_set_rekey_data,
TP_PROTO(struct wiphy *wiphy, struct net_device *netdev),
TP_ARGS(wiphy, netdev)
@@ -1322,6 +1317,28 @@ TRACE_EVENT(rdev_set_cqm_rssi_config,
__entry->rssi_thold, __entry->rssi_hyst)
);
+TRACE_EVENT(rdev_set_cqm_rssi_range_config,
+ TP_PROTO(struct wiphy *wiphy,
+ struct net_device *netdev, s32 low, s32 high),
+ TP_ARGS(wiphy, netdev, low, high),
+ TP_STRUCT__entry(
+ WIPHY_ENTRY
+ NETDEV_ENTRY
+ __field(s32, rssi_low)
+ __field(s32, rssi_high)
+ ),
+ TP_fast_assign(
+ WIPHY_ASSIGN;
+ NETDEV_ASSIGN;
+ __entry->rssi_low = low;
+ __entry->rssi_high = high;
+ ),
+ TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT
+ ", range: %d - %d ",
+ WIPHY_PR_ARG, NETDEV_PR_ARG,
+ __entry->rssi_low, __entry->rssi_high)
+);
+
TRACE_EVENT(rdev_set_cqm_txe_config,
TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, u32 rate,
u32 pkts, u32 intvl),
@@ -1588,20 +1605,31 @@ DEFINE_EVENT(tx_rx_evt, rdev_set_antenna,
TP_ARGS(wiphy, rx, tx)
);
-TRACE_EVENT(rdev_sched_scan_start,
- TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
- struct cfg80211_sched_scan_request *request),
- TP_ARGS(wiphy, netdev, request),
+DECLARE_EVENT_CLASS(wiphy_netdev_id_evt,
+ TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, u64 id),
+ TP_ARGS(wiphy, netdev, id),
TP_STRUCT__entry(
WIPHY_ENTRY
NETDEV_ENTRY
+ __field(u64, id)
),
TP_fast_assign(
WIPHY_ASSIGN;
NETDEV_ASSIGN;
+ __entry->id = id;
),
- TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT,
- WIPHY_PR_ARG, NETDEV_PR_ARG)
+ TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", id: %llu",
+ WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->id)
+);
+
+DEFINE_EVENT(wiphy_netdev_id_evt, rdev_sched_scan_start,
+ TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, u64 id),
+ TP_ARGS(wiphy, netdev, id)
+);
+
+DEFINE_EVENT(wiphy_netdev_id_evt, rdev_sched_scan_stop,
+ TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, u64 id),
+ TP_ARGS(wiphy, netdev, id)
);
TRACE_EVENT(rdev_tdls_mgmt,
@@ -2792,14 +2820,28 @@ TRACE_EVENT(cfg80211_scan_done,
MAC_PR_ARG(tsf_bssid))
);
-DEFINE_EVENT(wiphy_only_evt, cfg80211_sched_scan_results,
- TP_PROTO(struct wiphy *wiphy),
- TP_ARGS(wiphy)
+DECLARE_EVENT_CLASS(wiphy_id_evt,
+ TP_PROTO(struct wiphy *wiphy, u64 id),
+ TP_ARGS(wiphy, id),
+ TP_STRUCT__entry(
+ WIPHY_ENTRY
+ __field(u64, id)
+ ),
+ TP_fast_assign(
+ WIPHY_ASSIGN;
+ __entry->id = id;
+ ),
+ TP_printk(WIPHY_PR_FMT ", id: %llu", WIPHY_PR_ARG, __entry->id)
);
-DEFINE_EVENT(wiphy_only_evt, cfg80211_sched_scan_stopped,
- TP_PROTO(struct wiphy *wiphy),
- TP_ARGS(wiphy)
+DEFINE_EVENT(wiphy_id_evt, cfg80211_sched_scan_stopped,
+ TP_PROTO(struct wiphy *wiphy, u64 id),
+ TP_ARGS(wiphy, id)
+);
+
+DEFINE_EVENT(wiphy_id_evt, cfg80211_sched_scan_results,
+ TP_PROTO(struct wiphy *wiphy, u64 id),
+ TP_ARGS(wiphy, id)
);
TRACE_EVENT(cfg80211_get_bss,
diff --git a/net/wireless/util.c b/net/wireless/util.c
index 68e5f2ecee1a..7198373e2920 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -659,7 +659,7 @@ __ieee80211_amsdu_copy_frag(struct sk_buff *skb, struct sk_buff *frame,
int offset, int len)
{
struct skb_shared_info *sh = skb_shinfo(skb);
- const skb_frag_t *frag = &sh->frags[-1];
+ const skb_frag_t *frag = &sh->frags[0];
struct page *frag_page;
void *frag_ptr;
int frag_len, frag_size;
@@ -672,10 +672,10 @@ __ieee80211_amsdu_copy_frag(struct sk_buff *skb, struct sk_buff *frame,
while (offset >= frag_size) {
offset -= frag_size;
- frag++;
frag_page = skb_frag_page(frag);
frag_ptr = skb_frag_address(frag);
frag_size = skb_frag_size(frag);
+ frag++;
}
frag_ptr += offset;
@@ -687,12 +687,12 @@ __ieee80211_amsdu_copy_frag(struct sk_buff *skb, struct sk_buff *frame,
len -= cur_len;
while (len > 0) {
- frag++;
frag_len = skb_frag_size(frag);
cur_len = min(len, frag_len);
__frame_add_frag(frame, skb_frag_page(frag),
skb_frag_address(frag), cur_len, frag_len);
len -= cur_len;
+ frag++;
}
}
@@ -914,11 +914,11 @@ void cfg80211_upload_connect_keys(struct wireless_dev *wdev)
netdev_err(dev, "failed to set key %d\n", i);
continue;
}
- if (wdev->connect_keys->def == i)
- if (rdev_set_default_key(rdev, dev, i, true, true)) {
- netdev_err(dev, "failed to set defkey %d\n", i);
- continue;
- }
+ if (wdev->connect_keys->def == i &&
+ rdev_set_default_key(rdev, dev, i, true, true)) {
+ netdev_err(dev, "failed to set defkey %d\n", i);
+ continue;
+ }
}
kzfree(wdev->connect_keys);
@@ -929,7 +929,6 @@ void cfg80211_process_wdev_events(struct wireless_dev *wdev)
{
struct cfg80211_event *ev;
unsigned long flags;
- const u8 *bssid = NULL;
spin_lock_irqsave(&wdev->event_lock, flags);
while (!list_empty(&wdev->event_list)) {
@@ -941,20 +940,13 @@ void cfg80211_process_wdev_events(struct wireless_dev *wdev)
wdev_lock(wdev);
switch (ev->type) {
case EVENT_CONNECT_RESULT:
- if (!is_zero_ether_addr(ev->cr.bssid))
- bssid = ev->cr.bssid;
__cfg80211_connect_result(
- wdev->netdev, bssid,
- ev->cr.req_ie, ev->cr.req_ie_len,
- ev->cr.resp_ie, ev->cr.resp_ie_len,
- ev->cr.status,
- ev->cr.status == WLAN_STATUS_SUCCESS,
- ev->cr.bss, ev->cr.timeout_reason);
+ wdev->netdev,
+ &ev->cr,
+ ev->cr.status == WLAN_STATUS_SUCCESS);
break;
case EVENT_ROAMED:
- __cfg80211_roamed(wdev, ev->rm.bss, ev->rm.req_ie,
- ev->rm.req_ie_len, ev->rm.resp_ie,
- ev->rm.resp_ie_len);
+ __cfg80211_roamed(wdev, &ev->rm);
break;
case EVENT_DISCONNECTED:
__cfg80211_disconnected(wdev->netdev,
@@ -991,7 +983,7 @@ void cfg80211_process_rdev_events(struct cfg80211_registered_device *rdev)
int cfg80211_change_iface(struct cfg80211_registered_device *rdev,
struct net_device *dev, enum nl80211_iftype ntype,
- u32 *flags, struct vif_params *params)
+ struct vif_params *params)
{
int err;
enum nl80211_iftype otype = dev->ieee80211_ptr->iftype;
@@ -1049,7 +1041,7 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev,
cfg80211_process_rdev_events(rdev);
}
- err = rdev_change_virtual_intf(rdev, dev, ntype, flags, params);
+ err = rdev_change_virtual_intf(rdev, dev, ntype, params);
WARN_ON(!err && dev->ieee80211_ptr->iftype != ntype);
@@ -1097,6 +1089,35 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev,
return err;
}
+static u32 cfg80211_calculate_bitrate_ht(struct rate_info *rate)
+{
+ int modulation, streams, bitrate;
+
+ /* the formula below does only work for MCS values smaller than 32 */
+ if (WARN_ON_ONCE(rate->mcs >= 32))
+ return 0;
+
+ modulation = rate->mcs & 7;
+ streams = (rate->mcs >> 3) + 1;
+
+ bitrate = (rate->bw == RATE_INFO_BW_40) ? 13500000 : 6500000;
+
+ if (modulation < 4)
+ bitrate *= (modulation + 1);
+ else if (modulation == 4)
+ bitrate *= (modulation + 2);
+ else
+ bitrate *= (modulation + 3);
+
+ bitrate *= streams;
+
+ if (rate->flags & RATE_INFO_FLAGS_SHORT_GI)
+ bitrate = (bitrate / 9) * 10;
+
+ /* do NOT round down here */
+ return (bitrate + 50000) / 100000;
+}
+
static u32 cfg80211_calculate_bitrate_60g(struct rate_info *rate)
{
static const u32 __mcs2bitrate[] = {
@@ -1230,39 +1251,14 @@ static u32 cfg80211_calculate_bitrate_vht(struct rate_info *rate)
u32 cfg80211_calculate_bitrate(struct rate_info *rate)
{
- int modulation, streams, bitrate;
-
- if (!(rate->flags & RATE_INFO_FLAGS_MCS) &&
- !(rate->flags & RATE_INFO_FLAGS_VHT_MCS))
- return rate->legacy;
+ if (rate->flags & RATE_INFO_FLAGS_MCS)
+ return cfg80211_calculate_bitrate_ht(rate);
if (rate->flags & RATE_INFO_FLAGS_60G)
return cfg80211_calculate_bitrate_60g(rate);
if (rate->flags & RATE_INFO_FLAGS_VHT_MCS)
return cfg80211_calculate_bitrate_vht(rate);
- /* the formula below does only work for MCS values smaller than 32 */
- if (WARN_ON_ONCE(rate->mcs >= 32))
- return 0;
-
- modulation = rate->mcs & 7;
- streams = (rate->mcs >> 3) + 1;
-
- bitrate = (rate->bw == RATE_INFO_BW_40) ? 13500000 : 6500000;
-
- if (modulation < 4)
- bitrate *= (modulation + 1);
- else if (modulation == 4)
- bitrate *= (modulation + 2);
- else
- bitrate *= (modulation + 3);
-
- bitrate *= streams;
-
- if (rate->flags & RATE_INFO_FLAGS_SHORT_GI)
- bitrate = (bitrate / 9) * 10;
-
- /* do NOT round down here */
- return (bitrate + 50000) / 100000;
+ return rate->legacy;
}
EXPORT_SYMBOL(cfg80211_calculate_bitrate);
diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c
index a220156cf217..5d4a02c7979b 100644
--- a/net/wireless/wext-compat.c
+++ b/net/wireless/wext-compat.c
@@ -62,7 +62,7 @@ int cfg80211_wext_siwmode(struct net_device *dev, struct iw_request_info *info,
memset(&vifparams, 0, sizeof(vifparams));
- return cfg80211_change_iface(rdev, dev, type, NULL, &vifparams);
+ return cfg80211_change_iface(rdev, dev, type, &vifparams);
}
EXPORT_WEXT_HANDLER(cfg80211_wext_siwmode);
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index 8b911c29860e..5a1a98df3499 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -1791,32 +1791,40 @@ void x25_kill_by_neigh(struct x25_neigh *nb)
static int __init x25_init(void)
{
- int rc = proto_register(&x25_proto, 0);
+ int rc;
- if (rc != 0)
+ rc = proto_register(&x25_proto, 0);
+ if (rc)
goto out;
rc = sock_register(&x25_family_ops);
- if (rc != 0)
+ if (rc)
goto out_proto;
dev_add_pack(&x25_packet_type);
rc = register_netdevice_notifier(&x25_dev_notifier);
- if (rc != 0)
+ if (rc)
goto out_sock;
- pr_info("Linux Version 0.2\n");
+ rc = x25_register_sysctl();
+ if (rc)
+ goto out_dev;
- x25_register_sysctl();
rc = x25_proc_init();
- if (rc != 0)
- goto out_dev;
+ if (rc)
+ goto out_sysctl;
+
+ pr_info("Linux Version 0.2\n");
+
out:
return rc;
+out_sysctl:
+ x25_unregister_sysctl();
out_dev:
unregister_netdevice_notifier(&x25_dev_notifier);
out_sock:
+ dev_remove_pack(&x25_packet_type);
sock_unregister(AF_X25);
out_proto:
proto_unregister(&x25_proto);
diff --git a/net/x25/sysctl_net_x25.c b/net/x25/sysctl_net_x25.c
index a06dfe143c67..ba078c85f0a1 100644
--- a/net/x25/sysctl_net_x25.c
+++ b/net/x25/sysctl_net_x25.c
@@ -73,9 +73,12 @@ static struct ctl_table x25_table[] = {
{ },
};
-void __init x25_register_sysctl(void)
+int __init x25_register_sysctl(void)
{
x25_table_header = register_net_sysctl(&init_net, "net/x25", x25_table);
+ if (!x25_table_header)
+ return -ENOMEM;
+ return 0;
}
void x25_unregister_sysctl(void)
diff --git a/net/xfrm/Makefile b/net/xfrm/Makefile
index c0e961983f17..abf81b329dc1 100644
--- a/net/xfrm/Makefile
+++ b/net/xfrm/Makefile
@@ -5,6 +5,7 @@
obj-$(CONFIG_XFRM) := xfrm_policy.o xfrm_state.o xfrm_hash.o \
xfrm_input.o xfrm_output.o \
xfrm_sysctl.o xfrm_replay.o
+obj-$(CONFIG_XFRM_OFFLOAD) += xfrm_device.o
obj-$(CONFIG_XFRM_STATISTICS) += xfrm_proc.o
obj-$(CONFIG_XFRM_ALGO) += xfrm_algo.o
obj-$(CONFIG_XFRM_USER) += xfrm_user.o
diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c
new file mode 100644
index 000000000000..8ec8a3fcf8d4
--- /dev/null
+++ b/net/xfrm/xfrm_device.c
@@ -0,0 +1,208 @@
+/*
+ * xfrm_device.c - IPsec device offloading code.
+ *
+ * Copyright (c) 2015 secunet Security Networks AG
+ *
+ * Author:
+ * Steffen Klassert <steffen.klassert@secunet.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/errno.h>
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <net/dst.h>
+#include <net/xfrm.h>
+#include <linux/notifier.h>
+
+int validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t features)
+{
+ int err;
+ struct xfrm_state *x;
+ struct xfrm_offload *xo = xfrm_offload(skb);
+
+ if (skb_is_gso(skb))
+ return 0;
+
+ if (xo) {
+ x = skb->sp->xvec[skb->sp->len - 1];
+ if (xo->flags & XFRM_GRO || x->xso.flags & XFRM_OFFLOAD_INBOUND)
+ return 0;
+
+ x->outer_mode->xmit(x, skb);
+
+ err = x->type_offload->xmit(x, skb, features);
+ if (err) {
+ XFRM_INC_STATS(xs_net(x), LINUX_MIB_XFRMOUTSTATEPROTOERROR);
+ return err;
+ }
+
+ skb_push(skb, skb->data - skb_mac_header(skb));
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(validate_xmit_xfrm);
+
+int xfrm_dev_state_add(struct net *net, struct xfrm_state *x,
+ struct xfrm_user_offload *xuo)
+{
+ int err;
+ struct dst_entry *dst;
+ struct net_device *dev;
+ struct xfrm_state_offload *xso = &x->xso;
+ xfrm_address_t *saddr;
+ xfrm_address_t *daddr;
+
+ if (!x->type_offload)
+ return 0;
+
+ /* We don't yet support UDP encapsulation, TFC padding and ESN. */
+ if (x->encap || x->tfcpad || (x->props.flags & XFRM_STATE_ESN))
+ return 0;
+
+ dev = dev_get_by_index(net, xuo->ifindex);
+ if (!dev) {
+ if (!(xuo->flags & XFRM_OFFLOAD_INBOUND)) {
+ saddr = &x->props.saddr;
+ daddr = &x->id.daddr;
+ } else {
+ saddr = &x->id.daddr;
+ daddr = &x->props.saddr;
+ }
+
+ dst = __xfrm_dst_lookup(net, 0, 0, saddr, daddr, x->props.family);
+ if (IS_ERR(dst))
+ return 0;
+
+ dev = dst->dev;
+
+ dev_hold(dev);
+ dst_release(dst);
+ }
+
+ if (!dev->xfrmdev_ops || !dev->xfrmdev_ops->xdo_dev_state_add) {
+ dev_put(dev);
+ return 0;
+ }
+
+ xso->dev = dev;
+ xso->num_exthdrs = 1;
+ xso->flags = xuo->flags;
+
+ err = dev->xfrmdev_ops->xdo_dev_state_add(x);
+ if (err) {
+ dev_put(dev);
+ return err;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(xfrm_dev_state_add);
+
+bool xfrm_dev_offload_ok(struct sk_buff *skb, struct xfrm_state *x)
+{
+ int mtu;
+ struct dst_entry *dst = skb_dst(skb);
+ struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
+ struct net_device *dev = x->xso.dev;
+
+ if (!x->type_offload || x->encap)
+ return false;
+
+ if ((x->xso.offload_handle && (dev == dst->path->dev)) &&
+ !dst->child->xfrm && x->type->get_mtu) {
+ mtu = x->type->get_mtu(x, xdst->child_mtu_cached);
+
+ if (skb->len <= mtu)
+ goto ok;
+
+ if (skb_is_gso(skb) && skb_gso_validate_mtu(skb, mtu))
+ goto ok;
+ }
+
+ return false;
+
+ok:
+ if (dev && dev->xfrmdev_ops && dev->xfrmdev_ops->xdo_dev_offload_ok)
+ return x->xso.dev->xfrmdev_ops->xdo_dev_offload_ok(skb, x);
+
+ return true;
+}
+EXPORT_SYMBOL_GPL(xfrm_dev_offload_ok);
+
+int xfrm_dev_register(struct net_device *dev)
+{
+ if ((dev->features & NETIF_F_HW_ESP) && !dev->xfrmdev_ops)
+ return NOTIFY_BAD;
+ if ((dev->features & NETIF_F_HW_ESP_TX_CSUM) &&
+ !(dev->features & NETIF_F_HW_ESP))
+ return NOTIFY_BAD;
+
+ return NOTIFY_DONE;
+}
+
+static int xfrm_dev_unregister(struct net_device *dev)
+{
+ return NOTIFY_DONE;
+}
+
+static int xfrm_dev_feat_change(struct net_device *dev)
+{
+ if ((dev->features & NETIF_F_HW_ESP) && !dev->xfrmdev_ops)
+ return NOTIFY_BAD;
+ else if (!(dev->features & NETIF_F_HW_ESP))
+ dev->xfrmdev_ops = NULL;
+
+ if ((dev->features & NETIF_F_HW_ESP_TX_CSUM) &&
+ !(dev->features & NETIF_F_HW_ESP))
+ return NOTIFY_BAD;
+
+ return NOTIFY_DONE;
+}
+
+static int xfrm_dev_down(struct net_device *dev)
+{
+ if (dev->hw_features & NETIF_F_HW_ESP)
+ xfrm_dev_state_flush(dev_net(dev), dev, true);
+
+ xfrm_garbage_collect(dev_net(dev));
+
+ return NOTIFY_DONE;
+}
+
+static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void *ptr)
+{
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+
+ switch (event) {
+ case NETDEV_REGISTER:
+ return xfrm_dev_register(dev);
+
+ case NETDEV_UNREGISTER:
+ return xfrm_dev_unregister(dev);
+
+ case NETDEV_FEAT_CHANGE:
+ return xfrm_dev_feat_change(dev);
+
+ case NETDEV_DOWN:
+ return xfrm_dev_down(dev);
+ }
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block xfrm_dev_notifier = {
+ .notifier_call = xfrm_dev_event,
+};
+
+void __net_init xfrm_dev_init(void)
+{
+ register_netdevice_notifier(&xfrm_dev_notifier);
+}
diff --git a/net/xfrm/xfrm_hash.h b/net/xfrm/xfrm_hash.h
index 666c5ffe929d..eaea9c4fb3b0 100644
--- a/net/xfrm/xfrm_hash.h
+++ b/net/xfrm/xfrm_hash.h
@@ -54,8 +54,8 @@ static inline unsigned int __xfrm4_dpref_spref_hash(const xfrm_address_t *daddr,
static inline unsigned int __xfrm6_pref_hash(const xfrm_address_t *addr,
__u8 prefixlen)
{
- int pdw;
- int pbi;
+ unsigned int pdw;
+ unsigned int pbi;
u32 initval = 0;
pdw = prefixlen >> 5; /* num of whole u32 in prefix */
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index e23570b647ae..9de4b1dbc0ae 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -107,6 +107,8 @@ struct sec_path *secpath_dup(struct sec_path *src)
sp->len = 0;
sp->olen = 0;
+ memset(sp->ovec, 0, sizeof(sp->ovec[XFRM_MAX_OFFLOAD_DEPTH]));
+
if (src) {
int i;
@@ -207,8 +209,9 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
unsigned int family;
int decaps = 0;
int async = 0;
- struct xfrm_offload *xo;
bool xfrm_gro = false;
+ bool crypto_done = false;
+ struct xfrm_offload *xo = xfrm_offload(skb);
if (encap_type < 0) {
x = xfrm_input_state(skb);
@@ -220,9 +223,40 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
seq = XFRM_SKB_CB(skb)->seq.input.low;
goto resume;
}
+
/* encap_type < -1 indicates a GRO call. */
encap_type = 0;
seq = XFRM_SPI_SKB_CB(skb)->seq;
+
+ if (xo && (xo->flags & CRYPTO_DONE)) {
+ crypto_done = true;
+ x = xfrm_input_state(skb);
+ family = XFRM_SPI_SKB_CB(skb)->family;
+
+ if (!(xo->status & CRYPTO_SUCCESS)) {
+ if (xo->status &
+ (CRYPTO_TRANSPORT_AH_AUTH_FAILED |
+ CRYPTO_TRANSPORT_ESP_AUTH_FAILED |
+ CRYPTO_TUNNEL_AH_AUTH_FAILED |
+ CRYPTO_TUNNEL_ESP_AUTH_FAILED)) {
+
+ xfrm_audit_state_icvfail(x, skb,
+ x->type->proto);
+ x->stats.integrity_failed++;
+ XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEPROTOERROR);
+ goto drop;
+ }
+
+ XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR);
+ goto drop;
+ }
+
+ if ((err = xfrm_parse_spi(skb, nexthdr, &spi, &seq)) != 0) {
+ XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR);
+ goto drop;
+ }
+ }
+
goto lock;
}
@@ -311,7 +345,10 @@ lock:
skb_dst_force(skb);
dev_hold(skb->dev);
- nexthdr = x->type->input(x, skb);
+ if (crypto_done)
+ nexthdr = x->type_offload->input_tail(x, skb);
+ else
+ nexthdr = x->type->input(x, skb);
if (nexthdr == -EINPROGRESS)
return 0;
diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index 8ba29fe58352..8c0b6722aaa8 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -99,12 +99,13 @@ static int xfrm_output_one(struct sk_buff *skb, int err)
skb_dst_force(skb);
- /* Inner headers are invalid now. */
- skb->encapsulation = 0;
-
- err = x->type->output(x, skb);
- if (err == -EINPROGRESS)
- goto out;
+ if (xfrm_offload(skb)) {
+ x->type_offload->encap(x, skb);
+ } else {
+ err = x->type->output(x, skb);
+ if (err == -EINPROGRESS)
+ goto out;
+ }
resume:
if (err) {
@@ -200,8 +201,40 @@ static int xfrm_output_gso(struct net *net, struct sock *sk, struct sk_buff *skb
int xfrm_output(struct sock *sk, struct sk_buff *skb)
{
struct net *net = dev_net(skb_dst(skb)->dev);
+ struct xfrm_state *x = skb_dst(skb)->xfrm;
int err;
+ secpath_reset(skb);
+ skb->encapsulation = 0;
+
+ if (xfrm_dev_offload_ok(skb, x)) {
+ struct sec_path *sp;
+
+ sp = secpath_dup(skb->sp);
+ if (!sp) {
+ XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR);
+ kfree_skb(skb);
+ return -ENOMEM;
+ }
+ if (skb->sp)
+ secpath_put(skb->sp);
+ skb->sp = sp;
+ skb->encapsulation = 1;
+
+ sp->olen++;
+ sp->xvec[skb->sp->len++] = x;
+ xfrm_state_hold(x);
+
+ if (skb_is_gso(skb)) {
+ skb_shinfo(skb)->gso_type |= SKB_GSO_ESP;
+
+ return xfrm_output2(net, sk, skb);
+ }
+
+ if (x->xso.dev && x->xso.dev->features & NETIF_F_HW_ESP_TX_CSUM)
+ goto out;
+ }
+
if (skb_is_gso(skb))
return xfrm_output_gso(net, sk, skb);
@@ -214,6 +247,7 @@ int xfrm_output(struct sock *sk, struct sk_buff *skb)
}
}
+out:
return xfrm_output2(net, sk, skb);
}
EXPORT_SYMBOL_GPL(xfrm_output);
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index dfc77b9c5e5a..b00a1d5a7f52 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -116,11 +116,10 @@ static const struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short fa
return afinfo;
}
-static inline struct dst_entry *__xfrm_dst_lookup(struct net *net,
- int tos, int oif,
- const xfrm_address_t *saddr,
- const xfrm_address_t *daddr,
- int family)
+struct dst_entry *__xfrm_dst_lookup(struct net *net, int tos, int oif,
+ const xfrm_address_t *saddr,
+ const xfrm_address_t *daddr,
+ int family)
{
const struct xfrm_policy_afinfo *afinfo;
struct dst_entry *dst;
@@ -135,6 +134,7 @@ static inline struct dst_entry *__xfrm_dst_lookup(struct net *net,
return dst;
}
+EXPORT_SYMBOL(__xfrm_dst_lookup);
static inline struct dst_entry *xfrm_dst_lookup(struct xfrm_state *x,
int tos, int oif,
@@ -2933,21 +2933,6 @@ void xfrm_policy_unregister_afinfo(const struct xfrm_policy_afinfo *afinfo)
}
EXPORT_SYMBOL(xfrm_policy_unregister_afinfo);
-static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void *ptr)
-{
- struct net_device *dev = netdev_notifier_info_to_dev(ptr);
-
- switch (event) {
- case NETDEV_DOWN:
- xfrm_garbage_collect(dev_net(dev));
- }
- return NOTIFY_DONE;
-}
-
-static struct notifier_block xfrm_dev_notifier = {
- .notifier_call = xfrm_dev_event,
-};
-
#ifdef CONFIG_XFRM_STATISTICS
static int __net_init xfrm_statistics_init(struct net *net)
{
@@ -3024,7 +3009,7 @@ static int __net_init xfrm_policy_init(struct net *net)
INIT_WORK(&net->xfrm.policy_hash_work, xfrm_hash_resize);
INIT_WORK(&net->xfrm.policy_hthresh.work, xfrm_hash_rebuild);
if (net_eq(net, &init_net))
- register_netdevice_notifier(&xfrm_dev_notifier);
+ xfrm_dev_init();
return 0;
out_bydst:
diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c
index cdc2e2e71bff..8b23c5bcf8e8 100644
--- a/net/xfrm/xfrm_replay.c
+++ b/net/xfrm/xfrm_replay.c
@@ -45,7 +45,8 @@ u32 xfrm_replay_seqhi(struct xfrm_state *x, __be32 net_seq)
return seq_hi;
}
-
+EXPORT_SYMBOL(xfrm_replay_seqhi);
+;
static void xfrm_replay_notify(struct xfrm_state *x, int event)
{
struct km_event c;
@@ -558,6 +559,158 @@ static void xfrm_replay_advance_esn(struct xfrm_state *x, __be32 net_seq)
x->repl->notify(x, XFRM_REPLAY_UPDATE);
}
+#ifdef CONFIG_XFRM_OFFLOAD
+static int xfrm_replay_overflow_offload(struct xfrm_state *x, struct sk_buff *skb)
+{
+ int err = 0;
+ struct net *net = xs_net(x);
+ struct xfrm_offload *xo = xfrm_offload(skb);
+ __u32 oseq = x->replay.oseq;
+
+ if (!xo)
+ return xfrm_replay_overflow(x, skb);
+
+ if (x->type->flags & XFRM_TYPE_REPLAY_PROT) {
+ if (!skb_is_gso(skb)) {
+ XFRM_SKB_CB(skb)->seq.output.low = ++oseq;
+ xo->seq.low = oseq;
+ } else {
+ XFRM_SKB_CB(skb)->seq.output.low = oseq + 1;
+ xo->seq.low = oseq + 1;
+ oseq += skb_shinfo(skb)->gso_segs;
+ }
+
+ XFRM_SKB_CB(skb)->seq.output.hi = 0;
+ xo->seq.hi = 0;
+ if (unlikely(oseq < x->replay.oseq)) {
+ xfrm_audit_state_replay_overflow(x, skb);
+ err = -EOVERFLOW;
+
+ return err;
+ }
+
+ x->replay.oseq = oseq;
+
+ if (xfrm_aevent_is_on(net))
+ x->repl->notify(x, XFRM_REPLAY_UPDATE);
+ }
+
+ return err;
+}
+
+static int xfrm_replay_overflow_offload_bmp(struct xfrm_state *x, struct sk_buff *skb)
+{
+ int err = 0;
+ struct xfrm_offload *xo = xfrm_offload(skb);
+ struct xfrm_replay_state_esn *replay_esn = x->replay_esn;
+ struct net *net = xs_net(x);
+ __u32 oseq = replay_esn->oseq;
+
+ if (!xo)
+ return xfrm_replay_overflow_bmp(x, skb);
+
+ if (x->type->flags & XFRM_TYPE_REPLAY_PROT) {
+ if (!skb_is_gso(skb)) {
+ XFRM_SKB_CB(skb)->seq.output.low = ++oseq;
+ xo->seq.low = oseq;
+ } else {
+ XFRM_SKB_CB(skb)->seq.output.low = oseq + 1;
+ xo->seq.low = oseq + 1;
+ oseq += skb_shinfo(skb)->gso_segs;
+ }
+
+ XFRM_SKB_CB(skb)->seq.output.hi = 0;
+ xo->seq.hi = 0;
+ if (unlikely(oseq < replay_esn->oseq)) {
+ xfrm_audit_state_replay_overflow(x, skb);
+ err = -EOVERFLOW;
+
+ return err;
+ } else {
+ replay_esn->oseq = oseq;
+ }
+
+ if (xfrm_aevent_is_on(net))
+ x->repl->notify(x, XFRM_REPLAY_UPDATE);
+ }
+
+ return err;
+}
+
+static int xfrm_replay_overflow_offload_esn(struct xfrm_state *x, struct sk_buff *skb)
+{
+ int err = 0;
+ struct xfrm_offload *xo = xfrm_offload(skb);
+ struct xfrm_replay_state_esn *replay_esn = x->replay_esn;
+ struct net *net = xs_net(x);
+ __u32 oseq = replay_esn->oseq;
+ __u32 oseq_hi = replay_esn->oseq_hi;
+
+ if (!xo)
+ return xfrm_replay_overflow_esn(x, skb);
+
+ if (x->type->flags & XFRM_TYPE_REPLAY_PROT) {
+ if (!skb_is_gso(skb)) {
+ XFRM_SKB_CB(skb)->seq.output.low = ++oseq;
+ XFRM_SKB_CB(skb)->seq.output.hi = oseq_hi;
+ xo->seq.low = oseq;
+ xo->seq.hi = oseq_hi;
+ } else {
+ XFRM_SKB_CB(skb)->seq.output.low = oseq + 1;
+ XFRM_SKB_CB(skb)->seq.output.hi = oseq_hi;
+ xo->seq.low = oseq = oseq + 1;
+ xo->seq.hi = oseq_hi;
+ oseq += skb_shinfo(skb)->gso_segs;
+ }
+
+ if (unlikely(oseq < replay_esn->oseq)) {
+ XFRM_SKB_CB(skb)->seq.output.hi = ++oseq_hi;
+ xo->seq.hi = oseq_hi;
+
+ if (replay_esn->oseq_hi == 0) {
+ replay_esn->oseq--;
+ replay_esn->oseq_hi--;
+ xfrm_audit_state_replay_overflow(x, skb);
+ err = -EOVERFLOW;
+
+ return err;
+ }
+ }
+
+ replay_esn->oseq = oseq;
+ replay_esn->oseq_hi = oseq_hi;
+
+ if (xfrm_aevent_is_on(net))
+ x->repl->notify(x, XFRM_REPLAY_UPDATE);
+ }
+
+ return err;
+}
+
+static const struct xfrm_replay xfrm_replay_legacy = {
+ .advance = xfrm_replay_advance,
+ .check = xfrm_replay_check,
+ .recheck = xfrm_replay_check,
+ .notify = xfrm_replay_notify,
+ .overflow = xfrm_replay_overflow_offload,
+};
+
+static const struct xfrm_replay xfrm_replay_bmp = {
+ .advance = xfrm_replay_advance_bmp,
+ .check = xfrm_replay_check_bmp,
+ .recheck = xfrm_replay_check_bmp,
+ .notify = xfrm_replay_notify_bmp,
+ .overflow = xfrm_replay_overflow_offload_bmp,
+};
+
+static const struct xfrm_replay xfrm_replay_esn = {
+ .advance = xfrm_replay_advance_esn,
+ .check = xfrm_replay_check_esn,
+ .recheck = xfrm_replay_recheck_esn,
+ .notify = xfrm_replay_notify_esn,
+ .overflow = xfrm_replay_overflow_offload_esn,
+};
+#else
static const struct xfrm_replay xfrm_replay_legacy = {
.advance = xfrm_replay_advance,
.check = xfrm_replay_check,
@@ -581,6 +734,7 @@ static const struct xfrm_replay xfrm_replay_esn = {
.notify = xfrm_replay_notify_esn,
.overflow = xfrm_replay_overflow_esn,
};
+#endif
int xfrm_init_replay(struct xfrm_state *x)
{
@@ -595,10 +749,12 @@ int xfrm_init_replay(struct xfrm_state *x)
if (replay_esn->replay_window == 0)
return -EINVAL;
x->repl = &xfrm_replay_esn;
- } else
+ } else {
x->repl = &xfrm_replay_bmp;
- } else
+ }
+ } else {
x->repl = &xfrm_replay_legacy;
+ }
return 0;
}
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 5a597dbbe564..fc3c5aa38754 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -251,6 +251,75 @@ static void xfrm_put_type(const struct xfrm_type *type)
module_put(type->owner);
}
+static DEFINE_SPINLOCK(xfrm_type_offload_lock);
+int xfrm_register_type_offload(const struct xfrm_type_offload *type,
+ unsigned short family)
+{
+ struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
+ const struct xfrm_type_offload **typemap;
+ int err = 0;
+
+ if (unlikely(afinfo == NULL))
+ return -EAFNOSUPPORT;
+ typemap = afinfo->type_offload_map;
+ spin_lock_bh(&xfrm_type_offload_lock);
+
+ if (likely(typemap[type->proto] == NULL))
+ typemap[type->proto] = type;
+ else
+ err = -EEXIST;
+ spin_unlock_bh(&xfrm_type_offload_lock);
+ rcu_read_unlock();
+ return err;
+}
+EXPORT_SYMBOL(xfrm_register_type_offload);
+
+int xfrm_unregister_type_offload(const struct xfrm_type_offload *type,
+ unsigned short family)
+{
+ struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
+ const struct xfrm_type_offload **typemap;
+ int err = 0;
+
+ if (unlikely(afinfo == NULL))
+ return -EAFNOSUPPORT;
+ typemap = afinfo->type_offload_map;
+ spin_lock_bh(&xfrm_type_offload_lock);
+
+ if (unlikely(typemap[type->proto] != type))
+ err = -ENOENT;
+ else
+ typemap[type->proto] = NULL;
+ spin_unlock_bh(&xfrm_type_offload_lock);
+ rcu_read_unlock();
+ return err;
+}
+EXPORT_SYMBOL(xfrm_unregister_type_offload);
+
+static const struct xfrm_type_offload *xfrm_get_type_offload(u8 proto, unsigned short family)
+{
+ struct xfrm_state_afinfo *afinfo;
+ const struct xfrm_type_offload **typemap;
+ const struct xfrm_type_offload *type;
+
+ afinfo = xfrm_state_get_afinfo(family);
+ if (unlikely(afinfo == NULL))
+ return NULL;
+ typemap = afinfo->type_offload_map;
+
+ type = typemap[proto];
+ if ((type && !try_module_get(type->owner)))
+ type = NULL;
+
+ rcu_read_unlock();
+ return type;
+}
+
+static void xfrm_put_type_offload(const struct xfrm_type_offload *type)
+{
+ module_put(type->owner);
+}
+
static DEFINE_SPINLOCK(xfrm_mode_lock);
int xfrm_register_mode(struct xfrm_mode *mode, int family)
{
@@ -365,10 +434,13 @@ static void xfrm_state_gc_destroy(struct xfrm_state *x)
xfrm_put_mode(x->inner_mode_iaf);
if (x->outer_mode)
xfrm_put_mode(x->outer_mode);
+ if (x->type_offload)
+ xfrm_put_type_offload(x->type_offload);
if (x->type) {
x->type->destructor(x);
xfrm_put_type(x->type);
}
+ xfrm_dev_state_free(x);
security_xfrm_state_free(x);
kfree(x);
}
@@ -538,6 +610,8 @@ int __xfrm_state_delete(struct xfrm_state *x)
net->xfrm.state_num--;
spin_unlock(&net->xfrm.xfrm_state_lock);
+ xfrm_dev_state_delete(x);
+
/* All xfrm_state objects are created by xfrm_state_alloc.
* The xfrm_state_alloc call gives a reference, and that
* is what we are dropping here.
@@ -582,12 +656,41 @@ xfrm_state_flush_secctx_check(struct net *net, u8 proto, bool task_valid)
return err;
}
+
+static inline int
+xfrm_dev_state_flush_secctx_check(struct net *net, struct net_device *dev, bool task_valid)
+{
+ int i, err = 0;
+
+ for (i = 0; i <= net->xfrm.state_hmask; i++) {
+ struct xfrm_state *x;
+ struct xfrm_state_offload *xso;
+
+ hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) {
+ xso = &x->xso;
+
+ if (xso->dev == dev &&
+ (err = security_xfrm_state_delete(x)) != 0) {
+ xfrm_audit_state_delete(x, 0, task_valid);
+ return err;
+ }
+ }
+ }
+
+ return err;
+}
#else
static inline int
xfrm_state_flush_secctx_check(struct net *net, u8 proto, bool task_valid)
{
return 0;
}
+
+static inline int
+xfrm_dev_state_flush_secctx_check(struct net *net, struct net_device *dev, bool task_valid)
+{
+ return 0;
+}
#endif
int xfrm_state_flush(struct net *net, u8 proto, bool task_valid)
@@ -630,6 +733,48 @@ out:
}
EXPORT_SYMBOL(xfrm_state_flush);
+int xfrm_dev_state_flush(struct net *net, struct net_device *dev, bool task_valid)
+{
+ int i, err = 0, cnt = 0;
+
+ spin_lock_bh(&net->xfrm.xfrm_state_lock);
+ err = xfrm_dev_state_flush_secctx_check(net, dev, task_valid);
+ if (err)
+ goto out;
+
+ err = -ESRCH;
+ for (i = 0; i <= net->xfrm.state_hmask; i++) {
+ struct xfrm_state *x;
+ struct xfrm_state_offload *xso;
+restart:
+ hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) {
+ xso = &x->xso;
+
+ if (!xfrm_state_kern(x) && xso->dev == dev) {
+ xfrm_state_hold(x);
+ spin_unlock_bh(&net->xfrm.xfrm_state_lock);
+
+ err = xfrm_state_delete(x);
+ xfrm_audit_state_delete(x, err ? 0 : 1,
+ task_valid);
+ xfrm_state_put(x);
+ if (!err)
+ cnt++;
+
+ spin_lock_bh(&net->xfrm.xfrm_state_lock);
+ goto restart;
+ }
+ }
+ }
+ if (cnt)
+ err = 0;
+
+out:
+ spin_unlock_bh(&net->xfrm.xfrm_state_lock);
+ return err;
+}
+EXPORT_SYMBOL(xfrm_dev_state_flush);
+
void xfrm_sad_getinfo(struct net *net, struct xfrmk_sadinfo *si)
{
spin_lock_bh(&net->xfrm.xfrm_state_lock);
@@ -2077,6 +2222,8 @@ int __xfrm_init_state(struct xfrm_state *x, bool init_replay)
if (x->type == NULL)
goto error;
+ x->type_offload = xfrm_get_type_offload(x->id.proto, family);
+
err = x->type->init_state(x);
if (err)
goto error;
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 40a8aa39220d..38614df33ec8 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -55,7 +55,7 @@ static int verify_one_alg(struct nlattr **attrs, enum xfrm_attr_type_t type)
return -EINVAL;
}
- algp->alg_name[CRYPTO_MAX_ALG_NAME - 1] = '\0';
+ algp->alg_name[sizeof(algp->alg_name) - 1] = '\0';
return 0;
}
@@ -71,7 +71,7 @@ static int verify_auth_trunc(struct nlattr **attrs)
if (nla_len(rt) < xfrm_alg_auth_len(algp))
return -EINVAL;
- algp->alg_name[CRYPTO_MAX_ALG_NAME - 1] = '\0';
+ algp->alg_name[sizeof(algp->alg_name) - 1] = '\0';
return 0;
}
@@ -87,7 +87,7 @@ static int verify_aead(struct nlattr **attrs)
if (nla_len(rt) < aead_len(algp))
return -EINVAL;
- algp->alg_name[CRYPTO_MAX_ALG_NAME - 1] = '\0';
+ algp->alg_name[sizeof(algp->alg_name) - 1] = '\0';
return 0;
}
@@ -595,6 +595,13 @@ static struct xfrm_state *xfrm_state_construct(struct net *net,
goto error;
}
+ if (attrs[XFRMA_OFFLOAD_DEV]) {
+ err = xfrm_dev_state_add(net, x,
+ nla_data(attrs[XFRMA_OFFLOAD_DEV]));
+ if (err)
+ goto error;
+ }
+
if ((err = xfrm_alloc_replay_state_esn(&x->replay_esn, &x->preplay_esn,
attrs[XFRMA_REPLAY_ESN_VAL])))
goto error;
@@ -779,6 +786,23 @@ static int copy_sec_ctx(struct xfrm_sec_ctx *s, struct sk_buff *skb)
return 0;
}
+static int copy_user_offload(struct xfrm_state_offload *xso, struct sk_buff *skb)
+{
+ struct xfrm_user_offload *xuo;
+ struct nlattr *attr;
+
+ attr = nla_reserve(skb, XFRMA_OFFLOAD_DEV, sizeof(*xuo));
+ if (attr == NULL)
+ return -EMSGSIZE;
+
+ xuo = nla_data(attr);
+
+ xuo->ifindex = xso->dev->ifindex;
+ xuo->flags = xso->flags;
+
+ return 0;
+}
+
static int copy_to_user_auth(struct xfrm_algo_auth *auth, struct sk_buff *skb)
{
struct xfrm_algo *algo;
@@ -869,6 +893,10 @@ static int copy_to_user_state_extra(struct xfrm_state *x,
&x->replay);
if (ret)
goto out;
+ if(x->xso.dev)
+ ret = copy_user_offload(&x->xso, skb);
+ if (ret)
+ goto out;
if (x->security)
ret = copy_sec_ctx(x->security, skb);
out:
@@ -932,8 +960,8 @@ static int xfrm_dump_sa(struct sk_buff *skb, struct netlink_callback *cb)
u8 proto = 0;
int err;
- err = nlmsg_parse(cb->nlh, 0, attrs, XFRMA_MAX,
- xfrma_policy);
+ err = nlmsg_parse(cb->nlh, 0, attrs, XFRMA_MAX, xfrma_policy,
+ NULL);
if (err < 0)
return err;
@@ -2406,6 +2434,7 @@ static const struct nla_policy xfrma_policy[XFRMA_MAX+1] = {
[XFRMA_SA_EXTRA_FLAGS] = { .type = NLA_U32 },
[XFRMA_PROTO] = { .type = NLA_U8 },
[XFRMA_ADDRESS_FILTER] = { .len = sizeof(struct xfrm_address_filter) },
+ [XFRMA_OFFLOAD_DEV] = { .len = sizeof(struct xfrm_user_offload) },
};
static const struct nla_policy xfrma_spd_policy[XFRMA_SPD_MAX+1] = {
@@ -2448,7 +2477,8 @@ static const struct xfrm_link {
[XFRM_MSG_GETSPDINFO - XFRM_MSG_BASE] = { .doit = xfrm_get_spdinfo },
};
-static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
struct nlattr *attrs[XFRMA_MAX+1];
@@ -2488,7 +2518,7 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
err = nlmsg_parse(nlh, xfrm_msg_min[type], attrs,
link->nla_max ? : XFRMA_MAX,
- link->nla_pol ? : xfrma_policy);
+ link->nla_pol ? : xfrma_policy, extack);
if (err < 0)
return err;
@@ -2622,6 +2652,8 @@ static inline size_t xfrm_sa_len(struct xfrm_state *x)
l += nla_total_size(sizeof(*x->coaddr));
if (x->props.extra_flags)
l += nla_total_size(sizeof(x->props.extra_flags));
+ if (x->xso.dev)
+ l += nla_total_size(sizeof(x->xso));
/* Must count x->lastused as it may become non-zero behind our back. */
l += nla_total_size_64bit(sizeof(u64));
@@ -3108,7 +3140,6 @@ static bool xfrm_is_alive(const struct km_event *c)
}
static struct xfrm_mgr netlink_mgr = {
- .id = "netlink",
.notify = xfrm_send_state_notify,
.acquire = xfrm_send_acquire,
.compile_policy = xfrm_compile_policy,