diff options
Diffstat (limited to 'include/net')
58 files changed, 1459 insertions, 362 deletions
diff --git a/include/net/act_api.h b/include/net/act_api.h index fd08df74c466..6ed9692f20bd 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -86,7 +86,7 @@ struct tc_action_ops { int (*act)(struct sk_buff *, const struct tc_action *, struct tcf_result *); int (*dump)(struct sk_buff *, struct tc_action *, int, int); - void (*cleanup)(struct tc_action *, int bind); + void (*cleanup)(struct tc_action *); int (*lookup)(struct net *, struct tc_action **, u32); int (*init)(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **act, int ovr, @@ -120,12 +120,19 @@ int tc_action_net_init(struct tc_action_net *tn, void tcf_idrinfo_destroy(const struct tc_action_ops *ops, struct tcf_idrinfo *idrinfo); -static inline void tc_action_net_exit(struct tc_action_net *tn) +static inline void tc_action_net_exit(struct list_head *net_list, + unsigned int id) { + struct net *net; + rtnl_lock(); - tcf_idrinfo_destroy(tn->ops, tn->idrinfo); + list_for_each_entry(net, net_list, exit_list) { + struct tc_action_net *tn = net_generic(net, id); + + tcf_idrinfo_destroy(tn->ops, tn->idrinfo); + kfree(tn->idrinfo); + } rtnl_unlock(); - kfree(tn->idrinfo); } int tcf_generic_walker(struct tc_action_net *tn, struct sk_buff *skb, diff --git a/include/net/addrconf.h b/include/net/addrconf.h index b623b65a79d1..c4185a7b0e90 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -180,7 +180,7 @@ static inline int addrconf_finite_timeout(unsigned long timeout) */ int ipv6_addr_label_init(void); void ipv6_addr_label_cleanup(void); -void ipv6_addr_label_rtnl_register(void); +int ipv6_addr_label_rtnl_register(void); u32 ipv6_addr_label(struct net *net, const struct in6_addr *addr, int type, int ifindex); diff --git a/include/net/caif/cfpkt.h b/include/net/caif/cfpkt.h index fe328c52c46b..801489bb14c3 100644 --- a/include/net/caif/cfpkt.h +++ b/include/net/caif/cfpkt.h @@ -32,6 +32,33 @@ void cfpkt_destroy(struct cfpkt *pkt); */ int cfpkt_extr_head(struct cfpkt *pkt, void *data, u16 len); +static inline u8 cfpkt_extr_head_u8(struct cfpkt *pkt) +{ + u8 tmp; + + cfpkt_extr_head(pkt, &tmp, 1); + + return tmp; +} + +static inline u16 cfpkt_extr_head_u16(struct cfpkt *pkt) +{ + __le16 tmp; + + cfpkt_extr_head(pkt, &tmp, 2); + + return le16_to_cpu(tmp); +} + +static inline u32 cfpkt_extr_head_u32(struct cfpkt *pkt) +{ + __le32 tmp; + + cfpkt_extr_head(pkt, &tmp, 4); + + return le32_to_cpu(tmp); +} + /* * Peek header from packet. * Reads data from packet without changing packet. diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index fb94a8bd8ab5..81174f9b8d14 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1775,6 +1775,8 @@ enum cfg80211_signal_type { * by %parent_bssid. * @parent_bssid: the BSS according to which %parent_tsf is set. This is set to * the BSS that requested the scan in which the beacon/probe was received. + * @chains: bitmask for filled values in @chain_signal. + * @chain_signal: per-chain signal strength of last received BSS in dBm. */ struct cfg80211_inform_bss { struct ieee80211_channel *chan; @@ -1783,6 +1785,8 @@ struct cfg80211_inform_bss { u64 boottime_ns; u64 parent_tsf; u8 parent_bssid[ETH_ALEN] __aligned(2); + u8 chains; + s8 chain_signal[IEEE80211_MAX_CHAINS]; }; /** @@ -1826,6 +1830,8 @@ struct cfg80211_bss_ies { * that holds the beacon data. @beacon_ies is still valid, of course, and * points to the same data as hidden_beacon_bss->beacon_ies in that case. * @signal: signal strength value (type depends on the wiphy's signal_type) + * @chains: bitmask for filled values in @chain_signal. + * @chain_signal: per-chain signal strength of last received BSS in dBm. * @priv: private area for driver use, has at least wiphy->bss_priv_size bytes */ struct cfg80211_bss { @@ -1844,6 +1850,8 @@ struct cfg80211_bss { u16 capability; u8 bssid[ETH_ALEN]; + u8 chains; + s8 chain_signal[IEEE80211_MAX_CHAINS]; u8 priv[0] __aligned(sizeof(void *)); }; @@ -2023,6 +2031,9 @@ struct cfg80211_disassoc_request { * @ht_capa: HT Capabilities over-rides. Values set in ht_capa_mask * will be used in ht_capa. Un-supported values will be ignored. * @ht_capa_mask: The bits of ht_capa which are to be used. + * @wep_keys: static WEP keys, if not NULL points to an array of + * CFG80211_MAX_WEP_KEYS WEP keys + * @wep_tx_key: key index (0..3) of the default TX static WEP key */ struct cfg80211_ibss_params { const u8 *ssid; @@ -2039,6 +2050,8 @@ struct cfg80211_ibss_params { int mcast_rate[NUM_NL80211_BANDS]; struct ieee80211_ht_cap ht_capa; struct ieee80211_ht_cap ht_capa_mask; + struct key_params *wep_keys; + int wep_tx_key; }; /** @@ -5577,7 +5590,7 @@ void cfg80211_conn_failed(struct net_device *dev, const u8 *mac_addr, * cfg80211_rx_mgmt - notification of received, unprocessed management frame * @wdev: wireless device receiving the frame * @freq: Frequency on which the frame was received in MHz - * @sig_dbm: signal strength in mBm, or 0 if unknown + * @sig_dbm: signal strength in dBm, or 0 if unknown * @buf: Management frame (header + body) * @len: length of the frame data * @flags: flags, as defined in enum nl80211_rxmgmt_flags @@ -5756,7 +5769,7 @@ void cfg80211_probe_status(struct net_device *dev, const u8 *addr, * @frame: the frame * @len: length of the frame * @freq: frequency the frame was received on - * @sig_dbm: signal strength in mBm, or 0 if unknown + * @sig_dbm: signal strength in dBm, or 0 if unknown * * Use this function to report to userspace when a beacon was * received. It is not useful to call this when there is no diff --git a/include/net/devlink.h b/include/net/devlink.h index b9654e133599..6545b03e97f7 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -26,10 +26,12 @@ struct devlink { struct list_head port_list; struct list_head sb_list; struct list_head dpipe_table_list; + struct list_head resource_list; struct devlink_dpipe_headers *dpipe_headers; const struct devlink_ops *ops; struct device *dev; possible_net_t _net; + struct mutex lock; char priv[0] __aligned(NETDEV_ALIGN); }; @@ -181,6 +183,9 @@ struct devlink_dpipe_table_ops; * @counters_enabled: indicates if counters are active * @counter_control_extern: indicates if counter control is in dpipe or * external tool + * @resource_valid: Indicate that the resource id is valid + * @resource_id: relative resource this table is related to + * @resource_units: number of resource's unit consumed per table's entry * @table_ops: table operations * @rcu: rcu */ @@ -190,6 +195,9 @@ struct devlink_dpipe_table { const char *name; bool counters_enabled; bool counter_control_extern; + bool resource_valid; + u64 resource_id; + u64 resource_units; struct devlink_dpipe_table_ops *table_ops; struct rcu_head rcu; }; @@ -223,7 +231,63 @@ struct devlink_dpipe_headers { unsigned int headers_count; }; +/** + * struct devlink_resource_ops - resource ops + * @occ_get: get the occupied size + * @size_validate: validate the size of the resource before update, reload + * is needed for changes to take place + */ +struct devlink_resource_ops { + u64 (*occ_get)(struct devlink *devlink); + int (*size_validate)(struct devlink *devlink, u64 size, + struct netlink_ext_ack *extack); +}; + +/** + * struct devlink_resource_size_params - resource's size parameters + * @size_min: minimum size which can be set + * @size_max: maximum size which can be set + * @size_granularity: size granularity + * @size_unit: resource's basic unit + */ +struct devlink_resource_size_params { + u64 size_min; + u64 size_max; + u64 size_granularity; + enum devlink_resource_unit unit; +}; + +/** + * struct devlink_resource - devlink resource + * @name: name of the resource + * @id: id, per devlink instance + * @size: size of the resource + * @size_new: updated size of the resource, reload is needed + * @size_valid: valid in case the total size of the resource is valid + * including its children + * @parent: parent resource + * @size_params: size parameters + * @list: parent list + * @resource_list: list of child resources + * @resource_ops: resource ops + */ +struct devlink_resource { + const char *name; + u64 id; + u64 size; + u64 size_new; + bool size_valid; + struct devlink_resource *parent; + struct devlink_resource_size_params *size_params; + struct list_head list; + struct list_head resource_list; + const struct devlink_resource_ops *resource_ops; +}; + +#define DEVLINK_RESOURCE_ID_PARENT_TOP 0 + struct devlink_ops { + int (*reload)(struct devlink *devlink); int (*port_type_set)(struct devlink_port *devlink_port, enum devlink_port_type port_type); int (*port_split)(struct devlink *devlink, unsigned int port_index, @@ -332,6 +396,23 @@ extern struct devlink_dpipe_header devlink_dpipe_header_ethernet; extern struct devlink_dpipe_header devlink_dpipe_header_ipv4; extern struct devlink_dpipe_header devlink_dpipe_header_ipv6; +int devlink_resource_register(struct devlink *devlink, + const char *resource_name, + bool top_hierarchy, + u64 resource_size, + u64 resource_id, + u64 parent_resource_id, + struct devlink_resource_size_params *size_params, + const struct devlink_resource_ops *resource_ops); +void devlink_resources_unregister(struct devlink *devlink, + struct devlink_resource *resource); +int devlink_resource_size_get(struct devlink *devlink, + u64 resource_id, + u64 *p_resource_size); +int devlink_dpipe_table_resource_set(struct devlink *devlink, + const char *table_name, u64 resource_id, + u64 resource_units); + #else static inline struct devlink *devlink_alloc(const struct devlink_ops *ops, @@ -468,6 +549,40 @@ devlink_dpipe_match_put(struct sk_buff *skb, return 0; } +static inline int +devlink_resource_register(struct devlink *devlink, + const char *resource_name, + bool top_hierarchy, + u64 resource_size, + u64 resource_id, + u64 parent_resource_id, + struct devlink_resource_size_params *size_params, + const struct devlink_resource_ops *resource_ops) +{ + return 0; +} + +static inline void +devlink_resources_unregister(struct devlink *devlink, + struct devlink_resource *resource) +{ +} + +static inline int +devlink_resource_size_get(struct devlink *devlink, u64 resource_id, + u64 *p_resource_size) +{ + return -EOPNOTSUPP; +} + +static inline int +devlink_dpipe_table_resource_set(struct devlink *devlink, + const char *table_name, u64 resource_id, + u64 resource_units) +{ + return -EOPNOTSUPP; +} + #endif #endif /* _NET_DEVLINK_H_ */ diff --git a/include/net/dn_route.h b/include/net/dn_route.h index 55df9939bca2..342d2503cba5 100644 --- a/include/net/dn_route.h +++ b/include/net/dn_route.h @@ -69,6 +69,7 @@ int dn_route_rcv(struct sk_buff *skb, struct net_device *dev, */ struct dn_route { struct dst_entry dst; + struct dn_route __rcu *dn_next; struct neighbour *n; diff --git a/include/net/dsa.h b/include/net/dsa.h index 2a05738570d8..6cb602dd970c 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -296,31 +296,39 @@ static inline u32 dsa_user_ports(struct dsa_switch *ds) return mask; } -static inline u8 dsa_upstream_port(struct dsa_switch *ds) +/* Return the local port used to reach an arbitrary switch port */ +static inline unsigned int dsa_towards_port(struct dsa_switch *ds, int device, + int port) { - struct dsa_switch_tree *dst = ds->dst; - - /* - * If this is the root switch (i.e. the switch that connects - * to the CPU), return the cpu port number on this switch. - * Else return the (DSA) port number that connects to the - * switch that is one hop closer to the cpu. - */ - if (dst->cpu_dp->ds == ds) - return dst->cpu_dp->index; + if (device == ds->index) + return port; else - return ds->rtable[dst->cpu_dp->ds->index]; + return ds->rtable[device]; +} + +/* Return the local port used to reach the dedicated CPU port */ +static inline unsigned int dsa_upstream_port(struct dsa_switch *ds, int port) +{ + const struct dsa_port *dp = dsa_to_port(ds, port); + const struct dsa_port *cpu_dp = dp->cpu_dp; + + if (!cpu_dp) + return port; + + return dsa_towards_port(ds, cpu_dp->ds->index, cpu_dp->index); } typedef int dsa_fdb_dump_cb_t(const unsigned char *addr, u16 vid, bool is_static, void *data); struct dsa_switch_ops { +#if IS_ENABLED(CONFIG_NET_DSA_LEGACY) /* * Legacy probing. */ const char *(*probe)(struct device *dsa_dev, struct device *host_dev, int sw_addr, void **priv); +#endif enum dsa_tag_protocol (*get_tag_protocol)(struct dsa_switch *ds, int port); @@ -412,12 +420,10 @@ struct dsa_switch_ops { */ int (*port_vlan_filtering)(struct dsa_switch *ds, int port, bool vlan_filtering); - int (*port_vlan_prepare)(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_vlan *vlan, - struct switchdev_trans *trans); - void (*port_vlan_add)(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_vlan *vlan, - struct switchdev_trans *trans); + int (*port_vlan_prepare)(struct dsa_switch *ds, int port, + const struct switchdev_obj_port_vlan *vlan); + void (*port_vlan_add)(struct dsa_switch *ds, int port, + const struct switchdev_obj_port_vlan *vlan); int (*port_vlan_del)(struct dsa_switch *ds, int port, const struct switchdev_obj_port_vlan *vlan); /* @@ -433,12 +439,10 @@ struct dsa_switch_ops { /* * Multicast database */ - int (*port_mdb_prepare)(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_mdb *mdb, - struct switchdev_trans *trans); - void (*port_mdb_add)(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_mdb *mdb, - struct switchdev_trans *trans); + int (*port_mdb_prepare)(struct dsa_switch *ds, int port, + const struct switchdev_obj_port_mdb *mdb); + void (*port_mdb_add)(struct dsa_switch *ds, int port, + const struct switchdev_obj_port_mdb *mdb); int (*port_mdb_del)(struct dsa_switch *ds, int port, const struct switchdev_obj_port_mdb *mdb); /* @@ -472,11 +476,20 @@ struct dsa_switch_driver { const struct dsa_switch_ops *ops; }; +#if IS_ENABLED(CONFIG_NET_DSA_LEGACY) /* Legacy driver registration */ void register_switch_driver(struct dsa_switch_driver *type); void unregister_switch_driver(struct dsa_switch_driver *type); struct mii_bus *dsa_host_dev_to_mii_bus(struct device *dev); +#else +static inline void register_switch_driver(struct dsa_switch_driver *type) { } +static inline void unregister_switch_driver(struct dsa_switch_driver *type) { } +static inline struct mii_bus *dsa_host_dev_to_mii_bus(struct device *dev) +{ + return NULL; +} +#endif struct net_device *dsa_dev_to_net_device(struct device *dev); /* Keep inline for faster access in hot path */ diff --git a/include/net/dst.h b/include/net/dst.h index d49d607dd2b3..c63d2c37f6e9 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -34,13 +34,9 @@ struct sk_buff; struct dst_entry { struct net_device *dev; - struct rcu_head rcu_head; - struct dst_entry *child; struct dst_ops *ops; unsigned long _metrics; unsigned long expires; - struct dst_entry *path; - struct dst_entry *from; #ifdef CONFIG_XFRM struct xfrm_state *xfrm; #else @@ -59,8 +55,6 @@ struct dst_entry { #define DST_XFRM_QUEUE 0x0040 #define DST_METADATA 0x0080 - short error; - /* A non-zero value of dst->obsolete forces by-hand validation * of the route entry. Positive values are set by the generic * dst layer to indicate that the entry has been forcefully @@ -76,35 +70,24 @@ struct dst_entry { #define DST_OBSOLETE_KILL -2 unsigned short header_len; /* more space at head required */ unsigned short trailer_len; /* space to reserve at tail */ - unsigned short __pad3; -#ifdef CONFIG_IP_ROUTE_CLASSID - __u32 tclassid; -#else - __u32 __pad2; -#endif - -#ifdef CONFIG_64BIT - /* - * Align __refcnt to a 64 bytes alignment - * (L1_CACHE_SIZE would be too much) - */ - long __pad_to_align_refcnt[2]; -#endif /* * __refcnt wants to be on a different cache line from * input/output/ops or performance tanks badly */ - atomic_t __refcnt; /* client references */ +#ifdef CONFIG_64BIT + atomic_t __refcnt; /* 64-bit offset 64 */ +#endif int __use; unsigned long lastuse; struct lwtunnel_state *lwtstate; - union { - struct dst_entry *next; - struct rtable __rcu *rt_next; - struct rt6_info __rcu *rt6_next; - struct dn_route __rcu *dn_next; - }; + struct rcu_head rcu_head; + short error; + short __pad; + __u32 tclassid; +#ifndef CONFIG_64BIT + atomic_t __refcnt; /* 32-bit offset 64 */ +#endif }; struct dst_metrics { @@ -250,7 +233,7 @@ static inline void dst_hold(struct dst_entry *dst) { /* * If your kernel compilation stops here, please check - * __pad_to_align_refcnt declaration in struct dst_entry + * the placement of __refcnt in struct dst_entry */ BUILD_BUG_ON(offsetof(struct dst_entry, __refcnt) & 63); WARN_ON(atomic_inc_not_zero(&dst->__refcnt) == 0); diff --git a/include/net/erspan.h b/include/net/erspan.h index ca94fc86865e..5daa4866412b 100644 --- a/include/net/erspan.h +++ b/include/net/erspan.h @@ -15,7 +15,7 @@ * s, Recur, Flags, Version fields only S (bit 03) is set to 1. The * other fields are set to zero, so only a sequence number follows. * - * ERSPAN Type II header (8 octets [42:49]) + * ERSPAN Version 1 (Type II) header (8 octets [42:49]) * 0 1 2 3 * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ @@ -24,11 +24,31 @@ * | Reserved | Index | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * + * + * ERSPAN Version 2 (Type III) header (12 octets [42:49]) + * 0 1 2 3 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Ver | VLAN | COS |BSO|T| Session ID | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Timestamp | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | SGT |P| FT | Hw ID |D|Gra|O| + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + * Platform Specific SubHeader (8 octets, optional) + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Platf ID | Platform Specific Info | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Platform Specific Info | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * * GRE proto ERSPAN type II = 0x88BE, type III = 0x22EB */ -#define ERSPAN_VERSION 0x1 +#include <uapi/linux/erspan.h> +#define ERSPAN_VERSION 0x1 /* ERSPAN type II */ #define VER_MASK 0xf000 #define VLAN_MASK 0x0fff #define COS_MASK 0xe000 @@ -37,6 +57,19 @@ #define ID_MASK 0x03ff #define INDEX_MASK 0xfffff +#define ERSPAN_VERSION2 0x2 /* ERSPAN type III*/ +#define BSO_MASK EN_MASK +#define SGT_MASK 0xffff0000 +#define P_MASK 0x8000 +#define FT_MASK 0x7c00 +#define HWID_MASK 0x03f0 +#define DIR_MASK 0x0008 +#define GRA_MASK 0x0006 +#define O_MASK 0x0001 + +#define HWID_OFFSET 4 +#define DIR_OFFSET 3 + enum erspan_encap_type { ERSPAN_ENCAP_NOVLAN = 0x0, /* originally without VLAN tag */ ERSPAN_ENCAP_ISL = 0x1, /* originally ISL encapsulated */ @@ -44,18 +77,199 @@ enum erspan_encap_type { ERSPAN_ENCAP_INFRAME = 0x3, /* VLAN tag perserved in frame */ }; -struct erspan_metadata { - __be32 index; /* type II */ -}; +#define ERSPAN_V1_MDSIZE 4 +#define ERSPAN_V2_MDSIZE 8 -struct erspanhdr { - __be16 ver_vlan; -#define VER_OFFSET 12 - __be16 session_id; -#define COS_OFFSET 13 -#define EN_OFFSET 11 -#define T_OFFSET 10 - struct erspan_metadata md; +struct erspan_base_hdr { +#if defined(__LITTLE_ENDIAN_BITFIELD) + __u8 vlan_upper:4, + ver:4; + __u8 vlan:8; + __u8 session_id_upper:2, + t:1, + en:2, + cos:3; + __u8 session_id:8; +#elif defined(__BIG_ENDIAN_BITFIELD) + __u8 ver: 4, + vlan_upper:4; + __u8 vlan:8; + __u8 cos:3, + en:2, + t:1, + session_id_upper:2; + __u8 session_id:8; +#else +#error "Please fix <asm/byteorder.h>" +#endif }; +static inline void set_session_id(struct erspan_base_hdr *ershdr, u16 id) +{ + ershdr->session_id = id & 0xff; + ershdr->session_id_upper = (id >> 8) & 0x3; +} + +static inline u16 get_session_id(const struct erspan_base_hdr *ershdr) +{ + return (ershdr->session_id_upper << 8) + ershdr->session_id; +} + +static inline void set_vlan(struct erspan_base_hdr *ershdr, u16 vlan) +{ + ershdr->vlan = vlan & 0xff; + ershdr->vlan_upper = (vlan >> 8) & 0xf; +} + +static inline u16 get_vlan(const struct erspan_base_hdr *ershdr) +{ + return (ershdr->vlan_upper << 8) + ershdr->vlan; +} + +static inline void set_hwid(struct erspan_md2 *md2, u8 hwid) +{ + md2->hwid = hwid & 0xf; + md2->hwid_upper = (hwid >> 4) & 0x3; +} + +static inline u8 get_hwid(const struct erspan_md2 *md2) +{ + return (md2->hwid_upper << 4) + md2->hwid; +} + +static inline int erspan_hdr_len(int version) +{ + return sizeof(struct erspan_base_hdr) + + (version == 1 ? ERSPAN_V1_MDSIZE : ERSPAN_V2_MDSIZE); +} + +static inline u8 tos_to_cos(u8 tos) +{ + u8 dscp, cos; + + dscp = tos >> 2; + cos = dscp >> 3; + return cos; +} + +static inline void erspan_build_header(struct sk_buff *skb, + u32 id, u32 index, + bool truncate, bool is_ipv4) +{ + struct ethhdr *eth = (struct ethhdr *)skb->data; + enum erspan_encap_type enc_type; + struct erspan_base_hdr *ershdr; + struct erspan_metadata *ersmd; + struct qtag_prefix { + __be16 eth_type; + __be16 tci; + } *qp; + u16 vlan_tci = 0; + u8 tos; + + tos = is_ipv4 ? ip_hdr(skb)->tos : + (ipv6_hdr(skb)->priority << 4) + + (ipv6_hdr(skb)->flow_lbl[0] >> 4); + + enc_type = ERSPAN_ENCAP_NOVLAN; + + /* If mirrored packet has vlan tag, extract tci and + * perserve vlan header in the mirrored frame. + */ + if (eth->h_proto == htons(ETH_P_8021Q)) { + qp = (struct qtag_prefix *)(skb->data + 2 * ETH_ALEN); + vlan_tci = ntohs(qp->tci); + enc_type = ERSPAN_ENCAP_INFRAME; + } + + skb_push(skb, sizeof(*ershdr) + ERSPAN_V1_MDSIZE); + ershdr = (struct erspan_base_hdr *)skb->data; + memset(ershdr, 0, sizeof(*ershdr) + ERSPAN_V1_MDSIZE); + + /* Build base header */ + ershdr->ver = ERSPAN_VERSION; + ershdr->cos = tos_to_cos(tos); + ershdr->en = enc_type; + ershdr->t = truncate; + set_vlan(ershdr, vlan_tci); + set_session_id(ershdr, id); + + /* Build metadata */ + ersmd = (struct erspan_metadata *)(ershdr + 1); + ersmd->u.index = htonl(index & INDEX_MASK); +} + +/* ERSPAN GRA: timestamp granularity + * 00b --> granularity = 100 microseconds + * 01b --> granularity = 100 nanoseconds + * 10b --> granularity = IEEE 1588 + * Here we only support 100 microseconds. + */ +static inline __be32 erspan_get_timestamp(void) +{ + u64 h_usecs; + ktime_t kt; + + kt = ktime_get_real(); + h_usecs = ktime_divns(kt, 100 * NSEC_PER_USEC); + + /* ERSPAN base header only has 32-bit, + * so it wraps around 4 days. + */ + return htonl((u32)h_usecs); +} + +static inline void erspan_build_header_v2(struct sk_buff *skb, + u32 id, u8 direction, u16 hwid, + bool truncate, bool is_ipv4) +{ + struct ethhdr *eth = (struct ethhdr *)skb->data; + struct erspan_base_hdr *ershdr; + struct erspan_metadata *md; + struct qtag_prefix { + __be16 eth_type; + __be16 tci; + } *qp; + u16 vlan_tci = 0; + u8 gra = 0; /* 100 usec */ + u8 bso = 0; /* Bad/Short/Oversized */ + u8 sgt = 0; + u8 tos; + + tos = is_ipv4 ? ip_hdr(skb)->tos : + (ipv6_hdr(skb)->priority << 4) + + (ipv6_hdr(skb)->flow_lbl[0] >> 4); + + /* Unlike v1, v2 does not have En field, + * so only extract vlan tci field. + */ + if (eth->h_proto == htons(ETH_P_8021Q)) { + qp = (struct qtag_prefix *)(skb->data + 2 * ETH_ALEN); + vlan_tci = ntohs(qp->tci); + } + + skb_push(skb, sizeof(*ershdr) + ERSPAN_V2_MDSIZE); + ershdr = (struct erspan_base_hdr *)skb->data; + memset(ershdr, 0, sizeof(*ershdr) + ERSPAN_V2_MDSIZE); + + /* Build base header */ + ershdr->ver = ERSPAN_VERSION2; + ershdr->cos = tos_to_cos(tos); + ershdr->en = bso; + ershdr->t = truncate; + set_vlan(ershdr, vlan_tci); + set_session_id(ershdr, id); + + /* Build metadata */ + md = (struct erspan_metadata *)(ershdr + 1); + md->u.md2.timestamp = erspan_get_timestamp(); + md->u.md2.sgt = htons(sgt); + md->u.md2.p = 1; + md->u.md2.ft = 0; + md->u.md2.dir = direction; + md->u.md2.gra = gra; + md->u.md2.o = 0; + set_hwid(&md->u.md2, hwid); +} + #endif diff --git a/include/net/gen_stats.h b/include/net/gen_stats.h index 304f7aa9cc01..0304ba2ae353 100644 --- a/include/net/gen_stats.h +++ b/include/net/gen_stats.h @@ -49,6 +49,9 @@ int gnet_stats_copy_rate_est(struct gnet_dump *d, int gnet_stats_copy_queue(struct gnet_dump *d, struct gnet_stats_queue __percpu *cpu_q, struct gnet_stats_queue *q, __u32 qlen); +void __gnet_stats_copy_queue(struct gnet_stats_queue *qstats, + const struct gnet_stats_queue __percpu *cpu_q, + const struct gnet_stats_queue *q, __u32 qlen); int gnet_stats_copy_app(struct gnet_dump *d, void *st, int len); int gnet_stats_finish_copy(struct gnet_dump *d); diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index ec72cdb5bc39..6692d67e9245 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -77,6 +77,7 @@ struct inet_connection_sock_af_ops { * @icsk_af_ops Operations which are AF_INET{4,6} specific * @icsk_ulp_ops Pluggable ULP control hook * @icsk_ulp_data ULP private data + * @icsk_listen_portaddr_node hash to the portaddr listener hashtable * @icsk_ca_state: Congestion control state * @icsk_retransmits: Number of unrecovered [RTO] timeouts * @icsk_pending: Scheduled timer event @@ -101,6 +102,7 @@ struct inet_connection_sock { const struct inet_connection_sock_af_ops *icsk_af_ops; const struct tcp_ulp_ops *icsk_ulp_ops; void *icsk_ulp_data; + struct hlist_node icsk_listen_portaddr_node; unsigned int (*icsk_sync_mss)(struct sock *sk, u32 pmtu); __u8 icsk_ca_state:6, icsk_ca_setsockopt:1, diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 2dbbbff5e1e3..9141e95529e7 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -111,6 +111,7 @@ struct inet_bind_hashbucket { */ struct inet_listen_hashbucket { spinlock_t lock; + unsigned int count; struct hlist_head head; }; @@ -132,12 +133,13 @@ struct inet_hashinfo { /* Ok, let's try this, I give up, we do need a local binding * TCP hash as well as the others for fast bind/connect. */ + struct kmem_cache *bind_bucket_cachep; struct inet_bind_hashbucket *bhash; - unsigned int bhash_size; - /* 4 bytes hole on 64 bit */ - struct kmem_cache *bind_bucket_cachep; + /* The 2nd listener table hashed by local port and address */ + unsigned int lhash2_mask; + struct inet_listen_hashbucket *lhash2; /* All the above members are written once at bootup and * never written again _or_ are predominantly read-access. @@ -145,14 +147,25 @@ struct inet_hashinfo { * Now align to a new cache line as all the following members * might be often dirty. */ - /* All sockets in TCP_LISTEN state will be in here. This is the only - * table where wildcard'd TCP sockets can exist. Hash function here - * is just local port number. + /* All sockets in TCP_LISTEN state will be in listening_hash. + * This is the only table where wildcard'd TCP sockets can + * exist. listening_hash is only hashed by local port number. + * If lhash2 is initialized, the same socket will also be hashed + * to lhash2 by port and address. */ struct inet_listen_hashbucket listening_hash[INET_LHTABLE_SIZE] ____cacheline_aligned_in_smp; }; +#define inet_lhash2_for_each_icsk_rcu(__icsk, list) \ + hlist_for_each_entry_rcu(__icsk, list, icsk_listen_portaddr_node) + +static inline struct inet_listen_hashbucket * +inet_lhash2_bucket(struct inet_hashinfo *h, u32 hash) +{ + return &h->lhash2[hash & h->lhash2_mask]; +} + static inline struct inet_ehash_bucket *inet_ehash_bucket( struct inet_hashinfo *hashinfo, unsigned int hash) @@ -208,6 +221,10 @@ int __inet_inherit_port(const struct sock *sk, struct sock *child); void inet_put_port(struct sock *sk); void inet_hashinfo_init(struct inet_hashinfo *h); +void inet_hashinfo2_init(struct inet_hashinfo *h, const char *name, + unsigned long numentries, int scale, + unsigned long low_limit, + unsigned long high_limit); bool inet_ehash_insert(struct sock *sk, struct sock *osk); bool inet_ehash_nolisten(struct sock *sk, struct sock *osk); diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index 39efb968b7a4..0a671c32d6b9 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -291,6 +291,31 @@ static inline void inet_sk_copy_descendant(struct sock *sk_to, int inet_sk_rebuild_header(struct sock *sk); +/** + * inet_sk_state_load - read sk->sk_state for lockless contexts + * @sk: socket pointer + * + * Paired with inet_sk_state_store(). Used in places we don't hold socket lock: + * tcp_diag_get_info(), tcp_get_info(), tcp_poll(), get_tcp4_sock() ... + */ +static inline int inet_sk_state_load(const struct sock *sk) +{ + /* state change might impact lockless readers. */ + return smp_load_acquire(&sk->sk_state); +} + +/** + * inet_sk_state_store - update sk->sk_state + * @sk: socket pointer + * @newstate: new state + * + * Paired with inet_sk_state_load(). Should be used in contexts where + * state change might impact lockless readers. + */ +void inet_sk_state_store(struct sock *sk, int newstate); + +void inet_sk_set_state(struct sock *sk, int state); + static inline unsigned int __inet_ehashfn(const __be32 laddr, const __u16 lport, const __be32 faddr, diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h index 1356fa6a7566..899495589a7e 100644 --- a/include/net/inet_timewait_sock.h +++ b/include/net/inet_timewait_sock.h @@ -93,8 +93,8 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, struct inet_timewait_death_row *dr, const int state); -void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, - struct inet_hashinfo *hashinfo); +void inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, + struct inet_hashinfo *hashinfo); void __inet_twsk_schedule(struct inet_timewait_sock *tw, int timeo, bool rearm); diff --git a/include/net/ip.h b/include/net/ip.h index af8addbaa3c1..746abff9ce51 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -26,12 +26,14 @@ #include <linux/ip.h> #include <linux/in.h> #include <linux/skbuff.h> +#include <linux/jhash.h> #include <net/inet_sock.h> #include <net/route.h> #include <net/snmp.h> #include <net/flow.h> #include <net/flow_dissector.h> +#include <net/netns/hash.h> #define IPV4_MAX_PMTU 65535U /* RFC 2675, Section 5.1 */ #define IPV4_MIN_MTU 68 /* RFC 791 */ @@ -522,6 +524,13 @@ static inline unsigned int ipv4_addr_hash(__be32 ip) return (__force unsigned int) ip; } +static inline u32 ipv4_portaddr_hash(const struct net *net, + __be32 saddr, + unsigned int port) +{ + return jhash_1word((__force u32)saddr, net_hash_mix(net)) ^ port; +} + bool ip_call_ra_chain(struct sk_buff *skb); /* diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 10c913816032..34ec321d6a03 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -129,6 +129,8 @@ struct rt6_exception { struct rt6_info { struct dst_entry dst; + struct rt6_info __rcu *rt6_next; + struct rt6_info *from; /* * Tail elements of dst_entry (__refcnt etc.) @@ -147,6 +149,7 @@ struct rt6_info { */ struct list_head rt6i_siblings; unsigned int rt6i_nsiblings; + atomic_t rt6i_nh_upper_bound; atomic_t rt6i_ref; @@ -168,19 +171,21 @@ struct rt6_info { u32 rt6i_metric; u32 rt6i_pmtu; /* more non-fragment space at head required */ + int rt6i_nh_weight; unsigned short rt6i_nfheader_len; u8 rt6i_protocol; u8 exception_bucket_flushed:1, - unused:7; + should_flush:1, + unused:6; }; #define for_each_fib6_node_rt_rcu(fn) \ for (rt = rcu_dereference((fn)->leaf); rt; \ - rt = rcu_dereference(rt->dst.rt6_next)) + rt = rcu_dereference(rt->rt6_next)) #define for_each_fib6_walker_rt(w) \ for (rt = (w)->leaf; rt; \ - rt = rcu_dereference_protected(rt->dst.rt6_next, 1)) + rt = rcu_dereference_protected(rt->rt6_next, 1)) static inline struct inet6_dev *ip6_dst_idev(struct dst_entry *dst) { @@ -203,11 +208,9 @@ static inline void rt6_update_expires(struct rt6_info *rt0, int timeout) { struct rt6_info *rt; - for (rt = rt0; rt && !(rt->rt6i_flags & RTF_EXPIRES); - rt = (struct rt6_info *)rt->dst.from); + for (rt = rt0; rt && !(rt->rt6i_flags & RTF_EXPIRES); rt = rt->from); if (rt && rt != rt0) rt0->dst.expires = rt->dst.expires; - dst_set_expires(&rt0->dst, timeout); rt0->rt6i_flags |= RTF_EXPIRES; } @@ -242,8 +245,8 @@ static inline u32 rt6_get_cookie(const struct rt6_info *rt) u32 cookie = 0; if (rt->rt6i_flags & RTF_PCPU || - (unlikely(!list_empty(&rt->rt6i_uncached)) && rt->dst.from)) - rt = (struct rt6_info *)(rt->dst.from); + (unlikely(!list_empty(&rt->rt6i_uncached)) && rt->from)) + rt = rt->from; rt6_get_cookie_safe(rt, &cookie); @@ -404,6 +407,7 @@ unsigned int fib6_tables_seq_read(struct net *net); int fib6_tables_dump(struct net *net, struct notifier_block *nb); void fib6_update_sernum(struct rt6_info *rt); +void fib6_update_sernum_upto_root(struct net *net, struct rt6_info *rt); #ifdef CONFIG_IPV6_MULTIPLE_TABLES int fib6_rules_init(void); diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 18e442ea93d8..27d23a65f3cd 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -66,6 +66,12 @@ static inline bool rt6_need_strict(const struct in6_addr *daddr) (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK); } +static inline bool rt6_qualify_for_ecmp(const struct rt6_info *rt) +{ + return (rt->rt6i_flags & (RTF_GATEWAY|RTF_ADDRCONF|RTF_DYNAMIC)) == + RTF_GATEWAY; +} + void ip6_route_input(struct sk_buff *skb); struct dst_entry *ip6_route_input_lookup(struct net *net, struct net_device *dev, @@ -165,10 +171,13 @@ struct rt6_rtnl_dump_arg { }; int rt6_dump_route(struct rt6_info *rt, void *p_arg); -void rt6_ifdown(struct net *net, struct net_device *dev); void rt6_mtu_change(struct net_device *dev, unsigned int mtu); void rt6_remove_prefsrc(struct inet6_ifaddr *ifp); void rt6_clean_tohost(struct net *net, struct in6_addr *gateway); +void rt6_sync_up(struct net_device *dev, unsigned int nh_flags); +void rt6_disable_ip(struct net_device *dev, unsigned long event); +void rt6_sync_down_dev(struct net_device *dev, unsigned long event); +void rt6_multipath_rebalance(struct rt6_info *rt); static inline const struct rt6_info *skb_rt6_info(const struct sk_buff *skb) { diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h index d66f70f63734..236e40ba06bf 100644 --- a/include/net/ip6_tunnel.h +++ b/include/net/ip6_tunnel.h @@ -36,6 +36,10 @@ struct __ip6_tnl_parm { __be32 o_key; __u32 fwmark; + __u32 index; /* ERSPAN type II index */ + __u8 erspan_ver; /* ERSPAN version */ + __u8 dir; /* direction */ + __u16 hwid; /* hwid */ }; /* IPv6 tunnel */ diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 24628f6b09bf..1f16773cfd76 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -116,8 +116,11 @@ struct ip_tunnel { u32 o_seqno; /* The last output seqno */ int tun_hlen; /* Precalculated header length */ - /* This field used only by ERSPAN */ + /* These four fields used only by ERSPAN */ u32 index; /* ERSPAN type II index */ + u8 erspan_ver; /* ERSPAN version */ + u8 dir; /* ERSPAN direction */ + u16 hwid; /* ERSPAN hardware ID */ struct dst_cache dst_cache; diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index ff68cf288f9b..eb0bec043c96 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -69,8 +69,7 @@ struct ip_vs_iphdr { }; static inline void *frag_safe_skb_hp(const struct sk_buff *skb, int offset, - int len, void *buffer, - const struct ip_vs_iphdr *ipvsh) + int len, void *buffer) { return skb_header_pointer(skb, offset, len, buffer); } diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 221238254eb7..8606c9113d3f 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -22,6 +22,7 @@ #include <net/flow.h> #include <net/flow_dissector.h> #include <net/snmp.h> +#include <net/netns/hash.h> #define SIN6_LEN_RFC2133 24 @@ -674,6 +675,22 @@ static inline bool ipv6_addr_v4mapped(const struct in6_addr *a) cpu_to_be32(0x0000ffff))) == 0UL; } +static inline u32 ipv6_portaddr_hash(const struct net *net, + const struct in6_addr *addr6, + unsigned int port) +{ + unsigned int hash, mix = net_hash_mix(net); + + if (ipv6_addr_any(addr6)) + hash = jhash_1word(0, mix); + else if (ipv6_addr_v4mapped(addr6)) + hash = jhash_1word((__force u32)addr6->s6_addr32[3], mix); + else + hash = jhash2((__force u32 *)addr6->s6_addr32, 4, mix); + + return hash ^ port; +} + /* * Check for a RFC 4843 ORCHID address * (Overlay Routable Cryptographic Hash Identifiers) @@ -953,6 +970,8 @@ static inline struct sk_buff *ip6_finish_skb(struct sock *sk) &inet6_sk(sk)->cork); } +unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst); + int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst, struct flowi6 *fl6); struct dst_entry *ip6_dst_lookup_flow(const struct sock *sk, struct flowi6 *fl6, diff --git a/include/net/mac80211.h b/include/net/mac80211.h index eec143cca1c0..906e90223066 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1552,6 +1552,9 @@ struct wireless_dev *ieee80211_vif_to_wdev(struct ieee80211_vif *vif); * @IEEE80211_KEY_FLAG_RESERVE_TAILROOM: This flag should be set by the * driver for a key to indicate that sufficient tailroom must always * be reserved for ICV or MIC, even when HW encryption is enabled. + * @IEEE80211_KEY_FLAG_PUT_MIC_SPACE: This flag should be set by the driver for + * a TKIP key if it only requires MIC space. Do not set together with + * @IEEE80211_KEY_FLAG_GENERATE_MMIC on the same key. */ enum ieee80211_key_flags { IEEE80211_KEY_FLAG_GENERATE_IV_MGMT = BIT(0), @@ -1562,6 +1565,7 @@ enum ieee80211_key_flags { IEEE80211_KEY_FLAG_PUT_IV_SPACE = BIT(5), IEEE80211_KEY_FLAG_RX_MGMT = BIT(6), IEEE80211_KEY_FLAG_RESERVE_TAILROOM = BIT(7), + IEEE80211_KEY_FLAG_PUT_MIC_SPACE = BIT(8), }; /** @@ -1593,8 +1597,8 @@ struct ieee80211_key_conf { u8 icv_len; u8 iv_len; u8 hw_key_idx; - u8 flags; s8 keyidx; + u16 flags; u8 keylen; u8 key[0]; }; @@ -2056,6 +2060,9 @@ struct ieee80211_txq { * The stack will not do fragmentation. * The callback for @set_frag_threshold should be set as well. * + * @IEEE80211_HW_SUPPORTS_TDLS_BUFFER_STA: Hardware supports buffer STA on + * TDLS links. + * * @NUM_IEEE80211_HW_FLAGS: number of hardware flags, used for sizing arrays */ enum ieee80211_hw_flags { @@ -2098,6 +2105,7 @@ enum ieee80211_hw_flags { IEEE80211_HW_TX_FRAG_LIST, IEEE80211_HW_REPORTS_LOW_ACK, IEEE80211_HW_SUPPORTS_TX_FRAG, + IEEE80211_HW_SUPPORTS_TDLS_BUFFER_STA, /* keep last, obviously */ NUM_IEEE80211_HW_FLAGS diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 049008493faf..f306b2aa15a4 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -51,7 +51,7 @@ struct net { refcount_t passive; /* To decided when the network * namespace should be freed. */ - atomic_t count; /* To decided when the network + refcount_t count; /* To decided when the network * namespace should be shut down. */ spinlock_t rules_mod_lock; @@ -195,7 +195,7 @@ void __put_net(struct net *net); static inline struct net *get_net(struct net *net) { - atomic_inc(&net->count); + refcount_inc(&net->count); return net; } @@ -206,14 +206,14 @@ static inline struct net *maybe_get_net(struct net *net) * exists. If the reference count is zero this * function fails and returns NULL. */ - if (!atomic_inc_not_zero(&net->count)) + if (!refcount_inc_not_zero(&net->count)) net = NULL; return net; } static inline void put_net(struct net *net) { - if (atomic_dec_and_test(&net->count)) + if (refcount_dec_and_test(&net->count)) __put_net(net); } @@ -225,7 +225,7 @@ int net_eq(const struct net *net1, const struct net *net2) static inline int check_net(const struct net *net) { - return atomic_read(&net->count) != 0; + return refcount_read(&net->count) != 0; } void net_drop_ns(void *); diff --git a/include/net/netfilter/ipv4/nf_conntrack_ipv4.h b/include/net/netfilter/ipv4/nf_conntrack_ipv4.h index 4ed1040bbe4a..73f825732326 100644 --- a/include/net/netfilter/ipv4/nf_conntrack_ipv4.h +++ b/include/net/netfilter/ipv4/nf_conntrack_ipv4.h @@ -13,17 +13,17 @@ const extern struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4; -extern struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4; -extern struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4; -extern struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp; +extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4; +extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4; +extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp; #ifdef CONFIG_NF_CT_PROTO_DCCP -extern struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp4; +extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp4; #endif #ifdef CONFIG_NF_CT_PROTO_SCTP -extern struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4; +extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4; #endif #ifdef CONFIG_NF_CT_PROTO_UDPLITE -extern struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4; +extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4; #endif int nf_conntrack_ipv4_compat_init(void); diff --git a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h index 9cd55be95853..effa8dfba68c 100644 --- a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h +++ b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h @@ -4,17 +4,17 @@ extern const struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6; -extern struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6; -extern struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6; -extern struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6; +extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6; +extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6; +extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6; #ifdef CONFIG_NF_CT_PROTO_DCCP -extern struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp6; +extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp6; #endif #ifdef CONFIG_NF_CT_PROTO_SCTP -extern struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6; +extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6; #endif #ifdef CONFIG_NF_CT_PROTO_UDPLITE -extern struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6; +extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6; #endif #include <linux/sysctl.h> diff --git a/include/net/netfilter/nf_conntrack_count.h b/include/net/netfilter/nf_conntrack_count.h new file mode 100644 index 000000000000..adf8db44cf86 --- /dev/null +++ b/include/net/netfilter/nf_conntrack_count.h @@ -0,0 +1,17 @@ +#ifndef _NF_CONNTRACK_COUNT_H +#define _NF_CONNTRACK_COUNT_H + +struct nf_conncount_data; + +struct nf_conncount_data *nf_conncount_init(struct net *net, unsigned int family, + unsigned int keylen); +void nf_conncount_destroy(struct net *net, unsigned int family, + struct nf_conncount_data *data); + +unsigned int nf_conncount_count(struct net *net, + struct nf_conncount_data *data, + const u32 *key, + unsigned int family, + const struct nf_conntrack_tuple *tuple, + const struct nf_conntrack_zone *zone); +#endif diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index 7ef56c13698a..a7220eef9aee 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -27,6 +27,9 @@ struct nf_conntrack_l4proto { /* Resolve clashes on insertion races. */ bool allow_clash; + /* protoinfo nlattr size, closes a hole */ + u16 nlattr_size; + /* Try to fill in the third arg: dataoff is offset past network protocol hdr. Return true if possible. */ bool (*pkt_to_tuple)(const struct sk_buff *skb, unsigned int dataoff, @@ -66,8 +69,6 @@ struct nf_conntrack_l4proto { /* convert protoinfo to nfnetink attributes */ int (*to_nlattr)(struct sk_buff *skb, struct nlattr *nla, struct nf_conn *ct); - /* Calculate protoinfo nlattr size */ - int (*nlattr_size)(void); /* convert nfnetlink attributes to protoinfo */ int (*from_nlattr)(struct nlattr *tb[], struct nf_conn *ct); @@ -80,8 +81,6 @@ struct nf_conntrack_l4proto { struct nf_conntrack_tuple *t); const struct nla_policy *nla_policy; - size_t nla_size; - #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) struct { int (*nlattr_to_obj)(struct nlattr *tb[], @@ -109,7 +108,7 @@ struct nf_conntrack_l4proto { }; /* Existing built-in generic protocol */ -extern struct nf_conntrack_l4proto nf_conntrack_l4proto_generic; +extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_generic; #define MAX_NF_CT_PROTO 256 @@ -126,18 +125,18 @@ int nf_ct_l4proto_pernet_register_one(struct net *net, void nf_ct_l4proto_pernet_unregister_one(struct net *net, const struct nf_conntrack_l4proto *proto); int nf_ct_l4proto_pernet_register(struct net *net, - struct nf_conntrack_l4proto *const proto[], + const struct nf_conntrack_l4proto *const proto[], unsigned int num_proto); void nf_ct_l4proto_pernet_unregister(struct net *net, - struct nf_conntrack_l4proto *const proto[], + const struct nf_conntrack_l4proto *const proto[], unsigned int num_proto); /* Protocol global registration. */ -int nf_ct_l4proto_register_one(struct nf_conntrack_l4proto *proto); +int nf_ct_l4proto_register_one(const struct nf_conntrack_l4proto *proto); void nf_ct_l4proto_unregister_one(const struct nf_conntrack_l4proto *proto); -int nf_ct_l4proto_register(struct nf_conntrack_l4proto *proto[], +int nf_ct_l4proto_register(const struct nf_conntrack_l4proto * const proto[], unsigned int num_proto); -void nf_ct_l4proto_unregister(struct nf_conntrack_l4proto *proto[], +void nf_ct_l4proto_unregister(const struct nf_conntrack_l4proto * const proto[], unsigned int num_proto); /* Generic netlink helpers */ diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h new file mode 100644 index 000000000000..b22b22082733 --- /dev/null +++ b/include/net/netfilter/nf_flow_table.h @@ -0,0 +1,122 @@ +#ifndef _NF_FLOW_TABLE_H +#define _NF_FLOW_TABLE_H + +#include <linux/in.h> +#include <linux/in6.h> +#include <linux/netdevice.h> +#include <linux/rhashtable.h> +#include <linux/rcupdate.h> +#include <net/dst.h> + +struct nf_flowtable; + +struct nf_flowtable_type { + struct list_head list; + int family; + void (*gc)(struct work_struct *work); + const struct rhashtable_params *params; + nf_hookfn *hook; + struct module *owner; +}; + +struct nf_flowtable { + struct rhashtable rhashtable; + const struct nf_flowtable_type *type; + struct delayed_work gc_work; +}; + +enum flow_offload_tuple_dir { + FLOW_OFFLOAD_DIR_ORIGINAL, + FLOW_OFFLOAD_DIR_REPLY, + __FLOW_OFFLOAD_DIR_MAX = FLOW_OFFLOAD_DIR_REPLY, +}; +#define FLOW_OFFLOAD_DIR_MAX (__FLOW_OFFLOAD_DIR_MAX + 1) + +struct flow_offload_tuple { + union { + struct in_addr src_v4; + struct in6_addr src_v6; + }; + union { + struct in_addr dst_v4; + struct in6_addr dst_v6; + }; + struct { + __be16 src_port; + __be16 dst_port; + }; + + int iifidx; + + u8 l3proto; + u8 l4proto; + u8 dir; + + int oifidx; + + struct dst_entry *dst_cache; +}; + +struct flow_offload_tuple_rhash { + struct rhash_head node; + struct flow_offload_tuple tuple; +}; + +#define FLOW_OFFLOAD_SNAT 0x1 +#define FLOW_OFFLOAD_DNAT 0x2 +#define FLOW_OFFLOAD_DYING 0x4 + +struct flow_offload { + struct flow_offload_tuple_rhash tuplehash[FLOW_OFFLOAD_DIR_MAX]; + u32 flags; + union { + /* Your private driver data here. */ + u32 timeout; + }; +}; + +#define NF_FLOW_TIMEOUT (30 * HZ) + +struct nf_flow_route { + struct { + struct dst_entry *dst; + int ifindex; + } tuple[FLOW_OFFLOAD_DIR_MAX]; +}; + +struct flow_offload *flow_offload_alloc(struct nf_conn *ct, + struct nf_flow_route *route); +void flow_offload_free(struct flow_offload *flow); + +int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow); +void flow_offload_del(struct nf_flowtable *flow_table, struct flow_offload *flow); +struct flow_offload_tuple_rhash *flow_offload_lookup(struct nf_flowtable *flow_table, + struct flow_offload_tuple *tuple); +int nf_flow_table_iterate(struct nf_flowtable *flow_table, + void (*iter)(struct flow_offload *flow, void *data), + void *data); +void nf_flow_offload_work_gc(struct work_struct *work); +extern const struct rhashtable_params nf_flow_offload_rhash_params; + +void flow_offload_dead(struct flow_offload *flow); + +int nf_flow_snat_port(const struct flow_offload *flow, + struct sk_buff *skb, unsigned int thoff, + u8 protocol, enum flow_offload_tuple_dir dir); +int nf_flow_dnat_port(const struct flow_offload *flow, + struct sk_buff *skb, unsigned int thoff, + u8 protocol, enum flow_offload_tuple_dir dir); + +struct flow_ports { + __be16 source, dest; +}; + +unsigned int nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb, + const struct nf_hook_state *state); +unsigned int nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb, + const struct nf_hook_state *state); + +#define MODULE_ALIAS_NF_FLOWTABLE(family) \ + MODULE_ALIAS("nf-flowtable-" __stringify(family)) + +#endif /* _FLOW_OFFLOAD_H */ diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h index 814058d0f167..a50a69f5334c 100644 --- a/include/net/netfilter/nf_queue.h +++ b/include/net/netfilter/nf_queue.h @@ -25,7 +25,7 @@ struct nf_queue_entry { struct nf_queue_handler { int (*outfn)(struct nf_queue_entry *entry, unsigned int queuenum); - unsigned int (*nf_hook_drop)(struct net *net); + void (*nf_hook_drop)(struct net *net); }; void nf_register_queue_handler(struct net *net, const struct nf_queue_handler *qh); diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index fecc6112c768..663b015dace5 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -9,6 +9,7 @@ #include <linux/netfilter/x_tables.h> #include <linux/netfilter/nf_tables.h> #include <linux/u64_stats_sync.h> +#include <net/netfilter/nf_flow_table.h> #include <net/netlink.h> #define NFT_JUMP_STACK_SIZE 16 @@ -54,8 +55,8 @@ static inline void nft_set_pktinfo(struct nft_pktinfo *pkt, pkt->xt.state = state; } -static inline void nft_set_pktinfo_proto_unspec(struct nft_pktinfo *pkt, - struct sk_buff *skb) +static inline void nft_set_pktinfo_unspec(struct nft_pktinfo *pkt, + struct sk_buff *skb) { pkt->tprot_set = false; pkt->tprot = 0; @@ -63,14 +64,6 @@ static inline void nft_set_pktinfo_proto_unspec(struct nft_pktinfo *pkt, pkt->xt.fragoff = 0; } -static inline void nft_set_pktinfo_unspec(struct nft_pktinfo *pkt, - struct sk_buff *skb, - const struct nf_hook_state *state) -{ - nft_set_pktinfo(pkt, skb, state); - nft_set_pktinfo_proto_unspec(pkt, skb); -} - /** * struct nft_verdict - nf_tables verdict * @@ -150,22 +143,22 @@ static inline void nft_data_debug(const struct nft_data *data) * struct nft_ctx - nf_tables rule/set context * * @net: net namespace - * @afi: address family info * @table: the table the chain is contained in * @chain: the chain the rule is contained in * @nla: netlink attributes * @portid: netlink portID of the original message * @seq: netlink sequence number + * @family: protocol family * @report: notify via unicast netlink message */ struct nft_ctx { struct net *net; - struct nft_af_info *afi; struct nft_table *table; struct nft_chain *chain; const struct nlattr * const *nla; u32 portid; u32 seq; + u8 family; bool report; }; @@ -381,6 +374,7 @@ void nft_unregister_set(struct nft_set_type *type); * @list: table set list node * @bindings: list of set bindings * @name: name of the set + * @handle: unique handle of the set * @ktype: key type (numeric type defined by userspace, not used in the kernel) * @dtype: data type (verdict or numeric type defined by userspace) * @objtype: object type (see NFT_OBJECT_* definitions) @@ -403,6 +397,7 @@ struct nft_set { struct list_head list; struct list_head bindings; char *name; + u64 handle; u32 ktype; u32 dtype; u32 objtype; @@ -424,6 +419,11 @@ struct nft_set { __attribute__((aligned(__alignof__(u64)))); }; +static inline bool nft_set_is_anonymous(const struct nft_set *set) +{ + return set->flags & NFT_SET_ANONYMOUS; +} + static inline void *nft_set_priv(const struct nft_set *set) { return (void *)set->data; @@ -883,7 +883,7 @@ enum nft_chain_type { * @family: address family * @owner: module owner * @hook_mask: mask of valid hooks - * @hooks: hookfn overrides + * @hooks: array of hook functions */ struct nf_chain_type { const char *name; @@ -905,8 +905,6 @@ struct nft_stats { struct u64_stats_sync syncp; }; -#define NFT_HOOK_OPS_MAX 2 - /** * struct nft_base_chain - nf_tables base chain * @@ -918,7 +916,7 @@ struct nft_stats { * @dev_name: device name that this base chain is attached to (if any) */ struct nft_base_chain { - struct nf_hook_ops ops[NFT_HOOK_OPS_MAX]; + struct nf_hook_ops ops; const struct nf_chain_type *type; u8 policy; u8 flags; @@ -948,10 +946,13 @@ unsigned int nft_do_chain(struct nft_pktinfo *pkt, void *priv); * @chains: chains in the table * @sets: sets in the table * @objects: stateful objects in the table + * @flowtables: flow tables in the table * @hgenerator: handle generator state + * @handle: table handle * @use: number of chain references to this table * @flags: table flag (see enum nft_table_flags) * @genmask: generation mask + * @afinfo: address family info * @name: name of the table */ struct nft_table { @@ -959,46 +960,16 @@ struct nft_table { struct list_head chains; struct list_head sets; struct list_head objects; + struct list_head flowtables; u64 hgenerator; + u64 handle; u32 use; - u16 flags:14, + u16 family:6, + flags:8, genmask:2; char *name; }; -enum nft_af_flags { - NFT_AF_NEEDS_DEV = (1 << 0), -}; - -/** - * struct nft_af_info - nf_tables address family info - * - * @list: used internally - * @family: address family - * @nhooks: number of hooks in this family - * @owner: module owner - * @tables: used internally - * @flags: family flags - * @nops: number of hook ops in this family - * @hook_ops_init: initialization function for chain hook ops - * @hooks: hookfn overrides for packet validation - */ -struct nft_af_info { - struct list_head list; - int family; - unsigned int nhooks; - struct module *owner; - struct list_head tables; - u32 flags; - unsigned int nops; - void (*hook_ops_init)(struct nf_hook_ops *, - unsigned int); - nf_hookfn *hooks[NF_MAX_HOOKS]; -}; - -int nft_register_afinfo(struct net *, struct nft_af_info *); -void nft_unregister_afinfo(struct net *, struct nft_af_info *); - int nft_register_chain_type(const struct nf_chain_type *); void nft_unregister_chain_type(const struct nf_chain_type *); @@ -1016,9 +987,9 @@ int nft_verdict_dump(struct sk_buff *skb, int type, * @name: name of this stateful object * @genmask: generation mask * @use: number of references to this stateful object - * @data: object data, layout depends on type + * @handle: unique object handle * @ops: object operations - * @data: pointer to object data + * @data: object data, layout depends on type */ struct nft_object { struct list_head list; @@ -1026,6 +997,7 @@ struct nft_object { struct nft_table *table; u32 genmask:2, use:30; + u64 handle; /* runtime data below here */ const struct nft_object_ops *ops ____cacheline_aligned; unsigned char data[] @@ -1097,6 +1069,46 @@ int nft_register_obj(struct nft_object_type *obj_type); void nft_unregister_obj(struct nft_object_type *obj_type); /** + * struct nft_flowtable - nf_tables flow table + * + * @list: flow table list node in table list + * @table: the table the flow table is contained in + * @name: name of this flow table + * @hooknum: hook number + * @priority: hook priority + * @ops_len: number of hooks in array + * @genmask: generation mask + * @use: number of references to this flow table + * @handle: unique object handle + * @data: rhashtable and garbage collector + * @ops: array of hooks + */ +struct nft_flowtable { + struct list_head list; + struct nft_table *table; + char *name; + int hooknum; + int priority; + int ops_len; + u32 genmask:2, + use:30; + u64 handle; + /* runtime data below here */ + struct nf_hook_ops *ops ____cacheline_aligned; + struct nf_flowtable data; +}; + +struct nft_flowtable *nf_tables_flowtable_lookup(const struct nft_table *table, + const struct nlattr *nla, + u8 genmask); +void nft_flow_table_iterate(struct net *net, + void (*iter)(struct nf_flowtable *flowtable, void *data), + void *data); + +void nft_register_flowtable_type(struct nf_flowtable_type *type); +void nft_unregister_flowtable_type(struct nf_flowtable_type *type); + +/** * struct nft_traceinfo - nft tracing information and state * * @pkt: pktinfo currently processed @@ -1125,12 +1137,6 @@ void nft_trace_init(struct nft_traceinfo *info, const struct nft_pktinfo *pkt, void nft_trace_notify(struct nft_traceinfo *info); -#define nft_dereference(p) \ - nfnl_dereference(p, NFNL_SUBSYS_NFTABLES) - -#define MODULE_ALIAS_NFT_FAMILY(family) \ - MODULE_ALIAS("nft-afinfo-" __stringify(family)) - #define MODULE_ALIAS_NFT_CHAIN(family, name) \ MODULE_ALIAS("nft-chain-" __stringify(family) "-" name) @@ -1332,4 +1338,11 @@ struct nft_trans_obj { #define nft_trans_obj(trans) \ (((struct nft_trans_obj *)trans->data)->obj) +struct nft_trans_flowtable { + struct nft_flowtable *flowtable; +}; + +#define nft_trans_flowtable(trans) \ + (((struct nft_trans_flowtable *)trans->data)->flowtable) + #endif /* _NET_NF_TABLES_H */ diff --git a/include/net/netfilter/nf_tables_ipv4.h b/include/net/netfilter/nf_tables_ipv4.h index f0896ba456c4..ed7b511f0a59 100644 --- a/include/net/netfilter/nf_tables_ipv4.h +++ b/include/net/netfilter/nf_tables_ipv4.h @@ -5,15 +5,11 @@ #include <net/netfilter/nf_tables.h> #include <net/ip.h> -static inline void -nft_set_pktinfo_ipv4(struct nft_pktinfo *pkt, - struct sk_buff *skb, - const struct nf_hook_state *state) +static inline void nft_set_pktinfo_ipv4(struct nft_pktinfo *pkt, + struct sk_buff *skb) { struct iphdr *ip; - nft_set_pktinfo(pkt, skb, state); - ip = ip_hdr(pkt->skb); pkt->tprot_set = true; pkt->tprot = ip->protocol; @@ -21,10 +17,8 @@ nft_set_pktinfo_ipv4(struct nft_pktinfo *pkt, pkt->xt.fragoff = ntohs(ip->frag_off) & IP_OFFSET; } -static inline int -__nft_set_pktinfo_ipv4_validate(struct nft_pktinfo *pkt, - struct sk_buff *skb, - const struct nf_hook_state *state) +static inline int __nft_set_pktinfo_ipv4_validate(struct nft_pktinfo *pkt, + struct sk_buff *skb) { struct iphdr *iph, _iph; u32 len, thoff; @@ -52,16 +46,11 @@ __nft_set_pktinfo_ipv4_validate(struct nft_pktinfo *pkt, return 0; } -static inline void -nft_set_pktinfo_ipv4_validate(struct nft_pktinfo *pkt, - struct sk_buff *skb, - const struct nf_hook_state *state) +static inline void nft_set_pktinfo_ipv4_validate(struct nft_pktinfo *pkt, + struct sk_buff *skb) { - nft_set_pktinfo(pkt, skb, state); - if (__nft_set_pktinfo_ipv4_validate(pkt, skb, state) < 0) - nft_set_pktinfo_proto_unspec(pkt, skb); + if (__nft_set_pktinfo_ipv4_validate(pkt, skb) < 0) + nft_set_pktinfo_unspec(pkt, skb); } -extern struct nft_af_info nft_af_ipv4; - #endif diff --git a/include/net/netfilter/nf_tables_ipv6.h b/include/net/netfilter/nf_tables_ipv6.h index b8065b72f56e..dabe6fdb553a 100644 --- a/include/net/netfilter/nf_tables_ipv6.h +++ b/include/net/netfilter/nf_tables_ipv6.h @@ -5,20 +5,16 @@ #include <linux/netfilter_ipv6/ip6_tables.h> #include <net/ipv6.h> -static inline void -nft_set_pktinfo_ipv6(struct nft_pktinfo *pkt, - struct sk_buff *skb, - const struct nf_hook_state *state) +static inline void nft_set_pktinfo_ipv6(struct nft_pktinfo *pkt, + struct sk_buff *skb) { unsigned int flags = IP6_FH_F_AUTH; int protohdr, thoff = 0; unsigned short frag_off; - nft_set_pktinfo(pkt, skb, state); - protohdr = ipv6_find_hdr(pkt->skb, &thoff, -1, &frag_off, &flags); if (protohdr < 0) { - nft_set_pktinfo_proto_unspec(pkt, skb); + nft_set_pktinfo_unspec(pkt, skb); return; } @@ -28,10 +24,8 @@ nft_set_pktinfo_ipv6(struct nft_pktinfo *pkt, pkt->xt.fragoff = frag_off; } -static inline int -__nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt, - struct sk_buff *skb, - const struct nf_hook_state *state) +static inline int __nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt, + struct sk_buff *skb) { #if IS_ENABLED(CONFIG_IPV6) unsigned int flags = IP6_FH_F_AUTH; @@ -68,16 +62,11 @@ __nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt, #endif } -static inline void -nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt, - struct sk_buff *skb, - const struct nf_hook_state *state) +static inline void nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt, + struct sk_buff *skb) { - nft_set_pktinfo(pkt, skb, state); - if (__nft_set_pktinfo_ipv6_validate(pkt, skb, state) < 0) - nft_set_pktinfo_proto_unspec(pkt, skb); + if (__nft_set_pktinfo_ipv6_validate(pkt, skb) < 0) + nft_set_pktinfo_unspec(pkt, skb); } -extern struct nft_af_info nft_af_ipv6; - #endif diff --git a/include/net/netns/can.h b/include/net/netns/can.h index ecf238b8862c..ca9bd9fba5b5 100644 --- a/include/net/netns/can.h +++ b/include/net/netns/can.h @@ -8,7 +8,7 @@ #include <linux/spinlock.h> -struct dev_rcv_lists; +struct can_dev_rcv_lists; struct s_stats; struct s_pstats; @@ -28,7 +28,7 @@ struct netns_can { #endif /* receive filters subscribed for 'all' CAN devices */ - struct dev_rcv_lists *can_rx_alldev_list; + struct can_dev_rcv_lists *can_rx_alldev_list; spinlock_t can_rcvlists_lock; struct timer_list can_stattimer;/* timer for statistics update */ struct s_stats *can_stats; /* packet statistics */ diff --git a/include/net/netns/core.h b/include/net/netns/core.h index 0ad4d0c71228..36c2d998a43c 100644 --- a/include/net/netns/core.h +++ b/include/net/netns/core.h @@ -11,7 +11,10 @@ struct netns_core { int sysctl_somaxconn; - struct prot_inuse __percpu *inuse; +#ifdef CONFIG_PROC_FS + int __percpu *sock_inuse; + struct prot_inuse __percpu *prot_inuse; +#endif }; #endif diff --git a/include/net/netns/netfilter.h b/include/net/netns/netfilter.h index cc00af2ac2d7..ca043342c0eb 100644 --- a/include/net/netns/netfilter.h +++ b/include/net/netns/netfilter.h @@ -17,7 +17,17 @@ struct netns_nf { #ifdef CONFIG_SYSCTL struct ctl_table_header *nf_log_dir_header; #endif - struct nf_hook_entries __rcu *hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS]; + struct nf_hook_entries __rcu *hooks_ipv4[NF_INET_NUMHOOKS]; + struct nf_hook_entries __rcu *hooks_ipv6[NF_INET_NUMHOOKS]; +#ifdef CONFIG_NETFILTER_FAMILY_ARP + struct nf_hook_entries __rcu *hooks_arp[NF_ARP_NUMHOOKS]; +#endif +#ifdef CONFIG_NETFILTER_FAMILY_BRIDGE + struct nf_hook_entries __rcu *hooks_bridge[NF_INET_NUMHOOKS]; +#endif +#if IS_ENABLED(CONFIG_DECNET) + struct nf_hook_entries __rcu *hooks_decnet[NF_DN_NUMHOOKS]; +#endif #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) bool defrag_ipv4; #endif diff --git a/include/net/netns/nftables.h b/include/net/netns/nftables.h index 4109b5f3010f..48134353411d 100644 --- a/include/net/netns/nftables.h +++ b/include/net/netns/nftables.h @@ -7,14 +7,8 @@ struct nft_af_info; struct netns_nftables { - struct list_head af_info; + struct list_head tables; struct list_head commit_list; - struct nft_af_info *ipv4; - struct nft_af_info *ipv6; - struct nft_af_info *inet; - struct nft_af_info *arp; - struct nft_af_info *bridge; - struct nft_af_info *netdev; unsigned int base_seq; u8 gencursor; }; diff --git a/include/net/netns/sctp.h b/include/net/netns/sctp.h index ebc813277662..0db7fb3e4e15 100644 --- a/include/net/netns/sctp.h +++ b/include/net/netns/sctp.h @@ -122,9 +122,12 @@ struct netns_sctp { /* Flag to indicate if PR-CONFIG is enabled. */ int reconf_enable; - /* Flag to idicate if SCTP-AUTH is enabled */ + /* Flag to indicate if SCTP-AUTH is enabled */ int auth_enable; + /* Flag to indicate if stream interleave is enabled */ + int intl_enable; + /* * Policy to control SCTP IPv4 address scoping * 0 - Disable IPv4 address scoping diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 753ac9361154..87406252f0a3 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -29,6 +29,7 @@ struct tcf_block_ext_info { enum tcf_block_binder_type binder_type; tcf_chain_head_change_t *chain_head_change; void *chain_head_change_priv; + u32 block_index; }; struct tcf_block_cb; @@ -38,16 +39,25 @@ bool tcf_queue_work(struct work_struct *work); struct tcf_chain *tcf_chain_get(struct tcf_block *block, u32 chain_index, bool create); void tcf_chain_put(struct tcf_chain *chain); +void tcf_block_netif_keep_dst(struct tcf_block *block); int tcf_block_get(struct tcf_block **p_block, - struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q); + struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q, + struct netlink_ext_ack *extack); int tcf_block_get_ext(struct tcf_block **p_block, struct Qdisc *q, - struct tcf_block_ext_info *ei); + struct tcf_block_ext_info *ei, + struct netlink_ext_ack *extack); void tcf_block_put(struct tcf_block *block); void tcf_block_put_ext(struct tcf_block *block, struct Qdisc *q, struct tcf_block_ext_info *ei); +static inline bool tcf_block_shared(struct tcf_block *block) +{ + return block->index; +} + static inline struct Qdisc *tcf_block_q(struct tcf_block *block) { + WARN_ON(tcf_block_shared(block)); return block->q; } @@ -77,14 +87,16 @@ int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp, #else static inline int tcf_block_get(struct tcf_block **p_block, - struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q) + struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q, + struct netlink_ext_ack *extack) { return 0; } static inline int tcf_block_get_ext(struct tcf_block **p_block, struct Qdisc *q, - struct tcf_block_ext_info *ei) + struct tcf_block_ext_info *ei, + struct netlink_ext_ack *extack) { return 0; } @@ -364,7 +376,8 @@ tcf_exts_exec(struct sk_buff *skb, struct tcf_exts *exts, int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, struct nlattr *rate_tlv, - struct tcf_exts *exts, bool ovr); + struct tcf_exts *exts, bool ovr, + struct netlink_ext_ack *extack); void tcf_exts_destroy(struct tcf_exts *exts); void tcf_exts_change(struct tcf_exts *dst, struct tcf_exts *src); int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts); @@ -544,13 +557,16 @@ static inline int tcf_valid_offset(const struct sk_buff *skb, #include <net/net_namespace.h> static inline int -tcf_change_indev(struct net *net, struct nlattr *indev_tlv) +tcf_change_indev(struct net *net, struct nlattr *indev_tlv, + struct netlink_ext_ack *extack) { char indev[IFNAMSIZ]; struct net_device *dev; - if (nla_strlcpy(indev, indev_tlv, IFNAMSIZ) >= IFNAMSIZ) + if (nla_strlcpy(indev, indev_tlv, IFNAMSIZ) >= IFNAMSIZ) { + NL_SET_ERR_MSG(extack, "Interface name too long"); return -EINVAL; + } dev = __dev_get_by_name(net, indev); if (!dev) return -ENODEV; @@ -586,17 +602,9 @@ struct tc_cls_common_offload { u32 chain_index; __be16 protocol; u32 prio; + struct netlink_ext_ack *extack; }; -static inline void -tc_cls_common_offload_init(struct tc_cls_common_offload *cls_common, - const struct tcf_proto *tp) -{ - cls_common->chain_index = tp->chain->index; - cls_common->protocol = tp->protocol; - cls_common->prio = tp->prio; -} - struct tc_cls_u32_knode { struct tcf_exts *exts; struct tc_u32_sel *sel; @@ -637,6 +645,31 @@ static inline bool tc_can_offload(const struct net_device *dev) return dev->features & NETIF_F_HW_TC; } +static inline bool tc_can_offload_extack(const struct net_device *dev, + struct netlink_ext_ack *extack) +{ + bool can = tc_can_offload(dev); + + if (!can) + NL_SET_ERR_MSG(extack, "TC offload is disabled on net device"); + + return can; +} + +static inline bool +tc_cls_can_offload_and_chain0(const struct net_device *dev, + struct tc_cls_common_offload *common) +{ + if (!tc_can_offload_extack(dev, common->extack)) + return false; + if (common->chain_index) { + NL_SET_ERR_MSG(common->extack, + "Driver supports only offload of chain 0"); + return false; + } + return true; +} + static inline bool tc_skip_hw(u32 flags) { return (flags & TCA_CLS_FLAGS_SKIP_HW) ? true : false; @@ -664,6 +697,18 @@ static inline bool tc_in_hw(u32 flags) return (flags & TCA_CLS_FLAGS_IN_HW) ? true : false; } +static inline void +tc_cls_common_offload_init(struct tc_cls_common_offload *cls_common, + const struct tcf_proto *tp, u32 flags, + struct netlink_ext_ack *extack) +{ + cls_common->chain_index = tp->chain->index; + cls_common->protocol = tp->protocol; + cls_common->prio = tp->prio; + if (tc_skip_sw(flags)) + cls_common->extack = extack; +} + enum tc_fl_command { TC_CLSFLOWER_REPLACE, TC_CLSFLOWER_DESTROY, @@ -706,7 +751,6 @@ struct tc_cls_bpf_offload { struct bpf_prog *oldprog; const char *name; bool exts_integrated; - u32 gen_flags; }; struct tc_mqprio_qopt_offload { @@ -727,6 +771,11 @@ struct tc_cookie { u32 len; }; +struct tc_qopt_offload_stats { + struct gnet_stats_basic_packed *bstats; + struct gnet_stats_queue *qstats; +}; + enum tc_red_command { TC_RED_REPLACE, TC_RED_DESTROY, @@ -739,9 +788,6 @@ struct tc_red_qopt_offload_params { u32 max; u32 probability; bool is_ecn; -}; -struct tc_red_qopt_offload_stats { - struct gnet_stats_basic_packed *bstats; struct gnet_stats_queue *qstats; }; @@ -751,9 +797,34 @@ struct tc_red_qopt_offload { u32 parent; union { struct tc_red_qopt_offload_params set; - struct tc_red_qopt_offload_stats stats; + struct tc_qopt_offload_stats stats; struct red_stats *xstats; }; }; +enum tc_prio_command { + TC_PRIO_REPLACE, + TC_PRIO_DESTROY, + TC_PRIO_STATS, +}; + +struct tc_prio_qopt_offload_params { + int bands; + u8 priomap[TC_PRIO_MAX + 1]; + /* In case that a prio qdisc is offloaded and now is changed to a + * non-offloadedable config, it needs to update the backlog & qlen + * values to negate the HW backlog & qlen values (and only them). + */ + struct gnet_stats_queue *qstats; +}; + +struct tc_prio_qopt_offload { + enum tc_prio_command command; + u32 handle; + u32 parent; + union { + struct tc_prio_qopt_offload_params replace_params; + struct tc_qopt_offload_stats stats; + }; +}; #endif diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index d1f413f06c72..815b92a23936 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -89,7 +89,8 @@ extern struct Qdisc_ops pfifo_head_drop_qdisc_ops; int fifo_set_limit(struct Qdisc *q, unsigned int limit); struct Qdisc *fifo_create_dflt(struct Qdisc *sch, struct Qdisc_ops *ops, - unsigned int limit); + unsigned int limit, + struct netlink_ext_ack *extack); int register_qdisc(struct Qdisc_ops *qops); int unregister_qdisc(struct Qdisc_ops *qops); @@ -99,22 +100,24 @@ int qdisc_set_default(const char *id); void qdisc_hash_add(struct Qdisc *q, bool invisible); void qdisc_hash_del(struct Qdisc *q); struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle); -struct Qdisc *qdisc_lookup_class(struct net_device *dev, u32 handle); struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, - struct nlattr *tab); + struct nlattr *tab, + struct netlink_ext_ack *extack); void qdisc_put_rtab(struct qdisc_rate_table *tab); void qdisc_put_stab(struct qdisc_size_table *tab); void qdisc_warn_nonwc(const char *txt, struct Qdisc *qdisc); -int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q, - struct net_device *dev, struct netdev_queue *txq, - spinlock_t *root_lock, bool validate); +bool sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q, + struct net_device *dev, struct netdev_queue *txq, + spinlock_t *root_lock, bool validate); void __qdisc_run(struct Qdisc *q); static inline void qdisc_run(struct Qdisc *q) { - if (qdisc_run_begin(q)) + if (qdisc_run_begin(q)) { __qdisc_run(q); + qdisc_run_end(q); + } } static inline __be16 tc_skb_protocol(const struct sk_buff *skb) diff --git a/include/net/route.h b/include/net/route.h index d538e6db1afe..1eb9ce470e25 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -217,7 +217,7 @@ unsigned int inet_addr_type_dev_table(struct net *net, const struct net_device *dev, __be32 addr); void ip_rt_multicast_event(struct in_device *); -int ip_rt_ioctl(struct net *, unsigned int cmd, void __user *arg); +int ip_rt_ioctl(struct net *, unsigned int cmd, struct rtentry *rt); void ip_rt_get_source(u8 *src, struct sk_buff *skb, struct rtable *rt); struct rtable *rt_dst_alloc(struct net_device *dev, unsigned int flags, u16 type, diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h index ead018744ff5..14b6b3af8918 100644 --- a/include/net/rtnetlink.h +++ b/include/net/rtnetlink.h @@ -13,10 +13,10 @@ enum rtnl_link_flags { RTNL_FLAG_DOIT_UNLOCKED = 1, }; -int __rtnl_register(int protocol, int msgtype, - rtnl_doit_func, rtnl_dumpit_func, unsigned int flags); void rtnl_register(int protocol, int msgtype, rtnl_doit_func, rtnl_dumpit_func, unsigned int flags); +int rtnl_register_module(struct module *owner, int protocol, int msgtype, + rtnl_doit_func, rtnl_dumpit_func, unsigned int flags); int rtnl_unregister(int protocol, int msgtype); void rtnl_unregister_all(int protocol); diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index becf86aa4ac6..e2ab13687fb9 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -71,6 +71,7 @@ struct Qdisc { * qdisc_tree_decrease_qlen() should stop. */ #define TCQ_F_INVISIBLE 0x80 /* invisible by default in dump */ +#define TCQ_F_NOLOCK 0x100 /* qdisc does not require locking */ #define TCQ_F_OFFLOADED 0x200 /* qdisc is offloaded to HW */ u32 limit; const struct Qdisc_ops *ops; @@ -88,14 +89,14 @@ struct Qdisc { /* * For performance sake on SMP, we put highly modified fields at the end */ - struct sk_buff *gso_skb ____cacheline_aligned_in_smp; + struct sk_buff_head gso_skb ____cacheline_aligned_in_smp; struct qdisc_skb_head q; struct gnet_stats_basic_packed bstats; seqcount_t running; struct gnet_stats_queue qstats; unsigned long state; struct Qdisc *next_sched; - struct sk_buff *skb_bad_txq; + struct sk_buff_head skb_bad_txq; int padded; refcount_t refcnt; @@ -150,19 +151,23 @@ struct Qdisc_class_ops { /* Child qdisc manipulation */ struct netdev_queue * (*select_queue)(struct Qdisc *, struct tcmsg *); int (*graft)(struct Qdisc *, unsigned long cl, - struct Qdisc *, struct Qdisc **); + struct Qdisc *, struct Qdisc **, + struct netlink_ext_ack *extack); struct Qdisc * (*leaf)(struct Qdisc *, unsigned long cl); void (*qlen_notify)(struct Qdisc *, unsigned long); /* Class manipulation routines */ unsigned long (*find)(struct Qdisc *, u32 classid); int (*change)(struct Qdisc *, u32, u32, - struct nlattr **, unsigned long *); + struct nlattr **, unsigned long *, + struct netlink_ext_ack *); int (*delete)(struct Qdisc *, unsigned long); void (*walk)(struct Qdisc *, struct qdisc_walker * arg); /* Filter manipulation */ - struct tcf_block * (*tcf_block)(struct Qdisc *, unsigned long); + struct tcf_block * (*tcf_block)(struct Qdisc *sch, + unsigned long arg, + struct netlink_ext_ack *extack); unsigned long (*bind_tcf)(struct Qdisc *, unsigned long, u32 classid); void (*unbind_tcf)(struct Qdisc *, unsigned long); @@ -187,15 +192,26 @@ struct Qdisc_ops { struct sk_buff * (*dequeue)(struct Qdisc *); struct sk_buff * (*peek)(struct Qdisc *); - int (*init)(struct Qdisc *, struct nlattr *arg); + int (*init)(struct Qdisc *sch, struct nlattr *arg, + struct netlink_ext_ack *extack); void (*reset)(struct Qdisc *); void (*destroy)(struct Qdisc *); - int (*change)(struct Qdisc *, struct nlattr *arg); - void (*attach)(struct Qdisc *); + int (*change)(struct Qdisc *sch, + struct nlattr *arg, + struct netlink_ext_ack *extack); + void (*attach)(struct Qdisc *sch); + int (*change_tx_queue_len)(struct Qdisc *, unsigned int); int (*dump)(struct Qdisc *, struct sk_buff *); int (*dump_stats)(struct Qdisc *, struct gnet_dump *); + void (*ingress_block_set)(struct Qdisc *sch, + u32 block_index); + void (*egress_block_set)(struct Qdisc *sch, + u32 block_index); + u32 (*ingress_block_get)(struct Qdisc *sch); + u32 (*egress_block_get)(struct Qdisc *sch); + struct module *owner; }; @@ -218,14 +234,18 @@ struct tcf_proto_ops { const struct tcf_proto *, struct tcf_result *); int (*init)(struct tcf_proto*); - void (*destroy)(struct tcf_proto*); + void (*destroy)(struct tcf_proto *tp, + struct netlink_ext_ack *extack); void* (*get)(struct tcf_proto*, u32 handle); int (*change)(struct net *net, struct sk_buff *, struct tcf_proto*, unsigned long, u32 handle, struct nlattr **, - void **, bool); - int (*delete)(struct tcf_proto*, void *, bool*); + void **, bool, + struct netlink_ext_ack *); + int (*delete)(struct tcf_proto *tp, void *arg, + bool *last, + struct netlink_ext_ack *); void (*walk)(struct tcf_proto*, struct tcf_walker *arg); void (*bind_class)(void *, u32, unsigned long); @@ -247,8 +267,6 @@ struct tcf_proto { /* All the rest */ u32 prio; - u32 classid; - struct Qdisc *q; void *data; const struct tcf_proto_ops *ops; struct tcf_chain *chain; @@ -267,8 +285,7 @@ typedef void tcf_chain_head_change_t(struct tcf_proto *tp_head, void *priv); struct tcf_chain { struct tcf_proto __rcu *filter_chain; - tcf_chain_head_change_t *chain_head_change; - void *chain_head_change_priv; + struct list_head filter_chain_list; struct list_head list; struct tcf_block *block; u32 index; /* chain index */ @@ -277,12 +294,33 @@ struct tcf_chain { struct tcf_block { struct list_head chain_list; + u32 index; /* block index for shared blocks */ + unsigned int refcnt; struct net *net; struct Qdisc *q; struct list_head cb_list; - struct work_struct work; + struct list_head owner_list; + bool keep_dst; + unsigned int offloadcnt; /* Number of oddloaded filters */ + unsigned int nooffloaddevcnt; /* Number of devs unable to do offload */ }; +static inline void tcf_block_offload_inc(struct tcf_block *block, u32 *flags) +{ + if (*flags & TCA_CLS_FLAGS_IN_HW) + return; + *flags |= TCA_CLS_FLAGS_IN_HW; + block->offloadcnt++; +} + +static inline void tcf_block_offload_dec(struct tcf_block *block, u32 *flags) +{ + if (!(*flags & TCA_CLS_FLAGS_IN_HW)) + return; + *flags &= ~TCA_CLS_FLAGS_IN_HW; + block->offloadcnt--; +} + static inline void qdisc_cb_private_validate(const struct sk_buff *skb, int sz) { struct qdisc_skb_cb *qcb; @@ -291,11 +329,31 @@ static inline void qdisc_cb_private_validate(const struct sk_buff *skb, int sz) BUILD_BUG_ON(sizeof(qcb->data) < sz); } +static inline int qdisc_qlen_cpu(const struct Qdisc *q) +{ + return this_cpu_ptr(q->cpu_qstats)->qlen; +} + static inline int qdisc_qlen(const struct Qdisc *q) { return q->q.qlen; } +static inline int qdisc_qlen_sum(const struct Qdisc *q) +{ + __u32 qlen = 0; + int i; + + if (q->flags & TCQ_F_NOLOCK) { + for_each_possible_cpu(i) + qlen += per_cpu_ptr(q->cpu_qstats, i)->qlen; + } else { + qlen = q->q.qlen; + } + + return qlen; +} + static inline struct qdisc_skb_cb *qdisc_skb_cb(const struct sk_buff *skb) { return (struct qdisc_skb_cb *)skb->cb; @@ -432,6 +490,7 @@ void qdisc_class_hash_remove(struct Qdisc_class_hash *, void qdisc_class_hash_grow(struct Qdisc *, struct Qdisc_class_hash *); void qdisc_class_hash_destroy(struct Qdisc_class_hash *); +int dev_qdisc_change_tx_queue_len(struct net_device *dev); void dev_init_scheduler(struct net_device *dev); void dev_shutdown(struct net_device *dev); void dev_activate(struct net_device *dev); @@ -444,10 +503,12 @@ void qdisc_destroy(struct Qdisc *qdisc); void qdisc_tree_reduce_backlog(struct Qdisc *qdisc, unsigned int n, unsigned int len); struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, - const struct Qdisc_ops *ops); + const struct Qdisc_ops *ops, + struct netlink_ext_ack *extack); void qdisc_free(struct Qdisc *qdisc); struct Qdisc *qdisc_create_dflt(struct netdev_queue *dev_queue, - const struct Qdisc_ops *ops, u32 parentid); + const struct Qdisc_ops *ops, u32 parentid, + struct netlink_ext_ack *extack); void __qdisc_calculate_pkt_len(struct sk_buff *skb, const struct qdisc_size_table *stab); int skb_do_redirect(struct sk_buff *); @@ -633,12 +694,39 @@ static inline void qdisc_qstats_backlog_dec(struct Qdisc *sch, sch->qstats.backlog -= qdisc_pkt_len(skb); } +static inline void qdisc_qstats_cpu_backlog_dec(struct Qdisc *sch, + const struct sk_buff *skb) +{ + this_cpu_sub(sch->cpu_qstats->backlog, qdisc_pkt_len(skb)); +} + static inline void qdisc_qstats_backlog_inc(struct Qdisc *sch, const struct sk_buff *skb) { sch->qstats.backlog += qdisc_pkt_len(skb); } +static inline void qdisc_qstats_cpu_backlog_inc(struct Qdisc *sch, + const struct sk_buff *skb) +{ + this_cpu_add(sch->cpu_qstats->backlog, qdisc_pkt_len(skb)); +} + +static inline void qdisc_qstats_cpu_qlen_inc(struct Qdisc *sch) +{ + this_cpu_inc(sch->cpu_qstats->qlen); +} + +static inline void qdisc_qstats_cpu_qlen_dec(struct Qdisc *sch) +{ + this_cpu_dec(sch->cpu_qstats->qlen); +} + +static inline void qdisc_qstats_cpu_requeues_inc(struct Qdisc *sch) +{ + this_cpu_inc(sch->cpu_qstats->requeues); +} + static inline void __qdisc_qstats_drop(struct Qdisc *sch, int count) { sch->qstats.drops += count; @@ -769,26 +857,30 @@ static inline struct sk_buff *qdisc_peek_head(struct Qdisc *sch) /* generic pseudo peek method for non-work-conserving qdisc */ static inline struct sk_buff *qdisc_peek_dequeued(struct Qdisc *sch) { + struct sk_buff *skb = skb_peek(&sch->gso_skb); + /* we can reuse ->gso_skb because peek isn't called for root qdiscs */ - if (!sch->gso_skb) { - sch->gso_skb = sch->dequeue(sch); - if (sch->gso_skb) { + if (!skb) { + skb = sch->dequeue(sch); + + if (skb) { + __skb_queue_head(&sch->gso_skb, skb); /* it's still part of the queue */ - qdisc_qstats_backlog_inc(sch, sch->gso_skb); + qdisc_qstats_backlog_inc(sch, skb); sch->q.qlen++; } } - return sch->gso_skb; + return skb; } /* use instead of qdisc->dequeue() for all qdiscs queried with ->peek() */ static inline struct sk_buff *qdisc_dequeue_peeked(struct Qdisc *sch) { - struct sk_buff *skb = sch->gso_skb; + struct sk_buff *skb = skb_peek(&sch->gso_skb); if (skb) { - sch->gso_skb = NULL; + skb = __skb_dequeue(&sch->gso_skb); qdisc_qstats_backlog_dec(sch, skb); sch->q.qlen--; } else { @@ -846,6 +938,14 @@ static inline void rtnl_qdisc_drop(struct sk_buff *skb, struct Qdisc *sch) qdisc_qstats_drop(sch); } +static inline int qdisc_drop_cpu(struct sk_buff *skb, struct Qdisc *sch, + struct sk_buff **to_free) +{ + __qdisc_drop(skb, to_free); + qdisc_qstats_cpu_drop(sch); + + return NET_XMIT_DROP; +} static inline int qdisc_drop(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h index deaafa9b09cb..20ff237c5eb2 100644 --- a/include/net/sctp/constants.h +++ b/include/net/sctp/constants.h @@ -145,12 +145,13 @@ SCTP_SUBTYPE_CONSTRUCTOR(OTHER, enum sctp_event_other, other) SCTP_SUBTYPE_CONSTRUCTOR(PRIMITIVE, enum sctp_event_primitive, primitive) -#define sctp_chunk_is_data(a) (a->chunk_hdr->type == SCTP_CID_DATA) +#define sctp_chunk_is_data(a) (a->chunk_hdr->type == SCTP_CID_DATA || \ + a->chunk_hdr->type == SCTP_CID_I_DATA) /* Calculate the actual data size in a data chunk */ -#define SCTP_DATA_SNDSIZE(c) ((int)((unsigned long)(c->chunk_end)\ - - (unsigned long)(c->chunk_hdr)\ - - sizeof(struct sctp_data_chunk))) +#define SCTP_DATA_SNDSIZE(c) ((int)((unsigned long)(c->chunk_end) - \ + (unsigned long)(c->chunk_hdr) - \ + sctp_datachk_len(&c->asoc->stream))) /* Internal error codes */ enum sctp_ierror { diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 608d123ef25f..f7ae6b0a21d0 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -116,7 +116,7 @@ extern struct percpu_counter sctp_sockets_allocated; int sctp_asconf_mgmt(struct sctp_sock *, struct sctp_sockaddr_entry *); struct sk_buff *sctp_skb_recv_datagram(struct sock *, int, int, int *); -int sctp_transport_walk_start(struct rhashtable_iter *iter); +void sctp_transport_walk_start(struct rhashtable_iter *iter); void sctp_transport_walk_stop(struct rhashtable_iter *iter); struct sctp_transport *sctp_transport_get_next(struct net *net, struct rhashtable_iter *iter); @@ -444,13 +444,13 @@ static inline int sctp_frag_point(const struct sctp_association *asoc, int pmtu) int frag = pmtu; frag -= sp->pf->af->net_header_len; - frag -= sizeof(struct sctphdr) + sizeof(struct sctp_data_chunk); + frag -= sizeof(struct sctphdr) + sctp_datachk_len(&asoc->stream); if (asoc->user_frag) frag = min_t(int, frag, asoc->user_frag); frag = SCTP_TRUNC4(min_t(int, frag, SCTP_MAX_CHUNK_LEN - - sizeof(struct sctp_data_chunk))); + sctp_datachk_len(&asoc->stream))); return frag; } diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h index 70fb397f65b0..2883c43c5258 100644 --- a/include/net/sctp/sm.h +++ b/include/net/sctp/sm.h @@ -197,10 +197,14 @@ struct sctp_chunk *sctp_make_cookie_ack(const struct sctp_association *asoc, struct sctp_chunk *sctp_make_cwr(const struct sctp_association *asoc, const __u32 lowest_tsn, const struct sctp_chunk *chunk); -struct sctp_chunk *sctp_make_datafrag_empty(struct sctp_association *asoc, +struct sctp_chunk *sctp_make_idata(const struct sctp_association *asoc, + __u8 flags, int paylen, gfp_t gfp); +struct sctp_chunk *sctp_make_ifwdtsn(const struct sctp_association *asoc, + __u32 new_cum_tsn, size_t nstreams, + struct sctp_ifwdtsn_skip *skiplist); +struct sctp_chunk *sctp_make_datafrag_empty(const struct sctp_association *asoc, const struct sctp_sndrcvinfo *sinfo, - int len, const __u8 flags, - __u16 ssn, gfp_t gfp); + int len, __u8 flags, gfp_t gfp); struct sctp_chunk *sctp_make_ecne(const struct sctp_association *asoc, const __u32 lowest_tsn); struct sctp_chunk *sctp_make_sack(const struct sctp_association *asoc); @@ -342,7 +346,7 @@ static inline __u16 sctp_data_size(struct sctp_chunk *chunk) __u16 size; size = ntohs(chunk->chunk_hdr->length); - size -= sizeof(struct sctp_data_chunk); + size -= sctp_datahdr_len(&chunk->asoc->stream); return size; } @@ -358,6 +362,12 @@ static inline __u16 sctp_data_size(struct sctp_chunk *chunk) typecheck(__u32, b) && \ ((__s32)((a) - (b)) <= 0)) +/* Compare two MIDs */ +#define MID_lt(a, b) \ + (typecheck(__u32, a) && \ + typecheck(__u32, b) && \ + ((__s32)((a) - (b)) < 0)) + /* Compare two SSNs */ #define SSN_lt(a,b) \ (typecheck(__u16, a) && \ diff --git a/include/net/sctp/stream_interleave.h b/include/net/sctp/stream_interleave.h new file mode 100644 index 000000000000..6657711c8bc4 --- /dev/null +++ b/include/net/sctp/stream_interleave.h @@ -0,0 +1,61 @@ +/* SCTP kernel implementation + * (C) Copyright Red Hat Inc. 2017 + * + * These are definitions used by the stream schedulers, defined in RFC + * draft ndata (https://tools.ietf.org/html/draft-ietf-tsvwg-sctp-ndata-11) + * + * This SCTP implementation is free software; + * you can redistribute it and/or modify it under the terms of + * the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This SCTP implementation is distributed in the hope that it + * will be useful, but WITHOUT ANY WARRANTY; without even the implied + * ************************ + * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU CC; see the file COPYING. If not, see + * <http://www.gnu.org/licenses/>. + * + * Please send any bug reports or fixes you make to the + * email addresses: + * lksctp developers <linux-sctp@vger.kernel.org> + * + * Written or modified by: + * Xin Long <lucien.xin@gmail.com> + */ + +#ifndef __sctp_stream_interleave_h__ +#define __sctp_stream_interleave_h__ + +struct sctp_stream_interleave { + __u16 data_chunk_len; + __u16 ftsn_chunk_len; + /* (I-)DATA process */ + struct sctp_chunk *(*make_datafrag)(const struct sctp_association *asoc, + const struct sctp_sndrcvinfo *sinfo, + int len, __u8 flags, gfp_t gfp); + void (*assign_number)(struct sctp_chunk *chunk); + bool (*validate_data)(struct sctp_chunk *chunk); + int (*ulpevent_data)(struct sctp_ulpq *ulpq, + struct sctp_chunk *chunk, gfp_t gfp); + int (*enqueue_event)(struct sctp_ulpq *ulpq, + struct sctp_ulpevent *event); + void (*renege_events)(struct sctp_ulpq *ulpq, + struct sctp_chunk *chunk, gfp_t gfp); + void (*start_pd)(struct sctp_ulpq *ulpq, gfp_t gfp); + void (*abort_pd)(struct sctp_ulpq *ulpq, gfp_t gfp); + /* (I-)FORWARD-TSN process */ + void (*generate_ftsn)(struct sctp_outq *q, __u32 ctsn); + bool (*validate_ftsn)(struct sctp_chunk *chunk); + void (*report_ftsn)(struct sctp_ulpq *ulpq, __u32 ftsn); + void (*handle_ftsn)(struct sctp_ulpq *ulpq, + struct sctp_chunk *chunk); +}; + +void sctp_stream_interleave_init(struct sctp_stream *stream); + +#endif /* __sctp_stream_interleave_h__ */ diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 9a5ccf03a59b..02369e379d35 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -89,6 +89,7 @@ struct sctp_stream; #include <net/sctp/tsnmap.h> #include <net/sctp/ulpevent.h> #include <net/sctp/ulpqueue.h> +#include <net/sctp/stream_interleave.h> /* Structures useful for managing bind/connect. */ @@ -217,6 +218,7 @@ struct sctp_sock { disable_fragments:1, v4mapped:1, frag_interleave:1, + strm_interleave:1, recvrcvinfo:1, recvnxtinfo:1, data_ready_signalled:1; @@ -397,6 +399,28 @@ void sctp_stream_update(struct sctp_stream *stream, struct sctp_stream *new); #define sctp_ssn_skip(stream, type, sid, ssn) \ ((stream)->type[sid].ssn = ssn + 1) +/* What is the current MID number for this stream? */ +#define sctp_mid_peek(stream, type, sid) \ + ((stream)->type[sid].mid) + +/* Return the next MID number for this stream. */ +#define sctp_mid_next(stream, type, sid) \ + ((stream)->type[sid].mid++) + +/* Skip over this mid and all below. */ +#define sctp_mid_skip(stream, type, sid, mid) \ + ((stream)->type[sid].mid = mid + 1) + +#define sctp_stream_in(asoc, sid) (&(asoc)->stream.in[sid]) + +/* What is the current MID_uo number for this stream? */ +#define sctp_mid_uo_peek(stream, type, sid) \ + ((stream)->type[sid].mid_uo) + +/* Return the next MID_uo number for this stream. */ +#define sctp_mid_uo_next(stream, type, sid) \ + ((stream)->type[sid].mid_uo++) + /* * Pointers to address related SCTP functions. * (i.e. things that depend on the address family.) @@ -574,6 +598,8 @@ struct sctp_chunk { struct sctp_addiphdr *addip_hdr; struct sctp_fwdtsn_hdr *fwdtsn_hdr; struct sctp_authhdr *auth_hdr; + struct sctp_idatahdr *idata_hdr; + struct sctp_ifwdtsn_hdr *ifwdtsn_hdr; } subh; __u8 *chunk_end; @@ -620,6 +646,7 @@ struct sctp_chunk { __u16 rtt_in_progress:1, /* This chunk used for RTT calc? */ has_tsn:1, /* Does this chunk have a TSN yet? */ has_ssn:1, /* Does this chunk have a SSN yet? */ +#define has_mid has_ssn singleton:1, /* Only chunk in the packet? */ end_of_packet:1, /* Last chunk in the packet? */ ecn_ce_done:1, /* Have we processed the ECN CE bit? */ @@ -1073,6 +1100,7 @@ void sctp_retransmit_mark(struct sctp_outq *, struct sctp_transport *, __u8); void sctp_outq_uncork(struct sctp_outq *, gfp_t gfp); void sctp_prsctp_prune(struct sctp_association *asoc, struct sctp_sndrcvinfo *sinfo, int msg_len); +void sctp_generate_fwdtsn(struct sctp_outq *q, __u32 sack_ctsn); /* Uncork and flush an outqueue. */ static inline void sctp_outq_cork(struct sctp_outq *q) { @@ -1357,13 +1385,25 @@ struct sctp_stream_out_ext { }; struct sctp_stream_out { - __u16 ssn; - __u8 state; + union { + __u32 mid; + __u16 ssn; + }; + __u32 mid_uo; struct sctp_stream_out_ext *ext; + __u8 state; }; struct sctp_stream_in { - __u16 ssn; + union { + __u32 mid; + __u16 ssn; + }; + __u32 mid_uo; + __u32 fsn; + __u32 fsn_uo; + char pd_mode; + char pd_mode_uo; }; struct sctp_stream { @@ -1387,11 +1427,32 @@ struct sctp_stream { struct sctp_stream_out_ext *rr_next; }; }; + struct sctp_stream_interleave *si; }; #define SCTP_STREAM_CLOSED 0x00 #define SCTP_STREAM_OPEN 0x01 +static inline __u16 sctp_datachk_len(const struct sctp_stream *stream) +{ + return stream->si->data_chunk_len; +} + +static inline __u16 sctp_datahdr_len(const struct sctp_stream *stream) +{ + return stream->si->data_chunk_len - sizeof(struct sctp_chunkhdr); +} + +static inline __u16 sctp_ftsnchk_len(const struct sctp_stream *stream) +{ + return stream->si->ftsn_chunk_len; +} + +static inline __u16 sctp_ftsnhdr_len(const struct sctp_stream *stream) +{ + return stream->si->ftsn_chunk_len - sizeof(struct sctp_chunkhdr); +} + /* SCTP_GET_ASSOC_STATS counters */ struct sctp_priv_assoc_stats { /* Maximum observed rto in the association during subsequent @@ -1940,6 +2001,7 @@ struct sctp_association { __u8 need_ecne:1, /* Need to send an ECNE Chunk? */ temp:1, /* Is it a temporary association? */ force_delay:1, + intl_enable:1, prsctp_enable:1, reconf_enable:1; diff --git a/include/net/sctp/ulpevent.h b/include/net/sctp/ulpevent.h index 231dc42f1da6..51b4e0626c34 100644 --- a/include/net/sctp/ulpevent.h +++ b/include/net/sctp/ulpevent.h @@ -45,19 +45,29 @@ /* A structure to carry information to the ULP (e.g. Sockets API) */ /* Warning: This sits inside an skb.cb[] area. Be very careful of * growing this structure as it is at the maximum limit now. + * + * sctp_ulpevent is saved in sk->cb(48 bytes), whose last 4 bytes + * have been taken by sock_skb_cb, So here it has to use 'packed' + * to make sctp_ulpevent fit into the rest 44 bytes. */ struct sctp_ulpevent { struct sctp_association *asoc; struct sctp_chunk *chunk; unsigned int rmem_len; - __u32 ppid; + union { + __u32 mid; + __u16 ssn; + }; + union { + __u32 ppid; + __u32 fsn; + }; __u32 tsn; __u32 cumtsn; __u16 stream; - __u16 ssn; __u16 flags; __u16 msg_flags; -}; +} __packed; /* Retrieve the skb this event sits inside of. */ static inline struct sk_buff *sctp_event2skb(const struct sctp_ulpevent *ev) @@ -112,7 +122,8 @@ struct sctp_ulpevent *sctp_ulpevent_make_shutdown_event( struct sctp_ulpevent *sctp_ulpevent_make_pdapi( const struct sctp_association *asoc, - __u32 indication, gfp_t gfp); + __u32 indication, __u32 sid, __u32 seq, + __u32 flags, gfp_t gfp); struct sctp_ulpevent *sctp_ulpevent_make_adaptation_indication( const struct sctp_association *asoc, gfp_t gfp); @@ -140,6 +151,10 @@ struct sctp_ulpevent *sctp_ulpevent_make_stream_change_event( const struct sctp_association *asoc, __u16 flags, __u32 strchange_instrms, __u32 strchange_outstrms, gfp_t gfp); +struct sctp_ulpevent *sctp_make_reassembled_event( + struct net *net, struct sk_buff_head *queue, + struct sk_buff *f_frag, struct sk_buff *l_frag); + void sctp_ulpevent_read_sndrcvinfo(const struct sctp_ulpevent *event, struct msghdr *); void sctp_ulpevent_read_rcvinfo(const struct sctp_ulpevent *event, diff --git a/include/net/sctp/ulpqueue.h b/include/net/sctp/ulpqueue.h index e0dce07b8794..bb0ecba3db2b 100644 --- a/include/net/sctp/ulpqueue.h +++ b/include/net/sctp/ulpqueue.h @@ -45,6 +45,7 @@ struct sctp_ulpq { char pd_mode; struct sctp_association *asoc; struct sk_buff_head reasm; + struct sk_buff_head reasm_uo; struct sk_buff_head lobby; }; @@ -76,11 +77,8 @@ int sctp_clear_pd(struct sock *sk, struct sctp_association *asoc); void sctp_ulpq_skip(struct sctp_ulpq *ulpq, __u16 sid, __u16 ssn); void sctp_ulpq_reasm_flushtsn(struct sctp_ulpq *, __u32); -#endif /* __sctp_ulpqueue_h__ */ - - - - - +__u16 sctp_ulpq_renege_list(struct sctp_ulpq *ulpq, + struct sk_buff_head *list, __u16 needed); +#endif /* __sctp_ulpqueue_h__ */ diff --git a/include/net/sock.h b/include/net/sock.h index c47c635c9d19..63731289186a 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -72,6 +72,7 @@ #include <net/tcp_states.h> #include <linux/net_tstamp.h> #include <net/smc.h> +#include <net/l3mdev.h> /* * This structure really needs to be cleaned up. @@ -1262,6 +1263,7 @@ proto_memory_pressure(struct proto *prot) /* Called with local bh disabled */ void sock_prot_inuse_add(struct net *net, struct proto *prot, int inc); int sock_prot_inuse_get(struct net *net, struct proto *proto); +int sock_inuse_get(struct net *net); #else static inline void sock_prot_inuse_add(struct net *net, struct proto *prot, int inc) @@ -2335,31 +2337,6 @@ static inline bool sk_listener(const struct sock *sk) return (1 << sk->sk_state) & (TCPF_LISTEN | TCPF_NEW_SYN_RECV); } -/** - * sk_state_load - read sk->sk_state for lockless contexts - * @sk: socket pointer - * - * Paired with sk_state_store(). Used in places we do not hold socket lock : - * tcp_diag_get_info(), tcp_get_info(), tcp_poll(), get_tcp4_sock() ... - */ -static inline int sk_state_load(const struct sock *sk) -{ - return smp_load_acquire(&sk->sk_state); -} - -/** - * sk_state_store - update sk->sk_state - * @sk: socket pointer - * @newstate: new state - * - * Paired with sk_state_load(). Should be used in contexts where - * state change might impact lockless readers. - */ -static inline void sk_state_store(struct sock *sk, int newstate) -{ - smp_store_release(&sk->sk_state, newstate); -} - void sock_enable_timestamp(struct sock *sk, int flag); int sock_get_timestamp(struct sock *, struct timeval __user *); int sock_get_timestampns(struct sock *, struct timespec __user *); @@ -2410,4 +2387,34 @@ static inline int sk_get_rmem0(const struct sock *sk, const struct proto *proto) return *proto->sysctl_rmem; } +/* Default TCP Small queue budget is ~1 ms of data (1sec >> 10) + * Some wifi drivers need to tweak it to get more chunks. + * They can use this helper from their ndo_start_xmit() + */ +static inline void sk_pacing_shift_update(struct sock *sk, int val) +{ + if (!sk || !sk_fullsock(sk) || sk->sk_pacing_shift == val) + return; + sk->sk_pacing_shift = val; +} + +/* if a socket is bound to a device, check that the given device + * index is either the same or that the socket is bound to an L3 + * master device and the given device index is also enslaved to + * that L3 master + */ +static inline bool sk_dev_equal_l3scope(struct sock *sk, int dif) +{ + int mdif; + + if (!sk->sk_bound_dev_if || sk->sk_bound_dev_if == dif) + return true; + + mdif = l3mdev_master_ifindex_by_index(sock_net(sk), dif); + if (mdif && mdif == sk->sk_bound_dev_if) + return true; + + return false; +} + #endif /* _SOCK_H */ diff --git a/include/net/tc_act/tc_csum.h b/include/net/tc_act/tc_csum.h index 781f3433a0be..9470fd7e4350 100644 --- a/include/net/tc_act/tc_csum.h +++ b/include/net/tc_act/tc_csum.h @@ -6,10 +6,16 @@ #include <net/act_api.h> #include <linux/tc_act/tc_csum.h> +struct tcf_csum_params { + int action; + u32 update_flags; + struct rcu_head rcu; +}; + struct tcf_csum { struct tc_action common; - u32 update_flags; + struct tcf_csum_params __rcu *params; }; #define to_tcf_csum(a) ((struct tcf_csum *)a) @@ -24,7 +30,13 @@ static inline bool is_tcf_csum(const struct tc_action *a) static inline u32 tcf_csum_update_flags(const struct tc_action *a) { - return to_tcf_csum(a)->update_flags; + u32 update_flags; + + rcu_read_lock(); + update_flags = rcu_dereference(to_tcf_csum(a)->params)->update_flags; + rcu_read_unlock(); + + return update_flags; } #endif /* __NET_TC_CSUM_H */ diff --git a/include/net/tc_act/tc_mirred.h b/include/net/tc_act/tc_mirred.h index 21d253c9a8c6..a2e9cbca5c9e 100644 --- a/include/net/tc_act/tc_mirred.h +++ b/include/net/tc_act/tc_mirred.h @@ -8,10 +8,8 @@ struct tcf_mirred { struct tc_action common; int tcfm_eaction; - int tcfm_ifindex; bool tcfm_mac_header_xmit; struct net_device __rcu *tcfm_dev; - struct net *net; struct list_head tcfm_list; }; #define to_mirred(a) ((struct tcf_mirred *)a) @@ -34,9 +32,9 @@ static inline bool is_tcf_mirred_egress_mirror(const struct tc_action *a) return false; } -static inline int tcf_mirred_ifindex(const struct tc_action *a) +static inline struct net_device *tcf_mirred_dev(const struct tc_action *a) { - return to_mirred(a)->tcfm_ifindex; + return rtnl_dereference(to_mirred(a)->tcfm_dev); } #endif /* __NET_TC_MIR_H */ diff --git a/include/net/tcp.h b/include/net/tcp.h index 50b21a49d870..58278669cc55 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -953,6 +953,7 @@ struct rate_sample { u32 prior_in_flight; /* in flight before this ACK */ bool is_app_limited; /* is sample from packet with bubble in pipe? */ bool is_retrans; /* is sample from retransmission? */ + bool is_ack_delayed; /* is this (likely) a delayed ACK? */ }; struct tcp_congestion_ops { @@ -1507,8 +1508,7 @@ int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, /* From tcp_fastopen.c */ void tcp_fastopen_cache_get(struct sock *sk, u16 *mss, - struct tcp_fastopen_cookie *cookie, int *syn_loss, - unsigned long *last_syn_loss); + struct tcp_fastopen_cookie *cookie); void tcp_fastopen_cache_set(struct sock *sk, u16 mss, struct tcp_fastopen_cookie *cookie, bool syn_lost, u16 try_exp); @@ -1546,7 +1546,7 @@ extern unsigned int sysctl_tcp_fastopen_blackhole_timeout; void tcp_fastopen_active_disable(struct sock *sk); bool tcp_fastopen_active_should_disable(struct sock *sk); void tcp_fastopen_active_disable_ofo_check(struct sock *sk); -void tcp_fastopen_active_timeout_reset(void); +void tcp_fastopen_active_detect_blackhole(struct sock *sk, bool expired); /* Latencies incurred by various limits for a sender. They are * chronograph-like stats that are mutually exclusive. @@ -2006,17 +2006,21 @@ void tcp_cleanup_ulp(struct sock *sk); * program loaded). */ #ifdef CONFIG_BPF -static inline int tcp_call_bpf(struct sock *sk, int op) +static inline int tcp_call_bpf(struct sock *sk, int op, u32 nargs, u32 *args) { struct bpf_sock_ops_kern sock_ops; int ret; - if (sk_fullsock(sk)) + memset(&sock_ops, 0, offsetof(struct bpf_sock_ops_kern, temp)); + if (sk_fullsock(sk)) { + sock_ops.is_fullsock = 1; sock_owned_by_me(sk); + } - memset(&sock_ops, 0, sizeof(sock_ops)); sock_ops.sk = sk; sock_ops.op = op; + if (nargs > 0) + memcpy(sock_ops.args, args, nargs * sizeof(*args)); ret = BPF_CGROUP_RUN_PROG_SOCK_OPS(&sock_ops); if (ret == 0) @@ -2025,18 +2029,46 @@ static inline int tcp_call_bpf(struct sock *sk, int op) ret = -1; return ret; } + +static inline int tcp_call_bpf_2arg(struct sock *sk, int op, u32 arg1, u32 arg2) +{ + u32 args[2] = {arg1, arg2}; + + return tcp_call_bpf(sk, op, 2, args); +} + +static inline int tcp_call_bpf_3arg(struct sock *sk, int op, u32 arg1, u32 arg2, + u32 arg3) +{ + u32 args[3] = {arg1, arg2, arg3}; + + return tcp_call_bpf(sk, op, 3, args); +} + #else -static inline int tcp_call_bpf(struct sock *sk, int op) +static inline int tcp_call_bpf(struct sock *sk, int op, u32 nargs, u32 *args) { return -EPERM; } + +static inline int tcp_call_bpf_2arg(struct sock *sk, int op, u32 arg1, u32 arg2) +{ + return -EPERM; +} + +static inline int tcp_call_bpf_3arg(struct sock *sk, int op, u32 arg1, u32 arg2, + u32 arg3) +{ + return -EPERM; +} + #endif static inline u32 tcp_timeout_init(struct sock *sk) { int timeout; - timeout = tcp_call_bpf(sk, BPF_SOCK_OPS_TIMEOUT_INIT); + timeout = tcp_call_bpf(sk, BPF_SOCK_OPS_TIMEOUT_INIT, 0, NULL); if (timeout <= 0) timeout = TCP_TIMEOUT_INIT; @@ -2047,7 +2079,7 @@ static inline u32 tcp_rwnd_init_bpf(struct sock *sk) { int rwnd; - rwnd = tcp_call_bpf(sk, BPF_SOCK_OPS_RWND_INIT); + rwnd = tcp_call_bpf(sk, BPF_SOCK_OPS_RWND_INIT, 0, NULL); if (rwnd < 0) rwnd = 0; @@ -2056,7 +2088,7 @@ static inline u32 tcp_rwnd_init_bpf(struct sock *sk) static inline bool tcp_bpf_ca_needs_ecn(struct sock *sk) { - return (tcp_call_bpf(sk, BPF_SOCK_OPS_NEEDS_ECN) == 1); + return (tcp_call_bpf(sk, BPF_SOCK_OPS_NEEDS_ECN, 0, NULL) == 1); } #if IS_ENABLED(CONFIG_SMC) diff --git a/include/net/tls.h b/include/net/tls.h index 9185e53a743c..4913430ab807 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -36,6 +36,7 @@ #include <linux/types.h> #include <asm/byteorder.h> +#include <linux/crypto.h> #include <linux/socket.h> #include <linux/tcp.h> #include <net/tcp.h> @@ -57,6 +58,7 @@ struct tls_sw_context { struct crypto_aead *aead_send; + struct crypto_wait async_wait; /* Sending context */ char aad_space[TLS_AAD_SPACE_SIZE]; diff --git a/include/net/vxlan.h b/include/net/vxlan.h index f96391e84a8a..ad73d8b3fcc2 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -301,7 +301,7 @@ static inline netdev_features_t vxlan_features_check(struct sk_buff *skb, l4_hdr = ipv6_hdr(skb)->nexthdr; break; default: - return features;; + return features; } if ((l4_hdr == IPPROTO_UDP) && diff --git a/include/net/wext.h b/include/net/wext.h index e51f067fdb3a..aa192a670304 100644 --- a/include/net/wext.h +++ b/include/net/wext.h @@ -7,7 +7,7 @@ struct net; #ifdef CONFIG_WEXT_CORE -int wext_handle_ioctl(struct net *net, struct iwreq *iwr, unsigned int cmd, +int wext_handle_ioctl(struct net *net, unsigned int cmd, void __user *arg); int compat_wext_handle_ioctl(struct net *net, unsigned int cmd, unsigned long arg); @@ -15,7 +15,7 @@ int compat_wext_handle_ioctl(struct net *net, unsigned int cmd, struct iw_statistics *get_wireless_stats(struct net_device *dev); int call_commit_handler(struct net_device *dev); #else -static inline int wext_handle_ioctl(struct net *net, struct iwreq *iwr, unsigned int cmd, +static inline int wext_handle_ioctl(struct net *net, unsigned int cmd, void __user *arg) { return -EINVAL; diff --git a/include/net/xdp.h b/include/net/xdp.h new file mode 100644 index 000000000000..b2362ddfa694 --- /dev/null +++ b/include/net/xdp.h @@ -0,0 +1,48 @@ +/* include/net/xdp.h + * + * Copyright (c) 2017 Jesper Dangaard Brouer, Red Hat Inc. + * Released under terms in GPL version 2. See COPYING. + */ +#ifndef __LINUX_NET_XDP_H__ +#define __LINUX_NET_XDP_H__ + +/** + * DOC: XDP RX-queue information + * + * The XDP RX-queue info (xdp_rxq_info) is associated with the driver + * level RX-ring queues. It is information that is specific to how + * the driver have configured a given RX-ring queue. + * + * Each xdp_buff frame received in the driver carry a (pointer) + * reference to this xdp_rxq_info structure. This provides the XDP + * data-path read-access to RX-info for both kernel and bpf-side + * (limited subset). + * + * For now, direct access is only safe while running in NAPI/softirq + * context. Contents is read-mostly and must not be updated during + * driver NAPI/softirq poll. + * + * The driver usage API is a register and unregister API. + * + * The struct is not directly tied to the XDP prog. A new XDP prog + * can be attached as long as it doesn't change the underlying + * RX-ring. If the RX-ring does change significantly, the NIC driver + * naturally need to stop the RX-ring before purging and reallocating + * memory. In that process the driver MUST call unregistor (which + * also apply for driver shutdown and unload). The register API is + * also mandatory during RX-ring setup. + */ + +struct xdp_rxq_info { + struct net_device *dev; + u32 queue_index; + u32 reg_state; +} ____cacheline_aligned; /* perf critical, avoid false-sharing */ + +int xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq, + struct net_device *dev, u32 queue_index); +void xdp_rxq_info_unreg(struct xdp_rxq_info *xdp_rxq); +void xdp_rxq_info_unused(struct xdp_rxq_info *xdp_rxq); +bool xdp_rxq_info_is_reg(struct xdp_rxq_info *xdp_rxq); + +#endif /* __LINUX_NET_XDP_H__ */ diff --git a/include/net/xfrm.h b/include/net/xfrm.h index ae35991b5877..7d2077665c0b 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -968,7 +968,7 @@ static inline bool xfrm_sec_ctx_match(struct xfrm_sec_ctx *s1, struct xfrm_sec_c /* A struct encoding bundle of transformations to apply to some set of flow. * - * dst->child points to the next element of bundle. + * xdst->child points to the next element of bundle. * dst->xfrm points to an instanse of transformer. * * Due to unfortunate limitations of current routing cache, which we @@ -984,6 +984,8 @@ struct xfrm_dst { struct rt6_info rt6; } u; struct dst_entry *route; + struct dst_entry *child; + struct dst_entry *path; struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX]; int num_pols, num_xfrms; u32 xfrm_genid; @@ -994,7 +996,35 @@ struct xfrm_dst { u32 path_cookie; }; +static inline struct dst_entry *xfrm_dst_path(const struct dst_entry *dst) +{ +#ifdef CONFIG_XFRM + if (dst->xfrm) { + const struct xfrm_dst *xdst = (const struct xfrm_dst *) dst; + + return xdst->path; + } +#endif + return (struct dst_entry *) dst; +} + +static inline struct dst_entry *xfrm_dst_child(const struct dst_entry *dst) +{ +#ifdef CONFIG_XFRM + if (dst->xfrm) { + struct xfrm_dst *xdst = (struct xfrm_dst *) dst; + return xdst->child; + } +#endif + return NULL; +} + #ifdef CONFIG_XFRM +static inline void xfrm_dst_set_child(struct xfrm_dst *xdst, struct dst_entry *child) +{ + xdst->child = child; +} + static inline void xfrm_dst_destroy(struct xfrm_dst *xdst) { xfrm_pols_put(xdst->pols, xdst->num_pols); @@ -1021,6 +1051,7 @@ struct xfrm_offload { #define XFRM_GSO_SEGMENT 16 #define XFRM_GRO 32 #define XFRM_ESP_NO_TRAILER 64 +#define XFRM_DEV_RESUME 128 __u32 status; #define CRYPTO_SUCCESS 1 @@ -1847,34 +1878,53 @@ static inline struct xfrm_state *xfrm_input_state(struct sk_buff *skb) { return skb->sp->xvec[skb->sp->len - 1]; } +#endif + static inline struct xfrm_offload *xfrm_offload(struct sk_buff *skb) { +#ifdef CONFIG_XFRM struct sec_path *sp = skb->sp; if (!sp || !sp->olen || sp->len != sp->olen) return NULL; return &sp->ovec[sp->olen - 1]; -} +#else + return NULL; #endif +} void __net_init xfrm_dev_init(void); #ifdef CONFIG_XFRM_OFFLOAD -int validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t features); +void xfrm_dev_resume(struct sk_buff *skb); +void xfrm_dev_backlog(struct softnet_data *sd); +struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t features, bool *again); int xfrm_dev_state_add(struct net *net, struct xfrm_state *x, struct xfrm_user_offload *xuo); bool xfrm_dev_offload_ok(struct sk_buff *skb, struct xfrm_state *x); +static inline void xfrm_dev_state_advance_esn(struct xfrm_state *x) +{ + struct xfrm_state_offload *xso = &x->xso; + + if (xso->dev && xso->dev->xfrmdev_ops->xdo_dev_state_advance_esn) + xso->dev->xfrmdev_ops->xdo_dev_state_advance_esn(x); +} + static inline bool xfrm_dst_offload_ok(struct dst_entry *dst) { struct xfrm_state *x = dst->xfrm; + struct xfrm_dst *xdst; if (!x || !x->type_offload) return false; - if (x->xso.offload_handle && (x->xso.dev == dst->path->dev) && - !dst->child->xfrm) + xdst = (struct xfrm_dst *) dst; + if (!x->xso.offload_handle && !xdst->child->xfrm) + return true; + if (x->xso.offload_handle && (x->xso.dev == xfrm_dst_path(dst)->dev) && + !xdst->child->xfrm) return true; return false; @@ -1894,15 +1944,24 @@ static inline void xfrm_dev_state_free(struct xfrm_state *x) struct net_device *dev = xso->dev; if (dev && dev->xfrmdev_ops) { - dev->xfrmdev_ops->xdo_dev_state_free(x); + if (dev->xfrmdev_ops->xdo_dev_state_free) + dev->xfrmdev_ops->xdo_dev_state_free(x); xso->dev = NULL; dev_put(dev); } } #else -static inline int validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t features) +static inline void xfrm_dev_resume(struct sk_buff *skb) { - return 0; +} + +static inline void xfrm_dev_backlog(struct softnet_data *sd) +{ +} + +static inline struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t features, bool *again) +{ + return skb; } static inline int xfrm_dev_state_add(struct net *net, struct xfrm_state *x, struct xfrm_user_offload *xuo) @@ -1923,6 +1982,10 @@ static inline bool xfrm_dev_offload_ok(struct sk_buff *skb, struct xfrm_state *x return false; } +static inline void xfrm_dev_state_advance_esn(struct xfrm_state *x) +{ +} + static inline bool xfrm_dst_offload_ok(struct dst_entry *dst) { return false; |