diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2021-11-11 09:49:36 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2021-11-11 09:49:36 -0800 |
commit | f54ca91fe6f25c2028f953ce82f19ca2ea0f07bb (patch) | |
tree | 6a16c8a40cb30141aa3c26fa8408a5085d6750e1 /include | |
parent | c55a04176cba5a2de085461438b65e1c46541ef3 (diff) | |
parent | d336509cb9d03970911878bb77f0497f64fda061 (diff) | |
download | linux-f54ca91fe6f25c2028f953ce82f19ca2ea0f07bb.tar.bz2 |
Merge tag 'net-5.16-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
Pull networking fixes from Jakub Kicinski:
"Including fixes from bpf, can and netfilter.
Current release - regressions:
- bpf: do not reject when the stack read size is different from the
tracked scalar size
- net: fix premature exit from NAPI state polling in napi_disable()
- riscv, bpf: fix RV32 broken build, and silence RV64 warning
Current release - new code bugs:
- net: fix possible NULL deref in sock_reserve_memory
- amt: fix error return code in amt_init(); fix stopping the
workqueue
- ax88796c: use the correct ioctl callback
Previous releases - always broken:
- bpf: stop caching subprog index in the bpf_pseudo_func insn
- security: fixups for the security hooks in sctp
- nfc: add necessary privilege flags in netlink layer, limit
operations to admin only
- vsock: prevent unnecessary refcnt inc for non-blocking connect
- net/smc: fix sk_refcnt underflow on link down and fallback
- nfnetlink_queue: fix OOB when mac header was cleared
- can: j1939: ignore invalid messages per standard
- bpf, sockmap:
- fix race in ingress receive verdict with redirect to self
- fix incorrect sk_skb data_end access when src_reg = dst_reg
- strparser, and tls are reusing qdisc_skb_cb and colliding
- ethtool: fix ethtool msg len calculation for pause stats
- vlan: fix a UAF in vlan_dev_real_dev() when ref-holder tries to
access an unregistering real_dev
- udp6: make encap_rcv() bump the v6 not v4 stats
- drv: prestera: add explicit padding to fix m68k build
- drv: felix: fix broken VLAN-tagged PTP under VLAN-aware bridge
- drv: mvpp2: fix wrong SerDes reconfiguration order
Misc & small latecomers:
- ipvs: auto-load ipvs on genl access
- mctp: sanity check the struct sockaddr_mctp padding fields
- libfs: support RENAME_EXCHANGE in simple_rename()
- avoid double accounting for pure zerocopy skbs"
* tag 'net-5.16-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net: (123 commits)
selftests/net: udpgso_bench_rx: fix port argument
net: wwan: iosm: fix compilation warning
cxgb4: fix eeprom len when diagnostics not implemented
net: fix premature exit from NAPI state polling in napi_disable()
net/smc: fix sk_refcnt underflow on linkdown and fallback
net/mlx5: Lag, fix a potential Oops with mlx5_lag_create_definer()
gve: fix unmatched u64_stats_update_end()
net: ethernet: lantiq_etop: Fix compilation error
selftests: forwarding: Fix packet matching in mirroring selftests
vsock: prevent unnecessary refcnt inc for nonblocking connect
net: marvell: mvpp2: Fix wrong SerDes reconfiguration order
net: ethernet: ti: cpsw_ale: Fix access to un-initialized memory
net: stmmac: allow a tc-taprio base-time of zero
selftests: net: test_vxlan_under_vrf: fix HV connectivity test
net: hns3: allow configure ETS bandwidth of all TCs
net: hns3: remove check VF uc mac exist when set by PF
net: hns3: fix some mac statistics is always 0 in device version V2
net: hns3: fix kernel crash when unload VF while it is being reset
net: hns3: sync rx ring head in echo common pull
net: hns3: fix pfc packet number incorrect after querying pfc parameters
...
Diffstat (limited to 'include')
-rw-r--r-- | include/linux/bpf.h | 6 | ||||
-rw-r--r-- | include/linux/dsa/ocelot.h | 1 | ||||
-rw-r--r-- | include/linux/ethtool_netlink.h | 3 | ||||
-rw-r--r-- | include/linux/fs.h | 2 | ||||
-rw-r--r-- | include/linux/lsm_hook_defs.h | 6 | ||||
-rw-r--r-- | include/linux/lsm_hooks.h | 13 | ||||
-rw-r--r-- | include/linux/security.h | 17 | ||||
-rw-r--r-- | include/linux/skbuff.h | 35 | ||||
-rw-r--r-- | include/linux/skmsg.h | 12 | ||||
-rw-r--r-- | include/net/llc.h | 4 | ||||
-rw-r--r-- | include/net/sctp/structs.h | 20 | ||||
-rw-r--r-- | include/net/strparser.h | 20 | ||||
-rw-r--r-- | include/net/tcp.h | 8 | ||||
-rw-r--r-- | include/uapi/linux/ethtool_netlink.h | 4 |
14 files changed, 124 insertions, 27 deletions
diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 2be6dfd68df9..f715e8863f4d 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -484,6 +484,12 @@ bpf_ctx_record_field_size(struct bpf_insn_access_aux *aux, u32 size) aux->ctx_field_size = size; } +static inline bool bpf_pseudo_func(const struct bpf_insn *insn) +{ + return insn->code == (BPF_LD | BPF_IMM | BPF_DW) && + insn->src_reg == BPF_PSEUDO_FUNC; +} + struct bpf_prog_ops { int (*test_run)(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr); diff --git a/include/linux/dsa/ocelot.h b/include/linux/dsa/ocelot.h index d42010cf5468..7ee708ad7df2 100644 --- a/include/linux/dsa/ocelot.h +++ b/include/linux/dsa/ocelot.h @@ -12,6 +12,7 @@ struct ocelot_skb_cb { struct sk_buff *clone; unsigned int ptp_class; /* valid only for clones */ + u32 tstamp_lo; u8 ptp_cmd; u8 ts_id; }; diff --git a/include/linux/ethtool_netlink.h b/include/linux/ethtool_netlink.h index 1e7bf78cb382..aba348d58ff6 100644 --- a/include/linux/ethtool_netlink.h +++ b/include/linux/ethtool_netlink.h @@ -10,6 +10,9 @@ #define __ETHTOOL_LINK_MODE_MASK_NWORDS \ DIV_ROUND_UP(__ETHTOOL_LINK_MODE_MASK_NBITS, 32) +#define ETHTOOL_PAUSE_STAT_CNT (__ETHTOOL_A_PAUSE_STAT_CNT - \ + ETHTOOL_A_PAUSE_STAT_TX_FRAMES) + enum ethtool_multicast_groups { ETHNL_MCGRP_MONITOR, }; diff --git a/include/linux/fs.h b/include/linux/fs.h index 3afca821df32..1cb616fc1105 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3385,6 +3385,8 @@ extern int simple_open(struct inode *inode, struct file *file); extern int simple_link(struct dentry *, struct inode *, struct dentry *); extern int simple_unlink(struct inode *, struct dentry *); extern int simple_rmdir(struct inode *, struct dentry *); +extern int simple_rename_exchange(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry); extern int simple_rename(struct user_namespace *, struct inode *, struct dentry *, struct inode *, struct dentry *, unsigned int); diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index a9ac70ae01ab..442a611fa0fb 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -329,12 +329,14 @@ LSM_HOOK(int, 0, tun_dev_create, void) LSM_HOOK(int, 0, tun_dev_attach_queue, void *security) LSM_HOOK(int, 0, tun_dev_attach, struct sock *sk, void *security) LSM_HOOK(int, 0, tun_dev_open, void *security) -LSM_HOOK(int, 0, sctp_assoc_request, struct sctp_endpoint *ep, +LSM_HOOK(int, 0, sctp_assoc_request, struct sctp_association *asoc, struct sk_buff *skb) LSM_HOOK(int, 0, sctp_bind_connect, struct sock *sk, int optname, struct sockaddr *address, int addrlen) -LSM_HOOK(void, LSM_RET_VOID, sctp_sk_clone, struct sctp_endpoint *ep, +LSM_HOOK(void, LSM_RET_VOID, sctp_sk_clone, struct sctp_association *asoc, struct sock *sk, struct sock *newsk) +LSM_HOOK(void, LSM_RET_VOID, sctp_assoc_established, struct sctp_association *asoc, + struct sk_buff *skb) #endif /* CONFIG_SECURITY_NETWORK */ #ifdef CONFIG_SECURITY_INFINIBAND diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 0bada4df23fc..d6823214d5c1 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -1027,9 +1027,9 @@ * Security hooks for SCTP * * @sctp_assoc_request: - * Passes the @ep and @chunk->skb of the association INIT packet to + * Passes the @asoc and @chunk->skb of the association INIT packet to * the security module. - * @ep pointer to sctp endpoint structure. + * @asoc pointer to sctp association structure. * @skb pointer to skbuff of association packet. * Return 0 on success, error on failure. * @sctp_bind_connect: @@ -1047,9 +1047,14 @@ * Called whenever a new socket is created by accept(2) (i.e. a TCP * style socket) or when a socket is 'peeled off' e.g userspace * calls sctp_peeloff(3). - * @ep pointer to current sctp endpoint structure. + * @asoc pointer to current sctp association structure. * @sk pointer to current sock structure. - * @sk pointer to new sock structure. + * @newsk pointer to new sock structure. + * @sctp_assoc_established: + * Passes the @asoc and @chunk->skb of the association COOKIE_ACK packet + * to the security module. + * @asoc pointer to sctp association structure. + * @skb pointer to skbuff of association packet. * * Security hooks for Infiniband * diff --git a/include/linux/security.h b/include/linux/security.h index 7e0ba63b5dde..06eac4e61a13 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -179,7 +179,7 @@ struct xfrm_policy; struct xfrm_state; struct xfrm_user_sec_ctx; struct seq_file; -struct sctp_endpoint; +struct sctp_association; #ifdef CONFIG_MMU extern unsigned long mmap_min_addr; @@ -1425,11 +1425,13 @@ int security_tun_dev_create(void); int security_tun_dev_attach_queue(void *security); int security_tun_dev_attach(struct sock *sk, void *security); int security_tun_dev_open(void *security); -int security_sctp_assoc_request(struct sctp_endpoint *ep, struct sk_buff *skb); +int security_sctp_assoc_request(struct sctp_association *asoc, struct sk_buff *skb); int security_sctp_bind_connect(struct sock *sk, int optname, struct sockaddr *address, int addrlen); -void security_sctp_sk_clone(struct sctp_endpoint *ep, struct sock *sk, +void security_sctp_sk_clone(struct sctp_association *asoc, struct sock *sk, struct sock *newsk); +void security_sctp_assoc_established(struct sctp_association *asoc, + struct sk_buff *skb); #else /* CONFIG_SECURITY_NETWORK */ static inline int security_unix_stream_connect(struct sock *sock, @@ -1631,7 +1633,7 @@ static inline int security_tun_dev_open(void *security) return 0; } -static inline int security_sctp_assoc_request(struct sctp_endpoint *ep, +static inline int security_sctp_assoc_request(struct sctp_association *asoc, struct sk_buff *skb) { return 0; @@ -1644,11 +1646,16 @@ static inline int security_sctp_bind_connect(struct sock *sk, int optname, return 0; } -static inline void security_sctp_sk_clone(struct sctp_endpoint *ep, +static inline void security_sctp_sk_clone(struct sctp_association *asoc, struct sock *sk, struct sock *newsk) { } + +static inline void security_sctp_assoc_established(struct sctp_association *asoc, + struct sk_buff *skb) +{ +} #endif /* CONFIG_SECURITY_NETWORK */ #ifdef CONFIG_SECURITY_INFINIBAND diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 0bd6520329f6..686a666d073d 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -454,9 +454,15 @@ enum { * all frags to avoid possible bad checksum */ SKBFL_SHARED_FRAG = BIT(1), + + /* segment contains only zerocopy data and should not be + * charged to the kernel memory. + */ + SKBFL_PURE_ZEROCOPY = BIT(2), }; #define SKBFL_ZEROCOPY_FRAG (SKBFL_ZEROCOPY_ENABLE | SKBFL_SHARED_FRAG) +#define SKBFL_ALL_ZEROCOPY (SKBFL_ZEROCOPY_FRAG | SKBFL_PURE_ZEROCOPY) /* * The callback notifies userspace to release buffers when skb DMA is done in @@ -1464,6 +1470,17 @@ static inline struct ubuf_info *skb_zcopy(struct sk_buff *skb) return is_zcopy ? skb_uarg(skb) : NULL; } +static inline bool skb_zcopy_pure(const struct sk_buff *skb) +{ + return skb_shinfo(skb)->flags & SKBFL_PURE_ZEROCOPY; +} + +static inline bool skb_pure_zcopy_same(const struct sk_buff *skb1, + const struct sk_buff *skb2) +{ + return skb_zcopy_pure(skb1) == skb_zcopy_pure(skb2); +} + static inline void net_zcopy_get(struct ubuf_info *uarg) { refcount_inc(&uarg->refcnt); @@ -1528,7 +1545,7 @@ static inline void skb_zcopy_clear(struct sk_buff *skb, bool zerocopy_success) if (!skb_zcopy_is_nouarg(skb)) uarg->callback(skb, uarg, zerocopy_success); - skb_shinfo(skb)->flags &= ~SKBFL_ZEROCOPY_FRAG; + skb_shinfo(skb)->flags &= ~SKBFL_ALL_ZEROCOPY; } } @@ -1675,6 +1692,22 @@ static inline int skb_unclone(struct sk_buff *skb, gfp_t pri) return 0; } +/* This variant of skb_unclone() makes sure skb->truesize is not changed */ +static inline int skb_unclone_keeptruesize(struct sk_buff *skb, gfp_t pri) +{ + might_sleep_if(gfpflags_allow_blocking(pri)); + + if (skb_cloned(skb)) { + unsigned int save = skb->truesize; + int res; + + res = pskb_expand_head(skb, 0, 0, pri); + skb->truesize = save; + return res; + } + return 0; +} + /** * skb_header_cloned - is the header a clone * @skb: buffer to check diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index b4256847c707..584d94be9c8b 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -507,6 +507,18 @@ static inline bool sk_psock_strp_enabled(struct sk_psock *psock) return !!psock->saved_data_ready; } +static inline bool sk_is_tcp(const struct sock *sk) +{ + return sk->sk_type == SOCK_STREAM && + sk->sk_protocol == IPPROTO_TCP; +} + +static inline bool sk_is_udp(const struct sock *sk) +{ + return sk->sk_type == SOCK_DGRAM && + sk->sk_protocol == IPPROTO_UDP; +} + #if IS_ENABLED(CONFIG_NET_SOCK_MSG) #define BPF_F_STRPARSER (1UL << 1) diff --git a/include/net/llc.h b/include/net/llc.h index fd1f9a3fd8dd..e250dca03963 100644 --- a/include/net/llc.h +++ b/include/net/llc.h @@ -72,7 +72,9 @@ struct llc_sap { static inline struct hlist_head *llc_sk_dev_hash(struct llc_sap *sap, int ifindex) { - return &sap->sk_dev_hash[ifindex % LLC_SK_DEV_HASH_ENTRIES]; + u32 bucket = hash_32(ifindex, LLC_SK_DEV_HASH_BITS); + + return &sap->sk_dev_hash[bucket]; } static inline diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 651bba654d77..899c29c326ba 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1355,16 +1355,6 @@ struct sctp_endpoint { reconf_enable:1; __u8 strreset_enable; - - /* Security identifiers from incoming (INIT). These are set by - * security_sctp_assoc_request(). These will only be used by - * SCTP TCP type sockets and peeled off connections as they - * cause a new socket to be generated. security_sctp_sk_clone() - * will then plug these into the new socket. - */ - - u32 secid; - u32 peer_secid; }; /* Recover the outter endpoint structure. */ @@ -2104,6 +2094,16 @@ struct sctp_association { __u64 abandoned_unsent[SCTP_PR_INDEX(MAX) + 1]; __u64 abandoned_sent[SCTP_PR_INDEX(MAX) + 1]; + /* Security identifiers from incoming (INIT). These are set by + * security_sctp_assoc_request(). These will only be used by + * SCTP TCP type sockets and peeled off connections as they + * cause a new socket to be generated. security_sctp_sk_clone() + * will then plug these into the new socket. + */ + + u32 secid; + u32 peer_secid; + struct rcu_head rcu; }; diff --git a/include/net/strparser.h b/include/net/strparser.h index 1d20b98493a1..732b7097d78e 100644 --- a/include/net/strparser.h +++ b/include/net/strparser.h @@ -54,10 +54,28 @@ struct strp_msg { int offset; }; +struct _strp_msg { + /* Internal cb structure. struct strp_msg must be first for passing + * to upper layer. + */ + struct strp_msg strp; + int accum_len; +}; + +struct sk_skb_cb { +#define SK_SKB_CB_PRIV_LEN 20 + unsigned char data[SK_SKB_CB_PRIV_LEN]; + struct _strp_msg strp; + /* temp_reg is a temporary register used for bpf_convert_data_end_access + * when dst_reg == src_reg. + */ + u64 temp_reg; +}; + static inline struct strp_msg *strp_msg(struct sk_buff *skb) { return (struct strp_msg *)((void *)skb->cb + - offsetof(struct qdisc_skb_cb, data)); + offsetof(struct sk_skb_cb, strp)); } /* Structure for an attached lower socket */ diff --git a/include/net/tcp.h b/include/net/tcp.h index 70972f3ac8fa..4da22b41bde6 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -293,7 +293,10 @@ static inline bool tcp_out_of_memory(struct sock *sk) static inline void tcp_wmem_free_skb(struct sock *sk, struct sk_buff *skb) { sk_wmem_queued_add(sk, -skb->truesize); - sk_mem_uncharge(sk, skb->truesize); + if (!skb_zcopy_pure(skb)) + sk_mem_uncharge(sk, skb->truesize); + else + sk_mem_uncharge(sk, SKB_TRUESIZE(skb_end_offset(skb))); __kfree_skb(skb); } @@ -974,7 +977,8 @@ static inline bool tcp_skb_can_collapse(const struct sk_buff *to, const struct sk_buff *from) { return likely(tcp_skb_can_collapse_to(to) && - mptcp_skb_can_collapse(to, from)); + mptcp_skb_can_collapse(to, from) && + skb_pure_zcopy_same(to, from)); } /* Events passed to congestion control interface */ diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index ca5fbb59fa42..999777d32dcf 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -411,7 +411,9 @@ enum { ETHTOOL_A_PAUSE_STAT_TX_FRAMES, ETHTOOL_A_PAUSE_STAT_RX_FRAMES, - /* add new constants above here */ + /* add new constants above here + * adjust ETHTOOL_PAUSE_STAT_CNT if adding non-stats! + */ __ETHTOOL_A_PAUSE_STAT_CNT, ETHTOOL_A_PAUSE_STAT_MAX = (__ETHTOOL_A_PAUSE_STAT_CNT - 1) }; |