summaryrefslogtreecommitdiffstats
path: root/net/netfilter/nft_payload.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2022-12-13 15:47:48 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2022-12-13 15:47:48 -0800
commit7e68dd7d07a28faa2e6574dd6b9dbd90cdeaae91 (patch)
treeae0427c5a3b905f24b3a44b510a9bcf35d9b67a3 /net/netfilter/nft_payload.c
parent1ca06f1c1acecbe02124f14a37cce347b8c1a90c (diff)
parent7c4a6309e27f411743817fe74a832ec2d2798a4b (diff)
downloadlinux-7e68dd7d07a28faa2e6574dd6b9dbd90cdeaae91.tar.bz2
Merge tag 'net-next-6.2' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next
Pull networking updates from Paolo Abeni: "Core: - Allow live renaming when an interface is up - Add retpoline wrappers for tc, improving considerably the performances of complex queue discipline configurations - Add inet drop monitor support - A few GRO performance improvements - Add infrastructure for atomic dev stats, addressing long standing data races - De-duplicate common code between OVS and conntrack offloading infrastructure - A bunch of UBSAN_BOUNDS/FORTIFY_SOURCE improvements - Netfilter: introduce packet parser for tunneled packets - Replace IPVS timer-based estimators with kthreads to scale up the workload with the number of available CPUs - Add the helper support for connection-tracking OVS offload BPF: - Support for user defined BPF objects: the use case is to allocate own objects, build own object hierarchies and use the building blocks to build own data structures flexibly, for example, linked lists in BPF - Make cgroup local storage available to non-cgroup attached BPF programs - Avoid unnecessary deadlock detection and failures wrt BPF task storage helpers - A relevant bunch of BPF verifier fixes and improvements - Veristat tool improvements to support custom filtering, sorting, and replay of results - Add LLVM disassembler as default library for dumping JITed code - Lots of new BPF documentation for various BPF maps - Add bpf_rcu_read_{,un}lock() support for sleepable programs - Add RCU grace period chaining to BPF to wait for the completion of access from both sleepable and non-sleepable BPF programs - Add support storing struct task_struct objects as kptrs in maps - Improve helper UAPI by explicitly defining BPF_FUNC_xxx integer values - Add libbpf *_opts API-variants for bpf_*_get_fd_by_id() functions Protocols: - TCP: implement Protective Load Balancing across switch links - TCP: allow dynamically disabling TCP-MD5 static key, reverting back to fast[er]-path - UDP: Introduce optional per-netns hash lookup table - IPv6: simplify and cleanup sockets disposal - Netlink: support different type policies for each generic netlink operation - MPTCP: add MSG_FASTOPEN and FastOpen listener side support - MPTCP: add netlink notification support for listener sockets events - SCTP: add VRF support, allowing sctp sockets binding to VRF devices - Add bridging MAC Authentication Bypass (MAB) support - Extensions for Ethernet VPN bridging implementation to better support multicast scenarios - More work for Wi-Fi 7 support, comprising conversion of all the existing drivers to internal TX queue usage - IPSec: introduce a new offload type (packet offload) allowing complete header processing and crypto offloading - IPSec: extended ack support for more descriptive XFRM error reporting - RXRPC: increase SACK table size and move processing into a per-local endpoint kernel thread, reducing considerably the required locking - IEEE 802154: synchronous send frame and extended filtering support, initial support for scanning available 15.4 networks - Tun: bump the link speed from 10Mbps to 10Gbps - Tun/VirtioNet: implement UDP segmentation offload support Driver API: - PHY/SFP: improve power level switching between standard level 1 and the higher power levels - New API for netdev <-> devlink_port linkage - PTP: convert existing drivers to new frequency adjustment implementation - DSA: add support for rx offloading - Autoload DSA tagging driver when dynamically changing protocol - Add new PCP and APPTRUST attributes to Data Center Bridging - Add configuration support for 800Gbps link speed - Add devlink port function attribute to enable/disable RoCE and migratable - Extend devlink-rate to support strict prioriry and weighted fair queuing - Add devlink support to directly reading from region memory - New device tree helper to fetch MAC address from nvmem - New big TCP helper to simplify temporary header stripping New hardware / drivers: - Ethernet: - Marvel Octeon CNF95N and CN10KB Ethernet Switches - Marvel Prestera AC5X Ethernet Switch - WangXun 10 Gigabit NIC - Motorcomm yt8521 Gigabit Ethernet - Microchip ksz9563 Gigabit Ethernet Switch - Microsoft Azure Network Adapter - Linux Automation 10Base-T1L adapter - PHY: - Aquantia AQR112 and AQR412 - Motorcomm YT8531S - PTP: - Orolia ART-CARD - WiFi: - MediaTek Wi-Fi 7 (802.11be) devices - RealTek rtw8821cu, rtw8822bu, rtw8822cu and rtw8723du USB devices - Bluetooth: - Broadcom BCM4377/4378/4387 Bluetooth chipsets - Realtek RTL8852BE and RTL8723DS - Cypress.CYW4373A0 WiFi + Bluetooth combo device Drivers: - CAN: - gs_usb: bus error reporting support - kvaser_usb: listen only and bus error reporting support - Ethernet NICs: - Intel (100G): - extend action skbedit to RX queue mapping - implement devlink-rate support - support direct read from memory - nVidia/Mellanox (mlx5): - SW steering improvements, increasing rules update rate - Support for enhanced events compression - extend H/W offload packet manipulation capabilities - implement IPSec packet offload mode - nVidia/Mellanox (mlx4): - better big TCP support - Netronome Ethernet NICs (nfp): - IPsec offload support - add support for multicast filter - Broadcom: - RSS and PTP support improvements - AMD/SolarFlare: - netlink extened ack improvements - add basic flower matches to offload, and related stats - Virtual NICs: - ibmvnic: introduce affinity hint support - small / embedded: - FreeScale fec: add initial XDP support - Marvel mv643xx_eth: support MII/GMII/RGMII modes for Kirkwood - TI am65-cpsw: add suspend/resume support - Mediatek MT7986: add RX wireless wthernet dispatch support - Realtek 8169: enable GRO software interrupt coalescing per default - Ethernet high-speed switches: - Microchip (sparx5): - add support for Sparx5 TC/flower H/W offload via VCAP - Mellanox mlxsw: - add 802.1X and MAC Authentication Bypass offload support - add ip6gre support - Embedded Ethernet switches: - Mediatek (mtk_eth_soc): - improve PCS implementation, add DSA untag support - enable flow offload support - Renesas: - add rswitch R-Car Gen4 gPTP support - Microchip (lan966x): - add full XDP support - add TC H/W offload via VCAP - enable PTP on bridge interfaces - Microchip (ksz8): - add MTU support for KSZ8 series - Qualcomm 802.11ax WiFi (ath11k): - support configuring channel dwell time during scan - MediaTek WiFi (mt76): - enable Wireless Ethernet Dispatch (WED) offload support - add ack signal support - enable coredump support - remain_on_channel support - Intel WiFi (iwlwifi): - enable Wi-Fi 7 Extremely High Throughput (EHT) PHY capabilities - 320 MHz channels support - RealTek WiFi (rtw89): - new dynamic header firmware format support - wake-over-WLAN support" * tag 'net-next-6.2' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next: (2002 commits) ipvs: fix type warning in do_div() on 32 bit net: lan966x: Remove a useless test in lan966x_ptp_add_trap() net: ipa: add IPA v4.7 support dt-bindings: net: qcom,ipa: Add SM6350 compatible bnxt: Use generic HBH removal helper in tx path IPv6/GRO: generic helper to remove temporary HBH/jumbo header in driver selftests: forwarding: Add bridge MDB test selftests: forwarding: Rename bridge_mdb test bridge: mcast: Support replacement of MDB port group entries bridge: mcast: Allow user space to specify MDB entry routing protocol bridge: mcast: Allow user space to add (*, G) with a source list and filter mode bridge: mcast: Add support for (*, G) with a source list and filter mode bridge: mcast: Avoid arming group timer when (S, G) corresponds to a source bridge: mcast: Add a flag for user installed source entries bridge: mcast: Expose __br_multicast_del_group_src() bridge: mcast: Expose br_multicast_new_group_src() bridge: mcast: Add a centralized error path bridge: mcast: Place netlink policy before validation functions bridge: mcast: Split (*, G) and (S, G) addition into different functions bridge: mcast: Do not derive entry type from its filter mode ...
Diffstat (limited to 'net/netfilter/nft_payload.c')
-rw-r--r--net/netfilter/nft_payload.c141
1 files changed, 138 insertions, 3 deletions
diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c
index 4edd899aeb9b..17b418a5a593 100644
--- a/net/netfilter/nft_payload.c
+++ b/net/netfilter/nft_payload.c
@@ -19,6 +19,7 @@
/* For layer 4 checksum field offset. */
#include <linux/tcp.h>
#include <linux/udp.h>
+#include <net/gre.h>
#include <linux/icmpv6.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
@@ -100,6 +101,41 @@ static int __nft_payload_inner_offset(struct nft_pktinfo *pkt)
pkt->inneroff = thoff + __tcp_hdrlen(th);
}
break;
+ case IPPROTO_GRE: {
+ u32 offset = sizeof(struct gre_base_hdr);
+ struct gre_base_hdr *gre, _gre;
+ __be16 version;
+
+ gre = skb_header_pointer(pkt->skb, thoff, sizeof(_gre), &_gre);
+ if (!gre)
+ return -1;
+
+ version = gre->flags & GRE_VERSION;
+ switch (version) {
+ case GRE_VERSION_0:
+ if (gre->flags & GRE_ROUTING)
+ return -1;
+
+ if (gre->flags & GRE_CSUM) {
+ offset += sizeof_field(struct gre_full_hdr, csum) +
+ sizeof_field(struct gre_full_hdr, reserved1);
+ }
+ if (gre->flags & GRE_KEY)
+ offset += sizeof_field(struct gre_full_hdr, key);
+
+ if (gre->flags & GRE_SEQ)
+ offset += sizeof_field(struct gre_full_hdr, seq);
+ break;
+ default:
+ return -1;
+ }
+
+ pkt->inneroff = thoff + offset;
+ }
+ break;
+ case IPPROTO_IPIP:
+ pkt->inneroff = thoff;
+ break;
default:
return -1;
}
@@ -109,7 +145,7 @@ static int __nft_payload_inner_offset(struct nft_pktinfo *pkt)
return 0;
}
-static int nft_payload_inner_offset(const struct nft_pktinfo *pkt)
+int nft_payload_inner_offset(const struct nft_pktinfo *pkt)
{
if (!(pkt->flags & NFT_PKTINFO_INNER) &&
__nft_payload_inner_offset((struct nft_pktinfo *)pkt) < 0)
@@ -195,7 +231,8 @@ static int nft_payload_init(const struct nft_ctx *ctx,
priv->len);
}
-static int nft_payload_dump(struct sk_buff *skb, const struct nft_expr *expr)
+static int nft_payload_dump(struct sk_buff *skb,
+ const struct nft_expr *expr, bool reset)
{
const struct nft_payload *priv = nft_expr_priv(expr);
@@ -552,6 +589,92 @@ const struct nft_expr_ops nft_payload_fast_ops = {
.offload = nft_payload_offload,
};
+void nft_payload_inner_eval(const struct nft_expr *expr, struct nft_regs *regs,
+ const struct nft_pktinfo *pkt,
+ struct nft_inner_tun_ctx *tun_ctx)
+{
+ const struct nft_payload *priv = nft_expr_priv(expr);
+ const struct sk_buff *skb = pkt->skb;
+ u32 *dest = &regs->data[priv->dreg];
+ int offset;
+
+ if (priv->len % NFT_REG32_SIZE)
+ dest[priv->len / NFT_REG32_SIZE] = 0;
+
+ switch (priv->base) {
+ case NFT_PAYLOAD_TUN_HEADER:
+ if (!(tun_ctx->flags & NFT_PAYLOAD_CTX_INNER_TUN))
+ goto err;
+
+ offset = tun_ctx->inner_tunoff;
+ break;
+ case NFT_PAYLOAD_LL_HEADER:
+ if (!(tun_ctx->flags & NFT_PAYLOAD_CTX_INNER_LL))
+ goto err;
+
+ offset = tun_ctx->inner_lloff;
+ break;
+ case NFT_PAYLOAD_NETWORK_HEADER:
+ if (!(tun_ctx->flags & NFT_PAYLOAD_CTX_INNER_NH))
+ goto err;
+
+ offset = tun_ctx->inner_nhoff;
+ break;
+ case NFT_PAYLOAD_TRANSPORT_HEADER:
+ if (!(tun_ctx->flags & NFT_PAYLOAD_CTX_INNER_TH))
+ goto err;
+
+ offset = tun_ctx->inner_thoff;
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ goto err;
+ }
+ offset += priv->offset;
+
+ if (skb_copy_bits(skb, offset, dest, priv->len) < 0)
+ goto err;
+
+ return;
+err:
+ regs->verdict.code = NFT_BREAK;
+}
+
+static int nft_payload_inner_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_payload *priv = nft_expr_priv(expr);
+ u32 base;
+
+ base = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_BASE]));
+ switch (base) {
+ case NFT_PAYLOAD_TUN_HEADER:
+ case NFT_PAYLOAD_LL_HEADER:
+ case NFT_PAYLOAD_NETWORK_HEADER:
+ case NFT_PAYLOAD_TRANSPORT_HEADER:
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ priv->base = base;
+ priv->offset = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_OFFSET]));
+ priv->len = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_LEN]));
+
+ return nft_parse_register_store(ctx, tb[NFTA_PAYLOAD_DREG],
+ &priv->dreg, NULL, NFT_DATA_VALUE,
+ priv->len);
+}
+
+static const struct nft_expr_ops nft_payload_inner_ops = {
+ .type = &nft_payload_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_payload)),
+ .init = nft_payload_inner_init,
+ .dump = nft_payload_dump,
+ /* direct call to nft_payload_inner_eval(). */
+};
+
static inline void nft_csum_replace(__sum16 *sum, __wsum fsum, __wsum tsum)
{
*sum = csum_fold(csum_add(csum_sub(~csum_unfold(*sum), fsum), tsum));
@@ -665,6 +788,16 @@ static int nft_payload_csum_inet(struct sk_buff *skb, const u32 *src,
return 0;
}
+struct nft_payload_set {
+ enum nft_payload_bases base:8;
+ u8 offset;
+ u8 len;
+ u8 sreg;
+ u8 csum_type;
+ u8 csum_offset;
+ u8 csum_flags;
+};
+
static void nft_payload_set_eval(const struct nft_expr *expr,
struct nft_regs *regs,
const struct nft_pktinfo *pkt)
@@ -787,7 +920,8 @@ static int nft_payload_set_init(const struct nft_ctx *ctx,
priv->len);
}
-static int nft_payload_set_dump(struct sk_buff *skb, const struct nft_expr *expr)
+static int nft_payload_set_dump(struct sk_buff *skb,
+ const struct nft_expr *expr, bool reset)
{
const struct nft_payload_set *priv = nft_expr_priv(expr);
@@ -885,6 +1019,7 @@ nft_payload_select_ops(const struct nft_ctx *ctx,
struct nft_expr_type nft_payload_type __read_mostly = {
.name = "payload",
.select_ops = nft_payload_select_ops,
+ .inner_ops = &nft_payload_inner_ops,
.policy = nft_payload_policy,
.maxattr = NFTA_PAYLOAD_MAX,
.owner = THIS_MODULE,