summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2022-04-11 11:47:58 +0100
committerDavid S. Miller <davem@davemloft.net>2022-04-11 11:47:58 +0100
commit4696ad36d76423c56df02b5485a14629dbcbb9af (patch)
treeefaa8c62de603e3c847099e6f015f98d82cfe3c8
parenta21437d2b4855980a15cc9e5dc230f95c7563772 (diff)
parent0c7b27616fbd64b3b86c59ad5441f82a1a0c4176 (diff)
downloadlinux-4696ad36d76423c56df02b5485a14629dbcbb9af.tar.bz2
Merge git://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf-next
Pablo Neira Ayuso says: ==================== Netfilter updates for net-next The following patchset contains Netfilter updates for net-next: 1) Replace unnecessary list_for_each_entry_continue() in nf_tables, from Jakob Koschel. 2) Add struct nf_conntrack_net_ecache to conntrack event cache and use it, from Florian Westphal. 3) Refactor ctnetlink_dump_list(), also from Florian. 4) Bump module reference counter on cttimeout object addition/removal, from Florian. 5) Consolidate nf_log MAC printer, from Phil Sutter. 6) Add basic logging support for unknown ethertype, from Phil Sutter. 7) Consolidate check for sysctl nf_log_all_netns toggle, also from Phil. 8) Replace hardcode value in nft_bitwise, from Jeremy Sowden. 9) Rename BASIC-like goto tags in nft_bitwise to more meaningful names, also from Jeremy. 10) nft_fib support for reverse path filtering with policy-based routing on iif. Extend selftests to cover for this new usecase, from Florian. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--include/net/netfilter/nf_conntrack.h8
-rw-r--r--net/ipv4/netfilter/nft_fib_ipv4.c4
-rw-r--r--net/ipv6/netfilter/nft_fib_ipv6.c4
-rw-r--r--net/netfilter/nf_conntrack_ecache.c19
-rw-r--r--net/netfilter/nf_conntrack_netlink.c68
-rw-r--r--net/netfilter/nf_log_syslog.c136
-rw-r--r--net/netfilter/nf_tables_api.c6
-rw-r--r--net/netfilter/nfnetlink_cttimeout.c14
-rw-r--r--net/netfilter/nft_bitwise.c13
-rw-r--r--net/netfilter/nft_fib.c4
-rwxr-xr-xtools/testing/selftests/netfilter/nft_fib.sh50
11 files changed, 199 insertions, 127 deletions
diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index b08b70989d2c..69e6c6a218be 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -43,6 +43,11 @@ union nf_conntrack_expect_proto {
/* insert expect proto private data here */
};
+struct nf_conntrack_net_ecache {
+ struct delayed_work dwork;
+ struct netns_ct *ct_net;
+};
+
struct nf_conntrack_net {
/* only used when new connection is allocated: */
atomic_t count;
@@ -58,8 +63,7 @@ struct nf_conntrack_net {
struct ctl_table_header *sysctl_header;
#endif
#ifdef CONFIG_NF_CONNTRACK_EVENTS
- struct delayed_work ecache_dwork;
- struct netns_ct *ct_net;
+ struct nf_conntrack_net_ecache ecache;
#endif
};
diff --git a/net/ipv4/netfilter/nft_fib_ipv4.c b/net/ipv4/netfilter/nft_fib_ipv4.c
index 4151eb1262dd..b75cac69bd7e 100644
--- a/net/ipv4/netfilter/nft_fib_ipv4.c
+++ b/net/ipv4/netfilter/nft_fib_ipv4.c
@@ -112,6 +112,10 @@ void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs,
fl4.daddr = iph->daddr;
fl4.saddr = get_saddr(iph->saddr);
} else {
+ if (nft_hook(pkt) == NF_INET_FORWARD &&
+ priv->flags & NFTA_FIB_F_IIF)
+ fl4.flowi4_iif = nft_out(pkt)->ifindex;
+
fl4.daddr = iph->saddr;
fl4.saddr = get_saddr(iph->daddr);
}
diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c
index b3f163b40c2b..8970d0b4faeb 100644
--- a/net/ipv6/netfilter/nft_fib_ipv6.c
+++ b/net/ipv6/netfilter/nft_fib_ipv6.c
@@ -30,6 +30,10 @@ static int nft_fib6_flowi_init(struct flowi6 *fl6, const struct nft_fib *priv,
fl6->daddr = iph->daddr;
fl6->saddr = iph->saddr;
} else {
+ if (nft_hook(pkt) == NF_INET_FORWARD &&
+ priv->flags & NFTA_FIB_F_IIF)
+ fl6->flowi6_iif = nft_out(pkt)->ifindex;
+
fl6->daddr = iph->saddr;
fl6->saddr = iph->daddr;
}
diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c
index 07e65b4e92f8..0cb2da0a759a 100644
--- a/net/netfilter/nf_conntrack_ecache.c
+++ b/net/netfilter/nf_conntrack_ecache.c
@@ -96,8 +96,8 @@ static enum retry_state ecache_work_evict_list(struct ct_pcpu *pcpu)
static void ecache_work(struct work_struct *work)
{
- struct nf_conntrack_net *cnet = container_of(work, struct nf_conntrack_net, ecache_dwork.work);
- struct netns_ct *ctnet = cnet->ct_net;
+ struct nf_conntrack_net *cnet = container_of(work, struct nf_conntrack_net, ecache.dwork.work);
+ struct netns_ct *ctnet = cnet->ecache.ct_net;
int cpu, delay = -1;
struct ct_pcpu *pcpu;
@@ -127,7 +127,7 @@ static void ecache_work(struct work_struct *work)
ctnet->ecache_dwork_pending = delay > 0;
if (delay >= 0)
- schedule_delayed_work(&cnet->ecache_dwork, delay);
+ schedule_delayed_work(&cnet->ecache.dwork, delay);
}
static int __nf_conntrack_eventmask_report(struct nf_conntrack_ecache *e,
@@ -293,12 +293,12 @@ void nf_conntrack_ecache_work(struct net *net, enum nf_ct_ecache_state state)
struct nf_conntrack_net *cnet = nf_ct_pernet(net);
if (state == NFCT_ECACHE_DESTROY_FAIL &&
- !delayed_work_pending(&cnet->ecache_dwork)) {
- schedule_delayed_work(&cnet->ecache_dwork, HZ);
+ !delayed_work_pending(&cnet->ecache.dwork)) {
+ schedule_delayed_work(&cnet->ecache.dwork, HZ);
net->ct.ecache_dwork_pending = true;
} else if (state == NFCT_ECACHE_DESTROY_SENT) {
net->ct.ecache_dwork_pending = false;
- mod_delayed_work(system_wq, &cnet->ecache_dwork, 0);
+ mod_delayed_work(system_wq, &cnet->ecache.dwork, 0);
}
}
@@ -310,8 +310,9 @@ void nf_conntrack_ecache_pernet_init(struct net *net)
struct nf_conntrack_net *cnet = nf_ct_pernet(net);
net->ct.sysctl_events = nf_ct_events;
- cnet->ct_net = &net->ct;
- INIT_DELAYED_WORK(&cnet->ecache_dwork, ecache_work);
+
+ cnet->ecache.ct_net = &net->ct;
+ INIT_DELAYED_WORK(&cnet->ecache.dwork, ecache_work);
BUILD_BUG_ON(__IPCT_MAX >= 16); /* e->ctmask is u16 */
}
@@ -320,5 +321,5 @@ void nf_conntrack_ecache_pernet_fini(struct net *net)
{
struct nf_conntrack_net *cnet = nf_ct_pernet(net);
- cancel_delayed_work_sync(&cnet->ecache_dwork);
+ cancel_delayed_work_sync(&cnet->ecache.dwork);
}
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 1ea2ad732d57..924d766e6c53 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -1708,6 +1708,47 @@ static int ctnetlink_done_list(struct netlink_callback *cb)
return 0;
}
+static int ctnetlink_dump_one_entry(struct sk_buff *skb,
+ struct netlink_callback *cb,
+ struct nf_conn *ct,
+ bool dying)
+{
+ struct ctnetlink_list_dump_ctx *ctx = (void *)cb->ctx;
+ struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
+ u8 l3proto = nfmsg->nfgen_family;
+ int res;
+
+ if (l3proto && nf_ct_l3num(ct) != l3proto)
+ return 0;
+
+ if (ctx->last) {
+ if (ct != ctx->last)
+ return 0;
+
+ ctx->last = NULL;
+ }
+
+ /* We can't dump extension info for the unconfirmed
+ * list because unconfirmed conntracks can have
+ * ct->ext reallocated (and thus freed).
+ *
+ * In the dying list case ct->ext can't be free'd
+ * until after we drop pcpu->lock.
+ */
+ res = ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ NFNL_MSG_TYPE(cb->nlh->nlmsg_type),
+ ct, dying, 0);
+ if (res < 0) {
+ if (!refcount_inc_not_zero(&ct->ct_general.use))
+ return 0;
+
+ ctx->last = ct;
+ }
+
+ return res;
+}
+
static int
ctnetlink_dump_list(struct sk_buff *skb, struct netlink_callback *cb, bool dying)
{
@@ -1715,12 +1756,9 @@ ctnetlink_dump_list(struct sk_buff *skb, struct netlink_callback *cb, bool dying
struct nf_conn *ct, *last;
struct nf_conntrack_tuple_hash *h;
struct hlist_nulls_node *n;
- struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
- u_int8_t l3proto = nfmsg->nfgen_family;
- int res;
- int cpu;
struct hlist_nulls_head *list;
struct net *net = sock_net(skb->sk);
+ int res, cpu;
if (ctx->done)
return 0;
@@ -1739,30 +1777,10 @@ ctnetlink_dump_list(struct sk_buff *skb, struct netlink_callback *cb, bool dying
restart:
hlist_nulls_for_each_entry(h, n, list, hnnode) {
ct = nf_ct_tuplehash_to_ctrack(h);
- if (l3proto && nf_ct_l3num(ct) != l3proto)
- continue;
- if (ctx->last) {
- if (ct != last)
- continue;
- ctx->last = NULL;
- }
- /* We can't dump extension info for the unconfirmed
- * list because unconfirmed conntracks can have
- * ct->ext reallocated (and thus freed).
- *
- * In the dying list case ct->ext can't be free'd
- * until after we drop pcpu->lock.
- */
- res = ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- NFNL_MSG_TYPE(cb->nlh->nlmsg_type),
- ct, dying, 0);
+ res = ctnetlink_dump_one_entry(skb, cb, ct, dying);
if (res < 0) {
- if (!refcount_inc_not_zero(&ct->ct_general.use))
- continue;
ctx->cpu = cpu;
- ctx->last = ct;
spin_unlock_bh(&pcpu->lock);
goto out;
}
diff --git a/net/netfilter/nf_log_syslog.c b/net/netfilter/nf_log_syslog.c
index 13234641cdb3..77bcb10fc586 100644
--- a/net/netfilter/nf_log_syslog.c
+++ b/net/netfilter/nf_log_syslog.c
@@ -40,6 +40,12 @@ struct arppayload {
unsigned char ip_dst[4];
};
+/* Guard against containers flooding syslog. */
+static bool nf_log_allowed(const struct net *net)
+{
+ return net_eq(net, &init_net) || sysctl_nf_log_all_netns;
+}
+
static void nf_log_dump_vlan(struct nf_log_buf *m, const struct sk_buff *skb)
{
u16 vid;
@@ -133,8 +139,7 @@ static void nf_log_arp_packet(struct net *net, u_int8_t pf,
{
struct nf_log_buf *m;
- /* FIXME: Disabled from containers until syslog ns is supported */
- if (!net_eq(net, &init_net) && !sysctl_nf_log_all_netns)
+ if (!nf_log_allowed(net))
return;
m = nf_log_buf_open();
@@ -766,9 +771,9 @@ dump_ipv6_packet(struct net *net, struct nf_log_buf *m,
nf_log_buf_add(m, "MARK=0x%x ", skb->mark);
}
-static void dump_ipv4_mac_header(struct nf_log_buf *m,
- const struct nf_loginfo *info,
- const struct sk_buff *skb)
+static void dump_mac_header(struct nf_log_buf *m,
+ const struct nf_loginfo *info,
+ const struct sk_buff *skb)
{
struct net_device *dev = skb->dev;
unsigned int logflags = 0;
@@ -798,9 +803,26 @@ fallback:
const unsigned char *p = skb_mac_header(skb);
unsigned int i;
- nf_log_buf_add(m, "%02x", *p++);
- for (i = 1; i < dev->hard_header_len; i++, p++)
- nf_log_buf_add(m, ":%02x", *p);
+ if (dev->type == ARPHRD_SIT) {
+ p -= ETH_HLEN;
+
+ if (p < skb->head)
+ p = NULL;
+ }
+
+ if (p) {
+ nf_log_buf_add(m, "%02x", *p++);
+ for (i = 1; i < dev->hard_header_len; i++)
+ nf_log_buf_add(m, ":%02x", *p++);
+ }
+
+ if (dev->type == ARPHRD_SIT) {
+ const struct iphdr *iph =
+ (struct iphdr *)skb_mac_header(skb);
+
+ nf_log_buf_add(m, " TUNNEL=%pI4->%pI4", &iph->saddr,
+ &iph->daddr);
+ }
}
nf_log_buf_add(m, " ");
}
@@ -814,8 +836,7 @@ static void nf_log_ip_packet(struct net *net, u_int8_t pf,
{
struct nf_log_buf *m;
- /* FIXME: Disabled from containers until syslog ns is supported */
- if (!net_eq(net, &init_net) && !sysctl_nf_log_all_netns)
+ if (!nf_log_allowed(net))
return;
m = nf_log_buf_open();
@@ -827,7 +848,7 @@ static void nf_log_ip_packet(struct net *net, u_int8_t pf,
out, loginfo, prefix);
if (in)
- dump_ipv4_mac_header(m, loginfo, skb);
+ dump_mac_header(m, loginfo, skb);
dump_ipv4_packet(net, m, loginfo, skb, 0);
@@ -841,64 +862,6 @@ static struct nf_logger nf_ip_logger __read_mostly = {
.me = THIS_MODULE,
};
-static void dump_ipv6_mac_header(struct nf_log_buf *m,
- const struct nf_loginfo *info,
- const struct sk_buff *skb)
-{
- struct net_device *dev = skb->dev;
- unsigned int logflags = 0;
-
- if (info->type == NF_LOG_TYPE_LOG)
- logflags = info->u.log.logflags;
-
- if (!(logflags & NF_LOG_MACDECODE))
- goto fallback;
-
- switch (dev->type) {
- case ARPHRD_ETHER:
- nf_log_buf_add(m, "MACSRC=%pM MACDST=%pM ",
- eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest);
- nf_log_dump_vlan(m, skb);
- nf_log_buf_add(m, "MACPROTO=%04x ",
- ntohs(eth_hdr(skb)->h_proto));
- return;
- default:
- break;
- }
-
-fallback:
- nf_log_buf_add(m, "MAC=");
- if (dev->hard_header_len &&
- skb->mac_header != skb->network_header) {
- const unsigned char *p = skb_mac_header(skb);
- unsigned int len = dev->hard_header_len;
- unsigned int i;
-
- if (dev->type == ARPHRD_SIT) {
- p -= ETH_HLEN;
-
- if (p < skb->head)
- p = NULL;
- }
-
- if (p) {
- nf_log_buf_add(m, "%02x", *p++);
- for (i = 1; i < len; i++)
- nf_log_buf_add(m, ":%02x", *p++);
- }
- nf_log_buf_add(m, " ");
-
- if (dev->type == ARPHRD_SIT) {
- const struct iphdr *iph =
- (struct iphdr *)skb_mac_header(skb);
- nf_log_buf_add(m, "TUNNEL=%pI4->%pI4 ", &iph->saddr,
- &iph->daddr);
- }
- } else {
- nf_log_buf_add(m, " ");
- }
-}
-
static void nf_log_ip6_packet(struct net *net, u_int8_t pf,
unsigned int hooknum, const struct sk_buff *skb,
const struct net_device *in,
@@ -908,8 +871,7 @@ static void nf_log_ip6_packet(struct net *net, u_int8_t pf,
{
struct nf_log_buf *m;
- /* FIXME: Disabled from containers until syslog ns is supported */
- if (!net_eq(net, &init_net) && !sysctl_nf_log_all_netns)
+ if (!nf_log_allowed(net))
return;
m = nf_log_buf_open();
@@ -921,7 +883,7 @@ static void nf_log_ip6_packet(struct net *net, u_int8_t pf,
loginfo, prefix);
if (in)
- dump_ipv6_mac_header(m, loginfo, skb);
+ dump_mac_header(m, loginfo, skb);
dump_ipv6_packet(net, m, loginfo, skb, skb_network_offset(skb), 1);
@@ -935,6 +897,32 @@ static struct nf_logger nf_ip6_logger __read_mostly = {
.me = THIS_MODULE,
};
+static void nf_log_unknown_packet(struct net *net, u_int8_t pf,
+ unsigned int hooknum,
+ const struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ const struct nf_loginfo *loginfo,
+ const char *prefix)
+{
+ struct nf_log_buf *m;
+
+ if (!nf_log_allowed(net))
+ return;
+
+ m = nf_log_buf_open();
+
+ if (!loginfo)
+ loginfo = &default_loginfo;
+
+ nf_log_dump_packet_common(m, pf, hooknum, skb, in, out, loginfo,
+ prefix);
+
+ dump_mac_header(m, loginfo, skb);
+
+ nf_log_buf_close(m);
+}
+
static void nf_log_netdev_packet(struct net *net, u_int8_t pf,
unsigned int hooknum,
const struct sk_buff *skb,
@@ -954,6 +942,10 @@ static void nf_log_netdev_packet(struct net *net, u_int8_t pf,
case htons(ETH_P_RARP):
nf_log_arp_packet(net, pf, hooknum, skb, in, out, loginfo, prefix);
break;
+ default:
+ nf_log_unknown_packet(net, pf, hooknum, skb,
+ in, out, loginfo, prefix);
+ break;
}
}
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 128ee3b300d6..217006faebde 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -8367,10 +8367,8 @@ static int nf_tables_commit_chain_prepare(struct net *net, struct nft_chain *cha
if (chain->blob_next || !nft_is_active_next(net, chain))
return 0;
- rule = list_entry(&chain->rules, struct nft_rule, list);
-
data_size = 0;
- list_for_each_entry_continue(rule, &chain->rules, list) {
+ list_for_each_entry(rule, &chain->rules, list) {
if (nft_is_active_next(net, rule)) {
data_size += sizeof(*prule) + rule->dlen;
if (data_size > INT_MAX)
@@ -8387,7 +8385,7 @@ static int nf_tables_commit_chain_prepare(struct net *net, struct nft_chain *cha
data_boundary = data + data_size;
size = 0;
- list_for_each_entry_continue(rule, &chain->rules, list) {
+ list_for_each_entry(rule, &chain->rules, list) {
if (!nft_is_active_next(net, rule))
continue;
diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c
index b0d8888a539b..eea486f32971 100644
--- a/net/netfilter/nfnetlink_cttimeout.c
+++ b/net/netfilter/nfnetlink_cttimeout.c
@@ -158,6 +158,7 @@ static int cttimeout_new_timeout(struct sk_buff *skb,
timeout->timeout.l3num = l3num;
timeout->timeout.l4proto = l4proto;
refcount_set(&timeout->refcnt, 1);
+ __module_get(THIS_MODULE);
list_add_tail_rcu(&timeout->head, &pernet->nfct_timeout_list);
return 0;
@@ -506,13 +507,8 @@ static struct nf_ct_timeout *ctnl_timeout_find_get(struct net *net,
if (strncmp(timeout->name, name, CTNL_TIMEOUT_NAME_MAX) != 0)
continue;
- if (!try_module_get(THIS_MODULE))
+ if (!refcount_inc_not_zero(&timeout->refcnt))
goto err;
-
- if (!refcount_inc_not_zero(&timeout->refcnt)) {
- module_put(THIS_MODULE);
- goto err;
- }
matching = timeout;
break;
}
@@ -525,10 +521,10 @@ static void ctnl_timeout_put(struct nf_ct_timeout *t)
struct ctnl_timeout *timeout =
container_of(t, struct ctnl_timeout, timeout);
- if (refcount_dec_and_test(&timeout->refcnt))
+ if (refcount_dec_and_test(&timeout->refcnt)) {
kfree_rcu(timeout, rcu_head);
-
- module_put(THIS_MODULE);
+ module_put(THIS_MODULE);
+ }
}
static const struct nfnl_callback cttimeout_cb[IPCTNL_MSG_TIMEOUT_MAX] = {
diff --git a/net/netfilter/nft_bitwise.c b/net/netfilter/nft_bitwise.c
index f590ee1c8a1b..83590afe3768 100644
--- a/net/netfilter/nft_bitwise.c
+++ b/net/netfilter/nft_bitwise.c
@@ -30,7 +30,7 @@ static void nft_bitwise_eval_bool(u32 *dst, const u32 *src,
{
unsigned int i;
- for (i = 0; i < DIV_ROUND_UP(priv->len, 4); i++)
+ for (i = 0; i < DIV_ROUND_UP(priv->len, sizeof(u32)); i++)
dst[i] = (src[i] & priv->mask.data[i]) ^ priv->xor.data[i];
}
@@ -109,22 +109,23 @@ static int nft_bitwise_init_bool(struct nft_bitwise *priv,
return err;
if (mask.type != NFT_DATA_VALUE || mask.len != priv->len) {
err = -EINVAL;
- goto err1;
+ goto err_mask_release;
}
err = nft_data_init(NULL, &priv->xor, sizeof(priv->xor), &xor,
tb[NFTA_BITWISE_XOR]);
if (err < 0)
- goto err1;
+ goto err_mask_release;
if (xor.type != NFT_DATA_VALUE || xor.len != priv->len) {
err = -EINVAL;
- goto err2;
+ goto err_xor_release;
}
return 0;
-err2:
+
+err_xor_release:
nft_data_release(&priv->xor, xor.type);
-err1:
+err_mask_release:
nft_data_release(&priv->mask, mask.type);
return err;
}
diff --git a/net/netfilter/nft_fib.c b/net/netfilter/nft_fib.c
index f198f2d9ef90..1f12d7ade606 100644
--- a/net/netfilter/nft_fib.c
+++ b/net/netfilter/nft_fib.c
@@ -35,6 +35,10 @@ int nft_fib_validate(const struct nft_ctx *ctx, const struct nft_expr *expr,
case NFT_FIB_RESULT_OIF:
case NFT_FIB_RESULT_OIFNAME:
hooks = (1 << NF_INET_PRE_ROUTING);
+ if (priv->flags & NFTA_FIB_F_IIF) {
+ hooks |= (1 << NF_INET_LOCAL_IN) |
+ (1 << NF_INET_FORWARD);
+ }
break;
case NFT_FIB_RESULT_ADDRTYPE:
if (priv->flags & NFTA_FIB_F_IIF)
diff --git a/tools/testing/selftests/netfilter/nft_fib.sh b/tools/testing/selftests/netfilter/nft_fib.sh
index 695a1958723f..fd76b69635a4 100755
--- a/tools/testing/selftests/netfilter/nft_fib.sh
+++ b/tools/testing/selftests/netfilter/nft_fib.sh
@@ -66,6 +66,20 @@ table inet filter {
EOF
}
+load_pbr_ruleset() {
+ local netns=$1
+
+ip netns exec ${netns} nft -f /dev/stdin <<EOF
+table inet filter {
+ chain forward {
+ type filter hook forward priority raw;
+ fib saddr . iif oif gt 0 accept
+ log drop
+ }
+}
+EOF
+}
+
load_ruleset_count() {
local netns=$1
@@ -219,4 +233,40 @@ sleep 2
ip netns exec ${ns1} ping -c 3 -q 1c3::c01d > /dev/null
check_fib_counter 3 ${nsrouter} 1c3::c01d || exit 1
+# delete all rules
+ip netns exec ${ns1} nft flush ruleset
+ip netns exec ${ns2} nft flush ruleset
+ip netns exec ${nsrouter} nft flush ruleset
+
+ip -net ${ns1} addr add 10.0.1.99/24 dev eth0
+ip -net ${ns1} addr add dead:1::99/64 dev eth0
+
+ip -net ${ns1} addr del 10.0.2.99/24 dev eth0
+ip -net ${ns1} addr del dead:2::99/64 dev eth0
+
+ip -net ${nsrouter} addr del dead:2::1/64 dev veth0
+
+# ... pbr ruleset for the router, check iif+oif.
+load_pbr_ruleset ${nsrouter}
+if [ $? -ne 0 ] ; then
+ echo "SKIP: Could not load fib forward ruleset"
+ exit $ksft_skip
+fi
+
+ip -net ${nsrouter} rule add from all table 128
+ip -net ${nsrouter} rule add from all iif veth0 table 129
+ip -net ${nsrouter} route add table 128 to 10.0.1.0/24 dev veth0
+ip -net ${nsrouter} route add table 129 to 10.0.2.0/24 dev veth1
+
+# drop main ipv4 table
+ip -net ${nsrouter} -4 rule delete table main
+
+test_ping 10.0.2.99 dead:2::99
+if [ $? -ne 0 ] ; then
+ ip -net ${nsrouter} nft list ruleset
+ echo "FAIL: fib mismatch in pbr setup"
+ exit 1
+fi
+
+echo "PASS: fib expression forward check with policy based routing"
exit 0