From f330a7fdbe1611104622faff7e614a246a7d20f0 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 25 Aug 2016 15:33:31 +0200 Subject: netfilter: conntrack: get rid of conntrack timer With stats enabled this eats 80 bytes on x86_64 per nf_conn entry, as Eric Dumazet pointed out during netfilter workshop 2016. Eric also says: "Another reason was the fact that Thomas was about to change max timer range [..]" (500462a9de657f8, 'timers: Switch to a non-cascading wheel'). Remove the timer and use a 32bit jiffies value containing timestamp until entry is valid. During conntrack lookup, even before doing tuple comparison, check the timeout value and evict the entry in case it is too old. The dying bit is used as a synchronization point to avoid races where multiple cpus try to evict the same entry. Because lookup is always lockless, we need to bump the refcnt once when we evict, else we could try to evict already-dead entry that is being recycled. This is the standard/expected way when conntrack entries are destroyed. Followup patches will introduce garbage collection via work queue and further places where we can reap obsoleted entries (e.g. during netlink dumps), this is needed to avoid expired conntracks from hanging around for too long when lookup rate is low after a busy period. 
Signed-off-by: Florian Westphal Acked-by: Eric Dumazet Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack.h | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 2a127480d4cc..7277751128e8 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -42,7 +42,6 @@ union nf_conntrack_expect_proto { #include #include -#include #ifdef CONFIG_NETFILTER_DEBUG #define NF_CT_ASSERT(x) WARN_ON(!(x)) @@ -73,7 +72,7 @@ struct nf_conn_help { #include struct nf_conn { - /* Usage count in here is 1 for hash table/destruct timer, 1 per skb, + /* Usage count in here is 1 for hash table, 1 per skb, * plus 1 for any connection(s) we are `master' for * * Hint, SKB address this struct and refcnt via skb->nfct and @@ -96,8 +95,8 @@ struct nf_conn { /* Have we seen traffic both ways yet? (bitset) */ unsigned long status; - /* Timer function; drops refcnt when it goes off. */ - struct timer_list timeout; + /* jiffies32 when this ct is considered dead */ + u32 timeout; possible_net_t ct_net; @@ -291,14 +290,28 @@ static inline bool nf_is_loopback_packet(const struct sk_buff *skb) return skb->dev && skb->skb_iif && skb->dev->flags & IFF_LOOPBACK; } +#define nfct_time_stamp ((u32)(jiffies)) + /* jiffies until ct expires, 0 if already expired */ static inline unsigned long nf_ct_expires(const struct nf_conn *ct) { - long timeout = (long)ct->timeout.expires - (long)jiffies; + s32 timeout = ct->timeout - nfct_time_stamp; return timeout > 0 ? 
timeout : 0; } +static inline bool nf_ct_is_expired(const struct nf_conn *ct) +{ + return (__s32)(ct->timeout - nfct_time_stamp) <= 0; +} + +/* use after obtaining a reference count */ +static inline bool nf_ct_should_gc(const struct nf_conn *ct) +{ + return nf_ct_is_expired(ct) && nf_ct_is_confirmed(ct) && + !nf_ct_is_dying(ct); +} + struct kernel_param; int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp); -- cgit v1.2.3