summaryrefslogtreecommitdiffstats
path: root/net/ipv4/inetpeer.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2011-01-06 12:30:19 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2011-01-06 12:30:19 -0800
commitabb359450f20c32ae03039d8736f12b1d561caf5 (patch)
tree6e8723885feb66a138f19f0ff31615dc13a8d859 /net/ipv4/inetpeer.c
parentcb600d2f83c854ec3d6660063e4466431999489b (diff)
parent4e3dbdb1392a83bd21a6ff8f6bc785495058d37c (diff)
downloadlinux-abb359450f20c32ae03039d8736f12b1d561caf5.tar.bz2
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next-2.6
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next-2.6: (1436 commits) cassini: Use local-mac-address prom property for Cassini MAC address net: remove the duplicate #ifdef __KERNEL__ net: bridge: check the length of skb after nf_bridge_maybe_copy_header() netconsole: clarify stopping message netconsole: don't announce stopping if nothing happened cnic: Fix the type field in SPQ messages netfilter: fix export secctx error handling netfilter: fix the race when initializing nf_ct_expect_hash_rnd ipv4: IP defragmentation must be ECN aware net: r6040: Return proper error for r6040_init_one dcb: use after free in dcb_flushapp() dcb: unlock on error in dcbnl_ieee_get() net: ixp4xx_eth: Return proper error for eth_init_one include/linux/if_ether.h: Add #define ETH_P_LINK_CTL for HPNA and wlan local tunnel net: add POLLPRI to sock_def_readable() af_unix: Avoid socket->sk NULL OOPS in stream connect security hooks. net_sched: pfifo_head_drop problem mac80211: remove stray extern mac80211: implement off-channel TX using hw r-o-c offload mac80211: implement hardware offload for remain-on-channel ...
Diffstat (limited to 'net/ipv4/inetpeer.c')
-rw-r--r--net/ipv4/inetpeer.c167
1 files changed, 108 insertions, 59 deletions
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index 9e94d7cf4f8a..d9bc85751c74 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -63,7 +63,7 @@
* refcnt: atomically against modifications on other CPU;
* usually under some other lock to prevent node disappearing
* dtime: unused node list lock
- * v4daddr: unchangeable
+ * daddr: unchangeable
* ip_id_count: atomic value (no lock needed)
*/
@@ -79,15 +79,24 @@ static const struct inet_peer peer_fake_node = {
.avl_height = 0
};
-static struct {
+struct inet_peer_base {
struct inet_peer __rcu *root;
spinlock_t lock;
int total;
-} peers = {
+};
+
+static struct inet_peer_base v4_peers = {
+ .root = peer_avl_empty_rcu,
+ .lock = __SPIN_LOCK_UNLOCKED(v4_peers.lock),
+ .total = 0,
+};
+
+static struct inet_peer_base v6_peers = {
.root = peer_avl_empty_rcu,
- .lock = __SPIN_LOCK_UNLOCKED(peers.lock),
+ .lock = __SPIN_LOCK_UNLOCKED(v6_peers.lock),
.total = 0,
};
+
#define PEER_MAXDEPTH 40 /* sufficient for about 2^27 nodes */
/* Exported for sysctl_net_ipv4. */
@@ -152,28 +161,45 @@ static void unlink_from_unused(struct inet_peer *p)
}
}
+static int addr_compare(const struct inetpeer_addr *a,
+ const struct inetpeer_addr *b)
+{
+ int i, n = (a->family == AF_INET ? 1 : 4);
+
+ for (i = 0; i < n; i++) {
+ if (a->a6[i] == b->a6[i])
+ continue;
+ if (a->a6[i] < b->a6[i])
+ return -1;
+ return 1;
+ }
+
+ return 0;
+}
+
/*
* Called with local BH disabled and the pool lock held.
*/
-#define lookup(_daddr, _stack) \
+#define lookup(_daddr, _stack, _base) \
({ \
struct inet_peer *u; \
struct inet_peer __rcu **v; \
\
stackptr = _stack; \
- *stackptr++ = &peers.root; \
- for (u = rcu_dereference_protected(peers.root, \
- lockdep_is_held(&peers.lock)); \
+ *stackptr++ = &_base->root; \
+ for (u = rcu_dereference_protected(_base->root, \
+ lockdep_is_held(&_base->lock)); \
u != peer_avl_empty; ) { \
- if (_daddr == u->v4daddr) \
+ int cmp = addr_compare(_daddr, &u->daddr); \
+ if (cmp == 0) \
break; \
- if ((__force __u32)_daddr < (__force __u32)u->v4daddr) \
+ if (cmp == -1) \
v = &u->avl_left; \
else \
v = &u->avl_right; \
*stackptr++ = v; \
u = rcu_dereference_protected(*v, \
- lockdep_is_held(&peers.lock)); \
+ lockdep_is_held(&_base->lock)); \
} \
u; \
})
@@ -185,13 +211,15 @@ static void unlink_from_unused(struct inet_peer *p)
* But every pointer we follow is guaranteed to be valid thanks to RCU.
* We exit from this function if number of links exceeds PEER_MAXDEPTH
*/
-static struct inet_peer *lookup_rcu_bh(__be32 daddr)
+static struct inet_peer *lookup_rcu_bh(const struct inetpeer_addr *daddr,
+ struct inet_peer_base *base)
{
- struct inet_peer *u = rcu_dereference_bh(peers.root);
+ struct inet_peer *u = rcu_dereference_bh(base->root);
int count = 0;
while (u != peer_avl_empty) {
- if (daddr == u->v4daddr) {
+ int cmp = addr_compare(daddr, &u->daddr);
+ if (cmp == 0) {
/* Before taking a reference, check if this entry was
* deleted, unlink_from_pool() sets refcnt=-1 to make
* distinction between an unused entry (refcnt=0) and
@@ -201,7 +229,7 @@ static struct inet_peer *lookup_rcu_bh(__be32 daddr)
u = NULL;
return u;
}
- if ((__force __u32)daddr < (__force __u32)u->v4daddr)
+ if (cmp == -1)
u = rcu_dereference_bh(u->avl_left);
else
u = rcu_dereference_bh(u->avl_right);
@@ -212,19 +240,19 @@ static struct inet_peer *lookup_rcu_bh(__be32 daddr)
}
/* Called with local BH disabled and the pool lock held. */
-#define lookup_rightempty(start) \
+#define lookup_rightempty(start, base) \
({ \
struct inet_peer *u; \
struct inet_peer __rcu **v; \
*stackptr++ = &start->avl_left; \
v = &start->avl_left; \
for (u = rcu_dereference_protected(*v, \
- lockdep_is_held(&peers.lock)); \
+ lockdep_is_held(&base->lock)); \
u->avl_right != peer_avl_empty_rcu; ) { \
v = &u->avl_right; \
*stackptr++ = v; \
u = rcu_dereference_protected(*v, \
- lockdep_is_held(&peers.lock)); \
+ lockdep_is_held(&base->lock)); \
} \
u; \
})
@@ -234,7 +262,8 @@ static struct inet_peer *lookup_rcu_bh(__be32 daddr)
* Look into mm/map_avl.c for more detail description of the ideas.
*/
static void peer_avl_rebalance(struct inet_peer __rcu **stack[],
- struct inet_peer __rcu ***stackend)
+ struct inet_peer __rcu ***stackend,
+ struct inet_peer_base *base)
{
struct inet_peer __rcu **nodep;
struct inet_peer *node, *l, *r;
@@ -243,20 +272,20 @@ static void peer_avl_rebalance(struct inet_peer __rcu **stack[],
while (stackend > stack) {
nodep = *--stackend;
node = rcu_dereference_protected(*nodep,
- lockdep_is_held(&peers.lock));
+ lockdep_is_held(&base->lock));
l = rcu_dereference_protected(node->avl_left,
- lockdep_is_held(&peers.lock));
+ lockdep_is_held(&base->lock));
r = rcu_dereference_protected(node->avl_right,
- lockdep_is_held(&peers.lock));
+ lockdep_is_held(&base->lock));
lh = node_height(l);
rh = node_height(r);
if (lh > rh + 1) { /* l: RH+2 */
struct inet_peer *ll, *lr, *lrl, *lrr;
int lrh;
ll = rcu_dereference_protected(l->avl_left,
- lockdep_is_held(&peers.lock));
+ lockdep_is_held(&base->lock));
lr = rcu_dereference_protected(l->avl_right,
- lockdep_is_held(&peers.lock));
+ lockdep_is_held(&base->lock));
lrh = node_height(lr);
if (lrh <= node_height(ll)) { /* ll: RH+1 */
RCU_INIT_POINTER(node->avl_left, lr); /* lr: RH or RH+1 */
@@ -268,9 +297,9 @@ static void peer_avl_rebalance(struct inet_peer __rcu **stack[],
RCU_INIT_POINTER(*nodep, l);
} else { /* ll: RH, lr: RH+1 */
lrl = rcu_dereference_protected(lr->avl_left,
- lockdep_is_held(&peers.lock)); /* lrl: RH or RH-1 */
+ lockdep_is_held(&base->lock)); /* lrl: RH or RH-1 */
lrr = rcu_dereference_protected(lr->avl_right,
- lockdep_is_held(&peers.lock)); /* lrr: RH or RH-1 */
+ lockdep_is_held(&base->lock)); /* lrr: RH or RH-1 */
RCU_INIT_POINTER(node->avl_left, lrr); /* lrr: RH or RH-1 */
RCU_INIT_POINTER(node->avl_right, r); /* r: RH */
node->avl_height = rh + 1; /* node: RH+1 */
@@ -286,9 +315,9 @@ static void peer_avl_rebalance(struct inet_peer __rcu **stack[],
struct inet_peer *rr, *rl, *rlr, *rll;
int rlh;
rr = rcu_dereference_protected(r->avl_right,
- lockdep_is_held(&peers.lock));
+ lockdep_is_held(&base->lock));
rl = rcu_dereference_protected(r->avl_left,
- lockdep_is_held(&peers.lock));
+ lockdep_is_held(&base->lock));
rlh = node_height(rl);
if (rlh <= node_height(rr)) { /* rr: LH+1 */
RCU_INIT_POINTER(node->avl_right, rl); /* rl: LH or LH+1 */
@@ -300,9 +329,9 @@ static void peer_avl_rebalance(struct inet_peer __rcu **stack[],
RCU_INIT_POINTER(*nodep, r);
} else { /* rr: RH, rl: RH+1 */
rlr = rcu_dereference_protected(rl->avl_right,
- lockdep_is_held(&peers.lock)); /* rlr: LH or LH-1 */
+ lockdep_is_held(&base->lock)); /* rlr: LH or LH-1 */
rll = rcu_dereference_protected(rl->avl_left,
- lockdep_is_held(&peers.lock)); /* rll: LH or LH-1 */
+ lockdep_is_held(&base->lock)); /* rll: LH or LH-1 */
RCU_INIT_POINTER(node->avl_right, rll); /* rll: LH or LH-1 */
RCU_INIT_POINTER(node->avl_left, l); /* l: LH */
node->avl_height = lh + 1; /* node: LH+1 */
@@ -321,14 +350,14 @@ static void peer_avl_rebalance(struct inet_peer __rcu **stack[],
}
/* Called with local BH disabled and the pool lock held. */
-#define link_to_pool(n) \
+#define link_to_pool(n, base) \
do { \
n->avl_height = 1; \
n->avl_left = peer_avl_empty_rcu; \
n->avl_right = peer_avl_empty_rcu; \
/* lockless readers can catch us now */ \
rcu_assign_pointer(**--stackptr, n); \
- peer_avl_rebalance(stack, stackptr); \
+ peer_avl_rebalance(stack, stackptr, base); \
} while (0)
static void inetpeer_free_rcu(struct rcu_head *head)
@@ -337,13 +366,13 @@ static void inetpeer_free_rcu(struct rcu_head *head)
}
/* May be called with local BH enabled. */
-static void unlink_from_pool(struct inet_peer *p)
+static void unlink_from_pool(struct inet_peer *p, struct inet_peer_base *base)
{
int do_free;
do_free = 0;
- spin_lock_bh(&peers.lock);
+ spin_lock_bh(&base->lock);
/* Check the reference counter. It was artificially incremented by 1
* in cleanup() function to prevent sudden disappearing. If we can
* atomically (because of lockless readers) take this last reference,
@@ -353,7 +382,7 @@ static void unlink_from_pool(struct inet_peer *p)
if (atomic_cmpxchg(&p->refcnt, 1, -1) == 1) {
struct inet_peer __rcu **stack[PEER_MAXDEPTH];
struct inet_peer __rcu ***stackptr, ***delp;
- if (lookup(p->v4daddr, stack) != p)
+ if (lookup(&p->daddr, stack, base) != p)
BUG();
delp = stackptr - 1; /* *delp[0] == p */
if (p->avl_left == peer_avl_empty_rcu) {
@@ -362,11 +391,11 @@ static void unlink_from_pool(struct inet_peer *p)
} else {
/* look for a node to insert instead of p */
struct inet_peer *t;
- t = lookup_rightempty(p);
+ t = lookup_rightempty(p, base);
BUG_ON(rcu_dereference_protected(*stackptr[-1],
- lockdep_is_held(&peers.lock)) != t);
+ lockdep_is_held(&base->lock)) != t);
**--stackptr = t->avl_left;
- /* t is removed, t->v4daddr > x->v4daddr for any
+ /* t is removed, t->daddr > x->daddr for any
* x in p->avl_left subtree.
* Put t in the old place of p. */
RCU_INIT_POINTER(*delp[0], t);
@@ -376,11 +405,11 @@ static void unlink_from_pool(struct inet_peer *p)
BUG_ON(delp[1] != &p->avl_left);
delp[1] = &t->avl_left; /* was &p->avl_left */
}
- peer_avl_rebalance(stack, stackptr);
- peers.total--;
+ peer_avl_rebalance(stack, stackptr, base);
+ base->total--;
do_free = 1;
}
- spin_unlock_bh(&peers.lock);
+ spin_unlock_bh(&base->lock);
if (do_free)
call_rcu_bh(&p->rcu, inetpeer_free_rcu);
@@ -395,6 +424,16 @@ static void unlink_from_pool(struct inet_peer *p)
inet_putpeer(p);
}
+static struct inet_peer_base *family_to_base(int family)
+{
+ return (family == AF_INET ? &v4_peers : &v6_peers);
+}
+
+static struct inet_peer_base *peer_to_base(struct inet_peer *p)
+{
+ return family_to_base(p->daddr.family);
+}
+
/* May be called with local BH enabled. */
static int cleanup_once(unsigned long ttl)
{
@@ -428,21 +467,22 @@ static int cleanup_once(unsigned long ttl)
* happen because of entry limits in route cache. */
return -1;
- unlink_from_pool(p);
+ unlink_from_pool(p, peer_to_base(p));
return 0;
}
/* Called with or without local BH being disabled. */
-struct inet_peer *inet_getpeer(__be32 daddr, int create)
+struct inet_peer *inet_getpeer(struct inetpeer_addr *daddr, int create)
{
- struct inet_peer *p;
struct inet_peer __rcu **stack[PEER_MAXDEPTH], ***stackptr;
+ struct inet_peer_base *base = family_to_base(AF_INET);
+ struct inet_peer *p;
/* Look up for the address quickly, lockless.
* Because of a concurrent writer, we might not find an existing entry.
*/
rcu_read_lock_bh();
- p = lookup_rcu_bh(daddr);
+ p = lookup_rcu_bh(daddr, base);
rcu_read_unlock_bh();
if (p) {
@@ -456,50 +496,57 @@ struct inet_peer *inet_getpeer(__be32 daddr, int create)
/* retry an exact lookup, taking the lock before.
* At least, nodes should be hot in our cache.
*/
- spin_lock_bh(&peers.lock);
- p = lookup(daddr, stack);
+ spin_lock_bh(&base->lock);
+ p = lookup(daddr, stack, base);
if (p != peer_avl_empty) {
atomic_inc(&p->refcnt);
- spin_unlock_bh(&peers.lock);
+ spin_unlock_bh(&base->lock);
/* Remove the entry from unused list if it was there. */
unlink_from_unused(p);
return p;
}
p = create ? kmem_cache_alloc(peer_cachep, GFP_ATOMIC) : NULL;
if (p) {
- p->v4daddr = daddr;
+ p->daddr = *daddr;
atomic_set(&p->refcnt, 1);
atomic_set(&p->rid, 0);
- atomic_set(&p->ip_id_count, secure_ip_id(daddr));
+ atomic_set(&p->ip_id_count, secure_ip_id(daddr->a4));
p->tcp_ts_stamp = 0;
INIT_LIST_HEAD(&p->unused);
/* Link the node. */
- link_to_pool(p);
- peers.total++;
+ link_to_pool(p, base);
+ base->total++;
}
- spin_unlock_bh(&peers.lock);
+ spin_unlock_bh(&base->lock);
- if (peers.total >= inet_peer_threshold)
+ if (base->total >= inet_peer_threshold)
/* Remove one less-recently-used entry. */
cleanup_once(0);
return p;
}
+static int compute_total(void)
+{
+ return v4_peers.total + v6_peers.total;
+}
+EXPORT_SYMBOL_GPL(inet_getpeer);
+
/* Called with local BH disabled. */
static void peer_check_expire(unsigned long dummy)
{
unsigned long now = jiffies;
- int ttl;
+ int ttl, total;
- if (peers.total >= inet_peer_threshold)
+ total = compute_total();
+ if (total >= inet_peer_threshold)
ttl = inet_peer_minttl;
else
ttl = inet_peer_maxttl
- (inet_peer_maxttl - inet_peer_minttl) / HZ *
- peers.total / inet_peer_threshold * HZ;
+ total / inet_peer_threshold * HZ;
while (!cleanup_once(ttl)) {
if (jiffies != now)
break;
@@ -508,13 +555,14 @@ static void peer_check_expire(unsigned long dummy)
/* Trigger the timer after inet_peer_gc_mintime .. inet_peer_gc_maxtime
* interval depending on the total number of entries (more entries,
* less interval). */
- if (peers.total >= inet_peer_threshold)
+ total = compute_total();
+ if (total >= inet_peer_threshold)
peer_periodic_timer.expires = jiffies + inet_peer_gc_mintime;
else
peer_periodic_timer.expires = jiffies
+ inet_peer_gc_maxtime
- (inet_peer_gc_maxtime - inet_peer_gc_mintime) / HZ *
- peers.total / inet_peer_threshold * HZ;
+ total / inet_peer_threshold * HZ;
add_timer(&peer_periodic_timer);
}
@@ -530,3 +578,4 @@ void inet_putpeer(struct inet_peer *p)
local_bh_enable();
}
+EXPORT_SYMBOL_GPL(inet_putpeer);