 Documentation/networking/ip-sysctl.txt            |  11
 drivers/net/ethernet/amd/au1000_eth.c             |   6
 drivers/net/ethernet/sun/sunvnet.c                |  73
 drivers/net/ethernet/sun/sunvnet.h                |   4
 drivers/net/ethernet/xilinx/xilinx_axienet_main.c |   1
 include/linux/netdevice.h                         |  29
 include/linux/rtnetlink.h                         |  10
 include/net/sch_generic.h                         |  30
 net/core/dev.c                                    |  51
 net/core/skbuff.c                                 |  18
 net/ipv4/udp_offload.c                            |   2
 net/ipv6/udp_offload.c                            |   2
 net/mac80211/tx.c                                 |  15
 net/sched/cls_api.c                               |  30
 net/sched/cls_basic.c                             |  80
 net/sched/cls_bpf.c                               |  94
 net/sched/cls_cgroup.c                            |  63
 net/sched/cls_flow.c                              | 145
 net/sched/cls_fw.c                                | 111
 net/sched/cls_route.c                             | 226
 net/sched/cls_rsvp.h                              | 160
 net/sched/cls_tcindex.c                           | 248
 net/sched/cls_u32.c                               | 258
 net/sched/sch_api.c                               |  10
 net/sched/sch_atm.c                               |  20
 net/sched/sch_cbq.c                               |  11
 net/sched/sch_choke.c                             |  15
 net/sched/sch_drr.c                               |   9
 net/sched/sch_dsmark.c                            |   9
 net/sched/sch_fq_codel.c                          |  11
 net/sched/sch_generic.c                           |   4
 net/sched/sch_hfsc.c                              |   8
 net/sched/sch_htb.c                               |  15
 net/sched/sch_ingress.c                           |   8
 net/sched/sch_mqprio.c                            |   6
 net/sched/sch_multiq.c                            |   8
 net/sched/sch_prio.c                              |  11
 net/sched/sch_qfq.c                               |   9
 net/sched/sch_sfb.c                               |  15
 net/sched/sch_sfq.c                               |  11
 net/sched/sch_teql.c                              |  13
 41 files changed, 1151 insertions(+), 709 deletions(-)
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index db2383cb1df9..1b5581a30d77 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -952,14 +952,9 @@ accept_source_route - BOOLEAN
FALSE (host)
accept_local - BOOLEAN
- Accept packets with local source addresses. In combination
- with suitable routing, this can be used to direct packets
- between two local interfaces over the wire and have them
- accepted properly.
-
- rp_filter must be set to a non-zero value in order for
- accept_local to have an effect.
-
+ Accept packets with local source addresses. In combination with
+ suitable routing, this can be used to direct packets between two
+ local interfaces over the wire and have them accepted properly.
default FALSE
route_localnet - BOOLEAN
diff --git a/drivers/net/ethernet/amd/au1000_eth.c b/drivers/net/ethernet/amd/au1000_eth.c
index 31c48a7ac2b6..6c323f4f457b 100644
--- a/drivers/net/ethernet/amd/au1000_eth.c
+++ b/drivers/net/ethernet/amd/au1000_eth.c
@@ -1140,7 +1140,6 @@ static const struct net_device_ops au1000_netdev_ops = {
static int au1000_probe(struct platform_device *pdev)
{
- static unsigned version_printed;
struct au1000_private *aup = NULL;
struct au1000_eth_platform_data *pd;
struct net_device *dev = NULL;
@@ -1371,9 +1370,8 @@ static int au1000_probe(struct platform_device *pdev)
netdev_info(dev, "Au1xx0 Ethernet found at 0x%lx, irq %d\n",
(unsigned long)base->start, irq);
- if (version_printed++ == 0)
- pr_info("%s version %s %s\n",
- DRV_NAME, DRV_VERSION, DRV_AUTHOR);
+
+ pr_info_once("%s version %s %s\n", DRV_NAME, DRV_VERSION, DRV_AUTHOR);
return 0;
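
For reference, pr_info_once() is the stock form of the static-flag idiom deleted above; a minimal sketch of the equivalence, using a hypothetical probe function and placeholder strings:

    #include <linux/printk.h>

    /* pr_info_once() expands to a static "printed" flag guarding a
     * printk(KERN_INFO ...), so the banner is emitted only on the
     * first call, however many devices probe successfully.
     */
    static int example_probe(void)
    {
            pr_info_once("%s version %s %s\n", "example-drv", "1.0", "author");
            return 0;
    }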
diff --git a/drivers/net/ethernet/sun/sunvnet.c b/drivers/net/ethernet/sun/sunvnet.c
index a4657a401278..763cdfc228be 100644
--- a/drivers/net/ethernet/sun/sunvnet.c
+++ b/drivers/net/ethernet/sun/sunvnet.c
@@ -37,6 +37,8 @@ MODULE_VERSION(DRV_MODULE_VERSION);
*/
#define VNET_MAX_RETRIES 10
+static int __vnet_tx_trigger(struct vnet_port *port, u32 start);
+
/* Ordered from largest major to lowest */
static struct vio_version vnet_versions[] = {
{ .major = 1, .minor = 0 },
@@ -283,10 +285,18 @@ static int vnet_send_ack(struct vnet_port *port, struct vio_dring_state *dr,
port->raddr[0], port->raddr[1],
port->raddr[2], port->raddr[3],
port->raddr[4], port->raddr[5]);
- err = -ECONNRESET;
+ break;
}
} while (err == -EAGAIN);
+ if (err <= 0 && vio_dring_state == VIO_DRING_STOPPED) {
+ port->stop_rx_idx = end;
+ port->stop_rx = true;
+ } else {
+ port->stop_rx_idx = 0;
+ port->stop_rx = false;
+ }
+
return err;
}
@@ -448,7 +458,7 @@ static int vnet_ack(struct vnet_port *port, void *msgbuf)
struct net_device *dev;
struct vnet *vp;
u32 end;
-
+ struct vio_net_desc *desc;
if (unlikely(pkt->tag.stype_env != VIO_DRING_DATA))
return 0;
@@ -456,7 +466,24 @@ static int vnet_ack(struct vnet_port *port, void *msgbuf)
if (unlikely(!idx_is_pending(dr, end)))
return 0;
+ /* sync for race conditions with vnet_start_xmit() and tell xmit it
+ * is time to send a trigger.
+ */
dr->cons = next_idx(end, dr);
+ desc = vio_dring_entry(dr, dr->cons);
+ if (desc->hdr.state == VIO_DESC_READY && port->start_cons) {
+ /* vnet_start_xmit() just populated this dring but missed
+ * sending the "start" LDC message to the consumer.
+ * Send a "start" trigger on its behalf.
+ */
+ if (__vnet_tx_trigger(port, dr->cons) > 0)
+ port->start_cons = false;
+ else
+ port->start_cons = true;
+ } else {
+ port->start_cons = true;
+ }
+
vp = port->vp;
dev = vp->dev;
@@ -597,7 +624,7 @@ static void vnet_event(void *arg, int event)
local_irq_restore(flags);
}
-static int __vnet_tx_trigger(struct vnet_port *port)
+static int __vnet_tx_trigger(struct vnet_port *port, u32 start)
{
struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING];
struct vio_dring_data hdr = {
@@ -608,12 +635,21 @@ static int __vnet_tx_trigger(struct vnet_port *port)
.sid = vio_send_sid(&port->vio),
},
.dring_ident = dr->ident,
- .start_idx = dr->prod,
+ .start_idx = start,
.end_idx = (u32) -1,
};
int err, delay;
int retries = 0;
+ if (port->stop_rx) {
+ err = vnet_send_ack(port,
+ &port->vio.drings[VIO_DRIVER_RX_RING],
+ port->stop_rx_idx, -1,
+ VIO_DRING_STOPPED);
+ if (err <= 0)
+ return err;
+ }
+
hdr.seq = dr->snd_nxt;
delay = 1;
do {
@@ -734,7 +770,30 @@ static int vnet_start_xmit(struct sk_buff *skb, struct net_device *dev)
d->hdr.state = VIO_DESC_READY;
- err = __vnet_tx_trigger(port);
+ /* Exactly one ldc "start" trigger (for dr->cons) needs to be sent
+ * to notify the consumer that some descriptors are READY.
+ * After that "start" trigger, no additional triggers are needed until
+ * a DRING_STOPPED is received from the consumer. The dr->cons field
+ * (set up by vnet_ack()) has the value of the next dring index
+ * that has not yet been ack-ed. We send a "start" trigger here
+ * if, and only if, start_cons is true (reset it afterward). Conversely,
+ * vnet_ack() should check if the dring corresponding to cons
+ * is marked READY, but start_cons was false.
+ * If so, vnet_ack() should send out the missed "start" trigger.
+ *
+ * Note that the wmb() above makes sure the cookies et al. are
+ * not globally visible before the VIO_DESC_READY, and that the
+ * stores are ordered correctly by the compiler. The consumer will
+ * not proceed until the VIO_DESC_READY is visible, assuring that
+ * the consumer does not observe anything related to descriptors
+ * out of order. The HV trap from the LDC start trigger is the
+ * producer-to-consumer announcement that work is available to the
+ * consumer.
+ */
+ if (!port->start_cons)
+ goto ldc_start_done; /* previous trigger suffices */
+
+ err = __vnet_tx_trigger(port, dr->cons);
if (unlikely(err < 0)) {
netdev_info(dev, "TX trigger error %d\n", err);
d->hdr.state = VIO_DESC_FREE;
@@ -742,6 +801,9 @@ static int vnet_start_xmit(struct sk_buff *skb, struct net_device *dev)
goto out_dropped_unlock;
}
+ldc_start_done:
+ port->start_cons = false;
+
dev->stats.tx_packets++;
dev->stats.tx_bytes += skb->len;
@@ -1035,6 +1097,7 @@ static int vnet_port_alloc_tx_bufs(struct vnet_port *port)
(sizeof(struct ldc_trans_cookie) * 2));
dr->num_entries = VNET_TX_RING_SIZE;
dr->prod = dr->cons = 0;
+ port->start_cons = true; /* need an initial trigger */
dr->pending = VNET_TX_RING_SIZE;
dr->ncookies = ncookies;
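
A condensed sketch of the trigger-elision rule the comment above describes, reusing the driver's own types and helpers (not standalone; locking and descriptor setup omitted):

    /* Producer side: after marking a descriptor VIO_DESC_READY, send
     * the LDC "start" trigger only if no earlier trigger is still
     * outstanding. vnet_ack() re-arms start_cons once the consumer
     * catches up, and resends a missed trigger on xmit's behalf.
     */
    static int example_xmit_trigger(struct vnet_port *port,
                                    struct vio_dring_state *dr)
    {
            if (!port->start_cons)
                    return 0;       /* previous trigger suffices */

            if (__vnet_tx_trigger(port, dr->cons) < 0)
                    return -EIO;    /* caller rolls the descriptor back */

            port->start_cons = false;
            return 0;
    }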
diff --git a/drivers/net/ethernet/sun/sunvnet.h b/drivers/net/ethernet/sun/sunvnet.h
index de5c2c64996f..da4933750d06 100644
--- a/drivers/net/ethernet/sun/sunvnet.h
+++ b/drivers/net/ethernet/sun/sunvnet.h
@@ -40,6 +40,10 @@ struct vnet_port {
struct vnet_tx_entry tx_bufs[VNET_TX_RING_SIZE];
struct list_head list;
+
+ u32 stop_rx_idx;
+ bool stop_rx;
+ bool start_cons;
};
static inline struct vnet_port *to_vnet_port(struct vio_driver_state *vio)
diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
index c8fd94133ecd..4ea2d4e6f1d1 100644
--- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
+++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
@@ -1485,7 +1485,6 @@ static int axienet_of_probe(struct platform_device *op)
if (!ndev)
return -ENOMEM;
- ether_setup(ndev);
platform_set_drvdata(op, ndev);
SET_NETDEV_DEV(ndev, &op->dev);
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index ba72f6baae1a..ae721f53739e 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -543,7 +543,7 @@ struct netdev_queue {
* read mostly part
*/
struct net_device *dev;
- struct Qdisc *qdisc;
+ struct Qdisc __rcu *qdisc;
struct Qdisc *qdisc_sleeping;
#ifdef CONFIG_SYSFS
struct kobject kobj;
@@ -2356,12 +2356,7 @@ static inline void input_queue_tail_incr_save(struct softnet_data *sd,
DECLARE_PER_CPU_ALIGNED(struct softnet_data, softnet_data);
void __netif_schedule(struct Qdisc *q);
-
-static inline void netif_schedule_queue(struct netdev_queue *txq)
-{
- if (!(txq->state & QUEUE_STATE_ANY_XOFF))
- __netif_schedule(txq->qdisc);
-}
+void netif_schedule_queue(struct netdev_queue *txq);
static inline void netif_tx_schedule_all(struct net_device *dev)
{
@@ -2397,11 +2392,7 @@ static inline void netif_tx_start_all_queues(struct net_device *dev)
}
}
-static inline void netif_tx_wake_queue(struct netdev_queue *dev_queue)
-{
- if (test_and_clear_bit(__QUEUE_STATE_DRV_XOFF, &dev_queue->state))
- __netif_schedule(dev_queue->qdisc);
-}
+void netif_tx_wake_queue(struct netdev_queue *dev_queue);
/**
* netif_wake_queue - restart transmit
@@ -2673,19 +2664,7 @@ static inline bool netif_subqueue_stopped(const struct net_device *dev,
return __netif_subqueue_stopped(dev, skb_get_queue_mapping(skb));
}
-/**
- * netif_wake_subqueue - allow sending packets on subqueue
- * @dev: network device
- * @queue_index: sub queue index
- *
- * Resume individual transmit queue of a device with multiple transmit queues.
- */
-static inline void netif_wake_subqueue(struct net_device *dev, u16 queue_index)
-{
- struct netdev_queue *txq = netdev_get_tx_queue(dev, queue_index);
- if (test_and_clear_bit(__QUEUE_STATE_DRV_XOFF, &txq->state))
- __netif_schedule(txq->qdisc);
-}
+void netif_wake_subqueue(struct net_device *dev, u16 queue_index);
#ifdef CONFIG_XPS
int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index 167bae7bdfa4..6cacbce1a06c 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -47,6 +47,16 @@ static inline int lockdep_rtnl_is_held(void)
rcu_dereference_check(p, lockdep_rtnl_is_held())
/**
+ * rcu_dereference_bh_rtnl - rcu_dereference_bh with debug checking
+ * @p: The pointer to read, prior to dereference
+ *
+ * Do an rcu_dereference_bh(p), but check that the caller holds either
+ * rcu_read_lock_bh() or RTNL. Note: please prefer rtnl_dereference()
+ * or rcu_dereference_bh().
+ */
+#define rcu_dereference_bh_rtnl(p) \
+ rcu_dereference_bh_check(p, lockdep_rtnl_is_held())
+
+/**
* rtnl_dereference - fetch RCU pointer when updates are prevented by RTNL
* @p: The pointer to read, prior to dereferencing
*
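
A hedged usage sketch for the new helper: it suits code reachable both from the softirq fast path (under rcu_read_lock_bh()) and from a netlink handler (under RTNL); the txq->qdisc field matches the netdev_queue change above:

    #include <linux/netdevice.h>
    #include <linux/rtnetlink.h>
    #include <net/sch_generic.h>

    /* Illustrative only: legal with either rcu_read_lock_bh() held
     * (transmit path) or the RTNL mutex held (control path), where a
     * plain rcu_dereference_bh() would trip lockdep under RTNL alone.
     */
    static struct Qdisc *example_fetch_qdisc(struct netdev_queue *txq)
    {
            return rcu_dereference_bh_rtnl(txq->qdisc);
    }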
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index a3cfb8ebeb53..1e89b9ad3a4c 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -143,7 +143,7 @@ struct Qdisc_class_ops {
void (*walk)(struct Qdisc *, struct qdisc_walker * arg);
/* Filter manipulation */
- struct tcf_proto ** (*tcf_chain)(struct Qdisc *, unsigned long);
+ struct tcf_proto __rcu ** (*tcf_chain)(struct Qdisc *, unsigned long);
unsigned long (*bind_tcf)(struct Qdisc *, unsigned long,
u32 classid);
void (*unbind_tcf)(struct Qdisc *, unsigned long);
@@ -212,8 +212,8 @@ struct tcf_proto_ops {
struct tcf_proto {
/* Fast access part */
- struct tcf_proto *next;
- void *root;
+ struct tcf_proto __rcu *next;
+ void __rcu *root;
int (*classify)(struct sk_buff *,
const struct tcf_proto *,
struct tcf_result *);
@@ -225,6 +225,7 @@ struct tcf_proto {
struct Qdisc *q;
void *data;
const struct tcf_proto_ops *ops;
+ struct rcu_head rcu;
};
struct qdisc_skb_cb {
@@ -259,7 +260,9 @@ static inline spinlock_t *qdisc_lock(struct Qdisc *qdisc)
static inline struct Qdisc *qdisc_root(const struct Qdisc *qdisc)
{
- return qdisc->dev_queue->qdisc;
+ struct Qdisc *q = rcu_dereference_rtnl(qdisc->dev_queue->qdisc);
+
+ return q;
}
static inline struct Qdisc *qdisc_root_sleeping(const struct Qdisc *qdisc)
@@ -376,7 +379,7 @@ struct Qdisc *qdisc_create_dflt(struct netdev_queue *dev_queue,
void __qdisc_calculate_pkt_len(struct sk_buff *skb,
const struct qdisc_size_table *stab);
void tcf_destroy(struct tcf_proto *tp);
-void tcf_destroy_chain(struct tcf_proto **fl);
+void tcf_destroy_chain(struct tcf_proto __rcu **fl);
/* Reset all TX qdiscs greater then index of a device. */
static inline void qdisc_reset_all_tx_gt(struct net_device *dev, unsigned int i)
@@ -384,7 +387,7 @@ static inline void qdisc_reset_all_tx_gt(struct net_device *dev, unsigned int i)
struct Qdisc *qdisc;
for (; i < dev->num_tx_queues; i++) {
- qdisc = netdev_get_tx_queue(dev, i)->qdisc;
+ qdisc = rtnl_dereference(netdev_get_tx_queue(dev, i)->qdisc);
if (qdisc) {
spin_lock_bh(qdisc_lock(qdisc));
qdisc_reset(qdisc);
@@ -402,13 +405,18 @@ static inline void qdisc_reset_all_tx(struct net_device *dev)
static inline bool qdisc_all_tx_empty(const struct net_device *dev)
{
unsigned int i;
+
+ rcu_read_lock();
for (i = 0; i < dev->num_tx_queues; i++) {
struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
- const struct Qdisc *q = txq->qdisc;
+ const struct Qdisc *q = rcu_dereference(txq->qdisc);
- if (q->q.qlen)
+ if (q->q.qlen) {
+ rcu_read_unlock();
return false;
+ }
}
+ rcu_read_unlock();
return true;
}
@@ -416,9 +424,10 @@ static inline bool qdisc_all_tx_empty(const struct net_device *dev)
static inline bool qdisc_tx_changing(const struct net_device *dev)
{
unsigned int i;
+
for (i = 0; i < dev->num_tx_queues; i++) {
struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
- if (txq->qdisc != txq->qdisc_sleeping)
+ if (rcu_access_pointer(txq->qdisc) != txq->qdisc_sleeping)
return true;
}
return false;
@@ -428,9 +437,10 @@ static inline bool qdisc_tx_changing(const struct net_device *dev)
static inline bool qdisc_tx_is_noop(const struct net_device *dev)
{
unsigned int i;
+
for (i = 0; i < dev->num_tx_queues; i++) {
struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
- if (txq->qdisc != &noop_qdisc)
+ if (rcu_access_pointer(txq->qdisc) != &noop_qdisc)
return false;
}
return true;
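
The hunks above use three accessors for the same __rcu pointer, one per calling context; a compact sketch of the convention, with illustrative types:

    #include <linux/rcupdate.h>
    #include <linux/rtnetlink.h>

    struct item { int qlen; };
    struct holder { struct item __rcu *ptr; };

    /* Reader: bracket the dereference with rcu_read_lock(), as
     * qdisc_all_tx_empty() now does.
     */
    static bool example_reader(struct holder *h)
    {
            struct item *p;
            bool empty;

            rcu_read_lock();
            p = rcu_dereference(h->ptr);
            empty = !p || p->qlen == 0;
            rcu_read_unlock();
            return empty;
    }

    /* Pointer comparison only: rcu_access_pointer() skips the lockdep
     * check because the value is never dereferenced, as in
     * qdisc_tx_changing() and qdisc_tx_is_noop().
     */
    static bool example_is(struct holder *h, struct item *x)
    {
            return rcu_access_pointer(h->ptr) == x;
    }

    /* Updater: rtnl_dereference() asserts the RTNL mutex is held, as
     * in qdisc_reset_all_tx_gt().
     */
    static struct item *example_updater(struct holder *h)
    {
            ASSERT_RTNL();
            return rtnl_dereference(h->ptr);
    }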
diff --git a/net/core/dev.c b/net/core/dev.c
index 3c6a967e5830..b3d6dbc0c696 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2177,6 +2177,53 @@ static struct dev_kfree_skb_cb *get_kfree_skb_cb(const struct sk_buff *skb)
return (struct dev_kfree_skb_cb *)skb->cb;
}
+void netif_schedule_queue(struct netdev_queue *txq)
+{
+ rcu_read_lock();
+ if (!(txq->state & QUEUE_STATE_ANY_XOFF)) {
+ struct Qdisc *q = rcu_dereference(txq->qdisc);
+
+ __netif_schedule(q);
+ }
+ rcu_read_unlock();
+}
+EXPORT_SYMBOL(netif_schedule_queue);
+
+/**
+ * netif_wake_subqueue - allow sending packets on subqueue
+ * @dev: network device
+ * @queue_index: sub queue index
+ *
+ * Resume individual transmit queue of a device with multiple transmit queues.
+ */
+void netif_wake_subqueue(struct net_device *dev, u16 queue_index)
+{
+ struct netdev_queue *txq = netdev_get_tx_queue(dev, queue_index);
+
+ if (test_and_clear_bit(__QUEUE_STATE_DRV_XOFF, &txq->state)) {
+ struct Qdisc *q;
+
+ rcu_read_lock();
+ q = rcu_dereference(txq->qdisc);
+ __netif_schedule(q);
+ rcu_read_unlock();
+ }
+}
+EXPORT_SYMBOL(netif_wake_subqueue);
+
+void netif_tx_wake_queue(struct netdev_queue *dev_queue)
+{
+ if (test_and_clear_bit(__QUEUE_STATE_DRV_XOFF, &dev_queue->state)) {
+ struct Qdisc *q;
+
+ rcu_read_lock();
+ q = rcu_dereference(dev_queue->qdisc);
+ __netif_schedule(q);
+ rcu_read_unlock();
+ }
+}
+EXPORT_SYMBOL(netif_tx_wake_queue);
+
void __dev_kfree_skb_irq(struct sk_buff *skb, enum skb_free_reason reason)
{
unsigned long flags;
@@ -3432,7 +3479,7 @@ static int ing_filter(struct sk_buff *skb, struct netdev_queue *rxq)
skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl);
skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);
- q = rxq->qdisc;
+ q = rcu_dereference(rxq->qdisc);
if (q != &noop_qdisc) {
spin_lock(qdisc_lock(q));
if (likely(!test_bit(__QDISC_STATE_DEACTIVATED, &q->state)))
@@ -3449,7 +3496,7 @@ static inline struct sk_buff *handle_ing(struct sk_buff *skb,
{
struct netdev_queue *rxq = rcu_dereference(skb->dev->ingress_queue);
- if (!rxq || rxq->qdisc == &noop_qdisc)
+ if (!rxq || rcu_access_pointer(rxq->qdisc) == &noop_qdisc)
goto out;
if (*pt_prev) {
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index a18dfb02d944..c8259ac38745 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3511,6 +3511,19 @@ struct sk_buff *sock_dequeue_err_skb(struct sock *sk)
}
EXPORT_SYMBOL(sock_dequeue_err_skb);
+/**
+ * skb_clone_sk - create clone of skb, and take reference to socket
+ * @skb: the skb to clone
+ *
+ * This function creates a clone of a buffer that holds a reference on
+ * sk_refcnt. Buffers created via this function are meant to be
+ * returned using sock_queue_err_skb, or freed via kfree_skb.
+ *
+ * When passing buffers allocated with this function to sock_queue_err_skb
+ * it is necessary to wrap the call with sock_hold/sock_put in order to
+ * prevent the socket from being released prior to being enqueued on
+ * the sk_error_queue.
+ */
struct sk_buff *skb_clone_sk(struct sk_buff *skb)
{
struct sock *sk = skb->sk;
@@ -3615,9 +3628,14 @@ void skb_complete_wifi_ack(struct sk_buff *skb, bool acked)
serr->ee.ee_errno = ENOMSG;
serr->ee.ee_origin = SO_EE_ORIGIN_TXSTATUS;
+ /* take a reference to prevent skb_orphan() from freeing the socket */
+ sock_hold(sk);
+
err = sock_queue_err_skb(sk, skb);
if (err)
kfree_skb(skb);
+
+ sock_put(sk);
}
EXPORT_SYMBOL_GPL(skb_complete_wifi_ack);
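
A minimal sketch of the calling convention the new kernel-doc spells out, modeled on the skb_complete_wifi_ack() hunk above (error-record setup omitted):

    #include <linux/skbuff.h>
    #include <net/sock.h>

    /* Illustrative TX-status completion for a clone created with
     * skb_clone_sk(): sock_hold()/sock_put() pin the socket across the
     * window in which sock_queue_err_skb() could otherwise race with
     * the socket being released by skb_orphan().
     */
    static void example_complete_tx_status(struct sk_buff *skb)
    {
            struct sock *sk = skb->sk;

            sock_hold(sk);
            if (sock_queue_err_skb(sk, skb))
                    kfree_skb(skb); /* not queued; drop the clone */
            sock_put(sk);
    }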
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 52d5f46abf86..adab393b2fe5 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -294,7 +294,7 @@ static struct sk_buff **udp4_gro_receive(struct sk_buff **head,
goto flush;
/* Don't bother verifying checksum if we're going to flush anyway. */
- if (!NAPI_GRO_CB(skb)->flush)
+ if (NAPI_GRO_CB(skb)->flush)
goto skip;
if (skb_gro_checksum_validate_zero_check(skb, IPPROTO_UDP, uh->check,
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index a1ad34b1c4ec..de85f809bf29 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -138,7 +138,7 @@ static struct sk_buff **udp6_gro_receive(struct sk_buff **head,
goto flush;
/* Don't bother verifying checksum if we're going to flush anyway. */
- if (!NAPI_GRO_CB(skb)->flush)
+ if (NAPI_GRO_CB(skb)->flush)
goto skip;
if (skb_gro_checksum_validate_zero_check(skb, IPPROTO_UDP, uh->check,
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 925c39f4099e..cf7141452b0f 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -2072,30 +2072,23 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb,
if (unlikely(!multicast && skb->sk &&
skb_shinfo(skb)->tx_flags & SKBTX_WIFI_STATUS)) {
- struct sk_buff *orig_skb = skb;
+ struct sk_buff *ack_skb = skb_clone_sk(skb);
- skb = skb_clone(skb, GFP_ATOMIC);
- if (skb) {
+ if (ack_skb) {
unsigned long flags;
int id;
spin_lock_irqsave(&local->ack_status_lock, flags);
- id = idr_alloc(&local->ack_status_frames, orig_skb,
+ id = idr_alloc(&local->ack_status_frames, ack_skb,
1, 0x10000, GFP_ATOMIC);
spin_unlock_irqrestore(&local->ack_status_lock, flags);
if (id >= 0) {
info_id = id;
info_flags |= IEEE80211_TX_CTL_REQ_TX_STATUS;
- } else if (skb_shared(skb)) {
- kfree_skb(orig_skb);
} else {
- kfree_skb(skb);
- skb = orig_skb;
+ kfree_skb(ack_skb);
}
- } else {
- /* couldn't clone -- lose tx status ... */
- skb = orig_skb;
}
}
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index c28b0d327b12..e547efdaba9d 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -117,7 +117,6 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n)
{
struct net *net = sock_net(skb->sk);
struct nlattr *tca[TCA_MAX + 1];
- spinlock_t *root_lock;
struct tcmsg *t;
u32 protocol;
u32 prio;
@@ -125,7 +124,8 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n)
u32 parent;
struct net_device *dev;
struct Qdisc *q;
- struct tcf_proto **back, **chain;
+ struct tcf_proto __rcu **back;
+ struct tcf_proto __rcu **chain;
struct tcf_proto *tp;
const struct tcf_proto_ops *tp_ops;
const struct Qdisc_class_ops *cops;
@@ -197,7 +197,9 @@ replay:
goto errout;
/* Check the chain for existence of proto-tcf with this priority */
- for (back = chain; (tp = *back) != NULL; back = &tp->next) {
+ for (back = chain;
+ (tp = rtnl_dereference(*back)) != NULL;
+ back = &tp->next) {
if (tp->prio >= prio) {
if (tp->prio == prio) {
if (!nprio ||
@@ -209,8 +211,6 @@ replay:
}
}
- root_lock = qdisc_root_sleeping_lock(q);
-
if (tp == NULL) {
/* Proto-tcf does not exist, create new one */
@@ -259,7 +259,8 @@ replay:
}
tp->ops = tp_ops;
tp->protocol = protocol;
- tp->prio = nprio ? : TC_H_MAJ(tcf_auto_prio(*back));
+ tp->prio = nprio ? :
+ TC_H_MAJ(tcf_auto_prio(rtnl_dereference(*back)));
tp->q = q;
tp->classify = tp_ops->classify;
tp->classid = parent;
@@ -280,9 +281,9 @@ replay:
if (fh == 0) {
if (n->nlmsg_type == RTM_DELTFILTER && t->tcm_handle == 0) {
- spin_lock_bh(root_lock);
- *back = tp->next;
- spin_unlock_bh(root_lock);
+ struct tcf_proto *next = rtnl_dereference(tp->next);
+
+ RCU_INIT_POINTER(*back, next);
tfilter_notify(net, skb, n, tp, fh, RTM_DELTFILTER);
tcf_destroy(tp);
@@ -322,10 +323,8 @@ replay:
n->nlmsg_flags & NLM_F_CREATE ? TCA_ACT_NOREPLACE : TCA_ACT_REPLACE);
if (err == 0) {
if (tp_created) {
- spin_lock_bh(root_lock);
- tp->next = *back;
- *back = tp;
- spin_unlock_bh(root_lock);
+ RCU_INIT_POINTER(tp->next, rtnl_dereference(*back));
+ rcu_assign_pointer(*back, tp);
}
tfilter_notify(net, skb, n, tp, fh, RTM_NEWTFILTER);
} else {
@@ -420,7 +419,7 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
int s_t;
struct net_device *dev;
struct Qdisc *q;
- struct tcf_proto *tp, **chain;
+ struct tcf_proto *tp, __rcu **chain;
struct tcmsg *tcm = nlmsg_data(cb->nlh);
unsigned long cl = 0;
const struct Qdisc_class_ops *cops;
@@ -454,7 +453,8 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
s_t = cb->args[0];
- for (tp = *chain, t = 0; tp; tp = tp->next, t++) {
+ for (tp = rtnl_dereference(*chain), t = 0;
+ tp; tp = rtnl_dereference(tp->next), t++) {
if (t < s_t)
continue;
if (TC_H_MAJ(tcm->tcm_info) &&
diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
index 0ae1813e3e90..1937298d6775 100644
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c
@@ -24,6 +24,7 @@
struct basic_head {
u32 hgenerator;
struct list_head flist;
+ struct rcu_head rcu;
};
struct basic_filter {
@@ -31,17 +32,19 @@ struct basic_filter {
struct tcf_exts exts;
struct tcf_ematch_tree ematches;
struct tcf_result res;
+ struct tcf_proto *tp;
struct list_head link;
+ struct rcu_head rcu;
};
static int basic_classify(struct sk_buff *skb, const struct tcf_proto *tp,
struct tcf_result *res)
{
int r;
- struct basic_head *head = tp->root;
+ struct basic_head *head = rcu_dereference_bh(tp->root);
struct basic_filter *f;
- list_for_each_entry(f, &head->flist, link) {
+ list_for_each_entry_rcu(f, &head->flist, link) {
if (!tcf_em_tree_match(skb, &f->ematches, NULL))
continue;
*res = f->res;
@@ -56,7 +59,7 @@ static int basic_classify(struct sk_buff *skb, const struct tcf_proto *tp,
static unsigned long basic_get(struct tcf_proto *tp, u32 handle)
{
unsigned long l = 0UL;
- struct basic_head *head = tp->root;
+ struct basic_head *head = rtnl_dereference(tp->root);
struct basic_filter *f;
if (head == NULL)
@@ -81,12 +84,15 @@ static int basic_init(struct tcf_proto *tp)
if (head == NULL)
return -ENOBUFS;
INIT_LIST_HEAD(&head->flist);
- tp->root = head;
+ rcu_assign_pointer(tp->root, head);
return 0;
}
-static void basic_delete_filter(struct tcf_proto *tp, struct basic_filter *f)
+static void basic_delete_filter(struct rcu_head *head)
{
+ struct basic_filter *f = container_of(head, struct basic_filter, rcu);
+ struct tcf_proto *tp = f->tp;
+
tcf_unbind_filter(tp, &f->res);
tcf_exts_destroy(tp, &f->exts);
tcf_em_tree_destroy(tp, &f->ematches);
@@ -95,27 +101,26 @@ static void basic_delete_filter(struct tcf_proto *tp, struct basic_filter *f)
static void basic_destroy(struct tcf_proto *tp)
{
- struct basic_head *head = tp->root;
+ struct basic_head *head = rtnl_dereference(tp->root);
struct basic_filter *f, *n;
list_for_each_entry_safe(f, n, &head->flist, link) {
- list_del(&f->link);
- basic_delete_filter(tp, f);
+ list_del_rcu(&f->link);
+ call_rcu(&f->rcu, basic_delete_filter);
}
- kfree(head);
+ RCU_INIT_POINTER(tp->root, NULL);
+ kfree_rcu(head, rcu);
}
static int basic_delete(struct tcf_proto *tp, unsigned long arg)
{
- struct basic_head *head = tp->root;
+ struct basic_head *head = rtnl_dereference(tp->root);
struct basic_filter *t, *f = (struct basic_filter *) arg;
list_for_each_entry(t, &head->flist, link)
if (t == f) {
- tcf_tree_lock(tp);
- list_del(&t->link);
- tcf_tree_unlock(tp);
- basic_delete_filter(tp, t);
+ list_del_rcu(&t->link);
+ call_rcu(&t->rcu, basic_delete_filter);
return 0;
}
@@ -152,6 +157,7 @@ static int basic_set_parms(struct net *net, struct tcf_proto *tp,
tcf_exts_change(tp, &f->exts, &e);
tcf_em_tree_change(tp, &f->ematches, &t);
+ f->tp = tp;
return 0;
errout:
@@ -164,9 +170,10 @@ static int basic_change(struct net *net, struct sk_buff *in_skb,
struct nlattr **tca, unsigned long *arg, bool ovr)
{
int err;
- struct basic_head *head = tp->root;
+ struct basic_head *head = rtnl_dereference(tp->root);
struct nlattr *tb[TCA_BASIC_MAX + 1];
- struct basic_filter *f = (struct basic_filter *) *arg;
+ struct basic_filter *fold = (struct basic_filter *) *arg;
+ struct basic_filter *fnew;
if (tca[TCA_OPTIONS] == NULL)
return -EINVAL;
@@ -176,22 +183,23 @@ static int basic_change(struct net *net, struct sk_buff *in_skb,
if (err < 0)
return err;
- if (f != NULL) {
- if (handle && f->handle != handle)
+ if (fold != NULL) {
+ if (handle && fold->handle != handle)
return -EINVAL;
- return basic_set_parms(net, tp, f, base, tb, tca[TCA_RATE], ovr);
}
err = -ENOBUFS;
- f = kzalloc(sizeof(*f), GFP_KERNEL);
- if (f == NULL)
+ fnew = kzalloc(sizeof(*fnew), GFP_KERNEL);
+ if (fnew == NULL)
goto errout;
- tcf_exts_init(&f->exts, TCA_BASIC_ACT, TCA_BASIC_POLICE);
+ tcf_exts_init(&fnew->exts, TCA_BASIC_ACT, TCA_BASIC_POLICE);
err = -EINVAL;
- if (handle)
- f->handle = handle;
- else {
+ if (handle) {
+ fnew->handle = handle;
+ } else if (fold) {
+ fnew->handle = fold->handle;
+ } else {
unsigned int i = 0x80000000;
do {
if (++head->hgenerator == 0x7FFFFFFF)
@@ -203,29 +211,31 @@ static int basic_change(struct net *net, struct sk_buff *in_skb,
goto errout;
}
- f->handle = head->hgenerator;
+ fnew->handle = head->hgenerator;
}
- err = basic_set_parms(net, tp, f, base, tb, tca[TCA_RATE], ovr);
+ err = basic_set_parms(net, tp, fnew, base, tb, tca[TCA_RATE], ovr);
if (err < 0)
goto errout;
- tcf_tree_lock(tp);
- list_add(&f->link, &head->flist);
- tcf_tree_unlock(tp);
- *arg = (unsigned long) f;
+ *arg = (unsigned long)fnew;
+
+ if (fold) {
+ list_replace_rcu(&fold->link, &fnew->link);
+ call_rcu(&fold->rcu, basic_delete_filter);
+ } else {
+ list_add_rcu(&fnew->link, &head->flist);
+ }
return 0;
errout:
- if (*arg == 0UL && f)
- kfree(f);
-
+ kfree(fnew);
return err;
}
static void basic_walk(struct tcf_proto *tp, struct tcf_walker *arg)
{
- struct basic_head *head = tp->root;
+ struct basic_head *head = rtnl_dereference(tp->root);
struct basic_filter *f;
list_for_each_entry(f, &head->flist, link) {
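
Every classifier converted in this series follows the same copy-update-defer shape seen here in cls_basic; a generic sketch with illustrative types (writer runs under RTNL, readers under rcu_read_lock_bh()):

    #include <linux/rculist.h>
    #include <linux/slab.h>
    #include <linux/types.h>

    struct filt {
            struct list_head link;
            struct rcu_head rcu;
            u32 handle;
    };

    static void filt_free_rcu(struct rcu_head *head)
    {
            kfree(container_of(head, struct filt, rcu));
    }

    /* Writer (RTNL held): build the new filter fully, publish it in one
     * pointer update, and free the old one only after a grace period.
     * Note list_replace_rcu(old, new) takes the victim first. Readers
     * walk the list with list_for_each_entry_rcu() and always see
     * either the old or the new filter, never a half-updated one.
     */
    static int example_replace(struct list_head *flist, struct filt *old, u32 h)
    {
            struct filt *new = kzalloc(sizeof(*new), GFP_KERNEL);

            if (!new)
                    return -ENOBUFS;
            new->handle = h;
            if (old) {
                    list_replace_rcu(&old->link, &new->link);
                    call_rcu(&old->rcu, filt_free_rcu);
            } else {
                    list_add_rcu(&new->link, flist);
            }
            return 0;
    }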
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index 0e30d58149da..6a7386e6e5a8 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -27,6 +27,7 @@ MODULE_DESCRIPTION("TC BPF based classifier");
struct cls_bpf_head {
struct list_head plist;
u32 hgen;
+ struct rcu_head rcu;
};
struct cls_bpf_prog {
@@ -37,6 +38,8 @@ struct cls_bpf_prog {
struct list_head link;
u32 handle;
u16 bpf_len;
+ struct tcf_proto *tp;
+ struct rcu_head rcu;
};
static const struct nla_policy bpf_policy[TCA_BPF_MAX + 1] = {
@@ -49,11 +52,11 @@ static const struct nla_policy bpf_policy[TCA_BPF_MAX + 1] = {
static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
struct tcf_result *res)
{
- struct cls_bpf_head *head = tp->root;
+ struct cls_bpf_head *head = rcu_dereference(tp->root);
struct cls_bpf_prog *prog;
int ret;
- list_for_each_entry(prog, &head->plist, link) {
+ list_for_each_entry_rcu(prog, &head->plist, link) {
int filter_res = BPF_PROG_RUN(prog->filter, skb);
if (filter_res == 0)
@@ -81,8 +84,8 @@ static int cls_bpf_init(struct tcf_proto *tp)
if (head == NULL)
return -ENOBUFS;
- INIT_LIST_HEAD(&head->plist);
- tp->root = head;
+ INIT_LIST_HEAD_RCU(&head->plist);
+ rcu_assign_pointer(tp->root, head);
return 0;
}
@@ -98,18 +101,22 @@ static void cls_bpf_delete_prog(struct tcf_proto *tp, struct cls_bpf_prog *prog)
kfree(prog);
}
+static void __cls_bpf_delete_prog(struct rcu_head *rcu)
+{
+ struct cls_bpf_prog *prog = container_of(rcu, struct cls_bpf_prog, rcu);
+
+ cls_bpf_delete_prog(prog->tp, prog);
+}
+
static int cls_bpf_delete(struct tcf_proto *tp, unsigned long arg)
{
- struct cls_bpf_head *head = tp->root;
+ struct cls_bpf_head *head = rtnl_dereference(tp->root);
struct cls_bpf_prog *prog, *todel = (struct cls_bpf_prog *) arg;
list_for_each_entry(prog, &head->plist, link) {
if (prog == todel) {
- tcf_tree_lock(tp);
- list_del(&prog->link);
- tcf_tree_unlock(tp);
-
- cls_bpf_delete_prog(tp, prog);
+ list_del_rcu(&prog->link);
+ call_rcu(&prog->rcu, __cls_bpf_delete_prog);
return 0;
}
}
@@ -119,27 +126,28 @@ static int cls_bpf_delete(struct tcf_proto *tp, unsigned long arg)
static void cls_bpf_destroy(struct tcf_proto *tp)
{
- struct cls_bpf_head *head = tp->root;
+ struct cls_bpf_head *head = rtnl_dereference(tp->root);
struct cls_bpf_prog *prog, *tmp;
list_for_each_entry_safe(prog, tmp, &head->plist, link) {
- list_del(&prog->link);
- cls_bpf_delete_prog(tp, prog);
+ list_del_rcu(&prog->link);
+ call_rcu(&prog->rcu, __cls_bpf_delete_prog);
}
- kfree(head);
+ RCU_INIT_POINTER(tp->root, NULL);
+ kfree_rcu(head, rcu);
}
static unsigned long cls_bpf_get(struct tcf_proto *tp, u32 handle)
{
- struct cls_bpf_head *head = tp->root;
+ struct cls_bpf_head *head = rtnl_dereference(tp->root);
struct cls_bpf_prog *prog;
unsigned long ret = 0UL;
if (head == NULL)
return 0UL;
- list_for_each_entry(prog, &head->plist, link) {
+ list_for_each_entry_rcu(prog, &head->plist, link) {
if (prog->handle == handle) {
ret = (unsigned long) prog;
break;
@@ -158,10 +166,10 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp,
unsigned long base, struct nlattr **tb,
struct nlattr *est, bool ovr)
{
- struct sock_filter *bpf_ops, *bpf_old;
+ struct sock_filter *bpf_ops;
struct tcf_exts exts;
struct sock_fprog_kern tmp;
- struct bpf_prog *fp, *fp_old;
+ struct bpf_prog *fp;
u16 bpf_size, bpf_len;
u32 classid;
int ret;
@@ -197,26 +205,15 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp,
if (ret)
goto errout_free;
- tcf_tree_lock(tp);
- fp_old = prog->filter;
- bpf_old = prog->bpf_ops;
-
prog->bpf_len = bpf_len;
prog->bpf_ops = bpf_ops;
prog->filter = fp;
prog->res.classid = classid;
- tcf_tree_unlock(tp);
tcf_bind_filter(tp, &prog->res, base);
tcf_exts_change(tp, &prog->exts, &exts);
- if (fp_old)
- bpf_prog_destroy(fp_old);
- if (bpf_old)
- kfree(bpf_old);
-
return 0;
-
errout_free:
kfree(bpf_ops);
errout:
@@ -244,9 +241,10 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
u32 handle, struct nlattr **tca,
unsigned long *arg, bool ovr)
{
- struct cls_bpf_head *head = tp->root;
- struct cls_bpf_prog *prog = (struct cls_bpf_prog *) *arg;
+ struct cls_bpf_head *head = rtnl_dereference(tp->root);
+ struct cls_bpf_prog *oldprog = (struct cls_bpf_prog *) *arg;
struct nlattr *tb[TCA_BPF_MAX + 1];
+ struct cls_bpf_prog *prog;
int ret;
if (tca[TCA_OPTIONS] == NULL)
@@ -256,18 +254,19 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
if (ret < 0)
return ret;
- if (prog != NULL) {
- if (handle && prog->handle != handle)
- return -EINVAL;
- return cls_bpf_modify_existing(net, tp, prog, base, tb,
- tca[TCA_RATE], ovr);
- }
-
prog = kzalloc(sizeof(*prog), GFP_KERNEL);
- if (prog == NULL)
+ if (!prog)
return -ENOBUFS;
tcf_exts_init(&prog->exts, TCA_BPF_ACT, TCA_BPF_POLICE);
+
+ if (oldprog) {
+ if (handle && oldprog->handle != handle) {
+ ret = -EINVAL;
+ goto errout;
+ }
+ }
+
if (handle == 0)
prog->handle = cls_bpf_grab_new_handle(tp, head);
else
@@ -281,16 +280,17 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
if (ret < 0)
goto errout;
- tcf_tree_lock(tp);
- list_add(&prog->link, &head->plist);
- tcf_tree_unlock(tp);
+ if (oldprog) {
+ list_replace_rcu(&oldprog->link, &prog->link);
+ call_rcu(&oldprog->rcu, __cls_bpf_delete_prog);
+ } else {
+ list_add_rcu(&prog->link, &head->plist);
+ }
*arg = (unsigned long) prog;
-
return 0;
errout:
- if (*arg == 0UL && prog)
- kfree(prog);
+ kfree(prog);
return ret;
}
@@ -339,10 +339,10 @@ nla_put_failure:
static void cls_bpf_walk(struct tcf_proto *tp, struct tcf_walker *arg)
{
- struct cls_bpf_head *head = tp->root;
+ struct cls_bpf_head *head = rtnl_dereference(tp->root);
struct cls_bpf_prog *prog;
- list_for_each_entry(prog, &head->plist, link) {
+ list_for_each_entry_rcu(prog, &head->plist, link) {
if (arg->count < arg->skip)
goto skip;
if (arg->fn(tp, (unsigned long) prog, arg) < 0) {
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
index cacf01bd04f0..3b7548759998 100644
--- a/net/sched/cls_cgroup.c
+++ b/net/sched/cls_cgroup.c
@@ -22,17 +22,17 @@ struct cls_cgroup_head {
u32 handle;
struct tcf_exts exts;
struct tcf_ematch_tree ematches;
+ struct tcf_proto *tp;
+ struct rcu_head rcu;
};
static int cls_cgroup_classify(struct sk_buff *skb, const struct tcf_proto *tp,
struct tcf_result *res)
{
- struct cls_cgroup_head *head = tp->root;
+ struct cls_cgroup_head *head = rcu_dereference_bh(tp->root);
u32 classid;
- rcu_read_lock();
classid = task_cls_state(current)->classid;
- rcu_read_unlock();
/*
* Due to the nature of the classifier it is required to ignore all
@@ -80,13 +80,25 @@ static const struct nla_policy cgroup_policy[TCA_CGROUP_MAX + 1] = {
[TCA_CGROUP_EMATCHES] = { .type = NLA_NESTED },
};
+static void cls_cgroup_destroy_rcu(struct rcu_head *root)
+{
+ struct cls_cgroup_head *head = container_of(root,
+ struct cls_cgroup_head,
+ rcu);
+
+ tcf_exts_destroy(head->tp, &head->exts);
+ tcf_em_tree_destroy(head->tp, &head->ematches);
+ kfree(head);
+}
+
static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base,
u32 handle, struct nlattr **tca,
unsigned long *arg, bool ovr)
{
struct nlattr *tb[TCA_CGROUP_MAX + 1];
- struct cls_cgroup_head *head = tp->root;
+ struct cls_cgroup_head *head = rtnl_dereference(tp->root);
+ struct cls_cgroup_head *new;
struct tcf_ematch_tree t;
struct tcf_exts e;
int err;
@@ -94,25 +106,24 @@ static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb,
if (!tca[TCA_OPTIONS])
return -EINVAL;
- if (head == NULL) {
- if (!handle)
- return -EINVAL;
+ if (!head && !handle)
+ return -EINVAL;
- head = kzalloc(sizeof(*head), GFP_KERNEL);
- if (head == NULL)
- return -ENOBUFS;
+ if (head && handle != head->handle)
+ return -ENOENT;
- tcf_exts_init(&head->exts, TCA_CGROUP_ACT, TCA_CGROUP_POLICE);
- head->handle = handle;
+ new = kzalloc(sizeof(*head), GFP_KERNEL);
+ if (!new)
+ return -ENOBUFS;
- tcf_tree_lock(tp);
- tp->root = head;
- tcf_tree_unlock(tp);
+ if (head) {
+ new->handle = head->handle;
+ } else {
+ tcf_exts_init(&new->exts, TCA_CGROUP_ACT, TCA_CGROUP_POLICE);
+ new->handle = handle;
}
- if (handle != head->handle)
- return -ENOENT;
-
+ new->tp = tp;
err = nla_parse_nested(tb, TCA_CGROUP_MAX, tca[TCA_OPTIONS],
cgroup_policy);
if (err < 0)
@@ -127,20 +138,24 @@ static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb,
if (err < 0)
return err;
- tcf_exts_change(tp, &head->exts, &e);
- tcf_em_tree_change(tp, &head->ematches, &t);
+ tcf_exts_change(tp, &new->exts, &e);
+ tcf_em_tree_change(tp, &new->ematches, &t);
+ rcu_assign_pointer(tp->root, new);
+ if (head)
+ call_rcu(&head->rcu, cls_cgroup_destroy_rcu);
return 0;
}
static void cls_cgroup_destroy(struct tcf_proto *tp)
{
- struct cls_cgroup_head *head = tp->root;
+ struct cls_cgroup_head *head = rtnl_dereference(tp->root);
if (head) {
tcf_exts_destroy(tp, &head->exts);
tcf_em_tree_destroy(tp, &head->ematches);
- kfree(head);
+ RCU_INIT_POINTER(tp->root, NULL);
+ kfree_rcu(head, rcu);
}
}
@@ -151,7 +166,7 @@ static int cls_cgroup_delete(struct tcf_proto *tp, unsigned long arg)
static void cls_cgroup_walk(struct tcf_proto *tp, struct tcf_walker *arg)
{
- struct cls_cgroup_head *head = tp->root;
+ struct cls_cgroup_head *head = rtnl_dereference(tp->root);
if (arg->count < arg->skip)
goto skip;
@@ -167,7 +182,7 @@ skip:
static int cls_cgroup_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
struct sk_buff *skb, struct tcmsg *t)
{
- struct cls_cgroup_head *head = tp->root;
+ struct cls_cgroup_head *head = rtnl_dereference(tp->root);
unsigned char *b = skb_tail_pointer(skb);
struct nlattr *nest;
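
cls_cgroup keeps a single head rather than a filter list, so replacement is a bare pointer swap; a minimal sketch of that variant, with illustrative names:

    #include <linux/rcupdate.h>
    #include <linux/rtnetlink.h>
    #include <linux/slab.h>

    struct cfg {
            int value;
            struct rcu_head rcu;
    };

    static struct cfg __rcu *active_cfg;

    static void cfg_free_rcu(struct rcu_head *head)
    {
            kfree(container_of(head, struct cfg, rcu));
    }

    /* Writer (RTNL held): build the new config fully, publish it with
     * rcu_assign_pointer() so readers see it only when complete, then
     * defer freeing the old one past a grace period.
     */
    static int example_swap(int value)
    {
            struct cfg *old = rtnl_dereference(active_cfg);
            struct cfg *new = kzalloc(sizeof(*new), GFP_KERNEL);

            if (!new)
                    return -ENOBUFS;
            new->value = value;
            rcu_assign_pointer(active_cfg, new);
            if (old)
                    call_rcu(&old->rcu, cfg_free_rcu);
            return 0;
    }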
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index 35be16f7c192..95736fa479f3 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -34,12 +34,14 @@
struct flow_head {
struct list_head filters;
+ struct rcu_head rcu;
};
struct flow_filter {
struct list_head list;
struct tcf_exts exts;
struct tcf_ematch_tree ematches;
+ struct tcf_proto *tp;
struct timer_list perturb_timer;
u32 perturb_period;
u32 handle;
@@ -54,6 +56,7 @@ struct flow_filter {
u32 divisor;
u32 baseclass;
u32 hashrnd;
+ struct rcu_head rcu;
};
static inline u32 addr_fold(void *addr)
@@ -276,14 +279,14 @@ static u32 flow_key_get(struct sk_buff *skb, int key, struct flow_keys *flow)
static int flow_classify(struct sk_buff *skb, const struct tcf_proto *tp,
struct tcf_result *res)
{
- struct flow_head *head = tp->root;
+ struct flow_head *head = rcu_dereference_bh(tp->root);
struct flow_filter *f;
u32 keymask;
u32 classid;
unsigned int n, key;
int r;
- list_for_each_entry(f, &head->filters, list) {
+ list_for_each_entry_rcu(f, &head->filters, list) {
u32 keys[FLOW_KEY_MAX + 1];
struct flow_keys flow_keys;
@@ -346,13 +349,23 @@ static const struct nla_policy flow_policy[TCA_FLOW_MAX + 1] = {
[TCA_FLOW_PERTURB] = { .type = NLA_U32 },
};
+static void flow_destroy_filter(struct rcu_head *head)
+{
+ struct flow_filter *f = container_of(head, struct flow_filter, rcu);
+
+ del_timer_sync(&f->perturb_timer);
+ tcf_exts_destroy(f->tp, &f->exts);
+ tcf_em_tree_destroy(f->tp, &f->ematches);
+ kfree(f);
+}
+
static int flow_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base,
u32 handle, struct nlattr **tca,
unsigned long *arg, bool ovr)
{
- struct flow_head *head = tp->root;
- struct flow_filter *f;
+ struct flow_head *head = rtnl_dereference(tp->root);
+ struct flow_filter *fold, *fnew;
struct nlattr *opt = tca[TCA_OPTIONS];
struct nlattr *tb[TCA_FLOW_MAX + 1];
struct tcf_exts e;
@@ -401,20 +414,42 @@ static int flow_change(struct net *net, struct sk_buff *in_skb,
if (err < 0)
goto err1;
- f = (struct flow_filter *)*arg;
- if (f != NULL) {
+ err = -ENOBUFS;
+ fnew = kzalloc(sizeof(*fnew), GFP_KERNEL);
+ if (!fnew)
+ goto err2;
+
+ fold = (struct flow_filter *)*arg;
+ if (fold) {
err = -EINVAL;
- if (f->handle != handle && handle)
+ if (fold->handle != handle && handle)
goto err2;
- mode = f->mode;
+ /* Copy fold into fnew */
+ fnew->tp = fold->tp;
+ fnew->handle = fold->handle;
+ fnew->nkeys = fold->nkeys;
+ fnew->keymask = fold->keymask;
+ fnew->mode = fold->mode;
+ fnew->mask = fold->mask;
+ fnew->xor = fold->xor;
+ fnew->rshift = fold->rshift;
+ fnew->addend = fold->addend;
+ fnew->divisor = fold->divisor;
+ fnew->baseclass = fold->baseclass;
+ fnew->hashrnd = fold->hashrnd;
+
+ mode = fold->mode;
if (tb[TCA_FLOW_MODE])
mode = nla_get_u32(tb[TCA_FLOW_MODE]);
if (mode != FLOW_MODE_HASH && nkeys > 1)
goto err2;
if (mode == FLOW_MODE_HASH)
- perturb_period = f->perturb_period;
+ perturb_period = fold->perturb_period;
if (tb[TCA_FLOW_PERTURB]) {
if (mode != FLOW_MODE_HASH)
goto err2;
@@ -444,83 +479,70 @@ static int flow_change(struct net *net, struct sk_buff *in_skb,
if (TC_H_MIN(baseclass) == 0)
baseclass = TC_H_MAKE(baseclass, 1);
- err = -ENOBUFS;
- f = kzalloc(sizeof(*f), GFP_KERNEL);
- if (f == NULL)
- goto err2;
-
- f->handle = handle;
- f->mask = ~0U;
- tcf_exts_init(&f->exts, TCA_FLOW_ACT, TCA_FLOW_POLICE);
-
- get_random_bytes(&f->hashrnd, 4);
- f->perturb_timer.function = flow_perturbation;
- f->perturb_timer.data = (unsigned long)f;
- init_timer_deferrable(&f->perturb_timer);
+ fnew->handle = handle;
+ fnew->mask = ~0U;
+ fnew->tp = tp;
+ get_random_bytes(&fnew->hashrnd, 4);
+ tcf_exts_init(&fnew->exts, TCA_FLOW_ACT, TCA_FLOW_POLICE);
}
- tcf_exts_change(tp, &f->exts, &e);
- tcf_em_tree_change(tp, &f->ematches, &t);
+ fnew->perturb_timer.function = flow_perturbation;
+ fnew->perturb_timer.data = (unsigned long)fnew;
+ init_timer_deferrable(&fnew->perturb_timer);
- tcf_tree_lock(tp);
+ tcf_exts_change(tp, &fnew->exts, &e);
+ tcf_em_tree_change(tp, &fnew->ematches, &t);
if (tb[TCA_FLOW_KEYS]) {
- f->keymask = keymask;
- f->nkeys = nkeys;
+ fnew->keymask = keymask;
+ fnew->nkeys = nkeys;
}
- f->mode = mode;
+ fnew->mode = mode;
if (tb[TCA_FLOW_MASK])
- f->mask = nla_get_u32(tb[TCA_FLOW_MASK]);
+ fnew->mask = nla_get_u32(tb[TCA_FLOW_MASK]);
if (tb[TCA_FLOW_XOR])
- f->xor = nla_get_u32(tb[TCA_FLOW_XOR]);
+ fnew->xor = nla_get_u32(tb[TCA_FLOW_XOR]);
if (tb[TCA_FLOW_RSHIFT])
- f->rshift = nla_get_u32(tb[TCA_FLOW_RSHIFT]);
+ fnew->rshift = nla_get_u32(tb[TCA_FLOW_RSHIFT]);
if (tb[TCA_FLOW_ADDEND])
- f->addend = nla_get_u32(tb[TCA_FLOW_ADDEND]);
+ fnew->addend = nla_get_u32(tb[TCA_FLOW_ADDEND]);
if (tb[TCA_FLOW_DIVISOR])
- f->divisor = nla_get_u32(tb[TCA_FLOW_DIVISOR]);
+ fnew->divisor = nla_get_u32(tb[TCA_FLOW_DIVISOR]);
if (baseclass)
- f->baseclass = baseclass;
+ fnew->baseclass = baseclass;
- f->perturb_period = perturb_period;
- del_timer(&f->perturb_timer);
+ fnew->perturb_period = perturb_period;
if (perturb_period)
- mod_timer(&f->perturb_timer, jiffies + perturb_period);
+ mod_timer(&fnew->perturb_timer, jiffies + perturb_period);
if (*arg == 0)
- list_add_tail(&f->list, &head->filters);
+ list_add_tail_rcu(&fnew->list, &head->filters);
+ else
+ list_replace_rcu(&fold->list, &fnew->list);
- tcf_tree_unlock(tp);
+ *arg = (unsigned long)fnew;
- *arg = (unsigned long)f;
+ if (fold)
+ call_rcu(&fold->rcu, flow_destroy_filter);
return 0;
err2:
tcf_em_tree_destroy(tp, &t);
+ kfree(fnew);
err1:
tcf_exts_destroy(tp, &e);
return err;
}
-static void flow_destroy_filter(struct tcf_proto *tp, struct flow_filter *f)
-{
- del_timer_sync(&f->perturb_timer);
- tcf_exts_destroy(tp, &f->exts);
- tcf_em_tree_destroy(tp, &f->ematches);
- kfree(f);
-}
-
static int flow_delete(struct tcf_proto *tp, unsigned long arg)
{
struct flow_filter *f = (struct flow_filter *)arg;
- tcf_tree_lock(tp);
- list_del(&f->list);
- tcf_tree_unlock(tp);
- flow_destroy_filter(tp, f);
+ list_del_rcu(&f->list);
+ call_rcu(&f->rcu, flow_destroy_filter);
return 0;
}
@@ -532,28 +554,29 @@ static int flow_init(struct tcf_proto *tp)
if (head == NULL)
return -ENOBUFS;
INIT_LIST_HEAD(&head->filters);
- tp->root = head;
+ rcu_assign_pointer(tp->root, head);
return 0;
}
static void flow_destroy(struct tcf_proto *tp)
{
- struct flow_head *head = tp->root;
+ struct flow_head *head = rtnl_dereference(tp->root);
struct flow_filter *f, *next;
list_for_each_entry_safe(f, next, &head->filters, list) {
- list_del(&f->list);
- flow_destroy_filter(tp, f);
+ list_del_rcu(&f->list);
+ call_rcu(&f->rcu, flow_destroy_filter);
}
- kfree(head);
+ RCU_INIT_POINTER(tp->root, NULL);
+ kfree_rcu(head, rcu);
}
static unsigned long flow_get(struct tcf_proto *tp, u32 handle)
{
- struct flow_head *head = tp->root;
+ struct flow_head *head = rtnl_dereference(tp->root);
struct flow_filter *f;
- list_for_each_entry(f, &head->filters, list)
+ list_for_each_entry_rcu(f, &head->filters, list)
if (f->handle == handle)
return (unsigned long)f;
return 0;
@@ -626,10 +649,10 @@ nla_put_failure:
static void flow_walk(struct tcf_proto *tp, struct tcf_walker *arg)
{
- struct flow_head *head = tp->root;
+ struct flow_head *head = rtnl_dereference(tp->root);
struct flow_filter *f;
- list_for_each_entry(f, &head->filters, list) {
+ list_for_each_entry_rcu(f, &head->filters, list) {
if (arg->count < arg->skip)
goto skip;
if (arg->fn(tp, (unsigned long)f, arg) < 0) {
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
index 861b03ccfed0..006b45a67fdd 100644
--- a/net/sched/cls_fw.c
+++ b/net/sched/cls_fw.c
@@ -33,17 +33,20 @@
struct fw_head {
u32 mask;
- struct fw_filter *ht[HTSIZE];
+ struct fw_filter __rcu *ht[HTSIZE];
+ struct rcu_head rcu;
};
struct fw_filter {
- struct fw_filter *next;
+ struct fw_filter __rcu *next;
u32 id;
struct tcf_result res;
#ifdef CONFIG_NET_CLS_IND
int ifindex;
#endif /* CONFIG_NET_CLS_IND */
struct tcf_exts exts;
+ struct tcf_proto *tp;
+ struct rcu_head rcu;
};
static u32 fw_hash(u32 handle)
@@ -56,14 +59,16 @@ static u32 fw_hash(u32 handle)
static int fw_classify(struct sk_buff *skb, const struct tcf_proto *tp,
struct tcf_result *res)
{
- struct fw_head *head = tp->root;
+ struct fw_head *head = rcu_dereference_bh(tp->root);
struct fw_filter *f;
int r;
u32 id = skb->mark;
if (head != NULL) {
id &= head->mask;
- for (f = head->ht[fw_hash(id)]; f; f = f->next) {
+
+ for (f = rcu_dereference_bh(head->ht[fw_hash(id)]); f;
+ f = rcu_dereference_bh(f->next)) {
if (f->id == id) {
*res = f->res;
#ifdef CONFIG_NET_CLS_IND
@@ -92,13 +97,14 @@ static int fw_classify(struct sk_buff *skb, const struct tcf_proto *tp,
static unsigned long fw_get(struct tcf_proto *tp, u32 handle)
{
- struct fw_head *head = tp->root;
+ struct fw_head *head = rtnl_dereference(tp->root);
struct fw_filter *f;
if (head == NULL)
return 0;
- for (f = head->ht[fw_hash(handle)]; f; f = f->next) {
+ f = rtnl_dereference(head->ht[fw_hash(handle)]);
+ for (; f; f = rtnl_dereference(f->next)) {
if (f->id == handle)
return (unsigned long)f;
}
@@ -114,8 +120,11 @@ static int fw_init(struct tcf_proto *tp)
return 0;
}
-static void fw_delete_filter(struct tcf_proto *tp, struct fw_filter *f)
+static void fw_delete_filter(struct rcu_head *head)
{
+ struct fw_filter *f = container_of(head, struct fw_filter, rcu);
+ struct tcf_proto *tp = f->tp;
+
tcf_unbind_filter(tp, &f->res);
tcf_exts_destroy(tp, &f->exts);
kfree(f);
@@ -123,7 +132,7 @@ static void fw_delete_filter(struct tcf_proto *tp, struct fw_filter *f)
static void fw_destroy(struct tcf_proto *tp)
{
- struct fw_head *head = tp->root;
+ struct fw_head *head = rtnl_dereference(tp->root);
struct fw_filter *f;
int h;
@@ -131,29 +140,33 @@ static void fw_destroy(struct tcf_proto *tp)
return;
for (h = 0; h < HTSIZE; h++) {
- while ((f = head->ht[h]) != NULL) {
- head->ht[h] = f->next;
- fw_delete_filter(tp, f);
+ while ((f = rtnl_dereference(head->ht[h])) != NULL) {
+ RCU_INIT_POINTER(head->ht[h],
+ rtnl_dereference(f->next));
+ call_rcu(&f->rcu, fw_delete_filter);
}
}
- kfree(head);
+ RCU_INIT_POINTER(tp->root, NULL);
+ kfree_rcu(head, rcu);
}
static int fw_delete(struct tcf_proto *tp, unsigned long arg)
{
- struct fw_head *head = tp->root;
+ struct fw_head *head = rtnl_dereference(tp->root);
struct fw_filter *f = (struct fw_filter *)arg;
- struct fw_filter **fp;
+ struct fw_filter __rcu **fp;
+ struct fw_filter *pfp;
if (head == NULL || f == NULL)
goto out;
- for (fp = &head->ht[fw_hash(f->id)]; *fp; fp = &(*fp)->next) {
- if (*fp == f) {
- tcf_tree_lock(tp);
- *fp = f->next;
- tcf_tree_unlock(tp);
- fw_delete_filter(tp, f);
+ fp = &head->ht[fw_hash(f->id)];
+
+ for (pfp = rtnl_dereference(*fp); pfp;
+ fp = &pfp->next, pfp = rtnl_dereference(*fp)) {
+ if (pfp == f) {
+ RCU_INIT_POINTER(*fp, rtnl_dereference(f->next));
+ call_rcu(&f->rcu, fw_delete_filter);
return 0;
}
}
@@ -171,7 +184,7 @@ static int
fw_change_attrs(struct net *net, struct tcf_proto *tp, struct fw_filter *f,
struct nlattr **tb, struct nlattr **tca, unsigned long base, bool ovr)
{
- struct fw_head *head = tp->root;
+ struct fw_head *head = rtnl_dereference(tp->root);
struct tcf_exts e;
u32 mask;
int err;
@@ -220,7 +233,7 @@ static int fw_change(struct net *net, struct sk_buff *in_skb,
struct nlattr **tca,
unsigned long *arg, bool ovr)
{
- struct fw_head *head = tp->root;
+ struct fw_head *head = rtnl_dereference(tp->root);
struct fw_filter *f = (struct fw_filter *) *arg;
struct nlattr *opt = tca[TCA_OPTIONS];
struct nlattr *tb[TCA_FW_MAX + 1];
@@ -233,10 +246,42 @@ static int fw_change(struct net *net, struct sk_buff *in_skb,
if (err < 0)
return err;
- if (f != NULL) {
+ if (f) {
+ struct fw_filter *pfp, *fnew;
+ struct fw_filter __rcu **fp;
+
if (f->id != handle && handle)
return -EINVAL;
- return fw_change_attrs(net, tp, f, tb, tca, base, ovr);
+
+ fnew = kzalloc(sizeof(struct fw_filter), GFP_KERNEL);
+ if (!fnew)
+ return -ENOBUFS;
+
+ fnew->id = f->id;
+ fnew->res = f->res;
+#ifdef CONFIG_NET_CLS_IND
+ fnew->ifindex = f->ifindex;
+#endif /* CONFIG_NET_CLS_IND */
+ fnew->tp = f->tp;
+
+ err = fw_change_attrs(net, tp, fnew, tb, tca, base, ovr);
+ if (err < 0) {
+ kfree(fnew);
+ return err;
+ }
+
+ fp = &head->ht[fw_hash(fnew->id)];
+ for (pfp = rtnl_dereference(*fp); pfp;
+ fp = &pfp->next, pfp = rtnl_dereference(*fp))
+ if (pfp == f)
+ break;
+
+ RCU_INIT_POINTER(fnew->next, rtnl_dereference(pfp->next));
+ rcu_assign_pointer(*fp, fnew);
+ call_rcu(&f->rcu, fw_delete_filter);
+
+ *arg = (unsigned long)fnew;
+ return err;
}
if (!handle)
@@ -252,9 +297,7 @@ static int fw_change(struct net *net, struct sk_buff *in_skb,
return -ENOBUFS;
head->mask = mask;
- tcf_tree_lock(tp);
- tp->root = head;
- tcf_tree_unlock(tp);
+ rcu_assign_pointer(tp->root, head);
}
f = kzalloc(sizeof(struct fw_filter), GFP_KERNEL);
@@ -263,15 +306,14 @@ static int fw_change(struct net *net, struct sk_buff *in_skb,
tcf_exts_init(&f->exts, TCA_FW_ACT, TCA_FW_POLICE);
f->id = handle;
+ f->tp = tp;
err = fw_change_attrs(net, tp, f, tb, tca, base, ovr);
if (err < 0)
goto errout;
- f->next = head->ht[fw_hash(handle)];
- tcf_tree_lock(tp);
- head->ht[fw_hash(handle)] = f;
- tcf_tree_unlock(tp);
+ RCU_INIT_POINTER(f->next, head->ht[fw_hash(handle)]);
+ rcu_assign_pointer(head->ht[fw_hash(handle)], f);
*arg = (unsigned long)f;
return 0;
@@ -283,7 +325,7 @@ errout:
static void fw_walk(struct tcf_proto *tp, struct tcf_walker *arg)
{
- struct fw_head *head = tp->root;
+ struct fw_head *head = rtnl_dereference(tp->root);
int h;
if (head == NULL)
@@ -295,7 +337,8 @@ static void fw_walk(struct tcf_proto *tp, struct tcf_walker *arg)
for (h = 0; h < HTSIZE; h++) {
struct fw_filter *f;
- for (f = head->ht[h]; f; f = f->next) {
+ for (f = rtnl_dereference(head->ht[h]); f;
+ f = rtnl_dereference(f->next)) {
if (arg->count < arg->skip) {
arg->count++;
continue;
@@ -312,7 +355,7 @@ static void fw_walk(struct tcf_proto *tp, struct tcf_walker *arg)
static int fw_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
struct sk_buff *skb, struct tcmsg *t)
{
- struct fw_head *head = tp->root;
+ struct fw_head *head = rtnl_dereference(tp->root);
struct fw_filter *f = (struct fw_filter *)fh;
unsigned char *b = skb_tail_pointer(skb);
struct nlattr *nest;
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index dd9fc2523c76..ba96deacf27c 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -29,25 +29,26 @@
* are mutually exclusive.
* 3. "to TAG from ANY" has higher priority, than "to ANY from XXX"
*/
-
struct route4_fastmap {
- struct route4_filter *filter;
- u32 id;
- int iif;
+ struct route4_filter *filter;
+ u32 id;
+ int iif;
};
struct route4_head {
- struct route4_fastmap fastmap[16];
- struct route4_bucket *table[256 + 1];
+ struct route4_fastmap fastmap[16];
+ struct route4_bucket __rcu *table[256 + 1];
+ struct rcu_head rcu;
};
struct route4_bucket {
/* 16 FROM buckets + 16 IIF buckets + 1 wildcard bucket */
- struct route4_filter *ht[16 + 16 + 1];
+ struct route4_filter __rcu *ht[16 + 16 + 1];
+ struct rcu_head rcu;
};
struct route4_filter {
- struct route4_filter *next;
+ struct route4_filter __rcu *next;
u32 id;
int iif;
@@ -55,6 +56,8 @@ struct route4_filter {
struct tcf_exts exts;
u32 handle;
struct route4_bucket *bkt;
+ struct tcf_proto *tp;
+ struct rcu_head rcu;
};
#define ROUTE4_FAILURE ((struct route4_filter *)(-1L))
@@ -64,14 +67,13 @@ static inline int route4_fastmap_hash(u32 id, int iif)
return id & 0xF;
}
+static DEFINE_SPINLOCK(fastmap_lock);
static void
-route4_reset_fastmap(struct Qdisc *q, struct route4_head *head, u32 id)
+route4_reset_fastmap(struct route4_head *head)
{
- spinlock_t *root_lock = qdisc_root_sleeping_lock(q);
-
- spin_lock_bh(root_lock);
+ spin_lock_bh(&fastmap_lock);
memset(head->fastmap, 0, sizeof(head->fastmap));
- spin_unlock_bh(root_lock);
+ spin_unlock_bh(&fastmap_lock);
}
static void
@@ -80,9 +82,12 @@ route4_set_fastmap(struct route4_head *head, u32 id, int iif,
{
int h = route4_fastmap_hash(id, iif);
+ /* fastmap updates must look atomic to align id, iif, filter */
+ spin_lock_bh(&fastmap_lock);
head->fastmap[h].id = id;
head->fastmap[h].iif = iif;
head->fastmap[h].filter = f;
+ spin_unlock_bh(&fastmap_lock);
}
static inline int route4_hash_to(u32 id)
@@ -123,7 +128,7 @@ static inline int route4_hash_wild(void)
static int route4_classify(struct sk_buff *skb, const struct tcf_proto *tp,
struct tcf_result *res)
{
- struct route4_head *head = tp->root;
+ struct route4_head *head = rcu_dereference_bh(tp->root);
struct dst_entry *dst;
struct route4_bucket *b;
struct route4_filter *f;
@@ -141,32 +146,43 @@ static int route4_classify(struct sk_buff *skb, const struct tcf_proto *tp,
iif = inet_iif(skb);
h = route4_fastmap_hash(id, iif);
+
+ spin_lock(&fastmap_lock);
if (id == head->fastmap[h].id &&
iif == head->fastmap[h].iif &&
(f = head->fastmap[h].filter) != NULL) {
- if (f == ROUTE4_FAILURE)
+ if (f == ROUTE4_FAILURE) {
+ spin_unlock(&fastmap_lock);
goto failure;
+ }
*res = f->res;
+ spin_unlock(&fastmap_lock);
return 0;
}
+ spin_unlock(&fastmap_lock);
h = route4_hash_to(id);
restart:
- b = head->table[h];
+ b = rcu_dereference_bh(head->table[h]);
if (b) {
- for (f = b->ht[route4_hash_from(id)]; f; f = f->next)
+ for (f = rcu_dereference_bh(b->ht[route4_hash_from(id)]);
+ f;
+ f = rcu_dereference_bh(f->next))
if (f->id == id)
ROUTE4_APPLY_RESULT();
- for (f = b->ht[route4_hash_iif(iif)]; f; f = f->next)
+ for (f = rcu_dereference_bh(b->ht[route4_hash_iif(iif)]);
+ f;
+ f = rcu_dereference_bh(f->next))
if (f->iif == iif)
ROUTE4_APPLY_RESULT();
- for (f = b->ht[route4_hash_wild()]; f; f = f->next)
+ for (f = rcu_dereference_bh(b->ht[route4_hash_wild()]);
+ f;
+ f = rcu_dereference_bh(f->next))
ROUTE4_APPLY_RESULT();
-
}
if (h < 256) {
h = 256;
@@ -213,7 +229,7 @@ static inline u32 from_hash(u32 id)
static unsigned long route4_get(struct tcf_proto *tp, u32 handle)
{
- struct route4_head *head = tp->root;
+ struct route4_head *head = rtnl_dereference(tp->root);
struct route4_bucket *b;
struct route4_filter *f;
unsigned int h1, h2;
@@ -229,9 +245,11 @@ static unsigned long route4_get(struct tcf_proto *tp, u32 handle)
if (h2 > 32)
return 0;
- b = head->table[h1];
+ b = rtnl_dereference(head->table[h1]);
if (b) {
- for (f = b->ht[h2]; f; f = f->next)
+ for (f = rtnl_dereference(b->ht[h2]);
+ f;
+ f = rtnl_dereference(f->next))
if (f->handle == handle)
return (unsigned long)f;
}
@@ -248,8 +266,11 @@ static int route4_init(struct tcf_proto *tp)
}
static void
-route4_delete_filter(struct tcf_proto *tp, struct route4_filter *f)
+route4_delete_filter(struct rcu_head *head)
{
+ struct route4_filter *f = container_of(head, struct route4_filter, rcu);
+ struct tcf_proto *tp = f->tp;
+
tcf_unbind_filter(tp, &f->res);
tcf_exts_destroy(tp, &f->exts);
kfree(f);
@@ -257,7 +278,7 @@ route4_delete_filter(struct tcf_proto *tp, struct route4_filter *f)
static void route4_destroy(struct tcf_proto *tp)
{
- struct route4_head *head = tp->root;
+ struct route4_head *head = rtnl_dereference(tp->root);
int h1, h2;
if (head == NULL)
@@ -266,28 +287,35 @@ static void route4_destroy(struct tcf_proto *tp)
for (h1 = 0; h1 <= 256; h1++) {
struct route4_bucket *b;
- b = head->table[h1];
+ b = rtnl_dereference(head->table[h1]);
if (b) {
for (h2 = 0; h2 <= 32; h2++) {
struct route4_filter *f;
- while ((f = b->ht[h2]) != NULL) {
- b->ht[h2] = f->next;
- route4_delete_filter(tp, f);
+ while ((f = rtnl_dereference(b->ht[h2])) != NULL) {
+ struct route4_filter *next;
+
+ next = rtnl_dereference(f->next);
+ RCU_INIT_POINTER(b->ht[h2], next);
+ call_rcu(&f->rcu, route4_delete_filter);
}
}
- kfree(b);
+ RCU_INIT_POINTER(head->table[h1], NULL);
+ kfree_rcu(b, rcu);
}
}
- kfree(head);
+ RCU_INIT_POINTER(tp->root, NULL);
+ kfree_rcu(head, rcu);
}
static int route4_delete(struct tcf_proto *tp, unsigned long arg)
{
- struct route4_head *head = tp->root;
- struct route4_filter **fp, *f = (struct route4_filter *)arg;
- unsigned int h = 0;
+ struct route4_head *head = rtnl_dereference(tp->root);
+ struct route4_filter *f = (struct route4_filter *)arg;
+ struct route4_filter __rcu **fp;
+ struct route4_filter *nf;
struct route4_bucket *b;
+ unsigned int h = 0;
int i;
if (!head || !f)
@@ -296,27 +324,35 @@ static int route4_delete(struct tcf_proto *tp, unsigned long arg)
h = f->handle;
b = f->bkt;
- for (fp = &b->ht[from_hash(h >> 16)]; *fp; fp = &(*fp)->next) {
- if (*fp == f) {
- tcf_tree_lock(tp);
- *fp = f->next;
- tcf_tree_unlock(tp);
+ fp = &b->ht[from_hash(h >> 16)];
+ for (nf = rtnl_dereference(*fp); nf;
+ fp = &nf->next, nf = rtnl_dereference(*fp)) {
+ if (nf == f) {
+ /* unlink it */
+ RCU_INIT_POINTER(*fp, rtnl_dereference(f->next));
- route4_reset_fastmap(tp->q, head, f->id);
- route4_delete_filter(tp, f);
+ /* Remove any fastmap lookups that might reference this filter;
+ * we unlinked the filter above, so it cannot get back into
+ * the fastmap.
+ */
+ route4_reset_fastmap(head);
- /* Strip tree */
+ /* Delete it */
+ call_rcu(&f->rcu, route4_delete_filter);
- for (i = 0; i <= 32; i++)
- if (b->ht[i])
+ /* Strip RTNL protected tree */
+ for (i = 0; i <= 32; i++) {
+ struct route4_filter *rt;
+
+ rt = rtnl_dereference(b->ht[i]);
+ if (rt)
return 0;
+ }
/* OK, session has no flows */
- tcf_tree_lock(tp);
- head->table[to_hash(h)] = NULL;
- tcf_tree_unlock(tp);
+ RCU_INIT_POINTER(head->table[to_hash(h)], NULL);
+ kfree_rcu(b, rcu);
- kfree(b);
return 0;
}
}
@@ -380,26 +416,25 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp,
}
h1 = to_hash(nhandle);
- b = head->table[h1];
+ b = rtnl_dereference(head->table[h1]);
if (!b) {
err = -ENOBUFS;
b = kzalloc(sizeof(struct route4_bucket), GFP_KERNEL);
if (b == NULL)
goto errout;
- tcf_tree_lock(tp);
- head->table[h1] = b;
- tcf_tree_unlock(tp);
+ rcu_assign_pointer(head->table[h1], b);
} else {
unsigned int h2 = from_hash(nhandle >> 16);
err = -EEXIST;
- for (fp = b->ht[h2]; fp; fp = fp->next)
+ for (fp = rtnl_dereference(b->ht[h2]);
+ fp;
+ fp = rtnl_dereference(fp->next))
if (fp->handle == f->handle)
goto errout;
}
- tcf_tree_lock(tp);
if (tb[TCA_ROUTE4_TO])
f->id = to;
@@ -410,7 +445,7 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp,
f->handle = nhandle;
f->bkt = b;
- tcf_tree_unlock(tp);
+ f->tp = tp;
if (tb[TCA_ROUTE4_CLASSID]) {
f->res.classid = nla_get_u32(tb[TCA_ROUTE4_CLASSID]);
@@ -431,14 +466,15 @@ static int route4_change(struct net *net, struct sk_buff *in_skb,
struct nlattr **tca,
unsigned long *arg, bool ovr)
{
- struct route4_head *head = tp->root;
- struct route4_filter *f, *f1, **fp;
+ struct route4_head *head = rtnl_dereference(tp->root);
+ struct route4_filter __rcu **fp;
+ struct route4_filter *fold, *f1, *pfp, *f = NULL;
struct route4_bucket *b;
struct nlattr *opt = tca[TCA_OPTIONS];
struct nlattr *tb[TCA_ROUTE4_MAX + 1];
unsigned int h, th;
- u32 old_handle = 0;
int err;
+ bool new = true;
if (opt == NULL)
return handle ? -EINVAL : 0;
@@ -447,70 +483,70 @@ static int route4_change(struct net *net, struct sk_buff *in_skb,
if (err < 0)
return err;
- f = (struct route4_filter *)*arg;
- if (f) {
- if (f->handle != handle && handle)
+ fold = (struct route4_filter *)*arg;
+ if (fold && handle && fold->handle != handle)
return -EINVAL;
- if (f->bkt)
- old_handle = f->handle;
-
- err = route4_set_parms(net, tp, base, f, handle, head, tb,
- tca[TCA_RATE], 0, ovr);
- if (err < 0)
- return err;
-
- goto reinsert;
- }
-
err = -ENOBUFS;
if (head == NULL) {
head = kzalloc(sizeof(struct route4_head), GFP_KERNEL);
if (head == NULL)
goto errout;
-
- tcf_tree_lock(tp);
- tp->root = head;
- tcf_tree_unlock(tp);
+ rcu_assign_pointer(tp->root, head);
}
f = kzalloc(sizeof(struct route4_filter), GFP_KERNEL);
- if (f == NULL)
+ if (!f)
goto errout;
tcf_exts_init(&f->exts, TCA_ROUTE4_ACT, TCA_ROUTE4_POLICE);
+ if (fold) {
+ f->id = fold->id;
+ f->iif = fold->iif;
+ f->res = fold->res;
+ f->handle = fold->handle;
+
+ f->tp = fold->tp;
+ f->bkt = fold->bkt;
+ new = false;
+ }
+
err = route4_set_parms(net, tp, base, f, handle, head, tb,
- tca[TCA_RATE], 1, ovr);
+ tca[TCA_RATE], new, ovr);
if (err < 0)
goto errout;
-reinsert:
h = from_hash(f->handle >> 16);
- for (fp = &f->bkt->ht[h]; (f1 = *fp) != NULL; fp = &f1->next)
+ fp = &f->bkt->ht[h];
+ for (pfp = rtnl_dereference(*fp);
+ (f1 = rtnl_dereference(*fp)) != NULL;
+ fp = &f1->next)
if (f->handle < f1->handle)
break;
- f->next = f1;
- tcf_tree_lock(tp);
- *fp = f;
+ rcu_assign_pointer(f->next, f1);
+ rcu_assign_pointer(*fp, f);
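+ /* order matters: f->next is set before *fp is published, so a
+ * concurrent reader sees either the old chain or the fully linked
+ * new filter, never a half-initialised one.
+ */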
- if (old_handle && f->handle != old_handle) {
- th = to_hash(old_handle);
- h = from_hash(old_handle >> 16);
- b = head->table[th];
+ if (fold && fold->handle && f->handle != fold->handle) {
+ th = to_hash(fold->handle);
+ h = from_hash(fold->handle >> 16);
+ b = rtnl_dereference(head->table[th]);
if (b) {
- for (fp = &b->ht[h]; *fp; fp = &(*fp)->next) {
- if (*fp == f) {
+ fp = &b->ht[h];
+ for (pfp = rtnl_dereference(*fp); pfp;
+ fp = &pfp->next, pfp = rtnl_dereference(*fp)) {
+ if (pfp == f) {
*fp = f->next;
break;
}
}
}
}
- tcf_tree_unlock(tp);
- route4_reset_fastmap(tp->q, head, f->id);
+ route4_reset_fastmap(head);
*arg = (unsigned long)f;
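+ /* readers inside rcu_read_lock_bh() may still hold the replaced
+ * filter, so it is only freed after a grace period.
+ */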
+ if (fold)
+ call_rcu(&fold->rcu, route4_delete_filter);
return 0;
errout:
@@ -520,7 +556,7 @@ errout:
static void route4_walk(struct tcf_proto *tp, struct tcf_walker *arg)
{
- struct route4_head *head = tp->root;
+ struct route4_head *head = rtnl_dereference(tp->root);
unsigned int h, h1;
if (head == NULL)
@@ -530,13 +566,15 @@ static void route4_walk(struct tcf_proto *tp, struct tcf_walker *arg)
return;
for (h = 0; h <= 256; h++) {
- struct route4_bucket *b = head->table[h];
+ struct route4_bucket *b = rtnl_dereference(head->table[h]);
if (b) {
for (h1 = 0; h1 <= 32; h1++) {
struct route4_filter *f;
- for (f = b->ht[h1]; f; f = f->next) {
+ for (f = rtnl_dereference(b->ht[h1]);
+ f;
+ f = rtnl_dereference(f->next)) {
if (arg->count < arg->skip) {
arg->count++;
continue;
diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h
index 1020e233a5d6..b044c208b133 100644
--- a/net/sched/cls_rsvp.h
+++ b/net/sched/cls_rsvp.h
@@ -70,31 +70,34 @@ struct rsvp_head {
u32 tmap[256/32];
u32 hgenerator;
u8 tgenerator;
- struct rsvp_session *ht[256];
+ struct rsvp_session __rcu *ht[256];
+ struct rcu_head rcu;
};
struct rsvp_session {
- struct rsvp_session *next;
- __be32 dst[RSVP_DST_LEN];
- struct tc_rsvp_gpi dpi;
- u8 protocol;
- u8 tunnelid;
+ struct rsvp_session __rcu *next;
+ __be32 dst[RSVP_DST_LEN];
+ struct tc_rsvp_gpi dpi;
+ u8 protocol;
+ u8 tunnelid;
/* 16 (src,sport) hash slots, and one wildcard source slot */
- struct rsvp_filter *ht[16 + 1];
+ struct rsvp_filter __rcu *ht[16 + 1];
+ struct rcu_head rcu;
};
struct rsvp_filter {
- struct rsvp_filter *next;
- __be32 src[RSVP_DST_LEN];
- struct tc_rsvp_gpi spi;
- u8 tunnelhdr;
+ struct rsvp_filter __rcu *next;
+ __be32 src[RSVP_DST_LEN];
+ struct tc_rsvp_gpi spi;
+ u8 tunnelhdr;
- struct tcf_result res;
- struct tcf_exts exts;
+ struct tcf_result res;
+ struct tcf_exts exts;
- u32 handle;
- struct rsvp_session *sess;
+ u32 handle;
+ struct rsvp_session *sess;
+ struct rcu_head rcu;
};
static inline unsigned int hash_dst(__be32 *dst, u8 protocol, u8 tunnelid)
@@ -128,7 +131,7 @@ static inline unsigned int hash_src(__be32 *src)
static int rsvp_classify(struct sk_buff *skb, const struct tcf_proto *tp,
struct tcf_result *res)
{
- struct rsvp_session **sht = ((struct rsvp_head *)tp->root)->ht;
+ struct rsvp_head *head = rcu_dereference_bh(tp->root);
struct rsvp_session *s;
struct rsvp_filter *f;
unsigned int h1, h2;
@@ -169,7 +172,8 @@ restart:
h1 = hash_dst(dst, protocol, tunnelid);
h2 = hash_src(src);
- for (s = sht[h1]; s; s = s->next) {
+ for (s = rcu_dereference_bh(head->ht[h1]); s;
+ s = rcu_dereference_bh(s->next)) {
if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN - 1] &&
protocol == s->protocol &&
!(s->dpi.mask &
@@ -181,7 +185,8 @@ restart:
#endif
tunnelid == s->tunnelid) {
- for (f = s->ht[h2]; f; f = f->next) {
+ for (f = rcu_dereference_bh(s->ht[h2]); f;
+ f = rcu_dereference_bh(f->next)) {
if (src[RSVP_DST_LEN-1] == f->src[RSVP_DST_LEN - 1] &&
!(f->spi.mask & (*(u32 *)(xprt + f->spi.offset) ^ f->spi.key))
#if RSVP_DST_LEN == 4
@@ -205,7 +210,8 @@ matched:
}
/* And wildcard bucket... */
- for (f = s->ht[16]; f; f = f->next) {
+ for (f = rcu_dereference_bh(s->ht[16]); f;
+ f = rcu_dereference_bh(f->next)) {
*res = f->res;
RSVP_APPLY_RESULT();
goto matched;
@@ -218,7 +224,7 @@ matched:
static unsigned long rsvp_get(struct tcf_proto *tp, u32 handle)
{
- struct rsvp_session **sht = ((struct rsvp_head *)tp->root)->ht;
+ struct rsvp_head *head = rtnl_dereference(tp->root);
struct rsvp_session *s;
struct rsvp_filter *f;
unsigned int h1 = handle & 0xFF;
@@ -227,8 +233,10 @@ static unsigned long rsvp_get(struct tcf_proto *tp, u32 handle)
if (h2 > 16)
return 0;
- for (s = sht[h1]; s; s = s->next) {
- for (f = s->ht[h2]; f; f = f->next) {
+ for (s = rtnl_dereference(head->ht[h1]); s;
+ s = rtnl_dereference(s->next)) {
+ for (f = rtnl_dereference(s->ht[h2]); f;
+ f = rtnl_dereference(f->next)) {
if (f->handle == handle)
return (unsigned long)f;
}
@@ -246,7 +254,7 @@ static int rsvp_init(struct tcf_proto *tp)
data = kzalloc(sizeof(struct rsvp_head), GFP_KERNEL);
if (data) {
- tp->root = data;
+ rcu_assign_pointer(tp->root, data);
return 0;
}
return -ENOBUFS;
@@ -257,53 +265,54 @@ rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f)
{
tcf_unbind_filter(tp, &f->res);
tcf_exts_destroy(tp, &f->exts);
- kfree(f);
+ kfree_rcu(f, rcu);
}
static void rsvp_destroy(struct tcf_proto *tp)
{
- struct rsvp_head *data = xchg(&tp->root, NULL);
- struct rsvp_session **sht;
+ struct rsvp_head *data = rtnl_dereference(tp->root);
int h1, h2;
if (data == NULL)
return;
- sht = data->ht;
+ RCU_INIT_POINTER(tp->root, NULL);
for (h1 = 0; h1 < 256; h1++) {
struct rsvp_session *s;
- while ((s = sht[h1]) != NULL) {
- sht[h1] = s->next;
+ while ((s = rtnl_dereference(data->ht[h1])) != NULL) {
+ RCU_INIT_POINTER(data->ht[h1], s->next);
for (h2 = 0; h2 <= 16; h2++) {
struct rsvp_filter *f;
- while ((f = s->ht[h2]) != NULL) {
- s->ht[h2] = f->next;
+ while ((f = rtnl_dereference(s->ht[h2])) != NULL) {
+ rcu_assign_pointer(s->ht[h2], f->next);
rsvp_delete_filter(tp, f);
}
}
- kfree(s);
+ kfree_rcu(s, rcu);
}
}
- kfree(data);
+ kfree_rcu(data, rcu);
}
static int rsvp_delete(struct tcf_proto *tp, unsigned long arg)
{
- struct rsvp_filter **fp, *f = (struct rsvp_filter *)arg;
+ struct rsvp_head *head = rtnl_dereference(tp->root);
+ struct rsvp_filter *nfp, *f = (struct rsvp_filter *)arg;
+ struct rsvp_filter __rcu **fp;
unsigned int h = f->handle;
- struct rsvp_session **sp;
- struct rsvp_session *s = f->sess;
+ struct rsvp_session __rcu **sp;
+ struct rsvp_session *nsp, *s = f->sess;
int i;
- for (fp = &s->ht[(h >> 8) & 0xFF]; *fp; fp = &(*fp)->next) {
- if (*fp == f) {
- tcf_tree_lock(tp);
- *fp = f->next;
- tcf_tree_unlock(tp);
+ fp = &s->ht[(h >> 8) & 0xFF];
+ for (nfp = rtnl_dereference(*fp); nfp;
+ fp = &nfp->next, nfp = rtnl_dereference(*fp)) {
+ if (nfp == f) {
+ RCU_INIT_POINTER(*fp, f->next);
rsvp_delete_filter(tp, f);
/* Strip tree */
@@ -313,14 +322,12 @@ static int rsvp_delete(struct tcf_proto *tp, unsigned long arg)
return 0;
/* OK, session has no flows */
- for (sp = &((struct rsvp_head *)tp->root)->ht[h & 0xFF];
- *sp; sp = &(*sp)->next) {
- if (*sp == s) {
- tcf_tree_lock(tp);
- *sp = s->next;
- tcf_tree_unlock(tp);
-
- kfree(s);
+ sp = &head->ht[h & 0xFF];
+ for (nsp = rtnl_dereference(*sp); nsp;
+ sp = &nsp->next, nsp = rtnl_dereference(*sp)) {
+ if (nsp == s) {
+ RCU_INIT_POINTER(*sp, s->next);
+ kfree_rcu(s, rcu);
return 0;
}
}
@@ -333,7 +340,7 @@ static int rsvp_delete(struct tcf_proto *tp, unsigned long arg)
static unsigned int gen_handle(struct tcf_proto *tp, unsigned salt)
{
- struct rsvp_head *data = tp->root;
+ struct rsvp_head *data = rtnl_dereference(tp->root);
int i = 0xFFFF;
while (i-- > 0) {
@@ -361,7 +368,7 @@ static int tunnel_bts(struct rsvp_head *data)
static void tunnel_recycle(struct rsvp_head *data)
{
- struct rsvp_session **sht = data->ht;
+ struct rsvp_session __rcu **sht = data->ht;
u32 tmap[256/32];
int h1, h2;
@@ -369,11 +376,13 @@ static void tunnel_recycle(struct rsvp_head *data)
for (h1 = 0; h1 < 256; h1++) {
struct rsvp_session *s;
- for (s = sht[h1]; s; s = s->next) {
+ for (s = rtnl_dereference(sht[h1]); s;
+ s = rtnl_dereference(s->next)) {
for (h2 = 0; h2 <= 16; h2++) {
struct rsvp_filter *f;
- for (f = s->ht[h2]; f; f = f->next) {
+ for (f = rtnl_dereference(s->ht[h2]); f;
+ f = rtnl_dereference(f->next)) {
if (f->tunnelhdr == 0)
continue;
data->tgenerator = f->res.classid;
@@ -417,9 +426,11 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb,
struct nlattr **tca,
unsigned long *arg, bool ovr)
{
- struct rsvp_head *data = tp->root;
- struct rsvp_filter *f, **fp;
- struct rsvp_session *s, **sp;
+ struct rsvp_head *data = rtnl_dereference(tp->root);
+ struct rsvp_filter *f, *nfp;
+ struct rsvp_filter __rcu **fp;
+ struct rsvp_session *nsp, *s;
+ struct rsvp_session __rcu **sp;
struct tc_rsvp_pinfo *pinfo = NULL;
struct nlattr *opt = tca[TCA_OPTIONS];
struct nlattr *tb[TCA_RSVP_MAX + 1];
@@ -499,7 +510,9 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb,
goto errout;
}
- for (sp = &data->ht[h1]; (s = *sp) != NULL; sp = &s->next) {
+ for (sp = &data->ht[h1];
+ (s = rtnl_dereference(*sp)) != NULL;
+ sp = &s->next) {
if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN-1] &&
pinfo && pinfo->protocol == s->protocol &&
memcmp(&pinfo->dpi, &s->dpi, sizeof(s->dpi)) == 0 &&
@@ -521,12 +534,16 @@ insert:
tcf_exts_change(tp, &f->exts, &e);
- for (fp = &s->ht[h2]; *fp; fp = &(*fp)->next)
- if (((*fp)->spi.mask & f->spi.mask) != f->spi.mask)
+ fp = &s->ht[h2];
+ for (nfp = rtnl_dereference(*fp); nfp;
+ fp = &nfp->next, nfp = rtnl_dereference(*fp)) {
+ __u32 mask = nfp->spi.mask & f->spi.mask;
+
+ if (mask != f->spi.mask)
break;
- f->next = *fp;
- wmb();
- *fp = f;
+ }
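+ /* f is not yet visible to readers, so RCU_INIT_POINTER is enough for
+ * its ->next; rcu_assign_pointer() below provides the barrier that
+ * publishes the fully initialised filter.
+ */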
+ RCU_INIT_POINTER(f->next, nfp);
+ rcu_assign_pointer(*fp, f);
*arg = (unsigned long)f;
return 0;
@@ -546,13 +563,14 @@ insert:
s->protocol = pinfo->protocol;
s->tunnelid = pinfo->tunnelid;
}
- for (sp = &data->ht[h1]; *sp; sp = &(*sp)->next) {
- if (((*sp)->dpi.mask&s->dpi.mask) != s->dpi.mask)
+ sp = &data->ht[h1];
+ for (nsp = rtnl_dereference(*sp); nsp;
+ sp = &nsp->next, nsp = rtnl_dereference(*sp)) {
+ if ((nsp->dpi.mask & s->dpi.mask) != s->dpi.mask)
break;
}
- s->next = *sp;
- wmb();
- *sp = s;
+ RCU_INIT_POINTER(s->next, nsp);
+ rcu_assign_pointer(*sp, s);
goto insert;
@@ -565,7 +583,7 @@ errout2:
static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg)
{
- struct rsvp_head *head = tp->root;
+ struct rsvp_head *head = rtnl_dereference(tp->root);
unsigned int h, h1;
if (arg->stop)
@@ -574,11 +592,13 @@ static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg)
for (h = 0; h < 256; h++) {
struct rsvp_session *s;
- for (s = head->ht[h]; s; s = s->next) {
+ for (s = rtnl_dereference(head->ht[h]); s;
+ s = rtnl_dereference(s->next)) {
for (h1 = 0; h1 <= 16; h1++) {
struct rsvp_filter *f;
- for (f = s->ht[h1]; f; f = f->next) {
+ for (f = rtnl_dereference(s->ht[h1]); f;
+ f = rtnl_dereference(f->next)) {
if (arg->count < arg->skip) {
arg->count++;
continue;
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index 3e9f76413b3b..a9f4279fbd69 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -32,19 +32,21 @@ struct tcindex_filter_result {
struct tcindex_filter {
u16 key;
struct tcindex_filter_result result;
- struct tcindex_filter *next;
+ struct tcindex_filter __rcu *next;
+ struct rcu_head rcu;
};
struct tcindex_data {
struct tcindex_filter_result *perfect; /* perfect hash; NULL if none */
- struct tcindex_filter **h; /* imperfect hash; only used if !perfect;
- NULL if unused */
+ struct tcindex_filter __rcu **h; /* imperfect hash; */
+ struct tcf_proto *tp;
u16 mask; /* AND key with mask */
- int shift; /* shift ANDed key to the right */
- int hash; /* hash table size; 0 if undefined */
- int alloc_hash; /* allocated size */
- int fall_through; /* 0: only classify if explicit match */
+ u32 shift; /* shift ANDed key to the right */
+ u32 hash; /* hash table size; 0 if undefined */
+ u32 alloc_hash; /* allocated size */
+ u32 fall_through; /* 0: only classify if explicit match */
+ struct rcu_head rcu;
};
static inline int
@@ -56,13 +58,18 @@ tcindex_filter_is_set(struct tcindex_filter_result *r)
static struct tcindex_filter_result *
tcindex_lookup(struct tcindex_data *p, u16 key)
{
- struct tcindex_filter *f;
+ if (p->perfect) {
+ struct tcindex_filter_result *f = p->perfect + key;
+
+ return tcindex_filter_is_set(f) ? f : NULL;
+ } else if (p->h) {
+ struct tcindex_filter __rcu **fp;
+ struct tcindex_filter *f;
- if (p->perfect)
- return tcindex_filter_is_set(p->perfect + key) ?
- p->perfect + key : NULL;
- else if (p->h) {
- for (f = p->h[key % p->hash]; f; f = f->next)
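+ /* this lookup runs from both the classify path (under
+ * rcu_read_lock_bh) and the RTNL-protected control path, hence
+ * the dual-context rcu_dereference_bh_rtnl() below.
+ */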
+ fp = &p->h[key % p->hash];
+ for (f = rcu_dereference_bh_rtnl(*fp);
+ f;
+ fp = &f->next, f = rcu_dereference_bh_rtnl(*fp))
if (f->key == key)
return &f->result;
}
@@ -74,7 +81,7 @@ tcindex_lookup(struct tcindex_data *p, u16 key)
static int tcindex_classify(struct sk_buff *skb, const struct tcf_proto *tp,
struct tcf_result *res)
{
- struct tcindex_data *p = tp->root;
+ struct tcindex_data *p = rcu_dereference_bh(tp->root);
struct tcindex_filter_result *f;
int key = (skb->tc_index & p->mask) >> p->shift;
@@ -99,7 +106,7 @@ static int tcindex_classify(struct sk_buff *skb, const struct tcf_proto *tp,
static unsigned long tcindex_get(struct tcf_proto *tp, u32 handle)
{
- struct tcindex_data *p = tp->root;
+ struct tcindex_data *p = rtnl_dereference(tp->root);
struct tcindex_filter_result *r;
pr_debug("tcindex_get(tp %p,handle 0x%08x)\n", tp, handle);
@@ -129,49 +136,59 @@ static int tcindex_init(struct tcf_proto *tp)
p->hash = DEFAULT_HASH_SIZE;
p->fall_through = 1;
- tp->root = p;
+ rcu_assign_pointer(tp->root, p);
return 0;
}
-
static int
-__tcindex_delete(struct tcf_proto *tp, unsigned long arg, int lock)
+tcindex_delete(struct tcf_proto *tp, unsigned long arg)
{
- struct tcindex_data *p = tp->root;
+ struct tcindex_data *p = rtnl_dereference(tp->root);
struct tcindex_filter_result *r = (struct tcindex_filter_result *) arg;
+ struct tcindex_filter __rcu **walk;
struct tcindex_filter *f = NULL;
- pr_debug("tcindex_delete(tp %p,arg 0x%lx),p %p,f %p\n", tp, arg, p, f);
+ pr_debug("tcindex_delete(tp %p,arg 0x%lx),p %p\n", tp, arg, p);
if (p->perfect) {
if (!r->res.class)
return -ENOENT;
} else {
int i;
- struct tcindex_filter **walk = NULL;
- for (i = 0; i < p->hash; i++)
- for (walk = p->h+i; *walk; walk = &(*walk)->next)
- if (&(*walk)->result == r)
+ for (i = 0; i < p->hash; i++) {
+ walk = p->h + i;
+ for (f = rtnl_dereference(*walk); f;
+ walk = &f->next, f = rtnl_dereference(*walk)) {
+ if (&f->result == r)
goto found;
+ }
+ }
return -ENOENT;
found:
- f = *walk;
- if (lock)
- tcf_tree_lock(tp);
- *walk = f->next;
- if (lock)
- tcf_tree_unlock(tp);
+ rcu_assign_pointer(*walk, rtnl_dereference(f->next));
}
tcf_unbind_filter(tp, &r->res);
tcf_exts_destroy(tp, &r->exts);
- kfree(f);
+ if (f)
+ kfree_rcu(f, rcu);
return 0;
}
-static int tcindex_delete(struct tcf_proto *tp, unsigned long arg)
+static int tcindex_destroy_element(struct tcf_proto *tp,
+ unsigned long arg,
+ struct tcf_walker *walker)
{
- return __tcindex_delete(tp, arg, 1);
+ return tcindex_delete(tp, arg);
+}
+
+static void __tcindex_destroy(struct rcu_head *head)
+{
+ struct tcindex_data *p = container_of(head, struct tcindex_data, rcu);
+
+ kfree(p->perfect);
+ kfree(p->h);
+ kfree(p);
}
static inline int
@@ -194,6 +211,14 @@ static void tcindex_filter_result_init(struct tcindex_filter_result *r)
tcf_exts_init(&r->exts, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE);
}
+static void __tcindex_partial_destroy(struct rcu_head *head)
+{
+ struct tcindex_data *p = container_of(head, struct tcindex_data, rcu);
+
+ kfree(p->perfect);
+ kfree(p);
+}
+
static int
tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
u32 handle, struct tcindex_data *p,
@@ -203,7 +228,7 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
int err, balloc = 0;
struct tcindex_filter_result new_filter_result, *old_r = r;
struct tcindex_filter_result cr;
- struct tcindex_data cp;
+ struct tcindex_data *cp, *oldp;
struct tcindex_filter *f = NULL; /* make gcc behave */
struct tcf_exts e;
@@ -212,84 +237,118 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
if (err < 0)
return err;
- memcpy(&cp, p, sizeof(cp));
- tcindex_filter_result_init(&new_filter_result);
+ /* tcindex_data attributes must look atomic to the classifier/lookup
+ * path, so allocate a new tcindex_data and RCU-assign it onto the
+ * root, keeping the perfect hash and hash pointers from the old
+ * data.
+ */
+ cp = kzalloc(sizeof(*cp), GFP_KERNEL);
+ if (!cp)
+ return -ENOMEM;
+
+ cp->mask = p->mask;
+ cp->shift = p->shift;
+ cp->hash = p->hash;
+ cp->alloc_hash = p->alloc_hash;
+ cp->fall_through = p->fall_through;
+ cp->tp = tp;
+
+ if (p->perfect) {
+ cp->perfect = kmemdup(p->perfect,
+ sizeof(*r) * cp->hash, GFP_KERNEL);
+ if (!cp->perfect)
+ goto errout;
+ }
+ cp->h = p->h;
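+ /* the imperfect hash table is shared with the old copy rather than
+ * duplicated; it is only modified under RTNL, so readers see a
+ * consistent chain through either root.
+ */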
+
+ memset(&new_filter_result, 0, sizeof(new_filter_result));
+ tcf_exts_init(&new_filter_result.exts,
+ TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE);
tcindex_filter_result_init(&cr);
if (old_r)
cr.res = r->res;
if (tb[TCA_TCINDEX_HASH])
- cp.hash = nla_get_u32(tb[TCA_TCINDEX_HASH]);
+ cp->hash = nla_get_u32(tb[TCA_TCINDEX_HASH]);
if (tb[TCA_TCINDEX_MASK])
- cp.mask = nla_get_u16(tb[TCA_TCINDEX_MASK]);
+ cp->mask = nla_get_u16(tb[TCA_TCINDEX_MASK]);
if (tb[TCA_TCINDEX_SHIFT])
- cp.shift = nla_get_u32(tb[TCA_TCINDEX_SHIFT]);
+ cp->shift = nla_get_u32(tb[TCA_TCINDEX_SHIFT]);
err = -EBUSY;
+
/* Hash already allocated, make sure that we still meet the
* requirements for the allocated hash.
*/
- if (cp.perfect) {
- if (!valid_perfect_hash(&cp) ||
- cp.hash > cp.alloc_hash)
+ if (cp->perfect) {
+ if (!valid_perfect_hash(cp) ||
+ cp->hash > cp->alloc_hash)
goto errout;
- } else if (cp.h && cp.hash != cp.alloc_hash)
+ } else if (cp->h && cp->hash != cp->alloc_hash) {
goto errout;
+ }
err = -EINVAL;
if (tb[TCA_TCINDEX_FALL_THROUGH])
- cp.fall_through = nla_get_u32(tb[TCA_TCINDEX_FALL_THROUGH]);
+ cp->fall_through = nla_get_u32(tb[TCA_TCINDEX_FALL_THROUGH]);
- if (!cp.hash) {
+ if (!cp->hash) {
/* Hash not specified, use perfect hash if the upper limit
* of the hashing index is below the threshold.
*/
- if ((cp.mask >> cp.shift) < PERFECT_HASH_THRESHOLD)
- cp.hash = (cp.mask >> cp.shift) + 1;
+ if ((cp->mask >> cp->shift) < PERFECT_HASH_THRESHOLD)
+ cp->hash = (cp->mask >> cp->shift) + 1;
else
- cp.hash = DEFAULT_HASH_SIZE;
+ cp->hash = DEFAULT_HASH_SIZE;
}
- if (!cp.perfect && !cp.h)
- cp.alloc_hash = cp.hash;
+ if (!cp->perfect && !cp->h)
+ cp->alloc_hash = cp->hash;
/* Note: this could be as restrictive as if (handle & ~(mask >> shift))
* but then, we'd fail handles that may become valid after some future
* mask change. While this is extremely unlikely to ever matter,
* the check below is safer (and also more backwards-compatible).
*/
- if (cp.perfect || valid_perfect_hash(&cp))
- if (handle >= cp.alloc_hash)
+ if (cp->perfect || valid_perfect_hash(cp))
+ if (handle >= cp->alloc_hash)
goto errout;
err = -ENOMEM;
- if (!cp.perfect && !cp.h) {
- if (valid_perfect_hash(&cp)) {
+ if (!cp->perfect && !cp->h) {
+ if (valid_perfect_hash(cp)) {
int i;
- cp.perfect = kcalloc(cp.hash, sizeof(*r), GFP_KERNEL);
- if (!cp.perfect)
+ cp->perfect = kcalloc(cp->hash, sizeof(*r), GFP_KERNEL);
+ if (!cp->perfect)
goto errout;
- for (i = 0; i < cp.hash; i++)
- tcf_exts_init(&cp.perfect[i].exts, TCA_TCINDEX_ACT,
+ for (i = 0; i < cp->hash; i++)
+ tcf_exts_init(&cp->perfect[i].exts,
+ TCA_TCINDEX_ACT,
TCA_TCINDEX_POLICE);
balloc = 1;
} else {
- cp.h = kcalloc(cp.hash, sizeof(f), GFP_KERNEL);
- if (!cp.h)
+ struct tcindex_filter __rcu **hash;
+
+ hash = kcalloc(cp->hash,
+ sizeof(struct tcindex_filter *),
+ GFP_KERNEL);
+
+ if (!hash)
goto errout;
+
+ cp->h = hash;
balloc = 2;
}
}
- if (cp.perfect)
- r = cp.perfect + handle;
+ if (cp->perfect)
+ r = cp->perfect + handle;
else
- r = tcindex_lookup(&cp, handle) ? : &new_filter_result;
+ r = tcindex_lookup(cp, handle) ? : &new_filter_result;
if (r == &new_filter_result) {
f = kzalloc(sizeof(*f), GFP_KERNEL);
@@ -307,33 +366,41 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
else
tcf_exts_change(tp, &cr.exts, &e);
- tcf_tree_lock(tp);
if (old_r && old_r != r)
tcindex_filter_result_init(old_r);
- memcpy(p, &cp, sizeof(cp));
+ oldp = p;
r->res = cr.res;
+ rcu_assign_pointer(tp->root, cp);
if (r == &new_filter_result) {
- struct tcindex_filter **fp;
+ struct tcindex_filter *nfp;
+ struct tcindex_filter __rcu **fp;
f->key = handle;
f->result = new_filter_result;
f->next = NULL;
- for (fp = p->h+(handle % p->hash); *fp; fp = &(*fp)->next)
- /* nothing */;
- *fp = f;
+
+ fp = p->h + (handle % p->hash);
+ for (nfp = rtnl_dereference(*fp);
+ nfp;
+ fp = &nfp->next, nfp = rtnl_dereference(*fp))
+ ; /* nothing */
+
+ rcu_assign_pointer(*fp, f);
}
- tcf_tree_unlock(tp);
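+ /* the new copy inherited the imperfect hash table, so the partial
+ * destroy frees only the old perfect hash and the old tcindex_data,
+ * and only after a grace period.
+ */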
+ if (oldp)
+ call_rcu(&oldp->rcu, __tcindex_partial_destroy);
return 0;
errout_alloc:
if (balloc == 1)
- kfree(cp.perfect);
+ kfree(cp->perfect);
else if (balloc == 2)
- kfree(cp.h);
+ kfree(cp->h);
errout:
+ kfree(cp);
tcf_exts_destroy(tp, &e);
return err;
}
@@ -345,7 +412,7 @@ tcindex_change(struct net *net, struct sk_buff *in_skb,
{
struct nlattr *opt = tca[TCA_OPTIONS];
struct nlattr *tb[TCA_TCINDEX_MAX + 1];
- struct tcindex_data *p = tp->root;
+ struct tcindex_data *p = rtnl_dereference(tp->root);
struct tcindex_filter_result *r = (struct tcindex_filter_result *) *arg;
int err;
@@ -364,10 +431,9 @@ tcindex_change(struct net *net, struct sk_buff *in_skb,
tca[TCA_RATE], ovr);
}
-
static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker)
{
- struct tcindex_data *p = tp->root;
+ struct tcindex_data *p = rtnl_dereference(tp->root);
struct tcindex_filter *f, *next;
int i;
@@ -390,8 +456,8 @@ static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker)
if (!p->h)
return;
for (i = 0; i < p->hash; i++) {
- for (f = p->h[i]; f; f = next) {
- next = f->next;
+ for (f = rtnl_dereference(p->h[i]); f; f = next) {
+ next = rtnl_dereference(f->next);
if (walker->count >= walker->skip) {
if (walker->fn(tp, (unsigned long) &f->result,
walker) < 0) {
@@ -404,17 +470,9 @@ static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker)
}
}
-
-static int tcindex_destroy_element(struct tcf_proto *tp,
- unsigned long arg, struct tcf_walker *walker)
-{
- return __tcindex_delete(tp, arg, 0);
-}
-
-
static void tcindex_destroy(struct tcf_proto *tp)
{
- struct tcindex_data *p = tp->root;
+ struct tcindex_data *p = rtnl_dereference(tp->root);
struct tcf_walker walker;
pr_debug("tcindex_destroy(tp %p),p %p\n", tp, p);
@@ -422,17 +480,16 @@ static void tcindex_destroy(struct tcf_proto *tp)
walker.skip = 0;
walker.fn = tcindex_destroy_element;
tcindex_walk(tp, &walker);
- kfree(p->perfect);
- kfree(p->h);
- kfree(p);
- tp->root = NULL;
+
+ RCU_INIT_POINTER(tp->root, NULL);
+ call_rcu(&p->rcu, __tcindex_destroy);
}
static int tcindex_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
struct sk_buff *skb, struct tcmsg *t)
{
- struct tcindex_data *p = tp->root;
+ struct tcindex_data *p = rtnl_dereference(tp->root);
struct tcindex_filter_result *r = (struct tcindex_filter_result *) fh;
unsigned char *b = skb_tail_pointer(skb);
struct nlattr *nest;
@@ -455,15 +512,18 @@ static int tcindex_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
nla_nest_end(skb, nest);
} else {
if (p->perfect) {
- t->tcm_handle = r-p->perfect;
+ t->tcm_handle = r - p->perfect;
} else {
struct tcindex_filter *f;
+ struct tcindex_filter __rcu **fp;
int i;
t->tcm_handle = 0;
for (i = 0; !t->tcm_handle && i < p->hash; i++) {
- for (f = p->h[i]; !t->tcm_handle && f;
- f = f->next) {
+ fp = &p->h[i];
+ for (f = rtnl_dereference(*fp);
+ !t->tcm_handle && f;
+ fp = &f->next, f = rtnl_dereference(*fp)) {
if (&f->result == r)
t->tcm_handle = f->key;
}
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index 70c0be8d0121..5ed5ac4361b1 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -36,6 +36,7 @@
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
+#include <linux/percpu.h>
#include <linux/rtnetlink.h>
#include <linux/skbuff.h>
#include <linux/bitmap.h>
@@ -44,40 +45,46 @@
#include <net/pkt_cls.h>
struct tc_u_knode {
- struct tc_u_knode *next;
+ struct tc_u_knode __rcu *next;
u32 handle;
- struct tc_u_hnode *ht_up;
+ struct tc_u_hnode __rcu *ht_up;
struct tcf_exts exts;
#ifdef CONFIG_NET_CLS_IND
int ifindex;
#endif
u8 fshift;
struct tcf_result res;
- struct tc_u_hnode *ht_down;
+ struct tc_u_hnode __rcu *ht_down;
#ifdef CONFIG_CLS_U32_PERF
- struct tc_u32_pcnt *pf;
+ struct tc_u32_pcnt __percpu *pf;
#endif
#ifdef CONFIG_CLS_U32_MARK
- struct tc_u32_mark mark;
+ u32 val;
+ u32 mask;
+ u32 __percpu *pcpu_success;
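+ /* the mark match itself is read-only in the fast path; the success
+ * counter is per-cpu so matching never dirties a shared cacheline.
+ */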
#endif
+ struct tcf_proto *tp;
struct tc_u32_sel sel;
+ struct rcu_head rcu;
};
struct tc_u_hnode {
- struct tc_u_hnode *next;
+ struct tc_u_hnode __rcu *next;
u32 handle;
u32 prio;
struct tc_u_common *tp_c;
int refcnt;
unsigned int divisor;
- struct tc_u_knode *ht[1];
+ struct tc_u_knode __rcu *ht[1];
+ struct rcu_head rcu;
};
struct tc_u_common {
- struct tc_u_hnode *hlist;
+ struct tc_u_hnode __rcu *hlist;
struct Qdisc *q;
int refcnt;
u32 hgenerator;
+ struct rcu_head rcu;
};
static inline unsigned int u32_hash_fold(__be32 key,
@@ -96,7 +103,7 @@ static int u32_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct
unsigned int off;
} stack[TC_U32_MAXDEPTH];
- struct tc_u_hnode *ht = tp->root;
+ struct tc_u_hnode *ht = rcu_dereference_bh(tp->root);
unsigned int off = skb_network_offset(skb);
struct tc_u_knode *n;
int sdepth = 0;
@@ -108,23 +115,23 @@ static int u32_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct
int i, r;
next_ht:
- n = ht->ht[sel];
+ n = rcu_dereference_bh(ht->ht[sel]);
next_knode:
if (n) {
struct tc_u32_key *key = n->sel.keys;
#ifdef CONFIG_CLS_U32_PERF
- n->pf->rcnt += 1;
+ __this_cpu_inc(n->pf->rcnt);
j = 0;
#endif
#ifdef CONFIG_CLS_U32_MARK
- if ((skb->mark & n->mark.mask) != n->mark.val) {
- n = n->next;
+ if ((skb->mark & n->mask) != n->val) {
+ n = rcu_dereference_bh(n->next);
goto next_knode;
} else {
- n->mark.success++;
+ __this_cpu_inc(*n->pcpu_success);
}
#endif
@@ -139,37 +146,39 @@ next_knode:
if (!data)
goto out;
if ((*data ^ key->val) & key->mask) {
- n = n->next;
+ n = rcu_dereference_bh(n->next);
goto next_knode;
}
#ifdef CONFIG_CLS_U32_PERF
- n->pf->kcnts[j] += 1;
+ __this_cpu_inc(n->pf->kcnts[j]);
j++;
#endif
}
- if (n->ht_down == NULL) {
+
+ ht = rcu_dereference_bh(n->ht_down);
+ if (!ht) {
check_terminal:
if (n->sel.flags & TC_U32_TERMINAL) {
*res = n->res;
#ifdef CONFIG_NET_CLS_IND
if (!tcf_match_indev(skb, n->ifindex)) {
- n = n->next;
+ n = rcu_dereference_bh(n->next);
goto next_knode;
}
#endif
#ifdef CONFIG_CLS_U32_PERF
- n->pf->rhit += 1;
+ __this_cpu_inc(n->pf->rhit);
#endif
r = tcf_exts_exec(skb, &n->exts, res);
if (r < 0) {
- n = n->next;
+ n = rcu_dereference_bh(n->next);
goto next_knode;
}
return r;
}
- n = n->next;
+ n = rcu_dereference_bh(n->next);
goto next_knode;
}
@@ -180,7 +189,7 @@ check_terminal:
stack[sdepth].off = off;
sdepth++;
- ht = n->ht_down;
+ ht = rcu_dereference_bh(n->ht_down);
sel = 0;
if (ht->divisor) {
__be32 *data, hdata;
@@ -222,7 +231,7 @@ check_terminal:
/* POP */
if (sdepth--) {
n = stack[sdepth].knode;
- ht = n->ht_up;
+ ht = rcu_dereference_bh(n->ht_up);
off = stack[sdepth].off;
goto check_terminal;
}
@@ -239,7 +248,9 @@ u32_lookup_ht(struct tc_u_common *tp_c, u32 handle)
{
struct tc_u_hnode *ht;
- for (ht = tp_c->hlist; ht; ht = ht->next)
+ for (ht = rtnl_dereference(tp_c->hlist);
+ ht;
+ ht = rtnl_dereference(ht->next))
if (ht->handle == handle)
break;
@@ -256,7 +267,9 @@ u32_lookup_key(struct tc_u_hnode *ht, u32 handle)
if (sel > ht->divisor)
goto out;
- for (n = ht->ht[sel]; n; n = n->next)
+ for (n = rtnl_dereference(ht->ht[sel]);
+ n;
+ n = rtnl_dereference(n->next))
if (n->handle == handle)
break;
out:
@@ -270,7 +283,7 @@ static unsigned long u32_get(struct tcf_proto *tp, u32 handle)
struct tc_u_common *tp_c = tp->data;
if (TC_U32_HTID(handle) == TC_U32_ROOT)
- ht = tp->root;
+ ht = rtnl_dereference(tp->root);
else
ht = u32_lookup_ht(tp_c, TC_U32_HTID(handle));
@@ -291,6 +304,9 @@ static u32 gen_new_htid(struct tc_u_common *tp_c)
{
int i = 0x800;
+ /* hgenerator is only used under the rtnl lock, so it is safe to
+ * increment it without read-copy-update semantics.
+ */
do {
if (++tp_c->hgenerator == 0x7FF)
tp_c->hgenerator = 1;
@@ -326,11 +342,11 @@ static int u32_init(struct tcf_proto *tp)
}
tp_c->refcnt++;
- root_ht->next = tp_c->hlist;
- tp_c->hlist = root_ht;
+ RCU_INIT_POINTER(root_ht->next, tp_c->hlist);
+ rcu_assign_pointer(tp_c->hlist, root_ht);
root_ht->tp_c = tp_c;
- tp->root = root_ht;
+ rcu_assign_pointer(tp->root, root_ht);
tp->data = tp_c;
return 0;
}
@@ -342,25 +358,33 @@ static int u32_destroy_key(struct tcf_proto *tp, struct tc_u_knode *n)
if (n->ht_down)
n->ht_down->refcnt--;
#ifdef CONFIG_CLS_U32_PERF
- kfree(n->pf);
+ free_percpu(n->pf);
#endif
kfree(n);
return 0;
}
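+/* RCU callback: the key was unlinked under RTNL, so once a grace
+ * period has elapsed no reader can reach it and it is safe to free
+ * the key together with its per-cpu counters.
+ */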
+static void u32_delete_key_rcu(struct rcu_head *rcu)
+{
+ struct tc_u_knode *key = container_of(rcu, struct tc_u_knode, rcu);
+
+ u32_destroy_key(key->tp, key);
+}
+
static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode *key)
{
- struct tc_u_knode **kp;
+ struct tc_u_knode __rcu **kp;
+ struct tc_u_knode *pkp;
struct tc_u_hnode *ht = key->ht_up;
if (ht) {
- for (kp = &ht->ht[TC_U32_HASH(key->handle)]; *kp; kp = &(*kp)->next) {
- if (*kp == key) {
- tcf_tree_lock(tp);
- *kp = key->next;
- tcf_tree_unlock(tp);
+ kp = &ht->ht[TC_U32_HASH(key->handle)];
+ for (pkp = rtnl_dereference(*kp); pkp;
+ kp = &pkp->next, pkp = rtnl_dereference(*kp)) {
+ if (pkp == key) {
+ RCU_INIT_POINTER(*kp, key->next);
- u32_destroy_key(tp, key);
+ call_rcu(&key->rcu, u32_delete_key_rcu);
return 0;
}
}
@@ -369,16 +393,16 @@ static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode *key)
return 0;
}
-static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht)
+static void u32_clear_hnode(struct tc_u_hnode *ht)
{
struct tc_u_knode *n;
unsigned int h;
for (h = 0; h <= ht->divisor; h++) {
- while ((n = ht->ht[h]) != NULL) {
- ht->ht[h] = n->next;
-
- u32_destroy_key(tp, n);
+ while ((n = rtnl_dereference(ht->ht[h])) != NULL) {
+ RCU_INIT_POINTER(ht->ht[h],
+ rtnl_dereference(n->next));
+ call_rcu(&n->rcu, u32_delete_key_rcu);
}
}
}
@@ -386,28 +410,31 @@ static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht)
static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht)
{
struct tc_u_common *tp_c = tp->data;
- struct tc_u_hnode **hn;
+ struct tc_u_hnode __rcu **hn;
+ struct tc_u_hnode *phn;
WARN_ON(ht->refcnt);
- u32_clear_hnode(tp, ht);
+ u32_clear_hnode(ht);
- for (hn = &tp_c->hlist; *hn; hn = &(*hn)->next) {
- if (*hn == ht) {
- *hn = ht->next;
- kfree(ht);
+ hn = &tp_c->hlist;
+ for (phn = rtnl_dereference(*hn);
+ phn;
+ hn = &phn->next, phn = rtnl_dereference(*hn)) {
+ if (phn == ht) {
+ RCU_INIT_POINTER(*hn, ht->next);
+ kfree_rcu(ht, rcu);
return 0;
}
}
- WARN_ON(1);
return -ENOENT;
}
static void u32_destroy(struct tcf_proto *tp)
{
struct tc_u_common *tp_c = tp->data;
- struct tc_u_hnode *root_ht = tp->root;
+ struct tc_u_hnode *root_ht = rtnl_dereference(tp->root);
WARN_ON(root_ht == NULL);
@@ -419,17 +446,16 @@ static void u32_destroy(struct tcf_proto *tp)
tp->q->u32_node = NULL;
- for (ht = tp_c->hlist; ht; ht = ht->next) {
+ for (ht = rtnl_dereference(tp_c->hlist);
+ ht;
+ ht = rtnl_dereference(ht->next)) {
ht->refcnt--;
- u32_clear_hnode(tp, ht);
+ u32_clear_hnode(ht);
}
- while ((ht = tp_c->hlist) != NULL) {
- tp_c->hlist = ht->next;
-
- WARN_ON(ht->refcnt != 0);
-
- kfree(ht);
+ while ((ht = rtnl_dereference(tp_c->hlist)) != NULL) {
+ RCU_INIT_POINTER(tp_c->hlist, ht->next);
+ kfree_rcu(ht, rcu);
}
kfree(tp_c);
@@ -441,6 +467,7 @@ static void u32_destroy(struct tcf_proto *tp)
static int u32_delete(struct tcf_proto *tp, unsigned long arg)
{
struct tc_u_hnode *ht = (struct tc_u_hnode *)arg;
+ struct tc_u_hnode *root_ht = rtnl_dereference(tp->root);
if (ht == NULL)
return 0;
@@ -448,7 +475,7 @@ static int u32_delete(struct tcf_proto *tp, unsigned long arg)
if (TC_U32_KEY(ht->handle))
return u32_delete_key(tp, (struct tc_u_knode *)ht);
- if (tp->root == ht)
+ if (root_ht == ht)
return -EINVAL;
if (ht->refcnt == 1) {
@@ -471,7 +498,9 @@ static u32 gen_new_kid(struct tc_u_hnode *ht, u32 handle)
if (!bitmap)
return handle | 0xFFF;
- for (n = ht->ht[TC_U32_HASH(handle)]; n; n = n->next)
+ for (n = rtnl_dereference(ht->ht[TC_U32_HASH(handle)]);
+ n;
+ n = rtnl_dereference(n->next))
set_bit(TC_U32_NODE(n->handle), bitmap);
i = find_next_zero_bit(bitmap, NR_U32_NODE, 0x800);
@@ -521,10 +550,8 @@ static int u32_set_parms(struct net *net, struct tcf_proto *tp,
ht_down->refcnt++;
}
- tcf_tree_lock(tp);
- ht_old = n->ht_down;
- n->ht_down = ht_down;
- tcf_tree_unlock(tp);
+ ht_old = rtnl_dereference(n->ht_down);
+ rcu_assign_pointer(n->ht_down, ht_down);
if (ht_old)
ht_old->refcnt--;
@@ -564,6 +591,9 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
struct nlattr *tb[TCA_U32_MAX + 1];
u32 htid;
int err;
+#ifdef CONFIG_CLS_U32_PERF
+ size_t size;
+#endif
if (opt == NULL)
return handle ? -EINVAL : 0;
@@ -601,8 +631,8 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
ht->divisor = divisor;
ht->handle = handle;
ht->prio = tp->prio;
- ht->next = tp_c->hlist;
- tp_c->hlist = ht;
+ RCU_INIT_POINTER(ht->next, tp_c->hlist);
+ rcu_assign_pointer(tp_c->hlist, ht);
*arg = (unsigned long)ht;
return 0;
}
@@ -610,7 +640,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
if (tb[TCA_U32_HASH]) {
htid = nla_get_u32(tb[TCA_U32_HASH]);
if (TC_U32_HTID(htid) == TC_U32_ROOT) {
- ht = tp->root;
+ ht = rtnl_dereference(tp->root);
htid = ht->handle;
} else {
ht = u32_lookup_ht(tp->data, TC_U32_HTID(htid));
@@ -618,7 +648,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
return -EINVAL;
}
} else {
- ht = tp->root;
+ ht = rtnl_dereference(tp->root);
htid = ht->handle;
}
@@ -642,8 +672,9 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
return -ENOBUFS;
#ifdef CONFIG_CLS_U32_PERF
- n->pf = kzalloc(sizeof(struct tc_u32_pcnt) + s->nkeys*sizeof(u64), GFP_KERNEL);
- if (n->pf == NULL) {
+ size = sizeof(struct tc_u32_pcnt) + s->nkeys * sizeof(u64);
+ n->pf = __alloc_percpu(size, __alignof__(struct tc_u32_pcnt));
+ if (!n->pf) {
kfree(n);
return -ENOBUFS;
}
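+ /* __alloc_percpu() is used instead of alloc_percpu() because the
+ * counter block carries a runtime-sized trailing array of per-key
+ * counters.
+ */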
@@ -654,34 +685,39 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
n->handle = handle;
n->fshift = s->hmask ? ffs(ntohl(s->hmask)) - 1 : 0;
tcf_exts_init(&n->exts, TCA_U32_ACT, TCA_U32_POLICE);
+ n->tp = tp;
#ifdef CONFIG_CLS_U32_MARK
+ n->pcpu_success = alloc_percpu(u32);
+ if (!n->pcpu_success) {
+#ifdef CONFIG_CLS_U32_PERF
+ free_percpu(n->pf);
+#endif
+ kfree(n);
+ return -ENOBUFS;
+ }
+
if (tb[TCA_U32_MARK]) {
struct tc_u32_mark *mark;
mark = nla_data(tb[TCA_U32_MARK]);
- memcpy(&n->mark, mark, sizeof(struct tc_u32_mark));
- n->mark.success = 0;
+ n->val = mark->val;
+ n->mask = mark->mask;
}
#endif
err = u32_set_parms(net, tp, base, ht, n, tb, tca[TCA_RATE], ovr);
if (err == 0) {
- struct tc_u_knode **ins;
- for (ins = &ht->ht[TC_U32_HASH(handle)]; *ins; ins = &(*ins)->next)
- if (TC_U32_NODE(handle) < TC_U32_NODE((*ins)->handle))
+ struct tc_u_knode __rcu **ins;
+ struct tc_u_knode *pins;
+
+ ins = &ht->ht[TC_U32_HASH(handle)];
+ for (pins = rtnl_dereference(*ins); pins;
+ ins = &pins->next, pins = rtnl_dereference(*ins))
+ if (TC_U32_NODE(handle) < TC_U32_NODE(pins->handle))
break;
- n->next = *ins;
- tcf_tree_lock(tp);
- *ins = n;
- tcf_tree_unlock(tp);
+ RCU_INIT_POINTER(n->next, pins);
+ rcu_assign_pointer(*ins, n);
*arg = (unsigned long)n;
return 0;
}
#ifdef CONFIG_CLS_U32_PERF
- kfree(n->pf);
+ free_percpu(n->pf);
#endif
kfree(n);
return err;
@@ -697,7 +733,9 @@ static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg)
if (arg->stop)
return;
- for (ht = tp_c->hlist; ht; ht = ht->next) {
+ for (ht = rtnl_dereference(tp_c->hlist);
+ ht;
+ ht = rtnl_dereference(ht->next)) {
if (ht->prio != tp->prio)
continue;
if (arg->count >= arg->skip) {
@@ -708,7 +746,9 @@ static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg)
}
arg->count++;
for (h = 0; h <= ht->divisor; h++) {
- for (n = ht->ht[h]; n; n = n->next) {
+ for (n = rtnl_dereference(ht->ht[h]);
+ n;
+ n = rtnl_dereference(n->next)) {
if (arg->count < arg->skip) {
arg->count++;
continue;
@@ -727,6 +767,7 @@ static int u32_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
struct sk_buff *skb, struct tcmsg *t)
{
struct tc_u_knode *n = (struct tc_u_knode *)fh;
+ struct tc_u_hnode *ht_up, *ht_down;
struct nlattr *nest;
if (n == NULL)
@@ -745,11 +786,18 @@ static int u32_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
if (nla_put_u32(skb, TCA_U32_DIVISOR, divisor))
goto nla_put_failure;
} else {
+#ifdef CONFIG_CLS_U32_PERF
+ struct tc_u32_pcnt *gpf;
+#endif
+ int cpu;
+
if (nla_put(skb, TCA_U32_SEL,
sizeof(n->sel) + n->sel.nkeys*sizeof(struct tc_u32_key),
&n->sel))
goto nla_put_failure;
- if (n->ht_up) {
+
+ ht_up = rtnl_dereference(n->ht_up);
+ if (ht_up) {
u32 htid = n->handle & 0xFFFFF000;
if (nla_put_u32(skb, TCA_U32_HASH, htid))
goto nla_put_failure;
@@ -757,14 +805,27 @@ static int u32_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
if (n->res.classid &&
nla_put_u32(skb, TCA_U32_CLASSID, n->res.classid))
goto nla_put_failure;
- if (n->ht_down &&
- nla_put_u32(skb, TCA_U32_LINK, n->ht_down->handle))
+
+ ht_down = rtnl_dereference(n->ht_down);
+ if (ht_down &&
+ nla_put_u32(skb, TCA_U32_LINK, ht_down->handle))
goto nla_put_failure;
#ifdef CONFIG_CLS_U32_MARK
- if ((n->mark.val || n->mark.mask) &&
- nla_put(skb, TCA_U32_MARK, sizeof(n->mark), &n->mark))
- goto nla_put_failure;
+ if ((n->val || n->mask)) {
+ struct tc_u32_mark mark = {.val = n->val,
+ .mask = n->mask,
+ .success = 0};
+
+ for_each_possible_cpu(cpu) {
+ __u32 cnt = *per_cpu_ptr(n->pcpu_success, cpu);
+
+ mark.success += cnt;
+ }
+
+ if (nla_put(skb, TCA_U32_MARK, sizeof(mark), &mark))
+ goto nla_put_failure;
+ }
#endif
if (tcf_exts_dump(skb, &n->exts) < 0)
@@ -779,10 +840,29 @@ static int u32_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
}
#endif
#ifdef CONFIG_CLS_U32_PERF
+ gpf = kzalloc(sizeof(struct tc_u32_pcnt) +
+ n->sel.nkeys * sizeof(u64),
+ GFP_KERNEL);
+ if (!gpf)
+ goto nla_put_failure;
+
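+ /* fold the per-cpu counters into one struct for the netlink dump;
+ * RTNL pins the knode while the sum is taken, so the totals are
+ * consistent.
+ */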
+ for_each_possible_cpu(cpu) {
+ int i;
+ struct tc_u32_pcnt *pf = per_cpu_ptr(n->pf, cpu);
+
+ gpf->rcnt += pf->rcnt;
+ gpf->rhit += pf->rhit;
+ for (i = 0; i < n->sel.nkeys; i++)
+ gpf->kcnts[i] += pf->kcnts[i];
+ }
+
if (nla_put(skb, TCA_U32_PCNT,
sizeof(struct tc_u32_pcnt) + n->sel.nkeys*sizeof(u64),
- n->pf))
+ gpf)) {
+ kfree(gpf);
goto nla_put_failure;
+ }
+ kfree(gpf);
#endif
}
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 58bed7599db7..ca6248345937 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -1781,7 +1781,7 @@ int tc_classify_compat(struct sk_buff *skb, const struct tcf_proto *tp,
__be16 protocol = skb->protocol;
int err;
- for (; tp; tp = tp->next) {
+ for (; tp; tp = rcu_dereference_bh(tp->next)) {
if (tp->protocol != protocol &&
tp->protocol != htons(ETH_P_ALL))
continue;
@@ -1833,15 +1833,15 @@ void tcf_destroy(struct tcf_proto *tp)
{
tp->ops->destroy(tp);
module_put(tp->ops->owner);
- kfree(tp);
+ kfree_rcu(tp, rcu);
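+ /* tc_classify_compat() walks the proto chain under
+ * rcu_read_lock_bh(), so the tcf_proto itself must also wait out a
+ * grace period before being freed.
+ */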
}
-void tcf_destroy_chain(struct tcf_proto **fl)
+void tcf_destroy_chain(struct tcf_proto __rcu **fl)
{
struct tcf_proto *tp;
- while ((tp = *fl) != NULL) {
- *fl = tp->next;
+ while ((tp = rtnl_dereference(*fl)) != NULL) {
+ RCU_INIT_POINTER(*fl, tp->next);
tcf_destroy(tp);
}
}
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index 8449b337f9e3..c398f9c3dbdd 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -41,7 +41,7 @@
struct atm_flow_data {
struct Qdisc *q; /* FIFO, TBF, etc. */
- struct tcf_proto *filter_list;
+ struct tcf_proto __rcu *filter_list;
struct atm_vcc *vcc; /* VCC; NULL if VCC is closed */
void (*old_pop)(struct atm_vcc *vcc,
struct sk_buff *skb); /* chaining */
@@ -273,7 +273,7 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent,
error = -ENOBUFS;
goto err_out;
}
- flow->filter_list = NULL;
+ RCU_INIT_POINTER(flow->filter_list, NULL);
flow->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, classid);
if (!flow->q)
flow->q = &noop_qdisc;
@@ -311,7 +311,7 @@ static int atm_tc_delete(struct Qdisc *sch, unsigned long arg)
pr_debug("atm_tc_delete(sch %p,[qdisc %p],flow %p)\n", sch, p, flow);
if (list_empty(&flow->list))
return -EINVAL;
- if (flow->filter_list || flow == &p->link)
+ if (rcu_access_pointer(flow->filter_list) || flow == &p->link)
return -EBUSY;
/*
* Reference count must be 2: one for "keepalive" (set at class
@@ -345,7 +345,8 @@ static void atm_tc_walk(struct Qdisc *sch, struct qdisc_walker *walker)
}
}
-static struct tcf_proto **atm_tc_find_tcf(struct Qdisc *sch, unsigned long cl)
+static struct tcf_proto __rcu **atm_tc_find_tcf(struct Qdisc *sch,
+ unsigned long cl)
{
struct atm_qdisc_data *p = qdisc_priv(sch);
struct atm_flow_data *flow = (struct atm_flow_data *)cl;
@@ -369,11 +370,12 @@ static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
flow = NULL;
if (TC_H_MAJ(skb->priority) != sch->handle ||
!(flow = (struct atm_flow_data *)atm_tc_get(sch, skb->priority))) {
+ struct tcf_proto *fl;
+
list_for_each_entry(flow, &p->flows, list) {
- if (flow->filter_list) {
- result = tc_classify_compat(skb,
- flow->filter_list,
- &res);
+ fl = rcu_dereference_bh(flow->filter_list);
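+ /* enqueue runs in softirq context with BH disabled, hence the _bh
+ * flavour of rcu_dereference for the filter chain; the same pattern
+ * repeats in the other qdiscs below.
+ */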
+ if (fl) {
+ result = tc_classify_compat(skb, fl, &res);
if (result < 0)
continue;
flow = (struct atm_flow_data *)res.class;
@@ -544,7 +546,7 @@ static int atm_tc_init(struct Qdisc *sch, struct nlattr *opt)
if (!p->link.q)
p->link.q = &noop_qdisc;
pr_debug("atm_tc_init: link (%p) qdisc %p\n", &p->link, p->link.q);
- p->link.filter_list = NULL;
+ RCU_INIT_POINTER(p->link.filter_list, NULL);
p->link.vcc = NULL;
p->link.sock = NULL;
p->link.classid = sch->handle;
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 762a04bb8f6d..a3244a800501 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -133,7 +133,7 @@ struct cbq_class {
struct gnet_stats_rate_est64 rate_est;
struct tc_cbq_xstats xstats;
- struct tcf_proto *filter_list;
+ struct tcf_proto __rcu *filter_list;
int refcnt;
int filters;
@@ -221,6 +221,7 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
struct cbq_class **defmap;
struct cbq_class *cl = NULL;
u32 prio = skb->priority;
+ struct tcf_proto *fl;
struct tcf_result res;
/*
@@ -235,11 +236,12 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
int result = 0;
defmap = head->defaults;
+ fl = rcu_dereference_bh(head->filter_list);
/*
* Step 2+n. Apply classifier.
*/
- if (!head->filter_list ||
- (result = tc_classify_compat(skb, head->filter_list, &res)) < 0)
+ result = tc_classify_compat(skb, fl, &res);
+ if (!fl || result < 0)
goto fallback;
cl = (void *)res.class;
@@ -1954,7 +1956,8 @@ static int cbq_delete(struct Qdisc *sch, unsigned long arg)
return 0;
}
-static struct tcf_proto **cbq_find_tcf(struct Qdisc *sch, unsigned long arg)
+static struct tcf_proto __rcu **cbq_find_tcf(struct Qdisc *sch,
+ unsigned long arg)
{
struct cbq_sched_data *q = qdisc_priv(sch);
struct cbq_class *cl = (struct cbq_class *)arg;
diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c
index ed30e436128b..74813e6b6ff6 100644
--- a/net/sched/sch_choke.c
+++ b/net/sched/sch_choke.c
@@ -57,7 +57,7 @@ struct choke_sched_data {
/* Variables */
struct red_vars vars;
- struct tcf_proto *filter_list;
+ struct tcf_proto __rcu *filter_list;
struct {
u32 prob_drop; /* Early probability drops */
u32 prob_mark; /* Early probability marks */
@@ -193,9 +193,11 @@ static bool choke_classify(struct sk_buff *skb,
{
struct choke_sched_data *q = qdisc_priv(sch);
struct tcf_result res;
+ struct tcf_proto *fl;
int result;
- result = tc_classify(skb, q->filter_list, &res);
+ fl = rcu_dereference_bh(q->filter_list);
+ result = tc_classify(skb, fl, &res);
if (result >= 0) {
#ifdef CONFIG_NET_CLS_ACT
switch (result) {
@@ -249,7 +251,7 @@ static bool choke_match_random(const struct choke_sched_data *q,
return false;
oskb = choke_peek_random(q, pidx);
- if (q->filter_list)
+ if (rcu_access_pointer(q->filter_list))
return choke_get_classid(nskb) == choke_get_classid(oskb);
return choke_match_flow(oskb, nskb);
@@ -257,11 +259,11 @@ static bool choke_match_random(const struct choke_sched_data *q,
static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
+ int ret = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
struct choke_sched_data *q = qdisc_priv(sch);
const struct red_parms *p = &q->parms;
- int ret = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
- if (q->filter_list) {
+ if (rcu_access_pointer(q->filter_list)) {
/* If using external classifiers, get result and record it. */
if (!choke_classify(skb, sch, &ret))
goto other_drop; /* Packet was eaten by filter */
@@ -554,7 +556,8 @@ static unsigned long choke_bind(struct Qdisc *sch, unsigned long parent,
return 0;
}
-static struct tcf_proto **choke_find_tcf(struct Qdisc *sch, unsigned long cl)
+static struct tcf_proto __rcu **choke_find_tcf(struct Qdisc *sch,
+ unsigned long cl)
{
struct choke_sched_data *q = qdisc_priv(sch);
diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c
index 7bbbfe112192..d8b5ccfd248a 100644
--- a/net/sched/sch_drr.c
+++ b/net/sched/sch_drr.c
@@ -35,7 +35,7 @@ struct drr_class {
struct drr_sched {
struct list_head active;
- struct tcf_proto *filter_list;
+ struct tcf_proto __rcu *filter_list;
struct Qdisc_class_hash clhash;
};
@@ -184,7 +184,8 @@ static void drr_put_class(struct Qdisc *sch, unsigned long arg)
drr_destroy_class(sch, cl);
}
-static struct tcf_proto **drr_tcf_chain(struct Qdisc *sch, unsigned long cl)
+static struct tcf_proto __rcu **drr_tcf_chain(struct Qdisc *sch,
+ unsigned long cl)
{
struct drr_sched *q = qdisc_priv(sch);
@@ -319,6 +320,7 @@ static struct drr_class *drr_classify(struct sk_buff *skb, struct Qdisc *sch,
struct drr_sched *q = qdisc_priv(sch);
struct drr_class *cl;
struct tcf_result res;
+ struct tcf_proto *fl;
int result;
if (TC_H_MAJ(skb->priority ^ sch->handle) == 0) {
@@ -328,7 +330,8 @@ static struct drr_class *drr_classify(struct sk_buff *skb, struct Qdisc *sch,
}
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
- result = tc_classify(skb, q->filter_list, &res);
+ fl = rcu_dereference_bh(q->filter_list);
+ result = tc_classify(skb, fl, &res);
if (result >= 0) {
#ifdef CONFIG_NET_CLS_ACT
switch (result) {
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index 49d6ef338b55..485e456c8139 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -37,7 +37,7 @@
struct dsmark_qdisc_data {
struct Qdisc *q;
- struct tcf_proto *filter_list;
+ struct tcf_proto __rcu *filter_list;
u8 *mask; /* "owns" the array */
u8 *value;
u16 indices;
@@ -186,8 +186,8 @@ ignore:
}
}
-static inline struct tcf_proto **dsmark_find_tcf(struct Qdisc *sch,
- unsigned long cl)
+static inline struct tcf_proto __rcu **dsmark_find_tcf(struct Qdisc *sch,
+ unsigned long cl)
{
struct dsmark_qdisc_data *p = qdisc_priv(sch);
return &p->filter_list;
@@ -229,7 +229,8 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch)
skb->tc_index = TC_H_MIN(skb->priority);
else {
struct tcf_result res;
- int result = tc_classify(skb, p->filter_list, &res);
+ struct tcf_proto *fl = rcu_dereference_bh(p->filter_list);
+ int result = tc_classify(skb, fl, &res);
pr_debug("result %d class 0x%04x\n", result, res.classid);
diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
index cc56c8bb9bed..105cf5557630 100644
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c
@@ -52,7 +52,7 @@ struct fq_codel_flow {
}; /* please try to keep this structure <= 64 bytes */
struct fq_codel_sched_data {
- struct tcf_proto *filter_list; /* optional external classifier */
+ struct tcf_proto __rcu *filter_list; /* optional external classifier */
struct fq_codel_flow *flows; /* Flows table [flows_cnt] */
u32 *backlogs; /* backlog table [flows_cnt] */
u32 flows_cnt; /* number of flows */
@@ -85,6 +85,7 @@ static unsigned int fq_codel_classify(struct sk_buff *skb, struct Qdisc *sch,
int *qerr)
{
struct fq_codel_sched_data *q = qdisc_priv(sch);
+ struct tcf_proto *filter;
struct tcf_result res;
int result;
@@ -93,11 +94,12 @@ static unsigned int fq_codel_classify(struct sk_buff *skb, struct Qdisc *sch,
TC_H_MIN(skb->priority) <= q->flows_cnt)
return TC_H_MIN(skb->priority);
- if (!q->filter_list)
+ filter = rcu_dereference_bh(q->filter_list);
+ if (!filter)
return fq_codel_hash(q, skb) + 1;
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
- result = tc_classify(skb, q->filter_list, &res);
+ result = tc_classify(skb, filter, &res);
if (result >= 0) {
#ifdef CONFIG_NET_CLS_ACT
switch (result) {
@@ -496,7 +498,8 @@ static void fq_codel_put(struct Qdisc *q, unsigned long cl)
{
}
-static struct tcf_proto **fq_codel_find_tcf(struct Qdisc *sch, unsigned long cl)
+static struct tcf_proto __rcu **fq_codel_find_tcf(struct Qdisc *sch,
+ unsigned long cl)
{
struct fq_codel_sched_data *q = qdisc_priv(sch);
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 19696ebe9ebc..346ef85617d3 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -783,7 +783,7 @@ static void dev_deactivate_queue(struct net_device *dev,
struct Qdisc *qdisc_default = _qdisc_default;
struct Qdisc *qdisc;
- qdisc = dev_queue->qdisc;
+ qdisc = rtnl_dereference(dev_queue->qdisc);
if (qdisc) {
spin_lock_bh(qdisc_lock(qdisc));
@@ -876,7 +876,7 @@ static void dev_init_scheduler_queue(struct net_device *dev,
{
struct Qdisc *qdisc = _qdisc;
- dev_queue->qdisc = qdisc;
+ rcu_assign_pointer(dev_queue->qdisc, qdisc);
dev_queue->qdisc_sleeping = qdisc;
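+ /* dev_queue->qdisc is now dereferenced via RCU on the transmit
+ * path, while qdisc_sleeping continues to be protected by RTNL.
+ */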
}
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index ec8aeaac1dd7..04b0de4c68b5 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -116,7 +116,7 @@ struct hfsc_class {
struct gnet_stats_queue qstats;
struct gnet_stats_rate_est64 rate_est;
unsigned int level; /* class level in hierarchy */
- struct tcf_proto *filter_list; /* filter list */
+ struct tcf_proto __rcu *filter_list; /* filter list */
unsigned int filter_cnt; /* filter count */
struct hfsc_sched *sched; /* scheduler data */
@@ -1161,7 +1161,7 @@ hfsc_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
head = &q->root;
- tcf = q->root.filter_list;
+ tcf = rcu_dereference_bh(q->root.filter_list);
while (tcf && (result = tc_classify(skb, tcf, &res)) >= 0) {
#ifdef CONFIG_NET_CLS_ACT
switch (result) {
@@ -1185,7 +1185,7 @@ hfsc_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
return cl; /* hit leaf class */
/* apply inner filter chain */
- tcf = cl->filter_list;
+ tcf = rcu_dereference_bh(cl->filter_list);
head = cl;
}
@@ -1285,7 +1285,7 @@ hfsc_unbind_tcf(struct Qdisc *sch, unsigned long arg)
cl->filter_cnt--;
}
-static struct tcf_proto **
+static struct tcf_proto __rcu **
hfsc_tcf_chain(struct Qdisc *sch, unsigned long arg)
{
struct hfsc_sched *q = qdisc_priv(sch);
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index aea942ce6008..6d16b9b81c28 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -103,7 +103,7 @@ struct htb_class {
u32 prio; /* these two are used only by leaves... */
int quantum; /* but stored for parent-to-leaf return */
- struct tcf_proto *filter_list; /* class attached filters */
+ struct tcf_proto __rcu *filter_list; /* class attached filters */
int filter_cnt;
int refcnt; /* usage count of this class */
@@ -153,7 +153,7 @@ struct htb_sched {
int rate2quantum; /* quant = rate / rate2quantum */
/* filters for qdisc itself */
- struct tcf_proto *filter_list;
+ struct tcf_proto __rcu *filter_list;
#define HTB_WARN_TOOMANYEVENTS 0x1
unsigned int warned; /* only one warning */
@@ -223,9 +223,9 @@ static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
if (cl->level == 0)
return cl;
/* Start with inner filter chain if a non-leaf class is selected */
- tcf = cl->filter_list;
+ tcf = rcu_dereference_bh(cl->filter_list);
} else {
- tcf = q->filter_list;
+ tcf = rcu_dereference_bh(q->filter_list);
}
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
@@ -251,7 +251,7 @@ static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
return cl; /* we hit leaf; return it */
/* we have got inner class; apply inner filter chain */
- tcf = cl->filter_list;
+ tcf = rcu_dereference_bh(cl->filter_list);
}
/* classification failed; try to use default class */
cl = htb_find(TC_H_MAKE(TC_H_MAJ(sch->handle), q->defcls), sch);
@@ -1519,11 +1519,12 @@ failure:
return err;
}
-static struct tcf_proto **htb_find_tcf(struct Qdisc *sch, unsigned long arg)
+static struct tcf_proto __rcu **htb_find_tcf(struct Qdisc *sch,
+ unsigned long arg)
{
struct htb_sched *q = qdisc_priv(sch);
struct htb_class *cl = (struct htb_class *)arg;
- struct tcf_proto **fl = cl ? &cl->filter_list : &q->filter_list;
+ struct tcf_proto __rcu **fl = cl ? &cl->filter_list : &q->filter_list;
return fl;
}
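
htb_find_tcf(), like the other *_find_tcf()/*_tcf_chain() hooks converted in
this patch, now returns struct tcf_proto __rcu ** rather than a plain double
pointer. The filter API updates the chain head through that pointer with RCU
primitives, and carrying the __rcu annotation on the return type lets sparse
verify those accesses. Roughly what the caller-side insertion looks like, as
an illustrative helper rather than the cls_api.c code verbatim:

    static void example_insert_filter(struct tcf_proto __rcu **chain,
                                      struct tcf_proto *tp)
    {
            /* link the new filter in front of the current head (RTNL held),
             * then publish it to concurrent classify paths in one store
             */
            RCU_INIT_POINTER(tp->next, rtnl_dereference(*chain));
            rcu_assign_pointer(*chain, tp);
    }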
diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c
index 62871c14e1f9..b351125f3849 100644
--- a/net/sched/sch_ingress.c
+++ b/net/sched/sch_ingress.c
@@ -17,7 +17,7 @@
struct ingress_qdisc_data {
- struct tcf_proto *filter_list;
+ struct tcf_proto __rcu *filter_list;
};
/* ------------------------- Class/flow operations ------------------------- */
@@ -46,7 +46,8 @@ static void ingress_walk(struct Qdisc *sch, struct qdisc_walker *walker)
{
}
-static struct tcf_proto **ingress_find_tcf(struct Qdisc *sch, unsigned long cl)
+static struct tcf_proto __rcu **ingress_find_tcf(struct Qdisc *sch,
+ unsigned long cl)
{
struct ingress_qdisc_data *p = qdisc_priv(sch);
@@ -59,9 +60,10 @@ static int ingress_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
struct ingress_qdisc_data *p = qdisc_priv(sch);
struct tcf_result res;
+ struct tcf_proto *fl = rcu_dereference_bh(p->filter_list);
int result;
- result = tc_classify(skb, p->filter_list, &res);
+ result = tc_classify(skb, fl, &res);
qdisc_bstats_update(sch, skb);
switch (result) {
diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c
index 6749e2f540d0..37e7d25d21f1 100644
--- a/net/sched/sch_mqprio.c
+++ b/net/sched/sch_mqprio.c
@@ -231,7 +231,7 @@ static int mqprio_dump(struct Qdisc *sch, struct sk_buff *skb)
memset(&sch->qstats, 0, sizeof(sch->qstats));
for (i = 0; i < dev->num_tx_queues; i++) {
- qdisc = netdev_get_tx_queue(dev, i)->qdisc;
+ qdisc = rtnl_dereference(netdev_get_tx_queue(dev, i)->qdisc);
spin_lock_bh(qdisc_lock(qdisc));
sch->q.qlen += qdisc->q.qlen;
sch->bstats.bytes += qdisc->bstats.bytes;
@@ -340,7 +340,9 @@ static int mqprio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
spin_unlock_bh(d->lock);
for (i = tc.offset; i < tc.offset + tc.count; i++) {
- qdisc = netdev_get_tx_queue(dev, i)->qdisc;
+ struct netdev_queue *q = netdev_get_tx_queue(dev, i);
+
+ qdisc = rtnl_dereference(q->qdisc);
spin_lock_bh(qdisc_lock(qdisc));
bstats.bytes += qdisc->bstats.bytes;
bstats.packets += qdisc->bstats.packets;
diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c
index afb050a735fa..c0466c1840f3 100644
--- a/net/sched/sch_multiq.c
+++ b/net/sched/sch_multiq.c
@@ -31,7 +31,7 @@ struct multiq_sched_data {
u16 bands;
u16 max_bands;
u16 curband;
- struct tcf_proto *filter_list;
+ struct tcf_proto __rcu *filter_list;
struct Qdisc **queues;
};
@@ -42,10 +42,11 @@ multiq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
struct multiq_sched_data *q = qdisc_priv(sch);
u32 band;
struct tcf_result res;
+ struct tcf_proto *fl = rcu_dereference_bh(q->filter_list);
int err;
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
- err = tc_classify(skb, q->filter_list, &res);
+ err = tc_classify(skb, fl, &res);
#ifdef CONFIG_NET_CLS_ACT
switch (err) {
case TC_ACT_STOLEN:
@@ -388,7 +389,8 @@ static void multiq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
}
}
-static struct tcf_proto **multiq_find_tcf(struct Qdisc *sch, unsigned long cl)
+static struct tcf_proto __rcu **multiq_find_tcf(struct Qdisc *sch,
+ unsigned long cl)
{
struct multiq_sched_data *q = qdisc_priv(sch);
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index 79359b69ad8d..03ef99e52a5c 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -24,7 +24,7 @@
struct prio_sched_data {
int bands;
- struct tcf_proto *filter_list;
+ struct tcf_proto __rcu *filter_list;
u8 prio2band[TC_PRIO_MAX+1];
struct Qdisc *queues[TCQ_PRIO_BANDS];
};
@@ -36,11 +36,13 @@ prio_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
struct prio_sched_data *q = qdisc_priv(sch);
u32 band = skb->priority;
struct tcf_result res;
+ struct tcf_proto *fl;
int err;
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
if (TC_H_MAJ(skb->priority) != sch->handle) {
- err = tc_classify(skb, q->filter_list, &res);
+ fl = rcu_dereference_bh(q->filter_list);
+ err = tc_classify(skb, fl, &res);
#ifdef CONFIG_NET_CLS_ACT
switch (err) {
case TC_ACT_STOLEN:
@@ -50,7 +52,7 @@ prio_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
return NULL;
}
#endif
- if (!q->filter_list || err < 0) {
+ if (!fl || err < 0) {
if (TC_H_MAJ(band))
band = 0;
return q->queues[q->prio2band[band & TC_PRIO_MAX]];
@@ -351,7 +353,8 @@ static void prio_walk(struct Qdisc *sch, struct qdisc_walker *arg)
}
}
-static struct tcf_proto **prio_find_tcf(struct Qdisc *sch, unsigned long cl)
+static struct tcf_proto __rcu **prio_find_tcf(struct Qdisc *sch,
+ unsigned long cl)
{
struct prio_sched_data *q = qdisc_priv(sch);
diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
index 8056fb4e618a..602ea01a4ddd 100644
--- a/net/sched/sch_qfq.c
+++ b/net/sched/sch_qfq.c
@@ -181,7 +181,7 @@ struct qfq_group {
};
struct qfq_sched {
- struct tcf_proto *filter_list;
+ struct tcf_proto __rcu *filter_list;
struct Qdisc_class_hash clhash;
u64 oldV, V; /* Precise virtual times. */
@@ -576,7 +576,8 @@ static void qfq_put_class(struct Qdisc *sch, unsigned long arg)
qfq_destroy_class(sch, cl);
}
-static struct tcf_proto **qfq_tcf_chain(struct Qdisc *sch, unsigned long cl)
+static struct tcf_proto __rcu **qfq_tcf_chain(struct Qdisc *sch,
+ unsigned long cl)
{
struct qfq_sched *q = qdisc_priv(sch);
@@ -704,6 +705,7 @@ static struct qfq_class *qfq_classify(struct sk_buff *skb, struct Qdisc *sch,
struct qfq_sched *q = qdisc_priv(sch);
struct qfq_class *cl;
struct tcf_result res;
+ struct tcf_proto *fl;
int result;
if (TC_H_MAJ(skb->priority ^ sch->handle) == 0) {
@@ -714,7 +716,8 @@ static struct qfq_class *qfq_classify(struct sk_buff *skb, struct Qdisc *sch,
}
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
- result = tc_classify(skb, q->filter_list, &res);
+ fl = rcu_dereference_bh(q->filter_list);
+ result = tc_classify(skb, fl, &res);
if (result >= 0) {
#ifdef CONFIG_NET_CLS_ACT
switch (result) {
diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c
index 9b0f7093d970..1562fb2b3f46 100644
--- a/net/sched/sch_sfb.c
+++ b/net/sched/sch_sfb.c
@@ -55,7 +55,7 @@ struct sfb_bins {
struct sfb_sched_data {
struct Qdisc *qdisc;
- struct tcf_proto *filter_list;
+ struct tcf_proto __rcu *filter_list;
unsigned long rehash_interval;
unsigned long warmup_time; /* double buffering warmup time in jiffies */
u32 max;
@@ -253,13 +253,13 @@ static bool sfb_rate_limit(struct sk_buff *skb, struct sfb_sched_data *q)
return false;
}
-static bool sfb_classify(struct sk_buff *skb, struct sfb_sched_data *q,
+static bool sfb_classify(struct sk_buff *skb, struct tcf_proto *fl,
int *qerr, u32 *salt)
{
struct tcf_result res;
int result;
- result = tc_classify(skb, q->filter_list, &res);
+ result = tc_classify(skb, fl, &res);
if (result >= 0) {
#ifdef CONFIG_NET_CLS_ACT
switch (result) {
@@ -281,6 +281,7 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
struct sfb_sched_data *q = qdisc_priv(sch);
struct Qdisc *child = q->qdisc;
+ struct tcf_proto *fl;
int i;
u32 p_min = ~0;
u32 minqlen = ~0;
@@ -306,9 +307,10 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
}
}
- if (q->filter_list) {
+ fl = rcu_dereference_bh(q->filter_list);
+ if (fl) {
/* If using external classifiers, get result and record it. */
- if (!sfb_classify(skb, q, &ret, &salt))
+ if (!sfb_classify(skb, fl, &ret, &salt))
goto other_drop;
keys.src = salt;
keys.dst = 0;
@@ -660,7 +662,8 @@ static void sfb_walk(struct Qdisc *sch, struct qdisc_walker *walker)
}
}
-static struct tcf_proto **sfb_find_tcf(struct Qdisc *sch, unsigned long cl)
+static struct tcf_proto __rcu **sfb_find_tcf(struct Qdisc *sch,
+ unsigned long cl)
{
struct sfb_sched_data *q = qdisc_priv(sch);
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 211db9017c35..80c36bd54abc 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -125,7 +125,7 @@ struct sfq_sched_data {
u8 cur_depth; /* depth of longest slot */
u8 flags;
unsigned short scaled_quantum; /* SFQ_ALLOT_SIZE(quantum) */
- struct tcf_proto *filter_list;
+ struct tcf_proto __rcu *filter_list;
sfq_index *ht; /* Hash table ('divisor' slots) */
struct sfq_slot *slots; /* Flows table ('maxflows' entries) */
@@ -187,6 +187,7 @@ static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch,
{
struct sfq_sched_data *q = qdisc_priv(sch);
struct tcf_result res;
+ struct tcf_proto *fl;
int result;
if (TC_H_MAJ(skb->priority) == sch->handle &&
@@ -194,13 +195,14 @@ static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch,
TC_H_MIN(skb->priority) <= q->divisor)
return TC_H_MIN(skb->priority);
- if (!q->filter_list) {
+ fl = rcu_dereference_bh(q->filter_list);
+ if (!fl) {
skb_flow_dissect(skb, &sfq_skb_cb(skb)->keys);
return sfq_hash(q, skb) + 1;
}
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
- result = tc_classify(skb, q->filter_list, &res);
+ result = tc_classify(skb, fl, &res);
if (result >= 0) {
#ifdef CONFIG_NET_CLS_ACT
switch (result) {
@@ -836,7 +838,8 @@ static void sfq_put(struct Qdisc *q, unsigned long cl)
{
}
-static struct tcf_proto **sfq_find_tcf(struct Qdisc *sch, unsigned long cl)
+static struct tcf_proto __rcu **sfq_find_tcf(struct Qdisc *sch,
+ unsigned long cl)
{
struct sfq_sched_data *q = qdisc_priv(sch);
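
The sch_teql.c hunks that follow mix three different accessors, one per
context: rcu_dereference_bh() where the BH-protected dequeue path reads the
slave queue's qdisc, rtnl_dereference() in teql_destroy() which runs under
RTNL, and rcu_access_pointer() in teql_resolve(), which only compares the
pointer against &noop_qdisc and never dereferences it. A one-line sketch of
that last case, with an illustrative name:

    static bool example_txq_is_noop(struct netdev_queue *txq)
    {
            /* comparison only, no dereference, so no lockdep constraint */
            return rcu_access_pointer(txq->qdisc) == &noop_qdisc;
    }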
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
index aaa8d03ed054..5cd291bd00e4 100644
--- a/net/sched/sch_teql.c
+++ b/net/sched/sch_teql.c
@@ -96,11 +96,14 @@ teql_dequeue(struct Qdisc *sch)
struct teql_sched_data *dat = qdisc_priv(sch);
struct netdev_queue *dat_queue;
struct sk_buff *skb;
+ struct Qdisc *q;
skb = __skb_dequeue(&dat->q);
dat_queue = netdev_get_tx_queue(dat->m->dev, 0);
+ q = rcu_dereference_bh(dat_queue->qdisc);
+
if (skb == NULL) {
- struct net_device *m = qdisc_dev(dat_queue->qdisc);
+ struct net_device *m = qdisc_dev(q);
if (m) {
dat->m->slaves = sch;
netif_wake_queue(m);
@@ -108,7 +111,7 @@ teql_dequeue(struct Qdisc *sch)
} else {
qdisc_bstats_update(sch, skb);
}
- sch->q.qlen = dat->q.qlen + dat_queue->qdisc->q.qlen;
+ sch->q.qlen = dat->q.qlen + q->q.qlen;
return skb;
}
@@ -157,9 +160,9 @@ teql_destroy(struct Qdisc *sch)
txq = netdev_get_tx_queue(master->dev, 0);
master->slaves = NULL;
- root_lock = qdisc_root_sleeping_lock(txq->qdisc);
+ root_lock = qdisc_root_sleeping_lock(rtnl_dereference(txq->qdisc));
spin_lock_bh(root_lock);
- qdisc_reset(txq->qdisc);
+ qdisc_reset(rtnl_dereference(txq->qdisc));
spin_unlock_bh(root_lock);
}
}
@@ -266,7 +269,7 @@ static inline int teql_resolve(struct sk_buff *skb,
struct dst_entry *dst = skb_dst(skb);
int res;
- if (txq->qdisc == &noop_qdisc)
+ if (rcu_access_pointer(txq->qdisc) == &noop_qdisc)
return -ENODEV;
if (!dev->header_ops || !dst)