diff options
Diffstat (limited to 'net')
63 files changed, 905 insertions, 469 deletions
diff --git a/net/Kconfig b/net/Kconfig index b3250944cde9..e24fa0873f32 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -32,7 +32,7 @@ config WANT_COMPAT_NETLINK_MESSAGES config COMPAT_NETLINK_MESSAGES def_bool y depends on COMPAT - depends on WIRELESS_EXT || WANT_COMPAT_NETLINK_MESSAGES + depends on WEXT_CORE || WANT_COMPAT_NETLINK_MESSAGES help This option makes it possible to send different netlink messages to tasks depending on whether the task is a compat task or not. To diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 075c435ad22d..cf09fe591fc2 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -22,7 +22,7 @@ #include <asm/uaccess.h> #include "br_private.h" -/* net device transmit always called with no BH (preempt_disabled) */ +/* net device transmit always called with BH disabled */ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) { struct net_bridge *br = netdev_priv(dev); @@ -48,13 +48,16 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) skb_reset_mac_header(skb); skb_pull(skb, ETH_HLEN); + rcu_read_lock(); if (is_multicast_ether_addr(dest)) { if (unlikely(netpoll_tx_running(dev))) { br_flood_deliver(br, skb); goto out; } - if (br_multicast_rcv(br, NULL, skb)) + if (br_multicast_rcv(br, NULL, skb)) { + kfree_skb(skb); goto out; + } mdst = br_mdb_get(br, skb); if (mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) @@ -67,6 +70,7 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) br_flood_deliver(br, skb); out: + rcu_read_unlock(); return NETDEV_TX_OK; } diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index a744296fc675..90512ccfd3e9 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -214,7 +214,7 @@ void br_fdb_delete_by_port(struct net_bridge *br, spin_unlock_bh(&br->hash_lock); } -/* No locking or refcounting, assumes caller has no preempt (rcu_read_lock) */ +/* No locking or refcounting, assumes caller has rcu_read_lock */ struct net_bridge_fdb_entry *__br_fdb_get(struct net_bridge *br, const unsigned char *addr) { diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 5fc1c5b1c360..826cd5221536 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -39,7 +39,7 @@ static int br_pass_frame_up(struct sk_buff *skb) netif_receive_skb); } -/* note: already called with rcu_read_lock (preempt_disabled) */ +/* note: already called with rcu_read_lock */ int br_handle_frame_finish(struct sk_buff *skb) { const unsigned char *dest = eth_hdr(skb)->h_dest; @@ -110,7 +110,7 @@ drop: goto out; } -/* note: already called with rcu_read_lock (preempt_disabled) */ +/* note: already called with rcu_read_lock */ static int br_handle_local_finish(struct sk_buff *skb) { struct net_bridge_port *p = br_port_get_rcu(skb->dev); @@ -133,8 +133,7 @@ static inline int is_link_local(const unsigned char *dest) /* * Return NULL if skb is handled - * note: already called with rcu_read_lock (preempt_disabled) from - * netif_receive_skb + * note: already called with rcu_read_lock */ struct sk_buff *br_handle_frame(struct sk_buff *skb) { diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 85afcdab4921..eb5b256ffc88 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1728,13 +1728,9 @@ unlock: int br_multicast_toggle(struct net_bridge *br, unsigned long val) { struct net_bridge_port *port; - int err = -ENOENT; + int err = 0; spin_lock(&br->multicast_lock); - if (!netif_running(br->dev)) - goto unlock; - - err = 0; if (br->multicast_disabled == !val) goto unlock; @@ -1742,6 +1738,9 @@ int br_multicast_toggle(struct net_bridge *br, unsigned long val) if (br->multicast_disabled) goto unlock; + if (!netif_running(br->dev)) + goto unlock; + if (br->mdb) { if (br->mdb->old) { err = -EEXIST; diff --git a/net/bridge/br_stp_bpdu.c b/net/bridge/br_stp_bpdu.c index 70aecb48fb69..35cf27087b56 100644 --- a/net/bridge/br_stp_bpdu.c +++ b/net/bridge/br_stp_bpdu.c @@ -131,7 +131,7 @@ void br_send_tcn_bpdu(struct net_bridge_port *p) /* * Called from llc. * - * NO locks, but rcu_read_lock (preempt_disabled) + * NO locks, but rcu_read_lock */ void br_stp_rcv(const struct stp_proto *proto, struct sk_buff *skb, struct net_device *dev) diff --git a/net/caif/cfrfml.c b/net/caif/cfrfml.c index 4b04d25b6a3f..eb1602022ac0 100644 --- a/net/caif/cfrfml.c +++ b/net/caif/cfrfml.c @@ -193,7 +193,7 @@ out: static int cfrfml_transmit_segment(struct cfrfml *rfml, struct cfpkt *pkt) { - caif_assert(!cfpkt_getlen(pkt) < rfml->fragment_size); + caif_assert(cfpkt_getlen(pkt) >= rfml->fragment_size); /* Add info for MUX-layer to route the packet out. */ cfpkt_info(pkt)->channel_id = rfml->serv.layer.id; diff --git a/net/can/raw.c b/net/can/raw.c index ccfe633eec8e..a10e3338f084 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -650,6 +650,10 @@ static int raw_sendmsg(struct kiocb *iocb, struct socket *sock, err = sock_tx_timestamp(msg, sk, skb_tx(skb)); if (err < 0) goto free_skb; + + /* to be able to check the received tx sock reference in raw_rcv() */ + skb_tx(skb)->prevent_sk_orphan = 1; + skb->dev = dev; skb->sk = sk; diff --git a/net/core/dev.c b/net/core/dev.c index 6e1b4370781c..e1c1cdcc2bb0 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -101,8 +101,6 @@ #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/stat.h> -#include <linux/if_bridge.h> -#include <linux/if_macvlan.h> #include <net/dst.h> #include <net/pkt_sched.h> #include <net/checksum.h> @@ -1484,6 +1482,7 @@ static inline void net_timestamp_check(struct sk_buff *skb) int dev_forward_skb(struct net_device *dev, struct sk_buff *skb) { skb_orphan(skb); + nf_reset(skb); if (!(dev->flags & IFF_UP) || (skb->len > (dev->mtu + dev->hard_header_len))) { @@ -1592,9 +1591,7 @@ EXPORT_SYMBOL(__netif_schedule); void dev_kfree_skb_irq(struct sk_buff *skb) { - if (!skb->destructor) - dev_kfree_skb(skb); - else if (atomic_dec_and_test(&skb->users)) { + if (atomic_dec_and_test(&skb->users)) { struct softnet_data *sd; unsigned long flags; @@ -2645,10 +2642,10 @@ static int ing_filter(struct sk_buff *skb) int result = TC_ACT_OK; struct Qdisc *q; - if (MAX_RED_LOOP < ttl++) { - printk(KERN_WARNING - "Redir loop detected Dropping packet (%d->%d)\n", - skb->skb_iif, dev->ifindex); + if (unlikely(MAX_RED_LOOP < ttl++)) { + if (net_ratelimit()) + pr_warning( "Redir loop detected Dropping packet (%d->%d)\n", + skb->skb_iif, dev->ifindex); return TC_ACT_SHOT; } diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index 646ef3bc7200..36e603c78ce9 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -347,9 +347,9 @@ static struct notifier_block dropmon_net_notifier = { static int __init init_net_drop_monitor(void) { - int cpu; - int rc, i, ret; struct per_cpu_dm_data *data; + int cpu, rc; + printk(KERN_INFO "Initalizing network drop monitor service\n"); if (sizeof(void *) > 8) { @@ -357,21 +357,12 @@ static int __init init_net_drop_monitor(void) return -ENOSPC; } - if (genl_register_family(&net_drop_monitor_family) < 0) { + rc = genl_register_family_with_ops(&net_drop_monitor_family, + dropmon_ops, + ARRAY_SIZE(dropmon_ops)); + if (rc) { printk(KERN_ERR "Could not create drop monitor netlink family\n"); - return -EFAULT; - } - - rc = -EFAULT; - - for (i = 0; i < ARRAY_SIZE(dropmon_ops); i++) { - ret = genl_register_ops(&net_drop_monitor_family, - &dropmon_ops[i]); - if (ret) { - printk(KERN_CRIT "Failed to register operation %d\n", - dropmon_ops[i].cmd); - goto out_unreg; - } + return rc; } rc = register_netdevice_notifier(&dropmon_net_notifier); diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index d2b596537d41..af4dfbadf2a0 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -95,6 +95,7 @@ static ssize_t netdev_store(struct device *dev, struct device_attribute *attr, } NETDEVICE_SHOW(dev_id, fmt_hex); +NETDEVICE_SHOW(addr_assign_type, fmt_dec); NETDEVICE_SHOW(addr_len, fmt_dec); NETDEVICE_SHOW(iflink, fmt_dec); NETDEVICE_SHOW(ifindex, fmt_dec); @@ -295,6 +296,7 @@ static ssize_t show_ifalias(struct device *dev, } static struct device_attribute net_class_attributes[] = { + __ATTR(addr_assign_type, S_IRUGO, show_addr_assign_type, NULL), __ATTR(addr_len, S_IRUGO, show_addr_len, NULL), __ATTR(dev_id, S_IRUGO, show_dev_id, NULL), __ATTR(ifalias, S_IRUGO | S_IWUSR, show_ifalias, store_ifalias), diff --git a/net/core/netpoll.c b/net/core/netpoll.c index c2b7a8bed8f6..537e01afd81b 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -49,6 +49,7 @@ static atomic_t trapped; (MAX_UDP_CHUNK + sizeof(struct udphdr) + \ sizeof(struct iphdr) + sizeof(struct ethhdr)) +static void zap_completion_queue(void); static void arp_reply(struct sk_buff *skb); static unsigned int carrier_timeout = 4; @@ -196,6 +197,7 @@ void netpoll_poll_dev(struct net_device *dev) service_arp_queue(dev->npinfo); + zap_completion_queue(); } EXPORT_SYMBOL(netpoll_poll_dev); @@ -221,11 +223,40 @@ static void refill_skbs(void) spin_unlock_irqrestore(&skb_pool.lock, flags); } +static void zap_completion_queue(void) +{ + unsigned long flags; + struct softnet_data *sd = &get_cpu_var(softnet_data); + + if (sd->completion_queue) { + struct sk_buff *clist; + + local_irq_save(flags); + clist = sd->completion_queue; + sd->completion_queue = NULL; + local_irq_restore(flags); + + while (clist != NULL) { + struct sk_buff *skb = clist; + clist = clist->next; + if (skb->destructor) { + atomic_inc(&skb->users); + dev_kfree_skb_any(skb); /* put this one back */ + } else { + __kfree_skb(skb); + } + } + } + + put_cpu_var(softnet_data); +} + static struct sk_buff *find_skb(struct netpoll *np, int len, int reserve) { int count = 0; struct sk_buff *skb; + zap_completion_queue(); refill_skbs(); repeat: diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 24a19debda1b..10a1ea72010d 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -1434,18 +1434,12 @@ static ssize_t pktgen_if_write(struct file *file, i += len; for (*m = 0; *v && m < pkt_dev->dst_mac + 6; v++) { - if (*v >= '0' && *v <= '9') { - *m *= 16; - *m += *v - '0'; - } - if (*v >= 'A' && *v <= 'F') { - *m *= 16; - *m += *v - 'A' + 10; - } - if (*v >= 'a' && *v <= 'f') { - *m *= 16; - *m += *v - 'a' + 10; - } + int value; + + value = hex_to_bin(*v); + if (value >= 0) + *m = *m * 16 + value; + if (*v == ':') { m++; *m = 0; @@ -1476,18 +1470,12 @@ static ssize_t pktgen_if_write(struct file *file, i += len; for (*m = 0; *v && m < pkt_dev->src_mac + 6; v++) { - if (*v >= '0' && *v <= '9') { - *m *= 16; - *m += *v - '0'; - } - if (*v >= 'A' && *v <= 'F') { - *m *= 16; - *m += *v - 'A' + 10; - } - if (*v >= 'a' && *v <= 'f') { - *m *= 16; - *m += *v - 'a' + 10; - } + int value; + + value = hex_to_bin(*v); + if (value >= 0) + *m = *m * 16 + value; + if (*v == ':') { m++; *m = 0; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 76d33ca5f037..3a2513f0d0c3 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -817,7 +817,7 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, memcpy(data + nhead, skb->head, skb->tail - skb->head); #endif memcpy(data + size, skb_end_pointer(skb), - sizeof(struct skb_shared_info)); + offsetof(struct skb_shared_info, frags[skb_shinfo(skb)->nr_frags])); for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) get_page(skb_shinfo(skb)->frags[i].page); @@ -843,7 +843,9 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, skb->network_header += off; if (skb_mac_header_was_set(skb)) skb->mac_header += off; - skb->csum_start += nhead; + /* Only adjust this if it actually is csum_start rather than csum */ + if (skb->ip_summed == CHECKSUM_PARTIAL) + skb->csum_start += nhead; skb->cloned = 0; skb->hdr_len = 0; skb->nohdr = 0; @@ -930,7 +932,8 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb, copy_skb_header(n, skb); off = newheadroom - oldheadroom; - n->csum_start += off; + if (n->ip_summed == CHECKSUM_PARTIAL) + n->csum_start += off; #ifdef NET_SKBUFF_DATA_USES_OFFSET n->transport_header += off; n->network_header += off; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 6652bd9da676..04b69896df5f 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -446,7 +446,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) int ptr; struct net_device *dev; struct sk_buff *skb2; - unsigned int mtu, hlen, left, len, ll_rs, pad; + unsigned int mtu, hlen, left, len, ll_rs; int offset; __be16 not_last_frag; struct rtable *rt = skb_rtable(skb); @@ -585,9 +585,7 @@ slow_path: /* for bridged IP traffic encapsulated inside f.e. a vlan header, * we need to make room for the encapsulating header */ - pad = nf_bridge_pad(skb); - ll_rs = LL_RESERVED_SPACE_EXTRA(rt->dst.dev, pad); - mtu -= pad; + ll_rs = LL_RESERVED_SPACE_EXTRA(rt->dst.dev, nf_bridge_pad(skb)); /* * Fragment the datagram. diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 16c0ba0a2728..6bccba31d132 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -283,16 +283,13 @@ unsigned int arpt_do_table(struct sk_buff *skb, arp = arp_hdr(skb); do { const struct arpt_entry_target *t; - int hdr_len; if (!arp_packet_match(arp, skb->dev, indev, outdev, &e->arp)) { e = arpt_next_entry(e); continue; } - hdr_len = sizeof(*arp) + (2 * sizeof(struct in_addr)) + - (2 * skb->dev->addr_len); - ADD_COUNTER(e->counters, hdr_len, 1); + ADD_COUNTER(e->counters, arp_hdr_len(skb->dev), 1); t = arpt_get_target_c(e); @@ -713,7 +710,7 @@ static void get_counters(const struct xt_table_info *t, struct arpt_entry *iter; unsigned int cpu; unsigned int i; - unsigned int curcpu; + unsigned int curcpu = get_cpu(); /* Instead of clearing (by a previous call to memset()) * the counters and using adds, we set the counters @@ -723,14 +720,16 @@ static void get_counters(const struct xt_table_info *t, * if new softirq were to run and call ipt_do_table */ local_bh_disable(); - curcpu = smp_processor_id(); - i = 0; xt_entry_foreach(iter, t->entries[curcpu], t->size) { SET_COUNTER(counters[i], iter->counters.bcnt, iter->counters.pcnt); ++i; } + local_bh_enable(); + /* Processing counters from other cpus, we can let bottom half enabled, + * (preemption is disabled) + */ for_each_possible_cpu(cpu) { if (cpu == curcpu) @@ -744,7 +743,7 @@ static void get_counters(const struct xt_table_info *t, } xt_info_wrunlock(cpu); } - local_bh_enable(); + put_cpu(); } static struct xt_counters *alloc_counters(const struct xt_table *table) diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index b38c11810c65..c439721b165a 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -364,7 +364,7 @@ ipt_do_table(struct sk_buff *skb, goto no_match; } - ADD_COUNTER(e->counters, ntohs(ip->tot_len), 1); + ADD_COUNTER(e->counters, skb->len, 1); t = ipt_get_target(e); IP_NF_ASSERT(t->u.kernel.target); @@ -884,7 +884,7 @@ get_counters(const struct xt_table_info *t, struct ipt_entry *iter; unsigned int cpu; unsigned int i; - unsigned int curcpu; + unsigned int curcpu = get_cpu(); /* Instead of clearing (by a previous call to memset()) * the counters and using adds, we set the counters @@ -894,14 +894,16 @@ get_counters(const struct xt_table_info *t, * if new softirq were to run and call ipt_do_table */ local_bh_disable(); - curcpu = smp_processor_id(); - i = 0; xt_entry_foreach(iter, t->entries[curcpu], t->size) { SET_COUNTER(counters[i], iter->counters.bcnt, iter->counters.pcnt); ++i; } + local_bh_enable(); + /* Processing counters from other cpus, we can let bottom half enabled, + * (preemption is disabled) + */ for_each_possible_cpu(cpu) { if (cpu == curcpu) @@ -915,7 +917,7 @@ get_counters(const struct xt_table_info *t, } xt_info_wrunlock(cpu); } - local_bh_enable(); + put_cpu(); } static struct xt_counters *alloc_counters(const struct xt_table *table) diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 64d0875f5192..3a43cf36db87 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -469,7 +469,7 @@ struct arp_payload { __be32 src_ip; u_int8_t dst_hw[ETH_ALEN]; __be32 dst_ip; -} __attribute__ ((packed)); +} __packed; #ifdef DEBUG static void arp_print(struct arp_payload *payload) diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c index bbbd2736c549..b254dafaf429 100644 --- a/net/ipv4/netfilter/ipt_REJECT.c +++ b/net/ipv4/netfilter/ipt_REJECT.c @@ -95,10 +95,11 @@ static void send_reset(struct sk_buff *oldskb, int hook) } tcph->rst = 1; - tcph->check = tcp_v4_check(sizeof(struct tcphdr), - niph->saddr, niph->daddr, - csum_partial(tcph, - sizeof(struct tcphdr), 0)); + tcph->check = ~tcp_v4_check(sizeof(struct tcphdr), niph->saddr, + niph->daddr, 0); + nskb->ip_summed = CHECKSUM_PARTIAL; + nskb->csum_start = (unsigned char *)tcph - nskb->head; + nskb->csum_offset = offsetof(struct tcphdr, check); addr_type = RTN_UNSPEC; if (hook != NF_INET_FORWARD @@ -115,7 +116,6 @@ static void send_reset(struct sk_buff *oldskb, int hook) goto free_nskb; niph->ttl = dst_metric(skb_dst(nskb), RTAX_HOPLIMIT); - nskb->ip_summed = CHECKSUM_NONE; /* "Never happens" */ if (nskb->len > dst_mtu(skb_dst(nskb))) diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c index c7719b283ada..8c8632d9b93c 100644 --- a/net/ipv4/netfilter/nf_nat_core.c +++ b/net/ipv4/netfilter/nf_nat_core.c @@ -261,14 +261,9 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple, rcu_read_lock(); proto = __nf_nat_proto_find(orig_tuple->dst.protonum); - /* Change protocol info to have some randomization */ - if (range->flags & IP_NAT_RANGE_PROTO_RANDOM) { - proto->unique_tuple(tuple, range, maniptype, ct); - goto out; - } - /* Only bother mapping if it's not already in range and unique */ - if ((!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED) || + if (!(range->flags & IP_NAT_RANGE_PROTO_RANDOM) && + (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED) || proto->in_range(tuple, maniptype, &range->min, &range->max)) && !nf_nat_used_tuple(tuple, ct)) goto out; @@ -440,7 +435,7 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct, if (!skb_make_writable(skb, hdrlen + sizeof(*inside))) return 0; - inside = (void *)skb->data + ip_hdrlen(skb); + inside = (void *)skb->data + hdrlen; /* We're actually going to mangle it beyond trivial checksum adjustment, so make sure the current checksum is correct. */ @@ -470,12 +465,10 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct, /* rcu_read_lock()ed by nf_hook_slow */ l4proto = __nf_ct_l4proto_find(PF_INET, inside->ip.protocol); - if (!nf_ct_get_tuple(skb, - ip_hdrlen(skb) + sizeof(struct icmphdr), - (ip_hdrlen(skb) + + if (!nf_ct_get_tuple(skb, hdrlen + sizeof(struct icmphdr), + (hdrlen + sizeof(struct icmphdr) + inside->ip.ihl * 4), - (u_int16_t)AF_INET, - inside->ip.protocol, + (u_int16_t)AF_INET, inside->ip.protocol, &inner, l3proto, l4proto)) return 0; @@ -484,15 +477,13 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct, pass all hooks (locally-generated ICMP). Consider incoming packet: PREROUTING (DST manip), routing produces ICMP, goes through POSTROUTING (which must correct the DST manip). */ - if (!manip_pkt(inside->ip.protocol, skb, - ip_hdrlen(skb) + sizeof(inside->icmp), - &ct->tuplehash[!dir].tuple, - !manip)) + if (!manip_pkt(inside->ip.protocol, skb, hdrlen + sizeof(inside->icmp), + &ct->tuplehash[!dir].tuple, !manip)) return 0; if (skb->ip_summed != CHECKSUM_PARTIAL) { /* Reloading "inside" here since manip_pkt inner. */ - inside = (void *)skb->data + ip_hdrlen(skb); + inside = (void *)skb->data + hdrlen; inside->icmp.checksum = 0; inside->icmp.checksum = csum_fold(skb_checksum(skb, hdrlen, diff --git a/net/ipv4/netfilter/nf_nat_proto_common.c b/net/ipv4/netfilter/nf_nat_proto_common.c index 6c4f11f51446..3e61faf23a9a 100644 --- a/net/ipv4/netfilter/nf_nat_proto_common.c +++ b/net/ipv4/netfilter/nf_nat_proto_common.c @@ -34,7 +34,7 @@ bool nf_nat_proto_in_range(const struct nf_conntrack_tuple *tuple, } EXPORT_SYMBOL_GPL(nf_nat_proto_in_range); -bool nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple, +void nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple, const struct nf_nat_range *range, enum nf_nat_manip_type maniptype, const struct nf_conn *ct, @@ -53,7 +53,7 @@ bool nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple, if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) { /* If it's dst rewrite, can't change port */ if (maniptype == IP_NAT_MANIP_DST) - return false; + return; if (ntohs(*portptr) < 1024) { /* Loose convention: >> 512 is credential passing */ @@ -81,15 +81,15 @@ bool nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple, else off = *rover; - for (i = 0; i < range_size; i++, off++) { + for (i = 0; ; ++off) { *portptr = htons(min + off % range_size); - if (nf_nat_used_tuple(tuple, ct)) + if (++i != range_size && nf_nat_used_tuple(tuple, ct)) continue; if (!(range->flags & IP_NAT_RANGE_PROTO_RANDOM)) *rover = off; - return true; + return; } - return false; + return; } EXPORT_SYMBOL_GPL(nf_nat_proto_unique_tuple); diff --git a/net/ipv4/netfilter/nf_nat_proto_dccp.c b/net/ipv4/netfilter/nf_nat_proto_dccp.c index 22485ce306d4..570faf2667b2 100644 --- a/net/ipv4/netfilter/nf_nat_proto_dccp.c +++ b/net/ipv4/netfilter/nf_nat_proto_dccp.c @@ -22,14 +22,14 @@ static u_int16_t dccp_port_rover; -static bool +static void dccp_unique_tuple(struct nf_conntrack_tuple *tuple, const struct nf_nat_range *range, enum nf_nat_manip_type maniptype, const struct nf_conn *ct) { - return nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, - &dccp_port_rover); + nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, + &dccp_port_rover); } static bool diff --git a/net/ipv4/netfilter/nf_nat_proto_gre.c b/net/ipv4/netfilter/nf_nat_proto_gre.c index d7e89201351e..bc8d83a31c73 100644 --- a/net/ipv4/netfilter/nf_nat_proto_gre.c +++ b/net/ipv4/netfilter/nf_nat_proto_gre.c @@ -37,7 +37,7 @@ MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>"); MODULE_DESCRIPTION("Netfilter NAT protocol helper module for GRE"); /* generate unique tuple ... */ -static bool +static void gre_unique_tuple(struct nf_conntrack_tuple *tuple, const struct nf_nat_range *range, enum nf_nat_manip_type maniptype, @@ -50,7 +50,7 @@ gre_unique_tuple(struct nf_conntrack_tuple *tuple, /* If there is no master conntrack we are not PPTP, do not change tuples */ if (!ct->master) - return false; + return; if (maniptype == IP_NAT_MANIP_SRC) keyptr = &tuple->src.u.gre.key; @@ -68,14 +68,14 @@ gre_unique_tuple(struct nf_conntrack_tuple *tuple, pr_debug("min = %u, range_size = %u\n", min, range_size); - for (i = 0; i < range_size; i++, key++) { + for (i = 0; ; ++key) { *keyptr = htons(min + key % range_size); - if (!nf_nat_used_tuple(tuple, ct)) - return true; + if (++i == range_size || !nf_nat_used_tuple(tuple, ct)) + return; } pr_debug("%p: no NAT mapping\n", ct); - return false; + return; } /* manipulate a GRE packet according to maniptype */ diff --git a/net/ipv4/netfilter/nf_nat_proto_icmp.c b/net/ipv4/netfilter/nf_nat_proto_icmp.c index 19a8b0b07d8e..5744c3ec847c 100644 --- a/net/ipv4/netfilter/nf_nat_proto_icmp.c +++ b/net/ipv4/netfilter/nf_nat_proto_icmp.c @@ -27,7 +27,7 @@ icmp_in_range(const struct nf_conntrack_tuple *tuple, ntohs(tuple->src.u.icmp.id) <= ntohs(max->icmp.id); } -static bool +static void icmp_unique_tuple(struct nf_conntrack_tuple *tuple, const struct nf_nat_range *range, enum nf_nat_manip_type maniptype, @@ -42,13 +42,13 @@ icmp_unique_tuple(struct nf_conntrack_tuple *tuple, if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) range_size = 0xFFFF; - for (i = 0; i < range_size; i++, id++) { + for (i = 0; ; ++id) { tuple->src.u.icmp.id = htons(ntohs(range->min.icmp.id) + (id % range_size)); - if (!nf_nat_used_tuple(tuple, ct)) - return true; + if (++i == range_size || !nf_nat_used_tuple(tuple, ct)) + return; } - return false; + return; } static bool diff --git a/net/ipv4/netfilter/nf_nat_proto_sctp.c b/net/ipv4/netfilter/nf_nat_proto_sctp.c index 3fc598eeeb1a..756331d42661 100644 --- a/net/ipv4/netfilter/nf_nat_proto_sctp.c +++ b/net/ipv4/netfilter/nf_nat_proto_sctp.c @@ -16,14 +16,14 @@ static u_int16_t nf_sctp_port_rover; -static bool +static void sctp_unique_tuple(struct nf_conntrack_tuple *tuple, const struct nf_nat_range *range, enum nf_nat_manip_type maniptype, const struct nf_conn *ct) { - return nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, - &nf_sctp_port_rover); + nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, + &nf_sctp_port_rover); } static bool diff --git a/net/ipv4/netfilter/nf_nat_proto_tcp.c b/net/ipv4/netfilter/nf_nat_proto_tcp.c index 399e2cfa263b..aa460a595d5d 100644 --- a/net/ipv4/netfilter/nf_nat_proto_tcp.c +++ b/net/ipv4/netfilter/nf_nat_proto_tcp.c @@ -20,14 +20,13 @@ static u_int16_t tcp_port_rover; -static bool +static void tcp_unique_tuple(struct nf_conntrack_tuple *tuple, const struct nf_nat_range *range, enum nf_nat_manip_type maniptype, const struct nf_conn *ct) { - return nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, - &tcp_port_rover); + nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, &tcp_port_rover); } static bool diff --git a/net/ipv4/netfilter/nf_nat_proto_udp.c b/net/ipv4/netfilter/nf_nat_proto_udp.c index 9e61c79492e4..dfe65c7e2925 100644 --- a/net/ipv4/netfilter/nf_nat_proto_udp.c +++ b/net/ipv4/netfilter/nf_nat_proto_udp.c @@ -19,14 +19,13 @@ static u_int16_t udp_port_rover; -static bool +static void udp_unique_tuple(struct nf_conntrack_tuple *tuple, const struct nf_nat_range *range, enum nf_nat_manip_type maniptype, const struct nf_conn *ct) { - return nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, - &udp_port_rover); + nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, &udp_port_rover); } static bool diff --git a/net/ipv4/netfilter/nf_nat_proto_udplite.c b/net/ipv4/netfilter/nf_nat_proto_udplite.c index 440a229bbd87..3cc8c8af39ef 100644 --- a/net/ipv4/netfilter/nf_nat_proto_udplite.c +++ b/net/ipv4/netfilter/nf_nat_proto_udplite.c @@ -18,14 +18,14 @@ static u_int16_t udplite_port_rover; -static bool +static void udplite_unique_tuple(struct nf_conntrack_tuple *tuple, const struct nf_nat_range *range, enum nf_nat_manip_type maniptype, const struct nf_conn *ct) { - return nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, - &udplite_port_rover); + nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, + &udplite_port_rover); } static bool diff --git a/net/ipv4/netfilter/nf_nat_proto_unknown.c b/net/ipv4/netfilter/nf_nat_proto_unknown.c index 14381c62acea..a50f2bc1c732 100644 --- a/net/ipv4/netfilter/nf_nat_proto_unknown.c +++ b/net/ipv4/netfilter/nf_nat_proto_unknown.c @@ -26,14 +26,14 @@ static bool unknown_in_range(const struct nf_conntrack_tuple *tuple, return true; } -static bool unknown_unique_tuple(struct nf_conntrack_tuple *tuple, +static void unknown_unique_tuple(struct nf_conntrack_tuple *tuple, const struct nf_nat_range *range, enum nf_nat_manip_type maniptype, const struct nf_conn *ct) { /* Sorry: we can't help you; if it's not unique, we can't frob anything. */ - return false; + return; } static bool diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 562ce92de2a6..3f56b6e6c6aa 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2878,6 +2878,9 @@ static int rt_fill_info(struct net *net, if (rtnetlink_put_metrics(skb, rt->dst.metrics) < 0) goto nla_put_failure; + if (rt->fl.mark) + NLA_PUT_BE32(skb, RTA_MARK, rt->fl.mark); + error = rt->dst.error; expires = rt->dst.expires ? rt->dst.expires - jiffies : 0; if (rt->peer) { @@ -2933,6 +2936,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void __be32 src = 0; u32 iif; int err; + int mark; struct sk_buff *skb; err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy); @@ -2960,6 +2964,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void src = tb[RTA_SRC] ? nla_get_be32(tb[RTA_SRC]) : 0; dst = tb[RTA_DST] ? nla_get_be32(tb[RTA_DST]) : 0; iif = tb[RTA_IIF] ? nla_get_u32(tb[RTA_IIF]) : 0; + mark = tb[RTA_MARK] ? nla_get_u32(tb[RTA_MARK]) : 0; if (iif) { struct net_device *dev; @@ -2972,6 +2977,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void skb->protocol = htons(ETH_P_IP); skb->dev = dev; + skb->mark = mark; local_bh_disable(); err = ip_route_input(skb, dst, src, rtm->rtm_tos, dev); local_bh_enable(); @@ -2989,6 +2995,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void }, }, .oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0, + .mark = mark, }; err = ip_route_output_key(net, &rt, &fl); } diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 86b9f67abede..176e11aaea77 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2187,6 +2187,8 @@ static int do_tcp_setsockopt(struct sock *sk, int level, GFP_KERNEL); if (cvp == NULL) return -ENOMEM; + + kref_init(&cvp->kref); } lock_sock(sk); tp->rx_opt.cookie_in_always = @@ -2201,12 +2203,11 @@ static int do_tcp_setsockopt(struct sock *sk, int level, */ kref_put(&tp->cookie_values->kref, tcp_cookie_values_release); - kref_init(&cvp->kref); - tp->cookie_values = cvp; } else { cvp = tp->cookie_values; } } + if (cvp != NULL) { cvp->cookie_desired = ctd.tcpct_cookie_desired; @@ -2220,6 +2221,8 @@ static int do_tcp_setsockopt(struct sock *sk, int level, cvp->s_data_desired = ctd.tcpct_s_data_desired; cvp->s_data_constant = 0; /* false */ } + + tp->cookie_values = cvp; } release_sock(sk); return err; @@ -2601,6 +2604,12 @@ static int do_tcp_getsockopt(struct sock *sk, int level, return -EFAULT; return 0; } + case TCP_THIN_LINEAR_TIMEOUTS: + val = tp->thin_lto; + break; + case TCP_THIN_DUPACK: + val = tp->thin_dupack; + break; default: return -ENOPROTOOPT; } diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index e81155d2f251..ab70a3fbcafa 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1763,7 +1763,10 @@ static struct inet6_dev *addrconf_add_dev(struct net_device *dev) idev = ipv6_find_idev(dev); if (!idev) - return NULL; + return ERR_PTR(-ENOBUFS); + + if (idev->cnf.disable_ipv6) + return ERR_PTR(-EACCES); /* Add default multicast route */ addrconf_add_mroute(dev); @@ -2132,8 +2135,9 @@ static int inet6_addr_add(struct net *net, int ifindex, struct in6_addr *pfx, if (!dev) return -ENODEV; - if ((idev = addrconf_add_dev(dev)) == NULL) - return -ENOBUFS; + idev = addrconf_add_dev(dev); + if (IS_ERR(idev)) + return PTR_ERR(idev); scope = ipv6_addr_scope(pfx); @@ -2380,7 +2384,7 @@ static void addrconf_dev_config(struct net_device *dev) } idev = addrconf_add_dev(dev); - if (idev == NULL) + if (IS_ERR(idev)) return; memset(&addr, 0, sizeof(struct in6_addr)); @@ -2471,7 +2475,7 @@ static void addrconf_ip6_tnl_config(struct net_device *dev) ASSERT_RTNL(); idev = addrconf_add_dev(dev); - if (!idev) { + if (IS_ERR(idev)) { printk(KERN_DEBUG "init ip6-ip6: add_dev failed\n"); return; } diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index dc41d6d3c6c6..5359ef4daac5 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -387,9 +387,7 @@ ip6t_do_table(struct sk_buff *skb, goto no_match; } - ADD_COUNTER(e->counters, - ntohs(ipv6_hdr(skb)->payload_len) + - sizeof(struct ipv6hdr), 1); + ADD_COUNTER(e->counters, skb->len, 1); t = ip6t_get_target_c(e); IP_NF_ASSERT(t->u.kernel.target); @@ -899,7 +897,7 @@ get_counters(const struct xt_table_info *t, struct ip6t_entry *iter; unsigned int cpu; unsigned int i; - unsigned int curcpu; + unsigned int curcpu = get_cpu(); /* Instead of clearing (by a previous call to memset()) * the counters and using adds, we set the counters @@ -909,14 +907,16 @@ get_counters(const struct xt_table_info *t, * if new softirq were to run and call ipt_do_table */ local_bh_disable(); - curcpu = smp_processor_id(); - i = 0; xt_entry_foreach(iter, t->entries[curcpu], t->size) { SET_COUNTER(counters[i], iter->counters.bcnt, iter->counters.pcnt); ++i; } + local_bh_enable(); + /* Processing counters from other cpus, we can let bottom half enabled, + * (preemption is disabled) + */ for_each_possible_cpu(cpu) { if (cpu == curcpu) @@ -930,7 +930,7 @@ get_counters(const struct xt_table_info *t, } xt_info_wrunlock(cpu); } - local_bh_enable(); + put_cpu(); } static struct xt_counters *alloc_counters(const struct xt_table *table) diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 9254008602d4..098a050a20b0 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -269,6 +269,11 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb, * in the chain of fragments so far. We must know where to put * this fragment, right? */ + prev = fq->q.fragments_tail; + if (!prev || NFCT_FRAG6_CB(prev)->offset < offset) { + next = NULL; + goto found; + } prev = NULL; for (next = fq->q.fragments; next != NULL; next = next->next) { if (NFCT_FRAG6_CB(next)->offset >= offset) @@ -276,6 +281,7 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb, prev = next; } +found: /* We found where to put this one. Check for overlap with * preceding fragment, and, if needed, align things so that * any overlaps are eliminated. @@ -341,6 +347,8 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb, /* Insert this fragment in the chain of fragments. */ skb->next = next; + if (!next) + fq->q.fragments_tail = skb; if (prev) prev->next = skb; else @@ -464,6 +472,7 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev) head->csum); fq->q.fragments = NULL; + fq->q.fragments_tail = NULL; /* all original skbs are linked into the NFCT_FRAG6_CB(head).orig */ fp = skb_shinfo(head)->frag_list; diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index dab6b8efe5fa..29ac8e1a509e 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -627,7 +627,7 @@ static void ieee80211_send_layer2_update(struct sta_info *sta) skb->dev = sta->sdata->dev; skb->protocol = eth_type_trans(skb, sta->sdata->dev); memset(skb->cb, 0, sizeof(skb->cb)); - netif_rx(skb); + netif_rx_ni(skb); } static void sta_apply_parameters(struct ieee80211_local *local, diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 7cc4f913a431..798a91b100cc 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -685,10 +685,12 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) return 0; +#ifdef CONFIG_INET fail_ifa: pm_qos_remove_notifier(PM_QOS_NETWORK_LATENCY, &local->network_latency_notifier); rtnl_lock(); +#endif fail_pm_qos: ieee80211_led_exit(local); ieee80211_remove_interfaces(local); diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 41f20fb7e670..872d7b6ef6b3 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -400,19 +400,7 @@ static int __ieee80211_start_scan(struct ieee80211_sub_if_data *sdata, else __set_bit(SCAN_SW_SCANNING, &local->scanning); - /* - * Kicking off the scan need not be protected, - * only the scan variable stuff, since now - * local->scan_req is assigned and other callers - * will abort their scan attempts. - * - * This avoids too many locking dependencies - * so that the scan completed calls have more - * locking freedom. - */ - ieee80211_recalc_idle(local); - mutex_unlock(&local->scan_mtx); if (local->ops->hw_scan) { WARN_ON(!ieee80211_prep_hw_scan(local)); @@ -420,8 +408,6 @@ static int __ieee80211_start_scan(struct ieee80211_sub_if_data *sdata, } else rc = ieee80211_start_sw_scan(local); - mutex_lock(&local->scan_mtx); - if (rc) { kfree(local->hw_scan_req); local->hw_scan_req = NULL; diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index aa2f106347e4..43288259f4a1 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -326,6 +326,22 @@ config NETFILTER_XT_CONNMARK comment "Xtables targets" +config NETFILTER_XT_TARGET_CHECKSUM + tristate "CHECKSUM target support" + depends on IP_NF_MANGLE || IP6_NF_MANGLE + depends on NETFILTER_ADVANCED + ---help--- + This option adds a `CHECKSUM' target, which can be used in the iptables mangle + table. + + You can use this target to compute and fill in the checksum in + a packet that lacks a checksum. This is particularly useful, + if you need to work around old applications such as dhcp clients, + that do not work well with checksum offloads, but don't want to disable + checksum offload in your device. + + To compile it as a module, choose M here. If unsure, say N. + config NETFILTER_XT_TARGET_CLASSIFY tristate '"CLASSIFY" target support' depends on NETFILTER_ADVANCED @@ -647,6 +663,15 @@ config NETFILTER_XT_MATCH_CONNTRACK To compile it as a module, choose M here. If unsure, say N. +config NETFILTER_XT_MATCH_CPU + tristate '"cpu" match support' + depends on NETFILTER_ADVANCED + help + CPU matching allows you to match packets based on the CPU + currently handling the packet. + + To compile it as a module, choose M here. If unsure, say N. + config NETFILTER_XT_MATCH_DCCP tristate '"dccp" protocol match support' depends on NETFILTER_ADVANCED @@ -726,6 +751,16 @@ config NETFILTER_XT_MATCH_IPRANGE If unsure, say M. +config NETFILTER_XT_MATCH_IPVS + tristate '"ipvs" match support' + depends on IP_VS + depends on NETFILTER_ADVANCED + depends on NF_CONNTRACK + help + This option allows you to match against IPVS properties of a packet. + + If unsure, say N. + config NETFILTER_XT_MATCH_LENGTH tristate '"length" match support' depends on NETFILTER_ADVANCED diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index e28420aac5ef..441050f31111 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -45,6 +45,7 @@ obj-$(CONFIG_NETFILTER_XT_MARK) += xt_mark.o obj-$(CONFIG_NETFILTER_XT_CONNMARK) += xt_connmark.o # targets +obj-$(CONFIG_NETFILTER_XT_TARGET_CHECKSUM) += xt_CHECKSUM.o obj-$(CONFIG_NETFILTER_XT_TARGET_CLASSIFY) += xt_CLASSIFY.o obj-$(CONFIG_NETFILTER_XT_TARGET_CONNSECMARK) += xt_CONNSECMARK.o obj-$(CONFIG_NETFILTER_XT_TARGET_CT) += xt_CT.o @@ -69,6 +70,7 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_COMMENT) += xt_comment.o obj-$(CONFIG_NETFILTER_XT_MATCH_CONNBYTES) += xt_connbytes.o obj-$(CONFIG_NETFILTER_XT_MATCH_CONNLIMIT) += xt_connlimit.o obj-$(CONFIG_NETFILTER_XT_MATCH_CONNTRACK) += xt_conntrack.o +obj-$(CONFIG_NETFILTER_XT_MATCH_CPU) += xt_cpu.o obj-$(CONFIG_NETFILTER_XT_MATCH_DCCP) += xt_dccp.o obj-$(CONFIG_NETFILTER_XT_MATCH_DSCP) += xt_dscp.o obj-$(CONFIG_NETFILTER_XT_MATCH_ESP) += xt_esp.o @@ -76,6 +78,7 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_HASHLIMIT) += xt_hashlimit.o obj-$(CONFIG_NETFILTER_XT_MATCH_HELPER) += xt_helper.o obj-$(CONFIG_NETFILTER_XT_MATCH_HL) += xt_hl.o obj-$(CONFIG_NETFILTER_XT_MATCH_IPRANGE) += xt_iprange.o +obj-$(CONFIG_NETFILTER_XT_MATCH_IPVS) += xt_ipvs.o obj-$(CONFIG_NETFILTER_XT_MATCH_LENGTH) += xt_length.o obj-$(CONFIG_NETFILTER_XT_MATCH_LIMIT) += xt_limit.o obj-$(CONFIG_NETFILTER_XT_MATCH_MAC) += xt_mac.o diff --git a/net/netfilter/ipvs/Kconfig b/net/netfilter/ipvs/Kconfig index 712ccad13344..46a77d5c3887 100644 --- a/net/netfilter/ipvs/Kconfig +++ b/net/netfilter/ipvs/Kconfig @@ -3,7 +3,7 @@ # menuconfig IP_VS tristate "IP virtual server support" - depends on NET && INET && NETFILTER + depends on NET && INET && NETFILTER && NF_CONNTRACK ---help--- IP Virtual Server support will let you build a high-performance virtual server based on cluster of two or more real servers. This @@ -26,7 +26,7 @@ if IP_VS config IP_VS_IPV6 bool "IPv6 support for IPVS" - depends on EXPERIMENTAL && (IPV6 = y || IP_VS = IPV6) + depends on IPV6 = y || IP_VS = IPV6 ---help--- Add IPv6 support to IPVS. This is incomplete and might be dangerous. @@ -87,19 +87,16 @@ config IP_VS_PROTO_UDP protocol. Say Y if unsure. config IP_VS_PROTO_AH_ESP - bool - depends on UNDEFINED + def_bool IP_VS_PROTO_ESP || IP_VS_PROTO_AH config IP_VS_PROTO_ESP bool "ESP load balancing support" - select IP_VS_PROTO_AH_ESP ---help--- This option enables support for load balancing ESP (Encapsulation Security Payload) transport protocol. Say Y if unsure. config IP_VS_PROTO_AH bool "AH load balancing support" - select IP_VS_PROTO_AH_ESP ---help--- This option enables support for load balancing AH (Authentication Header) transport protocol. Say Y if unsure. @@ -238,7 +235,7 @@ comment 'IPVS application helper' config IP_VS_FTP tristate "FTP protocol helper" - depends on IP_VS_PROTO_TCP + depends on IP_VS_PROTO_TCP && NF_NAT ---help--- FTP is a protocol that transfers IP address and/or port number in the payload. In the virtual server via Network Address Translation, diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c index 1cb0e834f8ff..e76f87f4aca8 100644 --- a/net/netfilter/ipvs/ip_vs_app.c +++ b/net/netfilter/ipvs/ip_vs_app.c @@ -569,49 +569,6 @@ static const struct file_operations ip_vs_app_fops = { }; #endif - -/* - * Replace a segment of data with a new segment - */ -int ip_vs_skb_replace(struct sk_buff *skb, gfp_t pri, - char *o_buf, int o_len, char *n_buf, int n_len) -{ - int diff; - int o_offset; - int o_left; - - EnterFunction(9); - - diff = n_len - o_len; - o_offset = o_buf - (char *)skb->data; - /* The length of left data after o_buf+o_len in the skb data */ - o_left = skb->len - (o_offset + o_len); - - if (diff <= 0) { - memmove(o_buf + n_len, o_buf + o_len, o_left); - memcpy(o_buf, n_buf, n_len); - skb_trim(skb, skb->len + diff); - } else if (diff <= skb_tailroom(skb)) { - skb_put(skb, diff); - memmove(o_buf + n_len, o_buf + o_len, o_left); - memcpy(o_buf, n_buf, n_len); - } else { - if (pskb_expand_head(skb, skb_headroom(skb), diff, pri)) - return -ENOMEM; - skb_put(skb, diff); - memmove(skb->data + o_offset + n_len, - skb->data + o_offset + o_len, o_left); - skb_copy_to_linear_data_offset(skb, o_offset, n_buf, n_len); - } - - /* must update the iph total length here */ - ip_hdr(skb)->tot_len = htons(skb->len); - - LeaveFunction(9); - return 0; -} - - int __init ip_vs_app_init(void) { /* we will replace it with proc_net_ipvs_create() soon */ diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index 654544e72264..b71c69a2db13 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -271,6 +271,29 @@ struct ip_vs_conn *ip_vs_conn_in_get return cp; } +struct ip_vs_conn * +ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb, + struct ip_vs_protocol *pp, + const struct ip_vs_iphdr *iph, + unsigned int proto_off, int inverse) +{ + __be16 _ports[2], *pptr; + + pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports); + if (pptr == NULL) + return NULL; + + if (likely(!inverse)) + return ip_vs_conn_in_get(af, iph->protocol, + &iph->saddr, pptr[0], + &iph->daddr, pptr[1]); + else + return ip_vs_conn_in_get(af, iph->protocol, + &iph->daddr, pptr[1], + &iph->saddr, pptr[0]); +} +EXPORT_SYMBOL_GPL(ip_vs_conn_in_get_proto); + /* Get reference to connection template */ struct ip_vs_conn *ip_vs_ct_in_get (int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port, @@ -356,6 +379,28 @@ struct ip_vs_conn *ip_vs_conn_out_get return ret; } +struct ip_vs_conn * +ip_vs_conn_out_get_proto(int af, const struct sk_buff *skb, + struct ip_vs_protocol *pp, + const struct ip_vs_iphdr *iph, + unsigned int proto_off, int inverse) +{ + __be16 _ports[2], *pptr; + + pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports); + if (pptr == NULL) + return NULL; + + if (likely(!inverse)) + return ip_vs_conn_out_get(af, iph->protocol, + &iph->saddr, pptr[0], + &iph->daddr, pptr[1]); + else + return ip_vs_conn_out_get(af, iph->protocol, + &iph->daddr, pptr[1], + &iph->saddr, pptr[0]); +} +EXPORT_SYMBOL_GPL(ip_vs_conn_out_get_proto); /* * Put back the conn and restart its timer with its timeout diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 50907d8472a3..4f8ddba48011 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -54,7 +54,6 @@ EXPORT_SYMBOL(register_ip_vs_scheduler); EXPORT_SYMBOL(unregister_ip_vs_scheduler); -EXPORT_SYMBOL(ip_vs_skb_replace); EXPORT_SYMBOL(ip_vs_proto_name); EXPORT_SYMBOL(ip_vs_conn_new); EXPORT_SYMBOL(ip_vs_conn_in_get); @@ -536,26 +535,6 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, return NF_DROP; } - -/* - * It is hooked before NF_IP_PRI_NAT_SRC at the NF_INET_POST_ROUTING - * chain, and is used for VS/NAT. - * It detects packets for VS/NAT connections and sends the packets - * immediately. This can avoid that iptable_nat mangles the packets - * for VS/NAT. - */ -static unsigned int ip_vs_post_routing(unsigned int hooknum, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) -{ - if (!skb->ipvs_property) - return NF_ACCEPT; - /* The packet was sent from IPVS, exit this chain */ - return NF_STOP; -} - __sum16 ip_vs_checksum_complete(struct sk_buff *skb, int offset) { return csum_fold(skb_checksum(skb, offset, skb->len - offset, 0)); @@ -1499,14 +1478,6 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = { .hooknum = NF_INET_FORWARD, .priority = 99, }, - /* Before the netfilter connection tracking, exit from POST_ROUTING */ - { - .hook = ip_vs_post_routing, - .owner = THIS_MODULE, - .pf = PF_INET, - .hooknum = NF_INET_POST_ROUTING, - .priority = NF_IP_PRI_NAT_SRC-1, - }, #ifdef CONFIG_IP_VS_IPV6 /* After packet filtering, forward packet through VS/DR, VS/TUN, * or VS/NAT(change destination), so that filtering rules can be @@ -1535,14 +1506,6 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = { .hooknum = NF_INET_FORWARD, .priority = 99, }, - /* Before the netfilter connection tracking, exit from POST_ROUTING */ - { - .hook = ip_vs_post_routing, - .owner = THIS_MODULE, - .pf = PF_INET6, - .hooknum = NF_INET_POST_ROUTING, - .priority = NF_IP6_PRI_NAT_SRC-1, - }, #endif }; diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c index 2ae747a376a5..f228a17ec649 100644 --- a/net/netfilter/ipvs/ip_vs_ftp.c +++ b/net/netfilter/ipvs/ip_vs_ftp.c @@ -20,6 +20,17 @@ * * Author: Wouter Gadeyne * + * + * Code for ip_vs_expect_related and ip_vs_expect_callback is taken from + * http://www.ssi.bg/~ja/nfct/: + * + * ip_vs_nfct.c: Netfilter connection tracking support for IPVS + * + * Portions Copyright (C) 2001-2002 + * Antefacto Ltd, 181 Parnell St, Dublin 1, Ireland. + * + * Portions Copyright (C) 2003-2008 + * Julian Anastasov */ #define KMSG_COMPONENT "IPVS" @@ -32,6 +43,9 @@ #include <linux/in.h> #include <linux/ip.h> #include <linux/netfilter.h> +#include <net/netfilter/nf_conntrack.h> +#include <net/netfilter/nf_conntrack_expect.h> +#include <net/netfilter/nf_nat_helper.h> #include <linux/gfp.h> #include <net/protocol.h> #include <net/tcp.h> @@ -43,6 +57,16 @@ #define SERVER_STRING "227 Entering Passive Mode (" #define CLIENT_STRING "PORT " +#define FMT_TUPLE "%pI4:%u->%pI4:%u/%u" +#define ARG_TUPLE(T) &(T)->src.u3.ip, ntohs((T)->src.u.all), \ + &(T)->dst.u3.ip, ntohs((T)->dst.u.all), \ + (T)->dst.protonum + +#define FMT_CONN "%pI4:%u->%pI4:%u->%pI4:%u/%u:%u" +#define ARG_CONN(C) &((C)->caddr.ip), ntohs((C)->cport), \ + &((C)->vaddr.ip), ntohs((C)->vport), \ + &((C)->daddr.ip), ntohs((C)->dport), \ + (C)->protocol, (C)->state /* * List of ports (up to IP_VS_APP_MAX_PORTS) to be handled by helper @@ -123,6 +147,119 @@ static int ip_vs_ftp_get_addrport(char *data, char *data_limit, return 1; } +/* + * Called from init_conntrack() as expectfn handler. + */ +static void +ip_vs_expect_callback(struct nf_conn *ct, + struct nf_conntrack_expect *exp) +{ + struct nf_conntrack_tuple *orig, new_reply; + struct ip_vs_conn *cp; + + if (exp->tuple.src.l3num != PF_INET) + return; + + /* + * We assume that no NF locks are held before this callback. + * ip_vs_conn_out_get and ip_vs_conn_in_get should match their + * expectations even if they use wildcard values, now we provide the + * actual values from the newly created original conntrack direction. + * The conntrack is confirmed when packet reaches IPVS hooks. + */ + + /* RS->CLIENT */ + orig = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; + cp = ip_vs_conn_out_get(exp->tuple.src.l3num, orig->dst.protonum, + &orig->src.u3, orig->src.u.tcp.port, + &orig->dst.u3, orig->dst.u.tcp.port); + if (cp) { + /* Change reply CLIENT->RS to CLIENT->VS */ + new_reply = ct->tuplehash[IP_CT_DIR_REPLY].tuple; + IP_VS_DBG(7, "%s(): ct=%p, status=0x%lX, tuples=" FMT_TUPLE ", " + FMT_TUPLE ", found inout cp=" FMT_CONN "\n", + __func__, ct, ct->status, + ARG_TUPLE(orig), ARG_TUPLE(&new_reply), + ARG_CONN(cp)); + new_reply.dst.u3 = cp->vaddr; + new_reply.dst.u.tcp.port = cp->vport; + IP_VS_DBG(7, "%s(): ct=%p, new tuples=" FMT_TUPLE ", " FMT_TUPLE + ", inout cp=" FMT_CONN "\n", + __func__, ct, + ARG_TUPLE(orig), ARG_TUPLE(&new_reply), + ARG_CONN(cp)); + goto alter; + } + + /* CLIENT->VS */ + cp = ip_vs_conn_in_get(exp->tuple.src.l3num, orig->dst.protonum, + &orig->src.u3, orig->src.u.tcp.port, + &orig->dst.u3, orig->dst.u.tcp.port); + if (cp) { + /* Change reply VS->CLIENT to RS->CLIENT */ + new_reply = ct->tuplehash[IP_CT_DIR_REPLY].tuple; + IP_VS_DBG(7, "%s(): ct=%p, status=0x%lX, tuples=" FMT_TUPLE ", " + FMT_TUPLE ", found outin cp=" FMT_CONN "\n", + __func__, ct, ct->status, + ARG_TUPLE(orig), ARG_TUPLE(&new_reply), + ARG_CONN(cp)); + new_reply.src.u3 = cp->daddr; + new_reply.src.u.tcp.port = cp->dport; + IP_VS_DBG(7, "%s(): ct=%p, new tuples=" FMT_TUPLE ", " + FMT_TUPLE ", outin cp=" FMT_CONN "\n", + __func__, ct, + ARG_TUPLE(orig), ARG_TUPLE(&new_reply), + ARG_CONN(cp)); + goto alter; + } + + IP_VS_DBG(7, "%s(): ct=%p, status=0x%lX, tuple=" FMT_TUPLE + " - unknown expect\n", + __func__, ct, ct->status, ARG_TUPLE(orig)); + return; + +alter: + /* Never alter conntrack for non-NAT conns */ + if (IP_VS_FWD_METHOD(cp) == IP_VS_CONN_F_MASQ) + nf_conntrack_alter_reply(ct, &new_reply); + ip_vs_conn_put(cp); + return; +} + +/* + * Create NF conntrack expectation with wildcard (optional) source port. + * Then the default callback function will alter the reply and will confirm + * the conntrack entry when the first packet comes. + */ +static void +ip_vs_expect_related(struct sk_buff *skb, struct nf_conn *ct, + struct ip_vs_conn *cp, u_int8_t proto, + const __be16 *port, int from_rs) +{ + struct nf_conntrack_expect *exp; + + BUG_ON(!ct || ct == &nf_conntrack_untracked); + + exp = nf_ct_expect_alloc(ct); + if (!exp) + return; + + if (from_rs) + nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT, + nf_ct_l3num(ct), &cp->daddr, &cp->caddr, + proto, port, &cp->cport); + else + nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT, + nf_ct_l3num(ct), &cp->caddr, &cp->vaddr, + proto, port, &cp->vport); + + exp->expectfn = ip_vs_expect_callback; + + IP_VS_DBG(7, "%s(): ct=%p, expect tuple=" FMT_TUPLE "\n", + __func__, ct, ARG_TUPLE(&exp->tuple)); + nf_ct_expect_related(exp); + nf_ct_expect_put(exp); +} /* * Look at outgoing ftp packets to catch the response to a PASV command @@ -149,7 +286,9 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, struct ip_vs_conn *n_cp; char buf[24]; /* xxx.xxx.xxx.xxx,ppp,ppp\000 */ unsigned buf_len; - int ret; + int ret = 0; + enum ip_conntrack_info ctinfo; + struct nf_conn *ct; #ifdef CONFIG_IP_VS_IPV6 /* This application helper doesn't work with IPv6 yet, @@ -219,19 +358,26 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, buf_len = strlen(buf); + ct = nf_ct_get(skb, &ctinfo); + if (ct && !nf_ct_is_untracked(ct)) { + /* If mangling fails this function will return 0 + * which will cause the packet to be dropped. + * Mangling can only fail under memory pressure, + * hopefully it will succeed on the retransmitted + * packet. + */ + ret = nf_nat_mangle_tcp_packet(skb, ct, ctinfo, + start-data, end-start, + buf, buf_len); + if (ret) + ip_vs_expect_related(skb, ct, n_cp, + IPPROTO_TCP, NULL, 0); + } + /* - * Calculate required delta-offset to keep TCP happy + * Not setting 'diff' is intentional, otherwise the sequence + * would be adjusted twice. */ - *diff = buf_len - (end-start); - - if (*diff == 0) { - /* simply replace it with new passive address */ - memcpy(start, buf, buf_len); - ret = 1; - } else { - ret = !ip_vs_skb_replace(skb, GFP_ATOMIC, start, - end-start, buf, buf_len); - } cp->app_data = NULL; ip_vs_tcp_conn_listen(n_cp); @@ -263,6 +409,7 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp, union nf_inet_addr to; __be16 port; struct ip_vs_conn *n_cp; + struct nf_conn *ct; #ifdef CONFIG_IP_VS_IPV6 /* This application helper doesn't work with IPv6 yet, @@ -349,6 +496,11 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp, ip_vs_control_add(n_cp, cp); } + ct = (struct nf_conn *)skb->nfct; + if (ct && ct != &nf_conntrack_untracked) + ip_vs_expect_related(skb, ct, n_cp, + IPPROTO_TCP, &n_cp->dport, 1); + /* * Move tunnel to listen state */ diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c index 2d3d5e4b35f8..027f654799fe 100644 --- a/net/netfilter/ipvs/ip_vs_proto.c +++ b/net/netfilter/ipvs/ip_vs_proto.c @@ -98,6 +98,7 @@ struct ip_vs_protocol * ip_vs_proto_get(unsigned short proto) return NULL; } +EXPORT_SYMBOL(ip_vs_proto_get); /* diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c index c9a3f7a21d53..4c0855cb006e 100644 --- a/net/netfilter/ipvs/ip_vs_proto_sctp.c +++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c @@ -8,55 +8,6 @@ #include <net/sctp/checksum.h> #include <net/ip_vs.h> - -static struct ip_vs_conn * -sctp_conn_in_get(int af, - const struct sk_buff *skb, - struct ip_vs_protocol *pp, - const struct ip_vs_iphdr *iph, - unsigned int proto_off, - int inverse) -{ - __be16 _ports[2], *pptr; - - pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports); - if (pptr == NULL) - return NULL; - - if (likely(!inverse)) - return ip_vs_conn_in_get(af, iph->protocol, - &iph->saddr, pptr[0], - &iph->daddr, pptr[1]); - else - return ip_vs_conn_in_get(af, iph->protocol, - &iph->daddr, pptr[1], - &iph->saddr, pptr[0]); -} - -static struct ip_vs_conn * -sctp_conn_out_get(int af, - const struct sk_buff *skb, - struct ip_vs_protocol *pp, - const struct ip_vs_iphdr *iph, - unsigned int proto_off, - int inverse) -{ - __be16 _ports[2], *pptr; - - pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports); - if (pptr == NULL) - return NULL; - - if (likely(!inverse)) - return ip_vs_conn_out_get(af, iph->protocol, - &iph->saddr, pptr[0], - &iph->daddr, pptr[1]); - else - return ip_vs_conn_out_get(af, iph->protocol, - &iph->daddr, pptr[1], - &iph->saddr, pptr[0]); -} - static int sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, int *verdict, struct ip_vs_conn **cpp) @@ -173,7 +124,7 @@ sctp_dnat_handler(struct sk_buff *skb, return 0; /* Call application helper if needed */ - if (!ip_vs_app_pkt_out(cp, skb)) + if (!ip_vs_app_pkt_in(cp, skb)) return 0; } @@ -1169,8 +1120,8 @@ struct ip_vs_protocol ip_vs_protocol_sctp = { .register_app = sctp_register_app, .unregister_app = sctp_unregister_app, .conn_schedule = sctp_conn_schedule, - .conn_in_get = sctp_conn_in_get, - .conn_out_get = sctp_conn_out_get, + .conn_in_get = ip_vs_conn_in_get_proto, + .conn_out_get = ip_vs_conn_out_get_proto, .snat_handler = sctp_snat_handler, .dnat_handler = sctp_dnat_handler, .csum_check = sctp_csum_check, diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c index 91d28e073742..282d24de8592 100644 --- a/net/netfilter/ipvs/ip_vs_proto_tcp.c +++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c @@ -27,52 +27,6 @@ #include <net/ip_vs.h> - -static struct ip_vs_conn * -tcp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp, - const struct ip_vs_iphdr *iph, unsigned int proto_off, - int inverse) -{ - __be16 _ports[2], *pptr; - - pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports); - if (pptr == NULL) - return NULL; - - if (likely(!inverse)) { - return ip_vs_conn_in_get(af, iph->protocol, - &iph->saddr, pptr[0], - &iph->daddr, pptr[1]); - } else { - return ip_vs_conn_in_get(af, iph->protocol, - &iph->daddr, pptr[1], - &iph->saddr, pptr[0]); - } -} - -static struct ip_vs_conn * -tcp_conn_out_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp, - const struct ip_vs_iphdr *iph, unsigned int proto_off, - int inverse) -{ - __be16 _ports[2], *pptr; - - pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports); - if (pptr == NULL) - return NULL; - - if (likely(!inverse)) { - return ip_vs_conn_out_get(af, iph->protocol, - &iph->saddr, pptr[0], - &iph->daddr, pptr[1]); - } else { - return ip_vs_conn_out_get(af, iph->protocol, - &iph->daddr, pptr[1], - &iph->saddr, pptr[0]); - } -} - - static int tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, int *verdict, struct ip_vs_conn **cpp) @@ -721,8 +675,8 @@ struct ip_vs_protocol ip_vs_protocol_tcp = { .register_app = tcp_register_app, .unregister_app = tcp_unregister_app, .conn_schedule = tcp_conn_schedule, - .conn_in_get = tcp_conn_in_get, - .conn_out_get = tcp_conn_out_get, + .conn_in_get = ip_vs_conn_in_get_proto, + .conn_out_get = ip_vs_conn_out_get_proto, .snat_handler = tcp_snat_handler, .dnat_handler = tcp_dnat_handler, .csum_check = tcp_csum_check, diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c index e7a6885e0167..8553231b5d41 100644 --- a/net/netfilter/ipvs/ip_vs_proto_udp.c +++ b/net/netfilter/ipvs/ip_vs_proto_udp.c @@ -27,58 +27,6 @@ #include <net/ip.h> #include <net/ip6_checksum.h> -static struct ip_vs_conn * -udp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp, - const struct ip_vs_iphdr *iph, unsigned int proto_off, - int inverse) -{ - struct ip_vs_conn *cp; - __be16 _ports[2], *pptr; - - pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports); - if (pptr == NULL) - return NULL; - - if (likely(!inverse)) { - cp = ip_vs_conn_in_get(af, iph->protocol, - &iph->saddr, pptr[0], - &iph->daddr, pptr[1]); - } else { - cp = ip_vs_conn_in_get(af, iph->protocol, - &iph->daddr, pptr[1], - &iph->saddr, pptr[0]); - } - - return cp; -} - - -static struct ip_vs_conn * -udp_conn_out_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp, - const struct ip_vs_iphdr *iph, unsigned int proto_off, - int inverse) -{ - struct ip_vs_conn *cp; - __be16 _ports[2], *pptr; - - pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports); - if (pptr == NULL) - return NULL; - - if (likely(!inverse)) { - cp = ip_vs_conn_out_get(af, iph->protocol, - &iph->saddr, pptr[0], - &iph->daddr, pptr[1]); - } else { - cp = ip_vs_conn_out_get(af, iph->protocol, - &iph->daddr, pptr[1], - &iph->saddr, pptr[0]); - } - - return cp; -} - - static int udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, int *verdict, struct ip_vs_conn **cpp) @@ -520,8 +468,8 @@ struct ip_vs_protocol ip_vs_protocol_udp = { .init = udp_init, .exit = udp_exit, .conn_schedule = udp_conn_schedule, - .conn_in_get = udp_conn_in_get, - .conn_out_get = udp_conn_out_get, + .conn_in_get = ip_vs_conn_in_get_proto, + .conn_out_get = ip_vs_conn_out_get_proto, .snat_handler = udp_snat_handler, .dnat_handler = udp_dnat_handler, .csum_check = udp_csum_check, diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 02b078e11cf3..21e1a5e9b9d3 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -28,6 +28,7 @@ #include <net/ip6_route.h> #include <linux/icmpv6.h> #include <linux/netfilter.h> +#include <net/netfilter/nf_conntrack.h> #include <linux/netfilter_ipv4.h> #include <net/ip_vs.h> @@ -348,6 +349,30 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, } #endif +static void +ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp) +{ + struct nf_conn *ct = (struct nf_conn *)skb->nfct; + struct nf_conntrack_tuple new_tuple; + + if (ct == NULL || nf_ct_is_untracked(ct) || nf_ct_is_confirmed(ct)) + return; + + /* + * The connection is not yet in the hashtable, so we update it. + * CIP->VIP will remain the same, so leave the tuple in + * IP_CT_DIR_ORIGINAL untouched. When the reply comes back from the + * real-server we will see RIP->DIP. + */ + new_tuple = ct->tuplehash[IP_CT_DIR_REPLY].tuple; + new_tuple.src.u3 = cp->daddr; + /* + * This will also take care of UDP and other protocols. + */ + new_tuple.src.u.tcp.port = cp->dport; + nf_conntrack_alter_reply(ct, &new_tuple); +} + /* * NAT transmitter (only for outside-to-inside nat forwarding) * Not used for related ICMP @@ -403,6 +428,8 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT"); + ip_vs_update_conntrack(skb, cp); + /* FIXME: when application helper enlarges the packet and the length is larger than the MTU of outgoing device, there will be still MTU problem. */ @@ -479,6 +506,8 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT"); + ip_vs_update_conntrack(skb, cp); + /* FIXME: when application helper enlarges the packet and the length is larger than the MTU of outgoing device, there will be still MTU problem. */ diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 16b41b4e2a3c..df3eedb142ff 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -966,8 +966,7 @@ acct: if (acct) { spin_lock_bh(&ct->lock); acct[CTINFO2DIR(ctinfo)].packets++; - acct[CTINFO2DIR(ctinfo)].bytes += - skb->len - skb_network_offset(skb); + acct[CTINFO2DIR(ctinfo)].bytes += skb->len; spin_unlock_bh(&ct->lock); } } diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c index fdc8fb4ae10f..7dcf7a404190 100644 --- a/net/netfilter/nf_conntrack_extend.c +++ b/net/netfilter/nf_conntrack_extend.c @@ -23,9 +23,10 @@ void __nf_ct_ext_destroy(struct nf_conn *ct) { unsigned int i; struct nf_ct_ext_type *t; + struct nf_ct_ext *ext = ct->ext; for (i = 0; i < NF_CT_EXT_NUM; i++) { - if (!nf_ct_ext_exist(ct, i)) + if (!__nf_ct_ext_exist(ext, i)) continue; rcu_read_lock(); @@ -73,44 +74,45 @@ static void __nf_ct_ext_free_rcu(struct rcu_head *head) void *__nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp) { - struct nf_ct_ext *new; + struct nf_ct_ext *old, *new; int i, newlen, newoff; struct nf_ct_ext_type *t; /* Conntrack must not be confirmed to avoid races on reallocation. */ NF_CT_ASSERT(!nf_ct_is_confirmed(ct)); - if (!ct->ext) + old = ct->ext; + if (!old) return nf_ct_ext_create(&ct->ext, id, gfp); - if (nf_ct_ext_exist(ct, id)) + if (__nf_ct_ext_exist(old, id)) return NULL; rcu_read_lock(); t = rcu_dereference(nf_ct_ext_types[id]); BUG_ON(t == NULL); - newoff = ALIGN(ct->ext->len, t->align); + newoff = ALIGN(old->len, t->align); newlen = newoff + t->len; rcu_read_unlock(); - new = __krealloc(ct->ext, newlen, gfp); + new = __krealloc(old, newlen, gfp); if (!new) return NULL; - if (new != ct->ext) { + if (new != old) { for (i = 0; i < NF_CT_EXT_NUM; i++) { - if (!nf_ct_ext_exist(ct, i)) + if (!__nf_ct_ext_exist(old, i)) continue; rcu_read_lock(); t = rcu_dereference(nf_ct_ext_types[i]); if (t && t->move) t->move((void *)new + new->offset[i], - (void *)ct->ext + ct->ext->offset[i]); + (void *)old + old->offset[i]); rcu_read_unlock(); } - call_rcu(&ct->ext->rcu, __nf_ct_ext_free_rcu); + call_rcu(&old->rcu, __nf_ct_ext_free_rcu); ct->ext = new; } diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 802dbffae8b4..c4c885dca3bd 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -585,8 +585,16 @@ static bool tcp_in_window(const struct nf_conn *ct, * Let's try to use the data from the packet. */ sender->td_end = end; + win <<= sender->td_scale; sender->td_maxwin = (win == 0 ? 1 : win); sender->td_maxend = end + sender->td_maxwin; + /* + * We haven't seen traffic in the other direction yet + * but we have to tweak window tracking to pass III + * and IV until that happens. + */ + if (receiver->td_maxwin == 0) + receiver->td_end = receiver->td_maxend = sack; } } else if (((state->state == TCP_CONNTRACK_SYN_SENT && dir == IP_CT_DIR_ORIGINAL) @@ -680,7 +688,7 @@ static bool tcp_in_window(const struct nf_conn *ct, /* * Update receiver data. */ - if (after(end, sender->td_maxend)) + if (receiver->td_maxwin != 0 && after(end, sender->td_maxend)) receiver->td_maxwin += end - sender->td_maxend; if (after(sack + win, receiver->td_maxend - 1)) { receiver->td_maxend = sack + win; diff --git a/net/netfilter/xt_CHECKSUM.c b/net/netfilter/xt_CHECKSUM.c new file mode 100644 index 000000000000..0f642ef8cd26 --- /dev/null +++ b/net/netfilter/xt_CHECKSUM.c @@ -0,0 +1,70 @@ +/* iptables module for the packet checksum mangling + * + * (C) 2002 by Harald Welte <laforge@netfilter.org> + * (C) 2010 Red Hat, Inc. + * + * Author: Michael S. Tsirkin <mst@redhat.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. +*/ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include <linux/module.h> +#include <linux/skbuff.h> + +#include <linux/netfilter/x_tables.h> +#include <linux/netfilter/xt_CHECKSUM.h> + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Michael S. Tsirkin <mst@redhat.com>"); +MODULE_DESCRIPTION("Xtables: checksum modification"); +MODULE_ALIAS("ipt_CHECKSUM"); +MODULE_ALIAS("ip6t_CHECKSUM"); + +static unsigned int +checksum_tg(struct sk_buff *skb, const struct xt_action_param *par) +{ + if (skb->ip_summed == CHECKSUM_PARTIAL) + skb_checksum_help(skb); + + return XT_CONTINUE; +} + +static int checksum_tg_check(const struct xt_tgchk_param *par) +{ + const struct xt_CHECKSUM_info *einfo = par->targinfo; + + if (einfo->operation & ~XT_CHECKSUM_OP_FILL) { + pr_info("unsupported CHECKSUM operation %x\n", einfo->operation); + return -EINVAL; + } + if (!einfo->operation) { + pr_info("no CHECKSUM operation enabled\n"); + return -EINVAL; + } + return 0; +} + +static struct xt_target checksum_tg_reg __read_mostly = { + .name = "CHECKSUM", + .family = NFPROTO_UNSPEC, + .target = checksum_tg, + .targetsize = sizeof(struct xt_CHECKSUM_info), + .table = "mangle", + .checkentry = checksum_tg_check, + .me = THIS_MODULE, +}; + +static int __init checksum_tg_init(void) +{ + return xt_register_target(&checksum_tg_reg); +} + +static void __exit checksum_tg_exit(void) +{ + xt_unregister_target(&checksum_tg_reg); +} + +module_init(checksum_tg_init); +module_exit(checksum_tg_exit); diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c index e1a0dedac258..c61294d85fda 100644 --- a/net/netfilter/xt_TPROXY.c +++ b/net/netfilter/xt_TPROXY.c @@ -37,8 +37,10 @@ tproxy_tg(struct sk_buff *skb, const struct xt_action_param *par) return NF_DROP; sk = nf_tproxy_get_sock_v4(dev_net(skb->dev), iph->protocol, - iph->saddr, tgi->laddr ? tgi->laddr : iph->daddr, - hp->source, tgi->lport ? tgi->lport : hp->dest, + iph->saddr, + tgi->laddr ? tgi->laddr : iph->daddr, + hp->source, + tgi->lport ? tgi->lport : hp->dest, par->in, true); /* NOTE: assign_sock consumes our sk reference */ diff --git a/net/netfilter/xt_cpu.c b/net/netfilter/xt_cpu.c new file mode 100644 index 000000000000..b39db8a5cbae --- /dev/null +++ b/net/netfilter/xt_cpu.c @@ -0,0 +1,63 @@ +/* Kernel module to match running CPU */ + +/* + * Might be used to distribute connections on several daemons, if + * RPS (Remote Packet Steering) is enabled or NIC is multiqueue capable, + * each RX queue IRQ affined to one CPU (1:1 mapping) + * + */ + +/* (C) 2010 Eric Dumazet + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/module.h> +#include <linux/skbuff.h> +#include <linux/netfilter/xt_cpu.h> +#include <linux/netfilter/x_tables.h> + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Eric Dumazet <eric.dumazet@gmail.com>"); +MODULE_DESCRIPTION("Xtables: CPU match"); + +static int cpu_mt_check(const struct xt_mtchk_param *par) +{ + const struct xt_cpu_info *info = par->matchinfo; + + if (info->invert & ~1) + return -EINVAL; + return 0; +} + +static bool cpu_mt(const struct sk_buff *skb, struct xt_action_param *par) +{ + const struct xt_cpu_info *info = par->matchinfo; + + return (info->cpu == smp_processor_id()) ^ info->invert; +} + +static struct xt_match cpu_mt_reg __read_mostly = { + .name = "cpu", + .revision = 0, + .family = NFPROTO_UNSPEC, + .checkentry = cpu_mt_check, + .match = cpu_mt, + .matchsize = sizeof(struct xt_cpu_info), + .me = THIS_MODULE, +}; + +static int __init cpu_mt_init(void) +{ + return xt_register_match(&cpu_mt_reg); +} + +static void __exit cpu_mt_exit(void) +{ + xt_unregister_match(&cpu_mt_reg); +} + +module_init(cpu_mt_init); +module_exit(cpu_mt_exit); diff --git a/net/netfilter/xt_ipvs.c b/net/netfilter/xt_ipvs.c new file mode 100644 index 000000000000..7a4d66db95ae --- /dev/null +++ b/net/netfilter/xt_ipvs.c @@ -0,0 +1,189 @@ +/* + * xt_ipvs - kernel module to match IPVS connection properties + * + * Author: Hannes Eder <heder@google.com> + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/module.h> +#include <linux/moduleparam.h> +#include <linux/spinlock.h> +#include <linux/skbuff.h> +#ifdef CONFIG_IP_VS_IPV6 +#include <net/ipv6.h> +#endif +#include <linux/ip_vs.h> +#include <linux/types.h> +#include <linux/netfilter/x_tables.h> +#include <linux/netfilter/x_tables.h> +#include <linux/netfilter/xt_ipvs.h> +#include <net/netfilter/nf_conntrack.h> + +#include <net/ip_vs.h> + +MODULE_AUTHOR("Hannes Eder <heder@google.com>"); +MODULE_DESCRIPTION("Xtables: match IPVS connection properties"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("ipt_ipvs"); +MODULE_ALIAS("ip6t_ipvs"); + +/* borrowed from xt_conntrack */ +static bool ipvs_mt_addrcmp(const union nf_inet_addr *kaddr, + const union nf_inet_addr *uaddr, + const union nf_inet_addr *umask, + unsigned int l3proto) +{ + if (l3proto == NFPROTO_IPV4) + return ((kaddr->ip ^ uaddr->ip) & umask->ip) == 0; +#ifdef CONFIG_IP_VS_IPV6 + else if (l3proto == NFPROTO_IPV6) + return ipv6_masked_addr_cmp(&kaddr->in6, &umask->in6, + &uaddr->in6) == 0; +#endif + else + return false; +} + +static bool +ipvs_mt(const struct sk_buff *skb, struct xt_action_param *par) +{ + const struct xt_ipvs_mtinfo *data = par->matchinfo; + /* ipvs_mt_check ensures that family is only NFPROTO_IPV[46]. */ + const u_int8_t family = par->family; + struct ip_vs_iphdr iph; + struct ip_vs_protocol *pp; + struct ip_vs_conn *cp; + bool match = true; + + if (data->bitmask == XT_IPVS_IPVS_PROPERTY) { + match = skb->ipvs_property ^ + !!(data->invert & XT_IPVS_IPVS_PROPERTY); + goto out; + } + + /* other flags than XT_IPVS_IPVS_PROPERTY are set */ + if (!skb->ipvs_property) { + match = false; + goto out; + } + + ip_vs_fill_iphdr(family, skb_network_header(skb), &iph); + + if (data->bitmask & XT_IPVS_PROTO) + if ((iph.protocol == data->l4proto) ^ + !(data->invert & XT_IPVS_PROTO)) { + match = false; + goto out; + } + + pp = ip_vs_proto_get(iph.protocol); + if (unlikely(!pp)) { + match = false; + goto out; + } + + /* + * Check if the packet belongs to an existing entry + */ + cp = pp->conn_out_get(family, skb, pp, &iph, iph.len, 1 /* inverse */); + if (unlikely(cp == NULL)) { + match = false; + goto out; + } + + /* + * We found a connection, i.e. ct != 0, make sure to call + * __ip_vs_conn_put before returning. In our case jump to out_put_con. + */ + + if (data->bitmask & XT_IPVS_VPORT) + if ((cp->vport == data->vport) ^ + !(data->invert & XT_IPVS_VPORT)) { + match = false; + goto out_put_cp; + } + + if (data->bitmask & XT_IPVS_VPORTCTL) + if ((cp->control != NULL && + cp->control->vport == data->vportctl) ^ + !(data->invert & XT_IPVS_VPORTCTL)) { + match = false; + goto out_put_cp; + } + + if (data->bitmask & XT_IPVS_DIR) { + enum ip_conntrack_info ctinfo; + struct nf_conn *ct = nf_ct_get(skb, &ctinfo); + + if (ct == NULL || nf_ct_is_untracked(ct)) { + match = false; + goto out_put_cp; + } + + if ((ctinfo >= IP_CT_IS_REPLY) ^ + !!(data->invert & XT_IPVS_DIR)) { + match = false; + goto out_put_cp; + } + } + + if (data->bitmask & XT_IPVS_METHOD) + if (((cp->flags & IP_VS_CONN_F_FWD_MASK) == data->fwd_method) ^ + !(data->invert & XT_IPVS_METHOD)) { + match = false; + goto out_put_cp; + } + + if (data->bitmask & XT_IPVS_VADDR) { + if (ipvs_mt_addrcmp(&cp->vaddr, &data->vaddr, + &data->vmask, family) ^ + !(data->invert & XT_IPVS_VADDR)) { + match = false; + goto out_put_cp; + } + } + +out_put_cp: + __ip_vs_conn_put(cp); +out: + pr_debug("match=%d\n", match); + return match; +} + +static int ipvs_mt_check(const struct xt_mtchk_param *par) +{ + if (par->family != NFPROTO_IPV4 +#ifdef CONFIG_IP_VS_IPV6 + && par->family != NFPROTO_IPV6 +#endif + ) { + pr_info("protocol family %u not supported\n", par->family); + return -EINVAL; + } + + return 0; +} + +static struct xt_match xt_ipvs_mt_reg __read_mostly = { + .name = "ipvs", + .revision = 0, + .family = NFPROTO_UNSPEC, + .match = ipvs_mt, + .checkentry = ipvs_mt_check, + .matchsize = XT_ALIGN(sizeof(struct xt_ipvs_mtinfo)), + .me = THIS_MODULE, +}; + +static int __init ipvs_mt_init(void) +{ + return xt_register_match(&xt_ipvs_mt_reg); +} + +static void __exit ipvs_mt_exit(void) +{ + xt_unregister_match(&xt_ipvs_mt_reg); +} + +module_init(ipvs_mt_init); +module_exit(ipvs_mt_exit); diff --git a/net/netfilter/xt_quota.c b/net/netfilter/xt_quota.c index b4f7dfea5980..70eb2b4984dd 100644 --- a/net/netfilter/xt_quota.c +++ b/net/netfilter/xt_quota.c @@ -11,7 +11,8 @@ #include <linux/netfilter/xt_quota.h> struct xt_quota_priv { - uint64_t quota; + spinlock_t lock; + uint64_t quota; }; MODULE_LICENSE("GPL"); @@ -20,8 +21,6 @@ MODULE_DESCRIPTION("Xtables: countdown quota match"); MODULE_ALIAS("ipt_quota"); MODULE_ALIAS("ip6t_quota"); -static DEFINE_SPINLOCK(quota_lock); - static bool quota_mt(const struct sk_buff *skb, struct xt_action_param *par) { @@ -29,7 +28,7 @@ quota_mt(const struct sk_buff *skb, struct xt_action_param *par) struct xt_quota_priv *priv = q->master; bool ret = q->flags & XT_QUOTA_INVERT; - spin_lock_bh("a_lock); + spin_lock_bh(&priv->lock); if (priv->quota >= skb->len) { priv->quota -= skb->len; ret = !ret; @@ -37,9 +36,7 @@ quota_mt(const struct sk_buff *skb, struct xt_action_param *par) /* we do not allow even small packets from now on */ priv->quota = 0; } - /* Copy quota back to matchinfo so that iptables can display it */ - q->quota = priv->quota; - spin_unlock_bh("a_lock); + spin_unlock_bh(&priv->lock); return ret; } @@ -55,6 +52,7 @@ static int quota_mt_check(const struct xt_mtchk_param *par) if (q->master == NULL) return -ENOMEM; + spin_lock_init(&q->master->lock); q->master->quota = q->quota; return 0; } diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 8648a9922aab..2cbf380377d5 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1406,7 +1406,7 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock, struct netlink_sock *nlk = nlk_sk(sk); int noblock = flags&MSG_DONTWAIT; size_t copied; - struct sk_buff *skb, *frag __maybe_unused = NULL; + struct sk_buff *skb; int err; if (flags&MSG_OOB) @@ -1441,7 +1441,21 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock, kfree_skb(skb); skb = compskb; } else { - frag = skb_shinfo(skb)->frag_list; + /* + * Before setting frag_list to NULL, we must get a + * private copy of skb if shared (because of MSG_PEEK) + */ + if (skb_shared(skb)) { + struct sk_buff *nskb; + + nskb = pskb_copy(skb, GFP_KERNEL); + kfree_skb(skb); + skb = nskb; + err = -ENOMEM; + if (!skb) + goto out; + } + kfree_skb(skb_shinfo(skb)->frag_list); skb_shinfo(skb)->frag_list = NULL; } } @@ -1478,10 +1492,6 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock, if (flags & MSG_TRUNC) copied = skb->len; -#ifdef CONFIG_COMPAT_NETLINK_MESSAGES - skb_shinfo(skb)->frag_list = frag; -#endif - skb_free_datagram(sk, skb); if (nlk->cb && atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf / 2) diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index aa4308afcc7f..26ed3e8587c2 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -303,6 +303,7 @@ int genl_register_ops(struct genl_family *family, struct genl_ops *ops) errout: return err; } +EXPORT_SYMBOL(genl_register_ops); /** * genl_unregister_ops - unregister generic netlink operations @@ -337,6 +338,7 @@ int genl_unregister_ops(struct genl_family *family, struct genl_ops *ops) return -ENOENT; } +EXPORT_SYMBOL(genl_unregister_ops); /** * genl_register_family - register a generic netlink family @@ -405,6 +407,7 @@ errout_locked: errout: return err; } +EXPORT_SYMBOL(genl_register_family); /** * genl_register_family_with_ops - register a generic netlink family @@ -485,6 +488,7 @@ int genl_unregister_family(struct genl_family *family) return -ENOENT; } +EXPORT_SYMBOL(genl_unregister_family); static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) { @@ -873,11 +877,7 @@ static int __init genl_init(void) for (i = 0; i < GENL_FAM_TAB_SIZE; i++) INIT_LIST_HEAD(&family_ht[i]); - err = genl_register_family(&genl_ctrl); - if (err < 0) - goto problem; - - err = genl_register_ops(&genl_ctrl, &genl_ctrl_ops); + err = genl_register_family_with_ops(&genl_ctrl, &genl_ctrl_ops, 1); if (err < 0) goto problem; @@ -899,11 +899,6 @@ problem: subsys_initcall(genl_init); -EXPORT_SYMBOL(genl_register_ops); -EXPORT_SYMBOL(genl_unregister_ops); -EXPORT_SYMBOL(genl_register_family); -EXPORT_SYMBOL(genl_unregister_family); - static int genlmsg_mcast(struct sk_buff *skb, u32 pid, unsigned long group, gfp_t flags) { diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c index cbc244a128bd..b4fdaac233f7 100644 --- a/net/rose/rose_route.c +++ b/net/rose/rose_route.c @@ -109,7 +109,9 @@ static int __must_check rose_add_node(struct rose_route_struct *rose_route, init_timer(&rose_neigh->t0timer); if (rose_route->ndigis != 0) { - if ((rose_neigh->digipeat = kmalloc(sizeof(ax25_digi), GFP_KERNEL)) == NULL) { + rose_neigh->digipeat = + kmalloc(sizeof(ax25_digi), GFP_ATOMIC); + if (rose_neigh->digipeat == NULL) { kfree(rose_neigh); res = -ENOMEM; goto out; diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index a16b0175f890..11f195af2da0 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -33,6 +33,7 @@ static struct tcf_common *tcf_mirred_ht[MIRRED_TAB_MASK + 1]; static u32 mirred_idx_gen; static DEFINE_RWLOCK(mirred_lock); +static LIST_HEAD(mirred_list); static struct tcf_hashinfo mirred_hash_info = { .htab = tcf_mirred_ht, @@ -47,7 +48,9 @@ static inline int tcf_mirred_release(struct tcf_mirred *m, int bind) m->tcf_bindcnt--; m->tcf_refcnt--; if(!m->tcf_bindcnt && m->tcf_refcnt <= 0) { - dev_put(m->tcfm_dev); + list_del(&m->tcfm_list); + if (m->tcfm_dev) + dev_put(m->tcfm_dev); tcf_hash_destroy(&m->common, &mirred_hash_info); return 1; } @@ -134,8 +137,10 @@ static int tcf_mirred_init(struct nlattr *nla, struct nlattr *est, m->tcfm_ok_push = ok_push; } spin_unlock_bh(&m->tcf_lock); - if (ret == ACT_P_CREATED) + if (ret == ACT_P_CREATED) { + list_add(&m->tcfm_list, &mirred_list); tcf_hash_insert(pc, &mirred_hash_info); + } return ret; } @@ -164,9 +169,14 @@ static int tcf_mirred(struct sk_buff *skb, struct tc_action *a, m->tcf_bstats.packets++; dev = m->tcfm_dev; + if (!dev) { + printk_once(KERN_NOTICE "tc mirred: target device is gone\n"); + goto out; + } + if (!(dev->flags & IFF_UP)) { if (net_ratelimit()) - pr_notice("tc mirred to Houston: device %s is gone!\n", + pr_notice("tc mirred to Houston: device %s is down\n", dev->name); goto out; } @@ -230,6 +240,28 @@ nla_put_failure: return -1; } +static int mirred_device_event(struct notifier_block *unused, + unsigned long event, void *ptr) +{ + struct net_device *dev = ptr; + struct tcf_mirred *m; + + if (event == NETDEV_UNREGISTER) + list_for_each_entry(m, &mirred_list, tcfm_list) { + if (m->tcfm_dev == dev) { + dev_put(dev); + m->tcfm_dev = NULL; + } + } + + return NOTIFY_DONE; +} + +static struct notifier_block mirred_device_notifier = { + .notifier_call = mirred_device_event, +}; + + static struct tc_action_ops act_mirred_ops = { .kind = "mirred", .hinfo = &mirred_hash_info, @@ -250,12 +282,17 @@ MODULE_LICENSE("GPL"); static int __init mirred_init_module(void) { + int err = register_netdevice_notifier(&mirred_device_notifier); + if (err) + return err; + pr_info("Mirror/redirect action on\n"); return tcf_register_action(&act_mirred_ops); } static void __exit mirred_cleanup_module(void) { + unregister_netdevice_notifier(&mirred_device_notifier); tcf_unregister_action(&act_mirred_ops); } diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c index 24e614c495f2..d0386a413e8d 100644 --- a/net/sched/act_nat.c +++ b/net/sched/act_nat.c @@ -218,6 +218,7 @@ static int tcf_nat(struct sk_buff *skb, struct tc_action *a, if (!pskb_may_pull(skb, ihl + sizeof(*icmph) + sizeof(*iph))) goto drop; + icmph = (void *)(skb_network_header(skb) + ihl); iph = (void *)(icmph + 1); if (egress) addr = iph->daddr; @@ -246,7 +247,7 @@ static int tcf_nat(struct sk_buff *skb, struct tc_action *a, iph->saddr = new_addr; inet_proto_csum_replace4(&icmph->checksum, skb, addr, new_addr, - 1); + 0); break; } default: diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 4f522143811e..7416a5c73b2a 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -134,10 +134,12 @@ next_knode: #endif for (i = n->sel.nkeys; i>0; i--, key++) { - unsigned int toff; + int toff = off + key->off + (off2 & key->offmask); __be32 *data, _data; - toff = off + key->off + (off2 & key->offmask); + if (skb_headroom(skb) + toff < 0) + goto out; + data = skb_header_pointer(skb, toff, 4, &_data); if (!data) goto out; |