Diffstat (limited to 'net')
326 files changed, 16110 insertions, 10498 deletions
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 6e64f7c6a2e9..7850412f52b7 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -327,7 +327,7 @@ static void vlan_sync_address(struct net_device *dev, static void vlan_transfer_features(struct net_device *dev, struct net_device *vlandev) { - unsigned long old_features = vlandev->features; + u32 old_features = vlandev->features; vlandev->features &= ~dev->vlan_features; vlandev->features |= dev->features & dev->vlan_features; diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index be737539f34d..ae610f046de5 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -625,6 +625,19 @@ static int vlan_dev_fcoe_get_wwn(struct net_device *dev, u64 *wwn, int type) rc = ops->ndo_fcoe_get_wwn(real_dev, wwn, type); return rc; } + +static int vlan_dev_fcoe_ddp_target(struct net_device *dev, u16 xid, + struct scatterlist *sgl, unsigned int sgc) +{ + struct net_device *real_dev = vlan_dev_info(dev)->real_dev; + const struct net_device_ops *ops = real_dev->netdev_ops; + int rc = 0; + + if (ops->ndo_fcoe_ddp_target) + rc = ops->ndo_fcoe_ddp_target(real_dev, xid, sgl, sgc); + + return rc; +} #endif static void vlan_dev_change_rx_flags(struct net_device *dev, int change) @@ -858,6 +871,7 @@ static const struct net_device_ops vlan_netdev_ops = { .ndo_fcoe_enable = vlan_dev_fcoe_enable, .ndo_fcoe_disable = vlan_dev_fcoe_disable, .ndo_fcoe_get_wwn = vlan_dev_fcoe_get_wwn, + .ndo_fcoe_ddp_target = vlan_dev_fcoe_ddp_target, #endif }; diff --git a/net/Kconfig b/net/Kconfig index 72840626284b..79cabf1ee68b 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -221,6 +221,12 @@ config RPS depends on SMP && SYSFS && USE_GENERIC_SMP_HELPERS default y +config RFS_ACCEL + boolean + depends on RPS && GENERIC_HARDIRQS + select CPU_RMAP + default y + config XPS boolean depends on SMP && SYSFS && USE_GENERIC_SMP_HELPERS diff --git a/net/Makefile b/net/Makefile index a3330ebe2c53..a51d9465e628 100644 --- a/net/Makefile +++ b/net/Makefile @@ -19,9 +19,7 @@ obj-$(CONFIG_NETFILTER) += netfilter/ obj-$(CONFIG_INET) += ipv4/ obj-$(CONFIG_XFRM) += xfrm/ obj-$(CONFIG_UNIX) += unix/ -ifneq ($(CONFIG_IPV6),) -obj-y += ipv6/ -endif +obj-$(CONFIG_NET) += ipv6/ obj-$(CONFIG_PACKET) += packet/ obj-$(CONFIG_NET_KEY) += key/ obj-$(CONFIG_BRIDGE) += bridge/ diff --git a/net/atm/clip.c b/net/atm/clip.c index d257da50fcfb..1d4be60e1390 100644 --- a/net/atm/clip.c +++ b/net/atm/clip.c @@ -502,8 +502,6 @@ static int clip_setentry(struct atm_vcc *vcc, __be32 ip) struct atmarp_entry *entry; int error; struct clip_vcc *clip_vcc; - struct flowi fl = { .fl4_dst = ip, - .fl4_tos = 1 }; struct rtable *rt; if (vcc->push != clip_push) { @@ -520,9 +518,9 @@ static int clip_setentry(struct atm_vcc *vcc, __be32 ip) unlink_clip_vcc(clip_vcc); return 0; } - error = ip_route_output_key(&init_net, &rt, &fl); - if (error) - return error; + rt = ip_route_output(&init_net, ip, 0, 1, 0); + if (IS_ERR(rt)) + return PTR_ERR(rt); neigh = __neigh_lookup(&clip_tbl, &ip, rt->dst.dev, 1); ip_rt_put(rt); if (!neigh) diff --git a/net/batman-adv/Makefile b/net/batman-adv/Makefile index d936aeccd194..2de93d00631b 100644 --- a/net/batman-adv/Makefile +++ b/net/batman-adv/Makefile @@ -1,5 +1,5 @@ # -# Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: +# Copyright (C) 2007-2011 B.A.T.M.A.N. 
contributors: # # Marek Lindner, Simon Wunderlich # diff --git a/net/batman-adv/aggregation.c b/net/batman-adv/aggregation.c index 3850a3ecf947..af45d6b2031f 100644 --- a/net/batman-adv/aggregation.c +++ b/net/batman-adv/aggregation.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: + * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * @@ -35,7 +35,7 @@ static bool can_aggregate_with(struct batman_packet *new_batman_packet, int packet_len, unsigned long send_time, bool directlink, - struct batman_if *if_incoming, + struct hard_iface *if_incoming, struct forw_packet *forw_packet) { struct batman_packet *batman_packet = @@ -99,7 +99,7 @@ static bool can_aggregate_with(struct batman_packet *new_batman_packet, /* create a new aggregated packet and add this packet to it */ static void new_aggregated_packet(unsigned char *packet_buff, int packet_len, unsigned long send_time, bool direct_link, - struct batman_if *if_incoming, + struct hard_iface *if_incoming, int own_packet) { struct bat_priv *bat_priv = netdev_priv(if_incoming->soft_iface); @@ -188,7 +188,7 @@ static void aggregate(struct forw_packet *forw_packet_aggr, void add_bat_packet_to_list(struct bat_priv *bat_priv, unsigned char *packet_buff, int packet_len, - struct batman_if *if_incoming, char own_packet, + struct hard_iface *if_incoming, char own_packet, unsigned long send_time) { /** @@ -247,7 +247,7 @@ void add_bat_packet_to_list(struct bat_priv *bat_priv, /* unpack the aggregated packets and process them one by one */ void receive_aggr_bat_packet(struct ethhdr *ethhdr, unsigned char *packet_buff, - int packet_len, struct batman_if *if_incoming) + int packet_len, struct hard_iface *if_incoming) { struct batman_packet *batman_packet; int buff_pos = 0; diff --git a/net/batman-adv/aggregation.h b/net/batman-adv/aggregation.h index 71a91b3da913..062204289d1f 100644 --- a/net/batman-adv/aggregation.h +++ b/net/batman-adv/aggregation.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: + * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * @@ -35,9 +35,9 @@ static inline int aggregated_packet(int buff_pos, int packet_len, int num_hna) void add_bat_packet_to_list(struct bat_priv *bat_priv, unsigned char *packet_buff, int packet_len, - struct batman_if *if_incoming, char own_packet, + struct hard_iface *if_incoming, char own_packet, unsigned long send_time); void receive_aggr_bat_packet(struct ethhdr *ethhdr, unsigned char *packet_buff, - int packet_len, struct batman_if *if_incoming); + int packet_len, struct hard_iface *if_incoming); #endif /* _NET_BATMAN_ADV_AGGREGATION_H_ */ diff --git a/net/batman-adv/bat_debugfs.c b/net/batman-adv/bat_debugfs.c index 0ae81d07f102..0e9d43509935 100644 --- a/net/batman-adv/bat_debugfs.c +++ b/net/batman-adv/bat_debugfs.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2010 B.A.T.M.A.N. contributors: + * Copyright (C) 2010-2011 B.A.T.M.A.N. contributors: * * Marek Lindner * @@ -52,7 +52,6 @@ static void emit_log_char(struct debug_log *debug_log, char c) static int fdebug_log(struct debug_log *debug_log, char *fmt, ...) { - int printed_len; va_list args; static char debug_log_buf[256]; char *p; @@ -62,8 +61,7 @@ static int fdebug_log(struct debug_log *debug_log, char *fmt, ...) 
spin_lock_bh(&debug_log->lock); va_start(args, fmt); - printed_len = vscnprintf(debug_log_buf, sizeof(debug_log_buf), - fmt, args); + vscnprintf(debug_log_buf, sizeof(debug_log_buf), fmt, args); va_end(args); for (p = debug_log_buf; *p != 0; p++) diff --git a/net/batman-adv/bat_debugfs.h b/net/batman-adv/bat_debugfs.h index 72df532b7d5f..bc9cda3f01e1 100644 --- a/net/batman-adv/bat_debugfs.h +++ b/net/batman-adv/bat_debugfs.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2010 B.A.T.M.A.N. contributors: + * Copyright (C) 2010-2011 B.A.T.M.A.N. contributors: * * Marek Lindner * diff --git a/net/batman-adv/bat_sysfs.c b/net/batman-adv/bat_sysfs.c index cd7bb51825f1..e449bf6353e0 100644 --- a/net/batman-adv/bat_sysfs.c +++ b/net/batman-adv/bat_sysfs.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2010 B.A.T.M.A.N. contributors: + * Copyright (C) 2010-2011 B.A.T.M.A.N. contributors: * * Marek Lindner * @@ -441,16 +441,16 @@ static ssize_t show_mesh_iface(struct kobject *kobj, struct attribute *attr, char *buff) { struct net_device *net_dev = kobj_to_netdev(kobj); - struct batman_if *batman_if = get_batman_if_by_netdev(net_dev); + struct hard_iface *hard_iface = hardif_get_by_netdev(net_dev); ssize_t length; - if (!batman_if) + if (!hard_iface) return 0; - length = sprintf(buff, "%s\n", batman_if->if_status == IF_NOT_IN_USE ? - "none" : batman_if->soft_iface->name); + length = sprintf(buff, "%s\n", hard_iface->if_status == IF_NOT_IN_USE ? + "none" : hard_iface->soft_iface->name); - kref_put(&batman_if->refcount, hardif_free_ref); + hardif_free_ref(hard_iface); return length; } @@ -459,11 +459,11 @@ static ssize_t store_mesh_iface(struct kobject *kobj, struct attribute *attr, char *buff, size_t count) { struct net_device *net_dev = kobj_to_netdev(kobj); - struct batman_if *batman_if = get_batman_if_by_netdev(net_dev); + struct hard_iface *hard_iface = hardif_get_by_netdev(net_dev); int status_tmp = -1; - int ret; + int ret = count; - if (!batman_if) + if (!hard_iface) return count; if (buff[count - 1] == '\n') @@ -472,7 +472,7 @@ static ssize_t store_mesh_iface(struct kobject *kobj, struct attribute *attr, if (strlen(buff) >= IFNAMSIZ) { pr_err("Invalid parameter for 'mesh_iface' setting received: " "interface name too long '%s'\n", buff); - kref_put(&batman_if->refcount, hardif_free_ref); + hardif_free_ref(hard_iface); return -EINVAL; } @@ -481,30 +481,31 @@ static ssize_t store_mesh_iface(struct kobject *kobj, struct attribute *attr, else status_tmp = IF_I_WANT_YOU; - if ((batman_if->if_status == status_tmp) || ((batman_if->soft_iface) && - (strncmp(batman_if->soft_iface->name, buff, IFNAMSIZ) == 0))) { - kref_put(&batman_if->refcount, hardif_free_ref); - return count; - } + if (hard_iface->if_status == status_tmp) + goto out; + + if ((hard_iface->soft_iface) && + (strncmp(hard_iface->soft_iface->name, buff, IFNAMSIZ) == 0)) + goto out; if (status_tmp == IF_NOT_IN_USE) { rtnl_lock(); - hardif_disable_interface(batman_if); + hardif_disable_interface(hard_iface); rtnl_unlock(); - kref_put(&batman_if->refcount, hardif_free_ref); - return count; + goto out; } /* if the interface already is in use */ - if (batman_if->if_status != IF_NOT_IN_USE) { + if (hard_iface->if_status != IF_NOT_IN_USE) { rtnl_lock(); - hardif_disable_interface(batman_if); + hardif_disable_interface(hard_iface); rtnl_unlock(); } - ret = hardif_enable_interface(batman_if, buff); - kref_put(&batman_if->refcount, hardif_free_ref); + ret = hardif_enable_interface(hard_iface, buff); +out: + hardif_free_ref(hard_iface); return ret; } @@ -512,13 
+513,13 @@ static ssize_t show_iface_status(struct kobject *kobj, struct attribute *attr, char *buff) { struct net_device *net_dev = kobj_to_netdev(kobj); - struct batman_if *batman_if = get_batman_if_by_netdev(net_dev); + struct hard_iface *hard_iface = hardif_get_by_netdev(net_dev); ssize_t length; - if (!batman_if) + if (!hard_iface) return 0; - switch (batman_if->if_status) { + switch (hard_iface->if_status) { case IF_TO_BE_REMOVED: length = sprintf(buff, "disabling\n"); break; @@ -537,7 +538,7 @@ static ssize_t show_iface_status(struct kobject *kobj, struct attribute *attr, break; } - kref_put(&batman_if->refcount, hardif_free_ref); + hardif_free_ref(hard_iface); return length; } diff --git a/net/batman-adv/bat_sysfs.h b/net/batman-adv/bat_sysfs.h index 7f186c007b4f..02f1fa7aadfa 100644 --- a/net/batman-adv/bat_sysfs.h +++ b/net/batman-adv/bat_sysfs.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2010 B.A.T.M.A.N. contributors: + * Copyright (C) 2010-2011 B.A.T.M.A.N. contributors: * * Marek Lindner * diff --git a/net/batman-adv/bitarray.c b/net/batman-adv/bitarray.c index bbcd8f744cdd..ad2ca925b3e0 100644 --- a/net/batman-adv/bitarray.c +++ b/net/batman-adv/bitarray.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2006-2010 B.A.T.M.A.N. contributors: + * Copyright (C) 2006-2011 B.A.T.M.A.N. contributors: * * Simon Wunderlich, Marek Lindner * diff --git a/net/batman-adv/bitarray.h b/net/batman-adv/bitarray.h index ac54017601b1..769c246d1fc1 100644 --- a/net/batman-adv/bitarray.h +++ b/net/batman-adv/bitarray.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2006-2010 B.A.T.M.A.N. contributors: + * Copyright (C) 2006-2011 B.A.T.M.A.N. contributors: * * Simon Wunderlich, Marek Lindner * diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c index 0065ffb8d96d..3cc43558cf9c 100644 --- a/net/batman-adv/gateway_client.c +++ b/net/batman-adv/gateway_client.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2009-2010 B.A.T.M.A.N. contributors: + * Copyright (C) 2009-2011 B.A.T.M.A.N. 
contributors: * * Marek Lindner * @@ -28,58 +28,75 @@ #include <linux/udp.h> #include <linux/if_vlan.h> -static void gw_node_free_ref(struct kref *refcount) +static void gw_node_free_rcu(struct rcu_head *rcu) { struct gw_node *gw_node; - gw_node = container_of(refcount, struct gw_node, refcount); + gw_node = container_of(rcu, struct gw_node, rcu); kfree(gw_node); } -static void gw_node_free_rcu(struct rcu_head *rcu) +static void gw_node_free_ref(struct gw_node *gw_node) { - struct gw_node *gw_node; - - gw_node = container_of(rcu, struct gw_node, rcu); - kref_put(&gw_node->refcount, gw_node_free_ref); + if (atomic_dec_and_test(&gw_node->refcount)) + call_rcu(&gw_node->rcu, gw_node_free_rcu); } void *gw_get_selected(struct bat_priv *bat_priv) { - struct gw_node *curr_gateway_tmp = bat_priv->curr_gw; + struct gw_node *curr_gateway_tmp; + struct orig_node *orig_node = NULL; + rcu_read_lock(); + curr_gateway_tmp = rcu_dereference(bat_priv->curr_gw); if (!curr_gateway_tmp) - return NULL; + goto out; + + orig_node = curr_gateway_tmp->orig_node; + if (!orig_node) + goto out; - return curr_gateway_tmp->orig_node; + if (!atomic_inc_not_zero(&orig_node->refcount)) + orig_node = NULL; + +out: + rcu_read_unlock(); + return orig_node; } void gw_deselect(struct bat_priv *bat_priv) { - struct gw_node *gw_node = bat_priv->curr_gw; + struct gw_node *gw_node; - bat_priv->curr_gw = NULL; + spin_lock_bh(&bat_priv->gw_list_lock); + gw_node = rcu_dereference(bat_priv->curr_gw); + rcu_assign_pointer(bat_priv->curr_gw, NULL); + spin_unlock_bh(&bat_priv->gw_list_lock); if (gw_node) - kref_put(&gw_node->refcount, gw_node_free_ref); + gw_node_free_ref(gw_node); } -static struct gw_node *gw_select(struct bat_priv *bat_priv, - struct gw_node *new_gw_node) +static void gw_select(struct bat_priv *bat_priv, struct gw_node *new_gw_node) { - struct gw_node *curr_gw_node = bat_priv->curr_gw; + struct gw_node *curr_gw_node; - if (new_gw_node) - kref_get(&new_gw_node->refcount); + if (new_gw_node && !atomic_inc_not_zero(&new_gw_node->refcount)) + new_gw_node = NULL; + + spin_lock_bh(&bat_priv->gw_list_lock); + curr_gw_node = rcu_dereference(bat_priv->curr_gw); + rcu_assign_pointer(bat_priv->curr_gw, new_gw_node); + spin_unlock_bh(&bat_priv->gw_list_lock); - bat_priv->curr_gw = new_gw_node; - return curr_gw_node; + if (curr_gw_node) + gw_node_free_ref(curr_gw_node); } void gw_election(struct bat_priv *bat_priv) { struct hlist_node *node; - struct gw_node *gw_node, *curr_gw_tmp = NULL, *old_gw_node = NULL; + struct gw_node *gw_node, *curr_gw, *curr_gw_tmp = NULL; uint8_t max_tq = 0; uint32_t max_gw_factor = 0, tmp_gw_factor = 0; int down, up; @@ -93,19 +110,23 @@ void gw_election(struct bat_priv *bat_priv) if (atomic_read(&bat_priv->gw_mode) != GW_MODE_CLIENT) return; - if (bat_priv->curr_gw) + rcu_read_lock(); + curr_gw = rcu_dereference(bat_priv->curr_gw); + if (curr_gw) { + rcu_read_unlock(); return; + } - rcu_read_lock(); if (hlist_empty(&bat_priv->gw_list)) { - rcu_read_unlock(); - if (bat_priv->curr_gw) { + if (curr_gw) { + rcu_read_unlock(); bat_dbg(DBG_BATMAN, bat_priv, "Removing selected gateway - " "no gateway in range\n"); gw_deselect(bat_priv); - } + } else + rcu_read_unlock(); return; } @@ -154,12 +175,12 @@ void gw_election(struct bat_priv *bat_priv) max_gw_factor = tmp_gw_factor; } - if (bat_priv->curr_gw != curr_gw_tmp) { - if ((bat_priv->curr_gw) && (!curr_gw_tmp)) + if (curr_gw != curr_gw_tmp) { + if ((curr_gw) && (!curr_gw_tmp)) bat_dbg(DBG_BATMAN, bat_priv, "Removing selected gateway - " "no gateway in 
range\n"); - else if ((!bat_priv->curr_gw) && (curr_gw_tmp)) + else if ((!curr_gw) && (curr_gw_tmp)) bat_dbg(DBG_BATMAN, bat_priv, "Adding route to gateway %pM " "(gw_flags: %i, tq: %i)\n", @@ -174,43 +195,43 @@ void gw_election(struct bat_priv *bat_priv) curr_gw_tmp->orig_node->gw_flags, curr_gw_tmp->orig_node->router->tq_avg); - old_gw_node = gw_select(bat_priv, curr_gw_tmp); + gw_select(bat_priv, curr_gw_tmp); } rcu_read_unlock(); - - /* the kfree() has to be outside of the rcu lock */ - if (old_gw_node) - kref_put(&old_gw_node->refcount, gw_node_free_ref); } void gw_check_election(struct bat_priv *bat_priv, struct orig_node *orig_node) { - struct gw_node *curr_gateway_tmp = bat_priv->curr_gw; + struct gw_node *curr_gateway_tmp; uint8_t gw_tq_avg, orig_tq_avg; + rcu_read_lock(); + curr_gateway_tmp = rcu_dereference(bat_priv->curr_gw); if (!curr_gateway_tmp) - return; + goto out_rcu; if (!curr_gateway_tmp->orig_node) - goto deselect; + goto deselect_rcu; if (!curr_gateway_tmp->orig_node->router) - goto deselect; + goto deselect_rcu; /* this node already is the gateway */ if (curr_gateway_tmp->orig_node == orig_node) - return; + goto out_rcu; if (!orig_node->router) - return; + goto out_rcu; gw_tq_avg = curr_gateway_tmp->orig_node->router->tq_avg; + rcu_read_unlock(); + orig_tq_avg = orig_node->router->tq_avg; /* the TQ value has to be better */ if (orig_tq_avg < gw_tq_avg) - return; + goto out; /** * if the routing class is greater than 3 the value tells us how much @@ -218,15 +239,23 @@ void gw_check_election(struct bat_priv *bat_priv, struct orig_node *orig_node) **/ if ((atomic_read(&bat_priv->gw_sel_class) > 3) && (orig_tq_avg - gw_tq_avg < atomic_read(&bat_priv->gw_sel_class))) - return; + goto out; bat_dbg(DBG_BATMAN, bat_priv, "Restarting gateway selection: better gateway found (tq curr: " "%i, tq new: %i)\n", gw_tq_avg, orig_tq_avg); + goto deselect; +out_rcu: + rcu_read_unlock(); + goto out; +deselect_rcu: + rcu_read_unlock(); deselect: gw_deselect(bat_priv); +out: + return; } static void gw_node_add(struct bat_priv *bat_priv, @@ -242,7 +271,7 @@ static void gw_node_add(struct bat_priv *bat_priv, memset(gw_node, 0, sizeof(struct gw_node)); INIT_HLIST_NODE(&gw_node->list); gw_node->orig_node = orig_node; - kref_init(&gw_node->refcount); + atomic_set(&gw_node->refcount, 1); spin_lock_bh(&bat_priv->gw_list_lock); hlist_add_head_rcu(&gw_node->list, &bat_priv->gw_list); @@ -283,7 +312,7 @@ void gw_node_update(struct bat_priv *bat_priv, "Gateway %pM removed from gateway list\n", orig_node->orig); - if (gw_node == bat_priv->curr_gw) { + if (gw_node == rcu_dereference(bat_priv->curr_gw)) { rcu_read_unlock(); gw_deselect(bat_priv); return; @@ -321,11 +350,11 @@ void gw_node_purge(struct bat_priv *bat_priv) atomic_read(&bat_priv->mesh_state) == MESH_ACTIVE) continue; - if (bat_priv->curr_gw == gw_node) + if (rcu_dereference(bat_priv->curr_gw) == gw_node) gw_deselect(bat_priv); hlist_del_rcu(&gw_node->list); - call_rcu(&gw_node->rcu, gw_node_free_rcu); + gw_node_free_ref(gw_node); } @@ -335,12 +364,16 @@ void gw_node_purge(struct bat_priv *bat_priv) static int _write_buffer_text(struct bat_priv *bat_priv, struct seq_file *seq, struct gw_node *gw_node) { - int down, up; + struct gw_node *curr_gw; + int down, up, ret; gw_bandwidth_to_kbit(gw_node->orig_node->gw_flags, &down, &up); - return seq_printf(seq, "%s %pM (%3i) %pM [%10s]: %3i - %i%s/%i%s\n", - (bat_priv->curr_gw == gw_node ? 
"=>" : " "), + rcu_read_lock(); + curr_gw = rcu_dereference(bat_priv->curr_gw); + + ret = seq_printf(seq, "%s %pM (%3i) %pM [%10s]: %3i - %i%s/%i%s\n", + (curr_gw == gw_node ? "=>" : " "), gw_node->orig_node->orig, gw_node->orig_node->router->tq_avg, gw_node->orig_node->router->addr, @@ -350,6 +383,9 @@ static int _write_buffer_text(struct bat_priv *bat_priv, (down > 2048 ? "MBit" : "KBit"), (up > 2048 ? up / 1024 : up), (up > 2048 ? "MBit" : "KBit")); + + rcu_read_unlock(); + return ret; } int gw_client_seq_print_text(struct seq_file *seq, void *offset) @@ -470,8 +506,12 @@ int gw_is_target(struct bat_priv *bat_priv, struct sk_buff *skb) if (atomic_read(&bat_priv->gw_mode) == GW_MODE_SERVER) return -1; - if (!bat_priv->curr_gw) + rcu_read_lock(); + if (!rcu_dereference(bat_priv->curr_gw)) { + rcu_read_unlock(); return 0; + } + rcu_read_unlock(); return 1; } diff --git a/net/batman-adv/gateway_client.h b/net/batman-adv/gateway_client.h index 4585e6549844..2aa439124ee3 100644 --- a/net/batman-adv/gateway_client.h +++ b/net/batman-adv/gateway_client.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2009-2010 B.A.T.M.A.N. contributors: + * Copyright (C) 2009-2011 B.A.T.M.A.N. contributors: * * Marek Lindner * diff --git a/net/batman-adv/gateway_common.c b/net/batman-adv/gateway_common.c index b962982f017e..50d3a59a3d73 100644 --- a/net/batman-adv/gateway_common.c +++ b/net/batman-adv/gateway_common.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2009-2010 B.A.T.M.A.N. contributors: + * Copyright (C) 2009-2011 B.A.T.M.A.N. contributors: * * Marek Lindner * diff --git a/net/batman-adv/gateway_common.h b/net/batman-adv/gateway_common.h index 5e728d0b7959..55e527a489fe 100644 --- a/net/batman-adv/gateway_common.h +++ b/net/batman-adv/gateway_common.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2009-2010 B.A.T.M.A.N. contributors: + * Copyright (C) 2009-2011 B.A.T.M.A.N. contributors: * * Marek Lindner * diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index 4f95777ce080..b3058e46ee6b 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: + * Copyright (C) 2007-2011 B.A.T.M.A.N. 
contributors: * * Marek Lindner, Simon Wunderlich * @@ -31,36 +31,40 @@ #include <linux/if_arp.h> -/* protect update critical side of if_list - but not the content */ -static DEFINE_SPINLOCK(if_list_lock); +/* protect update critical side of hardif_list - but not the content */ +static DEFINE_SPINLOCK(hardif_list_lock); -static void hardif_free_rcu(struct rcu_head *rcu) + +static int batman_skb_recv(struct sk_buff *skb, + struct net_device *dev, + struct packet_type *ptype, + struct net_device *orig_dev); + +void hardif_free_rcu(struct rcu_head *rcu) { - struct batman_if *batman_if; + struct hard_iface *hard_iface; - batman_if = container_of(rcu, struct batman_if, rcu); - dev_put(batman_if->net_dev); - kref_put(&batman_if->refcount, hardif_free_ref); + hard_iface = container_of(rcu, struct hard_iface, rcu); + dev_put(hard_iface->net_dev); + kfree(hard_iface); } -struct batman_if *get_batman_if_by_netdev(struct net_device *net_dev) +struct hard_iface *hardif_get_by_netdev(struct net_device *net_dev) { - struct batman_if *batman_if; + struct hard_iface *hard_iface; rcu_read_lock(); - list_for_each_entry_rcu(batman_if, &if_list, list) { - if (batman_if->net_dev == net_dev) + list_for_each_entry_rcu(hard_iface, &hardif_list, list) { + if (hard_iface->net_dev == net_dev && + atomic_inc_not_zero(&hard_iface->refcount)) goto out; } - batman_if = NULL; + hard_iface = NULL; out: - if (batman_if) - kref_get(&batman_if->refcount); - rcu_read_unlock(); - return batman_if; + return hard_iface; } static int is_valid_iface(struct net_device *net_dev) @@ -75,13 +79,8 @@ static int is_valid_iface(struct net_device *net_dev) return 0; /* no batman over batman */ -#ifdef HAVE_NET_DEVICE_OPS - if (net_dev->netdev_ops->ndo_start_xmit == interface_tx) - return 0; -#else - if (net_dev->hard_start_xmit == interface_tx) + if (softif_is_valid(net_dev)) return 0; -#endif /* Device is being bridged */ /* if (net_dev->priv_flags & IFF_BRIDGE_PORT) @@ -90,27 +89,25 @@ static int is_valid_iface(struct net_device *net_dev) return 1; } -static struct batman_if *get_active_batman_if(struct net_device *soft_iface) +static struct hard_iface *hardif_get_active(struct net_device *soft_iface) { - struct batman_if *batman_if; + struct hard_iface *hard_iface; rcu_read_lock(); - list_for_each_entry_rcu(batman_if, &if_list, list) { - if (batman_if->soft_iface != soft_iface) + list_for_each_entry_rcu(hard_iface, &hardif_list, list) { + if (hard_iface->soft_iface != soft_iface) continue; - if (batman_if->if_status == IF_ACTIVE) + if (hard_iface->if_status == IF_ACTIVE && + atomic_inc_not_zero(&hard_iface->refcount)) goto out; } - batman_if = NULL; + hard_iface = NULL; out: - if (batman_if) - kref_get(&batman_if->refcount); - rcu_read_unlock(); - return batman_if; + return hard_iface; } static void update_primary_addr(struct bat_priv *bat_priv) @@ -126,24 +123,24 @@ static void update_primary_addr(struct bat_priv *bat_priv) } static void set_primary_if(struct bat_priv *bat_priv, - struct batman_if *batman_if) + struct hard_iface *hard_iface) { struct batman_packet *batman_packet; - struct batman_if *old_if; + struct hard_iface *old_if; - if (batman_if) - kref_get(&batman_if->refcount); + if (hard_iface && !atomic_inc_not_zero(&hard_iface->refcount)) + hard_iface = NULL; old_if = bat_priv->primary_if; - bat_priv->primary_if = batman_if; + bat_priv->primary_if = hard_iface; if (old_if) - kref_put(&old_if->refcount, hardif_free_ref); + hardif_free_ref(old_if); if (!bat_priv->primary_if) return; - batman_packet = (struct batman_packet 
*)(batman_if->packet_buff); + batman_packet = (struct batman_packet *)(hard_iface->packet_buff); batman_packet->flags = PRIMARIES_FIRST_HOP; batman_packet->ttl = TTL; @@ -156,42 +153,42 @@ static void set_primary_if(struct bat_priv *bat_priv, atomic_set(&bat_priv->hna_local_changed, 1); } -static bool hardif_is_iface_up(struct batman_if *batman_if) +static bool hardif_is_iface_up(struct hard_iface *hard_iface) { - if (batman_if->net_dev->flags & IFF_UP) + if (hard_iface->net_dev->flags & IFF_UP) return true; return false; } -static void update_mac_addresses(struct batman_if *batman_if) +static void update_mac_addresses(struct hard_iface *hard_iface) { - memcpy(((struct batman_packet *)(batman_if->packet_buff))->orig, - batman_if->net_dev->dev_addr, ETH_ALEN); - memcpy(((struct batman_packet *)(batman_if->packet_buff))->prev_sender, - batman_if->net_dev->dev_addr, ETH_ALEN); + memcpy(((struct batman_packet *)(hard_iface->packet_buff))->orig, + hard_iface->net_dev->dev_addr, ETH_ALEN); + memcpy(((struct batman_packet *)(hard_iface->packet_buff))->prev_sender, + hard_iface->net_dev->dev_addr, ETH_ALEN); } static void check_known_mac_addr(struct net_device *net_dev) { - struct batman_if *batman_if; + struct hard_iface *hard_iface; rcu_read_lock(); - list_for_each_entry_rcu(batman_if, &if_list, list) { - if ((batman_if->if_status != IF_ACTIVE) && - (batman_if->if_status != IF_TO_BE_ACTIVATED)) + list_for_each_entry_rcu(hard_iface, &hardif_list, list) { + if ((hard_iface->if_status != IF_ACTIVE) && + (hard_iface->if_status != IF_TO_BE_ACTIVATED)) continue; - if (batman_if->net_dev == net_dev) + if (hard_iface->net_dev == net_dev) continue; - if (!compare_orig(batman_if->net_dev->dev_addr, - net_dev->dev_addr)) + if (!compare_eth(hard_iface->net_dev->dev_addr, + net_dev->dev_addr)) continue; pr_warning("The newly added mac address (%pM) already exists " "on: %s\n", net_dev->dev_addr, - batman_if->net_dev->name); + hard_iface->net_dev->name); pr_warning("It is strongly recommended to keep mac addresses " "unique to avoid problems!\n"); } @@ -201,7 +198,7 @@ static void check_known_mac_addr(struct net_device *net_dev) int hardif_min_mtu(struct net_device *soft_iface) { struct bat_priv *bat_priv = netdev_priv(soft_iface); - struct batman_if *batman_if; + struct hard_iface *hard_iface; /* allow big frames if all devices are capable to do so * (have MTU > 1500 + BAT_HEADER_LEN) */ int min_mtu = ETH_DATA_LEN; @@ -210,15 +207,15 @@ int hardif_min_mtu(struct net_device *soft_iface) goto out; rcu_read_lock(); - list_for_each_entry_rcu(batman_if, &if_list, list) { - if ((batman_if->if_status != IF_ACTIVE) && - (batman_if->if_status != IF_TO_BE_ACTIVATED)) + list_for_each_entry_rcu(hard_iface, &hardif_list, list) { + if ((hard_iface->if_status != IF_ACTIVE) && + (hard_iface->if_status != IF_TO_BE_ACTIVATED)) continue; - if (batman_if->soft_iface != soft_iface) + if (hard_iface->soft_iface != soft_iface) continue; - min_mtu = min_t(int, batman_if->net_dev->mtu - BAT_HEADER_LEN, + min_mtu = min_t(int, hard_iface->net_dev->mtu - BAT_HEADER_LEN, min_mtu); } rcu_read_unlock(); @@ -236,77 +233,95 @@ void update_min_mtu(struct net_device *soft_iface) soft_iface->mtu = min_mtu; } -static void hardif_activate_interface(struct batman_if *batman_if) +static void hardif_activate_interface(struct hard_iface *hard_iface) { struct bat_priv *bat_priv; - if (batman_if->if_status != IF_INACTIVE) + if (hard_iface->if_status != IF_INACTIVE) return; - bat_priv = netdev_priv(batman_if->soft_iface); + bat_priv = 
netdev_priv(hard_iface->soft_iface); - update_mac_addresses(batman_if); - batman_if->if_status = IF_TO_BE_ACTIVATED; + update_mac_addresses(hard_iface); + hard_iface->if_status = IF_TO_BE_ACTIVATED; /** * the first active interface becomes our primary interface or * the next active interface after the old primay interface was removed */ if (!bat_priv->primary_if) - set_primary_if(bat_priv, batman_if); + set_primary_if(bat_priv, hard_iface); - bat_info(batman_if->soft_iface, "Interface activated: %s\n", - batman_if->net_dev->name); + bat_info(hard_iface->soft_iface, "Interface activated: %s\n", + hard_iface->net_dev->name); - update_min_mtu(batman_if->soft_iface); + update_min_mtu(hard_iface->soft_iface); return; } -static void hardif_deactivate_interface(struct batman_if *batman_if) +static void hardif_deactivate_interface(struct hard_iface *hard_iface) { - if ((batman_if->if_status != IF_ACTIVE) && - (batman_if->if_status != IF_TO_BE_ACTIVATED)) + if ((hard_iface->if_status != IF_ACTIVE) && + (hard_iface->if_status != IF_TO_BE_ACTIVATED)) return; - batman_if->if_status = IF_INACTIVE; + hard_iface->if_status = IF_INACTIVE; - bat_info(batman_if->soft_iface, "Interface deactivated: %s\n", - batman_if->net_dev->name); + bat_info(hard_iface->soft_iface, "Interface deactivated: %s\n", + hard_iface->net_dev->name); - update_min_mtu(batman_if->soft_iface); + update_min_mtu(hard_iface->soft_iface); } -int hardif_enable_interface(struct batman_if *batman_if, char *iface_name) +int hardif_enable_interface(struct hard_iface *hard_iface, char *iface_name) { struct bat_priv *bat_priv; struct batman_packet *batman_packet; + struct net_device *soft_iface; + int ret; - if (batman_if->if_status != IF_NOT_IN_USE) + if (hard_iface->if_status != IF_NOT_IN_USE) goto out; - batman_if->soft_iface = dev_get_by_name(&init_net, iface_name); + if (!atomic_inc_not_zero(&hard_iface->refcount)) + goto out; - if (!batman_if->soft_iface) { - batman_if->soft_iface = softif_create(iface_name); + soft_iface = dev_get_by_name(&init_net, iface_name); - if (!batman_if->soft_iface) + if (!soft_iface) { + soft_iface = softif_create(iface_name); + + if (!soft_iface) { + ret = -ENOMEM; goto err; + } /* dev_get_by_name() increases the reference counter for us */ - dev_hold(batman_if->soft_iface); + dev_hold(soft_iface); + } + + if (!softif_is_valid(soft_iface)) { + pr_err("Can't create batman mesh interface %s: " + "already exists as regular interface\n", + soft_iface->name); + dev_put(soft_iface); + ret = -EINVAL; + goto err; } - bat_priv = netdev_priv(batman_if->soft_iface); - batman_if->packet_len = BAT_PACKET_LEN; - batman_if->packet_buff = kmalloc(batman_if->packet_len, GFP_ATOMIC); + hard_iface->soft_iface = soft_iface; + bat_priv = netdev_priv(hard_iface->soft_iface); + hard_iface->packet_len = BAT_PACKET_LEN; + hard_iface->packet_buff = kmalloc(hard_iface->packet_len, GFP_ATOMIC); - if (!batman_if->packet_buff) { - bat_err(batman_if->soft_iface, "Can't add interface packet " - "(%s): out of memory\n", batman_if->net_dev->name); + if (!hard_iface->packet_buff) { + bat_err(hard_iface->soft_iface, "Can't add interface packet " + "(%s): out of memory\n", hard_iface->net_dev->name); + ret = -ENOMEM; goto err; } - batman_packet = (struct batman_packet *)(batman_if->packet_buff); + batman_packet = (struct batman_packet *)(hard_iface->packet_buff); batman_packet->packet_type = BAT_PACKET; batman_packet->version = COMPAT_VERSION; batman_packet->flags = 0; @@ -314,107 +329,107 @@ int hardif_enable_interface(struct batman_if 
*batman_if, char *iface_name) batman_packet->tq = TQ_MAX_VALUE; batman_packet->num_hna = 0; - batman_if->if_num = bat_priv->num_ifaces; + hard_iface->if_num = bat_priv->num_ifaces; bat_priv->num_ifaces++; - batman_if->if_status = IF_INACTIVE; - orig_hash_add_if(batman_if, bat_priv->num_ifaces); + hard_iface->if_status = IF_INACTIVE; + orig_hash_add_if(hard_iface, bat_priv->num_ifaces); - batman_if->batman_adv_ptype.type = __constant_htons(ETH_P_BATMAN); - batman_if->batman_adv_ptype.func = batman_skb_recv; - batman_if->batman_adv_ptype.dev = batman_if->net_dev; - kref_get(&batman_if->refcount); - dev_add_pack(&batman_if->batman_adv_ptype); + hard_iface->batman_adv_ptype.type = __constant_htons(ETH_P_BATMAN); + hard_iface->batman_adv_ptype.func = batman_skb_recv; + hard_iface->batman_adv_ptype.dev = hard_iface->net_dev; + dev_add_pack(&hard_iface->batman_adv_ptype); - atomic_set(&batman_if->seqno, 1); - atomic_set(&batman_if->frag_seqno, 1); - bat_info(batman_if->soft_iface, "Adding interface: %s\n", - batman_if->net_dev->name); + atomic_set(&hard_iface->seqno, 1); + atomic_set(&hard_iface->frag_seqno, 1); + bat_info(hard_iface->soft_iface, "Adding interface: %s\n", + hard_iface->net_dev->name); - if (atomic_read(&bat_priv->fragmentation) && batman_if->net_dev->mtu < + if (atomic_read(&bat_priv->fragmentation) && hard_iface->net_dev->mtu < ETH_DATA_LEN + BAT_HEADER_LEN) - bat_info(batman_if->soft_iface, + bat_info(hard_iface->soft_iface, "The MTU of interface %s is too small (%i) to handle " "the transport of batman-adv packets. Packets going " "over this interface will be fragmented on layer2 " "which could impact the performance. Setting the MTU " "to %zi would solve the problem.\n", - batman_if->net_dev->name, batman_if->net_dev->mtu, + hard_iface->net_dev->name, hard_iface->net_dev->mtu, ETH_DATA_LEN + BAT_HEADER_LEN); - if (!atomic_read(&bat_priv->fragmentation) && batman_if->net_dev->mtu < + if (!atomic_read(&bat_priv->fragmentation) && hard_iface->net_dev->mtu < ETH_DATA_LEN + BAT_HEADER_LEN) - bat_info(batman_if->soft_iface, + bat_info(hard_iface->soft_iface, "The MTU of interface %s is too small (%i) to handle " "the transport of batman-adv packets. 
If you experience" " problems getting traffic through try increasing the " "MTU to %zi.\n", - batman_if->net_dev->name, batman_if->net_dev->mtu, + hard_iface->net_dev->name, hard_iface->net_dev->mtu, ETH_DATA_LEN + BAT_HEADER_LEN); - if (hardif_is_iface_up(batman_if)) - hardif_activate_interface(batman_if); + if (hardif_is_iface_up(hard_iface)) + hardif_activate_interface(hard_iface); else - bat_err(batman_if->soft_iface, "Not using interface %s " + bat_err(hard_iface->soft_iface, "Not using interface %s " "(retrying later): interface not active\n", - batman_if->net_dev->name); + hard_iface->net_dev->name); /* begin scheduling originator messages on that interface */ - schedule_own_packet(batman_if); + schedule_own_packet(hard_iface); out: return 0; err: - return -ENOMEM; + hardif_free_ref(hard_iface); + return ret; } -void hardif_disable_interface(struct batman_if *batman_if) +void hardif_disable_interface(struct hard_iface *hard_iface) { - struct bat_priv *bat_priv = netdev_priv(batman_if->soft_iface); + struct bat_priv *bat_priv = netdev_priv(hard_iface->soft_iface); - if (batman_if->if_status == IF_ACTIVE) - hardif_deactivate_interface(batman_if); + if (hard_iface->if_status == IF_ACTIVE) + hardif_deactivate_interface(hard_iface); - if (batman_if->if_status != IF_INACTIVE) + if (hard_iface->if_status != IF_INACTIVE) return; - bat_info(batman_if->soft_iface, "Removing interface: %s\n", - batman_if->net_dev->name); - dev_remove_pack(&batman_if->batman_adv_ptype); - kref_put(&batman_if->refcount, hardif_free_ref); + bat_info(hard_iface->soft_iface, "Removing interface: %s\n", + hard_iface->net_dev->name); + dev_remove_pack(&hard_iface->batman_adv_ptype); bat_priv->num_ifaces--; - orig_hash_del_if(batman_if, bat_priv->num_ifaces); + orig_hash_del_if(hard_iface, bat_priv->num_ifaces); - if (batman_if == bat_priv->primary_if) { - struct batman_if *new_if; + if (hard_iface == bat_priv->primary_if) { + struct hard_iface *new_if; - new_if = get_active_batman_if(batman_if->soft_iface); + new_if = hardif_get_active(hard_iface->soft_iface); set_primary_if(bat_priv, new_if); if (new_if) - kref_put(&new_if->refcount, hardif_free_ref); + hardif_free_ref(new_if); } - kfree(batman_if->packet_buff); - batman_if->packet_buff = NULL; - batman_if->if_status = IF_NOT_IN_USE; + kfree(hard_iface->packet_buff); + hard_iface->packet_buff = NULL; + hard_iface->if_status = IF_NOT_IN_USE; - /* delete all references to this batman_if */ + /* delete all references to this hard_iface */ purge_orig_ref(bat_priv); - purge_outstanding_packets(bat_priv, batman_if); - dev_put(batman_if->soft_iface); + purge_outstanding_packets(bat_priv, hard_iface); + dev_put(hard_iface->soft_iface); /* nobody uses this interface anymore */ if (!bat_priv->num_ifaces) - softif_destroy(batman_if->soft_iface); + softif_destroy(hard_iface->soft_iface); - batman_if->soft_iface = NULL; + hard_iface->soft_iface = NULL; + hardif_free_ref(hard_iface); } -static struct batman_if *hardif_add_interface(struct net_device *net_dev) +static struct hard_iface *hardif_add_interface(struct net_device *net_dev) { - struct batman_if *batman_if; + struct hard_iface *hard_iface; int ret; ret = is_valid_iface(net_dev); @@ -423,73 +438,73 @@ static struct batman_if *hardif_add_interface(struct net_device *net_dev) dev_hold(net_dev); - batman_if = kmalloc(sizeof(struct batman_if), GFP_ATOMIC); - if (!batman_if) { + hard_iface = kmalloc(sizeof(struct hard_iface), GFP_ATOMIC); + if (!hard_iface) { pr_err("Can't add interface (%s): out of memory\n", net_dev->name); 
goto release_dev; } - ret = sysfs_add_hardif(&batman_if->hardif_obj, net_dev); + ret = sysfs_add_hardif(&hard_iface->hardif_obj, net_dev); if (ret) goto free_if; - batman_if->if_num = -1; - batman_if->net_dev = net_dev; - batman_if->soft_iface = NULL; - batman_if->if_status = IF_NOT_IN_USE; - INIT_LIST_HEAD(&batman_if->list); - kref_init(&batman_if->refcount); + hard_iface->if_num = -1; + hard_iface->net_dev = net_dev; + hard_iface->soft_iface = NULL; + hard_iface->if_status = IF_NOT_IN_USE; + INIT_LIST_HEAD(&hard_iface->list); + /* extra reference for return */ + atomic_set(&hard_iface->refcount, 2); - check_known_mac_addr(batman_if->net_dev); + check_known_mac_addr(hard_iface->net_dev); - spin_lock(&if_list_lock); - list_add_tail_rcu(&batman_if->list, &if_list); - spin_unlock(&if_list_lock); + spin_lock(&hardif_list_lock); + list_add_tail_rcu(&hard_iface->list, &hardif_list); + spin_unlock(&hardif_list_lock); - /* extra reference for return */ - kref_get(&batman_if->refcount); - return batman_if; + return hard_iface; free_if: - kfree(batman_if); + kfree(hard_iface); release_dev: dev_put(net_dev); out: return NULL; } -static void hardif_remove_interface(struct batman_if *batman_if) +static void hardif_remove_interface(struct hard_iface *hard_iface) { /* first deactivate interface */ - if (batman_if->if_status != IF_NOT_IN_USE) - hardif_disable_interface(batman_if); + if (hard_iface->if_status != IF_NOT_IN_USE) + hardif_disable_interface(hard_iface); - if (batman_if->if_status != IF_NOT_IN_USE) + if (hard_iface->if_status != IF_NOT_IN_USE) return; - batman_if->if_status = IF_TO_BE_REMOVED; - sysfs_del_hardif(&batman_if->hardif_obj); - call_rcu(&batman_if->rcu, hardif_free_rcu); + hard_iface->if_status = IF_TO_BE_REMOVED; + sysfs_del_hardif(&hard_iface->hardif_obj); + hardif_free_ref(hard_iface); } void hardif_remove_interfaces(void) { - struct batman_if *batman_if, *batman_if_tmp; + struct hard_iface *hard_iface, *hard_iface_tmp; struct list_head if_queue; INIT_LIST_HEAD(&if_queue); - spin_lock(&if_list_lock); - list_for_each_entry_safe(batman_if, batman_if_tmp, &if_list, list) { - list_del_rcu(&batman_if->list); - list_add_tail(&batman_if->list, &if_queue); + spin_lock(&hardif_list_lock); + list_for_each_entry_safe(hard_iface, hard_iface_tmp, + &hardif_list, list) { + list_del_rcu(&hard_iface->list); + list_add_tail(&hard_iface->list, &if_queue); } - spin_unlock(&if_list_lock); + spin_unlock(&hardif_list_lock); rtnl_lock(); - list_for_each_entry_safe(batman_if, batman_if_tmp, &if_queue, list) { - hardif_remove_interface(batman_if); + list_for_each_entry_safe(hard_iface, hard_iface_tmp, &if_queue, list) { + hardif_remove_interface(hard_iface); } rtnl_unlock(); } @@ -498,43 +513,43 @@ static int hard_if_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *net_dev = (struct net_device *)ptr; - struct batman_if *batman_if = get_batman_if_by_netdev(net_dev); + struct hard_iface *hard_iface = hardif_get_by_netdev(net_dev); struct bat_priv *bat_priv; - if (!batman_if && event == NETDEV_REGISTER) - batman_if = hardif_add_interface(net_dev); + if (!hard_iface && event == NETDEV_REGISTER) + hard_iface = hardif_add_interface(net_dev); - if (!batman_if) + if (!hard_iface) goto out; switch (event) { case NETDEV_UP: - hardif_activate_interface(batman_if); + hardif_activate_interface(hard_iface); break; case NETDEV_GOING_DOWN: case NETDEV_DOWN: - hardif_deactivate_interface(batman_if); + hardif_deactivate_interface(hard_iface); break; case NETDEV_UNREGISTER: - 
spin_lock(&if_list_lock); - list_del_rcu(&batman_if->list); - spin_unlock(&if_list_lock); + spin_lock(&hardif_list_lock); + list_del_rcu(&hard_iface->list); + spin_unlock(&hardif_list_lock); - hardif_remove_interface(batman_if); + hardif_remove_interface(hard_iface); break; case NETDEV_CHANGEMTU: - if (batman_if->soft_iface) - update_min_mtu(batman_if->soft_iface); + if (hard_iface->soft_iface) + update_min_mtu(hard_iface->soft_iface); break; case NETDEV_CHANGEADDR: - if (batman_if->if_status == IF_NOT_IN_USE) + if (hard_iface->if_status == IF_NOT_IN_USE) goto hardif_put; - check_known_mac_addr(batman_if->net_dev); - update_mac_addresses(batman_if); + check_known_mac_addr(hard_iface->net_dev); + update_mac_addresses(hard_iface); - bat_priv = netdev_priv(batman_if->soft_iface); - if (batman_if == bat_priv->primary_if) + bat_priv = netdev_priv(hard_iface->soft_iface); + if (hard_iface == bat_priv->primary_if) update_primary_addr(bat_priv); break; default: @@ -542,22 +557,23 @@ static int hard_if_event(struct notifier_block *this, }; hardif_put: - kref_put(&batman_if->refcount, hardif_free_ref); + hardif_free_ref(hard_iface); out: return NOTIFY_DONE; } /* receive a packet with the batman ethertype coming on a hard * interface */ -int batman_skb_recv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *ptype, struct net_device *orig_dev) +static int batman_skb_recv(struct sk_buff *skb, struct net_device *dev, + struct packet_type *ptype, + struct net_device *orig_dev) { struct bat_priv *bat_priv; struct batman_packet *batman_packet; - struct batman_if *batman_if; + struct hard_iface *hard_iface; int ret; - batman_if = container_of(ptype, struct batman_if, batman_adv_ptype); + hard_iface = container_of(ptype, struct hard_iface, batman_adv_ptype); skb = skb_share_check(skb, GFP_ATOMIC); /* skb was released by skb_share_check() */ @@ -573,16 +589,16 @@ int batman_skb_recv(struct sk_buff *skb, struct net_device *dev, || !skb_mac_header(skb))) goto err_free; - if (!batman_if->soft_iface) + if (!hard_iface->soft_iface) goto err_free; - bat_priv = netdev_priv(batman_if->soft_iface); + bat_priv = netdev_priv(hard_iface->soft_iface); if (atomic_read(&bat_priv->mesh_state) != MESH_ACTIVE) goto err_free; /* discard frames on not active interfaces */ - if (batman_if->if_status != IF_ACTIVE) + if (hard_iface->if_status != IF_ACTIVE) goto err_free; batman_packet = (struct batman_packet *)skb->data; @@ -600,32 +616,32 @@ int batman_skb_recv(struct sk_buff *skb, struct net_device *dev, switch (batman_packet->packet_type) { /* batman originator packet */ case BAT_PACKET: - ret = recv_bat_packet(skb, batman_if); + ret = recv_bat_packet(skb, hard_iface); break; /* batman icmp packet */ case BAT_ICMP: - ret = recv_icmp_packet(skb, batman_if); + ret = recv_icmp_packet(skb, hard_iface); break; /* unicast packet */ case BAT_UNICAST: - ret = recv_unicast_packet(skb, batman_if); + ret = recv_unicast_packet(skb, hard_iface); break; /* fragmented unicast packet */ case BAT_UNICAST_FRAG: - ret = recv_ucast_frag_packet(skb, batman_if); + ret = recv_ucast_frag_packet(skb, hard_iface); break; /* broadcast packet */ case BAT_BCAST: - ret = recv_bcast_packet(skb, batman_if); + ret = recv_bcast_packet(skb, hard_iface); break; /* vis packet */ case BAT_VIS: - ret = recv_vis_packet(skb, batman_if); + ret = recv_vis_packet(skb, hard_iface); break; default: ret = NET_RX_DROP; diff --git a/net/batman-adv/hard-interface.h b/net/batman-adv/hard-interface.h index 30ec3b8db459..a9ddf36e51c8 100644 --- 
a/net/batman-adv/hard-interface.h +++ b/net/batman-adv/hard-interface.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: + * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * @@ -31,23 +31,18 @@ extern struct notifier_block hard_if_notifier; -struct batman_if *get_batman_if_by_netdev(struct net_device *net_dev); -int hardif_enable_interface(struct batman_if *batman_if, char *iface_name); -void hardif_disable_interface(struct batman_if *batman_if); +struct hard_iface *hardif_get_by_netdev(struct net_device *net_dev); +int hardif_enable_interface(struct hard_iface *hard_iface, char *iface_name); +void hardif_disable_interface(struct hard_iface *hard_iface); void hardif_remove_interfaces(void); -int batman_skb_recv(struct sk_buff *skb, - struct net_device *dev, - struct packet_type *ptype, - struct net_device *orig_dev); int hardif_min_mtu(struct net_device *soft_iface); void update_min_mtu(struct net_device *soft_iface); +void hardif_free_rcu(struct rcu_head *rcu); -static inline void hardif_free_ref(struct kref *refcount) +static inline void hardif_free_ref(struct hard_iface *hard_iface) { - struct batman_if *batman_if; - - batman_if = container_of(refcount, struct batman_if, refcount); - kfree(batman_if); + if (atomic_dec_and_test(&hard_iface->refcount)) + call_rcu(&hard_iface->rcu, hardif_free_rcu); } #endif /* _NET_BATMAN_ADV_HARD_INTERFACE_H_ */ diff --git a/net/batman-adv/hash.c b/net/batman-adv/hash.c index 26e623eb9def..c5213d8f2cca 100644 --- a/net/batman-adv/hash.c +++ b/net/batman-adv/hash.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2006-2010 B.A.T.M.A.N. contributors: + * Copyright (C) 2006-2011 B.A.T.M.A.N. contributors: * * Simon Wunderlich, Marek Lindner * @@ -27,13 +27,16 @@ static void hash_init(struct hashtable_t *hash) { int i; - for (i = 0 ; i < hash->size; i++) + for (i = 0 ; i < hash->size; i++) { INIT_HLIST_HEAD(&hash->table[i]); + spin_lock_init(&hash->list_locks[i]); + } } /* free only the hashtable and the hash itself. */ void hash_destroy(struct hashtable_t *hash) { + kfree(hash->list_locks); kfree(hash->table); kfree(hash); } @@ -43,20 +46,25 @@ struct hashtable_t *hash_new(int size) { struct hashtable_t *hash; - hash = kmalloc(sizeof(struct hashtable_t) , GFP_ATOMIC); - + hash = kmalloc(sizeof(struct hashtable_t), GFP_ATOMIC); if (!hash) return NULL; - hash->size = size; hash->table = kmalloc(sizeof(struct element_t *) * size, GFP_ATOMIC); + if (!hash->table) + goto free_hash; - if (!hash->table) { - kfree(hash); - return NULL; - } + hash->list_locks = kmalloc(sizeof(spinlock_t) * size, GFP_ATOMIC); + if (!hash->list_locks) + goto free_table; + hash->size = size; hash_init(hash); - return hash; + +free_table: + kfree(hash->table); +free_hash: + kfree(hash); + return NULL; } diff --git a/net/batman-adv/hash.h b/net/batman-adv/hash.h index 09216ade16f1..434822b27473 100644 --- a/net/batman-adv/hash.h +++ b/net/batman-adv/hash.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2006-2010 B.A.T.M.A.N. contributors: + * Copyright (C) 2006-2011 B.A.T.M.A.N. 
contributors: * * Simon Wunderlich, Marek Lindner * @@ -28,32 +28,23 @@ * compare 2 element datas for their keys, * return 0 if same and not 0 if not * same */ -typedef int (*hashdata_compare_cb)(void *, void *); +typedef int (*hashdata_compare_cb)(struct hlist_node *, void *); /* the hashfunction, should return an index * based on the key in the data of the first * argument and the size the second */ typedef int (*hashdata_choose_cb)(void *, int); -typedef void (*hashdata_free_cb)(void *, void *); - -struct element_t { - void *data; /* pointer to the data */ - struct hlist_node hlist; /* bucket list pointer */ -}; +typedef void (*hashdata_free_cb)(struct hlist_node *, void *); struct hashtable_t { - struct hlist_head *table; /* the hashtable itself, with the buckets */ + struct hlist_head *table; /* the hashtable itself with the buckets */ + spinlock_t *list_locks; /* spinlock for each hash list entry */ int size; /* size of hashtable */ }; /* allocates and clears the hash */ struct hashtable_t *hash_new(int size); -/* remove element if you already found the element you want to delete and don't - * need the overhead to find it again with hash_remove(). But usually, you - * don't want to use this function, as it fiddles with hash-internals. */ -void *hash_remove_element(struct hashtable_t *hash, struct element_t *elem); - /* free only the hashtable and the hash itself. */ void hash_destroy(struct hashtable_t *hash); @@ -64,21 +55,22 @@ static inline void hash_delete(struct hashtable_t *hash, hashdata_free_cb free_cb, void *arg) { struct hlist_head *head; - struct hlist_node *walk, *safe; - struct element_t *bucket; + struct hlist_node *node, *node_tmp; + spinlock_t *list_lock; /* spinlock to protect write access */ int i; for (i = 0; i < hash->size; i++) { head = &hash->table[i]; + list_lock = &hash->list_locks[i]; - hlist_for_each_safe(walk, safe, head) { - bucket = hlist_entry(walk, struct element_t, hlist); - if (free_cb) - free_cb(bucket->data, arg); + spin_lock_bh(list_lock); + hlist_for_each_safe(node, node_tmp, head) { + hlist_del_rcu(node); - hlist_del(walk); - kfree(bucket); + if (free_cb) + free_cb(node, arg); } + spin_unlock_bh(list_lock); } hash_destroy(hash); @@ -87,35 +79,41 @@ static inline void hash_delete(struct hashtable_t *hash, /* adds data to the hashtable. 
returns 0 on success, -1 on error */ static inline int hash_add(struct hashtable_t *hash, hashdata_compare_cb compare, - hashdata_choose_cb choose, void *data) + hashdata_choose_cb choose, + void *data, struct hlist_node *data_node) { int index; struct hlist_head *head; - struct hlist_node *walk, *safe; - struct element_t *bucket; + struct hlist_node *node; + spinlock_t *list_lock; /* spinlock to protect write access */ if (!hash) - return -1; + goto err; index = choose(data, hash->size); head = &hash->table[index]; + list_lock = &hash->list_locks[index]; + + rcu_read_lock(); + __hlist_for_each_rcu(node, head) { + if (!compare(node, data)) + continue; - hlist_for_each_safe(walk, safe, head) { - bucket = hlist_entry(walk, struct element_t, hlist); - if (compare(bucket->data, data)) - return -1; + goto err_unlock; } + rcu_read_unlock(); /* no duplicate found in list, add new element */ - bucket = kmalloc(sizeof(struct element_t), GFP_ATOMIC); - - if (!bucket) - return -1; - - bucket->data = data; - hlist_add_head(&bucket->hlist, head); + spin_lock_bh(list_lock); + hlist_add_head_rcu(data_node, head); + spin_unlock_bh(list_lock); return 0; + +err_unlock: + rcu_read_unlock(); +err: + return -1; } /* removes data from hash, if found. returns pointer do data on success, so you @@ -127,50 +125,25 @@ static inline void *hash_remove(struct hashtable_t *hash, hashdata_choose_cb choose, void *data) { size_t index; - struct hlist_node *walk; - struct element_t *bucket; + struct hlist_node *node; struct hlist_head *head; - void *data_save; + void *data_save = NULL; index = choose(data, hash->size); head = &hash->table[index]; - hlist_for_each_entry(bucket, walk, head, hlist) { - if (compare(bucket->data, data)) { - data_save = bucket->data; - hlist_del(walk); - kfree(bucket); - return data_save; - } - } - - return NULL; -} - -/* finds data, based on the key in keydata. returns the found data on success, - * or NULL on error */ -static inline void *hash_find(struct hashtable_t *hash, - hashdata_compare_cb compare, - hashdata_choose_cb choose, void *keydata) -{ - int index; - struct hlist_head *head; - struct hlist_node *walk; - struct element_t *bucket; - - if (!hash) - return NULL; - - index = choose(keydata , hash->size); - head = &hash->table[index]; + spin_lock_bh(&hash->list_locks[index]); + hlist_for_each(node, head) { + if (!compare(node, data)) + continue; - hlist_for_each(walk, head) { - bucket = hlist_entry(walk, struct element_t, hlist); - if (compare(bucket->data, keydata)) - return bucket->data; + data_save = node; + hlist_del_rcu(node); + break; } + spin_unlock_bh(&hash->list_locks[index]); - return NULL; + return data_save; } #endif /* _NET_BATMAN_ADV_HASH_H_ */ diff --git a/net/batman-adv/icmp_socket.c b/net/batman-adv/icmp_socket.c index ecf6d7ffab2e..34ce56c358e5 100644 --- a/net/batman-adv/icmp_socket.c +++ b/net/batman-adv/icmp_socket.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: + * Copyright (C) 2007-2011 B.A.T.M.A.N. 
contributors: * * Marek Lindner * @@ -24,7 +24,6 @@ #include <linux/slab.h> #include "icmp_socket.h" #include "send.h" -#include "types.h" #include "hash.h" #include "originator.h" #include "hard-interface.h" @@ -157,10 +156,9 @@ static ssize_t bat_socket_write(struct file *file, const char __user *buff, struct sk_buff *skb; struct icmp_packet_rr *icmp_packet; - struct orig_node *orig_node; - struct batman_if *batman_if; + struct orig_node *orig_node = NULL; + struct neigh_node *neigh_node = NULL; size_t packet_len = sizeof(struct icmp_packet); - uint8_t dstaddr[ETH_ALEN]; if (len < sizeof(struct icmp_packet)) { bat_dbg(DBG_BATMAN, bat_priv, @@ -220,47 +218,52 @@ static ssize_t bat_socket_write(struct file *file, const char __user *buff, if (atomic_read(&bat_priv->mesh_state) != MESH_ACTIVE) goto dst_unreach; - spin_lock_bh(&bat_priv->orig_hash_lock); - orig_node = ((struct orig_node *)hash_find(bat_priv->orig_hash, - compare_orig, choose_orig, - icmp_packet->dst)); + rcu_read_lock(); + orig_node = orig_hash_find(bat_priv, icmp_packet->dst); if (!orig_node) goto unlock; - if (!orig_node->router) + neigh_node = orig_node->router; + + if (!neigh_node) goto unlock; - batman_if = orig_node->router->if_incoming; - memcpy(dstaddr, orig_node->router->addr, ETH_ALEN); + if (!atomic_inc_not_zero(&neigh_node->refcount)) { + neigh_node = NULL; + goto unlock; + } - spin_unlock_bh(&bat_priv->orig_hash_lock); + rcu_read_unlock(); - if (!batman_if) + if (!neigh_node->if_incoming) goto dst_unreach; - if (batman_if->if_status != IF_ACTIVE) + if (neigh_node->if_incoming->if_status != IF_ACTIVE) goto dst_unreach; memcpy(icmp_packet->orig, bat_priv->primary_if->net_dev->dev_addr, ETH_ALEN); if (packet_len == sizeof(struct icmp_packet_rr)) - memcpy(icmp_packet->rr, batman_if->net_dev->dev_addr, ETH_ALEN); - - - send_skb_packet(skb, batman_if, dstaddr); + memcpy(icmp_packet->rr, + neigh_node->if_incoming->net_dev->dev_addr, ETH_ALEN); + send_skb_packet(skb, neigh_node->if_incoming, neigh_node->addr); goto out; unlock: - spin_unlock_bh(&bat_priv->orig_hash_lock); + rcu_read_unlock(); dst_unreach: icmp_packet->msg_type = DESTINATION_UNREACHABLE; bat_socket_add_packet(socket_client, icmp_packet, packet_len); free_skb: kfree_skb(skb); out: + if (neigh_node) + neigh_node_free_ref(neigh_node); + if (orig_node) + orig_node_free_ref(orig_node); return len; } diff --git a/net/batman-adv/icmp_socket.h b/net/batman-adv/icmp_socket.h index bf9b348cde27..462b190fa101 100644 --- a/net/batman-adv/icmp_socket.h +++ b/net/batman-adv/icmp_socket.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: + * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors: * * Marek Lindner * @@ -22,8 +22,6 @@ #ifndef _NET_BATMAN_ADV_ICMP_SOCKET_H_ #define _NET_BATMAN_ADV_ICMP_SOCKET_H_ -#include "types.h" - #define ICMP_SOCKET "socket" void bat_socket_init(void); diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c index b827f6a158cb..709b33bbdf43 100644 --- a/net/batman-adv/main.c +++ b/net/batman-adv/main.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: + * Copyright (C) 2007-2011 B.A.T.M.A.N. 
contributors: * * Marek Lindner, Simon Wunderlich * @@ -30,11 +30,10 @@ #include "translation-table.h" #include "hard-interface.h" #include "gateway_client.h" -#include "types.h" #include "vis.h" #include "hash.h" -struct list_head if_list; +struct list_head hardif_list; unsigned char broadcast_addr[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; @@ -42,7 +41,7 @@ struct workqueue_struct *bat_event_workqueue; static int __init batman_init(void) { - INIT_LIST_HEAD(&if_list); + INIT_LIST_HEAD(&hardif_list); /* the name should not be longer than 10 chars - see * http://lwn.net/Articles/23634/ */ @@ -80,7 +79,6 @@ int mesh_init(struct net_device *soft_iface) { struct bat_priv *bat_priv = netdev_priv(soft_iface); - spin_lock_init(&bat_priv->orig_hash_lock); spin_lock_init(&bat_priv->forw_bat_list_lock); spin_lock_init(&bat_priv->forw_bcast_list_lock); spin_lock_init(&bat_priv->hna_lhash_lock); @@ -155,14 +153,14 @@ void dec_module_count(void) int is_my_mac(uint8_t *addr) { - struct batman_if *batman_if; + struct hard_iface *hard_iface; rcu_read_lock(); - list_for_each_entry_rcu(batman_if, &if_list, list) { - if (batman_if->if_status != IF_ACTIVE) + list_for_each_entry_rcu(hard_iface, &hardif_list, list) { + if (hard_iface->if_status != IF_ACTIVE) continue; - if (compare_orig(batman_if->net_dev->dev_addr, addr)) { + if (compare_eth(hard_iface->net_dev->dev_addr, addr)) { rcu_read_unlock(); return 1; } diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h index 65106fb61b8f..dc248697de71 100644 --- a/net/batman-adv/main.h +++ b/net/batman-adv/main.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: + * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * @@ -22,9 +22,6 @@ #ifndef _NET_BATMAN_ADV_MAIN_H_ #define _NET_BATMAN_ADV_MAIN_H_ -/* Kernel Programming */ -#define LINUX - #define DRIVER_AUTHOR "Marek Lindner <lindner_marek@yahoo.de>, " \ "Simon Wunderlich <siwu@hrz.tu-chemnitz.de>" #define DRIVER_DESC "B.A.T.M.A.N. advanced" @@ -54,7 +51,6 @@ #define NUM_WORDS (TQ_LOCAL_WINDOW_SIZE / WORD_BIT_SIZE) -#define PACKBUFF_SIZE 2000 #define LOG_BUF_LEN 8192 /* has to be a power of 2 */ #define VIS_INTERVAL 5000 /* 5 seconds */ @@ -96,15 +92,11 @@ #define DBG_ROUTES 2 /* route or hna added / changed / deleted */ #define DBG_ALL 3 -#define LOG_BUF_LEN 8192 /* has to be a power of 2 */ - /* * Vis */ -/* #define VIS_SUBCLUSTERS_DISABLED */ - /* * Kernel headers */ @@ -130,7 +122,7 @@ #define REVISION_VERSION_STR " "REVISION_VERSION #endif -extern struct list_head if_list; +extern struct list_head hardif_list; extern unsigned char broadcast_addr[]; extern struct workqueue_struct *bat_event_workqueue; @@ -158,13 +150,6 @@ static inline void bat_dbg(char type __always_unused, } #endif -#define bat_warning(net_dev, fmt, arg...) \ - do { \ - struct net_device *_netdev = (net_dev); \ - struct bat_priv *_batpriv = netdev_priv(_netdev); \ - bat_dbg(DBG_ALL, _batpriv, fmt, ## arg); \ - pr_warning("%s: " fmt, _netdev->name, ## arg); \ - } while (0) #define bat_info(net_dev, fmt, arg...) \ do { \ struct net_device *_netdev = (net_dev); \ @@ -180,4 +165,14 @@ static inline void bat_dbg(char type __always_unused, pr_err("%s: " fmt, _netdev->name, ## arg); \ } while (0) +/** + * returns 1 if they are the same ethernet addr + * + * note: can't use compare_ether_addr() as it requires aligned memory + */ +static inline int compare_eth(void *data1, void *data2) +{ + return (memcmp(data1, data2, ETH_ALEN) == 0 ? 
1 : 0); +} + #endif /* _NET_BATMAN_ADV_MAIN_H_ */ diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index 6b7fb6b7e6f9..0b9133022d2d 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2009-2010 B.A.T.M.A.N. contributors: + * Copyright (C) 2009-2011 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * @@ -44,24 +44,36 @@ int originator_init(struct bat_priv *bat_priv) if (bat_priv->orig_hash) return 1; - spin_lock_bh(&bat_priv->orig_hash_lock); bat_priv->orig_hash = hash_new(1024); if (!bat_priv->orig_hash) goto err; - spin_unlock_bh(&bat_priv->orig_hash_lock); start_purge_timer(bat_priv); return 1; err: - spin_unlock_bh(&bat_priv->orig_hash_lock); return 0; } -struct neigh_node * -create_neighbor(struct orig_node *orig_node, struct orig_node *orig_neigh_node, - uint8_t *neigh, struct batman_if *if_incoming) +static void neigh_node_free_rcu(struct rcu_head *rcu) +{ + struct neigh_node *neigh_node; + + neigh_node = container_of(rcu, struct neigh_node, rcu); + kfree(neigh_node); +} + +void neigh_node_free_ref(struct neigh_node *neigh_node) +{ + if (atomic_dec_and_test(&neigh_node->refcount)) + call_rcu(&neigh_node->rcu, neigh_node_free_rcu); +} + +struct neigh_node *create_neighbor(struct orig_node *orig_node, + struct orig_node *orig_neigh_node, + uint8_t *neigh, + struct hard_iface *if_incoming) { struct bat_priv *bat_priv = netdev_priv(if_incoming->soft_iface); struct neigh_node *neigh_node; @@ -73,50 +85,94 @@ create_neighbor(struct orig_node *orig_node, struct orig_node *orig_neigh_node, if (!neigh_node) return NULL; - INIT_LIST_HEAD(&neigh_node->list); + INIT_HLIST_NODE(&neigh_node->list); + INIT_LIST_HEAD(&neigh_node->bonding_list); memcpy(neigh_node->addr, neigh, ETH_ALEN); neigh_node->orig_node = orig_neigh_node; neigh_node->if_incoming = if_incoming; - list_add_tail(&neigh_node->list, &orig_node->neigh_list); + /* extra reference for return */ + atomic_set(&neigh_node->refcount, 2); + + spin_lock_bh(&orig_node->neigh_list_lock); + hlist_add_head_rcu(&neigh_node->list, &orig_node->neigh_list); + spin_unlock_bh(&orig_node->neigh_list_lock); return neigh_node; } -static void free_orig_node(void *data, void *arg) +static void orig_node_free_rcu(struct rcu_head *rcu) { - struct list_head *list_pos, *list_pos_tmp; - struct neigh_node *neigh_node; - struct orig_node *orig_node = (struct orig_node *)data; - struct bat_priv *bat_priv = (struct bat_priv *)arg; + struct hlist_node *node, *node_tmp; + struct neigh_node *neigh_node, *tmp_neigh_node; + struct orig_node *orig_node; - /* for all neighbors towards this originator ... */ - list_for_each_safe(list_pos, list_pos_tmp, &orig_node->neigh_list) { - neigh_node = list_entry(list_pos, struct neigh_node, list); + orig_node = container_of(rcu, struct orig_node, rcu); + + spin_lock_bh(&orig_node->neigh_list_lock); + + /* for all bonding members ... */ + list_for_each_entry_safe(neigh_node, tmp_neigh_node, + &orig_node->bond_list, bonding_list) { + list_del_rcu(&neigh_node->bonding_list); + neigh_node_free_ref(neigh_node); + } - list_del(list_pos); - kfree(neigh_node); + /* for all neighbors towards this originator ... 
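The neigh_node_free_ref()/orig_node_free_ref() helpers introduced in the originator.c hunk above replace the old "copy what you need under orig_hash_lock" style with plain reference counting: a reader takes a reference with atomic_inc_not_zero() while the object is still live, and the last put defers the kfree() through call_rcu(). Below is a minimal userspace C11 sketch of that get/put discipline; the node_get()/node_put() names are invented for the example, and the real code frees via an RCU callback rather than immediately.

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct node {
    atomic_int refcount;
    int payload;
};

/* Take a reference only if the object is still alive (refcount > 0),
 * mirroring the kernel's atomic_inc_not_zero() used in the patch. */
static int node_get(struct node *n)
{
    int old = atomic_load(&n->refcount);
    do {
        if (old == 0)
            return 0;          /* already on its way to being freed */
    } while (!atomic_compare_exchange_weak(&n->refcount, &old, old + 1));
    return 1;
}

/* Drop a reference; free on the last put (the kernel version defers the
 * kfree through call_rcu() so concurrent RCU readers stay safe). */
static void node_put(struct node *n)
{
    if (atomic_fetch_sub(&n->refcount, 1) == 1)
        free(n);
}

int main(void)
{
    struct node *n = malloc(sizeof(*n));
    atomic_init(&n->refcount, 1);
    n->payload = 42;

    if (node_get(n))
        printf("payload: %d (refs now 2)\n", n->payload);

    node_put(n);   /* drop the extra reference */
    node_put(n);   /* drop the initial reference, frees the node */
    return 0;
}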
*/ + hlist_for_each_entry_safe(neigh_node, node, node_tmp, + &orig_node->neigh_list, list) { + hlist_del_rcu(&neigh_node->list); + neigh_node_free_ref(neigh_node); } + spin_unlock_bh(&orig_node->neigh_list_lock); + frag_list_free(&orig_node->frag_list); - hna_global_del_orig(bat_priv, orig_node, "originator timed out"); + hna_global_del_orig(orig_node->bat_priv, orig_node, + "originator timed out"); kfree(orig_node->bcast_own); kfree(orig_node->bcast_own_sum); kfree(orig_node); } +void orig_node_free_ref(struct orig_node *orig_node) +{ + if (atomic_dec_and_test(&orig_node->refcount)) + call_rcu(&orig_node->rcu, orig_node_free_rcu); +} + void originator_free(struct bat_priv *bat_priv) { - if (!bat_priv->orig_hash) + struct hashtable_t *hash = bat_priv->orig_hash; + struct hlist_node *node, *node_tmp; + struct hlist_head *head; + spinlock_t *list_lock; /* spinlock to protect write access */ + struct orig_node *orig_node; + int i; + + if (!hash) return; cancel_delayed_work_sync(&bat_priv->orig_work); - spin_lock_bh(&bat_priv->orig_hash_lock); - hash_delete(bat_priv->orig_hash, free_orig_node, bat_priv); bat_priv->orig_hash = NULL; - spin_unlock_bh(&bat_priv->orig_hash_lock); + + for (i = 0; i < hash->size; i++) { + head = &hash->table[i]; + list_lock = &hash->list_locks[i]; + + spin_lock_bh(list_lock); + hlist_for_each_entry_safe(orig_node, node, node_tmp, + head, hash_entry) { + + hlist_del_rcu(node); + orig_node_free_ref(orig_node); + } + spin_unlock_bh(list_lock); + } + + hash_destroy(hash); } /* this function finds or creates an originator entry for the given @@ -127,10 +183,7 @@ struct orig_node *get_orig_node(struct bat_priv *bat_priv, uint8_t *addr) int size; int hash_added; - orig_node = ((struct orig_node *)hash_find(bat_priv->orig_hash, - compare_orig, choose_orig, - addr)); - + orig_node = orig_hash_find(bat_priv, addr); if (orig_node) return orig_node; @@ -141,8 +194,16 @@ struct orig_node *get_orig_node(struct bat_priv *bat_priv, uint8_t *addr) if (!orig_node) return NULL; - INIT_LIST_HEAD(&orig_node->neigh_list); + INIT_HLIST_HEAD(&orig_node->neigh_list); + INIT_LIST_HEAD(&orig_node->bond_list); + spin_lock_init(&orig_node->ogm_cnt_lock); + spin_lock_init(&orig_node->bcast_seqno_lock); + spin_lock_init(&orig_node->neigh_list_lock); + + /* extra reference for return */ + atomic_set(&orig_node->refcount, 2); + orig_node->bat_priv = bat_priv; memcpy(orig_node->orig, addr, ETH_ALEN); orig_node->router = NULL; orig_node->hna_buff = NULL; @@ -151,6 +212,8 @@ struct orig_node *get_orig_node(struct bat_priv *bat_priv, uint8_t *addr) orig_node->batman_seqno_reset = jiffies - 1 - msecs_to_jiffies(RESET_PROTECTION_MS); + atomic_set(&orig_node->bond_candidates, 0); + size = bat_priv->num_ifaces * sizeof(unsigned long) * NUM_WORDS; orig_node->bcast_own = kzalloc(size, GFP_ATOMIC); @@ -166,8 +229,8 @@ struct orig_node *get_orig_node(struct bat_priv *bat_priv, uint8_t *addr) if (!orig_node->bcast_own_sum) goto free_bcast_own; - hash_added = hash_add(bat_priv->orig_hash, compare_orig, choose_orig, - orig_node); + hash_added = hash_add(bat_priv->orig_hash, compare_orig, + choose_orig, orig_node, &orig_node->hash_entry); if (hash_added < 0) goto free_bcast_own_sum; @@ -185,23 +248,30 @@ static bool purge_orig_neighbors(struct bat_priv *bat_priv, struct orig_node *orig_node, struct neigh_node **best_neigh_node) { - struct list_head *list_pos, *list_pos_tmp; + struct hlist_node *node, *node_tmp; struct neigh_node *neigh_node; bool neigh_purged = false; *best_neigh_node = NULL; + 
spin_lock_bh(&orig_node->neigh_list_lock); + /* for all neighbors towards this originator ... */ - list_for_each_safe(list_pos, list_pos_tmp, &orig_node->neigh_list) { - neigh_node = list_entry(list_pos, struct neigh_node, list); + hlist_for_each_entry_safe(neigh_node, node, node_tmp, + &orig_node->neigh_list, list) { if ((time_after(jiffies, neigh_node->last_valid + PURGE_TIMEOUT * HZ)) || (neigh_node->if_incoming->if_status == IF_INACTIVE) || + (neigh_node->if_incoming->if_status == IF_NOT_IN_USE) || (neigh_node->if_incoming->if_status == IF_TO_BE_REMOVED)) { - if (neigh_node->if_incoming->if_status == - IF_TO_BE_REMOVED) + if ((neigh_node->if_incoming->if_status == + IF_INACTIVE) || + (neigh_node->if_incoming->if_status == + IF_NOT_IN_USE) || + (neigh_node->if_incoming->if_status == + IF_TO_BE_REMOVED)) bat_dbg(DBG_BATMAN, bat_priv, "neighbor purge: originator %pM, " "neighbor: %pM, iface: %s\n", @@ -215,14 +285,18 @@ static bool purge_orig_neighbors(struct bat_priv *bat_priv, (neigh_node->last_valid / HZ)); neigh_purged = true; - list_del(list_pos); - kfree(neigh_node); + + hlist_del_rcu(&neigh_node->list); + bonding_candidate_del(orig_node, neigh_node); + neigh_node_free_ref(neigh_node); } else { if ((!*best_neigh_node) || (neigh_node->tq_avg > (*best_neigh_node)->tq_avg)) *best_neigh_node = neigh_node; } } + + spin_unlock_bh(&orig_node->neigh_list_lock); return neigh_purged; } @@ -245,9 +319,6 @@ static bool purge_orig_node(struct bat_priv *bat_priv, best_neigh_node, orig_node->hna_buff, orig_node->hna_buff_len); - /* update bonding candidates, we could have lost - * some candidates. */ - update_bonding_candidates(bat_priv, orig_node); } } @@ -257,40 +328,38 @@ static bool purge_orig_node(struct bat_priv *bat_priv, static void _purge_orig(struct bat_priv *bat_priv) { struct hashtable_t *hash = bat_priv->orig_hash; - struct hlist_node *walk, *safe; + struct hlist_node *node, *node_tmp; struct hlist_head *head; - struct element_t *bucket; + spinlock_t *list_lock; /* spinlock to protect write access */ struct orig_node *orig_node; int i; if (!hash) return; - spin_lock_bh(&bat_priv->orig_hash_lock); - /* for all origins... 
*/ for (i = 0; i < hash->size; i++) { head = &hash->table[i]; + list_lock = &hash->list_locks[i]; - hlist_for_each_entry_safe(bucket, walk, safe, head, hlist) { - orig_node = bucket->data; - + spin_lock_bh(list_lock); + hlist_for_each_entry_safe(orig_node, node, node_tmp, + head, hash_entry) { if (purge_orig_node(bat_priv, orig_node)) { if (orig_node->gw_flags) gw_node_delete(bat_priv, orig_node); - hlist_del(walk); - kfree(bucket); - free_orig_node(orig_node, bat_priv); + hlist_del_rcu(node); + orig_node_free_ref(orig_node); + continue; } if (time_after(jiffies, orig_node->last_frag_packet + msecs_to_jiffies(FRAG_TIMEOUT))) frag_list_free(&orig_node->frag_list); } + spin_unlock_bh(list_lock); } - spin_unlock_bh(&bat_priv->orig_hash_lock); - gw_node_purge(bat_priv); gw_election(bat_priv); @@ -318,9 +387,8 @@ int orig_seq_print_text(struct seq_file *seq, void *offset) struct net_device *net_dev = (struct net_device *)seq->private; struct bat_priv *bat_priv = netdev_priv(net_dev); struct hashtable_t *hash = bat_priv->orig_hash; - struct hlist_node *walk; + struct hlist_node *node, *node_tmp; struct hlist_head *head; - struct element_t *bucket; struct orig_node *orig_node; struct neigh_node *neigh_node; int batman_count = 0; @@ -348,14 +416,11 @@ int orig_seq_print_text(struct seq_file *seq, void *offset) "Originator", "last-seen", "#", TQ_MAX_VALUE, "Nexthop", "outgoingIF", "Potential nexthops"); - spin_lock_bh(&bat_priv->orig_hash_lock); - for (i = 0; i < hash->size; i++) { head = &hash->table[i]; - hlist_for_each_entry(bucket, walk, head, hlist) { - orig_node = bucket->data; - + rcu_read_lock(); + hlist_for_each_entry_rcu(orig_node, node, head, hash_entry) { if (!orig_node->router) continue; @@ -374,8 +439,8 @@ int orig_seq_print_text(struct seq_file *seq, void *offset) neigh_node->addr, neigh_node->if_incoming->net_dev->name); - list_for_each_entry(neigh_node, &orig_node->neigh_list, - list) { + hlist_for_each_entry_rcu(neigh_node, node_tmp, + &orig_node->neigh_list, list) { seq_printf(seq, " %pM (%3i)", neigh_node->addr, neigh_node->tq_avg); } @@ -383,10 +448,9 @@ int orig_seq_print_text(struct seq_file *seq, void *offset) seq_printf(seq, "\n"); batman_count++; } + rcu_read_unlock(); } - spin_unlock_bh(&bat_priv->orig_hash_lock); - if ((batman_count == 0)) seq_printf(seq, "No batman nodes in range ...\n"); @@ -423,36 +487,36 @@ static int orig_node_add_if(struct orig_node *orig_node, int max_if_num) return 0; } -int orig_hash_add_if(struct batman_if *batman_if, int max_if_num) +int orig_hash_add_if(struct hard_iface *hard_iface, int max_if_num) { - struct bat_priv *bat_priv = netdev_priv(batman_if->soft_iface); + struct bat_priv *bat_priv = netdev_priv(hard_iface->soft_iface); struct hashtable_t *hash = bat_priv->orig_hash; - struct hlist_node *walk; + struct hlist_node *node; struct hlist_head *head; - struct element_t *bucket; struct orig_node *orig_node; - int i; + int i, ret; /* resize all orig nodes because orig_node->bcast_own(_sum) depend on * if_num */ - spin_lock_bh(&bat_priv->orig_hash_lock); - for (i = 0; i < hash->size; i++) { head = &hash->table[i]; - hlist_for_each_entry(bucket, walk, head, hlist) { - orig_node = bucket->data; + rcu_read_lock(); + hlist_for_each_entry_rcu(orig_node, node, head, hash_entry) { + spin_lock_bh(&orig_node->ogm_cnt_lock); + ret = orig_node_add_if(orig_node, max_if_num); + spin_unlock_bh(&orig_node->ogm_cnt_lock); - if (orig_node_add_if(orig_node, max_if_num) == -1) + if (ret == -1) goto err; } + rcu_read_unlock(); } - 
spin_unlock_bh(&bat_priv->orig_hash_lock); return 0; err: - spin_unlock_bh(&bat_priv->orig_hash_lock); + rcu_read_unlock(); return -ENOMEM; } @@ -508,57 +572,55 @@ free_own_sum: return 0; } -int orig_hash_del_if(struct batman_if *batman_if, int max_if_num) +int orig_hash_del_if(struct hard_iface *hard_iface, int max_if_num) { - struct bat_priv *bat_priv = netdev_priv(batman_if->soft_iface); + struct bat_priv *bat_priv = netdev_priv(hard_iface->soft_iface); struct hashtable_t *hash = bat_priv->orig_hash; - struct hlist_node *walk; + struct hlist_node *node; struct hlist_head *head; - struct element_t *bucket; - struct batman_if *batman_if_tmp; + struct hard_iface *hard_iface_tmp; struct orig_node *orig_node; int i, ret; /* resize all orig nodes because orig_node->bcast_own(_sum) depend on * if_num */ - spin_lock_bh(&bat_priv->orig_hash_lock); - for (i = 0; i < hash->size; i++) { head = &hash->table[i]; - hlist_for_each_entry(bucket, walk, head, hlist) { - orig_node = bucket->data; - + rcu_read_lock(); + hlist_for_each_entry_rcu(orig_node, node, head, hash_entry) { + spin_lock_bh(&orig_node->ogm_cnt_lock); ret = orig_node_del_if(orig_node, max_if_num, - batman_if->if_num); + hard_iface->if_num); + spin_unlock_bh(&orig_node->ogm_cnt_lock); if (ret == -1) goto err; } + rcu_read_unlock(); } /* renumber remaining batman interfaces _inside_ of orig_hash_lock */ rcu_read_lock(); - list_for_each_entry_rcu(batman_if_tmp, &if_list, list) { - if (batman_if_tmp->if_status == IF_NOT_IN_USE) + list_for_each_entry_rcu(hard_iface_tmp, &hardif_list, list) { + if (hard_iface_tmp->if_status == IF_NOT_IN_USE) continue; - if (batman_if == batman_if_tmp) + if (hard_iface == hard_iface_tmp) continue; - if (batman_if->soft_iface != batman_if_tmp->soft_iface) + if (hard_iface->soft_iface != hard_iface_tmp->soft_iface) continue; - if (batman_if_tmp->if_num > batman_if->if_num) - batman_if_tmp->if_num--; + if (hard_iface_tmp->if_num > hard_iface->if_num) + hard_iface_tmp->if_num--; } rcu_read_unlock(); - batman_if->if_num = -1; - spin_unlock_bh(&bat_priv->orig_hash_lock); + hard_iface->if_num = -1; return 0; err: - spin_unlock_bh(&bat_priv->orig_hash_lock); + rcu_read_unlock(); return -ENOMEM; } diff --git a/net/batman-adv/originator.h b/net/batman-adv/originator.h index d474ceb2a4eb..5cc011057da1 100644 --- a/net/batman-adv/originator.h +++ b/net/batman-adv/originator.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: + * Copyright (C) 2007-2011 B.A.T.M.A.N. 
contributors: * * Marek Lindner, Simon Wunderlich * @@ -22,21 +22,28 @@ #ifndef _NET_BATMAN_ADV_ORIGINATOR_H_ #define _NET_BATMAN_ADV_ORIGINATOR_H_ +#include "hash.h" + int originator_init(struct bat_priv *bat_priv); void originator_free(struct bat_priv *bat_priv); void purge_orig_ref(struct bat_priv *bat_priv); +void orig_node_free_ref(struct orig_node *orig_node); struct orig_node *get_orig_node(struct bat_priv *bat_priv, uint8_t *addr); -struct neigh_node * -create_neighbor(struct orig_node *orig_node, struct orig_node *orig_neigh_node, - uint8_t *neigh, struct batman_if *if_incoming); +struct neigh_node *create_neighbor(struct orig_node *orig_node, + struct orig_node *orig_neigh_node, + uint8_t *neigh, + struct hard_iface *if_incoming); +void neigh_node_free_ref(struct neigh_node *neigh_node); int orig_seq_print_text(struct seq_file *seq, void *offset); -int orig_hash_add_if(struct batman_if *batman_if, int max_if_num); -int orig_hash_del_if(struct batman_if *batman_if, int max_if_num); +int orig_hash_add_if(struct hard_iface *hard_iface, int max_if_num); +int orig_hash_del_if(struct hard_iface *hard_iface, int max_if_num); /* returns 1 if they are the same originator */ -static inline int compare_orig(void *data1, void *data2) +static inline int compare_orig(struct hlist_node *node, void *data2) { + void *data1 = container_of(node, struct orig_node, hash_entry); + return (memcmp(data1, data2, ETH_ALEN) == 0 ? 1 : 0); } @@ -61,4 +68,35 @@ static inline int choose_orig(void *data, int32_t size) return hash % size; } +static inline struct orig_node *orig_hash_find(struct bat_priv *bat_priv, + void *data) +{ + struct hashtable_t *hash = bat_priv->orig_hash; + struct hlist_head *head; + struct hlist_node *node; + struct orig_node *orig_node, *orig_node_tmp = NULL; + int index; + + if (!hash) + return NULL; + + index = choose_orig(data, hash->size); + head = &hash->table[index]; + + rcu_read_lock(); + hlist_for_each_entry_rcu(orig_node, node, head, hash_entry) { + if (!compare_eth(orig_node, data)) + continue; + + if (!atomic_inc_not_zero(&orig_node->refcount)) + continue; + + orig_node_tmp = orig_node; + break; + } + rcu_read_unlock(); + + return orig_node_tmp; +} + #endif /* _NET_BATMAN_ADV_ORIGINATOR_H_ */ diff --git a/net/batman-adv/packet.h b/net/batman-adv/packet.h index 2284e8129cb2..e7571879af3f 100644 --- a/net/batman-adv/packet.h +++ b/net/batman-adv/packet.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: + * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * @@ -50,6 +50,7 @@ /* fragmentation defines */ #define UNI_FRAG_HEAD 0x01 +#define UNI_FRAG_LARGETAIL 0x02 struct batman_packet { uint8_t packet_type; diff --git a/net/batman-adv/ring_buffer.c b/net/batman-adv/ring_buffer.c index defd37c9be1f..5bb6a619afee 100644 --- a/net/batman-adv/ring_buffer.c +++ b/net/batman-adv/ring_buffer.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: + * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors: * * Marek Lindner * diff --git a/net/batman-adv/ring_buffer.h b/net/batman-adv/ring_buffer.h index 6b0cb9aaeba5..0395b2741864 100644 --- a/net/batman-adv/ring_buffer.h +++ b/net/batman-adv/ring_buffer.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: + * Copyright (C) 2007-2011 B.A.T.M.A.N. 
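Another pattern visible in the originator.h hunk above is the switch from separately allocated element_t buckets to an hlist_node (hash_entry) embedded in orig_node itself: compare_orig() now receives the list node and maps it back to the containing structure with container_of(). A small userspace sketch of that intrusive-list comparison follows, under the same assumption the kernel code makes that orig is the first member of the struct; everything except the compare_orig() name is invented for the example.

#include <stddef.h>
#include <stdio.h>
#include <string.h>

#define ETH_ALEN 6

/* Userspace stand-ins for the kernel's hlist_node and container_of(). */
struct hlist_node { struct hlist_node *next; };

#define container_of(ptr, type, member) \
    ((type *)((char *)(ptr) - offsetof(type, member)))

/* The hash entry is embedded in the object itself, so a lookup gets back
 * to the orig_node without a separate bucket allocation. */
struct orig_node {
    unsigned char orig[ETH_ALEN];   /* must stay the first member */
    struct hlist_node hash_entry;
};

/* Mirrors the new compare_orig(): map the list node back to its container,
 * then compare from the start of the struct, i.e. the orig address. */
static int compare_orig(struct hlist_node *node, void *data2)
{
    void *data1 = container_of(node, struct orig_node, hash_entry);

    return memcmp(data1, data2, ETH_ALEN) == 0;
}

int main(void)
{
    unsigned char mac[ETH_ALEN] = {0x02, 0x00, 0x00, 0xaa, 0xbb, 0xcc};
    struct orig_node n;

    memcpy(n.orig, mac, ETH_ALEN);
    printf("match: %d\n", compare_orig(&n.hash_entry, mac));
    return 0;
}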
contributors: * * Marek Lindner * diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index 8828eddd3f72..c172f5d0e05a 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: + * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * @@ -28,7 +28,6 @@ #include "icmp_socket.h" #include "translation-table.h" #include "originator.h" -#include "types.h" #include "ring_buffer.h" #include "vis.h" #include "aggregation.h" @@ -36,35 +35,33 @@ #include "gateway_client.h" #include "unicast.h" -void slide_own_bcast_window(struct batman_if *batman_if) +void slide_own_bcast_window(struct hard_iface *hard_iface) { - struct bat_priv *bat_priv = netdev_priv(batman_if->soft_iface); + struct bat_priv *bat_priv = netdev_priv(hard_iface->soft_iface); struct hashtable_t *hash = bat_priv->orig_hash; - struct hlist_node *walk; + struct hlist_node *node; struct hlist_head *head; - struct element_t *bucket; struct orig_node *orig_node; unsigned long *word; int i; size_t word_index; - spin_lock_bh(&bat_priv->orig_hash_lock); - for (i = 0; i < hash->size; i++) { head = &hash->table[i]; - hlist_for_each_entry(bucket, walk, head, hlist) { - orig_node = bucket->data; - word_index = batman_if->if_num * NUM_WORDS; + rcu_read_lock(); + hlist_for_each_entry_rcu(orig_node, node, head, hash_entry) { + spin_lock_bh(&orig_node->ogm_cnt_lock); + word_index = hard_iface->if_num * NUM_WORDS; word = &(orig_node->bcast_own[word_index]); bit_get_packet(bat_priv, word, 1, 0); - orig_node->bcast_own_sum[batman_if->if_num] = + orig_node->bcast_own_sum[hard_iface->if_num] = bit_packet_count(word); + spin_unlock_bh(&orig_node->ogm_cnt_lock); } + rcu_read_unlock(); } - - spin_unlock_bh(&bat_priv->orig_hash_lock); } static void update_HNA(struct bat_priv *bat_priv, struct orig_node *orig_node, @@ -90,6 +87,8 @@ static void update_route(struct bat_priv *bat_priv, struct neigh_node *neigh_node, unsigned char *hna_buff, int hna_buff_len) { + struct neigh_node *neigh_node_tmp; + /* route deleted */ if ((orig_node->router) && (!neigh_node)) { @@ -116,7 +115,12 @@ static void update_route(struct bat_priv *bat_priv, orig_node->router->addr); } + if (neigh_node && !atomic_inc_not_zero(&neigh_node->refcount)) + neigh_node = NULL; + neigh_node_tmp = orig_node->router; orig_node->router = neigh_node; + if (neigh_node_tmp) + neigh_node_free_ref(neigh_node_tmp); } @@ -139,73 +143,93 @@ void update_routes(struct bat_priv *bat_priv, struct orig_node *orig_node, static int is_bidirectional_neigh(struct orig_node *orig_node, struct orig_node *orig_neigh_node, struct batman_packet *batman_packet, - struct batman_if *if_incoming) + struct hard_iface *if_incoming) { struct bat_priv *bat_priv = netdev_priv(if_incoming->soft_iface); - struct neigh_node *neigh_node = NULL, *tmp_neigh_node = NULL; + struct neigh_node *neigh_node = NULL, *tmp_neigh_node; + struct hlist_node *node; unsigned char total_count; + uint8_t orig_eq_count, neigh_rq_count, tq_own; + int tq_asym_penalty, ret = 0; if (orig_node == orig_neigh_node) { - list_for_each_entry(tmp_neigh_node, - &orig_node->neigh_list, - list) { - - if (compare_orig(tmp_neigh_node->addr, - orig_neigh_node->orig) && - (tmp_neigh_node->if_incoming == if_incoming)) - neigh_node = tmp_neigh_node; + rcu_read_lock(); + hlist_for_each_entry_rcu(tmp_neigh_node, node, + &orig_node->neigh_list, list) { + + if (!compare_eth(tmp_neigh_node->addr, + orig_neigh_node->orig)) + continue; + + if 
(tmp_neigh_node->if_incoming != if_incoming) + continue; + + if (!atomic_inc_not_zero(&tmp_neigh_node->refcount)) + continue; + + neigh_node = tmp_neigh_node; } + rcu_read_unlock(); if (!neigh_node) neigh_node = create_neighbor(orig_node, orig_neigh_node, orig_neigh_node->orig, if_incoming); - /* create_neighbor failed, return 0 */ if (!neigh_node) - return 0; + goto out; neigh_node->last_valid = jiffies; } else { /* find packet count of corresponding one hop neighbor */ - list_for_each_entry(tmp_neigh_node, - &orig_neigh_node->neigh_list, list) { + rcu_read_lock(); + hlist_for_each_entry_rcu(tmp_neigh_node, node, + &orig_neigh_node->neigh_list, list) { - if (compare_orig(tmp_neigh_node->addr, - orig_neigh_node->orig) && - (tmp_neigh_node->if_incoming == if_incoming)) - neigh_node = tmp_neigh_node; + if (!compare_eth(tmp_neigh_node->addr, + orig_neigh_node->orig)) + continue; + + if (tmp_neigh_node->if_incoming != if_incoming) + continue; + + if (!atomic_inc_not_zero(&tmp_neigh_node->refcount)) + continue; + + neigh_node = tmp_neigh_node; } + rcu_read_unlock(); if (!neigh_node) neigh_node = create_neighbor(orig_neigh_node, orig_neigh_node, orig_neigh_node->orig, if_incoming); - /* create_neighbor failed, return 0 */ if (!neigh_node) - return 0; + goto out; } orig_node->last_valid = jiffies; + spin_lock_bh(&orig_node->ogm_cnt_lock); + orig_eq_count = orig_neigh_node->bcast_own_sum[if_incoming->if_num]; + neigh_rq_count = neigh_node->real_packet_count; + spin_unlock_bh(&orig_node->ogm_cnt_lock); + /* pay attention to not get a value bigger than 100 % */ - total_count = (orig_neigh_node->bcast_own_sum[if_incoming->if_num] > - neigh_node->real_packet_count ? - neigh_node->real_packet_count : - orig_neigh_node->bcast_own_sum[if_incoming->if_num]); + total_count = (orig_eq_count > neigh_rq_count ? + neigh_rq_count : orig_eq_count); /* if we have too few packets (too less data) we set tq_own to zero */ /* if we receive too few packets it is not considered bidirectional */ if ((total_count < TQ_LOCAL_BIDRECT_SEND_MINIMUM) || - (neigh_node->real_packet_count < TQ_LOCAL_BIDRECT_RECV_MINIMUM)) - orig_neigh_node->tq_own = 0; + (neigh_rq_count < TQ_LOCAL_BIDRECT_RECV_MINIMUM)) + tq_own = 0; else /* neigh_node->real_packet_count is never zero as we * only purge old information when getting new * information */ - orig_neigh_node->tq_own = (TQ_MAX_VALUE * total_count) / - neigh_node->real_packet_count; + tq_own = (TQ_MAX_VALUE * total_count) / neigh_rq_count; /* * 1 - ((1-x) ** 3), normalized to TQ_MAX_VALUE this does @@ -213,20 +237,16 @@ static int is_bidirectional_neigh(struct orig_node *orig_node, * punishes asymmetric links more. 
This will give a value * between 0 and TQ_MAX_VALUE */ - orig_neigh_node->tq_asym_penalty = - TQ_MAX_VALUE - - (TQ_MAX_VALUE * - (TQ_LOCAL_WINDOW_SIZE - neigh_node->real_packet_count) * - (TQ_LOCAL_WINDOW_SIZE - neigh_node->real_packet_count) * - (TQ_LOCAL_WINDOW_SIZE - neigh_node->real_packet_count)) / - (TQ_LOCAL_WINDOW_SIZE * - TQ_LOCAL_WINDOW_SIZE * - TQ_LOCAL_WINDOW_SIZE); - - batman_packet->tq = ((batman_packet->tq * - orig_neigh_node->tq_own * - orig_neigh_node->tq_asym_penalty) / - (TQ_MAX_VALUE * TQ_MAX_VALUE)); + tq_asym_penalty = TQ_MAX_VALUE - (TQ_MAX_VALUE * + (TQ_LOCAL_WINDOW_SIZE - neigh_rq_count) * + (TQ_LOCAL_WINDOW_SIZE - neigh_rq_count) * + (TQ_LOCAL_WINDOW_SIZE - neigh_rq_count)) / + (TQ_LOCAL_WINDOW_SIZE * + TQ_LOCAL_WINDOW_SIZE * + TQ_LOCAL_WINDOW_SIZE); + + batman_packet->tq = ((batman_packet->tq * tq_own * tq_asym_penalty) / + (TQ_MAX_VALUE * TQ_MAX_VALUE)); bat_dbg(DBG_BATMAN, bat_priv, "bidirectional: " @@ -234,34 +254,141 @@ static int is_bidirectional_neigh(struct orig_node *orig_node, "real recv = %2i, local tq: %3i, asym_penalty: %3i, " "total tq: %3i\n", orig_node->orig, orig_neigh_node->orig, total_count, - neigh_node->real_packet_count, orig_neigh_node->tq_own, - orig_neigh_node->tq_asym_penalty, batman_packet->tq); + neigh_rq_count, tq_own, tq_asym_penalty, batman_packet->tq); /* if link has the minimum required transmission quality * consider it bidirectional */ if (batman_packet->tq >= TQ_TOTAL_BIDRECT_LIMIT) - return 1; + ret = 1; - return 0; +out: + if (neigh_node) + neigh_node_free_ref(neigh_node); + return ret; +} + +/* caller must hold the neigh_list_lock */ +void bonding_candidate_del(struct orig_node *orig_node, + struct neigh_node *neigh_node) +{ + /* this neighbor is not part of our candidate list */ + if (list_empty(&neigh_node->bonding_list)) + goto out; + + list_del_rcu(&neigh_node->bonding_list); + INIT_LIST_HEAD(&neigh_node->bonding_list); + neigh_node_free_ref(neigh_node); + atomic_dec(&orig_node->bond_candidates); + +out: + return; +} + +static void bonding_candidate_add(struct orig_node *orig_node, + struct neigh_node *neigh_node) +{ + struct hlist_node *node; + struct neigh_node *tmp_neigh_node; + uint8_t best_tq, interference_candidate = 0; + + spin_lock_bh(&orig_node->neigh_list_lock); + + /* only consider if it has the same primary address ... */ + if (!compare_eth(orig_node->orig, + neigh_node->orig_node->primary_addr)) + goto candidate_del; + + if (!orig_node->router) + goto candidate_del; + + best_tq = orig_node->router->tq_avg; + + /* ... and is good enough to be considered */ + if (neigh_node->tq_avg < best_tq - BONDING_TQ_THRESHOLD) + goto candidate_del; + + /** + * check if we have another candidate with the same mac address or + * interface. If we do, we won't select this candidate because of + * possible interference. + */ + hlist_for_each_entry_rcu(tmp_neigh_node, node, + &orig_node->neigh_list, list) { + + if (tmp_neigh_node == neigh_node) + continue; + + /* we only care if the other candidate is even + * considered as candidate. 
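The refactored is_bidirectional_neigh() above computes the same metric as before, just on local variables instead of fields in orig_neigh_node: the own transmit quality from the echo/receive counts, then a cubic penalty for asymmetric links, both scaled by TQ_MAX_VALUE. A standalone sketch of that arithmetic follows; the constant values (255 and 64) are assumed to match main.h, and the minimum-packet-count check that zeroes tq_own is omitted.

#include <stdio.h>
#include <stdint.h>

/* Constants as defined in batman-adv's main.h (values assumed here). */
#define TQ_MAX_VALUE          255
#define TQ_LOCAL_WINDOW_SIZE  64

/* Combine the received tq with the local quality and the asymmetry
 * penalty, as in the rewritten is_bidirectional_neigh(). neigh_rq_count
 * is assumed non-zero, matching the comment in the original code. */
static uint8_t combine_tq(uint8_t packet_tq, uint8_t orig_eq_count,
                          uint8_t neigh_rq_count)
{
    uint8_t total_count = orig_eq_count > neigh_rq_count ?
                          neigh_rq_count : orig_eq_count;
    int tq_own = (TQ_MAX_VALUE * total_count) / neigh_rq_count;
    int missing = TQ_LOCAL_WINDOW_SIZE - neigh_rq_count;
    int tq_asym_penalty = TQ_MAX_VALUE -
        (TQ_MAX_VALUE * missing * missing * missing) /
        (TQ_LOCAL_WINDOW_SIZE * TQ_LOCAL_WINDOW_SIZE * TQ_LOCAL_WINDOW_SIZE);

    return (packet_tq * tq_own * tq_asym_penalty) /
           (TQ_MAX_VALUE * TQ_MAX_VALUE);
}

int main(void)
{
    /* e.g. 48 of our own OGMs echoed back, 50 received from the neighbour */
    printf("combined tq: %u\n", combine_tq(255, 48, 50));
    return 0;
}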
*/ + if (list_empty(&tmp_neigh_node->bonding_list)) + continue; + + if ((neigh_node->if_incoming == tmp_neigh_node->if_incoming) || + (compare_eth(neigh_node->addr, tmp_neigh_node->addr))) { + interference_candidate = 1; + break; + } + } + + /* don't care further if it is an interference candidate */ + if (interference_candidate) + goto candidate_del; + + /* this neighbor already is part of our candidate list */ + if (!list_empty(&neigh_node->bonding_list)) + goto out; + + if (!atomic_inc_not_zero(&neigh_node->refcount)) + goto out; + + list_add_rcu(&neigh_node->bonding_list, &orig_node->bond_list); + atomic_inc(&orig_node->bond_candidates); + goto out; + +candidate_del: + bonding_candidate_del(orig_node, neigh_node); + +out: + spin_unlock_bh(&orig_node->neigh_list_lock); + return; +} + +/* copy primary address for bonding */ +static void bonding_save_primary(struct orig_node *orig_node, + struct orig_node *orig_neigh_node, + struct batman_packet *batman_packet) +{ + if (!(batman_packet->flags & PRIMARIES_FIRST_HOP)) + return; + + memcpy(orig_neigh_node->primary_addr, orig_node->orig, ETH_ALEN); } static void update_orig(struct bat_priv *bat_priv, struct orig_node *orig_node, struct ethhdr *ethhdr, struct batman_packet *batman_packet, - struct batman_if *if_incoming, + struct hard_iface *if_incoming, unsigned char *hna_buff, int hna_buff_len, char is_duplicate) { struct neigh_node *neigh_node = NULL, *tmp_neigh_node = NULL; + struct orig_node *orig_node_tmp; + struct hlist_node *node; int tmp_hna_buff_len; + uint8_t bcast_own_sum_orig, bcast_own_sum_neigh; bat_dbg(DBG_BATMAN, bat_priv, "update_originator(): " "Searching and updating originator entry of received packet\n"); - list_for_each_entry(tmp_neigh_node, &orig_node->neigh_list, list) { - if (compare_orig(tmp_neigh_node->addr, ethhdr->h_source) && - (tmp_neigh_node->if_incoming == if_incoming)) { + rcu_read_lock(); + hlist_for_each_entry_rcu(tmp_neigh_node, node, + &orig_node->neigh_list, list) { + if (compare_eth(tmp_neigh_node->addr, ethhdr->h_source) && + (tmp_neigh_node->if_incoming == if_incoming) && + atomic_inc_not_zero(&tmp_neigh_node->refcount)) { + if (neigh_node) + neigh_node_free_ref(neigh_node); neigh_node = tmp_neigh_node; continue; } @@ -280,16 +407,20 @@ static void update_orig(struct bat_priv *bat_priv, orig_tmp = get_orig_node(bat_priv, ethhdr->h_source); if (!orig_tmp) - return; + goto unlock; neigh_node = create_neighbor(orig_node, orig_tmp, ethhdr->h_source, if_incoming); + + orig_node_free_ref(orig_tmp); if (!neigh_node) - return; + goto unlock; } else bat_dbg(DBG_BATMAN, bat_priv, "Updating existing last-hop neighbor of originator\n"); + rcu_read_unlock(); + orig_node->flags = batman_packet->flags; neigh_node->last_valid = jiffies; @@ -303,6 +434,8 @@ static void update_orig(struct bat_priv *bat_priv, neigh_node->last_ttl = batman_packet->ttl; } + bonding_candidate_add(orig_node, neigh_node); + tmp_hna_buff_len = (hna_buff_len > batman_packet->num_hna * ETH_ALEN ? 
batman_packet->num_hna * ETH_ALEN : hna_buff_len); @@ -319,10 +452,22 @@ static void update_orig(struct bat_priv *bat_priv, /* if the TQ is the same and the link not more symetric we * won't consider it either */ if ((orig_node->router) && - ((neigh_node->tq_avg == orig_node->router->tq_avg) && - (orig_node->router->orig_node->bcast_own_sum[if_incoming->if_num] - >= neigh_node->orig_node->bcast_own_sum[if_incoming->if_num]))) - goto update_hna; + (neigh_node->tq_avg == orig_node->router->tq_avg)) { + orig_node_tmp = orig_node->router->orig_node; + spin_lock_bh(&orig_node_tmp->ogm_cnt_lock); + bcast_own_sum_orig = + orig_node_tmp->bcast_own_sum[if_incoming->if_num]; + spin_unlock_bh(&orig_node_tmp->ogm_cnt_lock); + + orig_node_tmp = neigh_node->orig_node; + spin_lock_bh(&orig_node_tmp->ogm_cnt_lock); + bcast_own_sum_neigh = + orig_node_tmp->bcast_own_sum[if_incoming->if_num]; + spin_unlock_bh(&orig_node_tmp->ogm_cnt_lock); + + if (bcast_own_sum_orig >= bcast_own_sum_neigh) + goto update_hna; + } update_routes(bat_priv, orig_node, neigh_node, hna_buff, tmp_hna_buff_len); @@ -343,6 +488,14 @@ update_gw: (atomic_read(&bat_priv->gw_mode) == GW_MODE_CLIENT) && (atomic_read(&bat_priv->gw_sel_class) > 2)) gw_check_election(bat_priv, orig_node); + + goto out; + +unlock: + rcu_read_unlock(); +out: + if (neigh_node) + neigh_node_free_ref(neigh_node); } /* checks whether the host restarted and is in the protection time. @@ -380,34 +533,38 @@ static int window_protected(struct bat_priv *bat_priv, */ static char count_real_packets(struct ethhdr *ethhdr, struct batman_packet *batman_packet, - struct batman_if *if_incoming) + struct hard_iface *if_incoming) { struct bat_priv *bat_priv = netdev_priv(if_incoming->soft_iface); struct orig_node *orig_node; struct neigh_node *tmp_neigh_node; + struct hlist_node *node; char is_duplicate = 0; int32_t seq_diff; int need_update = 0; - int set_mark; + int set_mark, ret = -1; orig_node = get_orig_node(bat_priv, batman_packet->orig); if (!orig_node) return 0; + spin_lock_bh(&orig_node->ogm_cnt_lock); seq_diff = batman_packet->seqno - orig_node->last_real_seqno; /* signalize caller that the packet is to be dropped. 
*/ if (window_protected(bat_priv, seq_diff, &orig_node->batman_seqno_reset)) - return -1; + goto out; - list_for_each_entry(tmp_neigh_node, &orig_node->neigh_list, list) { + rcu_read_lock(); + hlist_for_each_entry_rcu(tmp_neigh_node, node, + &orig_node->neigh_list, list) { is_duplicate |= get_bit_status(tmp_neigh_node->real_bits, orig_node->last_real_seqno, batman_packet->seqno); - if (compare_orig(tmp_neigh_node->addr, ethhdr->h_source) && + if (compare_eth(tmp_neigh_node->addr, ethhdr->h_source) && (tmp_neigh_node->if_incoming == if_incoming)) set_mark = 1; else @@ -421,6 +578,7 @@ static char count_real_packets(struct ethhdr *ethhdr, tmp_neigh_node->real_packet_count = bit_packet_count(tmp_neigh_node->real_bits); } + rcu_read_unlock(); if (need_update) { bat_dbg(DBG_BATMAN, bat_priv, @@ -429,123 +587,21 @@ static char count_real_packets(struct ethhdr *ethhdr, orig_node->last_real_seqno = batman_packet->seqno; } - return is_duplicate; -} - -/* copy primary address for bonding */ -static void mark_bonding_address(struct bat_priv *bat_priv, - struct orig_node *orig_node, - struct orig_node *orig_neigh_node, - struct batman_packet *batman_packet) + ret = is_duplicate; -{ - if (batman_packet->flags & PRIMARIES_FIRST_HOP) - memcpy(orig_neigh_node->primary_addr, - orig_node->orig, ETH_ALEN); - - return; -} - -/* mark possible bond.candidates in the neighbor list */ -void update_bonding_candidates(struct bat_priv *bat_priv, - struct orig_node *orig_node) -{ - int candidates; - int interference_candidate; - int best_tq; - struct neigh_node *tmp_neigh_node, *tmp_neigh_node2; - struct neigh_node *first_candidate, *last_candidate; - - /* update the candidates for this originator */ - if (!orig_node->router) { - orig_node->bond.candidates = 0; - return; - } - - best_tq = orig_node->router->tq_avg; - - /* update bond.candidates */ - - candidates = 0; - - /* mark other nodes which also received "PRIMARIES FIRST HOP" packets - * as "bonding partner" */ - - /* first, zero the list */ - list_for_each_entry(tmp_neigh_node, &orig_node->neigh_list, list) { - tmp_neigh_node->next_bond_candidate = NULL; - } - - first_candidate = NULL; - last_candidate = NULL; - list_for_each_entry(tmp_neigh_node, &orig_node->neigh_list, list) { - - /* only consider if it has the same primary address ... */ - if (memcmp(orig_node->orig, - tmp_neigh_node->orig_node->primary_addr, - ETH_ALEN) != 0) - continue; - - /* ... and is good enough to be considered */ - if (tmp_neigh_node->tq_avg < best_tq - BONDING_TQ_THRESHOLD) - continue; - - /* check if we have another candidate with the same - * mac address or interface. If we do, we won't - * select this candidate because of possible interference. */ - - interference_candidate = 0; - list_for_each_entry(tmp_neigh_node2, - &orig_node->neigh_list, list) { - - if (tmp_neigh_node2 == tmp_neigh_node) - continue; - - /* we only care if the other candidate is even - * considered as candidate. 
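count_real_packets() above relies on the per-neighbour sliding bit window (get_bit_status()/bit_get_packet()) to detect duplicate OGM sequence numbers, now updated under the new per-orig_node ogm_cnt_lock instead of the global orig_hash_lock. The sketch below is a simplified, self-contained analogy of such a window: it uses a single 64-bit mask, invented names, and leaves out the seqno-reset protection that window_protected() adds.

#include <stdint.h>
#include <stdio.h>

/* Assumed window size; batman-adv tracks TQ_LOCAL_WINDOW_SIZE (64) bits. */
#define WINDOW_SIZE 64

struct seqno_window {
    uint64_t bits;        /* bit i set => seqno (last_seqno - i) was seen */
    uint32_t last_seqno;
};

/* Record a received sequence number and report whether it was a duplicate,
 * roughly what the per-neighbour bit window does for OGMs. */
static int window_mark(struct seqno_window *w, uint32_t seqno)
{
    int32_t diff = (int32_t)(seqno - w->last_seqno);

    if (diff > 0) {                     /* newer: slide the window forward */
        w->bits = (diff >= WINDOW_SIZE) ? 0 : w->bits << diff;
        w->bits |= 1;                   /* bit 0 tracks the newest seqno */
        w->last_seqno = seqno;
        return 0;
    }
    if (diff <= -WINDOW_SIZE)           /* too old to track */
        return 0;
    if (w->bits & (1ULL << -diff))      /* already seen inside the window */
        return 1;
    w->bits |= 1ULL << -diff;
    return 0;
}

int main(void)
{
    struct seqno_window w = { 0, 0 };
    int a = window_mark(&w, 1);   /* new              -> 0 */
    int b = window_mark(&w, 1);   /* duplicate        -> 1 */
    int c = window_mark(&w, 5);   /* new, window slides -> 0 */

    printf("%d %d %d\n", a, b, c);
    return 0;
}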
*/ - if (!tmp_neigh_node2->next_bond_candidate) - continue; - - - if ((tmp_neigh_node->if_incoming == - tmp_neigh_node2->if_incoming) - || (memcmp(tmp_neigh_node->addr, - tmp_neigh_node2->addr, ETH_ALEN) == 0)) { - - interference_candidate = 1; - break; - } - } - /* don't care further if it is an interference candidate */ - if (interference_candidate) - continue; - - if (!first_candidate) { - first_candidate = tmp_neigh_node; - tmp_neigh_node->next_bond_candidate = first_candidate; - } else - tmp_neigh_node->next_bond_candidate = last_candidate; - - last_candidate = tmp_neigh_node; - - candidates++; - } - - if (candidates > 0) { - first_candidate->next_bond_candidate = last_candidate; - orig_node->bond.selected = first_candidate; - } - - orig_node->bond.candidates = candidates; +out: + spin_unlock_bh(&orig_node->ogm_cnt_lock); + orig_node_free_ref(orig_node); + return ret; } void receive_bat_packet(struct ethhdr *ethhdr, - struct batman_packet *batman_packet, - unsigned char *hna_buff, int hna_buff_len, - struct batman_if *if_incoming) + struct batman_packet *batman_packet, + unsigned char *hna_buff, int hna_buff_len, + struct hard_iface *if_incoming) { struct bat_priv *bat_priv = netdev_priv(if_incoming->soft_iface); - struct batman_if *batman_if; + struct hard_iface *hard_iface; struct orig_node *orig_neigh_node, *orig_node; char has_directlink_flag; char is_my_addr = 0, is_my_orig = 0, is_my_oldorig = 0; @@ -573,8 +629,8 @@ void receive_bat_packet(struct ethhdr *ethhdr, has_directlink_flag = (batman_packet->flags & DIRECTLINK ? 1 : 0); - is_single_hop_neigh = (compare_orig(ethhdr->h_source, - batman_packet->orig) ? 1 : 0); + is_single_hop_neigh = (compare_eth(ethhdr->h_source, + batman_packet->orig) ? 1 : 0); bat_dbg(DBG_BATMAN, bat_priv, "Received BATMAN packet via NB: %pM, IF: %s [%pM] " @@ -587,26 +643,26 @@ void receive_bat_packet(struct ethhdr *ethhdr, has_directlink_flag); rcu_read_lock(); - list_for_each_entry_rcu(batman_if, &if_list, list) { - if (batman_if->if_status != IF_ACTIVE) + list_for_each_entry_rcu(hard_iface, &hardif_list, list) { + if (hard_iface->if_status != IF_ACTIVE) continue; - if (batman_if->soft_iface != if_incoming->soft_iface) + if (hard_iface->soft_iface != if_incoming->soft_iface) continue; - if (compare_orig(ethhdr->h_source, - batman_if->net_dev->dev_addr)) + if (compare_eth(ethhdr->h_source, + hard_iface->net_dev->dev_addr)) is_my_addr = 1; - if (compare_orig(batman_packet->orig, - batman_if->net_dev->dev_addr)) + if (compare_eth(batman_packet->orig, + hard_iface->net_dev->dev_addr)) is_my_orig = 1; - if (compare_orig(batman_packet->prev_sender, - batman_if->net_dev->dev_addr)) + if (compare_eth(batman_packet->prev_sender, + hard_iface->net_dev->dev_addr)) is_my_oldorig = 1; - if (compare_orig(ethhdr->h_source, broadcast_addr)) + if (compare_eth(ethhdr->h_source, broadcast_addr)) is_broadcast = 1; } rcu_read_unlock(); @@ -638,7 +694,6 @@ void receive_bat_packet(struct ethhdr *ethhdr, int offset; orig_neigh_node = get_orig_node(bat_priv, ethhdr->h_source); - if (!orig_neigh_node) return; @@ -647,18 +702,22 @@ void receive_bat_packet(struct ethhdr *ethhdr, /* if received seqno equals last send seqno save new * seqno for bidirectional check */ if (has_directlink_flag && - compare_orig(if_incoming->net_dev->dev_addr, - batman_packet->orig) && + compare_eth(if_incoming->net_dev->dev_addr, + batman_packet->orig) && (batman_packet->seqno - if_incoming_seqno + 2 == 0)) { offset = if_incoming->if_num * NUM_WORDS; + + spin_lock_bh(&orig_neigh_node->ogm_cnt_lock); 
word = &(orig_neigh_node->bcast_own[offset]); bit_mark(word, 0); orig_neigh_node->bcast_own_sum[if_incoming->if_num] = bit_packet_count(word); + spin_unlock_bh(&orig_neigh_node->ogm_cnt_lock); } bat_dbg(DBG_BATMAN, bat_priv, "Drop packet: " "originator packet from myself (via neighbor)\n"); + orig_node_free_ref(orig_neigh_node); return; } @@ -679,27 +738,27 @@ void receive_bat_packet(struct ethhdr *ethhdr, bat_dbg(DBG_BATMAN, bat_priv, "Drop packet: packet within seqno protection time " "(sender: %pM)\n", ethhdr->h_source); - return; + goto out; } if (batman_packet->tq == 0) { bat_dbg(DBG_BATMAN, bat_priv, "Drop packet: originator packet with tq equal 0\n"); - return; + goto out; } /* avoid temporary routing loops */ if ((orig_node->router) && (orig_node->router->orig_node->router) && - (compare_orig(orig_node->router->addr, - batman_packet->prev_sender)) && - !(compare_orig(batman_packet->orig, batman_packet->prev_sender)) && - (compare_orig(orig_node->router->addr, - orig_node->router->orig_node->router->addr))) { + (compare_eth(orig_node->router->addr, + batman_packet->prev_sender)) && + !(compare_eth(batman_packet->orig, batman_packet->prev_sender)) && + (compare_eth(orig_node->router->addr, + orig_node->router->orig_node->router->addr))) { bat_dbg(DBG_BATMAN, bat_priv, "Drop packet: ignoring all rebroadcast packets that " "may make me loop (sender: %pM)\n", ethhdr->h_source); - return; + goto out; } /* if sender is a direct neighbor the sender mac equals @@ -708,19 +767,21 @@ void receive_bat_packet(struct ethhdr *ethhdr, orig_node : get_orig_node(bat_priv, ethhdr->h_source)); if (!orig_neigh_node) - return; + goto out; /* drop packet if sender is not a direct neighbor and if we * don't route towards it */ if (!is_single_hop_neigh && (!orig_neigh_node->router)) { bat_dbg(DBG_BATMAN, bat_priv, "Drop packet: OGM via unknown neighbor!\n"); - return; + goto out_neigh; } is_bidirectional = is_bidirectional_neigh(orig_node, orig_neigh_node, batman_packet, if_incoming); + bonding_save_primary(orig_node, orig_neigh_node, batman_packet); + /* update ranking if it is not a duplicate or has the same * seqno and similar ttl as the non-duplicate */ if (is_bidirectional && @@ -730,10 +791,6 @@ void receive_bat_packet(struct ethhdr *ethhdr, update_orig(bat_priv, orig_node, ethhdr, batman_packet, if_incoming, hna_buff, hna_buff_len, is_duplicate); - mark_bonding_address(bat_priv, orig_node, - orig_neigh_node, batman_packet); - update_bonding_candidates(bat_priv, orig_node); - /* is single hop (direct) neighbor */ if (is_single_hop_neigh) { @@ -743,31 +800,36 @@ void receive_bat_packet(struct ethhdr *ethhdr, bat_dbg(DBG_BATMAN, bat_priv, "Forwarding packet: " "rebroadcast neighbor packet with direct link flag\n"); - return; + goto out_neigh; } /* multihop originator */ if (!is_bidirectional) { bat_dbg(DBG_BATMAN, bat_priv, "Drop packet: not received via bidirectional link\n"); - return; + goto out_neigh; } if (is_duplicate) { bat_dbg(DBG_BATMAN, bat_priv, "Drop packet: duplicate packet received\n"); - return; + goto out_neigh; } bat_dbg(DBG_BATMAN, bat_priv, "Forwarding packet: rebroadcast originator packet\n"); schedule_forward_packet(orig_node, ethhdr, batman_packet, 0, hna_buff_len, if_incoming); + +out_neigh: + if ((orig_neigh_node) && (!is_single_hop_neigh)) + orig_node_free_ref(orig_neigh_node); +out: + orig_node_free_ref(orig_node); } -int recv_bat_packet(struct sk_buff *skb, struct batman_if *batman_if) +int recv_bat_packet(struct sk_buff *skb, struct hard_iface *hard_iface) { - struct 
bat_priv *bat_priv = netdev_priv(batman_if->soft_iface); struct ethhdr *ethhdr; /* drop packet if it has not necessary minimum size */ @@ -794,12 +856,10 @@ int recv_bat_packet(struct sk_buff *skb, struct batman_if *batman_if) ethhdr = (struct ethhdr *)skb_mac_header(skb); - spin_lock_bh(&bat_priv->orig_hash_lock); receive_aggr_bat_packet(ethhdr, skb->data, skb_headlen(skb), - batman_if); - spin_unlock_bh(&bat_priv->orig_hash_lock); + hard_iface); kfree_skb(skb); return NET_RX_SUCCESS; @@ -808,135 +868,144 @@ int recv_bat_packet(struct sk_buff *skb, struct batman_if *batman_if) static int recv_my_icmp_packet(struct bat_priv *bat_priv, struct sk_buff *skb, size_t icmp_len) { - struct orig_node *orig_node; + struct orig_node *orig_node = NULL; + struct neigh_node *neigh_node = NULL; struct icmp_packet_rr *icmp_packet; - struct ethhdr *ethhdr; - struct batman_if *batman_if; - int ret; - uint8_t dstaddr[ETH_ALEN]; + int ret = NET_RX_DROP; icmp_packet = (struct icmp_packet_rr *)skb->data; - ethhdr = (struct ethhdr *)skb_mac_header(skb); /* add data to device queue */ if (icmp_packet->msg_type != ECHO_REQUEST) { bat_socket_receive_packet(icmp_packet, icmp_len); - return NET_RX_DROP; + goto out; } if (!bat_priv->primary_if) - return NET_RX_DROP; + goto out; /* answer echo request (ping) */ /* get routing information */ - spin_lock_bh(&bat_priv->orig_hash_lock); - orig_node = ((struct orig_node *)hash_find(bat_priv->orig_hash, - compare_orig, choose_orig, - icmp_packet->orig)); - ret = NET_RX_DROP; - - if ((orig_node) && (orig_node->router)) { - - /* don't lock while sending the packets ... we therefore - * copy the required data before sending */ - batman_if = orig_node->router->if_incoming; - memcpy(dstaddr, orig_node->router->addr, ETH_ALEN); - spin_unlock_bh(&bat_priv->orig_hash_lock); - - /* create a copy of the skb, if needed, to modify it. */ - if (skb_cow(skb, sizeof(struct ethhdr)) < 0) - return NET_RX_DROP; + rcu_read_lock(); + orig_node = orig_hash_find(bat_priv, icmp_packet->orig); - icmp_packet = (struct icmp_packet_rr *)skb->data; - ethhdr = (struct ethhdr *)skb_mac_header(skb); + if (!orig_node) + goto unlock; - memcpy(icmp_packet->dst, icmp_packet->orig, ETH_ALEN); - memcpy(icmp_packet->orig, - bat_priv->primary_if->net_dev->dev_addr, ETH_ALEN); - icmp_packet->msg_type = ECHO_REPLY; - icmp_packet->ttl = TTL; + neigh_node = orig_node->router; - send_skb_packet(skb, batman_if, dstaddr); - ret = NET_RX_SUCCESS; + if (!neigh_node) + goto unlock; - } else - spin_unlock_bh(&bat_priv->orig_hash_lock); + if (!atomic_inc_not_zero(&neigh_node->refcount)) { + neigh_node = NULL; + goto unlock; + } + + rcu_read_unlock(); + + /* create a copy of the skb, if needed, to modify it. 
*/ + if (skb_cow(skb, sizeof(struct ethhdr)) < 0) + goto out; + + icmp_packet = (struct icmp_packet_rr *)skb->data; + + memcpy(icmp_packet->dst, icmp_packet->orig, ETH_ALEN); + memcpy(icmp_packet->orig, + bat_priv->primary_if->net_dev->dev_addr, ETH_ALEN); + icmp_packet->msg_type = ECHO_REPLY; + icmp_packet->ttl = TTL; + send_skb_packet(skb, neigh_node->if_incoming, neigh_node->addr); + ret = NET_RX_SUCCESS; + goto out; + +unlock: + rcu_read_unlock(); +out: + if (neigh_node) + neigh_node_free_ref(neigh_node); + if (orig_node) + orig_node_free_ref(orig_node); return ret; } static int recv_icmp_ttl_exceeded(struct bat_priv *bat_priv, - struct sk_buff *skb, size_t icmp_len) + struct sk_buff *skb) { - struct orig_node *orig_node; + struct orig_node *orig_node = NULL; + struct neigh_node *neigh_node = NULL; struct icmp_packet *icmp_packet; - struct ethhdr *ethhdr; - struct batman_if *batman_if; - int ret; - uint8_t dstaddr[ETH_ALEN]; + int ret = NET_RX_DROP; icmp_packet = (struct icmp_packet *)skb->data; - ethhdr = (struct ethhdr *)skb_mac_header(skb); /* send TTL exceeded if packet is an echo request (traceroute) */ if (icmp_packet->msg_type != ECHO_REQUEST) { pr_debug("Warning - can't forward icmp packet from %pM to " "%pM: ttl exceeded\n", icmp_packet->orig, icmp_packet->dst); - return NET_RX_DROP; + goto out; } if (!bat_priv->primary_if) - return NET_RX_DROP; + goto out; /* get routing information */ - spin_lock_bh(&bat_priv->orig_hash_lock); - orig_node = ((struct orig_node *) - hash_find(bat_priv->orig_hash, compare_orig, choose_orig, - icmp_packet->orig)); - ret = NET_RX_DROP; - - if ((orig_node) && (orig_node->router)) { - - /* don't lock while sending the packets ... we therefore - * copy the required data before sending */ - batman_if = orig_node->router->if_incoming; - memcpy(dstaddr, orig_node->router->addr, ETH_ALEN); - spin_unlock_bh(&bat_priv->orig_hash_lock); - - /* create a copy of the skb, if needed, to modify it. */ - if (skb_cow(skb, sizeof(struct ethhdr)) < 0) - return NET_RX_DROP; + rcu_read_lock(); + orig_node = orig_hash_find(bat_priv, icmp_packet->orig); - icmp_packet = (struct icmp_packet *) skb->data; - ethhdr = (struct ethhdr *)skb_mac_header(skb); + if (!orig_node) + goto unlock; - memcpy(icmp_packet->dst, icmp_packet->orig, ETH_ALEN); - memcpy(icmp_packet->orig, - bat_priv->primary_if->net_dev->dev_addr, ETH_ALEN); - icmp_packet->msg_type = TTL_EXCEEDED; - icmp_packet->ttl = TTL; + neigh_node = orig_node->router; - send_skb_packet(skb, batman_if, dstaddr); - ret = NET_RX_SUCCESS; + if (!neigh_node) + goto unlock; - } else - spin_unlock_bh(&bat_priv->orig_hash_lock); + if (!atomic_inc_not_zero(&neigh_node->refcount)) { + neigh_node = NULL; + goto unlock; + } + rcu_read_unlock(); + + /* create a copy of the skb, if needed, to modify it. 
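recv_my_icmp_packet() above now looks up the route under RCU and takes a refcount on the chosen neighbour, but the reply construction itself is unchanged: swap the originator into the destination field, put our primary address into the source, flip the type to ECHO_REPLY and reset the TTL. A cut-down sketch of that rewrite follows; the struct layout and the numeric constants are illustrative stand-ins, not copied from packet.h.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define ETH_ALEN 6

/* Illustrative message types and default TTL; the real values live in
 * batman-adv's packet.h. */
enum { ECHO_REPLY = 0, ECHO_REQUEST = 8 };
#define TTL 50

/* Cut-down stand-in for struct icmp_packet. */
struct bat_icmp {
    uint8_t msg_type;
    uint8_t ttl;
    uint8_t dst[ETH_ALEN];
    uint8_t orig[ETH_ALEN];
};

/* Turn a received echo request into the reply recv_my_icmp_packet() builds:
 * destination becomes the original sender, source becomes our primary MAC,
 * type flips to ECHO_REPLY and the TTL is reset before sending it back. */
static void build_echo_reply(struct bat_icmp *pkt, const uint8_t *my_addr)
{
    memcpy(pkt->dst, pkt->orig, ETH_ALEN);
    memcpy(pkt->orig, my_addr, ETH_ALEN);
    pkt->msg_type = ECHO_REPLY;
    pkt->ttl = TTL;
}

int main(void)
{
    uint8_t me[ETH_ALEN] = {0x02, 0x00, 0x00, 0x00, 0x00, 0x01};
    struct bat_icmp pkt = { ECHO_REQUEST, 3,
                            {0x02, 0, 0, 0, 0, 0x01},
                            {0x02, 0, 0, 0, 0, 0x02} };

    build_echo_reply(&pkt, me);
    printf("reply dst ends in %02x, type %u, ttl %u\n",
           pkt.dst[ETH_ALEN - 1], pkt.msg_type, pkt.ttl);
    return 0;
}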
*/ + if (skb_cow(skb, sizeof(struct ethhdr)) < 0) + goto out; + + icmp_packet = (struct icmp_packet *)skb->data; + + memcpy(icmp_packet->dst, icmp_packet->orig, ETH_ALEN); + memcpy(icmp_packet->orig, + bat_priv->primary_if->net_dev->dev_addr, ETH_ALEN); + icmp_packet->msg_type = TTL_EXCEEDED; + icmp_packet->ttl = TTL; + + send_skb_packet(skb, neigh_node->if_incoming, neigh_node->addr); + ret = NET_RX_SUCCESS; + goto out; + +unlock: + rcu_read_unlock(); +out: + if (neigh_node) + neigh_node_free_ref(neigh_node); + if (orig_node) + orig_node_free_ref(orig_node); return ret; } -int recv_icmp_packet(struct sk_buff *skb, struct batman_if *recv_if) +int recv_icmp_packet(struct sk_buff *skb, struct hard_iface *recv_if) { struct bat_priv *bat_priv = netdev_priv(recv_if->soft_iface); struct icmp_packet_rr *icmp_packet; struct ethhdr *ethhdr; - struct orig_node *orig_node; - struct batman_if *batman_if; + struct orig_node *orig_node = NULL; + struct neigh_node *neigh_node = NULL; int hdr_size = sizeof(struct icmp_packet); - int ret; - uint8_t dstaddr[ETH_ALEN]; + int ret = NET_RX_DROP; /** * we truncate all incoming icmp packets if they don't match our size @@ -946,21 +1015,21 @@ int recv_icmp_packet(struct sk_buff *skb, struct batman_if *recv_if) /* drop packet if it has not necessary minimum size */ if (unlikely(!pskb_may_pull(skb, hdr_size))) - return NET_RX_DROP; + goto out; ethhdr = (struct ethhdr *)skb_mac_header(skb); /* packet with unicast indication but broadcast recipient */ if (is_broadcast_ether_addr(ethhdr->h_dest)) - return NET_RX_DROP; + goto out; /* packet with broadcast sender address */ if (is_broadcast_ether_addr(ethhdr->h_source)) - return NET_RX_DROP; + goto out; /* not for me */ if (!is_my_mac(ethhdr->h_dest)) - return NET_RX_DROP; + goto out; icmp_packet = (struct icmp_packet_rr *)skb->data; @@ -978,53 +1047,61 @@ int recv_icmp_packet(struct sk_buff *skb, struct batman_if *recv_if) /* TTL exceeded */ if (icmp_packet->ttl < 2) - return recv_icmp_ttl_exceeded(bat_priv, skb, hdr_size); - - ret = NET_RX_DROP; + return recv_icmp_ttl_exceeded(bat_priv, skb); /* get routing information */ - spin_lock_bh(&bat_priv->orig_hash_lock); - orig_node = ((struct orig_node *) - hash_find(bat_priv->orig_hash, compare_orig, choose_orig, - icmp_packet->dst)); + rcu_read_lock(); + orig_node = orig_hash_find(bat_priv, icmp_packet->dst); - if ((orig_node) && (orig_node->router)) { + if (!orig_node) + goto unlock; - /* don't lock while sending the packets ... we therefore - * copy the required data before sending */ - batman_if = orig_node->router->if_incoming; - memcpy(dstaddr, orig_node->router->addr, ETH_ALEN); - spin_unlock_bh(&bat_priv->orig_hash_lock); + neigh_node = orig_node->router; - /* create a copy of the skb, if needed, to modify it. */ - if (skb_cow(skb, sizeof(struct ethhdr)) < 0) - return NET_RX_DROP; + if (!neigh_node) + goto unlock; - icmp_packet = (struct icmp_packet_rr *)skb->data; - ethhdr = (struct ethhdr *)skb_mac_header(skb); + if (!atomic_inc_not_zero(&neigh_node->refcount)) { + neigh_node = NULL; + goto unlock; + } - /* decrement ttl */ - icmp_packet->ttl--; + rcu_read_unlock(); - /* route it */ - send_skb_packet(skb, batman_if, dstaddr); - ret = NET_RX_SUCCESS; + /* create a copy of the skb, if needed, to modify it. 
*/ + if (skb_cow(skb, sizeof(struct ethhdr)) < 0) + goto out; - } else - spin_unlock_bh(&bat_priv->orig_hash_lock); + icmp_packet = (struct icmp_packet_rr *)skb->data; + + /* decrement ttl */ + icmp_packet->ttl--; + /* route it */ + send_skb_packet(skb, neigh_node->if_incoming, neigh_node->addr); + ret = NET_RX_SUCCESS; + goto out; + +unlock: + rcu_read_unlock(); +out: + if (neigh_node) + neigh_node_free_ref(neigh_node); + if (orig_node) + orig_node_free_ref(orig_node); return ret; } /* find a suitable router for this originator, and use - * bonding if possible. */ + * bonding if possible. increases the found neighbors + * refcount.*/ struct neigh_node *find_router(struct bat_priv *bat_priv, struct orig_node *orig_node, - struct batman_if *recv_if) + struct hard_iface *recv_if) { struct orig_node *primary_orig_node; struct orig_node *router_orig; - struct neigh_node *router, *first_candidate, *best_router; + struct neigh_node *router, *first_candidate, *tmp_neigh_node; static uint8_t zero_mac[ETH_ALEN] = {0, 0, 0, 0, 0, 0}; int bonding_enabled; @@ -1036,78 +1113,128 @@ struct neigh_node *find_router(struct bat_priv *bat_priv, /* without bonding, the first node should * always choose the default router. */ - bonding_enabled = atomic_read(&bat_priv->bonding); - if ((!recv_if) && (!bonding_enabled)) - return orig_node->router; - + rcu_read_lock(); + /* select default router to output */ + router = orig_node->router; router_orig = orig_node->router->orig_node; + if (!router_orig || !atomic_inc_not_zero(&router->refcount)) { + rcu_read_unlock(); + return NULL; + } + + if ((!recv_if) && (!bonding_enabled)) + goto return_router; /* if we have something in the primary_addr, we can search * for a potential bonding candidate. */ - if (memcmp(router_orig->primary_addr, zero_mac, ETH_ALEN) == 0) - return orig_node->router; + if (compare_eth(router_orig->primary_addr, zero_mac)) + goto return_router; /* find the orig_node which has the primary interface. might * even be the same as our router_orig in many cases */ - if (memcmp(router_orig->primary_addr, - router_orig->orig, ETH_ALEN) == 0) { + if (compare_eth(router_orig->primary_addr, router_orig->orig)) { primary_orig_node = router_orig; } else { - primary_orig_node = hash_find(bat_priv->orig_hash, compare_orig, - choose_orig, - router_orig->primary_addr); - + primary_orig_node = orig_hash_find(bat_priv, + router_orig->primary_addr); if (!primary_orig_node) - return orig_node->router; + goto return_router; + + orig_node_free_ref(primary_orig_node); } /* with less than 2 candidates, we can't do any * bonding and prefer the original router. */ - - if (primary_orig_node->bond.candidates < 2) - return orig_node->router; + if (atomic_read(&primary_orig_node->bond_candidates) < 2) + goto return_router; /* all nodes between should choose a candidate which * is is not on the interface where the packet came * in. */ - first_candidate = primary_orig_node->bond.selected; - router = first_candidate; + + neigh_node_free_ref(router); + first_candidate = NULL; + router = NULL; if (bonding_enabled) { /* in the bonding case, send the packets in a round * robin fashion over the remaining interfaces. */ - do { + + list_for_each_entry_rcu(tmp_neigh_node, + &primary_orig_node->bond_list, bonding_list) { + if (!first_candidate) + first_candidate = tmp_neigh_node; /* recv_if == NULL on the first node. 
*/ - if (router->if_incoming != recv_if) + if (tmp_neigh_node->if_incoming != recv_if && + atomic_inc_not_zero(&tmp_neigh_node->refcount)) { + router = tmp_neigh_node; break; + } + } + + /* use the first candidate if nothing was found. */ + if (!router && first_candidate && + atomic_inc_not_zero(&first_candidate->refcount)) + router = first_candidate; - router = router->next_bond_candidate; - } while (router != first_candidate); + if (!router) { + rcu_read_unlock(); + return NULL; + } - primary_orig_node->bond.selected = router->next_bond_candidate; + /* selected should point to the next element + * after the current router */ + spin_lock_bh(&primary_orig_node->neigh_list_lock); + /* this is a list_move(), which unfortunately + * does not exist as rcu version */ + list_del_rcu(&primary_orig_node->bond_list); + list_add_rcu(&primary_orig_node->bond_list, + &router->bonding_list); + spin_unlock_bh(&primary_orig_node->neigh_list_lock); } else { /* if bonding is disabled, use the best of the * remaining candidates which are not using * this interface. */ - best_router = first_candidate; + list_for_each_entry_rcu(tmp_neigh_node, + &primary_orig_node->bond_list, bonding_list) { + if (!first_candidate) + first_candidate = tmp_neigh_node; - do { /* recv_if == NULL on the first node. */ - if ((router->if_incoming != recv_if) && - (router->tq_avg > best_router->tq_avg)) - best_router = router; + if (tmp_neigh_node->if_incoming == recv_if) + continue; - router = router->next_bond_candidate; - } while (router != first_candidate); + if (!atomic_inc_not_zero(&tmp_neigh_node->refcount)) + continue; - router = best_router; - } + /* if we don't have a router yet + * or this one is better, choose it. */ + if ((!router) || + (tmp_neigh_node->tq_avg > router->tq_avg)) { + /* decrement refcount of + * previously selected router */ + if (router) + neigh_node_free_ref(router); + + router = tmp_neigh_node; + atomic_inc_not_zero(&router->refcount); + } + + neigh_node_free_ref(tmp_neigh_node); + } + /* use the first candidate if nothing was found. 
*/ + if (!router && first_candidate && + atomic_inc_not_zero(&first_candidate->refcount)) + router = first_candidate; + } +return_router: + rcu_read_unlock(); return router; } @@ -1136,17 +1263,14 @@ static int check_unicast_packet(struct sk_buff *skb, int hdr_size) return 0; } -int route_unicast_packet(struct sk_buff *skb, struct batman_if *recv_if, - int hdr_size) +int route_unicast_packet(struct sk_buff *skb, struct hard_iface *recv_if) { struct bat_priv *bat_priv = netdev_priv(recv_if->soft_iface); - struct orig_node *orig_node; - struct neigh_node *router; - struct batman_if *batman_if; - uint8_t dstaddr[ETH_ALEN]; + struct orig_node *orig_node = NULL; + struct neigh_node *neigh_node = NULL; struct unicast_packet *unicast_packet; struct ethhdr *ethhdr = (struct ethhdr *)skb_mac_header(skb); - int ret; + int ret = NET_RX_DROP; struct sk_buff *new_skb; unicast_packet = (struct unicast_packet *)skb->data; @@ -1156,53 +1280,51 @@ int route_unicast_packet(struct sk_buff *skb, struct batman_if *recv_if, pr_debug("Warning - can't forward unicast packet from %pM to " "%pM: ttl exceeded\n", ethhdr->h_source, unicast_packet->dest); - return NET_RX_DROP; + goto out; } /* get routing information */ - spin_lock_bh(&bat_priv->orig_hash_lock); - orig_node = ((struct orig_node *) - hash_find(bat_priv->orig_hash, compare_orig, choose_orig, - unicast_packet->dest)); - - router = find_router(bat_priv, orig_node, recv_if); + rcu_read_lock(); + orig_node = orig_hash_find(bat_priv, unicast_packet->dest); - if (!router) { - spin_unlock_bh(&bat_priv->orig_hash_lock); - return NET_RX_DROP; - } + if (!orig_node) + goto unlock; - /* don't lock while sending the packets ... we therefore - * copy the required data before sending */ + rcu_read_unlock(); - batman_if = router->if_incoming; - memcpy(dstaddr, router->addr, ETH_ALEN); + /* find_router() increases neigh_nodes refcount if found. */ + neigh_node = find_router(bat_priv, orig_node, recv_if); - spin_unlock_bh(&bat_priv->orig_hash_lock); + if (!neigh_node) + goto out; /* create a copy of the skb, if needed, to modify it. 
*/ if (skb_cow(skb, sizeof(struct ethhdr)) < 0) - return NET_RX_DROP; + goto out; unicast_packet = (struct unicast_packet *)skb->data; if (unicast_packet->packet_type == BAT_UNICAST && atomic_read(&bat_priv->fragmentation) && - skb->len > batman_if->net_dev->mtu) - return frag_send_skb(skb, bat_priv, batman_if, - dstaddr); + skb->len > neigh_node->if_incoming->net_dev->mtu) { + ret = frag_send_skb(skb, bat_priv, + neigh_node->if_incoming, neigh_node->addr); + goto out; + } if (unicast_packet->packet_type == BAT_UNICAST_FRAG && - 2 * skb->len - hdr_size <= batman_if->net_dev->mtu) { + frag_can_reassemble(skb, neigh_node->if_incoming->net_dev->mtu)) { ret = frag_reassemble_skb(skb, bat_priv, &new_skb); if (ret == NET_RX_DROP) - return NET_RX_DROP; + goto out; /* packet was buffered for late merge */ - if (!new_skb) - return NET_RX_SUCCESS; + if (!new_skb) { + ret = NET_RX_SUCCESS; + goto out; + } skb = new_skb; unicast_packet = (struct unicast_packet *)skb->data; @@ -1212,12 +1334,21 @@ int route_unicast_packet(struct sk_buff *skb, struct batman_if *recv_if, unicast_packet->ttl--; /* route it */ - send_skb_packet(skb, batman_if, dstaddr); + send_skb_packet(skb, neigh_node->if_incoming, neigh_node->addr); + ret = NET_RX_SUCCESS; + goto out; - return NET_RX_SUCCESS; +unlock: + rcu_read_unlock(); +out: + if (neigh_node) + neigh_node_free_ref(neigh_node); + if (orig_node) + orig_node_free_ref(orig_node); + return ret; } -int recv_unicast_packet(struct sk_buff *skb, struct batman_if *recv_if) +int recv_unicast_packet(struct sk_buff *skb, struct hard_iface *recv_if) { struct unicast_packet *unicast_packet; int hdr_size = sizeof(struct unicast_packet); @@ -1233,10 +1364,10 @@ int recv_unicast_packet(struct sk_buff *skb, struct batman_if *recv_if) return NET_RX_SUCCESS; } - return route_unicast_packet(skb, recv_if, hdr_size); + return route_unicast_packet(skb, recv_if); } -int recv_ucast_frag_packet(struct sk_buff *skb, struct batman_if *recv_if) +int recv_ucast_frag_packet(struct sk_buff *skb, struct hard_iface *recv_if) { struct bat_priv *bat_priv = netdev_priv(recv_if->soft_iface); struct unicast_frag_packet *unicast_packet; @@ -1266,89 +1397,96 @@ int recv_ucast_frag_packet(struct sk_buff *skb, struct batman_if *recv_if) return NET_RX_SUCCESS; } - return route_unicast_packet(skb, recv_if, hdr_size); + return route_unicast_packet(skb, recv_if); } -int recv_bcast_packet(struct sk_buff *skb, struct batman_if *recv_if) +int recv_bcast_packet(struct sk_buff *skb, struct hard_iface *recv_if) { struct bat_priv *bat_priv = netdev_priv(recv_if->soft_iface); - struct orig_node *orig_node; + struct orig_node *orig_node = NULL; struct bcast_packet *bcast_packet; struct ethhdr *ethhdr; int hdr_size = sizeof(struct bcast_packet); + int ret = NET_RX_DROP; int32_t seq_diff; /* drop packet if it has not necessary minimum size */ if (unlikely(!pskb_may_pull(skb, hdr_size))) - return NET_RX_DROP; + goto out; ethhdr = (struct ethhdr *)skb_mac_header(skb); /* packet with broadcast indication but unicast recipient */ if (!is_broadcast_ether_addr(ethhdr->h_dest)) - return NET_RX_DROP; + goto out; /* packet with broadcast sender address */ if (is_broadcast_ether_addr(ethhdr->h_source)) - return NET_RX_DROP; + goto out; /* ignore broadcasts sent by myself */ if (is_my_mac(ethhdr->h_source)) - return NET_RX_DROP; + goto out; bcast_packet = (struct bcast_packet *)skb->data; /* ignore broadcasts originated by myself */ if (is_my_mac(bcast_packet->orig)) - return NET_RX_DROP; + goto out; if (bcast_packet->ttl < 2) - return 
NET_RX_DROP; + goto out; - spin_lock_bh(&bat_priv->orig_hash_lock); - orig_node = ((struct orig_node *) - hash_find(bat_priv->orig_hash, compare_orig, choose_orig, - bcast_packet->orig)); + rcu_read_lock(); + orig_node = orig_hash_find(bat_priv, bcast_packet->orig); - if (!orig_node) { - spin_unlock_bh(&bat_priv->orig_hash_lock); - return NET_RX_DROP; - } + if (!orig_node) + goto rcu_unlock; + + rcu_read_unlock(); + + spin_lock_bh(&orig_node->bcast_seqno_lock); /* check whether the packet is a duplicate */ - if (get_bit_status(orig_node->bcast_bits, - orig_node->last_bcast_seqno, - ntohl(bcast_packet->seqno))) { - spin_unlock_bh(&bat_priv->orig_hash_lock); - return NET_RX_DROP; - } + if (get_bit_status(orig_node->bcast_bits, orig_node->last_bcast_seqno, + ntohl(bcast_packet->seqno))) + goto spin_unlock; seq_diff = ntohl(bcast_packet->seqno) - orig_node->last_bcast_seqno; /* check whether the packet is old and the host just restarted. */ if (window_protected(bat_priv, seq_diff, - &orig_node->bcast_seqno_reset)) { - spin_unlock_bh(&bat_priv->orig_hash_lock); - return NET_RX_DROP; - } + &orig_node->bcast_seqno_reset)) + goto spin_unlock; /* mark broadcast in flood history, update window position * if required. */ if (bit_get_packet(bat_priv, orig_node->bcast_bits, seq_diff, 1)) orig_node->last_bcast_seqno = ntohl(bcast_packet->seqno); - spin_unlock_bh(&bat_priv->orig_hash_lock); + spin_unlock_bh(&orig_node->bcast_seqno_lock); + /* rebroadcast packet */ add_bcast_packet_to_list(bat_priv, skb); /* broadcast for me */ interface_rx(recv_if->soft_iface, skb, recv_if, hdr_size); + ret = NET_RX_SUCCESS; + goto out; - return NET_RX_SUCCESS; +rcu_unlock: + rcu_read_unlock(); + goto out; +spin_unlock: + spin_unlock_bh(&orig_node->bcast_seqno_lock); +out: + if (orig_node) + orig_node_free_ref(orig_node); + return ret; } -int recv_vis_packet(struct sk_buff *skb, struct batman_if *recv_if) +int recv_vis_packet(struct sk_buff *skb, struct hard_iface *recv_if) { struct vis_packet *vis_packet; struct ethhdr *ethhdr; diff --git a/net/batman-adv/routing.h b/net/batman-adv/routing.h index f108f230bfdb..b5a064c88a4f 100644 --- a/net/batman-adv/routing.h +++ b/net/batman-adv/routing.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: + * Copyright (C) 2007-2011 B.A.T.M.A.N. 
contributors: * * Marek Lindner, Simon Wunderlich * @@ -22,27 +22,25 @@ #ifndef _NET_BATMAN_ADV_ROUTING_H_ #define _NET_BATMAN_ADV_ROUTING_H_ -#include "types.h" - -void slide_own_bcast_window(struct batman_if *batman_if); +void slide_own_bcast_window(struct hard_iface *hard_iface); void receive_bat_packet(struct ethhdr *ethhdr, struct batman_packet *batman_packet, unsigned char *hna_buff, int hna_buff_len, - struct batman_if *if_incoming); + struct hard_iface *if_incoming); void update_routes(struct bat_priv *bat_priv, struct orig_node *orig_node, struct neigh_node *neigh_node, unsigned char *hna_buff, int hna_buff_len); -int route_unicast_packet(struct sk_buff *skb, struct batman_if *recv_if, - int hdr_size); -int recv_icmp_packet(struct sk_buff *skb, struct batman_if *recv_if); -int recv_unicast_packet(struct sk_buff *skb, struct batman_if *recv_if); -int recv_ucast_frag_packet(struct sk_buff *skb, struct batman_if *recv_if); -int recv_bcast_packet(struct sk_buff *skb, struct batman_if *recv_if); -int recv_vis_packet(struct sk_buff *skb, struct batman_if *recv_if); -int recv_bat_packet(struct sk_buff *skb, struct batman_if *recv_if); +int route_unicast_packet(struct sk_buff *skb, struct hard_iface *recv_if); +int recv_icmp_packet(struct sk_buff *skb, struct hard_iface *recv_if); +int recv_unicast_packet(struct sk_buff *skb, struct hard_iface *recv_if); +int recv_ucast_frag_packet(struct sk_buff *skb, struct hard_iface *recv_if); +int recv_bcast_packet(struct sk_buff *skb, struct hard_iface *recv_if); +int recv_vis_packet(struct sk_buff *skb, struct hard_iface *recv_if); +int recv_bat_packet(struct sk_buff *skb, struct hard_iface *recv_if); struct neigh_node *find_router(struct bat_priv *bat_priv, - struct orig_node *orig_node, struct batman_if *recv_if); -void update_bonding_candidates(struct bat_priv *bat_priv, - struct orig_node *orig_node); + struct orig_node *orig_node, + struct hard_iface *recv_if); +void bonding_candidate_del(struct orig_node *orig_node, + struct neigh_node *neigh_node); #endif /* _NET_BATMAN_ADV_ROUTING_H_ */ diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c index b89b9f7709ae..d49e54d932af 100644 --- a/net/batman-adv/send.c +++ b/net/batman-adv/send.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: + * Copyright (C) 2007-2011 B.A.T.M.A.N. 
contributors: * * Marek Lindner, Simon Wunderlich * @@ -25,7 +25,6 @@ #include "translation-table.h" #include "soft-interface.h" #include "hard-interface.h" -#include "types.h" #include "vis.h" #include "aggregation.h" #include "gateway_common.h" @@ -49,7 +48,7 @@ static unsigned long own_send_time(struct bat_priv *bat_priv) } /* when do we schedule a forwarded packet to be sent */ -static unsigned long forward_send_time(struct bat_priv *bat_priv) +static unsigned long forward_send_time(void) { return jiffies + msecs_to_jiffies(random32() % (JITTER/2)); } @@ -57,20 +56,20 @@ static unsigned long forward_send_time(struct bat_priv *bat_priv) /* send out an already prepared packet to the given address via the * specified batman interface */ int send_skb_packet(struct sk_buff *skb, - struct batman_if *batman_if, + struct hard_iface *hard_iface, uint8_t *dst_addr) { struct ethhdr *ethhdr; - if (batman_if->if_status != IF_ACTIVE) + if (hard_iface->if_status != IF_ACTIVE) goto send_skb_err; - if (unlikely(!batman_if->net_dev)) + if (unlikely(!hard_iface->net_dev)) goto send_skb_err; - if (!(batman_if->net_dev->flags & IFF_UP)) { + if (!(hard_iface->net_dev->flags & IFF_UP)) { pr_warning("Interface %s is not up - can't send packet via " - "that interface!\n", batman_if->net_dev->name); + "that interface!\n", hard_iface->net_dev->name); goto send_skb_err; } @@ -81,7 +80,7 @@ int send_skb_packet(struct sk_buff *skb, skb_reset_mac_header(skb); ethhdr = (struct ethhdr *) skb_mac_header(skb); - memcpy(ethhdr->h_source, batman_if->net_dev->dev_addr, ETH_ALEN); + memcpy(ethhdr->h_source, hard_iface->net_dev->dev_addr, ETH_ALEN); memcpy(ethhdr->h_dest, dst_addr, ETH_ALEN); ethhdr->h_proto = __constant_htons(ETH_P_BATMAN); @@ -89,7 +88,7 @@ int send_skb_packet(struct sk_buff *skb, skb->priority = TC_PRIO_CONTROL; skb->protocol = __constant_htons(ETH_P_BATMAN); - skb->dev = batman_if->net_dev; + skb->dev = hard_iface->net_dev; /* dev_queue_xmit() returns a negative result on error. However on * congestion and traffic shaping, it drops and returns NET_XMIT_DROP @@ -103,16 +102,16 @@ send_skb_err: /* Send a packet to a given interface */ static void send_packet_to_if(struct forw_packet *forw_packet, - struct batman_if *batman_if) + struct hard_iface *hard_iface) { - struct bat_priv *bat_priv = netdev_priv(batman_if->soft_iface); + struct bat_priv *bat_priv = netdev_priv(hard_iface->soft_iface); char *fwd_str; uint8_t packet_num; int16_t buff_pos; struct batman_packet *batman_packet; struct sk_buff *skb; - if (batman_if->if_status != IF_ACTIVE) + if (hard_iface->if_status != IF_ACTIVE) return; packet_num = 0; @@ -127,7 +126,7 @@ static void send_packet_to_if(struct forw_packet *forw_packet, /* we might have aggregated direct link packets with an * ordinary base packet */ if ((forw_packet->direct_link_flags & (1 << packet_num)) && - (forw_packet->if_incoming == batman_if)) + (forw_packet->if_incoming == hard_iface)) batman_packet->flags |= DIRECTLINK; else batman_packet->flags &= ~DIRECTLINK; @@ -143,7 +142,8 @@ static void send_packet_to_if(struct forw_packet *forw_packet, batman_packet->tq, batman_packet->ttl, (batman_packet->flags & DIRECTLINK ? 
"on" : "off"), - batman_if->net_dev->name, batman_if->net_dev->dev_addr); + hard_iface->net_dev->name, + hard_iface->net_dev->dev_addr); buff_pos += sizeof(struct batman_packet) + (batman_packet->num_hna * ETH_ALEN); @@ -155,13 +155,13 @@ static void send_packet_to_if(struct forw_packet *forw_packet, /* create clone because function is called more than once */ skb = skb_clone(forw_packet->skb, GFP_ATOMIC); if (skb) - send_skb_packet(skb, batman_if, broadcast_addr); + send_skb_packet(skb, hard_iface, broadcast_addr); } /* send a batman packet */ static void send_packet(struct forw_packet *forw_packet) { - struct batman_if *batman_if; + struct hard_iface *hard_iface; struct net_device *soft_iface; struct bat_priv *bat_priv; struct batman_packet *batman_packet = @@ -205,17 +205,17 @@ static void send_packet(struct forw_packet *forw_packet) /* broadcast on every interface */ rcu_read_lock(); - list_for_each_entry_rcu(batman_if, &if_list, list) { - if (batman_if->soft_iface != soft_iface) + list_for_each_entry_rcu(hard_iface, &hardif_list, list) { + if (hard_iface->soft_iface != soft_iface) continue; - send_packet_to_if(forw_packet, batman_if); + send_packet_to_if(forw_packet, hard_iface); } rcu_read_unlock(); } static void rebuild_batman_packet(struct bat_priv *bat_priv, - struct batman_if *batman_if) + struct hard_iface *hard_iface) { int new_len; unsigned char *new_buff; @@ -227,7 +227,7 @@ static void rebuild_batman_packet(struct bat_priv *bat_priv, /* keep old buffer if kmalloc should fail */ if (new_buff) { - memcpy(new_buff, batman_if->packet_buff, + memcpy(new_buff, hard_iface->packet_buff, sizeof(struct batman_packet)); batman_packet = (struct batman_packet *)new_buff; @@ -235,21 +235,21 @@ static void rebuild_batman_packet(struct bat_priv *bat_priv, new_buff + sizeof(struct batman_packet), new_len - sizeof(struct batman_packet)); - kfree(batman_if->packet_buff); - batman_if->packet_buff = new_buff; - batman_if->packet_len = new_len; + kfree(hard_iface->packet_buff); + hard_iface->packet_buff = new_buff; + hard_iface->packet_len = new_len; } } -void schedule_own_packet(struct batman_if *batman_if) +void schedule_own_packet(struct hard_iface *hard_iface) { - struct bat_priv *bat_priv = netdev_priv(batman_if->soft_iface); + struct bat_priv *bat_priv = netdev_priv(hard_iface->soft_iface); unsigned long send_time; struct batman_packet *batman_packet; int vis_server; - if ((batman_if->if_status == IF_NOT_IN_USE) || - (batman_if->if_status == IF_TO_BE_REMOVED)) + if ((hard_iface->if_status == IF_NOT_IN_USE) || + (hard_iface->if_status == IF_TO_BE_REMOVED)) return; vis_server = atomic_read(&bat_priv->vis_mode); @@ -261,51 +261,51 @@ void schedule_own_packet(struct batman_if *batman_if) * outdated packets (especially uninitialized mac addresses) in the * packet queue */ - if (batman_if->if_status == IF_TO_BE_ACTIVATED) - batman_if->if_status = IF_ACTIVE; + if (hard_iface->if_status == IF_TO_BE_ACTIVATED) + hard_iface->if_status = IF_ACTIVE; /* if local hna has changed and interface is a primary interface */ if ((atomic_read(&bat_priv->hna_local_changed)) && - (batman_if == bat_priv->primary_if)) - rebuild_batman_packet(bat_priv, batman_if); + (hard_iface == bat_priv->primary_if)) + rebuild_batman_packet(bat_priv, hard_iface); /** * NOTE: packet_buff might just have been re-allocated in * rebuild_batman_packet() */ - batman_packet = (struct batman_packet *)batman_if->packet_buff; + batman_packet = (struct batman_packet *)hard_iface->packet_buff; /* change sequence number to network order */ 
batman_packet->seqno = - htonl((uint32_t)atomic_read(&batman_if->seqno)); + htonl((uint32_t)atomic_read(&hard_iface->seqno)); if (vis_server == VIS_TYPE_SERVER_SYNC) batman_packet->flags |= VIS_SERVER; else batman_packet->flags &= ~VIS_SERVER; - if ((batman_if == bat_priv->primary_if) && + if ((hard_iface == bat_priv->primary_if) && (atomic_read(&bat_priv->gw_mode) == GW_MODE_SERVER)) batman_packet->gw_flags = (uint8_t)atomic_read(&bat_priv->gw_bandwidth); else batman_packet->gw_flags = 0; - atomic_inc(&batman_if->seqno); + atomic_inc(&hard_iface->seqno); - slide_own_bcast_window(batman_if); + slide_own_bcast_window(hard_iface); send_time = own_send_time(bat_priv); add_bat_packet_to_list(bat_priv, - batman_if->packet_buff, - batman_if->packet_len, - batman_if, 1, send_time); + hard_iface->packet_buff, + hard_iface->packet_len, + hard_iface, 1, send_time); } void schedule_forward_packet(struct orig_node *orig_node, struct ethhdr *ethhdr, struct batman_packet *batman_packet, uint8_t directlink, int hna_buff_len, - struct batman_if *if_incoming) + struct hard_iface *if_incoming) { struct bat_priv *bat_priv = netdev_priv(if_incoming->soft_iface); unsigned char in_tq, in_ttl, tq_avg = 0; @@ -327,7 +327,7 @@ void schedule_forward_packet(struct orig_node *orig_node, if ((orig_node->router) && (orig_node->router->tq_avg != 0)) { /* rebroadcast ogm of best ranking neighbor as is */ - if (!compare_orig(orig_node->router->addr, ethhdr->h_source)) { + if (!compare_eth(orig_node->router->addr, ethhdr->h_source)) { batman_packet->tq = orig_node->router->tq_avg; if (orig_node->router->last_ttl) @@ -356,7 +356,7 @@ void schedule_forward_packet(struct orig_node *orig_node, else batman_packet->flags &= ~DIRECTLINK; - send_time = forward_send_time(bat_priv); + send_time = forward_send_time(); add_bat_packet_to_list(bat_priv, (unsigned char *)batman_packet, sizeof(struct batman_packet) + hna_buff_len, @@ -444,7 +444,7 @@ out: static void send_outstanding_bcast_packet(struct work_struct *work) { - struct batman_if *batman_if; + struct hard_iface *hard_iface; struct delayed_work *delayed_work = container_of(work, struct delayed_work, work); struct forw_packet *forw_packet = @@ -462,14 +462,14 @@ static void send_outstanding_bcast_packet(struct work_struct *work) /* rebroadcast packet */ rcu_read_lock(); - list_for_each_entry_rcu(batman_if, &if_list, list) { - if (batman_if->soft_iface != soft_iface) + list_for_each_entry_rcu(hard_iface, &hardif_list, list) { + if (hard_iface->soft_iface != soft_iface) continue; /* send a copy of the saved skb */ skb1 = skb_clone(forw_packet->skb, GFP_ATOMIC); if (skb1) - send_skb_packet(skb1, batman_if, broadcast_addr); + send_skb_packet(skb1, hard_iface, broadcast_addr); } rcu_read_unlock(); @@ -522,15 +522,15 @@ out: } void purge_outstanding_packets(struct bat_priv *bat_priv, - struct batman_if *batman_if) + struct hard_iface *hard_iface) { struct forw_packet *forw_packet; struct hlist_node *tmp_node, *safe_tmp_node; - if (batman_if) + if (hard_iface) bat_dbg(DBG_BATMAN, bat_priv, "purge_outstanding_packets(): %s\n", - batman_if->net_dev->name); + hard_iface->net_dev->name); else bat_dbg(DBG_BATMAN, bat_priv, "purge_outstanding_packets()\n"); @@ -544,8 +544,8 @@ void purge_outstanding_packets(struct bat_priv *bat_priv, * if purge_outstanding_packets() was called with an argmument * we delete only packets belonging to the given interface */ - if ((batman_if) && - (forw_packet->if_incoming != batman_if)) + if ((hard_iface) && + (forw_packet->if_incoming != hard_iface)) 
continue; spin_unlock_bh(&bat_priv->forw_bcast_list_lock); @@ -568,8 +568,8 @@ void purge_outstanding_packets(struct bat_priv *bat_priv, * if purge_outstanding_packets() was called with an argmument * we delete only packets belonging to the given interface */ - if ((batman_if) && - (forw_packet->if_incoming != batman_if)) + if ((hard_iface) && + (forw_packet->if_incoming != hard_iface)) continue; spin_unlock_bh(&bat_priv->forw_bat_list_lock); diff --git a/net/batman-adv/send.h b/net/batman-adv/send.h index c4cefa8e4f85..7b2ff19c05e7 100644 --- a/net/batman-adv/send.h +++ b/net/batman-adv/send.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: + * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * @@ -22,20 +22,18 @@ #ifndef _NET_BATMAN_ADV_SEND_H_ #define _NET_BATMAN_ADV_SEND_H_ -#include "types.h" - int send_skb_packet(struct sk_buff *skb, - struct batman_if *batman_if, + struct hard_iface *hard_iface, uint8_t *dst_addr); -void schedule_own_packet(struct batman_if *batman_if); +void schedule_own_packet(struct hard_iface *hard_iface); void schedule_forward_packet(struct orig_node *orig_node, struct ethhdr *ethhdr, struct batman_packet *batman_packet, uint8_t directlink, int hna_buff_len, - struct batman_if *if_outgoing); + struct hard_iface *if_outgoing); int add_bcast_packet_to_list(struct bat_priv *bat_priv, struct sk_buff *skb); void send_outstanding_bat_packet(struct work_struct *work); void purge_outstanding_packets(struct bat_priv *bat_priv, - struct batman_if *batman_if); + struct hard_iface *hard_iface); #endif /* _NET_BATMAN_ADV_SEND_H_ */ diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index e89ede192ed0..9ed26140a269 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: + * Copyright (C) 2007-2011 B.A.T.M.A.N. 
contributors: * * Marek Lindner, Simon Wunderlich * @@ -26,18 +26,15 @@ #include "send.h" #include "bat_debugfs.h" #include "translation-table.h" -#include "types.h" #include "hash.h" #include "gateway_common.h" #include "gateway_client.h" -#include "send.h" #include "bat_sysfs.h" #include <linux/slab.h> #include <linux/ethtool.h> #include <linux/etherdevice.h> #include <linux/if_vlan.h> #include "unicast.h" -#include "routing.h" static int bat_get_settings(struct net_device *dev, struct ethtool_cmd *cmd); @@ -79,20 +76,18 @@ int my_skb_head_push(struct sk_buff *skb, unsigned int len) return 0; } -static void softif_neigh_free_ref(struct kref *refcount) +static void softif_neigh_free_rcu(struct rcu_head *rcu) { struct softif_neigh *softif_neigh; - softif_neigh = container_of(refcount, struct softif_neigh, refcount); + softif_neigh = container_of(rcu, struct softif_neigh, rcu); kfree(softif_neigh); } -static void softif_neigh_free_rcu(struct rcu_head *rcu) +static void softif_neigh_free_ref(struct softif_neigh *softif_neigh) { - struct softif_neigh *softif_neigh; - - softif_neigh = container_of(rcu, struct softif_neigh, rcu); - kref_put(&softif_neigh->refcount, softif_neigh_free_ref); + if (atomic_dec_and_test(&softif_neigh->refcount)) + call_rcu(&softif_neigh->rcu, softif_neigh_free_rcu); } void softif_neigh_purge(struct bat_priv *bat_priv) @@ -119,11 +114,10 @@ void softif_neigh_purge(struct bat_priv *bat_priv) softif_neigh->addr, softif_neigh->vid); softif_neigh_tmp = bat_priv->softif_neigh; bat_priv->softif_neigh = NULL; - kref_put(&softif_neigh_tmp->refcount, - softif_neigh_free_ref); + softif_neigh_free_ref(softif_neigh_tmp); } - call_rcu(&softif_neigh->rcu, softif_neigh_free_rcu); + softif_neigh_free_ref(softif_neigh); } spin_unlock_bh(&bat_priv->softif_neigh_lock); @@ -138,14 +132,17 @@ static struct softif_neigh *softif_neigh_get(struct bat_priv *bat_priv, rcu_read_lock(); hlist_for_each_entry_rcu(softif_neigh, node, &bat_priv->softif_neigh_list, list) { - if (memcmp(softif_neigh->addr, addr, ETH_ALEN) != 0) + if (!compare_eth(softif_neigh->addr, addr)) continue; if (softif_neigh->vid != vid) continue; + if (!atomic_inc_not_zero(&softif_neigh->refcount)) + continue; + softif_neigh->last_seen = jiffies; - goto found; + goto out; } softif_neigh = kzalloc(sizeof(struct softif_neigh), GFP_ATOMIC); @@ -155,15 +152,14 @@ static struct softif_neigh *softif_neigh_get(struct bat_priv *bat_priv, memcpy(softif_neigh->addr, addr, ETH_ALEN); softif_neigh->vid = vid; softif_neigh->last_seen = jiffies; - kref_init(&softif_neigh->refcount); + /* initialize with 2 - caller decrements counter by one */ + atomic_set(&softif_neigh->refcount, 2); INIT_HLIST_NODE(&softif_neigh->list); spin_lock_bh(&bat_priv->softif_neigh_lock); hlist_add_head_rcu(&softif_neigh->list, &bat_priv->softif_neigh_list); spin_unlock_bh(&bat_priv->softif_neigh_lock); -found: - kref_get(&softif_neigh->refcount); out: rcu_read_unlock(); return softif_neigh; @@ -175,8 +171,6 @@ int softif_neigh_seq_print_text(struct seq_file *seq, void *offset) struct bat_priv *bat_priv = netdev_priv(net_dev); struct softif_neigh *softif_neigh; struct hlist_node *node; - size_t buf_size, pos; - char *buff; if (!bat_priv->primary_if) { return seq_printf(seq, "BATMAN mesh %s disabled - " @@ -186,33 +180,15 @@ int softif_neigh_seq_print_text(struct seq_file *seq, void *offset) seq_printf(seq, "Softif neighbor list (%s)\n", net_dev->name); - buf_size = 1; - /* Estimate length for: " xx:xx:xx:xx:xx:xx\n" */ rcu_read_lock(); 
hlist_for_each_entry_rcu(softif_neigh, node, &bat_priv->softif_neigh_list, list) - buf_size += 30; - rcu_read_unlock(); - - buff = kmalloc(buf_size, GFP_ATOMIC); - if (!buff) - return -ENOMEM; - - buff[0] = '\0'; - pos = 0; - - rcu_read_lock(); - hlist_for_each_entry_rcu(softif_neigh, node, - &bat_priv->softif_neigh_list, list) { - pos += snprintf(buff + pos, 31, "%s %pM (vid: %d)\n", + seq_printf(seq, "%s %pM (vid: %d)\n", bat_priv->softif_neigh == softif_neigh ? "=>" : " ", softif_neigh->addr, softif_neigh->vid); - } rcu_read_unlock(); - seq_printf(seq, "%s", buff); - kfree(buff); return 0; } @@ -267,7 +243,7 @@ static void softif_batman_recv(struct sk_buff *skb, struct net_device *dev, softif_neigh->addr, softif_neigh->vid); softif_neigh_tmp = bat_priv->softif_neigh; bat_priv->softif_neigh = softif_neigh; - kref_put(&softif_neigh_tmp->refcount, softif_neigh_free_ref); + softif_neigh_free_ref(softif_neigh_tmp); /* we need to hold the additional reference */ goto err; } @@ -285,7 +261,7 @@ static void softif_batman_recv(struct sk_buff *skb, struct net_device *dev, } out: - kref_put(&softif_neigh->refcount, softif_neigh_free_ref); + softif_neigh_free_ref(softif_neigh); err: kfree_skb(skb); return; @@ -438,7 +414,7 @@ end: } void interface_rx(struct net_device *soft_iface, - struct sk_buff *skb, struct batman_if *recv_if, + struct sk_buff *skb, struct hard_iface *recv_if, int hdr_size) { struct bat_priv *bat_priv = netdev_priv(soft_iface); @@ -486,7 +462,7 @@ void interface_rx(struct net_device *soft_iface, memcpy(unicast_packet->dest, bat_priv->softif_neigh->addr, ETH_ALEN); - ret = route_unicast_packet(skb, recv_if, hdr_size); + ret = route_unicast_packet(skb, recv_if); if (ret == NET_RX_DROP) goto dropped; @@ -646,6 +622,19 @@ void softif_destroy(struct net_device *soft_iface) unregister_netdevice(soft_iface); } +int softif_is_valid(struct net_device *net_dev) +{ +#ifdef HAVE_NET_DEVICE_OPS + if (net_dev->netdev_ops->ndo_start_xmit == interface_tx) + return 1; +#else + if (net_dev->hard_start_xmit == interface_tx) + return 1; +#endif + + return 0; +} + /* ethtool */ static int bat_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) { diff --git a/net/batman-adv/soft-interface.h b/net/batman-adv/soft-interface.h index 02b77334d10d..4789b6f2a0b3 100644 --- a/net/batman-adv/soft-interface.h +++ b/net/batman-adv/soft-interface.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: + * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors: * * Marek Lindner * @@ -27,9 +27,10 @@ int softif_neigh_seq_print_text(struct seq_file *seq, void *offset); void softif_neigh_purge(struct bat_priv *bat_priv); int interface_tx(struct sk_buff *skb, struct net_device *soft_iface); void interface_rx(struct net_device *soft_iface, - struct sk_buff *skb, struct batman_if *recv_if, + struct sk_buff *skb, struct hard_iface *recv_if, int hdr_size); struct net_device *softif_create(char *name); void softif_destroy(struct net_device *soft_iface); +int softif_is_valid(struct net_device *net_dev); #endif /* _NET_BATMAN_ADV_SOFT_INTERFACE_H_ */ diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index a633b5a435e2..8d15b48d1692 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: + * Copyright (C) 2007-2011 B.A.T.M.A.N. 
contributors: * * Marek Lindner, Simon Wunderlich * @@ -22,7 +22,6 @@ #include "main.h" #include "translation-table.h" #include "soft-interface.h" -#include "types.h" #include "hash.h" #include "originator.h" @@ -31,12 +30,85 @@ static void _hna_global_del_orig(struct bat_priv *bat_priv, struct hna_global_entry *hna_global_entry, char *message); +/* returns 1 if they are the same mac addr */ +static int compare_lhna(struct hlist_node *node, void *data2) +{ + void *data1 = container_of(node, struct hna_local_entry, hash_entry); + + return (memcmp(data1, data2, ETH_ALEN) == 0 ? 1 : 0); +} + +/* returns 1 if they are the same mac addr */ +static int compare_ghna(struct hlist_node *node, void *data2) +{ + void *data1 = container_of(node, struct hna_global_entry, hash_entry); + + return (memcmp(data1, data2, ETH_ALEN) == 0 ? 1 : 0); +} + static void hna_local_start_timer(struct bat_priv *bat_priv) { INIT_DELAYED_WORK(&bat_priv->hna_work, hna_local_purge); queue_delayed_work(bat_event_workqueue, &bat_priv->hna_work, 10 * HZ); } +static struct hna_local_entry *hna_local_hash_find(struct bat_priv *bat_priv, + void *data) +{ + struct hashtable_t *hash = bat_priv->hna_local_hash; + struct hlist_head *head; + struct hlist_node *node; + struct hna_local_entry *hna_local_entry, *hna_local_entry_tmp = NULL; + int index; + + if (!hash) + return NULL; + + index = choose_orig(data, hash->size); + head = &hash->table[index]; + + rcu_read_lock(); + hlist_for_each_entry_rcu(hna_local_entry, node, head, hash_entry) { + if (!compare_eth(hna_local_entry, data)) + continue; + + hna_local_entry_tmp = hna_local_entry; + break; + } + rcu_read_unlock(); + + return hna_local_entry_tmp; +} + +static struct hna_global_entry *hna_global_hash_find(struct bat_priv *bat_priv, + void *data) +{ + struct hashtable_t *hash = bat_priv->hna_global_hash; + struct hlist_head *head; + struct hlist_node *node; + struct hna_global_entry *hna_global_entry; + struct hna_global_entry *hna_global_entry_tmp = NULL; + int index; + + if (!hash) + return NULL; + + index = choose_orig(data, hash->size); + head = &hash->table[index]; + + rcu_read_lock(); + hlist_for_each_entry_rcu(hna_global_entry, node, head, hash_entry) { + if (!compare_eth(hna_global_entry, data)) + continue; + + hna_global_entry_tmp = hna_global_entry; + break; + } + rcu_read_unlock(); + + return hna_global_entry_tmp; +} + int hna_local_init(struct bat_priv *bat_priv) { if (bat_priv->hna_local_hash) @@ -61,10 +133,7 @@ void hna_local_add(struct net_device *soft_iface, uint8_t *addr) int required_bytes; spin_lock_bh(&bat_priv->hna_lhash_lock); - hna_local_entry = - ((struct hna_local_entry *)hash_find(bat_priv->hna_local_hash, - compare_orig, choose_orig, - addr)); + hna_local_entry = hna_local_hash_find(bat_priv, addr); spin_unlock_bh(&bat_priv->hna_lhash_lock); if (hna_local_entry) { @@ -100,15 +169,15 @@ void hna_local_add(struct net_device *soft_iface, uint8_t *addr) hna_local_entry->last_seen = jiffies; /* the batman interface mac address should never be purged */ - if (compare_orig(addr, soft_iface->dev_addr)) + if (compare_eth(addr, soft_iface->dev_addr)) hna_local_entry->never_purge = 1; else hna_local_entry->never_purge = 0; spin_lock_bh(&bat_priv->hna_lhash_lock); - hash_add(bat_priv->hna_local_hash, compare_orig, choose_orig, - hna_local_entry); + hash_add(bat_priv->hna_local_hash, compare_lhna, choose_orig, + hna_local_entry, &hna_local_entry->hash_entry); bat_priv->num_local_hna++; atomic_set(&bat_priv->hna_local_changed, 1); @@ -117,9 +186,7 @@ void 
hna_local_add(struct net_device *soft_iface, uint8_t *addr) /* remove address from global hash if present */ spin_lock_bh(&bat_priv->hna_ghash_lock); - hna_global_entry = ((struct hna_global_entry *) - hash_find(bat_priv->hna_global_hash, - compare_orig, choose_orig, addr)); + hna_global_entry = hna_global_hash_find(bat_priv, addr); if (hna_global_entry) _hna_global_del_orig(bat_priv, hna_global_entry, @@ -133,28 +200,27 @@ int hna_local_fill_buffer(struct bat_priv *bat_priv, { struct hashtable_t *hash = bat_priv->hna_local_hash; struct hna_local_entry *hna_local_entry; - struct element_t *bucket; - int i; - struct hlist_node *walk; + struct hlist_node *node; struct hlist_head *head; - int count = 0; + int i, count = 0; spin_lock_bh(&bat_priv->hna_lhash_lock); for (i = 0; i < hash->size; i++) { head = &hash->table[i]; - hlist_for_each_entry(bucket, walk, head, hlist) { - + rcu_read_lock(); + hlist_for_each_entry_rcu(hna_local_entry, node, + head, hash_entry) { if (buff_len < (count + 1) * ETH_ALEN) break; - hna_local_entry = bucket->data; memcpy(buff + (count * ETH_ALEN), hna_local_entry->addr, ETH_ALEN); count++; } + rcu_read_unlock(); } /* if we did not get all new local hnas see you next time ;-) */ @@ -171,12 +237,11 @@ int hna_local_seq_print_text(struct seq_file *seq, void *offset) struct bat_priv *bat_priv = netdev_priv(net_dev); struct hashtable_t *hash = bat_priv->hna_local_hash; struct hna_local_entry *hna_local_entry; - int i; - struct hlist_node *walk; + struct hlist_node *node; struct hlist_head *head; - struct element_t *bucket; size_t buf_size, pos; char *buff; + int i; if (!bat_priv->primary_if) { return seq_printf(seq, "BATMAN mesh %s disabled - " @@ -195,8 +260,10 @@ int hna_local_seq_print_text(struct seq_file *seq, void *offset) for (i = 0; i < hash->size; i++) { head = &hash->table[i]; - hlist_for_each(walk, head) + rcu_read_lock(); + __hlist_for_each_rcu(node, head) buf_size += 21; + rcu_read_unlock(); } buff = kmalloc(buf_size, GFP_ATOMIC); @@ -204,18 +271,20 @@ int hna_local_seq_print_text(struct seq_file *seq, void *offset) spin_unlock_bh(&bat_priv->hna_lhash_lock); return -ENOMEM; } + buff[0] = '\0'; pos = 0; for (i = 0; i < hash->size; i++) { head = &hash->table[i]; - hlist_for_each_entry(bucket, walk, head, hlist) { - hna_local_entry = bucket->data; - + rcu_read_lock(); + hlist_for_each_entry_rcu(hna_local_entry, node, + head, hash_entry) { pos += snprintf(buff + pos, 22, " * %pM\n", hna_local_entry->addr); } + rcu_read_unlock(); } spin_unlock_bh(&bat_priv->hna_lhash_lock); @@ -225,9 +294,10 @@ int hna_local_seq_print_text(struct seq_file *seq, void *offset) return 0; } -static void _hna_local_del(void *data, void *arg) +static void _hna_local_del(struct hlist_node *node, void *arg) { struct bat_priv *bat_priv = (struct bat_priv *)arg; + void *data = container_of(node, struct hna_local_entry, hash_entry); kfree(data); bat_priv->num_local_hna--; @@ -241,9 +311,9 @@ static void hna_local_del(struct bat_priv *bat_priv, bat_dbg(DBG_ROUTES, bat_priv, "Deleting local hna entry (%pM): %s\n", hna_local_entry->addr, message); - hash_remove(bat_priv->hna_local_hash, compare_orig, choose_orig, + hash_remove(bat_priv->hna_local_hash, compare_lhna, choose_orig, hna_local_entry->addr); - _hna_local_del(hna_local_entry, bat_priv); + _hna_local_del(&hna_local_entry->hash_entry, bat_priv); } void hna_local_remove(struct bat_priv *bat_priv, @@ -253,9 +323,7 @@ void hna_local_remove(struct bat_priv *bat_priv, spin_lock_bh(&bat_priv->hna_lhash_lock); - hna_local_entry = (struct 
hna_local_entry *) - hash_find(bat_priv->hna_local_hash, compare_orig, choose_orig, - addr); + hna_local_entry = hna_local_hash_find(bat_priv, addr); if (hna_local_entry) hna_local_del(bat_priv, hna_local_entry, message); @@ -271,27 +339,29 @@ static void hna_local_purge(struct work_struct *work) container_of(delayed_work, struct bat_priv, hna_work); struct hashtable_t *hash = bat_priv->hna_local_hash; struct hna_local_entry *hna_local_entry; - int i; - struct hlist_node *walk, *safe; + struct hlist_node *node, *node_tmp; struct hlist_head *head; - struct element_t *bucket; unsigned long timeout; + int i; spin_lock_bh(&bat_priv->hna_lhash_lock); for (i = 0; i < hash->size; i++) { head = &hash->table[i]; - hlist_for_each_entry_safe(bucket, walk, safe, head, hlist) { - hna_local_entry = bucket->data; + hlist_for_each_entry_safe(hna_local_entry, node, node_tmp, + head, hash_entry) { + if (hna_local_entry->never_purge) + continue; timeout = hna_local_entry->last_seen; timeout += LOCAL_HNA_TIMEOUT * HZ; - if ((!hna_local_entry->never_purge) && - time_after(jiffies, timeout)) - hna_local_del(bat_priv, hna_local_entry, - "address timed out"); + if (time_before(jiffies, timeout)) + continue; + + hna_local_del(bat_priv, hna_local_entry, + "address timed out"); } } @@ -335,9 +405,7 @@ void hna_global_add_orig(struct bat_priv *bat_priv, spin_lock_bh(&bat_priv->hna_ghash_lock); hna_ptr = hna_buff + (hna_buff_count * ETH_ALEN); - hna_global_entry = (struct hna_global_entry *) - hash_find(bat_priv->hna_global_hash, compare_orig, - choose_orig, hna_ptr); + hna_global_entry = hna_global_hash_find(bat_priv, hna_ptr); if (!hna_global_entry) { spin_unlock_bh(&bat_priv->hna_ghash_lock); @@ -357,8 +425,9 @@ void hna_global_add_orig(struct bat_priv *bat_priv, hna_global_entry->addr, orig_node->orig); spin_lock_bh(&bat_priv->hna_ghash_lock); - hash_add(bat_priv->hna_global_hash, compare_orig, - choose_orig, hna_global_entry); + hash_add(bat_priv->hna_global_hash, compare_ghna, + choose_orig, hna_global_entry, + &hna_global_entry->hash_entry); } @@ -369,9 +438,7 @@ void hna_global_add_orig(struct bat_priv *bat_priv, spin_lock_bh(&bat_priv->hna_lhash_lock); hna_ptr = hna_buff + (hna_buff_count * ETH_ALEN); - hna_local_entry = (struct hna_local_entry *) - hash_find(bat_priv->hna_local_hash, compare_orig, - choose_orig, hna_ptr); + hna_local_entry = hna_local_hash_find(bat_priv, hna_ptr); if (hna_local_entry) hna_local_del(bat_priv, hna_local_entry, @@ -401,12 +468,11 @@ int hna_global_seq_print_text(struct seq_file *seq, void *offset) struct bat_priv *bat_priv = netdev_priv(net_dev); struct hashtable_t *hash = bat_priv->hna_global_hash; struct hna_global_entry *hna_global_entry; - int i; - struct hlist_node *walk; + struct hlist_node *node; struct hlist_head *head; - struct element_t *bucket; size_t buf_size, pos; char *buff; + int i; if (!bat_priv->primary_if) { return seq_printf(seq, "BATMAN mesh %s disabled - " @@ -424,8 +490,10 @@ int hna_global_seq_print_text(struct seq_file *seq, void *offset) for (i = 0; i < hash->size; i++) { head = &hash->table[i]; - hlist_for_each(walk, head) + rcu_read_lock(); + __hlist_for_each_rcu(node, head) buf_size += 43; + rcu_read_unlock(); } buff = kmalloc(buf_size, GFP_ATOMIC); @@ -439,14 +507,15 @@ int hna_global_seq_print_text(struct seq_file *seq, void *offset) for (i = 0; i < hash->size; i++) { head = &hash->table[i]; - hlist_for_each_entry(bucket, walk, head, hlist) { - hna_global_entry = bucket->data; - + rcu_read_lock(); + hlist_for_each_entry_rcu(hna_global_entry, node, 
+ head, hash_entry) { pos += snprintf(buff + pos, 44, " * %pM via %pM\n", hna_global_entry->addr, hna_global_entry->orig_node->orig); } + rcu_read_unlock(); } spin_unlock_bh(&bat_priv->hna_ghash_lock); @@ -465,7 +534,7 @@ static void _hna_global_del_orig(struct bat_priv *bat_priv, hna_global_entry->addr, hna_global_entry->orig_node->orig, message); - hash_remove(bat_priv->hna_global_hash, compare_orig, choose_orig, + hash_remove(bat_priv->hna_global_hash, compare_ghna, choose_orig, hna_global_entry->addr); kfree(hna_global_entry); } @@ -484,9 +553,7 @@ void hna_global_del_orig(struct bat_priv *bat_priv, while ((hna_buff_count + 1) * ETH_ALEN <= orig_node->hna_buff_len) { hna_ptr = orig_node->hna_buff + (hna_buff_count * ETH_ALEN); - hna_global_entry = (struct hna_global_entry *) - hash_find(bat_priv->hna_global_hash, compare_orig, - choose_orig, hna_ptr); + hna_global_entry = hna_global_hash_find(bat_priv, hna_ptr); if ((hna_global_entry) && (hna_global_entry->orig_node == orig_node)) @@ -503,8 +570,10 @@ void hna_global_del_orig(struct bat_priv *bat_priv, orig_node->hna_buff = NULL; } -static void hna_global_del(void *data, void *arg) +static void hna_global_del(struct hlist_node *node, void *arg) { + void *data = container_of(node, struct hna_global_entry, hash_entry); + kfree(data); } @@ -520,15 +589,20 @@ void hna_global_free(struct bat_priv *bat_priv) struct orig_node *transtable_search(struct bat_priv *bat_priv, uint8_t *addr) { struct hna_global_entry *hna_global_entry; + struct orig_node *orig_node = NULL; spin_lock_bh(&bat_priv->hna_ghash_lock); - hna_global_entry = (struct hna_global_entry *) - hash_find(bat_priv->hna_global_hash, - compare_orig, choose_orig, addr); - spin_unlock_bh(&bat_priv->hna_ghash_lock); + hna_global_entry = hna_global_hash_find(bat_priv, addr); if (!hna_global_entry) - return NULL; + goto out; - return hna_global_entry->orig_node; + if (!atomic_inc_not_zero(&hna_global_entry->orig_node->refcount)) + goto out; + + orig_node = hna_global_entry->orig_node; + +out: + spin_unlock_bh(&bat_priv->hna_ghash_lock); + return orig_node; } diff --git a/net/batman-adv/translation-table.h b/net/batman-adv/translation-table.h index 10c4c5c319b6..f19931ca1457 100644 --- a/net/batman-adv/translation-table.h +++ b/net/batman-adv/translation-table.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: + * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * @@ -22,8 +22,6 @@ #ifndef _NET_BATMAN_ADV_TRANSLATION_TABLE_H_ #define _NET_BATMAN_ADV_TRANSLATION_TABLE_H_ -#include "types.h" - int hna_local_init(struct bat_priv *bat_priv); void hna_local_add(struct net_device *soft_iface, uint8_t *addr); void hna_local_remove(struct bat_priv *bat_priv, diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index bf3f6f5a12c4..83445cf0cc9f 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: + * Copyright (C) 2007-2011 B.A.T.M.A.N. 
contributors: * * Marek Lindner, Simon Wunderlich * @@ -33,7 +33,7 @@ sizeof(struct bcast_packet)))) -struct batman_if { +struct hard_iface { struct list_head list; int16_t if_num; char if_status; @@ -43,7 +43,7 @@ struct batman_if { unsigned char *packet_buff; int packet_len; struct kobject *hardif_obj; - struct kref refcount; + atomic_t refcount; struct packet_type batman_adv_ptype; struct net_device *soft_iface; struct rcu_head rcu; @@ -70,8 +70,6 @@ struct orig_node { struct neigh_node *router; unsigned long *bcast_own; uint8_t *bcast_own_sum; - uint8_t tq_own; - int tq_asym_penalty; unsigned long last_valid; unsigned long bcast_seqno_reset; unsigned long batman_seqno_reset; @@ -83,20 +81,28 @@ struct orig_node { uint8_t last_ttl; unsigned long bcast_bits[NUM_WORDS]; uint32_t last_bcast_seqno; - struct list_head neigh_list; + struct hlist_head neigh_list; struct list_head frag_list; + spinlock_t neigh_list_lock; /* protects neighbor list */ + atomic_t refcount; + struct rcu_head rcu; + struct hlist_node hash_entry; + struct bat_priv *bat_priv; unsigned long last_frag_packet; - struct { - uint8_t candidates; - struct neigh_node *selected; - } bond; + spinlock_t ogm_cnt_lock; /* protects: bcast_own, bcast_own_sum, + * neigh_node->real_bits, + * neigh_node->real_packet_count */ + spinlock_t bcast_seqno_lock; /* protects bcast_bits, + * last_bcast_seqno */ + atomic_t bond_candidates; + struct list_head bond_list; }; struct gw_node { struct hlist_node list; struct orig_node *orig_node; unsigned long deleted; - struct kref refcount; + atomic_t refcount; struct rcu_head rcu; }; @@ -105,18 +111,20 @@ struct gw_node { * @last_valid: when last packet via this neighbor was received */ struct neigh_node { - struct list_head list; + struct hlist_node list; uint8_t addr[ETH_ALEN]; uint8_t real_packet_count; uint8_t tq_recv[TQ_GLOBAL_WINDOW_SIZE]; uint8_t tq_index; uint8_t tq_avg; uint8_t last_ttl; - struct neigh_node *next_bond_candidate; + struct list_head bonding_list; unsigned long last_valid; unsigned long real_bits[NUM_WORDS]; + atomic_t refcount; + struct rcu_head rcu; struct orig_node *orig_node; - struct batman_if *if_incoming; + struct hard_iface *if_incoming; }; @@ -140,7 +148,7 @@ struct bat_priv { struct hlist_head softif_neigh_list; struct softif_neigh *softif_neigh; struct debug_log *debug_log; - struct batman_if *primary_if; + struct hard_iface *primary_if; struct kobject *mesh_obj; struct dentry *debug_dir; struct hlist_head forw_bat_list; @@ -151,12 +159,11 @@ struct bat_priv { struct hashtable_t *hna_local_hash; struct hashtable_t *hna_global_hash; struct hashtable_t *vis_hash; - spinlock_t orig_hash_lock; /* protects orig_hash */ spinlock_t forw_bat_list_lock; /* protects forw_bat_list */ spinlock_t forw_bcast_list_lock; /* protects */ spinlock_t hna_lhash_lock; /* protects hna_local_hash */ spinlock_t hna_ghash_lock; /* protects hna_global_hash */ - spinlock_t gw_list_lock; /* protects gw_list */ + spinlock_t gw_list_lock; /* protects gw_list and curr_gw */ spinlock_t vis_hash_lock; /* protects vis_hash */ spinlock_t vis_list_lock; /* protects vis_info::recv_list */ spinlock_t softif_neigh_lock; /* protects soft-interface neigh list */ @@ -165,7 +172,7 @@ struct bat_priv { struct delayed_work hna_work; struct delayed_work orig_work; struct delayed_work vis_work; - struct gw_node *curr_gw; + struct gw_node __rcu *curr_gw; /* rcu protected pointer */ struct vis_info *my_vis_info; }; @@ -188,11 +195,13 @@ struct hna_local_entry { uint8_t addr[ETH_ALEN]; unsigned long last_seen; char 
never_purge; + struct hlist_node hash_entry; }; struct hna_global_entry { uint8_t addr[ETH_ALEN]; struct orig_node *orig_node; + struct hlist_node hash_entry; }; /** @@ -208,7 +217,7 @@ struct forw_packet { uint32_t direct_link_flags; uint8_t num_packets; struct delayed_work delayed_work; - struct batman_if *if_incoming; + struct hard_iface *if_incoming; }; /* While scanning for vis-entries of a particular vis-originator @@ -242,6 +251,7 @@ struct vis_info { * from. we should not reply to them. */ struct list_head send_list; struct kref refcount; + struct hlist_node hash_entry; struct bat_priv *bat_priv; /* this packet might be part of the vis send queue. */ struct sk_buff *skb_packet; @@ -264,7 +274,7 @@ struct softif_neigh { uint8_t addr[ETH_ALEN]; unsigned long last_seen; short vid; - struct kref refcount; + atomic_t refcount; struct rcu_head rcu; }; diff --git a/net/batman-adv/unicast.c b/net/batman-adv/unicast.c index ee41fef04b21..19f84bd443af 100644 --- a/net/batman-adv/unicast.c +++ b/net/batman-adv/unicast.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2010 B.A.T.M.A.N. contributors: + * Copyright (C) 2010-2011 B.A.T.M.A.N. contributors: * * Andreas Langer * @@ -39,8 +39,8 @@ static struct sk_buff *frag_merge_packet(struct list_head *head, (struct unicast_frag_packet *)skb->data; struct sk_buff *tmp_skb; struct unicast_packet *unicast_packet; - int hdr_len = sizeof(struct unicast_packet), - uni_diff = sizeof(struct unicast_frag_packet) - hdr_len; + int hdr_len = sizeof(struct unicast_packet); + int uni_diff = sizeof(struct unicast_frag_packet) - hdr_len; /* set skb to the first part and tmp_skb to the second part */ if (up->flags & UNI_FRAG_HEAD) { @@ -50,12 +50,12 @@ static struct sk_buff *frag_merge_packet(struct list_head *head, skb = tfp->skb; } + if (skb_linearize(skb) < 0 || skb_linearize(tmp_skb) < 0) + goto err; + skb_pull(tmp_skb, sizeof(struct unicast_frag_packet)); - if (pskb_expand_head(skb, 0, tmp_skb->len, GFP_ATOMIC) < 0) { - /* free buffered skb, skb will be freed later */ - kfree_skb(tfp->skb); - return NULL; - } + if (pskb_expand_head(skb, 0, tmp_skb->len, GFP_ATOMIC) < 0) + goto err; /* move free entry to end */ tfp->skb = NULL; @@ -70,6 +70,11 @@ static struct sk_buff *frag_merge_packet(struct list_head *head, unicast_packet->packet_type = BAT_UNICAST; return skb; + +err: + /* free buffered skb, skb will be freed later */ + kfree_skb(tfp->skb); + return NULL; } static void frag_create_entry(struct list_head *head, struct sk_buff *skb) @@ -178,15 +183,10 @@ int frag_reassemble_skb(struct sk_buff *skb, struct bat_priv *bat_priv, (struct unicast_frag_packet *)skb->data; *new_skb = NULL; - spin_lock_bh(&bat_priv->orig_hash_lock); - orig_node = ((struct orig_node *) - hash_find(bat_priv->orig_hash, compare_orig, choose_orig, - unicast_packet->orig)); - if (!orig_node) { - pr_debug("couldn't find originator in orig_hash\n"); + orig_node = orig_hash_find(bat_priv, unicast_packet->orig); + if (!orig_node) goto out; - } orig_node->last_frag_packet = jiffies; @@ -210,21 +210,24 @@ int frag_reassemble_skb(struct sk_buff *skb, struct bat_priv *bat_priv, /* if not, merge failed */ if (*new_skb) ret = NET_RX_SUCCESS; -out: - spin_unlock_bh(&bat_priv->orig_hash_lock); +out: + if (orig_node) + orig_node_free_ref(orig_node); return ret; } int frag_send_skb(struct sk_buff *skb, struct bat_priv *bat_priv, - struct batman_if *batman_if, uint8_t dstaddr[]) + struct hard_iface *hard_iface, uint8_t dstaddr[]) { struct unicast_packet tmp_uc, *unicast_packet; struct sk_buff *frag_skb; struct 
unicast_frag_packet *frag1, *frag2; int uc_hdr_len = sizeof(struct unicast_packet); int ucf_hdr_len = sizeof(struct unicast_frag_packet); - int data_len = skb->len; + int data_len = skb->len - uc_hdr_len; + int large_tail = 0; + uint16_t seqno; if (!bat_priv->primary_if) goto dropped; @@ -232,10 +235,11 @@ int frag_send_skb(struct sk_buff *skb, struct bat_priv *bat_priv, frag_skb = dev_alloc_skb(data_len - (data_len / 2) + ucf_hdr_len); if (!frag_skb) goto dropped; + skb_reserve(frag_skb, ucf_hdr_len); unicast_packet = (struct unicast_packet *) skb->data; memcpy(&tmp_uc, unicast_packet, uc_hdr_len); - skb_split(skb, frag_skb, data_len / 2); + skb_split(skb, frag_skb, data_len / 2 + uc_hdr_len); if (my_skb_head_push(skb, ucf_hdr_len - uc_hdr_len) < 0 || my_skb_head_push(frag_skb, ucf_hdr_len) < 0) @@ -253,16 +257,18 @@ int frag_send_skb(struct sk_buff *skb, struct bat_priv *bat_priv, memcpy(frag1->orig, bat_priv->primary_if->net_dev->dev_addr, ETH_ALEN); memcpy(frag2, frag1, sizeof(struct unicast_frag_packet)); - frag1->flags |= UNI_FRAG_HEAD; - frag2->flags &= ~UNI_FRAG_HEAD; + if (data_len & 1) + large_tail = UNI_FRAG_LARGETAIL; - frag1->seqno = htons((uint16_t)atomic_inc_return( - &batman_if->frag_seqno)); - frag2->seqno = htons((uint16_t)atomic_inc_return( - &batman_if->frag_seqno)); + frag1->flags = UNI_FRAG_HEAD | large_tail; + frag2->flags = large_tail; - send_skb_packet(skb, batman_if, dstaddr); - send_skb_packet(frag_skb, batman_if, dstaddr); + seqno = atomic_add_return(2, &hard_iface->frag_seqno); + frag1->seqno = htons(seqno - 1); + frag2->seqno = htons(seqno); + + send_skb_packet(skb, hard_iface, dstaddr); + send_skb_packet(frag_skb, hard_iface, dstaddr); return NET_RX_SUCCESS; drop_frag: @@ -277,44 +283,36 @@ int unicast_send_skb(struct sk_buff *skb, struct bat_priv *bat_priv) struct ethhdr *ethhdr = (struct ethhdr *)skb->data; struct unicast_packet *unicast_packet; struct orig_node *orig_node; - struct batman_if *batman_if; - struct neigh_node *router; + struct neigh_node *neigh_node; int data_len = skb->len; - uint8_t dstaddr[6]; - - spin_lock_bh(&bat_priv->orig_hash_lock); + int ret = 1; /* get routing information */ - if (is_multicast_ether_addr(ethhdr->h_dest)) + if (is_multicast_ether_addr(ethhdr->h_dest)) { orig_node = (struct orig_node *)gw_get_selected(bat_priv); - else - orig_node = ((struct orig_node *)hash_find(bat_priv->orig_hash, - compare_orig, - choose_orig, - ethhdr->h_dest)); - - /* check for hna host */ - if (!orig_node) - orig_node = transtable_search(bat_priv, ethhdr->h_dest); - - router = find_router(bat_priv, orig_node, NULL); - - if (!router) - goto unlock; + if (orig_node) + goto find_router; + } - /* don't lock while sending the packets ... we therefore - * copy the required data before sending */ + /* check for hna host - increases orig_node refcount */ + orig_node = transtable_search(bat_priv, ethhdr->h_dest); - batman_if = router->if_incoming; - memcpy(dstaddr, router->addr, ETH_ALEN); +find_router: + /** + * find_router(): + * - if orig_node is NULL it returns NULL + * - increases neigh_nodes refcount if found. 
+ */ + neigh_node = find_router(bat_priv, orig_node, NULL); - spin_unlock_bh(&bat_priv->orig_hash_lock); + if (!neigh_node) + goto out; - if (batman_if->if_status != IF_ACTIVE) - goto dropped; + if (neigh_node->if_incoming->if_status != IF_ACTIVE) + goto out; if (my_skb_head_push(skb, sizeof(struct unicast_packet)) < 0) - goto dropped; + goto out; unicast_packet = (struct unicast_packet *)skb->data; @@ -328,18 +326,24 @@ int unicast_send_skb(struct sk_buff *skb, struct bat_priv *bat_priv) if (atomic_read(&bat_priv->fragmentation) && data_len + sizeof(struct unicast_packet) > - batman_if->net_dev->mtu) { + neigh_node->if_incoming->net_dev->mtu) { /* send frag skb decreases ttl */ unicast_packet->ttl++; - return frag_send_skb(skb, bat_priv, batman_if, - dstaddr); + ret = frag_send_skb(skb, bat_priv, + neigh_node->if_incoming, neigh_node->addr); + goto out; } - send_skb_packet(skb, batman_if, dstaddr); - return 0; -unlock: - spin_unlock_bh(&bat_priv->orig_hash_lock); -dropped: - kfree_skb(skb); - return 1; + send_skb_packet(skb, neigh_node->if_incoming, neigh_node->addr); + ret = 0; + goto out; + +out: + if (neigh_node) + neigh_node_free_ref(neigh_node); + if (orig_node) + orig_node_free_ref(orig_node); + if (ret == 1) + kfree_skb(skb); + return ret; } diff --git a/net/batman-adv/unicast.h b/net/batman-adv/unicast.h index e32b7867a9a4..16ad7a9242b5 100644 --- a/net/batman-adv/unicast.h +++ b/net/batman-adv/unicast.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2010 B.A.T.M.A.N. contributors: + * Copyright (C) 2010-2011 B.A.T.M.A.N. contributors: * * Andreas Langer * @@ -22,6 +22,8 @@ #ifndef _NET_BATMAN_ADV_UNICAST_H_ #define _NET_BATMAN_ADV_UNICAST_H_ +#include "packet.h" + #define FRAG_TIMEOUT 10000 /* purge frag list entrys after time in ms */ #define FRAG_BUFFER_SIZE 6 /* number of list elements in buffer */ @@ -30,6 +32,27 @@ int frag_reassemble_skb(struct sk_buff *skb, struct bat_priv *bat_priv, void frag_list_free(struct list_head *head); int unicast_send_skb(struct sk_buff *skb, struct bat_priv *bat_priv); int frag_send_skb(struct sk_buff *skb, struct bat_priv *bat_priv, - struct batman_if *batman_if, uint8_t dstaddr[]); + struct hard_iface *hard_iface, uint8_t dstaddr[]); + +static inline int frag_can_reassemble(struct sk_buff *skb, int mtu) +{ + struct unicast_frag_packet *unicast_packet; + int uneven_correction = 0; + unsigned int merged_size; + + unicast_packet = (struct unicast_frag_packet *)skb->data; + + if (unicast_packet->flags & UNI_FRAG_LARGETAIL) { + if (unicast_packet->flags & UNI_FRAG_HEAD) + uneven_correction = 1; + else + uneven_correction = -1; + } + + merged_size = (skb->len - sizeof(struct unicast_frag_packet)) * 2; + merged_size += sizeof(struct unicast_packet) + uneven_correction; + + return merged_size <= mtu; +} #endif /* _NET_BATMAN_ADV_UNICAST_H_ */ diff --git a/net/batman-adv/vis.c b/net/batman-adv/vis.c index cd4c4231fa48..f90212f42082 100644 --- a/net/batman-adv/vis.c +++ b/net/batman-adv/vis.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2008-2010 B.A.T.M.A.N. contributors: + * Copyright (C) 2008-2011 B.A.T.M.A.N. 
contributors: * * Simon Wunderlich * @@ -64,18 +64,20 @@ static void free_info(struct kref *ref) spin_unlock_bh(&bat_priv->vis_list_lock); kfree_skb(info->skb_packet); + kfree(info); } /* Compare two vis packets, used by the hashing algorithm */ -static int vis_info_cmp(void *data1, void *data2) +static int vis_info_cmp(struct hlist_node *node, void *data2) { struct vis_info *d1, *d2; struct vis_packet *p1, *p2; - d1 = data1; + + d1 = container_of(node, struct vis_info, hash_entry); d2 = data2; p1 = (struct vis_packet *)d1->skb_packet->data; p2 = (struct vis_packet *)d2->skb_packet->data; - return compare_orig(p1->vis_orig, p2->vis_orig); + return compare_eth(p1->vis_orig, p2->vis_orig); } /* hash function to choose an entry in a hash table of given size */ @@ -103,6 +105,34 @@ static int vis_info_choose(void *data, int size) return hash % size; } +static struct vis_info *vis_hash_find(struct bat_priv *bat_priv, + void *data) +{ + struct hashtable_t *hash = bat_priv->vis_hash; + struct hlist_head *head; + struct hlist_node *node; + struct vis_info *vis_info, *vis_info_tmp = NULL; + int index; + + if (!hash) + return NULL; + + index = vis_info_choose(data, hash->size); + head = &hash->table[index]; + + rcu_read_lock(); + hlist_for_each_entry_rcu(vis_info, node, head, hash_entry) { + if (!vis_info_cmp(node, data)) + continue; + + vis_info_tmp = vis_info; + break; + } + rcu_read_unlock(); + + return vis_info_tmp; +} + /* insert interface to the list of interfaces of one originator, if it * does not already exist in the list */ static void vis_data_insert_interface(const uint8_t *interface, @@ -113,7 +143,7 @@ static void vis_data_insert_interface(const uint8_t *interface, struct hlist_node *pos; hlist_for_each_entry(entry, pos, if_list, list) { - if (compare_orig(entry->addr, (void *)interface)) + if (compare_eth(entry->addr, (void *)interface)) return; } @@ -165,7 +195,7 @@ static ssize_t vis_data_read_entry(char *buff, struct vis_info_entry *entry, /* maximal length: max(4+17+2, 3+17+1+3+2) == 26 */ if (primary && entry->quality == 0) return sprintf(buff, "HNA %pM, ", entry->dest); - else if (compare_orig(entry->src, src)) + else if (compare_eth(entry->src, src)) return sprintf(buff, "TQ %pM %d, ", entry->dest, entry->quality); @@ -174,9 +204,8 @@ static ssize_t vis_data_read_entry(char *buff, struct vis_info_entry *entry, int vis_seq_print_text(struct seq_file *seq, void *offset) { - struct hlist_node *walk; + struct hlist_node *node; struct hlist_head *head; - struct element_t *bucket; struct vis_info *info; struct vis_packet *packet; struct vis_info_entry *entries; @@ -202,8 +231,8 @@ int vis_seq_print_text(struct seq_file *seq, void *offset) for (i = 0; i < hash->size; i++) { head = &hash->table[i]; - hlist_for_each_entry(bucket, walk, head, hlist) { - info = bucket->data; + rcu_read_lock(); + hlist_for_each_entry_rcu(info, node, head, hash_entry) { packet = (struct vis_packet *)info->skb_packet->data; entries = (struct vis_info_entry *) ((char *)packet + sizeof(struct vis_packet)); @@ -212,7 +241,7 @@ int vis_seq_print_text(struct seq_file *seq, void *offset) if (entries[j].quality == 0) continue; compare = - compare_orig(entries[j].src, packet->vis_orig); + compare_eth(entries[j].src, packet->vis_orig); vis_data_insert_interface(entries[j].src, &vis_if_list, compare); @@ -222,7 +251,7 @@ int vis_seq_print_text(struct seq_file *seq, void *offset) buf_size += 18 + 26 * packet->entries; /* add primary/secondary records */ - if (compare_orig(entry->addr, packet->vis_orig)) + if 
(compare_eth(entry->addr, packet->vis_orig)) buf_size += vis_data_count_prim_sec(&vis_if_list); @@ -235,6 +264,7 @@ int vis_seq_print_text(struct seq_file *seq, void *offset) kfree(entry); } } + rcu_read_unlock(); } buff = kmalloc(buf_size, GFP_ATOMIC); @@ -248,8 +278,8 @@ int vis_seq_print_text(struct seq_file *seq, void *offset) for (i = 0; i < hash->size; i++) { head = &hash->table[i]; - hlist_for_each_entry(bucket, walk, head, hlist) { - info = bucket->data; + rcu_read_lock(); + hlist_for_each_entry_rcu(info, node, head, hash_entry) { packet = (struct vis_packet *)info->skb_packet->data; entries = (struct vis_info_entry *) ((char *)packet + sizeof(struct vis_packet)); @@ -258,7 +288,7 @@ int vis_seq_print_text(struct seq_file *seq, void *offset) if (entries[j].quality == 0) continue; compare = - compare_orig(entries[j].src, packet->vis_orig); + compare_eth(entries[j].src, packet->vis_orig); vis_data_insert_interface(entries[j].src, &vis_if_list, compare); @@ -268,15 +298,15 @@ int vis_seq_print_text(struct seq_file *seq, void *offset) buff_pos += sprintf(buff + buff_pos, "%pM,", entry->addr); - for (i = 0; i < packet->entries; i++) + for (j = 0; j < packet->entries; j++) buff_pos += vis_data_read_entry( buff + buff_pos, - &entries[i], + &entries[j], entry->addr, entry->primary); /* add primary/secondary records */ - if (compare_orig(entry->addr, packet->vis_orig)) + if (compare_eth(entry->addr, packet->vis_orig)) buff_pos += vis_data_read_prim_sec(buff + buff_pos, &vis_if_list); @@ -290,6 +320,7 @@ int vis_seq_print_text(struct seq_file *seq, void *offset) kfree(entry); } } + rcu_read_unlock(); } spin_unlock_bh(&bat_priv->vis_hash_lock); @@ -344,7 +375,7 @@ static int recv_list_is_in(struct bat_priv *bat_priv, spin_lock_bh(&bat_priv->vis_list_lock); list_for_each_entry(entry, recv_list, list) { - if (memcmp(entry->mac, mac, ETH_ALEN) == 0) { + if (compare_eth(entry->mac, mac)) { spin_unlock_bh(&bat_priv->vis_list_lock); return 1; } @@ -380,8 +411,7 @@ static struct vis_info *add_packet(struct bat_priv *bat_priv, sizeof(struct vis_packet)); memcpy(search_packet->vis_orig, vis_packet->vis_orig, ETH_ALEN); - old_info = hash_find(bat_priv->vis_hash, vis_info_cmp, vis_info_choose, - &search_elem); + old_info = vis_hash_find(bat_priv, &search_elem); kfree_skb(search_elem.skb_packet); if (old_info) { @@ -441,10 +471,10 @@ static struct vis_info *add_packet(struct bat_priv *bat_priv, /* try to add it */ hash_added = hash_add(bat_priv->vis_hash, vis_info_cmp, vis_info_choose, - info); + info, &info->hash_entry); if (hash_added < 0) { /* did not work (for some reason) */ - kref_put(&old_info->refcount, free_info); + kref_put(&info->refcount, free_info); info = NULL; } @@ -528,9 +558,8 @@ static int find_best_vis_server(struct bat_priv *bat_priv, struct vis_info *info) { struct hashtable_t *hash = bat_priv->orig_hash; - struct hlist_node *walk; + struct hlist_node *node; struct hlist_head *head; - struct element_t *bucket; struct orig_node *orig_node; struct vis_packet *packet; int best_tq = -1, i; @@ -540,16 +569,17 @@ static int find_best_vis_server(struct bat_priv *bat_priv, for (i = 0; i < hash->size; i++) { head = &hash->table[i]; - hlist_for_each_entry(bucket, walk, head, hlist) { - orig_node = bucket->data; + rcu_read_lock(); + hlist_for_each_entry_rcu(orig_node, node, head, hash_entry) { if ((orig_node) && (orig_node->router) && - (orig_node->flags & VIS_SERVER) && - (orig_node->router->tq_avg > best_tq)) { + (orig_node->flags & VIS_SERVER) && + (orig_node->router->tq_avg > best_tq)) { 
best_tq = orig_node->router->tq_avg; memcpy(packet->target_orig, orig_node->orig, ETH_ALEN); } } + rcu_read_unlock(); } return best_tq; @@ -572,9 +602,8 @@ static bool vis_packet_full(struct vis_info *info) static int generate_vis_packet(struct bat_priv *bat_priv) { struct hashtable_t *hash = bat_priv->orig_hash; - struct hlist_node *walk; + struct hlist_node *node; struct hlist_head *head; - struct element_t *bucket; struct orig_node *orig_node; struct neigh_node *neigh_node; struct vis_info *info = (struct vis_info *)bat_priv->my_vis_info; @@ -586,7 +615,6 @@ static int generate_vis_packet(struct bat_priv *bat_priv) info->first_seen = jiffies; packet->vis_type = atomic_read(&bat_priv->vis_mode); - spin_lock_bh(&bat_priv->orig_hash_lock); memcpy(packet->target_orig, broadcast_addr, ETH_ALEN); packet->ttl = TTL; packet->seqno = htonl(ntohl(packet->seqno) + 1); @@ -596,23 +624,21 @@ static int generate_vis_packet(struct bat_priv *bat_priv) if (packet->vis_type == VIS_TYPE_CLIENT_UPDATE) { best_tq = find_best_vis_server(bat_priv, info); - if (best_tq < 0) { - spin_unlock_bh(&bat_priv->orig_hash_lock); + if (best_tq < 0) return -1; - } } for (i = 0; i < hash->size; i++) { head = &hash->table[i]; - hlist_for_each_entry(bucket, walk, head, hlist) { - orig_node = bucket->data; + rcu_read_lock(); + hlist_for_each_entry_rcu(orig_node, node, head, hash_entry) { neigh_node = orig_node->router; if (!neigh_node) continue; - if (!compare_orig(neigh_node->addr, orig_node->orig)) + if (!compare_eth(neigh_node->addr, orig_node->orig)) continue; if (neigh_node->if_incoming->if_status != IF_ACTIVE) @@ -631,23 +657,19 @@ static int generate_vis_packet(struct bat_priv *bat_priv) entry->quality = neigh_node->tq_avg; packet->entries++; - if (vis_packet_full(info)) { - spin_unlock_bh(&bat_priv->orig_hash_lock); - return 0; - } + if (vis_packet_full(info)) + goto unlock; } + rcu_read_unlock(); } - spin_unlock_bh(&bat_priv->orig_hash_lock); - hash = bat_priv->hna_local_hash; spin_lock_bh(&bat_priv->hna_lhash_lock); for (i = 0; i < hash->size; i++) { head = &hash->table[i]; - hlist_for_each_entry(bucket, walk, head, hlist) { - hna_local_entry = bucket->data; + hlist_for_each_entry(hna_local_entry, node, head, hash_entry) { entry = (struct vis_info_entry *) skb_put(info->skb_packet, sizeof(*entry)); @@ -665,6 +687,10 @@ static int generate_vis_packet(struct bat_priv *bat_priv) spin_unlock_bh(&bat_priv->hna_lhash_lock); return 0; + +unlock: + rcu_read_unlock(); + return 0; } /* free old vis packets. Must be called with this vis_hash_lock @@ -673,25 +699,22 @@ static void purge_vis_packets(struct bat_priv *bat_priv) { int i; struct hashtable_t *hash = bat_priv->vis_hash; - struct hlist_node *walk, *safe; + struct hlist_node *node, *node_tmp; struct hlist_head *head; - struct element_t *bucket; struct vis_info *info; for (i = 0; i < hash->size; i++) { head = &hash->table[i]; - hlist_for_each_entry_safe(bucket, walk, safe, head, hlist) { - info = bucket->data; - + hlist_for_each_entry_safe(info, node, node_tmp, + head, hash_entry) { /* never purge own data. 
*/ if (info == bat_priv->my_vis_info) continue; if (time_after(jiffies, info->first_seen + VIS_TIMEOUT * HZ)) { - hlist_del(walk); - kfree(bucket); + hlist_del(node); send_list_del(info); kref_put(&info->refcount, free_info); } @@ -703,27 +726,24 @@ static void broadcast_vis_packet(struct bat_priv *bat_priv, struct vis_info *info) { struct hashtable_t *hash = bat_priv->orig_hash; - struct hlist_node *walk; + struct hlist_node *node; struct hlist_head *head; - struct element_t *bucket; struct orig_node *orig_node; struct vis_packet *packet; struct sk_buff *skb; - struct batman_if *batman_if; + struct hard_iface *hard_iface; uint8_t dstaddr[ETH_ALEN]; int i; - spin_lock_bh(&bat_priv->orig_hash_lock); packet = (struct vis_packet *)info->skb_packet->data; /* send to all routers in range. */ for (i = 0; i < hash->size; i++) { head = &hash->table[i]; - hlist_for_each_entry(bucket, walk, head, hlist) { - orig_node = bucket->data; - + rcu_read_lock(); + hlist_for_each_entry_rcu(orig_node, node, head, hash_entry) { /* if it's a vis server and reachable, send it. */ if ((!orig_node) || (!orig_node->router)) continue; @@ -736,54 +756,61 @@ static void broadcast_vis_packet(struct bat_priv *bat_priv, continue; memcpy(packet->target_orig, orig_node->orig, ETH_ALEN); - batman_if = orig_node->router->if_incoming; + hard_iface = orig_node->router->if_incoming; memcpy(dstaddr, orig_node->router->addr, ETH_ALEN); - spin_unlock_bh(&bat_priv->orig_hash_lock); skb = skb_clone(info->skb_packet, GFP_ATOMIC); if (skb) - send_skb_packet(skb, batman_if, dstaddr); + send_skb_packet(skb, hard_iface, dstaddr); - spin_lock_bh(&bat_priv->orig_hash_lock); } - + rcu_read_unlock(); } - - spin_unlock_bh(&bat_priv->orig_hash_lock); } static void unicast_vis_packet(struct bat_priv *bat_priv, struct vis_info *info) { struct orig_node *orig_node; + struct neigh_node *neigh_node = NULL; struct sk_buff *skb; struct vis_packet *packet; - struct batman_if *batman_if; - uint8_t dstaddr[ETH_ALEN]; - spin_lock_bh(&bat_priv->orig_hash_lock); packet = (struct vis_packet *)info->skb_packet->data; - orig_node = ((struct orig_node *)hash_find(bat_priv->orig_hash, - compare_orig, choose_orig, - packet->target_orig)); - if ((!orig_node) || (!orig_node->router)) - goto out; + rcu_read_lock(); + orig_node = orig_hash_find(bat_priv, packet->target_orig); - /* don't lock while sending the packets ... we therefore - * copy the required data before sending */ - batman_if = orig_node->router->if_incoming; - memcpy(dstaddr, orig_node->router->addr, ETH_ALEN); - spin_unlock_bh(&bat_priv->orig_hash_lock); + if (!orig_node) + goto unlock; + + neigh_node = orig_node->router; + + if (!neigh_node) + goto unlock; + + if (!atomic_inc_not_zero(&neigh_node->refcount)) { + neigh_node = NULL; + goto unlock; + } + + rcu_read_unlock(); skb = skb_clone(info->skb_packet, GFP_ATOMIC); if (skb) - send_skb_packet(skb, batman_if, dstaddr); + send_skb_packet(skb, neigh_node->if_incoming, + neigh_node->addr); - return; + goto out; +unlock: + rcu_read_unlock(); out: - spin_unlock_bh(&bat_priv->orig_hash_lock); + if (neigh_node) + neigh_node_free_ref(neigh_node); + if (orig_node) + orig_node_free_ref(orig_node); + return; } /* only send one vis packet. 
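The reworked unicast_vis_packet() above drops the old copy-addresses-under-orig_hash_lock dance: the router is looked up under rcu_read_lock() and pinned with atomic_inc_not_zero() before the read-side section ends, so it cannot be freed while the cloned skb is sent. A condensed sketch of that idiom using the orig_node/neigh_node fields from the patch (the helper name pin_router is illustrative, not a function in the patch):

static struct neigh_node *pin_router(struct orig_node *orig_node)
{
	struct neigh_node *router;

	rcu_read_lock();
	router = orig_node->router;

	/* the refcount may already have hit zero on a parallel free */
	if (!router || !atomic_inc_not_zero(&router->refcount))
		router = NULL;

	rcu_read_unlock();

	/* caller releases the pin with neigh_node_free_ref(router) */
	return router;
}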
called from send_vis_packets() */ @@ -815,7 +842,7 @@ static void send_vis_packets(struct work_struct *work) container_of(work, struct delayed_work, work); struct bat_priv *bat_priv = container_of(delayed_work, struct bat_priv, vis_work); - struct vis_info *info, *temp; + struct vis_info *info; spin_lock_bh(&bat_priv->vis_hash_lock); purge_vis_packets(bat_priv); @@ -825,8 +852,9 @@ static void send_vis_packets(struct work_struct *work) send_list_add(bat_priv, bat_priv->my_vis_info); } - list_for_each_entry_safe(info, temp, &bat_priv->vis_send_list, - send_list) { + while (!list_empty(&bat_priv->vis_send_list)) { + info = list_first_entry(&bat_priv->vis_send_list, + typeof(*info), send_list); kref_get(&info->refcount); spin_unlock_bh(&bat_priv->vis_hash_lock); @@ -894,7 +922,8 @@ int vis_init(struct bat_priv *bat_priv) INIT_LIST_HEAD(&bat_priv->vis_send_list); hash_added = hash_add(bat_priv->vis_hash, vis_info_cmp, vis_info_choose, - bat_priv->my_vis_info); + bat_priv->my_vis_info, + &bat_priv->my_vis_info->hash_entry); if (hash_added < 0) { pr_err("Can't add own vis packet into hash\n"); /* not in hash, need to remove it manually. */ @@ -916,10 +945,11 @@ err: } /* Decrease the reference count on a hash item info */ -static void free_info_ref(void *data, void *arg) +static void free_info_ref(struct hlist_node *node, void *arg) { - struct vis_info *info = data; + struct vis_info *info; + info = container_of(node, struct vis_info, hash_entry); send_list_del(info); kref_put(&info->refcount, free_info); } diff --git a/net/batman-adv/vis.h b/net/batman-adv/vis.h index 2c3b33089a9b..31b820d07f23 100644 --- a/net/batman-adv/vis.h +++ b/net/batman-adv/vis.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2008-2010 B.A.T.M.A.N. contributors: + * Copyright (C) 2008-2011 B.A.T.M.A.N. contributors: * * Simon Wunderlich, Marek Lindner * diff --git a/net/bluetooth/Kconfig b/net/bluetooth/Kconfig index ed371684c133..6ae5ec508587 100644 --- a/net/bluetooth/Kconfig +++ b/net/bluetooth/Kconfig @@ -27,31 +27,27 @@ menuconfig BT compile it as module (bluetooth). To use Linux Bluetooth subsystem, you will need several user-space - utilities like hciconfig and hcid. These utilities and updates to - Bluetooth kernel modules are provided in the BlueZ packages. - For more information, see <http://www.bluez.org/>. + utilities like hciconfig and bluetoothd. These utilities and updates + to Bluetooth kernel modules are provided in the BlueZ packages. For + more information, see <http://www.bluez.org/>. + +if BT != n config BT_L2CAP - tristate "L2CAP protocol support" - depends on BT + bool "L2CAP protocol support" select CRC16 help L2CAP (Logical Link Control and Adaptation Protocol) provides connection oriented and connection-less data transport. L2CAP support is required for most Bluetooth applications. - Say Y here to compile L2CAP support into the kernel or say M to - compile it as module (l2cap). - config BT_SCO - tristate "SCO links support" - depends on BT + bool "SCO links support" help SCO link provides voice transport over Bluetooth. SCO support is required for voice applications like Headset and Audio. - Say Y here to compile SCO support into the kernel or say M to - compile it as module (sco). 
+endif source "net/bluetooth/rfcomm/Kconfig" diff --git a/net/bluetooth/Makefile b/net/bluetooth/Makefile index 250f954f0213..f04fe9a9d634 100644 --- a/net/bluetooth/Makefile +++ b/net/bluetooth/Makefile @@ -3,11 +3,11 @@ # obj-$(CONFIG_BT) += bluetooth.o -obj-$(CONFIG_BT_L2CAP) += l2cap.o -obj-$(CONFIG_BT_SCO) += sco.o obj-$(CONFIG_BT_RFCOMM) += rfcomm/ obj-$(CONFIG_BT_BNEP) += bnep/ obj-$(CONFIG_BT_CMTP) += cmtp/ obj-$(CONFIG_BT_HIDP) += hidp/ bluetooth-y := af_bluetooth.o hci_core.o hci_conn.o hci_event.o mgmt.o hci_sock.o hci_sysfs.o lib.o +bluetooth-$(CONFIG_BT_L2CAP) += l2cap_core.o l2cap_sock.o +bluetooth-$(CONFIG_BT_SCO) += sco.o diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index c4cf3f595004..8add9b499912 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -40,7 +40,7 @@ #include <net/bluetooth/bluetooth.h> -#define VERSION "2.15" +#define VERSION "2.16" /* Bluetooth sockets */ #define BT_MAX_PROTO 8 @@ -199,14 +199,15 @@ struct sock *bt_accept_dequeue(struct sock *parent, struct socket *newsock) BT_DBG("parent %p", parent); + local_bh_disable(); list_for_each_safe(p, n, &bt_sk(parent)->accept_q) { sk = (struct sock *) list_entry(p, struct bt_sock, accept_q); - lock_sock(sk); + bh_lock_sock(sk); /* FIXME: Is this check still needed */ if (sk->sk_state == BT_CLOSED) { - release_sock(sk); + bh_unlock_sock(sk); bt_accept_unlink(sk); continue; } @@ -216,12 +217,16 @@ struct sock *bt_accept_dequeue(struct sock *parent, struct socket *newsock) bt_accept_unlink(sk); if (newsock) sock_graft(sk, newsock); - release_sock(sk); + + bh_unlock_sock(sk); + local_bh_enable(); return sk; } - release_sock(sk); + bh_unlock_sock(sk); } + local_bh_enable(); + return NULL; } EXPORT_SYMBOL(bt_accept_dequeue); @@ -240,7 +245,8 @@ int bt_sock_recvmsg(struct kiocb *iocb, struct socket *sock, if (flags & (MSG_OOB)) return -EOPNOTSUPP; - if (!(skb = skb_recv_datagram(sk, flags, noblock, &err))) { + skb = skb_recv_datagram(sk, flags, noblock, &err); + if (!skb) { if (sk->sk_shutdown & RCV_SHUTDOWN) return 0; return err; @@ -323,7 +329,8 @@ int bt_sock_stream_recvmsg(struct kiocb *iocb, struct socket *sock, if (copied >= target) break; - if ((err = sock_error(sk)) != 0) + err = sock_error(sk); + if (err) break; if (sk->sk_shutdown & RCV_SHUTDOWN) break; @@ -390,7 +397,7 @@ static inline unsigned int bt_accept_poll(struct sock *parent) return 0; } -unsigned int bt_sock_poll(struct file * file, struct socket *sock, poll_table *wait) +unsigned int bt_sock_poll(struct file *file, struct socket *sock, poll_table *wait) { struct sock *sk = sock->sk; unsigned int mask = 0; @@ -538,13 +545,39 @@ static int __init bt_init(void) BT_INFO("HCI device and connection manager initialized"); - hci_sock_init(); + err = hci_sock_init(); + if (err < 0) + goto error; + + err = l2cap_init(); + if (err < 0) + goto sock_err; + + err = sco_init(); + if (err < 0) { + l2cap_exit(); + goto sock_err; + } return 0; + +sock_err: + hci_sock_cleanup(); + +error: + sock_unregister(PF_BLUETOOTH); + bt_sysfs_cleanup(); + + return err; } static void __exit bt_exit(void) { + + sco_exit(); + + l2cap_exit(); + hci_sock_cleanup(); sock_unregister(PF_BLUETOOTH); diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c index 5868597534e5..03d4d1245d58 100644 --- a/net/bluetooth/bnep/core.c +++ b/net/bluetooth/bnep/core.c @@ -708,8 +708,6 @@ static int __init bnep_init(void) { char flt[50] = ""; - l2cap_load(); - #ifdef CONFIG_BT_BNEP_PROTO_FILTER strcat(flt, "protocol "); #endif diff 
--git a/net/bluetooth/bnep/sock.c b/net/bluetooth/bnep/sock.c index 2862f53b66b1..d935da71ab3b 100644 --- a/net/bluetooth/bnep/sock.c +++ b/net/bluetooth/bnep/sock.c @@ -88,6 +88,7 @@ static int bnep_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long sockfd_put(nsock); return -EBADFD; } + ca.device[sizeof(ca.device)-1] = 0; err = bnep_add_connection(&ca, nsock); if (!err) { diff --git a/net/bluetooth/cmtp/capi.c b/net/bluetooth/cmtp/capi.c index 3487cfe74aec..67cff810c77d 100644 --- a/net/bluetooth/cmtp/capi.c +++ b/net/bluetooth/cmtp/capi.c @@ -155,7 +155,8 @@ static void cmtp_send_interopmsg(struct cmtp_session *session, BT_DBG("session %p subcmd 0x%02x appl %d msgnum %d", session, subcmd, appl, msgnum); - if (!(skb = alloc_skb(CAPI_MSG_BASELEN + 6 + len, GFP_ATOMIC))) { + skb = alloc_skb(CAPI_MSG_BASELEN + 6 + len, GFP_ATOMIC); + if (!skb) { BT_ERR("Can't allocate memory for interoperability packet"); return; } diff --git a/net/bluetooth/cmtp/core.c b/net/bluetooth/cmtp/core.c index 8e5f292529ac..964ea9126f9f 100644 --- a/net/bluetooth/cmtp/core.c +++ b/net/bluetooth/cmtp/core.c @@ -115,7 +115,8 @@ static inline void cmtp_add_msgpart(struct cmtp_session *session, int id, const size = (skb) ? skb->len + count : count; - if (!(nskb = alloc_skb(size, GFP_ATOMIC))) { + nskb = alloc_skb(size, GFP_ATOMIC); + if (!nskb) { BT_ERR("Can't allocate memory for CAPI message"); return; } @@ -216,7 +217,8 @@ static void cmtp_process_transmit(struct cmtp_session *session) BT_DBG("session %p", session); - if (!(nskb = alloc_skb(session->mtu, GFP_ATOMIC))) { + nskb = alloc_skb(session->mtu, GFP_ATOMIC); + if (!nskb) { BT_ERR("Can't allocate memory for new frame"); return; } @@ -224,7 +226,8 @@ static void cmtp_process_transmit(struct cmtp_session *session) while ((skb = skb_dequeue(&session->transmit))) { struct cmtp_scb *scb = (void *) skb->cb; - if ((tail = (session->mtu - nskb->len)) < 5) { + tail = session->mtu - nskb->len; + if (tail < 5) { cmtp_send_frame(session, nskb->data, nskb->len); skb_trim(nskb, 0); tail = session->mtu; @@ -466,8 +469,6 @@ int cmtp_get_conninfo(struct cmtp_conninfo *ci) static int __init cmtp_init(void) { - l2cap_load(); - BT_INFO("CMTP (CAPI Emulation) ver %s", VERSION); cmtp_init_sockets(); diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 6b90a4191734..7a6f56b2f49d 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -45,6 +45,33 @@ #include <net/bluetooth/bluetooth.h> #include <net/bluetooth/hci_core.h> +static void hci_le_connect(struct hci_conn *conn) +{ + struct hci_dev *hdev = conn->hdev; + struct hci_cp_le_create_conn cp; + + conn->state = BT_CONNECT; + conn->out = 1; + conn->link_mode |= HCI_LM_MASTER; + + memset(&cp, 0, sizeof(cp)); + cp.scan_interval = cpu_to_le16(0x0004); + cp.scan_window = cpu_to_le16(0x0004); + bacpy(&cp.peer_addr, &conn->dst); + cp.conn_interval_min = cpu_to_le16(0x0008); + cp.conn_interval_max = cpu_to_le16(0x0100); + cp.supervision_timeout = cpu_to_le16(0x0064); + cp.min_ce_len = cpu_to_le16(0x0001); + cp.max_ce_len = cpu_to_le16(0x0001); + + hci_send_cmd(hdev, HCI_OP_LE_CREATE_CONN, sizeof(cp), &cp); +} + +static void hci_le_connect_cancel(struct hci_conn *conn) +{ + hci_send_cmd(conn->hdev, HCI_OP_LE_CREATE_CONN_CANCEL, 0, NULL); +} + void hci_acl_connect(struct hci_conn *conn) { struct hci_dev *hdev = conn->hdev; @@ -156,6 +183,26 @@ void hci_setup_sync(struct hci_conn *conn, __u16 handle) hci_send_cmd(hdev, HCI_OP_SETUP_SYNC_CONN, sizeof(cp), &cp); } +void hci_le_conn_update(struct 
hci_conn *conn, u16 min, u16 max, + u16 latency, u16 to_multiplier) +{ + struct hci_cp_le_conn_update cp; + struct hci_dev *hdev = conn->hdev; + + memset(&cp, 0, sizeof(cp)); + + cp.handle = cpu_to_le16(conn->handle); + cp.conn_interval_min = cpu_to_le16(min); + cp.conn_interval_max = cpu_to_le16(max); + cp.conn_latency = cpu_to_le16(latency); + cp.supervision_timeout = cpu_to_le16(to_multiplier); + cp.min_ce_len = cpu_to_le16(0x0001); + cp.max_ce_len = cpu_to_le16(0x0001); + + hci_send_cmd(hdev, HCI_OP_LE_CONN_UPDATE, sizeof(cp), &cp); +} +EXPORT_SYMBOL(hci_le_conn_update); + /* Device _must_ be locked */ void hci_sco_setup(struct hci_conn *conn, __u8 status) { @@ -193,8 +240,12 @@ static void hci_conn_timeout(unsigned long arg) switch (conn->state) { case BT_CONNECT: case BT_CONNECT2: - if (conn->type == ACL_LINK && conn->out) - hci_acl_connect_cancel(conn); + if (conn->out) { + if (conn->type == ACL_LINK) + hci_acl_connect_cancel(conn); + else if (conn->type == LE_LINK) + hci_le_connect_cancel(conn); + } break; case BT_CONFIG: case BT_CONNECTED: @@ -234,6 +285,8 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst) conn->mode = HCI_CM_ACTIVE; conn->state = BT_OPEN; conn->auth_type = HCI_AT_GENERAL_BONDING; + conn->io_capability = hdev->io_capability; + conn->remote_auth = 0xff; conn->power_save = 1; conn->disc_timeout = HCI_DISCONN_TIMEOUT; @@ -295,6 +348,11 @@ int hci_conn_del(struct hci_conn *conn) /* Unacked frames */ hdev->acl_cnt += conn->sent; + } else if (conn->type == LE_LINK) { + if (hdev->le_pkts) + hdev->le_cnt += conn->sent; + else + hdev->acl_cnt += conn->sent; } else { struct hci_conn *acl = conn->link; if (acl) { @@ -360,15 +418,31 @@ struct hci_dev *hci_get_route(bdaddr_t *dst, bdaddr_t *src) } EXPORT_SYMBOL(hci_get_route); -/* Create SCO or ACL connection. +/* Create SCO, ACL or LE connection. 
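hci_le_connect() and hci_le_conn_update() above fill the LE connection parameters in raw controller units rather than milliseconds: per the Bluetooth Core specification the connection interval uses 1.25 ms units, scan interval/window 0.625 ms units, and the supervision timeout 10 ms units, so cp.conn_interval_max = 0x0100 is 320 ms and cp.supervision_timeout = 0x0064 is one second. The conversion helpers below are purely illustrative, not kernel API:

#include <linux/types.h>
#include <linux/kernel.h>

static inline __le16 le_conn_interval_ms(unsigned int ms)
{
	return cpu_to_le16((ms * 4) / 5);	/* 1.25 ms units */
}

static inline __le16 le_scan_window_ms(unsigned int ms)
{
	return cpu_to_le16((ms * 8) / 5);	/* 0.625 ms units */
}

static inline __le16 le_supervision_timeout_ms(unsigned int ms)
{
	return cpu_to_le16(ms / 10);		/* 10 ms units */
}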
* Device _must_ be locked */ struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst, __u8 sec_level, __u8 auth_type) { struct hci_conn *acl; struct hci_conn *sco; + struct hci_conn *le; BT_DBG("%s dst %s", hdev->name, batostr(dst)); + if (type == LE_LINK) { + le = hci_conn_hash_lookup_ba(hdev, LE_LINK, dst); + if (le) + return ERR_PTR(-EBUSY); + le = hci_conn_add(hdev, LE_LINK, dst); + if (!le) + return ERR_PTR(-ENOMEM); + if (le->state == BT_OPEN) + hci_le_connect(le); + + hci_conn_hold(le); + + return le; + } + acl = hci_conn_hash_lookup_ba(hdev, ACL_LINK, dst); if (!acl) { acl = hci_conn_add(hdev, ACL_LINK, dst); @@ -379,14 +453,10 @@ struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst, __u8 hci_conn_hold(acl); if (acl->state == BT_OPEN || acl->state == BT_CLOSED) { - acl->sec_level = sec_level; + acl->sec_level = BT_SECURITY_LOW; + acl->pending_sec_level = sec_level; acl->auth_type = auth_type; hci_acl_connect(acl); - } else { - if (acl->sec_level < sec_level) - acl->sec_level = sec_level; - if (acl->auth_type < auth_type) - acl->auth_type = auth_type; } if (type == ACL_LINK) @@ -442,11 +512,17 @@ static int hci_conn_auth(struct hci_conn *conn, __u8 sec_level, __u8 auth_type) { BT_DBG("conn %p", conn); + if (conn->pending_sec_level > sec_level) + sec_level = conn->pending_sec_level; + if (sec_level > conn->sec_level) - conn->sec_level = sec_level; + conn->pending_sec_level = sec_level; else if (conn->link_mode & HCI_LM_AUTH) return 1; + /* Make sure we preserve an existing MITM requirement*/ + auth_type |= (conn->auth_type & 0x01); + conn->auth_type = auth_type; if (!test_and_set_bit(HCI_CONN_AUTH_PEND, &conn->pend)) { diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 8b602d881fd7..b372fb8bcdcf 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -41,6 +41,7 @@ #include <linux/interrupt.h> #include <linux/notifier.h> #include <linux/rfkill.h> +#include <linux/timer.h> #include <net/sock.h> #include <asm/system.h> @@ -50,6 +51,8 @@ #include <net/bluetooth/bluetooth.h> #include <net/bluetooth/hci_core.h> +#define AUTO_OFF_TIMEOUT 2000 + static void hci_cmd_task(unsigned long arg); static void hci_rx_task(unsigned long arg); static void hci_tx_task(unsigned long arg); @@ -95,11 +98,10 @@ void hci_req_complete(struct hci_dev *hdev, __u16 cmd, int result) { BT_DBG("%s command 0x%04x result 0x%2.2x", hdev->name, cmd, result); - /* If the request has set req_last_cmd (typical for multi-HCI - * command requests) check if the completed command matches - * this, and if not just return. Single HCI command requests - * typically leave req_last_cmd as 0 */ - if (hdev->req_last_cmd && cmd != hdev->req_last_cmd) + /* If this is the init phase check if the completed command matches + * the last init command, and if not just return. + */ + if (test_bit(HCI_INIT, &hdev->flags) && hdev->init_last_cmd != cmd) return; if (hdev->req_status == HCI_REQ_PEND) { @@ -122,7 +124,7 @@ static void hci_req_cancel(struct hci_dev *hdev, int err) /* Execute request and wait for completion. 
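In hci_conn_auth() above, auth_type |= (conn->auth_type & 0x01) keeps an already-negotiated MITM requirement from being silently downgraded by a weaker follow-up request. That works because bit 0 of the HCI authentication-requirements value encodes MITM protection while the upper bits select the bonding type (values from the Bluetooth spec; the helper below merely restates the same bit operation and is not part of the patch):

/*
 * 0x00 / 0x01  no bonding        (without / with MITM)
 * 0x02 / 0x03  dedicated bonding (without / with MITM)
 * 0x04 / 0x05  general bonding   (without / with MITM)
 */
static inline u8 auth_req_keep_mitm(u8 new_req, u8 old_req)
{
	return new_req | (old_req & 0x01);
}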
*/ static int __hci_request(struct hci_dev *hdev, void (*req)(struct hci_dev *hdev, unsigned long opt), - unsigned long opt, __u32 timeout) + unsigned long opt, __u32 timeout) { DECLARE_WAITQUEUE(wait, current); int err = 0; @@ -156,7 +158,7 @@ static int __hci_request(struct hci_dev *hdev, void (*req)(struct hci_dev *hdev, break; } - hdev->req_last_cmd = hdev->req_status = hdev->req_result = 0; + hdev->req_status = hdev->req_result = 0; BT_DBG("%s end: err %d", hdev->name, err); @@ -164,7 +166,7 @@ static int __hci_request(struct hci_dev *hdev, void (*req)(struct hci_dev *hdev, } static inline int hci_request(struct hci_dev *hdev, void (*req)(struct hci_dev *hdev, unsigned long opt), - unsigned long opt, __u32 timeout) + unsigned long opt, __u32 timeout) { int ret; @@ -189,6 +191,7 @@ static void hci_reset_req(struct hci_dev *hdev, unsigned long opt) static void hci_init_req(struct hci_dev *hdev, unsigned long opt) { + struct hci_cp_delete_stored_link_key cp; struct sk_buff *skb; __le16 param; __u8 flt_type; @@ -252,15 +255,21 @@ static void hci_init_req(struct hci_dev *hdev, unsigned long opt) flt_type = HCI_FLT_CLEAR_ALL; hci_send_cmd(hdev, HCI_OP_SET_EVENT_FLT, 1, &flt_type); - /* Page timeout ~20 secs */ - param = cpu_to_le16(0x8000); - hci_send_cmd(hdev, HCI_OP_WRITE_PG_TIMEOUT, 2, &param); - /* Connection accept timeout ~20 secs */ param = cpu_to_le16(0x7d00); hci_send_cmd(hdev, HCI_OP_WRITE_CA_TIMEOUT, 2, &param); - hdev->req_last_cmd = HCI_OP_WRITE_CA_TIMEOUT; + bacpy(&cp.bdaddr, BDADDR_ANY); + cp.delete_all = 1; + hci_send_cmd(hdev, HCI_OP_DELETE_STORED_LINK_KEY, sizeof(cp), &cp); +} + +static void hci_le_init_req(struct hci_dev *hdev, unsigned long opt) +{ + BT_DBG("%s", hdev->name); + + /* Read LE buffer size */ + hci_send_cmd(hdev, HCI_OP_LE_READ_BUFFER_SIZE, 0, NULL); } static void hci_scan_req(struct hci_dev *hdev, unsigned long opt) @@ -429,7 +438,8 @@ int hci_inquiry(void __user *arg) if (copy_from_user(&ir, ptr, sizeof(ir))) return -EFAULT; - if (!(hdev = hci_dev_get(ir.dev_id))) + hdev = hci_dev_get(ir.dev_id); + if (!hdev) return -ENODEV; hci_dev_lock_bh(hdev); @@ -455,7 +465,7 @@ int hci_inquiry(void __user *arg) /* cache_dump can't sleep. Therefore we allocate temp buffer and then * copy it to the user space. 
*/ - buf = kmalloc(sizeof(struct inquiry_info) *max_rsp, GFP_KERNEL); + buf = kmalloc(sizeof(struct inquiry_info) * max_rsp, GFP_KERNEL); if (!buf) { err = -ENOMEM; goto done; @@ -489,7 +499,8 @@ int hci_dev_open(__u16 dev) struct hci_dev *hdev; int ret = 0; - if (!(hdev = hci_dev_get(dev))) + hdev = hci_dev_get(dev); + if (!hdev) return -ENODEV; BT_DBG("%s %p", hdev->name, hdev); @@ -521,11 +532,15 @@ int hci_dev_open(__u16 dev) if (!test_bit(HCI_RAW, &hdev->flags)) { atomic_set(&hdev->cmd_cnt, 1); set_bit(HCI_INIT, &hdev->flags); + hdev->init_last_cmd = 0; - //__hci_request(hdev, hci_reset_req, 0, HZ); ret = __hci_request(hdev, hci_init_req, 0, msecs_to_jiffies(HCI_INIT_TIMEOUT)); + if (lmp_le_capable(hdev)) + ret = __hci_request(hdev, hci_le_init_req, 0, + msecs_to_jiffies(HCI_INIT_TIMEOUT)); + clear_bit(HCI_INIT, &hdev->flags); } @@ -533,6 +548,8 @@ int hci_dev_open(__u16 dev) hci_dev_hold(hdev); set_bit(HCI_UP, &hdev->flags); hci_notify(hdev, HCI_DEV_UP); + if (!test_bit(HCI_SETUP, &hdev->flags)) + mgmt_powered(hdev->id, 1); } else { /* Init failed, cleanup */ tasklet_kill(&hdev->rx_task); @@ -606,6 +623,7 @@ static int hci_dev_do_close(struct hci_dev *hdev) /* Drop last sent command */ if (hdev->sent_cmd) { + del_timer_sync(&hdev->cmd_timer); kfree_skb(hdev->sent_cmd); hdev->sent_cmd = NULL; } @@ -614,6 +632,8 @@ static int hci_dev_do_close(struct hci_dev *hdev) * and no tasks are scheduled. */ hdev->close(hdev); + mgmt_powered(hdev->id, 0); + /* Clear flags */ hdev->flags = 0; @@ -664,7 +684,7 @@ int hci_dev_reset(__u16 dev) hdev->flush(hdev); atomic_set(&hdev->cmd_cnt, 1); - hdev->acl_cnt = 0; hdev->sco_cnt = 0; + hdev->acl_cnt = 0; hdev->sco_cnt = 0; hdev->le_cnt = 0; if (!test_bit(HCI_RAW, &hdev->flags)) ret = __hci_request(hdev, hci_reset_req, 0, @@ -793,9 +813,17 @@ int hci_get_dev_list(void __user *arg) read_lock_bh(&hci_dev_list_lock); list_for_each(p, &hci_dev_list) { struct hci_dev *hdev; + hdev = list_entry(p, struct hci_dev, list); + + hci_del_off_timer(hdev); + + if (!test_bit(HCI_MGMT, &hdev->flags)) + set_bit(HCI_PAIRABLE, &hdev->flags); + (dr + n)->dev_id = hdev->id; (dr + n)->dev_opt = hdev->flags; + if (++n >= dev_num) break; } @@ -823,6 +851,11 @@ int hci_get_dev_info(void __user *arg) if (!hdev) return -ENODEV; + hci_del_off_timer(hdev); + + if (!test_bit(HCI_MGMT, &hdev->flags)) + set_bit(HCI_PAIRABLE, &hdev->flags); + strcpy(di.name, hdev->name); di.bdaddr = hdev->bdaddr; di.type = (hdev->bus & 0x0f) | (hdev->dev_type << 4); @@ -891,6 +924,159 @@ void hci_free_dev(struct hci_dev *hdev) } EXPORT_SYMBOL(hci_free_dev); +static void hci_power_on(struct work_struct *work) +{ + struct hci_dev *hdev = container_of(work, struct hci_dev, power_on); + + BT_DBG("%s", hdev->name); + + if (hci_dev_open(hdev->id) < 0) + return; + + if (test_bit(HCI_AUTO_OFF, &hdev->flags)) + mod_timer(&hdev->off_timer, + jiffies + msecs_to_jiffies(AUTO_OFF_TIMEOUT)); + + if (test_and_clear_bit(HCI_SETUP, &hdev->flags)) + mgmt_index_added(hdev->id); +} + +static void hci_power_off(struct work_struct *work) +{ + struct hci_dev *hdev = container_of(work, struct hci_dev, power_off); + + BT_DBG("%s", hdev->name); + + hci_dev_close(hdev->id); +} + +static void hci_auto_off(unsigned long data) +{ + struct hci_dev *hdev = (struct hci_dev *) data; + + BT_DBG("%s", hdev->name); + + clear_bit(HCI_AUTO_OFF, &hdev->flags); + + queue_work(hdev->workqueue, &hdev->power_off); +} + +void hci_del_off_timer(struct hci_dev *hdev) +{ + BT_DBG("%s", hdev->name); + + clear_bit(HCI_AUTO_OFF, &hdev->flags); + 
del_timer(&hdev->off_timer); +} + +int hci_uuids_clear(struct hci_dev *hdev) +{ + struct list_head *p, *n; + + list_for_each_safe(p, n, &hdev->uuids) { + struct bt_uuid *uuid; + + uuid = list_entry(p, struct bt_uuid, list); + + list_del(p); + kfree(uuid); + } + + return 0; +} + +int hci_link_keys_clear(struct hci_dev *hdev) +{ + struct list_head *p, *n; + + list_for_each_safe(p, n, &hdev->link_keys) { + struct link_key *key; + + key = list_entry(p, struct link_key, list); + + list_del(p); + kfree(key); + } + + return 0; +} + +struct link_key *hci_find_link_key(struct hci_dev *hdev, bdaddr_t *bdaddr) +{ + struct list_head *p; + + list_for_each(p, &hdev->link_keys) { + struct link_key *k; + + k = list_entry(p, struct link_key, list); + + if (bacmp(bdaddr, &k->bdaddr) == 0) + return k; + } + + return NULL; +} + +int hci_add_link_key(struct hci_dev *hdev, int new_key, bdaddr_t *bdaddr, + u8 *val, u8 type, u8 pin_len) +{ + struct link_key *key, *old_key; + u8 old_key_type; + + old_key = hci_find_link_key(hdev, bdaddr); + if (old_key) { + old_key_type = old_key->type; + key = old_key; + } else { + old_key_type = 0xff; + key = kzalloc(sizeof(*key), GFP_ATOMIC); + if (!key) + return -ENOMEM; + list_add(&key->list, &hdev->link_keys); + } + + BT_DBG("%s key for %s type %u", hdev->name, batostr(bdaddr), type); + + bacpy(&key->bdaddr, bdaddr); + memcpy(key->val, val, 16); + key->type = type; + key->pin_len = pin_len; + + if (new_key) + mgmt_new_key(hdev->id, key, old_key_type); + + if (type == 0x06) + key->type = old_key_type; + + return 0; +} + +int hci_remove_link_key(struct hci_dev *hdev, bdaddr_t *bdaddr) +{ + struct link_key *key; + + key = hci_find_link_key(hdev, bdaddr); + if (!key) + return -ENOENT; + + BT_DBG("%s removing %s", hdev->name, batostr(bdaddr)); + + list_del(&key->list); + kfree(key); + + return 0; +} + +/* HCI command timer function */ +static void hci_cmd_timer(unsigned long arg) +{ + struct hci_dev *hdev = (void *) arg; + + BT_ERR("%s command tx timeout", hdev->name); + atomic_set(&hdev->cmd_cnt, 1); + tasklet_schedule(&hdev->cmd_task); +} + /* Register HCI device */ int hci_register_dev(struct hci_dev *hdev) { @@ -923,6 +1109,7 @@ int hci_register_dev(struct hci_dev *hdev) hdev->pkt_type = (HCI_DM1 | HCI_DH1 | HCI_HV1); hdev->esco_type = (ESCO_HV1); hdev->link_mode = (HCI_LM_ACCEPT); + hdev->io_capability = 0x03; /* No Input No Output */ hdev->idle_timeout = 0; hdev->sniff_max_interval = 800; @@ -936,6 +1123,8 @@ int hci_register_dev(struct hci_dev *hdev) skb_queue_head_init(&hdev->cmd_q); skb_queue_head_init(&hdev->raw_q); + setup_timer(&hdev->cmd_timer, hci_cmd_timer, (unsigned long) hdev); + for (i = 0; i < NUM_REASSEMBLY; i++) hdev->reassembly[i] = NULL; @@ -948,6 +1137,14 @@ int hci_register_dev(struct hci_dev *hdev) INIT_LIST_HEAD(&hdev->blacklist); + INIT_LIST_HEAD(&hdev->uuids); + + INIT_LIST_HEAD(&hdev->link_keys); + + INIT_WORK(&hdev->power_on, hci_power_on); + INIT_WORK(&hdev->power_off, hci_power_off); + setup_timer(&hdev->off_timer, hci_auto_off, (unsigned long) hdev); + memset(&hdev->stat, 0, sizeof(struct hci_dev_stats)); atomic_set(&hdev->promisc, 0); @@ -969,7 +1166,10 @@ int hci_register_dev(struct hci_dev *hdev) } } - mgmt_index_added(hdev->id); + set_bit(HCI_AUTO_OFF, &hdev->flags); + set_bit(HCI_SETUP, &hdev->flags); + queue_work(hdev->workqueue, &hdev->power_on); + hci_notify(hdev, HCI_DEV_REG); return id; @@ -999,7 +1199,10 @@ int hci_unregister_dev(struct hci_dev *hdev) for (i = 0; i < NUM_REASSEMBLY; i++) kfree_skb(hdev->reassembly[i]); - 
mgmt_index_removed(hdev->id); + if (!test_bit(HCI_INIT, &hdev->flags) && + !test_bit(HCI_SETUP, &hdev->flags)) + mgmt_index_removed(hdev->id); + hci_notify(hdev, HCI_DEV_UNREG); if (hdev->rfkill) { @@ -1009,8 +1212,16 @@ int hci_unregister_dev(struct hci_dev *hdev) hci_unregister_sysfs(hdev); + hci_del_off_timer(hdev); + destroy_workqueue(hdev->workqueue); + hci_dev_lock_bh(hdev); + hci_blacklist_clear(hdev); + hci_uuids_clear(hdev); + hci_link_keys_clear(hdev); + hci_dev_unlock_bh(hdev); + __hci_dev_put(hdev); return 0; @@ -1309,7 +1520,7 @@ static int hci_send_frame(struct sk_buff *skb) /* Time stamp */ __net_timestamp(skb); - hci_send_to_sock(hdev, skb); + hci_send_to_sock(hdev, skb, NULL); } /* Get rid of skb owner, prior to sending to the driver. */ @@ -1345,6 +1556,9 @@ int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen, void *param) bt_cb(skb)->pkt_type = HCI_COMMAND_PKT; skb->dev = (void *) hdev; + if (test_bit(HCI_INIT, &hdev->flags)) + hdev->init_last_cmd = opcode; + skb_queue_tail(&hdev->cmd_q, skb); tasklet_schedule(&hdev->cmd_task); @@ -1391,7 +1605,7 @@ void hci_send_acl(struct hci_conn *conn, struct sk_buff *skb, __u16 flags) skb->dev = (void *) hdev; bt_cb(skb)->pkt_type = HCI_ACLDATA_PKT; - hci_add_acl_hdr(skb, conn->handle, flags | ACL_START); + hci_add_acl_hdr(skb, conn->handle, flags); list = skb_shinfo(skb)->frag_list; if (!list) { @@ -1409,12 +1623,15 @@ void hci_send_acl(struct hci_conn *conn, struct sk_buff *skb, __u16 flags) spin_lock_bh(&conn->data_q.lock); __skb_queue_tail(&conn->data_q, skb); + + flags &= ~ACL_START; + flags |= ACL_CONT; do { skb = list; list = list->next; skb->dev = (void *) hdev; bt_cb(skb)->pkt_type = HCI_ACLDATA_PKT; - hci_add_acl_hdr(skb, conn->handle, flags | ACL_CONT); + hci_add_acl_hdr(skb, conn->handle, flags); BT_DBG("%s frag %p len %d", hdev->name, skb, skb->len); @@ -1482,8 +1699,25 @@ static inline struct hci_conn *hci_low_sent(struct hci_dev *hdev, __u8 type, int } if (conn) { - int cnt = (type == ACL_LINK ? hdev->acl_cnt : hdev->sco_cnt); - int q = cnt / num; + int cnt, q; + + switch (conn->type) { + case ACL_LINK: + cnt = hdev->acl_cnt; + break; + case SCO_LINK: + case ESCO_LINK: + cnt = hdev->sco_cnt; + break; + case LE_LINK: + cnt = hdev->le_mtu ? hdev->le_cnt : hdev->acl_cnt; + break; + default: + cnt = 0; + BT_ERR("Unknown link type"); + } + + q = cnt / num; *quote = q ? 
q : 1; } else *quote = 0; @@ -1492,19 +1726,19 @@ static inline struct hci_conn *hci_low_sent(struct hci_dev *hdev, __u8 type, int return conn; } -static inline void hci_acl_tx_to(struct hci_dev *hdev) +static inline void hci_link_tx_to(struct hci_dev *hdev, __u8 type) { struct hci_conn_hash *h = &hdev->conn_hash; struct list_head *p; struct hci_conn *c; - BT_ERR("%s ACL tx timeout", hdev->name); + BT_ERR("%s link tx timeout", hdev->name); /* Kill stalled connections */ list_for_each(p, &h->list) { c = list_entry(p, struct hci_conn, list); - if (c->type == ACL_LINK && c->sent) { - BT_ERR("%s killing stalled ACL connection %s", + if (c->type == type && c->sent) { + BT_ERR("%s killing stalled connection %s", hdev->name, batostr(&c->dst)); hci_acl_disconn(c, 0x13); } @@ -1523,7 +1757,7 @@ static inline void hci_sched_acl(struct hci_dev *hdev) /* ACL tx timeout must be longer than maximum * link supervision timeout (40.9 seconds) */ if (!hdev->acl_cnt && time_after(jiffies, hdev->acl_last_tx + HZ * 45)) - hci_acl_tx_to(hdev); + hci_link_tx_to(hdev, ACL_LINK); } while (hdev->acl_cnt && (conn = hci_low_sent(hdev, ACL_LINK, &quote))) { @@ -1582,6 +1816,40 @@ static inline void hci_sched_esco(struct hci_dev *hdev) } } +static inline void hci_sched_le(struct hci_dev *hdev) +{ + struct hci_conn *conn; + struct sk_buff *skb; + int quote, cnt; + + BT_DBG("%s", hdev->name); + + if (!test_bit(HCI_RAW, &hdev->flags)) { + /* LE tx timeout must be longer than maximum + * link supervision timeout (40.9 seconds) */ + if (!hdev->le_cnt && hdev->le_pkts && + time_after(jiffies, hdev->le_last_tx + HZ * 45)) + hci_link_tx_to(hdev, LE_LINK); + } + + cnt = hdev->le_pkts ? hdev->le_cnt : hdev->acl_cnt; + while (cnt && (conn = hci_low_sent(hdev, LE_LINK, &quote))) { + while (quote-- && (skb = skb_dequeue(&conn->data_q))) { + BT_DBG("skb %p len %d", skb, skb->len); + + hci_send_frame(skb); + hdev->le_last_tx = jiffies; + + cnt--; + conn->sent++; + } + } + if (hdev->le_pkts) + hdev->le_cnt = cnt; + else + hdev->acl_cnt = cnt; +} + static void hci_tx_task(unsigned long arg) { struct hci_dev *hdev = (struct hci_dev *) arg; @@ -1589,7 +1857,8 @@ static void hci_tx_task(unsigned long arg) read_lock(&hci_task_lock); - BT_DBG("%s acl %d sco %d", hdev->name, hdev->acl_cnt, hdev->sco_cnt); + BT_DBG("%s acl %d sco %d le %d", hdev->name, hdev->acl_cnt, + hdev->sco_cnt, hdev->le_cnt); /* Schedule queues and send stuff to HCI driver */ @@ -1599,6 +1868,8 @@ static void hci_tx_task(unsigned long arg) hci_sched_esco(hdev); + hci_sched_le(hdev); + /* Send next queued raw (unknown type) packet */ while ((skb = skb_dequeue(&hdev->raw_q))) hci_send_frame(skb); @@ -1696,7 +1967,7 @@ static void hci_rx_task(unsigned long arg) while ((skb = skb_dequeue(&hdev->rx_q))) { if (atomic_read(&hdev->promisc)) { /* Send copy to the sockets */ - hci_send_to_sock(hdev, skb); + hci_send_to_sock(hdev, skb, NULL); } if (test_bit(HCI_RAW, &hdev->flags)) { @@ -1746,20 +2017,20 @@ static void hci_cmd_task(unsigned long arg) BT_DBG("%s cmd %d", hdev->name, atomic_read(&hdev->cmd_cnt)); - if (!atomic_read(&hdev->cmd_cnt) && time_after(jiffies, hdev->cmd_last_tx + HZ)) { - BT_ERR("%s command tx timeout", hdev->name); - atomic_set(&hdev->cmd_cnt, 1); - } - /* Send queued commands */ - if (atomic_read(&hdev->cmd_cnt) && (skb = skb_dequeue(&hdev->cmd_q))) { + if (atomic_read(&hdev->cmd_cnt)) { + skb = skb_dequeue(&hdev->cmd_q); + if (!skb) + return; + kfree_skb(hdev->sent_cmd); hdev->sent_cmd = skb_clone(skb, GFP_ATOMIC); if (hdev->sent_cmd) { 
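The command queue handling above drops the old cmd_last_tx polling: hci_cmd_task() now arms hdev->cmd_timer via mod_timer() whenever a command is handed to the driver (just below), the command-complete/command-status handlers delete it again, and hci_cmd_timer() recovers the queue if the controller never answers. A minimal self-contained sketch of that arm/cancel watchdog pattern, with illustrative names and an assumed 1 second timeout (the kernel uses HCI_CMD_TIMEOUT, whose value is not shown in this diff):

#include <linux/timer.h>
#include <linux/jiffies.h>

#define MY_CMD_TIMEOUT	msecs_to_jiffies(1000)	/* assumed value */

struct my_dev {
	struct timer_list cmd_timer;
};

static void my_cmd_timeout(unsigned long data)
{
	struct my_dev *dev = (struct my_dev *) data;

	/* controller never answered: restore credits, kick the queue */
	(void) dev;
}

static void my_dev_init(struct my_dev *dev)
{
	setup_timer(&dev->cmd_timer, my_cmd_timeout, (unsigned long) dev);
}

static void my_cmd_sent(struct my_dev *dev)
{
	/* arm (or re-arm) the watchdog each time a command goes out */
	mod_timer(&dev->cmd_timer, jiffies + MY_CMD_TIMEOUT);
}

static void my_cmd_answered(struct my_dev *dev)
{
	/* cancel the watchdog once the controller responds */
	del_timer(&dev->cmd_timer);
}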
atomic_dec(&hdev->cmd_cnt); hci_send_frame(skb); - hdev->cmd_last_tx = jiffies; + mod_timer(&hdev->cmd_timer, + jiffies + msecs_to_jiffies(HCI_CMD_TIMEOUT)); } else { skb_queue_head(&hdev->cmd_q, skb); tasklet_schedule(&hdev->cmd_task); diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 38100170d380..3fbfa50c2bff 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -274,15 +274,24 @@ static void hci_cc_write_scan_enable(struct hci_dev *hdev, struct sk_buff *skb) if (!status) { __u8 param = *((__u8 *) sent); + int old_pscan, old_iscan; - clear_bit(HCI_PSCAN, &hdev->flags); - clear_bit(HCI_ISCAN, &hdev->flags); + old_pscan = test_and_clear_bit(HCI_PSCAN, &hdev->flags); + old_iscan = test_and_clear_bit(HCI_ISCAN, &hdev->flags); - if (param & SCAN_INQUIRY) + if (param & SCAN_INQUIRY) { set_bit(HCI_ISCAN, &hdev->flags); + if (!old_iscan) + mgmt_discoverable(hdev->id, 1); + } else if (old_iscan) + mgmt_discoverable(hdev->id, 0); - if (param & SCAN_PAGE) + if (param & SCAN_PAGE) { set_bit(HCI_PSCAN, &hdev->flags); + if (!old_pscan) + mgmt_connectable(hdev->id, 1); + } else if (old_pscan) + mgmt_connectable(hdev->id, 0); } hci_req_complete(hdev, HCI_OP_WRITE_SCAN_ENABLE, status); @@ -415,6 +424,115 @@ static void hci_cc_write_ssp_mode(struct hci_dev *hdev, struct sk_buff *skb) hdev->ssp_mode = *((__u8 *) sent); } +static u8 hci_get_inquiry_mode(struct hci_dev *hdev) +{ + if (hdev->features[6] & LMP_EXT_INQ) + return 2; + + if (hdev->features[3] & LMP_RSSI_INQ) + return 1; + + if (hdev->manufacturer == 11 && hdev->hci_rev == 0x00 && + hdev->lmp_subver == 0x0757) + return 1; + + if (hdev->manufacturer == 15) { + if (hdev->hci_rev == 0x03 && hdev->lmp_subver == 0x6963) + return 1; + if (hdev->hci_rev == 0x09 && hdev->lmp_subver == 0x6963) + return 1; + if (hdev->hci_rev == 0x00 && hdev->lmp_subver == 0x6965) + return 1; + } + + if (hdev->manufacturer == 31 && hdev->hci_rev == 0x2005 && + hdev->lmp_subver == 0x1805) + return 1; + + return 0; +} + +static void hci_setup_inquiry_mode(struct hci_dev *hdev) +{ + u8 mode; + + mode = hci_get_inquiry_mode(hdev); + + hci_send_cmd(hdev, HCI_OP_WRITE_INQUIRY_MODE, 1, &mode); +} + +static void hci_setup_event_mask(struct hci_dev *hdev) +{ + /* The second byte is 0xff instead of 0x9f (two reserved bits + * disabled) since a Broadcom 1.2 dongle doesn't respond to the + * command otherwise */ + u8 events[8] = { 0xff, 0xff, 0xfb, 0xff, 0x00, 0x00, 0x00, 0x00 }; + + /* Events for 1.2 and newer controllers */ + if (hdev->lmp_ver > 1) { + events[4] |= 0x01; /* Flow Specification Complete */ + events[4] |= 0x02; /* Inquiry Result with RSSI */ + events[4] |= 0x04; /* Read Remote Extended Features Complete */ + events[5] |= 0x08; /* Synchronous Connection Complete */ + events[5] |= 0x10; /* Synchronous Connection Changed */ + } + + if (hdev->features[3] & LMP_RSSI_INQ) + events[4] |= 0x04; /* Inquiry Result with RSSI */ + + if (hdev->features[5] & LMP_SNIFF_SUBR) + events[5] |= 0x20; /* Sniff Subrating */ + + if (hdev->features[5] & LMP_PAUSE_ENC) + events[5] |= 0x80; /* Encryption Key Refresh Complete */ + + if (hdev->features[6] & LMP_EXT_INQ) + events[5] |= 0x40; /* Extended Inquiry Result */ + + if (hdev->features[6] & LMP_NO_FLUSH) + events[7] |= 0x01; /* Enhanced Flush Complete */ + + if (hdev->features[7] & LMP_LSTO) + events[6] |= 0x80; /* Link Supervision Timeout Changed */ + + if (hdev->features[6] & LMP_SIMPLE_PAIR) { + events[6] |= 0x01; /* IO Capability Request */ + events[6] |= 0x02; /* IO Capability Response */ + 
events[6] |= 0x04; /* User Confirmation Request */ + events[6] |= 0x08; /* User Passkey Request */ + events[6] |= 0x10; /* Remote OOB Data Request */ + events[6] |= 0x20; /* Simple Pairing Complete */ + events[7] |= 0x04; /* User Passkey Notification */ + events[7] |= 0x08; /* Keypress Notification */ + events[7] |= 0x10; /* Remote Host Supported + * Features Notification */ + } + + if (hdev->features[4] & LMP_LE) + events[7] |= 0x20; /* LE Meta-Event */ + + hci_send_cmd(hdev, HCI_OP_SET_EVENT_MASK, sizeof(events), events); +} + +static void hci_setup(struct hci_dev *hdev) +{ + hci_setup_event_mask(hdev); + + if (hdev->lmp_ver > 1) + hci_send_cmd(hdev, HCI_OP_READ_LOCAL_COMMANDS, 0, NULL); + + if (hdev->features[6] & LMP_SIMPLE_PAIR) { + u8 mode = 0x01; + hci_send_cmd(hdev, HCI_OP_WRITE_SSP_MODE, sizeof(mode), &mode); + } + + if (hdev->features[3] & LMP_RSSI_INQ) + hci_setup_inquiry_mode(hdev); + + if (hdev->features[7] & LMP_INQ_TX_PWR) + hci_send_cmd(hdev, HCI_OP_READ_INQ_RSP_TX_POWER, 0, NULL); +} + static void hci_cc_read_local_version(struct hci_dev *hdev, struct sk_buff *skb) { struct hci_rp_read_local_version *rp = (void *) skb->data; @@ -426,11 +544,34 @@ static void hci_cc_read_local_version(struct hci_dev *hdev, struct sk_buff *skb) hdev->hci_ver = rp->hci_ver; hdev->hci_rev = __le16_to_cpu(rp->hci_rev); + hdev->lmp_ver = rp->lmp_ver; hdev->manufacturer = __le16_to_cpu(rp->manufacturer); + hdev->lmp_subver = __le16_to_cpu(rp->lmp_subver); BT_DBG("%s manufacturer %d hci ver %d:%d", hdev->name, hdev->manufacturer, hdev->hci_ver, hdev->hci_rev); + + if (test_bit(HCI_INIT, &hdev->flags)) + hci_setup(hdev); +} + +static void hci_setup_link_policy(struct hci_dev *hdev) +{ + u16 link_policy = 0; + + if (hdev->features[0] & LMP_RSWITCH) + link_policy |= HCI_LP_RSWITCH; + if (hdev->features[0] & LMP_HOLD) + link_policy |= HCI_LP_HOLD; + if (hdev->features[0] & LMP_SNIFF) + link_policy |= HCI_LP_SNIFF; + if (hdev->features[1] & LMP_PARK) + link_policy |= HCI_LP_PARK; + + link_policy = cpu_to_le16(link_policy); + hci_send_cmd(hdev, HCI_OP_WRITE_DEF_LINK_POLICY, + sizeof(link_policy), &link_policy); } static void hci_cc_read_local_commands(struct hci_dev *hdev, struct sk_buff *skb) @@ -440,9 +581,15 @@ static void hci_cc_read_local_commands(struct hci_dev *hdev, struct sk_buff *skb BT_DBG("%s status 0x%x", hdev->name, rp->status); if (rp->status) - return; + goto done; memcpy(hdev->commands, rp->commands, sizeof(hdev->commands)); + + if (test_bit(HCI_INIT, &hdev->flags) && (hdev->commands[5] & 0x10)) + hci_setup_link_policy(hdev); + +done: + hci_req_complete(hdev, HCI_OP_READ_LOCAL_COMMANDS, rp->status); } static void hci_cc_read_local_features(struct hci_dev *hdev, struct sk_buff *skb) @@ -548,6 +695,130 @@ static void hci_cc_write_ca_timeout(struct hci_dev *hdev, struct sk_buff *skb) hci_req_complete(hdev, HCI_OP_WRITE_CA_TIMEOUT, status); } +static void hci_cc_delete_stored_link_key(struct hci_dev *hdev, + struct sk_buff *skb) +{ + __u8 status = *((__u8 *) skb->data); + + BT_DBG("%s status 0x%x", hdev->name, status); + + hci_req_complete(hdev, HCI_OP_DELETE_STORED_LINK_KEY, status); +} + +static void hci_cc_set_event_mask(struct hci_dev *hdev, struct sk_buff *skb) +{ + __u8 status = *((__u8 *) skb->data); + + BT_DBG("%s status 0x%x", hdev->name, status); + + hci_req_complete(hdev, HCI_OP_SET_EVENT_MASK, status); +} + +static void hci_cc_write_inquiry_mode(struct hci_dev *hdev, + struct sk_buff *skb) +{ + __u8 status = *((__u8 *) skb->data); + + BT_DBG("%s status 0x%x", hdev->name, 
status); + + hci_req_complete(hdev, HCI_OP_WRITE_INQUIRY_MODE, status); +} + +static void hci_cc_read_inq_rsp_tx_power(struct hci_dev *hdev, + struct sk_buff *skb) +{ + __u8 status = *((__u8 *) skb->data); + + BT_DBG("%s status 0x%x", hdev->name, status); + + hci_req_complete(hdev, HCI_OP_READ_INQ_RSP_TX_POWER, status); +} + +static void hci_cc_set_event_flt(struct hci_dev *hdev, struct sk_buff *skb) +{ + __u8 status = *((__u8 *) skb->data); + + BT_DBG("%s status 0x%x", hdev->name, status); + + hci_req_complete(hdev, HCI_OP_SET_EVENT_FLT, status); +} + +static void hci_cc_pin_code_reply(struct hci_dev *hdev, struct sk_buff *skb) +{ + struct hci_rp_pin_code_reply *rp = (void *) skb->data; + struct hci_cp_pin_code_reply *cp; + struct hci_conn *conn; + + BT_DBG("%s status 0x%x", hdev->name, rp->status); + + if (test_bit(HCI_MGMT, &hdev->flags)) + mgmt_pin_code_reply_complete(hdev->id, &rp->bdaddr, rp->status); + + if (rp->status != 0) + return; + + cp = hci_sent_cmd_data(hdev, HCI_OP_PIN_CODE_REPLY); + if (!cp) + return; + + conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &cp->bdaddr); + if (conn) + conn->pin_length = cp->pin_len; +} + +static void hci_cc_pin_code_neg_reply(struct hci_dev *hdev, struct sk_buff *skb) +{ + struct hci_rp_pin_code_neg_reply *rp = (void *) skb->data; + + BT_DBG("%s status 0x%x", hdev->name, rp->status); + + if (test_bit(HCI_MGMT, &hdev->flags)) + mgmt_pin_code_neg_reply_complete(hdev->id, &rp->bdaddr, + rp->status); +} +static void hci_cc_le_read_buffer_size(struct hci_dev *hdev, + struct sk_buff *skb) +{ + struct hci_rp_le_read_buffer_size *rp = (void *) skb->data; + + BT_DBG("%s status 0x%x", hdev->name, rp->status); + + if (rp->status) + return; + + hdev->le_mtu = __le16_to_cpu(rp->le_mtu); + hdev->le_pkts = rp->le_max_pkt; + + hdev->le_cnt = hdev->le_pkts; + + BT_DBG("%s le mtu %d:%d", hdev->name, hdev->le_mtu, hdev->le_pkts); + + hci_req_complete(hdev, HCI_OP_LE_READ_BUFFER_SIZE, rp->status); +} + +static void hci_cc_user_confirm_reply(struct hci_dev *hdev, struct sk_buff *skb) +{ + struct hci_rp_user_confirm_reply *rp = (void *) skb->data; + + BT_DBG("%s status 0x%x", hdev->name, rp->status); + + if (test_bit(HCI_MGMT, &hdev->flags)) + mgmt_user_confirm_reply_complete(hdev->id, &rp->bdaddr, + rp->status); +} + +static void hci_cc_user_confirm_neg_reply(struct hci_dev *hdev, + struct sk_buff *skb) +{ + struct hci_rp_user_confirm_reply *rp = (void *) skb->data; + + BT_DBG("%s status 0x%x", hdev->name, rp->status); + + if (test_bit(HCI_MGMT, &hdev->flags)) + mgmt_user_confirm_neg_reply_complete(hdev->id, &rp->bdaddr, + rp->status); +} + static inline void hci_cs_inquiry(struct hci_dev *hdev, __u8 status) { BT_DBG("%s status 0x%x", hdev->name, status); @@ -622,11 +893,14 @@ static void hci_cs_add_sco(struct hci_dev *hdev, __u8 status) hci_dev_lock(hdev); acl = hci_conn_hash_lookup_handle(hdev, handle); - if (acl && (sco = acl->link)) { - sco->state = BT_CLOSED; + if (acl) { + sco = acl->link; + if (sco) { + sco->state = BT_CLOSED; - hci_proto_connect_cfm(sco, status); - hci_conn_del(sco); + hci_proto_connect_cfm(sco, status); + hci_conn_del(sco); + } } hci_dev_unlock(hdev); @@ -687,18 +961,18 @@ static void hci_cs_set_conn_encrypt(struct hci_dev *hdev, __u8 status) } static int hci_outgoing_auth_needed(struct hci_dev *hdev, - struct hci_conn *conn) + struct hci_conn *conn) { if (conn->state != BT_CONFIG || !conn->out) return 0; - if (conn->sec_level == BT_SECURITY_SDP) + if (conn->pending_sec_level == BT_SECURITY_SDP) return 0; /* Only request authentication for 
SSP connections or non-SSP * devices with sec_level HIGH */ if (!(hdev->ssp_mode > 0 && conn->ssp_mode > 0) && - conn->sec_level != BT_SECURITY_HIGH) + conn->pending_sec_level != BT_SECURITY_HIGH) return 0; return 1; @@ -808,11 +1082,14 @@ static void hci_cs_setup_sync_conn(struct hci_dev *hdev, __u8 status) hci_dev_lock(hdev); acl = hci_conn_hash_lookup_handle(hdev, handle); - if (acl && (sco = acl->link)) { - sco->state = BT_CLOSED; + if (acl) { + sco = acl->link; + if (sco) { + sco->state = BT_CLOSED; - hci_proto_connect_cfm(sco, status); - hci_conn_del(sco); + hci_proto_connect_cfm(sco, status); + hci_conn_del(sco); + } } hci_dev_unlock(hdev); @@ -872,6 +1149,43 @@ static void hci_cs_exit_sniff_mode(struct hci_dev *hdev, __u8 status) hci_dev_unlock(hdev); } +static void hci_cs_le_create_conn(struct hci_dev *hdev, __u8 status) +{ + struct hci_cp_le_create_conn *cp; + struct hci_conn *conn; + + BT_DBG("%s status 0x%x", hdev->name, status); + + cp = hci_sent_cmd_data(hdev, HCI_OP_LE_CREATE_CONN); + if (!cp) + return; + + hci_dev_lock(hdev); + + conn = hci_conn_hash_lookup_ba(hdev, LE_LINK, &cp->peer_addr); + + BT_DBG("%s bdaddr %s conn %p", hdev->name, batostr(&cp->peer_addr), + conn); + + if (status) { + if (conn && conn->state == BT_CONNECT) { + conn->state = BT_CLOSED; + hci_proto_connect_cfm(conn, status); + hci_conn_del(conn); + } + } else { + if (!conn) { + conn = hci_conn_add(hdev, LE_LINK, &cp->peer_addr); + if (conn) + conn->out = 1; + else + BT_ERR("No memory for new connection"); + } + } + + hci_dev_unlock(hdev); +} + static inline void hci_inquiry_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) { __u8 status = *((__u8 *) skb->data); @@ -942,6 +1256,7 @@ static inline void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *s conn->state = BT_CONFIG; hci_conn_hold(conn); conn->disc_timeout = HCI_DISCONN_TIMEOUT; + mgmt_connected(hdev->id, &ev->bdaddr); } else conn->state = BT_CONNECTED; @@ -970,8 +1285,11 @@ static inline void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *s hci_send_cmd(hdev, HCI_OP_CHANGE_CONN_PTYPE, sizeof(cp), &cp); } - } else + } else { conn->state = BT_CLOSED; + if (conn->type == ACL_LINK) + mgmt_connect_failed(hdev->id, &ev->bdaddr, ev->status); + } if (conn->type == ACL_LINK) hci_sco_setup(conn, ev->status); @@ -998,7 +1316,8 @@ static inline void hci_conn_request_evt(struct hci_dev *hdev, struct sk_buff *sk mask |= hci_proto_connect_ind(hdev, &ev->bdaddr, ev->link_type); - if ((mask & HCI_LM_ACCEPT) && !hci_blacklist_lookup(hdev, &ev->bdaddr)) { + if ((mask & HCI_LM_ACCEPT) && + !hci_blacklist_lookup(hdev, &ev->bdaddr)) { /* Connection accepted */ struct inquiry_entry *ie; struct hci_conn *conn; @@ -1068,19 +1387,26 @@ static inline void hci_disconn_complete_evt(struct hci_dev *hdev, struct sk_buff BT_DBG("%s status %d", hdev->name, ev->status); - if (ev->status) + if (ev->status) { + mgmt_disconnect_failed(hdev->id); return; + } hci_dev_lock(hdev); conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(ev->handle)); - if (conn) { - conn->state = BT_CLOSED; + if (!conn) + goto unlock; - hci_proto_disconn_cfm(conn, ev->reason); - hci_conn_del(conn); - } + conn->state = BT_CLOSED; + if (conn->type == ACL_LINK) + mgmt_disconnected(hdev->id, &conn->dst); + + hci_proto_disconn_cfm(conn, ev->reason); + hci_conn_del(conn); + +unlock: hci_dev_unlock(hdev); } @@ -1095,10 +1421,13 @@ static inline void hci_auth_complete_evt(struct hci_dev *hdev, struct sk_buff *s conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(ev->handle)); if 
(conn) { - if (!ev->status) + if (!ev->status) { conn->link_mode |= HCI_LM_AUTH; - else + conn->sec_level = conn->pending_sec_level; + } else { + mgmt_auth_failed(hdev->id, &conn->dst, ev->status); conn->sec_level = BT_SECURITY_LOW; + } clear_bit(HCI_CONN_AUTH_PEND, &conn->pend); @@ -1392,11 +1721,54 @@ static inline void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *sk hci_cc_write_ca_timeout(hdev, skb); break; + case HCI_OP_DELETE_STORED_LINK_KEY: + hci_cc_delete_stored_link_key(hdev, skb); + break; + + case HCI_OP_SET_EVENT_MASK: + hci_cc_set_event_mask(hdev, skb); + break; + + case HCI_OP_WRITE_INQUIRY_MODE: + hci_cc_write_inquiry_mode(hdev, skb); + break; + + case HCI_OP_READ_INQ_RSP_TX_POWER: + hci_cc_read_inq_rsp_tx_power(hdev, skb); + break; + + case HCI_OP_SET_EVENT_FLT: + hci_cc_set_event_flt(hdev, skb); + break; + + case HCI_OP_PIN_CODE_REPLY: + hci_cc_pin_code_reply(hdev, skb); + break; + + case HCI_OP_PIN_CODE_NEG_REPLY: + hci_cc_pin_code_neg_reply(hdev, skb); + break; + + case HCI_OP_LE_READ_BUFFER_SIZE: + hci_cc_le_read_buffer_size(hdev, skb); + break; + + case HCI_OP_USER_CONFIRM_REPLY: + hci_cc_user_confirm_reply(hdev, skb); + break; + + case HCI_OP_USER_CONFIRM_NEG_REPLY: + hci_cc_user_confirm_neg_reply(hdev, skb); + break; + default: BT_DBG("%s opcode 0x%x", hdev->name, opcode); break; } + if (ev->opcode != HCI_OP_NOP) + del_timer(&hdev->cmd_timer); + if (ev->ncmd) { atomic_set(&hdev->cmd_cnt, 1); if (!skb_queue_empty(&hdev->cmd_q)) @@ -1458,11 +1830,23 @@ static inline void hci_cmd_status_evt(struct hci_dev *hdev, struct sk_buff *skb) hci_cs_exit_sniff_mode(hdev, ev->status); break; + case HCI_OP_DISCONNECT: + if (ev->status != 0) + mgmt_disconnect_failed(hdev->id); + break; + + case HCI_OP_LE_CREATE_CONN: + hci_cs_le_create_conn(hdev, ev->status); + break; + default: BT_DBG("%s opcode 0x%x", hdev->name, opcode); break; } + if (ev->opcode != HCI_OP_NOP) + del_timer(&hdev->cmd_timer); + if (ev->ncmd) { atomic_set(&hdev->cmd_cnt, 1); if (!skb_queue_empty(&hdev->cmd_q)) @@ -1528,6 +1912,16 @@ static inline void hci_num_comp_pkts_evt(struct hci_dev *hdev, struct sk_buff *s hdev->acl_cnt += count; if (hdev->acl_cnt > hdev->acl_pkts) hdev->acl_cnt = hdev->acl_pkts; + } else if (conn->type == LE_LINK) { + if (hdev->le_pkts) { + hdev->le_cnt += count; + if (hdev->le_cnt > hdev->le_pkts) + hdev->le_cnt = hdev->le_pkts; + } else { + hdev->acl_cnt += count; + if (hdev->acl_cnt > hdev->acl_pkts) + hdev->acl_cnt = hdev->acl_pkts; + } } else { hdev->sco_cnt += count; if (hdev->sco_cnt > hdev->sco_pkts) @@ -1585,18 +1979,72 @@ static inline void hci_pin_code_request_evt(struct hci_dev *hdev, struct sk_buff hci_conn_put(conn); } + if (!test_bit(HCI_PAIRABLE, &hdev->flags)) + hci_send_cmd(hdev, HCI_OP_PIN_CODE_NEG_REPLY, + sizeof(ev->bdaddr), &ev->bdaddr); + + if (test_bit(HCI_MGMT, &hdev->flags)) + mgmt_pin_code_request(hdev->id, &ev->bdaddr); + hci_dev_unlock(hdev); } static inline void hci_link_key_request_evt(struct hci_dev *hdev, struct sk_buff *skb) { + struct hci_ev_link_key_req *ev = (void *) skb->data; + struct hci_cp_link_key_reply cp; + struct hci_conn *conn; + struct link_key *key; + BT_DBG("%s", hdev->name); + + if (!test_bit(HCI_LINK_KEYS, &hdev->flags)) + return; + + hci_dev_lock(hdev); + + key = hci_find_link_key(hdev, &ev->bdaddr); + if (!key) { + BT_DBG("%s link key not found for %s", hdev->name, + batostr(&ev->bdaddr)); + goto not_found; + } + + BT_DBG("%s found key type %u for %s", hdev->name, key->type, + batostr(&ev->bdaddr)); + + if 
(!test_bit(HCI_DEBUG_KEYS, &hdev->flags) && key->type == 0x03) { + BT_DBG("%s ignoring debug key", hdev->name); + goto not_found; + } + + conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr); + + if (key->type == 0x04 && conn && conn->auth_type != 0xff && + (conn->auth_type & 0x01)) { + BT_DBG("%s ignoring unauthenticated key", hdev->name); + goto not_found; + } + + bacpy(&cp.bdaddr, &ev->bdaddr); + memcpy(cp.link_key, key->val, 16); + + hci_send_cmd(hdev, HCI_OP_LINK_KEY_REPLY, sizeof(cp), &cp); + + hci_dev_unlock(hdev); + + return; + +not_found: + hci_send_cmd(hdev, HCI_OP_LINK_KEY_NEG_REPLY, 6, &ev->bdaddr); + hci_dev_unlock(hdev); } static inline void hci_link_key_notify_evt(struct hci_dev *hdev, struct sk_buff *skb) { struct hci_ev_link_key_notify *ev = (void *) skb->data; struct hci_conn *conn; + u8 pin_len = 0; BT_DBG("%s", hdev->name); @@ -1606,9 +2054,14 @@ static inline void hci_link_key_notify_evt(struct hci_dev *hdev, struct sk_buff if (conn) { hci_conn_hold(conn); conn->disc_timeout = HCI_DISCONN_TIMEOUT; + pin_len = conn->pin_length; hci_conn_put(conn); } + if (test_bit(HCI_LINK_KEYS, &hdev->flags)) + hci_add_link_key(hdev, 1, &ev->bdaddr, ev->link_key, + ev->key_type, pin_len); + hci_dev_unlock(hdev); } @@ -1682,7 +2135,8 @@ static inline void hci_inquiry_result_with_rssi_evt(struct hci_dev *hdev, struct hci_dev_lock(hdev); if ((skb->len - 1) / num_rsp != sizeof(struct inquiry_info_with_rssi)) { - struct inquiry_info_with_rssi_and_pscan_mode *info = (void *) (skb->data + 1); + struct inquiry_info_with_rssi_and_pscan_mode *info; + info = (void *) (skb->data + 1); for (; num_rsp; num_rsp--) { bacpy(&data.bdaddr, &info->bdaddr); @@ -1823,17 +2277,8 @@ static inline void hci_sync_conn_changed_evt(struct hci_dev *hdev, struct sk_buf static inline void hci_sniff_subrate_evt(struct hci_dev *hdev, struct sk_buff *skb) { struct hci_ev_sniff_subrate *ev = (void *) skb->data; - struct hci_conn *conn; BT_DBG("%s status %d", hdev->name, ev->status); - - hci_dev_lock(hdev); - - conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(ev->handle)); - if (conn) { - } - - hci_dev_unlock(hdev); } static inline void hci_extended_inquiry_result_evt(struct hci_dev *hdev, struct sk_buff *skb) @@ -1851,12 +2296,12 @@ static inline void hci_extended_inquiry_result_evt(struct hci_dev *hdev, struct for (; num_rsp; num_rsp--) { bacpy(&data.bdaddr, &info->bdaddr); - data.pscan_rep_mode = info->pscan_rep_mode; - data.pscan_period_mode = info->pscan_period_mode; - data.pscan_mode = 0x00; + data.pscan_rep_mode = info->pscan_rep_mode; + data.pscan_period_mode = info->pscan_period_mode; + data.pscan_mode = 0x00; memcpy(data.dev_class, info->dev_class, 3); - data.clock_offset = info->clock_offset; - data.rssi = info->rssi; + data.clock_offset = info->clock_offset; + data.rssi = info->rssi; data.ssp_mode = 0x01; info++; hci_inquiry_cache_update(hdev, &data); @@ -1865,6 +2310,25 @@ static inline void hci_extended_inquiry_result_evt(struct hci_dev *hdev, struct hci_dev_unlock(hdev); } +static inline u8 hci_get_auth_req(struct hci_conn *conn) +{ + /* If remote requests dedicated bonding follow that lead */ + if (conn->remote_auth == 0x02 || conn->remote_auth == 0x03) { + /* If both remote and local IO capabilities allow MITM + * protection then require it, otherwise don't */ + if (conn->remote_cap == 0x03 || conn->io_capability == 0x03) + return 0x02; + else + return 0x03; + } + + /* If remote requests no-bonding follow that lead */ + if (conn->remote_auth == 0x00 || conn->remote_auth == 0x01) + return 
0x00; + + return conn->auth_type; +} + static inline void hci_io_capa_request_evt(struct hci_dev *hdev, struct sk_buff *skb) { struct hci_ev_io_capa_request *ev = (void *) skb->data; @@ -1875,8 +2339,73 @@ static inline void hci_io_capa_request_evt(struct hci_dev *hdev, struct sk_buff hci_dev_lock(hdev); conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr); - if (conn) - hci_conn_hold(conn); + if (!conn) + goto unlock; + + hci_conn_hold(conn); + + if (!test_bit(HCI_MGMT, &hdev->flags)) + goto unlock; + + if (test_bit(HCI_PAIRABLE, &hdev->flags) || + (conn->remote_auth & ~0x01) == HCI_AT_NO_BONDING) { + struct hci_cp_io_capability_reply cp; + + bacpy(&cp.bdaddr, &ev->bdaddr); + cp.capability = conn->io_capability; + cp.oob_data = 0; + cp.authentication = hci_get_auth_req(conn); + + hci_send_cmd(hdev, HCI_OP_IO_CAPABILITY_REPLY, + sizeof(cp), &cp); + } else { + struct hci_cp_io_capability_neg_reply cp; + + bacpy(&cp.bdaddr, &ev->bdaddr); + cp.reason = 0x16; /* Pairing not allowed */ + + hci_send_cmd(hdev, HCI_OP_IO_CAPABILITY_NEG_REPLY, + sizeof(cp), &cp); + } + +unlock: + hci_dev_unlock(hdev); +} + +static inline void hci_io_capa_reply_evt(struct hci_dev *hdev, struct sk_buff *skb) +{ + struct hci_ev_io_capa_reply *ev = (void *) skb->data; + struct hci_conn *conn; + + BT_DBG("%s", hdev->name); + + hci_dev_lock(hdev); + + conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr); + if (!conn) + goto unlock; + + hci_conn_hold(conn); + + conn->remote_cap = ev->capability; + conn->remote_oob = ev->oob_data; + conn->remote_auth = ev->authentication; + +unlock: + hci_dev_unlock(hdev); +} + +static inline void hci_user_confirm_request_evt(struct hci_dev *hdev, + struct sk_buff *skb) +{ + struct hci_ev_user_confirm_req *ev = (void *) skb->data; + + BT_DBG("%s", hdev->name); + + hci_dev_lock(hdev); + + if (test_bit(HCI_MGMT, &hdev->flags)) + mgmt_user_confirm_request(hdev->id, &ev->bdaddr, ev->passkey); hci_dev_unlock(hdev); } @@ -1891,9 +2420,20 @@ static inline void hci_simple_pair_complete_evt(struct hci_dev *hdev, struct sk_ hci_dev_lock(hdev); conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr); - if (conn) - hci_conn_put(conn); + if (!conn) + goto unlock; + + /* To avoid duplicate auth_failed events to user space we check + * the HCI_CONN_AUTH_PEND flag which will be set if we + * initiated the authentication. 
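Aside (not part of the patch): hci_get_auth_req(), added above, derives the authentication requirement for the IO Capability Reply from the peer's requested requirement and both sides' IO capabilities. A minimal user-space restatement of that decision table follows; the struct, function and example values are local to this sketch and only mirror the numeric codes used in the hunk (0x02/0x03 for dedicated bonding, 0x03 being the NoInputNoOutput IO capability).

#include <stdint.h>
#include <stdio.h>

struct conn_info {
	uint8_t remote_auth;	/* requirement the peer asked for */
	uint8_t remote_cap;	/* peer's IO capability */
	uint8_t io_capability;	/* our IO capability */
	uint8_t auth_type;	/* requirement we asked for initially */
};

static uint8_t get_auth_req(const struct conn_info *c)
{
	/* Peer wants dedicated bonding (0x02/0x03): follow that lead and add
	 * MITM protection only if neither side is NoInputNoOutput (0x03). */
	if (c->remote_auth == 0x02 || c->remote_auth == 0x03) {
		if (c->remote_cap == 0x03 || c->io_capability == 0x03)
			return 0x02;	/* dedicated bonding, no MITM */
		return 0x03;		/* dedicated bonding with MITM */
	}

	/* Peer wants no bonding (0x00/0x01): follow that lead as well. */
	if (c->remote_auth == 0x00 || c->remote_auth == 0x01)
		return 0x00;

	/* Otherwise keep whatever we required when setting up the link. */
	return c->auth_type;
}

int main(void)
{
	struct conn_info c = { .remote_auth = 0x02, .remote_cap = 0x03,
			       .io_capability = 0x01, .auth_type = 0x04 };

	printf("auth req = 0x%02x\n", get_auth_req(&c));	/* 0x02 */
	return 0;
}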
A traditional auth_complete + * event gets always produced as initiator and is also mapped to + * the mgmt_auth_failed event */ + if (!test_bit(HCI_CONN_AUTH_PEND, &conn->pend) && ev->status != 0) + mgmt_auth_failed(hdev->id, &conn->dst, ev->status); + + hci_conn_put(conn); +unlock: hci_dev_unlock(hdev); } @@ -1913,6 +2453,60 @@ static inline void hci_remote_host_features_evt(struct hci_dev *hdev, struct sk_ hci_dev_unlock(hdev); } +static inline void hci_le_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) +{ + struct hci_ev_le_conn_complete *ev = (void *) skb->data; + struct hci_conn *conn; + + BT_DBG("%s status %d", hdev->name, ev->status); + + hci_dev_lock(hdev); + + conn = hci_conn_hash_lookup_ba(hdev, LE_LINK, &ev->bdaddr); + if (!conn) { + conn = hci_conn_add(hdev, LE_LINK, &ev->bdaddr); + if (!conn) { + BT_ERR("No memory for new connection"); + hci_dev_unlock(hdev); + return; + } + } + + if (ev->status) { + hci_proto_connect_cfm(conn, ev->status); + conn->state = BT_CLOSED; + hci_conn_del(conn); + goto unlock; + } + + conn->handle = __le16_to_cpu(ev->handle); + conn->state = BT_CONNECTED; + + hci_conn_hold_device(conn); + hci_conn_add_sysfs(conn); + + hci_proto_connect_cfm(conn, ev->status); + +unlock: + hci_dev_unlock(hdev); +} + +static inline void hci_le_meta_evt(struct hci_dev *hdev, struct sk_buff *skb) +{ + struct hci_ev_le_meta *le_ev = (void *) skb->data; + + skb_pull(skb, sizeof(*le_ev)); + + switch (le_ev->subevent) { + case HCI_EV_LE_CONN_COMPLETE: + hci_le_conn_complete_evt(hdev, skb); + break; + + default: + break; + } +} + void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb) { struct hci_event_hdr *hdr = (void *) skb->data; @@ -2041,6 +2635,14 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb) hci_io_capa_request_evt(hdev, skb); break; + case HCI_EV_IO_CAPA_REPLY: + hci_io_capa_reply_evt(hdev, skb); + break; + + case HCI_EV_USER_CONFIRM_REQUEST: + hci_user_confirm_request_evt(hdev, skb); + break; + case HCI_EV_SIMPLE_PAIR_COMPLETE: hci_simple_pair_complete_evt(hdev, skb); break; @@ -2049,6 +2651,10 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb) hci_remote_host_features_evt(hdev, skb); break; + case HCI_EV_LE_META: + hci_le_meta_evt(hdev, skb); + break; + default: BT_DBG("%s event 0x%x", hdev->name, event); break; @@ -2082,6 +2688,6 @@ void hci_si_event(struct hci_dev *hdev, int type, int dlen, void *data) bt_cb(skb)->pkt_type = HCI_EVENT_PKT; skb->dev = (void *) hdev; - hci_send_to_sock(hdev, skb); + hci_send_to_sock(hdev, skb, NULL); kfree_skb(skb); } diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 29827c77f6ce..295e4a88fff8 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -85,7 +85,8 @@ static struct bt_sock_list hci_sk_list = { }; /* Send frame to RAW socket */ -void hci_send_to_sock(struct hci_dev *hdev, struct sk_buff *skb) +void hci_send_to_sock(struct hci_dev *hdev, struct sk_buff *skb, + struct sock *skip_sk) { struct sock *sk; struct hlist_node *node; @@ -97,6 +98,9 @@ void hci_send_to_sock(struct hci_dev *hdev, struct sk_buff *skb) struct hci_filter *flt; struct sk_buff *nskb; + if (sk == skip_sk) + continue; + if (sk->sk_state != BT_BOUND || hci_pi(sk)->hdev != hdev) continue; @@ -857,7 +861,7 @@ error: return err; } -void __exit hci_sock_cleanup(void) +void hci_sock_cleanup(void) { if (bt_sock_unregister(BTPROTO_HCI) < 0) BT_ERR("HCI socket unregistration failed"); diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c index 
5fce3d6d07b4..3c838a65a75a 100644 --- a/net/bluetooth/hci_sysfs.c +++ b/net/bluetooth/hci_sysfs.c @@ -11,7 +11,7 @@ static struct class *bt_class; -struct dentry *bt_debugfs = NULL; +struct dentry *bt_debugfs; EXPORT_SYMBOL_GPL(bt_debugfs); static inline char *link_typetostr(int type) @@ -51,8 +51,8 @@ static ssize_t show_link_features(struct device *dev, struct device_attribute *a conn->features[6], conn->features[7]); } -#define LINK_ATTR(_name,_mode,_show,_store) \ -struct device_attribute link_attr_##_name = __ATTR(_name,_mode,_show,_store) +#define LINK_ATTR(_name, _mode, _show, _store) \ +struct device_attribute link_attr_##_name = __ATTR(_name, _mode, _show, _store) static LINK_ATTR(type, S_IRUGO, show_link_type, NULL); static LINK_ATTR(address, S_IRUGO, show_link_address, NULL); @@ -461,6 +461,56 @@ static const struct file_operations blacklist_fops = { .llseek = seq_lseek, .release = single_release, }; + +static void print_bt_uuid(struct seq_file *f, u8 *uuid) +{ + u32 data0, data4; + u16 data1, data2, data3, data5; + + memcpy(&data0, &uuid[0], 4); + memcpy(&data1, &uuid[4], 2); + memcpy(&data2, &uuid[6], 2); + memcpy(&data3, &uuid[8], 2); + memcpy(&data4, &uuid[10], 4); + memcpy(&data5, &uuid[14], 2); + + seq_printf(f, "%.8x-%.4x-%.4x-%.4x-%.8x%.4x\n", + ntohl(data0), ntohs(data1), ntohs(data2), + ntohs(data3), ntohl(data4), ntohs(data5)); +} + +static int uuids_show(struct seq_file *f, void *p) +{ + struct hci_dev *hdev = f->private; + struct list_head *l; + + hci_dev_lock_bh(hdev); + + list_for_each(l, &hdev->uuids) { + struct bt_uuid *uuid; + + uuid = list_entry(l, struct bt_uuid, list); + + print_bt_uuid(f, uuid->uuid); + } + + hci_dev_unlock_bh(hdev); + + return 0; +} + +static int uuids_open(struct inode *inode, struct file *file) +{ + return single_open(file, uuids_show, inode->i_private); +} + +static const struct file_operations uuids_fops = { + .open = uuids_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + int hci_register_sysfs(struct hci_dev *hdev) { struct device *dev = &hdev->dev; @@ -493,6 +543,8 @@ int hci_register_sysfs(struct hci_dev *hdev) debugfs_create_file("blacklist", 0444, hdev->debugfs, hdev, &blacklist_fops); + debugfs_create_file("uuids", 0444, hdev->debugfs, hdev, &uuids_fops); + return 0; } diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index 29544c21f4b5..2429ca2d7b06 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c @@ -157,7 +157,8 @@ static int hidp_queue_event(struct hidp_session *session, struct input_dev *dev, session->leds = newleds; - if (!(skb = alloc_skb(3, GFP_ATOMIC))) { + skb = alloc_skb(3, GFP_ATOMIC); + if (!skb) { BT_ERR("Can't allocate memory for new frame"); return -ENOMEM; } @@ -250,7 +251,8 @@ static int __hidp_send_ctrl_message(struct hidp_session *session, BT_DBG("session %p data %p size %d", session, data, size); - if (!(skb = alloc_skb(size + 1, GFP_ATOMIC))) { + skb = alloc_skb(size + 1, GFP_ATOMIC); + if (!skb) { BT_ERR("Can't allocate memory for new frame"); return -ENOMEM; } @@ -283,7 +285,8 @@ static int hidp_queue_report(struct hidp_session *session, BT_DBG("session %p hid %p data %p size %d", session, session->hid, data, size); - if (!(skb = alloc_skb(size + 1, GFP_ATOMIC))) { + skb = alloc_skb(size + 1, GFP_ATOMIC); + if (!skb) { BT_ERR("Can't allocate memory for new frame"); return -ENOMEM; } @@ -1016,8 +1019,6 @@ static int __init hidp_init(void) { int ret; - l2cap_load(); - BT_INFO("HIDP (Human Interface Emulation) ver %s", VERSION); ret = 
hid_register_driver(&hidp_driver); diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap_core.c index c791fcda7b2d..c9f9cecca527 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap_core.c @@ -24,7 +24,7 @@ SOFTWARE IS DISCLAIMED. */ -/* Bluetooth L2CAP core and sockets. */ +/* Bluetooth L2CAP core. */ #include <linux/module.h> @@ -55,79 +55,24 @@ #include <net/bluetooth/hci_core.h> #include <net/bluetooth/l2cap.h> -#define VERSION "2.15" - -static int disable_ertm; +int disable_ertm; static u32 l2cap_feat_mask = L2CAP_FEAT_FIXED_CHAN; static u8 l2cap_fixed_chan[8] = { 0x02, }; -static const struct proto_ops l2cap_sock_ops; - static struct workqueue_struct *_busy_wq; -static struct bt_sock_list l2cap_sk_list = { +struct bt_sock_list l2cap_sk_list = { .lock = __RW_LOCK_UNLOCKED(l2cap_sk_list.lock) }; static void l2cap_busy_work(struct work_struct *work); -static void __l2cap_sock_close(struct sock *sk, int reason); -static void l2cap_sock_close(struct sock *sk); -static void l2cap_sock_kill(struct sock *sk); - -static int l2cap_build_conf_req(struct sock *sk, void *data); static struct sk_buff *l2cap_build_cmd(struct l2cap_conn *conn, u8 code, u8 ident, u16 dlen, void *data); static int l2cap_ertm_data_rcv(struct sock *sk, struct sk_buff *skb); -/* ---- L2CAP timers ---- */ -static void l2cap_sock_set_timer(struct sock *sk, long timeout) -{ - BT_DBG("sk %p state %d timeout %ld", sk, sk->sk_state, timeout); - sk_reset_timer(sk, &sk->sk_timer, jiffies + timeout); -} - -static void l2cap_sock_clear_timer(struct sock *sk) -{ - BT_DBG("sock %p state %d", sk, sk->sk_state); - sk_stop_timer(sk, &sk->sk_timer); -} - -static void l2cap_sock_timeout(unsigned long arg) -{ - struct sock *sk = (struct sock *) arg; - int reason; - - BT_DBG("sock %p state %d", sk, sk->sk_state); - - bh_lock_sock(sk); - - if (sock_owned_by_user(sk)) { - /* sk is owned by user. Try again later */ - l2cap_sock_set_timer(sk, HZ / 5); - bh_unlock_sock(sk); - sock_put(sk); - return; - } - - if (sk->sk_state == BT_CONNECTED || sk->sk_state == BT_CONFIG) - reason = ECONNREFUSED; - else if (sk->sk_state == BT_CONNECT && - l2cap_pi(sk)->sec_level != BT_SECURITY_SDP) - reason = ECONNREFUSED; - else - reason = ETIMEDOUT; - - __l2cap_sock_close(sk, reason); - - bh_unlock_sock(sk); - - l2cap_sock_kill(sk); - sock_put(sk); -} - /* ---- L2CAP channels ---- */ static struct sock *__l2cap_get_chan_by_dcid(struct l2cap_chan_list *l, u16 cid) { @@ -236,8 +181,16 @@ static void __l2cap_chan_add(struct l2cap_conn *conn, struct sock *sk, struct so l2cap_pi(sk)->conn = conn; if (sk->sk_type == SOCK_SEQPACKET || sk->sk_type == SOCK_STREAM) { - /* Alloc CID for connection-oriented socket */ - l2cap_pi(sk)->scid = l2cap_alloc_cid(l); + if (conn->hcon->type == LE_LINK) { + /* LE connection */ + l2cap_pi(sk)->omtu = L2CAP_LE_DEFAULT_MTU; + l2cap_pi(sk)->scid = L2CAP_CID_LE_DATA; + l2cap_pi(sk)->dcid = L2CAP_CID_LE_DATA; + } else { + /* Alloc CID for connection-oriented socket */ + l2cap_pi(sk)->scid = l2cap_alloc_cid(l); + l2cap_pi(sk)->omtu = L2CAP_DEFAULT_MTU; + } } else if (sk->sk_type == SOCK_DGRAM) { /* Connectionless socket */ l2cap_pi(sk)->scid = L2CAP_CID_CONN_LESS; @@ -258,7 +211,7 @@ static void __l2cap_chan_add(struct l2cap_conn *conn, struct sock *sk, struct so /* Delete channel. * Must be called on the locked socket. 
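Aside (not part of the patch): the uuids debugfs file added to hci_sysfs.c above prints each stored 128-bit UUID in the familiar 8-4-4-4-12 form by splitting the 16 big-endian bytes into 32- and 16-bit fields and byte-swapping them. A self-contained user-space equivalent of print_bt_uuid(); the sample UUID is just an example value (the 16-bit Serial Port UUID 0x1101 expanded with the Bluetooth base UUID).

#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <arpa/inet.h>

static void print_bt_uuid(const uint8_t *uuid)
{
	uint32_t d0, d4;
	uint16_t d1, d2, d3, d5;

	/* Same field split as the kernel helper above. */
	memcpy(&d0, &uuid[0], 4);
	memcpy(&d1, &uuid[4], 2);
	memcpy(&d2, &uuid[6], 2);
	memcpy(&d3, &uuid[8], 2);
	memcpy(&d4, &uuid[10], 4);
	memcpy(&d5, &uuid[14], 2);

	printf("%.8x-%.4x-%.4x-%.4x-%.8x%.4x\n",
	       ntohl(d0), ntohs(d1), ntohs(d2),
	       ntohs(d3), ntohl(d4), ntohs(d5));
}

int main(void)
{
	/* UUID bytes stored most-significant byte first */
	const uint8_t spp[16] = {
		0x00, 0x00, 0x11, 0x01, 0x00, 0x00, 0x10, 0x00,
		0x80, 0x00, 0x00, 0x80, 0x5f, 0x9b, 0x34, 0xfb
	};

	print_bt_uuid(spp);	/* 00001101-0000-1000-8000-00805f9b34fb */
	return 0;
}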
*/ -static void l2cap_chan_del(struct sock *sk, int err) +void l2cap_chan_del(struct sock *sk, int err) { struct l2cap_conn *conn = l2cap_pi(sk)->conn; struct sock *parent = bt_sk(sk)->parent; @@ -305,39 +258,50 @@ static void l2cap_chan_del(struct sock *sk, int err) } } -/* Service level security */ -static inline int l2cap_check_security(struct sock *sk) +static inline u8 l2cap_get_auth_type(struct sock *sk) { - struct l2cap_conn *conn = l2cap_pi(sk)->conn; - __u8 auth_type; + if (sk->sk_type == SOCK_RAW) { + switch (l2cap_pi(sk)->sec_level) { + case BT_SECURITY_HIGH: + return HCI_AT_DEDICATED_BONDING_MITM; + case BT_SECURITY_MEDIUM: + return HCI_AT_DEDICATED_BONDING; + default: + return HCI_AT_NO_BONDING; + } + } else if (l2cap_pi(sk)->psm == cpu_to_le16(0x0001)) { + if (l2cap_pi(sk)->sec_level == BT_SECURITY_LOW) + l2cap_pi(sk)->sec_level = BT_SECURITY_SDP; - if (l2cap_pi(sk)->psm == cpu_to_le16(0x0001)) { if (l2cap_pi(sk)->sec_level == BT_SECURITY_HIGH) - auth_type = HCI_AT_NO_BONDING_MITM; + return HCI_AT_NO_BONDING_MITM; else - auth_type = HCI_AT_NO_BONDING; - - if (l2cap_pi(sk)->sec_level == BT_SECURITY_LOW) - l2cap_pi(sk)->sec_level = BT_SECURITY_SDP; + return HCI_AT_NO_BONDING; } else { switch (l2cap_pi(sk)->sec_level) { case BT_SECURITY_HIGH: - auth_type = HCI_AT_GENERAL_BONDING_MITM; - break; + return HCI_AT_GENERAL_BONDING_MITM; case BT_SECURITY_MEDIUM: - auth_type = HCI_AT_GENERAL_BONDING; - break; + return HCI_AT_GENERAL_BONDING; default: - auth_type = HCI_AT_NO_BONDING; - break; + return HCI_AT_NO_BONDING; } } +} + +/* Service level security */ +static inline int l2cap_check_security(struct sock *sk) +{ + struct l2cap_conn *conn = l2cap_pi(sk)->conn; + __u8 auth_type; + + auth_type = l2cap_get_auth_type(sk); return hci_conn_security(conn->hcon, l2cap_pi(sk)->sec_level, auth_type); } -static inline u8 l2cap_get_ident(struct l2cap_conn *conn) +u8 l2cap_get_ident(struct l2cap_conn *conn) { u8 id; @@ -359,16 +323,22 @@ static inline u8 l2cap_get_ident(struct l2cap_conn *conn) return id; } -static inline void l2cap_send_cmd(struct l2cap_conn *conn, u8 ident, u8 code, u16 len, void *data) +void l2cap_send_cmd(struct l2cap_conn *conn, u8 ident, u8 code, u16 len, void *data) { struct sk_buff *skb = l2cap_build_cmd(conn, code, ident, len, data); + u8 flags; BT_DBG("code 0x%2.2x", code); if (!skb) return; - hci_send_acl(conn->hcon, skb, 0); + if (lmp_no_flush_capable(conn->hcon->hdev)) + flags = ACL_START_NO_FLUSH; + else + flags = ACL_START; + + hci_send_acl(conn->hcon, skb, flags); } static inline void l2cap_send_sframe(struct l2cap_pinfo *pi, u16 control) @@ -378,6 +348,7 @@ static inline void l2cap_send_sframe(struct l2cap_pinfo *pi, u16 control) struct l2cap_conn *conn = pi->conn; struct sock *sk = (struct sock *)pi; int count, hlen = L2CAP_HDR_SIZE + 2; + u8 flags; if (sk->sk_state != BT_CONNECTED) return; @@ -414,7 +385,12 @@ static inline void l2cap_send_sframe(struct l2cap_pinfo *pi, u16 control) put_unaligned_le16(fcs, skb_put(skb, 2)); } - hci_send_acl(pi->conn->hcon, skb, 0); + if (lmp_no_flush_capable(conn->hcon->hdev)) + flags = ACL_START_NO_FLUSH; + else + flags = ACL_START; + + hci_send_acl(pi->conn->hcon, skb, flags); } static inline void l2cap_send_rr_or_rnr(struct l2cap_pinfo *pi, u16 control) @@ -485,7 +461,7 @@ static inline int l2cap_mode_supported(__u8 mode, __u32 feat_mask) } } -static void l2cap_send_disconn_req(struct l2cap_conn *conn, struct sock *sk, int err) +void l2cap_send_disconn_req(struct l2cap_conn *conn, struct sock *sk, int err) { struct 
l2cap_disconn_req req; @@ -613,6 +589,82 @@ static void l2cap_conn_start(struct l2cap_conn *conn) } } +/* Find socket with cid and source bdaddr. + * Returns closest match, locked. + */ +static struct sock *l2cap_get_sock_by_scid(int state, __le16 cid, bdaddr_t *src) +{ + struct sock *s, *sk = NULL, *sk1 = NULL; + struct hlist_node *node; + + read_lock(&l2cap_sk_list.lock); + + sk_for_each(sk, node, &l2cap_sk_list.head) { + if (state && sk->sk_state != state) + continue; + + if (l2cap_pi(sk)->scid == cid) { + /* Exact match. */ + if (!bacmp(&bt_sk(sk)->src, src)) + break; + + /* Closest match */ + if (!bacmp(&bt_sk(sk)->src, BDADDR_ANY)) + sk1 = sk; + } + } + s = node ? sk : sk1; + if (s) + bh_lock_sock(s); + read_unlock(&l2cap_sk_list.lock); + + return s; +} + +static void l2cap_le_conn_ready(struct l2cap_conn *conn) +{ + struct l2cap_chan_list *list = &conn->chan_list; + struct sock *parent, *uninitialized_var(sk); + + BT_DBG(""); + + /* Check if we have socket listening on cid */ + parent = l2cap_get_sock_by_scid(BT_LISTEN, L2CAP_CID_LE_DATA, + conn->src); + if (!parent) + return; + + /* Check for backlog size */ + if (sk_acceptq_is_full(parent)) { + BT_DBG("backlog full %d", parent->sk_ack_backlog); + goto clean; + } + + sk = l2cap_sock_alloc(sock_net(parent), NULL, BTPROTO_L2CAP, GFP_ATOMIC); + if (!sk) + goto clean; + + write_lock_bh(&list->lock); + + hci_conn_hold(conn->hcon); + + l2cap_sock_init(sk, parent); + bacpy(&bt_sk(sk)->src, conn->src); + bacpy(&bt_sk(sk)->dst, conn->dst); + + __l2cap_chan_add(conn, sk, parent); + + l2cap_sock_set_timer(sk, sk->sk_sndtimeo); + + sk->sk_state = BT_CONNECTED; + parent->sk_data_ready(parent, 0); + + write_unlock_bh(&list->lock); + +clean: + bh_unlock_sock(parent); +} + static void l2cap_conn_ready(struct l2cap_conn *conn) { struct l2cap_chan_list *l = &conn->chan_list; @@ -620,11 +672,20 @@ static void l2cap_conn_ready(struct l2cap_conn *conn) BT_DBG("conn %p", conn); + if (!conn->hcon->out && conn->hcon->type == LE_LINK) + l2cap_le_conn_ready(conn); + read_lock(&l->lock); for (sk = l->head; sk; sk = l2cap_pi(sk)->next_c) { bh_lock_sock(sk); + if (conn->hcon->type == LE_LINK) { + l2cap_sock_clear_timer(sk); + sk->sk_state = BT_CONNECTED; + sk->sk_state_change(sk); + } + if (sk->sk_type != SOCK_SEQPACKET && sk->sk_type != SOCK_STREAM) { l2cap_sock_clear_timer(sk); @@ -683,7 +744,11 @@ static struct l2cap_conn *l2cap_conn_add(struct hci_conn *hcon, u8 status) BT_DBG("hcon %p conn %p", hcon, conn); - conn->mtu = hcon->hdev->acl_mtu; + if (hcon->hdev->le_mtu && hcon->type == LE_LINK) + conn->mtu = hcon->hdev->le_mtu; + else + conn->mtu = hcon->hdev->acl_mtu; + conn->src = &hcon->hdev->bdaddr; conn->dst = &hcon->dst; @@ -692,7 +757,8 @@ static struct l2cap_conn *l2cap_conn_add(struct hci_conn *hcon, u8 status) spin_lock_init(&conn->lock); rwlock_init(&conn->chan_list.lock); - setup_timer(&conn->info_timer, l2cap_info_timeout, + if (hcon->type != LE_LINK) + setup_timer(&conn->info_timer, l2cap_info_timeout, (unsigned long) conn); conn->disc_reason = 0x13; @@ -736,17 +802,6 @@ static inline void l2cap_chan_add(struct l2cap_conn *conn, struct sock *sk, stru } /* ---- Socket interface ---- */ -static struct sock *__l2cap_get_sock_by_addr(__le16 psm, bdaddr_t *src) -{ - struct sock *sk; - struct hlist_node *node; - sk_for_each(sk, node, &l2cap_sk_list.head) - if (l2cap_pi(sk)->sport == psm && !bacmp(&bt_sk(sk)->src, src)) - goto found; - sk = NULL; -found: - return sk; -} /* Find socket with psm and source bdaddr. * Returns closest match. 
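Aside (not part of the patch): l2cap_get_sock_by_scid(), added above so an incoming LE connection can be matched to a listener on the LE data channel, prefers a socket bound to the exact source address and only falls back to a BDADDR_ANY wildcard. The same "closest match" rule in a tiny self-contained form; the types, table and CID value here are local to this sketch.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

typedef struct { uint8_t b[6]; } bdaddr_t;	/* local stand-in, not the kernel type */

struct chan {
	bdaddr_t src;
	uint16_t scid;
};

static int bacmp_(const bdaddr_t *a, const bdaddr_t *b)
{
	return memcmp(a, b, sizeof(*a));
}

/* Exact (cid, src) match wins; otherwise use a wildcard (all-zero) listener. */
static struct chan *get_chan_by_scid(struct chan *tab, int n,
				     uint16_t cid, const bdaddr_t *src)
{
	static const bdaddr_t any;	/* 00:00:00:00:00:00 */
	struct chan *wildcard = NULL;
	int i;

	for (i = 0; i < n; i++) {
		if (tab[i].scid != cid)
			continue;
		if (!bacmp_(&tab[i].src, src))
			return &tab[i];		/* exact match */
		if (!bacmp_(&tab[i].src, &any))
			wildcard = &tab[i];	/* closest match so far */
	}

	return wildcard;
}

int main(void)
{
	bdaddr_t local = { { 1, 2, 3, 4, 5, 6 } };
	struct chan tab[2];
	struct chan *c;

	memset(tab, 0, sizeof(tab));
	tab[0].scid = 0x0004;		/* wildcard listener */
	tab[1].scid = 0x0004;
	tab[1].src = local;		/* listener bound to one source address */

	c = get_chan_by_scid(tab, 2, 0x0004, &local);
	printf("picked the %s listener\n", c == &tab[1] ? "exact" : "wildcard");
	return 0;
}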
@@ -778,276 +833,7 @@ static struct sock *l2cap_get_sock_by_psm(int state, __le16 psm, bdaddr_t *src) return node ? sk : sk1; } -static void l2cap_sock_destruct(struct sock *sk) -{ - BT_DBG("sk %p", sk); - - skb_queue_purge(&sk->sk_receive_queue); - skb_queue_purge(&sk->sk_write_queue); -} - -static void l2cap_sock_cleanup_listen(struct sock *parent) -{ - struct sock *sk; - - BT_DBG("parent %p", parent); - - /* Close not yet accepted channels */ - while ((sk = bt_accept_dequeue(parent, NULL))) - l2cap_sock_close(sk); - - parent->sk_state = BT_CLOSED; - sock_set_flag(parent, SOCK_ZAPPED); -} - -/* Kill socket (only if zapped and orphan) - * Must be called on unlocked socket. - */ -static void l2cap_sock_kill(struct sock *sk) -{ - if (!sock_flag(sk, SOCK_ZAPPED) || sk->sk_socket) - return; - - BT_DBG("sk %p state %d", sk, sk->sk_state); - - /* Kill poor orphan */ - bt_sock_unlink(&l2cap_sk_list, sk); - sock_set_flag(sk, SOCK_DEAD); - sock_put(sk); -} - -static void __l2cap_sock_close(struct sock *sk, int reason) -{ - BT_DBG("sk %p state %d socket %p", sk, sk->sk_state, sk->sk_socket); - - switch (sk->sk_state) { - case BT_LISTEN: - l2cap_sock_cleanup_listen(sk); - break; - - case BT_CONNECTED: - case BT_CONFIG: - if (sk->sk_type == SOCK_SEQPACKET || - sk->sk_type == SOCK_STREAM) { - struct l2cap_conn *conn = l2cap_pi(sk)->conn; - - l2cap_sock_set_timer(sk, sk->sk_sndtimeo); - l2cap_send_disconn_req(conn, sk, reason); - } else - l2cap_chan_del(sk, reason); - break; - - case BT_CONNECT2: - if (sk->sk_type == SOCK_SEQPACKET || - sk->sk_type == SOCK_STREAM) { - struct l2cap_conn *conn = l2cap_pi(sk)->conn; - struct l2cap_conn_rsp rsp; - __u16 result; - - if (bt_sk(sk)->defer_setup) - result = L2CAP_CR_SEC_BLOCK; - else - result = L2CAP_CR_BAD_PSM; - - rsp.scid = cpu_to_le16(l2cap_pi(sk)->dcid); - rsp.dcid = cpu_to_le16(l2cap_pi(sk)->scid); - rsp.result = cpu_to_le16(result); - rsp.status = cpu_to_le16(L2CAP_CS_NO_INFO); - l2cap_send_cmd(conn, l2cap_pi(sk)->ident, - L2CAP_CONN_RSP, sizeof(rsp), &rsp); - } else - l2cap_chan_del(sk, reason); - break; - - case BT_CONNECT: - case BT_DISCONN: - l2cap_chan_del(sk, reason); - break; - - default: - sock_set_flag(sk, SOCK_ZAPPED); - break; - } -} - -/* Must be called on unlocked socket. 
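Aside (not part of the patch): two hunks above share the same fallback for controllers without dedicated LE buffers: hci_num_comp_pkts_evt() only uses le_cnt/le_pkts when the controller reported them and otherwise charges LE traffic to the ACL pool, and l2cap_conn_add() picks le_mtu over acl_mtu only when it is non-zero. A small sketch of that fallback; the struct and the example numbers are this sketch's assumptions, not values taken from the patch.

#include <stdint.h>
#include <stdio.h>

struct dev_caps {
	uint16_t acl_mtu, acl_pkts;
	uint16_t le_mtu, le_pkts;	/* zero if the controller has no LE buffers */
};

static uint16_t link_mtu(const struct dev_caps *d, int is_le)
{
	if (is_le && d->le_mtu)
		return d->le_mtu;
	return d->acl_mtu;		/* LE shares the ACL buffers */
}

static const char *credit_pool(const struct dev_caps *d, int is_le)
{
	return (is_le && d->le_pkts) ? "LE" : "ACL";
}

int main(void)
{
	struct dev_caps dual   = { 1021, 8, 27, 15 };	/* separate LE buffers */
	struct dev_caps shared = { 1021, 8,  0,  0 };	/* no LE buffers reported */

	printf("dual:   mtu %u, %s credits\n", link_mtu(&dual, 1), credit_pool(&dual, 1));
	printf("shared: mtu %u, %s credits\n", link_mtu(&shared, 1), credit_pool(&shared, 1));
	return 0;
}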
*/ -static void l2cap_sock_close(struct sock *sk) -{ - l2cap_sock_clear_timer(sk); - lock_sock(sk); - __l2cap_sock_close(sk, ECONNRESET); - release_sock(sk); - l2cap_sock_kill(sk); -} - -static void l2cap_sock_init(struct sock *sk, struct sock *parent) -{ - struct l2cap_pinfo *pi = l2cap_pi(sk); - - BT_DBG("sk %p", sk); - - if (parent) { - sk->sk_type = parent->sk_type; - bt_sk(sk)->defer_setup = bt_sk(parent)->defer_setup; - - pi->imtu = l2cap_pi(parent)->imtu; - pi->omtu = l2cap_pi(parent)->omtu; - pi->conf_state = l2cap_pi(parent)->conf_state; - pi->mode = l2cap_pi(parent)->mode; - pi->fcs = l2cap_pi(parent)->fcs; - pi->max_tx = l2cap_pi(parent)->max_tx; - pi->tx_win = l2cap_pi(parent)->tx_win; - pi->sec_level = l2cap_pi(parent)->sec_level; - pi->role_switch = l2cap_pi(parent)->role_switch; - pi->force_reliable = l2cap_pi(parent)->force_reliable; - } else { - pi->imtu = L2CAP_DEFAULT_MTU; - pi->omtu = 0; - if (!disable_ertm && sk->sk_type == SOCK_STREAM) { - pi->mode = L2CAP_MODE_ERTM; - pi->conf_state |= L2CAP_CONF_STATE2_DEVICE; - } else { - pi->mode = L2CAP_MODE_BASIC; - } - pi->max_tx = L2CAP_DEFAULT_MAX_TX; - pi->fcs = L2CAP_FCS_CRC16; - pi->tx_win = L2CAP_DEFAULT_TX_WINDOW; - pi->sec_level = BT_SECURITY_LOW; - pi->role_switch = 0; - pi->force_reliable = 0; - } - - /* Default config options */ - pi->conf_len = 0; - pi->flush_to = L2CAP_DEFAULT_FLUSH_TO; - skb_queue_head_init(TX_QUEUE(sk)); - skb_queue_head_init(SREJ_QUEUE(sk)); - skb_queue_head_init(BUSY_QUEUE(sk)); - INIT_LIST_HEAD(SREJ_LIST(sk)); -} - -static struct proto l2cap_proto = { - .name = "L2CAP", - .owner = THIS_MODULE, - .obj_size = sizeof(struct l2cap_pinfo) -}; - -static struct sock *l2cap_sock_alloc(struct net *net, struct socket *sock, int proto, gfp_t prio) -{ - struct sock *sk; - - sk = sk_alloc(net, PF_BLUETOOTH, prio, &l2cap_proto); - if (!sk) - return NULL; - - sock_init_data(sock, sk); - INIT_LIST_HEAD(&bt_sk(sk)->accept_q); - - sk->sk_destruct = l2cap_sock_destruct; - sk->sk_sndtimeo = msecs_to_jiffies(L2CAP_CONN_TIMEOUT); - - sock_reset_flag(sk, SOCK_ZAPPED); - - sk->sk_protocol = proto; - sk->sk_state = BT_OPEN; - - setup_timer(&sk->sk_timer, l2cap_sock_timeout, (unsigned long) sk); - - bt_sock_link(&l2cap_sk_list, sk); - return sk; -} - -static int l2cap_sock_create(struct net *net, struct socket *sock, int protocol, - int kern) -{ - struct sock *sk; - - BT_DBG("sock %p", sock); - - sock->state = SS_UNCONNECTED; - - if (sock->type != SOCK_SEQPACKET && sock->type != SOCK_STREAM && - sock->type != SOCK_DGRAM && sock->type != SOCK_RAW) - return -ESOCKTNOSUPPORT; - - if (sock->type == SOCK_RAW && !kern && !capable(CAP_NET_RAW)) - return -EPERM; - - sock->ops = &l2cap_sock_ops; - - sk = l2cap_sock_alloc(net, sock, protocol, GFP_ATOMIC); - if (!sk) - return -ENOMEM; - - l2cap_sock_init(sk, NULL); - return 0; -} - -static int l2cap_sock_bind(struct socket *sock, struct sockaddr *addr, int alen) -{ - struct sock *sk = sock->sk; - struct sockaddr_l2 la; - int len, err = 0; - - BT_DBG("sk %p", sk); - - if (!addr || addr->sa_family != AF_BLUETOOTH) - return -EINVAL; - - memset(&la, 0, sizeof(la)); - len = min_t(unsigned int, sizeof(la), alen); - memcpy(&la, addr, len); - - if (la.l2_cid) - return -EINVAL; - - lock_sock(sk); - - if (sk->sk_state != BT_OPEN) { - err = -EBADFD; - goto done; - } - - if (la.l2_psm) { - __u16 psm = __le16_to_cpu(la.l2_psm); - - /* PSM must be odd and lsb of upper byte must be 0 */ - if ((psm & 0x0101) != 0x0001) { - err = -EINVAL; - goto done; - } - - /* Restrict usage of well-known PSMs 
*/ - if (psm < 0x1001 && !capable(CAP_NET_BIND_SERVICE)) { - err = -EACCES; - goto done; - } - } - - write_lock_bh(&l2cap_sk_list.lock); - - if (la.l2_psm && __l2cap_get_sock_by_addr(la.l2_psm, &la.l2_bdaddr)) { - err = -EADDRINUSE; - } else { - /* Save source address */ - bacpy(&bt_sk(sk)->src, &la.l2_bdaddr); - l2cap_pi(sk)->psm = la.l2_psm; - l2cap_pi(sk)->sport = la.l2_psm; - sk->sk_state = BT_BOUND; - - if (__le16_to_cpu(la.l2_psm) == 0x0001 || - __le16_to_cpu(la.l2_psm) == 0x0003) - l2cap_pi(sk)->sec_level = BT_SECURITY_SDP; - } - - write_unlock_bh(&l2cap_sk_list.lock); - -done: - release_sock(sk); - return err; -} - -static int l2cap_do_connect(struct sock *sk) +int l2cap_do_connect(struct sock *sk) { bdaddr_t *src = &bt_sk(sk)->src; bdaddr_t *dst = &bt_sk(sk)->dst; @@ -1066,55 +852,27 @@ static int l2cap_do_connect(struct sock *sk) hci_dev_lock_bh(hdev); - err = -ENOMEM; - - if (sk->sk_type == SOCK_RAW) { - switch (l2cap_pi(sk)->sec_level) { - case BT_SECURITY_HIGH: - auth_type = HCI_AT_DEDICATED_BONDING_MITM; - break; - case BT_SECURITY_MEDIUM: - auth_type = HCI_AT_DEDICATED_BONDING; - break; - default: - auth_type = HCI_AT_NO_BONDING; - break; - } - } else if (l2cap_pi(sk)->psm == cpu_to_le16(0x0001)) { - if (l2cap_pi(sk)->sec_level == BT_SECURITY_HIGH) - auth_type = HCI_AT_NO_BONDING_MITM; - else - auth_type = HCI_AT_NO_BONDING; + auth_type = l2cap_get_auth_type(sk); - if (l2cap_pi(sk)->sec_level == BT_SECURITY_LOW) - l2cap_pi(sk)->sec_level = BT_SECURITY_SDP; - } else { - switch (l2cap_pi(sk)->sec_level) { - case BT_SECURITY_HIGH: - auth_type = HCI_AT_GENERAL_BONDING_MITM; - break; - case BT_SECURITY_MEDIUM: - auth_type = HCI_AT_GENERAL_BONDING; - break; - default: - auth_type = HCI_AT_NO_BONDING; - break; - } - } - - hcon = hci_connect(hdev, ACL_LINK, dst, + if (l2cap_pi(sk)->dcid == L2CAP_CID_LE_DATA) + hcon = hci_connect(hdev, LE_LINK, dst, l2cap_pi(sk)->sec_level, auth_type); - if (!hcon) + else + hcon = hci_connect(hdev, ACL_LINK, dst, + l2cap_pi(sk)->sec_level, auth_type); + + if (IS_ERR(hcon)) { + err = PTR_ERR(hcon); goto done; + } conn = l2cap_conn_add(hcon, 0); if (!conn) { hci_conn_put(hcon); + err = -ENOMEM; goto done; } - err = 0; - /* Update source addr of the socket */ bacpy(src, conn->src); @@ -1127,241 +885,21 @@ static int l2cap_do_connect(struct sock *sk) if (sk->sk_type != SOCK_SEQPACKET && sk->sk_type != SOCK_STREAM) { l2cap_sock_clear_timer(sk); - sk->sk_state = BT_CONNECTED; + if (l2cap_check_security(sk)) + sk->sk_state = BT_CONNECTED; } else l2cap_do_start(sk); } + err = 0; + done: hci_dev_unlock_bh(hdev); hci_dev_put(hdev); return err; } -static int l2cap_sock_connect(struct socket *sock, struct sockaddr *addr, int alen, int flags) -{ - struct sock *sk = sock->sk; - struct sockaddr_l2 la; - int len, err = 0; - - BT_DBG("sk %p", sk); - - if (!addr || alen < sizeof(addr->sa_family) || - addr->sa_family != AF_BLUETOOTH) - return -EINVAL; - - memset(&la, 0, sizeof(la)); - len = min_t(unsigned int, sizeof(la), alen); - memcpy(&la, addr, len); - - if (la.l2_cid) - return -EINVAL; - - lock_sock(sk); - - if ((sk->sk_type == SOCK_SEQPACKET || sk->sk_type == SOCK_STREAM) - && !la.l2_psm) { - err = -EINVAL; - goto done; - } - - switch (l2cap_pi(sk)->mode) { - case L2CAP_MODE_BASIC: - break; - case L2CAP_MODE_ERTM: - case L2CAP_MODE_STREAMING: - if (!disable_ertm) - break; - /* fall through */ - default: - err = -ENOTSUPP; - goto done; - } - - switch (sk->sk_state) { - case BT_CONNECT: - case BT_CONNECT2: - case BT_CONFIG: - /* Already connecting */ - goto wait; - 
- case BT_CONNECTED: - /* Already connected */ - err = -EISCONN; - goto done; - - case BT_OPEN: - case BT_BOUND: - /* Can connect */ - break; - - default: - err = -EBADFD; - goto done; - } - - /* PSM must be odd and lsb of upper byte must be 0 */ - if ((__le16_to_cpu(la.l2_psm) & 0x0101) != 0x0001 && - sk->sk_type != SOCK_RAW) { - err = -EINVAL; - goto done; - } - - /* Set destination address and psm */ - bacpy(&bt_sk(sk)->dst, &la.l2_bdaddr); - l2cap_pi(sk)->psm = la.l2_psm; - - err = l2cap_do_connect(sk); - if (err) - goto done; - -wait: - err = bt_sock_wait_state(sk, BT_CONNECTED, - sock_sndtimeo(sk, flags & O_NONBLOCK)); -done: - release_sock(sk); - return err; -} - -static int l2cap_sock_listen(struct socket *sock, int backlog) -{ - struct sock *sk = sock->sk; - int err = 0; - - BT_DBG("sk %p backlog %d", sk, backlog); - - lock_sock(sk); - - if ((sock->type != SOCK_SEQPACKET && sock->type != SOCK_STREAM) - || sk->sk_state != BT_BOUND) { - err = -EBADFD; - goto done; - } - - switch (l2cap_pi(sk)->mode) { - case L2CAP_MODE_BASIC: - break; - case L2CAP_MODE_ERTM: - case L2CAP_MODE_STREAMING: - if (!disable_ertm) - break; - /* fall through */ - default: - err = -ENOTSUPP; - goto done; - } - - if (!l2cap_pi(sk)->psm) { - bdaddr_t *src = &bt_sk(sk)->src; - u16 psm; - - err = -EINVAL; - - write_lock_bh(&l2cap_sk_list.lock); - - for (psm = 0x1001; psm < 0x1100; psm += 2) - if (!__l2cap_get_sock_by_addr(cpu_to_le16(psm), src)) { - l2cap_pi(sk)->psm = cpu_to_le16(psm); - l2cap_pi(sk)->sport = cpu_to_le16(psm); - err = 0; - break; - } - - write_unlock_bh(&l2cap_sk_list.lock); - - if (err < 0) - goto done; - } - - sk->sk_max_ack_backlog = backlog; - sk->sk_ack_backlog = 0; - sk->sk_state = BT_LISTEN; - -done: - release_sock(sk); - return err; -} - -static int l2cap_sock_accept(struct socket *sock, struct socket *newsock, int flags) -{ - DECLARE_WAITQUEUE(wait, current); - struct sock *sk = sock->sk, *nsk; - long timeo; - int err = 0; - - lock_sock_nested(sk, SINGLE_DEPTH_NESTING); - - if (sk->sk_state != BT_LISTEN) { - err = -EBADFD; - goto done; - } - - timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); - - BT_DBG("sk %p timeo %ld", sk, timeo); - - /* Wait for an incoming connection. (wake-one). 
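Aside (not part of the patch): the bind, connect and listen code being moved out of l2cap_core.c above applies the usual L2CAP PSM rules: a valid PSM is odd with the low bit of the upper byte clear ((psm & 0x0101) == 0x0001), PSMs below 0x1001 are well known and need CAP_NET_BIND_SERVICE, and autobind scans the dynamic range 0x1001-0x10ff in steps of two. A stand-alone checker/allocator with the same arithmetic; the in_use() callback stands in for the real per-address lookup and is this sketch's assumption.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* PSM must be odd and the lsb of the upper byte must be 0. */
static bool psm_valid(uint16_t psm)
{
	return (psm & 0x0101) == 0x0001;
}

/* PSMs below 0x1001 are well known and privileged. */
static bool psm_well_known(uint16_t psm)
{
	return psm < 0x1001;
}

/* Scan the dynamic range the same way the removed l2cap_sock_listen() did. */
static uint16_t psm_autobind(bool (*in_use)(uint16_t))
{
	uint16_t psm;

	for (psm = 0x1001; psm < 0x1100; psm += 2)
		if (!in_use(psm))
			return psm;

	return 0;	/* no free PSM */
}

static bool nothing_in_use(uint16_t psm) { (void)psm; return false; }

int main(void)
{
	printf("0x0001 valid: %d\n", psm_valid(0x0001));	/* 1 (SDP) */
	printf("0x0100 valid: %d\n", psm_valid(0x0100));	/* 0: even */
	printf("0x1101 valid: %d\n", psm_valid(0x1101));	/* 0: upper-byte lsb set */
	printf("0x0019 privileged: %d\n", psm_well_known(0x0019));	/* 1 */
	printf("first dynamic PSM: 0x%04x\n", psm_autobind(nothing_in_use));	/* 0x1001 */
	return 0;
}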
*/ - add_wait_queue_exclusive(sk_sleep(sk), &wait); - while (!(nsk = bt_accept_dequeue(sk, newsock))) { - set_current_state(TASK_INTERRUPTIBLE); - if (!timeo) { - err = -EAGAIN; - break; - } - - release_sock(sk); - timeo = schedule_timeout(timeo); - lock_sock_nested(sk, SINGLE_DEPTH_NESTING); - - if (sk->sk_state != BT_LISTEN) { - err = -EBADFD; - break; - } - - if (signal_pending(current)) { - err = sock_intr_errno(timeo); - break; - } - } - set_current_state(TASK_RUNNING); - remove_wait_queue(sk_sleep(sk), &wait); - - if (err) - goto done; - - newsock->state = SS_CONNECTED; - - BT_DBG("new socket %p", nsk); - -done: - release_sock(sk); - return err; -} - -static int l2cap_sock_getname(struct socket *sock, struct sockaddr *addr, int *len, int peer) -{ - struct sockaddr_l2 *la = (struct sockaddr_l2 *) addr; - struct sock *sk = sock->sk; - - BT_DBG("sock %p, sk %p", sock, sk); - - addr->sa_family = AF_BLUETOOTH; - *len = sizeof(struct sockaddr_l2); - - if (peer) { - la->l2_psm = l2cap_pi(sk)->psm; - bacpy(&la->l2_bdaddr, &bt_sk(sk)->dst); - la->l2_cid = cpu_to_le16(l2cap_pi(sk)->dcid); - } else { - la->l2_psm = l2cap_pi(sk)->sport; - bacpy(&la->l2_bdaddr, &bt_sk(sk)->src); - la->l2_cid = cpu_to_le16(l2cap_pi(sk)->scid); - } - - return 0; -} - -static int __l2cap_wait_ack(struct sock *sk) +int __l2cap_wait_ack(struct sock *sk) { DECLARE_WAITQUEUE(wait, current); int err = 0; @@ -1447,16 +985,23 @@ static void l2cap_drop_acked_frames(struct sock *sk) del_timer(&l2cap_pi(sk)->retrans_timer); } -static inline void l2cap_do_send(struct sock *sk, struct sk_buff *skb) +void l2cap_do_send(struct sock *sk, struct sk_buff *skb) { struct l2cap_pinfo *pi = l2cap_pi(sk); + struct hci_conn *hcon = pi->conn->hcon; + u16 flags; BT_DBG("sk %p, skb %p len %d", sk, skb, skb->len); - hci_send_acl(pi->conn->hcon, skb, 0); + if (!pi->flushable && lmp_no_flush_capable(hcon->hdev)) + flags = ACL_START_NO_FLUSH; + else + flags = ACL_START; + + hci_send_acl(hcon, skb, flags); } -static void l2cap_streaming_send(struct sock *sk) +void l2cap_streaming_send(struct sock *sk) { struct sk_buff *skb; struct l2cap_pinfo *pi = l2cap_pi(sk); @@ -1525,7 +1070,7 @@ static void l2cap_retransmit_one_frame(struct sock *sk, u8 tx_seq) l2cap_do_send(sk, tx_skb); } -static int l2cap_ertm_send(struct sock *sk) +int l2cap_ertm_send(struct sock *sk) { struct sk_buff *skb, *tx_skb; struct l2cap_pinfo *pi = l2cap_pi(sk); @@ -1665,7 +1210,7 @@ static inline int l2cap_skbuff_fromiovec(struct sock *sk, struct msghdr *msg, in return sent; } -static struct sk_buff *l2cap_create_connless_pdu(struct sock *sk, struct msghdr *msg, size_t len) +struct sk_buff *l2cap_create_connless_pdu(struct sock *sk, struct msghdr *msg, size_t len) { struct l2cap_conn *conn = l2cap_pi(sk)->conn; struct sk_buff *skb; @@ -1694,7 +1239,7 @@ static struct sk_buff *l2cap_create_connless_pdu(struct sock *sk, struct msghdr return skb; } -static struct sk_buff *l2cap_create_basic_pdu(struct sock *sk, struct msghdr *msg, size_t len) +struct sk_buff *l2cap_create_basic_pdu(struct sock *sk, struct msghdr *msg, size_t len) { struct l2cap_conn *conn = l2cap_pi(sk)->conn; struct sk_buff *skb; @@ -1722,7 +1267,7 @@ static struct sk_buff *l2cap_create_basic_pdu(struct sock *sk, struct msghdr *ms return skb; } -static struct sk_buff *l2cap_create_iframe_pdu(struct sock *sk, struct msghdr *msg, size_t len, u16 control, u16 sdulen) +struct sk_buff *l2cap_create_iframe_pdu(struct sock *sk, struct msghdr *msg, size_t len, u16 control, u16 sdulen) { struct l2cap_conn *conn = 
l2cap_pi(sk)->conn; struct sk_buff *skb; @@ -1767,7 +1312,7 @@ static struct sk_buff *l2cap_create_iframe_pdu(struct sock *sk, struct msghdr *m return skb; } -static inline int l2cap_sar_segment_sdu(struct sock *sk, struct msghdr *msg, size_t len) +int l2cap_sar_segment_sdu(struct sock *sk, struct msghdr *msg, size_t len) { struct l2cap_pinfo *pi = l2cap_pi(sk); struct sk_buff *skb; @@ -1813,487 +1358,6 @@ static inline int l2cap_sar_segment_sdu(struct sock *sk, struct msghdr *msg, siz return size; } -static int l2cap_sock_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t len) -{ - struct sock *sk = sock->sk; - struct l2cap_pinfo *pi = l2cap_pi(sk); - struct sk_buff *skb; - u16 control; - int err; - - BT_DBG("sock %p, sk %p", sock, sk); - - err = sock_error(sk); - if (err) - return err; - - if (msg->msg_flags & MSG_OOB) - return -EOPNOTSUPP; - - lock_sock(sk); - - if (sk->sk_state != BT_CONNECTED) { - err = -ENOTCONN; - goto done; - } - - /* Connectionless channel */ - if (sk->sk_type == SOCK_DGRAM) { - skb = l2cap_create_connless_pdu(sk, msg, len); - if (IS_ERR(skb)) { - err = PTR_ERR(skb); - } else { - l2cap_do_send(sk, skb); - err = len; - } - goto done; - } - - switch (pi->mode) { - case L2CAP_MODE_BASIC: - /* Check outgoing MTU */ - if (len > pi->omtu) { - err = -EMSGSIZE; - goto done; - } - - /* Create a basic PDU */ - skb = l2cap_create_basic_pdu(sk, msg, len); - if (IS_ERR(skb)) { - err = PTR_ERR(skb); - goto done; - } - - l2cap_do_send(sk, skb); - err = len; - break; - - case L2CAP_MODE_ERTM: - case L2CAP_MODE_STREAMING: - /* Entire SDU fits into one PDU */ - if (len <= pi->remote_mps) { - control = L2CAP_SDU_UNSEGMENTED; - skb = l2cap_create_iframe_pdu(sk, msg, len, control, 0); - if (IS_ERR(skb)) { - err = PTR_ERR(skb); - goto done; - } - __skb_queue_tail(TX_QUEUE(sk), skb); - - if (sk->sk_send_head == NULL) - sk->sk_send_head = skb; - - } else { - /* Segment SDU into multiples PDUs */ - err = l2cap_sar_segment_sdu(sk, msg, len); - if (err < 0) - goto done; - } - - if (pi->mode == L2CAP_MODE_STREAMING) { - l2cap_streaming_send(sk); - } else { - if (pi->conn_state & L2CAP_CONN_REMOTE_BUSY && - pi->conn_state && L2CAP_CONN_WAIT_F) { - err = len; - break; - } - err = l2cap_ertm_send(sk); - } - - if (err >= 0) - err = len; - break; - - default: - BT_DBG("bad state %1.1x", pi->mode); - err = -EBADFD; - } - -done: - release_sock(sk); - return err; -} - -static int l2cap_sock_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t len, int flags) -{ - struct sock *sk = sock->sk; - - lock_sock(sk); - - if (sk->sk_state == BT_CONNECT2 && bt_sk(sk)->defer_setup) { - struct l2cap_conn_rsp rsp; - struct l2cap_conn *conn = l2cap_pi(sk)->conn; - u8 buf[128]; - - sk->sk_state = BT_CONFIG; - - rsp.scid = cpu_to_le16(l2cap_pi(sk)->dcid); - rsp.dcid = cpu_to_le16(l2cap_pi(sk)->scid); - rsp.result = cpu_to_le16(L2CAP_CR_SUCCESS); - rsp.status = cpu_to_le16(L2CAP_CS_NO_INFO); - l2cap_send_cmd(l2cap_pi(sk)->conn, l2cap_pi(sk)->ident, - L2CAP_CONN_RSP, sizeof(rsp), &rsp); - - if (l2cap_pi(sk)->conf_state & L2CAP_CONF_REQ_SENT) { - release_sock(sk); - return 0; - } - - l2cap_pi(sk)->conf_state |= L2CAP_CONF_REQ_SENT; - l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_CONF_REQ, - l2cap_build_conf_req(sk, buf), buf); - l2cap_pi(sk)->num_conf_req++; - - release_sock(sk); - return 0; - } - - release_sock(sk); - - if (sock->type == SOCK_STREAM) - return bt_sock_stream_recvmsg(iocb, sock, msg, len, flags); - - return bt_sock_recvmsg(iocb, sock, msg, len, 
flags); -} - -static int l2cap_sock_setsockopt_old(struct socket *sock, int optname, char __user *optval, unsigned int optlen) -{ - struct sock *sk = sock->sk; - struct l2cap_options opts; - int len, err = 0; - u32 opt; - - BT_DBG("sk %p", sk); - - lock_sock(sk); - - switch (optname) { - case L2CAP_OPTIONS: - if (sk->sk_state == BT_CONNECTED) { - err = -EINVAL; - break; - } - - opts.imtu = l2cap_pi(sk)->imtu; - opts.omtu = l2cap_pi(sk)->omtu; - opts.flush_to = l2cap_pi(sk)->flush_to; - opts.mode = l2cap_pi(sk)->mode; - opts.fcs = l2cap_pi(sk)->fcs; - opts.max_tx = l2cap_pi(sk)->max_tx; - opts.txwin_size = (__u16)l2cap_pi(sk)->tx_win; - - len = min_t(unsigned int, sizeof(opts), optlen); - if (copy_from_user((char *) &opts, optval, len)) { - err = -EFAULT; - break; - } - - if (opts.txwin_size > L2CAP_DEFAULT_TX_WINDOW) { - err = -EINVAL; - break; - } - - l2cap_pi(sk)->mode = opts.mode; - switch (l2cap_pi(sk)->mode) { - case L2CAP_MODE_BASIC: - l2cap_pi(sk)->conf_state &= ~L2CAP_CONF_STATE2_DEVICE; - break; - case L2CAP_MODE_ERTM: - case L2CAP_MODE_STREAMING: - if (!disable_ertm) - break; - /* fall through */ - default: - err = -EINVAL; - break; - } - - l2cap_pi(sk)->imtu = opts.imtu; - l2cap_pi(sk)->omtu = opts.omtu; - l2cap_pi(sk)->fcs = opts.fcs; - l2cap_pi(sk)->max_tx = opts.max_tx; - l2cap_pi(sk)->tx_win = (__u8)opts.txwin_size; - break; - - case L2CAP_LM: - if (get_user(opt, (u32 __user *) optval)) { - err = -EFAULT; - break; - } - - if (opt & L2CAP_LM_AUTH) - l2cap_pi(sk)->sec_level = BT_SECURITY_LOW; - if (opt & L2CAP_LM_ENCRYPT) - l2cap_pi(sk)->sec_level = BT_SECURITY_MEDIUM; - if (opt & L2CAP_LM_SECURE) - l2cap_pi(sk)->sec_level = BT_SECURITY_HIGH; - - l2cap_pi(sk)->role_switch = (opt & L2CAP_LM_MASTER); - l2cap_pi(sk)->force_reliable = (opt & L2CAP_LM_RELIABLE); - break; - - default: - err = -ENOPROTOOPT; - break; - } - - release_sock(sk); - return err; -} - -static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen) -{ - struct sock *sk = sock->sk; - struct bt_security sec; - int len, err = 0; - u32 opt; - - BT_DBG("sk %p", sk); - - if (level == SOL_L2CAP) - return l2cap_sock_setsockopt_old(sock, optname, optval, optlen); - - if (level != SOL_BLUETOOTH) - return -ENOPROTOOPT; - - lock_sock(sk); - - switch (optname) { - case BT_SECURITY: - if (sk->sk_type != SOCK_SEQPACKET && sk->sk_type != SOCK_STREAM - && sk->sk_type != SOCK_RAW) { - err = -EINVAL; - break; - } - - sec.level = BT_SECURITY_LOW; - - len = min_t(unsigned int, sizeof(sec), optlen); - if (copy_from_user((char *) &sec, optval, len)) { - err = -EFAULT; - break; - } - - if (sec.level < BT_SECURITY_LOW || - sec.level > BT_SECURITY_HIGH) { - err = -EINVAL; - break; - } - - l2cap_pi(sk)->sec_level = sec.level; - break; - - case BT_DEFER_SETUP: - if (sk->sk_state != BT_BOUND && sk->sk_state != BT_LISTEN) { - err = -EINVAL; - break; - } - - if (get_user(opt, (u32 __user *) optval)) { - err = -EFAULT; - break; - } - - bt_sk(sk)->defer_setup = opt; - break; - - default: - err = -ENOPROTOOPT; - break; - } - - release_sock(sk); - return err; -} - -static int l2cap_sock_getsockopt_old(struct socket *sock, int optname, char __user *optval, int __user *optlen) -{ - struct sock *sk = sock->sk; - struct l2cap_options opts; - struct l2cap_conninfo cinfo; - int len, err = 0; - u32 opt; - - BT_DBG("sk %p", sk); - - if (get_user(len, optlen)) - return -EFAULT; - - lock_sock(sk); - - switch (optname) { - case L2CAP_OPTIONS: - opts.imtu = l2cap_pi(sk)->imtu; - opts.omtu = 
l2cap_pi(sk)->omtu; - opts.flush_to = l2cap_pi(sk)->flush_to; - opts.mode = l2cap_pi(sk)->mode; - opts.fcs = l2cap_pi(sk)->fcs; - opts.max_tx = l2cap_pi(sk)->max_tx; - opts.txwin_size = (__u16)l2cap_pi(sk)->tx_win; - - len = min_t(unsigned int, len, sizeof(opts)); - if (copy_to_user(optval, (char *) &opts, len)) - err = -EFAULT; - - break; - - case L2CAP_LM: - switch (l2cap_pi(sk)->sec_level) { - case BT_SECURITY_LOW: - opt = L2CAP_LM_AUTH; - break; - case BT_SECURITY_MEDIUM: - opt = L2CAP_LM_AUTH | L2CAP_LM_ENCRYPT; - break; - case BT_SECURITY_HIGH: - opt = L2CAP_LM_AUTH | L2CAP_LM_ENCRYPT | - L2CAP_LM_SECURE; - break; - default: - opt = 0; - break; - } - - if (l2cap_pi(sk)->role_switch) - opt |= L2CAP_LM_MASTER; - - if (l2cap_pi(sk)->force_reliable) - opt |= L2CAP_LM_RELIABLE; - - if (put_user(opt, (u32 __user *) optval)) - err = -EFAULT; - break; - - case L2CAP_CONNINFO: - if (sk->sk_state != BT_CONNECTED && - !(sk->sk_state == BT_CONNECT2 && - bt_sk(sk)->defer_setup)) { - err = -ENOTCONN; - break; - } - - cinfo.hci_handle = l2cap_pi(sk)->conn->hcon->handle; - memcpy(cinfo.dev_class, l2cap_pi(sk)->conn->hcon->dev_class, 3); - - len = min_t(unsigned int, len, sizeof(cinfo)); - if (copy_to_user(optval, (char *) &cinfo, len)) - err = -EFAULT; - - break; - - default: - err = -ENOPROTOOPT; - break; - } - - release_sock(sk); - return err; -} - -static int l2cap_sock_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen) -{ - struct sock *sk = sock->sk; - struct bt_security sec; - int len, err = 0; - - BT_DBG("sk %p", sk); - - if (level == SOL_L2CAP) - return l2cap_sock_getsockopt_old(sock, optname, optval, optlen); - - if (level != SOL_BLUETOOTH) - return -ENOPROTOOPT; - - if (get_user(len, optlen)) - return -EFAULT; - - lock_sock(sk); - - switch (optname) { - case BT_SECURITY: - if (sk->sk_type != SOCK_SEQPACKET && sk->sk_type != SOCK_STREAM - && sk->sk_type != SOCK_RAW) { - err = -EINVAL; - break; - } - - sec.level = l2cap_pi(sk)->sec_level; - - len = min_t(unsigned int, len, sizeof(sec)); - if (copy_to_user(optval, (char *) &sec, len)) - err = -EFAULT; - - break; - - case BT_DEFER_SETUP: - if (sk->sk_state != BT_BOUND && sk->sk_state != BT_LISTEN) { - err = -EINVAL; - break; - } - - if (put_user(bt_sk(sk)->defer_setup, (u32 __user *) optval)) - err = -EFAULT; - - break; - - default: - err = -ENOPROTOOPT; - break; - } - - release_sock(sk); - return err; -} - -static int l2cap_sock_shutdown(struct socket *sock, int how) -{ - struct sock *sk = sock->sk; - int err = 0; - - BT_DBG("sock %p, sk %p", sock, sk); - - if (!sk) - return 0; - - lock_sock(sk); - if (!sk->sk_shutdown) { - if (l2cap_pi(sk)->mode == L2CAP_MODE_ERTM) - err = __l2cap_wait_ack(sk); - - sk->sk_shutdown = SHUTDOWN_MASK; - l2cap_sock_clear_timer(sk); - __l2cap_sock_close(sk, 0); - - if (sock_flag(sk, SOCK_LINGER) && sk->sk_lingertime) - err = bt_sock_wait_state(sk, BT_CLOSED, - sk->sk_lingertime); - } - - if (!err && sk->sk_err) - err = -sk->sk_err; - - release_sock(sk); - return err; -} - -static int l2cap_sock_release(struct socket *sock) -{ - struct sock *sk = sock->sk; - int err; - - BT_DBG("sock %p, sk %p", sock, sk); - - if (!sk) - return 0; - - err = l2cap_sock_shutdown(sock, 2); - - sock_orphan(sk); - l2cap_sock_kill(sk); - return err; -} - static void l2cap_chan_ready(struct sock *sk) { struct sock *parent = bt_sk(sk)->parent; @@ -2365,7 +1429,11 @@ static struct sk_buff *l2cap_build_cmd(struct l2cap_conn *conn, lh = (struct l2cap_hdr *) skb_put(skb, L2CAP_HDR_SIZE); lh->len = 
cpu_to_le16(L2CAP_CMD_HDR_SIZE + dlen); - lh->cid = cpu_to_le16(L2CAP_CID_SIGNALING); + + if (conn->hcon->type == LE_LINK) + lh->cid = cpu_to_le16(L2CAP_CID_LE_SIGNALING); + else + lh->cid = cpu_to_le16(L2CAP_CID_SIGNALING); cmd = (struct l2cap_cmd_hdr *) skb_put(skb, L2CAP_CMD_HDR_SIZE); cmd->code = code; @@ -2512,7 +1580,7 @@ static inline __u8 l2cap_select_mode(__u8 mode, __u16 remote_feat_mask) } } -static int l2cap_build_conf_req(struct sock *sk, void *data) +int l2cap_build_conf_req(struct sock *sk, void *data) { struct l2cap_pinfo *pi = l2cap_pi(sk); struct l2cap_conf_req *req = data; @@ -2537,11 +1605,11 @@ static int l2cap_build_conf_req(struct sock *sk, void *data) } done: + if (pi->imtu != L2CAP_DEFAULT_MTU) + l2cap_add_conf_opt(&ptr, L2CAP_CONF_MTU, 2, pi->imtu); + switch (pi->mode) { case L2CAP_MODE_BASIC: - if (pi->imtu != L2CAP_DEFAULT_MTU) - l2cap_add_conf_opt(&ptr, L2CAP_CONF_MTU, 2, pi->imtu); - if (!(pi->conn->feat_mask & L2CAP_FEAT_ERTM) && !(pi->conn->feat_mask & L2CAP_FEAT_STREAMING)) break; @@ -2604,10 +1672,6 @@ done: break; } - /* FIXME: Need actual value of the flush timeout */ - //if (flush_to != L2CAP_DEFAULT_FLUSH_TO) - // l2cap_add_conf_opt(&ptr, L2CAP_CONF_FLUSH_TO, 2, pi->flush_to); - req->dcid = cpu_to_le16(pi->dcid); req->flags = cpu_to_le16(0); @@ -3434,12 +2498,153 @@ static inline int l2cap_information_rsp(struct l2cap_conn *conn, struct l2cap_cm return 0; } -static inline void l2cap_sig_channel(struct l2cap_conn *conn, struct sk_buff *skb) +static inline int l2cap_check_conn_param(u16 min, u16 max, u16 latency, + u16 to_multiplier) +{ + u16 max_latency; + + if (min > max || min < 6 || max > 3200) + return -EINVAL; + + if (to_multiplier < 10 || to_multiplier > 3200) + return -EINVAL; + + if (max >= to_multiplier * 8) + return -EINVAL; + + max_latency = (to_multiplier * 8 / max) - 1; + if (latency > 499 || latency > max_latency) + return -EINVAL; + + return 0; +} + +static inline int l2cap_conn_param_update_req(struct l2cap_conn *conn, + struct l2cap_cmd_hdr *cmd, u8 *data) +{ + struct hci_conn *hcon = conn->hcon; + struct l2cap_conn_param_update_req *req; + struct l2cap_conn_param_update_rsp rsp; + u16 min, max, latency, to_multiplier, cmd_len; + int err; + + if (!(hcon->link_mode & HCI_LM_MASTER)) + return -EINVAL; + + cmd_len = __le16_to_cpu(cmd->len); + if (cmd_len != sizeof(struct l2cap_conn_param_update_req)) + return -EPROTO; + + req = (struct l2cap_conn_param_update_req *) data; + min = __le16_to_cpu(req->min); + max = __le16_to_cpu(req->max); + latency = __le16_to_cpu(req->latency); + to_multiplier = __le16_to_cpu(req->to_multiplier); + + BT_DBG("min 0x%4.4x max 0x%4.4x latency: 0x%4.4x Timeout: 0x%4.4x", + min, max, latency, to_multiplier); + + memset(&rsp, 0, sizeof(rsp)); + + err = l2cap_check_conn_param(min, max, latency, to_multiplier); + if (err) + rsp.result = cpu_to_le16(L2CAP_CONN_PARAM_REJECTED); + else + rsp.result = cpu_to_le16(L2CAP_CONN_PARAM_ACCEPTED); + + l2cap_send_cmd(conn, cmd->ident, L2CAP_CONN_PARAM_UPDATE_RSP, + sizeof(rsp), &rsp); + + if (!err) + hci_le_conn_update(hcon, min, max, latency, to_multiplier); + + return 0; +} + +static inline int l2cap_bredr_sig_cmd(struct l2cap_conn *conn, + struct l2cap_cmd_hdr *cmd, u16 cmd_len, u8 *data) +{ + int err = 0; + + switch (cmd->code) { + case L2CAP_COMMAND_REJ: + l2cap_command_rej(conn, cmd, data); + break; + + case L2CAP_CONN_REQ: + err = l2cap_connect_req(conn, cmd, data); + break; + + case L2CAP_CONN_RSP: + err = l2cap_connect_rsp(conn, cmd, data); + break; + + case 
L2CAP_CONF_REQ: + err = l2cap_config_req(conn, cmd, cmd_len, data); + break; + + case L2CAP_CONF_RSP: + err = l2cap_config_rsp(conn, cmd, data); + break; + + case L2CAP_DISCONN_REQ: + err = l2cap_disconnect_req(conn, cmd, data); + break; + + case L2CAP_DISCONN_RSP: + err = l2cap_disconnect_rsp(conn, cmd, data); + break; + + case L2CAP_ECHO_REQ: + l2cap_send_cmd(conn, cmd->ident, L2CAP_ECHO_RSP, cmd_len, data); + break; + + case L2CAP_ECHO_RSP: + break; + + case L2CAP_INFO_REQ: + err = l2cap_information_req(conn, cmd, data); + break; + + case L2CAP_INFO_RSP: + err = l2cap_information_rsp(conn, cmd, data); + break; + + default: + BT_ERR("Unknown BR/EDR signaling command 0x%2.2x", cmd->code); + err = -EINVAL; + break; + } + + return err; +} + +static inline int l2cap_le_sig_cmd(struct l2cap_conn *conn, + struct l2cap_cmd_hdr *cmd, u8 *data) +{ + switch (cmd->code) { + case L2CAP_COMMAND_REJ: + return 0; + + case L2CAP_CONN_PARAM_UPDATE_REQ: + return l2cap_conn_param_update_req(conn, cmd, data); + + case L2CAP_CONN_PARAM_UPDATE_RSP: + return 0; + + default: + BT_ERR("Unknown LE signaling command 0x%2.2x", cmd->code); + return -EINVAL; + } +} + +static inline void l2cap_sig_channel(struct l2cap_conn *conn, + struct sk_buff *skb) { u8 *data = skb->data; int len = skb->len; struct l2cap_cmd_hdr cmd; - int err = 0; + int err; l2cap_raw_recv(conn, skb); @@ -3458,55 +2663,10 @@ static inline void l2cap_sig_channel(struct l2cap_conn *conn, struct sk_buff *sk break; } - switch (cmd.code) { - case L2CAP_COMMAND_REJ: - l2cap_command_rej(conn, &cmd, data); - break; - - case L2CAP_CONN_REQ: - err = l2cap_connect_req(conn, &cmd, data); - break; - - case L2CAP_CONN_RSP: - err = l2cap_connect_rsp(conn, &cmd, data); - break; - - case L2CAP_CONF_REQ: - err = l2cap_config_req(conn, &cmd, cmd_len, data); - break; - - case L2CAP_CONF_RSP: - err = l2cap_config_rsp(conn, &cmd, data); - break; - - case L2CAP_DISCONN_REQ: - err = l2cap_disconnect_req(conn, &cmd, data); - break; - - case L2CAP_DISCONN_RSP: - err = l2cap_disconnect_rsp(conn, &cmd, data); - break; - - case L2CAP_ECHO_REQ: - l2cap_send_cmd(conn, cmd.ident, L2CAP_ECHO_RSP, cmd_len, data); - break; - - case L2CAP_ECHO_RSP: - break; - - case L2CAP_INFO_REQ: - err = l2cap_information_req(conn, &cmd, data); - break; - - case L2CAP_INFO_RSP: - err = l2cap_information_rsp(conn, &cmd, data); - break; - - default: - BT_ERR("Unknown signaling command 0x%2.2x", cmd.code); - err = -EINVAL; - break; - } + if (conn->hcon->type == LE_LINK) + err = l2cap_le_sig_cmd(conn, &cmd, data); + else + err = l2cap_bredr_sig_cmd(conn, &cmd, cmd_len, data); if (err) { struct l2cap_cmd_rej rej; @@ -4503,6 +3663,7 @@ static void l2cap_recv_frame(struct l2cap_conn *conn, struct sk_buff *skb) BT_DBG("len %d, cid 0x%4.4x", len, cid); switch (cid) { + case L2CAP_CID_LE_SIGNALING: case L2CAP_CID_SIGNALING: l2cap_sig_channel(conn, skb); break; @@ -4560,7 +3721,7 @@ static int l2cap_connect_cfm(struct hci_conn *hcon, u8 status) BT_DBG("hcon %p bdaddr %s status %d", hcon, batostr(&hcon->dst), status); - if (hcon->type != ACL_LINK) + if (!(hcon->type == ACL_LINK || hcon->type == LE_LINK)) return -EINVAL; if (!status) { @@ -4589,7 +3750,7 @@ static int l2cap_disconn_cfm(struct hci_conn *hcon, u8 reason) { BT_DBG("hcon %p reason %d", hcon, reason); - if (hcon->type != ACL_LINK) + if (!(hcon->type == ACL_LINK || hcon->type == LE_LINK)) return -EINVAL; l2cap_conn_del(hcon, bt_err(reason)); @@ -4692,12 +3853,15 @@ static int l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 fl { 
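Aside (not part of the patch): l2cap_check_conn_param(), added above for the LE Connection Parameter Update Request, bounds the connection interval to 6-3200 (1.25 ms units, so 7.5 ms to 4 s), the supervision timeout to 10-3200 (10 ms units) and the slave latency to at most 499, and requires the timeout to be long enough to cover latency + 1 maximum-length intervals; the factor 8 converts 10 ms timeout units into 1.25 ms interval units. A stand-alone restatement with two worked inputs; the function name and return convention are local to this sketch.

#include <stdint.h>
#include <stdio.h>

static int check_conn_param(uint16_t min, uint16_t max, uint16_t latency,
			    uint16_t to_multiplier)
{
	uint16_t max_latency;

	if (min > max || min < 6 || max > 3200)
		return -1;		/* interval out of range */

	if (to_multiplier < 10 || to_multiplier > 3200)
		return -1;		/* timeout out of range */

	if (max >= to_multiplier * 8)
		return -1;		/* timeout not longer than max interval */

	/* (latency + 1) * max interval must fit inside the supervision timeout */
	max_latency = (to_multiplier * 8 / max) - 1;
	if (latency > 499 || latency > max_latency)
		return -1;

	return 0;
}

int main(void)
{
	/* 30-50 ms interval, no latency, 4 s timeout: accepted (prints 0) */
	printf("%d\n", check_conn_param(24, 40, 0, 400));

	/* 250 ms max interval but only a 100 ms timeout: rejected (prints -1) */
	printf("%d\n", check_conn_param(24, 200, 0, 10));
	return 0;
}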
struct l2cap_conn *conn = hcon->l2cap_data; - if (!conn && !(conn = l2cap_conn_add(hcon, 0))) + if (!conn) + conn = l2cap_conn_add(hcon, 0); + + if (!conn) goto drop; BT_DBG("conn %p len %d flags 0x%x", conn, skb->len, flags); - if (flags & ACL_START) { + if (!(flags & ACL_CONT)) { struct l2cap_hdr *hdr; struct sock *sk; u16 cid; @@ -4803,12 +3967,13 @@ static int l2cap_debugfs_show(struct seq_file *f, void *p) sk_for_each(sk, node, &l2cap_sk_list.head) { struct l2cap_pinfo *pi = l2cap_pi(sk); - seq_printf(f, "%s %s %d %d 0x%4.4x 0x%4.4x %d %d %d\n", + seq_printf(f, "%s %s %d %d 0x%4.4x 0x%4.4x %d %d %d %d\n", batostr(&bt_sk(sk)->src), batostr(&bt_sk(sk)->dst), sk->sk_state, __le16_to_cpu(pi->psm), pi->scid, pi->dcid, - pi->imtu, pi->omtu, pi->sec_level); + pi->imtu, pi->omtu, pi->sec_level, + pi->mode); } read_unlock_bh(&l2cap_sk_list.lock); @@ -4830,32 +3995,6 @@ static const struct file_operations l2cap_debugfs_fops = { static struct dentry *l2cap_debugfs; -static const struct proto_ops l2cap_sock_ops = { - .family = PF_BLUETOOTH, - .owner = THIS_MODULE, - .release = l2cap_sock_release, - .bind = l2cap_sock_bind, - .connect = l2cap_sock_connect, - .listen = l2cap_sock_listen, - .accept = l2cap_sock_accept, - .getname = l2cap_sock_getname, - .sendmsg = l2cap_sock_sendmsg, - .recvmsg = l2cap_sock_recvmsg, - .poll = bt_sock_poll, - .ioctl = bt_sock_ioctl, - .mmap = sock_no_mmap, - .socketpair = sock_no_socketpair, - .shutdown = l2cap_sock_shutdown, - .setsockopt = l2cap_sock_setsockopt, - .getsockopt = l2cap_sock_getsockopt -}; - -static const struct net_proto_family l2cap_sock_family_ops = { - .family = PF_BLUETOOTH, - .owner = THIS_MODULE, - .create = l2cap_sock_create, -}; - static struct hci_proto l2cap_hci_proto = { .name = "L2CAP", .id = HCI_PROTO_L2CAP, @@ -4867,23 +4006,17 @@ static struct hci_proto l2cap_hci_proto = { .recv_acldata = l2cap_recv_acldata }; -static int __init l2cap_init(void) +int __init l2cap_init(void) { int err; - err = proto_register(&l2cap_proto, 0); + err = l2cap_init_sockets(); if (err < 0) return err; _busy_wq = create_singlethread_workqueue("l2cap"); if (!_busy_wq) { - proto_unregister(&l2cap_proto); - return -ENOMEM; - } - - err = bt_sock_register(BTPROTO_L2CAP, &l2cap_sock_family_ops); - if (err < 0) { - BT_ERR("L2CAP socket registration failed"); + err = -ENOMEM; goto error; } @@ -4901,49 +4034,26 @@ static int __init l2cap_init(void) BT_ERR("Failed to create L2CAP debug file"); } - BT_INFO("L2CAP ver %s", VERSION); - BT_INFO("L2CAP socket layer initialized"); - return 0; error: destroy_workqueue(_busy_wq); - proto_unregister(&l2cap_proto); + l2cap_cleanup_sockets(); return err; } -static void __exit l2cap_exit(void) +void l2cap_exit(void) { debugfs_remove(l2cap_debugfs); flush_workqueue(_busy_wq); destroy_workqueue(_busy_wq); - if (bt_sock_unregister(BTPROTO_L2CAP) < 0) - BT_ERR("L2CAP socket unregistration failed"); - if (hci_unregister_proto(&l2cap_hci_proto) < 0) BT_ERR("L2CAP protocol unregistration failed"); - proto_unregister(&l2cap_proto); -} - -void l2cap_load(void) -{ - /* Dummy function to trigger automatic L2CAP module loading by - * other modules that use L2CAP sockets but don't use any other - * symbols from it. 
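Aside (not part of the patch): once non-flushable ACL packets are in use (the hunks above pick ACL_START_NO_FLUSH whenever the controller is lmp_no_flush_capable()), a first fragment no longer always carries ACL_START, so the reassembly check in l2cap_recv_acldata() above is rewritten as !(flags & ACL_CONT). A tiny sketch of why that test catches both kinds of start fragment; the flag values mirror the HCI packet-boundary encoding and are stated here as this sketch's assumption.

#include <stdint.h>
#include <stdio.h>

#define ACL_START_NO_FLUSH	0x00
#define ACL_CONT		0x01
#define ACL_START		0x02

static int is_start_fragment(uint16_t flags)
{
	/* (flags & ACL_START) would be false for ACL_START_NO_FLUSH (0x00),
	 * so test for "not a continuation" instead. */
	return !(flags & ACL_CONT);
}

int main(void)
{
	printf("ACL_START:          %d\n", is_start_fragment(ACL_START));		/* 1 */
	printf("ACL_START_NO_FLUSH: %d\n", is_start_fragment(ACL_START_NO_FLUSH));	/* 1 */
	printf("ACL_CONT:           %d\n", is_start_fragment(ACL_CONT));		/* 0 */
	return 0;
}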
*/ + l2cap_cleanup_sockets(); } -EXPORT_SYMBOL(l2cap_load); - -module_init(l2cap_init); -module_exit(l2cap_exit); module_param(disable_ertm, bool, 0644); MODULE_PARM_DESC(disable_ertm, "Disable enhanced retransmission mode"); - -MODULE_AUTHOR("Marcel Holtmann <marcel@holtmann.org>"); -MODULE_DESCRIPTION("Bluetooth L2CAP ver " VERSION); -MODULE_VERSION(VERSION); -MODULE_LICENSE("GPL"); -MODULE_ALIAS("bt-proto-0"); diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c new file mode 100644 index 000000000000..fc85e7ae33c7 --- /dev/null +++ b/net/bluetooth/l2cap_sock.c @@ -0,0 +1,1156 @@ +/* + BlueZ - Bluetooth protocol stack for Linux + Copyright (C) 2000-2001 Qualcomm Incorporated + Copyright (C) 2009-2010 Gustavo F. Padovan <gustavo@padovan.org> + Copyright (C) 2010 Google Inc. + + Written 2000,2001 by Maxim Krasnyansky <maxk@qualcomm.com> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License version 2 as + published by the Free Software Foundation; + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. + IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY + CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + + ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS, + COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS + SOFTWARE IS DISCLAIMED. +*/ + +/* Bluetooth L2CAP sockets. */ + +#include <net/bluetooth/bluetooth.h> +#include <net/bluetooth/hci_core.h> +#include <net/bluetooth/l2cap.h> + +/* ---- L2CAP timers ---- */ +static void l2cap_sock_timeout(unsigned long arg) +{ + struct sock *sk = (struct sock *) arg; + int reason; + + BT_DBG("sock %p state %d", sk, sk->sk_state); + + bh_lock_sock(sk); + + if (sock_owned_by_user(sk)) { + /* sk is owned by user. 
Try again later */ + l2cap_sock_set_timer(sk, HZ / 5); + bh_unlock_sock(sk); + sock_put(sk); + return; + } + + if (sk->sk_state == BT_CONNECTED || sk->sk_state == BT_CONFIG) + reason = ECONNREFUSED; + else if (sk->sk_state == BT_CONNECT && + l2cap_pi(sk)->sec_level != BT_SECURITY_SDP) + reason = ECONNREFUSED; + else + reason = ETIMEDOUT; + + __l2cap_sock_close(sk, reason); + + bh_unlock_sock(sk); + + l2cap_sock_kill(sk); + sock_put(sk); +} + +void l2cap_sock_set_timer(struct sock *sk, long timeout) +{ + BT_DBG("sk %p state %d timeout %ld", sk, sk->sk_state, timeout); + sk_reset_timer(sk, &sk->sk_timer, jiffies + timeout); +} + +void l2cap_sock_clear_timer(struct sock *sk) +{ + BT_DBG("sock %p state %d", sk, sk->sk_state); + sk_stop_timer(sk, &sk->sk_timer); +} + +static struct sock *__l2cap_get_sock_by_addr(__le16 psm, bdaddr_t *src) +{ + struct sock *sk; + struct hlist_node *node; + sk_for_each(sk, node, &l2cap_sk_list.head) + if (l2cap_pi(sk)->sport == psm && !bacmp(&bt_sk(sk)->src, src)) + goto found; + sk = NULL; +found: + return sk; +} + +static int l2cap_sock_bind(struct socket *sock, struct sockaddr *addr, int alen) +{ + struct sock *sk = sock->sk; + struct sockaddr_l2 la; + int len, err = 0; + + BT_DBG("sk %p", sk); + + if (!addr || addr->sa_family != AF_BLUETOOTH) + return -EINVAL; + + memset(&la, 0, sizeof(la)); + len = min_t(unsigned int, sizeof(la), alen); + memcpy(&la, addr, len); + + if (la.l2_cid && la.l2_psm) + return -EINVAL; + + lock_sock(sk); + + if (sk->sk_state != BT_OPEN) { + err = -EBADFD; + goto done; + } + + if (la.l2_psm) { + __u16 psm = __le16_to_cpu(la.l2_psm); + + /* PSM must be odd and lsb of upper byte must be 0 */ + if ((psm & 0x0101) != 0x0001) { + err = -EINVAL; + goto done; + } + + /* Restrict usage of well-known PSMs */ + if (psm < 0x1001 && !capable(CAP_NET_BIND_SERVICE)) { + err = -EACCES; + goto done; + } + } + + write_lock_bh(&l2cap_sk_list.lock); + + if (la.l2_psm && __l2cap_get_sock_by_addr(la.l2_psm, &la.l2_bdaddr)) { + err = -EADDRINUSE; + } else { + /* Save source address */ + bacpy(&bt_sk(sk)->src, &la.l2_bdaddr); + l2cap_pi(sk)->psm = la.l2_psm; + l2cap_pi(sk)->sport = la.l2_psm; + sk->sk_state = BT_BOUND; + + if (__le16_to_cpu(la.l2_psm) == 0x0001 || + __le16_to_cpu(la.l2_psm) == 0x0003) + l2cap_pi(sk)->sec_level = BT_SECURITY_SDP; + } + + if (la.l2_cid) + l2cap_pi(sk)->scid = la.l2_cid; + + write_unlock_bh(&l2cap_sk_list.lock); + +done: + release_sock(sk); + return err; +} + +static int l2cap_sock_connect(struct socket *sock, struct sockaddr *addr, int alen, int flags) +{ + struct sock *sk = sock->sk; + struct sockaddr_l2 la; + int len, err = 0; + + BT_DBG("sk %p", sk); + + if (!addr || alen < sizeof(addr->sa_family) || + addr->sa_family != AF_BLUETOOTH) + return -EINVAL; + + memset(&la, 0, sizeof(la)); + len = min_t(unsigned int, sizeof(la), alen); + memcpy(&la, addr, len); + + if (la.l2_cid && la.l2_psm) + return -EINVAL; + + lock_sock(sk); + + if ((sk->sk_type == SOCK_SEQPACKET || sk->sk_type == SOCK_STREAM) + && !(la.l2_psm || la.l2_cid)) { + err = -EINVAL; + goto done; + } + + switch (l2cap_pi(sk)->mode) { + case L2CAP_MODE_BASIC: + break; + case L2CAP_MODE_ERTM: + case L2CAP_MODE_STREAMING: + if (!disable_ertm) + break; + /* fall through */ + default: + err = -ENOTSUPP; + goto done; + } + + switch (sk->sk_state) { + case BT_CONNECT: + case BT_CONNECT2: + case BT_CONFIG: + /* Already connecting */ + goto wait; + + case BT_CONNECTED: + /* Already connected */ + err = -EISCONN; + goto done; + + case BT_OPEN: + case BT_BOUND: + /* Can 
connect */ + break; + + default: + err = -EBADFD; + goto done; + } + + /* PSM must be odd and lsb of upper byte must be 0 */ + if ((__le16_to_cpu(la.l2_psm) & 0x0101) != 0x0001 && + sk->sk_type != SOCK_RAW && !la.l2_cid) { + err = -EINVAL; + goto done; + } + + /* Set destination address and psm */ + bacpy(&bt_sk(sk)->dst, &la.l2_bdaddr); + l2cap_pi(sk)->psm = la.l2_psm; + l2cap_pi(sk)->dcid = la.l2_cid; + + err = l2cap_do_connect(sk); + if (err) + goto done; + +wait: + err = bt_sock_wait_state(sk, BT_CONNECTED, + sock_sndtimeo(sk, flags & O_NONBLOCK)); +done: + release_sock(sk); + return err; +} + +static int l2cap_sock_listen(struct socket *sock, int backlog) +{ + struct sock *sk = sock->sk; + int err = 0; + + BT_DBG("sk %p backlog %d", sk, backlog); + + lock_sock(sk); + + if ((sock->type != SOCK_SEQPACKET && sock->type != SOCK_STREAM) + || sk->sk_state != BT_BOUND) { + err = -EBADFD; + goto done; + } + + switch (l2cap_pi(sk)->mode) { + case L2CAP_MODE_BASIC: + break; + case L2CAP_MODE_ERTM: + case L2CAP_MODE_STREAMING: + if (!disable_ertm) + break; + /* fall through */ + default: + err = -ENOTSUPP; + goto done; + } + + if (!l2cap_pi(sk)->psm && !l2cap_pi(sk)->dcid) { + bdaddr_t *src = &bt_sk(sk)->src; + u16 psm; + + err = -EINVAL; + + write_lock_bh(&l2cap_sk_list.lock); + + for (psm = 0x1001; psm < 0x1100; psm += 2) + if (!__l2cap_get_sock_by_addr(cpu_to_le16(psm), src)) { + l2cap_pi(sk)->psm = cpu_to_le16(psm); + l2cap_pi(sk)->sport = cpu_to_le16(psm); + err = 0; + break; + } + + write_unlock_bh(&l2cap_sk_list.lock); + + if (err < 0) + goto done; + } + + sk->sk_max_ack_backlog = backlog; + sk->sk_ack_backlog = 0; + sk->sk_state = BT_LISTEN; + +done: + release_sock(sk); + return err; +} + +static int l2cap_sock_accept(struct socket *sock, struct socket *newsock, int flags) +{ + DECLARE_WAITQUEUE(wait, current); + struct sock *sk = sock->sk, *nsk; + long timeo; + int err = 0; + + lock_sock_nested(sk, SINGLE_DEPTH_NESTING); + + if (sk->sk_state != BT_LISTEN) { + err = -EBADFD; + goto done; + } + + timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); + + BT_DBG("sk %p timeo %ld", sk, timeo); + + /* Wait for an incoming connection. (wake-one). 
*/ + add_wait_queue_exclusive(sk_sleep(sk), &wait); + while (!(nsk = bt_accept_dequeue(sk, newsock))) { + set_current_state(TASK_INTERRUPTIBLE); + if (!timeo) { + err = -EAGAIN; + break; + } + + release_sock(sk); + timeo = schedule_timeout(timeo); + lock_sock_nested(sk, SINGLE_DEPTH_NESTING); + + if (sk->sk_state != BT_LISTEN) { + err = -EBADFD; + break; + } + + if (signal_pending(current)) { + err = sock_intr_errno(timeo); + break; + } + } + set_current_state(TASK_RUNNING); + remove_wait_queue(sk_sleep(sk), &wait); + + if (err) + goto done; + + newsock->state = SS_CONNECTED; + + BT_DBG("new socket %p", nsk); + +done: + release_sock(sk); + return err; +} + +static int l2cap_sock_getname(struct socket *sock, struct sockaddr *addr, int *len, int peer) +{ + struct sockaddr_l2 *la = (struct sockaddr_l2 *) addr; + struct sock *sk = sock->sk; + + BT_DBG("sock %p, sk %p", sock, sk); + + addr->sa_family = AF_BLUETOOTH; + *len = sizeof(struct sockaddr_l2); + + if (peer) { + la->l2_psm = l2cap_pi(sk)->psm; + bacpy(&la->l2_bdaddr, &bt_sk(sk)->dst); + la->l2_cid = cpu_to_le16(l2cap_pi(sk)->dcid); + } else { + la->l2_psm = l2cap_pi(sk)->sport; + bacpy(&la->l2_bdaddr, &bt_sk(sk)->src); + la->l2_cid = cpu_to_le16(l2cap_pi(sk)->scid); + } + + return 0; +} + +static int l2cap_sock_getsockopt_old(struct socket *sock, int optname, char __user *optval, int __user *optlen) +{ + struct sock *sk = sock->sk; + struct l2cap_options opts; + struct l2cap_conninfo cinfo; + int len, err = 0; + u32 opt; + + BT_DBG("sk %p", sk); + + if (get_user(len, optlen)) + return -EFAULT; + + lock_sock(sk); + + switch (optname) { + case L2CAP_OPTIONS: + memset(&opts, 0, sizeof(opts)); + opts.imtu = l2cap_pi(sk)->imtu; + opts.omtu = l2cap_pi(sk)->omtu; + opts.flush_to = l2cap_pi(sk)->flush_to; + opts.mode = l2cap_pi(sk)->mode; + opts.fcs = l2cap_pi(sk)->fcs; + opts.max_tx = l2cap_pi(sk)->max_tx; + opts.txwin_size = (__u16)l2cap_pi(sk)->tx_win; + + len = min_t(unsigned int, len, sizeof(opts)); + if (copy_to_user(optval, (char *) &opts, len)) + err = -EFAULT; + + break; + + case L2CAP_LM: + switch (l2cap_pi(sk)->sec_level) { + case BT_SECURITY_LOW: + opt = L2CAP_LM_AUTH; + break; + case BT_SECURITY_MEDIUM: + opt = L2CAP_LM_AUTH | L2CAP_LM_ENCRYPT; + break; + case BT_SECURITY_HIGH: + opt = L2CAP_LM_AUTH | L2CAP_LM_ENCRYPT | + L2CAP_LM_SECURE; + break; + default: + opt = 0; + break; + } + + if (l2cap_pi(sk)->role_switch) + opt |= L2CAP_LM_MASTER; + + if (l2cap_pi(sk)->force_reliable) + opt |= L2CAP_LM_RELIABLE; + + if (put_user(opt, (u32 __user *) optval)) + err = -EFAULT; + break; + + case L2CAP_CONNINFO: + if (sk->sk_state != BT_CONNECTED && + !(sk->sk_state == BT_CONNECT2 && + bt_sk(sk)->defer_setup)) { + err = -ENOTCONN; + break; + } + + cinfo.hci_handle = l2cap_pi(sk)->conn->hcon->handle; + memcpy(cinfo.dev_class, l2cap_pi(sk)->conn->hcon->dev_class, 3); + + len = min_t(unsigned int, len, sizeof(cinfo)); + if (copy_to_user(optval, (char *) &cinfo, len)) + err = -EFAULT; + + break; + + default: + err = -ENOPROTOOPT; + break; + } + + release_sock(sk); + return err; +} + +static int l2cap_sock_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen) +{ + struct sock *sk = sock->sk; + struct bt_security sec; + int len, err = 0; + + BT_DBG("sk %p", sk); + + if (level == SOL_L2CAP) + return l2cap_sock_getsockopt_old(sock, optname, optval, optlen); + + if (level != SOL_BLUETOOTH) + return -ENOPROTOOPT; + + if (get_user(len, optlen)) + return -EFAULT; + + lock_sock(sk); + + switch (optname) { + case 
BT_SECURITY: + if (sk->sk_type != SOCK_SEQPACKET && sk->sk_type != SOCK_STREAM + && sk->sk_type != SOCK_RAW) { + err = -EINVAL; + break; + } + + sec.level = l2cap_pi(sk)->sec_level; + + len = min_t(unsigned int, len, sizeof(sec)); + if (copy_to_user(optval, (char *) &sec, len)) + err = -EFAULT; + + break; + + case BT_DEFER_SETUP: + if (sk->sk_state != BT_BOUND && sk->sk_state != BT_LISTEN) { + err = -EINVAL; + break; + } + + if (put_user(bt_sk(sk)->defer_setup, (u32 __user *) optval)) + err = -EFAULT; + + break; + + case BT_FLUSHABLE: + if (put_user(l2cap_pi(sk)->flushable, (u32 __user *) optval)) + err = -EFAULT; + + break; + + default: + err = -ENOPROTOOPT; + break; + } + + release_sock(sk); + return err; +} + +static int l2cap_sock_setsockopt_old(struct socket *sock, int optname, char __user *optval, unsigned int optlen) +{ + struct sock *sk = sock->sk; + struct l2cap_options opts; + int len, err = 0; + u32 opt; + + BT_DBG("sk %p", sk); + + lock_sock(sk); + + switch (optname) { + case L2CAP_OPTIONS: + if (sk->sk_state == BT_CONNECTED) { + err = -EINVAL; + break; + } + + opts.imtu = l2cap_pi(sk)->imtu; + opts.omtu = l2cap_pi(sk)->omtu; + opts.flush_to = l2cap_pi(sk)->flush_to; + opts.mode = l2cap_pi(sk)->mode; + opts.fcs = l2cap_pi(sk)->fcs; + opts.max_tx = l2cap_pi(sk)->max_tx; + opts.txwin_size = (__u16)l2cap_pi(sk)->tx_win; + + len = min_t(unsigned int, sizeof(opts), optlen); + if (copy_from_user((char *) &opts, optval, len)) { + err = -EFAULT; + break; + } + + if (opts.txwin_size > L2CAP_DEFAULT_TX_WINDOW) { + err = -EINVAL; + break; + } + + l2cap_pi(sk)->mode = opts.mode; + switch (l2cap_pi(sk)->mode) { + case L2CAP_MODE_BASIC: + l2cap_pi(sk)->conf_state &= ~L2CAP_CONF_STATE2_DEVICE; + break; + case L2CAP_MODE_ERTM: + case L2CAP_MODE_STREAMING: + if (!disable_ertm) + break; + /* fall through */ + default: + err = -EINVAL; + break; + } + + l2cap_pi(sk)->imtu = opts.imtu; + l2cap_pi(sk)->omtu = opts.omtu; + l2cap_pi(sk)->fcs = opts.fcs; + l2cap_pi(sk)->max_tx = opts.max_tx; + l2cap_pi(sk)->tx_win = (__u8)opts.txwin_size; + break; + + case L2CAP_LM: + if (get_user(opt, (u32 __user *) optval)) { + err = -EFAULT; + break; + } + + if (opt & L2CAP_LM_AUTH) + l2cap_pi(sk)->sec_level = BT_SECURITY_LOW; + if (opt & L2CAP_LM_ENCRYPT) + l2cap_pi(sk)->sec_level = BT_SECURITY_MEDIUM; + if (opt & L2CAP_LM_SECURE) + l2cap_pi(sk)->sec_level = BT_SECURITY_HIGH; + + l2cap_pi(sk)->role_switch = (opt & L2CAP_LM_MASTER); + l2cap_pi(sk)->force_reliable = (opt & L2CAP_LM_RELIABLE); + break; + + default: + err = -ENOPROTOOPT; + break; + } + + release_sock(sk); + return err; +} + +static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen) +{ + struct sock *sk = sock->sk; + struct bt_security sec; + int len, err = 0; + u32 opt; + + BT_DBG("sk %p", sk); + + if (level == SOL_L2CAP) + return l2cap_sock_setsockopt_old(sock, optname, optval, optlen); + + if (level != SOL_BLUETOOTH) + return -ENOPROTOOPT; + + lock_sock(sk); + + switch (optname) { + case BT_SECURITY: + if (sk->sk_type != SOCK_SEQPACKET && sk->sk_type != SOCK_STREAM + && sk->sk_type != SOCK_RAW) { + err = -EINVAL; + break; + } + + sec.level = BT_SECURITY_LOW; + + len = min_t(unsigned int, sizeof(sec), optlen); + if (copy_from_user((char *) &sec, optval, len)) { + err = -EFAULT; + break; + } + + if (sec.level < BT_SECURITY_LOW || + sec.level > BT_SECURITY_HIGH) { + err = -EINVAL; + break; + } + + l2cap_pi(sk)->sec_level = sec.level; + break; + + case BT_DEFER_SETUP: + if (sk->sk_state != 
BT_BOUND && sk->sk_state != BT_LISTEN) { + err = -EINVAL; + break; + } + + if (get_user(opt, (u32 __user *) optval)) { + err = -EFAULT; + break; + } + + bt_sk(sk)->defer_setup = opt; + break; + + case BT_FLUSHABLE: + if (get_user(opt, (u32 __user *) optval)) { + err = -EFAULT; + break; + } + + if (opt > BT_FLUSHABLE_ON) { + err = -EINVAL; + break; + } + + if (opt == BT_FLUSHABLE_OFF) { + struct l2cap_conn *conn = l2cap_pi(sk)->conn; + /* proceed futher only when we have l2cap_conn and + No Flush support in the LM */ + if (!conn || !lmp_no_flush_capable(conn->hcon->hdev)) { + err = -EINVAL; + break; + } + } + + l2cap_pi(sk)->flushable = opt; + break; + + default: + err = -ENOPROTOOPT; + break; + } + + release_sock(sk); + return err; +} + +static int l2cap_sock_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t len) +{ + struct sock *sk = sock->sk; + struct l2cap_pinfo *pi = l2cap_pi(sk); + struct sk_buff *skb; + u16 control; + int err; + + BT_DBG("sock %p, sk %p", sock, sk); + + err = sock_error(sk); + if (err) + return err; + + if (msg->msg_flags & MSG_OOB) + return -EOPNOTSUPP; + + lock_sock(sk); + + if (sk->sk_state != BT_CONNECTED) { + err = -ENOTCONN; + goto done; + } + + /* Connectionless channel */ + if (sk->sk_type == SOCK_DGRAM) { + skb = l2cap_create_connless_pdu(sk, msg, len); + if (IS_ERR(skb)) { + err = PTR_ERR(skb); + } else { + l2cap_do_send(sk, skb); + err = len; + } + goto done; + } + + switch (pi->mode) { + case L2CAP_MODE_BASIC: + /* Check outgoing MTU */ + if (len > pi->omtu) { + err = -EMSGSIZE; + goto done; + } + + /* Create a basic PDU */ + skb = l2cap_create_basic_pdu(sk, msg, len); + if (IS_ERR(skb)) { + err = PTR_ERR(skb); + goto done; + } + + l2cap_do_send(sk, skb); + err = len; + break; + + case L2CAP_MODE_ERTM: + case L2CAP_MODE_STREAMING: + /* Entire SDU fits into one PDU */ + if (len <= pi->remote_mps) { + control = L2CAP_SDU_UNSEGMENTED; + skb = l2cap_create_iframe_pdu(sk, msg, len, control, 0); + if (IS_ERR(skb)) { + err = PTR_ERR(skb); + goto done; + } + __skb_queue_tail(TX_QUEUE(sk), skb); + + if (sk->sk_send_head == NULL) + sk->sk_send_head = skb; + + } else { + /* Segment SDU into multiples PDUs */ + err = l2cap_sar_segment_sdu(sk, msg, len); + if (err < 0) + goto done; + } + + if (pi->mode == L2CAP_MODE_STREAMING) { + l2cap_streaming_send(sk); + } else { + if ((pi->conn_state & L2CAP_CONN_REMOTE_BUSY) && + (pi->conn_state & L2CAP_CONN_WAIT_F)) { + err = len; + break; + } + err = l2cap_ertm_send(sk); + } + + if (err >= 0) + err = len; + break; + + default: + BT_DBG("bad state %1.1x", pi->mode); + err = -EBADFD; + } + +done: + release_sock(sk); + return err; +} + +static int l2cap_sock_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t len, int flags) +{ + struct sock *sk = sock->sk; + + lock_sock(sk); + + if (sk->sk_state == BT_CONNECT2 && bt_sk(sk)->defer_setup) { + struct l2cap_conn_rsp rsp; + struct l2cap_conn *conn = l2cap_pi(sk)->conn; + u8 buf[128]; + + sk->sk_state = BT_CONFIG; + + rsp.scid = cpu_to_le16(l2cap_pi(sk)->dcid); + rsp.dcid = cpu_to_le16(l2cap_pi(sk)->scid); + rsp.result = cpu_to_le16(L2CAP_CR_SUCCESS); + rsp.status = cpu_to_le16(L2CAP_CS_NO_INFO); + l2cap_send_cmd(l2cap_pi(sk)->conn, l2cap_pi(sk)->ident, + L2CAP_CONN_RSP, sizeof(rsp), &rsp); + + if (l2cap_pi(sk)->conf_state & L2CAP_CONF_REQ_SENT) { + release_sock(sk); + return 0; + } + + l2cap_pi(sk)->conf_state |= L2CAP_CONF_REQ_SENT; + l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_CONF_REQ, + l2cap_build_conf_req(sk, buf), buf); 
+ l2cap_pi(sk)->num_conf_req++; + + release_sock(sk); + return 0; + } + + release_sock(sk); + + if (sock->type == SOCK_STREAM) + return bt_sock_stream_recvmsg(iocb, sock, msg, len, flags); + + return bt_sock_recvmsg(iocb, sock, msg, len, flags); +} + +/* Kill socket (only if zapped and orphan) + * Must be called on unlocked socket. + */ +void l2cap_sock_kill(struct sock *sk) +{ + if (!sock_flag(sk, SOCK_ZAPPED) || sk->sk_socket) + return; + + BT_DBG("sk %p state %d", sk, sk->sk_state); + + /* Kill poor orphan */ + bt_sock_unlink(&l2cap_sk_list, sk); + sock_set_flag(sk, SOCK_DEAD); + sock_put(sk); +} + +/* Must be called on unlocked socket. */ +static void l2cap_sock_close(struct sock *sk) +{ + l2cap_sock_clear_timer(sk); + lock_sock(sk); + __l2cap_sock_close(sk, ECONNRESET); + release_sock(sk); + l2cap_sock_kill(sk); +} + +static void l2cap_sock_cleanup_listen(struct sock *parent) +{ + struct sock *sk; + + BT_DBG("parent %p", parent); + + /* Close not yet accepted channels */ + while ((sk = bt_accept_dequeue(parent, NULL))) + l2cap_sock_close(sk); + + parent->sk_state = BT_CLOSED; + sock_set_flag(parent, SOCK_ZAPPED); +} + +void __l2cap_sock_close(struct sock *sk, int reason) +{ + struct l2cap_conn *conn = l2cap_pi(sk)->conn; + + BT_DBG("sk %p state %d socket %p", sk, sk->sk_state, sk->sk_socket); + + switch (sk->sk_state) { + case BT_LISTEN: + l2cap_sock_cleanup_listen(sk); + break; + + case BT_CONNECTED: + case BT_CONFIG: + if ((sk->sk_type == SOCK_SEQPACKET || + sk->sk_type == SOCK_STREAM) && + conn->hcon->type == ACL_LINK) { + l2cap_sock_set_timer(sk, sk->sk_sndtimeo); + l2cap_send_disconn_req(conn, sk, reason); + } else + l2cap_chan_del(sk, reason); + break; + + case BT_CONNECT2: + if ((sk->sk_type == SOCK_SEQPACKET || + sk->sk_type == SOCK_STREAM) && + conn->hcon->type == ACL_LINK) { + struct l2cap_conn_rsp rsp; + __u16 result; + + if (bt_sk(sk)->defer_setup) + result = L2CAP_CR_SEC_BLOCK; + else + result = L2CAP_CR_BAD_PSM; + + rsp.scid = cpu_to_le16(l2cap_pi(sk)->dcid); + rsp.dcid = cpu_to_le16(l2cap_pi(sk)->scid); + rsp.result = cpu_to_le16(result); + rsp.status = cpu_to_le16(L2CAP_CS_NO_INFO); + l2cap_send_cmd(conn, l2cap_pi(sk)->ident, + L2CAP_CONN_RSP, sizeof(rsp), &rsp); + } else + l2cap_chan_del(sk, reason); + break; + + case BT_CONNECT: + case BT_DISCONN: + l2cap_chan_del(sk, reason); + break; + + default: + sock_set_flag(sk, SOCK_ZAPPED); + break; + } +} + +static int l2cap_sock_shutdown(struct socket *sock, int how) +{ + struct sock *sk = sock->sk; + int err = 0; + + BT_DBG("sock %p, sk %p", sock, sk); + + if (!sk) + return 0; + + lock_sock(sk); + if (!sk->sk_shutdown) { + if (l2cap_pi(sk)->mode == L2CAP_MODE_ERTM) + err = __l2cap_wait_ack(sk); + + sk->sk_shutdown = SHUTDOWN_MASK; + l2cap_sock_clear_timer(sk); + __l2cap_sock_close(sk, 0); + + if (sock_flag(sk, SOCK_LINGER) && sk->sk_lingertime) + err = bt_sock_wait_state(sk, BT_CLOSED, + sk->sk_lingertime); + } + + if (!err && sk->sk_err) + err = -sk->sk_err; + + release_sock(sk); + return err; +} + +static int l2cap_sock_release(struct socket *sock) +{ + struct sock *sk = sock->sk; + int err; + + BT_DBG("sock %p, sk %p", sock, sk); + + if (!sk) + return 0; + + err = l2cap_sock_shutdown(sock, 2); + + sock_orphan(sk); + l2cap_sock_kill(sk); + return err; +} + +static void l2cap_sock_destruct(struct sock *sk) +{ + BT_DBG("sk %p", sk); + + skb_queue_purge(&sk->sk_receive_queue); + skb_queue_purge(&sk->sk_write_queue); +} + +void l2cap_sock_init(struct sock *sk, struct sock *parent) +{ + struct l2cap_pinfo *pi = l2cap_pi(sk); 
+ + BT_DBG("sk %p", sk); + + if (parent) { + sk->sk_type = parent->sk_type; + bt_sk(sk)->defer_setup = bt_sk(parent)->defer_setup; + + pi->imtu = l2cap_pi(parent)->imtu; + pi->omtu = l2cap_pi(parent)->omtu; + pi->conf_state = l2cap_pi(parent)->conf_state; + pi->mode = l2cap_pi(parent)->mode; + pi->fcs = l2cap_pi(parent)->fcs; + pi->max_tx = l2cap_pi(parent)->max_tx; + pi->tx_win = l2cap_pi(parent)->tx_win; + pi->sec_level = l2cap_pi(parent)->sec_level; + pi->role_switch = l2cap_pi(parent)->role_switch; + pi->force_reliable = l2cap_pi(parent)->force_reliable; + pi->flushable = l2cap_pi(parent)->flushable; + } else { + pi->imtu = L2CAP_DEFAULT_MTU; + pi->omtu = 0; + if (!disable_ertm && sk->sk_type == SOCK_STREAM) { + pi->mode = L2CAP_MODE_ERTM; + pi->conf_state |= L2CAP_CONF_STATE2_DEVICE; + } else { + pi->mode = L2CAP_MODE_BASIC; + } + pi->max_tx = L2CAP_DEFAULT_MAX_TX; + pi->fcs = L2CAP_FCS_CRC16; + pi->tx_win = L2CAP_DEFAULT_TX_WINDOW; + pi->sec_level = BT_SECURITY_LOW; + pi->role_switch = 0; + pi->force_reliable = 0; + pi->flushable = BT_FLUSHABLE_OFF; + } + + /* Default config options */ + pi->conf_len = 0; + pi->flush_to = L2CAP_DEFAULT_FLUSH_TO; + skb_queue_head_init(TX_QUEUE(sk)); + skb_queue_head_init(SREJ_QUEUE(sk)); + skb_queue_head_init(BUSY_QUEUE(sk)); + INIT_LIST_HEAD(SREJ_LIST(sk)); +} + +static struct proto l2cap_proto = { + .name = "L2CAP", + .owner = THIS_MODULE, + .obj_size = sizeof(struct l2cap_pinfo) +}; + +struct sock *l2cap_sock_alloc(struct net *net, struct socket *sock, int proto, gfp_t prio) +{ + struct sock *sk; + + sk = sk_alloc(net, PF_BLUETOOTH, prio, &l2cap_proto); + if (!sk) + return NULL; + + sock_init_data(sock, sk); + INIT_LIST_HEAD(&bt_sk(sk)->accept_q); + + sk->sk_destruct = l2cap_sock_destruct; + sk->sk_sndtimeo = msecs_to_jiffies(L2CAP_CONN_TIMEOUT); + + sock_reset_flag(sk, SOCK_ZAPPED); + + sk->sk_protocol = proto; + sk->sk_state = BT_OPEN; + + setup_timer(&sk->sk_timer, l2cap_sock_timeout, (unsigned long) sk); + + bt_sock_link(&l2cap_sk_list, sk); + return sk; +} + +static int l2cap_sock_create(struct net *net, struct socket *sock, int protocol, + int kern) +{ + struct sock *sk; + + BT_DBG("sock %p", sock); + + sock->state = SS_UNCONNECTED; + + if (sock->type != SOCK_SEQPACKET && sock->type != SOCK_STREAM && + sock->type != SOCK_DGRAM && sock->type != SOCK_RAW) + return -ESOCKTNOSUPPORT; + + if (sock->type == SOCK_RAW && !kern && !capable(CAP_NET_RAW)) + return -EPERM; + + sock->ops = &l2cap_sock_ops; + + sk = l2cap_sock_alloc(net, sock, protocol, GFP_ATOMIC); + if (!sk) + return -ENOMEM; + + l2cap_sock_init(sk, NULL); + return 0; +} + +const struct proto_ops l2cap_sock_ops = { + .family = PF_BLUETOOTH, + .owner = THIS_MODULE, + .release = l2cap_sock_release, + .bind = l2cap_sock_bind, + .connect = l2cap_sock_connect, + .listen = l2cap_sock_listen, + .accept = l2cap_sock_accept, + .getname = l2cap_sock_getname, + .sendmsg = l2cap_sock_sendmsg, + .recvmsg = l2cap_sock_recvmsg, + .poll = bt_sock_poll, + .ioctl = bt_sock_ioctl, + .mmap = sock_no_mmap, + .socketpair = sock_no_socketpair, + .shutdown = l2cap_sock_shutdown, + .setsockopt = l2cap_sock_setsockopt, + .getsockopt = l2cap_sock_getsockopt +}; + +static const struct net_proto_family l2cap_sock_family_ops = { + .family = PF_BLUETOOTH, + .owner = THIS_MODULE, + .create = l2cap_sock_create, +}; + +int __init l2cap_init_sockets(void) +{ + int err; + + err = proto_register(&l2cap_proto, 0); + if (err < 0) + return err; + + err = bt_sock_register(BTPROTO_L2CAP, &l2cap_sock_family_ops); + if (err < 0) + 
goto error; + + BT_INFO("L2CAP socket layer initialized"); + + return 0; + +error: + BT_ERR("L2CAP socket registration failed"); + proto_unregister(&l2cap_proto); + return err; +} + +void l2cap_cleanup_sockets(void) +{ + if (bt_sock_unregister(BTPROTO_L2CAP) < 0) + BT_ERR("L2CAP socket unregistration failed"); + + proto_unregister(&l2cap_proto); +} diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index f827fd908380..0054c74e27b7 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -22,7 +22,7 @@ /* Bluetooth HCI Management interface */ -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/unaligned.h> #include <net/bluetooth/bluetooth.h> @@ -32,13 +32,24 @@ #define MGMT_VERSION 0 #define MGMT_REVISION 1 -static int cmd_status(struct sock *sk, u16 cmd, u8 status) +struct pending_cmd { + struct list_head list; + __u16 opcode; + int index; + void *cmd; + struct sock *sk; + void *user_data; +}; + +LIST_HEAD(cmd_list); + +static int cmd_status(struct sock *sk, u16 index, u16 cmd, u8 status) { struct sk_buff *skb; struct mgmt_hdr *hdr; struct mgmt_ev_cmd_status *ev; - BT_DBG("sock %p", sk); + BT_DBG("sock %p, index %u, cmd %u, status %u", sk, index, cmd, status); skb = alloc_skb(sizeof(*hdr) + sizeof(*ev), GFP_ATOMIC); if (!skb) @@ -47,6 +58,7 @@ static int cmd_status(struct sock *sk, u16 cmd, u8 status) hdr = (void *) skb_put(skb, sizeof(*hdr)); hdr->opcode = cpu_to_le16(MGMT_EV_CMD_STATUS); + hdr->index = cpu_to_le16(index); hdr->len = cpu_to_le16(sizeof(*ev)); ev = (void *) skb_put(skb, sizeof(*ev)); @@ -59,29 +71,30 @@ static int cmd_status(struct sock *sk, u16 cmd, u8 status) return 0; } -static int read_version(struct sock *sk) +static int cmd_complete(struct sock *sk, u16 index, u16 cmd, void *rp, + size_t rp_len) { struct sk_buff *skb; struct mgmt_hdr *hdr; struct mgmt_ev_cmd_complete *ev; - struct mgmt_rp_read_version *rp; BT_DBG("sock %p", sk); - skb = alloc_skb(sizeof(*hdr) + sizeof(*ev) + sizeof(*rp), GFP_ATOMIC); + skb = alloc_skb(sizeof(*hdr) + sizeof(*ev) + rp_len, GFP_ATOMIC); if (!skb) return -ENOMEM; hdr = (void *) skb_put(skb, sizeof(*hdr)); + hdr->opcode = cpu_to_le16(MGMT_EV_CMD_COMPLETE); - hdr->len = cpu_to_le16(sizeof(*ev) + sizeof(*rp)); + hdr->index = cpu_to_le16(index); + hdr->len = cpu_to_le16(sizeof(*ev) + rp_len); - ev = (void *) skb_put(skb, sizeof(*ev)); - put_unaligned_le16(MGMT_OP_READ_VERSION, &ev->opcode); + ev = (void *) skb_put(skb, sizeof(*ev) + rp_len); + put_unaligned_le16(cmd, &ev->opcode); - rp = (void *) skb_put(skb, sizeof(*rp)); - rp->version = MGMT_VERSION; - put_unaligned_le16(MGMT_REVISION, &rp->revision); + if (rp) + memcpy(ev->data, rp, rp_len); if (sock_queue_rcv_skb(sk, skb) < 0) kfree_skb(skb); @@ -89,16 +102,26 @@ static int read_version(struct sock *sk) return 0; } +static int read_version(struct sock *sk) +{ + struct mgmt_rp_read_version rp; + + BT_DBG("sock %p", sk); + + rp.version = MGMT_VERSION; + put_unaligned_le16(MGMT_REVISION, &rp.revision); + + return cmd_complete(sk, MGMT_INDEX_NONE, MGMT_OP_READ_VERSION, &rp, + sizeof(rp)); +} + static int read_index_list(struct sock *sk) { - struct sk_buff *skb; - struct mgmt_hdr *hdr; - struct mgmt_ev_cmd_complete *ev; struct mgmt_rp_read_index_list *rp; struct list_head *p; - size_t body_len; + size_t rp_len; u16 count; - int i; + int i, err; BT_DBG("sock %p", sk); @@ -109,112 +132,1131 @@ static int read_index_list(struct sock *sk) count++; } - body_len = sizeof(*ev) + sizeof(*rp) + (2 * count); - skb = alloc_skb(sizeof(*hdr) + body_len, GFP_ATOMIC); - if 
(!skb) + rp_len = sizeof(*rp) + (2 * count); + rp = kmalloc(rp_len, GFP_ATOMIC); + if (!rp) { + read_unlock(&hci_dev_list_lock); return -ENOMEM; + } - hdr = (void *) skb_put(skb, sizeof(*hdr)); - hdr->opcode = cpu_to_le16(MGMT_EV_CMD_COMPLETE); - hdr->len = cpu_to_le16(body_len); - - ev = (void *) skb_put(skb, sizeof(*ev)); - put_unaligned_le16(MGMT_OP_READ_INDEX_LIST, &ev->opcode); - - rp = (void *) skb_put(skb, sizeof(*rp) + (2 * count)); put_unaligned_le16(count, &rp->num_controllers); i = 0; list_for_each(p, &hci_dev_list) { struct hci_dev *d = list_entry(p, struct hci_dev, list); + + hci_del_off_timer(d); + + set_bit(HCI_MGMT, &d->flags); + + if (test_bit(HCI_SETUP, &d->flags)) + continue; + put_unaligned_le16(d->id, &rp->index[i++]); BT_DBG("Added hci%u", d->id); } read_unlock(&hci_dev_list_lock); - if (sock_queue_rcv_skb(sk, skb) < 0) - kfree_skb(skb); + err = cmd_complete(sk, MGMT_INDEX_NONE, MGMT_OP_READ_INDEX_LIST, rp, + rp_len); - return 0; + kfree(rp); + + return err; } -static int read_controller_info(struct sock *sk, unsigned char *data, u16 len) +static int read_controller_info(struct sock *sk, u16 index) { - struct sk_buff *skb; - struct mgmt_hdr *hdr; - struct mgmt_ev_cmd_complete *ev; - struct mgmt_rp_read_info *rp; - struct mgmt_cp_read_info *cp; + struct mgmt_rp_read_info rp; struct hci_dev *hdev; - u16 dev_id; - BT_DBG("sock %p", sk); + BT_DBG("sock %p hci%u", sk, index); + + hdev = hci_dev_get(index); + if (!hdev) + return cmd_status(sk, index, MGMT_OP_READ_INFO, ENODEV); + + hci_del_off_timer(hdev); + + hci_dev_lock_bh(hdev); + + set_bit(HCI_MGMT, &hdev->flags); + + rp.type = hdev->dev_type; + + rp.powered = test_bit(HCI_UP, &hdev->flags); + rp.connectable = test_bit(HCI_PSCAN, &hdev->flags); + rp.discoverable = test_bit(HCI_ISCAN, &hdev->flags); + rp.pairable = test_bit(HCI_PSCAN, &hdev->flags); + + if (test_bit(HCI_AUTH, &hdev->flags)) + rp.sec_mode = 3; + else if (hdev->ssp_mode > 0) + rp.sec_mode = 4; + else + rp.sec_mode = 2; + + bacpy(&rp.bdaddr, &hdev->bdaddr); + memcpy(rp.features, hdev->features, 8); + memcpy(rp.dev_class, hdev->dev_class, 3); + put_unaligned_le16(hdev->manufacturer, &rp.manufacturer); + rp.hci_ver = hdev->hci_ver; + put_unaligned_le16(hdev->hci_rev, &rp.hci_rev); + + hci_dev_unlock_bh(hdev); + hci_dev_put(hdev); + + return cmd_complete(sk, index, MGMT_OP_READ_INFO, &rp, sizeof(rp)); +} + +static void mgmt_pending_free(struct pending_cmd *cmd) +{ + sock_put(cmd->sk); + kfree(cmd->cmd); + kfree(cmd); +} + +static struct pending_cmd *mgmt_pending_add(struct sock *sk, u16 opcode, + u16 index, void *data, u16 len) +{ + struct pending_cmd *cmd; + + cmd = kmalloc(sizeof(*cmd), GFP_ATOMIC); + if (!cmd) + return NULL; + + cmd->opcode = opcode; + cmd->index = index; + + cmd->cmd = kmalloc(len, GFP_ATOMIC); + if (!cmd->cmd) { + kfree(cmd); + return NULL; + } + + memcpy(cmd->cmd, data, len); + + cmd->sk = sk; + sock_hold(sk); + + list_add(&cmd->list, &cmd_list); + + return cmd; +} + +static void mgmt_pending_foreach(u16 opcode, int index, + void (*cb)(struct pending_cmd *cmd, void *data), + void *data) +{ + struct list_head *p, *n; + + list_for_each_safe(p, n, &cmd_list) { + struct pending_cmd *cmd; + + cmd = list_entry(p, struct pending_cmd, list); + + if (cmd->opcode != opcode) + continue; + + if (index >= 0 && cmd->index != index) + continue; + + cb(cmd, data); + } +} + +static struct pending_cmd *mgmt_pending_find(u16 opcode, int index) +{ + struct list_head *p; + + list_for_each(p, &cmd_list) { + struct pending_cmd *cmd; + + cmd = list_entry(p, 
struct pending_cmd, list); + + if (cmd->opcode != opcode) + continue; + + if (index >= 0 && cmd->index != index) + continue; + + return cmd; + } + + return NULL; +} + +static void mgmt_pending_remove(struct pending_cmd *cmd) +{ + list_del(&cmd->list); + mgmt_pending_free(cmd); +} + +static int set_powered(struct sock *sk, u16 index, unsigned char *data, u16 len) +{ + struct mgmt_mode *cp; + struct hci_dev *hdev; + struct pending_cmd *cmd; + int err, up; + + cp = (void *) data; + + BT_DBG("request for hci%u", index); + + if (len != sizeof(*cp)) + return cmd_status(sk, index, MGMT_OP_SET_POWERED, EINVAL); + + hdev = hci_dev_get(index); + if (!hdev) + return cmd_status(sk, index, MGMT_OP_SET_POWERED, ENODEV); + + hci_dev_lock_bh(hdev); + + up = test_bit(HCI_UP, &hdev->flags); + if ((cp->val && up) || (!cp->val && !up)) { + err = cmd_status(sk, index, MGMT_OP_SET_POWERED, EALREADY); + goto failed; + } + + if (mgmt_pending_find(MGMT_OP_SET_POWERED, index)) { + err = cmd_status(sk, index, MGMT_OP_SET_POWERED, EBUSY); + goto failed; + } + + cmd = mgmt_pending_add(sk, MGMT_OP_SET_POWERED, index, data, len); + if (!cmd) { + err = -ENOMEM; + goto failed; + } + + if (cp->val) + queue_work(hdev->workqueue, &hdev->power_on); + else + queue_work(hdev->workqueue, &hdev->power_off); + + err = 0; + +failed: + hci_dev_unlock_bh(hdev); + hci_dev_put(hdev); + return err; +} + +static int set_discoverable(struct sock *sk, u16 index, unsigned char *data, + u16 len) +{ + struct mgmt_mode *cp; + struct hci_dev *hdev; + struct pending_cmd *cmd; + u8 scan; + int err; + + cp = (void *) data; + + BT_DBG("request for hci%u", index); + + if (len != sizeof(*cp)) + return cmd_status(sk, index, MGMT_OP_SET_DISCOVERABLE, EINVAL); + + hdev = hci_dev_get(index); + if (!hdev) + return cmd_status(sk, index, MGMT_OP_SET_DISCOVERABLE, ENODEV); + + hci_dev_lock_bh(hdev); + + if (!test_bit(HCI_UP, &hdev->flags)) { + err = cmd_status(sk, index, MGMT_OP_SET_DISCOVERABLE, ENETDOWN); + goto failed; + } + + if (mgmt_pending_find(MGMT_OP_SET_DISCOVERABLE, index) || + mgmt_pending_find(MGMT_OP_SET_CONNECTABLE, index)) { + err = cmd_status(sk, index, MGMT_OP_SET_DISCOVERABLE, EBUSY); + goto failed; + } + + if (cp->val == test_bit(HCI_ISCAN, &hdev->flags) && + test_bit(HCI_PSCAN, &hdev->flags)) { + err = cmd_status(sk, index, MGMT_OP_SET_DISCOVERABLE, EALREADY); + goto failed; + } - if (len != 2) - return cmd_status(sk, MGMT_OP_READ_INFO, EINVAL); + cmd = mgmt_pending_add(sk, MGMT_OP_SET_DISCOVERABLE, index, data, len); + if (!cmd) { + err = -ENOMEM; + goto failed; + } + + scan = SCAN_PAGE; + + if (cp->val) + scan |= SCAN_INQUIRY; + + err = hci_send_cmd(hdev, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan); + if (err < 0) + mgmt_pending_remove(cmd); + +failed: + hci_dev_unlock_bh(hdev); + hci_dev_put(hdev); + + return err; +} + +static int set_connectable(struct sock *sk, u16 index, unsigned char *data, + u16 len) +{ + struct mgmt_mode *cp; + struct hci_dev *hdev; + struct pending_cmd *cmd; + u8 scan; + int err; + + cp = (void *) data; + + BT_DBG("request for hci%u", index); + + if (len != sizeof(*cp)) + return cmd_status(sk, index, MGMT_OP_SET_CONNECTABLE, EINVAL); + + hdev = hci_dev_get(index); + if (!hdev) + return cmd_status(sk, index, MGMT_OP_SET_CONNECTABLE, ENODEV); + + hci_dev_lock_bh(hdev); + + if (!test_bit(HCI_UP, &hdev->flags)) { + err = cmd_status(sk, index, MGMT_OP_SET_CONNECTABLE, ENETDOWN); + goto failed; + } + + if (mgmt_pending_find(MGMT_OP_SET_DISCOVERABLE, index) || + mgmt_pending_find(MGMT_OP_SET_CONNECTABLE, index)) { + err = 
cmd_status(sk, index, MGMT_OP_SET_CONNECTABLE, EBUSY); + goto failed; + } + + if (cp->val == test_bit(HCI_PSCAN, &hdev->flags)) { + err = cmd_status(sk, index, MGMT_OP_SET_CONNECTABLE, EALREADY); + goto failed; + } + + cmd = mgmt_pending_add(sk, MGMT_OP_SET_CONNECTABLE, index, data, len); + if (!cmd) { + err = -ENOMEM; + goto failed; + } + + if (cp->val) + scan = SCAN_PAGE; + else + scan = 0; + + err = hci_send_cmd(hdev, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan); + if (err < 0) + mgmt_pending_remove(cmd); + +failed: + hci_dev_unlock_bh(hdev); + hci_dev_put(hdev); + + return err; +} + +static int mgmt_event(u16 event, u16 index, void *data, u16 data_len, + struct sock *skip_sk) +{ + struct sk_buff *skb; + struct mgmt_hdr *hdr; - skb = alloc_skb(sizeof(*hdr) + sizeof(*ev) + sizeof(*rp), GFP_ATOMIC); + skb = alloc_skb(sizeof(*hdr) + data_len, GFP_ATOMIC); if (!skb) return -ENOMEM; + bt_cb(skb)->channel = HCI_CHANNEL_CONTROL; + hdr = (void *) skb_put(skb, sizeof(*hdr)); - hdr->opcode = cpu_to_le16(MGMT_EV_CMD_COMPLETE); - hdr->len = cpu_to_le16(sizeof(*ev) + sizeof(*rp)); + hdr->opcode = cpu_to_le16(event); + hdr->index = cpu_to_le16(index); + hdr->len = cpu_to_le16(data_len); - ev = (void *) skb_put(skb, sizeof(*ev)); - put_unaligned_le16(MGMT_OP_READ_INFO, &ev->opcode); + if (data) + memcpy(skb_put(skb, data_len), data, data_len); + + hci_send_to_sock(NULL, skb, skip_sk); + kfree_skb(skb); - rp = (void *) skb_put(skb, sizeof(*rp)); + return 0; +} + +static int send_mode_rsp(struct sock *sk, u16 opcode, u16 index, u8 val) +{ + struct mgmt_mode rp; + + rp.val = val; + + return cmd_complete(sk, index, opcode, &rp, sizeof(rp)); +} + +static int set_pairable(struct sock *sk, u16 index, unsigned char *data, + u16 len) +{ + struct mgmt_mode *cp, ev; + struct hci_dev *hdev; + int err; cp = (void *) data; - dev_id = get_unaligned_le16(&cp->index); - BT_DBG("request for hci%u", dev_id); + BT_DBG("request for hci%u", index); - hdev = hci_dev_get(dev_id); - if (!hdev) { - kfree_skb(skb); - return cmd_status(sk, MGMT_OP_READ_INFO, ENODEV); + if (len != sizeof(*cp)) + return cmd_status(sk, index, MGMT_OP_SET_PAIRABLE, EINVAL); + + hdev = hci_dev_get(index); + if (!hdev) + return cmd_status(sk, index, MGMT_OP_SET_PAIRABLE, ENODEV); + + hci_dev_lock_bh(hdev); + + if (cp->val) + set_bit(HCI_PAIRABLE, &hdev->flags); + else + clear_bit(HCI_PAIRABLE, &hdev->flags); + + err = send_mode_rsp(sk, MGMT_OP_SET_PAIRABLE, index, cp->val); + if (err < 0) + goto failed; + + ev.val = cp->val; + + err = mgmt_event(MGMT_EV_PAIRABLE, index, &ev, sizeof(ev), sk); + +failed: + hci_dev_unlock_bh(hdev); + hci_dev_put(hdev); + + return err; +} + +static u8 get_service_classes(struct hci_dev *hdev) +{ + struct list_head *p; + u8 val = 0; + + list_for_each(p, &hdev->uuids) { + struct bt_uuid *uuid = list_entry(p, struct bt_uuid, list); + + val |= uuid->svc_hint; } + return val; +} + +static int update_class(struct hci_dev *hdev) +{ + u8 cod[3]; + + BT_DBG("%s", hdev->name); + + if (test_bit(HCI_SERVICE_CACHE, &hdev->flags)) + return 0; + + cod[0] = hdev->minor_class; + cod[1] = hdev->major_class; + cod[2] = get_service_classes(hdev); + + if (memcmp(cod, hdev->dev_class, 3) == 0) + return 0; + + return hci_send_cmd(hdev, HCI_OP_WRITE_CLASS_OF_DEV, sizeof(cod), cod); +} + +static int add_uuid(struct sock *sk, u16 index, unsigned char *data, u16 len) +{ + struct mgmt_cp_add_uuid *cp; + struct hci_dev *hdev; + struct bt_uuid *uuid; + int err; + + cp = (void *) data; + + BT_DBG("request for hci%u", index); + + if (len != sizeof(*cp)) + return 
cmd_status(sk, index, MGMT_OP_ADD_UUID, EINVAL); + + hdev = hci_dev_get(index); + if (!hdev) + return cmd_status(sk, index, MGMT_OP_ADD_UUID, ENODEV); + hci_dev_lock_bh(hdev); - put_unaligned_le16(hdev->id, &rp->index); - rp->type = hdev->dev_type; + uuid = kmalloc(sizeof(*uuid), GFP_ATOMIC); + if (!uuid) { + err = -ENOMEM; + goto failed; + } - rp->powered = test_bit(HCI_UP, &hdev->flags); - rp->discoverable = test_bit(HCI_ISCAN, &hdev->flags); - rp->pairable = test_bit(HCI_PSCAN, &hdev->flags); + memcpy(uuid->uuid, cp->uuid, 16); + uuid->svc_hint = cp->svc_hint; - if (test_bit(HCI_AUTH, &hdev->flags)) - rp->sec_mode = 3; - else if (hdev->ssp_mode > 0) - rp->sec_mode = 4; - else - rp->sec_mode = 2; + list_add(&uuid->list, &hdev->uuids); - bacpy(&rp->bdaddr, &hdev->bdaddr); - memcpy(rp->features, hdev->features, 8); - memcpy(rp->dev_class, hdev->dev_class, 3); - put_unaligned_le16(hdev->manufacturer, &rp->manufacturer); - rp->hci_ver = hdev->hci_ver; - put_unaligned_le16(hdev->hci_rev, &rp->hci_rev); + err = update_class(hdev); + if (err < 0) + goto failed; + err = cmd_complete(sk, index, MGMT_OP_ADD_UUID, NULL, 0); + +failed: hci_dev_unlock_bh(hdev); hci_dev_put(hdev); - if (sock_queue_rcv_skb(sk, skb) < 0) - kfree_skb(skb); + return err; +} + +static int remove_uuid(struct sock *sk, u16 index, unsigned char *data, u16 len) +{ + struct list_head *p, *n; + struct mgmt_cp_remove_uuid *cp; + struct hci_dev *hdev; + u8 bt_uuid_any[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + int err, found; + + cp = (void *) data; + + BT_DBG("request for hci%u", index); + + if (len != sizeof(*cp)) + return cmd_status(sk, index, MGMT_OP_REMOVE_UUID, EINVAL); + + hdev = hci_dev_get(index); + if (!hdev) + return cmd_status(sk, index, MGMT_OP_REMOVE_UUID, ENODEV); + + hci_dev_lock_bh(hdev); + + if (memcmp(cp->uuid, bt_uuid_any, 16) == 0) { + err = hci_uuids_clear(hdev); + goto unlock; + } + + found = 0; + + list_for_each_safe(p, n, &hdev->uuids) { + struct bt_uuid *match = list_entry(p, struct bt_uuid, list); + + if (memcmp(match->uuid, cp->uuid, 16) != 0) + continue; + + list_del(&match->list); + found++; + } + + if (found == 0) { + err = cmd_status(sk, index, MGMT_OP_REMOVE_UUID, ENOENT); + goto unlock; + } + + err = update_class(hdev); + if (err < 0) + goto unlock; + + err = cmd_complete(sk, index, MGMT_OP_REMOVE_UUID, NULL, 0); + +unlock: + hci_dev_unlock_bh(hdev); + hci_dev_put(hdev); + + return err; +} + +static int set_dev_class(struct sock *sk, u16 index, unsigned char *data, + u16 len) +{ + struct hci_dev *hdev; + struct mgmt_cp_set_dev_class *cp; + int err; + + cp = (void *) data; + + BT_DBG("request for hci%u", index); + + if (len != sizeof(*cp)) + return cmd_status(sk, index, MGMT_OP_SET_DEV_CLASS, EINVAL); + + hdev = hci_dev_get(index); + if (!hdev) + return cmd_status(sk, index, MGMT_OP_SET_DEV_CLASS, ENODEV); + + hci_dev_lock_bh(hdev); + + hdev->major_class = cp->major; + hdev->minor_class = cp->minor; + + err = update_class(hdev); + + if (err == 0) + err = cmd_complete(sk, index, MGMT_OP_SET_DEV_CLASS, NULL, 0); + + hci_dev_unlock_bh(hdev); + hci_dev_put(hdev); + + return err; +} + +static int set_service_cache(struct sock *sk, u16 index, unsigned char *data, + u16 len) +{ + struct hci_dev *hdev; + struct mgmt_cp_set_service_cache *cp; + int err; + + cp = (void *) data; + + if (len != sizeof(*cp)) + return cmd_status(sk, index, MGMT_OP_SET_SERVICE_CACHE, EINVAL); + + hdev = hci_dev_get(index); + if (!hdev) + return cmd_status(sk, index, MGMT_OP_SET_SERVICE_CACHE, ENODEV); + + 
hci_dev_lock_bh(hdev); + + BT_DBG("hci%u enable %d", index, cp->enable); + + if (cp->enable) { + set_bit(HCI_SERVICE_CACHE, &hdev->flags); + err = 0; + } else { + clear_bit(HCI_SERVICE_CACHE, &hdev->flags); + err = update_class(hdev); + } + + if (err == 0) + err = cmd_complete(sk, index, MGMT_OP_SET_SERVICE_CACHE, NULL, + 0); + + hci_dev_unlock_bh(hdev); + hci_dev_put(hdev); + + return err; +} + +static int load_keys(struct sock *sk, u16 index, unsigned char *data, u16 len) +{ + struct hci_dev *hdev; + struct mgmt_cp_load_keys *cp; + u16 key_count, expected_len; + int i; + + cp = (void *) data; + + if (len < sizeof(*cp)) + return -EINVAL; + + key_count = get_unaligned_le16(&cp->key_count); + + expected_len = sizeof(*cp) + key_count * sizeof(struct mgmt_key_info); + if (expected_len != len) { + BT_ERR("load_keys: expected %u bytes, got %u bytes", + len, expected_len); + return -EINVAL; + } + + hdev = hci_dev_get(index); + if (!hdev) + return cmd_status(sk, index, MGMT_OP_LOAD_KEYS, ENODEV); + + BT_DBG("hci%u debug_keys %u key_count %u", index, cp->debug_keys, + key_count); + + hci_dev_lock_bh(hdev); + + hci_link_keys_clear(hdev); + + set_bit(HCI_LINK_KEYS, &hdev->flags); + + if (cp->debug_keys) + set_bit(HCI_DEBUG_KEYS, &hdev->flags); + else + clear_bit(HCI_DEBUG_KEYS, &hdev->flags); + + for (i = 0; i < key_count; i++) { + struct mgmt_key_info *key = &cp->keys[i]; + + hci_add_link_key(hdev, 0, &key->bdaddr, key->val, key->type, + key->pin_len); + } + + hci_dev_unlock_bh(hdev); + hci_dev_put(hdev); return 0; } +static int remove_key(struct sock *sk, u16 index, unsigned char *data, u16 len) +{ + struct hci_dev *hdev; + struct mgmt_cp_remove_key *cp; + struct hci_conn *conn; + int err; + + cp = (void *) data; + + if (len != sizeof(*cp)) + return cmd_status(sk, index, MGMT_OP_REMOVE_KEY, EINVAL); + + hdev = hci_dev_get(index); + if (!hdev) + return cmd_status(sk, index, MGMT_OP_REMOVE_KEY, ENODEV); + + hci_dev_lock_bh(hdev); + + err = hci_remove_link_key(hdev, &cp->bdaddr); + if (err < 0) { + err = cmd_status(sk, index, MGMT_OP_REMOVE_KEY, -err); + goto unlock; + } + + err = 0; + + if (!test_bit(HCI_UP, &hdev->flags) || !cp->disconnect) + goto unlock; + + conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &cp->bdaddr); + if (conn) { + struct hci_cp_disconnect dc; + + put_unaligned_le16(conn->handle, &dc.handle); + dc.reason = 0x13; /* Remote User Terminated Connection */ + err = hci_send_cmd(hdev, HCI_OP_DISCONNECT, 0, NULL); + } + +unlock: + hci_dev_unlock_bh(hdev); + hci_dev_put(hdev); + + return err; +} + +static int disconnect(struct sock *sk, u16 index, unsigned char *data, u16 len) +{ + struct hci_dev *hdev; + struct mgmt_cp_disconnect *cp; + struct hci_cp_disconnect dc; + struct pending_cmd *cmd; + struct hci_conn *conn; + int err; + + BT_DBG(""); + + cp = (void *) data; + + if (len != sizeof(*cp)) + return cmd_status(sk, index, MGMT_OP_DISCONNECT, EINVAL); + + hdev = hci_dev_get(index); + if (!hdev) + return cmd_status(sk, index, MGMT_OP_DISCONNECT, ENODEV); + + hci_dev_lock_bh(hdev); + + if (!test_bit(HCI_UP, &hdev->flags)) { + err = cmd_status(sk, index, MGMT_OP_DISCONNECT, ENETDOWN); + goto failed; + } + + if (mgmt_pending_find(MGMT_OP_DISCONNECT, index)) { + err = cmd_status(sk, index, MGMT_OP_DISCONNECT, EBUSY); + goto failed; + } + + conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &cp->bdaddr); + if (!conn) { + err = cmd_status(sk, index, MGMT_OP_DISCONNECT, ENOTCONN); + goto failed; + } + + cmd = mgmt_pending_add(sk, MGMT_OP_DISCONNECT, index, data, len); + if (!cmd) { + err = 
-ENOMEM; + goto failed; + } + + put_unaligned_le16(conn->handle, &dc.handle); + dc.reason = 0x13; /* Remote User Terminated Connection */ + + err = hci_send_cmd(hdev, HCI_OP_DISCONNECT, sizeof(dc), &dc); + if (err < 0) + mgmt_pending_remove(cmd); + +failed: + hci_dev_unlock_bh(hdev); + hci_dev_put(hdev); + + return err; +} + +static int get_connections(struct sock *sk, u16 index) +{ + struct mgmt_rp_get_connections *rp; + struct hci_dev *hdev; + struct list_head *p; + size_t rp_len; + u16 count; + int i, err; + + BT_DBG(""); + + hdev = hci_dev_get(index); + if (!hdev) + return cmd_status(sk, index, MGMT_OP_GET_CONNECTIONS, ENODEV); + + hci_dev_lock_bh(hdev); + + count = 0; + list_for_each(p, &hdev->conn_hash.list) { + count++; + } + + rp_len = sizeof(*rp) + (count * sizeof(bdaddr_t)); + rp = kmalloc(rp_len, GFP_ATOMIC); + if (!rp) { + err = -ENOMEM; + goto unlock; + } + + put_unaligned_le16(count, &rp->conn_count); + + read_lock(&hci_dev_list_lock); + + i = 0; + list_for_each(p, &hdev->conn_hash.list) { + struct hci_conn *c = list_entry(p, struct hci_conn, list); + + bacpy(&rp->conn[i++], &c->dst); + } + + read_unlock(&hci_dev_list_lock); + + err = cmd_complete(sk, index, MGMT_OP_GET_CONNECTIONS, rp, rp_len); + +unlock: + kfree(rp); + hci_dev_unlock_bh(hdev); + hci_dev_put(hdev); + return err; +} + +static int pin_code_reply(struct sock *sk, u16 index, unsigned char *data, + u16 len) +{ + struct hci_dev *hdev; + struct mgmt_cp_pin_code_reply *cp; + struct hci_cp_pin_code_reply reply; + struct pending_cmd *cmd; + int err; + + BT_DBG(""); + + cp = (void *) data; + + if (len != sizeof(*cp)) + return cmd_status(sk, index, MGMT_OP_PIN_CODE_REPLY, EINVAL); + + hdev = hci_dev_get(index); + if (!hdev) + return cmd_status(sk, index, MGMT_OP_PIN_CODE_REPLY, ENODEV); + + hci_dev_lock_bh(hdev); + + if (!test_bit(HCI_UP, &hdev->flags)) { + err = cmd_status(sk, index, MGMT_OP_PIN_CODE_REPLY, ENETDOWN); + goto failed; + } + + cmd = mgmt_pending_add(sk, MGMT_OP_PIN_CODE_REPLY, index, data, len); + if (!cmd) { + err = -ENOMEM; + goto failed; + } + + bacpy(&reply.bdaddr, &cp->bdaddr); + reply.pin_len = cp->pin_len; + memcpy(reply.pin_code, cp->pin_code, 16); + + err = hci_send_cmd(hdev, HCI_OP_PIN_CODE_REPLY, sizeof(reply), &reply); + if (err < 0) + mgmt_pending_remove(cmd); + +failed: + hci_dev_unlock_bh(hdev); + hci_dev_put(hdev); + + return err; +} + +static int pin_code_neg_reply(struct sock *sk, u16 index, unsigned char *data, + u16 len) +{ + struct hci_dev *hdev; + struct mgmt_cp_pin_code_neg_reply *cp; + struct pending_cmd *cmd; + int err; + + BT_DBG(""); + + cp = (void *) data; + + if (len != sizeof(*cp)) + return cmd_status(sk, index, MGMT_OP_PIN_CODE_NEG_REPLY, + EINVAL); + + hdev = hci_dev_get(index); + if (!hdev) + return cmd_status(sk, index, MGMT_OP_PIN_CODE_NEG_REPLY, + ENODEV); + + hci_dev_lock_bh(hdev); + + if (!test_bit(HCI_UP, &hdev->flags)) { + err = cmd_status(sk, index, MGMT_OP_PIN_CODE_NEG_REPLY, + ENETDOWN); + goto failed; + } + + cmd = mgmt_pending_add(sk, MGMT_OP_PIN_CODE_NEG_REPLY, index, + data, len); + if (!cmd) { + err = -ENOMEM; + goto failed; + } + + err = hci_send_cmd(hdev, HCI_OP_PIN_CODE_NEG_REPLY, sizeof(cp->bdaddr), + &cp->bdaddr); + if (err < 0) + mgmt_pending_remove(cmd); + +failed: + hci_dev_unlock_bh(hdev); + hci_dev_put(hdev); + + return err; +} + +static int set_io_capability(struct sock *sk, u16 index, unsigned char *data, + u16 len) +{ + struct hci_dev *hdev; + struct mgmt_cp_set_io_capability *cp; + + BT_DBG(""); + + cp = (void *) data; + + if (len != 
sizeof(*cp)) + return cmd_status(sk, index, MGMT_OP_SET_IO_CAPABILITY, EINVAL); + + hdev = hci_dev_get(index); + if (!hdev) + return cmd_status(sk, index, MGMT_OP_SET_IO_CAPABILITY, ENODEV); + + hci_dev_lock_bh(hdev); + + hdev->io_capability = cp->io_capability; + + BT_DBG("%s IO capability set to 0x%02x", hdev->name, + hdev->io_capability); + + hci_dev_unlock_bh(hdev); + hci_dev_put(hdev); + + return cmd_complete(sk, index, MGMT_OP_SET_IO_CAPABILITY, NULL, 0); +} + +static inline struct pending_cmd *find_pairing(struct hci_conn *conn) +{ + struct hci_dev *hdev = conn->hdev; + struct list_head *p; + + list_for_each(p, &cmd_list) { + struct pending_cmd *cmd; + + cmd = list_entry(p, struct pending_cmd, list); + + if (cmd->opcode != MGMT_OP_PAIR_DEVICE) + continue; + + if (cmd->index != hdev->id) + continue; + + if (cmd->user_data != conn) + continue; + + return cmd; + } + + return NULL; +} + +static void pairing_complete(struct pending_cmd *cmd, u8 status) +{ + struct mgmt_rp_pair_device rp; + struct hci_conn *conn = cmd->user_data; + + bacpy(&rp.bdaddr, &conn->dst); + rp.status = status; + + cmd_complete(cmd->sk, cmd->index, MGMT_OP_PAIR_DEVICE, &rp, sizeof(rp)); + + /* So we don't get further callbacks for this connection */ + conn->connect_cfm_cb = NULL; + conn->security_cfm_cb = NULL; + conn->disconn_cfm_cb = NULL; + + hci_conn_put(conn); + + mgmt_pending_remove(cmd); +} + +static void pairing_complete_cb(struct hci_conn *conn, u8 status) +{ + struct pending_cmd *cmd; + + BT_DBG("status %u", status); + + cmd = find_pairing(conn); + if (!cmd) { + BT_DBG("Unable to find a pending command"); + return; + } + + pairing_complete(cmd, status); +} + +static int pair_device(struct sock *sk, u16 index, unsigned char *data, u16 len) +{ + struct hci_dev *hdev; + struct mgmt_cp_pair_device *cp; + struct pending_cmd *cmd; + u8 sec_level, auth_type; + struct hci_conn *conn; + int err; + + BT_DBG(""); + + cp = (void *) data; + + if (len != sizeof(*cp)) + return cmd_status(sk, index, MGMT_OP_PAIR_DEVICE, EINVAL); + + hdev = hci_dev_get(index); + if (!hdev) + return cmd_status(sk, index, MGMT_OP_PAIR_DEVICE, ENODEV); + + hci_dev_lock_bh(hdev); + + if (cp->io_cap == 0x03) { + sec_level = BT_SECURITY_MEDIUM; + auth_type = HCI_AT_DEDICATED_BONDING; + } else { + sec_level = BT_SECURITY_HIGH; + auth_type = HCI_AT_DEDICATED_BONDING_MITM; + } + + conn = hci_connect(hdev, ACL_LINK, &cp->bdaddr, sec_level, auth_type); + if (IS_ERR(conn)) { + err = PTR_ERR(conn); + goto unlock; + } + + if (conn->connect_cfm_cb) { + hci_conn_put(conn); + err = cmd_status(sk, index, MGMT_OP_PAIR_DEVICE, EBUSY); + goto unlock; + } + + cmd = mgmt_pending_add(sk, MGMT_OP_PAIR_DEVICE, index, data, len); + if (!cmd) { + err = -ENOMEM; + hci_conn_put(conn); + goto unlock; + } + + conn->connect_cfm_cb = pairing_complete_cb; + conn->security_cfm_cb = pairing_complete_cb; + conn->disconn_cfm_cb = pairing_complete_cb; + conn->io_capability = cp->io_cap; + cmd->user_data = conn; + + if (conn->state == BT_CONNECTED && + hci_conn_security(conn, sec_level, auth_type)) + pairing_complete(cmd, 0); + + err = 0; + +unlock: + hci_dev_unlock_bh(hdev); + hci_dev_put(hdev); + + return err; +} + +static int user_confirm_reply(struct sock *sk, u16 index, unsigned char *data, + u16 len, int success) +{ + struct mgmt_cp_user_confirm_reply *cp = (void *) data; + u16 mgmt_op, hci_op; + struct pending_cmd *cmd; + struct hci_dev *hdev; + int err; + + BT_DBG(""); + + if (success) { + mgmt_op = MGMT_OP_USER_CONFIRM_REPLY; + hci_op = HCI_OP_USER_CONFIRM_REPLY; + } 
else { + mgmt_op = MGMT_OP_USER_CONFIRM_NEG_REPLY; + hci_op = HCI_OP_USER_CONFIRM_NEG_REPLY; + } + + if (len != sizeof(*cp)) + return cmd_status(sk, index, mgmt_op, EINVAL); + + hdev = hci_dev_get(index); + if (!hdev) + return cmd_status(sk, index, mgmt_op, ENODEV); + + if (!test_bit(HCI_UP, &hdev->flags)) { + err = cmd_status(sk, index, mgmt_op, ENETDOWN); + goto failed; + } + + cmd = mgmt_pending_add(sk, mgmt_op, index, data, len); + if (!cmd) { + err = -ENOMEM; + goto failed; + } + + err = hci_send_cmd(hdev, hci_op, sizeof(cp->bdaddr), &cp->bdaddr); + if (err < 0) + mgmt_pending_remove(cmd); + +failed: + hci_dev_unlock_bh(hdev); + hci_dev_put(hdev); + + return err; +} + int mgmt_control(struct sock *sk, struct msghdr *msg, size_t msglen) { unsigned char *buf; struct mgmt_hdr *hdr; - u16 opcode, len; + u16 opcode, index, len; int err; BT_DBG("got %zu bytes", msglen); @@ -233,6 +1275,7 @@ int mgmt_control(struct sock *sk, struct msghdr *msg, size_t msglen) hdr = (struct mgmt_hdr *) buf; opcode = get_unaligned_le16(&hdr->opcode); + index = get_unaligned_le16(&hdr->index); len = get_unaligned_le16(&hdr->len); if (len != msglen - sizeof(*hdr)) { @@ -248,11 +1291,65 @@ int mgmt_control(struct sock *sk, struct msghdr *msg, size_t msglen) err = read_index_list(sk); break; case MGMT_OP_READ_INFO: - err = read_controller_info(sk, buf + sizeof(*hdr), len); + err = read_controller_info(sk, index); + break; + case MGMT_OP_SET_POWERED: + err = set_powered(sk, index, buf + sizeof(*hdr), len); + break; + case MGMT_OP_SET_DISCOVERABLE: + err = set_discoverable(sk, index, buf + sizeof(*hdr), len); + break; + case MGMT_OP_SET_CONNECTABLE: + err = set_connectable(sk, index, buf + sizeof(*hdr), len); + break; + case MGMT_OP_SET_PAIRABLE: + err = set_pairable(sk, index, buf + sizeof(*hdr), len); + break; + case MGMT_OP_ADD_UUID: + err = add_uuid(sk, index, buf + sizeof(*hdr), len); + break; + case MGMT_OP_REMOVE_UUID: + err = remove_uuid(sk, index, buf + sizeof(*hdr), len); + break; + case MGMT_OP_SET_DEV_CLASS: + err = set_dev_class(sk, index, buf + sizeof(*hdr), len); + break; + case MGMT_OP_SET_SERVICE_CACHE: + err = set_service_cache(sk, index, buf + sizeof(*hdr), len); + break; + case MGMT_OP_LOAD_KEYS: + err = load_keys(sk, index, buf + sizeof(*hdr), len); + break; + case MGMT_OP_REMOVE_KEY: + err = remove_key(sk, index, buf + sizeof(*hdr), len); + break; + case MGMT_OP_DISCONNECT: + err = disconnect(sk, index, buf + sizeof(*hdr), len); + break; + case MGMT_OP_GET_CONNECTIONS: + err = get_connections(sk, index); + break; + case MGMT_OP_PIN_CODE_REPLY: + err = pin_code_reply(sk, index, buf + sizeof(*hdr), len); + break; + case MGMT_OP_PIN_CODE_NEG_REPLY: + err = pin_code_neg_reply(sk, index, buf + sizeof(*hdr), len); + break; + case MGMT_OP_SET_IO_CAPABILITY: + err = set_io_capability(sk, index, buf + sizeof(*hdr), len); + break; + case MGMT_OP_PAIR_DEVICE: + err = pair_device(sk, index, buf + sizeof(*hdr), len); + break; + case MGMT_OP_USER_CONFIRM_REPLY: + err = user_confirm_reply(sk, index, buf + sizeof(*hdr), len, 1); + break; + case MGMT_OP_USER_CONFIRM_NEG_REPLY: + err = user_confirm_reply(sk, index, buf + sizeof(*hdr), len, 0); break; default: BT_DBG("Unknown op %u", opcode); - err = cmd_status(sk, opcode, 0x01); + err = cmd_status(sk, index, opcode, 0x01); break; } @@ -266,43 +1363,283 @@ done: return err; } -static int mgmt_event(u16 event, void *data, u16 data_len) +int mgmt_index_added(u16 index) { - struct sk_buff *skb; - struct mgmt_hdr *hdr; + return mgmt_event(MGMT_EV_INDEX_ADDED, index, 
NULL, 0, NULL); +} - skb = alloc_skb(sizeof(*hdr) + data_len, GFP_ATOMIC); - if (!skb) - return -ENOMEM; +int mgmt_index_removed(u16 index) +{ + return mgmt_event(MGMT_EV_INDEX_REMOVED, index, NULL, 0, NULL); +} - bt_cb(skb)->channel = HCI_CHANNEL_CONTROL; +struct cmd_lookup { + u8 val; + struct sock *sk; +}; - hdr = (void *) skb_put(skb, sizeof(*hdr)); - hdr->opcode = cpu_to_le16(event); - hdr->len = cpu_to_le16(data_len); +static void mode_rsp(struct pending_cmd *cmd, void *data) +{ + struct mgmt_mode *cp = cmd->cmd; + struct cmd_lookup *match = data; - memcpy(skb_put(skb, data_len), data, data_len); + if (cp->val != match->val) + return; - hci_send_to_sock(NULL, skb); - kfree_skb(skb); + send_mode_rsp(cmd->sk, cmd->opcode, cmd->index, cp->val); - return 0; + list_del(&cmd->list); + + if (match->sk == NULL) { + match->sk = cmd->sk; + sock_hold(match->sk); + } + + mgmt_pending_free(cmd); } -int mgmt_index_added(u16 index) +int mgmt_powered(u16 index, u8 powered) { - struct mgmt_ev_index_added ev; + struct mgmt_mode ev; + struct cmd_lookup match = { powered, NULL }; + int ret; + + mgmt_pending_foreach(MGMT_OP_SET_POWERED, index, mode_rsp, &match); - put_unaligned_le16(index, &ev.index); + ev.val = powered; - return mgmt_event(MGMT_EV_INDEX_ADDED, &ev, sizeof(ev)); + ret = mgmt_event(MGMT_EV_POWERED, index, &ev, sizeof(ev), match.sk); + + if (match.sk) + sock_put(match.sk); + + return ret; } -int mgmt_index_removed(u16 index) +int mgmt_discoverable(u16 index, u8 discoverable) +{ + struct mgmt_mode ev; + struct cmd_lookup match = { discoverable, NULL }; + int ret; + + mgmt_pending_foreach(MGMT_OP_SET_DISCOVERABLE, index, mode_rsp, &match); + + ev.val = discoverable; + + ret = mgmt_event(MGMT_EV_DISCOVERABLE, index, &ev, sizeof(ev), + match.sk); + + if (match.sk) + sock_put(match.sk); + + return ret; +} + +int mgmt_connectable(u16 index, u8 connectable) +{ + struct mgmt_mode ev; + struct cmd_lookup match = { connectable, NULL }; + int ret; + + mgmt_pending_foreach(MGMT_OP_SET_CONNECTABLE, index, mode_rsp, &match); + + ev.val = connectable; + + ret = mgmt_event(MGMT_EV_CONNECTABLE, index, &ev, sizeof(ev), match.sk); + + if (match.sk) + sock_put(match.sk); + + return ret; +} + +int mgmt_new_key(u16 index, struct link_key *key, u8 old_key_type) +{ + struct mgmt_ev_new_key ev; + + memset(&ev, 0, sizeof(ev)); + + bacpy(&ev.key.bdaddr, &key->bdaddr); + ev.key.type = key->type; + memcpy(ev.key.val, key->val, 16); + ev.key.pin_len = key->pin_len; + ev.old_key_type = old_key_type; + + return mgmt_event(MGMT_EV_NEW_KEY, index, &ev, sizeof(ev), NULL); +} + +int mgmt_connected(u16 index, bdaddr_t *bdaddr) +{ + struct mgmt_ev_connected ev; + + bacpy(&ev.bdaddr, bdaddr); + + return mgmt_event(MGMT_EV_CONNECTED, index, &ev, sizeof(ev), NULL); +} + +static void disconnect_rsp(struct pending_cmd *cmd, void *data) +{ + struct mgmt_cp_disconnect *cp = cmd->cmd; + struct sock **sk = data; + struct mgmt_rp_disconnect rp; + + bacpy(&rp.bdaddr, &cp->bdaddr); + + cmd_complete(cmd->sk, cmd->index, MGMT_OP_DISCONNECT, &rp, sizeof(rp)); + + *sk = cmd->sk; + sock_hold(*sk); + + mgmt_pending_remove(cmd); +} + +int mgmt_disconnected(u16 index, bdaddr_t *bdaddr) +{ + struct mgmt_ev_disconnected ev; + struct sock *sk = NULL; + int err; + + mgmt_pending_foreach(MGMT_OP_DISCONNECT, index, disconnect_rsp, &sk); + + bacpy(&ev.bdaddr, bdaddr); + + err = mgmt_event(MGMT_EV_DISCONNECTED, index, &ev, sizeof(ev), sk); + + if (sk) + sock_put(sk); + + return err; +} + +int mgmt_disconnect_failed(u16 index) +{ + struct pending_cmd 
*cmd; + int err; + + cmd = mgmt_pending_find(MGMT_OP_DISCONNECT, index); + if (!cmd) + return -ENOENT; + + err = cmd_status(cmd->sk, index, MGMT_OP_DISCONNECT, EIO); + + mgmt_pending_remove(cmd); + + return err; +} + +int mgmt_connect_failed(u16 index, bdaddr_t *bdaddr, u8 status) +{ + struct mgmt_ev_connect_failed ev; + + bacpy(&ev.bdaddr, bdaddr); + ev.status = status; + + return mgmt_event(MGMT_EV_CONNECT_FAILED, index, &ev, sizeof(ev), NULL); +} + +int mgmt_pin_code_request(u16 index, bdaddr_t *bdaddr) +{ + struct mgmt_ev_pin_code_request ev; + + bacpy(&ev.bdaddr, bdaddr); + + return mgmt_event(MGMT_EV_PIN_CODE_REQUEST, index, &ev, sizeof(ev), + NULL); +} + +int mgmt_pin_code_reply_complete(u16 index, bdaddr_t *bdaddr, u8 status) +{ + struct pending_cmd *cmd; + struct mgmt_rp_pin_code_reply rp; + int err; + + cmd = mgmt_pending_find(MGMT_OP_PIN_CODE_REPLY, index); + if (!cmd) + return -ENOENT; + + bacpy(&rp.bdaddr, bdaddr); + rp.status = status; + + err = cmd_complete(cmd->sk, index, MGMT_OP_PIN_CODE_REPLY, &rp, + sizeof(rp)); + + mgmt_pending_remove(cmd); + + return err; +} + +int mgmt_pin_code_neg_reply_complete(u16 index, bdaddr_t *bdaddr, u8 status) +{ + struct pending_cmd *cmd; + struct mgmt_rp_pin_code_reply rp; + int err; + + cmd = mgmt_pending_find(MGMT_OP_PIN_CODE_NEG_REPLY, index); + if (!cmd) + return -ENOENT; + + bacpy(&rp.bdaddr, bdaddr); + rp.status = status; + + err = cmd_complete(cmd->sk, index, MGMT_OP_PIN_CODE_NEG_REPLY, &rp, + sizeof(rp)); + + mgmt_pending_remove(cmd); + + return err; +} + +int mgmt_user_confirm_request(u16 index, bdaddr_t *bdaddr, __le32 value) +{ + struct mgmt_ev_user_confirm_request ev; + + BT_DBG("hci%u", index); + + bacpy(&ev.bdaddr, bdaddr); + put_unaligned_le32(value, &ev.value); + + return mgmt_event(MGMT_EV_USER_CONFIRM_REQUEST, index, &ev, sizeof(ev), + NULL); +} + +static int confirm_reply_complete(u16 index, bdaddr_t *bdaddr, u8 status, + u8 opcode) +{ + struct pending_cmd *cmd; + struct mgmt_rp_user_confirm_reply rp; + int err; + + cmd = mgmt_pending_find(opcode, index); + if (!cmd) + return -ENOENT; + + bacpy(&rp.bdaddr, bdaddr); + rp.status = status; + err = cmd_complete(cmd->sk, index, opcode, &rp, sizeof(rp)); + + mgmt_pending_remove(cmd); + + return err; +} + +int mgmt_user_confirm_reply_complete(u16 index, bdaddr_t *bdaddr, u8 status) +{ + return confirm_reply_complete(index, bdaddr, status, + MGMT_OP_USER_CONFIRM_REPLY); +} + +int mgmt_user_confirm_neg_reply_complete(u16 index, bdaddr_t *bdaddr, u8 status) +{ + return confirm_reply_complete(index, bdaddr, status, + MGMT_OP_USER_CONFIRM_NEG_REPLY); +} + +int mgmt_auth_failed(u16 index, bdaddr_t *bdaddr, u8 status) { - struct mgmt_ev_index_added ev; + struct mgmt_ev_auth_failed ev; - put_unaligned_le16(index, &ev.index); + bacpy(&ev.bdaddr, bdaddr); + ev.status = status; - return mgmt_event(MGMT_EV_INDEX_REMOVED, &ev, sizeof(ev)); + return mgmt_event(MGMT_EV_AUTH_FAILED, index, &ev, sizeof(ev), NULL); } diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index ff8aaa736650..c9973932456f 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -1164,7 +1164,8 @@ static int rfcomm_recv_ua(struct rfcomm_session *s, u8 dlci) * initiator rfcomm_process_rx already calls * rfcomm_session_put() */ if (s->sock->sk->sk_state != BT_CLOSED) - rfcomm_session_put(s); + if (list_empty(&s->dlcs)) + rfcomm_session_put(s); break; } } @@ -2153,8 +2154,6 @@ static int __init rfcomm_init(void) { int err; - l2cap_load(); - hci_register_cb(&rfcomm_cb); rfcomm_thread 
= kthread_run(rfcomm_run, NULL, "krfcommd"); diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c index 2575c2db6404..d7b9af4703d0 100644 --- a/net/bluetooth/rfcomm/tty.c +++ b/net/bluetooth/rfcomm/tty.c @@ -727,7 +727,9 @@ static int rfcomm_tty_open(struct tty_struct *tty, struct file *filp) break; } + tty_unlock(); schedule(); + tty_lock(); } set_current_state(TASK_RUNNING); remove_wait_queue(&dev->wait, &wait); diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 960c6d1637da..42fdffd1d76c 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -50,8 +50,6 @@ #include <net/bluetooth/hci_core.h> #include <net/bluetooth/sco.h> -#define VERSION "0.6" - static int disable_esco; static const struct proto_ops sco_sock_ops; @@ -192,20 +190,21 @@ static int sco_connect(struct sock *sk) hci_dev_lock_bh(hdev); - err = -ENOMEM; - if (lmp_esco_capable(hdev) && !disable_esco) type = ESCO_LINK; else type = SCO_LINK; hcon = hci_connect(hdev, type, dst, BT_SECURITY_LOW, HCI_AT_NO_BONDING); - if (!hcon) + if (IS_ERR(hcon)) { + err = PTR_ERR(hcon); goto done; + } conn = sco_conn_add(hcon, 0); if (!conn) { hci_conn_put(hcon); + err = -ENOMEM; goto done; } @@ -703,6 +702,7 @@ static int sco_sock_getsockopt_old(struct socket *sock, int optname, char __user break; } + memset(&cinfo, 0, sizeof(cinfo)); cinfo.hci_handle = sco_pi(sk)->conn->hcon->handle; memcpy(cinfo.dev_class, sco_pi(sk)->conn->hcon->dev_class, 3); @@ -1023,7 +1023,7 @@ static struct hci_proto sco_hci_proto = { .recv_scodata = sco_recv_scodata }; -static int __init sco_init(void) +int __init sco_init(void) { int err; @@ -1051,7 +1051,6 @@ static int __init sco_init(void) BT_ERR("Failed to create SCO debug file"); } - BT_INFO("SCO (Voice Link) ver %s", VERSION); BT_INFO("SCO socket layer initialized"); return 0; @@ -1061,7 +1060,7 @@ error: return err; } -static void __exit sco_exit(void) +void __exit sco_exit(void) { debugfs_remove(sco_debugfs); @@ -1074,14 +1073,5 @@ static void __exit sco_exit(void) proto_unregister(&sco_proto); } -module_init(sco_init); -module_exit(sco_exit); - module_param(disable_esco, bool, 0644); MODULE_PARM_DESC(disable_esco, "Disable eSCO connection creation"); - -MODULE_AUTHOR("Marcel Holtmann <marcel@holtmann.org>"); -MODULE_DESCRIPTION("Bluetooth SCO ver " VERSION); -MODULE_VERSION(VERSION); -MODULE_LICENSE("GPL"); -MODULE_ALIAS("bt-proto-2"); diff --git a/net/bridge/Kconfig b/net/bridge/Kconfig index 9190ae462cb4..6dee7bf648a9 100644 --- a/net/bridge/Kconfig +++ b/net/bridge/Kconfig @@ -6,6 +6,7 @@ config BRIDGE tristate "802.1d Ethernet Bridging" select LLC select STP + depends on IPV6 || IPV6=n ---help--- If you say Y here, then your Linux box will be able to act as an Ethernet bridge, which means that the different Ethernet segments it diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 556443566e9c..21e5901186ea 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -78,6 +78,8 @@ static int br_dev_open(struct net_device *dev) { struct net_bridge *br = netdev_priv(dev); + netif_carrier_off(dev); + br_features_recompute(br); netif_start_queue(dev); br_stp_enable_bridge(br); @@ -94,6 +96,8 @@ static int br_dev_stop(struct net_device *dev) { struct net_bridge *br = netdev_priv(dev); + netif_carrier_off(dev); + br_stp_disable_bridge(br); br_multicast_stop(br); @@ -297,6 +301,21 @@ void br_netpoll_disable(struct net_bridge_port *p) #endif +static int br_add_slave(struct net_device *dev, struct net_device *slave_dev) + +{ + struct net_bridge *br = 
netdev_priv(dev); + + return br_add_if(br, slave_dev); +} + +static int br_del_slave(struct net_device *dev, struct net_device *slave_dev) +{ + struct net_bridge *br = netdev_priv(dev); + + return br_del_if(br, slave_dev); +} + static const struct ethtool_ops br_ethtool_ops = { .get_drvinfo = br_getinfo, .get_link = ethtool_op_get_link, @@ -326,6 +345,8 @@ static const struct net_device_ops br_netdev_ops = { .ndo_netpoll_cleanup = br_netpoll_cleanup, .ndo_poll_controller = br_poll_controller, #endif + .ndo_add_slave = br_add_slave, + .ndo_del_slave = br_del_slave, }; static void br_dev_free(struct net_device *dev) diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index 2872393b2939..88485cc74dc3 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -328,12 +328,12 @@ static struct net_bridge_fdb_entry *fdb_create(struct hlist_head *head, fdb = kmem_cache_alloc(br_fdb_cache, GFP_ATOMIC); if (fdb) { memcpy(fdb->addr.addr, addr, ETH_ALEN); - hlist_add_head_rcu(&fdb->hlist, head); - fdb->dst = source; fdb->is_local = is_local; fdb->is_static = is_local; fdb->ageing_timer = jiffies; + + hlist_add_head_rcu(&fdb->hlist, head); } return fdb; } diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index d9d1e2bac1d6..dce8f0009a12 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -148,6 +148,8 @@ static void del_nbp(struct net_bridge_port *p) netdev_rx_handler_unregister(dev); + netdev_set_master(dev, NULL); + br_multicast_del_port(p); kobject_uevent(&p->kobj, KOBJ_REMOVE); @@ -365,7 +367,7 @@ int br_min_mtu(const struct net_bridge *br) void br_features_recompute(struct net_bridge *br) { struct net_bridge_port *p; - unsigned long features, mask; + u32 features, mask; features = mask = br->feature_mask; if (list_empty(&br->port_list)) @@ -379,7 +381,7 @@ void br_features_recompute(struct net_bridge *br) } done: - br->dev->features = netdev_fix_features(features, NULL); + br->dev->features = netdev_fix_features(br->dev, features); } /* called with RTNL */ @@ -429,10 +431,14 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) if (br_netpoll_info(br) && ((err = br_netpoll_enable(p)))) goto err3; - err = netdev_rx_handler_register(dev, br_handle_frame, p); + err = netdev_set_master(dev, br->dev); if (err) goto err3; + err = netdev_rx_handler_register(dev, br_handle_frame, p); + if (err) + goto err4; + dev->priv_flags |= IFF_BRIDGE_PORT; dev_disable_lro(dev); @@ -455,6 +461,9 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) kobject_uevent(&p->kobj, KOBJ_ADD); return 0; + +err4: + netdev_set_master(dev, NULL); err3: sysfs_remove_link(br->ifobj, p->dev->name); err2: diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 6f6d8e1b776f..88e4aa9cb1f9 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -80,7 +80,7 @@ int br_handle_frame_finish(struct sk_buff *skb) if (is_multicast_ether_addr(dest)) { mdst = br_mdb_get(br, skb); if (mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) { - if ((mdst && !hlist_unhashed(&mdst->mglist)) || + if ((mdst && mdst->mglist) || br_multicast_is_router(br)) skb2 = skb; br_multicast_forward(mdst, skb, skb2); diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index f701a21acb34..030a002ff8ee 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -37,10 +37,9 @@ rcu_dereference_protected(X, lockdep_is_held(&br->multicast_lock)) #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) -static inline int ipv6_is_local_multicast(const struct in6_addr *addr) +static inline int 
ipv6_is_transient_multicast(const struct in6_addr *addr) { - if (ipv6_addr_is_multicast(addr) && - IPV6_ADDR_MC_SCOPE(addr) <= IPV6_ADDR_SCOPE_LINKLOCAL) + if (ipv6_addr_is_multicast(addr) && IPV6_ADDR_MC_FLAG_TRANSIENT(addr)) return 1; return 0; } @@ -232,8 +231,7 @@ static void br_multicast_group_expired(unsigned long data) if (!netif_running(br->dev) || timer_pending(&mp->timer)) goto out; - if (!hlist_unhashed(&mp->mglist)) - hlist_del_init(&mp->mglist); + mp->mglist = false; if (mp->ports) goto out; @@ -276,7 +274,7 @@ static void br_multicast_del_pg(struct net_bridge *br, del_timer(&p->query_timer); call_rcu_bh(&p->rcu, br_multicast_free_pg); - if (!mp->ports && hlist_unhashed(&mp->mglist) && + if (!mp->ports && !mp->mglist && netif_running(br->dev)) mod_timer(&mp->timer, jiffies); @@ -436,7 +434,6 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br, eth = eth_hdr(skb); memcpy(eth->h_source, br->dev->dev_addr, 6); - ipv6_eth_mc_map(group, eth->h_dest); eth->h_proto = htons(ETH_P_IPV6); skb_put(skb, sizeof(*eth)); @@ -448,8 +445,10 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br, ip6h->payload_len = htons(8 + sizeof(*mldq)); ip6h->nexthdr = IPPROTO_HOPOPTS; ip6h->hop_limit = 1; - ipv6_addr_set(&ip6h->saddr, 0, 0, 0, 0); + ipv6_dev_get_saddr(dev_net(br->dev), br->dev, &ip6h->daddr, 0, + &ip6h->saddr); ipv6_addr_set(&ip6h->daddr, htonl(0xff020000), 0, 0, htonl(1)); + ipv6_eth_mc_map(&ip6h->daddr, eth->h_dest); hopopt = (u8 *)(ip6h + 1); hopopt[0] = IPPROTO_ICMPV6; /* next hdr */ @@ -528,7 +527,7 @@ static void br_multicast_group_query_expired(unsigned long data) struct net_bridge *br = mp->br; spin_lock(&br->multicast_lock); - if (!netif_running(br->dev) || hlist_unhashed(&mp->mglist) || + if (!netif_running(br->dev) || !mp->mglist || mp->queries_sent >= br->multicast_last_member_count) goto out; @@ -719,7 +718,7 @@ static int br_multicast_add_group(struct net_bridge *br, goto err; if (!port) { - hlist_add_head(&mp->mglist, &br->mglist); + mp->mglist = true; mod_timer(&mp->timer, now + br->multicast_membership_interval); goto out; } @@ -781,11 +780,11 @@ static int br_ip6_multicast_add_group(struct net_bridge *br, { struct br_ip br_group; - if (ipv6_is_local_multicast(group)) + if (!ipv6_is_transient_multicast(group)) return 0; ipv6_addr_copy(&br_group.u.ip6, group); - br_group.proto = htons(ETH_P_IP); + br_group.proto = htons(ETH_P_IPV6); return br_multicast_add_group(br, port, &br_group); } @@ -1014,18 +1013,19 @@ static int br_ip6_multicast_mld2_report(struct net_bridge *br, nsrcs = skb_header_pointer(skb, len + offsetof(struct mld2_grec, - grec_mca), + grec_nsrcs), sizeof(_nsrcs), &_nsrcs); if (!nsrcs) return -EINVAL; if (!pskb_may_pull(skb, len + sizeof(*grec) + - sizeof(struct in6_addr) * (*nsrcs))) + sizeof(struct in6_addr) * ntohs(*nsrcs))) return -EINVAL; grec = (struct mld2_grec *)(skb->data + len); - len += sizeof(*grec) + sizeof(struct in6_addr) * (*nsrcs); + len += sizeof(*grec) + + sizeof(struct in6_addr) * ntohs(*nsrcs); /* We treat these as MLDv1 reports for now. */ switch (grec->grec_type) { @@ -1165,7 +1165,7 @@ static int br_ip4_multicast_query(struct net_bridge *br, max_delay *= br->multicast_last_member_count; - if (!hlist_unhashed(&mp->mglist) && + if (mp->mglist && (timer_pending(&mp->timer) ? time_after(mp->timer.expires, now + max_delay) : try_to_del_timer_sync(&mp->timer) >= 0)) @@ -1177,7 +1177,7 @@ static int br_ip4_multicast_query(struct net_bridge *br, if (timer_pending(&p->timer) ? 
time_after(p->timer.expires, now + max_delay) : try_to_del_timer_sync(&p->timer) >= 0) - mod_timer(&mp->timer, now + max_delay); + mod_timer(&p->timer, now + max_delay); } out: @@ -1236,7 +1236,7 @@ static int br_ip6_multicast_query(struct net_bridge *br, goto out; max_delay *= br->multicast_last_member_count; - if (!hlist_unhashed(&mp->mglist) && + if (mp->mglist && (timer_pending(&mp->timer) ? time_after(mp->timer.expires, now + max_delay) : try_to_del_timer_sync(&mp->timer) >= 0)) @@ -1248,7 +1248,7 @@ static int br_ip6_multicast_query(struct net_bridge *br, if (timer_pending(&p->timer) ? time_after(p->timer.expires, now + max_delay) : try_to_del_timer_sync(&p->timer) >= 0) - mod_timer(&mp->timer, now + max_delay); + mod_timer(&p->timer, now + max_delay); } out: @@ -1283,7 +1283,7 @@ static void br_multicast_leave_group(struct net_bridge *br, br->multicast_last_member_interval; if (!port) { - if (!hlist_unhashed(&mp->mglist) && + if (mp->mglist && (timer_pending(&mp->timer) ? time_after(mp->timer.expires, time) : try_to_del_timer_sync(&mp->timer) >= 0)) { @@ -1341,7 +1341,7 @@ static void br_ip6_multicast_leave_group(struct net_bridge *br, { struct br_ip br_group; - if (ipv6_is_local_multicast(group)) + if (!ipv6_is_transient_multicast(group)) return; ipv6_addr_copy(&br_group.u.ip6, group); diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 4b5b66d07bba..f97af5590ba1 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -412,10 +412,6 @@ static int br_nf_pre_routing_finish(struct sk_buff *skb) nf_bridge->mask ^= BRNF_NF_BRIDGE_PREROUTING; if (dnat_took_place(skb)) { if ((err = ip_route_input(skb, iph->daddr, iph->saddr, iph->tos, dev))) { - struct flowi fl = { - .fl4_dst = iph->daddr, - .fl4_tos = RT_TOS(iph->tos), - }; struct in_device *in_dev = __in_dev_get_rcu(dev); /* If err equals -EHOSTUNREACH the error is due to a @@ -428,14 +424,16 @@ static int br_nf_pre_routing_finish(struct sk_buff *skb) if (err != -EHOSTUNREACH || !in_dev || IN_DEV_FORWARD(in_dev)) goto free_skb; - if (!ip_route_output_key(dev_net(dev), &rt, &fl)) { + rt = ip_route_output(dev_net(dev), iph->daddr, 0, + RT_TOS(iph->tos), 0); + if (!IS_ERR(rt)) { /* - Bridged-and-DNAT'ed traffic doesn't * require ip_forwarding. 
*/ - if (((struct dst_entry *)rt)->dev == dev) { - skb_dst_set(skb, (struct dst_entry *)rt); + if (rt->dst.dev == dev) { + skb_dst_set(skb, &rt->dst); goto bridged_dnat; } - dst_release((struct dst_entry *)rt); + ip_rt_put(rt); } free_skb: kfree_skb(skb); diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 84aac7734bfc..f7afc364d777 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -84,13 +84,13 @@ struct net_bridge_port_group { struct net_bridge_mdb_entry { struct hlist_node hlist[2]; - struct hlist_node mglist; struct net_bridge *br; struct net_bridge_port_group __rcu *ports; struct rcu_head rcu; struct timer_list timer; struct timer_list query_timer; struct br_ip addr; + bool mglist; u32 queries_sent; }; @@ -182,7 +182,7 @@ struct net_bridge struct br_cpu_netstats __percpu *stats; spinlock_t hash_lock; struct hlist_head hash[BR_HASH_SIZE]; - unsigned long feature_mask; + u32 feature_mask; #ifdef CONFIG_BRIDGE_NETFILTER struct rtable fake_rtable; bool nf_call_iptables; @@ -238,7 +238,6 @@ struct net_bridge spinlock_t multicast_lock; struct net_bridge_mdb_htable __rcu *mdb; struct hlist_head router_list; - struct hlist_head mglist; struct timer_list multicast_router_timer; struct timer_list multicast_querier_timer; diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c index 57186d84d2bd..a5badd0f8226 100644 --- a/net/bridge/br_stp.c +++ b/net/bridge/br_stp.c @@ -397,28 +397,37 @@ static void br_make_forwarding(struct net_bridge_port *p) void br_port_state_selection(struct net_bridge *br) { struct net_bridge_port *p; + unsigned int liveports = 0; /* Don't change port states if userspace is handling STP */ if (br->stp_enabled == BR_USER_STP) return; list_for_each_entry(p, &br->port_list, list) { - if (p->state != BR_STATE_DISABLED) { - if (p->port_no == br->root_port) { - p->config_pending = 0; - p->topology_change_ack = 0; - br_make_forwarding(p); - } else if (br_is_designated_port(p)) { - del_timer(&p->message_age_timer); - br_make_forwarding(p); - } else { - p->config_pending = 0; - p->topology_change_ack = 0; - br_make_blocking(p); - } + if (p->state == BR_STATE_DISABLED) + continue; + + if (p->port_no == br->root_port) { + p->config_pending = 0; + p->topology_change_ack = 0; + br_make_forwarding(p); + } else if (br_is_designated_port(p)) { + del_timer(&p->message_age_timer); + br_make_forwarding(p); + } else { + p->config_pending = 0; + p->topology_change_ack = 0; + br_make_blocking(p); } + if (p->state == BR_STATE_FORWARDING) + ++liveports; } + + if (liveports == 0) + netif_carrier_off(br->dev); + else + netif_carrier_on(br->dev); } /* called under bridge lock */ diff --git a/net/bridge/br_stp_timer.c b/net/bridge/br_stp_timer.c index 7b22456023c5..3e965140051e 100644 --- a/net/bridge/br_stp_timer.c +++ b/net/bridge/br_stp_timer.c @@ -94,6 +94,7 @@ static void br_forward_delay_timer_expired(unsigned long arg) p->state = BR_STATE_FORWARDING; if (br_is_designated_for_some_port(br)) br_topology_change_detection(br); + netif_carrier_on(br->dev); } br_log_state(p); spin_unlock(&br->lock); diff --git a/net/caif/chnl_net.c b/net/caif/chnl_net.c index fa9dab372b68..6008d6dc18a0 100644 --- a/net/caif/chnl_net.c +++ b/net/caif/chnl_net.c @@ -394,9 +394,7 @@ static void ipcaif_net_setup(struct net_device *dev) priv->conn_req.sockaddr.u.dgm.connection_id = -1; priv->flowenabled = false; - ASSERT_RTNL(); init_waitqueue_head(&priv->netmgmt_wq); - list_add(&priv->list_field, &chnl_net_list); } @@ -453,6 +451,8 @@ static int ipcaif_newlink(struct net *src_net, 
struct net_device *dev, ret = register_netdevice(dev); if (ret) pr_warn("device rtml registration failed\n"); + else + list_add(&caifdev->list_field, &chnl_net_list); return ret; } diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index dff633d62e5b..05f357828a2f 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -252,8 +252,12 @@ static int ceph_tcp_recvmsg(struct socket *sock, void *buf, size_t len) { struct kvec iov = {buf, len}; struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL }; + int r; - return kernel_recvmsg(sock, &msg, &iov, 1, len, msg.msg_flags); + r = kernel_recvmsg(sock, &msg, &iov, 1, len, msg.msg_flags); + if (r == -EAGAIN) + r = 0; + return r; } /* @@ -264,13 +268,17 @@ static int ceph_tcp_sendmsg(struct socket *sock, struct kvec *iov, size_t kvlen, size_t len, int more) { struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL }; + int r; if (more) msg.msg_flags |= MSG_MORE; else msg.msg_flags |= MSG_EOR; /* superfluous, but what the hell */ - return kernel_sendmsg(sock, &msg, iov, kvlen, len); + r = kernel_sendmsg(sock, &msg, iov, kvlen, len); + if (r == -EAGAIN) + r = 0; + return r; } @@ -328,7 +336,6 @@ static void reset_connection(struct ceph_connection *con) ceph_msg_put(con->out_msg); con->out_msg = NULL; } - con->out_keepalive_pending = false; con->in_seq = 0; con->in_seq_acked = 0; } @@ -847,6 +854,8 @@ static int write_partial_msg_pages(struct ceph_connection *con) (msg->pages || msg->pagelist || msg->bio || in_trail)) kunmap(page); + if (ret == -EAGAIN) + ret = 0; if (ret <= 0) goto out; @@ -1238,8 +1247,6 @@ static int process_connect(struct ceph_connection *con) con->auth_retry); if (con->auth_retry == 2) { con->error_msg = "connect authorization failure"; - reset_connection(con); - set_bit(CLOSED, &con->state); return -1; } con->auth_retry = 1; @@ -1705,14 +1712,6 @@ more: /* open the socket first? */ if (con->sock == NULL) { - /* - * if we were STANDBY and are reconnecting _this_ - * connection, bump connect_seq now. Always bump - * global_seq. - */ - if (test_and_clear_bit(STANDBY, &con->state)) - con->connect_seq++; - prepare_write_banner(msgr, con); prepare_write_connect(msgr, con, 1); prepare_read_banner(con); @@ -1737,16 +1736,12 @@ more_kvec: if (con->out_skip) { ret = write_partial_skip(con); if (ret <= 0) - goto done; - if (ret < 0) { - dout("try_write write_partial_skip err %d\n", ret); - goto done; - } + goto out; } if (con->out_kvec_left) { ret = write_partial_kvec(con); if (ret <= 0) - goto done; + goto out; } /* msg pages? */ @@ -1761,11 +1756,11 @@ more_kvec: if (ret == 1) goto more_kvec; /* we need to send the footer, too! */ if (ret == 0) - goto done; + goto out; if (ret < 0) { dout("try_write write_partial_msg_pages err %d\n", ret); - goto done; + goto out; } } @@ -1789,10 +1784,9 @@ do_next: /* Nothing to do! 
*/ clear_bit(WRITE_PENDING, &con->state); dout("try_write nothing else to write.\n"); -done: ret = 0; out: - dout("try_write done on %p\n", con); + dout("try_write done on %p ret %d\n", con, ret); return ret; } @@ -1821,19 +1815,17 @@ more: dout("try_read connecting\n"); ret = read_partial_banner(con); if (ret <= 0) - goto done; - if (process_banner(con) < 0) { - ret = -1; goto out; - } + ret = process_banner(con); + if (ret < 0) + goto out; } ret = read_partial_connect(con); if (ret <= 0) - goto done; - if (process_connect(con) < 0) { - ret = -1; goto out; - } + ret = process_connect(con); + if (ret < 0) + goto out; goto more; } @@ -1848,7 +1840,7 @@ more: dout("skipping %d / %d bytes\n", skip, -con->in_base_pos); ret = ceph_tcp_recvmsg(con->sock, buf, skip); if (ret <= 0) - goto done; + goto out; con->in_base_pos += ret; if (con->in_base_pos) goto more; @@ -1859,7 +1851,7 @@ more: */ ret = ceph_tcp_recvmsg(con->sock, &con->in_tag, 1); if (ret <= 0) - goto done; + goto out; dout("try_read got tag %d\n", (int)con->in_tag); switch (con->in_tag) { case CEPH_MSGR_TAG_MSG: @@ -1870,7 +1862,7 @@ more: break; case CEPH_MSGR_TAG_CLOSE: set_bit(CLOSED, &con->state); /* fixme */ - goto done; + goto out; default: goto bad_tag; } @@ -1882,13 +1874,12 @@ more: case -EBADMSG: con->error_msg = "bad crc"; ret = -EIO; - goto out; + break; case -EIO: con->error_msg = "io error"; - goto out; - default: - goto done; + break; } + goto out; } if (con->in_tag == CEPH_MSGR_TAG_READY) goto more; @@ -1898,15 +1889,13 @@ more: if (con->in_tag == CEPH_MSGR_TAG_ACK) { ret = read_partial_ack(con); if (ret <= 0) - goto done; + goto out; process_ack(con); goto more; } -done: - ret = 0; out: - dout("try_read done on %p\n", con); + dout("try_read done on %p ret %d\n", con, ret); return ret; bad_tag: @@ -1951,7 +1940,24 @@ static void con_work(struct work_struct *work) work.work); mutex_lock(&con->mutex); + if (test_and_clear_bit(BACKOFF, &con->state)) { + dout("con_work %p backing off\n", con); + if (queue_delayed_work(ceph_msgr_wq, &con->work, + round_jiffies_relative(con->delay))) { + dout("con_work %p backoff %lu\n", con, con->delay); + mutex_unlock(&con->mutex); + return; + } else { + con->ops->put(con); + dout("con_work %p FAILED to back off %lu\n", con, + con->delay); + } + } + if (test_bit(STANDBY, &con->state)) { + dout("con_work %p STANDBY\n", con); + goto done; + } if (test_bit(CLOSED, &con->state)) { /* e.g. if we are replaced */ dout("con_work CLOSED\n"); con_close_socket(con); @@ -2008,10 +2014,12 @@ static void ceph_fault(struct ceph_connection *con) /* Requeue anything that hasn't been acked */ list_splice_init(&con->out_sent, &con->out_queue); - /* If there are no messages in the queue, place the connection - * in a STANDBY state (i.e., don't try to reconnect just yet). */ - if (list_empty(&con->out_queue) && !con->out_keepalive_pending) { - dout("fault setting STANDBY\n"); + /* If there are no messages queued or keepalive pending, place + * the connection in a STANDBY state */ + if (list_empty(&con->out_queue) && + !test_bit(KEEPALIVE_PENDING, &con->state)) { + dout("fault %p setting STANDBY clearing WRITE_PENDING\n", con); + clear_bit(WRITE_PENDING, &con->state); set_bit(STANDBY, &con->state); } else { /* retry after a delay. 
*/ @@ -2019,11 +2027,24 @@ static void ceph_fault(struct ceph_connection *con) con->delay = BASE_DELAY_INTERVAL; else if (con->delay < MAX_DELAY_INTERVAL) con->delay *= 2; - dout("fault queueing %p delay %lu\n", con, con->delay); con->ops->get(con); if (queue_delayed_work(ceph_msgr_wq, &con->work, - round_jiffies_relative(con->delay)) == 0) + round_jiffies_relative(con->delay))) { + dout("fault queued %p delay %lu\n", con, con->delay); + } else { con->ops->put(con); + dout("fault failed to queue %p delay %lu, backoff\n", + con, con->delay); + /* + * In many cases we see a socket state change + * while con_work is running and end up + * queuing (non-delayed) work, such that we + * can't backoff with a delay. Set a flag so + * that when con_work restarts we schedule the + * delay then. + */ + set_bit(BACKOFF, &con->state); + } } out_unlock: @@ -2094,6 +2115,19 @@ void ceph_messenger_destroy(struct ceph_messenger *msgr) } EXPORT_SYMBOL(ceph_messenger_destroy); +static void clear_standby(struct ceph_connection *con) +{ + /* come back from STANDBY? */ + if (test_and_clear_bit(STANDBY, &con->state)) { + mutex_lock(&con->mutex); + dout("clear_standby %p and ++connect_seq\n", con); + con->connect_seq++; + WARN_ON(test_bit(WRITE_PENDING, &con->state)); + WARN_ON(test_bit(KEEPALIVE_PENDING, &con->state)); + mutex_unlock(&con->mutex); + } +} + /* * Queue up an outgoing message on the given connection. */ @@ -2126,6 +2160,7 @@ void ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg) /* if there wasn't anything waiting to send before, queue * new work */ + clear_standby(con); if (test_and_set_bit(WRITE_PENDING, &con->state) == 0) queue_con(con); } @@ -2191,6 +2226,8 @@ void ceph_con_revoke_message(struct ceph_connection *con, struct ceph_msg *msg) */ void ceph_con_keepalive(struct ceph_connection *con) { + dout("con_keepalive %p\n", con); + clear_standby(con); if (test_and_set_bit(KEEPALIVE_PENDING, &con->state) == 0 && test_and_set_bit(WRITE_PENDING, &con->state) == 0) queue_con(con); diff --git a/net/ceph/pagevec.c b/net/ceph/pagevec.c index 1a040e64c69f..cd9c21df87d1 100644 --- a/net/ceph/pagevec.c +++ b/net/ceph/pagevec.c @@ -16,22 +16,30 @@ struct page **ceph_get_direct_page_vector(const char __user *data, int num_pages, bool write_page) { struct page **pages; - int rc; + int got = 0; + int rc = 0; pages = kmalloc(sizeof(*pages) * num_pages, GFP_NOFS); if (!pages) return ERR_PTR(-ENOMEM); down_read(&current->mm->mmap_sem); - rc = get_user_pages(current, current->mm, (unsigned long)data, - num_pages, write_page, 0, pages, NULL); + while (got < num_pages) { + rc = get_user_pages(current, current->mm, + (unsigned long)data + ((unsigned long)got * PAGE_SIZE), + num_pages - got, write_page, 0, pages + got, NULL); + if (rc < 0) + break; + BUG_ON(rc == 0); + got += rc; + } up_read(&current->mm->mmap_sem); - if (rc < num_pages) goto fail; return pages; fail: - ceph_put_page_vector(pages, rc > 0 ? rc : 0, false); + ceph_put_page_vector(pages, got, false); return ERR_PTR(rc); } EXPORT_SYMBOL(ceph_get_direct_page_vector); diff --git a/net/core/dev.c b/net/core/dev.c index 8b1d886ed23b..0d39032e9621 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -132,6 +132,7 @@ #include <trace/events/skb.h> #include <linux/pci.h> #include <linux/inetdevice.h> +#include <linux/cpu_rmap.h> #include "net-sysfs.h" @@ -749,7 +750,8 @@ EXPORT_SYMBOL(dev_get_by_index); * @ha: hardware address * * Search for an interface by MAC address. Returns NULL if the device - * is not found or a pointer to the device. 
The caller must hold RCU + * is not found or a pointer to the device. + * The caller must hold RCU or RTNL. * The returned device has not had its ref count increased * and the caller must therefore be careful about locking * @@ -1113,13 +1115,21 @@ EXPORT_SYMBOL(netdev_bonding_change); void dev_load(struct net *net, const char *name) { struct net_device *dev; + int no_module; rcu_read_lock(); dev = dev_get_by_name_rcu(net, name); rcu_read_unlock(); - if (!dev && capable(CAP_NET_ADMIN)) - request_module("%s", name); + no_module = !dev; + if (no_module && capable(CAP_NET_ADMIN)) + no_module = request_module("netdev-%s", name); + if (no_module && capable(CAP_SYS_MODULE)) { + if (!request_module("%s", name)) + pr_err("Loading kernel module for a network device " +"with CAP_SYS_MODULE (deprecated). Use CAP_NET_ADMIN and alias netdev-%s " +"instead\n", name); + } } EXPORT_SYMBOL(dev_load); @@ -1279,13 +1289,16 @@ static int __dev_close_many(struct list_head *head) static int __dev_close(struct net_device *dev) { + int retval; LIST_HEAD(single); list_add(&dev->unreg_list, &single); - return __dev_close_many(&single); + retval = __dev_close_many(&single); + list_del(&single); + return retval; } -int dev_close_many(struct list_head *head) +static int dev_close_many(struct list_head *head) { struct net_device *dev, *tmp; LIST_HEAD(tmp_list); @@ -1324,7 +1337,7 @@ int dev_close(struct net_device *dev) list_add(&dev->unreg_list, &single); dev_close_many(&single); - + list_del(&single); return 0; } EXPORT_SYMBOL(dev_close); @@ -1605,7 +1618,7 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) * expected that drivers will fix this mapping if they can before * calling netif_set_real_num_tx_queues. */ -void netif_setup_tc(struct net_device *dev, unsigned int txq) +static void netif_setup_tc(struct net_device *dev, unsigned int txq) { int i; struct netdev_tc_txq *tc = &dev->tc_to_txq[0]; @@ -1646,7 +1659,8 @@ int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq) if (txq < 1 || txq > dev->num_tx_queues) return -EINVAL; - if (dev->reg_state == NETREG_REGISTERED) { + if (dev->reg_state == NETREG_REGISTERED || + dev->reg_state == NETREG_UNREGISTERING) { ASSERT_RTNL(); rc = netdev_queue_update_kobjects(dev, dev->real_num_tx_queues, @@ -1856,7 +1870,7 @@ EXPORT_SYMBOL(skb_checksum_help); * It may return NULL if the skb requires no segmentation. This is * only possible when GSO is used for verifying header integrity. 
*/ -struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features) +struct sk_buff *skb_gso_segment(struct sk_buff *skb, u32 features) { struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT); struct packet_type *ptype; @@ -2044,9 +2058,9 @@ static bool can_checksum_protocol(unsigned long features, __be16 protocol) protocol == htons(ETH_P_FCOE))); } -static int harmonize_features(struct sk_buff *skb, __be16 protocol, int features) +static u32 harmonize_features(struct sk_buff *skb, __be16 protocol, u32 features) { - if (!can_checksum_protocol(protocol, features)) { + if (!can_checksum_protocol(features, protocol)) { features &= ~NETIF_F_ALL_CSUM; features &= ~NETIF_F_SG; } else if (illegal_highdma(skb->dev, skb)) { @@ -2056,10 +2070,10 @@ static int harmonize_features(struct sk_buff *skb, __be16 protocol, int features return features; } -int netif_skb_features(struct sk_buff *skb) +u32 netif_skb_features(struct sk_buff *skb) { __be16 protocol = skb->protocol; - int features = skb->dev->features; + u32 features = skb->dev->features; if (protocol == htons(ETH_P_8021Q)) { struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data; @@ -2104,7 +2118,7 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, int rc = NETDEV_TX_OK; if (likely(!skb->next)) { - int features; + u32 features; /* * If device doesnt need skb->dst, release it right now while @@ -2325,15 +2339,18 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, struct netdev_queue *txq) { spinlock_t *root_lock = qdisc_lock(q); - bool contended = qdisc_is_running(q); + bool contended; int rc; + qdisc_skb_cb(skb)->pkt_len = skb->len; + qdisc_calculate_pkt_len(skb, q); /* * Heuristic to force contended enqueues to serialize on a * separate lock before trying to get qdisc main lock. * This permits __QDISC_STATE_RUNNING owner to get the lock more often * and dequeue packets faster. */ + contended = qdisc_is_running(q); if (unlikely(contended)) spin_lock(&q->busylock); @@ -2351,7 +2368,6 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, if (!(dev->priv_flags & IFF_XMIT_DST_RELEASE)) skb_dst_force(skb); - qdisc_skb_cb(skb)->pkt_len = skb->len; qdisc_bstats_update(q, skb); if (sch_direct_xmit(skb, q, dev, txq, root_lock)) { @@ -2366,7 +2382,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, rc = NET_XMIT_SUCCESS; } else { skb_dst_force(skb); - rc = qdisc_enqueue_root(skb, q); + rc = q->enqueue(skb, q) & NET_XMIT_MASK; if (qdisc_run_begin(q)) { if (unlikely(contended)) { spin_unlock(&q->busylock); @@ -2585,6 +2601,54 @@ EXPORT_SYMBOL(__skb_get_rxhash); struct rps_sock_flow_table __rcu *rps_sock_flow_table __read_mostly; EXPORT_SYMBOL(rps_sock_flow_table); +static struct rps_dev_flow * +set_rps_cpu(struct net_device *dev, struct sk_buff *skb, + struct rps_dev_flow *rflow, u16 next_cpu) +{ + u16 tcpu; + + tcpu = rflow->cpu = next_cpu; + if (tcpu != RPS_NO_CPU) { +#ifdef CONFIG_RFS_ACCEL + struct netdev_rx_queue *rxqueue; + struct rps_dev_flow_table *flow_table; + struct rps_dev_flow *old_rflow; + u32 flow_id; + u16 rxq_index; + int rc; + + /* Should we steer this flow to a different hardware queue? 
*/ + if (!skb_rx_queue_recorded(skb) || !dev->rx_cpu_rmap || + !(dev->features & NETIF_F_NTUPLE)) + goto out; + rxq_index = cpu_rmap_lookup_index(dev->rx_cpu_rmap, next_cpu); + if (rxq_index == skb_get_rx_queue(skb)) + goto out; + + rxqueue = dev->_rx + rxq_index; + flow_table = rcu_dereference(rxqueue->rps_flow_table); + if (!flow_table) + goto out; + flow_id = skb->rxhash & flow_table->mask; + rc = dev->netdev_ops->ndo_rx_flow_steer(dev, skb, + rxq_index, flow_id); + if (rc < 0) + goto out; + old_rflow = rflow; + rflow = &flow_table->flows[flow_id]; + rflow->cpu = next_cpu; + rflow->filter = rc; + if (old_rflow->filter == rflow->filter) + old_rflow->filter = RPS_NO_FILTER; + out: +#endif + rflow->last_qtail = + per_cpu(softnet_data, tcpu).input_queue_head; + } + + return rflow; +} + /* * get_rps_cpu is called from netif_receive_skb and returns the target * CPU from the RPS map of the receiving queue for a given skb. @@ -2615,7 +2679,8 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, map = rcu_dereference(rxqueue->rps_map); if (map) { - if (map->len == 1) { + if (map->len == 1 && + !rcu_dereference_raw(rxqueue->rps_flow_table)) { tcpu = map->cpus[0]; if (cpu_online(tcpu)) cpu = tcpu; @@ -2655,12 +2720,9 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, if (unlikely(tcpu != next_cpu) && (tcpu == RPS_NO_CPU || !cpu_online(tcpu) || ((int)(per_cpu(softnet_data, tcpu).input_queue_head - - rflow->last_qtail)) >= 0)) { - tcpu = rflow->cpu = next_cpu; - if (tcpu != RPS_NO_CPU) - rflow->last_qtail = per_cpu(softnet_data, - tcpu).input_queue_head; - } + rflow->last_qtail)) >= 0)) + rflow = set_rps_cpu(dev, skb, rflow, next_cpu); + if (tcpu != RPS_NO_CPU && cpu_online(tcpu)) { *rflowp = rflow; cpu = tcpu; @@ -2681,6 +2743,46 @@ done: return cpu; } +#ifdef CONFIG_RFS_ACCEL + +/** + * rps_may_expire_flow - check whether an RFS hardware filter may be removed + * @dev: Device on which the filter was set + * @rxq_index: RX queue index + * @flow_id: Flow ID passed to ndo_rx_flow_steer() + * @filter_id: Filter ID returned by ndo_rx_flow_steer() + * + * Drivers that implement ndo_rx_flow_steer() should periodically call + * this function for each installed filter and remove the filters for + * which it returns %true. 
+ */ +bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index, + u32 flow_id, u16 filter_id) +{ + struct netdev_rx_queue *rxqueue = dev->_rx + rxq_index; + struct rps_dev_flow_table *flow_table; + struct rps_dev_flow *rflow; + bool expire = true; + int cpu; + + rcu_read_lock(); + flow_table = rcu_dereference(rxqueue->rps_flow_table); + if (flow_table && flow_id <= flow_table->mask) { + rflow = &flow_table->flows[flow_id]; + cpu = ACCESS_ONCE(rflow->cpu); + if (rflow->filter == filter_id && cpu != RPS_NO_CPU && + ((int)(per_cpu(softnet_data, cpu).input_queue_head - + rflow->last_qtail) < + (int)(10 * flow_table->mask))) + expire = false; + } + rcu_read_unlock(); + return expire; +} +EXPORT_SYMBOL(rps_may_expire_flow); + +#endif /* CONFIG_RFS_ACCEL */ + /* Called from hardirq (IPI) context */ static void rps_trigger_softirq(void *data) { @@ -3002,64 +3104,31 @@ void netdev_rx_handler_unregister(struct net_device *dev) } EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister); -static inline void skb_bond_set_mac_by_master(struct sk_buff *skb, - struct net_device *master) -{ - if (skb->pkt_type == PACKET_HOST) { - u16 *dest = (u16 *) eth_hdr(skb)->h_dest; - - memcpy(dest, master->dev_addr, ETH_ALEN); - } -} - -/* On bonding slaves other than the currently active slave, suppress - * duplicates except for 802.3ad ETH_P_SLOW, alb non-mcast/bcast, and - * ARP on active-backup slaves with arp_validate enabled. - */ -int __skb_bond_should_drop(struct sk_buff *skb, struct net_device *master) +static void vlan_on_bond_hook(struct sk_buff *skb) { - struct net_device *dev = skb->dev; - - if (master->priv_flags & IFF_MASTER_ARPMON) - dev->last_rx = jiffies; - - if ((master->priv_flags & IFF_MASTER_ALB) && - (master->priv_flags & IFF_BRIDGE_PORT)) { - /* Do address unmangle. The local destination address - * will be always the one master has. Provides the right - * functionality in a bridge. - */ - skb_bond_set_mac_by_master(skb, master); - } - - if (dev->priv_flags & IFF_SLAVE_INACTIVE) { - if ((dev->priv_flags & IFF_SLAVE_NEEDARP) && - skb->protocol == __cpu_to_be16(ETH_P_ARP)) - return 0; - - if (master->priv_flags & IFF_MASTER_ALB) { - if (skb->pkt_type != PACKET_BROADCAST && - skb->pkt_type != PACKET_MULTICAST) - return 0; - } - if (master->priv_flags & IFF_MASTER_8023AD && - skb->protocol == __cpu_to_be16(ETH_P_SLOW)) - return 0; + /* + * Make sure ARP frames received on VLAN interfaces stacked on + * bonding interfaces still make their way to any base bonding + * device that may have registered for a specific ptype. + */ + if (skb->dev->priv_flags & IFF_802_1Q_VLAN && + vlan_dev_real_dev(skb->dev)->priv_flags & IFF_BONDING && + skb->protocol == htons(ETH_P_ARP)) { + struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); - return 1; + if (!skb2) + return; + skb2->dev = vlan_dev_real_dev(skb->dev); + netif_rx(skb2); } - return 0; } -EXPORT_SYMBOL(__skb_bond_should_drop); static int __netif_receive_skb(struct sk_buff *skb) { struct packet_type *ptype, *pt_prev; rx_handler_func_t *rx_handler; struct net_device *orig_dev; - struct net_device *master; - struct net_device *null_or_orig; - struct net_device *orig_or_bond; + struct net_device *null_or_dev; int ret = NET_RX_DROP; __be16 type; @@ -3074,28 +3143,8 @@ static int __netif_receive_skb(struct sk_buff *skb) if (!skb->skb_iif) skb->skb_iif = skb->dev->ifindex; - - /* - * bonding note: skbs received on inactive slaves should only - * be delivered to pkt handlers that are exact matches. Also - * the deliver_no_wcard flag will be set. 
If packet handlers - * are sensitive to duplicate packets these skbs will need to - * be dropped at the handler. - */ - null_or_orig = NULL; orig_dev = skb->dev; - master = ACCESS_ONCE(orig_dev->master); - if (skb->deliver_no_wcard) - null_or_orig = orig_dev; - else if (master) { - if (skb_bond_should_drop(skb, master)) { - skb->deliver_no_wcard = 1; - null_or_orig = orig_dev; /* deliver only exact match */ - } else - skb->dev = master; - } - __this_cpu_inc(softnet_data.processed); skb_reset_network_header(skb); skb_reset_transport_header(skb); skb->mac_len = skb->network_header - skb->mac_header; @@ -3104,6 +3153,10 @@ static int __netif_receive_skb(struct sk_buff *skb) rcu_read_lock(); +another_round: + + __this_cpu_inc(softnet_data.processed); + #ifdef CONFIG_NET_CLS_ACT if (skb->tc_verd & TC_NCLS) { skb->tc_verd = CLR_TC_NCLS(skb->tc_verd); @@ -3112,8 +3165,7 @@ static int __netif_receive_skb(struct sk_buff *skb) #endif list_for_each_entry_rcu(ptype, &ptype_all, list) { - if (ptype->dev == null_or_orig || ptype->dev == skb->dev || - ptype->dev == orig_dev) { + if (!ptype->dev || ptype->dev == skb->dev) { if (pt_prev) ret = deliver_skb(skb, pt_prev, orig_dev); pt_prev = ptype; @@ -3127,16 +3179,20 @@ static int __netif_receive_skb(struct sk_buff *skb) ncls: #endif - /* Handle special case of bridge or macvlan */ rx_handler = rcu_dereference(skb->dev->rx_handler); if (rx_handler) { + struct net_device *prev_dev; + if (pt_prev) { ret = deliver_skb(skb, pt_prev, orig_dev); pt_prev = NULL; } + prev_dev = skb->dev; skb = rx_handler(skb); if (!skb) goto out; + if (skb->dev != prev_dev) + goto another_round; } if (vlan_tx_tag_present(skb)) { @@ -3151,24 +3207,17 @@ ncls: goto out; } - /* - * Make sure frames received on VLAN interfaces stacked on - * bonding interfaces still make their way to any base bonding - * device that may have registered for a specific ptype. The - * handler may have to adjust skb->dev and orig_dev. - */ - orig_or_bond = orig_dev; - if ((skb->dev->priv_flags & IFF_802_1Q_VLAN) && - (vlan_dev_real_dev(skb->dev)->priv_flags & IFF_BONDING)) { - orig_or_bond = vlan_dev_real_dev(skb->dev); - } + vlan_on_bond_hook(skb); + + /* deliver only exact match when indicated */ + null_or_dev = skb->deliver_no_wcard ? skb->dev : NULL; type = skb->protocol; list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) { - if (ptype->type == type && (ptype->dev == null_or_orig || - ptype->dev == skb->dev || ptype->dev == orig_dev || - ptype->dev == orig_or_bond)) { + if (ptype->type == type && + (ptype->dev == null_or_dev || ptype->dev == skb->dev || + ptype->dev == orig_dev)) { if (pt_prev) ret = deliver_skb(skb, pt_prev, orig_dev); pt_prev = ptype; @@ -3476,6 +3525,8 @@ static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb) __skb_pull(skb, skb_headlen(skb)); skb_reserve(skb, NET_IP_ALIGN - skb_headroom(skb)); skb->vlan_tci = 0; + skb->dev = napi->dev; + skb->skb_iif = 0; napi->skb = skb; } @@ -3963,12 +4014,15 @@ void *dev_seq_start(struct seq_file *seq, loff_t *pos) void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos) { - struct net_device *dev = (v == SEQ_START_TOKEN) ? 
- first_net_device(seq_file_net(seq)) : - next_net_device((struct net_device *)v); + struct net_device *dev = v; + + if (v == SEQ_START_TOKEN) + dev = first_net_device_rcu(seq_file_net(seq)); + else + dev = next_net_device_rcu(dev); ++*pos; - return rcu_dereference(dev); + return dev; } void dev_seq_stop(struct seq_file *seq, void *v) @@ -4252,15 +4306,14 @@ static int __init dev_proc_init(void) /** - * netdev_set_master - set up master/slave pair + * netdev_set_master - set up master pointer * @slave: slave device * @master: new master device * * Changes the master device of the slave. Pass %NULL to break the * bonding. The caller must hold the RTNL semaphore. On a failure * a negative errno code is returned. On success the reference counts - * are adjusted, %RTM_NEWLINK is sent to the routing socket and the - * function returns zero. + * are adjusted and the function returns zero. */ int netdev_set_master(struct net_device *slave, struct net_device *master) { @@ -4280,6 +4333,29 @@ int netdev_set_master(struct net_device *slave, struct net_device *master) synchronize_net(); dev_put(old); } + return 0; +} +EXPORT_SYMBOL(netdev_set_master); + +/** + * netdev_set_bond_master - set up bonding master/slave pair + * @slave: slave device + * @master: new master device + * + * Changes the master device of the slave. Pass %NULL to break the + * bonding. The caller must hold the RTNL semaphore. On a failure + * a negative errno code is returned. On success %RTM_NEWLINK is sent + * to the routing socket and the function returns zero. + */ +int netdev_set_bond_master(struct net_device *slave, struct net_device *master) +{ + int err; + + ASSERT_RTNL(); + + err = netdev_set_master(slave, master); + if (err) + return err; if (master) slave->flags |= IFF_SLAVE; else @@ -4288,7 +4364,7 @@ int netdev_set_master(struct net_device *slave, struct net_device *master) rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE); return 0; } -EXPORT_SYMBOL(netdev_set_master); +EXPORT_SYMBOL(netdev_set_bond_master); static void dev_change_rx_flags(struct net_device *dev, int flags) { @@ -5123,43 +5199,58 @@ static void rollback_registered(struct net_device *dev) list_add(&dev->unreg_list, &single); rollback_registered_many(&single); + list_del(&single); } -unsigned long netdev_fix_features(unsigned long features, const char *name) +u32 netdev_fix_features(struct net_device *dev, u32 features) { + /* Fix illegal checksum combinations */ + if ((features & NETIF_F_HW_CSUM) && + (features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) { + netdev_info(dev, "mixed HW and IP checksum settings.\n"); + features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM); + } + + if ((features & NETIF_F_NO_CSUM) && + (features & (NETIF_F_HW_CSUM|NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) { + netdev_info(dev, "mixed no checksumming and other settings.\n"); + features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM); + } + /* Fix illegal SG+CSUM combinations. */ if ((features & NETIF_F_SG) && !(features & NETIF_F_ALL_CSUM)) { - if (name) - printk(KERN_NOTICE "%s: Dropping NETIF_F_SG since no " - "checksum feature.\n", name); + netdev_info(dev, + "Dropping NETIF_F_SG since no checksum feature.\n"); features &= ~NETIF_F_SG; } /* TSO requires that SG is present as well. */ if ((features & NETIF_F_TSO) && !(features & NETIF_F_SG)) { - if (name) - printk(KERN_NOTICE "%s: Dropping NETIF_F_TSO since no " - "SG feature.\n", name); + netdev_info(dev, "Dropping NETIF_F_TSO since no SG feature.\n"); features &= ~NETIF_F_TSO; } + /* Software GSO depends on SG. 
*/ + if ((features & NETIF_F_GSO) && !(features & NETIF_F_SG)) { + netdev_info(dev, "Dropping NETIF_F_GSO since no SG feature.\n"); + features &= ~NETIF_F_GSO; + } + + /* UFO needs SG and checksumming */ if (features & NETIF_F_UFO) { /* maybe split UFO into V4 and V6? */ if (!((features & NETIF_F_GEN_CSUM) || (features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM)) == (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) { - if (name) - printk(KERN_ERR "%s: Dropping NETIF_F_UFO " - "since no checksum offload features.\n", - name); + netdev_info(dev, + "Dropping NETIF_F_UFO since no checksum offload features.\n"); features &= ~NETIF_F_UFO; } if (!(features & NETIF_F_SG)) { - if (name) - printk(KERN_ERR "%s: Dropping NETIF_F_UFO " - "since no NETIF_F_SG feature.\n", name); + netdev_info(dev, + "Dropping NETIF_F_UFO since no NETIF_F_SG feature.\n"); features &= ~NETIF_F_UFO; } } @@ -5168,6 +5259,37 @@ unsigned long netdev_fix_features(unsigned long features, const char *name) } EXPORT_SYMBOL(netdev_fix_features); +void netdev_update_features(struct net_device *dev) +{ + u32 features; + int err = 0; + + features = netdev_get_wanted_features(dev); + + if (dev->netdev_ops->ndo_fix_features) + features = dev->netdev_ops->ndo_fix_features(dev, features); + + /* driver might be less strict about feature dependencies */ + features = netdev_fix_features(dev, features); + + if (dev->features == features) + return; + + netdev_info(dev, "Features changed: 0x%08x -> 0x%08x\n", + dev->features, features); + + if (dev->netdev_ops->ndo_set_features) + err = dev->netdev_ops->ndo_set_features(dev, features); + + if (!err) + dev->features = features; + else if (err < 0) + netdev_err(dev, + "set_features() failed (%d); wanted 0x%08x, left 0x%08x\n", + err, features, dev->features); +} +EXPORT_SYMBOL(netdev_update_features); + /** * netif_stacked_transfer_operstate - transfer operstate * @rootdev: the root or lower level device to transfer state from @@ -5302,27 +5424,19 @@ int register_netdevice(struct net_device *dev) if (dev->iflink == -1) dev->iflink = dev->ifindex; - /* Fix illegal checksum combinations */ - if ((dev->features & NETIF_F_HW_CSUM) && - (dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) { - printk(KERN_NOTICE "%s: mixed HW and IP checksum settings.\n", - dev->name); - dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM); - } + /* Transfer changeable features to wanted_features and enable + * software offloads (GSO and GRO). + */ + dev->hw_features |= NETIF_F_SOFT_FEATURES; + dev->features |= NETIF_F_SOFT_FEATURES; + dev->wanted_features = dev->features & dev->hw_features; - if ((dev->features & NETIF_F_NO_CSUM) && - (dev->features & (NETIF_F_HW_CSUM|NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) { - printk(KERN_NOTICE "%s: mixed no checksumming and other settings.\n", - dev->name); - dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM); + /* Avoid warning from netdev_fix_features() for GSO without SG */ + if (!(dev->wanted_features & NETIF_F_SG)) { + dev->wanted_features &= ~NETIF_F_GSO; + dev->features &= ~NETIF_F_GSO; } - dev->features = netdev_fix_features(dev->features, dev->name); - - /* Enable software GSO if SG is supported. */ - if (dev->features & NETIF_F_SG) - dev->features |= NETIF_F_GSO; - /* Enable GRO and NETIF_F_HIGHDMA for vlans by default, * vlan_dev_init() will do the dev->features check, so these features * are enabled only if supported by underlying device. 
@@ -5339,6 +5453,8 @@ int register_netdevice(struct net_device *dev) goto err_uninit; dev->reg_state = NETREG_REGISTERED; + netdev_update_features(dev); + /* * Default initial state at registry is that the * device is present. @@ -5720,31 +5836,36 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, dev_net_set(dev, &init_net); + dev->gso_max_size = GSO_MAX_SIZE; + + INIT_LIST_HEAD(&dev->ethtool_ntuple_list.list); + dev->ethtool_ntuple_list.count = 0; + INIT_LIST_HEAD(&dev->napi_list); + INIT_LIST_HEAD(&dev->unreg_list); + INIT_LIST_HEAD(&dev->link_watch_list); + dev->priv_flags = IFF_XMIT_DST_RELEASE; + setup(dev); + dev->num_tx_queues = txqs; dev->real_num_tx_queues = txqs; if (netif_alloc_netdev_queues(dev)) - goto free_pcpu; + goto free_all; #ifdef CONFIG_RPS dev->num_rx_queues = rxqs; dev->real_num_rx_queues = rxqs; if (netif_alloc_rx_queues(dev)) - goto free_pcpu; + goto free_all; #endif - dev->gso_max_size = GSO_MAX_SIZE; - - INIT_LIST_HEAD(&dev->ethtool_ntuple_list.list); - dev->ethtool_ntuple_list.count = 0; - INIT_LIST_HEAD(&dev->napi_list); - INIT_LIST_HEAD(&dev->unreg_list); - INIT_LIST_HEAD(&dev->link_watch_list); - dev->priv_flags = IFF_XMIT_DST_RELEASE; - setup(dev); strcpy(dev->name, name); dev->group = INIT_NETDEV_GROUP; return dev; +free_all: + free_netdev(dev); + return NULL; + free_pcpu: free_percpu(dev->pcpu_refcnt); kfree(dev->_tx); @@ -6053,8 +6174,7 @@ static int dev_cpu_callback(struct notifier_block *nfb, * @one to the master device with current feature set @all. Will not * enable anything that is off in @mask. Returns the new feature set. */ -unsigned long netdev_increment_features(unsigned long all, unsigned long one, - unsigned long mask) +u32 netdev_increment_features(u32 all, u32 one, u32 mask) { /* If device needs checksumming, downgrade to it. */ if (all & NETIF_F_NO_CSUM && !(one & NETIF_F_NO_CSUM)) @@ -6272,6 +6392,7 @@ static void __net_exit default_device_exit_batch(struct list_head *net_list) } } unregister_netdevice_many(&dev_kill_list); + list_del(&dev_kill_list); rtnl_unlock(); } diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c index 508f9c18992f..133fd22ea287 100644 --- a/net/core/dev_addr_lists.c +++ b/net/core/dev_addr_lists.c @@ -144,7 +144,7 @@ void __hw_addr_del_multiple(struct netdev_hw_addr_list *to_list, list_for_each_entry(ha, &from_list->list, list) { type = addr_type ? 
addr_type : ha->type; - __hw_addr_del(to_list, ha->addr, addr_len, addr_type); + __hw_addr_del(to_list, ha->addr, addr_len, type); } } EXPORT_SYMBOL(__hw_addr_del_multiple); diff --git a/net/core/dst.c b/net/core/dst.c index b99c7c7ffce2..91104d35de7d 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -164,7 +164,9 @@ int dst_discard(struct sk_buff *skb) } EXPORT_SYMBOL(dst_discard); -void *dst_alloc(struct dst_ops *ops) +const u32 dst_default_metrics[RTAX_MAX]; + +void *dst_alloc(struct dst_ops *ops, int initial_ref) { struct dst_entry *dst; @@ -175,11 +177,12 @@ void *dst_alloc(struct dst_ops *ops) dst = kmem_cache_zalloc(ops->kmem_cachep, GFP_ATOMIC); if (!dst) return NULL; - atomic_set(&dst->__refcnt, 0); + atomic_set(&dst->__refcnt, initial_ref); dst->ops = ops; dst->lastuse = jiffies; dst->path = dst; dst->input = dst->output = dst_discard; + dst_init_metrics(dst, dst_default_metrics, true); #if RT_CACHE_DEBUG >= 2 atomic_inc(&dst_total); #endif @@ -282,6 +285,42 @@ void dst_release(struct dst_entry *dst) } EXPORT_SYMBOL(dst_release); +u32 *dst_cow_metrics_generic(struct dst_entry *dst, unsigned long old) +{ + u32 *p = kmalloc(sizeof(u32) * RTAX_MAX, GFP_ATOMIC); + + if (p) { + u32 *old_p = __DST_METRICS_PTR(old); + unsigned long prev, new; + + memcpy(p, old_p, sizeof(u32) * RTAX_MAX); + + new = (unsigned long) p; + prev = cmpxchg(&dst->_metrics, old, new); + + if (prev != old) { + kfree(p); + p = __DST_METRICS_PTR(prev); + if (prev & DST_METRICS_READ_ONLY) + p = NULL; + } + } + return p; +} +EXPORT_SYMBOL(dst_cow_metrics_generic); + +/* Caller asserts that dst_metrics_read_only(dst) is false. */ +void __dst_destroy_metrics_generic(struct dst_entry *dst, unsigned long old) +{ + unsigned long prev, new; + + new = (unsigned long) dst_default_metrics; + prev = cmpxchg(&dst->_metrics, old, new); + if (prev == old) + kfree(__DST_METRICS_PTR(old)); +} +EXPORT_SYMBOL(__dst_destroy_metrics_generic); + /** * skb_dst_set_noref - sets skb dst, without a reference * @skb: buffer diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 17741782a345..c1a71bb738da 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -34,12 +34,6 @@ u32 ethtool_op_get_link(struct net_device *dev) } EXPORT_SYMBOL(ethtool_op_get_link); -u32 ethtool_op_get_rx_csum(struct net_device *dev) -{ - return (dev->features & NETIF_F_ALL_CSUM) != 0; -} -EXPORT_SYMBOL(ethtool_op_get_rx_csum); - u32 ethtool_op_get_tx_csum(struct net_device *dev) { return (dev->features & NETIF_F_ALL_CSUM) != 0; @@ -55,6 +49,7 @@ int ethtool_op_set_tx_csum(struct net_device *dev, u32 data) return 0; } +EXPORT_SYMBOL(ethtool_op_set_tx_csum); int ethtool_op_set_tx_hw_csum(struct net_device *dev, u32 data) { @@ -171,6 +166,381 @@ EXPORT_SYMBOL(ethtool_ntuple_flush); /* Handlers for each ethtool command */ +#define ETHTOOL_DEV_FEATURE_WORDS 1 + +static void ethtool_get_features_compat(struct net_device *dev, + struct ethtool_get_features_block *features) +{ + if (!dev->ethtool_ops) + return; + + /* getting RX checksum */ + if (dev->ethtool_ops->get_rx_csum) + if (dev->ethtool_ops->get_rx_csum(dev)) + features[0].active |= NETIF_F_RXCSUM; + + /* mark legacy-changeable features */ + if (dev->ethtool_ops->set_sg) + features[0].available |= NETIF_F_SG; + if (dev->ethtool_ops->set_tx_csum) + features[0].available |= NETIF_F_ALL_CSUM; + if (dev->ethtool_ops->set_tso) + features[0].available |= NETIF_F_ALL_TSO; + if (dev->ethtool_ops->set_rx_csum) + features[0].available |= NETIF_F_RXCSUM; + if (dev->ethtool_ops->set_flags) + features[0].available 
|= flags_dup_features; +} + +static int ethtool_set_feature_compat(struct net_device *dev, + int (*legacy_set)(struct net_device *, u32), + struct ethtool_set_features_block *features, u32 mask) +{ + u32 do_set; + + if (!legacy_set) + return 0; + + if (!(features[0].valid & mask)) + return 0; + + features[0].valid &= ~mask; + + do_set = !!(features[0].requested & mask); + + if (legacy_set(dev, do_set) < 0) + netdev_info(dev, + "Legacy feature change (%s) failed for 0x%08x\n", + do_set ? "set" : "clear", mask); + + return 1; +} + +static int ethtool_set_features_compat(struct net_device *dev, + struct ethtool_set_features_block *features) +{ + int compat; + + if (!dev->ethtool_ops) + return 0; + + compat = ethtool_set_feature_compat(dev, dev->ethtool_ops->set_sg, + features, NETIF_F_SG); + compat |= ethtool_set_feature_compat(dev, dev->ethtool_ops->set_tx_csum, + features, NETIF_F_ALL_CSUM); + compat |= ethtool_set_feature_compat(dev, dev->ethtool_ops->set_tso, + features, NETIF_F_ALL_TSO); + compat |= ethtool_set_feature_compat(dev, dev->ethtool_ops->set_rx_csum, + features, NETIF_F_RXCSUM); + compat |= ethtool_set_feature_compat(dev, dev->ethtool_ops->set_flags, + features, flags_dup_features); + + return compat; +} + +static int ethtool_get_features(struct net_device *dev, void __user *useraddr) +{ + struct ethtool_gfeatures cmd = { + .cmd = ETHTOOL_GFEATURES, + .size = ETHTOOL_DEV_FEATURE_WORDS, + }; + struct ethtool_get_features_block features[ETHTOOL_DEV_FEATURE_WORDS] = { + { + .available = dev->hw_features, + .requested = dev->wanted_features, + .active = dev->features, + .never_changed = NETIF_F_NEVER_CHANGE, + }, + }; + u32 __user *sizeaddr; + u32 copy_size; + + ethtool_get_features_compat(dev, features); + + sizeaddr = useraddr + offsetof(struct ethtool_gfeatures, size); + if (get_user(copy_size, sizeaddr)) + return -EFAULT; + + if (copy_size > ETHTOOL_DEV_FEATURE_WORDS) + copy_size = ETHTOOL_DEV_FEATURE_WORDS; + + if (copy_to_user(useraddr, &cmd, sizeof(cmd))) + return -EFAULT; + useraddr += sizeof(cmd); + if (copy_to_user(useraddr, features, copy_size * sizeof(*features))) + return -EFAULT; + + return 0; +} + +static int ethtool_set_features(struct net_device *dev, void __user *useraddr) +{ + struct ethtool_sfeatures cmd; + struct ethtool_set_features_block features[ETHTOOL_DEV_FEATURE_WORDS]; + int ret = 0; + + if (copy_from_user(&cmd, useraddr, sizeof(cmd))) + return -EFAULT; + useraddr += sizeof(cmd); + + if (cmd.size != ETHTOOL_DEV_FEATURE_WORDS) + return -EINVAL; + + if (copy_from_user(features, useraddr, sizeof(features))) + return -EFAULT; + + if (features[0].valid & ~NETIF_F_ETHTOOL_BITS) + return -EINVAL; + + if (ethtool_set_features_compat(dev, features)) + ret |= ETHTOOL_F_COMPAT; + + if (features[0].valid & ~dev->hw_features) { + features[0].valid &= dev->hw_features; + ret |= ETHTOOL_F_UNSUPPORTED; + } + + dev->wanted_features &= ~features[0].valid; + dev->wanted_features |= features[0].valid & features[0].requested; + netdev_update_features(dev); + + if ((dev->wanted_features ^ dev->features) & features[0].valid) + ret |= ETHTOOL_F_WISH; + + return ret; +} + +static const char netdev_features_strings[ETHTOOL_DEV_FEATURE_WORDS * 32][ETH_GSTRING_LEN] = { + /* NETIF_F_SG */ "tx-scatter-gather", + /* NETIF_F_IP_CSUM */ "tx-checksum-ipv4", + /* NETIF_F_NO_CSUM */ "tx-checksum-unneeded", + /* NETIF_F_HW_CSUM */ "tx-checksum-ip-generic", + /* NETIF_F_IPV6_CSUM */ "tx_checksum-ipv6", + /* NETIF_F_HIGHDMA */ "highdma", + /* NETIF_F_FRAGLIST */ "tx-scatter-gather-fraglist", 
+ /* NETIF_F_HW_VLAN_TX */ "tx-vlan-hw-insert", + + /* NETIF_F_HW_VLAN_RX */ "rx-vlan-hw-parse", + /* NETIF_F_HW_VLAN_FILTER */ "rx-vlan-filter", + /* NETIF_F_VLAN_CHALLENGED */ "vlan-challenged", + /* NETIF_F_GSO */ "tx-generic-segmentation", + /* NETIF_F_LLTX */ "tx-lockless", + /* NETIF_F_NETNS_LOCAL */ "netns-local", + /* NETIF_F_GRO */ "rx-gro", + /* NETIF_F_LRO */ "rx-lro", + + /* NETIF_F_TSO */ "tx-tcp-segmentation", + /* NETIF_F_UFO */ "tx-udp-fragmentation", + /* NETIF_F_GSO_ROBUST */ "tx-gso-robust", + /* NETIF_F_TSO_ECN */ "tx-tcp-ecn-segmentation", + /* NETIF_F_TSO6 */ "tx-tcp6-segmentation", + /* NETIF_F_FSO */ "tx-fcoe-segmentation", + "", + "", + + /* NETIF_F_FCOE_CRC */ "tx-checksum-fcoe-crc", + /* NETIF_F_SCTP_CSUM */ "tx-checksum-sctp", + /* NETIF_F_FCOE_MTU */ "fcoe-mtu", + /* NETIF_F_NTUPLE */ "rx-ntuple-filter", + /* NETIF_F_RXHASH */ "rx-hashing", + /* NETIF_F_RXCSUM */ "rx-checksum", + "", + "", +}; + +static int __ethtool_get_sset_count(struct net_device *dev, int sset) +{ + const struct ethtool_ops *ops = dev->ethtool_ops; + + if (sset == ETH_SS_FEATURES) + return ARRAY_SIZE(netdev_features_strings); + + if (ops && ops->get_sset_count && ops->get_strings) + return ops->get_sset_count(dev, sset); + else + return -EOPNOTSUPP; +} + +static void __ethtool_get_strings(struct net_device *dev, + u32 stringset, u8 *data) +{ + const struct ethtool_ops *ops = dev->ethtool_ops; + + if (stringset == ETH_SS_FEATURES) + memcpy(data, netdev_features_strings, + sizeof(netdev_features_strings)); + else + /* ops->get_strings is valid because checked earlier */ + ops->get_strings(dev, stringset, data); +} + +static u32 ethtool_get_feature_mask(u32 eth_cmd) +{ + /* feature masks of legacy discrete ethtool ops */ + + switch (eth_cmd) { + case ETHTOOL_GTXCSUM: + case ETHTOOL_STXCSUM: + return NETIF_F_ALL_CSUM | NETIF_F_SCTP_CSUM; + case ETHTOOL_GRXCSUM: + case ETHTOOL_SRXCSUM: + return NETIF_F_RXCSUM; + case ETHTOOL_GSG: + case ETHTOOL_SSG: + return NETIF_F_SG; + case ETHTOOL_GTSO: + case ETHTOOL_STSO: + return NETIF_F_ALL_TSO; + case ETHTOOL_GUFO: + case ETHTOOL_SUFO: + return NETIF_F_UFO; + case ETHTOOL_GGSO: + case ETHTOOL_SGSO: + return NETIF_F_GSO; + case ETHTOOL_GGRO: + case ETHTOOL_SGRO: + return NETIF_F_GRO; + default: + BUG(); + } +} + +static void *__ethtool_get_one_feature_actor(struct net_device *dev, u32 ethcmd) +{ + const struct ethtool_ops *ops = dev->ethtool_ops; + + if (!ops) + return NULL; + + switch (ethcmd) { + case ETHTOOL_GTXCSUM: + return ops->get_tx_csum; + case ETHTOOL_GRXCSUM: + return ops->get_rx_csum; + case ETHTOOL_SSG: + return ops->get_sg; + case ETHTOOL_STSO: + return ops->get_tso; + case ETHTOOL_SUFO: + return ops->get_ufo; + default: + return NULL; + } +} + +static u32 __ethtool_get_rx_csum_oldbug(struct net_device *dev) +{ + return !!(dev->features & NETIF_F_ALL_CSUM); +} + +static int ethtool_get_one_feature(struct net_device *dev, + char __user *useraddr, u32 ethcmd) +{ + u32 mask = ethtool_get_feature_mask(ethcmd); + struct ethtool_value edata = { + .cmd = ethcmd, + .data = !!(dev->features & mask), + }; + + /* compatibility with discrete get_ ops */ + if (!(dev->hw_features & mask)) { + u32 (*actor)(struct net_device *); + + actor = __ethtool_get_one_feature_actor(dev, ethcmd); + + /* bug compatibility with old get_rx_csum */ + if (ethcmd == ETHTOOL_GRXCSUM && !actor) + actor = __ethtool_get_rx_csum_oldbug; + + if (actor) + edata.data = actor(dev); + } + + if (copy_to_user(useraddr, &edata, sizeof(edata))) + return -EFAULT; + return 0; +} + +static 
int __ethtool_set_tx_csum(struct net_device *dev, u32 data); +static int __ethtool_set_rx_csum(struct net_device *dev, u32 data); +static int __ethtool_set_sg(struct net_device *dev, u32 data); +static int __ethtool_set_tso(struct net_device *dev, u32 data); +static int __ethtool_set_ufo(struct net_device *dev, u32 data); + +static int ethtool_set_one_feature(struct net_device *dev, + void __user *useraddr, u32 ethcmd) +{ + struct ethtool_value edata; + u32 mask; + + if (copy_from_user(&edata, useraddr, sizeof(edata))) + return -EFAULT; + + mask = ethtool_get_feature_mask(ethcmd); + mask &= dev->hw_features; + if (mask) { + if (edata.data) + dev->wanted_features |= mask; + else + dev->wanted_features &= ~mask; + + netdev_update_features(dev); + return 0; + } + + /* Driver is not converted to ndo_fix_features or does not + * support changing this offload. In the latter case it won't + * have corresponding ethtool_ops field set. + * + * Following part is to be removed after all drivers advertise + * their changeable features in netdev->hw_features and stop + * using discrete offload setting ops. + */ + + switch (ethcmd) { + case ETHTOOL_STXCSUM: + return __ethtool_set_tx_csum(dev, edata.data); + case ETHTOOL_SRXCSUM: + return __ethtool_set_rx_csum(dev, edata.data); + case ETHTOOL_SSG: + return __ethtool_set_sg(dev, edata.data); + case ETHTOOL_STSO: + return __ethtool_set_tso(dev, edata.data); + case ETHTOOL_SUFO: + return __ethtool_set_ufo(dev, edata.data); + default: + return -EOPNOTSUPP; + } +} + +static int __ethtool_set_flags(struct net_device *dev, u32 data) +{ + u32 changed; + + if (data & ~flags_dup_features) + return -EINVAL; + + /* legacy set_flags() op */ + if (dev->ethtool_ops->set_flags) { + if (unlikely(dev->hw_features & flags_dup_features)) + netdev_warn(dev, + "driver BUG: mixed hw_features and set_flags()\n"); + return dev->ethtool_ops->set_flags(dev, data); + } + + /* allow changing only bits set in hw_features */ + changed = (data ^ dev->wanted_features) & flags_dup_features; + if (changed & ~dev->hw_features) + return (changed & dev->hw_features) ? 
-EINVAL : -EOPNOTSUPP; + + dev->wanted_features = + (dev->wanted_features & ~changed) | data; + + netdev_update_features(dev); + + return 0; +} + static int ethtool_get_settings(struct net_device *dev, void __user *useraddr) { struct ethtool_cmd cmd = { .cmd = ETHTOOL_GSET }; @@ -251,14 +621,10 @@ static noinline_for_stack int ethtool_get_sset_info(struct net_device *dev, void __user *useraddr) { struct ethtool_sset_info info; - const struct ethtool_ops *ops = dev->ethtool_ops; u64 sset_mask; int i, idx = 0, n_bits = 0, ret, rc; u32 *info_buf = NULL; - if (!ops->get_sset_count) - return -EOPNOTSUPP; - if (copy_from_user(&info, useraddr, sizeof(info))) return -EFAULT; @@ -285,7 +651,7 @@ static noinline_for_stack int ethtool_get_sset_info(struct net_device *dev, if (!(sset_mask & (1ULL << i))) continue; - rc = ops->get_sset_count(dev, i); + rc = __ethtool_get_sset_count(dev, i); if (rc >= 0) { info.sset_mask |= (1ULL << i); info_buf[idx++] = rc; @@ -817,7 +1183,7 @@ static int ethtool_get_regs(struct net_device *dev, char __user *useraddr) if (regs.len > reglen) regs.len = reglen; - regbuf = vmalloc(reglen); + regbuf = vzalloc(reglen); if (!regbuf) return -ENOMEM; @@ -1091,6 +1457,9 @@ static int __ethtool_set_sg(struct net_device *dev, u32 data) { int err; + if (data && !(dev->features & NETIF_F_ALL_CSUM)) + return -EINVAL; + if (!data && dev->ethtool_ops->set_tso) { err = dev->ethtool_ops->set_tso(dev, 0); if (err) @@ -1105,145 +1474,55 @@ static int __ethtool_set_sg(struct net_device *dev, u32 data) return dev->ethtool_ops->set_sg(dev, data); } -static int ethtool_set_tx_csum(struct net_device *dev, char __user *useraddr) +static int __ethtool_set_tx_csum(struct net_device *dev, u32 data) { - struct ethtool_value edata; int err; if (!dev->ethtool_ops->set_tx_csum) return -EOPNOTSUPP; - if (copy_from_user(&edata, useraddr, sizeof(edata))) - return -EFAULT; - - if (!edata.data && dev->ethtool_ops->set_sg) { + if (!data && dev->ethtool_ops->set_sg) { err = __ethtool_set_sg(dev, 0); if (err) return err; } - return dev->ethtool_ops->set_tx_csum(dev, edata.data); + return dev->ethtool_ops->set_tx_csum(dev, data); } -EXPORT_SYMBOL(ethtool_op_set_tx_csum); -static int ethtool_set_rx_csum(struct net_device *dev, char __user *useraddr) +static int __ethtool_set_rx_csum(struct net_device *dev, u32 data) { - struct ethtool_value edata; - if (!dev->ethtool_ops->set_rx_csum) return -EOPNOTSUPP; - if (copy_from_user(&edata, useraddr, sizeof(edata))) - return -EFAULT; - - if (!edata.data && dev->ethtool_ops->set_sg) + if (!data) dev->features &= ~NETIF_F_GRO; - return dev->ethtool_ops->set_rx_csum(dev, edata.data); + return dev->ethtool_ops->set_rx_csum(dev, data); } -static int ethtool_set_sg(struct net_device *dev, char __user *useraddr) +static int __ethtool_set_tso(struct net_device *dev, u32 data) { - struct ethtool_value edata; - - if (!dev->ethtool_ops->set_sg) - return -EOPNOTSUPP; - - if (copy_from_user(&edata, useraddr, sizeof(edata))) - return -EFAULT; - - if (edata.data && - !(dev->features & NETIF_F_ALL_CSUM)) - return -EINVAL; - - return __ethtool_set_sg(dev, edata.data); -} - -static int ethtool_set_tso(struct net_device *dev, char __user *useraddr) -{ - struct ethtool_value edata; - if (!dev->ethtool_ops->set_tso) return -EOPNOTSUPP; - if (copy_from_user(&edata, useraddr, sizeof(edata))) - return -EFAULT; - - if (edata.data && !(dev->features & NETIF_F_SG)) + if (data && !(dev->features & NETIF_F_SG)) return -EINVAL; - return dev->ethtool_ops->set_tso(dev, edata.data); + return 
dev->ethtool_ops->set_tso(dev, data); } -static int ethtool_set_ufo(struct net_device *dev, char __user *useraddr) +static int __ethtool_set_ufo(struct net_device *dev, u32 data) { - struct ethtool_value edata; - if (!dev->ethtool_ops->set_ufo) return -EOPNOTSUPP; - if (copy_from_user(&edata, useraddr, sizeof(edata))) - return -EFAULT; - if (edata.data && !(dev->features & NETIF_F_SG)) + if (data && !(dev->features & NETIF_F_SG)) return -EINVAL; - if (edata.data && !((dev->features & NETIF_F_GEN_CSUM) || + if (data && !((dev->features & NETIF_F_GEN_CSUM) || (dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM)) == (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) return -EINVAL; - return dev->ethtool_ops->set_ufo(dev, edata.data); -} - -static int ethtool_get_gso(struct net_device *dev, char __user *useraddr) -{ - struct ethtool_value edata = { ETHTOOL_GGSO }; - - edata.data = dev->features & NETIF_F_GSO; - if (copy_to_user(useraddr, &edata, sizeof(edata))) - return -EFAULT; - return 0; -} - -static int ethtool_set_gso(struct net_device *dev, char __user *useraddr) -{ - struct ethtool_value edata; - - if (copy_from_user(&edata, useraddr, sizeof(edata))) - return -EFAULT; - if (edata.data) - dev->features |= NETIF_F_GSO; - else - dev->features &= ~NETIF_F_GSO; - return 0; -} - -static int ethtool_get_gro(struct net_device *dev, char __user *useraddr) -{ - struct ethtool_value edata = { ETHTOOL_GGRO }; - - edata.data = dev->features & NETIF_F_GRO; - if (copy_to_user(useraddr, &edata, sizeof(edata))) - return -EFAULT; - return 0; -} - -static int ethtool_set_gro(struct net_device *dev, char __user *useraddr) -{ - struct ethtool_value edata; - - if (copy_from_user(&edata, useraddr, sizeof(edata))) - return -EFAULT; - - if (edata.data) { - u32 rxcsum = dev->ethtool_ops->get_rx_csum ? 
- dev->ethtool_ops->get_rx_csum(dev) : - ethtool_op_get_rx_csum(dev); - - if (!rxcsum) - return -EINVAL; - dev->features |= NETIF_F_GRO; - } else - dev->features &= ~NETIF_F_GRO; - - return 0; + return dev->ethtool_ops->set_ufo(dev, data); } static int ethtool_self_test(struct net_device *dev, char __user *useraddr) @@ -1287,17 +1566,13 @@ static int ethtool_self_test(struct net_device *dev, char __user *useraddr) static int ethtool_get_strings(struct net_device *dev, void __user *useraddr) { struct ethtool_gstrings gstrings; - const struct ethtool_ops *ops = dev->ethtool_ops; u8 *data; int ret; - if (!ops->get_strings || !ops->get_sset_count) - return -EOPNOTSUPP; - if (copy_from_user(&gstrings, useraddr, sizeof(gstrings))) return -EFAULT; - ret = ops->get_sset_count(dev, gstrings.string_set); + ret = __ethtool_get_sset_count(dev, gstrings.string_set); if (ret < 0) return ret; @@ -1307,7 +1582,7 @@ static int ethtool_get_strings(struct net_device *dev, void __user *useraddr) if (!data) return -ENOMEM; - ops->get_strings(dev, gstrings.string_set, data); + __ethtool_get_strings(dev, gstrings.string_set, data); ret = -EFAULT; if (copy_to_user(useraddr, &gstrings, sizeof(gstrings))) @@ -1317,7 +1592,7 @@ static int ethtool_get_strings(struct net_device *dev, void __user *useraddr) goto out; ret = 0; - out: +out: kfree(data); return ret; } @@ -1458,7 +1733,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) void __user *useraddr = ifr->ifr_data; u32 ethcmd; int rc; - unsigned long old_features; + u32 old_features; if (!dev || !netif_device_present(dev)) return -ENODEV; @@ -1500,6 +1775,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_GRXCLSRLCNT: case ETHTOOL_GRXCLSRULE: case ETHTOOL_GRXCLSRLALL: + case ETHTOOL_GFEATURES: break; default: if (!capable(CAP_NET_ADMIN)) @@ -1570,42 +1846,6 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_SPAUSEPARAM: rc = ethtool_set_pauseparam(dev, useraddr); break; - case ETHTOOL_GRXCSUM: - rc = ethtool_get_value(dev, useraddr, ethcmd, - (dev->ethtool_ops->get_rx_csum ? - dev->ethtool_ops->get_rx_csum : - ethtool_op_get_rx_csum)); - break; - case ETHTOOL_SRXCSUM: - rc = ethtool_set_rx_csum(dev, useraddr); - break; - case ETHTOOL_GTXCSUM: - rc = ethtool_get_value(dev, useraddr, ethcmd, - (dev->ethtool_ops->get_tx_csum ? - dev->ethtool_ops->get_tx_csum : - ethtool_op_get_tx_csum)); - break; - case ETHTOOL_STXCSUM: - rc = ethtool_set_tx_csum(dev, useraddr); - break; - case ETHTOOL_GSG: - rc = ethtool_get_value(dev, useraddr, ethcmd, - (dev->ethtool_ops->get_sg ? - dev->ethtool_ops->get_sg : - ethtool_op_get_sg)); - break; - case ETHTOOL_SSG: - rc = ethtool_set_sg(dev, useraddr); - break; - case ETHTOOL_GTSO: - rc = ethtool_get_value(dev, useraddr, ethcmd, - (dev->ethtool_ops->get_tso ? - dev->ethtool_ops->get_tso : - ethtool_op_get_tso)); - break; - case ETHTOOL_STSO: - rc = ethtool_set_tso(dev, useraddr); - break; case ETHTOOL_TEST: rc = ethtool_self_test(dev, useraddr); break; @@ -1621,21 +1861,6 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_GPERMADDR: rc = ethtool_get_perm_addr(dev, useraddr); break; - case ETHTOOL_GUFO: - rc = ethtool_get_value(dev, useraddr, ethcmd, - (dev->ethtool_ops->get_ufo ? 
- dev->ethtool_ops->get_ufo : - ethtool_op_get_ufo)); - break; - case ETHTOOL_SUFO: - rc = ethtool_set_ufo(dev, useraddr); - break; - case ETHTOOL_GGSO: - rc = ethtool_get_gso(dev, useraddr); - break; - case ETHTOOL_SGSO: - rc = ethtool_set_gso(dev, useraddr); - break; case ETHTOOL_GFLAGS: rc = ethtool_get_value(dev, useraddr, ethcmd, (dev->ethtool_ops->get_flags ? @@ -1643,8 +1868,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) ethtool_op_get_flags)); break; case ETHTOOL_SFLAGS: - rc = ethtool_set_value(dev, useraddr, - dev->ethtool_ops->set_flags); + rc = ethtool_set_value(dev, useraddr, __ethtool_set_flags); break; case ETHTOOL_GPFLAGS: rc = ethtool_get_value(dev, useraddr, ethcmd, @@ -1666,12 +1890,6 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_SRXCLSRLINS: rc = ethtool_set_rxnfc(dev, ethcmd, useraddr); break; - case ETHTOOL_GGRO: - rc = ethtool_get_gro(dev, useraddr); - break; - case ETHTOOL_SGRO: - rc = ethtool_set_gro(dev, useraddr); - break; case ETHTOOL_FLASHDEV: rc = ethtool_flash_device(dev, useraddr); break; @@ -1693,6 +1911,30 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_SRXFHINDIR: rc = ethtool_set_rxfh_indir(dev, useraddr); break; + case ETHTOOL_GFEATURES: + rc = ethtool_get_features(dev, useraddr); + break; + case ETHTOOL_SFEATURES: + rc = ethtool_set_features(dev, useraddr); + break; + case ETHTOOL_GTXCSUM: + case ETHTOOL_GRXCSUM: + case ETHTOOL_GSG: + case ETHTOOL_GTSO: + case ETHTOOL_GUFO: + case ETHTOOL_GGSO: + case ETHTOOL_GGRO: + rc = ethtool_get_one_feature(dev, useraddr, ethcmd); + break; + case ETHTOOL_STXCSUM: + case ETHTOOL_SRXCSUM: + case ETHTOOL_SSG: + case ETHTOOL_STSO: + case ETHTOOL_SUFO: + case ETHTOOL_SGSO: + case ETHTOOL_SGRO: + rc = ethtool_set_one_feature(dev, useraddr, ethcmd); + break; default: rc = -EOPNOTSUPP; } diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index a20e5d3bbfa0..8248ebb5891d 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -181,13 +181,13 @@ static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops, { int ret = 0; - if (rule->iifindex && (rule->iifindex != fl->iif)) + if (rule->iifindex && (rule->iifindex != fl->flowi_iif)) goto out; - if (rule->oifindex && (rule->oifindex != fl->oif)) + if (rule->oifindex && (rule->oifindex != fl->flowi_oif)) goto out; - if ((rule->mark ^ fl->mark) & rule->mark_mask) + if ((rule->mark ^ fl->flowi_mark) & rule->mark_mask) goto out; ret = ops->match(rule, fl, flags); diff --git a/net/core/flow.c b/net/core/flow.c index 127c8a7ffd61..990703b8863b 100644 --- a/net/core/flow.c +++ b/net/core/flow.c @@ -172,9 +172,9 @@ static void flow_new_hash_rnd(struct flow_cache *fc, static u32 flow_hash_code(struct flow_cache *fc, struct flow_cache_percpu *fcp, - struct flowi *key) + const struct flowi *key) { - u32 *k = (u32 *) key; + const u32 *k = (const u32 *) key; return jhash2(k, (sizeof(*key) / sizeof(u32)), fcp->hash_rnd) & (flow_cache_hash_size(fc) - 1); @@ -186,17 +186,17 @@ typedef unsigned long flow_compare_t; * important assumptions that we can here, such as alignment and * constant size. 
*/ -static int flow_key_compare(struct flowi *key1, struct flowi *key2) +static int flow_key_compare(const struct flowi *key1, const struct flowi *key2) { - flow_compare_t *k1, *k1_lim, *k2; + const flow_compare_t *k1, *k1_lim, *k2; const int n_elem = sizeof(struct flowi) / sizeof(flow_compare_t); BUILD_BUG_ON(sizeof(struct flowi) % sizeof(flow_compare_t)); - k1 = (flow_compare_t *) key1; + k1 = (const flow_compare_t *) key1; k1_lim = k1 + n_elem; - k2 = (flow_compare_t *) key2; + k2 = (const flow_compare_t *) key2; do { if (*k1++ != *k2++) @@ -207,7 +207,7 @@ static int flow_key_compare(struct flowi *key1, struct flowi *key2) } struct flow_cache_object * -flow_cache_lookup(struct net *net, struct flowi *key, u16 family, u8 dir, +flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir, flow_resolve_t resolver, void *ctx) { struct flow_cache *fc = &flow_cache_global; diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 60a902913429..799f06e03a22 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -316,7 +316,7 @@ static struct neigh_hash_table *neigh_hash_alloc(unsigned int entries) { size_t size = entries * sizeof(struct neighbour *); struct neigh_hash_table *ret; - struct neighbour **buckets; + struct neighbour __rcu **buckets; ret = kmalloc(sizeof(*ret), GFP_ATOMIC); if (!ret) @@ -324,14 +324,14 @@ static struct neigh_hash_table *neigh_hash_alloc(unsigned int entries) if (size <= PAGE_SIZE) buckets = kzalloc(size, GFP_ATOMIC); else - buckets = (struct neighbour **) + buckets = (struct neighbour __rcu **) __get_free_pages(GFP_ATOMIC | __GFP_ZERO, get_order(size)); if (!buckets) { kfree(ret); return NULL; } - rcu_assign_pointer(ret->hash_buckets, buckets); + ret->hash_buckets = buckets; ret->hash_mask = entries - 1; get_random_bytes(&ret->hash_rnd, sizeof(ret->hash_rnd)); return ret; @@ -343,7 +343,7 @@ static void neigh_hash_free_rcu(struct rcu_head *head) struct neigh_hash_table, rcu); size_t size = (nht->hash_mask + 1) * sizeof(struct neighbour *); - struct neighbour **buckets = nht->hash_buckets; + struct neighbour __rcu **buckets = nht->hash_buckets; if (size <= PAGE_SIZE) kfree(buckets); @@ -1540,7 +1540,7 @@ void neigh_table_init_no_netlink(struct neigh_table *tbl) panic("cannot create neighbour proc dir entry"); #endif - tbl->nht = neigh_hash_alloc(8); + RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(8)); phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *); tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL); @@ -1602,7 +1602,8 @@ int neigh_table_clear(struct neigh_table *tbl) } write_unlock(&neigh_tbl_lock); - call_rcu(&tbl->nht->rcu, neigh_hash_free_rcu); + call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu, + neigh_hash_free_rcu); tbl->nht = NULL; kfree(tbl->phash_buckets); diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index e23c01be5a5b..5ceb257e860c 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -99,7 +99,7 @@ NETDEVICE_SHOW(addr_assign_type, fmt_dec); NETDEVICE_SHOW(addr_len, fmt_dec); NETDEVICE_SHOW(iflink, fmt_dec); NETDEVICE_SHOW(ifindex, fmt_dec); -NETDEVICE_SHOW(features, fmt_long_hex); +NETDEVICE_SHOW(features, fmt_hex); NETDEVICE_SHOW(type, fmt_dec); NETDEVICE_SHOW(link_mode, fmt_dec); @@ -295,6 +295,20 @@ static ssize_t show_ifalias(struct device *dev, return ret; } +NETDEVICE_SHOW(group, fmt_dec); + +static int change_group(struct net_device *net, unsigned long new_group) +{ + dev_set_group(net, (int) new_group); + return 0; +} + +static ssize_t store_group(struct device *dev, struct 
device_attribute *attr, + const char *buf, size_t len) +{ + return netdev_store(dev, attr, buf, len, change_group); +} + static struct device_attribute net_class_attributes[] = { __ATTR(addr_assign_type, S_IRUGO, show_addr_assign_type, NULL), __ATTR(addr_len, S_IRUGO, show_addr_len, NULL), @@ -316,6 +330,7 @@ static struct device_attribute net_class_attributes[] = { __ATTR(flags, S_IRUGO | S_IWUSR, show_flags, store_flags), __ATTR(tx_queue_len, S_IRUGO | S_IWUSR, show_tx_queue_len, store_tx_queue_len), + __ATTR(netdev_group, S_IRUGO | S_IWUSR, show_group, store_group), {} }; diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 02dc2cbcbe86..06be2431753e 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -193,6 +193,17 @@ void netpoll_poll_dev(struct net_device *dev) poll_napi(dev); + if (dev->priv_flags & IFF_SLAVE) { + if (dev->npinfo) { + struct net_device *bond_dev = dev->master; + struct sk_buff *skb; + while ((skb = skb_dequeue(&dev->npinfo->arp_tx))) { + skb->dev = bond_dev; + skb_queue_tail(&bond_dev->npinfo->arp_tx, skb); + } + } + } + service_arp_queue(dev->npinfo); zap_completion_queue(); @@ -313,9 +324,7 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb, tries > 0; --tries) { if (__netif_tx_trylock(txq)) { if (!netif_tx_queue_stopped(txq)) { - dev->priv_flags |= IFF_IN_NETPOLL; status = ops->ndo_start_xmit(skb, dev); - dev->priv_flags &= ~IFF_IN_NETPOLL; if (status == NETDEV_TX_OK) txq_trans_update(txq); } diff --git a/net/core/pktgen.c b/net/core/pktgen.c index a9e7fc4c461f..0c55eaa70e39 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -251,6 +251,7 @@ struct pktgen_dev { int max_pkt_size; /* = ETH_ZLEN; */ int pkt_overhead; /* overhead for MPLS, VLANs, IPSEC etc */ int nfrags; + struct page *page; u64 delay; /* nano-seconds */ __u64 count; /* Default No packets to send */ @@ -1134,6 +1135,10 @@ static ssize_t pktgen_if_write(struct file *file, if (node_possible(value)) { pkt_dev->node = value; sprintf(pg_result, "OK: node=%d", pkt_dev->node); + if (pkt_dev->page) { + put_page(pkt_dev->page); + pkt_dev->page = NULL; + } } else sprintf(pg_result, "ERROR: node not possible"); @@ -2605,6 +2610,89 @@ static inline __be16 build_tci(unsigned int id, unsigned int cfi, return htons(id | (cfi << 12) | (prio << 13)); } +static void pktgen_finalize_skb(struct pktgen_dev *pkt_dev, struct sk_buff *skb, + int datalen) +{ + struct timeval timestamp; + struct pktgen_hdr *pgh; + + pgh = (struct pktgen_hdr *)skb_put(skb, sizeof(*pgh)); + datalen -= sizeof(*pgh); + + if (pkt_dev->nfrags <= 0) { + memset(skb_put(skb, datalen), 0, datalen); + } else { + int frags = pkt_dev->nfrags; + int i, len; + + + if (frags > MAX_SKB_FRAGS) + frags = MAX_SKB_FRAGS; + len = datalen - frags * PAGE_SIZE; + if (len > 0) { + memset(skb_put(skb, len), 0, len); + datalen = frags * PAGE_SIZE; + } + + i = 0; + while (datalen > 0) { + if (unlikely(!pkt_dev->page)) { + int node = numa_node_id(); + + if (pkt_dev->node >= 0 && (pkt_dev->flags & F_NODE)) + node = pkt_dev->node; + pkt_dev->page = alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO, 0); + if (!pkt_dev->page) + break; + } + skb_shinfo(skb)->frags[i].page = pkt_dev->page; + get_page(pkt_dev->page); + skb_shinfo(skb)->frags[i].page_offset = 0; + skb_shinfo(skb)->frags[i].size = + (datalen < PAGE_SIZE ? 
datalen : PAGE_SIZE); + datalen -= skb_shinfo(skb)->frags[i].size; + skb->len += skb_shinfo(skb)->frags[i].size; + skb->data_len += skb_shinfo(skb)->frags[i].size; + i++; + skb_shinfo(skb)->nr_frags = i; + } + + while (i < frags) { + int rem; + + if (i == 0) + break; + + rem = skb_shinfo(skb)->frags[i - 1].size / 2; + if (rem == 0) + break; + + skb_shinfo(skb)->frags[i - 1].size -= rem; + + skb_shinfo(skb)->frags[i] = + skb_shinfo(skb)->frags[i - 1]; + get_page(skb_shinfo(skb)->frags[i].page); + skb_shinfo(skb)->frags[i].page = + skb_shinfo(skb)->frags[i - 1].page; + skb_shinfo(skb)->frags[i].page_offset += + skb_shinfo(skb)->frags[i - 1].size; + skb_shinfo(skb)->frags[i].size = rem; + i++; + skb_shinfo(skb)->nr_frags = i; + } + } + + /* Stamp the time, and sequence number, + * convert them to network byte order + */ + pgh->pgh_magic = htonl(PKTGEN_MAGIC); + pgh->seq_num = htonl(pkt_dev->seq_num); + + do_gettimeofday(×tamp); + pgh->tv_sec = htonl(timestamp.tv_sec); + pgh->tv_usec = htonl(timestamp.tv_usec); +} + static struct sk_buff *fill_packet_ipv4(struct net_device *odev, struct pktgen_dev *pkt_dev) { @@ -2613,7 +2701,6 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, struct udphdr *udph; int datalen, iplen; struct iphdr *iph; - struct pktgen_hdr *pgh = NULL; __be16 protocol = htons(ETH_P_IP); __be32 *mpls; __be16 *vlan_tci = NULL; /* Encapsulates priority and VLAN ID */ @@ -2729,76 +2816,7 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, pkt_dev->pkt_overhead); skb->dev = odev; skb->pkt_type = PACKET_HOST; - - if (pkt_dev->nfrags <= 0) { - pgh = (struct pktgen_hdr *)skb_put(skb, datalen); - memset(pgh + 1, 0, datalen - sizeof(struct pktgen_hdr)); - } else { - int frags = pkt_dev->nfrags; - int i, len; - - pgh = (struct pktgen_hdr *)(((char *)(udph)) + 8); - - if (frags > MAX_SKB_FRAGS) - frags = MAX_SKB_FRAGS; - if (datalen > frags * PAGE_SIZE) { - len = datalen - frags * PAGE_SIZE; - memset(skb_put(skb, len), 0, len); - datalen = frags * PAGE_SIZE; - } - - i = 0; - while (datalen > 0) { - struct page *page = alloc_pages(GFP_KERNEL | __GFP_ZERO, 0); - skb_shinfo(skb)->frags[i].page = page; - skb_shinfo(skb)->frags[i].page_offset = 0; - skb_shinfo(skb)->frags[i].size = - (datalen < PAGE_SIZE ? 
datalen : PAGE_SIZE); - datalen -= skb_shinfo(skb)->frags[i].size; - skb->len += skb_shinfo(skb)->frags[i].size; - skb->data_len += skb_shinfo(skb)->frags[i].size; - i++; - skb_shinfo(skb)->nr_frags = i; - } - - while (i < frags) { - int rem; - - if (i == 0) - break; - - rem = skb_shinfo(skb)->frags[i - 1].size / 2; - if (rem == 0) - break; - - skb_shinfo(skb)->frags[i - 1].size -= rem; - - skb_shinfo(skb)->frags[i] = - skb_shinfo(skb)->frags[i - 1]; - get_page(skb_shinfo(skb)->frags[i].page); - skb_shinfo(skb)->frags[i].page = - skb_shinfo(skb)->frags[i - 1].page; - skb_shinfo(skb)->frags[i].page_offset += - skb_shinfo(skb)->frags[i - 1].size; - skb_shinfo(skb)->frags[i].size = rem; - i++; - skb_shinfo(skb)->nr_frags = i; - } - } - - /* Stamp the time, and sequence number, - * convert them to network byte order - */ - if (pgh) { - struct timeval timestamp; - - pgh->pgh_magic = htonl(PKTGEN_MAGIC); - pgh->seq_num = htonl(pkt_dev->seq_num); - - do_gettimeofday(×tamp); - pgh->tv_sec = htonl(timestamp.tv_sec); - pgh->tv_usec = htonl(timestamp.tv_usec); - } + pktgen_finalize_skb(pkt_dev, skb, datalen); #ifdef CONFIG_XFRM if (!process_ipsec(pkt_dev, skb, protocol)) @@ -2980,7 +2998,6 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, struct udphdr *udph; int datalen; struct ipv6hdr *iph; - struct pktgen_hdr *pgh = NULL; __be16 protocol = htons(ETH_P_IPV6); __be32 *mpls; __be16 *vlan_tci = NULL; /* Encapsulates priority and VLAN ID */ @@ -3083,75 +3100,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, skb->dev = odev; skb->pkt_type = PACKET_HOST; - if (pkt_dev->nfrags <= 0) - pgh = (struct pktgen_hdr *)skb_put(skb, datalen); - else { - int frags = pkt_dev->nfrags; - int i; - - pgh = (struct pktgen_hdr *)(((char *)(udph)) + 8); - - if (frags > MAX_SKB_FRAGS) - frags = MAX_SKB_FRAGS; - if (datalen > frags * PAGE_SIZE) { - skb_put(skb, datalen - frags * PAGE_SIZE); - datalen = frags * PAGE_SIZE; - } - - i = 0; - while (datalen > 0) { - struct page *page = alloc_pages(GFP_KERNEL, 0); - skb_shinfo(skb)->frags[i].page = page; - skb_shinfo(skb)->frags[i].page_offset = 0; - skb_shinfo(skb)->frags[i].size = - (datalen < PAGE_SIZE ? datalen : PAGE_SIZE); - datalen -= skb_shinfo(skb)->frags[i].size; - skb->len += skb_shinfo(skb)->frags[i].size; - skb->data_len += skb_shinfo(skb)->frags[i].size; - i++; - skb_shinfo(skb)->nr_frags = i; - } - - while (i < frags) { - int rem; - - if (i == 0) - break; - - rem = skb_shinfo(skb)->frags[i - 1].size / 2; - if (rem == 0) - break; - - skb_shinfo(skb)->frags[i - 1].size -= rem; - - skb_shinfo(skb)->frags[i] = - skb_shinfo(skb)->frags[i - 1]; - get_page(skb_shinfo(skb)->frags[i].page); - skb_shinfo(skb)->frags[i].page = - skb_shinfo(skb)->frags[i - 1].page; - skb_shinfo(skb)->frags[i].page_offset += - skb_shinfo(skb)->frags[i - 1].size; - skb_shinfo(skb)->frags[i].size = rem; - i++; - skb_shinfo(skb)->nr_frags = i; - } - } - - /* Stamp the time, and sequence number, - * convert them to network byte order - * should we update cloned packets too ? - */ - if (pgh) { - struct timeval timestamp; - - pgh->pgh_magic = htonl(PKTGEN_MAGIC); - pgh->seq_num = htonl(pkt_dev->seq_num); - - do_gettimeofday(×tamp); - pgh->tv_sec = htonl(timestamp.tv_sec); - pgh->tv_usec = htonl(timestamp.tv_usec); - } - /* pkt_dev->seq_num++; FF: you really mean this? 
*/ + pktgen_finalize_skb(pkt_dev, skb, datalen); return skb; } @@ -3321,7 +3270,7 @@ static void show_results(struct pktgen_dev *pkt_dev, int nr_frags) pkt_dev->started_at); ktime_t idle = ns_to_ktime(pkt_dev->idle_acc); - p += sprintf(p, "OK: %llu(c%llu+d%llu) nsec, %llu (%dbyte,%dfrags)\n", + p += sprintf(p, "OK: %llu(c%llu+d%llu) usec, %llu (%dbyte,%dfrags)\n", (unsigned long long)ktime_to_us(elapsed), (unsigned long long)ktime_to_us(ktime_sub(elapsed, idle)), (unsigned long long)ktime_to_us(idle), @@ -3884,6 +3833,8 @@ static int pktgen_remove_device(struct pktgen_thread *t, free_SAs(pkt_dev); #endif vfree(pkt_dev->flows); + if (pkt_dev->page) + put_page(pkt_dev->page); kfree(pkt_dev); return 0; } diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index a0b2eeb3b610..49f7ea5b4c75 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1036,6 +1036,7 @@ const struct nla_policy ifla_policy[IFLA_MAX+1] = { [IFLA_MAP] = { .len = sizeof(struct rtnl_link_ifmap) }, [IFLA_MTU] = { .type = NLA_U32 }, [IFLA_LINK] = { .type = NLA_U32 }, + [IFLA_MASTER] = { .type = NLA_U32 }, [IFLA_TXQLEN] = { .type = NLA_U32 }, [IFLA_WEIGHT] = { .type = NLA_U32 }, [IFLA_OPERSTATE] = { .type = NLA_U8 }, @@ -1122,8 +1123,7 @@ static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[]) return -EOPNOTSUPP; if (af_ops->validate_link_af) { - err = af_ops->validate_link_af(dev, - tb[IFLA_AF_SPEC]); + err = af_ops->validate_link_af(dev, af); if (err < 0) return err; } @@ -1179,6 +1179,41 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr *attr) return err; } +static int do_set_master(struct net_device *dev, int ifindex) +{ + struct net_device *master_dev; + const struct net_device_ops *ops; + int err; + + if (dev->master) { + if (dev->master->ifindex == ifindex) + return 0; + ops = dev->master->netdev_ops; + if (ops->ndo_del_slave) { + err = ops->ndo_del_slave(dev->master, dev); + if (err) + return err; + } else { + return -EOPNOTSUPP; + } + } + + if (ifindex) { + master_dev = __dev_get_by_index(dev_net(dev), ifindex); + if (!master_dev) + return -EINVAL; + ops = master_dev->netdev_ops; + if (ops->ndo_add_slave) { + err = ops->ndo_add_slave(master_dev, dev); + if (err) + return err; + } else { + return -EOPNOTSUPP; + } + } + return 0; +} + static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, struct nlattr **tb, char *ifname, int modified) { @@ -1302,6 +1337,13 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, goto errout; } + if (tb[IFLA_MASTER]) { + err = do_set_master(dev, nla_get_u32(tb[IFLA_MASTER])); + if (err) + goto errout; + modified = 1; + } + if (tb[IFLA_TXQLEN]) dev->tx_queue_len = nla_get_u32(tb[IFLA_TXQLEN]); @@ -1548,6 +1590,8 @@ struct net_device *rtnl_create_link(struct net *src_net, struct net *net, set_operstate(dev, nla_get_u8(tb[IFLA_OPERSTATE])); if (tb[IFLA_LINKMODE]) dev->link_mode = nla_get_u8(tb[IFLA_LINKMODE]); + if (tb[IFLA_GROUP]) + dev_set_group(dev, nla_get_u32(tb[IFLA_GROUP])); return dev; @@ -1606,10 +1650,6 @@ replay: else { if (ifname[0]) dev = __dev_get_by_name(net, ifname); - else if (tb[IFLA_GROUP]) - return rtnl_group_changelink(net, - nla_get_u32(tb[IFLA_GROUP]), - ifm, tb); else dev = NULL; } @@ -1676,8 +1716,13 @@ replay: return do_setlink(dev, ifm, tb, ifname, modified); } - if (!(nlh->nlmsg_flags & NLM_F_CREATE)) + if (!(nlh->nlmsg_flags & NLM_F_CREATE)) { + if (ifm->ifi_index == 0 && tb[IFLA_GROUP]) + return rtnl_group_changelink(net, + nla_get_u32(tb[IFLA_GROUP]), + ifm, tb); return -ENODEV; + } if 
(ifm->ifi_index) return -EOPNOTSUPP; @@ -1702,6 +1747,9 @@ replay: snprintf(ifname, IFNAMSIZ, "%s%%d", ops->kind); dest_net = rtnl_link_get_net(net, tb); + if (IS_ERR(dest_net)) + return PTR_ERR(dest_net); + dev = rtnl_create_link(net, dest_net, ifname, ops, tb); if (IS_ERR(dev)) @@ -1850,7 +1898,7 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) if (kind != 2 && security_netlink_recv(skb, CAP_NET_ADMIN)) return -EPERM; - if (kind == 2 && (nlh->nlmsg_flags & NLM_F_DUMP) == NLM_F_DUMP) { + if (kind == 2 && nlh->nlmsg_flags&NLM_F_DUMP) { struct sock *rtnl; rtnl_dumpit_func dumpit; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index d31bb36ae0dc..1eb526a848ff 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -210,6 +210,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, shinfo = skb_shinfo(skb); memset(shinfo, 0, offsetof(struct skb_shared_info, dataref)); atomic_set(&shinfo->dataref, 1); + kmemcheck_annotate_variable(shinfo->destructor_arg); if (fclone) { struct sk_buff *child = skb + 1; @@ -2433,8 +2434,6 @@ int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb, return -ENOMEM; /* initialize the next frag */ - sk->sk_sndmsg_page = page; - sk->sk_sndmsg_off = 0; skb_fill_page_desc(skb, frg_cnt, page, 0, 0); skb->truesize += PAGE_SIZE; atomic_add(PAGE_SIZE, &sk->sk_wmem_alloc); @@ -2454,7 +2453,6 @@ int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb, return -EFAULT; /* copy was successful so update the size parameters */ - sk->sk_sndmsg_off += copy; frag->size += copy; skb->len += copy; skb->data_len += copy; @@ -2497,7 +2495,7 @@ EXPORT_SYMBOL_GPL(skb_pull_rcsum); * a pointer to the first in a list of new skbs for the segments. * In case of error it returns ERR_PTR(err). */ -struct sk_buff *skb_segment(struct sk_buff *skb, int features) +struct sk_buff *skb_segment(struct sk_buff *skb, u32 features) { struct sk_buff *segs = NULL; struct sk_buff *tail = NULL; @@ -2507,7 +2505,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, int features) unsigned int offset = doffset; unsigned int headroom; unsigned int len; - int sg = features & NETIF_F_SG; + int sg = !!(features & NETIF_F_SG); int nfrags = skb_shinfo(skb)->nr_frags; int err = -ENOMEM; int i = 0; @@ -2744,8 +2742,12 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb) merge: if (offset > headlen) { - skbinfo->frags[0].page_offset += offset - headlen; - skbinfo->frags[0].size -= offset - headlen; + unsigned int eat = offset - headlen; + + skbinfo->frags[0].page_offset += eat; + skbinfo->frags[0].size -= eat; + skb->data_len -= eat; + skb->len -= eat; offset = headlen; } diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c index d900ab99814a..3609eacaf4ce 100644 --- a/net/dcb/dcbnl.c +++ b/net/dcb/dcbnl.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008, Intel Corporation. + * Copyright (c) 2008-2011, Intel Corporation. 
* * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -583,7 +583,7 @@ static int dcbnl_getapp(struct net_device *netdev, struct nlattr **tb, u8 up, idtype; int ret = -EINVAL; - if (!tb[DCB_ATTR_APP] || !netdev->dcbnl_ops->getapp) + if (!tb[DCB_ATTR_APP]) goto out; ret = nla_parse_nested(app_tb, DCB_APP_ATTR_MAX, tb[DCB_ATTR_APP], @@ -604,7 +604,16 @@ static int dcbnl_getapp(struct net_device *netdev, struct nlattr **tb, goto out; id = nla_get_u16(app_tb[DCB_APP_ATTR_ID]); - up = netdev->dcbnl_ops->getapp(netdev, idtype, id); + + if (netdev->dcbnl_ops->getapp) { + up = netdev->dcbnl_ops->getapp(netdev, idtype, id); + } else { + struct dcb_app app = { + .selector = idtype, + .protocol = id, + }; + up = dcb_getapp(netdev, &app); + } /* send this back */ dcbnl_skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); @@ -617,6 +626,9 @@ static int dcbnl_getapp(struct net_device *netdev, struct nlattr **tb, dcb->cmd = DCB_CMD_GAPP; app_nest = nla_nest_start(dcbnl_skb, DCB_ATTR_APP); + if (!app_nest) + goto out_cancel; + ret = nla_put_u8(dcbnl_skb, DCB_APP_ATTR_IDTYPE, idtype); if (ret) goto out_cancel; @@ -1181,7 +1193,7 @@ static int dcbnl_ieee_set(struct net_device *netdev, struct nlattr **tb, goto err; } - if (ieee[DCB_ATTR_IEEE_PFC] && ops->ieee_setets) { + if (ieee[DCB_ATTR_IEEE_PFC] && ops->ieee_setpfc) { struct ieee_pfc *pfc = nla_data(ieee[DCB_ATTR_IEEE_PFC]); err = ops->ieee_setpfc(netdev, pfc); if (err) @@ -1212,6 +1224,59 @@ err: return err; } +static int dcbnl_build_peer_app(struct net_device *netdev, struct sk_buff* skb, + int app_nested_type, int app_info_type, + int app_entry_type) +{ + struct dcb_peer_app_info info; + struct dcb_app *table = NULL; + const struct dcbnl_rtnl_ops *ops = netdev->dcbnl_ops; + u16 app_count; + int err; + + + /** + * retrieve the peer app configuration form the driver. If the driver + * handlers fail exit without doing anything + */ + err = ops->peer_getappinfo(netdev, &info, &app_count); + if (!err && app_count) { + table = kmalloc(sizeof(struct dcb_app) * app_count, GFP_KERNEL); + if (!table) + return -ENOMEM; + + err = ops->peer_getapptable(netdev, table); + } + + if (!err) { + u16 i; + struct nlattr *app; + + /** + * build the message, from here on the only possible failure + * is due to the skb size + */ + err = -EMSGSIZE; + + app = nla_nest_start(skb, app_nested_type); + if (!app) + goto nla_put_failure; + + if (app_info_type) + NLA_PUT(skb, app_info_type, sizeof(info), &info); + + for (i = 0; i < app_count; i++) + NLA_PUT(skb, app_entry_type, sizeof(struct dcb_app), + &table[i]); + + nla_nest_end(skb, app); + } + err = 0; + +nla_put_failure: + kfree(table); + return err; +} /* Handle IEEE 802.1Qaz GET commands. 
*/ static int dcbnl_ieee_get(struct net_device *netdev, struct nlattr **tb, @@ -1276,6 +1341,30 @@ static int dcbnl_ieee_get(struct net_device *netdev, struct nlattr **tb, spin_unlock(&dcb_lock); nla_nest_end(skb, app); + /* get peer info if available */ + if (ops->ieee_peer_getets) { + struct ieee_ets ets; + err = ops->ieee_peer_getets(netdev, &ets); + if (!err) + NLA_PUT(skb, DCB_ATTR_IEEE_PEER_ETS, sizeof(ets), &ets); + } + + if (ops->ieee_peer_getpfc) { + struct ieee_pfc pfc; + err = ops->ieee_peer_getpfc(netdev, &pfc); + if (!err) + NLA_PUT(skb, DCB_ATTR_IEEE_PEER_PFC, sizeof(pfc), &pfc); + } + + if (ops->peer_getappinfo && ops->peer_getapptable) { + err = dcbnl_build_peer_app(netdev, skb, + DCB_ATTR_IEEE_PEER_APP, + DCB_ATTR_IEEE_APP_UNSPEC, + DCB_ATTR_IEEE_APP); + if (err) + goto nla_put_failure; + } + nla_nest_end(skb, ieee); nlmsg_end(skb, nlh); @@ -1429,6 +1518,71 @@ err: return ret; } +/* Handle CEE DCBX GET commands. */ +static int dcbnl_cee_get(struct net_device *netdev, struct nlattr **tb, + u32 pid, u32 seq, u16 flags) +{ + struct sk_buff *skb; + struct nlmsghdr *nlh; + struct dcbmsg *dcb; + struct nlattr *cee; + const struct dcbnl_rtnl_ops *ops = netdev->dcbnl_ops; + int err; + + if (!ops) + return -EOPNOTSUPP; + + skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!skb) + return -ENOBUFS; + + nlh = NLMSG_NEW(skb, pid, seq, RTM_GETDCB, sizeof(*dcb), flags); + + dcb = NLMSG_DATA(nlh); + dcb->dcb_family = AF_UNSPEC; + dcb->cmd = DCB_CMD_CEE_GET; + + NLA_PUT_STRING(skb, DCB_ATTR_IFNAME, netdev->name); + + cee = nla_nest_start(skb, DCB_ATTR_CEE); + if (!cee) + goto nla_put_failure; + + /* get peer info if available */ + if (ops->cee_peer_getpg) { + struct cee_pg pg; + err = ops->cee_peer_getpg(netdev, &pg); + if (!err) + NLA_PUT(skb, DCB_ATTR_CEE_PEER_PG, sizeof(pg), &pg); + } + + if (ops->cee_peer_getpfc) { + struct cee_pfc pfc; + err = ops->cee_peer_getpfc(netdev, &pfc); + if (!err) + NLA_PUT(skb, DCB_ATTR_CEE_PEER_PFC, sizeof(pfc), &pfc); + } + + if (ops->peer_getappinfo && ops->peer_getapptable) { + err = dcbnl_build_peer_app(netdev, skb, + DCB_ATTR_CEE_PEER_APP_TABLE, + DCB_ATTR_CEE_PEER_APP_INFO, + DCB_ATTR_CEE_PEER_APP); + if (err) + goto nla_put_failure; + } + + nla_nest_end(skb, cee); + nlmsg_end(skb, nlh); + + return rtnl_unicast(skb, &init_net, pid); +nla_put_failure: + nlmsg_cancel(skb, nlh); +nlmsg_failure: + kfree_skb(skb); + return -1; +} + static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { struct net *net = sock_net(skb->sk); @@ -1558,6 +1712,10 @@ static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) ret = dcbnl_setfeatcfg(netdev, tb, pid, nlh->nlmsg_seq, nlh->nlmsg_flags); goto out; + case DCB_CMD_CEE_GET: + ret = dcbnl_cee_get(netdev, tb, pid, nlh->nlmsg_seq, + nlh->nlmsg_flags); + goto out; default: goto errout; } @@ -1604,6 +1762,10 @@ EXPORT_SYMBOL(dcb_getapp); u8 dcb_setapp(struct net_device *dev, struct dcb_app *new) { struct dcb_app_type *itr; + struct dcb_app_type event; + + memcpy(&event.name, dev->name, sizeof(event.name)); + memcpy(&event.app, new, sizeof(event.app)); spin_lock(&dcb_lock); /* Search for existing match and replace */ @@ -1635,7 +1797,7 @@ u8 dcb_setapp(struct net_device *dev, struct dcb_app *new) } out: spin_unlock(&dcb_lock); - call_dcbevent_notifiers(DCB_APP_EVENT, new); + call_dcbevent_notifiers(DCB_APP_EVENT, &event); return 0; } EXPORT_SYMBOL(dcb_setapp); diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c index e96d5e810039..fadecd20d75b 100644 --- 
a/net/dccp/ccids/ccid2.c +++ b/net/dccp/ccids/ccid2.c @@ -583,6 +583,15 @@ done: dccp_ackvec_parsed_cleanup(&hc->tx_av_chunks); } +/* + * Convert RFC 3390 larger initial window into an equivalent number of packets. + * This is based on the numbers specified in RFC 5681, 3.1. + */ +static inline u32 rfc3390_bytes_to_packets(const u32 smss) +{ + return smss <= 1095 ? 4 : (smss > 2190 ? 2 : 3); +} + static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk) { struct ccid2_hc_tx_sock *hc = ccid_priv(ccid); diff --git a/net/dccp/input.c b/net/dccp/input.c index 8cde009e8b85..4222e7a654b0 100644 --- a/net/dccp/input.c +++ b/net/dccp/input.c @@ -614,6 +614,9 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, /* Caller (dccp_v4_do_rcv) will send Reset */ dcb->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION; return 1; + } else if (sk->sk_state == DCCP_CLOSED) { + dcb->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION; + return 1; } if (sk->sk_state != DCCP_REQUESTING && sk->sk_state != DCCP_RESPOND) { @@ -668,10 +671,6 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, } switch (sk->sk_state) { - case DCCP_CLOSED: - dcb->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION; - return 1; - case DCCP_REQUESTING: queued = dccp_rcv_request_sent_state_process(sk, skb, dh, len); if (queued >= 0) diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 45a434f94169..ae451c6d83ba 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -43,9 +43,9 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) struct inet_sock *inet = inet_sk(sk); struct dccp_sock *dp = dccp_sk(sk); const struct sockaddr_in *usin = (struct sockaddr_in *)uaddr; + __be16 orig_sport, orig_dport; struct rtable *rt; __be32 daddr, nexthop; - int tmp; int err; dp->dccps_role = DCCP_ROLE_CLIENT; @@ -63,12 +63,14 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) nexthop = inet->opt->faddr; } - tmp = ip_route_connect(&rt, nexthop, inet->inet_saddr, - RT_CONN_FLAGS(sk), sk->sk_bound_dev_if, - IPPROTO_DCCP, - inet->inet_sport, usin->sin_port, sk, 1); - if (tmp < 0) - return tmp; + orig_sport = inet->inet_sport; + orig_dport = usin->sin_port; + rt = ip_route_connect(nexthop, inet->inet_saddr, + RT_CONN_FLAGS(sk), sk->sk_bound_dev_if, + IPPROTO_DCCP, + orig_sport, orig_dport, sk, true); + if (IS_ERR(rt)) + return PTR_ERR(rt); if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) { ip_rt_put(rt); @@ -99,11 +101,13 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) if (err != 0) goto failure; - err = ip_route_newports(&rt, IPPROTO_DCCP, inet->inet_sport, - inet->inet_dport, sk); - if (err != 0) + rt = ip_route_newports(rt, IPPROTO_DCCP, + orig_sport, orig_dport, + inet->inet_sport, inet->inet_dport, sk); + if (IS_ERR(rt)) { + rt = NULL; goto failure; - + } /* OK, now commit destination to socket. 
*/ sk_setup_caps(sk, &rt->dst); @@ -461,17 +465,19 @@ static struct dst_entry* dccp_v4_route_skb(struct net *net, struct sock *sk, struct sk_buff *skb) { struct rtable *rt; - struct flowi fl = { .oif = skb_rtable(skb)->rt_iif, - .fl4_dst = ip_hdr(skb)->saddr, - .fl4_src = ip_hdr(skb)->daddr, - .fl4_tos = RT_CONN_FLAGS(sk), - .proto = sk->sk_protocol, - .fl_ip_sport = dccp_hdr(skb)->dccph_dport, - .fl_ip_dport = dccp_hdr(skb)->dccph_sport - }; - - security_skb_classify_flow(skb, &fl); - if (ip_route_output_flow(net, &rt, &fl, sk, 0)) { + struct flowi4 fl4 = { + .flowi4_oif = skb_rtable(skb)->rt_iif, + .daddr = ip_hdr(skb)->saddr, + .saddr = ip_hdr(skb)->daddr, + .flowi4_tos = RT_CONN_FLAGS(sk), + .flowi4_proto = sk->sk_protocol, + .fl4_sport = dccp_hdr(skb)->dccph_dport, + .fl4_dport = dccp_hdr(skb)->dccph_sport, + }; + + security_skb_classify_flow(skb, flowi4_to_flowi(&fl4)); + rt = ip_route_output_flow(net, &fl4, sk); + if (IS_ERR(rt)) { IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES); return NULL; } diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index dca711df9b60..de1b7e37ad5b 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -147,30 +147,24 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, dst = __sk_dst_check(sk, np->dst_cookie); if (dst == NULL) { struct inet_sock *inet = inet_sk(sk); - struct flowi fl; + struct flowi6 fl6; /* BUGGG_FUTURE: Again, it is not clear how to handle rthdr case. Ignore this complexity for now. */ - memset(&fl, 0, sizeof(fl)); - fl.proto = IPPROTO_DCCP; - ipv6_addr_copy(&fl.fl6_dst, &np->daddr); - ipv6_addr_copy(&fl.fl6_src, &np->saddr); - fl.oif = sk->sk_bound_dev_if; - fl.fl_ip_dport = inet->inet_dport; - fl.fl_ip_sport = inet->inet_sport; - security_sk_classify_flow(sk, &fl); - - err = ip6_dst_lookup(sk, &dst, &fl); - if (err) { - sk->sk_err_soft = -err; - goto out; - } - - err = xfrm_lookup(net, &dst, &fl, sk, 0); - if (err < 0) { - sk->sk_err_soft = -err; + memset(&fl6, 0, sizeof(fl6)); + fl6.flowi6_proto = IPPROTO_DCCP; + ipv6_addr_copy(&fl6.daddr, &np->daddr); + ipv6_addr_copy(&fl6.saddr, &np->saddr); + fl6.flowi6_oif = sk->sk_bound_dev_if; + fl6.fl6_dport = inet->inet_dport; + fl6.fl6_sport = inet->inet_sport; + security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); + + dst = ip6_dst_lookup_flow(sk, &fl6, NULL, false); + if (IS_ERR(dst)) { + sk->sk_err_soft = -PTR_ERR(dst); goto out; } } else @@ -249,34 +243,30 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req, struct sk_buff *skb; struct ipv6_txoptions *opt = NULL; struct in6_addr *final_p, final; - struct flowi fl; + struct flowi6 fl6; int err = -1; struct dst_entry *dst; - memset(&fl, 0, sizeof(fl)); - fl.proto = IPPROTO_DCCP; - ipv6_addr_copy(&fl.fl6_dst, &ireq6->rmt_addr); - ipv6_addr_copy(&fl.fl6_src, &ireq6->loc_addr); - fl.fl6_flowlabel = 0; - fl.oif = ireq6->iif; - fl.fl_ip_dport = inet_rsk(req)->rmt_port; - fl.fl_ip_sport = inet_rsk(req)->loc_port; - security_req_classify_flow(req, &fl); + memset(&fl6, 0, sizeof(fl6)); + fl6.flowi6_proto = IPPROTO_DCCP; + ipv6_addr_copy(&fl6.daddr, &ireq6->rmt_addr); + ipv6_addr_copy(&fl6.saddr, &ireq6->loc_addr); + fl6.flowlabel = 0; + fl6.flowi6_oif = ireq6->iif; + fl6.fl6_dport = inet_rsk(req)->rmt_port; + fl6.fl6_sport = inet_rsk(req)->loc_port; + security_req_classify_flow(req, flowi6_to_flowi(&fl6)); opt = np->opt; - final_p = fl6_update_dst(&fl, opt, &final); + final_p = fl6_update_dst(&fl6, opt, &final); - err = ip6_dst_lookup(sk, &dst, &fl); - if (err) - goto done; - - if (final_p) - 
ipv6_addr_copy(&fl.fl6_dst, final_p); - - err = xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0); - if (err < 0) + dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false); + if (IS_ERR(dst)) { + err = PTR_ERR(dst); + dst = NULL; goto done; + } skb = dccp_make_response(sk, dst, req); if (skb != NULL) { @@ -285,8 +275,8 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req, dh->dccph_checksum = dccp_v6_csum_finish(skb, &ireq6->loc_addr, &ireq6->rmt_addr); - ipv6_addr_copy(&fl.fl6_dst, &ireq6->rmt_addr); - err = ip6_xmit(sk, skb, &fl, opt); + ipv6_addr_copy(&fl6.daddr, &ireq6->rmt_addr); + err = ip6_xmit(sk, skb, &fl6, opt); err = net_xmit_eval(err); } @@ -308,7 +298,7 @@ static void dccp_v6_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb) { struct ipv6hdr *rxip6h; struct sk_buff *skb; - struct flowi fl; + struct flowi6 fl6; struct net *net = dev_net(skb_dst(rxskb)->dev); struct sock *ctl_sk = net->dccp.v6_ctl_sk; struct dst_entry *dst; @@ -327,25 +317,24 @@ static void dccp_v6_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb) dccp_hdr(skb)->dccph_checksum = dccp_v6_csum_finish(skb, &rxip6h->saddr, &rxip6h->daddr); - memset(&fl, 0, sizeof(fl)); - ipv6_addr_copy(&fl.fl6_dst, &rxip6h->saddr); - ipv6_addr_copy(&fl.fl6_src, &rxip6h->daddr); + memset(&fl6, 0, sizeof(fl6)); + ipv6_addr_copy(&fl6.daddr, &rxip6h->saddr); + ipv6_addr_copy(&fl6.saddr, &rxip6h->daddr); - fl.proto = IPPROTO_DCCP; - fl.oif = inet6_iif(rxskb); - fl.fl_ip_dport = dccp_hdr(skb)->dccph_dport; - fl.fl_ip_sport = dccp_hdr(skb)->dccph_sport; - security_skb_classify_flow(rxskb, &fl); + fl6.flowi6_proto = IPPROTO_DCCP; + fl6.flowi6_oif = inet6_iif(rxskb); + fl6.fl6_dport = dccp_hdr(skb)->dccph_dport; + fl6.fl6_sport = dccp_hdr(skb)->dccph_sport; + security_skb_classify_flow(rxskb, flowi6_to_flowi(&fl6)); /* sk = NULL, but it is safe for now. RST socket required. 
*/ - if (!ip6_dst_lookup(ctl_sk, &dst, &fl)) { - if (xfrm_lookup(net, &dst, &fl, NULL, 0) >= 0) { - skb_dst_set(skb, dst); - ip6_xmit(ctl_sk, skb, &fl, NULL); - DCCP_INC_STATS_BH(DCCP_MIB_OUTSEGS); - DCCP_INC_STATS_BH(DCCP_MIB_OUTRSTS); - return; - } + dst = ip6_dst_lookup_flow(ctl_sk, &fl6, NULL, false); + if (!IS_ERR(dst)) { + skb_dst_set(skb, dst); + ip6_xmit(ctl_sk, skb, &fl6, NULL); + DCCP_INC_STATS_BH(DCCP_MIB_OUTSEGS); + DCCP_INC_STATS_BH(DCCP_MIB_OUTRSTS); + return; } kfree_skb(skb); @@ -484,7 +473,6 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, struct inet6_request_sock *ireq6 = inet6_rsk(req); struct ipv6_pinfo *newnp, *np = inet6_sk(sk); struct inet_sock *newinet; - struct dccp_sock *newdp; struct dccp6_sock *newdp6; struct sock *newsk; struct ipv6_txoptions *opt; @@ -498,7 +486,6 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, return NULL; newdp6 = (struct dccp6_sock *)newsk; - newdp = dccp_sk(newsk); newinet = inet_sk(newsk); newinet->pinet6 = &newdp6->inet6; newnp = inet6_sk(newsk); @@ -540,25 +527,20 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, if (dst == NULL) { struct in6_addr *final_p, final; - struct flowi fl; - - memset(&fl, 0, sizeof(fl)); - fl.proto = IPPROTO_DCCP; - ipv6_addr_copy(&fl.fl6_dst, &ireq6->rmt_addr); - final_p = fl6_update_dst(&fl, opt, &final); - ipv6_addr_copy(&fl.fl6_src, &ireq6->loc_addr); - fl.oif = sk->sk_bound_dev_if; - fl.fl_ip_dport = inet_rsk(req)->rmt_port; - fl.fl_ip_sport = inet_rsk(req)->loc_port; - security_sk_classify_flow(sk, &fl); - - if (ip6_dst_lookup(sk, &dst, &fl)) - goto out; - - if (final_p) - ipv6_addr_copy(&fl.fl6_dst, final_p); - - if ((xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0)) < 0) + struct flowi6 fl6; + + memset(&fl6, 0, sizeof(fl6)); + fl6.flowi6_proto = IPPROTO_DCCP; + ipv6_addr_copy(&fl6.daddr, &ireq6->rmt_addr); + final_p = fl6_update_dst(&fl6, opt, &final); + ipv6_addr_copy(&fl6.saddr, &ireq6->loc_addr); + fl6.flowi6_oif = sk->sk_bound_dev_if; + fl6.fl6_dport = inet_rsk(req)->rmt_port; + fl6.fl6_sport = inet_rsk(req)->loc_port; + security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); + + dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false); + if (IS_ERR(dst)) goto out; } @@ -578,7 +560,6 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, newdp6 = (struct dccp6_sock *)newsk; newinet = inet_sk(newsk); newinet->pinet6 = &newdp6->inet6; - newdp = dccp_sk(newsk); newnp = inet6_sk(newsk); memcpy(newnp, np, sizeof(struct ipv6_pinfo)); @@ -878,7 +859,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, struct ipv6_pinfo *np = inet6_sk(sk); struct dccp_sock *dp = dccp_sk(sk); struct in6_addr *saddr = NULL, *final_p, final; - struct flowi fl; + struct flowi6 fl6; struct dst_entry *dst; int addr_type; int err; @@ -891,14 +872,14 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, if (usin->sin6_family != AF_INET6) return -EAFNOSUPPORT; - memset(&fl, 0, sizeof(fl)); + memset(&fl6, 0, sizeof(fl6)); if (np->sndflow) { - fl.fl6_flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK; - IP6_ECN_flow_init(fl.fl6_flowlabel); - if (fl.fl6_flowlabel & IPV6_FLOWLABEL_MASK) { + fl6.flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK; + IP6_ECN_flow_init(fl6.flowlabel); + if (fl6.flowlabel & IPV6_FLOWLABEL_MASK) { struct ip6_flowlabel *flowlabel; - flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel); + flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); if (flowlabel == NULL) return -EINVAL; ipv6_addr_copy(&usin->sin6_addr, 
&flowlabel->dst); @@ -935,7 +916,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, } ipv6_addr_copy(&np->daddr, &usin->sin6_addr); - np->flow_label = fl.fl6_flowlabel; + np->flow_label = fl6.flowlabel; /* * DCCP over IPv4 @@ -972,33 +953,24 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, if (!ipv6_addr_any(&np->rcv_saddr)) saddr = &np->rcv_saddr; - fl.proto = IPPROTO_DCCP; - ipv6_addr_copy(&fl.fl6_dst, &np->daddr); - ipv6_addr_copy(&fl.fl6_src, saddr ? saddr : &np->saddr); - fl.oif = sk->sk_bound_dev_if; - fl.fl_ip_dport = usin->sin6_port; - fl.fl_ip_sport = inet->inet_sport; - security_sk_classify_flow(sk, &fl); + fl6.flowi6_proto = IPPROTO_DCCP; + ipv6_addr_copy(&fl6.daddr, &np->daddr); + ipv6_addr_copy(&fl6.saddr, saddr ? saddr : &np->saddr); + fl6.flowi6_oif = sk->sk_bound_dev_if; + fl6.fl6_dport = usin->sin6_port; + fl6.fl6_sport = inet->inet_sport; + security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); - final_p = fl6_update_dst(&fl, np->opt, &final); + final_p = fl6_update_dst(&fl6, np->opt, &final); - err = ip6_dst_lookup(sk, &dst, &fl); - if (err) + dst = ip6_dst_lookup_flow(sk, &fl6, final_p, true); + if (IS_ERR(dst)) { + err = PTR_ERR(dst); goto failure; - - if (final_p) - ipv6_addr_copy(&fl.fl6_dst, final_p); - - err = __xfrm_lookup(sock_net(sk), &dst, &fl, sk, XFRM_LOOKUP_WAIT); - if (err < 0) { - if (err == -EREMOTE) - err = ip6_dst_blackhole(sk, &dst, &fl); - if (err < 0) - goto failure; } if (saddr == NULL) { - saddr = &fl.fl6_src; + saddr = &fl6.saddr; ipv6_addr_copy(&np->rcv_saddr, saddr); } diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index 2af15b15d1fa..ea3b6ee21fc9 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -908,7 +908,7 @@ static int __dn_connect(struct sock *sk, struct sockaddr_dn *addr, int addrlen, struct socket *sock = sk->sk_socket; struct dn_scp *scp = DN_SK(sk); int err = -EISCONN; - struct flowi fl; + struct flowidn fld; if (sock->state == SS_CONNECTED) goto out; @@ -947,13 +947,13 @@ static int __dn_connect(struct sock *sk, struct sockaddr_dn *addr, int addrlen, memcpy(&scp->peer, addr, sizeof(struct sockaddr_dn)); err = -EHOSTUNREACH; - memset(&fl, 0, sizeof(fl)); - fl.oif = sk->sk_bound_dev_if; - fl.fld_dst = dn_saddr2dn(&scp->peer); - fl.fld_src = dn_saddr2dn(&scp->addr); - dn_sk_ports_copy(&fl, scp); - fl.proto = DNPROTO_NSP; - if (dn_route_output_sock(&sk->sk_dst_cache, &fl, sk, flags) < 0) + memset(&fld, 0, sizeof(fld)); + fld.flowidn_oif = sk->sk_bound_dev_if; + fld.daddr = dn_saddr2dn(&scp->peer); + fld.saddr = dn_saddr2dn(&scp->addr); + dn_sk_ports_copy(&fld, scp); + fld.flowidn_proto = DNPROTO_NSP; + if (dn_route_output_sock(&sk->sk_dst_cache, &fld, sk, flags) < 0) goto out; sk->sk_route_caps = sk->sk_dst_cache->dev->features; sock->state = SS_CONNECTING; diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c index 0ef0a81bcd72..1c74ed36ce8f 100644 --- a/net/decnet/dn_fib.c +++ b/net/decnet/dn_fib.c @@ -201,7 +201,7 @@ static int dn_fib_check_nh(const struct rtmsg *r, struct dn_fib_info *fi, struct int err; if (nh->nh_gw) { - struct flowi fl; + struct flowidn fld; struct dn_fib_res res; if (nh->nh_flags&RTNH_F_ONLINK) { @@ -221,15 +221,15 @@ static int dn_fib_check_nh(const struct rtmsg *r, struct dn_fib_info *fi, struct return 0; } - memset(&fl, 0, sizeof(fl)); - fl.fld_dst = nh->nh_gw; - fl.oif = nh->nh_oif; - fl.fld_scope = r->rtm_scope + 1; + memset(&fld, 0, sizeof(fld)); + fld.daddr = nh->nh_gw; + fld.flowidn_oif = nh->nh_oif; + fld.flowidn_scope = 
r->rtm_scope + 1; - if (fl.fld_scope < RT_SCOPE_LINK) - fl.fld_scope = RT_SCOPE_LINK; + if (fld.flowidn_scope < RT_SCOPE_LINK) + fld.flowidn_scope = RT_SCOPE_LINK; - if ((err = dn_fib_lookup(&fl, &res)) != 0) + if ((err = dn_fib_lookup(&fld, &res)) != 0) return err; err = -EINVAL; @@ -404,7 +404,7 @@ failure: return NULL; } -int dn_fib_semantic_match(int type, struct dn_fib_info *fi, const struct flowi *fl, struct dn_fib_res *res) +int dn_fib_semantic_match(int type, struct dn_fib_info *fi, const struct flowidn *fld, struct dn_fib_res *res) { int err = dn_fib_props[type].error; @@ -424,7 +424,8 @@ int dn_fib_semantic_match(int type, struct dn_fib_info *fi, const struct flowi * for_nexthops(fi) { if (nh->nh_flags & RTNH_F_DEAD) continue; - if (!fl->oif || fl->oif == nh->nh_oif) + if (!fld->flowidn_oif || + fld->flowidn_oif == nh->nh_oif) break; } if (nhsel < fi->fib_nhs) { @@ -445,7 +446,7 @@ int dn_fib_semantic_match(int type, struct dn_fib_info *fi, const struct flowi * return err; } -void dn_fib_select_multipath(const struct flowi *fl, struct dn_fib_res *res) +void dn_fib_select_multipath(const struct flowidn *fld, struct dn_fib_res *res) { struct dn_fib_info *fi = res->fi; int w; diff --git a/net/decnet/dn_nsp_out.c b/net/decnet/dn_nsp_out.c index 2ef115277bea..bd78836a81eb 100644 --- a/net/decnet/dn_nsp_out.c +++ b/net/decnet/dn_nsp_out.c @@ -78,7 +78,7 @@ static void dn_nsp_send(struct sk_buff *skb) struct sock *sk = skb->sk; struct dn_scp *scp = DN_SK(sk); struct dst_entry *dst; - struct flowi fl; + struct flowidn fld; skb_reset_transport_header(skb); scp->stamp = jiffies; @@ -91,13 +91,13 @@ try_again: return; } - memset(&fl, 0, sizeof(fl)); - fl.oif = sk->sk_bound_dev_if; - fl.fld_src = dn_saddr2dn(&scp->addr); - fl.fld_dst = dn_saddr2dn(&scp->peer); - dn_sk_ports_copy(&fl, scp); - fl.proto = DNPROTO_NSP; - if (dn_route_output_sock(&sk->sk_dst_cache, &fl, sk, 0) == 0) { + memset(&fld, 0, sizeof(fld)); + fld.flowidn_oif = sk->sk_bound_dev_if; + fld.saddr = dn_saddr2dn(&scp->addr); + fld.daddr = dn_saddr2dn(&scp->peer); + dn_sk_ports_copy(&fld, scp); + fld.flowidn_proto = DNPROTO_NSP; + if (dn_route_output_sock(&sk->sk_dst_cache, &fld, sk, 0) == 0) { dst = sk_dst_get(sk); sk->sk_route_caps = dst->dev->features; goto try_again; diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 5e636365d33c..9f09d4fc2880 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -112,6 +112,7 @@ static int dn_dst_gc(struct dst_ops *ops); static struct dst_entry *dn_dst_check(struct dst_entry *, __u32); static unsigned int dn_dst_default_advmss(const struct dst_entry *dst); static unsigned int dn_dst_default_mtu(const struct dst_entry *dst); +static void dn_dst_destroy(struct dst_entry *); static struct dst_entry *dn_dst_negative_advice(struct dst_entry *); static void dn_dst_link_failure(struct sk_buff *); static void dn_dst_update_pmtu(struct dst_entry *dst, u32 mtu); @@ -133,11 +134,18 @@ static struct dst_ops dn_dst_ops = { .check = dn_dst_check, .default_advmss = dn_dst_default_advmss, .default_mtu = dn_dst_default_mtu, + .cow_metrics = dst_cow_metrics_generic, + .destroy = dn_dst_destroy, .negative_advice = dn_dst_negative_advice, .link_failure = dn_dst_link_failure, .update_pmtu = dn_dst_update_pmtu, }; +static void dn_dst_destroy(struct dst_entry *dst) +{ + dst_destroy_metrics_generic(dst); +} + static __inline__ unsigned dn_hash(__le16 src, __le16 dst) { __u16 tmp = (__u16 __force)(src ^ dst); @@ -274,14 +282,14 @@ static void dn_dst_link_failure(struct sk_buff *skb) { } 
-static inline int compare_keys(struct flowi *fl1, struct flowi *fl2) +static inline int compare_keys(struct flowidn *fl1, struct flowidn *fl2) { - return ((fl1->fld_dst ^ fl2->fld_dst) | - (fl1->fld_src ^ fl2->fld_src) | - (fl1->mark ^ fl2->mark) | - (fl1->fld_scope ^ fl2->fld_scope) | - (fl1->oif ^ fl2->oif) | - (fl1->iif ^ fl2->iif)) == 0; + return ((fl1->daddr ^ fl2->daddr) | + (fl1->saddr ^ fl2->saddr) | + (fl1->flowidn_mark ^ fl2->flowidn_mark) | + (fl1->flowidn_scope ^ fl2->flowidn_scope) | + (fl1->flowidn_oif ^ fl2->flowidn_oif) | + (fl1->flowidn_iif ^ fl2->flowidn_iif)) == 0; } static int dn_insert_route(struct dn_route *rt, unsigned hash, struct dn_route **rp) @@ -295,7 +303,7 @@ static int dn_insert_route(struct dn_route *rt, unsigned hash, struct dn_route * spin_lock_bh(&dn_rt_hash_table[hash].lock); while ((rth = rcu_dereference_protected(*rthp, lockdep_is_held(&dn_rt_hash_table[hash].lock))) != NULL) { - if (compare_keys(&rth->fl, &rt->fl)) { + if (compare_keys(&rth->fld, &rt->fld)) { /* Put it first */ *rthp = rth->dst.dn_next; rcu_assign_pointer(rth->dst.dn_next, @@ -814,14 +822,14 @@ static int dn_rt_set_next_hop(struct dn_route *rt, struct dn_fib_res *res) { struct dn_fib_info *fi = res->fi; struct net_device *dev = rt->dst.dev; + unsigned int mss_metric; struct neighbour *n; - unsigned int metric; if (fi) { if (DN_FIB_RES_GW(*res) && DN_FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) rt->rt_gateway = DN_FIB_RES_GW(*res); - dst_import_metrics(&rt->dst, fi->fib_metrics); + dst_init_metrics(&rt->dst, fi->fib_metrics, true); } rt->rt_type = res->type; @@ -834,10 +842,10 @@ static int dn_rt_set_next_hop(struct dn_route *rt, struct dn_fib_res *res) if (dst_metric(&rt->dst, RTAX_MTU) > rt->dst.dev->mtu) dst_metric_set(&rt->dst, RTAX_MTU, rt->dst.dev->mtu); - metric = dst_metric_raw(&rt->dst, RTAX_ADVMSS); - if (metric) { + mss_metric = dst_metric_raw(&rt->dst, RTAX_ADVMSS); + if (mss_metric) { unsigned int mss = dn_mss_from_pmtu(dev, dst_mtu(&rt->dst)); - if (metric > mss) + if (mss_metric > mss) dst_metric_set(&rt->dst, RTAX_ADVMSS, mss); } return 0; @@ -895,14 +903,16 @@ static inline __le16 dn_fib_rules_map_destination(__le16 daddr, struct dn_fib_re return (daddr&~mask)|res->fi->fib_nh->nh_gw; } -static int dn_route_output_slow(struct dst_entry **pprt, const struct flowi *oldflp, int try_hard) +static int dn_route_output_slow(struct dst_entry **pprt, const struct flowidn *oldflp, int try_hard) { - struct flowi fl = { .fld_dst = oldflp->fld_dst, - .fld_src = oldflp->fld_src, - .fld_scope = RT_SCOPE_UNIVERSE, - .mark = oldflp->mark, - .iif = init_net.loopback_dev->ifindex, - .oif = oldflp->oif }; + struct flowidn fld = { + .daddr = oldflp->daddr, + .saddr = oldflp->saddr, + .flowidn_scope = RT_SCOPE_UNIVERSE, + .flowidn_mark = oldflp->flowidn_mark, + .flowidn_iif = init_net.loopback_dev->ifindex, + .flowidn_oif = oldflp->flowidn_oif, + }; struct dn_route *rt = NULL; struct net_device *dev_out = NULL, *dev; struct neighbour *neigh = NULL; @@ -916,13 +926,14 @@ static int dn_route_output_slow(struct dst_entry **pprt, const struct flowi *old if (decnet_debug_level & 16) printk(KERN_DEBUG "dn_route_output_slow: dst=%04x src=%04x mark=%d" - " iif=%d oif=%d\n", le16_to_cpu(oldflp->fld_dst), - le16_to_cpu(oldflp->fld_src), - oldflp->mark, init_net.loopback_dev->ifindex, oldflp->oif); + " iif=%d oif=%d\n", le16_to_cpu(oldflp->daddr), + le16_to_cpu(oldflp->saddr), + oldflp->flowidn_mark, init_net.loopback_dev->ifindex, + oldflp->flowidn_oif); /* If we have an output interface, verify its a 
DECnet device */ - if (oldflp->oif) { - dev_out = dev_get_by_index(&init_net, oldflp->oif); + if (oldflp->flowidn_oif) { + dev_out = dev_get_by_index(&init_net, oldflp->flowidn_oif); err = -ENODEV; if (dev_out && dev_out->dn_ptr == NULL) { dev_put(dev_out); @@ -933,11 +944,11 @@ static int dn_route_output_slow(struct dst_entry **pprt, const struct flowi *old } /* If we have a source address, verify that its a local address */ - if (oldflp->fld_src) { + if (oldflp->saddr) { err = -EADDRNOTAVAIL; if (dev_out) { - if (dn_dev_islocal(dev_out, oldflp->fld_src)) + if (dn_dev_islocal(dev_out, oldflp->saddr)) goto source_ok; dev_put(dev_out); goto out; @@ -946,11 +957,11 @@ static int dn_route_output_slow(struct dst_entry **pprt, const struct flowi *old for_each_netdev_rcu(&init_net, dev) { if (!dev->dn_ptr) continue; - if (!dn_dev_islocal(dev, oldflp->fld_src)) + if (!dn_dev_islocal(dev, oldflp->saddr)) continue; if ((dev->flags & IFF_LOOPBACK) && - oldflp->fld_dst && - !dn_dev_islocal(dev, oldflp->fld_dst)) + oldflp->daddr && + !dn_dev_islocal(dev, oldflp->daddr)) continue; dev_out = dev; @@ -965,22 +976,22 @@ source_ok: } /* No destination? Assume its local */ - if (!fl.fld_dst) { - fl.fld_dst = fl.fld_src; + if (!fld.daddr) { + fld.daddr = fld.saddr; err = -EADDRNOTAVAIL; if (dev_out) dev_put(dev_out); dev_out = init_net.loopback_dev; dev_hold(dev_out); - if (!fl.fld_dst) { - fl.fld_dst = - fl.fld_src = dnet_select_source(dev_out, 0, + if (!fld.daddr) { + fld.daddr = + fld.saddr = dnet_select_source(dev_out, 0, RT_SCOPE_HOST); - if (!fl.fld_dst) + if (!fld.daddr) goto out; } - fl.oif = init_net.loopback_dev->ifindex; + fld.flowidn_oif = init_net.loopback_dev->ifindex; res.type = RTN_LOCAL; goto make_route; } @@ -989,8 +1000,8 @@ source_ok: printk(KERN_DEBUG "dn_route_output_slow: initial checks complete." " dst=%o4x src=%04x oif=%d try_hard=%d\n", - le16_to_cpu(fl.fld_dst), le16_to_cpu(fl.fld_src), - fl.oif, try_hard); + le16_to_cpu(fld.daddr), le16_to_cpu(fld.saddr), + fld.flowidn_oif, try_hard); /* * N.B. If the kernel is compiled without router support then @@ -998,7 +1009,7 @@ source_ok: * will always be executed. 
*/ err = -ESRCH; - if (try_hard || (err = dn_fib_lookup(&fl, &res)) != 0) { + if (try_hard || (err = dn_fib_lookup(&fld, &res)) != 0) { struct dn_dev *dn_db; if (err != -ESRCH) goto out; @@ -1013,19 +1024,19 @@ source_ok: * here */ if (!try_hard) { - neigh = neigh_lookup_nodev(&dn_neigh_table, &init_net, &fl.fld_dst); + neigh = neigh_lookup_nodev(&dn_neigh_table, &init_net, &fld.daddr); if (neigh) { - if ((oldflp->oif && - (neigh->dev->ifindex != oldflp->oif)) || - (oldflp->fld_src && + if ((oldflp->flowidn_oif && + (neigh->dev->ifindex != oldflp->flowidn_oif)) || + (oldflp->saddr && (!dn_dev_islocal(neigh->dev, - oldflp->fld_src)))) { + oldflp->saddr)))) { neigh_release(neigh); neigh = NULL; } else { if (dev_out) dev_put(dev_out); - if (dn_dev_islocal(neigh->dev, fl.fld_dst)) { + if (dn_dev_islocal(neigh->dev, fld.daddr)) { dev_out = init_net.loopback_dev; res.type = RTN_LOCAL; } else { @@ -1045,7 +1056,7 @@ source_ok: goto out; dn_db = rcu_dereference_raw(dev_out->dn_ptr); /* Possible improvement - check all devices for local addr */ - if (dn_dev_islocal(dev_out, fl.fld_dst)) { + if (dn_dev_islocal(dev_out, fld.daddr)) { dev_put(dev_out); dev_out = init_net.loopback_dev; dev_hold(dev_out); @@ -1061,16 +1072,16 @@ select_source: if (neigh) gateway = ((struct dn_neigh *)neigh)->addr; if (gateway == 0) - gateway = fl.fld_dst; - if (fl.fld_src == 0) { - fl.fld_src = dnet_select_source(dev_out, gateway, - res.type == RTN_LOCAL ? - RT_SCOPE_HOST : - RT_SCOPE_LINK); - if (fl.fld_src == 0 && res.type != RTN_LOCAL) + gateway = fld.daddr; + if (fld.saddr == 0) { + fld.saddr = dnet_select_source(dev_out, gateway, + res.type == RTN_LOCAL ? + RT_SCOPE_HOST : + RT_SCOPE_LINK); + if (fld.saddr == 0 && res.type != RTN_LOCAL) goto e_addr; } - fl.oif = dev_out->ifindex; + fld.flowidn_oif = dev_out->ifindex; goto make_route; } free_res = 1; @@ -1079,61 +1090,61 @@ select_source: goto e_inval; if (res.type == RTN_LOCAL) { - if (!fl.fld_src) - fl.fld_src = fl.fld_dst; + if (!fld.saddr) + fld.saddr = fld.daddr; if (dev_out) dev_put(dev_out); dev_out = init_net.loopback_dev; dev_hold(dev_out); - fl.oif = dev_out->ifindex; + fld.flowidn_oif = dev_out->ifindex; if (res.fi) dn_fib_info_put(res.fi); res.fi = NULL; goto make_route; } - if (res.fi->fib_nhs > 1 && fl.oif == 0) - dn_fib_select_multipath(&fl, &res); + if (res.fi->fib_nhs > 1 && fld.flowidn_oif == 0) + dn_fib_select_multipath(&fld, &res); /* * We could add some logic to deal with default routes here and * get rid of some of the special casing above. */ - if (!fl.fld_src) - fl.fld_src = DN_FIB_RES_PREFSRC(res); + if (!fld.saddr) + fld.saddr = DN_FIB_RES_PREFSRC(res); if (dev_out) dev_put(dev_out); dev_out = DN_FIB_RES_DEV(res); dev_hold(dev_out); - fl.oif = dev_out->ifindex; + fld.flowidn_oif = dev_out->ifindex; gateway = DN_FIB_RES_GW(res); make_route: if (dev_out->flags & IFF_LOOPBACK) flags |= RTCF_LOCAL; - rt = dst_alloc(&dn_dst_ops); + rt = dst_alloc(&dn_dst_ops, 0); if (rt == NULL) goto e_nobufs; atomic_set(&rt->dst.__refcnt, 1); rt->dst.flags = DST_HOST; - rt->fl.fld_src = oldflp->fld_src; - rt->fl.fld_dst = oldflp->fld_dst; - rt->fl.oif = oldflp->oif; - rt->fl.iif = 0; - rt->fl.mark = oldflp->mark; + rt->fld.saddr = oldflp->saddr; + rt->fld.daddr = oldflp->daddr; + rt->fld.flowidn_oif = oldflp->flowidn_oif; + rt->fld.flowidn_iif = 0; + rt->fld.flowidn_mark = oldflp->flowidn_mark; - rt->rt_saddr = fl.fld_src; - rt->rt_daddr = fl.fld_dst; - rt->rt_gateway = gateway ? 
gateway : fl.fld_dst; - rt->rt_local_src = fl.fld_src; + rt->rt_saddr = fld.saddr; + rt->rt_daddr = fld.daddr; + rt->rt_gateway = gateway ? gateway : fld.daddr; + rt->rt_local_src = fld.saddr; - rt->rt_dst_map = fl.fld_dst; - rt->rt_src_map = fl.fld_src; + rt->rt_dst_map = fld.daddr; + rt->rt_src_map = fld.saddr; rt->dst.dev = dev_out; dev_hold(dev_out); @@ -1151,7 +1162,7 @@ make_route: if (err) goto e_neighbour; - hash = dn_hash(rt->fl.fld_src, rt->fl.fld_dst); + hash = dn_hash(rt->fld.saddr, rt->fld.daddr); dn_insert_route(rt, hash, (struct dn_route **)pprt); done: @@ -1182,20 +1193,20 @@ e_neighbour: /* * N.B. The flags may be moved into the flowi at some future stage. */ -static int __dn_route_output_key(struct dst_entry **pprt, const struct flowi *flp, int flags) +static int __dn_route_output_key(struct dst_entry **pprt, const struct flowidn *flp, int flags) { - unsigned hash = dn_hash(flp->fld_src, flp->fld_dst); + unsigned hash = dn_hash(flp->saddr, flp->daddr); struct dn_route *rt = NULL; if (!(flags & MSG_TRYHARD)) { rcu_read_lock_bh(); for (rt = rcu_dereference_bh(dn_rt_hash_table[hash].chain); rt; rt = rcu_dereference_bh(rt->dst.dn_next)) { - if ((flp->fld_dst == rt->fl.fld_dst) && - (flp->fld_src == rt->fl.fld_src) && - (flp->mark == rt->fl.mark) && + if ((flp->daddr == rt->fld.daddr) && + (flp->saddr == rt->fld.saddr) && + (flp->flowidn_mark == rt->fld.flowidn_mark) && dn_is_output_route(rt) && - (rt->fl.oif == flp->oif)) { + (rt->fld.flowidn_oif == flp->flowidn_oif)) { dst_use(&rt->dst, jiffies); rcu_read_unlock_bh(); *pprt = &rt->dst; @@ -1208,25 +1219,36 @@ static int __dn_route_output_key(struct dst_entry **pprt, const struct flowi *fl return dn_route_output_slow(pprt, flp, flags); } -static int dn_route_output_key(struct dst_entry **pprt, struct flowi *flp, int flags) +static int dn_route_output_key(struct dst_entry **pprt, struct flowidn *flp, int flags) { int err; err = __dn_route_output_key(pprt, flp, flags); - if (err == 0 && flp->proto) { - err = xfrm_lookup(&init_net, pprt, flp, NULL, 0); + if (err == 0 && flp->flowidn_proto) { + *pprt = xfrm_lookup(&init_net, *pprt, + flowidn_to_flowi(flp), NULL, 0); + if (IS_ERR(*pprt)) { + err = PTR_ERR(*pprt); + *pprt = NULL; + } } return err; } -int dn_route_output_sock(struct dst_entry **pprt, struct flowi *fl, struct sock *sk, int flags) +int dn_route_output_sock(struct dst_entry **pprt, struct flowidn *fl, struct sock *sk, int flags) { int err; err = __dn_route_output_key(pprt, fl, flags & MSG_TRYHARD); - if (err == 0 && fl->proto) { - err = xfrm_lookup(&init_net, pprt, fl, sk, - (flags & MSG_DONTWAIT) ? 
0 : XFRM_LOOKUP_WAIT); + if (err == 0 && fl->flowidn_proto) { + if (!(flags & MSG_DONTWAIT)) + fl->flowidn_flags |= FLOWI_FLAG_CAN_SLEEP; + *pprt = xfrm_lookup(&init_net, *pprt, + flowidn_to_flowi(fl), sk, 0); + if (IS_ERR(*pprt)) { + err = PTR_ERR(*pprt); + *pprt = NULL; + } } return err; } @@ -1243,11 +1265,13 @@ static int dn_route_input_slow(struct sk_buff *skb) int flags = 0; __le16 gateway = 0; __le16 local_src = 0; - struct flowi fl = { .fld_dst = cb->dst, - .fld_src = cb->src, - .fld_scope = RT_SCOPE_UNIVERSE, - .mark = skb->mark, - .iif = skb->dev->ifindex }; + struct flowidn fld = { + .daddr = cb->dst, + .saddr = cb->src, + .flowidn_scope = RT_SCOPE_UNIVERSE, + .flowidn_mark = skb->mark, + .flowidn_iif = skb->dev->ifindex, + }; struct dn_fib_res res = { .fi = NULL, .type = RTN_UNREACHABLE }; int err = -EINVAL; int free_res = 0; @@ -1258,7 +1282,7 @@ static int dn_route_input_slow(struct sk_buff *skb) goto out; /* Zero source addresses are not allowed */ - if (fl.fld_src == 0) + if (fld.saddr == 0) goto out; /* @@ -1272,7 +1296,7 @@ static int dn_route_input_slow(struct sk_buff *skb) if (dn_dev_islocal(in_dev, cb->src)) goto out; - err = dn_fib_lookup(&fl, &res); + err = dn_fib_lookup(&fld, &res); if (err) { if (err != -ESRCH) goto out; @@ -1284,7 +1308,7 @@ static int dn_route_input_slow(struct sk_buff *skb) res.type = RTN_LOCAL; } else { - __le16 src_map = fl.fld_src; + __le16 src_map = fld.saddr; free_res = 1; out_dev = DN_FIB_RES_DEV(res); @@ -1297,22 +1321,22 @@ static int dn_route_input_slow(struct sk_buff *skb) dev_hold(out_dev); if (res.r) - src_map = fl.fld_src; /* no NAT support for now */ + src_map = fld.saddr; /* no NAT support for now */ gateway = DN_FIB_RES_GW(res); if (res.type == RTN_NAT) { - fl.fld_dst = dn_fib_rules_map_destination(fl.fld_dst, &res); + fld.daddr = dn_fib_rules_map_destination(fld.daddr, &res); dn_fib_res_put(&res); free_res = 0; - if (dn_fib_lookup(&fl, &res)) + if (dn_fib_lookup(&fld, &res)) goto e_inval; free_res = 1; if (res.type != RTN_UNICAST) goto e_inval; flags |= RTCF_DNAT; - gateway = fl.fld_dst; + gateway = fld.daddr; } - fl.fld_src = src_map; + fld.saddr = src_map; } switch(res.type) { @@ -1326,8 +1350,8 @@ static int dn_route_input_slow(struct sk_buff *skb) if (dn_db->parms.forwarding == 0) goto e_inval; - if (res.fi->fib_nhs > 1 && fl.oif == 0) - dn_fib_select_multipath(&fl, &res); + if (res.fi->fib_nhs > 1 && fld.flowidn_oif == 0) + dn_fib_select_multipath(&fld, &res); /* * Check for out_dev == in_dev. We use the RTCF_DOREDIRECT @@ -1345,8 +1369,8 @@ static int dn_route_input_slow(struct sk_buff *skb) break; case RTN_LOCAL: flags |= RTCF_LOCAL; - fl.fld_src = cb->dst; - fl.fld_dst = cb->src; + fld.saddr = cb->dst; + fld.daddr = cb->src; /* Routing tables gave us a gateway */ if (gateway) @@ -1375,25 +1399,25 @@ static int dn_route_input_slow(struct sk_buff *skb) } make_route: - rt = dst_alloc(&dn_dst_ops); + rt = dst_alloc(&dn_dst_ops, 0); if (rt == NULL) goto e_nobufs; - rt->rt_saddr = fl.fld_src; - rt->rt_daddr = fl.fld_dst; - rt->rt_gateway = fl.fld_dst; + rt->rt_saddr = fld.saddr; + rt->rt_daddr = fld.daddr; + rt->rt_gateway = fld.daddr; if (gateway) rt->rt_gateway = gateway; rt->rt_local_src = local_src ? 
local_src : rt->rt_saddr; - rt->rt_dst_map = fl.fld_dst; - rt->rt_src_map = fl.fld_src; + rt->rt_dst_map = fld.daddr; + rt->rt_src_map = fld.saddr; - rt->fl.fld_src = cb->src; - rt->fl.fld_dst = cb->dst; - rt->fl.oif = 0; - rt->fl.iif = in_dev->ifindex; - rt->fl.mark = fl.mark; + rt->fld.saddr = cb->src; + rt->fld.daddr = cb->dst; + rt->fld.flowidn_oif = 0; + rt->fld.flowidn_iif = in_dev->ifindex; + rt->fld.flowidn_mark = fld.flowidn_mark; rt->dst.flags = DST_HOST; rt->dst.neighbour = neigh; @@ -1423,7 +1447,7 @@ make_route: if (err) goto e_neighbour; - hash = dn_hash(rt->fl.fld_src, rt->fl.fld_dst); + hash = dn_hash(rt->fld.saddr, rt->fld.daddr); dn_insert_route(rt, hash, &rt); skb_dst_set(skb, &rt->dst); @@ -1463,11 +1487,11 @@ static int dn_route_input(struct sk_buff *skb) rcu_read_lock(); for(rt = rcu_dereference(dn_rt_hash_table[hash].chain); rt != NULL; rt = rcu_dereference(rt->dst.dn_next)) { - if ((rt->fl.fld_src == cb->src) && - (rt->fl.fld_dst == cb->dst) && - (rt->fl.oif == 0) && - (rt->fl.mark == skb->mark) && - (rt->fl.iif == cb->iif)) { + if ((rt->fld.saddr == cb->src) && + (rt->fld.daddr == cb->dst) && + (rt->fld.flowidn_oif == 0) && + (rt->fld.flowidn_mark == skb->mark) && + (rt->fld.flowidn_iif == cb->iif)) { dst_use(&rt->dst, jiffies); rcu_read_unlock(); skb_dst_set(skb, (struct dst_entry *)rt); @@ -1503,9 +1527,9 @@ static int dn_rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, if (rt->rt_flags & RTCF_NOTIFY) r->rtm_flags |= RTM_F_NOTIFY; RTA_PUT(skb, RTA_DST, 2, &rt->rt_daddr); - if (rt->fl.fld_src) { + if (rt->fld.saddr) { r->rtm_src_len = 16; - RTA_PUT(skb, RTA_SRC, 2, &rt->fl.fld_src); + RTA_PUT(skb, RTA_SRC, 2, &rt->fld.saddr); } if (rt->dst.dev) RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->dst.dev->ifindex); @@ -1524,7 +1548,7 @@ static int dn_rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, rt->dst.error) < 0) goto rtattr_failure; if (dn_is_input_route(rt)) - RTA_PUT(skb, RTA_IIF, sizeof(int), &rt->fl.iif); + RTA_PUT(skb, RTA_IIF, sizeof(int), &rt->fld.flowidn_iif); nlh->nlmsg_len = skb_tail_pointer(skb) - b; return skb->len; @@ -1547,13 +1571,13 @@ static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void struct dn_skb_cb *cb; int err; struct sk_buff *skb; - struct flowi fl; + struct flowidn fld; if (!net_eq(net, &init_net)) return -EINVAL; - memset(&fl, 0, sizeof(fl)); - fl.proto = DNPROTO_NSP; + memset(&fld, 0, sizeof(fld)); + fld.flowidn_proto = DNPROTO_NSP; skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (skb == NULL) @@ -1562,15 +1586,15 @@ static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void cb = DN_SKB_CB(skb); if (rta[RTA_SRC-1]) - memcpy(&fl.fld_src, RTA_DATA(rta[RTA_SRC-1]), 2); + memcpy(&fld.saddr, RTA_DATA(rta[RTA_SRC-1]), 2); if (rta[RTA_DST-1]) - memcpy(&fl.fld_dst, RTA_DATA(rta[RTA_DST-1]), 2); + memcpy(&fld.daddr, RTA_DATA(rta[RTA_DST-1]), 2); if (rta[RTA_IIF-1]) - memcpy(&fl.iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int)); + memcpy(&fld.flowidn_iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int)); - if (fl.iif) { + if (fld.flowidn_iif) { struct net_device *dev; - if ((dev = dev_get_by_index(&init_net, fl.iif)) == NULL) { + if ((dev = dev_get_by_index(&init_net, fld.flowidn_iif)) == NULL) { kfree_skb(skb); return -ENODEV; } @@ -1581,8 +1605,8 @@ static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void } skb->protocol = htons(ETH_P_DNA_RT); skb->dev = dev; - cb->src = fl.fld_src; - cb->dst = fl.fld_dst; + cb->src = fld.saddr; + cb->dst = fld.daddr; local_bh_disable(); err = 
dn_route_input(skb); local_bh_enable(); @@ -1594,8 +1618,8 @@ static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void int oif = 0; if (rta[RTA_OIF - 1]) memcpy(&oif, RTA_DATA(rta[RTA_OIF - 1]), sizeof(int)); - fl.oif = oif; - err = dn_route_output_key((struct dst_entry **)&rt, &fl, 0); + fld.flowidn_oif = oif; + err = dn_route_output_key((struct dst_entry **)&rt, &fld, 0); } if (skb->dev) diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c index 6eb91df3c550..f0efb0ccfeca 100644 --- a/net/decnet/dn_rules.c +++ b/net/decnet/dn_rules.c @@ -49,14 +49,15 @@ struct dn_fib_rule }; -int dn_fib_lookup(struct flowi *flp, struct dn_fib_res *res) +int dn_fib_lookup(struct flowidn *flp, struct dn_fib_res *res) { struct fib_lookup_arg arg = { .result = res, }; int err; - err = fib_rules_lookup(dn_fib_rules_ops, flp, 0, &arg); + err = fib_rules_lookup(dn_fib_rules_ops, + flowidn_to_flowi(flp), 0, &arg); res->r = arg.rule; return err; @@ -65,6 +66,7 @@ int dn_fib_lookup(struct flowi *flp, struct dn_fib_res *res) static int dn_fib_rule_action(struct fib_rule *rule, struct flowi *flp, int flags, struct fib_lookup_arg *arg) { + struct flowidn *fld = &flp->u.dn; int err = -EAGAIN; struct dn_fib_table *tbl; @@ -90,7 +92,7 @@ static int dn_fib_rule_action(struct fib_rule *rule, struct flowi *flp, if (tbl == NULL) goto errout; - err = tbl->lookup(tbl, flp, (struct dn_fib_res *)arg->result); + err = tbl->lookup(tbl, fld, (struct dn_fib_res *)arg->result); if (err > 0) err = -EAGAIN; errout: @@ -104,8 +106,9 @@ static const struct nla_policy dn_fib_rule_policy[FRA_MAX+1] = { static int dn_fib_rule_match(struct fib_rule *rule, struct flowi *fl, int flags) { struct dn_fib_rule *r = (struct dn_fib_rule *)rule; - __le16 daddr = fl->fld_dst; - __le16 saddr = fl->fld_src; + struct flowidn *fld = &fl->u.dn; + __le16 daddr = fld->daddr; + __le16 saddr = fld->saddr; if (((saddr ^ r->src) & r->srcmask) || ((daddr ^ r->dst) & r->dstmask)) @@ -175,7 +178,7 @@ static int dn_fib_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh, unsigned dnet_addr_type(__le16 addr) { - struct flowi fl = { .fld_dst = addr }; + struct flowidn fld = { .daddr = addr }; struct dn_fib_res res; unsigned ret = RTN_UNICAST; struct dn_fib_table *tb = dn_fib_get_table(RT_TABLE_LOCAL, 0); @@ -183,7 +186,7 @@ unsigned dnet_addr_type(__le16 addr) res.r = NULL; if (tb) { - if (!tb->lookup(tb, &fl, &res)) { + if (!tb->lookup(tb, &fld, &res)) { ret = res.type; dn_fib_res_put(&res); } diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c index b66600b3f4b5..99d8d3a40998 100644 --- a/net/decnet/dn_table.c +++ b/net/decnet/dn_table.c @@ -764,7 +764,7 @@ static int dn_fib_table_flush(struct dn_fib_table *tb) return found; } -static int dn_fib_table_lookup(struct dn_fib_table *tb, const struct flowi *flp, struct dn_fib_res *res) +static int dn_fib_table_lookup(struct dn_fib_table *tb, const struct flowidn *flp, struct dn_fib_res *res) { int err; struct dn_zone *dz; @@ -773,7 +773,7 @@ static int dn_fib_table_lookup(struct dn_fib_table *tb, const struct flowi *flp, read_lock(&dn_fib_tables_lock); for(dz = t->dh_zone_list; dz; dz = dz->dz_next) { struct dn_fib_node *f; - dn_fib_key_t k = dz_key(flp->fld_dst, dz); + dn_fib_key_t k = dz_key(flp->daddr, dz); for(f = dz_chain(k, dz); f; f = f->fn_next) { if (!dn_key_eq(k, f->fn_key)) { @@ -788,7 +788,7 @@ static int dn_fib_table_lookup(struct dn_fib_table *tb, const struct flowi *flp, if (f->fn_state&DN_S_ZOMBIE) continue; - if (f->fn_scope < flp->fld_scope) + if 
(f->fn_scope < flp->flowidn_scope) continue; err = dn_fib_semantic_match(f->fn_type, DN_FIB_INFO(f), flp, res); diff --git a/net/dns_resolver/dns_key.c b/net/dns_resolver/dns_key.c index 739435a6af39..cfa7a5e1c5c9 100644 --- a/net/dns_resolver/dns_key.c +++ b/net/dns_resolver/dns_key.c @@ -67,8 +67,9 @@ dns_resolver_instantiate(struct key *key, const void *_data, size_t datalen) size_t result_len = 0; const char *data = _data, *end, *opt; - kenter("%%%d,%s,'%s',%zu", - key->serial, key->description, data, datalen); + kenter("%%%d,%s,'%*.*s',%zu", + key->serial, key->description, + (int)datalen, (int)datalen, data, datalen); if (datalen <= 1 || !data || data[datalen - 1] != '\0') return -EINVAL; @@ -217,6 +218,19 @@ static void dns_resolver_describe(const struct key *key, struct seq_file *m) seq_printf(m, ": %u", key->datalen); } +/* + * read the DNS data + * - the key's semaphore is read-locked + */ +static long dns_resolver_read(const struct key *key, + char __user *buffer, size_t buflen) +{ + if (key->type_data.x[0]) + return key->type_data.x[0]; + + return user_read(key, buffer, buflen); +} + struct key_type key_type_dns_resolver = { .name = "dns_resolver", .instantiate = dns_resolver_instantiate, @@ -224,7 +238,7 @@ struct key_type key_type_dns_resolver = { .revoke = user_revoke, .destroy = user_destroy, .describe = dns_resolver_describe, - .read = user_read, + .read = dns_resolver_read, }; static int __init init_dns_resolver(void) diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 0c877a74e1f4..3fb14b7c13cf 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -428,7 +428,7 @@ static void __exit dsa_cleanup_module(void) } module_exit(dsa_cleanup_module); -MODULE_AUTHOR("Lennert Buytenhek <buytenh@wantstofly.org>") +MODULE_AUTHOR("Lennert Buytenhek <buytenh@wantstofly.org>"); MODULE_DESCRIPTION("Driver for Distributed Switch Architecture switch chips"); MODULE_LICENSE("GPL"); MODULE_ALIAS("platform:dsa"); diff --git a/net/dsa/mv88e6060.c b/net/dsa/mv88e6060.c index 83277f463af7..8f4ff5a2c813 100644 --- a/net/dsa/mv88e6060.c +++ b/net/dsa/mv88e6060.c @@ -18,7 +18,7 @@ static int reg_read(struct dsa_switch *ds, int addr, int reg) { - return mdiobus_read(ds->master_mii_bus, addr, reg); + return mdiobus_read(ds->master_mii_bus, ds->pd->sw_addr + addr, reg); } #define REG_READ(addr, reg) \ @@ -34,7 +34,8 @@ static int reg_read(struct dsa_switch *ds, int addr, int reg) static int reg_write(struct dsa_switch *ds, int addr, int reg, u16 val) { - return mdiobus_write(ds->master_mii_bus, addr, reg, val); + return mdiobus_write(ds->master_mii_bus, ds->pd->sw_addr + addr, + reg, val); } #define REG_WRITE(addr, reg, val) \ @@ -50,7 +51,7 @@ static char *mv88e6060_probe(struct mii_bus *bus, int sw_addr) { int ret; - ret = mdiobus_read(bus, REG_PORT(0), 0x03); + ret = mdiobus_read(bus, sw_addr + REG_PORT(0), 0x03); if (ret >= 0) { ret &= 0xfff0; if (ret == 0x0600) diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c index 15dcc1a586b4..0c2826337919 100644 --- a/net/econet/af_econet.c +++ b/net/econet/af_econet.c @@ -265,13 +265,13 @@ static void ec_tx_done(struct sk_buff *skb, int result) static int econet_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t len) { - struct sock *sk = sock->sk; struct sockaddr_ec *saddr=(struct sockaddr_ec *)msg->msg_name; struct net_device *dev; struct ec_addr addr; int err; unsigned char port, cb; #if defined(CONFIG_ECONET_AUNUDP) || defined(CONFIG_ECONET_NATIVE) + struct sock *sk = sock->sk; struct sk_buff *skb; struct ec_cb *eb; #endif @@ 
-488,10 +488,10 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock, error_free_buf: vfree(userbuf); +error: #else err = -EPROTOTYPE; #endif - error: mutex_unlock(&econet_mutex); return err; diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 8949a05ac307..cbb505ba9324 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -55,45 +55,9 @@ config IP_ADVANCED_ROUTER If unsure, say N here. -choice - prompt "Choose IP: FIB lookup algorithm (choose FIB_HASH if unsure)" - depends on IP_ADVANCED_ROUTER - default ASK_IP_FIB_HASH - -config ASK_IP_FIB_HASH - bool "FIB_HASH" - ---help--- - Current FIB is very proven and good enough for most users. - -config IP_FIB_TRIE - bool "FIB_TRIE" - ---help--- - Use new experimental LC-trie as FIB lookup algorithm. - This improves lookup performance if you have a large - number of routes. - - LC-trie is a longest matching prefix lookup algorithm which - performs better than FIB_HASH for large routing tables. - But, it consumes more memory and is more complex. - - LC-trie is described in: - - IP-address lookup using LC-tries. Stefan Nilsson and Gunnar Karlsson - IEEE Journal on Selected Areas in Communications, 17(6):1083-1092, - June 1999 - - An experimental study of compression methods for dynamic tries - Stefan Nilsson and Matti Tikkanen. Algorithmica, 33(1):19-33, 2002. - <http://www.csc.kth.se/~snilsson/software/dyntrie2/> - -endchoice - -config IP_FIB_HASH - def_bool ASK_IP_FIB_HASH || !IP_ADVANCED_ROUTER - config IP_FIB_TRIE_STATS bool "FIB TRIE statistics" - depends on IP_FIB_TRIE + depends on IP_ADVANCED_ROUTER ---help--- Keep track of statistics on structure of FIB TRIE table. Useful for testing and measuring TRIE performance. diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index 4978d22f9a75..0dc772d0d125 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -10,12 +10,10 @@ obj-y := route.o inetpeer.o protocol.o \ tcp_minisocks.o tcp_cong.o \ datagram.o raw.o udp.o udplite.o \ arp.o icmp.o devinet.o af_inet.o igmp.o \ - fib_frontend.o fib_semantics.o \ + fib_frontend.o fib_semantics.o fib_trie.o \ inet_fragment.o obj-$(CONFIG_SYSCTL) += sysctl_net_ipv4.o -obj-$(CONFIG_IP_FIB_HASH) += fib_hash.o -obj-$(CONFIG_IP_FIB_TRIE) += fib_trie.o obj-$(CONFIG_PROC_FS) += proc.o obj-$(CONFIG_IP_MULTIPLE_TABLES) += fib_rules.o obj-$(CONFIG_IP_MROUTE) += ipmr.o diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index f2b61107df6c..807d83c02ef6 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -880,6 +880,19 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) } EXPORT_SYMBOL(inet_ioctl); +#ifdef CONFIG_COMPAT +int inet_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) +{ + struct sock *sk = sock->sk; + int err = -ENOIOCTLCMD; + + if (sk->sk_prot->compat_ioctl) + err = sk->sk_prot->compat_ioctl(sk, cmd, arg); + + return err; +} +#endif + const struct proto_ops inet_stream_ops = { .family = PF_INET, .owner = THIS_MODULE, @@ -903,6 +916,7 @@ const struct proto_ops inet_stream_ops = { #ifdef CONFIG_COMPAT .compat_setsockopt = compat_sock_common_setsockopt, .compat_getsockopt = compat_sock_common_getsockopt, + .compat_ioctl = inet_compat_ioctl, #endif }; EXPORT_SYMBOL(inet_stream_ops); @@ -929,6 +943,7 @@ const struct proto_ops inet_dgram_ops = { #ifdef CONFIG_COMPAT .compat_setsockopt = compat_sock_common_setsockopt, .compat_getsockopt = compat_sock_common_getsockopt, + .compat_ioctl = inet_compat_ioctl, #endif }; EXPORT_SYMBOL(inet_dgram_ops); @@ -959,6 +974,7 @@ static const struct 
proto_ops inet_sockraw_ops = { #ifdef CONFIG_COMPAT .compat_setsockopt = compat_sock_common_setsockopt, .compat_getsockopt = compat_sock_common_getsockopt, + .compat_ioctl = inet_compat_ioctl, #endif }; @@ -1085,23 +1101,20 @@ int sysctl_ip_dynaddr __read_mostly; static int inet_sk_reselect_saddr(struct sock *sk) { struct inet_sock *inet = inet_sk(sk); - int err; - struct rtable *rt; __be32 old_saddr = inet->inet_saddr; - __be32 new_saddr; __be32 daddr = inet->inet_daddr; + struct rtable *rt; + __be32 new_saddr; if (inet->opt && inet->opt->srr) daddr = inet->opt->faddr; /* Query new route. */ - err = ip_route_connect(&rt, daddr, 0, - RT_CONN_FLAGS(sk), - sk->sk_bound_dev_if, - sk->sk_protocol, - inet->inet_sport, inet->inet_dport, sk, 0); - if (err) - return err; + rt = ip_route_connect(daddr, 0, RT_CONN_FLAGS(sk), + sk->sk_bound_dev_if, sk->sk_protocol, + inet->inet_sport, inet->inet_dport, sk, false); + if (IS_ERR(rt)) + return PTR_ERR(rt); sk_setup_caps(sk, &rt->dst); @@ -1144,25 +1157,16 @@ int inet_sk_rebuild_header(struct sock *sk) daddr = inet->inet_daddr; if (inet->opt && inet->opt->srr) daddr = inet->opt->faddr; -{ - struct flowi fl = { - .oif = sk->sk_bound_dev_if, - .mark = sk->sk_mark, - .fl4_dst = daddr, - .fl4_src = inet->inet_saddr, - .fl4_tos = RT_CONN_FLAGS(sk), - .proto = sk->sk_protocol, - .flags = inet_sk_flowi_flags(sk), - .fl_ip_sport = inet->inet_sport, - .fl_ip_dport = inet->inet_dport, - }; - - security_sk_classify_flow(sk, &fl); - err = ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 0); -} - if (!err) + rt = ip_route_output_ports(sock_net(sk), sk, daddr, inet->inet_saddr, + inet->inet_dport, inet->inet_sport, + sk->sk_protocol, RT_CONN_FLAGS(sk), + sk->sk_bound_dev_if); + if (!IS_ERR(rt)) { + err = 0; sk_setup_caps(sk, &rt->dst); - else { + } else { + err = PTR_ERR(rt); + /* Routing failed... 
*/ sk->sk_route_caps = 0; /* @@ -1215,7 +1219,7 @@ out: return err; } -static struct sk_buff *inet_gso_segment(struct sk_buff *skb, int features) +static struct sk_buff *inet_gso_segment(struct sk_buff *skb, u32 features) { struct sk_buff *segs = ERR_PTR(-EINVAL); struct iphdr *iph; diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index 86961bec70ab..4286fd3cc0e2 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c @@ -201,11 +201,14 @@ static int ah_output(struct xfrm_state *x, struct sk_buff *skb) top_iph->ttl = 0; top_iph->check = 0; - ah->hdrlen = (XFRM_ALIGN8(sizeof(*ah) + ahp->icv_trunc_len) >> 2) - 2; + if (x->props.flags & XFRM_STATE_ALIGN4) + ah->hdrlen = (XFRM_ALIGN4(sizeof(*ah) + ahp->icv_trunc_len) >> 2) - 2; + else + ah->hdrlen = (XFRM_ALIGN8(sizeof(*ah) + ahp->icv_trunc_len) >> 2) - 2; ah->reserved = 0; ah->spi = x->id.spi; - ah->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output); + ah->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low); sg_init_table(sg, nfrags); skb_to_sgvec(skb, sg, 0, skb->len); @@ -299,9 +302,15 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb) nexthdr = ah->nexthdr; ah_hlen = (ah->hdrlen + 2) << 2; - if (ah_hlen != XFRM_ALIGN8(sizeof(*ah) + ahp->icv_full_len) && - ah_hlen != XFRM_ALIGN8(sizeof(*ah) + ahp->icv_trunc_len)) - goto out; + if (x->props.flags & XFRM_STATE_ALIGN4) { + if (ah_hlen != XFRM_ALIGN4(sizeof(*ah) + ahp->icv_full_len) && + ah_hlen != XFRM_ALIGN4(sizeof(*ah) + ahp->icv_trunc_len)) + goto out; + } else { + if (ah_hlen != XFRM_ALIGN8(sizeof(*ah) + ahp->icv_full_len) && + ah_hlen != XFRM_ALIGN8(sizeof(*ah) + ahp->icv_trunc_len)) + goto out; + } if (!pskb_may_pull(skb, ah_hlen)) goto out; @@ -450,8 +459,12 @@ static int ah_init_state(struct xfrm_state *x) BUG_ON(ahp->icv_trunc_len > MAX_AH_AUTH_LEN); - x->props.header_len = XFRM_ALIGN8(sizeof(struct ip_auth_hdr) + - ahp->icv_trunc_len); + if (x->props.flags & XFRM_STATE_ALIGN4) + x->props.header_len = XFRM_ALIGN4(sizeof(struct ip_auth_hdr) + + ahp->icv_trunc_len); + else + x->props.header_len = XFRM_ALIGN8(sizeof(struct ip_auth_hdr) + + ahp->icv_trunc_len); if (x->props.mode == XFRM_MODE_TUNNEL) x->props.header_len += sizeof(struct iphdr); x->data = ahp; diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 04c8b69fd426..090d273d7865 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -433,14 +433,13 @@ static int arp_ignore(struct in_device *in_dev, __be32 sip, __be32 tip) static int arp_filter(__be32 sip, __be32 tip, struct net_device *dev) { - struct flowi fl = { .fl4_dst = sip, - .fl4_src = tip }; struct rtable *rt; int flag = 0; /*unsigned long now; */ struct net *net = dev_net(dev); - if (ip_route_output_key(net, &rt, &fl) < 0) + rt = ip_route_output(net, sip, tip, 0, 0); + if (IS_ERR(rt)) return 1; if (rt->dst.dev != dev) { NET_INC_STATS_BH(net, LINUX_MIB_ARPFILTER); @@ -1017,14 +1016,13 @@ static int arp_req_set_proxy(struct net *net, struct net_device *dev, int on) IPV4_DEVCONF_ALL(net, PROXY_ARP) = on; return 0; } - if (__in_dev_get_rcu(dev)) { - IN_DEV_CONF_SET(__in_dev_get_rcu(dev), PROXY_ARP, on); + if (__in_dev_get_rtnl(dev)) { + IN_DEV_CONF_SET(__in_dev_get_rtnl(dev), PROXY_ARP, on); return 0; } return -ENXIO; } -/* must be called with rcu_read_lock() */ static int arp_req_set_public(struct net *net, struct arpreq *r, struct net_device *dev) { @@ -1062,12 +1060,10 @@ static int arp_req_set(struct net *net, struct arpreq *r, if (r->arp_flags & ATF_PERM) r->arp_flags |= ATF_COM; if (dev == NULL) { - struct flowi fl = { .fl4_dst = ip, - .fl4_tos = RTO_ONLINK }; - struct rtable 
*rt; - err = ip_route_output_key(net, &rt, &fl); - if (err != 0) - return err; + struct rtable *rt = ip_route_output(net, ip, 0, RTO_ONLINK, 0); + + if (IS_ERR(rt)) + return PTR_ERR(rt); dev = rt->dst.dev; ip_rt_put(rt); if (!dev) @@ -1178,7 +1174,6 @@ static int arp_req_delete_public(struct net *net, struct arpreq *r, static int arp_req_delete(struct net *net, struct arpreq *r, struct net_device *dev) { - int err; __be32 ip; if (r->arp_flags & ATF_PUBL) @@ -1186,12 +1181,9 @@ static int arp_req_delete(struct net *net, struct arpreq *r, ip = ((struct sockaddr_in *)&r->arp_pa)->sin_addr.s_addr; if (dev == NULL) { - struct flowi fl = { .fl4_dst = ip, - .fl4_tos = RTO_ONLINK }; - struct rtable *rt; - err = ip_route_output_key(net, &rt, &fl); - if (err != 0) - return err; + struct rtable *rt = ip_route_output(net, ip, 0, RTO_ONLINK, 0); + if (IS_ERR(rt)) + return PTR_ERR(rt); dev = rt->dst.dev; ip_rt_put(rt); if (!dev) @@ -1233,10 +1225,10 @@ int arp_ioctl(struct net *net, unsigned int cmd, void __user *arg) if (!(r.arp_flags & ATF_NETMASK)) ((struct sockaddr_in *)&r.arp_netmask)->sin_addr.s_addr = htonl(0xFFFFFFFFUL); - rcu_read_lock(); + rtnl_lock(); if (r.arp_dev[0]) { err = -ENODEV; - dev = dev_get_by_name_rcu(net, r.arp_dev); + dev = __dev_get_by_name(net, r.arp_dev); if (dev == NULL) goto out; @@ -1263,7 +1255,7 @@ int arp_ioctl(struct net *net, unsigned int cmd, void __user *arg) break; } out: - rcu_read_unlock(); + rtnl_unlock(); if (cmd == SIOCGARP && !err && copy_to_user(arg, &r, sizeof(r))) err = -EFAULT; return err; diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c index 174be6caa5c8..85bd24ca4f6d 100644 --- a/net/ipv4/datagram.c +++ b/net/ipv4/datagram.c @@ -46,11 +46,12 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) if (!saddr) saddr = inet->mc_addr; } - err = ip_route_connect(&rt, usin->sin_addr.s_addr, saddr, - RT_CONN_FLAGS(sk), oif, - sk->sk_protocol, - inet->inet_sport, usin->sin_port, sk, 1); - if (err) { + rt = ip_route_connect(usin->sin_addr.s_addr, saddr, + RT_CONN_FLAGS(sk), oif, + sk->sk_protocol, + inet->inet_sport, usin->sin_port, sk, true); + if (IS_ERR(rt)) { + err = PTR_ERR(rt); if (err == -ENETUNREACH) IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES); return err; diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 748cb5b337bd..6d85800daeb7 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -51,6 +51,7 @@ #include <linux/inetdevice.h> #include <linux/igmp.h> #include <linux/slab.h> +#include <linux/hash.h> #ifdef CONFIG_SYSCTL #include <linux/sysctl.h> #endif @@ -92,6 +93,71 @@ static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = { [IFA_LABEL] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, }; +/* inet_addr_hash's shifting is dependent upon this IN4_ADDR_HSIZE + * value. So if you change this define, make appropriate changes to + * inet_addr_hash as well. 
+ */ +#define IN4_ADDR_HSIZE 256 +static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE]; +static DEFINE_SPINLOCK(inet_addr_hash_lock); + +static inline unsigned int inet_addr_hash(struct net *net, __be32 addr) +{ + u32 val = (__force u32) addr ^ hash_ptr(net, 8); + + return ((val ^ (val >> 8) ^ (val >> 16) ^ (val >> 24)) & + (IN4_ADDR_HSIZE - 1)); +} + +static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa) +{ + unsigned int hash = inet_addr_hash(net, ifa->ifa_local); + + spin_lock(&inet_addr_hash_lock); + hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]); + spin_unlock(&inet_addr_hash_lock); +} + +static void inet_hash_remove(struct in_ifaddr *ifa) +{ + spin_lock(&inet_addr_hash_lock); + hlist_del_init_rcu(&ifa->hash); + spin_unlock(&inet_addr_hash_lock); +} + +/** + * __ip_dev_find - find the first device with a given source address. + * @net: the net namespace + * @addr: the source address + * @devref: if true, take a reference on the found device + * + * If a caller uses devref=false, it should be protected by RCU, or RTNL + */ +struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref) +{ + unsigned int hash = inet_addr_hash(net, addr); + struct net_device *result = NULL; + struct in_ifaddr *ifa; + struct hlist_node *node; + + rcu_read_lock(); + hlist_for_each_entry_rcu(ifa, node, &inet_addr_lst[hash], hash) { + struct net_device *dev = ifa->ifa_dev->dev; + + if (!net_eq(dev_net(dev), net)) + continue; + if (ifa->ifa_local == addr) { + result = dev; + break; + } + } + if (result && devref) + dev_hold(result); + rcu_read_unlock(); + return result; +} +EXPORT_SYMBOL(__ip_dev_find); + static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32); static BLOCKING_NOTIFIER_HEAD(inetaddr_chain); @@ -265,6 +331,7 @@ static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, } if (!do_promote) { + inet_hash_remove(ifa); *ifap1 = ifa->ifa_next; rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid); @@ -281,6 +348,7 @@ static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, /* 2. Unlink it */ *ifap = ifa1->ifa_next; + inet_hash_remove(ifa1); /* 3. Announce address deletion */ @@ -368,6 +436,8 @@ static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh, ifa->ifa_next = *ifap; *ifap = ifa; + inet_hash_insert(dev_net(in_dev->dev), ifa); + /* Send message first, then call notifier. 
Notifier will trigger FIB update, so that listeners of netlink will know about new ifaddr */ @@ -521,6 +591,7 @@ static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh) if (tb[IFA_ADDRESS] == NULL) tb[IFA_ADDRESS] = tb[IFA_LOCAL]; + INIT_HLIST_NODE(&ifa->hash); ifa->ifa_prefixlen = ifm->ifa_prefixlen; ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen); ifa->ifa_flags = ifm->ifa_flags; @@ -670,7 +741,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg) ifap = &ifa->ifa_next) { if (!strcmp(ifr.ifr_name, ifa->ifa_label) && sin_orig.sin_addr.s_addr == - ifa->ifa_address) { + ifa->ifa_local) { break; /* found */ } } @@ -728,6 +799,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg) if (!ifa) { ret = -ENOBUFS; ifa = inet_alloc_ifa(); + INIT_HLIST_NODE(&ifa->hash); if (!ifa) break; if (colon) @@ -1030,6 +1102,21 @@ static inline bool inetdev_valid_mtu(unsigned mtu) return mtu >= 68; } +static void inetdev_send_gratuitous_arp(struct net_device *dev, + struct in_device *in_dev) + +{ + struct in_ifaddr *ifa = in_dev->ifa_list; + + if (!ifa) + return; + + arp_send(ARPOP_REQUEST, ETH_P_ARP, + ifa->ifa_local, dev, + ifa->ifa_local, NULL, + dev->dev_addr, NULL); +} + /* Called only under RTNL semaphore */ static int inetdev_event(struct notifier_block *this, unsigned long event, @@ -1069,6 +1156,7 @@ static int inetdev_event(struct notifier_block *this, unsigned long event, struct in_ifaddr *ifa = inet_alloc_ifa(); if (ifa) { + INIT_HLIST_NODE(&ifa->hash); ifa->ifa_local = ifa->ifa_address = htonl(INADDR_LOOPBACK); ifa->ifa_prefixlen = 8; @@ -1082,18 +1170,13 @@ static int inetdev_event(struct notifier_block *this, unsigned long event, } ip_mc_up(in_dev); /* fall through */ - case NETDEV_NOTIFY_PEERS: case NETDEV_CHANGEADDR: + if (!IN_DEV_ARP_NOTIFY(in_dev)) + break; + /* fall through */ + case NETDEV_NOTIFY_PEERS: /* Send gratuitous ARP to notify of link change */ - if (IN_DEV_ARP_NOTIFY(in_dev)) { - struct in_ifaddr *ifa = in_dev->ifa_list; - - if (ifa) - arp_send(ARPOP_REQUEST, ETH_P_ARP, - ifa->ifa_address, dev, - ifa->ifa_address, NULL, - dev->dev_addr, NULL); - } + inetdev_send_gratuitous_arp(dev, in_dev); break; case NETDEV_DOWN: ip_mc_down(in_dev); @@ -1710,6 +1793,11 @@ static struct rtnl_af_ops inet_af_ops = { void __init devinet_init(void) { + int i; + + for (i = 0; i < IN4_ADDR_HSIZE; i++) + INIT_HLIST_HEAD(&inet_addr_lst[i]); + register_pernet_subsys(&devinet_ops); register_gifconf(PF_INET, inet_gifconf); diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index e42a905180f0..03f994bcf7de 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -33,11 +33,14 @@ static u32 esp4_get_mtu(struct xfrm_state *x, int mtu); * * TODO: Use spare space in skb for this where possible. */ -static void *esp_alloc_tmp(struct crypto_aead *aead, int nfrags) +static void *esp_alloc_tmp(struct crypto_aead *aead, int nfrags, int seqhilen) { unsigned int len; - len = crypto_aead_ivsize(aead); + len = seqhilen; + + len += crypto_aead_ivsize(aead); + if (len) { len += crypto_aead_alignmask(aead) & ~(crypto_tfm_ctx_alignment() - 1); @@ -52,10 +55,15 @@ static void *esp_alloc_tmp(struct crypto_aead *aead, int nfrags) return kmalloc(len, GFP_ATOMIC); } -static inline u8 *esp_tmp_iv(struct crypto_aead *aead, void *tmp) +static inline __be32 *esp_tmp_seqhi(void *tmp) +{ + return PTR_ALIGN((__be32 *)tmp, __alignof__(__be32)); +} +static inline u8 *esp_tmp_iv(struct crypto_aead *aead, void *tmp, int seqhilen) { return crypto_aead_ivsize(aead) ? 
- PTR_ALIGN((u8 *)tmp, crypto_aead_alignmask(aead) + 1) : tmp; + PTR_ALIGN((u8 *)tmp + seqhilen, + crypto_aead_alignmask(aead) + 1) : tmp + seqhilen; } static inline struct aead_givcrypt_request *esp_tmp_givreq( @@ -122,6 +130,10 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) int plen; int tfclen; int nfrags; + int assoclen; + int sglists; + int seqhilen; + __be32 *seqhi; /* skb is pure payload to encrypt */ @@ -151,14 +163,25 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) goto error; nfrags = err; - tmp = esp_alloc_tmp(aead, nfrags + 1); + assoclen = sizeof(*esph); + sglists = 1; + seqhilen = 0; + + if (x->props.flags & XFRM_STATE_ESN) { + sglists += 2; + seqhilen += sizeof(__be32); + assoclen += seqhilen; + } + + tmp = esp_alloc_tmp(aead, nfrags + sglists, seqhilen); if (!tmp) goto error; - iv = esp_tmp_iv(aead, tmp); + seqhi = esp_tmp_seqhi(tmp); + iv = esp_tmp_iv(aead, tmp, seqhilen); req = esp_tmp_givreq(aead, iv); asg = esp_givreq_sg(aead, req); - sg = asg + 1; + sg = asg + sglists; /* Fill padding... */ tail = skb_tail_pointer(trailer); @@ -215,19 +238,27 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) } esph->spi = x->id.spi; - esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output); + esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low); sg_init_table(sg, nfrags); skb_to_sgvec(skb, sg, esph->enc_data + crypto_aead_ivsize(aead) - skb->data, clen + alen); - sg_init_one(asg, esph, sizeof(*esph)); + + if ((x->props.flags & XFRM_STATE_ESN)) { + sg_init_table(asg, 3); + sg_set_buf(asg, &esph->spi, sizeof(__be32)); + *seqhi = htonl(XFRM_SKB_CB(skb)->seq.output.hi); + sg_set_buf(asg + 1, seqhi, seqhilen); + sg_set_buf(asg + 2, &esph->seq_no, sizeof(__be32)); + } else + sg_init_one(asg, esph, sizeof(*esph)); aead_givcrypt_set_callback(req, 0, esp_output_done, skb); aead_givcrypt_set_crypt(req, sg, sg, clen, iv); - aead_givcrypt_set_assoc(req, asg, sizeof(*esph)); + aead_givcrypt_set_assoc(req, asg, assoclen); aead_givcrypt_set_giv(req, esph->enc_data, - XFRM_SKB_CB(skb)->seq.output); + XFRM_SKB_CB(skb)->seq.output.low); ESP_SKB_CB(skb)->tmp = tmp; err = crypto_aead_givencrypt(req); @@ -346,6 +377,10 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb) struct sk_buff *trailer; int elen = skb->len - sizeof(*esph) - crypto_aead_ivsize(aead); int nfrags; + int assoclen; + int sglists; + int seqhilen; + __be32 *seqhi; void *tmp; u8 *iv; struct scatterlist *sg; @@ -362,16 +397,27 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb) goto out; nfrags = err; + assoclen = sizeof(*esph); + sglists = 1; + seqhilen = 0; + + if (x->props.flags & XFRM_STATE_ESN) { + sglists += 2; + seqhilen += sizeof(__be32); + assoclen += seqhilen; + } + err = -ENOMEM; - tmp = esp_alloc_tmp(aead, nfrags + 1); + tmp = esp_alloc_tmp(aead, nfrags + sglists, seqhilen); if (!tmp) goto out; ESP_SKB_CB(skb)->tmp = tmp; - iv = esp_tmp_iv(aead, tmp); + seqhi = esp_tmp_seqhi(tmp); + iv = esp_tmp_iv(aead, tmp, seqhilen); req = esp_tmp_req(aead, iv); asg = esp_req_sg(aead, req); - sg = asg + 1; + sg = asg + sglists; skb->ip_summed = CHECKSUM_NONE; @@ -382,11 +428,19 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb) sg_init_table(sg, nfrags); skb_to_sgvec(skb, sg, sizeof(*esph) + crypto_aead_ivsize(aead), elen); - sg_init_one(asg, esph, sizeof(*esph)); + + if ((x->props.flags & XFRM_STATE_ESN)) { + sg_init_table(asg, 3); + sg_set_buf(asg, &esph->spi, sizeof(__be32)); + *seqhi = XFRM_SKB_CB(skb)->seq.input.hi; + sg_set_buf(asg + 1, 
seqhi, seqhilen); + sg_set_buf(asg + 2, &esph->seq_no, sizeof(__be32)); + } else + sg_init_one(asg, esph, sizeof(*esph)); aead_request_set_callback(req, 0, esp_input_done, skb); aead_request_set_crypt(req, sg, sg, elen, iv); - aead_request_set_assoc(req, asg, sizeof(*esph)); + aead_request_set_assoc(req, asg, assoclen); err = crypto_aead_decrypt(req); if (err == -EINPROGRESS) @@ -500,10 +554,20 @@ static int esp_init_authenc(struct xfrm_state *x) goto error; err = -ENAMETOOLONG; - if (snprintf(authenc_name, CRYPTO_MAX_ALG_NAME, "authenc(%s,%s)", - x->aalg ? x->aalg->alg_name : "digest_null", - x->ealg->alg_name) >= CRYPTO_MAX_ALG_NAME) - goto error; + + if ((x->props.flags & XFRM_STATE_ESN)) { + if (snprintf(authenc_name, CRYPTO_MAX_ALG_NAME, + "authencesn(%s,%s)", + x->aalg ? x->aalg->alg_name : "digest_null", + x->ealg->alg_name) >= CRYPTO_MAX_ALG_NAME) + goto error; + } else { + if (snprintf(authenc_name, CRYPTO_MAX_ALG_NAME, + "authenc(%s,%s)", + x->aalg ? x->aalg->alg_name : "digest_null", + x->ealg->alg_name) >= CRYPTO_MAX_ALG_NAME) + goto error; + } aead = crypto_alloc_aead(authenc_name, 0, 0); err = PTR_ERR(aead); diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 1d2cdd43a878..a373a259253c 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -51,11 +51,11 @@ static int __net_init fib4_rules_init(struct net *net) { struct fib_table *local_table, *main_table; - local_table = fib_hash_table(RT_TABLE_LOCAL); + local_table = fib_trie_table(RT_TABLE_LOCAL); if (local_table == NULL) return -ENOMEM; - main_table = fib_hash_table(RT_TABLE_MAIN); + main_table = fib_trie_table(RT_TABLE_MAIN); if (main_table == NULL) goto fail; @@ -82,7 +82,7 @@ struct fib_table *fib_new_table(struct net *net, u32 id) if (tb) return tb; - tb = fib_hash_table(id); + tb = fib_trie_table(id); if (!tb) return NULL; h = id & (FIB_TABLE_HASHSZ - 1); @@ -114,21 +114,6 @@ struct fib_table *fib_get_table(struct net *net, u32 id) } #endif /* CONFIG_IP_MULTIPLE_TABLES */ -void fib_select_default(struct net *net, - const struct flowi *flp, struct fib_result *res) -{ - struct fib_table *tb; - int table = RT_TABLE_MAIN; -#ifdef CONFIG_IP_MULTIPLE_TABLES - if (res->r == NULL || res->r->action != FR_ACT_TO_TBL) - return; - table = res->r->table; -#endif - tb = fib_get_table(net, table); - if (FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) - fib_table_select_default(tb, flp, res); -} - static void fib_flush(struct net *net) { int flushed = 0; @@ -147,46 +132,6 @@ static void fib_flush(struct net *net) rt_cache_flush(net, -1); } -/** - * __ip_dev_find - find the first device with a given source address. 
- * @net: the net namespace - * @addr: the source address - * @devref: if true, take a reference on the found device - * - * If a caller uses devref=false, it should be protected by RCU, or RTNL - */ -struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref) -{ - struct flowi fl = { - .fl4_dst = addr, - }; - struct fib_result res = { 0 }; - struct net_device *dev = NULL; - struct fib_table *local_table; - -#ifdef CONFIG_IP_MULTIPLE_TABLES - res.r = NULL; -#endif - - rcu_read_lock(); - local_table = fib_get_table(net, RT_TABLE_LOCAL); - if (!local_table || - fib_table_lookup(local_table, &fl, &res, FIB_LOOKUP_NOREF)) { - rcu_read_unlock(); - return NULL; - } - if (res.type != RTN_LOCAL) - goto out; - dev = FIB_RES_DEV(res); - - if (dev && devref) - dev_hold(dev); -out: - rcu_read_unlock(); - return dev; -} -EXPORT_SYMBOL(__ip_dev_find); - /* * Find address type as if only "dev" was present in the system. If * on_dev is NULL then all interfaces are taken into consideration. @@ -195,7 +140,7 @@ static inline unsigned __inet_dev_addr_type(struct net *net, const struct net_device *dev, __be32 addr) { - struct flowi fl = { .fl4_dst = addr }; + struct flowi4 fl4 = { .daddr = addr }; struct fib_result res; unsigned ret = RTN_BROADCAST; struct fib_table *local_table; @@ -213,7 +158,7 @@ static inline unsigned __inet_dev_addr_type(struct net *net, if (local_table) { ret = RTN_UNICAST; rcu_read_lock(); - if (!fib_table_lookup(local_table, &fl, &res, FIB_LOOKUP_NOREF)) { + if (!fib_table_lookup(local_table, &fl4, &res, FIB_LOOKUP_NOREF)) { if (!dev || dev == res.fi->fib_dev) ret = res.type; } @@ -248,19 +193,21 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, u32 *itag, u32 mark) { struct in_device *in_dev; - struct flowi fl = { - .fl4_dst = src, - .fl4_src = dst, - .fl4_tos = tos, - .mark = mark, - .iif = oif - }; + struct flowi4 fl4; struct fib_result res; int no_addr, rpf, accept_local; bool dev_match; int ret; struct net *net; + fl4.flowi4_oif = 0; + fl4.flowi4_iif = oif; + fl4.flowi4_mark = mark; + fl4.daddr = src; + fl4.saddr = dst; + fl4.flowi4_tos = tos; + fl4.flowi4_scope = RT_SCOPE_UNIVERSE; + no_addr = rpf = accept_local = 0; in_dev = __in_dev_get_rcu(dev); if (in_dev) { @@ -268,14 +215,14 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, rpf = IN_DEV_RPFILTER(in_dev); accept_local = IN_DEV_ACCEPT_LOCAL(in_dev); if (mark && !IN_DEV_SRC_VMARK(in_dev)) - fl.mark = 0; + fl4.flowi4_mark = 0; } if (in_dev == NULL) goto e_inval; net = dev_net(dev); - if (fib_lookup(net, &fl, &res)) + if (fib_lookup(net, &fl4, &res)) goto last_resort; if (res.type != RTN_UNICAST) { if (res.type != RTN_LOCAL || !accept_local) @@ -306,10 +253,10 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, goto last_resort; if (rpf == 1) goto e_rpf; - fl.oif = dev->ifindex; + fl4.flowi4_oif = dev->ifindex; ret = 0; - if (fib_lookup(net, &fl, &res) == 0) { + if (fib_lookup(net, &fl4, &res) == 0) { if (res.type == RTN_UNICAST) { *spec_dst = FIB_RES_PREFSRC(res); ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST; @@ -849,11 +796,11 @@ static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb) { struct fib_result res; - struct flowi fl = { - .mark = frn->fl_mark, - .fl4_dst = frn->fl_addr, - .fl4_tos = frn->fl_tos, - .fl4_scope = frn->fl_scope, + struct flowi4 fl4 = { + .flowi4_mark = frn->fl_mark, + .daddr = frn->fl_addr, + .flowi4_tos = frn->fl_tos, + .flowi4_scope = frn->fl_scope, }; #ifdef CONFIG_IP_MULTIPLE_TABLES @@ -866,7 +813,7 @@ static 
void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb) frn->tb_id = tb->tb_id; rcu_read_lock(); - frn->err = fib_table_lookup(tb, &fl, &res, FIB_LOOKUP_NOREF); + frn->err = fib_table_lookup(tb, &fl4, &res, FIB_LOOKUP_NOREF); if (!frn->err) { frn->prefixlen = res.prefixlen; @@ -945,10 +892,12 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, #ifdef CONFIG_IP_ROUTE_MULTIPATH fib_sync_up(dev); #endif + fib_update_nh_saddrs(dev); rt_cache_flush(dev_net(dev), -1); break; case NETDEV_DOWN: fib_del_ifaddr(ifa); + fib_update_nh_saddrs(dev); if (ifa->ifa_dev->ifa_list == NULL) { /* Last address was deleted from this interface. * Disable IP. @@ -1101,5 +1050,5 @@ void __init ip_fib_init(void) register_netdevice_notifier(&fib_netdev_notifier); register_inetaddr_notifier(&fib_inetaddr_notifier); - fib_hash_init(); + fib_trie_init(); } diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c deleted file mode 100644 index b3acb0417b21..000000000000 --- a/net/ipv4/fib_hash.c +++ /dev/null @@ -1,1133 +0,0 @@ -/* - * INET An implementation of the TCP/IP protocol suite for the LINUX - * operating system. INET is implemented using the BSD Socket - * interface as the means of communication with the user level. - * - * IPv4 FIB: lookup engine and maintenance routines. - * - * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include <asm/uaccess.h> -#include <asm/system.h> -#include <linux/bitops.h> -#include <linux/types.h> -#include <linux/kernel.h> -#include <linux/mm.h> -#include <linux/string.h> -#include <linux/socket.h> -#include <linux/sockios.h> -#include <linux/errno.h> -#include <linux/in.h> -#include <linux/inet.h> -#include <linux/inetdevice.h> -#include <linux/netdevice.h> -#include <linux/if_arp.h> -#include <linux/proc_fs.h> -#include <linux/skbuff.h> -#include <linux/netlink.h> -#include <linux/init.h> -#include <linux/slab.h> - -#include <net/net_namespace.h> -#include <net/ip.h> -#include <net/protocol.h> -#include <net/route.h> -#include <net/tcp.h> -#include <net/sock.h> -#include <net/ip_fib.h> - -#include "fib_lookup.h" - -static struct kmem_cache *fn_hash_kmem __read_mostly; -static struct kmem_cache *fn_alias_kmem __read_mostly; - -struct fib_node { - struct hlist_node fn_hash; - struct list_head fn_alias; - __be32 fn_key; - struct fib_alias fn_embedded_alias; -}; - -#define EMBEDDED_HASH_SIZE (L1_CACHE_BYTES / sizeof(struct hlist_head)) - -struct fn_zone { - struct fn_zone __rcu *fz_next; /* Next not empty zone */ - struct hlist_head __rcu *fz_hash; /* Hash table pointer */ - seqlock_t fz_lock; - u32 fz_hashmask; /* (fz_divisor - 1) */ - - u8 fz_order; /* Zone order (0..32) */ - u8 fz_revorder; /* 32 - fz_order */ - __be32 fz_mask; /* inet_make_mask(order) */ -#define FZ_MASK(fz) ((fz)->fz_mask) - - struct hlist_head fz_embedded_hash[EMBEDDED_HASH_SIZE]; - - int fz_nent; /* Number of entries */ - int fz_divisor; /* Hash size (mask+1) */ -}; - -struct fn_hash { - struct fn_zone *fn_zones[33]; - struct fn_zone __rcu *fn_zone_list; -}; - -static inline u32 fn_hash(__be32 key, struct fn_zone *fz) -{ - u32 h = ntohl(key) >> fz->fz_revorder; - h ^= (h>>20); - h ^= (h>>10); - h ^= (h>>5); - h &= fz->fz_hashmask; - return h; -} - -static inline __be32 fz_key(__be32 dst, struct fn_zone 
*fz) -{ - return dst & FZ_MASK(fz); -} - -static unsigned int fib_hash_genid; - -#define FZ_MAX_DIVISOR ((PAGE_SIZE<<MAX_ORDER) / sizeof(struct hlist_head)) - -static struct hlist_head *fz_hash_alloc(int divisor) -{ - unsigned long size = divisor * sizeof(struct hlist_head); - - if (size <= PAGE_SIZE) - return kzalloc(size, GFP_KERNEL); - - return (struct hlist_head *) - __get_free_pages(GFP_KERNEL | __GFP_ZERO, get_order(size)); -} - -/* The fib hash lock must be held when this is called. */ -static inline void fn_rebuild_zone(struct fn_zone *fz, - struct hlist_head *old_ht, - int old_divisor) -{ - int i; - - for (i = 0; i < old_divisor; i++) { - struct hlist_node *node, *n; - struct fib_node *f; - - hlist_for_each_entry_safe(f, node, n, &old_ht[i], fn_hash) { - struct hlist_head *new_head; - - hlist_del_rcu(&f->fn_hash); - - new_head = rcu_dereference_protected(fz->fz_hash, 1) + - fn_hash(f->fn_key, fz); - hlist_add_head_rcu(&f->fn_hash, new_head); - } - } -} - -static void fz_hash_free(struct hlist_head *hash, int divisor) -{ - unsigned long size = divisor * sizeof(struct hlist_head); - - if (size <= PAGE_SIZE) - kfree(hash); - else - free_pages((unsigned long)hash, get_order(size)); -} - -static void fn_rehash_zone(struct fn_zone *fz) -{ - struct hlist_head *ht, *old_ht; - int old_divisor, new_divisor; - u32 new_hashmask; - - new_divisor = old_divisor = fz->fz_divisor; - - switch (old_divisor) { - case EMBEDDED_HASH_SIZE: - new_divisor *= EMBEDDED_HASH_SIZE; - break; - case EMBEDDED_HASH_SIZE*EMBEDDED_HASH_SIZE: - new_divisor *= (EMBEDDED_HASH_SIZE/2); - break; - default: - if ((old_divisor << 1) > FZ_MAX_DIVISOR) { - printk(KERN_CRIT "route.c: bad divisor %d!\n", old_divisor); - return; - } - new_divisor = (old_divisor << 1); - break; - } - - new_hashmask = (new_divisor - 1); - -#if RT_CACHE_DEBUG >= 2 - printk(KERN_DEBUG "fn_rehash_zone: hash for zone %d grows from %d\n", - fz->fz_order, old_divisor); -#endif - - ht = fz_hash_alloc(new_divisor); - - if (ht) { - struct fn_zone nfz; - - memcpy(&nfz, fz, sizeof(nfz)); - - write_seqlock_bh(&fz->fz_lock); - old_ht = rcu_dereference_protected(fz->fz_hash, 1); - RCU_INIT_POINTER(nfz.fz_hash, ht); - nfz.fz_hashmask = new_hashmask; - nfz.fz_divisor = new_divisor; - fn_rebuild_zone(&nfz, old_ht, old_divisor); - fib_hash_genid++; - rcu_assign_pointer(fz->fz_hash, ht); - fz->fz_hashmask = new_hashmask; - fz->fz_divisor = new_divisor; - write_sequnlock_bh(&fz->fz_lock); - - if (old_ht != fz->fz_embedded_hash) { - synchronize_rcu(); - fz_hash_free(old_ht, old_divisor); - } - } -} - -static void fn_free_node_rcu(struct rcu_head *head) -{ - struct fib_node *f = container_of(head, struct fib_node, fn_embedded_alias.rcu); - - kmem_cache_free(fn_hash_kmem, f); -} - -static inline void fn_free_node(struct fib_node *f) -{ - call_rcu(&f->fn_embedded_alias.rcu, fn_free_node_rcu); -} - -static void fn_free_alias_rcu(struct rcu_head *head) -{ - struct fib_alias *fa = container_of(head, struct fib_alias, rcu); - - kmem_cache_free(fn_alias_kmem, fa); -} - -static inline void fn_free_alias(struct fib_alias *fa, struct fib_node *f) -{ - fib_release_info(fa->fa_info); - if (fa == &f->fn_embedded_alias) - fa->fa_info = NULL; - else - call_rcu(&fa->rcu, fn_free_alias_rcu); -} - -static struct fn_zone * -fn_new_zone(struct fn_hash *table, int z) -{ - int i; - struct fn_zone *fz = kzalloc(sizeof(struct fn_zone), GFP_KERNEL); - if (!fz) - return NULL; - - seqlock_init(&fz->fz_lock); - fz->fz_divisor = z ? 
EMBEDDED_HASH_SIZE : 1; - fz->fz_hashmask = fz->fz_divisor - 1; - RCU_INIT_POINTER(fz->fz_hash, fz->fz_embedded_hash); - fz->fz_order = z; - fz->fz_revorder = 32 - z; - fz->fz_mask = inet_make_mask(z); - - /* Find the first not empty zone with more specific mask */ - for (i = z + 1; i <= 32; i++) - if (table->fn_zones[i]) - break; - if (i > 32) { - /* No more specific masks, we are the first. */ - rcu_assign_pointer(fz->fz_next, - rtnl_dereference(table->fn_zone_list)); - rcu_assign_pointer(table->fn_zone_list, fz); - } else { - rcu_assign_pointer(fz->fz_next, - rtnl_dereference(table->fn_zones[i]->fz_next)); - rcu_assign_pointer(table->fn_zones[i]->fz_next, fz); - } - table->fn_zones[z] = fz; - fib_hash_genid++; - return fz; -} - -int fib_table_lookup(struct fib_table *tb, - const struct flowi *flp, struct fib_result *res, - int fib_flags) -{ - int err; - struct fn_zone *fz; - struct fn_hash *t = (struct fn_hash *)tb->tb_data; - - rcu_read_lock(); - for (fz = rcu_dereference(t->fn_zone_list); - fz != NULL; - fz = rcu_dereference(fz->fz_next)) { - struct hlist_head *head; - struct hlist_node *node; - struct fib_node *f; - __be32 k; - unsigned int seq; - - do { - seq = read_seqbegin(&fz->fz_lock); - k = fz_key(flp->fl4_dst, fz); - - head = rcu_dereference(fz->fz_hash) + fn_hash(k, fz); - hlist_for_each_entry_rcu(f, node, head, fn_hash) { - if (f->fn_key != k) - continue; - - err = fib_semantic_match(&f->fn_alias, - flp, res, - fz->fz_order, fib_flags); - if (err <= 0) - goto out; - } - } while (read_seqretry(&fz->fz_lock, seq)); - } - err = 1; -out: - rcu_read_unlock(); - return err; -} - -void fib_table_select_default(struct fib_table *tb, - const struct flowi *flp, struct fib_result *res) -{ - int order, last_idx; - struct hlist_node *node; - struct fib_node *f; - struct fib_info *fi = NULL; - struct fib_info *last_resort; - struct fn_hash *t = (struct fn_hash *)tb->tb_data; - struct fn_zone *fz = t->fn_zones[0]; - struct hlist_head *head; - - if (fz == NULL) - return; - - last_idx = -1; - last_resort = NULL; - order = -1; - - rcu_read_lock(); - head = rcu_dereference(fz->fz_hash); - hlist_for_each_entry_rcu(f, node, head, fn_hash) { - struct fib_alias *fa; - - list_for_each_entry_rcu(fa, &f->fn_alias, fa_list) { - struct fib_info *next_fi = fa->fa_info; - - if (fa->fa_scope != res->scope || - fa->fa_type != RTN_UNICAST) - continue; - - if (next_fi->fib_priority > res->fi->fib_priority) - break; - if (!next_fi->fib_nh[0].nh_gw || - next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK) - continue; - - fib_alias_accessed(fa); - - if (fi == NULL) { - if (next_fi != res->fi) - break; - } else if (!fib_detect_death(fi, order, &last_resort, - &last_idx, tb->tb_default)) { - fib_result_assign(res, fi); - tb->tb_default = order; - goto out; - } - fi = next_fi; - order++; - } - } - - if (order <= 0 || fi == NULL) { - tb->tb_default = -1; - goto out; - } - - if (!fib_detect_death(fi, order, &last_resort, &last_idx, - tb->tb_default)) { - fib_result_assign(res, fi); - tb->tb_default = order; - goto out; - } - - if (last_idx >= 0) - fib_result_assign(res, last_resort); - tb->tb_default = last_idx; -out: - rcu_read_unlock(); -} - -/* Insert node F to FZ. */ -static inline void fib_insert_node(struct fn_zone *fz, struct fib_node *f) -{ - struct hlist_head *head = rtnl_dereference(fz->fz_hash) + fn_hash(f->fn_key, fz); - - hlist_add_head_rcu(&f->fn_hash, head); -} - -/* Return the node in FZ matching KEY. 
*/ -static struct fib_node *fib_find_node(struct fn_zone *fz, __be32 key) -{ - struct hlist_head *head = rtnl_dereference(fz->fz_hash) + fn_hash(key, fz); - struct hlist_node *node; - struct fib_node *f; - - hlist_for_each_entry_rcu(f, node, head, fn_hash) { - if (f->fn_key == key) - return f; - } - - return NULL; -} - - -static struct fib_alias *fib_fast_alloc(struct fib_node *f) -{ - struct fib_alias *fa = &f->fn_embedded_alias; - - if (fa->fa_info != NULL) - fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL); - return fa; -} - -/* Caller must hold RTNL. */ -int fib_table_insert(struct fib_table *tb, struct fib_config *cfg) -{ - struct fn_hash *table = (struct fn_hash *) tb->tb_data; - struct fib_node *new_f = NULL; - struct fib_node *f; - struct fib_alias *fa, *new_fa; - struct fn_zone *fz; - struct fib_info *fi; - u8 tos = cfg->fc_tos; - __be32 key; - int err; - - if (cfg->fc_dst_len > 32) - return -EINVAL; - - fz = table->fn_zones[cfg->fc_dst_len]; - if (!fz && !(fz = fn_new_zone(table, cfg->fc_dst_len))) - return -ENOBUFS; - - key = 0; - if (cfg->fc_dst) { - if (cfg->fc_dst & ~FZ_MASK(fz)) - return -EINVAL; - key = fz_key(cfg->fc_dst, fz); - } - - fi = fib_create_info(cfg); - if (IS_ERR(fi)) - return PTR_ERR(fi); - - if (fz->fz_nent > (fz->fz_divisor<<1) && - fz->fz_divisor < FZ_MAX_DIVISOR && - (cfg->fc_dst_len == 32 || - (1 << cfg->fc_dst_len) > fz->fz_divisor)) - fn_rehash_zone(fz); - - f = fib_find_node(fz, key); - - if (!f) - fa = NULL; - else - fa = fib_find_alias(&f->fn_alias, tos, fi->fib_priority); - - /* Now fa, if non-NULL, points to the first fib alias - * with the same keys [prefix,tos,priority], if such key already - * exists or to the node before which we will insert new one. - * - * If fa is NULL, we will need to allocate a new one and - * insert to the head of f. - * - * If f is NULL, no fib node matched the destination key - * and we need to allocate a new one of those as well. - */ - - if (fa && fa->fa_tos == tos && - fa->fa_info->fib_priority == fi->fib_priority) { - struct fib_alias *fa_first, *fa_match; - - err = -EEXIST; - if (cfg->fc_nlflags & NLM_F_EXCL) - goto out; - - /* We have 2 goals: - * 1. Find exact match for type, scope, fib_info to avoid - * duplicate routes - * 2. Find next 'fa' (or head), NLM_F_APPEND inserts before it - */ - fa_match = NULL; - fa_first = fa; - fa = list_entry(fa->fa_list.prev, struct fib_alias, fa_list); - list_for_each_entry_continue(fa, &f->fn_alias, fa_list) { - if (fa->fa_tos != tos) - break; - if (fa->fa_info->fib_priority != fi->fib_priority) - break; - if (fa->fa_type == cfg->fc_type && - fa->fa_scope == cfg->fc_scope && - fa->fa_info == fi) { - fa_match = fa; - break; - } - } - - if (cfg->fc_nlflags & NLM_F_REPLACE) { - u8 state; - - fa = fa_first; - if (fa_match) { - if (fa == fa_match) - err = 0; - goto out; - } - err = -ENOBUFS; - new_fa = fib_fast_alloc(f); - if (new_fa == NULL) - goto out; - - new_fa->fa_tos = fa->fa_tos; - new_fa->fa_info = fi; - new_fa->fa_type = cfg->fc_type; - new_fa->fa_scope = cfg->fc_scope; - state = fa->fa_state; - new_fa->fa_state = state & ~FA_S_ACCESSED; - fib_hash_genid++; - list_replace_rcu(&fa->fa_list, &new_fa->fa_list); - - fn_free_alias(fa, f); - if (state & FA_S_ACCESSED) - rt_cache_flush(cfg->fc_nlinfo.nl_net, -1); - rtmsg_fib(RTM_NEWROUTE, key, new_fa, cfg->fc_dst_len, - tb->tb_id, &cfg->fc_nlinfo, NLM_F_REPLACE); - return 0; - } - - /* Error if we find a perfect match which - * uses the same scope, type, and nexthop - * information. 
- */ - if (fa_match) - goto out; - - if (!(cfg->fc_nlflags & NLM_F_APPEND)) - fa = fa_first; - } - - err = -ENOENT; - if (!(cfg->fc_nlflags & NLM_F_CREATE)) - goto out; - - err = -ENOBUFS; - - if (!f) { - new_f = kmem_cache_zalloc(fn_hash_kmem, GFP_KERNEL); - if (new_f == NULL) - goto out; - - INIT_HLIST_NODE(&new_f->fn_hash); - INIT_LIST_HEAD(&new_f->fn_alias); - new_f->fn_key = key; - f = new_f; - } - - new_fa = fib_fast_alloc(f); - if (new_fa == NULL) - goto out; - - new_fa->fa_info = fi; - new_fa->fa_tos = tos; - new_fa->fa_type = cfg->fc_type; - new_fa->fa_scope = cfg->fc_scope; - new_fa->fa_state = 0; - - /* - * Insert new entry to the list. - */ - - if (new_f) - fib_insert_node(fz, new_f); - list_add_tail_rcu(&new_fa->fa_list, - (fa ? &fa->fa_list : &f->fn_alias)); - fib_hash_genid++; - - if (new_f) - fz->fz_nent++; - rt_cache_flush(cfg->fc_nlinfo.nl_net, -1); - - rtmsg_fib(RTM_NEWROUTE, key, new_fa, cfg->fc_dst_len, tb->tb_id, - &cfg->fc_nlinfo, 0); - return 0; - -out: - if (new_f) - kmem_cache_free(fn_hash_kmem, new_f); - fib_release_info(fi); - return err; -} - -int fib_table_delete(struct fib_table *tb, struct fib_config *cfg) -{ - struct fn_hash *table = (struct fn_hash *)tb->tb_data; - struct fib_node *f; - struct fib_alias *fa, *fa_to_delete; - struct fn_zone *fz; - __be32 key; - - if (cfg->fc_dst_len > 32) - return -EINVAL; - - if ((fz = table->fn_zones[cfg->fc_dst_len]) == NULL) - return -ESRCH; - - key = 0; - if (cfg->fc_dst) { - if (cfg->fc_dst & ~FZ_MASK(fz)) - return -EINVAL; - key = fz_key(cfg->fc_dst, fz); - } - - f = fib_find_node(fz, key); - - if (!f) - fa = NULL; - else - fa = fib_find_alias(&f->fn_alias, cfg->fc_tos, 0); - if (!fa) - return -ESRCH; - - fa_to_delete = NULL; - fa = list_entry(fa->fa_list.prev, struct fib_alias, fa_list); - list_for_each_entry_continue(fa, &f->fn_alias, fa_list) { - struct fib_info *fi = fa->fa_info; - - if (fa->fa_tos != cfg->fc_tos) - break; - - if ((!cfg->fc_type || - fa->fa_type == cfg->fc_type) && - (cfg->fc_scope == RT_SCOPE_NOWHERE || - fa->fa_scope == cfg->fc_scope) && - (!cfg->fc_protocol || - fi->fib_protocol == cfg->fc_protocol) && - fib_nh_match(cfg, fi) == 0) { - fa_to_delete = fa; - break; - } - } - - if (fa_to_delete) { - int kill_fn; - - fa = fa_to_delete; - rtmsg_fib(RTM_DELROUTE, key, fa, cfg->fc_dst_len, - tb->tb_id, &cfg->fc_nlinfo, 0); - - kill_fn = 0; - list_del_rcu(&fa->fa_list); - if (list_empty(&f->fn_alias)) { - hlist_del_rcu(&f->fn_hash); - kill_fn = 1; - } - fib_hash_genid++; - - if (fa->fa_state & FA_S_ACCESSED) - rt_cache_flush(cfg->fc_nlinfo.nl_net, -1); - fn_free_alias(fa, f); - if (kill_fn) { - fn_free_node(f); - fz->fz_nent--; - } - - return 0; - } - return -ESRCH; -} - -static int fn_flush_list(struct fn_zone *fz, int idx) -{ - struct hlist_head *head = rtnl_dereference(fz->fz_hash) + idx; - struct hlist_node *node, *n; - struct fib_node *f; - int found = 0; - - hlist_for_each_entry_safe(f, node, n, head, fn_hash) { - struct fib_alias *fa, *fa_node; - int kill_f; - - kill_f = 0; - list_for_each_entry_safe(fa, fa_node, &f->fn_alias, fa_list) { - struct fib_info *fi = fa->fa_info; - - if (fi && (fi->fib_flags&RTNH_F_DEAD)) { - list_del_rcu(&fa->fa_list); - if (list_empty(&f->fn_alias)) { - hlist_del_rcu(&f->fn_hash); - kill_f = 1; - } - fib_hash_genid++; - - fn_free_alias(fa, f); - found++; - } - } - if (kill_f) { - fn_free_node(f); - fz->fz_nent--; - } - } - return found; -} - -/* caller must hold RTNL. 
*/ -int fib_table_flush(struct fib_table *tb) -{ - struct fn_hash *table = (struct fn_hash *) tb->tb_data; - struct fn_zone *fz; - int found = 0; - - for (fz = rtnl_dereference(table->fn_zone_list); - fz != NULL; - fz = rtnl_dereference(fz->fz_next)) { - int i; - - for (i = fz->fz_divisor - 1; i >= 0; i--) - found += fn_flush_list(fz, i); - } - return found; -} - -void fib_free_table(struct fib_table *tb) -{ - struct fn_hash *table = (struct fn_hash *) tb->tb_data; - struct fn_zone *fz, *next; - - next = table->fn_zone_list; - while (next != NULL) { - fz = next; - next = fz->fz_next; - - if (fz->fz_hash != fz->fz_embedded_hash) - fz_hash_free(fz->fz_hash, fz->fz_divisor); - - kfree(fz); - } - - kfree(tb); -} - -static inline int -fn_hash_dump_bucket(struct sk_buff *skb, struct netlink_callback *cb, - struct fib_table *tb, - struct fn_zone *fz, - struct hlist_head *head) -{ - struct hlist_node *node; - struct fib_node *f; - int i, s_i; - - s_i = cb->args[4]; - i = 0; - hlist_for_each_entry_rcu(f, node, head, fn_hash) { - struct fib_alias *fa; - - list_for_each_entry_rcu(fa, &f->fn_alias, fa_list) { - if (i < s_i) - goto next; - - if (fib_dump_info(skb, NETLINK_CB(cb->skb).pid, - cb->nlh->nlmsg_seq, - RTM_NEWROUTE, - tb->tb_id, - fa->fa_type, - fa->fa_scope, - f->fn_key, - fz->fz_order, - fa->fa_tos, - fa->fa_info, - NLM_F_MULTI) < 0) { - cb->args[4] = i; - return -1; - } -next: - i++; - } - } - cb->args[4] = i; - return skb->len; -} - -static inline int -fn_hash_dump_zone(struct sk_buff *skb, struct netlink_callback *cb, - struct fib_table *tb, - struct fn_zone *fz) -{ - int h, s_h; - struct hlist_head *head = rcu_dereference(fz->fz_hash); - - if (head == NULL) - return skb->len; - s_h = cb->args[3]; - for (h = s_h; h < fz->fz_divisor; h++) { - if (hlist_empty(head + h)) - continue; - if (fn_hash_dump_bucket(skb, cb, tb, fz, head + h) < 0) { - cb->args[3] = h; - return -1; - } - memset(&cb->args[4], 0, - sizeof(cb->args) - 4*sizeof(cb->args[0])); - } - cb->args[3] = h; - return skb->len; -} - -int fib_table_dump(struct fib_table *tb, struct sk_buff *skb, - struct netlink_callback *cb) -{ - int m = 0, s_m; - struct fn_zone *fz; - struct fn_hash *table = (struct fn_hash *)tb->tb_data; - - s_m = cb->args[2]; - rcu_read_lock(); - for (fz = rcu_dereference(table->fn_zone_list); - fz != NULL; - fz = rcu_dereference(fz->fz_next), m++) { - if (m < s_m) - continue; - if (fn_hash_dump_zone(skb, cb, tb, fz) < 0) { - cb->args[2] = m; - rcu_read_unlock(); - return -1; - } - memset(&cb->args[3], 0, - sizeof(cb->args) - 3*sizeof(cb->args[0])); - } - rcu_read_unlock(); - cb->args[2] = m; - return skb->len; -} - -void __init fib_hash_init(void) -{ - fn_hash_kmem = kmem_cache_create("ip_fib_hash", sizeof(struct fib_node), - 0, SLAB_PANIC, NULL); - - fn_alias_kmem = kmem_cache_create("ip_fib_alias", sizeof(struct fib_alias), - 0, SLAB_PANIC, NULL); - -} - -struct fib_table *fib_hash_table(u32 id) -{ - struct fib_table *tb; - - tb = kmalloc(sizeof(struct fib_table) + sizeof(struct fn_hash), - GFP_KERNEL); - if (tb == NULL) - return NULL; - - tb->tb_id = id; - tb->tb_default = -1; - - memset(tb->tb_data, 0, sizeof(struct fn_hash)); - return tb; -} - -/* ------------------------------------------------------------------------ */ -#ifdef CONFIG_PROC_FS - -struct fib_iter_state { - struct seq_net_private p; - struct fn_zone *zone; - int bucket; - struct hlist_head *hash_head; - struct fib_node *fn; - struct fib_alias *fa; - loff_t pos; - unsigned int genid; - int valid; -}; - -static struct fib_alias 
*fib_get_first(struct seq_file *seq) -{ - struct fib_iter_state *iter = seq->private; - struct fib_table *main_table; - struct fn_hash *table; - - main_table = fib_get_table(seq_file_net(seq), RT_TABLE_MAIN); - table = (struct fn_hash *)main_table->tb_data; - - iter->bucket = 0; - iter->hash_head = NULL; - iter->fn = NULL; - iter->fa = NULL; - iter->pos = 0; - iter->genid = fib_hash_genid; - iter->valid = 1; - - for (iter->zone = rcu_dereference(table->fn_zone_list); - iter->zone != NULL; - iter->zone = rcu_dereference(iter->zone->fz_next)) { - int maxslot; - - if (!iter->zone->fz_nent) - continue; - - iter->hash_head = rcu_dereference(iter->zone->fz_hash); - maxslot = iter->zone->fz_divisor; - - for (iter->bucket = 0; iter->bucket < maxslot; - ++iter->bucket, ++iter->hash_head) { - struct hlist_node *node; - struct fib_node *fn; - - hlist_for_each_entry(fn, node, iter->hash_head, fn_hash) { - struct fib_alias *fa; - - list_for_each_entry(fa, &fn->fn_alias, fa_list) { - iter->fn = fn; - iter->fa = fa; - goto out; - } - } - } - } -out: - return iter->fa; -} - -static struct fib_alias *fib_get_next(struct seq_file *seq) -{ - struct fib_iter_state *iter = seq->private; - struct fib_node *fn; - struct fib_alias *fa; - - /* Advance FA, if any. */ - fn = iter->fn; - fa = iter->fa; - if (fa) { - BUG_ON(!fn); - list_for_each_entry_continue(fa, &fn->fn_alias, fa_list) { - iter->fa = fa; - goto out; - } - } - - fa = iter->fa = NULL; - - /* Advance FN. */ - if (fn) { - struct hlist_node *node = &fn->fn_hash; - hlist_for_each_entry_continue(fn, node, fn_hash) { - iter->fn = fn; - - list_for_each_entry(fa, &fn->fn_alias, fa_list) { - iter->fa = fa; - goto out; - } - } - } - - fn = iter->fn = NULL; - - /* Advance hash chain. */ - if (!iter->zone) - goto out; - - for (;;) { - struct hlist_node *node; - int maxslot; - - maxslot = iter->zone->fz_divisor; - - while (++iter->bucket < maxslot) { - iter->hash_head++; - - hlist_for_each_entry(fn, node, iter->hash_head, fn_hash) { - list_for_each_entry(fa, &fn->fn_alias, fa_list) { - iter->fn = fn; - iter->fa = fa; - goto out; - } - } - } - - iter->zone = rcu_dereference(iter->zone->fz_next); - - if (!iter->zone) - goto out; - - iter->bucket = 0; - iter->hash_head = rcu_dereference(iter->zone->fz_hash); - - hlist_for_each_entry(fn, node, iter->hash_head, fn_hash) { - list_for_each_entry(fa, &fn->fn_alias, fa_list) { - iter->fn = fn; - iter->fa = fa; - goto out; - } - } - } -out: - iter->pos++; - return fa; -} - -static struct fib_alias *fib_get_idx(struct seq_file *seq, loff_t pos) -{ - struct fib_iter_state *iter = seq->private; - struct fib_alias *fa; - - if (iter->valid && pos >= iter->pos && iter->genid == fib_hash_genid) { - fa = iter->fa; - pos -= iter->pos; - } else - fa = fib_get_first(seq); - - if (fa) - while (pos && (fa = fib_get_next(seq))) - --pos; - return pos ? NULL : fa; -} - -static void *fib_seq_start(struct seq_file *seq, loff_t *pos) - __acquires(RCU) -{ - void *v = NULL; - - rcu_read_lock(); - if (fib_get_table(seq_file_net(seq), RT_TABLE_MAIN)) - v = *pos ? fib_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; - return v; -} - -static void *fib_seq_next(struct seq_file *seq, void *v, loff_t *pos) -{ - ++*pos; - return v == SEQ_START_TOKEN ? 
fib_get_first(seq) : fib_get_next(seq); -} - -static void fib_seq_stop(struct seq_file *seq, void *v) - __releases(RCU) -{ - rcu_read_unlock(); -} - -static unsigned fib_flag_trans(int type, __be32 mask, struct fib_info *fi) -{ - static const unsigned type2flags[RTN_MAX + 1] = { - [7] = RTF_REJECT, - [8] = RTF_REJECT, - }; - unsigned flags = type2flags[type]; - - if (fi && fi->fib_nh->nh_gw) - flags |= RTF_GATEWAY; - if (mask == htonl(0xFFFFFFFF)) - flags |= RTF_HOST; - flags |= RTF_UP; - return flags; -} - -/* - * This outputs /proc/net/route. - * - * It always works in backward compatibility mode. - * The format of the file is not supposed to be changed. - */ -static int fib_seq_show(struct seq_file *seq, void *v) -{ - struct fib_iter_state *iter; - int len; - __be32 prefix, mask; - unsigned flags; - struct fib_node *f; - struct fib_alias *fa; - struct fib_info *fi; - - if (v == SEQ_START_TOKEN) { - seq_printf(seq, "%-127s\n", "Iface\tDestination\tGateway " - "\tFlags\tRefCnt\tUse\tMetric\tMask\t\tMTU" - "\tWindow\tIRTT"); - goto out; - } - - iter = seq->private; - f = iter->fn; - fa = iter->fa; - fi = fa->fa_info; - prefix = f->fn_key; - mask = FZ_MASK(iter->zone); - flags = fib_flag_trans(fa->fa_type, mask, fi); - if (fi) - seq_printf(seq, - "%s\t%08X\t%08X\t%04X\t%d\t%u\t%d\t%08X\t%d\t%u\t%u%n", - fi->fib_dev ? fi->fib_dev->name : "*", prefix, - fi->fib_nh->nh_gw, flags, 0, 0, fi->fib_priority, - mask, (fi->fib_advmss ? fi->fib_advmss + 40 : 0), - fi->fib_window, - fi->fib_rtt >> 3, &len); - else - seq_printf(seq, - "*\t%08X\t%08X\t%04X\t%d\t%u\t%d\t%08X\t%d\t%u\t%u%n", - prefix, 0, flags, 0, 0, 0, mask, 0, 0, 0, &len); - - seq_printf(seq, "%*s\n", 127 - len, ""); -out: - return 0; -} - -static const struct seq_operations fib_seq_ops = { - .start = fib_seq_start, - .next = fib_seq_next, - .stop = fib_seq_stop, - .show = fib_seq_show, -}; - -static int fib_seq_open(struct inode *inode, struct file *file) -{ - return seq_open_net(inode, file, &fib_seq_ops, - sizeof(struct fib_iter_state)); -} - -static const struct file_operations fib_seq_fops = { - .owner = THIS_MODULE, - .open = fib_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_net, -}; - -int __net_init fib_proc_init(struct net *net) -{ - if (!proc_net_fops_create(net, "route", S_IRUGO, &fib_seq_fops)) - return -ENOMEM; - return 0; -} - -void __net_exit fib_proc_exit(struct net *net) -{ - proc_net_remove(net, "route"); -} -#endif /* CONFIG_PROC_FS */ diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h index c079cc0ec651..4ec323875a02 100644 --- a/net/ipv4/fib_lookup.h +++ b/net/ipv4/fib_lookup.h @@ -25,9 +25,6 @@ static inline void fib_alias_accessed(struct fib_alias *fa) } /* Exported by fib_semantics.c */ -extern int fib_semantic_match(struct list_head *head, - const struct flowi *flp, - struct fib_result *res, int prefixlen, int fib_flags); extern void fib_release_info(struct fib_info *); extern struct fib_info *fib_create_info(struct fib_config *cfg); extern int fib_nh_match(struct fib_config *cfg, struct fib_info *fi); @@ -51,4 +48,11 @@ static inline void fib_result_assign(struct fib_result *res, res->fi = fi; } +struct fib_prop { + int error; + u8 scope; +}; + +extern const struct fib_prop fib_props[RTN_MAX + 1]; + #endif /* _FIB_LOOKUP_H */ diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 9cefe72029cf..a53bb1b5b118 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -47,13 +47,13 @@ struct fib4_rule { }; #ifdef CONFIG_IP_ROUTE_CLASSID -u32 
fib_rules_tclass(struct fib_result *res) +u32 fib_rules_tclass(const struct fib_result *res) { return res->r ? ((struct fib4_rule *) res->r)->tclassid : 0; } #endif -int fib_lookup(struct net *net, struct flowi *flp, struct fib_result *res) +int fib_lookup(struct net *net, struct flowi4 *flp, struct fib_result *res) { struct fib_lookup_arg arg = { .result = res, @@ -61,7 +61,7 @@ int fib_lookup(struct net *net, struct flowi *flp, struct fib_result *res) }; int err; - err = fib_rules_lookup(net->ipv4.rules_ops, flp, 0, &arg); + err = fib_rules_lookup(net->ipv4.rules_ops, flowi4_to_flowi(flp), 0, &arg); res->r = arg.rule; return err; @@ -95,7 +95,7 @@ static int fib4_rule_action(struct fib_rule *rule, struct flowi *flp, if (!tbl) goto errout; - err = fib_table_lookup(tbl, flp, (struct fib_result *) arg->result, arg->flags); + err = fib_table_lookup(tbl, &flp->u.ip4, (struct fib_result *) arg->result, arg->flags); if (err > 0) err = -EAGAIN; errout: @@ -106,14 +106,15 @@ errout: static int fib4_rule_match(struct fib_rule *rule, struct flowi *fl, int flags) { struct fib4_rule *r = (struct fib4_rule *) rule; - __be32 daddr = fl->fl4_dst; - __be32 saddr = fl->fl4_src; + struct flowi4 *fl4 = &fl->u.ip4; + __be32 daddr = fl4->daddr; + __be32 saddr = fl4->saddr; if (((saddr ^ r->src) & r->srcmask) || ((daddr ^ r->dst) & r->dstmask)) return 0; - if (r->tos && (r->tos != fl->fl4_tos)) + if (r->tos && (r->tos != fl4->flowi4_tos)) return 0; return 1; diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 9aff11d7278f..622ac4c95026 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -49,7 +49,7 @@ static DEFINE_SPINLOCK(fib_info_lock); static struct hlist_head *fib_info_hash; static struct hlist_head *fib_info_laddrhash; -static unsigned int fib_hash_size; +static unsigned int fib_info_hash_size; static unsigned int fib_info_cnt; #define DEVINDEX_HASHBITS 8 @@ -90,11 +90,7 @@ static DEFINE_SPINLOCK(fib_multipath_lock); #define endfor_nexthops(fi) } -static const struct -{ - int error; - u8 scope; -} fib_props[RTN_MAX + 1] = { +const struct fib_prop fib_props[RTN_MAX + 1] = { [RTN_UNSPEC] = { .error = 0, .scope = RT_SCOPE_NOWHERE, @@ -152,6 +148,8 @@ static void free_fib_info_rcu(struct rcu_head *head) { struct fib_info *fi = container_of(head, struct fib_info, rcu); + if (fi->fib_metrics != (u32 *) dst_default_metrics) + kfree(fi->fib_metrics); kfree(fi); } @@ -221,7 +219,7 @@ static inline unsigned int fib_devindex_hashfn(unsigned int val) static inline unsigned int fib_info_hashfn(const struct fib_info *fi) { - unsigned int mask = (fib_hash_size - 1); + unsigned int mask = (fib_info_hash_size - 1); unsigned int val = fi->fib_nhs; val ^= fi->fib_protocol; @@ -562,16 +560,16 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi, } rcu_read_lock(); { - struct flowi fl = { - .fl4_dst = nh->nh_gw, - .fl4_scope = cfg->fc_scope + 1, - .oif = nh->nh_oif, + struct flowi4 fl4 = { + .daddr = nh->nh_gw, + .flowi4_scope = cfg->fc_scope + 1, + .flowi4_oif = nh->nh_oif, }; /* It is not necessary, but requires a bit of thinking */ - if (fl.fl4_scope < RT_SCOPE_LINK) - fl.fl4_scope = RT_SCOPE_LINK; - err = fib_lookup(net, &fl, &res); + if (fl4.flowi4_scope < RT_SCOPE_LINK) + fl4.flowi4_scope = RT_SCOPE_LINK; + err = fib_lookup(net, &fl4, &res); if (err) { rcu_read_unlock(); return err; @@ -613,14 +611,14 @@ out: static inline unsigned int fib_laddr_hashfn(__be32 val) { - unsigned int mask = (fib_hash_size - 1); + unsigned int mask = (fib_info_hash_size - 1); return 
((__force u32)val ^ ((__force u32)val >> 7) ^ ((__force u32)val >> 14)) & mask; } -static struct hlist_head *fib_hash_alloc(int bytes) +static struct hlist_head *fib_info_hash_alloc(int bytes) { if (bytes <= PAGE_SIZE) return kzalloc(bytes, GFP_KERNEL); @@ -630,7 +628,7 @@ static struct hlist_head *fib_hash_alloc(int bytes) get_order(bytes)); } -static void fib_hash_free(struct hlist_head *hash, int bytes) +static void fib_info_hash_free(struct hlist_head *hash, int bytes) { if (!hash) return; @@ -641,18 +639,18 @@ static void fib_hash_free(struct hlist_head *hash, int bytes) free_pages((unsigned long) hash, get_order(bytes)); } -static void fib_hash_move(struct hlist_head *new_info_hash, - struct hlist_head *new_laddrhash, - unsigned int new_size) +static void fib_info_hash_move(struct hlist_head *new_info_hash, + struct hlist_head *new_laddrhash, + unsigned int new_size) { struct hlist_head *old_info_hash, *old_laddrhash; - unsigned int old_size = fib_hash_size; + unsigned int old_size = fib_info_hash_size; unsigned int i, bytes; spin_lock_bh(&fib_info_lock); old_info_hash = fib_info_hash; old_laddrhash = fib_info_laddrhash; - fib_hash_size = new_size; + fib_info_hash_size = new_size; for (i = 0; i < old_size; i++) { struct hlist_head *head = &fib_info_hash[i]; @@ -693,8 +691,8 @@ static void fib_hash_move(struct hlist_head *new_info_hash, spin_unlock_bh(&fib_info_lock); bytes = old_size * sizeof(struct hlist_head *); - fib_hash_free(old_info_hash, bytes); - fib_hash_free(old_laddrhash, bytes); + fib_info_hash_free(old_info_hash, bytes); + fib_info_hash_free(old_laddrhash, bytes); } struct fib_info *fib_create_info(struct fib_config *cfg) @@ -705,6 +703,9 @@ struct fib_info *fib_create_info(struct fib_config *cfg) int nhs = 1; struct net *net = cfg->fc_nlinfo.nl_net; + if (cfg->fc_type > RTN_MAX) + goto err_inval; + /* Fast check to catch the most weird cases */ if (fib_props[cfg->fc_type].scope > cfg->fc_scope) goto err_inval; @@ -718,8 +719,8 @@ struct fib_info *fib_create_info(struct fib_config *cfg) #endif err = -ENOBUFS; - if (fib_info_cnt >= fib_hash_size) { - unsigned int new_size = fib_hash_size << 1; + if (fib_info_cnt >= fib_info_hash_size) { + unsigned int new_size = fib_info_hash_size << 1; struct hlist_head *new_info_hash; struct hlist_head *new_laddrhash; unsigned int bytes; @@ -727,21 +728,27 @@ struct fib_info *fib_create_info(struct fib_config *cfg) if (!new_size) new_size = 1; bytes = new_size * sizeof(struct hlist_head *); - new_info_hash = fib_hash_alloc(bytes); - new_laddrhash = fib_hash_alloc(bytes); + new_info_hash = fib_info_hash_alloc(bytes); + new_laddrhash = fib_info_hash_alloc(bytes); if (!new_info_hash || !new_laddrhash) { - fib_hash_free(new_info_hash, bytes); - fib_hash_free(new_laddrhash, bytes); + fib_info_hash_free(new_info_hash, bytes); + fib_info_hash_free(new_laddrhash, bytes); } else - fib_hash_move(new_info_hash, new_laddrhash, new_size); + fib_info_hash_move(new_info_hash, new_laddrhash, new_size); - if (!fib_hash_size) + if (!fib_info_hash_size) goto failure; } fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL); if (fi == NULL) goto failure; + if (cfg->fc_mx) { + fi->fib_metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL); + if (!fi->fib_metrics) + goto failure; + } else + fi->fib_metrics = (u32 *) dst_default_metrics; fib_info_cnt++; fi->fib_net = hold_net(net); @@ -804,6 +811,17 @@ struct fib_info *fib_create_info(struct fib_config *cfg) if (cfg->fc_gw || cfg->fc_oif || cfg->fc_mp) goto err_inval; goto link_it; + } else { + switch 
(cfg->fc_type) { + case RTN_UNICAST: + case RTN_LOCAL: + case RTN_BROADCAST: + case RTN_ANYCAST: + case RTN_MULTICAST: + break; + default: + goto err_inval; + } } if (cfg->fc_scope > RT_SCOPE_HOST) @@ -835,6 +853,13 @@ struct fib_info *fib_create_info(struct fib_config *cfg) goto err_inval; } + change_nexthops(fi) { + nexthop_nh->nh_cfg_scope = cfg->fc_scope; + nexthop_nh->nh_saddr = inet_select_addr(nexthop_nh->nh_dev, + nexthop_nh->nh_gw, + nexthop_nh->nh_cfg_scope); + } endfor_nexthops(fi) + link_it: ofi = fib_find_info(fi); if (ofi) { @@ -880,84 +905,6 @@ failure: return ERR_PTR(err); } -/* Note! fib_semantic_match intentionally uses RCU list functions. */ -int fib_semantic_match(struct list_head *head, const struct flowi *flp, - struct fib_result *res, int prefixlen, int fib_flags) -{ - struct fib_alias *fa; - int nh_sel = 0; - - list_for_each_entry_rcu(fa, head, fa_list) { - int err; - - if (fa->fa_tos && - fa->fa_tos != flp->fl4_tos) - continue; - - if (fa->fa_scope < flp->fl4_scope) - continue; - - fib_alias_accessed(fa); - - err = fib_props[fa->fa_type].error; - if (err == 0) { - struct fib_info *fi = fa->fa_info; - - if (fi->fib_flags & RTNH_F_DEAD) - continue; - - switch (fa->fa_type) { - case RTN_UNICAST: - case RTN_LOCAL: - case RTN_BROADCAST: - case RTN_ANYCAST: - case RTN_MULTICAST: - for_nexthops(fi) { - if (nh->nh_flags & RTNH_F_DEAD) - continue; - if (!flp->oif || flp->oif == nh->nh_oif) - break; - } -#ifdef CONFIG_IP_ROUTE_MULTIPATH - if (nhsel < fi->fib_nhs) { - nh_sel = nhsel; - goto out_fill_res; - } -#else - if (nhsel < 1) - goto out_fill_res; -#endif - endfor_nexthops(fi); - continue; - - default: - pr_warning("fib_semantic_match bad type %#x\n", - fa->fa_type); - return -EINVAL; - } - } - return err; - } - return 1; - -out_fill_res: - res->prefixlen = prefixlen; - res->nh_sel = nh_sel; - res->type = fa->fa_type; - res->scope = fa->fa_scope; - res->fi = fa->fa_info; - if (!(fib_flags & FIB_LOOKUP_NOREF)) - atomic_inc(&res->fi->fib_clntref); - return 0; -} - -/* Find appropriate source address to this destination */ - -__be32 __fib_res_prefsrc(struct fib_result *res) -{ - return inet_select_addr(FIB_RES_DEV(*res), FIB_RES_GW(*res), res->scope); -} - int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, u32 tb_id, u8 type, u8 scope, __be32 dst, int dst_len, u8 tos, struct fib_info *fi, unsigned int flags) @@ -1125,6 +1072,80 @@ int fib_sync_down_dev(struct net_device *dev, int force) return ret; } +/* Must be invoked inside of an RCU protected region. 
*/ +void fib_select_default(struct fib_result *res) +{ + struct fib_info *fi = NULL, *last_resort = NULL; + struct list_head *fa_head = res->fa_head; + struct fib_table *tb = res->table; + int order = -1, last_idx = -1; + struct fib_alias *fa; + + list_for_each_entry_rcu(fa, fa_head, fa_list) { + struct fib_info *next_fi = fa->fa_info; + + if (fa->fa_scope != res->scope || + fa->fa_type != RTN_UNICAST) + continue; + + if (next_fi->fib_priority > res->fi->fib_priority) + break; + if (!next_fi->fib_nh[0].nh_gw || + next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK) + continue; + + fib_alias_accessed(fa); + + if (fi == NULL) { + if (next_fi != res->fi) + break; + } else if (!fib_detect_death(fi, order, &last_resort, + &last_idx, tb->tb_default)) { + fib_result_assign(res, fi); + tb->tb_default = order; + goto out; + } + fi = next_fi; + order++; + } + + if (order <= 0 || fi == NULL) { + tb->tb_default = -1; + goto out; + } + + if (!fib_detect_death(fi, order, &last_resort, &last_idx, + tb->tb_default)) { + fib_result_assign(res, fi); + tb->tb_default = order; + goto out; + } + + if (last_idx >= 0) + fib_result_assign(res, last_resort); + tb->tb_default = last_idx; +out: + return; +} + +void fib_update_nh_saddrs(struct net_device *dev) +{ + struct hlist_head *head; + struct hlist_node *node; + struct fib_nh *nh; + unsigned int hash; + + hash = fib_devindex_hashfn(dev->ifindex); + head = &fib_info_devhash[hash]; + hlist_for_each_entry(nh, node, head, nh_hash) { + if (nh->nh_dev != dev) + continue; + nh->nh_saddr = inet_select_addr(nh->nh_dev, + nh->nh_gw, + nh->nh_cfg_scope); + } +} + #ifdef CONFIG_IP_ROUTE_MULTIPATH /* @@ -1189,7 +1210,7 @@ int fib_sync_up(struct net_device *dev) * The algorithm is suboptimal, but it provides really * fair weighted route distribution. 
*/ -void fib_select_multipath(const struct flowi *flp, struct fib_result *res) +void fib_select_multipath(struct fib_result *res) { struct fib_info *fi = res->fi; int w; diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 0f280348e0fd..3d28a35c2e1a 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -95,7 +95,7 @@ typedef unsigned int t_key; #define IS_TNODE(n) (!(n->parent & T_LEAF)) #define IS_LEAF(n) (n->parent & T_LEAF) -struct node { +struct rt_trie_node { unsigned long parent; t_key key; }; @@ -126,7 +126,7 @@ struct tnode { struct work_struct work; struct tnode *tnode_free; }; - struct node *child[0]; + struct rt_trie_node *child[0]; }; #ifdef CONFIG_IP_FIB_TRIE_STATS @@ -151,16 +151,16 @@ struct trie_stat { }; struct trie { - struct node *trie; + struct rt_trie_node *trie; #ifdef CONFIG_IP_FIB_TRIE_STATS struct trie_use_stats stats; #endif }; -static void put_child(struct trie *t, struct tnode *tn, int i, struct node *n); -static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, +static void put_child(struct trie *t, struct tnode *tn, int i, struct rt_trie_node *n); +static void tnode_put_child_reorg(struct tnode *tn, int i, struct rt_trie_node *n, int wasfull); -static struct node *resize(struct trie *t, struct tnode *tn); +static struct rt_trie_node *resize(struct trie *t, struct tnode *tn); static struct tnode *inflate(struct trie *t, struct tnode *tn); static struct tnode *halve(struct trie *t, struct tnode *tn); /* tnodes to free after resize(); protected by RTNL */ @@ -177,12 +177,12 @@ static const int sync_pages = 128; static struct kmem_cache *fn_alias_kmem __read_mostly; static struct kmem_cache *trie_leaf_kmem __read_mostly; -static inline struct tnode *node_parent(struct node *node) +static inline struct tnode *node_parent(struct rt_trie_node *node) { return (struct tnode *)(node->parent & ~NODE_TYPE_MASK); } -static inline struct tnode *node_parent_rcu(struct node *node) +static inline struct tnode *node_parent_rcu(struct rt_trie_node *node) { struct tnode *ret = node_parent(node); @@ -192,22 +192,22 @@ static inline struct tnode *node_parent_rcu(struct node *node) /* Same as rcu_assign_pointer * but that macro() assumes that value is a pointer. */ -static inline void node_set_parent(struct node *node, struct tnode *ptr) +static inline void node_set_parent(struct rt_trie_node *node, struct tnode *ptr) { smp_wmb(); node->parent = (unsigned long)ptr | NODE_TYPE(node); } -static inline struct node *tnode_get_child(struct tnode *tn, unsigned int i) +static inline struct rt_trie_node *tnode_get_child(struct tnode *tn, unsigned int i) { BUG_ON(i >= 1U << tn->bits); return tn->child[i]; } -static inline struct node *tnode_get_child_rcu(struct tnode *tn, unsigned int i) +static inline struct rt_trie_node *tnode_get_child_rcu(struct tnode *tn, unsigned int i) { - struct node *ret = tnode_get_child(tn, i); + struct rt_trie_node *ret = tnode_get_child(tn, i); return rcu_dereference_rtnl(ret); } @@ -217,12 +217,12 @@ static inline int tnode_child_length(const struct tnode *tn) return 1 << tn->bits; } -static inline t_key mask_pfx(t_key k, unsigned short l) +static inline t_key mask_pfx(t_key k, unsigned int l) { return (l == 0) ? 
0 : k >> (KEYLENGTH-l) << (KEYLENGTH-l); } -static inline t_key tkey_extract_bits(t_key a, int offset, int bits) +static inline t_key tkey_extract_bits(t_key a, unsigned int offset, unsigned int bits) { if (offset < KEYLENGTH) return ((t_key)(a << offset)) >> (KEYLENGTH - bits); @@ -378,7 +378,7 @@ static void __tnode_free_rcu(struct rcu_head *head) { struct tnode *tn = container_of(head, struct tnode, rcu); size_t size = sizeof(struct tnode) + - (sizeof(struct node *) << tn->bits); + (sizeof(struct rt_trie_node *) << tn->bits); if (size <= PAGE_SIZE) kfree(tn); @@ -402,7 +402,7 @@ static void tnode_free_safe(struct tnode *tn) tn->tnode_free = tnode_free_head; tnode_free_head = tn; tnode_free_size += sizeof(struct tnode) + - (sizeof(struct node *) << tn->bits); + (sizeof(struct rt_trie_node *) << tn->bits); } static void tnode_free_flush(void) @@ -443,7 +443,7 @@ static struct leaf_info *leaf_info_new(int plen) static struct tnode *tnode_new(t_key key, int pos, int bits) { - size_t sz = sizeof(struct tnode) + (sizeof(struct node *) << bits); + size_t sz = sizeof(struct tnode) + (sizeof(struct rt_trie_node *) << bits); struct tnode *tn = tnode_alloc(sz); if (tn) { @@ -456,7 +456,7 @@ static struct tnode *tnode_new(t_key key, int pos, int bits) } pr_debug("AT %p s=%zu %zu\n", tn, sizeof(struct tnode), - sizeof(struct node) << bits); + sizeof(struct rt_trie_node) << bits); return tn; } @@ -465,7 +465,7 @@ static struct tnode *tnode_new(t_key key, int pos, int bits) * and no bits are skipped. See discussion in dyntree paper p. 6 */ -static inline int tnode_full(const struct tnode *tn, const struct node *n) +static inline int tnode_full(const struct tnode *tn, const struct rt_trie_node *n) { if (n == NULL || IS_LEAF(n)) return 0; @@ -474,7 +474,7 @@ static inline int tnode_full(const struct tnode *tn, const struct node *n) } static inline void put_child(struct trie *t, struct tnode *tn, int i, - struct node *n) + struct rt_trie_node *n) { tnode_put_child_reorg(tn, i, n, -1); } @@ -484,10 +484,10 @@ static inline void put_child(struct trie *t, struct tnode *tn, int i, * Update the value of full_children and empty_children. 
*/ -static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, +static void tnode_put_child_reorg(struct tnode *tn, int i, struct rt_trie_node *n, int wasfull) { - struct node *chi = tn->child[i]; + struct rt_trie_node *chi = tn->child[i]; int isfull; BUG_ON(i >= 1<<tn->bits); @@ -515,7 +515,7 @@ static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, } #define MAX_WORK 10 -static struct node *resize(struct trie *t, struct tnode *tn) +static struct rt_trie_node *resize(struct trie *t, struct tnode *tn) { int i; struct tnode *old_tn; @@ -605,7 +605,7 @@ static struct node *resize(struct trie *t, struct tnode *tn) /* Keep root node larger */ - if (!node_parent((struct node *)tn)) { + if (!node_parent((struct rt_trie_node *)tn)) { inflate_threshold_use = inflate_threshold_root; halve_threshold_use = halve_threshold_root; } else { @@ -635,7 +635,7 @@ static struct node *resize(struct trie *t, struct tnode *tn) /* Return if at least one inflate is run */ if (max_work != MAX_WORK) - return (struct node *) tn; + return (struct rt_trie_node *) tn; /* * Halve as long as the number of empty children in this @@ -663,7 +663,7 @@ static struct node *resize(struct trie *t, struct tnode *tn) if (tn->empty_children == tnode_child_length(tn) - 1) { one_child: for (i = 0; i < tnode_child_length(tn); i++) { - struct node *n; + struct rt_trie_node *n; n = tn->child[i]; if (!n) @@ -676,7 +676,7 @@ one_child: return n; } } - return (struct node *) tn; + return (struct rt_trie_node *) tn; } static struct tnode *inflate(struct trie *t, struct tnode *tn) @@ -723,14 +723,14 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn) goto nomem; } - put_child(t, tn, 2*i, (struct node *) left); - put_child(t, tn, 2*i+1, (struct node *) right); + put_child(t, tn, 2*i, (struct rt_trie_node *) left); + put_child(t, tn, 2*i+1, (struct rt_trie_node *) right); } } for (i = 0; i < olen; i++) { struct tnode *inode; - struct node *node = tnode_get_child(oldtnode, i); + struct rt_trie_node *node = tnode_get_child(oldtnode, i); struct tnode *left, *right; int size, j; @@ -825,7 +825,7 @@ nomem: static struct tnode *halve(struct trie *t, struct tnode *tn) { struct tnode *oldtnode = tn; - struct node *left, *right; + struct rt_trie_node *left, *right; int i; int olen = tnode_child_length(tn); @@ -856,7 +856,7 @@ static struct tnode *halve(struct trie *t, struct tnode *tn) if (!newn) goto nomem; - put_child(t, tn, i/2, (struct node *)newn); + put_child(t, tn, i/2, (struct rt_trie_node *)newn); } } @@ -958,7 +958,7 @@ fib_find_node(struct trie *t, u32 key) { int pos; struct tnode *tn; - struct node *n; + struct rt_trie_node *n; pos = 0; n = rcu_dereference_rtnl(t->trie); @@ -993,17 +993,17 @@ static void trie_rebalance(struct trie *t, struct tnode *tn) key = tn->key; - while (tn != NULL && (tp = node_parent((struct node *)tn)) != NULL) { + while (tn != NULL && (tp = node_parent((struct rt_trie_node *)tn)) != NULL) { cindex = tkey_extract_bits(key, tp->pos, tp->bits); wasfull = tnode_full(tp, tnode_get_child(tp, cindex)); tn = (struct tnode *) resize(t, (struct tnode *)tn); tnode_put_child_reorg((struct tnode *)tp, cindex, - (struct node *)tn, wasfull); + (struct rt_trie_node *)tn, wasfull); - tp = node_parent((struct node *) tn); + tp = node_parent((struct rt_trie_node *) tn); if (!tp) - rcu_assign_pointer(t->trie, (struct node *)tn); + rcu_assign_pointer(t->trie, (struct rt_trie_node *)tn); tnode_free_flush(); if (!tp) @@ -1015,7 +1015,7 @@ static void trie_rebalance(struct trie *t, struct 
tnode *tn) if (IS_TNODE(tn)) tn = (struct tnode *)resize(t, (struct tnode *)tn); - rcu_assign_pointer(t->trie, (struct node *)tn); + rcu_assign_pointer(t->trie, (struct rt_trie_node *)tn); tnode_free_flush(); } @@ -1025,7 +1025,7 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen) { int pos, newpos; struct tnode *tp = NULL, *tn = NULL; - struct node *n; + struct rt_trie_node *n; struct leaf *l; int missbit; struct list_head *fa_head = NULL; @@ -1111,10 +1111,10 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen) if (t->trie && n == NULL) { /* Case 2: n is NULL, and will just insert a new leaf */ - node_set_parent((struct node *)l, tp); + node_set_parent((struct rt_trie_node *)l, tp); cindex = tkey_extract_bits(key, tp->pos, tp->bits); - put_child(t, (struct tnode *)tp, cindex, (struct node *)l); + put_child(t, (struct tnode *)tp, cindex, (struct rt_trie_node *)l); } else { /* Case 3: n is a LEAF or a TNODE and the key doesn't match. */ /* @@ -1141,18 +1141,18 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen) return NULL; } - node_set_parent((struct node *)tn, tp); + node_set_parent((struct rt_trie_node *)tn, tp); missbit = tkey_extract_bits(key, newpos, 1); - put_child(t, tn, missbit, (struct node *)l); + put_child(t, tn, missbit, (struct rt_trie_node *)l); put_child(t, tn, 1-missbit, n); if (tp) { cindex = tkey_extract_bits(key, tp->pos, tp->bits); put_child(t, (struct tnode *)tp, cindex, - (struct node *)tn); + (struct rt_trie_node *)tn); } else { - rcu_assign_pointer(t->trie, (struct node *)tn); + rcu_assign_pointer(t->trie, (struct rt_trie_node *)tn); tp = tn; } } @@ -1340,8 +1340,8 @@ err: } /* should be called with rcu_read_lock */ -static int check_leaf(struct trie *t, struct leaf *l, - t_key key, const struct flowi *flp, +static int check_leaf(struct fib_table *tb, struct trie *t, struct leaf *l, + t_key key, const struct flowi4 *flp, struct fib_result *res, int fib_flags) { struct leaf_info *li; @@ -1349,40 +1349,75 @@ static int check_leaf(struct trie *t, struct leaf *l, struct hlist_node *node; hlist_for_each_entry_rcu(li, node, hhead, hlist) { - int err; + struct fib_alias *fa; int plen = li->plen; __be32 mask = inet_make_mask(plen); if (l->key != (key & ntohl(mask))) continue; - err = fib_semantic_match(&li->falh, flp, res, plen, fib_flags); + list_for_each_entry_rcu(fa, &li->falh, fa_list) { + struct fib_info *fi = fa->fa_info; + int nhsel, err; + if (fa->fa_tos && fa->fa_tos != flp->flowi4_tos) + continue; + if (fa->fa_scope < flp->flowi4_scope) + continue; + fib_alias_accessed(fa); + err = fib_props[fa->fa_type].error; + if (err) { #ifdef CONFIG_IP_FIB_TRIE_STATS - if (err <= 0) - t->stats.semantic_match_passed++; - else - t->stats.semantic_match_miss++; + t->stats.semantic_match_miss++; +#endif + return 1; + } + if (fi->fib_flags & RTNH_F_DEAD) + continue; + for (nhsel = 0; nhsel < fi->fib_nhs; nhsel++) { + const struct fib_nh *nh = &fi->fib_nh[nhsel]; + + if (nh->nh_flags & RTNH_F_DEAD) + continue; + if (flp->flowi4_oif && flp->flowi4_oif != nh->nh_oif) + continue; + +#ifdef CONFIG_IP_FIB_TRIE_STATS + t->stats.semantic_match_passed++; +#endif + res->prefixlen = plen; + res->nh_sel = nhsel; + res->type = fa->fa_type; + res->scope = fa->fa_scope; + res->fi = fi; + res->table = tb; + res->fa_head = &li->falh; + if (!(fib_flags & FIB_LOOKUP_NOREF)) + atomic_inc(&res->fi->fib_clntref); + return 0; + } + } + +#ifdef CONFIG_IP_FIB_TRIE_STATS + t->stats.semantic_match_miss++; #endif - if (err <= 0) 
- return err; } return 1; } -int fib_table_lookup(struct fib_table *tb, const struct flowi *flp, +int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp, struct fib_result *res, int fib_flags) { struct trie *t = (struct trie *) tb->tb_data; int ret; - struct node *n; + struct rt_trie_node *n; struct tnode *pn; - int pos, bits; - t_key key = ntohl(flp->fl4_dst); - int chopped_off; + unsigned int pos, bits; + t_key key = ntohl(flp->daddr); + unsigned int chopped_off; t_key cindex = 0; - int current_prefix_length = KEYLENGTH; + unsigned int current_prefix_length = KEYLENGTH; struct tnode *cn; t_key pref_mismatch; @@ -1398,7 +1433,7 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi *flp, /* Just a leaf? */ if (IS_LEAF(n)) { - ret = check_leaf(t, (struct leaf *)n, key, flp, res, fib_flags); + ret = check_leaf(tb, t, (struct leaf *)n, key, flp, res, fib_flags); goto found; } @@ -1423,7 +1458,7 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi *flp, } if (IS_LEAF(n)) { - ret = check_leaf(t, (struct leaf *)n, key, flp, res, fib_flags); + ret = check_leaf(tb, t, (struct leaf *)n, key, flp, res, fib_flags); if (ret > 0) goto backtrace; goto found; @@ -1541,7 +1576,7 @@ backtrace: if (chopped_off <= pn->bits) { cindex &= ~(1 << (chopped_off-1)); } else { - struct tnode *parent = node_parent_rcu((struct node *) pn); + struct tnode *parent = node_parent_rcu((struct rt_trie_node *) pn); if (!parent) goto failed; @@ -1568,7 +1603,7 @@ found: */ static void trie_leaf_remove(struct trie *t, struct leaf *l) { - struct tnode *tp = node_parent((struct node *) l); + struct tnode *tp = node_parent((struct rt_trie_node *) l); pr_debug("entering trie_leaf_remove(%p)\n", l); @@ -1706,7 +1741,7 @@ static int trie_flush_leaf(struct leaf *l) * Scan for the next right leaf starting at node p->child[idx] * Since we have back pointer, no recursion necessary. 
*/ -static struct leaf *leaf_walk_rcu(struct tnode *p, struct node *c) +static struct leaf *leaf_walk_rcu(struct tnode *p, struct rt_trie_node *c) { do { t_key idx; @@ -1732,7 +1767,7 @@ static struct leaf *leaf_walk_rcu(struct tnode *p, struct node *c) } /* Node empty, walk back up to parent */ - c = (struct node *) p; + c = (struct rt_trie_node *) p; } while ((p = node_parent_rcu(c)) != NULL); return NULL; /* Root of trie */ @@ -1753,7 +1788,7 @@ static struct leaf *trie_firstleaf(struct trie *t) static struct leaf *trie_nextleaf(struct leaf *l) { - struct node *c = (struct node *) l; + struct rt_trie_node *c = (struct rt_trie_node *) l; struct tnode *p = node_parent_rcu(c); if (!p) @@ -1802,80 +1837,6 @@ void fib_free_table(struct fib_table *tb) kfree(tb); } -void fib_table_select_default(struct fib_table *tb, - const struct flowi *flp, - struct fib_result *res) -{ - struct trie *t = (struct trie *) tb->tb_data; - int order, last_idx; - struct fib_info *fi = NULL; - struct fib_info *last_resort; - struct fib_alias *fa = NULL; - struct list_head *fa_head; - struct leaf *l; - - last_idx = -1; - last_resort = NULL; - order = -1; - - rcu_read_lock(); - - l = fib_find_node(t, 0); - if (!l) - goto out; - - fa_head = get_fa_head(l, 0); - if (!fa_head) - goto out; - - if (list_empty(fa_head)) - goto out; - - list_for_each_entry_rcu(fa, fa_head, fa_list) { - struct fib_info *next_fi = fa->fa_info; - - if (fa->fa_scope != res->scope || - fa->fa_type != RTN_UNICAST) - continue; - - if (next_fi->fib_priority > res->fi->fib_priority) - break; - if (!next_fi->fib_nh[0].nh_gw || - next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK) - continue; - - fib_alias_accessed(fa); - - if (fi == NULL) { - if (next_fi != res->fi) - break; - } else if (!fib_detect_death(fi, order, &last_resort, - &last_idx, tb->tb_default)) { - fib_result_assign(res, fi); - tb->tb_default = order; - goto out; - } - fi = next_fi; - order++; - } - if (order <= 0 || fi == NULL) { - tb->tb_default = -1; - goto out; - } - - if (!fib_detect_death(fi, order, &last_resort, &last_idx, - tb->tb_default)) { - fib_result_assign(res, fi); - tb->tb_default = order; - goto out; - } - if (last_idx >= 0) - fib_result_assign(res, last_resort); - tb->tb_default = last_idx; -out: - rcu_read_unlock(); -} - static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, struct fib_table *tb, struct sk_buff *skb, struct netlink_callback *cb) @@ -1990,7 +1951,7 @@ int fib_table_dump(struct fib_table *tb, struct sk_buff *skb, return skb->len; } -void __init fib_hash_init(void) +void __init fib_trie_init(void) { fn_alias_kmem = kmem_cache_create("ip_fib_alias", sizeof(struct fib_alias), @@ -2003,8 +1964,7 @@ void __init fib_hash_init(void) } -/* Fix more generic FIB names for init later */ -struct fib_table *fib_hash_table(u32 id) +struct fib_table *fib_trie_table(u32 id) { struct fib_table *tb; struct trie *t; @@ -2036,7 +1996,7 @@ struct fib_trie_iter { unsigned int depth; }; -static struct node *fib_trie_get_next(struct fib_trie_iter *iter) +static struct rt_trie_node *fib_trie_get_next(struct fib_trie_iter *iter) { struct tnode *tn = iter->tnode; unsigned int cindex = iter->index; @@ -2050,7 +2010,7 @@ static struct node *fib_trie_get_next(struct fib_trie_iter *iter) iter->tnode, iter->index, iter->depth); rescan: while (cindex < (1<<tn->bits)) { - struct node *n = tnode_get_child_rcu(tn, cindex); + struct rt_trie_node *n = tnode_get_child_rcu(tn, cindex); if (n) { if (IS_LEAF(n)) { @@ -2069,7 +2029,7 @@ rescan: } /* Current node exhausted, pop back 
up */ - p = node_parent_rcu((struct node *)tn); + p = node_parent_rcu((struct rt_trie_node *)tn); if (p) { cindex = tkey_extract_bits(tn->key, p->pos, p->bits)+1; tn = p; @@ -2081,10 +2041,10 @@ rescan: return NULL; } -static struct node *fib_trie_get_first(struct fib_trie_iter *iter, +static struct rt_trie_node *fib_trie_get_first(struct fib_trie_iter *iter, struct trie *t) { - struct node *n; + struct rt_trie_node *n; if (!t) return NULL; @@ -2108,7 +2068,7 @@ static struct node *fib_trie_get_first(struct fib_trie_iter *iter, static void trie_collect_stats(struct trie *t, struct trie_stat *s) { - struct node *n; + struct rt_trie_node *n; struct fib_trie_iter iter; memset(s, 0, sizeof(*s)); @@ -2181,7 +2141,7 @@ static void trie_show_stats(struct seq_file *seq, struct trie_stat *stat) seq_putc(seq, '\n'); seq_printf(seq, "\tPointers: %u\n", pointers); - bytes += sizeof(struct node *) * pointers; + bytes += sizeof(struct rt_trie_node *) * pointers; seq_printf(seq, "Null ptrs: %u\n", stat->nullpointers); seq_printf(seq, "Total size: %u kB\n", (bytes + 1023) / 1024); } @@ -2262,7 +2222,7 @@ static const struct file_operations fib_triestat_fops = { .release = single_release_net, }; -static struct node *fib_trie_get_idx(struct seq_file *seq, loff_t pos) +static struct rt_trie_node *fib_trie_get_idx(struct seq_file *seq, loff_t pos) { struct fib_trie_iter *iter = seq->private; struct net *net = seq_file_net(seq); @@ -2275,7 +2235,7 @@ static struct node *fib_trie_get_idx(struct seq_file *seq, loff_t pos) struct fib_table *tb; hlist_for_each_entry_rcu(tb, node, head, tb_hlist) { - struct node *n; + struct rt_trie_node *n; for (n = fib_trie_get_first(iter, (struct trie *) tb->tb_data); @@ -2304,7 +2264,7 @@ static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos) struct fib_table *tb = iter->tb; struct hlist_node *tb_node; unsigned int h; - struct node *n; + struct rt_trie_node *n; ++*pos; /* next node in same table */ @@ -2390,7 +2350,7 @@ static inline const char *rtn_type(char *buf, size_t len, unsigned int t) static int fib_trie_seq_show(struct seq_file *seq, void *v) { const struct fib_trie_iter *iter = seq->private; - struct node *n = v; + struct rt_trie_node *n = v; if (!node_parent_rcu(n)) fib_table_print(seq, iter->tb); diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 4aa1b7f01ea0..a91dc1611081 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -233,48 +233,11 @@ static inline void icmp_xmit_unlock(struct sock *sk) * Send an ICMP frame. */ -/* - * Check transmit rate limitation for given message. - * The rate information is held in the destination cache now. - * This function is generic and could be used for other purposes - * too. It uses a Token bucket filter as suggested by Alexey Kuznetsov. - * - * Note that the same dst_entry fields are modified by functions in - * route.c too, but these work for packet destinations while xrlim_allow - * works for icmp destinations. This means the rate limiting information - * for one "ip object" is shared - and these ICMPs are twice limited: - * by source and by destination. - * - * RFC 1812: 4.3.2.8 SHOULD be able to limit error message rate - * SHOULD allow setting of rate limits - * - * Shared between ICMPv4 and ICMPv6. 
- */ -#define XRLIM_BURST_FACTOR 6 -int xrlim_allow(struct dst_entry *dst, int timeout) -{ - unsigned long now, token = dst->rate_tokens; - int rc = 0; - - now = jiffies; - token += now - dst->rate_last; - dst->rate_last = now; - if (token > XRLIM_BURST_FACTOR * timeout) - token = XRLIM_BURST_FACTOR * timeout; - if (token >= timeout) { - token -= timeout; - rc = 1; - } - dst->rate_tokens = token; - return rc; -} -EXPORT_SYMBOL(xrlim_allow); - -static inline int icmpv4_xrlim_allow(struct net *net, struct rtable *rt, +static inline bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt, int type, int code) { struct dst_entry *dst = &rt->dst; - int rc = 1; + bool rc = true; if (type > NR_ICMP_TYPES) goto out; @@ -288,8 +251,12 @@ static inline int icmpv4_xrlim_allow(struct net *net, struct rtable *rt, goto out; /* Limit if icmp type is enabled in ratemask. */ - if ((1 << type) & net->ipv4.sysctl_icmp_ratemask) - rc = xrlim_allow(dst, net->ipv4.sysctl_icmp_ratelimit); + if ((1 << type) & net->ipv4.sysctl_icmp_ratemask) { + if (!rt->peer) + rt_bind_peer(rt, 1); + rc = inet_peer_xrlim_allow(rt->peer, + net->ipv4.sysctl_icmp_ratelimit); + } out: return rc; } @@ -386,12 +353,15 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) daddr = icmp_param->replyopts.faddr; } { - struct flowi fl = { .fl4_dst= daddr, - .fl4_src = rt->rt_spec_dst, - .fl4_tos = RT_TOS(ip_hdr(skb)->tos), - .proto = IPPROTO_ICMP }; - security_skb_classify_flow(skb, &fl); - if (ip_route_output_key(net, &rt, &fl)) + struct flowi4 fl4 = { + .daddr = daddr, + .saddr = rt->rt_spec_dst, + .flowi4_tos = RT_TOS(ip_hdr(skb)->tos), + .flowi4_proto = IPPROTO_ICMP, + }; + security_skb_classify_flow(skb, flowi4_to_flowi(&fl4)); + rt = ip_route_output_key(net, &fl4); + if (IS_ERR(rt)) goto out_unlock; } if (icmpv4_xrlim_allow(net, rt, icmp_param->data.icmph.type, @@ -402,6 +372,97 @@ out_unlock: icmp_xmit_unlock(sk); } +static struct rtable *icmp_route_lookup(struct net *net, struct sk_buff *skb_in, + struct iphdr *iph, + __be32 saddr, u8 tos, + int type, int code, + struct icmp_bxm *param) +{ + struct flowi4 fl4 = { + .daddr = (param->replyopts.srr ? + param->replyopts.faddr : iph->saddr), + .saddr = saddr, + .flowi4_tos = RT_TOS(tos), + .flowi4_proto = IPPROTO_ICMP, + .fl4_icmp_type = type, + .fl4_icmp_code = code, + }; + struct rtable *rt, *rt2; + int err; + + security_skb_classify_flow(skb_in, flowi4_to_flowi(&fl4)); + rt = __ip_route_output_key(net, &fl4); + if (IS_ERR(rt)) + return rt; + + /* No need to clone since we're just using its address. */ + rt2 = rt; + + if (!fl4.saddr) + fl4.saddr = rt->rt_src; + + rt = (struct rtable *) xfrm_lookup(net, &rt->dst, + flowi4_to_flowi(&fl4), NULL, 0); + if (!IS_ERR(rt)) { + if (rt != rt2) + return rt; + } else if (PTR_ERR(rt) == -EPERM) { + rt = NULL; + } else + return rt; + + err = xfrm_decode_session_reverse(skb_in, flowi4_to_flowi(&fl4), AF_INET); + if (err) + goto relookup_failed; + + if (inet_addr_type(net, fl4.saddr) == RTN_LOCAL) { + rt2 = __ip_route_output_key(net, &fl4); + if (IS_ERR(rt2)) + err = PTR_ERR(rt2); + } else { + struct flowi4 fl4_2 = {}; + unsigned long orefdst; + + fl4_2.daddr = fl4.saddr; + rt2 = ip_route_output_key(net, &fl4_2); + if (IS_ERR(rt2)) { + err = PTR_ERR(rt2); + goto relookup_failed; + } + /* Ugh! 
*/ + orefdst = skb_in->_skb_refdst; /* save old refdst */ + err = ip_route_input(skb_in, fl4.daddr, fl4.saddr, + RT_TOS(tos), rt2->dst.dev); + + dst_release(&rt2->dst); + rt2 = skb_rtable(skb_in); + skb_in->_skb_refdst = orefdst; /* restore old refdst */ + } + + if (err) + goto relookup_failed; + + rt2 = (struct rtable *) xfrm_lookup(net, &rt2->dst, + flowi4_to_flowi(&fl4), NULL, + XFRM_LOOKUP_ICMP); + if (!IS_ERR(rt2)) { + dst_release(&rt->dst); + rt = rt2; + } else if (PTR_ERR(rt2) == -EPERM) { + if (rt) + dst_release(&rt->dst); + return rt2; + } else { + err = PTR_ERR(rt2); + goto relookup_failed; + } + return rt; + +relookup_failed: + if (rt) + return rt; + return ERR_PTR(err); +} /* * Send an ICMP message in response to a situation @@ -507,7 +568,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) rcu_read_lock(); if (rt_is_input_route(rt) && net->ipv4.sysctl_icmp_errors_use_inbound_ifaddr) - dev = dev_get_by_index_rcu(net, rt->fl.iif); + dev = dev_get_by_index_rcu(net, rt->rt_iif); if (dev) saddr = inet_select_addr(dev, 0, RT_SCOPE_LINK); @@ -539,86 +600,11 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) ipc.opt = &icmp_param.replyopts; ipc.tx_flags = 0; - { - struct flowi fl = { - .fl4_dst = icmp_param.replyopts.srr ? - icmp_param.replyopts.faddr : iph->saddr, - .fl4_src = saddr, - .fl4_tos = RT_TOS(tos), - .proto = IPPROTO_ICMP, - .fl_icmp_type = type, - .fl_icmp_code = code, - }; - int err; - struct rtable *rt2; - - security_skb_classify_flow(skb_in, &fl); - if (__ip_route_output_key(net, &rt, &fl)) - goto out_unlock; - - /* No need to clone since we're just using its address. */ - rt2 = rt; - - if (!fl.nl_u.ip4_u.saddr) - fl.nl_u.ip4_u.saddr = rt->rt_src; - - err = xfrm_lookup(net, (struct dst_entry **)&rt, &fl, NULL, 0); - switch (err) { - case 0: - if (rt != rt2) - goto route_done; - break; - case -EPERM: - rt = NULL; - break; - default: - goto out_unlock; - } - - if (xfrm_decode_session_reverse(skb_in, &fl, AF_INET)) - goto relookup_failed; - - if (inet_addr_type(net, fl.fl4_src) == RTN_LOCAL) - err = __ip_route_output_key(net, &rt2, &fl); - else { - struct flowi fl2 = {}; - unsigned long orefdst; - - fl2.fl4_dst = fl.fl4_src; - if (ip_route_output_key(net, &rt2, &fl2)) - goto relookup_failed; - - /* Ugh! 
*/ - orefdst = skb_in->_skb_refdst; /* save old refdst */ - err = ip_route_input(skb_in, fl.fl4_dst, fl.fl4_src, - RT_TOS(tos), rt2->dst.dev); - - dst_release(&rt2->dst); - rt2 = skb_rtable(skb_in); - skb_in->_skb_refdst = orefdst; /* restore old refdst */ - } - - if (err) - goto relookup_failed; - - err = xfrm_lookup(net, (struct dst_entry **)&rt2, &fl, NULL, - XFRM_LOOKUP_ICMP); - switch (err) { - case 0: - dst_release(&rt->dst); - rt = rt2; - break; - case -EPERM: - goto ende; - default: -relookup_failed: - if (!rt) - goto out_unlock; - break; - } - } + rt = icmp_route_lookup(net, skb_in, iph, saddr, tos, + type, code, &icmp_param); + if (IS_ERR(rt)) + goto out_unlock; -route_done: if (!icmpv4_xrlim_allow(net, rt, type, code)) goto ende; diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index e0e77e297de3..1fd3d9ce8398 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -321,14 +321,12 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size) } igmp_skb_size(skb) = size; - { - struct flowi fl = { .oif = dev->ifindex, - .fl4_dst = IGMPV3_ALL_MCR, - .proto = IPPROTO_IGMP }; - if (ip_route_output_key(net, &rt, &fl)) { - kfree_skb(skb); - return NULL; - } + rt = ip_route_output_ports(net, NULL, IGMPV3_ALL_MCR, 0, + 0, 0, + IPPROTO_IGMP, 0, dev->ifindex); + if (IS_ERR(rt)) { + kfree_skb(skb); + return NULL; } if (rt->rt_src == 0) { kfree_skb(skb); @@ -666,13 +664,12 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc, else dst = group; - { - struct flowi fl = { .oif = dev->ifindex, - .fl4_dst = dst, - .proto = IPPROTO_IGMP }; - if (ip_route_output_key(net, &rt, &fl)) - return -1; - } + rt = ip_route_output_ports(net, NULL, dst, 0, + 0, 0, + IPPROTO_IGMP, 0, dev->ifindex); + if (IS_ERR(rt)) + return -1; + if (rt->rt_src == 0) { ip_rt_put(rt); return -1; @@ -1439,8 +1436,6 @@ void ip_mc_destroy_dev(struct in_device *in_dev) /* RTNL is locked */ static struct in_device *ip_mc_find_dev(struct net *net, struct ip_mreqn *imr) { - struct flowi fl = { .fl4_dst = imr->imr_multiaddr.s_addr }; - struct rtable *rt; struct net_device *dev = NULL; struct in_device *idev = NULL; @@ -1454,9 +1449,14 @@ static struct in_device *ip_mc_find_dev(struct net *net, struct ip_mreqn *imr) return NULL; } - if (!dev && !ip_route_output_key(net, &rt, &fl)) { - dev = rt->dst.dev; - ip_rt_put(rt); + if (!dev) { + struct rtable *rt = ip_route_output(net, + imr->imr_multiaddr.s_addr, + 0, 0, 0); + if (!IS_ERR(rt)) { + dev = rt->dst.dev; + ip_rt_put(rt); + } } if (dev) { imr->imr_ifindex = dev->ifindex; @@ -2329,13 +2329,13 @@ void ip_mc_drop_socket(struct sock *sk) rtnl_unlock(); } -int ip_check_mc(struct in_device *in_dev, __be32 mc_addr, __be32 src_addr, u16 proto) +/* called with rcu_read_lock() */ +int ip_check_mc_rcu(struct in_device *in_dev, __be32 mc_addr, __be32 src_addr, u16 proto) { struct ip_mc_list *im; struct ip_sf_list *psf; int rv = 0; - rcu_read_lock(); for_each_pmc_rcu(in_dev, im) { if (im->multiaddr == mc_addr) break; @@ -2357,7 +2357,6 @@ int ip_check_mc(struct in_device *in_dev, __be32 mc_addr, __be32 src_addr, u16 p } else rv = 1; /* unspecified source; tentatively allow */ } - rcu_read_unlock(); return rv; } diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 97e5fb765265..6c0b7f4a3d7d 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -356,20 +356,23 @@ struct dst_entry *inet_csk_route_req(struct sock *sk, struct rtable *rt; const struct inet_request_sock *ireq = inet_rsk(req); struct 
ip_options *opt = inet_rsk(req)->opt; - struct flowi fl = { .oif = sk->sk_bound_dev_if, - .mark = sk->sk_mark, - .fl4_dst = ((opt && opt->srr) ? - opt->faddr : ireq->rmt_addr), - .fl4_src = ireq->loc_addr, - .fl4_tos = RT_CONN_FLAGS(sk), - .proto = sk->sk_protocol, - .flags = inet_sk_flowi_flags(sk), - .fl_ip_sport = inet_sk(sk)->inet_sport, - .fl_ip_dport = ireq->rmt_port }; + struct flowi4 fl4 = { + .flowi4_oif = sk->sk_bound_dev_if, + .flowi4_mark = sk->sk_mark, + .daddr = ((opt && opt->srr) ? + opt->faddr : ireq->rmt_addr), + .saddr = ireq->loc_addr, + .flowi4_tos = RT_CONN_FLAGS(sk), + .flowi4_proto = sk->sk_protocol, + .flowi4_flags = inet_sk_flowi_flags(sk), + .fl4_sport = inet_sk(sk)->inet_sport, + .fl4_dport = ireq->rmt_port, + }; struct net *net = sock_net(sk); - security_req_classify_flow(req, &fl); - if (ip_route_output_flow(net, &rt, &fl, sk, 0)) + security_req_classify_flow(req, flowi4_to_flowi(&fl4)); + rt = ip_route_output_flow(net, &fl4, sk); + if (IS_ERR(rt)) goto no_route; if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway) goto route_err; diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 2746c1fa6417..2ada17129fce 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -858,7 +858,7 @@ static int inet_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) nlmsg_len(nlh) < hdrlen) return -EINVAL; - if ((nlh->nlmsg_flags & NLM_F_DUMP) == NLM_F_DUMP) { + if (nlh->nlmsg_flags & NLM_F_DUMP) { if (nlmsg_attrlen(nlh, hdrlen)) { struct nlattr *attr; diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index c5af909cf701..3c8dfa16614d 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -505,7 +505,9 @@ restart: } rcu_read_unlock(); + local_bh_disable(); inet_twsk_deschedule(tw, twdr); + local_bh_enable(); inet_twsk_put(tw); goto restart_rcu; } diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index d9bc85751c74..dd1b20eca1a2 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -81,19 +81,19 @@ static const struct inet_peer peer_fake_node = { struct inet_peer_base { struct inet_peer __rcu *root; - spinlock_t lock; + seqlock_t lock; int total; }; static struct inet_peer_base v4_peers = { .root = peer_avl_empty_rcu, - .lock = __SPIN_LOCK_UNLOCKED(v4_peers.lock), + .lock = __SEQLOCK_UNLOCKED(v4_peers.lock), .total = 0, }; static struct inet_peer_base v6_peers = { .root = peer_avl_empty_rcu, - .lock = __SPIN_LOCK_UNLOCKED(v6_peers.lock), + .lock = __SEQLOCK_UNLOCKED(v6_peers.lock), .total = 0, }; @@ -167,9 +167,9 @@ static int addr_compare(const struct inetpeer_addr *a, int i, n = (a->family == AF_INET ? 1 : 4); for (i = 0; i < n; i++) { - if (a->a6[i] == b->a6[i]) + if (a->addr.a6[i] == b->addr.a6[i]) continue; - if (a->a6[i] < b->a6[i]) + if (a->addr.a6[i] < b->addr.a6[i]) return -1; return 1; } @@ -177,6 +177,9 @@ static int addr_compare(const struct inetpeer_addr *a, return 0; } +#define rcu_deref_locked(X, BASE) \ + rcu_dereference_protected(X, lockdep_is_held(&(BASE)->lock.lock)) + /* * Called with local BH disabled and the pool lock held. 
*/ @@ -187,8 +190,7 @@ static int addr_compare(const struct inetpeer_addr *a, \ stackptr = _stack; \ *stackptr++ = &_base->root; \ - for (u = rcu_dereference_protected(_base->root, \ - lockdep_is_held(&_base->lock)); \ + for (u = rcu_deref_locked(_base->root, _base); \ u != peer_avl_empty; ) { \ int cmp = addr_compare(_daddr, &u->daddr); \ if (cmp == 0) \ @@ -198,23 +200,22 @@ static int addr_compare(const struct inetpeer_addr *a, else \ v = &u->avl_right; \ *stackptr++ = v; \ - u = rcu_dereference_protected(*v, \ - lockdep_is_held(&_base->lock)); \ + u = rcu_deref_locked(*v, _base); \ } \ u; \ }) /* - * Called with rcu_read_lock_bh() + * Called with rcu_read_lock() * Because we hold no lock against a writer, its quite possible we fall * in an endless loop. * But every pointer we follow is guaranteed to be valid thanks to RCU. * We exit from this function if number of links exceeds PEER_MAXDEPTH */ -static struct inet_peer *lookup_rcu_bh(const struct inetpeer_addr *daddr, - struct inet_peer_base *base) +static struct inet_peer *lookup_rcu(const struct inetpeer_addr *daddr, + struct inet_peer_base *base) { - struct inet_peer *u = rcu_dereference_bh(base->root); + struct inet_peer *u = rcu_dereference(base->root); int count = 0; while (u != peer_avl_empty) { @@ -230,9 +231,9 @@ static struct inet_peer *lookup_rcu_bh(const struct inetpeer_addr *daddr, return u; } if (cmp == -1) - u = rcu_dereference_bh(u->avl_left); + u = rcu_dereference(u->avl_left); else - u = rcu_dereference_bh(u->avl_right); + u = rcu_dereference(u->avl_right); if (unlikely(++count == PEER_MAXDEPTH)) break; } @@ -246,13 +247,11 @@ static struct inet_peer *lookup_rcu_bh(const struct inetpeer_addr *daddr, struct inet_peer __rcu **v; \ *stackptr++ = &start->avl_left; \ v = &start->avl_left; \ - for (u = rcu_dereference_protected(*v, \ - lockdep_is_held(&base->lock)); \ + for (u = rcu_deref_locked(*v, base); \ u->avl_right != peer_avl_empty_rcu; ) { \ v = &u->avl_right; \ *stackptr++ = v; \ - u = rcu_dereference_protected(*v, \ - lockdep_is_held(&base->lock)); \ + u = rcu_deref_locked(*v, base); \ } \ u; \ }) @@ -271,21 +270,16 @@ static void peer_avl_rebalance(struct inet_peer __rcu **stack[], while (stackend > stack) { nodep = *--stackend; - node = rcu_dereference_protected(*nodep, - lockdep_is_held(&base->lock)); - l = rcu_dereference_protected(node->avl_left, - lockdep_is_held(&base->lock)); - r = rcu_dereference_protected(node->avl_right, - lockdep_is_held(&base->lock)); + node = rcu_deref_locked(*nodep, base); + l = rcu_deref_locked(node->avl_left, base); + r = rcu_deref_locked(node->avl_right, base); lh = node_height(l); rh = node_height(r); if (lh > rh + 1) { /* l: RH+2 */ struct inet_peer *ll, *lr, *lrl, *lrr; int lrh; - ll = rcu_dereference_protected(l->avl_left, - lockdep_is_held(&base->lock)); - lr = rcu_dereference_protected(l->avl_right, - lockdep_is_held(&base->lock)); + ll = rcu_deref_locked(l->avl_left, base); + lr = rcu_deref_locked(l->avl_right, base); lrh = node_height(lr); if (lrh <= node_height(ll)) { /* ll: RH+1 */ RCU_INIT_POINTER(node->avl_left, lr); /* lr: RH or RH+1 */ @@ -296,10 +290,8 @@ static void peer_avl_rebalance(struct inet_peer __rcu **stack[], l->avl_height = node->avl_height + 1; RCU_INIT_POINTER(*nodep, l); } else { /* ll: RH, lr: RH+1 */ - lrl = rcu_dereference_protected(lr->avl_left, - lockdep_is_held(&base->lock)); /* lrl: RH or RH-1 */ - lrr = rcu_dereference_protected(lr->avl_right, - lockdep_is_held(&base->lock)); /* lrr: RH or RH-1 */ + lrl = rcu_deref_locked(lr->avl_left, 
base);/* lrl: RH or RH-1 */ + lrr = rcu_deref_locked(lr->avl_right, base);/* lrr: RH or RH-1 */ RCU_INIT_POINTER(node->avl_left, lrr); /* lrr: RH or RH-1 */ RCU_INIT_POINTER(node->avl_right, r); /* r: RH */ node->avl_height = rh + 1; /* node: RH+1 */ @@ -314,10 +306,8 @@ static void peer_avl_rebalance(struct inet_peer __rcu **stack[], } else if (rh > lh + 1) { /* r: LH+2 */ struct inet_peer *rr, *rl, *rlr, *rll; int rlh; - rr = rcu_dereference_protected(r->avl_right, - lockdep_is_held(&base->lock)); - rl = rcu_dereference_protected(r->avl_left, - lockdep_is_held(&base->lock)); + rr = rcu_deref_locked(r->avl_right, base); + rl = rcu_deref_locked(r->avl_left, base); rlh = node_height(rl); if (rlh <= node_height(rr)) { /* rr: LH+1 */ RCU_INIT_POINTER(node->avl_right, rl); /* rl: LH or LH+1 */ @@ -328,10 +318,8 @@ static void peer_avl_rebalance(struct inet_peer __rcu **stack[], r->avl_height = node->avl_height + 1; RCU_INIT_POINTER(*nodep, r); } else { /* rr: RH, rl: RH+1 */ - rlr = rcu_dereference_protected(rl->avl_right, - lockdep_is_held(&base->lock)); /* rlr: LH or LH-1 */ - rll = rcu_dereference_protected(rl->avl_left, - lockdep_is_held(&base->lock)); /* rll: LH or LH-1 */ + rlr = rcu_deref_locked(rl->avl_right, base);/* rlr: LH or LH-1 */ + rll = rcu_deref_locked(rl->avl_left, base);/* rll: LH or LH-1 */ RCU_INIT_POINTER(node->avl_right, rll); /* rll: LH or LH-1 */ RCU_INIT_POINTER(node->avl_left, l); /* l: LH */ node->avl_height = lh + 1; /* node: LH+1 */ @@ -372,7 +360,7 @@ static void unlink_from_pool(struct inet_peer *p, struct inet_peer_base *base) do_free = 0; - spin_lock_bh(&base->lock); + write_seqlock_bh(&base->lock); /* Check the reference counter. It was artificially incremented by 1 * in cleanup() function to prevent sudden disappearing. If we can * atomically (because of lockless readers) take this last reference, @@ -392,8 +380,7 @@ static void unlink_from_pool(struct inet_peer *p, struct inet_peer_base *base) /* look for a node to insert instead of p */ struct inet_peer *t; t = lookup_rightempty(p, base); - BUG_ON(rcu_dereference_protected(*stackptr[-1], - lockdep_is_held(&base->lock)) != t); + BUG_ON(rcu_deref_locked(*stackptr[-1], base) != t); **--stackptr = t->avl_left; /* t is removed, t->daddr > x->daddr for any * x in p->avl_left subtree. @@ -409,10 +396,10 @@ static void unlink_from_pool(struct inet_peer *p, struct inet_peer_base *base) base->total--; do_free = 1; } - spin_unlock_bh(&base->lock); + write_sequnlock_bh(&base->lock); if (do_free) - call_rcu_bh(&p->rcu, inetpeer_free_rcu); + call_rcu(&p->rcu, inetpeer_free_rcu); else /* The node is used again. Decrease the reference counter * back. The loop "cleanup -> unlink_from_unused @@ -475,15 +462,19 @@ static int cleanup_once(unsigned long ttl) struct inet_peer *inet_getpeer(struct inetpeer_addr *daddr, int create) { struct inet_peer __rcu **stack[PEER_MAXDEPTH], ***stackptr; - struct inet_peer_base *base = family_to_base(AF_INET); + struct inet_peer_base *base = family_to_base(daddr->family); struct inet_peer *p; + unsigned int sequence; + int invalidated; /* Look up for the address quickly, lockless. * Because of a concurrent writer, we might not find an existing entry. */ - rcu_read_lock_bh(); - p = lookup_rcu_bh(daddr, base); - rcu_read_unlock_bh(); + rcu_read_lock(); + sequence = read_seqbegin(&base->lock); + p = lookup_rcu(daddr, base); + invalidated = read_seqretry(&base->lock, sequence); + rcu_read_unlock(); if (p) { /* The existing node has been found. 
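For reference, a minimal sketch of the lockless-lookup pattern the inetpeer hunks above convert to: the base lock becomes a seqlock, so the unlocked RCU walk can detect whether a writer ran concurrently and a definitive miss can be returned without taking the lock at all. The names my_entry, my_base, my_lookup_rcu and my_lookup_locked are hypothetical stand-ins, not symbols from the patch.

#include <linux/types.h>
#include <linux/seqlock.h>
#include <linux/rcupdate.h>

struct my_entry;

struct my_base {
	seqlock_t lock;				/* was spinlock_t before this series */
	struct my_entry __rcu *root;
};

/* hypothetical helpers: lockless walk vs. exact lookup under the lock */
struct my_entry *my_lookup_rcu(struct my_base *base, u32 key);
struct my_entry *my_lookup_locked(struct my_base *base, u32 key);

struct my_entry *my_getentry(struct my_base *base, u32 key, int create)
{
	struct my_entry *p;
	unsigned int seq;
	int invalidated;

	/* Fast path: pure RCU, no lock; a racing writer may hide the entry. */
	rcu_read_lock();
	seq = read_seqbegin(&base->lock);
	p = my_lookup_rcu(base, key);
	invalidated = read_seqretry(&base->lock, seq);
	rcu_read_unlock();
	if (p)
		return p;

	/* No writer touched the tree during the walk: the miss is final. */
	if (!create && !invalidated)
		return NULL;

	/* Slow path: retry exactly, holding the write side of the seqlock. */
	write_seqlock_bh(&base->lock);
	p = my_lookup_locked(base, key);
	/* ... allocate and insert a new entry here when 'create' is set ... */
	write_sequnlock_bh(&base->lock);

	return p;
}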
@@ -493,14 +484,18 @@ struct inet_peer *inet_getpeer(struct inetpeer_addr *daddr, int create) return p; } + /* If no writer did a change during our lookup, we can return early. */ + if (!create && !invalidated) + return NULL; + /* retry an exact lookup, taking the lock before. * At least, nodes should be hot in our cache. */ - spin_lock_bh(&base->lock); + write_seqlock_bh(&base->lock); p = lookup(daddr, stack, base); if (p != peer_avl_empty) { atomic_inc(&p->refcnt); - spin_unlock_bh(&base->lock); + write_sequnlock_bh(&base->lock); /* Remove the entry from unused list if it was there. */ unlink_from_unused(p); return p; @@ -510,8 +505,14 @@ struct inet_peer *inet_getpeer(struct inetpeer_addr *daddr, int create) p->daddr = *daddr; atomic_set(&p->refcnt, 1); atomic_set(&p->rid, 0); - atomic_set(&p->ip_id_count, secure_ip_id(daddr->a4)); + atomic_set(&p->ip_id_count, secure_ip_id(daddr->addr.a4)); p->tcp_ts_stamp = 0; + p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW; + p->rate_tokens = 0; + p->rate_last = 0; + p->pmtu_expires = 0; + p->pmtu_orig = 0; + memset(&p->redirect_learned, 0, sizeof(p->redirect_learned)); INIT_LIST_HEAD(&p->unused); @@ -519,7 +520,7 @@ struct inet_peer *inet_getpeer(struct inetpeer_addr *daddr, int create) link_to_pool(p, base); base->total++; } - spin_unlock_bh(&base->lock); + write_sequnlock_bh(&base->lock); if (base->total >= inet_peer_threshold) /* Remove one less-recently-used entry. */ @@ -579,3 +580,44 @@ void inet_putpeer(struct inet_peer *p) local_bh_enable(); } EXPORT_SYMBOL_GPL(inet_putpeer); + +/* + * Check transmit rate limitation for given message. + * The rate information is held in the inet_peer entries now. + * This function is generic and could be used for other purposes + * too. It uses a Token bucket filter as suggested by Alexey Kuznetsov. + * + * Note that the same inet_peer fields are modified by functions in + * route.c too, but these work for packet destinations while xrlim_allow + * works for icmp destinations. This means the rate limiting information + * for one "ip object" is shared - and these ICMPs are twice limited: + * by source and by destination. + * + * RFC 1812: 4.3.2.8 SHOULD be able to limit error message rate + * SHOULD allow setting of rate limits + * + * Shared between ICMPv4 and ICMPv6. 
+ */ +#define XRLIM_BURST_FACTOR 6 +bool inet_peer_xrlim_allow(struct inet_peer *peer, int timeout) +{ + unsigned long now, token; + bool rc = false; + + if (!peer) + return true; + + token = peer->rate_tokens; + now = jiffies; + token += now - peer->rate_last; + peer->rate_last = now; + if (token > XRLIM_BURST_FACTOR * timeout) + token = XRLIM_BURST_FACTOR * timeout; + if (token >= timeout) { + token -= timeout; + rc = true; + } + peer->rate_tokens = token; + return rc; +} +EXPORT_SYMBOL(inet_peer_xrlim_allow); diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index eb68a0e34e49..da5941f18c3c 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -769,18 +769,12 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev tos = ipv6_get_dsfield((struct ipv6hdr *)old_iph); } - { - struct flowi fl = { - .oif = tunnel->parms.link, - .fl4_dst = dst, - .fl4_src = tiph->saddr, - .fl4_tos = RT_TOS(tos), - .fl_gre_key = tunnel->parms.o_key - }; - if (ip_route_output_key(dev_net(dev), &rt, &fl)) { - dev->stats.tx_carrier_errors++; - goto tx_error; - } + rt = ip_route_output_gre(dev_net(dev), dst, tiph->saddr, + tunnel->parms.o_key, RT_TOS(tos), + tunnel->parms.link); + if (IS_ERR(rt)) { + dev->stats.tx_carrier_errors++; + goto tx_error; } tdev = rt->dst.dev; @@ -944,17 +938,13 @@ static int ipgre_tunnel_bind_dev(struct net_device *dev) /* Guess output device to choose reasonable mtu and needed_headroom */ if (iph->daddr) { - struct flowi fl = { - .oif = tunnel->parms.link, - .fl4_dst = iph->daddr, - .fl4_src = iph->saddr, - .fl4_tos = RT_TOS(iph->tos), - .proto = IPPROTO_GRE, - .fl_gre_key = tunnel->parms.o_key - }; - struct rtable *rt; - - if (!ip_route_output_key(dev_net(dev), &rt, &fl)) { + struct rtable *rt = ip_route_output_gre(dev_net(dev), + iph->daddr, iph->saddr, + tunnel->parms.o_key, + RT_TOS(iph->tos), + tunnel->parms.link); + + if (!IS_ERR(rt)) { tdev = rt->dst.dev; ip_rt_put(rt); } @@ -1206,17 +1196,14 @@ static int ipgre_open(struct net_device *dev) struct ip_tunnel *t = netdev_priv(dev); if (ipv4_is_multicast(t->parms.iph.daddr)) { - struct flowi fl = { - .oif = t->parms.link, - .fl4_dst = t->parms.iph.daddr, - .fl4_src = t->parms.iph.saddr, - .fl4_tos = RT_TOS(t->parms.iph.tos), - .proto = IPPROTO_GRE, - .fl_gre_key = t->parms.o_key - }; - struct rtable *rt; - - if (ip_route_output_key(dev_net(dev), &rt, &fl)) + struct rtable *rt = ip_route_output_gre(dev_net(dev), + t->parms.iph.daddr, + t->parms.iph.saddr, + t->parms.o_key, + RT_TOS(t->parms.iph.tos), + t->parms.link); + + if (IS_ERR(rt)) return -EADDRNOTAVAIL; dev = rt->dst.dev; ip_rt_put(rt); @@ -1764,4 +1751,4 @@ module_exit(ipgre_fini); MODULE_LICENSE("GPL"); MODULE_ALIAS_RTNL_LINK("gre"); MODULE_ALIAS_RTNL_LINK("gretap"); -MODULE_ALIAS("gre0"); +MODULE_ALIAS_NETDEV("gre0"); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 04c7b3ba6b39..67f241b97649 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -339,25 +339,19 @@ int ip_queue_xmit(struct sk_buff *skb) if(opt && opt->srr) daddr = opt->faddr; - { - struct flowi fl = { .oif = sk->sk_bound_dev_if, - .mark = sk->sk_mark, - .fl4_dst = daddr, - .fl4_src = inet->inet_saddr, - .fl4_tos = RT_CONN_FLAGS(sk), - .proto = sk->sk_protocol, - .flags = inet_sk_flowi_flags(sk), - .fl_ip_sport = inet->inet_sport, - .fl_ip_dport = inet->inet_dport }; - - /* If this fails, retransmit mechanism of transport layer will - * keep trying until route appears or the connection times - * itself out. 
- */ - security_sk_classify_flow(sk, &fl); - if (ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 0)) - goto no_route; - } + /* If this fails, retransmit mechanism of transport layer will + * keep trying until route appears or the connection times + * itself out. + */ + rt = ip_route_output_ports(sock_net(sk), sk, + daddr, inet->inet_saddr, + inet->inet_dport, + inet->inet_sport, + sk->sk_protocol, + RT_CONN_FLAGS(sk), + sk->sk_bound_dev_if); + if (IS_ERR(rt)) + goto no_route; sk_setup_caps(sk, &rt->dst); } skb_dst_set_noref(skb, &rt->dst); @@ -733,6 +727,7 @@ csum_page(struct page *page, int offset, int copy) } static inline int ip_ufo_append_data(struct sock *sk, + struct sk_buff_head *queue, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb), void *from, int length, int hh_len, int fragheaderlen, @@ -745,7 +740,7 @@ static inline int ip_ufo_append_data(struct sock *sk, * device, so create one single skb packet containing complete * udp datagram */ - if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) { + if ((skb = skb_peek_tail(queue)) == NULL) { skb = sock_alloc_send_skb(sk, hh_len + fragheaderlen + transhdrlen + 20, (flags & MSG_DONTWAIT), &err); @@ -767,40 +762,28 @@ static inline int ip_ufo_append_data(struct sock *sk, skb->ip_summed = CHECKSUM_PARTIAL; skb->csum = 0; - sk->sk_sndmsg_off = 0; /* specify the length of each IP datagram fragment */ skb_shinfo(skb)->gso_size = mtu - fragheaderlen; skb_shinfo(skb)->gso_type = SKB_GSO_UDP; - __skb_queue_tail(&sk->sk_write_queue, skb); + __skb_queue_tail(queue, skb); } return skb_append_datato_frags(sk, skb, getfrag, from, (length - transhdrlen)); } -/* - * ip_append_data() and ip_append_page() can make one large IP datagram - * from many pieces of data. Each pieces will be holded on the socket - * until ip_push_pending_frames() is called. Each piece can be a page - * or non-page data. - * - * Not only UDP, other transport protocols - e.g. raw sockets - can use - * this interface potentially. - * - * LATER: length must be adjusted by pad at tail, when it is required. - */ -int ip_append_data(struct sock *sk, - int getfrag(void *from, char *to, int offset, int len, - int odd, struct sk_buff *skb), - void *from, int length, int transhdrlen, - struct ipcm_cookie *ipc, struct rtable **rtp, - unsigned int flags) +static int __ip_append_data(struct sock *sk, struct sk_buff_head *queue, + struct inet_cork *cork, + int getfrag(void *from, char *to, int offset, + int len, int odd, struct sk_buff *skb), + void *from, int length, int transhdrlen, + unsigned int flags) { struct inet_sock *inet = inet_sk(sk); struct sk_buff *skb; - struct ip_options *opt = NULL; + struct ip_options *opt = cork->opt; int hh_len; int exthdrlen; int mtu; @@ -809,58 +792,19 @@ int ip_append_data(struct sock *sk, int offset = 0; unsigned int maxfraglen, fragheaderlen; int csummode = CHECKSUM_NONE; - struct rtable *rt; + struct rtable *rt = (struct rtable *)cork->dst; - if (flags&MSG_PROBE) - return 0; - - if (skb_queue_empty(&sk->sk_write_queue)) { - /* - * setup for corking. 
- */ - opt = ipc->opt; - if (opt) { - if (inet->cork.opt == NULL) { - inet->cork.opt = kmalloc(sizeof(struct ip_options) + 40, sk->sk_allocation); - if (unlikely(inet->cork.opt == NULL)) - return -ENOBUFS; - } - memcpy(inet->cork.opt, opt, sizeof(struct ip_options)+opt->optlen); - inet->cork.flags |= IPCORK_OPT; - inet->cork.addr = ipc->addr; - } - rt = *rtp; - if (unlikely(!rt)) - return -EFAULT; - /* - * We steal reference to this route, caller should not release it - */ - *rtp = NULL; - inet->cork.fragsize = mtu = inet->pmtudisc == IP_PMTUDISC_PROBE ? - rt->dst.dev->mtu : - dst_mtu(rt->dst.path); - inet->cork.dst = &rt->dst; - inet->cork.length = 0; - sk->sk_sndmsg_page = NULL; - sk->sk_sndmsg_off = 0; - exthdrlen = rt->dst.header_len; - length += exthdrlen; - transhdrlen += exthdrlen; - } else { - rt = (struct rtable *)inet->cork.dst; - if (inet->cork.flags & IPCORK_OPT) - opt = inet->cork.opt; + exthdrlen = transhdrlen ? rt->dst.header_len : 0; + length += exthdrlen; + transhdrlen += exthdrlen; + mtu = cork->fragsize; - transhdrlen = 0; - exthdrlen = 0; - mtu = inet->cork.fragsize; - } hh_len = LL_RESERVED_SPACE(rt->dst.dev); fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0); maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen; - if (inet->cork.length + length > 0xFFFF - fragheaderlen) { + if (cork->length + length > 0xFFFF - fragheaderlen) { ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->inet_dport, mtu-exthdrlen); return -EMSGSIZE; @@ -876,15 +820,15 @@ int ip_append_data(struct sock *sk, !exthdrlen) csummode = CHECKSUM_PARTIAL; - skb = skb_peek_tail(&sk->sk_write_queue); + skb = skb_peek_tail(queue); - inet->cork.length += length; + cork->length += length; if (((length > mtu) || (skb && skb_is_gso(skb))) && (sk->sk_protocol == IPPROTO_UDP) && (rt->dst.dev->features & NETIF_F_UFO)) { - err = ip_ufo_append_data(sk, getfrag, from, length, hh_len, - fragheaderlen, transhdrlen, mtu, - flags); + err = ip_ufo_append_data(sk, queue, getfrag, from, length, + hh_len, fragheaderlen, transhdrlen, + mtu, flags); if (err) goto error; return 0; @@ -961,7 +905,7 @@ alloc_new_skb: else /* only the initial fragment is time stamped */ - ipc->tx_flags = 0; + cork->tx_flags = 0; } if (skb == NULL) goto error; @@ -972,7 +916,7 @@ alloc_new_skb: skb->ip_summed = csummode; skb->csum = 0; skb_reserve(skb, hh_len); - skb_shinfo(skb)->tx_flags = ipc->tx_flags; + skb_shinfo(skb)->tx_flags = cork->tx_flags; /* * Find where to start putting bytes. @@ -1009,7 +953,7 @@ alloc_new_skb: /* * Put the packet on the pending queue. 
*/ - __skb_queue_tail(&sk->sk_write_queue, skb); + __skb_queue_tail(queue, skb); continue; } @@ -1029,8 +973,8 @@ alloc_new_skb: } else { int i = skb_shinfo(skb)->nr_frags; skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1]; - struct page *page = sk->sk_sndmsg_page; - int off = sk->sk_sndmsg_off; + struct page *page = cork->page; + int off = cork->off; unsigned int left; if (page && (left = PAGE_SIZE - off) > 0) { @@ -1042,7 +986,7 @@ alloc_new_skb: goto error; } get_page(page); - skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0); + skb_fill_page_desc(skb, i, page, off, 0); frag = &skb_shinfo(skb)->frags[i]; } } else if (i < MAX_SKB_FRAGS) { @@ -1053,8 +997,8 @@ alloc_new_skb: err = -ENOMEM; goto error; } - sk->sk_sndmsg_page = page; - sk->sk_sndmsg_off = 0; + cork->page = page; + cork->off = 0; skb_fill_page_desc(skb, i, page, 0, 0); frag = &skb_shinfo(skb)->frags[i]; @@ -1066,7 +1010,7 @@ alloc_new_skb: err = -EFAULT; goto error; } - sk->sk_sndmsg_off += copy; + cork->off += copy; frag->size += copy; skb->len += copy; skb->data_len += copy; @@ -1080,11 +1024,87 @@ alloc_new_skb: return 0; error: - inet->cork.length -= length; + cork->length -= length; IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTDISCARDS); return err; } +static int ip_setup_cork(struct sock *sk, struct inet_cork *cork, + struct ipcm_cookie *ipc, struct rtable **rtp) +{ + struct inet_sock *inet = inet_sk(sk); + struct ip_options *opt; + struct rtable *rt; + + /* + * setup for corking. + */ + opt = ipc->opt; + if (opt) { + if (cork->opt == NULL) { + cork->opt = kmalloc(sizeof(struct ip_options) + 40, + sk->sk_allocation); + if (unlikely(cork->opt == NULL)) + return -ENOBUFS; + } + memcpy(cork->opt, opt, sizeof(struct ip_options) + opt->optlen); + cork->flags |= IPCORK_OPT; + cork->addr = ipc->addr; + } + rt = *rtp; + if (unlikely(!rt)) + return -EFAULT; + /* + * We steal reference to this route, caller should not release it + */ + *rtp = NULL; + cork->fragsize = inet->pmtudisc == IP_PMTUDISC_PROBE ? + rt->dst.dev->mtu : dst_mtu(rt->dst.path); + cork->dst = &rt->dst; + cork->length = 0; + cork->tx_flags = ipc->tx_flags; + cork->page = NULL; + cork->off = 0; + + return 0; +} + +/* + * ip_append_data() and ip_append_page() can make one large IP datagram + * from many pieces of data. Each pieces will be holded on the socket + * until ip_push_pending_frames() is called. Each piece can be a page + * or non-page data. + * + * Not only UDP, other transport protocols - e.g. raw sockets - can use + * this interface potentially. + * + * LATER: length must be adjusted by pad at tail, when it is required. 
+ */ +int ip_append_data(struct sock *sk, + int getfrag(void *from, char *to, int offset, int len, + int odd, struct sk_buff *skb), + void *from, int length, int transhdrlen, + struct ipcm_cookie *ipc, struct rtable **rtp, + unsigned int flags) +{ + struct inet_sock *inet = inet_sk(sk); + int err; + + if (flags&MSG_PROBE) + return 0; + + if (skb_queue_empty(&sk->sk_write_queue)) { + err = ip_setup_cork(sk, &inet->cork, ipc, rtp); + if (err) + return err; + } else { + transhdrlen = 0; + } + + return __ip_append_data(sk, &sk->sk_write_queue, &inet->cork, getfrag, + from, length, transhdrlen, flags); +} + ssize_t ip_append_page(struct sock *sk, struct page *page, int offset, size_t size, int flags) { @@ -1228,40 +1248,41 @@ error: return err; } -static void ip_cork_release(struct inet_sock *inet) +static void ip_cork_release(struct inet_cork *cork) { - inet->cork.flags &= ~IPCORK_OPT; - kfree(inet->cork.opt); - inet->cork.opt = NULL; - dst_release(inet->cork.dst); - inet->cork.dst = NULL; + cork->flags &= ~IPCORK_OPT; + kfree(cork->opt); + cork->opt = NULL; + dst_release(cork->dst); + cork->dst = NULL; } /* * Combined all pending IP fragments on the socket as one IP datagram * and push them out. */ -int ip_push_pending_frames(struct sock *sk) +struct sk_buff *__ip_make_skb(struct sock *sk, + struct sk_buff_head *queue, + struct inet_cork *cork) { struct sk_buff *skb, *tmp_skb; struct sk_buff **tail_skb; struct inet_sock *inet = inet_sk(sk); struct net *net = sock_net(sk); struct ip_options *opt = NULL; - struct rtable *rt = (struct rtable *)inet->cork.dst; + struct rtable *rt = (struct rtable *)cork->dst; struct iphdr *iph; __be16 df = 0; __u8 ttl; - int err = 0; - if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL) + if ((skb = __skb_dequeue(queue)) == NULL) goto out; tail_skb = &(skb_shinfo(skb)->frag_list); /* move skb->data to ip header from ext header */ if (skb->data < skb_network_header(skb)) __skb_pull(skb, skb_network_offset(skb)); - while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) { + while ((tmp_skb = __skb_dequeue(queue)) != NULL) { __skb_pull(tmp_skb, skb_network_header_len(skb)); *tail_skb = tmp_skb; tail_skb = &(tmp_skb->next); @@ -1287,8 +1308,8 @@ int ip_push_pending_frames(struct sock *sk) ip_dont_fragment(sk, &rt->dst))) df = htons(IP_DF); - if (inet->cork.flags & IPCORK_OPT) - opt = inet->cork.opt; + if (cork->flags & IPCORK_OPT) + opt = cork->opt; if (rt->rt_type == RTN_MULTICAST) ttl = inet->mc_ttl; @@ -1300,7 +1321,7 @@ int ip_push_pending_frames(struct sock *sk) iph->ihl = 5; if (opt) { iph->ihl += opt->optlen>>2; - ip_options_build(skb, opt, inet->cork.addr, rt, 0); + ip_options_build(skb, opt, cork->addr, rt, 0); } iph->tos = inet->tos; iph->frag_off = df; @@ -1316,44 +1337,95 @@ int ip_push_pending_frames(struct sock *sk) * Steal rt from cork.dst to avoid a pair of atomic_inc/atomic_dec * on dst refcount */ - inet->cork.dst = NULL; + cork->dst = NULL; skb_dst_set(skb, &rt->dst); if (iph->protocol == IPPROTO_ICMP) icmp_out_count(net, ((struct icmphdr *) skb_transport_header(skb))->type); - /* Netfilter gets whole the not fragmented skb. 
*/ + ip_cork_release(cork); +out: + return skb; +} + +int ip_send_skb(struct sk_buff *skb) +{ + struct net *net = sock_net(skb->sk); + int err; + err = ip_local_out(skb); if (err) { if (err > 0) err = net_xmit_errno(err); if (err) - goto error; + IP_INC_STATS(net, IPSTATS_MIB_OUTDISCARDS); } -out: - ip_cork_release(inet); return err; +} -error: - IP_INC_STATS(net, IPSTATS_MIB_OUTDISCARDS); - goto out; +int ip_push_pending_frames(struct sock *sk) +{ + struct sk_buff *skb; + + skb = ip_finish_skb(sk); + if (!skb) + return 0; + + /* Netfilter gets whole the not fragmented skb. */ + return ip_send_skb(skb); } /* * Throw away all pending data on the socket. */ -void ip_flush_pending_frames(struct sock *sk) +static void __ip_flush_pending_frames(struct sock *sk, + struct sk_buff_head *queue, + struct inet_cork *cork) { struct sk_buff *skb; - while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) + while ((skb = __skb_dequeue_tail(queue)) != NULL) kfree_skb(skb); - ip_cork_release(inet_sk(sk)); + ip_cork_release(cork); +} + +void ip_flush_pending_frames(struct sock *sk) +{ + __ip_flush_pending_frames(sk, &sk->sk_write_queue, &inet_sk(sk)->cork); } +struct sk_buff *ip_make_skb(struct sock *sk, + int getfrag(void *from, char *to, int offset, + int len, int odd, struct sk_buff *skb), + void *from, int length, int transhdrlen, + struct ipcm_cookie *ipc, struct rtable **rtp, + unsigned int flags) +{ + struct inet_cork cork = {}; + struct sk_buff_head queue; + int err; + + if (flags & MSG_PROBE) + return NULL; + + __skb_queue_head_init(&queue); + + err = ip_setup_cork(sk, &cork, ipc, rtp); + if (err) + return ERR_PTR(err); + + err = __ip_append_data(sk, &queue, &cork, getfrag, + from, length, transhdrlen, flags); + if (err) { + __ip_flush_pending_frames(sk, &queue, &cork); + return ERR_PTR(err); + } + + return __ip_make_skb(sk, &queue, &cork); +} /* * Fetch data from kernel space and fill in checksum if needed. 
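For reference, a minimal sketch of how a datagram sender could use the ip_make_skb()/ip_send_skb() pair split out above to build and push one uncorked datagram in a single shot; the ipcm_cookie/route setup and the transport-header fill-in are assumed to be done the way an ip_append_data() caller would do them, and example_send_one_datagram is a hypothetical name, not code from the patch.

#include <linux/err.h>
#include <linux/udp.h>
#include <net/ip.h>

/* Hypothetical caller: ipc and rtp are assumed to be prepared exactly as
 * for ip_append_data(); the transport header still has to be written
 * before the skb goes out. */
static int example_send_one_datagram(struct sock *sk, struct msghdr *msg,
				     size_t len, struct ipcm_cookie *ipc,
				     struct rtable **rtp)
{
	struct sk_buff *skb;

	/* Build the whole datagram on a private queue/cork, no socket corking. */
	skb = ip_make_skb(sk, ip_generic_getfrag, msg->msg_iov, len,
			  sizeof(struct udphdr), ipc, rtp, msg->msg_flags);
	if (IS_ERR_OR_NULL(skb))
		return PTR_ERR(skb);	/* NULL (MSG_PROBE) maps to 0 */

	/* ... fill in the UDP header at skb_transport_header(skb) here ... */

	return ip_send_skb(skb);	/* bumps IPSTATS_MIB_OUTDISCARDS on error */
}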
@@ -1402,16 +1474,19 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar } { - struct flowi fl = { .oif = arg->bound_dev_if, - .fl4_dst = daddr, - .fl4_src = rt->rt_spec_dst, - .fl4_tos = RT_TOS(ip_hdr(skb)->tos), - .fl_ip_sport = tcp_hdr(skb)->dest, - .fl_ip_dport = tcp_hdr(skb)->source, - .proto = sk->sk_protocol, - .flags = ip_reply_arg_flowi_flags(arg) }; - security_skb_classify_flow(skb, &fl); - if (ip_route_output_key(sock_net(sk), &rt, &fl)) + struct flowi4 fl4 = { + .flowi4_oif = arg->bound_dev_if, + .daddr = daddr, + .saddr = rt->rt_spec_dst, + .flowi4_tos = RT_TOS(ip_hdr(skb)->tos), + .fl4_sport = tcp_hdr(skb)->dest, + .fl4_dport = tcp_hdr(skb)->source, + .flowi4_proto = sk->sk_protocol, + .flowi4_flags = ip_reply_arg_flowi_flags(arg), + }; + security_skb_classify_flow(skb, flowi4_to_flowi(&fl4)); + rt = ip_route_output_key(sock_net(sk), &fl4); + if (IS_ERR(rt)) return; } diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 988f52fba54a..bfc17c5914e7 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -460,19 +460,14 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) goto tx_error_icmp; } - { - struct flowi fl = { - .oif = tunnel->parms.link, - .fl4_dst = dst, - .fl4_src= tiph->saddr, - .fl4_tos = RT_TOS(tos), - .proto = IPPROTO_IPIP - }; - - if (ip_route_output_key(dev_net(dev), &rt, &fl)) { - dev->stats.tx_carrier_errors++; - goto tx_error_icmp; - } + rt = ip_route_output_ports(dev_net(dev), NULL, + dst, tiph->saddr, + 0, 0, + IPPROTO_IPIP, RT_TOS(tos), + tunnel->parms.link); + if (IS_ERR(rt)) { + dev->stats.tx_carrier_errors++; + goto tx_error_icmp; } tdev = rt->dst.dev; @@ -583,16 +578,14 @@ static void ipip_tunnel_bind_dev(struct net_device *dev) iph = &tunnel->parms.iph; if (iph->daddr) { - struct flowi fl = { - .oif = tunnel->parms.link, - .fl4_dst = iph->daddr, - .fl4_src = iph->saddr, - .fl4_tos = RT_TOS(iph->tos), - .proto = IPPROTO_IPIP - }; - struct rtable *rt; - - if (!ip_route_output_key(dev_net(dev), &rt, &fl)) { + struct rtable *rt = ip_route_output_ports(dev_net(dev), NULL, + iph->daddr, iph->saddr, + 0, 0, + IPPROTO_IPIP, + RT_TOS(iph->tos), + tunnel->parms.link); + + if (!IS_ERR(rt)) { tdev = rt->dst.dev; ip_rt_put(rt); } @@ -913,4 +906,4 @@ static void __exit ipip_fini(void) module_init(ipip_init); module_exit(ipip_fini); MODULE_LICENSE("GPL"); -MODULE_ALIAS("tunl0"); +MODULE_ALIAS_NETDEV("tunl0"); diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 3f3a9afd73e0..1f62eaeb6de4 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -60,6 +60,7 @@ #include <linux/notifier.h> #include <linux/if_arp.h> #include <linux/netfilter_ipv4.h> +#include <linux/compat.h> #include <net/ipip.h> #include <net/checksum.h> #include <net/netlink.h> @@ -147,14 +148,15 @@ static struct mr_table *ipmr_get_table(struct net *net, u32 id) return NULL; } -static int ipmr_fib_lookup(struct net *net, struct flowi *flp, +static int ipmr_fib_lookup(struct net *net, struct flowi4 *flp4, struct mr_table **mrt) { struct ipmr_result res; struct fib_lookup_arg arg = { .result = &res, }; int err; - err = fib_rules_lookup(net->ipv4.mr_rules_ops, flp, 0, &arg); + err = fib_rules_lookup(net->ipv4.mr_rules_ops, + flowi4_to_flowi(flp4), 0, &arg); if (err < 0) return err; *mrt = res.mrt; @@ -282,7 +284,7 @@ static struct mr_table *ipmr_get_table(struct net *net, u32 id) return net->ipv4.mrt; } -static int ipmr_fib_lookup(struct net *net, struct flowi *flp, +static int ipmr_fib_lookup(struct net *net, struct flowi4 *flp4, struct mr_table 
**mrt) { *mrt = net->ipv4.mrt; @@ -434,14 +436,14 @@ static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev) { struct net *net = dev_net(dev); struct mr_table *mrt; - struct flowi fl = { - .oif = dev->ifindex, - .iif = skb->skb_iif, - .mark = skb->mark, + struct flowi4 fl4 = { + .flowi4_oif = dev->ifindex, + .flowi4_iif = skb->skb_iif, + .flowi4_mark = skb->mark, }; int err; - err = ipmr_fib_lookup(net, &fl, &mrt); + err = ipmr_fib_lookup(net, &fl4, &mrt); if (err < 0) { kfree_skb(skb); return err; @@ -1434,6 +1436,81 @@ int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg) } } +#ifdef CONFIG_COMPAT +struct compat_sioc_sg_req { + struct in_addr src; + struct in_addr grp; + compat_ulong_t pktcnt; + compat_ulong_t bytecnt; + compat_ulong_t wrong_if; +}; + +struct compat_sioc_vif_req { + vifi_t vifi; /* Which iface */ + compat_ulong_t icount; + compat_ulong_t ocount; + compat_ulong_t ibytes; + compat_ulong_t obytes; +}; + +int ipmr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg) +{ + struct compat_sioc_sg_req sr; + struct compat_sioc_vif_req vr; + struct vif_device *vif; + struct mfc_cache *c; + struct net *net = sock_net(sk); + struct mr_table *mrt; + + mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT); + if (mrt == NULL) + return -ENOENT; + + switch (cmd) { + case SIOCGETVIFCNT: + if (copy_from_user(&vr, arg, sizeof(vr))) + return -EFAULT; + if (vr.vifi >= mrt->maxvif) + return -EINVAL; + read_lock(&mrt_lock); + vif = &mrt->vif_table[vr.vifi]; + if (VIF_EXISTS(mrt, vr.vifi)) { + vr.icount = vif->pkt_in; + vr.ocount = vif->pkt_out; + vr.ibytes = vif->bytes_in; + vr.obytes = vif->bytes_out; + read_unlock(&mrt_lock); + + if (copy_to_user(arg, &vr, sizeof(vr))) + return -EFAULT; + return 0; + } + read_unlock(&mrt_lock); + return -EADDRNOTAVAIL; + case SIOCGETSGCNT: + if (copy_from_user(&sr, arg, sizeof(sr))) + return -EFAULT; + + rcu_read_lock(); + c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr); + if (c) { + sr.pktcnt = c->mfc_un.res.pkt; + sr.bytecnt = c->mfc_un.res.bytes; + sr.wrong_if = c->mfc_un.res.wrong_if; + rcu_read_unlock(); + + if (copy_to_user(arg, &sr, sizeof(sr))) + return -EFAULT; + return 0; + } + rcu_read_unlock(); + return -EADDRNOTAVAIL; + default: + return -ENOIOCTLCMD; + } +} +#endif + static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr) { @@ -1535,26 +1612,20 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, #endif if (vif->flags & VIFF_TUNNEL) { - struct flowi fl = { - .oif = vif->link, - .fl4_dst = vif->remote, - .fl4_src = vif->local, - .fl4_tos = RT_TOS(iph->tos), - .proto = IPPROTO_IPIP - }; - - if (ip_route_output_key(net, &rt, &fl)) + rt = ip_route_output_ports(net, NULL, + vif->remote, vif->local, + 0, 0, + IPPROTO_IPIP, + RT_TOS(iph->tos), vif->link); + if (IS_ERR(rt)) goto out_free; encap = sizeof(struct iphdr); } else { - struct flowi fl = { - .oif = vif->link, - .fl4_dst = iph->daddr, - .fl4_tos = RT_TOS(iph->tos), - .proto = IPPROTO_IPIP - }; - - if (ip_route_output_key(net, &rt, &fl)) + rt = ip_route_output_ports(net, NULL, iph->daddr, 0, + 0, 0, + IPPROTO_IPIP, + RT_TOS(iph->tos), vif->link); + if (IS_ERR(rt)) goto out_free; } @@ -1717,6 +1788,24 @@ dont_forward: return 0; } +static struct mr_table *ipmr_rt_fib_lookup(struct net *net, struct rtable *rt) +{ + struct flowi4 fl4 = { + .daddr = rt->rt_key_dst, + .saddr = rt->rt_key_src, + .flowi4_tos = rt->rt_tos, + .flowi4_oif = rt->rt_oif, + .flowi4_iif = rt->rt_iif, + .flowi4_mark = 
rt->rt_mark, + }; + struct mr_table *mrt; + int err; + + err = ipmr_fib_lookup(net, &fl4, &mrt); + if (err) + return ERR_PTR(err); + return mrt; +} /* * Multicast packets for forwarding arrive here @@ -1729,7 +1818,6 @@ int ip_mr_input(struct sk_buff *skb) struct net *net = dev_net(skb->dev); int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL; struct mr_table *mrt; - int err; /* Packet is looped back after forward, it should not be * forwarded second time, but still can be delivered locally. @@ -1737,12 +1825,11 @@ int ip_mr_input(struct sk_buff *skb) if (IPCB(skb)->flags & IPSKB_FORWARDED) goto dont_forward; - err = ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt); - if (err < 0) { + mrt = ipmr_rt_fib_lookup(net, skb_rtable(skb)); + if (IS_ERR(mrt)) { kfree_skb(skb); - return err; + return PTR_ERR(mrt); } - if (!local) { if (IPCB(skb)->opt.router_alert) { if (ip_call_ra_chain(skb)) @@ -1870,9 +1957,9 @@ int pim_rcv_v1(struct sk_buff *skb) pim = igmp_hdr(skb); - if (ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt) < 0) + mrt = ipmr_rt_fib_lookup(net, skb_rtable(skb)); + if (IS_ERR(mrt)) goto drop; - if (!mrt->mroute_do_pim || pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER) goto drop; @@ -1902,9 +1989,9 @@ static int pim_rcv(struct sk_buff *skb) csum_fold(skb_checksum(skb, 0, skb->len, 0)))) goto drop; - if (ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt) < 0) + mrt = ipmr_rt_fib_lookup(net, skb_rtable(skb)); + if (IS_ERR(mrt)) goto drop; - if (__pim_rcv(mrt, skb, sizeof(*pim))) { drop: kfree_skb(skb); diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index 994a1f29ebbc..f3c0b549b8e1 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -16,7 +16,7 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) struct net *net = dev_net(skb_dst(skb)->dev); const struct iphdr *iph = ip_hdr(skb); struct rtable *rt; - struct flowi fl = {}; + struct flowi4 fl4 = {}; unsigned long orefdst; unsigned int hh_len; unsigned int type; @@ -31,14 +31,15 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) * packets with foreign saddr to appear on the NF_INET_LOCAL_OUT hook. */ if (addr_type == RTN_LOCAL) { - fl.fl4_dst = iph->daddr; + fl4.daddr = iph->daddr; if (type == RTN_LOCAL) - fl.fl4_src = iph->saddr; - fl.fl4_tos = RT_TOS(iph->tos); - fl.oif = skb->sk ? skb->sk->sk_bound_dev_if : 0; - fl.mark = skb->mark; - fl.flags = skb->sk ? inet_sk_flowi_flags(skb->sk) : 0; - if (ip_route_output_key(net, &rt, &fl) != 0) + fl4.saddr = iph->saddr; + fl4.flowi4_tos = RT_TOS(iph->tos); + fl4.flowi4_oif = skb->sk ? skb->sk->sk_bound_dev_if : 0; + fl4.flowi4_mark = skb->mark; + fl4.flowi4_flags = skb->sk ? inet_sk_flowi_flags(skb->sk) : 0; + rt = ip_route_output_key(net, &fl4); + if (IS_ERR(rt)) return -1; /* Drop old route. */ @@ -47,8 +48,9 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) } else { /* non-local src, find valid iif to satisfy * rp-filter when calling ip_route_input. 
*/ - fl.fl4_dst = iph->saddr; - if (ip_route_output_key(net, &rt, &fl) != 0) + fl4.daddr = iph->saddr; + rt = ip_route_output_key(net, &fl4); + if (IS_ERR(rt)) return -1; orefdst = skb->_skb_refdst; @@ -66,10 +68,11 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) #ifdef CONFIG_XFRM if (!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) && - xfrm_decode_session(skb, &fl, AF_INET) == 0) { + xfrm_decode_session(skb, flowi4_to_flowi(&fl4), AF_INET) == 0) { struct dst_entry *dst = skb_dst(skb); skb_dst_set(skb, NULL); - if (xfrm_lookup(net, &dst, &fl, skb->sk, 0)) + dst = xfrm_lookup(net, dst, flowi4_to_flowi(&fl4), skb->sk, 0); + if (IS_ERR(dst)) return -1; skb_dst_set(skb, dst); } @@ -102,7 +105,8 @@ int ip_xfrm_me_harder(struct sk_buff *skb) dst = ((struct xfrm_dst *)dst)->route; dst_hold(dst); - if (xfrm_lookup(dev_net(dst->dev), &dst, &fl, skb->sk, 0) < 0) + dst = xfrm_lookup(dev_net(dst->dev), dst, &fl, skb->sk, 0); + if (IS_ERR(dst)) return -1; skb_dst_drop(skb); @@ -219,7 +223,11 @@ static __sum16 nf_ip_checksum_partial(struct sk_buff *skb, unsigned int hook, static int nf_ip_route(struct dst_entry **dst, struct flowi *fl) { - return ip_route_output_key(&init_net, (struct rtable **)dst, fl); + struct rtable *rt = ip_route_output_key(&init_net, &fl->u.ip4); + if (IS_ERR(rt)) + return PTR_ERR(rt); + *dst = &rt->dst; + return 0; } static const struct nf_afinfo nf_ip_afinfo = { diff --git a/net/ipv4/netfilter/arpt_mangle.c b/net/ipv4/netfilter/arpt_mangle.c index b8ddcc480ed9..a5e52a9f0a12 100644 --- a/net/ipv4/netfilter/arpt_mangle.c +++ b/net/ipv4/netfilter/arpt_mangle.c @@ -60,12 +60,12 @@ static int checkentry(const struct xt_tgchk_param *par) if (mangle->flags & ~ARPT_MANGLE_MASK || !(mangle->flags & ARPT_MANGLE_MASK)) - return false; + return -EINVAL; if (mangle->target != NF_DROP && mangle->target != NF_ACCEPT && mangle->target != XT_CONTINUE) - return false; - return true; + return -EINVAL; + return 0; } static struct xt_target arpt_mangle_reg __read_mostly = { diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c index 95481fee8bdb..7317bdf1d457 100644 --- a/net/ipv4/netfilter/nf_nat_standalone.c +++ b/net/ipv4/netfilter/nf_nat_standalone.c @@ -31,6 +31,7 @@ #ifdef CONFIG_XFRM static void nat_decode_session(struct sk_buff *skb, struct flowi *fl) { + struct flowi4 *fl4 = &fl->u.ip4; const struct nf_conn *ct; const struct nf_conntrack_tuple *t; enum ip_conntrack_info ctinfo; @@ -49,25 +50,25 @@ static void nat_decode_session(struct sk_buff *skb, struct flowi *fl) statusbit = IPS_SRC_NAT; if (ct->status & statusbit) { - fl->fl4_dst = t->dst.u3.ip; + fl4->daddr = t->dst.u3.ip; if (t->dst.protonum == IPPROTO_TCP || t->dst.protonum == IPPROTO_UDP || t->dst.protonum == IPPROTO_UDPLITE || t->dst.protonum == IPPROTO_DCCP || t->dst.protonum == IPPROTO_SCTP) - fl->fl_ip_dport = t->dst.u.tcp.port; + fl4->fl4_dport = t->dst.u.tcp.port; } statusbit ^= IPS_NAT_MASK; if (ct->status & statusbit) { - fl->fl4_src = t->src.u3.ip; + fl4->saddr = t->src.u3.ip; if (t->dst.protonum == IPPROTO_TCP || t->dst.protonum == IPPROTO_UDP || t->dst.protonum == IPPROTO_UDPLITE || t->dst.protonum == IPPROTO_DCCP || t->dst.protonum == IPPROTO_SCTP) - fl->fl_ip_sport = t->src.u.tcp.port; + fl4->fl4_sport = t->src.u.tcp.port; } } #endif diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index a3d5ab786e81..e837ffd3edc3 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -76,6 +76,7 @@ #include <linux/seq_file.h> #include <linux/netfilter.h> #include <linux/netfilter_ipv4.h> 
+#include <linux/compat.h> static struct raw_hashinfo raw_v4_hashinfo = { .lock = __RW_LOCK_UNLOCKED(raw_v4_hashinfo.lock), @@ -401,7 +402,7 @@ error: return err; } -static int raw_probe_proto_opt(struct flowi *fl, struct msghdr *msg) +static int raw_probe_proto_opt(struct flowi4 *fl4, struct msghdr *msg) { struct iovec *iov; u8 __user *type = NULL; @@ -417,7 +418,7 @@ static int raw_probe_proto_opt(struct flowi *fl, struct msghdr *msg) if (!iov) continue; - switch (fl->proto) { + switch (fl4->flowi4_proto) { case IPPROTO_ICMP: /* check if one-byte field is readable or not. */ if (iov->iov_base && iov->iov_len < 1) @@ -432,8 +433,8 @@ static int raw_probe_proto_opt(struct flowi *fl, struct msghdr *msg) code = iov->iov_base; if (type && code) { - if (get_user(fl->fl_icmp_type, type) || - get_user(fl->fl_icmp_code, code)) + if (get_user(fl4->fl4_icmp_type, type) || + get_user(fl4->fl4_icmp_code, code)) return -EFAULT; probed = 1; } @@ -547,25 +548,30 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, } { - struct flowi fl = { .oif = ipc.oif, - .mark = sk->sk_mark, - .fl4_dst = daddr, - .fl4_src = saddr, - .fl4_tos = tos, - .proto = inet->hdrincl ? IPPROTO_RAW : - sk->sk_protocol, - }; + struct flowi4 fl4 = { + .flowi4_oif = ipc.oif, + .flowi4_mark = sk->sk_mark, + .daddr = daddr, + .saddr = saddr, + .flowi4_tos = tos, + .flowi4_proto = (inet->hdrincl ? + IPPROTO_RAW : + sk->sk_protocol), + .flowi4_flags = FLOWI_FLAG_CAN_SLEEP, + }; if (!inet->hdrincl) { - err = raw_probe_proto_opt(&fl, msg); + err = raw_probe_proto_opt(&fl4, msg); if (err) goto done; } - security_sk_classify_flow(sk, &fl); - err = ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 1); + security_sk_classify_flow(sk, flowi4_to_flowi(&fl4)); + rt = ip_route_output_flow(sock_net(sk), &fl4, sk); + if (IS_ERR(rt)) { + err = PTR_ERR(rt); + goto done; + } } - if (err) - goto done; err = -EACCES; if (rt->rt_flags & RTCF_BROADCAST && !sock_flag(sk, SOCK_BROADCAST)) @@ -838,6 +844,23 @@ static int raw_ioctl(struct sock *sk, int cmd, unsigned long arg) } } +#ifdef CONFIG_COMPAT +static int compat_raw_ioctl(struct sock *sk, unsigned int cmd, unsigned long arg) +{ + switch (cmd) { + case SIOCOUTQ: + case SIOCINQ: + return -ENOIOCTLCMD; + default: +#ifdef CONFIG_IP_MROUTE + return ipmr_compat_ioctl(sk, cmd, compat_ptr(arg)); +#else + return -ENOIOCTLCMD; +#endif + } +} +#endif + struct proto raw_prot = { .name = "RAW", .owner = THIS_MODULE, @@ -860,6 +883,7 @@ struct proto raw_prot = { #ifdef CONFIG_COMPAT .compat_setsockopt = compat_raw_setsockopt, .compat_getsockopt = compat_raw_getsockopt, + .compat_ioctl = compat_raw_ioctl, #endif }; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 3e5b7cc2db4f..209989cf7d1b 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -109,8 +109,8 @@ #include <linux/sysctl.h> #endif -#define RT_FL_TOS(oldflp) \ - ((u32)(oldflp->fl4_tos & (IPTOS_RT_MASK | RTO_ONLINK))) +#define RT_FL_TOS(oldflp4) \ + ((u32)(oldflp4->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK))) #define IP_MAX_MTU 0xFFF0 @@ -131,9 +131,6 @@ static int ip_rt_min_pmtu __read_mostly = 512 + 20 + 20; static int ip_rt_min_advmss __read_mostly = 256; static int rt_chain_length_max __read_mostly = 20; -static struct delayed_work expires_work; -static unsigned long expires_ljiffies; - /* * Interface to generic destination cache. 
*/ @@ -152,6 +149,41 @@ static void ipv4_dst_ifdown(struct dst_entry *dst, struct net_device *dev, { } +static u32 *ipv4_cow_metrics(struct dst_entry *dst, unsigned long old) +{ + struct rtable *rt = (struct rtable *) dst; + struct inet_peer *peer; + u32 *p = NULL; + + if (!rt->peer) + rt_bind_peer(rt, 1); + + peer = rt->peer; + if (peer) { + u32 *old_p = __DST_METRICS_PTR(old); + unsigned long prev, new; + + p = peer->metrics; + if (inet_metrics_new(peer)) + memcpy(p, old_p, sizeof(u32) * RTAX_MAX); + + new = (unsigned long) p; + prev = cmpxchg(&dst->_metrics, old, new); + + if (prev != old) { + p = __DST_METRICS_PTR(prev); + if (prev & DST_METRICS_READ_ONLY) + p = NULL; + } else { + if (rt->fi) { + fib_info_put(rt->fi); + rt->fi = NULL; + } + } + } + return p; +} + static struct dst_ops ipv4_dst_ops = { .family = AF_INET, .protocol = cpu_to_be16(ETH_P_IP), @@ -159,6 +191,7 @@ static struct dst_ops ipv4_dst_ops = { .check = ipv4_dst_check, .default_advmss = ipv4_default_advmss, .default_mtu = ipv4_default_mtu, + .cow_metrics = ipv4_cow_metrics, .destroy = ipv4_dst_destroy, .ifdown = ipv4_dst_ifdown, .negative_advice = ipv4_negative_advice, @@ -391,7 +424,7 @@ static int rt_cache_seq_show(struct seq_file *seq, void *v) dst_metric(&r->dst, RTAX_WINDOW), (int)((dst_metric(&r->dst, RTAX_RTT) >> 3) + dst_metric(&r->dst, RTAX_RTTVAR)), - r->fl.fl4_tos, + r->rt_tos, r->dst.hh ? atomic_read(&r->dst.hh->hh_refcnt) : -1, r->dst.hh ? (r->dst.hh->hh_output == dev_queue_xmit) : 0, @@ -632,7 +665,7 @@ static inline int rt_fast_clean(struct rtable *rth) static inline int rt_valuable(struct rtable *rth) { return (rth->rt_flags & (RTCF_REDIRECTED | RTCF_NOTIFY)) || - rth->dst.expires; + (rth->peer && rth->peer->pmtu_expires); } static int rt_may_expire(struct rtable *rth, unsigned long tmo1, unsigned long tmo2) @@ -643,13 +676,7 @@ static int rt_may_expire(struct rtable *rth, unsigned long tmo1, unsigned long t if (atomic_read(&rth->dst.__refcnt)) goto out; - ret = 1; - if (rth->dst.expires && - time_after_eq(jiffies, rth->dst.expires)) - goto out; - age = jiffies - rth->dst.lastuse; - ret = 0; if ((age <= tmo1 && !rt_fast_clean(rth)) || (age <= tmo2 && rt_valuable(rth))) goto out; @@ -684,22 +711,22 @@ static inline bool rt_caching(const struct net *net) net->ipv4.sysctl_rt_cache_rebuild_count; } -static inline bool compare_hash_inputs(const struct flowi *fl1, - const struct flowi *fl2) +static inline bool compare_hash_inputs(const struct rtable *rt1, + const struct rtable *rt2) { - return ((((__force u32)fl1->fl4_dst ^ (__force u32)fl2->fl4_dst) | - ((__force u32)fl1->fl4_src ^ (__force u32)fl2->fl4_src) | - (fl1->iif ^ fl2->iif)) == 0); + return ((((__force u32)rt1->rt_key_dst ^ (__force u32)rt2->rt_key_dst) | + ((__force u32)rt1->rt_key_src ^ (__force u32)rt2->rt_key_src) | + (rt1->rt_iif ^ rt2->rt_iif)) == 0); } -static inline int compare_keys(struct flowi *fl1, struct flowi *fl2) +static inline int compare_keys(struct rtable *rt1, struct rtable *rt2) { - return (((__force u32)fl1->fl4_dst ^ (__force u32)fl2->fl4_dst) | - ((__force u32)fl1->fl4_src ^ (__force u32)fl2->fl4_src) | - (fl1->mark ^ fl2->mark) | - (*(u16 *)&fl1->fl4_tos ^ *(u16 *)&fl2->fl4_tos) | - (fl1->oif ^ fl2->oif) | - (fl1->iif ^ fl2->iif)) == 0; + return (((__force u32)rt1->rt_key_dst ^ (__force u32)rt2->rt_key_dst) | + ((__force u32)rt1->rt_key_src ^ (__force u32)rt2->rt_key_src) | + (rt1->rt_mark ^ rt2->rt_mark) | + (rt1->rt_tos ^ rt2->rt_tos) | + (rt1->rt_oif ^ rt2->rt_oif) | + (rt1->rt_iif ^ rt2->rt_iif)) == 0; } static inline 
int compare_netns(struct rtable *rt1, struct rtable *rt2) @@ -786,104 +813,13 @@ static int has_noalias(const struct rtable *head, const struct rtable *rth) const struct rtable *aux = head; while (aux != rth) { - if (compare_hash_inputs(&aux->fl, &rth->fl)) + if (compare_hash_inputs(aux, rth)) return 0; aux = rcu_dereference_protected(aux->dst.rt_next, 1); } return ONE; } -static void rt_check_expire(void) -{ - static unsigned int rover; - unsigned int i = rover, goal; - struct rtable *rth; - struct rtable __rcu **rthp; - unsigned long samples = 0; - unsigned long sum = 0, sum2 = 0; - unsigned long delta; - u64 mult; - - delta = jiffies - expires_ljiffies; - expires_ljiffies = jiffies; - mult = ((u64)delta) << rt_hash_log; - if (ip_rt_gc_timeout > 1) - do_div(mult, ip_rt_gc_timeout); - goal = (unsigned int)mult; - if (goal > rt_hash_mask) - goal = rt_hash_mask + 1; - for (; goal > 0; goal--) { - unsigned long tmo = ip_rt_gc_timeout; - unsigned long length; - - i = (i + 1) & rt_hash_mask; - rthp = &rt_hash_table[i].chain; - - if (need_resched()) - cond_resched(); - - samples++; - - if (rcu_dereference_raw(*rthp) == NULL) - continue; - length = 0; - spin_lock_bh(rt_hash_lock_addr(i)); - while ((rth = rcu_dereference_protected(*rthp, - lockdep_is_held(rt_hash_lock_addr(i)))) != NULL) { - prefetch(rth->dst.rt_next); - if (rt_is_expired(rth)) { - *rthp = rth->dst.rt_next; - rt_free(rth); - continue; - } - if (rth->dst.expires) { - /* Entry is expired even if it is in use */ - if (time_before_eq(jiffies, rth->dst.expires)) { -nofree: - tmo >>= 1; - rthp = &rth->dst.rt_next; - /* - * We only count entries on - * a chain with equal hash inputs once - * so that entries for different QOS - * levels, and other non-hash input - * attributes don't unfairly skew - * the length computation - */ - length += has_noalias(rt_hash_table[i].chain, rth); - continue; - } - } else if (!rt_may_expire(rth, tmo, ip_rt_gc_timeout)) - goto nofree; - - /* Cleanup aged off entries. */ - *rthp = rth->dst.rt_next; - rt_free(rth); - } - spin_unlock_bh(rt_hash_lock_addr(i)); - sum += length; - sum2 += length*length; - } - if (samples) { - unsigned long avg = sum / samples; - unsigned long sd = int_sqrt(sum2 / samples - avg*avg); - rt_chain_length_max = max_t(unsigned long, - ip_rt_gc_elasticity, - (avg + 4*sd) >> FRACT_BITS); - } - rover = i; -} - -/* - * rt_worker_func() is run in process context. 
- * we call rt_check_expire() to scan part of the hash table - */ -static void rt_worker_func(struct work_struct *work) -{ - rt_check_expire(); - schedule_delayed_work(&expires_work, ip_rt_gc_interval); -} - /* * Pertubation of rt_genid by a small quantity [1..256] * Using 8 bits of shuffling ensure we can call rt_cache_invalidate() @@ -1078,8 +1014,8 @@ static int slow_chain_length(const struct rtable *head) return length >> FRACT_BITS; } -static int rt_intern_hash(unsigned hash, struct rtable *rt, - struct rtable **rp, struct sk_buff *skb, int ifindex) +static struct rtable *rt_intern_hash(unsigned hash, struct rtable *rt, + struct sk_buff *skb, int ifindex) { struct rtable *rth, *cand; struct rtable __rcu **rthp, **candp; @@ -1120,7 +1056,7 @@ restart: printk(KERN_WARNING "Neighbour table failure & not caching routes.\n"); ip_rt_put(rt); - return err; + return ERR_PTR(err); } } @@ -1137,7 +1073,7 @@ restart: rt_free(rth); continue; } - if (compare_keys(&rth->fl, &rt->fl) && compare_netns(rth, rt)) { + if (compare_keys(rth, rt) && compare_netns(rth, rt)) { /* Put it first */ *rthp = rth->dst.rt_next; /* @@ -1157,11 +1093,9 @@ restart: spin_unlock_bh(rt_hash_lock_addr(hash)); rt_drop(rt); - if (rp) - *rp = rth; - else + if (skb) skb_dst_set(skb, &rth->dst); - return 0; + return rth; } if (!atomic_read(&rth->dst.__refcnt)) { @@ -1202,7 +1136,7 @@ restart: rt_emergency_hash_rebuild(net); spin_unlock_bh(rt_hash_lock_addr(hash)); - hash = rt_hash(rt->fl.fl4_dst, rt->fl.fl4_src, + hash = rt_hash(rt->rt_key_dst, rt->rt_key_src, ifindex, rt_genid(net)); goto restart; } @@ -1218,7 +1152,7 @@ restart: if (err != -ENOBUFS) { rt_drop(rt); - return err; + return ERR_PTR(err); } /* Neighbour tables are full and nothing @@ -1239,7 +1173,7 @@ restart: if (net_ratelimit()) printk(KERN_WARNING "ipv4: Neighbour table overflow.\n"); rt_drop(rt); - return -ENOBUFS; + return ERR_PTR(-ENOBUFS); } } @@ -1265,11 +1199,16 @@ restart: spin_unlock_bh(rt_hash_lock_addr(hash)); skip_hashing: - if (rp) - *rp = rt; - else + if (skb) skb_dst_set(skb, &rt->dst); - return 0; + return rt; +} + +static atomic_t __rt_peer_genid = ATOMIC_INIT(0); + +static u32 rt_peer_genid(void) +{ + return atomic_read(&__rt_peer_genid); } void rt_bind_peer(struct rtable *rt, int create) @@ -1280,6 +1219,8 @@ void rt_bind_peer(struct rtable *rt, int create) if (peer && cmpxchg(&rt->peer, NULL, peer) != NULL) inet_putpeer(peer); + else + rt->rt_peer_genid = rt_peer_genid(); } /* @@ -1349,13 +1290,8 @@ static void rt_del(unsigned hash, struct rtable *rt) void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, __be32 saddr, struct net_device *dev) { - int i, k; struct in_device *in_dev = __in_dev_get_rcu(dev); - struct rtable *rth; - struct rtable __rcu **rthp; - __be32 skeys[2] = { saddr, 0 }; - int ikeys[2] = { dev->ifindex, 0 }; - struct netevent_redirect netevent; + struct inet_peer *peer; struct net *net; if (!in_dev) @@ -1367,9 +1303,6 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, ipv4_is_zeronet(new_gw)) goto reject_redirect; - if (!rt_caching(net)) - goto reject_redirect; - if (!IN_DEV_SHARED_MEDIA(in_dev)) { if (!inet_addr_onlink(in_dev, new_gw, old_gw)) goto reject_redirect; @@ -1380,91 +1313,13 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, goto reject_redirect; } - for (i = 0; i < 2; i++) { - for (k = 0; k < 2; k++) { - unsigned hash = rt_hash(daddr, skeys[i], ikeys[k], - rt_genid(net)); - - rthp = &rt_hash_table[hash].chain; - - while ((rth = rcu_dereference(*rthp)) != NULL) { - 
struct rtable *rt; - - if (rth->fl.fl4_dst != daddr || - rth->fl.fl4_src != skeys[i] || - rth->fl.oif != ikeys[k] || - rt_is_input_route(rth) || - rt_is_expired(rth) || - !net_eq(dev_net(rth->dst.dev), net)) { - rthp = &rth->dst.rt_next; - continue; - } - - if (rth->rt_dst != daddr || - rth->rt_src != saddr || - rth->dst.error || - rth->rt_gateway != old_gw || - rth->dst.dev != dev) - break; - - dst_hold(&rth->dst); - - rt = dst_alloc(&ipv4_dst_ops); - if (rt == NULL) { - ip_rt_put(rth); - return; - } - - /* Copy all the information. */ - *rt = *rth; - rt->dst.__use = 1; - atomic_set(&rt->dst.__refcnt, 1); - rt->dst.child = NULL; - if (rt->dst.dev) - dev_hold(rt->dst.dev); - rt->dst.obsolete = -1; - rt->dst.lastuse = jiffies; - rt->dst.path = &rt->dst; - rt->dst.neighbour = NULL; - rt->dst.hh = NULL; -#ifdef CONFIG_XFRM - rt->dst.xfrm = NULL; -#endif - rt->rt_genid = rt_genid(net); - rt->rt_flags |= RTCF_REDIRECTED; - - /* Gateway is different ... */ - rt->rt_gateway = new_gw; - - /* Redirect received -> path was valid */ - dst_confirm(&rth->dst); - - if (rt->peer) - atomic_inc(&rt->peer->refcnt); - - if (arp_bind_neighbour(&rt->dst) || - !(rt->dst.neighbour->nud_state & - NUD_VALID)) { - if (rt->dst.neighbour) - neigh_event_send(rt->dst.neighbour, NULL); - ip_rt_put(rth); - rt_drop(rt); - goto do_next; - } + peer = inet_getpeer_v4(daddr, 1); + if (peer) { + peer->redirect_learned.a4 = new_gw; - netevent.old = &rth->dst; - netevent.new = &rt->dst; - call_netevent_notifiers(NETEVENT_REDIRECT, - &netevent); + inet_putpeer(peer); - rt_del(hash, rth); - if (!rt_intern_hash(hash, rt, &rt, NULL, rt->fl.oif)) - ip_rt_put(rt); - goto do_next; - } - do_next: - ; - } + atomic_inc(&__rt_peer_genid); } return; @@ -1488,18 +1343,24 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) if (dst->obsolete > 0) { ip_rt_put(rt); ret = NULL; - } else if ((rt->rt_flags & RTCF_REDIRECTED) || - (rt->dst.expires && - time_after_eq(jiffies, rt->dst.expires))) { - unsigned hash = rt_hash(rt->fl.fl4_dst, rt->fl.fl4_src, - rt->fl.oif, + } else if (rt->rt_flags & RTCF_REDIRECTED) { + unsigned hash = rt_hash(rt->rt_key_dst, rt->rt_key_src, + rt->rt_oif, rt_genid(dev_net(dst->dev))); #if RT_CACHE_DEBUG >= 1 printk(KERN_DEBUG "ipv4_negative_advice: redirect to %pI4/%02x dropped\n", - &rt->rt_dst, rt->fl.fl4_tos); + &rt->rt_dst, rt->rt_tos); #endif rt_del(hash, rt); ret = NULL; + } else if (rt->peer && + rt->peer->pmtu_expires && + time_after_eq(jiffies, rt->peer->pmtu_expires)) { + unsigned long orig = rt->peer->pmtu_expires; + + if (cmpxchg(&rt->peer->pmtu_expires, orig, 0) == orig) + dst_metric_set(dst, RTAX_MTU, + rt->peer->pmtu_orig); } } return ret; @@ -1525,6 +1386,7 @@ void ip_rt_send_redirect(struct sk_buff *skb) { struct rtable *rt = skb_rtable(skb); struct in_device *in_dev; + struct inet_peer *peer; int log_martians; rcu_read_lock(); @@ -1536,33 +1398,41 @@ void ip_rt_send_redirect(struct sk_buff *skb) log_martians = IN_DEV_LOG_MARTIANS(in_dev); rcu_read_unlock(); + if (!rt->peer) + rt_bind_peer(rt, 1); + peer = rt->peer; + if (!peer) { + icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway); + return; + } + /* No redirected packets during ip_rt_redirect_silence; * reset the algorithm. */ - if (time_after(jiffies, rt->dst.rate_last + ip_rt_redirect_silence)) - rt->dst.rate_tokens = 0; + if (time_after(jiffies, peer->rate_last + ip_rt_redirect_silence)) + peer->rate_tokens = 0; /* Too many ignored redirects; do not send anything * set dst.rate_last to the last seen redirected packet. 
*/ - if (rt->dst.rate_tokens >= ip_rt_redirect_number) { - rt->dst.rate_last = jiffies; + if (peer->rate_tokens >= ip_rt_redirect_number) { + peer->rate_last = jiffies; return; } /* Check for load limit; set rate_last to the latest sent * redirect. */ - if (rt->dst.rate_tokens == 0 || + if (peer->rate_tokens == 0 || time_after(jiffies, - (rt->dst.rate_last + - (ip_rt_redirect_load << rt->dst.rate_tokens)))) { + (peer->rate_last + + (ip_rt_redirect_load << peer->rate_tokens)))) { icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway); - rt->dst.rate_last = jiffies; - ++rt->dst.rate_tokens; + peer->rate_last = jiffies; + ++peer->rate_tokens; #ifdef CONFIG_IP_ROUTE_VERBOSE if (log_martians && - rt->dst.rate_tokens == ip_rt_redirect_number && + peer->rate_tokens == ip_rt_redirect_number && net_ratelimit()) printk(KERN_WARNING "host %pI4/if%d ignores redirects for %pI4 to %pI4.\n", &rt->rt_src, rt->rt_iif, @@ -1574,7 +1444,9 @@ void ip_rt_send_redirect(struct sk_buff *skb) static int ip_error(struct sk_buff *skb) { struct rtable *rt = skb_rtable(skb); + struct inet_peer *peer; unsigned long now; + bool send; int code; switch (rt->dst.error) { @@ -1594,15 +1466,24 @@ static int ip_error(struct sk_buff *skb) break; } - now = jiffies; - rt->dst.rate_tokens += now - rt->dst.rate_last; - if (rt->dst.rate_tokens > ip_rt_error_burst) - rt->dst.rate_tokens = ip_rt_error_burst; - rt->dst.rate_last = now; - if (rt->dst.rate_tokens >= ip_rt_error_cost) { - rt->dst.rate_tokens -= ip_rt_error_cost; - icmp_send(skb, ICMP_DEST_UNREACH, code, 0); + if (!rt->peer) + rt_bind_peer(rt, 1); + peer = rt->peer; + + send = true; + if (peer) { + now = jiffies; + peer->rate_tokens += now - peer->rate_last; + if (peer->rate_tokens > ip_rt_error_burst) + peer->rate_tokens = ip_rt_error_burst; + peer->rate_last = now; + if (peer->rate_tokens >= ip_rt_error_cost) + peer->rate_tokens -= ip_rt_error_cost; + else + send = false; } + if (send) + icmp_send(skb, ICMP_DEST_UNREACH, code, 0); out: kfree_skb(skb); return 0; @@ -1630,88 +1511,142 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph, unsigned short new_mtu, struct net_device *dev) { - int i, k; unsigned short old_mtu = ntohs(iph->tot_len); - struct rtable *rth; - int ikeys[2] = { dev->ifindex, 0 }; - __be32 skeys[2] = { iph->saddr, 0, }; - __be32 daddr = iph->daddr; unsigned short est_mtu = 0; + struct inet_peer *peer; - for (k = 0; k < 2; k++) { - for (i = 0; i < 2; i++) { - unsigned hash = rt_hash(daddr, skeys[i], ikeys[k], - rt_genid(net)); - - rcu_read_lock(); - for (rth = rcu_dereference(rt_hash_table[hash].chain); rth; - rth = rcu_dereference(rth->dst.rt_next)) { - unsigned short mtu = new_mtu; - - if (rth->fl.fl4_dst != daddr || - rth->fl.fl4_src != skeys[i] || - rth->rt_dst != daddr || - rth->rt_src != iph->saddr || - rth->fl.oif != ikeys[k] || - rt_is_input_route(rth) || - dst_metric_locked(&rth->dst, RTAX_MTU) || - !net_eq(dev_net(rth->dst.dev), net) || - rt_is_expired(rth)) - continue; + peer = inet_getpeer_v4(iph->daddr, 1); + if (peer) { + unsigned short mtu = new_mtu; - if (new_mtu < 68 || new_mtu >= old_mtu) { + if (new_mtu < 68 || new_mtu >= old_mtu) { + /* BSD 4.2 derived systems incorrectly adjust + * tot_len by the IP header length, and report + * a zero MTU in the ICMP message. 
+ */ + if (mtu == 0 && + old_mtu >= 68 + (iph->ihl << 2)) + old_mtu -= iph->ihl << 2; + mtu = guess_mtu(old_mtu); + } - /* BSD 4.2 compatibility hack :-( */ - if (mtu == 0 && - old_mtu >= dst_mtu(&rth->dst) && - old_mtu >= 68 + (iph->ihl << 2)) - old_mtu -= iph->ihl << 2; + if (mtu < ip_rt_min_pmtu) + mtu = ip_rt_min_pmtu; + if (!peer->pmtu_expires || mtu < peer->pmtu_learned) { + unsigned long pmtu_expires; - mtu = guess_mtu(old_mtu); - } - if (mtu <= dst_mtu(&rth->dst)) { - if (mtu < dst_mtu(&rth->dst)) { - dst_confirm(&rth->dst); - if (mtu < ip_rt_min_pmtu) { - u32 lock = dst_metric(&rth->dst, - RTAX_LOCK); - mtu = ip_rt_min_pmtu; - lock |= (1 << RTAX_MTU); - dst_metric_set(&rth->dst, RTAX_LOCK, - lock); - } - dst_metric_set(&rth->dst, RTAX_MTU, mtu); - dst_set_expires(&rth->dst, - ip_rt_mtu_expires); - } - est_mtu = mtu; - } - } - rcu_read_unlock(); + pmtu_expires = jiffies + ip_rt_mtu_expires; + if (!pmtu_expires) + pmtu_expires = 1UL; + + est_mtu = mtu; + peer->pmtu_learned = mtu; + peer->pmtu_expires = pmtu_expires; } + + inet_putpeer(peer); + + atomic_inc(&__rt_peer_genid); } return est_mtu ? : new_mtu; } +static void check_peer_pmtu(struct dst_entry *dst, struct inet_peer *peer) +{ + unsigned long expires = peer->pmtu_expires; + + if (time_before(jiffies, expires)) { + u32 orig_dst_mtu = dst_mtu(dst); + if (peer->pmtu_learned < orig_dst_mtu) { + if (!peer->pmtu_orig) + peer->pmtu_orig = dst_metric_raw(dst, RTAX_MTU); + dst_metric_set(dst, RTAX_MTU, peer->pmtu_learned); + } + } else if (cmpxchg(&peer->pmtu_expires, expires, 0) == expires) + dst_metric_set(dst, RTAX_MTU, peer->pmtu_orig); +} + static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu) { - if (dst_mtu(dst) > mtu && mtu >= 68 && - !(dst_metric_locked(dst, RTAX_MTU))) { - if (mtu < ip_rt_min_pmtu) { - u32 lock = dst_metric(dst, RTAX_LOCK); + struct rtable *rt = (struct rtable *) dst; + struct inet_peer *peer; + + dst_confirm(dst); + + if (!rt->peer) + rt_bind_peer(rt, 1); + peer = rt->peer; + if (peer) { + if (mtu < ip_rt_min_pmtu) mtu = ip_rt_min_pmtu; - dst_metric_set(dst, RTAX_LOCK, lock | (1 << RTAX_MTU)); + if (!peer->pmtu_expires || mtu < peer->pmtu_learned) { + unsigned long pmtu_expires; + + pmtu_expires = jiffies + ip_rt_mtu_expires; + if (!pmtu_expires) + pmtu_expires = 1UL; + + peer->pmtu_learned = mtu; + peer->pmtu_expires = pmtu_expires; + + atomic_inc(&__rt_peer_genid); + rt->rt_peer_genid = rt_peer_genid(); } - dst_metric_set(dst, RTAX_MTU, mtu); - dst_set_expires(dst, ip_rt_mtu_expires); - call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst); + check_peer_pmtu(dst, peer); + + inet_putpeer(peer); + } +} + +static int check_peer_redir(struct dst_entry *dst, struct inet_peer *peer) +{ + struct rtable *rt = (struct rtable *) dst; + __be32 orig_gw = rt->rt_gateway; + + dst_confirm(&rt->dst); + + neigh_release(rt->dst.neighbour); + rt->dst.neighbour = NULL; + + rt->rt_gateway = peer->redirect_learned.a4; + if (arp_bind_neighbour(&rt->dst) || + !(rt->dst.neighbour->nud_state & NUD_VALID)) { + if (rt->dst.neighbour) + neigh_event_send(rt->dst.neighbour, NULL); + rt->rt_gateway = orig_gw; + return -EAGAIN; + } else { + rt->rt_flags |= RTCF_REDIRECTED; + call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, + rt->dst.neighbour); } + return 0; } static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie) { - if (rt_is_expired((struct rtable *)dst)) + struct rtable *rt = (struct rtable *) dst; + + if (rt_is_expired(rt)) return NULL; + if (rt->rt_peer_genid != rt_peer_genid()) { + struct inet_peer *peer; + 
+ if (!rt->peer) + rt_bind_peer(rt, 0); + + peer = rt->peer; + if (peer && peer->pmtu_expires) + check_peer_pmtu(dst, peer); + + if (peer && peer->redirect_learned.a4 && + peer->redirect_learned.a4 != rt->rt_gateway) { + if (check_peer_redir(dst, peer)) + return NULL; + } + + rt->rt_peer_genid = rt_peer_genid(); + } return dst; } @@ -1720,6 +1655,10 @@ static void ipv4_dst_destroy(struct dst_entry *dst) struct rtable *rt = (struct rtable *) dst; struct inet_peer *peer = rt->peer; + if (rt->fi) { + fib_info_put(rt->fi); + rt->fi = NULL; + } if (peer) { rt->peer = NULL; inet_putpeer(peer); @@ -1734,8 +1673,14 @@ static void ipv4_link_failure(struct sk_buff *skb) icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0); rt = skb_rtable(skb); - if (rt) - dst_set_expires(&rt->dst, 0); + if (rt && + rt->peer && + rt->peer->pmtu_expires) { + unsigned long orig = rt->peer->pmtu_expires; + + if (cmpxchg(&rt->peer->pmtu_expires, orig, 0) == orig) + dst_metric_set(&rt->dst, RTAX_MTU, rt->peer->pmtu_orig); + } } static int ip_rt_bug(struct sk_buff *skb) @@ -1764,8 +1709,17 @@ void ip_rt_get_source(u8 *addr, struct rtable *rt) if (rt_is_output_route(rt)) src = rt->rt_src; else { + struct flowi4 fl4 = { + .daddr = rt->rt_key_dst, + .saddr = rt->rt_key_src, + .flowi4_tos = rt->rt_tos, + .flowi4_oif = rt->rt_oif, + .flowi4_iif = rt->rt_iif, + .flowi4_mark = rt->rt_mark, + }; + rcu_read_lock(); - if (fib_lookup(dev_net(rt->dst.dev), &rt->fl, &res) == 0) + if (fib_lookup(dev_net(rt->dst.dev), &fl4, &res) == 0) src = FIB_RES_PREFSRC(res); else src = inet_select_addr(rt->dst.dev, rt->rt_gateway, @@ -1815,16 +1769,53 @@ static unsigned int ipv4_default_mtu(const struct dst_entry *dst) return mtu; } -static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag) +static void rt_init_metrics(struct rtable *rt, const struct flowi4 *oldflp4, + struct fib_info *fi) +{ + struct inet_peer *peer; + int create = 0; + + /* If a peer entry exists for this destination, we must hook + * it up in order to get at cached metrics. 
+ */ + if (oldflp4 && (oldflp4->flowi4_flags & FLOWI_FLAG_PRECOW_METRICS)) + create = 1; + + rt->peer = peer = inet_getpeer_v4(rt->rt_dst, create); + if (peer) { + rt->rt_peer_genid = rt_peer_genid(); + if (inet_metrics_new(peer)) + memcpy(peer->metrics, fi->fib_metrics, + sizeof(u32) * RTAX_MAX); + dst_init_metrics(&rt->dst, peer->metrics, false); + + if (peer->pmtu_expires) + check_peer_pmtu(&rt->dst, peer); + if (peer->redirect_learned.a4 && + peer->redirect_learned.a4 != rt->rt_gateway) { + rt->rt_gateway = peer->redirect_learned.a4; + rt->rt_flags |= RTCF_REDIRECTED; + } + } else { + if (fi->fib_metrics != (u32 *) dst_default_metrics) { + rt->fi = fi; + atomic_inc(&fi->fib_clntref); + } + dst_init_metrics(&rt->dst, fi->fib_metrics, true); + } +} + +static void rt_set_nexthop(struct rtable *rt, const struct flowi4 *oldflp4, + const struct fib_result *res, + struct fib_info *fi, u16 type, u32 itag) { struct dst_entry *dst = &rt->dst; - struct fib_info *fi = res->fi; if (fi) { if (FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) rt->rt_gateway = FIB_RES_GW(*res); - dst_import_metrics(dst, fi->fib_metrics); + rt_init_metrics(rt, oldflp4, fi); #ifdef CONFIG_IP_ROUTE_CLASSID dst->tclassid = FIB_RES_NH(*res).nh_tclassid; #endif @@ -1841,7 +1832,20 @@ static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag) #endif set_class_tag(rt, itag); #endif - rt->rt_type = res->type; + rt->rt_type = type; +} + +static struct rtable *rt_dst_alloc(bool nopolicy, bool noxfrm) +{ + struct rtable *rt = dst_alloc(&ipv4_dst_ops, 1); + if (rt) { + rt->dst.obsolete = -1; + + rt->dst.flags = DST_HOST | + (nopolicy ? DST_NOPOLICY : 0) | + (noxfrm ? DST_NOXFRM : 0); + } + return rt; } /* called in rcu_read_lock() section */ @@ -1874,31 +1878,25 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, if (err < 0) goto e_err; } - rth = dst_alloc(&ipv4_dst_ops); + rth = rt_dst_alloc(IN_DEV_CONF_GET(in_dev, NOPOLICY), false); if (!rth) goto e_nobufs; rth->dst.output = ip_rt_bug; - rth->dst.obsolete = -1; - atomic_set(&rth->dst.__refcnt, 1); - rth->dst.flags= DST_HOST; - if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) - rth->dst.flags |= DST_NOPOLICY; - rth->fl.fl4_dst = daddr; + rth->rt_key_dst = daddr; rth->rt_dst = daddr; - rth->fl.fl4_tos = tos; - rth->fl.mark = skb->mark; - rth->fl.fl4_src = saddr; + rth->rt_tos = tos; + rth->rt_mark = skb->mark; + rth->rt_key_src = saddr; rth->rt_src = saddr; #ifdef CONFIG_IP_ROUTE_CLASSID rth->dst.tclassid = itag; #endif - rth->rt_iif = - rth->fl.iif = dev->ifindex; + rth->rt_iif = dev->ifindex; rth->dst.dev = init_net.loopback_dev; dev_hold(rth->dst.dev); - rth->fl.oif = 0; + rth->rt_oif = 0; rth->rt_gateway = daddr; rth->rt_spec_dst= spec_dst; rth->rt_genid = rt_genid(dev_net(dev)); @@ -1916,7 +1914,10 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, RT_CACHE_STAT_INC(in_slow_mc); hash = rt_hash(daddr, saddr, dev->ifindex, rt_genid(dev_net(dev))); - return rt_intern_hash(hash, rth, NULL, skb, dev->ifindex); + rth = rt_intern_hash(hash, rth, skb, dev->ifindex); + err = 0; + if (IS_ERR(rth)) + err = PTR_ERR(rth); e_nobufs: return -ENOBUFS; @@ -1959,7 +1960,7 @@ static void ip_handle_martian_source(struct net_device *dev, /* called in rcu_read_lock() section */ static int __mkroute_input(struct sk_buff *skb, - struct fib_result *res, + const struct fib_result *res, struct in_device *in_dev, __be32 daddr, __be32 saddr, u32 tos, struct rtable **result) @@ -2013,39 +2014,31 @@ static int 
__mkroute_input(struct sk_buff *skb, } } - - rth = dst_alloc(&ipv4_dst_ops); + rth = rt_dst_alloc(IN_DEV_CONF_GET(in_dev, NOPOLICY), + IN_DEV_CONF_GET(out_dev, NOXFRM)); if (!rth) { err = -ENOBUFS; goto cleanup; } - atomic_set(&rth->dst.__refcnt, 1); - rth->dst.flags= DST_HOST; - if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) - rth->dst.flags |= DST_NOPOLICY; - if (IN_DEV_CONF_GET(out_dev, NOXFRM)) - rth->dst.flags |= DST_NOXFRM; - rth->fl.fl4_dst = daddr; + rth->rt_key_dst = daddr; rth->rt_dst = daddr; - rth->fl.fl4_tos = tos; - rth->fl.mark = skb->mark; - rth->fl.fl4_src = saddr; + rth->rt_tos = tos; + rth->rt_mark = skb->mark; + rth->rt_key_src = saddr; rth->rt_src = saddr; rth->rt_gateway = daddr; - rth->rt_iif = - rth->fl.iif = in_dev->dev->ifindex; + rth->rt_iif = in_dev->dev->ifindex; rth->dst.dev = (out_dev)->dev; dev_hold(rth->dst.dev); - rth->fl.oif = 0; + rth->rt_oif = 0; rth->rt_spec_dst= spec_dst; - rth->dst.obsolete = -1; rth->dst.input = ip_forward; rth->dst.output = ip_output; rth->rt_genid = rt_genid(dev_net(rth->dst.dev)); - rt_set_nexthop(rth, res, itag); + rt_set_nexthop(rth, NULL, res, res->fi, res->type, itag); rth->rt_flags = flags; @@ -2057,7 +2050,7 @@ static int __mkroute_input(struct sk_buff *skb, static int ip_mkroute_input(struct sk_buff *skb, struct fib_result *res, - const struct flowi *fl, + const struct flowi4 *fl4, struct in_device *in_dev, __be32 daddr, __be32 saddr, u32 tos) { @@ -2066,8 +2059,8 @@ static int ip_mkroute_input(struct sk_buff *skb, unsigned hash; #ifdef CONFIG_IP_ROUTE_MULTIPATH - if (res->fi && res->fi->fib_nhs > 1 && fl->oif == 0) - fib_select_multipath(fl, res); + if (res->fi && res->fi->fib_nhs > 1) + fib_select_multipath(res); #endif /* create a routing cache entry */ @@ -2076,9 +2069,12 @@ static int ip_mkroute_input(struct sk_buff *skb, return err; /* put it into the cache */ - hash = rt_hash(daddr, saddr, fl->iif, + hash = rt_hash(daddr, saddr, fl4->flowi4_iif, rt_genid(dev_net(rth->dst.dev))); - return rt_intern_hash(hash, rth, NULL, skb, fl->iif); + rth = rt_intern_hash(hash, rth, skb, fl4->flowi4_iif); + if (IS_ERR(rth)) + return PTR_ERR(rth); + return 0; } /* @@ -2097,12 +2093,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, { struct fib_result res; struct in_device *in_dev = __in_dev_get_rcu(dev); - struct flowi fl = { .fl4_dst = daddr, - .fl4_src = saddr, - .fl4_tos = tos, - .fl4_scope = RT_SCOPE_UNIVERSE, - .mark = skb->mark, - .iif = dev->ifindex }; + struct flowi4 fl4; unsigned flags = 0; u32 itag = 0; struct rtable * rth; @@ -2139,7 +2130,14 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, /* * Now we are ready to route packet. 
*/ - err = fib_lookup(net, &fl, &res); + fl4.flowi4_oif = 0; + fl4.flowi4_iif = dev->ifindex; + fl4.flowi4_mark = skb->mark; + fl4.flowi4_tos = tos; + fl4.flowi4_scope = RT_SCOPE_UNIVERSE; + fl4.daddr = daddr; + fl4.saddr = saddr; + err = fib_lookup(net, &fl4, &res); if (err != 0) { if (!IN_DEV_FORWARD(in_dev)) goto e_hostunreach; @@ -2168,7 +2166,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, if (res.type != RTN_UNICAST) goto martian_destination; - err = ip_mkroute_input(skb, &res, &fl, in_dev, daddr, saddr, tos); + err = ip_mkroute_input(skb, &res, &fl4, in_dev, daddr, saddr, tos); out: return err; brd_input: @@ -2190,29 +2188,23 @@ brd_input: RT_CACHE_STAT_INC(in_brd); local_input: - rth = dst_alloc(&ipv4_dst_ops); + rth = rt_dst_alloc(IN_DEV_CONF_GET(in_dev, NOPOLICY), false); if (!rth) goto e_nobufs; rth->dst.output= ip_rt_bug; - rth->dst.obsolete = -1; rth->rt_genid = rt_genid(net); - atomic_set(&rth->dst.__refcnt, 1); - rth->dst.flags= DST_HOST; - if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) - rth->dst.flags |= DST_NOPOLICY; - rth->fl.fl4_dst = daddr; + rth->rt_key_dst = daddr; rth->rt_dst = daddr; - rth->fl.fl4_tos = tos; - rth->fl.mark = skb->mark; - rth->fl.fl4_src = saddr; + rth->rt_tos = tos; + rth->rt_mark = skb->mark; + rth->rt_key_src = saddr; rth->rt_src = saddr; #ifdef CONFIG_IP_ROUTE_CLASSID rth->dst.tclassid = itag; #endif - rth->rt_iif = - rth->fl.iif = dev->ifindex; + rth->rt_iif = dev->ifindex; rth->dst.dev = net->loopback_dev; dev_hold(rth->dst.dev); rth->rt_gateway = daddr; @@ -2225,8 +2217,11 @@ local_input: rth->rt_flags &= ~RTCF_LOCAL; } rth->rt_type = res.type; - hash = rt_hash(daddr, saddr, fl.iif, rt_genid(net)); - err = rt_intern_hash(hash, rth, NULL, skb, fl.iif); + hash = rt_hash(daddr, saddr, fl4.flowi4_iif, rt_genid(net)); + rth = rt_intern_hash(hash, rth, skb, fl4.flowi4_iif); + err = 0; + if (IS_ERR(rth)) + err = PTR_ERR(rth); goto out; no_route: @@ -2288,12 +2283,12 @@ int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr, for (rth = rcu_dereference(rt_hash_table[hash].chain); rth; rth = rcu_dereference(rth->dst.rt_next)) { - if ((((__force u32)rth->fl.fl4_dst ^ (__force u32)daddr) | - ((__force u32)rth->fl.fl4_src ^ (__force u32)saddr) | - (rth->fl.iif ^ iif) | - rth->fl.oif | - (rth->fl.fl4_tos ^ tos)) == 0 && - rth->fl.mark == skb->mark && + if ((((__force u32)rth->rt_key_dst ^ (__force u32)daddr) | + ((__force u32)rth->rt_key_src ^ (__force u32)saddr) | + (rth->rt_iif ^ iif) | + rth->rt_oif | + (rth->rt_tos ^ tos)) == 0 && + rth->rt_mark == skb->mark && net_eq(dev_net(rth->dst.dev), net) && !rt_is_expired(rth)) { if (noref) { @@ -2326,8 +2321,8 @@ skip_cache: struct in_device *in_dev = __in_dev_get_rcu(dev); if (in_dev) { - int our = ip_check_mc(in_dev, daddr, saddr, - ip_hdr(skb)->protocol); + int our = ip_check_mc_rcu(in_dev, daddr, saddr, + ip_hdr(skb)->protocol); if (our #ifdef CONFIG_IP_MROUTE || @@ -2351,98 +2346,91 @@ skip_cache: EXPORT_SYMBOL(ip_route_input_common); /* called with rcu_read_lock() */ -static int __mkroute_output(struct rtable **result, - struct fib_result *res, - const struct flowi *fl, - const struct flowi *oldflp, - struct net_device *dev_out, - unsigned flags) +static struct rtable *__mkroute_output(const struct fib_result *res, + const struct flowi4 *fl4, + const struct flowi4 *oldflp4, + struct net_device *dev_out, + unsigned int flags) { - struct rtable *rth; + struct fib_info *fi = res->fi; + u32 tos = RT_FL_TOS(oldflp4); struct in_device *in_dev; - u32 tos = 
RT_FL_TOS(oldflp); + u16 type = res->type; + struct rtable *rth; - if (ipv4_is_loopback(fl->fl4_src) && !(dev_out->flags & IFF_LOOPBACK)) - return -EINVAL; + if (ipv4_is_loopback(fl4->saddr) && !(dev_out->flags & IFF_LOOPBACK)) + return ERR_PTR(-EINVAL); - if (ipv4_is_lbcast(fl->fl4_dst)) - res->type = RTN_BROADCAST; - else if (ipv4_is_multicast(fl->fl4_dst)) - res->type = RTN_MULTICAST; - else if (ipv4_is_zeronet(fl->fl4_dst)) - return -EINVAL; + if (ipv4_is_lbcast(fl4->daddr)) + type = RTN_BROADCAST; + else if (ipv4_is_multicast(fl4->daddr)) + type = RTN_MULTICAST; + else if (ipv4_is_zeronet(fl4->daddr)) + return ERR_PTR(-EINVAL); if (dev_out->flags & IFF_LOOPBACK) flags |= RTCF_LOCAL; in_dev = __in_dev_get_rcu(dev_out); if (!in_dev) - return -EINVAL; + return ERR_PTR(-EINVAL); - if (res->type == RTN_BROADCAST) { + if (type == RTN_BROADCAST) { flags |= RTCF_BROADCAST | RTCF_LOCAL; - res->fi = NULL; - } else if (res->type == RTN_MULTICAST) { + fi = NULL; + } else if (type == RTN_MULTICAST) { flags |= RTCF_MULTICAST | RTCF_LOCAL; - if (!ip_check_mc(in_dev, oldflp->fl4_dst, oldflp->fl4_src, - oldflp->proto)) + if (!ip_check_mc_rcu(in_dev, oldflp4->daddr, oldflp4->saddr, + oldflp4->flowi4_proto)) flags &= ~RTCF_LOCAL; /* If multicast route do not exist use * default one, but do not gateway in this case. * Yes, it is hack. */ - if (res->fi && res->prefixlen < 4) - res->fi = NULL; + if (fi && res->prefixlen < 4) + fi = NULL; } - - rth = dst_alloc(&ipv4_dst_ops); + rth = rt_dst_alloc(IN_DEV_CONF_GET(in_dev, NOPOLICY), + IN_DEV_CONF_GET(in_dev, NOXFRM)); if (!rth) - return -ENOBUFS; - - atomic_set(&rth->dst.__refcnt, 1); - rth->dst.flags= DST_HOST; - if (IN_DEV_CONF_GET(in_dev, NOXFRM)) - rth->dst.flags |= DST_NOXFRM; - if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) - rth->dst.flags |= DST_NOPOLICY; - - rth->fl.fl4_dst = oldflp->fl4_dst; - rth->fl.fl4_tos = tos; - rth->fl.fl4_src = oldflp->fl4_src; - rth->fl.oif = oldflp->oif; - rth->fl.mark = oldflp->mark; - rth->rt_dst = fl->fl4_dst; - rth->rt_src = fl->fl4_src; - rth->rt_iif = oldflp->oif ? 
: dev_out->ifindex; + return ERR_PTR(-ENOBUFS); + + rth->rt_key_dst = oldflp4->daddr; + rth->rt_tos = tos; + rth->rt_key_src = oldflp4->saddr; + rth->rt_oif = oldflp4->flowi4_oif; + rth->rt_mark = oldflp4->flowi4_mark; + rth->rt_dst = fl4->daddr; + rth->rt_src = fl4->saddr; + rth->rt_iif = 0; /* get references to the devices that are to be hold by the routing cache entry */ rth->dst.dev = dev_out; dev_hold(dev_out); - rth->rt_gateway = fl->fl4_dst; - rth->rt_spec_dst= fl->fl4_src; + rth->rt_gateway = fl4->daddr; + rth->rt_spec_dst= fl4->saddr; rth->dst.output=ip_output; - rth->dst.obsolete = -1; rth->rt_genid = rt_genid(dev_net(dev_out)); RT_CACHE_STAT_INC(out_slow_tot); if (flags & RTCF_LOCAL) { rth->dst.input = ip_local_deliver; - rth->rt_spec_dst = fl->fl4_dst; + rth->rt_spec_dst = fl4->daddr; } if (flags & (RTCF_BROADCAST | RTCF_MULTICAST)) { - rth->rt_spec_dst = fl->fl4_src; + rth->rt_spec_dst = fl4->saddr; if (flags & RTCF_LOCAL && !(dev_out->flags & IFF_LOOPBACK)) { rth->dst.output = ip_mc_output; RT_CACHE_STAT_INC(out_slow_mc); } #ifdef CONFIG_IP_MROUTE - if (res->type == RTN_MULTICAST) { + if (type == RTN_MULTICAST) { if (IN_DEV_MFORWARD(in_dev) && - !ipv4_is_local_multicast(oldflp->fl4_dst)) { + !ipv4_is_local_multicast(oldflp4->daddr)) { rth->dst.input = ip_mr_input; rth->dst.output = ip_mc_output; } @@ -2450,31 +2438,10 @@ static int __mkroute_output(struct rtable **result, #endif } - rt_set_nexthop(rth, res, 0); + rt_set_nexthop(rth, oldflp4, res, fi, type, 0); rth->rt_flags = flags; - *result = rth; - return 0; -} - -/* called with rcu_read_lock() */ -static int ip_mkroute_output(struct rtable **rp, - struct fib_result *res, - const struct flowi *fl, - const struct flowi *oldflp, - struct net_device *dev_out, - unsigned flags) -{ - struct rtable *rth = NULL; - int err = __mkroute_output(&rth, res, fl, oldflp, dev_out, flags); - unsigned hash; - if (err == 0) { - hash = rt_hash(oldflp->fl4_dst, oldflp->fl4_src, oldflp->oif, - rt_genid(dev_net(dev_out))); - err = rt_intern_hash(hash, rth, rp, NULL, oldflp->oif); - } - - return err; + return rth; } /* @@ -2482,34 +2449,36 @@ static int ip_mkroute_output(struct rtable **rp, * called with rcu_read_lock(); */ -static int ip_route_output_slow(struct net *net, struct rtable **rp, - const struct flowi *oldflp) -{ - u32 tos = RT_FL_TOS(oldflp); - struct flowi fl = { .fl4_dst = oldflp->fl4_dst, - .fl4_src = oldflp->fl4_src, - .fl4_tos = tos & IPTOS_RT_MASK, - .fl4_scope = ((tos & RTO_ONLINK) ? - RT_SCOPE_LINK : RT_SCOPE_UNIVERSE), - .mark = oldflp->mark, - .iif = net->loopback_dev->ifindex, - .oif = oldflp->oif }; +static struct rtable *ip_route_output_slow(struct net *net, + const struct flowi4 *oldflp4) +{ + u32 tos = RT_FL_TOS(oldflp4); + struct flowi4 fl4; struct fib_result res; unsigned int flags = 0; struct net_device *dev_out = NULL; - int err; - + struct rtable *rth; res.fi = NULL; #ifdef CONFIG_IP_MULTIPLE_TABLES res.r = NULL; #endif - if (oldflp->fl4_src) { - err = -EINVAL; - if (ipv4_is_multicast(oldflp->fl4_src) || - ipv4_is_lbcast(oldflp->fl4_src) || - ipv4_is_zeronet(oldflp->fl4_src)) + fl4.flowi4_oif = oldflp4->flowi4_oif; + fl4.flowi4_iif = net->loopback_dev->ifindex; + fl4.flowi4_mark = oldflp4->flowi4_mark; + fl4.daddr = oldflp4->daddr; + fl4.saddr = oldflp4->saddr; + fl4.flowi4_tos = tos & IPTOS_RT_MASK; + fl4.flowi4_scope = ((tos & RTO_ONLINK) ? 
+ RT_SCOPE_LINK : RT_SCOPE_UNIVERSE); + + rcu_read_lock(); + if (oldflp4->saddr) { + rth = ERR_PTR(-EINVAL); + if (ipv4_is_multicast(oldflp4->saddr) || + ipv4_is_lbcast(oldflp4->saddr) || + ipv4_is_zeronet(oldflp4->saddr)) goto out; /* I removed check for oif == dev_out->oif here. @@ -2520,11 +2489,11 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp, of another iface. --ANK */ - if (oldflp->oif == 0 && - (ipv4_is_multicast(oldflp->fl4_dst) || - ipv4_is_lbcast(oldflp->fl4_dst))) { + if (oldflp4->flowi4_oif == 0 && + (ipv4_is_multicast(oldflp4->daddr) || + ipv4_is_lbcast(oldflp4->daddr))) { /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */ - dev_out = __ip_dev_find(net, oldflp->fl4_src, false); + dev_out = __ip_dev_find(net, oldflp4->saddr, false); if (dev_out == NULL) goto out; @@ -2543,60 +2512,60 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp, Luckily, this hack is good workaround. */ - fl.oif = dev_out->ifindex; + fl4.flowi4_oif = dev_out->ifindex; goto make_route; } - if (!(oldflp->flags & FLOWI_FLAG_ANYSRC)) { + if (!(oldflp4->flowi4_flags & FLOWI_FLAG_ANYSRC)) { /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */ - if (!__ip_dev_find(net, oldflp->fl4_src, false)) + if (!__ip_dev_find(net, oldflp4->saddr, false)) goto out; } } - if (oldflp->oif) { - dev_out = dev_get_by_index_rcu(net, oldflp->oif); - err = -ENODEV; + if (oldflp4->flowi4_oif) { + dev_out = dev_get_by_index_rcu(net, oldflp4->flowi4_oif); + rth = ERR_PTR(-ENODEV); if (dev_out == NULL) goto out; /* RACE: Check return value of inet_select_addr instead. */ if (!(dev_out->flags & IFF_UP) || !__in_dev_get_rcu(dev_out)) { - err = -ENETUNREACH; + rth = ERR_PTR(-ENETUNREACH); goto out; } - if (ipv4_is_local_multicast(oldflp->fl4_dst) || - ipv4_is_lbcast(oldflp->fl4_dst)) { - if (!fl.fl4_src) - fl.fl4_src = inet_select_addr(dev_out, 0, - RT_SCOPE_LINK); + if (ipv4_is_local_multicast(oldflp4->daddr) || + ipv4_is_lbcast(oldflp4->daddr)) { + if (!fl4.saddr) + fl4.saddr = inet_select_addr(dev_out, 0, + RT_SCOPE_LINK); goto make_route; } - if (!fl.fl4_src) { - if (ipv4_is_multicast(oldflp->fl4_dst)) - fl.fl4_src = inet_select_addr(dev_out, 0, - fl.fl4_scope); - else if (!oldflp->fl4_dst) - fl.fl4_src = inet_select_addr(dev_out, 0, - RT_SCOPE_HOST); + if (!fl4.saddr) { + if (ipv4_is_multicast(oldflp4->daddr)) + fl4.saddr = inet_select_addr(dev_out, 0, + fl4.flowi4_scope); + else if (!oldflp4->daddr) + fl4.saddr = inet_select_addr(dev_out, 0, + RT_SCOPE_HOST); } } - if (!fl.fl4_dst) { - fl.fl4_dst = fl.fl4_src; - if (!fl.fl4_dst) - fl.fl4_dst = fl.fl4_src = htonl(INADDR_LOOPBACK); + if (!fl4.daddr) { + fl4.daddr = fl4.saddr; + if (!fl4.daddr) + fl4.daddr = fl4.saddr = htonl(INADDR_LOOPBACK); dev_out = net->loopback_dev; - fl.oif = net->loopback_dev->ifindex; + fl4.flowi4_oif = net->loopback_dev->ifindex; res.type = RTN_LOCAL; flags |= RTCF_LOCAL; goto make_route; } - if (fib_lookup(net, &fl, &res)) { + if (fib_lookup(net, &fl4, &res)) { res.fi = NULL; - if (oldflp->oif) { + if (oldflp4->flowi4_oif) { /* Apparently, routing tables are wrong. Assume, that the destination is on link. @@ -2615,90 +2584,93 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp, likely IPv6, but we do not. 
*/ - if (fl.fl4_src == 0) - fl.fl4_src = inet_select_addr(dev_out, 0, - RT_SCOPE_LINK); + if (fl4.saddr == 0) + fl4.saddr = inet_select_addr(dev_out, 0, + RT_SCOPE_LINK); res.type = RTN_UNICAST; goto make_route; } - err = -ENETUNREACH; + rth = ERR_PTR(-ENETUNREACH); goto out; } if (res.type == RTN_LOCAL) { - if (!fl.fl4_src) { + if (!fl4.saddr) { if (res.fi->fib_prefsrc) - fl.fl4_src = res.fi->fib_prefsrc; + fl4.saddr = res.fi->fib_prefsrc; else - fl.fl4_src = fl.fl4_dst; + fl4.saddr = fl4.daddr; } dev_out = net->loopback_dev; - fl.oif = dev_out->ifindex; + fl4.flowi4_oif = dev_out->ifindex; res.fi = NULL; flags |= RTCF_LOCAL; goto make_route; } #ifdef CONFIG_IP_ROUTE_MULTIPATH - if (res.fi->fib_nhs > 1 && fl.oif == 0) - fib_select_multipath(&fl, &res); + if (res.fi->fib_nhs > 1 && fl4.flowi4_oif == 0) + fib_select_multipath(&res); else #endif - if (!res.prefixlen && res.type == RTN_UNICAST && !fl.oif) - fib_select_default(net, &fl, &res); + if (!res.prefixlen && res.type == RTN_UNICAST && !fl4.flowi4_oif) + fib_select_default(&res); - if (!fl.fl4_src) - fl.fl4_src = FIB_RES_PREFSRC(res); + if (!fl4.saddr) + fl4.saddr = FIB_RES_PREFSRC(res); dev_out = FIB_RES_DEV(res); - fl.oif = dev_out->ifindex; + fl4.flowi4_oif = dev_out->ifindex; make_route: - err = ip_mkroute_output(rp, &res, &fl, oldflp, dev_out, flags); + rth = __mkroute_output(&res, &fl4, oldflp4, dev_out, flags); + if (!IS_ERR(rth)) { + unsigned int hash; -out: return err; + hash = rt_hash(oldflp4->daddr, oldflp4->saddr, oldflp4->flowi4_oif, + rt_genid(dev_net(dev_out))); + rth = rt_intern_hash(hash, rth, NULL, oldflp4->flowi4_oif); + } + +out: + rcu_read_unlock(); + return rth; } -int __ip_route_output_key(struct net *net, struct rtable **rp, - const struct flowi *flp) +struct rtable *__ip_route_output_key(struct net *net, const struct flowi4 *flp4) { - unsigned int hash; - int res; struct rtable *rth; + unsigned int hash; if (!rt_caching(net)) goto slow_output; - hash = rt_hash(flp->fl4_dst, flp->fl4_src, flp->oif, rt_genid(net)); + hash = rt_hash(flp4->daddr, flp4->saddr, flp4->flowi4_oif, rt_genid(net)); rcu_read_lock_bh(); for (rth = rcu_dereference_bh(rt_hash_table[hash].chain); rth; rth = rcu_dereference_bh(rth->dst.rt_next)) { - if (rth->fl.fl4_dst == flp->fl4_dst && - rth->fl.fl4_src == flp->fl4_src && + if (rth->rt_key_dst == flp4->daddr && + rth->rt_key_src == flp4->saddr && rt_is_output_route(rth) && - rth->fl.oif == flp->oif && - rth->fl.mark == flp->mark && - !((rth->fl.fl4_tos ^ flp->fl4_tos) & + rth->rt_oif == flp4->flowi4_oif && + rth->rt_mark == flp4->flowi4_mark && + !((rth->rt_tos ^ flp4->flowi4_tos) & (IPTOS_RT_MASK | RTO_ONLINK)) && net_eq(dev_net(rth->dst.dev), net) && !rt_is_expired(rth)) { dst_use(&rth->dst, jiffies); RT_CACHE_STAT_INC(out_hit); rcu_read_unlock_bh(); - *rp = rth; - return 0; + return rth; } RT_CACHE_STAT_INC(out_hlist_search); } rcu_read_unlock_bh(); slow_output: - rcu_read_lock(); - res = ip_route_output_slow(net, rp, flp); - rcu_read_unlock(); - return res; + return ip_route_output_slow(net, flp4); } EXPORT_SYMBOL_GPL(__ip_route_output_key); @@ -2707,6 +2679,11 @@ static struct dst_entry *ipv4_blackhole_dst_check(struct dst_entry *dst, u32 coo return NULL; } +static unsigned int ipv4_blackhole_default_mtu(const struct dst_entry *dst) +{ + return 0; +} + static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu) { } @@ -2716,20 +2693,19 @@ static struct dst_ops ipv4_dst_blackhole_ops = { .protocol = cpu_to_be16(ETH_P_IP), .destroy = ipv4_dst_destroy, .check = 
ipv4_blackhole_dst_check, + .default_mtu = ipv4_blackhole_default_mtu, + .default_advmss = ipv4_default_advmss, .update_pmtu = ipv4_rt_blackhole_update_pmtu, }; - -static int ipv4_dst_blackhole(struct net *net, struct rtable **rp, struct flowi *flp) +struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_orig) { - struct rtable *ort = *rp; - struct rtable *rt = (struct rtable *) - dst_alloc(&ipv4_dst_blackhole_ops); + struct rtable *rt = dst_alloc(&ipv4_dst_blackhole_ops, 1); + struct rtable *ort = (struct rtable *) dst_orig; if (rt) { struct dst_entry *new = &rt->dst; - atomic_set(&new->__refcnt, 1); new->__use = 1; new->input = dst_discard; new->output = dst_discard; @@ -2739,7 +2715,12 @@ static int ipv4_dst_blackhole(struct net *net, struct rtable **rp, struct flowi if (new->dev) dev_hold(new->dev); - rt->fl = ort->fl; + rt->rt_key_dst = ort->rt_key_dst; + rt->rt_key_src = ort->rt_key_src; + rt->rt_tos = ort->rt_tos; + rt->rt_iif = ort->rt_iif; + rt->rt_oif = ort->rt_oif; + rt->rt_mark = ort->rt_mark; rt->rt_genid = rt_genid(net); rt->rt_flags = ort->rt_flags; @@ -2752,46 +2733,40 @@ static int ipv4_dst_blackhole(struct net *net, struct rtable **rp, struct flowi rt->peer = ort->peer; if (rt->peer) atomic_inc(&rt->peer->refcnt); + rt->fi = ort->fi; + if (rt->fi) + atomic_inc(&rt->fi->fib_clntref); dst_free(new); } - dst_release(&(*rp)->dst); - *rp = rt; - return rt ? 0 : -ENOMEM; + dst_release(dst_orig); + + return rt ? &rt->dst : ERR_PTR(-ENOMEM); } -int ip_route_output_flow(struct net *net, struct rtable **rp, struct flowi *flp, - struct sock *sk, int flags) +struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4, + struct sock *sk) { - int err; + struct rtable *rt = __ip_route_output_key(net, flp4); - if ((err = __ip_route_output_key(net, rp, flp)) != 0) - return err; + if (IS_ERR(rt)) + return rt; - if (flp->proto) { - if (!flp->fl4_src) - flp->fl4_src = (*rp)->rt_src; - if (!flp->fl4_dst) - flp->fl4_dst = (*rp)->rt_dst; - err = __xfrm_lookup(net, (struct dst_entry **)rp, flp, sk, - flags ? 
XFRM_LOOKUP_WAIT : 0); - if (err == -EREMOTE) - err = ipv4_dst_blackhole(net, rp, flp); - - return err; + if (flp4->flowi4_proto) { + if (!flp4->saddr) + flp4->saddr = rt->rt_src; + if (!flp4->daddr) + flp4->daddr = rt->rt_dst; + rt = (struct rtable *) xfrm_lookup(net, &rt->dst, + flowi4_to_flowi(flp4), + sk, 0); } - return 0; + return rt; } EXPORT_SYMBOL_GPL(ip_route_output_flow); -int ip_route_output_key(struct net *net, struct rtable **rp, struct flowi *flp) -{ - return ip_route_output_flow(net, rp, flp, NULL, 0); -} -EXPORT_SYMBOL(ip_route_output_key); - static int rt_fill_info(struct net *net, struct sk_buff *skb, u32 pid, u32 seq, int event, int nowait, unsigned int flags) @@ -2810,7 +2785,7 @@ static int rt_fill_info(struct net *net, r->rtm_family = AF_INET; r->rtm_dst_len = 32; r->rtm_src_len = 0; - r->rtm_tos = rt->fl.fl4_tos; + r->rtm_tos = rt->rt_tos; r->rtm_table = RT_TABLE_MAIN; NLA_PUT_U32(skb, RTA_TABLE, RT_TABLE_MAIN); r->rtm_type = rt->rt_type; @@ -2822,9 +2797,9 @@ static int rt_fill_info(struct net *net, NLA_PUT_BE32(skb, RTA_DST, rt->rt_dst); - if (rt->fl.fl4_src) { + if (rt->rt_key_src) { r->rtm_src_len = 32; - NLA_PUT_BE32(skb, RTA_SRC, rt->fl.fl4_src); + NLA_PUT_BE32(skb, RTA_SRC, rt->rt_key_src); } if (rt->dst.dev) NLA_PUT_U32(skb, RTA_OIF, rt->dst.dev->ifindex); @@ -2834,7 +2809,7 @@ static int rt_fill_info(struct net *net, #endif if (rt_is_input_route(rt)) NLA_PUT_BE32(skb, RTA_PREFSRC, rt->rt_spec_dst); - else if (rt->rt_src != rt->fl.fl4_src) + else if (rt->rt_src != rt->rt_key_src) NLA_PUT_BE32(skb, RTA_PREFSRC, rt->rt_src); if (rt->rt_dst != rt->rt_gateway) @@ -2843,11 +2818,12 @@ static int rt_fill_info(struct net *net, if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0) goto nla_put_failure; - if (rt->fl.mark) - NLA_PUT_BE32(skb, RTA_MARK, rt->fl.mark); + if (rt->rt_mark) + NLA_PUT_BE32(skb, RTA_MARK, rt->rt_mark); error = rt->dst.error; - expires = rt->dst.expires ? rt->dst.expires - jiffies : 0; + expires = (rt->peer && rt->peer->pmtu_expires) ? + rt->peer->pmtu_expires - jiffies : 0; if (rt->peer) { inet_peer_refcheck(rt->peer); id = atomic_read(&rt->peer->ip_id_count) & 0xffff; @@ -2877,7 +2853,7 @@ static int rt_fill_info(struct net *net, } } else #endif - NLA_PUT_U32(skb, RTA_IIF, rt->fl.iif); + NLA_PUT_U32(skb, RTA_IIF, rt->rt_iif); } if (rtnl_put_cacheinfo(skb, &rt->dst, id, ts, tsage, @@ -2951,14 +2927,18 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void if (err == 0 && rt->dst.error) err = -rt->dst.error; } else { - struct flowi fl = { - .fl4_dst = dst, - .fl4_src = src, - .fl4_tos = rtm->rtm_tos, - .oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0, - .mark = mark, + struct flowi4 fl4 = { + .daddr = dst, + .saddr = src, + .flowi4_tos = rtm->rtm_tos, + .flowi4_oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0, + .flowi4_mark = mark, }; - err = ip_route_output_key(net, &rt, &fl); + rt = ip_route_output_key(net, &fl4); + + err = 0; + if (IS_ERR(rt)) + err = PTR_ERR(rt); } if (err) @@ -3304,14 +3284,6 @@ int __init ip_rt_init(void) devinet_init(); ip_fib_init(); - /* All the timers, started at system startup tend - to synchronize. Perturb it a bit. 
- */ - INIT_DELAYED_WORK_DEFERRABLE(&expires_work, rt_worker_func); - expires_ljiffies = jiffies; - schedule_delayed_work(&expires_work, - net_random() % ip_rt_gc_interval + ip_rt_gc_interval); - if (ip_rt_proc_init()) printk(KERN_ERR "Unable to create route proc files\n"); #ifdef CONFIG_XFRM diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 47519205a014..8b44c6d2a79b 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -345,17 +345,20 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, * no easy way to do this. */ { - struct flowi fl = { .mark = sk->sk_mark, - .fl4_dst = ((opt && opt->srr) ? - opt->faddr : ireq->rmt_addr), - .fl4_src = ireq->loc_addr, - .fl4_tos = RT_CONN_FLAGS(sk), - .proto = IPPROTO_TCP, - .flags = inet_sk_flowi_flags(sk), - .fl_ip_sport = th->dest, - .fl_ip_dport = th->source }; - security_req_classify_flow(req, &fl); - if (ip_route_output_key(sock_net(sk), &rt, &fl)) { + struct flowi4 fl4 = { + .flowi4_mark = sk->sk_mark, + .daddr = ((opt && opt->srr) ? + opt->faddr : ireq->rmt_addr), + .saddr = ireq->loc_addr, + .flowi4_tos = RT_CONN_FLAGS(sk), + .flowi4_proto = IPPROTO_TCP, + .flowi4_flags = inet_sk_flowi_flags(sk), + .fl4_sport = th->dest, + .fl4_dport = th->source, + }; + security_req_classify_flow(req, flowi4_to_flowi(&fl4)); + rt = ip_route_output_key(sock_net(sk), &fl4); + if (IS_ERR(rt)) { reqsk_free(req); goto out; } diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 6c11eece262c..b22d45010545 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -505,6 +505,15 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg) else answ = tp->write_seq - tp->snd_una; break; + case SIOCOUTQNSD: + if (sk->sk_state == TCP_LISTEN) + return -EINVAL; + + if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) + answ = 0; + else + answ = tp->write_seq - tp->snd_nxt; + break; default: return -ENOIOCTLCMD; } @@ -873,9 +882,7 @@ int tcp_sendpage(struct sock *sk, struct page *page, int offset, flags); lock_sock(sk); - TCP_CHECK_TIMER(sk); res = do_tcp_sendpages(sk, &page, offset, size, flags); - TCP_CHECK_TIMER(sk); release_sock(sk); return res; } @@ -916,7 +923,6 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, long timeo; lock_sock(sk); - TCP_CHECK_TIMER(sk); flags = msg->msg_flags; timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); @@ -1104,7 +1110,6 @@ wait_for_memory: out: if (copied) tcp_push(sk, flags, mss_now, tp->nonagle); - TCP_CHECK_TIMER(sk); release_sock(sk); return copied; @@ -1123,7 +1128,6 @@ do_error: goto out; out_err: err = sk_stream_error(sk, flags, err); - TCP_CHECK_TIMER(sk); release_sock(sk); return err; } @@ -1415,8 +1419,6 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, lock_sock(sk); - TCP_CHECK_TIMER(sk); - err = -ENOTCONN; if (sk->sk_state == TCP_LISTEN) goto out; @@ -1767,12 +1769,10 @@ skip_copy: /* Clean up data we have read: This will do ACK frames. 
*/ tcp_cleanup_rbuf(sk, copied); - TCP_CHECK_TIMER(sk); release_sock(sk); return copied; out: - TCP_CHECK_TIMER(sk); release_sock(sk); return err; @@ -2653,7 +2653,7 @@ int compat_tcp_getsockopt(struct sock *sk, int level, int optname, EXPORT_SYMBOL(compat_tcp_getsockopt); #endif -struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features) +struct sk_buff *tcp_tso_segment(struct sk_buff *skb, u32 features) { struct sk_buff *segs = ERR_PTR(-EINVAL); struct tcphdr *th; diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c index 3b53fd1af23f..6187eb4d1dcf 100644 --- a/net/ipv4/tcp_bic.c +++ b/net/ipv4/tcp_bic.c @@ -209,7 +209,7 @@ static void bictcp_acked(struct sock *sk, u32 cnt, s32 rtt) } -static struct tcp_congestion_ops bictcp = { +static struct tcp_congestion_ops bictcp __read_mostly = { .init = bictcp_init, .ssthresh = bictcp_recalc_ssthresh, .cong_avoid = bictcp_cong_avoid, diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index 71d5f2f29fa6..62f775cb0863 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c @@ -405,7 +405,7 @@ static void bictcp_acked(struct sock *sk, u32 cnt, s32 rtt_us) hystart_update(sk, delay); } -static struct tcp_congestion_ops cubictcp = { +static struct tcp_congestion_ops cubictcp __read_mostly = { .init = bictcp_init, .ssthresh = bictcp_recalc_ssthresh, .cong_avoid = bictcp_cong_avoid, diff --git a/net/ipv4/tcp_highspeed.c b/net/ipv4/tcp_highspeed.c index 8b6caaf75bb9..30f27f6b3655 100644 --- a/net/ipv4/tcp_highspeed.c +++ b/net/ipv4/tcp_highspeed.c @@ -158,7 +158,7 @@ static u32 hstcp_ssthresh(struct sock *sk) } -static struct tcp_congestion_ops tcp_highspeed = { +static struct tcp_congestion_ops tcp_highspeed __read_mostly = { .init = hstcp_init, .ssthresh = hstcp_ssthresh, .cong_avoid = hstcp_cong_avoid, diff --git a/net/ipv4/tcp_htcp.c b/net/ipv4/tcp_htcp.c index 7c94a4955416..c1a8175361e8 100644 --- a/net/ipv4/tcp_htcp.c +++ b/net/ipv4/tcp_htcp.c @@ -284,7 +284,7 @@ static void htcp_state(struct sock *sk, u8 new_state) } } -static struct tcp_congestion_ops htcp = { +static struct tcp_congestion_ops htcp __read_mostly = { .init = htcp_init, .ssthresh = htcp_recalc_ssthresh, .cong_avoid = htcp_cong_avoid, diff --git a/net/ipv4/tcp_hybla.c b/net/ipv4/tcp_hybla.c index 377bc9349371..fe3ecf484b44 100644 --- a/net/ipv4/tcp_hybla.c +++ b/net/ipv4/tcp_hybla.c @@ -162,7 +162,7 @@ static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) tp->snd_cwnd = min_t(u32, tp->snd_cwnd, tp->snd_cwnd_clamp); } -static struct tcp_congestion_ops tcp_hybla = { +static struct tcp_congestion_ops tcp_hybla __read_mostly = { .init = hybla_init, .ssthresh = tcp_reno_ssthresh, .min_cwnd = tcp_reno_min_cwnd, diff --git a/net/ipv4/tcp_illinois.c b/net/ipv4/tcp_illinois.c index 00ca688d8964..813b43a76fec 100644 --- a/net/ipv4/tcp_illinois.c +++ b/net/ipv4/tcp_illinois.c @@ -322,7 +322,7 @@ static void tcp_illinois_info(struct sock *sk, u32 ext, } } -static struct tcp_congestion_ops tcp_illinois = { +static struct tcp_congestion_ops tcp_illinois __read_mostly = { .flags = TCP_CONG_RTT_STAMP, .init = tcp_illinois_init, .ssthresh = tcp_illinois_ssthresh, diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 2549b29b062d..08ea735b9d72 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -817,7 +817,7 @@ __u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst) __u32 cwnd = (dst ? 
dst_metric(dst, RTAX_INITCWND) : 0); if (!cwnd) - cwnd = rfc3390_bytes_to_packets(tp->mss_cache); + cwnd = TCP_INIT_CWND; return min_t(__u32, cwnd, tp->snd_cwnd_clamp); } @@ -1222,7 +1222,7 @@ static int tcp_check_dsack(struct sock *sk, struct sk_buff *ack_skb, } /* D-SACK for already forgotten data... Do dumb counting. */ - if (dup_sack && + if (dup_sack && tp->undo_marker && tp->undo_retrans && !after(end_seq_0, prior_snd_una) && after(end_seq_0, tp->undo_marker)) tp->undo_retrans--; @@ -1299,7 +1299,8 @@ static u8 tcp_sacktag_one(struct sk_buff *skb, struct sock *sk, /* Account D-SACK for retransmitted packet. */ if (dup_sack && (sacked & TCPCB_RETRANS)) { - if (after(TCP_SKB_CB(skb)->end_seq, tp->undo_marker)) + if (tp->undo_marker && tp->undo_retrans && + after(TCP_SKB_CB(skb)->end_seq, tp->undo_marker)) tp->undo_retrans--; if (sacked & TCPCB_SACKED_ACKED) state->reord = min(fack_count, state->reord); @@ -4399,7 +4400,7 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) if (!skb_copy_datagram_iovec(skb, 0, tp->ucopy.iov, chunk)) { tp->ucopy.len -= chunk; tp->copied_seq += chunk; - eaten = (chunk == skb->len && !th->fin); + eaten = (chunk == skb->len); tcp_rcv_space_adjust(sk); } local_bh_disable(); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 856f68466d49..f7e6c2c2d2bb 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -149,9 +149,9 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) struct inet_sock *inet = inet_sk(sk); struct tcp_sock *tp = tcp_sk(sk); struct sockaddr_in *usin = (struct sockaddr_in *)uaddr; + __be16 orig_sport, orig_dport; struct rtable *rt; __be32 daddr, nexthop; - int tmp; int err; if (addr_len < sizeof(struct sockaddr_in)) @@ -167,14 +167,17 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) nexthop = inet->opt->faddr; } - tmp = ip_route_connect(&rt, nexthop, inet->inet_saddr, - RT_CONN_FLAGS(sk), sk->sk_bound_dev_if, - IPPROTO_TCP, - inet->inet_sport, usin->sin_port, sk, 1); - if (tmp < 0) { - if (tmp == -ENETUNREACH) + orig_sport = inet->inet_sport; + orig_dport = usin->sin_port; + rt = ip_route_connect(nexthop, inet->inet_saddr, + RT_CONN_FLAGS(sk), sk->sk_bound_dev_if, + IPPROTO_TCP, + orig_sport, orig_dport, sk, true); + if (IS_ERR(rt)) { + err = PTR_ERR(rt); + if (err == -ENETUNREACH) IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES); - return tmp; + return err; } if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) { @@ -233,11 +236,14 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) if (err) goto failure; - err = ip_route_newports(&rt, IPPROTO_TCP, - inet->inet_sport, inet->inet_dport, sk); - if (err) + rt = ip_route_newports(rt, IPPROTO_TCP, + orig_sport, orig_dport, + inet->inet_sport, inet->inet_dport, sk); + if (IS_ERR(rt)) { + err = PTR_ERR(rt); + rt = NULL; goto failure; - + } /* OK, now commit destination to socket. 
*/ sk->sk_gso_type = SKB_GSO_TCPV4; sk_setup_caps(sk, &rt->dst); @@ -1341,7 +1347,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) tcp_death_row.sysctl_tw_recycle && (dst = inet_csk_route_req(sk, req)) != NULL && (peer = rt_get_peer((struct rtable *)dst)) != NULL && - peer->daddr.a4 == saddr) { + peer->daddr.addr.a4 == saddr) { inet_peer_refcheck(peer); if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL && (s32)(peer->tcp_ts - req->ts_recent) > @@ -1556,12 +1562,10 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ sock_rps_save_rxhash(sk, skb->rxhash); - TCP_CHECK_TIMER(sk); if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) { rsk = sk; goto reset; } - TCP_CHECK_TIMER(sk); return 0; } @@ -1583,13 +1587,10 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) } else sock_rps_save_rxhash(sk, skb->rxhash); - - TCP_CHECK_TIMER(sk); if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) { rsk = sk; goto reset; } - TCP_CHECK_TIMER(sk); return 0; reset: @@ -1994,7 +1995,6 @@ static void *listening_get_next(struct seq_file *seq, void *cur) } req = req->dl_next; } - st->offset = 0; if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries) break; get_req: diff --git a/net/ipv4/tcp_lp.c b/net/ipv4/tcp_lp.c index de870377fbba..656d431c99ad 100644 --- a/net/ipv4/tcp_lp.c +++ b/net/ipv4/tcp_lp.c @@ -313,7 +313,7 @@ static void tcp_lp_pkts_acked(struct sock *sk, u32 num_acked, s32 rtt_us) lp->last_drop = tcp_time_stamp; } -static struct tcp_congestion_ops tcp_lp = { +static struct tcp_congestion_ops tcp_lp __read_mostly = { .flags = TCP_CONG_RTT_STAMP, .init = tcp_lp_init, .ssthresh = tcp_reno_ssthresh, diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 406f320336e6..dfa5beb0c1c8 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2162,7 +2162,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) if (!tp->retrans_stamp) tp->retrans_stamp = TCP_SKB_CB(skb)->when; - tp->undo_retrans++; + tp->undo_retrans += tcp_skb_pcount(skb); /* snd_nxt is stored to detect loss of retransmitted segment, * see tcp_input.c tcp_sacktag_write_queue(). 
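
The IPv4 hunks around this point (route.c, syncookies.c, tcp_ipv4.c, and udp.c below) all apply the same conversion: the old int-returning ip_route_output_key()/ip_route_output_flow() calls that filled a caller-supplied struct rtable ** from a struct flowi are replaced by lookups that take a struct flowi4 and return the struct rtable * directly, with errors carried as an ERR_PTR. A minimal sketch of the new calling convention, assuming a hypothetical example_lookup() wrapper that is not part of this patch:

	/*
	 * Sketch only: builds a struct flowi4, performs the lookup with the
	 * rtable-returning ip_route_output_key(), and converts an ERR_PTR
	 * back into a negative errno, mirroring the inet_rtm_getroute() and
	 * udp_sendmsg() hunks in this patch.
	 */
	static int example_lookup(struct net *net, __be32 daddr, __be32 saddr,
				  u8 tos, int oif, u32 mark, struct rtable **rtp)
	{
		struct flowi4 fl4 = {
			.daddr       = daddr,
			.saddr       = saddr,
			.flowi4_tos  = tos,
			.flowi4_oif  = oif,
			.flowi4_mark = mark,
		};
		struct rtable *rt;

		rt = ip_route_output_key(net, &fl4);
		if (IS_ERR(rt))
			return PTR_ERR(rt);	/* error now travels in the pointer */

		*rtp = rt;
		return 0;
	}

The same pattern shows up on the IPv6 side further down, where struct flowi becomes struct flowi6 and ip6_dst_lookup()/xfrm_lookup() pairs are folded into ip6_dst_lookup_flow() returning a dst_entry or ERR_PTR.
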
diff --git a/net/ipv4/tcp_scalable.c b/net/ipv4/tcp_scalable.c index a76513779e2b..8ce55b8aaec8 100644 --- a/net/ipv4/tcp_scalable.c +++ b/net/ipv4/tcp_scalable.c @@ -35,7 +35,7 @@ static u32 tcp_scalable_ssthresh(struct sock *sk) } -static struct tcp_congestion_ops tcp_scalable = { +static struct tcp_congestion_ops tcp_scalable __read_mostly = { .ssthresh = tcp_scalable_ssthresh, .cong_avoid = tcp_scalable_cong_avoid, .min_cwnd = tcp_reno_min_cwnd, diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 74a6aa003657..ecd44b0c45f1 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -259,7 +259,6 @@ static void tcp_delack_timer(unsigned long data) tcp_send_ack(sk); NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKS); } - TCP_CHECK_TIMER(sk); out: if (tcp_memory_pressure) @@ -481,7 +480,6 @@ static void tcp_write_timer(unsigned long data) tcp_probe_timer(sk); break; } - TCP_CHECK_TIMER(sk); out: sk_mem_reclaim(sk); @@ -589,7 +587,6 @@ static void tcp_keepalive_timer (unsigned long data) elapsed = keepalive_time_when(tp) - elapsed; } - TCP_CHECK_TIMER(sk); sk_mem_reclaim(sk); resched: diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c index c6743eec9b7d..80fa2bfd7ede 100644 --- a/net/ipv4/tcp_vegas.c +++ b/net/ipv4/tcp_vegas.c @@ -304,7 +304,7 @@ void tcp_vegas_get_info(struct sock *sk, u32 ext, struct sk_buff *skb) } EXPORT_SYMBOL_GPL(tcp_vegas_get_info); -static struct tcp_congestion_ops tcp_vegas = { +static struct tcp_congestion_ops tcp_vegas __read_mostly = { .flags = TCP_CONG_RTT_STAMP, .init = tcp_vegas_init, .ssthresh = tcp_reno_ssthresh, diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c index 38bc0b52d745..ac43cd747bce 100644 --- a/net/ipv4/tcp_veno.c +++ b/net/ipv4/tcp_veno.c @@ -201,7 +201,7 @@ static u32 tcp_veno_ssthresh(struct sock *sk) return max(tp->snd_cwnd >> 1U, 2U); } -static struct tcp_congestion_ops tcp_veno = { +static struct tcp_congestion_ops tcp_veno __read_mostly = { .flags = TCP_CONG_RTT_STAMP, .init = tcp_veno_init, .ssthresh = tcp_veno_ssthresh, diff --git a/net/ipv4/tcp_westwood.c b/net/ipv4/tcp_westwood.c index a534dda5456e..1b91bf48e277 100644 --- a/net/ipv4/tcp_westwood.c +++ b/net/ipv4/tcp_westwood.c @@ -272,7 +272,7 @@ static void tcp_westwood_info(struct sock *sk, u32 ext, } -static struct tcp_congestion_ops tcp_westwood = { +static struct tcp_congestion_ops tcp_westwood __read_mostly = { .init = tcp_westwood_init, .ssthresh = tcp_reno_ssthresh, .cong_avoid = tcp_reno_cong_avoid, diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c index a0f240358892..dc7f43179c9a 100644 --- a/net/ipv4/tcp_yeah.c +++ b/net/ipv4/tcp_yeah.c @@ -225,7 +225,7 @@ static u32 tcp_yeah_ssthresh(struct sock *sk) { return tp->snd_cwnd - reduction; } -static struct tcp_congestion_ops tcp_yeah = { +static struct tcp_congestion_ops tcp_yeah __read_mostly = { .flags = TCP_CONG_RTT_STAMP, .init = tcp_yeah_init, .ssthresh = tcp_yeah_ssthresh, diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 8157b17959ee..588f47af5faf 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -663,75 +663,72 @@ void udp_flush_pending_frames(struct sock *sk) EXPORT_SYMBOL(udp_flush_pending_frames); /** - * udp4_hwcsum_outgoing - handle outgoing HW checksumming - * @sk: socket we are sending on + * udp4_hwcsum - handle outgoing HW checksumming * @skb: sk_buff containing the filled-in UDP header * (checksum field must be zeroed out) + * @src: source IP address + * @dst: destination IP address */ -static void udp4_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb, - __be32 src, 
__be32 dst, int len) +static void udp4_hwcsum(struct sk_buff *skb, __be32 src, __be32 dst) { - unsigned int offset; struct udphdr *uh = udp_hdr(skb); + struct sk_buff *frags = skb_shinfo(skb)->frag_list; + int offset = skb_transport_offset(skb); + int len = skb->len - offset; + int hlen = len; __wsum csum = 0; - if (skb_queue_len(&sk->sk_write_queue) == 1) { + if (!frags) { /* * Only one fragment on the socket. */ skb->csum_start = skb_transport_header(skb) - skb->head; skb->csum_offset = offsetof(struct udphdr, check); - uh->check = ~csum_tcpudp_magic(src, dst, len, IPPROTO_UDP, 0); + uh->check = ~csum_tcpudp_magic(src, dst, len, + IPPROTO_UDP, 0); } else { /* * HW-checksum won't work as there are two or more * fragments on the socket so that all csums of sk_buffs * should be together */ - offset = skb_transport_offset(skb); - skb->csum = skb_checksum(skb, offset, skb->len - offset, 0); + do { + csum = csum_add(csum, frags->csum); + hlen -= frags->len; + } while ((frags = frags->next)); + csum = skb_checksum(skb, offset, hlen, csum); skb->ip_summed = CHECKSUM_NONE; - skb_queue_walk(&sk->sk_write_queue, skb) { - csum = csum_add(csum, skb->csum); - } - uh->check = csum_tcpudp_magic(src, dst, len, IPPROTO_UDP, csum); if (uh->check == 0) uh->check = CSUM_MANGLED_0; } } -/* - * Push out all pending data as one UDP datagram. Socket is locked. - */ -static int udp_push_pending_frames(struct sock *sk) +static int udp_send_skb(struct sk_buff *skb, __be32 daddr, __be32 dport) { - struct udp_sock *up = udp_sk(sk); + struct sock *sk = skb->sk; struct inet_sock *inet = inet_sk(sk); - struct flowi *fl = &inet->cork.fl; - struct sk_buff *skb; struct udphdr *uh; + struct rtable *rt = (struct rtable *)skb_dst(skb); int err = 0; int is_udplite = IS_UDPLITE(sk); + int offset = skb_transport_offset(skb); + int len = skb->len - offset; __wsum csum = 0; - /* Grab the skbuff where UDP header space exists. */ - if ((skb = skb_peek(&sk->sk_write_queue)) == NULL) - goto out; - /* * Create a UDP header */ uh = udp_hdr(skb); - uh->source = fl->fl_ip_sport; - uh->dest = fl->fl_ip_dport; - uh->len = htons(up->len); + uh->source = inet->inet_sport; + uh->dest = dport; + uh->len = htons(len); uh->check = 0; if (is_udplite) /* UDP-Lite */ - csum = udplite_csum_outgoing(sk, skb); + csum = udplite_csum(skb); else if (sk->sk_no_check == UDP_CSUM_NOXMIT) { /* UDP csum disabled */ @@ -740,20 +737,20 @@ static int udp_push_pending_frames(struct sock *sk) } else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */ - udp4_hwcsum_outgoing(sk, skb, fl->fl4_src, fl->fl4_dst, up->len); + udp4_hwcsum(skb, rt->rt_src, daddr); goto send; - } else /* `normal' UDP */ - csum = udp_csum_outgoing(sk, skb); + } else + csum = udp_csum(skb); /* add protocol-dependent pseudo-header */ - uh->check = csum_tcpudp_magic(fl->fl4_src, fl->fl4_dst, up->len, + uh->check = csum_tcpudp_magic(rt->rt_src, daddr, len, sk->sk_protocol, csum); if (uh->check == 0) uh->check = CSUM_MANGLED_0; send: - err = ip_push_pending_frames(sk); + err = ip_send_skb(skb); if (err) { if (err == -ENOBUFS && !inet->recverr) { UDP_INC_STATS_USER(sock_net(sk), @@ -763,6 +760,26 @@ send: } else UDP_INC_STATS_USER(sock_net(sk), UDP_MIB_OUTDATAGRAMS, is_udplite); + return err; +} + +/* + * Push out all pending data as one UDP datagram. Socket is locked. 
+ */ +static int udp_push_pending_frames(struct sock *sk) +{ + struct udp_sock *up = udp_sk(sk); + struct inet_sock *inet = inet_sk(sk); + struct flowi4 *fl4 = &inet->cork.fl.u.ip4; + struct sk_buff *skb; + int err = 0; + + skb = ip_finish_skb(sk); + if (!skb) + goto out; + + err = udp_send_skb(skb, fl4->daddr, fl4->fl4_dport); + out: up->len = 0; up->pending = 0; @@ -774,6 +791,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, { struct inet_sock *inet = inet_sk(sk); struct udp_sock *up = udp_sk(sk); + struct flowi4 *fl4; int ulen = len; struct ipcm_cookie ipc; struct rtable *rt = NULL; @@ -785,6 +803,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, int err, is_udplite = IS_UDPLITE(sk); int corkreq = up->corkflag || msg->msg_flags&MSG_MORE; int (*getfrag)(void *, char *, int, int, int, struct sk_buff *); + struct sk_buff *skb; if (len > 0xFFFF) return -EMSGSIZE; @@ -799,6 +818,8 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, ipc.opt = NULL; ipc.tx_flags = 0; + getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag; + if (up->pending) { /* * There are pending frames. @@ -888,20 +909,25 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, rt = (struct rtable *)sk_dst_check(sk, 0); if (rt == NULL) { - struct flowi fl = { .oif = ipc.oif, - .mark = sk->sk_mark, - .fl4_dst = faddr, - .fl4_src = saddr, - .fl4_tos = tos, - .proto = sk->sk_protocol, - .flags = inet_sk_flowi_flags(sk), - .fl_ip_sport = inet->inet_sport, - .fl_ip_dport = dport }; + struct flowi4 fl4 = { + .flowi4_oif = ipc.oif, + .flowi4_mark = sk->sk_mark, + .daddr = faddr, + .saddr = saddr, + .flowi4_tos = tos, + .flowi4_proto = sk->sk_protocol, + .flowi4_flags = (inet_sk_flowi_flags(sk) | + FLOWI_FLAG_CAN_SLEEP), + .fl4_sport = inet->inet_sport, + .fl4_dport = dport, + }; struct net *net = sock_net(sk); - security_sk_classify_flow(sk, &fl); - err = ip_route_output_flow(net, &rt, &fl, sk, 1); - if (err) { + security_sk_classify_flow(sk, flowi4_to_flowi(&fl4)); + rt = ip_route_output_flow(net, &fl4, sk); + if (IS_ERR(rt)) { + err = PTR_ERR(rt); + rt = NULL; if (err == -ENETUNREACH) IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES); goto out; @@ -923,6 +949,17 @@ back_from_confirm: if (!ipc.addr) daddr = ipc.addr = rt->rt_dst; + /* Lockless fast path for the non-corking case. */ + if (!corkreq) { + skb = ip_make_skb(sk, getfrag, msg->msg_iov, ulen, + sizeof(struct udphdr), &ipc, &rt, + msg->msg_flags); + err = PTR_ERR(skb); + if (skb && !IS_ERR(skb)) + err = udp_send_skb(skb, daddr, dport); + goto out; + } + lock_sock(sk); if (unlikely(up->pending)) { /* The socket is already corked while preparing it. */ @@ -936,15 +973,15 @@ back_from_confirm: /* * Now cork the socket to pend data. */ - inet->cork.fl.fl4_dst = daddr; - inet->cork.fl.fl_ip_dport = dport; - inet->cork.fl.fl4_src = saddr; - inet->cork.fl.fl_ip_sport = inet->inet_sport; + fl4 = &inet->cork.fl.u.ip4; + fl4->daddr = daddr; + fl4->saddr = saddr; + fl4->fl4_dport = dport; + fl4->fl4_sport = inet->inet_sport; up->pending = AF_INET; do_append_data: up->len += ulen; - getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag; err = ip_append_data(sk, getfrag, msg->msg_iov, ulen, sizeof(struct udphdr), &ipc, &rt, corkreq ? 
msg->msg_flags|MSG_MORE : msg->msg_flags); @@ -2199,7 +2236,7 @@ int udp4_ufo_send_check(struct sk_buff *skb) return 0; } -struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, int features) +struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, u32 features) { struct sk_buff *segs = ERR_PTR(-EINVAL); unsigned int mss; diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index b057d40addec..13e0e7f659ff 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -19,25 +19,23 @@ static struct xfrm_policy_afinfo xfrm4_policy_afinfo; static struct dst_entry *xfrm4_dst_lookup(struct net *net, int tos, - xfrm_address_t *saddr, - xfrm_address_t *daddr) + const xfrm_address_t *saddr, + const xfrm_address_t *daddr) { - struct flowi fl = { - .fl4_dst = daddr->a4, - .fl4_tos = tos, + struct flowi4 fl4 = { + .daddr = daddr->a4, + .flowi4_tos = tos, }; - struct dst_entry *dst; struct rtable *rt; - int err; if (saddr) - fl.fl4_src = saddr->a4; + fl4.saddr = saddr->a4; + + rt = __ip_route_output_key(net, &fl4); + if (!IS_ERR(rt)) + return &rt->dst; - err = __ip_route_output_key(net, &rt, &fl); - dst = &rt->dst; - if (err) - dst = ERR_PTR(err); - return dst; + return ERR_CAST(rt); } static int xfrm4_get_saddr(struct net *net, @@ -56,9 +54,9 @@ static int xfrm4_get_saddr(struct net *net, return 0; } -static int xfrm4_get_tos(struct flowi *fl) +static int xfrm4_get_tos(const struct flowi *fl) { - return IPTOS_RT_MASK & fl->fl4_tos; /* Strip ECN bits */ + return IPTOS_RT_MASK & fl->u.ip4.flowi4_tos; /* Strip ECN bits */ } static int xfrm4_init_path(struct xfrm_dst *path, struct dst_entry *dst, @@ -68,11 +66,17 @@ static int xfrm4_init_path(struct xfrm_dst *path, struct dst_entry *dst, } static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, - struct flowi *fl) + const struct flowi *fl) { struct rtable *rt = (struct rtable *)xdst->route; + const struct flowi4 *fl4 = &fl->u.ip4; - xdst->u.rt.fl = *fl; + rt->rt_key_dst = fl4->daddr; + rt->rt_key_src = fl4->saddr; + rt->rt_tos = fl4->flowi4_tos; + rt->rt_iif = fl4->flowi4_iif; + rt->rt_oif = fl4->flowi4_oif; + rt->rt_mark = fl4->flowi4_mark; xdst->u.dst.dev = dev; dev_hold(dev); @@ -99,9 +103,10 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) { struct iphdr *iph = ip_hdr(skb); u8 *xprth = skb_network_header(skb) + iph->ihl * 4; + struct flowi4 *fl4 = &fl->u.ip4; - memset(fl, 0, sizeof(struct flowi)); - fl->mark = skb->mark; + memset(fl4, 0, sizeof(struct flowi4)); + fl4->flowi4_mark = skb->mark; if (!(iph->frag_off & htons(IP_MF | IP_OFFSET))) { switch (iph->protocol) { @@ -114,8 +119,8 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) pskb_may_pull(skb, xprth + 4 - skb->data)) { __be16 *ports = (__be16 *)xprth; - fl->fl_ip_sport = ports[!!reverse]; - fl->fl_ip_dport = ports[!reverse]; + fl4->fl4_sport = ports[!!reverse]; + fl4->fl4_dport = ports[!reverse]; } break; @@ -123,8 +128,8 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) if (pskb_may_pull(skb, xprth + 2 - skb->data)) { u8 *icmp = xprth; - fl->fl_icmp_type = icmp[0]; - fl->fl_icmp_code = icmp[1]; + fl4->fl4_icmp_type = icmp[0]; + fl4->fl4_icmp_code = icmp[1]; } break; @@ -132,7 +137,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) if (pskb_may_pull(skb, xprth + 4 - skb->data)) { __be32 *ehdr = (__be32 *)xprth; - fl->fl_ipsec_spi = ehdr[0]; + fl4->fl4_ipsec_spi = ehdr[0]; } break; @@ -140,7 +145,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) if 
(pskb_may_pull(skb, xprth + 8 - skb->data)) { __be32 *ah_hdr = (__be32*)xprth; - fl->fl_ipsec_spi = ah_hdr[1]; + fl4->fl4_ipsec_spi = ah_hdr[1]; } break; @@ -148,7 +153,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) if (pskb_may_pull(skb, xprth + 4 - skb->data)) { __be16 *ipcomp_hdr = (__be16 *)xprth; - fl->fl_ipsec_spi = htonl(ntohs(ipcomp_hdr[1])); + fl4->fl4_ipsec_spi = htonl(ntohs(ipcomp_hdr[1])); } break; @@ -160,20 +165,20 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) if (greflags[0] & GRE_KEY) { if (greflags[0] & GRE_CSUM) gre_hdr++; - fl->fl_gre_key = gre_hdr[1]; + fl4->fl4_gre_key = gre_hdr[1]; } } break; default: - fl->fl_ipsec_spi = 0; + fl4->fl4_ipsec_spi = 0; break; } } - fl->proto = iph->protocol; - fl->fl4_dst = reverse ? iph->saddr : iph->daddr; - fl->fl4_src = reverse ? iph->daddr : iph->saddr; - fl->fl4_tos = iph->tos; + fl4->flowi4_proto = iph->protocol; + fl4->daddr = reverse ? iph->saddr : iph->daddr; + fl4->saddr = reverse ? iph->daddr : iph->saddr; + fl4->flowi4_tos = iph->tos; } static inline int xfrm4_garbage_collect(struct dst_ops *ops) @@ -196,8 +201,11 @@ static void xfrm4_dst_destroy(struct dst_entry *dst) { struct xfrm_dst *xdst = (struct xfrm_dst *)dst; + dst_destroy_metrics_generic(dst); + if (likely(xdst->u.rt.peer)) inet_putpeer(xdst->u.rt.peer); + xfrm_dst_destroy(xdst); } @@ -215,6 +223,7 @@ static struct dst_ops xfrm4_dst_ops = { .protocol = cpu_to_be16(ETH_P_IP), .gc = xfrm4_garbage_collect, .update_pmtu = xfrm4_update_pmtu, + .cow_metrics = dst_cow_metrics_generic, .destroy = xfrm4_dst_destroy, .ifdown = xfrm4_dst_ifdown, .local_out = __ip_local_out, @@ -230,6 +239,7 @@ static struct xfrm_policy_afinfo xfrm4_policy_afinfo = { .get_tos = xfrm4_get_tos, .init_path = xfrm4_init_path, .fill_dst = xfrm4_fill_dst, + .blackhole_route = ipv4_blackhole_route, }; #ifdef CONFIG_SYSCTL diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c index 47947624eccc..1717c64628d1 100644 --- a/net/ipv4/xfrm4_state.c +++ b/net/ipv4/xfrm4_state.c @@ -21,24 +21,26 @@ static int xfrm4_init_flags(struct xfrm_state *x) } static void -__xfrm4_init_tempsel(struct xfrm_selector *sel, struct flowi *fl) +__xfrm4_init_tempsel(struct xfrm_selector *sel, const struct flowi *fl) { - sel->daddr.a4 = fl->fl4_dst; - sel->saddr.a4 = fl->fl4_src; - sel->dport = xfrm_flowi_dport(fl); + const struct flowi4 *fl4 = &fl->u.ip4; + + sel->daddr.a4 = fl4->daddr; + sel->saddr.a4 = fl4->saddr; + sel->dport = xfrm_flowi_dport(fl, &fl4->uli); sel->dport_mask = htons(0xffff); - sel->sport = xfrm_flowi_sport(fl); + sel->sport = xfrm_flowi_sport(fl, &fl4->uli); sel->sport_mask = htons(0xffff); sel->family = AF_INET; sel->prefixlen_d = 32; sel->prefixlen_s = 32; - sel->proto = fl->proto; - sel->ifindex = fl->oif; + sel->proto = fl4->flowi4_proto; + sel->ifindex = fl4->flowi4_oif; } static void -xfrm4_init_temprop(struct xfrm_state *x, struct xfrm_tmpl *tmpl, - xfrm_address_t *daddr, xfrm_address_t *saddr) +xfrm4_init_temprop(struct xfrm_state *x, const struct xfrm_tmpl *tmpl, + const xfrm_address_t *daddr, const xfrm_address_t *saddr) { x->id = tmpl->id; if (x->id.daddr.a4 == 0) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 24a1cf110d80..3daaf3c7703c 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -718,12 +718,9 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp) struct inet6_ifaddr *ifa, *ifn; struct inet6_dev *idev = ifp->idev; int state; - int hash; int deleted = 0, onlink = 0; unsigned long expires = jiffies; - 
hash = ipv6_addr_hash(&ifp->addr); - spin_lock_bh(&ifp->state_lock); state = ifp->state; ifp->state = INET6_IFADDR_STATE_DEAD; @@ -2661,14 +2658,12 @@ static int addrconf_ifdown(struct net_device *dev, int how) struct net *net = dev_net(dev); struct inet6_dev *idev; struct inet6_ifaddr *ifa; - LIST_HEAD(keep_list); - int state; + int state, i; ASSERT_RTNL(); - /* Flush routes if device is being removed or it is not loopback */ - if (how || !(dev->flags & IFF_LOOPBACK)) - rt6_ifdown(net, dev); + rt6_ifdown(net, dev); + neigh_ifdown(&nd_tbl, dev); idev = __in6_dev_get(dev); if (idev == NULL) @@ -2689,6 +2684,23 @@ static int addrconf_ifdown(struct net_device *dev, int how) } + /* Step 2: clear hash table */ + for (i = 0; i < IN6_ADDR_HSIZE; i++) { + struct hlist_head *h = &inet6_addr_lst[i]; + struct hlist_node *n; + + spin_lock_bh(&addrconf_hash_lock); + restart: + hlist_for_each_entry_rcu(ifa, n, h, addr_lst) { + if (ifa->idev == idev) { + hlist_del_init_rcu(&ifa->addr_lst); + addrconf_del_timer(ifa); + goto restart; + } + } + spin_unlock_bh(&addrconf_hash_lock); + } + write_lock_bh(&idev->lock); /* Step 2: clear flags for stateless addrconf */ @@ -2722,52 +2734,23 @@ static int addrconf_ifdown(struct net_device *dev, int how) struct inet6_ifaddr, if_list); addrconf_del_timer(ifa); - /* If just doing link down, and address is permanent - and not link-local, then retain it. */ - if (!how && - (ifa->flags&IFA_F_PERMANENT) && - !(ipv6_addr_type(&ifa->addr) & IPV6_ADDR_LINKLOCAL)) { - list_move_tail(&ifa->if_list, &keep_list); - - /* If not doing DAD on this address, just keep it. */ - if ((dev->flags&(IFF_NOARP|IFF_LOOPBACK)) || - idev->cnf.accept_dad <= 0 || - (ifa->flags & IFA_F_NODAD)) - continue; + list_del(&ifa->if_list); - /* If it was tentative already, no need to notify */ - if (ifa->flags & IFA_F_TENTATIVE) - continue; + write_unlock_bh(&idev->lock); - /* Flag it for later restoration when link comes up */ - ifa->flags |= IFA_F_TENTATIVE; - ifa->state = INET6_IFADDR_STATE_DAD; - } else { - list_del(&ifa->if_list); - - /* clear hash table */ - spin_lock_bh(&addrconf_hash_lock); - hlist_del_init_rcu(&ifa->addr_lst); - spin_unlock_bh(&addrconf_hash_lock); - - write_unlock_bh(&idev->lock); - spin_lock_bh(&ifa->state_lock); - state = ifa->state; - ifa->state = INET6_IFADDR_STATE_DEAD; - spin_unlock_bh(&ifa->state_lock); - - if (state != INET6_IFADDR_STATE_DEAD) { - __ipv6_ifa_notify(RTM_DELADDR, ifa); - atomic_notifier_call_chain(&inet6addr_chain, - NETDEV_DOWN, ifa); - } + spin_lock_bh(&ifa->state_lock); + state = ifa->state; + ifa->state = INET6_IFADDR_STATE_DEAD; + spin_unlock_bh(&ifa->state_lock); - in6_ifa_put(ifa); - write_lock_bh(&idev->lock); + if (state != INET6_IFADDR_STATE_DEAD) { + __ipv6_ifa_notify(RTM_DELADDR, ifa); + atomic_notifier_call_chain(&inet6addr_chain, NETDEV_DOWN, ifa); } - } + in6_ifa_put(ifa); - list_splice(&keep_list, &idev->addr_list); + write_lock_bh(&idev->lock); + } write_unlock_bh(&idev->lock); @@ -4156,8 +4139,7 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) addrconf_leave_solict(ifp->idev, &ifp->addr); dst_hold(&ifp->rt->dst); - if (ifp->state == INET6_IFADDR_STATE_DEAD && - ip6_del_rt(ifp->rt)) + if (ip6_del_rt(ifp->rt)) dst_free(&ifp->rt->dst); break; } diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 978e80e2c4a8..4b13d5d8890e 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -644,41 +644,34 @@ EXPORT_SYMBOL(inet6_unregister_protosw); int inet6_sk_rebuild_header(struct sock *sk) { - int err; - struct 
dst_entry *dst; struct ipv6_pinfo *np = inet6_sk(sk); + struct dst_entry *dst; dst = __sk_dst_check(sk, np->dst_cookie); if (dst == NULL) { struct inet_sock *inet = inet_sk(sk); struct in6_addr *final_p, final; - struct flowi fl; - - memset(&fl, 0, sizeof(fl)); - fl.proto = sk->sk_protocol; - ipv6_addr_copy(&fl.fl6_dst, &np->daddr); - ipv6_addr_copy(&fl.fl6_src, &np->saddr); - fl.fl6_flowlabel = np->flow_label; - fl.oif = sk->sk_bound_dev_if; - fl.mark = sk->sk_mark; - fl.fl_ip_dport = inet->inet_dport; - fl.fl_ip_sport = inet->inet_sport; - security_sk_classify_flow(sk, &fl); - - final_p = fl6_update_dst(&fl, np->opt, &final); - - err = ip6_dst_lookup(sk, &dst, &fl); - if (err) { + struct flowi6 fl6; + + memset(&fl6, 0, sizeof(fl6)); + fl6.flowi6_proto = sk->sk_protocol; + ipv6_addr_copy(&fl6.daddr, &np->daddr); + ipv6_addr_copy(&fl6.saddr, &np->saddr); + fl6.flowlabel = np->flow_label; + fl6.flowi6_oif = sk->sk_bound_dev_if; + fl6.flowi6_mark = sk->sk_mark; + fl6.fl6_dport = inet->inet_dport; + fl6.fl6_sport = inet->inet_sport; + security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); + + final_p = fl6_update_dst(&fl6, np->opt, &final); + + dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false); + if (IS_ERR(dst)) { sk->sk_route_caps = 0; - return err; - } - if (final_p) - ipv6_addr_copy(&fl.fl6_dst, final_p); - - if ((err = xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0)) < 0) { - sk->sk_err_soft = -err; - return err; + sk->sk_err_soft = -PTR_ERR(dst); + return PTR_ERR(dst); } __ip6_dst_store(sk, dst, NULL, NULL); @@ -772,7 +765,7 @@ out: return err; } -static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, int features) +static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, u32 features) { struct sk_buff *segs = ERR_PTR(-EINVAL); struct ipv6hdr *ipv6h; diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 1aba54ae53c4..2195ae651923 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -409,7 +409,7 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb) ah->reserved = 0; ah->spi = x->id.spi; - ah->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output); + ah->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low); sg_init_table(sg, nfrags); skb_to_sgvec(skb, sg, 0, skb->len); diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 320bdb877eed..16560336eb72 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -40,7 +40,7 @@ int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) struct ipv6_pinfo *np = inet6_sk(sk); struct in6_addr *daddr, *final_p, final; struct dst_entry *dst; - struct flowi fl; + struct flowi6 fl6; struct ip6_flowlabel *flowlabel = NULL; struct ipv6_txoptions *opt; int addr_type; @@ -59,11 +59,11 @@ int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) if (usin->sin6_family != AF_INET6) return -EAFNOSUPPORT; - memset(&fl, 0, sizeof(fl)); + memset(&fl6, 0, sizeof(fl6)); if (np->sndflow) { - fl.fl6_flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK; - if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) { - flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel); + fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK; + if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) { + flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); if (flowlabel == NULL) return -EINVAL; ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst); @@ -137,7 +137,7 @@ ipv4_connected: } ipv6_addr_copy(&np->daddr, daddr); - np->flow_label = fl.fl6_flowlabel; + np->flow_label = fl6.flowlabel; inet->inet_dport = usin->sin6_port; @@ -146,53 +146,46 @@ ipv4_connected: * destination 
cache for it. */ - fl.proto = sk->sk_protocol; - ipv6_addr_copy(&fl.fl6_dst, &np->daddr); - ipv6_addr_copy(&fl.fl6_src, &np->saddr); - fl.oif = sk->sk_bound_dev_if; - fl.mark = sk->sk_mark; - fl.fl_ip_dport = inet->inet_dport; - fl.fl_ip_sport = inet->inet_sport; + fl6.flowi6_proto = sk->sk_protocol; + ipv6_addr_copy(&fl6.daddr, &np->daddr); + ipv6_addr_copy(&fl6.saddr, &np->saddr); + fl6.flowi6_oif = sk->sk_bound_dev_if; + fl6.flowi6_mark = sk->sk_mark; + fl6.fl6_dport = inet->inet_dport; + fl6.fl6_sport = inet->inet_sport; - if (!fl.oif && (addr_type&IPV6_ADDR_MULTICAST)) - fl.oif = np->mcast_oif; + if (!fl6.flowi6_oif && (addr_type&IPV6_ADDR_MULTICAST)) + fl6.flowi6_oif = np->mcast_oif; - security_sk_classify_flow(sk, &fl); + security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); opt = flowlabel ? flowlabel->opt : np->opt; - final_p = fl6_update_dst(&fl, opt, &final); + final_p = fl6_update_dst(&fl6, opt, &final); - err = ip6_dst_lookup(sk, &dst, &fl); - if (err) + dst = ip6_dst_lookup_flow(sk, &fl6, final_p, true); + err = 0; + if (IS_ERR(dst)) { + err = PTR_ERR(dst); goto out; - if (final_p) - ipv6_addr_copy(&fl.fl6_dst, final_p); - - err = __xfrm_lookup(sock_net(sk), &dst, &fl, sk, XFRM_LOOKUP_WAIT); - if (err < 0) { - if (err == -EREMOTE) - err = ip6_dst_blackhole(sk, &dst, &fl); - if (err < 0) - goto out; } /* source address lookup done in ip6_dst_lookup */ if (ipv6_addr_any(&np->saddr)) - ipv6_addr_copy(&np->saddr, &fl.fl6_src); + ipv6_addr_copy(&np->saddr, &fl6.saddr); if (ipv6_addr_any(&np->rcv_saddr)) { - ipv6_addr_copy(&np->rcv_saddr, &fl.fl6_src); + ipv6_addr_copy(&np->rcv_saddr, &fl6.saddr); inet->inet_rcv_saddr = LOOPBACK4_IPV6; if (sk->sk_prot->rehash) sk->sk_prot->rehash(sk); } ip6_dst_store(sk, dst, - ipv6_addr_equal(&fl.fl6_dst, &np->daddr) ? + ipv6_addr_equal(&fl6.daddr, &np->daddr) ? &np->daddr : NULL, #ifdef CONFIG_IPV6_SUBTREES - ipv6_addr_equal(&fl.fl6_src, &np->saddr) ? + ipv6_addr_equal(&fl6.saddr, &np->saddr) ? 
&np->saddr : #endif NULL); @@ -238,7 +231,7 @@ void ipv6_icmp_error(struct sock *sk, struct sk_buff *skb, int err, kfree_skb(skb); } -void ipv6_local_error(struct sock *sk, int err, struct flowi *fl, u32 info) +void ipv6_local_error(struct sock *sk, int err, struct flowi6 *fl6, u32 info) { struct ipv6_pinfo *np = inet6_sk(sk); struct sock_exterr_skb *serr; @@ -257,7 +250,7 @@ void ipv6_local_error(struct sock *sk, int err, struct flowi *fl, u32 info) skb_put(skb, sizeof(struct ipv6hdr)); skb_reset_network_header(skb); iph = ipv6_hdr(skb); - ipv6_addr_copy(&iph->daddr, &fl->fl6_dst); + ipv6_addr_copy(&iph->daddr, &fl6->daddr); serr = SKB_EXT_ERR(skb); serr->ee.ee_errno = err; @@ -268,7 +261,7 @@ void ipv6_local_error(struct sock *sk, int err, struct flowi *fl, u32 info) serr->ee.ee_info = info; serr->ee.ee_data = 0; serr->addr_offset = (u8 *)&iph->daddr - skb_network_header(skb); - serr->port = fl->fl_ip_dport; + serr->port = fl6->fl6_dport; __skb_pull(skb, skb_tail_pointer(skb) - skb->data); skb_reset_transport_header(skb); @@ -277,7 +270,7 @@ void ipv6_local_error(struct sock *sk, int err, struct flowi *fl, u32 info) kfree_skb(skb); } -void ipv6_local_rxpmtu(struct sock *sk, struct flowi *fl, u32 mtu) +void ipv6_local_rxpmtu(struct sock *sk, struct flowi6 *fl6, u32 mtu) { struct ipv6_pinfo *np = inet6_sk(sk); struct ipv6hdr *iph; @@ -294,7 +287,7 @@ void ipv6_local_rxpmtu(struct sock *sk, struct flowi *fl, u32 mtu) skb_put(skb, sizeof(struct ipv6hdr)); skb_reset_network_header(skb); iph = ipv6_hdr(skb); - ipv6_addr_copy(&iph->daddr, &fl->fl6_dst); + ipv6_addr_copy(&iph->daddr, &fl6->daddr); mtu_info = IP6CBMTU(skb); if (!mtu_info) { @@ -306,7 +299,7 @@ void ipv6_local_rxpmtu(struct sock *sk, struct flowi *fl, u32 mtu) mtu_info->ip6m_addr.sin6_family = AF_INET6; mtu_info->ip6m_addr.sin6_port = 0; mtu_info->ip6m_addr.sin6_flowinfo = 0; - mtu_info->ip6m_addr.sin6_scope_id = fl->oif; + mtu_info->ip6m_addr.sin6_scope_id = fl6->flowi6_oif; ipv6_addr_copy(&mtu_info->ip6m_addr.sin6_addr, &ipv6_hdr(skb)->daddr); __skb_pull(skb, skb_tail_pointer(skb) - skb->data); @@ -600,7 +593,7 @@ int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb) } int datagram_send_ctl(struct net *net, - struct msghdr *msg, struct flowi *fl, + struct msghdr *msg, struct flowi6 *fl6, struct ipv6_txoptions *opt, int *hlimit, int *tclass, int *dontfrag) { @@ -636,16 +629,17 @@ int datagram_send_ctl(struct net *net, src_info = (struct in6_pktinfo *)CMSG_DATA(cmsg); if (src_info->ipi6_ifindex) { - if (fl->oif && src_info->ipi6_ifindex != fl->oif) + if (fl6->flowi6_oif && + src_info->ipi6_ifindex != fl6->flowi6_oif) return -EINVAL; - fl->oif = src_info->ipi6_ifindex; + fl6->flowi6_oif = src_info->ipi6_ifindex; } addr_type = __ipv6_addr_type(&src_info->ipi6_addr); rcu_read_lock(); - if (fl->oif) { - dev = dev_get_by_index_rcu(net, fl->oif); + if (fl6->flowi6_oif) { + dev = dev_get_by_index_rcu(net, fl6->flowi6_oif); if (!dev) { rcu_read_unlock(); return -ENODEV; @@ -661,7 +655,7 @@ int datagram_send_ctl(struct net *net, strict ? 
dev : NULL, 0)) err = -EINVAL; else - ipv6_addr_copy(&fl->fl6_src, &src_info->ipi6_addr); + ipv6_addr_copy(&fl6->saddr, &src_info->ipi6_addr); } rcu_read_unlock(); @@ -678,13 +672,13 @@ int datagram_send_ctl(struct net *net, goto exit_f; } - if (fl->fl6_flowlabel&IPV6_FLOWINFO_MASK) { - if ((fl->fl6_flowlabel^*(__be32 *)CMSG_DATA(cmsg))&~IPV6_FLOWINFO_MASK) { + if (fl6->flowlabel&IPV6_FLOWINFO_MASK) { + if ((fl6->flowlabel^*(__be32 *)CMSG_DATA(cmsg))&~IPV6_FLOWINFO_MASK) { err = -EINVAL; goto exit_f; } } - fl->fl6_flowlabel = IPV6_FLOWINFO_MASK & *(__be32 *)CMSG_DATA(cmsg); + fl6->flowlabel = IPV6_FLOWINFO_MASK & *(__be32 *)CMSG_DATA(cmsg); break; case IPV6_2292HOPOPTS: diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 1b5c9825743b..5aa8ec88f194 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -54,16 +54,20 @@ static u32 esp6_get_mtu(struct xfrm_state *x, int mtu); /* * Allocate an AEAD request structure with extra space for SG and IV. * - * For alignment considerations the IV is placed at the front, followed - * by the request and finally the SG list. + * For alignment considerations the upper 32 bits of the sequence number are + * placed at the front, if present. Followed by the IV, the request and finally + * the SG list. * * TODO: Use spare space in skb for this where possible. */ -static void *esp_alloc_tmp(struct crypto_aead *aead, int nfrags) +static void *esp_alloc_tmp(struct crypto_aead *aead, int nfrags, int seqihlen) { unsigned int len; - len = crypto_aead_ivsize(aead); + len = seqihlen; + + len += crypto_aead_ivsize(aead); + if (len) { len += crypto_aead_alignmask(aead) & ~(crypto_tfm_ctx_alignment() - 1); @@ -78,10 +82,16 @@ static void *esp_alloc_tmp(struct crypto_aead *aead, int nfrags) return kmalloc(len, GFP_ATOMIC); } -static inline u8 *esp_tmp_iv(struct crypto_aead *aead, void *tmp) +static inline __be32 *esp_tmp_seqhi(void *tmp) +{ + return PTR_ALIGN((__be32 *)tmp, __alignof__(__be32)); +} + +static inline u8 *esp_tmp_iv(struct crypto_aead *aead, void *tmp, int seqhilen) { return crypto_aead_ivsize(aead) ? - PTR_ALIGN((u8 *)tmp, crypto_aead_alignmask(aead) + 1) : tmp; + PTR_ALIGN((u8 *)tmp + seqhilen, + crypto_aead_alignmask(aead) + 1) : tmp + seqhilen; } static inline struct aead_givcrypt_request *esp_tmp_givreq( @@ -145,8 +155,12 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) int plen; int tfclen; int nfrags; + int assoclen; + int sglists; + int seqhilen; u8 *iv; u8 *tail; + __be32 *seqhi; struct esp_data *esp = x->data; /* skb is pure payload to encrypt */ @@ -175,14 +189,25 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) goto error; nfrags = err; - tmp = esp_alloc_tmp(aead, nfrags + 1); + assoclen = sizeof(*esph); + sglists = 1; + seqhilen = 0; + + if (x->props.flags & XFRM_STATE_ESN) { + sglists += 2; + seqhilen += sizeof(__be32); + assoclen += seqhilen; + } + + tmp = esp_alloc_tmp(aead, nfrags + sglists, seqhilen); if (!tmp) goto error; - iv = esp_tmp_iv(aead, tmp); + seqhi = esp_tmp_seqhi(tmp); + iv = esp_tmp_iv(aead, tmp, seqhilen); req = esp_tmp_givreq(aead, iv); asg = esp_givreq_sg(aead, req); - sg = asg + 1; + sg = asg + sglists; /* Fill padding... 
*/ tail = skb_tail_pointer(trailer); @@ -204,19 +229,27 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) *skb_mac_header(skb) = IPPROTO_ESP; esph->spi = x->id.spi; - esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output); + esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low); sg_init_table(sg, nfrags); skb_to_sgvec(skb, sg, esph->enc_data + crypto_aead_ivsize(aead) - skb->data, clen + alen); - sg_init_one(asg, esph, sizeof(*esph)); + + if ((x->props.flags & XFRM_STATE_ESN)) { + sg_init_table(asg, 3); + sg_set_buf(asg, &esph->spi, sizeof(__be32)); + *seqhi = htonl(XFRM_SKB_CB(skb)->seq.output.hi); + sg_set_buf(asg + 1, seqhi, seqhilen); + sg_set_buf(asg + 2, &esph->seq_no, sizeof(__be32)); + } else + sg_init_one(asg, esph, sizeof(*esph)); aead_givcrypt_set_callback(req, 0, esp_output_done, skb); aead_givcrypt_set_crypt(req, sg, sg, clen, iv); - aead_givcrypt_set_assoc(req, asg, sizeof(*esph)); + aead_givcrypt_set_assoc(req, asg, assoclen); aead_givcrypt_set_giv(req, esph->enc_data, - XFRM_SKB_CB(skb)->seq.output); + XFRM_SKB_CB(skb)->seq.output.low); ESP_SKB_CB(skb)->tmp = tmp; err = crypto_aead_givencrypt(req); @@ -292,8 +325,12 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb) struct sk_buff *trailer; int elen = skb->len - sizeof(*esph) - crypto_aead_ivsize(aead); int nfrags; + int assoclen; + int sglists; + int seqhilen; int ret = 0; void *tmp; + __be32 *seqhi; u8 *iv; struct scatterlist *sg; struct scatterlist *asg; @@ -314,12 +351,24 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb) } ret = -ENOMEM; - tmp = esp_alloc_tmp(aead, nfrags + 1); + + assoclen = sizeof(*esph); + sglists = 1; + seqhilen = 0; + + if (x->props.flags & XFRM_STATE_ESN) { + sglists += 2; + seqhilen += sizeof(__be32); + assoclen += seqhilen; + } + + tmp = esp_alloc_tmp(aead, nfrags + sglists, seqhilen); if (!tmp) goto out; ESP_SKB_CB(skb)->tmp = tmp; - iv = esp_tmp_iv(aead, tmp); + seqhi = esp_tmp_seqhi(tmp); + iv = esp_tmp_iv(aead, tmp, seqhilen); req = esp_tmp_req(aead, iv); asg = esp_req_sg(aead, req); sg = asg + 1; @@ -333,11 +382,19 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb) sg_init_table(sg, nfrags); skb_to_sgvec(skb, sg, sizeof(*esph) + crypto_aead_ivsize(aead), elen); - sg_init_one(asg, esph, sizeof(*esph)); + + if ((x->props.flags & XFRM_STATE_ESN)) { + sg_init_table(asg, 3); + sg_set_buf(asg, &esph->spi, sizeof(__be32)); + *seqhi = XFRM_SKB_CB(skb)->seq.input.hi; + sg_set_buf(asg + 1, seqhi, seqhilen); + sg_set_buf(asg + 2, &esph->seq_no, sizeof(__be32)); + } else + sg_init_one(asg, esph, sizeof(*esph)); aead_request_set_callback(req, 0, esp_input_done, skb); aead_request_set_crypt(req, sg, sg, elen, iv); - aead_request_set_assoc(req, asg, sizeof(*esph)); + aead_request_set_assoc(req, asg, assoclen); ret = crypto_aead_decrypt(req); if (ret == -EINPROGRESS) @@ -443,10 +500,20 @@ static int esp_init_authenc(struct xfrm_state *x) goto error; err = -ENAMETOOLONG; - if (snprintf(authenc_name, CRYPTO_MAX_ALG_NAME, "authenc(%s,%s)", - x->aalg ? x->aalg->alg_name : "digest_null", - x->ealg->alg_name) >= CRYPTO_MAX_ALG_NAME) - goto error; + + if ((x->props.flags & XFRM_STATE_ESN)) { + if (snprintf(authenc_name, CRYPTO_MAX_ALG_NAME, + "authencesn(%s,%s)", + x->aalg ? x->aalg->alg_name : "digest_null", + x->ealg->alg_name) >= CRYPTO_MAX_ALG_NAME) + goto error; + } else { + if (snprintf(authenc_name, CRYPTO_MAX_ALG_NAME, + "authenc(%s,%s)", + x->aalg ? 
x->aalg->alg_name : "digest_null", + x->ealg->alg_name) >= CRYPTO_MAX_ALG_NAME) + goto error; + } aead = crypto_alloc_aead(authenc_name, 0, 0); err = PTR_ERR(aead); diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 262f105d23b9..79a485e8a700 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -876,22 +876,22 @@ struct ipv6_txoptions *ipv6_fixup_options(struct ipv6_txoptions *opt_space, * fl6_update_dst - update flowi destination address with info given * by srcrt option, if any. * - * @fl: flowi for which fl6_dst is to be updated + * @fl6: flowi6 for which daddr is to be updated * @opt: struct ipv6_txoptions in which to look for srcrt opt - * @orig: copy of original fl6_dst address if modified + * @orig: copy of original daddr address if modified * * Returns NULL if no txoptions or no srcrt, otherwise returns orig - * and initial value of fl->fl6_dst set in orig + * and initial value of fl6->daddr set in orig */ -struct in6_addr *fl6_update_dst(struct flowi *fl, +struct in6_addr *fl6_update_dst(struct flowi6 *fl6, const struct ipv6_txoptions *opt, struct in6_addr *orig) { if (!opt || !opt->srcrt) return NULL; - ipv6_addr_copy(orig, &fl->fl6_dst); - ipv6_addr_copy(&fl->fl6_dst, ((struct rt0_hdr *)opt->srcrt)->addr); + ipv6_addr_copy(orig, &fl6->daddr); + ipv6_addr_copy(&fl6->daddr, ((struct rt0_hdr *)opt->srcrt)->addr); return orig; } diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index d829874d8946..34d244df907d 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -29,7 +29,7 @@ struct fib6_rule u8 tclass; }; -struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi *fl, +struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6, int flags, pol_lookup_t lookup) { struct fib_lookup_arg arg = { @@ -37,7 +37,8 @@ struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi *fl, .flags = FIB_LOOKUP_NOREF, }; - fib_rules_lookup(net->ipv6.fib6_rules_ops, fl, flags, &arg); + fib_rules_lookup(net->ipv6.fib6_rules_ops, + flowi6_to_flowi(fl6), flags, &arg); if (arg.result) return arg.result; @@ -49,6 +50,7 @@ struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi *fl, static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp, int flags, struct fib_lookup_arg *arg) { + struct flowi6 *flp6 = &flp->u.ip6; struct rt6_info *rt = NULL; struct fib6_table *table; struct net *net = rule->fr_net; @@ -71,7 +73,7 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp, table = fib6_get_table(net, rule->table); if (table) - rt = lookup(net, table, flp, flags); + rt = lookup(net, table, flp6, flags); if (rt != net->ipv6.ip6_null_entry) { struct fib6_rule *r = (struct fib6_rule *)rule; @@ -86,14 +88,14 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp, if (ipv6_dev_get_saddr(net, ip6_dst_idev(&rt->dst)->dev, - &flp->fl6_dst, + &flp6->daddr, rt6_flags2srcprefs(flags), &saddr)) goto again; if (!ipv6_prefix_equal(&saddr, &r->src.addr, r->src.plen)) goto again; - ipv6_addr_copy(&flp->fl6_src, &saddr); + ipv6_addr_copy(&flp6->saddr, &saddr); } goto out; } @@ -113,9 +115,10 @@ out: static int fib6_rule_match(struct fib_rule *rule, struct flowi *fl, int flags) { struct fib6_rule *r = (struct fib6_rule *) rule; + struct flowi6 *fl6 = &fl->u.ip6; if (r->dst.plen && - !ipv6_prefix_equal(&fl->fl6_dst, &r->dst.addr, r->dst.plen)) + !ipv6_prefix_equal(&fl6->daddr, &r->dst.addr, r->dst.plen)) return 0; /* @@ -125,14 +128,14 @@ static int fib6_rule_match(struct fib_rule *rule, struct flowi *fl, int flags) 
*/ if (r->src.plen) { if (flags & RT6_LOOKUP_F_HAS_SADDR) { - if (!ipv6_prefix_equal(&fl->fl6_src, &r->src.addr, + if (!ipv6_prefix_equal(&fl6->saddr, &r->src.addr, r->src.plen)) return 0; } else if (!(r->common.flags & FIB_RULE_FIND_SADDR)) return 0; } - if (r->tclass && r->tclass != ((ntohl(fl->fl6_flowlabel) >> 20) & 0xff)) + if (r->tclass && r->tclass != ((ntohl(fl6->flowlabel) >> 20) & 0xff)) return 0; return 1; diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 03e62f94ff8e..83cb4f9add81 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -157,32 +157,32 @@ static int is_ineligible(struct sk_buff *skb) /* * Check the ICMP output rate limit */ -static inline int icmpv6_xrlim_allow(struct sock *sk, u8 type, - struct flowi *fl) +static inline bool icmpv6_xrlim_allow(struct sock *sk, u8 type, + struct flowi6 *fl6) { struct dst_entry *dst; struct net *net = sock_net(sk); - int res = 0; + bool res = false; /* Informational messages are not limited. */ if (type & ICMPV6_INFOMSG_MASK) - return 1; + return true; /* Do not limit pmtu discovery, it would break it. */ if (type == ICMPV6_PKT_TOOBIG) - return 1; + return true; /* * Look up the output route. * XXX: perhaps the expire for routing entries cloned by * this lookup should be more aggressive (not longer than timeout). */ - dst = ip6_route_output(net, sk, fl); + dst = ip6_route_output(net, sk, fl6); if (dst->error) { IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); } else if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) { - res = 1; + res = true; } else { struct rt6_info *rt = (struct rt6_info *)dst; int tmo = net->ipv6.sysctl.icmpv6_time; @@ -191,7 +191,9 @@ static inline int icmpv6_xrlim_allow(struct sock *sk, u8 type, if (rt->rt6i_dst.plen < 128) tmo >>= ((128 - rt->rt6i_dst.plen)>>5); - res = xrlim_allow(dst, tmo); + if (!rt->rt6i_peer) + rt6_bind_peer(rt, 1); + res = inet_peer_xrlim_allow(rt->rt6i_peer, tmo); } dst_release(dst); return res; @@ -215,7 +217,7 @@ static __inline__ int opt_unrec(struct sk_buff *skb, __u32 offset) return (*op & 0xC0) == 0x80; } -static int icmpv6_push_pending_frames(struct sock *sk, struct flowi *fl, struct icmp6hdr *thdr, int len) +static int icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6, struct icmp6hdr *thdr, int len) { struct sk_buff *skb; struct icmp6hdr *icmp6h; @@ -231,9 +233,9 @@ static int icmpv6_push_pending_frames(struct sock *sk, struct flowi *fl, struct if (skb_queue_len(&sk->sk_write_queue) == 1) { skb->csum = csum_partial(icmp6h, sizeof(struct icmp6hdr), skb->csum); - icmp6h->icmp6_cksum = csum_ipv6_magic(&fl->fl6_src, - &fl->fl6_dst, - len, fl->proto, + icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr, + &fl6->daddr, + len, fl6->flowi6_proto, skb->csum); } else { __wsum tmp_csum = 0; @@ -244,9 +246,9 @@ static int icmpv6_push_pending_frames(struct sock *sk, struct flowi *fl, struct tmp_csum = csum_partial(icmp6h, sizeof(struct icmp6hdr), tmp_csum); - icmp6h->icmp6_cksum = csum_ipv6_magic(&fl->fl6_src, - &fl->fl6_dst, - len, fl->proto, + icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr, + &fl6->daddr, + len, fl6->flowi6_proto, tmp_csum); } ip6_push_pending_frames(sk); @@ -298,6 +300,68 @@ static void mip6_addr_swap(struct sk_buff *skb) static inline void mip6_addr_swap(struct sk_buff *skb) {} #endif +static struct dst_entry *icmpv6_route_lookup(struct net *net, struct sk_buff *skb, + struct sock *sk, struct flowi6 *fl6) +{ + struct dst_entry *dst, *dst2; + struct flowi6 fl2; + int err; + + err = ip6_dst_lookup(sk, &dst, fl6); + if (err) + return 
ERR_PTR(err); + + /* + * We won't send icmp if the destination is known + * anycast. + */ + if (((struct rt6_info *)dst)->rt6i_flags & RTF_ANYCAST) { + LIMIT_NETDEBUG(KERN_DEBUG "icmpv6_send: acast source\n"); + dst_release(dst); + return ERR_PTR(-EINVAL); + } + + /* No need to clone since we're just using its address. */ + dst2 = dst; + + dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), sk, 0); + if (!IS_ERR(dst)) { + if (dst != dst2) + return dst; + } else { + if (PTR_ERR(dst) == -EPERM) + dst = NULL; + else + return dst; + } + + err = xfrm_decode_session_reverse(skb, flowi6_to_flowi(&fl2), AF_INET6); + if (err) + goto relookup_failed; + + err = ip6_dst_lookup(sk, &dst2, &fl2); + if (err) + goto relookup_failed; + + dst2 = xfrm_lookup(net, dst2, flowi6_to_flowi(&fl2), sk, XFRM_LOOKUP_ICMP); + if (!IS_ERR(dst2)) { + dst_release(dst); + dst = dst2; + } else { + err = PTR_ERR(dst2); + if (err == -EPERM) { + dst_release(dst); + return dst2; + } else + goto relookup_failed; + } + +relookup_failed: + if (dst) + return dst; + return ERR_PTR(err); +} + /* * Send an ICMP message in response to a packet in error */ @@ -310,10 +374,8 @@ void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) struct ipv6_pinfo *np; struct in6_addr *saddr = NULL; struct dst_entry *dst; - struct dst_entry *dst2; struct icmp6hdr tmp_hdr; - struct flowi fl; - struct flowi fl2; + struct flowi6 fl6; struct icmpv6_msg msg; int iif = 0; int addr_type = 0; @@ -380,22 +442,22 @@ void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) mip6_addr_swap(skb); - memset(&fl, 0, sizeof(fl)); - fl.proto = IPPROTO_ICMPV6; - ipv6_addr_copy(&fl.fl6_dst, &hdr->saddr); + memset(&fl6, 0, sizeof(fl6)); + fl6.flowi6_proto = IPPROTO_ICMPV6; + ipv6_addr_copy(&fl6.daddr, &hdr->saddr); if (saddr) - ipv6_addr_copy(&fl.fl6_src, saddr); - fl.oif = iif; - fl.fl_icmp_type = type; - fl.fl_icmp_code = code; - security_skb_classify_flow(skb, &fl); + ipv6_addr_copy(&fl6.saddr, saddr); + fl6.flowi6_oif = iif; + fl6.fl6_icmp_type = type; + fl6.fl6_icmp_code = code; + security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); sk = icmpv6_xmit_lock(net); if (sk == NULL) return; np = inet6_sk(sk); - if (!icmpv6_xrlim_allow(sk, type, &fl)) + if (!icmpv6_xrlim_allow(sk, type, &fl6)) goto out; tmp_hdr.icmp6_type = type; @@ -403,61 +465,14 @@ void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) tmp_hdr.icmp6_cksum = 0; tmp_hdr.icmp6_pointer = htonl(info); - if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst)) - fl.oif = np->mcast_oif; + if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr)) + fl6.flowi6_oif = np->mcast_oif; - err = ip6_dst_lookup(sk, &dst, &fl); - if (err) + dst = icmpv6_route_lookup(net, skb, sk, &fl6); + if (IS_ERR(dst)) goto out; - /* - * We won't send icmp if the destination is known - * anycast. - */ - if (((struct rt6_info *)dst)->rt6i_flags & RTF_ANYCAST) { - LIMIT_NETDEBUG(KERN_DEBUG "icmpv6_send: acast source\n"); - goto out_dst_release; - } - - /* No need to clone since we're just using its address. 
*/ - dst2 = dst; - - err = xfrm_lookup(net, &dst, &fl, sk, 0); - switch (err) { - case 0: - if (dst != dst2) - goto route_done; - break; - case -EPERM: - dst = NULL; - break; - default: - goto out; - } - - if (xfrm_decode_session_reverse(skb, &fl2, AF_INET6)) - goto relookup_failed; - - if (ip6_dst_lookup(sk, &dst2, &fl2)) - goto relookup_failed; - - err = xfrm_lookup(net, &dst2, &fl2, sk, XFRM_LOOKUP_ICMP); - switch (err) { - case 0: - dst_release(dst); - dst = dst2; - break; - case -EPERM: - goto out_dst_release; - default: -relookup_failed: - if (!dst) - goto out; - break; - } - -route_done: - if (ipv6_addr_is_multicast(&fl.fl6_dst)) + if (ipv6_addr_is_multicast(&fl6.daddr)) hlimit = np->mcast_hops; else hlimit = np->hop_limit; @@ -480,14 +495,14 @@ route_done: err = ip6_append_data(sk, icmpv6_getfrag, &msg, len + sizeof(struct icmp6hdr), sizeof(struct icmp6hdr), hlimit, - np->tclass, NULL, &fl, (struct rt6_info*)dst, + np->tclass, NULL, &fl6, (struct rt6_info*)dst, MSG_DONTWAIT, np->dontfrag); if (err) { ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTERRORS); ip6_flush_pending_frames(sk); goto out_put; } - err = icmpv6_push_pending_frames(sk, &fl, &tmp_hdr, len + sizeof(struct icmp6hdr)); + err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr, len + sizeof(struct icmp6hdr)); out_put: if (likely(idev != NULL)) @@ -509,7 +524,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb) struct in6_addr *saddr = NULL; struct icmp6hdr *icmph = icmp6_hdr(skb); struct icmp6hdr tmp_hdr; - struct flowi fl; + struct flowi6 fl6; struct icmpv6_msg msg; struct dst_entry *dst; int err = 0; @@ -523,30 +538,31 @@ static void icmpv6_echo_reply(struct sk_buff *skb) memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr)); tmp_hdr.icmp6_type = ICMPV6_ECHO_REPLY; - memset(&fl, 0, sizeof(fl)); - fl.proto = IPPROTO_ICMPV6; - ipv6_addr_copy(&fl.fl6_dst, &ipv6_hdr(skb)->saddr); + memset(&fl6, 0, sizeof(fl6)); + fl6.flowi6_proto = IPPROTO_ICMPV6; + ipv6_addr_copy(&fl6.daddr, &ipv6_hdr(skb)->saddr); if (saddr) - ipv6_addr_copy(&fl.fl6_src, saddr); - fl.oif = skb->dev->ifindex; - fl.fl_icmp_type = ICMPV6_ECHO_REPLY; - security_skb_classify_flow(skb, &fl); + ipv6_addr_copy(&fl6.saddr, saddr); + fl6.flowi6_oif = skb->dev->ifindex; + fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY; + security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); sk = icmpv6_xmit_lock(net); if (sk == NULL) return; np = inet6_sk(sk); - if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst)) - fl.oif = np->mcast_oif; + if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr)) + fl6.flowi6_oif = np->mcast_oif; - err = ip6_dst_lookup(sk, &dst, &fl); + err = ip6_dst_lookup(sk, &dst, &fl6); if (err) goto out; - if ((err = xfrm_lookup(net, &dst, &fl, sk, 0)) < 0) + dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), sk, 0); + if (IS_ERR(dst)) goto out; - if (ipv6_addr_is_multicast(&fl.fl6_dst)) + if (ipv6_addr_is_multicast(&fl6.daddr)) hlimit = np->mcast_hops; else hlimit = np->hop_limit; @@ -560,7 +576,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb) msg.type = ICMPV6_ECHO_REPLY; err = ip6_append_data(sk, icmpv6_getfrag, &msg, skb->len + sizeof(struct icmp6hdr), - sizeof(struct icmp6hdr), hlimit, np->tclass, NULL, &fl, + sizeof(struct icmp6hdr), hlimit, np->tclass, NULL, &fl6, (struct rt6_info*)dst, MSG_DONTWAIT, np->dontfrag); @@ -569,7 +585,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb) ip6_flush_pending_frames(sk); goto out_put; } - err = icmpv6_push_pending_frames(sk, &fl, &tmp_hdr, skb->len + sizeof(struct icmp6hdr)); + err = icmpv6_push_pending_frames(sk, &fl6, 
&tmp_hdr, skb->len + sizeof(struct icmp6hdr)); out_put: if (likely(idev != NULL)) @@ -768,20 +784,20 @@ drop_no_count: return 0; } -void icmpv6_flow_init(struct sock *sk, struct flowi *fl, +void icmpv6_flow_init(struct sock *sk, struct flowi6 *fl6, u8 type, const struct in6_addr *saddr, const struct in6_addr *daddr, int oif) { - memset(fl, 0, sizeof(*fl)); - ipv6_addr_copy(&fl->fl6_src, saddr); - ipv6_addr_copy(&fl->fl6_dst, daddr); - fl->proto = IPPROTO_ICMPV6; - fl->fl_icmp_type = type; - fl->fl_icmp_code = 0; - fl->oif = oif; - security_sk_classify_flow(sk, fl); + memset(fl6, 0, sizeof(*fl6)); + ipv6_addr_copy(&fl6->saddr, saddr); + ipv6_addr_copy(&fl6->daddr, daddr); + fl6->flowi6_proto = IPPROTO_ICMPV6; + fl6->fl6_icmp_type = type; + fl6->fl6_icmp_code = 0; + fl6->flowi6_oif = oif; + security_sk_classify_flow(sk, flowi6_to_flowi(fl6)); } /* diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index d144e629d2b4..166054650466 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -61,26 +61,21 @@ struct dst_entry *inet6_csk_route_req(struct sock *sk, struct ipv6_pinfo *np = inet6_sk(sk); struct in6_addr *final_p, final; struct dst_entry *dst; - struct flowi fl; - - memset(&fl, 0, sizeof(fl)); - fl.proto = IPPROTO_TCP; - ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr); - final_p = fl6_update_dst(&fl, np->opt, &final); - ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr); - fl.oif = sk->sk_bound_dev_if; - fl.mark = sk->sk_mark; - fl.fl_ip_dport = inet_rsk(req)->rmt_port; - fl.fl_ip_sport = inet_rsk(req)->loc_port; - security_req_classify_flow(req, &fl); - - if (ip6_dst_lookup(sk, &dst, &fl)) - return NULL; - - if (final_p) - ipv6_addr_copy(&fl.fl6_dst, final_p); - - if ((xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0)) < 0) + struct flowi6 fl6; + + memset(&fl6, 0, sizeof(fl6)); + fl6.flowi6_proto = IPPROTO_TCP; + ipv6_addr_copy(&fl6.daddr, &treq->rmt_addr); + final_p = fl6_update_dst(&fl6, np->opt, &final); + ipv6_addr_copy(&fl6.saddr, &treq->loc_addr); + fl6.flowi6_oif = sk->sk_bound_dev_if; + fl6.flowi6_mark = sk->sk_mark; + fl6.fl6_dport = inet_rsk(req)->rmt_port; + fl6.fl6_sport = inet_rsk(req)->loc_port; + security_req_classify_flow(req, flowi6_to_flowi(&fl6)); + + dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false); + if (IS_ERR(dst)) return NULL; return dst; @@ -213,42 +208,34 @@ int inet6_csk_xmit(struct sk_buff *skb) struct sock *sk = skb->sk; struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); - struct flowi fl; + struct flowi6 fl6; struct dst_entry *dst; struct in6_addr *final_p, final; - memset(&fl, 0, sizeof(fl)); - fl.proto = sk->sk_protocol; - ipv6_addr_copy(&fl.fl6_dst, &np->daddr); - ipv6_addr_copy(&fl.fl6_src, &np->saddr); - fl.fl6_flowlabel = np->flow_label; - IP6_ECN_flow_xmit(sk, fl.fl6_flowlabel); - fl.oif = sk->sk_bound_dev_if; - fl.mark = sk->sk_mark; - fl.fl_ip_sport = inet->inet_sport; - fl.fl_ip_dport = inet->inet_dport; - security_sk_classify_flow(sk, &fl); + memset(&fl6, 0, sizeof(fl6)); + fl6.flowi6_proto = sk->sk_protocol; + ipv6_addr_copy(&fl6.daddr, &np->daddr); + ipv6_addr_copy(&fl6.saddr, &np->saddr); + fl6.flowlabel = np->flow_label; + IP6_ECN_flow_xmit(sk, fl6.flowlabel); + fl6.flowi6_oif = sk->sk_bound_dev_if; + fl6.flowi6_mark = sk->sk_mark; + fl6.fl6_sport = inet->inet_sport; + fl6.fl6_dport = inet->inet_dport; + security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); - final_p = fl6_update_dst(&fl, np->opt, &final); + final_p = fl6_update_dst(&fl6, np->opt, &final); dst = 
__inet6_csk_dst_check(sk, np->dst_cookie); if (dst == NULL) { - int err = ip6_dst_lookup(sk, &dst, &fl); - - if (err) { - sk->sk_err_soft = -err; - kfree_skb(skb); - return err; - } - - if (final_p) - ipv6_addr_copy(&fl.fl6_dst, final_p); + dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false); - if ((err = xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0)) < 0) { + if (IS_ERR(dst)) { + sk->sk_err_soft = -PTR_ERR(dst); sk->sk_route_caps = 0; kfree_skb(skb); - return err; + return PTR_ERR(dst); } __inet6_csk_dst_store(sk, dst, NULL, NULL); @@ -257,9 +244,9 @@ int inet6_csk_xmit(struct sk_buff *skb) skb_dst_set(skb, dst_clone(dst)); /* Restore final destination back after routing done */ - ipv6_addr_copy(&fl.fl6_dst, &np->daddr); + ipv6_addr_copy(&fl6.daddr, &np->daddr); - return ip6_xmit(sk, skb, &fl, np->opt); + return ip6_xmit(sk, skb, &fl6, np->opt); } EXPORT_SYMBOL_GPL(inet6_csk_xmit); diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index de382114609b..7548905e79e1 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -260,10 +260,10 @@ struct fib6_table *fib6_get_table(struct net *net, u32 id) return net->ipv6.fib6_main_tbl; } -struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi *fl, +struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6, int flags, pol_lookup_t lookup) { - return (struct dst_entry *) lookup(net, net->ipv6.fib6_main_tbl, fl, flags); + return (struct dst_entry *) lookup(net, net->ipv6.fib6_main_tbl, fl6, flags); } static void __net_init fib6_tables_init(struct net *net) diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index 13654686aeab..f3caf1b8d572 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -342,7 +342,7 @@ fl_create(struct net *net, struct in6_flowlabel_req *freq, char __user *optval, if (olen > 0) { struct msghdr msg; - struct flowi flowi; + struct flowi6 flowi6; int junk; err = -ENOMEM; @@ -358,9 +358,9 @@ fl_create(struct net *net, struct in6_flowlabel_req *freq, char __user *optval, msg.msg_controllen = olen; msg.msg_control = (void*)(fl->opt+1); - flowi.oif = 0; + memset(&flowi6, 0, sizeof(flowi6)); - err = datagram_send_ctl(net, &msg, &flowi, fl->opt, &junk, + err = datagram_send_ctl(net, &msg, &flowi6, fl->opt, &junk, &junk, &junk); if (err) goto done; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 5f8d242be3f3..18208876aa8a 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -174,15 +174,15 @@ int ip6_output(struct sk_buff *skb) * xmit an sk_buff (used by TCP, SCTP and DCCP) */ -int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, +int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, struct ipv6_txoptions *opt) { struct net *net = sock_net(sk); struct ipv6_pinfo *np = inet6_sk(sk); - struct in6_addr *first_hop = &fl->fl6_dst; + struct in6_addr *first_hop = &fl6->daddr; struct dst_entry *dst = skb_dst(skb); struct ipv6hdr *hdr; - u8 proto = fl->proto; + u8 proto = fl6->flowi6_proto; int seg_len = skb->len; int hlimit = -1; int tclass = 0; @@ -230,13 +230,13 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, if (hlimit < 0) hlimit = ip6_dst_hoplimit(dst); - *(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl->fl6_flowlabel; + *(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl6->flowlabel; hdr->payload_len = htons(seg_len); hdr->nexthdr = proto; hdr->hop_limit = hlimit; - ipv6_addr_copy(&hdr->saddr, &fl->fl6_src); + ipv6_addr_copy(&hdr->saddr, &fl6->saddr); ipv6_addr_copy(&hdr->daddr, 
first_hop); skb->priority = sk->sk_priority; @@ -274,13 +274,10 @@ int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev, { struct ipv6_pinfo *np = inet6_sk(sk); struct ipv6hdr *hdr; - int totlen; skb->protocol = htons(ETH_P_IPV6); skb->dev = dev; - totlen = len + sizeof(struct ipv6hdr); - skb_reset_network_header(skb); skb_put(skb, sizeof(struct ipv6hdr)); hdr = ipv6_hdr(skb); @@ -479,10 +476,13 @@ int ip6_forward(struct sk_buff *skb) else target = &hdr->daddr; + if (!rt->rt6i_peer) + rt6_bind_peer(rt, 1); + /* Limit redirects both by destination (here) and by source (inside ndisc_send_redirect) */ - if (xrlim_allow(dst, 1*HZ)) + if (inet_peer_xrlim_allow(rt->rt6i_peer, 1*HZ)) ndisc_send_redirect(skb, n, target); } else { int addrtype = ipv6_addr_type(&hdr->saddr); @@ -879,7 +879,7 @@ static inline int ip6_rt_check(struct rt6key *rt_key, static struct dst_entry *ip6_sk_dst_check(struct sock *sk, struct dst_entry *dst, - struct flowi *fl) + struct flowi6 *fl6) { struct ipv6_pinfo *np = inet6_sk(sk); struct rt6_info *rt = (struct rt6_info *)dst; @@ -904,11 +904,11 @@ static struct dst_entry *ip6_sk_dst_check(struct sock *sk, * sockets. * 2. oif also should be the same. */ - if (ip6_rt_check(&rt->rt6i_dst, &fl->fl6_dst, np->daddr_cache) || + if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) || #ifdef CONFIG_IPV6_SUBTREES - ip6_rt_check(&rt->rt6i_src, &fl->fl6_src, np->saddr_cache) || + ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) || #endif - (fl->oif && fl->oif != dst->dev->ifindex)) { + (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex)) { dst_release(dst); dst = NULL; } @@ -918,22 +918,22 @@ out: } static int ip6_dst_lookup_tail(struct sock *sk, - struct dst_entry **dst, struct flowi *fl) + struct dst_entry **dst, struct flowi6 *fl6) { int err; struct net *net = sock_net(sk); if (*dst == NULL) - *dst = ip6_route_output(net, sk, fl); + *dst = ip6_route_output(net, sk, fl6); if ((err = (*dst)->error)) goto out_err_release; - if (ipv6_addr_any(&fl->fl6_src)) { + if (ipv6_addr_any(&fl6->saddr)) { err = ipv6_dev_get_saddr(net, ip6_dst_idev(*dst)->dev, - &fl->fl6_dst, + &fl6->daddr, sk ? inet6_sk(sk)->srcprefs : 0, - &fl->fl6_src); + &fl6->saddr); if (err) goto out_err_release; } @@ -949,10 +949,10 @@ static int ip6_dst_lookup_tail(struct sock *sk, */ if ((*dst)->neighbour && !((*dst)->neighbour->nud_state & NUD_VALID)) { struct inet6_ifaddr *ifp; - struct flowi fl_gw; + struct flowi6 fl_gw6; int redirect; - ifp = ipv6_get_ifaddr(net, &fl->fl6_src, + ifp = ipv6_get_ifaddr(net, &fl6->saddr, (*dst)->dev, 1); redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC); @@ -965,9 +965,9 @@ static int ip6_dst_lookup_tail(struct sock *sk, * default router instead */ dst_release(*dst); - memcpy(&fl_gw, fl, sizeof(struct flowi)); - memset(&fl_gw.fl6_dst, 0, sizeof(struct in6_addr)); - *dst = ip6_route_output(net, sk, &fl_gw); + memcpy(&fl_gw6, fl6, sizeof(struct flowi6)); + memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr)); + *dst = ip6_route_output(net, sk, &fl_gw6); if ((err = (*dst)->error)) goto out_err_release; } @@ -988,43 +988,85 @@ out_err_release: * ip6_dst_lookup - perform route lookup on flow * @sk: socket which provides route info * @dst: pointer to dst_entry * for result - * @fl: flow to lookup + * @fl6: flow to lookup * * This function performs a route lookup on the given flow. * * It returns zero on success, or a standard errno code on error. 
*/ -int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl) +int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi6 *fl6) { *dst = NULL; - return ip6_dst_lookup_tail(sk, dst, fl); + return ip6_dst_lookup_tail(sk, dst, fl6); } EXPORT_SYMBOL_GPL(ip6_dst_lookup); /** - * ip6_sk_dst_lookup - perform socket cached route lookup on flow + * ip6_dst_lookup_flow - perform route lookup on flow with ipsec + * @sk: socket which provides route info + * @fl6: flow to lookup + * @final_dst: final destination address for ipsec lookup + * @can_sleep: we are in a sleepable context + * + * This function performs a route lookup on the given flow. + * + * It returns a valid dst pointer on success, or a pointer encoded + * error code. + */ +struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, + const struct in6_addr *final_dst, + bool can_sleep) +{ + struct dst_entry *dst = NULL; + int err; + + err = ip6_dst_lookup_tail(sk, &dst, fl6); + if (err) + return ERR_PTR(err); + if (final_dst) + ipv6_addr_copy(&fl6->daddr, final_dst); + if (can_sleep) + fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP; + + return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0); +} +EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow); + +/** + * ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow * @sk: socket which provides the dst cache and route info - * @dst: pointer to dst_entry * for result - * @fl: flow to lookup + * @fl6: flow to lookup + * @final_dst: final destination address for ipsec lookup + * @can_sleep: we are in a sleepable context * * This function performs a route lookup on the given flow with the * possibility of using the cached route in the socket if it is valid. * It will take the socket dst lock when operating on the dst cache. * As a result, this function can only be used in process context. * - * It returns zero on success, or a standard errno code on error. + * It returns a valid dst pointer on success, or a pointer encoded + * error code. 
*/ -int ip6_sk_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl) +struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, + const struct in6_addr *final_dst, + bool can_sleep) { - *dst = NULL; - if (sk) { - *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie); - *dst = ip6_sk_dst_check(sk, *dst, fl); - } + struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie); + int err; + + dst = ip6_sk_dst_check(sk, dst, fl6); + + err = ip6_dst_lookup_tail(sk, &dst, fl6); + if (err) + return ERR_PTR(err); + if (final_dst) + ipv6_addr_copy(&fl6->daddr, final_dst); + if (can_sleep) + fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP; - return ip6_dst_lookup_tail(sk, dst, fl); + return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0); } -EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup); +EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow); static inline int ip6_ufo_append_data(struct sock *sk, int getfrag(void *from, char *to, int offset, int len, @@ -1061,7 +1103,6 @@ static inline int ip6_ufo_append_data(struct sock *sk, skb->ip_summed = CHECKSUM_PARTIAL; skb->csum = 0; - sk->sk_sndmsg_off = 0; } err = skb_append_datato_frags(sk,skb, getfrag, from, @@ -1104,7 +1145,7 @@ static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src, int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb), void *from, int length, int transhdrlen, - int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi *fl, + int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6, struct rt6_info *rt, unsigned int flags, int dontfrag) { struct inet_sock *inet = inet_sk(sk); @@ -1118,6 +1159,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, int err; int offset = 0; int csummode = CHECKSUM_NONE; + __u8 tx_flags = 0; if (flags&MSG_PROBE) return 0; @@ -1161,7 +1203,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, } dst_hold(&rt->dst); inet->cork.dst = &rt->dst; - inet->cork.fl = *fl; + inet->cork.fl.u.ip6 = *fl6; np->cork.hop_limit = hlimit; np->cork.tclass = tclass; mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ? @@ -1182,7 +1224,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, transhdrlen += exthdrlen; } else { rt = (struct rt6_info *)inet->cork.dst; - fl = &inet->cork.fl; + fl6 = &inet->cork.fl.u.ip6; opt = np->cork.opt; transhdrlen = 0; exthdrlen = 0; @@ -1197,11 +1239,18 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) { if (inet->cork.length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) { - ipv6_local_error(sk, EMSGSIZE, fl, mtu-exthdrlen); + ipv6_local_error(sk, EMSGSIZE, fl6, mtu-exthdrlen); return -EMSGSIZE; } } + /* For UDP, check if TX timestamp is enabled */ + if (sk->sk_type == SOCK_DGRAM) { + err = sock_tx_timestamp(sk, &tx_flags); + if (err) + goto error; + } + /* * Let's try using as much space as possible. * Use MTU if total length of the message fits into the MTU. 
@@ -1222,7 +1271,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, if (length > mtu) { int proto = sk->sk_protocol; if (dontfrag && (proto == IPPROTO_UDP || proto == IPPROTO_RAW)){ - ipv6_local_rxpmtu(sk, fl, mtu-exthdrlen); + ipv6_local_rxpmtu(sk, fl6, mtu-exthdrlen); return -EMSGSIZE; } @@ -1306,6 +1355,12 @@ alloc_new_skb: sk->sk_allocation); if (unlikely(skb == NULL)) err = -ENOBUFS; + else { + /* Only the initial fragment + * is time stamped. + */ + tx_flags = 0; + } } if (skb == NULL) goto error; @@ -1317,6 +1372,9 @@ alloc_new_skb: /* reserve for fragmentation */ skb_reserve(skb, hh_len+sizeof(struct frag_hdr)); + if (sk->sk_type == SOCK_DGRAM) + skb_shinfo(skb)->tx_flags = tx_flags; + /* * Find where to start putting bytes */ @@ -1458,8 +1516,8 @@ int ip6_push_pending_frames(struct sock *sk) struct ipv6hdr *hdr; struct ipv6_txoptions *opt = np->cork.opt; struct rt6_info *rt = (struct rt6_info *)inet->cork.dst; - struct flowi *fl = &inet->cork.fl; - unsigned char proto = fl->proto; + struct flowi6 *fl6 = &inet->cork.fl.u.ip6; + unsigned char proto = fl6->flowi6_proto; int err = 0; if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL) @@ -1484,7 +1542,7 @@ int ip6_push_pending_frames(struct sock *sk) if (np->pmtudisc < IPV6_PMTUDISC_DO) skb->local_df = 1; - ipv6_addr_copy(final_dst, &fl->fl6_dst); + ipv6_addr_copy(final_dst, &fl6->daddr); __skb_pull(skb, skb_network_header_len(skb)); if (opt && opt->opt_flen) ipv6_push_frag_opts(skb, opt, &proto); @@ -1495,12 +1553,12 @@ int ip6_push_pending_frames(struct sock *sk) skb_reset_network_header(skb); hdr = ipv6_hdr(skb); - *(__be32*)hdr = fl->fl6_flowlabel | + *(__be32*)hdr = fl6->flowlabel | htonl(0x60000000 | ((int)np->cork.tclass << 20)); hdr->hop_limit = np->cork.hop_limit; hdr->nexthdr = proto; - ipv6_addr_copy(&hdr->saddr, &fl->fl6_src); + ipv6_addr_copy(&hdr->saddr, &fl6->saddr); ipv6_addr_copy(&hdr->daddr, final_dst); skb->priority = sk->sk_priority; diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 4f4483e697bd..c1b1bd312df2 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -57,6 +57,7 @@ MODULE_AUTHOR("Ville Nuorvala"); MODULE_DESCRIPTION("IPv6 tunneling device"); MODULE_LICENSE("GPL"); +MODULE_ALIAS_NETDEV("ip6tnl0"); #ifdef IP6_TNL_DEBUG #define IP6_TNL_TRACE(x...) 
printk(KERN_DEBUG "%s:" x "\n", __func__) @@ -535,7 +536,6 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, int err; struct sk_buff *skb2; struct iphdr *eiph; - struct flowi fl; struct rtable *rt; err = ip6_tnl_err(skb, IPPROTO_IPIP, opt, &rel_type, &rel_code, @@ -577,11 +577,11 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, eiph = ip_hdr(skb2); /* Try to guess incoming interface */ - memset(&fl, 0, sizeof(fl)); - fl.fl4_dst = eiph->saddr; - fl.fl4_tos = RT_TOS(eiph->tos); - fl.proto = IPPROTO_IPIP; - if (ip_route_output_key(dev_net(skb->dev), &rt, &fl)) + rt = ip_route_output_ports(dev_net(skb->dev), NULL, + eiph->saddr, 0, + 0, 0, + IPPROTO_IPIP, RT_TOS(eiph->tos), 0); + if (IS_ERR(rt)) goto out; skb2->dev = rt->dst.dev; @@ -590,15 +590,18 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (rt->rt_flags & RTCF_LOCAL) { ip_rt_put(rt); rt = NULL; - fl.fl4_dst = eiph->daddr; - fl.fl4_src = eiph->saddr; - fl.fl4_tos = eiph->tos; - if (ip_route_output_key(dev_net(skb->dev), &rt, &fl) || + rt = ip_route_output_ports(dev_net(skb->dev), NULL, + eiph->daddr, eiph->saddr, + 0, 0, + IPPROTO_IPIP, + RT_TOS(eiph->tos), 0); + if (IS_ERR(rt) || rt->dst.dev->type != ARPHRD_TUNNEL) { - ip_rt_put(rt); + if (!IS_ERR(rt)) + ip_rt_put(rt); goto out; } - skb_dst_set(skb2, (struct dst_entry *)rt); + skb_dst_set(skb2, &rt->dst); } else { ip_rt_put(rt); if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, @@ -881,7 +884,7 @@ static inline int ip6_tnl_xmit_ctl(struct ip6_tnl *t) static int ip6_tnl_xmit2(struct sk_buff *skb, struct net_device *dev, __u8 dsfield, - struct flowi *fl, + struct flowi6 *fl6, int encap_limit, __u32 *pmtu) { @@ -901,10 +904,16 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, if ((dst = ip6_tnl_dst_check(t)) != NULL) dst_hold(dst); else { - dst = ip6_route_output(net, NULL, fl); + dst = ip6_route_output(net, NULL, fl6); - if (dst->error || xfrm_lookup(net, &dst, fl, NULL, 0) < 0) + if (dst->error) goto tx_err_link_failure; + dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), NULL, 0); + if (IS_ERR(dst)) { + err = PTR_ERR(dst); + dst = NULL; + goto tx_err_link_failure; + } } tdev = dst->dev; @@ -954,7 +963,7 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, skb->transport_header = skb->network_header; - proto = fl->proto; + proto = fl6->flowi6_proto; if (encap_limit >= 0) { init_tel_txopt(&opt, encap_limit); ipv6_push_nfrag_opts(skb, &opt.ops, &proto, NULL); @@ -962,13 +971,13 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, skb_push(skb, sizeof(struct ipv6hdr)); skb_reset_network_header(skb); ipv6h = ipv6_hdr(skb); - *(__be32*)ipv6h = fl->fl6_flowlabel | htonl(0x60000000); + *(__be32*)ipv6h = fl6->flowlabel | htonl(0x60000000); dsfield = INET_ECN_encapsulate(0, dsfield); ipv6_change_dsfield(ipv6h, ~INET_ECN_MASK, dsfield); ipv6h->hop_limit = t->parms.hop_limit; ipv6h->nexthdr = proto; - ipv6_addr_copy(&ipv6h->saddr, &fl->fl6_src); - ipv6_addr_copy(&ipv6h->daddr, &fl->fl6_dst); + ipv6_addr_copy(&ipv6h->saddr, &fl6->saddr); + ipv6_addr_copy(&ipv6h->daddr, &fl6->daddr); nf_reset(skb); pkt_len = skb->len; err = ip6_local_out(skb); @@ -998,7 +1007,7 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) struct ip6_tnl *t = netdev_priv(dev); struct iphdr *iph = ip_hdr(skb); int encap_limit = -1; - struct flowi fl; + struct flowi6 fl6; __u8 dsfield; __u32 mtu; int err; @@ -1010,16 +1019,16 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) encap_limit = 
t->parms.encap_limit; - memcpy(&fl, &t->fl, sizeof (fl)); - fl.proto = IPPROTO_IPIP; + memcpy(&fl6, &t->fl.u.ip6, sizeof (fl6)); + fl6.flowi6_proto = IPPROTO_IPIP; dsfield = ipv4_get_dsfield(iph); if ((t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)) - fl.fl6_flowlabel |= htonl((__u32)iph->tos << IPV6_TCLASS_SHIFT) + fl6.flowlabel |= htonl((__u32)iph->tos << IPV6_TCLASS_SHIFT) & IPV6_TCLASS_MASK; - err = ip6_tnl_xmit2(skb, dev, dsfield, &fl, encap_limit, &mtu); + err = ip6_tnl_xmit2(skb, dev, dsfield, &fl6, encap_limit, &mtu); if (err != 0) { /* XXX: send ICMP error even if DF is not set. */ if (err == -EMSGSIZE) @@ -1038,7 +1047,7 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) struct ipv6hdr *ipv6h = ipv6_hdr(skb); int encap_limit = -1; __u16 offset; - struct flowi fl; + struct flowi6 fl6; __u8 dsfield; __u32 mtu; int err; @@ -1060,16 +1069,16 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) } else if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) encap_limit = t->parms.encap_limit; - memcpy(&fl, &t->fl, sizeof (fl)); - fl.proto = IPPROTO_IPV6; + memcpy(&fl6, &t->fl.u.ip6, sizeof (fl6)); + fl6.flowi6_proto = IPPROTO_IPV6; dsfield = ipv6_get_dsfield(ipv6h); if ((t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)) - fl.fl6_flowlabel |= (*(__be32 *) ipv6h & IPV6_TCLASS_MASK); + fl6.flowlabel |= (*(__be32 *) ipv6h & IPV6_TCLASS_MASK); if ((t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL)) - fl.fl6_flowlabel |= (*(__be32 *) ipv6h & IPV6_FLOWLABEL_MASK); + fl6.flowlabel |= (*(__be32 *) ipv6h & IPV6_FLOWLABEL_MASK); - err = ip6_tnl_xmit2(skb, dev, dsfield, &fl, encap_limit, &mtu); + err = ip6_tnl_xmit2(skb, dev, dsfield, &fl6, encap_limit, &mtu); if (err != 0) { if (err == -EMSGSIZE) icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); @@ -1132,21 +1141,21 @@ static void ip6_tnl_link_config(struct ip6_tnl *t) { struct net_device *dev = t->dev; struct ip6_tnl_parm *p = &t->parms; - struct flowi *fl = &t->fl; + struct flowi6 *fl6 = &t->fl.u.ip6; memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr)); memcpy(dev->broadcast, &p->raddr, sizeof(struct in6_addr)); /* Set up flowi template */ - ipv6_addr_copy(&fl->fl6_src, &p->laddr); - ipv6_addr_copy(&fl->fl6_dst, &p->raddr); - fl->oif = p->link; - fl->fl6_flowlabel = 0; + ipv6_addr_copy(&fl6->saddr, &p->laddr); + ipv6_addr_copy(&fl6->daddr, &p->raddr); + fl6->flowi6_oif = p->link; + fl6->flowlabel = 0; if (!(p->flags&IP6_TNL_F_USE_ORIG_TCLASS)) - fl->fl6_flowlabel |= IPV6_TCLASS_MASK & p->flowinfo; + fl6->flowlabel |= IPV6_TCLASS_MASK & p->flowinfo; if (!(p->flags&IP6_TNL_F_USE_ORIG_FLOWLABEL)) - fl->fl6_flowlabel |= IPV6_FLOWLABEL_MASK & p->flowinfo; + fl6->flowlabel |= IPV6_FLOWLABEL_MASK & p->flowinfo; ip6_tnl_set_cap(t); diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 9fab274019c0..7ff0343e05c7 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -34,6 +34,7 @@ #include <linux/seq_file.h> #include <linux/init.h> #include <linux/slab.h> +#include <linux/compat.h> #include <net/protocol.h> #include <linux/skbuff.h> #include <net/sock.h> @@ -134,14 +135,15 @@ static struct mr6_table *ip6mr_get_table(struct net *net, u32 id) return NULL; } -static int ip6mr_fib_lookup(struct net *net, struct flowi *flp, +static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6, struct mr6_table **mrt) { struct ip6mr_result res; struct fib_lookup_arg arg = { .result = &res, }; int err; - err = fib_rules_lookup(net->ipv6.mr6_rules_ops, flp, 0, &arg); + err = fib_rules_lookup(net->ipv6.mr6_rules_ops, + flowi6_to_flowi(flp6), 0, &arg); if 
(err < 0) return err; *mrt = res.mrt; @@ -269,7 +271,7 @@ static struct mr6_table *ip6mr_get_table(struct net *net, u32 id) return net->ipv6.mrt6; } -static int ip6mr_fib_lookup(struct net *net, struct flowi *flp, +static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6, struct mr6_table **mrt) { *mrt = net->ipv6.mrt6; @@ -616,9 +618,9 @@ static int pim6_rcv(struct sk_buff *skb) struct net_device *reg_dev = NULL; struct net *net = dev_net(skb->dev); struct mr6_table *mrt; - struct flowi fl = { - .iif = skb->dev->ifindex, - .mark = skb->mark, + struct flowi6 fl6 = { + .flowi6_iif = skb->dev->ifindex, + .flowi6_mark = skb->mark, }; int reg_vif_num; @@ -643,7 +645,7 @@ static int pim6_rcv(struct sk_buff *skb) ntohs(encap->payload_len) + sizeof(*pim) > skb->len) goto drop; - if (ip6mr_fib_lookup(net, &fl, &mrt) < 0) + if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0) goto drop; reg_vif_num = mrt->mroute_reg_vif_num; @@ -686,14 +688,14 @@ static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, { struct net *net = dev_net(dev); struct mr6_table *mrt; - struct flowi fl = { - .oif = dev->ifindex, - .iif = skb->skb_iif, - .mark = skb->mark, + struct flowi6 fl6 = { + .flowi6_oif = dev->ifindex, + .flowi6_iif = skb->skb_iif, + .flowi6_mark = skb->mark, }; int err; - err = ip6mr_fib_lookup(net, &fl, &mrt); + err = ip6mr_fib_lookup(net, &fl6, &mrt); if (err < 0) return err; @@ -1038,7 +1040,6 @@ static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt, while((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) { if (ipv6_hdr(skb)->version == 0) { - int err; struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr)); if (__ip6mr_fill_mroute(mrt, skb, c, NLMSG_DATA(nlh)) > 0) { @@ -1049,7 +1050,7 @@ static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt, skb_trim(skb, nlh->nlmsg_len); ((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -EMSGSIZE; } - err = rtnl_unicast(skb, net, NETLINK_CB(skb).pid); + rtnl_unicast(skb, net, NETLINK_CB(skb).pid); } else ip6_mr_forward(net, mrt, skb, c); } @@ -1547,13 +1548,13 @@ int ip6mr_sk_done(struct sock *sk) struct sock *mroute6_socket(struct net *net, struct sk_buff *skb) { struct mr6_table *mrt; - struct flowi fl = { - .iif = skb->skb_iif, - .oif = skb->dev->ifindex, - .mark = skb->mark, + struct flowi6 fl6 = { + .flowi6_iif = skb->skb_iif, + .flowi6_oif = skb->dev->ifindex, + .flowi6_mark = skb->mark, }; - if (ip6mr_fib_lookup(net, &fl, &mrt) < 0) + if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0) return NULL; return mrt->mroute6_sk; @@ -1804,6 +1805,80 @@ int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg) } } +#ifdef CONFIG_COMPAT +struct compat_sioc_sg_req6 { + struct sockaddr_in6 src; + struct sockaddr_in6 grp; + compat_ulong_t pktcnt; + compat_ulong_t bytecnt; + compat_ulong_t wrong_if; +}; + +struct compat_sioc_mif_req6 { + mifi_t mifi; + compat_ulong_t icount; + compat_ulong_t ocount; + compat_ulong_t ibytes; + compat_ulong_t obytes; +}; + +int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg) +{ + struct compat_sioc_sg_req6 sr; + struct compat_sioc_mif_req6 vr; + struct mif_device *vif; + struct mfc6_cache *c; + struct net *net = sock_net(sk); + struct mr6_table *mrt; + + mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? 
: RT6_TABLE_DFLT); + if (mrt == NULL) + return -ENOENT; + + switch (cmd) { + case SIOCGETMIFCNT_IN6: + if (copy_from_user(&vr, arg, sizeof(vr))) + return -EFAULT; + if (vr.mifi >= mrt->maxvif) + return -EINVAL; + read_lock(&mrt_lock); + vif = &mrt->vif6_table[vr.mifi]; + if (MIF_EXISTS(mrt, vr.mifi)) { + vr.icount = vif->pkt_in; + vr.ocount = vif->pkt_out; + vr.ibytes = vif->bytes_in; + vr.obytes = vif->bytes_out; + read_unlock(&mrt_lock); + + if (copy_to_user(arg, &vr, sizeof(vr))) + return -EFAULT; + return 0; + } + read_unlock(&mrt_lock); + return -EADDRNOTAVAIL; + case SIOCGETSGCNT_IN6: + if (copy_from_user(&sr, arg, sizeof(sr))) + return -EFAULT; + + read_lock(&mrt_lock); + c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr); + if (c) { + sr.pktcnt = c->mfc_un.res.pkt; + sr.bytecnt = c->mfc_un.res.bytes; + sr.wrong_if = c->mfc_un.res.wrong_if; + read_unlock(&mrt_lock); + + if (copy_to_user(arg, &sr, sizeof(sr))) + return -EFAULT; + return 0; + } + read_unlock(&mrt_lock); + return -EADDRNOTAVAIL; + default: + return -ENOIOCTLCMD; + } +} +#endif static inline int ip6mr_forward2_finish(struct sk_buff *skb) { @@ -1823,7 +1898,7 @@ static int ip6mr_forward2(struct net *net, struct mr6_table *mrt, struct mif_device *vif = &mrt->vif6_table[vifi]; struct net_device *dev; struct dst_entry *dst; - struct flowi fl; + struct flowi6 fl6; if (vif->dev == NULL) goto out_free; @@ -1841,12 +1916,12 @@ static int ip6mr_forward2(struct net *net, struct mr6_table *mrt, ipv6h = ipv6_hdr(skb); - fl = (struct flowi) { - .oif = vif->link, - .fl6_dst = ipv6h->daddr, + fl6 = (struct flowi6) { + .flowi6_oif = vif->link, + .daddr = ipv6h->daddr, }; - dst = ip6_route_output(net, NULL, &fl); + dst = ip6_route_output(net, NULL, &fl6); if (!dst) goto out_free; @@ -1969,13 +2044,13 @@ int ip6_mr_input(struct sk_buff *skb) struct mfc6_cache *cache; struct net *net = dev_net(skb->dev); struct mr6_table *mrt; - struct flowi fl = { - .iif = skb->dev->ifindex, - .mark = skb->mark, + struct flowi6 fl6 = { + .flowi6_iif = skb->dev->ifindex, + .flowi6_mark = skb->mark, }; int err; - err = ip6mr_fib_lookup(net, &fl, &mrt); + err = ip6mr_fib_lookup(net, &fl6, &mrt); if (err < 0) return err; diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index d1770e061c08..9cb191ecaba8 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -444,12 +444,12 @@ sticky_done: { struct ipv6_txoptions *opt = NULL; struct msghdr msg; - struct flowi fl; + struct flowi6 fl6; int junk; - fl.fl6_flowlabel = 0; - fl.oif = sk->sk_bound_dev_if; - fl.mark = sk->sk_mark; + memset(&fl6, 0, sizeof(fl6)); + fl6.flowi6_oif = sk->sk_bound_dev_if; + fl6.flowi6_mark = sk->sk_mark; if (optlen == 0) goto update; @@ -475,7 +475,7 @@ sticky_done: msg.msg_controllen = optlen; msg.msg_control = (void*)(opt+1); - retv = datagram_send_ctl(net, &msg, &fl, opt, &junk, &junk, + retv = datagram_send_ctl(net, &msg, &fl6, opt, &junk, &junk, &junk); if (retv) goto done; diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 49f986d626a0..76b893771e6e 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -319,7 +319,6 @@ int ip6_mc_source(int add, int omode, struct sock *sk, { struct in6_addr *source, *group; struct ipv6_mc_socklist *pmc; - struct net_device *dev; struct inet6_dev *idev; struct ipv6_pinfo *inet6 = inet6_sk(sk); struct ip6_sf_socklist *psl; @@ -341,7 +340,6 @@ int ip6_mc_source(int add, int omode, struct sock *sk, rcu_read_unlock(); return -ENODEV; } - dev = idev->dev; err = -EADDRNOTAVAIL; @@ -455,7 +453,6 @@ int 
ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf) { struct in6_addr *group; struct ipv6_mc_socklist *pmc; - struct net_device *dev; struct inet6_dev *idev; struct ipv6_pinfo *inet6 = inet6_sk(sk); struct ip6_sf_socklist *newpsl, *psl; @@ -478,7 +475,6 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf) rcu_read_unlock(); return -ENODEV; } - dev = idev->dev; err = 0; @@ -549,7 +545,6 @@ int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf, struct in6_addr *group; struct ipv6_mc_socklist *pmc; struct inet6_dev *idev; - struct net_device *dev; struct ipv6_pinfo *inet6 = inet6_sk(sk); struct ip6_sf_socklist *psl; struct net *net = sock_net(sk); @@ -566,7 +561,6 @@ int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf, rcu_read_unlock(); return -ENODEV; } - dev = idev->dev; err = -EADDRNOTAVAIL; /* @@ -1402,7 +1396,7 @@ static void mld_sendpack(struct sk_buff *skb) struct inet6_dev *idev; struct net *net = dev_net(skb->dev); int err; - struct flowi fl; + struct flowi6 fl6; struct dst_entry *dst; rcu_read_lock(); @@ -1425,11 +1419,16 @@ static void mld_sendpack(struct sk_buff *skb) goto err_out; } - icmpv6_flow_init(net->ipv6.igmp_sk, &fl, ICMPV6_MLD2_REPORT, + icmpv6_flow_init(net->ipv6.igmp_sk, &fl6, ICMPV6_MLD2_REPORT, &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, skb->dev->ifindex); - err = xfrm_lookup(net, &dst, &fl, NULL, 0); + dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0); + err = 0; + if (IS_ERR(dst)) { + err = PTR_ERR(dst); + dst = NULL; + } skb_dst_set(skb, dst); if (err) goto err_out; @@ -1732,7 +1731,7 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) u8 ra[8] = { IPPROTO_ICMPV6, 0, IPV6_TLV_ROUTERALERT, 2, 0, 0, IPV6_TLV_PADN, 0 }; - struct flowi fl; + struct flowi6 fl6; struct dst_entry *dst; if (type == ICMPV6_MGM_REDUCTION) @@ -1792,13 +1791,15 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) goto err_out; } - icmpv6_flow_init(sk, &fl, type, + icmpv6_flow_init(sk, &fl6, type, &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, skb->dev->ifindex); - err = xfrm_lookup(net, &dst, &fl, NULL, 0); - if (err) + dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0); + if (IS_ERR(dst)) { + err = PTR_ERR(dst); goto err_out; + } skb_dst_set(skb, dst); err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL, skb->dev, diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c index d6e9599d0705..9b210482fb05 100644 --- a/net/ipv6/mip6.c +++ b/net/ipv6/mip6.c @@ -203,18 +203,20 @@ static inline int mip6_report_rl_allow(struct timeval *stamp, return allow; } -static int mip6_destopt_reject(struct xfrm_state *x, struct sk_buff *skb, struct flowi *fl) +static int mip6_destopt_reject(struct xfrm_state *x, struct sk_buff *skb, + const struct flowi *fl) { struct net *net = xs_net(x); struct inet6_skb_parm *opt = (struct inet6_skb_parm *)skb->cb; + const struct flowi6 *fl6 = &fl->u.ip6; struct ipv6_destopt_hao *hao = NULL; struct xfrm_selector sel; int offset; struct timeval stamp; int err = 0; - if (unlikely(fl->proto == IPPROTO_MH && - fl->fl_mh_type <= IP6_MH_TYPE_MAX)) + if (unlikely(fl6->flowi6_proto == IPPROTO_MH && + fl6->fl6_mh_type <= IP6_MH_TYPE_MAX)) goto out; if (likely(opt->dsthao)) { @@ -239,14 +241,14 @@ static int mip6_destopt_reject(struct xfrm_state *x, struct sk_buff *skb, struct sizeof(sel.saddr)); sel.prefixlen_s = 128; sel.family = AF_INET6; - sel.proto = fl->proto; - sel.dport = xfrm_flowi_dport(fl); + sel.proto = fl6->flowi6_proto; + sel.dport = xfrm_flowi_dport(fl, 
&fl6->uli); if (sel.dport) sel.dport_mask = htons(~0); - sel.sport = xfrm_flowi_sport(fl); + sel.sport = xfrm_flowi_sport(fl, &fl6->uli); if (sel.sport) sel.sport_mask = htons(~0); - sel.ifindex = fl->oif; + sel.ifindex = fl6->flowi6_oif; err = km_report(net, IPPROTO_DSTOPTS, &sel, (hao ? (xfrm_address_t *)&hao->addr : NULL)); diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 2342545a5ee9..0e49c9db3c98 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -511,7 +511,7 @@ void ndisc_send_skb(struct sk_buff *skb, const struct in6_addr *saddr, struct icmp6hdr *icmp6h) { - struct flowi fl; + struct flowi6 fl6; struct dst_entry *dst; struct net *net = dev_net(dev); struct sock *sk = net->ipv6.ndisc_sk; @@ -521,7 +521,7 @@ void ndisc_send_skb(struct sk_buff *skb, type = icmp6h->icmp6_type; - icmpv6_flow_init(sk, &fl, type, saddr, daddr, dev->ifindex); + icmpv6_flow_init(sk, &fl6, type, saddr, daddr, dev->ifindex); dst = icmp6_dst_alloc(dev, neigh, daddr); if (!dst) { @@ -529,8 +529,8 @@ void ndisc_send_skb(struct sk_buff *skb, return; } - err = xfrm_lookup(net, &dst, &fl, NULL, 0); - if (err < 0) { + dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0); + if (IS_ERR(dst)) { kfree_skb(skb); return; } @@ -1515,7 +1515,7 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh, struct rt6_info *rt; struct dst_entry *dst; struct inet6_dev *idev; - struct flowi fl; + struct flowi6 fl6; u8 *opt; int rd_len; int err; @@ -1535,15 +1535,15 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh, return; } - icmpv6_flow_init(sk, &fl, NDISC_REDIRECT, + icmpv6_flow_init(sk, &fl6, NDISC_REDIRECT, &saddr_buf, &ipv6_hdr(skb)->saddr, dev->ifindex); - dst = ip6_route_output(net, NULL, &fl); + dst = ip6_route_output(net, NULL, &fl6); if (dst == NULL) return; - err = xfrm_lookup(net, &dst, &fl, NULL, 0); - if (err) + dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0); + if (IS_ERR(dst)) return; rt = (struct rt6_info *) dst; @@ -1553,7 +1553,9 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh, "ICMPv6 Redirect: destination is not a neighbour.\n"); goto release; } - if (!xrlim_allow(dst, 1*HZ)) + if (!rt->rt6i_peer) + rt6_bind_peer(rt, 1); + if (inet_peer_xrlim_allow(rt->rt6i_peer, 1*HZ)) goto release; if (dev->addr_len) { diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index 35915e8617f0..39aaca2b4fd2 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -15,14 +15,14 @@ int ip6_route_me_harder(struct sk_buff *skb) struct net *net = dev_net(skb_dst(skb)->dev); struct ipv6hdr *iph = ipv6_hdr(skb); struct dst_entry *dst; - struct flowi fl = { - .oif = skb->sk ? skb->sk->sk_bound_dev_if : 0, - .mark = skb->mark, - .fl6_dst = iph->daddr, - .fl6_src = iph->saddr, + struct flowi6 fl6 = { + .flowi6_oif = skb->sk ? 
skb->sk->sk_bound_dev_if : 0, + .flowi6_mark = skb->mark, + .daddr = iph->daddr, + .saddr = iph->saddr, }; - dst = ip6_route_output(net, skb->sk, &fl); + dst = ip6_route_output(net, skb->sk, &fl6); if (dst->error) { IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); LIMIT_NETDEBUG(KERN_DEBUG "ip6_route_me_harder: No more route.\n"); @@ -37,9 +37,10 @@ int ip6_route_me_harder(struct sk_buff *skb) #ifdef CONFIG_XFRM if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) && - xfrm_decode_session(skb, &fl, AF_INET6) == 0) { + xfrm_decode_session(skb, flowi6_to_flowi(&fl6), AF_INET6) == 0) { skb_dst_set(skb, NULL); - if (xfrm_lookup(net, &dst, &fl, skb->sk, 0)) + dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), skb->sk, 0); + if (IS_ERR(dst)) return -1; skb_dst_set(skb, dst); } @@ -91,7 +92,7 @@ static int nf_ip6_reroute(struct sk_buff *skb, static int nf_ip6_route(struct dst_entry **dst, struct flowi *fl) { - *dst = ip6_route_output(&init_net, NULL, fl); + *dst = ip6_route_output(&init_net, NULL, &fl->u.ip6); return (*dst)->error; } diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c index 05027b753721..e6af8d72f26b 100644 --- a/net/ipv6/netfilter/ip6t_LOG.c +++ b/net/ipv6/netfilter/ip6t_LOG.c @@ -410,7 +410,7 @@ fallback: if (p != NULL) { sb_add(m, "%02x", *p++); for (i = 1; i < len; i++) - sb_add(m, ":%02x", p[i]); + sb_add(m, ":%02x", *p++); } sb_add(m, " "); diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c index bf998feac14e..28e74488a329 100644 --- a/net/ipv6/netfilter/ip6t_REJECT.c +++ b/net/ipv6/netfilter/ip6t_REJECT.c @@ -47,7 +47,7 @@ static void send_reset(struct net *net, struct sk_buff *oldskb) struct ipv6hdr *ip6h; struct dst_entry *dst = NULL; u8 proto; - struct flowi fl; + struct flowi6 fl6; if ((!(ipv6_addr_type(&oip6h->saddr) & IPV6_ADDR_UNICAST)) || (!(ipv6_addr_type(&oip6h->daddr) & IPV6_ADDR_UNICAST))) { @@ -89,19 +89,20 @@ static void send_reset(struct net *net, struct sk_buff *oldskb) return; } - memset(&fl, 0, sizeof(fl)); - fl.proto = IPPROTO_TCP; - ipv6_addr_copy(&fl.fl6_src, &oip6h->daddr); - ipv6_addr_copy(&fl.fl6_dst, &oip6h->saddr); - fl.fl_ip_sport = otcph.dest; - fl.fl_ip_dport = otcph.source; - security_skb_classify_flow(oldskb, &fl); - dst = ip6_route_output(net, NULL, &fl); + memset(&fl6, 0, sizeof(fl6)); + fl6.flowi6_proto = IPPROTO_TCP; + ipv6_addr_copy(&fl6.saddr, &oip6h->daddr); + ipv6_addr_copy(&fl6.daddr, &oip6h->saddr); + fl6.fl6_sport = otcph.dest; + fl6.fl6_dport = otcph.source; + security_skb_classify_flow(oldskb, flowi6_to_flowi(&fl6)); + dst = ip6_route_output(net, NULL, &fl6); if (dst == NULL || dst->error) { dst_release(dst); return; } - if (xfrm_lookup(net, &dst, &fl, NULL, 0)) + dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0); + if (IS_ERR(dst)) return; hh_len = (dst->dev->hard_header_len + 15)&~15; diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 86c39526ba5e..4a1c3b46c56b 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -31,6 +31,7 @@ #include <linux/netfilter.h> #include <linux/netfilter_ipv6.h> #include <linux/skbuff.h> +#include <linux/compat.h> #include <asm/uaccess.h> #include <asm/ioctls.h> @@ -123,18 +124,18 @@ static __inline__ int icmpv6_filter(struct sock *sk, struct sk_buff *skb) } #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) -static int (*mh_filter)(struct sock *sock, struct sk_buff *skb); +typedef int mh_filter_t(struct sock *sock, struct sk_buff *skb); -int rawv6_mh_filter_register(int (*filter)(struct sock *sock, - struct 
sk_buff *skb)) +static mh_filter_t __rcu *mh_filter __read_mostly; + +int rawv6_mh_filter_register(mh_filter_t filter) { rcu_assign_pointer(mh_filter, filter); return 0; } EXPORT_SYMBOL(rawv6_mh_filter_register); -int rawv6_mh_filter_unregister(int (*filter)(struct sock *sock, - struct sk_buff *skb)) +int rawv6_mh_filter_unregister(mh_filter_t filter) { rcu_assign_pointer(mh_filter, NULL); synchronize_rcu(); @@ -192,10 +193,10 @@ static int ipv6_raw_deliver(struct sk_buff *skb, int nexthdr) * policy is placed in rawv6_rcv() because it is * required for each socket. */ - int (*filter)(struct sock *sock, struct sk_buff *skb); + mh_filter_t *filter; filter = rcu_dereference(mh_filter); - filtered = filter ? filter(sk, skb) : 0; + filtered = filter ? (*filter)(sk, skb) : 0; break; } #endif @@ -523,7 +524,7 @@ csum_copy_err: goto out; } -static int rawv6_push_pending_frames(struct sock *sk, struct flowi *fl, +static int rawv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6, struct raw6_sock *rp) { struct sk_buff *skb; @@ -585,11 +586,10 @@ static int rawv6_push_pending_frames(struct sock *sk, struct flowi *fl, if (unlikely(csum)) tmp_csum = csum_sub(tmp_csum, csum_unfold(csum)); - csum = csum_ipv6_magic(&fl->fl6_src, - &fl->fl6_dst, - total_len, fl->proto, tmp_csum); + csum = csum_ipv6_magic(&fl6->saddr, &fl6->daddr, + total_len, fl6->flowi6_proto, tmp_csum); - if (csum == 0 && fl->proto == IPPROTO_UDP) + if (csum == 0 && fl6->flowi6_proto == IPPROTO_UDP) csum = CSUM_MANGLED_0; if (skb_store_bits(skb, offset, &csum, 2)) @@ -602,7 +602,7 @@ out: } static int rawv6_send_hdrinc(struct sock *sk, void *from, int length, - struct flowi *fl, struct dst_entry **dstp, + struct flowi6 *fl6, struct dst_entry **dstp, unsigned int flags) { struct ipv6_pinfo *np = inet6_sk(sk); @@ -612,7 +612,7 @@ static int rawv6_send_hdrinc(struct sock *sk, void *from, int length, struct rt6_info *rt = (struct rt6_info *)*dstp; if (length > rt->dst.dev->mtu) { - ipv6_local_error(sk, EMSGSIZE, fl, rt->dst.dev->mtu); + ipv6_local_error(sk, EMSGSIZE, fl6, rt->dst.dev->mtu); return -EMSGSIZE; } if (flags&MSG_PROBE) @@ -661,7 +661,7 @@ error: return err; } -static int rawv6_probe_proto_opt(struct flowi *fl, struct msghdr *msg) +static int rawv6_probe_proto_opt(struct flowi6 *fl6, struct msghdr *msg) { struct iovec *iov; u8 __user *type = NULL; @@ -678,7 +678,7 @@ static int rawv6_probe_proto_opt(struct flowi *fl, struct msghdr *msg) if (!iov) continue; - switch (fl->proto) { + switch (fl6->flowi6_proto) { case IPPROTO_ICMPV6: /* check if one-byte field is readable or not. */ if (iov->iov_base && iov->iov_len < 1) @@ -693,8 +693,8 @@ static int rawv6_probe_proto_opt(struct flowi *fl, struct msghdr *msg) code = iov->iov_base; if (type && code) { - if (get_user(fl->fl_icmp_type, type) || - get_user(fl->fl_icmp_code, code)) + if (get_user(fl6->fl6_icmp_type, type) || + get_user(fl6->fl6_icmp_code, code)) return -EFAULT; probed = 1; } @@ -705,7 +705,7 @@ static int rawv6_probe_proto_opt(struct flowi *fl, struct msghdr *msg) /* check if type field is readable or not. 
*/ if (iov->iov_len > 2 - len) { u8 __user *p = iov->iov_base; - if (get_user(fl->fl_mh_type, &p[2 - len])) + if (get_user(fl6->fl6_mh_type, &p[2 - len])) return -EFAULT; probed = 1; } else @@ -734,7 +734,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, struct ipv6_txoptions *opt = NULL; struct ip6_flowlabel *flowlabel = NULL; struct dst_entry *dst = NULL; - struct flowi fl; + struct flowi6 fl6; int addr_len = msg->msg_namelen; int hlimit = -1; int tclass = -1; @@ -755,9 +755,9 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, /* * Get and verify the address. */ - memset(&fl, 0, sizeof(fl)); + memset(&fl6, 0, sizeof(fl6)); - fl.mark = sk->sk_mark; + fl6.flowi6_mark = sk->sk_mark; if (sin6) { if (addr_len < SIN6_LEN_RFC2133) @@ -779,9 +779,9 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, daddr = &sin6->sin6_addr; if (np->sndflow) { - fl.fl6_flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK; - if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) { - flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel); + fl6.flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK; + if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) { + flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); if (flowlabel == NULL) return -EINVAL; daddr = &flowlabel->dst; @@ -799,32 +799,32 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, if (addr_len >= sizeof(struct sockaddr_in6) && sin6->sin6_scope_id && ipv6_addr_type(daddr)&IPV6_ADDR_LINKLOCAL) - fl.oif = sin6->sin6_scope_id; + fl6.flowi6_oif = sin6->sin6_scope_id; } else { if (sk->sk_state != TCP_ESTABLISHED) return -EDESTADDRREQ; proto = inet->inet_num; daddr = &np->daddr; - fl.fl6_flowlabel = np->flow_label; + fl6.flowlabel = np->flow_label; } - if (fl.oif == 0) - fl.oif = sk->sk_bound_dev_if; + if (fl6.flowi6_oif == 0) + fl6.flowi6_oif = sk->sk_bound_dev_if; if (msg->msg_controllen) { opt = &opt_space; memset(opt, 0, sizeof(struct ipv6_txoptions)); opt->tot_len = sizeof(struct ipv6_txoptions); - err = datagram_send_ctl(sock_net(sk), msg, &fl, opt, &hlimit, + err = datagram_send_ctl(sock_net(sk), msg, &fl6, opt, &hlimit, &tclass, &dontfrag); if (err < 0) { fl6_sock_release(flowlabel); return err; } - if ((fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) && !flowlabel) { - flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel); + if ((fl6.flowlabel&IPV6_FLOWLABEL_MASK) && !flowlabel) { + flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); if (flowlabel == NULL) return -EINVAL; } @@ -837,40 +837,31 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, opt = fl6_merge_options(&opt_space, flowlabel, opt); opt = ipv6_fixup_options(&opt_space, opt); - fl.proto = proto; - err = rawv6_probe_proto_opt(&fl, msg); + fl6.flowi6_proto = proto; + err = rawv6_probe_proto_opt(&fl6, msg); if (err) goto out; if (!ipv6_addr_any(daddr)) - ipv6_addr_copy(&fl.fl6_dst, daddr); + ipv6_addr_copy(&fl6.daddr, daddr); else - fl.fl6_dst.s6_addr[15] = 0x1; /* :: means loopback (BSD'ism) */ - if (ipv6_addr_any(&fl.fl6_src) && !ipv6_addr_any(&np->saddr)) - ipv6_addr_copy(&fl.fl6_src, &np->saddr); + fl6.daddr.s6_addr[15] = 0x1; /* :: means loopback (BSD'ism) */ + if (ipv6_addr_any(&fl6.saddr) && !ipv6_addr_any(&np->saddr)) + ipv6_addr_copy(&fl6.saddr, &np->saddr); - final_p = fl6_update_dst(&fl, opt, &final); + final_p = fl6_update_dst(&fl6, opt, &final); - if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst)) - fl.oif = np->mcast_oif; - security_sk_classify_flow(sk, &fl); + if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr)) + fl6.flowi6_oif = np->mcast_oif; + 
security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); - err = ip6_dst_lookup(sk, &dst, &fl); - if (err) + dst = ip6_dst_lookup_flow(sk, &fl6, final_p, true); + if (IS_ERR(dst)) { + err = PTR_ERR(dst); goto out; - if (final_p) - ipv6_addr_copy(&fl.fl6_dst, final_p); - - err = __xfrm_lookup(sock_net(sk), &dst, &fl, sk, XFRM_LOOKUP_WAIT); - if (err < 0) { - if (err == -EREMOTE) - err = ip6_dst_blackhole(sk, &dst, &fl); - if (err < 0) - goto out; } - if (hlimit < 0) { - if (ipv6_addr_is_multicast(&fl.fl6_dst)) + if (ipv6_addr_is_multicast(&fl6.daddr)) hlimit = np->mcast_hops; else hlimit = np->hop_limit; @@ -889,17 +880,17 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, back_from_confirm: if (inet->hdrincl) - err = rawv6_send_hdrinc(sk, msg->msg_iov, len, &fl, &dst, msg->msg_flags); + err = rawv6_send_hdrinc(sk, msg->msg_iov, len, &fl6, &dst, msg->msg_flags); else { lock_sock(sk); err = ip6_append_data(sk, ip_generic_getfrag, msg->msg_iov, - len, 0, hlimit, tclass, opt, &fl, (struct rt6_info*)dst, + len, 0, hlimit, tclass, opt, &fl6, (struct rt6_info*)dst, msg->msg_flags, dontfrag); if (err) ip6_flush_pending_frames(sk); else if (!(msg->msg_flags & MSG_MORE)) - err = rawv6_push_pending_frames(sk, &fl, rp); + err = rawv6_push_pending_frames(sk, &fl6, rp); release_sock(sk); } done: @@ -1157,6 +1148,23 @@ static int rawv6_ioctl(struct sock *sk, int cmd, unsigned long arg) } } +#ifdef CONFIG_COMPAT +static int compat_rawv6_ioctl(struct sock *sk, unsigned int cmd, unsigned long arg) +{ + switch (cmd) { + case SIOCOUTQ: + case SIOCINQ: + return -ENOIOCTLCMD; + default: +#ifdef CONFIG_IPV6_MROUTE + return ip6mr_compat_ioctl(sk, cmd, compat_ptr(arg)); +#else + return -ENOIOCTLCMD; +#endif + } +} +#endif + static void rawv6_close(struct sock *sk, long timeout) { if (inet_sk(sk)->inet_num == IPPROTO_RAW) @@ -1215,6 +1223,7 @@ struct proto rawv6_prot = { #ifdef CONFIG_COMPAT .compat_setsockopt = compat_rawv6_setsockopt, .compat_getsockopt = compat_rawv6_getsockopt, + .compat_ioctl = compat_rawv6_ioctl, #endif }; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 373bd0416f69..6814c8722fa7 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -72,8 +72,6 @@ #define RT6_TRACE(x...) 
do { ; } while (0) #endif -#define CLONE_OFFLINK_ROUTE 0 - static struct rt6_info * ip6_rt_copy(struct rt6_info *ort); static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie); static unsigned int ip6_default_advmss(const struct dst_entry *dst); @@ -99,6 +97,36 @@ static struct rt6_info *rt6_get_route_info(struct net *net, struct in6_addr *gwaddr, int ifindex); #endif +static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old) +{ + struct rt6_info *rt = (struct rt6_info *) dst; + struct inet_peer *peer; + u32 *p = NULL; + + if (!rt->rt6i_peer) + rt6_bind_peer(rt, 1); + + peer = rt->rt6i_peer; + if (peer) { + u32 *old_p = __DST_METRICS_PTR(old); + unsigned long prev, new; + + p = peer->metrics; + if (inet_metrics_new(peer)) + memcpy(p, old_p, sizeof(u32) * RTAX_MAX); + + new = (unsigned long) p; + prev = cmpxchg(&dst->_metrics, old, new); + + if (prev != old) { + p = __DST_METRICS_PTR(prev); + if (prev & DST_METRICS_READ_ONLY) + p = NULL; + } + } + return p; +} + static struct dst_ops ip6_dst_ops_template = { .family = AF_INET6, .protocol = cpu_to_be16(ETH_P_IPV6), @@ -107,6 +135,7 @@ static struct dst_ops ip6_dst_ops_template = { .check = ip6_dst_check, .default_advmss = ip6_default_advmss, .default_mtu = ip6_default_mtu, + .cow_metrics = ipv6_cow_metrics, .destroy = ip6_dst_destroy, .ifdown = ip6_dst_ifdown, .negative_advice = ip6_negative_advice, @@ -115,6 +144,11 @@ static struct dst_ops ip6_dst_ops_template = { .local_out = __ip6_local_out, }; +static unsigned int ip6_blackhole_default_mtu(const struct dst_entry *dst) +{ + return 0; +} + static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu) { } @@ -124,9 +158,15 @@ static struct dst_ops ip6_dst_blackhole_ops = { .protocol = cpu_to_be16(ETH_P_IPV6), .destroy = ip6_dst_destroy, .check = ip6_dst_check, + .default_mtu = ip6_blackhole_default_mtu, + .default_advmss = ip6_default_advmss, .update_pmtu = ip6_rt_blackhole_update_pmtu, }; +static const u32 ip6_template_metrics[RTAX_MAX] = { + [RTAX_HOPLIMIT - 1] = 255, +}; + static struct rt6_info ip6_null_entry_template = { .dst = { .__refcnt = ATOMIC_INIT(1), @@ -182,7 +222,7 @@ static struct rt6_info ip6_blk_hole_entry_template = { /* allocate dst with ip6_dst_ops */ static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops) { - return (struct rt6_info *)dst_alloc(ops); + return (struct rt6_info *)dst_alloc(ops, 0); } static void ip6_dst_destroy(struct dst_entry *dst) @@ -196,22 +236,27 @@ static void ip6_dst_destroy(struct dst_entry *dst) in6_dev_put(idev); } if (peer) { - BUG_ON(!(rt->rt6i_flags & RTF_CACHE)); rt->rt6i_peer = NULL; inet_putpeer(peer); } } +static atomic_t __rt6_peer_genid = ATOMIC_INIT(0); + +static u32 rt6_peer_genid(void) +{ + return atomic_read(&__rt6_peer_genid); +} + void rt6_bind_peer(struct rt6_info *rt, int create) { struct inet_peer *peer; - if (WARN_ON(!(rt->rt6i_flags & RTF_CACHE))) - return; - peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create); if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL) inet_putpeer(peer); + else + rt->rt6i_peer_genid = rt6_peer_genid(); } static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, @@ -554,17 +599,17 @@ do { \ static struct rt6_info *ip6_pol_route_lookup(struct net *net, struct fib6_table *table, - struct flowi *fl, int flags) + struct flowi6 *fl6, int flags) { struct fib6_node *fn; struct rt6_info *rt; read_lock_bh(&table->tb6_lock); - fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src); + fn = fib6_lookup(&table->tb6_root, 
&fl6->daddr, &fl6->saddr); restart: rt = fn->leaf; - rt = rt6_device_match(net, rt, &fl->fl6_src, fl->oif, flags); - BACKTRACK(net, &fl->fl6_src); + rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags); + BACKTRACK(net, &fl6->saddr); out: dst_use(&rt->dst, jiffies); read_unlock_bh(&table->tb6_lock); @@ -575,19 +620,19 @@ out: struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr, const struct in6_addr *saddr, int oif, int strict) { - struct flowi fl = { - .oif = oif, - .fl6_dst = *daddr, + struct flowi6 fl6 = { + .flowi6_oif = oif, + .daddr = *daddr, }; struct dst_entry *dst; int flags = strict ? RT6_LOOKUP_F_IFACE : 0; if (saddr) { - memcpy(&fl.fl6_src, saddr, sizeof(*saddr)); + memcpy(&fl6.saddr, saddr, sizeof(*saddr)); flags |= RT6_LOOKUP_F_HAS_SADDR; } - dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_lookup); + dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup); if (dst->error == 0) return (struct rt6_info *) dst; @@ -708,7 +753,7 @@ static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *d } static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif, - struct flowi *fl, int flags) + struct flowi6 *fl6, int flags) { struct fib6_node *fn; struct rt6_info *rt, *nrt; @@ -723,12 +768,12 @@ relookup: read_lock_bh(&table->tb6_lock); restart_2: - fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src); + fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); restart: rt = rt6_select(fn, oif, strict | reachable); - BACKTRACK(net, &fl->fl6_src); + BACKTRACK(net, &fl6->saddr); if (rt == net->ipv6.ip6_null_entry || rt->rt6i_flags & RTF_CACHE) goto out; @@ -737,14 +782,11 @@ restart: read_unlock_bh(&table->tb6_lock); if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) - nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src); - else { -#if CLONE_OFFLINK_ROUTE - nrt = rt6_alloc_clone(rt, &fl->fl6_dst); -#else + nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr); + else if (!(rt->dst.flags & DST_HOST)) + nrt = rt6_alloc_clone(rt, &fl6->daddr); + else goto out2; -#endif - } dst_release(&rt->dst); rt = nrt ? 
: net->ipv6.ip6_null_entry; @@ -781,9 +823,9 @@ out2: } static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table, - struct flowi *fl, int flags) + struct flowi6 *fl6, int flags) { - return ip6_pol_route(net, table, fl->iif, fl, flags); + return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags); } void ip6_route_input(struct sk_buff *skb) @@ -791,56 +833,54 @@ void ip6_route_input(struct sk_buff *skb) struct ipv6hdr *iph = ipv6_hdr(skb); struct net *net = dev_net(skb->dev); int flags = RT6_LOOKUP_F_HAS_SADDR; - struct flowi fl = { - .iif = skb->dev->ifindex, - .fl6_dst = iph->daddr, - .fl6_src = iph->saddr, - .fl6_flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK, - .mark = skb->mark, - .proto = iph->nexthdr, + struct flowi6 fl6 = { + .flowi6_iif = skb->dev->ifindex, + .daddr = iph->daddr, + .saddr = iph->saddr, + .flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK, + .flowi6_mark = skb->mark, + .flowi6_proto = iph->nexthdr, }; if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG) flags |= RT6_LOOKUP_F_IFACE; - skb_dst_set(skb, fib6_rule_lookup(net, &fl, flags, ip6_pol_route_input)); + skb_dst_set(skb, fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_input)); } static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table, - struct flowi *fl, int flags) + struct flowi6 *fl6, int flags) { - return ip6_pol_route(net, table, fl->oif, fl, flags); + return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags); } struct dst_entry * ip6_route_output(struct net *net, struct sock *sk, - struct flowi *fl) + struct flowi6 *fl6) { int flags = 0; - if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl->fl6_dst)) + if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr)) flags |= RT6_LOOKUP_F_IFACE; - if (!ipv6_addr_any(&fl->fl6_src)) + if (!ipv6_addr_any(&fl6->saddr)) flags |= RT6_LOOKUP_F_HAS_SADDR; else if (sk) flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs); - return fib6_rule_lookup(net, fl, flags, ip6_pol_route_output); + return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output); } EXPORT_SYMBOL(ip6_route_output); -int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl) +struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig) { - struct rt6_info *ort = (struct rt6_info *) *dstp; - struct rt6_info *rt = (struct rt6_info *) - dst_alloc(&ip6_dst_blackhole_ops); + struct rt6_info *rt = dst_alloc(&ip6_dst_blackhole_ops, 1); + struct rt6_info *ort = (struct rt6_info *) dst_orig; struct dst_entry *new = NULL; if (rt) { new = &rt->dst; - atomic_set(&new->__refcnt, 1); new->__use = 1; new->input = dst_discard; new->output = dst_discard; @@ -866,11 +906,9 @@ int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl dst_free(new); } - dst_release(*dstp); - *dstp = new; - return new ? 0 : -ENOMEM; + dst_release(dst_orig); + return new ? 
new : ERR_PTR(-ENOMEM); } -EXPORT_SYMBOL_GPL(ip6_dst_blackhole); /* * Destination cache support functions @@ -882,9 +920,14 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie) rt = (struct rt6_info *) dst; - if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) + if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) { + if (rt->rt6i_peer_genid != rt6_peer_genid()) { + if (!rt->rt6i_peer) + rt6_bind_peer(rt, 0); + rt->rt6i_peer_genid = rt6_peer_genid(); + } return dst; - + } return NULL; } @@ -935,7 +978,6 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu) dst_metric_set(dst, RTAX_FEATURES, features); } dst_metric_set(dst, RTAX_MTU, mtu); - call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst); } } @@ -1032,11 +1074,9 @@ out: int icmp6_dst_gc(void) { - struct dst_entry *dst, *next, **pprev; + struct dst_entry *dst, **pprev; int more = 0; - next = NULL; - spin_lock_bh(&icmp6_dst_lock); pprev = &icmp6_dst_gc_list; @@ -1404,16 +1444,16 @@ static int ip6_route_del(struct fib6_config *cfg) * Handle redirects */ struct ip6rd_flowi { - struct flowi fl; + struct flowi6 fl6; struct in6_addr gateway; }; static struct rt6_info *__ip6_route_redirect(struct net *net, struct fib6_table *table, - struct flowi *fl, + struct flowi6 *fl6, int flags) { - struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl; + struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6; struct rt6_info *rt; struct fib6_node *fn; @@ -1429,7 +1469,7 @@ static struct rt6_info *__ip6_route_redirect(struct net *net, */ read_lock_bh(&table->tb6_lock); - fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src); + fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); restart: for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) { /* @@ -1444,7 +1484,7 @@ restart: continue; if (!(rt->rt6i_flags & RTF_GATEWAY)) continue; - if (fl->oif != rt->rt6i_dev->ifindex) + if (fl6->flowi6_oif != rt->rt6i_dev->ifindex) continue; if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway)) continue; @@ -1453,7 +1493,7 @@ restart: if (!rt) rt = net->ipv6.ip6_null_entry; - BACKTRACK(net, &fl->fl6_src); + BACKTRACK(net, &fl6->saddr); out: dst_hold(&rt->dst); @@ -1470,10 +1510,10 @@ static struct rt6_info *ip6_route_redirect(struct in6_addr *dest, int flags = RT6_LOOKUP_F_HAS_SADDR; struct net *net = dev_net(dev); struct ip6rd_flowi rdfl = { - .fl = { - .oif = dev->ifindex, - .fl6_dst = *dest, - .fl6_src = *src, + .fl6 = { + .flowi6_oif = dev->ifindex, + .daddr = *dest, + .saddr = *src, }, }; @@ -1482,7 +1522,7 @@ static struct rt6_info *ip6_route_redirect(struct in6_addr *dest, if (rt6_need_strict(dest)) flags |= RT6_LOOKUP_F_IFACE; - return (struct rt6_info *)fib6_rule_lookup(net, (struct flowi *)&rdfl, + return (struct rt6_info *)fib6_rule_lookup(net, &rdfl.fl6, flags, __ip6_route_redirect); } @@ -1984,12 +2024,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, if (IS_ERR(neigh)) { dst_free(&rt->dst); - /* We are casting this because that is the return - * value type. But an errno encoded pointer is the - * same regardless of the underlying pointer type, - * and that's what we are returning. So this is OK. 
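The comment being removed here describes the kernel's errno-in-pointer convention; the replacement just below expresses the same thing with ERR_CAST() from <linux/err.h>. As an aside (not part of the patch), a minimal, hypothetical sketch of that convention, with placeholder types:

#include <linux/err.h>

struct foo;	/* hypothetical types, only to show the re-typing */
struct bar;

static struct foo *propagate_err(struct bar *p)
{
	if (IS_ERR(p))
		return ERR_CAST(p);	/* same encoded errno, caller's pointer type */
	return NULL;			/* sketch only: nothing to report otherwise */
}

Callers of such a function are then expected to test the result with IS_ERR()/PTR_ERR() rather than comparing against NULL.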
- */ - return (struct rt6_info *) neigh; + return ERR_CAST(neigh); } rt->rt6i_nexthop = neigh; @@ -2350,7 +2385,7 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void struct rt6_info *rt; struct sk_buff *skb; struct rtmsg *rtm; - struct flowi fl; + struct flowi6 fl6; int err, iif = 0; err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy); @@ -2358,27 +2393,27 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void goto errout; err = -EINVAL; - memset(&fl, 0, sizeof(fl)); + memset(&fl6, 0, sizeof(fl6)); if (tb[RTA_SRC]) { if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr)) goto errout; - ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC])); + ipv6_addr_copy(&fl6.saddr, nla_data(tb[RTA_SRC])); } if (tb[RTA_DST]) { if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr)) goto errout; - ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST])); + ipv6_addr_copy(&fl6.daddr, nla_data(tb[RTA_DST])); } if (tb[RTA_IIF]) iif = nla_get_u32(tb[RTA_IIF]); if (tb[RTA_OIF]) - fl.oif = nla_get_u32(tb[RTA_OIF]); + fl6.flowi6_oif = nla_get_u32(tb[RTA_OIF]); if (iif) { struct net_device *dev; @@ -2401,10 +2436,10 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void skb_reset_mac_header(skb); skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr)); - rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl); + rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl6); skb_dst_set(skb, &rt->dst); - err = rt6_fill_node(net, skb, rt, &fl.fl6_dst, &fl.fl6_src, iif, + err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif, RTM_NEWROUTE, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, 0, 0, 0); if (err < 0) { @@ -2561,14 +2596,16 @@ static int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { - struct net *net = current->nsproxy->net_ns; - int delay = net->ipv6.sysctl.flush_delay; - if (write) { - proc_dointvec(ctl, write, buffer, lenp, ppos); - fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net); - return 0; - } else + struct net *net; + int delay; + if (!write) return -EINVAL; + + net = (struct net *)ctl->extra1; + delay = net->ipv6.sysctl.flush_delay; + proc_dointvec(ctl, write, buffer, lenp, ppos); + fib6_run_gc(delay <= 0 ? 
~0UL : (unsigned long)delay, net); + return 0; } ctl_table ipv6_route_table_template[] = { @@ -2655,6 +2692,7 @@ struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net) if (table) { table[0].data = &net->ipv6.sysctl.flush_delay; + table[0].extra1 = net; table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh; table[2].data = &net->ipv6.sysctl.ip6_rt_max_size; table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval; @@ -2688,7 +2726,8 @@ static int __net_init ip6_route_net_init(struct net *net) net->ipv6.ip6_null_entry->dst.path = (struct dst_entry *)net->ipv6.ip6_null_entry; net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops; - dst_metric_set(&net->ipv6.ip6_null_entry->dst, RTAX_HOPLIMIT, 255); + dst_init_metrics(&net->ipv6.ip6_null_entry->dst, + ip6_template_metrics, true); #ifdef CONFIG_IPV6_MULTIPLE_TABLES net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template, @@ -2699,7 +2738,8 @@ static int __net_init ip6_route_net_init(struct net *net) net->ipv6.ip6_prohibit_entry->dst.path = (struct dst_entry *)net->ipv6.ip6_prohibit_entry; net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops; - dst_metric_set(&net->ipv6.ip6_prohibit_entry->dst, RTAX_HOPLIMIT, 255); + dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst, + ip6_template_metrics, true); net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template, sizeof(*net->ipv6.ip6_blk_hole_entry), @@ -2709,7 +2749,8 @@ static int __net_init ip6_route_net_init(struct net *net) net->ipv6.ip6_blk_hole_entry->dst.path = (struct dst_entry *)net->ipv6.ip6_blk_hole_entry; net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops; - dst_metric_set(&net->ipv6.ip6_blk_hole_entry->dst, RTAX_HOPLIMIT, 255); + dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst, + ip6_template_metrics, true); #endif net->ipv6.sysctl.flush_delay = 0; diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 8ce38f10a547..43b33373adb2 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -412,7 +412,7 @@ static void prl_list_destroy_rcu(struct rcu_head *head) p = container_of(head, struct ip_tunnel_prl_entry, rcu_head); do { - n = p->next; + n = rcu_dereference_protected(p->next, 1); kfree(p); p = n; } while (p); @@ -421,15 +421,17 @@ static void prl_list_destroy_rcu(struct rcu_head *head) static int ipip6_tunnel_del_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a) { - struct ip_tunnel_prl_entry *x, **p; + struct ip_tunnel_prl_entry *x; + struct ip_tunnel_prl_entry __rcu **p; int err = 0; ASSERT_RTNL(); if (a && a->addr != htonl(INADDR_ANY)) { - for (p = &t->prl; *p; p = &(*p)->next) { - if ((*p)->addr == a->addr) { - x = *p; + for (p = &t->prl; + (x = rtnl_dereference(*p)) != NULL; + p = &x->next) { + if (x->addr == a->addr) { *p = x->next; call_rcu(&x->rcu_head, prl_entry_destroy_rcu); t->prl_count--; @@ -438,9 +440,9 @@ ipip6_tunnel_del_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a) } err = -ENXIO; } else { - if (t->prl) { + x = rtnl_dereference(t->prl); + if (x) { t->prl_count = 0; - x = t->prl; call_rcu(&x->rcu_head, prl_list_destroy_rcu); t->prl = NULL; } @@ -730,16 +732,14 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, dst = addr6->s6_addr32[3]; } - { - struct flowi fl = { .fl4_dst = dst, - .fl4_src = tiph->saddr, - .fl4_tos = RT_TOS(tos), - .oif = tunnel->parms.link, - .proto = IPPROTO_IPV6 }; - if (ip_route_output_key(dev_net(dev), &rt, &fl)) { - dev->stats.tx_carrier_errors++; - goto tx_error_icmp; - } + rt = ip_route_output_ports(dev_net(dev), NULL, + dst, tiph->saddr, + 0, 0, + IPPROTO_IPV6, RT_TOS(tos), + 
tunnel->parms.link); + if (IS_ERR(rt)) { + dev->stats.tx_carrier_errors++; + goto tx_error_icmp; } if (rt->rt_type != RTN_UNICAST) { ip_rt_put(rt); @@ -855,13 +855,14 @@ static void ipip6_tunnel_bind_dev(struct net_device *dev) iph = &tunnel->parms.iph; if (iph->daddr) { - struct flowi fl = { .fl4_dst = iph->daddr, - .fl4_src = iph->saddr, - .fl4_tos = RT_TOS(iph->tos), - .oif = tunnel->parms.link, - .proto = IPPROTO_IPV6 }; - struct rtable *rt; - if (!ip_route_output_key(dev_net(dev), &rt, &fl)) { + struct rtable *rt = ip_route_output_ports(dev_net(dev), NULL, + iph->daddr, iph->saddr, + 0, 0, + IPPROTO_IPV6, + RT_TOS(iph->tos), + tunnel->parms.link); + + if (!IS_ERR(rt)) { tdev = rt->dst.dev; ip_rt_put(rt); } @@ -1179,7 +1180,7 @@ static int __net_init ipip6_fb_tunnel_init(struct net_device *dev) if (!dev->tstats) return -ENOMEM; dev_hold(dev); - sitn->tunnels_wc[0] = tunnel; + rcu_assign_pointer(sitn->tunnels_wc[0], tunnel); return 0; } @@ -1196,11 +1197,12 @@ static void __net_exit sit_destroy_tunnels(struct sit_net *sitn, struct list_hea for (prio = 1; prio < 4; prio++) { int h; for (h = 0; h < HASH_SIZE; h++) { - struct ip_tunnel *t = sitn->tunnels[prio][h]; + struct ip_tunnel *t; + t = rtnl_dereference(sitn->tunnels[prio][h]); while (t != NULL) { unregister_netdevice_queue(t->dev, head); - t = t->next; + t = rtnl_dereference(t->next); } } } @@ -1290,4 +1292,4 @@ static int __init sit_init(void) module_init(sit_init); module_exit(sit_cleanup); MODULE_LICENSE("GPL"); -MODULE_ALIAS("sit0"); +MODULE_ALIAS_NETDEV("sit0"); diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 09fd34f0dbf2..352c26081f5d 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -232,23 +232,20 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) */ { struct in6_addr *final_p, final; - struct flowi fl; - memset(&fl, 0, sizeof(fl)); - fl.proto = IPPROTO_TCP; - ipv6_addr_copy(&fl.fl6_dst, &ireq6->rmt_addr); - final_p = fl6_update_dst(&fl, np->opt, &final); - ipv6_addr_copy(&fl.fl6_src, &ireq6->loc_addr); - fl.oif = sk->sk_bound_dev_if; - fl.mark = sk->sk_mark; - fl.fl_ip_dport = inet_rsk(req)->rmt_port; - fl.fl_ip_sport = inet_sk(sk)->inet_sport; - security_req_classify_flow(req, &fl); - if (ip6_dst_lookup(sk, &dst, &fl)) - goto out_free; - - if (final_p) - ipv6_addr_copy(&fl.fl6_dst, final_p); - if ((xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0)) < 0) + struct flowi6 fl6; + memset(&fl6, 0, sizeof(fl6)); + fl6.flowi6_proto = IPPROTO_TCP; + ipv6_addr_copy(&fl6.daddr, &ireq6->rmt_addr); + final_p = fl6_update_dst(&fl6, np->opt, &final); + ipv6_addr_copy(&fl6.saddr, &ireq6->loc_addr); + fl6.flowi6_oif = sk->sk_bound_dev_if; + fl6.flowi6_mark = sk->sk_mark; + fl6.fl6_dport = inet_rsk(req)->rmt_port; + fl6.fl6_sport = inet_sk(sk)->inet_sport; + security_req_classify_flow(req, flowi6_to_flowi(&fl6)); + + dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false); + if (IS_ERR(dst)) goto out_free; } diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index fa1d8f4e0051..7cb65ef79f9c 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -15,6 +15,8 @@ #include <net/addrconf.h> #include <net/inet_frag.h> +static struct ctl_table empty[1]; + static ctl_table ipv6_table_template[] = { { .procname = "route", @@ -35,6 +37,12 @@ static ctl_table ipv6_table_template[] = { .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "neigh", + .maxlen = 0, + .mode = 0555, + .child = empty, + }, { } }; @@ -152,7 +160,6 @@ static struct ctl_table_header 
*ip6_base; int ipv6_static_sysctl_register(void) { - static struct ctl_table empty[1]; ip6_base = register_sysctl_paths(net_ipv6_ctl_path, empty); if (ip6_base == NULL) return -ENOMEM; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 20aa95e37359..2b0c186862c8 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -131,7 +131,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, struct tcp_sock *tp = tcp_sk(sk); struct in6_addr *saddr = NULL, *final_p, final; struct rt6_info *rt; - struct flowi fl; + struct flowi6 fl6; struct dst_entry *dst; int addr_type; int err; @@ -142,14 +142,14 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, if (usin->sin6_family != AF_INET6) return -EAFNOSUPPORT; - memset(&fl, 0, sizeof(fl)); + memset(&fl6, 0, sizeof(fl6)); if (np->sndflow) { - fl.fl6_flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK; - IP6_ECN_flow_init(fl.fl6_flowlabel); - if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) { + fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK; + IP6_ECN_flow_init(fl6.flowlabel); + if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) { struct ip6_flowlabel *flowlabel; - flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel); + flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); if (flowlabel == NULL) return -EINVAL; ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst); @@ -195,7 +195,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, } ipv6_addr_copy(&np->daddr, &usin->sin6_addr); - np->flow_label = fl.fl6_flowlabel; + np->flow_label = fl6.flowlabel; /* * TCP over IPv4 @@ -242,35 +242,27 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, if (!ipv6_addr_any(&np->rcv_saddr)) saddr = &np->rcv_saddr; - fl.proto = IPPROTO_TCP; - ipv6_addr_copy(&fl.fl6_dst, &np->daddr); - ipv6_addr_copy(&fl.fl6_src, + fl6.flowi6_proto = IPPROTO_TCP; + ipv6_addr_copy(&fl6.daddr, &np->daddr); + ipv6_addr_copy(&fl6.saddr, (saddr ? saddr : &np->saddr)); - fl.oif = sk->sk_bound_dev_if; - fl.mark = sk->sk_mark; - fl.fl_ip_dport = usin->sin6_port; - fl.fl_ip_sport = inet->inet_sport; + fl6.flowi6_oif = sk->sk_bound_dev_if; + fl6.flowi6_mark = sk->sk_mark; + fl6.fl6_dport = usin->sin6_port; + fl6.fl6_sport = inet->inet_sport; - final_p = fl6_update_dst(&fl, np->opt, &final); + final_p = fl6_update_dst(&fl6, np->opt, &final); - security_sk_classify_flow(sk, &fl); + security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); - err = ip6_dst_lookup(sk, &dst, &fl); - if (err) + dst = ip6_dst_lookup_flow(sk, &fl6, final_p, true); + if (IS_ERR(dst)) { + err = PTR_ERR(dst); goto failure; - if (final_p) - ipv6_addr_copy(&fl.fl6_dst, final_p); - - err = __xfrm_lookup(sock_net(sk), &dst, &fl, sk, XFRM_LOOKUP_WAIT); - if (err < 0) { - if (err == -EREMOTE) - err = ip6_dst_blackhole(sk, &dst, &fl); - if (err < 0) - goto failure; } if (saddr == NULL) { - saddr = &fl.fl6_src; + saddr = &fl6.saddr; ipv6_addr_copy(&np->rcv_saddr, saddr); } @@ -385,7 +377,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, np = inet6_sk(sk); if (type == ICMPV6_PKT_TOOBIG) { - struct dst_entry *dst = NULL; + struct dst_entry *dst; if (sock_owned_by_user(sk)) goto out; @@ -397,29 +389,25 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (dst == NULL) { struct inet_sock *inet = inet_sk(sk); - struct flowi fl; + struct flowi6 fl6; /* BUGGG_FUTURE: Again, it is not clear how to handle rthdr case. Ignore this complexity for now. 
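The hunks around this point convert callers from the old ip6_dst_lookup() + xfrm_lookup() pair to ip6_dst_lookup_flow(), which hands back either a dst_entry or an ERR_PTR(). A consolidated sketch of the new calling convention assumed by this series (hypothetical caller, placeholder values; the field assignments mirror the converted sites):

#include <linux/err.h>
#include <net/sock.h>
#include <net/ipv6.h>
#include <net/ip6_route.h>
#include <net/dst.h>

static int sketch_lookup(struct sock *sk, const struct in6_addr *daddr,
			 const struct in6_addr *saddr, __be16 dport, __be16 sport)
{
	struct flowi6 fl6;
	struct dst_entry *dst;

	memset(&fl6, 0, sizeof(fl6));
	fl6.flowi6_proto = IPPROTO_TCP;
	ipv6_addr_copy(&fl6.daddr, daddr);
	ipv6_addr_copy(&fl6.saddr, saddr);
	fl6.flowi6_oif = sk->sk_bound_dev_if;
	fl6.flowi6_mark = sk->sk_mark;
	fl6.fl6_dport = dport;
	fl6.fl6_sport = sport;

	dst = ip6_dst_lookup_flow(sk, &fl6, NULL, false);	/* no final hop override, non-blocking */
	if (IS_ERR(dst))
		return PTR_ERR(dst);	/* the error now travels in the pointer itself */

	dst_release(dst);		/* sketch only: drop the reference straight away */
	return 0;
}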
*/ - memset(&fl, 0, sizeof(fl)); - fl.proto = IPPROTO_TCP; - ipv6_addr_copy(&fl.fl6_dst, &np->daddr); - ipv6_addr_copy(&fl.fl6_src, &np->saddr); - fl.oif = sk->sk_bound_dev_if; - fl.mark = sk->sk_mark; - fl.fl_ip_dport = inet->inet_dport; - fl.fl_ip_sport = inet->inet_sport; - security_skb_classify_flow(skb, &fl); - - if ((err = ip6_dst_lookup(sk, &dst, &fl))) { - sk->sk_err_soft = -err; - goto out; - } - - if ((err = xfrm_lookup(net, &dst, &fl, sk, 0)) < 0) { - sk->sk_err_soft = -err; + memset(&fl6, 0, sizeof(fl6)); + fl6.flowi6_proto = IPPROTO_TCP; + ipv6_addr_copy(&fl6.daddr, &np->daddr); + ipv6_addr_copy(&fl6.saddr, &np->saddr); + fl6.flowi6_oif = sk->sk_bound_dev_if; + fl6.flowi6_mark = sk->sk_mark; + fl6.fl6_dport = inet->inet_dport; + fl6.fl6_sport = inet->inet_sport; + security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); + + dst = ip6_dst_lookup_flow(sk, &fl6, NULL, false); + if (IS_ERR(dst)) { + sk->sk_err_soft = -PTR_ERR(dst); goto out; } @@ -494,38 +482,36 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req, struct sk_buff * skb; struct ipv6_txoptions *opt = NULL; struct in6_addr * final_p, final; - struct flowi fl; + struct flowi6 fl6; struct dst_entry *dst; - int err = -1; - - memset(&fl, 0, sizeof(fl)); - fl.proto = IPPROTO_TCP; - ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr); - ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr); - fl.fl6_flowlabel = 0; - fl.oif = treq->iif; - fl.mark = sk->sk_mark; - fl.fl_ip_dport = inet_rsk(req)->rmt_port; - fl.fl_ip_sport = inet_rsk(req)->loc_port; - security_req_classify_flow(req, &fl); + int err; + + memset(&fl6, 0, sizeof(fl6)); + fl6.flowi6_proto = IPPROTO_TCP; + ipv6_addr_copy(&fl6.daddr, &treq->rmt_addr); + ipv6_addr_copy(&fl6.saddr, &treq->loc_addr); + fl6.flowlabel = 0; + fl6.flowi6_oif = treq->iif; + fl6.flowi6_mark = sk->sk_mark; + fl6.fl6_dport = inet_rsk(req)->rmt_port; + fl6.fl6_sport = inet_rsk(req)->loc_port; + security_req_classify_flow(req, flowi6_to_flowi(&fl6)); opt = np->opt; - final_p = fl6_update_dst(&fl, opt, &final); + final_p = fl6_update_dst(&fl6, opt, &final); - err = ip6_dst_lookup(sk, &dst, &fl); - if (err) - goto done; - if (final_p) - ipv6_addr_copy(&fl.fl6_dst, final_p); - if ((err = xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0)) < 0) + dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false); + if (IS_ERR(dst)) { + err = PTR_ERR(dst); goto done; - + } skb = tcp_make_synack(sk, dst, req, rvp); + err = -ENOMEM; if (skb) { __tcp_v6_send_check(skb, &treq->loc_addr, &treq->rmt_addr); - ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr); - err = ip6_xmit(sk, skb, &fl, opt); + ipv6_addr_copy(&fl6.daddr, &treq->rmt_addr); + err = ip6_xmit(sk, skb, &fl6, opt); err = net_xmit_eval(err); } @@ -1006,7 +992,7 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win, { struct tcphdr *th = tcp_hdr(skb), *t1; struct sk_buff *buff; - struct flowi fl; + struct flowi6 fl6; struct net *net = dev_net(skb_dst(skb)->dev); struct sock *ctl_sk = net->ipv6.tcp_sk; unsigned int tot_len = sizeof(struct tcphdr); @@ -1060,34 +1046,33 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win, } #endif - memset(&fl, 0, sizeof(fl)); - ipv6_addr_copy(&fl.fl6_dst, &ipv6_hdr(skb)->saddr); - ipv6_addr_copy(&fl.fl6_src, &ipv6_hdr(skb)->daddr); + memset(&fl6, 0, sizeof(fl6)); + ipv6_addr_copy(&fl6.daddr, &ipv6_hdr(skb)->saddr); + ipv6_addr_copy(&fl6.saddr, &ipv6_hdr(skb)->daddr); buff->ip_summed = CHECKSUM_PARTIAL; buff->csum = 0; - __tcp_v6_send_check(buff, &fl.fl6_src, &fl.fl6_dst); + 
__tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr); - fl.proto = IPPROTO_TCP; - fl.oif = inet6_iif(skb); - fl.fl_ip_dport = t1->dest; - fl.fl_ip_sport = t1->source; - security_skb_classify_flow(skb, &fl); + fl6.flowi6_proto = IPPROTO_TCP; + fl6.flowi6_oif = inet6_iif(skb); + fl6.fl6_dport = t1->dest; + fl6.fl6_sport = t1->source; + security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); /* Pass a socket to ip6_dst_lookup either it is for RST * Underlying function will use this to retrieve the network * namespace */ - if (!ip6_dst_lookup(ctl_sk, &dst, &fl)) { - if (xfrm_lookup(net, &dst, &fl, NULL, 0) >= 0) { - skb_dst_set(buff, dst); - ip6_xmit(ctl_sk, buff, &fl, NULL); - TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS); - if (rst) - TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS); - return; - } + dst = ip6_dst_lookup_flow(ctl_sk, &fl6, NULL, false); + if (!IS_ERR(dst)) { + skb_dst_set(buff, dst); + ip6_xmit(ctl_sk, buff, &fl6, NULL); + TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS); + if (rst) + TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS); + return; } kfree_skb(buff); @@ -1323,7 +1308,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) tcp_death_row.sysctl_tw_recycle && (dst = inet6_csk_route_req(sk, req)) != NULL && (peer = rt6_get_peer((struct rt6_info *)dst)) != NULL && - ipv6_addr_equal((struct in6_addr *)peer->daddr.a6, + ipv6_addr_equal((struct in6_addr *)peer->daddr.addr.a6, &treq->rmt_addr)) { inet_peer_refcheck(peer); if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL && @@ -1636,10 +1621,8 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) opt_skb = skb_clone(skb, GFP_ATOMIC); if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ - TCP_CHECK_TIMER(sk); if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) goto reset; - TCP_CHECK_TIMER(sk); if (opt_skb) goto ipv6_pktoptions; return 0; @@ -1667,10 +1650,8 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) } } - TCP_CHECK_TIMER(sk); if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) goto reset; - TCP_CHECK_TIMER(sk); if (opt_skb) goto ipv6_pktoptions; return 0; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 9a009c66c8a3..d7037c006e13 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -886,7 +886,7 @@ static int udp_v6_push_pending_frames(struct sock *sk) struct udphdr *uh; struct udp_sock *up = udp_sk(sk); struct inet_sock *inet = inet_sk(sk); - struct flowi *fl = &inet->cork.fl; + struct flowi6 *fl6 = &inet->cork.fl.u.ip6; int err = 0; int is_udplite = IS_UDPLITE(sk); __wsum csum = 0; @@ -899,23 +899,23 @@ static int udp_v6_push_pending_frames(struct sock *sk) * Create a UDP header */ uh = udp_hdr(skb); - uh->source = fl->fl_ip_sport; - uh->dest = fl->fl_ip_dport; + uh->source = fl6->fl6_sport; + uh->dest = fl6->fl6_dport; uh->len = htons(up->len); uh->check = 0; if (is_udplite) csum = udplite_csum_outgoing(sk, skb); else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */ - udp6_hwcsum_outgoing(sk, skb, &fl->fl6_src, &fl->fl6_dst, + udp6_hwcsum_outgoing(sk, skb, &fl6->saddr, &fl6->daddr, up->len); goto send; } else csum = udp_csum_outgoing(sk, skb); /* add protocol-dependent pseudo-header */ - uh->check = csum_ipv6_magic(&fl->fl6_src, &fl->fl6_dst, - up->len, fl->proto, csum ); + uh->check = csum_ipv6_magic(&fl6->saddr, &fl6->daddr, + up->len, fl6->flowi6_proto, csum); if (uh->check == 0) uh->check = CSUM_MANGLED_0; @@ -947,7 +947,7 @@ int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, struct in6_addr *daddr, *final_p, final; struct ipv6_txoptions *opt = NULL; struct 
ip6_flowlabel *flowlabel = NULL; - struct flowi fl; + struct flowi6 fl6; struct dst_entry *dst; int addr_len = msg->msg_namelen; int ulen = len; @@ -1030,19 +1030,19 @@ do_udp_sendmsg: } ulen += sizeof(struct udphdr); - memset(&fl, 0, sizeof(fl)); + memset(&fl6, 0, sizeof(fl6)); if (sin6) { if (sin6->sin6_port == 0) return -EINVAL; - fl.fl_ip_dport = sin6->sin6_port; + fl6.fl6_dport = sin6->sin6_port; daddr = &sin6->sin6_addr; if (np->sndflow) { - fl.fl6_flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK; - if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) { - flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel); + fl6.flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK; + if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) { + flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); if (flowlabel == NULL) return -EINVAL; daddr = &flowlabel->dst; @@ -1060,38 +1060,38 @@ do_udp_sendmsg: if (addr_len >= sizeof(struct sockaddr_in6) && sin6->sin6_scope_id && ipv6_addr_type(daddr)&IPV6_ADDR_LINKLOCAL) - fl.oif = sin6->sin6_scope_id; + fl6.flowi6_oif = sin6->sin6_scope_id; } else { if (sk->sk_state != TCP_ESTABLISHED) return -EDESTADDRREQ; - fl.fl_ip_dport = inet->inet_dport; + fl6.fl6_dport = inet->inet_dport; daddr = &np->daddr; - fl.fl6_flowlabel = np->flow_label; + fl6.flowlabel = np->flow_label; connected = 1; } - if (!fl.oif) - fl.oif = sk->sk_bound_dev_if; + if (!fl6.flowi6_oif) + fl6.flowi6_oif = sk->sk_bound_dev_if; - if (!fl.oif) - fl.oif = np->sticky_pktinfo.ipi6_ifindex; + if (!fl6.flowi6_oif) + fl6.flowi6_oif = np->sticky_pktinfo.ipi6_ifindex; - fl.mark = sk->sk_mark; + fl6.flowi6_mark = sk->sk_mark; if (msg->msg_controllen) { opt = &opt_space; memset(opt, 0, sizeof(struct ipv6_txoptions)); opt->tot_len = sizeof(*opt); - err = datagram_send_ctl(sock_net(sk), msg, &fl, opt, &hlimit, + err = datagram_send_ctl(sock_net(sk), msg, &fl6, opt, &hlimit, &tclass, &dontfrag); if (err < 0) { fl6_sock_release(flowlabel); return err; } - if ((fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) && !flowlabel) { - flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel); + if ((fl6.flowlabel&IPV6_FLOWLABEL_MASK) && !flowlabel) { + flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); if (flowlabel == NULL) return -EINVAL; } @@ -1105,42 +1105,35 @@ do_udp_sendmsg: opt = fl6_merge_options(&opt_space, flowlabel, opt); opt = ipv6_fixup_options(&opt_space, opt); - fl.proto = sk->sk_protocol; + fl6.flowi6_proto = sk->sk_protocol; if (!ipv6_addr_any(daddr)) - ipv6_addr_copy(&fl.fl6_dst, daddr); + ipv6_addr_copy(&fl6.daddr, daddr); else - fl.fl6_dst.s6_addr[15] = 0x1; /* :: means loopback (BSD'ism) */ - if (ipv6_addr_any(&fl.fl6_src) && !ipv6_addr_any(&np->saddr)) - ipv6_addr_copy(&fl.fl6_src, &np->saddr); - fl.fl_ip_sport = inet->inet_sport; + fl6.daddr.s6_addr[15] = 0x1; /* :: means loopback (BSD'ism) */ + if (ipv6_addr_any(&fl6.saddr) && !ipv6_addr_any(&np->saddr)) + ipv6_addr_copy(&fl6.saddr, &np->saddr); + fl6.fl6_sport = inet->inet_sport; - final_p = fl6_update_dst(&fl, opt, &final); + final_p = fl6_update_dst(&fl6, opt, &final); if (final_p) connected = 0; - if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst)) { - fl.oif = np->mcast_oif; + if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr)) { + fl6.flowi6_oif = np->mcast_oif; connected = 0; } - security_sk_classify_flow(sk, &fl); + security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); - err = ip6_sk_dst_lookup(sk, &dst, &fl); - if (err) + dst = ip6_sk_dst_lookup_flow(sk, &fl6, final_p, true); + if (IS_ERR(dst)) { + err = PTR_ERR(dst); + dst = NULL; goto out; - if (final_p) - ipv6_addr_copy(&fl.fl6_dst, 
final_p); - - err = __xfrm_lookup(sock_net(sk), &dst, &fl, sk, XFRM_LOOKUP_WAIT); - if (err < 0) { - if (err == -EREMOTE) - err = ip6_dst_blackhole(sk, &dst, &fl); - if (err < 0) - goto out; } if (hlimit < 0) { - if (ipv6_addr_is_multicast(&fl.fl6_dst)) + if (ipv6_addr_is_multicast(&fl6.daddr)) hlimit = np->mcast_hops; else hlimit = np->hop_limit; @@ -1175,7 +1168,7 @@ do_append_data: up->len += ulen; getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag; err = ip6_append_data(sk, getfrag, msg->msg_iov, ulen, - sizeof(struct udphdr), hlimit, tclass, opt, &fl, + sizeof(struct udphdr), hlimit, tclass, opt, &fl6, (struct rt6_info*)dst, corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags, dontfrag); if (err) @@ -1188,10 +1181,10 @@ do_append_data: if (dst) { if (connected) { ip6_dst_store(sk, dst, - ipv6_addr_equal(&fl.fl6_dst, &np->daddr) ? + ipv6_addr_equal(&fl6.daddr, &np->daddr) ? &np->daddr : NULL, #ifdef CONFIG_IPV6_SUBTREES - ipv6_addr_equal(&fl.fl6_src, &np->saddr) ? + ipv6_addr_equal(&fl6.saddr, &np->saddr) ? &np->saddr : #endif NULL); @@ -1299,7 +1292,7 @@ static int udp6_ufo_send_check(struct sk_buff *skb) return 0; } -static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, int features) +static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, u32 features) { struct sk_buff *segs = ERR_PTR(-EINVAL); unsigned int mss; diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 7e74023ea6e4..05e34c8ec913 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -27,18 +27,19 @@ static struct xfrm_policy_afinfo xfrm6_policy_afinfo; static struct dst_entry *xfrm6_dst_lookup(struct net *net, int tos, - xfrm_address_t *saddr, - xfrm_address_t *daddr) + const xfrm_address_t *saddr, + const xfrm_address_t *daddr) { - struct flowi fl = {}; + struct flowi6 fl6; struct dst_entry *dst; int err; - memcpy(&fl.fl6_dst, daddr, sizeof(fl.fl6_dst)); + memset(&fl6, 0, sizeof(fl6)); + memcpy(&fl6.daddr, daddr, sizeof(fl6.daddr)); if (saddr) - memcpy(&fl.fl6_src, saddr, sizeof(fl.fl6_src)); + memcpy(&fl6.saddr, saddr, sizeof(fl6.saddr)); - dst = ip6_route_output(net, NULL, &fl); + dst = ip6_route_output(net, NULL, &fl6); err = dst->error; if (dst->error) { @@ -67,7 +68,7 @@ static int xfrm6_get_saddr(struct net *net, return 0; } -static int xfrm6_get_tos(struct flowi *fl) +static int xfrm6_get_tos(const struct flowi *fl) { return 0; } @@ -87,7 +88,7 @@ static int xfrm6_init_path(struct xfrm_dst *path, struct dst_entry *dst, } static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, - struct flowi *fl) + const struct flowi *fl) { struct rt6_info *rt = (struct rt6_info*)xdst->route; @@ -98,6 +99,10 @@ static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, if (!xdst->u.rt6.rt6i_idev) return -ENODEV; + xdst->u.rt6.rt6i_peer = rt->rt6i_peer; + if (rt->rt6i_peer) + atomic_inc(&rt->rt6i_peer->refcnt); + /* Sheit... I remember I did this right. 
Apparently, * it was magically lost, so this code needs audit */ xdst->u.rt6.rt6i_flags = rt->rt6i_flags & (RTF_ANYCAST | @@ -116,6 +121,7 @@ static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, static inline void _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) { + struct flowi6 *fl6 = &fl->u.ip6; int onlyproto = 0; u16 offset = skb_network_header_len(skb); struct ipv6hdr *hdr = ipv6_hdr(skb); @@ -123,11 +129,11 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) const unsigned char *nh = skb_network_header(skb); u8 nexthdr = nh[IP6CB(skb)->nhoff]; - memset(fl, 0, sizeof(struct flowi)); - fl->mark = skb->mark; + memset(fl6, 0, sizeof(struct flowi6)); + fl6->flowi6_mark = skb->mark; - ipv6_addr_copy(&fl->fl6_dst, reverse ? &hdr->saddr : &hdr->daddr); - ipv6_addr_copy(&fl->fl6_src, reverse ? &hdr->daddr : &hdr->saddr); + ipv6_addr_copy(&fl6->daddr, reverse ? &hdr->saddr : &hdr->daddr); + ipv6_addr_copy(&fl6->saddr, reverse ? &hdr->daddr : &hdr->saddr); while (nh + offset + 1 < skb->data || pskb_may_pull(skb, nh + offset + 1 - skb->data)) { @@ -154,20 +160,20 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) pskb_may_pull(skb, nh + offset + 4 - skb->data))) { __be16 *ports = (__be16 *)exthdr; - fl->fl_ip_sport = ports[!!reverse]; - fl->fl_ip_dport = ports[!reverse]; + fl6->fl6_sport = ports[!!reverse]; + fl6->fl6_dport = ports[!reverse]; } - fl->proto = nexthdr; + fl6->flowi6_proto = nexthdr; return; case IPPROTO_ICMPV6: if (!onlyproto && pskb_may_pull(skb, nh + offset + 2 - skb->data)) { u8 *icmp = (u8 *)exthdr; - fl->fl_icmp_type = icmp[0]; - fl->fl_icmp_code = icmp[1]; + fl6->fl6_icmp_type = icmp[0]; + fl6->fl6_icmp_code = icmp[1]; } - fl->proto = nexthdr; + fl6->flowi6_proto = nexthdr; return; #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) @@ -176,9 +182,9 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) struct ip6_mh *mh; mh = (struct ip6_mh *)exthdr; - fl->fl_mh_type = mh->ip6mh_type; + fl6->fl6_mh_type = mh->ip6mh_type; } - fl->proto = nexthdr; + fl6->flowi6_proto = nexthdr; return; #endif @@ -187,8 +193,8 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) case IPPROTO_ESP: case IPPROTO_COMP: default: - fl->fl_ipsec_spi = 0; - fl->proto = nexthdr; + fl6->fl6_ipsec_spi = 0; + fl6->flowi6_proto = nexthdr; return; } } @@ -216,6 +222,9 @@ static void xfrm6_dst_destroy(struct dst_entry *dst) if (likely(xdst->u.rt6.rt6i_idev)) in6_dev_put(xdst->u.rt6.rt6i_idev); + dst_destroy_metrics_generic(dst); + if (likely(xdst->u.rt6.rt6i_peer)) + inet_putpeer(xdst->u.rt6.rt6i_peer); xfrm_dst_destroy(xdst); } @@ -251,6 +260,7 @@ static struct dst_ops xfrm6_dst_ops = { .protocol = cpu_to_be16(ETH_P_IPV6), .gc = xfrm6_garbage_collect, .update_pmtu = xfrm6_update_pmtu, + .cow_metrics = dst_cow_metrics_generic, .destroy = xfrm6_dst_destroy, .ifdown = xfrm6_dst_ifdown, .local_out = __ip6_local_out, @@ -266,6 +276,7 @@ static struct xfrm_policy_afinfo xfrm6_policy_afinfo = { .get_tos = xfrm6_get_tos, .init_path = xfrm6_init_path, .fill_dst = xfrm6_fill_dst, + .blackhole_route = ip6_blackhole_route, }; static int __init xfrm6_policy_init(void) diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c index a67575d472a3..afe941e9415c 100644 --- a/net/ipv6/xfrm6_state.c +++ b/net/ipv6/xfrm6_state.c @@ -20,26 +20,28 @@ #include <net/addrconf.h> static void -__xfrm6_init_tempsel(struct xfrm_selector *sel, struct flowi *fl) +__xfrm6_init_tempsel(struct xfrm_selector 
*sel, const struct flowi *fl) { + const struct flowi6 *fl6 = &fl->u.ip6; + /* Initialize temporary selector matching only * to current session. */ - ipv6_addr_copy((struct in6_addr *)&sel->daddr, &fl->fl6_dst); - ipv6_addr_copy((struct in6_addr *)&sel->saddr, &fl->fl6_src); - sel->dport = xfrm_flowi_dport(fl); + ipv6_addr_copy((struct in6_addr *)&sel->daddr, &fl6->daddr); + ipv6_addr_copy((struct in6_addr *)&sel->saddr, &fl6->saddr); + sel->dport = xfrm_flowi_dport(fl, &fl6->uli); sel->dport_mask = htons(0xffff); - sel->sport = xfrm_flowi_sport(fl); + sel->sport = xfrm_flowi_sport(fl, &fl6->uli); sel->sport_mask = htons(0xffff); sel->family = AF_INET6; sel->prefixlen_d = 128; sel->prefixlen_s = 128; - sel->proto = fl->proto; - sel->ifindex = fl->oif; + sel->proto = fl6->flowi6_proto; + sel->ifindex = fl6->flowi6_oif; } static void -xfrm6_init_temprop(struct xfrm_state *x, struct xfrm_tmpl *tmpl, - xfrm_address_t *daddr, xfrm_address_t *saddr) +xfrm6_init_temprop(struct xfrm_state *x, const struct xfrm_tmpl *tmpl, + const xfrm_address_t *daddr, const xfrm_address_t *saddr) { x->id = tmpl->id; if (ipv6_addr_any((struct in6_addr*)&x->id.daddr)) diff --git a/net/key/af_key.c b/net/key/af_key.c index d87c22df6f1e..7db86ffcf070 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -70,7 +70,7 @@ static inline struct pfkey_sock *pfkey_sk(struct sock *sk) return (struct pfkey_sock *)sk; } -static int pfkey_can_dump(struct sock *sk) +static int pfkey_can_dump(const struct sock *sk) { if (3 * atomic_read(&sk->sk_rmem_alloc) <= 2 * sk->sk_rcvbuf) return 1; @@ -303,12 +303,13 @@ static int pfkey_do_dump(struct pfkey_sock *pfk) return rc; } -static inline void pfkey_hdr_dup(struct sadb_msg *new, struct sadb_msg *orig) +static inline void pfkey_hdr_dup(struct sadb_msg *new, + const struct sadb_msg *orig) { *new = *orig; } -static int pfkey_error(struct sadb_msg *orig, int err, struct sock *sk) +static int pfkey_error(const struct sadb_msg *orig, int err, struct sock *sk) { struct sk_buff *skb = alloc_skb(sizeof(struct sadb_msg) + 16, GFP_KERNEL); struct sadb_msg *hdr; @@ -369,13 +370,13 @@ static u8 sadb_ext_min_len[] = { }; /* Verify sadb_address_{len,prefixlen} against sa_family. 
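verify_address_len(), whose comment opens just above, is one of the PF_KEY parsers this patch const-ifies; it depends on the sockaddr being laid out immediately after the fixed struct sadb_address header, the same `(sp + 1)` idiom the hunk below keeps. A small, hypothetical accessor making that layout assumption explicit:

#include <linux/pfkeyv2.h>
#include <linux/socket.h>

/* hypothetical helper: the sockaddr follows the sadb_address header in memory */
static const struct sockaddr *sadb_addr_sockaddr(const struct sadb_address *sp)
{
	return (const struct sockaddr *)(sp + 1);
}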
*/ -static int verify_address_len(void *p) +static int verify_address_len(const void *p) { - struct sadb_address *sp = p; - struct sockaddr *addr = (struct sockaddr *)(sp + 1); - struct sockaddr_in *sin; + const struct sadb_address *sp = p; + const struct sockaddr *addr = (const struct sockaddr *)(sp + 1); + const struct sockaddr_in *sin; #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) - struct sockaddr_in6 *sin6; + const struct sockaddr_in6 *sin6; #endif int len; @@ -411,16 +412,16 @@ static int verify_address_len(void *p) return 0; } -static inline int pfkey_sec_ctx_len(struct sadb_x_sec_ctx *sec_ctx) +static inline int pfkey_sec_ctx_len(const struct sadb_x_sec_ctx *sec_ctx) { return DIV_ROUND_UP(sizeof(struct sadb_x_sec_ctx) + sec_ctx->sadb_x_ctx_len, sizeof(uint64_t)); } -static inline int verify_sec_ctx_len(void *p) +static inline int verify_sec_ctx_len(const void *p) { - struct sadb_x_sec_ctx *sec_ctx = (struct sadb_x_sec_ctx *)p; + const struct sadb_x_sec_ctx *sec_ctx = p; int len = sec_ctx->sadb_x_ctx_len; if (len > PAGE_SIZE) @@ -434,7 +435,7 @@ static inline int verify_sec_ctx_len(void *p) return 0; } -static inline struct xfrm_user_sec_ctx *pfkey_sadb2xfrm_user_sec_ctx(struct sadb_x_sec_ctx *sec_ctx) +static inline struct xfrm_user_sec_ctx *pfkey_sadb2xfrm_user_sec_ctx(const struct sadb_x_sec_ctx *sec_ctx) { struct xfrm_user_sec_ctx *uctx = NULL; int ctx_size = sec_ctx->sadb_x_ctx_len; @@ -455,16 +456,16 @@ static inline struct xfrm_user_sec_ctx *pfkey_sadb2xfrm_user_sec_ctx(struct sadb return uctx; } -static int present_and_same_family(struct sadb_address *src, - struct sadb_address *dst) +static int present_and_same_family(const struct sadb_address *src, + const struct sadb_address *dst) { - struct sockaddr *s_addr, *d_addr; + const struct sockaddr *s_addr, *d_addr; if (!src || !dst) return 0; - s_addr = (struct sockaddr *)(src + 1); - d_addr = (struct sockaddr *)(dst + 1); + s_addr = (const struct sockaddr *)(src + 1); + d_addr = (const struct sockaddr *)(dst + 1); if (s_addr->sa_family != d_addr->sa_family) return 0; if (s_addr->sa_family != AF_INET @@ -477,15 +478,15 @@ static int present_and_same_family(struct sadb_address *src, return 1; } -static int parse_exthdrs(struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs) +static int parse_exthdrs(struct sk_buff *skb, const struct sadb_msg *hdr, void **ext_hdrs) { - char *p = (char *) hdr; + const char *p = (char *) hdr; int len = skb->len; len -= sizeof(*hdr); p += sizeof(*hdr); while (len > 0) { - struct sadb_ext *ehdr = (struct sadb_ext *) p; + const struct sadb_ext *ehdr = (const struct sadb_ext *) p; uint16_t ext_type; int ext_len; @@ -514,7 +515,7 @@ static int parse_exthdrs(struct sk_buff *skb, struct sadb_msg *hdr, void **ext_h if (verify_sec_ctx_len(p)) return -EINVAL; } - ext_hdrs[ext_type-1] = p; + ext_hdrs[ext_type-1] = (void *) p; } p += ext_len; len -= ext_len; @@ -606,21 +607,21 @@ int pfkey_sockaddr_extract(const struct sockaddr *sa, xfrm_address_t *xaddr) } static -int pfkey_sadb_addr2xfrm_addr(struct sadb_address *addr, xfrm_address_t *xaddr) +int pfkey_sadb_addr2xfrm_addr(const struct sadb_address *addr, xfrm_address_t *xaddr) { return pfkey_sockaddr_extract((struct sockaddr *)(addr + 1), xaddr); } -static struct xfrm_state *pfkey_xfrm_state_lookup(struct net *net, struct sadb_msg *hdr, void **ext_hdrs) +static struct xfrm_state *pfkey_xfrm_state_lookup(struct net *net, const struct sadb_msg *hdr, void * const *ext_hdrs) { - struct sadb_sa *sa; - struct sadb_address *addr; + const struct 
sadb_sa *sa; + const struct sadb_address *addr; uint16_t proto; unsigned short family; xfrm_address_t *xaddr; - sa = (struct sadb_sa *) ext_hdrs[SADB_EXT_SA-1]; + sa = (const struct sadb_sa *) ext_hdrs[SADB_EXT_SA-1]; if (sa == NULL) return NULL; @@ -629,18 +630,18 @@ static struct xfrm_state *pfkey_xfrm_state_lookup(struct net *net, struct sadb_ return NULL; /* sadb_address_len should be checked by caller */ - addr = (struct sadb_address *) ext_hdrs[SADB_EXT_ADDRESS_DST-1]; + addr = (const struct sadb_address *) ext_hdrs[SADB_EXT_ADDRESS_DST-1]; if (addr == NULL) return NULL; - family = ((struct sockaddr *)(addr + 1))->sa_family; + family = ((const struct sockaddr *)(addr + 1))->sa_family; switch (family) { case AF_INET: - xaddr = (xfrm_address_t *)&((struct sockaddr_in *)(addr + 1))->sin_addr; + xaddr = (xfrm_address_t *)&((const struct sockaddr_in *)(addr + 1))->sin_addr; break; #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) case AF_INET6: - xaddr = (xfrm_address_t *)&((struct sockaddr_in6 *)(addr + 1))->sin6_addr; + xaddr = (xfrm_address_t *)&((const struct sockaddr_in6 *)(addr + 1))->sin6_addr; break; #endif default: @@ -690,9 +691,9 @@ static inline int pfkey_mode_to_xfrm(int mode) } } -static unsigned int pfkey_sockaddr_fill(xfrm_address_t *xaddr, __be16 port, - struct sockaddr *sa, - unsigned short family) +static unsigned int pfkey_sockaddr_fill(const xfrm_address_t *xaddr, __be16 port, + struct sockaddr *sa, + unsigned short family) { switch (family) { case AF_INET: @@ -720,7 +721,7 @@ static unsigned int pfkey_sockaddr_fill(xfrm_address_t *xaddr, __be16 port, return 0; } -static struct sk_buff *__pfkey_xfrm_state2msg(struct xfrm_state *x, +static struct sk_buff *__pfkey_xfrm_state2msg(const struct xfrm_state *x, int add_keys, int hsc) { struct sk_buff *skb; @@ -1010,7 +1011,7 @@ static struct sk_buff *__pfkey_xfrm_state2msg(struct xfrm_state *x, } -static inline struct sk_buff *pfkey_xfrm_state2msg(struct xfrm_state *x) +static inline struct sk_buff *pfkey_xfrm_state2msg(const struct xfrm_state *x) { struct sk_buff *skb; @@ -1019,26 +1020,26 @@ static inline struct sk_buff *pfkey_xfrm_state2msg(struct xfrm_state *x) return skb; } -static inline struct sk_buff *pfkey_xfrm_state2msg_expire(struct xfrm_state *x, +static inline struct sk_buff *pfkey_xfrm_state2msg_expire(const struct xfrm_state *x, int hsc) { return __pfkey_xfrm_state2msg(x, 0, hsc); } static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net, - struct sadb_msg *hdr, - void **ext_hdrs) + const struct sadb_msg *hdr, + void * const *ext_hdrs) { struct xfrm_state *x; - struct sadb_lifetime *lifetime; - struct sadb_sa *sa; - struct sadb_key *key; - struct sadb_x_sec_ctx *sec_ctx; + const struct sadb_lifetime *lifetime; + const struct sadb_sa *sa; + const struct sadb_key *key; + const struct sadb_x_sec_ctx *sec_ctx; uint16_t proto; int err; - sa = (struct sadb_sa *) ext_hdrs[SADB_EXT_SA-1]; + sa = (const struct sadb_sa *) ext_hdrs[SADB_EXT_SA-1]; if (!sa || !present_and_same_family(ext_hdrs[SADB_EXT_ADDRESS_SRC-1], ext_hdrs[SADB_EXT_ADDRESS_DST-1])) @@ -1077,7 +1078,7 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net, sa->sadb_sa_encrypt > SADB_X_CALG_MAX) || sa->sadb_sa_encrypt > SADB_EALG_MAX) return ERR_PTR(-EINVAL); - key = (struct sadb_key*) ext_hdrs[SADB_EXT_KEY_AUTH-1]; + key = (const struct sadb_key*) ext_hdrs[SADB_EXT_KEY_AUTH-1]; if (key != NULL && sa->sadb_sa_auth != SADB_X_AALG_NULL && ((key->sadb_key_bits+7) / 8 == 0 || @@ -1104,14 +1105,14 @@ static struct xfrm_state * 
pfkey_msg2xfrm_state(struct net *net, if (sa->sadb_sa_flags & SADB_SAFLAGS_NOPMTUDISC) x->props.flags |= XFRM_STATE_NOPMTUDISC; - lifetime = (struct sadb_lifetime*) ext_hdrs[SADB_EXT_LIFETIME_HARD-1]; + lifetime = (const struct sadb_lifetime*) ext_hdrs[SADB_EXT_LIFETIME_HARD-1]; if (lifetime != NULL) { x->lft.hard_packet_limit = _KEY2X(lifetime->sadb_lifetime_allocations); x->lft.hard_byte_limit = _KEY2X(lifetime->sadb_lifetime_bytes); x->lft.hard_add_expires_seconds = lifetime->sadb_lifetime_addtime; x->lft.hard_use_expires_seconds = lifetime->sadb_lifetime_usetime; } - lifetime = (struct sadb_lifetime*) ext_hdrs[SADB_EXT_LIFETIME_SOFT-1]; + lifetime = (const struct sadb_lifetime*) ext_hdrs[SADB_EXT_LIFETIME_SOFT-1]; if (lifetime != NULL) { x->lft.soft_packet_limit = _KEY2X(lifetime->sadb_lifetime_allocations); x->lft.soft_byte_limit = _KEY2X(lifetime->sadb_lifetime_bytes); @@ -1119,7 +1120,7 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net, x->lft.soft_use_expires_seconds = lifetime->sadb_lifetime_usetime; } - sec_ctx = (struct sadb_x_sec_ctx *) ext_hdrs[SADB_X_EXT_SEC_CTX-1]; + sec_ctx = (const struct sadb_x_sec_ctx *) ext_hdrs[SADB_X_EXT_SEC_CTX-1]; if (sec_ctx != NULL) { struct xfrm_user_sec_ctx *uctx = pfkey_sadb2xfrm_user_sec_ctx(sec_ctx); @@ -1133,7 +1134,7 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net, goto out; } - key = (struct sadb_key*) ext_hdrs[SADB_EXT_KEY_AUTH-1]; + key = (const struct sadb_key*) ext_hdrs[SADB_EXT_KEY_AUTH-1]; if (sa->sadb_sa_auth) { int keysize = 0; struct xfrm_algo_desc *a = xfrm_aalg_get_byid(sa->sadb_sa_auth); @@ -1202,7 +1203,7 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net, &x->id.daddr); if (ext_hdrs[SADB_X_EXT_SA2-1]) { - struct sadb_x_sa2 *sa2 = (void*)ext_hdrs[SADB_X_EXT_SA2-1]; + const struct sadb_x_sa2 *sa2 = ext_hdrs[SADB_X_EXT_SA2-1]; int mode = pfkey_mode_to_xfrm(sa2->sadb_x_sa2_mode); if (mode < 0) { err = -EINVAL; @@ -1213,7 +1214,7 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net, } if (ext_hdrs[SADB_EXT_ADDRESS_PROXY-1]) { - struct sadb_address *addr = ext_hdrs[SADB_EXT_ADDRESS_PROXY-1]; + const struct sadb_address *addr = ext_hdrs[SADB_EXT_ADDRESS_PROXY-1]; /* Nobody uses this, but we try. 
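Throughout pfkey_msg2xfrm_state() above, extension headers are fetched from the ext_hdrs[] array that parse_exthdrs() fills in, indexed by the SADB extension type minus one, and this patch casts every such access to a const pointer. A tiny, hypothetical accessor spelling out that convention:

#include <linux/pfkeyv2.h>

/* hypothetical helper: ext_hdrs[] is indexed by SADB_EXT_* - 1, as in parse_exthdrs() */
static const struct sadb_key *pfkey_auth_key(void * const *ext_hdrs)
{
	/* may be NULL when the AUTH key extension was absent from the message */
	return (const struct sadb_key *)ext_hdrs[SADB_EXT_KEY_AUTH - 1];
}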
*/ x->sel.family = pfkey_sadb_addr2xfrm_addr(addr, &x->sel.saddr); @@ -1224,7 +1225,7 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net, x->sel.family = x->props.family; if (ext_hdrs[SADB_X_EXT_NAT_T_TYPE-1]) { - struct sadb_x_nat_t_type* n_type; + const struct sadb_x_nat_t_type* n_type; struct xfrm_encap_tmpl *natt; x->encap = kmalloc(sizeof(*x->encap), GFP_KERNEL); @@ -1236,12 +1237,12 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net, natt->encap_type = n_type->sadb_x_nat_t_type_type; if (ext_hdrs[SADB_X_EXT_NAT_T_SPORT-1]) { - struct sadb_x_nat_t_port* n_port = + const struct sadb_x_nat_t_port *n_port = ext_hdrs[SADB_X_EXT_NAT_T_SPORT-1]; natt->encap_sport = n_port->sadb_x_nat_t_port_port; } if (ext_hdrs[SADB_X_EXT_NAT_T_DPORT-1]) { - struct sadb_x_nat_t_port* n_port = + const struct sadb_x_nat_t_port *n_port = ext_hdrs[SADB_X_EXT_NAT_T_DPORT-1]; natt->encap_dport = n_port->sadb_x_nat_t_port_port; } @@ -1261,12 +1262,12 @@ out: return ERR_PTR(err); } -static int pfkey_reserved(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs) +static int pfkey_reserved(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs) { return -EOPNOTSUPP; } -static int pfkey_getspi(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs) +static int pfkey_getspi(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs) { struct net *net = sock_net(sk); struct sk_buff *resp_skb; @@ -1365,7 +1366,7 @@ static int pfkey_getspi(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h return 0; } -static int pfkey_acquire(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs) +static int pfkey_acquire(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs) { struct net *net = sock_net(sk); struct xfrm_state *x; @@ -1429,7 +1430,7 @@ static inline int event2keytype(int event) } /* ADD/UPD/DEL */ -static int key_notify_sa(struct xfrm_state *x, struct km_event *c) +static int key_notify_sa(struct xfrm_state *x, const struct km_event *c) { struct sk_buff *skb; struct sadb_msg *hdr; @@ -1453,7 +1454,7 @@ static int key_notify_sa(struct xfrm_state *x, struct km_event *c) return 0; } -static int pfkey_add(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs) +static int pfkey_add(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs) { struct net *net = sock_net(sk); struct xfrm_state *x; @@ -1492,7 +1493,7 @@ out: return err; } -static int pfkey_delete(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs) +static int pfkey_delete(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs) { struct net *net = sock_net(sk); struct xfrm_state *x; @@ -1534,7 +1535,7 @@ out: return err; } -static int pfkey_get(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs) +static int pfkey_get(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs) { struct net *net = sock_net(sk); __u8 proto; @@ -1570,7 +1571,7 @@ static int pfkey_get(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, return 0; } -static struct sk_buff *compose_sadb_supported(struct sadb_msg *orig, +static struct sk_buff *compose_sadb_supported(const struct sadb_msg *orig, gfp_t allocation) { struct sk_buff *skb; @@ -1642,7 +1643,7 @@ out_put_algs: return skb; } -static int pfkey_register(struct 
sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs) +static int pfkey_register(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs) { struct pfkey_sock *pfk = pfkey_sk(sk); struct sk_buff *supp_skb; @@ -1671,7 +1672,7 @@ static int pfkey_register(struct sock *sk, struct sk_buff *skb, struct sadb_msg return 0; } -static int unicast_flush_resp(struct sock *sk, struct sadb_msg *ihdr) +static int unicast_flush_resp(struct sock *sk, const struct sadb_msg *ihdr) { struct sk_buff *skb; struct sadb_msg *hdr; @@ -1688,7 +1689,7 @@ static int unicast_flush_resp(struct sock *sk, struct sadb_msg *ihdr) return pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_ONE, sk, sock_net(sk)); } -static int key_notify_sa_flush(struct km_event *c) +static int key_notify_sa_flush(const struct km_event *c) { struct sk_buff *skb; struct sadb_msg *hdr; @@ -1710,7 +1711,7 @@ static int key_notify_sa_flush(struct km_event *c) return 0; } -static int pfkey_flush(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs) +static int pfkey_flush(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs) { struct net *net = sock_net(sk); unsigned proto; @@ -1784,7 +1785,7 @@ static void pfkey_dump_sa_done(struct pfkey_sock *pfk) xfrm_state_walk_done(&pfk->dump.u.state); } -static int pfkey_dump(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs) +static int pfkey_dump(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs) { u8 proto; struct pfkey_sock *pfk = pfkey_sk(sk); @@ -1805,19 +1806,29 @@ static int pfkey_dump(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr return pfkey_do_dump(pfk); } -static int pfkey_promisc(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs) +static int pfkey_promisc(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs) { struct pfkey_sock *pfk = pfkey_sk(sk); int satype = hdr->sadb_msg_satype; + bool reset_errno = false; if (hdr->sadb_msg_len == (sizeof(*hdr) / sizeof(uint64_t))) { - /* XXX we mangle packet... 
*/ - hdr->sadb_msg_errno = 0; + reset_errno = true; if (satype != 0 && satype != 1) return -EINVAL; pfk->promisc = satype; } - pfkey_broadcast(skb_clone(skb, GFP_KERNEL), GFP_KERNEL, BROADCAST_ALL, NULL, sock_net(sk)); + if (reset_errno && skb_cloned(skb)) + skb = skb_copy(skb, GFP_KERNEL); + else + skb = skb_clone(skb, GFP_KERNEL); + + if (reset_errno && skb) { + struct sadb_msg *new_hdr = (struct sadb_msg *) skb->data; + new_hdr->sadb_msg_errno = 0; + } + + pfkey_broadcast(skb, GFP_KERNEL, BROADCAST_ALL, NULL, sock_net(sk)); return 0; } @@ -1921,7 +1932,7 @@ parse_ipsecrequests(struct xfrm_policy *xp, struct sadb_x_policy *pol) return 0; } -static inline int pfkey_xfrm_policy2sec_ctx_size(struct xfrm_policy *xp) +static inline int pfkey_xfrm_policy2sec_ctx_size(const struct xfrm_policy *xp) { struct xfrm_sec_ctx *xfrm_ctx = xp->security; @@ -1933,9 +1944,9 @@ static inline int pfkey_xfrm_policy2sec_ctx_size(struct xfrm_policy *xp) return 0; } -static int pfkey_xfrm_policy2msg_size(struct xfrm_policy *xp) +static int pfkey_xfrm_policy2msg_size(const struct xfrm_policy *xp) { - struct xfrm_tmpl *t; + const struct xfrm_tmpl *t; int sockaddr_size = pfkey_sockaddr_size(xp->family); int socklen = 0; int i; @@ -1955,7 +1966,7 @@ static int pfkey_xfrm_policy2msg_size(struct xfrm_policy *xp) pfkey_xfrm_policy2sec_ctx_size(xp); } -static struct sk_buff * pfkey_xfrm_policy2msg_prep(struct xfrm_policy *xp) +static struct sk_buff * pfkey_xfrm_policy2msg_prep(const struct xfrm_policy *xp) { struct sk_buff *skb; int size; @@ -1969,7 +1980,7 @@ static struct sk_buff * pfkey_xfrm_policy2msg_prep(struct xfrm_policy *xp) return skb; } -static int pfkey_xfrm_policy2msg(struct sk_buff *skb, struct xfrm_policy *xp, int dir) +static int pfkey_xfrm_policy2msg(struct sk_buff *skb, const struct xfrm_policy *xp, int dir) { struct sadb_msg *hdr; struct sadb_address *addr; @@ -2065,8 +2076,8 @@ static int pfkey_xfrm_policy2msg(struct sk_buff *skb, struct xfrm_policy *xp, in pol->sadb_x_policy_priority = xp->priority; for (i=0; i<xp->xfrm_nr; i++) { + const struct xfrm_tmpl *t = xp->xfrm_vec + i; struct sadb_x_ipsecrequest *rq; - struct xfrm_tmpl *t = xp->xfrm_vec + i; int req_size; int mode; @@ -2123,7 +2134,7 @@ static int pfkey_xfrm_policy2msg(struct sk_buff *skb, struct xfrm_policy *xp, in return 0; } -static int key_notify_policy(struct xfrm_policy *xp, int dir, struct km_event *c) +static int key_notify_policy(struct xfrm_policy *xp, int dir, const struct km_event *c) { struct sk_buff *out_skb; struct sadb_msg *out_hdr; @@ -2152,7 +2163,7 @@ static int key_notify_policy(struct xfrm_policy *xp, int dir, struct km_event *c } -static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs) +static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs) { struct net *net = sock_net(sk); int err = 0; @@ -2273,7 +2284,7 @@ out: return err; } -static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs) +static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs) { struct net *net = sock_net(sk); int err; @@ -2350,7 +2361,7 @@ out: return err; } -static int key_pol_get_resp(struct sock *sk, struct xfrm_policy *xp, struct sadb_msg *hdr, int dir) +static int key_pol_get_resp(struct sock *sk, struct xfrm_policy *xp, const struct sadb_msg *hdr, int dir) { int err; struct sk_buff *out_skb; @@ -2458,7 +2469,7 @@ static int 
ipsecrequests_to_migrate(struct sadb_x_ipsecrequest *rq1, int len, } static int pfkey_migrate(struct sock *sk, struct sk_buff *skb, - struct sadb_msg *hdr, void **ext_hdrs) + const struct sadb_msg *hdr, void * const *ext_hdrs) { int i, len, ret, err = -EINVAL; u8 dir; @@ -2549,14 +2560,14 @@ static int pfkey_migrate(struct sock *sk, struct sk_buff *skb, } #else static int pfkey_migrate(struct sock *sk, struct sk_buff *skb, - struct sadb_msg *hdr, void **ext_hdrs) + const struct sadb_msg *hdr, void * const *ext_hdrs) { return -ENOPROTOOPT; } #endif -static int pfkey_spdget(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs) +static int pfkey_spdget(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs) { struct net *net = sock_net(sk); unsigned int dir; @@ -2644,7 +2655,7 @@ static void pfkey_dump_sp_done(struct pfkey_sock *pfk) xfrm_policy_walk_done(&pfk->dump.u.policy); } -static int pfkey_spddump(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs) +static int pfkey_spddump(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs) { struct pfkey_sock *pfk = pfkey_sk(sk); @@ -2660,7 +2671,7 @@ static int pfkey_spddump(struct sock *sk, struct sk_buff *skb, struct sadb_msg * return pfkey_do_dump(pfk); } -static int key_notify_policy_flush(struct km_event *c) +static int key_notify_policy_flush(const struct km_event *c) { struct sk_buff *skb_out; struct sadb_msg *hdr; @@ -2680,7 +2691,7 @@ static int key_notify_policy_flush(struct km_event *c) } -static int pfkey_spdflush(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs) +static int pfkey_spdflush(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs) { struct net *net = sock_net(sk); struct km_event c; @@ -2709,7 +2720,7 @@ static int pfkey_spdflush(struct sock *sk, struct sk_buff *skb, struct sadb_msg } typedef int (*pfkey_handler)(struct sock *sk, struct sk_buff *skb, - struct sadb_msg *hdr, void **ext_hdrs); + const struct sadb_msg *hdr, void * const *ext_hdrs); static pfkey_handler pfkey_funcs[SADB_MAX + 1] = { [SADB_RESERVED] = pfkey_reserved, [SADB_GETSPI] = pfkey_getspi, @@ -2736,7 +2747,7 @@ static pfkey_handler pfkey_funcs[SADB_MAX + 1] = { [SADB_X_MIGRATE] = pfkey_migrate, }; -static int pfkey_process(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr) +static int pfkey_process(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr) { void *ext_hdrs[SADB_EXT_MAX]; int err; @@ -2781,7 +2792,8 @@ static struct sadb_msg *pfkey_get_base_msg(struct sk_buff *skb, int *errp) return hdr; } -static inline int aalg_tmpl_set(struct xfrm_tmpl *t, struct xfrm_algo_desc *d) +static inline int aalg_tmpl_set(const struct xfrm_tmpl *t, + const struct xfrm_algo_desc *d) { unsigned int id = d->desc.sadb_alg_id; @@ -2791,7 +2803,8 @@ static inline int aalg_tmpl_set(struct xfrm_tmpl *t, struct xfrm_algo_desc *d) return (t->aalgos >> id) & 1; } -static inline int ealg_tmpl_set(struct xfrm_tmpl *t, struct xfrm_algo_desc *d) +static inline int ealg_tmpl_set(const struct xfrm_tmpl *t, + const struct xfrm_algo_desc *d) { unsigned int id = d->desc.sadb_alg_id; @@ -2801,12 +2814,12 @@ static inline int ealg_tmpl_set(struct xfrm_tmpl *t, struct xfrm_algo_desc *d) return (t->ealgos >> id) & 1; } -static int count_ah_combs(struct xfrm_tmpl *t) +static int count_ah_combs(const struct xfrm_tmpl *t) { int i, sz = 0; for (i = 0; ; i++) { - struct xfrm_algo_desc *aalg = 
xfrm_aalg_get_byidx(i); + const struct xfrm_algo_desc *aalg = xfrm_aalg_get_byidx(i); if (!aalg) break; if (aalg_tmpl_set(t, aalg) && aalg->available) @@ -2815,12 +2828,12 @@ static int count_ah_combs(struct xfrm_tmpl *t) return sz + sizeof(struct sadb_prop); } -static int count_esp_combs(struct xfrm_tmpl *t) +static int count_esp_combs(const struct xfrm_tmpl *t) { int i, k, sz = 0; for (i = 0; ; i++) { - struct xfrm_algo_desc *ealg = xfrm_ealg_get_byidx(i); + const struct xfrm_algo_desc *ealg = xfrm_ealg_get_byidx(i); if (!ealg) break; @@ -2828,7 +2841,7 @@ static int count_esp_combs(struct xfrm_tmpl *t) continue; for (k = 1; ; k++) { - struct xfrm_algo_desc *aalg = xfrm_aalg_get_byidx(k); + const struct xfrm_algo_desc *aalg = xfrm_aalg_get_byidx(k); if (!aalg) break; @@ -2839,7 +2852,7 @@ static int count_esp_combs(struct xfrm_tmpl *t) return sz + sizeof(struct sadb_prop); } -static void dump_ah_combs(struct sk_buff *skb, struct xfrm_tmpl *t) +static void dump_ah_combs(struct sk_buff *skb, const struct xfrm_tmpl *t) { struct sadb_prop *p; int i; @@ -2851,7 +2864,7 @@ static void dump_ah_combs(struct sk_buff *skb, struct xfrm_tmpl *t) memset(p->sadb_prop_reserved, 0, sizeof(p->sadb_prop_reserved)); for (i = 0; ; i++) { - struct xfrm_algo_desc *aalg = xfrm_aalg_get_byidx(i); + const struct xfrm_algo_desc *aalg = xfrm_aalg_get_byidx(i); if (!aalg) break; @@ -2871,7 +2884,7 @@ static void dump_ah_combs(struct sk_buff *skb, struct xfrm_tmpl *t) } } -static void dump_esp_combs(struct sk_buff *skb, struct xfrm_tmpl *t) +static void dump_esp_combs(struct sk_buff *skb, const struct xfrm_tmpl *t) { struct sadb_prop *p; int i, k; @@ -2883,7 +2896,7 @@ static void dump_esp_combs(struct sk_buff *skb, struct xfrm_tmpl *t) memset(p->sadb_prop_reserved, 0, sizeof(p->sadb_prop_reserved)); for (i=0; ; i++) { - struct xfrm_algo_desc *ealg = xfrm_ealg_get_byidx(i); + const struct xfrm_algo_desc *ealg = xfrm_ealg_get_byidx(i); if (!ealg) break; @@ -2892,7 +2905,7 @@ static void dump_esp_combs(struct sk_buff *skb, struct xfrm_tmpl *t) for (k = 1; ; k++) { struct sadb_comb *c; - struct xfrm_algo_desc *aalg = xfrm_aalg_get_byidx(k); + const struct xfrm_algo_desc *aalg = xfrm_aalg_get_byidx(k); if (!aalg) break; if (!(aalg_tmpl_set(t, aalg) && aalg->available)) @@ -2914,12 +2927,12 @@ static void dump_esp_combs(struct sk_buff *skb, struct xfrm_tmpl *t) } } -static int key_notify_policy_expire(struct xfrm_policy *xp, struct km_event *c) +static int key_notify_policy_expire(struct xfrm_policy *xp, const struct km_event *c) { return 0; } -static int key_notify_sa_expire(struct xfrm_state *x, struct km_event *c) +static int key_notify_sa_expire(struct xfrm_state *x, const struct km_event *c) { struct sk_buff *out_skb; struct sadb_msg *out_hdr; @@ -2949,7 +2962,7 @@ static int key_notify_sa_expire(struct xfrm_state *x, struct km_event *c) return 0; } -static int pfkey_send_notify(struct xfrm_state *x, struct km_event *c) +static int pfkey_send_notify(struct xfrm_state *x, const struct km_event *c) { struct net *net = x ? 
xs_net(x) : c->net; struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id); @@ -2976,7 +2989,7 @@ static int pfkey_send_notify(struct xfrm_state *x, struct km_event *c) return 0; } -static int pfkey_send_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c) +static int pfkey_send_policy_notify(struct xfrm_policy *xp, int dir, const struct km_event *c) { if (xp && xp->type != XFRM_POLICY_TYPE_MAIN) return 0; @@ -3318,7 +3331,7 @@ static int pfkey_send_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, #ifdef CONFIG_NET_KEY_MIGRATE static int set_sadb_address(struct sk_buff *skb, int sasize, int type, - struct xfrm_selector *sel) + const struct xfrm_selector *sel) { struct sadb_address *addr; addr = (struct sadb_address *)skb_put(skb, sizeof(struct sadb_address) + sasize); @@ -3348,7 +3361,7 @@ static int set_sadb_address(struct sk_buff *skb, int sasize, int type, } -static int set_sadb_kmaddress(struct sk_buff *skb, struct xfrm_kmaddress *k) +static int set_sadb_kmaddress(struct sk_buff *skb, const struct xfrm_kmaddress *k) { struct sadb_x_kmaddress *kma; u8 *sa; @@ -3376,7 +3389,7 @@ static int set_sadb_kmaddress(struct sk_buff *skb, struct xfrm_kmaddress *k) static int set_ipsecrequest(struct sk_buff *skb, uint8_t proto, uint8_t mode, int level, uint32_t reqid, uint8_t family, - xfrm_address_t *src, xfrm_address_t *dst) + const xfrm_address_t *src, const xfrm_address_t *dst) { struct sadb_x_ipsecrequest *rq; u8 *sa; @@ -3404,9 +3417,9 @@ static int set_ipsecrequest(struct sk_buff *skb, #endif #ifdef CONFIG_NET_KEY_MIGRATE -static int pfkey_send_migrate(struct xfrm_selector *sel, u8 dir, u8 type, - struct xfrm_migrate *m, int num_bundles, - struct xfrm_kmaddress *k) +static int pfkey_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, + const struct xfrm_migrate *m, int num_bundles, + const struct xfrm_kmaddress *k) { int i; int sasize_sel; @@ -3415,7 +3428,7 @@ static int pfkey_send_migrate(struct xfrm_selector *sel, u8 dir, u8 type, struct sk_buff *skb; struct sadb_msg *hdr; struct sadb_x_policy *pol; - struct xfrm_migrate *mp; + const struct xfrm_migrate *mp; if (type != XFRM_POLICY_TYPE_MAIN) return 0; @@ -3513,9 +3526,9 @@ err: return -EINVAL; } #else -static int pfkey_send_migrate(struct xfrm_selector *sel, u8 dir, u8 type, - struct xfrm_migrate *m, int num_bundles, - struct xfrm_kmaddress *k) +static int pfkey_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, + const struct xfrm_migrate *m, int num_bundles, + const struct xfrm_kmaddress *k) { return -ENOPROTOOPT; } @@ -3655,6 +3668,7 @@ static int pfkey_seq_show(struct seq_file *f, void *v) } static void *pfkey_seq_start(struct seq_file *f, loff_t *ppos) + __acquires(rcu) { struct net *net = seq_file_net(f); struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id); @@ -3672,6 +3686,7 @@ static void *pfkey_seq_next(struct seq_file *f, void *v, loff_t *ppos) } static void pfkey_seq_stop(struct seq_file *f, void *v) + __releases(rcu) { rcu_read_unlock(); } diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index 110efb704c9b..fce9bd3bd3fe 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -320,11 +320,12 @@ static int l2tp_ip_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len if (ipv4_is_multicast(lsa->l2tp_addr.s_addr)) goto out; - rc = ip_route_connect(&rt, lsa->l2tp_addr.s_addr, saddr, + rt = ip_route_connect(lsa->l2tp_addr.s_addr, saddr, RT_CONN_FLAGS(sk), oif, IPPROTO_L2TP, - 0, 0, sk, 1); - if (rc) { + 0, 0, sk, true); + if (IS_ERR(rt)) { + rc = 
PTR_ERR(rt); if (rc == -ENETUNREACH) IP_INC_STATS_BH(&init_net, IPSTATS_MIB_OUTNOROUTES); goto out; @@ -474,24 +475,17 @@ static int l2tp_ip_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *m if (opt && opt->srr) daddr = opt->faddr; - { - struct flowi fl = { .oif = sk->sk_bound_dev_if, - .fl4_dst = daddr, - .fl4_src = inet->inet_saddr, - .fl4_tos = RT_CONN_FLAGS(sk), - .proto = sk->sk_protocol, - .flags = inet_sk_flowi_flags(sk), - .fl_ip_sport = inet->inet_sport, - .fl_ip_dport = inet->inet_dport }; - - /* If this fails, retransmit mechanism of transport layer will - * keep trying until route appears or the connection times - * itself out. - */ - security_sk_classify_flow(sk, &fl); - if (ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 0)) - goto no_route; - } + /* If this fails, retransmit mechanism of transport layer will + * keep trying until route appears or the connection times + * itself out. + */ + rt = ip_route_output_ports(sock_net(sk), sk, + daddr, inet->inet_saddr, + inet->inet_dport, inet->inet_sport, + sk->sk_protocol, RT_CONN_FLAGS(sk), + sk->sk_bound_dev_if); + if (IS_ERR(rt)) + goto no_route; sk_setup_caps(sk, &rt->dst); } skb_dst_set(skb, dst_clone(&rt->dst)); diff --git a/net/llc/llc_input.c b/net/llc/llc_input.c index f99687439139..058f1e9a9128 100644 --- a/net/llc/llc_input.c +++ b/net/llc/llc_input.c @@ -181,25 +181,26 @@ int llc_rcv(struct sk_buff *skb, struct net_device *dev, * LLC functionality */ rcv = rcu_dereference(sap->rcv_func); - if (rcv) { - struct sk_buff *cskb = skb_clone(skb, GFP_ATOMIC); - if (cskb) - rcv(cskb, dev, pt, orig_dev); - } dest = llc_pdu_type(skb); - if (unlikely(!dest || !llc_type_handlers[dest - 1])) - goto drop_put; - llc_type_handlers[dest - 1](sap, skb); -out_put: + if (unlikely(!dest || !llc_type_handlers[dest - 1])) { + if (rcv) + rcv(skb, dev, pt, orig_dev); + else + kfree_skb(skb); + } else { + if (rcv) { + struct sk_buff *cskb = skb_clone(skb, GFP_ATOMIC); + if (cskb) + rcv(cskb, dev, pt, orig_dev); + } + llc_type_handlers[dest - 1](sap, skb); + } llc_sap_put(sap); out: return 0; drop: kfree_skb(skb); goto out; -drop_put: - kfree_skb(skb); - goto out_put; handle_station: if (!llc_station_handler) goto drop; diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig index 9109262abd24..513f85cc2ae1 100644 --- a/net/mac80211/Kconfig +++ b/net/mac80211/Kconfig @@ -17,10 +17,10 @@ comment "CFG80211 needs to be enabled for MAC80211" if MAC80211 != n config MAC80211_HAS_RC - def_bool n + bool config MAC80211_RC_PID - bool "PID controller based rate control algorithm" if EMBEDDED + bool "PID controller based rate control algorithm" if EXPERT select MAC80211_HAS_RC ---help--- This option enables a TX rate control algorithm for @@ -28,14 +28,14 @@ config MAC80211_RC_PID rate. 
config MAC80211_RC_MINSTREL - bool "Minstrel" if EMBEDDED + bool "Minstrel" if EXPERT select MAC80211_HAS_RC default y ---help--- This option enables the 'minstrel' TX rate control algorithm config MAC80211_RC_MINSTREL_HT - bool "Minstrel 802.11n support" if EMBEDDED + bool "Minstrel 802.11n support" if EXPERT depends on MAC80211_RC_MINSTREL default y ---help--- @@ -78,7 +78,7 @@ config MAC80211_RC_DEFAULT endif comment "Some wireless drivers require a rate control algorithm" - depends on MAC80211_HAS_RC=n + depends on MAC80211 && MAC80211_HAS_RC=n config MAC80211_MESH bool "Enable mac80211 mesh networking (pre-802.11s) support" diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c index 227ca82eef72..0c9d0c07eae6 100644 --- a/net/mac80211/agg-rx.c +++ b/net/mac80211/agg-rx.c @@ -76,7 +76,7 @@ void ___ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid, #endif /* CONFIG_MAC80211_HT_DEBUG */ if (drv_ampdu_action(local, sta->sdata, IEEE80211_AMPDU_RX_STOP, - &sta->sta, tid, NULL)) + &sta->sta, tid, NULL, 0)) printk(KERN_DEBUG "HW problem - can not stop rx " "aggregation for tid %d\n", tid); @@ -232,6 +232,9 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, if (buf_size == 0) buf_size = IEEE80211_MAX_AMPDU_BUF; + /* make sure the size doesn't exceed the maximum supported by the hw */ + if (buf_size > local->hw.max_rx_aggregation_subframes) + buf_size = local->hw.max_rx_aggregation_subframes; /* examine state machine */ mutex_lock(&sta->ampdu_mlme.mtx); @@ -287,7 +290,7 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, } ret = drv_ampdu_action(local, sta->sdata, IEEE80211_AMPDU_RX_START, - &sta->sta, tid, &start_seq_num); + &sta->sta, tid, &start_seq_num, 0); #ifdef CONFIG_MAC80211_HT_DEBUG printk(KERN_DEBUG "Rx A-MPDU request on tid %d result %d\n", tid, ret); #endif /* CONFIG_MAC80211_HT_DEBUG */ diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index 9cc472c6a6a5..63d852cb4ca2 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -190,7 +190,7 @@ int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, ret = drv_ampdu_action(local, sta->sdata, IEEE80211_AMPDU_TX_STOP, - &sta->sta, tid, NULL); + &sta->sta, tid, NULL, 0); /* HW shall not deny going back to legacy */ if (WARN_ON(ret)) { @@ -311,7 +311,7 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid) start_seq_num = sta->tid_seq[tid] >> 4; ret = drv_ampdu_action(local, sdata, IEEE80211_AMPDU_TX_START, - &sta->sta, tid, &start_seq_num); + &sta->sta, tid, &start_seq_num, 0); if (ret) { #ifdef CONFIG_MAC80211_HT_DEBUG printk(KERN_DEBUG "BA request denied - HW unavailable for" @@ -342,7 +342,8 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid) /* send AddBA request */ ieee80211_send_addba_request(sdata, sta->sta.addr, tid, tid_tx->dialog_token, start_seq_num, - 0x40, tid_tx->timeout); + local->hw.max_tx_aggregation_subframes, + tid_tx->timeout); } int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid, @@ -487,7 +488,8 @@ static void ieee80211_agg_tx_operational(struct ieee80211_local *local, drv_ampdu_action(local, sta->sdata, IEEE80211_AMPDU_TX_OPERATIONAL, - &sta->sta, tid, NULL); + &sta->sta, tid, NULL, + sta->ampdu_mlme.tid_tx[tid]->buf_size); /* * synchronize with TX path, while splicing the TX path @@ -742,9 +744,11 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local, { struct tid_ampdu_tx *tid_tx; u16 capab, tid; + u8 buf_size; capab = 
le16_to_cpu(mgmt->u.action.u.addba_resp.capab); tid = (capab & IEEE80211_ADDBA_PARAM_TID_MASK) >> 2; + buf_size = (capab & IEEE80211_ADDBA_PARAM_BUF_SIZE_MASK) >> 6; mutex_lock(&sta->ampdu_mlme.mtx); @@ -767,12 +771,23 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local, if (le16_to_cpu(mgmt->u.action.u.addba_resp.status) == WLAN_STATUS_SUCCESS) { + /* + * IEEE 802.11-2007 7.3.1.14: + * In an ADDBA Response frame, when the Status Code field + * is set to 0, the Buffer Size subfield is set to a value + * of at least 1. + */ + if (!buf_size) + goto out; + if (test_and_set_bit(HT_AGG_STATE_RESPONSE_RECEIVED, &tid_tx->state)) { /* ignore duplicate response */ goto out; } + tid_tx->buf_size = buf_size; + if (test_bit(HT_AGG_STATE_DRV_READY, &tid_tx->state)) ieee80211_agg_tx_operational(local, sta, tid); diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 4bc8a9250cfd..7b701dcddb50 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -316,6 +316,17 @@ static int ieee80211_config_default_mgmt_key(struct wiphy *wiphy, return 0; } +static void rate_idx_to_bitrate(struct rate_info *rate, struct sta_info *sta, int idx) +{ + if (!(rate->flags & RATE_INFO_FLAGS_MCS)) { + struct ieee80211_supported_band *sband; + sband = sta->local->hw.wiphy->bands[ + sta->local->hw.conf.channel->band]; + rate->legacy = sband->bitrates[idx].bitrate; + } else + rate->mcs = idx; +} + static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) { struct ieee80211_sub_if_data *sdata = sta->sdata; @@ -330,6 +341,7 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) STATION_INFO_TX_RETRIES | STATION_INFO_TX_FAILED | STATION_INFO_TX_BITRATE | + STATION_INFO_RX_BITRATE | STATION_INFO_RX_DROP_MISC; sinfo->inactive_time = jiffies_to_msecs(jiffies - sta->last_rx); @@ -355,15 +367,16 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) sinfo->txrate.flags |= RATE_INFO_FLAGS_40_MHZ_WIDTH; if (sta->last_tx_rate.flags & IEEE80211_TX_RC_SHORT_GI) sinfo->txrate.flags |= RATE_INFO_FLAGS_SHORT_GI; + rate_idx_to_bitrate(&sinfo->txrate, sta, sta->last_tx_rate.idx); - if (!(sta->last_tx_rate.flags & IEEE80211_TX_RC_MCS)) { - struct ieee80211_supported_band *sband; - sband = sta->local->hw.wiphy->bands[ - sta->local->hw.conf.channel->band]; - sinfo->txrate.legacy = - sband->bitrates[sta->last_tx_rate.idx].bitrate; - } else - sinfo->txrate.mcs = sta->last_tx_rate.idx; + sinfo->rxrate.flags = 0; + if (sta->last_rx_rate_flag & RX_FLAG_HT) + sinfo->rxrate.flags |= RATE_INFO_FLAGS_MCS; + if (sta->last_rx_rate_flag & RX_FLAG_40MHZ) + sinfo->rxrate.flags |= RATE_INFO_FLAGS_40_MHZ_WIDTH; + if (sta->last_rx_rate_flag & RX_FLAG_SHORT_GI) + sinfo->rxrate.flags |= RATE_INFO_FLAGS_SHORT_GI; + rate_idx_to_bitrate(&sinfo->rxrate, sta, sta->last_rx_rate_idx); if (ieee80211_vif_is_mesh(&sdata->vif)) { #ifdef CONFIG_MAC80211_MESH @@ -1215,6 +1228,9 @@ static int ieee80211_set_channel(struct wiphy *wiphy, { struct ieee80211_local *local = wiphy_priv(wiphy); struct ieee80211_sub_if_data *sdata = NULL; + struct ieee80211_channel *old_oper; + enum nl80211_channel_type old_oper_type; + enum nl80211_channel_type old_vif_oper_type= NL80211_CHAN_NO_HT; if (netdev) sdata = IEEE80211_DEV_TO_SUB_IF(netdev); @@ -1232,13 +1248,23 @@ static int ieee80211_set_channel(struct wiphy *wiphy, break; } - local->oper_channel = chan; + if (sdata) + old_vif_oper_type = sdata->vif.bss_conf.channel_type; + old_oper_type = local->_oper_channel_type; if (!ieee80211_set_channel_type(local, 
sdata, channel_type)) return -EBUSY; - ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL); - if (sdata && sdata->vif.type != NL80211_IFTYPE_MONITOR) + old_oper = local->oper_channel; + local->oper_channel = chan; + + /* Update driver if changes were actually made. */ + if ((old_oper != local->oper_channel) || + (old_oper_type != local->_oper_channel_type)) + ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL); + + if ((sdata && sdata->vif.type != NL80211_IFTYPE_MONITOR) && + old_vif_oper_type != sdata->vif.bss_conf.channel_type) ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_HT); return 0; @@ -1274,8 +1300,11 @@ static int ieee80211_scan(struct wiphy *wiphy, case NL80211_IFTYPE_P2P_GO: if (sdata->local->ops->hw_scan) break; - /* FIXME: implement NoA while scanning in software */ - return -EOPNOTSUPP; + /* + * FIXME: implement NoA while scanning in software, + * for now fall through to allow scanning only when + * beaconing hasn't been configured yet + */ case NL80211_IFTYPE_AP: if (sdata->u.ap.beacon) return -EOPNOTSUPP; @@ -1784,6 +1813,33 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct net_device *dev, *cookie = (unsigned long) skb; + if (is_offchan && local->ops->offchannel_tx) { + int ret; + + IEEE80211_SKB_CB(skb)->band = chan->band; + + mutex_lock(&local->mtx); + + if (local->hw_offchan_tx_cookie) { + mutex_unlock(&local->mtx); + return -EBUSY; + } + + /* TODO: bitrate control, TX processing? */ + ret = drv_offchannel_tx(local, skb, chan, channel_type, wait); + + if (ret == 0) + local->hw_offchan_tx_cookie = *cookie; + mutex_unlock(&local->mtx); + + /* + * Allow driver to return 1 to indicate it wants to have the + * frame transmitted with a remain_on_channel + regular TX. + */ + if (ret != 1) + return ret; + } + if (is_offchan && local->ops->remain_on_channel) { unsigned int duration; int ret; @@ -1822,6 +1878,7 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct net_device *dev, *cookie ^= 2; IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_CTL_TX_OFFCHAN; local->hw_roc_skb = skb; + local->hw_roc_skb_for_status = skb; mutex_unlock(&local->mtx); return 0; @@ -1846,6 +1903,7 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct net_device *dev, wk->type = IEEE80211_WORK_OFFCHANNEL_TX; wk->chan = chan; + wk->chan_type = channel_type; wk->sdata = sdata; wk->done = ieee80211_offchan_tx_done; wk->offchan_tx.frame = skb; @@ -1868,6 +1926,18 @@ static int ieee80211_mgmt_tx_cancel_wait(struct wiphy *wiphy, mutex_lock(&local->mtx); + if (local->ops->offchannel_tx_cancel_wait && + local->hw_offchan_tx_cookie == cookie) { + ret = drv_offchannel_tx_cancel_wait(local); + + if (!ret) + local->hw_offchan_tx_cookie = 0; + + mutex_unlock(&local->mtx); + + return ret; + } + if (local->ops->cancel_remain_on_channel) { cookie ^= 2; ret = ieee80211_cancel_remain_on_channel_hw(local, cookie); @@ -1875,6 +1945,7 @@ static int ieee80211_mgmt_tx_cancel_wait(struct wiphy *wiphy, if (ret == 0) { kfree_skb(local->hw_roc_skb); local->hw_roc_skb = NULL; + local->hw_roc_skb_for_status = NULL; } mutex_unlock(&local->mtx); diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c index 1f02e599a318..51f0d780dafa 100644 --- a/net/mac80211/debugfs.c +++ b/net/mac80211/debugfs.c @@ -60,6 +60,10 @@ static const struct file_operations name## _ops = { \ debugfs_create_file(#name, mode, phyd, local, &name## _ops); +DEBUGFS_READONLY_FILE(user_power, "%d", + local->user_power_level); +DEBUGFS_READONLY_FILE(power, "%d", + local->hw.conf.power_level); DEBUGFS_READONLY_FILE(frequency, 
"%d", local->hw.conf.channel->center_freq); DEBUGFS_READONLY_FILE(total_ps_buffered, "%d", @@ -391,6 +395,8 @@ void debugfs_hw_add(struct ieee80211_local *local) DEBUGFS_ADD(uapsd_queues); DEBUGFS_ADD(uapsd_max_sp_len); DEBUGFS_ADD(channel_type); + DEBUGFS_ADD(user_power); + DEBUGFS_ADD(power); statsd = debugfs_create_dir("statistics", phyd); diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index 2dabdf7680d0..dacace6b1393 100644 --- a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c @@ -36,7 +36,7 @@ static ssize_t ieee80211_if_read( ret = (*format)(sdata, buf, sizeof(buf)); read_unlock(&dev_base_lock); - if (ret != -EINVAL) + if (ret >= 0) ret = simple_read_from_buffer(userbuf, count, ppos, buf, ret); return ret; @@ -81,6 +81,8 @@ static ssize_t ieee80211_if_fmt_##name( \ IEEE80211_IF_FMT(name, field, "%d\n") #define IEEE80211_IF_FMT_HEX(name, field) \ IEEE80211_IF_FMT(name, field, "%#x\n") +#define IEEE80211_IF_FMT_LHEX(name, field) \ + IEEE80211_IF_FMT(name, field, "%#lx\n") #define IEEE80211_IF_FMT_SIZE(name, field) \ IEEE80211_IF_FMT(name, field, "%zd\n") @@ -145,6 +147,9 @@ IEEE80211_IF_FILE(rc_rateidx_mask_2ghz, rc_rateidx_mask[IEEE80211_BAND_2GHZ], HEX); IEEE80211_IF_FILE(rc_rateidx_mask_5ghz, rc_rateidx_mask[IEEE80211_BAND_5GHZ], HEX); +IEEE80211_IF_FILE(flags, flags, HEX); +IEEE80211_IF_FILE(state, state, LHEX); +IEEE80211_IF_FILE(channel_type, vif.bss_conf.channel_type, DEC); /* STA attributes */ IEEE80211_IF_FILE(bssid, u.mgd.bssid, MAC); @@ -216,6 +221,104 @@ static ssize_t ieee80211_if_parse_smps(struct ieee80211_sub_if_data *sdata, __IEEE80211_IF_FILE_W(smps); +static ssize_t ieee80211_if_fmt_tkip_mic_test( + const struct ieee80211_sub_if_data *sdata, char *buf, int buflen) +{ + return -EOPNOTSUPP; +} + +static int hwaddr_aton(const char *txt, u8 *addr) +{ + int i; + + for (i = 0; i < ETH_ALEN; i++) { + int a, b; + + a = hex_to_bin(*txt++); + if (a < 0) + return -1; + b = hex_to_bin(*txt++); + if (b < 0) + return -1; + *addr++ = (a << 4) | b; + if (i < 5 && *txt++ != ':') + return -1; + } + + return 0; +} + +static ssize_t ieee80211_if_parse_tkip_mic_test( + struct ieee80211_sub_if_data *sdata, const char *buf, int buflen) +{ + struct ieee80211_local *local = sdata->local; + u8 addr[ETH_ALEN]; + struct sk_buff *skb; + struct ieee80211_hdr *hdr; + __le16 fc; + + /* + * Assume colon-delimited MAC address with possible white space + * following. 
+ */ + if (buflen < 3 * ETH_ALEN - 1) + return -EINVAL; + if (hwaddr_aton(buf, addr) < 0) + return -EINVAL; + + if (!ieee80211_sdata_running(sdata)) + return -ENOTCONN; + + skb = dev_alloc_skb(local->hw.extra_tx_headroom + 24 + 100); + if (!skb) + return -ENOMEM; + skb_reserve(skb, local->hw.extra_tx_headroom); + + hdr = (struct ieee80211_hdr *) skb_put(skb, 24); + memset(hdr, 0, 24); + fc = cpu_to_le16(IEEE80211_FTYPE_DATA | IEEE80211_STYPE_DATA); + + switch (sdata->vif.type) { + case NL80211_IFTYPE_AP: + fc |= cpu_to_le16(IEEE80211_FCTL_FROMDS); + /* DA BSSID SA */ + memcpy(hdr->addr1, addr, ETH_ALEN); + memcpy(hdr->addr2, sdata->vif.addr, ETH_ALEN); + memcpy(hdr->addr3, sdata->vif.addr, ETH_ALEN); + break; + case NL80211_IFTYPE_STATION: + fc |= cpu_to_le16(IEEE80211_FCTL_TODS); + /* BSSID SA DA */ + if (sdata->vif.bss_conf.bssid == NULL) { + dev_kfree_skb(skb); + return -ENOTCONN; + } + memcpy(hdr->addr1, sdata->vif.bss_conf.bssid, ETH_ALEN); + memcpy(hdr->addr2, sdata->vif.addr, ETH_ALEN); + memcpy(hdr->addr3, addr, ETH_ALEN); + break; + default: + dev_kfree_skb(skb); + return -EOPNOTSUPP; + } + hdr->frame_control = fc; + + /* + * Add some length to the test frame to make it look bit more valid. + * The exact contents does not matter since the recipient is required + * to drop this because of the Michael MIC failure. + */ + memset(skb_put(skb, 50), 0, 50); + + IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_TKIP_MIC_FAILURE; + + ieee80211_tx_skb(sdata, skb); + + return buflen; +} + +__IEEE80211_IF_FILE_W(tkip_mic_test); + /* AP attributes */ IEEE80211_IF_FILE(num_sta_ps, u.ap.num_sta_ps, ATOMIC); IEEE80211_IF_FILE(dtim_count, u.ap.dtim_count, DEC); @@ -283,6 +386,9 @@ IEEE80211_IF_FILE(dot11MeshHWMPRootMode, static void add_sta_files(struct ieee80211_sub_if_data *sdata) { DEBUGFS_ADD(drop_unencrypted); + DEBUGFS_ADD(flags); + DEBUGFS_ADD(state); + DEBUGFS_ADD(channel_type); DEBUGFS_ADD(rc_rateidx_mask_2ghz); DEBUGFS_ADD(rc_rateidx_mask_5ghz); @@ -291,22 +397,30 @@ static void add_sta_files(struct ieee80211_sub_if_data *sdata) DEBUGFS_ADD(last_beacon); DEBUGFS_ADD(ave_beacon); DEBUGFS_ADD_MODE(smps, 0600); + DEBUGFS_ADD_MODE(tkip_mic_test, 0200); } static void add_ap_files(struct ieee80211_sub_if_data *sdata) { DEBUGFS_ADD(drop_unencrypted); + DEBUGFS_ADD(flags); + DEBUGFS_ADD(state); + DEBUGFS_ADD(channel_type); DEBUGFS_ADD(rc_rateidx_mask_2ghz); DEBUGFS_ADD(rc_rateidx_mask_5ghz); DEBUGFS_ADD(num_sta_ps); DEBUGFS_ADD(dtim_count); DEBUGFS_ADD(num_buffered_multicast); + DEBUGFS_ADD_MODE(tkip_mic_test, 0200); } static void add_wds_files(struct ieee80211_sub_if_data *sdata) { DEBUGFS_ADD(drop_unencrypted); + DEBUGFS_ADD(flags); + DEBUGFS_ADD(state); + DEBUGFS_ADD(channel_type); DEBUGFS_ADD(rc_rateidx_mask_2ghz); DEBUGFS_ADD(rc_rateidx_mask_5ghz); @@ -316,12 +430,18 @@ static void add_wds_files(struct ieee80211_sub_if_data *sdata) static void add_vlan_files(struct ieee80211_sub_if_data *sdata) { DEBUGFS_ADD(drop_unencrypted); + DEBUGFS_ADD(flags); + DEBUGFS_ADD(state); + DEBUGFS_ADD(channel_type); DEBUGFS_ADD(rc_rateidx_mask_2ghz); DEBUGFS_ADD(rc_rateidx_mask_5ghz); } static void add_monitor_files(struct ieee80211_sub_if_data *sdata) { + DEBUGFS_ADD(flags); + DEBUGFS_ADD(state); + DEBUGFS_ADD(channel_type); } #ifdef CONFIG_MAC80211_MESH diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 98d589960a49..3729296f6f95 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -5,9 +5,9 @@ #include "ieee80211_i.h" #include "driver-trace.h" -static inline 
int drv_tx(struct ieee80211_local *local, struct sk_buff *skb) +static inline void drv_tx(struct ieee80211_local *local, struct sk_buff *skb) { - return local->ops->tx(&local->hw, skb); + local->ops->tx(&local->hw, skb); } static inline int drv_start(struct ieee80211_local *local) @@ -382,17 +382,17 @@ static inline int drv_ampdu_action(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, enum ieee80211_ampdu_mlme_action action, struct ieee80211_sta *sta, u16 tid, - u16 *ssn) + u16 *ssn, u8 buf_size) { int ret = -EOPNOTSUPP; might_sleep(); - trace_drv_ampdu_action(local, sdata, action, sta, tid, ssn); + trace_drv_ampdu_action(local, sdata, action, sta, tid, ssn, buf_size); if (local->ops->ampdu_action) ret = local->ops->ampdu_action(&local->hw, &sdata->vif, action, - sta, tid, ssn); + sta, tid, ssn, buf_size); trace_drv_return_int(local, ret); @@ -495,4 +495,35 @@ static inline int drv_cancel_remain_on_channel(struct ieee80211_local *local) return ret; } +static inline int drv_offchannel_tx(struct ieee80211_local *local, + struct sk_buff *skb, + struct ieee80211_channel *chan, + enum nl80211_channel_type channel_type, + unsigned int wait) +{ + int ret; + + might_sleep(); + + trace_drv_offchannel_tx(local, skb, chan, channel_type, wait); + ret = local->ops->offchannel_tx(&local->hw, skb, chan, + channel_type, wait); + trace_drv_return_int(local, ret); + + return ret; +} + +static inline int drv_offchannel_tx_cancel_wait(struct ieee80211_local *local) +{ + int ret; + + might_sleep(); + + trace_drv_offchannel_tx_cancel_wait(local); + ret = local->ops->offchannel_tx_cancel_wait(&local->hw); + trace_drv_return_int(local, ret); + + return ret; +} + #endif /* __MAC80211_DRIVER_OPS */ diff --git a/net/mac80211/driver-trace.h b/net/mac80211/driver-trace.h index 49c84218b2f4..520fe2444893 100644 --- a/net/mac80211/driver-trace.h +++ b/net/mac80211/driver-trace.h @@ -9,6 +9,11 @@ #undef TRACE_EVENT #define TRACE_EVENT(name, proto, ...) \ static inline void trace_ ## name(proto) {} +#undef DECLARE_EVENT_CLASS +#define DECLARE_EVENT_CLASS(...) +#undef DEFINE_EVENT +#define DEFINE_EVENT(evt_class, name, proto, ...) \ +static inline void trace_ ## name(proto) {} #endif #undef TRACE_SYSTEM @@ -38,7 +43,7 @@ static inline void trace_ ## name(proto) {} * Tracing for driver callbacks. 
*/ -TRACE_EVENT(drv_return_void, +DECLARE_EVENT_CLASS(local_only_evt, TP_PROTO(struct ieee80211_local *local), TP_ARGS(local), TP_STRUCT__entry( @@ -50,6 +55,11 @@ TRACE_EVENT(drv_return_void, TP_printk(LOCAL_PR_FMT, LOCAL_PR_ARG) ); +DEFINE_EVENT(local_only_evt, drv_return_void, + TP_PROTO(struct ieee80211_local *local), + TP_ARGS(local) +); + TRACE_EVENT(drv_return_int, TP_PROTO(struct ieee80211_local *local, int ret), TP_ARGS(local, ret), @@ -78,40 +88,14 @@ TRACE_EVENT(drv_return_u64, TP_printk(LOCAL_PR_FMT " - %llu", LOCAL_PR_ARG, __entry->ret) ); -TRACE_EVENT(drv_start, +DEFINE_EVENT(local_only_evt, drv_start, TP_PROTO(struct ieee80211_local *local), - - TP_ARGS(local), - - TP_STRUCT__entry( - LOCAL_ENTRY - ), - - TP_fast_assign( - LOCAL_ASSIGN; - ), - - TP_printk( - LOCAL_PR_FMT, LOCAL_PR_ARG - ) + TP_ARGS(local) ); -TRACE_EVENT(drv_stop, +DEFINE_EVENT(local_only_evt, drv_stop, TP_PROTO(struct ieee80211_local *local), - - TP_ARGS(local), - - TP_STRUCT__entry( - LOCAL_ENTRY - ), - - TP_fast_assign( - LOCAL_ASSIGN; - ), - - TP_printk( - LOCAL_PR_FMT, LOCAL_PR_ARG - ) + TP_ARGS(local) ); TRACE_EVENT(drv_add_interface, @@ -439,40 +423,14 @@ TRACE_EVENT(drv_hw_scan, ) ); -TRACE_EVENT(drv_sw_scan_start, +DEFINE_EVENT(local_only_evt, drv_sw_scan_start, TP_PROTO(struct ieee80211_local *local), - - TP_ARGS(local), - - TP_STRUCT__entry( - LOCAL_ENTRY - ), - - TP_fast_assign( - LOCAL_ASSIGN; - ), - - TP_printk( - LOCAL_PR_FMT, LOCAL_PR_ARG - ) + TP_ARGS(local) ); -TRACE_EVENT(drv_sw_scan_complete, +DEFINE_EVENT(local_only_evt, drv_sw_scan_complete, TP_PROTO(struct ieee80211_local *local), - - TP_ARGS(local), - - TP_STRUCT__entry( - LOCAL_ENTRY - ), - - TP_fast_assign( - LOCAL_ASSIGN; - ), - - TP_printk( - LOCAL_PR_FMT, LOCAL_PR_ARG - ) + TP_ARGS(local) ); TRACE_EVENT(drv_get_stats, @@ -702,23 +660,9 @@ TRACE_EVENT(drv_conf_tx, ) ); -TRACE_EVENT(drv_get_tsf, +DEFINE_EVENT(local_only_evt, drv_get_tsf, TP_PROTO(struct ieee80211_local *local), - - TP_ARGS(local), - - TP_STRUCT__entry( - LOCAL_ENTRY - ), - - TP_fast_assign( - LOCAL_ASSIGN; - ), - - TP_printk( - LOCAL_PR_FMT, - LOCAL_PR_ARG - ) + TP_ARGS(local) ); TRACE_EVENT(drv_set_tsf, @@ -742,41 +686,14 @@ TRACE_EVENT(drv_set_tsf, ) ); -TRACE_EVENT(drv_reset_tsf, +DEFINE_EVENT(local_only_evt, drv_reset_tsf, TP_PROTO(struct ieee80211_local *local), - - TP_ARGS(local), - - TP_STRUCT__entry( - LOCAL_ENTRY - ), - - TP_fast_assign( - LOCAL_ASSIGN; - ), - - TP_printk( - LOCAL_PR_FMT, LOCAL_PR_ARG - ) + TP_ARGS(local) ); -TRACE_EVENT(drv_tx_last_beacon, +DEFINE_EVENT(local_only_evt, drv_tx_last_beacon, TP_PROTO(struct ieee80211_local *local), - - TP_ARGS(local), - - TP_STRUCT__entry( - LOCAL_ENTRY - ), - - TP_fast_assign( - LOCAL_ASSIGN; - ), - - TP_printk( - LOCAL_PR_FMT, - LOCAL_PR_ARG - ) + TP_ARGS(local) ); TRACE_EVENT(drv_ampdu_action, @@ -784,9 +701,9 @@ TRACE_EVENT(drv_ampdu_action, struct ieee80211_sub_if_data *sdata, enum ieee80211_ampdu_mlme_action action, struct ieee80211_sta *sta, u16 tid, - u16 *ssn), + u16 *ssn, u8 buf_size), - TP_ARGS(local, sdata, action, sta, tid, ssn), + TP_ARGS(local, sdata, action, sta, tid, ssn, buf_size), TP_STRUCT__entry( LOCAL_ENTRY @@ -794,6 +711,7 @@ TRACE_EVENT(drv_ampdu_action, __field(u32, action) __field(u16, tid) __field(u16, ssn) + __field(u8, buf_size) VIF_ENTRY ), @@ -804,11 +722,13 @@ TRACE_EVENT(drv_ampdu_action, __entry->action = action; __entry->tid = tid; __entry->ssn = ssn ? 
*ssn : 0; + __entry->buf_size = buf_size; ), TP_printk( - LOCAL_PR_FMT VIF_PR_FMT STA_PR_FMT " action:%d tid:%d", - LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG, __entry->action, __entry->tid + LOCAL_PR_FMT VIF_PR_FMT STA_PR_FMT " action:%d tid:%d buf:%d", + LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG, __entry->action, + __entry->tid, __entry->buf_size ) ); @@ -959,24 +879,44 @@ TRACE_EVENT(drv_remain_on_channel, ) ); -TRACE_EVENT(drv_cancel_remain_on_channel, +DEFINE_EVENT(local_only_evt, drv_cancel_remain_on_channel, TP_PROTO(struct ieee80211_local *local), + TP_ARGS(local) +); - TP_ARGS(local), +TRACE_EVENT(drv_offchannel_tx, + TP_PROTO(struct ieee80211_local *local, struct sk_buff *skb, + struct ieee80211_channel *chan, + enum nl80211_channel_type channel_type, + unsigned int wait), + + TP_ARGS(local, skb, chan, channel_type, wait), TP_STRUCT__entry( LOCAL_ENTRY + __field(int, center_freq) + __field(int, channel_type) + __field(unsigned int, wait) ), TP_fast_assign( LOCAL_ASSIGN; + __entry->center_freq = chan->center_freq; + __entry->channel_type = channel_type; + __entry->wait = wait; ), TP_printk( - LOCAL_PR_FMT, LOCAL_PR_ARG + LOCAL_PR_FMT " freq:%dMHz, wait:%dms", + LOCAL_PR_ARG, __entry->center_freq, __entry->wait ) ); +DEFINE_EVENT(local_only_evt, drv_offchannel_tx_cancel_wait, + TP_PROTO(struct ieee80211_local *local), + TP_ARGS(local) +); + /* * Tracing for API calls that drivers call. */ @@ -1069,23 +1009,9 @@ TRACE_EVENT(api_stop_tx_ba_cb, ) ); -TRACE_EVENT(api_restart_hw, +DEFINE_EVENT(local_only_evt, api_restart_hw, TP_PROTO(struct ieee80211_local *local), - - TP_ARGS(local), - - TP_STRUCT__entry( - LOCAL_ENTRY - ), - - TP_fast_assign( - LOCAL_ASSIGN; - ), - - TP_printk( - LOCAL_PR_FMT, - LOCAL_PR_ARG - ) + TP_ARGS(local) ); TRACE_EVENT(api_beacon_loss, @@ -1214,40 +1140,14 @@ TRACE_EVENT(api_chswitch_done, ) ); -TRACE_EVENT(api_ready_on_channel, +DEFINE_EVENT(local_only_evt, api_ready_on_channel, TP_PROTO(struct ieee80211_local *local), - - TP_ARGS(local), - - TP_STRUCT__entry( - LOCAL_ENTRY - ), - - TP_fast_assign( - LOCAL_ASSIGN; - ), - - TP_printk( - LOCAL_PR_FMT, LOCAL_PR_ARG - ) + TP_ARGS(local) ); -TRACE_EVENT(api_remain_on_channel_expired, +DEFINE_EVENT(local_only_evt, api_remain_on_channel_expired, TP_PROTO(struct ieee80211_local *local), - - TP_ARGS(local), - - TP_STRUCT__entry( - LOCAL_ENTRY - ), - - TP_fast_assign( - LOCAL_ASSIGN; - ), - - TP_printk( - LOCAL_PR_FMT, LOCAL_PR_ARG - ) + TP_ARGS(local) ); /* diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index 75d679d75e63..b9e4b9bd2179 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -66,6 +66,9 @@ void ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_supported_band *sband, /* own MCS TX capabilities */ tx_mcs_set_cap = sband->ht_cap.mcs.tx_params; + /* Copy peer MCS TX capabilities, the driver might need them. */ + ht_cap->mcs.tx_params = ht_cap_ie->mcs.tx_params; + /* can we TX with MCS rates? 
*/ if (!(tx_mcs_set_cap & IEEE80211_HT_MCS_TX_DEFINED)) return; @@ -79,7 +82,7 @@ void ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_supported_band *sband, max_tx_streams = IEEE80211_HT_MCS_TX_MAX_STREAMS; /* - * 802.11n D5.0 20.3.5 / 20.6 says: + * 802.11n-2009 20.3.5 / 20.6 says: * - indices 0 to 7 and 32 are single spatial stream * - 8 to 31 are multiple spatial streams using equal modulation * [8..15 for two streams, 16..23 for three and 24..31 for four] diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 53c7077ffd4f..3e81af1fce58 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -31,7 +31,6 @@ #define IEEE80211_IBSS_JOIN_TIMEOUT (7 * HZ) #define IEEE80211_IBSS_MERGE_INTERVAL (30 * HZ) -#define IEEE80211_IBSS_MERGE_DELAY 0x400000 #define IEEE80211_IBSS_INACTIVITY_LIMIT (60 * HZ) #define IEEE80211_IBSS_MAX_STA_ENTRIES 128 @@ -270,7 +269,8 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, enum ieee80211_band band = rx_status->band; if (elems->ds_params && elems->ds_params_len == 1) - freq = ieee80211_channel_to_frequency(elems->ds_params[0]); + freq = ieee80211_channel_to_frequency(elems->ds_params[0], + band); else freq = rx_status->freq; @@ -354,7 +354,7 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, if (memcmp(cbss->bssid, sdata->u.ibss.bssid, ETH_ALEN) == 0) goto put_bss; - if (rx_status->flag & RX_FLAG_TSFT) { + if (rx_status->flag & RX_FLAG_MACTIME_MPDU) { /* * For correct IBSS merging we need mactime; since mactime is * defined as the time the first data symbol of the frame hits @@ -396,10 +396,6 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, jiffies); #endif - /* give slow hardware some time to do the TSF sync */ - if (rx_timestamp < IEEE80211_IBSS_MERGE_DELAY) - goto put_bss; - if (beacon_timestamp > rx_timestamp) { #ifdef CONFIG_MAC80211_IBSS_DEBUG printk(KERN_DEBUG "%s: beacon TSF higher than " @@ -663,12 +659,13 @@ static void ieee80211_sta_find_ibss(struct ieee80211_sub_if_data *sdata) } static void ieee80211_rx_mgmt_probe_req(struct ieee80211_sub_if_data *sdata, - struct ieee80211_mgmt *mgmt, - size_t len) + struct sk_buff *req) { + struct ieee80211_rx_status *rx_status = IEEE80211_SKB_RXCB(req); + struct ieee80211_mgmt *mgmt = (void *)req->data; struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; struct ieee80211_local *local = sdata->local; - int tx_last_beacon; + int tx_last_beacon, len = req->len; struct sk_buff *skb; struct ieee80211_mgmt *resp; u8 *pos, *end; @@ -688,7 +685,7 @@ static void ieee80211_rx_mgmt_probe_req(struct ieee80211_sub_if_data *sdata, mgmt->bssid, tx_last_beacon); #endif /* CONFIG_MAC80211_IBSS_DEBUG */ - if (!tx_last_beacon) + if (!tx_last_beacon && !(rx_status->rx_flags & IEEE80211_RX_RA_MATCH)) return; if (memcmp(mgmt->bssid, ifibss->bssid, ETH_ALEN) != 0 && @@ -785,7 +782,7 @@ void ieee80211_ibss_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, switch (fc & IEEE80211_FCTL_STYPE) { case IEEE80211_STYPE_PROBE_REQ: - ieee80211_rx_mgmt_probe_req(sdata, mgmt, skb->len); + ieee80211_rx_mgmt_probe_req(sdata, skb); break; case IEEE80211_STYPE_PROBE_RESP: ieee80211_rx_mgmt_probe_resp(sdata, mgmt, skb->len, diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index c47d7c0e48a4..a40401701424 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -225,6 +225,7 @@ struct ieee80211_if_ap { struct sk_buff_head ps_bc_buf; atomic_t num_sta_ps; /* number of stations in PS mode */ int dtim_count; + bool dtim_bc_mc; }; 
struct ieee80211_if_wds { @@ -654,8 +655,6 @@ struct tpt_led_trigger { * well be on the operating channel * @SCAN_HW_SCANNING: The hardware is scanning for us, we have no way to * determine if we are on the operating channel or not - * @SCAN_OFF_CHANNEL: We're off our operating channel for scanning, - * gets only set in conjunction with SCAN_SW_SCANNING * @SCAN_COMPLETED: Set for our scan work function when the driver reported * that the scan completed. * @SCAN_ABORTED: Set for our scan work function when the driver reported @@ -664,7 +663,6 @@ struct tpt_led_trigger { enum { SCAN_SW_SCANNING, SCAN_HW_SCANNING, - SCAN_OFF_CHANNEL, SCAN_COMPLETED, SCAN_ABORTED, }; @@ -953,12 +951,13 @@ struct ieee80211_local { struct ieee80211_channel *hw_roc_channel; struct net_device *hw_roc_dev; - struct sk_buff *hw_roc_skb; + struct sk_buff *hw_roc_skb, *hw_roc_skb_for_status; struct work_struct hw_roc_start, hw_roc_done; enum nl80211_channel_type hw_roc_channel_type; unsigned int hw_roc_duration; u32 hw_roc_cookie; bool hw_roc_for_tx; + unsigned long hw_offchan_tx_cookie; /* dummy netdev for use w/ NAPI */ struct net_device napi_dev; @@ -1068,8 +1067,6 @@ void ieee80211_bss_info_change_notify(struct ieee80211_sub_if_data *sdata, void ieee80211_configure_filter(struct ieee80211_local *local); u32 ieee80211_reset_erp_info(struct ieee80211_sub_if_data *sdata); -extern bool ieee80211_disable_40mhz_24ghz; - /* STA code */ void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata); int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata, @@ -1147,10 +1144,14 @@ void ieee80211_rx_bss_put(struct ieee80211_local *local, struct ieee80211_bss *bss); /* off-channel helpers */ -void ieee80211_offchannel_stop_beaconing(struct ieee80211_local *local); -void ieee80211_offchannel_stop_station(struct ieee80211_local *local); +bool ieee80211_cfg_on_oper_channel(struct ieee80211_local *local); +void ieee80211_offchannel_enable_all_ps(struct ieee80211_local *local, + bool tell_ap); +void ieee80211_offchannel_stop_vifs(struct ieee80211_local *local, + bool offchannel_ps_enable); void ieee80211_offchannel_return(struct ieee80211_local *local, - bool enable_beaconing); + bool enable_beaconing, + bool offchannel_ps_disable); void ieee80211_hw_roc_setup(struct ieee80211_local *local); /* interface handling */ diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 8acba456744e..4054399be907 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -382,6 +382,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, *tmp; u32 hw_reconf_flags = 0; int i; + enum nl80211_channel_type orig_ct; if (local->scan_sdata == sdata) ieee80211_scan_cancel(local); @@ -542,8 +543,14 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, hw_reconf_flags = 0; } + /* Re-calculate channel-type, in case there are multiple vifs + * on different channel types. 
+ */ + orig_ct = local->_oper_channel_type; + ieee80211_set_channel_type(local, NULL, NL80211_CHAN_NO_HT); + /* do after stop to avoid reconfiguring when we stop anyway */ - if (hw_reconf_flags) + if (hw_reconf_flags || (orig_ct != local->_oper_channel_type)) ieee80211_hw_config(local, hw_reconf_flags); spin_lock_irqsave(&local->queue_stop_reason_lock, flags); @@ -1229,6 +1236,7 @@ void ieee80211_remove_interfaces(struct ieee80211_local *local) } mutex_unlock(&local->iflist_mtx); unregister_netdevice_many(&unreg_list); + list_del(&unreg_list); } static u32 ieee80211_idle_off(struct ieee80211_local *local, diff --git a/net/mac80211/key.h b/net/mac80211/key.h index 8106aa1b7466..4ddbe27eb570 100644 --- a/net/mac80211/key.h +++ b/net/mac80211/key.h @@ -21,7 +21,6 @@ #define WEP_IV_LEN 4 #define WEP_ICV_LEN 4 -#define ALG_TKIP_KEY_LEN 32 #define ALG_CCMP_KEY_LEN 16 #define CCMP_HDR_LEN 8 #define CCMP_MIC_LEN 8 diff --git a/net/mac80211/main.c b/net/mac80211/main.c index a46ff06d7cb8..562d2984c482 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -34,7 +34,7 @@ #include "debugfs.h" -bool ieee80211_disable_40mhz_24ghz; +static bool ieee80211_disable_40mhz_24ghz; module_param(ieee80211_disable_40mhz_24ghz, bool, 0644); MODULE_PARM_DESC(ieee80211_disable_40mhz_24ghz, "Disable 40MHz support in the 2.4GHz band"); @@ -98,6 +98,47 @@ static void ieee80211_reconfig_filter(struct work_struct *work) ieee80211_configure_filter(local); } +/* + * Returns true if we are logically configured to be on + * the operating channel AND the hardware-conf is currently + * configured on the operating channel. Compares channel-type + * as well. + */ +bool ieee80211_cfg_on_oper_channel(struct ieee80211_local *local) +{ + struct ieee80211_channel *chan, *scan_chan; + enum nl80211_channel_type channel_type; + + /* This logic needs to match logic in ieee80211_hw_config */ + if (local->scan_channel) { + chan = local->scan_channel; + /* If scanning on oper channel, use whatever channel-type + * is currently in use. + */ + if (chan == local->oper_channel) + channel_type = local->_oper_channel_type; + else + channel_type = NL80211_CHAN_NO_HT; + } else if (local->tmp_channel) { + chan = scan_chan = local->tmp_channel; + channel_type = local->tmp_channel_type; + } else { + chan = local->oper_channel; + channel_type = local->_oper_channel_type; + } + + if (chan != local->oper_channel || + channel_type != local->_oper_channel_type) + return false; + + /* Check current hardware-config against oper_channel. */ + if ((local->oper_channel != local->hw.conf.channel) || + (local->_oper_channel_type != local->hw.conf.channel_type)) + return false; + + return true; +} + int ieee80211_hw_config(struct ieee80211_local *local, u32 changed) { struct ieee80211_channel *chan, *scan_chan; @@ -110,21 +151,33 @@ int ieee80211_hw_config(struct ieee80211_local *local, u32 changed) scan_chan = local->scan_channel; + /* If this off-channel logic ever changes, ieee80211_on_oper_channel + * may need to change as well. + */ offchannel_flag = local->hw.conf.flags & IEEE80211_CONF_OFFCHANNEL; if (scan_chan) { chan = scan_chan; - channel_type = NL80211_CHAN_NO_HT; - local->hw.conf.flags |= IEEE80211_CONF_OFFCHANNEL; - } else if (local->tmp_channel && - local->oper_channel != local->tmp_channel) { + /* If scanning on oper channel, use whatever channel-type + * is currently in use. 
+ */ + if (chan == local->oper_channel) + channel_type = local->_oper_channel_type; + else + channel_type = NL80211_CHAN_NO_HT; + } else if (local->tmp_channel) { chan = scan_chan = local->tmp_channel; channel_type = local->tmp_channel_type; - local->hw.conf.flags |= IEEE80211_CONF_OFFCHANNEL; } else { chan = local->oper_channel; channel_type = local->_oper_channel_type; - local->hw.conf.flags &= ~IEEE80211_CONF_OFFCHANNEL; } + + if (chan != local->oper_channel || + channel_type != local->_oper_channel_type) + local->hw.conf.flags |= IEEE80211_CONF_OFFCHANNEL; + else + local->hw.conf.flags &= ~IEEE80211_CONF_OFFCHANNEL; + offchannel_flag ^= local->hw.conf.flags & IEEE80211_CONF_OFFCHANNEL; if (offchannel_flag || chan != local->hw.conf.channel || @@ -146,7 +199,8 @@ int ieee80211_hw_config(struct ieee80211_local *local, u32 changed) changed |= IEEE80211_CONF_CHANGE_SMPS; } - if (scan_chan) + if ((local->scanning & SCAN_SW_SCANNING) || + (local->scanning & SCAN_HW_SCANNING)) power = chan->max_power; else power = local->power_constr_level ? @@ -231,7 +285,7 @@ void ieee80211_bss_info_change_notify(struct ieee80211_sub_if_data *sdata, if (changed & BSS_CHANGED_BEACON_ENABLED) { if (local->quiescing || !ieee80211_sdata_running(sdata) || - test_bit(SCAN_SW_SCANNING, &local->scanning)) { + test_bit(SDATA_STATE_OFFCHANNEL, &sdata->state)) { sdata->vif.bss_conf.enable_beacon = false; } else { /* @@ -326,6 +380,9 @@ void ieee80211_restart_hw(struct ieee80211_hw *hw) trace_api_restart_hw(local); + wiphy_info(hw->wiphy, + "Hardware restart was requested\n"); + /* use this reason, ieee80211_reconfig will unblock it */ ieee80211_stop_queues_by_reason(hw, IEEE80211_QUEUE_STOP_REASON_SUSPEND); @@ -554,6 +611,7 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, local->hw.queues = 1; local->hw.max_rates = 1; local->hw.max_report_rates = 0; + local->hw.max_rx_aggregation_subframes = IEEE80211_MAX_AMPDU_BUF; local->hw.conf.long_frame_max_tx_count = wiphy->retry_long; local->hw.conf.short_frame_max_tx_count = wiphy->retry_short; local->user_power_level = -1; @@ -668,6 +726,18 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) } channels += sband->n_channels; + /* + * Since ieee80211_disable_40mhz_24ghz is global, we can + * modify the sband's ht data even if the driver uses a + * global structure for that. 
+ */ + if (ieee80211_disable_40mhz_24ghz && + band == IEEE80211_BAND_2GHZ && + sband->ht_cap.ht_supported) { + sband->ht_cap.cap &= ~IEEE80211_HT_CAP_SUP_WIDTH_20_40; + sband->ht_cap.cap &= ~IEEE80211_HT_CAP_SGI_40; + } + if (max_bitrates < sband->n_bitrates) max_bitrates = sband->n_bitrates; supp_ht = supp_ht || sband->ht_cap.ht_supported; diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index ca3af4685b0a..2a57cc02c618 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -574,7 +574,7 @@ static void ieee80211_mesh_rx_bcn_presp(struct ieee80211_sub_if_data *sdata, &elems); if (elems.ds_params && elems.ds_params_len == 1) - freq = ieee80211_channel_to_frequency(elems.ds_params[0]); + freq = ieee80211_channel_to_frequency(elems.ds_params[0], band); else freq = rx_status->freq; @@ -645,7 +645,7 @@ void ieee80211_mesh_work(struct ieee80211_sub_if_data *sdata) if (test_and_clear_bit(MESH_WORK_GROW_MPATH_TABLE, &ifmsh->wrkq_flags)) mesh_mpath_table_grow(); - if (test_and_clear_bit(MESH_WORK_GROW_MPATH_TABLE, &ifmsh->wrkq_flags)) + if (test_and_clear_bit(MESH_WORK_GROW_MPP_TABLE, &ifmsh->wrkq_flags)) mesh_mpp_table_grow(); if (test_and_clear_bit(MESH_WORK_HOUSEKEEPING, &ifmsh->wrkq_flags)) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 45fbb9e33746..cc984bd861cf 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -28,8 +28,15 @@ #include "rate.h" #include "led.h" -#define IEEE80211_MAX_NULLFUNC_TRIES 2 -#define IEEE80211_MAX_PROBE_TRIES 5 +static int max_nullfunc_tries = 2; +module_param(max_nullfunc_tries, int, 0644); +MODULE_PARM_DESC(max_nullfunc_tries, + "Maximum nullfunc tx tries before disconnecting (reason 4)."); + +static int max_probe_tries = 5; +module_param(max_probe_tries, int, 0644); +MODULE_PARM_DESC(max_probe_tries, + "Maximum probe tries before disconnecting (reason 4)."); /* * Beacon loss timeout is calculated as N frames times the @@ -51,7 +58,11 @@ * a probe request because of beacon loss or for * checking the connection still works. */ -#define IEEE80211_PROBE_WAIT (HZ / 2) +static int probe_wait_ms = 500; +module_param(probe_wait_ms, int, 0644); +MODULE_PARM_DESC(probe_wait_ms, + "Maximum time(ms) to wait for probe response" + " before disconnecting (reason 4)."); /* * Weight given to the latest Beacon frame when calculating average signal @@ -134,6 +145,9 @@ void ieee80211_sta_reset_conn_monitor(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; + if (unlikely(!sdata->u.mgd.associated)) + return; + if (sdata->local->hw.flags & IEEE80211_HW_CONNECTION_MONITOR) return; @@ -161,6 +175,7 @@ static u32 ieee80211_enable_ht(struct ieee80211_sub_if_data *sdata, struct ieee80211_supported_band *sband; struct sta_info *sta; u32 changed = 0; + int hti_cfreq; u16 ht_opmode; bool enable_ht = true; enum nl80211_channel_type prev_chantype; @@ -174,10 +189,27 @@ static u32 ieee80211_enable_ht(struct ieee80211_sub_if_data *sdata, if (!sband->ht_cap.ht_supported) enable_ht = false; - /* check that channel matches the right operating channel */ - if (local->hw.conf.channel->center_freq != - ieee80211_channel_to_frequency(hti->control_chan)) - enable_ht = false; + if (enable_ht) { + hti_cfreq = ieee80211_channel_to_frequency(hti->control_chan, + sband->band); + /* check that channel matches the right operating channel */ + if (local->hw.conf.channel->center_freq != hti_cfreq) { + /* Some APs mess this up, evidently. + * Netgear WNDR3700 sometimes reports 4 higher than + * the actual channel, for instance. 
+ */ + printk(KERN_DEBUG + "%s: Wrong control channel in association" + " response: configured center-freq: %d" + " hti-cfreq: %d hti->control_chan: %d" + " band: %d. Disabling HT.\n", + sdata->name, + local->hw.conf.channel->center_freq, + hti_cfreq, hti->control_chan, + sband->band); + enable_ht = false; + } + } if (enable_ht) { channel_type = NL80211_CHAN_HT20; @@ -429,7 +461,8 @@ void ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, container_of((void *)bss, struct cfg80211_bss, priv); struct ieee80211_channel *new_ch; struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; - int new_freq = ieee80211_channel_to_frequency(sw_elem->new_ch_num); + int new_freq = ieee80211_channel_to_frequency(sw_elem->new_ch_num, + cbss->channel->band); ASSERT_MGD_MTX(ifmgd); @@ -600,6 +633,14 @@ void ieee80211_recalc_ps(struct ieee80211_local *local, s32 latency) list_for_each_entry(sdata, &local->interfaces, list) { if (!ieee80211_sdata_running(sdata)) continue; + if (sdata->vif.type == NL80211_IFTYPE_AP) { + /* If an AP vif is found, then disable PS + * by setting the count to zero thereby setting + * ps_sdata to NULL. + */ + count = 0; + break; + } if (sdata->vif.type != NL80211_IFTYPE_STATION) continue; found = sdata; @@ -700,9 +741,19 @@ void ieee80211_dynamic_ps_enable_work(struct work_struct *work) return; if ((local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK) && - (!(ifmgd->flags & IEEE80211_STA_NULLFUNC_ACKED))) + (!(ifmgd->flags & IEEE80211_STA_NULLFUNC_ACKED))) { + netif_tx_stop_all_queues(sdata->dev); + /* + * Flush all the frames queued in the driver before + * going to power save + */ + drv_flush(local, false); ieee80211_send_nullfunc(local, sdata, 1); + /* Flush once again to get the tx status of nullfunc frame */ + drv_flush(local, false); + } + if (!((local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) && (local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK)) || (ifmgd->flags & IEEE80211_STA_NULLFUNC_ACKED)) { @@ -710,6 +761,8 @@ void ieee80211_dynamic_ps_enable_work(struct work_struct *work) local->hw.conf.flags |= IEEE80211_CONF_PS; ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS); } + + netif_tx_start_all_queues(sdata->dev); } void ieee80211_dynamic_ps_timer(unsigned long data) @@ -1089,7 +1142,7 @@ static void ieee80211_mgd_probe_ap_send(struct ieee80211_sub_if_data *sdata) struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; const u8 *ssid; u8 *dst = ifmgd->associated->bssid; - u8 unicast_limit = max(1, IEEE80211_MAX_PROBE_TRIES - 3); + u8 unicast_limit = max(1, max_probe_tries - 3); /* * Try sending broadcast probe requests for the last three @@ -1115,7 +1168,7 @@ static void ieee80211_mgd_probe_ap_send(struct ieee80211_sub_if_data *sdata) } ifmgd->probe_send_count++; - ifmgd->probe_timeout = jiffies + IEEE80211_PROBE_WAIT; + ifmgd->probe_timeout = jiffies + msecs_to_jiffies(probe_wait_ms); run_again(ifmgd, ifmgd->probe_timeout); } @@ -1216,7 +1269,8 @@ static void __ieee80211_connection_loss(struct ieee80211_sub_if_data *sdata) memcpy(bssid, ifmgd->associated->bssid, ETH_ALEN); - printk(KERN_DEBUG "Connection to AP %pM lost.\n", bssid); + printk(KERN_DEBUG "%s: Connection to AP %pM lost.\n", + sdata->name, bssid); ieee80211_set_disassoc(sdata, true, true); mutex_unlock(&ifmgd->mtx); @@ -1519,7 +1573,8 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, } if (elems->ds_params && elems->ds_params_len == 1) - freq = ieee80211_channel_to_frequency(elems->ds_params[0]); + freq = ieee80211_channel_to_frequency(elems->ds_params[0], + 
rx_status->band); else freq = rx_status->freq; @@ -1960,9 +2015,9 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata) memcpy(bssid, ifmgd->associated->bssid, ETH_ALEN); if (local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) - max_tries = IEEE80211_MAX_NULLFUNC_TRIES; + max_tries = max_nullfunc_tries; else - max_tries = IEEE80211_MAX_PROBE_TRIES; + max_tries = max_probe_tries; /* ACK received for nullfunc probing frame */ if (!ifmgd->probe_send_count) @@ -1972,9 +2027,9 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata) #ifdef CONFIG_MAC80211_VERBOSE_DEBUG wiphy_debug(local->hw.wiphy, "%s: No ack for nullfunc frame to" - " AP %pM, try %d\n", + " AP %pM, try %d/%i\n", sdata->name, bssid, - ifmgd->probe_send_count); + ifmgd->probe_send_count, max_tries); #endif ieee80211_mgd_probe_ap_send(sdata); } else { @@ -1994,17 +2049,17 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata) "%s: Failed to send nullfunc to AP %pM" " after %dms, disconnecting.\n", sdata->name, - bssid, (1000 * IEEE80211_PROBE_WAIT)/HZ); + bssid, probe_wait_ms); #endif ieee80211_sta_connection_lost(sdata, bssid); } else if (ifmgd->probe_send_count < max_tries) { #ifdef CONFIG_MAC80211_VERBOSE_DEBUG wiphy_debug(local->hw.wiphy, "%s: No probe response from AP %pM" - " after %dms, try %d\n", + " after %dms, try %d/%i\n", sdata->name, - bssid, (1000 * IEEE80211_PROBE_WAIT)/HZ, - ifmgd->probe_send_count); + bssid, probe_wait_ms, + ifmgd->probe_send_count, max_tries); #endif ieee80211_mgd_probe_ap_send(sdata); } else { @@ -2016,7 +2071,7 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata) "%s: No probe response from AP %pM" " after %dms, disconnecting.\n", sdata->name, - bssid, (1000 * IEEE80211_PROBE_WAIT)/HZ); + bssid, probe_wait_ms); ieee80211_sta_connection_lost(sdata, bssid); } @@ -2254,6 +2309,7 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata, else wk->type = IEEE80211_WORK_DIRECT_PROBE; wk->chan = req->bss->channel; + wk->chan_type = NL80211_CHAN_NO_HT; wk->sdata = sdata; wk->done = ieee80211_probe_auth_done; @@ -2403,6 +2459,7 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, memcpy(wk->assoc.prev_bssid, req->prev_bssid, ETH_ALEN); wk->chan = req->bss->channel; + wk->chan_type = NL80211_CHAN_NO_HT; wk->sdata = sdata; wk->done = ieee80211_assoc_done; if (!bss->dtim_period && diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c index b4e52676f3fb..13427b194ced 100644 --- a/net/mac80211/offchannel.c +++ b/net/mac80211/offchannel.c @@ -17,10 +17,14 @@ #include "driver-trace.h" /* - * inform AP that we will go to sleep so that it will buffer the frames - * while we scan + * Tell our hardware to disable PS. + * Optionally inform AP that we will go to sleep so that it will buffer + * the frames while we are doing off-channel work. This is optional + * because we *may* be doing work on-operating channel, and want our + * hardware unconditionally awake, but still let the AP send us normal frames. 
*/ -static void ieee80211_offchannel_ps_enable(struct ieee80211_sub_if_data *sdata) +static void ieee80211_offchannel_ps_enable(struct ieee80211_sub_if_data *sdata, + bool tell_ap) { struct ieee80211_local *local = sdata->local; struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; @@ -41,8 +45,8 @@ static void ieee80211_offchannel_ps_enable(struct ieee80211_sub_if_data *sdata) ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS); } - if (!(local->offchannel_ps_enabled) || - !(local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK)) + if (tell_ap && (!local->offchannel_ps_enabled || + !(local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK))) /* * If power save was enabled, no need to send a nullfunc * frame because AP knows that we are sleeping. But if the @@ -77,6 +81,9 @@ static void ieee80211_offchannel_ps_disable(struct ieee80211_sub_if_data *sdata) * we are sleeping, let's just enable power save mode in * hardware. */ + /* TODO: Only set hardware if CONF_PS changed? + * TODO: Should we set offchannel_ps_enabled to false? + */ local->hw.conf.flags |= IEEE80211_CONF_PS; ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS); } else if (local->hw.conf.dynamic_ps_timeout > 0) { @@ -95,63 +102,61 @@ static void ieee80211_offchannel_ps_disable(struct ieee80211_sub_if_data *sdata) ieee80211_sta_reset_conn_monitor(sdata); } -void ieee80211_offchannel_stop_beaconing(struct ieee80211_local *local) +void ieee80211_offchannel_stop_vifs(struct ieee80211_local *local, + bool offchannel_ps_enable) { struct ieee80211_sub_if_data *sdata; + /* + * notify the AP about us leaving the channel and stop all + * STA interfaces. + */ mutex_lock(&local->iflist_mtx); list_for_each_entry(sdata, &local->interfaces, list) { if (!ieee80211_sdata_running(sdata)) continue; - /* disable beaconing */ + if (sdata->vif.type != NL80211_IFTYPE_MONITOR) + set_bit(SDATA_STATE_OFFCHANNEL, &sdata->state); + + /* Check to see if we should disable beaconing. */ if (sdata->vif.type == NL80211_IFTYPE_AP || sdata->vif.type == NL80211_IFTYPE_ADHOC || sdata->vif.type == NL80211_IFTYPE_MESH_POINT) ieee80211_bss_info_change_notify( sdata, BSS_CHANGED_BEACON_ENABLED); - /* - * only handle non-STA interfaces here, STA interfaces - * are handled in ieee80211_offchannel_stop_station(), - * e.g., from the background scan state machine. - * - * In addition, do not stop monitor interface to allow it to be - * used from user space controlled off-channel operations. 
- */ - if (sdata->vif.type != NL80211_IFTYPE_STATION && - sdata->vif.type != NL80211_IFTYPE_MONITOR) { - set_bit(SDATA_STATE_OFFCHANNEL, &sdata->state); + if (sdata->vif.type != NL80211_IFTYPE_MONITOR) { netif_tx_stop_all_queues(sdata->dev); + if (offchannel_ps_enable && + (sdata->vif.type == NL80211_IFTYPE_STATION) && + sdata->u.mgd.associated) + ieee80211_offchannel_ps_enable(sdata, true); } } mutex_unlock(&local->iflist_mtx); } -void ieee80211_offchannel_stop_station(struct ieee80211_local *local) +void ieee80211_offchannel_enable_all_ps(struct ieee80211_local *local, + bool tell_ap) { struct ieee80211_sub_if_data *sdata; - /* - * notify the AP about us leaving the channel and stop all STA interfaces - */ mutex_lock(&local->iflist_mtx); list_for_each_entry(sdata, &local->interfaces, list) { if (!ieee80211_sdata_running(sdata)) continue; - if (sdata->vif.type == NL80211_IFTYPE_STATION) { - set_bit(SDATA_STATE_OFFCHANNEL, &sdata->state); - netif_tx_stop_all_queues(sdata->dev); - if (sdata->u.mgd.associated) - ieee80211_offchannel_ps_enable(sdata); - } + if (sdata->vif.type == NL80211_IFTYPE_STATION && + sdata->u.mgd.associated) + ieee80211_offchannel_ps_enable(sdata, tell_ap); } mutex_unlock(&local->iflist_mtx); } void ieee80211_offchannel_return(struct ieee80211_local *local, - bool enable_beaconing) + bool enable_beaconing, + bool offchannel_ps_disable) { struct ieee80211_sub_if_data *sdata; @@ -161,7 +166,8 @@ void ieee80211_offchannel_return(struct ieee80211_local *local, continue; /* Tell AP we're back */ - if (sdata->vif.type == NL80211_IFTYPE_STATION) { + if (offchannel_ps_disable && + sdata->vif.type == NL80211_IFTYPE_STATION) { if (sdata->u.mgd.associated) ieee80211_offchannel_ps_disable(sdata); } @@ -181,7 +187,7 @@ void ieee80211_offchannel_return(struct ieee80211_local *local, netif_tx_wake_all_queues(sdata->dev); } - /* re-enable beaconing */ + /* Check to see if we should re-enable beaconing */ if (enable_beaconing && (sdata->vif.type == NL80211_IFTYPE_AP || sdata->vif.type == NL80211_IFTYPE_ADHOC || diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c index 165a4518bb48..bce14fbfc3b6 100644 --- a/net/mac80211/rc80211_minstrel_ht.c +++ b/net/mac80211/rc80211_minstrel_ht.c @@ -415,10 +415,8 @@ minstrel_ht_tx_status(void *priv, struct ieee80211_supported_band *sband, mi->sample_count--; } - if (info->flags & IEEE80211_TX_CTL_RATE_CTRL_PROBE) { + if (info->flags & IEEE80211_TX_CTL_RATE_CTRL_PROBE) mi->sample_packets += info->status.ampdu_len; - minstrel_next_sample_idx(mi); - } for (i = 0; !last; i++) { last = (i == IEEE80211_TX_MAX_RATES - 1) || @@ -519,9 +517,7 @@ minstrel_ht_set_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, rate->count = mr->retry_count; rate->flags = IEEE80211_TX_RC_MCS | group->flags; - if (txrc->short_preamble) - rate->flags |= IEEE80211_TX_RC_USE_SHORT_PREAMBLE; - if (txrc->rts || rtscts) + if (rtscts) rate->flags |= IEEE80211_TX_RC_USE_RTS_CTS; rate->idx = index % MCS_GROUP_RATES + (group->streams - 1) * MCS_GROUP_RATES; } @@ -553,13 +549,14 @@ minstrel_get_sample_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi) sample_idx = sample_table[mg->column][mg->index]; mr = &mg->rates[sample_idx]; sample_idx += mi->sample_group * MCS_GROUP_RATES; + minstrel_next_sample_idx(mi); /* * When not using MRR, do not sample if the probability is already * higher than 95% to avoid wasting airtime */ if (!mp->has_mrr && (mr->probability > MINSTREL_FRAC(95, 100))) - goto next; + return -1; /* * Make sure that lower 
rates get sampled only occasionally, @@ -568,17 +565,13 @@ minstrel_get_sample_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi) if (minstrel_get_duration(sample_idx) > minstrel_get_duration(mi->max_tp_rate)) { if (mr->sample_skipped < 20) - goto next; + return -1; if (mi->sample_slow++ > 2) - goto next; + return -1; } return sample_idx; - -next: - minstrel_next_sample_idx(mi); - return -1; } static void diff --git a/net/mac80211/rc80211_pid.h b/net/mac80211/rc80211_pid.h index 1a873f00691a..6510f8ee738e 100644 --- a/net/mac80211/rc80211_pid.h +++ b/net/mac80211/rc80211_pid.h @@ -24,9 +24,6 @@ /* Fixed point arithmetic shifting amount. */ #define RC_PID_ARITH_SHIFT 8 -/* Fixed point arithmetic factor. */ -#define RC_PID_ARITH_FACTOR (1 << RC_PID_ARITH_SHIFT) - /* Proportional PID component coefficient. */ #define RC_PID_COEFF_P 15 /* Integral PID component coefficient. */ diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index a6701ed87f0d..5c1930ba8ebe 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -77,7 +77,7 @@ ieee80211_rx_radiotap_len(struct ieee80211_local *local, /* always present fields */ len = sizeof(struct ieee80211_radiotap_header) + 9; - if (status->flag & RX_FLAG_TSFT) + if (status->flag & RX_FLAG_MACTIME_MPDU) len += 8; if (local->hw.flags & IEEE80211_HW_SIGNAL_DBM) len += 1; @@ -85,6 +85,9 @@ ieee80211_rx_radiotap_len(struct ieee80211_local *local, if (len & 1) /* padding for RX_FLAGS if necessary */ len++; + if (status->flag & RX_FLAG_HT) /* HT info */ + len += 3; + return len; } @@ -120,7 +123,7 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local, /* the order of the following fields is important */ /* IEEE80211_RADIOTAP_TSFT */ - if (status->flag & RX_FLAG_TSFT) { + if (status->flag & RX_FLAG_MACTIME_MPDU) { put_unaligned_le64(status->mactime, pos); rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_TSFT); @@ -139,11 +142,8 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local, /* IEEE80211_RADIOTAP_RATE */ if (status->flag & RX_FLAG_HT) { /* - * TODO: add following information into radiotap header once - * suitable fields are defined for it: - * - MCS index (status->rate_idx) - * - HT40 (status->flag & RX_FLAG_40MHZ) - * - short-GI (status->flag & RX_FLAG_SHORT_GI) + * MCS information is a separate field in radiotap, + * added below. 
*/ *pos = 0; } else { @@ -193,6 +193,20 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local, rx_flags |= IEEE80211_RADIOTAP_F_RX_BADPLCP; put_unaligned_le16(rx_flags, pos); pos += 2; + + if (status->flag & RX_FLAG_HT) { + rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_MCS); + *pos++ = IEEE80211_RADIOTAP_MCS_HAVE_MCS | + IEEE80211_RADIOTAP_MCS_HAVE_GI | + IEEE80211_RADIOTAP_MCS_HAVE_BW; + *pos = 0; + if (status->flag & RX_FLAG_SHORT_GI) + *pos |= IEEE80211_RADIOTAP_MCS_SGI; + if (status->flag & RX_FLAG_40MHZ) + *pos |= IEEE80211_RADIOTAP_MCS_BW_40; + pos++; + *pos++ = status->rate_idx; + } } /* @@ -392,16 +406,10 @@ ieee80211_rx_h_passive_scan(struct ieee80211_rx_data *rx) if (likely(!(status->rx_flags & IEEE80211_RX_IN_SCAN))) return RX_CONTINUE; - if (test_bit(SCAN_HW_SCANNING, &local->scanning)) + if (test_bit(SCAN_HW_SCANNING, &local->scanning) || + test_bit(SCAN_SW_SCANNING, &local->scanning)) return ieee80211_scan_rx(rx->sdata, skb); - if (test_bit(SCAN_SW_SCANNING, &local->scanning)) { - /* drop all the other packets during a software scan anyway */ - if (ieee80211_scan_rx(rx->sdata, skb) != RX_QUEUED) - dev_kfree_skb(skb); - return RX_QUEUED; - } - /* scanning finished during invoking of handlers */ I802_DEBUG_INC(local->rx_handlers_drop_passive_scan); return RX_DROP_UNUSABLE; @@ -798,7 +806,7 @@ ieee80211_rx_h_check(struct ieee80211_rx_data *rx) rx->local->dot11FrameDuplicateCount++; rx->sta->num_duplicates++; } - return RX_DROP_MONITOR; + return RX_DROP_UNUSABLE; } else rx->sta->last_seq_ctrl[rx->queue] = hdr->seq_ctrl; } @@ -824,18 +832,8 @@ ieee80211_rx_h_check(struct ieee80211_rx_data *rx) ieee80211_is_pspoll(hdr->frame_control)) && rx->sdata->vif.type != NL80211_IFTYPE_ADHOC && rx->sdata->vif.type != NL80211_IFTYPE_WDS && - (!rx->sta || !test_sta_flags(rx->sta, WLAN_STA_ASSOC)))) { - if ((!ieee80211_has_fromds(hdr->frame_control) && - !ieee80211_has_tods(hdr->frame_control) && - ieee80211_is_data(hdr->frame_control)) || - !(status->rx_flags & IEEE80211_RX_RA_MATCH)) { - /* Drop IBSS frames and frames for other hosts - * silently. 
*/ - return RX_DROP_MONITOR; - } - + (!rx->sta || !test_sta_flags(rx->sta, WLAN_STA_ASSOC)))) return RX_DROP_MONITOR; - } return RX_CONTINUE; } @@ -1088,7 +1086,8 @@ static void ap_sta_ps_start(struct sta_info *sta) atomic_inc(&sdata->bss->num_sta_ps); set_sta_flags(sta, WLAN_STA_PS_STA); - drv_sta_notify(local, sdata, STA_NOTIFY_SLEEP, &sta->sta); + if (!(local->hw.flags & IEEE80211_HW_AP_LINK_PS)) + drv_sta_notify(local, sdata, STA_NOTIFY_SLEEP, &sta->sta); #ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG printk(KERN_DEBUG "%s: STA %pM aid %d enters power save mode\n", sdata->name, sta->sta.addr, sta->sta.aid); @@ -1117,6 +1116,27 @@ static void ap_sta_ps_end(struct sta_info *sta) ieee80211_sta_ps_deliver_wakeup(sta); } +int ieee80211_sta_ps_transition(struct ieee80211_sta *sta, bool start) +{ + struct sta_info *sta_inf = container_of(sta, struct sta_info, sta); + bool in_ps; + + WARN_ON(!(sta_inf->local->hw.flags & IEEE80211_HW_AP_LINK_PS)); + + /* Don't let the same PS state be set twice */ + in_ps = test_sta_flags(sta_inf, WLAN_STA_PS_STA); + if ((start && in_ps) || (!start && !in_ps)) + return -EINVAL; + + if (start) + ap_sta_ps_start(sta_inf); + else + ap_sta_ps_end(sta_inf); + + return 0; +} +EXPORT_SYMBOL(ieee80211_sta_ps_transition); + static ieee80211_rx_result debug_noinline ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx) { @@ -1136,14 +1156,23 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx) if (rx->sdata->vif.type == NL80211_IFTYPE_ADHOC) { u8 *bssid = ieee80211_get_bssid(hdr, rx->skb->len, NL80211_IFTYPE_ADHOC); - if (compare_ether_addr(bssid, rx->sdata->u.ibss.bssid) == 0) + if (compare_ether_addr(bssid, rx->sdata->u.ibss.bssid) == 0) { sta->last_rx = jiffies; + if (ieee80211_is_data(hdr->frame_control)) { + sta->last_rx_rate_idx = status->rate_idx; + sta->last_rx_rate_flag = status->flag; + } + } } else if (!is_multicast_ether_addr(hdr->addr1)) { /* * Mesh beacons will update last_rx when if they are found to * match the current local configuration when processed. */ sta->last_rx = jiffies; + if (ieee80211_is_data(hdr->frame_control)) { + sta->last_rx_rate_idx = status->rate_idx; + sta->last_rx_rate_flag = status->flag; + } } if (!(status->rx_flags & IEEE80211_RX_RA_MATCH)) @@ -1161,7 +1190,8 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx) * Change STA power saving mode only at the end of a frame * exchange sequence. 
*/ - if (!ieee80211_has_morefrags(hdr->frame_control) && + if (!(sta->local->hw.flags & IEEE80211_HW_AP_LINK_PS) && + !ieee80211_has_morefrags(hdr->frame_control) && !(status->rx_flags & IEEE80211_RX_DEFERRED_RELEASE) && (rx->sdata->vif.type == NL80211_IFTYPE_AP || rx->sdata->vif.type == NL80211_IFTYPE_AP_VLAN)) { @@ -1556,17 +1586,36 @@ __ieee80211_data_to_8023(struct ieee80211_rx_data *rx) { struct ieee80211_sub_if_data *sdata = rx->sdata; struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)rx->skb->data; + bool check_port_control = false; + struct ethhdr *ehdr; + int ret; if (ieee80211_has_a4(hdr->frame_control) && sdata->vif.type == NL80211_IFTYPE_AP_VLAN && !sdata->u.vlan.sta) return -1; + if (sdata->vif.type == NL80211_IFTYPE_STATION && + !!sdata->u.mgd.use_4addr != !!ieee80211_has_a4(hdr->frame_control)) { + + if (!sdata->u.mgd.use_4addr) + return -1; + else + check_port_control = true; + } + if (is_multicast_ether_addr(hdr->addr1) && - ((sdata->vif.type == NL80211_IFTYPE_AP_VLAN && sdata->u.vlan.sta) || - (sdata->vif.type == NL80211_IFTYPE_STATION && sdata->u.mgd.use_4addr))) + sdata->vif.type == NL80211_IFTYPE_AP_VLAN && sdata->u.vlan.sta) return -1; - return ieee80211_data_to_8023(rx->skb, sdata->vif.addr, sdata->vif.type); + ret = ieee80211_data_to_8023(rx->skb, sdata->vif.addr, sdata->vif.type); + if (ret < 0 || !check_port_control) + return ret; + + ehdr = (struct ethhdr *) rx->skb->data; + if (ehdr->h_proto != rx->sdata->control_port_protocol) + return -1; + + return 0; } /* @@ -1893,7 +1942,10 @@ ieee80211_rx_h_data(struct ieee80211_rx_data *rx) dev->stats.rx_bytes += rx->skb->len; if (local->ps_sdata && local->hw.conf.dynamic_ps_timeout > 0 && - !is_multicast_ether_addr(((struct ethhdr *)rx->skb->data)->h_dest)) { + !is_multicast_ether_addr( + ((struct ethhdr *)rx->skb->data)->h_dest) && + (!local->scanning && + !test_bit(SDATA_STATE_OFFCHANNEL, &sdata->state))) { mod_timer(&local->dynamic_ps_timer, jiffies + msecs_to_jiffies(local->hw.conf.dynamic_ps_timeout)); } @@ -2590,7 +2642,8 @@ static int prepare_for_handlers(struct ieee80211_rx_data *rx, return 0; if (!multicast && compare_ether_addr(sdata->vif.addr, hdr->addr1) != 0) { - if (!(sdata->dev->flags & IFF_PROMISC)) + if (!(sdata->dev->flags & IFF_PROMISC) || + sdata->u.mgd.use_4addr) return 0; status->rx_flags &= ~IEEE80211_RX_RA_MATCH; } @@ -2639,7 +2692,8 @@ static int prepare_for_handlers(struct ieee80211_rx_data *rx, return 0; } else if (!ieee80211_bssid_match(bssid, sdata->vif.addr)) { - if (!(status->rx_flags & IEEE80211_RX_IN_SCAN)) + if (!(status->rx_flags & IEEE80211_RX_IN_SCAN) && + !ieee80211_is_beacon(hdr->frame_control)) return 0; status->rx_flags &= ~IEEE80211_RX_RA_MATCH; } @@ -2692,7 +2746,7 @@ static bool ieee80211_prepare_and_rx_handle(struct ieee80211_rx_data *rx, if (!skb) { if (net_ratelimit()) wiphy_debug(local->hw.wiphy, - "failed to copy multicast frame for %s\n", + "failed to copy skb for %s\n", sdata->name); return true; } @@ -2730,7 +2784,7 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw, local->dot11ReceivedFragmentCount++; if (unlikely(test_bit(SCAN_HW_SCANNING, &local->scanning) || - test_bit(SCAN_OFF_CHANNEL, &local->scanning))) + test_bit(SCAN_SW_SCANNING, &local->scanning))) status->rx_flags |= IEEE80211_RX_IN_SCAN; if (ieee80211_is_mgmt(fc)) diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index fb274db77e3c..489b6ad200d4 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -196,7 +196,8 @@ ieee80211_scan_rx(struct ieee80211_sub_if_data *sdata, struct 
sk_buff *skb) ieee802_11_parse_elems(elements, skb->len - baselen, &elems); if (elems.ds_params && elems.ds_params_len == 1) - freq = ieee80211_channel_to_frequency(elems.ds_params[0]); + freq = ieee80211_channel_to_frequency(elems.ds_params[0], + rx_status->band); else freq = rx_status->freq; @@ -211,6 +212,14 @@ ieee80211_scan_rx(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb) if (bss) ieee80211_rx_bss_put(sdata->local, bss); + /* If we are on-operating-channel, and this packet is for the + * current channel, pass the pkt on up the stack so that + * the rest of the stack can make use of it. + */ + if (ieee80211_cfg_on_oper_channel(sdata->local) + && (channel == sdata->local->oper_channel)) + return RX_CONTINUE; + dev_kfree_skb(skb); return RX_QUEUED; } @@ -249,10 +258,12 @@ static bool ieee80211_prep_hw_scan(struct ieee80211_local *local) return true; } -static bool __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted, +static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted, bool was_hw_scan) { struct ieee80211_local *local = hw_to_local(hw); + bool on_oper_chan; + bool enable_beacons = false; lockdep_assert_held(&local->mtx); @@ -266,12 +277,12 @@ static bool __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted, aborted = true; if (WARN_ON(!local->scan_req)) - return false; + return; if (was_hw_scan && !aborted && ieee80211_prep_hw_scan(local)) { int rc = drv_hw_scan(local, local->scan_sdata, local->hw_scan_req); if (rc == 0) - return false; + return; } kfree(local->hw_scan_req); @@ -285,24 +296,28 @@ static bool __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted, local->scanning = 0; local->scan_channel = NULL; - return true; -} + on_oper_chan = ieee80211_cfg_on_oper_channel(local); -static void __ieee80211_scan_completed_finish(struct ieee80211_hw *hw, - bool was_hw_scan) -{ - struct ieee80211_local *local = hw_to_local(hw); + if (was_hw_scan || !on_oper_chan) + ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL); + else + /* Set power back to normal operating levels. */ + ieee80211_hw_config(local, 0); - ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL); if (!was_hw_scan) { + bool on_oper_chan2; ieee80211_configure_filter(local); drv_sw_scan_complete(local); - ieee80211_offchannel_return(local, true); + on_oper_chan2 = ieee80211_cfg_on_oper_channel(local); + /* We should always be on-channel at this point. */ + WARN_ON(!on_oper_chan2); + if (on_oper_chan2 && (on_oper_chan != on_oper_chan2)) + enable_beacons = true; + + ieee80211_offchannel_return(local, enable_beacons, true); } - mutex_lock(&local->mtx); ieee80211_recalc_idle(local); - mutex_unlock(&local->mtx); ieee80211_mlme_notify_scan_completed(local); ieee80211_ibss_notify_scan_completed(local); @@ -340,16 +355,21 @@ static int ieee80211_start_sw_scan(struct ieee80211_local *local) */ drv_sw_scan_start(local); - ieee80211_offchannel_stop_beaconing(local); - local->leave_oper_channel_time = 0; local->next_scan_state = SCAN_DECISION; local->scan_channel_idx = 0; - drv_flush(local, false); + /* We always want to use off-channel PS, even if we + * are not really leaving oper-channel. Don't + * tell the AP though, as long as we are on-channel. + */ + ieee80211_offchannel_enable_all_ps(local, false); ieee80211_configure_filter(local); + /* We need to set power level at maximum rate for scanning. 
*/ + ieee80211_hw_config(local, 0); + ieee80211_queue_delayed_work(&local->hw, &local->scan_work, IEEE80211_CHANNEL_TIME); @@ -486,7 +506,20 @@ static void ieee80211_scan_state_decision(struct ieee80211_local *local, } mutex_unlock(&local->iflist_mtx); - if (local->scan_channel) { + next_chan = local->scan_req->channels[local->scan_channel_idx]; + + if (ieee80211_cfg_on_oper_channel(local)) { + /* We're currently on operating channel. */ + if (next_chan == local->oper_channel) + /* We don't need to move off of operating channel. */ + local->next_scan_state = SCAN_SET_CHANNEL; + else + /* + * We do need to leave operating channel, as next + * scan is somewhere else. + */ + local->next_scan_state = SCAN_LEAVE_OPER_CHANNEL; + } else { /* * we're currently scanning a different channel, let's * see if we can scan another channel without interfering @@ -502,7 +535,6 @@ static void ieee80211_scan_state_decision(struct ieee80211_local *local, * * Otherwise switch back to the operating channel. */ - next_chan = local->scan_req->channels[local->scan_channel_idx]; bad_latency = time_after(jiffies + ieee80211_scan_get_channel_time(next_chan), @@ -520,12 +552,6 @@ static void ieee80211_scan_state_decision(struct ieee80211_local *local, local->next_scan_state = SCAN_ENTER_OPER_CHANNEL; else local->next_scan_state = SCAN_SET_CHANNEL; - } else { - /* - * we're on the operating channel currently, let's - * leave that channel now to scan another one - */ - local->next_scan_state = SCAN_LEAVE_OPER_CHANNEL; } *next_delay = 0; @@ -534,9 +560,10 @@ static void ieee80211_scan_state_decision(struct ieee80211_local *local, static void ieee80211_scan_state_leave_oper_channel(struct ieee80211_local *local, unsigned long *next_delay) { - ieee80211_offchannel_stop_station(local); - - __set_bit(SCAN_OFF_CHANNEL, &local->scanning); + /* PS will already be in off-channel mode, + * we do that once at the beginning of scanning. + */ + ieee80211_offchannel_stop_vifs(local, false); /* * What if the nullfunc frames didn't arrive? @@ -559,15 +586,15 @@ static void ieee80211_scan_state_enter_oper_channel(struct ieee80211_local *loca { /* switch back to the operating channel */ local->scan_channel = NULL; - ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL); + if (!ieee80211_cfg_on_oper_channel(local)) + ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL); /* - * Only re-enable station mode interface now; beaconing will be - * re-enabled once the full scan has been completed. + * Re-enable vifs and beaconing. Leave PS + * in off-channel state..will put that back + * on-channel at the end of scanning. */ - ieee80211_offchannel_return(local, false); - - __clear_bit(SCAN_OFF_CHANNEL, &local->scanning); + ieee80211_offchannel_return(local, true, false); *next_delay = HZ / 5; local->next_scan_state = SCAN_DECISION; @@ -583,8 +610,11 @@ static void ieee80211_scan_state_set_channel(struct ieee80211_local *local, chan = local->scan_req->channels[local->scan_channel_idx]; local->scan_channel = chan; - if (ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL)) - skip = 1; + + /* Only call hw-config if we really need to change channels. 
*/ + if (chan != local->hw.conf.channel) + if (ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL)) + skip = 1; /* advance state machine to next channel/band */ local->scan_channel_idx++; @@ -642,12 +672,14 @@ void ieee80211_scan_work(struct work_struct *work) { struct ieee80211_local *local = container_of(work, struct ieee80211_local, scan_work.work); - struct ieee80211_sub_if_data *sdata = local->scan_sdata; + struct ieee80211_sub_if_data *sdata; unsigned long next_delay = 0; - bool aborted, hw_scan, finish; + bool aborted, hw_scan; mutex_lock(&local->mtx); + sdata = local->scan_sdata; + if (test_and_clear_bit(SCAN_COMPLETED, &local->scanning)) { aborted = test_and_clear_bit(SCAN_ABORTED, &local->scanning); goto out_complete; @@ -711,17 +743,11 @@ void ieee80211_scan_work(struct work_struct *work) } while (next_delay == 0); ieee80211_queue_delayed_work(&local->hw, &local->scan_work, next_delay); - mutex_unlock(&local->mtx); - return; + goto out; out_complete: hw_scan = test_bit(SCAN_HW_SCANNING, &local->scanning); - finish = __ieee80211_scan_completed(&local->hw, aborted, hw_scan); - mutex_unlock(&local->mtx); - if (finish) - __ieee80211_scan_completed_finish(&local->hw, hw_scan); - return; - + __ieee80211_scan_completed(&local->hw, aborted, hw_scan); out: mutex_unlock(&local->mtx); } @@ -791,7 +817,6 @@ int ieee80211_request_internal_scan(struct ieee80211_sub_if_data *sdata, void ieee80211_scan_cancel(struct ieee80211_local *local) { bool abortscan; - bool finish = false; /* * We are only canceling software scan, or deferred scan that was not @@ -811,14 +836,17 @@ void ieee80211_scan_cancel(struct ieee80211_local *local) mutex_lock(&local->mtx); abortscan = local->scan_req && !test_bit(SCAN_HW_SCANNING, &local->scanning); - if (abortscan) - finish = __ieee80211_scan_completed(&local->hw, true, false); - mutex_unlock(&local->mtx); - if (abortscan) { - /* The scan is canceled, but stop work from being pending */ - cancel_delayed_work_sync(&local->scan_work); + /* + * The scan is canceled, but stop work from being pending. + * + * If the work is currently running, it must be blocked on + * the mutex, but we'll set scan_sdata = NULL and it'll + * simply exit once it acquires the mutex. + */ + cancel_delayed_work(&local->scan_work); + /* and clean up */ + __ieee80211_scan_completed(&local->hw, true, false); } - if (finish) - __ieee80211_scan_completed_finish(&local->hw, false); + mutex_unlock(&local->mtx); } diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index c426504ed1cf..5a11078827ab 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -899,7 +899,8 @@ void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta) struct ieee80211_local *local = sdata->local; int sent, buffered; - drv_sta_notify(local, sdata, STA_NOTIFY_AWAKE, &sta->sta); + if (!(local->hw.flags & IEEE80211_HW_AP_LINK_PS)) + drv_sta_notify(local, sdata, STA_NOTIFY_AWAKE, &sta->sta); if (!skb_queue_empty(&sta->ps_tx_buf)) sta_info_clear_tim_bit(sta); diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index bbdd2a86a94b..57681149e37f 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -82,6 +82,7 @@ enum ieee80211_sta_info_flags { * @state: session state (see above) * @stop_initiator: initiator of a session stop * @tx_stop: TX DelBA frame when stopping + * @buf_size: reorder buffer size at receiver * * This structure's lifetime is managed by RCU, assignments to * the array holding it must hold the aggregation mutex. 
@@ -101,6 +102,7 @@ struct tid_ampdu_tx { u8 dialog_token; u8 stop_initiator; bool tx_stop; + u8 buf_size; }; /** @@ -207,6 +209,8 @@ enum plink_state { * @rate_ctrl_priv: rate control private per-STA pointer * @last_tx_rate: rate used for last transmit, to report to userspace as * "the" transmit rate + * @last_rx_rate_idx: rx status rate index of the last data packet + * @last_rx_rate_flag: rx status flag of the last data packet * @lock: used for locking all fields that require locking, see comments * in the header file. * @flaglock: spinlock for flags accesses @@ -309,6 +313,8 @@ struct sta_info { unsigned long tx_bytes; unsigned long tx_fragments; struct ieee80211_tx_rate last_tx_rate; + int last_rx_rate_idx; + int last_rx_rate_flag; u16 tid_seq[IEEE80211_QOS_CTL_TID_MASK + 1]; /* diff --git a/net/mac80211/status.c b/net/mac80211/status.c index 38a797217a91..b936dd29e92b 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -98,6 +98,10 @@ static void ieee80211_handle_filtered_frame(struct ieee80211_local *local, * (b) always process RX events before TX status events if ordering * can be unknown, for example with different interrupt status * bits. + * (c) if PS mode transitions are manual (i.e. the flag + * %IEEE80211_HW_AP_LINK_PS is set), always process PS state + * changes before calling TX status events if ordering can be + * unknown. */ if (test_sta_flags(sta, WLAN_STA_PS_STA) && skb_queue_len(&sta->tx_filtered) < STA_MAX_TX_BUFFER) { @@ -314,8 +318,6 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) if (info->flags & IEEE80211_TX_STAT_ACK) { local->ps_sdata->u.mgd.flags |= IEEE80211_STA_NULLFUNC_ACKED; - ieee80211_queue_work(&local->hw, - &local->dynamic_ps_enable_work); } else mod_timer(&local->dynamic_ps_timer, jiffies + msecs_to_jiffies(10)); @@ -323,6 +325,7 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) if (info->flags & IEEE80211_TX_INTFL_NL80211_FRAME_TX) { struct ieee80211_work *wk; + u64 cookie = (unsigned long)skb; rcu_read_lock(); list_for_each_entry_rcu(wk, &local->work_list, list) { @@ -334,8 +337,16 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) break; } rcu_read_unlock(); + if (local->hw_roc_skb_for_status == skb) { + cookie = local->hw_roc_cookie ^ 2; + local->hw_roc_skb_for_status = NULL; + } + + if (cookie == local->hw_offchan_tx_cookie) + local->hw_offchan_tx_cookie = 0; + cfg80211_mgmt_tx_status( - skb->dev, (unsigned long) skb, skb->data, skb->len, + skb->dev, cookie, skb->data, skb->len, !!(info->flags & IEEE80211_TX_STAT_ACK), GFP_ATOMIC); } diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 5950e3abead9..081dcaf6577b 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -33,10 +33,6 @@ #include "wme.h" #include "rate.h" -#define IEEE80211_TX_OK 0 -#define IEEE80211_TX_AGAIN 1 -#define IEEE80211_TX_PENDING 2 - /* misc utils */ static __le16 ieee80211_duration(struct ieee80211_tx_data *tx, int group_addr, @@ -236,6 +232,7 @@ ieee80211_tx_h_dynamic_ps(struct ieee80211_tx_data *tx) if (local->hw.conf.flags & IEEE80211_CONF_PS) { ieee80211_stop_queues_by_reason(&local->hw, IEEE80211_QUEUE_STOP_REASON_PS); + ifmgd->flags &= ~IEEE80211_STA_NULLFUNC_ACKED; ieee80211_queue_work(&local->hw, &local->dynamic_ps_disable_work); } @@ -257,7 +254,8 @@ ieee80211_tx_h_check_assoc(struct ieee80211_tx_data *tx) if (unlikely(info->flags & IEEE80211_TX_CTL_INJECTED)) return TX_CONTINUE; - if (unlikely(test_bit(SCAN_OFF_CHANNEL, &tx->local->scanning)) && + if 
(unlikely(test_bit(SCAN_SW_SCANNING, &tx->local->scanning)) && + test_bit(SDATA_STATE_OFFCHANNEL, &tx->sdata->state) && !ieee80211_is_probe_req(hdr->frame_control) && !ieee80211_is_nullfunc(hdr->frame_control)) /* @@ -1283,16 +1281,17 @@ ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata, return TX_CONTINUE; } -static int __ieee80211_tx(struct ieee80211_local *local, - struct sk_buff **skbp, - struct sta_info *sta, - bool txpending) +/* + * Returns false if the frame couldn't be transmitted but was queued instead. + */ +static bool __ieee80211_tx(struct ieee80211_local *local, struct sk_buff **skbp, + struct sta_info *sta, bool txpending) { struct sk_buff *skb = *skbp, *next; struct ieee80211_tx_info *info; struct ieee80211_sub_if_data *sdata; unsigned long flags; - int ret, len; + int len; bool fragm = false; while (skb) { @@ -1300,13 +1299,37 @@ static int __ieee80211_tx(struct ieee80211_local *local, __le16 fc; spin_lock_irqsave(&local->queue_stop_reason_lock, flags); - ret = IEEE80211_TX_OK; if (local->queue_stop_reasons[q] || - (!txpending && !skb_queue_empty(&local->pending[q]))) - ret = IEEE80211_TX_PENDING; + (!txpending && !skb_queue_empty(&local->pending[q]))) { + /* + * Since queue is stopped, queue up frames for later + * transmission from the tx-pending tasklet when the + * queue is woken again. + */ + + do { + next = skb->next; + skb->next = NULL; + /* + * NB: If txpending is true, next must already + * be NULL since we must've gone through this + * loop before already; therefore we can just + * queue the frame to the head without worrying + * about reordering of fragments. + */ + if (unlikely(txpending)) + __skb_queue_head(&local->pending[q], + skb); + else + __skb_queue_tail(&local->pending[q], + skb); + } while ((skb = next)); + + spin_unlock_irqrestore(&local->queue_stop_reason_lock, + flags); + return false; + } spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); - if (ret != IEEE80211_TX_OK) - return ret; info = IEEE80211_SKB_CB(skb); @@ -1341,15 +1364,7 @@ static int __ieee80211_tx(struct ieee80211_local *local, info->control.sta = NULL; fc = ((struct ieee80211_hdr *)skb->data)->frame_control; - ret = drv_tx(local, skb); - if (WARN_ON(ret != NETDEV_TX_OK && skb->len != len)) { - dev_kfree_skb(skb); - ret = NETDEV_TX_OK; - } - if (ret != NETDEV_TX_OK) { - info->control.vif = &sdata->vif; - return IEEE80211_TX_AGAIN; - } + drv_tx(local, skb); ieee80211_tpt_led_trig_tx(local, fc, len); *skbp = skb = next; @@ -1357,7 +1372,7 @@ static int __ieee80211_tx(struct ieee80211_local *local, fragm = true; } - return IEEE80211_TX_OK; + return true; } /* @@ -1394,7 +1409,8 @@ static int invoke_tx_handlers(struct ieee80211_tx_data *tx) /* handlers after fragment must be aware of tx info fragmentation! */ CALL_TXH(ieee80211_tx_h_stats); CALL_TXH(ieee80211_tx_h_encrypt); - CALL_TXH(ieee80211_tx_h_calculate_duration); + if (!(tx->local->hw.flags & IEEE80211_HW_HAS_RATE_CONTROL)) + CALL_TXH(ieee80211_tx_h_calculate_duration); #undef CALL_TXH txh_done: @@ -1416,23 +1432,24 @@ static int invoke_tx_handlers(struct ieee80211_tx_data *tx) return 0; } -static void ieee80211_tx(struct ieee80211_sub_if_data *sdata, +/* + * Returns false if the frame couldn't be transmitted but was queued instead. 
+ */ +static bool ieee80211_tx(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, bool txpending) { struct ieee80211_local *local = sdata->local; struct ieee80211_tx_data tx; ieee80211_tx_result res_prepare; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); - struct sk_buff *next; - unsigned long flags; - int ret, retries; u16 queue; + bool result = true; queue = skb_get_queue_mapping(skb); if (unlikely(skb->len < 10)) { dev_kfree_skb(skb); - return; + return true; } rcu_read_lock(); @@ -1442,85 +1459,19 @@ static void ieee80211_tx(struct ieee80211_sub_if_data *sdata, if (unlikely(res_prepare == TX_DROP)) { dev_kfree_skb(skb); - rcu_read_unlock(); - return; + goto out; } else if (unlikely(res_prepare == TX_QUEUED)) { - rcu_read_unlock(); - return; + goto out; } tx.channel = local->hw.conf.channel; info->band = tx.channel->band; - if (invoke_tx_handlers(&tx)) - goto out; - - retries = 0; - retry: - ret = __ieee80211_tx(local, &tx.skb, tx.sta, txpending); - switch (ret) { - case IEEE80211_TX_OK: - break; - case IEEE80211_TX_AGAIN: - /* - * Since there are no fragmented frames on A-MPDU - * queues, there's no reason for a driver to reject - * a frame there, warn and drop it. - */ - if (WARN_ON(info->flags & IEEE80211_TX_CTL_AMPDU)) - goto drop; - /* fall through */ - case IEEE80211_TX_PENDING: - skb = tx.skb; - - spin_lock_irqsave(&local->queue_stop_reason_lock, flags); - - if (local->queue_stop_reasons[queue] || - !skb_queue_empty(&local->pending[queue])) { - /* - * if queue is stopped, queue up frames for later - * transmission from the tasklet - */ - do { - next = skb->next; - skb->next = NULL; - if (unlikely(txpending)) - __skb_queue_head(&local->pending[queue], - skb); - else - __skb_queue_tail(&local->pending[queue], - skb); - } while ((skb = next)); - - spin_unlock_irqrestore(&local->queue_stop_reason_lock, - flags); - } else { - /* - * otherwise retry, but this is a race condition or - * a driver bug (which we warn about if it persists) - */ - spin_unlock_irqrestore(&local->queue_stop_reason_lock, - flags); - - retries++; - if (WARN(retries > 10, "tx refused but queue active\n")) - goto drop; - goto retry; - } - } + if (!invoke_tx_handlers(&tx)) + result = __ieee80211_tx(local, &tx.skb, tx.sta, txpending); out: rcu_read_unlock(); - return; - - drop: - rcu_read_unlock(); - - skb = tx.skb; - while (skb) { - next = skb->next; - dev_kfree_skb(skb); - skb = next; - } + return result; } /* device xmit handlers */ @@ -1547,7 +1498,7 @@ static int ieee80211_skb_resize(struct ieee80211_local *local, skb_orphan(skb); } - if (skb_header_cloned(skb)) + if (skb_cloned(skb)) I802_DEBUG_INC(local->tx_expand_skb_head_cloned); else if (head_need || tail_need) I802_DEBUG_INC(local->tx_expand_skb_head); @@ -1750,7 +1701,7 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, __le16 fc; struct ieee80211_hdr hdr; struct ieee80211s_hdr mesh_hdr __maybe_unused; - struct mesh_path *mppath = NULL; + struct mesh_path __maybe_unused *mppath = NULL; const u8 *encaps_data; int encaps_len, skip_header_bytes; int nh_pos, h_pos; @@ -1815,19 +1766,19 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, mppath = mpp_path_lookup(skb->data, sdata); /* - * Do not use address extension, if it is a packet from - * the same interface and the destination is not being - * proxied by any other mest point. + * Use address extension if it is a packet from + * another interface or if we know the destination + * is being proxied by a portal (i.e. 
portal address + * differs from proxied address) */ if (compare_ether_addr(sdata->vif.addr, skb->data + ETH_ALEN) == 0 && - (!mppath || !compare_ether_addr(mppath->mpp, skb->data))) { + !(mppath && compare_ether_addr(mppath->mpp, skb->data))) { hdrlen = ieee80211_fill_mesh_addresses(&hdr, &fc, skb->data, skb->data + ETH_ALEN); meshhdrlen = ieee80211_new_mesh_header(&mesh_hdr, sdata, NULL, NULL); } else { - /* packet from other interface */ int is_mesh_mcast = 1; const u8 *mesh_da; @@ -2067,6 +2018,11 @@ void ieee80211_clear_tx_pending(struct ieee80211_local *local) skb_queue_purge(&local->pending[i]); } +/* + * Returns false if the frame couldn't be transmitted but was queued instead, + * which in this case means re-queued -- take as an indication to stop sending + * more pending frames. + */ static bool ieee80211_tx_pending_skb(struct ieee80211_local *local, struct sk_buff *skb) { @@ -2074,20 +2030,17 @@ static bool ieee80211_tx_pending_skb(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata; struct sta_info *sta; struct ieee80211_hdr *hdr; - int ret; - bool result = true; + bool result; sdata = vif_to_sdata(info->control.vif); if (info->flags & IEEE80211_TX_INTFL_NEED_TXPROCESSING) { - ieee80211_tx(sdata, skb, true); + result = ieee80211_tx(sdata, skb, true); } else { hdr = (struct ieee80211_hdr *)skb->data; sta = sta_info_get(sdata, hdr->addr1); - ret = __ieee80211_tx(local, &skb, sta, true); - if (ret != IEEE80211_TX_OK) - result = false; + result = __ieee80211_tx(local, &skb, sta, true); } return result; @@ -2129,8 +2082,6 @@ void ieee80211_tx_pending(unsigned long data) flags); txok = ieee80211_tx_pending_skb(local, skb); - if (!txok) - __skb_queue_head(&local->pending[i], skb); spin_lock_irqsave(&local->queue_stop_reason_lock, flags); if (!txok) @@ -2178,6 +2129,8 @@ static void ieee80211_beacon_add_tim(struct ieee80211_if_ap *bss, if (bss->dtim_count == 0 && !skb_queue_empty(&bss->ps_bc_buf)) aid0 = 1; + bss->dtim_bc_mc = aid0 == 1; + if (have_bits) { /* Find largest even number N1 so that bits numbered 1 through * (N1 x 8) - 1 in the bitmap are 0 and number N2 so that bits @@ -2230,6 +2183,9 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, sdata = vif_to_sdata(vif); + if (!ieee80211_sdata_running(sdata)) + goto out; + if (tim_offset) *tim_offset = 0; if (tim_length) @@ -2238,7 +2194,7 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, if (sdata->vif.type == NL80211_IFTYPE_AP) { ap = &sdata->u.ap; beacon = rcu_dereference(ap->beacon); - if (ap && beacon) { + if (beacon) { /* * headroom, head length, * tail length and maximum TIM length @@ -2299,6 +2255,11 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, struct ieee80211_mgmt *mgmt; u8 *pos; +#ifdef CONFIG_MAC80211_MESH + if (!sdata->u.mesh.mesh_id_len) + goto out; +#endif + /* headroom, head length, tail length and maximum TIM length */ skb = dev_alloc_skb(local->tx_headroom + 400 + sdata->u.mesh.vendor_ie_len); @@ -2540,7 +2501,7 @@ ieee80211_get_buffered_bc(struct ieee80211_hw *hw, if (sdata->vif.type != NL80211_IFTYPE_AP || !beacon || !beacon->head) goto out; - if (bss->dtim_count != 0) + if (bss->dtim_count != 0 || !bss->dtim_bc_mc) goto out; /* send buffered bc/mc only after DTIM beacon */ while (1) { diff --git a/net/mac80211/util.c b/net/mac80211/util.c index cf68700abffa..556647a910ac 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -986,12 +986,6 @@ int ieee80211_build_preq_ies(struct ieee80211_local *local, u8 *buffer, u16 cap = 
sband->ht_cap.cap; __le16 tmp; - if (ieee80211_disable_40mhz_24ghz && - sband->band == IEEE80211_BAND_2GHZ) { - cap &= ~IEEE80211_HT_CAP_SUP_WIDTH_20_40; - cap &= ~IEEE80211_HT_CAP_SGI_40; - } - *pos++ = WLAN_EID_HT_CAPABILITY; *pos++ = sizeof(struct ieee80211_ht_cap); memset(pos, 0, sizeof(struct ieee80211_ht_cap)); @@ -1210,7 +1204,9 @@ int ieee80211_reconfig(struct ieee80211_local *local) switch (sdata->vif.type) { case NL80211_IFTYPE_STATION: changed |= BSS_CHANGED_ASSOC; + mutex_lock(&sdata->u.mgd.mtx); ieee80211_bss_info_change_notify(sdata, changed); + mutex_unlock(&sdata->u.mgd.mtx); break; case NL80211_IFTYPE_ADHOC: changed |= BSS_CHANGED_IBSS; diff --git a/net/mac80211/work.c b/net/mac80211/work.c index 36305e0d06ef..e73c8cae036b 100644 --- a/net/mac80211/work.c +++ b/net/mac80211/work.c @@ -30,7 +30,6 @@ #define IEEE80211_AUTH_MAX_TRIES 3 #define IEEE80211_ASSOC_TIMEOUT (HZ / 5) #define IEEE80211_ASSOC_MAX_TRIES 3 -#define IEEE80211_MAX_PROBE_TRIES 5 enum work_action { WORK_ACT_MISMATCH, @@ -126,12 +125,6 @@ static void ieee80211_add_ht_ie(struct sk_buff *skb, const u8 *ht_info_ie, /* determine capability flags */ - if (ieee80211_disable_40mhz_24ghz && - sband->band == IEEE80211_BAND_2GHZ) { - cap &= ~IEEE80211_HT_CAP_SUP_WIDTH_20_40; - cap &= ~IEEE80211_HT_CAP_SGI_40; - } - switch (ht_info->ht_param & IEEE80211_HT_PARAM_CHA_SEC_OFFSET) { case IEEE80211_HT_PARAM_CHA_SEC_ABOVE: if (flags & IEEE80211_CHAN_NO_HT40PLUS) { @@ -874,6 +867,44 @@ static void ieee80211_work_rx_queued_mgmt(struct ieee80211_local *local, kfree_skb(skb); } +static bool ieee80211_work_ct_coexists(enum nl80211_channel_type wk_ct, + enum nl80211_channel_type oper_ct) +{ + switch (wk_ct) { + case NL80211_CHAN_NO_HT: + return true; + case NL80211_CHAN_HT20: + if (oper_ct != NL80211_CHAN_NO_HT) + return true; + return false; + case NL80211_CHAN_HT40MINUS: + case NL80211_CHAN_HT40PLUS: + return (wk_ct == oper_ct); + } + WARN_ON(1); /* shouldn't get here */ + return false; +} + +static enum nl80211_channel_type +ieee80211_calc_ct(enum nl80211_channel_type wk_ct, + enum nl80211_channel_type oper_ct) +{ + switch (wk_ct) { + case NL80211_CHAN_NO_HT: + return oper_ct; + case NL80211_CHAN_HT20: + if (oper_ct != NL80211_CHAN_NO_HT) + return oper_ct; + return wk_ct; + case NL80211_CHAN_HT40MINUS: + case NL80211_CHAN_HT40PLUS: + return wk_ct; + } + WARN_ON(1); /* shouldn't get here */ + return wk_ct; +} + + static void ieee80211_work_timer(unsigned long data) { struct ieee80211_local *local = (void *) data; @@ -924,18 +955,52 @@ static void ieee80211_work_work(struct work_struct *work) } if (!started && !local->tmp_channel) { + bool on_oper_chan; + bool tmp_chan_changed = false; + bool on_oper_chan2; + enum nl80211_channel_type wk_ct; + on_oper_chan = ieee80211_cfg_on_oper_channel(local); + + /* Work with existing channel type if possible. */ + wk_ct = wk->chan_type; + if (wk->chan == local->hw.conf.channel) + wk_ct = ieee80211_calc_ct(wk->chan_type, + local->hw.conf.channel_type); + + if (local->tmp_channel) + if ((local->tmp_channel != wk->chan) || + (local->tmp_channel_type != wk_ct)) + tmp_chan_changed = true; + + local->tmp_channel = wk->chan; + local->tmp_channel_type = wk_ct; /* - * TODO: could optimize this by leaving the - * station vifs in awake mode if they - * happen to be on the same channel as - * the requested channel + * Leave the station vifs in awake mode if they + * happen to be on the same channel as + * the requested channel. 
*/ - ieee80211_offchannel_stop_beaconing(local); - ieee80211_offchannel_stop_station(local); + on_oper_chan2 = ieee80211_cfg_on_oper_channel(local); + if (on_oper_chan != on_oper_chan2) { + if (on_oper_chan2) { + /* going off oper channel, PS too */ + ieee80211_offchannel_stop_vifs(local, + true); + ieee80211_hw_config(local, 0); + } else { + /* going on channel, but leave PS + * off-channel. */ + ieee80211_hw_config(local, 0); + ieee80211_offchannel_return(local, + true, + false); + } + } else if (tmp_chan_changed) + /* Still off-channel, but on some other + * channel, so update hardware. + * PS should already be off-channel. + */ + ieee80211_hw_config(local, 0); - local->tmp_channel = wk->chan; - local->tmp_channel_type = wk->chan_type; - ieee80211_hw_config(local, 0); started = true; wk->timeout = jiffies; } @@ -1005,15 +1070,34 @@ static void ieee80211_work_work(struct work_struct *work) continue; if (wk->chan != local->tmp_channel) continue; - if (wk->chan_type != local->tmp_channel_type) + if (ieee80211_work_ct_coexists(wk->chan_type, + local->tmp_channel_type)) continue; remain_off_channel = true; } if (!remain_off_channel && local->tmp_channel) { + bool on_oper_chan = ieee80211_cfg_on_oper_channel(local); local->tmp_channel = NULL; - ieee80211_hw_config(local, 0); - ieee80211_offchannel_return(local, true); + /* If tmp_channel wasn't operating channel, then + * we need to go back on-channel. + * NOTE: If we can ever be here while scannning, + * or if the hw_config() channel config logic changes, + * then we may need to do a more thorough check to see if + * we still need to do a hardware config. Currently, + * we cannot be here while scanning, however. + */ + if (ieee80211_cfg_on_oper_channel(local) && !on_oper_chan) + ieee80211_hw_config(local, 0); + + /* At the least, we need to disable offchannel_ps, + * so just go ahead and run the entire offchannel + * return logic here. We *could* skip enabling + * beaconing if we were already on-oper-channel + * as a future optimization. + */ + ieee80211_offchannel_return(local, true, true); + /* give connection some time to breathe */ run_again(local, jiffies + HZ/2); } diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c index bee230d8fd11..f1765de2f4bf 100644 --- a/net/mac80211/wpa.c +++ b/net/mac80211/wpa.c @@ -26,13 +26,12 @@ ieee80211_tx_result ieee80211_tx_h_michael_mic_add(struct ieee80211_tx_data *tx) { - u8 *data, *key, *mic, key_offset; + u8 *data, *key, *mic; size_t data_len; unsigned int hdrlen; struct ieee80211_hdr *hdr; struct sk_buff *skb = tx->skb; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); - int authenticator; int tail; hdr = (struct ieee80211_hdr *)skb->data; @@ -47,6 +46,11 @@ ieee80211_tx_h_michael_mic_add(struct ieee80211_tx_data *tx) data = skb->data + hdrlen; data_len = skb->len - hdrlen; + if (unlikely(info->flags & IEEE80211_TX_INTFL_TKIP_MIC_FAILURE)) { + /* Need to use software crypto for the test */ + info->control.hw_key = NULL; + } + if (info->control.hw_key && !(tx->flags & IEEE80211_TX_FRAGMENTED) && !(tx->key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_MMIC)) { @@ -62,17 +66,11 @@ ieee80211_tx_h_michael_mic_add(struct ieee80211_tx_data *tx) skb_headroom(skb) < TKIP_IV_LEN)) return TX_DROP; -#if 0 - authenticator = fc & IEEE80211_FCTL_FROMDS; /* FIX */ -#else - authenticator = 1; -#endif - key_offset = authenticator ? 
- NL80211_TKIP_DATA_OFFSET_TX_MIC_KEY : - NL80211_TKIP_DATA_OFFSET_RX_MIC_KEY; - key = &tx->key->conf.key[key_offset]; + key = &tx->key->conf.key[NL80211_TKIP_DATA_OFFSET_TX_MIC_KEY]; mic = skb_put(skb, MICHAEL_MIC_LEN); michael_mic(key, hdr, data, data_len, mic); + if (unlikely(info->flags & IEEE80211_TX_INTFL_TKIP_MIC_FAILURE)) + mic[0]++; return TX_CONTINUE; } @@ -81,14 +79,13 @@ ieee80211_tx_h_michael_mic_add(struct ieee80211_tx_data *tx) ieee80211_rx_result ieee80211_rx_h_michael_mic_verify(struct ieee80211_rx_data *rx) { - u8 *data, *key = NULL, key_offset; + u8 *data, *key = NULL; size_t data_len; unsigned int hdrlen; u8 mic[MICHAEL_MIC_LEN]; struct sk_buff *skb = rx->skb; struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; - int authenticator = 1, wpa_test = 0; /* No way to verify the MIC if the hardware stripped it */ if (status->flag & RX_FLAG_MMIC_STRIPPED) @@ -106,17 +103,9 @@ ieee80211_rx_h_michael_mic_verify(struct ieee80211_rx_data *rx) data = skb->data + hdrlen; data_len = skb->len - hdrlen - MICHAEL_MIC_LEN; -#if 0 - authenticator = fc & IEEE80211_FCTL_TODS; /* FIX */ -#else - authenticator = 1; -#endif - key_offset = authenticator ? - NL80211_TKIP_DATA_OFFSET_RX_MIC_KEY : - NL80211_TKIP_DATA_OFFSET_TX_MIC_KEY; - key = &rx->key->conf.key[key_offset]; + key = &rx->key->conf.key[NL80211_TKIP_DATA_OFFSET_RX_MIC_KEY]; michael_mic(key, hdr, data, data_len, mic); - if (memcmp(mic, data + data_len, MICHAEL_MIC_LEN) != 0 || wpa_test) { + if (memcmp(mic, data + data_len, MICHAEL_MIC_LEN) != 0) { if (!(status->rx_flags & IEEE80211_RX_RA_MATCH)) return RX_DROP_UNUSABLE; @@ -208,7 +197,7 @@ ieee80211_rx_result ieee80211_crypto_tkip_decrypt(struct ieee80211_rx_data *rx) { struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) rx->skb->data; - int hdrlen, res, hwaccel = 0, wpa_test = 0; + int hdrlen, res, hwaccel = 0; struct ieee80211_key *key = rx->key; struct sk_buff *skb = rx->skb; struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); @@ -235,7 +224,7 @@ ieee80211_crypto_tkip_decrypt(struct ieee80211_rx_data *rx) hdr->addr1, hwaccel, rx->queue, &rx->tkip_iv32, &rx->tkip_iv16); - if (res != TKIP_DECRYPT_OK || wpa_test) + if (res != TKIP_DECRYPT_OK) return RX_DROP_UNUSABLE; /* Trim ICV */ diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 1e00bf7d27c5..899b71c0ff5d 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -133,6 +133,7 @@ unsigned int nf_iterate(struct list_head *head, /* Optimization: we don't need to hold module reference here, since function can't sleep. 
--RR */ +repeat: verdict = elem->hook(hook, skb, indev, outdev, okfn); if (verdict != NF_ACCEPT) { #ifdef CONFIG_NETFILTER_DEBUG @@ -145,7 +146,7 @@ unsigned int nf_iterate(struct list_head *head, #endif if (verdict != NF_REPEAT) return verdict; - *i = (*i)->prev; + goto repeat; } } return NF_ACCEPT; diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index fa6d44c62de3..b799cea31f95 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -75,15 +75,13 @@ static int __ip_vs_addr_is_local_v6(struct net *net, const struct in6_addr *addr) { struct rt6_info *rt; - struct flowi fl = { - .oif = 0, - .fl6_dst = *addr, - .fl6_src = { .s6_addr32 = {0, 0, 0, 0} }, + struct flowi6 fl6 = { + .daddr = *addr, }; - rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl); + rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6); if (rt && rt->rt6i_dev && (rt->rt6i_dev->flags & IFF_LOOPBACK)) - return 1; + return 1; return 0; } @@ -800,9 +798,9 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest, dest->u_threshold = udest->u_threshold; dest->l_threshold = udest->l_threshold; - spin_lock(&dest->dst_lock); + spin_lock_bh(&dest->dst_lock); ip_vs_dst_reset(dest); - spin_unlock(&dest->dst_lock); + spin_unlock_bh(&dest->dst_lock); if (add) ip_vs_start_estimator(svc->net, &dest->stats); diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index a48239aba33b..6132b213eddc 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -98,12 +98,8 @@ __ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest, spin_lock(&dest->dst_lock); if (!(rt = (struct rtable *) __ip_vs_dst_check(dest, rtos))) { - struct flowi fl = { - .fl4_dst = dest->addr.ip, - .fl4_tos = rtos, - }; - - if (ip_route_output_key(net, &rt, &fl)) { + rt = ip_route_output(net, dest->addr.ip, 0, rtos, 0); + if (IS_ERR(rt)) { spin_unlock(&dest->dst_lock); IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n", &dest->addr.ip); @@ -116,12 +112,8 @@ __ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest, } spin_unlock(&dest->dst_lock); } else { - struct flowi fl = { - .fl4_dst = daddr, - .fl4_tos = rtos, - }; - - if (ip_route_output_key(net, &rt, &fl)) { + rt = ip_route_output(net, daddr, 0, rtos, 0); + if (IS_ERR(rt)) { IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n", &daddr); return NULL; @@ -173,14 +165,15 @@ __ip_vs_reroute_locally(struct sk_buff *skb) return 0; refdst_drop(orefdst); } else { - struct flowi fl = { - .fl4_dst = iph->daddr, - .fl4_src = iph->saddr, - .fl4_tos = RT_TOS(iph->tos), - .mark = skb->mark, + struct flowi4 fl4 = { + .daddr = iph->daddr, + .saddr = iph->saddr, + .flowi4_tos = RT_TOS(iph->tos), + .flowi4_mark = skb->mark, }; - if (ip_route_output_key(net, &rt, &fl)) + rt = ip_route_output_key(net, &fl4); + if (IS_ERR(rt)) return 0; if (!(rt->rt_flags & RTCF_LOCAL)) { ip_rt_put(rt); @@ -205,22 +198,27 @@ __ip_vs_route_output_v6(struct net *net, struct in6_addr *daddr, struct in6_addr *ret_saddr, int do_xfrm) { struct dst_entry *dst; - struct flowi fl = { - .fl6_dst = *daddr, + struct flowi6 fl6 = { + .daddr = *daddr, }; - dst = ip6_route_output(net, NULL, &fl); + dst = ip6_route_output(net, NULL, &fl6); if (dst->error) goto out_err; if (!ret_saddr) return dst; - if (ipv6_addr_any(&fl.fl6_src) && + if (ipv6_addr_any(&fl6.saddr) && ipv6_dev_get_saddr(net, ip6_dst_idev(dst)->dev, - &fl.fl6_dst, 0, &fl.fl6_src) < 0) + &fl6.daddr, 0, &fl6.saddr) < 0) goto out_err; - if (do_xfrm && 
xfrm_lookup(net, &dst, &fl, NULL, 0) < 0) - goto out_err; - ipv6_addr_copy(ret_saddr, &fl.fl6_src); + if (do_xfrm) { + dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0); + if (IS_ERR(dst)) { + dst = NULL; + goto out_err; + } + } + ipv6_addr_copy(ret_saddr, &fl6.saddr); return dst; out_err: diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 118123606a21..941286ca911d 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -959,8 +959,15 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, if (set_reply && !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status)) nf_conntrack_event_cache(IPCT_REPLY, ct); out: - if (tmpl) - nf_ct_put(tmpl); + if (tmpl) { + /* Special case: we have to repeat this hook, assign the + * template again to this packet. We assume that this packet + * has no conntrack assigned. This is used by nf_ct_tcp. */ + if (ret == NF_REPEAT) + skb->nfct = (struct nf_conntrack *)tmpl; + else + nf_ct_put(tmpl); + } return ret; } diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c index 5702de35e2bb..63a1b915a7e4 100644 --- a/net/netfilter/nf_conntrack_ecache.c +++ b/net/netfilter/nf_conntrack_ecache.c @@ -63,6 +63,9 @@ void nf_ct_deliver_cached_events(struct nf_conn *ct) * this does not harm and it happens very rarely. */ unsigned long missed = e->missed; + if (!((events | missed) & e->ctmask)) + goto out_unlock; + ret = notify->fcn(events | missed, &item); if (unlikely(ret < 0 || missed)) { spin_lock_bh(&ct->lock); diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c index b969025cf82f..533a183e6661 100644 --- a/net/netfilter/nf_conntrack_h323_main.c +++ b/net/netfilter/nf_conntrack_h323_main.c @@ -714,7 +714,6 @@ static int callforward_do_filter(const union nf_inet_addr *src, u_int8_t family) { const struct nf_afinfo *afinfo; - struct flowi fl1, fl2; int ret = 0; /* rcu_read_lock()ed by nf_hook_slow() */ @@ -722,17 +721,20 @@ static int callforward_do_filter(const union nf_inet_addr *src, if (!afinfo) return 0; - memset(&fl1, 0, sizeof(fl1)); - memset(&fl2, 0, sizeof(fl2)); - switch (family) { case AF_INET: { + struct flowi4 fl1, fl2; struct rtable *rt1, *rt2; - fl1.fl4_dst = src->ip; - fl2.fl4_dst = dst->ip; - if (!afinfo->route((struct dst_entry **)&rt1, &fl1)) { - if (!afinfo->route((struct dst_entry **)&rt2, &fl2)) { + memset(&fl1, 0, sizeof(fl1)); + fl1.daddr = src->ip; + + memset(&fl2, 0, sizeof(fl2)); + fl2.daddr = dst->ip; + if (!afinfo->route((struct dst_entry **)&rt1, + flowi4_to_flowi(&fl1))) { + if (!afinfo->route((struct dst_entry **)&rt2, + flowi4_to_flowi(&fl2))) { if (rt1->rt_gateway == rt2->rt_gateway && rt1->dst.dev == rt2->dst.dev) ret = 1; @@ -745,12 +747,18 @@ static int callforward_do_filter(const union nf_inet_addr *src, #if defined(CONFIG_NF_CONNTRACK_IPV6) || \ defined(CONFIG_NF_CONNTRACK_IPV6_MODULE) case AF_INET6: { + struct flowi6 fl1, fl2; struct rt6_info *rt1, *rt2; - memcpy(&fl1.fl6_dst, src, sizeof(fl1.fl6_dst)); - memcpy(&fl2.fl6_dst, dst, sizeof(fl2.fl6_dst)); - if (!afinfo->route((struct dst_entry **)&rt1, &fl1)) { - if (!afinfo->route((struct dst_entry **)&rt2, &fl2)) { + memset(&fl1, 0, sizeof(fl1)); + ipv6_addr_copy(&fl1.daddr, &src->in6); + + memset(&fl2, 0, sizeof(fl2)); + ipv6_addr_copy(&fl2.daddr, &dst->in6); + if (!afinfo->route((struct dst_entry **)&rt1, + flowi6_to_flowi(&fl1))) { + if (!afinfo->route((struct dst_entry **)&rt2, + flowi6_to_flowi(&fl2))) { if 
(!memcmp(&rt1->rt6i_gateway, &rt2->rt6i_gateway, sizeof(rt1->rt6i_gateway)) && rt1->dst.dev == rt2->dst.dev) diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index cb1a8190ba29..30bf8a167fc8 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -710,6 +710,7 @@ restart: if (ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, IPCTNL_MSG_CT_NEW, ct) < 0) { + nf_conntrack_get(&ct->ct_general); cb->args[1] = (unsigned long)ct; goto out; } @@ -967,7 +968,7 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb, u16 zone; int err; - if ((nlh->nlmsg_flags & NLM_F_DUMP) == NLM_F_DUMP) + if (nlh->nlmsg_flags & NLM_F_DUMP) return netlink_dump_start(ctnl, skb, nlh, ctnetlink_dump_table, ctnetlink_done); @@ -1832,7 +1833,7 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb, u16 zone; int err; - if ((nlh->nlmsg_flags & NLM_F_DUMP) == NLM_F_DUMP) { + if (nlh->nlmsg_flags & NLM_F_DUMP) { return netlink_dump_start(ctnl, skb, nlh, ctnetlink_exp_dump_table, ctnetlink_exp_done); diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index 20c775cff2a8..20714edf6cd2 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -85,6 +85,8 @@ EXPORT_SYMBOL(nf_log_unregister); int nf_log_bind_pf(u_int8_t pf, const struct nf_logger *logger) { + if (pf >= ARRAY_SIZE(nf_loggers)) + return -EINVAL; mutex_lock(&nf_log_mutex); if (__find_logger(pf, logger->name) == NULL) { mutex_unlock(&nf_log_mutex); @@ -98,6 +100,8 @@ EXPORT_SYMBOL(nf_log_bind_pf); void nf_log_unbind_pf(u_int8_t pf) { + if (pf >= ARRAY_SIZE(nf_loggers)) + return; mutex_lock(&nf_log_mutex); rcu_assign_pointer(nf_loggers[pf], NULL); mutex_unlock(&nf_log_mutex); diff --git a/net/netfilter/nf_tproxy_core.c b/net/netfilter/nf_tproxy_core.c index 4d87befb04c0..474d621cbc2e 100644 --- a/net/netfilter/nf_tproxy_core.c +++ b/net/netfilter/nf_tproxy_core.c @@ -28,26 +28,23 @@ nf_tproxy_destructor(struct sk_buff *skb) skb->destructor = NULL; if (sk) - nf_tproxy_put_sock(sk); + sock_put(sk); } /* consumes sk */ -int +void nf_tproxy_assign_sock(struct sk_buff *skb, struct sock *sk) { - bool transparent = (sk->sk_state == TCP_TIME_WAIT) ? 
- inet_twsk(sk)->tw_transparent : - inet_sk(sk)->transparent; - - if (transparent) { - skb_orphan(skb); - skb->sk = sk; - skb->destructor = nf_tproxy_destructor; - return 1; - } else - nf_tproxy_put_sock(sk); - - return 0; + /* assigning tw sockets complicates things; most + * skb->sk->X checks would have to test sk->sk_state first */ + if (sk->sk_state == TCP_TIME_WAIT) { + inet_twsk_put(inet_twsk(sk)); + return; + } + + skb_orphan(skb); + skb->sk = sk; + skb->destructor = nf_tproxy_destructor; } EXPORT_SYMBOL_GPL(nf_tproxy_assign_sock); diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c index eb81c380da1b..6e6b46cb1db9 100644 --- a/net/netfilter/xt_TCPMSS.c +++ b/net/netfilter/xt_TCPMSS.c @@ -148,16 +148,21 @@ tcpmss_mangle_packet(struct sk_buff *skb, static u_int32_t tcpmss_reverse_mtu(const struct sk_buff *skb, unsigned int family) { - struct flowi fl = {}; + struct flowi fl; const struct nf_afinfo *ai; struct rtable *rt = NULL; u_int32_t mtu = ~0U; - if (family == PF_INET) - fl.fl4_dst = ip_hdr(skb)->saddr; - else - fl.fl6_dst = ipv6_hdr(skb)->saddr; + if (family == PF_INET) { + struct flowi4 *fl4 = &fl.u.ip4; + memset(fl4, 0, sizeof(*fl4)); + fl4->daddr = ip_hdr(skb)->saddr; + } else { + struct flowi6 *fl6 = &fl.u.ip6; + memset(fl6, 0, sizeof(*fl6)); + ipv6_addr_copy(&fl6->daddr, &ipv6_hdr(skb)->saddr); + } rcu_read_lock(); ai = nf_get_afinfo(family); if (ai != NULL) diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c index 5128a6c4cb2c..5f054a0dbbb1 100644 --- a/net/netfilter/xt_TEE.c +++ b/net/netfilter/xt_TEE.c @@ -62,18 +62,19 @@ tee_tg_route4(struct sk_buff *skb, const struct xt_tee_tginfo *info) const struct iphdr *iph = ip_hdr(skb); struct net *net = pick_net(skb); struct rtable *rt; - struct flowi fl; + struct flowi4 fl4; - memset(&fl, 0, sizeof(fl)); + memset(&fl4, 0, sizeof(fl4)); if (info->priv) { if (info->priv->oif == -1) return false; - fl.oif = info->priv->oif; + fl4.flowi4_oif = info->priv->oif; } - fl.fl4_dst = info->gw.ip; - fl.fl4_tos = RT_TOS(iph->tos); - fl.fl4_scope = RT_SCOPE_UNIVERSE; - if (ip_route_output_key(net, &rt, &fl) != 0) + fl4.daddr = info->gw.ip; + fl4.flowi4_tos = RT_TOS(iph->tos); + fl4.flowi4_scope = RT_SCOPE_UNIVERSE; + rt = ip_route_output_key(net, &fl4); + if (IS_ERR(rt)) return false; skb_dst_drop(skb); @@ -142,18 +143,18 @@ tee_tg_route6(struct sk_buff *skb, const struct xt_tee_tginfo *info) const struct ipv6hdr *iph = ipv6_hdr(skb); struct net *net = pick_net(skb); struct dst_entry *dst; - struct flowi fl; + struct flowi6 fl6; - memset(&fl, 0, sizeof(fl)); + memset(&fl6, 0, sizeof(fl6)); if (info->priv) { if (info->priv->oif == -1) return false; - fl.oif = info->priv->oif; + fl6.flowi6_oif = info->priv->oif; } - fl.fl6_dst = info->gw.in6; - fl.fl6_flowlabel = ((iph->flow_lbl[0] & 0xF) << 16) | + fl6.daddr = info->gw.in6; + fl6.flowlabel = ((iph->flow_lbl[0] & 0xF) << 16) | (iph->flow_lbl[1] << 8) | iph->flow_lbl[2]; - dst = ip6_route_output(net, NULL, &fl); + dst = ip6_route_output(net, NULL, &fl6); if (dst == NULL) return false; diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c index 640678f47a2a..dcfd57eb9d02 100644 --- a/net/netfilter/xt_TPROXY.c +++ b/net/netfilter/xt_TPROXY.c @@ -33,6 +33,20 @@ #include <net/netfilter/nf_tproxy_core.h> #include <linux/netfilter/xt_TPROXY.h> +static bool tproxy_sk_is_transparent(struct sock *sk) +{ + if (sk->sk_state != TCP_TIME_WAIT) { + if (inet_sk(sk)->transparent) + return true; + sock_put(sk); + } else { + if (inet_twsk(sk)->tw_transparent) + return true; + 
inet_twsk_put(inet_twsk(sk)); + } + return false; +} + static inline __be32 tproxy_laddr4(struct sk_buff *skb, __be32 user_laddr, __be32 daddr) { @@ -141,7 +155,7 @@ tproxy_tg4(struct sk_buff *skb, __be32 laddr, __be16 lport, skb->dev, NFT_LOOKUP_LISTENER); /* NOTE: assign_sock consumes our sk reference */ - if (sk && nf_tproxy_assign_sock(skb, sk)) { + if (sk && tproxy_sk_is_transparent(sk)) { /* This should be in a separate target, but we don't do multiple targets on the same rule yet */ skb->mark = (skb->mark & ~mark_mask) ^ mark_value; @@ -149,6 +163,8 @@ tproxy_tg4(struct sk_buff *skb, __be32 laddr, __be16 lport, pr_debug("redirecting: proto %hhu %pI4:%hu -> %pI4:%hu, mark: %x\n", iph->protocol, &iph->daddr, ntohs(hp->dest), &laddr, ntohs(lport), skb->mark); + + nf_tproxy_assign_sock(skb, sk); return NF_ACCEPT; } @@ -306,7 +322,7 @@ tproxy_tg6_v1(struct sk_buff *skb, const struct xt_action_param *par) par->in, NFT_LOOKUP_LISTENER); /* NOTE: assign_sock consumes our sk reference */ - if (sk && nf_tproxy_assign_sock(skb, sk)) { + if (sk && tproxy_sk_is_transparent(sk)) { /* This should be in a separate target, but we don't do multiple targets on the same rule yet */ skb->mark = (skb->mark & ~tgi->mark_mask) ^ tgi->mark_value; @@ -314,6 +330,8 @@ tproxy_tg6_v1(struct sk_buff *skb, const struct xt_action_param *par) pr_debug("redirecting: proto %hhu %pI6:%hu -> %pI6:%hu, mark: %x\n", tproto, &iph->saddr, ntohs(hp->source), laddr, ntohs(lport), skb->mark); + + nf_tproxy_assign_sock(skb, sk); return NF_ACCEPT; } diff --git a/net/netfilter/xt_iprange.c b/net/netfilter/xt_iprange.c index d3eb5ed1892f..b46626cddd93 100644 --- a/net/netfilter/xt_iprange.c +++ b/net/netfilter/xt_iprange.c @@ -53,15 +53,13 @@ iprange_mt4(const struct sk_buff *skb, struct xt_action_param *par) } static inline int -iprange_ipv6_sub(const struct in6_addr *a, const struct in6_addr *b) +iprange_ipv6_lt(const struct in6_addr *a, const struct in6_addr *b) { unsigned int i; - int r; for (i = 0; i < 4; ++i) { - r = ntohl(a->s6_addr32[i]) - ntohl(b->s6_addr32[i]); - if (r != 0) - return r; + if (a->s6_addr32[i] != b->s6_addr32[i]) + return ntohl(a->s6_addr32[i]) < ntohl(b->s6_addr32[i]); } return 0; @@ -75,8 +73,8 @@ iprange_mt6(const struct sk_buff *skb, struct xt_action_param *par) bool m; if (info->flags & IPRANGE_SRC) { - m = iprange_ipv6_sub(&iph->saddr, &info->src_min.in6) < 0; - m |= iprange_ipv6_sub(&iph->saddr, &info->src_max.in6) > 0; + m = iprange_ipv6_lt(&iph->saddr, &info->src_min.in6); + m |= iprange_ipv6_lt(&info->src_max.in6, &iph->saddr); m ^= !!(info->flags & IPRANGE_SRC_INV); if (m) { pr_debug("src IP %pI6 NOT in range %s%pI6-%pI6\n", @@ -88,8 +86,8 @@ iprange_mt6(const struct sk_buff *skb, struct xt_action_param *par) } } if (info->flags & IPRANGE_DST) { - m = iprange_ipv6_sub(&iph->daddr, &info->dst_min.in6) < 0; - m |= iprange_ipv6_sub(&iph->daddr, &info->dst_max.in6) > 0; + m = iprange_ipv6_lt(&iph->daddr, &info->dst_min.in6); + m |= iprange_ipv6_lt(&info->dst_max.in6, &iph->daddr); m ^= !!(info->flags & IPRANGE_DST_INV); if (m) { pr_debug("dst IP %pI6 NOT in range %s%pI6-%pI6\n", diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c index 00d6ae838303..9cc46356b577 100644 --- a/net/netfilter/xt_socket.c +++ b/net/netfilter/xt_socket.c @@ -35,6 +35,15 @@ #include <net/netfilter/nf_conntrack.h> #endif +static void +xt_socket_put_sk(struct sock *sk) +{ + if (sk->sk_state == TCP_TIME_WAIT) + inet_twsk_put(inet_twsk(sk)); + else + sock_put(sk); +} + static int extract_icmp4_fields(const 
struct sk_buff *skb, u8 *protocol, @@ -164,7 +173,7 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par, (sk->sk_state == TCP_TIME_WAIT && inet_twsk(sk)->tw_transparent)); - nf_tproxy_put_sock(sk); + xt_socket_put_sk(sk); if (wildcard || !transparent) sk = NULL; @@ -298,7 +307,7 @@ socket_mt6_v1(const struct sk_buff *skb, struct xt_action_param *par) (sk->sk_state == TCP_TIME_WAIT && inet_twsk(sk)->tw_transparent)); - nf_tproxy_put_sock(sk); + xt_socket_put_sk(sk); if (wildcard || !transparent) sk = NULL; diff --git a/net/netlabel/netlabel_user.h b/net/netlabel/netlabel_user.h index 6caef8b20611..f4fc4c9ad567 100644 --- a/net/netlabel/netlabel_user.h +++ b/net/netlabel/netlabel_user.h @@ -49,9 +49,9 @@ static inline void netlbl_netlink_auditinfo(struct sk_buff *skb, struct netlbl_audit *audit_info) { - audit_info->secid = NETLINK_CB(skb).sid; - audit_info->loginuid = NETLINK_CB(skb).loginuid; - audit_info->sessionid = NETLINK_CB(skb).sessionid; + security_task_getsecid(current, &audit_info->secid); + audit_info->loginuid = audit_get_loginuid(current); + audit_info->sessionid = audit_get_sessionid(current); } /* NetLabel NETLINK I/O functions */ diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 478181d53c55..c8f35b5d2ee9 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1362,17 +1362,8 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, NETLINK_CB(skb).pid = nlk->pid; NETLINK_CB(skb).dst_group = dst_group; - NETLINK_CB(skb).loginuid = audit_get_loginuid(current); - NETLINK_CB(skb).sessionid = audit_get_sessionid(current); - security_task_getsecid(current, &(NETLINK_CB(skb).sid)); memcpy(NETLINK_CREDS(skb), &siocb->scm->creds, sizeof(struct ucred)); - /* What can I do? Netlink is asynchronous, so that - we will have to save current capabilities to - check them, when this message will be delivered - to corresponding kernel module. --ANK (980802) - */ - err = -EFAULT; if (memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len)) { kfree_skb(skb); @@ -1407,7 +1398,7 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock, int noblock = flags&MSG_DONTWAIT; size_t copied; struct sk_buff *skb, *data_skb; - int err; + int err, ret; if (flags&MSG_OOB) return -EOPNOTSUPP; @@ -1470,8 +1461,13 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock, skb_free_datagram(sk, skb); - if (nlk->cb && atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf / 2) - netlink_dump(sk); + if (nlk->cb && atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf / 2) { + ret = netlink_dump(sk); + if (ret) { + sk->sk_err = ret; + sk->sk_error_report(sk); + } + } scm_recv(sock, msg, siocb->scm, flags); out: @@ -1736,6 +1732,7 @@ int netlink_dump_start(struct sock *ssk, struct sk_buff *skb, struct netlink_callback *cb; struct sock *sk; struct netlink_sock *nlk; + int ret; cb = kzalloc(sizeof(*cb), GFP_KERNEL); if (cb == NULL) @@ -1764,9 +1761,13 @@ int netlink_dump_start(struct sock *ssk, struct sk_buff *skb, nlk->cb = cb; mutex_unlock(nlk->cb_mutex); - netlink_dump(sk); + ret = netlink_dump(sk); + sock_put(sk); + if (ret) + return ret; + /* We successfully started a dump, by returning -EINTR we * signal not to send ACK even if it was requested. 
*/ diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index f83cb370292b..1781d99145e2 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -519,7 +519,7 @@ static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) security_netlink_recv(skb, CAP_NET_ADMIN)) return -EPERM; - if ((nlh->nlmsg_flags & NLM_F_DUMP) == NLM_F_DUMP) { + if (nlh->nlmsg_flags & NLM_F_DUMP) { if (ops->dumpit == NULL) return -EOPNOTSUPP; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index c60649ec1193..b5362e96022b 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -465,7 +465,7 @@ retry: */ err = -EMSGSIZE; - if (len > dev->mtu + dev->hard_header_len) + if (len > dev->mtu + dev->hard_header_len + VLAN_HLEN) goto out_unlock; if (!skb) { @@ -496,6 +496,19 @@ retry: goto retry; } + if (len > (dev->mtu + dev->hard_header_len)) { + /* Earlier code assumed this would be a VLAN pkt, + * double-check this now that we have the actual + * packet in hand. + */ + struct ethhdr *ehdr; + skb_reset_mac_header(skb); + ehdr = eth_hdr(skb); + if (ehdr->h_proto != htons(ETH_P_8021Q)) { + err = -EMSGSIZE; + goto out_unlock; + } + } skb->protocol = proto; skb->dev = dev; @@ -953,7 +966,6 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) { - struct socket *sock; struct sk_buff *skb; struct net_device *dev; __be16 proto; @@ -965,8 +977,6 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) int len_sum = 0; int status = 0; - sock = po->sk.sk_socket; - mutex_lock(&po->pg_vec_lock); err = -EBUSY; @@ -1199,7 +1209,7 @@ static int packet_snd(struct socket *sock, } err = -EMSGSIZE; - if (!gso_type && (len > dev->mtu+reserve)) + if (!gso_type && (len > dev->mtu + reserve + VLAN_HLEN)) goto out_unlock; err = -ENOBUFS; @@ -1224,6 +1234,20 @@ static int packet_snd(struct socket *sock, if (err < 0) goto out_free; + if (!gso_type && (len > dev->mtu + reserve)) { + /* Earlier code assumed this would be a VLAN pkt, + * double-check this now that we have the actual + * packet in hand. + */ + struct ethhdr *ehdr; + skb_reset_mac_header(skb); + ehdr = eth_hdr(skb); + if (ehdr->h_proto != htons(ETH_P_8021Q)) { + err = -EMSGSIZE; + goto out_free; + } + } + skb->protocol = proto; skb->dev = dev; skb->priority = sk->sk_priority; diff --git a/net/phonet/Kconfig b/net/phonet/Kconfig index 0d9b8a220a78..6ec7d55b1769 100644 --- a/net/phonet/Kconfig +++ b/net/phonet/Kconfig @@ -14,15 +14,3 @@ config PHONET To compile this driver as a module, choose M here: the module will be called phonet. If unsure, say N. - -config PHONET_PIPECTRLR - bool "Phonet Pipe Controller (EXPERIMENTAL)" - depends on PHONET && EXPERIMENTAL - default N - help - The Pipe Controller implementation in Phonet stack to support Pipe - data with Nokia Slim modems like WG2.5 used on ST-Ericsson U8500 - platform. - - This option is incompatible with older Nokia modems. - Say N here unless you really know what you are doing. 
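A note on the af_packet hunks above: both transmit paths touched there (the SOCK_PACKET path with the retry: label and packet_snd()) now accept a frame up to VLAN_HLEN bytes beyond dev->mtu plus the hard header length, and only reject it after the frame has been built, once the Ethernet type can actually be inspected. A minimal sketch of that length check, assuming an Ethernet header at the start of the frame; the helper name and its standalone form are illustrative, not part of the patch:

	/* Accept an oversized frame only if it really carries an 802.1Q tag,
	 * mirroring the double-check added to packet_snd() above. */
	static int packet_check_len(struct sk_buff *skb, struct net_device *dev,
				    unsigned int len, unsigned int reserve)
	{
		struct ethhdr *ehdr;

		if (len <= dev->mtu + reserve)
			return 0;			/* always within limits */
		if (len > dev->mtu + reserve + VLAN_HLEN)
			return -EMSGSIZE;		/* too large even for a tagged frame */

		skb_reset_mac_header(skb);
		ehdr = eth_hdr(skb);
		if (ehdr->h_proto != htons(ETH_P_8021Q))
			return -EMSGSIZE;		/* oversized and not VLAN-tagged */
		return 0;
	}

In packet_snd() the check is additionally skipped for GSO frames (the !gso_type guard above), since those are segmented to fit the device later.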
diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c index 1072b2c19d31..c6fffd946d42 100644 --- a/net/phonet/af_phonet.c +++ b/net/phonet/af_phonet.c @@ -110,6 +110,7 @@ static int pn_socket_create(struct net *net, struct socket *sock, int protocol, sk->sk_protocol = protocol; pn = pn_sk(sk); pn->sobject = 0; + pn->dobject = 0; pn->resource = 0; sk->sk_prot->init(sk); err = 0; @@ -194,11 +195,7 @@ static int pn_send(struct sk_buff *skb, struct net_device *dev, if (skb->pkt_type == PACKET_LOOPBACK) { skb_reset_mac_header(skb); skb_orphan(skb); - if (irq) - netif_rx(skb); - else - netif_rx_ni(skb); - err = 0; + err = (irq ? netif_rx(skb) : netif_rx_ni(skb)) ? -ENOBUFS : 0; } else { err = dev_hard_header(skb, dev, ntohs(skb->protocol), NULL, NULL, skb->len); @@ -207,6 +204,8 @@ static int pn_send(struct sk_buff *skb, struct net_device *dev, goto drop; } err = dev_queue_xmit(skb); + if (unlikely(err > 0)) + err = net_xmit_errno(err); } return err; @@ -242,8 +241,18 @@ int pn_skb_send(struct sock *sk, struct sk_buff *skb, struct net_device *dev; struct pn_sock *pn = pn_sk(sk); int err; - u16 src; - u8 daddr = pn_sockaddr_get_addr(target), saddr = PN_NO_ADDR; + u16 src, dst; + u8 daddr, saddr, res; + + src = pn->sobject; + if (target != NULL) { + dst = pn_sockaddr_get_object(target); + res = pn_sockaddr_get_resource(target); + } else { + dst = pn->dobject; + res = pn->resource; + } + daddr = pn_addr(dst); err = -EHOSTUNREACH; if (sk->sk_bound_dev_if) @@ -251,10 +260,9 @@ int pn_skb_send(struct sock *sk, struct sk_buff *skb, else if (phonet_address_lookup(net, daddr) == 0) { dev = phonet_device_get(net); skb->pkt_type = PACKET_LOOPBACK; - } else if (pn_sockaddr_get_object(target) == 0) { + } else if (dst == 0) { /* Resource routing (small race until phonet_rcv()) */ - struct sock *sk = pn_find_sock_by_res(net, - target->spn_resource); + struct sock *sk = pn_find_sock_by_res(net, res); if (sk) { sock_put(sk); dev = phonet_device_get(net); @@ -271,12 +279,10 @@ int pn_skb_send(struct sock *sk, struct sk_buff *skb, if (saddr == PN_NO_ADDR) goto drop; - src = pn->sobject; if (!pn_addr(src)) src = pn_object(saddr, pn_obj(src)); - err = pn_send(skb, dev, pn_sockaddr_get_object(target), - src, pn_sockaddr_get_resource(target), 0); + err = pn_send(skb, dev, dst, src, res, 0); dev_put(dev); return err; diff --git a/net/phonet/pep.c b/net/phonet/pep.c index 3e60f2e4e6c2..68e635f11de8 100644 --- a/net/phonet/pep.c +++ b/net/phonet/pep.c @@ -42,7 +42,7 @@ * TCP_ESTABLISHED connected pipe in enabled state * * pep_sock locking: - * - sk_state, ackq, hlist: sock lock needed + * - sk_state, hlist: sock lock needed * - listener: read only * - pipe_handle: read only */ @@ -50,11 +50,6 @@ #define CREDITS_MAX 10 #define CREDITS_THR 7 -static const struct sockaddr_pn pipe_srv = { - .spn_family = AF_PHONET, - .spn_resource = 0xD9, /* pipe service */ -}; - #define pep_sb_size(s) (((s) + 5) & ~3) /* 2-bytes head, 32-bits aligned */ /* Get the next TLV sub-block. 
*/ @@ -82,236 +77,95 @@ static unsigned char *pep_get_sb(struct sk_buff *skb, u8 *ptype, u8 *plen, return data; } -static int pep_reply(struct sock *sk, struct sk_buff *oskb, - u8 code, const void *data, int len, gfp_t priority) +static struct sk_buff *pep_alloc_skb(struct sock *sk, const void *payload, + int len, gfp_t priority) { - const struct pnpipehdr *oph = pnp_hdr(oskb); - struct pnpipehdr *ph; - struct sk_buff *skb; - - skb = alloc_skb(MAX_PNPIPE_HEADER + len, priority); + struct sk_buff *skb = alloc_skb(MAX_PNPIPE_HEADER + len, priority); if (!skb) - return -ENOMEM; + return NULL; skb_set_owner_w(skb, sk); skb_reserve(skb, MAX_PNPIPE_HEADER); __skb_put(skb, len); - skb_copy_to_linear_data(skb, data, len); - __skb_push(skb, sizeof(*ph)); + skb_copy_to_linear_data(skb, payload, len); + __skb_push(skb, sizeof(struct pnpipehdr)); skb_reset_transport_header(skb); - ph = pnp_hdr(skb); - ph->utid = oph->utid; - ph->message_id = oph->message_id + 1; /* REQ -> RESP */ - ph->pipe_handle = oph->pipe_handle; - ph->error_code = code; - - return pn_skb_send(sk, skb, &pipe_srv); -} - -#define PAD 0x00 - -#ifdef CONFIG_PHONET_PIPECTRLR -static u8 pipe_negotiate_fc(u8 *host_fc, u8 *remote_fc, int len) -{ - int i, j; - u8 base_fc, final_fc; - - for (i = 0; i < len; i++) { - base_fc = host_fc[i]; - for (j = 0; j < len; j++) { - if (remote_fc[j] == base_fc) { - final_fc = base_fc; - goto done; - } - } - } - return -EINVAL; - -done: - return final_fc; - -} - -static int pipe_get_flow_info(struct sock *sk, struct sk_buff *skb, - u8 *pref_rx_fc, u8 *req_tx_fc) -{ - struct pnpipehdr *hdr; - u8 n_sb; - - if (!pskb_may_pull(skb, sizeof(*hdr) + 4)) - return -EINVAL; - - hdr = pnp_hdr(skb); - n_sb = hdr->data[4]; - - __skb_pull(skb, sizeof(*hdr) + 4); - while (n_sb > 0) { - u8 type, buf[3], len = sizeof(buf); - u8 *data = pep_get_sb(skb, &type, &len, buf); - - if (data == NULL) - return -EINVAL; - - switch (type) { - case PN_PIPE_SB_REQUIRED_FC_TX: - if (len < 3 || (data[2] | data[3] | data[4]) > 3) - break; - req_tx_fc[0] = data[2]; - req_tx_fc[1] = data[3]; - req_tx_fc[2] = data[4]; - break; - - case PN_PIPE_SB_PREFERRED_FC_RX: - if (len < 3 || (data[2] | data[3] | data[4]) > 3) - break; - pref_rx_fc[0] = data[2]; - pref_rx_fc[1] = data[3]; - pref_rx_fc[2] = data[4]; - break; - - } - n_sb--; - } - return 0; + return skb; } -static int pipe_handler_send_req(struct sock *sk, u8 utid, - u8 msg_id, gfp_t priority) +static int pep_reply(struct sock *sk, struct sk_buff *oskb, u8 code, + const void *data, int len, gfp_t priority) { - int len; + const struct pnpipehdr *oph = pnp_hdr(oskb); struct pnpipehdr *ph; struct sk_buff *skb; - struct pep_sock *pn = pep_sk(sk); - - static const u8 data[4] = { - PAD, PAD, PAD, PAD, - }; + struct sockaddr_pn peer; - switch (msg_id) { - case PNS_PEP_CONNECT_REQ: - len = sizeof(data); - break; - - case PNS_PEP_DISCONNECT_REQ: - case PNS_PEP_ENABLE_REQ: - case PNS_PEP_DISABLE_REQ: - len = 0; - break; - - default: - return -EINVAL; - } - - skb = alloc_skb(MAX_PNPIPE_HEADER + len, priority); + skb = pep_alloc_skb(sk, data, len, priority); if (!skb) return -ENOMEM; - skb_set_owner_w(skb, sk); - skb_reserve(skb, MAX_PNPIPE_HEADER); - if (len) { - __skb_put(skb, len); - skb_copy_to_linear_data(skb, data, len); - } - __skb_push(skb, sizeof(*ph)); - skb_reset_transport_header(skb); ph = pnp_hdr(skb); - ph->utid = utid; - ph->message_id = msg_id; - ph->pipe_handle = pn->pipe_handle; - ph->error_code = PN_PIPE_NO_ERROR; + ph->utid = oph->utid; + ph->message_id = oph->message_id + 1; /* 
REQ -> RESP */ + ph->pipe_handle = oph->pipe_handle; + ph->error_code = code; - return pn_skb_send(sk, skb, &pn->remote_pep); + pn_skb_get_src_sockaddr(oskb, &peer); + return pn_skb_send(sk, skb, &peer); } -static int pipe_handler_send_created_ind(struct sock *sk, - u8 utid, u8 msg_id) +static int pep_indicate(struct sock *sk, u8 id, u8 code, + const void *data, int len, gfp_t priority) { - int err_code; + struct pep_sock *pn = pep_sk(sk); struct pnpipehdr *ph; struct sk_buff *skb; - struct pep_sock *pn = pep_sk(sk); - static u8 data[4] = { - 0x03, 0x04, - }; - data[2] = pn->tx_fc; - data[3] = pn->rx_fc; - - /* - * actually, below is number of sub-blocks and not error code. - * Pipe_created_ind message format does not have any - * error code field. However, the Phonet stack will always send - * an error code as part of pnpipehdr. So, use that err_code to - * specify the number of sub-blocks. - */ - err_code = 0x01; - - skb = alloc_skb(MAX_PNPIPE_HEADER + sizeof(data), GFP_ATOMIC); + skb = pep_alloc_skb(sk, data, len, priority); if (!skb) return -ENOMEM; - skb_set_owner_w(skb, sk); - skb_reserve(skb, MAX_PNPIPE_HEADER); - __skb_put(skb, sizeof(data)); - skb_copy_to_linear_data(skb, data, sizeof(data)); - __skb_push(skb, sizeof(*ph)); - skb_reset_transport_header(skb); ph = pnp_hdr(skb); - ph->utid = utid; - ph->message_id = msg_id; + ph->utid = 0; + ph->message_id = id; ph->pipe_handle = pn->pipe_handle; - ph->error_code = err_code; - - return pn_skb_send(sk, skb, &pn->remote_pep); + ph->data[0] = code; + return pn_skb_send(sk, skb, NULL); } -static int pipe_handler_send_ind(struct sock *sk, u8 utid, u8 msg_id) +#define PAD 0x00 + +static int pipe_handler_request(struct sock *sk, u8 id, u8 code, + const void *data, int len) { - int err_code; + struct pep_sock *pn = pep_sk(sk); struct pnpipehdr *ph; struct sk_buff *skb; - struct pep_sock *pn = pep_sk(sk); - - /* - * actually, below is a filler. - * Pipe_enabled/disabled_ind message format does not have any - * error code field. However, the Phonet stack will always send - * an error code as part of pnpipehdr. So, use that err_code to - * specify the filler value. 
- */ - err_code = 0x0; - skb = alloc_skb(MAX_PNPIPE_HEADER, GFP_ATOMIC); + skb = pep_alloc_skb(sk, data, len, GFP_KERNEL); if (!skb) return -ENOMEM; - skb_set_owner_w(skb, sk); - skb_reserve(skb, MAX_PNPIPE_HEADER); - __skb_push(skb, sizeof(*ph)); - skb_reset_transport_header(skb); ph = pnp_hdr(skb); - ph->utid = utid; - ph->message_id = msg_id; + ph->utid = id; /* whatever */ + ph->message_id = id; ph->pipe_handle = pn->pipe_handle; - ph->error_code = err_code; - - return pn_skb_send(sk, skb, &pn->remote_pep); + ph->data[0] = code; + return pn_skb_send(sk, skb, NULL); } -static int pipe_handler_enable_pipe(struct sock *sk, int enable) +static int pipe_handler_send_created_ind(struct sock *sk) { - int utid, req; - - if (enable) { - utid = PNS_PIPE_ENABLE_UTID; - req = PNS_PEP_ENABLE_REQ; - } else { - utid = PNS_PIPE_DISABLE_UTID; - req = PNS_PEP_DISABLE_REQ; - } - return pipe_handler_send_req(sk, utid, req, GFP_ATOMIC); + struct pep_sock *pn = pep_sk(sk); + u8 data[4] = { + PN_PIPE_SB_NEGOTIATED_FC, pep_sb_size(2), + pn->tx_fc, pn->rx_fc, + }; + + return pep_indicate(sk, PNS_PIPE_CREATED_IND, 1 /* sub-blocks */, + data, 4, GFP_ATOMIC); } -#endif static int pep_accept_conn(struct sock *sk, struct sk_buff *skb) { @@ -334,11 +188,12 @@ static int pep_accept_conn(struct sock *sk, struct sk_buff *skb) GFP_KERNEL); } -static int pep_reject_conn(struct sock *sk, struct sk_buff *skb, u8 code) +static int pep_reject_conn(struct sock *sk, struct sk_buff *skb, u8 code, + gfp_t priority) { static const u8 data[4] = { PAD, PAD, PAD, 0 /* sub-blocks */ }; WARN_ON(code == PN_PIPE_NO_ERROR); - return pep_reply(sk, skb, code, data, sizeof(data), GFP_ATOMIC); + return pep_reply(sk, skb, code, data, sizeof(data), priority); } /* Control requests are not sent by the pipe service and have a specific @@ -350,23 +205,21 @@ static int pep_ctrlreq_error(struct sock *sk, struct sk_buff *oskb, u8 code, struct sk_buff *skb; struct pnpipehdr *ph; struct sockaddr_pn dst; + u8 data[4] = { + oph->data[0], /* PEP type */ + code, /* error code, at an unusual offset */ + PAD, PAD, + }; - skb = alloc_skb(MAX_PNPIPE_HEADER + 4, priority); + skb = pep_alloc_skb(sk, data, 4, priority); if (!skb) return -ENOMEM; - skb_set_owner_w(skb, sk); - - skb_reserve(skb, MAX_PHONET_HEADER); - ph = (struct pnpipehdr *)skb_put(skb, sizeof(*ph) + 4); + ph = pnp_hdr(skb); ph->utid = oph->utid; ph->message_id = PNS_PEP_CTRL_RESP; ph->pipe_handle = oph->pipe_handle; ph->data[0] = oph->data[1]; /* CTRL id */ - ph->data[1] = oph->data[0]; /* PEP type */ - ph->data[2] = code; /* error code, at an usual offset */ - ph->data[3] = PAD; - ph->data[4] = PAD; pn_skb_get_src_sockaddr(oskb, &dst); return pn_skb_send(sk, skb, &dst); @@ -374,38 +227,15 @@ static int pep_ctrlreq_error(struct sock *sk, struct sk_buff *oskb, u8 code, static int pipe_snd_status(struct sock *sk, u8 type, u8 status, gfp_t priority) { - struct pep_sock *pn = pep_sk(sk); - struct pnpipehdr *ph; - struct sk_buff *skb; + u8 data[4] = { type, PAD, PAD, status }; - skb = alloc_skb(MAX_PNPIPE_HEADER + 4, priority); - if (!skb) - return -ENOMEM; - skb_set_owner_w(skb, sk); - - skb_reserve(skb, MAX_PNPIPE_HEADER + 4); - __skb_push(skb, sizeof(*ph) + 4); - skb_reset_transport_header(skb); - ph = pnp_hdr(skb); - ph->utid = 0; - ph->message_id = PNS_PEP_STATUS_IND; - ph->pipe_handle = pn->pipe_handle; - ph->pep_type = PN_PEP_TYPE_COMMON; - ph->data[1] = type; - ph->data[2] = PAD; - ph->data[3] = PAD; - ph->data[4] = status; - -#ifdef CONFIG_PHONET_PIPECTRLR - return pn_skb_send(sk, skb, 
&pn->remote_pep); -#else - return pn_skb_send(sk, skb, &pipe_srv); -#endif + return pep_indicate(sk, PNS_PEP_STATUS_IND, PN_PEP_TYPE_COMMON, + data, 4, priority); } /* Send our RX flow control information to the sender. * Socket must be locked. */ -static void pipe_grant_credits(struct sock *sk) +static void pipe_grant_credits(struct sock *sk, gfp_t priority) { struct pep_sock *pn = pep_sk(sk); @@ -415,16 +245,16 @@ static void pipe_grant_credits(struct sock *sk) case PN_LEGACY_FLOW_CONTROL: /* TODO */ break; case PN_ONE_CREDIT_FLOW_CONTROL: - pipe_snd_status(sk, PN_PEP_IND_FLOW_CONTROL, - PEP_IND_READY, GFP_ATOMIC); - pn->rx_credits = 1; + if (pipe_snd_status(sk, PN_PEP_IND_FLOW_CONTROL, + PEP_IND_READY, priority) == 0) + pn->rx_credits = 1; break; case PN_MULTI_CREDIT_FLOW_CONTROL: if ((pn->rx_credits + CREDITS_THR) > CREDITS_MAX) break; if (pipe_snd_status(sk, PN_PEP_IND_ID_MCFC_GRANT_CREDITS, CREDITS_MAX - pn->rx_credits, - GFP_ATOMIC) == 0) + priority) == 0) pn->rx_credits = CREDITS_MAX; break; } @@ -522,7 +352,7 @@ static int pipe_do_rcv(struct sock *sk, struct sk_buff *skb) switch (hdr->message_id) { case PNS_PEP_CONNECT_REQ: - pep_reject_conn(sk, skb, PN_PIPE_ERR_PEP_IN_USE); + pep_reject_conn(sk, skb, PN_PIPE_ERR_PEP_IN_USE, GFP_ATOMIC); break; case PNS_PEP_DISCONNECT_REQ: @@ -532,35 +362,11 @@ static int pipe_do_rcv(struct sock *sk, struct sk_buff *skb) sk->sk_state_change(sk); break; -#ifdef CONFIG_PHONET_PIPECTRLR - case PNS_PEP_DISCONNECT_RESP: - pn->pipe_state = PIPE_IDLE; - sk->sk_state = TCP_CLOSE; - break; -#endif - case PNS_PEP_ENABLE_REQ: /* Wait for PNS_PIPE_(ENABLED|REDIRECTED)_IND */ pep_reply(sk, skb, PN_PIPE_NO_ERROR, NULL, 0, GFP_ATOMIC); break; -#ifdef CONFIG_PHONET_PIPECTRLR - case PNS_PEP_ENABLE_RESP: - pn->pipe_state = PIPE_ENABLED; - pipe_handler_send_ind(sk, PNS_PIPE_ENABLED_IND_UTID, - PNS_PIPE_ENABLED_IND); - - if (!pn_flow_safe(pn->tx_fc)) { - atomic_set(&pn->tx_credits, 1); - sk->sk_write_space(sk); - } - if (sk->sk_state == TCP_ESTABLISHED) - break; /* Nothing to do */ - sk->sk_state = TCP_ESTABLISHED; - pipe_grant_credits(sk); - break; -#endif - case PNS_PEP_RESET_REQ: switch (hdr->state_after_reset) { case PN_PIPE_DISABLE: @@ -579,17 +385,6 @@ static int pipe_do_rcv(struct sock *sk, struct sk_buff *skb) pep_reply(sk, skb, PN_PIPE_NO_ERROR, NULL, 0, GFP_ATOMIC); break; -#ifdef CONFIG_PHONET_PIPECTRLR - case PNS_PEP_DISABLE_RESP: - pn->pipe_state = PIPE_DISABLED; - atomic_set(&pn->tx_credits, 0); - pipe_handler_send_ind(sk, PNS_PIPE_DISABLED_IND_UTID, - PNS_PIPE_DISABLED_IND); - sk->sk_state = TCP_SYN_RECV; - pn->rx_credits = 0; - break; -#endif - case PNS_PEP_CTRL_REQ: if (skb_queue_len(&pn->ctrlreq_queue) >= PNPIPE_CTRLREQ_MAX) { atomic_inc(&sk->sk_drops); @@ -607,7 +402,8 @@ static int pipe_do_rcv(struct sock *sk, struct sk_buff *skb) if (!pn_flow_safe(pn->rx_fc)) { err = sock_queue_rcv_skb(sk, skb); if (!err) - return 0; + return NET_RX_SUCCESS; + err = -ENOBUFS; break; } @@ -645,7 +441,7 @@ static int pipe_do_rcv(struct sock *sk, struct sk_buff *skb) if (sk->sk_state == TCP_ESTABLISHED) break; /* Nothing to do */ sk->sk_state = TCP_ESTABLISHED; - pipe_grant_credits(sk); + pipe_grant_credits(sk, GFP_ATOMIC); break; case PNS_PIPE_DISABLED_IND: @@ -660,7 +456,7 @@ static int pipe_do_rcv(struct sock *sk, struct sk_buff *skb) } out: kfree_skb(skb); - return err; + return (err == -ENOBUFS) ? 
NET_RX_DROP : NET_RX_SUCCESS; queue: skb->dev = NULL; @@ -669,7 +465,7 @@ queue: skb_queue_tail(queue, skb); if (!sock_flag(sk, SOCK_DEAD)) sk->sk_data_ready(sk, err); - return 0; + return NET_RX_SUCCESS; } /* Destroy connected sock. */ @@ -681,133 +477,126 @@ static void pipe_destruct(struct sock *sk) skb_queue_purge(&pn->ctrlreq_queue); } -#ifdef CONFIG_PHONET_PIPECTRLR -static int pep_connresp_rcv(struct sock *sk, struct sk_buff *skb) +static u8 pipe_negotiate_fc(const u8 *fcs, unsigned n) { - struct pep_sock *pn = pep_sk(sk); - u8 host_pref_rx_fc[3] = {3, 2, 1}, host_req_tx_fc[3] = {3, 2, 1}; - u8 remote_pref_rx_fc[3], remote_req_tx_fc[3]; - u8 negotiated_rx_fc, negotiated_tx_fc; - int ret; - - pipe_get_flow_info(sk, skb, remote_pref_rx_fc, - remote_req_tx_fc); - negotiated_tx_fc = pipe_negotiate_fc(remote_req_tx_fc, - host_pref_rx_fc, - sizeof(host_pref_rx_fc)); - negotiated_rx_fc = pipe_negotiate_fc(host_req_tx_fc, - remote_pref_rx_fc, - sizeof(host_pref_rx_fc)); - - pn->pipe_state = PIPE_DISABLED; - sk->sk_state = TCP_SYN_RECV; - sk->sk_backlog_rcv = pipe_do_rcv; - sk->sk_destruct = pipe_destruct; - pn->rx_credits = 0; - pn->rx_fc = negotiated_rx_fc; - pn->tx_fc = negotiated_tx_fc; - sk->sk_state_change(sk); + unsigned i; + u8 final_fc = PN_NO_FLOW_CONTROL; - ret = pipe_handler_send_created_ind(sk, - PNS_PIPE_CREATED_IND_UTID, - PNS_PIPE_CREATED_IND - ); + for (i = 0; i < n; i++) { + u8 fc = fcs[i]; - return ret; + if (fc > final_fc && fc < PN_MAX_FLOW_CONTROL) + final_fc = fc; + } + return final_fc; } -#endif -static int pep_connreq_rcv(struct sock *sk, struct sk_buff *skb) +static int pep_connresp_rcv(struct sock *sk, struct sk_buff *skb) { - struct sock *newsk; - struct pep_sock *newpn, *pn = pep_sk(sk); + struct pep_sock *pn = pep_sk(sk); struct pnpipehdr *hdr; - struct sockaddr_pn dst; - u16 peer_type; - u8 pipe_handle, enabled, n_sb; - u8 aligned = 0; + u8 n_sb; if (!pskb_pull(skb, sizeof(*hdr) + 4)) return -EINVAL; hdr = pnp_hdr(skb); - pipe_handle = hdr->pipe_handle; - switch (hdr->state_after_connect) { - case PN_PIPE_DISABLE: - enabled = 0; - break; - case PN_PIPE_ENABLE: - enabled = 1; - break; - default: - pep_reject_conn(sk, skb, PN_PIPE_ERR_INVALID_PARAM); - return -EINVAL; - } - peer_type = hdr->other_pep_type << 8; - - if (unlikely(sk->sk_state != TCP_LISTEN) || sk_acceptq_is_full(sk)) { - pep_reject_conn(sk, skb, PN_PIPE_ERR_PEP_IN_USE); - return -ENOBUFS; - } + if (hdr->error_code != PN_PIPE_NO_ERROR) + return -ECONNREFUSED; - /* Parse sub-blocks (options) */ + /* Parse sub-blocks */ n_sb = hdr->data[4]; while (n_sb > 0) { - u8 type, buf[1], len = sizeof(buf); + u8 type, buf[6], len = sizeof(buf); const u8 *data = pep_get_sb(skb, &type, &len, buf); if (data == NULL) return -EINVAL; + switch (type) { - case PN_PIPE_SB_CONNECT_REQ_PEP_SUB_TYPE: - if (len < 1) - return -EINVAL; - peer_type = (peer_type & 0xff00) | data[0]; + case PN_PIPE_SB_REQUIRED_FC_TX: + if (len < 2 || len < data[0]) + break; + pn->tx_fc = pipe_negotiate_fc(data + 2, len - 2); break; - case PN_PIPE_SB_ALIGNED_DATA: - aligned = data[0] != 0; + + case PN_PIPE_SB_PREFERRED_FC_RX: + if (len < 2 || len < data[0]) + break; + pn->rx_fc = pipe_negotiate_fc(data + 2, len - 2); break; + } n_sb--; } - skb = skb_clone(skb, GFP_ATOMIC); - if (!skb) - return -ENOMEM; + return pipe_handler_send_created_ind(sk); +} - /* Create a new to-be-accepted sock */ - newsk = sk_alloc(sock_net(sk), PF_PHONET, GFP_ATOMIC, sk->sk_prot); - if (!newsk) { - kfree_skb(skb); - return -ENOMEM; - } - sock_init_data(NULL, newsk); - 
newsk->sk_state = TCP_SYN_RECV; - newsk->sk_backlog_rcv = pipe_do_rcv; - newsk->sk_protocol = sk->sk_protocol; - newsk->sk_destruct = pipe_destruct; +/* Queue an skb to an actively connected sock. + * Socket lock must be held. */ +static int pipe_handler_do_rcv(struct sock *sk, struct sk_buff *skb) +{ + struct pep_sock *pn = pep_sk(sk); + struct pnpipehdr *hdr = pnp_hdr(skb); + int err = NET_RX_SUCCESS; - newpn = pep_sk(newsk); - pn_skb_get_dst_sockaddr(skb, &dst); - newpn->pn_sk.sobject = pn_sockaddr_get_object(&dst); - newpn->pn_sk.resource = pn->pn_sk.resource; - skb_queue_head_init(&newpn->ctrlreq_queue); - newpn->pipe_handle = pipe_handle; - atomic_set(&newpn->tx_credits, 0); - newpn->peer_type = peer_type; - newpn->rx_credits = 0; - newpn->rx_fc = newpn->tx_fc = PN_LEGACY_FLOW_CONTROL; - newpn->init_enable = enabled; - newpn->aligned = aligned; + switch (hdr->message_id) { + case PNS_PIPE_ALIGNED_DATA: + __skb_pull(skb, 1); + /* fall through */ + case PNS_PIPE_DATA: + __skb_pull(skb, 3); /* Pipe data header */ + if (!pn_flow_safe(pn->rx_fc)) { + err = sock_queue_rcv_skb(sk, skb); + if (!err) + return NET_RX_SUCCESS; + err = NET_RX_DROP; + break; + } - BUG_ON(!skb_queue_empty(&newsk->sk_receive_queue)); - skb_queue_head(&newsk->sk_receive_queue, skb); - if (!sock_flag(sk, SOCK_DEAD)) - sk->sk_data_ready(sk, 0); + if (pn->rx_credits == 0) { + atomic_inc(&sk->sk_drops); + err = NET_RX_DROP; + break; + } + pn->rx_credits--; + skb->dev = NULL; + skb_set_owner_r(skb, sk); + err = skb->len; + skb_queue_tail(&sk->sk_receive_queue, skb); + if (!sock_flag(sk, SOCK_DEAD)) + sk->sk_data_ready(sk, err); + return NET_RX_SUCCESS; - sk_acceptq_added(sk); - sk_add_node(newsk, &pn->ackq); - return 0; + case PNS_PEP_CONNECT_RESP: + if (sk->sk_state != TCP_SYN_SENT) + break; + if (!sock_flag(sk, SOCK_DEAD)) + sk->sk_state_change(sk); + if (pep_connresp_rcv(sk, skb)) { + sk->sk_state = TCP_CLOSE_WAIT; + break; + } + + sk->sk_state = TCP_ESTABLISHED; + if (!pn_flow_safe(pn->tx_fc)) { + atomic_set(&pn->tx_credits, 1); + sk->sk_write_space(sk); + } + pipe_grant_credits(sk, GFP_ATOMIC); + break; + + case PNS_PEP_DISCONNECT_RESP: + /* sock should already be dead, nothing to do */ + break; + + case PNS_PEP_STATUS_IND: + pipe_rcv_status(sk, skb); + break; + } + kfree_skb(skb); + return err; } /* Listening sock must be locked */ @@ -847,7 +636,6 @@ static int pep_do_rcv(struct sock *sk, struct sk_buff *skb) struct sock *sknode; struct pnpipehdr *hdr; struct sockaddr_pn dst; - int err = NET_RX_SUCCESS; u8 pipe_handle; if (!pskb_may_pull(skb, sizeof(*hdr))) @@ -865,26 +653,18 @@ static int pep_do_rcv(struct sock *sk, struct sk_buff *skb) if (sknode) return sk_receive_skb(sknode, skb, 1); - /* Look for a pipe handle pending accept */ - sknode = pep_find_pipe(&pn->ackq, &dst, pipe_handle); - if (sknode) { - sock_put(sknode); - if (net_ratelimit()) - printk(KERN_WARNING"Phonet unconnected PEP ignored"); - err = NET_RX_DROP; - goto drop; - } - switch (hdr->message_id) { case PNS_PEP_CONNECT_REQ: - err = pep_connreq_rcv(sk, skb); - break; - -#ifdef CONFIG_PHONET_PIPECTRLR - case PNS_PEP_CONNECT_RESP: - err = pep_connresp_rcv(sk, skb); - break; -#endif + if (sk->sk_state != TCP_LISTEN || sk_acceptq_is_full(sk)) { + pep_reject_conn(sk, skb, PN_PIPE_ERR_PEP_IN_USE, + GFP_ATOMIC); + break; + } + skb_queue_head(&sk->sk_receive_queue, skb); + sk_acceptq_added(sk); + if (!sock_flag(sk, SOCK_DEAD)) + sk->sk_data_ready(sk, 0); + return NET_RX_SUCCESS; case PNS_PEP_DISCONNECT_REQ: pep_reply(sk, skb, PN_PIPE_NO_ERROR, NULL, 0, 
GFP_ATOMIC); @@ -898,12 +678,17 @@ static int pep_do_rcv(struct sock *sk, struct sk_buff *skb) case PNS_PEP_ENABLE_REQ: case PNS_PEP_DISABLE_REQ: /* invalid handle is not even allowed here! */ + break; + default: - err = NET_RX_DROP; + if ((1 << sk->sk_state) + & ~(TCPF_CLOSE|TCPF_LISTEN|TCPF_CLOSE_WAIT)) + /* actively connected socket */ + return pipe_handler_do_rcv(sk, skb); } drop: kfree_skb(skb); - return err; + return NET_RX_SUCCESS; } static int pipe_do_remove(struct sock *sk) @@ -912,20 +697,16 @@ static int pipe_do_remove(struct sock *sk) struct pnpipehdr *ph; struct sk_buff *skb; - skb = alloc_skb(MAX_PNPIPE_HEADER, GFP_KERNEL); + skb = pep_alloc_skb(sk, NULL, 0, GFP_KERNEL); if (!skb) return -ENOMEM; - skb_reserve(skb, MAX_PNPIPE_HEADER); - __skb_push(skb, sizeof(*ph)); - skb_reset_transport_header(skb); ph = pnp_hdr(skb); ph->utid = 0; ph->message_id = PNS_PIPE_REMOVE_REQ; ph->pipe_handle = pn->pipe_handle; ph->data[0] = PAD; - - return pn_skb_send(sk, skb, &pipe_srv); + return pn_skb_send(sk, skb, NULL); } /* associated socket ceases to exist */ @@ -938,29 +719,15 @@ static void pep_sock_close(struct sock *sk, long timeout) sk_common_release(sk); lock_sock(sk); - if (sk->sk_state == TCP_LISTEN) { - /* Destroy the listen queue */ - struct sock *sknode; - struct hlist_node *p, *n; - - sk_for_each_safe(sknode, p, n, &pn->ackq) - sk_del_node_init(sknode); - sk->sk_state = TCP_CLOSE; - } else if ((1 << sk->sk_state) & (TCPF_SYN_RECV|TCPF_ESTABLISHED)) - /* Forcefully remove dangling Phonet pipe */ - pipe_do_remove(sk); - -#ifdef CONFIG_PHONET_PIPECTRLR - if (pn->pipe_state != PIPE_IDLE) { - /* send pep disconnect request */ - pipe_handler_send_req(sk, - PNS_PEP_DISCONNECT_UTID, PNS_PEP_DISCONNECT_REQ, - GFP_KERNEL); - - pn->pipe_state = PIPE_IDLE; - sk->sk_state = TCP_CLOSE; + if ((1 << sk->sk_state) & (TCPF_SYN_RECV|TCPF_ESTABLISHED)) { + if (sk->sk_backlog_rcv == pipe_do_rcv) + /* Forcefully remove dangling Phonet pipe */ + pipe_do_remove(sk); + else + pipe_handler_request(sk, PNS_PEP_DISCONNECT_REQ, PAD, + NULL, 0); } -#endif + sk->sk_state = TCP_CLOSE; ifindex = pn->ifindex; pn->ifindex = 0; @@ -971,86 +738,141 @@ static void pep_sock_close(struct sock *sk, long timeout) sock_put(sk); } -static int pep_wait_connreq(struct sock *sk, int noblock) +static struct sock *pep_sock_accept(struct sock *sk, int flags, int *errp) { - struct task_struct *tsk = current; - struct pep_sock *pn = pep_sk(sk); - long timeo = sock_rcvtimeo(sk, noblock); - - for (;;) { - DEFINE_WAIT(wait); + struct pep_sock *pn = pep_sk(sk), *newpn; + struct sock *newsk = NULL; + struct sk_buff *skb; + struct pnpipehdr *hdr; + struct sockaddr_pn dst, src; + int err; + u16 peer_type; + u8 pipe_handle, enabled, n_sb; + u8 aligned = 0; - if (sk->sk_state != TCP_LISTEN) - return -EINVAL; - if (!hlist_empty(&pn->ackq)) - break; - if (!timeo) - return -EWOULDBLOCK; - if (signal_pending(tsk)) - return sock_intr_errno(timeo); + skb = skb_recv_datagram(sk, 0, flags & O_NONBLOCK, errp); + if (!skb) + return NULL; - prepare_to_wait_exclusive(sk_sleep(sk), &wait, - TASK_INTERRUPTIBLE); - release_sock(sk); - timeo = schedule_timeout(timeo); - lock_sock(sk); - finish_wait(sk_sleep(sk), &wait); + lock_sock(sk); + if (sk->sk_state != TCP_LISTEN) { + err = -EINVAL; + goto drop; } + sk_acceptq_removed(sk); - return 0; -} + err = -EPROTO; + if (!pskb_may_pull(skb, sizeof(*hdr) + 4)) + goto drop; -static struct sock *pep_sock_accept(struct sock *sk, int flags, int *errp) -{ - struct pep_sock *pn = pep_sk(sk); - struct sock *newsk = 
NULL; - struct sk_buff *oskb; - int err; + hdr = pnp_hdr(skb); + pipe_handle = hdr->pipe_handle; + switch (hdr->state_after_connect) { + case PN_PIPE_DISABLE: + enabled = 0; + break; + case PN_PIPE_ENABLE: + enabled = 1; + break; + default: + pep_reject_conn(sk, skb, PN_PIPE_ERR_INVALID_PARAM, + GFP_KERNEL); + goto drop; + } + peer_type = hdr->other_pep_type << 8; - lock_sock(sk); - err = pep_wait_connreq(sk, flags & O_NONBLOCK); - if (err) - goto out; + /* Parse sub-blocks (options) */ + n_sb = hdr->data[4]; + while (n_sb > 0) { + u8 type, buf[1], len = sizeof(buf); + const u8 *data = pep_get_sb(skb, &type, &len, buf); - newsk = __sk_head(&pn->ackq); + if (data == NULL) + goto drop; + switch (type) { + case PN_PIPE_SB_CONNECT_REQ_PEP_SUB_TYPE: + if (len < 1) + goto drop; + peer_type = (peer_type & 0xff00) | data[0]; + break; + case PN_PIPE_SB_ALIGNED_DATA: + aligned = data[0] != 0; + break; + } + n_sb--; + } - oskb = skb_dequeue(&newsk->sk_receive_queue); - err = pep_accept_conn(newsk, oskb); - if (err) { - skb_queue_head(&newsk->sk_receive_queue, oskb); + /* Check for duplicate pipe handle */ + newsk = pep_find_pipe(&pn->hlist, &dst, pipe_handle); + if (unlikely(newsk)) { + __sock_put(newsk); newsk = NULL; - goto out; + pep_reject_conn(sk, skb, PN_PIPE_ERR_PEP_IN_USE, GFP_KERNEL); + goto drop; + } + + /* Create a new to-be-accepted sock */ + newsk = sk_alloc(sock_net(sk), PF_PHONET, GFP_KERNEL, sk->sk_prot); + if (!newsk) { + pep_reject_conn(sk, skb, PN_PIPE_ERR_OVERLOAD, GFP_KERNEL); + err = -ENOBUFS; + goto drop; } - kfree_skb(oskb); + sock_init_data(NULL, newsk); + newsk->sk_state = TCP_SYN_RECV; + newsk->sk_backlog_rcv = pipe_do_rcv; + newsk->sk_protocol = sk->sk_protocol; + newsk->sk_destruct = pipe_destruct; + + newpn = pep_sk(newsk); + pn_skb_get_dst_sockaddr(skb, &dst); + pn_skb_get_src_sockaddr(skb, &src); + newpn->pn_sk.sobject = pn_sockaddr_get_object(&dst); + newpn->pn_sk.dobject = pn_sockaddr_get_object(&src); + newpn->pn_sk.resource = pn_sockaddr_get_resource(&dst); sock_hold(sk); - pep_sk(newsk)->listener = sk; + newpn->listener = sk; + skb_queue_head_init(&newpn->ctrlreq_queue); + newpn->pipe_handle = pipe_handle; + atomic_set(&newpn->tx_credits, 0); + newpn->ifindex = 0; + newpn->peer_type = peer_type; + newpn->rx_credits = 0; + newpn->rx_fc = newpn->tx_fc = PN_LEGACY_FLOW_CONTROL; + newpn->init_enable = enabled; + newpn->aligned = aligned; - sock_hold(newsk); - sk_del_node_init(newsk); - sk_acceptq_removed(sk); + err = pep_accept_conn(newsk, skb); + if (err) { + sock_put(newsk); + newsk = NULL; + goto drop; + } sk_add_node(newsk, &pn->hlist); - __sock_put(newsk); - -out: +drop: release_sock(sk); + kfree_skb(skb); *errp = err; return newsk; } -#ifdef CONFIG_PHONET_PIPECTRLR static int pep_sock_connect(struct sock *sk, struct sockaddr *addr, int len) { struct pep_sock *pn = pep_sk(sk); - struct sockaddr_pn *spn = (struct sockaddr_pn *)addr; - - memcpy(&pn->remote_pep, spn, sizeof(struct sockaddr_pn)); + int err; + u8 data[4] = { 0 /* sub-blocks */, PAD, PAD, PAD }; - return pipe_handler_send_req(sk, - PNS_PEP_CONNECT_UTID, PNS_PEP_CONNECT_REQ, - GFP_ATOMIC); + pn->pipe_handle = 1; /* anything but INVALID_HANDLE */ + err = pipe_handler_request(sk, PNS_PEP_CONNECT_REQ, + PN_PIPE_ENABLE, data, 4); + if (err) { + pn->pipe_handle = PN_PIPE_INVALID_HANDLE; + return err; + } + sk->sk_state = TCP_SYN_SENT; + return 0; } -#endif static int pep_ioctl(struct sock *sk, int cmd, unsigned long arg) { @@ -1081,10 +903,18 @@ static int pep_init(struct sock *sk) { struct pep_sock *pn = 
pep_sk(sk); - INIT_HLIST_HEAD(&pn->ackq); + sk->sk_destruct = pipe_destruct; INIT_HLIST_HEAD(&pn->hlist); + pn->listener = NULL; skb_queue_head_init(&pn->ctrlreq_queue); + atomic_set(&pn->tx_credits, 0); + pn->ifindex = 0; + pn->peer_type = 0; pn->pipe_handle = PN_PIPE_INVALID_HANDLE; + pn->rx_credits = 0; + pn->rx_fc = pn->tx_fc = PN_LEGACY_FLOW_CONTROL; + pn->init_enable = 1; + pn->aligned = 0; return 0; } @@ -1103,18 +933,6 @@ static int pep_setsockopt(struct sock *sk, int level, int optname, lock_sock(sk); switch (optname) { -#ifdef CONFIG_PHONET_PIPECTRLR - case PNPIPE_PIPE_HANDLE: - if (val) { - if (pn->pipe_state > PIPE_IDLE) { - err = -EFAULT; - break; - } - pn->pipe_handle = val; - break; - } -#endif - case PNPIPE_ENCAP: if (val && val != PNPIPE_ENCAP_IP) { err = -EINVAL; @@ -1141,16 +959,6 @@ static int pep_setsockopt(struct sock *sk, int level, int optname, } goto out_norel; -#ifdef CONFIG_PHONET_PIPECTRLR - case PNPIPE_ENABLE: - if (pn->pipe_state <= PIPE_IDLE) { - err = -ENOTCONN; - break; - } - err = pipe_handler_enable_pipe(sk, val); - break; -#endif - default: err = -ENOPROTOOPT; } @@ -1180,13 +988,11 @@ static int pep_getsockopt(struct sock *sk, int level, int optname, val = pn->ifindex; break; -#ifdef CONFIG_PHONET_PIPECTRLR - case PNPIPE_ENABLE: - if (pn->pipe_state <= PIPE_IDLE) - return -ENOTCONN; - val = pn->pipe_state != PIPE_DISABLED; + case PNPIPE_HANDLE: + val = pn->pipe_handle; + if (val == PN_PIPE_INVALID_HANDLE) + return -EINVAL; break; -#endif default: return -ENOPROTOOPT; @@ -1222,11 +1028,7 @@ static int pipe_skb_send(struct sock *sk, struct sk_buff *skb) } else ph->message_id = PNS_PIPE_DATA; ph->pipe_handle = pn->pipe_handle; -#ifdef CONFIG_PHONET_PIPECTRLR - err = pn_skb_send(sk, skb, &pn->remote_pep); -#else - err = pn_skb_send(sk, skb, &pipe_srv); -#endif + err = pn_skb_send(sk, skb, NULL); if (err && pn_flow_safe(pn->tx_fc)) atomic_inc(&pn->tx_credits); @@ -1355,7 +1157,7 @@ struct sk_buff *pep_read(struct sock *sk) struct sk_buff *skb = skb_dequeue(&sk->sk_receive_queue); if (sk->sk_state == TCP_ESTABLISHED) - pipe_grant_credits(sk); + pipe_grant_credits(sk, GFP_ATOMIC); return skb; } @@ -1400,7 +1202,7 @@ static int pep_recvmsg(struct kiocb *iocb, struct sock *sk, } if (sk->sk_state == TCP_ESTABLISHED) - pipe_grant_credits(sk); + pipe_grant_credits(sk, GFP_KERNEL); release_sock(sk); copy: msg->msg_flags |= MSG_EOR; @@ -1424,9 +1226,9 @@ static void pep_sock_unhash(struct sock *sk) lock_sock(sk); -#ifndef CONFIG_PHONET_PIPECTRLR - if ((1 << sk->sk_state) & ~(TCPF_CLOSE|TCPF_LISTEN)) { + if (pn->listener != NULL) { skparent = pn->listener; + pn->listener = NULL; release_sock(sk); pn = pep_sk(skparent); @@ -1434,7 +1236,7 @@ static void pep_sock_unhash(struct sock *sk) sk_del_node_init(sk); sk = skparent; } -#endif + /* Unhash a listening sock only when it is closed * and all of its active connected pipes are closed. 
*/ if (hlist_empty(&pn->hlist)) @@ -1448,9 +1250,7 @@ static void pep_sock_unhash(struct sock *sk) static struct proto pep_proto = { .close = pep_sock_close, .accept = pep_sock_accept, -#ifdef CONFIG_PHONET_PIPECTRLR .connect = pep_sock_connect, -#endif .ioctl = pep_ioctl, .init = pep_init, .setsockopt = pep_setsockopt, diff --git a/net/phonet/socket.c b/net/phonet/socket.c index 25f746d20c1f..b1adafab377c 100644 --- a/net/phonet/socket.c +++ b/net/phonet/socket.c @@ -225,15 +225,18 @@ static int pn_socket_autobind(struct socket *sock) return 0; /* socket was already bound */ } -#ifdef CONFIG_PHONET_PIPECTRLR static int pn_socket_connect(struct socket *sock, struct sockaddr *addr, int len, int flags) { struct sock *sk = sock->sk; + struct pn_sock *pn = pn_sk(sk); struct sockaddr_pn *spn = (struct sockaddr_pn *)addr; - long timeo; + struct task_struct *tsk = current; + long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); int err; + if (pn_socket_autobind(sock)) + return -ENOBUFS; if (len < sizeof(struct sockaddr_pn)) return -EINVAL; if (spn->spn_family != AF_PHONET) @@ -243,82 +246,61 @@ static int pn_socket_connect(struct socket *sock, struct sockaddr *addr, switch (sock->state) { case SS_UNCONNECTED: - sk->sk_state = TCP_CLOSE; - break; - case SS_CONNECTING: - switch (sk->sk_state) { - case TCP_SYN_RECV: - sock->state = SS_CONNECTED; + if (sk->sk_state != TCP_CLOSE) { err = -EISCONN; goto out; - case TCP_CLOSE: - err = -EALREADY; - if (flags & O_NONBLOCK) - goto out; - goto wait_connect; } break; - case SS_CONNECTED: - switch (sk->sk_state) { - case TCP_SYN_RECV: - err = -EISCONN; - goto out; - case TCP_CLOSE: - sock->state = SS_UNCONNECTED; - break; - } - break; - case SS_DISCONNECTING: - case SS_FREE: - break; + case SS_CONNECTING: + err = -EALREADY; + goto out; + default: + err = -EISCONN; + goto out; } - sk->sk_state = TCP_CLOSE; - sk_stream_kill_queues(sk); + pn->dobject = pn_sockaddr_get_object(spn); + pn->resource = pn_sockaddr_get_resource(spn); sock->state = SS_CONNECTING; + err = sk->sk_prot->connect(sk, addr, len); - if (err < 0) { + if (err) { sock->state = SS_UNCONNECTED; - sk->sk_state = TCP_CLOSE; + pn->dobject = 0; goto out; } - err = -EINPROGRESS; -wait_connect: - if (sk->sk_state != TCP_SYN_RECV && (flags & O_NONBLOCK)) - goto out; - - timeo = sock_sndtimeo(sk, flags & O_NONBLOCK); - release_sock(sk); - - err = -ERESTARTSYS; - timeo = wait_event_interruptible_timeout(*sk_sleep(sk), - sk->sk_state != TCP_CLOSE, - timeo); - - lock_sock(sk); - if (timeo < 0) - goto out; /* -ERESTARTSYS */ + while (sk->sk_state == TCP_SYN_SENT) { + DEFINE_WAIT(wait); - err = -ETIMEDOUT; - if (timeo == 0 && sk->sk_state != TCP_SYN_RECV) - goto out; + if (!timeo) { + err = -EINPROGRESS; + goto out; + } + if (signal_pending(tsk)) { + err = sock_intr_errno(timeo); + goto out; + } - if (sk->sk_state != TCP_SYN_RECV) { - sock->state = SS_UNCONNECTED; - err = sock_error(sk); - if (!err) - err = -ECONNREFUSED; - goto out; + prepare_to_wait_exclusive(sk_sleep(sk), &wait, + TASK_INTERRUPTIBLE); + release_sock(sk); + timeo = schedule_timeout(timeo); + lock_sock(sk); + finish_wait(sk_sleep(sk), &wait); } - sock->state = SS_CONNECTED; - err = 0; + if ((1 << sk->sk_state) & (TCPF_SYN_RECV|TCPF_ESTABLISHED)) + err = 0; + else if (sk->sk_state == TCP_CLOSE_WAIT) + err = -ECONNRESET; + else + err = -ECONNREFUSED; + sock->state = err ? 
SS_UNCONNECTED : SS_CONNECTED; out: release_sock(sk); return err; } -#endif static int pn_socket_accept(struct socket *sock, struct socket *newsock, int flags) @@ -327,6 +309,9 @@ static int pn_socket_accept(struct socket *sock, struct socket *newsock, struct sock *newsk; int err; + if (unlikely(sk->sk_state != TCP_LISTEN)) + return -EINVAL; + newsk = sk->sk_prot->accept(sk, flags, &err); if (!newsk) return err; @@ -363,13 +348,8 @@ static unsigned int pn_socket_poll(struct file *file, struct socket *sock, poll_wait(file, sk_sleep(sk), wait); - switch (sk->sk_state) { - case TCP_LISTEN: - return hlist_empty(&pn->ackq) ? 0 : POLLIN; - case TCP_CLOSE: + if (sk->sk_state == TCP_CLOSE) return POLLERR; - } - if (!skb_queue_empty(&sk->sk_receive_queue)) mask |= POLLIN | POLLRDNORM; if (!skb_queue_empty(&pn->ctrlreq_queue)) @@ -428,19 +408,19 @@ static int pn_socket_listen(struct socket *sock, int backlog) struct sock *sk = sock->sk; int err = 0; - if (sock->state != SS_UNCONNECTED) - return -EINVAL; if (pn_socket_autobind(sock)) return -ENOBUFS; lock_sock(sk); - if (sk->sk_state != TCP_CLOSE) { + if (sock->state != SS_UNCONNECTED) { err = -EINVAL; goto out; } - sk->sk_state = TCP_LISTEN; - sk->sk_ack_backlog = 0; + if (sk->sk_state != TCP_LISTEN) { + sk->sk_state = TCP_LISTEN; + sk->sk_ack_backlog = 0; + } sk->sk_max_ack_backlog = backlog; out: release_sock(sk); @@ -488,11 +468,7 @@ const struct proto_ops phonet_stream_ops = { .owner = THIS_MODULE, .release = pn_socket_release, .bind = pn_socket_bind, -#ifdef CONFIG_PHONET_PIPECTRLR .connect = pn_socket_connect, -#else - .connect = sock_no_connect, -#endif .socketpair = sock_no_socketpair, .accept = pn_socket_accept, .getname = pn_socket_getname, @@ -633,8 +609,8 @@ static int pn_sock_seq_show(struct seq_file *seq, void *v) seq_printf(seq, "%2d %04X:%04X:%02X %02X %08X:%08X %5d %lu " "%d %p %d%n", - sk->sk_protocol, pn->sobject, 0, pn->resource, - sk->sk_state, + sk->sk_protocol, pn->sobject, pn->dobject, + pn->resource, sk->sk_state, sk_wmem_alloc_get(sk), sk_rmem_alloc_get(sk), sock_i_uid(sk), sock_i_ino(sk), atomic_read(&sk->sk_refcnt), sk, diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c index 71f373c421bc..c47a511f203d 100644 --- a/net/rds/ib_send.c +++ b/net/rds/ib_send.c @@ -551,7 +551,10 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm, if (conn->c_loopback && rm->m_inc.i_hdr.h_flags & RDS_FLAG_CONG_BITMAP) { rds_cong_map_updated(conn->c_fcong, ~(u64) 0); - return sizeof(struct rds_header) + RDS_CONG_MAP_BYTES; + scat = &rm->data.op_sg[sg]; + ret = sizeof(struct rds_header) + RDS_CONG_MAP_BYTES; + ret = min_t(int, ret, scat->length - conn->c_xmit_data_off); + return ret; } /* FIXME we may overallocate here */ diff --git a/net/rds/loop.c b/net/rds/loop.c index aeec1d483b17..bca6761a3ca2 100644 --- a/net/rds/loop.c +++ b/net/rds/loop.c @@ -61,10 +61,15 @@ static int rds_loop_xmit(struct rds_connection *conn, struct rds_message *rm, unsigned int hdr_off, unsigned int sg, unsigned int off) { + struct scatterlist *sgp = &rm->data.op_sg[sg]; + int ret = sizeof(struct rds_header) + + be32_to_cpu(rm->m_inc.i_hdr.h_len); + /* Do not send cong updates to loopback */ if (rm->m_inc.i_hdr.h_flags & RDS_FLAG_CONG_BITMAP) { rds_cong_map_updated(conn->c_fcong, ~(u64) 0); - return sizeof(struct rds_header) + RDS_CONG_MAP_BYTES; + ret = min_t(int, ret, sgp->length - conn->c_xmit_data_off); + goto out; } BUG_ON(hdr_off || sg || off); @@ -80,8 +85,8 @@ static int rds_loop_xmit(struct rds_connection *conn, struct rds_message *rm, 
NULL); rds_inc_put(&rm->m_inc); - - return sizeof(struct rds_header) + be32_to_cpu(rm->m_inc.i_hdr.h_len); +out: + return ret; } /* diff --git a/net/rfkill/Kconfig b/net/rfkill/Kconfig index eaf765876458..7fce6dfd2180 100644 --- a/net/rfkill/Kconfig +++ b/net/rfkill/Kconfig @@ -18,7 +18,7 @@ config RFKILL_LEDS default y config RFKILL_INPUT - bool "RF switch input support" if EMBEDDED + bool "RF switch input support" if EXPERT depends on RFKILL depends on INPUT = y || RFKILL = INPUT - default y if !EMBEDDED + default y if !EXPERT diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index d952e7eac188..5ee0c62046a0 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -803,7 +803,6 @@ static int rose_connect(struct socket *sock, struct sockaddr *uaddr, int addr_le rose_insert_socket(sk); /* Finish the bind */ } -rose_try_next_neigh: rose->dest_addr = addr->srose_addr; rose->dest_call = addr->srose_call; rose->rand = ((long)rose & 0xFFFF) + rose->lci; @@ -865,12 +864,6 @@ rose_try_next_neigh: } if (sk->sk_state != TCP_ESTABLISHED) { - /* Try next neighbour */ - rose->neighbour = rose_get_neigh(&addr->srose_addr, &cause, &diagnostic, 0); - if (rose->neighbour) - goto rose_try_next_neigh; - - /* No more neighbours */ sock->state = SS_UNCONNECTED; err = sock_error(sk); /* Always set at this point */ goto out_release; diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c index b4fdaac233f7..88a77e90e7e8 100644 --- a/net/rose/rose_route.c +++ b/net/rose/rose_route.c @@ -674,29 +674,34 @@ struct rose_route *rose_route_free_lci(unsigned int lci, struct rose_neigh *neig * Find a neighbour or a route given a ROSE address. */ struct rose_neigh *rose_get_neigh(rose_address *addr, unsigned char *cause, - unsigned char *diagnostic, int new) + unsigned char *diagnostic, int route_frame) { struct rose_neigh *res = NULL; struct rose_node *node; int failed = 0; int i; - if (!new) spin_lock_bh(&rose_node_list_lock); + if (!route_frame) spin_lock_bh(&rose_node_list_lock); for (node = rose_node_list; node != NULL; node = node->next) { if (rosecmpm(addr, &node->address, node->mask) == 0) { for (i = 0; i < node->count; i++) { - if (new) { - if (node->neighbour[i]->restarted) { - res = node->neighbour[i]; - goto out; - } + if (node->neighbour[i]->restarted) { + res = node->neighbour[i]; + goto out; } - else { + } + } + } + if (!route_frame) { /* connect request */ + for (node = rose_node_list; node != NULL; node = node->next) { + if (rosecmpm(addr, &node->address, node->mask) == 0) { + for (i = 0; i < node->count; i++) { if (!rose_ftimer_running(node->neighbour[i])) { res = node->neighbour[i]; + failed = 0; goto out; - } else - failed = 1; + } + failed = 1; } } } @@ -711,8 +716,7 @@ struct rose_neigh *rose_get_neigh(rose_address *addr, unsigned char *cause, } out: - if (!new) spin_unlock_bh(&rose_node_list_lock); - + if (!route_frame) spin_unlock_bh(&rose_node_list_lock); return res; } diff --git a/net/rxrpc/ar-input.c b/net/rxrpc/ar-input.c index 89315009bab1..1a2b0633fece 100644 --- a/net/rxrpc/ar-input.c +++ b/net/rxrpc/ar-input.c @@ -423,6 +423,7 @@ void rxrpc_fast_process_packet(struct rxrpc_call *call, struct sk_buff *skb) goto protocol_error; } + case RXRPC_PACKET_TYPE_ACKALL: case RXRPC_PACKET_TYPE_ACK: /* ACK processing is done in process context */ read_lock_bh(&call->state_lock); diff --git a/net/rxrpc/ar-key.c b/net/rxrpc/ar-key.c index 5ee16f0353fe..d763793d39de 100644 --- a/net/rxrpc/ar-key.c +++ b/net/rxrpc/ar-key.c @@ -89,11 +89,11 @@ static int rxrpc_instantiate_xdr_rxkad(struct key 
*key, const __be32 *xdr, return ret; plen -= sizeof(*token); - token = kmalloc(sizeof(*token), GFP_KERNEL); + token = kzalloc(sizeof(*token), GFP_KERNEL); if (!token) return -ENOMEM; - token->kad = kmalloc(plen, GFP_KERNEL); + token->kad = kzalloc(plen, GFP_KERNEL); if (!token->kad) { kfree(token); return -ENOMEM; @@ -731,10 +731,10 @@ static int rxrpc_instantiate(struct key *key, const void *data, size_t datalen) goto error; ret = -ENOMEM; - token = kmalloc(sizeof(*token), GFP_KERNEL); + token = kzalloc(sizeof(*token), GFP_KERNEL); if (!token) goto error; - token->kad = kmalloc(plen, GFP_KERNEL); + token->kad = kzalloc(plen, GFP_KERNEL); if (!token->kad) goto error_free; diff --git a/net/rxrpc/ar-peer.c b/net/rxrpc/ar-peer.c index a53fb25a64ed..55b93dc60d0c 100644 --- a/net/rxrpc/ar-peer.c +++ b/net/rxrpc/ar-peer.c @@ -36,31 +36,15 @@ static void rxrpc_destroy_peer(struct work_struct *work); static void rxrpc_assess_MTU_size(struct rxrpc_peer *peer) { struct rtable *rt; - struct flowi fl; - int ret; peer->if_mtu = 1500; - memset(&fl, 0, sizeof(fl)); - - switch (peer->srx.transport.family) { - case AF_INET: - fl.oif = 0; - fl.proto = IPPROTO_UDP, - fl.fl4_dst = peer->srx.transport.sin.sin_addr.s_addr; - fl.fl4_src = 0; - fl.fl4_tos = 0; - /* assume AFS.CM talking to AFS.FS */ - fl.fl_ip_sport = htons(7001); - fl.fl_ip_dport = htons(7000); - break; - default: - BUG(); - } - - ret = ip_route_output_key(&init_net, &rt, &fl); - if (ret < 0) { - _leave(" [route err %d]", ret); + rt = ip_route_output_ports(&init_net, NULL, + peer->srx.transport.sin.sin_addr.s_addr, 0, + htons(7000), htons(7001), + IPPROTO_UDP, 0, 0); + if (IS_ERR(rt)) { + _leave(" [route err %ld]", PTR_ERR(rt)); return; } diff --git a/net/sched/Kconfig b/net/sched/Kconfig index e318f458713e..a7a5583d4f68 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -126,6 +126,17 @@ config NET_SCH_RED To compile this code as a module, choose M here: the module will be called sch_red. +config NET_SCH_SFB + tristate "Stochastic Fair Blue (SFB)" + ---help--- + Say Y here if you want to use the Stochastic Fair Blue (SFB) + packet scheduling algorithm. + + See the top of <file:net/sched/sch_sfb.c> for more details. + + To compile this code as a module, choose M here: the + module will be called sch_sfb. + config NET_SCH_SFQ tristate "Stochastic Fairness Queueing (SFQ)" ---help--- @@ -217,6 +228,17 @@ config NET_SCH_MQPRIO If unsure, say N. +config NET_SCH_CHOKE + tristate "CHOose and Keep responsive flow scheduler (CHOKE)" + help + Say Y here if you want to use the CHOKe packet scheduler (CHOose + and Keep for responsive flows, CHOose and Kill for unresponsive + flows). This is a variation of RED which trys to penalize flows + that monopolize the queue. + + To compile this code as a module, choose M here: the + module will be called sch_choke. 
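The rxrpc ar-peer.c hunk above is a good example of the flowi conversion pattern applied throughout this series: routing helpers now return the route itself, with failures encoded via ERR_PTR(), instead of filling in a caller-supplied pointer and returning an int. A small sketch of that calling convention, reusing the same ip_route_output_ports() arguments as that hunk; the surrounding helper is hypothetical:

	/* Hypothetical helper: path MTU towards daddr via the route-returning
	 * API (errors come back as ERR_PTR, not as an int return code). */
	static unsigned int peer_path_mtu(__be32 daddr)
	{
		struct rtable *rt;
		unsigned int mtu = 1500;	/* same fallback as the rxrpc hunk */

		rt = ip_route_output_ports(&init_net, NULL, daddr, 0,
					   htons(7000), htons(7001),
					   IPPROTO_UDP, 0, 0);
		if (IS_ERR(rt))
			return mtu;		/* lookup failed, keep the default */

		if (dst_mtu(&rt->dst))
			mtu = dst_mtu(&rt->dst);
		ip_rt_put(rt);
		return mtu;
	}

The same convention appears in the xt_TEE IPv4 hunk earlier in this patch, where ip_route_output_key() now returns the rtable and is checked with IS_ERR() rather than a nonzero return value.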
+ config NET_SCH_INGRESS tristate "Ingress Qdisc" depends on NET_CLS_ACT diff --git a/net/sched/Makefile b/net/sched/Makefile index 26ce681a2c60..2e77b8dba22e 100644 --- a/net/sched/Makefile +++ b/net/sched/Makefile @@ -24,6 +24,7 @@ obj-$(CONFIG_NET_SCH_RED) += sch_red.o obj-$(CONFIG_NET_SCH_GRED) += sch_gred.o obj-$(CONFIG_NET_SCH_INGRESS) += sch_ingress.o obj-$(CONFIG_NET_SCH_DSMARK) += sch_dsmark.o +obj-$(CONFIG_NET_SCH_SFB) += sch_sfb.o obj-$(CONFIG_NET_SCH_SFQ) += sch_sfq.o obj-$(CONFIG_NET_SCH_TBF) += sch_tbf.o obj-$(CONFIG_NET_SCH_TEQL) += sch_teql.o @@ -33,6 +34,8 @@ obj-$(CONFIG_NET_SCH_ATM) += sch_atm.o obj-$(CONFIG_NET_SCH_NETEM) += sch_netem.o obj-$(CONFIG_NET_SCH_DRR) += sch_drr.o obj-$(CONFIG_NET_SCH_MQPRIO) += sch_mqprio.o +obj-$(CONFIG_NET_SCH_CHOKE) += sch_choke.o + obj-$(CONFIG_NET_CLS_U32) += cls_u32.o obj-$(CONFIG_NET_CLS_ROUTE4) += cls_route.o obj-$(CONFIG_NET_CLS_FW) += cls_fw.o diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c index d580cdfca093..a907905376df 100644 --- a/net/sched/cls_route.c +++ b/net/sched/cls_route.c @@ -143,7 +143,7 @@ static int route4_classify(struct sk_buff *skb, struct tcf_proto *tp, if (head == NULL) goto old_method; - iif = ((struct rtable *)dst)->fl.iif; + iif = ((struct rtable *)dst)->rt_iif; h = route4_fastmap_hash(id, iif); if (id == head->fastmap[h].id && diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 966920c14e7a..3b93fc0c8955 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -134,12 +134,12 @@ next_knode: for (i = n->sel.nkeys; i > 0; i--, key++) { int toff = off + key->off + (off2 & key->offmask); - __be32 *data, _data; + __be32 *data, hdata; if (skb_headroom(skb) + toff > INT_MAX) goto out; - data = skb_header_pointer(skb, toff, 4, &_data); + data = skb_header_pointer(skb, toff, 4, &hdata); if (!data) goto out; if ((*data ^ key->val) & key->mask) { @@ -187,10 +187,10 @@ check_terminal: ht = n->ht_down; sel = 0; if (ht->divisor) { - __be32 *data, _data; + __be32 *data, hdata; data = skb_header_pointer(skb, off + n->sel.hoff, 4, - &_data); + &hdata); if (!data) goto out; sel = ht->divisor & u32_hash_fold(*data, &n->sel, @@ -202,11 +202,11 @@ check_terminal: if (n->sel.flags & (TC_U32_OFFSET | TC_U32_VAROFFSET)) { off2 = n->sel.off + 3; if (n->sel.flags & TC_U32_VAROFFSET) { - __be16 *data, _data; + __be16 *data, hdata; data = skb_header_pointer(skb, off + n->sel.offoff, - 2, &_data); + 2, &hdata); if (!data) goto out; off2 += ntohs(n->sel.offmask & *data) >> diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c index a889d099320f..a4de67eca824 100644 --- a/net/sched/em_meta.c +++ b/net/sched/em_meta.c @@ -264,7 +264,7 @@ META_COLLECTOR(int_rtiif) if (unlikely(skb_rtable(skb) == NULL)) *err = -1; else - dst->value = skb_rtable(skb)->fl.iif; + dst->value = skb_rtable(skb)->rt_iif; } /************************************************************************** @@ -401,7 +401,7 @@ META_COLLECTOR(int_sk_sndbuf) META_COLLECTOR(int_sk_alloc) { SKIP_NONLOCAL(skb); - dst->value = skb->sk->sk_allocation; + dst->value = (__force int) skb->sk->sk_allocation; } META_COLLECTOR(int_sk_route_caps) diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 36ac0ec81ce0..7490f3f2db8b 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -398,6 +398,11 @@ static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt) return stab; } +static void stab_kfree_rcu(struct rcu_head *head) +{ + kfree(container_of(head, struct qdisc_size_table, rcu)); +} + void qdisc_put_stab(struct qdisc_size_table *tab) { if 
(!tab) @@ -407,7 +412,7 @@ void qdisc_put_stab(struct qdisc_size_table *tab) if (--tab->refcnt == 0) { list_del(&tab->list); - kfree(tab); + call_rcu_bh(&tab->rcu, stab_kfree_rcu); } spin_unlock(&qdisc_stab_lock); @@ -430,7 +435,7 @@ nla_put_failure: return -1; } -void qdisc_calculate_pkt_len(struct sk_buff *skb, struct qdisc_size_table *stab) +void __qdisc_calculate_pkt_len(struct sk_buff *skb, const struct qdisc_size_table *stab) { int pkt_len, slot; @@ -456,7 +461,7 @@ out: pkt_len = 1; qdisc_skb_cb(skb)->pkt_len = pkt_len; } -EXPORT_SYMBOL(qdisc_calculate_pkt_len); +EXPORT_SYMBOL(__qdisc_calculate_pkt_len); void qdisc_warn_nonwc(char *txt, struct Qdisc *qdisc) { @@ -473,7 +478,7 @@ static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer) struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog, timer); - wd->qdisc->flags &= ~TCQ_F_THROTTLED; + qdisc_unthrottled(wd->qdisc); __netif_schedule(qdisc_root(wd->qdisc)); return HRTIMER_NORESTART; @@ -495,7 +500,7 @@ void qdisc_watchdog_schedule(struct qdisc_watchdog *wd, psched_time_t expires) &qdisc_root_sleeping(wd->qdisc)->state)) return; - wd->qdisc->flags |= TCQ_F_THROTTLED; + qdisc_throttled(wd->qdisc); time = ktime_set(0, 0); time = ktime_add_ns(time, PSCHED_TICKS2NS(expires)); hrtimer_start(&wd->timer, time, HRTIMER_MODE_ABS); @@ -505,7 +510,7 @@ EXPORT_SYMBOL(qdisc_watchdog_schedule); void qdisc_watchdog_cancel(struct qdisc_watchdog *wd) { hrtimer_cancel(&wd->timer); - wd->qdisc->flags &= ~TCQ_F_THROTTLED; + qdisc_unthrottled(wd->qdisc); } EXPORT_SYMBOL(qdisc_watchdog_cancel); @@ -835,7 +840,7 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue, err = PTR_ERR(stab); goto err_out4; } - sch->stab = stab; + rcu_assign_pointer(sch->stab, stab); } if (tca[TCA_RATE]) { spinlock_t *root_lock; @@ -875,7 +880,7 @@ err_out4: * Any broken qdiscs that would require a ops->reset() here? * The qdisc was never in action so it shouldn't be necessary. 
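The sch_api.c hunks above convert sch->stab to RCU-style access: the size table is published with rcu_assign_pointer(), read with rtnl_dereference() where the RTNL is held, and released through call_rcu_bh() so the memory is only freed after in-flight readers have finished. A small userspace sketch of the container_of() step the callback relies on; there is no real grace period here, the callback is simply invoked by hand, and struct rcu_head is reduced to the one field the demo needs.

#include <stdio.h>
#include <stdlib.h>
#include <stddef.h>

/* Recover the address of the enclosing structure from a pointer to
 * one of its members -- same idea as the kernel's container_of().
 */
#define container_of(ptr, type, member) \
    ((type *)((char *)(ptr) - offsetof(type, member)))

struct rcu_head { void (*func)(struct rcu_head *head); };

struct qdisc_size_table {
    struct rcu_head rcu;
    int refcnt;
};

/* Same shape as stab_kfree_rcu(): the callback only sees the rcu_head
 * and uses container_of() to find the table that embeds it.
 */
static void stab_free_cb(struct rcu_head *head)
{
    struct qdisc_size_table *tab =
        container_of(head, struct qdisc_size_table, rcu);

    printf("freeing table with refcnt=%d\n", tab->refcnt);
    free(tab);
}

int main(void)
{
    struct qdisc_size_table *tab = calloc(1, sizeof(*tab));

    tab->rcu.func = stab_free_cb;

    /* In the kernel this would be call_rcu_bh(&tab->rcu, stab_kfree_rcu)
     * and the callback would run after a grace period; here we just
     * call it immediately.
     */
    tab->rcu.func(&tab->rcu);
    return 0;
}

Embedding the rcu_head in the object is what allows a callback that receives only the head to recover, and then free, the whole table.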
*/ - qdisc_put_stab(sch->stab); + qdisc_put_stab(rtnl_dereference(sch->stab)); if (ops->destroy) ops->destroy(sch); goto err_out3; @@ -883,7 +888,7 @@ err_out4: static int qdisc_change(struct Qdisc *sch, struct nlattr **tca) { - struct qdisc_size_table *stab = NULL; + struct qdisc_size_table *ostab, *stab = NULL; int err = 0; if (tca[TCA_OPTIONS]) { @@ -900,8 +905,9 @@ static int qdisc_change(struct Qdisc *sch, struct nlattr **tca) return PTR_ERR(stab); } - qdisc_put_stab(sch->stab); - sch->stab = stab; + ostab = rtnl_dereference(sch->stab); + rcu_assign_pointer(sch->stab, stab); + qdisc_put_stab(ostab); if (tca[TCA_RATE]) { /* NB: ignores errors from replace_estimator @@ -1180,6 +1186,7 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid, struct nlmsghdr *nlh; unsigned char *b = skb_tail_pointer(skb); struct gnet_dump d; + struct qdisc_size_table *stab; nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags); tcm = NLMSG_DATA(nlh); @@ -1195,7 +1202,8 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid, goto nla_put_failure; q->qstats.qlen = q->q.qlen; - if (q->stab && qdisc_dump_stab(skb, q->stab) < 0) + stab = rtnl_dereference(q->stab); + if (stab && qdisc_dump_stab(skb, stab) < 0) goto nla_put_failure; if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS, @@ -1664,12 +1672,12 @@ int tc_classify(struct sk_buff *skb, struct tcf_proto *tp, struct tcf_result *res) { int err = 0; - __be16 protocol; #ifdef CONFIG_NET_CLS_ACT + __be16 protocol; struct tcf_proto *otp = tp; reclassify: -#endif protocol = skb->protocol; +#endif err = tc_classify_compat(skb, tp, res); #ifdef CONFIG_NET_CLS_ACT diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 4aaf44c95c52..24d94c097b35 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -351,7 +351,7 @@ cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl) { int toplevel = q->toplevel; - if (toplevel > cl->level && !(cl->q->flags & TCQ_F_THROTTLED)) { + if (toplevel > cl->level && !(qdisc_is_throttled(cl->q))) { psched_time_t now; psched_tdiff_t incr; @@ -391,7 +391,6 @@ cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch) ret = qdisc_enqueue(skb, cl->q); if (ret == NET_XMIT_SUCCESS) { sch->q.qlen++; - qdisc_bstats_update(sch, skb); cbq_mark_toplevel(q, cl); if (!cl->next_alive) cbq_activate_class(cl); @@ -625,7 +624,7 @@ static enum hrtimer_restart cbq_undelay(struct hrtimer *timer) hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS); } - sch->flags &= ~TCQ_F_THROTTLED; + qdisc_unthrottled(sch); __netif_schedule(qdisc_root(sch)); return HRTIMER_NORESTART; } @@ -650,7 +649,6 @@ static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child) ret = qdisc_enqueue(skb, cl->q); if (ret == NET_XMIT_SUCCESS) { sch->q.qlen++; - qdisc_bstats_update(sch, skb); if (!cl->next_alive) cbq_activate_class(cl); return 0; @@ -973,8 +971,9 @@ cbq_dequeue(struct Qdisc *sch) skb = cbq_dequeue_1(sch); if (skb) { + qdisc_bstats_update(sch, skb); sch->q.qlen--; - sch->flags &= ~TCQ_F_THROTTLED; + qdisc_unthrottled(sch); return skb; } diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c new file mode 100644 index 000000000000..06afbaeb4c88 --- /dev/null +++ b/net/sched/sch_choke.c @@ -0,0 +1,688 @@ +/* + * net/sched/sch_choke.c CHOKE scheduler + * + * Copyright (c) 2011 Stephen Hemminger <shemminger@vyatta.com> + * Copyright (c) 2011 Eric Dumazet <eric.dumazet@gmail.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the 
GNU General Public License + * version 2 as published by the Free Software Foundation. + * + */ + +#include <linux/module.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/skbuff.h> +#include <linux/reciprocal_div.h> +#include <linux/vmalloc.h> +#include <net/pkt_sched.h> +#include <net/inet_ecn.h> +#include <net/red.h> +#include <linux/ip.h> +#include <net/ip.h> +#include <linux/ipv6.h> +#include <net/ipv6.h> + +/* + CHOKe stateless AQM for fair bandwidth allocation + ================================================= + + CHOKe (CHOose and Keep for responsive flows, CHOose and Kill for + unresponsive flows) is a variant of RED that penalizes misbehaving flows but + maintains no flow state. The difference from RED is an additional step + during the enqueuing process. If average queue size is over the + low threshold (qmin), a packet is chosen at random from the queue. + If both the new and chosen packet are from the same flow, both + are dropped. Unlike RED, CHOKe is not really a "classful" qdisc because it + needs to access packets in queue randomly. It has a minimal class + interface to allow overriding the builtin flow classifier with + filters. + + Source: + R. Pan, B. Prabhakar, and K. Psounis, "CHOKe, A Stateless + Active Queue Management Scheme for Approximating Fair Bandwidth Allocation", + IEEE INFOCOM, 2000. + + A. Tang, J. Wang, S. Low, "Understanding CHOKe: Throughput and Spatial + Characteristics", IEEE/ACM Transactions on Networking, 2004 + + */ + +/* Upper bound on size of sk_buff table (packets) */ +#define CHOKE_MAX_QUEUE (128*1024 - 1) + +struct choke_sched_data { +/* Parameters */ + u32 limit; + unsigned char flags; + + struct red_parms parms; + +/* Variables */ + struct tcf_proto *filter_list; + struct { + u32 prob_drop; /* Early probability drops */ + u32 prob_mark; /* Early probability marks */ + u32 forced_drop; /* Forced drops, qavg > max_thresh */ + u32 forced_mark; /* Forced marks, qavg > max_thresh */ + u32 pdrop; /* Drops due to queue limits */ + u32 other; /* Drops due to drop() calls */ + u32 matched; /* Drops to flow match */ + } stats; + + unsigned int head; + unsigned int tail; + + unsigned int tab_mask; /* size - 1 */ + + struct sk_buff **tab; +}; + +/* deliver a random number between 0 and N - 1 */ +static u32 random_N(unsigned int N) +{ + return reciprocal_divide(random32(), N); +} + +/* number of elements in queue including holes */ +static unsigned int choke_len(const struct choke_sched_data *q) +{ + return (q->tail - q->head) & q->tab_mask; +} + +/* Is ECN parameter configured */ +static int use_ecn(const struct choke_sched_data *q) +{ + return q->flags & TC_RED_ECN; +} + +/* Should packets over max just be dropped (versus marked) */ +static int use_harddrop(const struct choke_sched_data *q) +{ + return q->flags & TC_RED_HARDDROP; +} + +/* Move head pointer forward to skip over holes */ +static void choke_zap_head_holes(struct choke_sched_data *q) +{ + do { + q->head = (q->head + 1) & q->tab_mask; + if (q->head == q->tail) + break; + } while (q->tab[q->head] == NULL); +} + +/* Move tail pointer backwards to reuse holes */ +static void choke_zap_tail_holes(struct choke_sched_data *q) +{ + do { + q->tail = (q->tail - 1) & q->tab_mask; + if (q->head == q->tail) + break; + } while (q->tab[q->tail] == NULL); +} + +/* Drop packet from queue array by creating a "hole" */ +static void choke_drop_by_idx(struct Qdisc *sch, unsigned int idx) +{ + struct choke_sched_data *q = qdisc_priv(sch); + struct sk_buff *skb = q->tab[idx]; + + 
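The block comment above is the heart of CHOKe: once the RED average queue passes the minimum threshold, one already-queued packet is sampled at random and compared with the arrival, and both are dropped when they belong to the same flow, so a flow is penalized in proportion to its share of the queue. A toy userspace sketch of just that sampling step, with integer flow ids standing in for the rxhash and header comparison the qdisc really performs, and with the RED thresholds and the actual drop left out.

#include <stdio.h>
#include <stdlib.h>

#define QLEN 8

/* Toy model: the queue holds flow identifiers instead of skbs.
 * Flow 1 occupies half the queue, flow 4 a single slot.
 */
static const int queue[QLEN] = { 1, 2, 1, 3, 1, 2, 4, 1 };

/* CHOKe step: sample one queued packet at random and report whether
 * it belongs to the same flow as the arriving packet.  In the real
 * qdisc a match drops both packets; here we only count matches.
 */
static int choke_sample_matches(int arriving_flow)
{
    return queue[rand() % QLEN] == arriving_flow;
}

int main(void)
{
    int i, hits1 = 0, hits4 = 0;

    srand(1);
    for (i = 0; i < 100000; i++) {
        hits1 += choke_sample_matches(1);
        hits4 += choke_sample_matches(4);
    }
    /* Expect roughly 4/8 of flow 1 arrivals to match a sampled packet,
     * against 1/8 for flow 4: the heavier flow is the one penalized.
     */
    printf("flow 1 matched %d, flow 4 matched %d (of 100000)\n",
           hits1, hits4);
    return 0;
}

With half the queue occupied by flow 1, its arrivals hit a matching sample about four times as often as flow 4's, which is the bias that keeps unresponsive flows in check without per-flow state.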
q->tab[idx] = NULL; + + if (idx == q->head) + choke_zap_head_holes(q); + if (idx == q->tail) + choke_zap_tail_holes(q); + + sch->qstats.backlog -= qdisc_pkt_len(skb); + qdisc_drop(skb, sch); + qdisc_tree_decrease_qlen(sch, 1); + --sch->q.qlen; +} + +/* + * Compare flow of two packets + * Returns true only if source and destination address and port match. + * false for special cases + */ +static bool choke_match_flow(struct sk_buff *skb1, + struct sk_buff *skb2) +{ + int off1, off2, poff; + const u32 *ports1, *ports2; + u8 ip_proto; + __u32 hash1; + + if (skb1->protocol != skb2->protocol) + return false; + + /* Use hash value as quick check + * Assumes that __skb_get_rxhash makes IP header and ports linear + */ + hash1 = skb_get_rxhash(skb1); + if (!hash1 || hash1 != skb_get_rxhash(skb2)) + return false; + + /* Probably match, but be sure to avoid hash collisions */ + off1 = skb_network_offset(skb1); + off2 = skb_network_offset(skb2); + + switch (skb1->protocol) { + case __constant_htons(ETH_P_IP): { + const struct iphdr *ip1, *ip2; + + ip1 = (const struct iphdr *) (skb1->data + off1); + ip2 = (const struct iphdr *) (skb2->data + off2); + + ip_proto = ip1->protocol; + if (ip_proto != ip2->protocol || + ip1->saddr != ip2->saddr || ip1->daddr != ip2->daddr) + return false; + + if ((ip1->frag_off | ip2->frag_off) & htons(IP_MF | IP_OFFSET)) + ip_proto = 0; + off1 += ip1->ihl * 4; + off2 += ip2->ihl * 4; + break; + } + + case __constant_htons(ETH_P_IPV6): { + const struct ipv6hdr *ip1, *ip2; + + ip1 = (const struct ipv6hdr *) (skb1->data + off1); + ip2 = (const struct ipv6hdr *) (skb2->data + off2); + + ip_proto = ip1->nexthdr; + if (ip_proto != ip2->nexthdr || + ipv6_addr_cmp(&ip1->saddr, &ip2->saddr) || + ipv6_addr_cmp(&ip1->daddr, &ip2->daddr)) + return false; + off1 += 40; + off2 += 40; + } + + default: /* Maybe compare MAC header here? */ + return false; + } + + poff = proto_ports_offset(ip_proto); + if (poff < 0) + return true; + + off1 += poff; + off2 += poff; + + ports1 = (__force u32 *)(skb1->data + off1); + ports2 = (__force u32 *)(skb2->data + off2); + return *ports1 == *ports2; +} + +struct choke_skb_cb { + u16 classid; +}; + +static inline struct choke_skb_cb *choke_skb_cb(const struct sk_buff *skb) +{ + BUILD_BUG_ON(sizeof(skb->cb) < + sizeof(struct qdisc_skb_cb) + sizeof(struct choke_skb_cb)); + return (struct choke_skb_cb *)qdisc_skb_cb(skb)->data; +} + +static inline void choke_set_classid(struct sk_buff *skb, u16 classid) +{ + choke_skb_cb(skb)->classid = classid; +} + +static u16 choke_get_classid(const struct sk_buff *skb) +{ + return choke_skb_cb(skb)->classid; +} + +/* + * Classify flow using either: + * 1. pre-existing classification result in skb + * 2. fast internal classification + * 3. 
use TC filter based classification + */ +static bool choke_classify(struct sk_buff *skb, + struct Qdisc *sch, int *qerr) + +{ + struct choke_sched_data *q = qdisc_priv(sch); + struct tcf_result res; + int result; + + result = tc_classify(skb, q->filter_list, &res); + if (result >= 0) { +#ifdef CONFIG_NET_CLS_ACT + switch (result) { + case TC_ACT_STOLEN: + case TC_ACT_QUEUED: + *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; + case TC_ACT_SHOT: + return false; + } +#endif + choke_set_classid(skb, TC_H_MIN(res.classid)); + return true; + } + + return false; +} + +/* + * Select a packet at random from queue + * HACK: since queue can have holes from previous deletion; retry several + * times to find a random skb but then just give up and return the head + * Will return NULL if queue is empty (q->head == q->tail) + */ +static struct sk_buff *choke_peek_random(const struct choke_sched_data *q, + unsigned int *pidx) +{ + struct sk_buff *skb; + int retrys = 3; + + do { + *pidx = (q->head + random_N(choke_len(q))) & q->tab_mask; + skb = q->tab[*pidx]; + if (skb) + return skb; + } while (--retrys > 0); + + return q->tab[*pidx = q->head]; +} + +/* + * Compare new packet with random packet in queue + * returns true if matched and sets *pidx + */ +static bool choke_match_random(const struct choke_sched_data *q, + struct sk_buff *nskb, + unsigned int *pidx) +{ + struct sk_buff *oskb; + + if (q->head == q->tail) + return false; + + oskb = choke_peek_random(q, pidx); + if (q->filter_list) + return choke_get_classid(nskb) == choke_get_classid(oskb); + + return choke_match_flow(oskb, nskb); +} + +static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch) +{ + struct choke_sched_data *q = qdisc_priv(sch); + struct red_parms *p = &q->parms; + int ret = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; + + if (q->filter_list) { + /* If using external classifiers, get result and record it. */ + if (!choke_classify(skb, sch, &ret)) + goto other_drop; /* Packet was eaten by filter */ + } + + /* Compute average queue usage (see RED) */ + p->qavg = red_calc_qavg(p, sch->q.qlen); + if (red_is_idling(p)) + red_end_of_idle_period(p); + + /* Is queue small? 
*/ + if (p->qavg <= p->qth_min) + p->qcount = -1; + else { + unsigned int idx; + + /* Draw a packet at random from queue and compare flow */ + if (choke_match_random(q, skb, &idx)) { + q->stats.matched++; + choke_drop_by_idx(sch, idx); + goto congestion_drop; + } + + /* Queue is large, always mark/drop */ + if (p->qavg > p->qth_max) { + p->qcount = -1; + + sch->qstats.overlimits++; + if (use_harddrop(q) || !use_ecn(q) || + !INET_ECN_set_ce(skb)) { + q->stats.forced_drop++; + goto congestion_drop; + } + + q->stats.forced_mark++; + } else if (++p->qcount) { + if (red_mark_probability(p, p->qavg)) { + p->qcount = 0; + p->qR = red_random(p); + + sch->qstats.overlimits++; + if (!use_ecn(q) || !INET_ECN_set_ce(skb)) { + q->stats.prob_drop++; + goto congestion_drop; + } + + q->stats.prob_mark++; + } + } else + p->qR = red_random(p); + } + + /* Admit new packet */ + if (sch->q.qlen < q->limit) { + q->tab[q->tail] = skb; + q->tail = (q->tail + 1) & q->tab_mask; + ++sch->q.qlen; + sch->qstats.backlog += qdisc_pkt_len(skb); + return NET_XMIT_SUCCESS; + } + + q->stats.pdrop++; + sch->qstats.drops++; + kfree_skb(skb); + return NET_XMIT_DROP; + + congestion_drop: + qdisc_drop(skb, sch); + return NET_XMIT_CN; + + other_drop: + if (ret & __NET_XMIT_BYPASS) + sch->qstats.drops++; + kfree_skb(skb); + return ret; +} + +static struct sk_buff *choke_dequeue(struct Qdisc *sch) +{ + struct choke_sched_data *q = qdisc_priv(sch); + struct sk_buff *skb; + + if (q->head == q->tail) { + if (!red_is_idling(&q->parms)) + red_start_of_idle_period(&q->parms); + return NULL; + } + + skb = q->tab[q->head]; + q->tab[q->head] = NULL; + choke_zap_head_holes(q); + --sch->q.qlen; + sch->qstats.backlog -= qdisc_pkt_len(skb); + qdisc_bstats_update(sch, skb); + + return skb; +} + +static unsigned int choke_drop(struct Qdisc *sch) +{ + struct choke_sched_data *q = qdisc_priv(sch); + unsigned int len; + + len = qdisc_queue_drop(sch); + if (len > 0) + q->stats.other++; + else { + if (!red_is_idling(&q->parms)) + red_start_of_idle_period(&q->parms); + } + + return len; +} + +static void choke_reset(struct Qdisc *sch) +{ + struct choke_sched_data *q = qdisc_priv(sch); + + red_restart(&q->parms); +} + +static const struct nla_policy choke_policy[TCA_CHOKE_MAX + 1] = { + [TCA_CHOKE_PARMS] = { .len = sizeof(struct tc_red_qopt) }, + [TCA_CHOKE_STAB] = { .len = RED_STAB_SIZE }, +}; + + +static void choke_free(void *addr) +{ + if (addr) { + if (is_vmalloc_addr(addr)) + vfree(addr); + else + kfree(addr); + } +} + +static int choke_change(struct Qdisc *sch, struct nlattr *opt) +{ + struct choke_sched_data *q = qdisc_priv(sch); + struct nlattr *tb[TCA_CHOKE_MAX + 1]; + const struct tc_red_qopt *ctl; + int err; + struct sk_buff **old = NULL; + unsigned int mask; + + if (opt == NULL) + return -EINVAL; + + err = nla_parse_nested(tb, TCA_CHOKE_MAX, opt, choke_policy); + if (err < 0) + return err; + + if (tb[TCA_CHOKE_PARMS] == NULL || + tb[TCA_CHOKE_STAB] == NULL) + return -EINVAL; + + ctl = nla_data(tb[TCA_CHOKE_PARMS]); + + if (ctl->limit > CHOKE_MAX_QUEUE) + return -EINVAL; + + mask = roundup_pow_of_two(ctl->limit + 1) - 1; + if (mask != q->tab_mask) { + struct sk_buff **ntab; + + ntab = kcalloc(mask + 1, sizeof(struct sk_buff *), GFP_KERNEL); + if (!ntab) + ntab = vzalloc((mask + 1) * sizeof(struct sk_buff *)); + if (!ntab) + return -ENOMEM; + + sch_tree_lock(sch); + old = q->tab; + if (old) { + unsigned int oqlen = sch->q.qlen, tail = 0; + + while (q->head != q->tail) { + struct sk_buff *skb = q->tab[q->head]; + + q->head = (q->head + 1) & 
q->tab_mask; + if (!skb) + continue; + if (tail < mask) { + ntab[tail++] = skb; + continue; + } + sch->qstats.backlog -= qdisc_pkt_len(skb); + --sch->q.qlen; + qdisc_drop(skb, sch); + } + qdisc_tree_decrease_qlen(sch, oqlen - sch->q.qlen); + q->head = 0; + q->tail = tail; + } + + q->tab_mask = mask; + q->tab = ntab; + } else + sch_tree_lock(sch); + + q->flags = ctl->flags; + q->limit = ctl->limit; + + red_set_parms(&q->parms, ctl->qth_min, ctl->qth_max, ctl->Wlog, + ctl->Plog, ctl->Scell_log, + nla_data(tb[TCA_CHOKE_STAB])); + + if (q->head == q->tail) + red_end_of_idle_period(&q->parms); + + sch_tree_unlock(sch); + choke_free(old); + return 0; +} + +static int choke_init(struct Qdisc *sch, struct nlattr *opt) +{ + return choke_change(sch, opt); +} + +static int choke_dump(struct Qdisc *sch, struct sk_buff *skb) +{ + struct choke_sched_data *q = qdisc_priv(sch); + struct nlattr *opts = NULL; + struct tc_red_qopt opt = { + .limit = q->limit, + .flags = q->flags, + .qth_min = q->parms.qth_min >> q->parms.Wlog, + .qth_max = q->parms.qth_max >> q->parms.Wlog, + .Wlog = q->parms.Wlog, + .Plog = q->parms.Plog, + .Scell_log = q->parms.Scell_log, + }; + + opts = nla_nest_start(skb, TCA_OPTIONS); + if (opts == NULL) + goto nla_put_failure; + + NLA_PUT(skb, TCA_CHOKE_PARMS, sizeof(opt), &opt); + return nla_nest_end(skb, opts); + +nla_put_failure: + nla_nest_cancel(skb, opts); + return -EMSGSIZE; +} + +static int choke_dump_stats(struct Qdisc *sch, struct gnet_dump *d) +{ + struct choke_sched_data *q = qdisc_priv(sch); + struct tc_choke_xstats st = { + .early = q->stats.prob_drop + q->stats.forced_drop, + .marked = q->stats.prob_mark + q->stats.forced_mark, + .pdrop = q->stats.pdrop, + .other = q->stats.other, + .matched = q->stats.matched, + }; + + return gnet_stats_copy_app(d, &st, sizeof(st)); +} + +static void choke_destroy(struct Qdisc *sch) +{ + struct choke_sched_data *q = qdisc_priv(sch); + + tcf_destroy_chain(&q->filter_list); + choke_free(q->tab); +} + +static struct Qdisc *choke_leaf(struct Qdisc *sch, unsigned long arg) +{ + return NULL; +} + +static unsigned long choke_get(struct Qdisc *sch, u32 classid) +{ + return 0; +} + +static void choke_put(struct Qdisc *q, unsigned long cl) +{ +} + +static unsigned long choke_bind(struct Qdisc *sch, unsigned long parent, + u32 classid) +{ + return 0; +} + +static struct tcf_proto **choke_find_tcf(struct Qdisc *sch, unsigned long cl) +{ + struct choke_sched_data *q = qdisc_priv(sch); + + if (cl) + return NULL; + return &q->filter_list; +} + +static int choke_dump_class(struct Qdisc *sch, unsigned long cl, + struct sk_buff *skb, struct tcmsg *tcm) +{ + tcm->tcm_handle |= TC_H_MIN(cl); + return 0; +} + +static void choke_walk(struct Qdisc *sch, struct qdisc_walker *arg) +{ + if (!arg->stop) { + if (arg->fn(sch, 1, arg) < 0) { + arg->stop = 1; + return; + } + arg->count++; + } +} + +static const struct Qdisc_class_ops choke_class_ops = { + .leaf = choke_leaf, + .get = choke_get, + .put = choke_put, + .tcf_chain = choke_find_tcf, + .bind_tcf = choke_bind, + .unbind_tcf = choke_put, + .dump = choke_dump_class, + .walk = choke_walk, +}; + +static struct sk_buff *choke_peek_head(struct Qdisc *sch) +{ + struct choke_sched_data *q = qdisc_priv(sch); + + return (q->head != q->tail) ? 
q->tab[q->head] : NULL; +} + +static struct Qdisc_ops choke_qdisc_ops __read_mostly = { + .id = "choke", + .priv_size = sizeof(struct choke_sched_data), + + .enqueue = choke_enqueue, + .dequeue = choke_dequeue, + .peek = choke_peek_head, + .drop = choke_drop, + .init = choke_init, + .destroy = choke_destroy, + .reset = choke_reset, + .change = choke_change, + .dump = choke_dump, + .dump_stats = choke_dump_stats, + .owner = THIS_MODULE, +}; + +static int __init choke_module_init(void) +{ + return register_qdisc(&choke_qdisc_ops); +} + +static void __exit choke_module_exit(void) +{ + unregister_qdisc(&choke_qdisc_ops); +} + +module_init(choke_module_init) +module_exit(choke_module_exit) + +MODULE_LICENSE("GPL"); diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c index de55e642eafc..6b7fe4a84f13 100644 --- a/net/sched/sch_drr.c +++ b/net/sched/sch_drr.c @@ -376,7 +376,6 @@ static int drr_enqueue(struct sk_buff *skb, struct Qdisc *sch) } bstats_update(&cl->bstats, skb); - qdisc_bstats_update(sch, skb); sch->q.qlen++; return err; @@ -403,6 +402,7 @@ static struct sk_buff *drr_dequeue(struct Qdisc *sch) skb = qdisc_dequeue_peeked(cl->qdisc); if (cl->qdisc->q.qlen == 0) list_del(&cl->alist); + qdisc_bstats_update(sch, skb); sch->q.qlen--; return skb; } diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c index 4970d56b4aa7..2c790204d042 100644 --- a/net/sched/sch_dsmark.c +++ b/net/sched/sch_dsmark.c @@ -260,7 +260,6 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch) return err; } - qdisc_bstats_update(sch, skb); sch->q.qlen++; return NET_XMIT_SUCCESS; @@ -283,6 +282,7 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch) if (skb == NULL) return NULL; + qdisc_bstats_update(sch, skb); sch->q.qlen--; index = skb->tc_index & (p->indices - 1); diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c index b3075f8a196b..66effe2da8e0 100644 --- a/net/sched/sch_fifo.c +++ b/net/sched/sch_fifo.c @@ -19,15 +19,9 @@ /* 1 band FIFO pseudo-"scheduler" */ -struct fifo_sched_data { - u32 limit; -}; - static int bfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch) { - struct fifo_sched_data *q = qdisc_priv(sch); - - if (likely(sch->qstats.backlog + qdisc_pkt_len(skb) <= q->limit)) + if (likely(sch->qstats.backlog + qdisc_pkt_len(skb) <= sch->limit)) return qdisc_enqueue_tail(skb, sch); return qdisc_reshape_fail(skb, sch); @@ -35,9 +29,7 @@ static int bfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch) static int pfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch) { - struct fifo_sched_data *q = qdisc_priv(sch); - - if (likely(skb_queue_len(&sch->q) < q->limit)) + if (likely(skb_queue_len(&sch->q) < sch->limit)) return qdisc_enqueue_tail(skb, sch); return qdisc_reshape_fail(skb, sch); @@ -45,17 +37,12 @@ static int pfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch) static int pfifo_tail_enqueue(struct sk_buff *skb, struct Qdisc *sch) { - struct sk_buff *skb_head; - struct fifo_sched_data *q = qdisc_priv(sch); - - if (likely(skb_queue_len(&sch->q) < q->limit)) + if (likely(skb_queue_len(&sch->q) < sch->limit)) return qdisc_enqueue_tail(skb, sch); /* queue full, remove one skb to fulfill the limit */ - skb_head = qdisc_dequeue_head(sch); + __qdisc_queue_drop_head(sch, &sch->q); sch->qstats.drops++; - kfree_skb(skb_head); - qdisc_enqueue_tail(skb, sch); return NET_XMIT_CN; @@ -63,31 +50,40 @@ static int pfifo_tail_enqueue(struct sk_buff *skb, struct Qdisc *sch) static int fifo_init(struct Qdisc *sch, struct nlattr *opt) { - struct fifo_sched_data *q = qdisc_priv(sch); + 
bool bypass; + bool is_bfifo = sch->ops == &bfifo_qdisc_ops; if (opt == NULL) { u32 limit = qdisc_dev(sch)->tx_queue_len ? : 1; - if (sch->ops == &bfifo_qdisc_ops) + if (is_bfifo) limit *= psched_mtu(qdisc_dev(sch)); - q->limit = limit; + sch->limit = limit; } else { struct tc_fifo_qopt *ctl = nla_data(opt); if (nla_len(opt) < sizeof(*ctl)) return -EINVAL; - q->limit = ctl->limit; + sch->limit = ctl->limit; } + if (is_bfifo) + bypass = sch->limit >= psched_mtu(qdisc_dev(sch)); + else + bypass = sch->limit >= 1; + + if (bypass) + sch->flags |= TCQ_F_CAN_BYPASS; + else + sch->flags &= ~TCQ_F_CAN_BYPASS; return 0; } static int fifo_dump(struct Qdisc *sch, struct sk_buff *skb) { - struct fifo_sched_data *q = qdisc_priv(sch); - struct tc_fifo_qopt opt = { .limit = q->limit }; + struct tc_fifo_qopt opt = { .limit = sch->limit }; NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt); return skb->len; @@ -98,7 +94,7 @@ nla_put_failure: struct Qdisc_ops pfifo_qdisc_ops __read_mostly = { .id = "pfifo", - .priv_size = sizeof(struct fifo_sched_data), + .priv_size = 0, .enqueue = pfifo_enqueue, .dequeue = qdisc_dequeue_head, .peek = qdisc_peek_head, @@ -113,7 +109,7 @@ EXPORT_SYMBOL(pfifo_qdisc_ops); struct Qdisc_ops bfifo_qdisc_ops __read_mostly = { .id = "bfifo", - .priv_size = sizeof(struct fifo_sched_data), + .priv_size = 0, .enqueue = bfifo_enqueue, .dequeue = qdisc_dequeue_head, .peek = qdisc_peek_head, @@ -128,7 +124,7 @@ EXPORT_SYMBOL(bfifo_qdisc_ops); struct Qdisc_ops pfifo_head_drop_qdisc_ops __read_mostly = { .id = "pfifo_head_drop", - .priv_size = sizeof(struct fifo_sched_data), + .priv_size = 0, .enqueue = pfifo_tail_enqueue, .dequeue = qdisc_dequeue_head, .peek = qdisc_peek_head, diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 2f1cb62130da..c84b65920d1b 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -527,6 +527,8 @@ static int pfifo_fast_init(struct Qdisc *qdisc, struct nlattr *opt) for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) skb_queue_head_init(band2list(priv, prio)); + /* Can by-pass the queue discipline */ + qdisc->flags |= TCQ_F_CAN_BYPASS; return 0; } @@ -548,21 +550,25 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, { void *p; struct Qdisc *sch; - unsigned int size; + unsigned int size = QDISC_ALIGN(sizeof(*sch)) + ops->priv_size; int err = -ENOBUFS; - /* ensure that the Qdisc and the private data are 64-byte aligned */ - size = QDISC_ALIGN(sizeof(*sch)); - size += ops->priv_size + (QDISC_ALIGNTO - 1); - p = kzalloc_node(size, GFP_KERNEL, netdev_queue_numa_node_read(dev_queue)); if (!p) goto errout; sch = (struct Qdisc *) QDISC_ALIGN((unsigned long) p); - sch->padded = (char *) sch - (char *) p; - + /* if we got non aligned memory, ask more and do alignment ourself */ + if (sch != p) { + kfree(p); + p = kzalloc_node(size + QDISC_ALIGNTO - 1, GFP_KERNEL, + netdev_queue_numa_node_read(dev_queue)); + if (!p) + goto errout; + sch = (struct Qdisc *) QDISC_ALIGN((unsigned long) p); + sch->padded = (char *) sch - (char *) p; + } INIT_LIST_HEAD(&sch->list); skb_queue_head_init(&sch->q); spin_lock_init(&sch->busylock); @@ -632,7 +638,7 @@ void qdisc_destroy(struct Qdisc *qdisc) #ifdef CONFIG_NET_SCHED qdisc_list_del(qdisc); - qdisc_put_stab(qdisc->stab); + qdisc_put_stab(rtnl_dereference(qdisc->stab)); #endif gen_kill_estimator(&qdisc->bstats, &qdisc->rate_est); if (ops->reset) @@ -691,9 +697,6 @@ static void attach_one_default_qdisc(struct net_device *dev, netdev_info(dev, "activation failed\n"); return; } - - /* Can by-pass the queue 
discipline for default qdisc */ - qdisc->flags |= TCQ_F_CAN_BYPASS; } dev_queue->qdisc_sleeping = qdisc; } @@ -841,6 +844,7 @@ void dev_deactivate(struct net_device *dev) list_add(&dev->unreg_list, &single); dev_deactivate_many(&single); + list_del(&single); } EXPORT_SYMBOL(dev_deactivate); diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index dea4009615f9..6488e6425652 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -1598,7 +1598,6 @@ hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch) set_active(cl, qdisc_pkt_len(skb)); bstats_update(&cl->bstats, skb); - qdisc_bstats_update(sch, skb); sch->q.qlen++; return NET_XMIT_SUCCESS; @@ -1664,7 +1663,8 @@ hfsc_dequeue(struct Qdisc *sch) set_passive(cl); } - sch->flags &= ~TCQ_F_THROTTLED; + qdisc_unthrottled(sch); + qdisc_bstats_update(sch, skb); sch->q.qlen--; return skb; diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 3e86fd3a1b78..e1429a85091f 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -581,7 +581,6 @@ static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch) } sch->q.qlen++; - qdisc_bstats_update(sch, skb); return NET_XMIT_SUCCESS; } @@ -856,7 +855,7 @@ next: static struct sk_buff *htb_dequeue(struct Qdisc *sch) { - struct sk_buff *skb = NULL; + struct sk_buff *skb; struct htb_sched *q = qdisc_priv(sch); int level; psched_time_t next_event; @@ -865,7 +864,9 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch) /* try to dequeue direct packets as high prio (!) to minimize cpu work */ skb = __skb_dequeue(&q->direct_queue); if (skb != NULL) { - sch->flags &= ~TCQ_F_THROTTLED; +ok: + qdisc_bstats_update(sch, skb); + qdisc_unthrottled(sch); sch->q.qlen--; return skb; } @@ -899,11 +900,8 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch) m |= 1 << prio; skb = htb_dequeue_tree(q, prio, level); - if (likely(skb != NULL)) { - sch->q.qlen--; - sch->flags &= ~TCQ_F_THROTTLED; - goto fin; - } + if (likely(skb != NULL)) + goto ok; } } sch->qstats.overlimits++; diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c index ecc302f4d2a1..ec5cbc848963 100644 --- a/net/sched/sch_mq.c +++ b/net/sched/sch_mq.c @@ -61,7 +61,6 @@ static int mq_init(struct Qdisc *sch, struct nlattr *opt) TC_H_MIN(ntx + 1))); if (qdisc == NULL) goto err; - qdisc->flags |= TCQ_F_CAN_BYPASS; priv->qdiscs[ntx] = qdisc; } diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c index 8620c65f480a..ea17cbed29ef 100644 --- a/net/sched/sch_mqprio.c +++ b/net/sched/sch_mqprio.c @@ -29,18 +29,18 @@ static void mqprio_destroy(struct Qdisc *sch) struct mqprio_sched *priv = qdisc_priv(sch); unsigned int ntx; - if (!priv->qdiscs) - return; - - for (ntx = 0; ntx < dev->num_tx_queues && priv->qdiscs[ntx]; ntx++) - qdisc_destroy(priv->qdiscs[ntx]); + if (priv->qdiscs) { + for (ntx = 0; + ntx < dev->num_tx_queues && priv->qdiscs[ntx]; + ntx++) + qdisc_destroy(priv->qdiscs[ntx]); + kfree(priv->qdiscs); + } if (priv->hw_owned && dev->netdev_ops->ndo_setup_tc) dev->netdev_ops->ndo_setup_tc(dev, 0); else netdev_set_num_tc(dev, 0); - - kfree(priv->qdiscs); } static int mqprio_parse_opt(struct net_device *dev, struct tc_mqprio_qopt *qopt) @@ -130,7 +130,6 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt) err = -ENOMEM; goto err; } - qdisc->flags |= TCQ_F_CAN_BYPASS; priv->qdiscs[i] = qdisc; } @@ -216,7 +215,7 @@ static int mqprio_dump(struct Qdisc *sch, struct sk_buff *skb) struct net_device *dev = qdisc_dev(sch); struct mqprio_sched *priv = qdisc_priv(sch); unsigned char *b = skb_tail_pointer(skb); - struct 
tc_mqprio_qopt opt; + struct tc_mqprio_qopt opt = { 0 }; struct Qdisc *qdisc; unsigned int i; @@ -312,7 +311,9 @@ static int mqprio_dump_class(struct Qdisc *sch, unsigned long cl, } static int mqprio_dump_class_stats(struct Qdisc *sch, unsigned long cl, - struct gnet_dump *d) + struct gnet_dump *d) + __releases(d->lock) + __acquires(d->lock) { struct net_device *dev = qdisc_dev(sch); @@ -390,7 +391,7 @@ static const struct Qdisc_class_ops mqprio_class_ops = { .dump_stats = mqprio_dump_class_stats, }; -struct Qdisc_ops mqprio_qdisc_ops __read_mostly = { +static struct Qdisc_ops mqprio_qdisc_ops __read_mostly = { .cl_ops = &mqprio_class_ops, .id = "mqprio", .priv_size = sizeof(struct mqprio_sched), diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c index 820f2a7ca14d..edc1950e0e77 100644 --- a/net/sched/sch_multiq.c +++ b/net/sched/sch_multiq.c @@ -83,7 +83,6 @@ multiq_enqueue(struct sk_buff *skb, struct Qdisc *sch) ret = qdisc_enqueue(skb, qdisc); if (ret == NET_XMIT_SUCCESS) { - qdisc_bstats_update(sch, skb); sch->q.qlen++; return NET_XMIT_SUCCESS; } @@ -112,6 +111,7 @@ static struct sk_buff *multiq_dequeue(struct Qdisc *sch) qdisc = q->queues[q->curband]; skb = qdisc->dequeue(qdisc); if (skb) { + qdisc_bstats_update(sch, skb); sch->q.qlen--; return skb; } diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index c2bbbe60d544..edbbf7ad6623 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -19,12 +19,13 @@ #include <linux/kernel.h> #include <linux/errno.h> #include <linux/skbuff.h> +#include <linux/vmalloc.h> #include <linux/rtnetlink.h> #include <net/netlink.h> #include <net/pkt_sched.h> -#define VERSION "1.2" +#define VERSION "1.3" /* Network Emulation Queuing algorithm. ==================================== @@ -47,6 +48,20 @@ layering other disciplines. It does not need to do bandwidth control either since that can be handled by using token bucket or other rate control. + + Correlated Loss Generator models + + Added generation of correlated loss according to the + "Gilbert-Elliot" model, a 4-state markov model. + + References: + [1] NetemCLG Home http://netgroup.uniroma2.it/NetemCLG + [2] S. Salsano, F. Ludovici, A. Ordine, "Definition of a general + and intuitive loss model for packet networks and its implementation + in the Netem module in the Linux kernel", available in [1] + + Authors: Stefano Salsano <stefano.salsano at uniroma2.it + Fabio Ludovici <fabio.ludovici at yahoo.it> */ struct netem_sched_data { @@ -73,6 +88,26 @@ struct netem_sched_data { u32 size; s16 table[0]; } *delay_dist; + + enum { + CLG_RANDOM, + CLG_4_STATES, + CLG_GILB_ELL, + } loss_model; + + /* Correlated Loss Generation models */ + struct clgstate { + /* state of the Markov chain */ + u8 state; + + /* 4-states and Gilbert-Elliot models */ + u32 a1; /* p13 for 4-states or p for GE */ + u32 a2; /* p31 for 4-states or r for GE */ + u32 a3; /* p32 for 4-states or h for GE */ + u32 a4; /* p14 for 4-states or 1-k for GE */ + u32 a5; /* p23 used only in 4-states */ + } clg; + }; /* Time stamp put into socket buffer control block */ @@ -115,6 +150,122 @@ static u32 get_crandom(struct crndstate *state) return answer; } +/* loss_4state - 4-state model loss generator + * Generates losses according to the 4-state Markov chain adopted in + * the GI (General and Intuitive) loss model. 
+ */ +static bool loss_4state(struct netem_sched_data *q) +{ + struct clgstate *clg = &q->clg; + u32 rnd = net_random(); + + /* + * Makes a comparision between rnd and the transition + * probabilities outgoing from the current state, then decides the + * next state and if the next packet has to be transmitted or lost. + * The four states correspond to: + * 1 => successfully transmitted packets within a gap period + * 4 => isolated losses within a gap period + * 3 => lost packets within a burst period + * 2 => successfully transmitted packets within a burst period + */ + switch (clg->state) { + case 1: + if (rnd < clg->a4) { + clg->state = 4; + return true; + } else if (clg->a4 < rnd && rnd < clg->a1) { + clg->state = 3; + return true; + } else if (clg->a1 < rnd) + clg->state = 1; + + break; + case 2: + if (rnd < clg->a5) { + clg->state = 3; + return true; + } else + clg->state = 2; + + break; + case 3: + if (rnd < clg->a3) + clg->state = 2; + else if (clg->a3 < rnd && rnd < clg->a2 + clg->a3) { + clg->state = 1; + return true; + } else if (clg->a2 + clg->a3 < rnd) { + clg->state = 3; + return true; + } + break; + case 4: + clg->state = 1; + break; + } + + return false; +} + +/* loss_gilb_ell - Gilbert-Elliot model loss generator + * Generates losses according to the Gilbert-Elliot loss model or + * its special cases (Gilbert or Simple Gilbert) + * + * Makes a comparision between random number and the transition + * probabilities outgoing from the current state, then decides the + * next state. A second random number is extracted and the comparision + * with the loss probability of the current state decides if the next + * packet will be transmitted or lost. + */ +static bool loss_gilb_ell(struct netem_sched_data *q) +{ + struct clgstate *clg = &q->clg; + + switch (clg->state) { + case 1: + if (net_random() < clg->a1) + clg->state = 2; + if (net_random() < clg->a4) + return true; + case 2: + if (net_random() < clg->a2) + clg->state = 1; + if (clg->a3 > net_random()) + return true; + } + + return false; +} + +static bool loss_event(struct netem_sched_data *q) +{ + switch (q->loss_model) { + case CLG_RANDOM: + /* Random packet drop 0 => none, ~0 => all */ + return q->loss && q->loss >= get_crandom(&q->loss_cor); + + case CLG_4_STATES: + /* 4state loss model algorithm (used also for GI model) + * Extracts a value from the markov 4 state loss generator, + * if it is 1 drops a packet and if needed writes the event in + * the kernel logs + */ + return loss_4state(q); + + case CLG_GILB_ELL: + /* Gilbert-Elliot loss model algorithm + * Extracts a value from the Gilbert-Elliot loss generator, + * if it is 1 drops a packet and if needed writes the event in + * the kernel logs + */ + return loss_gilb_ell(q); + } + + return false; /* not reached */ +} + + /* tabledist - return a pseudo-randomly distributed value with mean mu and * std deviation sigma. Uses table lookup to approximate the desired * distribution, and a uniformly-distributed pseudo-random source. @@ -161,14 +312,12 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) int ret; int count = 1; - pr_debug("netem_enqueue skb=%p\n", skb); - /* Random duplication */ if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor)) ++count; - /* Random packet drop 0 => none, ~0 => all */ - if (q->loss && q->loss >= get_crandom(&q->loss_cor)) + /* Drop packet? 
*/ + if (loss_event(q)) --count; if (count == 0) { @@ -238,15 +387,15 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) ret = NET_XMIT_SUCCESS; } - if (likely(ret == NET_XMIT_SUCCESS)) { - sch->q.qlen++; - qdisc_bstats_update(sch, skb); - } else if (net_xmit_drop_count(ret)) { - sch->qstats.drops++; + if (ret != NET_XMIT_SUCCESS) { + if (net_xmit_drop_count(ret)) { + sch->qstats.drops++; + return ret; + } } - pr_debug("netem: enqueue ret %d\n", ret); - return ret; + sch->q.qlen++; + return NET_XMIT_SUCCESS; } static unsigned int netem_drop(struct Qdisc *sch) @@ -266,7 +415,7 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch) struct netem_sched_data *q = qdisc_priv(sch); struct sk_buff *skb; - if (sch->flags & TCQ_F_THROTTLED) + if (qdisc_is_throttled(sch)) return NULL; skb = q->qdisc->ops->peek(q->qdisc); @@ -288,8 +437,10 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch) if (G_TC_FROM(skb->tc_verd) & AT_INGRESS) skb->tstamp.tv64 = 0; #endif - pr_debug("netem_dequeue: return skb=%p\n", skb); + sch->q.qlen--; + qdisc_unthrottled(sch); + qdisc_bstats_update(sch, skb); return skb; } @@ -308,6 +459,16 @@ static void netem_reset(struct Qdisc *sch) qdisc_watchdog_cancel(&q->watchdog); } +static void dist_free(struct disttable *d) +{ + if (d) { + if (is_vmalloc_addr(d)) + vfree(d); + else + kfree(d); + } +} + /* * Distribution data is a variable size payload containing * signed 16 bit values. @@ -315,16 +476,20 @@ static void netem_reset(struct Qdisc *sch) static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr) { struct netem_sched_data *q = qdisc_priv(sch); - unsigned long n = nla_len(attr)/sizeof(__s16); + size_t n = nla_len(attr)/sizeof(__s16); const __s16 *data = nla_data(attr); spinlock_t *root_lock; struct disttable *d; int i; + size_t s; - if (n > 65536) + if (n > NETEM_DIST_MAX) return -EINVAL; - d = kmalloc(sizeof(*d) + n*sizeof(d->table[0]), GFP_KERNEL); + s = sizeof(struct disttable) + n * sizeof(s16); + d = kmalloc(s, GFP_KERNEL); + if (!d) + d = vmalloc(s); if (!d) return -ENOMEM; @@ -335,7 +500,7 @@ static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr) root_lock = qdisc_root_sleeping_lock(sch); spin_lock_bh(root_lock); - kfree(q->delay_dist); + dist_free(q->delay_dist); q->delay_dist = d; spin_unlock_bh(root_lock); return 0; @@ -369,10 +534,66 @@ static void get_corrupt(struct Qdisc *sch, const struct nlattr *attr) init_crandom(&q->corrupt_cor, r->correlation); } +static int get_loss_clg(struct Qdisc *sch, const struct nlattr *attr) +{ + struct netem_sched_data *q = qdisc_priv(sch); + const struct nlattr *la; + int rem; + + nla_for_each_nested(la, attr, rem) { + u16 type = nla_type(la); + + switch(type) { + case NETEM_LOSS_GI: { + const struct tc_netem_gimodel *gi = nla_data(la); + + if (nla_len(la) != sizeof(struct tc_netem_gimodel)) { + pr_info("netem: incorrect gi model size\n"); + return -EINVAL; + } + + q->loss_model = CLG_4_STATES; + + q->clg.state = 1; + q->clg.a1 = gi->p13; + q->clg.a2 = gi->p31; + q->clg.a3 = gi->p32; + q->clg.a4 = gi->p14; + q->clg.a5 = gi->p23; + break; + } + + case NETEM_LOSS_GE: { + const struct tc_netem_gemodel *ge = nla_data(la); + + if (nla_len(la) != sizeof(struct tc_netem_gemodel)) { + pr_info("netem: incorrect gi model size\n"); + return -EINVAL; + } + + q->loss_model = CLG_GILB_ELL; + q->clg.state = 1; + q->clg.a1 = ge->p; + q->clg.a2 = ge->r; + q->clg.a3 = ge->h; + q->clg.a4 = ge->k1; + break; + } + + default: + pr_info("netem: unknown loss type %u\n", type); + return -EINVAL; 
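get_loss_clg() above maps the netlink GI/GE attributes onto the generic clg.a1..a5 slots; for the Gilbert-Elliott case a1..a4 carry p, r, h and k1. Below is a simplified userspace sketch of a two-state loss generator driven by those four parameters. It uses floating-point probabilities instead of u32 fractions of net_random(), and the state-update order is not a faithful copy of loss_gilb_ell(); it only illustrates the bursty behaviour the model produces.

#include <stdio.h>
#include <stdlib.h>

/* Two-state (Gilbert-Elliott style) loss generator, parameter roles as
 * in the hunk above:
 *   p  - probability of moving from the good to the bad state
 *   r  - probability of moving from the bad back to the good state
 *   k1 - loss probability while in the good state
 *   h  - loss probability while in the bad state
 */
struct ge_state {
    int bad;            /* 0 = good state, 1 = bad state */
    double p, r, k1, h;
};

static double frand(void)
{
    return (double)rand() / RAND_MAX;
}

/* Returns 1 if the next packet should be dropped. */
static int ge_loss_event(struct ge_state *ge)
{
    if (!ge->bad) {
        if (frand() < ge->p)
            ge->bad = 1;
        return frand() < ge->k1;
    }
    if (frand() < ge->r)
        ge->bad = 0;
    return frand() < ge->h;
}

int main(void)
{
    struct ge_state ge = { .bad = 0, .p = 0.01, .r = 0.30,
                           .k1 = 0.001, .h = 0.50 };
    int i, losses = 0;

    srand(1);
    for (i = 0; i < 100000; i++)
        losses += ge_loss_event(&ge);

    /* Losses cluster while the chain sits in the bad state, unlike the
     * independent drops of the legacy CLG_RANDOM model.
     */
    printf("dropped %d of 100000 packets\n", losses);
    return 0;
}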
+ } + } + + return 0; +} + static const struct nla_policy netem_policy[TCA_NETEM_MAX + 1] = { [TCA_NETEM_CORR] = { .len = sizeof(struct tc_netem_corr) }, [TCA_NETEM_REORDER] = { .len = sizeof(struct tc_netem_reorder) }, [TCA_NETEM_CORRUPT] = { .len = sizeof(struct tc_netem_corrupt) }, + [TCA_NETEM_LOSS] = { .type = NLA_NESTED }, }; static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla, @@ -380,11 +601,15 @@ static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla, { int nested_len = nla_len(nla) - NLA_ALIGN(len); - if (nested_len < 0) + if (nested_len < 0) { + pr_info("netem: invalid attributes len %d\n", nested_len); return -EINVAL; + } + if (nested_len >= nla_attr_size(0)) return nla_parse(tb, maxtype, nla_data(nla) + NLA_ALIGN(len), nested_len, policy); + memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1)); return 0; } @@ -407,7 +632,7 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt) ret = fifo_set_limit(q->qdisc, qopt->limit); if (ret) { - pr_debug("netem: can't set fifo limit\n"); + pr_info("netem: can't set fifo limit\n"); return ret; } @@ -440,7 +665,11 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt) if (tb[TCA_NETEM_CORRUPT]) get_corrupt(sch, tb[TCA_NETEM_CORRUPT]); - return 0; + q->loss_model = CLG_RANDOM; + if (tb[TCA_NETEM_LOSS]) + ret = get_loss_clg(sch, tb[TCA_NETEM_LOSS]); + + return ret; } /* @@ -476,7 +705,6 @@ static int tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch) __skb_queue_after(list, skb, nskb); sch->qstats.backlog += qdisc_pkt_len(nskb); - qdisc_bstats_update(sch, nskb); return NET_XMIT_SUCCESS; } @@ -536,16 +764,17 @@ static int netem_init(struct Qdisc *sch, struct nlattr *opt) qdisc_watchdog_init(&q->watchdog, sch); + q->loss_model = CLG_RANDOM; q->qdisc = qdisc_create_dflt(sch->dev_queue, &tfifo_qdisc_ops, TC_H_MAKE(sch->handle, 1)); if (!q->qdisc) { - pr_debug("netem: qdisc create failed\n"); + pr_notice("netem: qdisc create tfifo qdisc failed\n"); return -ENOMEM; } ret = netem_change(sch, opt); if (ret) { - pr_debug("netem: change failed\n"); + pr_info("netem: change failed\n"); qdisc_destroy(q->qdisc); } return ret; @@ -557,14 +786,61 @@ static void netem_destroy(struct Qdisc *sch) qdisc_watchdog_cancel(&q->watchdog); qdisc_destroy(q->qdisc); - kfree(q->delay_dist); + dist_free(q->delay_dist); +} + +static int dump_loss_model(const struct netem_sched_data *q, + struct sk_buff *skb) +{ + struct nlattr *nest; + + nest = nla_nest_start(skb, TCA_NETEM_LOSS); + if (nest == NULL) + goto nla_put_failure; + + switch (q->loss_model) { + case CLG_RANDOM: + /* legacy loss model */ + nla_nest_cancel(skb, nest); + return 0; /* no data */ + + case CLG_4_STATES: { + struct tc_netem_gimodel gi = { + .p13 = q->clg.a1, + .p31 = q->clg.a2, + .p32 = q->clg.a3, + .p14 = q->clg.a4, + .p23 = q->clg.a5, + }; + + NLA_PUT(skb, NETEM_LOSS_GI, sizeof(gi), &gi); + break; + } + case CLG_GILB_ELL: { + struct tc_netem_gemodel ge = { + .p = q->clg.a1, + .r = q->clg.a2, + .h = q->clg.a3, + .k1 = q->clg.a4, + }; + + NLA_PUT(skb, NETEM_LOSS_GE, sizeof(ge), &ge); + break; + } + } + + nla_nest_end(skb, nest); + return 0; + +nla_put_failure: + nla_nest_cancel(skb, nest); + return -1; } static int netem_dump(struct Qdisc *sch, struct sk_buff *skb) { const struct netem_sched_data *q = qdisc_priv(sch); - unsigned char *b = skb_tail_pointer(skb); - struct nlattr *nla = (struct nlattr *) b; + struct nlattr *nla = (struct nlattr *) skb_tail_pointer(skb); struct tc_netem_qopt qopt; struct tc_netem_corr cor; struct 
tc_netem_reorder reorder; @@ -591,17 +867,87 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb) corrupt.correlation = q->corrupt_cor.rho; NLA_PUT(skb, TCA_NETEM_CORRUPT, sizeof(corrupt), &corrupt); - nla->nla_len = skb_tail_pointer(skb) - b; + if (dump_loss_model(q, skb) != 0) + goto nla_put_failure; - return skb->len; + return nla_nest_end(skb, nla); nla_put_failure: - nlmsg_trim(skb, b); + nlmsg_trim(skb, nla); return -1; } +static int netem_dump_class(struct Qdisc *sch, unsigned long cl, + struct sk_buff *skb, struct tcmsg *tcm) +{ + struct netem_sched_data *q = qdisc_priv(sch); + + if (cl != 1) /* only one class */ + return -ENOENT; + + tcm->tcm_handle |= TC_H_MIN(1); + tcm->tcm_info = q->qdisc->handle; + + return 0; +} + +static int netem_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, + struct Qdisc **old) +{ + struct netem_sched_data *q = qdisc_priv(sch); + + if (new == NULL) + new = &noop_qdisc; + + sch_tree_lock(sch); + *old = q->qdisc; + q->qdisc = new; + qdisc_tree_decrease_qlen(*old, (*old)->q.qlen); + qdisc_reset(*old); + sch_tree_unlock(sch); + + return 0; +} + +static struct Qdisc *netem_leaf(struct Qdisc *sch, unsigned long arg) +{ + struct netem_sched_data *q = qdisc_priv(sch); + return q->qdisc; +} + +static unsigned long netem_get(struct Qdisc *sch, u32 classid) +{ + return 1; +} + +static void netem_put(struct Qdisc *sch, unsigned long arg) +{ +} + +static void netem_walk(struct Qdisc *sch, struct qdisc_walker *walker) +{ + if (!walker->stop) { + if (walker->count >= walker->skip) + if (walker->fn(sch, 1, walker) < 0) { + walker->stop = 1; + return; + } + walker->count++; + } +} + +static const struct Qdisc_class_ops netem_class_ops = { + .graft = netem_graft, + .leaf = netem_leaf, + .get = netem_get, + .put = netem_put, + .walk = netem_walk, + .dump = netem_dump_class, +}; + static struct Qdisc_ops netem_qdisc_ops __read_mostly = { .id = "netem", + .cl_ops = &netem_class_ops, .priv_size = sizeof(struct netem_sched_data), .enqueue = netem_enqueue, .dequeue = netem_dequeue, diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index 3bea31e101b5..2a318f2dc3e5 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -83,7 +83,6 @@ prio_enqueue(struct sk_buff *skb, struct Qdisc *sch) ret = qdisc_enqueue(skb, qdisc); if (ret == NET_XMIT_SUCCESS) { - qdisc_bstats_update(sch, skb); sch->q.qlen++; return NET_XMIT_SUCCESS; } @@ -115,6 +114,7 @@ static struct sk_buff *prio_dequeue(struct Qdisc *sch) struct Qdisc *qdisc = q->queues[prio]; struct sk_buff *skb = qdisc->dequeue(qdisc); if (skb) { + qdisc_bstats_update(sch, skb); sch->q.qlen--; return skb; } diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index 689157555fa4..6649463da1b6 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -93,7 +93,6 @@ static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch) ret = qdisc_enqueue(skb, child); if (likely(ret == NET_XMIT_SUCCESS)) { - qdisc_bstats_update(sch, skb); sch->q.qlen++; } else if (net_xmit_drop_count(ret)) { q->stats.pdrop++; @@ -113,11 +112,13 @@ static struct sk_buff *red_dequeue(struct Qdisc *sch) struct Qdisc *child = q->qdisc; skb = child->dequeue(child); - if (skb) + if (skb) { + qdisc_bstats_update(sch, skb); sch->q.qlen--; - else if (!red_is_idling(&q->parms)) - red_start_of_idle_period(&q->parms); - + } else { + if (!red_is_idling(&q->parms)) + red_start_of_idle_period(&q->parms); + } return skb; } diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c new file mode 100644 index 000000000000..0a833d0c1f61 
--- /dev/null +++ b/net/sched/sch_sfb.c @@ -0,0 +1,709 @@ +/* + * net/sched/sch_sfb.c Stochastic Fair Blue + * + * Copyright (c) 2008-2011 Juliusz Chroboczek <jch@pps.jussieu.fr> + * Copyright (c) 2011 Eric Dumazet <eric.dumazet@gmail.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + * + * W. Feng, D. Kandlur, D. Saha, K. Shin. Blue: + * A New Class of Active Queue Management Algorithms. + * U. Michigan CSE-TR-387-99, April 1999. + * + * http://www.thefengs.com/wuchang/blue/CSE-TR-387-99.pdf + * + */ + +#include <linux/module.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/skbuff.h> +#include <linux/random.h> +#include <linux/jhash.h> +#include <net/ip.h> +#include <net/pkt_sched.h> +#include <net/inet_ecn.h> + +/* + * SFB uses two B[l][n] : L x N arrays of bins (L levels, N bins per level) + * This implementation uses L = 8 and N = 16 + * This permits us to split one 32bit hash (provided per packet by rxhash or + * external classifier) into 8 subhashes of 4 bits. + */ +#define SFB_BUCKET_SHIFT 4 +#define SFB_NUMBUCKETS (1 << SFB_BUCKET_SHIFT) /* N bins per Level */ +#define SFB_BUCKET_MASK (SFB_NUMBUCKETS - 1) +#define SFB_LEVELS (32 / SFB_BUCKET_SHIFT) /* L */ + +/* SFB algo uses a virtual queue, named "bin" */ +struct sfb_bucket { + u16 qlen; /* length of virtual queue */ + u16 p_mark; /* marking probability */ +}; + +/* We use a double buffering right before hash change + * (Section 4.4 of SFB reference : moving hash functions) + */ +struct sfb_bins { + u32 perturbation; /* jhash perturbation */ + struct sfb_bucket bins[SFB_LEVELS][SFB_NUMBUCKETS]; +}; + +struct sfb_sched_data { + struct Qdisc *qdisc; + struct tcf_proto *filter_list; + unsigned long rehash_interval; + unsigned long warmup_time; /* double buffering warmup time in jiffies */ + u32 max; + u32 bin_size; /* maximum queue length per bin */ + u32 increment; /* d1 */ + u32 decrement; /* d2 */ + u32 limit; /* HARD maximal queue length */ + u32 penalty_rate; + u32 penalty_burst; + u32 tokens_avail; + unsigned long rehash_time; + unsigned long token_time; + + u8 slot; /* current active bins (0 or 1) */ + bool double_buffering; + struct sfb_bins bins[2]; + + struct { + u32 earlydrop; + u32 penaltydrop; + u32 bucketdrop; + u32 queuedrop; + u32 childdrop; /* drops in child qdisc */ + u32 marked; /* ECN mark */ + } stats; +}; + +/* + * Each queued skb might be hashed on one or two bins + * We store in skb_cb the two hash values. + * (A zero value means double buffering was not used) + */ +struct sfb_skb_cb { + u32 hashes[2]; +}; + +static inline struct sfb_skb_cb *sfb_skb_cb(const struct sk_buff *skb) +{ + BUILD_BUG_ON(sizeof(skb->cb) < + sizeof(struct qdisc_skb_cb) + sizeof(struct sfb_skb_cb)); + return (struct sfb_skb_cb *)qdisc_skb_cb(skb)->data; +} + +/* + * If using 'internal' SFB flow classifier, hash comes from skb rxhash + * If using external classifier, hash comes from the classid. 
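The comments above describe the SFB data structure: two L x N arrays of bins, with each packet's 32-bit hash sliced into eight 4-bit indices so that every flow maps to one bin per level. Here is a userspace sketch of that slicing and of why the per-flow estimate is the minimum across levels; the real bins also carry the p_mark probability that the enqueue path updates.

#include <stdio.h>
#include <stdint.h>

#define SFB_BUCKET_SHIFT 4
#define SFB_NUMBUCKETS   (1 << SFB_BUCKET_SHIFT)   /* 16 bins per level */
#define SFB_BUCKET_MASK  (SFB_NUMBUCKETS - 1)
#define SFB_LEVELS       (32 / SFB_BUCKET_SHIFT)   /* 8 levels */

/* One virtual-queue length per bin, as in struct sfb_bucket. */
static uint16_t qlen[SFB_LEVELS][SFB_NUMBUCKETS];

/* Walk the 32-bit flow hash 4 bits at a time, touching one bin per
 * level -- the same slicing the increment/decrement helpers use.
 */
static void bins_inc(uint32_t hash)
{
    int i;

    for (i = 0; i < SFB_LEVELS; i++) {
        qlen[i][hash & SFB_BUCKET_MASK]++;
        hash >>= SFB_BUCKET_SHIFT;
    }
}

/* A flow's estimate is the minimum over its 8 bins: other flows can
 * inflate some of them, but rarely all of them.
 */
static uint16_t bins_min(uint32_t hash)
{
    uint16_t min = 0xFFFF;
    int i;

    for (i = 0; i < SFB_LEVELS; i++) {
        uint16_t v = qlen[i][hash & SFB_BUCKET_MASK];

        if (v < min)
            min = v;
        hash >>= SFB_BUCKET_SHIFT;
    }
    return min;
}

int main(void)
{
    uint32_t heavy = 0x12345678, light = 0x9abcdef0;
    int i;

    for (i = 0; i < 50; i++)
        bins_inc(heavy);        /* one flow hogging the queue */
    bins_inc(light);

    printf("heavy flow estimate: %u\n", bins_min(heavy));  /* 50 */
    printf("light flow estimate: %u\n", bins_min(light));  /* 1 */
    return 0;
}

Colliding flows can inflate individual bins, but a flow only looks heavy if all eight of its bins are inflated, which is what lets SFB approximate per-flow accounting with a constant-size table.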
+ */ +static u32 sfb_hash(const struct sk_buff *skb, u32 slot) +{ + return sfb_skb_cb(skb)->hashes[slot]; +} + +/* Probabilities are coded as Q0.16 fixed-point values, + * with 0xFFFF representing 65535/65536 (almost 1.0) + * Addition and subtraction are saturating in [0, 65535] + */ +static u32 prob_plus(u32 p1, u32 p2) +{ + u32 res = p1 + p2; + + return min_t(u32, res, SFB_MAX_PROB); +} + +static u32 prob_minus(u32 p1, u32 p2) +{ + return p1 > p2 ? p1 - p2 : 0; +} + +static void increment_one_qlen(u32 sfbhash, u32 slot, struct sfb_sched_data *q) +{ + int i; + struct sfb_bucket *b = &q->bins[slot].bins[0][0]; + + for (i = 0; i < SFB_LEVELS; i++) { + u32 hash = sfbhash & SFB_BUCKET_MASK; + + sfbhash >>= SFB_BUCKET_SHIFT; + if (b[hash].qlen < 0xFFFF) + b[hash].qlen++; + b += SFB_NUMBUCKETS; /* next level */ + } +} + +static void increment_qlen(const struct sk_buff *skb, struct sfb_sched_data *q) +{ + u32 sfbhash; + + sfbhash = sfb_hash(skb, 0); + if (sfbhash) + increment_one_qlen(sfbhash, 0, q); + + sfbhash = sfb_hash(skb, 1); + if (sfbhash) + increment_one_qlen(sfbhash, 1, q); +} + +static void decrement_one_qlen(u32 sfbhash, u32 slot, + struct sfb_sched_data *q) +{ + int i; + struct sfb_bucket *b = &q->bins[slot].bins[0][0]; + + for (i = 0; i < SFB_LEVELS; i++) { + u32 hash = sfbhash & SFB_BUCKET_MASK; + + sfbhash >>= SFB_BUCKET_SHIFT; + if (b[hash].qlen > 0) + b[hash].qlen--; + b += SFB_NUMBUCKETS; /* next level */ + } +} + +static void decrement_qlen(const struct sk_buff *skb, struct sfb_sched_data *q) +{ + u32 sfbhash; + + sfbhash = sfb_hash(skb, 0); + if (sfbhash) + decrement_one_qlen(sfbhash, 0, q); + + sfbhash = sfb_hash(skb, 1); + if (sfbhash) + decrement_one_qlen(sfbhash, 1, q); +} + +static void decrement_prob(struct sfb_bucket *b, struct sfb_sched_data *q) +{ + b->p_mark = prob_minus(b->p_mark, q->decrement); +} + +static void increment_prob(struct sfb_bucket *b, struct sfb_sched_data *q) +{ + b->p_mark = prob_plus(b->p_mark, q->increment); +} + +static void sfb_zero_all_buckets(struct sfb_sched_data *q) +{ + memset(&q->bins, 0, sizeof(q->bins)); +} + +/* + * compute max qlen, max p_mark, and avg p_mark + */ +static u32 sfb_compute_qlen(u32 *prob_r, u32 *avgpm_r, const struct sfb_sched_data *q) +{ + int i; + u32 qlen = 0, prob = 0, totalpm = 0; + const struct sfb_bucket *b = &q->bins[q->slot].bins[0][0]; + + for (i = 0; i < SFB_LEVELS * SFB_NUMBUCKETS; i++) { + if (qlen < b->qlen) + qlen = b->qlen; + totalpm += b->p_mark; + if (prob < b->p_mark) + prob = b->p_mark; + b++; + } + *prob_r = prob; + *avgpm_r = totalpm / (SFB_LEVELS * SFB_NUMBUCKETS); + return qlen; +} + + +static void sfb_init_perturbation(u32 slot, struct sfb_sched_data *q) +{ + q->bins[slot].perturbation = net_random(); +} + +static void sfb_swap_slot(struct sfb_sched_data *q) +{ + sfb_init_perturbation(q->slot, q); + q->slot ^= 1; + q->double_buffering = false; +} + +/* Non elastic flows are allowed to use part of the bandwidth, expressed + * in "penalty_rate" packets per second, with "penalty_burst" burst + */ +static bool sfb_rate_limit(struct sk_buff *skb, struct sfb_sched_data *q) +{ + if (q->penalty_rate == 0 || q->penalty_burst == 0) + return true; + + if (q->tokens_avail < 1) { + unsigned long age = min(10UL * HZ, jiffies - q->token_time); + + q->tokens_avail = (age * q->penalty_rate) / HZ; + if (q->tokens_avail > q->penalty_burst) + q->tokens_avail = q->penalty_burst; + q->token_time = jiffies; + if (q->tokens_avail < 1) + return true; + } + + q->tokens_avail--; + return false; +} + +static bool 
sfb_classify(struct sk_buff *skb, struct sfb_sched_data *q, + int *qerr, u32 *salt) +{ + struct tcf_result res; + int result; + + result = tc_classify(skb, q->filter_list, &res); + if (result >= 0) { +#ifdef CONFIG_NET_CLS_ACT + switch (result) { + case TC_ACT_STOLEN: + case TC_ACT_QUEUED: + *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; + case TC_ACT_SHOT: + return false; + } +#endif + *salt = TC_H_MIN(res.classid); + return true; + } + return false; +} + +static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch) +{ + + struct sfb_sched_data *q = qdisc_priv(sch); + struct Qdisc *child = q->qdisc; + int i; + u32 p_min = ~0; + u32 minqlen = ~0; + u32 r, slot, salt, sfbhash; + int ret = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; + + if (q->rehash_interval > 0) { + unsigned long limit = q->rehash_time + q->rehash_interval; + + if (unlikely(time_after(jiffies, limit))) { + sfb_swap_slot(q); + q->rehash_time = jiffies; + } else if (unlikely(!q->double_buffering && q->warmup_time > 0 && + time_after(jiffies, limit - q->warmup_time))) { + q->double_buffering = true; + } + } + + if (q->filter_list) { + /* If using external classifiers, get result and record it. */ + if (!sfb_classify(skb, q, &ret, &salt)) + goto other_drop; + } else { + salt = skb_get_rxhash(skb); + } + + slot = q->slot; + + sfbhash = jhash_1word(salt, q->bins[slot].perturbation); + if (!sfbhash) + sfbhash = 1; + sfb_skb_cb(skb)->hashes[slot] = sfbhash; + + for (i = 0; i < SFB_LEVELS; i++) { + u32 hash = sfbhash & SFB_BUCKET_MASK; + struct sfb_bucket *b = &q->bins[slot].bins[i][hash]; + + sfbhash >>= SFB_BUCKET_SHIFT; + if (b->qlen == 0) + decrement_prob(b, q); + else if (b->qlen >= q->bin_size) + increment_prob(b, q); + if (minqlen > b->qlen) + minqlen = b->qlen; + if (p_min > b->p_mark) + p_min = b->p_mark; + } + + slot ^= 1; + sfb_skb_cb(skb)->hashes[slot] = 0; + + if (unlikely(minqlen >= q->max || sch->q.qlen >= q->limit)) { + sch->qstats.overlimits++; + if (minqlen >= q->max) + q->stats.bucketdrop++; + else + q->stats.queuedrop++; + goto drop; + } + + if (unlikely(p_min >= SFB_MAX_PROB)) { + /* Inelastic flow */ + if (q->double_buffering) { + sfbhash = jhash_1word(salt, q->bins[slot].perturbation); + if (!sfbhash) + sfbhash = 1; + sfb_skb_cb(skb)->hashes[slot] = sfbhash; + + for (i = 0; i < SFB_LEVELS; i++) { + u32 hash = sfbhash & SFB_BUCKET_MASK; + struct sfb_bucket *b = &q->bins[slot].bins[i][hash]; + + sfbhash >>= SFB_BUCKET_SHIFT; + if (b->qlen == 0) + decrement_prob(b, q); + else if (b->qlen >= q->bin_size) + increment_prob(b, q); + } + } + if (sfb_rate_limit(skb, q)) { + sch->qstats.overlimits++; + q->stats.penaltydrop++; + goto drop; + } + goto enqueue; + } + + r = net_random() & SFB_MAX_PROB; + + if (unlikely(r < p_min)) { + if (unlikely(p_min > SFB_MAX_PROB / 2)) { + /* If we're marking that many packets, then either + * this flow is unresponsive, or we're badly congested. + * In either case, we want to start dropping packets. 
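The comment above summarizes the decision that follows: a packet is ECN-marked with probability p_min (a Q0.16 value), and once p_min exceeds one half the excess is converted into outright drops. A stand-alone sketch of that decision, assuming SFB_MAX_PROB is 0xFFFF as the Q0.16 comment earlier in the file implies; the enum and function name are illustrative:

	#include <stdint.h>

	#define SFB_MAX_PROB 0xFFFF	/* assumed Q0.16 maximum */

	enum sfb_verdict { SFB_PASS, SFB_MARK, SFB_DROP };

	/* r is uniform in [0, SFB_MAX_PROB], p_min the smallest per-level
	 * marking probability collected for this packet.
	 */
	static enum sfb_verdict sfb_decide(uint32_t p_min, uint32_t r)
	{
		if (r >= p_min)
			return SFB_PASS;
		if (p_min > SFB_MAX_PROB / 2 &&
		    r < (p_min - SFB_MAX_PROB / 2) * 2)
			return SFB_DROP;	/* unresponsive flow or heavy congestion */
		return SFB_MARK;		/* ECN mark; non-ECN packets get dropped */
	}

For example, with p_min = 0xC000 (about 0.75) roughly half of the packets are dropped outright, about a quarter are marked, and the remaining quarter pass untouched.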
+ */ + if (r < (p_min - SFB_MAX_PROB / 2) * 2) { + q->stats.earlydrop++; + goto drop; + } + } + if (INET_ECN_set_ce(skb)) { + q->stats.marked++; + } else { + q->stats.earlydrop++; + goto drop; + } + } + +enqueue: + ret = qdisc_enqueue(skb, child); + if (likely(ret == NET_XMIT_SUCCESS)) { + sch->q.qlen++; + increment_qlen(skb, q); + } else if (net_xmit_drop_count(ret)) { + q->stats.childdrop++; + sch->qstats.drops++; + } + return ret; + +drop: + qdisc_drop(skb, sch); + return NET_XMIT_CN; +other_drop: + if (ret & __NET_XMIT_BYPASS) + sch->qstats.drops++; + kfree_skb(skb); + return ret; +} + +static struct sk_buff *sfb_dequeue(struct Qdisc *sch) +{ + struct sfb_sched_data *q = qdisc_priv(sch); + struct Qdisc *child = q->qdisc; + struct sk_buff *skb; + + skb = child->dequeue(q->qdisc); + + if (skb) { + qdisc_bstats_update(sch, skb); + sch->q.qlen--; + decrement_qlen(skb, q); + } + + return skb; +} + +static struct sk_buff *sfb_peek(struct Qdisc *sch) +{ + struct sfb_sched_data *q = qdisc_priv(sch); + struct Qdisc *child = q->qdisc; + + return child->ops->peek(child); +} + +/* No sfb_drop -- impossible since the child doesn't return the dropped skb. */ + +static void sfb_reset(struct Qdisc *sch) +{ + struct sfb_sched_data *q = qdisc_priv(sch); + + qdisc_reset(q->qdisc); + sch->q.qlen = 0; + q->slot = 0; + q->double_buffering = false; + sfb_zero_all_buckets(q); + sfb_init_perturbation(0, q); +} + +static void sfb_destroy(struct Qdisc *sch) +{ + struct sfb_sched_data *q = qdisc_priv(sch); + + tcf_destroy_chain(&q->filter_list); + qdisc_destroy(q->qdisc); +} + +static const struct nla_policy sfb_policy[TCA_SFB_MAX + 1] = { + [TCA_SFB_PARMS] = { .len = sizeof(struct tc_sfb_qopt) }, +}; + +static const struct tc_sfb_qopt sfb_default_ops = { + .rehash_interval = 600 * MSEC_PER_SEC, + .warmup_time = 60 * MSEC_PER_SEC, + .limit = 0, + .max = 25, + .bin_size = 20, + .increment = (SFB_MAX_PROB + 500) / 1000, /* 0.1 % */ + .decrement = (SFB_MAX_PROB + 3000) / 6000, + .penalty_rate = 10, + .penalty_burst = 20, +}; + +static int sfb_change(struct Qdisc *sch, struct nlattr *opt) +{ + struct sfb_sched_data *q = qdisc_priv(sch); + struct Qdisc *child; + struct nlattr *tb[TCA_SFB_MAX + 1]; + const struct tc_sfb_qopt *ctl = &sfb_default_ops; + u32 limit; + int err; + + if (opt) { + err = nla_parse_nested(tb, TCA_SFB_MAX, opt, sfb_policy); + if (err < 0) + return -EINVAL; + + if (tb[TCA_SFB_PARMS] == NULL) + return -EINVAL; + + ctl = nla_data(tb[TCA_SFB_PARMS]); + } + + limit = ctl->limit; + if (limit == 0) + limit = max_t(u32, qdisc_dev(sch)->tx_queue_len, 1); + + child = fifo_create_dflt(sch, &pfifo_qdisc_ops, limit); + if (IS_ERR(child)) + return PTR_ERR(child); + + sch_tree_lock(sch); + + qdisc_tree_decrease_qlen(q->qdisc, q->qdisc->q.qlen); + qdisc_destroy(q->qdisc); + q->qdisc = child; + + q->rehash_interval = msecs_to_jiffies(ctl->rehash_interval); + q->warmup_time = msecs_to_jiffies(ctl->warmup_time); + q->rehash_time = jiffies; + q->limit = limit; + q->increment = ctl->increment; + q->decrement = ctl->decrement; + q->max = ctl->max; + q->bin_size = ctl->bin_size; + q->penalty_rate = ctl->penalty_rate; + q->penalty_burst = ctl->penalty_burst; + q->tokens_avail = ctl->penalty_burst; + q->token_time = jiffies; + + q->slot = 0; + q->double_buffering = false; + sfb_zero_all_buckets(q); + sfb_init_perturbation(0, q); + sfb_init_perturbation(1, q); + + sch_tree_unlock(sch); + + return 0; +} + +static int sfb_init(struct Qdisc *sch, struct nlattr *opt) +{ + struct sfb_sched_data *q = qdisc_priv(sch); + + 
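/* Worked numbers for sfb_default_ops above, assuming SFB_MAX_PROB is
 * 0xFFFF (65535) as the Q0.16 comment near the top of the file implies;
 * this is illustrative arithmetic, not additional kernel code:
 *
 *	increment = (65535 +  500) / 1000 = 66   ->  66 / 65536 ~= 0.10 %
 *	decrement = (65535 + 3000) / 6000 = 11   ->  11 / 65536 ~= 0.017 %
 *
 * so a bin's p_mark climbs by roughly 0.1 % each time the bin holds
 * bin_size (20) or more packets at enqueue and decays by roughly 0.017 %
 * each time it is found empty, while rehash_interval (600 s) and
 * warmup_time (60 s) are given in milliseconds and converted with
 * msecs_to_jiffies() in sfb_change().
 */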
q->qdisc = &noop_qdisc; + return sfb_change(sch, opt); +} + +static int sfb_dump(struct Qdisc *sch, struct sk_buff *skb) +{ + struct sfb_sched_data *q = qdisc_priv(sch); + struct nlattr *opts; + struct tc_sfb_qopt opt = { + .rehash_interval = jiffies_to_msecs(q->rehash_interval), + .warmup_time = jiffies_to_msecs(q->warmup_time), + .limit = q->limit, + .max = q->max, + .bin_size = q->bin_size, + .increment = q->increment, + .decrement = q->decrement, + .penalty_rate = q->penalty_rate, + .penalty_burst = q->penalty_burst, + }; + + sch->qstats.backlog = q->qdisc->qstats.backlog; + opts = nla_nest_start(skb, TCA_OPTIONS); + NLA_PUT(skb, TCA_SFB_PARMS, sizeof(opt), &opt); + return nla_nest_end(skb, opts); + +nla_put_failure: + nla_nest_cancel(skb, opts); + return -EMSGSIZE; +} + +static int sfb_dump_stats(struct Qdisc *sch, struct gnet_dump *d) +{ + struct sfb_sched_data *q = qdisc_priv(sch); + struct tc_sfb_xstats st = { + .earlydrop = q->stats.earlydrop, + .penaltydrop = q->stats.penaltydrop, + .bucketdrop = q->stats.bucketdrop, + .queuedrop = q->stats.queuedrop, + .childdrop = q->stats.childdrop, + .marked = q->stats.marked, + }; + + st.maxqlen = sfb_compute_qlen(&st.maxprob, &st.avgprob, q); + + return gnet_stats_copy_app(d, &st, sizeof(st)); +} + +static int sfb_dump_class(struct Qdisc *sch, unsigned long cl, + struct sk_buff *skb, struct tcmsg *tcm) +{ + return -ENOSYS; +} + +static int sfb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, + struct Qdisc **old) +{ + struct sfb_sched_data *q = qdisc_priv(sch); + + if (new == NULL) + new = &noop_qdisc; + + sch_tree_lock(sch); + *old = q->qdisc; + q->qdisc = new; + qdisc_tree_decrease_qlen(*old, (*old)->q.qlen); + qdisc_reset(*old); + sch_tree_unlock(sch); + return 0; +} + +static struct Qdisc *sfb_leaf(struct Qdisc *sch, unsigned long arg) +{ + struct sfb_sched_data *q = qdisc_priv(sch); + + return q->qdisc; +} + +static unsigned long sfb_get(struct Qdisc *sch, u32 classid) +{ + return 1; +} + +static void sfb_put(struct Qdisc *sch, unsigned long arg) +{ +} + +static int sfb_change_class(struct Qdisc *sch, u32 classid, u32 parentid, + struct nlattr **tca, unsigned long *arg) +{ + return -ENOSYS; +} + +static int sfb_delete(struct Qdisc *sch, unsigned long cl) +{ + return -ENOSYS; +} + +static void sfb_walk(struct Qdisc *sch, struct qdisc_walker *walker) +{ + if (!walker->stop) { + if (walker->count >= walker->skip) + if (walker->fn(sch, 1, walker) < 0) { + walker->stop = 1; + return; + } + walker->count++; + } +} + +static struct tcf_proto **sfb_find_tcf(struct Qdisc *sch, unsigned long cl) +{ + struct sfb_sched_data *q = qdisc_priv(sch); + + if (cl) + return NULL; + return &q->filter_list; +} + +static unsigned long sfb_bind(struct Qdisc *sch, unsigned long parent, + u32 classid) +{ + return 0; +} + + +static const struct Qdisc_class_ops sfb_class_ops = { + .graft = sfb_graft, + .leaf = sfb_leaf, + .get = sfb_get, + .put = sfb_put, + .change = sfb_change_class, + .delete = sfb_delete, + .walk = sfb_walk, + .tcf_chain = sfb_find_tcf, + .bind_tcf = sfb_bind, + .unbind_tcf = sfb_put, + .dump = sfb_dump_class, +}; + +static struct Qdisc_ops sfb_qdisc_ops __read_mostly = { + .id = "sfb", + .priv_size = sizeof(struct sfb_sched_data), + .cl_ops = &sfb_class_ops, + .enqueue = sfb_enqueue, + .dequeue = sfb_dequeue, + .peek = sfb_peek, + .init = sfb_init, + .reset = sfb_reset, + .destroy = sfb_destroy, + .change = sfb_change, + .dump = sfb_dump, + .dump_stats = sfb_dump_stats, + .owner = THIS_MODULE, +}; + +static int __init 
sfb_module_init(void) +{ + return register_qdisc(&sfb_qdisc_ops); +} + +static void __exit sfb_module_exit(void) +{ + unregister_qdisc(&sfb_qdisc_ops); +} + +module_init(sfb_module_init) +module_exit(sfb_module_exit) + +MODULE_DESCRIPTION("Stochastic Fair Blue queue discipline"); +MODULE_AUTHOR("Juliusz Chroboczek"); +MODULE_AUTHOR("Eric Dumazet"); +MODULE_LICENSE("GPL"); diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index 54a36f43a1f1..c2e628dfaacc 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -21,6 +21,7 @@ #include <linux/skbuff.h> #include <linux/jhash.h> #include <linux/slab.h> +#include <linux/vmalloc.h> #include <net/ip.h> #include <net/netlink.h> #include <net/pkt_sched.h> @@ -76,7 +77,8 @@ #define SFQ_DEPTH 128 /* max number of packets per flow */ #define SFQ_SLOTS 128 /* max number of flows */ #define SFQ_EMPTY_SLOT 255 -#define SFQ_HASH_DIVISOR 1024 +#define SFQ_DEFAULT_HASH_DIVISOR 1024 + /* We use 16 bits to store allot, and want to handle packets up to 64K * Scale allot by 8 (1<<3) so that no overflow occurs. */ @@ -112,7 +114,7 @@ struct sfq_sched_data { int perturb_period; unsigned int quantum; /* Allotment per round: MUST BE >= MTU */ int limit; - + unsigned int divisor; /* number of slots in hash table */ /* Variables */ struct tcf_proto *filter_list; struct timer_list perturb_timer; @@ -120,7 +122,7 @@ struct sfq_sched_data { sfq_index cur_depth; /* depth of longest slot */ unsigned short scaled_quantum; /* SFQ_ALLOT_SIZE(quantum) */ struct sfq_slot *tail; /* current slot in round */ - sfq_index ht[SFQ_HASH_DIVISOR]; /* Hash table */ + sfq_index *ht; /* Hash table (divisor slots) */ struct sfq_slot slots[SFQ_SLOTS]; struct sfq_head dep[SFQ_DEPTH]; /* Linked list of slots, indexed by depth */ }; @@ -137,7 +139,7 @@ static inline struct sfq_head *sfq_dep_head(struct sfq_sched_data *q, sfq_index static unsigned int sfq_fold_hash(struct sfq_sched_data *q, u32 h, u32 h1) { - return jhash_2words(h, h1, q->perturbation) & (SFQ_HASH_DIVISOR - 1); + return jhash_2words(h, h1, q->perturbation) & (q->divisor - 1); } static unsigned int sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb) @@ -201,7 +203,7 @@ static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch, if (TC_H_MAJ(skb->priority) == sch->handle && TC_H_MIN(skb->priority) > 0 && - TC_H_MIN(skb->priority) <= SFQ_HASH_DIVISOR) + TC_H_MIN(skb->priority) <= q->divisor) return TC_H_MIN(skb->priority); if (!q->filter_list) @@ -219,7 +221,7 @@ static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch, return 0; } #endif - if (TC_H_MIN(res.classid) <= SFQ_HASH_DIVISOR) + if (TC_H_MIN(res.classid) <= q->divisor) return TC_H_MIN(res.classid); } return 0; @@ -400,10 +402,8 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch) q->tail = slot; slot->allot = q->scaled_quantum; } - if (++sch->q.qlen <= q->limit) { - qdisc_bstats_update(sch, skb); + if (++sch->q.qlen <= q->limit) return NET_XMIT_SUCCESS; - } sfq_drop(sch); return NET_XMIT_CN; @@ -443,6 +443,7 @@ next_slot: } skb = slot_dequeue_head(slot); sfq_dec(q, a); + qdisc_bstats_update(sch, skb); sch->q.qlen--; sch->qstats.backlog -= qdisc_pkt_len(skb); @@ -490,13 +491,18 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt) if (opt->nla_len < nla_attr_size(sizeof(*ctl))) return -EINVAL; + if (ctl->divisor && + (!is_power_of_2(ctl->divisor) || ctl->divisor > 65536)) + return -EINVAL; + sch_tree_lock(sch); q->quantum = ctl->quantum ? 
: psched_mtu(qdisc_dev(sch)); q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum); q->perturb_period = ctl->perturb_period * HZ; if (ctl->limit) q->limit = min_t(u32, ctl->limit, SFQ_DEPTH - 1); - + if (ctl->divisor) + q->divisor = ctl->divisor; qlen = sch->q.qlen; while (sch->q.qlen > q->limit) sfq_drop(sch); @@ -514,15 +520,13 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt) static int sfq_init(struct Qdisc *sch, struct nlattr *opt) { struct sfq_sched_data *q = qdisc_priv(sch); + size_t sz; int i; q->perturb_timer.function = sfq_perturbation; q->perturb_timer.data = (unsigned long)sch; init_timer_deferrable(&q->perturb_timer); - for (i = 0; i < SFQ_HASH_DIVISOR; i++) - q->ht[i] = SFQ_EMPTY_SLOT; - for (i = 0; i < SFQ_DEPTH; i++) { q->dep[i].next = i + SFQ_SLOTS; q->dep[i].prev = i + SFQ_SLOTS; @@ -531,6 +535,7 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt) q->limit = SFQ_DEPTH - 1; q->cur_depth = 0; q->tail = NULL; + q->divisor = SFQ_DEFAULT_HASH_DIVISOR; if (opt == NULL) { q->quantum = psched_mtu(qdisc_dev(sch)); q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum); @@ -542,10 +547,23 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt) return err; } + sz = sizeof(q->ht[0]) * q->divisor; + q->ht = kmalloc(sz, GFP_KERNEL); + if (!q->ht && sz > PAGE_SIZE) + q->ht = vmalloc(sz); + if (!q->ht) + return -ENOMEM; + for (i = 0; i < q->divisor; i++) + q->ht[i] = SFQ_EMPTY_SLOT; + for (i = 0; i < SFQ_SLOTS; i++) { slot_queue_init(&q->slots[i]); sfq_link(q, i); } + if (q->limit >= 1) + sch->flags |= TCQ_F_CAN_BYPASS; + else + sch->flags &= ~TCQ_F_CAN_BYPASS; return 0; } @@ -556,6 +574,10 @@ static void sfq_destroy(struct Qdisc *sch) tcf_destroy_chain(&q->filter_list); q->perturb_period = 0; del_timer_sync(&q->perturb_timer); + if (is_vmalloc_addr(q->ht)) + vfree(q->ht); + else + kfree(q->ht); } static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb) @@ -568,7 +590,7 @@ static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb) opt.perturb_period = q->perturb_period / HZ; opt.limit = q->limit; - opt.divisor = SFQ_HASH_DIVISOR; + opt.divisor = q->divisor; opt.flows = q->limit; NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt); @@ -593,6 +615,8 @@ static unsigned long sfq_get(struct Qdisc *sch, u32 classid) static unsigned long sfq_bind(struct Qdisc *sch, unsigned long parent, u32 classid) { + /* we cannot bypass queue discipline anymore */ + sch->flags &= ~TCQ_F_CAN_BYPASS; return 0; } @@ -646,7 +670,7 @@ static void sfq_walk(struct Qdisc *sch, struct qdisc_walker *arg) if (arg->stop) return; - for (i = 0; i < SFQ_HASH_DIVISOR; i++) { + for (i = 0; i < q->divisor; i++) { if (q->ht[i] == SFQ_EMPTY_SLOT || arg->count < arg->skip) { arg->count++; diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index 475edfb69c22..1dcfb5223a86 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -133,7 +133,6 @@ static int tbf_enqueue(struct sk_buff *skb, struct Qdisc *sch) } sch->q.qlen++; - qdisc_bstats_update(sch, skb); return NET_XMIT_SUCCESS; } @@ -185,7 +184,8 @@ static struct sk_buff *tbf_dequeue(struct Qdisc *sch) q->tokens = toks; q->ptokens = ptoks; sch->q.qlen--; - sch->flags &= ~TCQ_F_THROTTLED; + qdisc_unthrottled(sch); + qdisc_bstats_update(sch, skb); return skb; } diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index 64c071ded0f4..45cd30098e34 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -85,7 +85,6 @@ teql_enqueue(struct sk_buff *skb, struct Qdisc *sch) if (q->q.qlen < dev->tx_queue_len) { __skb_queue_tail(&q->q, skb); - 
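The sch_sfq.c changes above make the hash divisor configurable and size the table at init time, falling back from kmalloc() to vmalloc() for large divisors and freeing with the matching routine in sfq_destroy(). A minimal sketch of that allocation pattern in isolation; the helper names are illustrative, while kmalloc/vmalloc/is_vmalloc_addr are the calls the patch itself uses:

	#include <linux/mm.h>
	#include <linux/slab.h>
	#include <linux/vmalloc.h>

	static void *sfq_table_alloc(size_t sz)
	{
		void *p = kmalloc(sz, GFP_KERNEL);

		if (!p && sz > PAGE_SIZE)
			p = vmalloc(sz);	/* large tables: fall back to vmalloc */
		return p;
	}

	static void sfq_table_free(void *p)
	{
		if (is_vmalloc_addr(p))
			vfree(p);
		else
			kfree(p);
	}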
qdisc_bstats_update(sch, skb); return NET_XMIT_SUCCESS; } @@ -109,6 +108,8 @@ teql_dequeue(struct Qdisc *sch) dat->m->slaves = sch; netif_wake_queue(m); } + } else { + qdisc_bstats_update(sch, skb); } sch->q.qlen = dat->q.qlen + dat_queue->qdisc->q.qlen; return skb; diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 5f1fb8bd862d..6b04287913cd 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -1089,7 +1089,6 @@ static void sctp_assoc_bh_rcv(struct work_struct *work) base.inqueue.immediate); struct sctp_endpoint *ep; struct sctp_chunk *chunk; - struct sock *sk; struct sctp_inq *inqueue; int state; sctp_subtype_t subtype; @@ -1097,7 +1096,6 @@ static void sctp_assoc_bh_rcv(struct work_struct *work) /* The association should be held so we should be safe. */ ep = asoc->ep; - sk = asoc->base.sk; inqueue = &asoc->base.inqueue; sctp_association_hold(asoc); diff --git a/net/sctp/input.c b/net/sctp/input.c index ea2192444ce6..826661be73e7 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -948,14 +948,11 @@ static struct sctp_association *__sctp_rcv_init_lookup(struct sk_buff *skb, union sctp_addr addr; union sctp_addr *paddr = &addr; struct sctphdr *sh = sctp_hdr(skb); - sctp_chunkhdr_t *ch; union sctp_params params; sctp_init_chunk_t *init; struct sctp_transport *transport; struct sctp_af *af; - ch = (sctp_chunkhdr_t *) skb->data; - /* * This code will NOT touch anything inside the chunk--it is * strictly READ-ONLY. diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 95e0c8eda1a0..865ce7ba4e14 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -201,40 +201,40 @@ static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *transport) { struct sock *sk = skb->sk; struct ipv6_pinfo *np = inet6_sk(sk); - struct flowi fl; + struct flowi6 fl6; - memset(&fl, 0, sizeof(fl)); + memset(&fl6, 0, sizeof(fl6)); - fl.proto = sk->sk_protocol; + fl6.flowi6_proto = sk->sk_protocol; /* Fill in the dest address from the route entry passed with the skb * and the source address from the transport. 
*/ - ipv6_addr_copy(&fl.fl6_dst, &transport->ipaddr.v6.sin6_addr); - ipv6_addr_copy(&fl.fl6_src, &transport->saddr.v6.sin6_addr); + ipv6_addr_copy(&fl6.daddr, &transport->ipaddr.v6.sin6_addr); + ipv6_addr_copy(&fl6.saddr, &transport->saddr.v6.sin6_addr); - fl.fl6_flowlabel = np->flow_label; - IP6_ECN_flow_xmit(sk, fl.fl6_flowlabel); - if (ipv6_addr_type(&fl.fl6_src) & IPV6_ADDR_LINKLOCAL) - fl.oif = transport->saddr.v6.sin6_scope_id; + fl6.flowlabel = np->flow_label; + IP6_ECN_flow_xmit(sk, fl6.flowlabel); + if (ipv6_addr_type(&fl6.saddr) & IPV6_ADDR_LINKLOCAL) + fl6.flowi6_oif = transport->saddr.v6.sin6_scope_id; else - fl.oif = sk->sk_bound_dev_if; + fl6.flowi6_oif = sk->sk_bound_dev_if; if (np->opt && np->opt->srcrt) { struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt; - ipv6_addr_copy(&fl.fl6_dst, rt0->addr); + ipv6_addr_copy(&fl6.daddr, rt0->addr); } SCTP_DEBUG_PRINTK("%s: skb:%p, len:%d, src:%pI6 dst:%pI6\n", __func__, skb, skb->len, - &fl.fl6_src, &fl.fl6_dst); + &fl6.saddr, &fl6.daddr); SCTP_INC_STATS(SCTP_MIB_OUTSCTPPACKS); if (!(transport->param_flags & SPP_PMTUD_ENABLE)) skb->local_df = 1; - return ip6_xmit(sk, skb, &fl, np->opt); + return ip6_xmit(sk, skb, &fl6, np->opt); } /* Returns the dst cache entry for the given source and destination ip @@ -245,22 +245,22 @@ static struct dst_entry *sctp_v6_get_dst(struct sctp_association *asoc, union sctp_addr *saddr) { struct dst_entry *dst; - struct flowi fl; + struct flowi6 fl6; - memset(&fl, 0, sizeof(fl)); - ipv6_addr_copy(&fl.fl6_dst, &daddr->v6.sin6_addr); + memset(&fl6, 0, sizeof(fl6)); + ipv6_addr_copy(&fl6.daddr, &daddr->v6.sin6_addr); if (ipv6_addr_type(&daddr->v6.sin6_addr) & IPV6_ADDR_LINKLOCAL) - fl.oif = daddr->v6.sin6_scope_id; + fl6.flowi6_oif = daddr->v6.sin6_scope_id; - SCTP_DEBUG_PRINTK("%s: DST=%pI6 ", __func__, &fl.fl6_dst); + SCTP_DEBUG_PRINTK("%s: DST=%pI6 ", __func__, &fl6.daddr); if (saddr) { - ipv6_addr_copy(&fl.fl6_src, &saddr->v6.sin6_addr); - SCTP_DEBUG_PRINTK("SRC=%pI6 - ", &fl.fl6_src); + ipv6_addr_copy(&fl6.saddr, &saddr->v6.sin6_addr); + SCTP_DEBUG_PRINTK("SRC=%pI6 - ", &fl6.saddr); } - dst = ip6_route_output(&init_net, NULL, &fl); + dst = ip6_route_output(&init_net, NULL, &fl6); if (!dst->error) { struct rt6_info *rt; rt = (struct rt6_info *)dst; diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 8c6d379b4bb6..26dc005113a0 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -545,13 +545,11 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt, struct sctp_transport *transport = pkt->transport; sctp_xmit_t status; struct sctp_chunk *chunk, *chunk1; - struct sctp_association *asoc; int fast_rtx; int error = 0; int timer = 0; int done = 0; - asoc = q->asoc; lqueue = &q->retransmit; fast_rtx = q->fast_rtx; diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index e58f9476f29c..152976ec0b74 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -468,32 +468,32 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc, union sctp_addr *saddr) { struct rtable *rt; - struct flowi fl; + struct flowi4 fl4; struct sctp_bind_addr *bp; struct sctp_sockaddr_entry *laddr; struct dst_entry *dst = NULL; union sctp_addr dst_saddr; - memset(&fl, 0x0, sizeof(struct flowi)); - fl.fl4_dst = daddr->v4.sin_addr.s_addr; - fl.fl_ip_dport = daddr->v4.sin_port; - fl.proto = IPPROTO_SCTP; + memset(&fl4, 0x0, sizeof(struct flowi4)); + fl4.daddr = daddr->v4.sin_addr.s_addr; + fl4.fl4_dport = daddr->v4.sin_port; + fl4.flowi4_proto = IPPROTO_SCTP; if (asoc) { 
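The SCTP conversions in this area replace the generic struct flowi with the family-specific struct flowi4/flowi6 and switch to routing calls that return the route, or an ERR_PTR() value, directly, as in the sctp_v4_get_dst() hunk that follows. A minimal sketch of that calling convention, not the SCTP code itself; the wrapper name is illustrative:

	#include <linux/err.h>
	#include <linux/in.h>
	#include <net/route.h>

	static struct dst_entry *lookup_ipv4_dst(struct net *net,
						  __be32 daddr, __be32 saddr)
	{
		struct flowi4 fl4 = {
			.daddr = daddr,
			.saddr = saddr,
			.flowi4_proto = IPPROTO_SCTP,
		};
		struct rtable *rt = ip_route_output_key(net, &fl4);

		if (IS_ERR(rt))
			return NULL;	/* no route; caller decides what to do */
		return &rt->dst;
	}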
- fl.fl4_tos = RT_CONN_FLAGS(asoc->base.sk); - fl.oif = asoc->base.sk->sk_bound_dev_if; - fl.fl_ip_sport = htons(asoc->base.bind_addr.port); + fl4.flowi4_tos = RT_CONN_FLAGS(asoc->base.sk); + fl4.flowi4_oif = asoc->base.sk->sk_bound_dev_if; + fl4.fl4_sport = htons(asoc->base.bind_addr.port); } if (saddr) { - fl.fl4_src = saddr->v4.sin_addr.s_addr; - fl.fl_ip_sport = saddr->v4.sin_port; + fl4.saddr = saddr->v4.sin_addr.s_addr; + fl4.fl4_sport = saddr->v4.sin_port; } SCTP_DEBUG_PRINTK("%s: DST:%pI4, SRC:%pI4 - ", - __func__, &fl.fl4_dst, &fl.fl4_src); + __func__, &fl4.daddr, &fl4.saddr); - if (!ip_route_output_key(&init_net, &rt, &fl)) { + rt = ip_route_output_key(&init_net, &fl4); + if (!IS_ERR(rt)) dst = &rt->dst; - } /* If there is no association or if a source address is passed, no * more validation is required. @@ -533,9 +533,10 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc, continue; if ((laddr->state == SCTP_ADDR_SRC) && (AF_INET == laddr->a.sa.sa_family)) { - fl.fl4_src = laddr->a.v4.sin_addr.s_addr; - fl.fl_ip_sport = laddr->a.v4.sin_port; - if (!ip_route_output_key(&init_net, &rt, &fl)) { + fl4.saddr = laddr->a.v4.sin_addr.s_addr; + fl4.fl4_sport = laddr->a.v4.sin_port; + rt = ip_route_output_key(&init_net, &fl4); + if (!IS_ERR(rt)) { dst = &rt->dst; goto out_unlock; } diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 2cc46f0962ca..de98665db524 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -2029,11 +2029,11 @@ static sctp_ierror_t sctp_process_unk_param(const struct sctp_association *asoc, *errp = sctp_make_op_error_fixed(asoc, chunk); if (*errp) { - sctp_init_cause_fixed(*errp, SCTP_ERROR_UNKNOWN_PARAM, - WORD_ROUND(ntohs(param.p->length))); - sctp_addto_chunk_fixed(*errp, - WORD_ROUND(ntohs(param.p->length)), - param.v); + if (!sctp_init_cause_fixed(*errp, SCTP_ERROR_UNKNOWN_PARAM, + WORD_ROUND(ntohs(param.p->length)))) + sctp_addto_chunk_fixed(*errp, + WORD_ROUND(ntohs(param.p->length)), + param.v); } else { /* If there is no memory for generating the ERROR * report as specified, an ABORT will be triggered @@ -3375,7 +3375,6 @@ struct sctp_chunk *sctp_make_fwdtsn(const struct sctp_association *asoc, struct sctp_fwdtsn_skip *skiplist) { struct sctp_chunk *retval = NULL; - struct sctp_fwdtsn_chunk *ftsn_chunk; struct sctp_fwdtsn_hdr ftsn_hdr; struct sctp_fwdtsn_skip skip; size_t hint; @@ -3388,8 +3387,6 @@ struct sctp_chunk *sctp_make_fwdtsn(const struct sctp_association *asoc, if (!retval) return NULL; - ftsn_chunk = (struct sctp_fwdtsn_chunk *)retval->subh.fwdtsn_hdr; - ftsn_hdr.new_cum_tsn = htonl(new_cum_tsn); retval->subh.fwdtsn_hdr = sctp_addto_chunk(retval, sizeof(ftsn_hdr), &ftsn_hdr); diff --git a/net/sctp/socket.c b/net/sctp/socket.c index a09b0dd25f50..3951a10605bc 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -2928,7 +2928,6 @@ static int sctp_setsockopt_peer_primary_addr(struct sock *sk, char __user *optva unsigned int optlen) { struct sctp_sock *sp; - struct sctp_endpoint *ep; struct sctp_association *asoc = NULL; struct sctp_setpeerprim prim; struct sctp_chunk *chunk; @@ -2936,7 +2935,6 @@ static int sctp_setsockopt_peer_primary_addr(struct sock *sk, char __user *optva int err; sp = sctp_sk(sk); - ep = sp->ep; if (!sctp_addip_enable) return -EPERM; @@ -3428,7 +3426,7 @@ SCTP_STATIC int sctp_setsockopt(struct sock *sk, int level, int optname, retval = sctp_setsockopt_peer_addr_params(sk, optval, optlen); break; - case SCTP_DELAYED_ACK: + case SCTP_DELAYED_SACK: retval = 
sctp_setsockopt_delayed_ack(sk, optval, optlen); break; case SCTP_PARTIAL_DELIVERY_POINT: @@ -5333,7 +5331,7 @@ SCTP_STATIC int sctp_getsockopt(struct sock *sk, int level, int optname, retval = sctp_getsockopt_peer_addr_params(sk, len, optval, optlen); break; - case SCTP_DELAYED_ACK: + case SCTP_DELAYED_SACK: retval = sctp_getsockopt_delayed_ack(sk, len, optval, optlen); break; @@ -6102,15 +6100,16 @@ static void __sctp_write_space(struct sctp_association *asoc) wake_up_interruptible(&asoc->wait); if (sctp_writeable(sk)) { - if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) - wake_up_interruptible(sk_sleep(sk)); + wait_queue_head_t *wq = sk_sleep(sk); + + if (wq && waitqueue_active(wq)) + wake_up_interruptible(wq); /* Note that we try to include the Async I/O support * here by modeling from the current TCP/UDP code. * We have not tested with it yet. */ - if (sock->wq->fasync_list && - !(sk->sk_shutdown & SEND_SHUTDOWN)) + if (!(sk->sk_shutdown & SEND_SHUTDOWN)) sock_wake_async(sock, SOCK_WAKE_SPACE, POLL_OUT); } diff --git a/net/sctp/tsnmap.c b/net/sctp/tsnmap.c index 747d5412c463..f1e40cebc981 100644 --- a/net/sctp/tsnmap.c +++ b/net/sctp/tsnmap.c @@ -344,7 +344,7 @@ __u16 sctp_tsnmap_num_gabs(struct sctp_tsnmap *map, /* Refresh the gap ack information. */ if (sctp_tsnmap_has_gap(map)) { - __u16 start, end; + __u16 start = 0, end = 0; sctp_tsnmap_iter_init(map, &iter); while (sctp_tsnmap_next_gap_ack(map, &iter, &start, diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c index c7f7e49609cb..17678189d054 100644 --- a/net/sctp/ulpqueue.c +++ b/net/sctp/ulpqueue.c @@ -105,11 +105,8 @@ int sctp_ulpq_tail_data(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk, gfp_t gfp) { struct sk_buff_head temp; - sctp_data_chunk_t *hdr; struct sctp_ulpevent *event; - hdr = (sctp_data_chunk_t *) chunk->chunk_hdr; - /* Create an event from the incoming chunk. 
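The __sctp_write_space() hunk above, like several svc_sock callbacks later in this series, caches sk_sleep(sk) in a local wait-queue pointer so the NULL test, the waitqueue_active() check and the wake-up all act on the same pointer fetched once. The pattern in isolation; the function name is illustrative:

	#include <net/sock.h>

	static void wake_socket_waiters(struct sock *sk)
	{
		wait_queue_head_t *wq = sk_sleep(sk);

		if (wq && waitqueue_active(wq))
			wake_up_interruptible(wq);
	}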
*/ event = sctp_ulpevent_make_rcvmsg(chunk->asoc, chunk, gfp); if (!event) @@ -743,11 +740,9 @@ static void sctp_ulpq_retrieve_ordered(struct sctp_ulpq *ulpq, struct sk_buff *pos, *tmp; struct sctp_ulpevent *cevent; struct sctp_stream *in; - __u16 sid, csid; - __u16 ssn, cssn; + __u16 sid, csid, cssn; sid = event->stream; - ssn = event->ssn; in = &ulpq->asoc->ssnmap->in; event_list = (struct sk_buff_head *) sctp_event2skb(event)->prev; diff --git a/net/socket.c b/net/socket.c index ac2219f90d5d..937d0fcf74bc 100644 --- a/net/socket.c +++ b/net/socket.c @@ -240,17 +240,19 @@ static struct kmem_cache *sock_inode_cachep __read_mostly; static struct inode *sock_alloc_inode(struct super_block *sb) { struct socket_alloc *ei; + struct socket_wq *wq; ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL); if (!ei) return NULL; - ei->socket.wq = kmalloc(sizeof(struct socket_wq), GFP_KERNEL); - if (!ei->socket.wq) { + wq = kmalloc(sizeof(*wq), GFP_KERNEL); + if (!wq) { kmem_cache_free(sock_inode_cachep, ei); return NULL; } - init_waitqueue_head(&ei->socket.wq->wait); - ei->socket.wq->fasync_list = NULL; + init_waitqueue_head(&wq->wait); + wq->fasync_list = NULL; + RCU_INIT_POINTER(ei->socket.wq, wq); ei->socket.state = SS_UNCONNECTED; ei->socket.flags = 0; @@ -273,9 +275,11 @@ static void wq_free_rcu(struct rcu_head *head) static void sock_destroy_inode(struct inode *inode) { struct socket_alloc *ei; + struct socket_wq *wq; ei = container_of(inode, struct socket_alloc, vfs_inode); - call_rcu(&ei->socket.wq->rcu, wq_free_rcu); + wq = rcu_dereference_protected(ei->socket.wq, 1); + call_rcu(&wq->rcu, wq_free_rcu); kmem_cache_free(sock_inode_cachep, ei); } @@ -524,7 +528,7 @@ void sock_release(struct socket *sock) module_put(owner); } - if (sock->wq->fasync_list) + if (rcu_dereference_protected(sock->wq, 1)->fasync_list) printk(KERN_ERR "sock_release: fasync list not empty!\n"); percpu_sub(sockets_in_use, 1); @@ -1108,15 +1112,16 @@ static int sock_fasync(int fd, struct file *filp, int on) { struct socket *sock = filp->private_data; struct sock *sk = sock->sk; + struct socket_wq *wq; if (sk == NULL) return -EINVAL; lock_sock(sk); + wq = rcu_dereference_protected(sock->wq, sock_owned_by_user(sk)); + fasync_helper(fd, filp, on, &wq->fasync_list); - fasync_helper(fd, filp, on, &sock->wq->fasync_list); - - if (!sock->wq->fasync_list) + if (!wq->fasync_list) sock_reset_flag(sk, SOCK_FASYNC); else sock_set_flag(sk, SOCK_FASYNC); @@ -2643,7 +2648,8 @@ static int bond_ioctl(struct net *net, unsigned int cmd, old_fs = get_fs(); set_fs(KERNEL_DS); - err = dev_ioctl(net, cmd, &kifr); + err = dev_ioctl(net, cmd, + (struct ifreq __user __force *) &kifr); set_fs(old_fs); return err; @@ -2752,7 +2758,7 @@ static int compat_sioc_ifmap(struct net *net, unsigned int cmd, old_fs = get_fs(); set_fs(KERNEL_DS); - err = dev_ioctl(net, cmd, (void __user *)&ifr); + err = dev_ioctl(net, cmd, (void __user __force *)&ifr); set_fs(old_fs); if (cmd == SIOCGIFMAP && !err) { @@ -2857,7 +2863,8 @@ static int routing_ioctl(struct net *net, struct socket *sock, ret |= __get_user(rtdev, &(ur4->rt_dev)); if (rtdev) { ret |= copy_from_user(devname, compat_ptr(rtdev), 15); - r4.rt_dev = devname; devname[15] = 0; + r4.rt_dev = (char __user __force *)devname; + devname[15] = 0; } else r4.rt_dev = NULL; diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 7bd3bbba4710..b7d435c3f19e 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -420,6 +420,7 @@ static void svc_sock_setbufsize(struct socket *sock, unsigned int snd, 
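The net/socket.c changes above start treating socket->wq as an RCU-managed pointer: it is published with RCU_INIT_POINTER() while the inode is still private, read with rcu_dereference_protected() where the caller is the sole writer or holds the socket lock, and freed through call_rcu(). A reduced sketch of that lifecycle with a stand-in structure; everything except the RCU primitives is illustrative:

	#include <linux/errno.h>
	#include <linux/kernel.h>
	#include <linux/rcupdate.h>
	#include <linux/slab.h>

	struct demo_wq {
		struct rcu_head rcu;
		/* ... fields RCU readers look at ... */
	};

	struct demo_sock {
		struct demo_wq __rcu *wq;
	};

	static int demo_attach(struct demo_sock *ds)
	{
		struct demo_wq *wq = kzalloc(sizeof(*wq), GFP_KERNEL);

		if (!wq)
			return -ENOMEM;
		RCU_INIT_POINTER(ds->wq, wq);	/* object not yet visible to readers */
		return 0;
	}

	static void demo_wq_free_rcu(struct rcu_head *head)
	{
		kfree(container_of(head, struct demo_wq, rcu));
	}

	static void demo_detach(struct demo_sock *ds)
	{
		/* "1": no other writer can race with us here */
		struct demo_wq *wq = rcu_dereference_protected(ds->wq, 1);

		call_rcu(&wq->rcu, demo_wq_free_rcu);	/* free after a grace period */
	}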
static void svc_udp_data_ready(struct sock *sk, int count) { struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data; + wait_queue_head_t *wq = sk_sleep(sk); if (svsk) { dprintk("svc: socket %p(inet %p), count=%d, busy=%d\n", @@ -428,8 +429,8 @@ static void svc_udp_data_ready(struct sock *sk, int count) set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); svc_xprt_enqueue(&svsk->sk_xprt); } - if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) - wake_up_interruptible(sk_sleep(sk)); + if (wq && waitqueue_active(wq)) + wake_up_interruptible(wq); } /* @@ -438,6 +439,7 @@ static void svc_udp_data_ready(struct sock *sk, int count) static void svc_write_space(struct sock *sk) { struct svc_sock *svsk = (struct svc_sock *)(sk->sk_user_data); + wait_queue_head_t *wq = sk_sleep(sk); if (svsk) { dprintk("svc: socket %p(inet %p), write_space busy=%d\n", @@ -445,10 +447,10 @@ static void svc_write_space(struct sock *sk) svc_xprt_enqueue(&svsk->sk_xprt); } - if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) { + if (wq && waitqueue_active(wq)) { dprintk("RPC svc_write_space: someone sleeping on %p\n", svsk); - wake_up_interruptible(sk_sleep(sk)); + wake_up_interruptible(wq); } } @@ -739,6 +741,7 @@ static void svc_udp_init(struct svc_sock *svsk, struct svc_serv *serv) static void svc_tcp_listen_data_ready(struct sock *sk, int count_unused) { struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data; + wait_queue_head_t *wq; dprintk("svc: socket %p TCP (listen) state change %d\n", sk, sk->sk_state); @@ -761,8 +764,9 @@ static void svc_tcp_listen_data_ready(struct sock *sk, int count_unused) printk("svc: socket %p: no user data\n", sk); } - if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) - wake_up_interruptible_all(sk_sleep(sk)); + wq = sk_sleep(sk); + if (wq && waitqueue_active(wq)) + wake_up_interruptible_all(wq); } /* @@ -771,6 +775,7 @@ static void svc_tcp_listen_data_ready(struct sock *sk, int count_unused) static void svc_tcp_state_change(struct sock *sk) { struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data; + wait_queue_head_t *wq = sk_sleep(sk); dprintk("svc: socket %p TCP (connected) state change %d (svsk %p)\n", sk, sk->sk_state, sk->sk_user_data); @@ -781,13 +786,14 @@ static void svc_tcp_state_change(struct sock *sk) set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); svc_xprt_enqueue(&svsk->sk_xprt); } - if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) - wake_up_interruptible_all(sk_sleep(sk)); + if (wq && waitqueue_active(wq)) + wake_up_interruptible_all(wq); } static void svc_tcp_data_ready(struct sock *sk, int count) { struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data; + wait_queue_head_t *wq = sk_sleep(sk); dprintk("svc: socket %p TCP data ready (svsk %p)\n", sk, sk->sk_user_data); @@ -795,8 +801,8 @@ static void svc_tcp_data_ready(struct sock *sk, int count) set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); svc_xprt_enqueue(&svsk->sk_xprt); } - if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) - wake_up_interruptible(sk_sleep(sk)); + if (wq && waitqueue_active(wq)) + wake_up_interruptible(wq); } /* @@ -1531,6 +1537,7 @@ static void svc_sock_detach(struct svc_xprt *xprt) { struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt); struct sock *sk = svsk->sk_sk; + wait_queue_head_t *wq; dprintk("svc: svc_sock_detach(%p)\n", svsk); @@ -1539,8 +1546,9 @@ static void svc_sock_detach(struct svc_xprt *xprt) sk->sk_data_ready = svsk->sk_odata; sk->sk_write_space = svsk->sk_owspace; - if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) - 
wake_up_interruptible(sk_sleep(sk)); + wq = sk_sleep(sk); + if (wq && waitqueue_active(wq)) + wake_up_interruptible(wq); } /* @@ -1609,9 +1617,7 @@ static struct svc_xprt *svc_bc_create_socket(struct svc_serv *serv, */ static void svc_bc_sock_free(struct svc_xprt *xprt) { - if (xprt) { - kfree(xprt->xpt_bc_sid); + if (xprt) kfree(container_of(xprt, struct svc_sock, sk_xprt)); - } } #endif /* CONFIG_NFS_V4_1 */ diff --git a/net/tipc/Kconfig b/net/tipc/Kconfig index 0436927369f3..2c5954b85933 100644 --- a/net/tipc/Kconfig +++ b/net/tipc/Kconfig @@ -29,18 +29,6 @@ config TIPC_ADVANCED Saying Y here will open some advanced configuration for TIPC. Most users do not need to bother; if unsure, just say N. -config TIPC_NODES - int "Maximum number of nodes in a cluster" - depends on TIPC_ADVANCED - range 8 2047 - default "255" - help - Specifies how many nodes can be supported in a TIPC cluster. - Can range from 8 to 2047 nodes; default is 255. - - Setting this to a smaller value saves some memory; - setting it to higher allows for more nodes. - config TIPC_PORTS int "Maximum number of ports in a node" depends on TIPC_ADVANCED diff --git a/net/tipc/addr.c b/net/tipc/addr.c index 88463d9a6f12..a6fdab33877e 100644 --- a/net/tipc/addr.c +++ b/net/tipc/addr.c @@ -2,7 +2,7 @@ * net/tipc/addr.c: TIPC address utility routines * * Copyright (c) 2000-2006, Ericsson AB - * Copyright (c) 2004-2005, Wind River Systems + * Copyright (c) 2004-2005, 2010-2011, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -41,7 +41,7 @@ * tipc_addr_domain_valid - validates a network domain address * * Accepts <Z.C.N>, <Z.C.0>, <Z.0.0>, and <0.0.0>, - * where Z, C, and N are non-zero and do not exceed the configured limits. + * where Z, C, and N are non-zero. * * Returns 1 if domain address is valid, otherwise 0 */ @@ -51,10 +51,6 @@ int tipc_addr_domain_valid(u32 addr) u32 n = tipc_node(addr); u32 c = tipc_cluster(addr); u32 z = tipc_zone(addr); - u32 max_nodes = tipc_max_nodes; - - if (n > max_nodes) - return 0; if (n && (!z || !c)) return 0; @@ -66,8 +62,7 @@ int tipc_addr_domain_valid(u32 addr) /** * tipc_addr_node_valid - validates a proposed network address for this node * - * Accepts <Z.C.N>, where Z, C, and N are non-zero and do not exceed - * the configured limits. + * Accepts <Z.C.N>, where Z, C, and N are non-zero. * * Returns 1 if address can be used, otherwise 0 */ @@ -81,9 +76,9 @@ int tipc_in_scope(u32 domain, u32 addr) { if (!domain || (domain == addr)) return 1; - if (domain == (addr & 0xfffff000u)) /* domain <Z.C.0> */ + if (domain == tipc_cluster_mask(addr)) /* domain <Z.C.0> */ return 1; - if (domain == (addr & 0xff000000u)) /* domain <Z.0.0> */ + if (domain == tipc_zone_mask(addr)) /* domain <Z.0.0> */ return 1; return 0; } diff --git a/net/tipc/addr.h b/net/tipc/addr.h index 2490fadd0caf..8971aba99aea 100644 --- a/net/tipc/addr.h +++ b/net/tipc/addr.h @@ -37,6 +37,16 @@ #ifndef _TIPC_ADDR_H #define _TIPC_ADDR_H +static inline u32 tipc_zone_mask(u32 addr) +{ + return addr & 0xff000000u; +} + +static inline u32 tipc_cluster_mask(u32 addr) +{ + return addr & 0xfffff000u; +} + static inline int in_own_cluster(u32 addr) { return !((addr ^ tipc_own_addr) >> 12); @@ -49,14 +59,13 @@ static inline int in_own_cluster(u32 addr) * after a network hop. 
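The new tipc_zone_mask()/tipc_cluster_mask() helpers above encode the <Z.C.N> address layout: zone in bits 31-24, cluster in bits 23-12, node in bits 11-0. A small user-space illustration of what the masks produce; the constants mirror the helpers shown above and the program itself is illustrative only:

	#include <stdint.h>
	#include <stdio.h>

	static uint32_t tipc_addr(unsigned int z, unsigned int c, unsigned int n)
	{
		return ((uint32_t)z << 24) | (c << 12) | n;
	}

	static uint32_t tipc_zone_mask(uint32_t addr)    { return addr & 0xff000000u; }
	static uint32_t tipc_cluster_mask(uint32_t addr) { return addr & 0xfffff000u; }

	int main(void)
	{
		uint32_t node = tipc_addr(1, 1, 10);	/* <1.1.10> = 0x0100100a */

		/* 0x01001000, i.e. <1.1.0>: the node's own cluster */
		printf("cluster scope: 0x%08x\n", tipc_cluster_mask(node));
		/* 0x01000000, i.e. <1.0.0>: the node's own zone */
		printf("zone scope:    0x%08x\n", tipc_zone_mask(node));
		return 0;
	}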
*/ -static inline int addr_domain(int sc) +static inline u32 addr_domain(u32 sc) { if (likely(sc == TIPC_NODE_SCOPE)) return tipc_own_addr; if (sc == TIPC_CLUSTER_SCOPE) - return tipc_addr(tipc_zone(tipc_own_addr), - tipc_cluster(tipc_own_addr), 0); - return tipc_addr(tipc_zone(tipc_own_addr), 0, 0); + return tipc_cluster_mask(tipc_own_addr); + return tipc_zone_mask(tipc_own_addr); } int tipc_addr_domain_valid(u32); diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 70ab5ef48766..7dc1dc7151ea 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -3,7 +3,7 @@ * * Copyright (c) 2004-2006, Ericsson AB * Copyright (c) 2004, Intel Corporation. - * Copyright (c) 2005, Wind River Systems + * Copyright (c) 2005, 2010-2011, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -61,8 +61,8 @@ */ struct bcbearer_pair { - struct bearer *primary; - struct bearer *secondary; + struct tipc_bearer *primary; + struct tipc_bearer *secondary; }; /** @@ -81,7 +81,7 @@ struct bcbearer_pair { */ struct bcbearer { - struct bearer bearer; + struct tipc_bearer bearer; struct media media; struct bcbearer_pair bpairs[MAX_BEARERS]; struct bcbearer_pair bpairs_temp[TIPC_MAX_LINK_PRI + 1]; @@ -93,6 +93,7 @@ struct bcbearer { * struct bclink - link used for broadcast messages * @link: (non-standard) broadcast link structure * @node: (non-standard) node structure representing b'cast link's peer node + * @retransmit_to: node that most recently requested a retransmit * * Handles sequence numbering, fragmentation, bundling, etc. */ @@ -100,6 +101,7 @@ struct bcbearer { struct bclink { struct link link; struct tipc_node node; + struct tipc_node *retransmit_to; }; @@ -184,6 +186,17 @@ static int bclink_ack_allowed(u32 n) /** + * tipc_bclink_retransmit_to - get most recent node to request retransmission + * + * Called with bc_lock locked + */ + +struct tipc_node *tipc_bclink_retransmit_to(void) +{ + return bclink->retransmit_to; +} + +/** * bclink_retransmit_pkt - retransmit broadcast packets * @after: sequence number of last packet to *not* retransmit * @to: sequence number of last packet to retransmit @@ -285,6 +298,7 @@ static void bclink_send_nack(struct tipc_node *n_ptr) msg = buf_msg(buf); tipc_msg_init(msg, BCAST_PROTOCOL, STATE_MSG, INT_H_SIZE, n_ptr->addr); + msg_set_non_seq(msg, 1); msg_set_mc_netid(msg, tipc_net_id); msg_set_bcast_ack(msg, mod(n_ptr->bclink.last_in)); msg_set_bcgap_after(msg, n_ptr->bclink.gap_after); @@ -405,8 +419,6 @@ int tipc_bclink_send_msg(struct sk_buff *buf) else bclink_set_last_sent(); - if (bcl->out_queue_size > bcl->stats.max_queue_sz) - bcl->stats.max_queue_sz = bcl->out_queue_size; bcl->stats.queue_sz_counts++; bcl->stats.accu_queue_sz += bcl->out_queue_size; @@ -444,10 +456,9 @@ void tipc_bclink_recv_pkt(struct sk_buff *buf) tipc_node_unlock(node); spin_lock_bh(&bc_lock); bcl->stats.recv_nacks++; - bcl->owner->next = node; /* remember requestor */ + bclink->retransmit_to = node; bclink_retransmit_pkt(msg_bcgap_after(msg), msg_bcgap_to(msg)); - bcl->owner->next = NULL; spin_unlock_bh(&bc_lock); } else { tipc_bclink_peek_nack(msg_destnode(msg), @@ -574,8 +585,8 @@ static int tipc_bcbearer_send(struct sk_buff *buf, bcbearer->remains = tipc_bcast_nmap; for (bp_index = 0; bp_index < MAX_BEARERS; bp_index++) { - struct bearer *p = bcbearer->bpairs[bp_index].primary; - struct bearer *s = bcbearer->bpairs[bp_index].secondary; + struct tipc_bearer *p = bcbearer->bpairs[bp_index].primary; + struct tipc_bearer *s = 
bcbearer->bpairs[bp_index].secondary; if (!p) break; /* no more bearers to try */ @@ -584,11 +595,11 @@ static int tipc_bcbearer_send(struct sk_buff *buf, if (bcbearer->remains_new.count == bcbearer->remains.count) continue; /* bearer pair doesn't add anything */ - if (p->publ.blocked || - p->media->send_msg(buf, &p->publ, &p->media->bcast_addr)) { + if (p->blocked || + p->media->send_msg(buf, p, &p->media->bcast_addr)) { /* unable to send on primary bearer */ - if (!s || s->publ.blocked || - s->media->send_msg(buf, &s->publ, + if (!s || s->blocked || + s->media->send_msg(buf, s, &s->media->bcast_addr)) { /* unable to send on either bearer */ continue; @@ -633,7 +644,7 @@ void tipc_bcbearer_sort(void) memset(bp_temp, 0, sizeof(bcbearer->bpairs_temp)); for (b_index = 0; b_index < MAX_BEARERS; b_index++) { - struct bearer *b = &tipc_bearers[b_index]; + struct tipc_bearer *b = &tipc_bearers[b_index]; if (!b->active || !b->nodes.count) continue; @@ -682,12 +693,12 @@ void tipc_bcbearer_sort(void) void tipc_bcbearer_push(void) { - struct bearer *b_ptr; + struct tipc_bearer *b_ptr; spin_lock_bh(&bc_lock); b_ptr = &bcbearer->bearer; - if (b_ptr->publ.blocked) { - b_ptr->publ.blocked = 0; + if (b_ptr->blocked) { + b_ptr->blocked = 0; tipc_bearer_lock_push(b_ptr); } spin_unlock_bh(&bc_lock); diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h index 51f8c5326ce6..500c97f1c859 100644 --- a/net/tipc/bcast.h +++ b/net/tipc/bcast.h @@ -2,7 +2,7 @@ * net/tipc/bcast.h: Include file for TIPC broadcast code * * Copyright (c) 2003-2006, Ericsson AB - * Copyright (c) 2005, Wind River Systems + * Copyright (c) 2005, 2010-2011, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -90,6 +90,7 @@ void tipc_port_list_free(struct port_list *pl_ptr); int tipc_bclink_init(void); void tipc_bclink_stop(void); +struct tipc_node *tipc_bclink_retransmit_to(void); void tipc_bclink_acknowledge(struct tipc_node *n_ptr, u32 acked); int tipc_bclink_send_msg(struct sk_buff *buf); void tipc_bclink_recv_pkt(struct sk_buff *buf); diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 837b7a467885..411719feb803 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -2,7 +2,7 @@ * net/tipc/bearer.c: TIPC bearer code * * Copyright (c) 1996-2006, Ericsson AB - * Copyright (c) 2004-2006, Wind River Systems + * Copyright (c) 2004-2006, 2010-2011, Wind River Systems * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -44,7 +44,7 @@ static struct media media_list[MAX_MEDIA]; static u32 media_count; -struct bearer tipc_bearers[MAX_BEARERS]; +struct tipc_bearer tipc_bearers[MAX_BEARERS]; /** * media_name_valid - validate media name @@ -158,7 +158,6 @@ int tipc_register_media(u32 media_type, m_ptr->disable_bearer = disable; m_ptr->addr2str = addr2str; memcpy(&m_ptr->bcast_addr, bcast_addr, sizeof(*bcast_addr)); - m_ptr->bcast = 1; strcpy(m_ptr->name, name); m_ptr->priority = bearer_priority; m_ptr->tolerance = link_tolerance; @@ -278,13 +277,13 @@ static int bearer_name_validate(const char *name, * bearer_find - locates bearer object with matching bearer name */ -static struct bearer *bearer_find(const char *name) +static struct tipc_bearer *bearer_find(const char *name) { - struct bearer *b_ptr; + struct tipc_bearer *b_ptr; u32 i; for (i = 0, b_ptr = tipc_bearers; i < MAX_BEARERS; i++, b_ptr++) { - if (b_ptr->active && (!strcmp(b_ptr->publ.name, name))) + if (b_ptr->active && (!strcmp(b_ptr->name, name))) return b_ptr; } return NULL; @@ -294,16 +293,16 @@ static struct bearer *bearer_find(const char *name) * tipc_bearer_find_interface - locates bearer object with matching interface name */ -struct bearer *tipc_bearer_find_interface(const char *if_name) +struct tipc_bearer *tipc_bearer_find_interface(const char *if_name) { - struct bearer *b_ptr; + struct tipc_bearer *b_ptr; char *b_if_name; u32 i; for (i = 0, b_ptr = tipc_bearers; i < MAX_BEARERS; i++, b_ptr++) { if (!b_ptr->active) continue; - b_if_name = strchr(b_ptr->publ.name, ':') + 1; + b_if_name = strchr(b_ptr->name, ':') + 1; if (!strcmp(b_if_name, if_name)) return b_ptr; } @@ -318,7 +317,7 @@ struct sk_buff *tipc_bearer_get_names(void) { struct sk_buff *buf; struct media *m_ptr; - struct bearer *b_ptr; + struct tipc_bearer *b_ptr; int i, j; buf = tipc_cfg_reply_alloc(MAX_BEARERS * TLV_SPACE(TIPC_MAX_BEARER_NAME)); @@ -331,8 +330,8 @@ struct sk_buff *tipc_bearer_get_names(void) b_ptr = &tipc_bearers[j]; if (b_ptr->active && (b_ptr->media == m_ptr)) { tipc_cfg_append_tlv(buf, TIPC_TLV_BEARER_NAME, - b_ptr->publ.name, - strlen(b_ptr->publ.name) + 1); + b_ptr->name, + strlen(b_ptr->name) + 1); } } } @@ -340,14 +339,14 @@ struct sk_buff *tipc_bearer_get_names(void) return buf; } -void tipc_bearer_add_dest(struct bearer *b_ptr, u32 dest) +void tipc_bearer_add_dest(struct tipc_bearer *b_ptr, u32 dest) { tipc_nmap_add(&b_ptr->nodes, dest); tipc_disc_update_link_req(b_ptr->link_req); tipc_bcbearer_sort(); } -void tipc_bearer_remove_dest(struct bearer *b_ptr, u32 dest) +void tipc_bearer_remove_dest(struct tipc_bearer *b_ptr, u32 dest) { tipc_nmap_remove(&b_ptr->nodes, dest); tipc_disc_update_link_req(b_ptr->link_req); @@ -362,12 +361,12 @@ void tipc_bearer_remove_dest(struct bearer *b_ptr, u32 dest) * bearer.lock must be taken before calling * Returns binary true(1) ore false(0) */ -static int bearer_push(struct bearer *b_ptr) +static int bearer_push(struct tipc_bearer *b_ptr) { u32 res = 0; struct link *ln, *tln; - if (b_ptr->publ.blocked) + if (b_ptr->blocked) return 0; while (!list_empty(&b_ptr->cong_links) && (res != PUSH_FAILED)) { @@ -382,13 +381,13 @@ static int bearer_push(struct bearer *b_ptr) return list_empty(&b_ptr->cong_links); } -void tipc_bearer_lock_push(struct bearer *b_ptr) +void tipc_bearer_lock_push(struct tipc_bearer *b_ptr) { int res; - spin_lock_bh(&b_ptr->publ.lock); + spin_lock_bh(&b_ptr->lock); res = bearer_push(b_ptr); - spin_unlock_bh(&b_ptr->publ.lock); + 
spin_unlock_bh(&b_ptr->lock); if (res) tipc_bcbearer_push(); } @@ -398,16 +397,14 @@ void tipc_bearer_lock_push(struct bearer *b_ptr) * Interrupt enabling new requests after bearer congestion or blocking: * See bearer_send(). */ -void tipc_continue(struct tipc_bearer *tb_ptr) +void tipc_continue(struct tipc_bearer *b_ptr) { - struct bearer *b_ptr = (struct bearer *)tb_ptr; - - spin_lock_bh(&b_ptr->publ.lock); + spin_lock_bh(&b_ptr->lock); b_ptr->continue_count++; if (!list_empty(&b_ptr->cong_links)) tipc_k_signal((Handler)tipc_bearer_lock_push, (unsigned long)b_ptr); - b_ptr->publ.blocked = 0; - spin_unlock_bh(&b_ptr->publ.lock); + b_ptr->blocked = 0; + spin_unlock_bh(&b_ptr->lock); } /* @@ -418,7 +415,7 @@ void tipc_continue(struct tipc_bearer *tb_ptr) * bearer.lock is busy */ -static void tipc_bearer_schedule_unlocked(struct bearer *b_ptr, struct link *l_ptr) +static void tipc_bearer_schedule_unlocked(struct tipc_bearer *b_ptr, struct link *l_ptr) { list_move_tail(&l_ptr->link_list, &b_ptr->cong_links); } @@ -431,11 +428,11 @@ static void tipc_bearer_schedule_unlocked(struct bearer *b_ptr, struct link *l_p * bearer.lock is free */ -void tipc_bearer_schedule(struct bearer *b_ptr, struct link *l_ptr) +void tipc_bearer_schedule(struct tipc_bearer *b_ptr, struct link *l_ptr) { - spin_lock_bh(&b_ptr->publ.lock); + spin_lock_bh(&b_ptr->lock); tipc_bearer_schedule_unlocked(b_ptr, l_ptr); - spin_unlock_bh(&b_ptr->publ.lock); + spin_unlock_bh(&b_ptr->lock); } @@ -444,18 +441,18 @@ void tipc_bearer_schedule(struct bearer *b_ptr, struct link *l_ptr) * and if there is, try to resolve it before returning. * 'tipc_net_lock' is read_locked when this function is called */ -int tipc_bearer_resolve_congestion(struct bearer *b_ptr, struct link *l_ptr) +int tipc_bearer_resolve_congestion(struct tipc_bearer *b_ptr, struct link *l_ptr) { int res = 1; if (list_empty(&b_ptr->cong_links)) return 1; - spin_lock_bh(&b_ptr->publ.lock); + spin_lock_bh(&b_ptr->lock); if (!bearer_push(b_ptr)) { tipc_bearer_schedule_unlocked(b_ptr, l_ptr); res = 0; } - spin_unlock_bh(&b_ptr->publ.lock); + spin_unlock_bh(&b_ptr->lock); return res; } @@ -463,9 +460,9 @@ int tipc_bearer_resolve_congestion(struct bearer *b_ptr, struct link *l_ptr) * tipc_bearer_congested - determines if bearer is currently congested */ -int tipc_bearer_congested(struct bearer *b_ptr, struct link *l_ptr) +int tipc_bearer_congested(struct tipc_bearer *b_ptr, struct link *l_ptr) { - if (unlikely(b_ptr->publ.blocked)) + if (unlikely(b_ptr->blocked)) return 1; if (likely(list_empty(&b_ptr->cong_links))) return 0; @@ -476,9 +473,9 @@ int tipc_bearer_congested(struct bearer *b_ptr, struct link *l_ptr) * tipc_enable_bearer - enable bearer with the given name */ -int tipc_enable_bearer(const char *name, u32 bcast_scope, u32 priority) +int tipc_enable_bearer(const char *name, u32 disc_domain, u32 priority) { - struct bearer *b_ptr; + struct tipc_bearer *b_ptr; struct media *m_ptr; struct bearer_name b_name; char addr_string[16]; @@ -496,9 +493,9 @@ int tipc_enable_bearer(const char *name, u32 bcast_scope, u32 priority) warn("Bearer <%s> rejected, illegal name\n", name); return -EINVAL; } - if (!tipc_addr_domain_valid(bcast_scope) || - !tipc_in_scope(bcast_scope, tipc_own_addr)) { - warn("Bearer <%s> rejected, illegal broadcast scope\n", name); + if (!tipc_addr_domain_valid(disc_domain) || + !tipc_in_scope(disc_domain, tipc_own_addr)) { + warn("Bearer <%s> rejected, illegal discovery domain\n", name); return -EINVAL; } if ((priority < TIPC_MIN_LINK_PRI || @@ -528,7 
+525,7 @@ restart: bearer_id = i; continue; } - if (!strcmp(name, tipc_bearers[i].publ.name)) { + if (!strcmp(name, tipc_bearers[i].name)) { warn("Bearer <%s> rejected, already enabled\n", name); goto failed; } @@ -551,8 +548,8 @@ restart: } b_ptr = &tipc_bearers[bearer_id]; - strcpy(b_ptr->publ.name, name); - res = m_ptr->enable_bearer(&b_ptr->publ); + strcpy(b_ptr->name, name); + res = m_ptr->enable_bearer(b_ptr); if (res) { warn("Bearer <%s> rejected, enable failure (%d)\n", name, -res); goto failed; @@ -562,18 +559,15 @@ restart: b_ptr->media = m_ptr; b_ptr->net_plane = bearer_id + 'A'; b_ptr->active = 1; - b_ptr->detect_scope = bcast_scope; b_ptr->priority = priority; INIT_LIST_HEAD(&b_ptr->cong_links); INIT_LIST_HEAD(&b_ptr->links); - if (m_ptr->bcast) { - b_ptr->link_req = tipc_disc_init_link_req(b_ptr, &m_ptr->bcast_addr, - bcast_scope, 2); - } - spin_lock_init(&b_ptr->publ.lock); + b_ptr->link_req = tipc_disc_init_link_req(b_ptr, &m_ptr->bcast_addr, + disc_domain); + spin_lock_init(&b_ptr->lock); write_unlock_bh(&tipc_net_lock); info("Enabled bearer <%s>, discovery domain %s, priority %u\n", - name, tipc_addr_string_fill(addr_string, bcast_scope), priority); + name, tipc_addr_string_fill(addr_string, disc_domain), priority); return 0; failed: write_unlock_bh(&tipc_net_lock); @@ -587,7 +581,7 @@ failed: int tipc_block_bearer(const char *name) { - struct bearer *b_ptr = NULL; + struct tipc_bearer *b_ptr = NULL; struct link *l_ptr; struct link *temp_l_ptr; @@ -600,8 +594,8 @@ int tipc_block_bearer(const char *name) } info("Blocking bearer <%s>\n", name); - spin_lock_bh(&b_ptr->publ.lock); - b_ptr->publ.blocked = 1; + spin_lock_bh(&b_ptr->lock); + b_ptr->blocked = 1; list_for_each_entry_safe(l_ptr, temp_l_ptr, &b_ptr->links, link_list) { struct tipc_node *n_ptr = l_ptr->owner; @@ -609,7 +603,7 @@ int tipc_block_bearer(const char *name) tipc_link_reset(l_ptr); spin_unlock_bh(&n_ptr->lock); } - spin_unlock_bh(&b_ptr->publ.lock); + spin_unlock_bh(&b_ptr->lock); read_unlock_bh(&tipc_net_lock); return 0; } @@ -620,27 +614,27 @@ int tipc_block_bearer(const char *name) * Note: This routine assumes caller holds tipc_net_lock. */ -static void bearer_disable(struct bearer *b_ptr) +static void bearer_disable(struct tipc_bearer *b_ptr) { struct link *l_ptr; struct link *temp_l_ptr; - info("Disabling bearer <%s>\n", b_ptr->publ.name); + info("Disabling bearer <%s>\n", b_ptr->name); tipc_disc_stop_link_req(b_ptr->link_req); - spin_lock_bh(&b_ptr->publ.lock); + spin_lock_bh(&b_ptr->lock); b_ptr->link_req = NULL; - b_ptr->publ.blocked = 1; - b_ptr->media->disable_bearer(&b_ptr->publ); + b_ptr->blocked = 1; + b_ptr->media->disable_bearer(b_ptr); list_for_each_entry_safe(l_ptr, temp_l_ptr, &b_ptr->links, link_list) { tipc_link_delete(l_ptr); } - spin_unlock_bh(&b_ptr->publ.lock); - memset(b_ptr, 0, sizeof(struct bearer)); + spin_unlock_bh(&b_ptr->lock); + memset(b_ptr, 0, sizeof(struct tipc_bearer)); } int tipc_disable_bearer(const char *name) { - struct bearer *b_ptr; + struct tipc_bearer *b_ptr; int res; write_lock_bh(&tipc_net_lock); diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h index 85f451d5aacf..31d6172b20fd 100644 --- a/net/tipc/bearer.h +++ b/net/tipc/bearer.h @@ -2,7 +2,7 @@ * net/tipc/bearer.h: Include file for TIPC bearer code * * Copyright (c) 1996-2006, Ericsson AB - * Copyright (c) 2005, Wind River Systems + * Copyright (c) 2005, 2010-2011, Wind River Systems * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -61,26 +61,7 @@ struct tipc_media_addr { } dev_addr; }; -/** - * struct tipc_bearer - TIPC bearer info available to media code - * @usr_handle: pointer to additional media-specific information about bearer - * @mtu: max packet size bearer can support - * @blocked: non-zero if bearer is blocked - * @lock: spinlock for controlling access to bearer - * @addr: media-specific address associated with bearer - * @name: bearer name (format = media:interface) - * - * Note: TIPC initializes "name" and "lock" fields; media code is responsible - * for initialization all other fields when a bearer is enabled. - */ -struct tipc_bearer { - void *usr_handle; - u32 mtu; - int blocked; - spinlock_t lock; - struct tipc_media_addr addr; - char name[TIPC_MAX_BEARER_NAME]; -}; +struct tipc_bearer; /** * struct media - TIPC media information available to internal users @@ -89,7 +70,6 @@ struct tipc_bearer { * @disable_bearer: routine which disables a bearer * @addr2str: routine which converts bearer's address to string form * @bcast_addr: media address used in broadcasting - * @bcast: non-zero if media supports broadcasting [currently mandatory] * @priority: default link (and bearer) priority * @tolerance: default time (in ms) before declaring link failure * @window: default window (in packets) before declaring link congestion @@ -106,7 +86,6 @@ struct media { char *(*addr2str)(struct tipc_media_addr *a, char *str_buf, int str_size); struct tipc_media_addr bcast_addr; - int bcast; u32 priority; u32 tolerance; u32 window; @@ -115,11 +94,15 @@ struct media { }; /** - * struct bearer - TIPC bearer information available to internal users - * @publ: bearer information available to privileged users + * struct tipc_bearer - TIPC bearer structure + * @usr_handle: pointer to additional media-specific information about bearer + * @mtu: max packet size bearer can support + * @blocked: non-zero if bearer is blocked + * @lock: spinlock for controlling access to bearer + * @addr: media-specific address associated with bearer + * @name: bearer name (format = media:interface) * @media: ptr to media structure associated with bearer * @priority: default link priority for bearer - * @detect_scope: network address mask used during automatic link creation * @identity: array index of this bearer within TIPC bearer array * @link_req: ptr to (optional) structure making periodic link setup requests * @links: list of non-congested links associated with bearer @@ -128,13 +111,20 @@ struct media { * @active: non-zero if bearer structure is represents a bearer * @net_plane: network plane ('A' through 'H') currently associated with bearer * @nodes: indicates which nodes in cluster can be reached through bearer + * + * Note: media-specific code is responsible for initialization of the fields + * indicated below when a bearer is enabled; TIPC's generic bearer code takes + * care of initializing all other fields. 
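The note above divides responsibility for the struct tipc_bearer fields between TIPC's generic bearer code and the media driver. A hedged sketch of what an enable_bearer() hook is then expected to fill in; the MTU value and the zeroed media address are purely illustrative and not taken from any real TIPC media:

	#include <linux/string.h>

	#include "bearer.h"	/* the header being reworked in this hunk */

	static int demo_enable_bearer(struct tipc_bearer *tb_ptr)
	{
		tb_ptr->usr_handle = NULL;	/* media-private state, if any */
		tb_ptr->mtu = 1500;		/* illustrative MTU */
		tb_ptr->blocked = 0;		/* bearer may transmit immediately */
		memset(&tb_ptr->addr, 0, sizeof(tb_ptr->addr));
		/* name, lock and the remaining fields are handled by TIPC itself */
		return 0;			/* non-zero would make enabling fail */
	}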
*/ - -struct bearer { - struct tipc_bearer publ; +struct tipc_bearer { + void *usr_handle; /* initialized by media */ + u32 mtu; /* initialized by media */ + int blocked; /* initialized by media */ + struct tipc_media_addr addr; /* initialized by media */ + char name[TIPC_MAX_BEARER_NAME]; + spinlock_t lock; struct media *media; u32 priority; - u32 detect_scope; u32 identity; struct link_req *link_req; struct list_head links; @@ -152,7 +142,7 @@ struct bearer_name { struct link; -extern struct bearer tipc_bearers[]; +extern struct tipc_bearer tipc_bearers[]; /* * TIPC routines available to supported media types */ @@ -173,7 +163,7 @@ void tipc_recv_msg(struct sk_buff *buf, struct tipc_bearer *tb_ptr); int tipc_block_bearer(const char *name); void tipc_continue(struct tipc_bearer *tb_ptr); -int tipc_enable_bearer(const char *bearer_name, u32 bcast_scope, u32 priority); +int tipc_enable_bearer(const char *bearer_name, u32 disc_domain, u32 priority); int tipc_disable_bearer(const char *name); /* @@ -186,14 +176,14 @@ void tipc_media_addr_printf(struct print_buf *pb, struct tipc_media_addr *a); struct sk_buff *tipc_media_get_names(void); struct sk_buff *tipc_bearer_get_names(void); -void tipc_bearer_add_dest(struct bearer *b_ptr, u32 dest); -void tipc_bearer_remove_dest(struct bearer *b_ptr, u32 dest); -void tipc_bearer_schedule(struct bearer *b_ptr, struct link *l_ptr); -struct bearer *tipc_bearer_find_interface(const char *if_name); -int tipc_bearer_resolve_congestion(struct bearer *b_ptr, struct link *l_ptr); -int tipc_bearer_congested(struct bearer *b_ptr, struct link *l_ptr); +void tipc_bearer_add_dest(struct tipc_bearer *b_ptr, u32 dest); +void tipc_bearer_remove_dest(struct tipc_bearer *b_ptr, u32 dest); +void tipc_bearer_schedule(struct tipc_bearer *b_ptr, struct link *l_ptr); +struct tipc_bearer *tipc_bearer_find_interface(const char *if_name); +int tipc_bearer_resolve_congestion(struct tipc_bearer *b_ptr, struct link *l_ptr); +int tipc_bearer_congested(struct tipc_bearer *b_ptr, struct link *l_ptr); void tipc_bearer_stop(void); -void tipc_bearer_lock_push(struct bearer *b_ptr); +void tipc_bearer_lock_push(struct tipc_bearer *b_ptr); /** @@ -214,10 +204,11 @@ void tipc_bearer_lock_push(struct tipc_bearer *b_ptr); * and let TIPC's link code deal with the undelivered message. */ -static inline int tipc_bearer_send(struct bearer *b_ptr, struct sk_buff *buf, +static inline int tipc_bearer_send(struct tipc_bearer *b_ptr, + struct sk_buff *buf, struct tipc_media_addr *dest) { - return !b_ptr->media->send_msg(buf, &b_ptr->publ, dest); + return !b_ptr->media->send_msg(buf, b_ptr, dest); } #endif /* _TIPC_BEARER_H */ diff --git a/net/tipc/config.c b/net/tipc/config.c index e16750dcf3c1..b25a396b7e1e 100644 --- a/net/tipc/config.c +++ b/net/tipc/config.c @@ -2,7 +2,7 @@ * net/tipc/config.c: TIPC configuration management code * * Copyright (c) 2002-2006, Ericsson AB - * Copyright (c) 2004-2007, Wind River Systems + * Copyright (c) 2004-2007, 2010-2011, Wind River Systems * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -148,7 +148,7 @@ static struct sk_buff *cfg_enable_bearer(void) args = (struct tipc_bearer_config *)TLV_DATA(req_tlv_area); if (tipc_enable_bearer(args->name, - ntohl(args->detect_scope), + ntohl(args->disc_domain), ntohl(args->priority))) return tipc_cfg_reply_error_string("unable to enable bearer"); @@ -260,25 +260,6 @@ static struct sk_buff *cfg_set_max_ports(void) return tipc_cfg_reply_none(); } -static struct sk_buff *cfg_set_max_nodes(void) -{ - u32 value; - - if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_UNSIGNED)) - return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR); - value = ntohl(*(__be32 *)TLV_DATA(req_tlv_area)); - if (value == tipc_max_nodes) - return tipc_cfg_reply_none(); - if (value != delimit(value, 8, 2047)) - return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE - " (max nodes must be 8-2047)"); - if (tipc_mode == TIPC_NET_MODE) - return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED - " (cannot change max nodes once TIPC has joined a network)"); - tipc_max_nodes = value; - return tipc_cfg_reply_none(); -} - static struct sk_buff *cfg_set_netid(void) { u32 value; @@ -397,9 +378,6 @@ struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area case TIPC_CMD_SET_MAX_SUBSCR: rep_tlv_buf = cfg_set_max_subscriptions(); break; - case TIPC_CMD_SET_MAX_NODES: - rep_tlv_buf = cfg_set_max_nodes(); - break; case TIPC_CMD_SET_NETID: rep_tlv_buf = cfg_set_netid(); break; @@ -415,9 +393,6 @@ struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area case TIPC_CMD_GET_MAX_SUBSCR: rep_tlv_buf = tipc_cfg_reply_unsigned(tipc_max_subscriptions); break; - case TIPC_CMD_GET_MAX_NODES: - rep_tlv_buf = tipc_cfg_reply_unsigned(tipc_max_nodes); - break; case TIPC_CMD_GET_NETID: rep_tlv_buf = tipc_cfg_reply_unsigned(tipc_net_id); break; @@ -431,6 +406,8 @@ struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area case TIPC_CMD_GET_MAX_SLAVES: case TIPC_CMD_SET_MAX_CLUSTERS: case TIPC_CMD_GET_MAX_CLUSTERS: + case TIPC_CMD_SET_MAX_NODES: + case TIPC_CMD_GET_MAX_NODES: rep_tlv_buf = tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED " (obsolete command)"); break; diff --git a/net/tipc/core.c b/net/tipc/core.c index e071579e0850..c9a73e7763f6 100644 --- a/net/tipc/core.c +++ b/net/tipc/core.c @@ -2,7 +2,7 @@ * net/tipc/core.c: TIPC module code * * Copyright (c) 2003-2006, Ericsson AB - * Copyright (c) 2005-2006, Wind River Systems + * Copyright (c) 2005-2006, 2010-2011, Wind River Systems * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -41,10 +41,6 @@ #include "config.h" -#ifndef CONFIG_TIPC_NODES -#define CONFIG_TIPC_NODES 255 -#endif - #ifndef CONFIG_TIPC_PORTS #define CONFIG_TIPC_PORTS 8191 #endif @@ -57,7 +53,6 @@ int tipc_mode = TIPC_NOT_RUNNING; int tipc_random; -atomic_t tipc_user_count = ATOMIC_INIT(0); const char tipc_alphabet[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_."; @@ -65,7 +60,6 @@ const char tipc_alphabet[] = /* configurable TIPC parameters */ u32 tipc_own_addr; -int tipc_max_nodes; int tipc_max_ports; int tipc_max_subscriptions; int tipc_max_publications; @@ -193,7 +187,6 @@ static int __init tipc_init(void) tipc_max_publications = 10000; tipc_max_subscriptions = 2000; tipc_max_ports = CONFIG_TIPC_PORTS; - tipc_max_nodes = CONFIG_TIPC_NODES; tipc_net_id = 4711; res = tipc_core_start(); diff --git a/net/tipc/core.h b/net/tipc/core.h index 997158546e25..436dda1159d2 100644 --- a/net/tipc/core.h +++ b/net/tipc/core.h @@ -2,7 +2,7 @@ * net/tipc/core.h: Include file for TIPC global declarations * * Copyright (c) 2005-2006, Ericsson AB - * Copyright (c) 2005-2007, Wind River Systems + * Copyright (c) 2005-2007, 2010-2011, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -147,7 +147,6 @@ void tipc_msg_dbg(struct print_buf *, struct tipc_msg *, const char *); */ extern u32 tipc_own_addr; -extern int tipc_max_nodes; extern int tipc_max_ports; extern int tipc_max_subscriptions; extern int tipc_max_publications; @@ -161,7 +160,6 @@ extern int tipc_remote_management; extern int tipc_mode; extern int tipc_random; extern const char tipc_alphabet[]; -extern atomic_t tipc_user_count; /* diff --git a/net/tipc/discover.c b/net/tipc/discover.c index fa026bd91a68..491eff56b9da 100644 --- a/net/tipc/discover.c +++ b/net/tipc/discover.c @@ -2,7 +2,7 @@ * net/tipc/discover.c * * Copyright (c) 2003-2006, Ericsson AB - * Copyright (c) 2005-2006, Wind River Systems + * Copyright (c) 2005-2006, 2010-2011, Wind River Systems * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -57,7 +57,7 @@ * @timer_intv: current interval between requests (in ms) */ struct link_req { - struct bearer *bearer; + struct tipc_bearer *bearer; struct tipc_media_addr dest; struct sk_buff *buf; struct timer_list timer; @@ -67,27 +67,24 @@ struct link_req { /** * tipc_disc_init_msg - initialize a link setup message * @type: message type (request or response) - * @req_links: number of links associated with message * @dest_domain: network domain of node(s) which should respond to message * @b_ptr: ptr to bearer issuing message */ static struct sk_buff *tipc_disc_init_msg(u32 type, - u32 req_links, u32 dest_domain, - struct bearer *b_ptr) + struct tipc_bearer *b_ptr) { - struct sk_buff *buf = tipc_buf_acquire(DSC_H_SIZE); + struct sk_buff *buf = tipc_buf_acquire(INT_H_SIZE); struct tipc_msg *msg; if (buf) { msg = buf_msg(buf); - tipc_msg_init(msg, LINK_CONFIG, type, DSC_H_SIZE, dest_domain); + tipc_msg_init(msg, LINK_CONFIG, type, INT_H_SIZE, dest_domain); msg_set_non_seq(msg, 1); - msg_set_req_links(msg, req_links); msg_set_dest_domain(msg, dest_domain); msg_set_bc_netid(msg, tipc_net_id); - msg_set_media_addr(msg, &b_ptr->publ.addr); + msg_set_media_addr(msg, &b_ptr->addr); } return buf; } @@ -99,7 +96,7 @@ static struct sk_buff *tipc_disc_init_msg(u32 type, * @media_addr: media address advertised by duplicated node */ -static void disc_dupl_alert(struct bearer *b_ptr, u32 node_addr, +static void disc_dupl_alert(struct tipc_bearer *b_ptr, u32 node_addr, struct tipc_media_addr *media_addr) { char node_addr_str[16]; @@ -111,7 +108,7 @@ static void disc_dupl_alert(struct bearer *b_ptr, u32 node_addr, tipc_media_addr_printf(&pb, media_addr); tipc_printbuf_validate(&pb); warn("Duplicate %s using %s seen on <%s>\n", - node_addr_str, media_addr_str, b_ptr->publ.name); + node_addr_str, media_addr_str, b_ptr->name); } /** @@ -120,19 +117,23 @@ static void disc_dupl_alert(struct bearer *b_ptr, u32 node_addr, * @b_ptr: bearer that message arrived on */ -void tipc_disc_recv_msg(struct sk_buff *buf, struct bearer *b_ptr) +void tipc_disc_recv_msg(struct sk_buff *buf, struct tipc_bearer *b_ptr) { + struct tipc_node *n_ptr; struct link *link; - struct tipc_media_addr media_addr; + struct tipc_media_addr media_addr, *addr; + struct sk_buff *rbuf; struct tipc_msg *msg = buf_msg(buf); u32 dest = msg_dest_domain(msg); u32 orig = msg_prevnode(msg); u32 net_id = msg_bc_netid(msg); u32 type = msg_type(msg); + int link_fully_up; msg_get_media_addr(msg, &media_addr); buf_discard(buf); + /* Validate discovery message from requesting node */ if (net_id != tipc_net_id) return; if (!tipc_addr_domain_valid(dest)) @@ -140,63 +141,76 @@ void tipc_disc_recv_msg(struct sk_buff *buf, struct bearer *b_ptr) if (!tipc_addr_node_valid(orig)) return; if (orig == tipc_own_addr) { - if (memcmp(&media_addr, &b_ptr->publ.addr, sizeof(media_addr))) + if (memcmp(&media_addr, &b_ptr->addr, sizeof(media_addr))) disc_dupl_alert(b_ptr, tipc_own_addr, &media_addr); return; } if (!tipc_in_scope(dest, tipc_own_addr)) return; - if (in_own_cluster(orig)) { - /* Always accept link here */ - struct sk_buff *rbuf; - struct tipc_media_addr *addr; - struct tipc_node *n_ptr = tipc_node_find(orig); - int link_fully_up; - - if (n_ptr == NULL) { - n_ptr = tipc_node_create(orig); - if (!n_ptr) - return; - } - spin_lock_bh(&n_ptr->lock); - - /* Don't talk to neighbor during cleanup after last session */ + if (!in_own_cluster(orig)) + return; - if (n_ptr->cleanup_required) { - 
spin_unlock_bh(&n_ptr->lock); + /* Locate structure corresponding to requesting node */ + n_ptr = tipc_node_find(orig); + if (!n_ptr) { + n_ptr = tipc_node_create(orig); + if (!n_ptr) return; - } + } + tipc_node_lock(n_ptr); + + /* Don't talk to neighbor during cleanup after last session */ + if (n_ptr->cleanup_required) { + tipc_node_unlock(n_ptr); + return; + } + + link = n_ptr->links[b_ptr->identity]; - link = n_ptr->links[b_ptr->identity]; + /* Create a link endpoint for this bearer, if necessary */ + if (!link) { + link = tipc_link_create(n_ptr, b_ptr, &media_addr); if (!link) { - link = tipc_link_create(b_ptr, orig, &media_addr); - if (!link) { - spin_unlock_bh(&n_ptr->lock); - return; - } - } - addr = &link->media_addr; - if (memcmp(addr, &media_addr, sizeof(*addr))) { - if (tipc_link_is_up(link) || (!link->started)) { - disc_dupl_alert(b_ptr, orig, &media_addr); - spin_unlock_bh(&n_ptr->lock); - return; - } - warn("Resetting link <%s>, peer interface address changed\n", - link->name); - memcpy(addr, &media_addr, sizeof(*addr)); - tipc_link_reset(link); + tipc_node_unlock(n_ptr); + return; } - link_fully_up = link_working_working(link); - spin_unlock_bh(&n_ptr->lock); - if ((type == DSC_RESP_MSG) || link_fully_up) + } + + /* + * Ensure requesting node's media address is correct + * + * If media address doesn't match and the link is working, reject the + * request (must be from a duplicate node). + * + * If media address doesn't match and the link is not working, accept + * the new media address and reset the link to ensure it starts up + * cleanly. + */ + addr = &link->media_addr; + if (memcmp(addr, &media_addr, sizeof(*addr))) { + if (tipc_link_is_up(link) || (!link->started)) { + disc_dupl_alert(b_ptr, orig, &media_addr); + tipc_node_unlock(n_ptr); return; - rbuf = tipc_disc_init_msg(DSC_RESP_MSG, 1, orig, b_ptr); - if (rbuf != NULL) { - b_ptr->media->send_msg(rbuf, &b_ptr->publ, &media_addr); + } + warn("Resetting link <%s>, peer interface address changed\n", + link->name); + memcpy(addr, &media_addr, sizeof(*addr)); + tipc_link_reset(link); + } + + /* Accept discovery message & send response, if necessary */ + link_fully_up = link_working_working(link); + + if ((type == DSC_REQ_MSG) && !link_fully_up && !b_ptr->blocked) { + rbuf = tipc_disc_init_msg(DSC_RESP_MSG, orig, b_ptr); + if (rbuf) { + b_ptr->media->send_msg(rbuf, b_ptr, &media_addr); buf_discard(rbuf); } } + + tipc_node_unlock(n_ptr); } /** @@ -249,9 +263,9 @@ void tipc_disc_update_link_req(struct link_req *req) static void disc_timeout(struct link_req *req) { - spin_lock_bh(&req->bearer->publ.lock); + spin_lock_bh(&req->bearer->lock); - req->bearer->media->send_msg(req->buf, &req->bearer->publ, &req->dest); + req->bearer->media->send_msg(req->buf, req->bearer, &req->dest); if ((req->timer_intv == TIPC_LINK_REQ_SLOW) || (req->timer_intv == TIPC_LINK_REQ_FAST)) { @@ -266,7 +280,7 @@ static void disc_timeout(struct link_req *req) } k_start_timer(&req->timer, req->timer_intv); - spin_unlock_bh(&req->bearer->publ.lock); + spin_unlock_bh(&req->bearer->lock); } /** @@ -274,15 +288,13 @@ static void disc_timeout(struct link_req *req) * @b_ptr: ptr to bearer issuing requests * @dest: destination address for request messages * @dest_domain: network domain of node(s) which should respond to message - * @req_links: max number of desired links * * Returns pointer to link request structure, or NULL if unable to create. 
*/ -struct link_req *tipc_disc_init_link_req(struct bearer *b_ptr, +struct link_req *tipc_disc_init_link_req(struct tipc_bearer *b_ptr, const struct tipc_media_addr *dest, - u32 dest_domain, - u32 req_links) + u32 dest_domain) { struct link_req *req; @@ -290,7 +302,7 @@ struct link_req *tipc_disc_init_link_req(struct bearer *b_ptr, if (!req) return NULL; - req->buf = tipc_disc_init_msg(DSC_REQ_MSG, req_links, dest_domain, b_ptr); + req->buf = tipc_disc_init_msg(DSC_REQ_MSG, dest_domain, b_ptr); if (!req->buf) { kfree(req); return NULL; diff --git a/net/tipc/discover.h b/net/tipc/discover.h index d2c3cffb79fc..e48a167e47b2 100644 --- a/net/tipc/discover.h +++ b/net/tipc/discover.h @@ -2,7 +2,7 @@ * net/tipc/discover.h * * Copyright (c) 2003-2006, Ericsson AB - * Copyright (c) 2005, Wind River Systems + * Copyright (c) 2005, 2010-2011, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -39,13 +39,12 @@ struct link_req; -struct link_req *tipc_disc_init_link_req(struct bearer *b_ptr, +struct link_req *tipc_disc_init_link_req(struct tipc_bearer *b_ptr, const struct tipc_media_addr *dest, - u32 dest_domain, - u32 req_links); + u32 dest_domain); void tipc_disc_update_link_req(struct link_req *req); void tipc_disc_stop_link_req(struct link_req *req); -void tipc_disc_recv_msg(struct sk_buff *buf, struct bearer *b_ptr); +void tipc_disc_recv_msg(struct sk_buff *buf, struct tipc_bearer *b_ptr); #endif diff --git a/net/tipc/link.c b/net/tipc/link.c index 18702f58d111..43639ff1cbec 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -2,7 +2,7 @@ * net/tipc/link.c: TIPC link code * * Copyright (c) 1996-2007, Ericsson AB - * Copyright (c) 2004-2007, Wind River Systems + * Copyright (c) 2004-2007, 2010-2011, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -90,7 +90,7 @@ static void link_handle_out_of_seq_msg(struct link *l_ptr, static void link_recv_proto_msg(struct link *l_ptr, struct sk_buff *buf); static int link_recv_changeover_msg(struct link **l_ptr, struct sk_buff **buf); static void link_set_supervision_props(struct link *l_ptr, u32 tolerance); -static int link_send_sections_long(struct port *sender, +static int link_send_sections_long(struct tipc_port *sender, struct iovec const *msg_sect, u32 num_sect, u32 destnode); static void link_check_defragm_bufs(struct link *l_ptr); @@ -113,7 +113,7 @@ static void link_init_max_pkt(struct link *l_ptr) { u32 max_pkt; - max_pkt = (l_ptr->b_ptr->publ.mtu & ~3); + max_pkt = (l_ptr->b_ptr->mtu & ~3); if (max_pkt > MAX_MSG_SIZE) max_pkt = MAX_MSG_SIZE; @@ -246,9 +246,6 @@ static void link_timeout(struct link *l_ptr) l_ptr->stats.accu_queue_sz += l_ptr->out_queue_size; l_ptr->stats.queue_sz_counts++; - if (l_ptr->out_queue_size > l_ptr->stats.max_queue_sz) - l_ptr->stats.max_queue_sz = l_ptr->out_queue_size; - if (l_ptr->first_out) { struct tipc_msg *msg = buf_msg(l_ptr->first_out); u32 length = msg_size(msg); @@ -296,19 +293,35 @@ static void link_set_timer(struct link *l_ptr, u32 time) /** * tipc_link_create - create a new link + * @n_ptr: pointer to associated node * @b_ptr: pointer to associated bearer - * @peer: network address of node at other end of link * @media_addr: media address to use when sending messages over link * * Returns pointer to link. 
*/ -struct link *tipc_link_create(struct bearer *b_ptr, const u32 peer, +struct link *tipc_link_create(struct tipc_node *n_ptr, + struct tipc_bearer *b_ptr, const struct tipc_media_addr *media_addr) { struct link *l_ptr; struct tipc_msg *msg; char *if_name; + char addr_string[16]; + u32 peer = n_ptr->addr; + + if (n_ptr->link_cnt >= 2) { + tipc_addr_string_fill(addr_string, n_ptr->addr); + err("Attempt to establish third link to %s\n", addr_string); + return NULL; + } + + if (n_ptr->links[b_ptr->identity]) { + tipc_addr_string_fill(addr_string, n_ptr->addr); + err("Attempt to establish second link on <%s> to %s\n", + b_ptr->name, addr_string); + return NULL; + } l_ptr = kzalloc(sizeof(*l_ptr), GFP_ATOMIC); if (!l_ptr) { @@ -317,7 +330,7 @@ struct link *tipc_link_create(struct bearer *b_ptr, const u32 peer, } l_ptr->addr = peer; - if_name = strchr(b_ptr->publ.name, ':') + 1; + if_name = strchr(b_ptr->name, ':') + 1; sprintf(l_ptr->name, "%u.%u.%u:%s-%u.%u.%u:", tipc_zone(tipc_own_addr), tipc_cluster(tipc_own_addr), tipc_node(tipc_own_addr), @@ -325,6 +338,7 @@ struct link *tipc_link_create(struct bearer *b_ptr, const u32 peer, tipc_zone(peer), tipc_cluster(peer), tipc_node(peer)); /* note: peer i/f is appended to link name by reset/activate */ memcpy(&l_ptr->media_addr, media_addr, sizeof(*media_addr)); + l_ptr->owner = n_ptr; l_ptr->checkpoint = 1; l_ptr->b_ptr = b_ptr; link_set_supervision_props(l_ptr, b_ptr->media->tolerance); @@ -348,11 +362,7 @@ struct link *tipc_link_create(struct bearer *b_ptr, const u32 peer, link_reset_statistics(l_ptr); - l_ptr->owner = tipc_node_attach_link(l_ptr); - if (!l_ptr->owner) { - kfree(l_ptr); - return NULL; - } + tipc_node_attach_link(n_ptr, l_ptr); k_init_timer(&l_ptr->timer, (Handler)link_timeout, (unsigned long)l_ptr); list_add_tail(&l_ptr->link_list, &b_ptr->links); @@ -391,7 +401,9 @@ void tipc_link_delete(struct link *l_ptr) static void link_start(struct link *l_ptr) { + tipc_node_lock(l_ptr->owner); link_state_event(l_ptr, STARTING_EVT); + tipc_node_unlock(l_ptr->owner); } /** @@ -406,7 +418,7 @@ static void link_start(struct link *l_ptr) static int link_schedule_port(struct link *l_ptr, u32 origport, u32 sz) { - struct port *p_ptr; + struct tipc_port *p_ptr; spin_lock_bh(&tipc_port_list_lock); p_ptr = tipc_port_lock(origport); @@ -415,7 +427,7 @@ static int link_schedule_port(struct link *l_ptr, u32 origport, u32 sz) goto exit; if (!list_empty(&p_ptr->wait_list)) goto exit; - p_ptr->publ.congested = 1; + p_ptr->congested = 1; p_ptr->waiting_pkts = 1 + ((sz - 1) / l_ptr->max_pkt); list_add_tail(&p_ptr->wait_list, &l_ptr->waiting_ports); l_ptr->stats.link_congs++; @@ -428,8 +440,8 @@ exit: void tipc_link_wakeup_ports(struct link *l_ptr, int all) { - struct port *p_ptr; - struct port *temp_p_ptr; + struct tipc_port *p_ptr; + struct tipc_port *temp_p_ptr; int win = l_ptr->queue_limit[0] - l_ptr->out_queue_size; if (all) @@ -445,11 +457,11 @@ void tipc_link_wakeup_ports(struct link *l_ptr, int all) if (win <= 0) break; list_del_init(&p_ptr->wait_list); - spin_lock_bh(p_ptr->publ.lock); - p_ptr->publ.congested = 0; - p_ptr->wakeup(&p_ptr->publ); + spin_lock_bh(p_ptr->lock); + p_ptr->congested = 0; + p_ptr->wakeup(p_ptr); win -= p_ptr->waiting_pkts; - spin_unlock_bh(p_ptr->publ.lock); + spin_unlock_bh(p_ptr->lock); } exit: @@ -549,7 +561,7 @@ void tipc_link_reset(struct link *l_ptr) tipc_node_link_down(l_ptr->owner, l_ptr); tipc_bearer_remove_dest(l_ptr->b_ptr, l_ptr->addr); - if (was_active_link && tipc_node_has_active_links(l_ptr->owner) && + if 
(was_active_link && tipc_node_active_links(l_ptr->owner) && l_ptr->owner->permit_changeover) { l_ptr->reset_checkpoint = checkpoint; l_ptr->exp_msg_count = START_CHANGEOVER; @@ -824,7 +836,10 @@ static void link_add_to_outqueue(struct link *l_ptr, l_ptr->last_out = buf; } else l_ptr->first_out = l_ptr->last_out = buf; + l_ptr->out_queue_size++; + if (l_ptr->out_queue_size > l_ptr->stats.max_queue_sz) + l_ptr->stats.max_queue_sz = l_ptr->out_queue_size; } /* @@ -867,9 +882,6 @@ int tipc_link_send_buf(struct link *l_ptr, struct sk_buff *buf) /* Packet can be queued or sent: */ - if (queue_size > l_ptr->stats.max_queue_sz) - l_ptr->stats.max_queue_sz = queue_size; - if (likely(!tipc_bearer_congested(l_ptr->b_ptr, l_ptr) && !link_congested(l_ptr))) { link_add_to_outqueue(l_ptr, buf, msg); @@ -1027,12 +1039,12 @@ int tipc_send_buf_fast(struct sk_buff *buf, u32 destnode) * except for total message length. * Returns user data length or errno. */ -int tipc_link_send_sections_fast(struct port *sender, +int tipc_link_send_sections_fast(struct tipc_port *sender, struct iovec const *msg_sect, const u32 num_sect, u32 destaddr) { - struct tipc_msg *hdr = &sender->publ.phdr; + struct tipc_msg *hdr = &sender->phdr; struct link *l_ptr; struct sk_buff *buf; struct tipc_node *node; @@ -1045,7 +1057,7 @@ again: * (Must not hold any locks while building message.) */ - res = tipc_msg_build(hdr, msg_sect, num_sect, sender->publ.max_pkt, + res = tipc_msg_build(hdr, msg_sect, num_sect, sender->max_pkt, !sender->user_port, &buf); read_lock_bh(&tipc_net_lock); @@ -1056,7 +1068,7 @@ again: if (likely(l_ptr)) { if (likely(buf)) { res = link_send_buf_fast(l_ptr, buf, - &sender->publ.max_pkt); + &sender->max_pkt); if (unlikely(res < 0)) buf_discard(buf); exit: @@ -1075,7 +1087,7 @@ exit: if (link_congested(l_ptr) || !list_empty(&l_ptr->b_ptr->cong_links)) { res = link_schedule_port(l_ptr, - sender->publ.ref, res); + sender->ref, res); goto exit; } @@ -1084,12 +1096,12 @@ exit: * then re-try fast path or fragment the message */ - sender->publ.max_pkt = l_ptr->max_pkt; + sender->max_pkt = l_ptr->max_pkt; tipc_node_unlock(node); read_unlock_bh(&tipc_net_lock); - if ((msg_hdr_sz(hdr) + res) <= sender->publ.max_pkt) + if ((msg_hdr_sz(hdr) + res) <= sender->max_pkt) goto again; return link_send_sections_long(sender, msg_sect, @@ -1123,14 +1135,14 @@ exit: * * Returns user data length or errno. 
*/ -static int link_send_sections_long(struct port *sender, +static int link_send_sections_long(struct tipc_port *sender, struct iovec const *msg_sect, u32 num_sect, u32 destaddr) { struct link *l_ptr; struct tipc_node *node; - struct tipc_msg *hdr = &sender->publ.phdr; + struct tipc_msg *hdr = &sender->phdr; u32 dsz = msg_data_sz(hdr); u32 max_pkt, fragm_sz, rest; struct tipc_msg fragm_hdr; @@ -1142,7 +1154,7 @@ static int link_send_sections_long(struct port *sender, again: fragm_no = 1; - max_pkt = sender->publ.max_pkt - INT_H_SIZE; + max_pkt = sender->max_pkt - INT_H_SIZE; /* leave room for tunnel header in case of link changeover */ fragm_sz = max_pkt - INT_H_SIZE; /* leave room for fragmentation header in each fragment */ @@ -1157,7 +1169,7 @@ again: tipc_msg_init(&fragm_hdr, MSG_FRAGMENTER, FIRST_FRAGMENT, INT_H_SIZE, msg_destnode(hdr)); - msg_set_link_selector(&fragm_hdr, sender->publ.ref); + msg_set_link_selector(&fragm_hdr, sender->ref); msg_set_size(&fragm_hdr, max_pkt); msg_set_fragm_no(&fragm_hdr, 1); @@ -1238,13 +1250,13 @@ error: node = tipc_node_find(destaddr); if (likely(node)) { tipc_node_lock(node); - l_ptr = node->active_links[sender->publ.ref & 1]; + l_ptr = node->active_links[sender->ref & 1]; if (!l_ptr) { tipc_node_unlock(node); goto reject; } if (l_ptr->max_pkt < max_pkt) { - sender->publ.max_pkt = l_ptr->max_pkt; + sender->max_pkt = l_ptr->max_pkt; tipc_node_unlock(node); for (; buf_chain; buf_chain = buf) { buf = buf_chain->next; @@ -1441,7 +1453,7 @@ static void link_retransmit_failure(struct link *l_ptr, struct sk_buff *buf) info("Outstanding acks: %lu\n", (unsigned long) TIPC_SKB_CB(buf)->handle); - n_ptr = l_ptr->owner->next; + n_ptr = tipc_bclink_retransmit_to(); tipc_node_lock(n_ptr); tipc_addr_string_fill(addr_string, n_ptr->addr); @@ -1595,11 +1607,10 @@ static int link_recv_buf_validate(struct sk_buff *buf) * structure (i.e. cannot be NULL), but bearer can be inactive. 
*/ -void tipc_recv_msg(struct sk_buff *head, struct tipc_bearer *tb_ptr) +void tipc_recv_msg(struct sk_buff *head, struct tipc_bearer *b_ptr) { read_lock_bh(&tipc_net_lock); while (head) { - struct bearer *b_ptr = (struct bearer *)tb_ptr; struct tipc_node *n_ptr; struct link *l_ptr; struct sk_buff *crs; @@ -1735,10 +1746,6 @@ deliver: tipc_node_unlock(n_ptr); tipc_link_recv_bundle(buf); continue; - case ROUTE_DISTRIBUTOR: - tipc_node_unlock(n_ptr); - buf_discard(buf); - continue; case NAME_DISTRIBUTOR: tipc_node_unlock(n_ptr); tipc_named_recv(buf); @@ -1765,6 +1772,10 @@ deliver: goto protocol_check; } break; + default: + buf_discard(buf); + buf = NULL; + break; } } tipc_node_unlock(n_ptr); @@ -1900,6 +1911,7 @@ void tipc_link_send_proto_msg(struct link *l_ptr, u32 msg_typ, int probe_msg, struct sk_buff *buf = NULL; struct tipc_msg *msg = l_ptr->pmsg; u32 msg_size = sizeof(l_ptr->proto_msg); + int r_flag; if (link_blocked(l_ptr)) return; @@ -1950,15 +1962,14 @@ void tipc_link_send_proto_msg(struct link *l_ptr, u32 msg_typ, int probe_msg, msg_set_ack(msg, mod(l_ptr->reset_checkpoint - 1)); msg_set_seq_gap(msg, 0); msg_set_next_sent(msg, 1); + msg_set_probe(msg, 0); msg_set_link_tolerance(msg, l_ptr->tolerance); msg_set_linkprio(msg, l_ptr->priority); msg_set_max_pkt(msg, l_ptr->max_pkt_target); } - if (tipc_node_has_redundant_links(l_ptr->owner)) - msg_set_redundant_link(msg); - else - msg_clear_redundant_link(msg); + r_flag = (l_ptr->owner->working_links > tipc_link_is_up(l_ptr)); + msg_set_redundant_link(msg, r_flag); msg_set_linkprio(msg, l_ptr->priority); /* Ensure sequence number will not fit : */ @@ -1978,7 +1989,6 @@ void tipc_link_send_proto_msg(struct link *l_ptr, u32 msg_typ, int probe_msg, skb_copy_to_linear_data(buf, msg, sizeof(l_ptr->proto_msg)); return; } - msg_set_timestamp(msg, jiffies_to_msecs(jiffies)); /* Message can be sent */ @@ -2066,7 +2076,7 @@ static void link_recv_proto_msg(struct link *l_ptr, struct sk_buff *buf) l_ptr->peer_bearer_id = msg_bearer_id(msg); /* Synchronize broadcast sequence numbers */ - if (!tipc_node_has_redundant_links(l_ptr->owner)) + if (!tipc_node_redundant_links(l_ptr->owner)) l_ptr->owner->bclink.last_in = mod(msg_last_bcast(msg)); break; case STATE_MSG: @@ -2413,9 +2423,6 @@ static int link_send_long_buf(struct link *l_ptr, struct sk_buff *buf) else destaddr = msg_destnode(inmsg); - if (msg_routed(inmsg)) - msg_set_prevnode(inmsg, tipc_own_addr); - /* Prepare reusable fragment header: */ tipc_msg_init(&fragm_hdr, MSG_FRAGMENTER, FIRST_FRAGMENT, @@ -2618,6 +2625,9 @@ static void link_check_defragm_bufs(struct link *l_ptr) static void link_set_supervision_props(struct link *l_ptr, u32 tolerance) { + if ((tolerance < TIPC_MIN_LINK_TOL) || (tolerance > TIPC_MAX_LINK_TOL)) + return; + l_ptr->tolerance = tolerance; l_ptr->continuity_interval = ((tolerance / 4) > 500) ? 
500 : tolerance / 4; @@ -2658,7 +2668,7 @@ void tipc_link_set_queue_limits(struct link *l_ptr, u32 window) static struct link *link_find_link(const char *name, struct tipc_node **node) { struct link_name link_name_parts; - struct bearer *b_ptr; + struct tipc_bearer *b_ptr; struct link *l_ptr; if (!link_name_validate(name, &link_name_parts)) @@ -2961,7 +2971,7 @@ static void link_print(struct link *l_ptr, const char *str) tipc_printf(buf, str); tipc_printf(buf, "Link %x<%s>:", - l_ptr->addr, l_ptr->b_ptr->publ.name); + l_ptr->addr, l_ptr->b_ptr->name); #ifdef CONFIG_TIPC_DEBUG if (link_reset_reset(l_ptr) || link_reset_unknown(l_ptr)) @@ -2981,9 +2991,9 @@ static void link_print(struct link *l_ptr, const char *str) != (l_ptr->out_queue_size - 1)) || (l_ptr->last_out->next != NULL)) { tipc_printf(buf, "\nSend queue inconsistency\n"); - tipc_printf(buf, "first_out= %x ", l_ptr->first_out); - tipc_printf(buf, "next_out= %x ", l_ptr->next_out); - tipc_printf(buf, "last_out= %x ", l_ptr->last_out); + tipc_printf(buf, "first_out= %p ", l_ptr->first_out); + tipc_printf(buf, "next_out= %p ", l_ptr->next_out); + tipc_printf(buf, "last_out= %p ", l_ptr->last_out); } } else tipc_printf(buf, "[]"); diff --git a/net/tipc/link.h b/net/tipc/link.h index 70967e637027..e6a30dbe1aaa 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -2,7 +2,7 @@ * net/tipc/link.h: Include file for TIPC link code * * Copyright (c) 1995-2006, Ericsson AB - * Copyright (c) 2004-2005, Wind River Systems + * Copyright (c) 2004-2005, 2010-2011, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -122,7 +122,7 @@ struct link { u32 checkpoint; u32 peer_session; u32 peer_bearer_id; - struct bearer *b_ptr; + struct tipc_bearer *b_ptr; u32 tolerance; u32 continuity_interval; u32 abort_limit; @@ -196,24 +196,19 @@ struct link { u32 bearer_congs; u32 deferred_recv; u32 duplicates; - - /* for statistical profiling of send queue size */ - - u32 max_queue_sz; - u32 accu_queue_sz; - u32 queue_sz_counts; - - /* for statistical profiling of message lengths */ - - u32 msg_length_counts; - u32 msg_lengths_total; - u32 msg_length_profile[7]; + u32 max_queue_sz; /* send queue size high water mark */ + u32 accu_queue_sz; /* used for send queue size profiling */ + u32 queue_sz_counts; /* used for send queue size profiling */ + u32 msg_length_counts; /* used for message length profiling */ + u32 msg_lengths_total; /* used for message length profiling */ + u32 msg_length_profile[7]; /* used for msg. 
length profiling */ } stats; }; -struct port; +struct tipc_port; -struct link *tipc_link_create(struct bearer *b_ptr, const u32 peer, +struct link *tipc_link_create(struct tipc_node *n_ptr, + struct tipc_bearer *b_ptr, const struct tipc_media_addr *media_addr); void tipc_link_delete(struct link *l_ptr); void tipc_link_changeover(struct link *l_ptr); @@ -230,7 +225,7 @@ void tipc_link_reset(struct link *l_ptr); int tipc_link_send(struct sk_buff *buf, u32 dest, u32 selector); int tipc_link_send_buf(struct link *l_ptr, struct sk_buff *buf); u32 tipc_link_get_max_pkt(u32 dest, u32 selector); -int tipc_link_send_sections_fast(struct port *sender, +int tipc_link_send_sections_fast(struct tipc_port *sender, struct iovec const *msg_sect, const u32 num_sect, u32 destnode); diff --git a/net/tipc/msg.c b/net/tipc/msg.c index bb6180c4fcbb..6d92d17e7fb5 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -2,7 +2,7 @@ * net/tipc/msg.c: TIPC message header routines * * Copyright (c) 2000-2006, Ericsson AB - * Copyright (c) 2005, Wind River Systems + * Copyright (c) 2005, 2010-2011, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -192,8 +192,6 @@ void tipc_msg_dbg(struct print_buf *buf, struct tipc_msg *msg, const char *str) default: tipc_printf(buf, "UNKNOWN TYPE %u", msg_type(msg)); } - if (msg_routed(msg) && !msg_non_seq(msg)) - tipc_printf(buf, "ROUT:"); if (msg_reroute_cnt(msg)) tipc_printf(buf, "REROUTED(%u):", msg_reroute_cnt(msg)); @@ -210,8 +208,6 @@ void tipc_msg_dbg(struct print_buf *buf, struct tipc_msg *msg, const char *str) default: tipc_printf(buf, "UNKNOWN:%x", msg_type(msg)); } - if (msg_routed(msg)) - tipc_printf(buf, "ROUT:"); if (msg_reroute_cnt(msg)) tipc_printf(buf, "REROUTED(%u):", msg_reroute_cnt(msg)); @@ -232,13 +228,10 @@ void tipc_msg_dbg(struct print_buf *buf, struct tipc_msg *msg, const char *str) default: tipc_printf(buf, "UNKNOWN TYPE:%x", msg_type(msg)); } - if (msg_routed(msg)) - tipc_printf(buf, "ROUT:"); if (msg_reroute_cnt(msg)) tipc_printf(buf, "REROUTED(%u):", msg_reroute_cnt(msg)); break; case LINK_PROTOCOL: - tipc_printf(buf, "PROT:TIM(%u):", msg_timestamp(msg)); switch (msg_type(msg)) { case STATE_MSG: tipc_printf(buf, "STATE:"); @@ -275,33 +268,6 @@ void tipc_msg_dbg(struct print_buf *buf, struct tipc_msg *msg, const char *str) tipc_printf(buf, "UNKNOWN TYPE:%x", msg_type(msg)); } break; - case ROUTE_DISTRIBUTOR: - tipc_printf(buf, "ROUTING_MNG:"); - switch (msg_type(msg)) { - case EXT_ROUTING_TABLE: - tipc_printf(buf, "EXT_TBL:"); - tipc_printf(buf, "TO:%x:", msg_remote_node(msg)); - break; - case LOCAL_ROUTING_TABLE: - tipc_printf(buf, "LOCAL_TBL:"); - tipc_printf(buf, "TO:%x:", msg_remote_node(msg)); - break; - case SLAVE_ROUTING_TABLE: - tipc_printf(buf, "DP_TBL:"); - tipc_printf(buf, "TO:%x:", msg_remote_node(msg)); - break; - case ROUTE_ADDITION: - tipc_printf(buf, "ADD:"); - tipc_printf(buf, "TO:%x:", msg_remote_node(msg)); - break; - case ROUTE_REMOVAL: - tipc_printf(buf, "REMOVE:"); - tipc_printf(buf, "TO:%x:", msg_remote_node(msg)); - break; - default: - tipc_printf(buf, "UNKNOWN TYPE:%x", msg_type(msg)); - } - break; case LINK_CONFIG: tipc_printf(buf, "CFG:"); switch (msg_type(msg)) { @@ -381,20 +347,15 @@ void tipc_msg_dbg(struct print_buf *buf, struct tipc_msg *msg, const char *str) tipc_printf(buf, ":OPRT(%u):", msg_origport(msg)); tipc_printf(buf, ":DPRT(%u):", msg_destport(msg)); } - if (msg_routed(msg) && !msg_non_seq(msg)) - tipc_printf(buf, ":TSEQN(%u)", msg_transp_seqno(msg)); 
} if (msg_user(msg) == NAME_DISTRIBUTOR) { tipc_printf(buf, ":ONOD(%x):", msg_orignode(msg)); tipc_printf(buf, ":DNOD(%x):", msg_destnode(msg)); - if (msg_routed(msg)) - tipc_printf(buf, ":CSEQN(%u)", msg_transp_seqno(msg)); } if (msg_user(msg) == LINK_CONFIG) { u32 *raw = (u32 *)msg; struct tipc_media_addr *orig = (struct tipc_media_addr *)&raw[5]; - tipc_printf(buf, ":REQL(%u):", msg_req_links(msg)); tipc_printf(buf, ":DDOM(%x):", msg_dest_domain(msg)); tipc_printf(buf, ":NETID(%u):", msg_bc_netid(msg)); tipc_media_addr_printf(buf, orig); diff --git a/net/tipc/msg.h b/net/tipc/msg.h index 92c4c4fd7b3f..de02339fc175 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -2,7 +2,7 @@ * net/tipc/msg.h: Include file for TIPC message header routines * * Copyright (c) 2000-2007, Ericsson AB - * Copyright (c) 2005-2008, Wind River Systems + * Copyright (c) 2005-2008, 2010-2011, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -421,13 +421,6 @@ static inline int msg_is_dest(struct tipc_msg *m, u32 d) return msg_short(m) || (msg_destnode(m) == d); } -static inline u32 msg_routed(struct tipc_msg *m) -{ - if (likely(msg_short(m))) - return 0; - return (msg_destnode(m) ^ msg_orignode(m)) >> 11; -} - static inline u32 msg_nametype(struct tipc_msg *m) { return msg_word(m, 8); @@ -438,26 +431,6 @@ static inline void msg_set_nametype(struct tipc_msg *m, u32 n) msg_set_word(m, 8, n); } -static inline u32 msg_transp_seqno(struct tipc_msg *m) -{ - return msg_word(m, 8); -} - -static inline void msg_set_timestamp(struct tipc_msg *m, u32 n) -{ - msg_set_word(m, 8, n); -} - -static inline u32 msg_timestamp(struct tipc_msg *m) -{ - return msg_word(m, 8); -} - -static inline void msg_set_transp_seqno(struct tipc_msg *m, u32 n) -{ - msg_set_word(m, 8, n); -} - static inline u32 msg_nameinst(struct tipc_msg *m) { return msg_word(m, 9); @@ -545,7 +518,6 @@ static inline struct tipc_msg *msg_get_wrapped(struct tipc_msg *m) #define NAME_DISTRIBUTOR 11 #define MSG_FRAGMENTER 12 #define LINK_CONFIG 13 -#define DSC_H_SIZE 40 /* * Connection management protocol messages @@ -577,16 +549,6 @@ static inline void msg_set_seq_gap(struct tipc_msg *m, u32 n) msg_set_bits(m, 1, 16, 0x1fff, n); } -static inline u32 msg_req_links(struct tipc_msg *m) -{ - return msg_bits(m, 1, 16, 0xfff); -} - -static inline void msg_set_req_links(struct tipc_msg *m, u32 n) -{ - msg_set_bits(m, 1, 16, 0xfff, n); -} - /* * Word 2 @@ -749,14 +711,9 @@ static inline u32 msg_redundant_link(struct tipc_msg *m) return msg_bits(m, 5, 12, 0x1); } -static inline void msg_set_redundant_link(struct tipc_msg *m) +static inline void msg_set_redundant_link(struct tipc_msg *m, u32 r) { - msg_set_bits(m, 5, 12, 0x1, 1); -} - -static inline void msg_clear_redundant_link(struct tipc_msg *m) -{ - msg_set_bits(m, 5, 12, 0x1, 0); + msg_set_bits(m, 5, 12, 0x1, r); } @@ -805,21 +762,6 @@ static inline void msg_set_link_tolerance(struct tipc_msg *m, u32 n) } /* - * Routing table message data - */ - - -static inline u32 msg_remote_node(struct tipc_msg *m) -{ - return msg_word(m, msg_hdr_sz(m)/4); -} - -static inline void msg_set_remote_node(struct tipc_msg *m, u32 a) -{ - msg_set_word(m, msg_hdr_sz(m)/4, a); -} - -/* * Segmentation message types */ diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c index 483c226c9581..c9fa6dfcf287 100644 --- a/net/tipc/name_distr.c +++ b/net/tipc/name_distr.c @@ -2,7 +2,7 @@ * net/tipc/name_distr.c: TIPC name distribution code * * Copyright (c) 2000-2006, Ericsson AB 
- * Copyright (c) 2005, Wind River Systems + * Copyright (c) 2005, 2010-2011, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -109,11 +109,9 @@ static void named_cluster_distribute(struct sk_buff *buf) { struct sk_buff *buf_copy; struct tipc_node *n_ptr; - u32 n_num; - for (n_num = 1; n_num <= tipc_net.highest_node; n_num++) { - n_ptr = tipc_net.nodes[n_num]; - if (n_ptr && tipc_node_has_active_links(n_ptr)) { + list_for_each_entry(n_ptr, &tipc_node_list, list) { + if (tipc_node_active_links(n_ptr)) { buf_copy = skb_copy(buf, GFP_ATOMIC); if (!buf_copy) break; @@ -214,17 +212,16 @@ exit: } /** - * node_is_down - remove publication associated with a failed node + * named_purge_publ - remove publication associated with a failed node * * Invoked for each publication issued by a newly failed node. * Removes publication structure from name table & deletes it. * In rare cases the link may have come back up again when this * function is called, and we have two items representing the same * publication. Nudge this item's key to distinguish it from the other. - * (Note: Publication's node subscription is already unsubscribed.) */ -static void node_is_down(struct publication *publ) +static void named_purge_publ(struct publication *publ) { struct publication *p; @@ -232,6 +229,8 @@ static void node_is_down(struct publication *publ) publ->key += 1222345; p = tipc_nametbl_remove_publ(publ->type, publ->lower, publ->node, publ->ref, publ->key); + if (p) + tipc_nodesub_unsubscribe(&p->subscr); write_unlock_bh(&tipc_nametbl_lock); if (p != publ) { @@ -268,7 +267,8 @@ void tipc_named_recv(struct sk_buff *buf) tipc_nodesub_subscribe(&publ->subscr, msg_orignode(msg), publ, - (net_ev_handler)node_is_down); + (net_ev_handler) + named_purge_publ); } } else if (msg_type(msg) == WITHDRAWAL) { publ = tipc_nametbl_remove_publ(ntohl(item->type), diff --git a/net/tipc/net.c b/net/tipc/net.c index 9bacfd00b91e..68b3dd637291 100644 --- a/net/tipc/net.c +++ b/net/tipc/net.c @@ -2,7 +2,7 @@ * net/tipc/net.c: TIPC network routing code * * Copyright (c) 1995-2006, Ericsson AB - * Copyright (c) 2005, Wind River Systems + * Copyright (c) 2005, 2010-2011, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -39,6 +39,7 @@ #include "name_distr.h" #include "subscr.h" #include "port.h" +#include "node.h" #include "config.h" /* @@ -108,26 +109,6 @@ */ DEFINE_RWLOCK(tipc_net_lock); -struct network tipc_net; - -static int net_start(void) -{ - tipc_net.nodes = kcalloc(tipc_max_nodes + 1, - sizeof(*tipc_net.nodes), GFP_ATOMIC); - tipc_net.highest_node = 0; - - return tipc_net.nodes ? 
0 : -ENOMEM; -} - -static void net_stop(void) -{ - u32 n_num; - - for (n_num = 1; n_num <= tipc_net.highest_node; n_num++) - tipc_node_delete(tipc_net.nodes[n_num]); - kfree(tipc_net.nodes); - tipc_net.nodes = NULL; -} static void net_route_named_msg(struct sk_buff *buf) { @@ -217,9 +198,6 @@ int tipc_net_start(u32 addr) tipc_named_reinit(); tipc_port_reinit(); - res = net_start(); - if (res) - return res; res = tipc_bclink_init(); if (res) return res; @@ -235,14 +213,16 @@ int tipc_net_start(u32 addr) void tipc_net_stop(void) { + struct tipc_node *node, *t_node; + if (tipc_mode != TIPC_NET_MODE) return; write_lock_bh(&tipc_net_lock); tipc_bearer_stop(); tipc_mode = TIPC_NODE_MODE; tipc_bclink_stop(); - net_stop(); + list_for_each_entry_safe(node, t_node, &tipc_node_list, list) + tipc_node_delete(node); write_unlock_bh(&tipc_net_lock); info("Left network mode\n"); } - diff --git a/net/tipc/net.h b/net/tipc/net.h index 4ae59ad04893..9eb4b9e220eb 100644 --- a/net/tipc/net.h +++ b/net/tipc/net.h @@ -2,7 +2,7 @@ * net/tipc/net.h: Include file for TIPC network routing code * * Copyright (c) 1995-2006, Ericsson AB - * Copyright (c) 2005, Wind River Systems + * Copyright (c) 2005, 2010-2011, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -37,23 +37,6 @@ #ifndef _TIPC_NET_H #define _TIPC_NET_H -struct tipc_node; - -/** - * struct network - TIPC network structure - * @nodes: array of pointers to all nodes within cluster - * @highest_node: id of highest numbered node within cluster - * @links: number of (unicast) links to cluster - */ - -struct network { - struct tipc_node **nodes; - u32 highest_node; - u32 links; -}; - - -extern struct network tipc_net; extern rwlock_t tipc_net_lock; void tipc_net_route_msg(struct sk_buff *buf); diff --git a/net/tipc/node.c b/net/tipc/node.c index 3af53e327f49..2d106ef4fa4c 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -2,7 +2,7 @@ * net/tipc/node.c: TIPC node management routines * * Copyright (c) 2000-2006, Ericsson AB - * Copyright (c) 2005-2006, Wind River Systems + * Copyright (c) 2005-2006, 2010-2011, Wind River Systems * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -44,9 +44,33 @@ static void node_established_contact(struct tipc_node *n_ptr); static DEFINE_SPINLOCK(node_create_lock); +static struct hlist_head node_htable[NODE_HTABLE_SIZE]; +LIST_HEAD(tipc_node_list); +static u32 tipc_num_nodes; + +static atomic_t tipc_num_links = ATOMIC_INIT(0); u32 tipc_own_tag; /** + * tipc_node_find - locate specified node object, if it exists + */ + +struct tipc_node *tipc_node_find(u32 addr) +{ + struct tipc_node *node; + struct hlist_node *pos; + + if (unlikely(!in_own_cluster(addr))) + return NULL; + + hlist_for_each_entry(node, pos, &node_htable[tipc_hashfn(addr)], hash) { + if (node->addr == addr) + return node; + } + return NULL; +} + +/** * tipc_node_create - create neighboring node * * Currently, this routine is called by neighbor discovery code, which holds @@ -58,8 +82,7 @@ u32 tipc_own_tag; struct tipc_node *tipc_node_create(u32 addr) { - struct tipc_node *n_ptr; - u32 n_num; + struct tipc_node *n_ptr, *temp_node; spin_lock_bh(&node_create_lock); @@ -78,12 +101,19 @@ struct tipc_node *tipc_node_create(u32 addr) n_ptr->addr = addr; spin_lock_init(&n_ptr->lock); + INIT_HLIST_NODE(&n_ptr->hash); + INIT_LIST_HEAD(&n_ptr->list); INIT_LIST_HEAD(&n_ptr->nsub); - n_num = tipc_node(addr); - tipc_net.nodes[n_num] = n_ptr; - if (n_num > tipc_net.highest_node) - tipc_net.highest_node = n_num; + hlist_add_head(&n_ptr->hash, &node_htable[tipc_hashfn(addr)]); + + list_for_each_entry(temp_node, &tipc_node_list, list) { + if (n_ptr->addr < temp_node->addr) + break; + } + list_add_tail(&n_ptr->list, &temp_node->list); + + tipc_num_nodes++; spin_unlock_bh(&node_create_lock); return n_ptr; @@ -91,18 +121,11 @@ struct tipc_node *tipc_node_create(u32 addr) void tipc_node_delete(struct tipc_node *n_ptr) { - u32 n_num; - - if (!n_ptr) - return; - - n_num = tipc_node(n_ptr->addr); - tipc_net.nodes[n_num] = NULL; + list_del(&n_ptr->list); + hlist_del(&n_ptr->hash); kfree(n_ptr); - while (!tipc_net.nodes[tipc_net.highest_node]) - if (--tipc_net.highest_node == 0) - break; + tipc_num_nodes--; } @@ -200,54 +223,32 @@ void tipc_node_link_down(struct tipc_node *n_ptr, struct link *l_ptr) node_lost_contact(n_ptr); } -int tipc_node_has_active_links(struct tipc_node *n_ptr) +int tipc_node_active_links(struct tipc_node *n_ptr) { return n_ptr->active_links[0] != NULL; } -int tipc_node_has_redundant_links(struct tipc_node *n_ptr) +int tipc_node_redundant_links(struct tipc_node *n_ptr) { return n_ptr->working_links > 1; } int tipc_node_is_up(struct tipc_node *n_ptr) { - return tipc_node_has_active_links(n_ptr); + return tipc_node_active_links(n_ptr); } -struct tipc_node *tipc_node_attach_link(struct link *l_ptr) +void tipc_node_attach_link(struct tipc_node *n_ptr, struct link *l_ptr) { - struct tipc_node *n_ptr = tipc_node_find(l_ptr->addr); - - if (!n_ptr) - n_ptr = tipc_node_create(l_ptr->addr); - if (n_ptr) { - u32 bearer_id = l_ptr->b_ptr->identity; - char addr_string[16]; - - if (n_ptr->link_cnt >= 2) { - err("Attempt to create third link to %s\n", - tipc_addr_string_fill(addr_string, n_ptr->addr)); - return NULL; - } - - if (!n_ptr->links[bearer_id]) { - n_ptr->links[bearer_id] = l_ptr; - tipc_net.links++; - n_ptr->link_cnt++; - return n_ptr; - } - err("Attempt to establish second link on <%s> to %s\n", - l_ptr->b_ptr->publ.name, - tipc_addr_string_fill(addr_string, l_ptr->addr)); - } - return NULL; + n_ptr->links[l_ptr->b_ptr->identity] = l_ptr; + atomic_inc(&tipc_num_links); + n_ptr->link_cnt++; } void 
tipc_node_detach_link(struct tipc_node *n_ptr, struct link *l_ptr) { n_ptr->links[l_ptr->b_ptr->identity] = NULL; - tipc_net.links--; + atomic_dec(&tipc_num_links); n_ptr->link_cnt--; } @@ -327,7 +328,6 @@ static void node_cleanup_finished(unsigned long node_addr) static void node_lost_contact(struct tipc_node *n_ptr) { - struct tipc_node_subscr *ns, *tns; char addr_string[16]; u32 i; @@ -365,12 +365,7 @@ static void node_lost_contact(struct tipc_node *n_ptr) } /* Notify subscribers */ - list_for_each_entry_safe(ns, tns, &n_ptr->nsub, nodesub_list) { - ns->node = NULL; - list_del_init(&ns->nodesub_list); - tipc_k_signal((Handler)ns->handle_node_down, - (unsigned long)ns->usr_handle); - } + tipc_nodesub_notify(n_ptr); /* Prevent re-contact with node until all cleanup is done */ @@ -385,7 +380,6 @@ struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space) struct tipc_node *n_ptr; struct tipc_node_info node_info; u32 payload_size; - u32 n_num; if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_NET_ADDR)) return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR); @@ -396,15 +390,14 @@ struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space) " (network address)"); read_lock_bh(&tipc_net_lock); - if (!tipc_net.nodes) { + if (!tipc_num_nodes) { read_unlock_bh(&tipc_net_lock); return tipc_cfg_reply_none(); } /* For now, get space for all other nodes */ - payload_size = TLV_SPACE(sizeof(node_info)) * - (tipc_net.highest_node - 1); + payload_size = TLV_SPACE(sizeof(node_info)) * tipc_num_nodes; if (payload_size > 32768u) { read_unlock_bh(&tipc_net_lock); return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED @@ -418,9 +411,8 @@ struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space) /* Add TLVs for all nodes in scope */ - for (n_num = 1; n_num <= tipc_net.highest_node; n_num++) { - n_ptr = tipc_net.nodes[n_num]; - if (!n_ptr || !tipc_in_scope(domain, n_ptr->addr)) + list_for_each_entry(n_ptr, &tipc_node_list, list) { + if (!tipc_in_scope(domain, n_ptr->addr)) continue; node_info.addr = htonl(n_ptr->addr); node_info.up = htonl(tipc_node_is_up(n_ptr)); @@ -439,7 +431,6 @@ struct sk_buff *tipc_node_get_links(const void *req_tlv_area, int req_tlv_space) struct tipc_node *n_ptr; struct tipc_link_info link_info; u32 payload_size; - u32 n_num; if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_NET_ADDR)) return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR); @@ -456,7 +447,8 @@ struct sk_buff *tipc_node_get_links(const void *req_tlv_area, int req_tlv_space) /* Get space for all unicast links + multicast link */ - payload_size = TLV_SPACE(sizeof(link_info)) * (tipc_net.links + 1); + payload_size = TLV_SPACE(sizeof(link_info)) * + (atomic_read(&tipc_num_links) + 1); if (payload_size > 32768u) { read_unlock_bh(&tipc_net_lock); return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED @@ -470,18 +462,17 @@ struct sk_buff *tipc_node_get_links(const void *req_tlv_area, int req_tlv_space) /* Add TLV for broadcast link */ - link_info.dest = htonl(tipc_own_addr & 0xfffff00); + link_info.dest = htonl(tipc_cluster_mask(tipc_own_addr)); link_info.up = htonl(1); strlcpy(link_info.str, tipc_bclink_name, TIPC_MAX_LINK_NAME); tipc_cfg_append_tlv(buf, TIPC_TLV_LINK_INFO, &link_info, sizeof(link_info)); /* Add TLVs for any other links in scope */ - for (n_num = 1; n_num <= tipc_net.highest_node; n_num++) { + list_for_each_entry(n_ptr, &tipc_node_list, list) { u32 i; - n_ptr = tipc_net.nodes[n_num]; - if (!n_ptr || !tipc_in_scope(domain, 
n_ptr->addr)) + if (!tipc_in_scope(domain, n_ptr->addr)) continue; tipc_node_lock(n_ptr); for (i = 0; i < MAX_BEARERS; i++) { diff --git a/net/tipc/node.h b/net/tipc/node.h index 206a8efa410e..5c61afc7a0b9 100644 --- a/net/tipc/node.h +++ b/net/tipc/node.h @@ -2,7 +2,7 @@ * net/tipc/node.h: Include file for TIPC node management routines * * Copyright (c) 2000-2006, Ericsson AB - * Copyright (c) 2005, Wind River Systems + * Copyright (c) 2005, 2010-2011, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -46,7 +46,8 @@ * struct tipc_node - TIPC node structure * @addr: network address of node * @lock: spinlock governing access to structure - * @next: pointer to next node in sorted list of cluster's nodes + * @hash: links to adjacent nodes in unsorted hash chain + * @list: links to adjacent nodes in sorted list of cluster's nodes * @nsub: list of "node down" subscriptions monitoring node * @active_links: pointers to active links to node * @links: pointers to all links to node @@ -69,7 +70,8 @@ struct tipc_node { u32 addr; spinlock_t lock; - struct tipc_node *next; + struct hlist_node hash; + struct list_head list; struct list_head nsub; struct link *active_links[2]; struct link *links[MAX_BEARERS]; @@ -90,27 +92,35 @@ struct tipc_node { } bclink; }; +#define NODE_HTABLE_SIZE 512 +extern struct list_head tipc_node_list; + +/* + * A trivial power-of-two bitmask technique is used for speed, since this + * operation is done for every incoming TIPC packet. The number of hash table + * entries has been chosen so that no hash chain exceeds 8 nodes and will + * usually be much smaller (typically only a single node). + */ +static inline unsigned int tipc_hashfn(u32 addr) +{ + return addr & (NODE_HTABLE_SIZE - 1); +} + extern u32 tipc_own_tag; +struct tipc_node *tipc_node_find(u32 addr); struct tipc_node *tipc_node_create(u32 addr); void tipc_node_delete(struct tipc_node *n_ptr); -struct tipc_node *tipc_node_attach_link(struct link *l_ptr); +void tipc_node_attach_link(struct tipc_node *n_ptr, struct link *l_ptr); void tipc_node_detach_link(struct tipc_node *n_ptr, struct link *l_ptr); void tipc_node_link_down(struct tipc_node *n_ptr, struct link *l_ptr); void tipc_node_link_up(struct tipc_node *n_ptr, struct link *l_ptr); -int tipc_node_has_active_links(struct tipc_node *n_ptr); -int tipc_node_has_redundant_links(struct tipc_node *n_ptr); +int tipc_node_active_links(struct tipc_node *n_ptr); +int tipc_node_redundant_links(struct tipc_node *n_ptr); int tipc_node_is_up(struct tipc_node *n_ptr); struct sk_buff *tipc_node_get_links(const void *req_tlv_area, int req_tlv_space); struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space); -static inline struct tipc_node *tipc_node_find(u32 addr) -{ - if (likely(in_own_cluster(addr))) - return tipc_net.nodes[tipc_node(addr)]; - return NULL; -} - static inline void tipc_node_lock(struct tipc_node *n_ptr) { spin_lock_bh(&n_ptr->lock); diff --git a/net/tipc/node_subscr.c b/net/tipc/node_subscr.c index 018a55332d91..c3c2815ae630 100644 --- a/net/tipc/node_subscr.c +++ b/net/tipc/node_subscr.c @@ -2,7 +2,7 @@ * net/tipc/node_subscr.c: TIPC "node down" subscription handling * * Copyright (c) 1995-2006, Ericsson AB - * Copyright (c) 2005, Wind River Systems + * Copyright (c) 2005, 2010-2011, Wind River Systems * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -76,3 +76,22 @@ void tipc_nodesub_unsubscribe(struct tipc_node_subscr *node_sub) list_del_init(&node_sub->nodesub_list); tipc_node_unlock(node_sub->node); } + +/** + * tipc_nodesub_notify - notify subscribers that a node is unreachable + * + * Note: node is locked by caller + */ + +void tipc_nodesub_notify(struct tipc_node *node) +{ + struct tipc_node_subscr *ns; + + list_for_each_entry(ns, &node->nsub, nodesub_list) { + if (ns->handle_node_down) { + tipc_k_signal((Handler)ns->handle_node_down, + (unsigned long)ns->usr_handle); + ns->handle_node_down = NULL; + } + } +} diff --git a/net/tipc/node_subscr.h b/net/tipc/node_subscr.h index 006ed739f515..4bc2ca0867a1 100644 --- a/net/tipc/node_subscr.h +++ b/net/tipc/node_subscr.h @@ -2,7 +2,7 @@ * net/tipc/node_subscr.h: Include file for TIPC "node down" subscription handling * * Copyright (c) 1995-2006, Ericsson AB - * Copyright (c) 2005, Wind River Systems + * Copyright (c) 2005, 2010-2011, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -59,5 +59,6 @@ struct tipc_node_subscr { void tipc_nodesub_subscribe(struct tipc_node_subscr *node_sub, u32 addr, void *usr_handle, net_ev_handler handle_down); void tipc_nodesub_unsubscribe(struct tipc_node_subscr *node_sub); +void tipc_nodesub_notify(struct tipc_node *node); #endif diff --git a/net/tipc/port.c b/net/tipc/port.c index 067bab2a0b98..6ff78f9c7d65 100644 --- a/net/tipc/port.c +++ b/net/tipc/port.c @@ -2,7 +2,7 @@ * net/tipc/port.c: TIPC port code * * Copyright (c) 1992-2007, Ericsson AB - * Copyright (c) 2004-2008, Wind River Systems + * Copyright (c) 2004-2008, 2010-2011, Wind River Systems * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -54,33 +54,19 @@ static DEFINE_SPINLOCK(queue_lock); static LIST_HEAD(ports); static void port_handle_node_down(unsigned long ref); -static struct sk_buff *port_build_self_abort_msg(struct port *, u32 err); -static struct sk_buff *port_build_peer_abort_msg(struct port *, u32 err); +static struct sk_buff *port_build_self_abort_msg(struct tipc_port *, u32 err); +static struct sk_buff *port_build_peer_abort_msg(struct tipc_port *, u32 err); static void port_timeout(unsigned long ref); -static u32 port_peernode(struct port *p_ptr) +static u32 port_peernode(struct tipc_port *p_ptr) { - return msg_destnode(&p_ptr->publ.phdr); + return msg_destnode(&p_ptr->phdr); } -static u32 port_peerport(struct port *p_ptr) +static u32 port_peerport(struct tipc_port *p_ptr) { - return msg_destport(&p_ptr->publ.phdr); -} - -static u32 port_out_seqno(struct port *p_ptr) -{ - return msg_transp_seqno(&p_ptr->publ.phdr); -} - -static void port_incr_out_seqno(struct port *p_ptr) -{ - struct tipc_msg *m = &p_ptr->publ.phdr; - - if (likely(!msg_routed(m))) - return; - msg_set_transp_seqno(m, (msg_transp_seqno(m) + 1)); + return msg_destport(&p_ptr->phdr); } /** @@ -94,7 +80,7 @@ int tipc_multicast(u32 ref, struct tipc_name_seq const *seq, struct sk_buff *buf; struct sk_buff *ibuf = NULL; struct port_list dports = {0, NULL, }; - struct port *oport = tipc_port_deref(ref); + struct tipc_port *oport = tipc_port_deref(ref); int ext_targets; int res; @@ -103,7 +89,7 @@ int tipc_multicast(u32 ref, struct tipc_name_seq const *seq, /* Create multicast message */ - hdr = &oport->publ.phdr; + hdr = &oport->phdr; msg_set_type(hdr, TIPC_MCAST_MSG); msg_set_nametype(hdr, seq->type); msg_set_namelower(hdr, seq->lower); @@ -211,7 +197,7 @@ struct tipc_port *tipc_createport_raw(void *usr_handle, void (*wakeup)(struct tipc_port *), const u32 importance) { - struct port *p_ptr; + struct tipc_port *p_ptr; struct tipc_msg *msg; u32 ref; @@ -220,21 +206,19 @@ struct tipc_port *tipc_createport_raw(void *usr_handle, warn("Port creation failed, no memory\n"); return NULL; } - ref = tipc_ref_acquire(p_ptr, &p_ptr->publ.lock); + ref = tipc_ref_acquire(p_ptr, &p_ptr->lock); if (!ref) { warn("Port creation failed, reference table exhausted\n"); kfree(p_ptr); return NULL; } - p_ptr->publ.usr_handle = usr_handle; - p_ptr->publ.max_pkt = MAX_PKT_DEFAULT; - p_ptr->publ.ref = ref; - msg = &p_ptr->publ.phdr; + p_ptr->usr_handle = usr_handle; + p_ptr->max_pkt = MAX_PKT_DEFAULT; + p_ptr->ref = ref; + msg = &p_ptr->phdr; tipc_msg_init(msg, importance, TIPC_NAMED_MSG, LONG_H_SIZE, 0); msg_set_origport(msg, ref); - p_ptr->last_in_seqno = 41; - p_ptr->sent = 1; INIT_LIST_HEAD(&p_ptr->wait_list); INIT_LIST_HEAD(&p_ptr->subscription.nodesub_list); p_ptr->dispatcher = dispatcher; @@ -246,12 +230,12 @@ struct tipc_port *tipc_createport_raw(void *usr_handle, INIT_LIST_HEAD(&p_ptr->port_list); list_add_tail(&p_ptr->port_list, &ports); spin_unlock_bh(&tipc_port_list_lock); - return &(p_ptr->publ); + return p_ptr; } int tipc_deleteport(u32 ref) { - struct port *p_ptr; + struct tipc_port *p_ptr; struct sk_buff *buf = NULL; tipc_withdraw(ref, 0, NULL); @@ -263,7 +247,7 @@ int tipc_deleteport(u32 ref) tipc_port_unlock(p_ptr); k_cancel_timer(&p_ptr->timer); - if (p_ptr->publ.connected) { + if (p_ptr->connected) { buf = port_build_peer_abort_msg(p_ptr, TIPC_ERR_NO_PORT); tipc_nodesub_unsubscribe(&p_ptr->subscription); } @@ -279,14 +263,14 @@ int tipc_deleteport(u32 ref) return 0; } -static int 
port_unreliable(struct port *p_ptr) +static int port_unreliable(struct tipc_port *p_ptr) { - return msg_src_droppable(&p_ptr->publ.phdr); + return msg_src_droppable(&p_ptr->phdr); } int tipc_portunreliable(u32 ref, unsigned int *isunreliable) { - struct port *p_ptr; + struct tipc_port *p_ptr; p_ptr = tipc_port_lock(ref); if (!p_ptr) @@ -298,24 +282,24 @@ int tipc_portunreliable(u32 ref, unsigned int *isunreliable) int tipc_set_portunreliable(u32 ref, unsigned int isunreliable) { - struct port *p_ptr; + struct tipc_port *p_ptr; p_ptr = tipc_port_lock(ref); if (!p_ptr) return -EINVAL; - msg_set_src_droppable(&p_ptr->publ.phdr, (isunreliable != 0)); + msg_set_src_droppable(&p_ptr->phdr, (isunreliable != 0)); tipc_port_unlock(p_ptr); return 0; } -static int port_unreturnable(struct port *p_ptr) +static int port_unreturnable(struct tipc_port *p_ptr) { - return msg_dest_droppable(&p_ptr->publ.phdr); + return msg_dest_droppable(&p_ptr->phdr); } int tipc_portunreturnable(u32 ref, unsigned int *isunrejectable) { - struct port *p_ptr; + struct tipc_port *p_ptr; p_ptr = tipc_port_lock(ref); if (!p_ptr) @@ -327,12 +311,12 @@ int tipc_portunreturnable(u32 ref, unsigned int *isunrejectable) int tipc_set_portunreturnable(u32 ref, unsigned int isunrejectable) { - struct port *p_ptr; + struct tipc_port *p_ptr; p_ptr = tipc_port_lock(ref); if (!p_ptr) return -EINVAL; - msg_set_dest_droppable(&p_ptr->publ.phdr, (isunrejectable != 0)); + msg_set_dest_droppable(&p_ptr->phdr, (isunrejectable != 0)); tipc_port_unlock(p_ptr); return 0; } @@ -345,7 +329,7 @@ int tipc_set_portunreturnable(u32 ref, unsigned int isunrejectable) static struct sk_buff *port_build_proto_msg(u32 destport, u32 destnode, u32 origport, u32 orignode, u32 usr, u32 type, u32 err, - u32 seqno, u32 ack) + u32 ack) { struct sk_buff *buf; struct tipc_msg *msg; @@ -358,7 +342,6 @@ static struct sk_buff *port_build_proto_msg(u32 destport, u32 destnode, msg_set_destport(msg, destport); msg_set_origport(msg, origport); msg_set_orignode(msg, orignode); - msg_set_transp_seqno(msg, seqno); msg_set_msgcnt(msg, ack); } return buf; @@ -413,10 +396,10 @@ int tipc_reject_msg(struct sk_buff *buf, u32 err) /* send self-abort message when rejecting on a connected port */ if (msg_connected(msg)) { struct sk_buff *abuf = NULL; - struct port *p_ptr = tipc_port_lock(msg_destport(msg)); + struct tipc_port *p_ptr = tipc_port_lock(msg_destport(msg)); if (p_ptr) { - if (p_ptr->publ.connected) + if (p_ptr->connected) abuf = port_build_self_abort_msg(p_ptr, err); tipc_port_unlock(p_ptr); } @@ -429,7 +412,7 @@ int tipc_reject_msg(struct sk_buff *buf, u32 err) return data_sz; } -int tipc_port_reject_sections(struct port *p_ptr, struct tipc_msg *hdr, +int tipc_port_reject_sections(struct tipc_port *p_ptr, struct tipc_msg *hdr, struct iovec const *msg_sect, u32 num_sect, int err) { @@ -446,13 +429,13 @@ int tipc_port_reject_sections(struct port *p_ptr, struct tipc_msg *hdr, static void port_timeout(unsigned long ref) { - struct port *p_ptr = tipc_port_lock(ref); + struct tipc_port *p_ptr = tipc_port_lock(ref); struct sk_buff *buf = NULL; if (!p_ptr) return; - if (!p_ptr->publ.connected) { + if (!p_ptr->connected) { tipc_port_unlock(p_ptr); return; } @@ -463,14 +446,12 @@ static void port_timeout(unsigned long ref) } else { buf = port_build_proto_msg(port_peerport(p_ptr), port_peernode(p_ptr), - p_ptr->publ.ref, + p_ptr->ref, tipc_own_addr, CONN_MANAGER, CONN_PROBE, TIPC_OK, - port_out_seqno(p_ptr), 0); - port_incr_out_seqno(p_ptr); p_ptr->probing_state = PROBING; 
k_start_timer(&p_ptr->timer, p_ptr->probing_interval); } @@ -481,7 +462,7 @@ static void port_timeout(unsigned long ref) static void port_handle_node_down(unsigned long ref) { - struct port *p_ptr = tipc_port_lock(ref); + struct tipc_port *p_ptr = tipc_port_lock(ref); struct sk_buff *buf = NULL; if (!p_ptr) @@ -492,73 +473,71 @@ static void port_handle_node_down(unsigned long ref) } -static struct sk_buff *port_build_self_abort_msg(struct port *p_ptr, u32 err) +static struct sk_buff *port_build_self_abort_msg(struct tipc_port *p_ptr, u32 err) { - u32 imp = msg_importance(&p_ptr->publ.phdr); + u32 imp = msg_importance(&p_ptr->phdr); - if (!p_ptr->publ.connected) + if (!p_ptr->connected) return NULL; if (imp < TIPC_CRITICAL_IMPORTANCE) imp++; - return port_build_proto_msg(p_ptr->publ.ref, + return port_build_proto_msg(p_ptr->ref, tipc_own_addr, port_peerport(p_ptr), port_peernode(p_ptr), imp, TIPC_CONN_MSG, err, - p_ptr->last_in_seqno + 1, 0); } -static struct sk_buff *port_build_peer_abort_msg(struct port *p_ptr, u32 err) +static struct sk_buff *port_build_peer_abort_msg(struct tipc_port *p_ptr, u32 err) { - u32 imp = msg_importance(&p_ptr->publ.phdr); + u32 imp = msg_importance(&p_ptr->phdr); - if (!p_ptr->publ.connected) + if (!p_ptr->connected) return NULL; if (imp < TIPC_CRITICAL_IMPORTANCE) imp++; return port_build_proto_msg(port_peerport(p_ptr), port_peernode(p_ptr), - p_ptr->publ.ref, + p_ptr->ref, tipc_own_addr, imp, TIPC_CONN_MSG, err, - port_out_seqno(p_ptr), 0); } void tipc_port_recv_proto_msg(struct sk_buff *buf) { struct tipc_msg *msg = buf_msg(buf); - struct port *p_ptr = tipc_port_lock(msg_destport(msg)); + struct tipc_port *p_ptr = tipc_port_lock(msg_destport(msg)); u32 err = TIPC_OK; struct sk_buff *r_buf = NULL; struct sk_buff *abort_buf = NULL; if (!p_ptr) { err = TIPC_ERR_NO_PORT; - } else if (p_ptr->publ.connected) { + } else if (p_ptr->connected) { if ((port_peernode(p_ptr) != msg_orignode(msg)) || (port_peerport(p_ptr) != msg_origport(msg))) { err = TIPC_ERR_NO_PORT; } else if (msg_type(msg) == CONN_ACK) { int wakeup = tipc_port_congested(p_ptr) && - p_ptr->publ.congested && + p_ptr->congested && p_ptr->wakeup; p_ptr->acked += msg_msgcnt(msg); if (tipc_port_congested(p_ptr)) goto exit; - p_ptr->publ.congested = 0; + p_ptr->congested = 0; if (!wakeup) goto exit; - p_ptr->wakeup(&p_ptr->publ); + p_ptr->wakeup(p_ptr); goto exit; } - } else if (p_ptr->publ.published) { + } else if (p_ptr->published) { err = TIPC_ERR_NO_PORT; } if (err) { @@ -569,7 +548,6 @@ void tipc_port_recv_proto_msg(struct sk_buff *buf) TIPC_HIGH_IMPORTANCE, TIPC_CONN_MSG, err, - 0, 0); goto exit; } @@ -583,11 +561,9 @@ void tipc_port_recv_proto_msg(struct sk_buff *buf) CONN_MANAGER, CONN_PROBE_REPLY, TIPC_OK, - port_out_seqno(p_ptr), 0); } p_ptr->probing_state = CONFIRMED; - port_incr_out_seqno(p_ptr); exit: if (p_ptr) tipc_port_unlock(p_ptr); @@ -596,29 +572,29 @@ exit: buf_discard(buf); } -static void port_print(struct port *p_ptr, struct print_buf *buf, int full_id) +static void port_print(struct tipc_port *p_ptr, struct print_buf *buf, int full_id) { struct publication *publ; if (full_id) tipc_printf(buf, "<%u.%u.%u:%u>:", tipc_zone(tipc_own_addr), tipc_cluster(tipc_own_addr), - tipc_node(tipc_own_addr), p_ptr->publ.ref); + tipc_node(tipc_own_addr), p_ptr->ref); else - tipc_printf(buf, "%-10u:", p_ptr->publ.ref); + tipc_printf(buf, "%-10u:", p_ptr->ref); - if (p_ptr->publ.connected) { + if (p_ptr->connected) { u32 dport = port_peerport(p_ptr); u32 destnode = port_peernode(p_ptr); tipc_printf(buf, 
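
The probe exchange in the hunks above amounts to a small keepalive state machine: the port timer sends CONN_PROBE and marks the port PROBING, and CONN_MANAGER traffic from the peer (a CONN_PROBE or CONN_PROBE_REPLY) drops it back to CONFIRMED. The branch that handles a timer expiring while the port is still PROBING is not shown in this hunk, so treating that case as "peer lost" below is an assumption. A tiny self-contained model, with invented names, purely for illustration:

#include <stdio.h>
#include <stdbool.h>

enum probe_state { CONFIRMED, PROBING };

/* timer tick: returns true if the peer should be declared lost (assumption) */
static bool probe_timer(enum probe_state *st)
{
        if (*st == PROBING)
                return true;        /* no reply since the last probe */
        *st = PROBING;              /* "send CONN_PROBE" and wait */
        return false;
}

/* any message from the peer (e.g. CONN_PROBE_REPLY) confirms liveness */
static void peer_traffic(enum probe_state *st)
{
        *st = CONFIRMED;
}

int main(void)
{
        enum probe_state st = CONFIRMED;

        probe_timer(&st);                          /* probe sent -> PROBING   */
        peer_traffic(&st);                         /* reply seen -> CONFIRMED */
        printf("alive: %d\n", !probe_timer(&st));  /* probes again, still alive */
        printf("lost:  %d\n", probe_timer(&st));   /* no reply before next tick */
        return 0;
}
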
" connected to <%u.%u.%u:%u>", tipc_zone(destnode), tipc_cluster(destnode), tipc_node(destnode), dport); - if (p_ptr->publ.conn_type != 0) + if (p_ptr->conn_type != 0) tipc_printf(buf, " via {%u,%u}", - p_ptr->publ.conn_type, - p_ptr->publ.conn_instance); - } else if (p_ptr->publ.published) { + p_ptr->conn_type, + p_ptr->conn_instance); + } else if (p_ptr->published) { tipc_printf(buf, " bound to"); list_for_each_entry(publ, &p_ptr->publications, pport_list) { if (publ->lower == publ->upper) @@ -639,7 +615,7 @@ struct sk_buff *tipc_port_get_ports(void) struct sk_buff *buf; struct tlv_desc *rep_tlv; struct print_buf pb; - struct port *p_ptr; + struct tipc_port *p_ptr; int str_len; buf = tipc_cfg_reply_alloc(TLV_SPACE(MAX_PORT_QUERY)); @@ -650,9 +626,9 @@ struct sk_buff *tipc_port_get_ports(void) tipc_printbuf_init(&pb, TLV_DATA(rep_tlv), MAX_PORT_QUERY); spin_lock_bh(&tipc_port_list_lock); list_for_each_entry(p_ptr, &ports, port_list) { - spin_lock_bh(p_ptr->publ.lock); + spin_lock_bh(p_ptr->lock); port_print(p_ptr, &pb, 0); - spin_unlock_bh(p_ptr->publ.lock); + spin_unlock_bh(p_ptr->lock); } spin_unlock_bh(&tipc_port_list_lock); str_len = tipc_printbuf_validate(&pb); @@ -665,12 +641,12 @@ struct sk_buff *tipc_port_get_ports(void) void tipc_port_reinit(void) { - struct port *p_ptr; + struct tipc_port *p_ptr; struct tipc_msg *msg; spin_lock_bh(&tipc_port_list_lock); list_for_each_entry(p_ptr, &ports, port_list) { - msg = &p_ptr->publ.phdr; + msg = &p_ptr->phdr; if (msg_orignode(msg) == tipc_own_addr) break; msg_set_prevnode(msg, tipc_own_addr); @@ -695,7 +671,7 @@ static void port_dispatcher_sigh(void *dummy) spin_unlock_bh(&queue_lock); while (buf) { - struct port *p_ptr; + struct tipc_port *p_ptr; struct user_port *up_ptr; struct tipc_portid orig; struct tipc_name_seq dseq; @@ -720,8 +696,8 @@ static void port_dispatcher_sigh(void *dummy) orig.node = msg_orignode(msg); up_ptr = p_ptr->user_port; usr_handle = up_ptr->usr_handle; - connected = p_ptr->publ.connected; - published = p_ptr->publ.published; + connected = p_ptr->connected; + published = p_ptr->published; if (unlikely(msg_errcode(msg))) goto err; @@ -732,6 +708,7 @@ static void port_dispatcher_sigh(void *dummy) tipc_conn_msg_event cb = up_ptr->conn_msg_cb; u32 peer_port = port_peerport(p_ptr); u32 peer_node = port_peernode(p_ptr); + u32 dsz; tipc_port_unlock(p_ptr); if (unlikely(!cb)) @@ -742,13 +719,14 @@ static void port_dispatcher_sigh(void *dummy) } else if ((msg_origport(msg) != peer_port) || (msg_orignode(msg) != peer_node)) goto reject; - if (unlikely(++p_ptr->publ.conn_unacked >= - TIPC_FLOW_CONTROL_WIN)) + dsz = msg_data_sz(msg); + if (unlikely(dsz && + (++p_ptr->conn_unacked >= + TIPC_FLOW_CONTROL_WIN))) tipc_acknowledge(dref, - p_ptr->publ.conn_unacked); + p_ptr->conn_unacked); skb_pull(buf, msg_hdr_sz(msg)); - cb(usr_handle, dref, &buf, msg_data(msg), - msg_data_sz(msg)); + cb(usr_handle, dref, &buf, msg_data(msg), dsz); break; } case TIPC_DIRECT_MSG:{ @@ -872,7 +850,7 @@ static u32 port_dispatcher(struct tipc_port *dummy, struct sk_buff *buf) static void port_wakeup_sh(unsigned long ref) { - struct port *p_ptr; + struct tipc_port *p_ptr; struct user_port *up_ptr; tipc_continue_event cb = NULL; void *uh = NULL; @@ -898,14 +876,14 @@ static void port_wakeup(struct tipc_port *p_ptr) void tipc_acknowledge(u32 ref, u32 ack) { - struct port *p_ptr; + struct tipc_port *p_ptr; struct sk_buff *buf = NULL; p_ptr = tipc_port_lock(ref); if (!p_ptr) return; - if (p_ptr->publ.connected) { - p_ptr->publ.conn_unacked -= ack; + if 
(p_ptr->connected) { + p_ptr->conn_unacked -= ack; buf = port_build_proto_msg(port_peerport(p_ptr), port_peernode(p_ptr), ref, @@ -913,7 +891,6 @@ void tipc_acknowledge(u32 ref, u32 ack) CONN_MANAGER, CONN_ACK, TIPC_OK, - port_out_seqno(p_ptr), ack); } tipc_port_unlock(p_ptr); @@ -936,14 +913,14 @@ int tipc_createport(void *usr_handle, u32 *portref) { struct user_port *up_ptr; - struct port *p_ptr; + struct tipc_port *p_ptr; up_ptr = kmalloc(sizeof(*up_ptr), GFP_ATOMIC); if (!up_ptr) { warn("Port creation failed, no memory\n"); return -ENOMEM; } - p_ptr = (struct port *)tipc_createport_raw(NULL, port_dispatcher, + p_ptr = (struct tipc_port *)tipc_createport_raw(NULL, port_dispatcher, port_wakeup, importance); if (!p_ptr) { kfree(up_ptr); @@ -952,7 +929,7 @@ int tipc_createport(void *usr_handle, p_ptr->user_port = up_ptr; up_ptr->usr_handle = usr_handle; - up_ptr->ref = p_ptr->publ.ref; + up_ptr->ref = p_ptr->ref; up_ptr->err_cb = error_cb; up_ptr->named_err_cb = named_error_cb; up_ptr->conn_err_cb = conn_error_cb; @@ -960,26 +937,26 @@ int tipc_createport(void *usr_handle, up_ptr->named_msg_cb = named_msg_cb; up_ptr->conn_msg_cb = conn_msg_cb; up_ptr->continue_event_cb = continue_event_cb; - *portref = p_ptr->publ.ref; + *portref = p_ptr->ref; tipc_port_unlock(p_ptr); return 0; } int tipc_portimportance(u32 ref, unsigned int *importance) { - struct port *p_ptr; + struct tipc_port *p_ptr; p_ptr = tipc_port_lock(ref); if (!p_ptr) return -EINVAL; - *importance = (unsigned int)msg_importance(&p_ptr->publ.phdr); + *importance = (unsigned int)msg_importance(&p_ptr->phdr); tipc_port_unlock(p_ptr); return 0; } int tipc_set_portimportance(u32 ref, unsigned int imp) { - struct port *p_ptr; + struct tipc_port *p_ptr; if (imp > TIPC_CRITICAL_IMPORTANCE) return -EINVAL; @@ -987,7 +964,7 @@ int tipc_set_portimportance(u32 ref, unsigned int imp) p_ptr = tipc_port_lock(ref); if (!p_ptr) return -EINVAL; - msg_set_importance(&p_ptr->publ.phdr, (u32)imp); + msg_set_importance(&p_ptr->phdr, (u32)imp); tipc_port_unlock(p_ptr); return 0; } @@ -995,7 +972,7 @@ int tipc_set_portimportance(u32 ref, unsigned int imp) int tipc_publish(u32 ref, unsigned int scope, struct tipc_name_seq const *seq) { - struct port *p_ptr; + struct tipc_port *p_ptr; struct publication *publ; u32 key; int res = -EINVAL; @@ -1004,7 +981,7 @@ int tipc_publish(u32 ref, unsigned int scope, struct tipc_name_seq const *seq) if (!p_ptr) return -EINVAL; - if (p_ptr->publ.connected) + if (p_ptr->connected) goto exit; if (seq->lower > seq->upper) goto exit; @@ -1016,11 +993,11 @@ int tipc_publish(u32 ref, unsigned int scope, struct tipc_name_seq const *seq) goto exit; } publ = tipc_nametbl_publish(seq->type, seq->lower, seq->upper, - scope, p_ptr->publ.ref, key); + scope, p_ptr->ref, key); if (publ) { list_add(&publ->pport_list, &p_ptr->publications); p_ptr->pub_count++; - p_ptr->publ.published = 1; + p_ptr->published = 1; res = 0; } exit: @@ -1030,7 +1007,7 @@ exit: int tipc_withdraw(u32 ref, unsigned int scope, struct tipc_name_seq const *seq) { - struct port *p_ptr; + struct tipc_port *p_ptr; struct publication *publ; struct publication *tpubl; int res = -EINVAL; @@ -1063,37 +1040,36 @@ int tipc_withdraw(u32 ref, unsigned int scope, struct tipc_name_seq const *seq) } } if (list_empty(&p_ptr->publications)) - p_ptr->publ.published = 0; + p_ptr->published = 0; tipc_port_unlock(p_ptr); return res; } int tipc_connect2port(u32 ref, struct tipc_portid const *peer) { - struct port *p_ptr; + struct tipc_port *p_ptr; struct tipc_msg *msg; int res = 
-EINVAL; p_ptr = tipc_port_lock(ref); if (!p_ptr) return -EINVAL; - if (p_ptr->publ.published || p_ptr->publ.connected) + if (p_ptr->published || p_ptr->connected) goto exit; if (!peer->ref) goto exit; - msg = &p_ptr->publ.phdr; + msg = &p_ptr->phdr; msg_set_destnode(msg, peer->node); msg_set_destport(msg, peer->ref); msg_set_orignode(msg, tipc_own_addr); - msg_set_origport(msg, p_ptr->publ.ref); - msg_set_transp_seqno(msg, 42); + msg_set_origport(msg, p_ptr->ref); msg_set_type(msg, TIPC_CONN_MSG); msg_set_hdr_sz(msg, SHORT_H_SIZE); p_ptr->probing_interval = PROBING_INTERVAL; p_ptr->probing_state = CONFIRMED; - p_ptr->publ.connected = 1; + p_ptr->connected = 1; k_start_timer(&p_ptr->timer, p_ptr->probing_interval); tipc_nodesub_subscribe(&p_ptr->subscription, peer->node, @@ -1102,7 +1078,7 @@ int tipc_connect2port(u32 ref, struct tipc_portid const *peer) res = 0; exit: tipc_port_unlock(p_ptr); - p_ptr->publ.max_pkt = tipc_link_get_max_pkt(peer->node, ref); + p_ptr->max_pkt = tipc_link_get_max_pkt(peer->node, ref); return res; } @@ -1120,7 +1096,7 @@ int tipc_disconnect_port(struct tipc_port *tp_ptr) tp_ptr->connected = 0; /* let timer expire on it's own to avoid deadlock! */ tipc_nodesub_unsubscribe( - &((struct port *)tp_ptr)->subscription); + &((struct tipc_port *)tp_ptr)->subscription); res = 0; } else { res = -ENOTCONN; @@ -1135,7 +1111,7 @@ int tipc_disconnect_port(struct tipc_port *tp_ptr) int tipc_disconnect(u32 ref) { - struct port *p_ptr; + struct tipc_port *p_ptr; int res; p_ptr = tipc_port_lock(ref); @@ -1151,15 +1127,15 @@ int tipc_disconnect(u32 ref) */ int tipc_shutdown(u32 ref) { - struct port *p_ptr; + struct tipc_port *p_ptr; struct sk_buff *buf = NULL; p_ptr = tipc_port_lock(ref); if (!p_ptr) return -EINVAL; - if (p_ptr->publ.connected) { - u32 imp = msg_importance(&p_ptr->publ.phdr); + if (p_ptr->connected) { + u32 imp = msg_importance(&p_ptr->phdr); if (imp < TIPC_CRITICAL_IMPORTANCE) imp++; buf = port_build_proto_msg(port_peerport(p_ptr), @@ -1169,7 +1145,6 @@ int tipc_shutdown(u32 ref) imp, TIPC_CONN_MSG, TIPC_CONN_SHUTDOWN, - port_out_seqno(p_ptr), 0); } tipc_port_unlock(p_ptr); @@ -1182,13 +1157,13 @@ int tipc_shutdown(u32 ref) * message for this node. 
*/ -static int tipc_port_recv_sections(struct port *sender, unsigned int num_sect, +static int tipc_port_recv_sections(struct tipc_port *sender, unsigned int num_sect, struct iovec const *msg_sect) { struct sk_buff *buf; int res; - res = tipc_msg_build(&sender->publ.phdr, msg_sect, num_sect, + res = tipc_msg_build(&sender->phdr, msg_sect, num_sect, MAX_MSG_SIZE, !sender->user_port, &buf); if (likely(buf)) tipc_port_recv_msg(buf); @@ -1201,15 +1176,15 @@ static int tipc_port_recv_sections(struct port *sender, unsigned int num_sect, int tipc_send(u32 ref, unsigned int num_sect, struct iovec const *msg_sect) { - struct port *p_ptr; + struct tipc_port *p_ptr; u32 destnode; int res; p_ptr = tipc_port_deref(ref); - if (!p_ptr || !p_ptr->publ.connected) + if (!p_ptr || !p_ptr->connected) return -EINVAL; - p_ptr->publ.congested = 1; + p_ptr->congested = 1; if (!tipc_port_congested(p_ptr)) { destnode = port_peernode(p_ptr); if (likely(destnode != tipc_own_addr)) @@ -1219,14 +1194,14 @@ int tipc_send(u32 ref, unsigned int num_sect, struct iovec const *msg_sect) res = tipc_port_recv_sections(p_ptr, num_sect, msg_sect); if (likely(res != -ELINKCONG)) { - port_incr_out_seqno(p_ptr); - p_ptr->publ.congested = 0; - p_ptr->sent++; + p_ptr->congested = 0; + if (res > 0) + p_ptr->sent++; return res; } } if (port_unreliable(p_ptr)) { - p_ptr->publ.congested = 0; + p_ptr->congested = 0; /* Just calculate msg length and return */ return tipc_msg_calc_data_size(msg_sect, num_sect); } @@ -1240,17 +1215,17 @@ int tipc_send(u32 ref, unsigned int num_sect, struct iovec const *msg_sect) int tipc_send2name(u32 ref, struct tipc_name const *name, unsigned int domain, unsigned int num_sect, struct iovec const *msg_sect) { - struct port *p_ptr; + struct tipc_port *p_ptr; struct tipc_msg *msg; u32 destnode = domain; u32 destport; int res; p_ptr = tipc_port_deref(ref); - if (!p_ptr || p_ptr->publ.connected) + if (!p_ptr || p_ptr->connected) return -EINVAL; - msg = &p_ptr->publ.phdr; + msg = &p_ptr->phdr; msg_set_type(msg, TIPC_NAMED_MSG); msg_set_orignode(msg, tipc_own_addr); msg_set_origport(msg, ref); @@ -1263,13 +1238,17 @@ int tipc_send2name(u32 ref, struct tipc_name const *name, unsigned int domain, msg_set_destport(msg, destport); if (likely(destport)) { - p_ptr->sent++; if (likely(destnode == tipc_own_addr)) - return tipc_port_recv_sections(p_ptr, num_sect, msg_sect); - res = tipc_link_send_sections_fast(p_ptr, msg_sect, num_sect, - destnode); - if (likely(res != -ELINKCONG)) + res = tipc_port_recv_sections(p_ptr, num_sect, + msg_sect); + else + res = tipc_link_send_sections_fast(p_ptr, msg_sect, + num_sect, destnode); + if (likely(res != -ELINKCONG)) { + if (res > 0) + p_ptr->sent++; return res; + } if (port_unreliable(p_ptr)) { /* Just calculate msg length and return */ return tipc_msg_calc_data_size(msg_sect, num_sect); @@ -1287,27 +1266,32 @@ int tipc_send2name(u32 ref, struct tipc_name const *name, unsigned int domain, int tipc_send2port(u32 ref, struct tipc_portid const *dest, unsigned int num_sect, struct iovec const *msg_sect) { - struct port *p_ptr; + struct tipc_port *p_ptr; struct tipc_msg *msg; int res; p_ptr = tipc_port_deref(ref); - if (!p_ptr || p_ptr->publ.connected) + if (!p_ptr || p_ptr->connected) return -EINVAL; - msg = &p_ptr->publ.phdr; + msg = &p_ptr->phdr; msg_set_type(msg, TIPC_DIRECT_MSG); msg_set_orignode(msg, tipc_own_addr); msg_set_origport(msg, ref); msg_set_destnode(msg, dest->node); msg_set_destport(msg, dest->ref); msg_set_hdr_sz(msg, DIR_MSG_H_SIZE); - p_ptr->sent++; + if 
(dest->node == tipc_own_addr) - return tipc_port_recv_sections(p_ptr, num_sect, msg_sect); - res = tipc_link_send_sections_fast(p_ptr, msg_sect, num_sect, dest->node); - if (likely(res != -ELINKCONG)) + res = tipc_port_recv_sections(p_ptr, num_sect, msg_sect); + else + res = tipc_link_send_sections_fast(p_ptr, msg_sect, num_sect, + dest->node); + if (likely(res != -ELINKCONG)) { + if (res > 0) + p_ptr->sent++; return res; + } if (port_unreliable(p_ptr)) { /* Just calculate msg length and return */ return tipc_msg_calc_data_size(msg_sect, num_sect); @@ -1322,15 +1306,15 @@ int tipc_send2port(u32 ref, struct tipc_portid const *dest, int tipc_send_buf2port(u32 ref, struct tipc_portid const *dest, struct sk_buff *buf, unsigned int dsz) { - struct port *p_ptr; + struct tipc_port *p_ptr; struct tipc_msg *msg; int res; - p_ptr = (struct port *)tipc_ref_deref(ref); - if (!p_ptr || p_ptr->publ.connected) + p_ptr = (struct tipc_port *)tipc_ref_deref(ref); + if (!p_ptr || p_ptr->connected) return -EINVAL; - msg = &p_ptr->publ.phdr; + msg = &p_ptr->phdr; msg_set_type(msg, TIPC_DIRECT_MSG); msg_set_orignode(msg, tipc_own_addr); msg_set_origport(msg, ref); @@ -1343,12 +1327,16 @@ int tipc_send_buf2port(u32 ref, struct tipc_portid const *dest, skb_push(buf, DIR_MSG_H_SIZE); skb_copy_to_linear_data(buf, msg, DIR_MSG_H_SIZE); - p_ptr->sent++; + if (dest->node == tipc_own_addr) - return tipc_port_recv_msg(buf); - res = tipc_send_buf_fast(buf, dest->node); - if (likely(res != -ELINKCONG)) + res = tipc_port_recv_msg(buf); + else + res = tipc_send_buf_fast(buf, dest->node); + if (likely(res != -ELINKCONG)) { + if (res > 0) + p_ptr->sent++; return res; + } if (port_unreliable(p_ptr)) return dsz; return -ELINKCONG; diff --git a/net/tipc/port.h b/net/tipc/port.h index 8e84b989949c..87b9424ae0ec 100644 --- a/net/tipc/port.h +++ b/net/tipc/port.h @@ -2,7 +2,7 @@ * net/tipc/port.h: Include file for TIPC port code * * Copyright (c) 1994-2007, Ericsson AB - * Copyright (c) 2004-2007, Wind River Systems + * Copyright (c) 2004-2007, 2010-2011, Wind River Systems * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -95,7 +95,7 @@ struct user_port { }; /** - * struct tipc_port - TIPC port info available to socket API + * struct tipc_port - TIPC port structure * @usr_handle: pointer to additional user-defined information about port * @lock: pointer to spinlock for controlling access to port * @connected: non-zero if port is currently connected to a peer port @@ -107,43 +107,33 @@ struct user_port { * @max_pkt: maximum packet size "hint" used when building messages sent by port * @ref: unique reference to port in TIPC object registry * @phdr: preformatted message header used when sending messages - */ -struct tipc_port { - void *usr_handle; - spinlock_t *lock; - int connected; - u32 conn_type; - u32 conn_instance; - u32 conn_unacked; - int published; - u32 congested; - u32 max_pkt; - u32 ref; - struct tipc_msg phdr; -}; - -/** - * struct port - TIPC port structure - * @publ: TIPC port info available to privileged users * @port_list: adjacent ports in TIPC's global list of ports * @dispatcher: ptr to routine which handles received messages * @wakeup: ptr to routine to call when port is no longer congested * @user_port: ptr to user port associated with port (if any) * @wait_list: adjacent ports in list of ports waiting on link congestion * @waiting_pkts: - * @sent: - * @acked: + * @sent: # of non-empty messages sent by port + * @acked: # of non-empty message acknowledgements from connected port's peer * @publications: list of publications for port * @pub_count: total # of publications port has made during its lifetime * @probing_state: * @probing_interval: - * @last_in_seqno: * @timer_ref: * @subscription: "node down" subscription used to terminate failed connections */ - -struct port { - struct tipc_port publ; +struct tipc_port { + void *usr_handle; + spinlock_t *lock; + int connected; + u32 conn_type; + u32 conn_instance; + u32 conn_unacked; + int published; + u32 congested; + u32 max_pkt; + u32 ref; + struct tipc_msg phdr; struct list_head port_list; u32 (*dispatcher)(struct tipc_port *, struct sk_buff *); void (*wakeup)(struct tipc_port *); @@ -156,7 +146,6 @@ struct port { u32 pub_count; u32 probing_state; u32 probing_interval; - u32 last_in_seqno; struct timer_list timer; struct tipc_node_subscr subscription; }; @@ -230,7 +219,7 @@ int tipc_send_buf2port(u32 portref, struct tipc_portid const *dest, int tipc_multicast(u32 portref, struct tipc_name_seq const *seq, unsigned int section_count, struct iovec const *msg); -int tipc_port_reject_sections(struct port *p_ptr, struct tipc_msg *hdr, +int tipc_port_reject_sections(struct tipc_port *p_ptr, struct tipc_msg *hdr, struct iovec const *msg_sect, u32 num_sect, int err); struct sk_buff *tipc_port_get_ports(void); @@ -242,9 +231,9 @@ void tipc_port_reinit(void); * tipc_port_lock - lock port instance referred to and return its pointer */ -static inline struct port *tipc_port_lock(u32 ref) +static inline struct tipc_port *tipc_port_lock(u32 ref) { - return (struct port *)tipc_ref_lock(ref); + return (struct tipc_port *)tipc_ref_lock(ref); } /** @@ -253,27 +242,27 @@ static inline struct port *tipc_port_lock(u32 ref) * Can use pointer instead of tipc_ref_unlock() since port is already locked. 
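
For reference, the congestion test kept in port.h is plain window arithmetic over the counters that now track only non-empty messages: a sender is congested once two full acknowledgement windows are outstanding, and each CONN_ACK returns a window's worth of credit. A small self-contained illustration follows; TIPC_FLOW_CONTROL_WIN's actual value is not shown in this diff, so the 512 below is only a placeholder.

#include <stdio.h>

#define FLOW_CONTROL_WIN 512   /* placeholder; real TIPC constant not shown here */

struct toy_port {
        unsigned int sent;     /* non-empty messages sent */
        unsigned int acked;    /* non-empty messages acknowledged by the peer */
};

/* mirrors tipc_port_congested(): two full windows outstanding => congested */
static int congested(const struct toy_port *p)
{
        return (p->sent - p->acked) >= (FLOW_CONTROL_WIN * 2);
}

int main(void)
{
        struct toy_port p = { 0, 0 };

        p.sent = 2 * FLOW_CONTROL_WIN - 1;
        printf("just under the limit: congested=%d\n", congested(&p)); /* 0 */

        p.sent++;
        printf("at the limit:         congested=%d\n", congested(&p)); /* 1 */

        p.acked += FLOW_CONTROL_WIN;   /* one CONN_ACK worth of credit */
        printf("after an ack:         congested=%d\n", congested(&p)); /* 0 */
        return 0;
}
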
*/ -static inline void tipc_port_unlock(struct port *p_ptr) +static inline void tipc_port_unlock(struct tipc_port *p_ptr) { - spin_unlock_bh(p_ptr->publ.lock); + spin_unlock_bh(p_ptr->lock); } -static inline struct port *tipc_port_deref(u32 ref) +static inline struct tipc_port *tipc_port_deref(u32 ref) { - return (struct port *)tipc_ref_deref(ref); + return (struct tipc_port *)tipc_ref_deref(ref); } -static inline u32 tipc_peer_port(struct port *p_ptr) +static inline u32 tipc_peer_port(struct tipc_port *p_ptr) { - return msg_destport(&p_ptr->publ.phdr); + return msg_destport(&p_ptr->phdr); } -static inline u32 tipc_peer_node(struct port *p_ptr) +static inline u32 tipc_peer_node(struct tipc_port *p_ptr) { - return msg_destnode(&p_ptr->publ.phdr); + return msg_destnode(&p_ptr->phdr); } -static inline int tipc_port_congested(struct port *p_ptr) +static inline int tipc_port_congested(struct tipc_port *p_ptr) { return (p_ptr->sent - p_ptr->acked) >= (TIPC_FLOW_CONTROL_WIN * 2); } @@ -284,7 +273,7 @@ static inline int tipc_port_congested(struct port *p_ptr) static inline int tipc_port_recv_msg(struct sk_buff *buf) { - struct port *p_ptr; + struct tipc_port *p_ptr; struct tipc_msg *msg = buf_msg(buf); u32 destport = msg_destport(msg); u32 dsz = msg_data_sz(msg); @@ -299,7 +288,7 @@ static inline int tipc_port_recv_msg(struct sk_buff *buf) /* validate destination & pass to port, otherwise reject message */ p_ptr = tipc_port_lock(destport); if (likely(p_ptr)) { - if (likely(p_ptr->publ.connected)) { + if (likely(p_ptr->connected)) { if ((unlikely(msg_origport(msg) != tipc_peer_port(p_ptr))) || (unlikely(msg_orignode(msg) != tipc_peer_node(p_ptr))) || (unlikely(!msg_connected(msg)))) { @@ -308,7 +297,7 @@ static inline int tipc_port_recv_msg(struct sk_buff *buf) goto reject; } } - err = p_ptr->dispatcher(&p_ptr->publ, buf); + err = p_ptr->dispatcher(p_ptr, buf); tipc_port_unlock(p_ptr); if (likely(!err)) return dsz; diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 2b02a3a80313..29d94d53198d 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -2,7 +2,7 @@ * net/tipc/socket.c: TIPC socket API * * Copyright (c) 2001-2007, Ericsson AB - * Copyright (c) 2004-2008, Wind River Systems + * Copyright (c) 2004-2008, 2010-2011, Wind River Systems * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -58,6 +58,9 @@ struct tipc_sock { #define tipc_sk(sk) ((struct tipc_sock *)(sk)) #define tipc_sk_port(sk) ((struct tipc_port *)(tipc_sk(sk)->p)) +#define tipc_rx_ready(sock) (!skb_queue_empty(&sock->sk->sk_receive_queue) || \ + (sock->state == SS_DISCONNECTING)) + static int backlog_rcv(struct sock *sk, struct sk_buff *skb); static u32 dispatch(struct tipc_port *tport, struct sk_buff *buf); static void wakeupdispatch(struct tipc_port *tport); @@ -241,7 +244,6 @@ static int tipc_create(struct net *net, struct socket *sock, int protocol, tipc_set_portunreliable(tp_ptr->ref, 1); } - atomic_inc(&tipc_user_count); return 0; } @@ -290,7 +292,7 @@ static int release(struct socket *sock) if (buf == NULL) break; atomic_dec(&tipc_queue_size); - if (TIPC_SKB_CB(buf)->handle != msg_data(buf_msg(buf))) + if (TIPC_SKB_CB(buf)->handle != 0) buf_discard(buf); else { if ((sock->state == SS_CONNECTING) || @@ -321,7 +323,6 @@ static int release(struct socket *sock) sock_put(sk); sock->sk = NULL; - atomic_dec(&tipc_user_count); return res; } @@ -495,6 +496,8 @@ static int dest_name_check(struct sockaddr_tipc *dest, struct msghdr *m) if (likely(dest->addr.name.name.type != TIPC_CFG_SRV)) return -EACCES; + if (!m->msg_iovlen || (m->msg_iov[0].iov_len < sizeof(hdr))) + return -EMSGSIZE; if (copy_from_user(&hdr, m->msg_iov[0].iov_base, sizeof(hdr))) return -EFAULT; if ((ntohs(hdr.tcm_type) & 0xC000) && (!capable(CAP_NET_ADMIN))) @@ -911,15 +914,13 @@ static int recv_msg(struct kiocb *iocb, struct socket *sock, struct tipc_port *tport = tipc_sk_port(sk); struct sk_buff *buf; struct tipc_msg *msg; + long timeout; unsigned int sz; u32 err; int res; /* Catch invalid receive requests */ - if (m->msg_iovlen != 1) - return -EOPNOTSUPP; /* Don't do multiple iovec entries yet */ - if (unlikely(!buf_len)) return -EINVAL; @@ -930,6 +931,7 @@ static int recv_msg(struct kiocb *iocb, struct socket *sock, goto exit; } + timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); restart: /* Look for a message in receive queue; wait if necessary */ @@ -939,17 +941,15 @@ restart: res = -ENOTCONN; goto exit; } - if (flags & MSG_DONTWAIT) { - res = -EWOULDBLOCK; + if (timeout <= 0L) { + res = timeout ? 
timeout : -EWOULDBLOCK; goto exit; } release_sock(sk); - res = wait_event_interruptible(*sk_sleep(sk), - (!skb_queue_empty(&sk->sk_receive_queue) || - (sock->state == SS_DISCONNECTING))); + timeout = wait_event_interruptible_timeout(*sk_sleep(sk), + tipc_rx_ready(sock), + timeout); lock_sock(sk); - if (res) - goto exit; } /* Look at first message in receive queue */ @@ -991,11 +991,10 @@ restart: sz = buf_len; m->msg_flags |= MSG_TRUNC; } - if (unlikely(copy_to_user(m->msg_iov->iov_base, msg_data(msg), - sz))) { - res = -EFAULT; + res = skb_copy_datagram_iovec(buf, msg_hdr_sz(msg), + m->msg_iov, sz); + if (res) goto exit; - } res = sz; } else { if ((sock->state == SS_READY) || @@ -1038,19 +1037,15 @@ static int recv_stream(struct kiocb *iocb, struct socket *sock, struct tipc_port *tport = tipc_sk_port(sk); struct sk_buff *buf; struct tipc_msg *msg; + long timeout; unsigned int sz; int sz_to_copy, target, needed; int sz_copied = 0; - char __user *crs = m->msg_iov->iov_base; - unsigned char *buf_crs; u32 err; int res = 0; /* Catch invalid receive attempts */ - if (m->msg_iovlen != 1) - return -EOPNOTSUPP; /* Don't do multiple iovec entries yet */ - if (unlikely(!buf_len)) return -EINVAL; @@ -1063,7 +1058,7 @@ static int recv_stream(struct kiocb *iocb, struct socket *sock, } target = sock_rcvlowat(sk, flags & MSG_WAITALL, buf_len); - + timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); restart: /* Look for a message in receive queue; wait if necessary */ @@ -1073,17 +1068,15 @@ restart: res = -ENOTCONN; goto exit; } - if (flags & MSG_DONTWAIT) { - res = -EWOULDBLOCK; + if (timeout <= 0L) { + res = timeout ? timeout : -EWOULDBLOCK; goto exit; } release_sock(sk); - res = wait_event_interruptible(*sk_sleep(sk), - (!skb_queue_empty(&sk->sk_receive_queue) || - (sock->state == SS_DISCONNECTING))); + timeout = wait_event_interruptible_timeout(*sk_sleep(sk), + tipc_rx_ready(sock), + timeout); lock_sock(sk); - if (res) - goto exit; } /* Look at first message in receive queue */ @@ -1112,24 +1105,25 @@ restart: /* Capture message data (if valid) & compute return value (always) */ if (!err) { - buf_crs = (unsigned char *)(TIPC_SKB_CB(buf)->handle); - sz = (unsigned char *)msg + msg_size(msg) - buf_crs; + u32 offset = (u32)(unsigned long)(TIPC_SKB_CB(buf)->handle); + sz -= offset; needed = (buf_len - sz_copied); sz_to_copy = (sz <= needed) ? sz : needed; - if (unlikely(copy_to_user(crs, buf_crs, sz_to_copy))) { - res = -EFAULT; + + res = skb_copy_datagram_iovec(buf, msg_hdr_sz(msg) + offset, + m->msg_iov, sz_to_copy); + if (res) goto exit; - } + sz_copied += sz_to_copy; if (sz_to_copy < sz) { if (!(flags & MSG_PEEK)) - TIPC_SKB_CB(buf)->handle = buf_crs + sz_to_copy; + TIPC_SKB_CB(buf)->handle = + (void *)(unsigned long)(offset + sz_to_copy); goto exit; } - - crs += sz_to_copy; } else { if (sz_copied != 0) goto exit; /* can't add error msg to valid data */ @@ -1256,7 +1250,7 @@ static u32 filter_rcv(struct sock *sk, struct sk_buff *buf) /* Enqueue message (finally!) 
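
On the user-space side, the switch to sock_rcvtimeo() means a blocking receive on an AF_TIPC socket is now bounded by SO_RCVTIMEO rather than only honouring MSG_DONTWAIT, and the move to skb_copy_datagram_iovec() lifts the old single-iovec restriction. A minimal sketch of both, assuming 'fd' is an already-connected TIPC socket; error handling is trimmed and the buffer sizes are arbitrary.

#include <stdio.h>
#include <errno.h>
#include <sys/socket.h>
#include <sys/time.h>
#include <sys/uio.h>

static int recv_with_timeout(int fd)
{
        struct timeval tv = { .tv_sec = 2, .tv_usec = 0 };
        char hdr[16], body[1024];
        struct iovec iov[2] = {
                { .iov_base = hdr,  .iov_len = sizeof(hdr)  },
                { .iov_base = body, .iov_len = sizeof(body) },
        };
        struct msghdr msg = { .msg_iov = iov, .msg_iovlen = 2 };
        ssize_t n;

        /* bounds the wait below; previously only MSG_DONTWAIT was honoured */
        setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv));

        /* two iovec entries in one call: possible now that the kernel copies
         * with skb_copy_datagram_iovec() instead of a single copy_to_user() */
        n = recvmsg(fd, &msg, 0);
        if (n < 0 && (errno == EAGAIN || errno == EWOULDBLOCK))
                fprintf(stderr, "no message within 2 seconds\n");
        return (int)n;
}
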
*/ - TIPC_SKB_CB(buf)->handle = msg_data(msg); + TIPC_SKB_CB(buf)->handle = 0; atomic_inc(&tipc_queue_size); __skb_queue_tail(&sk->sk_receive_queue, buf); @@ -1608,7 +1602,7 @@ restart: buf = __skb_dequeue(&sk->sk_receive_queue); if (buf) { atomic_dec(&tipc_queue_size); - if (TIPC_SKB_CB(buf)->handle != msg_data(buf_msg(buf))) { + if (TIPC_SKB_CB(buf)->handle != 0) { buf_discard(buf); goto restart; } diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c index ca04479c3d42..aae9eae13404 100644 --- a/net/tipc/subscr.c +++ b/net/tipc/subscr.c @@ -2,7 +2,7 @@ * net/tipc/subscr.c: TIPC network topology service * * Copyright (c) 2000-2006, Ericsson AB - * Copyright (c) 2005-2007, Wind River Systems + * Copyright (c) 2005-2007, 2010-2011, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -160,7 +160,7 @@ void tipc_subscr_report_overlap(struct subscription *sub, static void subscr_timeout(struct subscription *sub) { - struct port *server_port; + struct tipc_port *server_port; /* Validate server port reference (in case subscriber is terminating) */ @@ -472,8 +472,6 @@ static void subscr_named_msg_event(void *usr_handle, struct tipc_portid const *orig, struct tipc_name_seq const *dest) { - static struct iovec msg_sect = {NULL, 0}; - struct subscriber *subscriber; u32 server_port_ref; @@ -508,7 +506,7 @@ static void subscr_named_msg_event(void *usr_handle, /* Lock server port (& save lock address for future use) */ - subscriber->lock = tipc_port_lock(subscriber->port_ref)->publ.lock; + subscriber->lock = tipc_port_lock(subscriber->port_ref)->lock; /* Add subscriber to topology server's subscriber list */ @@ -523,7 +521,7 @@ static void subscr_named_msg_event(void *usr_handle, /* Send an ACK- to complete connection handshaking */ - tipc_send(server_port_ref, 1, &msg_sect); + tipc_send(server_port_ref, 0, NULL); /* Handle optional subscription request */ @@ -542,7 +540,6 @@ int tipc_subscr_start(void) spin_lock_init(&topsrv.lock); INIT_LIST_HEAD(&topsrv.subscriber_list); - spin_lock_bh(&topsrv.lock); res = tipc_createport(NULL, TIPC_CRITICAL_IMPORTANCE, NULL, @@ -563,12 +560,10 @@ int tipc_subscr_start(void) goto failed; } - spin_unlock_bh(&topsrv.lock); return 0; failed: err("Failed to create subscription service\n"); - spin_unlock_bh(&topsrv.lock); return res; } diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index d8d98d5b508c..de870184e457 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1171,7 +1171,7 @@ restart: newsk->sk_type = sk->sk_type; init_peercred(newsk); newu = unix_sk(newsk); - newsk->sk_wq = &newu->peer_wq; + RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq); otheru = unix_sk(other); /* copy address information from listening to new sock*/ @@ -1567,7 +1567,6 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, struct sock_iocb *siocb = kiocb_to_siocb(kiocb); struct sock *sk = sock->sk; struct sock *other = NULL; - struct sockaddr_un *sunaddr = msg->msg_name; int err, size; struct sk_buff *skb; int sent = 0; @@ -1590,7 +1589,6 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, err = sk->sk_state == TCP_ESTABLISHED ? 
-EISCONN : -EOPNOTSUPP; goto out_err; } else { - sunaddr = NULL; err = -ENOTCONN; other = unix_peer(sk); if (!other) @@ -1730,7 +1728,11 @@ static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock, msg->msg_namelen = 0; - mutex_lock(&u->readlock); + err = mutex_lock_interruptible(&u->readlock); + if (err) { + err = sock_intr_errno(sock_rcvtimeo(sk, noblock)); + goto out; + } skb = skb_recv_datagram(sk, flags, noblock, &err); if (!skb) { @@ -1870,7 +1872,11 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, memset(&tmp_scm, 0, sizeof(tmp_scm)); } - mutex_lock(&u->readlock); + err = mutex_lock_interruptible(&u->readlock); + if (err) { + err = sock_intr_errno(timeo); + goto out; + } do { int chunk; @@ -1901,11 +1907,12 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, timeo = unix_stream_data_wait(sk, timeo); - if (signal_pending(current)) { + if (signal_pending(current) + || mutex_lock_interruptible(&u->readlock)) { err = sock_intr_errno(timeo); goto out; } - mutex_lock(&u->readlock); + continue; unlock: unix_state_unlock(sk); diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig index d0ee29063e5d..1f1ef70f34f2 100644 --- a/net/wireless/Kconfig +++ b/net/wireless/Kconfig @@ -95,7 +95,7 @@ config CFG80211_DEBUGFS If unsure, say N. config CFG80211_INTERNAL_REGDB - bool "use statically compiled regulatory rules database" if EMBEDDED + bool "use statically compiled regulatory rules database" if EXPERT default n depends on CFG80211 ---help--- diff --git a/net/wireless/core.c b/net/wireless/core.c index e9a5f8ca4c27..fe01de29bfe8 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -718,13 +718,6 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb, wdev->ps = false; /* allow mac80211 to determine the timeout */ wdev->ps_timeout = -1; - if (rdev->ops->set_power_mgmt) - if (rdev->ops->set_power_mgmt(wdev->wiphy, dev, - wdev->ps, - wdev->ps_timeout)) { - /* assume this means it's off */ - wdev->ps = false; - } if (!dev->ethtool_ops) dev->ethtool_ops = &cfg80211_ethtool_ops; @@ -813,6 +806,19 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb, rdev->opencount++; mutex_unlock(&rdev->devlist_mtx); cfg80211_unlock_rdev(rdev); + + /* + * Configure power management to the driver here so that its + * correctly set also after interface type changes etc. 
+ */ + if (wdev->iftype == NL80211_IFTYPE_STATION && + rdev->ops->set_power_mgmt) + if (rdev->ops->set_power_mgmt(wdev->wiphy, dev, + wdev->ps, + wdev->ps_timeout)) { + /* assume this means it's off */ + wdev->ps = false; + } break; case NETDEV_UNREGISTER: /* diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 9b62710891a2..4ebce4284e9d 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -1968,13 +1968,41 @@ static int parse_station_flags(struct genl_info *info, return 0; } +static bool nl80211_put_sta_rate(struct sk_buff *msg, struct rate_info *info, + int attr) +{ + struct nlattr *rate; + u16 bitrate; + + rate = nla_nest_start(msg, attr); + if (!rate) + goto nla_put_failure; + + /* cfg80211_calculate_bitrate will return 0 for mcs >= 32 */ + bitrate = cfg80211_calculate_bitrate(info); + if (bitrate > 0) + NLA_PUT_U16(msg, NL80211_RATE_INFO_BITRATE, bitrate); + + if (info->flags & RATE_INFO_FLAGS_MCS) + NLA_PUT_U8(msg, NL80211_RATE_INFO_MCS, info->mcs); + if (info->flags & RATE_INFO_FLAGS_40_MHZ_WIDTH) + NLA_PUT_FLAG(msg, NL80211_RATE_INFO_40_MHZ_WIDTH); + if (info->flags & RATE_INFO_FLAGS_SHORT_GI) + NLA_PUT_FLAG(msg, NL80211_RATE_INFO_SHORT_GI); + + nla_nest_end(msg, rate); + return true; + +nla_put_failure: + return false; +} + static int nl80211_send_station(struct sk_buff *msg, u32 pid, u32 seq, int flags, struct net_device *dev, const u8 *mac_addr, struct station_info *sinfo) { void *hdr; - struct nlattr *sinfoattr, *txrate; - u16 bitrate; + struct nlattr *sinfoattr; hdr = nl80211hdr_put(msg, pid, seq, flags, NL80211_CMD_NEW_STATION); if (!hdr) @@ -2013,24 +2041,14 @@ static int nl80211_send_station(struct sk_buff *msg, u32 pid, u32 seq, NLA_PUT_U8(msg, NL80211_STA_INFO_SIGNAL_AVG, sinfo->signal_avg); if (sinfo->filled & STATION_INFO_TX_BITRATE) { - txrate = nla_nest_start(msg, NL80211_STA_INFO_TX_BITRATE); - if (!txrate) + if (!nl80211_put_sta_rate(msg, &sinfo->txrate, + NL80211_STA_INFO_TX_BITRATE)) + goto nla_put_failure; + } + if (sinfo->filled & STATION_INFO_RX_BITRATE) { + if (!nl80211_put_sta_rate(msg, &sinfo->rxrate, + NL80211_STA_INFO_RX_BITRATE)) goto nla_put_failure; - - /* cfg80211_calculate_bitrate will return 0 for mcs >= 32 */ - bitrate = cfg80211_calculate_bitrate(&sinfo->txrate); - if (bitrate > 0) - NLA_PUT_U16(msg, NL80211_RATE_INFO_BITRATE, bitrate); - - if (sinfo->txrate.flags & RATE_INFO_FLAGS_MCS) - NLA_PUT_U8(msg, NL80211_RATE_INFO_MCS, - sinfo->txrate.mcs); - if (sinfo->txrate.flags & RATE_INFO_FLAGS_40_MHZ_WIDTH) - NLA_PUT_FLAG(msg, NL80211_RATE_INFO_40_MHZ_WIDTH); - if (sinfo->txrate.flags & RATE_INFO_FLAGS_SHORT_GI) - NLA_PUT_FLAG(msg, NL80211_RATE_INFO_SHORT_GI); - - nla_nest_end(msg, txrate); } if (sinfo->filled & STATION_INFO_RX_PACKETS) NLA_PUT_U32(msg, NL80211_STA_INFO_RX_PACKETS, @@ -2718,7 +2736,7 @@ static int nl80211_get_mesh_config(struct sk_buff *skb, hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0, NL80211_CMD_GET_MESH_CONFIG); if (!hdr) - goto nla_put_failure; + goto out; pinfoattr = nla_nest_start(msg, NL80211_ATTR_MESH_CONFIG); if (!pinfoattr) goto nla_put_failure; @@ -2759,6 +2777,7 @@ static int nl80211_get_mesh_config(struct sk_buff *skb, nla_put_failure: genlmsg_cancel(msg, hdr); + out: nlmsg_free(msg); return -ENOBUFS; } @@ -2954,7 +2973,7 @@ static int nl80211_get_reg(struct sk_buff *skb, struct genl_info *info) hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0, NL80211_CMD_GET_REG); if (!hdr) - goto nla_put_failure; + goto put_failure; NLA_PUT_STRING(msg, NL80211_ATTR_REG_ALPHA2, 
cfg80211_regdomain->alpha2); @@ -3001,6 +3020,7 @@ static int nl80211_get_reg(struct sk_buff *skb, struct genl_info *info) nla_put_failure: genlmsg_cancel(msg, hdr); +put_failure: nlmsg_free(msg); err = -EMSGSIZE; out: diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 37693b6ef23a..3332d5bce317 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -63,6 +63,10 @@ static struct regulatory_request *last_request; /* To trigger userspace events */ static struct platform_device *reg_pdev; +static struct device_type reg_device_type = { + .uevent = reg_device_uevent, +}; + /* * Central wireless core regulatory domains, we only need two, * the current one and a world regulatory domain in case we have no @@ -362,16 +366,11 @@ static inline void reg_regdb_query(const char *alpha2) {} /* * This lets us keep regulatory code which is updated on a regulatory - * basis in userspace. + * basis in userspace. Country information is filled in by + * reg_device_uevent */ static int call_crda(const char *alpha2) { - char country_env[9 + 2] = "COUNTRY="; - char *envp[] = { - country_env, - NULL - }; - if (!is_world_regdom((char *) alpha2)) pr_info("Calling CRDA for country: %c%c\n", alpha2[0], alpha2[1]); @@ -381,10 +380,7 @@ static int call_crda(const char *alpha2) /* query internal regulatory database (if it exists) */ reg_regdb_query(alpha2); - country_env[8] = alpha2[0]; - country_env[9] = alpha2[1]; - - return kobject_uevent_env(®_pdev->dev.kobj, KOBJ_CHANGE, envp); + return kobject_uevent(®_pdev->dev.kobj, KOBJ_CHANGE); } /* Used by nl80211 before kmalloc'ing our regulatory domain */ @@ -1801,9 +1797,9 @@ void regulatory_hint_disconnect(void) static bool freq_is_chan_12_13_14(u16 freq) { - if (freq == ieee80211_channel_to_frequency(12) || - freq == ieee80211_channel_to_frequency(13) || - freq == ieee80211_channel_to_frequency(14)) + if (freq == ieee80211_channel_to_frequency(12, IEEE80211_BAND_2GHZ) || + freq == ieee80211_channel_to_frequency(13, IEEE80211_BAND_2GHZ) || + freq == ieee80211_channel_to_frequency(14, IEEE80211_BAND_2GHZ)) return true; return false; } @@ -2087,6 +2083,25 @@ int set_regdom(const struct ieee80211_regdomain *rd) return r; } +#ifdef CONFIG_HOTPLUG +int reg_device_uevent(struct device *dev, struct kobj_uevent_env *env) +{ + if (last_request && !last_request->processed) { + if (add_uevent_var(env, "COUNTRY=%c%c", + last_request->alpha2[0], + last_request->alpha2[1])) + return -ENOMEM; + } + + return 0; +} +#else +int reg_device_uevent(struct device *dev, struct kobj_uevent_env *env) +{ + return -ENODEV; +} +#endif /* CONFIG_HOTPLUG */ + /* Caller must hold cfg80211_mutex */ void reg_device_remove(struct wiphy *wiphy) { @@ -2118,6 +2133,8 @@ int __init regulatory_init(void) if (IS_ERR(reg_pdev)) return PTR_ERR(reg_pdev); + reg_pdev->dev.type = ®_device_type; + spin_lock_init(®_requests_lock); spin_lock_init(®_pending_beacons_lock); diff --git a/net/wireless/reg.h b/net/wireless/reg.h index c4695d07af23..b67d1c3a2fb9 100644 --- a/net/wireless/reg.h +++ b/net/wireless/reg.h @@ -8,6 +8,7 @@ bool reg_is_valid_request(const char *alpha2); int regulatory_hint_user(const char *alpha2); +int reg_device_uevent(struct device *dev, struct kobj_uevent_env *env); void reg_device_remove(struct wiphy *wiphy); int __init regulatory_init(void); diff --git a/net/wireless/util.c b/net/wireless/util.c index 7620ae2fcf18..6a750bc6bcfe 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -29,29 +29,37 @@ ieee80211_get_response_rate(struct ieee80211_supported_band *sband, } 
EXPORT_SYMBOL(ieee80211_get_response_rate); -int ieee80211_channel_to_frequency(int chan) +int ieee80211_channel_to_frequency(int chan, enum ieee80211_band band) { - if (chan < 14) - return 2407 + chan * 5; - - if (chan == 14) - return 2484; - - /* FIXME: 802.11j 17.3.8.3.2 */ - return (chan + 1000) * 5; + /* see 802.11 17.3.8.3.2 and Annex J + * there are overlapping channel numbers in 5GHz and 2GHz bands */ + if (band == IEEE80211_BAND_5GHZ) { + if (chan >= 182 && chan <= 196) + return 4000 + chan * 5; + else + return 5000 + chan * 5; + } else { /* IEEE80211_BAND_2GHZ */ + if (chan == 14) + return 2484; + else if (chan < 14) + return 2407 + chan * 5; + else + return 0; /* not supported */ + } } EXPORT_SYMBOL(ieee80211_channel_to_frequency); int ieee80211_frequency_to_channel(int freq) { + /* see 802.11 17.3.8.3.2 and Annex J */ if (freq == 2484) return 14; - - if (freq < 2484) + else if (freq < 2484) return (freq - 2407) / 5; - - /* FIXME: 802.11j 17.3.8.3.2 */ - return freq/5 - 1000; + else if (freq >= 4910 && freq <= 4980) + return (freq - 4000) / 5; + else + return (freq - 5000) / 5; } EXPORT_SYMBOL(ieee80211_frequency_to_channel); @@ -159,12 +167,15 @@ int cfg80211_validate_key_settings(struct cfg80211_registered_device *rdev, /* * Disallow pairwise keys with non-zero index unless it's WEP - * (because current deployments use pairwise WEP keys with - * non-zero indizes but 802.11i clearly specifies to use zero) + * or a vendor specific cipher (because current deployments use + * pairwise WEP keys with non-zero indices and for vendor specific + * ciphers this should be validated in the driver or hardware level + * - but 802.11i clearly specifies to use zero) */ if (pairwise && key_idx && - params->cipher != WLAN_CIPHER_SUITE_WEP40 && - params->cipher != WLAN_CIPHER_SUITE_WEP104) + ((params->cipher == WLAN_CIPHER_SUITE_TKIP) || + (params->cipher == WLAN_CIPHER_SUITE_CCMP) || + (params->cipher == WLAN_CIPHER_SUITE_AES_CMAC))) return -EINVAL; switch (params->cipher) { diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index 3e5dbd4e4cd5..0bf169bb770e 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -267,9 +267,12 @@ int cfg80211_wext_freq(struct wiphy *wiphy, struct iw_freq *freq) * -EINVAL for impossible things. */ if (freq->e == 0) { + enum ieee80211_band band = IEEE80211_BAND_2GHZ; if (freq->m < 0) return 0; - return ieee80211_channel_to_frequency(freq->m); + if (freq->m > 14) + band = IEEE80211_BAND_5GHZ; + return ieee80211_channel_to_frequency(freq->m, band); } else { int i, div = 1000000; for (i = 0; i < freq->e; i++) @@ -802,11 +805,11 @@ int cfg80211_wext_siwfreq(struct net_device *dev, return freq; if (freq == 0) return -EINVAL; - wdev_lock(wdev); mutex_lock(&rdev->devlist_mtx); + wdev_lock(wdev); err = cfg80211_set_freq(rdev, wdev, freq, NL80211_CHAN_NO_HT); - mutex_unlock(&rdev->devlist_mtx); wdev_unlock(wdev); + mutex_unlock(&rdev->devlist_mtx); return err; default: return -EOPNOTSUPP; diff --git a/net/x25/x25_facilities.c b/net/x25/x25_facilities.c index 55187c8f6420..406207515b5e 100644 --- a/net/x25/x25_facilities.c +++ b/net/x25/x25_facilities.c @@ -27,9 +27,19 @@ #include <net/sock.h> #include <net/x25.h> -/* - * Parse a set of facilities into the facilities structures. Unrecognised - * facilities are written to the debug log file. 
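
The ieee80211_channel_to_frequency() rework above resolves the channel-number overlap between bands by making the caller say which band it means (the wext-compat hunk guesses 5 GHz for channel numbers above 14). A self-contained userspace re-implementation of the same mapping, useful for sanity-checking; it mirrors the arithmetic in the hunk, with 0 meaning "not supported".

#include <stdio.h>

enum band { BAND_2GHZ, BAND_5GHZ };

/* same arithmetic as the patched ieee80211_channel_to_frequency() */
static int chan_to_freq(int chan, enum band band)
{
        if (band == BAND_5GHZ) {
                if (chan >= 182 && chan <= 196)
                        return 4000 + chan * 5;   /* 802.11j 4.9 GHz channels */
                return 5000 + chan * 5;
        }
        /* 2.4 GHz */
        if (chan == 14)
                return 2484;
        if (chan < 14)
                return 2407 + chan * 5;
        return 0;                                 /* not supported */
}

int main(void)
{
        printf("ch 1   (2.4GHz) -> %d MHz\n", chan_to_freq(1,   BAND_2GHZ)); /* 2412 */
        printf("ch 14  (2.4GHz) -> %d MHz\n", chan_to_freq(14,  BAND_2GHZ)); /* 2484 */
        printf("ch 36  (5GHz)   -> %d MHz\n", chan_to_freq(36,  BAND_5GHZ)); /* 5180 */
        printf("ch 184 (5GHz)   -> %d MHz\n", chan_to_freq(184, BAND_5GHZ)); /* 4920 */
        return 0;
}
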
+/** + * x25_parse_facilities - Parse facilities from skb into the facilities structs + * + * @skb: sk_buff to parse + * @facilities: Regular facilites, updated as facilities are found + * @dte_facs: ITU DTE facilities, updated as DTE facilities are found + * @vc_fac_mask: mask is updated with all facilities found + * + * Return codes: + * -1 - Parsing error, caller should drop call and clean up + * 0 - Parse OK, this skb has no facilities + * >0 - Parse OK, returns the length of the facilities header + * */ int x25_parse_facilities(struct sk_buff *skb, struct x25_facilities *facilities, struct x25_dte_facilities *dte_facs, unsigned long *vc_fac_mask) @@ -62,7 +72,7 @@ int x25_parse_facilities(struct sk_buff *skb, struct x25_facilities *facilities, switch (*p & X25_FAC_CLASS_MASK) { case X25_FAC_CLASS_A: if (len < 2) - return 0; + return -1; switch (*p) { case X25_FAC_REVERSE: if((p[1] & 0x81) == 0x81) { @@ -107,7 +117,7 @@ int x25_parse_facilities(struct sk_buff *skb, struct x25_facilities *facilities, break; case X25_FAC_CLASS_B: if (len < 3) - return 0; + return -1; switch (*p) { case X25_FAC_PACKET_SIZE: facilities->pacsize_in = p[1]; @@ -130,7 +140,7 @@ int x25_parse_facilities(struct sk_buff *skb, struct x25_facilities *facilities, break; case X25_FAC_CLASS_C: if (len < 4) - return 0; + return -1; printk(KERN_DEBUG "X.25: unknown facility %02X, " "values %02X, %02X, %02X\n", p[0], p[1], p[2], p[3]); @@ -139,18 +149,18 @@ int x25_parse_facilities(struct sk_buff *skb, struct x25_facilities *facilities, break; case X25_FAC_CLASS_D: if (len < p[1] + 2) - return 0; + return -1; switch (*p) { case X25_FAC_CALLING_AE: if (p[1] > X25_MAX_DTE_FACIL_LEN || p[1] <= 1) - return 0; + return -1; dte_facs->calling_len = p[2]; memcpy(dte_facs->calling_ae, &p[3], p[1] - 1); *vc_fac_mask |= X25_MASK_CALLING_AE; break; case X25_FAC_CALLED_AE: if (p[1] > X25_MAX_DTE_FACIL_LEN || p[1] <= 1) - return 0; + return -1; dte_facs->called_len = p[2]; memcpy(dte_facs->called_ae, &p[3], p[1] - 1); *vc_fac_mask |= X25_MASK_CALLED_AE; diff --git a/net/x25/x25_in.c b/net/x25/x25_in.c index f729f022be69..15de65f04719 100644 --- a/net/x25/x25_in.c +++ b/net/x25/x25_in.c @@ -91,10 +91,10 @@ static int x25_state1_machine(struct sock *sk, struct sk_buff *skb, int frametyp { struct x25_address source_addr, dest_addr; int len; + struct x25_sock *x25 = x25_sk(sk); switch (frametype) { case X25_CALL_ACCEPTED: { - struct x25_sock *x25 = x25_sk(sk); x25_stop_timer(sk); x25->condition = 0x00; @@ -113,14 +113,16 @@ static int x25_state1_machine(struct sock *sk, struct sk_buff *skb, int frametyp &dest_addr); if (len > 0) skb_pull(skb, len); + else if (len < 0) + goto out_clear; len = x25_parse_facilities(skb, &x25->facilities, &x25->dte_facilities, &x25->vc_facil_mask); if (len > 0) skb_pull(skb, len); - else - return -1; + else if (len < 0) + goto out_clear; /* * Copy any Call User Data. 
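
The new kernel-doc above pins down a three-way return convention for x25_parse_facilities() (negative = malformed, caller clears the call; 0 = no facilities; positive = facilities length to pull), and the x25_state1_machine() hunk is the matching caller change. The sketch below is a self-contained toy of the same convention with a stub parser; the names and the one-byte length encoding are invented for illustration, not the X.25 code.

#include <stdio.h>

/* stub parser: <0 = malformed, 0 = nothing present, >0 = bytes consumed */
static int parse_facilities(const unsigned char *buf, int len)
{
        if (len == 0)
                return 0;
        if (buf[0] > len - 1)
                return -1;          /* claimed length overruns the buffer */
        return buf[0] + 1;          /* length byte + payload */
}

static void handle_packet(const unsigned char *buf, int len)
{
        int n = parse_facilities(buf, len);

        if (n < 0) {
                puts("malformed facilities: clear the call"); /* CLEAR REQUEST path */
                return;
        }
        buf += n;                   /* skip facilities (no-op when n == 0) */
        len -= n;
        printf("remaining user data: %d bytes\n", len);
}

int main(void)
{
        const unsigned char good[] = { 2, 0xAA, 0xBB, 'h', 'i' };
        const unsigned char bad[]  = { 9, 0xAA };

        handle_packet(good, sizeof(good));  /* 2 bytes of user data left */
        handle_packet(bad, sizeof(bad));    /* cleared                    */
        return 0;
}
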
*/ @@ -144,6 +146,12 @@ static int x25_state1_machine(struct sock *sk, struct sk_buff *skb, int frametyp } return 0; + +out_clear: + x25_write_internal(sk, X25_CLEAR_REQUEST); + x25->state = X25_STATE_2; + x25_start_t23timer(sk); + return 0; } /* diff --git a/net/x25/x25_link.c b/net/x25/x25_link.c index 4cbc942f762a..21306928d47f 100644 --- a/net/x25/x25_link.c +++ b/net/x25/x25_link.c @@ -396,9 +396,12 @@ void __exit x25_link_free(void) write_lock_bh(&x25_neigh_list_lock); list_for_each_safe(entry, tmp, &x25_neigh_list) { + struct net_device *dev; + nb = list_entry(entry, struct x25_neigh, node); + dev = nb->dev; __x25_remove_neigh(nb); - dev_put(nb->dev); + dev_put(dev); } write_unlock_bh(&x25_neigh_list_lock); } diff --git a/net/xfrm/Makefile b/net/xfrm/Makefile index c631047e1b27..aa429eefe919 100644 --- a/net/xfrm/Makefile +++ b/net/xfrm/Makefile @@ -4,7 +4,7 @@ obj-$(CONFIG_XFRM) := xfrm_policy.o xfrm_state.o xfrm_hash.o \ xfrm_input.o xfrm_output.o xfrm_algo.o \ - xfrm_sysctl.o + xfrm_sysctl.o xfrm_replay.o obj-$(CONFIG_XFRM_STATISTICS) += xfrm_proc.o obj-$(CONFIG_XFRM_USER) += xfrm_user.o obj-$(CONFIG_XFRM_IPCOMP) += xfrm_ipcomp.o diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c index 8b4d6e3246e5..58064d9e565d 100644 --- a/net/xfrm/xfrm_algo.c +++ b/net/xfrm/xfrm_algo.c @@ -618,21 +618,21 @@ static int xfrm_alg_name_match(const struct xfrm_algo_desc *entry, (entry->compat && !strcmp(name, entry->compat))); } -struct xfrm_algo_desc *xfrm_aalg_get_byname(char *name, int probe) +struct xfrm_algo_desc *xfrm_aalg_get_byname(const char *name, int probe) { return xfrm_find_algo(&xfrm_aalg_list, xfrm_alg_name_match, name, probe); } EXPORT_SYMBOL_GPL(xfrm_aalg_get_byname); -struct xfrm_algo_desc *xfrm_ealg_get_byname(char *name, int probe) +struct xfrm_algo_desc *xfrm_ealg_get_byname(const char *name, int probe) { return xfrm_find_algo(&xfrm_ealg_list, xfrm_alg_name_match, name, probe); } EXPORT_SYMBOL_GPL(xfrm_ealg_get_byname); -struct xfrm_algo_desc *xfrm_calg_get_byname(char *name, int probe) +struct xfrm_algo_desc *xfrm_calg_get_byname(const char *name, int probe) { return xfrm_find_algo(&xfrm_calg_list, xfrm_alg_name_match, name, probe); @@ -654,7 +654,7 @@ static int xfrm_aead_name_match(const struct xfrm_algo_desc *entry, !strcmp(name, entry->name); } -struct xfrm_algo_desc *xfrm_aead_get_byname(char *name, int icv_len, int probe) +struct xfrm_algo_desc *xfrm_aead_get_byname(const char *name, int icv_len, int probe) { struct xfrm_aead_name data = { .name = name, diff --git a/net/xfrm/xfrm_hash.h b/net/xfrm/xfrm_hash.h index 8e69533d2313..7199d78b2aa1 100644 --- a/net/xfrm/xfrm_hash.h +++ b/net/xfrm/xfrm_hash.h @@ -4,29 +4,32 @@ #include <linux/xfrm.h> #include <linux/socket.h> -static inline unsigned int __xfrm4_addr_hash(xfrm_address_t *addr) +static inline unsigned int __xfrm4_addr_hash(const xfrm_address_t *addr) { return ntohl(addr->a4); } -static inline unsigned int __xfrm6_addr_hash(xfrm_address_t *addr) +static inline unsigned int __xfrm6_addr_hash(const xfrm_address_t *addr) { return ntohl(addr->a6[2] ^ addr->a6[3]); } -static inline unsigned int __xfrm4_daddr_saddr_hash(xfrm_address_t *daddr, xfrm_address_t *saddr) +static inline unsigned int __xfrm4_daddr_saddr_hash(const xfrm_address_t *daddr, + const xfrm_address_t *saddr) { u32 sum = (__force u32)daddr->a4 + (__force u32)saddr->a4; return ntohl((__force __be32)sum); } -static inline unsigned int __xfrm6_daddr_saddr_hash(xfrm_address_t *daddr, xfrm_address_t *saddr) +static inline unsigned int 
__xfrm6_daddr_saddr_hash(const xfrm_address_t *daddr, + const xfrm_address_t *saddr) { return ntohl(daddr->a6[2] ^ daddr->a6[3] ^ saddr->a6[2] ^ saddr->a6[3]); } -static inline unsigned int __xfrm_dst_hash(xfrm_address_t *daddr, xfrm_address_t *saddr, +static inline unsigned int __xfrm_dst_hash(const xfrm_address_t *daddr, + const xfrm_address_t *saddr, u32 reqid, unsigned short family, unsigned int hmask) { @@ -42,8 +45,8 @@ static inline unsigned int __xfrm_dst_hash(xfrm_address_t *daddr, xfrm_address_t return (h ^ (h >> 16)) & hmask; } -static inline unsigned __xfrm_src_hash(xfrm_address_t *daddr, - xfrm_address_t *saddr, +static inline unsigned __xfrm_src_hash(const xfrm_address_t *daddr, + const xfrm_address_t *saddr, unsigned short family, unsigned int hmask) { @@ -60,8 +63,8 @@ static inline unsigned __xfrm_src_hash(xfrm_address_t *daddr, } static inline unsigned int -__xfrm_spi_hash(xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family, - unsigned int hmask) +__xfrm_spi_hash(const xfrm_address_t *daddr, __be32 spi, u8 proto, + unsigned short family, unsigned int hmask) { unsigned int h = (__force u32)spi ^ proto; switch (family) { @@ -80,10 +83,11 @@ static inline unsigned int __idx_hash(u32 index, unsigned int hmask) return (index ^ (index >> 8)) & hmask; } -static inline unsigned int __sel_hash(struct xfrm_selector *sel, unsigned short family, unsigned int hmask) +static inline unsigned int __sel_hash(const struct xfrm_selector *sel, + unsigned short family, unsigned int hmask) { - xfrm_address_t *daddr = &sel->daddr; - xfrm_address_t *saddr = &sel->saddr; + const xfrm_address_t *daddr = &sel->daddr; + const xfrm_address_t *saddr = &sel->saddr; unsigned int h = 0; switch (family) { @@ -107,7 +111,9 @@ static inline unsigned int __sel_hash(struct xfrm_selector *sel, unsigned short return h & hmask; } -static inline unsigned int __addr_hash(xfrm_address_t *daddr, xfrm_address_t *saddr, unsigned short family, unsigned int hmask) +static inline unsigned int __addr_hash(const xfrm_address_t *daddr, + const xfrm_address_t *saddr, + unsigned short family, unsigned int hmask) { unsigned int h = 0; diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index 45f1c98d4fce..872065ca7f8c 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -107,6 +107,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) struct net *net = dev_net(skb->dev); int err; __be32 seq; + __be32 seq_hi; struct xfrm_state *x; xfrm_address_t *daddr; struct xfrm_mode *inner_mode; @@ -118,7 +119,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) if (encap_type < 0) { async = 1; x = xfrm_input_state(skb); - seq = XFRM_SKB_CB(skb)->seq.input; + seq = XFRM_SKB_CB(skb)->seq.input.low; goto resume; } @@ -172,7 +173,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) goto drop_unlock; } - if (x->props.replay_window && xfrm_replay_check(x, skb, seq)) { + if (x->props.replay_window && x->repl->check(x, skb, seq)) { XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATESEQERROR); goto drop_unlock; } @@ -184,7 +185,10 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) spin_unlock(&x->lock); - XFRM_SKB_CB(skb)->seq.input = seq; + seq_hi = htonl(xfrm_replay_seqhi(x, seq)); + + XFRM_SKB_CB(skb)->seq.input.low = seq; + XFRM_SKB_CB(skb)->seq.input.hi = seq_hi; nexthdr = x->type->input(x, skb); @@ -206,8 +210,7 @@ resume: /* only the first xfrm gets the encap type */ encap_type = 0; - if 
(x->props.replay_window) - xfrm_replay_advance(x, seq); + x->repl->advance(x, seq); x->curlft.bytes += skb->len; x->curlft.packets++; diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index 64f2ae1fdc15..1aba03f449cc 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -67,17 +67,10 @@ static int xfrm_output_one(struct sk_buff *skb, int err) goto error; } - if (x->type->flags & XFRM_TYPE_REPLAY_PROT) { - XFRM_SKB_CB(skb)->seq.output = ++x->replay.oseq; - if (unlikely(x->replay.oseq == 0)) { - XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATESEQERROR); - x->replay.oseq--; - xfrm_audit_state_replay_overflow(x, skb); - err = -EOVERFLOW; - goto error; - } - if (xfrm_aevent_is_on(net)) - xfrm_replay_notify(x, XFRM_REPLAY_UPDATE); + err = x->repl->overflow(x, skb); + if (err) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATESEQERROR); + goto error; } x->curlft.bytes += skb->len; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 8b3ef404c794..1ba0258b49c7 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -50,37 +50,40 @@ static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family); static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo); static void xfrm_init_pmtu(struct dst_entry *dst); static int stale_bundle(struct dst_entry *dst); -static int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *xdst, - struct flowi *fl, int family, int strict); +static int xfrm_bundle_ok(struct xfrm_dst *xdst, int family); static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol, int dir); static inline int -__xfrm4_selector_match(struct xfrm_selector *sel, struct flowi *fl) +__xfrm4_selector_match(const struct xfrm_selector *sel, const struct flowi *fl) { - return addr_match(&fl->fl4_dst, &sel->daddr, sel->prefixlen_d) && - addr_match(&fl->fl4_src, &sel->saddr, sel->prefixlen_s) && - !((xfrm_flowi_dport(fl) ^ sel->dport) & sel->dport_mask) && - !((xfrm_flowi_sport(fl) ^ sel->sport) & sel->sport_mask) && - (fl->proto == sel->proto || !sel->proto) && - (fl->oif == sel->ifindex || !sel->ifindex); + const struct flowi4 *fl4 = &fl->u.ip4; + + return addr_match(&fl4->daddr, &sel->daddr, sel->prefixlen_d) && + addr_match(&fl4->saddr, &sel->saddr, sel->prefixlen_s) && + !((xfrm_flowi_dport(fl, &fl4->uli) ^ sel->dport) & sel->dport_mask) && + !((xfrm_flowi_sport(fl, &fl4->uli) ^ sel->sport) & sel->sport_mask) && + (fl4->flowi4_proto == sel->proto || !sel->proto) && + (fl4->flowi4_oif == sel->ifindex || !sel->ifindex); } static inline int -__xfrm6_selector_match(struct xfrm_selector *sel, struct flowi *fl) +__xfrm6_selector_match(const struct xfrm_selector *sel, const struct flowi *fl) { - return addr_match(&fl->fl6_dst, &sel->daddr, sel->prefixlen_d) && - addr_match(&fl->fl6_src, &sel->saddr, sel->prefixlen_s) && - !((xfrm_flowi_dport(fl) ^ sel->dport) & sel->dport_mask) && - !((xfrm_flowi_sport(fl) ^ sel->sport) & sel->sport_mask) && - (fl->proto == sel->proto || !sel->proto) && - (fl->oif == sel->ifindex || !sel->ifindex); + const struct flowi6 *fl6 = &fl->u.ip6; + + return addr_match(&fl6->daddr, &sel->daddr, sel->prefixlen_d) && + addr_match(&fl6->saddr, &sel->saddr, sel->prefixlen_s) && + !((xfrm_flowi_dport(fl, &fl6->uli) ^ sel->dport) & sel->dport_mask) && + !((xfrm_flowi_sport(fl, &fl6->uli) ^ sel->sport) & sel->sport_mask) && + (fl6->flowi6_proto == sel->proto || !sel->proto) && + (fl6->flowi6_oif == sel->ifindex || !sel->ifindex); } -int xfrm_selector_match(struct xfrm_selector *sel, struct flowi *fl, - 
unsigned short family) +int xfrm_selector_match(const struct xfrm_selector *sel, const struct flowi *fl, + unsigned short family) { switch (family) { case AF_INET: @@ -92,8 +95,8 @@ int xfrm_selector_match(struct xfrm_selector *sel, struct flowi *fl, } static inline struct dst_entry *__xfrm_dst_lookup(struct net *net, int tos, - xfrm_address_t *saddr, - xfrm_address_t *daddr, + const xfrm_address_t *saddr, + const xfrm_address_t *daddr, int family) { struct xfrm_policy_afinfo *afinfo; @@ -311,7 +314,9 @@ static inline unsigned int idx_hash(struct net *net, u32 index) return __idx_hash(index, net->xfrm.policy_idx_hmask); } -static struct hlist_head *policy_hash_bysel(struct net *net, struct xfrm_selector *sel, unsigned short family, int dir) +static struct hlist_head *policy_hash_bysel(struct net *net, + const struct xfrm_selector *sel, + unsigned short family, int dir) { unsigned int hmask = net->xfrm.policy_bydst[dir].hmask; unsigned int hash = __sel_hash(sel, family, hmask); @@ -321,7 +326,10 @@ static struct hlist_head *policy_hash_bysel(struct net *net, struct xfrm_selecto net->xfrm.policy_bydst[dir].table + hash); } -static struct hlist_head *policy_hash_direct(struct net *net, xfrm_address_t *daddr, xfrm_address_t *saddr, unsigned short family, int dir) +static struct hlist_head *policy_hash_direct(struct net *net, + const xfrm_address_t *daddr, + const xfrm_address_t *saddr, + unsigned short family, int dir) { unsigned int hmask = net->xfrm.policy_bydst[dir].hmask; unsigned int hash = __addr_hash(daddr, saddr, family, hmask); @@ -864,32 +872,33 @@ EXPORT_SYMBOL(xfrm_policy_walk_done); * * Returns 0 if policy found, else an -errno. */ -static int xfrm_policy_match(struct xfrm_policy *pol, struct flowi *fl, +static int xfrm_policy_match(const struct xfrm_policy *pol, + const struct flowi *fl, u8 type, u16 family, int dir) { - struct xfrm_selector *sel = &pol->selector; + const struct xfrm_selector *sel = &pol->selector; int match, ret = -ESRCH; if (pol->family != family || - (fl->mark & pol->mark.m) != pol->mark.v || + (fl->flowi_mark & pol->mark.m) != pol->mark.v || pol->type != type) return ret; match = xfrm_selector_match(sel, fl, family); if (match) - ret = security_xfrm_policy_lookup(pol->security, fl->secid, + ret = security_xfrm_policy_lookup(pol->security, fl->flowi_secid, dir); return ret; } static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type, - struct flowi *fl, + const struct flowi *fl, u16 family, u8 dir) { int err; struct xfrm_policy *pol, *ret; - xfrm_address_t *daddr, *saddr; + const xfrm_address_t *daddr, *saddr; struct hlist_node *entry; struct hlist_head *chain; u32 priority = ~0U; @@ -941,7 +950,7 @@ fail: } static struct xfrm_policy * -__xfrm_policy_lookup(struct net *net, struct flowi *fl, u16 family, u8 dir) +__xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir) { #ifdef CONFIG_XFRM_SUB_POLICY struct xfrm_policy *pol; @@ -954,7 +963,7 @@ __xfrm_policy_lookup(struct net *net, struct flowi *fl, u16 family, u8 dir) } static struct flow_cache_object * -xfrm_policy_lookup(struct net *net, struct flowi *fl, u16 family, +xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir, struct flow_cache_object *old_obj, void *ctx) { struct xfrm_policy *pol; @@ -990,7 +999,8 @@ static inline int policy_to_flow_dir(int dir) } } -static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struct flowi *fl) +static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, + const 
struct flowi *fl) { struct xfrm_policy *pol; @@ -1006,7 +1016,7 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struc goto out; } err = security_xfrm_policy_lookup(pol->security, - fl->secid, + fl->flowi_secid, policy_to_flow_dir(dir)); if (!err) xfrm_pol_hold(pol); @@ -1098,7 +1108,7 @@ int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol) return 0; } -static struct xfrm_policy *clone_policy(struct xfrm_policy *old, int dir) +static struct xfrm_policy *clone_policy(const struct xfrm_policy *old, int dir) { struct xfrm_policy *newp = xfrm_policy_alloc(xp_net(old), GFP_ATOMIC); @@ -1157,9 +1167,8 @@ xfrm_get_saddr(struct net *net, xfrm_address_t *local, xfrm_address_t *remote, /* Resolve list of templates for the flow, given policy. */ static int -xfrm_tmpl_resolve_one(struct xfrm_policy *policy, struct flowi *fl, - struct xfrm_state **xfrm, - unsigned short family) +xfrm_tmpl_resolve_one(struct xfrm_policy *policy, const struct flowi *fl, + struct xfrm_state **xfrm, unsigned short family) { struct net *net = xp_net(policy); int nx; @@ -1214,9 +1223,8 @@ fail: } static int -xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, struct flowi *fl, - struct xfrm_state **xfrm, - unsigned short family) +xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, const struct flowi *fl, + struct xfrm_state **xfrm, unsigned short family) { struct xfrm_state *tp[XFRM_MAX_DEPTH]; struct xfrm_state **tpp = (npols > 1) ? tp : xfrm; @@ -1256,7 +1264,7 @@ xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, struct flowi *fl, * still valid. */ -static inline int xfrm_get_tos(struct flowi *fl, int family) +static inline int xfrm_get_tos(const struct flowi *fl, int family) { struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); int tos; @@ -1340,10 +1348,13 @@ static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family) default: BUG(); } - xdst = dst_alloc(dst_ops) ?: ERR_PTR(-ENOBUFS); + xdst = dst_alloc(dst_ops, 0); xfrm_policy_put_afinfo(afinfo); - xdst->flo.ops = &xfrm_bundle_fc_ops; + if (likely(xdst)) + xdst->flo.ops = &xfrm_bundle_fc_ops; + else + xdst = ERR_PTR(-ENOBUFS); return xdst; } @@ -1366,7 +1377,7 @@ static inline int xfrm_init_path(struct xfrm_dst *path, struct dst_entry *dst, } static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, - struct flowi *fl) + const struct flowi *fl) { struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(xdst->u.dst.ops->family); @@ -1389,7 +1400,7 @@ static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int nx, - struct flowi *fl, + const struct flowi *fl, struct dst_entry *dst) { struct net *net = xp_net(policy); @@ -1505,7 +1516,7 @@ free_dst: } static int inline -xfrm_dst_alloc_copy(void **target, void *src, int size) +xfrm_dst_alloc_copy(void **target, const void *src, int size) { if (!*target) { *target = kmalloc(size, GFP_ATOMIC); @@ -1517,7 +1528,7 @@ xfrm_dst_alloc_copy(void **target, void *src, int size) } static int inline -xfrm_dst_update_parent(struct dst_entry *dst, struct xfrm_selector *sel) +xfrm_dst_update_parent(struct dst_entry *dst, const struct xfrm_selector *sel) { #ifdef CONFIG_XFRM_SUB_POLICY struct xfrm_dst *xdst = (struct xfrm_dst *)dst; @@ -1529,7 +1540,7 @@ xfrm_dst_update_parent(struct dst_entry *dst, struct xfrm_selector *sel) } static int inline -xfrm_dst_update_origin(struct dst_entry *dst, struct 
flowi *fl) +xfrm_dst_update_origin(struct dst_entry *dst, const struct flowi *fl) { #ifdef CONFIG_XFRM_SUB_POLICY struct xfrm_dst *xdst = (struct xfrm_dst *)dst; @@ -1539,7 +1550,7 @@ xfrm_dst_update_origin(struct dst_entry *dst, struct flowi *fl) #endif } -static int xfrm_expand_policies(struct flowi *fl, u16 family, +static int xfrm_expand_policies(const struct flowi *fl, u16 family, struct xfrm_policy **pols, int *num_pols, int *num_xfrms) { @@ -1585,7 +1596,7 @@ static int xfrm_expand_policies(struct flowi *fl, u16 family, static struct xfrm_dst * xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols, - struct flowi *fl, u16 family, + const struct flowi *fl, u16 family, struct dst_entry *dst_orig) { struct net *net = xp_net(pols[0]); @@ -1628,7 +1639,7 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols, } static struct flow_cache_object * -xfrm_bundle_lookup(struct net *net, struct flowi *fl, u16 family, u8 dir, +xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir, struct flow_cache_object *oldflo, void *ctx) { struct dst_entry *dst_orig = (struct dst_entry *)ctx; @@ -1727,18 +1738,36 @@ error: return ERR_PTR(err); } +static struct dst_entry *make_blackhole(struct net *net, u16 family, + struct dst_entry *dst_orig) +{ + struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); + struct dst_entry *ret; + + if (!afinfo) { + dst_release(dst_orig); + ret = ERR_PTR(-EINVAL); + } else { + ret = afinfo->blackhole_route(net, dst_orig); + } + xfrm_policy_put_afinfo(afinfo); + + return ret; +} + /* Main function: finds/creates a bundle for given flow. * * At the moment we eat a raw IP route. Mostly to speed up lookups * on interfaces with disabled IPsec. */ -int __xfrm_lookup(struct net *net, struct dst_entry **dst_p, struct flowi *fl, - struct sock *sk, int flags) +struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig, + const struct flowi *fl, + struct sock *sk, int flags) { struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX]; struct flow_cache_object *flo; struct xfrm_dst *xdst; - struct dst_entry *dst, *dst_orig = *dst_p, *route; + struct dst_entry *dst, *route; u16 family = dst_orig->ops->family; u8 dir = policy_to_flow_dir(XFRM_POLICY_OUT); int i, err, num_pols, num_xfrms = 0, drop_pols = 0; @@ -1820,9 +1849,10 @@ restart: dst_release(dst); xfrm_pols_put(pols, drop_pols); XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES); - return -EREMOTE; + + return make_blackhole(net, family, dst_orig); } - if (flags & XFRM_LOOKUP_WAIT) { + if (fl->flowi_flags & FLOWI_FLAG_CAN_SLEEP) { DECLARE_WAITQUEUE(wait, current); add_wait_queue(&net->xfrm.km_waitq, &wait); @@ -1864,47 +1894,33 @@ no_transform: goto error; } else if (num_xfrms > 0) { /* Flow transformed */ - *dst_p = dst; dst_release(dst_orig); } else { /* Flow passes untransformed */ dst_release(dst); + dst = dst_orig; } ok: xfrm_pols_put(pols, drop_pols); - return 0; + return dst; nopol: - if (!(flags & XFRM_LOOKUP_ICMP)) + if (!(flags & XFRM_LOOKUP_ICMP)) { + dst = dst_orig; goto ok; + } err = -ENOENT; error: dst_release(dst); dropdst: dst_release(dst_orig); - *dst_p = NULL; xfrm_pols_put(pols, drop_pols); - return err; -} -EXPORT_SYMBOL(__xfrm_lookup); - -int xfrm_lookup(struct net *net, struct dst_entry **dst_p, struct flowi *fl, - struct sock *sk, int flags) -{ - int err = __xfrm_lookup(net, dst_p, fl, sk, flags); - - if (err == -EREMOTE) { - dst_release(*dst_p); - *dst_p = NULL; - err = -EAGAIN; - } - - return err; + return ERR_PTR(err); } 
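For context on the interface change just above: xfrm_lookup() no longer fills a struct dst_entry ** and returns an error code; it consumes dst_orig and returns either the (possibly transformed) route, a blackhole route while states are still being resolved, or an ERR_PTR() on failure. A minimal caller sketch, with illustrative names only, assuming nothing beyond the signature introduced in this hunk:

	/* Sketch only: 'route' and the error handling are illustrative. */
	static struct dst_entry *lookup_with_ipsec(struct net *net,
						   struct dst_entry *route,
						   const struct flowi *fl,
						   struct sock *sk)
	{
		struct dst_entry *dst;

		/* xfrm_lookup() takes ownership of 'route': it is returned
		 * unchanged when no policy applies, replaced by an xfrm
		 * bundle when one does, or released before an ERR_PTR()
		 * error return. */
		dst = xfrm_lookup(net, route, fl, sk, 0);
		if (IS_ERR(dst))
			return NULL;

		return dst;
	}

Callers that previously saw -EREMOTE mapped to -EAGAIN now receive the blackhole route instead and simply keep using the returned dst.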
EXPORT_SYMBOL(xfrm_lookup); static inline int -xfrm_secpath_reject(int idx, struct sk_buff *skb, struct flowi *fl) +xfrm_secpath_reject(int idx, struct sk_buff *skb, const struct flowi *fl) { struct xfrm_state *x; @@ -1923,7 +1939,7 @@ xfrm_secpath_reject(int idx, struct sk_buff *skb, struct flowi *fl) */ static inline int -xfrm_state_ok(struct xfrm_tmpl *tmpl, struct xfrm_state *x, +xfrm_state_ok(const struct xfrm_tmpl *tmpl, const struct xfrm_state *x, unsigned short family) { if (xfrm_state_kern(x)) @@ -1946,7 +1962,7 @@ xfrm_state_ok(struct xfrm_tmpl *tmpl, struct xfrm_state *x, * Otherwise "-2 - errored_index" is returned. */ static inline int -xfrm_policy_ok(struct xfrm_tmpl *tmpl, struct sec_path *sp, int start, +xfrm_policy_ok(const struct xfrm_tmpl *tmpl, const struct sec_path *sp, int start, unsigned short family) { int idx = start; @@ -1978,13 +1994,13 @@ int __xfrm_decode_session(struct sk_buff *skb, struct flowi *fl, return -EAFNOSUPPORT; afinfo->decode_session(skb, fl, reverse); - err = security_xfrm_decode_session(skb, &fl->secid); + err = security_xfrm_decode_session(skb, &fl->flowi_secid); xfrm_policy_put_afinfo(afinfo); return err; } EXPORT_SYMBOL(__xfrm_decode_session); -static inline int secpath_has_nontransport(struct sec_path *sp, int k, int *idxp) +static inline int secpath_has_nontransport(const struct sec_path *sp, int k, int *idxp) { for (; k < sp->len; k++) { if (sp->xvec[k]->props.mode != XFRM_MODE_TRANSPORT) { @@ -2159,7 +2175,7 @@ int __xfrm_route_forward(struct sk_buff *skb, unsigned short family) struct net *net = dev_net(skb->dev); struct flowi fl; struct dst_entry *dst; - int res; + int res = 0; if (xfrm_decode_session(skb, &fl, family) < 0) { XFRM_INC_STATS(net, LINUX_MIB_XFRMFWDHDRERROR); @@ -2167,9 +2183,12 @@ int __xfrm_route_forward(struct sk_buff *skb, unsigned short family) } skb_dst_force(skb); - dst = skb_dst(skb); - res = xfrm_lookup(net, &dst, &fl, NULL, 0) == 0; + dst = xfrm_lookup(net, skb_dst(skb), &fl, NULL, 0); + if (IS_ERR(dst)) { + res = 1; + dst = NULL; + } skb_dst_set(skb, dst); return res; } @@ -2207,7 +2226,7 @@ static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie) static int stale_bundle(struct dst_entry *dst) { - return !xfrm_bundle_ok(NULL, (struct xfrm_dst *)dst, NULL, AF_UNSPEC, 0); + return !xfrm_bundle_ok((struct xfrm_dst *)dst, AF_UNSPEC); } void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev) @@ -2279,8 +2298,7 @@ static void xfrm_init_pmtu(struct dst_entry *dst) * still valid. 
*/ -static int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *first, - struct flowi *fl, int family, int strict) +static int xfrm_bundle_ok(struct xfrm_dst *first, int family) { struct dst_entry *dst = &first->u.dst; struct xfrm_dst *last; @@ -2289,26 +2307,12 @@ static int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *first, if (!dst_check(dst->path, ((struct xfrm_dst *)dst)->path_cookie) || (dst->dev && !netif_running(dst->dev))) return 0; -#ifdef CONFIG_XFRM_SUB_POLICY - if (fl) { - if (first->origin && !flow_cache_uli_match(first->origin, fl)) - return 0; - if (first->partner && - !xfrm_selector_match(first->partner, fl, family)) - return 0; - } -#endif last = NULL; do { struct xfrm_dst *xdst = (struct xfrm_dst *)dst; - if (fl && !xfrm_selector_match(&dst->xfrm->sel, fl, family)) - return 0; - if (fl && pol && - !security_xfrm_state_pol_flow_match(dst->xfrm, pol, fl)) - return 0; if (dst->xfrm->km.state != XFRM_STATE_VALID) return 0; if (xdst->xfrm_genid != dst->xfrm->genid) @@ -2317,11 +2321,6 @@ static int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *first, xdst->policy_genid != atomic_read(&xdst->pols[0]->genid)) return 0; - if (strict && fl && - !(dst->xfrm->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL) && - !xfrm_state_addr_flow_check(dst->xfrm, fl, family)) - return 0; - mtu = dst_mtu(dst->child); if (xdst->child_mtu_cached != mtu) { last = xdst; @@ -2732,8 +2731,8 @@ EXPORT_SYMBOL_GPL(xfrm_audit_policy_delete); #endif #ifdef CONFIG_XFRM_MIGRATE -static int xfrm_migrate_selector_match(struct xfrm_selector *sel_cmp, - struct xfrm_selector *sel_tgt) +static int xfrm_migrate_selector_match(const struct xfrm_selector *sel_cmp, + const struct xfrm_selector *sel_tgt) { if (sel_cmp->proto == IPSEC_ULPROTO_ANY) { if (sel_tgt->family == sel_cmp->family && @@ -2753,7 +2752,7 @@ static int xfrm_migrate_selector_match(struct xfrm_selector *sel_cmp, return 0; } -static struct xfrm_policy * xfrm_migrate_policy_find(struct xfrm_selector *sel, +static struct xfrm_policy * xfrm_migrate_policy_find(const struct xfrm_selector *sel, u8 dir, u8 type) { struct xfrm_policy *pol, *ret = NULL; @@ -2789,7 +2788,7 @@ static struct xfrm_policy * xfrm_migrate_policy_find(struct xfrm_selector *sel, return ret; } -static int migrate_tmpl_match(struct xfrm_migrate *m, struct xfrm_tmpl *t) +static int migrate_tmpl_match(const struct xfrm_migrate *m, const struct xfrm_tmpl *t) { int match = 0; @@ -2859,7 +2858,7 @@ static int xfrm_policy_migrate(struct xfrm_policy *pol, return 0; } -static int xfrm_migrate_check(struct xfrm_migrate *m, int num_migrate) +static int xfrm_migrate_check(const struct xfrm_migrate *m, int num_migrate) { int i, j; @@ -2893,7 +2892,7 @@ static int xfrm_migrate_check(struct xfrm_migrate *m, int num_migrate) return 0; } -int xfrm_migrate(struct xfrm_selector *sel, u8 dir, u8 type, +int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, struct xfrm_migrate *m, int num_migrate, struct xfrm_kmaddress *k) { diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c new file mode 100644 index 000000000000..2f5be5b15740 --- /dev/null +++ b/net/xfrm/xfrm_replay.c @@ -0,0 +1,534 @@ +/* + * xfrm_replay.c - xfrm replay detection, derived from xfrm_state.c. 
+ * + * Copyright (C) 2010 secunet Security Networks AG + * Copyright (C) 2010 Steffen Klassert <steffen.klassert@secunet.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include <net/xfrm.h> + +u32 xfrm_replay_seqhi(struct xfrm_state *x, __be32 net_seq) +{ + u32 seq, seq_hi, bottom; + struct xfrm_replay_state_esn *replay_esn = x->replay_esn; + + if (!(x->props.flags & XFRM_STATE_ESN)) + return 0; + + seq = ntohl(net_seq); + seq_hi = replay_esn->seq_hi; + bottom = replay_esn->seq - replay_esn->replay_window + 1; + + if (likely(replay_esn->seq >= replay_esn->replay_window - 1)) { + /* A. same subspace */ + if (unlikely(seq < bottom)) + seq_hi++; + } else { + /* B. window spans two subspaces */ + if (unlikely(seq >= bottom)) + seq_hi--; + } + + return seq_hi; +} + +static void xfrm_replay_notify(struct xfrm_state *x, int event) +{ + struct km_event c; + /* we send notify messages in case + * 1. we updated on of the sequence numbers, and the seqno difference + * is at least x->replay_maxdiff, in this case we also update the + * timeout of our timer function + * 2. if x->replay_maxage has elapsed since last update, + * and there were changes + * + * The state structure must be locked! 
+ */ + + switch (event) { + case XFRM_REPLAY_UPDATE: + if (x->replay_maxdiff && + (x->replay.seq - x->preplay.seq < x->replay_maxdiff) && + (x->replay.oseq - x->preplay.oseq < x->replay_maxdiff)) { + if (x->xflags & XFRM_TIME_DEFER) + event = XFRM_REPLAY_TIMEOUT; + else + return; + } + + break; + + case XFRM_REPLAY_TIMEOUT: + if (memcmp(&x->replay, &x->preplay, + sizeof(struct xfrm_replay_state)) == 0) { + x->xflags |= XFRM_TIME_DEFER; + return; + } + + break; + } + + memcpy(&x->preplay, &x->replay, sizeof(struct xfrm_replay_state)); + c.event = XFRM_MSG_NEWAE; + c.data.aevent = event; + km_state_notify(x, &c); + + if (x->replay_maxage && + !mod_timer(&x->rtimer, jiffies + x->replay_maxage)) + x->xflags &= ~XFRM_TIME_DEFER; +} + +static int xfrm_replay_overflow(struct xfrm_state *x, struct sk_buff *skb) +{ + int err = 0; + struct net *net = xs_net(x); + + if (x->type->flags & XFRM_TYPE_REPLAY_PROT) { + XFRM_SKB_CB(skb)->seq.output.low = ++x->replay.oseq; + if (unlikely(x->replay.oseq == 0)) { + x->replay.oseq--; + xfrm_audit_state_replay_overflow(x, skb); + err = -EOVERFLOW; + + return err; + } + if (xfrm_aevent_is_on(net)) + x->repl->notify(x, XFRM_REPLAY_UPDATE); + } + + return err; +} + +static int xfrm_replay_check(struct xfrm_state *x, + struct sk_buff *skb, __be32 net_seq) +{ + u32 diff; + u32 seq = ntohl(net_seq); + + if (unlikely(seq == 0)) + goto err; + + if (likely(seq > x->replay.seq)) + return 0; + + diff = x->replay.seq - seq; + if (diff >= min_t(unsigned int, x->props.replay_window, + sizeof(x->replay.bitmap) * 8)) { + x->stats.replay_window++; + goto err; + } + + if (x->replay.bitmap & (1U << diff)) { + x->stats.replay++; + goto err; + } + return 0; + +err: + xfrm_audit_state_replay(x, skb, net_seq); + return -EINVAL; +} + +static void xfrm_replay_advance(struct xfrm_state *x, __be32 net_seq) +{ + u32 diff; + u32 seq = ntohl(net_seq); + + if (!x->props.replay_window) + return; + + if (seq > x->replay.seq) { + diff = seq - x->replay.seq; + if (diff < x->props.replay_window) + x->replay.bitmap = ((x->replay.bitmap) << diff) | 1; + else + x->replay.bitmap = 1; + x->replay.seq = seq; + } else { + diff = x->replay.seq - seq; + x->replay.bitmap |= (1U << diff); + } + + if (xfrm_aevent_is_on(xs_net(x))) + xfrm_replay_notify(x, XFRM_REPLAY_UPDATE); +} + +static int xfrm_replay_overflow_bmp(struct xfrm_state *x, struct sk_buff *skb) +{ + int err = 0; + struct xfrm_replay_state_esn *replay_esn = x->replay_esn; + struct net *net = xs_net(x); + + if (x->type->flags & XFRM_TYPE_REPLAY_PROT) { + XFRM_SKB_CB(skb)->seq.output.low = ++replay_esn->oseq; + if (unlikely(replay_esn->oseq == 0)) { + replay_esn->oseq--; + xfrm_audit_state_replay_overflow(x, skb); + err = -EOVERFLOW; + + return err; + } + if (xfrm_aevent_is_on(net)) + x->repl->notify(x, XFRM_REPLAY_UPDATE); + } + + return err; +} + +static int xfrm_replay_check_bmp(struct xfrm_state *x, + struct sk_buff *skb, __be32 net_seq) +{ + unsigned int bitnr, nr; + struct xfrm_replay_state_esn *replay_esn = x->replay_esn; + u32 seq = ntohl(net_seq); + u32 diff = replay_esn->seq - seq; + u32 pos = (replay_esn->seq - 1) % replay_esn->replay_window; + + if (unlikely(seq == 0)) + goto err; + + if (likely(seq > replay_esn->seq)) + return 0; + + if (diff >= replay_esn->replay_window) { + x->stats.replay_window++; + goto err; + } + + if (pos >= diff) { + bitnr = (pos - diff) % replay_esn->replay_window; + nr = bitnr >> 5; + bitnr = bitnr & 0x1F; + if (replay_esn->bmp[nr] & (1U << bitnr)) + goto err_replay; + } else { + bitnr = 
replay_esn->replay_window - (diff - pos); + nr = bitnr >> 5; + bitnr = bitnr & 0x1F; + if (replay_esn->bmp[nr] & (1U << bitnr)) + goto err_replay; + } + return 0; + +err_replay: + x->stats.replay++; +err: + xfrm_audit_state_replay(x, skb, net_seq); + return -EINVAL; +} + +static void xfrm_replay_advance_bmp(struct xfrm_state *x, __be32 net_seq) +{ + unsigned int bitnr, nr, i; + u32 diff; + struct xfrm_replay_state_esn *replay_esn = x->replay_esn; + u32 seq = ntohl(net_seq); + u32 pos = (replay_esn->seq - 1) % replay_esn->replay_window; + + if (!replay_esn->replay_window) + return; + + if (seq > replay_esn->seq) { + diff = seq - replay_esn->seq; + + if (diff < replay_esn->replay_window) { + for (i = 1; i < diff; i++) { + bitnr = (pos + i) % replay_esn->replay_window; + nr = bitnr >> 5; + bitnr = bitnr & 0x1F; + replay_esn->bmp[nr] &= ~(1U << bitnr); + } + + bitnr = (pos + diff) % replay_esn->replay_window; + nr = bitnr >> 5; + bitnr = bitnr & 0x1F; + replay_esn->bmp[nr] |= (1U << bitnr); + } else { + nr = replay_esn->replay_window >> 5; + for (i = 0; i <= nr; i++) + replay_esn->bmp[i] = 0; + + bitnr = (pos + diff) % replay_esn->replay_window; + nr = bitnr >> 5; + bitnr = bitnr & 0x1F; + replay_esn->bmp[nr] |= (1U << bitnr); + } + + replay_esn->seq = seq; + } else { + diff = replay_esn->seq - seq; + + if (pos >= diff) { + bitnr = (pos - diff) % replay_esn->replay_window; + nr = bitnr >> 5; + bitnr = bitnr & 0x1F; + replay_esn->bmp[nr] |= (1U << bitnr); + } else { + bitnr = replay_esn->replay_window - (diff - pos); + nr = bitnr >> 5; + bitnr = bitnr & 0x1F; + replay_esn->bmp[nr] |= (1U << bitnr); + } + } + + if (xfrm_aevent_is_on(xs_net(x))) + xfrm_replay_notify(x, XFRM_REPLAY_UPDATE); +} + +static void xfrm_replay_notify_bmp(struct xfrm_state *x, int event) +{ + struct km_event c; + struct xfrm_replay_state_esn *replay_esn = x->replay_esn; + struct xfrm_replay_state_esn *preplay_esn = x->preplay_esn; + + /* we send notify messages in case + * 1. we updated on of the sequence numbers, and the seqno difference + * is at least x->replay_maxdiff, in this case we also update the + * timeout of our timer function + * 2. if x->replay_maxage has elapsed since last update, + * and there were changes + * + * The state structure must be locked! 
+ */ + + switch (event) { + case XFRM_REPLAY_UPDATE: + if (x->replay_maxdiff && + (replay_esn->seq - preplay_esn->seq < x->replay_maxdiff) && + (replay_esn->oseq - preplay_esn->oseq < x->replay_maxdiff)) { + if (x->xflags & XFRM_TIME_DEFER) + event = XFRM_REPLAY_TIMEOUT; + else + return; + } + + break; + + case XFRM_REPLAY_TIMEOUT: + if (memcmp(x->replay_esn, x->preplay_esn, + xfrm_replay_state_esn_len(replay_esn)) == 0) { + x->xflags |= XFRM_TIME_DEFER; + return; + } + + break; + } + + memcpy(x->preplay_esn, x->replay_esn, + xfrm_replay_state_esn_len(replay_esn)); + c.event = XFRM_MSG_NEWAE; + c.data.aevent = event; + km_state_notify(x, &c); + + if (x->replay_maxage && + !mod_timer(&x->rtimer, jiffies + x->replay_maxage)) + x->xflags &= ~XFRM_TIME_DEFER; +} + +static int xfrm_replay_overflow_esn(struct xfrm_state *x, struct sk_buff *skb) +{ + int err = 0; + struct xfrm_replay_state_esn *replay_esn = x->replay_esn; + struct net *net = xs_net(x); + + if (x->type->flags & XFRM_TYPE_REPLAY_PROT) { + XFRM_SKB_CB(skb)->seq.output.low = ++replay_esn->oseq; + XFRM_SKB_CB(skb)->seq.output.hi = replay_esn->oseq_hi; + + if (unlikely(replay_esn->oseq == 0)) { + XFRM_SKB_CB(skb)->seq.output.hi = ++replay_esn->oseq_hi; + + if (replay_esn->oseq_hi == 0) { + replay_esn->oseq--; + replay_esn->oseq_hi--; + xfrm_audit_state_replay_overflow(x, skb); + err = -EOVERFLOW; + + return err; + } + } + if (xfrm_aevent_is_on(net)) + x->repl->notify(x, XFRM_REPLAY_UPDATE); + } + + return err; +} + +static int xfrm_replay_check_esn(struct xfrm_state *x, + struct sk_buff *skb, __be32 net_seq) +{ + unsigned int bitnr, nr; + u32 diff; + struct xfrm_replay_state_esn *replay_esn = x->replay_esn; + u32 seq = ntohl(net_seq); + u32 pos = (replay_esn->seq - 1) % replay_esn->replay_window; + u32 wsize = replay_esn->replay_window; + u32 top = replay_esn->seq; + u32 bottom = top - wsize + 1; + + if (unlikely(seq == 0 && replay_esn->seq_hi == 0 && + (replay_esn->seq < replay_esn->replay_window - 1))) + goto err; + + diff = top - seq; + + if (likely(top >= wsize - 1)) { + /* A. same subspace */ + if (likely(seq > top) || seq < bottom) + return 0; + } else { + /* B. 
window spans two subspaces */ + if (likely(seq > top && seq < bottom)) + return 0; + if (seq >= bottom) + diff = ~seq + top + 1; + } + + if (diff >= replay_esn->replay_window) { + x->stats.replay_window++; + goto err; + } + + if (pos >= diff) { + bitnr = (pos - diff) % replay_esn->replay_window; + nr = bitnr >> 5; + bitnr = bitnr & 0x1F; + if (replay_esn->bmp[nr] & (1U << bitnr)) + goto err_replay; + } else { + bitnr = replay_esn->replay_window - (diff - pos); + nr = bitnr >> 5; + bitnr = bitnr & 0x1F; + if (replay_esn->bmp[nr] & (1U << bitnr)) + goto err_replay; + } + return 0; + +err_replay: + x->stats.replay++; +err: + xfrm_audit_state_replay(x, skb, net_seq); + return -EINVAL; +} + +static void xfrm_replay_advance_esn(struct xfrm_state *x, __be32 net_seq) +{ + unsigned int bitnr, nr, i; + int wrap; + u32 diff, pos, seq, seq_hi; + struct xfrm_replay_state_esn *replay_esn = x->replay_esn; + + if (!replay_esn->replay_window) + return; + + seq = ntohl(net_seq); + pos = (replay_esn->seq - 1) % replay_esn->replay_window; + seq_hi = xfrm_replay_seqhi(x, net_seq); + wrap = seq_hi - replay_esn->seq_hi; + + if ((!wrap && seq > replay_esn->seq) || wrap > 0) { + if (likely(!wrap)) + diff = seq - replay_esn->seq; + else + diff = ~replay_esn->seq + seq + 1; + + if (diff < replay_esn->replay_window) { + for (i = 1; i < diff; i++) { + bitnr = (pos + i) % replay_esn->replay_window; + nr = bitnr >> 5; + bitnr = bitnr & 0x1F; + replay_esn->bmp[nr] &= ~(1U << bitnr); + } + + bitnr = (pos + diff) % replay_esn->replay_window; + nr = bitnr >> 5; + bitnr = bitnr & 0x1F; + replay_esn->bmp[nr] |= (1U << bitnr); + } else { + nr = replay_esn->replay_window >> 5; + for (i = 0; i <= nr; i++) + replay_esn->bmp[i] = 0; + + bitnr = (pos + diff) % replay_esn->replay_window; + nr = bitnr >> 5; + bitnr = bitnr & 0x1F; + replay_esn->bmp[nr] |= (1U << bitnr); + } + + replay_esn->seq = seq; + + if (unlikely(wrap > 0)) + replay_esn->seq_hi++; + } else { + diff = replay_esn->seq - seq; + + if (pos >= diff) { + bitnr = (pos - diff) % replay_esn->replay_window; + nr = bitnr >> 5; + bitnr = bitnr & 0x1F; + replay_esn->bmp[nr] |= (1U << bitnr); + } else { + bitnr = replay_esn->replay_window - (diff - pos); + nr = bitnr >> 5; + bitnr = bitnr & 0x1F; + replay_esn->bmp[nr] |= (1U << bitnr); + } + } + + if (xfrm_aevent_is_on(xs_net(x))) + xfrm_replay_notify(x, XFRM_REPLAY_UPDATE); +} + +static struct xfrm_replay xfrm_replay_legacy = { + .advance = xfrm_replay_advance, + .check = xfrm_replay_check, + .notify = xfrm_replay_notify, + .overflow = xfrm_replay_overflow, +}; + +static struct xfrm_replay xfrm_replay_bmp = { + .advance = xfrm_replay_advance_bmp, + .check = xfrm_replay_check_bmp, + .notify = xfrm_replay_notify_bmp, + .overflow = xfrm_replay_overflow_bmp, +}; + +static struct xfrm_replay xfrm_replay_esn = { + .advance = xfrm_replay_advance_esn, + .check = xfrm_replay_check_esn, + .notify = xfrm_replay_notify_bmp, + .overflow = xfrm_replay_overflow_esn, +}; + +int xfrm_init_replay(struct xfrm_state *x) +{ + struct xfrm_replay_state_esn *replay_esn = x->replay_esn; + + if (replay_esn) { + if (replay_esn->replay_window > + replay_esn->bmp_len * sizeof(__u32)) + return -EINVAL; + + if ((x->props.flags & XFRM_STATE_ESN) && x->replay_esn) + x->repl = &xfrm_replay_esn; + else + x->repl = &xfrm_replay_bmp; + } else + x->repl = &xfrm_replay_legacy; + + return 0; +} +EXPORT_SYMBOL(xfrm_init_replay); diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 220ebc05c7af..d575f0534868 100644 --- a/net/xfrm/xfrm_state.c +++ 
b/net/xfrm/xfrm_state.c @@ -42,16 +42,9 @@ static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024; static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family); static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo); -#ifdef CONFIG_AUDITSYSCALL -static void xfrm_audit_state_replay(struct xfrm_state *x, - struct sk_buff *skb, __be32 net_seq); -#else -#define xfrm_audit_state_replay(x, s, sq) do { ; } while (0) -#endif /* CONFIG_AUDITSYSCALL */ - static inline unsigned int xfrm_dst_hash(struct net *net, - xfrm_address_t *daddr, - xfrm_address_t *saddr, + const xfrm_address_t *daddr, + const xfrm_address_t *saddr, u32 reqid, unsigned short family) { @@ -59,15 +52,16 @@ static inline unsigned int xfrm_dst_hash(struct net *net, } static inline unsigned int xfrm_src_hash(struct net *net, - xfrm_address_t *daddr, - xfrm_address_t *saddr, + const xfrm_address_t *daddr, + const xfrm_address_t *saddr, unsigned short family) { return __xfrm_src_hash(daddr, saddr, family, net->xfrm.state_hmask); } static inline unsigned int -xfrm_spi_hash(struct net *net, xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family) +xfrm_spi_hash(struct net *net, const xfrm_address_t *daddr, + __be32 spi, u8 proto, unsigned short family) { return __xfrm_spi_hash(daddr, spi, proto, family, net->xfrm.state_hmask); } @@ -362,6 +356,8 @@ static void xfrm_state_gc_destroy(struct xfrm_state *x) kfree(x->calg); kfree(x->encap); kfree(x->coaddr); + kfree(x->replay_esn); + kfree(x->preplay_esn); if (x->inner_mode) xfrm_put_mode(x->inner_mode); if (x->inner_mode_iaf) @@ -656,9 +652,9 @@ void xfrm_sad_getinfo(struct net *net, struct xfrmk_sadinfo *si) EXPORT_SYMBOL(xfrm_sad_getinfo); static int -xfrm_init_tempstate(struct xfrm_state *x, struct flowi *fl, - struct xfrm_tmpl *tmpl, - xfrm_address_t *daddr, xfrm_address_t *saddr, +xfrm_init_tempstate(struct xfrm_state *x, const struct flowi *fl, + const struct xfrm_tmpl *tmpl, + const xfrm_address_t *daddr, const xfrm_address_t *saddr, unsigned short family) { struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family); @@ -677,7 +673,10 @@ xfrm_init_tempstate(struct xfrm_state *x, struct flowi *fl, return 0; } -static struct xfrm_state *__xfrm_state_lookup(struct net *net, u32 mark, xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family) +static struct xfrm_state *__xfrm_state_lookup(struct net *net, u32 mark, + const xfrm_address_t *daddr, + __be32 spi, u8 proto, + unsigned short family) { unsigned int h = xfrm_spi_hash(net, daddr, spi, proto, family); struct xfrm_state *x; @@ -699,7 +698,10 @@ static struct xfrm_state *__xfrm_state_lookup(struct net *net, u32 mark, xfrm_ad return NULL; } -static struct xfrm_state *__xfrm_state_lookup_byaddr(struct net *net, u32 mark, xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto, unsigned short family) +static struct xfrm_state *__xfrm_state_lookup_byaddr(struct net *net, u32 mark, + const xfrm_address_t *daddr, + const xfrm_address_t *saddr, + u8 proto, unsigned short family) { unsigned int h = xfrm_src_hash(net, daddr, saddr, family); struct xfrm_state *x; @@ -746,8 +748,7 @@ static void xfrm_hash_grow_check(struct net *net, int have_hash_collision) } static void xfrm_state_look_at(struct xfrm_policy *pol, struct xfrm_state *x, - struct flowi *fl, unsigned short family, - xfrm_address_t *daddr, xfrm_address_t *saddr, + const struct flowi *fl, unsigned short family, struct xfrm_state **best, int *acq_in_progress, int *error) { @@ -784,8 +785,8 @@ static void 
xfrm_state_look_at(struct xfrm_policy *pol, struct xfrm_state *x, } struct xfrm_state * -xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, - struct flowi *fl, struct xfrm_tmpl *tmpl, +xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr, + const struct flowi *fl, struct xfrm_tmpl *tmpl, struct xfrm_policy *pol, int *err, unsigned short family) { @@ -813,7 +814,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, tmpl->mode == x->props.mode && tmpl->id.proto == x->id.proto && (tmpl->id.spi == x->id.spi || !tmpl->id.spi)) - xfrm_state_look_at(pol, x, fl, encap_family, daddr, saddr, + xfrm_state_look_at(pol, x, fl, encap_family, &best, &acquire_in_progress, &error); } if (best) @@ -829,7 +830,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, tmpl->mode == x->props.mode && tmpl->id.proto == x->id.proto && (tmpl->id.spi == x->id.spi || !tmpl->id.spi)) - xfrm_state_look_at(pol, x, fl, encap_family, daddr, saddr, + xfrm_state_look_at(pol, x, fl, encap_family, &best, &acquire_in_progress, &error); } @@ -853,7 +854,7 @@ found: xfrm_init_tempstate(x, fl, tmpl, daddr, saddr, family); memcpy(&x->mark, &pol->mark, sizeof(x->mark)); - error = security_xfrm_state_alloc_acquire(x, pol->security, fl->secid); + error = security_xfrm_state_alloc_acquire(x, pol->security, fl->flowi_secid); if (error) { x->km.state = XFRM_STATE_DEAD; to_put = x; @@ -991,7 +992,11 @@ void xfrm_state_insert(struct xfrm_state *x) EXPORT_SYMBOL(xfrm_state_insert); /* xfrm_state_lock is held */ -static struct xfrm_state *__find_acq_core(struct net *net, struct xfrm_mark *m, unsigned short family, u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, int create) +static struct xfrm_state *__find_acq_core(struct net *net, struct xfrm_mark *m, + unsigned short family, u8 mode, + u32 reqid, u8 proto, + const xfrm_address_t *daddr, + const xfrm_address_t *saddr, int create) { unsigned int h = xfrm_dst_hash(net, daddr, saddr, reqid, family); struct hlist_node *entry; @@ -1369,7 +1374,7 @@ int xfrm_state_check_expire(struct xfrm_state *x) EXPORT_SYMBOL(xfrm_state_check_expire); struct xfrm_state * -xfrm_state_lookup(struct net *net, u32 mark, xfrm_address_t *daddr, __be32 spi, +xfrm_state_lookup(struct net *net, u32 mark, const xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family) { struct xfrm_state *x; @@ -1383,7 +1388,7 @@ EXPORT_SYMBOL(xfrm_state_lookup); struct xfrm_state * xfrm_state_lookup_byaddr(struct net *net, u32 mark, - xfrm_address_t *daddr, xfrm_address_t *saddr, + const xfrm_address_t *daddr, const xfrm_address_t *saddr, u8 proto, unsigned short family) { struct xfrm_state *x; @@ -1397,7 +1402,7 @@ EXPORT_SYMBOL(xfrm_state_lookup_byaddr); struct xfrm_state * xfrm_find_acq(struct net *net, struct xfrm_mark *mark, u8 mode, u32 reqid, u8 proto, - xfrm_address_t *daddr, xfrm_address_t *saddr, + const xfrm_address_t *daddr, const xfrm_address_t *saddr, int create, unsigned short family) { struct xfrm_state *x; @@ -1609,54 +1614,6 @@ void xfrm_state_walk_done(struct xfrm_state_walk *walk) } EXPORT_SYMBOL(xfrm_state_walk_done); - -void xfrm_replay_notify(struct xfrm_state *x, int event) -{ - struct km_event c; - /* we send notify messages in case - * 1. we updated on of the sequence numbers, and the seqno difference - * is at least x->replay_maxdiff, in this case we also update the - * timeout of our timer function - * 2. 
if x->replay_maxage has elapsed since last update, - * and there were changes - * - * The state structure must be locked! - */ - - switch (event) { - case XFRM_REPLAY_UPDATE: - if (x->replay_maxdiff && - (x->replay.seq - x->preplay.seq < x->replay_maxdiff) && - (x->replay.oseq - x->preplay.oseq < x->replay_maxdiff)) { - if (x->xflags & XFRM_TIME_DEFER) - event = XFRM_REPLAY_TIMEOUT; - else - return; - } - - break; - - case XFRM_REPLAY_TIMEOUT: - if ((x->replay.seq == x->preplay.seq) && - (x->replay.bitmap == x->preplay.bitmap) && - (x->replay.oseq == x->preplay.oseq)) { - x->xflags |= XFRM_TIME_DEFER; - return; - } - - break; - } - - memcpy(&x->preplay, &x->replay, sizeof(struct xfrm_replay_state)); - c.event = XFRM_MSG_NEWAE; - c.data.aevent = event; - km_state_notify(x, &c); - - if (x->replay_maxage && - !mod_timer(&x->rtimer, jiffies + x->replay_maxage)) - x->xflags &= ~XFRM_TIME_DEFER; -} - static void xfrm_replay_timer_handler(unsigned long data) { struct xfrm_state *x = (struct xfrm_state*)data; @@ -1665,7 +1622,7 @@ static void xfrm_replay_timer_handler(unsigned long data) if (x->km.state == XFRM_STATE_VALID) { if (xfrm_aevent_is_on(xs_net(x))) - xfrm_replay_notify(x, XFRM_REPLAY_TIMEOUT); + x->repl->notify(x, XFRM_REPLAY_TIMEOUT); else x->xflags |= XFRM_TIME_DEFER; } @@ -1673,61 +1630,10 @@ static void xfrm_replay_timer_handler(unsigned long data) spin_unlock(&x->lock); } -int xfrm_replay_check(struct xfrm_state *x, - struct sk_buff *skb, __be32 net_seq) -{ - u32 diff; - u32 seq = ntohl(net_seq); - - if (unlikely(seq == 0)) - goto err; - - if (likely(seq > x->replay.seq)) - return 0; - - diff = x->replay.seq - seq; - if (diff >= min_t(unsigned int, x->props.replay_window, - sizeof(x->replay.bitmap) * 8)) { - x->stats.replay_window++; - goto err; - } - - if (x->replay.bitmap & (1U << diff)) { - x->stats.replay++; - goto err; - } - return 0; - -err: - xfrm_audit_state_replay(x, skb, net_seq); - return -EINVAL; -} - -void xfrm_replay_advance(struct xfrm_state *x, __be32 net_seq) -{ - u32 diff; - u32 seq = ntohl(net_seq); - - if (seq > x->replay.seq) { - diff = seq - x->replay.seq; - if (diff < x->props.replay_window) - x->replay.bitmap = ((x->replay.bitmap) << diff) | 1; - else - x->replay.bitmap = 1; - x->replay.seq = seq; - } else { - diff = x->replay.seq - seq; - x->replay.bitmap |= (1U << diff); - } - - if (xfrm_aevent_is_on(xs_net(x))) - xfrm_replay_notify(x, XFRM_REPLAY_UPDATE); -} - static LIST_HEAD(xfrm_km_list); static DEFINE_RWLOCK(xfrm_km_lock); -void km_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c) +void km_policy_notify(struct xfrm_policy *xp, int dir, const struct km_event *c) { struct xfrm_mgr *km; @@ -1738,7 +1644,7 @@ void km_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c) read_unlock(&xfrm_km_lock); } -void km_state_notify(struct xfrm_state *x, struct km_event *c) +void km_state_notify(struct xfrm_state *x, const struct km_event *c) { struct xfrm_mgr *km; read_lock(&xfrm_km_lock); @@ -1819,9 +1725,9 @@ void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 pid) EXPORT_SYMBOL(km_policy_expired); #ifdef CONFIG_XFRM_MIGRATE -int km_migrate(struct xfrm_selector *sel, u8 dir, u8 type, - struct xfrm_migrate *m, int num_migrate, - struct xfrm_kmaddress *k) +int km_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, + const struct xfrm_migrate *m, int num_migrate, + const struct xfrm_kmaddress *k) { int err = -EINVAL; int ret; @@ -2236,7 +2142,7 @@ void xfrm_audit_state_replay_overflow(struct xfrm_state *x, } 
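With the legacy helpers removed here, replay handling is reached only through the per-state callback table chosen by xfrm_init_replay() in the new xfrm_replay.c: legacy 32-bit window, larger bitmap window, or ESN. A condensed sketch of how the reworked call sites use it, mirroring the xfrm_input() changes earlier in this patch (the helper name is illustrative):

	/* Sketch only: x->repl was selected once by xfrm_init_replay(). */
	static int replay_check_and_advance(struct xfrm_state *x,
					    struct sk_buff *skb, __be32 net_seq)
	{
		if (x->props.replay_window && x->repl->check(x, skb, net_seq))
			return -EINVAL;		/* replayed or outside the window */

		x->repl->advance(x, net_seq);	/* record net_seq in the window */
		return 0;
	}

The same indirection covers the output path (x->repl->overflow()) and the timer-driven notifications (x->repl->notify()), which is what lets the ESN variants carry 64-bit sequence numbers without touching the generic code.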
EXPORT_SYMBOL_GPL(xfrm_audit_state_replay_overflow); -static void xfrm_audit_state_replay(struct xfrm_state *x, +void xfrm_audit_state_replay(struct xfrm_state *x, struct sk_buff *skb, __be32 net_seq) { struct audit_buffer *audit_buf; @@ -2251,6 +2157,7 @@ static void xfrm_audit_state_replay(struct xfrm_state *x, spi, spi, ntohl(net_seq)); audit_log_end(audit_buf); } +EXPORT_SYMBOL_GPL(xfrm_audit_state_replay); void xfrm_audit_state_notfound_simple(struct sk_buff *skb, u16 family) { diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index d5e1e0b08890..706385ae3e4b 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -119,6 +119,19 @@ static inline int verify_sec_ctx_len(struct nlattr **attrs) return 0; } +static inline int verify_replay(struct xfrm_usersa_info *p, + struct nlattr **attrs) +{ + struct nlattr *rt = attrs[XFRMA_REPLAY_ESN_VAL]; + + if (!rt) + return 0; + + if (p->replay_window != 0) + return -EINVAL; + + return 0; +} static int verify_newsa_info(struct xfrm_usersa_info *p, struct nlattr **attrs) @@ -214,6 +227,8 @@ static int verify_newsa_info(struct xfrm_usersa_info *p, goto out; if ((err = verify_sec_ctx_len(attrs))) goto out; + if ((err = verify_replay(p, attrs))) + goto out; err = -EINVAL; switch (p->mode) { @@ -234,7 +249,7 @@ out: } static int attach_one_algo(struct xfrm_algo **algpp, u8 *props, - struct xfrm_algo_desc *(*get_byname)(char *, int), + struct xfrm_algo_desc *(*get_byname)(const char *, int), struct nlattr *rta) { struct xfrm_algo *p, *ualg; @@ -345,6 +360,33 @@ static int attach_aead(struct xfrm_algo_aead **algpp, u8 *props, return 0; } +static int xfrm_alloc_replay_state_esn(struct xfrm_replay_state_esn **replay_esn, + struct xfrm_replay_state_esn **preplay_esn, + struct nlattr *rta) +{ + struct xfrm_replay_state_esn *p, *pp, *up; + + if (!rta) + return 0; + + up = nla_data(rta); + + p = kmemdup(up, xfrm_replay_state_esn_len(up), GFP_KERNEL); + if (!p) + return -ENOMEM; + + pp = kmemdup(up, xfrm_replay_state_esn_len(up), GFP_KERNEL); + if (!pp) { + kfree(p); + return -ENOMEM; + } + + *replay_esn = p; + *preplay_esn = pp; + + return 0; +} + static inline int xfrm_user_sec_ctx_size(struct xfrm_sec_ctx *xfrm_ctx) { int len = 0; @@ -380,10 +422,20 @@ static void copy_from_user_state(struct xfrm_state *x, struct xfrm_usersa_info * static void xfrm_update_ae_params(struct xfrm_state *x, struct nlattr **attrs) { struct nlattr *rp = attrs[XFRMA_REPLAY_VAL]; + struct nlattr *re = attrs[XFRMA_REPLAY_ESN_VAL]; struct nlattr *lt = attrs[XFRMA_LTIME_VAL]; struct nlattr *et = attrs[XFRMA_ETIMER_THRESH]; struct nlattr *rt = attrs[XFRMA_REPLAY_THRESH]; + if (re) { + struct xfrm_replay_state_esn *replay_esn; + replay_esn = nla_data(re); + memcpy(x->replay_esn, replay_esn, + xfrm_replay_state_esn_len(replay_esn)); + memcpy(x->preplay_esn, replay_esn, + xfrm_replay_state_esn_len(replay_esn)); + } + if (rp) { struct xfrm_replay_state *replay; replay = nla_data(rp); @@ -467,16 +519,19 @@ static struct xfrm_state *xfrm_state_construct(struct net *net, security_xfrm_state_alloc(x, nla_data(attrs[XFRMA_SEC_CTX]))) goto error; + if ((err = xfrm_alloc_replay_state_esn(&x->replay_esn, &x->preplay_esn, + attrs[XFRMA_REPLAY_ESN_VAL]))) + goto error; + x->km.seq = p->seq; x->replay_maxdiff = net->xfrm.sysctl_aevent_rseqth; /* sysctl_xfrm_aevent_etime is in 100ms units */ x->replay_maxage = (net->xfrm.sysctl_aevent_etime*HZ)/XFRM_AE_ETH_M; - x->preplay.bitmap = 0; - x->preplay.seq = x->replay.seq+x->replay_maxdiff; - x->preplay.oseq = x->replay.oseq 
+x->replay_maxdiff; - /* override default values from above */ + if ((err = xfrm_init_replay(x))) + goto error; + /* override default values from above */ xfrm_update_ae_params(x, attrs); return x; @@ -497,9 +552,9 @@ static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh, struct xfrm_state *x; int err; struct km_event c; - uid_t loginuid = NETLINK_CB(skb).loginuid; - u32 sessionid = NETLINK_CB(skb).sessionid; - u32 sid = NETLINK_CB(skb).sid; + uid_t loginuid = audit_get_loginuid(current); + u32 sessionid = audit_get_sessionid(current); + u32 sid; err = verify_newsa_info(p, attrs); if (err) @@ -515,6 +570,7 @@ static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh, else err = xfrm_state_update(x); + security_task_getsecid(current, &sid); xfrm_audit_state_add(x, err ? 0 : 1, loginuid, sessionid, sid); if (err < 0) { @@ -575,9 +631,9 @@ static int xfrm_del_sa(struct sk_buff *skb, struct nlmsghdr *nlh, int err = -ESRCH; struct km_event c; struct xfrm_usersa_id *p = nlmsg_data(nlh); - uid_t loginuid = NETLINK_CB(skb).loginuid; - u32 sessionid = NETLINK_CB(skb).sessionid; - u32 sid = NETLINK_CB(skb).sid; + uid_t loginuid = audit_get_loginuid(current); + u32 sessionid = audit_get_sessionid(current); + u32 sid; x = xfrm_user_state_lookup(net, p, attrs, &err); if (x == NULL) @@ -602,6 +658,7 @@ static int xfrm_del_sa(struct sk_buff *skb, struct nlmsghdr *nlh, km_state_notify(x, &c); out: + security_task_getsecid(current, &sid); xfrm_audit_state_delete(x, err ? 0 : 1, loginuid, sessionid, sid); xfrm_state_put(x); return err; @@ -705,6 +762,10 @@ static int copy_to_user_state_extra(struct xfrm_state *x, if (xfrm_mark_put(skb, &x->mark)) goto nla_put_failure; + if (x->replay_esn) + NLA_PUT(skb, XFRMA_REPLAY_ESN_VAL, + xfrm_replay_state_esn_len(x->replay_esn), x->replay_esn); + if (x->security && copy_sec_ctx(x->security, skb) < 0) goto nla_put_failure; @@ -1265,9 +1326,9 @@ static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh, struct km_event c; int err; int excl; - uid_t loginuid = NETLINK_CB(skb).loginuid; - u32 sessionid = NETLINK_CB(skb).sessionid; - u32 sid = NETLINK_CB(skb).sid; + uid_t loginuid = audit_get_loginuid(current); + u32 sessionid = audit_get_sessionid(current); + u32 sid; err = verify_newpolicy_info(p); if (err) @@ -1286,6 +1347,7 @@ static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh, * a type XFRM_MSG_UPDPOLICY - JHS */ excl = nlh->nlmsg_type == XFRM_MSG_NEWPOLICY; err = xfrm_policy_insert(p->dir, xp, excl); + security_task_getsecid(current, &sid); xfrm_audit_policy_add(xp, err ? 0 : 1, loginuid, sessionid, sid); if (err) { @@ -1522,10 +1584,11 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, NETLINK_CB(skb).pid); } } else { - uid_t loginuid = NETLINK_CB(skb).loginuid; - u32 sessionid = NETLINK_CB(skb).sessionid; - u32 sid = NETLINK_CB(skb).sid; + uid_t loginuid = audit_get_loginuid(current); + u32 sessionid = audit_get_sessionid(current); + u32 sid; + security_task_getsecid(current, &sid); xfrm_audit_policy_delete(xp, err ? 
0 : 1, loginuid, sessionid, sid);
@@ -1553,9 +1616,9 @@ static int xfrm_flush_sa(struct sk_buff *skb, struct nlmsghdr *nlh,
	struct xfrm_audit audit_info;
	int err;

-	audit_info.loginuid = NETLINK_CB(skb).loginuid;
-	audit_info.sessionid = NETLINK_CB(skb).sessionid;
-	audit_info.secid = NETLINK_CB(skb).sid;
+	audit_info.loginuid = audit_get_loginuid(current);
+	audit_info.sessionid = audit_get_sessionid(current);
+	security_task_getsecid(current, &audit_info.secid);
	err = xfrm_state_flush(net, p->proto, &audit_info);
	if (err) {
		if (err == -ESRCH) /* empty table */
@@ -1572,17 +1635,21 @@ static int xfrm_flush_sa(struct sk_buff *skb, struct nlmsghdr *nlh,
	return 0;
}

-static inline size_t xfrm_aevent_msgsize(void)
+static inline size_t xfrm_aevent_msgsize(struct xfrm_state *x)
{
+	size_t replay_size = x->replay_esn ?
+			      xfrm_replay_state_esn_len(x->replay_esn) :
+			      sizeof(struct xfrm_replay_state);
+
	return NLMSG_ALIGN(sizeof(struct xfrm_aevent_id))
-	       + nla_total_size(sizeof(struct xfrm_replay_state))
+	       + nla_total_size(replay_size)
	       + nla_total_size(sizeof(struct xfrm_lifetime_cur))
	       + nla_total_size(sizeof(struct xfrm_mark))
	       + nla_total_size(4) /* XFRM_AE_RTHR */
	       + nla_total_size(4); /* XFRM_AE_ETHR */
}

-static int build_aevent(struct sk_buff *skb, struct xfrm_state *x, struct km_event *c)
+static int build_aevent(struct sk_buff *skb, struct xfrm_state *x, const struct km_event *c)
{
	struct xfrm_aevent_id *id;
	struct nlmsghdr *nlh;
@@ -1600,7 +1667,13 @@ static int build_aevent(struct sk_buff *skb, struct xfrm_state *x, struct km_eve
	id->reqid = x->props.reqid;
	id->flags = c->data.aevent;

-	NLA_PUT(skb, XFRMA_REPLAY_VAL, sizeof(x->replay), &x->replay);
+	if (x->replay_esn)
+		NLA_PUT(skb, XFRMA_REPLAY_ESN_VAL,
+			xfrm_replay_state_esn_len(x->replay_esn),
+			x->replay_esn);
+	else
+		NLA_PUT(skb, XFRMA_REPLAY_VAL, sizeof(x->replay), &x->replay);
+
	NLA_PUT(skb, XFRMA_LTIME_VAL, sizeof(x->curlft), &x->curlft);

	if (id->flags & XFRM_AE_RTHR)
@@ -1633,16 +1706,16 @@ static int xfrm_get_ae(struct sk_buff *skb, struct nlmsghdr *nlh,
	struct xfrm_aevent_id *p = nlmsg_data(nlh);
	struct xfrm_usersa_id *id = &p->sa_id;

-	r_skb = nlmsg_new(xfrm_aevent_msgsize(), GFP_ATOMIC);
-	if (r_skb == NULL)
-		return -ENOMEM;
-
	mark = xfrm_mark_get(attrs, &m);

	x = xfrm_state_lookup(net, mark, &id->daddr, id->spi, id->proto, id->family);
-	if (x == NULL) {
-		kfree_skb(r_skb);
+	if (x == NULL)
		return -ESRCH;
+
+	r_skb = nlmsg_new(xfrm_aevent_msgsize(x), GFP_ATOMIC);
+	if (r_skb == NULL) {
+		xfrm_state_put(x);
+		return -ENOMEM;
	}

	/*
@@ -1674,9 +1747,10 @@ static int xfrm_new_ae(struct sk_buff *skb, struct nlmsghdr *nlh,
	struct xfrm_mark m;
	struct xfrm_aevent_id *p = nlmsg_data(nlh);
	struct nlattr *rp = attrs[XFRMA_REPLAY_VAL];
+	struct nlattr *re = attrs[XFRMA_REPLAY_ESN_VAL];
	struct nlattr *lt = attrs[XFRMA_LTIME_VAL];

-	if (!lt && !rp)
+	if (!lt && !rp && !re)
		return err;

	/* pedantic mode - thou shalt sayeth replaceth */
@@ -1720,9 +1794,9 @@ static int xfrm_flush_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
	if (err)
		return err;

-	audit_info.loginuid = NETLINK_CB(skb).loginuid;
-	audit_info.sessionid = NETLINK_CB(skb).sessionid;
-	audit_info.secid = NETLINK_CB(skb).sid;
+	audit_info.loginuid = audit_get_loginuid(current);
+	audit_info.sessionid = audit_get_sessionid(current);
+	security_task_getsecid(current, &audit_info.secid);
	err = xfrm_policy_flush(net, type, &audit_info);
	if (err) {
		if (err == -ESRCH) /* empty table */
@@ -1789,9 +1863,11 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh,

	err = 0;
	if (up->hard) {
-		uid_t loginuid = NETLINK_CB(skb).loginuid;
-		uid_t sessionid = NETLINK_CB(skb).sessionid;
-		u32 sid = NETLINK_CB(skb).sid;
+		uid_t loginuid = audit_get_loginuid(current);
+		u32 sessionid = audit_get_sessionid(current);
+		u32 sid;
+
+		security_task_getsecid(current, &sid);
		xfrm_policy_delete(xp, p->dir);
		xfrm_audit_policy_delete(xp, 1, loginuid, sessionid, sid);

@@ -1830,9 +1906,11 @@ static int xfrm_add_sa_expire(struct sk_buff *skb, struct nlmsghdr *nlh,
	km_state_expired(x, ue->hard, current->pid);

	if (ue->hard) {
-		uid_t loginuid = NETLINK_CB(skb).loginuid;
-		uid_t sessionid = NETLINK_CB(skb).sessionid;
-		u32 sid = NETLINK_CB(skb).sid;
+		uid_t loginuid = audit_get_loginuid(current);
+		u32 sessionid = audit_get_sessionid(current);
+		u32 sid;
+
+		security_task_getsecid(current, &sid);
		__xfrm_state_delete(x);
		xfrm_audit_state_delete(x, 1, loginuid, sessionid, sid);
	}
@@ -1986,7 +2064,7 @@ static int xfrm_do_migrate(struct sk_buff *skb, struct nlmsghdr *nlh,
#endif

#ifdef CONFIG_XFRM_MIGRATE
-static int copy_to_user_migrate(struct xfrm_migrate *m, struct sk_buff *skb)
+static int copy_to_user_migrate(const struct xfrm_migrate *m, struct sk_buff *skb)
{
	struct xfrm_user_migrate um;

@@ -2004,7 +2082,7 @@ static int copy_to_user_migrate(struct xfrm_migrate *m, struct sk_buff *skb)
	return nla_put(skb, XFRMA_MIGRATE, sizeof(um), &um);
}

-static int copy_to_user_kmaddress(struct xfrm_kmaddress *k, struct sk_buff *skb)
+static int copy_to_user_kmaddress(const struct xfrm_kmaddress *k, struct sk_buff *skb)
{
	struct xfrm_user_kmaddress uk;

@@ -2025,11 +2103,11 @@ static inline size_t xfrm_migrate_msgsize(int num_migrate, int with_kma)
	       + userpolicy_type_attrsize();
}

-static int build_migrate(struct sk_buff *skb, struct xfrm_migrate *m,
-			 int num_migrate, struct xfrm_kmaddress *k,
-			 struct xfrm_selector *sel, u8 dir, u8 type)
+static int build_migrate(struct sk_buff *skb, const struct xfrm_migrate *m,
+			 int num_migrate, const struct xfrm_kmaddress *k,
+			 const struct xfrm_selector *sel, u8 dir, u8 type)
{
-	struct xfrm_migrate *mp;
+	const struct xfrm_migrate *mp;
	struct xfrm_userpolicy_id *pol_id;
	struct nlmsghdr *nlh;
	int i;
@@ -2061,9 +2139,9 @@ nlmsg_failure:
	return -EMSGSIZE;
}

-static int xfrm_send_migrate(struct xfrm_selector *sel, u8 dir, u8 type,
-			     struct xfrm_migrate *m, int num_migrate,
-			     struct xfrm_kmaddress *k)
+static int xfrm_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
+			     const struct xfrm_migrate *m, int num_migrate,
+			     const struct xfrm_kmaddress *k)
{
	struct net *net = &init_net;
	struct sk_buff *skb;
@@ -2079,9 +2157,9 @@ static int xfrm_send_migrate(struct xfrm_selector *sel, u8 dir, u8 type,
	return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_MIGRATE, GFP_ATOMIC);
}
#else
-static int xfrm_send_migrate(struct xfrm_selector *sel, u8 dir, u8 type,
-			     struct xfrm_migrate *m, int num_migrate,
-			     struct xfrm_kmaddress *k)
+static int xfrm_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
+			     const struct xfrm_migrate *m, int num_migrate,
+			     const struct xfrm_kmaddress *k)
{
	return -ENOPROTOOPT;
}
@@ -2137,6 +2215,7 @@ static const struct nla_policy xfrma_policy[XFRMA_MAX+1] = {
	[XFRMA_KMADDRESS]	= { .len = sizeof(struct xfrm_user_kmaddress) },
	[XFRMA_MARK]		= { .len = sizeof(struct xfrm_mark) },
	[XFRMA_TFCPAD]		= { .type = NLA_U32 },
+	[XFRMA_REPLAY_ESN_VAL]	= { .len = sizeof(struct xfrm_replay_state_esn) },
};

static struct xfrm_link {
@@ -2189,7 +2268,7 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)

	if ((type == (XFRM_MSG_GETSA - XFRM_MSG_BASE) ||
	     type == (XFRM_MSG_GETPOLICY - XFRM_MSG_BASE)) &&
-	    (nlh->nlmsg_flags & NLM_F_DUMP) == NLM_F_DUMP) {
+	    (nlh->nlmsg_flags & NLM_F_DUMP)) {
		if (link->dump == NULL)
			return -EINVAL;

@@ -2220,7 +2299,7 @@ static inline size_t xfrm_expire_msgsize(void)
	       + nla_total_size(sizeof(struct xfrm_mark));
}

-static int build_expire(struct sk_buff *skb, struct xfrm_state *x, struct km_event *c)
+static int build_expire(struct sk_buff *skb, struct xfrm_state *x, const struct km_event *c)
{
	struct xfrm_user_expire *ue;
	struct nlmsghdr *nlh;
@@ -2242,7 +2321,7 @@ nla_put_failure:
	return -EMSGSIZE;
}

-static int xfrm_exp_state_notify(struct xfrm_state *x, struct km_event *c)
+static int xfrm_exp_state_notify(struct xfrm_state *x, const struct km_event *c)
{
	struct net *net = xs_net(x);
	struct sk_buff *skb;
@@ -2259,12 +2338,12 @@ static int xfrm_exp_state_notify(struct xfrm_state *x, struct km_event *c)
	return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_EXPIRE, GFP_ATOMIC);
}

-static int xfrm_aevent_state_notify(struct xfrm_state *x, struct km_event *c)
+static int xfrm_aevent_state_notify(struct xfrm_state *x, const struct km_event *c)
{
	struct net *net = xs_net(x);
	struct sk_buff *skb;

-	skb = nlmsg_new(xfrm_aevent_msgsize(), GFP_ATOMIC);
+	skb = nlmsg_new(xfrm_aevent_msgsize(x), GFP_ATOMIC);
	if (skb == NULL)
		return -ENOMEM;

@@ -2274,7 +2353,7 @@ static int xfrm_aevent_state_notify(struct xfrm_state *x, struct km_event *c)
	return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_AEVENTS, GFP_ATOMIC);
}

-static int xfrm_notify_sa_flush(struct km_event *c)
+static int xfrm_notify_sa_flush(const struct km_event *c)
{
	struct net *net = c->net;
	struct xfrm_usersa_flush *p;
@@ -2318,6 +2397,8 @@ static inline size_t xfrm_sa_len(struct xfrm_state *x)
		l += nla_total_size(sizeof(*x->encap));
	if (x->tfcpad)
		l += nla_total_size(sizeof(x->tfcpad));
+	if (x->replay_esn)
+		l += nla_total_size(xfrm_replay_state_esn_len(x->replay_esn));
	if (x->security)
		l += nla_total_size(sizeof(struct xfrm_user_sec_ctx) +
				    x->security->ctx_len);
@@ -2330,7 +2411,7 @@ static inline size_t xfrm_sa_len(struct xfrm_state *x)
	return l;
}

-static int xfrm_notify_sa(struct xfrm_state *x, struct km_event *c)
+static int xfrm_notify_sa(struct xfrm_state *x, const struct km_event *c)
{
	struct net *net = xs_net(x);
	struct xfrm_usersa_info *p;
@@ -2387,7 +2468,7 @@ nla_put_failure:
	return -1;
}

-static int xfrm_send_state_notify(struct xfrm_state *x, struct km_event *c)
+static int xfrm_send_state_notify(struct xfrm_state *x, const struct km_event *c)
{

	switch (c->event) {
@@ -2546,7 +2627,7 @@ static inline size_t xfrm_polexpire_msgsize(struct xfrm_policy *xp)
}

static int build_polexpire(struct sk_buff *skb, struct xfrm_policy *xp,
-			   int dir, struct km_event *c)
+			   int dir, const struct km_event *c)
{
	struct xfrm_user_polexpire *upe;
	struct nlmsghdr *nlh;
@@ -2576,7 +2657,7 @@ nlmsg_failure:
	return -EMSGSIZE;
}

-static int xfrm_exp_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c)
+static int xfrm_exp_policy_notify(struct xfrm_policy *xp, int dir, const struct km_event *c)
{
	struct net *net = xp_net(xp);
	struct sk_buff *skb;
@@ -2591,7 +2672,7 @@ static int xfrm_exp_policy_notify(struct xfrm_policy *xp, int dir, struct km_eve
	return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_EXPIRE, GFP_ATOMIC);
}

-static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, struct km_event *c)
+static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, const struct km_event *c)
{
	struct net *net = xp_net(xp);
	struct xfrm_userpolicy_info *p;
@@ -2656,7 +2737,7 @@ nlmsg_failure:
	return -1;
}

-static int xfrm_notify_policy_flush(struct km_event *c)
+static int xfrm_notify_policy_flush(const struct km_event *c)
{
	struct net *net = c->net;
	struct nlmsghdr *nlh;
@@ -2681,7 +2762,7 @@ nlmsg_failure:
	return -1;
}

-static int xfrm_send_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c)
+static int xfrm_send_policy_notify(struct xfrm_policy *xp, int dir, const struct km_event *c)
{

	switch (c->event) {
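
The sketch below is an editor's illustration, not part of the patch: it shows why xfrm_aevent_msgsize() and xfrm_sa_len() above had to start looking at the state's replay_esn pointer. With XFRMA_REPLAY_ESN_VAL the replay attribute carries a per-state bitmap, so a fixed sizeof(struct xfrm_replay_state) no longer bounds the netlink message. The struct layouts, field names and helpers here are simplified userspace stand-ins for the kernel definitions, assumed only for demonstration.

/* Minimal userspace sketch of variable-length replay attribute sizing. */
#include <stdio.h>
#include <stdint.h>
#include <stddef.h>

#define NLA_ALIGNTO	4
#define NLA_ALIGN(len)	(((len) + NLA_ALIGNTO - 1) & ~(NLA_ALIGNTO - 1))
#define NLA_HDRLEN	4	/* attribute header: 16-bit len + 16-bit type */

/* Size of one attribute: header plus payload, padded to 4 bytes. */
static size_t nla_total_size(size_t payload)
{
	return NLA_ALIGN(NLA_HDRLEN + payload);
}

/* Legacy replay counters: fixed size, so the old msgsize(void) was enough. */
struct replay_state {
	uint32_t oseq, seq, bitmap;
};

/* ESN replay state: the trailing bitmap makes the payload per-state. */
struct replay_state_esn {
	uint32_t bmp_len;		/* number of 32-bit bitmap words */
	uint32_t oseq, seq, oseq_hi, seq_hi, replay_window;
	uint32_t bmp[];			/* bmp_len words follow */
};

static size_t replay_state_esn_len(const struct replay_state_esn *esn)
{
	return sizeof(*esn) + esn->bmp_len * sizeof(uint32_t);
}

int main(void)
{
	struct replay_state_esn esn = { .bmp_len = 4096 / 32 };	/* 4k window */

	printf("legacy replay attribute: %zu bytes\n",
	       nla_total_size(sizeof(struct replay_state)));
	printf("ESN replay attribute:    %zu bytes\n",
	       nla_total_size(replay_state_esn_len(&esn)));
	return 0;
}

Under these assumptions the ESN attribute for a 4k window is roughly half a kilobyte, which is why the aevent skb is now sized per state (xfrm_aevent_msgsize(x)) after the state lookup rather than from a compile-time constant.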