From a947b0a93efa9a25c012aa88848f4cf8d9b41280 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Fri, 22 Feb 2013 10:54:54 +0100 Subject: xfrm: allow to avoid copying DSCP during encapsulation By default, DSCP is copying during encapsulation. Copying the DSCP in IPsec tunneling may be a bit dangerous because packets with different DSCP may get reordered relative to each other in the network and then dropped by the remote IPsec GW if the reordering becomes too big compared to the replay window. It is possible to avoid this copy with netfilter rules, but it's very convenient to be able to configure it for each SA directly. This patch adds a toogle for this purpose. By default, it's not set to maintain backward compatibility. Field flags in struct xfrm_usersa_info is full, hence I add a new attribute. Signed-off-by: Nicolas Dichtel Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/net') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 24c8886fd969..ae16531d0d35 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -162,6 +162,7 @@ struct xfrm_state { xfrm_address_t saddr; int header_len; int trailer_len; + u32 extra_flags; } props; struct xfrm_lifetime_cfg lft; -- cgit v1.2.3 From f8bacc210408f7a2a182f184a9fa1475b8a67440 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 14 Feb 2013 23:27:01 +0100 Subject: cfg80211: clean up mesh plink station change API Make the ability to leave the plink_state unchanged not use a magic -1 variable that isn't in the enum, but an explicit change flag; reject invalid plink states or actions and move the needed constants for plink actions to the right header file. Also reject plink_state changes for non-mesh interfaces. Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 15 ++------------- include/uapi/linux/nl80211.h | 20 +++++++++++++++++++- net/mac80211/cfg.c | 14 +++++++++++--- net/wireless/nl80211.c | 29 ++++++++++++++++++++++------- 4 files changed, 54 insertions(+), 24 deletions(-) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index d581c6de5d64..9b54574c3df9 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -610,23 +610,11 @@ struct cfg80211_ap_settings { bool radar_required; }; -/** - * enum plink_action - actions to perform in mesh peers - * - * @PLINK_ACTION_INVALID: action 0 is reserved - * @PLINK_ACTION_OPEN: start mesh peer link establishment - * @PLINK_ACTION_BLOCK: block traffic from this mesh peer - */ -enum plink_actions { - PLINK_ACTION_INVALID, - PLINK_ACTION_OPEN, - PLINK_ACTION_BLOCK, -}; - /** * enum station_parameters_apply_mask - station parameter values to apply * @STATION_PARAM_APPLY_UAPSD: apply new uAPSD parameters (uapsd_queues, max_sp) * @STATION_PARAM_APPLY_CAPABILITY: apply new capability + * @STATION_PARAM_APPLY_PLINK_STATE: apply new plink state * * Not all station parameters have in-band "no change" signalling, * for those that don't these flags will are used. @@ -634,6 +622,7 @@ enum plink_actions { enum station_parameters_apply_mask { STATION_PARAM_APPLY_UAPSD = BIT(0), STATION_PARAM_APPLY_CAPABILITY = BIT(1), + STATION_PARAM_APPLY_PLINK_STATE = BIT(2), }; /** diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index c46bb016f4e4..7dcc69f73d2c 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -884,7 +884,8 @@ enum nl80211_commands { * consisting of a nested array. * * @NL80211_ATTR_MESH_ID: mesh id (1-32 bytes). - * @NL80211_ATTR_STA_PLINK_ACTION: action to perform on the mesh peer link. + * @NL80211_ATTR_STA_PLINK_ACTION: action to perform on the mesh peer link + * (see &enum nl80211_plink_action). * @NL80211_ATTR_MPATH_NEXT_HOP: MAC address of the next hop for a mesh path. * @NL80211_ATTR_MPATH_INFO: information about a mesh_path, part of mesh path * info given for %NL80211_CMD_GET_MPATH, nested attribute described at @@ -3307,6 +3308,23 @@ enum nl80211_plink_state { MAX_NL80211_PLINK_STATES = NUM_NL80211_PLINK_STATES - 1 }; +/** + * enum nl80211_plink_action - actions to perform in mesh peers + * + * @NL80211_PLINK_ACTION_NO_ACTION: perform no action + * @NL80211_PLINK_ACTION_OPEN: start mesh peer link establishment + * @NL80211_PLINK_ACTION_BLOCK: block traffic from this mesh peer + * @NUM_NL80211_PLINK_ACTIONS: number of possible actions + */ +enum plink_actions { + NL80211_PLINK_ACTION_NO_ACTION, + NL80211_PLINK_ACTION_OPEN, + NL80211_PLINK_ACTION_BLOCK, + + NUM_NL80211_PLINK_ACTIONS, +}; + + #define NL80211_KCK_LEN 16 #define NL80211_KEK_LEN 16 #define NL80211_REPLAY_CTR_LEN 8 diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index fb306814576a..ca28405d5f65 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1261,7 +1261,9 @@ static int sta_apply_parameters(struct ieee80211_local *local, if (ieee80211_vif_is_mesh(&sdata->vif)) { #ifdef CONFIG_MAC80211_MESH u32 changed = 0; - if (sdata->u.mesh.security & IEEE80211_MESH_SEC_SECURED) { + if (sdata->u.mesh.security & IEEE80211_MESH_SEC_SECURED && + (params->sta_modify_mask & + STATION_PARAM_APPLY_PLINK_STATE)) { switch (params->plink_state) { case NL80211_PLINK_ESTAB: if (sta->plink_state != NL80211_PLINK_ESTAB) @@ -1292,12 +1294,18 @@ static int sta_apply_parameters(struct ieee80211_local *local, /* nothing */ break; } + } else if (params->sta_modify_mask & + STATION_PARAM_APPLY_PLINK_STATE) { + return -EINVAL; } else { switch (params->plink_action) { - case PLINK_ACTION_OPEN: + case NL80211_PLINK_ACTION_NO_ACTION: + /* nothing */ + break; + case NL80211_PLINK_ACTION_OPEN: changed |= mesh_plink_open(sta); break; - case PLINK_ACTION_BLOCK: + case NL80211_PLINK_ACTION_BLOCK: changed |= mesh_plink_block(sta); break; } diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index d44ab216c0ec..9e7ece0e5e5e 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3412,7 +3412,6 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info) memset(¶ms, 0, sizeof(params)); params.listen_interval = -1; - params.plink_state = -1; if (info->attrs[NL80211_ATTR_STA_AID]) return -EINVAL; @@ -3451,13 +3450,20 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info) if (parse_station_flags(info, dev->ieee80211_ptr->iftype, ¶ms)) return -EINVAL; - if (info->attrs[NL80211_ATTR_STA_PLINK_ACTION]) + if (info->attrs[NL80211_ATTR_STA_PLINK_ACTION]) { params.plink_action = - nla_get_u8(info->attrs[NL80211_ATTR_STA_PLINK_ACTION]); + nla_get_u8(info->attrs[NL80211_ATTR_STA_PLINK_ACTION]); + if (params.plink_action >= NUM_NL80211_PLINK_ACTIONS) + return -EINVAL; + } - if (info->attrs[NL80211_ATTR_STA_PLINK_STATE]) + if (info->attrs[NL80211_ATTR_STA_PLINK_STATE]) { params.plink_state = - nla_get_u8(info->attrs[NL80211_ATTR_STA_PLINK_STATE]); + nla_get_u8(info->attrs[NL80211_ATTR_STA_PLINK_STATE]); + if (params.plink_state >= NUM_NL80211_PLINK_STATES) + return -EINVAL; + params.sta_modify_mask |= STATION_PARAM_APPLY_PLINK_STATE; + } if (info->attrs[NL80211_ATTR_LOCAL_MESH_POWER_MODE]) { enum nl80211_mesh_power_mode pm = nla_get_u32( @@ -3479,6 +3485,8 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info) return -EINVAL; if (params.local_pm) return -EINVAL; + if (params.sta_modify_mask & STATION_PARAM_APPLY_PLINK_STATE) + return -EINVAL; /* TDLS can't be set, ... */ if (params.sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER)) @@ -3542,6 +3550,8 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info) return -EINVAL; if (params.local_pm) return -EINVAL; + if (params.sta_modify_mask & STATION_PARAM_APPLY_PLINK_STATE) + return -EINVAL; /* reject any changes other than AUTHORIZED or WME (for TDLS) */ if (params.sta_flags_mask & ~(BIT(NL80211_STA_FLAG_AUTHORIZED) | BIT(NL80211_STA_FLAG_WME))) @@ -3553,6 +3563,8 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info) return -EINVAL; if (params.local_pm) return -EINVAL; + if (params.sta_modify_mask & STATION_PARAM_APPLY_PLINK_STATE) + return -EINVAL; if (info->attrs[NL80211_ATTR_HT_CAPABILITY] || info->attrs[NL80211_ATTR_VHT_CAPABILITY]) return -EINVAL; @@ -3652,9 +3664,12 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) params.vht_capa = nla_data(info->attrs[NL80211_ATTR_VHT_CAPABILITY]); - if (info->attrs[NL80211_ATTR_STA_PLINK_ACTION]) + if (info->attrs[NL80211_ATTR_STA_PLINK_ACTION]) { params.plink_action = - nla_get_u8(info->attrs[NL80211_ATTR_STA_PLINK_ACTION]); + nla_get_u8(info->attrs[NL80211_ATTR_STA_PLINK_ACTION]); + if (params.plink_action >= NUM_NL80211_PLINK_ACTIONS) + return -EINVAL; + } if (!rdev->ops->add_station) return -EOPNOTSUPP; -- cgit v1.2.3 From 2c1aabf33d1832befc5291a14c870cd09dc2182d Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 14 Feb 2013 23:33:40 +0100 Subject: cfg80211: constify station parameter pointers All the pointers point right into the skb data and not to anything that would be useful to change, so make them const. Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 9b54574c3df9..7ca321d2b599 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -658,7 +658,7 @@ enum station_parameters_apply_mask { * @ext_capab_len: number of extended capabilities */ struct station_parameters { - u8 *supported_rates; + const u8 *supported_rates; struct net_device *vlan; u32 sta_flags_mask, sta_flags_set; u32 sta_modify_mask; @@ -667,13 +667,13 @@ struct station_parameters { u8 supported_rates_len; u8 plink_action; u8 plink_state; - struct ieee80211_ht_cap *ht_capa; - struct ieee80211_vht_cap *vht_capa; + const struct ieee80211_ht_cap *ht_capa; + const struct ieee80211_vht_cap *vht_capa; u8 uapsd_queues; u8 max_sp; enum nl80211_mesh_power_mode local_pm; u16 capability; - u8 *ext_capab; + const u8 *ext_capab; u8 ext_capab_len; }; -- cgit v1.2.3 From 77ee7c891a04c3d254711ddf1bde5d7381339fb3 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 15 Feb 2013 00:48:33 +0100 Subject: cfg80211: comprehensively check station changes The station change API isn't being checked properly before drivers are called, and as a result it is difficult to see what should be allowed and what not. In order to comprehensively check the API parameters parse everything first, and then have the driver call a function (cfg80211_check_station_change()) with the additionally information about the kind of station that is being changed; this allows the function to make better decisions than the old code could. While at it, also add a few checks, particularly in mesh and clarify the TDLS station lifetime in documentation. To be able to reduce a few checks, ignore any flag set bits when the mask isn't set, they shouldn't be applied then. Signed-off-by: Johannes Berg --- drivers/net/wireless/ath/ath6kl/cfg80211.c | 8 +- include/net/cfg80211.h | 48 ++++- include/uapi/linux/nl80211.h | 16 +- net/mac80211/cfg.c | 125 ++++++++----- net/wireless/nl80211.c | 288 +++++++++++++++++------------ 5 files changed, 311 insertions(+), 174 deletions(-) (limited to 'include/net') diff --git a/drivers/net/wireless/ath/ath6kl/cfg80211.c b/drivers/net/wireless/ath/ath6kl/cfg80211.c index 752ffc4f4166..28c413f861a2 100644 --- a/drivers/net/wireless/ath/ath6kl/cfg80211.c +++ b/drivers/net/wireless/ath/ath6kl/cfg80211.c @@ -2990,13 +2990,15 @@ static int ath6kl_change_station(struct wiphy *wiphy, struct net_device *dev, { struct ath6kl *ar = ath6kl_priv(dev); struct ath6kl_vif *vif = netdev_priv(dev); + int err; if (vif->nw_type != AP_NETWORK) return -EOPNOTSUPP; - /* Use this only for authorizing/unauthorizing a station */ - if (!(params->sta_flags_mask & BIT(NL80211_STA_FLAG_AUTHORIZED))) - return -EOPNOTSUPP; + err = cfg80211_check_station_change(wiphy, params, + CFG80211_STA_AP_MLME_CLIENT); + if (err) + return err; if (params->sta_flags_set & BIT(NL80211_STA_FLAG_AUTHORIZED)) return ath6kl_wmi_ap_set_mlme(ar->wmi, vif->fw_vif_idx, diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 7ca321d2b599..ed2b08da3b93 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -677,6 +677,49 @@ struct station_parameters { u8 ext_capab_len; }; +/** + * enum cfg80211_station_type - the type of station being modified + * @CFG80211_STA_AP_CLIENT: client of an AP interface + * @CFG80211_STA_AP_MLME_CLIENT: client of an AP interface that has + * the AP MLME in the device + * @CFG80211_STA_AP_STA: AP station on managed interface + * @CFG80211_STA_IBSS: IBSS station + * @CFG80211_STA_TDLS_PEER_SETUP: TDLS peer on managed interface (dummy entry + * while TDLS setup is in progress, it moves out of this state when + * being marked authorized; use this only if TDLS with external setup is + * supported/used) + * @CFG80211_STA_TDLS_PEER_ACTIVE: TDLS peer on managed interface (active + * entry that is operating, has been marked authorized by userspace) + * @CFG80211_STA_MESH_PEER_NONSEC: peer on mesh interface (non-secured) + * @CFG80211_STA_MESH_PEER_SECURE: peer on mesh interface (secured) + */ +enum cfg80211_station_type { + CFG80211_STA_AP_CLIENT, + CFG80211_STA_AP_MLME_CLIENT, + CFG80211_STA_AP_STA, + CFG80211_STA_IBSS, + CFG80211_STA_TDLS_PEER_SETUP, + CFG80211_STA_TDLS_PEER_ACTIVE, + CFG80211_STA_MESH_PEER_NONSEC, + CFG80211_STA_MESH_PEER_SECURE, +}; + +/** + * cfg80211_check_station_change - validate parameter changes + * @wiphy: the wiphy this operates on + * @params: the new parameters for a station + * @statype: the type of station being modified + * + * Utility function for the @change_station driver method. Call this function + * with the appropriate station type looking up the station (and checking that + * it exists). It will verify whether the station change is acceptable, and if + * not will return an error code. Note that it may modify the parameters for + * backward compatibility reasons, so don't use them before calling this. + */ +int cfg80211_check_station_change(struct wiphy *wiphy, + struct station_parameters *params, + enum cfg80211_station_type statype); + /** * enum station_info_flags - station information flags * @@ -1770,9 +1813,8 @@ struct cfg80211_gtk_rekey_data { * @change_station: Modify a given station. Note that flags changes are not much * validated in cfg80211, in particular the auth/assoc/authorized flags * might come to the driver in invalid combinations -- make sure to check - * them, also against the existing state! Also, supported_rates changes are - * not checked in station mode -- drivers need to reject (or ignore) them - * for anything but TDLS peers. + * them, also against the existing state! Drivers must call + * cfg80211_check_station_change() to validate the information. * @get_station: get station information for the station identified by @mac * @dump_station: dump station callback -- resume dump at index @idx * diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 7dcc69f73d2c..523ed3d65b41 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -36,7 +36,21 @@ * The station is still assumed to belong to the AP interface it was added * to. * - * TODO: need more info? + * Station handling varies per interface type and depending on the driver's + * capabilities. + * + * For drivers supporting TDLS with external setup (WIPHY_FLAG_SUPPORTS_TDLS + * and WIPHY_FLAG_TDLS_EXTERNAL_SETUP), the station lifetime is as follows: + * - a setup station entry is added, not yet authorized, without any rate + * or capability information, this just exists to avoid race conditions + * - when the TDLS setup is done, a single NL80211_CMD_SET_STATION is valid + * to add rate and capability information to the station and at the same + * time mark it authorized. + * - %NL80211_TDLS_ENABLE_LINK is then used + * - after this, the only valid operation is to remove it by tearing down + * the TDLS link (%NL80211_TDLS_DISABLE_LINK) + * + * TODO: need more info for other interface types */ /** diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index ca28405d5f65..c115f82c037c 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1177,6 +1177,18 @@ static int sta_apply_parameters(struct ieee80211_local *local, mask |= BIT(NL80211_STA_FLAG_ASSOCIATED); if (set & BIT(NL80211_STA_FLAG_AUTHENTICATED)) set |= BIT(NL80211_STA_FLAG_ASSOCIATED); + } else if (test_sta_flag(sta, WLAN_STA_TDLS_PEER)) { + /* + * TDLS -- everything follows authorized, but + * only becoming authorized is possible, not + * going back + */ + if (set & BIT(NL80211_STA_FLAG_AUTHORIZED)) { + set |= BIT(NL80211_STA_FLAG_AUTHENTICATED) | + BIT(NL80211_STA_FLAG_ASSOCIATED); + mask |= BIT(NL80211_STA_FLAG_AUTHENTICATED) | + BIT(NL80211_STA_FLAG_ASSOCIATED); + } } ret = sta_apply_auth_flags(local, sta, mask, set); @@ -1261,9 +1273,8 @@ static int sta_apply_parameters(struct ieee80211_local *local, if (ieee80211_vif_is_mesh(&sdata->vif)) { #ifdef CONFIG_MAC80211_MESH u32 changed = 0; - if (sdata->u.mesh.security & IEEE80211_MESH_SEC_SECURED && - (params->sta_modify_mask & - STATION_PARAM_APPLY_PLINK_STATE)) { + + if (params->sta_modify_mask & STATION_PARAM_APPLY_PLINK_STATE) { switch (params->plink_state) { case NL80211_PLINK_ESTAB: if (sta->plink_state != NL80211_PLINK_ESTAB) @@ -1294,21 +1305,18 @@ static int sta_apply_parameters(struct ieee80211_local *local, /* nothing */ break; } - } else if (params->sta_modify_mask & - STATION_PARAM_APPLY_PLINK_STATE) { - return -EINVAL; - } else { - switch (params->plink_action) { - case NL80211_PLINK_ACTION_NO_ACTION: - /* nothing */ - break; - case NL80211_PLINK_ACTION_OPEN: - changed |= mesh_plink_open(sta); - break; - case NL80211_PLINK_ACTION_BLOCK: - changed |= mesh_plink_block(sta); - break; - } + } + + switch (params->plink_action) { + case NL80211_PLINK_ACTION_NO_ACTION: + /* nothing */ + break; + case NL80211_PLINK_ACTION_OPEN: + changed |= mesh_plink_open(sta); + break; + case NL80211_PLINK_ACTION_BLOCK: + changed |= mesh_plink_block(sta); + break; } if (params->local_pm) @@ -1354,8 +1362,10 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev, * defaults -- if userspace wants something else we'll * change it accordingly in sta_apply_parameters() */ - sta_info_pre_move_state(sta, IEEE80211_STA_AUTH); - sta_info_pre_move_state(sta, IEEE80211_STA_ASSOC); + if (!(params->sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER))) { + sta_info_pre_move_state(sta, IEEE80211_STA_AUTH); + sta_info_pre_move_state(sta, IEEE80211_STA_ASSOC); + } err = sta_apply_parameters(local, sta, params); if (err) { @@ -1364,8 +1374,8 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev, } /* - * for TDLS, rate control should be initialized only when supported - * rates are known. + * for TDLS, rate control should be initialized only when + * rates are known and station is marked authorized */ if (!test_sta_flag(sta, WLAN_STA_TDLS_PEER)) rate_control_rate_init(sta); @@ -1402,50 +1412,67 @@ static int ieee80211_del_station(struct wiphy *wiphy, struct net_device *dev, } static int ieee80211_change_station(struct wiphy *wiphy, - struct net_device *dev, - u8 *mac, + struct net_device *dev, u8 *mac, struct station_parameters *params) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = wiphy_priv(wiphy); struct sta_info *sta; struct ieee80211_sub_if_data *vlansdata; + enum cfg80211_station_type statype; int err; mutex_lock(&local->sta_mtx); sta = sta_info_get_bss(sdata, mac); if (!sta) { - mutex_unlock(&local->sta_mtx); - return -ENOENT; + err = -ENOENT; + goto out_err; } - /* in station mode, some updates are only valid with TDLS */ - if (sdata->vif.type == NL80211_IFTYPE_STATION && - (params->supported_rates || params->ht_capa || params->vht_capa || - params->sta_modify_mask || - (params->sta_flags_mask & BIT(NL80211_STA_FLAG_WME))) && - !test_sta_flag(sta, WLAN_STA_TDLS_PEER)) { - mutex_unlock(&local->sta_mtx); - return -EINVAL; + switch (sdata->vif.type) { + case NL80211_IFTYPE_MESH_POINT: + if (sdata->u.mesh.security & IEEE80211_MESH_SEC_SECURED) + statype = CFG80211_STA_MESH_PEER_SECURE; + else + statype = CFG80211_STA_MESH_PEER_NONSEC; + break; + case NL80211_IFTYPE_ADHOC: + statype = CFG80211_STA_IBSS; + break; + case NL80211_IFTYPE_STATION: + if (!test_sta_flag(sta, WLAN_STA_TDLS_PEER)) { + statype = CFG80211_STA_AP_STA; + break; + } + if (test_sta_flag(sta, WLAN_STA_AUTHORIZED)) + statype = CFG80211_STA_TDLS_PEER_ACTIVE; + else + statype = CFG80211_STA_TDLS_PEER_SETUP; + break; + case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_AP_VLAN: + statype = CFG80211_STA_AP_CLIENT; + break; + default: + err = -EOPNOTSUPP; + goto out_err; } + err = cfg80211_check_station_change(wiphy, params, statype); + if (err) + goto out_err; + if (params->vlan && params->vlan != sta->sdata->dev) { bool prev_4addr = false; bool new_4addr = false; vlansdata = IEEE80211_DEV_TO_SUB_IF(params->vlan); - if (vlansdata->vif.type != NL80211_IFTYPE_AP_VLAN && - vlansdata->vif.type != NL80211_IFTYPE_AP) { - mutex_unlock(&local->sta_mtx); - return -EINVAL; - } - if (params->vlan->ieee80211_ptr->use_4addr) { if (vlansdata->u.vlan.sta) { - mutex_unlock(&local->sta_mtx); - return -EBUSY; + err = -EBUSY; + goto out_err; } rcu_assign_pointer(vlansdata->u.vlan.sta, sta); @@ -1472,12 +1499,12 @@ static int ieee80211_change_station(struct wiphy *wiphy, } err = sta_apply_parameters(local, sta, params); - if (err) { - mutex_unlock(&local->sta_mtx); - return err; - } + if (err) + goto out_err; - if (test_sta_flag(sta, WLAN_STA_TDLS_PEER) && params->supported_rates) + /* When peer becomes authorized, init rate control as well */ + if (test_sta_flag(sta, WLAN_STA_TDLS_PEER) && + test_sta_flag(sta, WLAN_STA_AUTHORIZED)) rate_control_rate_init(sta); mutex_unlock(&local->sta_mtx); @@ -1487,7 +1514,11 @@ static int ieee80211_change_station(struct wiphy *wiphy, ieee80211_recalc_ps(local, -1); ieee80211_recalc_ps_vif(sdata); } + return 0; +out_err: + mutex_unlock(&local->sta_mtx); + return err; } #ifdef CONFIG_MAC80211_MESH diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 9e7c10420da8..83151a50e5ad 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -2967,6 +2967,7 @@ static int parse_station_flags(struct genl_info *info, sta_flags = nla_data(nla); params->sta_flags_mask = sta_flags->mask; params->sta_flags_set = sta_flags->set; + params->sta_flags_set &= params->sta_flags_mask; if ((params->sta_flags_mask | params->sta_flags_set) & BIT(__NL80211_STA_FLAG_INVALID)) return -EINVAL; @@ -3320,6 +3321,136 @@ static int nl80211_get_station(struct sk_buff *skb, struct genl_info *info) return genlmsg_reply(msg, info); } +int cfg80211_check_station_change(struct wiphy *wiphy, + struct station_parameters *params, + enum cfg80211_station_type statype) +{ + if (params->listen_interval != -1) + return -EINVAL; + if (params->aid) + return -EINVAL; + + /* When you run into this, adjust the code below for the new flag */ + BUILD_BUG_ON(NL80211_STA_FLAG_MAX != 7); + + switch (statype) { + case CFG80211_STA_MESH_PEER_NONSEC: + case CFG80211_STA_MESH_PEER_SECURE: + /* + * No ignoring the TDLS flag here -- the userspace mesh + * code doesn't have the bug of including TDLS in the + * mask everywhere. + */ + if (params->sta_flags_mask & + ~(BIT(NL80211_STA_FLAG_AUTHENTICATED) | + BIT(NL80211_STA_FLAG_MFP) | + BIT(NL80211_STA_FLAG_AUTHORIZED))) + return -EINVAL; + break; + case CFG80211_STA_TDLS_PEER_SETUP: + case CFG80211_STA_TDLS_PEER_ACTIVE: + if (!(params->sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER))) + return -EINVAL; + /* ignore since it can't change */ + params->sta_flags_mask &= ~BIT(NL80211_STA_FLAG_TDLS_PEER); + break; + default: + /* disallow mesh-specific things */ + if (params->plink_action != NL80211_PLINK_ACTION_NO_ACTION) + return -EINVAL; + if (params->local_pm) + return -EINVAL; + if (params->sta_modify_mask & STATION_PARAM_APPLY_PLINK_STATE) + return -EINVAL; + } + + if (statype != CFG80211_STA_TDLS_PEER_SETUP && + statype != CFG80211_STA_TDLS_PEER_ACTIVE) { + /* TDLS can't be set, ... */ + if (params->sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER)) + return -EINVAL; + /* + * ... but don't bother the driver with it. This works around + * a hostapd/wpa_supplicant issue -- it always includes the + * TLDS_PEER flag in the mask even for AP mode. + */ + params->sta_flags_mask &= ~BIT(NL80211_STA_FLAG_TDLS_PEER); + } + + if (statype != CFG80211_STA_TDLS_PEER_SETUP) { + /* reject other things that can't change */ + if (params->sta_modify_mask & STATION_PARAM_APPLY_UAPSD) + return -EINVAL; + if (params->sta_modify_mask & STATION_PARAM_APPLY_CAPABILITY) + return -EINVAL; + if (params->supported_rates) + return -EINVAL; + if (params->ext_capab || params->ht_capa || params->vht_capa) + return -EINVAL; + } + + if (statype != CFG80211_STA_AP_CLIENT) { + if (params->vlan) + return -EINVAL; + } + + switch (statype) { + case CFG80211_STA_AP_MLME_CLIENT: + /* Use this only for authorizing/unauthorizing a station */ + if (!(params->sta_flags_mask & BIT(NL80211_STA_FLAG_AUTHORIZED))) + return -EOPNOTSUPP; + break; + case CFG80211_STA_AP_CLIENT: + /* accept only the listed bits */ + if (params->sta_flags_mask & + ~(BIT(NL80211_STA_FLAG_AUTHORIZED) | + BIT(NL80211_STA_FLAG_AUTHENTICATED) | + BIT(NL80211_STA_FLAG_ASSOCIATED) | + BIT(NL80211_STA_FLAG_SHORT_PREAMBLE) | + BIT(NL80211_STA_FLAG_WME) | + BIT(NL80211_STA_FLAG_MFP))) + return -EINVAL; + + /* but authenticated/associated only if driver handles it */ + if (!(wiphy->features & NL80211_FEATURE_FULL_AP_CLIENT_STATE) && + params->sta_flags_mask & + (BIT(NL80211_STA_FLAG_AUTHENTICATED) | + BIT(NL80211_STA_FLAG_ASSOCIATED))) + return -EINVAL; + break; + case CFG80211_STA_IBSS: + case CFG80211_STA_AP_STA: + /* reject any changes other than AUTHORIZED */ + if (params->sta_flags_mask & ~BIT(NL80211_STA_FLAG_AUTHORIZED)) + return -EINVAL; + break; + case CFG80211_STA_TDLS_PEER_SETUP: + /* reject any changes other than AUTHORIZED or WME */ + if (params->sta_flags_mask & ~(BIT(NL80211_STA_FLAG_AUTHORIZED) | + BIT(NL80211_STA_FLAG_WME))) + return -EINVAL; + /* force (at least) rates when authorizing */ + if (params->sta_flags_set & BIT(NL80211_STA_FLAG_AUTHORIZED) && + !params->supported_rates) + return -EINVAL; + break; + case CFG80211_STA_TDLS_PEER_ACTIVE: + /* reject any changes */ + return -EINVAL; + case CFG80211_STA_MESH_PEER_NONSEC: + if (params->sta_modify_mask & STATION_PARAM_APPLY_PLINK_STATE) + return -EINVAL; + break; + case CFG80211_STA_MESH_PEER_SECURE: + if (params->plink_action != NL80211_PLINK_ACTION_NO_ACTION) + return -EINVAL; + break; + } + + return 0; +} +EXPORT_SYMBOL(cfg80211_check_station_change); + /* * Get vlan interface making sure it is running and on the right wiphy. */ @@ -3342,6 +3473,13 @@ static struct net_device *get_vlan(struct genl_info *info, goto error; } + if (v->ieee80211_ptr->iftype != NL80211_IFTYPE_AP_VLAN && + v->ieee80211_ptr->iftype != NL80211_IFTYPE_AP && + v->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO) { + ret = -EINVAL; + goto error; + } + if (!netif_running(v)) { ret = -ENETDOWN; goto error; @@ -3410,15 +3548,18 @@ static int nl80211_set_station_tdls(struct genl_info *info, static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; - int err; struct net_device *dev = info->user_ptr[1]; struct station_parameters params; - u8 *mac_addr = NULL; + u8 *mac_addr; + int err; memset(¶ms, 0, sizeof(params)); params.listen_interval = -1; + if (!rdev->ops->change_station) + return -EOPNOTSUPP; + if (info->attrs[NL80211_ATTR_STA_AID]) return -EINVAL; @@ -3450,9 +3591,6 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info) if (info->attrs[NL80211_ATTR_STA_LISTEN_INTERVAL]) return -EINVAL; - if (!rdev->ops->change_station) - return -EOPNOTSUPP; - if (parse_station_flags(info, dev->ieee80211_ptr->iftype, ¶ms)) return -EINVAL; @@ -3482,133 +3620,33 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info) params.local_pm = pm; } + /* Include parameters for TDLS peer (will check later) */ + err = nl80211_set_station_tdls(info, ¶ms); + if (err) + return err; + + params.vlan = get_vlan(info, rdev); + if (IS_ERR(params.vlan)) + return PTR_ERR(params.vlan); + switch (dev->ieee80211_ptr->iftype) { case NL80211_IFTYPE_AP: case NL80211_IFTYPE_AP_VLAN: case NL80211_IFTYPE_P2P_GO: - /* disallow mesh-specific things */ - if (params.plink_action) - return -EINVAL; - if (params.local_pm) - return -EINVAL; - if (params.sta_modify_mask & STATION_PARAM_APPLY_PLINK_STATE) - return -EINVAL; - - /* TDLS can't be set, ... */ - if (params.sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER)) - return -EINVAL; - /* - * ... but don't bother the driver with it. This works around - * a hostapd/wpa_supplicant issue -- it always includes the - * TLDS_PEER flag in the mask even for AP mode. - */ - params.sta_flags_mask &= ~BIT(NL80211_STA_FLAG_TDLS_PEER); - - /* accept only the listed bits */ - if (params.sta_flags_mask & - ~(BIT(NL80211_STA_FLAG_AUTHORIZED) | - BIT(NL80211_STA_FLAG_AUTHENTICATED) | - BIT(NL80211_STA_FLAG_ASSOCIATED) | - BIT(NL80211_STA_FLAG_SHORT_PREAMBLE) | - BIT(NL80211_STA_FLAG_WME) | - BIT(NL80211_STA_FLAG_MFP))) - return -EINVAL; - - /* but authenticated/associated only if driver handles it */ - if (!(rdev->wiphy.features & - NL80211_FEATURE_FULL_AP_CLIENT_STATE) && - params.sta_flags_mask & - (BIT(NL80211_STA_FLAG_AUTHENTICATED) | - BIT(NL80211_STA_FLAG_ASSOCIATED))) - return -EINVAL; - - /* reject other things that can't change */ - if (params.supported_rates) - return -EINVAL; - if (info->attrs[NL80211_ATTR_STA_CAPABILITY]) - return -EINVAL; - if (info->attrs[NL80211_ATTR_STA_EXT_CAPABILITY]) - return -EINVAL; - if (info->attrs[NL80211_ATTR_HT_CAPABILITY] || - info->attrs[NL80211_ATTR_VHT_CAPABILITY]) - return -EINVAL; - - /* must be last in here for error handling */ - params.vlan = get_vlan(info, rdev); - if (IS_ERR(params.vlan)) - return PTR_ERR(params.vlan); - break; case NL80211_IFTYPE_P2P_CLIENT: case NL80211_IFTYPE_STATION: - /* - * Don't allow userspace to change the TDLS_PEER flag, - * but silently ignore attempts to change it since we - * don't have state here to verify that it doesn't try - * to change the flag. - */ - params.sta_flags_mask &= ~BIT(NL80211_STA_FLAG_TDLS_PEER); - /* Include parameters for TDLS peer (driver will check) */ - err = nl80211_set_station_tdls(info, ¶ms); - if (err) - return err; - /* disallow things sta doesn't support */ - if (params.plink_action) - return -EINVAL; - if (params.local_pm) - return -EINVAL; - if (params.sta_modify_mask & STATION_PARAM_APPLY_PLINK_STATE) - return -EINVAL; - /* reject any changes other than AUTHORIZED or WME (for TDLS) */ - if (params.sta_flags_mask & ~(BIT(NL80211_STA_FLAG_AUTHORIZED) | - BIT(NL80211_STA_FLAG_WME))) - return -EINVAL; - break; case NL80211_IFTYPE_ADHOC: - /* disallow things sta doesn't support */ - if (params.plink_action) - return -EINVAL; - if (params.local_pm) - return -EINVAL; - if (params.sta_modify_mask & STATION_PARAM_APPLY_PLINK_STATE) - return -EINVAL; - if (info->attrs[NL80211_ATTR_HT_CAPABILITY] || - info->attrs[NL80211_ATTR_VHT_CAPABILITY]) - return -EINVAL; - /* reject any changes other than AUTHORIZED */ - if (params.sta_flags_mask & ~BIT(NL80211_STA_FLAG_AUTHORIZED)) - return -EINVAL; - break; case NL80211_IFTYPE_MESH_POINT: - /* disallow things mesh doesn't support */ - if (params.vlan) - return -EINVAL; - if (params.supported_rates) - return -EINVAL; - if (info->attrs[NL80211_ATTR_STA_CAPABILITY]) - return -EINVAL; - if (info->attrs[NL80211_ATTR_STA_EXT_CAPABILITY]) - return -EINVAL; - if (info->attrs[NL80211_ATTR_HT_CAPABILITY] || - info->attrs[NL80211_ATTR_VHT_CAPABILITY]) - return -EINVAL; - /* - * No special handling for TDLS here -- the userspace - * mesh code doesn't have this bug. - */ - if (params.sta_flags_mask & - ~(BIT(NL80211_STA_FLAG_AUTHENTICATED) | - BIT(NL80211_STA_FLAG_MFP) | - BIT(NL80211_STA_FLAG_AUTHORIZED))) - return -EINVAL; break; default: - return -EOPNOTSUPP; + err = -EOPNOTSUPP; + goto out_put_vlan; } - /* be aware of params.vlan when changing code here */ - + /* driver will call cfg80211_check_station_change() */ err = rdev_change_station(rdev, dev, mac_addr, ¶ms); + out_put_vlan: if (params.vlan) dev_put(params.vlan); @@ -3687,6 +3725,9 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) if (parse_station_flags(info, dev->ieee80211_ptr->iftype, ¶ms)) return -EINVAL; + /* When you run into this, adjust the code below for the new flag */ + BUILD_BUG_ON(NL80211_STA_FLAG_MAX != 7); + switch (dev->ieee80211_ptr->iftype) { case NL80211_IFTYPE_AP: case NL80211_IFTYPE_AP_VLAN: @@ -3730,8 +3771,10 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) /* ignore uAPSD data */ params.sta_modify_mask &= ~STATION_PARAM_APPLY_UAPSD; - /* associated is disallowed */ - if (params.sta_flags_mask & BIT(NL80211_STA_FLAG_ASSOCIATED)) + /* these are disallowed */ + if (params.sta_flags_mask & + (BIT(NL80211_STA_FLAG_ASSOCIATED) | + BIT(NL80211_STA_FLAG_AUTHENTICATED))) return -EINVAL; /* Only TDLS peers can be added */ if (!(params.sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER))) @@ -3742,6 +3785,11 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) /* ... with external setup is supported */ if (!(rdev->wiphy.flags & WIPHY_FLAG_TDLS_EXTERNAL_SETUP)) return -EOPNOTSUPP; + /* + * Older wpa_supplicant versions always mark the TDLS peer + * as authorized, but it shouldn't yet be. + */ + params.sta_flags_mask &= ~BIT(NL80211_STA_FLAG_AUTHORIZED); break; default: return -EOPNOTSUPP; -- cgit v1.2.3 From ee2aca343c9aa64d277a75a5df043299dc84cfd9 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 21 Feb 2013 17:36:01 +0100 Subject: cfg80211: add ability to override VHT capabilities For testing it's sometimes useful to be able to override certain VHT capability advertisement, add the ability to do that in cfg80211. Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 12 +++++++++++ include/uapi/linux/nl80211.h | 3 +++ net/wireless/core.h | 10 ++++++++-- net/wireless/mlme.c | 35 ++++++++++++++++++++++++++++++--- net/wireless/nl80211.c | 47 ++++++++++++++++++++++++++++++++++++++++++-- net/wireless/sme.c | 4 +++- 6 files changed, 103 insertions(+), 8 deletions(-) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index ed2b08da3b93..73a523901c73 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1430,9 +1430,11 @@ struct cfg80211_auth_request { * enum cfg80211_assoc_req_flags - Over-ride default behaviour in association. * * @ASSOC_REQ_DISABLE_HT: Disable HT (802.11n) + * @ASSOC_REQ_DISABLE_VHT: Disable VHT */ enum cfg80211_assoc_req_flags { ASSOC_REQ_DISABLE_HT = BIT(0), + ASSOC_REQ_DISABLE_VHT = BIT(1), }; /** @@ -1454,6 +1456,8 @@ enum cfg80211_assoc_req_flags { * @ht_capa: HT Capabilities over-rides. Values set in ht_capa_mask * will be used in ht_capa. Un-supported values will be ignored. * @ht_capa_mask: The bits of ht_capa which are to be used. + * @vht_capa: VHT capability override + * @vht_capa_mask: VHT capability mask indicating which fields to use */ struct cfg80211_assoc_request { struct cfg80211_bss *bss; @@ -1464,6 +1468,7 @@ struct cfg80211_assoc_request { u32 flags; struct ieee80211_ht_cap ht_capa; struct ieee80211_ht_cap ht_capa_mask; + struct ieee80211_vht_cap vht_capa, vht_capa_mask; }; /** @@ -1574,6 +1579,8 @@ struct cfg80211_ibss_params { * @ht_capa: HT Capabilities over-rides. Values set in ht_capa_mask * will be used in ht_capa. Un-supported values will be ignored. * @ht_capa_mask: The bits of ht_capa which are to be used. + * @vht_capa: VHT Capability overrides + * @vht_capa_mask: The bits of vht_capa which are to be used. */ struct cfg80211_connect_params { struct ieee80211_channel *channel; @@ -1592,6 +1599,8 @@ struct cfg80211_connect_params { int bg_scan_period; struct ieee80211_ht_cap ht_capa; struct ieee80211_ht_cap ht_capa_mask; + struct ieee80211_vht_cap vht_capa; + struct ieee80211_vht_cap vht_capa_mask; }; /** @@ -2516,6 +2525,8 @@ struct wiphy_wowlan_support { * @ap_sme_capa: AP SME capabilities, flags from &enum nl80211_ap_sme_features. * @ht_capa_mod_mask: Specify what ht_cap values can be over-ridden. * If null, then none can be over-ridden. + * @vht_capa_mod_mask: Specify what VHT capabilities can be over-ridden. + * If null, then none can be over-ridden. * * @max_acl_mac_addrs: Maximum number of MAC addresses that the device * supports for ACL. @@ -2624,6 +2635,7 @@ struct wiphy { struct dentry *debugfsdir; const struct ieee80211_ht_cap *ht_capa_mod_mask; + const struct ieee80211_vht_cap *vht_capa_mod_mask; #ifdef CONFIG_NET_NS /* the network namespace this phy lives in currently */ diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 9844c10a2999..2c3e88360037 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1685,6 +1685,9 @@ enum nl80211_attrs { NL80211_ATTR_PROTOCOL_FEATURES, NL80211_ATTR_SPLIT_WIPHY_DUMP, + NL80211_ATTR_DISABLE_VHT, + NL80211_ATTR_VHT_CAPABILITY_MASK, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, diff --git a/net/wireless/core.h b/net/wireless/core.h index 3aec0e429d8a..c2f94f22bb22 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -335,7 +335,9 @@ int __cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev, const u8 *ie, int ie_len, bool use_mfp, struct cfg80211_crypto_settings *crypt, u32 assoc_flags, struct ieee80211_ht_cap *ht_capa, - struct ieee80211_ht_cap *ht_capa_mask); + struct ieee80211_ht_cap *ht_capa_mask, + struct ieee80211_vht_cap *vht_capa, + struct ieee80211_vht_cap *vht_capa_mask); int cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev, struct net_device *dev, struct ieee80211_channel *chan, const u8 *bssid, const u8 *prev_bssid, @@ -343,7 +345,9 @@ int cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev, const u8 *ie, int ie_len, bool use_mfp, struct cfg80211_crypto_settings *crypt, u32 assoc_flags, struct ieee80211_ht_cap *ht_capa, - struct ieee80211_ht_cap *ht_capa_mask); + struct ieee80211_ht_cap *ht_capa_mask, + struct ieee80211_vht_cap *vht_capa, + struct ieee80211_vht_cap *vht_capa_mask); int __cfg80211_mlme_deauth(struct cfg80211_registered_device *rdev, struct net_device *dev, const u8 *bssid, const u8 *ie, int ie_len, u16 reason, @@ -375,6 +379,8 @@ int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev, bool no_cck, bool dont_wait_for_ack, u64 *cookie); void cfg80211_oper_and_ht_capa(struct ieee80211_ht_cap *ht_capa, const struct ieee80211_ht_cap *ht_capa_mask); +void cfg80211_oper_and_vht_capa(struct ieee80211_vht_cap *vht_capa, + const struct ieee80211_vht_cap *vht_capa_mask); /* SME */ int __cfg80211_connect(struct cfg80211_registered_device *rdev, diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index 5a97ce6d283b..c82adfee7697 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -343,6 +343,23 @@ void cfg80211_oper_and_ht_capa(struct ieee80211_ht_cap *ht_capa, p1[i] &= p2[i]; } +/* Do a logical ht_capa &= ht_capa_mask. */ +void cfg80211_oper_and_vht_capa(struct ieee80211_vht_cap *vht_capa, + const struct ieee80211_vht_cap *vht_capa_mask) +{ + int i; + u8 *p1, *p2; + if (!vht_capa_mask) { + memset(vht_capa, 0, sizeof(*vht_capa)); + return; + } + + p1 = (u8*)(vht_capa); + p2 = (u8*)(vht_capa_mask); + for (i = 0; i < sizeof(*vht_capa); i++) + p1[i] &= p2[i]; +} + int __cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev, struct net_device *dev, struct ieee80211_channel *chan, @@ -351,7 +368,9 @@ int __cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev, const u8 *ie, int ie_len, bool use_mfp, struct cfg80211_crypto_settings *crypt, u32 assoc_flags, struct ieee80211_ht_cap *ht_capa, - struct ieee80211_ht_cap *ht_capa_mask) + struct ieee80211_ht_cap *ht_capa_mask, + struct ieee80211_vht_cap *vht_capa, + struct ieee80211_vht_cap *vht_capa_mask) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_assoc_request req; @@ -388,6 +407,13 @@ int __cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev, sizeof(req.ht_capa_mask)); cfg80211_oper_and_ht_capa(&req.ht_capa_mask, rdev->wiphy.ht_capa_mod_mask); + if (vht_capa) + memcpy(&req.vht_capa, vht_capa, sizeof(req.vht_capa)); + if (vht_capa_mask) + memcpy(&req.vht_capa_mask, vht_capa_mask, + sizeof(req.vht_capa_mask)); + cfg80211_oper_and_vht_capa(&req.vht_capa_mask, + rdev->wiphy.vht_capa_mod_mask); req.bss = cfg80211_get_bss(&rdev->wiphy, chan, bssid, ssid, ssid_len, WLAN_CAPABILITY_ESS, WLAN_CAPABILITY_ESS); @@ -422,7 +448,9 @@ int cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev, const u8 *ie, int ie_len, bool use_mfp, struct cfg80211_crypto_settings *crypt, u32 assoc_flags, struct ieee80211_ht_cap *ht_capa, - struct ieee80211_ht_cap *ht_capa_mask) + struct ieee80211_ht_cap *ht_capa_mask, + struct ieee80211_vht_cap *vht_capa, + struct ieee80211_vht_cap *vht_capa_mask) { struct wireless_dev *wdev = dev->ieee80211_ptr; int err; @@ -431,7 +459,8 @@ int cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev, wdev_lock(wdev); err = __cfg80211_mlme_assoc(rdev, dev, chan, bssid, prev_bssid, ssid, ssid_len, ie, ie_len, use_mfp, crypt, - assoc_flags, ht_capa, ht_capa_mask); + assoc_flags, ht_capa, ht_capa_mask, + vht_capa, vht_capa_mask); wdev_unlock(wdev); mutex_unlock(&rdev->devlist_mtx); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 0e5176784b42..6a5893f5e481 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -371,6 +371,10 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = { [NL80211_ATTR_STA_CAPABILITY] = { .type = NLA_U16 }, [NL80211_ATTR_STA_EXT_CAPABILITY] = { .type = NLA_BINARY, }, [NL80211_ATTR_SPLIT_WIPHY_DUMP] = { .type = NLA_FLAG, }, + [NL80211_ATTR_DISABLE_VHT] = { .type = NLA_FLAG }, + [NL80211_ATTR_VHT_CAPABILITY_MASK] = { + .len = NL80211_VHT_CAPABILITY_LEN, + }, }; /* policy for the key attributes */ @@ -1522,6 +1526,12 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev, dev->wiphy.extended_capabilities_mask))) goto nla_put_failure; + if (dev->wiphy.vht_capa_mod_mask && + nla_put(msg, NL80211_ATTR_VHT_CAPABILITY_MASK, + sizeof(*dev->wiphy.vht_capa_mod_mask), + dev->wiphy.vht_capa_mod_mask)) + goto nla_put_failure; + /* done */ *split_start = 0; break; @@ -5982,6 +5992,8 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info) u32 flags = 0; struct ieee80211_ht_cap *ht_capa = NULL; struct ieee80211_ht_cap *ht_capa_mask = NULL; + struct ieee80211_vht_cap *vht_capa = NULL; + struct ieee80211_vht_cap *vht_capa_mask = NULL; if (!is_valid_ie_attr(info->attrs[NL80211_ATTR_IE])) return -EINVAL; @@ -6038,12 +6050,25 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info) ht_capa = nla_data(info->attrs[NL80211_ATTR_HT_CAPABILITY]); } + if (nla_get_flag(info->attrs[NL80211_ATTR_DISABLE_VHT])) + flags |= ASSOC_REQ_DISABLE_VHT; + + if (info->attrs[NL80211_ATTR_VHT_CAPABILITY_MASK]) + vht_capa_mask = + nla_data(info->attrs[NL80211_ATTR_VHT_CAPABILITY_MASK]); + + if (info->attrs[NL80211_ATTR_VHT_CAPABILITY]) { + if (!vht_capa_mask) + return -EINVAL; + vht_capa = nla_data(info->attrs[NL80211_ATTR_VHT_CAPABILITY]); + } + err = nl80211_crypto_settings(rdev, info, &crypto, 1); if (!err) err = cfg80211_mlme_assoc(rdev, dev, chan, bssid, prev_bssid, ssid, ssid_len, ie, ie_len, use_mfp, - &crypto, flags, ht_capa, - ht_capa_mask); + &crypto, flags, ht_capa, ht_capa_mask, + vht_capa, vht_capa_mask); return err; } @@ -6623,6 +6648,24 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info) sizeof(connect.ht_capa)); } + if (nla_get_flag(info->attrs[NL80211_ATTR_DISABLE_VHT])) + connect.flags |= ASSOC_REQ_DISABLE_VHT; + + if (info->attrs[NL80211_ATTR_VHT_CAPABILITY_MASK]) + memcpy(&connect.vht_capa_mask, + nla_data(info->attrs[NL80211_ATTR_VHT_CAPABILITY_MASK]), + sizeof(connect.vht_capa_mask)); + + if (info->attrs[NL80211_ATTR_VHT_CAPABILITY]) { + if (!info->attrs[NL80211_ATTR_VHT_CAPABILITY_MASK]) { + kfree(connkeys); + return -EINVAL; + } + memcpy(&connect.vht_capa, + nla_data(info->attrs[NL80211_ATTR_VHT_CAPABILITY]), + sizeof(connect.vht_capa)); + } + err = cfg80211_connect(rdev, dev, &connect, connkeys); if (err) kfree(connkeys); diff --git a/net/wireless/sme.c b/net/wireless/sme.c index f432bd3755b1..7da118c034f0 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -195,7 +195,9 @@ static int cfg80211_conn_do_work(struct wireless_dev *wdev) params->mfp != NL80211_MFP_NO, ¶ms->crypto, params->flags, ¶ms->ht_capa, - ¶ms->ht_capa_mask); + ¶ms->ht_capa_mask, + ¶ms->vht_capa, + ¶ms->vht_capa_mask); if (err) __cfg80211_mlme_deauth(rdev, wdev->netdev, params->bssid, NULL, 0, -- cgit v1.2.3 From d339d5ca8eee34f3c70386cf2545edc53e546a13 Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Tue, 12 Feb 2013 09:34:13 +0200 Subject: mac80211: Allow drivers to differentiate between ROC types Some devices can handle remain on channel requests differently based on the request type/priority. Add support to differentiate between different ROC types, i.e., indicate that the ROC is required for sending managment frames. Signed-off-by: Ilan Peer Reviewed-by: Emmanuel Grumbach Signed-off-by: Johannes Berg --- drivers/net/wireless/iwlwifi/dvm/mac80211.c | 3 ++- drivers/net/wireless/iwlwifi/mvm/mac80211.c | 7 ++++--- drivers/net/wireless/mac80211_hwsim.c | 3 ++- drivers/net/wireless/ti/wlcore/main.c | 3 ++- include/net/mac80211.h | 21 ++++++++++++++++++++- net/mac80211/cfg.c | 21 +++++++++++++++------ net/mac80211/driver-ops.h | 7 ++++--- net/mac80211/ieee80211_i.h | 1 + net/mac80211/offchannel.c | 2 +- net/mac80211/trace.h | 11 +++++++---- 10 files changed, 58 insertions(+), 21 deletions(-) (limited to 'include/net') diff --git a/drivers/net/wireless/iwlwifi/dvm/mac80211.c b/drivers/net/wireless/iwlwifi/dvm/mac80211.c index 323e4a33fcac..c7cd2dffa5cd 100644 --- a/drivers/net/wireless/iwlwifi/dvm/mac80211.c +++ b/drivers/net/wireless/iwlwifi/dvm/mac80211.c @@ -1137,7 +1137,8 @@ done: static int iwlagn_mac_remain_on_channel(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_channel *channel, - int duration) + int duration, + enum ieee80211_roc_type type) { struct iwl_priv *priv = IWL_MAC80211_GET_DVM(hw); struct iwl_rxon_context *ctx = &priv->contexts[IWL_RXON_CTX_PAN]; diff --git a/drivers/net/wireless/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/iwlwifi/mvm/mac80211.c index e8264e11b12d..23460f4a4c75 100644 --- a/drivers/net/wireless/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/iwlwifi/mvm/mac80211.c @@ -1081,7 +1081,8 @@ static void iwl_mvm_mac_update_tkip_key(struct ieee80211_hw *hw, static int iwl_mvm_roc(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_channel *channel, - int duration) + int duration, + enum ieee80211_roc_type type) { struct iwl_mvm *mvm = IWL_MAC80211_GET_MVM(hw); struct cfg80211_chan_def chandef; @@ -1092,8 +1093,8 @@ static int iwl_mvm_roc(struct ieee80211_hw *hw, return -EINVAL; } - IWL_DEBUG_MAC80211(mvm, "enter (%d, %d)\n", channel->hw_value, - duration); + IWL_DEBUG_MAC80211(mvm, "enter (%d, %d, %d)\n", channel->hw_value, + duration, type); mutex_lock(&mvm->mutex); diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index cffdf4fbf161..7490c4fc7177 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -1535,7 +1535,8 @@ static void hw_roc_done(struct work_struct *work) static int mac80211_hwsim_roc(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_channel *chan, - int duration) + int duration, + enum ieee80211_roc_type type) { struct mac80211_hwsim_data *hwsim = hw->priv; diff --git a/drivers/net/wireless/ti/wlcore/main.c b/drivers/net/wireless/ti/wlcore/main.c index 2c2ff3e1f849..d7e306333f6c 100644 --- a/drivers/net/wireless/ti/wlcore/main.c +++ b/drivers/net/wireless/ti/wlcore/main.c @@ -4956,7 +4956,8 @@ static void wlcore_op_flush(struct ieee80211_hw *hw, bool drop) static int wlcore_op_remain_on_channel(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_channel *chan, - int duration) + int duration, + enum ieee80211_roc_type type) { struct wl12xx_vif *wlvif = wl12xx_vif_to_data(vif); struct wl1271 *wl = hw->priv; diff --git a/include/net/mac80211.h b/include/net/mac80211.h index f7eba1300d82..9b0d342c0675 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -2134,6 +2134,24 @@ enum ieee80211_rate_control_changed { IEEE80211_RC_NSS_CHANGED = BIT(3), }; +/** + * enum ieee80211_roc_type - remain on channel type + * + * With the support for multi channel contexts and multi channel operations, + * remain on channel operations might be limited/deferred/aborted by other + * flows/operations which have higher priority (and vise versa). + * Specifying the ROC type can be used by devices to prioritize the ROC + * operations compared to other operations/flows. + * + * @IEEE80211_ROC_TYPE_NORMAL: There are no special requirements for this ROC. + * @IEEE80211_ROC_TYPE_MGMT_TX: The remain on channel request is required + * for sending managment frames offchannel. + */ +enum ieee80211_roc_type { + IEEE80211_ROC_TYPE_NORMAL = 0, + IEEE80211_ROC_TYPE_MGMT_TX, +}; + /** * struct ieee80211_ops - callbacks from mac80211 to the driver * @@ -2687,7 +2705,8 @@ struct ieee80211_ops { int (*remain_on_channel)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_channel *chan, - int duration); + int duration, + enum ieee80211_roc_type type); int (*cancel_remain_on_channel)(struct ieee80211_hw *hw); int (*set_ringparam)(struct ieee80211_hw *hw, u32 tx, u32 rx); void (*get_ringparam)(struct ieee80211_hw *hw, diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index c115f82c037c..f9cbdc29946d 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -2410,7 +2410,8 @@ static int ieee80211_start_roc_work(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, struct ieee80211_channel *channel, unsigned int duration, u64 *cookie, - struct sk_buff *txskb) + struct sk_buff *txskb, + enum ieee80211_roc_type type) { struct ieee80211_roc_work *roc, *tmp; bool queued = false; @@ -2429,6 +2430,7 @@ static int ieee80211_start_roc_work(struct ieee80211_local *local, roc->duration = duration; roc->req_duration = duration; roc->frame = txskb; + roc->type = type; roc->mgmt_tx_cookie = (unsigned long)txskb; roc->sdata = sdata; INIT_DELAYED_WORK(&roc->work, ieee80211_sw_roc_work); @@ -2459,7 +2461,7 @@ static int ieee80211_start_roc_work(struct ieee80211_local *local, if (!duration) duration = 10; - ret = drv_remain_on_channel(local, sdata, channel, duration); + ret = drv_remain_on_channel(local, sdata, channel, duration, type); if (ret) { kfree(roc); return ret; @@ -2478,10 +2480,13 @@ static int ieee80211_start_roc_work(struct ieee80211_local *local, * * If it hasn't started yet, just increase the duration * and add the new one to the list of dependents. + * If the type of the new ROC has higher priority, modify the + * type of the previous one to match that of the new one. */ if (!tmp->started) { list_add_tail(&roc->list, &tmp->dependents); tmp->duration = max(tmp->duration, roc->duration); + tmp->type = max(tmp->type, roc->type); queued = true; break; } @@ -2493,16 +2498,18 @@ static int ieee80211_start_roc_work(struct ieee80211_local *local, /* * In the offloaded ROC case, if it hasn't begun, add * this new one to the dependent list to be handled - * when the the master one begins. If it has begun, + * when the master one begins. If it has begun, * check that there's still a minimum time left and * if so, start this one, transmitting the frame, but - * add it to the list directly after this one with a + * add it to the list directly after this one with * a reduced time so we'll ask the driver to execute * it right after finishing the previous one, in the * hope that it'll also be executed right afterwards, * effectively extending the old one. * If there's no minimum time left, just add it to the * normal list. + * TODO: the ROC type is ignored here, assuming that it + * is better to immediately use the current ROC. */ if (!tmp->hw_begun) { list_add_tail(&roc->list, &tmp->dependents); @@ -2596,7 +2603,8 @@ static int ieee80211_remain_on_channel(struct wiphy *wiphy, mutex_lock(&local->mtx); ret = ieee80211_start_roc_work(local, sdata, chan, - duration, cookie, NULL); + duration, cookie, NULL, + IEEE80211_ROC_TYPE_NORMAL); mutex_unlock(&local->mtx); return ret; @@ -2829,7 +2837,8 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev, /* This will handle all kinds of coalescing and immediate TX */ ret = ieee80211_start_roc_work(local, sdata, chan, - wait, cookie, skb); + wait, cookie, skb, + IEEE80211_ROC_TYPE_MGMT_TX); if (ret) kfree_skb(skb); out_unlock: diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index ee56d0779d8b..832acea4a5cb 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -787,15 +787,16 @@ static inline int drv_get_antenna(struct ieee80211_local *local, static inline int drv_remain_on_channel(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, struct ieee80211_channel *chan, - unsigned int duration) + unsigned int duration, + enum ieee80211_roc_type type) { int ret; might_sleep(); - trace_drv_remain_on_channel(local, sdata, chan, duration); + trace_drv_remain_on_channel(local, sdata, chan, duration, type); ret = local->ops->remain_on_channel(&local->hw, &sdata->vif, - chan, duration); + chan, duration, type); trace_drv_return_int(local, ret); return ret; diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 8da53a067306..2518f0429b87 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -315,6 +315,7 @@ struct ieee80211_roc_work { u32 duration, req_duration; struct sk_buff *frame; u64 cookie, mgmt_tx_cookie; + enum ieee80211_roc_type type; }; /* flags used in struct ieee80211_if_managed.flags */ diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c index cc79b4a2e821..db547fceaeb9 100644 --- a/net/mac80211/offchannel.c +++ b/net/mac80211/offchannel.c @@ -277,7 +277,7 @@ void ieee80211_start_next_roc(struct ieee80211_local *local) duration = 10; ret = drv_remain_on_channel(local, roc->sdata, roc->chan, - duration); + duration, roc->type); roc->started = true; diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h index 3d7cd2a0582f..e7db2b804e0c 100644 --- a/net/mac80211/trace.h +++ b/net/mac80211/trace.h @@ -1042,15 +1042,17 @@ TRACE_EVENT(drv_remain_on_channel, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, struct ieee80211_channel *chan, - unsigned int duration), + unsigned int duration, + enum ieee80211_roc_type type), - TP_ARGS(local, sdata, chan, duration), + TP_ARGS(local, sdata, chan, duration, type), TP_STRUCT__entry( LOCAL_ENTRY VIF_ENTRY __field(int, center_freq) __field(unsigned int, duration) + __field(u32, type) ), TP_fast_assign( @@ -1058,12 +1060,13 @@ TRACE_EVENT(drv_remain_on_channel, VIF_ASSIGN; __entry->center_freq = chan->center_freq; __entry->duration = duration; + __entry->type = type; ), TP_printk( - LOCAL_PR_FMT VIF_PR_FMT " freq:%dMHz duration:%dms", + LOCAL_PR_FMT VIF_PR_FMT " freq:%dMHz duration:%dms type=%d", LOCAL_PR_ARG, VIF_PR_ARG, - __entry->center_freq, __entry->duration + __entry->center_freq, __entry->duration, __entry->type ) ); -- cgit v1.2.3 From 355199e02b831fd4f652c34d6c7673d973da1369 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Wed, 27 Feb 2013 17:14:27 +0200 Subject: cfg80211: Extend support for IEEE 802.11r Fast BSS Transition Add NL80211_CMD_UPDATE_FT_IES to support update of FT IEs to the WLAN driver and NL80211_CMD_FT_EVENT to send FT events from the WLAN driver. This will carry the target AP's MAC address along with the relevant Information Elements. This event is used to report received FT IEs (MDIE, FTIE, RSN IE, TIE, RICIE). These changes allow FT to be supported with drivers that use an internal SME instead of user space option (like FT implementation in wpa_supplicant with mac80211-based drivers). Signed-off-by: Jouni Malinen Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 41 ++++++++++++++++++++++++ include/uapi/linux/nl80211.h | 19 +++++++++++ net/wireless/nl80211.c | 76 ++++++++++++++++++++++++++++++++++++++++++++ net/wireless/rdev-ops.h | 13 ++++++++ net/wireless/trace.h | 46 +++++++++++++++++++++++++++ 5 files changed, 195 insertions(+) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 73a523901c73..dfef0d5b5d3d 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1762,6 +1762,21 @@ struct cfg80211_gtk_rekey_data { u8 replay_ctr[NL80211_REPLAY_CTR_LEN]; }; +/** + * struct cfg80211_update_ft_ies_params - FT IE Information + * + * This structure provides information needed to update the fast transition IE + * + * @md: The Mobility Domain ID, 2 Octet value + * @ie: Fast Transition IEs + * @ie_len: Length of ft_ie in octets + */ +struct cfg80211_update_ft_ies_params { + u16 md; + const u8 *ie; + size_t ie_len; +}; + /** * struct cfg80211_ops - backend description for wireless configuration * @@ -2208,6 +2223,8 @@ struct cfg80211_ops { int (*start_radar_detection)(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_chan_def *chandef); + int (*update_ft_ies)(struct wiphy *wiphy, struct net_device *dev, + struct cfg80211_update_ft_ies_params *ftie); }; /* @@ -4044,6 +4061,30 @@ u32 cfg80211_calculate_bitrate(struct rate_info *rate); */ void cfg80211_unregister_wdev(struct wireless_dev *wdev); +/** + * struct cfg80211_ft_event - FT Information Elements + * @ies: FT IEs + * @ies_len: length of the FT IE in bytes + * @target_ap: target AP's MAC address + * @ric_ies: RIC IE + * @ric_ies_len: length of the RIC IE in bytes + */ +struct cfg80211_ft_event_params { + const u8 *ies; + size_t ies_len; + const u8 *target_ap; + const u8 *ric_ies; + size_t ric_ies_len; +}; + +/** + * cfg80211_ft_event - notify userspace about FT IE and RIC IE + * @netdev: network device + * @ft_event: IE information + */ +void cfg80211_ft_event(struct net_device *netdev, + struct cfg80211_ft_event_params *ft_event); + /** * cfg80211_get_p2p_attr - find and copy a P2P attribute from IE buffer * @ies: the input IE buffer diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 2c3e88360037..2d0cff57ff89 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -629,6 +629,14 @@ * i.e. features for the nl80211 protocol rather than device features. * Returns the features in the %NL80211_ATTR_PROTOCOL_FEATURES bitmap. * + * @NL80211_CMD_UPDATE_FT_IES: Pass down the most up-to-date Fast Transition + * Information Element to the WLAN driver + * + * @NL80211_CMD_FT_EVENT: Send a Fast transition event from the WLAN driver + * to the supplicant. This will carry the target AP's MAC address along + * with the relevant Information Elements. This event is used to report + * received FT IEs (MDIE, FTIE, RSN IE, TIE, RICIE). + * * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use */ @@ -785,6 +793,9 @@ enum nl80211_commands { NL80211_CMD_GET_PROTOCOL_FEATURES, + NL80211_CMD_UPDATE_FT_IES, + NL80211_CMD_FT_EVENT, + /* add new commands above here */ /* used to define NL80211_CMD_MAX below */ @@ -1396,6 +1407,11 @@ enum nl80211_commands { * receiving the data for a single wiphy split across multiple * messages, given with wiphy dump message * + * @NL80211_ATTR_MDID: Mobility Domain Identifier + * + * @NL80211_ATTR_IE_RIC: Resource Information Container Information + * Element + * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use */ @@ -1688,6 +1704,9 @@ enum nl80211_attrs { NL80211_ATTR_DISABLE_VHT, NL80211_ATTR_VHT_CAPABILITY_MASK, + NL80211_ATTR_MDID, + NL80211_ATTR_IE_RIC, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index a8bd453d22b9..08de0c6035f1 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -375,6 +375,9 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = { [NL80211_ATTR_VHT_CAPABILITY_MASK] = { .len = NL80211_VHT_CAPABILITY_LEN, }, + [NL80211_ATTR_MDID] = { .type = NLA_U16 }, + [NL80211_ATTR_IE_RIC] = { .type = NLA_BINARY, + .len = IEEE80211_MAX_DATA_LEN }, }; /* policy for the key attributes */ @@ -8160,6 +8163,27 @@ static int nl80211_get_protocol_features(struct sk_buff *skb, return -ENOBUFS; } +static int nl80211_update_ft_ies(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct cfg80211_update_ft_ies_params ft_params; + struct net_device *dev = info->user_ptr[1]; + + if (!rdev->ops->update_ft_ies) + return -EOPNOTSUPP; + + if (!info->attrs[NL80211_ATTR_MDID] || + !is_valid_ie_attr(info->attrs[NL80211_ATTR_IE])) + return -EINVAL; + + memset(&ft_params, 0, sizeof(ft_params)); + ft_params.md = nla_get_u16(info->attrs[NL80211_ATTR_MDID]); + ft_params.ie = nla_data(info->attrs[NL80211_ATTR_IE]); + ft_params.ie_len = nla_len(info->attrs[NL80211_ATTR_IE]); + + return rdev_update_ft_ies(rdev, dev, &ft_params); +} + #define NL80211_FLAG_NEED_WIPHY 0x01 #define NL80211_FLAG_NEED_NETDEV 0x02 #define NL80211_FLAG_NEED_RTNL 0x04 @@ -8841,6 +8865,14 @@ static struct genl_ops nl80211_ops[] = { .doit = nl80211_get_protocol_features, .policy = nl80211_policy, }, + { + .cmd = NL80211_CMD_UPDATE_FT_IES, + .doit = nl80211_update_ft_ies, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | + NL80211_FLAG_NEED_RTNL, + }, }; static struct genl_multicast_group nl80211_mlme_mcgrp = { @@ -10542,6 +10574,50 @@ static struct notifier_block nl80211_netlink_notifier = { .notifier_call = nl80211_netlink_notify, }; +void cfg80211_ft_event(struct net_device *netdev, + struct cfg80211_ft_event_params *ft_event) +{ + struct wiphy *wiphy = netdev->ieee80211_ptr->wiphy; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct sk_buff *msg; + void *hdr; + int err; + + trace_cfg80211_ft_event(wiphy, netdev, ft_event); + + if (!ft_event->target_ap) + return; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return; + + hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_FT_EVENT); + if (!hdr) { + nlmsg_free(msg); + return; + } + + nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx); + nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex); + nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, ft_event->target_ap); + if (ft_event->ies) + nla_put(msg, NL80211_ATTR_IE, ft_event->ies_len, ft_event->ies); + if (ft_event->ric_ies) + nla_put(msg, NL80211_ATTR_IE_RIC, ft_event->ric_ies_len, + ft_event->ric_ies); + + err = genlmsg_end(msg, hdr); + if (err < 0) { + nlmsg_free(msg); + return; + } + + genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0, + nl80211_mlme_mcgrp.id, GFP_KERNEL); +} +EXPORT_SYMBOL(cfg80211_ft_event); + /* initialisation/exit functions */ int nl80211_init(void) diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h index 422d38291d66..8c8b26f574e8 100644 --- a/net/wireless/rdev-ops.h +++ b/net/wireless/rdev-ops.h @@ -887,4 +887,17 @@ static inline int rdev_set_mac_acl(struct cfg80211_registered_device *rdev, trace_rdev_return_int(&rdev->wiphy, ret); return ret; } + +static inline int rdev_update_ft_ies(struct cfg80211_registered_device *rdev, + struct net_device *dev, + struct cfg80211_update_ft_ies_params *ftie) +{ + int ret; + + trace_rdev_update_ft_ies(&rdev->wiphy, dev, ftie); + ret = rdev->ops->update_ft_ies(&rdev->wiphy, dev, ftie); + trace_rdev_return_int(&rdev->wiphy, ret); + return ret; +} + #endif /* __CFG80211_RDEV_OPS */ diff --git a/net/wireless/trace.h b/net/wireless/trace.h index b7a531380e19..ccadef2106ac 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -1785,6 +1785,26 @@ TRACE_EVENT(rdev_set_mac_acl, WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->acl_policy) ); +TRACE_EVENT(rdev_update_ft_ies, + TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, + struct cfg80211_update_ft_ies_params *ftie), + TP_ARGS(wiphy, netdev, ftie), + TP_STRUCT__entry( + WIPHY_ENTRY + NETDEV_ENTRY + __field(u16, md) + __dynamic_array(u8, ie, ftie->ie_len) + ), + TP_fast_assign( + WIPHY_ASSIGN; + NETDEV_ASSIGN; + __entry->md = ftie->md; + memcpy(__get_dynamic_array(ie), ftie->ie, ftie->ie_len); + ), + TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", md: 0x%x", + WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->md) +); + /************************************************************* * cfg80211 exported functions traces * *************************************************************/ @@ -2413,6 +2433,32 @@ TRACE_EVENT(cfg80211_report_wowlan_wakeup, TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT, WIPHY_PR_ARG, WDEV_PR_ARG) ); +TRACE_EVENT(cfg80211_ft_event, + TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, + struct cfg80211_ft_event_params *ft_event), + TP_ARGS(wiphy, netdev, ft_event), + TP_STRUCT__entry( + WIPHY_ENTRY + NETDEV_ENTRY + __dynamic_array(u8, ies, ft_event->ies_len) + MAC_ENTRY(target_ap) + __dynamic_array(u8, ric_ies, ft_event->ric_ies_len) + ), + TP_fast_assign( + WIPHY_ASSIGN; + NETDEV_ASSIGN; + if (ft_event->ies) + memcpy(__get_dynamic_array(ies), ft_event->ies, + ft_event->ies_len); + MAC_ASSIGN(target_ap, ft_event->target_ap); + if (ft_event->ric_ies) + memcpy(__get_dynamic_array(ric_ies), ft_event->ric_ies, + ft_event->ric_ies_len); + ), + TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", target_ap: " MAC_PR_FMT, + WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(target_ap)) +); + #endif /* !__RDEV_OPS_TRACE || TRACE_HEADER_MULTI_READ */ #undef TRACE_INCLUDE_PATH -- cgit v1.2.3 From a87121051ce80831a302c67286119013104f7a5a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sat, 23 Feb 2013 00:22:44 +0100 Subject: mac80211: remove IEEE80211_KEY_FLAG_WMM_STA There's no driver using this flag, so it seems that all drivers support HW crypto with WMM or don't support it at all. Remove the flag and code setting it. Signed-off-by: Johannes Berg --- include/net/mac80211.h | 3 --- net/mac80211/key.c | 26 -------------------------- 2 files changed, 29 deletions(-) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 9b0d342c0675..421c3ac8c521 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1101,8 +1101,6 @@ static inline bool ieee80211_vif_is_mesh(struct ieee80211_vif *vif) * These flags are used for communication about keys between the driver * and mac80211, with the @flags parameter of &struct ieee80211_key_conf. * - * @IEEE80211_KEY_FLAG_WMM_STA: Set by mac80211, this flag indicates - * that the STA this key will be used with could be using QoS. * @IEEE80211_KEY_FLAG_GENERATE_IV: This flag should be set by the * driver to indicate that it requires IV generation for this * particular key. @@ -1127,7 +1125,6 @@ static inline bool ieee80211_vif_is_mesh(struct ieee80211_vif *vif) * %IEEE80211_KEY_FLAG_SW_MGMT_TX flag to encrypt such frames in SW. */ enum ieee80211_key_flags { - IEEE80211_KEY_FLAG_WMM_STA = 1<<0, IEEE80211_KEY_FLAG_GENERATE_IV = 1<<1, IEEE80211_KEY_FLAG_GENERATE_MMIC= 1<<2, IEEE80211_KEY_FLAG_PAIRWISE = 1<<3, diff --git a/net/mac80211/key.c b/net/mac80211/key.c index df81b178c594..6eb4888a70ed 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -440,32 +440,6 @@ int ieee80211_key_link(struct ieee80211_key *key, key->sdata = sdata; key->sta = sta; - if (sta) { - /* - * some hardware cannot handle TKIP with QoS, so - * we indicate whether QoS could be in use. - */ - if (test_sta_flag(sta, WLAN_STA_WME)) - key->conf.flags |= IEEE80211_KEY_FLAG_WMM_STA; - } else { - if (sdata->vif.type == NL80211_IFTYPE_STATION) { - struct sta_info *ap; - - /* - * We're getting a sta pointer in, so must be under - * appropriate locking for sta_info_get(). - */ - - /* same here, the AP could be using QoS */ - ap = sta_info_get(key->sdata, key->sdata->u.mgd.bssid); - if (ap) { - if (test_sta_flag(ap, WLAN_STA_WME)) - key->conf.flags |= - IEEE80211_KEY_FLAG_WMM_STA; - } - } - } - mutex_lock(&sdata->local->key_mtx); if (sta && pairwise) -- cgit v1.2.3 From 55d942f4246c79a8f3f17f92c224e641c5c26125 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 1 Mar 2013 13:07:48 +0100 Subject: mac80211: restrict peer's VHT capabilities to own Implement restricting peer VHT capabilities to the device's own capabilities. This is useful when a single driver supports more than one device and the devices have different capabilities (often they will differ in the number of spatial streams), but in particular is also necessary for VHT capability overrides to work correctly -- otherwise it'd be possible to e.g. advertise, due to overrides, that TX-STBC is not supported, but then still use it to TX to the AP because it supports RX-STBC. Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 3 +- include/net/mac80211.h | 5 +- net/mac80211/vht.c | 114 +++++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 117 insertions(+), 5 deletions(-) (limited to 'include/net') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 35c1f96d9365..4cf0c9e4dd99 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1333,10 +1333,11 @@ struct ieee80211_vht_operation { #define IEEE80211_VHT_CAP_RXSTBC_2 0x00000200 #define IEEE80211_VHT_CAP_RXSTBC_3 0x00000300 #define IEEE80211_VHT_CAP_RXSTBC_4 0x00000400 +#define IEEE80211_VHT_CAP_RXSTBC_MASK 0x00000700 #define IEEE80211_VHT_CAP_SU_BEAMFORMER_CAPABLE 0x00000800 #define IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE 0x00001000 #define IEEE80211_VHT_CAP_BEAMFORMER_ANTENNAS_MAX 0x00006000 -#define IEEE80211_VHT_CAP_SOUNDING_DIMENTION_MAX 0x00030000 +#define IEEE80211_VHT_CAP_SOUNDING_DIMENSIONS_MAX 0x00030000 #define IEEE80211_VHT_CAP_MU_BEAMFORMER_CAPABLE 0x00080000 #define IEEE80211_VHT_CAP_MU_BEAMFORMEE_CAPABLE 0x00100000 #define IEEE80211_VHT_CAP_VHT_TXOP_PS 0x00200000 diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 421c3ac8c521..cdd7cea1fd4c 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1228,9 +1228,8 @@ enum ieee80211_sta_rx_bandwidth { * @addr: MAC address * @aid: AID we assigned to the station if we're an AP * @supp_rates: Bitmap of supported rates (per band) - * @ht_cap: HT capabilities of this STA; restricted to our own TX capabilities - * @vht_cap: VHT capabilities of this STA; Not restricting any capabilities - * of remote STA. Taking as is. + * @ht_cap: HT capabilities of this STA; restricted to our own capabilities + * @vht_cap: VHT capabilities of this STA; restricted to our own capabilities * @wme: indicates whether the STA supports WME. Only valid during AP-mode. * @drv_priv: data area for driver use, will always be aligned to * sizeof(void *), size is determined in hw information. diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c index cacc1c74556a..171344d4eb7c 100644 --- a/net/mac80211/vht.c +++ b/net/mac80211/vht.c @@ -118,6 +118,8 @@ ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata, struct sta_info *sta) { struct ieee80211_sta_vht_cap *vht_cap = &sta->sta.vht_cap; + struct ieee80211_sta_vht_cap own_cap; + u32 cap_info, i; memset(vht_cap, 0, sizeof(*vht_cap)); @@ -133,12 +135,122 @@ ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata, vht_cap->vht_supported = true; - vht_cap->cap = le32_to_cpu(vht_cap_ie->vht_cap_info); + own_cap = sband->vht_cap; + /* + * If user has specified capability overrides, take care + * of that if the station we're setting up is the AP that + * we advertised a restricted capability set to. Override + * our own capabilities and then use those below. + */ + if (sdata->vif.type == NL80211_IFTYPE_STATION && + !test_sta_flag(sta, WLAN_STA_TDLS_PEER)) + ieee80211_apply_vhtcap_overrides(sdata, &own_cap); + + /* take some capabilities as-is */ + cap_info = le32_to_cpu(vht_cap_ie->vht_cap_info); + vht_cap->cap = cap_info; + vht_cap->cap &= IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_3895 | + IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_7991 | + IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_11454 | + IEEE80211_VHT_CAP_RXLDPC | + IEEE80211_VHT_CAP_VHT_TXOP_PS | + IEEE80211_VHT_CAP_HTC_VHT | + IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_MASK | + IEEE80211_VHT_CAP_VHT_LINK_ADAPTATION_VHT_UNSOL_MFB | + IEEE80211_VHT_CAP_VHT_LINK_ADAPTATION_VHT_MRQ_MFB | + IEEE80211_VHT_CAP_RX_ANTENNA_PATTERN | + IEEE80211_VHT_CAP_TX_ANTENNA_PATTERN; + + /* and some based on our own capabilities */ + switch (own_cap.cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK) { + case IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ: + vht_cap->cap |= cap_info & + IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ; + break; + case IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ: + vht_cap->cap |= cap_info & + IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK; + break; + default: + /* nothing */ + break; + } + + /* symmetric capabilities */ + vht_cap->cap |= cap_info & own_cap.cap & + (IEEE80211_VHT_CAP_SHORT_GI_80 | + IEEE80211_VHT_CAP_SHORT_GI_160); + + /* remaining ones */ + if (own_cap.cap & IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE) { + vht_cap->cap |= cap_info & + (IEEE80211_VHT_CAP_SU_BEAMFORMER_CAPABLE | + IEEE80211_VHT_CAP_BEAMFORMER_ANTENNAS_MAX | + IEEE80211_VHT_CAP_SOUNDING_DIMENSIONS_MAX); + } + + if (own_cap.cap & IEEE80211_VHT_CAP_SU_BEAMFORMER_CAPABLE) + vht_cap->cap |= cap_info & + IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE; + + if (own_cap.cap & IEEE80211_VHT_CAP_MU_BEAMFORMER_CAPABLE) + vht_cap->cap |= cap_info & + IEEE80211_VHT_CAP_MU_BEAMFORMEE_CAPABLE; + + if (own_cap.cap & IEEE80211_VHT_CAP_MU_BEAMFORMEE_CAPABLE) + vht_cap->cap |= cap_info & + IEEE80211_VHT_CAP_MU_BEAMFORMER_CAPABLE; + + if (own_cap.cap & IEEE80211_VHT_CAP_TXSTBC) + vht_cap->cap |= cap_info & IEEE80211_VHT_CAP_RXSTBC_MASK; + + if (own_cap.cap & IEEE80211_VHT_CAP_RXSTBC_MASK) + vht_cap->cap |= cap_info & IEEE80211_VHT_CAP_TXSTBC; /* Copy peer MCS info, the driver might need them. */ memcpy(&vht_cap->vht_mcs, &vht_cap_ie->supp_mcs, sizeof(struct ieee80211_vht_mcs_info)); + /* but also restrict MCSes */ + for (i = 0; i < 8; i++) { + u16 own_rx, own_tx, peer_rx, peer_tx; + + own_rx = le16_to_cpu(own_cap.vht_mcs.rx_mcs_map); + own_rx = (own_rx >> i * 2) & IEEE80211_VHT_MCS_NOT_SUPPORTED; + + own_tx = le16_to_cpu(own_cap.vht_mcs.tx_mcs_map); + own_tx = (own_tx >> i * 2) & IEEE80211_VHT_MCS_NOT_SUPPORTED; + + peer_rx = le16_to_cpu(vht_cap->vht_mcs.rx_mcs_map); + peer_rx = (peer_rx >> i * 2) & IEEE80211_VHT_MCS_NOT_SUPPORTED; + + peer_tx = le16_to_cpu(vht_cap->vht_mcs.tx_mcs_map); + peer_tx = (peer_tx >> i * 2) & IEEE80211_VHT_MCS_NOT_SUPPORTED; + + if (peer_tx != IEEE80211_VHT_MCS_NOT_SUPPORTED) { + if (own_rx == IEEE80211_VHT_MCS_NOT_SUPPORTED) + peer_tx = IEEE80211_VHT_MCS_NOT_SUPPORTED; + else if (own_rx < peer_tx) + peer_tx = own_rx; + } + + if (peer_rx != IEEE80211_VHT_MCS_NOT_SUPPORTED) { + if (own_tx == IEEE80211_VHT_MCS_NOT_SUPPORTED) + peer_rx = IEEE80211_VHT_MCS_NOT_SUPPORTED; + else if (own_tx < peer_rx) + peer_rx = own_tx; + } + + vht_cap->vht_mcs.rx_mcs_map &= + ~cpu_to_le16(IEEE80211_VHT_MCS_NOT_SUPPORTED << i * 2); + vht_cap->vht_mcs.rx_mcs_map |= cpu_to_le16(peer_rx << i * 2); + + vht_cap->vht_mcs.tx_mcs_map &= + ~cpu_to_le16(IEEE80211_VHT_MCS_NOT_SUPPORTED << i * 2); + vht_cap->vht_mcs.tx_mcs_map |= cpu_to_le16(peer_tx << i * 2); + } + + /* finally set up the bandwidth */ switch (vht_cap->cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK) { case IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ: case IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ: -- cgit v1.2.3 From bb2798d45fc0575f5d08c0bb7baf4d5d5e8cc0c3 Mon Sep 17 00:00:00 2001 From: Thomas Pedersen Date: Mon, 4 Mar 2013 13:06:10 -0800 Subject: nl80211: explicit userspace MPM Secure mesh had the implicit requirement that the Mesh Peering Management entity be in userspace. However userspace might want to implement an open MPM as well, so specify a mesh setup parameter to indicate this. Signed-off-by: Thomas Pedersen Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 2 ++ include/uapi/linux/nl80211.h | 25 ++++++++++++++++++------- net/wireless/mesh.c | 1 + net/wireless/nl80211.c | 8 ++++++++ 4 files changed, 29 insertions(+), 7 deletions(-) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index dfef0d5b5d3d..69b2b2631b9a 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1151,6 +1151,7 @@ struct mesh_config { * @ie_len: length of vendor information elements * @is_authenticated: this mesh requires authentication * @is_secure: this mesh uses security + * @user_mpm: userspace handles all MPM functions * @dtim_period: DTIM period to use * @beacon_interval: beacon interval to use * @mcast_rate: multicat rate for Mesh Node [6Mbps is the default for 802.11a] @@ -1168,6 +1169,7 @@ struct mesh_setup { u8 ie_len; bool is_authenticated; bool is_secure; + bool user_mpm; u8 dtim_period; u16 beacon_interval; int mcast_rate[IEEE80211_NUM_BANDS]; diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 2d0cff57ff89..8134c6a96f57 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -513,9 +513,11 @@ * @NL80211_CMD_NEW_PEER_CANDIDATE: Notification on the reception of a * beacon or probe response from a compatible mesh peer. This is only * sent while no station information (sta_info) exists for the new peer - * candidate and when @NL80211_MESH_SETUP_USERSPACE_AUTH is set. On - * reception of this notification, userspace may decide to create a new - * station (@NL80211_CMD_NEW_STATION). To stop this notification from + * candidate and when @NL80211_MESH_SETUP_USERSPACE_AUTH, + * @NL80211_MESH_SETUP_USERSPACE_AMPE, or + * @NL80211_MESH_SETUP_USERSPACE_MPM is set. On reception of this + * notification, userspace may decide to create a new station + * (@NL80211_CMD_NEW_STATION). To stop this notification from * reoccurring, the userspace authentication daemon may want to create the * new station with the AUTHENTICATED flag unset and maybe change it later * depending on the authentication result. @@ -1199,10 +1201,10 @@ enum nl80211_commands { * @NL80211_ATTR_SUPPORT_MESH_AUTH: Currently, this means the underlying driver * allows auth frames in a mesh to be passed to userspace for processing via * the @NL80211_MESH_SETUP_USERSPACE_AUTH flag. - * @NL80211_ATTR_STA_PLINK_STATE: The state of a mesh peer link as - * defined in &enum nl80211_plink_state. Used when userspace is - * driving the peer link management state machine. - * @NL80211_MESH_SETUP_USERSPACE_AMPE must be enabled. + * @NL80211_ATTR_STA_PLINK_STATE: The state of a mesh peer link as defined in + * &enum nl80211_plink_state. Used when userspace is driving the peer link + * management state machine. @NL80211_MESH_SETUP_USERSPACE_AMPE or + * @NL80211_MESH_SETUP_USERSPACE_MPM must be enabled. * * @NL80211_ATTR_WOWLAN_TRIGGERS_SUPPORTED: indicates, as part of the wiphy * capabilities, the supported WoWLAN triggers @@ -2612,6 +2614,9 @@ enum nl80211_meshconf_params { * vendor specific synchronization method or disable it to use the default * neighbor offset synchronization * + * @NL80211_MESH_SETUP_USERSPACE_MPM: Enable this option if userspace will + * implement an MPM which handles peer allocation and state. + * * @NL80211_MESH_SETUP_ATTR_MAX: highest possible mesh setup attribute number * * @__NL80211_MESH_SETUP_ATTR_AFTER_LAST: Internal use @@ -2624,6 +2629,7 @@ enum nl80211_mesh_setup_params { NL80211_MESH_SETUP_USERSPACE_AUTH, NL80211_MESH_SETUP_USERSPACE_AMPE, NL80211_MESH_SETUP_ENABLE_VENDOR_SYNC, + NL80211_MESH_SETUP_USERSPACE_MPM, /* keep last */ __NL80211_MESH_SETUP_ATTR_AFTER_LAST, @@ -3526,6 +3532,10 @@ enum nl80211_ap_sme_features { * stations the authenticated/associated bits have to be set in the mask. * @NL80211_FEATURE_ADVERTISE_CHAN_LIMITS: cfg80211 advertises channel limits * (HT40, VHT 80/160 MHz) if this flag is set + * @NL80211_FEATURE_USERSPACE_MPM: This driver supports a userspace Mesh + * Peering Management entity which may be implemented by registering for + * beacons or NL80211_CMD_NEW_PEER_CANDIDATE events. The mesh beacon is + * still generated by the driver. */ enum nl80211_feature_flags { NL80211_FEATURE_SK_TX_STATUS = 1 << 0, @@ -3544,6 +3554,7 @@ enum nl80211_feature_flags { /* bit 13 is reserved */ NL80211_FEATURE_ADVERTISE_CHAN_LIMITS = 1 << 14, NL80211_FEATURE_FULL_AP_CLIENT_STATE = 1 << 15, + NL80211_FEATURE_USERSPACE_MPM = 1 << 16, }; /** diff --git a/net/wireless/mesh.c b/net/wireless/mesh.c index 9688b249a805..0bb93f3061a4 100644 --- a/net/wireless/mesh.c +++ b/net/wireless/mesh.c @@ -85,6 +85,7 @@ const struct mesh_setup default_mesh_setup = { .ie = NULL, .ie_len = 0, .is_secure = false, + .user_mpm = false, .beacon_interval = MESH_DEFAULT_BEACON_INTERVAL, .dtim_period = MESH_DEFAULT_DTIM_PERIOD, }; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 7469020175d5..bdf39836d9d8 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -4618,6 +4618,7 @@ static const struct nla_policy [NL80211_MESH_SETUP_ENABLE_VENDOR_PATH_SEL] = { .type = NLA_U8 }, [NL80211_MESH_SETUP_ENABLE_VENDOR_METRIC] = { .type = NLA_U8 }, [NL80211_MESH_SETUP_USERSPACE_AUTH] = { .type = NLA_FLAG }, + [NL80211_MESH_SETUP_USERSPACE_MPM] = { .type = NLA_FLAG }, [NL80211_MESH_SETUP_IE] = { .type = NLA_BINARY, .len = IEEE80211_MAX_DATA_LEN }, [NL80211_MESH_SETUP_USERSPACE_AMPE] = { .type = NLA_FLAG }, @@ -4756,6 +4757,7 @@ do { \ static int nl80211_parse_mesh_setup(struct genl_info *info, struct mesh_setup *setup) { + struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct nlattr *tb[NL80211_MESH_SETUP_ATTR_MAX + 1]; if (!info->attrs[NL80211_ATTR_MESH_SETUP]) @@ -4792,8 +4794,14 @@ static int nl80211_parse_mesh_setup(struct genl_info *info, setup->ie = nla_data(ieattr); setup->ie_len = nla_len(ieattr); } + if (tb[NL80211_MESH_SETUP_USERSPACE_MPM] && + !(rdev->wiphy.features & NL80211_FEATURE_USERSPACE_MPM)) + return -EINVAL; + setup->user_mpm = nla_get_flag(tb[NL80211_MESH_SETUP_USERSPACE_MPM]); setup->is_authenticated = nla_get_flag(tb[NL80211_MESH_SETUP_USERSPACE_AUTH]); setup->is_secure = nla_get_flag(tb[NL80211_MESH_SETUP_USERSPACE_AMPE]); + if (setup->is_secure) + setup->user_mpm = true; return 0; } -- cgit v1.2.3 From eef941e6d6be8bce72b5c2963b69f948be4df7a7 Mon Sep 17 00:00:00 2001 From: Thomas Pedersen Date: Mon, 4 Mar 2013 13:06:11 -0800 Subject: cfg80211: rename mesh station types The mesh station types used to refer to whether the station was secure or nonsecure. Really the salient information is whether it is managed by the kernel or userspace Signed-off-by: Thomas Pedersen Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 8 ++++---- net/mac80211/cfg.c | 4 ++-- net/wireless/nl80211.c | 8 ++++---- 3 files changed, 10 insertions(+), 10 deletions(-) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 69b2b2631b9a..bdba9b619064 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -690,8 +690,8 @@ struct station_parameters { * supported/used) * @CFG80211_STA_TDLS_PEER_ACTIVE: TDLS peer on managed interface (active * entry that is operating, has been marked authorized by userspace) - * @CFG80211_STA_MESH_PEER_NONSEC: peer on mesh interface (non-secured) - * @CFG80211_STA_MESH_PEER_SECURE: peer on mesh interface (secured) + * @CFG80211_STA_MESH_PEER_KERNEL: peer on mesh interface (kernel managed) + * @CFG80211_STA_MESH_PEER_USER: peer on mesh interface (user managed) */ enum cfg80211_station_type { CFG80211_STA_AP_CLIENT, @@ -700,8 +700,8 @@ enum cfg80211_station_type { CFG80211_STA_IBSS, CFG80211_STA_TDLS_PEER_SETUP, CFG80211_STA_TDLS_PEER_ACTIVE, - CFG80211_STA_MESH_PEER_NONSEC, - CFG80211_STA_MESH_PEER_SECURE, + CFG80211_STA_MESH_PEER_KERNEL, + CFG80211_STA_MESH_PEER_USER, }; /** diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 9d708f9e246e..6ac89e5c2963 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1436,9 +1436,9 @@ static int ieee80211_change_station(struct wiphy *wiphy, switch (sdata->vif.type) { case NL80211_IFTYPE_MESH_POINT: if (sdata->u.mesh.security & IEEE80211_MESH_SEC_SECURED) - statype = CFG80211_STA_MESH_PEER_SECURE; + statype = CFG80211_STA_MESH_PEER_USER; else - statype = CFG80211_STA_MESH_PEER_NONSEC; + statype = CFG80211_STA_MESH_PEER_KERNEL; break; case NL80211_IFTYPE_ADHOC: statype = CFG80211_STA_IBSS; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index bdf39836d9d8..946b2e7acdf2 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3617,8 +3617,8 @@ int cfg80211_check_station_change(struct wiphy *wiphy, BUILD_BUG_ON(NL80211_STA_FLAG_MAX != 7); switch (statype) { - case CFG80211_STA_MESH_PEER_NONSEC: - case CFG80211_STA_MESH_PEER_SECURE: + case CFG80211_STA_MESH_PEER_KERNEL: + case CFG80211_STA_MESH_PEER_USER: /* * No ignoring the TDLS flag here -- the userspace mesh * code doesn't have the bug of including TDLS in the @@ -3720,11 +3720,11 @@ int cfg80211_check_station_change(struct wiphy *wiphy, case CFG80211_STA_TDLS_PEER_ACTIVE: /* reject any changes */ return -EINVAL; - case CFG80211_STA_MESH_PEER_NONSEC: + case CFG80211_STA_MESH_PEER_KERNEL: if (params->sta_modify_mask & STATION_PARAM_APPLY_PLINK_STATE) return -EINVAL; break; - case CFG80211_STA_MESH_PEER_SECURE: + case CFG80211_STA_MESH_PEER_USER: if (params->plink_action != NL80211_PLINK_ACTION_NO_ACTION) return -EINVAL; break; -- cgit v1.2.3 From e943789edbb1f9de71b129d9992489eb79ed341f Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 15 Feb 2013 21:38:08 +0100 Subject: mac80211: provide ieee80211_sta_eosp() The irqsafe version ieee80211_sta_eosp_irqsafe() exists, but drivers must not mix calls to any irqsafe/non-irqsafe function. Both ath9k and iwlwifi, the likely first users of this interface, use non-irqsafe RX/TX/TX status so must also use a non-irqsafe version of this function. Since no driver uses the _irqsafe() version, remove that. Signed-off-by: Johannes Berg --- Documentation/DocBook/80211.tmpl | 2 +- include/net/mac80211.h | 25 ++++++++++++++----------- net/mac80211/ieee80211_i.h | 5 ----- net/mac80211/main.c | 14 -------------- net/mac80211/sta_info.c | 20 +++----------------- 5 files changed, 18 insertions(+), 48 deletions(-) (limited to 'include/net') diff --git a/Documentation/DocBook/80211.tmpl b/Documentation/DocBook/80211.tmpl index 284ced7a228f..0f6a3edcd44b 100644 --- a/Documentation/DocBook/80211.tmpl +++ b/Documentation/DocBook/80211.tmpl @@ -437,7 +437,7 @@ !Finclude/net/mac80211.h ieee80211_get_buffered_bc !Finclude/net/mac80211.h ieee80211_beacon_get -!Finclude/net/mac80211.h ieee80211_sta_eosp_irqsafe +!Finclude/net/mac80211.h ieee80211_sta_eosp !Finclude/net/mac80211.h ieee80211_frame_release_type !Finclude/net/mac80211.h ieee80211_sta_ps_transition !Finclude/net/mac80211.h ieee80211_sta_ps_transition_ni diff --git a/include/net/mac80211.h b/include/net/mac80211.h index cdd7cea1fd4c..8c0ca11a39c4 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1946,14 +1946,14 @@ void ieee80211_free_txskb(struct ieee80211_hw *hw, struct sk_buff *skb); * filter those response frames except in the case of frames that * are buffered in the driver -- those must remain buffered to avoid * reordering. Because it is possible that no frames are released - * in this case, the driver must call ieee80211_sta_eosp_irqsafe() + * in this case, the driver must call ieee80211_sta_eosp() * to indicate to mac80211 that the service period ended anyway. * * Finally, if frames from multiple TIDs are released from mac80211 * but the driver might reorder them, it must clear & set the flags * appropriately (only the last frame may have %IEEE80211_TX_STATUS_EOSP) * and also take care of the EOSP and MORE_DATA bits in the frame. - * The driver may also use ieee80211_sta_eosp_irqsafe() in this case. + * The driver may also use ieee80211_sta_eosp() in this case. */ /** @@ -2506,7 +2506,7 @@ enum ieee80211_roc_type { * setting the EOSP flag in the QoS header of the frames. Also, when the * service period ends, the driver must set %IEEE80211_TX_STATUS_EOSP * on the last frame in the SP. Alternatively, it may call the function - * ieee80211_sta_eosp_irqsafe() to inform mac80211 of the end of the SP. + * ieee80211_sta_eosp() to inform mac80211 of the end of the SP. * This callback must be atomic. * @allow_buffered_frames: Prepare device to allow the given number of frames * to go out to the given station. The frames will be sent by mac80211 @@ -2517,7 +2517,7 @@ enum ieee80211_roc_type { * them between the TIDs, it must set the %IEEE80211_TX_STATUS_EOSP flag * on the last frame and clear it on all others and also handle the EOSP * bit in the QoS header correctly. Alternatively, it can also call the - * ieee80211_sta_eosp_irqsafe() function. + * ieee80211_sta_eosp() function. * The @tids parameter is a bitmap and tells the driver which TIDs the * frames will be on; it will at most have two bits set. * This callback must be atomic. @@ -3857,14 +3857,17 @@ void ieee80211_sta_block_awake(struct ieee80211_hw *hw, * %IEEE80211_TX_STATUS_EOSP bit and call this function instead. * This applies for PS-Poll as well as uAPSD. * - * Note that there is no non-_irqsafe version right now as - * it wasn't needed, but just like _tx_status() and _rx() - * must not be mixed in irqsafe/non-irqsafe versions, this - * function must not be mixed with those either. Use the - * all irqsafe, or all non-irqsafe, don't mix! If you need - * the non-irqsafe version of this, you need to add it. + * Note that just like with _tx_status() and _rx() drivers must + * not mix calls to irqsafe/non-irqsafe versions, this function + * must not be mixed with those either. Use the all irqsafe, or + * all non-irqsafe, don't mix! + * + * NB: the _irqsafe version of this function doesn't exist, no + * driver needs it right now. Don't call this function if + * you'd need the _irqsafe version, look at the git history + * and restore the _irqsafe version! */ -void ieee80211_sta_eosp_irqsafe(struct ieee80211_sta *pubsta); +void ieee80211_sta_eosp(struct ieee80211_sta *pubsta); /** * ieee80211_iter_keys - iterate keys programmed into the device diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index f4433f081e77..95beb18588f6 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -800,11 +800,6 @@ enum sdata_queue_type { enum { IEEE80211_RX_MSG = 1, IEEE80211_TX_STATUS_MSG = 2, - IEEE80211_EOSP_MSG = 3, -}; - -struct skb_eosp_msg_data { - u8 sta[ETH_ALEN], iface[ETH_ALEN]; }; enum queue_stop_reason { diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 5a53aa5ede80..5531c89909d8 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -226,8 +226,6 @@ u32 ieee80211_reset_erp_info(struct ieee80211_sub_if_data *sdata) static void ieee80211_tasklet_handler(unsigned long data) { struct ieee80211_local *local = (struct ieee80211_local *) data; - struct sta_info *sta, *tmp; - struct skb_eosp_msg_data *eosp_data; struct sk_buff *skb; while ((skb = skb_dequeue(&local->skb_queue)) || @@ -243,18 +241,6 @@ static void ieee80211_tasklet_handler(unsigned long data) skb->pkt_type = 0; ieee80211_tx_status(&local->hw, skb); break; - case IEEE80211_EOSP_MSG: - eosp_data = (void *)skb->cb; - for_each_sta_info(local, eosp_data->sta, sta, tmp) { - /* skip wrong virtual interface */ - if (memcmp(eosp_data->iface, - sta->sdata->vif.addr, ETH_ALEN)) - continue; - clear_sta_flag(sta, WLAN_STA_SP); - break; - } - dev_kfree_skb(skb); - break; default: WARN(1, "mac80211: Packet is of unknown type %d\n", skb->pkt_type); diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 3644ad79688a..852bf45fcfa3 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -1390,30 +1390,16 @@ void ieee80211_sta_block_awake(struct ieee80211_hw *hw, } EXPORT_SYMBOL(ieee80211_sta_block_awake); -void ieee80211_sta_eosp_irqsafe(struct ieee80211_sta *pubsta) +void ieee80211_sta_eosp(struct ieee80211_sta *pubsta) { struct sta_info *sta = container_of(pubsta, struct sta_info, sta); struct ieee80211_local *local = sta->local; - struct sk_buff *skb; - struct skb_eosp_msg_data *data; trace_api_eosp(local, pubsta); - skb = alloc_skb(0, GFP_ATOMIC); - if (!skb) { - /* too bad ... but race is better than loss */ - clear_sta_flag(sta, WLAN_STA_SP); - return; - } - - data = (void *)skb->cb; - memcpy(data->sta, pubsta->addr, ETH_ALEN); - memcpy(data->iface, sta->sdata->vif.addr, ETH_ALEN); - skb->pkt_type = IEEE80211_EOSP_MSG; - skb_queue_tail(&local->skb_queue, skb); - tasklet_schedule(&local->tasklet); + clear_sta_flag(sta, WLAN_STA_SP); } -EXPORT_SYMBOL(ieee80211_sta_eosp_irqsafe); +EXPORT_SYMBOL(ieee80211_sta_eosp); void ieee80211_sta_set_buffered(struct ieee80211_sta *pubsta, u8 tid, bool buffered) -- cgit v1.2.3 From b2fb4f54ecd47c42413d54b4666b06cf93c05abf Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 6 Mar 2013 12:58:01 +0000 Subject: tcp: uninline tcp_prequeue() tcp_prequeue() became too big to be inlined. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/tcp.h | 45 +-------------------------------------------- net/ipv4/tcp_ipv4.c | 44 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+), 44 deletions(-) (limited to 'include/net') diff --git a/include/net/tcp.h b/include/net/tcp.h index cf0694d4ad60..a2baa5e4ba31 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1030,50 +1030,7 @@ static inline void tcp_prequeue_init(struct tcp_sock *tp) #endif } -/* Packet is added to VJ-style prequeue for processing in process - * context, if a reader task is waiting. Apparently, this exciting - * idea (VJ's mail "Re: query about TCP header on tcp-ip" of 07 Sep 93) - * failed somewhere. Latency? Burstiness? Well, at least now we will - * see, why it failed. 8)8) --ANK - * - * NOTE: is this not too big to inline? - */ -static inline bool tcp_prequeue(struct sock *sk, struct sk_buff *skb) -{ - struct tcp_sock *tp = tcp_sk(sk); - - if (sysctl_tcp_low_latency || !tp->ucopy.task) - return false; - - if (skb->len <= tcp_hdrlen(skb) && - skb_queue_len(&tp->ucopy.prequeue) == 0) - return false; - - __skb_queue_tail(&tp->ucopy.prequeue, skb); - tp->ucopy.memory += skb->truesize; - if (tp->ucopy.memory > sk->sk_rcvbuf) { - struct sk_buff *skb1; - - BUG_ON(sock_owned_by_user(sk)); - - while ((skb1 = __skb_dequeue(&tp->ucopy.prequeue)) != NULL) { - sk_backlog_rcv(sk, skb1); - NET_INC_STATS_BH(sock_net(sk), - LINUX_MIB_TCPPREQUEUEDROPPED); - } - - tp->ucopy.memory = 0; - } else if (skb_queue_len(&tp->ucopy.prequeue) == 1) { - wake_up_interruptible_sync_poll(sk_sleep(sk), - POLLIN | POLLRDNORM | POLLRDBAND); - if (!inet_csk_ack_scheduled(sk)) - inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, - (3 * tcp_rto_min(sk)) / 4, - TCP_RTO_MAX); - } - return true; -} - +extern bool tcp_prequeue(struct sock *sk, struct sk_buff *skb); #undef STATE_TRACE diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 4a8ec457310f..8cdee120a50c 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1950,6 +1950,50 @@ void tcp_v4_early_demux(struct sk_buff *skb) } } +/* Packet is added to VJ-style prequeue for processing in process + * context, if a reader task is waiting. Apparently, this exciting + * idea (VJ's mail "Re: query about TCP header on tcp-ip" of 07 Sep 93) + * failed somewhere. Latency? Burstiness? Well, at least now we will + * see, why it failed. 8)8) --ANK + * + */ +bool tcp_prequeue(struct sock *sk, struct sk_buff *skb) +{ + struct tcp_sock *tp = tcp_sk(sk); + + if (sysctl_tcp_low_latency || !tp->ucopy.task) + return false; + + if (skb->len <= tcp_hdrlen(skb) && + skb_queue_len(&tp->ucopy.prequeue) == 0) + return false; + + __skb_queue_tail(&tp->ucopy.prequeue, skb); + tp->ucopy.memory += skb->truesize; + if (tp->ucopy.memory > sk->sk_rcvbuf) { + struct sk_buff *skb1; + + BUG_ON(sock_owned_by_user(sk)); + + while ((skb1 = __skb_dequeue(&tp->ucopy.prequeue)) != NULL) { + sk_backlog_rcv(sk, skb1); + NET_INC_STATS_BH(sock_net(sk), + LINUX_MIB_TCPPREQUEUEDROPPED); + } + + tp->ucopy.memory = 0; + } else if (skb_queue_len(&tp->ucopy.prequeue) == 1) { + wake_up_interruptible_sync_poll(sk_sleep(sk), + POLLIN | POLLRDNORM | POLLRDBAND); + if (!inet_csk_ack_scheduled(sk)) + inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, + (3 * tcp_rto_min(sk)) / 4, + TCP_RTO_MAX); + } + return true; +} +EXPORT_SYMBOL(tcp_prequeue); + /* * From tcp_input.c */ -- cgit v1.2.3 From 7f0e44ac9f7f12a2519bfed9ea4df3c1471bd8bb Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 7 Mar 2013 04:20:32 +0000 Subject: ipv6 flowlabel: add __rcu annotations Commit 18367681a10b (ipv6 flowlabel: Convert np->ipv6_fl_list to RCU.) omitted proper __rcu annotations. Signed-off-by: Eric Dumazet Cc: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- include/net/ipv6.h | 8 ++++---- net/ipv6/ip6_flowlabel.c | 11 ++++++++--- 2 files changed, 12 insertions(+), 7 deletions(-) (limited to 'include/net') diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 64d12e77719a..988c9f28f0fc 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -217,7 +217,7 @@ struct ipv6_txoptions { }; struct ip6_flowlabel { - struct ip6_flowlabel *next; + struct ip6_flowlabel __rcu *next; __be32 label; atomic_t users; struct in6_addr dst; @@ -238,9 +238,9 @@ struct ip6_flowlabel { #define IPV6_FLOWLABEL_MASK cpu_to_be32(0x000FFFFF) struct ipv6_fl_socklist { - struct ipv6_fl_socklist *next; - struct ip6_flowlabel *fl; - struct rcu_head rcu; + struct ipv6_fl_socklist __rcu *next; + struct ip6_flowlabel *fl; + struct rcu_head rcu; }; extern struct ip6_flowlabel *fl6_sock_lookup(struct sock *sk, __be32 label); diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index b973ed3d06cf..46e88433ec7d 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -144,7 +144,9 @@ static void ip6_fl_gc(unsigned long dummy) spin_lock(&ip6_fl_lock); for (i=0; i<=FL_HASH_MASK; i++) { - struct ip6_flowlabel *fl, **flp; + struct ip6_flowlabel *fl; + struct ip6_flowlabel __rcu **flp; + flp = &fl_ht[i]; while ((fl = rcu_dereference_protected(*flp, lockdep_is_held(&ip6_fl_lock))) != NULL) { @@ -179,7 +181,9 @@ static void __net_exit ip6_fl_purge(struct net *net) spin_lock(&ip6_fl_lock); for (i = 0; i <= FL_HASH_MASK; i++) { - struct ip6_flowlabel *fl, **flp; + struct ip6_flowlabel *fl; + struct ip6_flowlabel __rcu **flp; + flp = &fl_ht[i]; while ((fl = rcu_dereference_protected(*flp, lockdep_is_held(&ip6_fl_lock))) != NULL) { @@ -506,7 +510,8 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen) struct ipv6_pinfo *np = inet6_sk(sk); struct in6_flowlabel_req freq; struct ipv6_fl_socklist *sfl1=NULL; - struct ipv6_fl_socklist *sfl, **sflp; + struct ipv6_fl_socklist *sfl; + struct ipv6_fl_socklist __rcu **sflp; struct ip6_flowlabel *fl, *fl1 = NULL; -- cgit v1.2.3 From bed71748346ae0807c7f7a2913965508dbd61403 Mon Sep 17 00:00:00 2001 From: Andre Guedes Date: Wed, 30 Jan 2013 11:50:56 -0300 Subject: Bluetooth: Rename hci_acl_disconn As hci_acl_disconn function basically sends the HCI Disconnect Command and it is used to disconnect ACL, SCO and LE links, renaming it to hci_disconnect is more suitable. Signed-off-by: Andre Guedes Signed-off-by: Gustavo Padovan --- include/net/bluetooth/hci_core.h | 2 +- net/bluetooth/hci_conn.c | 4 ++-- net/bluetooth/hci_core.c | 2 +- net/bluetooth/hci_event.c | 4 ++-- 4 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 90cf75afcb02..787d3b9bbd58 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -574,7 +574,7 @@ static inline struct hci_conn *hci_conn_hash_lookup_state(struct hci_dev *hdev, return NULL; } -void hci_acl_disconn(struct hci_conn *conn, __u8 reason); +void hci_disconnect(struct hci_conn *conn, __u8 reason); void hci_setup_sync(struct hci_conn *conn, __u16 handle); void hci_sco_setup(struct hci_conn *conn, __u8 status); diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 4925a02ae7e4..b9f90169940b 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -117,7 +117,7 @@ static void hci_acl_create_connection_cancel(struct hci_conn *conn) hci_send_cmd(conn->hdev, HCI_OP_CREATE_CONN_CANCEL, sizeof(cp), &cp); } -void hci_acl_disconn(struct hci_conn *conn, __u8 reason) +void hci_disconnect(struct hci_conn *conn, __u8 reason) { struct hci_cp_disconnect cp; @@ -253,7 +253,7 @@ static void hci_conn_disconnect(struct hci_conn *conn) hci_amp_disconn(conn, reason); break; default: - hci_acl_disconn(conn, reason); + hci_disconnect(conn, reason); break; } } diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 60793e7b768b..4cb46c24a749 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -2398,7 +2398,7 @@ static void hci_link_tx_to(struct hci_dev *hdev, __u8 type) if (c->type == type && c->sent) { BT_ERR("%s killing stalled connection %pMR", hdev->name, &c->dst); - hci_acl_disconn(c, HCI_ERROR_REMOTE_USER_TERM); + hci_disconnect(c, HCI_ERROR_REMOTE_USER_TERM); } } diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 477726a63512..5892e54835a1 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -2399,7 +2399,7 @@ static void hci_encrypt_change_evt(struct hci_dev *hdev, struct sk_buff *skb) clear_bit(HCI_CONN_ENCRYPT_PEND, &conn->flags); if (ev->status && conn->state == BT_CONNECTED) { - hci_acl_disconn(conn, HCI_ERROR_AUTH_FAILURE); + hci_disconnect(conn, HCI_ERROR_AUTH_FAILURE); hci_conn_put(conn); goto unlock; } @@ -3472,7 +3472,7 @@ static void hci_key_refresh_complete_evt(struct hci_dev *hdev, clear_bit(HCI_CONN_ENCRYPT_PEND, &conn->flags); if (ev->status && conn->state == BT_CONNECTED) { - hci_acl_disconn(conn, HCI_ERROR_AUTH_FAILURE); + hci_disconnect(conn, HCI_ERROR_AUTH_FAILURE); hci_conn_put(conn); goto unlock; } -- cgit v1.2.3 From be9f97f04565a6c438b7521ad679870d25645475 Mon Sep 17 00:00:00 2001 From: David Herrmann Date: Sun, 24 Feb 2013 19:36:52 +0100 Subject: Bluetooth: change bt_sock_unregister() to return void There is no reason a caller ever wants to check the return type of this call. _Iff_ a user successfully called bt_sock_register(), they're allowed to call bt_sock_unregister(). All other calls in the kernel (device_del, device_unregister, kfree(), ..) that are logically equivalent return void. Lets not make callers think they have to check the return type of this call and instead simply return void. We guarantee that after bt_sock_unregister() is called, the socket type _is_ unregistered. If that is not what the caller wants, they're using the wrong function, anyway. Signed-off-by: David Herrmann Signed-off-by: Gustavo Padovan --- include/net/bluetooth/bluetooth.h | 2 +- net/bluetooth/af_bluetooth.c | 15 +++------------ 2 files changed, 4 insertions(+), 13 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index 9531beee09b5..5f51bef13e61 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -232,7 +232,7 @@ struct bt_sock_list { }; int bt_sock_register(int proto, const struct net_proto_family *ops); -int bt_sock_unregister(int proto); +void bt_sock_unregister(int proto); void bt_sock_link(struct bt_sock_list *l, struct sock *s); void bt_sock_unlink(struct bt_sock_list *l, struct sock *s); int bt_sock_recvmsg(struct kiocb *iocb, struct socket *sock, diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index d3ee69b35a78..81598e588f7f 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -92,23 +92,14 @@ int bt_sock_register(int proto, const struct net_proto_family *ops) } EXPORT_SYMBOL(bt_sock_register); -int bt_sock_unregister(int proto) +void bt_sock_unregister(int proto) { - int err = 0; - if (proto < 0 || proto >= BT_MAX_PROTO) - return -EINVAL; + return; write_lock(&bt_proto_lock); - - if (!bt_proto[proto]) - err = -ENOENT; - else - bt_proto[proto] = NULL; - + bt_proto[proto] = NULL; write_unlock(&bt_proto_lock); - - return err; } EXPORT_SYMBOL(bt_sock_unregister); -- cgit v1.2.3 From 8ff52f7d04d9cc31f1e81dcf9a2ba6335ed34905 Mon Sep 17 00:00:00 2001 From: Dean Jenkins Date: Thu, 28 Feb 2013 14:21:55 +0000 Subject: Bluetooth: Return RFCOMM session ptrs to avoid freed session Unfortunately, the design retains local copies of the s RFCOMM session pointer in various code blocks and this invites the erroneous access to a freed RFCOMM session structure. Therefore, return the RFCOMM session pointer back up the call stack to avoid accessing a freed RFCOMM session structure. When the RFCOMM session is deleted, NULL is passed up the call stack. If active DLCs exist when the rfcomm session is terminating, avoid a memory leak of rfcomm_dlc structures by ensuring that rfcomm_session_close() is used instead of rfcomm_session_del(). Signed-off-by: Dean Jenkins Acked-by: Marcel Holtmann Signed-off-by: Gustavo Padovan --- include/net/bluetooth/rfcomm.h | 3 +- net/bluetooth/rfcomm/core.c | 106 ++++++++++++++++++++++------------------- 2 files changed, 58 insertions(+), 51 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/rfcomm.h b/include/net/bluetooth/rfcomm.h index e2e3ecad1008..a4e38ead2282 100644 --- a/include/net/bluetooth/rfcomm.h +++ b/include/net/bluetooth/rfcomm.h @@ -278,7 +278,8 @@ void rfcomm_session_getaddr(struct rfcomm_session *s, bdaddr_t *src, static inline void rfcomm_session_hold(struct rfcomm_session *s) { - atomic_inc(&s->refcnt); + if (s) + atomic_inc(&s->refcnt); } /* ---- RFCOMM sockets ---- */ diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index d9e97cf1792b..2b5c543638ba 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -69,7 +69,7 @@ static struct rfcomm_session *rfcomm_session_create(bdaddr_t *src, u8 sec_level, int *err); static struct rfcomm_session *rfcomm_session_get(bdaddr_t *src, bdaddr_t *dst); -static void rfcomm_session_del(struct rfcomm_session *s); +static struct rfcomm_session *rfcomm_session_del(struct rfcomm_session *s); /* ---- RFCOMM frame parsing macros ---- */ #define __get_dlci(b) ((b & 0xfc) >> 2) @@ -108,10 +108,12 @@ static void rfcomm_schedule(void) wake_up_process(rfcomm_thread); } -static void rfcomm_session_put(struct rfcomm_session *s) +static struct rfcomm_session *rfcomm_session_put(struct rfcomm_session *s) { - if (atomic_dec_and_test(&s->refcnt)) - rfcomm_session_del(s); + if (s && atomic_dec_and_test(&s->refcnt)) + s = rfcomm_session_del(s); + + return s; } /* ---- RFCOMM FCS computation ---- */ @@ -631,7 +633,7 @@ static struct rfcomm_session *rfcomm_session_add(struct socket *sock, int state) return s; } -static void rfcomm_session_del(struct rfcomm_session *s) +static struct rfcomm_session *rfcomm_session_del(struct rfcomm_session *s) { int state = s->state; @@ -648,6 +650,8 @@ static void rfcomm_session_del(struct rfcomm_session *s) if (state != BT_LISTEN) module_put(THIS_MODULE); + + return NULL; } static struct rfcomm_session *rfcomm_session_get(bdaddr_t *src, bdaddr_t *dst) @@ -666,7 +670,8 @@ static struct rfcomm_session *rfcomm_session_get(bdaddr_t *src, bdaddr_t *dst) return NULL; } -static void rfcomm_session_close(struct rfcomm_session *s, int err) +static struct rfcomm_session *rfcomm_session_close(struct rfcomm_session *s, + int err) { struct rfcomm_dlc *d; struct list_head *p, *n; @@ -685,7 +690,7 @@ static void rfcomm_session_close(struct rfcomm_session *s, int err) } rfcomm_session_clear_timer(s); - rfcomm_session_put(s); + return rfcomm_session_put(s); } static struct rfcomm_session *rfcomm_session_create(bdaddr_t *src, @@ -737,8 +742,7 @@ static struct rfcomm_session *rfcomm_session_create(bdaddr_t *src, if (*err == 0 || *err == -EINPROGRESS) return s; - rfcomm_session_del(s); - return NULL; + return rfcomm_session_del(s); failed: sock_release(sock); @@ -1127,7 +1131,7 @@ static void rfcomm_make_uih(struct sk_buff *skb, u8 addr) } /* ---- RFCOMM frame reception ---- */ -static int rfcomm_recv_ua(struct rfcomm_session *s, u8 dlci) +static struct rfcomm_session *rfcomm_recv_ua(struct rfcomm_session *s, u8 dlci) { BT_DBG("session %p state %ld dlci %d", s, s->state, dlci); @@ -1136,7 +1140,7 @@ static int rfcomm_recv_ua(struct rfcomm_session *s, u8 dlci) struct rfcomm_dlc *d = rfcomm_dlc_get(s, dlci); if (!d) { rfcomm_send_dm(s, dlci); - return 0; + return s; } switch (d->state) { @@ -1172,25 +1176,14 @@ static int rfcomm_recv_ua(struct rfcomm_session *s, u8 dlci) break; case BT_DISCONN: - /* rfcomm_session_put is called later so don't do - * anything here otherwise we will mess up the session - * reference counter: - * - * (a) when we are the initiator dlc_unlink will drive - * the reference counter to 0 (there is no initial put - * after session_add) - * - * (b) when we are not the initiator rfcomm_rx_process - * will explicitly call put to balance the initial hold - * done after session add. - */ + s = rfcomm_session_close(s, ECONNRESET); break; } } - return 0; + return s; } -static int rfcomm_recv_dm(struct rfcomm_session *s, u8 dlci) +static struct rfcomm_session *rfcomm_recv_dm(struct rfcomm_session *s, u8 dlci) { int err = 0; @@ -1215,12 +1208,13 @@ static int rfcomm_recv_dm(struct rfcomm_session *s, u8 dlci) err = ECONNRESET; s->state = BT_CLOSED; - rfcomm_session_close(s, err); + s = rfcomm_session_close(s, err); } - return 0; + return s; } -static int rfcomm_recv_disc(struct rfcomm_session *s, u8 dlci) +static struct rfcomm_session *rfcomm_recv_disc(struct rfcomm_session *s, + u8 dlci) { int err = 0; @@ -1250,10 +1244,9 @@ static int rfcomm_recv_disc(struct rfcomm_session *s, u8 dlci) err = ECONNRESET; s->state = BT_CLOSED; - rfcomm_session_close(s, err); + s = rfcomm_session_close(s, err); } - - return 0; + return s; } void rfcomm_dlc_accept(struct rfcomm_dlc *d) @@ -1674,11 +1667,18 @@ drop: return 0; } -static int rfcomm_recv_frame(struct rfcomm_session *s, struct sk_buff *skb) +static struct rfcomm_session *rfcomm_recv_frame(struct rfcomm_session *s, + struct sk_buff *skb) { struct rfcomm_hdr *hdr = (void *) skb->data; u8 type, dlci, fcs; + if (!s) { + /* no session, so free socket data */ + kfree_skb(skb); + return s; + } + dlci = __get_dlci(hdr->addr); type = __get_type(hdr->ctrl); @@ -1689,7 +1689,7 @@ static int rfcomm_recv_frame(struct rfcomm_session *s, struct sk_buff *skb) if (__check_fcs(skb->data, type, fcs)) { BT_ERR("bad checksum in packet"); kfree_skb(skb); - return -EILSEQ; + return s; } if (__test_ea(hdr->len)) @@ -1705,22 +1705,23 @@ static int rfcomm_recv_frame(struct rfcomm_session *s, struct sk_buff *skb) case RFCOMM_DISC: if (__test_pf(hdr->ctrl)) - rfcomm_recv_disc(s, dlci); + s = rfcomm_recv_disc(s, dlci); break; case RFCOMM_UA: if (__test_pf(hdr->ctrl)) - rfcomm_recv_ua(s, dlci); + s = rfcomm_recv_ua(s, dlci); break; case RFCOMM_DM: - rfcomm_recv_dm(s, dlci); + s = rfcomm_recv_dm(s, dlci); break; case RFCOMM_UIH: - if (dlci) - return rfcomm_recv_data(s, dlci, __test_pf(hdr->ctrl), skb); - + if (dlci) { + rfcomm_recv_data(s, dlci, __test_pf(hdr->ctrl), skb); + return s; + } rfcomm_recv_mcc(s, skb); break; @@ -1729,7 +1730,7 @@ static int rfcomm_recv_frame(struct rfcomm_session *s, struct sk_buff *skb) break; } kfree_skb(skb); - return 0; + return s; } /* ---- Connection and data processing ---- */ @@ -1866,7 +1867,7 @@ static void rfcomm_process_dlcs(struct rfcomm_session *s) } } -static void rfcomm_process_rx(struct rfcomm_session *s) +static struct rfcomm_session *rfcomm_process_rx(struct rfcomm_session *s) { struct socket *sock = s->sock; struct sock *sk = sock->sk; @@ -1878,17 +1879,20 @@ static void rfcomm_process_rx(struct rfcomm_session *s) while ((skb = skb_dequeue(&sk->sk_receive_queue))) { skb_orphan(skb); if (!skb_linearize(skb)) - rfcomm_recv_frame(s, skb); + s = rfcomm_recv_frame(s, skb); else kfree_skb(skb); } - if (sk->sk_state == BT_CLOSED) { + if (s && (sk->sk_state == BT_CLOSED)) { if (!s->initiator) - rfcomm_session_put(s); + s = rfcomm_session_put(s); - rfcomm_session_close(s, sk->sk_err); + if (s) + s = rfcomm_session_close(s, sk->sk_err); } + + return s; } static void rfcomm_accept_connection(struct rfcomm_session *s) @@ -1925,7 +1929,7 @@ static void rfcomm_accept_connection(struct rfcomm_session *s) sock_release(nsock); } -static void rfcomm_check_connection(struct rfcomm_session *s) +static struct rfcomm_session *rfcomm_check_connection(struct rfcomm_session *s) { struct sock *sk = s->sock->sk; @@ -1944,9 +1948,10 @@ static void rfcomm_check_connection(struct rfcomm_session *s) case BT_CLOSED: s->state = BT_CLOSED; - rfcomm_session_close(s, sk->sk_err); + s = rfcomm_session_close(s, sk->sk_err); break; } + return s; } static void rfcomm_process_sessions(void) @@ -1975,15 +1980,16 @@ static void rfcomm_process_sessions(void) switch (s->state) { case BT_BOUND: - rfcomm_check_connection(s); + s = rfcomm_check_connection(s); break; default: - rfcomm_process_rx(s); + s = rfcomm_process_rx(s); break; } - rfcomm_process_dlcs(s); + if (s) + rfcomm_process_dlcs(s); rfcomm_session_put(s); } -- cgit v1.2.3 From 08c30aca9e698faddebd34f81e1196295f9dc063 Mon Sep 17 00:00:00 2001 From: Dean Jenkins Date: Thu, 28 Feb 2013 14:21:56 +0000 Subject: Bluetooth: Remove RFCOMM session refcnt Previous commits have improved the handling of the RFCOMM session timer and the RFCOMM session pointers such that freed RFCOMM session structures should no longer be erroneously accessed. The RFCOMM session refcnt now has no purpose and will be deleted by this commit. Note that the RFCOMM session is now deleted as soon as the RFCOMM control channel link is no longer required. This makes the lifetime of the RFCOMM session deterministic and absolute. Previously with the refcnt, there was uncertainty about when the session structure would be deleted because the relative refcnt prevented the session structure from being deleted at will. It was noted that the refcnt could malfunction under very heavy real-time processor loading in embedded SMP environments. This could cause premature RFCOMM session deletion or double session deletion that could result in kernel crashes. Removal of the refcnt prevents this issue. There are 4 connection / disconnection RFCOMM session scenarios: host initiated control link ---> host disconnected control link host initiated ctrl link ---> remote device disconnected ctrl link remote device initiated ctrl link ---> host disconnected ctrl link remote device initiated ctrl link ---> remote device disc'ed ctrl link The control channel connection procedures are independent of the disconnection procedures. Strangely, the RFCOMM session refcnt was applying special treatment so erroneously combining connection and disconnection events. This commit fixes this issue by removing some session code that used the "initiator" member of the session structure that was intended for use with the data channels. Signed-off-by: Dean Jenkins Acked-by: Marcel Holtmann Signed-off-by: Gustavo Padovan --- include/net/bluetooth/rfcomm.h | 7 ------- net/bluetooth/rfcomm/core.c | 43 +++++------------------------------------- 2 files changed, 5 insertions(+), 45 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/rfcomm.h b/include/net/bluetooth/rfcomm.h index a4e38ead2282..7afd4199d6b6 100644 --- a/include/net/bluetooth/rfcomm.h +++ b/include/net/bluetooth/rfcomm.h @@ -158,7 +158,6 @@ struct rfcomm_session { struct timer_list timer; unsigned long state; unsigned long flags; - atomic_t refcnt; int initiator; /* Default DLC parameters */ @@ -276,12 +275,6 @@ static inline void rfcomm_dlc_unthrottle(struct rfcomm_dlc *d) void rfcomm_session_getaddr(struct rfcomm_session *s, bdaddr_t *src, bdaddr_t *dst); -static inline void rfcomm_session_hold(struct rfcomm_session *s) -{ - if (s) - atomic_inc(&s->refcnt); -} - /* ---- RFCOMM sockets ---- */ struct sockaddr_rc { sa_family_t rc_family; diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index 2b5c543638ba..75b7bbd8acb7 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -108,14 +108,6 @@ static void rfcomm_schedule(void) wake_up_process(rfcomm_thread); } -static struct rfcomm_session *rfcomm_session_put(struct rfcomm_session *s) -{ - if (s && atomic_dec_and_test(&s->refcnt)) - s = rfcomm_session_del(s); - - return s; -} - /* ---- RFCOMM FCS computation ---- */ /* reversed, 8-bit, poly=0x07 */ @@ -251,16 +243,14 @@ static void rfcomm_session_set_timer(struct rfcomm_session *s, long timeout) { BT_DBG("session %p state %ld timeout %ld", s, s->state, timeout); - if (!mod_timer(&s->timer, jiffies + timeout)) - rfcomm_session_hold(s); + mod_timer(&s->timer, jiffies + timeout); } static void rfcomm_session_clear_timer(struct rfcomm_session *s) { BT_DBG("session %p state %ld", s, s->state); - if (del_timer_sync(&s->timer)) - rfcomm_session_put(s); + del_timer_sync(&s->timer); } /* ---- RFCOMM DLCs ---- */ @@ -338,8 +328,6 @@ static void rfcomm_dlc_link(struct rfcomm_session *s, struct rfcomm_dlc *d) { BT_DBG("dlc %p session %p", d, s); - rfcomm_session_hold(s); - rfcomm_session_clear_timer(s); rfcomm_dlc_hold(d); list_add(&d->list, &s->dlcs); @@ -358,8 +346,6 @@ static void rfcomm_dlc_unlink(struct rfcomm_dlc *d) if (list_empty(&s->dlcs)) rfcomm_session_set_timer(s, RFCOMM_IDLE_TIMEOUT); - - rfcomm_session_put(s); } static struct rfcomm_dlc *rfcomm_dlc_get(struct rfcomm_session *s, u8 dlci) @@ -678,8 +664,6 @@ static struct rfcomm_session *rfcomm_session_close(struct rfcomm_session *s, BT_DBG("session %p state %ld err %d", s, s->state, err); - rfcomm_session_hold(s); - s->state = BT_CLOSED; /* Close all dlcs */ @@ -690,7 +674,7 @@ static struct rfcomm_session *rfcomm_session_close(struct rfcomm_session *s, } rfcomm_session_clear_timer(s); - return rfcomm_session_put(s); + return rfcomm_session_del(s); } static struct rfcomm_session *rfcomm_session_create(bdaddr_t *src, @@ -1884,13 +1868,8 @@ static struct rfcomm_session *rfcomm_process_rx(struct rfcomm_session *s) kfree_skb(skb); } - if (s && (sk->sk_state == BT_CLOSED)) { - if (!s->initiator) - s = rfcomm_session_put(s); - - if (s) - s = rfcomm_session_close(s, sk->sk_err); - } + if (s && (sk->sk_state == BT_CLOSED)) + s = rfcomm_session_close(s, sk->sk_err); return s; } @@ -1917,8 +1896,6 @@ static void rfcomm_accept_connection(struct rfcomm_session *s) s = rfcomm_session_add(nsock, BT_OPEN); if (s) { - rfcomm_session_hold(s); - /* We should adjust MTU on incoming sessions. * L2CAP MTU minus UIH header and FCS. */ s->mtu = min(l2cap_pi(nsock->sk)->chan->omtu, @@ -1967,7 +1944,6 @@ static void rfcomm_process_sessions(void) if (test_and_clear_bit(RFCOMM_TIMED_OUT, &s->flags)) { s->state = BT_DISCONN; rfcomm_send_disc(s, 0); - rfcomm_session_put(s); continue; } @@ -1976,8 +1952,6 @@ static void rfcomm_process_sessions(void) continue; } - rfcomm_session_hold(s); - switch (s->state) { case BT_BOUND: s = rfcomm_check_connection(s); @@ -1990,8 +1964,6 @@ static void rfcomm_process_sessions(void) if (s) rfcomm_process_dlcs(s); - - rfcomm_session_put(s); } rfcomm_unlock(); @@ -2041,7 +2013,6 @@ static int rfcomm_add_listener(bdaddr_t *ba) if (!s) goto failed; - rfcomm_session_hold(s); return 0; failed: sock_release(sock); @@ -2099,8 +2070,6 @@ static void rfcomm_security_cfm(struct hci_conn *conn, u8 status, u8 encrypt) if (!s) return; - rfcomm_session_hold(s); - list_for_each_safe(p, n, &s->dlcs) { d = list_entry(p, struct rfcomm_dlc, list); @@ -2132,8 +2101,6 @@ static void rfcomm_security_cfm(struct hci_conn *conn, u8 status, u8 encrypt) set_bit(RFCOMM_AUTH_REJECT, &d->flags); } - rfcomm_session_put(s); - rfcomm_schedule(); } -- cgit v1.2.3 From 3119ae9599e5cdc1b9838563905c500b582ab6a5 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Tue, 5 Mar 2013 20:37:44 +0200 Subject: Bluetooth: Add initial skeleton for asynchronous HCI requests This patch adds the initial definitions and functions for asynchronous HCI requests. Asynchronous requests are essentially a group of HCI commands together with an optional completion callback. The request is tracked through the already existing command queue by having the necessary context information as part of the control buffer of each skb. The only information needed in the skb control buffer is a flag for indicating that the skb is the start of a request as well as the optional complete callback that should be used when the request is complete (this will be found in the last skb of the request). Signed-off-by: Johan Hedberg Acked-by: Marcel Holtmann Signed-off-by: Gustavo Padovan --- include/net/bluetooth/bluetooth.h | 10 ++++++++++ include/net/bluetooth/hci_core.h | 8 ++++++++ net/bluetooth/hci_core.c | 30 ++++++++++++++++++++++++++++++ 3 files changed, 48 insertions(+) (limited to 'include/net') diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index 5f51bef13e61..ed6e9552252e 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -260,12 +260,22 @@ struct l2cap_ctrl { __u8 retries; }; +struct hci_dev; + +typedef void (*hci_req_complete_t)(struct hci_dev *hdev, u8 status); + +struct hci_req_ctrl { + bool start; + hci_req_complete_t complete; +}; + struct bt_skb_cb { __u8 pkt_type; __u8 incoming; __u16 expect; __u8 force_active; struct l2cap_ctrl control; + struct hci_req_ctrl req; }; #define bt_cb(skb) ((struct bt_skb_cb *)((skb)->cb)) diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 787d3b9bbd58..7191217c6bd1 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1041,6 +1041,14 @@ static inline u16 eir_append_data(u8 *eir, u16 eir_len, u8 type, u8 *data, int hci_register_cb(struct hci_cb *hcb); int hci_unregister_cb(struct hci_cb *hcb); +struct hci_request { + struct hci_dev *hdev; + struct sk_buff_head cmd_q; +}; + +void hci_req_init(struct hci_request *req, struct hci_dev *hdev); +int hci_req_run(struct hci_request *req, hci_req_complete_t complete); + int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen, void *param); void hci_send_acl(struct hci_chan *chan, struct sk_buff *skb, __u16 flags); void hci_send_sco(struct hci_conn *conn, struct sk_buff *skb); diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 6ab38fecf1fe..94b08aa9a081 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -2439,6 +2439,36 @@ static int hci_send_frame(struct sk_buff *skb) return hdev->send(skb); } +void hci_req_init(struct hci_request *req, struct hci_dev *hdev) +{ + skb_queue_head_init(&req->cmd_q); + req->hdev = hdev; +} + +int hci_req_run(struct hci_request *req, hci_req_complete_t complete) +{ + struct hci_dev *hdev = req->hdev; + struct sk_buff *skb; + unsigned long flags; + + BT_DBG("length %u", skb_queue_len(&req->cmd_q)); + + /* Do not allow empty requests */ + if (skb_queue_empty(&req->cmd_q)) + return -EINVAL; + + skb = skb_peek_tail(&req->cmd_q); + bt_cb(skb)->req.complete = complete; + + spin_lock_irqsave(&hdev->cmd_q.lock, flags); + skb_queue_splice_tail(&req->cmd_q, &hdev->cmd_q); + spin_unlock_irqrestore(&hdev->cmd_q.lock, flags); + + queue_work(hdev->workqueue, &hdev->cmd_work); + + return 0; +} + /* Send HCI command */ int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen, void *param) { -- cgit v1.2.3 From 71c76a170e979d60e01bd093c9b79e3adeb710cc Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Tue, 5 Mar 2013 20:37:46 +0200 Subject: Bluetooth: Introduce new hci_req_add function This function is analogous to hci_send_cmd() but instead of directly queuing the command to hdev->cmd_q it adds it to the local queue of the asynchronous HCI request being build (inside struct hci_request). This is the main function used for building asynchronous requests and there should be one or more calls to it between calls to hci_req_init and hci_req_run. Signed-off-by: Johan Hedberg Acked-by: Marcel Holtmann Signed-off-by: Gustavo Padovan --- include/net/bluetooth/hci_core.h | 1 + net/bluetooth/hci_core.c | 22 ++++++++++++++++++++++ 2 files changed, 23 insertions(+) (limited to 'include/net') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 7191217c6bd1..67fe661259ba 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1048,6 +1048,7 @@ struct hci_request { void hci_req_init(struct hci_request *req, struct hci_dev *hdev); int hci_req_run(struct hci_request *req, hci_req_complete_t complete); +int hci_req_add(struct hci_request *req, u16 opcode, u32 plen, void *param); int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen, void *param); void hci_send_acl(struct hci_chan *chan, struct sk_buff *skb, __u16 flags); diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index d2edcc4643c3..6e6a9dd8a155 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -2517,6 +2517,28 @@ int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen, void *param) return 0; } +/* Queue a command to an asynchronous HCI request */ +int hci_req_add(struct hci_request *req, u16 opcode, u32 plen, void *param) +{ + struct hci_dev *hdev = req->hdev; + struct sk_buff *skb; + + BT_DBG("%s opcode 0x%4.4x plen %d", hdev->name, opcode, plen); + + skb = hci_prepare_cmd(hdev, opcode, plen, param); + if (!skb) { + BT_ERR("%s no memory for command", hdev->name); + return -ENOMEM; + } + + if (skb_queue_empty(&req->cmd_q)) + bt_cb(skb)->req.start = true; + + skb_queue_tail(&req->cmd_q, skb); + + return 0; +} + /* Get data from the previously sent command */ void *hci_sent_cmd_data(struct hci_dev *hdev, __u16 opcode) { -- cgit v1.2.3 From 9238f36a5a5097018b90baa42c473d2f916a46f5 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Tue, 5 Mar 2013 20:37:48 +0200 Subject: Bluetooth: Add request cmd_complete and cmd_status functions This patch introduces functions to process the HCI request state when receiving HCI Command Status or Command Complete events. Some HCI commands, like Inquiry do not result in a Command complete event so special handling is needed for them. Inquiry is a particularly important one since it is the only forseeable "non-cmd_complete" command that will make good use of the request functionality, and its completion is either indicated by an Inquiry Complete event of a successful Command Complete for HCI_Inquiry_Cancel. Signed-off-by: Johan Hedberg Acked-by: Marcel Holtmann Signed-off-by: Gustavo Padovan --- include/net/bluetooth/hci_core.h | 2 + net/bluetooth/hci_core.c | 85 ++++++++++++++++++++++++++++++++++++++++ net/bluetooth/hci_event.c | 7 ++++ 3 files changed, 94 insertions(+) (limited to 'include/net') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 67fe661259ba..d732d6894adc 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1049,6 +1049,8 @@ struct hci_request { void hci_req_init(struct hci_request *req, struct hci_dev *hdev); int hci_req_run(struct hci_request *req, hci_req_complete_t complete); int hci_req_add(struct hci_request *req, u16 opcode, u32 plen, void *param); +void hci_req_cmd_complete(struct hci_dev *hdev, u16 opcode, u8 status); +void hci_req_cmd_status(struct hci_dev *hdev, u16 opcode, u8 status); int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen, void *param); void hci_send_acl(struct hci_chan *chan, struct sk_buff *skb, __u16 flags); diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 4f8142bdf655..0ada2ec36e7b 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -3208,6 +3208,91 @@ static void hci_scodata_packet(struct hci_dev *hdev, struct sk_buff *skb) kfree_skb(skb); } +static bool hci_req_is_complete(struct hci_dev *hdev) +{ + struct sk_buff *skb; + + skb = skb_peek(&hdev->cmd_q); + if (!skb) + return true; + + return bt_cb(skb)->req.start; +} + +void hci_req_cmd_complete(struct hci_dev *hdev, u16 opcode, u8 status) +{ + hci_req_complete_t req_complete = NULL; + struct sk_buff *skb; + unsigned long flags; + + BT_DBG("opcode 0x%04x status 0x%02x", opcode, status); + + /* Check that the completed command really matches the last one + * that was sent. + */ + if (!hci_sent_cmd_data(hdev, opcode)) + return; + + /* If the command succeeded and there's still more commands in + * this request the request is not yet complete. + */ + if (!status && !hci_req_is_complete(hdev)) + return; + + /* If this was the last command in a request the complete + * callback would be found in hdev->sent_cmd instead of the + * command queue (hdev->cmd_q). + */ + if (hdev->sent_cmd) { + req_complete = bt_cb(hdev->sent_cmd)->req.complete; + if (req_complete) + goto call_complete; + } + + /* Remove all pending commands belonging to this request */ + spin_lock_irqsave(&hdev->cmd_q.lock, flags); + while ((skb = __skb_dequeue(&hdev->cmd_q))) { + if (bt_cb(skb)->req.start) { + __skb_queue_head(&hdev->cmd_q, skb); + break; + } + + req_complete = bt_cb(skb)->req.complete; + kfree_skb(skb); + } + spin_unlock_irqrestore(&hdev->cmd_q.lock, flags); + +call_complete: + if (req_complete) + req_complete(hdev, status); +} + +void hci_req_cmd_status(struct hci_dev *hdev, u16 opcode, u8 status) +{ + hci_req_complete_t req_complete = NULL; + + BT_DBG("opcode 0x%04x status 0x%02x", opcode, status); + + if (status) { + hci_req_cmd_complete(hdev, opcode, status); + return; + } + + /* No need to handle success status if there are more commands */ + if (!hci_req_is_complete(hdev)) + return; + + if (hdev->sent_cmd) + req_complete = bt_cb(hdev->sent_cmd)->req.complete; + + /* If the request doesn't have a complete callback or there + * are other commands/requests in the hdev queue we consider + * this request as completed. + */ + if (!req_complete || !skb_queue_empty(&hdev->cmd_q)) + hci_req_cmd_complete(hdev, opcode, status); +} + static void hci_rx_work(struct work_struct *work) { struct hci_dev *hdev = container_of(work, struct hci_dev, rx_work); diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 14e872aa0d2c..8b878a3bdf69 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -53,6 +53,7 @@ static void hci_cc_inquiry_cancel(struct hci_dev *hdev, struct sk_buff *skb) hci_discovery_set_state(hdev, DISCOVERY_STOPPED); hci_dev_unlock(hdev); + hci_req_cmd_complete(hdev, HCI_OP_INQUIRY, status); hci_req_complete(hdev, HCI_OP_INQUIRY_CANCEL, status); hci_conn_check_pending(hdev); @@ -1692,6 +1693,7 @@ static void hci_inquiry_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) BT_DBG("%s status 0x%2.2x", hdev->name, status); + hci_req_cmd_complete(hdev, HCI_OP_INQUIRY, status); hci_req_complete(hdev, HCI_OP_INQUIRY, status); hci_conn_check_pending(hdev); @@ -2254,6 +2256,7 @@ static void hci_qos_setup_complete_evt(struct hci_dev *hdev, static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) { struct hci_ev_cmd_complete *ev = (void *) skb->data; + u8 status = skb->data[sizeof(*ev)]; __u16 opcode; skb_pull(skb, sizeof(*ev)); @@ -2497,6 +2500,8 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) if (ev->opcode != HCI_OP_NOP) del_timer(&hdev->cmd_timer); + hci_req_cmd_complete(hdev, ev->opcode, status); + if (ev->ncmd && !test_bit(HCI_RESET, &hdev->flags)) { atomic_set(&hdev->cmd_cnt, 1); if (!skb_queue_empty(&hdev->cmd_q)) @@ -2590,6 +2595,8 @@ static void hci_cmd_status_evt(struct hci_dev *hdev, struct sk_buff *skb) if (ev->opcode != HCI_OP_NOP) del_timer(&hdev->cmd_timer); + hci_req_cmd_status(hdev, ev->opcode, ev->status); + if (ev->ncmd && !test_bit(HCI_RESET, &hdev->flags)) { atomic_set(&hdev->cmd_cnt, 1); if (!skb_queue_empty(&hdev->cmd_q)) -- cgit v1.2.3 From 42c6b129cd8c2aa5012a78ec39672e7052cc677a Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Tue, 5 Mar 2013 20:37:49 +0200 Subject: Bluetooth: Use async requests internally in hci_req_sync This patch converts the hci_req_sync() procedure to internaly use the asynchronous HCI requests. The hci_req_sync mechanism relies on hci_req_complete() calls from hci_event.c into hci_core.c whenever a HCI command completes. This is very similar to what asynchronous requests do and makes the conversion fairly straight forward by converting hci_req_complete into a request complete callback. By this change hci_req_complete (renamed to hci_req_sync_complete) becomes private to hci_core.c and all calls to it can be removed from hci_event.c. The commands in each hci_req_sync procedure are collected into their own request by passing the hci_request pointer to the request callback (instead of the hci_dev pointer). The one slight exception is the HCI init request which has the special handling of HCI driver specific initialization commands. These commands are run in their own request prior to the "main" init request. One other extra change that this patch must contain is the handling of spontaneous HCI reset complete events that some controllers exhibit. These were previously handled in the hci_req_complete function but the right place for them now becomes the hci_req_cmd_complete function. Signed-off-by: Johan Hedberg Acked-by: Marcel Holtmann Signed-off-by: Gustavo Padovan --- include/net/bluetooth/hci_core.h | 2 - net/bluetooth/hci_core.c | 271 ++++++++++++++++++++++----------------- net/bluetooth/hci_event.c | 78 +---------- 3 files changed, 156 insertions(+), 195 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index d732d6894adc..3f124f43fcb1 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1164,8 +1164,6 @@ struct hci_sec_filter { #define hci_req_lock(d) mutex_lock(&d->req_lock) #define hci_req_unlock(d) mutex_unlock(&d->req_lock) -void hci_req_complete(struct hci_dev *hdev, __u16 cmd, int result); - void hci_le_conn_update(struct hci_conn *conn, u16 min, u16 max, u16 latency, u16 to_multiplier); void hci_le_start_enc(struct hci_conn *conn, __le16 ediv, __u8 rand[8], diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 0ada2ec36e7b..6218eced1530 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -57,36 +57,9 @@ static void hci_notify(struct hci_dev *hdev, int event) /* ---- HCI requests ---- */ -void hci_req_complete(struct hci_dev *hdev, __u16 cmd, int result) +static void hci_req_sync_complete(struct hci_dev *hdev, u8 result) { - BT_DBG("%s command 0x%4.4x result 0x%2.2x", hdev->name, cmd, result); - - /* If this is the init phase check if the completed command matches - * the last init command, and if not just return. - */ - if (test_bit(HCI_INIT, &hdev->flags) && hdev->init_last_cmd != cmd) { - struct hci_command_hdr *sent = (void *) hdev->sent_cmd->data; - u16 opcode = __le16_to_cpu(sent->opcode); - struct sk_buff *skb; - - /* Some CSR based controllers generate a spontaneous - * reset complete event during init and any pending - * command will never be completed. In such a case we - * need to resend whatever was the last sent - * command. - */ - - if (cmd != HCI_OP_RESET || opcode == HCI_OP_RESET) - return; - - skb = skb_clone(hdev->sent_cmd, GFP_ATOMIC); - if (skb) { - skb_queue_head(&hdev->cmd_q, skb); - queue_work(hdev->workqueue, &hdev->cmd_work); - } - - return; - } + BT_DBG("%s result 0x%2.2x", hdev->name, result); if (hdev->req_status == HCI_REQ_PEND) { hdev->req_result = result; @@ -108,26 +81,36 @@ static void hci_req_cancel(struct hci_dev *hdev, int err) /* Execute request and wait for completion. */ static int __hci_req_sync(struct hci_dev *hdev, - void (*req)(struct hci_dev *hdev, unsigned long opt), + void (*func)(struct hci_request *req, + unsigned long opt), unsigned long opt, __u32 timeout) { + struct hci_request req; DECLARE_WAITQUEUE(wait, current); int err = 0; BT_DBG("%s start", hdev->name); + hci_req_init(&req, hdev); + hdev->req_status = HCI_REQ_PEND; add_wait_queue(&hdev->req_wait_q, &wait); set_current_state(TASK_INTERRUPTIBLE); - req(hdev, opt); + func(&req, opt); - /* If the request didn't send any commands return immediately */ - if (skb_queue_empty(&hdev->cmd_q) && atomic_read(&hdev->cmd_cnt)) { + err = hci_req_run(&req, hci_req_sync_complete); + if (err < 0) { hdev->req_status = 0; remove_wait_queue(&hdev->req_wait_q, &wait); - return err; + /* req_run will fail if the request did not add any + * commands to the queue, something that can happen when + * a request with conditionals doesn't trigger any + * commands to be sent. This is normal behavior and + * should not trigger an error return. + */ + return 0; } schedule_timeout(timeout); @@ -159,7 +142,8 @@ static int __hci_req_sync(struct hci_dev *hdev, } static int hci_req_sync(struct hci_dev *hdev, - void (*req)(struct hci_dev *hdev, unsigned long opt), + void (*req)(struct hci_request *req, + unsigned long opt), unsigned long opt, __u32 timeout) { int ret; @@ -175,72 +159,80 @@ static int hci_req_sync(struct hci_dev *hdev, return ret; } -static void hci_reset_req(struct hci_dev *hdev, unsigned long opt) +static void hci_reset_req(struct hci_request *req, unsigned long opt) { - BT_DBG("%s %ld", hdev->name, opt); + BT_DBG("%s %ld", req->hdev->name, opt); /* Reset device */ - set_bit(HCI_RESET, &hdev->flags); - hci_send_cmd(hdev, HCI_OP_RESET, 0, NULL); + set_bit(HCI_RESET, &req->hdev->flags); + hci_req_add(req, HCI_OP_RESET, 0, NULL); } -static void bredr_init(struct hci_dev *hdev) +static void bredr_init(struct hci_request *req) { - hdev->flow_ctl_mode = HCI_FLOW_CTL_MODE_PACKET_BASED; + req->hdev->flow_ctl_mode = HCI_FLOW_CTL_MODE_PACKET_BASED; /* Read Local Supported Features */ - hci_send_cmd(hdev, HCI_OP_READ_LOCAL_FEATURES, 0, NULL); + hci_req_add(req, HCI_OP_READ_LOCAL_FEATURES, 0, NULL); /* Read Local Version */ - hci_send_cmd(hdev, HCI_OP_READ_LOCAL_VERSION, 0, NULL); + hci_req_add(req, HCI_OP_READ_LOCAL_VERSION, 0, NULL); /* Read BD Address */ - hci_send_cmd(hdev, HCI_OP_READ_BD_ADDR, 0, NULL); + hci_req_add(req, HCI_OP_READ_BD_ADDR, 0, NULL); } -static void amp_init(struct hci_dev *hdev) +static void amp_init(struct hci_request *req) { - hdev->flow_ctl_mode = HCI_FLOW_CTL_MODE_BLOCK_BASED; + req->hdev->flow_ctl_mode = HCI_FLOW_CTL_MODE_BLOCK_BASED; /* Read Local Version */ - hci_send_cmd(hdev, HCI_OP_READ_LOCAL_VERSION, 0, NULL); + hci_req_add(req, HCI_OP_READ_LOCAL_VERSION, 0, NULL); /* Read Local AMP Info */ - hci_send_cmd(hdev, HCI_OP_READ_LOCAL_AMP_INFO, 0, NULL); + hci_req_add(req, HCI_OP_READ_LOCAL_AMP_INFO, 0, NULL); /* Read Data Blk size */ - hci_send_cmd(hdev, HCI_OP_READ_DATA_BLOCK_SIZE, 0, NULL); + hci_req_add(req, HCI_OP_READ_DATA_BLOCK_SIZE, 0, NULL); } -static void hci_init1_req(struct hci_dev *hdev, unsigned long opt) +static void hci_init1_req(struct hci_request *req, unsigned long opt) { + struct hci_dev *hdev = req->hdev; + struct hci_request init_req; struct sk_buff *skb; BT_DBG("%s %ld", hdev->name, opt); /* Driver initialization */ + hci_req_init(&init_req, hdev); + /* Special commands */ while ((skb = skb_dequeue(&hdev->driver_init))) { bt_cb(skb)->pkt_type = HCI_COMMAND_PKT; skb->dev = (void *) hdev; - skb_queue_tail(&hdev->cmd_q, skb); - queue_work(hdev->workqueue, &hdev->cmd_work); + if (skb_queue_empty(&init_req.cmd_q)) + bt_cb(skb)->req.start = true; + + skb_queue_tail(&init_req.cmd_q, skb); } skb_queue_purge(&hdev->driver_init); + hci_req_run(&init_req, NULL); + /* Reset */ if (!test_bit(HCI_QUIRK_RESET_ON_CLOSE, &hdev->quirks)) - hci_reset_req(hdev, 0); + hci_reset_req(req, 0); switch (hdev->dev_type) { case HCI_BREDR: - bredr_init(hdev); + bredr_init(req); break; case HCI_AMP: - amp_init(hdev); + amp_init(req); break; default: @@ -249,53 +241,53 @@ static void hci_init1_req(struct hci_dev *hdev, unsigned long opt) } } -static void bredr_setup(struct hci_dev *hdev) +static void bredr_setup(struct hci_request *req) { struct hci_cp_delete_stored_link_key cp; __le16 param; __u8 flt_type; /* Read Buffer Size (ACL mtu, max pkt, etc.) */ - hci_send_cmd(hdev, HCI_OP_READ_BUFFER_SIZE, 0, NULL); + hci_req_add(req, HCI_OP_READ_BUFFER_SIZE, 0, NULL); /* Read Class of Device */ - hci_send_cmd(hdev, HCI_OP_READ_CLASS_OF_DEV, 0, NULL); + hci_req_add(req, HCI_OP_READ_CLASS_OF_DEV, 0, NULL); /* Read Local Name */ - hci_send_cmd(hdev, HCI_OP_READ_LOCAL_NAME, 0, NULL); + hci_req_add(req, HCI_OP_READ_LOCAL_NAME, 0, NULL); /* Read Voice Setting */ - hci_send_cmd(hdev, HCI_OP_READ_VOICE_SETTING, 0, NULL); + hci_req_add(req, HCI_OP_READ_VOICE_SETTING, 0, NULL); /* Clear Event Filters */ flt_type = HCI_FLT_CLEAR_ALL; - hci_send_cmd(hdev, HCI_OP_SET_EVENT_FLT, 1, &flt_type); + hci_req_add(req, HCI_OP_SET_EVENT_FLT, 1, &flt_type); /* Connection accept timeout ~20 secs */ param = __constant_cpu_to_le16(0x7d00); - hci_send_cmd(hdev, HCI_OP_WRITE_CA_TIMEOUT, 2, ¶m); + hci_req_add(req, HCI_OP_WRITE_CA_TIMEOUT, 2, ¶m); bacpy(&cp.bdaddr, BDADDR_ANY); cp.delete_all = 0x01; - hci_send_cmd(hdev, HCI_OP_DELETE_STORED_LINK_KEY, sizeof(cp), &cp); + hci_req_add(req, HCI_OP_DELETE_STORED_LINK_KEY, sizeof(cp), &cp); } -static void le_setup(struct hci_dev *hdev) +static void le_setup(struct hci_request *req) { /* Read LE Buffer Size */ - hci_send_cmd(hdev, HCI_OP_LE_READ_BUFFER_SIZE, 0, NULL); + hci_req_add(req, HCI_OP_LE_READ_BUFFER_SIZE, 0, NULL); /* Read LE Local Supported Features */ - hci_send_cmd(hdev, HCI_OP_LE_READ_LOCAL_FEATURES, 0, NULL); + hci_req_add(req, HCI_OP_LE_READ_LOCAL_FEATURES, 0, NULL); /* Read LE Advertising Channel TX Power */ - hci_send_cmd(hdev, HCI_OP_LE_READ_ADV_TX_POWER, 0, NULL); + hci_req_add(req, HCI_OP_LE_READ_ADV_TX_POWER, 0, NULL); /* Read LE White List Size */ - hci_send_cmd(hdev, HCI_OP_LE_READ_WHITE_LIST_SIZE, 0, NULL); + hci_req_add(req, HCI_OP_LE_READ_WHITE_LIST_SIZE, 0, NULL); /* Read LE Supported States */ - hci_send_cmd(hdev, HCI_OP_LE_READ_SUPPORTED_STATES, 0, NULL); + hci_req_add(req, HCI_OP_LE_READ_SUPPORTED_STATES, 0, NULL); } static u8 hci_get_inquiry_mode(struct hci_dev *hdev) @@ -326,17 +318,19 @@ static u8 hci_get_inquiry_mode(struct hci_dev *hdev) return 0x00; } -static void hci_setup_inquiry_mode(struct hci_dev *hdev) +static void hci_setup_inquiry_mode(struct hci_request *req) { u8 mode; - mode = hci_get_inquiry_mode(hdev); + mode = hci_get_inquiry_mode(req->hdev); - hci_send_cmd(hdev, HCI_OP_WRITE_INQUIRY_MODE, 1, &mode); + hci_req_add(req, HCI_OP_WRITE_INQUIRY_MODE, 1, &mode); } -static void hci_setup_event_mask(struct hci_dev *hdev) +static void hci_setup_event_mask(struct hci_request *req) { + struct hci_dev *hdev = req->hdev; + /* The second byte is 0xff instead of 0x9f (two reserved bits * disabled) since a Broadcom 1.2 dongle doesn't respond to the * command otherwise. @@ -392,67 +386,70 @@ static void hci_setup_event_mask(struct hci_dev *hdev) if (lmp_le_capable(hdev)) events[7] |= 0x20; /* LE Meta-Event */ - hci_send_cmd(hdev, HCI_OP_SET_EVENT_MASK, sizeof(events), events); + hci_req_add(req, HCI_OP_SET_EVENT_MASK, sizeof(events), events); if (lmp_le_capable(hdev)) { memset(events, 0, sizeof(events)); events[0] = 0x1f; - hci_send_cmd(hdev, HCI_OP_LE_SET_EVENT_MASK, - sizeof(events), events); + hci_req_add(req, HCI_OP_LE_SET_EVENT_MASK, + sizeof(events), events); } } -static void hci_init2_req(struct hci_dev *hdev, unsigned long opt) +static void hci_init2_req(struct hci_request *req, unsigned long opt) { + struct hci_dev *hdev = req->hdev; + if (lmp_bredr_capable(hdev)) - bredr_setup(hdev); + bredr_setup(req); if (lmp_le_capable(hdev)) - le_setup(hdev); + le_setup(req); - hci_setup_event_mask(hdev); + hci_setup_event_mask(req); if (hdev->hci_ver > BLUETOOTH_VER_1_1) - hci_send_cmd(hdev, HCI_OP_READ_LOCAL_COMMANDS, 0, NULL); + hci_req_add(req, HCI_OP_READ_LOCAL_COMMANDS, 0, NULL); if (lmp_ssp_capable(hdev)) { if (test_bit(HCI_SSP_ENABLED, &hdev->dev_flags)) { u8 mode = 0x01; - hci_send_cmd(hdev, HCI_OP_WRITE_SSP_MODE, - sizeof(mode), &mode); + hci_req_add(req, HCI_OP_WRITE_SSP_MODE, + sizeof(mode), &mode); } else { struct hci_cp_write_eir cp; memset(hdev->eir, 0, sizeof(hdev->eir)); memset(&cp, 0, sizeof(cp)); - hci_send_cmd(hdev, HCI_OP_WRITE_EIR, sizeof(cp), &cp); + hci_req_add(req, HCI_OP_WRITE_EIR, sizeof(cp), &cp); } } if (lmp_inq_rssi_capable(hdev)) - hci_setup_inquiry_mode(hdev); + hci_setup_inquiry_mode(req); if (lmp_inq_tx_pwr_capable(hdev)) - hci_send_cmd(hdev, HCI_OP_READ_INQ_RSP_TX_POWER, 0, NULL); + hci_req_add(req, HCI_OP_READ_INQ_RSP_TX_POWER, 0, NULL); if (lmp_ext_feat_capable(hdev)) { struct hci_cp_read_local_ext_features cp; cp.page = 0x01; - hci_send_cmd(hdev, HCI_OP_READ_LOCAL_EXT_FEATURES, sizeof(cp), - &cp); + hci_req_add(req, HCI_OP_READ_LOCAL_EXT_FEATURES, + sizeof(cp), &cp); } if (test_bit(HCI_LINK_SECURITY, &hdev->dev_flags)) { u8 enable = 1; - hci_send_cmd(hdev, HCI_OP_WRITE_AUTH_ENABLE, sizeof(enable), - &enable); + hci_req_add(req, HCI_OP_WRITE_AUTH_ENABLE, sizeof(enable), + &enable); } } -static void hci_setup_link_policy(struct hci_dev *hdev) +static void hci_setup_link_policy(struct hci_request *req) { + struct hci_dev *hdev = req->hdev; struct hci_cp_write_def_link_policy cp; u16 link_policy = 0; @@ -466,11 +463,12 @@ static void hci_setup_link_policy(struct hci_dev *hdev) link_policy |= HCI_LP_PARK; cp.policy = cpu_to_le16(link_policy); - hci_send_cmd(hdev, HCI_OP_WRITE_DEF_LINK_POLICY, sizeof(cp), &cp); + hci_req_add(req, HCI_OP_WRITE_DEF_LINK_POLICY, sizeof(cp), &cp); } -static void hci_set_le_support(struct hci_dev *hdev) +static void hci_set_le_support(struct hci_request *req) { + struct hci_dev *hdev = req->hdev; struct hci_cp_write_le_host_supported cp; memset(&cp, 0, sizeof(cp)); @@ -481,17 +479,19 @@ static void hci_set_le_support(struct hci_dev *hdev) } if (cp.le != lmp_host_le_capable(hdev)) - hci_send_cmd(hdev, HCI_OP_WRITE_LE_HOST_SUPPORTED, sizeof(cp), - &cp); + hci_req_add(req, HCI_OP_WRITE_LE_HOST_SUPPORTED, sizeof(cp), + &cp); } -static void hci_init3_req(struct hci_dev *hdev, unsigned long opt) +static void hci_init3_req(struct hci_request *req, unsigned long opt) { + struct hci_dev *hdev = req->hdev; + if (hdev->commands[5] & 0x10) - hci_setup_link_policy(hdev); + hci_setup_link_policy(req); if (lmp_le_capable(hdev)) - hci_set_le_support(hdev); + hci_set_le_support(req); } static int __hci_init(struct hci_dev *hdev) @@ -516,44 +516,44 @@ static int __hci_init(struct hci_dev *hdev) return __hci_req_sync(hdev, hci_init3_req, 0, HCI_INIT_TIMEOUT); } -static void hci_scan_req(struct hci_dev *hdev, unsigned long opt) +static void hci_scan_req(struct hci_request *req, unsigned long opt) { __u8 scan = opt; - BT_DBG("%s %x", hdev->name, scan); + BT_DBG("%s %x", req->hdev->name, scan); /* Inquiry and Page scans */ - hci_send_cmd(hdev, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan); + hci_req_add(req, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan); } -static void hci_auth_req(struct hci_dev *hdev, unsigned long opt) +static void hci_auth_req(struct hci_request *req, unsigned long opt) { __u8 auth = opt; - BT_DBG("%s %x", hdev->name, auth); + BT_DBG("%s %x", req->hdev->name, auth); /* Authentication */ - hci_send_cmd(hdev, HCI_OP_WRITE_AUTH_ENABLE, 1, &auth); + hci_req_add(req, HCI_OP_WRITE_AUTH_ENABLE, 1, &auth); } -static void hci_encrypt_req(struct hci_dev *hdev, unsigned long opt) +static void hci_encrypt_req(struct hci_request *req, unsigned long opt) { __u8 encrypt = opt; - BT_DBG("%s %x", hdev->name, encrypt); + BT_DBG("%s %x", req->hdev->name, encrypt); /* Encryption */ - hci_send_cmd(hdev, HCI_OP_WRITE_ENCRYPT_MODE, 1, &encrypt); + hci_req_add(req, HCI_OP_WRITE_ENCRYPT_MODE, 1, &encrypt); } -static void hci_linkpol_req(struct hci_dev *hdev, unsigned long opt) +static void hci_linkpol_req(struct hci_request *req, unsigned long opt) { __le16 policy = cpu_to_le16(opt); - BT_DBG("%s %x", hdev->name, policy); + BT_DBG("%s %x", req->hdev->name, policy); /* Default link policy */ - hci_send_cmd(hdev, HCI_OP_WRITE_DEF_LINK_POLICY, 2, &policy); + hci_req_add(req, HCI_OP_WRITE_DEF_LINK_POLICY, 2, &policy); } /* Get HCI device by index. @@ -790,9 +790,10 @@ static int inquiry_cache_dump(struct hci_dev *hdev, int num, __u8 *buf) return copied; } -static void hci_inq_req(struct hci_dev *hdev, unsigned long opt) +static void hci_inq_req(struct hci_request *req, unsigned long opt) { struct hci_inquiry_req *ir = (struct hci_inquiry_req *) opt; + struct hci_dev *hdev = req->hdev; struct hci_cp_inquiry cp; BT_DBG("%s", hdev->name); @@ -804,7 +805,7 @@ static void hci_inq_req(struct hci_dev *hdev, unsigned long opt) memcpy(&cp.lap, &ir->lap, 3); cp.length = ir->length; cp.num_rsp = ir->num_rsp; - hci_send_cmd(hdev, HCI_OP_INQUIRY, sizeof(cp), &cp); + hci_req_add(req, HCI_OP_INQUIRY, sizeof(cp), &cp); } int hci_inquiry(void __user *arg) @@ -1845,7 +1846,7 @@ int hci_blacklist_del(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 type) return mgmt_device_unblocked(hdev, bdaddr, type); } -static void le_scan_param_req(struct hci_dev *hdev, unsigned long opt) +static void le_scan_param_req(struct hci_request *req, unsigned long opt) { struct le_scan_params *param = (struct le_scan_params *) opt; struct hci_cp_le_set_scan_param cp; @@ -1855,10 +1856,10 @@ static void le_scan_param_req(struct hci_dev *hdev, unsigned long opt) cp.interval = cpu_to_le16(param->interval); cp.window = cpu_to_le16(param->window); - hci_send_cmd(hdev, HCI_OP_LE_SET_SCAN_PARAM, sizeof(cp), &cp); + hci_req_add(req, HCI_OP_LE_SET_SCAN_PARAM, sizeof(cp), &cp); } -static void le_scan_enable_req(struct hci_dev *hdev, unsigned long opt) +static void le_scan_enable_req(struct hci_request *req, unsigned long opt) { struct hci_cp_le_set_scan_enable cp; @@ -1866,7 +1867,7 @@ static void le_scan_enable_req(struct hci_dev *hdev, unsigned long opt) cp.enable = 1; cp.filter_dup = 1; - hci_send_cmd(hdev, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(cp), &cp); + hci_req_add(req, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(cp), &cp); } static int hci_do_le_scan(struct hci_dev *hdev, u8 type, u16 interval, @@ -3219,6 +3220,28 @@ static bool hci_req_is_complete(struct hci_dev *hdev) return bt_cb(skb)->req.start; } +static void hci_resend_last(struct hci_dev *hdev) +{ + struct hci_command_hdr *sent; + struct sk_buff *skb; + u16 opcode; + + if (!hdev->sent_cmd) + return; + + sent = (void *) hdev->sent_cmd->data; + opcode = __le16_to_cpu(sent->opcode); + if (opcode == HCI_OP_RESET) + return; + + skb = skb_clone(hdev->sent_cmd, GFP_KERNEL); + if (!skb) + return; + + skb_queue_head(&hdev->cmd_q, skb); + queue_work(hdev->workqueue, &hdev->cmd_work); +} + void hci_req_cmd_complete(struct hci_dev *hdev, u16 opcode, u8 status) { hci_req_complete_t req_complete = NULL; @@ -3227,11 +3250,21 @@ void hci_req_cmd_complete(struct hci_dev *hdev, u16 opcode, u8 status) BT_DBG("opcode 0x%04x status 0x%02x", opcode, status); - /* Check that the completed command really matches the last one - * that was sent. + /* If the completed command doesn't match the last one that was + * sent we need to do special handling of it. */ - if (!hci_sent_cmd_data(hdev, opcode)) + if (!hci_sent_cmd_data(hdev, opcode)) { + /* Some CSR based controllers generate a spontaneous + * reset complete event during init and any pending + * command will never be completed. In such a case we + * need to resend whatever was the last sent + * command. + */ + if (test_bit(HCI_INIT, &hdev->flags) && opcode == HCI_OP_RESET) + hci_resend_last(hdev); + return; + } /* If the command succeeded and there's still more commands in * this request the request is not yet complete. diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 8b878a3bdf69..0dd85a0c05f4 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -54,7 +54,6 @@ static void hci_cc_inquiry_cancel(struct hci_dev *hdev, struct sk_buff *skb) hci_dev_unlock(hdev); hci_req_cmd_complete(hdev, HCI_OP_INQUIRY, status); - hci_req_complete(hdev, HCI_OP_INQUIRY_CANCEL, status); hci_conn_check_pending(hdev); } @@ -184,8 +183,6 @@ static void hci_cc_write_def_link_policy(struct hci_dev *hdev, if (!status) hdev->link_policy = get_unaligned_le16(sent); - - hci_req_complete(hdev, HCI_OP_WRITE_DEF_LINK_POLICY, status); } static void hci_cc_reset(struct hci_dev *hdev, struct sk_buff *skb) @@ -196,8 +193,6 @@ static void hci_cc_reset(struct hci_dev *hdev, struct sk_buff *skb) clear_bit(HCI_RESET, &hdev->flags); - hci_req_complete(hdev, HCI_OP_RESET, status); - /* Reset all non-persistent flags */ hdev->dev_flags &= ~(BIT(HCI_LE_SCAN) | BIT(HCI_PENDING_CLASS) | BIT(HCI_PERIODIC_INQ)); @@ -232,8 +227,6 @@ static void hci_cc_write_local_name(struct hci_dev *hdev, struct sk_buff *skb) if (!status && !test_bit(HCI_INIT, &hdev->flags)) hci_update_ad(hdev); - - hci_req_complete(hdev, HCI_OP_WRITE_LOCAL_NAME, status); } static void hci_cc_read_local_name(struct hci_dev *hdev, struct sk_buff *skb) @@ -271,8 +264,6 @@ static void hci_cc_write_auth_enable(struct hci_dev *hdev, struct sk_buff *skb) if (test_bit(HCI_MGMT, &hdev->dev_flags)) mgmt_auth_enable_complete(hdev, status); - - hci_req_complete(hdev, HCI_OP_WRITE_AUTH_ENABLE, status); } static void hci_cc_write_encrypt_mode(struct hci_dev *hdev, struct sk_buff *skb) @@ -294,8 +285,6 @@ static void hci_cc_write_encrypt_mode(struct hci_dev *hdev, struct sk_buff *skb) else clear_bit(HCI_ENCRYPT, &hdev->flags); } - - hci_req_complete(hdev, HCI_OP_WRITE_ENCRYPT_MODE, status); } static void hci_cc_write_scan_enable(struct hci_dev *hdev, struct sk_buff *skb) @@ -344,7 +333,6 @@ static void hci_cc_write_scan_enable(struct hci_dev *hdev, struct sk_buff *skb) done: hci_dev_unlock(hdev); - hci_req_complete(hdev, HCI_OP_WRITE_SCAN_ENABLE, status); } static void hci_cc_read_class_of_dev(struct hci_dev *hdev, struct sk_buff *skb) @@ -441,8 +429,6 @@ static void hci_cc_host_buffer_size(struct hci_dev *hdev, struct sk_buff *skb) __u8 status = *((__u8 *) skb->data); BT_DBG("%s status 0x%2.2x", hdev->name, status); - - hci_req_complete(hdev, HCI_OP_HOST_BUFFER_SIZE, status); } static void hci_cc_write_ssp_mode(struct hci_dev *hdev, struct sk_buff *skb) @@ -480,7 +466,7 @@ static void hci_cc_read_local_version(struct hci_dev *hdev, struct sk_buff *skb) BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); if (rp->status) - goto done; + return; hdev->hci_ver = rp->hci_ver; hdev->hci_rev = __le16_to_cpu(rp->hci_rev); @@ -490,9 +476,6 @@ static void hci_cc_read_local_version(struct hci_dev *hdev, struct sk_buff *skb) BT_DBG("%s manufacturer 0x%4.4x hci ver %d:%d", hdev->name, hdev->manufacturer, hdev->hci_ver, hdev->hci_rev); - -done: - hci_req_complete(hdev, HCI_OP_READ_LOCAL_VERSION, rp->status); } static void hci_cc_read_local_commands(struct hci_dev *hdev, @@ -504,8 +487,6 @@ static void hci_cc_read_local_commands(struct hci_dev *hdev, if (!rp->status) memcpy(hdev->commands, rp->commands, sizeof(hdev->commands)); - - hci_req_complete(hdev, HCI_OP_READ_LOCAL_COMMANDS, rp->status); } static void hci_cc_read_local_features(struct hci_dev *hdev, @@ -572,7 +553,7 @@ static void hci_cc_read_local_ext_features(struct hci_dev *hdev, BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); if (rp->status) - goto done; + return; switch (rp->page) { case 0: @@ -582,9 +563,6 @@ static void hci_cc_read_local_ext_features(struct hci_dev *hdev, memcpy(hdev->host_features, rp->features, 8); break; } - -done: - hci_req_complete(hdev, HCI_OP_READ_LOCAL_EXT_FEATURES, rp->status); } static void hci_cc_read_flow_control_mode(struct hci_dev *hdev, @@ -594,12 +572,8 @@ static void hci_cc_read_flow_control_mode(struct hci_dev *hdev, BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); - if (rp->status) - return; - - hdev->flow_ctl_mode = rp->mode; - - hci_req_complete(hdev, HCI_OP_READ_FLOW_CONTROL_MODE, rp->status); + if (!rp->status) + hdev->flow_ctl_mode = rp->mode; } static void hci_cc_read_buffer_size(struct hci_dev *hdev, struct sk_buff *skb) @@ -636,8 +610,6 @@ static void hci_cc_read_bd_addr(struct hci_dev *hdev, struct sk_buff *skb) if (!rp->status) bacpy(&hdev->bdaddr, &rp->bdaddr); - - hci_req_complete(hdev, HCI_OP_READ_BD_ADDR, rp->status); } static void hci_cc_read_data_block_size(struct hci_dev *hdev, @@ -658,8 +630,6 @@ static void hci_cc_read_data_block_size(struct hci_dev *hdev, BT_DBG("%s blk mtu %d cnt %d len %d", hdev->name, hdev->block_mtu, hdev->block_cnt, hdev->block_len); - - hci_req_complete(hdev, HCI_OP_READ_DATA_BLOCK_SIZE, rp->status); } static void hci_cc_write_ca_timeout(struct hci_dev *hdev, struct sk_buff *skb) @@ -667,8 +637,6 @@ static void hci_cc_write_ca_timeout(struct hci_dev *hdev, struct sk_buff *skb) __u8 status = *((__u8 *) skb->data); BT_DBG("%s status 0x%2.2x", hdev->name, status); - - hci_req_complete(hdev, HCI_OP_WRITE_CA_TIMEOUT, status); } static void hci_cc_read_local_amp_info(struct hci_dev *hdev, @@ -692,8 +660,6 @@ static void hci_cc_read_local_amp_info(struct hci_dev *hdev, hdev->amp_be_flush_to = __le32_to_cpu(rp->be_flush_to); hdev->amp_max_flush_to = __le32_to_cpu(rp->max_flush_to); - hci_req_complete(hdev, HCI_OP_READ_LOCAL_AMP_INFO, rp->status); - a2mp_rsp: a2mp_send_getinfo_rsp(hdev); } @@ -741,8 +707,6 @@ static void hci_cc_delete_stored_link_key(struct hci_dev *hdev, __u8 status = *((__u8 *) skb->data); BT_DBG("%s status 0x%2.2x", hdev->name, status); - - hci_req_complete(hdev, HCI_OP_DELETE_STORED_LINK_KEY, status); } static void hci_cc_set_event_mask(struct hci_dev *hdev, struct sk_buff *skb) @@ -750,8 +714,6 @@ static void hci_cc_set_event_mask(struct hci_dev *hdev, struct sk_buff *skb) __u8 status = *((__u8 *) skb->data); BT_DBG("%s status 0x%2.2x", hdev->name, status); - - hci_req_complete(hdev, HCI_OP_SET_EVENT_MASK, status); } static void hci_cc_write_inquiry_mode(struct hci_dev *hdev, @@ -760,8 +722,6 @@ static void hci_cc_write_inquiry_mode(struct hci_dev *hdev, __u8 status = *((__u8 *) skb->data); BT_DBG("%s status 0x%2.2x", hdev->name, status); - - hci_req_complete(hdev, HCI_OP_WRITE_INQUIRY_MODE, status); } static void hci_cc_read_inq_rsp_tx_power(struct hci_dev *hdev, @@ -773,8 +733,6 @@ static void hci_cc_read_inq_rsp_tx_power(struct hci_dev *hdev, if (!rp->status) hdev->inq_tx_power = rp->tx_power; - - hci_req_complete(hdev, HCI_OP_READ_INQ_RSP_TX_POWER, rp->status); } static void hci_cc_set_event_flt(struct hci_dev *hdev, struct sk_buff *skb) @@ -782,8 +740,6 @@ static void hci_cc_set_event_flt(struct hci_dev *hdev, struct sk_buff *skb) __u8 status = *((__u8 *) skb->data); BT_DBG("%s status 0x%2.2x", hdev->name, status); - - hci_req_complete(hdev, HCI_OP_SET_EVENT_FLT, status); } static void hci_cc_pin_code_reply(struct hci_dev *hdev, struct sk_buff *skb) @@ -845,8 +801,6 @@ static void hci_cc_le_read_buffer_size(struct hci_dev *hdev, hdev->le_cnt = hdev->le_pkts; BT_DBG("%s le mtu %d:%d", hdev->name, hdev->le_mtu, hdev->le_pkts); - - hci_req_complete(hdev, HCI_OP_LE_READ_BUFFER_SIZE, rp->status); } static void hci_cc_le_read_local_features(struct hci_dev *hdev, @@ -858,8 +812,6 @@ static void hci_cc_le_read_local_features(struct hci_dev *hdev, if (!rp->status) memcpy(hdev->le_features, rp->features, 8); - - hci_req_complete(hdev, HCI_OP_LE_READ_LOCAL_FEATURES, rp->status); } static void hci_cc_le_read_adv_tx_power(struct hci_dev *hdev, @@ -874,8 +826,6 @@ static void hci_cc_le_read_adv_tx_power(struct hci_dev *hdev, if (!test_bit(HCI_INIT, &hdev->flags)) hci_update_ad(hdev); } - - hci_req_complete(hdev, HCI_OP_LE_READ_ADV_TX_POWER, rp->status); } static void hci_cc_le_set_event_mask(struct hci_dev *hdev, struct sk_buff *skb) @@ -883,8 +833,6 @@ static void hci_cc_le_set_event_mask(struct hci_dev *hdev, struct sk_buff *skb) __u8 status = *((__u8 *) skb->data); BT_DBG("%s status 0x%2.2x", hdev->name, status); - - hci_req_complete(hdev, HCI_OP_LE_SET_EVENT_MASK, status); } static void hci_cc_user_confirm_reply(struct hci_dev *hdev, struct sk_buff *skb) @@ -985,8 +933,6 @@ static void hci_cc_le_set_adv_enable(struct hci_dev *hdev, struct sk_buff *skb) if (!test_bit(HCI_INIT, &hdev->flags)) hci_update_ad(hdev); - - hci_req_complete(hdev, HCI_OP_LE_SET_ADV_ENABLE, status); } static void hci_cc_le_set_scan_param(struct hci_dev *hdev, struct sk_buff *skb) @@ -995,8 +941,6 @@ static void hci_cc_le_set_scan_param(struct hci_dev *hdev, struct sk_buff *skb) BT_DBG("%s status 0x%2.2x", hdev->name, status); - hci_req_complete(hdev, HCI_OP_LE_SET_SCAN_PARAM, status); - if (status) { hci_dev_lock(hdev); mgmt_start_discovery_failed(hdev, status); @@ -1019,8 +963,6 @@ static void hci_cc_le_set_scan_enable(struct hci_dev *hdev, switch (cp->enable) { case LE_SCANNING_ENABLED: - hci_req_complete(hdev, HCI_OP_LE_SET_SCAN_ENABLE, status); - if (status) { hci_dev_lock(hdev); mgmt_start_discovery_failed(hdev, status); @@ -1071,8 +1013,6 @@ static void hci_cc_le_read_white_list_size(struct hci_dev *hdev, if (!rp->status) hdev->le_white_list_size = rp->size; - - hci_req_complete(hdev, HCI_OP_LE_READ_WHITE_LIST_SIZE, rp->status); } static void hci_cc_le_ltk_reply(struct hci_dev *hdev, struct sk_buff *skb) @@ -1083,8 +1023,6 @@ static void hci_cc_le_ltk_reply(struct hci_dev *hdev, struct sk_buff *skb) if (rp->status) return; - - hci_req_complete(hdev, HCI_OP_LE_LTK_REPLY, rp->status); } static void hci_cc_le_ltk_neg_reply(struct hci_dev *hdev, struct sk_buff *skb) @@ -1095,8 +1033,6 @@ static void hci_cc_le_ltk_neg_reply(struct hci_dev *hdev, struct sk_buff *skb) if (rp->status) return; - - hci_req_complete(hdev, HCI_OP_LE_LTK_NEG_REPLY, rp->status); } static void hci_cc_le_read_supported_states(struct hci_dev *hdev, @@ -1108,8 +1044,6 @@ static void hci_cc_le_read_supported_states(struct hci_dev *hdev, if (!rp->status) memcpy(hdev->le_states, rp->le_states, 8); - - hci_req_complete(hdev, HCI_OP_LE_READ_SUPPORTED_STATES, rp->status); } static void hci_cc_write_le_host_supported(struct hci_dev *hdev, @@ -1139,8 +1073,6 @@ static void hci_cc_write_le_host_supported(struct hci_dev *hdev, if (test_bit(HCI_MGMT, &hdev->dev_flags) && !test_bit(HCI_INIT, &hdev->flags)) mgmt_le_enable_complete(hdev, sent->le, status); - - hci_req_complete(hdev, HCI_OP_WRITE_LE_HOST_SUPPORTED, status); } static void hci_cc_write_remote_amp_assoc(struct hci_dev *hdev, @@ -1162,7 +1094,6 @@ static void hci_cs_inquiry(struct hci_dev *hdev, __u8 status) BT_DBG("%s status 0x%2.2x", hdev->name, status); if (status) { - hci_req_complete(hdev, HCI_OP_INQUIRY, status); hci_conn_check_pending(hdev); hci_dev_lock(hdev); if (test_bit(HCI_MGMT, &hdev->dev_flags)) @@ -1694,7 +1625,6 @@ static void hci_inquiry_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) BT_DBG("%s status 0x%2.2x", hdev->name, status); hci_req_cmd_complete(hdev, HCI_OP_INQUIRY, status); - hci_req_complete(hdev, HCI_OP_INQUIRY, status); hci_conn_check_pending(hdev); -- cgit v1.2.3 From cecbb967b2f5c52e090978ff6afe7deddbfbeda5 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Tue, 5 Mar 2013 20:37:50 +0200 Subject: Bluetooth: Remove unused hdev->init_last_cmd This variable is no longer needed (due to async HCI request support and the conversion of hci_req_sync to use it), so it can be safely removed. Signed-off-by: Johan Hedberg Acked-by: Marcel Holtmann Signed-off-by: Gustavo Padovan --- include/net/bluetooth/hci_core.h | 2 -- net/bluetooth/hci_core.c | 6 ------ 2 files changed, 8 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 3f124f43fcb1..3a9cbf2b55c0 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -248,8 +248,6 @@ struct hci_dev { __u32 req_status; __u32 req_result; - __u16 init_last_cmd; - struct list_head mgmt_pending; struct discovery_state discovery; diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 6218eced1530..3fc699db8fb5 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -1015,10 +1015,7 @@ int hci_dev_open(__u16 dev) if (!test_bit(HCI_RAW, &hdev->flags)) { atomic_set(&hdev->cmd_cnt, 1); set_bit(HCI_INIT, &hdev->flags); - hdev->init_last_cmd = 0; - ret = __hci_init(hdev); - clear_bit(HCI_INIT, &hdev->flags); } @@ -2509,9 +2506,6 @@ int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen, void *param) return -ENOMEM; } - if (test_bit(HCI_INIT, &hdev->flags)) - hdev->init_last_cmd = opcode; - /* Stand-alone HCI commands must be flaged as * single-command requests. */ -- cgit v1.2.3 From b7ef213ef65256168df83ddfbb8131ed9adc10f9 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Fri, 8 Mar 2013 02:07:16 +0000 Subject: ipv6: introdcue __ipv6_addr_needs_scope_id and ipv6_iface_scope_id helper functions __ipv6_addr_needs_scope_id checks if an ipv6 address needs to supply a 'sin6_scope_id != 0'. 'sin6_scope_id != 0' was enforced in case of link-local addresses. To support interface-local multicast these checks had to be enhanced and are now consolidated into these new helper functions. v2: a) migrated to struct ipv6_addr_props v3: a) reverted changes for ipv6_addr_props b) test for address type instead of comparing scope v4: a) unchanged Suggested-by: YOSHIFUJI Hideaki Cc: YOSHIFUJI Hideaki Acked-by: YOSHIFUJI Hideaki Signed-off-by: Hannes Frederic Sowa Acked-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- include/net/ipv6.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/net') diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 988c9f28f0fc..42ef6abf25c3 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -320,6 +320,18 @@ static inline int ipv6_addr_src_scope(const struct in6_addr *addr) return __ipv6_addr_src_scope(__ipv6_addr_type(addr)); } +static inline bool __ipv6_addr_needs_scope_id(int type) +{ + return type & IPV6_ADDR_LINKLOCAL || + (type & IPV6_ADDR_MULTICAST && + (type & (IPV6_ADDR_LOOPBACK|IPV6_ADDR_LINKLOCAL))); +} + +static inline __u32 ipv6_iface_scope_id(const struct in6_addr *addr, int iface) +{ + return __ipv6_addr_needs_scope_id(__ipv6_addr_type(addr)) ? iface : 0; +} + static inline int ipv6_addr_cmp(const struct in6_addr *a1, const struct in6_addr *a2) { return memcmp(a1, a2, sizeof(struct in6_addr)); -- cgit v1.2.3 From 5d73e0342fd9bf500583868906325d42c4d2bf6f Mon Sep 17 00:00:00 2001 From: Andre Guedes Date: Fri, 8 Mar 2013 11:20:16 -0300 Subject: Bluetooth: HCI request error handling When we are building a HCI request with more than one HCI command and one of the hci_req_add calls fail, we should have some cleanup routine so the HCI commands already queued on HCI request can be deleted. Otherwise, we will face some memory leaks issues. This patch implements the HCI request error handling which is the following: If a hci_req_add fails, we save the error code in hci_ request. Once hci_req_run is called, we verify the error field. If it is different from zero, we delete all HCI commands already queued and return the error code. Signed-off-by: Andre Guedes Acked-by: Johan Hedberg Signed-off-by: Gustavo Padovan --- include/net/bluetooth/hci_core.h | 5 +++++ net/bluetooth/hci_core.c | 13 ++++++++++++- 2 files changed, 17 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 3a9cbf2b55c0..332ee5099a52 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1042,6 +1042,11 @@ int hci_unregister_cb(struct hci_cb *hcb); struct hci_request { struct hci_dev *hdev; struct sk_buff_head cmd_q; + + /* If something goes wrong when building the HCI request, the error + * value is stored in this field. + */ + int err; }; void hci_req_init(struct hci_request *req, struct hci_dev *hdev); diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 4603464b91e2..b432baafdf12 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -2443,6 +2443,7 @@ void hci_req_init(struct hci_request *req, struct hci_dev *hdev) { skb_queue_head_init(&req->cmd_q); req->hdev = hdev; + req->err = 0; } int hci_req_run(struct hci_request *req, hci_req_complete_t complete) @@ -2453,6 +2454,14 @@ int hci_req_run(struct hci_request *req, hci_req_complete_t complete) BT_DBG("length %u", skb_queue_len(&req->cmd_q)); + /* If an error occured during request building, remove all HCI + * commands queued on the HCI request queue. + */ + if (req->err) { + skb_queue_purge(&req->cmd_q); + return req->err; + } + /* Do not allow empty requests */ if (skb_queue_empty(&req->cmd_q)) return -ENODATA; @@ -2529,7 +2538,9 @@ int hci_req_add(struct hci_request *req, u16 opcode, u32 plen, void *param) skb = hci_prepare_cmd(hdev, opcode, plen, param); if (!skb) { - BT_ERR("%s no memory for command", hdev->name); + BT_ERR("%s no memory for command (opcode 0x%4.4x)", + hdev->name, opcode); + req->err = -ENOMEM; return -ENOMEM; } -- cgit v1.2.3 From e348fe6bbab85c513816d2536ffabac4be016442 Mon Sep 17 00:00:00 2001 From: Andre Guedes Date: Fri, 8 Mar 2013 11:20:17 -0300 Subject: Bluetooth: Make hci_req_add returning void Since no one checks the returning value of hci_req_add and HCI request errors are now handled in hci_req_run, we can make hci_ req_add returning void. Signed-off-by: Andre Guedes Acked-by: Johan Hedberg Signed-off-by: Gustavo Padovan --- include/net/bluetooth/hci_core.h | 2 +- net/bluetooth/hci_core.c | 6 ++---- 2 files changed, 3 insertions(+), 5 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 332ee5099a52..d6c32561fc02 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1051,7 +1051,7 @@ struct hci_request { void hci_req_init(struct hci_request *req, struct hci_dev *hdev); int hci_req_run(struct hci_request *req, hci_req_complete_t complete); -int hci_req_add(struct hci_request *req, u16 opcode, u32 plen, void *param); +void hci_req_add(struct hci_request *req, u16 opcode, u32 plen, void *param); void hci_req_cmd_complete(struct hci_dev *hdev, u16 opcode, u8 status); void hci_req_cmd_status(struct hci_dev *hdev, u16 opcode, u8 status); diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index b432baafdf12..1c678757c83a 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -2529,7 +2529,7 @@ int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen, void *param) } /* Queue a command to an asynchronous HCI request */ -int hci_req_add(struct hci_request *req, u16 opcode, u32 plen, void *param) +void hci_req_add(struct hci_request *req, u16 opcode, u32 plen, void *param) { struct hci_dev *hdev = req->hdev; struct sk_buff *skb; @@ -2541,15 +2541,13 @@ int hci_req_add(struct hci_request *req, u16 opcode, u32 plen, void *param) BT_ERR("%s no memory for command (opcode 0x%4.4x)", hdev->name, opcode); req->err = -ENOMEM; - return -ENOMEM; + return; } if (skb_queue_empty(&req->cmd_q)) bt_cb(skb)->req.start = true; skb_queue_tail(&req->cmd_q, skb); - - return 0; } /* Get data from the previously sent command */ -- cgit v1.2.3 From 6aed0c8bf7d2f389b472834053eb6e3bd6926999 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sat, 9 Mar 2013 16:38:39 +0000 Subject: tunnel: use iptunnel_xmit() again With recent patches from Pravin, most tunnels can't use iptunnel_xmit() any more, due to ip_select_ident() and skb->ip_summed. But we can just move these operations out of iptunnel_xmit(), so that tunnels can use it again. This by the way fixes a bug in vxlan (missing nf_reset()) for net-next. Cc: Pravin B Shelar Cc: Stephen Hemminger Cc: "David S. Miller" Signed-off-by: Cong Wang Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- drivers/net/vxlan.c | 14 +------------- include/net/ipip.h | 3 --- net/ipv4/ip_gre.c | 16 +--------------- net/ipv4/ipip.c | 18 +----------------- net/ipv6/sit.c | 2 ++ 5 files changed, 5 insertions(+), 48 deletions(-) (limited to 'include/net') diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index f057ec00bba3..f3a135cb50a9 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -855,7 +855,6 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev) __u16 src_port; __be16 df = 0; __u8 tos, ttl; - int err; bool did_rsc = false; const struct vxlan_fdb *f; @@ -980,18 +979,7 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev) if (handle_offloads(skb)) goto drop; - err = ip_local_out(skb); - if (likely(net_xmit_eval(err) == 0)) { - struct vxlan_stats *stats = this_cpu_ptr(vxlan->stats); - - u64_stats_update_begin(&stats->syncp); - stats->tx_packets++; - stats->tx_bytes += pkt_len; - u64_stats_update_end(&stats->syncp); - } else { - dev->stats.tx_errors++; - dev->stats.tx_aborted_errors++; - } + iptunnel_xmit(skb, dev); return NETDEV_TX_OK; drop: diff --git a/include/net/ipip.h b/include/net/ipip.h index fd19625ff99d..0c046e3bca05 100644 --- a/include/net/ipip.h +++ b/include/net/ipip.h @@ -51,13 +51,10 @@ struct ip_tunnel_prl_entry { static inline void iptunnel_xmit(struct sk_buff *skb, struct net_device *dev) { int err; - struct iphdr *iph = ip_hdr(skb); int pkt_len = skb->len - skb_transport_offset(skb); struct pcpu_tstats *tstats = this_cpu_ptr(dev->tstats); nf_reset(skb); - skb->ip_summed = CHECKSUM_NONE; - ip_select_ident(iph, skb_dst(skb), NULL); err = ip_local_out(skb); if (likely(net_xmit_eval(err) == 0)) { diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index d0ef0e674ec5..a13a0972a57f 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -762,7 +762,6 @@ error: static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) { - struct pcpu_tstats *tstats = this_cpu_ptr(dev->tstats); struct ip_tunnel *tunnel = netdev_priv(dev); const struct iphdr *old_iph; const struct iphdr *tiph; @@ -778,7 +777,6 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev int mtu; u8 ttl; int err; - int pkt_len; skb = handle_offloads(tunnel, skb); if (IS_ERR(skb)) { @@ -1022,19 +1020,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev } } - nf_reset(skb); - - pkt_len = skb->len - skb_transport_offset(skb); - err = ip_local_out(skb); - if (likely(net_xmit_eval(err) == 0)) { - u64_stats_update_begin(&tstats->syncp); - tstats->tx_bytes += pkt_len; - tstats->tx_packets++; - u64_stats_update_end(&tstats->syncp); - } else { - dev->stats.tx_errors++; - dev->stats.tx_aborted_errors++; - } + iptunnel_xmit(skb, dev); return NETDEV_TX_OK; #if IS_ENABLED(CONFIG_IPV6) diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index b50435ba0ce5..34e006fe2d87 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -478,8 +478,6 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) __be32 dst = tiph->daddr; struct flowi4 fl4; int mtu; - int err; - int pkt_len; if (skb->protocol != htons(ETH_P_IP)) goto tx_error; @@ -600,21 +598,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) if ((iph->ttl = tiph->ttl) == 0) iph->ttl = old_iph->ttl; - nf_reset(skb); - - pkt_len = skb->len - skb_transport_offset(skb); - err = ip_local_out(skb); - if (likely(net_xmit_eval(err) == 0)) { - struct pcpu_tstats *tstats = this_cpu_ptr(dev->tstats); - - u64_stats_update_begin(&tstats->syncp); - tstats->tx_bytes += pkt_len; - tstats->tx_packets++; - u64_stats_update_end(&tstats->syncp); - } else { - dev->stats.tx_errors++; - dev->stats.tx_aborted_errors++; - } + iptunnel_xmit(skb, dev); return NETDEV_TX_OK; diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 02f96dcbcf02..898e671a526b 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -899,6 +899,8 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, if ((iph->ttl = tiph->ttl) == 0) iph->ttl = iph6->hop_limit; + skb->ip_summed = CHECKSUM_NONE; + ip_select_ident(iph, skb_dst(skb), NULL); iptunnel_xmit(skb, dev); return NETDEV_TX_OK; -- cgit v1.2.3 From e8f72ea4a1380eeca10a551bc8d678e7d4388d42 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sat, 9 Mar 2013 23:00:39 +0000 Subject: ipv6: introduce ip6tunnel_xmit() helper Similar to iptunnel_xmit(), group these operations into a helper function. This by the way fixes the missing u64_stats_update_begin() and u64_stats_update_end() for 32 bit arch. Cc: Eric Dumazet Cc: Pravin B Shelar Cc: "David S. Miller" Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- include/net/ip6_tunnel.h | 20 ++++++++++++++++++++ net/ipv6/ip6_gre.c | 17 +---------------- net/ipv6/ip6_tunnel.c | 15 +-------------- 3 files changed, 22 insertions(+), 30 deletions(-) (limited to 'include/net') diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h index e03047f7090b..ebdef7f60862 100644 --- a/include/net/ip6_tunnel.h +++ b/include/net/ip6_tunnel.h @@ -68,4 +68,24 @@ __u16 ip6_tnl_parse_tlv_enc_lim(struct sk_buff *skb, __u8 *raw); __u32 ip6_tnl_get_cap(struct ip6_tnl *t, const struct in6_addr *laddr, const struct in6_addr *raddr); +static inline void ip6tunnel_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct net_device_stats *stats = &dev->stats; + int pkt_len, err; + + nf_reset(skb); + pkt_len = skb->len; + err = ip6_local_out(skb); + + if (net_xmit_eval(err) == 0) { + struct pcpu_tstats *tstats = this_cpu_ptr(dev->tstats); + u64_stats_update_begin(&tstats->syncp); + tstats->tx_bytes += pkt_len; + tstats->tx_packets++; + u64_stats_update_end(&tstats->syncp); + } else { + stats->tx_errors++; + stats->tx_aborted_errors++; + } +} #endif diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index e4efffe2522e..6a6ba73ff265 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -667,7 +667,6 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb, struct net_device_stats *stats = &tunnel->dev->stats; int err = -1; u8 proto; - int pkt_len; struct sk_buff *new_skb; if (dev->type == ARPHRD_ETHER) @@ -801,23 +800,9 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb, } } - nf_reset(skb); - pkt_len = skb->len; - err = ip6_local_out(skb); - - if (net_xmit_eval(err) == 0) { - struct pcpu_tstats *tstats = this_cpu_ptr(tunnel->dev->tstats); - - tstats->tx_bytes += pkt_len; - tstats->tx_packets++; - } else { - stats->tx_errors++; - stats->tx_aborted_errors++; - } - + ip6tunnel_xmit(skb, dev); if (ndst) ip6_tnl_dst_store(tunnel, ndst); - return 0; tx_err_link_failure: stats->tx_carrier_errors++; diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index fff83cbc197f..bef3fedfdc56 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -955,7 +955,6 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, unsigned int max_headroom = sizeof(struct ipv6hdr); u8 proto; int err = -1; - int pkt_len; if (!fl6->flowi6_mark) dst = ip6_tnl_dst_check(t); @@ -1035,19 +1034,7 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, ipv6h->nexthdr = proto; ipv6h->saddr = fl6->saddr; ipv6h->daddr = fl6->daddr; - nf_reset(skb); - pkt_len = skb->len; - err = ip6_local_out(skb); - - if (net_xmit_eval(err) == 0) { - struct pcpu_tstats *tstats = this_cpu_ptr(t->dev->tstats); - - tstats->tx_bytes += pkt_len; - tstats->tx_packets++; - } else { - stats->tx_errors++; - stats->tx_aborted_errors++; - } + ip6tunnel_xmit(skb, dev); if (ndst) ip6_tnl_dst_store(t, ndst); return 0; -- cgit v1.2.3 From 488b366a452934141959384c7a1b52b22d6154ef Mon Sep 17 00:00:00 2001 From: Alexander Bondar Date: Mon, 11 Feb 2013 14:56:29 +0200 Subject: mac80211: add driver callback for per-interface multicast filter Some devices have multicast filter capability for each individual virtual interface rather than just a global one. Add an interface specific driver callback allowing such drivers to configure this. Signed-off-by: Alexander Bondar Signed-off-by: Johannes Berg --- include/net/mac80211.h | 7 +++++++ net/mac80211/driver-ops.h | 16 ++++++++++++++++ net/mac80211/iface.c | 11 +++++++++++ net/mac80211/trace.h | 24 ++++++++++++++++++++++++ 4 files changed, 58 insertions(+) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 8c0ca11a39c4..f5db5e970428 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -2259,6 +2259,9 @@ enum ieee80211_roc_type { * See the section "Frame filtering" for more information. * This callback must be implemented and can sleep. * + * @set_multicast_list: Configure the device's interface specific RX multicast + * filter. This callback is optional. This callback must be atomic. + * * @set_tim: Set TIM bit. mac80211 calls this function when a TIM bit * must be set or cleared for a given STA. Must be atomic. * @@ -2605,6 +2608,10 @@ struct ieee80211_ops { unsigned int changed_flags, unsigned int *total_flags, u64 multicast); + void (*set_multicast_list)(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, bool allmulti, + struct netdev_hw_addr_list *mc_list); + int (*set_tim)(struct ieee80211_hw *hw, struct ieee80211_sta *sta, bool set); int (*set_key)(struct ieee80211_hw *hw, enum set_key_cmd cmd, diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 832acea4a5cb..025b7592b797 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -241,6 +241,22 @@ static inline u64 drv_prepare_multicast(struct ieee80211_local *local, return ret; } +static inline void drv_set_multicast_list(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct netdev_hw_addr_list *mc_list) +{ + bool allmulti = sdata->flags & IEEE80211_SDATA_ALLMULTI; + + trace_drv_set_multicast_list(local, sdata, mc_list->count); + + check_sdata_in_driver(sdata); + + if (local->ops->set_multicast_list) + local->ops->set_multicast_list(&local->hw, &sdata->vif, + allmulti, mc_list); + trace_drv_return_void(local); +} + static inline void drv_configure_filter(struct ieee80211_local *local, unsigned int changed_flags, unsigned int *total_flags, diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index d85282f64405..9875e321c9e8 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -919,6 +919,17 @@ static void ieee80211_set_multicast_list(struct net_device *dev) atomic_dec(&local->iff_promiscs); sdata->flags ^= IEEE80211_SDATA_PROMISC; } + + /* + * TODO: If somebody needs this on AP interfaces, + * it can be enabled easily but multicast + * addresses from VLANs need to be synced. + */ + if (sdata->vif.type != NL80211_IFTYPE_MONITOR && + sdata->vif.type != NL80211_IFTYPE_AP_VLAN && + sdata->vif.type != NL80211_IFTYPE_AP) + drv_set_multicast_list(local, sdata, &dev->mc); + spin_lock_bh(&local->filter_lock); __hw_addr_sync(&local->mc_list, &dev->mc, dev->addr_len); spin_unlock_bh(&local->filter_lock); diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h index e7db2b804e0c..d97e4305cf1e 100644 --- a/net/mac80211/trace.h +++ b/net/mac80211/trace.h @@ -431,6 +431,30 @@ TRACE_EVENT(drv_prepare_multicast, ) ); +TRACE_EVENT(drv_set_multicast_list, + TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, int mc_count), + + TP_ARGS(local, sdata, mc_count), + + TP_STRUCT__entry( + LOCAL_ENTRY + __field(bool, allmulti) + __field(int, mc_count) + ), + + TP_fast_assign( + LOCAL_ASSIGN; + __entry->allmulti = sdata->flags & IEEE80211_SDATA_ALLMULTI; + __entry->mc_count = mc_count; + ), + + TP_printk( + LOCAL_PR_FMT " configure mc filter, count=%d, allmulti=%d", + LOCAL_PR_ARG, __entry->mc_count, __entry->allmulti + ) +); + TRACE_EVENT(drv_configure_filter, TP_PROTO(struct ieee80211_local *local, unsigned int changed_flags, -- cgit v1.2.3 From 6ba8a3b19e764b6a65e4030ab0999be50c291e6c Mon Sep 17 00:00:00 2001 From: Nandita Dukkipati Date: Mon, 11 Mar 2013 10:00:43 +0000 Subject: tcp: Tail loss probe (TLP) This patch series implement the Tail loss probe (TLP) algorithm described in http://tools.ietf.org/html/draft-dukkipati-tcpm-tcp-loss-probe-01. The first patch implements the basic algorithm. TLP's goal is to reduce tail latency of short transactions. It achieves this by converting retransmission timeouts (RTOs) occuring due to tail losses (losses at end of transactions) into fast recovery. TLP transmits one packet in two round-trips when a connection is in Open state and isn't receiving any ACKs. The transmitted packet, aka loss probe, can be either new or a retransmission. When there is tail loss, the ACK from a loss probe triggers FACK/early-retransmit based fast recovery, thus avoiding a costly RTO. In the absence of loss, there is no change in the connection state. PTO stands for probe timeout. It is a timer event indicating that an ACK is overdue and triggers a loss probe packet. The PTO value is set to max(2*SRTT, 10ms) and is adjusted to account for delayed ACK timer when there is only one oustanding packet. TLP Algorithm On transmission of new data in Open state: -> packets_out > 1: schedule PTO in max(2*SRTT, 10ms). -> packets_out == 1: schedule PTO in max(2*RTT, 1.5*RTT + 200ms) -> PTO = min(PTO, RTO) Conditions for scheduling PTO: -> Connection is in Open state. -> Connection is either cwnd limited or no new data to send. -> Number of probes per tail loss episode is limited to one. -> Connection is SACK enabled. When PTO fires: new_segment_exists: -> transmit new segment. -> packets_out++. cwnd remains same. no_new_packet: -> retransmit the last segment. Its ACK triggers FACK or early retransmit based recovery. ACK path: -> rearm RTO at start of ACK processing. -> reschedule PTO if need be. In addition, the patch includes a small variation to the Early Retransmit (ER) algorithm, such that ER and TLP together can in principle recover any N-degree of tail loss through fast recovery. TLP is controlled by the same sysctl as ER, tcp_early_retrans sysctl. tcp_early_retrans==0; disables TLP and ER. ==1; enables RFC5827 ER. ==2; delayed ER. ==3; TLP and delayed ER. [DEFAULT] ==4; TLP only. The TLP patch series have been extensively tested on Google Web servers. It is most effective for short Web trasactions, where it reduced RTOs by 15% and improved HTTP response time (average by 6%, 99th percentile by 10%). The transmitted probes account for <0.5% of the overall transmissions. Signed-off-by: Nandita Dukkipati Acked-by: Neal Cardwell Acked-by: Yuchung Cheng Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.txt | 8 ++- include/linux/tcp.h | 1 - include/net/inet_connection_sock.h | 5 +- include/net/tcp.h | 6 +- include/uapi/linux/snmp.h | 1 + net/ipv4/inet_diag.c | 4 +- net/ipv4/proc.c | 1 + net/ipv4/sysctl_net_ipv4.c | 4 +- net/ipv4/tcp_input.c | 24 ++++--- net/ipv4/tcp_ipv4.c | 4 +- net/ipv4/tcp_output.c | 128 +++++++++++++++++++++++++++++++-- net/ipv4/tcp_timer.c | 13 ++-- 12 files changed, 171 insertions(+), 28 deletions(-) (limited to 'include/net') diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index dc2dc87d2557..1cae6c383e1b 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -190,7 +190,9 @@ tcp_early_retrans - INTEGER Enable Early Retransmit (ER), per RFC 5827. ER lowers the threshold for triggering fast retransmit when the amount of outstanding data is small and when no previously unsent data can be transmitted (such - that limited transmit could be used). + that limited transmit could be used). Also controls the use of + Tail loss probe (TLP) that converts RTOs occuring due to tail + losses into fast recovery (draft-dukkipati-tcpm-tcp-loss-probe-01). Possible values: 0 disables ER 1 enables ER @@ -198,7 +200,9 @@ tcp_early_retrans - INTEGER by a fourth of RTT. This mitigates connection falsely recovers when network has a small degree of reordering (less than 3 packets). - Default: 2 + 3 enables delayed ER and TLP. + 4 enables TLP only. + Default: 3 tcp_ecn - INTEGER Control use of Explicit Congestion Notification (ECN) by TCP. diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 515c3746b675..01860d74555c 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -201,7 +201,6 @@ struct tcp_sock { unused : 1; u8 repair_queue; u8 do_early_retrans:1,/* Enable RFC5827 early-retransmit */ - early_retrans_delayed:1, /* Delayed ER timer installed */ syn_data:1, /* SYN includes data */ syn_fastopen:1, /* SYN includes Fast Open option */ syn_data_acked:1;/* data in SYN is acked by SYN-ACK */ diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 183292722f6e..de2c78529afa 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -133,6 +133,8 @@ struct inet_connection_sock { #define ICSK_TIME_RETRANS 1 /* Retransmit timer */ #define ICSK_TIME_DACK 2 /* Delayed ack timer */ #define ICSK_TIME_PROBE0 3 /* Zero window probe timer */ +#define ICSK_TIME_EARLY_RETRANS 4 /* Early retransmit timer */ +#define ICSK_TIME_LOSS_PROBE 5 /* Tail loss probe timer */ static inline struct inet_connection_sock *inet_csk(const struct sock *sk) { @@ -222,7 +224,8 @@ static inline void inet_csk_reset_xmit_timer(struct sock *sk, const int what, when = max_when; } - if (what == ICSK_TIME_RETRANS || what == ICSK_TIME_PROBE0) { + if (what == ICSK_TIME_RETRANS || what == ICSK_TIME_PROBE0 || + what == ICSK_TIME_EARLY_RETRANS || what == ICSK_TIME_LOSS_PROBE) { icsk->icsk_pending = what; icsk->icsk_timeout = jiffies + when; sk_reset_timer(sk, &icsk->icsk_retransmit_timer, icsk->icsk_timeout); diff --git a/include/net/tcp.h b/include/net/tcp.h index a2baa5e4ba31..ab9f947b118b 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -543,6 +543,8 @@ extern bool tcp_syn_flood_action(struct sock *sk, extern void tcp_push_one(struct sock *, unsigned int mss_now); extern void tcp_send_ack(struct sock *sk); extern void tcp_send_delayed_ack(struct sock *sk); +extern void tcp_send_loss_probe(struct sock *sk); +extern bool tcp_schedule_loss_probe(struct sock *sk); /* tcp_input.c */ extern void tcp_cwnd_application_limited(struct sock *sk); @@ -873,8 +875,8 @@ static inline void tcp_enable_fack(struct tcp_sock *tp) static inline void tcp_enable_early_retrans(struct tcp_sock *tp) { tp->do_early_retrans = sysctl_tcp_early_retrans && - !sysctl_tcp_thin_dupack && sysctl_tcp_reordering == 3; - tp->early_retrans_delayed = 0; + sysctl_tcp_early_retrans < 4 && !sysctl_tcp_thin_dupack && + sysctl_tcp_reordering == 3; } static inline void tcp_disable_early_retrans(struct tcp_sock *tp) diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h index b49eab89c9fd..290bed6b085f 100644 --- a/include/uapi/linux/snmp.h +++ b/include/uapi/linux/snmp.h @@ -202,6 +202,7 @@ enum LINUX_MIB_TCPFORWARDRETRANS, /* TCPForwardRetrans */ LINUX_MIB_TCPSLOWSTARTRETRANS, /* TCPSlowStartRetrans */ LINUX_MIB_TCPTIMEOUTS, /* TCPTimeouts */ + LINUX_MIB_TCPLOSSPROBES, /* TCPLossProbes */ LINUX_MIB_TCPRENORECOVERYFAIL, /* TCPRenoRecoveryFail */ LINUX_MIB_TCPSACKRECOVERYFAIL, /* TCPSackRecoveryFail */ LINUX_MIB_TCPSCHEDULERFAILED, /* TCPSchedulerFailed */ diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 7afa2c3c788f..8620408af574 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -158,7 +158,9 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, #define EXPIRES_IN_MS(tmo) DIV_ROUND_UP((tmo - jiffies) * 1000, HZ) - if (icsk->icsk_pending == ICSK_TIME_RETRANS) { + if (icsk->icsk_pending == ICSK_TIME_RETRANS || + icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS || + icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) { r->idiag_timer = 1; r->idiag_retrans = icsk->icsk_retransmits; r->idiag_expires = EXPIRES_IN_MS(icsk->icsk_timeout); diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 32030a24e776..4c35911d935f 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -224,6 +224,7 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TCPForwardRetrans", LINUX_MIB_TCPFORWARDRETRANS), SNMP_MIB_ITEM("TCPSlowStartRetrans", LINUX_MIB_TCPSLOWSTARTRETRANS), SNMP_MIB_ITEM("TCPTimeouts", LINUX_MIB_TCPTIMEOUTS), + SNMP_MIB_ITEM("TCPLossProbes", LINUX_MIB_TCPLOSSPROBES), SNMP_MIB_ITEM("TCPRenoRecoveryFail", LINUX_MIB_TCPRENORECOVERYFAIL), SNMP_MIB_ITEM("TCPSackRecoveryFail", LINUX_MIB_TCPSACKRECOVERYFAIL), SNMP_MIB_ITEM("TCPSchedulerFailed", LINUX_MIB_TCPSCHEDULERFAILED), diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 960fd29d9b8e..cca4550f4082 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -28,7 +28,7 @@ static int zero; static int one = 1; -static int two = 2; +static int four = 4; static int tcp_retr1_max = 255; static int ip_local_port_range_min[] = { 1, 1 }; static int ip_local_port_range_max[] = { 65535, 65535 }; @@ -760,7 +760,7 @@ static struct ctl_table ipv4_table[] = { .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = &zero, - .extra2 = &two, + .extra2 = &four, }, { .procname = "udp_mem", diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 0d9bdacce99f..b794f89ac1f2 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -98,7 +98,7 @@ int sysctl_tcp_frto_response __read_mostly; int sysctl_tcp_thin_dupack __read_mostly; int sysctl_tcp_moderate_rcvbuf __read_mostly = 1; -int sysctl_tcp_early_retrans __read_mostly = 2; +int sysctl_tcp_early_retrans __read_mostly = 3; #define FLAG_DATA 0x01 /* Incoming frame contained data. */ #define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. */ @@ -2150,15 +2150,16 @@ static bool tcp_pause_early_retransmit(struct sock *sk, int flag) * max(RTT/4, 2msec) unless ack has ECE mark, no RTT samples * available, or RTO is scheduled to fire first. */ - if (sysctl_tcp_early_retrans < 2 || (flag & FLAG_ECE) || !tp->srtt) + if (sysctl_tcp_early_retrans < 2 || sysctl_tcp_early_retrans > 3 || + (flag & FLAG_ECE) || !tp->srtt) return false; delay = max_t(unsigned long, (tp->srtt >> 5), msecs_to_jiffies(2)); if (!time_after(inet_csk(sk)->icsk_timeout, (jiffies + delay))) return false; - inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, delay, TCP_RTO_MAX); - tp->early_retrans_delayed = 1; + inet_csk_reset_xmit_timer(sk, ICSK_TIME_EARLY_RETRANS, delay, + TCP_RTO_MAX); return true; } @@ -2321,7 +2322,7 @@ static bool tcp_time_to_recover(struct sock *sk, int flag) * interval if appropriate. */ if (tp->do_early_retrans && !tp->retrans_out && tp->sacked_out && - (tp->packets_out == (tp->sacked_out + 1) && tp->packets_out < 4) && + (tp->packets_out >= (tp->sacked_out + 1) && tp->packets_out < 4) && !tcp_may_send_now(sk)) return !tcp_pause_early_retransmit(sk, flag); @@ -3081,6 +3082,7 @@ static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) */ void tcp_rearm_rto(struct sock *sk) { + const struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); /* If the retrans timer is currently being used by Fast Open @@ -3094,12 +3096,13 @@ void tcp_rearm_rto(struct sock *sk) } else { u32 rto = inet_csk(sk)->icsk_rto; /* Offset the time elapsed after installing regular RTO */ - if (tp->early_retrans_delayed) { + if (icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS || + icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) { struct sk_buff *skb = tcp_write_queue_head(sk); const u32 rto_time_stamp = TCP_SKB_CB(skb)->when + rto; s32 delta = (s32)(rto_time_stamp - tcp_time_stamp); /* delta may not be positive if the socket is locked - * when the delayed ER timer fires and is rescheduled. + * when the retrans timer fires and is rescheduled. */ if (delta > 0) rto = delta; @@ -3107,7 +3110,6 @@ void tcp_rearm_rto(struct sock *sk) inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, rto, TCP_RTO_MAX); } - tp->early_retrans_delayed = 0; } /* This function is called when the delayed ER timer fires. TCP enters @@ -3601,7 +3603,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) if (after(ack, tp->snd_nxt)) goto invalid_ack; - if (tp->early_retrans_delayed) + if (icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS || + icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) tcp_rearm_rto(sk); if (after(ack, prior_snd_una)) @@ -3678,6 +3681,9 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) if (dst) dst_confirm(dst); } + + if (icsk->icsk_pending == ICSK_TIME_RETRANS) + tcp_schedule_loss_probe(sk); return 1; no_queue: diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 8cdee120a50c..b7ab868c8284 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2703,7 +2703,9 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len) __u16 srcp = ntohs(inet->inet_sport); int rx_queue; - if (icsk->icsk_pending == ICSK_TIME_RETRANS) { + if (icsk->icsk_pending == ICSK_TIME_RETRANS || + icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS || + icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) { timer_active = 1; timer_expires = icsk->icsk_timeout; } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) { diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index e2b4461074da..beb63dbc85f5 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -74,6 +74,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, /* Account for new data that has been sent to the network. */ static void tcp_event_new_data_sent(struct sock *sk, const struct sk_buff *skb) { + struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); unsigned int prior_packets = tp->packets_out; @@ -85,7 +86,8 @@ static void tcp_event_new_data_sent(struct sock *sk, const struct sk_buff *skb) tp->frto_counter = 3; tp->packets_out += tcp_skb_pcount(skb); - if (!prior_packets || tp->early_retrans_delayed) + if (!prior_packets || icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS || + icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) tcp_rearm_rto(sk); } @@ -1959,6 +1961,9 @@ static int tcp_mtu_probe(struct sock *sk) * snd_up-64k-mss .. snd_up cannot be large. However, taking into * account rare use of URG, this is not a big flaw. * + * Send at most one packet when push_one > 0. Temporarily ignore + * cwnd limit to force at most one packet out when push_one == 2. + * Returns true, if no segments are in flight and we have queued segments, * but cannot send anything now because of SWS or another problem. */ @@ -1994,8 +1999,13 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, goto repair; /* Skip network transmission */ cwnd_quota = tcp_cwnd_test(tp, skb); - if (!cwnd_quota) - break; + if (!cwnd_quota) { + if (push_one == 2) + /* Force out a loss probe pkt. */ + cwnd_quota = 1; + else + break; + } if (unlikely(!tcp_snd_wnd_test(tp, skb, mss_now))) break; @@ -2049,10 +2059,120 @@ repair: if (likely(sent_pkts)) { if (tcp_in_cwnd_reduction(sk)) tp->prr_out += sent_pkts; + + /* Send one loss probe per tail loss episode. */ + if (push_one != 2) + tcp_schedule_loss_probe(sk); tcp_cwnd_validate(sk); return false; } - return !tp->packets_out && tcp_send_head(sk); + return (push_one == 2) || (!tp->packets_out && tcp_send_head(sk)); +} + +bool tcp_schedule_loss_probe(struct sock *sk) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + struct tcp_sock *tp = tcp_sk(sk); + u32 timeout, tlp_time_stamp, rto_time_stamp; + u32 rtt = tp->srtt >> 3; + + if (WARN_ON(icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS)) + return false; + /* No consecutive loss probes. */ + if (WARN_ON(icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)) { + tcp_rearm_rto(sk); + return false; + } + /* Don't do any loss probe on a Fast Open connection before 3WHS + * finishes. + */ + if (sk->sk_state == TCP_SYN_RECV) + return false; + + /* TLP is only scheduled when next timer event is RTO. */ + if (icsk->icsk_pending != ICSK_TIME_RETRANS) + return false; + + /* Schedule a loss probe in 2*RTT for SACK capable connections + * in Open state, that are either limited by cwnd or application. + */ + if (sysctl_tcp_early_retrans < 3 || !rtt || !tp->packets_out || + !tcp_is_sack(tp) || inet_csk(sk)->icsk_ca_state != TCP_CA_Open) + return false; + + if ((tp->snd_cwnd > tcp_packets_in_flight(tp)) && + tcp_send_head(sk)) + return false; + + /* Probe timeout is at least 1.5*rtt + TCP_DELACK_MAX to account + * for delayed ack when there's one outstanding packet. + */ + timeout = rtt << 1; + if (tp->packets_out == 1) + timeout = max_t(u32, timeout, + (rtt + (rtt >> 1) + TCP_DELACK_MAX)); + timeout = max_t(u32, timeout, msecs_to_jiffies(10)); + + /* If RTO is shorter, just schedule TLP in its place. */ + tlp_time_stamp = tcp_time_stamp + timeout; + rto_time_stamp = (u32)inet_csk(sk)->icsk_timeout; + if ((s32)(tlp_time_stamp - rto_time_stamp) > 0) { + s32 delta = rto_time_stamp - tcp_time_stamp; + if (delta > 0) + timeout = delta; + } + + inet_csk_reset_xmit_timer(sk, ICSK_TIME_LOSS_PROBE, timeout, + TCP_RTO_MAX); + return true; +} + +/* When probe timeout (PTO) fires, send a new segment if one exists, else + * retransmit the last segment. + */ +void tcp_send_loss_probe(struct sock *sk) +{ + struct sk_buff *skb; + int pcount; + int mss = tcp_current_mss(sk); + int err = -1; + + if (tcp_send_head(sk) != NULL) { + err = tcp_write_xmit(sk, mss, TCP_NAGLE_OFF, 2, GFP_ATOMIC); + goto rearm_timer; + } + + /* Retransmit last segment. */ + skb = tcp_write_queue_tail(sk); + if (WARN_ON(!skb)) + goto rearm_timer; + + pcount = tcp_skb_pcount(skb); + if (WARN_ON(!pcount)) + goto rearm_timer; + + if ((pcount > 1) && (skb->len > (pcount - 1) * mss)) { + if (unlikely(tcp_fragment(sk, skb, (pcount - 1) * mss, mss))) + goto rearm_timer; + skb = tcp_write_queue_tail(sk); + } + + if (WARN_ON(!skb || !tcp_skb_pcount(skb))) + goto rearm_timer; + + /* Probe with zero data doesn't trigger fast recovery. */ + if (skb->len > 0) + err = __tcp_retransmit_skb(sk, skb); + +rearm_timer: + inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, + inet_csk(sk)->icsk_rto, + TCP_RTO_MAX); + + if (likely(!err)) + NET_INC_STATS_BH(sock_net(sk), + LINUX_MIB_TCPLOSSPROBES); + return; } /* Push out any pending frames which were held back due to diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index b78aac30c498..ecd61d54147f 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -342,10 +342,6 @@ void tcp_retransmit_timer(struct sock *sk) struct tcp_sock *tp = tcp_sk(sk); struct inet_connection_sock *icsk = inet_csk(sk); - if (tp->early_retrans_delayed) { - tcp_resume_early_retransmit(sk); - return; - } if (tp->fastopen_rsk) { WARN_ON_ONCE(sk->sk_state != TCP_SYN_RECV && sk->sk_state != TCP_FIN_WAIT1); @@ -495,13 +491,20 @@ void tcp_write_timer_handler(struct sock *sk) } event = icsk->icsk_pending; - icsk->icsk_pending = 0; switch (event) { + case ICSK_TIME_EARLY_RETRANS: + tcp_resume_early_retransmit(sk); + break; + case ICSK_TIME_LOSS_PROBE: + tcp_send_loss_probe(sk); + break; case ICSK_TIME_RETRANS: + icsk->icsk_pending = 0; tcp_retransmit_timer(sk); break; case ICSK_TIME_PROBE0: + icsk->icsk_pending = 0; tcp_probe_timer(sk); break; } -- cgit v1.2.3 From 7a875903389f3492d4cb06faa1d55a1630e77c11 Mon Sep 17 00:00:00 2001 From: Erwan Yvin Date: Mon, 11 Mar 2013 03:13:03 +0000 Subject: caif: remove caif_shm caif_shm is an old implementation caif_shm will be replaced by caif_virtio [ As explained by Linus Walleij: "U5500 used this, but was cancelled and the silicon did not reach anyone outside ST-Ericsson. Then for the next platforms, we have gone for the leaner & cleaner approach of using virtio, rpmesg and rproc." ] Signed-off-by: Erwan Yvin Acked-by: Linus Walleij Acked-by: Sjur Brendeland Signed-off-by: David S. Miller --- drivers/net/caif/Kconfig | 7 - drivers/net/caif/Makefile | 4 - drivers/net/caif/caif_shm_u5500.c | 128 ------- drivers/net/caif/caif_shmcore.c | 744 -------------------------------------- include/net/caif/caif_shm.h | 26 -- 5 files changed, 909 deletions(-) delete mode 100644 drivers/net/caif/caif_shm_u5500.c delete mode 100644 drivers/net/caif/caif_shmcore.c delete mode 100644 include/net/caif/caif_shm.h (limited to 'include/net') diff --git a/drivers/net/caif/Kconfig b/drivers/net/caif/Kconfig index 60c2142373c9..a966128c2a7a 100644 --- a/drivers/net/caif/Kconfig +++ b/drivers/net/caif/Kconfig @@ -32,13 +32,6 @@ config CAIF_SPI_SYNC help to synchronize to the next transfer in case of over or under-runs. This option also needs to be enabled on the modem. -config CAIF_SHM - tristate "CAIF shared memory protocol driver" - depends on CAIF && U5500_MBOX - default n - ---help--- - The CAIF shared memory protocol driver for the STE UX5500 platform. - config CAIF_HSI tristate "CAIF HSI transport driver" depends on CAIF diff --git a/drivers/net/caif/Makefile b/drivers/net/caif/Makefile index 91dff861560f..15a9d2fc753d 100644 --- a/drivers/net/caif/Makefile +++ b/drivers/net/caif/Makefile @@ -7,9 +7,5 @@ obj-$(CONFIG_CAIF_TTY) += caif_serial.o cfspi_slave-objs := caif_spi.o caif_spi_slave.o obj-$(CONFIG_CAIF_SPI_SLAVE) += cfspi_slave.o -# Shared memory -caif_shm-objs := caif_shmcore.o caif_shm_u5500.o -obj-$(CONFIG_CAIF_SHM) += caif_shm.o - # HSI interface obj-$(CONFIG_CAIF_HSI) += caif_hsi.o diff --git a/drivers/net/caif/caif_shm_u5500.c b/drivers/net/caif/caif_shm_u5500.c deleted file mode 100644 index 89d76b7b325a..000000000000 --- a/drivers/net/caif/caif_shm_u5500.c +++ /dev/null @@ -1,128 +0,0 @@ -/* - * Copyright (C) ST-Ericsson AB 2010 - * Contact: Sjur Brendeland / sjur.brandeland@stericsson.com - * Author: Amarnath Revanna / amarnath.bangalore.revanna@stericsson.com - * License terms: GNU General Public License (GPL) version 2 - */ - -#define pr_fmt(fmt) KBUILD_MODNAME ":" fmt - -#include -#include -#include -#include -#include - -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("CAIF Shared Memory protocol driver"); - -#define MAX_SHM_INSTANCES 1 - -enum { - MBX_ACC0, - MBX_ACC1, - MBX_DSP -}; - -static struct shmdev_layer shmdev_lyr[MAX_SHM_INSTANCES]; - -static unsigned int shm_start; -static unsigned int shm_size; - -module_param(shm_size, uint , 0440); -MODULE_PARM_DESC(shm_total_size, "Start of SHM shared memory"); - -module_param(shm_start, uint , 0440); -MODULE_PARM_DESC(shm_total_start, "Total Size of SHM shared memory"); - -static int shmdev_send_msg(u32 dev_id, u32 mbx_msg) -{ - /* Always block until msg is written successfully */ - mbox_send(shmdev_lyr[dev_id].hmbx, mbx_msg, true); - return 0; -} - -static int shmdev_mbx_setup(void *pshmdrv_cb, struct shmdev_layer *pshm_dev, - void *pshm_drv) -{ - /* - * For UX5500, we have only 1 SHM instance which uses MBX0 - * for communication with the peer modem - */ - pshm_dev->hmbx = mbox_setup(MBX_ACC0, pshmdrv_cb, pshm_drv); - - if (!pshm_dev->hmbx) - return -ENODEV; - else - return 0; -} - -static int __init caif_shmdev_init(void) -{ - int i, result; - - /* Loop is currently overkill, there is only one instance */ - for (i = 0; i < MAX_SHM_INSTANCES; i++) { - - shmdev_lyr[i].shm_base_addr = shm_start; - shmdev_lyr[i].shm_total_sz = shm_size; - - if (((char *)shmdev_lyr[i].shm_base_addr == NULL) - || (shmdev_lyr[i].shm_total_sz <= 0)) { - pr_warn("ERROR," - "Shared memory Address and/or Size incorrect" - ", Bailing out ...\n"); - result = -EINVAL; - goto clean; - } - - pr_info("SHM AREA (instance %d) STARTS" - " AT %p\n", i, (char *)shmdev_lyr[i].shm_base_addr); - - shmdev_lyr[i].shm_id = i; - shmdev_lyr[i].pshmdev_mbxsend = shmdev_send_msg; - shmdev_lyr[i].pshmdev_mbxsetup = shmdev_mbx_setup; - - /* - * Finally, CAIF core module is called with details in place: - * 1. SHM base address - * 2. SHM size - * 3. MBX handle - */ - result = caif_shmcore_probe(&shmdev_lyr[i]); - if (result) { - pr_warn("ERROR[%d]," - "Could not probe SHM core (instance %d)" - " Bailing out ...\n", result, i); - goto clean; - } - } - - return 0; - -clean: - /* - * For now, we assume that even if one instance of SHM fails, we bail - * out of the driver support completely. For this, we need to release - * any memory allocated and unregister any instance of SHM net device. - */ - for (i = 0; i < MAX_SHM_INSTANCES; i++) { - if (shmdev_lyr[i].pshm_netdev) - unregister_netdev(shmdev_lyr[i].pshm_netdev); - } - return result; -} - -static void __exit caif_shmdev_exit(void) -{ - int i; - - for (i = 0; i < MAX_SHM_INSTANCES; i++) { - caif_shmcore_remove(shmdev_lyr[i].pshm_netdev); - kfree((void *)shmdev_lyr[i].shm_base_addr); - } - -} - -module_init(caif_shmdev_init); -module_exit(caif_shmdev_exit); diff --git a/drivers/net/caif/caif_shmcore.c b/drivers/net/caif/caif_shmcore.c deleted file mode 100644 index cca2afc945af..000000000000 --- a/drivers/net/caif/caif_shmcore.c +++ /dev/null @@ -1,744 +0,0 @@ -/* - * Copyright (C) ST-Ericsson AB 2010 - * Contact: Sjur Brendeland / sjur.brandeland@stericsson.com - * Authors: Amarnath Revanna / amarnath.bangalore.revanna@stericsson.com, - * Daniel Martensson / daniel.martensson@stericsson.com - * License terms: GNU General Public License (GPL) version 2 - */ - -#define pr_fmt(fmt) KBUILD_MODNAME ":" fmt - -#include -#include -#include -#include -#include -#include - -#include -#include - -#define NR_TX_BUF 6 -#define NR_RX_BUF 6 -#define TX_BUF_SZ 0x2000 -#define RX_BUF_SZ 0x2000 - -#define CAIF_NEEDED_HEADROOM 32 - -#define CAIF_FLOW_ON 1 -#define CAIF_FLOW_OFF 0 - -#define LOW_WATERMARK 3 -#define HIGH_WATERMARK 4 - -/* Maximum number of CAIF buffers per shared memory buffer. */ -#define SHM_MAX_FRMS_PER_BUF 10 - -/* - * Size in bytes of the descriptor area - * (With end of descriptor signalling) - */ -#define SHM_CAIF_DESC_SIZE ((SHM_MAX_FRMS_PER_BUF + 1) * \ - sizeof(struct shm_pck_desc)) - -/* - * Offset to the first CAIF frame within a shared memory buffer. - * Aligned on 32 bytes. - */ -#define SHM_CAIF_FRM_OFS (SHM_CAIF_DESC_SIZE + (SHM_CAIF_DESC_SIZE % 32)) - -/* Number of bytes for CAIF shared memory header. */ -#define SHM_HDR_LEN 1 - -/* Number of padding bytes for the complete CAIF frame. */ -#define SHM_FRM_PAD_LEN 4 - -#define CAIF_MAX_MTU 4096 - -#define SHM_SET_FULL(x) (((x+1) & 0x0F) << 0) -#define SHM_GET_FULL(x) (((x >> 0) & 0x0F) - 1) - -#define SHM_SET_EMPTY(x) (((x+1) & 0x0F) << 4) -#define SHM_GET_EMPTY(x) (((x >> 4) & 0x0F) - 1) - -#define SHM_FULL_MASK (0x0F << 0) -#define SHM_EMPTY_MASK (0x0F << 4) - -struct shm_pck_desc { - /* - * Offset from start of shared memory area to start of - * shared memory CAIF frame. - */ - u32 frm_ofs; - u32 frm_len; -}; - -struct buf_list { - unsigned char *desc_vptr; - u32 phy_addr; - u32 index; - u32 len; - u32 frames; - u32 frm_ofs; - struct list_head list; -}; - -struct shm_caif_frm { - /* Number of bytes of padding before the CAIF frame. */ - u8 hdr_ofs; -}; - -struct shmdrv_layer { - /* caif_dev_common must always be first in the structure*/ - struct caif_dev_common cfdev; - - u32 shm_tx_addr; - u32 shm_rx_addr; - u32 shm_base_addr; - u32 tx_empty_available; - spinlock_t lock; - - struct list_head tx_empty_list; - struct list_head tx_pend_list; - struct list_head tx_full_list; - struct list_head rx_empty_list; - struct list_head rx_pend_list; - struct list_head rx_full_list; - - struct workqueue_struct *pshm_tx_workqueue; - struct workqueue_struct *pshm_rx_workqueue; - - struct work_struct shm_tx_work; - struct work_struct shm_rx_work; - - struct sk_buff_head sk_qhead; - struct shmdev_layer *pshm_dev; -}; - -static int shm_netdev_open(struct net_device *shm_netdev) -{ - netif_wake_queue(shm_netdev); - return 0; -} - -static int shm_netdev_close(struct net_device *shm_netdev) -{ - netif_stop_queue(shm_netdev); - return 0; -} - -int caif_shmdrv_rx_cb(u32 mbx_msg, void *priv) -{ - struct buf_list *pbuf; - struct shmdrv_layer *pshm_drv; - struct list_head *pos; - u32 avail_emptybuff = 0; - unsigned long flags = 0; - - pshm_drv = priv; - - /* Check for received buffers. */ - if (mbx_msg & SHM_FULL_MASK) { - int idx; - - spin_lock_irqsave(&pshm_drv->lock, flags); - - /* Check whether we have any outstanding buffers. */ - if (list_empty(&pshm_drv->rx_empty_list)) { - - /* Release spin lock. */ - spin_unlock_irqrestore(&pshm_drv->lock, flags); - - /* We print even in IRQ context... */ - pr_warn("No empty Rx buffers to fill: " - "mbx_msg:%x\n", mbx_msg); - - /* Bail out. */ - goto err_sync; - } - - pbuf = - list_entry(pshm_drv->rx_empty_list.next, - struct buf_list, list); - idx = pbuf->index; - - /* Check buffer synchronization. */ - if (idx != SHM_GET_FULL(mbx_msg)) { - - /* We print even in IRQ context... */ - pr_warn( - "phyif_shm_mbx_msg_cb: RX full out of sync:" - " idx:%d, msg:%x SHM_GET_FULL(mbx_msg):%x\n", - idx, mbx_msg, SHM_GET_FULL(mbx_msg)); - - spin_unlock_irqrestore(&pshm_drv->lock, flags); - - /* Bail out. */ - goto err_sync; - } - - list_del_init(&pbuf->list); - list_add_tail(&pbuf->list, &pshm_drv->rx_full_list); - - spin_unlock_irqrestore(&pshm_drv->lock, flags); - - /* Schedule RX work queue. */ - if (!work_pending(&pshm_drv->shm_rx_work)) - queue_work(pshm_drv->pshm_rx_workqueue, - &pshm_drv->shm_rx_work); - } - - /* Check for emptied buffers. */ - if (mbx_msg & SHM_EMPTY_MASK) { - int idx; - - spin_lock_irqsave(&pshm_drv->lock, flags); - - /* Check whether we have any outstanding buffers. */ - if (list_empty(&pshm_drv->tx_full_list)) { - - /* We print even in IRQ context... */ - pr_warn("No TX to empty: msg:%x\n", mbx_msg); - - spin_unlock_irqrestore(&pshm_drv->lock, flags); - - /* Bail out. */ - goto err_sync; - } - - pbuf = - list_entry(pshm_drv->tx_full_list.next, - struct buf_list, list); - idx = pbuf->index; - - /* Check buffer synchronization. */ - if (idx != SHM_GET_EMPTY(mbx_msg)) { - - spin_unlock_irqrestore(&pshm_drv->lock, flags); - - /* We print even in IRQ context... */ - pr_warn("TX empty " - "out of sync:idx:%d, msg:%x\n", idx, mbx_msg); - - /* Bail out. */ - goto err_sync; - } - list_del_init(&pbuf->list); - - /* Reset buffer parameters. */ - pbuf->frames = 0; - pbuf->frm_ofs = SHM_CAIF_FRM_OFS; - - list_add_tail(&pbuf->list, &pshm_drv->tx_empty_list); - - /* Check the available no. of buffers in the empty list */ - list_for_each(pos, &pshm_drv->tx_empty_list) - avail_emptybuff++; - - /* Check whether we have to wake up the transmitter. */ - if ((avail_emptybuff > HIGH_WATERMARK) && - (!pshm_drv->tx_empty_available)) { - pshm_drv->tx_empty_available = 1; - spin_unlock_irqrestore(&pshm_drv->lock, flags); - pshm_drv->cfdev.flowctrl - (pshm_drv->pshm_dev->pshm_netdev, - CAIF_FLOW_ON); - - - /* Schedule the work queue. if required */ - if (!work_pending(&pshm_drv->shm_tx_work)) - queue_work(pshm_drv->pshm_tx_workqueue, - &pshm_drv->shm_tx_work); - } else - spin_unlock_irqrestore(&pshm_drv->lock, flags); - } - - return 0; - -err_sync: - return -EIO; -} - -static void shm_rx_work_func(struct work_struct *rx_work) -{ - struct shmdrv_layer *pshm_drv; - struct buf_list *pbuf; - unsigned long flags = 0; - struct sk_buff *skb; - char *p; - int ret; - - pshm_drv = container_of(rx_work, struct shmdrv_layer, shm_rx_work); - - while (1) { - - struct shm_pck_desc *pck_desc; - - spin_lock_irqsave(&pshm_drv->lock, flags); - - /* Check for received buffers. */ - if (list_empty(&pshm_drv->rx_full_list)) { - spin_unlock_irqrestore(&pshm_drv->lock, flags); - break; - } - - pbuf = - list_entry(pshm_drv->rx_full_list.next, struct buf_list, - list); - list_del_init(&pbuf->list); - spin_unlock_irqrestore(&pshm_drv->lock, flags); - - /* Retrieve pointer to start of the packet descriptor area. */ - pck_desc = (struct shm_pck_desc *) pbuf->desc_vptr; - - /* - * Check whether descriptor contains a CAIF shared memory - * frame. - */ - while (pck_desc->frm_ofs) { - unsigned int frm_buf_ofs; - unsigned int frm_pck_ofs; - unsigned int frm_pck_len; - /* - * Check whether offset is within buffer limits - * (lower). - */ - if (pck_desc->frm_ofs < - (pbuf->phy_addr - pshm_drv->shm_base_addr)) - break; - /* - * Check whether offset is within buffer limits - * (higher). - */ - if (pck_desc->frm_ofs > - ((pbuf->phy_addr - pshm_drv->shm_base_addr) + - pbuf->len)) - break; - - /* Calculate offset from start of buffer. */ - frm_buf_ofs = - pck_desc->frm_ofs - (pbuf->phy_addr - - pshm_drv->shm_base_addr); - - /* - * Calculate offset and length of CAIF packet while - * taking care of the shared memory header. - */ - frm_pck_ofs = - frm_buf_ofs + SHM_HDR_LEN + - (*(pbuf->desc_vptr + frm_buf_ofs)); - frm_pck_len = - (pck_desc->frm_len - SHM_HDR_LEN - - (*(pbuf->desc_vptr + frm_buf_ofs))); - - /* Check whether CAIF packet is within buffer limits */ - if ((frm_pck_ofs + pck_desc->frm_len) > pbuf->len) - break; - - /* Get a suitable CAIF packet and copy in data. */ - skb = netdev_alloc_skb(pshm_drv->pshm_dev->pshm_netdev, - frm_pck_len + 1); - if (skb == NULL) - break; - - p = skb_put(skb, frm_pck_len); - memcpy(p, pbuf->desc_vptr + frm_pck_ofs, frm_pck_len); - - skb->protocol = htons(ETH_P_CAIF); - skb_reset_mac_header(skb); - skb->dev = pshm_drv->pshm_dev->pshm_netdev; - - /* Push received packet up the stack. */ - ret = netif_rx_ni(skb); - - if (!ret) { - pshm_drv->pshm_dev->pshm_netdev->stats. - rx_packets++; - pshm_drv->pshm_dev->pshm_netdev->stats. - rx_bytes += pck_desc->frm_len; - } else - ++pshm_drv->pshm_dev->pshm_netdev->stats. - rx_dropped; - /* Move to next packet descriptor. */ - pck_desc++; - } - - spin_lock_irqsave(&pshm_drv->lock, flags); - list_add_tail(&pbuf->list, &pshm_drv->rx_pend_list); - - spin_unlock_irqrestore(&pshm_drv->lock, flags); - - } - - /* Schedule the work queue. if required */ - if (!work_pending(&pshm_drv->shm_tx_work)) - queue_work(pshm_drv->pshm_tx_workqueue, &pshm_drv->shm_tx_work); - -} - -static void shm_tx_work_func(struct work_struct *tx_work) -{ - u32 mbox_msg; - unsigned int frmlen, avail_emptybuff, append = 0; - unsigned long flags = 0; - struct buf_list *pbuf = NULL; - struct shmdrv_layer *pshm_drv; - struct shm_caif_frm *frm; - struct sk_buff *skb; - struct shm_pck_desc *pck_desc; - struct list_head *pos; - - pshm_drv = container_of(tx_work, struct shmdrv_layer, shm_tx_work); - - do { - /* Initialize mailbox message. */ - mbox_msg = 0x00; - avail_emptybuff = 0; - - spin_lock_irqsave(&pshm_drv->lock, flags); - - /* Check for pending receive buffers. */ - if (!list_empty(&pshm_drv->rx_pend_list)) { - - pbuf = list_entry(pshm_drv->rx_pend_list.next, - struct buf_list, list); - - list_del_init(&pbuf->list); - list_add_tail(&pbuf->list, &pshm_drv->rx_empty_list); - /* - * Value index is never changed, - * so read access should be safe. - */ - mbox_msg |= SHM_SET_EMPTY(pbuf->index); - } - - skb = skb_peek(&pshm_drv->sk_qhead); - - if (skb == NULL) - goto send_msg; - /* Check the available no. of buffers in the empty list */ - list_for_each(pos, &pshm_drv->tx_empty_list) - avail_emptybuff++; - - if ((avail_emptybuff < LOW_WATERMARK) && - pshm_drv->tx_empty_available) { - /* Update blocking condition. */ - pshm_drv->tx_empty_available = 0; - spin_unlock_irqrestore(&pshm_drv->lock, flags); - pshm_drv->cfdev.flowctrl - (pshm_drv->pshm_dev->pshm_netdev, - CAIF_FLOW_OFF); - spin_lock_irqsave(&pshm_drv->lock, flags); - } - /* - * We simply return back to the caller if we do not have space - * either in Tx pending list or Tx empty list. In this case, - * we hold the received skb in the skb list, waiting to - * be transmitted once Tx buffers become available - */ - if (list_empty(&pshm_drv->tx_empty_list)) - goto send_msg; - - /* Get the first free Tx buffer. */ - pbuf = list_entry(pshm_drv->tx_empty_list.next, - struct buf_list, list); - do { - if (append) { - skb = skb_peek(&pshm_drv->sk_qhead); - if (skb == NULL) - break; - } - - frm = (struct shm_caif_frm *) - (pbuf->desc_vptr + pbuf->frm_ofs); - - frm->hdr_ofs = 0; - frmlen = 0; - frmlen += SHM_HDR_LEN + frm->hdr_ofs + skb->len; - - /* Add tail padding if needed. */ - if (frmlen % SHM_FRM_PAD_LEN) - frmlen += SHM_FRM_PAD_LEN - - (frmlen % SHM_FRM_PAD_LEN); - - /* - * Verify that packet, header and additional padding - * can fit within the buffer frame area. - */ - if (frmlen >= (pbuf->len - pbuf->frm_ofs)) - break; - - if (!append) { - list_del_init(&pbuf->list); - append = 1; - } - - skb = skb_dequeue(&pshm_drv->sk_qhead); - if (skb == NULL) - break; - /* Copy in CAIF frame. */ - skb_copy_bits(skb, 0, pbuf->desc_vptr + - pbuf->frm_ofs + SHM_HDR_LEN + - frm->hdr_ofs, skb->len); - - pshm_drv->pshm_dev->pshm_netdev->stats.tx_packets++; - pshm_drv->pshm_dev->pshm_netdev->stats.tx_bytes += - frmlen; - dev_kfree_skb_irq(skb); - - /* Fill in the shared memory packet descriptor area. */ - pck_desc = (struct shm_pck_desc *) (pbuf->desc_vptr); - /* Forward to current frame. */ - pck_desc += pbuf->frames; - pck_desc->frm_ofs = (pbuf->phy_addr - - pshm_drv->shm_base_addr) + - pbuf->frm_ofs; - pck_desc->frm_len = frmlen; - /* Terminate packet descriptor area. */ - pck_desc++; - pck_desc->frm_ofs = 0; - /* Update buffer parameters. */ - pbuf->frames++; - pbuf->frm_ofs += frmlen + (frmlen % 32); - - } while (pbuf->frames < SHM_MAX_FRMS_PER_BUF); - - /* Assign buffer as full. */ - list_add_tail(&pbuf->list, &pshm_drv->tx_full_list); - append = 0; - mbox_msg |= SHM_SET_FULL(pbuf->index); -send_msg: - spin_unlock_irqrestore(&pshm_drv->lock, flags); - - if (mbox_msg) - pshm_drv->pshm_dev->pshmdev_mbxsend - (pshm_drv->pshm_dev->shm_id, mbox_msg); - } while (mbox_msg); -} - -static int shm_netdev_tx(struct sk_buff *skb, struct net_device *shm_netdev) -{ - struct shmdrv_layer *pshm_drv; - - pshm_drv = netdev_priv(shm_netdev); - - skb_queue_tail(&pshm_drv->sk_qhead, skb); - - /* Schedule Tx work queue. for deferred processing of skbs*/ - if (!work_pending(&pshm_drv->shm_tx_work)) - queue_work(pshm_drv->pshm_tx_workqueue, &pshm_drv->shm_tx_work); - - return 0; -} - -static const struct net_device_ops netdev_ops = { - .ndo_open = shm_netdev_open, - .ndo_stop = shm_netdev_close, - .ndo_start_xmit = shm_netdev_tx, -}; - -static void shm_netdev_setup(struct net_device *pshm_netdev) -{ - struct shmdrv_layer *pshm_drv; - pshm_netdev->netdev_ops = &netdev_ops; - - pshm_netdev->mtu = CAIF_MAX_MTU; - pshm_netdev->type = ARPHRD_CAIF; - pshm_netdev->hard_header_len = CAIF_NEEDED_HEADROOM; - pshm_netdev->tx_queue_len = 0; - pshm_netdev->destructor = free_netdev; - - pshm_drv = netdev_priv(pshm_netdev); - - /* Initialize structures in a clean state. */ - memset(pshm_drv, 0, sizeof(struct shmdrv_layer)); - - pshm_drv->cfdev.link_select = CAIF_LINK_LOW_LATENCY; -} - -int caif_shmcore_probe(struct shmdev_layer *pshm_dev) -{ - int result, j; - struct shmdrv_layer *pshm_drv = NULL; - - pshm_dev->pshm_netdev = alloc_netdev(sizeof(struct shmdrv_layer), - "cfshm%d", shm_netdev_setup); - if (!pshm_dev->pshm_netdev) - return -ENOMEM; - - pshm_drv = netdev_priv(pshm_dev->pshm_netdev); - pshm_drv->pshm_dev = pshm_dev; - - /* - * Initialization starts with the verification of the - * availability of MBX driver by calling its setup function. - * MBX driver must be available by this time for proper - * functioning of SHM driver. - */ - if ((pshm_dev->pshmdev_mbxsetup - (caif_shmdrv_rx_cb, pshm_dev, pshm_drv)) != 0) { - pr_warn("Could not config. SHM Mailbox," - " Bailing out.....\n"); - free_netdev(pshm_dev->pshm_netdev); - return -ENODEV; - } - - skb_queue_head_init(&pshm_drv->sk_qhead); - - pr_info("SHM DEVICE[%d] PROBED BY DRIVER, NEW SHM DRIVER" - " INSTANCE AT pshm_drv =0x%p\n", - pshm_drv->pshm_dev->shm_id, pshm_drv); - - if (pshm_dev->shm_total_sz < - (NR_TX_BUF * TX_BUF_SZ + NR_RX_BUF * RX_BUF_SZ)) { - - pr_warn("ERROR, Amount of available" - " Phys. SHM cannot accommodate current SHM " - "driver configuration, Bailing out ...\n"); - free_netdev(pshm_dev->pshm_netdev); - return -ENOMEM; - } - - pshm_drv->shm_base_addr = pshm_dev->shm_base_addr; - pshm_drv->shm_tx_addr = pshm_drv->shm_base_addr; - - if (pshm_dev->shm_loopback) - pshm_drv->shm_rx_addr = pshm_drv->shm_tx_addr; - else - pshm_drv->shm_rx_addr = pshm_dev->shm_base_addr + - (NR_TX_BUF * TX_BUF_SZ); - - spin_lock_init(&pshm_drv->lock); - INIT_LIST_HEAD(&pshm_drv->tx_empty_list); - INIT_LIST_HEAD(&pshm_drv->tx_pend_list); - INIT_LIST_HEAD(&pshm_drv->tx_full_list); - - INIT_LIST_HEAD(&pshm_drv->rx_empty_list); - INIT_LIST_HEAD(&pshm_drv->rx_pend_list); - INIT_LIST_HEAD(&pshm_drv->rx_full_list); - - INIT_WORK(&pshm_drv->shm_tx_work, shm_tx_work_func); - INIT_WORK(&pshm_drv->shm_rx_work, shm_rx_work_func); - - pshm_drv->pshm_tx_workqueue = - create_singlethread_workqueue("shm_tx_work"); - pshm_drv->pshm_rx_workqueue = - create_singlethread_workqueue("shm_rx_work"); - - for (j = 0; j < NR_TX_BUF; j++) { - struct buf_list *tx_buf = - kmalloc(sizeof(struct buf_list), GFP_KERNEL); - - if (tx_buf == NULL) { - free_netdev(pshm_dev->pshm_netdev); - return -ENOMEM; - } - tx_buf->index = j; - tx_buf->phy_addr = pshm_drv->shm_tx_addr + (TX_BUF_SZ * j); - tx_buf->len = TX_BUF_SZ; - tx_buf->frames = 0; - tx_buf->frm_ofs = SHM_CAIF_FRM_OFS; - - if (pshm_dev->shm_loopback) - tx_buf->desc_vptr = (unsigned char *)tx_buf->phy_addr; - else - /* - * FIXME: the result of ioremap is not a pointer - arnd - */ - tx_buf->desc_vptr = - ioremap(tx_buf->phy_addr, TX_BUF_SZ); - - list_add_tail(&tx_buf->list, &pshm_drv->tx_empty_list); - } - - for (j = 0; j < NR_RX_BUF; j++) { - struct buf_list *rx_buf = - kmalloc(sizeof(struct buf_list), GFP_KERNEL); - - if (rx_buf == NULL) { - free_netdev(pshm_dev->pshm_netdev); - return -ENOMEM; - } - rx_buf->index = j; - rx_buf->phy_addr = pshm_drv->shm_rx_addr + (RX_BUF_SZ * j); - rx_buf->len = RX_BUF_SZ; - - if (pshm_dev->shm_loopback) - rx_buf->desc_vptr = (unsigned char *)rx_buf->phy_addr; - else - rx_buf->desc_vptr = - ioremap(rx_buf->phy_addr, RX_BUF_SZ); - list_add_tail(&rx_buf->list, &pshm_drv->rx_empty_list); - } - - pshm_drv->tx_empty_available = 1; - result = register_netdev(pshm_dev->pshm_netdev); - if (result) - pr_warn("ERROR[%d], SHM could not, " - "register with NW FRMWK Bailing out ...\n", result); - - return result; -} - -void caif_shmcore_remove(struct net_device *pshm_netdev) -{ - struct buf_list *pbuf; - struct shmdrv_layer *pshm_drv = NULL; - - pshm_drv = netdev_priv(pshm_netdev); - - while (!(list_empty(&pshm_drv->tx_pend_list))) { - pbuf = - list_entry(pshm_drv->tx_pend_list.next, - struct buf_list, list); - - list_del(&pbuf->list); - kfree(pbuf); - } - - while (!(list_empty(&pshm_drv->tx_full_list))) { - pbuf = - list_entry(pshm_drv->tx_full_list.next, - struct buf_list, list); - list_del(&pbuf->list); - kfree(pbuf); - } - - while (!(list_empty(&pshm_drv->tx_empty_list))) { - pbuf = - list_entry(pshm_drv->tx_empty_list.next, - struct buf_list, list); - list_del(&pbuf->list); - kfree(pbuf); - } - - while (!(list_empty(&pshm_drv->rx_full_list))) { - pbuf = - list_entry(pshm_drv->tx_full_list.next, - struct buf_list, list); - list_del(&pbuf->list); - kfree(pbuf); - } - - while (!(list_empty(&pshm_drv->rx_pend_list))) { - pbuf = - list_entry(pshm_drv->tx_pend_list.next, - struct buf_list, list); - list_del(&pbuf->list); - kfree(pbuf); - } - - while (!(list_empty(&pshm_drv->rx_empty_list))) { - pbuf = - list_entry(pshm_drv->rx_empty_list.next, - struct buf_list, list); - list_del(&pbuf->list); - kfree(pbuf); - } - - /* Destroy work queues. */ - destroy_workqueue(pshm_drv->pshm_tx_workqueue); - destroy_workqueue(pshm_drv->pshm_rx_workqueue); - - unregister_netdev(pshm_netdev); -} diff --git a/include/net/caif/caif_shm.h b/include/net/caif/caif_shm.h deleted file mode 100644 index 5bcce55438cf..000000000000 --- a/include/net/caif/caif_shm.h +++ /dev/null @@ -1,26 +0,0 @@ -/* - * Copyright (C) ST-Ericsson AB 2010 - * Contact: Sjur Brendeland / sjur.brandeland@stericsson.com - * Author: Amarnath Revanna / amarnath.bangalore.revanna@stericsson.com - * License terms: GNU General Public License (GPL) version 2 - */ - -#ifndef CAIF_SHM_H_ -#define CAIF_SHM_H_ - -struct shmdev_layer { - u32 shm_base_addr; - u32 shm_total_sz; - u32 shm_id; - u32 shm_loopback; - void *hmbx; - int (*pshmdev_mbxsend) (u32 shm_id, u32 mbx_msg); - int (*pshmdev_mbxsetup) (void *pshmdrv_cb, - struct shmdev_layer *pshm_dev, void *pshm_drv); - struct net_device *pshm_netdev; -}; - -extern int caif_shmcore_probe(struct shmdev_layer *pshm_dev); -extern void caif_shmcore_remove(struct net_device *pshm_netdev); - -#endif -- cgit v1.2.3 From 1a2c6181c4a1922021b4d7df373bba612c3e5f04 Mon Sep 17 00:00:00 2001 From: Christoph Paasch Date: Sun, 17 Mar 2013 08:23:34 +0000 Subject: tcp: Remove TCPCT TCPCT uses option-number 253, reserved for experimental use and should not be used in production environments. Further, TCPCT does not fully implement RFC 6013. As a nice side-effect, removing TCPCT increases TCP's performance for very short flows: Doing an apache-benchmark with -c 100 -n 100000, sending HTTP-requests for files of 1KB size. before this patch: average (among 7 runs) of 20845.5 Requests/Second after: average (among 7 runs) of 21403.6 Requests/Second Signed-off-by: Christoph Paasch Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.txt | 8 - drivers/infiniband/hw/cxgb4/cm.c | 2 +- include/linux/tcp.h | 10 -- include/net/request_sock.h | 8 +- include/net/tcp.h | 89 +---------- include/uapi/linux/tcp.h | 26 ---- net/dccp/ipv4.c | 5 +- net/dccp/ipv6.c | 5 +- net/ipv4/inet_connection_sock.c | 2 +- net/ipv4/syncookies.c | 3 +- net/ipv4/sysctl_net_ipv4.c | 7 - net/ipv4/tcp.c | 267 --------------------------------- net/ipv4/tcp_input.c | 69 +-------- net/ipv4/tcp_ipv4.c | 60 +------- net/ipv4/tcp_minisocks.c | 40 +---- net/ipv4/tcp_output.c | 219 +-------------------------- net/ipv6/syncookies.c | 3 +- net/ipv6/tcp_ipv6.c | 56 +------ 18 files changed, 38 insertions(+), 841 deletions(-) (limited to 'include/net') diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 18a24c405ac0..17953e2bc3e9 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -175,14 +175,6 @@ tcp_congestion_control - STRING is inherited. [see setsockopt(listenfd, SOL_TCP, TCP_CONGESTION, "name" ...) ] -tcp_cookie_size - INTEGER - Default size of TCP Cookie Transactions (TCPCT) option, that may be - overridden on a per socket basis by the TCPCT socket option. - Values greater than the maximum (16) are interpreted as the maximum. - Values greater than zero and less than the minimum (8) are interpreted - as the minimum. Odd values are interpreted as the next even value. - Default: 0 (off). - tcp_dsack - BOOLEAN Allows TCP to send "duplicate" SACKs. diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 8dcc84fd9d30..54fd31fcc332 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -2915,7 +2915,7 @@ static void build_cpl_pass_accept_req(struct sk_buff *skb, int stid , u8 tos) */ memset(&tmp_opt, 0, sizeof(tmp_opt)); tcp_clear_options(&tmp_opt); - tcp_parse_options(skb, &tmp_opt, NULL, 0, NULL); + tcp_parse_options(skb, &tmp_opt, 0, NULL); req = (struct cpl_pass_accept_req *)__skb_push(skb, sizeof(*req)); memset(req, 0, sizeof(*req)); diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 763c108ee03d..ed6a7456eecd 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -90,9 +90,6 @@ struct tcp_options_received { sack_ok : 4, /* SACK seen on SYN packet */ snd_wscale : 4, /* Window scaling received from sender */ rcv_wscale : 4; /* Window scaling to send to receiver */ - u8 cookie_plus:6, /* bytes in authenticator/cookie option */ - cookie_out_never:1, - cookie_in_always:1; u8 num_sacks; /* Number of SACK blocks */ u16 user_mss; /* mss requested by user in ioctl */ u16 mss_clamp; /* Maximal mss, negotiated at connection setup */ @@ -102,7 +99,6 @@ static inline void tcp_clear_options(struct tcp_options_received *rx_opt) { rx_opt->tstamp_ok = rx_opt->sack_ok = 0; rx_opt->wscale_ok = rx_opt->snd_wscale = 0; - rx_opt->cookie_plus = 0; } /* This is the max number of SACKS that we'll generate and process. It's safe @@ -320,12 +316,6 @@ struct tcp_sock { struct tcp_md5sig_info __rcu *md5sig_info; #endif - /* When the cookie options are generated and exchanged, then this - * object holds a reference to them (cookie_values->kref). Also - * contains related tcp_cookie_transactions fields. - */ - struct tcp_cookie_values *cookie_values; - /* TCP fastopen related information */ struct tcp_fastopen_request *fastopen_req; /* fastopen_rsk points to request_sock that resulted in this big diff --git a/include/net/request_sock.h b/include/net/request_sock.h index a51dbd17c2de..9069e65c1c56 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -27,19 +27,13 @@ struct sk_buff; struct dst_entry; struct proto; -/* empty to "strongly type" an otherwise void parameter. - */ -struct request_values { -}; - struct request_sock_ops { int family; int obj_size; struct kmem_cache *slab; char *slab_name; int (*rtx_syn_ack)(struct sock *sk, - struct request_sock *req, - struct request_values *rvp); + struct request_sock *req); void (*send_ack)(struct sock *sk, struct sk_buff *skb, struct request_sock *req); void (*send_reset)(struct sock *sk, diff --git a/include/net/tcp.h b/include/net/tcp.h index ab9f947b118b..7f2f17198d75 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -179,7 +179,6 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo); #define TCPOPT_SACK 5 /* SACK Block */ #define TCPOPT_TIMESTAMP 8 /* Better RTT estimations/PAWS */ #define TCPOPT_MD5SIG 19 /* MD5 Signature (RFC2385) */ -#define TCPOPT_COOKIE 253 /* Cookie extension (experimental) */ #define TCPOPT_EXP 254 /* Experimental */ /* Magic number to be after the option value for sharing TCP * experimental options. See draft-ietf-tcpm-experimental-options-00.txt @@ -454,7 +453,7 @@ extern void tcp_syn_ack_timeout(struct sock *sk, struct request_sock *req); extern int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t len, int nonblock, int flags, int *addr_len); extern void tcp_parse_options(const struct sk_buff *skb, - struct tcp_options_received *opt_rx, const u8 **hvpp, + struct tcp_options_received *opt_rx, int estab, struct tcp_fastopen_cookie *foc); extern const u8 *tcp_parse_md5sig_option(const struct tcphdr *th); @@ -476,7 +475,6 @@ extern int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, extern int tcp_connect(struct sock *sk); extern struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst, struct request_sock *req, - struct request_values *rvp, struct tcp_fastopen_cookie *foc); extern int tcp_disconnect(struct sock *sk, int flags); @@ -1589,91 +1587,6 @@ struct tcp_request_sock_ops { #endif }; -/* Using SHA1 for now, define some constants. - */ -#define COOKIE_DIGEST_WORDS (SHA_DIGEST_WORDS) -#define COOKIE_MESSAGE_WORDS (SHA_MESSAGE_BYTES / 4) -#define COOKIE_WORKSPACE_WORDS (COOKIE_DIGEST_WORDS + COOKIE_MESSAGE_WORDS) - -extern int tcp_cookie_generator(u32 *bakery); - -/** - * struct tcp_cookie_values - each socket needs extra space for the - * cookies, together with (optional) space for any SYN data. - * - * A tcp_sock contains a pointer to the current value, and this is - * cloned to the tcp_timewait_sock. - * - * @cookie_pair: variable data from the option exchange. - * - * @cookie_desired: user specified tcpct_cookie_desired. Zero - * indicates default (sysctl_tcp_cookie_size). - * After cookie sent, remembers size of cookie. - * Range 0, TCP_COOKIE_MIN to TCP_COOKIE_MAX. - * - * @s_data_desired: user specified tcpct_s_data_desired. When the - * constant payload is specified (@s_data_constant), - * holds its length instead. - * Range 0 to TCP_MSS_DESIRED. - * - * @s_data_payload: constant data that is to be included in the - * payload of SYN or SYNACK segments when the - * cookie option is present. - */ -struct tcp_cookie_values { - struct kref kref; - u8 cookie_pair[TCP_COOKIE_PAIR_SIZE]; - u8 cookie_pair_size; - u8 cookie_desired; - u16 s_data_desired:11, - s_data_constant:1, - s_data_in:1, - s_data_out:1, - s_data_unused:2; - u8 s_data_payload[0]; -}; - -static inline void tcp_cookie_values_release(struct kref *kref) -{ - kfree(container_of(kref, struct tcp_cookie_values, kref)); -} - -/* The length of constant payload data. Note that s_data_desired is - * overloaded, depending on s_data_constant: either the length of constant - * data (returned here) or the limit on variable data. - */ -static inline int tcp_s_data_size(const struct tcp_sock *tp) -{ - return (tp->cookie_values != NULL && tp->cookie_values->s_data_constant) - ? tp->cookie_values->s_data_desired - : 0; -} - -/** - * struct tcp_extend_values - tcp_ipv?.c to tcp_output.c workspace. - * - * As tcp_request_sock has already been extended in other places, the - * only remaining method is to pass stack values along as function - * parameters. These parameters are not needed after sending SYNACK. - * - * @cookie_bakery: cryptographic secret and message workspace. - * - * @cookie_plus: bytes in authenticator/cookie option, copied from - * struct tcp_options_received (above). - */ -struct tcp_extend_values { - struct request_values rv; - u32 cookie_bakery[COOKIE_WORKSPACE_WORDS]; - u8 cookie_plus:6, - cookie_out_never:1, - cookie_in_always:1; -}; - -static inline struct tcp_extend_values *tcp_xv(struct request_values *rvp) -{ - return (struct tcp_extend_values *)rvp; -} - extern void tcp_v4_init(void); extern void tcp_init(void); diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h index 6b1ead0b0c9d..8d776ebc4829 100644 --- a/include/uapi/linux/tcp.h +++ b/include/uapi/linux/tcp.h @@ -102,7 +102,6 @@ enum { #define TCP_QUICKACK 12 /* Block/reenable quick acks */ #define TCP_CONGESTION 13 /* Congestion control algorithm */ #define TCP_MD5SIG 14 /* TCP MD5 Signature (RFC2385) */ -#define TCP_COOKIE_TRANSACTIONS 15 /* TCP Cookie Transactions */ #define TCP_THIN_LINEAR_TIMEOUTS 16 /* Use linear timeouts for thin streams*/ #define TCP_THIN_DUPACK 17 /* Fast retrans. after 1 dupack */ #define TCP_USER_TIMEOUT 18 /* How long for loss retry before timeout */ @@ -199,29 +198,4 @@ struct tcp_md5sig { __u8 tcpm_key[TCP_MD5SIG_MAXKEYLEN]; /* key (binary) */ }; -/* for TCP_COOKIE_TRANSACTIONS (TCPCT) socket option */ -#define TCP_COOKIE_MIN 8 /* 64-bits */ -#define TCP_COOKIE_MAX 16 /* 128-bits */ -#define TCP_COOKIE_PAIR_SIZE (2*TCP_COOKIE_MAX) - -/* Flags for both getsockopt and setsockopt */ -#define TCP_COOKIE_IN_ALWAYS (1 << 0) /* Discard SYN without cookie */ -#define TCP_COOKIE_OUT_NEVER (1 << 1) /* Prohibit outgoing cookies, - * supercedes everything. */ - -/* Flags for getsockopt */ -#define TCP_S_DATA_IN (1 << 2) /* Was data received? */ -#define TCP_S_DATA_OUT (1 << 3) /* Was data sent? */ - -/* TCP_COOKIE_TRANSACTIONS data */ -struct tcp_cookie_transactions { - __u16 tcpct_flags; /* see above */ - __u8 __tcpct_pad1; /* zero */ - __u8 tcpct_cookie_desired; /* bytes */ - __u16 tcpct_s_data_desired; /* bytes of variable data */ - __u16 tcpct_used; /* bytes in value */ - __u8 tcpct_value[TCP_MSS_DEFAULT]; -}; - - #endif /* _UAPI_LINUX_TCP_H */ diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 4f9f5eb478f1..ebc54fef85a5 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -500,8 +500,7 @@ static struct dst_entry* dccp_v4_route_skb(struct net *net, struct sock *sk, return &rt->dst; } -static int dccp_v4_send_response(struct sock *sk, struct request_sock *req, - struct request_values *rv_unused) +static int dccp_v4_send_response(struct sock *sk, struct request_sock *req) { int err = -1; struct sk_buff *skb; @@ -658,7 +657,7 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) dreq->dreq_gss = dreq->dreq_iss; dreq->dreq_service = service; - if (dccp_v4_send_response(sk, req, NULL)) + if (dccp_v4_send_response(sk, req)) goto drop_and_free; inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT); diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 6e05981f271e..9c61f9c02fdb 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -213,8 +213,7 @@ out: } -static int dccp_v6_send_response(struct sock *sk, struct request_sock *req, - struct request_values *rv_unused) +static int dccp_v6_send_response(struct sock *sk, struct request_sock *req) { struct inet6_request_sock *ireq6 = inet6_rsk(req); struct ipv6_pinfo *np = inet6_sk(sk); @@ -428,7 +427,7 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb) dreq->dreq_gss = dreq->dreq_iss; dreq->dreq_service = service; - if (dccp_v6_send_response(sk, req, NULL)) + if (dccp_v6_send_response(sk, req)) goto drop_and_free; inet6_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT); diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 786d97aee751..6acb541c9091 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -559,7 +559,7 @@ static inline void syn_ack_recalc(struct request_sock *req, const int thresh, int inet_rtx_syn_ack(struct sock *parent, struct request_sock *req) { - int err = req->rsk_ops->rtx_syn_ack(parent, req, NULL); + int err = req->rsk_ops->rtx_syn_ack(parent, req); if (!err) req->num_retrans++; diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index ef54377fb11c..7f4a5cb8f8d0 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -267,7 +267,6 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, struct ip_options *opt) { struct tcp_options_received tcp_opt; - const u8 *hash_location; struct inet_request_sock *ireq; struct tcp_request_sock *treq; struct tcp_sock *tp = tcp_sk(sk); @@ -294,7 +293,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, /* check for timestamp cookie support */ memset(&tcp_opt, 0, sizeof(tcp_opt)); - tcp_parse_options(skb, &tcp_opt, &hash_location, 0, NULL); + tcp_parse_options(skb, &tcp_opt, 0, NULL); if (!cookie_check_timestamp(&tcp_opt, sock_net(sk), &ecn_ok)) goto out; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index cca4550f4082..cb45062c8be0 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -732,13 +732,6 @@ static struct ctl_table ipv4_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, - { - .procname = "tcp_cookie_size", - .data = &sysctl_tcp_cookie_size, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, { .procname = "tcp_thin_linear_timeouts", .data = &sysctl_tcp_thin_linear_timeouts, diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 8d14573ade77..17a6810af5c8 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -409,15 +409,6 @@ void tcp_init_sock(struct sock *sk) icsk->icsk_sync_mss = tcp_sync_mss; - /* TCP Cookie Transactions */ - if (sysctl_tcp_cookie_size > 0) { - /* Default, cookies without s_data_payload. */ - tp->cookie_values = - kzalloc(sizeof(*tp->cookie_values), - sk->sk_allocation); - if (tp->cookie_values != NULL) - kref_init(&tp->cookie_values->kref); - } /* Presumed zeroed, in order of appearance: * cookie_in_always, cookie_out_never, * s_data_constant, s_data_in, s_data_out @@ -2397,92 +2388,6 @@ static int do_tcp_setsockopt(struct sock *sk, int level, release_sock(sk); return err; } - case TCP_COOKIE_TRANSACTIONS: { - struct tcp_cookie_transactions ctd; - struct tcp_cookie_values *cvp = NULL; - - if (sizeof(ctd) > optlen) - return -EINVAL; - if (copy_from_user(&ctd, optval, sizeof(ctd))) - return -EFAULT; - - if (ctd.tcpct_used > sizeof(ctd.tcpct_value) || - ctd.tcpct_s_data_desired > TCP_MSS_DESIRED) - return -EINVAL; - - if (ctd.tcpct_cookie_desired == 0) { - /* default to global value */ - } else if ((0x1 & ctd.tcpct_cookie_desired) || - ctd.tcpct_cookie_desired > TCP_COOKIE_MAX || - ctd.tcpct_cookie_desired < TCP_COOKIE_MIN) { - return -EINVAL; - } - - if (TCP_COOKIE_OUT_NEVER & ctd.tcpct_flags) { - /* Supercedes all other values */ - lock_sock(sk); - if (tp->cookie_values != NULL) { - kref_put(&tp->cookie_values->kref, - tcp_cookie_values_release); - tp->cookie_values = NULL; - } - tp->rx_opt.cookie_in_always = 0; /* false */ - tp->rx_opt.cookie_out_never = 1; /* true */ - release_sock(sk); - return err; - } - - /* Allocate ancillary memory before locking. - */ - if (ctd.tcpct_used > 0 || - (tp->cookie_values == NULL && - (sysctl_tcp_cookie_size > 0 || - ctd.tcpct_cookie_desired > 0 || - ctd.tcpct_s_data_desired > 0))) { - cvp = kzalloc(sizeof(*cvp) + ctd.tcpct_used, - GFP_KERNEL); - if (cvp == NULL) - return -ENOMEM; - - kref_init(&cvp->kref); - } - lock_sock(sk); - tp->rx_opt.cookie_in_always = - (TCP_COOKIE_IN_ALWAYS & ctd.tcpct_flags); - tp->rx_opt.cookie_out_never = 0; /* false */ - - if (tp->cookie_values != NULL) { - if (cvp != NULL) { - /* Changed values are recorded by a changed - * pointer, ensuring the cookie will differ, - * without separately hashing each value later. - */ - kref_put(&tp->cookie_values->kref, - tcp_cookie_values_release); - } else { - cvp = tp->cookie_values; - } - } - - if (cvp != NULL) { - cvp->cookie_desired = ctd.tcpct_cookie_desired; - - if (ctd.tcpct_used > 0) { - memcpy(cvp->s_data_payload, ctd.tcpct_value, - ctd.tcpct_used); - cvp->s_data_desired = ctd.tcpct_used; - cvp->s_data_constant = 1; /* true */ - } else { - /* No constant payload data. */ - cvp->s_data_desired = ctd.tcpct_s_data_desired; - cvp->s_data_constant = 0; /* false */ - } - - tp->cookie_values = cvp; - } - release_sock(sk); - return err; - } default: /* fallthru */ break; @@ -2902,41 +2807,6 @@ static int do_tcp_getsockopt(struct sock *sk, int level, return -EFAULT; return 0; - case TCP_COOKIE_TRANSACTIONS: { - struct tcp_cookie_transactions ctd; - struct tcp_cookie_values *cvp = tp->cookie_values; - - if (get_user(len, optlen)) - return -EFAULT; - if (len < sizeof(ctd)) - return -EINVAL; - - memset(&ctd, 0, sizeof(ctd)); - ctd.tcpct_flags = (tp->rx_opt.cookie_in_always ? - TCP_COOKIE_IN_ALWAYS : 0) - | (tp->rx_opt.cookie_out_never ? - TCP_COOKIE_OUT_NEVER : 0); - - if (cvp != NULL) { - ctd.tcpct_flags |= (cvp->s_data_in ? - TCP_S_DATA_IN : 0) - | (cvp->s_data_out ? - TCP_S_DATA_OUT : 0); - - ctd.tcpct_cookie_desired = cvp->cookie_desired; - ctd.tcpct_s_data_desired = cvp->s_data_desired; - - memcpy(&ctd.tcpct_value[0], &cvp->cookie_pair[0], - cvp->cookie_pair_size); - ctd.tcpct_used = cvp->cookie_pair_size; - } - - if (put_user(sizeof(ctd), optlen)) - return -EFAULT; - if (copy_to_user(optval, &ctd, sizeof(ctd))) - return -EFAULT; - return 0; - } case TCP_THIN_LINEAR_TIMEOUTS: val = tp->thin_lto; break; @@ -3409,134 +3279,6 @@ EXPORT_SYMBOL(tcp_md5_hash_key); #endif -/* Each Responder maintains up to two secret values concurrently for - * efficient secret rollover. Each secret value has 4 states: - * - * Generating. (tcp_secret_generating != tcp_secret_primary) - * Generates new Responder-Cookies, but not yet used for primary - * verification. This is a short-term state, typically lasting only - * one round trip time (RTT). - * - * Primary. (tcp_secret_generating == tcp_secret_primary) - * Used both for generation and primary verification. - * - * Retiring. (tcp_secret_retiring != tcp_secret_secondary) - * Used for verification, until the first failure that can be - * verified by the newer Generating secret. At that time, this - * cookie's state is changed to Secondary, and the Generating - * cookie's state is changed to Primary. This is a short-term state, - * typically lasting only one round trip time (RTT). - * - * Secondary. (tcp_secret_retiring == tcp_secret_secondary) - * Used for secondary verification, after primary verification - * failures. This state lasts no more than twice the Maximum Segment - * Lifetime (2MSL). Then, the secret is discarded. - */ -struct tcp_cookie_secret { - /* The secret is divided into two parts. The digest part is the - * equivalent of previously hashing a secret and saving the state, - * and serves as an initialization vector (IV). The message part - * serves as the trailing secret. - */ - u32 secrets[COOKIE_WORKSPACE_WORDS]; - unsigned long expires; -}; - -#define TCP_SECRET_1MSL (HZ * TCP_PAWS_MSL) -#define TCP_SECRET_2MSL (HZ * TCP_PAWS_MSL * 2) -#define TCP_SECRET_LIFE (HZ * 600) - -static struct tcp_cookie_secret tcp_secret_one; -static struct tcp_cookie_secret tcp_secret_two; - -/* Essentially a circular list, without dynamic allocation. */ -static struct tcp_cookie_secret *tcp_secret_generating; -static struct tcp_cookie_secret *tcp_secret_primary; -static struct tcp_cookie_secret *tcp_secret_retiring; -static struct tcp_cookie_secret *tcp_secret_secondary; - -static DEFINE_SPINLOCK(tcp_secret_locker); - -/* Select a pseudo-random word in the cookie workspace. - */ -static inline u32 tcp_cookie_work(const u32 *ws, const int n) -{ - return ws[COOKIE_DIGEST_WORDS + ((COOKIE_MESSAGE_WORDS-1) & ws[n])]; -} - -/* Fill bakery[COOKIE_WORKSPACE_WORDS] with generator, updating as needed. - * Called in softirq context. - * Returns: 0 for success. - */ -int tcp_cookie_generator(u32 *bakery) -{ - unsigned long jiffy = jiffies; - - if (unlikely(time_after_eq(jiffy, tcp_secret_generating->expires))) { - spin_lock_bh(&tcp_secret_locker); - if (!time_after_eq(jiffy, tcp_secret_generating->expires)) { - /* refreshed by another */ - memcpy(bakery, - &tcp_secret_generating->secrets[0], - COOKIE_WORKSPACE_WORDS); - } else { - /* still needs refreshing */ - get_random_bytes(bakery, COOKIE_WORKSPACE_WORDS); - - /* The first time, paranoia assumes that the - * randomization function isn't as strong. But, - * this secret initialization is delayed until - * the last possible moment (packet arrival). - * Although that time is observable, it is - * unpredictably variable. Mash in the most - * volatile clock bits available, and expire the - * secret extra quickly. - */ - if (unlikely(tcp_secret_primary->expires == - tcp_secret_secondary->expires)) { - struct timespec tv; - - getnstimeofday(&tv); - bakery[COOKIE_DIGEST_WORDS+0] ^= - (u32)tv.tv_nsec; - - tcp_secret_secondary->expires = jiffy - + TCP_SECRET_1MSL - + (0x0f & tcp_cookie_work(bakery, 0)); - } else { - tcp_secret_secondary->expires = jiffy - + TCP_SECRET_LIFE - + (0xff & tcp_cookie_work(bakery, 1)); - tcp_secret_primary->expires = jiffy - + TCP_SECRET_2MSL - + (0x1f & tcp_cookie_work(bakery, 2)); - } - memcpy(&tcp_secret_secondary->secrets[0], - bakery, COOKIE_WORKSPACE_WORDS); - - rcu_assign_pointer(tcp_secret_generating, - tcp_secret_secondary); - rcu_assign_pointer(tcp_secret_retiring, - tcp_secret_primary); - /* - * Neither call_rcu() nor synchronize_rcu() needed. - * Retiring data is not freed. It is replaced after - * further (locked) pointer updates, and a quiet time - * (minimum 1MSL, maximum LIFE - 2MSL). - */ - } - spin_unlock_bh(&tcp_secret_locker); - } else { - rcu_read_lock_bh(); - memcpy(bakery, - &rcu_dereference(tcp_secret_generating)->secrets[0], - COOKIE_WORKSPACE_WORDS); - rcu_read_unlock_bh(); - } - return 0; -} -EXPORT_SYMBOL(tcp_cookie_generator); - void tcp_done(struct sock *sk) { struct request_sock *req = tcp_sk(sk)->fastopen_rsk; @@ -3591,7 +3333,6 @@ void __init tcp_init(void) unsigned long limit; int max_rshare, max_wshare, cnt; unsigned int i; - unsigned long jiffy = jiffies; BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb)); @@ -3667,13 +3408,5 @@ void __init tcp_init(void) tcp_register_congestion_control(&tcp_reno); - memset(&tcp_secret_one.secrets[0], 0, sizeof(tcp_secret_one.secrets)); - memset(&tcp_secret_two.secrets[0], 0, sizeof(tcp_secret_two.secrets)); - tcp_secret_one.expires = jiffy; /* past due */ - tcp_secret_two.expires = jiffy; /* past due */ - tcp_secret_generating = &tcp_secret_one; - tcp_secret_primary = &tcp_secret_one; - tcp_secret_retiring = &tcp_secret_two; - tcp_secret_secondary = &tcp_secret_two; tcp_tasklet_init(); } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 836d74dd0187..19f0149fb6a2 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3760,8 +3760,8 @@ old_ack: * But, this can also be called on packets in the established flow when * the fast version below fails. */ -void tcp_parse_options(const struct sk_buff *skb, struct tcp_options_received *opt_rx, - const u8 **hvpp, int estab, +void tcp_parse_options(const struct sk_buff *skb, + struct tcp_options_received *opt_rx, int estab, struct tcp_fastopen_cookie *foc) { const unsigned char *ptr; @@ -3845,31 +3845,6 @@ void tcp_parse_options(const struct sk_buff *skb, struct tcp_options_received *o */ break; #endif - case TCPOPT_COOKIE: - /* This option is variable length. - */ - switch (opsize) { - case TCPOLEN_COOKIE_BASE: - /* not yet implemented */ - break; - case TCPOLEN_COOKIE_PAIR: - /* not yet implemented */ - break; - case TCPOLEN_COOKIE_MIN+0: - case TCPOLEN_COOKIE_MIN+2: - case TCPOLEN_COOKIE_MIN+4: - case TCPOLEN_COOKIE_MIN+6: - case TCPOLEN_COOKIE_MAX: - /* 16-bit multiple */ - opt_rx->cookie_plus = opsize; - *hvpp = ptr; - break; - default: - /* ignore option */ - break; - } - break; - case TCPOPT_EXP: /* Fast Open option shares code 254 using a * 16 bits magic number. It's valid only in @@ -3915,8 +3890,7 @@ static bool tcp_parse_aligned_timestamp(struct tcp_sock *tp, const struct tcphdr * If it is wrong it falls back on tcp_parse_options(). */ static bool tcp_fast_parse_options(const struct sk_buff *skb, - const struct tcphdr *th, - struct tcp_sock *tp, const u8 **hvpp) + const struct tcphdr *th, struct tcp_sock *tp) { /* In the spirit of fast parsing, compare doff directly to constant * values. Because equality is used, short doff can be ignored here. @@ -3930,7 +3904,7 @@ static bool tcp_fast_parse_options(const struct sk_buff *skb, return true; } - tcp_parse_options(skb, &tp->rx_opt, hvpp, 1, NULL); + tcp_parse_options(skb, &tp->rx_opt, 1, NULL); if (tp->rx_opt.saw_tstamp) tp->rx_opt.rcv_tsecr -= tp->tsoffset; @@ -5311,12 +5285,10 @@ out: static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb, const struct tcphdr *th, int syn_inerr) { - const u8 *hash_location; struct tcp_sock *tp = tcp_sk(sk); /* RFC1323: H1. Apply PAWS check first. */ - if (tcp_fast_parse_options(skb, th, tp, &hash_location) && - tp->rx_opt.saw_tstamp && + if (tcp_fast_parse_options(skb, th, tp) && tp->rx_opt.saw_tstamp && tcp_paws_discard(sk, skb)) { if (!th->rst) { NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED); @@ -5670,12 +5642,11 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack, if (mss == tp->rx_opt.user_mss) { struct tcp_options_received opt; - const u8 *hash_location; /* Get original SYNACK MSS value if user MSS sets mss_clamp */ tcp_clear_options(&opt); opt.user_mss = opt.mss_clamp = 0; - tcp_parse_options(synack, &opt, &hash_location, 0, NULL); + tcp_parse_options(synack, &opt, 0, NULL); mss = opt.mss_clamp; } @@ -5706,14 +5677,12 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack, static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, const struct tcphdr *th, unsigned int len) { - const u8 *hash_location; struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); - struct tcp_cookie_values *cvp = tp->cookie_values; struct tcp_fastopen_cookie foc = { .len = -1 }; int saved_clamp = tp->rx_opt.mss_clamp; - tcp_parse_options(skb, &tp->rx_opt, &hash_location, 0, &foc); + tcp_parse_options(skb, &tp->rx_opt, 0, &foc); if (tp->rx_opt.saw_tstamp) tp->rx_opt.rcv_tsecr -= tp->tsoffset; @@ -5810,30 +5779,6 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, * is initialized. */ tp->copied_seq = tp->rcv_nxt; - if (cvp != NULL && - cvp->cookie_pair_size > 0 && - tp->rx_opt.cookie_plus > 0) { - int cookie_size = tp->rx_opt.cookie_plus - - TCPOLEN_COOKIE_BASE; - int cookie_pair_size = cookie_size - + cvp->cookie_desired; - - /* A cookie extension option was sent and returned. - * Note that each incoming SYNACK replaces the - * Responder cookie. The initial exchange is most - * fragile, as protection against spoofing relies - * entirely upon the sequence and timestamp (above). - * This replacement strategy allows the correct pair to - * pass through, while any others will be filtered via - * Responder verification later. - */ - if (sizeof(cvp->cookie_pair) >= cookie_pair_size) { - memcpy(&cvp->cookie_pair[cvp->cookie_desired], - hash_location, cookie_size); - cvp->cookie_pair_size = cookie_pair_size; - } - } - smp_mb(); tcp_finish_connect(sk, skb); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index b7ab868c8284..b27c758ca23f 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -838,7 +838,6 @@ static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, */ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst, struct request_sock *req, - struct request_values *rvp, u16 queue_mapping, bool nocache) { @@ -851,7 +850,7 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst, if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL) return -1; - skb = tcp_make_synack(sk, dst, req, rvp, NULL); + skb = tcp_make_synack(sk, dst, req, NULL); if (skb) { __tcp_v4_send_check(skb, ireq->loc_addr, ireq->rmt_addr); @@ -868,10 +867,9 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst, return err; } -static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req, - struct request_values *rvp) +static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req) { - int res = tcp_v4_send_synack(sk, NULL, req, rvp, 0, false); + int res = tcp_v4_send_synack(sk, NULL, req, 0, false); if (!res) TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS); @@ -1371,8 +1369,7 @@ static bool tcp_fastopen_check(struct sock *sk, struct sk_buff *skb, static int tcp_v4_conn_req_fastopen(struct sock *sk, struct sk_buff *skb, struct sk_buff *skb_synack, - struct request_sock *req, - struct request_values *rvp) + struct request_sock *req) { struct tcp_sock *tp = tcp_sk(sk); struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue; @@ -1467,9 +1464,7 @@ static int tcp_v4_conn_req_fastopen(struct sock *sk, int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) { - struct tcp_extend_values tmp_ext; struct tcp_options_received tmp_opt; - const u8 *hash_location; struct request_sock *req; struct inet_request_sock *ireq; struct tcp_sock *tp = tcp_sk(sk); @@ -1519,42 +1514,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) tcp_clear_options(&tmp_opt); tmp_opt.mss_clamp = TCP_MSS_DEFAULT; tmp_opt.user_mss = tp->rx_opt.user_mss; - tcp_parse_options(skb, &tmp_opt, &hash_location, 0, - want_cookie ? NULL : &foc); - - if (tmp_opt.cookie_plus > 0 && - tmp_opt.saw_tstamp && - !tp->rx_opt.cookie_out_never && - (sysctl_tcp_cookie_size > 0 || - (tp->cookie_values != NULL && - tp->cookie_values->cookie_desired > 0))) { - u8 *c; - u32 *mess = &tmp_ext.cookie_bakery[COOKIE_DIGEST_WORDS]; - int l = tmp_opt.cookie_plus - TCPOLEN_COOKIE_BASE; - - if (tcp_cookie_generator(&tmp_ext.cookie_bakery[0]) != 0) - goto drop_and_release; - - /* Secret recipe starts with IP addresses */ - *mess++ ^= (__force u32)daddr; - *mess++ ^= (__force u32)saddr; - - /* plus variable length Initiator Cookie */ - c = (u8 *)mess; - while (l-- > 0) - *c++ ^= *hash_location++; - - want_cookie = false; /* not our kind of cookie */ - tmp_ext.cookie_out_never = 0; /* false */ - tmp_ext.cookie_plus = tmp_opt.cookie_plus; - } else if (!tp->rx_opt.cookie_in_always) { - /* redundant indications, but ensure initialization. */ - tmp_ext.cookie_out_never = 1; /* true */ - tmp_ext.cookie_plus = 0; - } else { - goto drop_and_release; - } - tmp_ext.cookie_in_always = tp->rx_opt.cookie_in_always; + tcp_parse_options(skb, &tmp_opt, 0, want_cookie ? NULL : &foc); if (want_cookie && !tmp_opt.saw_tstamp) tcp_clear_options(&tmp_opt); @@ -1636,7 +1596,6 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) * of tcp_v4_send_synack()->tcp_select_initial_window(). */ skb_synack = tcp_make_synack(sk, dst, req, - (struct request_values *)&tmp_ext, fastopen_cookie_present(&valid_foc) ? &valid_foc : NULL); if (skb_synack) { @@ -1660,8 +1619,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) if (fastopen_cookie_present(&foc) && foc.len != 0) NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENPASSIVEFAIL); - } else if (tcp_v4_conn_req_fastopen(sk, skb, skb_synack, req, - (struct request_values *)&tmp_ext)) + } else if (tcp_v4_conn_req_fastopen(sk, skb, skb_synack, req)) goto drop_and_free; return 0; @@ -2241,12 +2199,6 @@ void tcp_v4_destroy_sock(struct sock *sk) if (inet_csk(sk)->icsk_bind_hash) inet_put_port(sk); - /* TCP Cookie Transactions */ - if (tp->cookie_values != NULL) { - kref_put(&tp->cookie_values->kref, - tcp_cookie_values_release); - tp->cookie_values = NULL; - } BUG_ON(tp->fastopen_rsk != NULL); /* If socket is aborted during connect operation */ diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 4bdb09fca401..8f0234f8bb95 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -93,13 +93,12 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, const struct tcphdr *th) { struct tcp_options_received tmp_opt; - const u8 *hash_location; struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); bool paws_reject = false; tmp_opt.saw_tstamp = 0; if (th->doff > (sizeof(*th) >> 2) && tcptw->tw_ts_recent_stamp) { - tcp_parse_options(skb, &tmp_opt, &hash_location, 0, NULL); + tcp_parse_options(skb, &tmp_opt, 0, NULL); if (tmp_opt.saw_tstamp) { tmp_opt.rcv_tsecr -= tcptw->tw_ts_offset; @@ -388,32 +387,6 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, struct tcp_request_sock *treq = tcp_rsk(req); struct inet_connection_sock *newicsk = inet_csk(newsk); struct tcp_sock *newtp = tcp_sk(newsk); - struct tcp_sock *oldtp = tcp_sk(sk); - struct tcp_cookie_values *oldcvp = oldtp->cookie_values; - - /* TCP Cookie Transactions require space for the cookie pair, - * as it differs for each connection. There is no need to - * copy any s_data_payload stored at the original socket. - * Failure will prevent resuming the connection. - * - * Presumed copied, in order of appearance: - * cookie_in_always, cookie_out_never - */ - if (oldcvp != NULL) { - struct tcp_cookie_values *newcvp = - kzalloc(sizeof(*newtp->cookie_values), - GFP_ATOMIC); - - if (newcvp != NULL) { - kref_init(&newcvp->kref); - newcvp->cookie_desired = - oldcvp->cookie_desired; - newtp->cookie_values = newcvp; - } else { - /* Not Yet Implemented */ - newtp->cookie_values = NULL; - } - } /* Now setup tcp_sock */ newtp->pred_flags = 0; @@ -422,8 +395,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, newtp->rcv_nxt = treq->rcv_isn + 1; newtp->snd_sml = newtp->snd_una = - newtp->snd_nxt = newtp->snd_up = - treq->snt_isn + 1 + tcp_s_data_size(oldtp); + newtp->snd_nxt = newtp->snd_up = treq->snt_isn + 1; tcp_prequeue_init(newtp); INIT_LIST_HEAD(&newtp->tsq_node); @@ -460,8 +432,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, tcp_set_ca_state(newsk, TCP_CA_Open); tcp_init_xmit_timers(newsk); skb_queue_head_init(&newtp->out_of_order_queue); - newtp->write_seq = newtp->pushed_seq = - treq->snt_isn + 1 + tcp_s_data_size(oldtp); + newtp->write_seq = newtp->pushed_seq = treq->snt_isn + 1; newtp->rx_opt.saw_tstamp = 0; @@ -538,7 +509,6 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, bool fastopen) { struct tcp_options_received tmp_opt; - const u8 *hash_location; struct sock *child; const struct tcphdr *th = tcp_hdr(skb); __be32 flg = tcp_flag_word(th) & (TCP_FLAG_RST|TCP_FLAG_SYN|TCP_FLAG_ACK); @@ -548,7 +518,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, tmp_opt.saw_tstamp = 0; if (th->doff > (sizeof(struct tcphdr)>>2)) { - tcp_parse_options(skb, &tmp_opt, &hash_location, 0, NULL); + tcp_parse_options(skb, &tmp_opt, 0, NULL); if (tmp_opt.saw_tstamp) { tmp_opt.ts_recent = req->ts_recent; @@ -648,7 +618,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, */ if ((flg & TCP_FLAG_ACK) && !fastopen && (TCP_SKB_CB(skb)->ack_seq != - tcp_rsk(req)->snt_isn + 1 + tcp_s_data_size(tcp_sk(sk)))) + tcp_rsk(req)->snt_isn + 1)) return sk; /* Also, it would be not so bad idea to check rcv_tsecr, which diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 8e7742f0b5d2..ac5871ebe086 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -65,9 +65,6 @@ int sysctl_tcp_base_mss __read_mostly = TCP_BASE_MSS; /* By default, RFC2861 behavior. */ int sysctl_tcp_slow_start_after_idle __read_mostly = 1; -int sysctl_tcp_cookie_size __read_mostly = 0; /* TCP_COOKIE_MAX */ -EXPORT_SYMBOL_GPL(sysctl_tcp_cookie_size); - static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, int push_one, gfp_t gfp); @@ -386,7 +383,6 @@ static inline bool tcp_urg_mode(const struct tcp_sock *tp) #define OPTION_TS (1 << 1) #define OPTION_MD5 (1 << 2) #define OPTION_WSCALE (1 << 3) -#define OPTION_COOKIE_EXTENSION (1 << 4) #define OPTION_FAST_OPEN_COOKIE (1 << 8) struct tcp_out_options { @@ -400,36 +396,6 @@ struct tcp_out_options { struct tcp_fastopen_cookie *fastopen_cookie; /* Fast open cookie */ }; -/* The sysctl int routines are generic, so check consistency here. - */ -static u8 tcp_cookie_size_check(u8 desired) -{ - int cookie_size; - - if (desired > 0) - /* previously specified */ - return desired; - - cookie_size = ACCESS_ONCE(sysctl_tcp_cookie_size); - if (cookie_size <= 0) - /* no default specified */ - return 0; - - if (cookie_size <= TCP_COOKIE_MIN) - /* value too small, specify minimum */ - return TCP_COOKIE_MIN; - - if (cookie_size >= TCP_COOKIE_MAX) - /* value too large, specify maximum */ - return TCP_COOKIE_MAX; - - if (cookie_size & 1) - /* 8-bit multiple, illegal, fix it */ - cookie_size++; - - return (u8)cookie_size; -} - /* Write previously computed TCP options to the packet. * * Beware: Something in the Internet is very sensitive to the ordering of @@ -448,27 +414,9 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp, { u16 options = opts->options; /* mungable copy */ - /* Having both authentication and cookies for security is redundant, - * and there's certainly not enough room. Instead, the cookie-less - * extension variant is proposed. - * - * Consider the pessimal case with authentication. The options - * could look like: - * COOKIE|MD5(20) + MSS(4) + SACK|TS(12) + WSCALE(4) == 40 - */ if (unlikely(OPTION_MD5 & options)) { - if (unlikely(OPTION_COOKIE_EXTENSION & options)) { - *ptr++ = htonl((TCPOPT_COOKIE << 24) | - (TCPOLEN_COOKIE_BASE << 16) | - (TCPOPT_MD5SIG << 8) | - TCPOLEN_MD5SIG); - } else { - *ptr++ = htonl((TCPOPT_NOP << 24) | - (TCPOPT_NOP << 16) | - (TCPOPT_MD5SIG << 8) | - TCPOLEN_MD5SIG); - } - options &= ~OPTION_COOKIE_EXTENSION; + *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | + (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG); /* overload cookie hash location */ opts->hash_location = (__u8 *)ptr; ptr += 4; @@ -497,44 +445,6 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp, *ptr++ = htonl(opts->tsecr); } - /* Specification requires after timestamp, so do it now. - * - * Consider the pessimal case without authentication. The options - * could look like: - * MSS(4) + SACK|TS(12) + COOKIE(20) + WSCALE(4) == 40 - */ - if (unlikely(OPTION_COOKIE_EXTENSION & options)) { - __u8 *cookie_copy = opts->hash_location; - u8 cookie_size = opts->hash_size; - - /* 8-bit multiple handled in tcp_cookie_size_check() above, - * and elsewhere. - */ - if (0x2 & cookie_size) { - __u8 *p = (__u8 *)ptr; - - /* 16-bit multiple */ - *p++ = TCPOPT_COOKIE; - *p++ = TCPOLEN_COOKIE_BASE + cookie_size; - *p++ = *cookie_copy++; - *p++ = *cookie_copy++; - ptr++; - cookie_size -= 2; - } else { - /* 32-bit multiple */ - *ptr++ = htonl(((TCPOPT_NOP << 24) | - (TCPOPT_NOP << 16) | - (TCPOPT_COOKIE << 8) | - TCPOLEN_COOKIE_BASE) + - cookie_size); - } - - if (cookie_size > 0) { - memcpy(ptr, cookie_copy, cookie_size); - ptr += (cookie_size / 4); - } - } - if (unlikely(OPTION_SACK_ADVERTISE & options)) { *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | @@ -593,11 +503,7 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb, struct tcp_md5sig_key **md5) { struct tcp_sock *tp = tcp_sk(sk); - struct tcp_cookie_values *cvp = tp->cookie_values; unsigned int remaining = MAX_TCP_OPTION_SPACE; - u8 cookie_size = (!tp->rx_opt.cookie_out_never && cvp != NULL) ? - tcp_cookie_size_check(cvp->cookie_desired) : - 0; struct tcp_fastopen_request *fastopen = tp->fastopen_req; #ifdef CONFIG_TCP_MD5SIG @@ -649,52 +555,7 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb, tp->syn_fastopen = 1; } } - /* Note that timestamps are required by the specification. - * - * Odd numbers of bytes are prohibited by the specification, ensuring - * that the cookie is 16-bit aligned, and the resulting cookie pair is - * 32-bit aligned. - */ - if (*md5 == NULL && - (OPTION_TS & opts->options) && - cookie_size > 0) { - int need = TCPOLEN_COOKIE_BASE + cookie_size; - - if (0x2 & need) { - /* 32-bit multiple */ - need += 2; /* NOPs */ - - if (need > remaining) { - /* try shrinking cookie to fit */ - cookie_size -= 2; - need -= 4; - } - } - while (need > remaining && TCP_COOKIE_MIN <= cookie_size) { - cookie_size -= 4; - need -= 4; - } - if (TCP_COOKIE_MIN <= cookie_size) { - opts->options |= OPTION_COOKIE_EXTENSION; - opts->hash_location = (__u8 *)&cvp->cookie_pair[0]; - opts->hash_size = cookie_size; - - /* Remember for future incarnations. */ - cvp->cookie_desired = cookie_size; - - if (cvp->cookie_desired != cvp->cookie_pair_size) { - /* Currently use random bytes as a nonce, - * assuming these are completely unpredictable - * by hostile users of the same system. - */ - get_random_bytes(&cvp->cookie_pair[0], - cookie_size); - cvp->cookie_pair_size = cookie_size; - } - remaining -= need; - } - } return MAX_TCP_OPTION_SPACE - remaining; } @@ -704,14 +565,10 @@ static unsigned int tcp_synack_options(struct sock *sk, unsigned int mss, struct sk_buff *skb, struct tcp_out_options *opts, struct tcp_md5sig_key **md5, - struct tcp_extend_values *xvp, struct tcp_fastopen_cookie *foc) { struct inet_request_sock *ireq = inet_rsk(req); unsigned int remaining = MAX_TCP_OPTION_SPACE; - u8 cookie_plus = (xvp != NULL && !xvp->cookie_out_never) ? - xvp->cookie_plus : - 0; #ifdef CONFIG_TCP_MD5SIG *md5 = tcp_rsk(req)->af_specific->md5_lookup(sk, req); @@ -759,28 +616,7 @@ static unsigned int tcp_synack_options(struct sock *sk, remaining -= need; } } - /* Similar rationale to tcp_syn_options() applies here, too. - * If the options fit, the same options should fit now! - */ - if (*md5 == NULL && - ireq->tstamp_ok && - cookie_plus > TCPOLEN_COOKIE_BASE) { - int need = cookie_plus; /* has TCPOLEN_COOKIE_BASE */ - - if (0x2 & need) { - /* 32-bit multiple */ - need += 2; /* NOPs */ - } - if (need <= remaining) { - opts->options |= OPTION_COOKIE_EXTENSION; - opts->hash_size = cookie_plus - TCPOLEN_COOKIE_BASE; - remaining -= need; - } else { - /* There's no error return, so flag it. */ - xvp->cookie_out_never = 1; /* true */ - opts->hash_size = 0; - } - } + return MAX_TCP_OPTION_SPACE - remaining; } @@ -2802,32 +2638,24 @@ int tcp_send_synack(struct sock *sk) * sk: listener socket * dst: dst entry attached to the SYNACK * req: request_sock pointer - * rvp: request_values pointer * * Allocate one skb and build a SYNACK packet. * @dst is consumed : Caller should not use it again. */ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, struct request_sock *req, - struct request_values *rvp, struct tcp_fastopen_cookie *foc) { struct tcp_out_options opts; - struct tcp_extend_values *xvp = tcp_xv(rvp); struct inet_request_sock *ireq = inet_rsk(req); struct tcp_sock *tp = tcp_sk(sk); - const struct tcp_cookie_values *cvp = tp->cookie_values; struct tcphdr *th; struct sk_buff *skb; struct tcp_md5sig_key *md5; int tcp_header_size; int mss; - int s_data_desired = 0; - if (cvp != NULL && cvp->s_data_constant && cvp->s_data_desired) - s_data_desired = cvp->s_data_desired; - skb = alloc_skb(MAX_TCP_HEADER + 15 + s_data_desired, - sk_gfp_atomic(sk, GFP_ATOMIC)); + skb = alloc_skb(MAX_TCP_HEADER + 15, sk_gfp_atomic(sk, GFP_ATOMIC)); if (unlikely(!skb)) { dst_release(dst); return NULL; @@ -2869,9 +2697,8 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, else #endif TCP_SKB_CB(skb)->when = tcp_time_stamp; - tcp_header_size = tcp_synack_options(sk, req, mss, - skb, &opts, &md5, xvp, foc) - + sizeof(*th); + tcp_header_size = tcp_synack_options(sk, req, mss, skb, &opts, &md5, + foc) + sizeof(*th); skb_push(skb, tcp_header_size); skb_reset_transport_header(skb); @@ -2889,40 +2716,6 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, tcp_init_nondata_skb(skb, tcp_rsk(req)->snt_isn, TCPHDR_SYN | TCPHDR_ACK); - if (OPTION_COOKIE_EXTENSION & opts.options) { - if (s_data_desired) { - u8 *buf = skb_put(skb, s_data_desired); - - /* copy data directly from the listening socket. */ - memcpy(buf, cvp->s_data_payload, s_data_desired); - TCP_SKB_CB(skb)->end_seq += s_data_desired; - } - - if (opts.hash_size > 0) { - __u32 workspace[SHA_WORKSPACE_WORDS]; - u32 *mess = &xvp->cookie_bakery[COOKIE_DIGEST_WORDS]; - u32 *tail = &mess[COOKIE_MESSAGE_WORDS-1]; - - /* Secret recipe depends on the Timestamp, (future) - * Sequence and Acknowledgment Numbers, Initiator - * Cookie, and others handled by IP variant caller. - */ - *tail-- ^= opts.tsval; - *tail-- ^= tcp_rsk(req)->rcv_isn + 1; - *tail-- ^= TCP_SKB_CB(skb)->seq + 1; - - /* recommended */ - *tail-- ^= (((__force u32)th->dest << 16) | (__force u32)th->source); - *tail-- ^= (u32)(unsigned long)cvp; /* per sockopt */ - - sha_transform((__u32 *)&xvp->cookie_bakery[0], - (char *)mess, - &workspace[0]); - opts.hash_location = - (__u8 *)&xvp->cookie_bakery[0]; - } - } - th->seq = htonl(TCP_SKB_CB(skb)->seq); /* XXX data is queued and acked as is. No buffer/window check */ th->ack_seq = htonl(tcp_rsk(req)->rcv_nxt); diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 8a0848b60b35..d5dda20bd717 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -149,7 +149,6 @@ static inline int cookie_check(const struct sk_buff *skb, __u32 cookie) struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) { struct tcp_options_received tcp_opt; - const u8 *hash_location; struct inet_request_sock *ireq; struct inet6_request_sock *ireq6; struct tcp_request_sock *treq; @@ -177,7 +176,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) /* check for timestamp cookie support */ memset(&tcp_opt, 0, sizeof(tcp_opt)); - tcp_parse_options(skb, &tcp_opt, &hash_location, 0, NULL); + tcp_parse_options(skb, &tcp_opt, 0, NULL); if (!cookie_check_timestamp(&tcp_opt, sock_net(sk), &ecn_ok)) goto out; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 9b6460055df5..0a97add2ab74 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -454,7 +454,6 @@ out: static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst, struct flowi6 *fl6, struct request_sock *req, - struct request_values *rvp, u16 queue_mapping) { struct inet6_request_sock *treq = inet6_rsk(req); @@ -466,7 +465,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst, if (!dst && (dst = inet6_csk_route_req(sk, fl6, req)) == NULL) goto done; - skb = tcp_make_synack(sk, dst, req, rvp, NULL); + skb = tcp_make_synack(sk, dst, req, NULL); if (skb) { __tcp_v6_send_check(skb, &treq->loc_addr, &treq->rmt_addr); @@ -481,13 +480,12 @@ done: return err; } -static int tcp_v6_rtx_synack(struct sock *sk, struct request_sock *req, - struct request_values *rvp) +static int tcp_v6_rtx_synack(struct sock *sk, struct request_sock *req) { struct flowi6 fl6; int res; - res = tcp_v6_send_synack(sk, NULL, &fl6, req, rvp, 0); + res = tcp_v6_send_synack(sk, NULL, &fl6, req, 0); if (!res) TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS); return res; @@ -940,9 +938,7 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb) */ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) { - struct tcp_extend_values tmp_ext; struct tcp_options_received tmp_opt; - const u8 *hash_location; struct request_sock *req; struct inet6_request_sock *treq; struct ipv6_pinfo *np = inet6_sk(sk); @@ -980,50 +976,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) tcp_clear_options(&tmp_opt); tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr); tmp_opt.user_mss = tp->rx_opt.user_mss; - tcp_parse_options(skb, &tmp_opt, &hash_location, 0, NULL); - - if (tmp_opt.cookie_plus > 0 && - tmp_opt.saw_tstamp && - !tp->rx_opt.cookie_out_never && - (sysctl_tcp_cookie_size > 0 || - (tp->cookie_values != NULL && - tp->cookie_values->cookie_desired > 0))) { - u8 *c; - u32 *d; - u32 *mess = &tmp_ext.cookie_bakery[COOKIE_DIGEST_WORDS]; - int l = tmp_opt.cookie_plus - TCPOLEN_COOKIE_BASE; - - if (tcp_cookie_generator(&tmp_ext.cookie_bakery[0]) != 0) - goto drop_and_free; - - /* Secret recipe starts with IP addresses */ - d = (__force u32 *)&ipv6_hdr(skb)->daddr.s6_addr32[0]; - *mess++ ^= *d++; - *mess++ ^= *d++; - *mess++ ^= *d++; - *mess++ ^= *d++; - d = (__force u32 *)&ipv6_hdr(skb)->saddr.s6_addr32[0]; - *mess++ ^= *d++; - *mess++ ^= *d++; - *mess++ ^= *d++; - *mess++ ^= *d++; - - /* plus variable length Initiator Cookie */ - c = (u8 *)mess; - while (l-- > 0) - *c++ ^= *hash_location++; - - want_cookie = false; /* not our kind of cookie */ - tmp_ext.cookie_out_never = 0; /* false */ - tmp_ext.cookie_plus = tmp_opt.cookie_plus; - } else if (!tp->rx_opt.cookie_in_always) { - /* redundant indications, but ensure initialization. */ - tmp_ext.cookie_out_never = 1; /* true */ - tmp_ext.cookie_plus = 0; - } else { - goto drop_and_free; - } - tmp_ext.cookie_in_always = tp->rx_opt.cookie_in_always; + tcp_parse_options(skb, &tmp_opt, 0, NULL); if (want_cookie && !tmp_opt.saw_tstamp) tcp_clear_options(&tmp_opt); @@ -1101,7 +1054,6 @@ have_isn: goto drop_and_release; if (tcp_v6_send_synack(sk, dst, &fl6, req, - (struct request_values *)&tmp_ext, skb_get_queue_mapping(skb)) || want_cookie) goto drop_and_free; -- cgit v1.2.3 From 2908fe31cf6b8d3a975efb567347f85e724f4e81 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Fri, 15 Mar 2013 17:06:56 -0500 Subject: Bluetooth: Remove useless HCI_PENDING_CLASS flag Now that class related operations are tracked through asynchronous HCI requests this flag is no longer needed. Signed-off-by: Johan Hedberg Acked-by: Marcel Holtmann Signed-off-by: Gustavo Padovan --- include/net/bluetooth/hci.h | 1 - net/bluetooth/hci_event.c | 3 +-- net/bluetooth/mgmt.c | 4 ---- 3 files changed, 1 insertion(+), 7 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 7f12c25f1fca..1e723c76a8f6 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -119,7 +119,6 @@ enum { HCI_CONNECTABLE, HCI_DISCOVERABLE, HCI_LINK_SECURITY, - HCI_PENDING_CLASS, HCI_PERIODIC_INQ, }; diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index d11b87bc1d1a..5f2d008f3352 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -194,8 +194,7 @@ static void hci_cc_reset(struct hci_dev *hdev, struct sk_buff *skb) clear_bit(HCI_RESET, &hdev->flags); /* Reset all non-persistent flags */ - hdev->dev_flags &= ~(BIT(HCI_LE_SCAN) | BIT(HCI_PENDING_CLASS) | - BIT(HCI_PERIODIC_INQ)); + hdev->dev_flags &= ~(BIT(HCI_LE_SCAN) | BIT(HCI_PERIODIC_INQ)); hdev->discovery.state = DISCOVERY_STOPPED; hdev->inq_tx_power = HCI_TX_POWER_INVALID; diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 8a0bbb914bed..3e3cb0102b13 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -652,8 +652,6 @@ static void update_class(struct hci_request *req) return; hci_req_add(req, HCI_OP_WRITE_CLASS_OF_DEV, sizeof(cod), cod); - - set_bit(HCI_PENDING_CLASS, &hdev->dev_flags); } static void service_cache_off(struct work_struct *work) @@ -3762,8 +3760,6 @@ int mgmt_set_class_of_dev_complete(struct hci_dev *hdev, u8 *dev_class, struct cmd_lookup match = { NULL, hdev, mgmt_status(status) }; int err = 0; - clear_bit(HCI_PENDING_CLASS, &hdev->dev_flags); - mgmt_pending_foreach(MGMT_OP_SET_DEV_CLASS, hdev, sk_lookup, &match); mgmt_pending_foreach(MGMT_OP_ADD_UUID, hdev, sk_lookup, &match); mgmt_pending_foreach(MGMT_OP_REMOVE_UUID, hdev, sk_lookup, &match); -- cgit v1.2.3 From 2cc6fb0049bc02ca7a020ba7b4f88b4c35976058 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Fri, 15 Mar 2013 17:06:57 -0500 Subject: Bluetooth: Add a define for the HCI persistent flags mask We'll need to use this mask also when powering off the HCI device so it's better to have this in a single and visible place. Signed-off-by: Johan Hedberg Acked-by: Marcel Holtmann Signed-off-by: Gustavo Padovan --- include/net/bluetooth/hci.h | 5 +++++ net/bluetooth/hci_event.c | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 1e723c76a8f6..1e40222e9dd0 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -122,6 +122,11 @@ enum { HCI_PERIODIC_INQ, }; +/* A mask for the flags that are supposed to remain when a reset happens + * or the HCI device is closed. + */ +#define HCI_PERSISTENT_MASK (BIT(HCI_LE_SCAN) | BIT(HCI_PERIODIC_INQ)) + /* HCI ioctl defines */ #define HCIDEVUP _IOW('H', 201, int) #define HCIDEVDOWN _IOW('H', 202, int) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 5f2d008f3352..ed4ecd930a71 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -194,7 +194,7 @@ static void hci_cc_reset(struct hci_dev *hdev, struct sk_buff *skb) clear_bit(HCI_RESET, &hdev->flags); /* Reset all non-persistent flags */ - hdev->dev_flags &= ~(BIT(HCI_LE_SCAN) | BIT(HCI_PERIODIC_INQ)); + hdev->dev_flags &= ~HCI_PERSISTENT_MASK; hdev->discovery.state = DISCOVERY_STOPPED; hdev->inq_tx_power = HCI_TX_POWER_INVALID; -- cgit v1.2.3 From 04b4edcbc9049e100681c0149b572de439be42ab Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Fri, 15 Mar 2013 17:07:01 -0500 Subject: Bluetooth: Handle AD updating through an async request For proper control of the AD update and the related HCI commands it's best to run the AD update through an async request instead of a standalone HCI command. This patch changes the hci_update_ad() function to take a request pointer and updates its users appropriately. E.g. the function is no longer called after the init sequence but during stage 3 of the init sequence. The TX power is read during the init sequence, so we don't need an explicit update whenever it is read and the AD update based on the local name should be done through the local name mgmt handler. The only other user is the update based on enabling advertising. This part is still kept as there is no mgmt API to enable it. Signed-off-by: Johan Hedberg Acked-by: Marcel Holtmann Signed-off-by: Gustavo Padovan --- include/net/bluetooth/hci_core.h | 4 ++-- net/bluetooth/hci_core.c | 29 ++++++++++------------------- net/bluetooth/hci_event.c | 19 +++++++++---------- 3 files changed, 21 insertions(+), 31 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index d6c32561fc02..cb99193c7493 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -740,8 +740,6 @@ int hci_add_remote_oob_data(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 *hash, u8 *randomizer); int hci_remove_remote_oob_data(struct hci_dev *hdev, bdaddr_t *bdaddr); -int hci_update_ad(struct hci_dev *hdev); - void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb); int hci_recv_frame(struct sk_buff *skb); @@ -1167,6 +1165,8 @@ struct hci_sec_filter { #define hci_req_lock(d) mutex_lock(&d->req_lock) #define hci_req_unlock(d) mutex_unlock(&d->req_lock) +void hci_update_ad(struct hci_request *req); + void hci_le_conn_update(struct hci_conn *conn, u16 min, u16 max, u16 latency, u16 to_multiplier); void hci_le_start_enc(struct hci_conn *conn, __le16 ediv, __u8 rand[8], diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 9e87a91562a6..0ffd35871172 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -492,8 +492,10 @@ static void hci_init3_req(struct hci_request *req, unsigned long opt) if (hdev->commands[5] & 0x10) hci_setup_link_policy(req); - if (lmp_le_capable(hdev)) + if (lmp_le_capable(hdev)) { hci_set_le_support(req); + hci_update_ad(req); + } } static int __hci_init(struct hci_dev *hdev) @@ -936,39 +938,29 @@ static u8 create_ad(struct hci_dev *hdev, u8 *ptr) return ad_len; } -int hci_update_ad(struct hci_dev *hdev) +void hci_update_ad(struct hci_request *req) { + struct hci_dev *hdev = req->hdev; struct hci_cp_le_set_adv_data cp; u8 len; - int err; - - hci_dev_lock(hdev); - if (!lmp_le_capable(hdev)) { - err = -EINVAL; - goto unlock; - } + if (!lmp_le_capable(hdev)) + return; memset(&cp, 0, sizeof(cp)); len = create_ad(hdev, cp.data); if (hdev->adv_data_len == len && - memcmp(cp.data, hdev->adv_data, len) == 0) { - err = 0; - goto unlock; - } + memcmp(cp.data, hdev->adv_data, len) == 0) + return; memcpy(hdev->adv_data, cp.data, sizeof(cp.data)); hdev->adv_data_len = len; cp.length = len; - err = hci_send_cmd(hdev, HCI_OP_LE_SET_ADV_DATA, sizeof(cp), &cp); -unlock: - hci_dev_unlock(hdev); - - return err; + hci_req_add(req, HCI_OP_LE_SET_ADV_DATA, sizeof(cp), &cp); } /* ---- HCI ioctl helpers ---- */ @@ -1025,7 +1017,6 @@ int hci_dev_open(__u16 dev) hci_dev_hold(hdev); set_bit(HCI_UP, &hdev->flags); hci_notify(hdev, HCI_DEV_UP); - hci_update_ad(hdev); if (!test_bit(HCI_SETUP, &hdev->dev_flags) && mgmt_valid_hdev(hdev)) { hci_dev_lock(hdev); diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index ed4ecd930a71..84edacbc14a1 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -223,9 +223,6 @@ static void hci_cc_write_local_name(struct hci_dev *hdev, struct sk_buff *skb) memcpy(hdev->dev_name, sent, HCI_MAX_NAME_LENGTH); hci_dev_unlock(hdev); - - if (!status && !test_bit(HCI_INIT, &hdev->flags)) - hci_update_ad(hdev); } static void hci_cc_read_local_name(struct hci_dev *hdev, struct sk_buff *skb) @@ -776,11 +773,8 @@ static void hci_cc_le_read_adv_tx_power(struct hci_dev *hdev, BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); - if (!rp->status) { + if (!rp->status) hdev->adv_tx_power = rp->tx_power; - if (!test_bit(HCI_INIT, &hdev->flags)) - hci_update_ad(hdev); - } } static void hci_cc_user_confirm_reply(struct hci_dev *hdev, struct sk_buff *skb) @@ -877,10 +871,15 @@ static void hci_cc_le_set_adv_enable(struct hci_dev *hdev, struct sk_buff *skb) clear_bit(HCI_LE_PERIPHERAL, &hdev->dev_flags); } - hci_dev_unlock(hdev); + if (!test_bit(HCI_INIT, &hdev->flags)) { + struct hci_request req; - if (!test_bit(HCI_INIT, &hdev->flags)) - hci_update_ad(hdev); + hci_req_init(&req, hdev); + hci_update_ad(&req); + hci_req_run(&req, NULL); + } + + hci_dev_unlock(hdev); } static void hci_cc_le_set_scan_param(struct hci_dev *hdev, struct sk_buff *skb) -- cgit v1.2.3 From 1a4d3c4b3750885733641216756de4e4d9b2443a Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Fri, 15 Mar 2013 17:07:08 -0500 Subject: Bluetooth: Add proper flag for fast connectable mode In order to be able to represent fast connectable mode in the mgmt settings we need to have a HCI dev flag for it. This patch adds the flag and makes sure its value is changed whenever a mgmt_set_fast_connectable command completes. Signed-off-by: Johan Hedberg Acked-by: Marcel Holtmann Signed-off-by: Gustavo Padovan --- include/net/bluetooth/hci.h | 4 +++- net/bluetooth/mgmt.c | 16 ++++++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 1e40222e9dd0..b854506c859c 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -120,12 +120,14 @@ enum { HCI_DISCOVERABLE, HCI_LINK_SECURITY, HCI_PERIODIC_INQ, + HCI_FAST_CONNECTABLE, }; /* A mask for the flags that are supposed to remain when a reset happens * or the HCI device is closed. */ -#define HCI_PERSISTENT_MASK (BIT(HCI_LE_SCAN) | BIT(HCI_PERIODIC_INQ)) +#define HCI_PERSISTENT_MASK (BIT(HCI_LE_SCAN) | BIT(HCI_PERIODIC_INQ) | \ + BIT(HCI_FAST_CONNECTABLE)) /* HCI ioctl defines */ #define HCIDEVUP _IOW('H', 201, int) diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index e89938e0233c..b6a33c5e7685 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -410,6 +410,9 @@ static u32 get_current_settings(struct hci_dev *hdev) if (test_bit(HCI_CONNECTABLE, &hdev->dev_flags)) settings |= MGMT_SETTING_CONNECTABLE; + if (test_bit(HCI_FAST_CONNECTABLE, &hdev->dev_flags)) + settings |= MGMT_SETTING_FAST_CONNECTABLE; + if (test_bit(HCI_DISCOVERABLE, &hdev->dev_flags)) settings |= MGMT_SETTING_DISCOVERABLE; @@ -2913,6 +2916,13 @@ static void fast_connectable_complete(struct hci_dev *hdev, u8 status) cmd_status(cmd->sk, hdev->id, MGMT_OP_SET_FAST_CONNECTABLE, mgmt_status(status)); } else { + struct mgmt_mode *cp = cmd->param; + + if (cp->val) + set_bit(HCI_FAST_CONNECTABLE, &hdev->dev_flags); + else + clear_bit(HCI_FAST_CONNECTABLE, &hdev->dev_flags); + send_settings_rsp(cmd->sk, MGMT_OP_SET_FAST_CONNECTABLE, hdev); new_settings(hdev, cmd->sk); } @@ -2959,6 +2969,12 @@ static int set_fast_connectable(struct sock *sk, struct hci_dev *hdev, goto unlock; } + if (!!cp->val == test_bit(HCI_FAST_CONNECTABLE, &hdev->dev_flags)) { + err = send_settings_rsp(sk, MGMT_OP_SET_FAST_CONNECTABLE, + hdev); + goto unlock; + } + if (cp->val) { type = PAGE_SCAN_TYPE_INTERLACED; -- cgit v1.2.3 From f332ec6699980e0563408c7bcf1a8a31b825fee1 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Fri, 15 Mar 2013 17:07:11 -0500 Subject: Bluetooth: Add reading of page scan parameters These parameters are related to the "fast connectable" mode that can be changed through the mgmt interface. Not all controllers properly reset these values with HCI_Reset so they need to be read in order to be able to verify whether the values are correct or not before enabling page scan. Signed-off-by: Johan Hedberg Acked-by: Marcel Holtmann Signed-off-by: Gustavo Padovan --- include/net/bluetooth/hci.h | 13 +++++++++++++ include/net/bluetooth/hci_core.h | 4 ++++ net/bluetooth/hci_core.c | 6 ++++++ net/bluetooth/hci_event.c | 32 ++++++++++++++++++++++++++++++++ 4 files changed, 55 insertions(+) (limited to 'include/net') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index b854506c859c..b3308927a0a1 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -887,12 +887,25 @@ struct hci_rp_read_data_block_size { __le16 num_blocks; } __packed; +#define HCI_OP_READ_PAGE_SCAN_ACTIVITY 0x0c1b +struct hci_rp_read_page_scan_activity { + __u8 status; + __le16 interval; + __le16 window; +} __packed; + #define HCI_OP_WRITE_PAGE_SCAN_ACTIVITY 0x0c1c struct hci_cp_write_page_scan_activity { __le16 interval; __le16 window; } __packed; +#define HCI_OP_READ_PAGE_SCAN_TYPE 0x0c46 +struct hci_rp_read_page_scan_type { + __u8 status; + __u8 type; +} __packed; + #define HCI_OP_WRITE_PAGE_SCAN_TYPE 0x0c47 #define PAGE_SCAN_TYPE_STANDARD 0x00 #define PAGE_SCAN_TYPE_INTERLACED 0x01 diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index cb99193c7493..358a6983d3bb 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -165,6 +165,10 @@ struct hci_dev { __u16 voice_setting; __u8 io_capability; __s8 inq_tx_power; + __u16 page_scan_interval; + __u16 page_scan_window; + __u8 page_scan_type; + __u16 devid_source; __u16 devid_vendor; __u16 devid_product; diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 0ffd35871172..cfcad5423f1c 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -272,6 +272,12 @@ static void bredr_setup(struct hci_request *req) bacpy(&cp.bdaddr, BDADDR_ANY); cp.delete_all = 0x01; hci_req_add(req, HCI_OP_DELETE_STORED_LINK_KEY, sizeof(cp), &cp); + + /* Read page scan parameters */ + if (req->hdev->hci_ver > BLUETOOTH_VER_1_1) { + hci_req_add(req, HCI_OP_READ_PAGE_SCAN_ACTIVITY, 0, NULL); + hci_req_add(req, HCI_OP_READ_PAGE_SCAN_TYPE, 0, NULL); + } } static void le_setup(struct hci_request *req) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 84edacbc14a1..3c6d0a4f78dc 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -601,6 +601,30 @@ static void hci_cc_read_bd_addr(struct hci_dev *hdev, struct sk_buff *skb) bacpy(&hdev->bdaddr, &rp->bdaddr); } +static void hci_cc_read_page_scan_activity(struct hci_dev *hdev, + struct sk_buff *skb) +{ + struct hci_rp_read_page_scan_activity *rp = (void *) skb->data; + + BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); + + if (test_bit(HCI_INIT, &hdev->flags) && !rp->status) { + hdev->page_scan_interval = __le16_to_cpu(rp->interval); + hdev->page_scan_window = __le16_to_cpu(rp->window); + } +} + +static void hci_cc_read_page_scan_type(struct hci_dev *hdev, + struct sk_buff *skb) +{ + struct hci_rp_read_page_scan_type *rp = (void *) skb->data; + + BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); + + if (test_bit(HCI_INIT, &hdev->flags) && !rp->status) + hdev->page_scan_type = rp->type; +} + static void hci_cc_read_data_block_size(struct hci_dev *hdev, struct sk_buff *skb) { @@ -2204,6 +2228,14 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) hci_cc_read_bd_addr(hdev, skb); break; + case HCI_OP_READ_PAGE_SCAN_ACTIVITY: + hci_cc_read_page_scan_activity(hdev, skb); + break; + + case HCI_OP_READ_PAGE_SCAN_TYPE: + hci_cc_read_page_scan_type(hdev, skb); + break; + case HCI_OP_READ_DATA_BLOCK_SIZE: hci_cc_read_data_block_size(hdev, skb); break; -- cgit v1.2.3 From ddbfe860acc39d4856a86186eb8a292426ea6224 Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Fri, 8 Mar 2013 14:46:14 +0100 Subject: mac80211: move sdata debugfs dir to vif There is need create driver own per interface debugfs files. This is currently done by drv_{add,remove}_interface_debugfs() callbacks. But it is possible that after we remove interface from the driver (i.e. on suspend) we call drv_remove_interface_debugfs() function. Fixing this problem will require to add call drv_{add,remove}_interface_debugfs() anytime we create and remove interface in mac80211. So it's better to add debugfs dir dentry to vif structure to allow to create/remove custom debugfs driver files on drv_{add,remove}_interface(). Signed-off-by: Stanislaw Gruszka Signed-off-by: Johannes Berg --- include/net/mac80211.h | 7 +++++++ net/mac80211/debugfs_key.c | 10 +++++----- net/mac80211/debugfs_netdev.c | 22 +++++++++++----------- net/mac80211/driver-ops.h | 4 ++-- net/mac80211/ieee80211_i.h | 1 - 5 files changed, 25 insertions(+), 19 deletions(-) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index f5db5e970428..8b2c7506f5cb 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1067,6 +1067,9 @@ enum ieee80211_vif_flags { * path needing to access it; even though the netdev carrier will always * be off when it is %NULL there can still be races and packets could be * processed after it switches back to %NULL. + * @debugfs_dir: debugfs dentry, can be used by drivers to create own per + * interface debug files. Note that it will be NULL for the virtual + * monitor interface (if that is requested.) * @drv_priv: data area for driver use, will always be aligned to * sizeof(void *). */ @@ -1083,6 +1086,10 @@ struct ieee80211_vif { u32 driver_flags; +#ifdef CONFIG_MAC80211_DEBUGFS + struct dentry *debugfs_dir; +#endif + /* must be last */ u8 drv_priv[0] __aligned(sizeof(void *)); }; diff --git a/net/mac80211/debugfs_key.c b/net/mac80211/debugfs_key.c index c3a3082b72e5..1521cabad3d6 100644 --- a/net/mac80211/debugfs_key.c +++ b/net/mac80211/debugfs_key.c @@ -295,7 +295,7 @@ void ieee80211_debugfs_key_update_default(struct ieee80211_sub_if_data *sdata) char buf[50]; struct ieee80211_key *key; - if (!sdata->debugfs.dir) + if (!sdata->vif.debugfs_dir) return; lockdep_assert_held(&sdata->local->key_mtx); @@ -311,7 +311,7 @@ void ieee80211_debugfs_key_update_default(struct ieee80211_sub_if_data *sdata) sprintf(buf, "../keys/%d", key->debugfs.cnt); sdata->debugfs.default_unicast_key = debugfs_create_symlink("default_unicast_key", - sdata->debugfs.dir, buf); + sdata->vif.debugfs_dir, buf); } if (sdata->debugfs.default_multicast_key) { @@ -325,7 +325,7 @@ void ieee80211_debugfs_key_update_default(struct ieee80211_sub_if_data *sdata) sprintf(buf, "../keys/%d", key->debugfs.cnt); sdata->debugfs.default_multicast_key = debugfs_create_symlink("default_multicast_key", - sdata->debugfs.dir, buf); + sdata->vif.debugfs_dir, buf); } } @@ -334,7 +334,7 @@ void ieee80211_debugfs_key_add_mgmt_default(struct ieee80211_sub_if_data *sdata) char buf[50]; struct ieee80211_key *key; - if (!sdata->debugfs.dir) + if (!sdata->vif.debugfs_dir) return; key = key_mtx_dereference(sdata->local, @@ -343,7 +343,7 @@ void ieee80211_debugfs_key_add_mgmt_default(struct ieee80211_sub_if_data *sdata) sprintf(buf, "../keys/%d", key->debugfs.cnt); sdata->debugfs.default_mgmt_key = debugfs_create_symlink("default_mgmt_key", - sdata->debugfs.dir, buf); + sdata->vif.debugfs_dir, buf); } else ieee80211_debugfs_key_remove_mgmt_default(sdata); } diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index 059bbb82e84f..ddb426867904 100644 --- a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c @@ -521,7 +521,7 @@ IEEE80211_IF_FILE(dot11MeshAwakeWindowDuration, #endif #define DEBUGFS_ADD_MODE(name, mode) \ - debugfs_create_file(#name, mode, sdata->debugfs.dir, \ + debugfs_create_file(#name, mode, sdata->vif.debugfs_dir, \ sdata, &name##_ops); #define DEBUGFS_ADD(name) DEBUGFS_ADD_MODE(name, 0400) @@ -577,7 +577,7 @@ static void add_mesh_files(struct ieee80211_sub_if_data *sdata) static void add_mesh_stats(struct ieee80211_sub_if_data *sdata) { struct dentry *dir = debugfs_create_dir("mesh_stats", - sdata->debugfs.dir); + sdata->vif.debugfs_dir); #define MESHSTATS_ADD(name)\ debugfs_create_file(#name, 0400, dir, sdata, &name##_ops); @@ -594,7 +594,7 @@ static void add_mesh_stats(struct ieee80211_sub_if_data *sdata) static void add_mesh_config(struct ieee80211_sub_if_data *sdata) { struct dentry *dir = debugfs_create_dir("mesh_config", - sdata->debugfs.dir); + sdata->vif.debugfs_dir); #define MESHPARAMS_ADD(name) \ debugfs_create_file(#name, 0600, dir, sdata, &name##_ops); @@ -631,7 +631,7 @@ static void add_mesh_config(struct ieee80211_sub_if_data *sdata) static void add_files(struct ieee80211_sub_if_data *sdata) { - if (!sdata->debugfs.dir) + if (!sdata->vif.debugfs_dir) return; DEBUGFS_ADD(flags); @@ -673,21 +673,21 @@ void ieee80211_debugfs_add_netdev(struct ieee80211_sub_if_data *sdata) char buf[10+IFNAMSIZ]; sprintf(buf, "netdev:%s", sdata->name); - sdata->debugfs.dir = debugfs_create_dir(buf, + sdata->vif.debugfs_dir = debugfs_create_dir(buf, sdata->local->hw.wiphy->debugfsdir); - if (sdata->debugfs.dir) + if (sdata->vif.debugfs_dir) sdata->debugfs.subdir_stations = debugfs_create_dir("stations", - sdata->debugfs.dir); + sdata->vif.debugfs_dir); add_files(sdata); } void ieee80211_debugfs_remove_netdev(struct ieee80211_sub_if_data *sdata) { - if (!sdata->debugfs.dir) + if (!sdata->vif.debugfs_dir) return; - debugfs_remove_recursive(sdata->debugfs.dir); - sdata->debugfs.dir = NULL; + debugfs_remove_recursive(sdata->vif.debugfs_dir); + sdata->vif.debugfs_dir = NULL; } void ieee80211_debugfs_rename_netdev(struct ieee80211_sub_if_data *sdata) @@ -695,7 +695,7 @@ void ieee80211_debugfs_rename_netdev(struct ieee80211_sub_if_data *sdata) struct dentry *dir; char buf[10 + IFNAMSIZ]; - dir = sdata->debugfs.dir; + dir = sdata->vif.debugfs_dir; if (!dir) return; diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 025b7592b797..0059f3886d44 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -560,7 +560,7 @@ void drv_add_interface_debugfs(struct ieee80211_local *local, return; local->ops->add_interface_debugfs(&local->hw, &sdata->vif, - sdata->debugfs.dir); + sdata->vif.debugfs_dir); } static inline @@ -575,7 +575,7 @@ void drv_remove_interface_debugfs(struct ieee80211_local *local, return; local->ops->remove_interface_debugfs(&local->hw, &sdata->vif, - sdata->debugfs.dir); + sdata->vif.debugfs_dir); } #else static inline diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 95beb18588f6..d6e920609823 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -758,7 +758,6 @@ struct ieee80211_sub_if_data { #ifdef CONFIG_MAC80211_DEBUGFS struct { - struct dentry *dir; struct dentry *subdir_stations; struct dentry *default_unicast_key; struct dentry *default_multicast_key; -- cgit v1.2.3 From d260ff12e7768444b4da7612b785cfd7cbc1d1c1 Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Fri, 8 Mar 2013 14:46:15 +0100 Subject: mac80211: remove vif debugfs driver callbacks This basically reverts commit b207cdb07f3f01ec1adaac62e9d0cc918c60a81a. Now is possible to use drv_{add,remove}_interface() and vif->debugfs_dir to create/remove per interface debugfs files. Remove redundant callbacks. Signed-off-by: Stanislaw Gruszka Signed-off-by: Johannes Berg --- include/net/mac80211.h | 18 ------------------ net/mac80211/driver-ops.h | 37 ------------------------------------- net/mac80211/iface.c | 4 ---- 3 files changed, 59 deletions(-) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 8b2c7506f5cb..0b912d22f82d 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -2233,18 +2233,6 @@ enum ieee80211_roc_type { * MAC address of the device going away. * Hence, this callback must be implemented. It can sleep. * - * @add_interface_debugfs: Drivers can use this callback to add debugfs files - * when a vif is added to mac80211. This callback and - * @remove_interface_debugfs should be within a CONFIG_MAC80211_DEBUGFS - * conditional. @remove_interface_debugfs must be provided for cleanup. - * This callback can sleep. - * - * @remove_interface_debugfs: Remove the debugfs files which were added using - * @add_interface_debugfs. This callback must remove all debugfs entries - * that were added because mac80211 only removes interface debugfs when the - * interface is destroyed, not when it is removed from the driver. - * This callback can sleep. - * * @config: Handler for configuration requests. IEEE 802.11 code calls this * function to change hardware configuration, e.g., channel. * This function should never fail but returns a negative error code @@ -2665,12 +2653,6 @@ struct ieee80211_ops { struct ieee80211_vif *vif, struct ieee80211_sta *sta, struct dentry *dir); - void (*add_interface_debugfs)(struct ieee80211_hw *hw, - struct ieee80211_vif *vif, - struct dentry *dir); - void (*remove_interface_debugfs)(struct ieee80211_hw *hw, - struct ieee80211_vif *vif, - struct dentry *dir); #endif void (*sta_notify)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, enum sta_notify_cmd, struct ieee80211_sta *sta); diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 0059f3886d44..7b9ff53bd2e9 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -547,43 +547,6 @@ static inline void drv_sta_remove_debugfs(struct ieee80211_local *local, local->ops->sta_remove_debugfs(&local->hw, &sdata->vif, sta, dir); } - -static inline -void drv_add_interface_debugfs(struct ieee80211_local *local, - struct ieee80211_sub_if_data *sdata) -{ - might_sleep(); - - check_sdata_in_driver(sdata); - - if (!local->ops->add_interface_debugfs) - return; - - local->ops->add_interface_debugfs(&local->hw, &sdata->vif, - sdata->vif.debugfs_dir); -} - -static inline -void drv_remove_interface_debugfs(struct ieee80211_local *local, - struct ieee80211_sub_if_data *sdata) -{ - might_sleep(); - - check_sdata_in_driver(sdata); - - if (!local->ops->remove_interface_debugfs) - return; - - local->ops->remove_interface_debugfs(&local->hw, &sdata->vif, - sdata->vif.debugfs_dir); -} -#else -static inline -void drv_add_interface_debugfs(struct ieee80211_local *local, - struct ieee80211_sub_if_data *sdata) {} -static inline -void drv_remove_interface_debugfs(struct ieee80211_local *local, - struct ieee80211_sub_if_data *sdata) {} #endif static inline __must_check diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 9875e321c9e8..80e838bc875d 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -557,8 +557,6 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) goto err_del_interface; } - drv_add_interface_debugfs(local, sdata); - if (sdata->vif.type == NL80211_IFTYPE_AP) { local->fif_pspoll++; local->fif_probe_req++; @@ -846,8 +844,6 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, case NL80211_IFTYPE_AP: skb_queue_purge(&sdata->skb_queue); - drv_remove_interface_debugfs(local, sdata); - if (going_down) drv_remove_interface(local, sdata); } -- cgit v1.2.3 From 39ecc01d1bbe3de2cf5f01a81e176ea5160d3b95 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 13 Feb 2013 12:11:00 +0100 Subject: mac80211: pass queue bitmap to flush operation There are a number of situations in which mac80211 only really needs to flush queues for one virtual interface, and in fact during this frames might be transmitted on other virtual interfaces. Calculate and pass a queue bitmap to the driver so it knows which queues to flush. Signed-off-by: Johannes Berg --- drivers/net/wireless/ath/ar5523/ar5523.c | 2 +- drivers/net/wireless/ath/ath9k/main.c | 2 +- drivers/net/wireless/ath/carl9170/main.c | 2 +- .../net/wireless/brcm80211/brcmsmac/mac80211_if.c | 2 +- drivers/net/wireless/iwlegacy/common.c | 3 +-- drivers/net/wireless/iwlegacy/common.h | 2 +- drivers/net/wireless/iwlwifi/dvm/mac80211.c | 2 +- drivers/net/wireless/mac80211_hwsim.c | 2 +- drivers/net/wireless/p54/main.c | 2 +- drivers/net/wireless/rt2x00/rt2x00.h | 2 +- drivers/net/wireless/rt2x00/rt2x00mac.c | 2 +- drivers/net/wireless/rtlwifi/core.c | 2 +- drivers/net/wireless/ti/wlcore/main.c | 2 +- include/net/mac80211.h | 9 +++++--- net/mac80211/driver-ops.h | 7 +++--- net/mac80211/ieee80211_i.h | 2 ++ net/mac80211/iface.c | 2 +- net/mac80211/mlme.c | 8 +++---- net/mac80211/offchannel.c | 4 ++-- net/mac80211/pm.c | 2 +- net/mac80211/scan.c | 4 ++-- net/mac80211/trace.h | 11 ++++++---- net/mac80211/util.c | 25 ++++++++++++++++++++++ 23 files changed, 67 insertions(+), 34 deletions(-) (limited to 'include/net') diff --git a/drivers/net/wireless/ath/ar5523/ar5523.c b/drivers/net/wireless/ath/ar5523/ar5523.c index 7157f7d311c5..afd1e36d308f 100644 --- a/drivers/net/wireless/ath/ar5523/ar5523.c +++ b/drivers/net/wireless/ath/ar5523/ar5523.c @@ -1091,7 +1091,7 @@ static int ar5523_set_rts_threshold(struct ieee80211_hw *hw, u32 value) return ret; } -static void ar5523_flush(struct ieee80211_hw *hw, bool drop) +static void ar5523_flush(struct ieee80211_hw *hw, u32 queues, bool drop) { struct ar5523 *ar = hw->priv; diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c index 6e66f9c6782b..24650fd41694 100644 --- a/drivers/net/wireless/ath/ath9k/main.c +++ b/drivers/net/wireless/ath/ath9k/main.c @@ -1745,7 +1745,7 @@ static void ath9k_set_coverage_class(struct ieee80211_hw *hw, u8 coverage_class) mutex_unlock(&sc->mutex); } -static void ath9k_flush(struct ieee80211_hw *hw, bool drop) +static void ath9k_flush(struct ieee80211_hw *hw, u32 queues, bool drop) { struct ath_softc *sc = hw->priv; struct ath_hw *ah = sc->sc_ah; diff --git a/drivers/net/wireless/ath/carl9170/main.c b/drivers/net/wireless/ath/carl9170/main.c index f293b3ff4756..08b193199946 100644 --- a/drivers/net/wireless/ath/carl9170/main.c +++ b/drivers/net/wireless/ath/carl9170/main.c @@ -1703,7 +1703,7 @@ found: return 0; } -static void carl9170_op_flush(struct ieee80211_hw *hw, bool drop) +static void carl9170_op_flush(struct ieee80211_hw *hw, u32 queues, bool drop) { struct ar9170 *ar = hw->priv; unsigned int vid; diff --git a/drivers/net/wireless/brcm80211/brcmsmac/mac80211_if.c b/drivers/net/wireless/brcm80211/brcmsmac/mac80211_if.c index c6451c61407a..aa5f43fee5ed 100644 --- a/drivers/net/wireless/brcm80211/brcmsmac/mac80211_if.c +++ b/drivers/net/wireless/brcm80211/brcmsmac/mac80211_if.c @@ -723,7 +723,7 @@ static bool brcms_tx_flush_completed(struct brcms_info *wl) return result; } -static void brcms_ops_flush(struct ieee80211_hw *hw, bool drop) +static void brcms_ops_flush(struct ieee80211_hw *hw, u32 queues, bool drop) { struct brcms_info *wl = hw->priv; int ret; diff --git a/drivers/net/wireless/iwlegacy/common.c b/drivers/net/wireless/iwlegacy/common.c index e006ea831320..722bfb57cfd5 100644 --- a/drivers/net/wireless/iwlegacy/common.c +++ b/drivers/net/wireless/iwlegacy/common.c @@ -4704,8 +4704,7 @@ out: } EXPORT_SYMBOL(il_mac_change_interface); -void -il_mac_flush(struct ieee80211_hw *hw, bool drop) +void il_mac_flush(struct ieee80211_hw *hw, u32 queues, bool drop) { struct il_priv *il = hw->priv; unsigned long timeout = jiffies + msecs_to_jiffies(500); diff --git a/drivers/net/wireless/iwlegacy/common.h b/drivers/net/wireless/iwlegacy/common.h index 73bd3ef316c8..728aa1306ab8 100644 --- a/drivers/net/wireless/iwlegacy/common.h +++ b/drivers/net/wireless/iwlegacy/common.h @@ -1720,7 +1720,7 @@ void il_mac_remove_interface(struct ieee80211_hw *hw, struct ieee80211_vif *vif); int il_mac_change_interface(struct ieee80211_hw *hw, struct ieee80211_vif *vif, enum nl80211_iftype newtype, bool newp2p); -void il_mac_flush(struct ieee80211_hw *hw, bool drop); +void il_mac_flush(struct ieee80211_hw *hw, u32 queues, bool drop); int il_alloc_txq_mem(struct il_priv *il); void il_free_txq_mem(struct il_priv *il); diff --git a/drivers/net/wireless/iwlwifi/dvm/mac80211.c b/drivers/net/wireless/iwlwifi/dvm/mac80211.c index c7cd2dffa5cd..a7294fa4d7e5 100644 --- a/drivers/net/wireless/iwlwifi/dvm/mac80211.c +++ b/drivers/net/wireless/iwlwifi/dvm/mac80211.c @@ -1100,7 +1100,7 @@ static void iwlagn_configure_filter(struct ieee80211_hw *hw, FIF_BCN_PRBRESP_PROMISC | FIF_CONTROL; } -static void iwlagn_mac_flush(struct ieee80211_hw *hw, bool drop) +static void iwlagn_mac_flush(struct ieee80211_hw *hw, u32 queues, bool drop) { struct iwl_priv *priv = IWL_MAC80211_GET_DVM(hw); diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 7490c4fc7177..3466f1a8a8d3 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -1389,7 +1389,7 @@ static int mac80211_hwsim_ampdu_action(struct ieee80211_hw *hw, return 0; } -static void mac80211_hwsim_flush(struct ieee80211_hw *hw, bool drop) +static void mac80211_hwsim_flush(struct ieee80211_hw *hw, u32 queues, bool drop) { /* Not implemented, queues only on kernel side */ } diff --git a/drivers/net/wireless/p54/main.c b/drivers/net/wireless/p54/main.c index aadda99989c0..ee654a691f38 100644 --- a/drivers/net/wireless/p54/main.c +++ b/drivers/net/wireless/p54/main.c @@ -670,7 +670,7 @@ static unsigned int p54_flush_count(struct p54_common *priv) return total; } -static void p54_flush(struct ieee80211_hw *dev, bool drop) +static void p54_flush(struct ieee80211_hw *dev, u32 queues, bool drop) { struct p54_common *priv = dev->priv; unsigned int total, i; diff --git a/drivers/net/wireless/rt2x00/rt2x00.h b/drivers/net/wireless/rt2x00/rt2x00.h index 086abb403a4f..041b392f4f47 100644 --- a/drivers/net/wireless/rt2x00/rt2x00.h +++ b/drivers/net/wireless/rt2x00/rt2x00.h @@ -1360,7 +1360,7 @@ int rt2x00mac_conf_tx(struct ieee80211_hw *hw, struct ieee80211_vif *vif, u16 queue, const struct ieee80211_tx_queue_params *params); void rt2x00mac_rfkill_poll(struct ieee80211_hw *hw); -void rt2x00mac_flush(struct ieee80211_hw *hw, bool drop); +void rt2x00mac_flush(struct ieee80211_hw *hw, u32 queues, bool drop); int rt2x00mac_set_antenna(struct ieee80211_hw *hw, u32 tx_ant, u32 rx_ant); int rt2x00mac_get_antenna(struct ieee80211_hw *hw, u32 *tx_ant, u32 *rx_ant); void rt2x00mac_get_ringparam(struct ieee80211_hw *hw, diff --git a/drivers/net/wireless/rt2x00/rt2x00mac.c b/drivers/net/wireless/rt2x00/rt2x00mac.c index 20c6eccce5aa..9161c02d8ff9 100644 --- a/drivers/net/wireless/rt2x00/rt2x00mac.c +++ b/drivers/net/wireless/rt2x00/rt2x00mac.c @@ -748,7 +748,7 @@ void rt2x00mac_rfkill_poll(struct ieee80211_hw *hw) } EXPORT_SYMBOL_GPL(rt2x00mac_rfkill_poll); -void rt2x00mac_flush(struct ieee80211_hw *hw, bool drop) +void rt2x00mac_flush(struct ieee80211_hw *hw, u32 queues, bool drop) { struct rt2x00_dev *rt2x00dev = hw->priv; struct data_queue *queue; diff --git a/drivers/net/wireless/rtlwifi/core.c b/drivers/net/wireless/rtlwifi/core.c index d3ce9fbef00e..b5a7a260bf63 100644 --- a/drivers/net/wireless/rtlwifi/core.c +++ b/drivers/net/wireless/rtlwifi/core.c @@ -1166,7 +1166,7 @@ static void rtl_op_rfkill_poll(struct ieee80211_hw *hw) * before switch channle or power save, or tx buffer packet * maybe send after offchannel or rf sleep, this may cause * dis-association by AP */ -static void rtl_op_flush(struct ieee80211_hw *hw, bool drop) +static void rtl_op_flush(struct ieee80211_hw *hw, u32 queues, bool drop) { struct rtl_priv *rtlpriv = rtl_priv(hw); diff --git a/drivers/net/wireless/ti/wlcore/main.c b/drivers/net/wireless/ti/wlcore/main.c index d7e306333f6c..a9f7041c7192 100644 --- a/drivers/net/wireless/ti/wlcore/main.c +++ b/drivers/net/wireless/ti/wlcore/main.c @@ -4946,7 +4946,7 @@ out: mutex_unlock(&wl->mutex); } -static void wlcore_op_flush(struct ieee80211_hw *hw, bool drop) +static void wlcore_op_flush(struct ieee80211_hw *hw, u32 queues, bool drop) { struct wl1271 *wl = hw->priv; diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 0b912d22f82d..4158da74e11b 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -2438,8 +2438,11 @@ enum ieee80211_roc_type { * @testmode_dump: Implement a cfg80211 test mode dump. The callback can sleep. * * @flush: Flush all pending frames from the hardware queue, making sure - * that the hardware queues are empty. If the parameter @drop is set - * to %true, pending frames may be dropped. The callback can sleep. + * that the hardware queues are empty. The @queues parameter is a bitmap + * of queues to flush, which is useful if different virtual interfaces + * use different hardware queues; it may also indicate all queues. + * If the parameter @drop is set to %true, pending frames may be dropped. + * The callback can sleep. * * @channel_switch: Drivers that need (or want) to offload the channel * switch operation for CSAs received from the AP may implement this @@ -2687,7 +2690,7 @@ struct ieee80211_ops { struct netlink_callback *cb, void *data, int len); #endif - void (*flush)(struct ieee80211_hw *hw, bool drop); + void (*flush)(struct ieee80211_hw *hw, u32 queues, bool drop); void (*channel_switch)(struct ieee80211_hw *hw, struct ieee80211_channel_switch *ch_switch); int (*napi_poll)(struct ieee80211_hw *hw, int budget); diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 7b9ff53bd2e9..169664c122e2 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -720,13 +720,14 @@ static inline void drv_rfkill_poll(struct ieee80211_local *local) local->ops->rfkill_poll(&local->hw); } -static inline void drv_flush(struct ieee80211_local *local, bool drop) +static inline void drv_flush(struct ieee80211_local *local, + u32 queues, bool drop) { might_sleep(); - trace_drv_flush(local, drop); + trace_drv_flush(local, queues, drop); if (local->ops->flush) - local->ops->flush(&local->hw, drop); + local->ops->flush(&local->hw, queues, drop); trace_drv_return_void(local); } diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index d6e920609823..b96c0e977752 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1540,6 +1540,8 @@ static inline void ieee80211_add_pending_skbs(struct ieee80211_local *local, { ieee80211_add_pending_skbs_fn(local, skbs, NULL, NULL); } +void ieee80211_flush_queues(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata); void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata, u16 transaction, u16 auth_alg, u16 status, diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 80e838bc875d..d646e12e55a6 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -92,7 +92,7 @@ static u32 ieee80211_idle_on(struct ieee80211_local *local) if (local->hw.conf.flags & IEEE80211_CONF_IDLE) return 0; - drv_flush(local, false); + ieee80211_flush_queues(local, NULL); local->hw.conf.flags |= IEEE80211_CONF_IDLE; return IEEE80211_CONF_CHANGE_IDLE; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index fdc06e381c10..65b38e13eb0c 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1436,7 +1436,7 @@ void ieee80211_dynamic_ps_enable_work(struct work_struct *work) else { ieee80211_send_nullfunc(local, sdata, 1); /* Flush to get the tx status of nullfunc frame */ - drv_flush(local, false); + ieee80211_flush_queues(local, sdata); } } @@ -1767,7 +1767,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, /* flush out any pending frame (e.g. DELBA) before deauth/disassoc */ if (tx) - drv_flush(local, false); + ieee80211_flush_queues(local, sdata); /* deauthenticate/disassociate now */ if (tx || frame_buf) @@ -1776,7 +1776,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, /* flush out frame */ if (tx) - drv_flush(local, false); + ieee80211_flush_queues(local, sdata); /* clear bssid only after building the needed mgmt frames */ memset(ifmgd->bssid, 0, ETH_ALEN); @@ -1948,7 +1948,7 @@ static void ieee80211_mgd_probe_ap_send(struct ieee80211_sub_if_data *sdata) ifmgd->probe_timeout = jiffies + msecs_to_jiffies(probe_wait_ms); run_again(ifmgd, ifmgd->probe_timeout); if (sdata->local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) - drv_flush(sdata->local, false); + ieee80211_flush_queues(sdata->local, sdata); } static void ieee80211_mgd_probe_ap(struct ieee80211_sub_if_data *sdata, diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c index db547fceaeb9..d32f514074b9 100644 --- a/net/mac80211/offchannel.c +++ b/net/mac80211/offchannel.c @@ -120,7 +120,7 @@ void ieee80211_offchannel_stop_vifs(struct ieee80211_local *local) */ ieee80211_stop_queues_by_reason(&local->hw, IEEE80211_QUEUE_STOP_REASON_OFFCHANNEL); - drv_flush(local, false); + ieee80211_flush_queues(local, NULL); mutex_lock(&local->iflist_mtx); list_for_each_entry(sdata, &local->interfaces, list) { @@ -373,7 +373,7 @@ void ieee80211_sw_roc_work(struct work_struct *work) ieee80211_roc_notify_destroy(roc); if (started) { - drv_flush(local, false); + ieee80211_flush_queues(local, NULL); local->tmp_channel = NULL; ieee80211_hw_config(local, 0); diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c index b471a67f224d..497f21a0d116 100644 --- a/net/mac80211/pm.c +++ b/net/mac80211/pm.c @@ -35,7 +35,7 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan) /* flush out all packets */ synchronize_net(); - drv_flush(local, false); + ieee80211_flush_queues(local, NULL); local->quiescing = true; /* make quiescing visible to timers everywhere */ diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 5dc17c623f72..cb34cbbaa20c 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -332,7 +332,7 @@ static int ieee80211_start_sw_scan(struct ieee80211_local *local) ieee80211_offchannel_stop_vifs(local); /* ensure nullfunc is transmitted before leaving operating channel */ - drv_flush(local, false); + ieee80211_flush_queues(local, NULL); ieee80211_configure_filter(local); @@ -668,7 +668,7 @@ static void ieee80211_scan_state_resume(struct ieee80211_local *local, ieee80211_offchannel_stop_vifs(local); if (local->ops->flush) { - drv_flush(local, false); + ieee80211_flush_queues(local, NULL); *next_delay = 0; } else *next_delay = HZ / 10; diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h index d97e4305cf1e..c5899797a8d4 100644 --- a/net/mac80211/trace.h +++ b/net/mac80211/trace.h @@ -964,23 +964,26 @@ TRACE_EVENT(drv_get_survey, ); TRACE_EVENT(drv_flush, - TP_PROTO(struct ieee80211_local *local, bool drop), + TP_PROTO(struct ieee80211_local *local, + u32 queues, bool drop), - TP_ARGS(local, drop), + TP_ARGS(local, queues, drop), TP_STRUCT__entry( LOCAL_ENTRY __field(bool, drop) + __field(u32, queues) ), TP_fast_assign( LOCAL_ASSIGN; __entry->drop = drop; + __entry->queues = queues; ), TP_printk( - LOCAL_PR_FMT " drop:%d", - LOCAL_PR_ARG, __entry->drop + LOCAL_PR_FMT " queues:0x%x drop:%d", + LOCAL_PR_ARG, __entry->queues, __entry->drop ) ); diff --git a/net/mac80211/util.c b/net/mac80211/util.c index b7a856e3281b..f978ddd1bb43 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -511,6 +511,31 @@ void ieee80211_wake_queues(struct ieee80211_hw *hw) } EXPORT_SYMBOL(ieee80211_wake_queues); +void ieee80211_flush_queues(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata) +{ + u32 queues; + + if (!local->ops->flush) + return; + + if (sdata && local->hw.flags & IEEE80211_HW_QUEUE_CONTROL) { + int ac; + + queues = 0; + + for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) + queues |= BIT(sdata->vif.hw_queue[ac]); + if (sdata->vif.cab_queue != IEEE80211_INVAL_HW_QUEUE) + queues |= BIT(sdata->vif.cab_queue); + } else { + /* all queues */ + queues = BIT(local->hw.queues) - 1; + } + + drv_flush(local, queues, false); +} + void ieee80211_iterate_active_interfaces( struct ieee80211_hw *hw, u32 iter_flags, void (*iterator)(void *data, u8 *mac, -- cgit v1.2.3 From 445ea4e83ec50668cc9ad7e5cf96d242f19165e8 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 13 Feb 2013 12:25:28 +0100 Subject: mac80211: stop queues temporarily for flushing Sometimes queues are flushed in the middle of operation, which can lead to driver issues. Stop queues temporarily, while flushing, to avoid transmitting new packets while they are being flushed. Signed-off-by: Johannes Berg --- include/net/mac80211.h | 2 ++ net/mac80211/ieee80211_i.h | 3 +++ net/mac80211/main.c | 4 ++-- net/mac80211/mlme.c | 4 ++++ net/mac80211/offchannel.c | 4 ++-- net/mac80211/pm.c | 4 +++- net/mac80211/tx.c | 1 + net/mac80211/util.c | 23 ++++++++++++++++------- 8 files changed, 33 insertions(+), 12 deletions(-) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 4158da74e11b..dd73b8c6746b 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -93,9 +93,11 @@ struct device; * enum ieee80211_max_queues - maximum number of queues * * @IEEE80211_MAX_QUEUES: Maximum number of regular device queues. + * @IEEE80211_MAX_QUEUE_MAP: bitmap with maximum queues set */ enum ieee80211_max_queues { IEEE80211_MAX_QUEUES = 16, + IEEE80211_MAX_QUEUE_MAP = BIT(IEEE80211_MAX_QUEUES) - 1, }; #define IEEE80211_INVAL_HW_QUEUE 0xff diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index b96c0e977752..ae2d1754b792 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -809,6 +809,7 @@ enum queue_stop_reason { IEEE80211_QUEUE_STOP_REASON_SUSPEND, IEEE80211_QUEUE_STOP_REASON_SKB_ADD, IEEE80211_QUEUE_STOP_REASON_OFFCHANNEL, + IEEE80211_QUEUE_STOP_REASON_FLUSH, }; #ifdef CONFIG_MAC80211_LEDS @@ -1522,8 +1523,10 @@ void ieee80211_sta_tx_notify(struct ieee80211_sub_if_data *sdata, struct ieee80211_hdr *hdr, bool ack); void ieee80211_wake_queues_by_reason(struct ieee80211_hw *hw, + unsigned long queues, enum queue_stop_reason reason); void ieee80211_stop_queues_by_reason(struct ieee80211_hw *hw, + unsigned long queues, enum queue_stop_reason reason); void ieee80211_wake_queue_by_reason(struct ieee80211_hw *hw, int queue, enum queue_stop_reason reason); diff --git a/net/mac80211/main.c b/net/mac80211/main.c index eee1768e89c0..c6f81ecc36a1 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -277,8 +277,8 @@ void ieee80211_restart_hw(struct ieee80211_hw *hw) "Hardware restart was requested\n"); /* use this reason, ieee80211_reconfig will unblock it */ - ieee80211_stop_queues_by_reason(hw, - IEEE80211_QUEUE_STOP_REASON_SUSPEND); + ieee80211_stop_queues_by_reason(hw, IEEE80211_MAX_QUEUE_MAP, + IEEE80211_QUEUE_STOP_REASON_SUSPEND); /* * Stop all Rx during the reconfig. We don't want state changes diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 65b38e13eb0c..4d383a93ea73 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1009,6 +1009,7 @@ static void ieee80211_chswitch_work(struct work_struct *work) /* XXX: wait for a beacon first? */ ieee80211_wake_queues_by_reason(&sdata->local->hw, + IEEE80211_MAX_QUEUE_MAP, IEEE80211_QUEUE_STOP_REASON_CSA); out: ifmgd->flags &= ~IEEE80211_STA_CSA_RECEIVED; @@ -1108,6 +1109,7 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, if (sw_elem->mode) ieee80211_stop_queues_by_reason(&sdata->local->hw, + IEEE80211_MAX_QUEUE_MAP, IEEE80211_QUEUE_STOP_REASON_CSA); if (sdata->local->ops->channel_switch) { @@ -1375,6 +1377,7 @@ void ieee80211_dynamic_ps_disable_work(struct work_struct *work) } ieee80211_wake_queues_by_reason(&local->hw, + IEEE80211_MAX_QUEUE_MAP, IEEE80211_QUEUE_STOP_REASON_PS); } @@ -2071,6 +2074,7 @@ static void __ieee80211_disconnect(struct ieee80211_sub_if_data *sdata) true, frame_buf); ifmgd->flags &= ~IEEE80211_STA_CSA_RECEIVED; ieee80211_wake_queues_by_reason(&sdata->local->hw, + IEEE80211_MAX_QUEUE_MAP, IEEE80211_QUEUE_STOP_REASON_CSA); mutex_unlock(&ifmgd->mtx); diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c index d32f514074b9..b01eb7314ec6 100644 --- a/net/mac80211/offchannel.c +++ b/net/mac80211/offchannel.c @@ -118,7 +118,7 @@ void ieee80211_offchannel_stop_vifs(struct ieee80211_local *local) * Stop queues and transmit all frames queued by the driver * before sending nullfunc to enable powersave at the AP. */ - ieee80211_stop_queues_by_reason(&local->hw, + ieee80211_stop_queues_by_reason(&local->hw, IEEE80211_MAX_QUEUE_MAP, IEEE80211_QUEUE_STOP_REASON_OFFCHANNEL); ieee80211_flush_queues(local, NULL); @@ -181,7 +181,7 @@ void ieee80211_offchannel_return(struct ieee80211_local *local) } mutex_unlock(&local->iflist_mtx); - ieee80211_wake_queues_by_reason(&local->hw, + ieee80211_wake_queues_by_reason(&local->hw, IEEE80211_MAX_QUEUE_MAP, IEEE80211_QUEUE_STOP_REASON_OFFCHANNEL); } diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c index 497f21a0d116..3d16f4e61743 100644 --- a/net/mac80211/pm.c +++ b/net/mac80211/pm.c @@ -30,7 +30,8 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan) } ieee80211_stop_queues_by_reason(hw, - IEEE80211_QUEUE_STOP_REASON_SUSPEND); + IEEE80211_MAX_QUEUE_MAP, + IEEE80211_QUEUE_STOP_REASON_SUSPEND); /* flush out all packets */ synchronize_net(); @@ -68,6 +69,7 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan) mutex_unlock(&local->sta_mtx); } ieee80211_wake_queues_by_reason(hw, + IEEE80211_MAX_QUEUE_MAP, IEEE80211_QUEUE_STOP_REASON_SUSPEND); return err; } else if (err > 0) { diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 3fcdf2118101..2a6ae8030bd9 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -233,6 +233,7 @@ ieee80211_tx_h_dynamic_ps(struct ieee80211_tx_data *tx) if (local->hw.conf.flags & IEEE80211_CONF_PS) { ieee80211_stop_queues_by_reason(&local->hw, + IEEE80211_MAX_QUEUE_MAP, IEEE80211_QUEUE_STOP_REASON_PS); ifmgd->flags &= ~IEEE80211_STA_NULLFUNC_ACKED; ieee80211_queue_work(&local->hw, diff --git a/net/mac80211/util.c b/net/mac80211/util.c index f978ddd1bb43..a7368870c8ee 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -453,7 +453,8 @@ void ieee80211_add_pending_skbs_fn(struct ieee80211_local *local, } void ieee80211_stop_queues_by_reason(struct ieee80211_hw *hw, - enum queue_stop_reason reason) + unsigned long queues, + enum queue_stop_reason reason) { struct ieee80211_local *local = hw_to_local(hw); unsigned long flags; @@ -461,7 +462,7 @@ void ieee80211_stop_queues_by_reason(struct ieee80211_hw *hw, spin_lock_irqsave(&local->queue_stop_reason_lock, flags); - for (i = 0; i < hw->queues; i++) + for_each_set_bit(i, &queues, hw->queues) __ieee80211_stop_queue(hw, i, reason); spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); @@ -469,7 +470,7 @@ void ieee80211_stop_queues_by_reason(struct ieee80211_hw *hw, void ieee80211_stop_queues(struct ieee80211_hw *hw) { - ieee80211_stop_queues_by_reason(hw, + ieee80211_stop_queues_by_reason(hw, IEEE80211_MAX_QUEUE_MAP, IEEE80211_QUEUE_STOP_REASON_DRIVER); } EXPORT_SYMBOL(ieee80211_stop_queues); @@ -491,6 +492,7 @@ int ieee80211_queue_stopped(struct ieee80211_hw *hw, int queue) EXPORT_SYMBOL(ieee80211_queue_stopped); void ieee80211_wake_queues_by_reason(struct ieee80211_hw *hw, + unsigned long queues, enum queue_stop_reason reason) { struct ieee80211_local *local = hw_to_local(hw); @@ -499,7 +501,7 @@ void ieee80211_wake_queues_by_reason(struct ieee80211_hw *hw, spin_lock_irqsave(&local->queue_stop_reason_lock, flags); - for (i = 0; i < hw->queues; i++) + for_each_set_bit(i, &queues, hw->queues) __ieee80211_wake_queue(hw, i, reason); spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); @@ -507,7 +509,8 @@ void ieee80211_wake_queues_by_reason(struct ieee80211_hw *hw, void ieee80211_wake_queues(struct ieee80211_hw *hw) { - ieee80211_wake_queues_by_reason(hw, IEEE80211_QUEUE_STOP_REASON_DRIVER); + ieee80211_wake_queues_by_reason(hw, IEEE80211_MAX_QUEUE_MAP, + IEEE80211_QUEUE_STOP_REASON_DRIVER); } EXPORT_SYMBOL(ieee80211_wake_queues); @@ -533,7 +536,13 @@ void ieee80211_flush_queues(struct ieee80211_local *local, queues = BIT(local->hw.queues) - 1; } + ieee80211_stop_queues_by_reason(&local->hw, IEEE80211_MAX_QUEUE_MAP, + IEEE80211_QUEUE_STOP_REASON_FLUSH); + drv_flush(local, queues, false); + + ieee80211_wake_queues_by_reason(&local->hw, IEEE80211_MAX_QUEUE_MAP, + IEEE80211_QUEUE_STOP_REASON_FLUSH); } void ieee80211_iterate_active_interfaces( @@ -1676,8 +1685,8 @@ int ieee80211_reconfig(struct ieee80211_local *local) mutex_unlock(&local->sta_mtx); } - ieee80211_wake_queues_by_reason(hw, - IEEE80211_QUEUE_STOP_REASON_SUSPEND); + ieee80211_wake_queues_by_reason(hw, IEEE80211_MAX_QUEUE_MAP, + IEEE80211_QUEUE_STOP_REASON_SUSPEND); /* * If this is for hw restart things are still running. -- cgit v1.2.3 From b962abdc6531c8de837504ebc98139587162f223 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Sat, 9 Mar 2013 23:25:08 +0200 Subject: ipvs: fix some sparse warnings Add missing __percpu annotations and make ip_vs_net_id static. Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman --- include/net/ip_vs.h | 2 +- net/netfilter/ipvs/ip_vs_core.c | 8 +------- net/netfilter/ipvs/ip_vs_est.c | 2 +- 3 files changed, 3 insertions(+), 9 deletions(-) (limited to 'include/net') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 68c69d54d392..29bc05577560 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -459,7 +459,7 @@ struct ip_vs_estimator { struct ip_vs_stats { struct ip_vs_stats_user ustats; /* statistics */ struct ip_vs_estimator est; /* estimator */ - struct ip_vs_cpu_stats *cpustats; /* per cpu counters */ + struct ip_vs_cpu_stats __percpu *cpustats; /* per cpu counters */ spinlock_t lock; /* spin lock */ struct ip_vs_stats_user ustats0; /* reset values */ }; diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 47edf5a40a59..3e5e80b809fe 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -69,10 +69,7 @@ EXPORT_SYMBOL(ip_vs_conn_put); EXPORT_SYMBOL(ip_vs_get_debug_level); #endif -int ip_vs_net_id __read_mostly; -#ifdef IP_VS_GENERIC_NETNS -EXPORT_SYMBOL(ip_vs_net_id); -#endif +static int ip_vs_net_id __read_mostly; /* netns cnt used for uniqueness */ static atomic_t ipvs_netns_cnt = ATOMIC_INIT(0); @@ -1181,9 +1178,6 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) iph.len)))) { #ifdef CONFIG_IP_VS_IPV6 if (af == AF_INET6) { - struct net *net = - dev_net(skb_dst(skb)->dev); - if (!skb->dev) skb->dev = net->loopback_dev; icmpv6_send(skb, diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c index 0fac6017b6fb..6bee6d0c73a5 100644 --- a/net/netfilter/ipvs/ip_vs_est.c +++ b/net/netfilter/ipvs/ip_vs_est.c @@ -56,7 +56,7 @@ * Make a summary from each cpu */ static void ip_vs_read_cpu_stats(struct ip_vs_stats_user *sum, - struct ip_vs_cpu_stats *stats) + struct ip_vs_cpu_stats __percpu *stats) { int i; -- cgit v1.2.3 From dece40e848f6e022f960dc9de54be518928460c3 Mon Sep 17 00:00:00 2001 From: Vladimir Davydov Date: Wed, 13 Mar 2013 23:40:14 +0000 Subject: netfilter: nf_conntrack: speed up module removal path if netns in use The patch introduces nf_conntrack_cleanup_net_list(), which cleanups nf_conntrack for a list of netns and calls synchronize_net() only once for them all. This should reduce netns destruction time. I've measured cleanup time for 1k dummy net ns. Here are the results: # modprobe nf_conntrack # time modprobe -r nf_conntrack real 0m10.337s user 0m0.000s sys 0m0.376s # modprobe nf_conntrack # time modprobe -r nf_conntrack real 0m5.661s user 0m0.000s sys 0m0.216s Signed-off-by: Vladimir Davydov Cc: Patrick McHardy Cc: "David S. Miller" Cc: "Eric W. Biederman" Acked-by: Gao feng Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_core.h | 1 + net/netfilter/nf_conntrack_core.c | 46 +++++++++++++++++++++---------- net/netfilter/nf_conntrack_standalone.c | 16 +++++++---- 3 files changed, 43 insertions(+), 20 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h index 930275fa2ea6..fb2b6234e937 100644 --- a/include/net/netfilter/nf_conntrack_core.h +++ b/include/net/netfilter/nf_conntrack_core.h @@ -27,6 +27,7 @@ extern unsigned int nf_conntrack_in(struct net *net, extern int nf_conntrack_init_net(struct net *net); extern void nf_conntrack_cleanup_net(struct net *net); +extern void nf_conntrack_cleanup_net_list(struct list_head *net_exit_list); extern int nf_conntrack_proto_pernet_init(struct net *net); extern void nf_conntrack_proto_pernet_fini(struct net *net); diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 1068deb97c8b..007e8c43d19a 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1365,30 +1365,48 @@ void nf_conntrack_cleanup_end(void) */ void nf_conntrack_cleanup_net(struct net *net) { + LIST_HEAD(single); + + list_add(&net->exit_list, &single); + nf_conntrack_cleanup_net_list(&single); +} + +void nf_conntrack_cleanup_net_list(struct list_head *net_exit_list) +{ + int busy; + struct net *net; + /* * This makes sure all current packets have passed through * netfilter framework. Roll on, two-stage module * delete... */ synchronize_net(); - i_see_dead_people: - nf_ct_iterate_cleanup(net, kill_all, NULL); - nf_ct_release_dying_list(net); - if (atomic_read(&net->ct.count) != 0) { +i_see_dead_people: + busy = 0; + list_for_each_entry(net, net_exit_list, exit_list) { + nf_ct_iterate_cleanup(net, kill_all, NULL); + nf_ct_release_dying_list(net); + if (atomic_read(&net->ct.count) != 0) + busy = 1; + } + if (busy) { schedule(); goto i_see_dead_people; } - nf_ct_free_hashtable(net->ct.hash, net->ct.htable_size); - nf_conntrack_proto_pernet_fini(net); - nf_conntrack_helper_pernet_fini(net); - nf_conntrack_ecache_pernet_fini(net); - nf_conntrack_tstamp_pernet_fini(net); - nf_conntrack_acct_pernet_fini(net); - nf_conntrack_expect_pernet_fini(net); - kmem_cache_destroy(net->ct.nf_conntrack_cachep); - kfree(net->ct.slabname); - free_percpu(net->ct.stat); + list_for_each_entry(net, net_exit_list, exit_list) { + nf_ct_free_hashtable(net->ct.hash, net->ct.htable_size); + nf_conntrack_proto_pernet_fini(net); + nf_conntrack_helper_pernet_fini(net); + nf_conntrack_ecache_pernet_fini(net); + nf_conntrack_tstamp_pernet_fini(net); + nf_conntrack_acct_pernet_fini(net); + nf_conntrack_expect_pernet_fini(net); + kmem_cache_destroy(net->ct.nf_conntrack_cachep); + kfree(net->ct.slabname); + free_percpu(net->ct.stat); + } } void *nf_ct_alloc_hashtable(unsigned int *sizep, int nulls) diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 6bcce401fd1c..6c69fbdb8361 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -545,16 +545,20 @@ out_init: return ret; } -static void nf_conntrack_pernet_exit(struct net *net) +static void nf_conntrack_pernet_exit(struct list_head *net_exit_list) { - nf_conntrack_standalone_fini_sysctl(net); - nf_conntrack_standalone_fini_proc(net); - nf_conntrack_cleanup_net(net); + struct net *net; + + list_for_each_entry(net, net_exit_list, exit_list) { + nf_conntrack_standalone_fini_sysctl(net); + nf_conntrack_standalone_fini_proc(net); + } + nf_conntrack_cleanup_net_list(net_exit_list); } static struct pernet_operations nf_conntrack_net_ops = { - .init = nf_conntrack_pernet_init, - .exit = nf_conntrack_pernet_exit, + .init = nf_conntrack_pernet_init, + .exit_batch = nf_conntrack_pernet_exit, }; static int __init nf_conntrack_standalone_init(void) -- cgit v1.2.3 From 9b44190dc114c1720b34975b5bfc65aece112ced Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Wed, 20 Mar 2013 13:32:58 +0000 Subject: tcp: refactor F-RTO The patch series refactor the F-RTO feature (RFC4138/5682). This is to simplify the loss recovery processing. Existing F-RTO was developed during the experimental stage (RFC4138) and has many experimental features. It takes a separate code path from the traditional timeout processing by overloading CA_Disorder instead of using CA_Loss state. This complicates CA_Disorder state handling because it's also used for handling dubious ACKs and undos. While the algorithm in the RFC does not change the congestion control, the implementation intercepts congestion control in various places (e.g., frto_cwnd in tcp_ack()). The new code implements newer F-RTO RFC5682 using CA_Loss processing path. F-RTO becomes a small extension in the timeout processing and interfaces with congestion control and Eifel undo modules. It lets congestion control (module) determines how many to send independently. F-RTO only chooses what to send in order to detect spurious retranmission. If timeout is found spurious it invokes existing Eifel undo algorithms like DSACK or TCP timestamp based detection. The first patch removes all F-RTO code except the sysctl_tcp_frto is left for the new implementation. Since CA_EVENT_FRTO is removed, TCP westwood now computes ssthresh on regular timeout CA_EVENT_LOSS event. Signed-off-by: Yuchung Cheng Acked-by: Neal Cardwell Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.txt | 17 -- include/linux/tcp.h | 6 +- include/net/tcp.h | 4 - net/ipv4/sysctl_net_ipv4.c | 7 - net/ipv4/tcp_input.c | 375 +-------------------------------- net/ipv4/tcp_minisocks.c | 3 - net/ipv4/tcp_output.c | 11 +- net/ipv4/tcp_timer.c | 6 +- net/ipv4/tcp_westwood.c | 2 +- 9 files changed, 10 insertions(+), 421 deletions(-) (limited to 'include/net') diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 17953e2bc3e9..8a977a0aaede 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -239,23 +239,6 @@ tcp_frto - INTEGER interacts badly with the packet counting of the SACK enabled TCP flow. -tcp_frto_response - INTEGER - When F-RTO has detected that a TCP retransmission timeout was - spurious (i.e, the timeout would have been avoided had TCP set a - longer retransmission timeout), TCP has several options what to do - next. Possible values are: - 0 Rate halving based; a smooth and conservative response, - results in halved cwnd and ssthresh after one RTT - 1 Very conservative response; not recommended because even - though being valid, it interacts poorly with the rest of - Linux TCP, halves cwnd and ssthresh immediately - 2 Aggressive response; undoes congestion control measures - that are now known to be unnecessary (ignoring the - possibility of a lost retransmission that would require - TCP to be more cautious), cwnd and ssthresh are restored - to the values prior timeout - Default: 0 (rate halving based) - tcp_keepalive_time - INTEGER How often TCP sends out keepalive messages when keepalive is enabled. Default: 2hours. diff --git a/include/linux/tcp.h b/include/linux/tcp.h index ed6a7456eecd..f5f203b36379 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -187,14 +187,12 @@ struct tcp_sock { u32 window_clamp; /* Maximal window to advertise */ u32 rcv_ssthresh; /* Current window clamp */ - u32 frto_highmark; /* snd_nxt when RTO occurred */ u16 advmss; /* Advertised MSS */ - u8 frto_counter; /* Number of new acks after RTO */ + u8 unused; u8 nonagle : 4,/* Disable Nagle algorithm? */ thin_lto : 1,/* Use linear timeouts for thin streams */ thin_dupack : 1,/* Fast retransmit on first dupack */ - repair : 1, - unused : 1; + repair : 1; u8 repair_queue; u8 do_early_retrans:1,/* Enable RFC5827 early-retransmit */ syn_data:1, /* SYN includes data */ diff --git a/include/net/tcp.h b/include/net/tcp.h index 7f2f17198d75..d1dcb596230e 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -272,7 +272,6 @@ extern int sysctl_tcp_app_win; extern int sysctl_tcp_adv_win_scale; extern int sysctl_tcp_tw_reuse; extern int sysctl_tcp_frto; -extern int sysctl_tcp_frto_response; extern int sysctl_tcp_low_latency; extern int sysctl_tcp_dma_copybreak; extern int sysctl_tcp_nometrics_save; @@ -424,8 +423,6 @@ extern struct sock * tcp_check_req(struct sock *sk,struct sk_buff *skb, bool fastopen); extern int tcp_child_process(struct sock *parent, struct sock *child, struct sk_buff *skb); -extern bool tcp_use_frto(struct sock *sk); -extern void tcp_enter_frto(struct sock *sk); extern void tcp_enter_loss(struct sock *sk, int how); extern void tcp_clear_retrans(struct tcp_sock *tp); extern void tcp_update_metrics(struct sock *sk); @@ -756,7 +753,6 @@ enum tcp_ca_event { CA_EVENT_TX_START, /* first transmit when no packets in flight */ CA_EVENT_CWND_RESTART, /* congestion window restart */ CA_EVENT_COMPLETE_CWR, /* end of congestion recovery */ - CA_EVENT_FRTO, /* fast recovery timeout */ CA_EVENT_LOSS, /* loss timeout */ CA_EVENT_FAST_ACK, /* in sequence ack */ CA_EVENT_SLOW_ACK, /* other ack */ diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index cb45062c8be0..fa2f63fc453b 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -591,13 +591,6 @@ static struct ctl_table ipv4_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, - { - .procname = "tcp_frto_response", - .data = &sysctl_tcp_frto_response, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, { .procname = "tcp_low_latency", .data = &sysctl_tcp_low_latency, diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 19f0149fb6a2..231c79fe91f3 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -93,7 +93,6 @@ int sysctl_tcp_stdurg __read_mostly; int sysctl_tcp_rfc1337 __read_mostly; int sysctl_tcp_max_orphans __read_mostly = NR_FILE; int sysctl_tcp_frto __read_mostly = 2; -int sysctl_tcp_frto_response __read_mostly; int sysctl_tcp_thin_dupack __read_mostly; @@ -108,17 +107,14 @@ int sysctl_tcp_early_retrans __read_mostly = 3; #define FLAG_DATA_SACKED 0x20 /* New SACK. */ #define FLAG_ECE 0x40 /* ECE in this ACK */ #define FLAG_SLOWPATH 0x100 /* Do not skip RFC checks for window update.*/ -#define FLAG_ONLY_ORIG_SACKED 0x200 /* SACKs only non-rexmit sent before RTO */ #define FLAG_SND_UNA_ADVANCED 0x400 /* Snd_una was changed (!= FLAG_DATA_ACKED) */ #define FLAG_DSACKING_ACK 0x800 /* SACK blocks contained D-SACK info */ -#define FLAG_NONHEAD_RETRANS_ACKED 0x1000 /* Non-head rexmitted data was ACKed */ #define FLAG_SACK_RENEGING 0x2000 /* snd_una advanced to a sacked seq */ #define FLAG_ACKED (FLAG_DATA_ACKED|FLAG_SYN_ACKED) #define FLAG_NOT_DUP (FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED) #define FLAG_CA_ALERT (FLAG_DATA_SACKED|FLAG_ECE) #define FLAG_FORWARD_PROGRESS (FLAG_ACKED|FLAG_DATA_SACKED) -#define FLAG_ANY_PROGRESS (FLAG_FORWARD_PROGRESS|FLAG_SND_UNA_ADVANCED) #define TCP_REMNANT (TCP_FLAG_FIN|TCP_FLAG_URG|TCP_FLAG_SYN|TCP_FLAG_PSH) #define TCP_HP_BITS (~(TCP_RESERVED_BITS|TCP_FLAG_PSH)) @@ -1159,10 +1155,6 @@ static u8 tcp_sacktag_one(struct sock *sk, tcp_highest_sack_seq(tp))) state->reord = min(fack_count, state->reord); - - /* SACK enhanced F-RTO (RFC4138; Appendix B) */ - if (!after(end_seq, tp->frto_highmark)) - state->flag |= FLAG_ONLY_ORIG_SACKED; } if (sacked & TCPCB_LOST) { @@ -1555,7 +1547,6 @@ static int tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb, u32 prior_snd_una) { - const struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); const unsigned char *ptr = (skb_transport_header(ack_skb) + TCP_SKB_CB(ack_skb)->sacked); @@ -1728,12 +1719,6 @@ walk: start_seq, end_seq, dup_sack); advance_sp: - /* SACK enhanced FRTO (RFC4138, Appendix B): Clearing correct - * due to in-order walk - */ - if (after(end_seq, tp->frto_highmark)) - state.flag &= ~FLAG_ONLY_ORIG_SACKED; - i++; } @@ -1750,8 +1735,7 @@ advance_sp: tcp_verify_left_out(tp); if ((state.reord < tp->fackets_out) && - ((icsk->icsk_ca_state != TCP_CA_Loss) || tp->undo_marker) && - (!tp->frto_highmark || after(tp->snd_una, tp->frto_highmark))) + ((inet_csk(sk)->icsk_ca_state != TCP_CA_Loss) || tp->undo_marker)) tcp_update_reordering(sk, tp->fackets_out - state.reord, 0); out: @@ -1825,197 +1809,6 @@ static inline void tcp_reset_reno_sack(struct tcp_sock *tp) tp->sacked_out = 0; } -static int tcp_is_sackfrto(const struct tcp_sock *tp) -{ - return (sysctl_tcp_frto == 0x2) && !tcp_is_reno(tp); -} - -/* F-RTO can only be used if TCP has never retransmitted anything other than - * head (SACK enhanced variant from Appendix B of RFC4138 is more robust here) - */ -bool tcp_use_frto(struct sock *sk) -{ - const struct tcp_sock *tp = tcp_sk(sk); - const struct inet_connection_sock *icsk = inet_csk(sk); - struct sk_buff *skb; - - if (!sysctl_tcp_frto) - return false; - - /* MTU probe and F-RTO won't really play nicely along currently */ - if (icsk->icsk_mtup.probe_size) - return false; - - if (tcp_is_sackfrto(tp)) - return true; - - /* Avoid expensive walking of rexmit queue if possible */ - if (tp->retrans_out > 1) - return false; - - skb = tcp_write_queue_head(sk); - if (tcp_skb_is_last(sk, skb)) - return true; - skb = tcp_write_queue_next(sk, skb); /* Skips head */ - tcp_for_write_queue_from(skb, sk) { - if (skb == tcp_send_head(sk)) - break; - if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS) - return false; - /* Short-circuit when first non-SACKed skb has been checked */ - if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) - break; - } - return true; -} - -/* RTO occurred, but do not yet enter Loss state. Instead, defer RTO - * recovery a bit and use heuristics in tcp_process_frto() to detect if - * the RTO was spurious. Only clear SACKED_RETRANS of the head here to - * keep retrans_out counting accurate (with SACK F-RTO, other than head - * may still have that bit set); TCPCB_LOST and remaining SACKED_RETRANS - * bits are handled if the Loss state is really to be entered (in - * tcp_enter_frto_loss). - * - * Do like tcp_enter_loss() would; when RTO expires the second time it - * does: - * "Reduce ssthresh if it has not yet been made inside this window." - */ -void tcp_enter_frto(struct sock *sk) -{ - const struct inet_connection_sock *icsk = inet_csk(sk); - struct tcp_sock *tp = tcp_sk(sk); - struct sk_buff *skb; - - if ((!tp->frto_counter && icsk->icsk_ca_state <= TCP_CA_Disorder) || - tp->snd_una == tp->high_seq || - ((icsk->icsk_ca_state == TCP_CA_Loss || tp->frto_counter) && - !icsk->icsk_retransmits)) { - tp->prior_ssthresh = tcp_current_ssthresh(sk); - /* Our state is too optimistic in ssthresh() call because cwnd - * is not reduced until tcp_enter_frto_loss() when previous F-RTO - * recovery has not yet completed. Pattern would be this: RTO, - * Cumulative ACK, RTO (2xRTO for the same segment does not end - * up here twice). - * RFC4138 should be more specific on what to do, even though - * RTO is quite unlikely to occur after the first Cumulative ACK - * due to back-off and complexity of triggering events ... - */ - if (tp->frto_counter) { - u32 stored_cwnd; - stored_cwnd = tp->snd_cwnd; - tp->snd_cwnd = 2; - tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk); - tp->snd_cwnd = stored_cwnd; - } else { - tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk); - } - /* ... in theory, cong.control module could do "any tricks" in - * ssthresh(), which means that ca_state, lost bits and lost_out - * counter would have to be faked before the call occurs. We - * consider that too expensive, unlikely and hacky, so modules - * using these in ssthresh() must deal these incompatibility - * issues if they receives CA_EVENT_FRTO and frto_counter != 0 - */ - tcp_ca_event(sk, CA_EVENT_FRTO); - } - - tp->undo_marker = tp->snd_una; - tp->undo_retrans = 0; - - skb = tcp_write_queue_head(sk); - if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS) - tp->undo_marker = 0; - if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) { - TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; - tp->retrans_out -= tcp_skb_pcount(skb); - } - tcp_verify_left_out(tp); - - /* Too bad if TCP was application limited */ - tp->snd_cwnd = min(tp->snd_cwnd, tcp_packets_in_flight(tp) + 1); - - /* Earlier loss recovery underway (see RFC4138; Appendix B). - * The last condition is necessary at least in tp->frto_counter case. - */ - if (tcp_is_sackfrto(tp) && (tp->frto_counter || - ((1 << icsk->icsk_ca_state) & (TCPF_CA_Recovery|TCPF_CA_Loss))) && - after(tp->high_seq, tp->snd_una)) { - tp->frto_highmark = tp->high_seq; - } else { - tp->frto_highmark = tp->snd_nxt; - } - tcp_set_ca_state(sk, TCP_CA_Disorder); - tp->high_seq = tp->snd_nxt; - tp->frto_counter = 1; -} - -/* Enter Loss state after F-RTO was applied. Dupack arrived after RTO, - * which indicates that we should follow the traditional RTO recovery, - * i.e. mark everything lost and do go-back-N retransmission. - */ -static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments, int flag) -{ - struct tcp_sock *tp = tcp_sk(sk); - struct sk_buff *skb; - - tp->lost_out = 0; - tp->retrans_out = 0; - if (tcp_is_reno(tp)) - tcp_reset_reno_sack(tp); - - tcp_for_write_queue(skb, sk) { - if (skb == tcp_send_head(sk)) - break; - - TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST; - /* - * Count the retransmission made on RTO correctly (only when - * waiting for the first ACK and did not get it)... - */ - if ((tp->frto_counter == 1) && !(flag & FLAG_DATA_ACKED)) { - /* For some reason this R-bit might get cleared? */ - if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) - tp->retrans_out += tcp_skb_pcount(skb); - /* ...enter this if branch just for the first segment */ - flag |= FLAG_DATA_ACKED; - } else { - if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS) - tp->undo_marker = 0; - TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; - } - - /* Marking forward transmissions that were made after RTO lost - * can cause unnecessary retransmissions in some scenarios, - * SACK blocks will mitigate that in some but not in all cases. - * We used to not mark them but it was causing break-ups with - * receivers that do only in-order receival. - * - * TODO: we could detect presence of such receiver and select - * different behavior per flow. - */ - if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) { - TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; - tp->lost_out += tcp_skb_pcount(skb); - tp->retransmit_high = TCP_SKB_CB(skb)->end_seq; - } - } - tcp_verify_left_out(tp); - - tp->snd_cwnd = tcp_packets_in_flight(tp) + allowed_segments; - tp->snd_cwnd_cnt = 0; - tp->snd_cwnd_stamp = tcp_time_stamp; - tp->frto_counter = 0; - - tp->reordering = min_t(unsigned int, tp->reordering, - sysctl_tcp_reordering); - tcp_set_ca_state(sk, TCP_CA_Loss); - tp->high_seq = tp->snd_nxt; - TCP_ECN_queue_cwr(tp); - - tcp_clear_all_retrans_hints(tp); -} - static void tcp_clear_retrans_partial(struct tcp_sock *tp) { tp->retrans_out = 0; @@ -2090,8 +1883,6 @@ void tcp_enter_loss(struct sock *sk, int how) tcp_set_ca_state(sk, TCP_CA_Loss); tp->high_seq = tp->snd_nxt; TCP_ECN_queue_cwr(tp); - /* Abort F-RTO algorithm if one is in progress */ - tp->frto_counter = 0; } /* If ACK arrived pointing to a remembered SACK, it means that our @@ -2275,10 +2066,6 @@ static bool tcp_time_to_recover(struct sock *sk, int flag) struct tcp_sock *tp = tcp_sk(sk); __u32 packets_out; - /* Do not perform any recovery during F-RTO algorithm */ - if (tp->frto_counter) - return false; - /* Trick#1: The loss is proven. */ if (tp->lost_out) return true; @@ -2760,7 +2547,7 @@ static void tcp_try_to_open(struct sock *sk, int flag, int newly_acked_sacked) tcp_verify_left_out(tp); - if (!tp->frto_counter && !tcp_any_retrans_done(sk)) + if (!tcp_any_retrans_done(sk)) tp->retrans_stamp = 0; if (flag & FLAG_ECE) @@ -3198,8 +2985,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, flag |= FLAG_RETRANS_DATA_ACKED; ca_seq_rtt = -1; seq_rtt = -1; - if ((flag & FLAG_DATA_ACKED) || (acked_pcount > 1)) - flag |= FLAG_NONHEAD_RETRANS_ACKED; } else { ca_seq_rtt = now - scb->when; last_ackt = skb->tstamp; @@ -3408,150 +3193,6 @@ static int tcp_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32 return flag; } -/* A very conservative spurious RTO response algorithm: reduce cwnd and - * continue in congestion avoidance. - */ -static void tcp_conservative_spur_to_response(struct tcp_sock *tp) -{ - tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh); - tp->snd_cwnd_cnt = 0; - TCP_ECN_queue_cwr(tp); - tcp_moderate_cwnd(tp); -} - -/* A conservative spurious RTO response algorithm: reduce cwnd using - * PRR and continue in congestion avoidance. - */ -static void tcp_cwr_spur_to_response(struct sock *sk) -{ - tcp_enter_cwr(sk, 0); -} - -static void tcp_undo_spur_to_response(struct sock *sk, int flag) -{ - if (flag & FLAG_ECE) - tcp_cwr_spur_to_response(sk); - else - tcp_undo_cwr(sk, true); -} - -/* F-RTO spurious RTO detection algorithm (RFC4138) - * - * F-RTO affects during two new ACKs following RTO (well, almost, see inline - * comments). State (ACK number) is kept in frto_counter. When ACK advances - * window (but not to or beyond highest sequence sent before RTO): - * On First ACK, send two new segments out. - * On Second ACK, RTO was likely spurious. Do spurious response (response - * algorithm is not part of the F-RTO detection algorithm - * given in RFC4138 but can be selected separately). - * Otherwise (basically on duplicate ACK), RTO was (likely) caused by a loss - * and TCP falls back to conventional RTO recovery. F-RTO allows overriding - * of Nagle, this is done using frto_counter states 2 and 3, when a new data - * segment of any size sent during F-RTO, state 2 is upgraded to 3. - * - * Rationale: if the RTO was spurious, new ACKs should arrive from the - * original window even after we transmit two new data segments. - * - * SACK version: - * on first step, wait until first cumulative ACK arrives, then move to - * the second step. In second step, the next ACK decides. - * - * F-RTO is implemented (mainly) in four functions: - * - tcp_use_frto() is used to determine if TCP is can use F-RTO - * - tcp_enter_frto() prepares TCP state on RTO if F-RTO is used, it is - * called when tcp_use_frto() showed green light - * - tcp_process_frto() handles incoming ACKs during F-RTO algorithm - * - tcp_enter_frto_loss() is called if there is not enough evidence - * to prove that the RTO is indeed spurious. It transfers the control - * from F-RTO to the conventional RTO recovery - */ -static bool tcp_process_frto(struct sock *sk, int flag) -{ - struct tcp_sock *tp = tcp_sk(sk); - - tcp_verify_left_out(tp); - - /* Duplicate the behavior from Loss state (fastretrans_alert) */ - if (flag & FLAG_DATA_ACKED) - inet_csk(sk)->icsk_retransmits = 0; - - if ((flag & FLAG_NONHEAD_RETRANS_ACKED) || - ((tp->frto_counter >= 2) && (flag & FLAG_RETRANS_DATA_ACKED))) - tp->undo_marker = 0; - - if (!before(tp->snd_una, tp->frto_highmark)) { - tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 2 : 3), flag); - return true; - } - - if (!tcp_is_sackfrto(tp)) { - /* RFC4138 shortcoming in step 2; should also have case c): - * ACK isn't duplicate nor advances window, e.g., opposite dir - * data, winupdate - */ - if (!(flag & FLAG_ANY_PROGRESS) && (flag & FLAG_NOT_DUP)) - return true; - - if (!(flag & FLAG_DATA_ACKED)) { - tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 0 : 3), - flag); - return true; - } - } else { - if (!(flag & FLAG_DATA_ACKED) && (tp->frto_counter == 1)) { - if (!tcp_packets_in_flight(tp)) { - tcp_enter_frto_loss(sk, 2, flag); - return true; - } - - /* Prevent sending of new data. */ - tp->snd_cwnd = min(tp->snd_cwnd, - tcp_packets_in_flight(tp)); - return true; - } - - if ((tp->frto_counter >= 2) && - (!(flag & FLAG_FORWARD_PROGRESS) || - ((flag & FLAG_DATA_SACKED) && - !(flag & FLAG_ONLY_ORIG_SACKED)))) { - /* RFC4138 shortcoming (see comment above) */ - if (!(flag & FLAG_FORWARD_PROGRESS) && - (flag & FLAG_NOT_DUP)) - return true; - - tcp_enter_frto_loss(sk, 3, flag); - return true; - } - } - - if (tp->frto_counter == 1) { - /* tcp_may_send_now needs to see updated state */ - tp->snd_cwnd = tcp_packets_in_flight(tp) + 2; - tp->frto_counter = 2; - - if (!tcp_may_send_now(sk)) - tcp_enter_frto_loss(sk, 2, flag); - - return true; - } else { - switch (sysctl_tcp_frto_response) { - case 2: - tcp_undo_spur_to_response(sk, flag); - break; - case 1: - tcp_conservative_spur_to_response(tp); - break; - default: - tcp_cwr_spur_to_response(sk); - break; - } - tp->frto_counter = 0; - tp->undo_marker = 0; - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSPURIOUSRTOS); - } - return false; -} - /* RFC 5961 7 [ACK Throttling] */ static void tcp_send_challenge_ack(struct sock *sk) { @@ -3616,7 +3257,6 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) int prior_packets; int prior_sacked = tp->sacked_out; int pkts_acked = 0; - bool frto_cwnd = false; /* If the ack is older than previous acks * then we can probably ignore it. @@ -3690,22 +3330,15 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) pkts_acked = prior_packets - tp->packets_out; - if (tp->frto_counter) - frto_cwnd = tcp_process_frto(sk, flag); - /* Guarantee sacktag reordering detection against wrap-arounds */ - if (before(tp->frto_highmark, tp->snd_una)) - tp->frto_highmark = 0; - if (tcp_ack_is_dubious(sk, flag)) { /* Advance CWND, if state allows this. */ - if ((flag & FLAG_DATA_ACKED) && !frto_cwnd && - tcp_may_raise_cwnd(sk, flag)) + if ((flag & FLAG_DATA_ACKED) && tcp_may_raise_cwnd(sk, flag)) tcp_cong_avoid(sk, ack, prior_in_flight); is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP)); tcp_fastretrans_alert(sk, pkts_acked, prior_sacked, is_dupack, flag); } else { - if ((flag & FLAG_DATA_ACKED) && !frto_cwnd) + if (flag & FLAG_DATA_ACKED) tcp_cong_avoid(sk, ack, prior_in_flight); } diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 8f0234f8bb95..05eaf8904613 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -422,9 +422,6 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, newtp->snd_cwnd = TCP_INIT_CWND; newtp->snd_cwnd_cnt = 0; - newtp->frto_counter = 0; - newtp->frto_highmark = 0; - if (newicsk->icsk_ca_ops != &tcp_init_congestion_ops && !try_module_get(newicsk->icsk_ca_ops->owner)) newicsk->icsk_ca_ops = &tcp_init_congestion_ops; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index e787ecec505e..163cf5fc0119 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -78,10 +78,6 @@ static void tcp_event_new_data_sent(struct sock *sk, const struct sk_buff *skb) tcp_advance_send_head(sk, skb); tp->snd_nxt = TCP_SKB_CB(skb)->end_seq; - /* Don't override Nagle indefinitely with F-RTO */ - if (tp->frto_counter == 2) - tp->frto_counter = 3; - tp->packets_out += tcp_skb_pcount(skb); if (!prior_packets || icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS || icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) @@ -1470,11 +1466,8 @@ static inline bool tcp_nagle_test(const struct tcp_sock *tp, const struct sk_buf if (nonagle & TCP_NAGLE_PUSH) return true; - /* Don't use the nagle rule for urgent data (or for the final FIN). - * Nagle can be ignored during F-RTO too (see RFC4138). - */ - if (tcp_urg_mode(tp) || (tp->frto_counter == 2) || - (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)) + /* Don't use the nagle rule for urgent data (or for the final FIN). */ + if (tcp_urg_mode(tp) || (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)) return true; if (!tcp_nagle_check(tp, skb, cur_mss, nonagle)) diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index eeccf795e917..4b85e6f636c9 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -416,11 +416,7 @@ void tcp_retransmit_timer(struct sock *sk) NET_INC_STATS_BH(sock_net(sk), mib_idx); } - if (tcp_use_frto(sk)) { - tcp_enter_frto(sk); - } else { - tcp_enter_loss(sk, 0); - } + tcp_enter_loss(sk, 0); if (tcp_retransmit_skb(sk, tcp_write_queue_head(sk)) > 0) { /* Retransmission failed because of local congestion, diff --git a/net/ipv4/tcp_westwood.c b/net/ipv4/tcp_westwood.c index 1b91bf48e277..76a1e23259e1 100644 --- a/net/ipv4/tcp_westwood.c +++ b/net/ipv4/tcp_westwood.c @@ -236,7 +236,7 @@ static void tcp_westwood_event(struct sock *sk, enum tcp_ca_event event) tp->snd_cwnd = tp->snd_ssthresh = tcp_westwood_bw_rttmin(sk); break; - case CA_EVENT_FRTO: + case CA_EVENT_LOSS: tp->snd_ssthresh = tcp_westwood_bw_rttmin(sk); /* Update RTT_min when next ack arrives */ w->reset_rtt_min = 1; -- cgit v1.2.3 From 67baf66339f82b5ddef5731caedb1e6db496818d Mon Sep 17 00:00:00 2001 From: Janusz Dziedzic Date: Thu, 21 Mar 2013 15:47:56 +0100 Subject: mac80211: add P2P NoA settings Add P2P NoA settings for STA mode. Signed-off-by: Janusz Dziedzic [fix docs] Signed-off-by: Johannes Berg --- drivers/net/wireless/iwlwifi/mvm/mac-ctxt.c | 12 ++++++-- include/net/mac80211.h | 6 ++-- net/mac80211/cfg.c | 23 +++++++++++---- net/mac80211/ieee80211_i.h | 2 +- net/mac80211/mlme.c | 45 ++++++++++++++++------------- net/mac80211/trace.h | 6 ++-- 6 files changed, 57 insertions(+), 37 deletions(-) (limited to 'include/net') diff --git a/drivers/net/wireless/iwlwifi/mvm/mac-ctxt.c b/drivers/net/wireless/iwlwifi/mvm/mac-ctxt.c index 341dbc0237ea..1d20287b1120 100644 --- a/drivers/net/wireless/iwlwifi/mvm/mac-ctxt.c +++ b/drivers/net/wireless/iwlwifi/mvm/mac-ctxt.c @@ -662,6 +662,7 @@ static int iwl_mvm_mac_ctxt_cmd_p2p_client(struct iwl_mvm *mvm, u32 action) { struct iwl_mac_ctx_cmd cmd = {}; + struct ieee80211_p2p_noa_attr *noa = &vif->bss_conf.p2p_noa_attr; WARN_ON(vif->type != NL80211_IFTYPE_STATION || !vif->p2p); @@ -671,7 +672,8 @@ static int iwl_mvm_mac_ctxt_cmd_p2p_client(struct iwl_mvm *mvm, /* Fill the data specific for station mode */ iwl_mvm_mac_ctxt_cmd_fill_sta(mvm, vif, &cmd.p2p_sta.sta); - cmd.p2p_sta.ctwin = cpu_to_le32(vif->bss_conf.p2p_ctwindow); + cmd.p2p_sta.ctwin = cpu_to_le32(noa->oppps_ctwindow & + IEEE80211_P2P_OPPPS_CTWINDOW_MASK); return iwl_mvm_mac_ctxt_send_cmd(mvm, &cmd); } @@ -892,6 +894,7 @@ static int iwl_mvm_mac_ctxt_cmd_go(struct iwl_mvm *mvm, u32 action) { struct iwl_mac_ctx_cmd cmd = {}; + struct ieee80211_p2p_noa_attr *noa = &vif->bss_conf.p2p_noa_attr; WARN_ON(vif->type != NL80211_IFTYPE_AP || !vif->p2p); @@ -901,8 +904,11 @@ static int iwl_mvm_mac_ctxt_cmd_go(struct iwl_mvm *mvm, /* Fill the data specific for GO mode */ iwl_mvm_mac_ctxt_cmd_fill_ap(mvm, vif, &cmd.go.ap); - cmd.go.ctwin = cpu_to_le32(vif->bss_conf.p2p_ctwindow); - cmd.go.opp_ps_enabled = cpu_to_le32(!!vif->bss_conf.p2p_oppps); + cmd.go.ctwin = cpu_to_le32(noa->oppps_ctwindow & + IEEE80211_P2P_OPPPS_CTWINDOW_MASK); + cmd.go.opp_ps_enabled = + cpu_to_le32(!!(noa->oppps_ctwindow & + IEEE80211_P2P_OPPPS_ENABLE_BIT)); return iwl_mvm_mac_ctxt_send_cmd(mvm, &cmd); } diff --git a/include/net/mac80211.h b/include/net/mac80211.h index dd73b8c6746b..9b536172e27e 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -330,8 +330,7 @@ enum ieee80211_rssi_event { * @ssid_len: Length of SSID given in @ssid. * @hidden_ssid: The SSID of the current vif is hidden. Only valid in AP-mode. * @txpower: TX power in dBm - * @p2p_ctwindow: P2P CTWindow, only for P2P client interfaces - * @p2p_oppps: P2P opportunistic PS is enabled + * @p2p_noa_attr: P2P NoA attribute for P2P powersave */ struct ieee80211_bss_conf { const u8 *bssid; @@ -365,8 +364,7 @@ struct ieee80211_bss_conf { size_t ssid_len; bool hidden_ssid; int txpower; - u8 p2p_ctwindow; - bool p2p_oppps; + struct ieee80211_p2p_noa_attr p2p_noa_attr; }; /** diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index e5c1441ac2b8..50aaf25d4735 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -965,8 +965,13 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev, sdata->vif.bss_conf.hidden_ssid = (params->hidden_ssid != NL80211_HIDDEN_SSID_NOT_IN_USE); - sdata->vif.bss_conf.p2p_ctwindow = params->p2p_ctwindow; - sdata->vif.bss_conf.p2p_oppps = params->p2p_opp_ps; + memset(&sdata->vif.bss_conf.p2p_noa_attr, 0, + sizeof(sdata->vif.bss_conf.p2p_noa_attr)); + sdata->vif.bss_conf.p2p_noa_attr.oppps_ctwindow = + params->p2p_ctwindow & IEEE80211_P2P_OPPPS_CTWINDOW_MASK; + if (params->p2p_opp_ps) + sdata->vif.bss_conf.p2p_noa_attr.oppps_ctwindow |= + IEEE80211_P2P_OPPPS_ENABLE_BIT; err = ieee80211_assign_beacon(sdata, ¶ms->beacon); if (err < 0) @@ -1961,12 +1966,20 @@ static int ieee80211_change_bss(struct wiphy *wiphy, } if (params->p2p_ctwindow >= 0) { - sdata->vif.bss_conf.p2p_ctwindow = params->p2p_ctwindow; + sdata->vif.bss_conf.p2p_noa_attr.oppps_ctwindow &= + ~IEEE80211_P2P_OPPPS_CTWINDOW_MASK; + sdata->vif.bss_conf.p2p_noa_attr.oppps_ctwindow |= + params->p2p_ctwindow & IEEE80211_P2P_OPPPS_CTWINDOW_MASK; changed |= BSS_CHANGED_P2P_PS; } - if (params->p2p_opp_ps >= 0) { - sdata->vif.bss_conf.p2p_oppps = params->p2p_opp_ps; + if (params->p2p_opp_ps > 0) { + sdata->vif.bss_conf.p2p_noa_attr.oppps_ctwindow |= + IEEE80211_P2P_OPPPS_ENABLE_BIT; + changed |= BSS_CHANGED_P2P_PS; + } else if (params->p2p_opp_ps == 0) { + sdata->vif.bss_conf.p2p_noa_attr.oppps_ctwindow &= + ~IEEE80211_P2P_OPPPS_ENABLE_BIT; changed |= BSS_CHANGED_P2P_PS; } diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index ae2d1754b792..55155e3b9b20 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -442,7 +442,7 @@ struct ieee80211_if_managed { u8 use_4addr; - u8 p2p_noa_index; + s16 p2p_noa_index; /* Signal strength from the last Beacon frame in the current BSS. */ int last_beacon_signal; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index f9258707b15a..8b3e852d6032 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1661,20 +1661,17 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata, rcu_read_lock(); ies = rcu_dereference(cbss->ies); if (ies) { - struct ieee80211_p2p_noa_attr noa; int ret; ret = cfg80211_get_p2p_attr( ies->data, ies->len, IEEE80211_P2P_ATTR_ABSENCE_NOTICE, - (u8 *) &noa, sizeof(noa)); + (u8 *) &bss_conf->p2p_noa_attr, + sizeof(bss_conf->p2p_noa_attr)); if (ret >= 2) { - bss_conf->p2p_oppps = noa.oppps_ctwindow & - IEEE80211_P2P_OPPPS_ENABLE_BIT; - bss_conf->p2p_ctwindow = noa.oppps_ctwindow & - IEEE80211_P2P_OPPPS_CTWINDOW_MASK; + sdata->u.mgd.p2p_noa_index = + bss_conf->p2p_noa_attr.index; bss_info_changed |= BSS_CHANGED_P2P_PS; - sdata->u.mgd.p2p_noa_index = noa.index; } } rcu_read_unlock(); @@ -1799,8 +1796,9 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, changed |= BSS_CHANGED_ASSOC; sdata->vif.bss_conf.assoc = false; - sdata->vif.bss_conf.p2p_ctwindow = 0; - sdata->vif.bss_conf.p2p_oppps = false; + ifmgd->p2p_noa_index = -1; + memset(&sdata->vif.bss_conf.p2p_noa_attr, 0, + sizeof(sdata->vif.bss_conf.p2p_noa_attr)); /* on the next assoc, re-program HT/VHT parameters */ memset(&ifmgd->ht_capa, 0, sizeof(ifmgd->ht_capa)); @@ -2963,24 +2961,30 @@ ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, } if (sdata->vif.p2p) { - struct ieee80211_p2p_noa_attr noa; + struct ieee80211_p2p_noa_attr noa = {}; int ret; ret = cfg80211_get_p2p_attr(mgmt->u.beacon.variable, len - baselen, IEEE80211_P2P_ATTR_ABSENCE_NOTICE, (u8 *) &noa, sizeof(noa)); - if (ret >= 2 && sdata->u.mgd.p2p_noa_index != noa.index) { - bss_conf->p2p_oppps = noa.oppps_ctwindow & - IEEE80211_P2P_OPPPS_ENABLE_BIT; - bss_conf->p2p_ctwindow = noa.oppps_ctwindow & - IEEE80211_P2P_OPPPS_CTWINDOW_MASK; + if (ret >= 2) { + if (sdata->u.mgd.p2p_noa_index != noa.index) { + /* valid noa_attr and index changed */ + sdata->u.mgd.p2p_noa_index = noa.index; + memcpy(&bss_conf->p2p_noa_attr, &noa, sizeof(noa)); + changed |= BSS_CHANGED_P2P_PS; + /* + * make sure we update all information, the CRC + * mechanism doesn't look at P2P attributes. + */ + ifmgd->beacon_crc_valid = false; + } + } else if (sdata->u.mgd.p2p_noa_index != -1) { + /* noa_attr not found and we had valid noa_attr before */ + sdata->u.mgd.p2p_noa_index = -1; + memset(&bss_conf->p2p_noa_attr, 0, sizeof(bss_conf->p2p_noa_attr)); changed |= BSS_CHANGED_P2P_PS; - sdata->u.mgd.p2p_noa_index = noa.index; - /* - * make sure we update all information, the CRC - * mechanism doesn't look at P2P attributes. - */ ifmgd->beacon_crc_valid = false; } } @@ -3523,6 +3527,7 @@ void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata) ifmgd->powersave = sdata->wdev.ps; ifmgd->uapsd_queues = IEEE80211_DEFAULT_UAPSD_QUEUES; ifmgd->uapsd_max_sp_len = IEEE80211_DEFAULT_MAX_SP_LEN; + ifmgd->p2p_noa_index = -1; mutex_init(&ifmgd->mtx); diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h index c5899797a8d4..d79e374e129a 100644 --- a/net/mac80211/trace.h +++ b/net/mac80211/trace.h @@ -359,8 +359,7 @@ TRACE_EVENT(drv_bss_info_changed, __dynamic_array(u8, ssid, info->ssid_len); __field(bool, hidden_ssid); __field(int, txpower) - __field(u8, p2p_ctwindow) - __field(bool, p2p_oppps) + __field(u8, p2p_oppps_ctwindow) ), TP_fast_assign( @@ -400,8 +399,7 @@ TRACE_EVENT(drv_bss_info_changed, memcpy(__get_dynamic_array(ssid), info->ssid, info->ssid_len); __entry->hidden_ssid = info->hidden_ssid; __entry->txpower = info->txpower; - __entry->p2p_ctwindow = info->p2p_ctwindow; - __entry->p2p_oppps = info->p2p_oppps; + __entry->p2p_oppps_ctwindow = info->p2p_noa_attr.oppps_ctwindow; ), TP_printk( -- cgit v1.2.3 From 58d7d8f9b20ee6f883532b952f246e4289fe06eb Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Thu, 21 Mar 2013 07:45:28 +0000 Subject: decnet: Parse netlink attributes on our own decnet is the only subsystem left that is relying on the global netlink attribute buffer rta_buf. It's horrible design and we want to get rid of it. This converts all of decnet to do implicit attribute parsing. It also gets rid of the error prone struct dn_kern_rta. Yes, the fib_magic() stuff is not pretty. It's compiled tested but I need someone with appropriate hardware to test the patch since I don't have access to it. Cc: linux-decnet-user@lists.sourceforge.net Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/net/dn_fib.h | 28 ++----- net/decnet/dn_fib.c | 211 +++++++++++++++++++++++++++----------------------- net/decnet/dn_route.c | 27 ++++--- net/decnet/dn_table.c | 42 +++++----- 4 files changed, 160 insertions(+), 148 deletions(-) (limited to 'include/net') diff --git a/include/net/dn_fib.h b/include/net/dn_fib.h index 1ee9d4bda30d..74004af31c48 100644 --- a/include/net/dn_fib.h +++ b/include/net/dn_fib.h @@ -1,24 +1,9 @@ #ifndef _NET_DN_FIB_H #define _NET_DN_FIB_H -/* WARNING: The ordering of these elements must match ordering - * of RTA_* rtnetlink attribute numbers. - */ -struct dn_kern_rta { - void *rta_dst; - void *rta_src; - int *rta_iif; - int *rta_oif; - void *rta_gw; - u32 *rta_priority; - void *rta_prefsrc; - struct rtattr *rta_mx; - struct rtattr *rta_mp; - unsigned char *rta_protoinfo; - u32 *rta_flow; - struct rta_cacheinfo *rta_ci; - struct rta_session *rta_sess; -}; +#include + +extern const struct nla_policy rtm_dn_policy[]; struct dn_fib_res { struct fib_rule *r; @@ -93,10 +78,10 @@ struct dn_fib_table { u32 n; int (*insert)(struct dn_fib_table *t, struct rtmsg *r, - struct dn_kern_rta *rta, struct nlmsghdr *n, + struct nlattr *attrs[], struct nlmsghdr *n, struct netlink_skb_parms *req); int (*delete)(struct dn_fib_table *t, struct rtmsg *r, - struct dn_kern_rta *rta, struct nlmsghdr *n, + struct nlattr *attrs[], struct nlmsghdr *n, struct netlink_skb_parms *req); int (*lookup)(struct dn_fib_table *t, const struct flowidn *fld, struct dn_fib_res *res); @@ -116,13 +101,12 @@ extern void dn_fib_cleanup(void); extern int dn_fib_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg); extern struct dn_fib_info *dn_fib_create_info(const struct rtmsg *r, - struct dn_kern_rta *rta, + struct nlattr *attrs[], const struct nlmsghdr *nlh, int *errp); extern int dn_fib_semantic_match(int type, struct dn_fib_info *fi, const struct flowidn *fld, struct dn_fib_res *res); extern void dn_fib_release_info(struct dn_fib_info *fi); -extern __le16 dn_fib_get_attr16(struct rtattr *attr, int attrlen, int type); extern void dn_fib_flush(void); extern void dn_fib_select_multipath(const struct flowidn *fld, struct dn_fib_res *res); diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c index e36614eccc04..42a8048fe725 100644 --- a/net/decnet/dn_fib.c +++ b/net/decnet/dn_fib.c @@ -145,22 +145,10 @@ static inline struct dn_fib_info *dn_fib_find_info(const struct dn_fib_info *nfi return NULL; } -__le16 dn_fib_get_attr16(struct rtattr *attr, int attrlen, int type) +static int dn_fib_count_nhs(const struct nlattr *attr) { - while(RTA_OK(attr,attrlen)) { - if (attr->rta_type == type) - return *(__le16*)RTA_DATA(attr); - attr = RTA_NEXT(attr, attrlen); - } - - return 0; -} - -static int dn_fib_count_nhs(struct rtattr *rta) -{ - int nhs = 0; - struct rtnexthop *nhp = RTA_DATA(rta); - int nhlen = RTA_PAYLOAD(rta); + struct rtnexthop *nhp = nla_data(attr); + int nhs = 0, nhlen = nla_len(attr); while(nhlen >= (int)sizeof(struct rtnexthop)) { if ((nhlen -= nhp->rtnh_len) < 0) @@ -172,10 +160,11 @@ static int dn_fib_count_nhs(struct rtattr *rta) return nhs; } -static int dn_fib_get_nhs(struct dn_fib_info *fi, const struct rtattr *rta, const struct rtmsg *r) +static int dn_fib_get_nhs(struct dn_fib_info *fi, const struct nlattr *attr, + const struct rtmsg *r) { - struct rtnexthop *nhp = RTA_DATA(rta); - int nhlen = RTA_PAYLOAD(rta); + struct rtnexthop *nhp = nla_data(attr); + int nhlen = nla_len(attr); change_nexthops(fi) { int attrlen = nhlen - sizeof(struct rtnexthop); @@ -187,7 +176,10 @@ static int dn_fib_get_nhs(struct dn_fib_info *fi, const struct rtattr *rta, cons nh->nh_weight = nhp->rtnh_hops + 1; if (attrlen) { - nh->nh_gw = dn_fib_get_attr16(RTNH_DATA(nhp), attrlen, RTA_GATEWAY); + struct nlattr *gw_attr; + + gw_attr = nla_find((struct nlattr *) (nhp + 1), attrlen, RTA_GATEWAY); + nh->nh_gw = gw_attr ? nla_get_le16(gw_attr) : 0; } nhp = RTNH_NEXT(nhp); } endfor_nexthops(fi); @@ -268,7 +260,8 @@ out: } -struct dn_fib_info *dn_fib_create_info(const struct rtmsg *r, struct dn_kern_rta *rta, const struct nlmsghdr *nlh, int *errp) +struct dn_fib_info *dn_fib_create_info(const struct rtmsg *r, struct nlattr *attrs[], + const struct nlmsghdr *nlh, int *errp) { int err; struct dn_fib_info *fi = NULL; @@ -281,11 +274,9 @@ struct dn_fib_info *dn_fib_create_info(const struct rtmsg *r, struct dn_kern_rta if (dn_fib_props[r->rtm_type].scope > r->rtm_scope) goto err_inval; - if (rta->rta_mp) { - nhs = dn_fib_count_nhs(rta->rta_mp); - if (nhs == 0) - goto err_inval; - } + if (attrs[RTA_MULTIPATH] && + (nhs = dn_fib_count_nhs(attrs[RTA_MULTIPATH])) == 0) + goto err_inval; fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct dn_fib_nh), GFP_KERNEL); err = -ENOBUFS; @@ -295,53 +286,65 @@ struct dn_fib_info *dn_fib_create_info(const struct rtmsg *r, struct dn_kern_rta fi->fib_protocol = r->rtm_protocol; fi->fib_nhs = nhs; fi->fib_flags = r->rtm_flags; - if (rta->rta_priority) - fi->fib_priority = *rta->rta_priority; - if (rta->rta_mx) { - int attrlen = RTA_PAYLOAD(rta->rta_mx); - struct rtattr *attr = RTA_DATA(rta->rta_mx); - while(RTA_OK(attr, attrlen)) { - unsigned int flavour = attr->rta_type; + if (attrs[RTA_PRIORITY]) + fi->fib_priority = nla_get_u32(attrs[RTA_PRIORITY]); + + if (attrs[RTA_METRICS]) { + struct nlattr *attr; + int rem; - if (flavour) { - if (flavour > RTAX_MAX) + nla_for_each_nested(attr, attrs[RTA_METRICS], rem) { + int type = nla_type(attr); + + if (type) { + if (type > RTAX_MAX || nla_len(attr) < 4) goto err_inval; - fi->fib_metrics[flavour-1] = *(unsigned int *)RTA_DATA(attr); + + fi->fib_metrics[type-1] = nla_get_u32(attr); } - attr = RTA_NEXT(attr, attrlen); } } - if (rta->rta_prefsrc) - memcpy(&fi->fib_prefsrc, rta->rta_prefsrc, 2); - if (rta->rta_mp) { - if ((err = dn_fib_get_nhs(fi, rta->rta_mp, r)) != 0) + if (attrs[RTA_PREFSRC]) + fi->fib_prefsrc = nla_get_le16(attrs[RTA_PREFSRC]); + + if (attrs[RTA_MULTIPATH]) { + if ((err = dn_fib_get_nhs(fi, attrs[RTA_MULTIPATH], r)) != 0) goto failure; - if (rta->rta_oif && fi->fib_nh->nh_oif != *rta->rta_oif) + + if (attrs[RTA_OIF] && + fi->fib_nh->nh_oif != nla_get_u32(attrs[RTA_OIF])) goto err_inval; - if (rta->rta_gw && memcmp(&fi->fib_nh->nh_gw, rta->rta_gw, 2)) + + if (attrs[RTA_GATEWAY] && + fi->fib_nh->nh_gw != nla_get_le16(attrs[RTA_GATEWAY])) goto err_inval; } else { struct dn_fib_nh *nh = fi->fib_nh; - if (rta->rta_oif) - nh->nh_oif = *rta->rta_oif; - if (rta->rta_gw) - memcpy(&nh->nh_gw, rta->rta_gw, 2); + + if (attrs[RTA_OIF]) + nh->nh_oif = nla_get_u32(attrs[RTA_OIF]); + + if (attrs[RTA_GATEWAY]) + nh->nh_gw = nla_get_le16(attrs[RTA_GATEWAY]); + nh->nh_flags = r->rtm_flags; nh->nh_weight = 1; } if (r->rtm_type == RTN_NAT) { - if (rta->rta_gw == NULL || nhs != 1 || rta->rta_oif) + if (!attrs[RTA_GATEWAY] || nhs != 1 || attrs[RTA_OIF]) goto err_inval; - memcpy(&fi->fib_nh->nh_gw, rta->rta_gw, 2); + + fi->fib_nh->nh_gw = nla_get_le16(attrs[RTA_GATEWAY]); goto link_it; } if (dn_fib_props[r->rtm_type].error) { - if (rta->rta_gw || rta->rta_oif || rta->rta_mp) + if (attrs[RTA_GATEWAY] || attrs[RTA_OIF] || attrs[RTA_MULTIPATH]) goto err_inval; + goto link_it; } @@ -367,8 +370,8 @@ struct dn_fib_info *dn_fib_create_info(const struct rtmsg *r, struct dn_kern_rta } if (fi->fib_prefsrc) { - if (r->rtm_type != RTN_LOCAL || rta->rta_dst == NULL || - memcmp(&fi->fib_prefsrc, rta->rta_dst, 2)) + if (r->rtm_type != RTN_LOCAL || !attrs[RTA_DST] || + fi->fib_prefsrc != nla_get_le16(attrs[RTA_DST])) if (dnet_addr_type(fi->fib_prefsrc) != RTN_LOCAL) goto err_inval; } @@ -486,29 +489,24 @@ void dn_fib_select_multipath(const struct flowidn *fld, struct dn_fib_res *res) spin_unlock_bh(&dn_fib_multipath_lock); } +const struct nla_policy rtm_dn_policy[RTA_MAX + 1] = { + [RTA_DST] = { .type = NLA_U16 }, + [RTA_SRC] = { .type = NLA_U16 }, + [RTA_IIF] = { .type = NLA_U32 }, + [RTA_OIF] = { .type = NLA_U32 }, + [RTA_GATEWAY] = { .type = NLA_U16 }, + [RTA_PRIORITY] = { .type = NLA_U32 }, + [RTA_PREFSRC] = { .type = NLA_U16 }, + [RTA_METRICS] = { .type = NLA_NESTED }, + [RTA_MULTIPATH] = { .type = NLA_NESTED }, + [RTA_TABLE] = { .type = NLA_U32 }, + [RTA_MARK] = { .type = NLA_U32 }, +}; -static int dn_fib_check_attr(struct rtmsg *r, struct rtattr **rta) -{ - int i; - - for(i = 1; i <= RTA_MAX; i++) { - struct rtattr *attr = rta[i-1]; - if (attr) { - if (RTA_PAYLOAD(attr) < 4 && RTA_PAYLOAD(attr) != 2) - return -EINVAL; - if (i != RTA_MULTIPATH && i != RTA_METRICS && - i != RTA_TABLE) - rta[i-1] = (struct rtattr *)RTA_DATA(attr); - } - } - - return 0; -} - -static inline u32 rtm_get_table(struct rtattr **rta, u8 table) +static inline u32 rtm_get_table(struct nlattr *attrs[], u8 table) { - if (rta[RTA_TABLE - 1]) - table = nla_get_u32((struct nlattr *) rta[RTA_TABLE - 1]); + if (attrs[RTA_TABLE]) + table = nla_get_u32(attrs[RTA_TABLE]); return table; } @@ -517,8 +515,9 @@ static int dn_fib_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void * { struct net *net = sock_net(skb->sk); struct dn_fib_table *tb; - struct rtattr **rta = arg; - struct rtmsg *r = NLMSG_DATA(nlh); + struct rtmsg *r = nlmsg_data(nlh); + struct nlattr *attrs[RTA_MAX+1]; + int err; if (!capable(CAP_NET_ADMIN)) return -EPERM; @@ -526,22 +525,24 @@ static int dn_fib_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void * if (!net_eq(net, &init_net)) return -EINVAL; - if (dn_fib_check_attr(r, rta)) - return -EINVAL; + err = nlmsg_parse(nlh, sizeof(*r), attrs, RTA_MAX, rtm_dn_policy); + if (err < 0) + return err; - tb = dn_fib_get_table(rtm_get_table(rta, r->rtm_table), 0); - if (tb) - return tb->delete(tb, r, (struct dn_kern_rta *)rta, nlh, &NETLINK_CB(skb)); + tb = dn_fib_get_table(rtm_get_table(attrs, r->rtm_table), 0); + if (!tb) + return -ESRCH; - return -ESRCH; + return tb->delete(tb, r, attrs, nlh, &NETLINK_CB(skb)); } static int dn_fib_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { struct net *net = sock_net(skb->sk); struct dn_fib_table *tb; - struct rtattr **rta = arg; - struct rtmsg *r = NLMSG_DATA(nlh); + struct rtmsg *r = nlmsg_data(nlh); + struct nlattr *attrs[RTA_MAX+1]; + int err; if (!capable(CAP_NET_ADMIN)) return -EPERM; @@ -549,14 +550,15 @@ static int dn_fib_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void * if (!net_eq(net, &init_net)) return -EINVAL; - if (dn_fib_check_attr(r, rta)) - return -EINVAL; + err = nlmsg_parse(nlh, sizeof(*r), attrs, RTA_MAX, rtm_dn_policy); + if (err < 0) + return err; - tb = dn_fib_get_table(rtm_get_table(rta, r->rtm_table), 1); - if (tb) - return tb->insert(tb, r, (struct dn_kern_rta *)rta, nlh, &NETLINK_CB(skb)); + tb = dn_fib_get_table(rtm_get_table(attrs, r->rtm_table), 1); + if (!tb) + return -ENOBUFS; - return -ENOBUFS; + return tb->insert(tb, r, attrs, nlh, &NETLINK_CB(skb)); } static void fib_magic(int cmd, int type, __le16 dst, int dst_len, struct dn_ifaddr *ifa) @@ -566,10 +568,31 @@ static void fib_magic(int cmd, int type, __le16 dst, int dst_len, struct dn_ifad struct nlmsghdr nlh; struct rtmsg rtm; } req; - struct dn_kern_rta rta; + struct { + struct nlattr hdr; + __le16 dst; + } dst_attr = { + .dst = dst, + }; + struct { + struct nlattr hdr; + __le16 prefsrc; + } prefsrc_attr = { + .prefsrc = ifa->ifa_local, + }; + struct { + struct nlattr hdr; + u32 oif; + } oif_attr = { + .oif = ifa->ifa_dev->dev->ifindex, + }; + struct nlattr *attrs[RTA_MAX+1] = { + [RTA_DST] = (struct nlattr *) &dst_attr, + [RTA_PREFSRC] = (struct nlattr * ) &prefsrc_attr, + [RTA_OIF] = (struct nlattr *) &oif_attr, + }; memset(&req.rtm, 0, sizeof(req.rtm)); - memset(&rta, 0, sizeof(rta)); if (type == RTN_UNICAST) tb = dn_fib_get_table(RT_MIN_TABLE, 1); @@ -591,14 +614,10 @@ static void fib_magic(int cmd, int type, __le16 dst, int dst_len, struct dn_ifad req.rtm.rtm_scope = (type != RTN_LOCAL ? RT_SCOPE_LINK : RT_SCOPE_HOST); req.rtm.rtm_type = type; - rta.rta_dst = &dst; - rta.rta_prefsrc = &ifa->ifa_local; - rta.rta_oif = &ifa->ifa_dev->dev->ifindex; - if (cmd == RTM_NEWROUTE) - tb->insert(tb, &req.rtm, &rta, &req.nlh, NULL); + tb->insert(tb, &req.rtm, attrs, &req.nlh, NULL); else - tb->delete(tb, &req.rtm, &rta, &req.nlh, NULL); + tb->delete(tb, &req.rtm, attrs, &req.nlh, NULL); } static void dn_fib_add_ifaddr(struct dn_ifaddr *ifa) diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 5ac0e153ef83..b4b3508e77f0 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -1619,17 +1619,21 @@ errout: static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void *arg) { struct net *net = sock_net(in_skb->sk); - struct rtattr **rta = arg; struct rtmsg *rtm = nlmsg_data(nlh); struct dn_route *rt = NULL; struct dn_skb_cb *cb; int err; struct sk_buff *skb; struct flowidn fld; + struct nlattr *tb[RTA_MAX+1]; if (!net_eq(net, &init_net)) return -EINVAL; + err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_dn_policy); + if (err < 0) + return err; + memset(&fld, 0, sizeof(fld)); fld.flowidn_proto = DNPROTO_NSP; @@ -1639,12 +1643,14 @@ static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void skb_reset_mac_header(skb); cb = DN_SKB_CB(skb); - if (rta[RTA_SRC-1]) - memcpy(&fld.saddr, RTA_DATA(rta[RTA_SRC-1]), 2); - if (rta[RTA_DST-1]) - memcpy(&fld.daddr, RTA_DATA(rta[RTA_DST-1]), 2); - if (rta[RTA_IIF-1]) - memcpy(&fld.flowidn_iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int)); + if (tb[RTA_SRC]) + fld.saddr = nla_get_le16(tb[RTA_SRC]); + + if (tb[RTA_DST]) + fld.daddr = nla_get_le16(tb[RTA_DST]); + + if (tb[RTA_IIF]) + fld.flowidn_iif = nla_get_u32(tb[RTA_IIF]); if (fld.flowidn_iif) { struct net_device *dev; @@ -1669,10 +1675,9 @@ static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void if (!err && -rt->dst.error) err = rt->dst.error; } else { - int oif = 0; - if (rta[RTA_OIF - 1]) - memcpy(&oif, RTA_DATA(rta[RTA_OIF - 1]), sizeof(int)); - fld.flowidn_oif = oif; + if (tb[RTA_OIF]) + fld.flowidn_oif = nla_get_u32(tb[RTA_OIF]); + err = dn_route_output_key((struct dst_entry **)&rt, &fld, 0); } diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c index 6c2445bcaba1..fc42a0afd306 100644 --- a/net/decnet/dn_table.c +++ b/net/decnet/dn_table.c @@ -224,26 +224,27 @@ static struct dn_zone *dn_new_zone(struct dn_hash *table, int z) } -static int dn_fib_nh_match(struct rtmsg *r, struct nlmsghdr *nlh, struct dn_kern_rta *rta, struct dn_fib_info *fi) +static int dn_fib_nh_match(struct rtmsg *r, struct nlmsghdr *nlh, struct nlattr *attrs[], struct dn_fib_info *fi) { struct rtnexthop *nhp; int nhlen; - if (rta->rta_priority && *rta->rta_priority != fi->fib_priority) + if (attrs[RTA_PRIORITY] && + nla_get_u32(attrs[RTA_PRIORITY]) != fi->fib_priority) return 1; - if (rta->rta_oif || rta->rta_gw) { - if ((!rta->rta_oif || *rta->rta_oif == fi->fib_nh->nh_oif) && - (!rta->rta_gw || memcmp(rta->rta_gw, &fi->fib_nh->nh_gw, 2) == 0)) + if (attrs[RTA_OIF] || attrs[RTA_GATEWAY]) { + if ((!attrs[RTA_OIF] || nla_get_u32(attrs[RTA_OIF]) == fi->fib_nh->nh_oif) && + (!attrs[RTA_GATEWAY] || nla_get_le16(attrs[RTA_GATEWAY]) != fi->fib_nh->nh_gw)) return 0; return 1; } - if (rta->rta_mp == NULL) + if (!attrs[RTA_MULTIPATH]) return 0; - nhp = RTA_DATA(rta->rta_mp); - nhlen = RTA_PAYLOAD(rta->rta_mp); + nhp = nla_data(attrs[RTA_MULTIPATH]); + nhlen = nla_len(attrs[RTA_MULTIPATH]); for_nexthops(fi) { int attrlen = nhlen - sizeof(struct rtnexthop); @@ -254,7 +255,10 @@ static int dn_fib_nh_match(struct rtmsg *r, struct nlmsghdr *nlh, struct dn_kern if (nhp->rtnh_ifindex && nhp->rtnh_ifindex != nh->nh_oif) return 1; if (attrlen) { - gw = dn_fib_get_attr16(RTNH_DATA(nhp), attrlen, RTA_GATEWAY); + struct nlattr *gw_attr; + + gw_attr = nla_find((struct nlattr *) (nhp + 1), attrlen, RTA_GATEWAY); + gw = gw_attr ? nla_get_le16(gw_attr) : 0; if (gw && gw != nh->nh_gw) return 1; @@ -517,7 +521,8 @@ out: return skb->len; } -static int dn_fib_table_insert(struct dn_fib_table *tb, struct rtmsg *r, struct dn_kern_rta *rta, struct nlmsghdr *n, struct netlink_skb_parms *req) +static int dn_fib_table_insert(struct dn_fib_table *tb, struct rtmsg *r, struct nlattr *attrs[], + struct nlmsghdr *n, struct netlink_skb_parms *req) { struct dn_hash *table = (struct dn_hash *)tb->data; struct dn_fib_node *new_f, *f, **fp, **del_fp; @@ -536,15 +541,14 @@ static int dn_fib_table_insert(struct dn_fib_table *tb, struct rtmsg *r, struct return -ENOBUFS; dz_key_0(key); - if (rta->rta_dst) { - __le16 dst; - memcpy(&dst, rta->rta_dst, 2); + if (attrs[RTA_DST]) { + __le16 dst = nla_get_le16(attrs[RTA_DST]); if (dst & ~DZ_MASK(dz)) return -EINVAL; key = dz_key(dst, dz); } - if ((fi = dn_fib_create_info(r, rta, n, &err)) == NULL) + if ((fi = dn_fib_create_info(r, attrs, n, &err)) == NULL) return err; if (dz->dz_nent > (dz->dz_divisor << 2) && @@ -654,7 +658,8 @@ out: } -static int dn_fib_table_delete(struct dn_fib_table *tb, struct rtmsg *r, struct dn_kern_rta *rta, struct nlmsghdr *n, struct netlink_skb_parms *req) +static int dn_fib_table_delete(struct dn_fib_table *tb, struct rtmsg *r, struct nlattr *attrs[], + struct nlmsghdr *n, struct netlink_skb_parms *req) { struct dn_hash *table = (struct dn_hash*)tb->data; struct dn_fib_node **fp, **del_fp, *f; @@ -671,9 +676,8 @@ static int dn_fib_table_delete(struct dn_fib_table *tb, struct rtmsg *r, struct return -ESRCH; dz_key_0(key); - if (rta->rta_dst) { - __le16 dst; - memcpy(&dst, rta->rta_dst, 2); + if (attrs[RTA_DST]) { + __le16 dst = nla_get_le16(attrs[RTA_DST]); if (dst & ~DZ_MASK(dz)) return -EINVAL; key = dz_key(dst, dz); @@ -703,7 +707,7 @@ static int dn_fib_table_delete(struct dn_fib_table *tb, struct rtmsg *r, struct (r->rtm_scope == RT_SCOPE_NOWHERE || f->fn_scope == r->rtm_scope) && (!r->rtm_protocol || fi->fib_protocol == r->rtm_protocol) && - dn_fib_nh_match(r, n, rta, fi) == 0) + dn_fib_nh_match(r, n, attrs, fi) == 0) del_fp = fp; } -- cgit v1.2.3 From 661d2967b3f1b34eeaa7e212e7b9bbe8ee072b59 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Thu, 21 Mar 2013 07:45:29 +0000 Subject: rtnetlink: Remove passing of attributes into rtnl_doit functions With decnet converted, we can finally get rid of rta_buf and its computations around it. It also gets rid of the minimal header length verification since all message handlers do that explicitly anyway. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/net/rtnetlink.h | 2 +- net/bridge/br_mdb.c | 4 +-- net/can/gw.c | 5 ++- net/core/fib_rules.c | 4 +-- net/core/neighbour.c | 6 ++-- net/core/rtnetlink.c | 82 ++++++------------------------------------------- net/dcb/dcbnl.c | 2 +- net/decnet/dn_dev.c | 4 +-- net/decnet/dn_fib.c | 4 +-- net/decnet/dn_route.c | 2 +- net/ipv4/devinet.c | 7 ++--- net/ipv4/fib_frontend.c | 4 +-- net/ipv4/route.c | 2 +- net/ipv6/addrconf.c | 10 +++--- net/ipv6/addrlabel.c | 6 ++-- net/ipv6/route.c | 6 ++-- net/phonet/pn_netlink.c | 4 +-- net/sched/act_api.c | 2 +- net/sched/cls_api.c | 2 +- net/sched/sch_api.c | 6 ++-- 20 files changed, 47 insertions(+), 117 deletions(-) (limited to 'include/net') diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h index 5a15fabd6a75..702664833a53 100644 --- a/include/net/rtnetlink.h +++ b/include/net/rtnetlink.h @@ -4,7 +4,7 @@ #include #include -typedef int (*rtnl_doit_func)(struct sk_buff *, struct nlmsghdr *, void *); +typedef int (*rtnl_doit_func)(struct sk_buff *, struct nlmsghdr *); typedef int (*rtnl_dumpit_func)(struct sk_buff *, struct netlink_callback *); typedef u16 (*rtnl_calcit_func)(struct sk_buff *, struct nlmsghdr *); diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index ee79f3f20383..19942e38fd2d 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -382,7 +382,7 @@ static int __br_mdb_add(struct net *net, struct net_bridge *br, return ret; } -static int br_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int br_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct br_mdb_entry *entry; @@ -458,7 +458,7 @@ unlock: return err; } -static int br_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int br_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net_device *dev; struct br_mdb_entry *entry; diff --git a/net/can/gw.c b/net/can/gw.c index 2d117dc5ebea..2dc619db805a 100644 --- a/net/can/gw.c +++ b/net/can/gw.c @@ -778,8 +778,7 @@ static int cgw_parse_attr(struct nlmsghdr *nlh, struct cf_mod *mod, return 0; } -static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh, - void *arg) +static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh) { struct rtcanmsg *r; struct cgw_job *gwj; @@ -868,7 +867,7 @@ static void cgw_remove_all_jobs(void) } } -static int cgw_remove_job(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int cgw_remove_job(struct sk_buff *skb, struct nlmsghdr *nlh) { struct cgw_job *gwj = NULL; struct hlist_node *nx; diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 58a4ba27dfe3..d5a9f8ead0d8 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -266,7 +266,7 @@ errout: return err; } -static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) +static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh) { struct net *net = sock_net(skb->sk); struct fib_rule_hdr *frh = nlmsg_data(nlh); @@ -415,7 +415,7 @@ errout: return err; } -static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) +static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh) { struct net *net = sock_net(skb->sk); struct fib_rule_hdr *frh = nlmsg_data(nlh); diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 3863b8f639c5..c72a646d9f44 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1613,7 +1613,7 @@ int neigh_table_clear(struct neigh_table *tbl) } EXPORT_SYMBOL(neigh_table_clear); -static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct ndmsg *ndm; @@ -1677,7 +1677,7 @@ out: return err; } -static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct ndmsg *ndm; @@ -1955,7 +1955,7 @@ static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = { [NDTPA_LOCKTIME] = { .type = NLA_U64 }, }; -static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct neigh_table *tbl; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 9a9b99e1fb70..751f1244b648 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -515,32 +515,6 @@ out: return err; } -static const int rtm_min[RTM_NR_FAMILIES] = -{ - [RTM_FAM(RTM_NEWLINK)] = NLMSG_LENGTH(sizeof(struct ifinfomsg)), - [RTM_FAM(RTM_NEWADDR)] = NLMSG_LENGTH(sizeof(struct ifaddrmsg)), - [RTM_FAM(RTM_NEWROUTE)] = NLMSG_LENGTH(sizeof(struct rtmsg)), - [RTM_FAM(RTM_NEWRULE)] = NLMSG_LENGTH(sizeof(struct fib_rule_hdr)), - [RTM_FAM(RTM_NEWQDISC)] = NLMSG_LENGTH(sizeof(struct tcmsg)), - [RTM_FAM(RTM_NEWTCLASS)] = NLMSG_LENGTH(sizeof(struct tcmsg)), - [RTM_FAM(RTM_NEWTFILTER)] = NLMSG_LENGTH(sizeof(struct tcmsg)), - [RTM_FAM(RTM_NEWACTION)] = NLMSG_LENGTH(sizeof(struct tcamsg)), - [RTM_FAM(RTM_GETMULTICAST)] = NLMSG_LENGTH(sizeof(struct rtgenmsg)), - [RTM_FAM(RTM_GETANYCAST)] = NLMSG_LENGTH(sizeof(struct rtgenmsg)), -}; - -static const int rta_max[RTM_NR_FAMILIES] = -{ - [RTM_FAM(RTM_NEWLINK)] = IFLA_MAX, - [RTM_FAM(RTM_NEWADDR)] = IFA_MAX, - [RTM_FAM(RTM_NEWROUTE)] = RTA_MAX, - [RTM_FAM(RTM_NEWRULE)] = FRA_MAX, - [RTM_FAM(RTM_NEWQDISC)] = TCA_MAX, - [RTM_FAM(RTM_NEWTCLASS)] = TCA_MAX, - [RTM_FAM(RTM_NEWTFILTER)] = TCA_MAX, - [RTM_FAM(RTM_NEWACTION)] = TCAA_MAX, -}; - int rtnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, unsigned int group, int echo) { struct sock *rtnl = net->rtnl; @@ -1537,7 +1511,7 @@ errout: return err; } -static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct ifinfomsg *ifm; @@ -1578,7 +1552,7 @@ errout: return err; } -static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); const struct rtnl_link_ops *ops; @@ -1709,7 +1683,7 @@ static int rtnl_group_changelink(struct net *net, int group, return 0; } -static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); const struct rtnl_link_ops *ops; @@ -1864,7 +1838,7 @@ out: } } -static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) +static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh) { struct net *net = sock_net(skb->sk); struct ifinfomsg *ifm; @@ -2081,7 +2055,7 @@ int ndo_dflt_fdb_add(struct ndmsg *ndm, } EXPORT_SYMBOL(ndo_dflt_fdb_add); -static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct ndmsg *ndm; @@ -2179,7 +2153,7 @@ int ndo_dflt_fdb_del(struct ndmsg *ndm, } EXPORT_SYMBOL(ndo_dflt_fdb_del); -static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct ndmsg *ndm; @@ -2478,8 +2452,7 @@ errout: return err; } -static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, - void *arg) +static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct ifinfomsg *ifm; @@ -2549,8 +2522,7 @@ out: return err; } -static int rtnl_bridge_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, - void *arg) +static int rtnl_bridge_dellink(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct ifinfomsg *ifm; @@ -2620,10 +2592,6 @@ out: return err; } -/* Protected by RTNL sempahore. */ -static struct rtattr **rta_buf; -static int rtattr_max; - /* Process one rtnetlink message. */ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) @@ -2631,7 +2599,6 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) struct net *net = sock_net(skb->sk); rtnl_doit_func doit; int sz_idx, kind; - int min_len; int family; int type; int err; @@ -2679,32 +2646,11 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) return err; } - memset(rta_buf, 0, (rtattr_max * sizeof(struct rtattr *))); - - min_len = rtm_min[sz_idx]; - if (nlh->nlmsg_len < min_len) - return -EINVAL; - - if (nlh->nlmsg_len > min_len) { - int attrlen = nlh->nlmsg_len - NLMSG_ALIGN(min_len); - struct rtattr *attr = (void *)nlh + NLMSG_ALIGN(min_len); - - while (RTA_OK(attr, attrlen)) { - unsigned int flavor = attr->rta_type & NLA_TYPE_MASK; - if (flavor) { - if (flavor > rta_max[sz_idx]) - return -EINVAL; - rta_buf[flavor-1] = attr; - } - attr = RTA_NEXT(attr, attrlen); - } - } - doit = rtnl_get_doit(family, type); if (doit == NULL) return -EOPNOTSUPP; - return doit(skb, nlh, (void *)&rta_buf[0]); + return doit(skb, nlh); } static void rtnetlink_rcv(struct sk_buff *skb) @@ -2774,16 +2720,6 @@ static struct pernet_operations rtnetlink_net_ops = { void __init rtnetlink_init(void) { - int i; - - rtattr_max = 0; - for (i = 0; i < ARRAY_SIZE(rta_max); i++) - if (rta_max[i] > rtattr_max) - rtattr_max = rta_max[i]; - rta_buf = kmalloc(rtattr_max * sizeof(struct rtattr *), GFP_KERNEL); - if (!rta_buf) - panic("rtnetlink_init: cannot allocate rta_buf\n"); - if (register_pernet_subsys(&rtnetlink_net_ops)) panic("rtnetlink_init: cannot initialize rtnetlink\n"); diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c index 21291f1abcd6..40d5829ed36a 100644 --- a/net/dcb/dcbnl.c +++ b/net/dcb/dcbnl.c @@ -1658,7 +1658,7 @@ static const struct reply_func reply_funcs[DCB_CMD_MAX+1] = { [DCB_CMD_CEE_GET] = { RTM_GETDCB, dcbnl_cee_get }, }; -static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct net_device *netdev; diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index c8da116d84a4..7d9197063ebb 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -563,7 +563,7 @@ static const struct nla_policy dn_ifa_policy[IFA_MAX+1] = { .len = IFNAMSIZ - 1 }, }; -static int dn_nl_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int dn_nl_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct nlattr *tb[IFA_MAX+1]; @@ -607,7 +607,7 @@ errout: return err; } -static int dn_nl_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int dn_nl_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct nlattr *tb[IFA_MAX+1]; diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c index 42a8048fe725..f09305949931 100644 --- a/net/decnet/dn_fib.c +++ b/net/decnet/dn_fib.c @@ -511,7 +511,7 @@ static inline u32 rtm_get_table(struct nlattr *attrs[], u8 table) return table; } -static int dn_fib_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int dn_fib_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct dn_fib_table *tb; @@ -536,7 +536,7 @@ static int dn_fib_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void * return tb->delete(tb, r, attrs, nlh, &NETLINK_CB(skb)); } -static int dn_fib_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int dn_fib_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct dn_fib_table *tb; diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index b4b3508e77f0..5904429e8d6a 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -1616,7 +1616,7 @@ errout: /* * This is called by both endnodes and routers now. */ -static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void *arg) +static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh) { struct net *net = sock_net(in_skb->sk); struct rtmsg *rtm = nlmsg_data(nlh); diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index af57bbae05b9..20a9f9274f3f 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -536,7 +536,7 @@ struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix, return NULL; } -static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct nlattr *tb[IFA_MAX+1]; @@ -775,7 +775,7 @@ static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa) return NULL; } -static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct in_ifaddr *ifa; @@ -1730,8 +1730,7 @@ static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = { }; static int inet_netconf_get_devconf(struct sk_buff *in_skb, - struct nlmsghdr *nlh, - void *arg) + struct nlmsghdr *nlh) { struct net *net = sock_net(in_skb->sk); struct nlattr *tb[NETCONFA_MAX+1]; diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index eb4bb12b3eb4..0e74398bc8e6 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -604,7 +604,7 @@ errout: return err; } -static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct fib_config cfg; @@ -626,7 +626,7 @@ errout: return err; } -static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct fib_config cfg; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 6e2851464f8f..550781a17b34 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2311,7 +2311,7 @@ nla_put_failure: return -EMSGSIZE; } -static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void *arg) +static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh) { struct net *net = sock_net(in_skb->sk); struct rtmsg *rtm; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index fa36a677490f..15794fdaf028 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -544,8 +544,7 @@ static const struct nla_policy devconf_ipv6_policy[NETCONFA_MAX+1] = { }; static int inet6_netconf_get_devconf(struct sk_buff *in_skb, - struct nlmsghdr *nlh, - void *arg) + struct nlmsghdr *nlh) { struct net *net = sock_net(in_skb->sk); struct nlattr *tb[NETCONFA_MAX+1]; @@ -3578,7 +3577,7 @@ static const struct nla_policy ifa_ipv6_policy[IFA_MAX+1] = { }; static int -inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct ifaddrmsg *ifm; @@ -3644,7 +3643,7 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, u8 ifa_flags, } static int -inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct ifaddrmsg *ifm; @@ -3983,8 +3982,7 @@ static int inet6_dump_ifacaddr(struct sk_buff *skb, struct netlink_callback *cb) return inet6_dump_addr(skb, cb, type); } -static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh, - void *arg) +static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh) { struct net *net = sock_net(in_skb->sk); struct ifaddrmsg *ifm; diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c index 6f226c850c91..f083a583a05c 100644 --- a/net/ipv6/addrlabel.c +++ b/net/ipv6/addrlabel.c @@ -414,8 +414,7 @@ static const struct nla_policy ifal_policy[IFAL_MAX+1] = { [IFAL_LABEL] = { .len = sizeof(u32), }, }; -static int ip6addrlbl_newdel(struct sk_buff *skb, struct nlmsghdr *nlh, - void *arg) +static int ip6addrlbl_newdel(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct ifaddrlblmsg *ifal; @@ -530,8 +529,7 @@ static inline int ip6addrlbl_msgsize(void) + nla_total_size(4); /* IFAL_LABEL */ } -static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr* nlh, - void *arg) +static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr* nlh) { struct net *net = sock_net(in_skb->sk); struct ifaddrlblmsg *ifal; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index e5fe0041adfa..ad0aa6b0b86a 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2355,7 +2355,7 @@ beginning: return last_err; } -static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) +static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh) { struct fib6_config cfg; int err; @@ -2370,7 +2370,7 @@ static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *a return ip6_route_del(&cfg); } -static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) +static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh) { struct fib6_config cfg; int err; @@ -2562,7 +2562,7 @@ int rt6_dump_route(struct rt6_info *rt, void *p_arg) prefix, 0, NLM_F_MULTI); } -static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg) +static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh) { struct net *net = sock_net(in_skb->sk); struct nlattr *tb[RTA_MAX+1]; diff --git a/net/phonet/pn_netlink.c b/net/phonet/pn_netlink.c index 0193630d3061..dc15f4300808 100644 --- a/net/phonet/pn_netlink.c +++ b/net/phonet/pn_netlink.c @@ -61,7 +61,7 @@ static const struct nla_policy ifa_phonet_policy[IFA_MAX+1] = { [IFA_LOCAL] = { .type = NLA_U8 }, }; -static int addr_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *attr) +static int addr_doit(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct nlattr *tb[IFA_MAX+1]; @@ -224,7 +224,7 @@ static const struct nla_policy rtm_phonet_policy[RTA_MAX+1] = { [RTA_OIF] = { .type = NLA_U32 }, }; -static int route_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *attr) +static int route_doit(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct nlattr *tb[RTA_MAX+1]; diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 8579c4bb20c9..fd7072827a40 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -982,7 +982,7 @@ done: return ret; } -static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n, void *arg) +static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n) { struct net *net = sock_net(skb->sk); struct nlattr *tca[TCA_ACT_MAX + 1]; diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 964f5e4f4b8a..9a04b981bc13 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -118,7 +118,7 @@ static inline u32 tcf_auto_prio(struct tcf_proto *tp) /* Add/change/delete/get a filter node */ -static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n, void *arg) +static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n) { struct net *net = sock_net(skb->sk); struct nlattr *tca[TCA_MAX + 1]; diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index c297e2a8e2a1..0bbce229ac69 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -971,7 +971,7 @@ check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w) * Delete/get qdisc. */ -static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg) +static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n) { struct net *net = sock_net(skb->sk); struct tcmsg *tcm = nlmsg_data(n); @@ -1038,7 +1038,7 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg) * Create/change qdisc. */ -static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg) +static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n) { struct net *net = sock_net(skb->sk); struct tcmsg *tcm; @@ -1372,7 +1372,7 @@ done: -static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg) +static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n) { struct net *net = sock_net(skb->sk); struct tcmsg *tcm = nlmsg_data(n); -- cgit v1.2.3 From 63998ac24f8370caf99e433483532bab8368eb7e Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Fri, 22 Mar 2013 06:28:43 +0000 Subject: ipv6: provide addr and netconf dump consistency info This patch adds a dev_addr_genid for IPv6. The goal is to use it, combined with dev_base_seq to check if a change occurs during a netlink dump. If a change is detected, the flag NLM_F_DUMP_INTR is set in the first message after the dump was interrupted. Note that only dump of unicast addresses is checked (multicast and anycast are not checked). Reported-by: Junwei Zhang Reported-by: Hongjun Li Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- include/net/netns/ipv6.h | 1 + net/ipv6/addrconf.c | 8 ++++++++ 2 files changed, 9 insertions(+) (limited to 'include/net') diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index 1242f371718b..005e2c2e39a9 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -71,6 +71,7 @@ struct netns_ipv6 { struct fib_rules_ops *mr6_rules_ops; #endif #endif + atomic_t dev_addr_genid; }; #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 15794fdaf028..1b9f4f25212e 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -621,6 +621,8 @@ static int inet6_netconf_dump_devconf(struct sk_buff *skb, idx = 0; head = &net->dev_index_head[h]; rcu_read_lock(); + cb->seq = atomic_read(&net->ipv6.dev_addr_genid) ^ + net->dev_base_seq; hlist_for_each_entry_rcu(dev, head, index_hlist) { if (idx < s_idx) goto cont; @@ -638,6 +640,7 @@ static int inet6_netconf_dump_devconf(struct sk_buff *skb, rcu_read_unlock(); goto done; } + nl_dump_check_consistent(cb, nlmsg_hdr(skb)); cont: idx++; } @@ -3874,6 +3877,7 @@ static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb, NLM_F_MULTI); if (err <= 0) break; + nl_dump_check_consistent(cb, nlmsg_hdr(skb)); } break; } @@ -3931,6 +3935,7 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb, s_ip_idx = ip_idx = cb->args[2]; rcu_read_lock(); + cb->seq = atomic_read(&net->ipv6.dev_addr_genid) ^ net->dev_base_seq; for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { idx = 0; head = &net->dev_index_head[h]; @@ -4407,6 +4412,8 @@ errout: static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) { + struct net *net = dev_net(ifp->idev->dev); + inet6_ifa_notify(event ? : RTM_NEWADDR, ifp); switch (event) { @@ -4432,6 +4439,7 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) dst_free(&ifp->rt->dst); break; } + atomic_inc(&net->ipv6.dev_addr_genid); } static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) -- cgit v1.2.3 From be991971d53e0f5b6d13e3940192054216590072 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Fri, 22 Mar 2013 08:24:37 +0000 Subject: inet: generalize ipv4-only RFC3168 5.3 ecn fragmentation handling for future use by ipv6 This patch just moves some code arround to make the ip4_frag_ecn_table and IPFRAG_ECN_* constants accessible from the other reassembly engines. I also renamed ip4_frag_ecn_table to ip_frag_ecn_table. Cc: Eric Dumazet Cc: Jesper Dangaard Brouer Cc: YOSHIFUJI Hideaki Signed-off-by: Hannes Frederic Sowa Acked-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- include/net/inet_frag.h | 12 ++++++++++++ net/ipv4/inet_fragment.c | 22 ++++++++++++++++++++++ net/ipv4/ip_fragment.c | 31 +------------------------------ 3 files changed, 35 insertions(+), 30 deletions(-) (limited to 'include/net') diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index 0a1dcc2fa2f5..64b4e7d23b8a 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -153,4 +153,16 @@ static inline void inet_frag_lru_add(struct netns_frags *nf, list_add_tail(&q->lru_list, &nf->lru_list); spin_unlock(&nf->lru_lock); } + +/* RFC 3168 support : + * We want to check ECN values of all fragments, do detect invalid combinations. + * In ipq->ecn, we store the OR value of each ip4_frag_ecn() fragment value. + */ +#define IPFRAG_ECN_NOT_ECT 0x01 /* one frag had ECN_NOT_ECT */ +#define IPFRAG_ECN_ECT_1 0x02 /* one frag had ECN_ECT_1 */ +#define IPFRAG_ECN_ECT_0 0x04 /* one frag had ECN_ECT_0 */ +#define IPFRAG_ECN_CE 0x08 /* one frag had ECN_CE */ + +extern const u8 ip_frag_ecn_table[16]; + #endif diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index f4fd23de9b13..2bff045bec60 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -23,6 +23,28 @@ #include #include +#include + +/* Given the OR values of all fragments, apply RFC 3168 5.3 requirements + * Value : 0xff if frame should be dropped. + * 0 or INET_ECN_CE value, to be ORed in to final iph->tos field + */ +const u8 ip_frag_ecn_table[16] = { + /* at least one fragment had CE, and others ECT_0 or ECT_1 */ + [IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0] = INET_ECN_CE, + [IPFRAG_ECN_CE | IPFRAG_ECN_ECT_1] = INET_ECN_CE, + [IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = INET_ECN_CE, + + /* invalid combinations : drop frame */ + [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE] = 0xff, + [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_0] = 0xff, + [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_1] = 0xff, + [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = 0xff, + [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0] = 0xff, + [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_1] = 0xff, + [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = 0xff, +}; +EXPORT_SYMBOL(ip_frag_ecn_table); static void inet_frag_secret_rebuild(unsigned long dummy) { diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index a6445b843ef4..938520668b2f 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -79,40 +79,11 @@ struct ipq { struct inet_peer *peer; }; -/* RFC 3168 support : - * We want to check ECN values of all fragments, do detect invalid combinations. - * In ipq->ecn, we store the OR value of each ip4_frag_ecn() fragment value. - */ -#define IPFRAG_ECN_NOT_ECT 0x01 /* one frag had ECN_NOT_ECT */ -#define IPFRAG_ECN_ECT_1 0x02 /* one frag had ECN_ECT_1 */ -#define IPFRAG_ECN_ECT_0 0x04 /* one frag had ECN_ECT_0 */ -#define IPFRAG_ECN_CE 0x08 /* one frag had ECN_CE */ - static inline u8 ip4_frag_ecn(u8 tos) { return 1 << (tos & INET_ECN_MASK); } -/* Given the OR values of all fragments, apply RFC 3168 5.3 requirements - * Value : 0xff if frame should be dropped. - * 0 or INET_ECN_CE value, to be ORed in to final iph->tos field - */ -static const u8 ip4_frag_ecn_table[16] = { - /* at least one fragment had CE, and others ECT_0 or ECT_1 */ - [IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0] = INET_ECN_CE, - [IPFRAG_ECN_CE | IPFRAG_ECN_ECT_1] = INET_ECN_CE, - [IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = INET_ECN_CE, - - /* invalid combinations : drop frame */ - [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE] = 0xff, - [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_0] = 0xff, - [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_1] = 0xff, - [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = 0xff, - [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0] = 0xff, - [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_1] = 0xff, - [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = 0xff, -}; - static struct inet_frags ip4_frags; int ip_frag_nqueues(struct net *net) @@ -551,7 +522,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, ipq_kill(qp); - ecn = ip4_frag_ecn_table[qp->ecn]; + ecn = ip_frag_ecn_table[qp->ecn]; if (unlikely(ecn == 0xff)) { err = -EINVAL; goto out_fail; -- cgit v1.2.3 From eec2e6185ff6eab18c2cae9b01a9fbc5c33248fc Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Fri, 22 Mar 2013 08:24:44 +0000 Subject: ipv6: implement RFC3168 5.3 (ecn protection) for ipv6 fragmentation handling Hello! After patch 1 got accepted to net-next I will also send a patch to netfilter-devel to make the corresponding changes to the netfilter reassembly logic. Thanks, Hannes -- >8 -- [PATCH 2/2] ipv6: implement RFC3168 5.3 (ecn protection) for ipv6 fragmentation handling This patch also ensures that INET_ECN_CE is propagated if one fragment had the codepoint set. Cc: Eric Dumazet Cc: Jesper Dangaard Brouer Cc: YOSHIFUJI Hideaki Signed-off-by: Hannes Frederic Sowa Acked-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- include/net/ipv6.h | 2 ++ net/ipv6/reassembly.c | 23 +++++++++++++++++++++-- 2 files changed, 23 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 42ef6abf25c3..0810aa57c780 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -478,6 +478,7 @@ struct ip6_create_arg { u32 user; const struct in6_addr *src; const struct in6_addr *dst; + u8 ecn; }; void ip6_frag_init(struct inet_frag_queue *q, void *a); @@ -497,6 +498,7 @@ struct frag_queue { int iif; unsigned int csum; __u16 nhoffset; + u8 ecn; }; void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq, diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 196ab9347ad1..e6e44cef8db2 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -58,6 +58,7 @@ #include #include #include +#include struct ip6frag_skb_cb { @@ -67,6 +68,10 @@ struct ip6frag_skb_cb #define FRAG6_CB(skb) ((struct ip6frag_skb_cb*)((skb)->cb)) +static inline u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h) +{ + return 1 << (ipv6_get_dsfield(ipv6h) & INET_ECN_MASK); +} static struct inet_frags ip6_frags; @@ -119,6 +124,7 @@ void ip6_frag_init(struct inet_frag_queue *q, void *a) fq->user = arg->user; fq->saddr = *arg->src; fq->daddr = *arg->dst; + fq->ecn = arg->ecn; } EXPORT_SYMBOL(ip6_frag_init); @@ -173,7 +179,8 @@ static void ip6_frag_expire(unsigned long data) } static __inline__ struct frag_queue * -fq_find(struct net *net, __be32 id, const struct in6_addr *src, const struct in6_addr *dst) +fq_find(struct net *net, __be32 id, const struct in6_addr *src, + const struct in6_addr *dst, u8 ecn) { struct inet_frag_queue *q; struct ip6_create_arg arg; @@ -183,6 +190,7 @@ fq_find(struct net *net, __be32 id, const struct in6_addr *src, const struct in6 arg.user = IP6_DEFRAG_LOCAL_DELIVER; arg.src = src; arg.dst = dst; + arg.ecn = ecn; read_lock(&ip6_frags.lock); hash = inet6_hash_frag(id, src, dst, ip6_frags.rnd); @@ -202,6 +210,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, struct net_device *dev; int offset, end; struct net *net = dev_net(skb_dst(skb)->dev); + u8 ecn; if (fq->q.last_in & INET_FRAG_COMPLETE) goto err; @@ -219,6 +228,8 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, return -1; } + ecn = ip6_frag_ecn(ipv6_hdr(skb)); + if (skb->ip_summed == CHECKSUM_COMPLETE) { const unsigned char *nh = skb_network_header(skb); skb->csum = csum_sub(skb->csum, @@ -319,6 +330,7 @@ found: } fq->q.stamp = skb->tstamp; fq->q.meat += skb->len; + fq->ecn |= ecn; add_frag_mem_limit(&fq->q, skb->truesize); /* The first fragment. @@ -362,9 +374,14 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, int payload_len; unsigned int nhoff; int sum_truesize; + u8 ecn; inet_frag_kill(&fq->q, &ip6_frags); + ecn = ip_frag_ecn_table[fq->ecn]; + if (unlikely(ecn == 0xff)) + goto out_fail; + /* Make the one we just received the head. */ if (prev) { head = prev->next; @@ -463,6 +480,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, head->dev = dev; head->tstamp = fq->q.stamp; ipv6_hdr(head)->payload_len = htons(payload_len); + ipv6_change_dsfield(ipv6_hdr(head), 0xff, ecn); IP6CB(head)->nhoff = nhoff; /* Yes, and fold redundant checksum back. 8) */ @@ -526,7 +544,8 @@ static int ipv6_frag_rcv(struct sk_buff *skb) IP6_ADD_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMFAILS, evicted); - fq = fq_find(net, fhdr->identification, &hdr->saddr, &hdr->daddr); + fq = fq_find(net, fhdr->identification, &hdr->saddr, &hdr->daddr, + ip6_frag_ecn(hdr)); if (fq != NULL) { int ret; -- cgit v1.2.3 From 8bf24293453a10a696bcd00abb1248e6365f66c2 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Mon, 25 Mar 2013 11:15:59 +0200 Subject: cfg80211: Document update_ft_ies() cfg80211_ops This was forgotten from the commit that added support for FT operations with drivers that implement SME. Signed-off-by: Jouni Malinen Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index bdba9b619064..57870b646974 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1998,6 +1998,10 @@ struct cfg80211_update_ft_ies_params { * advertise the support for MAC based ACL have to implement this callback. * * @start_radar_detection: Start radar detection in the driver. + * + * @update_ft_ies: Provide updated Fast BSS Transition information to the + * driver. If the SME is in the driver/firmware, this information can be + * used in building Authentication and Reassociation Request frames. */ struct cfg80211_ops { int (*suspend)(struct wiphy *wiphy, struct cfg80211_wowlan *wow); -- cgit v1.2.3 From 219c38674c262378ec411dd8318ebfd199fbce8d Mon Sep 17 00:00:00 2001 From: Alexander Bondar Date: Tue, 22 Jan 2013 16:52:23 +0200 Subject: mac80211: allow drivers to set default uAPSD parameters mac80211 currently sets uAPSD parameters to have VO AC trigger- and delivery-enabled, with maximum service period length. Allow drivers to change these default settings since different uAPSD client implementations may handle errors differently and be able to recover from some errors. Note: some APs may not function correctly if one or all ACs are trigger- and delivery-enabled, see http://thread.gmane.org/gmane.linux.kernel.wireless.general/93577. We retested with this AP and later firmware doesn't have this bug any more. Signed-off-by: Alexander Bondar Signed-off-by: Johannes Berg --- include/net/mac80211.h | 13 +++++++++++++ net/mac80211/main.c | 2 ++ net/mac80211/mlme.c | 4 ++-- 3 files changed, 17 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 9b536172e27e..23a275a9a3b2 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1534,6 +1534,17 @@ enum ieee80211_hw_flags { * @netdev_features: netdev features to be set in each netdev created * from this HW. Note only HW checksum features are currently * compatible with mac80211. Other feature bits will be rejected. + * + * @uapsd_queues: This bitmap is included in (re)association frame to indicate + * for each access category if it is uAPSD trigger-enabled and delivery- + * enabled. Use IEEE80211_WMM_IE_STA_QOSINFO_AC_* to set this bitmap. + * Each bit corresponds to different AC. Value '1' in specific bit means + * that corresponding AC is both trigger- and delivery-enabled. '0' means + * neither enabled. + * + * @uapsd_max_sp_len: maximum number of total buffered frames the WMM AP may + * deliver to a WMM STA during any Service Period triggered by the WMM STA. + * Use IEEE80211_WMM_IE_STA_QOSINFO_SP_* for correct values. */ struct ieee80211_hw { struct ieee80211_conf conf; @@ -1559,6 +1570,8 @@ struct ieee80211_hw { u8 radiotap_mcs_details; u16 radiotap_vht_details; netdev_features_t netdev_features; + u8 uapsd_queues; + u8 uapsd_max_sp_len; }; /** diff --git a/net/mac80211/main.c b/net/mac80211/main.c index c6f81ecc36a1..b0d286821864 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -587,6 +587,8 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, IEEE80211_RADIOTAP_MCS_HAVE_BW; local->hw.radiotap_vht_details = IEEE80211_RADIOTAP_VHT_KNOWN_GI | IEEE80211_RADIOTAP_VHT_KNOWN_BANDWIDTH; + local->hw.uapsd_queues = IEEE80211_DEFAULT_UAPSD_QUEUES; + local->hw.uapsd_max_sp_len = IEEE80211_DEFAULT_MAX_SP_LEN; local->user_power_level = IEEE80211_UNSET_POWER_LEVEL; wiphy->ht_capa_mod_mask = &mac80211_ht_capa_mod_mask; wiphy->vht_capa_mod_mask = &mac80211_vht_capa_mod_mask; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 8b3e852d6032..9958cb7df8f1 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -3525,8 +3525,8 @@ void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata) ifmgd->flags = 0; ifmgd->powersave = sdata->wdev.ps; - ifmgd->uapsd_queues = IEEE80211_DEFAULT_UAPSD_QUEUES; - ifmgd->uapsd_max_sp_len = IEEE80211_DEFAULT_MAX_SP_LEN; + ifmgd->uapsd_queues = sdata->local->hw.uapsd_queues; + ifmgd->uapsd_max_sp_len = sdata->local->hw.uapsd_max_sp_len; ifmgd->p2p_noa_index = -1; mutex_init(&ifmgd->mtx); -- cgit v1.2.3 From 25c7704d8bdcf6746dab4397953df51759924b37 Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Sun, 24 Mar 2013 17:36:29 +0000 Subject: ipv4: Fix ip-header identification for gso packets. ip-header id needs to be incremented even if IP_DF flag is set. This behaviour was changed in commit 490ab08127cebc25e3a26 (IP_GRE: Fix IP-Identification). Following patch fixes it so that identification is always incremented. Reported-by: Cong Wang Acked-by: Cong Wang Signed-off-by: Pravin B Shelar --- include/net/ipip.h | 16 ++++++---------- net/ipv4/af_inet.c | 3 +-- 2 files changed, 7 insertions(+), 12 deletions(-) (limited to 'include/net') diff --git a/include/net/ipip.h b/include/net/ipip.h index 0c046e3bca05..483b91a10bb2 100644 --- a/include/net/ipip.h +++ b/include/net/ipip.h @@ -74,15 +74,11 @@ static inline void tunnel_ip_select_ident(struct sk_buff *skb, { struct iphdr *iph = ip_hdr(skb); - if (iph->frag_off & htons(IP_DF)) - iph->id = 0; - else { - /* Use inner packet iph-id if possible. */ - if (skb->protocol == htons(ETH_P_IP) && old_iph->id) - iph->id = old_iph->id; - else - __ip_select_ident(iph, dst, - (skb_shinfo(skb)->gso_segs ?: 1) - 1); - } + /* Use inner packet iph-id if possible. */ + if (skb->protocol == htons(ETH_P_IP) && old_iph->id) + iph->id = old_iph->id; + else + __ip_select_ident(iph, dst, + (skb_shinfo(skb)->gso_segs ?: 1) - 1); } #endif diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 9e5882caf8a7..70b2d4cf5801 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1334,8 +1334,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, iph->frag_off |= htons(IP_MF); offset += (skb->len - skb->mac_len - iph->ihl * 4); } else { - if (!(iph->frag_off & htons(IP_DF))) - iph->id = htons(id++); + iph->id = htons(id++); } iph->tot_len = htons(skb->len - skb->mac_len); iph->check = 0; -- cgit v1.2.3 From 675a0b049abf6edf30f8dd84c5610b6edc2296c8 Mon Sep 17 00:00:00 2001 From: Karl Beldan Date: Mon, 25 Mar 2013 16:26:57 +0100 Subject: mac80211: Use a cfg80211_chan_def in ieee80211_hw_conf_chan Drivers that don't use chanctxes cannot perform VHT association because they still use a "backward compatibility" pair of {ieee80211_channel, nl80211_channel_type} in ieee80211_conf and ieee80211_local. Signed-off-by: Karl Beldan [fix kernel-doc] Signed-off-by: Johannes Berg --- drivers/net/wireless/adm8211.c | 3 +- drivers/net/wireless/at76c50x-usb.c | 4 +- drivers/net/wireless/ath/ar5523/ar5523.c | 14 ++--- drivers/net/wireless/ath/ath5k/base.c | 2 +- drivers/net/wireless/ath/ath5k/mac80211-ops.c | 4 +- drivers/net/wireless/ath/ath9k/beacon.c | 2 +- drivers/net/wireless/ath/ath9k/calib.c | 2 +- drivers/net/wireless/ath/ath9k/common.c | 5 +- drivers/net/wireless/ath/ath9k/htc_drv_main.c | 16 +++--- drivers/net/wireless/ath/ath9k/htc_drv_txrx.c | 8 +-- drivers/net/wireless/ath/ath9k/hw.c | 5 +- drivers/net/wireless/ath/ath9k/link.c | 2 +- drivers/net/wireless/ath/ath9k/main.c | 10 ++-- drivers/net/wireless/ath/ath9k/rc.c | 4 +- drivers/net/wireless/ath/ath9k/recv.c | 6 +-- drivers/net/wireless/ath/carl9170/debug.c | 5 +- drivers/net/wireless/ath/carl9170/mac.c | 8 +-- drivers/net/wireless/ath/carl9170/main.c | 9 ++-- drivers/net/wireless/ath/carl9170/phy.c | 4 +- drivers/net/wireless/b43/b43.h | 2 +- drivers/net/wireless/b43/main.c | 8 +-- drivers/net/wireless/b43/phy_ht.c | 5 +- drivers/net/wireless/b43/phy_lcn.c | 5 +- drivers/net/wireless/b43/phy_n.c | 5 +- drivers/net/wireless/b43legacy/main.c | 9 ++-- drivers/net/wireless/brcm80211/brcmsmac/channel.c | 4 +- .../net/wireless/brcm80211/brcmsmac/mac80211_if.c | 6 +-- drivers/net/wireless/brcm80211/brcmsmac/main.c | 4 +- drivers/net/wireless/iwlegacy/3945-rs.c | 2 +- drivers/net/wireless/iwlegacy/4965-rs.c | 2 +- drivers/net/wireless/iwlegacy/common.c | 2 +- drivers/net/wireless/iwlwifi/dvm/rs.c | 2 +- drivers/net/wireless/iwlwifi/dvm/rxon.c | 9 ++-- drivers/net/wireless/libertas_tf/main.c | 8 +-- drivers/net/wireless/mac80211_hwsim.c | 42 +++++++++------ drivers/net/wireless/mwl8k.c | 36 +++++++------ drivers/net/wireless/p54/fwio.c | 4 +- drivers/net/wireless/p54/main.c | 4 +- drivers/net/wireless/p54/txrx.c | 4 +- drivers/net/wireless/rt2x00/rt2800lib.c | 8 +-- drivers/net/wireless/rt2x00/rt2x00config.c | 10 ++-- drivers/net/wireless/rt2x00/rt61pci.c | 2 +- drivers/net/wireless/rt2x00/rt73usb.c | 2 +- drivers/net/wireless/rtl818x/rtl8180/dev.c | 4 +- drivers/net/wireless/rtl818x/rtl8180/grf5101.c | 3 +- drivers/net/wireless/rtl818x/rtl8180/max2820.c | 2 +- drivers/net/wireless/rtl818x/rtl8180/rtl8225.c | 3 +- drivers/net/wireless/rtl818x/rtl8180/sa2400.c | 3 +- drivers/net/wireless/rtl818x/rtl8187/dev.c | 4 +- drivers/net/wireless/rtl818x/rtl8187/rtl8225.c | 3 +- drivers/net/wireless/rtlwifi/base.c | 4 +- drivers/net/wireless/rtlwifi/core.c | 6 +-- drivers/net/wireless/rtlwifi/rtl8192ce/trx.c | 4 +- drivers/net/wireless/rtlwifi/rtl8192cu/trx.c | 8 +-- drivers/net/wireless/rtlwifi/rtl8192de/trx.c | 4 +- drivers/net/wireless/rtlwifi/rtl8192se/trx.c | 4 +- drivers/net/wireless/rtlwifi/rtl8723ae/trx.c | 4 +- drivers/net/wireless/ti/wl1251/main.c | 5 +- drivers/net/wireless/ti/wlcore/main.c | 2 +- drivers/net/wireless/zd1211rw/zd_mac.c | 4 +- include/net/mac80211.h | 18 +++---- net/mac80211/cfg.c | 7 +-- net/mac80211/chan.c | 11 ++-- net/mac80211/ieee80211_i.h | 3 +- net/mac80211/main.c | 59 +++++++++++++--------- net/mac80211/mlme.c | 24 ++++++--- net/mac80211/scan.c | 6 +-- net/mac80211/trace.h | 37 +++++++------- net/mac80211/tx.c | 4 +- net/mac80211/util.c | 3 +- 70 files changed, 293 insertions(+), 244 deletions(-) (limited to 'include/net') diff --git a/drivers/net/wireless/adm8211.c b/drivers/net/wireless/adm8211.c index 3d339e04efb7..f9a24e599dee 100644 --- a/drivers/net/wireless/adm8211.c +++ b/drivers/net/wireless/adm8211.c @@ -1293,7 +1293,8 @@ static int adm8211_config(struct ieee80211_hw *dev, u32 changed) { struct adm8211_priv *priv = dev->priv; struct ieee80211_conf *conf = &dev->conf; - int channel = ieee80211_frequency_to_channel(conf->channel->center_freq); + int channel = + ieee80211_frequency_to_channel(conf->chandef.chan->center_freq); if (channel != priv->channel) { priv->channel = channel; diff --git a/drivers/net/wireless/at76c50x-usb.c b/drivers/net/wireless/at76c50x-usb.c index 5ac5f7ae2721..34c8a33cac06 100644 --- a/drivers/net/wireless/at76c50x-usb.c +++ b/drivers/net/wireless/at76c50x-usb.c @@ -1943,12 +1943,12 @@ static int at76_config(struct ieee80211_hw *hw, u32 changed) struct at76_priv *priv = hw->priv; at76_dbg(DBG_MAC80211, "%s(): channel %d", - __func__, hw->conf.channel->hw_value); + __func__, hw->conf.chandef.chan->hw_value); at76_dbg_dump(DBG_MAC80211, priv->bssid, ETH_ALEN, "bssid:"); mutex_lock(&priv->mtx); - priv->channel = hw->conf.channel->hw_value; + priv->channel = hw->conf.chandef.chan->hw_value; if (is_valid_ether_addr(priv->bssid)) at76_join(priv); diff --git a/drivers/net/wireless/ath/ar5523/ar5523.c b/drivers/net/wireless/ath/ar5523/ar5523.c index afd1e36d308f..17d7fece35d2 100644 --- a/drivers/net/wireless/ath/ar5523/ar5523.c +++ b/drivers/net/wireless/ath/ar5523/ar5523.c @@ -457,14 +457,14 @@ static int ar5523_set_chan(struct ar5523 *ar) memset(&reset, 0, sizeof(reset)); reset.flags |= cpu_to_be32(UATH_CHAN_2GHZ); reset.flags |= cpu_to_be32(UATH_CHAN_OFDM); - reset.freq = cpu_to_be32(conf->channel->center_freq); + reset.freq = cpu_to_be32(conf->chandef.chan->center_freq); reset.maxrdpower = cpu_to_be32(50); /* XXX */ reset.channelchange = cpu_to_be32(1); reset.keeprccontent = cpu_to_be32(0); ar5523_dbg(ar, "set chan flags 0x%x freq %d\n", be32_to_cpu(reset.flags), - conf->channel->center_freq); + conf->chandef.chan->center_freq); return ar5523_cmd_write(ar, WDCMSG_RESET, &reset, sizeof(reset), 0); } @@ -594,7 +594,7 @@ static void ar5523_data_rx_cb(struct urb *urb) rx_status = IEEE80211_SKB_RXCB(data->skb); memset(rx_status, 0, sizeof(*rx_status)); rx_status->freq = be32_to_cpu(desc->channel); - rx_status->band = hw->conf.channel->band; + rx_status->band = hw->conf.chandef.chan->band; rx_status->signal = -95 + be32_to_cpu(desc->rssi); ieee80211_rx_irqsafe(hw, data->skb); @@ -1153,13 +1153,13 @@ static int ar5523_get_wlan_mode(struct ar5523 *ar, struct ieee80211_sta *sta; u32 sta_rate_set; - band = ar->hw->wiphy->bands[ar->hw->conf.channel->band]; + band = ar->hw->wiphy->bands[ar->hw->conf.chandef.chan->band]; sta = ieee80211_find_sta(ar->vif, bss_conf->bssid); if (!sta) { ar5523_info(ar, "STA not found!\n"); return WLAN_MODE_11b; } - sta_rate_set = sta->supp_rates[ar->hw->conf.channel->band]; + sta_rate_set = sta->supp_rates[ar->hw->conf.chandef.chan->band]; for (bit = 0; bit < band->n_bitrates; bit++) { if (sta_rate_set & 1) { @@ -1197,11 +1197,11 @@ static void ar5523_create_rateset(struct ar5523 *ar, ar5523_info(ar, "STA not found. Cannot set rates\n"); sta_rate_set = bss_conf->basic_rates; } else - sta_rate_set = sta->supp_rates[ar->hw->conf.channel->band]; + sta_rate_set = sta->supp_rates[ar->hw->conf.chandef.chan->band]; ar5523_dbg(ar, "sta rate_set = %08x\n", sta_rate_set); - band = ar->hw->wiphy->bands[ar->hw->conf.channel->band]; + band = ar->hw->wiphy->bands[ar->hw->conf.chandef.chan->band]; for (bit = 0; bit < band->n_bitrates; bit++) { BUG_ON(i >= AR5523_MAX_NRATES); ar5523_dbg(ar, "Considering rate %d : %d\n", diff --git a/drivers/net/wireless/ath/ath5k/base.c b/drivers/net/wireless/ath/ath5k/base.c index 1d264c0f5a9b..9b20d9ee2719 100644 --- a/drivers/net/wireless/ath/ath5k/base.c +++ b/drivers/net/wireless/ath/ath5k/base.c @@ -2639,7 +2639,7 @@ int ath5k_start(struct ieee80211_hw *hw) * be followed by initialization of the appropriate bits * and then setup of the interrupt mask. */ - ah->curchan = ah->hw->conf.channel; + ah->curchan = ah->hw->conf.chandef.chan; ah->imask = AR5K_INT_RXOK | AR5K_INT_RXERR | AR5K_INT_RXEOL diff --git a/drivers/net/wireless/ath/ath5k/mac80211-ops.c b/drivers/net/wireless/ath/ath5k/mac80211-ops.c index 4264341533ea..06f86f435711 100644 --- a/drivers/net/wireless/ath/ath5k/mac80211-ops.c +++ b/drivers/net/wireless/ath/ath5k/mac80211-ops.c @@ -202,7 +202,7 @@ ath5k_config(struct ieee80211_hw *hw, u32 changed) mutex_lock(&ah->lock); if (changed & IEEE80211_CONF_CHANGE_CHANNEL) { - ret = ath5k_chan_set(ah, conf->channel); + ret = ath5k_chan_set(ah, conf->chandef.chan); if (ret < 0) goto unlock; } @@ -678,7 +678,7 @@ ath5k_get_survey(struct ieee80211_hw *hw, int idx, struct survey_info *survey) memcpy(survey, &ah->survey, sizeof(*survey)); - survey->channel = conf->channel; + survey->channel = conf->chandef.chan; survey->noise = ah->ah_noise_floor; survey->filled = SURVEY_INFO_NOISE_DBM | SURVEY_INFO_CHANNEL_TIME | diff --git a/drivers/net/wireless/ath/ath9k/beacon.c b/drivers/net/wireless/ath/ath9k/beacon.c index 5f05c26d1ec4..2ff570f7f8ff 100644 --- a/drivers/net/wireless/ath/ath9k/beacon.c +++ b/drivers/net/wireless/ath/ath9k/beacon.c @@ -79,7 +79,7 @@ static void ath9k_beacon_setup(struct ath_softc *sc, struct ieee80211_vif *vif, u8 chainmask = ah->txchainmask; u8 rate = 0; - sband = &sc->sbands[common->hw->conf.channel->band]; + sband = &sc->sbands[common->hw->conf.chandef.chan->band]; rate = sband->bitrates[rateidx].hw_value; if (vif->bss_conf.use_short_preamble) rate |= sband->bitrates[rateidx].hw_value_short; diff --git a/drivers/net/wireless/ath/ath9k/calib.c b/drivers/net/wireless/ath/ath9k/calib.c index 1e8508530e98..b184f1ff0d38 100644 --- a/drivers/net/wireless/ath/ath9k/calib.c +++ b/drivers/net/wireless/ath/ath9k/calib.c @@ -208,7 +208,7 @@ bool ath9k_hw_reset_calvalid(struct ath_hw *ah) return true; ath_dbg(common, CALIBRATE, "Resetting Cal %d state for channel %u\n", - currCal->calData->calType, conf->channel->center_freq); + currCal->calData->calType, conf->chandef.chan->center_freq); ah->caldata->CalValid &= ~currCal->calData->calType; currCal->calState = CAL_WAITING; diff --git a/drivers/net/wireless/ath/ath9k/common.c b/drivers/net/wireless/ath/ath9k/common.c index 905f1b313961..6c78fe7ca54d 100644 --- a/drivers/net/wireless/ath/ath9k/common.c +++ b/drivers/net/wireless/ath/ath9k/common.c @@ -133,13 +133,14 @@ EXPORT_SYMBOL(ath9k_cmn_update_ichannel); struct ath9k_channel *ath9k_cmn_get_curchannel(struct ieee80211_hw *hw, struct ath_hw *ah) { - struct ieee80211_channel *curchan = hw->conf.channel; + struct ieee80211_channel *curchan = hw->conf.chandef.chan; struct ath9k_channel *channel; u8 chan_idx; chan_idx = curchan->hw_value; channel = &ah->channels[chan_idx]; - ath9k_cmn_update_ichannel(channel, curchan, hw->conf.channel_type); + ath9k_cmn_update_ichannel(channel, curchan, + cfg80211_get_chandef_type(&hw->conf.chandef)); return channel; } diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_main.c b/drivers/net/wireless/ath/ath9k/htc_drv_main.c index a8016d70088a..098e3545e512 100644 --- a/drivers/net/wireless/ath/ath9k/htc_drv_main.c +++ b/drivers/net/wireless/ath/ath9k/htc_drv_main.c @@ -190,7 +190,7 @@ void ath9k_htc_reset(struct ath9k_htc_priv *priv) { struct ath_hw *ah = priv->ah; struct ath_common *common = ath9k_hw_common(ah); - struct ieee80211_channel *channel = priv->hw->conf.channel; + struct ieee80211_channel *channel = priv->hw->conf.chandef.chan; struct ath9k_hw_cal_data *caldata = NULL; enum htc_phymode mode; __be16 htc_mode; @@ -250,7 +250,7 @@ static int ath9k_htc_set_channel(struct ath9k_htc_priv *priv, struct ath_common *common = ath9k_hw_common(ah); struct ieee80211_conf *conf = &common->hw->conf; bool fastcc; - struct ieee80211_channel *channel = hw->conf.channel; + struct ieee80211_channel *channel = hw->conf.chandef.chan; struct ath9k_hw_cal_data *caldata = NULL; enum htc_phymode mode; __be16 htc_mode; @@ -602,7 +602,7 @@ static void ath9k_htc_setup_rate(struct ath9k_htc_priv *priv, u32 caps = 0; int i, j; - sband = priv->hw->wiphy->bands[priv->hw->conf.channel->band]; + sband = priv->hw->wiphy->bands[priv->hw->conf.chandef.chan->band]; for (i = 0, j = 0; i < sband->n_bitrates; i++) { if (sta->supp_rates[sband->band] & BIT(i)) { @@ -904,7 +904,7 @@ static int ath9k_htc_start(struct ieee80211_hw *hw) struct ath9k_htc_priv *priv = hw->priv; struct ath_hw *ah = priv->ah; struct ath_common *common = ath9k_hw_common(ah); - struct ieee80211_channel *curchan = hw->conf.channel; + struct ieee80211_channel *curchan = hw->conf.chandef.chan; struct ath9k_channel *init_channel; int ret = 0; enum htc_phymode mode; @@ -1193,15 +1193,17 @@ static int ath9k_htc_config(struct ieee80211_hw *hw, u32 changed) } if ((changed & IEEE80211_CONF_CHANGE_CHANNEL) || chip_reset) { - struct ieee80211_channel *curchan = hw->conf.channel; + struct ieee80211_channel *curchan = hw->conf.chandef.chan; + enum nl80211_channel_type channel_type = + cfg80211_get_chandef_type(&hw->conf.chandef); int pos = curchan->hw_value; ath_dbg(common, CONFIG, "Set channel: %d MHz\n", curchan->center_freq); ath9k_cmn_update_ichannel(&priv->ah->channels[pos], - hw->conf.channel, - hw->conf.channel_type); + hw->conf.chandef.chan, + channel_type); if (ath9k_htc_set_channel(priv, hw, &priv->ah->channels[pos]) < 0) { ath_err(common, "Unable to set channel\n"); diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c b/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c index 3ad1fd05c5e7..306c55019e77 100644 --- a/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c +++ b/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c @@ -490,7 +490,7 @@ static void ath9k_htc_tx_process(struct ath9k_htc_priv *priv, if (txs->ts_flags & ATH9K_HTC_TXSTAT_SGI) rate->flags |= IEEE80211_TX_RC_SHORT_GI; } else { - if (cur_conf->channel->band == IEEE80211_BAND_5GHZ) + if (cur_conf->chandef.chan->band == IEEE80211_BAND_5GHZ) rate->idx += 4; /* No CCK rates */ } @@ -939,7 +939,7 @@ static void ath9k_process_rate(struct ieee80211_hw *hw, return; } - band = hw->conf.channel->band; + band = hw->conf.chandef.chan->band; sband = hw->wiphy->bands[band]; for (i = 0; i < sband->n_bitrates; i++) { @@ -1078,8 +1078,8 @@ static bool ath9k_rx_prepare(struct ath9k_htc_priv *priv, priv->ah->stats.avgbrssi = rxbuf->rxstatus.rs_rssi; rx_status->mactime = be64_to_cpu(rxbuf->rxstatus.rs_tstamp); - rx_status->band = hw->conf.channel->band; - rx_status->freq = hw->conf.channel->center_freq; + rx_status->band = hw->conf.chandef.chan->band; + rx_status->freq = hw->conf.chandef.chan->center_freq; rx_status->signal = rxbuf->rxstatus.rs_rssi + ATH_DEFAULT_NOISE_FLOOR; rx_status->antenna = rxbuf->rxstatus.rs_antenna; rx_status->flag |= RX_FLAG_MACTIME_END; diff --git a/drivers/net/wireless/ath/ath9k/hw.c b/drivers/net/wireless/ath/ath9k/hw.c index 2a2ae403e0e5..d5e6a38fe74b 100644 --- a/drivers/net/wireless/ath/ath9k/hw.c +++ b/drivers/net/wireless/ath/ath9k/hw.c @@ -139,7 +139,7 @@ static void ath9k_hw_set_clockrate(struct ath_hw *ah) clockrate = 117; else if (!ah->curchan) /* should really check for CCK instead */ clockrate = ATH9K_CLOCK_RATE_CCK; - else if (conf->channel->band == IEEE80211_BAND_2GHZ) + else if (conf->chandef.chan->band == IEEE80211_BAND_2GHZ) clockrate = ATH9K_CLOCK_RATE_2GHZ_OFDM; else if (ah->caps.hw_caps & ATH9K_HW_CAP_FASTCLOCK) clockrate = ATH9K_CLOCK_FAST_RATE_5GHZ_OFDM; @@ -1110,7 +1110,8 @@ void ath9k_hw_init_global_settings(struct ath_hw *ah) * BA frames in some implementations, but it has been found to fix ACK * timeout issues in other cases as well. */ - if (conf->channel && conf->channel->band == IEEE80211_BAND_2GHZ && + if (conf->chandef.chan && + conf->chandef.chan->band == IEEE80211_BAND_2GHZ && !IS_CHAN_HALF_RATE(chan) && !IS_CHAN_QUARTER_RATE(chan)) { acktimeout += 64 - sifstime - ah->slottime; ctstimeout += 48 - sifstime - ah->slottime; diff --git a/drivers/net/wireless/ath/ath9k/link.c b/drivers/net/wireless/ath/ath9k/link.c index ade3afb21f91..b1433f561cd2 100644 --- a/drivers/net/wireless/ath/ath9k/link.c +++ b/drivers/net/wireless/ath/ath9k/link.c @@ -213,7 +213,7 @@ static bool ath_paprd_send_frame(struct ath_softc *sc, struct sk_buff *skb, int txctl.txq = sc->tx.txq_map[IEEE80211_AC_BE]; memset(tx_info, 0, sizeof(*tx_info)); - tx_info->band = hw->conf.channel->band; + tx_info->band = hw->conf.chandef.chan->band; tx_info->flags |= IEEE80211_TX_CTL_NO_ACK; tx_info->control.rates[0].idx = 0; tx_info->control.rates[0].count = 1; diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c index 24650fd41694..f984a03f912c 100644 --- a/drivers/net/wireless/ath/ath9k/main.c +++ b/drivers/net/wireless/ath/ath9k/main.c @@ -585,7 +585,7 @@ static int ath9k_start(struct ieee80211_hw *hw) struct ath_softc *sc = hw->priv; struct ath_hw *ah = sc->sc_ah; struct ath_common *common = ath9k_hw_common(ah); - struct ieee80211_channel *curchan = hw->conf.channel; + struct ieee80211_channel *curchan = hw->conf.chandef.chan; struct ath9k_channel *init_channel; int r; @@ -1184,7 +1184,9 @@ static int ath9k_config(struct ieee80211_hw *hw, u32 changed) } if ((changed & IEEE80211_CONF_CHANGE_CHANNEL) || reset_channel) { - struct ieee80211_channel *curchan = hw->conf.channel; + struct ieee80211_channel *curchan = hw->conf.chandef.chan; + enum nl80211_channel_type channel_type = + cfg80211_get_chandef_type(&conf->chandef); int pos = curchan->hw_value; int old_pos = -1; unsigned long flags; @@ -1193,7 +1195,7 @@ static int ath9k_config(struct ieee80211_hw *hw, u32 changed) old_pos = ah->curchan - &ah->channels[0]; ath_dbg(common, CONFIG, "Set channel: %d MHz type: %d\n", - curchan->center_freq, conf->channel_type); + curchan->center_freq, channel_type); /* update survey stats for the old channel before switching */ spin_lock_irqsave(&common->cc_lock, flags); @@ -1208,7 +1210,7 @@ static int ath9k_config(struct ieee80211_hw *hw, u32 changed) ath9k_hw_getnf(ah, ah->curchan); ath9k_cmn_update_ichannel(&sc->sc_ah->channels[pos], - curchan, conf->channel_type); + curchan, channel_type); /* * If the operating channel changes, change the survey in-use flags diff --git a/drivers/net/wireless/ath/ath9k/rc.c b/drivers/net/wireless/ath/ath9k/rc.c index 96ac433ba7f6..aa4d368d8d3d 100644 --- a/drivers/net/wireless/ath/ath9k/rc.c +++ b/drivers/net/wireless/ath/ath9k/rc.c @@ -814,7 +814,7 @@ static void ath_get_rate(void *priv, struct ieee80211_sta *sta, void *priv_sta, * So, set fourth rate in series to be same as third one for * above conditions. */ - if ((sc->hw->conf.channel->band == IEEE80211_BAND_2GHZ) && + if ((sc->hw->conf.chandef.chan->band == IEEE80211_BAND_2GHZ) && (conf_is_ht(&sc->hw->conf))) { u8 dot11rate = rate_table->info[rix].dot11rate; u8 phy = rate_table->info[rix].phy; @@ -1328,7 +1328,7 @@ static void ath_rate_update(void *priv, struct ieee80211_supported_band *sband, ath_dbg(ath9k_hw_common(sc->sc_ah), CONFIG, "Operating HT Bandwidth changed to: %d\n", - sc->hw->conf.channel_type); + cfg80211_get_chandef_type(&sc->hw->conf.chandef)); } } diff --git a/drivers/net/wireless/ath/ath9k/recv.c b/drivers/net/wireless/ath/ath9k/recv.c index ee156e543147..c90ca57e3722 100644 --- a/drivers/net/wireless/ath/ath9k/recv.c +++ b/drivers/net/wireless/ath/ath9k/recv.c @@ -859,7 +859,7 @@ static int ath9k_process_rate(struct ath_common *common, unsigned int i = 0; struct ath_softc __maybe_unused *sc = common->priv; - band = hw->conf.channel->band; + band = hw->conf.chandef.chan->band; sband = hw->wiphy->bands[band]; if (rx_stats->rs_rate & 0x80) { @@ -954,8 +954,8 @@ static int ath9k_rx_skb_preprocess(struct ath_common *common, if (ath9k_process_rate(common, hw, rx_stats, rx_status)) return -EINVAL; - rx_status->band = hw->conf.channel->band; - rx_status->freq = hw->conf.channel->center_freq; + rx_status->band = hw->conf.chandef.chan->band; + rx_status->freq = hw->conf.chandef.chan->center_freq; rx_status->signal = ah->noise + rx_stats->rs_rssi; rx_status->antenna = rx_stats->rs_antenna; rx_status->flag |= RX_FLAG_MACTIME_END; diff --git a/drivers/net/wireless/ath/carl9170/debug.c b/drivers/net/wireless/ath/carl9170/debug.c index 93fe6003a493..7741fe8e0816 100644 --- a/drivers/net/wireless/ath/carl9170/debug.c +++ b/drivers/net/wireless/ath/carl9170/debug.c @@ -654,8 +654,9 @@ static ssize_t carl9170_debugfs_bug_write(struct ar9170 *ar, const char *buf, goto out; case 'P': - err = carl9170_set_channel(ar, ar->hw->conf.channel, - ar->hw->conf.channel_type, CARL9170_RFI_COLD); + err = carl9170_set_channel(ar, ar->hw->conf.chandef.chan, + cfg80211_get_chandef_type(&ar->hw->conf.chandef), + CARL9170_RFI_COLD); if (err < 0) count = err; diff --git a/drivers/net/wireless/ath/carl9170/mac.c b/drivers/net/wireless/ath/carl9170/mac.c index 24d75ab94f0d..a2f005703c04 100644 --- a/drivers/net/wireless/ath/carl9170/mac.c +++ b/drivers/net/wireless/ath/carl9170/mac.c @@ -48,7 +48,7 @@ int carl9170_set_dyn_sifs_ack(struct ar9170 *ar) if (conf_is_ht40(&ar->hw->conf)) val = 0x010a; else { - if (ar->hw->conf.channel->band == IEEE80211_BAND_2GHZ) + if (ar->hw->conf.chandef.chan->band == IEEE80211_BAND_2GHZ) val = 0x105; else val = 0x104; @@ -66,7 +66,7 @@ int carl9170_set_rts_cts_rate(struct ar9170 *ar) rts_rate = 0x1da; cts_rate = 0x10a; } else { - if (ar->hw->conf.channel->band == IEEE80211_BAND_2GHZ) { + if (ar->hw->conf.chandef.chan->band == IEEE80211_BAND_2GHZ) { /* 11 mbit CCK */ rts_rate = 033; cts_rate = 003; @@ -93,7 +93,7 @@ int carl9170_set_slot_time(struct ar9170 *ar) return 0; } - if ((ar->hw->conf.channel->band == IEEE80211_BAND_5GHZ) || + if ((ar->hw->conf.chandef.chan->band == IEEE80211_BAND_5GHZ) || vif->bss_conf.use_short_slot) slottime = 9; @@ -120,7 +120,7 @@ int carl9170_set_mac_rates(struct ar9170 *ar) basic |= (vif->bss_conf.basic_rates & 0xff0) << 4; rcu_read_unlock(); - if (ar->hw->conf.channel->band == IEEE80211_BAND_5GHZ) + if (ar->hw->conf.chandef.chan->band == IEEE80211_BAND_5GHZ) mandatory = 0xff00; /* OFDM 6/9/12/18/24/36/48/54 */ else mandatory = 0xff0f; /* OFDM (6/9../54) + CCK (1/2/5.5/11) */ diff --git a/drivers/net/wireless/ath/carl9170/main.c b/drivers/net/wireless/ath/carl9170/main.c index 08b193199946..4e268b1360d8 100644 --- a/drivers/net/wireless/ath/carl9170/main.c +++ b/drivers/net/wireless/ath/carl9170/main.c @@ -929,6 +929,9 @@ static int carl9170_op_config(struct ieee80211_hw *hw, u32 changed) } if (changed & IEEE80211_CONF_CHANGE_CHANNEL) { + enum nl80211_channel_type channel_type = + cfg80211_get_chandef_type(&hw->conf.chandef); + /* adjust slot time for 5 GHz */ err = carl9170_set_slot_time(ar); if (err) @@ -938,8 +941,8 @@ static int carl9170_op_config(struct ieee80211_hw *hw, u32 changed) if (err) goto out; - err = carl9170_set_channel(ar, hw->conf.channel, - hw->conf.channel_type, CARL9170_RFI_NONE); + err = carl9170_set_channel(ar, hw->conf.chandef.chan, + channel_type, CARL9170_RFI_NONE); if (err) goto out; @@ -957,7 +960,7 @@ static int carl9170_op_config(struct ieee80211_hw *hw, u32 changed) } if (changed & IEEE80211_CONF_CHANGE_POWER) { - err = carl9170_set_mac_tpc(ar, ar->hw->conf.channel); + err = carl9170_set_mac_tpc(ar, ar->hw->conf.chandef.chan); if (err) goto out; } diff --git a/drivers/net/wireless/ath/carl9170/phy.c b/drivers/net/wireless/ath/carl9170/phy.c index b72c09cf43a4..c5f1fdd1eaeb 100644 --- a/drivers/net/wireless/ath/carl9170/phy.c +++ b/drivers/net/wireless/ath/carl9170/phy.c @@ -1331,7 +1331,7 @@ static void carl9170_calc_ctl(struct ar9170 *ar, u32 freq, enum carl9170_bw bw) * CTL_ETSI for 2GHz and CTL_FCC for 5GHz. */ ctl_grp = ath_regd_get_band_ctl(&ar->common.regulatory, - ar->hw->conf.channel->band); + ar->hw->conf.chandef.chan->band); /* ctl group not found - either invalid band (NO_CTL) or ww roaming */ if (ctl_grp == NO_CTL || ctl_grp == SD_NO_CTL) @@ -1341,7 +1341,7 @@ static void carl9170_calc_ctl(struct ar9170 *ar, u32 freq, enum carl9170_bw bw) /* skip CTL and heavy clip for CTL_MKK and CTL_ETSI */ return; - if (ar->hw->conf.channel->band == IEEE80211_BAND_2GHZ) { + if (ar->hw->conf.chandef.chan->band == IEEE80211_BAND_2GHZ) { modes = mode_list_2ghz; nr_modes = ARRAY_SIZE(mode_list_2ghz); } else { diff --git a/drivers/net/wireless/b43/b43.h b/drivers/net/wireless/b43/b43.h index 10e288d470e7..6a4bd8c433b4 100644 --- a/drivers/net/wireless/b43/b43.h +++ b/drivers/net/wireless/b43/b43.h @@ -972,7 +972,7 @@ static inline int b43_is_mode(struct b43_wl *wl, int type) */ static inline enum ieee80211_band b43_current_band(struct b43_wl *wl) { - return wl->hw->conf.channel->band; + return wl->hw->conf.chandef.chan->band; } static inline int b43_bus_may_powerdown(struct b43_wldev *wldev) diff --git a/drivers/net/wireless/b43/main.c b/drivers/net/wireless/b43/main.c index 05682736e466..d135e8975f52 100644 --- a/drivers/net/wireless/b43/main.c +++ b/drivers/net/wireless/b43/main.c @@ -3848,7 +3848,7 @@ static int b43_op_config(struct ieee80211_hw *hw, u32 changed) dev = wl->current_dev; /* Switch the band (if necessary). This might change the active core. */ - err = b43_switch_band(wl, conf->channel); + err = b43_switch_band(wl, conf->chandef.chan); if (err) goto out_unlock_mutex; @@ -3878,8 +3878,8 @@ static int b43_op_config(struct ieee80211_hw *hw, u32 changed) /* Switch to the requested channel. * The firmware takes care of races with the TX handler. */ - if (conf->channel->hw_value != phy->channel) - b43_switch_channel(dev, conf->channel->hw_value); + if (conf->chandef.chan->hw_value != phy->channel) + b43_switch_channel(dev, conf->chandef.chan->hw_value); dev->wl->radiotap_enabled = !!(conf->flags & IEEE80211_CONF_MONITOR); @@ -5002,7 +5002,7 @@ static int b43_op_get_survey(struct ieee80211_hw *hw, int idx, if (idx != 0) return -ENOENT; - survey->channel = conf->channel; + survey->channel = conf->chandef.chan; survey->filled = SURVEY_INFO_NOISE_DBM; survey->noise = dev->stats.link_noise; diff --git a/drivers/net/wireless/b43/phy_ht.c b/drivers/net/wireless/b43/phy_ht.c index 7416c5e9154d..016682ea7445 100644 --- a/drivers/net/wireless/b43/phy_ht.c +++ b/drivers/net/wireless/b43/phy_ht.c @@ -525,8 +525,9 @@ static void b43_phy_ht_op_switch_analog(struct b43_wldev *dev, bool on) static int b43_phy_ht_op_switch_channel(struct b43_wldev *dev, unsigned int new_channel) { - struct ieee80211_channel *channel = dev->wl->hw->conf.channel; - enum nl80211_channel_type channel_type = dev->wl->hw->conf.channel_type; + struct ieee80211_channel *channel = dev->wl->hw->conf.chandef.chan; + enum nl80211_channel_type channel_type = + cfg80211_get_chandef_type(&dev->wl->hw->conf.chandef); if (b43_current_band(dev->wl) == IEEE80211_BAND_2GHZ) { if ((new_channel < 1) || (new_channel > 14)) diff --git a/drivers/net/wireless/b43/phy_lcn.c b/drivers/net/wireless/b43/phy_lcn.c index a13e28ef6246..0bafa3b17035 100644 --- a/drivers/net/wireless/b43/phy_lcn.c +++ b/drivers/net/wireless/b43/phy_lcn.c @@ -808,8 +808,9 @@ static void b43_phy_lcn_op_switch_analog(struct b43_wldev *dev, bool on) static int b43_phy_lcn_op_switch_channel(struct b43_wldev *dev, unsigned int new_channel) { - struct ieee80211_channel *channel = dev->wl->hw->conf.channel; - enum nl80211_channel_type channel_type = dev->wl->hw->conf.channel_type; + struct ieee80211_channel *channel = dev->wl->hw->conf.chandef.chan; + enum nl80211_channel_type channel_type = + cfg80211_get_chandef_type(&dev->wl->hw->conf.chandef); if (b43_current_band(dev->wl) == IEEE80211_BAND_2GHZ) { if ((new_channel < 1) || (new_channel > 14)) diff --git a/drivers/net/wireless/b43/phy_n.c b/drivers/net/wireless/b43/phy_n.c index 3c35382ee6c2..949a3bdeede9 100644 --- a/drivers/net/wireless/b43/phy_n.c +++ b/drivers/net/wireless/b43/phy_n.c @@ -5530,8 +5530,9 @@ static void b43_nphy_op_switch_analog(struct b43_wldev *dev, bool on) static int b43_nphy_op_switch_channel(struct b43_wldev *dev, unsigned int new_channel) { - struct ieee80211_channel *channel = dev->wl->hw->conf.channel; - enum nl80211_channel_type channel_type = dev->wl->hw->conf.channel_type; + struct ieee80211_channel *channel = dev->wl->hw->conf.chandef.chan; + enum nl80211_channel_type channel_type = + cfg80211_get_chandef_type(&dev->wl->hw->conf.chandef); if (b43_current_band(dev->wl) == IEEE80211_BAND_2GHZ) { if ((new_channel < 1) || (new_channel > 14)) diff --git a/drivers/net/wireless/b43legacy/main.c b/drivers/net/wireless/b43legacy/main.c index 8c3f70e1a013..572668821862 100644 --- a/drivers/net/wireless/b43legacy/main.c +++ b/drivers/net/wireless/b43legacy/main.c @@ -2720,7 +2720,7 @@ static int b43legacy_op_dev_config(struct ieee80211_hw *hw, goto out_unlock_mutex; /* Switch the PHY mode (if necessary). */ - switch (conf->channel->band) { + switch (conf->chandef.chan->band) { case IEEE80211_BAND_2GHZ: if (phy->type == B43legacy_PHYTYPE_B) new_phymode = B43legacy_PHYMODE_B; @@ -2748,8 +2748,9 @@ static int b43legacy_op_dev_config(struct ieee80211_hw *hw, /* Switch to the requested channel. * The firmware takes care of races with the TX handler. */ - if (conf->channel->hw_value != phy->channel) - b43legacy_radio_selectchannel(dev, conf->channel->hw_value, 0); + if (conf->chandef.chan->hw_value != phy->channel) + b43legacy_radio_selectchannel(dev, conf->chandef.chan->hw_value, + 0); dev->wl->radiotap_enabled = !!(conf->flags & IEEE80211_CONF_MONITOR); @@ -3558,7 +3559,7 @@ static int b43legacy_op_get_survey(struct ieee80211_hw *hw, int idx, if (idx != 0) return -ENOENT; - survey->channel = conf->channel; + survey->channel = conf->chandef.chan; survey->filled = SURVEY_INFO_NOISE_DBM; survey->noise = dev->stats.link_noise; diff --git a/drivers/net/wireless/brcm80211/brcmsmac/channel.c b/drivers/net/wireless/brcm80211/brcmsmac/channel.c index 10ee314c4229..cc87926f5055 100644 --- a/drivers/net/wireless/brcm80211/brcmsmac/channel.c +++ b/drivers/net/wireless/brcm80211/brcmsmac/channel.c @@ -379,7 +379,7 @@ brcms_c_channel_set_chanspec(struct brcms_cm_info *wlc_cm, u16 chanspec, u8 local_constraint_qdbm) { struct brcms_c_info *wlc = wlc_cm->wlc; - struct ieee80211_channel *ch = wlc->pub->ieee_hw->conf.channel; + struct ieee80211_channel *ch = wlc->pub->ieee_hw->conf.chandef.chan; struct txpwr_limits txpwr; brcms_c_channel_reg_limits(wlc_cm, chanspec, &txpwr); @@ -404,7 +404,7 @@ brcms_c_channel_reg_limits(struct brcms_cm_info *wlc_cm, u16 chanspec, struct txpwr_limits *txpwr) { struct brcms_c_info *wlc = wlc_cm->wlc; - struct ieee80211_channel *ch = wlc->pub->ieee_hw->conf.channel; + struct ieee80211_channel *ch = wlc->pub->ieee_hw->conf.chandef.chan; uint i; uint chan; int maxpwr; diff --git a/drivers/net/wireless/brcm80211/brcmsmac/mac80211_if.c b/drivers/net/wireless/brcm80211/brcmsmac/mac80211_if.c index aa5f43fee5ed..70731d23ddb1 100644 --- a/drivers/net/wireless/brcm80211/brcmsmac/mac80211_if.c +++ b/drivers/net/wireless/brcm80211/brcmsmac/mac80211_if.c @@ -414,10 +414,10 @@ static int brcms_ops_config(struct ieee80211_hw *hw, u32 changed) new_int); } if (changed & IEEE80211_CONF_CHANGE_CHANNEL) { - if (conf->channel_type == NL80211_CHAN_HT20 || - conf->channel_type == NL80211_CHAN_NO_HT) + if (conf->chandef.width == NL80211_CHAN_WIDTH_20 || + conf->chandef.width == NL80211_CHAN_WIDTH_20_NOHT) err = brcms_c_set_channel(wl->wlc, - conf->channel->hw_value); + conf->chandef.chan->hw_value); else err = -ENOTSUPP; } diff --git a/drivers/net/wireless/brcm80211/brcmsmac/main.c b/drivers/net/wireless/brcm80211/brcmsmac/main.c index 8ef02dca8f8c..e0dc1838cd19 100644 --- a/drivers/net/wireless/brcm80211/brcmsmac/main.c +++ b/drivers/net/wireless/brcm80211/brcmsmac/main.c @@ -5099,7 +5099,7 @@ int brcms_c_up(struct brcms_c_info *wlc) wlc->pub->up = true; if (wlc->bandinit_pending) { - ch = wlc->pub->ieee_hw->conf.channel; + ch = wlc->pub->ieee_hw->conf.chandef.chan; brcms_c_suspend_mac_and_wait(wlc); brcms_c_set_chanspec(wlc, ch20mhz_chspec(ch->hw_value)); wlc->bandinit_pending = false; @@ -7748,7 +7748,7 @@ bool brcms_c_dpc(struct brcms_c_info *wlc, bool bounded) void brcms_c_init(struct brcms_c_info *wlc, bool mute_tx) { struct bcma_device *core = wlc->hw->d11core; - struct ieee80211_channel *ch = wlc->pub->ieee_hw->conf.channel; + struct ieee80211_channel *ch = wlc->pub->ieee_hw->conf.chandef.chan; u16 chanspec; brcms_dbg_info(core, "wl%d\n", wlc->pub->unit); diff --git a/drivers/net/wireless/iwlegacy/3945-rs.c b/drivers/net/wireless/iwlegacy/3945-rs.c index d4fd29ad90dc..c9f197d9ca1e 100644 --- a/drivers/net/wireless/iwlegacy/3945-rs.c +++ b/drivers/net/wireless/iwlegacy/3945-rs.c @@ -347,7 +347,7 @@ il3945_rs_rate_init(struct il_priv *il, struct ieee80211_sta *sta, u8 sta_id) psta = (struct il3945_sta_priv *)sta->drv_priv; rs_sta = &psta->rs_sta; - sband = hw->wiphy->bands[conf->channel->band]; + sband = hw->wiphy->bands[conf->chandef.chan->band]; rs_sta->il = il; diff --git a/drivers/net/wireless/iwlegacy/4965-rs.c b/drivers/net/wireless/iwlegacy/4965-rs.c index e8324b5e5bfe..1d92a59a85ad 100644 --- a/drivers/net/wireless/iwlegacy/4965-rs.c +++ b/drivers/net/wireless/iwlegacy/4965-rs.c @@ -2299,7 +2299,7 @@ il4965_rs_rate_init(struct il_priv *il, struct ieee80211_sta *sta, u8 sta_id) sta_priv = (struct il_station_priv *)sta->drv_priv; lq_sta = &sta_priv->lq_sta; - sband = hw->wiphy->bands[conf->channel->band]; + sband = hw->wiphy->bands[conf->chandef.chan->band]; lq_sta->lq.sta_id = sta_id; diff --git a/drivers/net/wireless/iwlegacy/common.c b/drivers/net/wireless/iwlegacy/common.c index 722bfb57cfd5..025d8b0eeafe 100644 --- a/drivers/net/wireless/iwlegacy/common.c +++ b/drivers/net/wireless/iwlegacy/common.c @@ -4974,7 +4974,7 @@ il_mac_config(struct ieee80211_hw *hw, u32 changed) struct il_priv *il = hw->priv; const struct il_channel_info *ch_info; struct ieee80211_conf *conf = &hw->conf; - struct ieee80211_channel *channel = conf->channel; + struct ieee80211_channel *channel = conf->chandef.chan; struct il_ht_config *ht_conf = &il->current_ht_config; unsigned long flags = 0; int ret = 0; diff --git a/drivers/net/wireless/iwlwifi/dvm/rs.c b/drivers/net/wireless/iwlwifi/dvm/rs.c index abe304267261..907bd6e50aad 100644 --- a/drivers/net/wireless/iwlwifi/dvm/rs.c +++ b/drivers/net/wireless/iwlwifi/dvm/rs.c @@ -2831,7 +2831,7 @@ void iwl_rs_rate_init(struct iwl_priv *priv, struct ieee80211_sta *sta, u8 sta_i sta_priv = (struct iwl_station_priv *) sta->drv_priv; lq_sta = &sta_priv->lq_sta; - sband = hw->wiphy->bands[conf->channel->band]; + sband = hw->wiphy->bands[conf->chandef.chan->band]; lq_sta->lq.sta_id = sta_id; diff --git a/drivers/net/wireless/iwlwifi/dvm/rxon.c b/drivers/net/wireless/iwlwifi/dvm/rxon.c index 23be948cf162..085c589e7149 100644 --- a/drivers/net/wireless/iwlwifi/dvm/rxon.c +++ b/drivers/net/wireless/iwlwifi/dvm/rxon.c @@ -78,8 +78,9 @@ void iwl_connection_init_rx_config(struct iwl_priv *priv, ctx->staging.flags |= RXON_FLG_SHORT_PREAMBLE_MSK; #endif - ctx->staging.channel = cpu_to_le16(priv->hw->conf.channel->hw_value); - priv->band = priv->hw->conf.channel->band; + ctx->staging.channel = + cpu_to_le16(priv->hw->conf.chandef.chan->hw_value); + priv->band = priv->hw->conf.chandef.chan->band; iwl_set_flags_for_band(priv, ctx, priv->band, ctx->vif); @@ -951,7 +952,7 @@ static void iwl_calc_basic_rates(struct iwl_priv *priv, unsigned long basic = ctx->vif->bss_conf.basic_rates; int i; - sband = priv->hw->wiphy->bands[priv->hw->conf.channel->band]; + sband = priv->hw->wiphy->bands[priv->hw->conf.chandef.chan->band]; for_each_set_bit(i, &basic, BITS_PER_LONG) { int hw = sband->bitrates[i].hw_value; @@ -1181,7 +1182,7 @@ int iwlagn_mac_config(struct ieee80211_hw *hw, u32 changed) struct iwl_priv *priv = IWL_MAC80211_GET_DVM(hw); struct iwl_rxon_context *ctx; struct ieee80211_conf *conf = &hw->conf; - struct ieee80211_channel *channel = conf->channel; + struct ieee80211_channel *channel = conf->chandef.chan; int ret = 0; IWL_DEBUG_MAC80211(priv, "enter: changed %#x\n", changed); diff --git a/drivers/net/wireless/libertas_tf/main.c b/drivers/net/wireless/libertas_tf/main.c index 7001856241e6..088de9d25c39 100644 --- a/drivers/net/wireless/libertas_tf/main.c +++ b/drivers/net/wireless/libertas_tf/main.c @@ -412,9 +412,9 @@ static int lbtf_op_config(struct ieee80211_hw *hw, u32 changed) struct ieee80211_conf *conf = &hw->conf; lbtf_deb_enter(LBTF_DEB_MACOPS); - if (conf->channel->center_freq != priv->cur_freq) { - priv->cur_freq = conf->channel->center_freq; - lbtf_set_channel(priv, conf->channel->hw_value); + if (conf->chandef.chan->center_freq != priv->cur_freq) { + priv->cur_freq = conf->chandef.chan->center_freq; + lbtf_set_channel(priv, conf->chandef.chan->hw_value); } lbtf_deb_leave(LBTF_DEB_MACOPS); return 0; @@ -537,7 +537,7 @@ static int lbtf_op_get_survey(struct ieee80211_hw *hw, int idx, if (idx != 0) return -ENOENT; - survey->channel = conf->channel; + survey->channel = conf->chandef.chan; survey->filled = SURVEY_INFO_NOISE_DBM; survey->noise = priv->noise; diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 0064d38276bf..4ac54861e912 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -1062,11 +1062,13 @@ out: return HRTIMER_NORESTART; } -static const char *hwsim_chantypes[] = { - [NL80211_CHAN_NO_HT] = "noht", - [NL80211_CHAN_HT20] = "ht20", - [NL80211_CHAN_HT40MINUS] = "ht40-", - [NL80211_CHAN_HT40PLUS] = "ht40+", +static const char * const hwsim_chanwidths[] = { + [NL80211_CHAN_WIDTH_20_NOHT] = "noht", + [NL80211_CHAN_WIDTH_20] = "ht20", + [NL80211_CHAN_WIDTH_40] = "ht40", + [NL80211_CHAN_WIDTH_80] = "vht80", + [NL80211_CHAN_WIDTH_80P80] = "vht80p80", + [NL80211_CHAN_WIDTH_160] = "vht160", }; static int mac80211_hwsim_config(struct ieee80211_hw *hw, u32 changed) @@ -1080,18 +1082,28 @@ static int mac80211_hwsim_config(struct ieee80211_hw *hw, u32 changed) [IEEE80211_SMPS_DYNAMIC] = "dynamic", }; - wiphy_debug(hw->wiphy, - "%s (freq=%d/%s idle=%d ps=%d smps=%s)\n", - __func__, - conf->channel ? conf->channel->center_freq : 0, - hwsim_chantypes[conf->channel_type], - !!(conf->flags & IEEE80211_CONF_IDLE), - !!(conf->flags & IEEE80211_CONF_PS), - smps_modes[conf->smps_mode]); + if (conf->chandef.chan) + wiphy_debug(hw->wiphy, + "%s (freq=%d(%d - %d)/%s idle=%d ps=%d smps=%s)\n", + __func__, + conf->chandef.chan->center_freq, + conf->chandef.center_freq1, + conf->chandef.center_freq2, + hwsim_chanwidths[conf->chandef.width], + !!(conf->flags & IEEE80211_CONF_IDLE), + !!(conf->flags & IEEE80211_CONF_PS), + smps_modes[conf->smps_mode]); + else + wiphy_debug(hw->wiphy, + "%s (freq=0 idle=%d ps=%d smps=%s)\n", + __func__, + !!(conf->flags & IEEE80211_CONF_IDLE), + !!(conf->flags & IEEE80211_CONF_PS), + smps_modes[conf->smps_mode]); data->idle = !!(conf->flags & IEEE80211_CONF_IDLE); - data->channel = conf->channel; + data->channel = conf->chandef.chan; WARN_ON(data->channel && channels > 1); @@ -1277,7 +1289,7 @@ static int mac80211_hwsim_get_survey( return -ENOENT; /* Current channel */ - survey->channel = conf->channel; + survey->channel = conf->chandef.chan; /* * Magically conjured noise level --- this is only ok for simulated hardware. diff --git a/drivers/net/wireless/mwl8k.c b/drivers/net/wireless/mwl8k.c index 091d9a64080a..9f9a1449e812 100644 --- a/drivers/net/wireless/mwl8k.c +++ b/drivers/net/wireless/mwl8k.c @@ -2837,7 +2837,9 @@ static int mwl8k_cmd_tx_power(struct ieee80211_hw *hw, struct ieee80211_conf *conf, unsigned short pwr) { - struct ieee80211_channel *channel = conf->channel; + struct ieee80211_channel *channel = conf->chandef.chan; + enum nl80211_channel_type channel_type = + cfg80211_get_chandef_type(&conf->chandef); struct mwl8k_cmd_tx_power *cmd; int rc; int i; @@ -2857,14 +2859,14 @@ static int mwl8k_cmd_tx_power(struct ieee80211_hw *hw, cmd->channel = cpu_to_le16(channel->hw_value); - if (conf->channel_type == NL80211_CHAN_NO_HT || - conf->channel_type == NL80211_CHAN_HT20) { + if (channel_type == NL80211_CHAN_NO_HT || + channel_type == NL80211_CHAN_HT20) { cmd->bw = cpu_to_le16(0x2); } else { cmd->bw = cpu_to_le16(0x4); - if (conf->channel_type == NL80211_CHAN_HT40MINUS) + if (channel_type == NL80211_CHAN_HT40MINUS) cmd->sub_ch = cpu_to_le16(0x3); - else if (conf->channel_type == NL80211_CHAN_HT40PLUS) + else if (channel_type == NL80211_CHAN_HT40PLUS) cmd->sub_ch = cpu_to_le16(0x1); } @@ -3008,7 +3010,9 @@ struct mwl8k_cmd_set_rf_channel { static int mwl8k_cmd_set_rf_channel(struct ieee80211_hw *hw, struct ieee80211_conf *conf) { - struct ieee80211_channel *channel = conf->channel; + struct ieee80211_channel *channel = conf->chandef.chan; + enum nl80211_channel_type channel_type = + cfg80211_get_chandef_type(&conf->chandef); struct mwl8k_cmd_set_rf_channel *cmd; int rc; @@ -3026,12 +3030,12 @@ static int mwl8k_cmd_set_rf_channel(struct ieee80211_hw *hw, else if (channel->band == IEEE80211_BAND_5GHZ) cmd->channel_flags |= cpu_to_le32(0x00000004); - if (conf->channel_type == NL80211_CHAN_NO_HT || - conf->channel_type == NL80211_CHAN_HT20) + if (channel_type == NL80211_CHAN_NO_HT || + channel_type == NL80211_CHAN_HT20) cmd->channel_flags |= cpu_to_le32(0x00000080); - else if (conf->channel_type == NL80211_CHAN_HT40MINUS) + else if (channel_type == NL80211_CHAN_HT40MINUS) cmd->channel_flags |= cpu_to_le32(0x000001900); - else if (conf->channel_type == NL80211_CHAN_HT40PLUS) + else if (channel_type == NL80211_CHAN_HT40PLUS) cmd->channel_flags |= cpu_to_le32(0x000000900); rc = mwl8k_post_cmd(hw, &cmd->header); @@ -3950,7 +3954,7 @@ static int mwl8k_cmd_set_new_stn_add(struct ieee80211_hw *hw, memcpy(cmd->mac_addr, sta->addr, ETH_ALEN); cmd->stn_id = cpu_to_le16(sta->aid); cmd->action = cpu_to_le16(MWL8K_STA_ACTION_ADD); - if (hw->conf.channel->band == IEEE80211_BAND_2GHZ) + if (hw->conf.chandef.chan->band == IEEE80211_BAND_2GHZ) rates = sta->supp_rates[IEEE80211_BAND_2GHZ]; else rates = sta->supp_rates[IEEE80211_BAND_5GHZ] << 5; @@ -4385,7 +4389,7 @@ static int mwl8k_cmd_update_stadb_add(struct ieee80211_hw *hw, p->ht_caps = cpu_to_le16(sta->ht_cap.cap); p->extended_ht_caps = (sta->ht_cap.ampdu_factor & 3) | ((sta->ht_cap.ampdu_density & 7) << 2); - if (hw->conf.channel->band == IEEE80211_BAND_2GHZ) + if (hw->conf.chandef.chan->band == IEEE80211_BAND_2GHZ) rates = sta->supp_rates[IEEE80211_BAND_2GHZ]; else rates = sta->supp_rates[IEEE80211_BAND_5GHZ] << 5; @@ -4868,7 +4872,7 @@ mwl8k_bss_info_changed_sta(struct ieee80211_hw *hw, struct ieee80211_vif *vif, goto out; } - if (hw->conf.channel->band == IEEE80211_BAND_2GHZ) { + if (hw->conf.chandef.chan->band == IEEE80211_BAND_2GHZ) { ap_legacy_rates = ap->supp_rates[IEEE80211_BAND_2GHZ]; } else { ap_legacy_rates = @@ -4900,7 +4904,7 @@ mwl8k_bss_info_changed_sta(struct ieee80211_hw *hw, struct ieee80211_vif *vif, if (idx) idx--; - if (hw->conf.channel->band == IEEE80211_BAND_2GHZ) + if (hw->conf.chandef.chan->band == IEEE80211_BAND_2GHZ) rate = mwl8k_rates_24[idx].hw_value; else rate = mwl8k_rates_50[idx].hw_value; @@ -4973,7 +4977,7 @@ mwl8k_bss_info_changed_ap(struct ieee80211_hw *hw, struct ieee80211_vif *vif, if (idx) idx--; - if (hw->conf.channel->band == IEEE80211_BAND_2GHZ) + if (hw->conf.chandef.chan->band == IEEE80211_BAND_2GHZ) rate = mwl8k_rates_24[idx].hw_value; else rate = mwl8k_rates_50[idx].hw_value; @@ -5246,7 +5250,7 @@ static int mwl8k_get_survey(struct ieee80211_hw *hw, int idx, if (idx != 0) return -ENOENT; - survey->channel = conf->channel; + survey->channel = conf->chandef.chan; survey->filled = SURVEY_INFO_NOISE_DBM; survey->noise = priv->noise; diff --git a/drivers/net/wireless/p54/fwio.c b/drivers/net/wireless/p54/fwio.c index 9ba85106eec0..b3879fbf5368 100644 --- a/drivers/net/wireless/p54/fwio.c +++ b/drivers/net/wireless/p54/fwio.c @@ -402,7 +402,7 @@ int p54_scan(struct p54_common *priv, u16 mode, u16 dwell) struct p54_rssi_db_entry *rssi_data; unsigned int i; void *entry; - __le16 freq = cpu_to_le16(priv->hw->conf.channel->center_freq); + __le16 freq = cpu_to_le16(priv->hw->conf.chandef.chan->center_freq); skb = p54_alloc_skb(priv, P54_HDR_FLAG_CONTROL_OPSET, sizeof(*head) + 2 + sizeof(*iq_autocal) + sizeof(*body) + @@ -532,7 +532,7 @@ int p54_scan(struct p54_common *priv, u16 mode, u16 dwell) err: wiphy_err(priv->hw->wiphy, "frequency change to channel %d failed.\n", ieee80211_frequency_to_channel( - priv->hw->conf.channel->center_freq)); + priv->hw->conf.chandef.chan->center_freq)); dev_kfree_skb_any(skb); return -EINVAL; diff --git a/drivers/net/wireless/p54/main.c b/drivers/net/wireless/p54/main.c index ee654a691f38..067e6f2fd050 100644 --- a/drivers/net/wireless/p54/main.c +++ b/drivers/net/wireless/p54/main.c @@ -340,7 +340,7 @@ static int p54_config(struct ieee80211_hw *dev, u32 changed) * TODO: Use the LM_SCAN_TRAP to determine the current * operating channel. */ - priv->curchan = priv->hw->conf.channel; + priv->curchan = priv->hw->conf.chandef.chan; p54_reset_stats(priv); WARN_ON(p54_fetch_statistics(priv)); } @@ -480,7 +480,7 @@ static void p54_bss_info_changed(struct ieee80211_hw *dev, p54_set_edcf(priv); } if (changed & BSS_CHANGED_BASIC_RATES) { - if (dev->conf.channel->band == IEEE80211_BAND_5GHZ) + if (dev->conf.chandef.chan->band == IEEE80211_BAND_5GHZ) priv->basic_rate_mask = (info->basic_rates << 4); else priv->basic_rate_mask = info->basic_rates; diff --git a/drivers/net/wireless/p54/txrx.c b/drivers/net/wireless/p54/txrx.c index 12f0a34477f2..f95de0d16216 100644 --- a/drivers/net/wireless/p54/txrx.c +++ b/drivers/net/wireless/p54/txrx.c @@ -354,13 +354,13 @@ static int p54_rx_data(struct p54_common *priv, struct sk_buff *skb) rx_status->signal = p54_rssi_to_dbm(priv, hdr->rssi); if (hdr->rate & 0x10) rx_status->flag |= RX_FLAG_SHORTPRE; - if (priv->hw->conf.channel->band == IEEE80211_BAND_5GHZ) + if (priv->hw->conf.chandef.chan->band == IEEE80211_BAND_5GHZ) rx_status->rate_idx = (rate < 4) ? 0 : rate - 4; else rx_status->rate_idx = rate; rx_status->freq = freq; - rx_status->band = priv->hw->conf.channel->band; + rx_status->band = priv->hw->conf.chandef.chan->band; rx_status->antenna = hdr->antenna; tsf32 = le32_to_cpu(hdr->tsf32); diff --git a/drivers/net/wireless/rt2x00/rt2800lib.c b/drivers/net/wireless/rt2x00/rt2800lib.c index a658b4bc7da2..34456b45acbb 100644 --- a/drivers/net/wireless/rt2x00/rt2800lib.c +++ b/drivers/net/wireless/rt2x00/rt2800lib.c @@ -2763,7 +2763,7 @@ static void rt2800_config_txpower(struct rt2x00_dev *rt2x00dev, void rt2800_gain_calibration(struct rt2x00_dev *rt2x00dev) { - rt2800_config_txpower(rt2x00dev, rt2x00dev->hw->conf.channel, + rt2800_config_txpower(rt2x00dev, rt2x00dev->hw->conf.chandef.chan, rt2x00dev->tx_power); } EXPORT_SYMBOL_GPL(rt2800_gain_calibration); @@ -2898,11 +2898,11 @@ void rt2800_config(struct rt2x00_dev *rt2x00dev, if (flags & IEEE80211_CONF_CHANGE_CHANNEL) { rt2800_config_channel(rt2x00dev, libconf->conf, &libconf->rf, &libconf->channel); - rt2800_config_txpower(rt2x00dev, libconf->conf->channel, + rt2800_config_txpower(rt2x00dev, libconf->conf->chandef.chan, libconf->conf->power_level); } if (flags & IEEE80211_CONF_CHANGE_POWER) - rt2800_config_txpower(rt2x00dev, libconf->conf->channel, + rt2800_config_txpower(rt2x00dev, libconf->conf->chandef.chan, libconf->conf->power_level); if (flags & IEEE80211_CONF_CHANGE_RETRY_LIMITS) rt2800_config_retry_limit(rt2x00dev, libconf); @@ -5563,7 +5563,7 @@ int rt2800_get_survey(struct ieee80211_hw *hw, int idx, if (idx != 0) return -ENOENT; - survey->channel = conf->channel; + survey->channel = conf->chandef.chan; rt2800_register_read(rt2x00dev, CH_IDLE_STA, &idle); rt2800_register_read(rt2x00dev, CH_BUSY_STA, &busy); diff --git a/drivers/net/wireless/rt2x00/rt2x00config.c b/drivers/net/wireless/rt2x00/rt2x00config.c index 49a63e973934..8cb43f8f3efc 100644 --- a/drivers/net/wireless/rt2x00/rt2x00config.c +++ b/drivers/net/wireless/rt2x00/rt2x00config.c @@ -184,7 +184,7 @@ static u16 rt2x00ht_center_channel(struct rt2x00_dev *rt2x00dev, /* * Initialize center channel to current channel. */ - center_channel = spec->channels[conf->channel->hw_value].channel; + center_channel = spec->channels[conf->chandef.chan->hw_value].channel; /* * Adjust center channel to HT40+ and HT40- operation. @@ -199,7 +199,7 @@ static u16 rt2x00ht_center_channel(struct rt2x00_dev *rt2x00dev, return i; WARN_ON(1); - return conf->channel->hw_value; + return conf->chandef.chan->hw_value; } void rt2x00lib_config(struct rt2x00_dev *rt2x00dev, @@ -227,7 +227,7 @@ void rt2x00lib_config(struct rt2x00_dev *rt2x00dev, hw_value = rt2x00ht_center_channel(rt2x00dev, conf); } else { clear_bit(CONFIG_CHANNEL_HT40, &rt2x00dev->flags); - hw_value = conf->channel->hw_value; + hw_value = conf->chandef.chan->hw_value; } memcpy(&libconf.rf, @@ -279,8 +279,8 @@ void rt2x00lib_config(struct rt2x00_dev *rt2x00dev, else clear_bit(CONFIG_POWERSAVING, &rt2x00dev->flags); - rt2x00dev->curr_band = conf->channel->band; - rt2x00dev->curr_freq = conf->channel->center_freq; + rt2x00dev->curr_band = conf->chandef.chan->band; + rt2x00dev->curr_freq = conf->chandef.chan->center_freq; rt2x00dev->tx_power = conf->power_level; rt2x00dev->short_retry = conf->short_frame_max_tx_count; rt2x00dev->long_retry = conf->long_frame_max_tx_count; diff --git a/drivers/net/wireless/rt2x00/rt61pci.c b/drivers/net/wireless/rt2x00/rt61pci.c index f95792cfcf89..f85035cc836f 100644 --- a/drivers/net/wireless/rt2x00/rt61pci.c +++ b/drivers/net/wireless/rt2x00/rt61pci.c @@ -847,7 +847,7 @@ static void rt61pci_config_lna_gain(struct rt2x00_dev *rt2x00dev, u16 eeprom; short lna_gain = 0; - if (libconf->conf->channel->band == IEEE80211_BAND_2GHZ) { + if (libconf->conf->chandef.chan->band == IEEE80211_BAND_2GHZ) { if (test_bit(CAPABILITY_EXTERNAL_LNA_BG, &rt2x00dev->cap_flags)) lna_gain += 14; diff --git a/drivers/net/wireless/rt2x00/rt73usb.c b/drivers/net/wireless/rt2x00/rt73usb.c index 24eec66e9fd2..a3387b146bb5 100644 --- a/drivers/net/wireless/rt2x00/rt73usb.c +++ b/drivers/net/wireless/rt2x00/rt73usb.c @@ -739,7 +739,7 @@ static void rt73usb_config_lna_gain(struct rt2x00_dev *rt2x00dev, u16 eeprom; short lna_gain = 0; - if (libconf->conf->channel->band == IEEE80211_BAND_2GHZ) { + if (libconf->conf->chandef.chan->band == IEEE80211_BAND_2GHZ) { if (test_bit(CAPABILITY_EXTERNAL_LNA_BG, &rt2x00dev->cap_flags)) lna_gain += 14; diff --git a/drivers/net/wireless/rtl818x/rtl8180/dev.c b/drivers/net/wireless/rtl818x/rtl8180/dev.c index 1b3c2843221d..91a04e2b8ece 100644 --- a/drivers/net/wireless/rtl818x/rtl8180/dev.c +++ b/drivers/net/wireless/rtl818x/rtl8180/dev.c @@ -147,8 +147,8 @@ static void rtl8180_handle_rx(struct ieee80211_hw *dev) signal = priv->rf->calc_rssi(agc, sq); } rx_status.signal = signal; - rx_status.freq = dev->conf.channel->center_freq; - rx_status.band = dev->conf.channel->band; + rx_status.freq = dev->conf.chandef.chan->center_freq; + rx_status.band = dev->conf.chandef.chan->band; rx_status.mactime = le64_to_cpu(entry->tsft); rx_status.flag |= RX_FLAG_MACTIME_START; if (flags & RTL818X_RX_DESC_FLAG_CRC32_ERR) diff --git a/drivers/net/wireless/rtl818x/rtl8180/grf5101.c b/drivers/net/wireless/rtl818x/rtl8180/grf5101.c index 5ee7589dd546..077ff92cc139 100644 --- a/drivers/net/wireless/rtl818x/rtl8180/grf5101.c +++ b/drivers/net/wireless/rtl818x/rtl8180/grf5101.c @@ -82,7 +82,8 @@ static void grf5101_rf_set_channel(struct ieee80211_hw *dev, struct ieee80211_conf *conf) { struct rtl8180_priv *priv = dev->priv; - int channel = ieee80211_frequency_to_channel(conf->channel->center_freq); + int channel = + ieee80211_frequency_to_channel(conf->chandef.chan->center_freq); u32 txpw = priv->channels[channel - 1].hw_value & 0xFF; u32 chan = channel - 1; diff --git a/drivers/net/wireless/rtl818x/rtl8180/max2820.c b/drivers/net/wireless/rtl818x/rtl8180/max2820.c index 667b3363d437..4715000c94dd 100644 --- a/drivers/net/wireless/rtl818x/rtl8180/max2820.c +++ b/drivers/net/wireless/rtl818x/rtl8180/max2820.c @@ -95,7 +95,7 @@ static void max2820_rf_set_channel(struct ieee80211_hw *dev, { struct rtl8180_priv *priv = dev->priv; int channel = conf ? - ieee80211_frequency_to_channel(conf->channel->center_freq) : 1; + ieee80211_frequency_to_channel(conf->chandef.chan->center_freq) : 1; unsigned int chan_idx = channel - 1; u32 txpw = priv->channels[chan_idx].hw_value & 0xFF; u32 chan = max2820_chan[chan_idx]; diff --git a/drivers/net/wireless/rtl818x/rtl8180/rtl8225.c b/drivers/net/wireless/rtl818x/rtl8180/rtl8225.c index 7c4574ba9d75..cc2a5412c1f0 100644 --- a/drivers/net/wireless/rtl818x/rtl8180/rtl8225.c +++ b/drivers/net/wireless/rtl818x/rtl8180/rtl8225.c @@ -719,7 +719,8 @@ static void rtl8225_rf_set_channel(struct ieee80211_hw *dev, struct ieee80211_conf *conf) { struct rtl8180_priv *priv = dev->priv; - int chan = ieee80211_frequency_to_channel(conf->channel->center_freq); + int chan = + ieee80211_frequency_to_channel(conf->chandef.chan->center_freq); if (priv->rf->init == rtl8225_rf_init) rtl8225_rf_set_tx_power(dev, chan); diff --git a/drivers/net/wireless/rtl818x/rtl8180/sa2400.c b/drivers/net/wireless/rtl818x/rtl8180/sa2400.c index 44771a6286af..b3ec40f6bd23 100644 --- a/drivers/net/wireless/rtl818x/rtl8180/sa2400.c +++ b/drivers/net/wireless/rtl818x/rtl8180/sa2400.c @@ -105,7 +105,8 @@ static void sa2400_rf_set_channel(struct ieee80211_hw *dev, struct ieee80211_conf *conf) { struct rtl8180_priv *priv = dev->priv; - int channel = ieee80211_frequency_to_channel(conf->channel->center_freq); + int channel = + ieee80211_frequency_to_channel(conf->chandef.chan->center_freq); u32 txpw = priv->channels[channel - 1].hw_value & 0xFF; u32 chan = sa2400_chan[channel - 1]; diff --git a/drivers/net/wireless/rtl818x/rtl8187/dev.c b/drivers/net/wireless/rtl818x/rtl8187/dev.c index 4574bd213705..f49220e234b0 100644 --- a/drivers/net/wireless/rtl818x/rtl8187/dev.c +++ b/drivers/net/wireless/rtl818x/rtl8187/dev.c @@ -379,8 +379,8 @@ static void rtl8187_rx_cb(struct urb *urb) rate = (flags >> 20) & 0xF; skb_trim(skb, flags & 0x0FFF); rx_status.rate_idx = rate; - rx_status.freq = dev->conf.channel->center_freq; - rx_status.band = dev->conf.channel->band; + rx_status.freq = dev->conf.chandef.chan->center_freq; + rx_status.band = dev->conf.chandef.chan->band; rx_status.flag |= RX_FLAG_MACTIME_START; if (flags & RTL818X_RX_DESC_FLAG_CRC32_ERR) rx_status.flag |= RX_FLAG_FAILED_FCS_CRC; diff --git a/drivers/net/wireless/rtl818x/rtl8187/rtl8225.c b/drivers/net/wireless/rtl818x/rtl8187/rtl8225.c index 908903f721f5..f0bf35fedbaf 100644 --- a/drivers/net/wireless/rtl818x/rtl8187/rtl8225.c +++ b/drivers/net/wireless/rtl818x/rtl8187/rtl8225.c @@ -905,7 +905,8 @@ static void rtl8225_rf_set_channel(struct ieee80211_hw *dev, struct ieee80211_conf *conf) { struct rtl8187_priv *priv = dev->priv; - int chan = ieee80211_frequency_to_channel(conf->channel->center_freq); + int chan = + ieee80211_frequency_to_channel(conf->chandef.chan->center_freq); if (priv->rf->init == rtl8225_rf_init) rtl8225_rf_set_tx_power(dev, chan); diff --git a/drivers/net/wireless/rtlwifi/base.c b/drivers/net/wireless/rtlwifi/base.c index 99c5cea3fe21..0e7866d1d0e2 100644 --- a/drivers/net/wireless/rtlwifi/base.c +++ b/drivers/net/wireless/rtlwifi/base.c @@ -691,7 +691,7 @@ int rtlwifi_rate_mapping(struct ieee80211_hw *hw, int rate_idx; if (false == isht) { - if (IEEE80211_BAND_2GHZ == hw->conf.channel->band) { + if (IEEE80211_BAND_2GHZ == hw->conf.chandef.chan->band) { switch (desc_rate) { case DESC92_RATE1M: rate_idx = 0; @@ -1365,7 +1365,7 @@ int rtl_send_smps_action(struct ieee80211_hw *hw, rtlpriv->cfg->ops->update_rate_tbl(hw, sta, 0); info->control.rates[0].idx = 0; - info->band = hw->conf.channel->band; + info->band = hw->conf.chandef.chan->band; rtlpriv->intf_ops->adapter_tx(hw, sta, skb, &tcb_desc); } err_free: diff --git a/drivers/net/wireless/rtlwifi/core.c b/drivers/net/wireless/rtlwifi/core.c index b5a7a260bf63..64a41ecf86cf 100644 --- a/drivers/net/wireless/rtlwifi/core.c +++ b/drivers/net/wireless/rtlwifi/core.c @@ -320,7 +320,7 @@ static int rtl_op_config(struct ieee80211_hw *hw, u32 changed) } if (changed & IEEE80211_CONF_CHANGE_CHANNEL) { - struct ieee80211_channel *channel = hw->conf.channel; + struct ieee80211_channel *channel = hw->conf.chandef.chan; u8 wide_chan = (u8) channel->hw_value; /* @@ -332,7 +332,7 @@ static int rtl_op_config(struct ieee80211_hw *hw, u32 changed) *info for cisco1253 bw20, so we modify *it here based on UPPER & LOWER */ - switch (hw->conf.channel_type) { + switch (cfg80211_get_chandef_type(&hw->conf.chandef)) { case NL80211_CHAN_HT20: case NL80211_CHAN_NO_HT: /* SC */ @@ -390,7 +390,7 @@ static int rtl_op_config(struct ieee80211_hw *hw, u32 changed) rtlpriv->cfg->ops->switch_channel(hw); rtlpriv->cfg->ops->set_channel_access(hw); rtlpriv->cfg->ops->set_bw_mode(hw, - hw->conf.channel_type); + cfg80211_get_chandef_type(&hw->conf.chandef)); } mutex_unlock(&rtlpriv->locks.conf_mutex); diff --git a/drivers/net/wireless/rtlwifi/rtl8192ce/trx.c b/drivers/net/wireless/rtlwifi/rtl8192ce/trx.c index b9b1a6e0b16e..27e4ebd51091 100644 --- a/drivers/net/wireless/rtlwifi/rtl8192ce/trx.c +++ b/drivers/net/wireless/rtlwifi/rtl8192ce/trx.c @@ -544,8 +544,8 @@ bool rtl92ce_rx_query_desc(struct ieee80211_hw *hw, stats->timestamp_low = GET_RX_DESC_TSFL(pdesc); stats->rx_is40Mhzpacket = (bool) GET_RX_DESC_BW(pdesc); - rx_status->freq = hw->conf.channel->center_freq; - rx_status->band = hw->conf.channel->band; + rx_status->freq = hw->conf.chandef.chan->center_freq; + rx_status->band = hw->conf.chandef.chan->band; if (GET_RX_DESC_CRC32(pdesc)) rx_status->flag |= RX_FLAG_FAILED_FCS_CRC; diff --git a/drivers/net/wireless/rtlwifi/rtl8192cu/trx.c b/drivers/net/wireless/rtlwifi/rtl8192cu/trx.c index b6222eedb835..f0dada530153 100644 --- a/drivers/net/wireless/rtlwifi/rtl8192cu/trx.c +++ b/drivers/net/wireless/rtlwifi/rtl8192cu/trx.c @@ -324,8 +324,8 @@ bool rtl92cu_rx_query_desc(struct ieee80211_hw *hw, && (GET_RX_DESC_FAGGR(pdesc) == 1)); stats->timestamp_low = GET_RX_DESC_TSFL(pdesc); stats->rx_is40Mhzpacket = (bool) GET_RX_DESC_BW(pdesc); - rx_status->freq = hw->conf.channel->center_freq; - rx_status->band = hw->conf.channel->band; + rx_status->freq = hw->conf.chandef.chan->center_freq; + rx_status->band = hw->conf.chandef.chan->band; if (GET_RX_DESC_CRC32(pdesc)) rx_status->flag |= RX_FLAG_FAILED_FCS_CRC; if (!GET_RX_DESC_SWDEC(pdesc)) @@ -395,8 +395,8 @@ static void _rtl_rx_process(struct ieee80211_hw *hw, struct sk_buff *skb) stats.rx_is40Mhzpacket = (bool) GET_RX_DESC_BW(rxdesc); /* TODO: is center_freq changed when doing scan? */ /* TODO: Shall we add protection or just skip those two step? */ - rx_status->freq = hw->conf.channel->center_freq; - rx_status->band = hw->conf.channel->band; + rx_status->freq = hw->conf.chandef.chan->center_freq; + rx_status->band = hw->conf.chandef.chan->band; if (GET_RX_DESC_CRC32(rxdesc)) rx_status->flag |= RX_FLAG_FAILED_FCS_CRC; if (!GET_RX_DESC_SWDEC(rxdesc)) diff --git a/drivers/net/wireless/rtlwifi/rtl8192de/trx.c b/drivers/net/wireless/rtlwifi/rtl8192de/trx.c index 941080e03c06..b8ec718a0fab 100644 --- a/drivers/net/wireless/rtlwifi/rtl8192de/trx.c +++ b/drivers/net/wireless/rtlwifi/rtl8192de/trx.c @@ -499,8 +499,8 @@ bool rtl92de_rx_query_desc(struct ieee80211_hw *hw, struct rtl_stats *stats, && (GET_RX_DESC_FAGGR(pdesc) == 1)); stats->timestamp_low = GET_RX_DESC_TSFL(pdesc); stats->rx_is40Mhzpacket = (bool) GET_RX_DESC_BW(pdesc); - rx_status->freq = hw->conf.channel->center_freq; - rx_status->band = hw->conf.channel->band; + rx_status->freq = hw->conf.chandef.chan->center_freq; + rx_status->band = hw->conf.chandef.chan->band; if (GET_RX_DESC_CRC32(pdesc)) rx_status->flag |= RX_FLAG_FAILED_FCS_CRC; if (!GET_RX_DESC_SWDEC(pdesc)) diff --git a/drivers/net/wireless/rtlwifi/rtl8192se/trx.c b/drivers/net/wireless/rtlwifi/rtl8192se/trx.c index 7b0a2e75b8b8..0b074f11f969 100644 --- a/drivers/net/wireless/rtlwifi/rtl8192se/trx.c +++ b/drivers/net/wireless/rtlwifi/rtl8192se/trx.c @@ -538,8 +538,8 @@ bool rtl92se_rx_query_desc(struct ieee80211_hw *hw, struct rtl_stats *stats, if (stats->hwerror) return false; - rx_status->freq = hw->conf.channel->center_freq; - rx_status->band = hw->conf.channel->band; + rx_status->freq = hw->conf.chandef.chan->center_freq; + rx_status->band = hw->conf.chandef.chan->band; hdr = (struct ieee80211_hdr *)(skb->data + stats->rx_drvinfo_size + stats->rx_bufshift); diff --git a/drivers/net/wireless/rtlwifi/rtl8723ae/trx.c b/drivers/net/wireless/rtlwifi/rtl8723ae/trx.c index ac081297db50..601261d67e84 100644 --- a/drivers/net/wireless/rtlwifi/rtl8723ae/trx.c +++ b/drivers/net/wireless/rtlwifi/rtl8723ae/trx.c @@ -304,8 +304,8 @@ bool rtl8723ae_rx_query_desc(struct ieee80211_hw *hw, status->is_cck = RTL8723E_RX_HAL_IS_CCK_RATE(status->rate); - rx_status->freq = hw->conf.channel->center_freq; - rx_status->band = hw->conf.channel->band; + rx_status->freq = hw->conf.chandef.chan->center_freq; + rx_status->band = hw->conf.chandef.chan->band; hdr = (struct ieee80211_hdr *)(skb->data + status->rx_drvinfo_size + status->rx_bufshift); diff --git a/drivers/net/wireless/ti/wl1251/main.c b/drivers/net/wireless/ti/wl1251/main.c index bbbf68cf50a7..3291ffa95273 100644 --- a/drivers/net/wireless/ti/wl1251/main.c +++ b/drivers/net/wireless/ti/wl1251/main.c @@ -572,7 +572,8 @@ static int wl1251_op_config(struct ieee80211_hw *hw, u32 changed) struct ieee80211_conf *conf = &hw->conf; int channel, ret = 0; - channel = ieee80211_frequency_to_channel(conf->channel->center_freq); + channel = ieee80211_frequency_to_channel( + conf->chandef.chan->center_freq); wl1251_debug(DEBUG_MAC80211, "mac80211 config ch %d psm %s power %d", channel, @@ -1223,7 +1224,7 @@ static int wl1251_op_get_survey(struct ieee80211_hw *hw, int idx, if (idx != 0) return -ENOENT; - survey->channel = conf->channel; + survey->channel = conf->chandef.chan; survey->filled = SURVEY_INFO_NOISE_DBM; survey->noise = wl->noise; diff --git a/drivers/net/wireless/ti/wlcore/main.c b/drivers/net/wireless/ti/wlcore/main.c index a9f7041c7192..c26cb095010c 100644 --- a/drivers/net/wireless/ti/wlcore/main.c +++ b/drivers/net/wireless/ti/wlcore/main.c @@ -4474,7 +4474,7 @@ static int wl1271_op_get_survey(struct ieee80211_hw *hw, int idx, if (idx != 0) return -ENOENT; - survey->channel = conf->channel; + survey->channel = conf->chandef.chan; survey->filled = 0; return 0; } diff --git a/drivers/net/wireless/zd1211rw/zd_mac.c b/drivers/net/wireless/zd1211rw/zd_mac.c index 114364b5d466..c6208a7988e4 100644 --- a/drivers/net/wireless/zd1211rw/zd_mac.c +++ b/drivers/net/wireless/zd1211rw/zd_mac.c @@ -1156,10 +1156,10 @@ static int zd_op_config(struct ieee80211_hw *hw, u32 changed) struct ieee80211_conf *conf = &hw->conf; spin_lock_irq(&mac->lock); - mac->channel = conf->channel->hw_value; + mac->channel = conf->chandef.chan->hw_value; spin_unlock_irq(&mac->lock); - return zd_chip_set_channel(&mac->chip, conf->channel->hw_value); + return zd_chip_set_channel(&mac->chip, conf->chandef.chan->hw_value); } static void zd_beacon_done(struct zd_mac *mac) diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 23a275a9a3b2..64faf015dd1e 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -974,8 +974,7 @@ enum ieee80211_smps_mode { * @power_level: requested transmit power (in dBm), backward compatibility * value only that is set to the minimum of all interfaces * - * @channel: the channel to tune to - * @channel_type: the channel (HT) type + * @chandef: the channel definition to tune to * @radar_enabled: whether radar detection is enabled * * @long_frame_max_tx_count: Maximum number of transmissions for a "long" frame @@ -1001,8 +1000,7 @@ struct ieee80211_conf { u8 long_frame_max_tx_count, short_frame_max_tx_count; - struct ieee80211_channel *channel; - enum nl80211_channel_type channel_type; + struct cfg80211_chan_def chandef; bool radar_enabled; enum ieee80211_smps_mode smps_mode; }; @@ -4216,31 +4214,33 @@ void ieee80211_rate_control_unregister(struct rate_control_ops *ops); static inline bool conf_is_ht20(struct ieee80211_conf *conf) { - return conf->channel_type == NL80211_CHAN_HT20; + return conf->chandef.width == NL80211_CHAN_WIDTH_20; } static inline bool conf_is_ht40_minus(struct ieee80211_conf *conf) { - return conf->channel_type == NL80211_CHAN_HT40MINUS; + return conf->chandef.width == NL80211_CHAN_WIDTH_40 && + conf->chandef.center_freq1 < conf->chandef.chan->center_freq; } static inline bool conf_is_ht40_plus(struct ieee80211_conf *conf) { - return conf->channel_type == NL80211_CHAN_HT40PLUS; + return conf->chandef.width == NL80211_CHAN_WIDTH_40 && + conf->chandef.center_freq1 > conf->chandef.chan->center_freq; } static inline bool conf_is_ht40(struct ieee80211_conf *conf) { - return conf_is_ht40_minus(conf) || conf_is_ht40_plus(conf); + return conf->chandef.width == NL80211_CHAN_WIDTH_40; } static inline bool conf_is_ht(struct ieee80211_conf *conf) { - return conf->channel_type != NL80211_CHAN_NO_HT; + return conf->chandef.width != NL80211_CHAN_WIDTH_20_NOHT; } static inline enum nl80211_iftype diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 50aaf25d4735..6e43feb49a76 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -805,8 +805,7 @@ static int ieee80211_set_monitor_channel(struct wiphy *wiphy, IEEE80211_CHANCTX_EXCLUSIVE); } } else if (local->open_count == local->monitors) { - local->_oper_channel = chandef->chan; - local->_oper_channel_type = cfg80211_get_chandef_type(chandef); + local->_oper_chandef = *chandef; ieee80211_hw_config(local, 0); } @@ -3373,9 +3372,7 @@ static int ieee80211_cfg_get_channel(struct wiphy *wiphy, if (local->use_chanctx) *chandef = local->monitor_chandef; else - cfg80211_chandef_create(chandef, - local->_oper_channel, - local->_oper_channel_type); + *chandef = local->_oper_chandef; ret = 0; } rcu_read_unlock(); diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index 78c0d90dd641..8024874ba95d 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -22,7 +22,7 @@ static void ieee80211_change_chanctx(struct ieee80211_local *local, drv_change_chanctx(local, ctx, IEEE80211_CHANCTX_CHANGE_WIDTH); if (!local->use_chanctx) { - local->_oper_channel_type = cfg80211_get_chandef_type(chandef); + local->_oper_chandef = *chandef; ieee80211_hw_config(local, 0); } } @@ -77,9 +77,7 @@ ieee80211_new_chanctx(struct ieee80211_local *local, ctx->mode = mode; if (!local->use_chanctx) { - local->_oper_channel_type = - cfg80211_get_chandef_type(chandef); - local->_oper_channel = chandef->chan; + local->_oper_chandef = *chandef; ieee80211_hw_config(local, 0); } else { err = drv_add_chanctx(local, ctx); @@ -106,7 +104,10 @@ static void ieee80211_free_chanctx(struct ieee80211_local *local, WARN_ON_ONCE(ctx->refcount != 0); if (!local->use_chanctx) { - local->_oper_channel_type = NL80211_CHAN_NO_HT; + struct cfg80211_chan_def *chandef = &local->_oper_chandef; + chandef->width = NL80211_CHAN_WIDTH_20_NOHT; + chandef->center_freq1 = chandef->chan->center_freq; + chandef->center_freq2 = 0; ieee80211_hw_config(local, 0); } else { drv_remove_chanctx(local, ctx); diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index c7f8b8b29e58..f9782f0f4348 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1021,8 +1021,7 @@ struct ieee80211_local { struct ieee80211_sub_if_data __rcu *scan_sdata; struct ieee80211_channel *csa_channel; /* For backward compatibility only -- do not use */ - struct ieee80211_channel *_oper_channel; - enum nl80211_channel_type _oper_channel_type; + struct cfg80211_chan_def _oper_chandef; /* Temporary remain-on-channel for off-channel operations */ struct ieee80211_channel *tmp_channel; diff --git a/net/mac80211/main.c b/net/mac80211/main.c index b0d286821864..a16b037c9d34 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -95,42 +95,47 @@ static void ieee80211_reconfig_filter(struct work_struct *work) static u32 ieee80211_hw_conf_chan(struct ieee80211_local *local) { struct ieee80211_sub_if_data *sdata; - struct ieee80211_channel *chan; + struct cfg80211_chan_def chandef = {}; u32 changed = 0; int power; - enum nl80211_channel_type channel_type; u32 offchannel_flag; offchannel_flag = local->hw.conf.flags & IEEE80211_CONF_OFFCHANNEL; + if (local->scan_channel) { - chan = local->scan_channel; + chandef.chan = local->scan_channel; /* If scanning on oper channel, use whatever channel-type * is currently in use. */ - if (chan == local->_oper_channel) - channel_type = local->_oper_channel_type; - else - channel_type = NL80211_CHAN_NO_HT; + if (chandef.chan == local->_oper_chandef.chan) { + chandef = local->_oper_chandef; + } else { + chandef.width = NL80211_CHAN_WIDTH_20_NOHT; + chandef.center_freq1 = chandef.chan->center_freq; + } } else if (local->tmp_channel) { - chan = local->tmp_channel; - channel_type = NL80211_CHAN_NO_HT; - } else { - chan = local->_oper_channel; - channel_type = local->_oper_channel_type; - } - - if (chan != local->_oper_channel || - channel_type != local->_oper_channel_type) + chandef.chan = local->tmp_channel; + chandef.width = NL80211_CHAN_WIDTH_20_NOHT; + chandef.center_freq1 = chandef.chan->center_freq; + } else + chandef = local->_oper_chandef; + + WARN(!cfg80211_chandef_valid(&chandef), + "control:%d MHz width:%d center: %d/%d MHz", + chandef.chan->center_freq, chandef.width, + chandef.center_freq1, chandef.center_freq2); + + if (!cfg80211_chandef_identical(&chandef, &local->_oper_chandef)) local->hw.conf.flags |= IEEE80211_CONF_OFFCHANNEL; else local->hw.conf.flags &= ~IEEE80211_CONF_OFFCHANNEL; offchannel_flag ^= local->hw.conf.flags & IEEE80211_CONF_OFFCHANNEL; - if (offchannel_flag || chan != local->hw.conf.channel || - channel_type != local->hw.conf.channel_type) { - local->hw.conf.channel = chan; - local->hw.conf.channel_type = channel_type; + if (offchannel_flag || + !cfg80211_chandef_identical(&local->hw.conf.chandef, + &local->_oper_chandef)) { + local->hw.conf.chandef = chandef; changed |= IEEE80211_CONF_CHANGE_CHANNEL; } @@ -146,7 +151,7 @@ static u32 ieee80211_hw_conf_chan(struct ieee80211_local *local) changed |= IEEE80211_CONF_CHANGE_SMPS; } - power = chan->max_power; + power = chandef.chan->max_power; rcu_read_lock(); list_for_each_entry_rcu(sdata, &local->interfaces, list) { @@ -740,11 +745,15 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) sband = local->hw.wiphy->bands[band]; if (!sband) continue; - if (!local->use_chanctx && !local->_oper_channel) { + if (!local->use_chanctx && !local->_oper_chandef.chan) { /* init channel we're on */ - local->hw.conf.channel = - local->_oper_channel = &sband->channels[0]; - local->hw.conf.channel_type = NL80211_CHAN_NO_HT; + struct cfg80211_chan_def chandef = { + .chan = &sband->channels[0], + .width = NL80211_CHAN_NO_HT, + .center_freq1 = sband->channels[0].center_freq, + .center_freq2 = 0 + }; + local->hw.conf.chandef = local->_oper_chandef = chandef; } cfg80211_chandef_create(&local->monitor_chandef, &sband->channels[0], diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 9958cb7df8f1..237e2ef42ba9 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -988,6 +988,7 @@ static void ieee80211_chswitch_work(struct work_struct *work) { struct ieee80211_sub_if_data *sdata = container_of(work, struct ieee80211_sub_if_data, u.mgd.chswitch_work); + struct ieee80211_local *local = sdata->local; struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; if (!ieee80211_sdata_running(sdata)) @@ -997,21 +998,30 @@ static void ieee80211_chswitch_work(struct work_struct *work) if (!ifmgd->associated) goto out; - sdata->local->_oper_channel = sdata->local->csa_channel; - if (!sdata->local->ops->channel_switch) { + /* + * FIXME: Here we are downgrading to NL80211_CHAN_WIDTH_20_NOHT + * and don't adjust our ht/vht settings + * This is wrong - we should behave according to the CSA params + */ + local->_oper_chandef.chan = local->csa_channel; + local->_oper_chandef.width = NL80211_CHAN_WIDTH_20_NOHT; + local->_oper_chandef.center_freq1 = + local->_oper_chandef.chan->center_freq; + local->_oper_chandef.center_freq2 = 0; + + if (!local->ops->channel_switch) { /* call "hw_config" only if doing sw channel switch */ - ieee80211_hw_config(sdata->local, - IEEE80211_CONF_CHANGE_CHANNEL); + ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL); } else { /* update the device channel directly */ - sdata->local->hw.conf.channel = sdata->local->_oper_channel; + local->hw.conf.chandef = local->_oper_chandef; } /* XXX: shouldn't really modify cfg80211-owned data! */ - ifmgd->associated->channel = sdata->local->_oper_channel; + ifmgd->associated->channel = local->_oper_chandef.chan; /* XXX: wait for a beacon first? */ - ieee80211_wake_queues_by_reason(&sdata->local->hw, + ieee80211_wake_queues_by_reason(&local->hw, IEEE80211_MAX_QUEUE_MAP, IEEE80211_QUEUE_STOP_REASON_CSA); out: diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index cb34cbbaa20c..581764f92e13 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -384,7 +384,7 @@ static void ieee80211_scan_state_send_probe(struct ieee80211_local *local, { int i; struct ieee80211_sub_if_data *sdata; - enum ieee80211_band band = local->hw.conf.channel->band; + enum ieee80211_band band = local->hw.conf.chandef.chan->band; u32 tx_flags; tx_flags = IEEE80211_TX_INTFL_OFFCHAN_TX_OK; @@ -401,7 +401,7 @@ static void ieee80211_scan_state_send_probe(struct ieee80211_local *local, local->scan_req->ssids[i].ssid_len, local->scan_req->ie, local->scan_req->ie_len, local->scan_req->rates[band], false, - tx_flags, local->hw.conf.channel, true); + tx_flags, local->hw.conf.chandef.chan, true); /* * After sending probe requests, wait for probe responses @@ -467,7 +467,7 @@ static int __ieee80211_start_scan(struct ieee80211_sub_if_data *sdata, if (local->ops->hw_scan) { __set_bit(SCAN_HW_SCANNING, &local->scanning); } else if ((req->n_channels == 1) && - (req->channels[0] == local->_oper_channel)) { + (req->channels[0] == local->_oper_chandef.chan)) { /* * If we are scanning only on the operating channel * then we do not need to stop normal activities diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h index d79e374e129a..8286dcef228b 100644 --- a/net/mac80211/trace.h +++ b/net/mac80211/trace.h @@ -28,27 +28,27 @@ #define VIF_PR_FMT " vif:%s(%d%s)" #define VIF_PR_ARG __get_str(vif_name), __entry->vif_type, __entry->p2p ? "/p2p" : "" -#define CHANDEF_ENTRY __field(u32, control_freq) \ - __field(u32, chan_width) \ - __field(u32, center_freq1) \ +#define CHANDEF_ENTRY __field(u32, control_freq) \ + __field(u32, chan_width) \ + __field(u32, center_freq1) \ __field(u32, center_freq2) -#define CHANDEF_ASSIGN(c) \ - __entry->control_freq = (c)->chan->center_freq; \ - __entry->chan_width = (c)->width; \ - __entry->center_freq1 = (c)->center_freq1; \ +#define CHANDEF_ASSIGN(c) \ + __entry->control_freq = (c)->chan ? (c)->chan->center_freq : 0; \ + __entry->chan_width = (c)->width; \ + __entry->center_freq1 = (c)->center_freq1; \ __entry->center_freq2 = (c)->center_freq2; #define CHANDEF_PR_FMT " control:%d MHz width:%d center: %d/%d MHz" -#define CHANDEF_PR_ARG __entry->control_freq, __entry->chan_width, \ +#define CHANDEF_PR_ARG __entry->control_freq, __entry->chan_width, \ __entry->center_freq1, __entry->center_freq2 -#define CHANCTX_ENTRY CHANDEF_ENTRY \ - __field(u8, rx_chains_static) \ +#define CHANCTX_ENTRY CHANDEF_ENTRY \ + __field(u8, rx_chains_static) \ __field(u8, rx_chains_dynamic) -#define CHANCTX_ASSIGN CHANDEF_ASSIGN(&ctx->conf.def) \ - __entry->rx_chains_static = ctx->conf.rx_chains_static; \ +#define CHANCTX_ASSIGN CHANDEF_ASSIGN(&ctx->conf.def) \ + __entry->rx_chains_static = ctx->conf.rx_chains_static; \ __entry->rx_chains_dynamic = ctx->conf.rx_chains_dynamic #define CHANCTX_PR_FMT CHANDEF_PR_FMT " chains:%d/%d" -#define CHANCTX_PR_ARG CHANDEF_PR_ARG, \ +#define CHANCTX_PR_ARG CHANDEF_PR_ARG, \ __entry->rx_chains_static, __entry->rx_chains_dynamic @@ -286,8 +286,7 @@ TRACE_EVENT(drv_config, __field(u16, listen_interval) __field(u8, long_frame_max_tx_count) __field(u8, short_frame_max_tx_count) - __field(int, center_freq) - __field(int, channel_type) + CHANDEF_ENTRY __field(int, smps) ), @@ -303,15 +302,13 @@ TRACE_EVENT(drv_config, local->hw.conf.long_frame_max_tx_count; __entry->short_frame_max_tx_count = local->hw.conf.short_frame_max_tx_count; - __entry->center_freq = local->hw.conf.channel ? - local->hw.conf.channel->center_freq : 0; - __entry->channel_type = local->hw.conf.channel_type; + CHANDEF_ASSIGN(&local->hw.conf.chandef) __entry->smps = local->hw.conf.smps_mode; ), TP_printk( - LOCAL_PR_FMT " ch:%#x freq:%d", - LOCAL_PR_ARG, __entry->changed, __entry->center_freq + LOCAL_PR_FMT " ch:%#x" CHANDEF_PR_FMT, + LOCAL_PR_ARG, __entry->changed, CHANDEF_PR_ARG ) ); diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 4a83d8dea840..aad0bf5d8812 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1709,7 +1709,7 @@ netdev_tx_t ieee80211_monitor_start_xmit(struct sk_buff *skb, if (chanctx_conf) chan = chanctx_conf->def.chan; else if (!local->use_chanctx) - chan = local->_oper_channel; + chan = local->_oper_chandef.chan; else goto fail_rcu; @@ -1843,7 +1843,7 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, * This is the exception! WDS style interfaces are prohibited * when channel contexts are in used so this must be valid */ - band = local->hw.conf.channel->band; + band = local->hw.conf.chandef.chan->band; break; #ifdef CONFIG_MAC80211_MESH case NL80211_IFTYPE_MESH_POINT: diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 90cc2b82869b..1734cd21c7ce 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -2171,8 +2171,7 @@ void ieee80211_dfs_radar_detected_work(struct work_struct *work) /* currently not handled */ WARN_ON(1); else { - cfg80211_chandef_create(&chandef, local->hw.conf.channel, - local->hw.conf.channel_type); + chandef = local->hw.conf.chandef; cfg80211_radar_event(local->hw.wiphy, &chandef, GFP_KERNEL); } } -- cgit v1.2.3 From c54419321455631079c7d6e60bc732dd0c5914c5 Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Mon, 25 Mar 2013 14:49:35 +0000 Subject: GRE: Refactor GRE tunneling code. Following patch refactors GRE code into ip tunneling code and GRE specific code. Common tunneling code is moved to ip_tunnel module. ip_tunnel module is written as generic library which can be used by different tunneling implementations. ip_tunnel module contains following components: - packet xmit and rcv generic code. xmit flow looks like (gre_xmit/ipip_xmit)->ip_tunnel_xmit->ip_local_out. - hash table of all devices. - lookup for tunnel devices. - control plane operations like device create, destroy, ioctl, netlink operations code. - registration for tunneling modules, like gre, ipip etc. - define single pcpu_tstats dev->tstats. - struct tnl_ptk_info added to pass parsed tunnel packet parameters. ipip.h header is renamed to ip_tunnel.h Signed-off-by: Pravin B Shelar Signed-off-by: David S. Miller --- drivers/net/vxlan.c | 2 +- include/net/gre.h | 51 ++ include/net/ip6_tunnel.h | 1 + include/net/ip_tunnels.h | 177 ++++++ include/net/ipip.h | 84 --- net/ipv4/Kconfig | 5 + net/ipv4/Makefile | 1 + net/ipv4/af_inet.c | 1 - net/ipv4/gre.c | 5 - net/ipv4/ip_gre.c | 1504 ++++++++++------------------------------------ net/ipv4/ip_tunnel.c | 1035 +++++++++++++++++++++++++++++++ net/ipv4/ip_vti.c | 2 +- net/ipv4/ipip.c | 2 +- net/ipv4/ipmr.c | 2 +- net/ipv6/af_inet6.c | 1 - net/ipv6/ip6_gre.c | 1 + net/ipv6/ip6_tunnel.c | 1 + net/ipv6/sit.c | 2 +- 18 files changed, 1594 insertions(+), 1283 deletions(-) create mode 100644 include/net/ip_tunnels.h delete mode 100644 include/net/ipip.h create mode 100644 net/ipv4/ip_tunnel.c (limited to 'include/net') diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 33427fd62515..fe9ea7d14951 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -33,7 +33,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/include/net/gre.h b/include/net/gre.h index 82665474bcb7..9f03a390c826 100644 --- a/include/net/gre.h +++ b/include/net/gre.h @@ -2,6 +2,7 @@ #define __LINUX_GRE_H #include +#include #define GREPROTO_CISCO 0 #define GREPROTO_PPTP 1 @@ -12,7 +13,57 @@ struct gre_protocol { void (*err_handler)(struct sk_buff *skb, u32 info); }; +struct gre_base_hdr { + __be16 flags; + __be16 protocol; +}; +#define GRE_HEADER_SECTION 4 + int gre_add_protocol(const struct gre_protocol *proto, u8 version); int gre_del_protocol(const struct gre_protocol *proto, u8 version); +static inline __be16 gre_flags_to_tnl_flags(__be16 flags) +{ + __be16 tflags = 0; + + if (flags & GRE_CSUM) + tflags |= TUNNEL_CSUM; + if (flags & GRE_ROUTING) + tflags |= TUNNEL_ROUTING; + if (flags & GRE_KEY) + tflags |= TUNNEL_KEY; + if (flags & GRE_SEQ) + tflags |= TUNNEL_SEQ; + if (flags & GRE_STRICT) + tflags |= TUNNEL_STRICT; + if (flags & GRE_REC) + tflags |= TUNNEL_REC; + if (flags & GRE_VERSION) + tflags |= TUNNEL_VERSION; + + return tflags; +} + +static inline __be16 tnl_flags_to_gre_flags(__be16 tflags) +{ + __be16 flags = 0; + + if (tflags & TUNNEL_CSUM) + flags |= GRE_CSUM; + if (tflags & TUNNEL_ROUTING) + flags |= GRE_ROUTING; + if (tflags & TUNNEL_KEY) + flags |= GRE_KEY; + if (tflags & TUNNEL_SEQ) + flags |= GRE_SEQ; + if (tflags & TUNNEL_STRICT) + flags |= GRE_STRICT; + if (tflags & TUNNEL_REC) + flags |= GRE_REC; + if (tflags & TUNNEL_VERSION) + flags |= GRE_VERSION; + + return flags; +} + #endif diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h index ebdef7f60862..4da5de10d1d4 100644 --- a/include/net/ip6_tunnel.h +++ b/include/net/ip6_tunnel.h @@ -3,6 +3,7 @@ #include #include +#include #include #define IP6TUNNEL_ERR_TIMEO (30*HZ) diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h new file mode 100644 index 000000000000..4b6f0b28f41f --- /dev/null +++ b/include/net/ip_tunnels.h @@ -0,0 +1,177 @@ +#ifndef __NET_IP_TUNNELS_H +#define __NET_IP_TUNNELS_H 1 + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if IS_ENABLED(CONFIG_IPV6) +#include +#include +#include +#endif + +/* Keep error state on tunnel for 30 sec */ +#define IPTUNNEL_ERR_TIMEO (30*HZ) + +/* 6rd prefix/relay information */ +#ifdef CONFIG_IPV6_SIT_6RD +struct ip_tunnel_6rd_parm { + struct in6_addr prefix; + __be32 relay_prefix; + u16 prefixlen; + u16 relay_prefixlen; +}; +#endif + +struct ip_tunnel_prl_entry { + struct ip_tunnel_prl_entry __rcu *next; + __be32 addr; + u16 flags; + struct rcu_head rcu_head; +}; + +struct ip_tunnel { + struct ip_tunnel __rcu *next; + struct hlist_node hash_node; + struct net_device *dev; + + int err_count; /* Number of arrived ICMP errors */ + unsigned long err_time; /* Time when the last ICMP error + * arrived */ + + /* These four fields used only by GRE */ + __u32 i_seqno; /* The last seen seqno */ + __u32 o_seqno; /* The last output seqno */ + int hlen; /* Precalculated header length */ + int mlink; + + struct ip_tunnel_parm parms; + + /* for SIT */ +#ifdef CONFIG_IPV6_SIT_6RD + struct ip_tunnel_6rd_parm ip6rd; +#endif + struct ip_tunnel_prl_entry __rcu *prl; /* potential router list */ + unsigned int prl_count; /* # of entries in PRL */ + int ip_tnl_net_id; + struct gro_cells gro_cells; +}; + +#define TUNNEL_CSUM __cpu_to_be16(0x01) +#define TUNNEL_ROUTING __cpu_to_be16(0x02) +#define TUNNEL_KEY __cpu_to_be16(0x04) +#define TUNNEL_SEQ __cpu_to_be16(0x08) +#define TUNNEL_STRICT __cpu_to_be16(0x10) +#define TUNNEL_REC __cpu_to_be16(0x20) +#define TUNNEL_VERSION __cpu_to_be16(0x40) +#define TUNNEL_NO_KEY __cpu_to_be16(0x80) + +struct tnl_ptk_info { + __be16 flags; + __be16 proto; + __be32 key; + __be32 seq; +}; + +#define PACKET_RCVD 0 +#define PACKET_REJECT 1 + +#define IP_TNL_HASH_BITS 10 +#define IP_TNL_HASH_SIZE (1 << IP_TNL_HASH_BITS) + +struct ip_tunnel_net { + struct hlist_head *tunnels; + struct net_device *fb_tunnel_dev; +}; + +int ip_tunnel_init(struct net_device *dev); +void ip_tunnel_uninit(struct net_device *dev); +void ip_tunnel_dellink(struct net_device *dev, struct list_head *head); +int __net_init ip_tunnel_init_net(struct net *net, int ip_tnl_net_id, + struct rtnl_link_ops *ops, char *devname); + +void __net_exit ip_tunnel_delete_net(struct ip_tunnel_net *itn); + +void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, + const struct iphdr *tnl_params); +int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd); +int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu); + +struct rtnl_link_stats64 *ip_tunnel_get_stats64(struct net_device *dev, + struct rtnl_link_stats64 *tot); +struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn, + int link, __be16 flags, + __be32 remote, __be32 local, + __be32 key); + +int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb, + const struct tnl_ptk_info *tpi, bool log_ecn_error); +int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[], + struct ip_tunnel_parm *p); +int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[], + struct ip_tunnel_parm *p); +void ip_tunnel_setup(struct net_device *dev, int net_id); + +/* Extract dsfield from inner protocol */ +static inline u8 ip_tunnel_get_dsfield(const struct iphdr *iph, + const struct sk_buff *skb) +{ + if (skb->protocol == htons(ETH_P_IP)) + return iph->tos; + else if (skb->protocol == htons(ETH_P_IPV6)) + return ipv6_get_dsfield((const struct ipv6hdr *)iph); + else + return 0; +} + +/* Propogate ECN bits out */ +static inline u8 ip_tunnel_ecn_encap(u8 tos, const struct iphdr *iph, + const struct sk_buff *skb) +{ + u8 inner = ip_tunnel_get_dsfield(iph, skb); + + return INET_ECN_encapsulate(tos, inner); +} + +static inline void tunnel_ip_select_ident(struct sk_buff *skb, + const struct iphdr *old_iph, + struct dst_entry *dst) +{ + struct iphdr *iph = ip_hdr(skb); + + /* Use inner packet iph-id if possible. */ + if (skb->protocol == htons(ETH_P_IP) && old_iph->id) + iph->id = old_iph->id; + else + __ip_select_ident(iph, dst, + (skb_shinfo(skb)->gso_segs ?: 1) - 1); +} + +static inline void iptunnel_xmit(struct sk_buff *skb, struct net_device *dev) +{ + int err; + int pkt_len = skb->len - skb_transport_offset(skb); + struct pcpu_tstats *tstats = this_cpu_ptr(dev->tstats); + + nf_reset(skb); + + err = ip_local_out(skb); + if (likely(net_xmit_eval(err) == 0)) { + u64_stats_update_begin(&tstats->syncp); + tstats->tx_bytes += pkt_len; + tstats->tx_packets++; + u64_stats_update_end(&tstats->syncp); + } else { + dev->stats.tx_errors++; + dev->stats.tx_aborted_errors++; + } +} +#endif /* __NET_IP_TUNNELS_H */ diff --git a/include/net/ipip.h b/include/net/ipip.h deleted file mode 100644 index 483b91a10bb2..000000000000 --- a/include/net/ipip.h +++ /dev/null @@ -1,84 +0,0 @@ -#ifndef __NET_IPIP_H -#define __NET_IPIP_H 1 - -#include -#include -#include - -/* Keep error state on tunnel for 30 sec */ -#define IPTUNNEL_ERR_TIMEO (30*HZ) - -/* 6rd prefix/relay information */ -struct ip_tunnel_6rd_parm { - struct in6_addr prefix; - __be32 relay_prefix; - u16 prefixlen; - u16 relay_prefixlen; -}; - -struct ip_tunnel { - struct ip_tunnel __rcu *next; - struct net_device *dev; - - int err_count; /* Number of arrived ICMP errors */ - unsigned long err_time; /* Time when the last ICMP error arrived */ - - /* These four fields used only by GRE */ - __u32 i_seqno; /* The last seen seqno */ - __u32 o_seqno; /* The last output seqno */ - int hlen; /* Precalculated GRE header length */ - int mlink; - - struct ip_tunnel_parm parms; - - /* for SIT */ -#ifdef CONFIG_IPV6_SIT_6RD - struct ip_tunnel_6rd_parm ip6rd; -#endif - struct ip_tunnel_prl_entry __rcu *prl; /* potential router list */ - unsigned int prl_count; /* # of entries in PRL */ - - struct gro_cells gro_cells; -}; - -struct ip_tunnel_prl_entry { - struct ip_tunnel_prl_entry __rcu *next; - __be32 addr; - u16 flags; - struct rcu_head rcu_head; -}; - -static inline void iptunnel_xmit(struct sk_buff *skb, struct net_device *dev) -{ - int err; - int pkt_len = skb->len - skb_transport_offset(skb); - struct pcpu_tstats *tstats = this_cpu_ptr(dev->tstats); - - nf_reset(skb); - - err = ip_local_out(skb); - if (likely(net_xmit_eval(err) == 0)) { - u64_stats_update_begin(&tstats->syncp); - tstats->tx_bytes += pkt_len; - tstats->tx_packets++; - u64_stats_update_end(&tstats->syncp); - } else { - dev->stats.tx_errors++; - dev->stats.tx_aborted_errors++; - } -} - -static inline void tunnel_ip_select_ident(struct sk_buff *skb, - const struct iphdr *old_iph, - struct dst_entry *dst) -{ - struct iphdr *iph = ip_hdr(skb); - - /* Use inner packet iph-id if possible. */ - if (skb->protocol == htons(ETH_P_IP) && old_iph->id) - iph->id = old_iph->id; - else - __ip_select_ident(iph, dst, - (skb_shinfo(skb)->gso_segs ?: 1) - 1); -} -#endif diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 7944df768454..2073226a8a63 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -186,9 +186,14 @@ config NET_IPGRE_DEMUX This is helper module to demultiplex GRE packets on GRE version field criteria. Required by ip_gre and pptp modules. +config NET_IP_TUNNEL + tristate + default n + config NET_IPGRE tristate "IP: GRE tunnels over IP" depends on (IPV6 || IPV6=n) && NET_IPGRE_DEMUX + select NET_IP_TUNNEL help Tunneling means encapsulating data of one protocol type within another protocol and sending it over a channel that understands the diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index 15ca63ec604e..089cb9f36387 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -13,6 +13,7 @@ obj-y := route.o inetpeer.o protocol.o \ fib_frontend.o fib_semantics.o fib_trie.o \ inet_fragment.o ping.o +obj-$(CONFIG_NET_IP_TUNNEL) += ip_tunnel.o obj-$(CONFIG_SYSCTL) += sysctl_net_ipv4.o obj-$(CONFIG_PROC_FS) += proc.o obj-$(CONFIG_IP_MULTIPLE_TABLES) += fib_rules.o diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 70b2d4cf5801..93824c57b108 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -111,7 +111,6 @@ #include #include #include -#include #include #include #include diff --git a/net/ipv4/gre.c b/net/ipv4/gre.c index 7a4c710c4cdd..d2d5a99fba09 100644 --- a/net/ipv4/gre.c +++ b/net/ipv4/gre.c @@ -27,11 +27,6 @@ static const struct gre_protocol __rcu *gre_proto[GREPROTO_MAX] __read_mostly; static DEFINE_SPINLOCK(gre_proto_lock); -struct gre_base_hdr { - __be16 flags; - __be16 protocol; -}; -#define GRE_HEADER_SECTION 4 int gre_add_protocol(const struct gre_protocol *proto, u8 version) { diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 2e94289a17e8..ad662e906f7e 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -37,7 +37,7 @@ #include #include #include -#include +#include #include #include #include @@ -108,15 +108,6 @@ fatal route to network, even if it were you who configured fatal static route: you are innocent. :-) - - - 3. Really, ipv4/ipip.c, ipv4/ip_gre.c and ipv6/sit.c contain - practically identical code. It would be good to glue them - together, but it is not very evident, how to make them modular. - sit is integral part of IPv6, ipip and gre are naturally modular. - We could extract common parts (hash table, ioctl etc) - to a separate module (ip_tunnel.c). - Alexey Kuznetsov. */ @@ -126,400 +117,135 @@ MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN"); static struct rtnl_link_ops ipgre_link_ops __read_mostly; static int ipgre_tunnel_init(struct net_device *dev); -static void ipgre_tunnel_setup(struct net_device *dev); -static int ipgre_tunnel_bind_dev(struct net_device *dev); - -/* Fallback tunnel: no source, no destination, no key, no options */ - -#define HASH_SIZE 16 static int ipgre_net_id __read_mostly; -struct ipgre_net { - struct ip_tunnel __rcu *tunnels[4][HASH_SIZE]; - - struct net_device *fb_tunnel_dev; -}; - -/* Tunnel hash table */ - -/* - 4 hash tables: - - 3: (remote,local) - 2: (remote,*) - 1: (*,local) - 0: (*,*) +static int gre_tap_net_id __read_mostly; - We require exact key match i.e. if a key is present in packet - it will match only tunnel with the same key; if it is not present, - it will match only keyless tunnel. - - All keysless packets, if not matched configured keyless tunnels - will match fallback tunnel. - */ +static __sum16 check_checksum(struct sk_buff *skb) +{ + __sum16 csum = 0; -#define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF) + switch (skb->ip_summed) { + case CHECKSUM_COMPLETE: + csum = csum_fold(skb->csum); -#define tunnels_r_l tunnels[3] -#define tunnels_r tunnels[2] -#define tunnels_l tunnels[1] -#define tunnels_wc tunnels[0] + if (!csum) + break; + /* Fall through. */ -static struct rtnl_link_stats64 *ipgre_get_stats64(struct net_device *dev, - struct rtnl_link_stats64 *tot) -{ - int i; - - for_each_possible_cpu(i) { - const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i); - u64 rx_packets, rx_bytes, tx_packets, tx_bytes; - unsigned int start; - - do { - start = u64_stats_fetch_begin_bh(&tstats->syncp); - rx_packets = tstats->rx_packets; - tx_packets = tstats->tx_packets; - rx_bytes = tstats->rx_bytes; - tx_bytes = tstats->tx_bytes; - } while (u64_stats_fetch_retry_bh(&tstats->syncp, start)); - - tot->rx_packets += rx_packets; - tot->tx_packets += tx_packets; - tot->rx_bytes += rx_bytes; - tot->tx_bytes += tx_bytes; + case CHECKSUM_NONE: + skb->csum = 0; + csum = __skb_checksum_complete(skb); + skb->ip_summed = CHECKSUM_COMPLETE; + break; } - tot->multicast = dev->stats.multicast; - tot->rx_crc_errors = dev->stats.rx_crc_errors; - tot->rx_fifo_errors = dev->stats.rx_fifo_errors; - tot->rx_length_errors = dev->stats.rx_length_errors; - tot->rx_frame_errors = dev->stats.rx_frame_errors; - tot->rx_errors = dev->stats.rx_errors; - - tot->tx_fifo_errors = dev->stats.tx_fifo_errors; - tot->tx_carrier_errors = dev->stats.tx_carrier_errors; - tot->tx_dropped = dev->stats.tx_dropped; - tot->tx_aborted_errors = dev->stats.tx_aborted_errors; - tot->tx_errors = dev->stats.tx_errors; - - return tot; + return csum; } -/* Does key in tunnel parameters match packet */ -static bool ipgre_key_match(const struct ip_tunnel_parm *p, - __be16 flags, __be32 key) +static int ip_gre_calc_hlen(__be16 o_flags) { - if (p->i_flags & GRE_KEY) { - if (flags & GRE_KEY) - return key == p->i_key; - else - return false; /* key expected, none present */ - } else - return !(flags & GRE_KEY); -} + int addend = 4; -/* Given src, dst and key, find appropriate for input tunnel. */ + if (o_flags&TUNNEL_CSUM) + addend += 4; + if (o_flags&TUNNEL_KEY) + addend += 4; + if (o_flags&TUNNEL_SEQ) + addend += 4; + return addend; +} -static struct ip_tunnel *ipgre_tunnel_lookup(struct net_device *dev, - __be32 remote, __be32 local, - __be16 flags, __be32 key, - __be16 gre_proto) +static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi, + bool *csum_err, int *hdr_len) { - struct net *net = dev_net(dev); - int link = dev->ifindex; - unsigned int h0 = HASH(remote); - unsigned int h1 = HASH(key); - struct ip_tunnel *t, *cand = NULL; - struct ipgre_net *ign = net_generic(net, ipgre_net_id); - int dev_type = (gre_proto == htons(ETH_P_TEB)) ? - ARPHRD_ETHER : ARPHRD_IPGRE; - int score, cand_score = 4; - - for_each_ip_tunnel_rcu(t, ign->tunnels_r_l[h0 ^ h1]) { - if (local != t->parms.iph.saddr || - remote != t->parms.iph.daddr || - !(t->dev->flags & IFF_UP)) - continue; - - if (!ipgre_key_match(&t->parms, flags, key)) - continue; - - if (t->dev->type != ARPHRD_IPGRE && - t->dev->type != dev_type) - continue; - - score = 0; - if (t->parms.link != link) - score |= 1; - if (t->dev->type != dev_type) - score |= 2; - if (score == 0) - return t; - - if (score < cand_score) { - cand = t; - cand_score = score; - } - } - - for_each_ip_tunnel_rcu(t, ign->tunnels_r[h0 ^ h1]) { - if (remote != t->parms.iph.daddr || - !(t->dev->flags & IFF_UP)) - continue; - - if (!ipgre_key_match(&t->parms, flags, key)) - continue; - - if (t->dev->type != ARPHRD_IPGRE && - t->dev->type != dev_type) - continue; - - score = 0; - if (t->parms.link != link) - score |= 1; - if (t->dev->type != dev_type) - score |= 2; - if (score == 0) - return t; - - if (score < cand_score) { - cand = t; - cand_score = score; - } - } + struct iphdr *iph = ip_hdr(skb); + struct gre_base_hdr *greh; + __be32 *options; - for_each_ip_tunnel_rcu(t, ign->tunnels_l[h1]) { - if ((local != t->parms.iph.saddr && - (local != t->parms.iph.daddr || - !ipv4_is_multicast(local))) || - !(t->dev->flags & IFF_UP)) - continue; - - if (!ipgre_key_match(&t->parms, flags, key)) - continue; - - if (t->dev->type != ARPHRD_IPGRE && - t->dev->type != dev_type) - continue; - - score = 0; - if (t->parms.link != link) - score |= 1; - if (t->dev->type != dev_type) - score |= 2; - if (score == 0) - return t; - - if (score < cand_score) { - cand = t; - cand_score = score; - } - } + if (unlikely(!pskb_may_pull(skb, sizeof(struct gre_base_hdr)))) + return -EINVAL; - for_each_ip_tunnel_rcu(t, ign->tunnels_wc[h1]) { - if (t->parms.i_key != key || - !(t->dev->flags & IFF_UP)) - continue; - - if (t->dev->type != ARPHRD_IPGRE && - t->dev->type != dev_type) - continue; - - score = 0; - if (t->parms.link != link) - score |= 1; - if (t->dev->type != dev_type) - score |= 2; - if (score == 0) - return t; - - if (score < cand_score) { - cand = t; - cand_score = score; - } - } + greh = (struct gre_base_hdr *)((u8 *)iph + (iph->ihl << 2)); + if (unlikely(greh->flags & (GRE_VERSION | GRE_ROUTING))) + return -EINVAL; - if (cand != NULL) - return cand; + tpi->flags = gre_flags_to_tnl_flags(greh->flags); + *hdr_len = ip_gre_calc_hlen(tpi->flags); - dev = ign->fb_tunnel_dev; - if (dev->flags & IFF_UP) - return netdev_priv(dev); + if (!pskb_may_pull(skb, *hdr_len)) + return -EINVAL; - return NULL; -} + tpi->proto = greh->protocol; -static struct ip_tunnel __rcu **__ipgre_bucket(struct ipgre_net *ign, - struct ip_tunnel_parm *parms) -{ - __be32 remote = parms->iph.daddr; - __be32 local = parms->iph.saddr; - __be32 key = parms->i_key; - unsigned int h = HASH(key); - int prio = 0; - - if (local) - prio |= 1; - if (remote && !ipv4_is_multicast(remote)) { - prio |= 2; - h ^= HASH(remote); + options = (__be32 *)(greh + 1); + if (greh->flags & GRE_CSUM) { + if (check_checksum(skb)) { + *csum_err = true; + return -EINVAL; + } + options++; } - return &ign->tunnels[prio][h]; -} - -static inline struct ip_tunnel __rcu **ipgre_bucket(struct ipgre_net *ign, - struct ip_tunnel *t) -{ - return __ipgre_bucket(ign, &t->parms); -} - -static void ipgre_tunnel_link(struct ipgre_net *ign, struct ip_tunnel *t) -{ - struct ip_tunnel __rcu **tp = ipgre_bucket(ign, t); + if (greh->flags & GRE_KEY) { + tpi->key = *options; + options++; + } else + tpi->key = 0; - rcu_assign_pointer(t->next, rtnl_dereference(*tp)); - rcu_assign_pointer(*tp, t); -} + if (unlikely(greh->flags & GRE_SEQ)) { + tpi->seq = *options; + options++; + } else + tpi->seq = 0; -static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t) -{ - struct ip_tunnel __rcu **tp; - struct ip_tunnel *iter; - - for (tp = ipgre_bucket(ign, t); - (iter = rtnl_dereference(*tp)) != NULL; - tp = &iter->next) { - if (t == iter) { - rcu_assign_pointer(*tp, t->next); - break; + /* WCCP version 1 and 2 protocol decoding. + * - Change protocol to IP + * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header + */ + if (greh->flags == 0 && tpi->proto == htons(ETH_P_WCCP)) { + tpi->proto = htons(ETH_P_IP); + if ((*(u8 *)options & 0xF0) != 0x40) { + *hdr_len += 4; + if (!pskb_may_pull(skb, *hdr_len)) + return -EINVAL; } } -} - -static struct ip_tunnel *ipgre_tunnel_find(struct net *net, - struct ip_tunnel_parm *parms, - int type) -{ - __be32 remote = parms->iph.daddr; - __be32 local = parms->iph.saddr; - __be32 key = parms->i_key; - int link = parms->link; - struct ip_tunnel *t; - struct ip_tunnel __rcu **tp; - struct ipgre_net *ign = net_generic(net, ipgre_net_id); - - for (tp = __ipgre_bucket(ign, parms); - (t = rtnl_dereference(*tp)) != NULL; - tp = &t->next) - if (local == t->parms.iph.saddr && - remote == t->parms.iph.daddr && - key == t->parms.i_key && - link == t->parms.link && - type == t->dev->type) - break; - - return t; -} - -static struct ip_tunnel *ipgre_tunnel_locate(struct net *net, - struct ip_tunnel_parm *parms, int create) -{ - struct ip_tunnel *t, *nt; - struct net_device *dev; - char name[IFNAMSIZ]; - struct ipgre_net *ign = net_generic(net, ipgre_net_id); - - t = ipgre_tunnel_find(net, parms, ARPHRD_IPGRE); - if (t || !create) - return t; - - if (parms->name[0]) - strlcpy(name, parms->name, IFNAMSIZ); - else - strcpy(name, "gre%d"); - - dev = alloc_netdev(sizeof(*t), name, ipgre_tunnel_setup); - if (!dev) - return NULL; - - dev_net_set(dev, net); - - nt = netdev_priv(dev); - nt->parms = *parms; - dev->rtnl_link_ops = &ipgre_link_ops; - dev->mtu = ipgre_tunnel_bind_dev(dev); - - if (register_netdevice(dev) < 0) - goto failed_free; - - /* Can use a lockless transmit, unless we generate output sequences */ - if (!(nt->parms.o_flags & GRE_SEQ)) - dev->features |= NETIF_F_LLTX; - - dev_hold(dev); - ipgre_tunnel_link(ign, nt); - return nt; - -failed_free: - free_netdev(dev); - return NULL; -} - -static void ipgre_tunnel_uninit(struct net_device *dev) -{ - struct net *net = dev_net(dev); - struct ipgre_net *ign = net_generic(net, ipgre_net_id); - - ipgre_tunnel_unlink(ign, netdev_priv(dev)); - dev_put(dev); + return 0; } - static void ipgre_err(struct sk_buff *skb, u32 info) { -/* All the routers (except for Linux) return only - 8 bytes of packet payload. It means, that precise relaying of - ICMP in the real Internet is absolutely infeasible. + /* All the routers (except for Linux) return only + 8 bytes of packet payload. It means, that precise relaying of + ICMP in the real Internet is absolutely infeasible. - Moreover, Cisco "wise men" put GRE key to the third word - in GRE header. It makes impossible maintaining even soft state for keyed - GRE tunnels with enabled checksum. Tell them "thank you". - - Well, I wonder, rfc1812 was written by Cisco employee, - what the hell these idiots break standards established - by themselves??? - */ + Moreover, Cisco "wise men" put GRE key to the third word + in GRE header. It makes impossible maintaining even soft + state for keyed GRE tunnels with enabled checksum. Tell + them "thank you". + Well, I wonder, rfc1812 was written by Cisco employee, + what the hell these idiots break standards established + by themselves??? + */ + struct net *net = dev_net(skb->dev); + struct ip_tunnel_net *itn; const struct iphdr *iph = (const struct iphdr *)skb->data; - __be16 *p = (__be16 *)(skb->data+(iph->ihl<<2)); - int grehlen = (iph->ihl<<2) + 4; const int type = icmp_hdr(skb)->type; const int code = icmp_hdr(skb)->code; struct ip_tunnel *t; - __be16 flags; - __be32 key = 0; + struct tnl_ptk_info tpi; + int hdr_len; + bool csum_err = false; - flags = p[0]; - if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) { - if (flags&(GRE_VERSION|GRE_ROUTING)) + if (parse_gre_header(skb, &tpi, &csum_err, &hdr_len)) { + if (!csum_err) /* ignore csum errors. */ return; - if (flags&GRE_KEY) { - grehlen += 4; - if (flags&GRE_CSUM) - grehlen += 4; - } } - /* If only 8 bytes returned, keyed message will be dropped here */ - if (skb_headlen(skb) < grehlen) - return; - - if (flags & GRE_KEY) - key = *(((__be32 *)p) + (grehlen / 4) - 1); - switch (type) { default: case ICMP_PARAMETERPROB: @@ -548,8 +274,13 @@ static void ipgre_err(struct sk_buff *skb, u32 info) break; } - t = ipgre_tunnel_lookup(skb->dev, iph->daddr, iph->saddr, - flags, key, p[1]); + if (tpi.proto == htons(ETH_P_TEB)) + itn = net_generic(net, gre_tap_net_id); + else + itn = net_generic(net, ipgre_net_id); + + t = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi.flags, + iph->daddr, iph->saddr, tpi.key); if (t == NULL) return; @@ -578,158 +309,33 @@ static void ipgre_err(struct sk_buff *skb, u32 info) t->err_time = jiffies; } -static inline u8 -ipgre_ecn_encapsulate(u8 tos, const struct iphdr *old_iph, struct sk_buff *skb) -{ - u8 inner = 0; - if (skb->protocol == htons(ETH_P_IP)) - inner = old_iph->tos; - else if (skb->protocol == htons(ETH_P_IPV6)) - inner = ipv6_get_dsfield((const struct ipv6hdr *)old_iph); - return INET_ECN_encapsulate(tos, inner); -} - static int ipgre_rcv(struct sk_buff *skb) { + struct net *net = dev_net(skb->dev); + struct ip_tunnel_net *itn; const struct iphdr *iph; - u8 *h; - __be16 flags; - __sum16 csum = 0; - __be32 key = 0; - u32 seqno = 0; struct ip_tunnel *tunnel; - int offset = 4; - __be16 gre_proto; - int err; + struct tnl_ptk_info tpi; + int hdr_len; + bool csum_err = false; - if (!pskb_may_pull(skb, 16)) + if (parse_gre_header(skb, &tpi, &csum_err, &hdr_len) < 0) goto drop; - iph = ip_hdr(skb); - h = skb->data; - flags = *(__be16 *)h; - - if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) { - /* - Version must be 0. - - We do not support routing headers. - */ - if (flags&(GRE_VERSION|GRE_ROUTING)) - goto drop; - - if (flags&GRE_CSUM) { - switch (skb->ip_summed) { - case CHECKSUM_COMPLETE: - csum = csum_fold(skb->csum); - if (!csum) - break; - /* fall through */ - case CHECKSUM_NONE: - skb->csum = 0; - csum = __skb_checksum_complete(skb); - skb->ip_summed = CHECKSUM_COMPLETE; - } - offset += 4; - } - if (flags&GRE_KEY) { - key = *(__be32 *)(h + offset); - offset += 4; - } - if (flags&GRE_SEQ) { - seqno = ntohl(*(__be32 *)(h + offset)); - offset += 4; - } - } + if (tpi.proto == htons(ETH_P_TEB)) + itn = net_generic(net, gre_tap_net_id); + else + itn = net_generic(net, ipgre_net_id); - gre_proto = *(__be16 *)(h + 2); + iph = ip_hdr(skb); + tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi.flags, + iph->saddr, iph->daddr, tpi.key); - tunnel = ipgre_tunnel_lookup(skb->dev, - iph->saddr, iph->daddr, flags, key, - gre_proto); if (tunnel) { - struct pcpu_tstats *tstats; - - secpath_reset(skb); - - skb->protocol = gre_proto; - /* WCCP version 1 and 2 protocol decoding. - * - Change protocol to IP - * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header - */ - if (flags == 0 && gre_proto == htons(ETH_P_WCCP)) { - skb->protocol = htons(ETH_P_IP); - if ((*(h + offset) & 0xF0) != 0x40) - offset += 4; - } - - skb->mac_header = skb->network_header; - __pskb_pull(skb, offset); - skb_postpull_rcsum(skb, skb_transport_header(skb), offset); - skb->pkt_type = PACKET_HOST; -#ifdef CONFIG_NET_IPGRE_BROADCAST - if (ipv4_is_multicast(iph->daddr)) { - /* Looped back packet, drop it! */ - if (rt_is_output_route(skb_rtable(skb))) - goto drop; - tunnel->dev->stats.multicast++; - skb->pkt_type = PACKET_BROADCAST; - } -#endif - - if (((flags&GRE_CSUM) && csum) || - (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) { - tunnel->dev->stats.rx_crc_errors++; - tunnel->dev->stats.rx_errors++; - goto drop; - } - if (tunnel->parms.i_flags&GRE_SEQ) { - if (!(flags&GRE_SEQ) || - (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) { - tunnel->dev->stats.rx_fifo_errors++; - tunnel->dev->stats.rx_errors++; - goto drop; - } - tunnel->i_seqno = seqno + 1; - } - - /* Warning: All skb pointers will be invalidated! */ - if (tunnel->dev->type == ARPHRD_ETHER) { - if (!pskb_may_pull(skb, ETH_HLEN)) { - tunnel->dev->stats.rx_length_errors++; - tunnel->dev->stats.rx_errors++; - goto drop; - } - - iph = ip_hdr(skb); - skb->protocol = eth_type_trans(skb, tunnel->dev); - skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); - } - - __skb_tunnel_rx(skb, tunnel->dev); - - skb_reset_network_header(skb); - err = IP_ECN_decapsulate(iph, skb); - if (unlikely(err)) { - if (log_ecn_error) - net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n", - &iph->saddr, iph->tos); - if (err > 1) { - ++tunnel->dev->stats.rx_frame_errors; - ++tunnel->dev->stats.rx_errors; - goto drop; - } - } - - tstats = this_cpu_ptr(tunnel->dev->tstats); - u64_stats_update_begin(&tstats->syncp); - tstats->rx_packets++; - tstats->rx_bytes += skb->len; - u64_stats_update_end(&tstats->syncp); - - gro_cells_receive(&tunnel->gro_cells, skb); + ip_tunnel_rcv(tunnel, skb, &tpi, log_ecn_error); return 0; } icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); - drop: kfree_skb(skb); return 0; @@ -746,7 +352,7 @@ static struct sk_buff *handle_offloads(struct ip_tunnel *tunnel, struct sk_buff skb_shinfo(skb)->gso_type |= SKB_GSO_GRE; return skb; } else if (skb->ip_summed == CHECKSUM_PARTIAL && - tunnel->parms.o_flags&GRE_CSUM) { + tunnel->parms.o_flags&TUNNEL_CSUM) { err = skb_checksum_help(skb); if (unlikely(err)) goto error; @@ -760,480 +366,157 @@ error: return ERR_PTR(err); } -static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) +static struct sk_buff *gre_build_header(struct sk_buff *skb, + const struct tnl_ptk_info *tpi, + int hdr_len) { - struct ip_tunnel *tunnel = netdev_priv(dev); - const struct iphdr *old_iph; - const struct iphdr *tiph; - struct flowi4 fl4; - u8 tos; - __be16 df; - struct rtable *rt; /* Route to the other host */ - struct net_device *tdev; /* Device to other host */ - struct iphdr *iph; /* Our new IP header */ - unsigned int max_headroom; /* The extra header space needed */ - int gre_hlen; - __be32 dst; - int mtu; - u8 ttl; - int err; - - skb = handle_offloads(tunnel, skb); - if (IS_ERR(skb)) { - dev->stats.tx_dropped++; - return NETDEV_TX_OK; - } + struct gre_base_hdr *greh; - if (!skb->encapsulation) { - skb_reset_inner_headers(skb); - skb->encapsulation = 1; - } + skb_push(skb, hdr_len); - old_iph = ip_hdr(skb); + greh = (struct gre_base_hdr *)skb->data; + greh->flags = tnl_flags_to_gre_flags(tpi->flags); + greh->protocol = tpi->proto; - if (dev->type == ARPHRD_ETHER) - IPCB(skb)->flags = 0; + if (tpi->flags&(TUNNEL_KEY|TUNNEL_CSUM|TUNNEL_SEQ)) { + __be32 *ptr = (__be32 *)(((u8 *)greh) + hdr_len - 4); - if (dev->header_ops && dev->type == ARPHRD_IPGRE) { - gre_hlen = 0; - tiph = (const struct iphdr *)skb->data; - } else { - gre_hlen = tunnel->hlen; - tiph = &tunnel->parms.iph; - } - - if ((dst = tiph->daddr) == 0) { - /* NBMA tunnel */ - - if (skb_dst(skb) == NULL) { - dev->stats.tx_fifo_errors++; - goto tx_error; + if (tpi->flags&TUNNEL_SEQ) { + *ptr = tpi->seq; + ptr--; } - - if (skb->protocol == htons(ETH_P_IP)) { - rt = skb_rtable(skb); - dst = rt_nexthop(rt, old_iph->daddr); + if (tpi->flags&TUNNEL_KEY) { + *ptr = tpi->key; + ptr--; } -#if IS_ENABLED(CONFIG_IPV6) - else if (skb->protocol == htons(ETH_P_IPV6)) { - const struct in6_addr *addr6; - struct neighbour *neigh; - bool do_tx_error_icmp; - int addr_type; - - neigh = dst_neigh_lookup(skb_dst(skb), &ipv6_hdr(skb)->daddr); - if (neigh == NULL) - goto tx_error; - - addr6 = (const struct in6_addr *)&neigh->primary_key; - addr_type = ipv6_addr_type(addr6); - - if (addr_type == IPV6_ADDR_ANY) { - addr6 = &ipv6_hdr(skb)->daddr; - addr_type = ipv6_addr_type(addr6); - } - - if ((addr_type & IPV6_ADDR_COMPATv4) == 0) - do_tx_error_icmp = true; - else { - do_tx_error_icmp = false; - dst = addr6->s6_addr32[3]; - } - neigh_release(neigh); - if (do_tx_error_icmp) - goto tx_error_icmp; + if (tpi->flags&TUNNEL_CSUM && + !(skb_shinfo(skb)->gso_type & SKB_GSO_GRE)) { + *(__sum16 *)ptr = 0; + *(__sum16 *)ptr = csum_fold(skb_checksum(skb, 0, + skb->len, 0)); } -#endif - else - goto tx_error; } - ttl = tiph->ttl; - tos = tiph->tos; - if (tos & 0x1) { - tos &= ~0x1; - if (skb->protocol == htons(ETH_P_IP)) - tos = old_iph->tos; - else if (skb->protocol == htons(ETH_P_IPV6)) - tos = ipv6_get_dsfield((const struct ipv6hdr *)old_iph); - } + return skb; +} - rt = ip_route_output_gre(dev_net(dev), &fl4, dst, tiph->saddr, - tunnel->parms.o_key, RT_TOS(tos), - tunnel->parms.link); - if (IS_ERR(rt)) { - dev->stats.tx_carrier_errors++; - goto tx_error; - } - tdev = rt->dst.dev; +static void __gre_xmit(struct sk_buff *skb, struct net_device *dev, + const struct iphdr *tnl_params, + __be16 proto) +{ + struct ip_tunnel *tunnel = netdev_priv(dev); + struct tnl_ptk_info tpi; - if (tdev == dev) { - ip_rt_put(rt); - dev->stats.collisions++; - goto tx_error; + if (likely(!skb->encapsulation)) { + skb_reset_inner_headers(skb); + skb->encapsulation = 1; } - df = tiph->frag_off; - if (df) - mtu = dst_mtu(&rt->dst) - dev->hard_header_len - tunnel->hlen; - else - mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu; - - if (skb_dst(skb)) - skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); - - if (skb->protocol == htons(ETH_P_IP)) { - df |= (old_iph->frag_off&htons(IP_DF)); + tpi.flags = tunnel->parms.o_flags; + tpi.proto = proto; + tpi.key = tunnel->parms.o_key; + if (tunnel->parms.o_flags & TUNNEL_SEQ) + tunnel->o_seqno++; + tpi.seq = htonl(tunnel->o_seqno); - if (!skb_is_gso(skb) && - (old_iph->frag_off&htons(IP_DF)) && - mtu < ntohs(old_iph->tot_len)) { - icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); - ip_rt_put(rt); - goto tx_error; - } + /* Push GRE header. */ + skb = gre_build_header(skb, &tpi, tunnel->hlen); + if (unlikely(!skb)) { + dev->stats.tx_dropped++; + return; } -#if IS_ENABLED(CONFIG_IPV6) - else if (skb->protocol == htons(ETH_P_IPV6)) { - struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb); - - if (rt6 && mtu < dst_mtu(skb_dst(skb)) && mtu >= IPV6_MIN_MTU) { - if ((tunnel->parms.iph.daddr && - !ipv4_is_multicast(tunnel->parms.iph.daddr)) || - rt6->rt6i_dst.plen == 128) { - rt6->rt6i_flags |= RTF_MODIFIED; - dst_metric_set(skb_dst(skb), RTAX_MTU, mtu); - } - } - if (!skb_is_gso(skb) && - mtu >= IPV6_MIN_MTU && - mtu < skb->len - tunnel->hlen + gre_hlen) { - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); - ip_rt_put(rt); - goto tx_error; - } - } -#endif + ip_tunnel_xmit(skb, dev, tnl_params); +} - if (tunnel->err_count > 0) { - if (time_before(jiffies, - tunnel->err_time + IPTUNNEL_ERR_TIMEO)) { - tunnel->err_count--; +static netdev_tx_t ipgre_xmit(struct sk_buff *skb, + struct net_device *dev) +{ + struct ip_tunnel *tunnel = netdev_priv(dev); + const struct iphdr *tnl_params; - dst_link_failure(skb); - } else - tunnel->err_count = 0; - } + skb = handle_offloads(tunnel, skb); + if (IS_ERR(skb)) + goto out; - max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen + rt->dst.header_len; - - if (skb_headroom(skb) < max_headroom || skb_shared(skb)|| - (skb_cloned(skb) && !skb_clone_writable(skb, 0))) { - struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom); - if (max_headroom > dev->needed_headroom) - dev->needed_headroom = max_headroom; - if (!new_skb) { - ip_rt_put(rt); - dev->stats.tx_dropped++; - dev_kfree_skb(skb); - return NETDEV_TX_OK; - } - if (skb->sk) - skb_set_owner_w(new_skb, skb->sk); - dev_kfree_skb(skb); - skb = new_skb; - old_iph = ip_hdr(skb); - /* Warning : tiph value might point to freed memory */ - } + if (dev->header_ops) { + /* Need space for new headers */ + if (skb_cow_head(skb, dev->needed_headroom - + (tunnel->hlen + sizeof(struct iphdr)))); + goto free_skb; - skb_push(skb, gre_hlen); - skb_reset_network_header(skb); - skb_set_transport_header(skb, sizeof(*iph)); - memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); - IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED | - IPSKB_REROUTED); - skb_dst_drop(skb); - skb_dst_set(skb, &rt->dst); - - /* - * Push down and install the IPIP header. - */ + tnl_params = (const struct iphdr *)skb->data; - iph = ip_hdr(skb); - iph->version = 4; - iph->ihl = sizeof(struct iphdr) >> 2; - iph->frag_off = df; - iph->protocol = IPPROTO_GRE; - iph->tos = ipgre_ecn_encapsulate(tos, old_iph, skb); - iph->daddr = fl4.daddr; - iph->saddr = fl4.saddr; - iph->ttl = ttl; - - tunnel_ip_select_ident(skb, old_iph, &rt->dst); - - if (ttl == 0) { - if (skb->protocol == htons(ETH_P_IP)) - iph->ttl = old_iph->ttl; -#if IS_ENABLED(CONFIG_IPV6) - else if (skb->protocol == htons(ETH_P_IPV6)) - iph->ttl = ((const struct ipv6hdr *)old_iph)->hop_limit; -#endif - else - iph->ttl = ip4_dst_hoplimit(&rt->dst); - } - - ((__be16 *)(iph + 1))[0] = tunnel->parms.o_flags; - ((__be16 *)(iph + 1))[1] = (dev->type == ARPHRD_ETHER) ? - htons(ETH_P_TEB) : skb->protocol; - - if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) { - __be32 *ptr = (__be32 *)(((u8 *)iph) + tunnel->hlen - 4); + /* Pull skb since ip_tunnel_xmit() needs skb->data pointing + * to gre header. + */ + skb_pull(skb, tunnel->hlen + sizeof(struct iphdr)); + } else { + if (skb_cow_head(skb, dev->needed_headroom)) + goto free_skb; - if (tunnel->parms.o_flags&GRE_SEQ) { - ++tunnel->o_seqno; - *ptr = htonl(tunnel->o_seqno); - ptr--; - } - if (tunnel->parms.o_flags&GRE_KEY) { - *ptr = tunnel->parms.o_key; - ptr--; - } - /* Skip GRE checksum if skb is getting offloaded. */ - if (!(skb_shinfo(skb)->gso_type & SKB_GSO_GRE) && - (tunnel->parms.o_flags&GRE_CSUM)) { - int offset = skb_transport_offset(skb); - - if (skb_has_shared_frag(skb)) { - err = __skb_linearize(skb); - if (err) - goto tx_error; - } - - *ptr = 0; - *(__sum16 *)ptr = csum_fold(skb_checksum(skb, offset, - skb->len - offset, - 0)); - } + tnl_params = &tunnel->parms.iph; } - iptunnel_xmit(skb, dev); + __gre_xmit(skb, dev, tnl_params, skb->protocol); + return NETDEV_TX_OK; -#if IS_ENABLED(CONFIG_IPV6) -tx_error_icmp: - dst_link_failure(skb); -#endif -tx_error: - dev->stats.tx_errors++; +free_skb: dev_kfree_skb(skb); +out: + dev->stats.tx_dropped++; return NETDEV_TX_OK; } -static int ipgre_tunnel_bind_dev(struct net_device *dev) +static netdev_tx_t gre_tap_xmit(struct sk_buff *skb, + struct net_device *dev) { - struct net_device *tdev = NULL; - struct ip_tunnel *tunnel; - const struct iphdr *iph; - int hlen = LL_MAX_HEADER; - int mtu = ETH_DATA_LEN; - int addend = sizeof(struct iphdr) + 4; - - tunnel = netdev_priv(dev); - iph = &tunnel->parms.iph; - - /* Guess output device to choose reasonable mtu and needed_headroom */ - - if (iph->daddr) { - struct flowi4 fl4; - struct rtable *rt; - - rt = ip_route_output_gre(dev_net(dev), &fl4, - iph->daddr, iph->saddr, - tunnel->parms.o_key, - RT_TOS(iph->tos), - tunnel->parms.link); - if (!IS_ERR(rt)) { - tdev = rt->dst.dev; - ip_rt_put(rt); - } - - if (dev->type != ARPHRD_ETHER) - dev->flags |= IFF_POINTOPOINT; - } + struct ip_tunnel *tunnel = netdev_priv(dev); - if (!tdev && tunnel->parms.link) - tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link); + skb = handle_offloads(tunnel, skb); + if (IS_ERR(skb)) + goto out; - if (tdev) { - hlen = tdev->hard_header_len + tdev->needed_headroom; - mtu = tdev->mtu; - } - dev->iflink = tunnel->parms.link; - - /* Precalculate GRE options length */ - if (tunnel->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) { - if (tunnel->parms.o_flags&GRE_CSUM) - addend += 4; - if (tunnel->parms.o_flags&GRE_KEY) - addend += 4; - if (tunnel->parms.o_flags&GRE_SEQ) - addend += 4; - } - dev->needed_headroom = addend + hlen; - mtu -= dev->hard_header_len + addend; + if (skb_cow_head(skb, dev->needed_headroom)) + goto free_skb; - if (mtu < 68) - mtu = 68; + __gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_TEB)); - tunnel->hlen = addend; - /* TCP offload with GRE SEQ is not supported. */ - if (!(tunnel->parms.o_flags & GRE_SEQ)) { - dev->features |= NETIF_F_GSO_SOFTWARE; - dev->hw_features |= NETIF_F_GSO_SOFTWARE; - } + return NETDEV_TX_OK; - return mtu; +free_skb: + dev_kfree_skb(skb); +out: + dev->stats.tx_dropped++; + return NETDEV_TX_OK; } -static int -ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) +static int ipgre_tunnel_ioctl(struct net_device *dev, + struct ifreq *ifr, int cmd) { int err = 0; struct ip_tunnel_parm p; - struct ip_tunnel *t; - struct net *net = dev_net(dev); - struct ipgre_net *ign = net_generic(net, ipgre_net_id); - - switch (cmd) { - case SIOCGETTUNNEL: - t = NULL; - if (dev == ign->fb_tunnel_dev) { - if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) { - err = -EFAULT; - break; - } - t = ipgre_tunnel_locate(net, &p, 0); - } - if (t == NULL) - t = netdev_priv(dev); - memcpy(&p, &t->parms, sizeof(p)); - if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) - err = -EFAULT; - break; - - case SIOCADDTUNNEL: - case SIOCCHGTUNNEL: - err = -EPERM; - if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) - goto done; - - err = -EFAULT; - if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) - goto done; - - err = -EINVAL; - if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE || - p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) || - ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING))) - goto done; - if (p.iph.ttl) - p.iph.frag_off |= htons(IP_DF); - - if (!(p.i_flags&GRE_KEY)) - p.i_key = 0; - if (!(p.o_flags&GRE_KEY)) - p.o_key = 0; - - t = ipgre_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL); - - if (dev != ign->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) { - if (t != NULL) { - if (t->dev != dev) { - err = -EEXIST; - break; - } - } else { - unsigned int nflags = 0; - - t = netdev_priv(dev); - - if (ipv4_is_multicast(p.iph.daddr)) - nflags = IFF_BROADCAST; - else if (p.iph.daddr) - nflags = IFF_POINTOPOINT; - - if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) { - err = -EINVAL; - break; - } - ipgre_tunnel_unlink(ign, t); - synchronize_net(); - t->parms.iph.saddr = p.iph.saddr; - t->parms.iph.daddr = p.iph.daddr; - t->parms.i_key = p.i_key; - t->parms.o_key = p.o_key; - memcpy(dev->dev_addr, &p.iph.saddr, 4); - memcpy(dev->broadcast, &p.iph.daddr, 4); - ipgre_tunnel_link(ign, t); - netdev_state_change(dev); - } - } - if (t) { - err = 0; - if (cmd == SIOCCHGTUNNEL) { - t->parms.iph.ttl = p.iph.ttl; - t->parms.iph.tos = p.iph.tos; - t->parms.iph.frag_off = p.iph.frag_off; - if (t->parms.link != p.link) { - t->parms.link = p.link; - dev->mtu = ipgre_tunnel_bind_dev(dev); - netdev_state_change(dev); - } - } - if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p))) - err = -EFAULT; - } else - err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT); - break; - - case SIOCDELTUNNEL: - err = -EPERM; - if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) - goto done; - - if (dev == ign->fb_tunnel_dev) { - err = -EFAULT; - if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) - goto done; - err = -ENOENT; - if ((t = ipgre_tunnel_locate(net, &p, 0)) == NULL) - goto done; - err = -EPERM; - if (t == netdev_priv(ign->fb_tunnel_dev)) - goto done; - dev = t->dev; - } - unregister_netdevice(dev); - err = 0; - break; - - default: - err = -EINVAL; + if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) + return -EFAULT; + if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE || + p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) || + ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING))) { + return -EINVAL; } + p.i_flags = gre_flags_to_tnl_flags(p.i_flags); + p.o_flags = gre_flags_to_tnl_flags(p.o_flags); -done: - return err; -} + err = ip_tunnel_ioctl(dev, &p, cmd); + if (err) + return err; -static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu) -{ - struct ip_tunnel *tunnel = netdev_priv(dev); - if (new_mtu < 68 || - new_mtu > 0xFFF8 - dev->hard_header_len - tunnel->hlen) - return -EINVAL; - dev->mtu = new_mtu; + p.i_flags = tnl_flags_to_gre_flags(p.i_flags); + p.o_flags = tnl_flags_to_gre_flags(p.o_flags); + + if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) + return -EFAULT; return 0; } @@ -1263,25 +546,23 @@ static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu) ... ftp fec0:6666:6666::193.233.7.65 ... - */ - static int ipgre_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, const void *daddr, const void *saddr, unsigned int len) { struct ip_tunnel *t = netdev_priv(dev); - struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen); - __be16 *p = (__be16 *)(iph+1); + struct iphdr *iph; + struct gre_base_hdr *greh; - memcpy(iph, &t->parms.iph, sizeof(struct iphdr)); - p[0] = t->parms.o_flags; - p[1] = htons(type); + iph = (struct iphdr *)skb_push(skb, t->hlen + sizeof(*iph)); + greh = (struct gre_base_hdr *)(iph+1); + greh->flags = tnl_flags_to_gre_flags(t->parms.o_flags); + greh->protocol = htons(type); - /* - * Set the source hardware address. - */ + memcpy(iph, &t->parms.iph, sizeof(struct iphdr)); + /* Set the source hardware address. */ if (saddr) memcpy(&iph->saddr, saddr, 4); if (daddr) @@ -1289,7 +570,7 @@ static int ipgre_header(struct sk_buff *skb, struct net_device *dev, if (iph->daddr) return t->hlen; - return -t->hlen; + return -(t->hlen + sizeof(*iph)); } static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr) @@ -1343,31 +624,21 @@ static int ipgre_close(struct net_device *dev) } return 0; } - #endif static const struct net_device_ops ipgre_netdev_ops = { .ndo_init = ipgre_tunnel_init, - .ndo_uninit = ipgre_tunnel_uninit, + .ndo_uninit = ip_tunnel_uninit, #ifdef CONFIG_NET_IPGRE_BROADCAST .ndo_open = ipgre_open, .ndo_stop = ipgre_close, #endif - .ndo_start_xmit = ipgre_tunnel_xmit, + .ndo_start_xmit = ipgre_xmit, .ndo_do_ioctl = ipgre_tunnel_ioctl, - .ndo_change_mtu = ipgre_tunnel_change_mtu, - .ndo_get_stats64 = ipgre_get_stats64, + .ndo_change_mtu = ip_tunnel_change_mtu, + .ndo_get_stats64 = ip_tunnel_get_stats64, }; -static void ipgre_dev_free(struct net_device *dev) -{ - struct ip_tunnel *tunnel = netdev_priv(dev); - - gro_cells_destroy(&tunnel->gro_cells); - free_percpu(dev->tstats); - free_netdev(dev); -} - #define GRE_FEATURES (NETIF_F_SG | \ NETIF_F_FRAGLIST | \ NETIF_F_HIGHDMA | \ @@ -1376,35 +647,49 @@ static void ipgre_dev_free(struct net_device *dev) static void ipgre_tunnel_setup(struct net_device *dev) { dev->netdev_ops = &ipgre_netdev_ops; - dev->destructor = ipgre_dev_free; + ip_tunnel_setup(dev, ipgre_net_id); +} - dev->type = ARPHRD_IPGRE; - dev->needed_headroom = LL_MAX_HEADER + sizeof(struct iphdr) + 4; +static void __gre_tunnel_init(struct net_device *dev) +{ + struct ip_tunnel *tunnel; + + tunnel = netdev_priv(dev); + tunnel->hlen = ip_gre_calc_hlen(tunnel->parms.o_flags); + tunnel->parms.iph.protocol = IPPROTO_GRE; + + dev->needed_headroom = LL_MAX_HEADER + sizeof(struct iphdr) + 4; dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 4; - dev->flags = IFF_NOARP; dev->iflink = 0; - dev->addr_len = 4; - dev->features |= NETIF_F_NETNS_LOCAL; - dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; - dev->features |= GRE_FEATURES; + dev->features |= NETIF_F_NETNS_LOCAL | GRE_FEATURES; dev->hw_features |= GRE_FEATURES; + + if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) { + /* TCP offload with GRE SEQ is not supported. */ + dev->features |= NETIF_F_GSO_SOFTWARE; + dev->hw_features |= NETIF_F_GSO_SOFTWARE; + /* Can use a lockless transmit, unless we generate + * output sequences + */ + dev->features |= NETIF_F_LLTX; + } } static int ipgre_tunnel_init(struct net_device *dev) { - struct ip_tunnel *tunnel; - struct iphdr *iph; - int err; + struct ip_tunnel *tunnel = netdev_priv(dev); + struct iphdr *iph = &tunnel->parms.iph; - tunnel = netdev_priv(dev); - iph = &tunnel->parms.iph; + __gre_tunnel_init(dev); - tunnel->dev = dev; - strcpy(tunnel->parms.name, dev->name); + memcpy(dev->dev_addr, &iph->saddr, 4); + memcpy(dev->broadcast, &iph->daddr, 4); - memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4); - memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4); + dev->type = ARPHRD_IPGRE; + dev->flags = IFF_NOARP; + dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; + dev->addr_len = 4; if (iph->daddr) { #ifdef CONFIG_NET_IPGRE_BROADCAST @@ -1418,106 +703,30 @@ static int ipgre_tunnel_init(struct net_device *dev) } else dev->header_ops = &ipgre_header_ops; - dev->tstats = alloc_percpu(struct pcpu_tstats); - if (!dev->tstats) - return -ENOMEM; - - err = gro_cells_init(&tunnel->gro_cells, dev); - if (err) { - free_percpu(dev->tstats); - return err; - } - - return 0; + return ip_tunnel_init(dev); } -static void ipgre_fb_tunnel_init(struct net_device *dev) -{ - struct ip_tunnel *tunnel = netdev_priv(dev); - struct iphdr *iph = &tunnel->parms.iph; - - tunnel->dev = dev; - strcpy(tunnel->parms.name, dev->name); - - iph->version = 4; - iph->protocol = IPPROTO_GRE; - iph->ihl = 5; - tunnel->hlen = sizeof(struct iphdr) + 4; - - dev_hold(dev); -} - - static const struct gre_protocol ipgre_protocol = { .handler = ipgre_rcv, .err_handler = ipgre_err, }; -static void ipgre_destroy_tunnels(struct ipgre_net *ign, struct list_head *head) -{ - int prio; - - for (prio = 0; prio < 4; prio++) { - int h; - for (h = 0; h < HASH_SIZE; h++) { - struct ip_tunnel *t; - - t = rtnl_dereference(ign->tunnels[prio][h]); - - while (t != NULL) { - unregister_netdevice_queue(t->dev, head); - t = rtnl_dereference(t->next); - } - } - } -} - static int __net_init ipgre_init_net(struct net *net) { - struct ipgre_net *ign = net_generic(net, ipgre_net_id); - int err; - - ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "gre0", - ipgre_tunnel_setup); - if (!ign->fb_tunnel_dev) { - err = -ENOMEM; - goto err_alloc_dev; - } - dev_net_set(ign->fb_tunnel_dev, net); - - ipgre_fb_tunnel_init(ign->fb_tunnel_dev); - ign->fb_tunnel_dev->rtnl_link_ops = &ipgre_link_ops; - - if ((err = register_netdev(ign->fb_tunnel_dev))) - goto err_reg_dev; - - rcu_assign_pointer(ign->tunnels_wc[0], - netdev_priv(ign->fb_tunnel_dev)); - return 0; - -err_reg_dev: - ipgre_dev_free(ign->fb_tunnel_dev); -err_alloc_dev: - return err; + return ip_tunnel_init_net(net, ipgre_net_id, &ipgre_link_ops, NULL); } static void __net_exit ipgre_exit_net(struct net *net) { - struct ipgre_net *ign; - LIST_HEAD(list); - - ign = net_generic(net, ipgre_net_id); - rtnl_lock(); - ipgre_destroy_tunnels(ign, &list); - unregister_netdevice_many(&list); - rtnl_unlock(); + struct ip_tunnel_net *itn = net_generic(net, ipgre_net_id); + ip_tunnel_delete_net(itn); } static struct pernet_operations ipgre_net_ops = { .init = ipgre_init_net, .exit = ipgre_exit_net, .id = &ipgre_net_id, - .size = sizeof(struct ipgre_net), + .size = sizeof(struct ip_tunnel_net), }; static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[]) @@ -1562,8 +771,8 @@ out: return ipgre_tunnel_validate(tb, data); } -static void ipgre_netlink_parms(struct nlattr *data[], - struct ip_tunnel_parm *parms) +static void ipgre_netlink_parms(struct nlattr *data[], struct nlattr *tb[], + struct ip_tunnel_parm *parms) { memset(parms, 0, sizeof(*parms)); @@ -1576,10 +785,10 @@ static void ipgre_netlink_parms(struct nlattr *data[], parms->link = nla_get_u32(data[IFLA_GRE_LINK]); if (data[IFLA_GRE_IFLAGS]) - parms->i_flags = nla_get_be16(data[IFLA_GRE_IFLAGS]); + parms->i_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_IFLAGS])); if (data[IFLA_GRE_OFLAGS]) - parms->o_flags = nla_get_be16(data[IFLA_GRE_OFLAGS]); + parms->o_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_OFLAGS])); if (data[IFLA_GRE_IKEY]) parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]); @@ -1603,148 +812,46 @@ static void ipgre_netlink_parms(struct nlattr *data[], parms->iph.frag_off = htons(IP_DF); } -static int ipgre_tap_init(struct net_device *dev) +static int gre_tap_init(struct net_device *dev) { - struct ip_tunnel *tunnel; - - tunnel = netdev_priv(dev); - - tunnel->dev = dev; - strcpy(tunnel->parms.name, dev->name); - - ipgre_tunnel_bind_dev(dev); + __gre_tunnel_init(dev); - dev->tstats = alloc_percpu(struct pcpu_tstats); - if (!dev->tstats) - return -ENOMEM; - - return 0; + return ip_tunnel_init(dev); } -static const struct net_device_ops ipgre_tap_netdev_ops = { - .ndo_init = ipgre_tap_init, - .ndo_uninit = ipgre_tunnel_uninit, - .ndo_start_xmit = ipgre_tunnel_xmit, +static const struct net_device_ops gre_tap_netdev_ops = { + .ndo_init = gre_tap_init, + .ndo_uninit = ip_tunnel_uninit, + .ndo_start_xmit = gre_tap_xmit, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, - .ndo_change_mtu = ipgre_tunnel_change_mtu, - .ndo_get_stats64 = ipgre_get_stats64, + .ndo_change_mtu = ip_tunnel_change_mtu, + .ndo_get_stats64 = ip_tunnel_get_stats64, }; static void ipgre_tap_setup(struct net_device *dev) { - ether_setup(dev); - - dev->netdev_ops = &ipgre_tap_netdev_ops; - dev->destructor = ipgre_dev_free; - - dev->iflink = 0; - dev->features |= NETIF_F_NETNS_LOCAL; - - dev->features |= GRE_FEATURES; - dev->hw_features |= GRE_FEATURES; + dev->netdev_ops = &gre_tap_netdev_ops; + ip_tunnel_setup(dev, gre_tap_net_id); } -static int ipgre_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], - struct nlattr *data[]) +static int ipgre_newlink(struct net *src_net, struct net_device *dev, + struct nlattr *tb[], struct nlattr *data[]) { - struct ip_tunnel *nt; - struct net *net = dev_net(dev); - struct ipgre_net *ign = net_generic(net, ipgre_net_id); - int mtu; - int err; - - nt = netdev_priv(dev); - ipgre_netlink_parms(data, &nt->parms); - - if (ipgre_tunnel_find(net, &nt->parms, dev->type)) - return -EEXIST; - - if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS]) - eth_hw_addr_random(dev); - - mtu = ipgre_tunnel_bind_dev(dev); - if (!tb[IFLA_MTU]) - dev->mtu = mtu; - - /* Can use a lockless transmit, unless we generate output sequences */ - if (!(nt->parms.o_flags & GRE_SEQ)) - dev->features |= NETIF_F_LLTX; - - err = register_netdevice(dev); - if (err) - goto out; - - dev_hold(dev); - ipgre_tunnel_link(ign, nt); + struct ip_tunnel_parm p; -out: - return err; + ipgre_netlink_parms(data, tb, &p); + return ip_tunnel_newlink(dev, tb, &p); } static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[], struct nlattr *data[]) { - struct ip_tunnel *t, *nt; - struct net *net = dev_net(dev); - struct ipgre_net *ign = net_generic(net, ipgre_net_id); struct ip_tunnel_parm p; - int mtu; - - if (dev == ign->fb_tunnel_dev) - return -EINVAL; - - nt = netdev_priv(dev); - ipgre_netlink_parms(data, &p); - - t = ipgre_tunnel_locate(net, &p, 0); - - if (t) { - if (t->dev != dev) - return -EEXIST; - } else { - t = nt; - - if (dev->type != ARPHRD_ETHER) { - unsigned int nflags = 0; - - if (ipv4_is_multicast(p.iph.daddr)) - nflags = IFF_BROADCAST; - else if (p.iph.daddr) - nflags = IFF_POINTOPOINT; - - if ((dev->flags ^ nflags) & - (IFF_POINTOPOINT | IFF_BROADCAST)) - return -EINVAL; - } - ipgre_tunnel_unlink(ign, t); - t->parms.iph.saddr = p.iph.saddr; - t->parms.iph.daddr = p.iph.daddr; - t->parms.i_key = p.i_key; - if (dev->type != ARPHRD_ETHER) { - memcpy(dev->dev_addr, &p.iph.saddr, 4); - memcpy(dev->broadcast, &p.iph.daddr, 4); - } - ipgre_tunnel_link(ign, t); - netdev_state_change(dev); - } - - t->parms.o_key = p.o_key; - t->parms.iph.ttl = p.iph.ttl; - t->parms.iph.tos = p.iph.tos; - t->parms.iph.frag_off = p.iph.frag_off; - - if (t->parms.link != p.link) { - t->parms.link = p.link; - mtu = ipgre_tunnel_bind_dev(dev); - if (!tb[IFLA_MTU]) - dev->mtu = mtu; - netdev_state_change(dev); - } - - return 0; + ipgre_netlink_parms(data, tb, &p); + return ip_tunnel_changelink(dev, tb, &p); } static size_t ipgre_get_size(const struct net_device *dev) @@ -1779,8 +886,8 @@ static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev) struct ip_tunnel_parm *p = &t->parms; if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) || - nla_put_be16(skb, IFLA_GRE_IFLAGS, p->i_flags) || - nla_put_be16(skb, IFLA_GRE_OFLAGS, p->o_flags) || + nla_put_be16(skb, IFLA_GRE_IFLAGS, tnl_flags_to_gre_flags(p->i_flags)) || + nla_put_be16(skb, IFLA_GRE_OFLAGS, tnl_flags_to_gre_flags(p->o_flags)) || nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) || nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) || nla_put_be32(skb, IFLA_GRE_LOCAL, p->iph.saddr) || @@ -1818,6 +925,7 @@ static struct rtnl_link_ops ipgre_link_ops __read_mostly = { .validate = ipgre_tunnel_validate, .newlink = ipgre_newlink, .changelink = ipgre_changelink, + .dellink = ip_tunnel_dellink, .get_size = ipgre_get_size, .fill_info = ipgre_fill_info, }; @@ -1831,13 +939,28 @@ static struct rtnl_link_ops ipgre_tap_ops __read_mostly = { .validate = ipgre_tap_validate, .newlink = ipgre_newlink, .changelink = ipgre_changelink, + .dellink = ip_tunnel_dellink, .get_size = ipgre_get_size, .fill_info = ipgre_fill_info, }; -/* - * And now the modules code and kernel interface. - */ +static int __net_init ipgre_tap_init_net(struct net *net) +{ + return ip_tunnel_init_net(net, gre_tap_net_id, &ipgre_tap_ops, NULL); +} + +static void __net_exit ipgre_tap_exit_net(struct net *net) +{ + struct ip_tunnel_net *itn = net_generic(net, gre_tap_net_id); + ip_tunnel_delete_net(itn); +} + +static struct pernet_operations ipgre_tap_net_ops = { + .init = ipgre_tap_init_net, + .exit = ipgre_tap_exit_net, + .id = &gre_tap_net_id, + .size = sizeof(struct ip_tunnel_net), +}; static int __init ipgre_init(void) { @@ -1849,6 +972,10 @@ static int __init ipgre_init(void) if (err < 0) return err; + err = register_pernet_device(&ipgre_tap_net_ops); + if (err < 0) + goto pnet_tap_faied; + err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO); if (err < 0) { pr_info("%s: can't add protocol\n", __func__); @@ -1863,16 +990,17 @@ static int __init ipgre_init(void) if (err < 0) goto tap_ops_failed; -out: - return err; + return 0; tap_ops_failed: rtnl_link_unregister(&ipgre_link_ops); rtnl_link_failed: gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO); add_proto_failed: + unregister_pernet_device(&ipgre_tap_net_ops); +pnet_tap_faied: unregister_pernet_device(&ipgre_net_ops); - goto out; + return err; } static void __exit ipgre_fini(void) @@ -1881,6 +1009,7 @@ static void __exit ipgre_fini(void) rtnl_link_unregister(&ipgre_link_ops); if (gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO) < 0) pr_info("%s: can't remove protocol\n", __func__); + unregister_pernet_device(&ipgre_tap_net_ops); unregister_pernet_device(&ipgre_net_ops); } @@ -1890,3 +1019,4 @@ MODULE_LICENSE("GPL"); MODULE_ALIAS_RTNL_LINK("gre"); MODULE_ALIAS_RTNL_LINK("gretap"); MODULE_ALIAS_NETDEV("gre0"); +MODULE_ALIAS_NETDEV("gretap0"); diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c new file mode 100644 index 000000000000..9d96b6853f21 --- /dev/null +++ b/net/ipv4/ip_tunnel.c @@ -0,0 +1,1035 @@ +/* + * Copyright (c) 2013 Nicira, Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if IS_ENABLED(CONFIG_IPV6) +#include +#include +#include +#endif + +static unsigned int ip_tunnel_hash(struct ip_tunnel_net *itn, + __be32 key, __be32 remote) +{ + return hash_32((__force u32)key ^ (__force u32)remote, + IP_TNL_HASH_BITS); +} + +/* Often modified stats are per cpu, other are shared (netdev->stats) */ +struct rtnl_link_stats64 *ip_tunnel_get_stats64(struct net_device *dev, + struct rtnl_link_stats64 *tot) +{ + int i; + + for_each_possible_cpu(i) { + const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i); + u64 rx_packets, rx_bytes, tx_packets, tx_bytes; + unsigned int start; + + do { + start = u64_stats_fetch_begin_bh(&tstats->syncp); + rx_packets = tstats->rx_packets; + tx_packets = tstats->tx_packets; + rx_bytes = tstats->rx_bytes; + tx_bytes = tstats->tx_bytes; + } while (u64_stats_fetch_retry_bh(&tstats->syncp, start)); + + tot->rx_packets += rx_packets; + tot->tx_packets += tx_packets; + tot->rx_bytes += rx_bytes; + tot->tx_bytes += tx_bytes; + } + + tot->multicast = dev->stats.multicast; + + tot->rx_crc_errors = dev->stats.rx_crc_errors; + tot->rx_fifo_errors = dev->stats.rx_fifo_errors; + tot->rx_length_errors = dev->stats.rx_length_errors; + tot->rx_frame_errors = dev->stats.rx_frame_errors; + tot->rx_errors = dev->stats.rx_errors; + + tot->tx_fifo_errors = dev->stats.tx_fifo_errors; + tot->tx_carrier_errors = dev->stats.tx_carrier_errors; + tot->tx_dropped = dev->stats.tx_dropped; + tot->tx_aborted_errors = dev->stats.tx_aborted_errors; + tot->tx_errors = dev->stats.tx_errors; + + tot->collisions = dev->stats.collisions; + + return tot; +} +EXPORT_SYMBOL_GPL(ip_tunnel_get_stats64); + +static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p, + __be16 flags, __be32 key) +{ + if (p->i_flags & TUNNEL_KEY) { + if (flags & TUNNEL_KEY) + return key == p->i_key; + else + /* key expected, none present */ + return false; + } else + return !(flags & TUNNEL_KEY); +} + +/* Fallback tunnel: no source, no destination, no key, no options + + Tunnel hash table: + We require exact key match i.e. if a key is present in packet + it will match only tunnel with the same key; if it is not present, + it will match only keyless tunnel. + + All keysless packets, if not matched configured keyless tunnels + will match fallback tunnel. + Given src, dst and key, find appropriate for input tunnel. +*/ +struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn, + int link, __be16 flags, + __be32 remote, __be32 local, + __be32 key) +{ + unsigned int hash; + struct ip_tunnel *t, *cand = NULL; + struct hlist_head *head; + + hash = ip_tunnel_hash(itn, key, remote); + head = &itn->tunnels[hash]; + + hlist_for_each_entry_rcu(t, head, hash_node) { + if (local != t->parms.iph.saddr || + remote != t->parms.iph.daddr || + !(t->dev->flags & IFF_UP)) + continue; + + if (!ip_tunnel_key_match(&t->parms, flags, key)) + continue; + + if (t->parms.link == link) + return t; + else + cand = t; + } + + hlist_for_each_entry_rcu(t, head, hash_node) { + if (remote != t->parms.iph.daddr || + !(t->dev->flags & IFF_UP)) + continue; + + if (!ip_tunnel_key_match(&t->parms, flags, key)) + continue; + + if (t->parms.link == link) + return t; + else if (!cand) + cand = t; + } + + hash = ip_tunnel_hash(itn, key, 0); + head = &itn->tunnels[hash]; + + hlist_for_each_entry_rcu(t, head, hash_node) { + if ((local != t->parms.iph.saddr && + (local != t->parms.iph.daddr || + !ipv4_is_multicast(local))) || + !(t->dev->flags & IFF_UP)) + continue; + + if (!ip_tunnel_key_match(&t->parms, flags, key)) + continue; + + if (t->parms.link == link) + return t; + else if (!cand) + cand = t; + } + + if (flags & TUNNEL_NO_KEY) + goto skip_key_lookup; + + hlist_for_each_entry_rcu(t, head, hash_node) { + if (t->parms.i_key != key || + !(t->dev->flags & IFF_UP)) + continue; + + if (t->parms.link == link) + return t; + else if (!cand) + cand = t; + } + +skip_key_lookup: + if (cand) + return cand; + + if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP) + return netdev_priv(itn->fb_tunnel_dev); + + + return NULL; +} +EXPORT_SYMBOL_GPL(ip_tunnel_lookup); + +static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn, + struct ip_tunnel_parm *parms) +{ + unsigned int h; + __be32 remote; + + if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr)) + remote = parms->iph.daddr; + else + remote = 0; + + h = ip_tunnel_hash(itn, parms->i_key, remote); + return &itn->tunnels[h]; +} + +static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t) +{ + struct hlist_head *head = ip_bucket(itn, &t->parms); + + hlist_add_head_rcu(&t->hash_node, head); +} + +static void ip_tunnel_del(struct ip_tunnel *t) +{ + hlist_del_init_rcu(&t->hash_node); +} + +static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn, + struct ip_tunnel_parm *parms, + int type) +{ + __be32 remote = parms->iph.daddr; + __be32 local = parms->iph.saddr; + __be32 key = parms->i_key; + int link = parms->link; + struct ip_tunnel *t = NULL; + struct hlist_head *head = ip_bucket(itn, parms); + + hlist_for_each_entry_rcu(t, head, hash_node) { + if (local == t->parms.iph.saddr && + remote == t->parms.iph.daddr && + key == t->parms.i_key && + link == t->parms.link && + type == t->dev->type) + break; + } + return t; +} + +static struct net_device *__ip_tunnel_create(struct net *net, + const struct rtnl_link_ops *ops, + struct ip_tunnel_parm *parms) +{ + int err; + struct ip_tunnel *tunnel; + struct net_device *dev; + char name[IFNAMSIZ]; + + if (parms->name[0]) + strlcpy(name, parms->name, IFNAMSIZ); + else { + if (strlen(ops->kind) + 3 >= IFNAMSIZ) { + err = -E2BIG; + goto failed; + } + strlcpy(name, ops->kind, IFNAMSIZ); + strncat(name, "%d", 2); + } + + ASSERT_RTNL(); + dev = alloc_netdev(ops->priv_size, name, ops->setup); + if (!dev) { + err = -ENOMEM; + goto failed; + } + dev_net_set(dev, net); + + dev->rtnl_link_ops = ops; + + tunnel = netdev_priv(dev); + tunnel->parms = *parms; + + err = register_netdevice(dev); + if (err) + goto failed_free; + + return dev; + +failed_free: + free_netdev(dev); +failed: + return ERR_PTR(err); +} + +static inline struct rtable *ip_route_output_tunnel(struct net *net, + struct flowi4 *fl4, + int proto, + __be32 daddr, __be32 saddr, + __be32 key, __u8 tos, int oif) +{ + memset(fl4, 0, sizeof(*fl4)); + fl4->flowi4_oif = oif; + fl4->daddr = daddr; + fl4->saddr = saddr; + fl4->flowi4_tos = tos; + fl4->flowi4_proto = proto; + fl4->fl4_gre_key = key; + return ip_route_output_key(net, fl4); +} + +static int ip_tunnel_bind_dev(struct net_device *dev) +{ + struct net_device *tdev = NULL; + struct ip_tunnel *tunnel = netdev_priv(dev); + const struct iphdr *iph; + int hlen = LL_MAX_HEADER; + int mtu = ETH_DATA_LEN; + int t_hlen = tunnel->hlen + sizeof(struct iphdr); + + iph = &tunnel->parms.iph; + + /* Guess output device to choose reasonable mtu and needed_headroom */ + if (iph->daddr) { + struct flowi4 fl4; + struct rtable *rt; + + rt = ip_route_output_tunnel(dev_net(dev), &fl4, + tunnel->parms.iph.protocol, + iph->daddr, iph->saddr, + tunnel->parms.o_key, + RT_TOS(iph->tos), + tunnel->parms.link); + if (!IS_ERR(rt)) { + tdev = rt->dst.dev; + ip_rt_put(rt); + } + if (dev->type != ARPHRD_ETHER) + dev->flags |= IFF_POINTOPOINT; + } + + if (!tdev && tunnel->parms.link) + tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link); + + if (tdev) { + hlen = tdev->hard_header_len + tdev->needed_headroom; + mtu = tdev->mtu; + } + dev->iflink = tunnel->parms.link; + + dev->needed_headroom = t_hlen + hlen; + mtu -= (dev->hard_header_len + t_hlen); + + if (mtu < 68) + mtu = 68; + + return mtu; +} + +static struct ip_tunnel *ip_tunnel_create(struct net *net, + struct ip_tunnel_net *itn, + struct ip_tunnel_parm *parms) +{ + struct ip_tunnel *nt, *fbt; + struct net_device *dev; + + BUG_ON(!itn->fb_tunnel_dev); + fbt = netdev_priv(itn->fb_tunnel_dev); + dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms); + if (IS_ERR(dev)) + return NULL; + + dev->mtu = ip_tunnel_bind_dev(dev); + + nt = netdev_priv(dev); + ip_tunnel_add(itn, nt); + return nt; +} + +int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb, + const struct tnl_ptk_info *tpi, bool log_ecn_error) +{ + struct pcpu_tstats *tstats; + const struct iphdr *iph = ip_hdr(skb); + int err; + + secpath_reset(skb); + + skb->protocol = tpi->proto; + + skb->mac_header = skb->network_header; + __pskb_pull(skb, tunnel->hlen); + skb_postpull_rcsum(skb, skb_transport_header(skb), tunnel->hlen); +#ifdef CONFIG_NET_IPGRE_BROADCAST + if (ipv4_is_multicast(iph->daddr)) { + /* Looped back packet, drop it! */ + if (rt_is_output_route(skb_rtable(skb))) + goto drop; + tunnel->dev->stats.multicast++; + skb->pkt_type = PACKET_BROADCAST; + } +#endif + + if ((!(tpi->flags&TUNNEL_CSUM) && (tunnel->parms.i_flags&TUNNEL_CSUM)) || + ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) { + tunnel->dev->stats.rx_crc_errors++; + tunnel->dev->stats.rx_errors++; + goto drop; + } + + if (tunnel->parms.i_flags&TUNNEL_SEQ) { + if (!(tpi->flags&TUNNEL_SEQ) || + (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) { + tunnel->dev->stats.rx_fifo_errors++; + tunnel->dev->stats.rx_errors++; + goto drop; + } + tunnel->i_seqno = ntohl(tpi->seq) + 1; + } + + /* Warning: All skb pointers will be invalidated! */ + if (tunnel->dev->type == ARPHRD_ETHER) { + if (!pskb_may_pull(skb, ETH_HLEN)) { + tunnel->dev->stats.rx_length_errors++; + tunnel->dev->stats.rx_errors++; + goto drop; + } + + iph = ip_hdr(skb); + skb->protocol = eth_type_trans(skb, tunnel->dev); + skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); + } + + skb->pkt_type = PACKET_HOST; + __skb_tunnel_rx(skb, tunnel->dev); + + skb_reset_network_header(skb); + err = IP_ECN_decapsulate(iph, skb); + if (unlikely(err)) { + if (log_ecn_error) + net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n", + &iph->saddr, iph->tos); + if (err > 1) { + ++tunnel->dev->stats.rx_frame_errors; + ++tunnel->dev->stats.rx_errors; + goto drop; + } + } + + tstats = this_cpu_ptr(tunnel->dev->tstats); + u64_stats_update_begin(&tstats->syncp); + tstats->rx_packets++; + tstats->rx_bytes += skb->len; + u64_stats_update_end(&tstats->syncp); + + gro_cells_receive(&tunnel->gro_cells, skb); + return 0; + +drop: + kfree_skb(skb); + return 0; +} +EXPORT_SYMBOL_GPL(ip_tunnel_rcv); + +void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, + const struct iphdr *tnl_params) +{ + struct ip_tunnel *tunnel = netdev_priv(dev); + const struct iphdr *inner_iph; + struct iphdr *iph; + struct flowi4 fl4; + u8 tos, ttl; + __be16 df; + struct rtable *rt; /* Route to the other host */ + struct net_device *tdev; /* Device to other host */ + unsigned int max_headroom; /* The extra header space needed */ + __be32 dst; + int mtu; + + inner_iph = (const struct iphdr *)skb_inner_network_header(skb); + + dst = tnl_params->daddr; + if (dst == 0) { + /* NBMA tunnel */ + + if (skb_dst(skb) == NULL) { + dev->stats.tx_fifo_errors++; + goto tx_error; + } + + if (skb->protocol == htons(ETH_P_IP)) { + rt = skb_rtable(skb); + dst = rt_nexthop(rt, inner_iph->daddr); + } +#if IS_ENABLED(CONFIG_IPV6) + else if (skb->protocol == htons(ETH_P_IPV6)) { + const struct in6_addr *addr6; + struct neighbour *neigh; + bool do_tx_error_icmp; + int addr_type; + + neigh = dst_neigh_lookup(skb_dst(skb), + &ipv6_hdr(skb)->daddr); + if (neigh == NULL) + goto tx_error; + + addr6 = (const struct in6_addr *)&neigh->primary_key; + addr_type = ipv6_addr_type(addr6); + + if (addr_type == IPV6_ADDR_ANY) { + addr6 = &ipv6_hdr(skb)->daddr; + addr_type = ipv6_addr_type(addr6); + } + + if ((addr_type & IPV6_ADDR_COMPATv4) == 0) + do_tx_error_icmp = true; + else { + do_tx_error_icmp = false; + dst = addr6->s6_addr32[3]; + } + neigh_release(neigh); + if (do_tx_error_icmp) + goto tx_error_icmp; + } +#endif + else + goto tx_error; + } + + tos = tnl_params->tos; + if (tos & 0x1) { + tos &= ~0x1; + if (skb->protocol == htons(ETH_P_IP)) + tos = inner_iph->tos; + else if (skb->protocol == htons(ETH_P_IPV6)) + tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph); + } + + rt = ip_route_output_tunnel(dev_net(dev), &fl4, + tunnel->parms.iph.protocol, + dst, tnl_params->saddr, + tunnel->parms.o_key, + RT_TOS(tos), + tunnel->parms.link); + if (IS_ERR(rt)) { + dev->stats.tx_carrier_errors++; + goto tx_error; + } + tdev = rt->dst.dev; + + if (tdev == dev) { + ip_rt_put(rt); + dev->stats.collisions++; + goto tx_error; + } + + df = tnl_params->frag_off; + + if (df) + mtu = dst_mtu(&rt->dst) - dev->hard_header_len + - sizeof(struct iphdr); + else + mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu; + + if (skb_dst(skb)) + skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); + + if (skb->protocol == htons(ETH_P_IP)) { + df |= (inner_iph->frag_off&htons(IP_DF)); + + if (!skb_is_gso(skb) && + (inner_iph->frag_off&htons(IP_DF)) && + mtu < ntohs(inner_iph->tot_len)) { + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); + ip_rt_put(rt); + goto tx_error; + } + } +#if IS_ENABLED(CONFIG_IPV6) + else if (skb->protocol == htons(ETH_P_IPV6)) { + struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb); + + if (rt6 && mtu < dst_mtu(skb_dst(skb)) && + mtu >= IPV6_MIN_MTU) { + if ((tunnel->parms.iph.daddr && + !ipv4_is_multicast(tunnel->parms.iph.daddr)) || + rt6->rt6i_dst.plen == 128) { + rt6->rt6i_flags |= RTF_MODIFIED; + dst_metric_set(skb_dst(skb), RTAX_MTU, mtu); + } + } + + if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU && + mtu < skb->len) { + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); + ip_rt_put(rt); + goto tx_error; + } + } +#endif + + if (tunnel->err_count > 0) { + if (time_before(jiffies, + tunnel->err_time + IPTUNNEL_ERR_TIMEO)) { + tunnel->err_count--; + + dst_link_failure(skb); + } else + tunnel->err_count = 0; + } + + ttl = tnl_params->ttl; + if (ttl == 0) { + if (skb->protocol == htons(ETH_P_IP)) + ttl = inner_iph->ttl; +#if IS_ENABLED(CONFIG_IPV6) + else if (skb->protocol == htons(ETH_P_IPV6)) + ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit; +#endif + else + ttl = ip4_dst_hoplimit(&rt->dst); + } + + max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct iphdr) + + rt->dst.header_len; + if (max_headroom > dev->needed_headroom) { + dev->needed_headroom = max_headroom; + if (skb_cow_head(skb, dev->needed_headroom)) { + dev->stats.tx_dropped++; + dev_kfree_skb(skb); + return; + } + } + + skb_dst_drop(skb); + skb_dst_set(skb, &rt->dst); + memset(IPCB(skb), 0, sizeof(*IPCB(skb))); + + /* Push down and install the IP header. */ + skb_push(skb, sizeof(struct iphdr)); + skb_reset_network_header(skb); + + iph = ip_hdr(skb); + inner_iph = (const struct iphdr *)skb_inner_network_header(skb); + + iph->version = 4; + iph->ihl = sizeof(struct iphdr) >> 2; + iph->frag_off = df; + iph->protocol = tnl_params->protocol; + iph->tos = ip_tunnel_ecn_encap(tos, inner_iph, skb); + iph->daddr = fl4.daddr; + iph->saddr = fl4.saddr; + iph->ttl = ttl; + tunnel_ip_select_ident(skb, inner_iph, &rt->dst); + + iptunnel_xmit(skb, dev); + return; + +#if IS_ENABLED(CONFIG_IPV6) +tx_error_icmp: + dst_link_failure(skb); +#endif +tx_error: + dev->stats.tx_errors++; + dev_kfree_skb(skb); +} +EXPORT_SYMBOL_GPL(ip_tunnel_xmit); + +static void ip_tunnel_update(struct ip_tunnel_net *itn, + struct ip_tunnel *t, + struct net_device *dev, + struct ip_tunnel_parm *p, + bool set_mtu) +{ + ip_tunnel_del(t); + t->parms.iph.saddr = p->iph.saddr; + t->parms.iph.daddr = p->iph.daddr; + t->parms.i_key = p->i_key; + t->parms.o_key = p->o_key; + if (dev->type != ARPHRD_ETHER) { + memcpy(dev->dev_addr, &p->iph.saddr, 4); + memcpy(dev->broadcast, &p->iph.daddr, 4); + } + ip_tunnel_add(itn, t); + + t->parms.iph.ttl = p->iph.ttl; + t->parms.iph.tos = p->iph.tos; + t->parms.iph.frag_off = p->iph.frag_off; + + if (t->parms.link != p->link) { + int mtu; + + t->parms.link = p->link; + mtu = ip_tunnel_bind_dev(dev); + if (set_mtu) + dev->mtu = mtu; + } + netdev_state_change(dev); +} + +int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd) +{ + int err = 0; + struct ip_tunnel *t; + struct net *net = dev_net(dev); + struct ip_tunnel *tunnel = netdev_priv(dev); + struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id); + + BUG_ON(!itn->fb_tunnel_dev); + switch (cmd) { + case SIOCGETTUNNEL: + t = NULL; + if (dev == itn->fb_tunnel_dev) + t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type); + if (t == NULL) + t = netdev_priv(dev); + memcpy(p, &t->parms, sizeof(*p)); + break; + + case SIOCADDTUNNEL: + case SIOCCHGTUNNEL: + err = -EPERM; + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) + goto done; + if (p->iph.ttl) + p->iph.frag_off |= htons(IP_DF); + if (!(p->i_flags&TUNNEL_KEY)) + p->i_key = 0; + if (!(p->o_flags&TUNNEL_KEY)) + p->o_key = 0; + + t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type); + + if (!t && (cmd == SIOCADDTUNNEL)) + t = ip_tunnel_create(net, itn, p); + + if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) { + if (t != NULL) { + if (t->dev != dev) { + err = -EEXIST; + break; + } + } else { + unsigned int nflags = 0; + + if (ipv4_is_multicast(p->iph.daddr)) + nflags = IFF_BROADCAST; + else if (p->iph.daddr) + nflags = IFF_POINTOPOINT; + + if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) { + err = -EINVAL; + break; + } + + t = netdev_priv(dev); + } + } + + if (t) { + err = 0; + ip_tunnel_update(itn, t, dev, p, true); + } else + err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT); + break; + + case SIOCDELTUNNEL: + err = -EPERM; + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) + goto done; + + if (dev == itn->fb_tunnel_dev) { + err = -ENOENT; + t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type); + if (t == NULL) + goto done; + err = -EPERM; + if (t == netdev_priv(itn->fb_tunnel_dev)) + goto done; + dev = t->dev; + } + unregister_netdevice(dev); + err = 0; + break; + + default: + err = -EINVAL; + } + +done: + return err; +} +EXPORT_SYMBOL_GPL(ip_tunnel_ioctl); + +int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu) +{ + struct ip_tunnel *tunnel = netdev_priv(dev); + int t_hlen = tunnel->hlen + sizeof(struct iphdr); + + if (new_mtu < 68 || + new_mtu > 0xFFF8 - dev->hard_header_len - t_hlen) + return -EINVAL; + dev->mtu = new_mtu; + return 0; +} +EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu); + +static void ip_tunnel_dev_free(struct net_device *dev) +{ + struct ip_tunnel *tunnel = netdev_priv(dev); + + gro_cells_destroy(&tunnel->gro_cells); + free_percpu(dev->tstats); + free_netdev(dev); +} + +void ip_tunnel_dellink(struct net_device *dev, struct list_head *head) +{ + struct net *net = dev_net(dev); + struct ip_tunnel *tunnel = netdev_priv(dev); + struct ip_tunnel_net *itn; + + itn = net_generic(net, tunnel->ip_tnl_net_id); + + if (itn->fb_tunnel_dev != dev) { + ip_tunnel_del(netdev_priv(dev)); + unregister_netdevice_queue(dev, head); + } +} +EXPORT_SYMBOL_GPL(ip_tunnel_dellink); + +int __net_init ip_tunnel_init_net(struct net *net, int ip_tnl_net_id, + struct rtnl_link_ops *ops, char *devname) +{ + struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id); + struct ip_tunnel_parm parms; + + itn->tunnels = kzalloc(IP_TNL_HASH_SIZE * sizeof(struct hlist_head), GFP_KERNEL); + if (!itn->tunnels) + return -ENOMEM; + + if (!ops) { + itn->fb_tunnel_dev = NULL; + return 0; + } + memset(&parms, 0, sizeof(parms)); + if (devname) + strlcpy(parms.name, devname, IFNAMSIZ); + + rtnl_lock(); + itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms); + rtnl_unlock(); + if (IS_ERR(itn->fb_tunnel_dev)) { + kfree(itn->tunnels); + return PTR_ERR(itn->fb_tunnel_dev); + } + + return 0; +} +EXPORT_SYMBOL_GPL(ip_tunnel_init_net); + +static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head) +{ + int h; + + for (h = 0; h < IP_TNL_HASH_SIZE; h++) { + struct ip_tunnel *t; + struct hlist_node *n; + struct hlist_head *thead = &itn->tunnels[h]; + + hlist_for_each_entry_safe(t, n, thead, hash_node) + unregister_netdevice_queue(t->dev, head); + } + if (itn->fb_tunnel_dev) + unregister_netdevice_queue(itn->fb_tunnel_dev, head); +} + +void __net_exit ip_tunnel_delete_net(struct ip_tunnel_net *itn) +{ + LIST_HEAD(list); + + rtnl_lock(); + ip_tunnel_destroy(itn, &list); + unregister_netdevice_many(&list); + rtnl_unlock(); + kfree(itn->tunnels); +} +EXPORT_SYMBOL_GPL(ip_tunnel_delete_net); + +int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[], + struct ip_tunnel_parm *p) +{ + struct ip_tunnel *nt; + struct net *net = dev_net(dev); + struct ip_tunnel_net *itn; + int mtu; + int err; + + nt = netdev_priv(dev); + itn = net_generic(net, nt->ip_tnl_net_id); + + if (ip_tunnel_find(itn, p, dev->type)) + return -EEXIST; + + nt->parms = *p; + err = register_netdevice(dev); + if (err) + goto out; + + if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS]) + eth_hw_addr_random(dev); + + mtu = ip_tunnel_bind_dev(dev); + if (!tb[IFLA_MTU]) + dev->mtu = mtu; + + ip_tunnel_add(itn, nt); + +out: + return err; +} +EXPORT_SYMBOL_GPL(ip_tunnel_newlink); + +int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[], + struct ip_tunnel_parm *p) +{ + struct ip_tunnel *t, *nt; + struct net *net = dev_net(dev); + struct ip_tunnel *tunnel = netdev_priv(dev); + struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id); + + if (dev == itn->fb_tunnel_dev) + return -EINVAL; + + nt = netdev_priv(dev); + + t = ip_tunnel_find(itn, p, dev->type); + + if (t) { + if (t->dev != dev) + return -EEXIST; + } else { + t = nt; + + if (dev->type != ARPHRD_ETHER) { + unsigned int nflags = 0; + + if (ipv4_is_multicast(p->iph.daddr)) + nflags = IFF_BROADCAST; + else if (p->iph.daddr) + nflags = IFF_POINTOPOINT; + + if ((dev->flags ^ nflags) & + (IFF_POINTOPOINT | IFF_BROADCAST)) + return -EINVAL; + } + } + + ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU]); + return 0; +} +EXPORT_SYMBOL_GPL(ip_tunnel_changelink); + +int ip_tunnel_init(struct net_device *dev) +{ + struct ip_tunnel *tunnel = netdev_priv(dev); + struct iphdr *iph = &tunnel->parms.iph; + int err; + + dev->destructor = ip_tunnel_dev_free; + dev->tstats = alloc_percpu(struct pcpu_tstats); + if (!dev->tstats) + return -ENOMEM; + + err = gro_cells_init(&tunnel->gro_cells, dev); + if (err) { + free_percpu(dev->tstats); + return err; + } + + tunnel->dev = dev; + strcpy(tunnel->parms.name, dev->name); + iph->version = 4; + iph->ihl = 5; + + return 0; +} +EXPORT_SYMBOL_GPL(ip_tunnel_init); + +void ip_tunnel_uninit(struct net_device *dev) +{ + struct net *net = dev_net(dev); + struct ip_tunnel *tunnel = netdev_priv(dev); + struct ip_tunnel_net *itn; + + itn = net_generic(net, tunnel->ip_tnl_net_id); + /* fb_tunnel_dev will be unregisted in net-exit call. */ + if (itn->fb_tunnel_dev != dev) + ip_tunnel_del(netdev_priv(dev)); +} +EXPORT_SYMBOL_GPL(ip_tunnel_uninit); + +/* Do least required initialization, rest of init is done in tunnel_init call */ +void ip_tunnel_setup(struct net_device *dev, int net_id) +{ + struct ip_tunnel *tunnel = netdev_priv(dev); + tunnel->ip_tnl_net_id = net_id; +} +EXPORT_SYMBOL_GPL(ip_tunnel_setup); + +MODULE_LICENSE("GPL"); diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index c3a4233c0ac2..6a628fb3349f 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -38,7 +38,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 34e006fe2d87..a557d6ab127a 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -111,7 +111,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 5f95b3aa579e..fd61fe16679f 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -61,7 +61,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index f56277f15903..ab5c7ad482cd 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -49,7 +49,6 @@ #include #include #include -#include #include #include #include diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 6a6ba73ff265..df89ccaaceaa 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -38,6 +38,7 @@ #include #include +#include #include #include #include diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index bef3fedfdc56..1e55866cead7 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -47,6 +47,7 @@ #include #include +#include #include #include #include diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 898e671a526b..ee4fc570cf2c 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -49,7 +49,7 @@ #include #include #include -#include +#include #include #include #include -- cgit v1.2.3 From 6752c8db8e0cfedb44ba62806dd15b383ed64000 Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki / 吉藤英明 Date: Mon, 25 Mar 2013 08:26:16 +0000 Subject: firewire net, ipv4 arp: Extend hardware address and remove driver-level packet inspection. Inspection of upper layer protocol is considered harmful, especially if it is about ARP or other stateful upper layer protocol; driver cannot (and should not) have full state of them. IPv4 over Firewire module used to inspect ARP (both in sending path and in receiving path), and record peer's GUID, max packet size, max speed and fifo address. This patch removes such inspection by extending our "hardware address" definition to include other information as well: max packet size, max speed and fifo. By doing this, The neighbour module in networking subsystem can cache them. Note: As we have started ignoring sspd and max_rec in ARP/NDP, those information will not be used in the driver when sending. When a packet is being sent, the IP layer fills our pseudo header with the extended "hardware address", including GUID and fifo. The driver can look-up node-id (the real but rather volatile low-level address) by GUID, and then the module can send the packet to the wire using parameters provided in the extendedn hardware address. This approach is realistic because IP over IEEE1394 (RFC2734) and IPv6 over IEEE1394 (RFC3146) share same "hardware address" format in their address resolution protocols. Here, extended "hardware address" is defined as follows: union fwnet_hwaddr { u8 u[16]; struct { __be64 uniq_id; /* EUI-64 */ u8 max_rec; /* max packet size */ u8 sspd; /* max speed */ __be16 fifo_hi; /* hi 16bits of FIFO addr */ __be32 fifo_lo; /* lo 32bits of FIFO addr */ } __packed uc; }; Note that Hardware address is declared as union, so that we can map full IP address into this, when implementing MCAP (Multicast Cannel Allocation Protocol) for IPv6, but IP and ARP subsystem do not need to know this format in detail. One difference between original ARP (RFC826) and 1394 ARP (RFC2734) is that 1394 ARP Request/Reply do not contain the target hardware address field (aka ar$tha). This difference is handled in the ARP subsystem. CC: Stephan Gatzka Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- drivers/firewire/net.c | 153 +++++++++---------------------------------------- include/linux/if_arp.h | 12 +++- include/net/firewire.h | 25 ++++++++ net/ipv4/arp.c | 27 +++++++-- 4 files changed, 82 insertions(+), 135 deletions(-) create mode 100644 include/net/firewire.h (limited to 'include/net') diff --git a/drivers/firewire/net.c b/drivers/firewire/net.c index cb8aa865ff88..790017eb5051 100644 --- a/drivers/firewire/net.c +++ b/drivers/firewire/net.c @@ -28,6 +28,7 @@ #include #include +#include /* rx limits */ #define FWNET_MAX_FRAGMENTS 30 /* arbitrary, > TX queue depth */ @@ -57,33 +58,6 @@ #define RFC2374_HDR_LASTFRAG 2 /* last fragment */ #define RFC2374_HDR_INTFRAG 3 /* interior fragment */ -#define RFC2734_HW_ADDR_LEN 16 - -struct rfc2734_arp { - __be16 hw_type; /* 0x0018 */ - __be16 proto_type; /* 0x0806 */ - u8 hw_addr_len; /* 16 */ - u8 ip_addr_len; /* 4 */ - __be16 opcode; /* ARP Opcode */ - /* Above is exactly the same format as struct arphdr */ - - __be64 s_uniq_id; /* Sender's 64bit EUI */ - u8 max_rec; /* Sender's max packet size */ - u8 sspd; /* Sender's max speed */ - __be16 fifo_hi; /* hi 16bits of sender's FIFO addr */ - __be32 fifo_lo; /* lo 32bits of sender's FIFO addr */ - __be32 sip; /* Sender's IP Address */ - __be32 tip; /* IP Address of requested hw addr */ -} __packed; - -/* This header format is specific to this driver implementation. */ -#define FWNET_ALEN 8 -#define FWNET_HLEN 10 -struct fwnet_header { - u8 h_dest[FWNET_ALEN]; /* destination address */ - __be16 h_proto; /* packet type ID field */ -} __packed; - static bool fwnet_hwaddr_is_multicast(u8 *ha) { return !!(*ha & 1); @@ -196,8 +170,6 @@ struct fwnet_peer { struct list_head peer_link; struct fwnet_device *dev; u64 guid; - u64 fifo; - __be32 ip; /* guarded by dev->lock */ struct list_head pd_list; /* received partial datagrams */ @@ -226,6 +198,15 @@ struct fwnet_packet_task { u8 enqueued; }; +/* + * Get fifo address embedded in hwaddr + */ +static __u64 fwnet_hwaddr_fifo(union fwnet_hwaddr *ha) +{ + return (u64)get_unaligned_be16(&ha->uc.fifo_hi) << 32 + | get_unaligned_be32(&ha->uc.fifo_lo); +} + /* * saddr == NULL means use device source address. * daddr == NULL means leave destination address (eg unresolved arp). @@ -518,7 +499,6 @@ static int fwnet_finish_incoming_packet(struct net_device *net, bool is_broadcast, u16 ether_type) { struct fwnet_device *dev; - static const __be64 broadcast_hw = cpu_to_be64(~0ULL); int status; __be64 guid; @@ -537,76 +517,11 @@ static int fwnet_finish_incoming_packet(struct net_device *net, /* * Parse the encapsulation header. This actually does the job of - * converting to an ethernet frame header, as well as arp - * conversion if needed. ARP conversion is easier in this - * direction, since we are using ethernet as our backend. - */ - /* - * If this is an ARP packet, convert it. First, we want to make - * use of some of the fields, since they tell us a little bit - * about the sending machine. + * converting to an ethernet-like pseudo frame header. */ - if (ether_type == ETH_P_ARP) { - struct rfc2734_arp *arp1394; - struct arphdr *arp; - unsigned char *arp_ptr; - u64 fifo_addr; - u64 peer_guid; - struct fwnet_peer *peer; - unsigned long flags; - - arp1394 = (struct rfc2734_arp *)skb->data; - arp = (struct arphdr *)skb->data; - arp_ptr = (unsigned char *)(arp + 1); - peer_guid = get_unaligned_be64(&arp1394->s_uniq_id); - fifo_addr = (u64)get_unaligned_be16(&arp1394->fifo_hi) << 32 - | get_unaligned_be32(&arp1394->fifo_lo); - - spin_lock_irqsave(&dev->lock, flags); - peer = fwnet_peer_find_by_guid(dev, peer_guid); - if (peer) { - peer->fifo = fifo_addr; - peer->ip = arp1394->sip; - } - spin_unlock_irqrestore(&dev->lock, flags); - - if (!peer) { - dev_notice(&net->dev, - "no peer for ARP packet from %016llx\n", - (unsigned long long)peer_guid); - goto no_peer; - } - - /* - * Now that we're done with the 1394 specific stuff, we'll - * need to alter some of the data. Believe it or not, all - * that needs to be done is sender_IP_address needs to be - * moved, the destination hardware address get stuffed - * in and the hardware address length set to 8. - * - * IMPORTANT: The code below overwrites 1394 specific data - * needed above so keep the munging of the data for the - * higher level IP stack last. - */ - - arp->ar_hln = 8; - /* skip over sender unique id */ - arp_ptr += arp->ar_hln; - /* move sender IP addr */ - put_unaligned(arp1394->sip, (u32 *)arp_ptr); - /* skip over sender IP addr */ - arp_ptr += arp->ar_pln; - - if (arp->ar_op == htons(ARPOP_REQUEST)) - memset(arp_ptr, 0, sizeof(u64)); - else - memcpy(arp_ptr, net->dev_addr, sizeof(u64)); - } - - /* Now add the ethernet header. */ guid = cpu_to_be64(dev->card->guid); if (dev_hard_header(skb, net, ether_type, - is_broadcast ? &broadcast_hw : &guid, + is_broadcast ? net->broadcast : net->dev_addr, NULL, skb->len) >= 0) { struct fwnet_header *eth; u16 *rawp; @@ -649,7 +564,6 @@ static int fwnet_finish_incoming_packet(struct net_device *net, return 0; - no_peer: err: net->stats.rx_errors++; net->stats.rx_dropped++; @@ -1355,11 +1269,12 @@ static netdev_tx_t fwnet_tx(struct sk_buff *skb, struct net_device *net) ptask->dest_node = IEEE1394_ALL_NODES; ptask->speed = SCODE_100; } else { - __be64 guid = get_unaligned((__be64 *)hdr_buf.h_dest); + union fwnet_hwaddr *ha = (union fwnet_hwaddr *)hdr_buf.h_dest; + __be64 guid = get_unaligned(&ha->uc.uniq_id); u8 generation; peer = fwnet_peer_find_by_guid(dev, be64_to_cpu(guid)); - if (!peer || peer->fifo == FWNET_NO_FIFO_ADDR) + if (!peer) goto fail; generation = peer->generation; @@ -1367,32 +1282,12 @@ static netdev_tx_t fwnet_tx(struct sk_buff *skb, struct net_device *net) max_payload = peer->max_payload; datagram_label_ptr = &peer->datagram_label; - ptask->fifo_addr = peer->fifo; + ptask->fifo_addr = fwnet_hwaddr_fifo(ha); ptask->generation = generation; ptask->dest_node = dest_node; ptask->speed = peer->speed; } - /* If this is an ARP packet, convert it */ - if (proto == htons(ETH_P_ARP)) { - struct arphdr *arp = (struct arphdr *)skb->data; - unsigned char *arp_ptr = (unsigned char *)(arp + 1); - struct rfc2734_arp *arp1394 = (struct rfc2734_arp *)skb->data; - __be32 ipaddr; - - ipaddr = get_unaligned((__be32 *)(arp_ptr + FWNET_ALEN)); - - arp1394->hw_addr_len = RFC2734_HW_ADDR_LEN; - arp1394->max_rec = dev->card->max_receive; - arp1394->sspd = dev->card->link_speed; - - put_unaligned_be16(dev->local_fifo >> 32, - &arp1394->fifo_hi); - put_unaligned_be32(dev->local_fifo & 0xffffffff, - &arp1394->fifo_lo); - put_unaligned(ipaddr, &arp1394->sip); - } - ptask->hdr.w0 = 0; ptask->hdr.w1 = 0; ptask->skb = skb; @@ -1507,8 +1402,6 @@ static int fwnet_add_peer(struct fwnet_device *dev, peer->dev = dev; peer->guid = (u64)device->config_rom[3] << 32 | device->config_rom[4]; - peer->fifo = FWNET_NO_FIFO_ADDR; - peer->ip = 0; INIT_LIST_HEAD(&peer->pd_list); peer->pdg_size = 0; peer->datagram_label = 0; @@ -1538,6 +1431,7 @@ static int fwnet_probe(struct device *_dev) struct fwnet_device *dev; unsigned max_mtu; int ret; + union fwnet_hwaddr *ha; mutex_lock(&fwnet_device_mutex); @@ -1582,8 +1476,15 @@ static int fwnet_probe(struct device *_dev) net->mtu = min(1500U, max_mtu); /* Set our hardware address while we're at it */ - put_unaligned_be64(card->guid, net->dev_addr); - put_unaligned_be64(~0ULL, net->broadcast); + ha = (union fwnet_hwaddr *)net->dev_addr; + put_unaligned_be64(card->guid, &ha->uc.uniq_id); + ha->uc.max_rec = dev->card->max_receive; + ha->uc.sspd = dev->card->link_speed; + put_unaligned_be16(dev->local_fifo >> 32, &ha->uc.fifo_hi); + put_unaligned_be32(dev->local_fifo & 0xffffffff, &ha->uc.fifo_lo); + + memset(net->broadcast, -1, net->addr_len); + ret = register_netdev(net); if (ret) goto out; @@ -1632,8 +1533,6 @@ static int fwnet_remove(struct device *_dev) mutex_lock(&fwnet_device_mutex); net = dev->netdev; - if (net && peer->ip) - arp_invalidate(net, peer->ip); fwnet_remove_peer(peer, dev); diff --git a/include/linux/if_arp.h b/include/linux/if_arp.h index 89b4614a4722..f563907ed776 100644 --- a/include/linux/if_arp.h +++ b/include/linux/if_arp.h @@ -33,7 +33,15 @@ static inline struct arphdr *arp_hdr(const struct sk_buff *skb) static inline int arp_hdr_len(struct net_device *dev) { - /* ARP header, plus 2 device addresses, plus 2 IP addresses. */ - return sizeof(struct arphdr) + (dev->addr_len + sizeof(u32)) * 2; + switch (dev->type) { +#if IS_ENABLED(CONFIG_FIREWIRE_NET) + case ARPHRD_IEEE1394: + /* ARP header, device address and 2 IP addresses */ + return sizeof(struct arphdr) + dev->addr_len + sizeof(u32) * 2; +#endif + default: + /* ARP header, plus 2 device addresses, plus 2 IP addresses. */ + return sizeof(struct arphdr) + (dev->addr_len + sizeof(u32)) * 2; + } } #endif /* _LINUX_IF_ARP_H */ diff --git a/include/net/firewire.h b/include/net/firewire.h new file mode 100644 index 000000000000..31bcbfe7a220 --- /dev/null +++ b/include/net/firewire.h @@ -0,0 +1,25 @@ +#ifndef _NET_FIREWIRE_H +#define _NET_FIREWIRE_H + +/* Pseudo L2 address */ +#define FWNET_ALEN 16 +union fwnet_hwaddr { + u8 u[FWNET_ALEN]; + /* "Hardware address" defined in RFC2734/RF3146 */ + struct { + __be64 uniq_id; /* EUI-64 */ + u8 max_rec; /* max packet size */ + u8 sspd; /* max speed */ + __be16 fifo_hi; /* hi 16bits of FIFO addr */ + __be32 fifo_lo; /* lo 32bits of FIFO addr */ + } __packed uc; +}; + +/* Pseudo L2 Header */ +#define FWNET_HLEN 18 +struct fwnet_header { + u8 h_dest[FWNET_ALEN]; /* destination address */ + __be16 h_proto; /* packet type ID field */ +} __packed; + +#endif diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index fea4929f6200..247ec1951c35 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -654,11 +654,19 @@ struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip, arp_ptr += dev->addr_len; memcpy(arp_ptr, &src_ip, 4); arp_ptr += 4; - if (target_hw != NULL) - memcpy(arp_ptr, target_hw, dev->addr_len); - else - memset(arp_ptr, 0, dev->addr_len); - arp_ptr += dev->addr_len; + + switch (dev->type) { +#if IS_ENABLED(CONFIG_FIREWIRE_NET) + case ARPHRD_IEEE1394: + break; +#endif + default: + if (target_hw != NULL) + memcpy(arp_ptr, target_hw, dev->addr_len); + else + memset(arp_ptr, 0, dev->addr_len); + arp_ptr += dev->addr_len; + } memcpy(arp_ptr, &dest_ip, 4); return skb; @@ -781,7 +789,14 @@ static int arp_process(struct sk_buff *skb) arp_ptr += dev->addr_len; memcpy(&sip, arp_ptr, 4); arp_ptr += 4; - arp_ptr += dev->addr_len; + switch (dev_type) { +#if IS_ENABLED(CONFIG_FIREWIRE_NET) + case ARPHRD_IEEE1394: + break; +#endif + default: + arp_ptr += dev->addr_len; + } memcpy(&tip, arp_ptr, 4); /* * Check for bad requests for 127.x.x.x and requests for multicast -- cgit v1.2.3 From 1b5ab0def4f6e42e8b8097c3b11d2e8d96baafec Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Wed, 27 Mar 2013 05:55:56 +0000 Subject: net: use the frag lru_lock to protect netns_frags.nqueues update Move the protection of netns_frags.nqueues updates under the LRU_lock, instead of the write lock. As they are located on the same cacheline, and this is also needed when transitioning to use per hash bucket locking. Signed-off-by: Jesper Dangaard Brouer Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/inet_frag.h | 2 ++ net/ipv4/inet_fragment.c | 2 -- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index 64b4e7d23b8a..7cac9c5789b5 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -143,6 +143,7 @@ static inline void inet_frag_lru_del(struct inet_frag_queue *q) { spin_lock(&q->net->lru_lock); list_del(&q->lru_list); + q->net->nqueues--; spin_unlock(&q->net->lru_lock); } @@ -151,6 +152,7 @@ static inline void inet_frag_lru_add(struct netns_frags *nf, { spin_lock(&nf->lru_lock); list_add_tail(&q->lru_list, &nf->lru_list); + q->net->nqueues++; spin_unlock(&nf->lru_lock); } diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 8ba548a8efce..1206ca64b0ea 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -124,7 +124,6 @@ static inline void fq_unlink(struct inet_frag_queue *fq, struct inet_frags *f) { write_lock(&f->lock); hlist_del(&fq->list); - fq->net->nqueues--; write_unlock(&f->lock); inet_frag_lru_del(fq); } @@ -260,7 +259,6 @@ static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf, atomic_inc(&qp->refcnt); hlist_add_head(&qp->list, &f->hash[hash]); - nf->nqueues++; write_unlock(&f->lock); inet_frag_lru_add(nf, qp); return qp; -- cgit v1.2.3 From 7d4c04fc170087119727119074e72445f2bb192b Mon Sep 17 00:00:00 2001 From: "Keller, Jacob E" Date: Thu, 28 Mar 2013 11:19:25 +0000 Subject: net: add option to enable error queue packets waking select Currently, when a socket receives something on the error queue it only wakes up the socket on select if it is in the "read" list, that is the socket has something to read. It is useful also to wake the socket if it is in the error list, which would enable software to wait on error queue packets without waking up for regular data on the socket. The main use case is for receiving timestamped transmit packets which return the timestamp to the socket via the error queue. This enables an application to select on the socket for the error queue only instead of for the regular traffic. -v2- * Added the SO_SELECT_ERR_QUEUE socket option to every architechture specific file * Modified every socket poll function that checks error queue Signed-off-by: Jacob Keller Cc: Jeffrey Kirsher Cc: Richard Cochran Cc: Matthew Vick Signed-off-by: David S. Miller --- arch/alpha/include/uapi/asm/socket.h | 2 ++ arch/avr32/include/uapi/asm/socket.h | 2 ++ arch/cris/include/uapi/asm/socket.h | 2 ++ arch/frv/include/uapi/asm/socket.h | 2 ++ arch/h8300/include/uapi/asm/socket.h | 2 ++ arch/ia64/include/uapi/asm/socket.h | 2 ++ arch/m32r/include/uapi/asm/socket.h | 2 ++ arch/mips/include/uapi/asm/socket.h | 2 ++ arch/mn10300/include/uapi/asm/socket.h | 2 ++ arch/parisc/include/uapi/asm/socket.h | 2 ++ arch/powerpc/include/uapi/asm/socket.h | 2 ++ arch/s390/include/uapi/asm/socket.h | 2 ++ arch/sparc/include/uapi/asm/socket.h | 2 ++ arch/xtensa/include/uapi/asm/socket.h | 2 ++ include/net/sock.h | 1 + include/uapi/asm-generic/socket.h | 2 ++ net/bluetooth/af_bluetooth.c | 3 ++- net/core/datagram.c | 4 +++- net/core/sock.c | 8 ++++++++ net/iucv/af_iucv.c | 3 ++- net/nfc/llcp/sock.c | 3 ++- net/sctp/socket.c | 3 ++- net/unix/af_unix.c | 4 +++- 23 files changed, 53 insertions(+), 6 deletions(-) (limited to 'include/net') diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h index c5195524d1ef..eee6ea76bdaf 100644 --- a/arch/alpha/include/uapi/asm/socket.h +++ b/arch/alpha/include/uapi/asm/socket.h @@ -79,4 +79,6 @@ #define SO_LOCK_FILTER 44 +#define SO_SELECT_ERR_QUEUE 45 + #endif /* _UAPI_ASM_SOCKET_H */ diff --git a/arch/avr32/include/uapi/asm/socket.h b/arch/avr32/include/uapi/asm/socket.h index 51c6401582ea..37401f535126 100644 --- a/arch/avr32/include/uapi/asm/socket.h +++ b/arch/avr32/include/uapi/asm/socket.h @@ -72,4 +72,6 @@ #define SO_LOCK_FILTER 44 +#define SO_SELECT_ERR_QUEUE 45 + #endif /* __ASM_AVR32_SOCKET_H */ diff --git a/arch/cris/include/uapi/asm/socket.h b/arch/cris/include/uapi/asm/socket.h index 50692b738c75..ba409c9947bc 100644 --- a/arch/cris/include/uapi/asm/socket.h +++ b/arch/cris/include/uapi/asm/socket.h @@ -74,6 +74,8 @@ #define SO_LOCK_FILTER 44 +#define SO_SELECT_ERR_QUEUE 45 + #endif /* _ASM_SOCKET_H */ diff --git a/arch/frv/include/uapi/asm/socket.h b/arch/frv/include/uapi/asm/socket.h index 595391f0f98c..31dbb5d8e13d 100644 --- a/arch/frv/include/uapi/asm/socket.h +++ b/arch/frv/include/uapi/asm/socket.h @@ -72,5 +72,7 @@ #define SO_LOCK_FILTER 44 +#define SO_SELECT_ERR_QUEUE 45 + #endif /* _ASM_SOCKET_H */ diff --git a/arch/h8300/include/uapi/asm/socket.h b/arch/h8300/include/uapi/asm/socket.h index 43e32621da7d..5d1c6d0870e6 100644 --- a/arch/h8300/include/uapi/asm/socket.h +++ b/arch/h8300/include/uapi/asm/socket.h @@ -72,4 +72,6 @@ #define SO_LOCK_FILTER 44 +#define SO_SELECT_ERR_QUEUE 45 + #endif /* _ASM_SOCKET_H */ diff --git a/arch/ia64/include/uapi/asm/socket.h b/arch/ia64/include/uapi/asm/socket.h index c567adc8bea5..6b4329f18b29 100644 --- a/arch/ia64/include/uapi/asm/socket.h +++ b/arch/ia64/include/uapi/asm/socket.h @@ -81,4 +81,6 @@ #define SO_LOCK_FILTER 44 +#define SO_SELECT_ERR_QUEUE 45 + #endif /* _ASM_IA64_SOCKET_H */ diff --git a/arch/m32r/include/uapi/asm/socket.h b/arch/m32r/include/uapi/asm/socket.h index 519afa2755db..2a3b59e0e171 100644 --- a/arch/m32r/include/uapi/asm/socket.h +++ b/arch/m32r/include/uapi/asm/socket.h @@ -72,4 +72,6 @@ #define SO_LOCK_FILTER 44 +#define SO_SELECT_ERR_QUEUE 45 + #endif /* _ASM_M32R_SOCKET_H */ diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h index 47132f44c955..3b211507be7f 100644 --- a/arch/mips/include/uapi/asm/socket.h +++ b/arch/mips/include/uapi/asm/socket.h @@ -90,4 +90,6 @@ #define SO_LOCK_FILTER 44 +#define SO_SELECT_ERR_QUEUE 45 + #endif /* _UAPI_ASM_SOCKET_H */ diff --git a/arch/mn10300/include/uapi/asm/socket.h b/arch/mn10300/include/uapi/asm/socket.h index 5c7c7c988544..b4ce844c9391 100644 --- a/arch/mn10300/include/uapi/asm/socket.h +++ b/arch/mn10300/include/uapi/asm/socket.h @@ -72,4 +72,6 @@ #define SO_LOCK_FILTER 44 +#define SO_SELECT_ERR_QUEUE 45 + #endif /* _ASM_SOCKET_H */ diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h index 526e4b9aece0..70c512a386f7 100644 --- a/arch/parisc/include/uapi/asm/socket.h +++ b/arch/parisc/include/uapi/asm/socket.h @@ -71,6 +71,8 @@ #define SO_LOCK_FILTER 0x4025 +#define SO_SELECT_ERR_QUEUE 0x4026 + /* O_NONBLOCK clashes with the bits used for socket types. Therefore we * have to define SOCK_NONBLOCK to a different value here. */ diff --git a/arch/powerpc/include/uapi/asm/socket.h b/arch/powerpc/include/uapi/asm/socket.h index a26dcaece509..a36daf3c6f9a 100644 --- a/arch/powerpc/include/uapi/asm/socket.h +++ b/arch/powerpc/include/uapi/asm/socket.h @@ -79,4 +79,6 @@ #define SO_LOCK_FILTER 44 +#define SO_SELECT_ERR_QUEUE 45 + #endif /* _ASM_POWERPC_SOCKET_H */ diff --git a/arch/s390/include/uapi/asm/socket.h b/arch/s390/include/uapi/asm/socket.h index f99eea7fff0f..2dacb306835c 100644 --- a/arch/s390/include/uapi/asm/socket.h +++ b/arch/s390/include/uapi/asm/socket.h @@ -78,4 +78,6 @@ #define SO_LOCK_FILTER 44 +#define SO_SELECT_ERR_QUEUE 45 + #endif /* _ASM_SOCKET_H */ diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h index cbbad74b2e06..89f49b68a21c 100644 --- a/arch/sparc/include/uapi/asm/socket.h +++ b/arch/sparc/include/uapi/asm/socket.h @@ -68,6 +68,8 @@ #define SO_LOCK_FILTER 0x0028 +#define SO_SELECT_ERR_QUEUE 0x0029 + /* Security levels - as per NRL IPv6 - don't actually do anything */ #define SO_SECURITY_AUTHENTICATION 0x5001 #define SO_SECURITY_ENCRYPTION_TRANSPORT 0x5002 diff --git a/arch/xtensa/include/uapi/asm/socket.h b/arch/xtensa/include/uapi/asm/socket.h index 35905cb6e419..a8f44f50e651 100644 --- a/arch/xtensa/include/uapi/asm/socket.h +++ b/arch/xtensa/include/uapi/asm/socket.h @@ -83,4 +83,6 @@ #define SO_LOCK_FILTER 44 +#define SO_SELECT_ERR_QUEUE 45 + #endif /* _XTENSA_SOCKET_H */ diff --git a/include/net/sock.h b/include/net/sock.h index 14f6e9d19dc7..08f05f964737 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -667,6 +667,7 @@ enum sock_flags { * user-space instead. */ SOCK_FILTER_LOCKED, /* Filter cannot be changed anymore */ + SOCK_SELECT_ERR_QUEUE, /* Wake select on error queue */ }; static inline void sock_copy_flags(struct sock *nsk, struct sock *osk) diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h index 4ef3acbba5da..c5d2e3a1cf68 100644 --- a/include/uapi/asm-generic/socket.h +++ b/include/uapi/asm-generic/socket.h @@ -74,4 +74,6 @@ #define SO_LOCK_FILTER 44 +#define SO_SELECT_ERR_QUEUE 45 + #endif /* __ASM_GENERIC_SOCKET_H */ diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index d3ee69b35a78..409902f892ff 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -422,7 +422,8 @@ unsigned int bt_sock_poll(struct file *file, struct socket *sock, return bt_accept_poll(sk); if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) - mask |= POLLERR; + mask |= POLLERR | + sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0; if (sk->sk_shutdown & RCV_SHUTDOWN) mask |= POLLRDHUP | POLLIN | POLLRDNORM; diff --git a/net/core/datagram.c b/net/core/datagram.c index 368f9c3f9dc6..36da5b663514 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -749,7 +749,9 @@ unsigned int datagram_poll(struct file *file, struct socket *sock, /* exceptional events? */ if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) - mask |= POLLERR; + mask |= POLLERR | + sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0; + if (sk->sk_shutdown & RCV_SHUTDOWN) mask |= POLLRDHUP | POLLIN | POLLRDNORM; if (sk->sk_shutdown == SHUTDOWN_MASK) diff --git a/net/core/sock.c b/net/core/sock.c index a19e728d5fb6..2ff5f3619a8d 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -907,6 +907,10 @@ set_rcvbuf: sock_valbool_flag(sk, SOCK_NOFCS, valbool); break; + case SO_SELECT_ERR_QUEUE: + sock_valbool_flag(sk, SOCK_SELECT_ERR_QUEUE, valbool); + break; + default: ret = -ENOPROTOOPT; break; @@ -1160,6 +1164,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname, v.val = sock_flag(sk, SOCK_FILTER_LOCKED); break; + case SO_SELECT_ERR_QUEUE: + v.val = sock_flag(sk, SOCK_SELECT_ERR_QUEUE); + break; + default: return -ENOPROTOOPT; } diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index a7d11ffe4284..f0550a38f295 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -1461,7 +1461,8 @@ unsigned int iucv_sock_poll(struct file *file, struct socket *sock, return iucv_accept_poll(sk); if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) - mask |= POLLERR; + mask |= POLLERR | + sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0; if (sk->sk_shutdown & RCV_SHUTDOWN) mask |= POLLRDHUP; diff --git a/net/nfc/llcp/sock.c b/net/nfc/llcp/sock.c index f1b377e247fe..2d55e8a45958 100644 --- a/net/nfc/llcp/sock.c +++ b/net/nfc/llcp/sock.c @@ -521,7 +521,8 @@ static unsigned int llcp_sock_poll(struct file *file, struct socket *sock, return llcp_accept_poll(sk); if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) - mask |= POLLERR; + mask |= POLLERR | + sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0; if (!skb_queue_empty(&sk->sk_receive_queue)) mask |= POLLIN | POLLRDNORM; diff --git a/net/sctp/socket.c b/net/sctp/socket.c index b9070736b8d9..dd21ae3013d8 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -6185,7 +6185,8 @@ unsigned int sctp_poll(struct file *file, struct socket *sock, poll_table *wait) /* Is there any exceptional events? */ if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) - mask |= POLLERR; + mask |= POLLERR | + sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0; if (sk->sk_shutdown & RCV_SHUTDOWN) mask |= POLLRDHUP | POLLIN | POLLRDNORM; if (sk->sk_shutdown == SHUTDOWN_MASK) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 971282b6f6a3..fb7a63ff71a5 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2196,7 +2196,9 @@ static unsigned int unix_dgram_poll(struct file *file, struct socket *sock, /* exceptional events? */ if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) - mask |= POLLERR; + mask |= POLLERR | + sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0; + if (sk->sk_shutdown & RCV_SHUTDOWN) mask |= POLLRDHUP | POLLIN | POLLRDNORM; if (sk->sk_shutdown == SHUTDOWN_MASK) -- cgit v1.2.3 From c90558dae51cef334f3d9d447cf7c0fd1bfe725d Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Thu, 21 Mar 2013 11:57:59 +0200 Subject: ipvs: avoid routing by TOS for real server Avoid replacing the cached route for real server on every packet with different TOS. I doubt that routing by TOS for real server is used at all, so we should be better with such optimization. Signed-off-by: Julian Anastasov Signed-off by: Hans Schillstrom Signed-off-by: Simon Horman --- include/net/ip_vs.h | 1 - net/netfilter/ipvs/ip_vs_xmit.c | 58 ++++++++++++++++++----------------------- 2 files changed, 25 insertions(+), 34 deletions(-) (limited to 'include/net') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index bee87badabef..64db11769cc2 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -753,7 +753,6 @@ struct ip_vs_dest { /* for destination cache */ spinlock_t dst_lock; /* lock of dst_cache */ struct dst_entry *dst_cache; /* destination cache entry */ - u32 dst_rtos; /* RT_TOS(tos) for dst */ u32 dst_cookie; union nf_inet_addr dst_saddr; diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index ee6b7a9f1ec2..4b0bd15ad7c1 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -57,27 +57,24 @@ enum { * Destination cache to speed up outgoing route lookup */ static inline void -__ip_vs_dst_set(struct ip_vs_dest *dest, u32 rtos, struct dst_entry *dst, - u32 dst_cookie) +__ip_vs_dst_set(struct ip_vs_dest *dest, struct dst_entry *dst, u32 dst_cookie) { struct dst_entry *old_dst; old_dst = dest->dst_cache; dest->dst_cache = dst; - dest->dst_rtos = rtos; dest->dst_cookie = dst_cookie; dst_release(old_dst); } static inline struct dst_entry * -__ip_vs_dst_check(struct ip_vs_dest *dest, u32 rtos) +__ip_vs_dst_check(struct ip_vs_dest *dest) { struct dst_entry *dst = dest->dst_cache; if (!dst) return NULL; - if ((dst->obsolete || rtos != dest->dst_rtos) && - dst->ops->check(dst, dest->dst_cookie) == NULL) { + if (dst->obsolete && dst->ops->check(dst, dest->dst_cookie) == NULL) { dest->dst_cache = NULL; dst_release(dst); return NULL; @@ -104,7 +101,7 @@ __mtu_check_toobig_v6(const struct sk_buff *skb, u32 mtu) /* Get route to daddr, update *saddr, optionally bind route to saddr */ static struct rtable *do_output_route4(struct net *net, __be32 daddr, - u32 rtos, int rt_mode, __be32 *saddr) + int rt_mode, __be32 *saddr) { struct flowi4 fl4; struct rtable *rt; @@ -113,7 +110,6 @@ static struct rtable *do_output_route4(struct net *net, __be32 daddr, memset(&fl4, 0, sizeof(fl4)); fl4.daddr = daddr; fl4.saddr = (rt_mode & IP_VS_RT_MODE_CONNECT) ? *saddr : 0; - fl4.flowi4_tos = rtos; fl4.flowi4_flags = (rt_mode & IP_VS_RT_MODE_KNOWN_NH) ? FLOWI_FLAG_KNOWN_NH : 0; @@ -124,7 +120,7 @@ retry: if (PTR_ERR(rt) == -EINVAL && *saddr && rt_mode & IP_VS_RT_MODE_CONNECT && !loop) { *saddr = 0; - flowi4_update_output(&fl4, 0, rtos, daddr, 0); + flowi4_update_output(&fl4, 0, 0, daddr, 0); goto retry; } IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n", &daddr); @@ -132,7 +128,7 @@ retry: } else if (!*saddr && rt_mode & IP_VS_RT_MODE_CONNECT && fl4.saddr) { ip_rt_put(rt); *saddr = fl4.saddr; - flowi4_update_output(&fl4, 0, rtos, daddr, fl4.saddr); + flowi4_update_output(&fl4, 0, 0, daddr, fl4.saddr); loop++; goto retry; } @@ -143,7 +139,7 @@ retry: /* Get route to destination or remote server */ static struct rtable * __ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest, - __be32 daddr, u32 rtos, int rt_mode, __be32 *ret_saddr) + __be32 daddr, int rt_mode, __be32 *ret_saddr) { struct net *net = dev_net(skb_dst(skb)->dev); struct rtable *rt; /* Route to the other host */ @@ -152,19 +148,18 @@ __ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest, if (dest) { spin_lock(&dest->dst_lock); - if (!(rt = (struct rtable *) - __ip_vs_dst_check(dest, rtos))) { - rt = do_output_route4(net, dest->addr.ip, rtos, - rt_mode, &dest->dst_saddr.ip); + rt = (struct rtable *) __ip_vs_dst_check(dest); + if (!rt) { + rt = do_output_route4(net, dest->addr.ip, rt_mode, + &dest->dst_saddr.ip); if (!rt) { spin_unlock(&dest->dst_lock); return NULL; } - __ip_vs_dst_set(dest, rtos, dst_clone(&rt->dst), 0); - IP_VS_DBG(10, "new dst %pI4, src %pI4, refcnt=%d, " - "rtos=%X\n", + __ip_vs_dst_set(dest, dst_clone(&rt->dst), 0); + IP_VS_DBG(10, "new dst %pI4, src %pI4, refcnt=%d\n", &dest->addr.ip, &dest->dst_saddr.ip, - atomic_read(&rt->dst.__refcnt), rtos); + atomic_read(&rt->dst.__refcnt)); } daddr = dest->addr.ip; if (ret_saddr) @@ -177,7 +172,7 @@ __ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest, * for performance reasons because we do not remember saddr */ rt_mode &= ~IP_VS_RT_MODE_CONNECT; - rt = do_output_route4(net, daddr, rtos, rt_mode, &saddr); + rt = do_output_route4(net, daddr, rt_mode, &saddr); if (!rt) return NULL; if (ret_saddr) @@ -307,7 +302,7 @@ __ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_dest *dest, if (dest) { spin_lock(&dest->dst_lock); - rt = (struct rt6_info *)__ip_vs_dst_check(dest, 0); + rt = (struct rt6_info *)__ip_vs_dst_check(dest); if (!rt) { u32 cookie; @@ -320,7 +315,7 @@ __ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_dest *dest, } rt = (struct rt6_info *) dst; cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0; - __ip_vs_dst_set(dest, 0, dst_clone(&rt->dst), cookie); + __ip_vs_dst_set(dest, dst_clone(&rt->dst), cookie); IP_VS_DBG(10, "new dst %pI6, src %pI6, refcnt=%d\n", &dest->addr.in6, &dest->dst_saddr.in6, atomic_read(&rt->dst.__refcnt)); @@ -449,8 +444,9 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, EnterFunction(10); - if (!(rt = __ip_vs_get_out_rt(skb, NULL, iph->daddr, RT_TOS(iph->tos), - IP_VS_RT_MODE_NON_LOCAL, NULL))) + rt = __ip_vs_get_out_rt(skb, NULL, iph->daddr, IP_VS_RT_MODE_NON_LOCAL, + NULL); + if (!rt) goto tx_error_icmp; /* MTU checking */ @@ -581,10 +577,9 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, } if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, - RT_TOS(iph->tos), IP_VS_RT_MODE_LOCAL | - IP_VS_RT_MODE_NON_LOCAL | - IP_VS_RT_MODE_RDR, NULL))) + IP_VS_RT_MODE_NON_LOCAL | + IP_VS_RT_MODE_RDR, NULL))) goto tx_error_icmp; local = rt->rt_flags & RTCF_LOCAL; /* @@ -832,10 +827,9 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, EnterFunction(10); if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, - RT_TOS(tos), IP_VS_RT_MODE_LOCAL | - IP_VS_RT_MODE_NON_LOCAL | - IP_VS_RT_MODE_CONNECT, - &saddr))) + IP_VS_RT_MODE_LOCAL | + IP_VS_RT_MODE_NON_LOCAL | + IP_VS_RT_MODE_CONNECT, &saddr))) goto tx_error_icmp; if (rt->rt_flags & RTCF_LOCAL) { ip_rt_put(rt); @@ -1067,7 +1061,6 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, EnterFunction(10); if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, - RT_TOS(iph->tos), IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL | IP_VS_RT_MODE_KNOWN_NH, NULL))) @@ -1223,7 +1216,6 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL | IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL; if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, - RT_TOS(ip_hdr(skb)->tos), rt_mode, NULL))) goto tx_error_icmp; local = rt->rt_flags & RTCF_LOCAL; -- cgit v1.2.3 From d1deae4d3ab37d833f278fec975a8f2ddeb78f3b Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Thu, 21 Mar 2013 11:58:02 +0200 Subject: ipvs: rename functions related to dst_cache reset Move and give better names to two functions: - ip_vs_dst_reset to __ip_vs_dst_cache_reset - __ip_vs_dev_reset to ip_vs_forget_dev Signed-off-by: Julian Anastasov Signed-off by: Hans Schillstrom Signed-off-by: Simon Horman --- include/net/ip_vs.h | 1 - net/netfilter/ipvs/ip_vs_ctl.c | 34 ++++++++++++++++++++++------------ net/netfilter/ipvs/ip_vs_xmit.c | 14 -------------- 3 files changed, 22 insertions(+), 27 deletions(-) (limited to 'include/net') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 64db11769cc2..8ad73a834655 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -1427,7 +1427,6 @@ extern int ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, extern int ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp, int offset, unsigned int hooknum, struct ip_vs_iphdr *iph); -extern void ip_vs_dst_reset(struct ip_vs_dest *dest); #ifdef CONFIG_IP_VS_IPV6 extern int ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 6b55ba69c800..5265eaa7b42b 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -641,6 +641,17 @@ struct ip_vs_dest *ip_vs_find_dest(struct net *net, int af, return dest; } +/* Release dst_cache for dest in user context */ +static void __ip_vs_dst_cache_reset(struct ip_vs_dest *dest) +{ + struct dst_entry *old_dst; + + old_dst = dest->dst_cache; + dest->dst_cache = NULL; + dst_release(old_dst); + dest->dst_saddr.ip = 0; +} + /* * Lookup dest by {svc,addr,port} in the destination trash. * The destination trash is used to hold the destinations that are removed @@ -690,7 +701,7 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr, IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port)); list_del(&dest->n_list); - ip_vs_dst_reset(dest); + __ip_vs_dst_cache_reset(dest); __ip_vs_unbind_svc(dest); free_percpu(dest->stats.cpustats); kfree(dest); @@ -717,7 +728,7 @@ static void ip_vs_trash_cleanup(struct net *net) list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) { list_del(&dest->n_list); - ip_vs_dst_reset(dest); + __ip_vs_dst_cache_reset(dest); __ip_vs_unbind_svc(dest); free_percpu(dest->stats.cpustats); kfree(dest); @@ -811,7 +822,7 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest, dest->l_threshold = udest->l_threshold; spin_lock_bh(&dest->dst_lock); - ip_vs_dst_reset(dest); + __ip_vs_dst_cache_reset(dest); spin_unlock_bh(&dest->dst_lock); if (add) @@ -1037,7 +1048,7 @@ static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest) dest->vfwmark, IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port)); - ip_vs_dst_reset(dest); + __ip_vs_dst_cache_reset(dest); /* simply decrease svc->refcnt here, let the caller check and release the service if nobody refers to it. Only user context can release destination and service, @@ -1496,11 +1507,10 @@ void ip_vs_service_net_cleanup(struct net *net) mutex_unlock(&__ip_vs_mutex); LeaveFunction(2); } -/* - * Release dst hold by dst_cache - */ + +/* Put all references for device (dst_cache) */ static inline void -__ip_vs_dev_reset(struct ip_vs_dest *dest, struct net_device *dev) +ip_vs_forget_dev(struct ip_vs_dest *dest, struct net_device *dev) { spin_lock_bh(&dest->dst_lock); if (dest->dst_cache && dest->dst_cache->dev == dev) { @@ -1509,7 +1519,7 @@ __ip_vs_dev_reset(struct ip_vs_dest *dest, struct net_device *dev) IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port), atomic_read(&dest->refcnt)); - ip_vs_dst_reset(dest); + __ip_vs_dst_cache_reset(dest); } spin_unlock_bh(&dest->dst_lock); @@ -1537,7 +1547,7 @@ static int ip_vs_dst_event(struct notifier_block *this, unsigned long event, if (net_eq(svc->net, net)) { list_for_each_entry(dest, &svc->destinations, n_list) { - __ip_vs_dev_reset(dest, dev); + ip_vs_forget_dev(dest, dev); } } } @@ -1546,7 +1556,7 @@ static int ip_vs_dst_event(struct notifier_block *this, unsigned long event, if (net_eq(svc->net, net)) { list_for_each_entry(dest, &svc->destinations, n_list) { - __ip_vs_dev_reset(dest, dev); + ip_vs_forget_dev(dest, dev); } } @@ -1554,7 +1564,7 @@ static int ip_vs_dst_event(struct notifier_block *this, unsigned long event, } list_for_each_entry(dest, &ipvs->dest_trash, n_list) { - __ip_vs_dev_reset(dest, dev); + ip_vs_forget_dev(dest, dev); } mutex_unlock(&__ip_vs_mutex); LeaveFunction(2); diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 7cd7c61692ac..6448a2e862dc 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -362,20 +362,6 @@ __ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_dest *dest, #endif -/* - * Release dest->dst_cache before a dest is removed - */ -void -ip_vs_dst_reset(struct ip_vs_dest *dest) -{ - struct dst_entry *old_dst; - - old_dst = dest->dst_cache; - dest->dst_cache = NULL; - dst_release(old_dst); - dest->dst_saddr.ip = 0; -} - /* return NF_ACCEPT to allow forwarding or other NF_xxx on error */ static inline int ip_vs_tunnel_xmit_prepare(struct sk_buff *skb, struct ip_vs_conn *cp) -- cgit v1.2.3 From 026ace060dfe29275d2188297a62fa37d6c1a02c Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Thu, 21 Mar 2013 11:58:06 +0200 Subject: ipvs: optimize dst usage for real server Currently when forwarding requests to real servers we use dst_lock and atomic operations when cloning the dst_cache value. As the dst_cache value does not change most of the time it is better to use RCU and to lock dst_lock only when we need to replace the obsoleted dst. For this to work we keep dst_cache in new structure protected by RCU. For packets to remote real servers we will use noref version of dst_cache, it will be valid while we are in RCU read-side critical section because now dst_release for replaced dsts will be invoked after the grace period. Packets to local real servers that are passed to local stack with NF_ACCEPT need a dst clone. Signed-off-by: Julian Anastasov Signed-off by: Hans Schillstrom Signed-off-by: Simon Horman --- include/net/ip_vs.h | 12 ++- net/netfilter/ipvs/ip_vs_core.c | 11 ++- net/netfilter/ipvs/ip_vs_ctl.c | 25 ++++-- net/netfilter/ipvs/ip_vs_xmit.c | 188 ++++++++++++++++++++++++++++++---------- 4 files changed, 177 insertions(+), 59 deletions(-) (limited to 'include/net') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 8ad73a834655..a150ff5d838c 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -724,6 +724,13 @@ struct ip_vs_service { struct ip_vs_pe *pe; }; +/* Information for cached dst */ +struct ip_vs_dest_dst { + struct dst_entry *dst_cache; /* destination cache entry */ + u32 dst_cookie; + union nf_inet_addr dst_saddr; + struct rcu_head rcu_head; +}; /* * The real server destination forwarding entry @@ -752,9 +759,7 @@ struct ip_vs_dest { /* for destination cache */ spinlock_t dst_lock; /* lock of dst_cache */ - struct dst_entry *dst_cache; /* destination cache entry */ - u32 dst_cookie; - union nf_inet_addr dst_saddr; + struct ip_vs_dest_dst __rcu *dest_dst; /* cached dst info */ /* for virtual service */ struct ip_vs_service *svc; /* service it belongs to */ @@ -1427,6 +1432,7 @@ extern int ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, extern int ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp, int offset, unsigned int hooknum, struct ip_vs_iphdr *iph); +extern void ip_vs_dest_dst_rcu_free(struct rcu_head *head); #ifdef CONFIG_IP_VS_IPV6 extern int ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 2aef23ed748b..6ad24e7ef440 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -1395,10 +1395,13 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) goto ignore_ipip; /* Prefer the resulting PMTU */ if (dest) { - spin_lock(&dest->dst_lock); - if (dest->dst_cache) - mtu = dst_mtu(dest->dst_cache); - spin_unlock(&dest->dst_lock); + struct ip_vs_dest_dst *dest_dst; + + rcu_read_lock(); + dest_dst = rcu_dereference(dest->dest_dst); + if (dest_dst) + mtu = dst_mtu(dest_dst->dst_cache); + rcu_read_unlock(); } if (mtu > 68 + sizeof(struct iphdr)) mtu -= sizeof(struct iphdr); diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 5265eaa7b42b..ef48cc55f775 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -641,15 +641,26 @@ struct ip_vs_dest *ip_vs_find_dest(struct net *net, int af, return dest; } -/* Release dst_cache for dest in user context */ +void ip_vs_dest_dst_rcu_free(struct rcu_head *head) +{ + struct ip_vs_dest_dst *dest_dst = container_of(head, + struct ip_vs_dest_dst, + rcu_head); + + dst_release(dest_dst->dst_cache); + kfree(dest_dst); +} + +/* Release dest_dst and dst_cache for dest in user context */ static void __ip_vs_dst_cache_reset(struct ip_vs_dest *dest) { - struct dst_entry *old_dst; + struct ip_vs_dest_dst *old; - old_dst = dest->dst_cache; - dest->dst_cache = NULL; - dst_release(old_dst); - dest->dst_saddr.ip = 0; + old = rcu_dereference_protected(dest->dest_dst, 1); + if (old) { + RCU_INIT_POINTER(dest->dest_dst, NULL); + call_rcu(&old->rcu_head, ip_vs_dest_dst_rcu_free); + } } /* @@ -1513,7 +1524,7 @@ static inline void ip_vs_forget_dev(struct ip_vs_dest *dest, struct net_device *dev) { spin_lock_bh(&dest->dst_lock); - if (dest->dst_cache && dest->dst_cache->dev == dev) { + if (dest->dest_dst && dest->dest_dst->dst_cache->dev == dev) { IP_VS_DBG_BUF(3, "Reset dev:%s dest %s:%u ,dest->refcnt=%d\n", dev->name, IP_VS_DBG_ADDR(dest->af, &dest->addr), diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 603eb8a50e19..3db7889edefc 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -17,6 +17,8 @@ * - not all connections have destination server, for example, * connections in backup server when fwmark is used * - bypass connections use daddr from packet + * - we can use dst without ref while sending in RCU section, we use + * ref when returning NF_ACCEPT for NAT-ed packet via loopback * LOCAL_OUT rules: * - skb->dev is NULL, skb->protocol is not set (both are set in POST_ROUTING) * - skb->pkt_type is not set yet @@ -54,34 +56,51 @@ enum { IP_VS_RT_MODE_TUNNEL = 32,/* Tunnel mode */ }; +static inline struct ip_vs_dest_dst *ip_vs_dest_dst_alloc(void) +{ + return kmalloc(sizeof(struct ip_vs_dest_dst), GFP_ATOMIC); +} + +static inline void ip_vs_dest_dst_free(struct ip_vs_dest_dst *dest_dst) +{ + kfree(dest_dst); +} + /* * Destination cache to speed up outgoing route lookup */ static inline void -__ip_vs_dst_set(struct ip_vs_dest *dest, struct dst_entry *dst, u32 dst_cookie) +__ip_vs_dst_set(struct ip_vs_dest *dest, struct ip_vs_dest_dst *dest_dst, + struct dst_entry *dst, u32 dst_cookie) { - struct dst_entry *old_dst; + struct ip_vs_dest_dst *old; + + old = rcu_dereference_protected(dest->dest_dst, + lockdep_is_held(&dest->dst_lock)); + + if (dest_dst) { + dest_dst->dst_cache = dst; + dest_dst->dst_cookie = dst_cookie; + } + rcu_assign_pointer(dest->dest_dst, dest_dst); - old_dst = dest->dst_cache; - dest->dst_cache = dst; - dest->dst_cookie = dst_cookie; - dst_release(old_dst); + if (old) + call_rcu(&old->rcu_head, ip_vs_dest_dst_rcu_free); } -static inline struct dst_entry * +static inline struct ip_vs_dest_dst * __ip_vs_dst_check(struct ip_vs_dest *dest) { - struct dst_entry *dst = dest->dst_cache; + struct ip_vs_dest_dst *dest_dst = rcu_dereference(dest->dest_dst); + struct dst_entry *dst; - if (!dst) + if (!dest_dst) return NULL; - if (dst->obsolete && dst->ops->check(dst, dest->dst_cookie) == NULL) { - dest->dst_cache = NULL; - dst_release(dst); + dst = dest_dst->dst_cache; + if (dst->obsolete && + dst->ops->check(dst, dest_dst->dst_cookie) == NULL) return NULL; - } - dst_hold(dst); - return dst; + return dest_dst; } static inline bool @@ -144,35 +163,48 @@ __ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest, { struct net *net = dev_net(skb_dst(skb)->dev); struct netns_ipvs *ipvs = net_ipvs(net); + struct ip_vs_dest_dst *dest_dst; struct rtable *rt; /* Route to the other host */ struct rtable *ort; /* Original route */ struct iphdr *iph; __be16 df; int mtu; - int local; + int local, noref = 1; if (dest) { - spin_lock(&dest->dst_lock); - rt = (struct rtable *) __ip_vs_dst_check(dest); - if (!rt) { + dest_dst = __ip_vs_dst_check(dest); + if (likely(dest_dst)) + rt = (struct rtable *) dest_dst->dst_cache; + else { + dest_dst = ip_vs_dest_dst_alloc(); + spin_lock(&dest->dst_lock); + if (!dest_dst) { + __ip_vs_dst_set(dest, NULL, NULL, 0); + spin_unlock(&dest->dst_lock); + goto err_unreach; + } rt = do_output_route4(net, dest->addr.ip, rt_mode, - &dest->dst_saddr.ip); + &dest_dst->dst_saddr.ip); if (!rt) { + __ip_vs_dst_set(dest, NULL, NULL, 0); spin_unlock(&dest->dst_lock); + ip_vs_dest_dst_free(dest_dst); goto err_unreach; } - __ip_vs_dst_set(dest, dst_clone(&rt->dst), 0); + __ip_vs_dst_set(dest, dest_dst, &rt->dst, 0); + spin_unlock(&dest->dst_lock); IP_VS_DBG(10, "new dst %pI4, src %pI4, refcnt=%d\n", - &dest->addr.ip, &dest->dst_saddr.ip, + &dest->addr.ip, &dest_dst->dst_saddr.ip, atomic_read(&rt->dst.__refcnt)); } daddr = dest->addr.ip; if (ret_saddr) - *ret_saddr = dest->dst_saddr.ip; - spin_unlock(&dest->dst_lock); + *ret_saddr = dest_dst->dst_saddr.ip; } else { __be32 saddr = htonl(INADDR_ANY); + noref = 0; + /* For such unconfigured boxes avoid many route lookups * for performance reasons because we do not remember saddr */ @@ -210,7 +242,8 @@ __ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest, goto err_put; } /* skb to local stack, preserve old route */ - ip_rt_put(rt); + if (!noref) + ip_rt_put(rt); return local; } @@ -240,12 +273,19 @@ __ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest, } skb_dst_drop(skb); - skb_dst_set(skb, &rt->dst); + if (noref) { + if (!local) + skb_dst_set_noref_force(skb, &rt->dst); + else + skb_dst_set(skb, dst_clone(&rt->dst)); + } else + skb_dst_set(skb, &rt->dst); return local; err_put: - ip_rt_put(rt); + if (!noref) + ip_rt_put(rt); return -1; err_unreach: @@ -303,36 +343,48 @@ __ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_dest *dest, struct ip_vs_iphdr *ipvsh, int do_xfrm, int rt_mode) { struct net *net = dev_net(skb_dst(skb)->dev); + struct ip_vs_dest_dst *dest_dst; struct rt6_info *rt; /* Route to the other host */ struct rt6_info *ort; /* Original route */ struct dst_entry *dst; int mtu; - int local; + int local, noref = 1; if (dest) { - spin_lock(&dest->dst_lock); - rt = (struct rt6_info *)__ip_vs_dst_check(dest); - if (!rt) { + dest_dst = __ip_vs_dst_check(dest); + if (likely(dest_dst)) + rt = (struct rt6_info *) dest_dst->dst_cache; + else { u32 cookie; + dest_dst = ip_vs_dest_dst_alloc(); + spin_lock(&dest->dst_lock); + if (!dest_dst) { + __ip_vs_dst_set(dest, NULL, NULL, 0); + spin_unlock(&dest->dst_lock); + goto err_unreach; + } dst = __ip_vs_route_output_v6(net, &dest->addr.in6, - &dest->dst_saddr.in6, + &dest_dst->dst_saddr.in6, do_xfrm); if (!dst) { + __ip_vs_dst_set(dest, NULL, NULL, 0); spin_unlock(&dest->dst_lock); + ip_vs_dest_dst_free(dest_dst); goto err_unreach; } rt = (struct rt6_info *) dst; cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0; - __ip_vs_dst_set(dest, dst_clone(&rt->dst), cookie); + __ip_vs_dst_set(dest, dest_dst, &rt->dst, cookie); + spin_unlock(&dest->dst_lock); IP_VS_DBG(10, "new dst %pI6, src %pI6, refcnt=%d\n", - &dest->addr.in6, &dest->dst_saddr.in6, + &dest->addr.in6, &dest_dst->dst_saddr.in6, atomic_read(&rt->dst.__refcnt)); } if (ret_saddr) - *ret_saddr = dest->dst_saddr.in6; - spin_unlock(&dest->dst_lock); + *ret_saddr = dest_dst->dst_saddr.in6; } else { + noref = 0; dst = __ip_vs_route_output_v6(net, daddr, ret_saddr, do_xfrm); if (!dst) goto err_unreach; @@ -367,7 +419,8 @@ __ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_dest *dest, goto err_put; } /* skb to local stack, preserve old route */ - dst_release(&rt->dst); + if (!noref) + dst_release(&rt->dst); return local; } @@ -399,12 +452,19 @@ __ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_dest *dest, } skb_dst_drop(skb); - skb_dst_set(skb, &rt->dst); + if (noref) { + if (!local) + skb_dst_set_noref_force(skb, &rt->dst); + else + skb_dst_set(skb, dst_clone(&rt->dst)); + } else + skb_dst_set(skb, &rt->dst); return local; err_put: - dst_release(&rt->dst); + if (!noref) + dst_release(&rt->dst); return -1; err_unreach: @@ -494,6 +554,7 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, EnterFunction(10); + rcu_read_lock(); if (__ip_vs_get_out_rt(skb, NULL, iph->daddr, IP_VS_RT_MODE_NON_LOCAL, NULL) < 0) goto tx_error; @@ -504,12 +565,14 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, skb->local_df = 1; ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 0); + rcu_read_unlock(); LeaveFunction(10); return NF_STOLEN; tx_error: kfree_skb(skb); + rcu_read_unlock(); LeaveFunction(10); return NF_STOLEN; } @@ -521,6 +584,7 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, { EnterFunction(10); + rcu_read_lock(); if (__ip_vs_get_out_rt_v6(skb, NULL, &ipvsh->daddr.in6, NULL, ipvsh, 0, IP_VS_RT_MODE_NON_LOCAL) < 0) goto tx_error; @@ -529,12 +593,14 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, skb->local_df = 1; ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 0); + rcu_read_unlock(); LeaveFunction(10); return NF_STOLEN; tx_error: kfree_skb(skb); + rcu_read_unlock(); LeaveFunction(10); return NF_STOLEN; } @@ -553,6 +619,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, EnterFunction(10); + rcu_read_lock(); /* check if it is a connection of no-client-port */ if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) { __be16 _pt, *p; @@ -620,12 +687,14 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, skb->local_df = 1; rc = ip_vs_nat_send_or_cont(NFPROTO_IPV4, skb, cp, local); + rcu_read_unlock(); LeaveFunction(10); return rc; tx_error: kfree_skb(skb); + rcu_read_unlock(); LeaveFunction(10); return NF_STOLEN; } @@ -640,6 +709,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, EnterFunction(10); + rcu_read_lock(); /* check if it is a connection of no-client-port */ if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT && !ipvsh->fragoffs)) { __be16 _pt, *p; @@ -707,6 +777,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, skb->local_df = 1; rc = ip_vs_nat_send_or_cont(NFPROTO_IPV6, skb, cp, local); + rcu_read_unlock(); LeaveFunction(10); return rc; @@ -714,6 +785,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, tx_error: LeaveFunction(10); kfree_skb(skb); + rcu_read_unlock(); return NF_STOLEN; } #endif @@ -755,6 +827,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, EnterFunction(10); + rcu_read_lock(); local = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL | @@ -762,8 +835,10 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, IP_VS_RT_MODE_TUNNEL, &saddr); if (local < 0) goto tx_error; - if (local) + if (local) { + rcu_read_unlock(); return ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 1); + } rt = skb_rtable(skb); tdev = rt->dst.dev; @@ -818,6 +893,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, ip_local_out(skb); else if (ret == NF_DROP) kfree_skb(skb); + rcu_read_unlock(); LeaveFunction(10); @@ -825,6 +901,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, tx_error: kfree_skb(skb); + rcu_read_unlock(); LeaveFunction(10); return NF_STOLEN; } @@ -844,6 +921,7 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, EnterFunction(10); + rcu_read_lock(); local = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, &saddr, ipvsh, 1, IP_VS_RT_MODE_LOCAL | @@ -851,8 +929,10 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, IP_VS_RT_MODE_TUNNEL); if (local < 0) goto tx_error; - if (local) + if (local) { + rcu_read_unlock(); return ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 1); + } rt = (struct rt6_info *) skb_dst(skb); tdev = rt->dst.dev; @@ -901,6 +981,7 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, ip6_local_out(skb); else if (ret == NF_DROP) kfree_skb(skb); + rcu_read_unlock(); LeaveFunction(10); @@ -908,6 +989,7 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, tx_error: kfree_skb(skb); + rcu_read_unlock(); LeaveFunction(10); return NF_STOLEN; } @@ -926,14 +1008,17 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, EnterFunction(10); + rcu_read_lock(); local = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL | IP_VS_RT_MODE_KNOWN_NH, NULL); if (local < 0) goto tx_error; - if (local) + if (local) { + rcu_read_unlock(); return ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 1); + } ip_send_check(ip_hdr(skb)); @@ -941,12 +1026,14 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, skb->local_df = 1; ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 0); + rcu_read_unlock(); LeaveFunction(10); return NF_STOLEN; tx_error: kfree_skb(skb); + rcu_read_unlock(); LeaveFunction(10); return NF_STOLEN; } @@ -960,25 +1047,30 @@ ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, EnterFunction(10); + rcu_read_lock(); local = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL, ipvsh, 0, IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL); if (local < 0) goto tx_error; - if (local) + if (local) { + rcu_read_unlock(); return ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 1); + } /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 0); + rcu_read_unlock(); LeaveFunction(10); return NF_STOLEN; tx_error: kfree_skb(skb); + rcu_read_unlock(); LeaveFunction(10); return NF_STOLEN; } @@ -1023,6 +1115,7 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, rt_mode = (hooknum != NF_INET_FORWARD) ? IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL | IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL; + rcu_read_lock(); local = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, rt_mode, NULL); if (local < 0) goto tx_error; @@ -1067,10 +1160,12 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, skb->local_df = 1; rc = ip_vs_nat_send_or_cont(NFPROTO_IPV4, skb, cp, local); + rcu_read_unlock(); goto out; tx_error: - dev_kfree_skb(skb); + kfree_skb(skb); + rcu_read_unlock(); rc = NF_STOLEN; out: LeaveFunction(10); @@ -1111,6 +1206,7 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, rt_mode = (hooknum != NF_INET_FORWARD) ? IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL | IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL; + rcu_read_lock(); local = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL, ipvsh, 0, rt_mode); if (local < 0) @@ -1156,10 +1252,12 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, skb->local_df = 1; rc = ip_vs_nat_send_or_cont(NFPROTO_IPV6, skb, cp, local); + rcu_read_unlock(); goto out; tx_error: - dev_kfree_skb(skb); + kfree_skb(skb); + rcu_read_unlock(); rc = NF_STOLEN; out: LeaveFunction(10); -- cgit v1.2.3 From 363c97d7435ebba8a040f86e29bdec79ee182f0c Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Thu, 21 Mar 2013 11:58:07 +0200 Subject: ipvs: convert app locks We use locks like tcp_app_lock, udp_app_lock, sctp_app_lock to protect access to the protocol hash tables from readers in packet context while the application instances (inc) are [un]registered under global mutex. As the hash tables are mostly read when conns are created and bound to app, use RCU for readers and reclaim app instance after grace period. Simplify ip_vs_app_inc_get because we use usecnt only for statistics and rely on module refcounting. Signed-off-by: Julian Anastasov Signed-off by: Hans Schillstrom Signed-off-by: Simon Horman --- include/net/ip_vs.h | 4 +--- net/netfilter/ipvs/ip_vs_app.c | 27 +++++++++++++++++++-------- net/netfilter/ipvs/ip_vs_ftp.c | 2 ++ net/netfilter/ipvs/ip_vs_proto_sctp.c | 18 ++++++------------ net/netfilter/ipvs/ip_vs_proto_tcp.c | 18 ++++++------------ net/netfilter/ipvs/ip_vs_proto_udp.c | 19 ++++++------------- 6 files changed, 40 insertions(+), 48 deletions(-) (limited to 'include/net') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index a150ff5d838c..84ca17141a44 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -823,6 +823,7 @@ struct ip_vs_app { struct ip_vs_app *app; /* its real application */ __be16 port; /* port number in net order */ atomic_t usecnt; /* usage counter */ + struct rcu_head rcu_head; /* * output hook: Process packet in inout direction, diff set for TCP. @@ -908,7 +909,6 @@ struct netns_ipvs { #define TCP_APP_TAB_SIZE (1 << TCP_APP_TAB_BITS) #define TCP_APP_TAB_MASK (TCP_APP_TAB_SIZE - 1) struct list_head tcp_apps[TCP_APP_TAB_SIZE]; - spinlock_t tcp_app_lock; #endif /* ip_vs_proto_udp */ #ifdef CONFIG_IP_VS_PROTO_UDP @@ -916,7 +916,6 @@ struct netns_ipvs { #define UDP_APP_TAB_SIZE (1 << UDP_APP_TAB_BITS) #define UDP_APP_TAB_MASK (UDP_APP_TAB_SIZE - 1) struct list_head udp_apps[UDP_APP_TAB_SIZE]; - spinlock_t udp_app_lock; #endif /* ip_vs_proto_sctp */ #ifdef CONFIG_IP_VS_PROTO_SCTP @@ -925,7 +924,6 @@ struct netns_ipvs { #define SCTP_APP_TAB_MASK (SCTP_APP_TAB_SIZE - 1) /* Hash table for SCTP application incarnations */ struct list_head sctp_apps[SCTP_APP_TAB_SIZE]; - spinlock_t sctp_app_lock; #endif /* ip_vs_conn */ atomic_t conn_count; /* connection counter */ diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c index 0b779d7df881..a95603004bc4 100644 --- a/net/netfilter/ipvs/ip_vs_app.c +++ b/net/netfilter/ipvs/ip_vs_app.c @@ -58,6 +58,18 @@ static inline void ip_vs_app_put(struct ip_vs_app *app) module_put(app->module); } +static void ip_vs_app_inc_destroy(struct ip_vs_app *inc) +{ + kfree(inc->timeout_table); + kfree(inc); +} + +static void ip_vs_app_inc_rcu_free(struct rcu_head *head) +{ + struct ip_vs_app *inc = container_of(head, struct ip_vs_app, rcu_head); + + ip_vs_app_inc_destroy(inc); +} /* * Allocate/initialize app incarnation and register it in proto apps. @@ -106,8 +118,7 @@ ip_vs_app_inc_new(struct net *net, struct ip_vs_app *app, __u16 proto, return 0; out: - kfree(inc->timeout_table); - kfree(inc); + ip_vs_app_inc_destroy(inc); return ret; } @@ -131,8 +142,7 @@ ip_vs_app_inc_release(struct net *net, struct ip_vs_app *inc) list_del(&inc->a_list); - kfree(inc->timeout_table); - kfree(inc); + call_rcu(&inc->rcu_head, ip_vs_app_inc_rcu_free); } @@ -144,9 +154,9 @@ int ip_vs_app_inc_get(struct ip_vs_app *inc) { int result; - atomic_inc(&inc->usecnt); - if (unlikely((result = ip_vs_app_get(inc->app)) != 1)) - atomic_dec(&inc->usecnt); + result = ip_vs_app_get(inc->app); + if (result) + atomic_inc(&inc->usecnt); return result; } @@ -156,8 +166,8 @@ int ip_vs_app_inc_get(struct ip_vs_app *inc) */ void ip_vs_app_inc_put(struct ip_vs_app *inc) { - ip_vs_app_put(inc->app); atomic_dec(&inc->usecnt); + ip_vs_app_put(inc->app); } @@ -218,6 +228,7 @@ out_unlock: /* * ip_vs_app unregistration routine * We are sure there are no app incarnations attached to services + * Caller should use synchronize_rcu() or rcu_barrier() */ void unregister_ip_vs_app(struct net *net, struct ip_vs_app *app) { diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c index 4f53a5f04437..7f90825ef1ce 100644 --- a/net/netfilter/ipvs/ip_vs_ftp.c +++ b/net/netfilter/ipvs/ip_vs_ftp.c @@ -480,6 +480,7 @@ static int __init ip_vs_ftp_init(void) int rv; rv = register_pernet_subsys(&ip_vs_ftp_ops); + /* rcu_barrier() is called by netns on error */ return rv; } @@ -489,6 +490,7 @@ static int __init ip_vs_ftp_init(void) static void __exit ip_vs_ftp_exit(void) { unregister_pernet_subsys(&ip_vs_ftp_ops); + /* rcu_barrier() is called by netns */ } diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c index cd1d7298f7ba..f7190cdf023e 100644 --- a/net/netfilter/ipvs/ip_vs_proto_sctp.c +++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c @@ -1016,30 +1016,25 @@ static int sctp_register_app(struct net *net, struct ip_vs_app *inc) hash = sctp_app_hashkey(port); - spin_lock_bh(&ipvs->sctp_app_lock); list_for_each_entry(i, &ipvs->sctp_apps[hash], p_list) { if (i->port == port) { ret = -EEXIST; goto out; } } - list_add(&inc->p_list, &ipvs->sctp_apps[hash]); + list_add_rcu(&inc->p_list, &ipvs->sctp_apps[hash]); atomic_inc(&pd->appcnt); out: - spin_unlock_bh(&ipvs->sctp_app_lock); return ret; } static void sctp_unregister_app(struct net *net, struct ip_vs_app *inc) { - struct netns_ipvs *ipvs = net_ipvs(net); struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_SCTP); - spin_lock_bh(&ipvs->sctp_app_lock); atomic_dec(&pd->appcnt); - list_del(&inc->p_list); - spin_unlock_bh(&ipvs->sctp_app_lock); + list_del_rcu(&inc->p_list); } static int sctp_app_conn_bind(struct ip_vs_conn *cp) @@ -1055,12 +1050,12 @@ static int sctp_app_conn_bind(struct ip_vs_conn *cp) /* Lookup application incarnations and bind the right one */ hash = sctp_app_hashkey(cp->vport); - spin_lock(&ipvs->sctp_app_lock); - list_for_each_entry(inc, &ipvs->sctp_apps[hash], p_list) { + rcu_read_lock(); + list_for_each_entry_rcu(inc, &ipvs->sctp_apps[hash], p_list) { if (inc->port == cp->vport) { if (unlikely(!ip_vs_app_inc_get(inc))) break; - spin_unlock(&ipvs->sctp_app_lock); + rcu_read_unlock(); IP_VS_DBG_BUF(9, "%s: Binding conn %s:%u->" "%s:%u to app %s on port %u\n", @@ -1076,7 +1071,7 @@ static int sctp_app_conn_bind(struct ip_vs_conn *cp) goto out; } } - spin_unlock(&ipvs->sctp_app_lock); + rcu_read_unlock(); out: return result; } @@ -1090,7 +1085,6 @@ static int __ip_vs_sctp_init(struct net *net, struct ip_vs_proto_data *pd) struct netns_ipvs *ipvs = net_ipvs(net); ip_vs_init_hash_table(ipvs->sctp_apps, SCTP_APP_TAB_SIZE); - spin_lock_init(&ipvs->sctp_app_lock); pd->timeout_table = ip_vs_create_timeout_table((int *)sctp_timeouts, sizeof(sctp_timeouts)); if (!pd->timeout_table) diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c index 9af653a75825..0bbc3feae682 100644 --- a/net/netfilter/ipvs/ip_vs_proto_tcp.c +++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c @@ -580,18 +580,16 @@ static int tcp_register_app(struct net *net, struct ip_vs_app *inc) hash = tcp_app_hashkey(port); - spin_lock_bh(&ipvs->tcp_app_lock); list_for_each_entry(i, &ipvs->tcp_apps[hash], p_list) { if (i->port == port) { ret = -EEXIST; goto out; } } - list_add(&inc->p_list, &ipvs->tcp_apps[hash]); + list_add_rcu(&inc->p_list, &ipvs->tcp_apps[hash]); atomic_inc(&pd->appcnt); out: - spin_unlock_bh(&ipvs->tcp_app_lock); return ret; } @@ -599,13 +597,10 @@ static int tcp_register_app(struct net *net, struct ip_vs_app *inc) static void tcp_unregister_app(struct net *net, struct ip_vs_app *inc) { - struct netns_ipvs *ipvs = net_ipvs(net); struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_TCP); - spin_lock_bh(&ipvs->tcp_app_lock); atomic_dec(&pd->appcnt); - list_del(&inc->p_list); - spin_unlock_bh(&ipvs->tcp_app_lock); + list_del_rcu(&inc->p_list); } @@ -624,12 +619,12 @@ tcp_app_conn_bind(struct ip_vs_conn *cp) /* Lookup application incarnations and bind the right one */ hash = tcp_app_hashkey(cp->vport); - spin_lock(&ipvs->tcp_app_lock); - list_for_each_entry(inc, &ipvs->tcp_apps[hash], p_list) { + rcu_read_lock(); + list_for_each_entry_rcu(inc, &ipvs->tcp_apps[hash], p_list) { if (inc->port == cp->vport) { if (unlikely(!ip_vs_app_inc_get(inc))) break; - spin_unlock(&ipvs->tcp_app_lock); + rcu_read_unlock(); IP_VS_DBG_BUF(9, "%s(): Binding conn %s:%u->" "%s:%u to app %s on port %u\n", @@ -646,7 +641,7 @@ tcp_app_conn_bind(struct ip_vs_conn *cp) goto out; } } - spin_unlock(&ipvs->tcp_app_lock); + rcu_read_unlock(); out: return result; @@ -676,7 +671,6 @@ static int __ip_vs_tcp_init(struct net *net, struct ip_vs_proto_data *pd) struct netns_ipvs *ipvs = net_ipvs(net); ip_vs_init_hash_table(ipvs->tcp_apps, TCP_APP_TAB_SIZE); - spin_lock_init(&ipvs->tcp_app_lock); pd->timeout_table = ip_vs_create_timeout_table((int *)tcp_timeouts, sizeof(tcp_timeouts)); if (!pd->timeout_table) diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c index 503a842c90d2..1a03e2d9c6ba 100644 --- a/net/netfilter/ipvs/ip_vs_proto_udp.c +++ b/net/netfilter/ipvs/ip_vs_proto_udp.c @@ -359,19 +359,16 @@ static int udp_register_app(struct net *net, struct ip_vs_app *inc) hash = udp_app_hashkey(port); - - spin_lock_bh(&ipvs->udp_app_lock); list_for_each_entry(i, &ipvs->udp_apps[hash], p_list) { if (i->port == port) { ret = -EEXIST; goto out; } } - list_add(&inc->p_list, &ipvs->udp_apps[hash]); + list_add_rcu(&inc->p_list, &ipvs->udp_apps[hash]); atomic_inc(&pd->appcnt); out: - spin_unlock_bh(&ipvs->udp_app_lock); return ret; } @@ -380,12 +377,9 @@ static void udp_unregister_app(struct net *net, struct ip_vs_app *inc) { struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_UDP); - struct netns_ipvs *ipvs = net_ipvs(net); - spin_lock_bh(&ipvs->udp_app_lock); atomic_dec(&pd->appcnt); - list_del(&inc->p_list); - spin_unlock_bh(&ipvs->udp_app_lock); + list_del_rcu(&inc->p_list); } @@ -403,12 +397,12 @@ static int udp_app_conn_bind(struct ip_vs_conn *cp) /* Lookup application incarnations and bind the right one */ hash = udp_app_hashkey(cp->vport); - spin_lock(&ipvs->udp_app_lock); - list_for_each_entry(inc, &ipvs->udp_apps[hash], p_list) { + rcu_read_lock(); + list_for_each_entry_rcu(inc, &ipvs->udp_apps[hash], p_list) { if (inc->port == cp->vport) { if (unlikely(!ip_vs_app_inc_get(inc))) break; - spin_unlock(&ipvs->udp_app_lock); + rcu_read_unlock(); IP_VS_DBG_BUF(9, "%s(): Binding conn %s:%u->" "%s:%u to app %s on port %u\n", @@ -425,7 +419,7 @@ static int udp_app_conn_bind(struct ip_vs_conn *cp) goto out; } } - spin_unlock(&ipvs->udp_app_lock); + rcu_read_unlock(); out: return result; @@ -467,7 +461,6 @@ static int __udp_init(struct net *net, struct ip_vs_proto_data *pd) struct netns_ipvs *ipvs = net_ipvs(net); ip_vs_init_hash_table(ipvs->udp_apps, UDP_APP_TAB_SIZE); - spin_lock_init(&ipvs->udp_app_lock); pd->timeout_table = ip_vs_create_timeout_table((int *)udp_timeouts, sizeof(udp_timeouts)); if (!pd->timeout_table) -- cgit v1.2.3 From 276472eae063d717b775fdfc87529937402d0e08 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Thu, 21 Mar 2013 11:58:08 +0200 Subject: ipvs: remove rs_lock by using RCU rs_lock was used to protect rs_table (hash table) from updaters (under global mutex) and readers (packet handlers). We can remove rs_lock by using RCU lock for readers. Reclaiming dest only with kfree_rcu is enough because the readers access only fields from the ip_vs_dest structure. Use hlist for rs_table. As we are now using hlist_del_rcu, introduce in_rs_table flag as replacement for the list_empty checks which do not work with RCU. It is needed because only NAT dests are in the rs_table. Signed-off-by: Julian Anastasov Signed-off by: Hans Schillstrom Signed-off-by: Simon Horman --- include/net/ip_vs.h | 14 ++++--- net/netfilter/ipvs/ip_vs_core.c | 5 +-- net/netfilter/ipvs/ip_vs_ctl.c | 88 +++++++++++++++-------------------------- 3 files changed, 41 insertions(+), 66 deletions(-) (limited to 'include/net') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 84ca17141a44..b06aa6c939fa 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -738,7 +738,7 @@ struct ip_vs_dest_dst { */ struct ip_vs_dest { struct list_head n_list; /* for the dests in the service */ - struct list_head d_list; /* for table with all the dests */ + struct hlist_node d_list; /* for table with all the dests */ u16 af; /* address family */ __be16 port; /* port number of the server */ @@ -767,6 +767,9 @@ struct ip_vs_dest { __be16 vport; /* virtual port number */ union nf_inet_addr vaddr; /* virtual IP address */ __u32 vfwmark; /* firewall mark of service */ + + struct rcu_head rcu_head; + unsigned int in_rs_table:1; /* we are in rs_table */ }; @@ -897,7 +900,7 @@ struct netns_ipvs { #define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS) #define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1) - struct list_head rs_table[IP_VS_RTAB_SIZE]; + struct hlist_head rs_table[IP_VS_RTAB_SIZE]; /* ip_vs_app */ struct list_head app_list; /* ip_vs_proto */ @@ -933,7 +936,6 @@ struct netns_ipvs { int num_services; /* no of virtual services */ - rwlock_t rs_lock; /* real services table */ /* Trash for destinations */ struct list_head dest_trash; /* Service counters */ @@ -1376,9 +1378,9 @@ static inline void ip_vs_service_put(struct ip_vs_service *svc) atomic_dec(&svc->usecnt); } -extern struct ip_vs_dest * -ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol, - const union nf_inet_addr *daddr, __be16 dport); +extern bool +ip_vs_has_real_service(struct net *net, int af, __u16 protocol, + const union nf_inet_addr *daddr, __be16 dport); extern int ip_vs_use_count_inc(void); extern void ip_vs_use_count_dec(void); diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 6ad24e7ef440..4fc749c6ec27 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -1161,9 +1161,8 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) sizeof(_ports), _ports, &iph); if (pptr == NULL) return NF_ACCEPT; /* Not for me */ - if (ip_vs_lookup_real_service(net, af, iph.protocol, - &iph.saddr, - pptr[0])) { + if (ip_vs_has_real_service(net, af, iph.protocol, &iph.saddr, + pptr[0])) { /* * Notify the real server: there is no * existing entry if it is not RST diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index ef48cc55f775..182d95807bd5 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -508,17 +508,13 @@ static inline unsigned int ip_vs_rs_hashkey(int af, & IP_VS_RTAB_MASK; } -/* - * Hashes ip_vs_dest in rs_table by . - * should be called with locked tables. - */ -static int ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest) +/* Hash ip_vs_dest in rs_table by . */ +static void ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest) { unsigned int hash; - if (!list_empty(&dest->d_list)) { - return 0; - } + if (dest->in_rs_table) + return; /* * Hash by proto,addr,port, @@ -526,60 +522,47 @@ static int ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest) */ hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port); - list_add(&dest->d_list, &ipvs->rs_table[hash]); - - return 1; + hlist_add_head_rcu(&dest->d_list, &ipvs->rs_table[hash]); + dest->in_rs_table = 1; } -/* - * UNhashes ip_vs_dest from rs_table. - * should be called with locked tables. - */ -static int ip_vs_rs_unhash(struct ip_vs_dest *dest) +/* Unhash ip_vs_dest from rs_table. */ +static void ip_vs_rs_unhash(struct ip_vs_dest *dest) { /* * Remove it from the rs_table table. */ - if (!list_empty(&dest->d_list)) { - list_del_init(&dest->d_list); + if (dest->in_rs_table) { + hlist_del_rcu(&dest->d_list); + dest->in_rs_table = 0; } - - return 1; } -/* - * Lookup real service by in the real service table. - */ -struct ip_vs_dest * -ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol, - const union nf_inet_addr *daddr, - __be16 dport) +/* Check if real service by is present */ +bool ip_vs_has_real_service(struct net *net, int af, __u16 protocol, + const union nf_inet_addr *daddr, __be16 dport) { struct netns_ipvs *ipvs = net_ipvs(net); unsigned int hash; struct ip_vs_dest *dest; - /* - * Check for "full" addressed entries - * Return the first found entry - */ + /* Check for "full" addressed entries */ hash = ip_vs_rs_hashkey(af, daddr, dport); - read_lock(&ipvs->rs_lock); - list_for_each_entry(dest, &ipvs->rs_table[hash], d_list) { - if ((dest->af == af) - && ip_vs_addr_equal(af, &dest->addr, daddr) - && (dest->port == dport) - && ((dest->protocol == protocol) || - dest->vfwmark)) { + rcu_read_lock(); + hlist_for_each_entry_rcu(dest, &ipvs->rs_table[hash], d_list) { + if (dest->port == dport && + dest->af == af && + ip_vs_addr_equal(af, &dest->addr, daddr) && + (dest->protocol == protocol || dest->vfwmark)) { /* HIT */ - read_unlock(&ipvs->rs_lock); - return dest; + rcu_read_unlock(); + return true; } } - read_unlock(&ipvs->rs_lock); + rcu_read_unlock(); - return NULL; + return false; } /* @@ -612,9 +595,6 @@ ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr, * the backup synchronization daemon. It finds the * destination to be bound to the received connection * on the backup. - * - * ip_vs_lookup_real_service() looked promissing, but - * seems not working as expected. */ struct ip_vs_dest *ip_vs_find_dest(struct net *net, int af, const union nf_inet_addr *daddr, @@ -715,7 +695,7 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr, __ip_vs_dst_cache_reset(dest); __ip_vs_unbind_svc(dest); free_percpu(dest->stats.cpustats); - kfree(dest); + kfree_rcu(dest, rcu_head); } } @@ -742,7 +722,7 @@ static void ip_vs_trash_cleanup(struct net *net) __ip_vs_dst_cache_reset(dest); __ip_vs_unbind_svc(dest); free_percpu(dest->stats.cpustats); - kfree(dest); + kfree_rcu(dest, rcu_head); } } @@ -807,9 +787,7 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest, * Put the real service in rs_table if not present. * For now only for NAT! */ - write_lock_bh(&ipvs->rs_lock); ip_vs_rs_hash(ipvs, dest); - write_unlock_bh(&ipvs->rs_lock); } atomic_set(&dest->conn_flags, conn_flags); @@ -905,7 +883,7 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest, atomic_set(&dest->persistconns, 0); atomic_set(&dest->refcnt, 1); - INIT_LIST_HEAD(&dest->d_list); + INIT_HLIST_NODE(&dest->d_list); spin_lock_init(&dest->dst_lock); spin_lock_init(&dest->stats.lock); __ip_vs_update_dest(svc, dest, udest, 1); @@ -1045,9 +1023,7 @@ static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest) /* * Remove it from the d-linked list with the real services. */ - write_lock_bh(&ipvs->rs_lock); ip_vs_rs_unhash(dest); - write_unlock_bh(&ipvs->rs_lock); /* * Decrease the refcnt of the dest, and free the dest @@ -1067,7 +1043,7 @@ static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest) time, so the operation here is OK */ atomic_dec(&dest->svc->refcnt); free_percpu(dest->stats.cpustats); - kfree(dest); + kfree_rcu(dest, rcu_head); } else { IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, " "dest->refcnt=%d\n", @@ -3811,11 +3787,9 @@ int __net_init ip_vs_control_net_init(struct net *net) int idx; struct netns_ipvs *ipvs = net_ipvs(net); - rwlock_init(&ipvs->rs_lock); - /* Initialize rs_table */ for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++) - INIT_LIST_HEAD(&ipvs->rs_table[idx]); + INIT_HLIST_HEAD(&ipvs->rs_table[idx]); INIT_LIST_HEAD(&ipvs->dest_trash); atomic_set(&ipvs->ftpsvc_counter, 0); @@ -3892,7 +3866,7 @@ int __init ip_vs_control_init(void) EnterFunction(2); - /* Initialize svc_table, ip_vs_svc_fwm_table, rs_table */ + /* Initialize svc_table, ip_vs_svc_fwm_table */ for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { INIT_LIST_HEAD(&ip_vs_svc_table[idx]); INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]); -- cgit v1.2.3 From 088339a57d6042a8a19a3d5794594b558cd7b624 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Thu, 21 Mar 2013 11:58:10 +0200 Subject: ipvs: convert connection locking Convert __ip_vs_conntbl_lock_array as follows: - readers that do not modify conn lists will use RCU lock - updaters that modify lists will use spinlock_t Now for conn lookups we will use RCU read-side critical section. Without using __ip_vs_conn_get such places have access to connection fields and can dereference some pointers like pe and pe_data plus the ability to update timer expiration. If full access is required we contend for reference. We add barrier in __ip_vs_conn_put, so that other CPUs see the refcnt operation after other writes. With the introduction of ip_vs_conn_unlink() we try to reorganize ip_vs_conn_expire(), so that unhashing of connections that should stay more time is avoided, even if it is for very short time. Signed-off-by: Julian Anastasov Signed-off by: Hans Schillstrom Signed-off-by: Simon Horman --- include/net/ip_vs.h | 12 +++ net/netfilter/ipvs/ip_vs_conn.c | 230 +++++++++++++++++++++------------------- 2 files changed, 134 insertions(+), 108 deletions(-) (limited to 'include/net') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index b06aa6c939fa..5700b07b5186 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -620,6 +620,8 @@ struct ip_vs_conn { const struct ip_vs_pe *pe; char *pe_data; __u8 pe_data_len; + + struct rcu_head rcu_head; }; /* @@ -1185,9 +1187,19 @@ struct ip_vs_conn * ip_vs_conn_out_get_proto(int af, const struct sk_buff *skb, const struct ip_vs_iphdr *iph, int inverse); +/* Get reference to gain full access to conn. + * By default, RCU read-side critical sections have access only to + * conn fields and its PE data, see ip_vs_conn_rcu_free() for reference. + */ +static inline bool __ip_vs_conn_get(struct ip_vs_conn *cp) +{ + return atomic_inc_not_zero(&cp->refcnt); +} + /* put back the conn without restarting its timer */ static inline void __ip_vs_conn_put(struct ip_vs_conn *cp) { + smp_mb__before_atomic_dec(); atomic_dec(&cp->refcnt); } extern void ip_vs_conn_put(struct ip_vs_conn *cp); diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index 704e514e02ab..b0cd2be01d75 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -79,51 +79,21 @@ static unsigned int ip_vs_conn_rnd __read_mostly; struct ip_vs_aligned_lock { - rwlock_t l; + spinlock_t l; } __attribute__((__aligned__(SMP_CACHE_BYTES))); /* lock array for conn table */ static struct ip_vs_aligned_lock __ip_vs_conntbl_lock_array[CT_LOCKARRAY_SIZE] __cacheline_aligned; -static inline void ct_read_lock(unsigned int key) -{ - read_lock(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l); -} - -static inline void ct_read_unlock(unsigned int key) -{ - read_unlock(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l); -} - static inline void ct_write_lock(unsigned int key) { - write_lock(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l); + spin_lock(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l); } static inline void ct_write_unlock(unsigned int key) { - write_unlock(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l); -} - -static inline void ct_read_lock_bh(unsigned int key) -{ - read_lock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l); -} - -static inline void ct_read_unlock_bh(unsigned int key) -{ - read_unlock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l); -} - -static inline void ct_write_lock_bh(unsigned int key) -{ - write_lock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l); -} - -static inline void ct_write_unlock_bh(unsigned int key) -{ - write_unlock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l); + spin_unlock(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l); } @@ -201,9 +171,9 @@ static inline int ip_vs_conn_hash(struct ip_vs_conn *cp) spin_lock(&cp->lock); if (!(cp->flags & IP_VS_CONN_F_HASHED)) { - hlist_add_head(&cp->c_list, &ip_vs_conn_tab[hash]); cp->flags |= IP_VS_CONN_F_HASHED; atomic_inc(&cp->refcnt); + hlist_add_head_rcu(&cp->c_list, &ip_vs_conn_tab[hash]); ret = 1; } else { pr_err("%s(): request for already hashed, called from %pF\n", @@ -220,7 +190,7 @@ static inline int ip_vs_conn_hash(struct ip_vs_conn *cp) /* * UNhashes ip_vs_conn from ip_vs_conn_tab. - * returns bool success. + * returns bool success. Caller should hold conn reference. */ static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp) { @@ -234,7 +204,7 @@ static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp) spin_lock(&cp->lock); if (cp->flags & IP_VS_CONN_F_HASHED) { - hlist_del(&cp->c_list); + hlist_del_rcu(&cp->c_list); cp->flags &= ~IP_VS_CONN_F_HASHED; atomic_dec(&cp->refcnt); ret = 1; @@ -247,6 +217,36 @@ static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp) return ret; } +/* Try to unlink ip_vs_conn from ip_vs_conn_tab. + * returns bool success. + */ +static inline bool ip_vs_conn_unlink(struct ip_vs_conn *cp) +{ + unsigned int hash; + bool ret; + + hash = ip_vs_conn_hashkey_conn(cp); + + ct_write_lock(hash); + spin_lock(&cp->lock); + + if (cp->flags & IP_VS_CONN_F_HASHED) { + ret = false; + /* Decrease refcnt and unlink conn only if we are last user */ + if (atomic_cmpxchg(&cp->refcnt, 1, 0) == 1) { + hlist_del_rcu(&cp->c_list); + cp->flags &= ~IP_VS_CONN_F_HASHED; + ret = true; + } + } else + ret = atomic_read(&cp->refcnt) ? false : true; + + spin_unlock(&cp->lock); + ct_write_unlock(hash); + + return ret; +} + /* * Gets ip_vs_conn associated with supplied parameters in the ip_vs_conn_tab. @@ -262,9 +262,9 @@ __ip_vs_conn_in_get(const struct ip_vs_conn_param *p) hash = ip_vs_conn_hashkey_param(p, false); - ct_read_lock(hash); + rcu_read_lock(); - hlist_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { + hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[hash], c_list) { if (cp->af == p->af && p->cport == cp->cport && p->vport == cp->vport && ip_vs_addr_equal(p->af, p->caddr, &cp->caddr) && @@ -272,14 +272,15 @@ __ip_vs_conn_in_get(const struct ip_vs_conn_param *p) ((!p->cport) ^ (!(cp->flags & IP_VS_CONN_F_NO_CPORT))) && p->protocol == cp->protocol && ip_vs_conn_net_eq(cp, p->net)) { + if (!__ip_vs_conn_get(cp)) + continue; /* HIT */ - atomic_inc(&cp->refcnt); - ct_read_unlock(hash); + rcu_read_unlock(); return cp; } } - ct_read_unlock(hash); + rcu_read_unlock(); return NULL; } @@ -346,14 +347,16 @@ struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p) hash = ip_vs_conn_hashkey_param(p, false); - ct_read_lock(hash); + rcu_read_lock(); - hlist_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { + hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[hash], c_list) { if (!ip_vs_conn_net_eq(cp, p->net)) continue; if (p->pe_data && p->pe->ct_match) { - if (p->pe == cp->pe && p->pe->ct_match(p, cp)) - goto out; + if (p->pe == cp->pe && p->pe->ct_match(p, cp)) { + if (__ip_vs_conn_get(cp)) + goto out; + } continue; } @@ -365,15 +368,15 @@ struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p) p->af, p->vaddr, &cp->vaddr) && p->cport == cp->cport && p->vport == cp->vport && cp->flags & IP_VS_CONN_F_TEMPLATE && - p->protocol == cp->protocol) - goto out; + p->protocol == cp->protocol) { + if (__ip_vs_conn_get(cp)) + goto out; + } } cp = NULL; out: - if (cp) - atomic_inc(&cp->refcnt); - ct_read_unlock(hash); + rcu_read_unlock(); IP_VS_DBG_BUF(9, "template lookup/in %s %s:%d->%s:%d %s\n", ip_vs_proto_name(p->protocol), @@ -398,23 +401,24 @@ struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p) */ hash = ip_vs_conn_hashkey_param(p, true); - ct_read_lock(hash); + rcu_read_lock(); - hlist_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { + hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[hash], c_list) { if (cp->af == p->af && p->vport == cp->cport && p->cport == cp->dport && ip_vs_addr_equal(p->af, p->vaddr, &cp->caddr) && ip_vs_addr_equal(p->af, p->caddr, &cp->daddr) && p->protocol == cp->protocol && ip_vs_conn_net_eq(cp, p->net)) { + if (!__ip_vs_conn_get(cp)) + continue; /* HIT */ - atomic_inc(&cp->refcnt); ret = cp; break; } } - ct_read_unlock(hash); + rcu_read_unlock(); IP_VS_DBG_BUF(9, "lookup/out %s %s:%d->%s:%d %s\n", ip_vs_proto_name(p->protocol), @@ -757,41 +761,36 @@ int ip_vs_check_template(struct ip_vs_conn *ct) * Simply decrease the refcnt of the template, * don't restart its timer. */ - atomic_dec(&ct->refcnt); + __ip_vs_conn_put(ct); return 0; } return 1; } +static void ip_vs_conn_rcu_free(struct rcu_head *head) +{ + struct ip_vs_conn *cp = container_of(head, struct ip_vs_conn, + rcu_head); + + ip_vs_pe_put(cp->pe); + kfree(cp->pe_data); + kmem_cache_free(ip_vs_conn_cachep, cp); +} + static void ip_vs_conn_expire(unsigned long data) { struct ip_vs_conn *cp = (struct ip_vs_conn *)data; struct net *net = ip_vs_conn_net(cp); struct netns_ipvs *ipvs = net_ipvs(net); - cp->timeout = 60*HZ; - - /* - * hey, I'm using it - */ - atomic_inc(&cp->refcnt); - /* * do I control anybody? */ if (atomic_read(&cp->n_control)) goto expire_later; - /* - * unhash it if it is hashed in the conn table - */ - if (!ip_vs_conn_unhash(cp) && !(cp->flags & IP_VS_CONN_F_ONE_PACKET)) - goto expire_later; - - /* - * refcnt==1 implies I'm the only one referrer - */ - if (likely(atomic_read(&cp->refcnt) == 1)) { + /* Unlink conn if not referenced anymore */ + if (likely(ip_vs_conn_unlink(cp))) { /* delete the timer if it is activated by other users */ del_timer(&cp->timer); @@ -810,38 +809,41 @@ static void ip_vs_conn_expire(unsigned long data) ip_vs_conn_drop_conntrack(cp); } - ip_vs_pe_put(cp->pe); - kfree(cp->pe_data); if (unlikely(cp->app != NULL)) ip_vs_unbind_app(cp); ip_vs_unbind_dest(cp); if (cp->flags & IP_VS_CONN_F_NO_CPORT) atomic_dec(&ip_vs_conn_no_cport_cnt); + call_rcu(&cp->rcu_head, ip_vs_conn_rcu_free); atomic_dec(&ipvs->conn_count); - - kmem_cache_free(ip_vs_conn_cachep, cp); return; } - /* hash it back to the table */ - ip_vs_conn_hash(cp); - expire_later: - IP_VS_DBG(7, "delayed: conn->refcnt-1=%d conn->n_control=%d\n", - atomic_read(&cp->refcnt)-1, + IP_VS_DBG(7, "delayed: conn->refcnt=%d conn->n_control=%d\n", + atomic_read(&cp->refcnt), atomic_read(&cp->n_control)); + atomic_inc(&cp->refcnt); + cp->timeout = 60*HZ; + if (ipvs->sync_state & IP_VS_STATE_MASTER) ip_vs_sync_conn(net, cp, sysctl_sync_threshold(ipvs)); ip_vs_conn_put(cp); } - +/* Modify timer, so that it expires as soon as possible. + * Can be called without reference only if under RCU lock. + */ void ip_vs_conn_expire_now(struct ip_vs_conn *cp) { - if (del_timer(&cp->timer)) - mod_timer(&cp->timer, jiffies); + /* Using mod_timer_pending will ensure the timer is not + * modified after the final del_timer in ip_vs_conn_expire. + */ + if (timer_pending(&cp->timer) && + time_after(cp->timer.expires, jiffies)) + mod_timer_pending(&cp->timer, jiffies); } @@ -952,14 +954,17 @@ static void *ip_vs_conn_array(struct seq_file *seq, loff_t pos) struct ip_vs_iter_state *iter = seq->private; for (idx = 0; idx < ip_vs_conn_tab_size; idx++) { - ct_read_lock_bh(idx); - hlist_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) { + rcu_read_lock(); + hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[idx], c_list) { + /* __ip_vs_conn_get() is not needed by + * ip_vs_conn_seq_show and ip_vs_conn_sync_seq_show + */ if (pos-- == 0) { iter->l = &ip_vs_conn_tab[idx]; return cp; } } - ct_read_unlock_bh(idx); + rcu_read_unlock(); } return NULL; @@ -977,6 +982,7 @@ static void *ip_vs_conn_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct ip_vs_conn *cp = v; struct ip_vs_iter_state *iter = seq->private; + struct hlist_node *e; struct hlist_head *l = iter->l; int idx; @@ -985,19 +991,19 @@ static void *ip_vs_conn_seq_next(struct seq_file *seq, void *v, loff_t *pos) return ip_vs_conn_array(seq, 0); /* more on same hash chain? */ - if (cp->c_list.next) - return hlist_entry(cp->c_list.next, struct ip_vs_conn, c_list); + e = rcu_dereference(hlist_next_rcu(&cp->c_list)); + if (e) + return hlist_entry(e, struct ip_vs_conn, c_list); + rcu_read_unlock(); idx = l - ip_vs_conn_tab; - ct_read_unlock_bh(idx); - while (++idx < ip_vs_conn_tab_size) { - ct_read_lock_bh(idx); - hlist_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) { + rcu_read_lock(); + hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[idx], c_list) { iter->l = &ip_vs_conn_tab[idx]; return cp; } - ct_read_unlock_bh(idx); + rcu_read_unlock(); } iter->l = NULL; return NULL; @@ -1009,7 +1015,7 @@ static void ip_vs_conn_seq_stop(struct seq_file *seq, void *v) struct hlist_head *l = iter->l; if (l) - ct_read_unlock_bh(l - ip_vs_conn_tab); + rcu_read_unlock(); } static int ip_vs_conn_seq_show(struct seq_file *seq, void *v) @@ -1188,7 +1194,7 @@ static inline int todrop_entry(struct ip_vs_conn *cp) void ip_vs_random_dropentry(struct net *net) { int idx; - struct ip_vs_conn *cp; + struct ip_vs_conn *cp, *cp_c; /* * Randomly scan 1/32 of the whole table every second @@ -1199,9 +1205,9 @@ void ip_vs_random_dropentry(struct net *net) /* * Lock is actually needed in this loop. */ - ct_write_lock_bh(hash); + rcu_read_lock(); - hlist_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { + hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[hash], c_list) { if (cp->flags & IP_VS_CONN_F_TEMPLATE) /* connection template */ continue; @@ -1228,12 +1234,15 @@ void ip_vs_random_dropentry(struct net *net) IP_VS_DBG(4, "del connection\n"); ip_vs_conn_expire_now(cp); - if (cp->control) { + cp_c = cp->control; + /* cp->control is valid only with reference to cp */ + if (cp_c && __ip_vs_conn_get(cp)) { IP_VS_DBG(4, "del conn template\n"); - ip_vs_conn_expire_now(cp->control); + ip_vs_conn_expire_now(cp_c); + __ip_vs_conn_put(cp); } } - ct_write_unlock_bh(hash); + rcu_read_unlock(); } } @@ -1244,7 +1253,7 @@ void ip_vs_random_dropentry(struct net *net) static void ip_vs_conn_flush(struct net *net) { int idx; - struct ip_vs_conn *cp; + struct ip_vs_conn *cp, *cp_c; struct netns_ipvs *ipvs = net_ipvs(net); flush_again: @@ -1252,19 +1261,22 @@ flush_again: /* * Lock is actually needed in this loop. */ - ct_write_lock_bh(idx); + rcu_read_lock(); - hlist_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) { + hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[idx], c_list) { if (!ip_vs_conn_net_eq(cp, net)) continue; IP_VS_DBG(4, "del connection\n"); ip_vs_conn_expire_now(cp); - if (cp->control) { + cp_c = cp->control; + /* cp->control is valid only with reference to cp */ + if (cp_c && __ip_vs_conn_get(cp)) { IP_VS_DBG(4, "del conn template\n"); - ip_vs_conn_expire_now(cp->control); + ip_vs_conn_expire_now(cp_c); + __ip_vs_conn_put(cp); } } - ct_write_unlock_bh(idx); + rcu_read_unlock(); } /* the counter may be not NULL, because maybe some conn entries @@ -1331,7 +1343,7 @@ int __init ip_vs_conn_init(void) INIT_HLIST_HEAD(&ip_vs_conn_tab[idx]); for (idx = 0; idx < CT_LOCKARRAY_SIZE; idx++) { - rwlock_init(&__ip_vs_conntbl_lock_array[idx].l); + spin_lock_init(&__ip_vs_conntbl_lock_array[idx].l); } /* calculate the random value for connection hash */ @@ -1342,6 +1354,8 @@ int __init ip_vs_conn_init(void) void ip_vs_conn_cleanup(void) { + /* Wait all ip_vs_conn_rcu_free() callbacks to complete */ + rcu_barrier(); /* Release the empty cache */ kmem_cache_destroy(ip_vs_conn_cachep); vfree(ip_vs_conn_tab); -- cgit v1.2.3 From 1845ed0bb29fa7864781021e0c8d06af318f358a Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Thu, 21 Mar 2013 11:58:11 +0200 Subject: ipvs: reorder keys in connection structure __ip_vs_conn_in_get and ip_vs_conn_out_get are hot places. Optimize them, so that ports are matched first. By moving net and fwmark below, on 32-bit arch we can fit caddr in 32-byte cache line and all addresses in 64-byte cache line. Signed-off-by: Julian Anastasov Signed-off by: Hans Schillstrom Signed-off-by: Simon Horman --- include/net/ip_vs.h | 12 ++++++------ net/netfilter/ipvs/ip_vs_conn.c | 19 ++++++++++--------- 2 files changed, 16 insertions(+), 15 deletions(-) (limited to 'include/net') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 5700b07b5186..929e04c6b82e 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -566,20 +566,19 @@ struct ip_vs_conn_param { */ struct ip_vs_conn { struct hlist_node c_list; /* hashed list heads */ -#ifdef CONFIG_NET_NS - struct net *net; /* Name space */ -#endif /* Protocol, addresses and port numbers */ - u16 af; /* address family */ __be16 cport; - __be16 vport; __be16 dport; - __u32 fwmark; /* Fire wall mark from skb */ + __be16 vport; + u16 af; /* address family */ union nf_inet_addr caddr; /* client address */ union nf_inet_addr vaddr; /* virtual address */ union nf_inet_addr daddr; /* destination address */ volatile __u32 flags; /* status flags */ __u16 protocol; /* Which protocol (TCP/UDP) */ +#ifdef CONFIG_NET_NS + struct net *net; /* Name space */ +#endif /* counter and timer */ atomic_t refcnt; /* reference count */ @@ -593,6 +592,7 @@ struct ip_vs_conn { * state transition triggerd * synchronization */ + __u32 fwmark; /* Fire wall mark from skb */ unsigned long sync_endtime; /* jiffies + sent_retries */ /* Control members */ diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index b0cd2be01d75..416015b34c23 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -265,8 +265,8 @@ __ip_vs_conn_in_get(const struct ip_vs_conn_param *p) rcu_read_lock(); hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[hash], c_list) { - if (cp->af == p->af && - p->cport == cp->cport && p->vport == cp->vport && + if (p->cport == cp->cport && p->vport == cp->vport && + cp->af == p->af && ip_vs_addr_equal(p->af, p->caddr, &cp->caddr) && ip_vs_addr_equal(p->af, p->vaddr, &cp->vaddr) && ((!p->cport) ^ (!(cp->flags & IP_VS_CONN_F_NO_CPORT))) && @@ -350,9 +350,9 @@ struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p) rcu_read_lock(); hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[hash], c_list) { - if (!ip_vs_conn_net_eq(cp, p->net)) - continue; - if (p->pe_data && p->pe->ct_match) { + if (unlikely(p->pe_data && p->pe->ct_match)) { + if (!ip_vs_conn_net_eq(cp, p->net)) + continue; if (p->pe == cp->pe && p->pe->ct_match(p, cp)) { if (__ip_vs_conn_get(cp)) goto out; @@ -366,9 +366,10 @@ struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p) * p->vaddr is a fwmark */ ip_vs_addr_equal(p->protocol == IPPROTO_IP ? AF_UNSPEC : p->af, p->vaddr, &cp->vaddr) && - p->cport == cp->cport && p->vport == cp->vport && + p->vport == cp->vport && p->cport == cp->cport && cp->flags & IP_VS_CONN_F_TEMPLATE && - p->protocol == cp->protocol) { + p->protocol == cp->protocol && + ip_vs_conn_net_eq(cp, p->net)) { if (__ip_vs_conn_get(cp)) goto out; } @@ -404,8 +405,8 @@ struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p) rcu_read_lock(); hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[hash], c_list) { - if (cp->af == p->af && - p->vport == cp->cport && p->cport == cp->dport && + if (p->vport == cp->cport && p->cport == cp->dport && + cp->af == p->af && ip_vs_addr_equal(p->af, p->vaddr, &cp->caddr) && ip_vs_addr_equal(p->af, p->caddr, &cp->daddr) && p->protocol == cp->protocol && -- cgit v1.2.3 From 9a05475cebdd6341884b5901e53870be26e65158 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Thu, 21 Mar 2013 11:58:12 +0200 Subject: ipvs: avoid kmem_cache_zalloc in ip_vs_conn_new We have many fields to set and few to reset, use kmem_cache_alloc instead to save some cycles. Signed-off-by: Julian Anastasov Signed-off by: Hans Schillstrom Signed-off-by: Simon Horman --- include/net/ip_vs.h | 15 +++++++++++++++ net/netfilter/ipvs/ip_vs_conn.c | 24 +++++++++++++++++++----- 2 files changed, 34 insertions(+), 5 deletions(-) (limited to 'include/net') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 929e04c6b82e..43886bb282fb 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -233,6 +233,21 @@ static inline void ip_vs_addr_copy(int af, union nf_inet_addr *dst, dst->ip = src->ip; } +static inline void ip_vs_addr_set(int af, union nf_inet_addr *dst, + const union nf_inet_addr *src) +{ +#ifdef CONFIG_IP_VS_IPV6 + if (af == AF_INET6) { + dst->in6 = src->in6; + return; + } +#endif + dst->ip = src->ip; + dst->all[1] = 0; + dst->all[2] = 0; + dst->all[3] = 0; +} + static inline int ip_vs_addr_equal(int af, const union nf_inet_addr *a, const union nf_inet_addr *b) { diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index 416015b34c23..e3e2b4d3b6d8 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -861,7 +861,7 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, struct ip_vs_proto_data *pd = ip_vs_proto_data_get(p->net, p->protocol); - cp = kmem_cache_zalloc(ip_vs_conn_cachep, GFP_ATOMIC); + cp = kmem_cache_alloc(ip_vs_conn_cachep, GFP_ATOMIC); if (cp == NULL) { IP_VS_ERR_RL("%s(): no memory\n", __func__); return NULL; @@ -872,13 +872,13 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, ip_vs_conn_net_set(cp, p->net); cp->af = p->af; cp->protocol = p->protocol; - ip_vs_addr_copy(p->af, &cp->caddr, p->caddr); + ip_vs_addr_set(p->af, &cp->caddr, p->caddr); cp->cport = p->cport; - ip_vs_addr_copy(p->af, &cp->vaddr, p->vaddr); + ip_vs_addr_set(p->af, &cp->vaddr, p->vaddr); cp->vport = p->vport; /* proto should only be IPPROTO_IP if d_addr is a fwmark */ - ip_vs_addr_copy(p->protocol == IPPROTO_IP ? AF_UNSPEC : p->af, - &cp->daddr, daddr); + ip_vs_addr_set(p->protocol == IPPROTO_IP ? AF_UNSPEC : p->af, + &cp->daddr, daddr); cp->dport = dport; cp->flags = flags; cp->fwmark = fwmark; @@ -887,6 +887,10 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, cp->pe = p->pe; cp->pe_data = p->pe_data; cp->pe_data_len = p->pe_data_len; + } else { + cp->pe = NULL; + cp->pe_data = NULL; + cp->pe_data_len = 0; } spin_lock_init(&cp->lock); @@ -897,18 +901,28 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, */ atomic_set(&cp->refcnt, 1); + cp->control = NULL; atomic_set(&cp->n_control, 0); atomic_set(&cp->in_pkts, 0); + cp->packet_xmit = NULL; + cp->app = NULL; + cp->app_data = NULL; + /* reset struct ip_vs_seq */ + cp->in_seq.delta = 0; + cp->out_seq.delta = 0; + atomic_inc(&ipvs->conn_count); if (flags & IP_VS_CONN_F_NO_CPORT) atomic_inc(&ip_vs_conn_no_cport_cnt); /* Bind the connection with a destination server */ + cp->dest = NULL; ip_vs_bind_dest(cp, dest); /* Set its state and timeout */ cp->state = 0; + cp->old_state = 0; cp->timeout = 3*HZ; cp->sync_endtime = jiffies & ~3UL; -- cgit v1.2.3 From 6b6df46663e7aa6f7b1d82435a3488f9b81316b3 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Fri, 22 Mar 2013 11:46:37 +0200 Subject: ipvs: preparations for using rcu in schedulers Allow schedulers to use rcu_dereference when returning destination on lookup. The RCU read-side critical section will allow ip_vs_bind_dest to get dest refcnt as preparation for the step where destinations will be deleted without an IP_VS_WAIT_WHILE guard that holds the packet processing during update. Add new optional scheduler methods add_dest, del_dest and upd_dest. For now the methods are called together with update_service but update_service will be removed in a following change. Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman --- include/net/ip_vs.h | 6 ++++++ net/netfilter/ipvs/ip_vs_core.c | 6 ++++++ net/netfilter/ipvs/ip_vs_ctl.c | 8 ++++++++ 3 files changed, 20 insertions(+) (limited to 'include/net') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 43886bb282fb..d91385c1b3a2 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -805,6 +805,12 @@ struct ip_vs_scheduler { int (*done_service)(struct ip_vs_service *svc); /* scheduler updating service */ int (*update_service)(struct ip_vs_service *svc); + /* dest is linked */ + int (*add_dest)(struct ip_vs_service *svc, struct ip_vs_dest *dest); + /* dest is unlinked */ + int (*del_dest)(struct ip_vs_service *svc, struct ip_vs_dest *dest); + /* dest is updated */ + int (*upd_dest)(struct ip_vs_service *svc, struct ip_vs_dest *dest); /* selecting a server from the given service */ struct ip_vs_dest* (*schedule)(struct ip_vs_service *svc, diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 4fc749c6ec27..939ad11ed534 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -301,8 +301,10 @@ ip_vs_sched_persist(struct ip_vs_service *svc, * template is not available. * return *ignored=0 i.e. ICMP and NF_DROP */ + rcu_read_lock(); dest = svc->scheduler->schedule(svc, skb); if (!dest) { + rcu_read_unlock(); IP_VS_DBG(1, "p-schedule: no dest found.\n"); kfree(param.pe_data); *ignored = 0; @@ -318,6 +320,7 @@ ip_vs_sched_persist(struct ip_vs_service *svc, * when the template expires */ ct = ip_vs_conn_new(¶m, &dest->addr, dport, IP_VS_CONN_F_TEMPLATE, dest, skb->mark); + rcu_read_unlock(); if (ct == NULL) { kfree(param.pe_data); *ignored = -1; @@ -446,8 +449,10 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, return NULL; } + rcu_read_lock(); dest = svc->scheduler->schedule(svc, skb); if (dest == NULL) { + rcu_read_unlock(); IP_VS_DBG(1, "Schedule: no dest found.\n"); return NULL; } @@ -468,6 +473,7 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, cp = ip_vs_conn_new(&p, &dest->addr, dest->port ? dest->port : pptr[1], flags, dest, skb->mark); + rcu_read_unlock(); if (!cp) { *ignored = -1; return NULL; diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 182d95807bd5..d64f800a3426 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -825,6 +825,11 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest, if (add) { list_add(&dest->n_list, &svc->destinations); svc->num_dests++; + if (svc->scheduler->add_dest) + svc->scheduler->add_dest(svc, dest); + } else { + if (svc->scheduler->upd_dest) + svc->scheduler->upd_dest(svc, dest); } /* call the update_service, because server weight may be changed */ @@ -1071,6 +1076,9 @@ static void __ip_vs_unlink_dest(struct ip_vs_service *svc, list_del(&dest->n_list); svc->num_dests--; + if (svcupd && svc->scheduler->del_dest) + svc->scheduler->del_dest(svc, dest); + /* * Call the update_service function of its scheduler */ -- cgit v1.2.3 From fca9c20ae1e510525f8a2aaa25861789fd721193 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Fri, 22 Mar 2013 11:46:38 +0200 Subject: ipvs: add ip_vs_dest_hold and ip_vs_dest_put ip_vs_dest_hold will be used under RCU lock while ip_vs_dest_put can be called even after dest is removed from service, as it happens for conns and some schedulers. Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman --- include/net/ip_vs.h | 10 ++++++++++ net/netfilter/ipvs/ip_vs_conn.c | 9 ++------- net/netfilter/ipvs/ip_vs_ctl.c | 4 ++-- net/netfilter/ipvs/ip_vs_sync.c | 4 ++-- 4 files changed, 16 insertions(+), 11 deletions(-) (limited to 'include/net') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index d91385c1b3a2..7d3027f2c06e 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -1427,6 +1427,16 @@ ip_vs_find_dest(struct net *net, int af, const union nf_inet_addr *daddr, __u16 protocol, __u32 fwmark, __u32 flags); extern struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp); +static inline void ip_vs_dest_hold(struct ip_vs_dest *dest) +{ + atomic_inc(&dest->refcnt); +} + +static inline void ip_vs_dest_put(struct ip_vs_dest *dest) +{ + smp_mb__before_atomic_dec(); + atomic_dec(&dest->refcnt); +} /* * IPVS sync daemon data and function prototypes diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index e3e2b4d3b6d8..1b29e4a2b26c 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -554,7 +554,7 @@ ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest) return; /* Increase the refcnt counter of the dest */ - atomic_inc(&dest->refcnt); + ip_vs_dest_hold(dest); conn_flags = atomic_read(&dest->conn_flags); if (cp->protocol != IPPROTO_UDP) @@ -700,12 +700,7 @@ static inline void ip_vs_unbind_dest(struct ip_vs_conn *cp) dest->flags &= ~IP_VS_DEST_F_OVERLOAD; } - /* - * Simply decrease the refcnt of the dest, because the - * dest will be either in service's destination list - * or in the trash. - */ - atomic_dec(&dest->refcnt); + ip_vs_dest_put(dest); } static int expire_quiescent_template(struct netns_ipvs *ipvs, diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index d64f800a3426..a4f638880470 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -616,7 +616,7 @@ struct ip_vs_dest *ip_vs_find_dest(struct net *net, int af, if (!dest) dest = ip_vs_lookup_dest(svc, daddr, port ^ dport); if (dest) - atomic_inc(&dest->refcnt); + ip_vs_dest_hold(dest); ip_vs_service_put(svc); return dest; } @@ -1056,7 +1056,7 @@ static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest) ntohs(dest->port), atomic_read(&dest->refcnt)); list_add(&dest->n_list, &ipvs->dest_trash); - atomic_inc(&dest->refcnt); + ip_vs_dest_hold(dest); } } diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index 44fd10c539ac..6cc3e52f1f35 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -861,7 +861,7 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param, if (!dest) { dest = ip_vs_try_bind_dest(cp); if (dest) - atomic_dec(&dest->refcnt); + ip_vs_dest_put(dest); } } else { /* @@ -874,7 +874,7 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param, cp = ip_vs_conn_new(param, daddr, dport, flags, dest, fwmark); if (dest) - atomic_dec(&dest->refcnt); + ip_vs_dest_put(dest); if (!cp) { if (param->pe_data) kfree(param->pe_data); -- cgit v1.2.3 From 578bc3ef1e473abb9ea99046a307fef0094b22af Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Fri, 22 Mar 2013 11:46:49 +0200 Subject: ipvs: reorganize dest trash All dests will go to trash, no exceptions. But we have to use new list node t_list for this, due to RCU changes in following patches. Dests will wait there initial grace period and later all conns and schedulers to put their reference. The dests don't get reference for staying in dest trash as before. As result, we do not load ip_vs_dest_put with extra checks for last refcnt and the schedulers do not need to play games with atomic_inc_not_zero while selecting best destination. Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman --- include/net/ip_vs.h | 9 +++ net/netfilter/ipvs/ip_vs_ctl.c | 178 ++++++++++++++++++++++++----------------- 2 files changed, 113 insertions(+), 74 deletions(-) (limited to 'include/net') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 7d3027f2c06e..18aeb85079f8 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -749,6 +749,8 @@ struct ip_vs_dest_dst { struct rcu_head rcu_head; }; +/* In grace period after removing */ +#define IP_VS_DEST_STATE_REMOVING 0x01 /* * The real server destination forwarding entry * with ip address, port number, and so on. @@ -766,6 +768,7 @@ struct ip_vs_dest { atomic_t refcnt; /* reference counter */ struct ip_vs_stats stats; /* statistics */ + unsigned long state; /* state flags */ /* connection counters and thresholds */ atomic_t activeconns; /* active connections */ @@ -785,6 +788,7 @@ struct ip_vs_dest { union nf_inet_addr vaddr; /* virtual IP address */ __u32 vfwmark; /* firewall mark of service */ + struct list_head t_list; /* in dest_trash */ struct rcu_head rcu_head; unsigned int in_rs_table:1; /* we are in rs_table */ }; @@ -912,6 +916,9 @@ struct ipvs_master_sync_state { struct netns_ipvs *ipvs; }; +/* How much time to keep dests in trash */ +#define IP_VS_DEST_TRASH_PERIOD (120 * HZ) + /* IPVS in network namespace */ struct netns_ipvs { int gen; /* Generation */ @@ -961,6 +968,8 @@ struct netns_ipvs { /* Trash for destinations */ struct list_head dest_trash; + spinlock_t dest_trash_lock; + struct timer_list dest_trash_timer; /* expiration timer */ /* Service counters */ atomic_t ftpsvc_counter; atomic_t nullsvc_counter; diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index a4f638880470..80d366b7daab 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -71,7 +71,7 @@ int ip_vs_get_debug_level(void) /* Protos */ -static void __ip_vs_del_service(struct ip_vs_service *svc); +static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup); #ifdef CONFIG_IP_VS_IPV6 @@ -657,19 +657,25 @@ static struct ip_vs_dest * ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr, __be16 dport) { - struct ip_vs_dest *dest, *nxt; + struct ip_vs_dest *dest; struct netns_ipvs *ipvs = net_ipvs(svc->net); /* * Find the destination in trash */ - list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) { + spin_lock_bh(&ipvs->dest_trash_lock); + list_for_each_entry(dest, &ipvs->dest_trash, t_list) { IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, " "dest->refcnt=%d\n", dest->vfwmark, IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port), atomic_read(&dest->refcnt)); + /* We can not reuse dest while in grace period + * because conns still can use dest->svc + */ + if (test_bit(IP_VS_DEST_STATE_REMOVING, &dest->state)) + continue; if (dest->af == svc->af && ip_vs_addr_equal(svc->af, &dest->addr, daddr) && dest->port == dport && @@ -679,29 +685,27 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr, (ip_vs_addr_equal(svc->af, &dest->vaddr, &svc->addr) && dest->vport == svc->port))) { /* HIT */ - return dest; - } - - /* - * Try to purge the destination from trash if not referenced - */ - if (atomic_read(&dest->refcnt) == 1) { - IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u " - "from trash\n", - dest->vfwmark, - IP_VS_DBG_ADDR(svc->af, &dest->addr), - ntohs(dest->port)); - list_del(&dest->n_list); - __ip_vs_dst_cache_reset(dest); - __ip_vs_unbind_svc(dest); - free_percpu(dest->stats.cpustats); - kfree_rcu(dest, rcu_head); + list_del(&dest->t_list); + ip_vs_dest_hold(dest); + goto out; } } - return NULL; + dest = NULL; + +out: + spin_unlock_bh(&ipvs->dest_trash_lock); + + return dest; } +static void ip_vs_dest_free(struct ip_vs_dest *dest) +{ + __ip_vs_dst_cache_reset(dest); + __ip_vs_unbind_svc(dest); + free_percpu(dest->stats.cpustats); + kfree(dest); +} /* * Clean up all the destinations in the trash @@ -710,19 +714,18 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr, * When the ip_vs_control_clearup is activated by ipvs module exit, * the service tables must have been flushed and all the connections * are expired, and the refcnt of each destination in the trash must - * be 1, so we simply release them here. + * be 0, so we simply release them here. */ static void ip_vs_trash_cleanup(struct net *net) { struct ip_vs_dest *dest, *nxt; struct netns_ipvs *ipvs = net_ipvs(net); - list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) { - list_del(&dest->n_list); - __ip_vs_dst_cache_reset(dest); - __ip_vs_unbind_svc(dest); - free_percpu(dest->stats.cpustats); - kfree_rcu(dest, rcu_head); + del_timer_sync(&ipvs->dest_trash_timer); + /* No need to use dest_trash_lock */ + list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, t_list) { + list_del(&dest->t_list); + ip_vs_dest_free(dest); } } @@ -955,11 +958,6 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) IP_VS_DBG_ADDR(svc->af, &dest->vaddr), ntohs(dest->vport)); - /* - * Get the destination from the trash - */ - list_del(&dest->n_list); - __ip_vs_update_dest(svc, dest, udest, 1); ret = 0; } else { @@ -1015,11 +1013,21 @@ ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) return 0; } +static void ip_vs_dest_wait_readers(struct rcu_head *head) +{ + struct ip_vs_dest *dest = container_of(head, struct ip_vs_dest, + rcu_head); + + /* End of grace period after unlinking */ + clear_bit(IP_VS_DEST_STATE_REMOVING, &dest->state); +} + /* * Delete a destination (must be already unlinked from the service) */ -static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest) +static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest, + bool cleanup) { struct netns_ipvs *ipvs = net_ipvs(net); @@ -1030,34 +1038,22 @@ static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest) */ ip_vs_rs_unhash(dest); - /* - * Decrease the refcnt of the dest, and free the dest - * if nobody refers to it (refcnt=0). Otherwise, throw - * the destination into the trash. - */ - if (atomic_dec_and_test(&dest->refcnt)) { - IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u\n", - dest->vfwmark, - IP_VS_DBG_ADDR(dest->af, &dest->addr), - ntohs(dest->port)); - __ip_vs_dst_cache_reset(dest); - /* simply decrease svc->refcnt here, let the caller check - and release the service if nobody refers to it. - Only user context can release destination and service, - and only one user context can update virtual service at a - time, so the operation here is OK */ - atomic_dec(&dest->svc->refcnt); - free_percpu(dest->stats.cpustats); - kfree_rcu(dest, rcu_head); - } else { - IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, " - "dest->refcnt=%d\n", - IP_VS_DBG_ADDR(dest->af, &dest->addr), - ntohs(dest->port), - atomic_read(&dest->refcnt)); - list_add(&dest->n_list, &ipvs->dest_trash); - ip_vs_dest_hold(dest); + if (!cleanup) { + set_bit(IP_VS_DEST_STATE_REMOVING, &dest->state); + call_rcu(&dest->rcu_head, ip_vs_dest_wait_readers); } + + spin_lock_bh(&ipvs->dest_trash_lock); + IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, dest->refcnt=%d\n", + IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port), + atomic_read(&dest->refcnt)); + if (list_empty(&ipvs->dest_trash) && !cleanup) + mod_timer(&ipvs->dest_trash_timer, + jiffies + IP_VS_DEST_TRASH_PERIOD); + /* dest lives in trash without reference */ + list_add(&dest->t_list, &ipvs->dest_trash); + spin_unlock_bh(&ipvs->dest_trash_lock); + ip_vs_dest_put(dest); } @@ -1122,13 +1118,38 @@ ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) /* * Delete the destination */ - __ip_vs_del_dest(svc->net, dest); + __ip_vs_del_dest(svc->net, dest, false); LeaveFunction(2); return 0; } +static void ip_vs_dest_trash_expire(unsigned long data) +{ + struct net *net = (struct net *) data; + struct netns_ipvs *ipvs = net_ipvs(net); + struct ip_vs_dest *dest, *next; + + spin_lock(&ipvs->dest_trash_lock); + list_for_each_entry_safe(dest, next, &ipvs->dest_trash, t_list) { + /* Skip if dest is in grace period */ + if (test_bit(IP_VS_DEST_STATE_REMOVING, &dest->state)) + continue; + if (atomic_read(&dest->refcnt) > 0) + continue; + IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u from trash\n", + dest->vfwmark, + IP_VS_DBG_ADDR(dest->svc->af, &dest->addr), + ntohs(dest->port)); + list_del(&dest->t_list); + ip_vs_dest_free(dest); + } + if (!list_empty(&ipvs->dest_trash)) + mod_timer(&ipvs->dest_trash_timer, + jiffies + IP_VS_DEST_TRASH_PERIOD); + spin_unlock(&ipvs->dest_trash_lock); +} /* * Add a service into the service hash table @@ -1358,7 +1379,7 @@ out: * - The service must be unlinked, unlocked and not referenced! * - We are called under _bh lock */ -static void __ip_vs_del_service(struct ip_vs_service *svc) +static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup) { struct ip_vs_dest *dest, *nxt; struct ip_vs_scheduler *old_sched; @@ -1394,7 +1415,7 @@ static void __ip_vs_del_service(struct ip_vs_service *svc) */ list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) { __ip_vs_unlink_dest(svc, dest, 0); - __ip_vs_del_dest(svc->net, dest); + __ip_vs_del_dest(svc->net, dest, cleanup); } /* @@ -1424,7 +1445,7 @@ static void __ip_vs_del_service(struct ip_vs_service *svc) /* * Unlink a service from list and try to delete it if its refcnt reached 0 */ -static void ip_vs_unlink_service(struct ip_vs_service *svc) +static void ip_vs_unlink_service(struct ip_vs_service *svc, bool cleanup) { /* * Unhash it from the service table @@ -1438,7 +1459,7 @@ static void ip_vs_unlink_service(struct ip_vs_service *svc) */ IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0); - __ip_vs_del_service(svc); + __ip_vs_del_service(svc, cleanup); write_unlock_bh(&__ip_vs_svc_lock); } @@ -1450,7 +1471,7 @@ static int ip_vs_del_service(struct ip_vs_service *svc) { if (svc == NULL) return -EEXIST; - ip_vs_unlink_service(svc); + ip_vs_unlink_service(svc, false); return 0; } @@ -1459,7 +1480,7 @@ static int ip_vs_del_service(struct ip_vs_service *svc) /* * Flush all the virtual services */ -static int ip_vs_flush(struct net *net) +static int ip_vs_flush(struct net *net, bool cleanup) { int idx; struct ip_vs_service *svc, *nxt; @@ -1471,7 +1492,7 @@ static int ip_vs_flush(struct net *net) list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx], s_list) { if (net_eq(svc->net, net)) - ip_vs_unlink_service(svc); + ip_vs_unlink_service(svc, cleanup); } } @@ -1482,7 +1503,7 @@ static int ip_vs_flush(struct net *net) list_for_each_entry_safe(svc, nxt, &ip_vs_svc_fwm_table[idx], f_list) { if (net_eq(svc->net, net)) - ip_vs_unlink_service(svc); + ip_vs_unlink_service(svc, cleanup); } } @@ -1498,7 +1519,7 @@ void ip_vs_service_net_cleanup(struct net *net) EnterFunction(2); /* Check for "full" addressed entries */ mutex_lock(&__ip_vs_mutex); - ip_vs_flush(net); + ip_vs_flush(net, true); mutex_unlock(&__ip_vs_mutex); LeaveFunction(2); } @@ -1558,9 +1579,11 @@ static int ip_vs_dst_event(struct notifier_block *this, unsigned long event, } } - list_for_each_entry(dest, &ipvs->dest_trash, n_list) { + spin_lock_bh(&ipvs->dest_trash_lock); + list_for_each_entry(dest, &ipvs->dest_trash, t_list) { ip_vs_forget_dev(dest, dev); } + spin_unlock_bh(&ipvs->dest_trash_lock); mutex_unlock(&__ip_vs_mutex); LeaveFunction(2); return NOTIFY_DONE; @@ -2394,7 +2417,7 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) if (cmd == IP_VS_SO_SET_FLUSH) { /* Flush the virtual service */ - ret = ip_vs_flush(net); + ret = ip_vs_flush(net, false); goto out_unlock; } else if (cmd == IP_VS_SO_SET_TIMEOUT) { /* Set timeout values for (tcp tcpfin udp) */ @@ -3403,7 +3426,7 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info) mutex_lock(&__ip_vs_mutex); if (cmd == IPVS_CMD_FLUSH) { - ret = ip_vs_flush(net); + ret = ip_vs_flush(net, false); goto out; } else if (cmd == IPVS_CMD_SET_CONFIG) { ret = ip_vs_genl_set_config(net, info->attrs); @@ -3800,6 +3823,9 @@ int __net_init ip_vs_control_net_init(struct net *net) INIT_HLIST_HEAD(&ipvs->rs_table[idx]); INIT_LIST_HEAD(&ipvs->dest_trash); + spin_lock_init(&ipvs->dest_trash_lock); + setup_timer(&ipvs->dest_trash_timer, ip_vs_dest_trash_expire, + (unsigned long) net); atomic_set(&ipvs->ftpsvc_counter, 0); atomic_set(&ipvs->nullsvc_counter, 0); @@ -3829,6 +3855,10 @@ void __net_exit ip_vs_control_net_cleanup(struct net *net) { struct netns_ipvs *ipvs = net_ipvs(net); + /* Some dest can be in grace period even before cleanup, we have to + * defer ip_vs_trash_cleanup until ip_vs_dest_wait_readers is called. + */ + rcu_barrier(); ip_vs_trash_cleanup(net); ip_vs_stop_estimator(net, &ipvs->tot_stats); ip_vs_control_net_cleanup_sysctl(net); -- cgit v1.2.3 From ed3ffc4e48e2b03d5b23988f3cfa0ad8d79e0092 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Fri, 22 Mar 2013 11:46:50 +0200 Subject: ipvs: do not expect result from done_service This method releases the scheduler state, it can not fail. Such change will help to properly replace the scheduler in following patch. Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman --- include/net/ip_vs.h | 4 ++-- net/netfilter/ipvs/ip_vs_ctl.c | 5 +---- net/netfilter/ipvs/ip_vs_dh.c | 4 +--- net/netfilter/ipvs/ip_vs_lblc.c | 4 +--- net/netfilter/ipvs/ip_vs_lblcr.c | 4 +--- net/netfilter/ipvs/ip_vs_sched.c | 13 ++++--------- net/netfilter/ipvs/ip_vs_sh.c | 4 +--- net/netfilter/ipvs/ip_vs_wrr.c | 4 +--- 8 files changed, 12 insertions(+), 30 deletions(-) (limited to 'include/net') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 18aeb85079f8..4990de698576 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -806,7 +806,7 @@ struct ip_vs_scheduler { /* scheduler initializing service */ int (*init_service)(struct ip_vs_service *svc); /* scheduling service finish */ - int (*done_service)(struct ip_vs_service *svc); + void (*done_service)(struct ip_vs_service *svc); /* scheduler updating service */ int (*update_service)(struct ip_vs_service *svc); /* dest is linked */ @@ -1392,7 +1392,7 @@ extern int register_ip_vs_scheduler(struct ip_vs_scheduler *scheduler); extern int unregister_ip_vs_scheduler(struct ip_vs_scheduler *scheduler); extern int ip_vs_bind_scheduler(struct ip_vs_service *svc, struct ip_vs_scheduler *scheduler); -extern int ip_vs_unbind_scheduler(struct ip_vs_service *svc); +extern void ip_vs_unbind_scheduler(struct ip_vs_service *svc); extern struct ip_vs_scheduler *ip_vs_scheduler_get(const char *sched_name); extern void ip_vs_scheduler_put(struct ip_vs_scheduler *scheduler); extern struct ip_vs_conn * diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 80d366b7daab..d02272685dda 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -1334,10 +1334,7 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u) /* * Unbind the old scheduler */ - if ((ret = ip_vs_unbind_scheduler(svc))) { - old_sched = sched; - goto out_unlock; - } + ip_vs_unbind_scheduler(svc); /* * Bind the new scheduler diff --git a/net/netfilter/ipvs/ip_vs_dh.c b/net/netfilter/ipvs/ip_vs_dh.c index ebe80f44e712..89c27230d93a 100644 --- a/net/netfilter/ipvs/ip_vs_dh.c +++ b/net/netfilter/ipvs/ip_vs_dh.c @@ -174,7 +174,7 @@ static int ip_vs_dh_init_svc(struct ip_vs_service *svc) } -static int ip_vs_dh_done_svc(struct ip_vs_service *svc) +static void ip_vs_dh_done_svc(struct ip_vs_service *svc) { struct ip_vs_dh_state *s = svc->sched_data; @@ -185,8 +185,6 @@ static int ip_vs_dh_done_svc(struct ip_vs_service *svc) kfree_rcu(s, rcu_head); IP_VS_DBG(6, "DH hash table (memory=%Zdbytes) released\n", sizeof(struct ip_vs_dh_bucket)*IP_VS_DH_TAB_SIZE); - - return 0; } diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c index b873e177435a..c7ff97895237 100644 --- a/net/netfilter/ipvs/ip_vs_lblc.c +++ b/net/netfilter/ipvs/ip_vs_lblc.c @@ -388,7 +388,7 @@ static int ip_vs_lblc_init_svc(struct ip_vs_service *svc) } -static int ip_vs_lblc_done_svc(struct ip_vs_service *svc) +static void ip_vs_lblc_done_svc(struct ip_vs_service *svc) { struct ip_vs_lblc_table *tbl = svc->sched_data; @@ -402,8 +402,6 @@ static int ip_vs_lblc_done_svc(struct ip_vs_service *svc) kfree_rcu(tbl, rcu_head); IP_VS_DBG(6, "LBLC hash table (memory=%Zdbytes) released\n", sizeof(*tbl)); - - return 0; } diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c index c22f173d528c..6049b85df41f 100644 --- a/net/netfilter/ipvs/ip_vs_lblcr.c +++ b/net/netfilter/ipvs/ip_vs_lblcr.c @@ -555,7 +555,7 @@ static int ip_vs_lblcr_init_svc(struct ip_vs_service *svc) } -static int ip_vs_lblcr_done_svc(struct ip_vs_service *svc) +static void ip_vs_lblcr_done_svc(struct ip_vs_service *svc) { struct ip_vs_lblcr_table *tbl = svc->sched_data; @@ -569,8 +569,6 @@ static int ip_vs_lblcr_done_svc(struct ip_vs_service *svc) kfree_rcu(tbl, rcu_head); IP_VS_DBG(6, "LBLCR hash table (memory=%Zdbytes) released\n", sizeof(*tbl)); - - return 0; } diff --git a/net/netfilter/ipvs/ip_vs_sched.c b/net/netfilter/ipvs/ip_vs_sched.c index 7f11d3d37a42..1b715d0caf43 100644 --- a/net/netfilter/ipvs/ip_vs_sched.c +++ b/net/netfilter/ipvs/ip_vs_sched.c @@ -64,22 +64,17 @@ int ip_vs_bind_scheduler(struct ip_vs_service *svc, /* * Unbind a service with its scheduler */ -int ip_vs_unbind_scheduler(struct ip_vs_service *svc) +void ip_vs_unbind_scheduler(struct ip_vs_service *svc) { struct ip_vs_scheduler *sched = svc->scheduler; if (!sched) - return 0; + return; - if (sched->done_service) { - if (sched->done_service(svc) != 0) { - pr_err("%s(): done error\n", __func__); - return -EINVAL; - } - } + if (sched->done_service) + sched->done_service(svc); svc->scheduler = NULL; - return 0; } diff --git a/net/netfilter/ipvs/ip_vs_sh.c b/net/netfilter/ipvs/ip_vs_sh.c index 55e76d8db711..81c1a10c7b49 100644 --- a/net/netfilter/ipvs/ip_vs_sh.c +++ b/net/netfilter/ipvs/ip_vs_sh.c @@ -187,7 +187,7 @@ static int ip_vs_sh_init_svc(struct ip_vs_service *svc) } -static int ip_vs_sh_done_svc(struct ip_vs_service *svc) +static void ip_vs_sh_done_svc(struct ip_vs_service *svc) { struct ip_vs_sh_state *s = svc->sched_data; @@ -198,8 +198,6 @@ static int ip_vs_sh_done_svc(struct ip_vs_service *svc) kfree_rcu(s, rcu_head); IP_VS_DBG(6, "SH hash table (memory=%Zdbytes) released\n", sizeof(struct ip_vs_sh_bucket)*IP_VS_SH_TAB_SIZE); - - return 0; } diff --git a/net/netfilter/ipvs/ip_vs_wrr.c b/net/netfilter/ipvs/ip_vs_wrr.c index 98cb05e345c9..a74fd9bab953 100644 --- a/net/netfilter/ipvs/ip_vs_wrr.c +++ b/net/netfilter/ipvs/ip_vs_wrr.c @@ -129,7 +129,7 @@ static int ip_vs_wrr_init_svc(struct ip_vs_service *svc) } -static int ip_vs_wrr_done_svc(struct ip_vs_service *svc) +static void ip_vs_wrr_done_svc(struct ip_vs_service *svc) { struct ip_vs_wrr_mark *mark = svc->sched_data; @@ -137,8 +137,6 @@ static int ip_vs_wrr_done_svc(struct ip_vs_service *svc) * Release the mark variable */ kfree_rcu(mark, rcu_head); - - return 0; } -- cgit v1.2.3 From ba3a3ce14ea26d602b253ef13a56d540827cd51d Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Fri, 22 Mar 2013 11:46:51 +0200 Subject: ipvs: convert sched_lock to spin lock As all read_locks are gone spin lock is preferred. Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman --- include/net/ip_vs.h | 2 +- net/netfilter/ipvs/ip_vs_ctl.c | 2 +- net/netfilter/ipvs/ip_vs_lblc.c | 18 +++++++++--------- net/netfilter/ipvs/ip_vs_lblcr.c | 26 +++++++++++++------------- net/netfilter/ipvs/ip_vs_rr.c | 10 +++++----- net/netfilter/ipvs/ip_vs_wrr.c | 8 ++++---- 6 files changed, 33 insertions(+), 33 deletions(-) (limited to 'include/net') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 4990de698576..4a7bc63b9cba 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -734,7 +734,7 @@ struct ip_vs_service { /* for scheduling */ struct ip_vs_scheduler *scheduler; /* bound scheduler object */ - rwlock_t sched_lock; /* lock sched_data */ + spinlock_t sched_lock; /* lock sched_data */ void *sched_data; /* scheduler application data */ /* alternate persistence engine */ diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index d02272685dda..2bfd807bc93f 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -1219,7 +1219,7 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u, svc->net = net; INIT_LIST_HEAD(&svc->destinations); - rwlock_init(&svc->sched_lock); + spin_lock_init(&svc->sched_lock); spin_lock_init(&svc->stats.lock); /* Bind the scheduler */ diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c index c7ff97895237..ffef8a162148 100644 --- a/net/netfilter/ipvs/ip_vs_lblc.c +++ b/net/netfilter/ipvs/ip_vs_lblc.c @@ -194,7 +194,7 @@ ip_vs_lblc_get(int af, struct ip_vs_lblc_table *tbl, /* * Create or update an ip_vs_lblc_entry, which is a mapping of a destination IP - * address to a server. Called under write lock. + * address to a server. Called under spin lock. */ static inline struct ip_vs_lblc_entry * ip_vs_lblc_new(struct ip_vs_lblc_table *tbl, const union nf_inet_addr *daddr, @@ -242,7 +242,7 @@ static void ip_vs_lblc_flush(struct ip_vs_service *svc) struct hlist_node *next; int i; - write_lock_bh(&svc->sched_lock); + spin_lock_bh(&svc->sched_lock); tbl->dead = 1; for (i=0; ibucket[i], list) { @@ -250,7 +250,7 @@ static void ip_vs_lblc_flush(struct ip_vs_service *svc) atomic_dec(&tbl->entries); } } - write_unlock_bh(&svc->sched_lock); + spin_unlock_bh(&svc->sched_lock); } static int sysctl_lblc_expiration(struct ip_vs_service *svc) @@ -274,7 +274,7 @@ static inline void ip_vs_lblc_full_check(struct ip_vs_service *svc) for (i=0, j=tbl->rover; isched_lock); + spin_lock(&svc->sched_lock); hlist_for_each_entry_safe(en, next, &tbl->bucket[j], list) { if (time_before(now, en->lastuse + @@ -284,7 +284,7 @@ static inline void ip_vs_lblc_full_check(struct ip_vs_service *svc) ip_vs_lblc_free(en); atomic_dec(&tbl->entries); } - write_unlock(&svc->sched_lock); + spin_unlock(&svc->sched_lock); } tbl->rover = j; } @@ -330,7 +330,7 @@ static void ip_vs_lblc_check_expire(unsigned long data) for (i=0, j=tbl->rover; isched_lock); + spin_lock(&svc->sched_lock); hlist_for_each_entry_safe(en, next, &tbl->bucket[j], list) { if (time_before(now, en->lastuse + ENTRY_TIMEOUT)) continue; @@ -339,7 +339,7 @@ static void ip_vs_lblc_check_expire(unsigned long data) atomic_dec(&tbl->entries); goal--; } - write_unlock(&svc->sched_lock); + spin_unlock(&svc->sched_lock); if (goal <= 0) break; } @@ -527,10 +527,10 @@ ip_vs_lblc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) } /* If we fail to create a cache entry, we'll just use the valid dest */ - write_lock(&svc->sched_lock); + spin_lock(&svc->sched_lock); if (!tbl->dead) ip_vs_lblc_new(tbl, &iph.daddr, dest); - write_unlock(&svc->sched_lock); + spin_unlock(&svc->sched_lock); out: IP_VS_DBG_BUF(6, "LBLC: destination IP address %s --> server %s:%d\n", diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c index 6049b85df41f..cdfe6a95eddb 100644 --- a/net/netfilter/ipvs/ip_vs_lblcr.c +++ b/net/netfilter/ipvs/ip_vs_lblcr.c @@ -368,7 +368,7 @@ ip_vs_lblcr_get(int af, struct ip_vs_lblcr_table *tbl, /* * Create or update an ip_vs_lblcr_entry, which is a mapping of a destination - * IP address to a server. Called under write lock. + * IP address to a server. Called under spin lock. */ static inline struct ip_vs_lblcr_entry * ip_vs_lblcr_new(struct ip_vs_lblcr_table *tbl, const union nf_inet_addr *daddr, @@ -412,14 +412,14 @@ static void ip_vs_lblcr_flush(struct ip_vs_service *svc) struct ip_vs_lblcr_entry *en; struct hlist_node *next; - write_lock_bh(&svc->sched_lock); + spin_lock_bh(&svc->sched_lock); tbl->dead = 1; for (i=0; ibucket[i], list) { ip_vs_lblcr_free(en); } } - write_unlock_bh(&svc->sched_lock); + spin_unlock_bh(&svc->sched_lock); } static int sysctl_lblcr_expiration(struct ip_vs_service *svc) @@ -443,7 +443,7 @@ static inline void ip_vs_lblcr_full_check(struct ip_vs_service *svc) for (i=0, j=tbl->rover; isched_lock); + spin_lock(&svc->sched_lock); hlist_for_each_entry_safe(en, next, &tbl->bucket[j], list) { if (time_after(en->lastuse + sysctl_lblcr_expiration(svc), now)) @@ -452,7 +452,7 @@ static inline void ip_vs_lblcr_full_check(struct ip_vs_service *svc) ip_vs_lblcr_free(en); atomic_dec(&tbl->entries); } - write_unlock(&svc->sched_lock); + spin_unlock(&svc->sched_lock); } tbl->rover = j; } @@ -498,7 +498,7 @@ static void ip_vs_lblcr_check_expire(unsigned long data) for (i=0, j=tbl->rover; isched_lock); + spin_lock(&svc->sched_lock); hlist_for_each_entry_safe(en, next, &tbl->bucket[j], list) { if (time_before(now, en->lastuse+ENTRY_TIMEOUT)) continue; @@ -507,7 +507,7 @@ static void ip_vs_lblcr_check_expire(unsigned long data) atomic_dec(&tbl->entries); goal--; } - write_unlock(&svc->sched_lock); + spin_unlock(&svc->sched_lock); if (goal <= 0) break; } @@ -678,7 +678,7 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) if (atomic_read(&en->set.size) > 1 && time_after(jiffies, en->set.lastmod + sysctl_lblcr_expiration(svc))) { - write_lock(&svc->sched_lock); + spin_lock(&svc->sched_lock); if (atomic_read(&en->set.size) > 1) { struct ip_vs_dest *m; @@ -686,7 +686,7 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) if (m) ip_vs_dest_set_erase(&en->set, m); } - write_unlock(&svc->sched_lock); + spin_unlock(&svc->sched_lock); } /* If the destination is not overloaded, use it */ @@ -701,10 +701,10 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) } /* Update our cache entry */ - write_lock(&svc->sched_lock); + spin_lock(&svc->sched_lock); if (!tbl->dead) ip_vs_dest_set_insert(&en->set, dest, true); - write_unlock(&svc->sched_lock); + spin_unlock(&svc->sched_lock); goto out; } @@ -716,10 +716,10 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) } /* If we fail to create a cache entry, we'll just use the valid dest */ - write_lock(&svc->sched_lock); + spin_lock(&svc->sched_lock); if (!tbl->dead) ip_vs_lblcr_new(tbl, &iph.daddr, dest); - write_unlock(&svc->sched_lock); + spin_unlock(&svc->sched_lock); out: IP_VS_DBG_BUF(6, "LBLCR: destination IP address %s --> server %s:%d\n", diff --git a/net/netfilter/ipvs/ip_vs_rr.c b/net/netfilter/ipvs/ip_vs_rr.c index 3942890c6f19..aa4601ff1ccc 100644 --- a/net/netfilter/ipvs/ip_vs_rr.c +++ b/net/netfilter/ipvs/ip_vs_rr.c @@ -39,14 +39,14 @@ static int ip_vs_rr_del_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest) { struct list_head *p; - write_lock_bh(&svc->sched_lock); + spin_lock_bh(&svc->sched_lock); p = (struct list_head *) svc->sched_data; /* dest is already unlinked, so p->prev is not valid but * p->next is valid, use it to reach previous entry. */ if (p == &dest->n_list) svc->sched_data = p->next->prev; - write_unlock_bh(&svc->sched_lock); + spin_unlock_bh(&svc->sched_lock); return 0; } @@ -63,7 +63,7 @@ ip_vs_rr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) IP_VS_DBG(6, "%s(): Scheduling...\n", __func__); - write_lock(&svc->sched_lock); + spin_lock(&svc->sched_lock); p = (struct list_head *) svc->sched_data; last = dest = list_entry(p, struct ip_vs_dest, n_list); @@ -85,13 +85,13 @@ ip_vs_rr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) } while (pass < 2 && p != &svc->destinations); stop: - write_unlock(&svc->sched_lock); + spin_unlock(&svc->sched_lock); ip_vs_scheduler_err(svc, "no destination available"); return NULL; out: svc->sched_data = &dest->n_list; - write_unlock(&svc->sched_lock); + spin_unlock(&svc->sched_lock); IP_VS_DBG_BUF(6, "RR: server %s:%u " "activeconns %d refcnt %d weight %d\n", IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port), diff --git a/net/netfilter/ipvs/ip_vs_wrr.c b/net/netfilter/ipvs/ip_vs_wrr.c index a74fd9bab953..b173ef907a14 100644 --- a/net/netfilter/ipvs/ip_vs_wrr.c +++ b/net/netfilter/ipvs/ip_vs_wrr.c @@ -145,7 +145,7 @@ static int ip_vs_wrr_dest_changed(struct ip_vs_service *svc, { struct ip_vs_wrr_mark *mark = svc->sched_data; - write_lock_bh(&svc->sched_lock); + spin_lock_bh(&svc->sched_lock); mark->cl = list_entry(&svc->destinations, struct ip_vs_dest, n_list); mark->di = ip_vs_wrr_gcd_weight(svc); mark->mw = ip_vs_wrr_max_weight(svc) - (mark->di - 1); @@ -153,7 +153,7 @@ static int ip_vs_wrr_dest_changed(struct ip_vs_service *svc, mark->cw = mark->mw; else if (mark->di > 1) mark->cw = (mark->cw / mark->di) * mark->di + 1; - write_unlock_bh(&svc->sched_lock); + spin_unlock_bh(&svc->sched_lock); return 0; } @@ -170,7 +170,7 @@ ip_vs_wrr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) IP_VS_DBG(6, "%s(): Scheduling...\n", __func__); - write_lock(&svc->sched_lock); + spin_lock(&svc->sched_lock); dest = mark->cl; /* No available dests? */ if (mark->mw == 0) @@ -222,7 +222,7 @@ found: mark->cl = dest; out: - write_unlock(&svc->sched_lock); + spin_unlock(&svc->sched_lock); return dest; err_noavail: -- cgit v1.2.3 From 413c2d04e9494ca38629d8a7ffeff1e4398a9fe3 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Fri, 22 Mar 2013 11:46:52 +0200 Subject: ipvs: convert dests to rcu In previous commits the schedulers started to access svc->destinations with _rcu list traversal primitives because the IP_VS_WAIT_WHILE macro still plays the role of grace period. Now it is time to finish the updating part, i.e. adding and deleting of dests with _rcu suffix before removing the IP_VS_WAIT_WHILE in next commit. We use the same rule for conns as for the schedulers: dests can be searched in RCU read-side critical section where ip_vs_dest_hold can be called by ip_vs_bind_dest. Some things are not perfect, for example, calling functions like ip_vs_lookup_dest from updating code under RCU, just because we use some function both from reader and from updater. Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman --- include/net/ip_vs.h | 2 +- net/netfilter/ipvs/ip_vs_conn.c | 8 +++++--- net/netfilter/ipvs/ip_vs_ctl.c | 32 +++++++++++++++++--------------- net/netfilter/ipvs/ip_vs_sync.c | 11 ++++------- 4 files changed, 27 insertions(+), 26 deletions(-) (limited to 'include/net') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 4a7bc63b9cba..78a6634653a2 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -1434,7 +1434,7 @@ extern struct ip_vs_dest * ip_vs_find_dest(struct net *net, int af, const union nf_inet_addr *daddr, __be16 dport, const union nf_inet_addr *vaddr, __be16 vport, __u16 protocol, __u32 fwmark, __u32 flags); -extern struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp); +extern void ip_vs_try_bind_dest(struct ip_vs_conn *cp); static inline void ip_vs_dest_hold(struct ip_vs_dest *dest) { diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index 1b29e4a2b26c..54de34077b63 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -611,10 +611,11 @@ ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest) * Check if there is a destination for the connection, if so * bind the connection to the destination. */ -struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp) +void ip_vs_try_bind_dest(struct ip_vs_conn *cp) { struct ip_vs_dest *dest; + rcu_read_lock(); dest = ip_vs_find_dest(ip_vs_conn_net(cp), cp->af, &cp->daddr, cp->dport, &cp->vaddr, cp->vport, cp->protocol, cp->fwmark, cp->flags); @@ -624,7 +625,8 @@ struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp) spin_lock(&cp->lock); if (cp->dest) { spin_unlock(&cp->lock); - return dest; + rcu_read_unlock(); + return; } /* Applications work depending on the forwarding method @@ -648,7 +650,7 @@ struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp) if (pd && atomic_read(&pd->appcnt)) ip_vs_bind_app(cp, pd->pp); } - return dest; + rcu_read_unlock(); } diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 2bfd807bc93f..0763cc6e092b 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -565,8 +565,8 @@ bool ip_vs_has_real_service(struct net *net, int af, __u16 protocol, return false; } -/* - * Lookup destination by {addr,port} in the given service +/* Lookup destination by {addr,port} in the given service + * Called under RCU lock. */ static struct ip_vs_dest * ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr, @@ -577,7 +577,7 @@ ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr, /* * Find the destination for the given service */ - list_for_each_entry(dest, &svc->destinations, n_list) { + list_for_each_entry_rcu(dest, &svc->destinations, n_list) { if ((dest->af == svc->af) && ip_vs_addr_equal(svc->af, &dest->addr, daddr) && (dest->port == dport)) { @@ -591,10 +591,11 @@ ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr, /* * Find destination by {daddr,dport,vaddr,protocol} - * Cretaed to be used in ip_vs_process_message() in + * Created to be used in ip_vs_process_message() in * the backup synchronization daemon. It finds the * destination to be bound to the received connection * on the backup. + * Called under RCU lock, no refcnt is returned. */ struct ip_vs_dest *ip_vs_find_dest(struct net *net, int af, const union nf_inet_addr *daddr, @@ -615,8 +616,6 @@ struct ip_vs_dest *ip_vs_find_dest(struct net *net, int af, dest = ip_vs_lookup_dest(svc, daddr, port); if (!dest) dest = ip_vs_lookup_dest(svc, daddr, port ^ dport); - if (dest) - ip_vs_dest_hold(dest); ip_vs_service_put(svc); return dest; } @@ -826,7 +825,7 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest, IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0); if (add) { - list_add(&dest->n_list, &svc->destinations); + list_add_rcu(&dest->n_list, &svc->destinations); svc->num_dests++; if (svc->scheduler->add_dest) svc->scheduler->add_dest(svc, dest); @@ -933,10 +932,10 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) ip_vs_addr_copy(svc->af, &daddr, &udest->addr); - /* - * Check if the dest already exists in the list - */ + /* We use function that requires RCU lock */ + rcu_read_lock(); dest = ip_vs_lookup_dest(svc, &daddr, dport); + rcu_read_unlock(); if (dest != NULL) { IP_VS_DBG(1, "%s(): dest already exists\n", __func__); @@ -997,10 +996,10 @@ ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) ip_vs_addr_copy(svc->af, &daddr, &udest->addr); - /* - * Lookup the destination list - */ + /* We use function that requires RCU lock */ + rcu_read_lock(); dest = ip_vs_lookup_dest(svc, &daddr, dport); + rcu_read_unlock(); if (dest == NULL) { IP_VS_DBG(1, "%s(): dest doesn't exist\n", __func__); @@ -1069,7 +1068,7 @@ static void __ip_vs_unlink_dest(struct ip_vs_service *svc, /* * Remove it from the d-linked destination list. */ - list_del(&dest->n_list); + list_del_rcu(&dest->n_list); svc->num_dests--; if (svcupd && svc->scheduler->del_dest) @@ -1094,7 +1093,10 @@ ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) EnterFunction(2); + /* We use function that requires RCU lock */ + rcu_read_lock(); dest = ip_vs_lookup_dest(svc, &udest->addr, dport); + rcu_read_unlock(); if (dest == NULL) { IP_VS_DBG(1, "%s(): destination not found!\n", __func__); @@ -2104,7 +2106,7 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v) else seq_putc(seq, '\n'); - list_for_each_entry(dest, &svc->destinations, n_list) { + list_for_each_entry_rcu(dest, &svc->destinations, n_list) { #ifdef CONFIG_IP_VS_IPV6 if (dest->af == AF_INET6) seq_printf(seq, diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index 6cc3e52f1f35..97241749216d 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -858,23 +858,20 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param, flags |= cp->flags & ~IP_VS_CONN_F_BACKUP_UPD_MASK; cp->flags = flags; spin_unlock(&cp->lock); - if (!dest) { - dest = ip_vs_try_bind_dest(cp); - if (dest) - ip_vs_dest_put(dest); - } + if (!dest) + ip_vs_try_bind_dest(cp); } else { /* * Find the appropriate destination for the connection. * If it is not found the connection will remain unbound * but still handled. */ + rcu_read_lock(); dest = ip_vs_find_dest(net, type, daddr, dport, param->vaddr, param->vport, protocol, fwmark, flags); cp = ip_vs_conn_new(param, daddr, dport, flags, dest, fwmark); - if (dest) - ip_vs_dest_put(dest); + rcu_read_unlock(); if (!cp) { if (param->pe_data) kfree(param->pe_data); -- cgit v1.2.3 From ceec4c3816818459d90c92152e61371ff5b1d5a1 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Fri, 22 Mar 2013 11:46:53 +0200 Subject: ipvs: convert services to rcu This is the final step in RCU conversion. Things that are removed: - svc->usecnt: now svc is accessed under RCU read lock - svc->inc: and some unused code - ip_vs_bind_pe and ip_vs_unbind_pe: no ability to replace PE - __ip_vs_svc_lock: replaced with RCU - IP_VS_WAIT_WHILE: now readers lookup svcs and dests under RCU and work in parallel with configuration Other changes: - before now, a RCU read-side critical section included the calling of the schedule method, now it is extended to include service lookup - ip_vs_svc_table and ip_vs_svc_fwm_table are now using hlist - svc->pe and svc->scheduler remain to the end (of grace period), the schedulers are prepared for such RCU readers even after done_service is called but they need to use synchronize_rcu because last ip_vs_scheduler_put can happen while RCU read-side critical sections use an outdated svc->scheduler pointer - as planned, update_service is removed - empty services can be freed immediately after grace period. If dests were present, the services are freed from the dest trash code Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman --- include/net/ip_vs.h | 28 +--- net/netfilter/ipvs/ip_vs_core.c | 26 +-- net/netfilter/ipvs/ip_vs_ctl.c | 299 ++++++++++++++-------------------- net/netfilter/ipvs/ip_vs_dh.c | 1 + net/netfilter/ipvs/ip_vs_lblc.c | 1 + net/netfilter/ipvs/ip_vs_lblcr.c | 1 + net/netfilter/ipvs/ip_vs_lc.c | 1 + net/netfilter/ipvs/ip_vs_nq.c | 1 + net/netfilter/ipvs/ip_vs_pe.c | 12 -- net/netfilter/ipvs/ip_vs_proto_sctp.c | 14 +- net/netfilter/ipvs/ip_vs_proto_tcp.c | 14 +- net/netfilter/ipvs/ip_vs_proto_udp.c | 14 +- net/netfilter/ipvs/ip_vs_rr.c | 1 + net/netfilter/ipvs/ip_vs_sched.c | 28 ++-- net/netfilter/ipvs/ip_vs_sed.c | 1 + net/netfilter/ipvs/ip_vs_sh.c | 1 + net/netfilter/ipvs/ip_vs_wlc.c | 1 + net/netfilter/ipvs/ip_vs_wrr.c | 1 + 18 files changed, 186 insertions(+), 259 deletions(-) (limited to 'include/net') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 78a6634653a2..f9f5b057b480 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -359,8 +359,6 @@ static inline const char *ip_vs_dbg_addr(int af, char *buf, size_t buf_len, #define LeaveFunction(level) do {} while (0) #endif -#define IP_VS_WAIT_WHILE(expr) while (expr) { cpu_relax(); } - /* * The port number of FTP service (in network order). @@ -712,10 +710,9 @@ struct ip_vs_dest_user_kern { * and the forwarding entries */ struct ip_vs_service { - struct list_head s_list; /* for normal service table */ - struct list_head f_list; /* for fwmark-based service table */ + struct hlist_node s_list; /* for normal service table */ + struct hlist_node f_list; /* for fwmark-based service table */ atomic_t refcnt; /* reference counter */ - atomic_t usecnt; /* use counter */ u16 af; /* address family */ __u16 protocol; /* which protocol (TCP/UDP) */ @@ -730,15 +727,16 @@ struct ip_vs_service { struct list_head destinations; /* real server d-linked list */ __u32 num_dests; /* number of servers */ struct ip_vs_stats stats; /* statistics for the service */ - struct ip_vs_app *inc; /* bind conns to this app inc */ /* for scheduling */ - struct ip_vs_scheduler *scheduler; /* bound scheduler object */ + struct ip_vs_scheduler __rcu *scheduler; /* bound scheduler object */ spinlock_t sched_lock; /* lock sched_data */ void *sched_data; /* scheduler application data */ /* alternate persistence engine */ - struct ip_vs_pe *pe; + struct ip_vs_pe __rcu *pe; + + struct rcu_head rcu_head; }; /* Information for cached dst */ @@ -807,8 +805,6 @@ struct ip_vs_scheduler { int (*init_service)(struct ip_vs_service *svc); /* scheduling service finish */ void (*done_service)(struct ip_vs_service *svc); - /* scheduler updating service */ - int (*update_service)(struct ip_vs_service *svc); /* dest is linked */ int (*add_dest)(struct ip_vs_service *svc, struct ip_vs_dest *dest); /* dest is unlinked */ @@ -1344,8 +1340,6 @@ extern void ip_vs_app_inc_put(struct ip_vs_app *inc); extern int ip_vs_app_pkt_out(struct ip_vs_conn *, struct sk_buff *skb); extern int ip_vs_app_pkt_in(struct ip_vs_conn *, struct sk_buff *skb); -void ip_vs_bind_pe(struct ip_vs_service *svc, struct ip_vs_pe *pe); -void ip_vs_unbind_pe(struct ip_vs_service *svc); int register_ip_vs_pe(struct ip_vs_pe *pe); int unregister_ip_vs_pe(struct ip_vs_pe *pe); struct ip_vs_pe *ip_vs_pe_getbyname(const char *name); @@ -1392,7 +1386,8 @@ extern int register_ip_vs_scheduler(struct ip_vs_scheduler *scheduler); extern int unregister_ip_vs_scheduler(struct ip_vs_scheduler *scheduler); extern int ip_vs_bind_scheduler(struct ip_vs_service *svc, struct ip_vs_scheduler *scheduler); -extern void ip_vs_unbind_scheduler(struct ip_vs_service *svc); +extern void ip_vs_unbind_scheduler(struct ip_vs_service *svc, + struct ip_vs_scheduler *sched); extern struct ip_vs_scheduler *ip_vs_scheduler_get(const char *sched_name); extern void ip_vs_scheduler_put(struct ip_vs_scheduler *scheduler); extern struct ip_vs_conn * @@ -1412,14 +1407,9 @@ extern struct ip_vs_stats ip_vs_stats; extern int sysctl_ip_vs_sync_ver; extern struct ip_vs_service * -ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol, +ip_vs_service_find(struct net *net, int af, __u32 fwmark, __u16 protocol, const union nf_inet_addr *vaddr, __be16 vport); -static inline void ip_vs_service_put(struct ip_vs_service *svc) -{ - atomic_dec(&svc->usecnt); -} - extern bool ip_vs_has_real_service(struct net *net, int af, __u16 protocol, const union nf_inet_addr *daddr, __be16 dport); diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 939ad11ed534..79df3c61d4d8 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -203,7 +203,7 @@ ip_vs_conn_fill_param_persist(const struct ip_vs_service *svc, { ip_vs_conn_fill_param(svc->net, svc->af, protocol, caddr, cport, vaddr, vport, p); - p->pe = svc->pe; + p->pe = rcu_dereference(svc->pe); if (p->pe && p->pe->fill_param) return p->pe->fill_param(p, skb); @@ -296,15 +296,16 @@ ip_vs_sched_persist(struct ip_vs_service *svc, /* Check if a template already exists */ ct = ip_vs_ct_in_get(¶m); if (!ct || !ip_vs_check_template(ct)) { + struct ip_vs_scheduler *sched; + /* * No template found or the dest of the connection * template is not available. * return *ignored=0 i.e. ICMP and NF_DROP */ - rcu_read_lock(); - dest = svc->scheduler->schedule(svc, skb); + sched = rcu_dereference(svc->scheduler); + dest = sched->schedule(svc, skb); if (!dest) { - rcu_read_unlock(); IP_VS_DBG(1, "p-schedule: no dest found.\n"); kfree(param.pe_data); *ignored = 0; @@ -320,7 +321,6 @@ ip_vs_sched_persist(struct ip_vs_service *svc, * when the template expires */ ct = ip_vs_conn_new(¶m, &dest->addr, dport, IP_VS_CONN_F_TEMPLATE, dest, skb->mark); - rcu_read_unlock(); if (ct == NULL) { kfree(param.pe_data); *ignored = -1; @@ -394,6 +394,7 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, { struct ip_vs_protocol *pp = pd->pp; struct ip_vs_conn *cp = NULL; + struct ip_vs_scheduler *sched; struct ip_vs_dest *dest; __be16 _ports[2], *pptr; unsigned int flags; @@ -449,10 +450,9 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, return NULL; } - rcu_read_lock(); - dest = svc->scheduler->schedule(svc, skb); + sched = rcu_dereference(svc->scheduler); + dest = sched->schedule(svc, skb); if (dest == NULL) { - rcu_read_unlock(); IP_VS_DBG(1, "Schedule: no dest found.\n"); return NULL; } @@ -473,7 +473,6 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, cp = ip_vs_conn_new(&p, &dest->addr, dest->port ? dest->port : pptr[1], flags, dest, skb->mark); - rcu_read_unlock(); if (!cp) { *ignored = -1; return NULL; @@ -510,7 +509,6 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, pptr = frag_safe_skb_hp(skb, iph->len, sizeof(_ports), _ports, iph); if (pptr == NULL) { - ip_vs_service_put(svc); return NF_DROP; } @@ -536,8 +534,6 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, IP_VS_CONN_F_ONE_PACKET : 0; union nf_inet_addr daddr = { .all = { 0, 0, 0, 0 } }; - ip_vs_service_put(svc); - /* create a new connection entry */ IP_VS_DBG(6, "%s(): create a cache_bypass entry\n", __func__); { @@ -574,12 +570,8 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, * listed in the ipvs table), pass the packets, because it is * not ipvs job to decide to drop the packets. */ - if ((svc->port == FTPPORT) && (pptr[1] != FTPPORT)) { - ip_vs_service_put(svc); + if ((svc->port == FTPPORT) && (pptr[1] != FTPPORT)) return NF_ACCEPT; - } - - ip_vs_service_put(svc); /* * Notify the client that the destination is unreachable, and diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 0763cc6e092b..9e4074c26dc2 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -55,9 +55,6 @@ /* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */ static DEFINE_MUTEX(__ip_vs_mutex); -/* lock for service table */ -static DEFINE_RWLOCK(__ip_vs_svc_lock); - /* sysctl variables */ #ifdef CONFIG_IP_VS_DEBUG @@ -257,9 +254,9 @@ ip_vs_use_count_dec(void) #define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1) /* the service table hashed by */ -static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE]; +static struct hlist_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE]; /* the service table hashed by fwmark */ -static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE]; +static struct hlist_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE]; /* @@ -314,13 +311,13 @@ static int ip_vs_svc_hash(struct ip_vs_service *svc) */ hash = ip_vs_svc_hashkey(svc->net, svc->af, svc->protocol, &svc->addr, svc->port); - list_add(&svc->s_list, &ip_vs_svc_table[hash]); + hlist_add_head_rcu(&svc->s_list, &ip_vs_svc_table[hash]); } else { /* * Hash it by fwmark in svc_fwm_table */ hash = ip_vs_svc_fwm_hashkey(svc->net, svc->fwmark); - list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]); + hlist_add_head_rcu(&svc->f_list, &ip_vs_svc_fwm_table[hash]); } svc->flags |= IP_VS_SVC_F_HASHED; @@ -344,10 +341,10 @@ static int ip_vs_svc_unhash(struct ip_vs_service *svc) if (svc->fwmark == 0) { /* Remove it from the svc_table table */ - list_del(&svc->s_list); + hlist_del_rcu(&svc->s_list); } else { /* Remove it from the svc_fwm_table table */ - list_del(&svc->f_list); + hlist_del_rcu(&svc->f_list); } svc->flags &= ~IP_VS_SVC_F_HASHED; @@ -369,7 +366,7 @@ __ip_vs_service_find(struct net *net, int af, __u16 protocol, /* Check for "full" addressed entries */ hash = ip_vs_svc_hashkey(net, af, protocol, vaddr, vport); - list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){ + hlist_for_each_entry_rcu(svc, &ip_vs_svc_table[hash], s_list) { if ((svc->af == af) && ip_vs_addr_equal(af, &svc->addr, vaddr) && (svc->port == vport) @@ -396,7 +393,7 @@ __ip_vs_svc_fwm_find(struct net *net, int af, __u32 fwmark) /* Check for fwmark addressed entries */ hash = ip_vs_svc_fwm_hashkey(net, fwmark); - list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) { + hlist_for_each_entry_rcu(svc, &ip_vs_svc_fwm_table[hash], f_list) { if (svc->fwmark == fwmark && svc->af == af && net_eq(svc->net, net)) { /* HIT */ @@ -407,15 +404,14 @@ __ip_vs_svc_fwm_find(struct net *net, int af, __u32 fwmark) return NULL; } +/* Find service, called under RCU lock */ struct ip_vs_service * -ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol, - const union nf_inet_addr *vaddr, __be16 vport) +ip_vs_service_find(struct net *net, int af, __u32 fwmark, __u16 protocol, + const union nf_inet_addr *vaddr, __be16 vport) { struct ip_vs_service *svc; struct netns_ipvs *ipvs = net_ipvs(net); - read_lock(&__ip_vs_svc_lock); - /* * Check the table hashed by fwmark first */ @@ -451,10 +447,6 @@ ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol, } out: - if (svc) - atomic_inc(&svc->usecnt); - read_unlock(&__ip_vs_svc_lock); - IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n", fwmark, ip_vs_proto_name(protocol), IP_VS_DBG_ADDR(af, vaddr), ntohs(vport), @@ -471,6 +463,13 @@ __ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc) dest->svc = svc; } +static void ip_vs_service_free(struct ip_vs_service *svc) +{ + if (svc->stats.cpustats) + free_percpu(svc->stats.cpustats); + kfree(svc); +} + static void __ip_vs_unbind_svc(struct ip_vs_dest *dest) { @@ -478,12 +477,11 @@ __ip_vs_unbind_svc(struct ip_vs_dest *dest) dest->svc = NULL; if (atomic_dec_and_test(&svc->refcnt)) { - IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n", + IP_VS_DBG_BUF(3, "Removing service %u/%s:%u\n", svc->fwmark, IP_VS_DBG_ADDR(svc->af, &svc->addr), - ntohs(svc->port), atomic_read(&svc->usecnt)); - free_percpu(svc->stats.cpustats); - kfree(svc); + ntohs(svc->port)); + ip_vs_service_free(svc); } } @@ -608,7 +606,7 @@ struct ip_vs_dest *ip_vs_find_dest(struct net *net, int af, struct ip_vs_service *svc; __be16 port = dport; - svc = ip_vs_service_get(net, af, fwmark, protocol, vaddr, vport); + svc = ip_vs_service_find(net, af, fwmark, protocol, vaddr, vport); if (!svc) return NULL; if (fwmark && (flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) @@ -616,7 +614,6 @@ struct ip_vs_dest *ip_vs_find_dest(struct net *net, int af, dest = ip_vs_lookup_dest(svc, daddr, port); if (!dest) dest = ip_vs_lookup_dest(svc, daddr, port ^ dport); - ip_vs_service_put(svc); return dest; } @@ -774,6 +771,7 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest, struct ip_vs_dest_user_kern *udest, int add) { struct netns_ipvs *ipvs = net_ipvs(svc->net); + struct ip_vs_scheduler *sched; int conn_flags; /* set the weight and the flags */ @@ -816,29 +814,17 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest, __ip_vs_dst_cache_reset(dest); spin_unlock_bh(&dest->dst_lock); - if (add) - ip_vs_start_estimator(svc->net, &dest->stats); - - write_lock_bh(&__ip_vs_svc_lock); - - /* Wait until all other svc users go away */ - IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0); - + sched = rcu_dereference_protected(svc->scheduler, 1); if (add) { + ip_vs_start_estimator(svc->net, &dest->stats); list_add_rcu(&dest->n_list, &svc->destinations); svc->num_dests++; - if (svc->scheduler->add_dest) - svc->scheduler->add_dest(svc, dest); + if (sched->add_dest) + sched->add_dest(svc, dest); } else { - if (svc->scheduler->upd_dest) - svc->scheduler->upd_dest(svc, dest); + if (sched->upd_dest) + sched->upd_dest(svc, dest); } - - /* call the update_service, because server weight may be changed */ - if (svc->scheduler->update_service) - svc->scheduler->update_service(svc); - - write_unlock_bh(&__ip_vs_svc_lock); } @@ -1071,14 +1057,13 @@ static void __ip_vs_unlink_dest(struct ip_vs_service *svc, list_del_rcu(&dest->n_list); svc->num_dests--; - if (svcupd && svc->scheduler->del_dest) - svc->scheduler->del_dest(svc, dest); + if (svcupd) { + struct ip_vs_scheduler *sched; - /* - * Call the update_service function of its scheduler - */ - if (svcupd && svc->scheduler->update_service) - svc->scheduler->update_service(svc); + sched = rcu_dereference_protected(svc->scheduler, 1); + if (sched->del_dest) + sched->del_dest(svc, dest); + } } @@ -1103,20 +1088,11 @@ ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) return -ENOENT; } - write_lock_bh(&__ip_vs_svc_lock); - - /* - * Wait until all other svc users go away. - */ - IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0); - /* * Unlink dest from the service */ __ip_vs_unlink_dest(svc, dest, 1); - write_unlock_bh(&__ip_vs_svc_lock); - /* * Delete the destination */ @@ -1207,7 +1183,6 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u, } /* I'm the first user of the service */ - atomic_set(&svc->usecnt, 0); atomic_set(&svc->refcnt, 0); svc->af = u->af; @@ -1231,7 +1206,7 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u, sched = NULL; /* Bind the ct retriever */ - ip_vs_bind_pe(svc, pe); + RCU_INIT_POINTER(svc->pe, pe); pe = NULL; /* Update the virtual service counters */ @@ -1247,9 +1222,7 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u, ipvs->num_services++; /* Hash the service into the service table */ - write_lock_bh(&__ip_vs_svc_lock); ip_vs_svc_hash(svc); - write_unlock_bh(&__ip_vs_svc_lock); *svc_p = svc; /* Now there is a service - full throttle */ @@ -1259,15 +1232,8 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u, out_err: if (svc != NULL) { - ip_vs_unbind_scheduler(svc); - if (svc->inc) { - local_bh_disable(); - ip_vs_app_inc_put(svc->inc); - local_bh_enable(); - } - if (svc->stats.cpustats) - free_percpu(svc->stats.cpustats); - kfree(svc); + ip_vs_unbind_scheduler(svc, sched); + ip_vs_service_free(svc); } ip_vs_scheduler_put(sched); ip_vs_pe_put(pe); @@ -1317,12 +1283,17 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u) } #endif - write_lock_bh(&__ip_vs_svc_lock); - - /* - * Wait until all other svc users go away. - */ - IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0); + old_sched = rcu_dereference_protected(svc->scheduler, 1); + if (sched != old_sched) { + /* Bind the new scheduler */ + ret = ip_vs_bind_scheduler(svc, sched); + if (ret) { + old_sched = sched; + goto out; + } + /* Unbind the old scheduler on success */ + ip_vs_unbind_scheduler(svc, old_sched); + } /* * Set the flags and timeout value @@ -1331,47 +1302,23 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u) svc->timeout = u->timeout * HZ; svc->netmask = u->netmask; - old_sched = svc->scheduler; - if (sched != old_sched) { - /* - * Unbind the old scheduler - */ - ip_vs_unbind_scheduler(svc); + old_pe = rcu_dereference_protected(svc->pe, 1); + if (pe != old_pe) + rcu_assign_pointer(svc->pe, pe); - /* - * Bind the new scheduler - */ - if ((ret = ip_vs_bind_scheduler(svc, sched))) { - /* - * If ip_vs_bind_scheduler fails, restore the old - * scheduler. - * The main reason of failure is out of memory. - * - * The question is if the old scheduler can be - * restored all the time. TODO: if it cannot be - * restored some time, we must delete the service, - * otherwise the system may crash. - */ - ip_vs_bind_scheduler(svc, old_sched); - old_sched = sched; - goto out_unlock; - } - } - - old_pe = svc->pe; - if (pe != old_pe) { - ip_vs_unbind_pe(svc); - ip_vs_bind_pe(svc, pe); - } - -out_unlock: - write_unlock_bh(&__ip_vs_svc_lock); out: ip_vs_scheduler_put(old_sched); ip_vs_pe_put(old_pe); return ret; } +static void ip_vs_service_rcu_free(struct rcu_head *head) +{ + struct ip_vs_service *svc; + + svc = container_of(head, struct ip_vs_service, rcu_head); + ip_vs_service_free(svc); +} /* * Delete a service from the service list @@ -1394,21 +1341,14 @@ static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup) ip_vs_stop_estimator(svc->net, &svc->stats); /* Unbind scheduler */ - old_sched = svc->scheduler; - ip_vs_unbind_scheduler(svc); + old_sched = rcu_dereference_protected(svc->scheduler, 1); + ip_vs_unbind_scheduler(svc, old_sched); ip_vs_scheduler_put(old_sched); - /* Unbind persistence engine */ - old_pe = svc->pe; - ip_vs_unbind_pe(svc); + /* Unbind persistence engine, keep svc->pe */ + old_pe = rcu_dereference_protected(svc->pe, 1); ip_vs_pe_put(old_pe); - /* Unbind app inc */ - if (svc->inc) { - ip_vs_app_inc_put(svc->inc); - svc->inc = NULL; - } - /* * Unlink the whole destination list */ @@ -1428,13 +1368,12 @@ static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup) /* * Free the service if nobody refers to it */ - if (atomic_read(&svc->refcnt) == 0) { - IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n", + if (atomic_dec_and_test(&svc->refcnt)) { + IP_VS_DBG_BUF(3, "Removing service %u/%s:%u\n", svc->fwmark, IP_VS_DBG_ADDR(svc->af, &svc->addr), - ntohs(svc->port), atomic_read(&svc->usecnt)); - free_percpu(svc->stats.cpustats); - kfree(svc); + ntohs(svc->port)); + call_rcu(&svc->rcu_head, ip_vs_service_rcu_free); } /* decrease the module use count */ @@ -1446,21 +1385,14 @@ static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup) */ static void ip_vs_unlink_service(struct ip_vs_service *svc, bool cleanup) { + /* Hold svc to avoid double release from dest_trash */ + atomic_inc(&svc->refcnt); /* * Unhash it from the service table */ - write_lock_bh(&__ip_vs_svc_lock); - ip_vs_svc_unhash(svc); - /* - * Wait until all the svc users go away. - */ - IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0); - __ip_vs_del_service(svc, cleanup); - - write_unlock_bh(&__ip_vs_svc_lock); } /* @@ -1482,14 +1414,15 @@ static int ip_vs_del_service(struct ip_vs_service *svc) static int ip_vs_flush(struct net *net, bool cleanup) { int idx; - struct ip_vs_service *svc, *nxt; + struct ip_vs_service *svc; + struct hlist_node *n; /* * Flush the service table hashed by */ for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { - list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx], - s_list) { + hlist_for_each_entry_safe(svc, n, &ip_vs_svc_table[idx], + s_list) { if (net_eq(svc->net, net)) ip_vs_unlink_service(svc, cleanup); } @@ -1499,8 +1432,8 @@ static int ip_vs_flush(struct net *net, bool cleanup) * Flush the service table hashed by fwmark */ for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { - list_for_each_entry_safe(svc, nxt, - &ip_vs_svc_fwm_table[idx], f_list) { + hlist_for_each_entry_safe(svc, n, &ip_vs_svc_fwm_table[idx], + f_list) { if (net_eq(svc->net, net)) ip_vs_unlink_service(svc, cleanup); } @@ -1558,7 +1491,7 @@ static int ip_vs_dst_event(struct notifier_block *this, unsigned long event, EnterFunction(2); mutex_lock(&__ip_vs_mutex); for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { - list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { + hlist_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { if (net_eq(svc->net, net)) { list_for_each_entry(dest, &svc->destinations, n_list) { @@ -1567,7 +1500,7 @@ static int ip_vs_dst_event(struct notifier_block *this, unsigned long event, } } - list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { + hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { if (net_eq(svc->net, net)) { list_for_each_entry(dest, &svc->destinations, n_list) { @@ -1595,12 +1528,10 @@ static int ip_vs_zero_service(struct ip_vs_service *svc) { struct ip_vs_dest *dest; - write_lock_bh(&__ip_vs_svc_lock); list_for_each_entry(dest, &svc->destinations, n_list) { ip_vs_zero_stats(&dest->stats); } ip_vs_zero_stats(&svc->stats); - write_unlock_bh(&__ip_vs_svc_lock); return 0; } @@ -1610,14 +1541,14 @@ static int ip_vs_zero_all(struct net *net) struct ip_vs_service *svc; for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { - list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { + hlist_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { if (net_eq(svc->net, net)) ip_vs_zero_service(svc); } } for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { - list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { + hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { if (net_eq(svc->net, net)) ip_vs_zero_service(svc); } @@ -1945,7 +1876,7 @@ static struct ctl_table vs_vars[] = { struct ip_vs_iter { struct seq_net_private p; /* Do not move this, netns depends upon it*/ - struct list_head *table; + struct hlist_head *table; int bucket; }; @@ -1978,7 +1909,7 @@ static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos) /* look in hash by protocol */ for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { - list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { + hlist_for_each_entry_rcu(svc, &ip_vs_svc_table[idx], s_list) { if (net_eq(svc->net, net) && pos-- == 0) { iter->table = ip_vs_svc_table; iter->bucket = idx; @@ -1989,7 +1920,8 @@ static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos) /* keep looking in fwmark */ for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { - list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { + hlist_for_each_entry_rcu(svc, &ip_vs_svc_fwm_table[idx], + f_list) { if (net_eq(svc->net, net) && pos-- == 0) { iter->table = ip_vs_svc_fwm_table; iter->bucket = idx; @@ -2002,17 +1934,16 @@ static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos) } static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos) -__acquires(__ip_vs_svc_lock) { - read_lock_bh(&__ip_vs_svc_lock); + rcu_read_lock(); return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN; } static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos) { - struct list_head *e; + struct hlist_node *e; struct ip_vs_iter *iter; struct ip_vs_service *svc; @@ -2025,13 +1956,14 @@ static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos) if (iter->table == ip_vs_svc_table) { /* next service in table hashed by protocol */ - if ((e = svc->s_list.next) != &ip_vs_svc_table[iter->bucket]) - return list_entry(e, struct ip_vs_service, s_list); - + e = rcu_dereference(hlist_next_rcu(&svc->s_list)); + if (e) + return hlist_entry(e, struct ip_vs_service, s_list); while (++iter->bucket < IP_VS_SVC_TAB_SIZE) { - list_for_each_entry(svc,&ip_vs_svc_table[iter->bucket], - s_list) { + hlist_for_each_entry_rcu(svc, + &ip_vs_svc_table[iter->bucket], + s_list) { return svc; } } @@ -2042,13 +1974,15 @@ static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos) } /* next service in hashed by fwmark */ - if ((e = svc->f_list.next) != &ip_vs_svc_fwm_table[iter->bucket]) - return list_entry(e, struct ip_vs_service, f_list); + e = rcu_dereference(hlist_next_rcu(&svc->f_list)); + if (e) + return hlist_entry(e, struct ip_vs_service, f_list); scan_fwmark: while (++iter->bucket < IP_VS_SVC_TAB_SIZE) { - list_for_each_entry(svc, &ip_vs_svc_fwm_table[iter->bucket], - f_list) + hlist_for_each_entry_rcu(svc, + &ip_vs_svc_fwm_table[iter->bucket], + f_list) return svc; } @@ -2056,9 +1990,8 @@ static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos) } static void ip_vs_info_seq_stop(struct seq_file *seq, void *v) -__releases(__ip_vs_svc_lock) { - read_unlock_bh(&__ip_vs_svc_lock); + rcu_read_unlock(); } @@ -2076,6 +2009,7 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v) const struct ip_vs_service *svc = v; const struct ip_vs_iter *iter = seq->private; const struct ip_vs_dest *dest; + struct ip_vs_scheduler *sched = rcu_dereference(svc->scheduler); if (iter->table == ip_vs_svc_table) { #ifdef CONFIG_IP_VS_IPV6 @@ -2084,18 +2018,18 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v) ip_vs_proto_name(svc->protocol), &svc->addr.in6, ntohs(svc->port), - svc->scheduler->name); + sched->name); else #endif seq_printf(seq, "%s %08X:%04X %s %s ", ip_vs_proto_name(svc->protocol), ntohl(svc->addr.ip), ntohs(svc->port), - svc->scheduler->name, + sched->name, (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":""); } else { seq_printf(seq, "FWM %08X %s %s", - svc->fwmark, svc->scheduler->name, + svc->fwmark, sched->name, (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":""); } @@ -2451,11 +2385,13 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) } /* Lookup the exact service by or fwmark */ + rcu_read_lock(); if (usvc.fwmark == 0) svc = __ip_vs_service_find(net, usvc.af, usvc.protocol, &usvc.addr, usvc.port); else svc = __ip_vs_svc_fwm_find(net, usvc.af, usvc.fwmark); + rcu_read_unlock(); if (cmd != IP_VS_SO_SET_ADD && (svc == NULL || svc->protocol != usvc.protocol)) { @@ -2507,11 +2443,14 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) static void ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src) { + struct ip_vs_scheduler *sched; + + sched = rcu_dereference_protected(src->scheduler, 1); dst->protocol = src->protocol; dst->addr = src->addr.ip; dst->port = src->port; dst->fwmark = src->fwmark; - strlcpy(dst->sched_name, src->scheduler->name, sizeof(dst->sched_name)); + strlcpy(dst->sched_name, sched->name, sizeof(dst->sched_name)); dst->flags = src->flags; dst->timeout = src->timeout / HZ; dst->netmask = src->netmask; @@ -2530,7 +2469,7 @@ __ip_vs_get_service_entries(struct net *net, int ret = 0; for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { - list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { + hlist_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { /* Only expose IPv4 entries to old interface */ if (svc->af != AF_INET || !net_eq(svc->net, net)) continue; @@ -2549,7 +2488,7 @@ __ip_vs_get_service_entries(struct net *net, } for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { - list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { + hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { /* Only expose IPv4 entries to old interface */ if (svc->af != AF_INET || !net_eq(svc->net, net)) continue; @@ -2578,11 +2517,13 @@ __ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get, union nf_inet_addr addr = { .ip = get->addr }; int ret = 0; + rcu_read_lock(); if (get->fwmark) svc = __ip_vs_svc_fwm_find(net, AF_INET, get->fwmark); else svc = __ip_vs_service_find(net, AF_INET, get->protocol, &addr, get->port); + rcu_read_unlock(); if (svc) { int count = 0; @@ -2765,12 +2706,14 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) entry = (struct ip_vs_service_entry *)arg; addr.ip = entry->addr; + rcu_read_lock(); if (entry->fwmark) svc = __ip_vs_svc_fwm_find(net, AF_INET, entry->fwmark); else svc = __ip_vs_service_find(net, AF_INET, entry->protocol, &addr, entry->port); + rcu_read_unlock(); if (svc) { ip_vs_copy_service(entry, svc); if (copy_to_user(user, entry, sizeof(*entry)) != 0) @@ -2927,6 +2870,7 @@ nla_put_failure: static int ip_vs_genl_fill_service(struct sk_buff *skb, struct ip_vs_service *svc) { + struct ip_vs_scheduler *sched; struct nlattr *nl_service; struct ip_vs_flags flags = { .flags = svc->flags, .mask = ~0 }; @@ -2947,7 +2891,8 @@ static int ip_vs_genl_fill_service(struct sk_buff *skb, goto nla_put_failure; } - if (nla_put_string(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name) || + sched = rcu_dereference_protected(svc->scheduler, 1); + if (nla_put_string(skb, IPVS_SVC_ATTR_SCHED_NAME, sched->name) || (svc->pe && nla_put_string(skb, IPVS_SVC_ATTR_PE_NAME, svc->pe->name)) || nla_put(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags) || @@ -2998,7 +2943,7 @@ static int ip_vs_genl_dump_services(struct sk_buff *skb, mutex_lock(&__ip_vs_mutex); for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) { - list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) { + hlist_for_each_entry(svc, &ip_vs_svc_table[i], s_list) { if (++idx <= start || !net_eq(svc->net, net)) continue; if (ip_vs_genl_dump_service(skb, svc, cb) < 0) { @@ -3009,7 +2954,7 @@ static int ip_vs_genl_dump_services(struct sk_buff *skb, } for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) { - list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) { + hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) { if (++idx <= start || !net_eq(svc->net, net)) continue; if (ip_vs_genl_dump_service(skb, svc, cb) < 0) { @@ -3069,11 +3014,13 @@ static int ip_vs_genl_parse_service(struct net *net, usvc->fwmark = 0; } + rcu_read_lock(); if (usvc->fwmark) svc = __ip_vs_svc_fwm_find(net, usvc->af, usvc->fwmark); else svc = __ip_vs_service_find(net, usvc->af, usvc->protocol, &usvc->addr, usvc->port); + rcu_read_unlock(); *ret_svc = svc; /* If a full entry was requested, check for the additional fields */ @@ -3905,8 +3852,8 @@ int __init ip_vs_control_init(void) /* Initialize svc_table, ip_vs_svc_fwm_table */ for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { - INIT_LIST_HEAD(&ip_vs_svc_table[idx]); - INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]); + INIT_HLIST_HEAD(&ip_vs_svc_table[idx]); + INIT_HLIST_HEAD(&ip_vs_svc_fwm_table[idx]); } smp_wmb(); /* Do we really need it now ? */ diff --git a/net/netfilter/ipvs/ip_vs_dh.c b/net/netfilter/ipvs/ip_vs_dh.c index 89c27230d93a..ccab120df45e 100644 --- a/net/netfilter/ipvs/ip_vs_dh.c +++ b/net/netfilter/ipvs/ip_vs_dh.c @@ -269,6 +269,7 @@ static int __init ip_vs_dh_init(void) static void __exit ip_vs_dh_cleanup(void) { unregister_ip_vs_scheduler(&ip_vs_dh_scheduler); + synchronize_rcu(); } diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c index ffef8a162148..d8e5238254db 100644 --- a/net/netfilter/ipvs/ip_vs_lblc.c +++ b/net/netfilter/ipvs/ip_vs_lblc.c @@ -633,6 +633,7 @@ static void __exit ip_vs_lblc_cleanup(void) { unregister_ip_vs_scheduler(&ip_vs_lblc_scheduler); unregister_pernet_subsys(&ip_vs_lblc_ops); + synchronize_rcu(); } diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c index cdfe6a95eddb..041b7cc356fb 100644 --- a/net/netfilter/ipvs/ip_vs_lblcr.c +++ b/net/netfilter/ipvs/ip_vs_lblcr.c @@ -821,6 +821,7 @@ static void __exit ip_vs_lblcr_cleanup(void) { unregister_ip_vs_scheduler(&ip_vs_lblcr_scheduler); unregister_pernet_subsys(&ip_vs_lblcr_ops); + synchronize_rcu(); } diff --git a/net/netfilter/ipvs/ip_vs_lc.c b/net/netfilter/ipvs/ip_vs_lc.c index 0cabf78fbc31..5128e338a749 100644 --- a/net/netfilter/ipvs/ip_vs_lc.c +++ b/net/netfilter/ipvs/ip_vs_lc.c @@ -84,6 +84,7 @@ static int __init ip_vs_lc_init(void) static void __exit ip_vs_lc_cleanup(void) { unregister_ip_vs_scheduler(&ip_vs_lc_scheduler); + synchronize_rcu(); } module_init(ip_vs_lc_init); diff --git a/net/netfilter/ipvs/ip_vs_nq.c b/net/netfilter/ipvs/ip_vs_nq.c index 51dc0cf20d90..646cfd4baa73 100644 --- a/net/netfilter/ipvs/ip_vs_nq.c +++ b/net/netfilter/ipvs/ip_vs_nq.c @@ -133,6 +133,7 @@ static int __init ip_vs_nq_init(void) static void __exit ip_vs_nq_cleanup(void) { unregister_ip_vs_scheduler(&ip_vs_nq_scheduler); + synchronize_rcu(); } module_init(ip_vs_nq_init); diff --git a/net/netfilter/ipvs/ip_vs_pe.c b/net/netfilter/ipvs/ip_vs_pe.c index 5d9774c4cc4c..1a82b29ce8ea 100644 --- a/net/netfilter/ipvs/ip_vs_pe.c +++ b/net/netfilter/ipvs/ip_vs_pe.c @@ -16,18 +16,6 @@ static LIST_HEAD(ip_vs_pe); /* semaphore for IPVS PEs. */ static DEFINE_MUTEX(ip_vs_pe_mutex); -/* Bind a service with a pe */ -void ip_vs_bind_pe(struct ip_vs_service *svc, struct ip_vs_pe *pe) -{ - svc->pe = pe; -} - -/* Unbind a service from its pe */ -void ip_vs_unbind_pe(struct ip_vs_service *svc) -{ - svc->pe = NULL; -} - /* Get pe in the pe list by name */ struct ip_vs_pe *__ip_vs_pe_getbyname(const char *pe_name) { diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c index f7190cdf023e..4de5176a9981 100644 --- a/net/netfilter/ipvs/ip_vs_proto_sctp.c +++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c @@ -27,9 +27,10 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, if (sch == NULL) return 0; net = skb_net(skb); + rcu_read_lock(); if ((sch->type == SCTP_CID_INIT) && - (svc = ip_vs_service_get(net, af, skb->mark, iph->protocol, - &iph->daddr, sh->dest))) { + (svc = ip_vs_service_find(net, af, skb->mark, iph->protocol, + &iph->daddr, sh->dest))) { int ignored; if (ip_vs_todrop(net_ipvs(net))) { @@ -37,7 +38,7 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, * It seems that we are very loaded. * We have to drop this packet :( */ - ip_vs_service_put(svc); + rcu_read_unlock(); *verdict = NF_DROP; return 0; } @@ -49,14 +50,13 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, if (!*cpp && ignored <= 0) { if (!ignored) *verdict = ip_vs_leave(svc, skb, pd, iph); - else { - ip_vs_service_put(svc); + else *verdict = NF_DROP; - } + rcu_read_unlock(); return 0; } - ip_vs_service_put(svc); } + rcu_read_unlock(); /* NF_ACCEPT */ return 1; } diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c index 0bbc3feae682..7de3342e9797 100644 --- a/net/netfilter/ipvs/ip_vs_proto_tcp.c +++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c @@ -47,9 +47,10 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, } net = skb_net(skb); /* No !th->ack check to allow scheduling on SYN+ACK for Active FTP */ + rcu_read_lock(); if (th->syn && - (svc = ip_vs_service_get(net, af, skb->mark, iph->protocol, - &iph->daddr, th->dest))) { + (svc = ip_vs_service_find(net, af, skb->mark, iph->protocol, + &iph->daddr, th->dest))) { int ignored; if (ip_vs_todrop(net_ipvs(net))) { @@ -57,7 +58,7 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, * It seems that we are very loaded. * We have to drop this packet :( */ - ip_vs_service_put(svc); + rcu_read_unlock(); *verdict = NF_DROP; return 0; } @@ -70,14 +71,13 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, if (!*cpp && ignored <= 0) { if (!ignored) *verdict = ip_vs_leave(svc, skb, pd, iph); - else { - ip_vs_service_put(svc); + else *verdict = NF_DROP; - } + rcu_read_unlock(); return 0; } - ip_vs_service_put(svc); } + rcu_read_unlock(); /* NF_ACCEPT */ return 1; } diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c index 1a03e2d9c6ba..b62a3c0ff9bf 100644 --- a/net/netfilter/ipvs/ip_vs_proto_udp.c +++ b/net/netfilter/ipvs/ip_vs_proto_udp.c @@ -44,8 +44,9 @@ udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, return 0; } net = skb_net(skb); - svc = ip_vs_service_get(net, af, skb->mark, iph->protocol, - &iph->daddr, uh->dest); + rcu_read_lock(); + svc = ip_vs_service_find(net, af, skb->mark, iph->protocol, + &iph->daddr, uh->dest); if (svc) { int ignored; @@ -54,7 +55,7 @@ udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, * It seems that we are very loaded. * We have to drop this packet :( */ - ip_vs_service_put(svc); + rcu_read_unlock(); *verdict = NF_DROP; return 0; } @@ -67,14 +68,13 @@ udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, if (!*cpp && ignored <= 0) { if (!ignored) *verdict = ip_vs_leave(svc, skb, pd, iph); - else { - ip_vs_service_put(svc); + else *verdict = NF_DROP; - } + rcu_read_unlock(); return 0; } - ip_vs_service_put(svc); } + rcu_read_unlock(); /* NF_ACCEPT */ return 1; } diff --git a/net/netfilter/ipvs/ip_vs_rr.c b/net/netfilter/ipvs/ip_vs_rr.c index aa4601ff1ccc..749c98a7dd2c 100644 --- a/net/netfilter/ipvs/ip_vs_rr.c +++ b/net/netfilter/ipvs/ip_vs_rr.c @@ -121,6 +121,7 @@ static int __init ip_vs_rr_init(void) static void __exit ip_vs_rr_cleanup(void) { unregister_ip_vs_scheduler(&ip_vs_rr_scheduler); + synchronize_rcu(); } module_init(ip_vs_rr_init); diff --git a/net/netfilter/ipvs/ip_vs_sched.c b/net/netfilter/ipvs/ip_vs_sched.c index 1b715d0caf43..4dbcda6258bc 100644 --- a/net/netfilter/ipvs/ip_vs_sched.c +++ b/net/netfilter/ipvs/ip_vs_sched.c @@ -47,8 +47,6 @@ int ip_vs_bind_scheduler(struct ip_vs_service *svc, { int ret; - svc->scheduler = scheduler; - if (scheduler->init_service) { ret = scheduler->init_service(svc); if (ret) { @@ -56,7 +54,7 @@ int ip_vs_bind_scheduler(struct ip_vs_service *svc, return ret; } } - + rcu_assign_pointer(svc->scheduler, scheduler); return 0; } @@ -64,17 +62,19 @@ int ip_vs_bind_scheduler(struct ip_vs_service *svc, /* * Unbind a service with its scheduler */ -void ip_vs_unbind_scheduler(struct ip_vs_service *svc) +void ip_vs_unbind_scheduler(struct ip_vs_service *svc, + struct ip_vs_scheduler *sched) { - struct ip_vs_scheduler *sched = svc->scheduler; + struct ip_vs_scheduler *cur_sched; - if (!sched) + cur_sched = rcu_dereference_protected(svc->scheduler, 1); + /* This check proves that old 'sched' was installed */ + if (!cur_sched) return; if (sched->done_service) sched->done_service(svc); - - svc->scheduler = NULL; + /* svc->scheduler can not be set to NULL */ } @@ -148,21 +148,21 @@ void ip_vs_scheduler_put(struct ip_vs_scheduler *scheduler) void ip_vs_scheduler_err(struct ip_vs_service *svc, const char *msg) { + struct ip_vs_scheduler *sched; + + sched = rcu_dereference(svc->scheduler); if (svc->fwmark) { IP_VS_ERR_RL("%s: FWM %u 0x%08X - %s\n", - svc->scheduler->name, svc->fwmark, - svc->fwmark, msg); + sched->name, svc->fwmark, svc->fwmark, msg); #ifdef CONFIG_IP_VS_IPV6 } else if (svc->af == AF_INET6) { IP_VS_ERR_RL("%s: %s [%pI6c]:%d - %s\n", - svc->scheduler->name, - ip_vs_proto_name(svc->protocol), + sched->name, ip_vs_proto_name(svc->protocol), &svc->addr.in6, ntohs(svc->port), msg); #endif } else { IP_VS_ERR_RL("%s: %s %pI4:%d - %s\n", - svc->scheduler->name, - ip_vs_proto_name(svc->protocol), + sched->name, ip_vs_proto_name(svc->protocol), &svc->addr.ip, ntohs(svc->port), msg); } } diff --git a/net/netfilter/ipvs/ip_vs_sed.c b/net/netfilter/ipvs/ip_vs_sed.c index d01187084b7f..f3205925359a 100644 --- a/net/netfilter/ipvs/ip_vs_sed.c +++ b/net/netfilter/ipvs/ip_vs_sed.c @@ -134,6 +134,7 @@ static int __init ip_vs_sed_init(void) static void __exit ip_vs_sed_cleanup(void) { unregister_ip_vs_scheduler(&ip_vs_sed_scheduler); + synchronize_rcu(); } module_init(ip_vs_sed_init); diff --git a/net/netfilter/ipvs/ip_vs_sh.c b/net/netfilter/ipvs/ip_vs_sh.c index 81c1a10c7b49..0df269d7c99f 100644 --- a/net/netfilter/ipvs/ip_vs_sh.c +++ b/net/netfilter/ipvs/ip_vs_sh.c @@ -283,6 +283,7 @@ static int __init ip_vs_sh_init(void) static void __exit ip_vs_sh_cleanup(void) { unregister_ip_vs_scheduler(&ip_vs_sh_scheduler); + synchronize_rcu(); } diff --git a/net/netfilter/ipvs/ip_vs_wlc.c b/net/netfilter/ipvs/ip_vs_wlc.c index dafae881c622..c60a81c4ce9a 100644 --- a/net/netfilter/ipvs/ip_vs_wlc.c +++ b/net/netfilter/ipvs/ip_vs_wlc.c @@ -106,6 +106,7 @@ static int __init ip_vs_wlc_init(void) static void __exit ip_vs_wlc_cleanup(void) { unregister_ip_vs_scheduler(&ip_vs_wlc_scheduler); + synchronize_rcu(); } module_init(ip_vs_wlc_init); diff --git a/net/netfilter/ipvs/ip_vs_wrr.c b/net/netfilter/ipvs/ip_vs_wrr.c index b173ef907a14..32c646eb8747 100644 --- a/net/netfilter/ipvs/ip_vs_wrr.c +++ b/net/netfilter/ipvs/ip_vs_wrr.c @@ -261,6 +261,7 @@ static int __init ip_vs_wrr_init(void) static void __exit ip_vs_wrr_cleanup(void) { unregister_ip_vs_scheduler(&ip_vs_wrr_scheduler); + synchronize_rcu(); } module_init(ip_vs_wrr_init); -- cgit v1.2.3 From 8466563e16d5198b6efeb3b51791b95b6aaacb6b Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Tue, 2 Apr 2013 13:13:07 -0400 Subject: tcp: Remove dead sysctl_tcp_cookie_size declaration Remove a declaration left over from the TCPCT-ectomy. This sysctl is no longer referenced anywhere since 1a2c6181c4 ("tcp: Remove TCPCT"). Signed-off-by: Neal Cardwell Signed-off-by: David S. Miller --- include/net/tcp.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/net') diff --git a/include/net/tcp.h b/include/net/tcp.h index d1dcb596230e..4475aaf0af57 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -282,7 +282,6 @@ extern int sysctl_tcp_base_mss; extern int sysctl_tcp_workaround_signed_windows; extern int sysctl_tcp_slow_start_after_idle; extern int sysctl_tcp_max_ssthresh; -extern int sysctl_tcp_cookie_size; extern int sysctl_tcp_thin_linear_timeouts; extern int sysctl_tcp_thin_dupack; extern int sysctl_tcp_early_retrans; -- cgit v1.2.3 From d4299ce6b33c0afd22cf6a170cfaf89c63d1114d Mon Sep 17 00:00:00 2001 From: Andre Guedes Date: Wed, 27 Mar 2013 20:04:57 -0300 Subject: Bluetooth: Remove unneeded hci_req_cmd_status function This patch removes the hci_req_cmd_status function since it is not used anymore. The HCI request framework now considers the HCI command has complete once the Command Status or Command Complete Event is received. Signed-off-by: Andre Guedes Signed-off-by: Johan Hedberg --- include/net/bluetooth/hci_core.h | 1 - net/bluetooth/hci_core.c | 26 -------------------------- 2 files changed, 27 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 358a6983d3bb..0e7ee892d7ab 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1055,7 +1055,6 @@ void hci_req_init(struct hci_request *req, struct hci_dev *hdev); int hci_req_run(struct hci_request *req, hci_req_complete_t complete); void hci_req_add(struct hci_request *req, u16 opcode, u32 plen, void *param); void hci_req_cmd_complete(struct hci_dev *hdev, u16 opcode, u8 status); -void hci_req_cmd_status(struct hci_dev *hdev, u16 opcode, u8 status); int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen, void *param); void hci_send_acl(struct hci_chan *chan, struct sk_buff *skb, __u16 flags); diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 123992984a7c..a199d631e31c 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -3322,32 +3322,6 @@ call_complete: req_complete(hdev, status); } -void hci_req_cmd_status(struct hci_dev *hdev, u16 opcode, u8 status) -{ - hci_req_complete_t req_complete = NULL; - - BT_DBG("opcode 0x%04x status 0x%02x", opcode, status); - - if (status) { - hci_req_cmd_complete(hdev, opcode, status); - return; - } - - /* No need to handle success status if there are more commands */ - if (!hci_req_is_complete(hdev)) - return; - - if (hdev->sent_cmd) - req_complete = bt_cb(hdev->sent_cmd)->req.complete; - - /* If the request doesn't have a complete callback or there - * are other commands/requests in the hdev queue we consider - * this request as completed. - */ - if (!req_complete || !skb_queue_empty(&hdev->cmd_q)) - hci_req_cmd_complete(hdev, opcode, status); -} - static void hci_rx_work(struct work_struct *work) { struct hci_dev *hdev = container_of(work, struct hci_dev, rx_work); -- cgit v1.2.3 From b6ddb638235d90ed67af9af40e63880fd66a1939 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Tue, 2 Apr 2013 13:34:31 +0300 Subject: Bluetooth: Track received events in hdev This patch adds tracking of received HCI events to the hci_dev struct. This is necessary so that a subsequent patch can implement a function for sending a single command synchronously and returning the resulting command complete parameters in the function return value. Signed-off-by: Johan Hedberg Acked-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 1 + net/bluetooth/hci_core.c | 3 +++ net/bluetooth/hci_event.c | 12 ++++++++++++ 3 files changed, 16 insertions(+) (limited to 'include/net') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 0e7ee892d7ab..89eda2ef2380 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -244,6 +244,7 @@ struct hci_dev { struct sk_buff_head raw_q; struct sk_buff_head cmd_q; + struct sk_buff *recv_evt; struct sk_buff *sent_cmd; struct sk_buff *reassembly[NUM_REASSEMBLY]; diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index a199d631e31c..7c323bd112ff 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -1136,6 +1136,9 @@ static int hci_dev_do_close(struct hci_dev *hdev) hdev->sent_cmd = NULL; } + kfree_skb(hdev->recv_evt); + hdev->recv_evt = NULL; + /* After this point our queues are empty * and no tasks are scheduled. */ hdev->close(hdev); diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 7e7fbca59439..ed0efb7255b0 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -3699,6 +3699,18 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb) struct hci_event_hdr *hdr = (void *) skb->data; __u8 event = hdr->evt; + hci_dev_lock(hdev); + + /* Received events are (currently) only needed when a request is + * ongoing so avoid unnecessary memory allocation. + */ + if (hdev->req_status == HCI_REQ_PEND) { + kfree_skb(hdev->recv_evt); + hdev->recv_evt = skb_clone(skb, GFP_KERNEL); + } + + hci_dev_unlock(hdev); + skb_pull(skb, HCI_EVENT_HDR_SIZE); switch (event) { -- cgit v1.2.3 From 75e84b7c522c6e07964cd1f5bf28535768a1e9fa Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Tue, 2 Apr 2013 13:35:04 +0300 Subject: Bluetooth: Add __hci_cmd_sync() helper function This patch adds a helper function for sending a single HCI command waiting for its completion and then returning back the parameters in the resulting command complete event (if there was one). The implementation is very similar to that of hci_req_sync() except that instead of invocing a callback for sending HCI commands the function constructs and sends one itself and after being woken up picks the last received event from hdev->recv_evt (if it matches the right criteria) and returns it. Signed-off-by: Johan Hedberg Acked-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 3 ++ net/bluetooth/hci_core.c | 102 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 105 insertions(+) (limited to 'include/net') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 89eda2ef2380..755743d508aa 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1057,6 +1057,9 @@ int hci_req_run(struct hci_request *req, hci_req_complete_t complete); void hci_req_add(struct hci_request *req, u16 opcode, u32 plen, void *param); void hci_req_cmd_complete(struct hci_dev *hdev, u16 opcode, u8 status); +struct sk_buff *__hci_cmd_sync(struct hci_dev *hdev, u16 opcode, u32 plen, + void *param, u32 timeout); + int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen, void *param); void hci_send_acl(struct hci_chan *chan, struct sk_buff *skb, __u16 flags); void hci_send_sco(struct hci_conn *conn, struct sk_buff *skb); diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 7c323bd112ff..8b2d543fb143 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -79,6 +79,108 @@ static void hci_req_cancel(struct hci_dev *hdev, int err) } } +struct sk_buff *hci_get_cmd_complete(struct hci_dev *hdev, u16 opcode) +{ + struct hci_ev_cmd_complete *ev; + struct hci_event_hdr *hdr; + struct sk_buff *skb; + + hci_dev_lock(hdev); + + skb = hdev->recv_evt; + hdev->recv_evt = NULL; + + hci_dev_unlock(hdev); + + if (!skb) + return ERR_PTR(-ENODATA); + + if (skb->len < sizeof(*hdr)) { + BT_ERR("Too short HCI event"); + goto failed; + } + + hdr = (void *) skb->data; + skb_pull(skb, HCI_EVENT_HDR_SIZE); + + if (hdr->evt != HCI_EV_CMD_COMPLETE) { + BT_DBG("Last event is not cmd complete (0x%2.2x)", hdr->evt); + goto failed; + } + + if (skb->len < sizeof(*ev)) { + BT_ERR("Too short cmd_complete event"); + goto failed; + } + + ev = (void *) skb->data; + skb_pull(skb, sizeof(*ev)); + + if (opcode == __le16_to_cpu(ev->opcode)) + return skb; + + BT_DBG("opcode doesn't match (0x%2.2x != 0x%2.2x)", opcode, + __le16_to_cpu(ev->opcode)); + +failed: + kfree_skb(skb); + return ERR_PTR(-ENODATA); +} + +struct sk_buff *__hci_cmd_sync(struct hci_dev *hdev, u16 opcode, u32 plen, + void *param, u32 timeout) +{ + DECLARE_WAITQUEUE(wait, current); + struct hci_request req; + int err = 0; + + BT_DBG("%s", hdev->name); + + hci_req_init(&req, hdev); + + hci_req_add(&req, opcode, plen, param); + + hdev->req_status = HCI_REQ_PEND; + + err = hci_req_run(&req, hci_req_sync_complete); + if (err < 0) + return ERR_PTR(err); + + add_wait_queue(&hdev->req_wait_q, &wait); + set_current_state(TASK_INTERRUPTIBLE); + + schedule_timeout(timeout); + + remove_wait_queue(&hdev->req_wait_q, &wait); + + if (signal_pending(current)) + return ERR_PTR(-EINTR); + + switch (hdev->req_status) { + case HCI_REQ_DONE: + err = -bt_to_errno(hdev->req_result); + break; + + case HCI_REQ_CANCELED: + err = -hdev->req_result; + break; + + default: + err = -ETIMEDOUT; + break; + } + + hdev->req_status = hdev->req_result = 0; + + BT_DBG("%s end: err %d", hdev->name, err); + + if (err < 0) + return ERR_PTR(err); + + return hci_get_cmd_complete(hdev, opcode); +} +EXPORT_SYMBOL(__hci_cmd_sync); + /* Execute request and wait for completion. */ static int __hci_req_sync(struct hci_dev *hdev, void (*func)(struct hci_request *req, -- cgit v1.2.3 From 02350a725f5bc44490c30a10e7e04a12a5ecd406 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Wed, 3 Apr 2013 21:50:29 +0300 Subject: Bluetooth: Add support for custom event terminated commands This patch adds support for having commands within HCI requests that do not result in a command complete but some other event. This is at least needed for some vendor specific commands to be issued in the hdev->setup() procecure, but might also be useful for other commands. The way that the support is implemented is by extending the skb control buffer to have a field to indicate that the command is expected to terminate with a special event. After sending the command each received event can then be compared against this field through hdev->sent_cmd. Signed-off-by: Johan Hedberg Acked-by: Marcel Holtmann --- include/net/bluetooth/bluetooth.h | 1 + include/net/bluetooth/hci_core.h | 2 ++ net/bluetooth/hci_core.c | 10 +++++++++- net/bluetooth/hci_event.c | 11 ++++++++++- 4 files changed, 22 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index ed6e9552252e..591fee7d0060 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -266,6 +266,7 @@ typedef void (*hci_req_complete_t)(struct hci_dev *hdev, u8 status); struct hci_req_ctrl { bool start; + u8 event; hci_req_complete_t complete; }; diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 755743d508aa..b85eefb230fd 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1055,6 +1055,8 @@ struct hci_request { void hci_req_init(struct hci_request *req, struct hci_dev *hdev); int hci_req_run(struct hci_request *req, hci_req_complete_t complete); void hci_req_add(struct hci_request *req, u16 opcode, u32 plen, void *param); +void hci_req_add_ev(struct hci_request *req, u16 opcode, u32 plen, void *param, + u8 event); void hci_req_cmd_complete(struct hci_dev *hdev, u16 opcode, u8 status); struct sk_buff *__hci_cmd_sync(struct hci_dev *hdev, u16 opcode, u32 plen, diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 8b2d543fb143..7f1413cae2cb 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -2645,7 +2645,8 @@ int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen, void *param) } /* Queue a command to an asynchronous HCI request */ -void hci_req_add(struct hci_request *req, u16 opcode, u32 plen, void *param) +void hci_req_add_ev(struct hci_request *req, u16 opcode, u32 plen, void *param, + u8 event) { struct hci_dev *hdev = req->hdev; struct sk_buff *skb; @@ -2669,9 +2670,16 @@ void hci_req_add(struct hci_request *req, u16 opcode, u32 plen, void *param) if (skb_queue_empty(&req->cmd_q)) bt_cb(skb)->req.start = true; + bt_cb(skb)->req.event = event; + skb_queue_tail(&req->cmd_q, skb); } +void hci_req_add(struct hci_request *req, u16 opcode, u32 plen, void *param) +{ + hci_req_add_ev(req, opcode, plen, param, 0); +} + /* Get data from the previously sent command */ void *hci_sent_cmd_data(struct hci_dev *hdev, __u16 opcode) { diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index ed0efb7255b0..0a2b128d2cc9 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -2463,7 +2463,9 @@ static void hci_cmd_status_evt(struct hci_dev *hdev, struct sk_buff *skb) if (opcode != HCI_OP_NOP) del_timer(&hdev->cmd_timer); - hci_req_cmd_complete(hdev, opcode, ev->status); + if (ev->status || + (hdev->sent_cmd && !bt_cb(hdev->sent_cmd)->req.event)) + hci_req_cmd_complete(hdev, opcode, ev->status); if (ev->ncmd && !test_bit(HCI_RESET, &hdev->flags)) { atomic_set(&hdev->cmd_cnt, 1); @@ -3713,6 +3715,13 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb) skb_pull(skb, HCI_EVENT_HDR_SIZE); + if (hdev->sent_cmd && bt_cb(hdev->sent_cmd)->req.event == event) { + struct hci_command_hdr *hdr = (void *) hdev->sent_cmd->data; + u16 opcode = __le16_to_cpu(hdr->opcode); + + hci_req_cmd_complete(hdev, opcode, 0); + } + switch (event) { case HCI_EV_INQUIRY_COMPLETE: hci_inquiry_complete_evt(hdev, skb); -- cgit v1.2.3 From 7b1abbbed0f2a1bc19bb8c0d48a284466043092a Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Wed, 3 Apr 2013 21:54:47 +0300 Subject: Bluetooth: Add __hci_cmd_sync_ev function This patch adds a __hci_cmd_sync_ev function, analogous to __hci_cmd_sync except that it also takes an event parameter to indicate that the command completes with a special event instead of command complete. Internally this new function takes advantage of the hci_req_add_ev function introduced in the previous patch. The primary expected user of this new function are the setup routines of HCI drivers which may want to send custom commands and return only when they have completed. Signed-off-by: Johan Hedberg Acked-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 2 ++ net/bluetooth/hci_core.c | 23 ++++++++++++++++++----- 2 files changed, 20 insertions(+), 5 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index b85eefb230fd..47129b1ee20b 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1061,6 +1061,8 @@ void hci_req_cmd_complete(struct hci_dev *hdev, u16 opcode, u8 status); struct sk_buff *__hci_cmd_sync(struct hci_dev *hdev, u16 opcode, u32 plen, void *param, u32 timeout); +struct sk_buff *__hci_cmd_sync_ev(struct hci_dev *hdev, u16 opcode, u32 plen, + void *param, u8 event, u32 timeout); int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen, void *param); void hci_send_acl(struct hci_chan *chan, struct sk_buff *skb, __u16 flags); diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 7f1413cae2cb..9567e32a1f0c 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -79,7 +79,7 @@ static void hci_req_cancel(struct hci_dev *hdev, int err) } } -struct sk_buff *hci_get_cmd_complete(struct hci_dev *hdev, u16 opcode) +struct sk_buff *hci_get_cmd_complete(struct hci_dev *hdev, u16 opcode, u8 event) { struct hci_ev_cmd_complete *ev; struct hci_event_hdr *hdr; @@ -103,6 +103,12 @@ struct sk_buff *hci_get_cmd_complete(struct hci_dev *hdev, u16 opcode) hdr = (void *) skb->data; skb_pull(skb, HCI_EVENT_HDR_SIZE); + if (event) { + if (hdr->evt != event) + goto failed; + return skb; + } + if (hdr->evt != HCI_EV_CMD_COMPLETE) { BT_DBG("Last event is not cmd complete (0x%2.2x)", hdr->evt); goto failed; @@ -127,8 +133,8 @@ failed: return ERR_PTR(-ENODATA); } -struct sk_buff *__hci_cmd_sync(struct hci_dev *hdev, u16 opcode, u32 plen, - void *param, u32 timeout) +struct sk_buff *__hci_cmd_sync_ev(struct hci_dev *hdev, u16 opcode, u32 plen, + void *param, u8 event, u32 timeout) { DECLARE_WAITQUEUE(wait, current); struct hci_request req; @@ -138,7 +144,7 @@ struct sk_buff *__hci_cmd_sync(struct hci_dev *hdev, u16 opcode, u32 plen, hci_req_init(&req, hdev); - hci_req_add(&req, opcode, plen, param); + hci_req_add_ev(&req, opcode, plen, param, event); hdev->req_status = HCI_REQ_PEND; @@ -177,7 +183,14 @@ struct sk_buff *__hci_cmd_sync(struct hci_dev *hdev, u16 opcode, u32 plen, if (err < 0) return ERR_PTR(err); - return hci_get_cmd_complete(hdev, opcode); + return hci_get_cmd_complete(hdev, opcode, event); +} +EXPORT_SYMBOL(__hci_cmd_sync_ev); + +struct sk_buff *__hci_cmd_sync(struct hci_dev *hdev, u16 opcode, u32 plen, + void *param, u32 timeout) +{ + return __hci_cmd_sync_ev(hdev, opcode, plen, param, 0, timeout); } EXPORT_SYMBOL(__hci_cmd_sync); -- cgit v1.2.3 From f41c70c4d5e3f6c2a7f9e5dfc10af452591a2484 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Mon, 12 Nov 2012 14:02:14 +0900 Subject: Bluetooth: Add driver setup stage for early init Some drivers require a special stage for their early init. This is always specific to the driver or transport. So call back into driver to allow bringing up the device. The advantage with this stage is that the Bluetooth core is actually handling the HCI layer now. This means that command and event processing is available. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- include/net/bluetooth/hci_core.h | 1 + net/bluetooth/hci_core.c | 33 ++++++++++++++++++++------------- 2 files changed, 21 insertions(+), 13 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 47129b1ee20b..395e8f6982f9 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -293,6 +293,7 @@ struct hci_dev { int (*open)(struct hci_dev *hdev); int (*close)(struct hci_dev *hdev); int (*flush)(struct hci_dev *hdev); + int (*setup)(struct hci_dev *hdev); int (*send)(struct sk_buff *skb); void (*notify)(struct hci_dev *hdev, unsigned int evt); int (*ioctl)(struct hci_dev *hdev, unsigned int cmd, unsigned long arg); diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 9567e32a1f0c..0f00b8bc279f 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -1127,26 +1127,33 @@ int hci_dev_open(__u16 dev) goto done; } - if (test_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks)) - set_bit(HCI_RAW, &hdev->flags); - - /* Treat all non BR/EDR controllers as raw devices if - enable_hs is not set */ - if (hdev->dev_type != HCI_BREDR && !enable_hs) - set_bit(HCI_RAW, &hdev->flags); - if (hdev->open(hdev)) { ret = -EIO; goto done; } - if (!test_bit(HCI_RAW, &hdev->flags)) { - atomic_set(&hdev->cmd_cnt, 1); - set_bit(HCI_INIT, &hdev->flags); - ret = __hci_init(hdev); - clear_bit(HCI_INIT, &hdev->flags); + atomic_set(&hdev->cmd_cnt, 1); + set_bit(HCI_INIT, &hdev->flags); + + if (hdev->setup && test_bit(HCI_SETUP, &hdev->dev_flags)) + ret = hdev->setup(hdev); + + if (!ret) { + /* Treat all non BR/EDR controllers as raw devices if + * enable_hs is not set. + */ + if (hdev->dev_type != HCI_BREDR && !enable_hs) + set_bit(HCI_RAW, &hdev->flags); + + if (test_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks)) + set_bit(HCI_RAW, &hdev->flags); + + if (!test_bit(HCI_RAW, &hdev->flags)) + ret = __hci_init(hdev); } + clear_bit(HCI_INIT, &hdev->flags); + if (!ret) { hci_dev_hold(hdev); set_bit(HCI_UP, &hdev->flags); -- cgit v1.2.3 From 5afff03815e26abf34702ec10422535224cdfe38 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Mon, 12 Nov 2012 14:02:16 +0900 Subject: Bluetooth: Remove driver init queue from core The driver init queue is no longer needed. This can be all handled inside the drivers now. So remove it. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- include/net/bluetooth/hci_core.h | 2 -- net/bluetooth/hci_core.c | 23 ----------------------- 2 files changed, 25 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 395e8f6982f9..d4e13bf5ae59 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -269,8 +269,6 @@ struct hci_dev { struct hci_dev_stats stat; - struct sk_buff_head driver_init; - atomic_t promisc; struct dentry *debugfs; diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 0f00b8bc279f..9570358adb77 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -316,29 +316,9 @@ static void amp_init(struct hci_request *req) static void hci_init1_req(struct hci_request *req, unsigned long opt) { struct hci_dev *hdev = req->hdev; - struct hci_request init_req; - struct sk_buff *skb; BT_DBG("%s %ld", hdev->name, opt); - /* Driver initialization */ - - hci_req_init(&init_req, hdev); - - /* Special commands */ - while ((skb = skb_dequeue(&hdev->driver_init))) { - bt_cb(skb)->pkt_type = HCI_COMMAND_PKT; - skb->dev = (void *) hdev; - - if (skb_queue_empty(&init_req.cmd_q)) - bt_cb(skb)->req.start = true; - - skb_queue_tail(&init_req.cmd_q, skb); - } - skb_queue_purge(&hdev->driver_init); - - hci_req_run(&init_req, NULL); - /* Reset */ if (!test_bit(HCI_QUIRK_RESET_ON_CLOSE, &hdev->quirks)) hci_reset_req(req, 0); @@ -2144,7 +2124,6 @@ struct hci_dev *hci_alloc_dev(void) INIT_DELAYED_WORK(&hdev->discov_off, hci_discov_off); INIT_DELAYED_WORK(&hdev->le_scan_disable, le_scan_disable_work); - skb_queue_head_init(&hdev->driver_init); skb_queue_head_init(&hdev->rx_q); skb_queue_head_init(&hdev->cmd_q); skb_queue_head_init(&hdev->raw_q); @@ -2163,8 +2142,6 @@ EXPORT_SYMBOL(hci_alloc_dev); /* Free HCI device */ void hci_free_dev(struct hci_dev *hdev) { - skb_queue_purge(&hdev->driver_init); - /* will free via device release */ put_device(&hdev->dev); } -- cgit v1.2.3 From 19952cc4f8f572493293a8caed27c4be89c5fc9d Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Wed, 3 Apr 2013 23:38:16 +0000 Subject: net: frag queue per hash bucket locking This patch implements per hash bucket locking for the frag queue hash. This removes two write locks, and the only remaining write lock is for protecting hash rebuild. This essentially reduce the readers-writer lock to a rebuild lock. This patch is part of "net: frag performance followup" http://thread.gmane.org/gmane.linux.network/263644 of which two patches have already been accepted: Same test setup as previous: (http://thread.gmane.org/gmane.linux.network/257155) Two 10G interfaces, on seperate NUMA nodes, are under-test, and uses Ethernet flow-control. A third interface is used for generating the DoS attack (with trafgen). Notice, I have changed the frag DoS generator script to be more efficient/deadly. Before it would only hit one RX queue, now its sending packets causing multi-queue RX, due to "better" RX hashing. Test types summary (netperf UDP_STREAM): Test-20G64K == 2x10G with 65K fragments Test-20G3F == 2x10G with 3x fragments (3*1472 bytes) Test-20G64K+DoS == Same as 20G64K with frag DoS Test-20G3F+DoS == Same as 20G3F with frag DoS Test-20G64K+MQ == Same as 20G64K with Multi-Queue frag DoS Test-20G3F+MQ == Same as 20G3F with Multi-Queue frag DoS When I rebased this-patch(03) (on top of net-next commit a210576c) and removed the _bh spinlock, I saw a performance regression. BUT this was caused by some unrelated change in-between. See tests below. Test (A) is what I reported before for patch-02, accepted in commit 1b5ab0de. Test (B) verifying-retest of commit 1b5ab0de corrospond to patch-02. Test (C) is what I reported before for this-patch Test (D) is net-next master HEAD (commit a210576c), which reveals some (unknown) performance regression (compared against test (B)). Test (D) function as a new base-test. Performance table summary (in Mbit/s): (#) Test-type: 20G64K 20G3F 20G64K+DoS 20G3F+DoS 20G64K+MQ 20G3F+MQ ---------- ------- ------- ---------- --------- -------- ------- (A) Patch-02 : 18848.7 13230.1 4103.04 5310.36 130.0 440.2 (B) 1b5ab0de : 18841.5 13156.8 4101.08 5314.57 129.0 424.2 (C) Patch-03v1: 18838.0 13490.5 4405.11 6814.72 196.6 461.6 (D) a210576c : 18321.5 11250.4 3635.34 5160.13 119.1 405.2 (E) with _bh : 17247.3 11492.6 3994.74 6405.29 166.7 413.6 (F) without bh: 17471.3 11298.7 3818.05 6102.11 165.7 406.3 Test (E) and (F) is this-patch(03), with(V1) and without(V2) the _bh spinlocks. I cannot explain the slow down for 20G64K (but its an artificial "lab-test" so I'm not worried). But the other results does show improvements. And test (E) "with _bh" version is slightly better. Signed-off-by: Jesper Dangaard Brouer Acked-by: Hannes Frederic Sowa Acked-by: Eric Dumazet ---- V2: - By analysis from Hannes Frederic Sowa and Eric Dumazet, we don't need the spinlock _bh versions, as Netfilter currently does a local_bh_disable() before entering inet_fragment. - Fold-in desc from cover-mail V3: - Drop the chain_len counter per hash bucket. Signed-off-by: David S. Miller --- include/net/inet_frag.h | 8 ++++++- net/ipv4/inet_fragment.c | 57 +++++++++++++++++++++++++++++++++++++----------- 2 files changed, 51 insertions(+), 14 deletions(-) (limited to 'include/net') diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index 7cac9c5789b5..6f41b45e819e 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -50,10 +50,16 @@ struct inet_frag_queue { */ #define INETFRAGS_MAXDEPTH 128 +struct inet_frag_bucket { + struct hlist_head chain; + spinlock_t chain_lock; +}; + struct inet_frags { - struct hlist_head hash[INETFRAGS_HASHSZ]; + struct inet_frag_bucket hash[INETFRAGS_HASHSZ]; /* This rwlock is a global lock (seperate per IPv4, IPv6 and * netfilter). Important to keep this on a seperate cacheline. + * Its primarily a rebuild protection rwlock. */ rwlock_t lock ____cacheline_aligned_in_smp; int secret_interval; diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 1206ca64b0ea..e97d66a1fdde 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -52,20 +52,27 @@ static void inet_frag_secret_rebuild(unsigned long dummy) unsigned long now = jiffies; int i; + /* Per bucket lock NOT needed here, due to write lock protection */ write_lock(&f->lock); + get_random_bytes(&f->rnd, sizeof(u32)); for (i = 0; i < INETFRAGS_HASHSZ; i++) { + struct inet_frag_bucket *hb; struct inet_frag_queue *q; struct hlist_node *n; - hlist_for_each_entry_safe(q, n, &f->hash[i], list) { + hb = &f->hash[i]; + hlist_for_each_entry_safe(q, n, &hb->chain, list) { unsigned int hval = f->hashfn(q); if (hval != i) { + struct inet_frag_bucket *hb_dest; + hlist_del(&q->list); /* Relink to new hash chain. */ - hlist_add_head(&q->list, &f->hash[hval]); + hb_dest = &f->hash[hval]; + hlist_add_head(&q->list, &hb_dest->chain); } } } @@ -78,9 +85,12 @@ void inet_frags_init(struct inet_frags *f) { int i; - for (i = 0; i < INETFRAGS_HASHSZ; i++) - INIT_HLIST_HEAD(&f->hash[i]); + for (i = 0; i < INETFRAGS_HASHSZ; i++) { + struct inet_frag_bucket *hb = &f->hash[i]; + spin_lock_init(&hb->chain_lock); + INIT_HLIST_HEAD(&hb->chain); + } rwlock_init(&f->lock); f->rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^ @@ -122,9 +132,18 @@ EXPORT_SYMBOL(inet_frags_exit_net); static inline void fq_unlink(struct inet_frag_queue *fq, struct inet_frags *f) { - write_lock(&f->lock); + struct inet_frag_bucket *hb; + unsigned int hash; + + read_lock(&f->lock); + hash = f->hashfn(fq); + hb = &f->hash[hash]; + + spin_lock(&hb->chain_lock); hlist_del(&fq->list); - write_unlock(&f->lock); + spin_unlock(&hb->chain_lock); + + read_unlock(&f->lock); inet_frag_lru_del(fq); } @@ -226,27 +245,32 @@ static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf, struct inet_frag_queue *qp_in, struct inet_frags *f, void *arg) { + struct inet_frag_bucket *hb; struct inet_frag_queue *qp; #ifdef CONFIG_SMP #endif unsigned int hash; - write_lock(&f->lock); + read_lock(&f->lock); /* Protects against hash rebuild */ /* * While we stayed w/o the lock other CPU could update * the rnd seed, so we need to re-calculate the hash * chain. Fortunatelly the qp_in can be used to get one. */ hash = f->hashfn(qp_in); + hb = &f->hash[hash]; + spin_lock(&hb->chain_lock); + #ifdef CONFIG_SMP /* With SMP race we have to recheck hash table, because * such entry could be created on other cpu, while we - * promoted read lock to write lock. + * released the hash bucket lock. */ - hlist_for_each_entry(qp, &f->hash[hash], list) { + hlist_for_each_entry(qp, &hb->chain, list) { if (qp->net == nf && f->match(qp, arg)) { atomic_inc(&qp->refcnt); - write_unlock(&f->lock); + spin_unlock(&hb->chain_lock); + read_unlock(&f->lock); qp_in->last_in |= INET_FRAG_COMPLETE; inet_frag_put(qp_in, f); return qp; @@ -258,8 +282,9 @@ static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf, atomic_inc(&qp->refcnt); atomic_inc(&qp->refcnt); - hlist_add_head(&qp->list, &f->hash[hash]); - write_unlock(&f->lock); + hlist_add_head(&qp->list, &hb->chain); + spin_unlock(&hb->chain_lock); + read_unlock(&f->lock); inet_frag_lru_add(nf, qp); return qp; } @@ -300,17 +325,23 @@ struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, struct inet_frags *f, void *key, unsigned int hash) __releases(&f->lock) { + struct inet_frag_bucket *hb; struct inet_frag_queue *q; int depth = 0; - hlist_for_each_entry(q, &f->hash[hash], list) { + hb = &f->hash[hash]; + + spin_lock(&hb->chain_lock); + hlist_for_each_entry(q, &hb->chain, list) { if (q->net == nf && f->match(q, key)) { atomic_inc(&q->refcnt); + spin_unlock(&hb->chain_lock); read_unlock(&f->lock); return q; } depth++; } + spin_unlock(&hb->chain_lock); read_unlock(&f->lock); if (depth <= INETFRAGS_MAXDEPTH) -- cgit v1.2.3 From f3c1a44a2208d14b061ad665d9549c9b321f38e5 Mon Sep 17 00:00:00 2001 From: Gao feng Date: Sun, 24 Mar 2013 23:50:39 +0000 Subject: netfilter: make /proc/net/netfilter pernet This patch makes this proc dentry pernet. So far only init_net had a /proc/net/netfilter directory. Signed-off-by: Gao feng Signed-off-by: Pablo Neira Ayuso --- include/net/net_namespace.h | 2 ++ include/net/netns/netfilter.h | 11 +++++++++++ net/netfilter/core.c | 33 +++++++++++++++++++++++++++++---- 3 files changed, 42 insertions(+), 4 deletions(-) create mode 100644 include/net/netns/netfilter.h (limited to 'include/net') diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index de644bcd8613..b17697827482 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -17,6 +17,7 @@ #include #include #include +#include #include #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) #include @@ -94,6 +95,7 @@ struct net { struct netns_dccp dccp; #endif #ifdef CONFIG_NETFILTER + struct netns_nf nf; struct netns_xt xt; #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) struct netns_ct ct; diff --git a/include/net/netns/netfilter.h b/include/net/netns/netfilter.h new file mode 100644 index 000000000000..248ca1c68dba --- /dev/null +++ b/include/net/netns/netfilter.h @@ -0,0 +1,11 @@ +#ifndef __NETNS_NETFILTER_H +#define __NETNS_NETFILTER_H + +#include + +struct netns_nf { +#if defined CONFIG_PROC_FS + struct proc_dir_entry *proc_netfilter; +#endif +}; +#endif diff --git a/net/netfilter/core.c b/net/netfilter/core.c index a9c488b6c50d..b085184d9b45 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -281,6 +281,34 @@ struct proc_dir_entry *proc_net_netfilter; EXPORT_SYMBOL(proc_net_netfilter); #endif +static int __net_init netfilter_net_init(struct net *net) +{ +#ifdef CONFIG_PROC_FS + net->nf.proc_netfilter = proc_net_mkdir(net, "netfilter", + net->proc_net); + if (net_eq(net, &init_net)) { + if (!net->nf.proc_netfilter) + return -ENOMEM; + else + proc_net_netfilter = net->nf.proc_netfilter; + } else if (!net->nf.proc_netfilter) { + pr_err("cannot create netfilter proc entry"); + return -ENOMEM; + } +#endif + return 0; +} + +static void __net_exit netfilter_net_exit(struct net *net) +{ + remove_proc_entry("netfilter", net->proc_net); +} + +static struct pernet_operations netfilter_net_ops = { + .init = netfilter_net_init, + .exit = netfilter_net_exit, +}; + void __init netfilter_init(void) { int i, h; @@ -289,11 +317,8 @@ void __init netfilter_init(void) INIT_LIST_HEAD(&nf_hooks[i][h]); } -#ifdef CONFIG_PROC_FS - proc_net_netfilter = proc_mkdir("netfilter", init_net.proc_net); - if (!proc_net_netfilter) + if (register_pernet_subsys(&netfilter_net_ops) < 0) panic("cannot create netfilter proc entry"); -#endif if (netfilter_log_init() < 0) panic("cannot initialize nf_log"); -- cgit v1.2.3 From 30e0c6a6bee24db0166b7ca709277cd693e179f2 Mon Sep 17 00:00:00 2001 From: Gao feng Date: Sun, 24 Mar 2013 23:50:40 +0000 Subject: netfilter: nf_log: prepare net namespace support for loggers This patch adds netns support to nf_log and it prepares netns support for existing loggers. It is composed of four major changes. 1) nf_log_register has been split to two functions: nf_log_register and nf_log_set. The new nf_log_register is used to globally register the nf_logger and nf_log_set is used for enabling pernet support from nf_loggers. Per netns is not yet complete after this patch, it comes in separate follow up patches. 2) Add net as a parameter of nf_log_bind_pf. Per netns is not yet complete after this patch, it only allows to bind the nf_logger to the protocol family from init_net and it skips other cases. 3) Adapt all nf_log_packet callers to pass netns as parameter. After this patch, this function only works for init_net. 4) Make the sysctl net/netfilter/nf_log pernet. Signed-off-by: Gao feng Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_log.h | 14 +- include/net/netns/netfilter.h | 7 + net/bridge/netfilter/ebt_log.c | 7 +- net/bridge/netfilter/ebt_nflog.c | 5 +- net/ipv4/netfilter/ip_tables.c | 3 +- net/ipv4/netfilter/nf_conntrack_proto_icmp.c | 8 +- net/ipv6/netfilter/ip6_tables.c | 3 +- net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c | 7 +- net/netfilter/nf_conntrack_helper.c | 2 +- net/netfilter/nf_conntrack_proto_dccp.c | 9 +- net/netfilter/nf_conntrack_proto_tcp.c | 18 +- net/netfilter/nf_conntrack_proto_udp.c | 6 +- net/netfilter/nf_conntrack_proto_udplite.c | 8 +- net/netfilter/nf_log.c | 225 ++++++++++++++++++------- net/netfilter/nfnetlink_log.c | 5 +- net/netfilter/xt_osf.c | 6 +- 16 files changed, 233 insertions(+), 100 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_log.h b/include/net/netfilter/nf_log.h index e991bd0a27af..31f1fb9eb784 100644 --- a/include/net/netfilter/nf_log.h +++ b/include/net/netfilter/nf_log.h @@ -49,12 +49,18 @@ struct nf_logger { int nf_log_register(u_int8_t pf, struct nf_logger *logger); void nf_log_unregister(struct nf_logger *logger); -int nf_log_bind_pf(u_int8_t pf, const struct nf_logger *logger); -void nf_log_unbind_pf(u_int8_t pf); +void nf_log_set(struct net *net, u_int8_t pf, + const struct nf_logger *logger); +void nf_log_unset(struct net *net, const struct nf_logger *logger); + +int nf_log_bind_pf(struct net *net, u_int8_t pf, + const struct nf_logger *logger); +void nf_log_unbind_pf(struct net *net, u_int8_t pf); /* Calls the registered backend logging function */ -__printf(7, 8) -void nf_log_packet(u_int8_t pf, +__printf(8, 9) +void nf_log_packet(struct net *net, + u_int8_t pf, unsigned int hooknum, const struct sk_buff *skb, const struct net_device *in, diff --git a/include/net/netns/netfilter.h b/include/net/netns/netfilter.h index 248ca1c68dba..88740024ccf3 100644 --- a/include/net/netns/netfilter.h +++ b/include/net/netns/netfilter.h @@ -2,10 +2,17 @@ #define __NETNS_NETFILTER_H #include +#include + +struct nf_logger; struct netns_nf { #if defined CONFIG_PROC_FS struct proc_dir_entry *proc_netfilter; +#endif + const struct nf_logger __rcu *nf_loggers[NFPROTO_NUMPROTO]; +#ifdef CONFIG_SYSCTL + struct ctl_table_header *nf_log_dir_header; #endif }; #endif diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c index 92de5e5f9db2..08e5ea5ec4ed 100644 --- a/net/bridge/netfilter/ebt_log.c +++ b/net/bridge/netfilter/ebt_log.c @@ -176,17 +176,18 @@ ebt_log_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct ebt_log_info *info = par->targinfo; struct nf_loginfo li; + struct net *net = dev_net(par->in ? par->in : par->out); li.type = NF_LOG_TYPE_LOG; li.u.log.level = info->loglevel; li.u.log.logflags = info->bitmask; if (info->bitmask & EBT_LOG_NFLOG) - nf_log_packet(NFPROTO_BRIDGE, par->hooknum, skb, par->in, - par->out, &li, "%s", info->prefix); + nf_log_packet(net, NFPROTO_BRIDGE, par->hooknum, skb, + par->in, par->out, &li, "%s", info->prefix); else ebt_log_packet(NFPROTO_BRIDGE, par->hooknum, skb, par->in, - par->out, &li, info->prefix); + par->out, &li, info->prefix); return EBT_CONTINUE; } diff --git a/net/bridge/netfilter/ebt_nflog.c b/net/bridge/netfilter/ebt_nflog.c index 5be68bbcc341..59ac7952010d 100644 --- a/net/bridge/netfilter/ebt_nflog.c +++ b/net/bridge/netfilter/ebt_nflog.c @@ -24,14 +24,15 @@ ebt_nflog_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct ebt_nflog_info *info = par->targinfo; struct nf_loginfo li; + struct net *net = dev_net(par->in ? par->in : par->out); li.type = NF_LOG_TYPE_ULOG; li.u.ulog.copy_len = info->len; li.u.ulog.group = info->group; li.u.ulog.qthreshold = info->threshold; - nf_log_packet(PF_BRIDGE, par->hooknum, skb, par->in, par->out, - &li, "%s", info->prefix); + nf_log_packet(net, PF_BRIDGE, par->hooknum, skb, par->in, + par->out, &li, "%s", info->prefix); return EBT_CONTINUE; } diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 1b433aac2663..e391db1f056d 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -258,6 +258,7 @@ static void trace_packet(const struct sk_buff *skb, const char *hookname, *chainname, *comment; const struct ipt_entry *iter; unsigned int rulenum = 0; + struct net *net = dev_net(in ? in : out); table_base = private->entries[smp_processor_id()]; root = get_entry(table_base, private->hook_entry[hook]); @@ -270,7 +271,7 @@ static void trace_packet(const struct sk_buff *skb, &chainname, &comment, &rulenum) != 0) break; - nf_log_packet(AF_INET, hook, skb, in, out, &trace_loginfo, + nf_log_packet(net, AF_INET, hook, skb, in, out, &trace_loginfo, "TRACE: %s:%s:%s:%u ", tablename, chainname, comment, rulenum); } diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index 5241d997ab75..c2cd63d2d892 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -187,8 +187,8 @@ icmp_error(struct net *net, struct nf_conn *tmpl, icmph = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_ih), &_ih); if (icmph == NULL) { if (LOG_INVALID(net, IPPROTO_ICMP)) - nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, - "nf_ct_icmp: short packet "); + nf_log_packet(net, PF_INET, 0, skb, NULL, NULL, + NULL, "nf_ct_icmp: short packet "); return -NF_ACCEPT; } @@ -196,7 +196,7 @@ icmp_error(struct net *net, struct nf_conn *tmpl, if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && nf_ip_checksum(skb, hooknum, dataoff, 0)) { if (LOG_INVALID(net, IPPROTO_ICMP)) - nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, + nf_log_packet(net, PF_INET, 0, skb, NULL, NULL, NULL, "nf_ct_icmp: bad HW ICMP checksum "); return -NF_ACCEPT; } @@ -209,7 +209,7 @@ icmp_error(struct net *net, struct nf_conn *tmpl, */ if (icmph->type > NR_ICMP_TYPES) { if (LOG_INVALID(net, IPPROTO_ICMP)) - nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, + nf_log_packet(net, PF_INET, 0, skb, NULL, NULL, NULL, "nf_ct_icmp: invalid ICMP type "); return -NF_ACCEPT; } diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 341b54ade72c..8861b1ef420e 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -284,6 +284,7 @@ static void trace_packet(const struct sk_buff *skb, const char *hookname, *chainname, *comment; const struct ip6t_entry *iter; unsigned int rulenum = 0; + struct net *net = dev_net(in ? in : out); table_base = private->entries[smp_processor_id()]; root = get_entry(table_base, private->hook_entry[hook]); @@ -296,7 +297,7 @@ static void trace_packet(const struct sk_buff *skb, &chainname, &comment, &rulenum) != 0) break; - nf_log_packet(AF_INET6, hook, skb, in, out, &trace_loginfo, + nf_log_packet(net, AF_INET6, hook, skb, in, out, &trace_loginfo, "TRACE: %s:%s:%s:%u ", tablename, chainname, comment, rulenum); } diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index 24df3dde0076..b3807c5cb888 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -131,7 +131,8 @@ static bool icmpv6_new(struct nf_conn *ct, const struct sk_buff *skb, type + 128); nf_ct_dump_tuple_ipv6(&ct->tuplehash[0].tuple); if (LOG_INVALID(nf_ct_net(ct), IPPROTO_ICMPV6)) - nf_log_packet(PF_INET6, 0, skb, NULL, NULL, NULL, + nf_log_packet(nf_ct_net(ct), PF_INET6, 0, skb, NULL, + NULL, NULL, "nf_ct_icmpv6: invalid new with type %d ", type + 128); return false; @@ -203,7 +204,7 @@ icmpv6_error(struct net *net, struct nf_conn *tmpl, icmp6h = skb_header_pointer(skb, dataoff, sizeof(_ih), &_ih); if (icmp6h == NULL) { if (LOG_INVALID(net, IPPROTO_ICMPV6)) - nf_log_packet(PF_INET6, 0, skb, NULL, NULL, NULL, + nf_log_packet(net, PF_INET6, 0, skb, NULL, NULL, NULL, "nf_ct_icmpv6: short packet "); return -NF_ACCEPT; } @@ -211,7 +212,7 @@ icmpv6_error(struct net *net, struct nf_conn *tmpl, if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && nf_ip6_checksum(skb, hooknum, dataoff, IPPROTO_ICMPV6)) { if (LOG_INVALID(net, IPPROTO_ICMPV6)) - nf_log_packet(PF_INET6, 0, skb, NULL, NULL, NULL, + nf_log_packet(net, PF_INET6, 0, skb, NULL, NULL, NULL, "nf_ct_icmpv6: ICMPv6 checksum failed "); return -NF_ACCEPT; } diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 94b4b9853f60..a0b1c5c23d1c 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -353,7 +353,7 @@ void nf_ct_helper_log(struct sk_buff *skb, const struct nf_conn *ct, /* rcu_read_lock()ed by nf_hook_slow */ helper = rcu_dereference(help->helper); - nf_log_packet(nf_ct_l3num(ct), 0, skb, NULL, NULL, NULL, + nf_log_packet(nf_ct_net(ct), nf_ct_l3num(ct), 0, skb, NULL, NULL, NULL, "nf_ct_%s: dropping packet: %pV ", helper->name, &vaf); va_end(args); diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index ba65b2041eb4..a99b6c3427b0 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -456,7 +456,8 @@ static bool dccp_new(struct nf_conn *ct, const struct sk_buff *skb, out_invalid: if (LOG_INVALID(net, IPPROTO_DCCP)) - nf_log_packet(nf_ct_l3num(ct), 0, skb, NULL, NULL, NULL, msg); + nf_log_packet(net, nf_ct_l3num(ct), 0, skb, NULL, NULL, + NULL, msg); return false; } @@ -542,13 +543,13 @@ static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb, spin_unlock_bh(&ct->lock); if (LOG_INVALID(net, IPPROTO_DCCP)) - nf_log_packet(pf, 0, skb, NULL, NULL, NULL, + nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, "nf_ct_dccp: invalid packet ignored "); return NF_ACCEPT; case CT_DCCP_INVALID: spin_unlock_bh(&ct->lock); if (LOG_INVALID(net, IPPROTO_DCCP)) - nf_log_packet(pf, 0, skb, NULL, NULL, NULL, + nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, "nf_ct_dccp: invalid state transition "); return -NF_ACCEPT; } @@ -613,7 +614,7 @@ static int dccp_error(struct net *net, struct nf_conn *tmpl, out_invalid: if (LOG_INVALID(net, IPPROTO_DCCP)) - nf_log_packet(pf, 0, skb, NULL, NULL, NULL, msg); + nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, msg); return -NF_ACCEPT; } diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 83876e9877f1..f021a2076c87 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -720,7 +720,7 @@ static bool tcp_in_window(const struct nf_conn *ct, tn->tcp_be_liberal) res = true; if (!res && LOG_INVALID(net, IPPROTO_TCP)) - nf_log_packet(pf, 0, skb, NULL, NULL, NULL, + nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: %s ", before(seq, sender->td_maxend + 1) ? after(end, sender->td_end - receiver->td_maxwin - 1) ? @@ -772,7 +772,7 @@ static int tcp_error(struct net *net, struct nf_conn *tmpl, th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph); if (th == NULL) { if (LOG_INVALID(net, IPPROTO_TCP)) - nf_log_packet(pf, 0, skb, NULL, NULL, NULL, + nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: short packet "); return -NF_ACCEPT; } @@ -780,7 +780,7 @@ static int tcp_error(struct net *net, struct nf_conn *tmpl, /* Not whole TCP header or malformed packet */ if (th->doff*4 < sizeof(struct tcphdr) || tcplen < th->doff*4) { if (LOG_INVALID(net, IPPROTO_TCP)) - nf_log_packet(pf, 0, skb, NULL, NULL, NULL, + nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: truncated/malformed packet "); return -NF_ACCEPT; } @@ -793,7 +793,7 @@ static int tcp_error(struct net *net, struct nf_conn *tmpl, if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && nf_checksum(skb, hooknum, dataoff, IPPROTO_TCP, pf)) { if (LOG_INVALID(net, IPPROTO_TCP)) - nf_log_packet(pf, 0, skb, NULL, NULL, NULL, + nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: bad TCP checksum "); return -NF_ACCEPT; } @@ -802,7 +802,7 @@ static int tcp_error(struct net *net, struct nf_conn *tmpl, tcpflags = (tcp_flag_byte(th) & ~(TCPHDR_ECE|TCPHDR_CWR|TCPHDR_PSH)); if (!tcp_valid_flags[tcpflags]) { if (LOG_INVALID(net, IPPROTO_TCP)) - nf_log_packet(pf, 0, skb, NULL, NULL, NULL, + nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: invalid TCP flag combination "); return -NF_ACCEPT; } @@ -949,7 +949,7 @@ static int tcp_packet(struct nf_conn *ct, } spin_unlock_bh(&ct->lock); if (LOG_INVALID(net, IPPROTO_TCP)) - nf_log_packet(pf, 0, skb, NULL, NULL, NULL, + nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: invalid packet ignored in " "state %s ", tcp_conntrack_names[old_state]); return NF_ACCEPT; @@ -959,7 +959,7 @@ static int tcp_packet(struct nf_conn *ct, dir, get_conntrack_index(th), old_state); spin_unlock_bh(&ct->lock); if (LOG_INVALID(net, IPPROTO_TCP)) - nf_log_packet(pf, 0, skb, NULL, NULL, NULL, + nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: invalid state "); return -NF_ACCEPT; case TCP_CONNTRACK_CLOSE: @@ -969,8 +969,8 @@ static int tcp_packet(struct nf_conn *ct, /* Invalid RST */ spin_unlock_bh(&ct->lock); if (LOG_INVALID(net, IPPROTO_TCP)) - nf_log_packet(pf, 0, skb, NULL, NULL, NULL, - "nf_ct_tcp: invalid RST "); + nf_log_packet(net, pf, 0, skb, NULL, NULL, + NULL, "nf_ct_tcp: invalid RST "); return -NF_ACCEPT; } if (index == TCP_RST_SET diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c index 59623cc56e8d..fee43228e115 100644 --- a/net/netfilter/nf_conntrack_proto_udp.c +++ b/net/netfilter/nf_conntrack_proto_udp.c @@ -119,7 +119,7 @@ static int udp_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, hdr = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr); if (hdr == NULL) { if (LOG_INVALID(net, IPPROTO_UDP)) - nf_log_packet(pf, 0, skb, NULL, NULL, NULL, + nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, "nf_ct_udp: short packet "); return -NF_ACCEPT; } @@ -127,7 +127,7 @@ static int udp_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, /* Truncated/malformed packets */ if (ntohs(hdr->len) > udplen || ntohs(hdr->len) < sizeof(*hdr)) { if (LOG_INVALID(net, IPPROTO_UDP)) - nf_log_packet(pf, 0, skb, NULL, NULL, NULL, + nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, "nf_ct_udp: truncated/malformed packet "); return -NF_ACCEPT; } @@ -143,7 +143,7 @@ static int udp_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && nf_checksum(skb, hooknum, dataoff, IPPROTO_UDP, pf)) { if (LOG_INVALID(net, IPPROTO_UDP)) - nf_log_packet(pf, 0, skb, NULL, NULL, NULL, + nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, "nf_ct_udp: bad UDP checksum "); return -NF_ACCEPT; } diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c index ca969f6273f7..2750e6c69f82 100644 --- a/net/netfilter/nf_conntrack_proto_udplite.c +++ b/net/netfilter/nf_conntrack_proto_udplite.c @@ -131,7 +131,7 @@ static int udplite_error(struct net *net, struct nf_conn *tmpl, hdr = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr); if (hdr == NULL) { if (LOG_INVALID(net, IPPROTO_UDPLITE)) - nf_log_packet(pf, 0, skb, NULL, NULL, NULL, + nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, "nf_ct_udplite: short packet "); return -NF_ACCEPT; } @@ -141,7 +141,7 @@ static int udplite_error(struct net *net, struct nf_conn *tmpl, cscov = udplen; else if (cscov < sizeof(*hdr) || cscov > udplen) { if (LOG_INVALID(net, IPPROTO_UDPLITE)) - nf_log_packet(pf, 0, skb, NULL, NULL, NULL, + nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, "nf_ct_udplite: invalid checksum coverage "); return -NF_ACCEPT; } @@ -149,7 +149,7 @@ static int udplite_error(struct net *net, struct nf_conn *tmpl, /* UDPLITE mandates checksums */ if (!hdr->check) { if (LOG_INVALID(net, IPPROTO_UDPLITE)) - nf_log_packet(pf, 0, skb, NULL, NULL, NULL, + nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, "nf_ct_udplite: checksum missing "); return -NF_ACCEPT; } @@ -159,7 +159,7 @@ static int udplite_error(struct net *net, struct nf_conn *tmpl, nf_checksum_partial(skb, hooknum, dataoff, cscov, IPPROTO_UDP, pf)) { if (LOG_INVALID(net, IPPROTO_UDPLITE)) - nf_log_packet(pf, 0, skb, NULL, NULL, NULL, + nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, "nf_ct_udplite: bad UDPLite checksum "); return -NF_ACCEPT; } diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index 9e312695c818..8d331dc9b1ea 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -16,7 +16,6 @@ #define NF_LOG_PREFIXLEN 128 #define NFLOGGER_NAME_LEN 64 -static const struct nf_logger __rcu *nf_loggers[NFPROTO_NUMPROTO] __read_mostly; static struct list_head nf_loggers_l[NFPROTO_NUMPROTO] __read_mostly; static DEFINE_MUTEX(nf_log_mutex); @@ -32,13 +31,52 @@ static struct nf_logger *__find_logger(int pf, const char *str_logger) return NULL; } +void nf_log_set(struct net *net, u_int8_t pf, const struct nf_logger *logger) +{ + const struct nf_logger *log; + + if (!net_eq(net, &init_net)) + return; + + if (pf == NFPROTO_UNSPEC) + return; + + mutex_lock(&nf_log_mutex); + log = rcu_dereference_protected(net->nf.nf_loggers[pf], + lockdep_is_held(&nf_log_mutex)); + if (log == NULL) + rcu_assign_pointer(net->nf.nf_loggers[pf], logger); + + mutex_unlock(&nf_log_mutex); +} +EXPORT_SYMBOL(nf_log_set); + +void nf_log_unset(struct net *net, const struct nf_logger *logger) +{ + int i; + const struct nf_logger *log; + + if (!net_eq(net, &init_net)) + return; + + mutex_lock(&nf_log_mutex); + for (i = 0; i < NFPROTO_NUMPROTO; i++) { + log = rcu_dereference_protected(net->nf.nf_loggers[i], + lockdep_is_held(&nf_log_mutex)); + if (log == logger) + RCU_INIT_POINTER(net->nf.nf_loggers[i], NULL); + } + mutex_unlock(&nf_log_mutex); + synchronize_rcu(); +} +EXPORT_SYMBOL(nf_log_unset); + /* return EEXIST if the same logger is registered, 0 on success. */ int nf_log_register(u_int8_t pf, struct nf_logger *logger) { - const struct nf_logger *llog; int i; - if (pf >= ARRAY_SIZE(nf_loggers)) + if (pf >= ARRAY_SIZE(init_net.nf.nf_loggers)) return -EINVAL; for (i = 0; i < ARRAY_SIZE(logger->list); i++) @@ -52,63 +90,62 @@ int nf_log_register(u_int8_t pf, struct nf_logger *logger) } else { /* register at end of list to honor first register win */ list_add_tail(&logger->list[pf], &nf_loggers_l[pf]); - llog = rcu_dereference_protected(nf_loggers[pf], - lockdep_is_held(&nf_log_mutex)); - if (llog == NULL) - rcu_assign_pointer(nf_loggers[pf], logger); } mutex_unlock(&nf_log_mutex); + nf_log_set(&init_net, pf, logger); return 0; } EXPORT_SYMBOL(nf_log_register); void nf_log_unregister(struct nf_logger *logger) { - const struct nf_logger *c_logger; int i; mutex_lock(&nf_log_mutex); - for (i = 0; i < ARRAY_SIZE(nf_loggers); i++) { - c_logger = rcu_dereference_protected(nf_loggers[i], - lockdep_is_held(&nf_log_mutex)); - if (c_logger == logger) - RCU_INIT_POINTER(nf_loggers[i], NULL); + for (i = 0; i < NFPROTO_NUMPROTO; i++) list_del(&logger->list[i]); - } mutex_unlock(&nf_log_mutex); - synchronize_rcu(); + nf_log_unset(&init_net, logger); } EXPORT_SYMBOL(nf_log_unregister); -int nf_log_bind_pf(u_int8_t pf, const struct nf_logger *logger) +int nf_log_bind_pf(struct net *net, u_int8_t pf, + const struct nf_logger *logger) { - if (pf >= ARRAY_SIZE(nf_loggers)) + if (!net_eq(net, &init_net)) + return 0; + + if (pf >= ARRAY_SIZE(net->nf.nf_loggers)) return -EINVAL; mutex_lock(&nf_log_mutex); if (__find_logger(pf, logger->name) == NULL) { mutex_unlock(&nf_log_mutex); return -ENOENT; } - rcu_assign_pointer(nf_loggers[pf], logger); + rcu_assign_pointer(net->nf.nf_loggers[pf], logger); mutex_unlock(&nf_log_mutex); return 0; } EXPORT_SYMBOL(nf_log_bind_pf); -void nf_log_unbind_pf(u_int8_t pf) +void nf_log_unbind_pf(struct net *net, u_int8_t pf) { - if (pf >= ARRAY_SIZE(nf_loggers)) + if (!net_eq(net, &init_net)) + return; + + if (pf >= ARRAY_SIZE(net->nf.nf_loggers)) return; mutex_lock(&nf_log_mutex); - RCU_INIT_POINTER(nf_loggers[pf], NULL); + RCU_INIT_POINTER(net->nf.nf_loggers[pf], NULL); mutex_unlock(&nf_log_mutex); } EXPORT_SYMBOL(nf_log_unbind_pf); -void nf_log_packet(u_int8_t pf, +void nf_log_packet(struct net *net, + u_int8_t pf, unsigned int hooknum, const struct sk_buff *skb, const struct net_device *in, @@ -120,8 +157,11 @@ void nf_log_packet(u_int8_t pf, char prefix[NF_LOG_PREFIXLEN]; const struct nf_logger *logger; + if (!net_eq(net, &init_net)) + return; + rcu_read_lock(); - logger = rcu_dereference(nf_loggers[pf]); + logger = rcu_dereference(net->nf.nf_loggers[pf]); if (logger) { va_start(args, fmt); vsnprintf(prefix, sizeof(prefix), fmt, args); @@ -135,9 +175,11 @@ EXPORT_SYMBOL(nf_log_packet); #ifdef CONFIG_PROC_FS static void *seq_start(struct seq_file *seq, loff_t *pos) { + struct net *net = seq_file_net(seq); + mutex_lock(&nf_log_mutex); - if (*pos >= ARRAY_SIZE(nf_loggers)) + if (*pos >= ARRAY_SIZE(net->nf.nf_loggers)) return NULL; return pos; @@ -145,9 +187,11 @@ static void *seq_start(struct seq_file *seq, loff_t *pos) static void *seq_next(struct seq_file *s, void *v, loff_t *pos) { + struct net *net = seq_file_net(s); + (*pos)++; - if (*pos >= ARRAY_SIZE(nf_loggers)) + if (*pos >= ARRAY_SIZE(net->nf.nf_loggers)) return NULL; return pos; @@ -164,8 +208,9 @@ static int seq_show(struct seq_file *s, void *v) const struct nf_logger *logger; struct nf_logger *t; int ret; + struct net *net = seq_file_net(s); - logger = rcu_dereference_protected(nf_loggers[*pos], + logger = rcu_dereference_protected(net->nf.nf_loggers[*pos], lockdep_is_held(&nf_log_mutex)); if (!logger) @@ -199,7 +244,8 @@ static const struct seq_operations nflog_seq_ops = { static int nflog_open(struct inode *inode, struct file *file) { - return seq_open(file, &nflog_seq_ops); + return seq_open_net(inode, file, &nflog_seq_ops, + sizeof(struct seq_net_private)); } static const struct file_operations nflog_file_ops = { @@ -207,7 +253,7 @@ static const struct file_operations nflog_file_ops = { .open = nflog_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release, + .release = seq_release_net, }; @@ -216,7 +262,6 @@ static const struct file_operations nflog_file_ops = { #ifdef CONFIG_SYSCTL static char nf_log_sysctl_fnames[NFPROTO_NUMPROTO-NFPROTO_UNSPEC][3]; static struct ctl_table nf_log_sysctl_table[NFPROTO_NUMPROTO+1]; -static struct ctl_table_header *nf_log_dir_header; static int nf_log_proc_dostring(ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) @@ -226,15 +271,19 @@ static int nf_log_proc_dostring(ctl_table *table, int write, size_t size = *lenp; int r = 0; int tindex = (unsigned long)table->extra1; + struct net *net = current->nsproxy->net_ns; if (write) { + if (!net_eq(net, &init_net)) + return -EPERM; + if (size > sizeof(buf)) size = sizeof(buf); if (copy_from_user(buf, buffer, size)) return -EFAULT; if (!strcmp(buf, "NONE")) { - nf_log_unbind_pf(tindex); + nf_log_unbind_pf(net, tindex); return 0; } mutex_lock(&nf_log_mutex); @@ -243,11 +292,11 @@ static int nf_log_proc_dostring(ctl_table *table, int write, mutex_unlock(&nf_log_mutex); return -ENOENT; } - rcu_assign_pointer(nf_loggers[tindex], logger); + rcu_assign_pointer(net->nf.nf_loggers[tindex], logger); mutex_unlock(&nf_log_mutex); } else { mutex_lock(&nf_log_mutex); - logger = rcu_dereference_protected(nf_loggers[tindex], + logger = rcu_dereference_protected(net->nf.nf_loggers[tindex], lockdep_is_held(&nf_log_mutex)); if (!logger) table->data = "NONE"; @@ -260,49 +309,111 @@ static int nf_log_proc_dostring(ctl_table *table, int write, return r; } -static __init int netfilter_log_sysctl_init(void) +static int netfilter_log_sysctl_init(struct net *net) { int i; - - for (i = NFPROTO_UNSPEC; i < NFPROTO_NUMPROTO; i++) { - snprintf(nf_log_sysctl_fnames[i-NFPROTO_UNSPEC], 3, "%d", i); - nf_log_sysctl_table[i].procname = - nf_log_sysctl_fnames[i-NFPROTO_UNSPEC]; - nf_log_sysctl_table[i].data = NULL; - nf_log_sysctl_table[i].maxlen = - NFLOGGER_NAME_LEN * sizeof(char); - nf_log_sysctl_table[i].mode = 0644; - nf_log_sysctl_table[i].proc_handler = nf_log_proc_dostring; - nf_log_sysctl_table[i].extra1 = (void *)(unsigned long) i; + struct ctl_table *table; + + table = nf_log_sysctl_table; + if (!net_eq(net, &init_net)) { + table = kmemdup(nf_log_sysctl_table, + sizeof(nf_log_sysctl_table), + GFP_KERNEL); + if (!table) + goto err_alloc; + } else { + for (i = NFPROTO_UNSPEC; i < NFPROTO_NUMPROTO; i++) { + snprintf(nf_log_sysctl_fnames[i], + 3, "%d", i); + nf_log_sysctl_table[i].procname = + nf_log_sysctl_fnames[i]; + nf_log_sysctl_table[i].data = NULL; + nf_log_sysctl_table[i].maxlen = + NFLOGGER_NAME_LEN * sizeof(char); + nf_log_sysctl_table[i].mode = 0644; + nf_log_sysctl_table[i].proc_handler = + nf_log_proc_dostring; + nf_log_sysctl_table[i].extra1 = + (void *)(unsigned long) i; + } } - nf_log_dir_header = register_net_sysctl(&init_net, "net/netfilter/nf_log", - nf_log_sysctl_table); - if (!nf_log_dir_header) - return -ENOMEM; + net->nf.nf_log_dir_header = register_net_sysctl(net, + "net/netfilter/nf_log", + table); + if (!net->nf.nf_log_dir_header) + goto err_reg; return 0; + +err_reg: + if (!net_eq(net, &init_net)) + kfree(table); +err_alloc: + return -ENOMEM; +} + +static void netfilter_log_sysctl_exit(struct net *net) +{ + struct ctl_table *table; + + table = net->nf.nf_log_dir_header->ctl_table_arg; + unregister_net_sysctl_table(net->nf.nf_log_dir_header); + if (!net_eq(net, &init_net)) + kfree(table); } #else -static __init int netfilter_log_sysctl_init(void) +static int netfilter_log_sysctl_init(struct net *net) { return 0; } + +static void netfilter_log_sysctl_exit(struct net *net) +{ +} #endif /* CONFIG_SYSCTL */ -int __init netfilter_log_init(void) +static int __net_init nf_log_net_init(struct net *net) { - int i, r; + int ret = -ENOMEM; + #ifdef CONFIG_PROC_FS if (!proc_create("nf_log", S_IRUGO, - proc_net_netfilter, &nflog_file_ops)) - return -1; + net->nf.proc_netfilter, &nflog_file_ops)) + return ret; #endif + ret = netfilter_log_sysctl_init(net); + if (ret < 0) + goto out_sysctl; - /* Errors will trigger panic, unroll on error is unnecessary. */ - r = netfilter_log_sysctl_init(); - if (r < 0) - return r; + return 0; + +out_sysctl: + /* For init_net: errors will trigger panic, don't unroll on error. */ + if (!net_eq(net, &init_net)) + remove_proc_entry("nf_log", net->nf.proc_netfilter); + + return ret; +} + +static void __net_exit nf_log_net_exit(struct net *net) +{ + netfilter_log_sysctl_exit(net); + remove_proc_entry("nf_log", net->nf.proc_netfilter); +} + +static struct pernet_operations nf_log_net_ops = { + .init = nf_log_net_init, + .exit = nf_log_net_exit, +}; + +int __init netfilter_log_init(void) +{ + int i, ret; + + ret = register_pernet_subsys(&nf_log_net_ops); + if (ret < 0) + return ret; for (i = NFPROTO_UNSPEC; i < NFPROTO_NUMPROTO; i++) INIT_LIST_HEAD(&(nf_loggers_l[i])); diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index f248db572972..b593fd1f8cab 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -767,6 +767,7 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb, u_int16_t group_num = ntohs(nfmsg->res_id); struct nfulnl_instance *inst; struct nfulnl_msg_config_cmd *cmd = NULL; + struct net *net = sock_net(ctnl); int ret = 0; if (nfula[NFULA_CFG_CMD]) { @@ -776,9 +777,9 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb, /* Commands without queue context */ switch (cmd->command) { case NFULNL_CFG_CMD_PF_BIND: - return nf_log_bind_pf(pf, &nfulnl_logger); + return nf_log_bind_pf(net, pf, &nfulnl_logger); case NFULNL_CFG_CMD_PF_UNBIND: - nf_log_unbind_pf(pf); + nf_log_unbind_pf(net, pf); return 0; } } diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c index a5e673d32bda..647d989a01e6 100644 --- a/net/netfilter/xt_osf.c +++ b/net/netfilter/xt_osf.c @@ -201,6 +201,7 @@ xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p) unsigned char opts[MAX_IPOPTLEN]; const struct xt_osf_finger *kf; const struct xt_osf_user_finger *f; + struct net *net = dev_net(p->in ? p->in : p->out); if (!info) return false; @@ -325,7 +326,7 @@ xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p) fcount++; if (info->flags & XT_OSF_LOG) - nf_log_packet(p->family, p->hooknum, skb, + nf_log_packet(net, p->family, p->hooknum, skb, p->in, p->out, NULL, "%s [%s:%s] : %pI4:%d -> %pI4:%d hops=%d\n", f->genre, f->version, f->subtype, @@ -341,7 +342,8 @@ xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p) rcu_read_unlock(); if (!fcount && (info->flags & XT_OSF_LOG)) - nf_log_packet(p->family, p->hooknum, skb, p->in, p->out, NULL, + nf_log_packet(net, p->family, p->hooknum, skb, p->in, + p->out, NULL, "Remote OS is not known: %pI4:%u -> %pI4:%u\n", &ip->saddr, ntohs(tcp->source), &ip->daddr, ntohs(tcp->dest)); -- cgit v1.2.3 From b3916db32c4a3124eee9f3742a2f4723731d7602 Mon Sep 17 00:00:00 2001 From: David Herrmann Date: Fri, 5 Apr 2013 14:57:34 +0200 Subject: Bluetooth: hidp: verify l2cap sockets We need to verify that the given sockets actually are l2cap sockets. If they aren't, we are not supposed to access bt_sk(sock) and we shouldn't start the session if the offsets turn out to be valid local BT addresses. That is, if someone passes a TCP socket to HIDCONNADD, then we access some random offset in the TCP socket (which isn't even guaranteed to be valid). Fix this by checking that the socket is an l2cap socket. Signed-off-by: David Herrmann Acked-by: Marcel Holtmann Signed-off-by: Gustavo Padovan --- include/net/bluetooth/l2cap.h | 1 + net/bluetooth/hidp/core.c | 2 ++ net/bluetooth/l2cap_sock.c | 6 ++++++ 3 files changed, 9 insertions(+) (limited to 'include/net') diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index cdd33021f831..278830ef92cd 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -786,6 +786,7 @@ extern bool disable_ertm; int l2cap_init_sockets(void); void l2cap_cleanup_sockets(void); +bool l2cap_is_socket(struct socket *sock); void __l2cap_connect_rsp_defer(struct l2cap_chan *chan); int __l2cap_wait_ack(struct sock *sk); diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index 2342327f3335..4ab82cb3eac3 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c @@ -973,6 +973,8 @@ int hidp_add_connection(struct hidp_connadd_req *req, struct socket *ctrl_sock, BT_DBG(""); + if (!l2cap_is_socket(ctrl_sock) || !l2cap_is_socket(intr_sock)) + return -EINVAL; if (bacmp(&bt_sk(ctrl_sock->sk)->src, &bt_sk(intr_sock->sk)->src) || bacmp(&bt_sk(ctrl_sock->sk)->dst, &bt_sk(intr_sock->sk)->dst)) return -ENOTUNIQ; diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index 7f9704993b74..141e7b058b7e 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -43,6 +43,12 @@ static void l2cap_sock_init(struct sock *sk, struct sock *parent); static struct sock *l2cap_sock_alloc(struct net *net, struct socket *sock, int proto, gfp_t prio); +bool l2cap_is_socket(struct socket *sock) +{ + return sock && sock->ops == &l2cap_sock_ops; +} +EXPORT_SYMBOL(l2cap_is_socket); + static int l2cap_sock_bind(struct socket *sock, struct sockaddr *addr, int alen) { struct sock *sk = sock->sk; -- cgit v1.2.3 From 6b0ee8c036ecb3ac92e18e6ca0dca7bff88beaf0 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 3 Apr 2013 17:28:16 +0000 Subject: scm: Stop passing struct cred Now that uids and gids are completely encapsulated in kuid_t and kgid_t we no longer need to pass struct cred which allowed us to test both the uid and the user namespace for equality. Passing struct cred potentially allows us to pass the entire group list as BSD does but I don't believe the cost of cache line misses justifies retaining code for a future potential application. Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- include/net/af_unix.h | 3 ++- include/net/scm.h | 16 ++++++---------- net/core/scm.c | 16 ---------------- net/unix/af_unix.c | 16 ++++++++-------- 4 files changed, 16 insertions(+), 35 deletions(-) (limited to 'include/net') diff --git a/include/net/af_unix.h b/include/net/af_unix.h index 0a996a3517ed..a8836e8445cc 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -29,7 +29,8 @@ struct unix_address { struct unix_skb_parms { struct pid *pid; /* Skb credentials */ - const struct cred *cred; + kuid_t uid; + kgid_t gid; struct scm_fp_list *fp; /* Passed files */ #ifdef CONFIG_SECURITY_NETWORK u32 secid; /* Security ID */ diff --git a/include/net/scm.h b/include/net/scm.h index 975cca01048b..5a4c6a9eb122 100644 --- a/include/net/scm.h +++ b/include/net/scm.h @@ -26,7 +26,6 @@ struct scm_fp_list { struct scm_cookie { struct pid *pid; /* Skb credentials */ - const struct cred *cred; struct scm_fp_list *fp; /* Passed files */ struct scm_creds creds; /* Skb credentials */ #ifdef CONFIG_SECURITY_NETWORK @@ -51,23 +50,18 @@ static __inline__ void unix_get_peersec_dgram(struct socket *sock, struct scm_co #endif /* CONFIG_SECURITY_NETWORK */ static __inline__ void scm_set_cred(struct scm_cookie *scm, - struct pid *pid, const struct cred *cred) + struct pid *pid, kuid_t uid, kgid_t gid) { scm->pid = get_pid(pid); - scm->cred = cred ? get_cred(cred) : NULL; scm->creds.pid = pid_vnr(pid); - scm->creds.uid = cred ? cred->euid : INVALID_UID; - scm->creds.gid = cred ? cred->egid : INVALID_GID; + scm->creds.uid = uid; + scm->creds.gid = gid; } static __inline__ void scm_destroy_cred(struct scm_cookie *scm) { put_pid(scm->pid); scm->pid = NULL; - - if (scm->cred) - put_cred(scm->cred); - scm->cred = NULL; } static __inline__ void scm_destroy(struct scm_cookie *scm) @@ -81,8 +75,10 @@ static __inline__ int scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *scm, bool forcecreds) { memset(scm, 0, sizeof(*scm)); + scm->creds.uid = INVALID_UID; + scm->creds.gid = INVALID_GID; if (forcecreds) - scm_set_cred(scm, task_tgid(current), current_cred()); + scm_set_cred(scm, task_tgid(current), current_euid(), current_egid()); unix_get_peersec_dgram(sock, scm); if (msg->msg_controllen <= 0) return 0; diff --git a/net/core/scm.c b/net/core/scm.c index 2dc6cdaaae8a..83b2b383c865 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -187,22 +187,6 @@ int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p) p->creds.uid = uid; p->creds.gid = gid; - - if (!p->cred || - !uid_eq(p->cred->euid, uid) || - !gid_eq(p->cred->egid, gid)) { - struct cred *cred; - err = -ENOMEM; - cred = prepare_creds(); - if (!cred) - goto error; - - cred->uid = cred->euid = uid; - cred->gid = cred->egid = gid; - if (p->cred) - put_cred(p->cred); - p->cred = cred; - } break; } default: diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 824eaf2c3afa..5ca1631de7ef 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1340,7 +1340,6 @@ static void unix_destruct_scm(struct sk_buff *skb) struct scm_cookie scm; memset(&scm, 0, sizeof(scm)); scm.pid = UNIXCB(skb).pid; - scm.cred = UNIXCB(skb).cred; if (UNIXCB(skb).fp) unix_detach_fds(&scm, skb); @@ -1391,8 +1390,8 @@ static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool sen int err = 0; UNIXCB(skb).pid = get_pid(scm->pid); - if (scm->cred) - UNIXCB(skb).cred = get_cred(scm->cred); + UNIXCB(skb).uid = scm->creds.uid; + UNIXCB(skb).gid = scm->creds.gid; UNIXCB(skb).fp = NULL; if (scm->fp && send_fds) err = unix_attach_fds(scm, skb); @@ -1409,13 +1408,13 @@ static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool sen static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock, const struct sock *other) { - if (UNIXCB(skb).cred) + if (UNIXCB(skb).pid) return; if (test_bit(SOCK_PASSCRED, &sock->flags) || !other->sk_socket || test_bit(SOCK_PASSCRED, &other->sk_socket->flags)) { UNIXCB(skb).pid = get_pid(task_tgid(current)); - UNIXCB(skb).cred = get_current_cred(); + current_euid_egid(&UNIXCB(skb).uid, &UNIXCB(skb).gid); } } @@ -1819,7 +1818,7 @@ static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock, siocb->scm = &tmp_scm; memset(&tmp_scm, 0, sizeof(tmp_scm)); } - scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).cred); + scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid); unix_set_secdata(siocb->scm, skb); if (!(flags & MSG_PEEK)) { @@ -1991,11 +1990,12 @@ again: if (check_creds) { /* Never glue messages from different writers */ if ((UNIXCB(skb).pid != siocb->scm->pid) || - (UNIXCB(skb).cred != siocb->scm->cred)) + !uid_eq(UNIXCB(skb).uid, siocb->scm->creds.uid) || + !gid_eq(UNIXCB(skb).gid, siocb->scm->creds.gid)) break; } else if (test_bit(SOCK_PASSCRED, &sock->flags)) { /* Copy credentials */ - scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).cred); + scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid); check_creds = 1; } -- cgit v1.2.3 From d87c8c6d1562f12df101c5b9857170d110e7353a Mon Sep 17 00:00:00 2001 From: Werner Almesberger Date: Thu, 4 Apr 2013 06:32:12 +0000 Subject: IEEE 802.15.4: remove get_bsn from "struct ieee802154_mlme_ops" It served no purpose: we never call it from anywhere in the stack and the only driver that did implement it (fakehard) merely provided a dummy value. There is also considerable doubt whether it would make sense to even attempt beacon processing at this level in the Linux kernel. Signed-off-by: Werner Almesberger Signed-off-by: David S. Miller --- drivers/net/ieee802154/fakehard.c | 21 --------------------- include/net/ieee802154_netdev.h | 1 - 2 files changed, 22 deletions(-) (limited to 'include/net') diff --git a/drivers/net/ieee802154/fakehard.c b/drivers/net/ieee802154/fakehard.c index 8f1c25676d44..bf0d55e2dd63 100644 --- a/drivers/net/ieee802154/fakehard.c +++ b/drivers/net/ieee802154/fakehard.c @@ -105,26 +105,6 @@ static u8 fake_get_dsn(const struct net_device *dev) return 0x00; /* DSN are implemented in HW, so return just 0 */ } -/** - * fake_get_bsn - Retrieve the BSN of the device. - * @dev: The network device to retrieve the BSN for. - * - * Returns the IEEE 802.15.4 BSN for the network device. - * The BSN is the sequence number which will be added to each - * beacon frame sent by the MAC. - * - * BSN means 'Beacon Sequence Number'. - * - * Note: This is in section 7.2.1.2 of the IEEE 802.15.4-2006 - * document. - */ -static u8 fake_get_bsn(const struct net_device *dev) -{ - BUG_ON(dev->type != ARPHRD_IEEE802154); - - return 0x00; /* BSN are implemented in HW, so return just 0 */ -} - /** * fake_assoc_req - Make an association request to the HW. * @dev: The network device which we are associating to a network. @@ -264,7 +244,6 @@ static struct ieee802154_mlme_ops fake_mlme = { .get_pan_id = fake_get_pan_id, .get_short_addr = fake_get_short_addr, .get_dsn = fake_get_dsn, - .get_bsn = fake_get_bsn, }; static int ieee802154_fake_open(struct net_device *dev) diff --git a/include/net/ieee802154_netdev.h b/include/net/ieee802154_netdev.h index d104c882fc29..642f94c0fa2f 100644 --- a/include/net/ieee802154_netdev.h +++ b/include/net/ieee802154_netdev.h @@ -110,7 +110,6 @@ struct ieee802154_mlme_ops { u16 (*get_pan_id)(const struct net_device *dev); u16 (*get_short_addr)(const struct net_device *dev); u8 (*get_dsn)(const struct net_device *dev); - u8 (*get_bsn)(const struct net_device *dev); }; /* The IEEE 802.15.4 standard defines 2 type of the devices: -- cgit v1.2.3 From 56aa091d60a63fee83d2c894edb69b7c159966c7 Mon Sep 17 00:00:00 2001 From: Werner Almesberger Date: Thu, 4 Apr 2013 06:32:35 +0000 Subject: ieee802154/nl-mac.c: make some MLME operations optional Check for NULL before calling the following operations from "struct ieee802154_mlme_ops": assoc_req, assoc_resp, disassoc_req, start_req, and scan_req. This fixes a current oops where those functions are called but not implemented. It also updates the documentation to clarify that they are now optional by design. If a call to an unimplemented function is attempted, the kernel returns EOPNOTSUPP via netlink. The following operations are still required: get_phy, get_pan_id, get_short_addr, and get_dsn. Note that the places where this patch changes the initialization of "ret" should not affect the rest of the code since "ret" was always set (again) before returning its value. Signed-off-by: Werner Almesberger Signed-off-by: David S. Miller --- Documentation/networking/ieee802154.txt | 5 +++-- include/net/ieee802154_netdev.h | 4 ++++ net/ieee802154/nl-mac.c | 25 ++++++++++++++++++++----- 3 files changed, 27 insertions(+), 7 deletions(-) (limited to 'include/net') diff --git a/Documentation/networking/ieee802154.txt b/Documentation/networking/ieee802154.txt index 703cf4370c79..67a9cb259d40 100644 --- a/Documentation/networking/ieee802154.txt +++ b/Documentation/networking/ieee802154.txt @@ -71,8 +71,9 @@ submits skb to qdisc), so if you need something from that cb later, you should store info in the skb->data on your own. To hook the MLME interface you have to populate the ml_priv field of your -net_device with a pointer to struct ieee802154_mlme_ops instance. All fields are -required. +net_device with a pointer to struct ieee802154_mlme_ops instance. The fields +assoc_req, assoc_resp, disassoc_req, start_req, and scan_req are optional. +All other fields are required. We provide an example of simple HardMAC driver at drivers/ieee802154/fakehard.c diff --git a/include/net/ieee802154_netdev.h b/include/net/ieee802154_netdev.h index 642f94c0fa2f..8196d5d40359 100644 --- a/include/net/ieee802154_netdev.h +++ b/include/net/ieee802154_netdev.h @@ -85,6 +85,8 @@ struct wpan_phy; * Use wpan_wpy_put to put that reference. */ struct ieee802154_mlme_ops { + /* The following fields are optional (can be NULL). */ + int (*assoc_req)(struct net_device *dev, struct ieee802154_addr *addr, u8 channel, u8 page, u8 cap); @@ -101,6 +103,8 @@ struct ieee802154_mlme_ops { int (*scan_req)(struct net_device *dev, u8 type, u32 channels, u8 page, u8 duration); + /* The fields below are required. */ + struct wpan_phy *(*get_phy)(const struct net_device *dev); /* diff --git a/net/ieee802154/nl-mac.c b/net/ieee802154/nl-mac.c index 96bb08abece2..b0bdd8c51e9c 100644 --- a/net/ieee802154/nl-mac.c +++ b/net/ieee802154/nl-mac.c @@ -315,7 +315,7 @@ static int ieee802154_associate_req(struct sk_buff *skb, struct net_device *dev; struct ieee802154_addr addr; u8 page; - int ret = -EINVAL; + int ret = -EOPNOTSUPP; if (!info->attrs[IEEE802154_ATTR_CHANNEL] || !info->attrs[IEEE802154_ATTR_COORD_PAN_ID] || @@ -327,6 +327,8 @@ static int ieee802154_associate_req(struct sk_buff *skb, dev = ieee802154_nl_get_dev(info); if (!dev) return -ENODEV; + if (!ieee802154_mlme_ops(dev)->assoc_req) + goto out; if (info->attrs[IEEE802154_ATTR_COORD_HW_ADDR]) { addr.addr_type = IEEE802154_ADDR_LONG; @@ -350,6 +352,7 @@ static int ieee802154_associate_req(struct sk_buff *skb, page, nla_get_u8(info->attrs[IEEE802154_ATTR_CAPABILITY])); +out: dev_put(dev); return ret; } @@ -359,7 +362,7 @@ static int ieee802154_associate_resp(struct sk_buff *skb, { struct net_device *dev; struct ieee802154_addr addr; - int ret = -EINVAL; + int ret = -EOPNOTSUPP; if (!info->attrs[IEEE802154_ATTR_STATUS] || !info->attrs[IEEE802154_ATTR_DEST_HW_ADDR] || @@ -369,6 +372,8 @@ static int ieee802154_associate_resp(struct sk_buff *skb, dev = ieee802154_nl_get_dev(info); if (!dev) return -ENODEV; + if (!ieee802154_mlme_ops(dev)->assoc_resp) + goto out; addr.addr_type = IEEE802154_ADDR_LONG; nla_memcpy(addr.hwaddr, info->attrs[IEEE802154_ATTR_DEST_HW_ADDR], @@ -380,6 +385,7 @@ static int ieee802154_associate_resp(struct sk_buff *skb, nla_get_u16(info->attrs[IEEE802154_ATTR_DEST_SHORT_ADDR]), nla_get_u8(info->attrs[IEEE802154_ATTR_STATUS])); +out: dev_put(dev); return ret; } @@ -389,7 +395,7 @@ static int ieee802154_disassociate_req(struct sk_buff *skb, { struct net_device *dev; struct ieee802154_addr addr; - int ret = -EINVAL; + int ret = -EOPNOTSUPP; if ((!info->attrs[IEEE802154_ATTR_DEST_HW_ADDR] && !info->attrs[IEEE802154_ATTR_DEST_SHORT_ADDR]) || @@ -399,6 +405,8 @@ static int ieee802154_disassociate_req(struct sk_buff *skb, dev = ieee802154_nl_get_dev(info); if (!dev) return -ENODEV; + if (!ieee802154_mlme_ops(dev)->disassoc_req) + goto out; if (info->attrs[IEEE802154_ATTR_DEST_HW_ADDR]) { addr.addr_type = IEEE802154_ADDR_LONG; @@ -415,6 +423,7 @@ static int ieee802154_disassociate_req(struct sk_buff *skb, ret = ieee802154_mlme_ops(dev)->disassoc_req(dev, &addr, nla_get_u8(info->attrs[IEEE802154_ATTR_REASON])); +out: dev_put(dev); return ret; } @@ -432,7 +441,7 @@ static int ieee802154_start_req(struct sk_buff *skb, struct genl_info *info) u8 channel, bcn_ord, sf_ord; u8 page; int pan_coord, blx, coord_realign; - int ret; + int ret = -EOPNOTSUPP; if (!info->attrs[IEEE802154_ATTR_COORD_PAN_ID] || !info->attrs[IEEE802154_ATTR_COORD_SHORT_ADDR] || @@ -448,6 +457,8 @@ static int ieee802154_start_req(struct sk_buff *skb, struct genl_info *info) dev = ieee802154_nl_get_dev(info); if (!dev) return -ENODEV; + if (!ieee802154_mlme_ops(dev)->start_req) + goto out; addr.addr_type = IEEE802154_ADDR_SHORT; addr.short_addr = nla_get_u16( @@ -476,6 +487,7 @@ static int ieee802154_start_req(struct sk_buff *skb, struct genl_info *info) ret = ieee802154_mlme_ops(dev)->start_req(dev, &addr, channel, page, bcn_ord, sf_ord, pan_coord, blx, coord_realign); +out: dev_put(dev); return ret; } @@ -483,7 +495,7 @@ static int ieee802154_start_req(struct sk_buff *skb, struct genl_info *info) static int ieee802154_scan_req(struct sk_buff *skb, struct genl_info *info) { struct net_device *dev; - int ret; + int ret = -EOPNOTSUPP; u8 type; u32 channels; u8 duration; @@ -497,6 +509,8 @@ static int ieee802154_scan_req(struct sk_buff *skb, struct genl_info *info) dev = ieee802154_nl_get_dev(info); if (!dev) return -ENODEV; + if (!ieee802154_mlme_ops(dev)->scan_req) + goto out; type = nla_get_u8(info->attrs[IEEE802154_ATTR_SCAN_TYPE]); channels = nla_get_u32(info->attrs[IEEE802154_ATTR_CHANNELS]); @@ -511,6 +525,7 @@ static int ieee802154_scan_req(struct sk_buff *skb, struct genl_info *info) ret = ieee802154_mlme_ops(dev)->scan_req(dev, type, channels, page, duration); +out: dev_put(dev); return ret; } -- cgit v1.2.3 From f53adae4eae5ad9f7343ff4a0fc68b468c981138 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 8 Apr 2013 04:01:30 +0000 Subject: net: ipv6: add tokenized interface identifier support This patch adds support for IPv6 tokenized IIDs, that allow for administrators to assign well-known host-part addresses to nodes whilst still obtaining global network prefix from Router Advertisements. It is currently in draft status. The primary target for such support is server platforms where addresses are usually manually configured, rather than using DHCPv6 or SLAAC. By using tokenised identifiers, hosts can still determine their network prefix by use of SLAAC, but more readily be automatically renumbered should their network prefix change. [...] The disadvantage with static addresses is that they are likely to require manual editing should the network prefix in use change. If instead there were a method to only manually configure the static identifier part of the IPv6 address, then the address could be automatically updated when a new prefix was introduced, as described in [RFC4192] for example. In such cases a DNS server might be configured with such a tokenised interface identifier of ::53, and SLAAC would use the token in constructing the interface address, using the advertised prefix. [...] http://tools.ietf.org/html/draft-chown-6man-tokenised-ipv6-identifiers-02 The implementation is partially based on top of Mark K. Thompson's proof of concept. However, it uses the Netlink interface for configuration resp. data retrival, so that it can be easily extended in future. Successfully tested by myself. Cc: Hannes Frederic Sowa Cc: YOSHIFUJI Hideaki Cc: Thomas Graf Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/net/if_inet6.h | 2 + include/uapi/linux/if_link.h | 1 + net/ipv6/addrconf.c | 87 ++++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 87 insertions(+), 3 deletions(-) (limited to 'include/net') diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h index 93563221d29a..f1063d62cd13 100644 --- a/include/net/if_inet6.h +++ b/include/net/if_inet6.h @@ -187,6 +187,8 @@ struct inet6_dev { struct list_head tempaddr_list; #endif + struct in6_addr token; + struct neigh_parms *nd_parms; struct inet6_dev *next; struct ipv6_devconf cnf; diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index c4edfe11f1f7..6b35c4211f65 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -201,6 +201,7 @@ enum { IFLA_INET6_MCAST, /* MC things. What of them? */ IFLA_INET6_CACHEINFO, /* time values and max reasm size */ IFLA_INET6_ICMP6STATS, /* statistics (icmpv6) */ + IFLA_INET6_TOKEN, /* device token */ __IFLA_INET6_MAX }; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index a33b157d9ccf..65d8139d60c5 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -422,6 +422,7 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev) ipv6_regen_rndid((unsigned long) ndev); } #endif + memset(ndev->token.s6_addr, 0, sizeof(ndev->token.s6_addr)); if (netif_running(dev) && addrconf_qdisc_ok(dev)) ndev->if_flags |= IF_READY; @@ -2136,8 +2137,14 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao) if (pinfo->prefix_len == 64) { memcpy(&addr, &pinfo->prefix, 8); - if (ipv6_generate_eui64(addr.s6_addr + 8, dev) && - ipv6_inherit_eui64(addr.s6_addr + 8, in6_dev)) { + + if (!ipv6_addr_any(&in6_dev->token)) { + read_lock_bh(&in6_dev->lock); + memcpy(addr.s6_addr + 8, + in6_dev->token.s6_addr + 8, 8); + read_unlock_bh(&in6_dev->lock); + } else if (ipv6_generate_eui64(addr.s6_addr + 8, dev) && + ipv6_inherit_eui64(addr.s6_addr + 8, in6_dev)) { in6_dev_put(in6_dev); return; } @@ -4165,7 +4172,8 @@ static inline size_t inet6_ifla6_size(void) + nla_total_size(sizeof(struct ifla_cacheinfo)) + nla_total_size(DEVCONF_MAX * 4) /* IFLA_INET6_CONF */ + nla_total_size(IPSTATS_MIB_MAX * 8) /* IFLA_INET6_STATS */ - + nla_total_size(ICMP6_MIB_MAX * 8); /* IFLA_INET6_ICMP6STATS */ + + nla_total_size(ICMP6_MIB_MAX * 8) /* IFLA_INET6_ICMP6STATS */ + + nla_total_size(sizeof(struct in6_addr)); /* IFLA_INET6_TOKEN */ } static inline size_t inet6_if_nlmsg_size(void) @@ -4252,6 +4260,13 @@ static int inet6_fill_ifla6_attrs(struct sk_buff *skb, struct inet6_dev *idev) goto nla_put_failure; snmp6_fill_stats(nla_data(nla), idev, IFLA_INET6_ICMP6STATS, nla_len(nla)); + nla = nla_reserve(skb, IFLA_INET6_TOKEN, sizeof(struct in6_addr)); + if (nla == NULL) + goto nla_put_failure; + read_lock_bh(&idev->lock); + memcpy(nla_data(nla), idev->token.s6_addr, nla_len(nla)); + read_unlock_bh(&idev->lock); + return 0; nla_put_failure: @@ -4279,6 +4294,71 @@ static int inet6_fill_link_af(struct sk_buff *skb, const struct net_device *dev) return 0; } +static int inet6_set_iftoken(struct inet6_dev *idev, struct in6_addr *token) +{ + struct in6_addr ll_addr; + struct inet6_ifaddr *ifp; + struct net_device *dev = idev->dev; + + if (token == NULL) + return -EINVAL; + if (ipv6_addr_any(token)) + return -EINVAL; + if (dev->flags & (IFF_LOOPBACK | IFF_NOARP)) + return -EINVAL; + if (idev->dead || !(idev->if_flags & IF_READY)) + return -EINVAL; + if (!ipv6_accept_ra(idev)) + return -EINVAL; + if (idev->cnf.rtr_solicits <= 0) + return -EINVAL; + + write_lock_bh(&idev->lock); + + BUILD_BUG_ON(sizeof(token->s6_addr) != 16); + memcpy(idev->token.s6_addr + 8, token->s6_addr + 8, 8); + + write_unlock_bh(&idev->lock); + + ipv6_get_lladdr(dev, &ll_addr, IFA_F_TENTATIVE | IFA_F_OPTIMISTIC); + ndisc_send_rs(dev, &ll_addr, &in6addr_linklocal_allrouters); + + write_lock_bh(&idev->lock); + idev->if_flags |= IF_RS_SENT; + + /* Well, that's kinda nasty ... */ + list_for_each_entry(ifp, &idev->addr_list, if_list) { + spin_lock(&ifp->lock); + if (ipv6_addr_src_scope(&ifp->addr) == + IPV6_ADDR_SCOPE_GLOBAL) { + ifp->valid_lft = 0; + ifp->prefered_lft = 0; + } + spin_unlock(&ifp->lock); + } + + write_unlock_bh(&idev->lock); + return 0; +} + +static int inet6_set_link_af(struct net_device *dev, const struct nlattr *nla) +{ + int err = -EINVAL; + struct inet6_dev *idev = __in6_dev_get(dev); + struct nlattr *tb[IFLA_INET6_MAX + 1]; + + if (!idev) + return -EAFNOSUPPORT; + + if (nla_parse_nested(tb, IFLA_INET6_MAX, nla, NULL) < 0) + BUG(); + + if (tb[IFLA_INET6_TOKEN]) + err = inet6_set_iftoken(idev, nla_data(tb[IFLA_INET6_TOKEN])); + + return err; +} + static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev, u32 portid, u32 seq, int event, unsigned int flags) { @@ -4981,6 +5061,7 @@ static struct rtnl_af_ops inet6_ops = { .family = AF_INET6, .fill_link_af = inet6_fill_link_af, .get_link_af_size = inet6_get_link_af_size, + .set_link_af = inet6_set_link_af, }; /* -- cgit v1.2.3 From 617fe29d45bdfffba2739e6512c83e766e6ae72c Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 9 Apr 2013 03:47:16 +0000 Subject: net: ipv6: only invalidate previously tokenized addresses Instead of invalidating all IPv6 addresses with global scope when one decides to use IPv6 tokens, we should only invalidate previous tokens and leave the rest intact until they expire eventually (or are intact forever). For doing this less greedy approach, we're adding a bool at the end of inet6_ifaddr structure instead, for two reasons: i) per-inet6_ifaddr flag space is already used up, making it wider might not be a good idea, since ii) also we do not necessarily need to export this information into user space. Suggested-by: Hannes Frederic Sowa Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/net/if_inet6.h | 2 ++ net/ipv6/addrconf.c | 7 +++++-- 2 files changed, 7 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h index f1063d62cd13..100fb8cec17c 100644 --- a/include/net/if_inet6.h +++ b/include/net/if_inet6.h @@ -71,6 +71,8 @@ struct inet6_ifaddr { struct inet6_ifaddr *ifpub; int regen_count; #endif + bool tokenized; + struct rcu_head rcu; }; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 713ebe303f61..28b61e89bbb8 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -878,6 +878,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen, ifa->prefix_len = pfxlen; ifa->flags = flags | IFA_F_TENTATIVE; ifa->cstamp = ifa->tstamp = jiffies; + ifa->tokenized = false; ifa->rt = rt; @@ -2134,6 +2135,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao) struct inet6_ifaddr *ifp; struct in6_addr addr; int create = 0, update_lft = 0; + bool tokenized = false; if (pinfo->prefix_len == 64) { memcpy(&addr, &pinfo->prefix, 8); @@ -2143,6 +2145,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao) memcpy(addr.s6_addr + 8, in6_dev->token.s6_addr + 8, 8); read_unlock_bh(&in6_dev->lock); + tokenized = true; } else if (ipv6_generate_eui64(addr.s6_addr + 8, dev) && ipv6_inherit_eui64(addr.s6_addr + 8, in6_dev)) { in6_dev_put(in6_dev); @@ -2185,6 +2188,7 @@ ok: update_lft = create = 1; ifp->cstamp = jiffies; + ifp->tokenized = tokenized; addrconf_dad_start(ifp); } @@ -4339,8 +4343,7 @@ static int inet6_set_iftoken(struct inet6_dev *idev, struct in6_addr *token) /* Well, that's kinda nasty ... */ list_for_each_entry(ifp, &idev->addr_list, if_list) { spin_lock(&ifp->lock); - if (ipv6_addr_src_scope(&ifp->addr) == - IPV6_ADDR_SCOPE_GLOBAL) { + if (ifp->tokenized) { ifp->valid_lft = 0; ifp->prefered_lft = 0; } -- cgit v1.2.3 From 211d2f97e936d206a5e45f6f64ecbc2c51a2b46c Mon Sep 17 00:00:00 2001 From: Zefan Li Date: Mon, 8 Apr 2013 20:03:35 +0000 Subject: cls_cgroup: remove task_struct parameter from sock_update_classid() The callers always pass current to sock_update_classid(). Signed-off-by: Li Zefan Acked-by: Neil Horman Signed-off-by: David S. Miller --- include/net/cls_cgroup.h | 4 ++-- net/core/scm.c | 2 +- net/core/sock.c | 6 +++--- 3 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include/net') diff --git a/include/net/cls_cgroup.h b/include/net/cls_cgroup.h index 2581638f4a3d..0fee0617fb7d 100644 --- a/include/net/cls_cgroup.h +++ b/include/net/cls_cgroup.h @@ -24,7 +24,7 @@ struct cgroup_cls_state u32 classid; }; -extern void sock_update_classid(struct sock *sk, struct task_struct *task); +extern void sock_update_classid(struct sock *sk); #if IS_BUILTIN(CONFIG_NET_CLS_CGROUP) static inline u32 task_cls_classid(struct task_struct *p) @@ -61,7 +61,7 @@ static inline u32 task_cls_classid(struct task_struct *p) } #endif #else /* !CGROUP_NET_CLS_CGROUP */ -static inline void sock_update_classid(struct sock *sk, struct task_struct *task) +static inline void sock_update_classid(struct sock *sk) { } diff --git a/net/core/scm.c b/net/core/scm.c index 83b2b383c865..c77b7c3d2f9d 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -291,7 +291,7 @@ void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm) sock = sock_from_file(fp[i], &err); if (sock) { sock_update_netprioidx(sock->sk, current); - sock_update_classid(sock->sk, current); + sock_update_classid(sock->sk); } fd_install(new_fd, get_file(fp[i])); } diff --git a/net/core/sock.c b/net/core/sock.c index 2ff5f3619a8d..0e1d2fe827ec 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1307,11 +1307,11 @@ static void sk_prot_free(struct proto *prot, struct sock *sk) } #if IS_ENABLED(CONFIG_NET_CLS_CGROUP) -void sock_update_classid(struct sock *sk, struct task_struct *task) +void sock_update_classid(struct sock *sk) { u32 classid; - classid = task_cls_classid(task); + classid = task_cls_classid(current); if (classid != sk->sk_classid) sk->sk_classid = classid; } @@ -1353,7 +1353,7 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority, sock_net_set(sk, get_net(net)); atomic_set(&sk->sk_wmem_alloc, 1); - sock_update_classid(sk, current); + sock_update_classid(sk); sock_update_netprioidx(sk, current); } -- cgit v1.2.3 From 6ffd46410248ee39b46c2cdafb79791c2e618932 Mon Sep 17 00:00:00 2001 From: Zefan Li Date: Mon, 8 Apr 2013 20:03:47 +0000 Subject: netprio_cgroup: remove task_struct parameter from sock_update_netprio() The callers always pass current to sock_update_netprio(). Signed-off-by: Li Zefan Acked-by: Neil Horman Signed-off-by: David S. Miller --- include/net/netprio_cgroup.h | 4 ++-- net/core/scm.c | 2 +- net/core/sock.c | 6 +++--- 3 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include/net') diff --git a/include/net/netprio_cgroup.h b/include/net/netprio_cgroup.h index 1d04b6f0fbd4..50ab8c26ab59 100644 --- a/include/net/netprio_cgroup.h +++ b/include/net/netprio_cgroup.h @@ -29,7 +29,7 @@ struct cgroup_netprio_state { struct cgroup_subsys_state css; }; -extern void sock_update_netprioidx(struct sock *sk, struct task_struct *task); +extern void sock_update_netprioidx(struct sock *sk); #if IS_BUILTIN(CONFIG_NETPRIO_CGROUP) @@ -68,7 +68,7 @@ static inline u32 task_netprioidx(struct task_struct *p) return 0; } -#define sock_update_netprioidx(sk, task) +#define sock_update_netprioidx(sk) #endif /* CONFIG_NETPRIO_CGROUP */ diff --git a/net/core/scm.c b/net/core/scm.c index c77b7c3d2f9d..03795d0147f2 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -290,7 +290,7 @@ void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm) /* Bump the usage count and install the file. */ sock = sock_from_file(fp[i], &err); if (sock) { - sock_update_netprioidx(sock->sk, current); + sock_update_netprioidx(sock->sk); sock_update_classid(sock->sk); } fd_install(new_fd, get_file(fp[i])); diff --git a/net/core/sock.c b/net/core/sock.c index 0e1d2fe827ec..d4f4cea726e7 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1319,12 +1319,12 @@ EXPORT_SYMBOL(sock_update_classid); #endif #if IS_ENABLED(CONFIG_NETPRIO_CGROUP) -void sock_update_netprioidx(struct sock *sk, struct task_struct *task) +void sock_update_netprioidx(struct sock *sk) { if (in_interrupt()) return; - sk->sk_cgrp_prioidx = task_netprioidx(task); + sk->sk_cgrp_prioidx = task_netprioidx(current); } EXPORT_SYMBOL_GPL(sock_update_netprioidx); #endif @@ -1354,7 +1354,7 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority, atomic_set(&sk->sk_wmem_alloc, 1); sock_update_classid(sk); - sock_update_netprioidx(sk, current); + sock_update_netprioidx(sk); } return sk; -- cgit v1.2.3 From 1b8664341100716202c29d67f24d67094a82971e Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 9 Apr 2013 05:54:01 +0000 Subject: net: sctp: introduce uapi header for sctp This patch introduces an UAPI header for the SCTP protocol, so that we can facilitate the maintenance and development of user land applications or libraries, in particular in terms of header synchronization. To not break compatibility, some fragments from lksctp-tools' netinet/sctp.h have been carefully included, while taking care that neither kernel nor user land breaks, so both compile fine with this change (for lksctp-tools I tested with the old netinet/sctp.h header and with a newly adapted one that includes the uapi sctp header). lksctp-tools smoke test run through successfully as well in both cases. Suggested-by: Neil Horman Cc: Neil Horman Cc: Vlad Yasevich Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- fs/dlm/lowcomms.c | 2 +- include/linux/sctp.h | 6 +- include/net/sctp/constants.h | 1 - include/net/sctp/user.h | 782 --------------------------------------- include/uapi/linux/Kbuild | 1 + include/uapi/linux/sctp.h | 846 +++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 850 insertions(+), 788 deletions(-) delete mode 100644 include/net/sctp/user.h create mode 100644 include/uapi/linux/sctp.h (limited to 'include/net') diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index 4f5ad246582f..d0ccd2fd79eb 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c @@ -52,8 +52,8 @@ #include #include #include +#include #include -#include #include #include "dlm_internal.h" diff --git a/include/linux/sctp.h b/include/linux/sctp.h index c11a28706fa4..3bfe8d6ee248 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -53,7 +53,9 @@ #include /* We need in_addr. */ #include /* We need in6_addr. */ +#include +#include /* Section 3.1. SCTP Common Header Format */ typedef struct sctphdr { @@ -63,14 +65,10 @@ typedef struct sctphdr { __le32 checksum; } __packed sctp_sctphdr_t; -#ifdef __KERNEL__ -#include - static inline struct sctphdr *sctp_hdr(const struct sk_buff *skb) { return (struct sctphdr *)skb_transport_header(skb); } -#endif /* Section 3.2. Chunk Field Descriptions. */ typedef struct sctp_chunkhdr { diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h index a7dd5c50df79..ca50e0751e47 100644 --- a/include/net/sctp/constants.h +++ b/include/net/sctp/constants.h @@ -49,7 +49,6 @@ #include #include /* For ipv6hdr. */ -#include #include /* For TCP states used in sctp_sock_state_t */ /* Value used for stream negotiation. */ diff --git a/include/net/sctp/user.h b/include/net/sctp/user.h deleted file mode 100644 index 9a0ae091366d..000000000000 --- a/include/net/sctp/user.h +++ /dev/null @@ -1,782 +0,0 @@ -/* SCTP kernel implementation - * (C) Copyright IBM Corp. 2001, 2004 - * Copyright (c) 1999-2000 Cisco, Inc. - * Copyright (c) 1999-2001 Motorola, Inc. - * Copyright (c) 2002 Intel Corp. - * - * This file is part of the SCTP kernel implementation - * - * This header represents the structures and constants needed to support - * the SCTP Extension to the Sockets API. - * - * This SCTP implementation is free software; - * you can redistribute it and/or modify it under the terms of - * the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This SCTP implementation is distributed in the hope that it - * will be useful, but WITHOUT ANY WARRANTY; without even the implied - * ************************ - * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - * See the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU CC; see the file COPYING. If not, write to - * the Free Software Foundation, 59 Temple Place - Suite 330, - * Boston, MA 02111-1307, USA. - * - * Please send any bug reports or fixes you make to the - * email address(es): - * lksctp developers - * - * Or submit a bug report through the following website: - * http://www.sf.net/projects/lksctp - * - * Written or modified by: - * La Monte H.P. Yarroll - * R. Stewart - * K. Morneau - * Q. Xie - * Karl Knutson - * Jon Grimm - * Daisy Chang - * Ryan Layer - * Ardelle Fan - * Sridhar Samudrala - * - * Any bugs reported given to us we will try to fix... any fixes shared will - * be incorporated into the next SCTP release. - */ - -#ifndef __net_sctp_user_h__ -#define __net_sctp_user_h__ - -#include -#include - -typedef __s32 sctp_assoc_t; - -/* The following symbols come from the Sockets API Extensions for - * SCTP . - */ -#define SCTP_RTOINFO 0 -#define SCTP_ASSOCINFO 1 -#define SCTP_INITMSG 2 -#define SCTP_NODELAY 3 /* Get/set nodelay option. */ -#define SCTP_AUTOCLOSE 4 -#define SCTP_SET_PEER_PRIMARY_ADDR 5 -#define SCTP_PRIMARY_ADDR 6 -#define SCTP_ADAPTATION_LAYER 7 -#define SCTP_DISABLE_FRAGMENTS 8 -#define SCTP_PEER_ADDR_PARAMS 9 -#define SCTP_DEFAULT_SEND_PARAM 10 -#define SCTP_EVENTS 11 -#define SCTP_I_WANT_MAPPED_V4_ADDR 12 /* Turn on/off mapped v4 addresses */ -#define SCTP_MAXSEG 13 /* Get/set maximum fragment. */ -#define SCTP_STATUS 14 -#define SCTP_GET_PEER_ADDR_INFO 15 -#define SCTP_DELAYED_ACK_TIME 16 -#define SCTP_DELAYED_ACK SCTP_DELAYED_ACK_TIME -#define SCTP_DELAYED_SACK SCTP_DELAYED_ACK_TIME -#define SCTP_CONTEXT 17 -#define SCTP_FRAGMENT_INTERLEAVE 18 -#define SCTP_PARTIAL_DELIVERY_POINT 19 /* Set/Get partial delivery point */ -#define SCTP_MAX_BURST 20 /* Set/Get max burst */ -#define SCTP_AUTH_CHUNK 21 /* Set only: add a chunk type to authenticate */ -#define SCTP_HMAC_IDENT 22 -#define SCTP_AUTH_KEY 23 -#define SCTP_AUTH_ACTIVE_KEY 24 -#define SCTP_AUTH_DELETE_KEY 25 -#define SCTP_PEER_AUTH_CHUNKS 26 /* Read only */ -#define SCTP_LOCAL_AUTH_CHUNKS 27 /* Read only */ -#define SCTP_GET_ASSOC_NUMBER 28 /* Read only */ -#define SCTP_GET_ASSOC_ID_LIST 29 /* Read only */ -#define SCTP_AUTO_ASCONF 30 -#define SCTP_PEER_ADDR_THLDS 31 - -/* Internal Socket Options. Some of the sctp library functions are - * implemented using these socket options. - */ -#define SCTP_SOCKOPT_BINDX_ADD 100 /* BINDX requests for adding addrs */ -#define SCTP_SOCKOPT_BINDX_REM 101 /* BINDX requests for removing addrs. */ -#define SCTP_SOCKOPT_PEELOFF 102 /* peel off association. */ -/* Options 104-106 are deprecated and removed. Do not use this space */ -#define SCTP_SOCKOPT_CONNECTX_OLD 107 /* CONNECTX old requests. */ -#define SCTP_GET_PEER_ADDRS 108 /* Get all peer address. */ -#define SCTP_GET_LOCAL_ADDRS 109 /* Get all local address. */ -#define SCTP_SOCKOPT_CONNECTX 110 /* CONNECTX requests. */ -#define SCTP_SOCKOPT_CONNECTX3 111 /* CONNECTX requests (updated) */ -#define SCTP_GET_ASSOC_STATS 112 /* Read only */ - -/* - * 5.2.1 SCTP Initiation Structure (SCTP_INIT) - * - * This cmsghdr structure provides information for initializing new - * SCTP associations with sendmsg(). The SCTP_INITMSG socket option - * uses this same data structure. This structure is not used for - * recvmsg(). - * - * cmsg_level cmsg_type cmsg_data[] - * ------------ ------------ ---------------------- - * IPPROTO_SCTP SCTP_INIT struct sctp_initmsg - * - */ -struct sctp_initmsg { - __u16 sinit_num_ostreams; - __u16 sinit_max_instreams; - __u16 sinit_max_attempts; - __u16 sinit_max_init_timeo; -}; - -/* - * 5.2.2 SCTP Header Information Structure (SCTP_SNDRCV) - * - * This cmsghdr structure specifies SCTP options for sendmsg() and - * describes SCTP header information about a received message through - * recvmsg(). - * - * cmsg_level cmsg_type cmsg_data[] - * ------------ ------------ ---------------------- - * IPPROTO_SCTP SCTP_SNDRCV struct sctp_sndrcvinfo - * - */ -struct sctp_sndrcvinfo { - __u16 sinfo_stream; - __u16 sinfo_ssn; - __u16 sinfo_flags; - __u32 sinfo_ppid; - __u32 sinfo_context; - __u32 sinfo_timetolive; - __u32 sinfo_tsn; - __u32 sinfo_cumtsn; - sctp_assoc_t sinfo_assoc_id; -}; - -/* - * sinfo_flags: 16 bits (unsigned integer) - * - * This field may contain any of the following flags and is composed of - * a bitwise OR of these values. - */ - -enum sctp_sinfo_flags { - SCTP_UNORDERED = 1, /* Send/receive message unordered. */ - SCTP_ADDR_OVER = 2, /* Override the primary destination. */ - SCTP_ABORT=4, /* Send an ABORT message to the peer. */ - SCTP_SACK_IMMEDIATELY = 8, /* SACK should be sent without delay */ - SCTP_EOF=MSG_FIN, /* Initiate graceful shutdown process. */ -}; - - -/* These are cmsg_types. */ -typedef enum sctp_cmsg_type { - SCTP_INIT, /* 5.2.1 SCTP Initiation Structure */ - SCTP_SNDRCV, /* 5.2.2 SCTP Header Information Structure */ -} sctp_cmsg_t; - - -/* - * 5.3.1.1 SCTP_ASSOC_CHANGE - * - * Communication notifications inform the ULP that an SCTP association - * has either begun or ended. The identifier for a new association is - * provided by this notificaion. The notification information has the - * following format: - * - */ -struct sctp_assoc_change { - __u16 sac_type; - __u16 sac_flags; - __u32 sac_length; - __u16 sac_state; - __u16 sac_error; - __u16 sac_outbound_streams; - __u16 sac_inbound_streams; - sctp_assoc_t sac_assoc_id; - __u8 sac_info[0]; -}; - -/* - * sac_state: 32 bits (signed integer) - * - * This field holds one of a number of values that communicate the - * event that happened to the association. They include: - * - * Note: The following state names deviate from the API draft as - * the names clash too easily with other kernel symbols. - */ -enum sctp_sac_state { - SCTP_COMM_UP, - SCTP_COMM_LOST, - SCTP_RESTART, - SCTP_SHUTDOWN_COMP, - SCTP_CANT_STR_ASSOC, -}; - -/* - * 5.3.1.2 SCTP_PEER_ADDR_CHANGE - * - * When a destination address on a multi-homed peer encounters a change - * an interface details event is sent. The information has the - * following structure: - */ -struct sctp_paddr_change { - __u16 spc_type; - __u16 spc_flags; - __u32 spc_length; - struct sockaddr_storage spc_aaddr; - int spc_state; - int spc_error; - sctp_assoc_t spc_assoc_id; -} __attribute__((packed, aligned(4))); - -/* - * spc_state: 32 bits (signed integer) - * - * This field holds one of a number of values that communicate the - * event that happened to the address. They include: - */ -enum sctp_spc_state { - SCTP_ADDR_AVAILABLE, - SCTP_ADDR_UNREACHABLE, - SCTP_ADDR_REMOVED, - SCTP_ADDR_ADDED, - SCTP_ADDR_MADE_PRIM, - SCTP_ADDR_CONFIRMED, -}; - - -/* - * 5.3.1.3 SCTP_REMOTE_ERROR - * - * A remote peer may send an Operational Error message to its peer. - * This message indicates a variety of error conditions on an - * association. The entire error TLV as it appears on the wire is - * included in a SCTP_REMOTE_ERROR event. Please refer to the SCTP - * specification [SCTP] and any extensions for a list of possible - * error formats. SCTP error TLVs have the format: - */ -struct sctp_remote_error { - __u16 sre_type; - __u16 sre_flags; - __u32 sre_length; - __u16 sre_error; - sctp_assoc_t sre_assoc_id; - __u8 sre_data[0]; -}; - - -/* - * 5.3.1.4 SCTP_SEND_FAILED - * - * If SCTP cannot deliver a message it may return the message as a - * notification. - */ -struct sctp_send_failed { - __u16 ssf_type; - __u16 ssf_flags; - __u32 ssf_length; - __u32 ssf_error; - struct sctp_sndrcvinfo ssf_info; - sctp_assoc_t ssf_assoc_id; - __u8 ssf_data[0]; -}; - -/* - * ssf_flags: 16 bits (unsigned integer) - * - * The flag value will take one of the following values - * - * SCTP_DATA_UNSENT - Indicates that the data was never put on - * the wire. - * - * SCTP_DATA_SENT - Indicates that the data was put on the wire. - * Note that this does not necessarily mean that the - * data was (or was not) successfully delivered. - */ -enum sctp_ssf_flags { - SCTP_DATA_UNSENT, - SCTP_DATA_SENT, -}; - -/* - * 5.3.1.5 SCTP_SHUTDOWN_EVENT - * - * When a peer sends a SHUTDOWN, SCTP delivers this notification to - * inform the application that it should cease sending data. - */ -struct sctp_shutdown_event { - __u16 sse_type; - __u16 sse_flags; - __u32 sse_length; - sctp_assoc_t sse_assoc_id; -}; - -/* - * 5.3.1.6 SCTP_ADAPTATION_INDICATION - * - * When a peer sends a Adaptation Layer Indication parameter , SCTP - * delivers this notification to inform the application - * that of the peers requested adaptation layer. - */ -struct sctp_adaptation_event { - __u16 sai_type; - __u16 sai_flags; - __u32 sai_length; - __u32 sai_adaptation_ind; - sctp_assoc_t sai_assoc_id; -}; - -/* - * 5.3.1.7 SCTP_PARTIAL_DELIVERY_EVENT - * - * When a receiver is engaged in a partial delivery of a - * message this notification will be used to indicate - * various events. - */ -struct sctp_pdapi_event { - __u16 pdapi_type; - __u16 pdapi_flags; - __u32 pdapi_length; - __u32 pdapi_indication; - sctp_assoc_t pdapi_assoc_id; -}; - -enum { SCTP_PARTIAL_DELIVERY_ABORTED=0, }; - -struct sctp_authkey_event { - __u16 auth_type; - __u16 auth_flags; - __u32 auth_length; - __u16 auth_keynumber; - __u16 auth_altkeynumber; - __u32 auth_indication; - sctp_assoc_t auth_assoc_id; -}; - -enum { SCTP_AUTH_NEWKEY = 0, }; - -/* - * 6.1.9. SCTP_SENDER_DRY_EVENT - * - * When the SCTP stack has no more user data to send or retransmit, this - * notification is given to the user. Also, at the time when a user app - * subscribes to this event, if there is no data to be sent or - * retransmit, the stack will immediately send up this notification. - */ -struct sctp_sender_dry_event { - __u16 sender_dry_type; - __u16 sender_dry_flags; - __u32 sender_dry_length; - sctp_assoc_t sender_dry_assoc_id; -}; - -/* - * Described in Section 7.3 - * Ancillary Data and Notification Interest Options - */ -struct sctp_event_subscribe { - __u8 sctp_data_io_event; - __u8 sctp_association_event; - __u8 sctp_address_event; - __u8 sctp_send_failure_event; - __u8 sctp_peer_error_event; - __u8 sctp_shutdown_event; - __u8 sctp_partial_delivery_event; - __u8 sctp_adaptation_layer_event; - __u8 sctp_authentication_event; - __u8 sctp_sender_dry_event; -}; - -/* - * 5.3.1 SCTP Notification Structure - * - * The notification structure is defined as the union of all - * notification types. - * - */ -union sctp_notification { - struct { - __u16 sn_type; /* Notification type. */ - __u16 sn_flags; - __u32 sn_length; - } sn_header; - struct sctp_assoc_change sn_assoc_change; - struct sctp_paddr_change sn_paddr_change; - struct sctp_remote_error sn_remote_error; - struct sctp_send_failed sn_send_failed; - struct sctp_shutdown_event sn_shutdown_event; - struct sctp_adaptation_event sn_adaptation_event; - struct sctp_pdapi_event sn_pdapi_event; - struct sctp_authkey_event sn_authkey_event; - struct sctp_sender_dry_event sn_sender_dry_event; -}; - -/* Section 5.3.1 - * All standard values for sn_type flags are greater than 2^15. - * Values from 2^15 and down are reserved. - */ - -enum sctp_sn_type { - SCTP_SN_TYPE_BASE = (1<<15), - SCTP_ASSOC_CHANGE, - SCTP_PEER_ADDR_CHANGE, - SCTP_SEND_FAILED, - SCTP_REMOTE_ERROR, - SCTP_SHUTDOWN_EVENT, - SCTP_PARTIAL_DELIVERY_EVENT, - SCTP_ADAPTATION_INDICATION, - SCTP_AUTHENTICATION_EVENT, -#define SCTP_AUTHENTICATION_INDICATION SCTP_AUTHENTICATION_EVENT - SCTP_SENDER_DRY_EVENT, -}; - -/* Notification error codes used to fill up the error fields in some - * notifications. - * SCTP_PEER_ADDRESS_CHAGE : spc_error - * SCTP_ASSOC_CHANGE : sac_error - * These names should be potentially included in the draft 04 of the SCTP - * sockets API specification. - */ -typedef enum sctp_sn_error { - SCTP_FAILED_THRESHOLD, - SCTP_RECEIVED_SACK, - SCTP_HEARTBEAT_SUCCESS, - SCTP_RESPONSE_TO_USER_REQ, - SCTP_INTERNAL_ERROR, - SCTP_SHUTDOWN_GUARD_EXPIRES, - SCTP_PEER_FAULTY, -} sctp_sn_error_t; - -/* - * 7.1.1 Retransmission Timeout Parameters (SCTP_RTOINFO) - * - * The protocol parameters used to initialize and bound retransmission - * timeout (RTO) are tunable. See [SCTP] for more information on how - * these parameters are used in RTO calculation. - */ -struct sctp_rtoinfo { - sctp_assoc_t srto_assoc_id; - __u32 srto_initial; - __u32 srto_max; - __u32 srto_min; -}; - -/* - * 7.1.2 Association Parameters (SCTP_ASSOCINFO) - * - * This option is used to both examine and set various association and - * endpoint parameters. - */ -struct sctp_assocparams { - sctp_assoc_t sasoc_assoc_id; - __u16 sasoc_asocmaxrxt; - __u16 sasoc_number_peer_destinations; - __u32 sasoc_peer_rwnd; - __u32 sasoc_local_rwnd; - __u32 sasoc_cookie_life; -}; - -/* - * 7.1.9 Set Peer Primary Address (SCTP_SET_PEER_PRIMARY_ADDR) - * - * Requests that the peer mark the enclosed address as the association - * primary. The enclosed address must be one of the association's - * locally bound addresses. The following structure is used to make a - * set primary request: - */ -struct sctp_setpeerprim { - sctp_assoc_t sspp_assoc_id; - struct sockaddr_storage sspp_addr; -} __attribute__((packed, aligned(4))); - -/* - * 7.1.10 Set Primary Address (SCTP_PRIMARY_ADDR) - * - * Requests that the local SCTP stack use the enclosed peer address as - * the association primary. The enclosed address must be one of the - * association peer's addresses. The following structure is used to - * make a set peer primary request: - */ -struct sctp_prim { - sctp_assoc_t ssp_assoc_id; - struct sockaddr_storage ssp_addr; -} __attribute__((packed, aligned(4))); - -/* - * 7.1.11 Set Adaptation Layer Indicator (SCTP_ADAPTATION_LAYER) - * - * Requests that the local endpoint set the specified Adaptation Layer - * Indication parameter for all future INIT and INIT-ACK exchanges. - */ -struct sctp_setadaptation { - __u32 ssb_adaptation_ind; -}; - -/* - * 7.1.13 Peer Address Parameters (SCTP_PEER_ADDR_PARAMS) - * - * Applications can enable or disable heartbeats for any peer address - * of an association, modify an address's heartbeat interval, force a - * heartbeat to be sent immediately, and adjust the address's maximum - * number of retransmissions sent before an address is considered - * unreachable. The following structure is used to access and modify an - * address's parameters: - */ -enum sctp_spp_flags { - SPP_HB_ENABLE = 1<<0, /*Enable heartbeats*/ - SPP_HB_DISABLE = 1<<1, /*Disable heartbeats*/ - SPP_HB = SPP_HB_ENABLE | SPP_HB_DISABLE, - SPP_HB_DEMAND = 1<<2, /*Send heartbeat immediately*/ - SPP_PMTUD_ENABLE = 1<<3, /*Enable PMTU discovery*/ - SPP_PMTUD_DISABLE = 1<<4, /*Disable PMTU discovery*/ - SPP_PMTUD = SPP_PMTUD_ENABLE | SPP_PMTUD_DISABLE, - SPP_SACKDELAY_ENABLE = 1<<5, /*Enable SACK*/ - SPP_SACKDELAY_DISABLE = 1<<6, /*Disable SACK*/ - SPP_SACKDELAY = SPP_SACKDELAY_ENABLE | SPP_SACKDELAY_DISABLE, - SPP_HB_TIME_IS_ZERO = 1<<7, /* Set HB delay to 0 */ -}; - -struct sctp_paddrparams { - sctp_assoc_t spp_assoc_id; - struct sockaddr_storage spp_address; - __u32 spp_hbinterval; - __u16 spp_pathmaxrxt; - __u32 spp_pathmtu; - __u32 spp_sackdelay; - __u32 spp_flags; -} __attribute__((packed, aligned(4))); - -/* - * 7.1.18. Add a chunk that must be authenticated (SCTP_AUTH_CHUNK) - * - * This set option adds a chunk type that the user is requesting to be - * received only in an authenticated way. Changes to the list of chunks - * will only effect future associations on the socket. - */ -struct sctp_authchunk { - __u8 sauth_chunk; -}; - -/* - * 7.1.19. Get or set the list of supported HMAC Identifiers (SCTP_HMAC_IDENT) - * - * This option gets or sets the list of HMAC algorithms that the local - * endpoint requires the peer to use. -*/ -struct sctp_hmacalgo { - __u32 shmac_num_idents; - __u16 shmac_idents[]; -}; - -/* - * 7.1.20. Set a shared key (SCTP_AUTH_KEY) - * - * This option will set a shared secret key which is used to build an - * association shared key. - */ -struct sctp_authkey { - sctp_assoc_t sca_assoc_id; - __u16 sca_keynumber; - __u16 sca_keylength; - __u8 sca_key[]; -}; - -/* - * 7.1.21. Get or set the active shared key (SCTP_AUTH_ACTIVE_KEY) - * - * This option will get or set the active shared key to be used to build - * the association shared key. - */ - -struct sctp_authkeyid { - sctp_assoc_t scact_assoc_id; - __u16 scact_keynumber; -}; - - -/* - * 7.1.23. Get or set delayed ack timer (SCTP_DELAYED_SACK) - * - * This option will effect the way delayed acks are performed. This - * option allows you to get or set the delayed ack time, in - * milliseconds. It also allows changing the delayed ack frequency. - * Changing the frequency to 1 disables the delayed sack algorithm. If - * the assoc_id is 0, then this sets or gets the endpoints default - * values. If the assoc_id field is non-zero, then the set or get - * effects the specified association for the one to many model (the - * assoc_id field is ignored by the one to one model). Note that if - * sack_delay or sack_freq are 0 when setting this option, then the - * current values will remain unchanged. - */ -struct sctp_sack_info { - sctp_assoc_t sack_assoc_id; - uint32_t sack_delay; - uint32_t sack_freq; -}; - -struct sctp_assoc_value { - sctp_assoc_t assoc_id; - uint32_t assoc_value; -}; - -/* - * 7.2.2 Peer Address Information - * - * Applications can retrieve information about a specific peer address - * of an association, including its reachability state, congestion - * window, and retransmission timer values. This information is - * read-only. The following structure is used to access this - * information: - */ -struct sctp_paddrinfo { - sctp_assoc_t spinfo_assoc_id; - struct sockaddr_storage spinfo_address; - __s32 spinfo_state; - __u32 spinfo_cwnd; - __u32 spinfo_srtt; - __u32 spinfo_rto; - __u32 spinfo_mtu; -} __attribute__((packed, aligned(4))); - -/* Peer addresses's state. */ -/* UNKNOWN: Peer address passed by the upper layer in sendmsg or connect[x] - * calls. - * UNCONFIRMED: Peer address received in INIT/INIT-ACK address parameters. - * Not yet confirmed by a heartbeat and not available for data - * transfers. - * ACTIVE : Peer address confirmed, active and available for data transfers. - * INACTIVE: Peer address inactive and not available for data transfers. - */ -enum sctp_spinfo_state { - SCTP_INACTIVE, - SCTP_PF, - SCTP_ACTIVE, - SCTP_UNCONFIRMED, - SCTP_UNKNOWN = 0xffff /* Value used for transport state unknown */ -}; - -/* - * 7.2.1 Association Status (SCTP_STATUS) - * - * Applications can retrieve current status information about an - * association, including association state, peer receiver window size, - * number of unacked data chunks, and number of data chunks pending - * receipt. This information is read-only. The following structure is - * used to access this information: - */ -struct sctp_status { - sctp_assoc_t sstat_assoc_id; - __s32 sstat_state; - __u32 sstat_rwnd; - __u16 sstat_unackdata; - __u16 sstat_penddata; - __u16 sstat_instrms; - __u16 sstat_outstrms; - __u32 sstat_fragmentation_point; - struct sctp_paddrinfo sstat_primary; -}; - -/* - * 7.2.3. Get the list of chunks the peer requires to be authenticated - * (SCTP_PEER_AUTH_CHUNKS) - * - * This option gets a list of chunks for a specified association that - * the peer requires to be received authenticated only. - */ -struct sctp_authchunks { - sctp_assoc_t gauth_assoc_id; - __u32 gauth_number_of_chunks; - uint8_t gauth_chunks[]; -}; - -/* - * 8.2.6. Get the Current Identifiers of Associations - * (SCTP_GET_ASSOC_ID_LIST) - * - * This option gets the current list of SCTP association identifiers of - * the SCTP associations handled by a one-to-many style socket. - */ -struct sctp_assoc_ids { - __u32 gaids_number_of_ids; - sctp_assoc_t gaids_assoc_id[]; -}; - -/* - * 8.3, 8.5 get all peer/local addresses in an association. - * This parameter struct is used by SCTP_GET_PEER_ADDRS and - * SCTP_GET_LOCAL_ADDRS socket options used internally to implement - * sctp_getpaddrs() and sctp_getladdrs() API. - */ -struct sctp_getaddrs_old { - sctp_assoc_t assoc_id; - int addr_num; - struct sockaddr __user *addrs; -}; -struct sctp_getaddrs { - sctp_assoc_t assoc_id; /*input*/ - __u32 addr_num; /*output*/ - __u8 addrs[0]; /*output, variable size*/ -}; - -/* A socket user request obtained via SCTP_GET_ASSOC_STATS that retrieves - * association stats. All stats are counts except sas_maxrto and - * sas_obs_rto_ipaddr. maxrto is the max observed rto + transport since - * the last call. Will return 0 when RTO was not update since last call - */ -struct sctp_assoc_stats { - sctp_assoc_t sas_assoc_id; /* Input */ - /* Transport of observed max RTO */ - struct sockaddr_storage sas_obs_rto_ipaddr; - __u64 sas_maxrto; /* Maximum Observed RTO for period */ - __u64 sas_isacks; /* SACKs received */ - __u64 sas_osacks; /* SACKs sent */ - __u64 sas_opackets; /* Packets sent */ - __u64 sas_ipackets; /* Packets received */ - __u64 sas_rtxchunks; /* Retransmitted Chunks */ - __u64 sas_outofseqtsns;/* TSN received > next expected */ - __u64 sas_idupchunks; /* Dups received (ordered+unordered) */ - __u64 sas_gapcnt; /* Gap Acknowledgements Received */ - __u64 sas_ouodchunks; /* Unordered data chunks sent */ - __u64 sas_iuodchunks; /* Unordered data chunks received */ - __u64 sas_oodchunks; /* Ordered data chunks sent */ - __u64 sas_iodchunks; /* Ordered data chunks received */ - __u64 sas_octrlchunks; /* Control chunks sent */ - __u64 sas_ictrlchunks; /* Control chunks received */ -}; - -/* These are bit fields for msghdr->msg_flags. See section 5.1. */ -/* On user space Linux, these live in as an enum. */ -enum sctp_msg_flags { - MSG_NOTIFICATION = 0x8000, -#define MSG_NOTIFICATION MSG_NOTIFICATION -}; - -/* - * 8.1 sctp_bindx() - * - * The flags parameter is formed from the bitwise OR of zero or more of the - * following currently defined flags: - */ -#define SCTP_BINDX_ADD_ADDR 0x01 -#define SCTP_BINDX_REM_ADDR 0x02 - -/* This is the structure that is passed as an argument(optval) to - * getsockopt(SCTP_SOCKOPT_PEELOFF). - */ -typedef struct { - sctp_assoc_t associd; - int sd; -} sctp_peeloff_arg_t; - -/* - * Peer Address Thresholds socket option - */ -struct sctp_paddrthlds { - sctp_assoc_t spt_assoc_id; - struct sockaddr_storage spt_address; - __u16 spt_pathmaxrxt; - __u16 spt_pathpfthld; -}; -#endif /* __net_sctp_user_h__ */ diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild index 5c8a1d25e21c..7df190525337 100644 --- a/include/uapi/linux/Kbuild +++ b/include/uapi/linux/Kbuild @@ -331,6 +331,7 @@ header-y += rtnetlink.h header-y += scc.h header-y += sched.h header-y += screen_info.h +header-y += sctp.h header-y += sdla.h header-y += seccomp.h header-y += securebits.h diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h new file mode 100644 index 000000000000..66b466e4ca08 --- /dev/null +++ b/include/uapi/linux/sctp.h @@ -0,0 +1,846 @@ +/* SCTP kernel implementation + * (C) Copyright IBM Corp. 2001, 2004 + * Copyright (c) 1999-2000 Cisco, Inc. + * Copyright (c) 1999-2001 Motorola, Inc. + * Copyright (c) 2002 Intel Corp. + * + * This file is part of the SCTP kernel implementation + * + * This header represents the structures and constants needed to support + * the SCTP Extension to the Sockets API. + * + * This SCTP implementation is free software; + * you can redistribute it and/or modify it under the terms of + * the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This SCTP implementation is distributed in the hope that it + * will be useful, but WITHOUT ANY WARRANTY; without even the implied + * ************************ + * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU CC; see the file COPYING. If not, write to + * the Free Software Foundation, 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. + * + * Please send any bug reports or fixes you make to the + * email address(es): + * lksctp developers + * + * Or submit a bug report through the following website: + * http://www.sf.net/projects/lksctp + * + * Written or modified by: + * La Monte H.P. Yarroll + * R. Stewart + * K. Morneau + * Q. Xie + * Karl Knutson + * Jon Grimm + * Daisy Chang + * Ryan Layer + * Ardelle Fan + * Sridhar Samudrala + * Inaky Perez-Gonzalez + * Vlad Yasevich + * + * Any bugs reported given to us we will try to fix... any fixes shared will + * be incorporated into the next SCTP release. + */ + +#ifndef _UAPI_SCTP_H +#define _UAPI_SCTP_H + +#include +#include + +typedef __s32 sctp_assoc_t; + +/* The following symbols come from the Sockets API Extensions for + * SCTP . + */ +#define SCTP_RTOINFO 0 +#define SCTP_ASSOCINFO 1 +#define SCTP_INITMSG 2 +#define SCTP_NODELAY 3 /* Get/set nodelay option. */ +#define SCTP_AUTOCLOSE 4 +#define SCTP_SET_PEER_PRIMARY_ADDR 5 +#define SCTP_PRIMARY_ADDR 6 +#define SCTP_ADAPTATION_LAYER 7 +#define SCTP_DISABLE_FRAGMENTS 8 +#define SCTP_PEER_ADDR_PARAMS 9 +#define SCTP_DEFAULT_SEND_PARAM 10 +#define SCTP_EVENTS 11 +#define SCTP_I_WANT_MAPPED_V4_ADDR 12 /* Turn on/off mapped v4 addresses */ +#define SCTP_MAXSEG 13 /* Get/set maximum fragment. */ +#define SCTP_STATUS 14 +#define SCTP_GET_PEER_ADDR_INFO 15 +#define SCTP_DELAYED_ACK_TIME 16 +#define SCTP_DELAYED_ACK SCTP_DELAYED_ACK_TIME +#define SCTP_DELAYED_SACK SCTP_DELAYED_ACK_TIME +#define SCTP_CONTEXT 17 +#define SCTP_FRAGMENT_INTERLEAVE 18 +#define SCTP_PARTIAL_DELIVERY_POINT 19 /* Set/Get partial delivery point */ +#define SCTP_MAX_BURST 20 /* Set/Get max burst */ +#define SCTP_AUTH_CHUNK 21 /* Set only: add a chunk type to authenticate */ +#define SCTP_HMAC_IDENT 22 +#define SCTP_AUTH_KEY 23 +#define SCTP_AUTH_ACTIVE_KEY 24 +#define SCTP_AUTH_DELETE_KEY 25 +#define SCTP_PEER_AUTH_CHUNKS 26 /* Read only */ +#define SCTP_LOCAL_AUTH_CHUNKS 27 /* Read only */ +#define SCTP_GET_ASSOC_NUMBER 28 /* Read only */ +#define SCTP_GET_ASSOC_ID_LIST 29 /* Read only */ +#define SCTP_AUTO_ASCONF 30 +#define SCTP_PEER_ADDR_THLDS 31 + +/* Internal Socket Options. Some of the sctp library functions are + * implemented using these socket options. + */ +#define SCTP_SOCKOPT_BINDX_ADD 100 /* BINDX requests for adding addrs */ +#define SCTP_SOCKOPT_BINDX_REM 101 /* BINDX requests for removing addrs. */ +#define SCTP_SOCKOPT_PEELOFF 102 /* peel off association. */ +/* Options 104-106 are deprecated and removed. Do not use this space */ +#define SCTP_SOCKOPT_CONNECTX_OLD 107 /* CONNECTX old requests. */ +#define SCTP_GET_PEER_ADDRS 108 /* Get all peer address. */ +#define SCTP_GET_LOCAL_ADDRS 109 /* Get all local address. */ +#define SCTP_SOCKOPT_CONNECTX 110 /* CONNECTX requests. */ +#define SCTP_SOCKOPT_CONNECTX3 111 /* CONNECTX requests (updated) */ +#define SCTP_GET_ASSOC_STATS 112 /* Read only */ + +/* + * 5.2.1 SCTP Initiation Structure (SCTP_INIT) + * + * This cmsghdr structure provides information for initializing new + * SCTP associations with sendmsg(). The SCTP_INITMSG socket option + * uses this same data structure. This structure is not used for + * recvmsg(). + * + * cmsg_level cmsg_type cmsg_data[] + * ------------ ------------ ---------------------- + * IPPROTO_SCTP SCTP_INIT struct sctp_initmsg + * + */ +struct sctp_initmsg { + __u16 sinit_num_ostreams; + __u16 sinit_max_instreams; + __u16 sinit_max_attempts; + __u16 sinit_max_init_timeo; +}; + +/* + * 5.2.2 SCTP Header Information Structure (SCTP_SNDRCV) + * + * This cmsghdr structure specifies SCTP options for sendmsg() and + * describes SCTP header information about a received message through + * recvmsg(). + * + * cmsg_level cmsg_type cmsg_data[] + * ------------ ------------ ---------------------- + * IPPROTO_SCTP SCTP_SNDRCV struct sctp_sndrcvinfo + * + */ +struct sctp_sndrcvinfo { + __u16 sinfo_stream; + __u16 sinfo_ssn; + __u16 sinfo_flags; + __u32 sinfo_ppid; + __u32 sinfo_context; + __u32 sinfo_timetolive; + __u32 sinfo_tsn; + __u32 sinfo_cumtsn; + sctp_assoc_t sinfo_assoc_id; +}; + +/* + * sinfo_flags: 16 bits (unsigned integer) + * + * This field may contain any of the following flags and is composed of + * a bitwise OR of these values. + */ + +enum sctp_sinfo_flags { + SCTP_UNORDERED = 1, /* Send/receive message unordered. */ + SCTP_ADDR_OVER = 2, /* Override the primary destination. */ + SCTP_ABORT=4, /* Send an ABORT message to the peer. */ + SCTP_SACK_IMMEDIATELY = 8, /* SACK should be sent without delay */ + SCTP_EOF=MSG_FIN, /* Initiate graceful shutdown process. */ +}; + +typedef union { + __u8 raw; + struct sctp_initmsg init; + struct sctp_sndrcvinfo sndrcv; +} sctp_cmsg_data_t; + +/* These are cmsg_types. */ +typedef enum sctp_cmsg_type { + SCTP_INIT, /* 5.2.1 SCTP Initiation Structure */ +#define SCTP_INIT SCTP_INIT + SCTP_SNDRCV, /* 5.2.2 SCTP Header Information Structure */ +#define SCTP_SNDRCV SCTP_SNDRCV +} sctp_cmsg_t; + +/* + * 5.3.1.1 SCTP_ASSOC_CHANGE + * + * Communication notifications inform the ULP that an SCTP association + * has either begun or ended. The identifier for a new association is + * provided by this notificaion. The notification information has the + * following format: + * + */ +struct sctp_assoc_change { + __u16 sac_type; + __u16 sac_flags; + __u32 sac_length; + __u16 sac_state; + __u16 sac_error; + __u16 sac_outbound_streams; + __u16 sac_inbound_streams; + sctp_assoc_t sac_assoc_id; + __u8 sac_info[0]; +}; + +/* + * sac_state: 32 bits (signed integer) + * + * This field holds one of a number of values that communicate the + * event that happened to the association. They include: + * + * Note: The following state names deviate from the API draft as + * the names clash too easily with other kernel symbols. + */ +enum sctp_sac_state { + SCTP_COMM_UP, + SCTP_COMM_LOST, + SCTP_RESTART, + SCTP_SHUTDOWN_COMP, + SCTP_CANT_STR_ASSOC, +}; + +/* + * 5.3.1.2 SCTP_PEER_ADDR_CHANGE + * + * When a destination address on a multi-homed peer encounters a change + * an interface details event is sent. The information has the + * following structure: + */ +struct sctp_paddr_change { + __u16 spc_type; + __u16 spc_flags; + __u32 spc_length; + struct sockaddr_storage spc_aaddr; + int spc_state; + int spc_error; + sctp_assoc_t spc_assoc_id; +} __attribute__((packed, aligned(4))); + +/* + * spc_state: 32 bits (signed integer) + * + * This field holds one of a number of values that communicate the + * event that happened to the address. They include: + */ +enum sctp_spc_state { + SCTP_ADDR_AVAILABLE, + SCTP_ADDR_UNREACHABLE, + SCTP_ADDR_REMOVED, + SCTP_ADDR_ADDED, + SCTP_ADDR_MADE_PRIM, + SCTP_ADDR_CONFIRMED, +}; + + +/* + * 5.3.1.3 SCTP_REMOTE_ERROR + * + * A remote peer may send an Operational Error message to its peer. + * This message indicates a variety of error conditions on an + * association. The entire error TLV as it appears on the wire is + * included in a SCTP_REMOTE_ERROR event. Please refer to the SCTP + * specification [SCTP] and any extensions for a list of possible + * error formats. SCTP error TLVs have the format: + */ +struct sctp_remote_error { + __u16 sre_type; + __u16 sre_flags; + __u32 sre_length; + __u16 sre_error; + sctp_assoc_t sre_assoc_id; + __u8 sre_data[0]; +}; + + +/* + * 5.3.1.4 SCTP_SEND_FAILED + * + * If SCTP cannot deliver a message it may return the message as a + * notification. + */ +struct sctp_send_failed { + __u16 ssf_type; + __u16 ssf_flags; + __u32 ssf_length; + __u32 ssf_error; + struct sctp_sndrcvinfo ssf_info; + sctp_assoc_t ssf_assoc_id; + __u8 ssf_data[0]; +}; + +/* + * ssf_flags: 16 bits (unsigned integer) + * + * The flag value will take one of the following values + * + * SCTP_DATA_UNSENT - Indicates that the data was never put on + * the wire. + * + * SCTP_DATA_SENT - Indicates that the data was put on the wire. + * Note that this does not necessarily mean that the + * data was (or was not) successfully delivered. + */ +enum sctp_ssf_flags { + SCTP_DATA_UNSENT, + SCTP_DATA_SENT, +}; + +/* + * 5.3.1.5 SCTP_SHUTDOWN_EVENT + * + * When a peer sends a SHUTDOWN, SCTP delivers this notification to + * inform the application that it should cease sending data. + */ +struct sctp_shutdown_event { + __u16 sse_type; + __u16 sse_flags; + __u32 sse_length; + sctp_assoc_t sse_assoc_id; +}; + +/* + * 5.3.1.6 SCTP_ADAPTATION_INDICATION + * + * When a peer sends a Adaptation Layer Indication parameter , SCTP + * delivers this notification to inform the application + * that of the peers requested adaptation layer. + */ +struct sctp_adaptation_event { + __u16 sai_type; + __u16 sai_flags; + __u32 sai_length; + __u32 sai_adaptation_ind; + sctp_assoc_t sai_assoc_id; +}; + +/* + * 5.3.1.7 SCTP_PARTIAL_DELIVERY_EVENT + * + * When a receiver is engaged in a partial delivery of a + * message this notification will be used to indicate + * various events. + */ +struct sctp_pdapi_event { + __u16 pdapi_type; + __u16 pdapi_flags; + __u32 pdapi_length; + __u32 pdapi_indication; + sctp_assoc_t pdapi_assoc_id; +}; + +enum { SCTP_PARTIAL_DELIVERY_ABORTED=0, }; + +/* + * 5.3.1.8. SCTP_AUTHENTICATION_EVENT + * + * When a receiver is using authentication this message will provide + * notifications regarding new keys being made active as well as errors. + */ +struct sctp_authkey_event { + __u16 auth_type; + __u16 auth_flags; + __u32 auth_length; + __u16 auth_keynumber; + __u16 auth_altkeynumber; + __u32 auth_indication; + sctp_assoc_t auth_assoc_id; +}; + +enum { SCTP_AUTH_NEWKEY = 0, }; + +/* + * 6.1.9. SCTP_SENDER_DRY_EVENT + * + * When the SCTP stack has no more user data to send or retransmit, this + * notification is given to the user. Also, at the time when a user app + * subscribes to this event, if there is no data to be sent or + * retransmit, the stack will immediately send up this notification. + */ +struct sctp_sender_dry_event { + __u16 sender_dry_type; + __u16 sender_dry_flags; + __u32 sender_dry_length; + sctp_assoc_t sender_dry_assoc_id; +}; + +/* + * Described in Section 7.3 + * Ancillary Data and Notification Interest Options + */ +struct sctp_event_subscribe { + __u8 sctp_data_io_event; + __u8 sctp_association_event; + __u8 sctp_address_event; + __u8 sctp_send_failure_event; + __u8 sctp_peer_error_event; + __u8 sctp_shutdown_event; + __u8 sctp_partial_delivery_event; + __u8 sctp_adaptation_layer_event; + __u8 sctp_authentication_event; + __u8 sctp_sender_dry_event; +}; + +/* + * 5.3.1 SCTP Notification Structure + * + * The notification structure is defined as the union of all + * notification types. + * + */ +union sctp_notification { + struct { + __u16 sn_type; /* Notification type. */ + __u16 sn_flags; + __u32 sn_length; + } sn_header; + struct sctp_assoc_change sn_assoc_change; + struct sctp_paddr_change sn_paddr_change; + struct sctp_remote_error sn_remote_error; + struct sctp_send_failed sn_send_failed; + struct sctp_shutdown_event sn_shutdown_event; + struct sctp_adaptation_event sn_adaptation_event; + struct sctp_pdapi_event sn_pdapi_event; + struct sctp_authkey_event sn_authkey_event; + struct sctp_sender_dry_event sn_sender_dry_event; +}; + +/* Section 5.3.1 + * All standard values for sn_type flags are greater than 2^15. + * Values from 2^15 and down are reserved. + */ + +enum sctp_sn_type { + SCTP_SN_TYPE_BASE = (1<<15), + SCTP_ASSOC_CHANGE, +#define SCTP_ASSOC_CHANGE SCTP_ASSOC_CHANGE + SCTP_PEER_ADDR_CHANGE, +#define SCTP_PEER_ADDR_CHANGE SCTP_PEER_ADDR_CHANGE + SCTP_SEND_FAILED, +#define SCTP_SEND_FAILED SCTP_SEND_FAILED + SCTP_REMOTE_ERROR, +#define SCTP_REMOTE_ERROR SCTP_REMOTE_ERROR + SCTP_SHUTDOWN_EVENT, +#define SCTP_SHUTDOWN_EVENT SCTP_SHUTDOWN_EVENT + SCTP_PARTIAL_DELIVERY_EVENT, +#define SCTP_PARTIAL_DELIVERY_EVENT SCTP_PARTIAL_DELIVERY_EVENT + SCTP_ADAPTATION_INDICATION, +#define SCTP_ADAPTATION_INDICATION SCTP_ADAPTATION_INDICATION + SCTP_AUTHENTICATION_EVENT, +#define SCTP_AUTHENTICATION_INDICATION SCTP_AUTHENTICATION_EVENT + SCTP_SENDER_DRY_EVENT, +#define SCTP_SENDER_DRY_EVENT SCTP_SENDER_DRY_EVENT +}; + +/* Notification error codes used to fill up the error fields in some + * notifications. + * SCTP_PEER_ADDRESS_CHAGE : spc_error + * SCTP_ASSOC_CHANGE : sac_error + * These names should be potentially included in the draft 04 of the SCTP + * sockets API specification. + */ +typedef enum sctp_sn_error { + SCTP_FAILED_THRESHOLD, + SCTP_RECEIVED_SACK, + SCTP_HEARTBEAT_SUCCESS, + SCTP_RESPONSE_TO_USER_REQ, + SCTP_INTERNAL_ERROR, + SCTP_SHUTDOWN_GUARD_EXPIRES, + SCTP_PEER_FAULTY, +} sctp_sn_error_t; + +/* + * 7.1.1 Retransmission Timeout Parameters (SCTP_RTOINFO) + * + * The protocol parameters used to initialize and bound retransmission + * timeout (RTO) are tunable. See [SCTP] for more information on how + * these parameters are used in RTO calculation. + */ +struct sctp_rtoinfo { + sctp_assoc_t srto_assoc_id; + __u32 srto_initial; + __u32 srto_max; + __u32 srto_min; +}; + +/* + * 7.1.2 Association Parameters (SCTP_ASSOCINFO) + * + * This option is used to both examine and set various association and + * endpoint parameters. + */ +struct sctp_assocparams { + sctp_assoc_t sasoc_assoc_id; + __u16 sasoc_asocmaxrxt; + __u16 sasoc_number_peer_destinations; + __u32 sasoc_peer_rwnd; + __u32 sasoc_local_rwnd; + __u32 sasoc_cookie_life; +}; + +/* + * 7.1.9 Set Peer Primary Address (SCTP_SET_PEER_PRIMARY_ADDR) + * + * Requests that the peer mark the enclosed address as the association + * primary. The enclosed address must be one of the association's + * locally bound addresses. The following structure is used to make a + * set primary request: + */ +struct sctp_setpeerprim { + sctp_assoc_t sspp_assoc_id; + struct sockaddr_storage sspp_addr; +} __attribute__((packed, aligned(4))); + +/* + * 7.1.10 Set Primary Address (SCTP_PRIMARY_ADDR) + * + * Requests that the local SCTP stack use the enclosed peer address as + * the association primary. The enclosed address must be one of the + * association peer's addresses. The following structure is used to + * make a set peer primary request: + */ +struct sctp_prim { + sctp_assoc_t ssp_assoc_id; + struct sockaddr_storage ssp_addr; +} __attribute__((packed, aligned(4))); + +/* For backward compatibility use, define the old name too */ +#define sctp_setprim sctp_prim + +/* + * 7.1.11 Set Adaptation Layer Indicator (SCTP_ADAPTATION_LAYER) + * + * Requests that the local endpoint set the specified Adaptation Layer + * Indication parameter for all future INIT and INIT-ACK exchanges. + */ +struct sctp_setadaptation { + __u32 ssb_adaptation_ind; +}; + +/* + * 7.1.13 Peer Address Parameters (SCTP_PEER_ADDR_PARAMS) + * + * Applications can enable or disable heartbeats for any peer address + * of an association, modify an address's heartbeat interval, force a + * heartbeat to be sent immediately, and adjust the address's maximum + * number of retransmissions sent before an address is considered + * unreachable. The following structure is used to access and modify an + * address's parameters: + */ +enum sctp_spp_flags { + SPP_HB_ENABLE = 1<<0, /*Enable heartbeats*/ + SPP_HB_DISABLE = 1<<1, /*Disable heartbeats*/ + SPP_HB = SPP_HB_ENABLE | SPP_HB_DISABLE, + SPP_HB_DEMAND = 1<<2, /*Send heartbeat immediately*/ + SPP_PMTUD_ENABLE = 1<<3, /*Enable PMTU discovery*/ + SPP_PMTUD_DISABLE = 1<<4, /*Disable PMTU discovery*/ + SPP_PMTUD = SPP_PMTUD_ENABLE | SPP_PMTUD_DISABLE, + SPP_SACKDELAY_ENABLE = 1<<5, /*Enable SACK*/ + SPP_SACKDELAY_DISABLE = 1<<6, /*Disable SACK*/ + SPP_SACKDELAY = SPP_SACKDELAY_ENABLE | SPP_SACKDELAY_DISABLE, + SPP_HB_TIME_IS_ZERO = 1<<7, /* Set HB delay to 0 */ +}; + +struct sctp_paddrparams { + sctp_assoc_t spp_assoc_id; + struct sockaddr_storage spp_address; + __u32 spp_hbinterval; + __u16 spp_pathmaxrxt; + __u32 spp_pathmtu; + __u32 spp_sackdelay; + __u32 spp_flags; +} __attribute__((packed, aligned(4))); + +/* + * 7.1.18. Add a chunk that must be authenticated (SCTP_AUTH_CHUNK) + * + * This set option adds a chunk type that the user is requesting to be + * received only in an authenticated way. Changes to the list of chunks + * will only effect future associations on the socket. + */ +struct sctp_authchunk { + __u8 sauth_chunk; +}; + +/* + * 7.1.19. Get or set the list of supported HMAC Identifiers (SCTP_HMAC_IDENT) + * + * This option gets or sets the list of HMAC algorithms that the local + * endpoint requires the peer to use. + */ +#ifndef __KERNEL__ +/* This here is only used by user space as is. It might not be a good idea + * to export/reveal the whole structure with reserved fields etc. + */ +enum { + SCTP_AUTH_HMAC_ID_SHA1 = 1, + SCTP_AUTH_HMAC_ID_SHA256 = 3, +}; +#endif + +struct sctp_hmacalgo { + __u32 shmac_num_idents; + __u16 shmac_idents[]; +}; + +/* Sadly, user and kernel space have different names for + * this structure member, so this is to not break anything. + */ +#define shmac_number_of_idents shmac_num_idents + +/* + * 7.1.20. Set a shared key (SCTP_AUTH_KEY) + * + * This option will set a shared secret key which is used to build an + * association shared key. + */ +struct sctp_authkey { + sctp_assoc_t sca_assoc_id; + __u16 sca_keynumber; + __u16 sca_keylength; + __u8 sca_key[]; +}; + +/* + * 7.1.21. Get or set the active shared key (SCTP_AUTH_ACTIVE_KEY) + * + * This option will get or set the active shared key to be used to build + * the association shared key. + */ + +struct sctp_authkeyid { + sctp_assoc_t scact_assoc_id; + __u16 scact_keynumber; +}; + + +/* + * 7.1.23. Get or set delayed ack timer (SCTP_DELAYED_SACK) + * + * This option will effect the way delayed acks are performed. This + * option allows you to get or set the delayed ack time, in + * milliseconds. It also allows changing the delayed ack frequency. + * Changing the frequency to 1 disables the delayed sack algorithm. If + * the assoc_id is 0, then this sets or gets the endpoints default + * values. If the assoc_id field is non-zero, then the set or get + * effects the specified association for the one to many model (the + * assoc_id field is ignored by the one to one model). Note that if + * sack_delay or sack_freq are 0 when setting this option, then the + * current values will remain unchanged. + */ +struct sctp_sack_info { + sctp_assoc_t sack_assoc_id; + uint32_t sack_delay; + uint32_t sack_freq; +}; + +struct sctp_assoc_value { + sctp_assoc_t assoc_id; + uint32_t assoc_value; +}; + +/* + * 7.2.2 Peer Address Information + * + * Applications can retrieve information about a specific peer address + * of an association, including its reachability state, congestion + * window, and retransmission timer values. This information is + * read-only. The following structure is used to access this + * information: + */ +struct sctp_paddrinfo { + sctp_assoc_t spinfo_assoc_id; + struct sockaddr_storage spinfo_address; + __s32 spinfo_state; + __u32 spinfo_cwnd; + __u32 spinfo_srtt; + __u32 spinfo_rto; + __u32 spinfo_mtu; +} __attribute__((packed, aligned(4))); + +/* Peer addresses's state. */ +/* UNKNOWN: Peer address passed by the upper layer in sendmsg or connect[x] + * calls. + * UNCONFIRMED: Peer address received in INIT/INIT-ACK address parameters. + * Not yet confirmed by a heartbeat and not available for data + * transfers. + * ACTIVE : Peer address confirmed, active and available for data transfers. + * INACTIVE: Peer address inactive and not available for data transfers. + */ +enum sctp_spinfo_state { + SCTP_INACTIVE, + SCTP_PF, + SCTP_ACTIVE, + SCTP_UNCONFIRMED, + SCTP_UNKNOWN = 0xffff /* Value used for transport state unknown */ +}; + +/* + * 7.2.1 Association Status (SCTP_STATUS) + * + * Applications can retrieve current status information about an + * association, including association state, peer receiver window size, + * number of unacked data chunks, and number of data chunks pending + * receipt. This information is read-only. The following structure is + * used to access this information: + */ +struct sctp_status { + sctp_assoc_t sstat_assoc_id; + __s32 sstat_state; + __u32 sstat_rwnd; + __u16 sstat_unackdata; + __u16 sstat_penddata; + __u16 sstat_instrms; + __u16 sstat_outstrms; + __u32 sstat_fragmentation_point; + struct sctp_paddrinfo sstat_primary; +}; + +/* + * 7.2.3. Get the list of chunks the peer requires to be authenticated + * (SCTP_PEER_AUTH_CHUNKS) + * + * This option gets a list of chunks for a specified association that + * the peer requires to be received authenticated only. + */ +struct sctp_authchunks { + sctp_assoc_t gauth_assoc_id; + __u32 gauth_number_of_chunks; + uint8_t gauth_chunks[]; +}; + +/* The broken spelling has been released already in lksctp-tools header, + * so don't break anyone, now that it's fixed. + */ +#define guth_number_of_chunks gauth_number_of_chunks + +/* Association states. */ +enum sctp_sstat_state { + SCTP_EMPTY = 0, + SCTP_CLOSED = 1, + SCTP_COOKIE_WAIT = 2, + SCTP_COOKIE_ECHOED = 3, + SCTP_ESTABLISHED = 4, + SCTP_SHUTDOWN_PENDING = 5, + SCTP_SHUTDOWN_SENT = 6, + SCTP_SHUTDOWN_RECEIVED = 7, + SCTP_SHUTDOWN_ACK_SENT = 8, +}; + +/* + * 8.2.6. Get the Current Identifiers of Associations + * (SCTP_GET_ASSOC_ID_LIST) + * + * This option gets the current list of SCTP association identifiers of + * the SCTP associations handled by a one-to-many style socket. + */ +struct sctp_assoc_ids { + __u32 gaids_number_of_ids; + sctp_assoc_t gaids_assoc_id[]; +}; + +/* + * 8.3, 8.5 get all peer/local addresses in an association. + * This parameter struct is used by SCTP_GET_PEER_ADDRS and + * SCTP_GET_LOCAL_ADDRS socket options used internally to implement + * sctp_getpaddrs() and sctp_getladdrs() API. + */ +struct sctp_getaddrs_old { + sctp_assoc_t assoc_id; + int addr_num; +#ifdef __KERNEL__ + struct sockaddr __user *addrs; +#else + struct sockaddr *addrs; +#endif +}; + +struct sctp_getaddrs { + sctp_assoc_t assoc_id; /*input*/ + __u32 addr_num; /*output*/ + __u8 addrs[0]; /*output, variable size*/ +}; + +/* A socket user request obtained via SCTP_GET_ASSOC_STATS that retrieves + * association stats. All stats are counts except sas_maxrto and + * sas_obs_rto_ipaddr. maxrto is the max observed rto + transport since + * the last call. Will return 0 when RTO was not update since last call + */ +struct sctp_assoc_stats { + sctp_assoc_t sas_assoc_id; /* Input */ + /* Transport of observed max RTO */ + struct sockaddr_storage sas_obs_rto_ipaddr; + __u64 sas_maxrto; /* Maximum Observed RTO for period */ + __u64 sas_isacks; /* SACKs received */ + __u64 sas_osacks; /* SACKs sent */ + __u64 sas_opackets; /* Packets sent */ + __u64 sas_ipackets; /* Packets received */ + __u64 sas_rtxchunks; /* Retransmitted Chunks */ + __u64 sas_outofseqtsns;/* TSN received > next expected */ + __u64 sas_idupchunks; /* Dups received (ordered+unordered) */ + __u64 sas_gapcnt; /* Gap Acknowledgements Received */ + __u64 sas_ouodchunks; /* Unordered data chunks sent */ + __u64 sas_iuodchunks; /* Unordered data chunks received */ + __u64 sas_oodchunks; /* Ordered data chunks sent */ + __u64 sas_iodchunks; /* Ordered data chunks received */ + __u64 sas_octrlchunks; /* Control chunks sent */ + __u64 sas_ictrlchunks; /* Control chunks received */ +}; + +/* These are bit fields for msghdr->msg_flags. See section 5.1. */ +/* On user space Linux, these live in as an enum. */ +enum sctp_msg_flags { + MSG_NOTIFICATION = 0x8000, +#define MSG_NOTIFICATION MSG_NOTIFICATION +}; + +/* + * 8.1 sctp_bindx() + * + * The flags parameter is formed from the bitwise OR of zero or more of the + * following currently defined flags: + */ +#define SCTP_BINDX_ADD_ADDR 0x01 +#define SCTP_BINDX_REM_ADDR 0x02 + +/* This is the structure that is passed as an argument(optval) to + * getsockopt(SCTP_SOCKOPT_PEELOFF). + */ +typedef struct { + sctp_assoc_t associd; + int sd; +} sctp_peeloff_arg_t; + +/* + * Peer Address Thresholds socket option + */ +struct sctp_paddrthlds { + sctp_assoc_t spt_assoc_id; + struct sockaddr_storage spt_address; + __u16 spt_pathmaxrxt; + __u16 spt_pathpfthld; +}; + +#endif /* _UAPI_SCTP_H */ -- cgit v1.2.3 From 0ca54f6c5fd4ce58aa044d1fc7f00d7f6cf2801c Mon Sep 17 00:00:00 2001 From: Marek Puzyniak Date: Wed, 10 Apr 2013 13:19:13 +0200 Subject: mac80211: provide SSID in IBSS mode Some drivers need SSID in AP and IBSS mode. AP SSID is provided through BSS_CHANGED_SSID notification. There was no easy way to do the same for IBSS. In IBSS mode SSID is known but was not stored in BSS configuration. Extend the AP-mode functionality to also work in IBSS mode. Signed-off-by: Marek Puzyniak Signed-off-by: Johannes Berg --- include/net/mac80211.h | 4 ++-- net/mac80211/ibss.c | 4 ++++ 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 64faf015dd1e..0dde213dd3b6 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -209,7 +209,7 @@ struct ieee80211_chanctx_conf { * @BSS_CHANGED_QOS: QoS for this association was enabled/disabled. Note * that it is only ever disabled for station mode. * @BSS_CHANGED_IDLE: Idle changed for this BSS/interface. - * @BSS_CHANGED_SSID: SSID changed for this BSS (AP mode) + * @BSS_CHANGED_SSID: SSID changed for this BSS (AP and IBSS mode) * @BSS_CHANGED_AP_PROBE_RESP: Probe Response changed for this BSS (AP mode) * @BSS_CHANGED_PS: PS changed for this BSS (STA mode) * @BSS_CHANGED_TXPOWER: TX power setting changed for this interface @@ -326,7 +326,7 @@ enum ieee80211_rssi_event { * your driver/device needs to do. * @ps: power-save mode (STA only). This flag is NOT affected by * offchannel/dynamic_ps operations. - * @ssid: The SSID of the current vif. Only valid in AP-mode. + * @ssid: The SSID of the current vif. Valid in AP and IBSS mode. * @ssid_len: Length of SSID given in @ssid. * @hidden_ssid: The SSID of the current vif is hidden. Only valid in AP-mode. * @txpower: TX power in dBm diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 2a0b2186d98f..b7bf6d76f1d9 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -209,6 +209,8 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, sdata->vif.bss_conf.enable_beacon = true; sdata->vif.bss_conf.beacon_int = beacon_int; sdata->vif.bss_conf.basic_rates = basic_rates; + sdata->vif.bss_conf.ssid_len = ifibss->ssid_len; + memcpy(sdata->vif.bss_conf.ssid, ifibss->ssid, ifibss->ssid_len); bss_change = BSS_CHANGED_BEACON_INT; bss_change |= ieee80211_reset_erp_info(sdata); bss_change |= BSS_CHANGED_BSSID; @@ -217,6 +219,7 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, bss_change |= BSS_CHANGED_BASIC_RATES; bss_change |= BSS_CHANGED_HT; bss_change |= BSS_CHANGED_IBSS; + bss_change |= BSS_CHANGED_SSID; /* * In 5 GHz/802.11a, we can always use short slot time. @@ -1159,6 +1162,7 @@ int ieee80211_ibss_leave(struct ieee80211_sub_if_data *sdata) sdata->vif.bss_conf.ibss_joined = false; sdata->vif.bss_conf.ibss_creator = false; sdata->vif.bss_conf.enable_beacon = false; + sdata->vif.bss_conf.ssid_len = 0; clear_bit(SDATA_STATE_OFFCHANNEL_BEACON_STOPPED, &sdata->state); ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON_ENABLED | BSS_CHANGED_IBSS); -- cgit v1.2.3 From 76a68ba0ae097be72dfa8f918b3139130da769a4 Mon Sep 17 00:00:00 2001 From: David Herrmann Date: Sat, 6 Apr 2013 20:28:37 +0200 Subject: Bluetooth: rename hci_conn_put to hci_conn_drop We use _get() and _put() for device ref-counting in the kernel. However, hci_conn_put() is _not_ used for ref-counting, hence, rename it to hci_conn_drop() so we can later fix ref-counting and introduce hci_conn_put(). hci_conn_hold() and hci_conn_put() are currently used to manage how long a connection should be held alive. When the last user drops the connection, we spawn a delayed work that performs the disconnect. Obviously, this has nothing to do with ref-counting for the _object_ but rather for the keep-alive of the connection. But we really _need_ proper ref-counting for the _object_ to allow connection-users like rfcomm-tty, HIDP or others. Signed-off-by: David Herrmann Acked-by: Marcel Holtmann Signed-off-by: Gustavo Padovan --- include/net/bluetooth/hci_core.h | 2 +- net/bluetooth/hci_conn.c | 6 +++--- net/bluetooth/hci_event.c | 36 ++++++++++++++++++------------------ net/bluetooth/l2cap_core.c | 6 +++--- net/bluetooth/mgmt.c | 6 +++--- net/bluetooth/sco.c | 6 +++--- net/bluetooth/smp.c | 2 +- 7 files changed, 32 insertions(+), 32 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index d4e13bf5ae59..78ea9c7c202c 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -612,7 +612,7 @@ static inline void hci_conn_hold(struct hci_conn *conn) cancel_delayed_work(&conn->disc_work); } -static inline void hci_conn_put(struct hci_conn *conn) +static inline void hci_conn_drop(struct hci_conn *conn) { BT_DBG("hcon %p orig refcnt %d", conn, atomic_read(&conn->refcnt)); diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index b9f90169940b..30d7dfc23002 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -433,7 +433,7 @@ int hci_conn_del(struct hci_conn *conn) struct hci_conn *acl = conn->link; if (acl) { acl->link = NULL; - hci_conn_put(acl); + hci_conn_drop(acl); } } @@ -565,7 +565,7 @@ static struct hci_conn *hci_connect_sco(struct hci_dev *hdev, int type, if (!sco) { sco = hci_conn_add(hdev, type, dst); if (!sco) { - hci_conn_put(acl); + hci_conn_drop(acl); return ERR_PTR(-ENOMEM); } } @@ -980,7 +980,7 @@ void hci_chan_del(struct hci_chan *chan) synchronize_rcu(); - hci_conn_put(conn); + hci_conn_drop(conn); skb_queue_purge(&chan->data_q); kfree(chan); diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 0a2b128d2cc9..2cf28b198b31 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -1190,7 +1190,7 @@ static void hci_cs_auth_requested(struct hci_dev *hdev, __u8 status) if (conn) { if (conn->state == BT_CONFIG) { hci_proto_connect_cfm(conn, status); - hci_conn_put(conn); + hci_conn_drop(conn); } } @@ -1217,7 +1217,7 @@ static void hci_cs_set_conn_encrypt(struct hci_dev *hdev, __u8 status) if (conn) { if (conn->state == BT_CONFIG) { hci_proto_connect_cfm(conn, status); - hci_conn_put(conn); + hci_conn_drop(conn); } } @@ -1379,7 +1379,7 @@ static void hci_cs_read_remote_features(struct hci_dev *hdev, __u8 status) if (conn) { if (conn->state == BT_CONFIG) { hci_proto_connect_cfm(conn, status); - hci_conn_put(conn); + hci_conn_drop(conn); } } @@ -1406,7 +1406,7 @@ static void hci_cs_read_remote_ext_features(struct hci_dev *hdev, __u8 status) if (conn) { if (conn->state == BT_CONFIG) { hci_proto_connect_cfm(conn, status); - hci_conn_put(conn); + hci_conn_drop(conn); } } @@ -1860,7 +1860,7 @@ static void hci_conn_request_evt(struct hci_dev *hdev, struct sk_buff *skb) } else { conn->state = BT_CONNECT2; hci_proto_connect_cfm(conn, 0); - hci_conn_put(conn); + hci_conn_drop(conn); } } else { /* Connection rejected */ @@ -1967,14 +1967,14 @@ static void hci_auth_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) } else { conn->state = BT_CONNECTED; hci_proto_connect_cfm(conn, ev->status); - hci_conn_put(conn); + hci_conn_drop(conn); } } else { hci_auth_cfm(conn, ev->status); hci_conn_hold(conn); conn->disc_timeout = HCI_DISCONN_TIMEOUT; - hci_conn_put(conn); + hci_conn_drop(conn); } if (test_bit(HCI_CONN_ENCRYPT_PEND, &conn->flags)) { @@ -2058,7 +2058,7 @@ static void hci_encrypt_change_evt(struct hci_dev *hdev, struct sk_buff *skb) if (ev->status && conn->state == BT_CONNECTED) { hci_disconnect(conn, HCI_ERROR_AUTH_FAILURE); - hci_conn_put(conn); + hci_conn_drop(conn); goto unlock; } @@ -2067,7 +2067,7 @@ static void hci_encrypt_change_evt(struct hci_dev *hdev, struct sk_buff *skb) conn->state = BT_CONNECTED; hci_proto_connect_cfm(conn, ev->status); - hci_conn_put(conn); + hci_conn_drop(conn); } else hci_encrypt_cfm(conn, ev->status, ev->encrypt); } @@ -2142,7 +2142,7 @@ static void hci_remote_features_evt(struct hci_dev *hdev, if (!hci_outgoing_auth_needed(hdev, conn)) { conn->state = BT_CONNECTED; hci_proto_connect_cfm(conn, ev->status); - hci_conn_put(conn); + hci_conn_drop(conn); } unlock: @@ -2682,7 +2682,7 @@ static void hci_pin_code_request_evt(struct hci_dev *hdev, struct sk_buff *skb) if (conn->state == BT_CONNECTED) { hci_conn_hold(conn); conn->disc_timeout = HCI_PAIRING_TIMEOUT; - hci_conn_put(conn); + hci_conn_drop(conn); } if (!test_bit(HCI_PAIRABLE, &hdev->dev_flags)) @@ -2785,7 +2785,7 @@ static void hci_link_key_notify_evt(struct hci_dev *hdev, struct sk_buff *skb) if (ev->key_type != HCI_LK_CHANGED_COMBINATION) conn->key_type = ev->key_type; - hci_conn_put(conn); + hci_conn_drop(conn); } if (test_bit(HCI_LINK_KEYS, &hdev->dev_flags)) @@ -2954,7 +2954,7 @@ static void hci_remote_ext_features_evt(struct hci_dev *hdev, if (!hci_outgoing_auth_needed(hdev, conn)) { conn->state = BT_CONNECTED; hci_proto_connect_cfm(conn, ev->status); - hci_conn_put(conn); + hci_conn_drop(conn); } unlock: @@ -3087,7 +3087,7 @@ static void hci_key_refresh_complete_evt(struct hci_dev *hdev, if (ev->status && conn->state == BT_CONNECTED) { hci_disconnect(conn, HCI_ERROR_AUTH_FAILURE); - hci_conn_put(conn); + hci_conn_drop(conn); goto unlock; } @@ -3096,13 +3096,13 @@ static void hci_key_refresh_complete_evt(struct hci_dev *hdev, conn->state = BT_CONNECTED; hci_proto_connect_cfm(conn, ev->status); - hci_conn_put(conn); + hci_conn_drop(conn); } else { hci_auth_cfm(conn, ev->status); hci_conn_hold(conn); conn->disc_timeout = HCI_DISCONN_TIMEOUT; - hci_conn_put(conn); + hci_conn_drop(conn); } unlock: @@ -3363,7 +3363,7 @@ static void hci_simple_pair_complete_evt(struct hci_dev *hdev, mgmt_auth_failed(hdev, &conn->dst, conn->type, conn->dst_type, ev->status); - hci_conn_put(conn); + hci_conn_drop(conn); unlock: hci_dev_unlock(hdev); @@ -3451,7 +3451,7 @@ static void hci_phy_link_complete_evt(struct hci_dev *hdev, hci_conn_hold(hcon); hcon->disc_timeout = HCI_DISCONN_TIMEOUT; - hci_conn_put(hcon); + hci_conn_drop(hcon); hci_conn_hold_device(hcon); hci_conn_add_sysfs(hcon); diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 7c7e9321f1ea..7cdb93c21b32 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -571,7 +571,7 @@ void l2cap_chan_del(struct l2cap_chan *chan, int err) chan->conn = NULL; if (chan->chan_type != L2CAP_CHAN_CONN_FIX_A2MP) - hci_conn_put(conn->hcon); + hci_conn_drop(conn->hcon); if (mgr && mgr->bredr_chan == chan) mgr->bredr_chan = NULL; @@ -1697,7 +1697,7 @@ int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid, conn = l2cap_conn_add(hcon, 0); if (!conn) { - hci_conn_put(hcon); + hci_conn_drop(hcon); err = -ENOMEM; goto done; } @@ -1707,7 +1707,7 @@ int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid, if (!list_empty(&conn->chan_l)) { err = -EBUSY; - hci_conn_put(hcon); + hci_conn_drop(hcon); } if (err) diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 03e7e732215f..34ba1647e6e8 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -2131,7 +2131,7 @@ static void pairing_complete(struct pending_cmd *cmd, u8 status) conn->security_cfm_cb = NULL; conn->disconn_cfm_cb = NULL; - hci_conn_put(conn); + hci_conn_drop(conn); mgmt_pending_remove(cmd); } @@ -2222,7 +2222,7 @@ static int pair_device(struct sock *sk, struct hci_dev *hdev, void *data, } if (conn->connect_cfm_cb) { - hci_conn_put(conn); + hci_conn_drop(conn); err = cmd_complete(sk, hdev->id, MGMT_OP_PAIR_DEVICE, MGMT_STATUS_BUSY, &rp, sizeof(rp)); goto unlock; @@ -2231,7 +2231,7 @@ static int pair_device(struct sock *sk, struct hci_dev *hdev, void *data, cmd = mgmt_pending_add(sk, MGMT_OP_PAIR_DEVICE, hdev, data, len); if (!cmd) { err = -ENOMEM; - hci_conn_put(conn); + hci_conn_drop(conn); goto unlock; } diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index d919d1161ab4..9909eec6afe3 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -185,7 +185,7 @@ static int sco_connect(struct sock *sk) conn = sco_conn_add(hcon); if (!conn) { - hci_conn_put(hcon); + hci_conn_drop(hcon); err = -ENOMEM; goto done; } @@ -353,7 +353,7 @@ static void __sco_sock_close(struct sock *sk) if (sco_pi(sk)->conn->hcon) { sk->sk_state = BT_DISCONN; sco_sock_set_timer(sk, SCO_DISCONN_TIMEOUT); - hci_conn_put(sco_pi(sk)->conn->hcon); + hci_conn_drop(sco_pi(sk)->conn->hcon); sco_pi(sk)->conn->hcon = NULL; } else sco_chan_del(sk, ECONNRESET); @@ -882,7 +882,7 @@ static void sco_chan_del(struct sock *sk, int err) sco_conn_unlock(conn); if (conn->hcon) - hci_conn_put(conn->hcon); + hci_conn_drop(conn->hcon); } sk->sk_state = BT_CLOSED; diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index 5abefb12891d..b2296d3857a0 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -522,7 +522,7 @@ void smp_chan_destroy(struct l2cap_conn *conn) kfree(smp); conn->smp_chan = NULL; conn->hcon->smp_conn = NULL; - hci_conn_put(conn->hcon); + hci_conn_drop(conn->hcon); } int smp_user_confirm_reply(struct hci_conn *hcon, u16 mgmt_op, __le32 passkey) -- cgit v1.2.3 From be055b2f89b5842f41363b5655a33dffb51a8294 Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Thu, 11 Apr 2013 11:52:20 +0200 Subject: NFC: RFKILL support All NFC devices will now get proper RFKILL support as long as they provide some dev_up and dev_down hooks. Rfkilling an NFC device will bring it down while it is left to userspace to bring it back up when being rfkill unblocked. This is very similar to what Bluetooth does. Acked-by: Marcel Holtmann Signed-off-by: Samuel Ortiz --- include/net/nfc/nfc.h | 2 ++ net/nfc/core.c | 38 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 40 insertions(+) (limited to 'include/net') diff --git a/include/net/nfc/nfc.h b/include/net/nfc/nfc.h index 87a6417fc934..5eb80bb3cbb2 100644 --- a/include/net/nfc/nfc.h +++ b/include/net/nfc/nfc.h @@ -122,6 +122,8 @@ struct nfc_dev { bool shutting_down; + struct rfkill *rfkill; + struct nfc_ops *ops; }; #define to_nfc_dev(_dev) container_of(_dev, struct nfc_dev, dev) diff --git a/net/nfc/core.c b/net/nfc/core.c index c571ca9a960c..40d2527693da 100644 --- a/net/nfc/core.c +++ b/net/nfc/core.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include @@ -58,6 +59,11 @@ int nfc_dev_up(struct nfc_dev *dev) device_lock(&dev->dev); + if (dev->rfkill && rfkill_blocked(dev->rfkill)) { + rc = -ERFKILL; + goto error; + } + if (!device_is_registered(&dev->dev)) { rc = -ENODEV; goto error; @@ -117,6 +123,24 @@ error: return rc; } +static int nfc_rfkill_set_block(void *data, bool blocked) +{ + struct nfc_dev *dev = data; + + pr_debug("%s blocked %d", dev_name(&dev->dev), blocked); + + if (!blocked) + return 0; + + nfc_dev_down(dev); + + return 0; +} + +static const struct rfkill_ops nfc_rfkill_ops = { + .set_block = nfc_rfkill_set_block, +}; + /** * nfc_start_poll - start polling for nfc targets * @@ -840,6 +864,15 @@ int nfc_register_device(struct nfc_dev *dev) pr_debug("The userspace won't be notified that the device %s was added\n", dev_name(&dev->dev)); + dev->rfkill = rfkill_alloc(dev_name(&dev->dev), &dev->dev, + RFKILL_TYPE_NFC, &nfc_rfkill_ops, dev); + if (dev->rfkill) { + if (rfkill_register(dev->rfkill) < 0) { + rfkill_destroy(dev->rfkill); + dev->rfkill = NULL; + } + } + return 0; } EXPORT_SYMBOL(nfc_register_device); @@ -857,6 +890,11 @@ void nfc_unregister_device(struct nfc_dev *dev) id = dev->idx; + if (dev->rfkill) { + rfkill_unregister(dev->rfkill); + rfkill_destroy(dev->rfkill); + } + if (dev->ops->check_presence) { device_lock(&dev->dev); dev->shutting_down = true; -- cgit v1.2.3 From d6a4a10411764cf1c3a5dad4f06c5ebe5194488b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 12 Apr 2013 11:31:52 +0000 Subject: tcp: GSO should be TSQ friendly I noticed that TSQ (TCP Small queues) was less effective when TSO is turned off, and GSO is on. If BQL is not enabled, TSQ has then no effect. It turns out the GSO engine frees the original gso_skb at the time the fragments are generated and queued to the NIC. We should instead call the tcp_wfree() destructor for the last fragment, to keep the flow control as intended in TSQ. This effectively limits the number of queued packets on qdisc + NIC layers. Signed-off-by: Eric Dumazet Cc: Tom Herbert Cc: Yuchung Cheng Cc: Nandita Dukkipati Cc: Neal Cardwell Signed-off-by: David S. Miller --- include/net/tcp.h | 1 + net/ipv4/tcp.c | 12 ++++++++++++ net/ipv4/tcp_output.c | 2 +- 3 files changed, 14 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/tcp.h b/include/net/tcp.h index 4475aaf0af57..5bba80fbd1d9 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -370,6 +370,7 @@ extern int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, extern int tcp_sendpage(struct sock *sk, struct page *page, int offset, size_t size, int flags); extern void tcp_release_cb(struct sock *sk); +extern void tcp_wfree(struct sk_buff *skb); extern void tcp_write_timer_handler(struct sock *sk); extern void tcp_delack_timer_handler(struct sock *sk); extern int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index a96f7b586277..963bda18486f 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2885,6 +2885,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, __be32 delta; unsigned int oldlen; unsigned int mss; + struct sk_buff *gso_skb = skb; if (!pskb_may_pull(skb, sizeof(*th))) goto out; @@ -2953,6 +2954,17 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, th->cwr = 0; } while (skb->next); + /* Following permits TCP Small Queues to work well with GSO : + * The callback to TCP stack will be called at the time last frag + * is freed at TX completion, and not right now when gso_skb + * is freed by GSO engine + */ + if (gso_skb->destructor == tcp_wfree) { + swap(gso_skb->sk, skb->sk); + swap(gso_skb->destructor, skb->destructor); + swap(gso_skb->truesize, skb->truesize); + } + delta = htonl(oldlen + (skb->tail - skb->transport_header) + skb->data_len); th->check = ~csum_fold((__force __wsum)((__force u32)th->check + diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index af354c98fdb5..d12694353540 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -787,7 +787,7 @@ void __init tcp_tasklet_init(void) * We cant xmit new skbs from this context, as we might already * hold qdisc lock. */ -static void tcp_wfree(struct sk_buff *skb) +void tcp_wfree(struct sk_buff *skb) { struct sock *sk = skb->sk; struct tcp_sock *tp = tcp_sk(sk); -- cgit v1.2.3 From bf84a01063eaab2f1a37d72d1b903445b3a25a4e Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sun, 14 Apr 2013 08:08:13 +0000 Subject: net: sock: make sock_tx_timestamp void Currently, sock_tx_timestamp() always returns 0. The comment that describes the sock_tx_timestamp() function wrongly says that it returns an error when an invalid argument is passed (from commit 20d4947353be, ``net: socket infrastructure for SO_TIMESTAMPING''). Make the function void, so that we can also remove all the unneeded if conditions that check for such a _non-existant_ error case in the output path. Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/net/sock.h | 5 ++--- net/can/raw.c | 5 ++--- net/ipv4/ping.c | 5 ++--- net/ipv4/udp.c | 6 +++--- net/ipv6/ip6_output.c | 7 ++----- net/packet/af_packet.c | 10 ++++------ net/socket.c | 3 +-- 7 files changed, 16 insertions(+), 25 deletions(-) (limited to 'include/net') diff --git a/include/net/sock.h b/include/net/sock.h index 08f05f964737..5c97b0fc5623 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2159,10 +2159,9 @@ static inline void sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, * @sk: socket sending this packet * @tx_flags: filled with instructions for time stamping * - * Currently only depends on SOCK_TIMESTAMPING* flags. Returns error code if - * parameters are invalid. + * Currently only depends on SOCK_TIMESTAMPING* flags. */ -extern int sock_tx_timestamp(struct sock *sk, __u8 *tx_flags); +extern void sock_tx_timestamp(struct sock *sk, __u8 *tx_flags); /** * sk_eat_skb - Release a skb if it is no longer needed diff --git a/net/can/raw.c b/net/can/raw.c index c1764e41ddaf..1085e65f848e 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -711,9 +711,8 @@ static int raw_sendmsg(struct kiocb *iocb, struct socket *sock, err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size); if (err < 0) goto free_skb; - err = sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags); - if (err < 0) - goto free_skb; + + sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags); skb->dev = dev; skb->sk = sk; diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 2e91006d6076..7d93d62cd5fd 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -514,9 +514,8 @@ static int ping_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, ipc.opt = NULL; ipc.oif = sk->sk_bound_dev_if; ipc.tx_flags = 0; - err = sock_tx_timestamp(sk, &ipc.tx_flags); - if (err) - return err; + + sock_tx_timestamp(sk, &ipc.tx_flags); if (msg->msg_controllen) { err = ip_cmsg_send(sock_net(sk), msg, &ipc); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 7117d1467b02..2722db024a0b 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -902,9 +902,9 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, ipc.addr = inet->inet_saddr; ipc.oif = sk->sk_bound_dev_if; - err = sock_tx_timestamp(sk, &ipc.tx_flags); - if (err) - return err; + + sock_tx_timestamp(sk, &ipc.tx_flags); + if (msg->msg_controllen) { err = ip_cmsg_send(sock_net(sk), msg, &ipc); if (err) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 155eccfa7760..d2eedf192330 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1224,11 +1224,8 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, } /* For UDP, check if TX timestamp is enabled */ - if (sk->sk_type == SOCK_DGRAM) { - err = sock_tx_timestamp(sk, &tx_flags); - if (err) - goto error; - } + if (sk->sk_type == SOCK_DGRAM) + sock_tx_timestamp(sk, &tx_flags); /* * Let's try using as much space as possible. diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 8e4644ff8d34..77d71f84758c 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1505,9 +1505,8 @@ retry: skb->dev = dev; skb->priority = sk->sk_priority; skb->mark = sk->sk_mark; - err = sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags); - if (err < 0) - goto out_unlock; + + sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags); if (unlikely(extra_len == 4)) skb->no_fcs = 1; @@ -2312,9 +2311,8 @@ static int packet_snd(struct socket *sock, err = skb_copy_datagram_from_iovec(skb, offset, msg->msg_iov, 0, len); if (err) goto out_free; - err = sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags); - if (err < 0) - goto out_free; + + sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags); if (!gso_type && (len > dev->mtu + reserve + extra_len)) { /* Earlier code assumed this would be a VLAN pkt, diff --git a/net/socket.c b/net/socket.c index 88f759adf3af..36883fea44f3 100644 --- a/net/socket.c +++ b/net/socket.c @@ -600,7 +600,7 @@ void sock_release(struct socket *sock) } EXPORT_SYMBOL(sock_release); -int sock_tx_timestamp(struct sock *sk, __u8 *tx_flags) +void sock_tx_timestamp(struct sock *sk, __u8 *tx_flags) { *tx_flags = 0; if (sock_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE)) @@ -609,7 +609,6 @@ int sock_tx_timestamp(struct sock *sk, __u8 *tx_flags) *tx_flags |= SKBTX_SW_TSTAMP; if (sock_flag(sk, SOCK_WIFI_STATUS)) *tx_flags |= SKBTX_WIFI_STATUS; - return 0; } EXPORT_SYMBOL(sock_tx_timestamp); -- cgit v1.2.3 From ff2266cddd69f5e0c9d5121ed9218d2f694406cc Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 15 Apr 2013 03:27:17 +0000 Subject: net: sctp: remove sctp_ep_common struct member 'malloced' There is actually no need to keep this member in the structure, because after init it's always 1 anyway, thus always kfree called. This seems to be an ancient leftover from the very initial implementation from 2.5 times. Only in case the initialization of an association fails, we leave base.malloced as 0, but we nevertheless kfree it in the error path in sctp_association_new(). Signed-off-by: Daniel Borkmann Acked-by: Vlad Yasevich Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 2 -- net/sctp/associola.c | 8 ++------ net/sctp/endpointola.c | 10 +++------- 3 files changed, 5 insertions(+), 15 deletions(-) (limited to 'include/net') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 0e0f9d2322e3..3e80eedab17d 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1174,11 +1174,9 @@ struct sctp_ep_common { /* Some fields to help us manage this object. * refcnt - Reference count access to this object. * dead - Do not attempt to use this object. - * malloced - Do we need to kfree this object? */ atomic_t refcnt; char dead; - char malloced; /* What socket does this endpoint belong to? */ struct sock *sk; diff --git a/net/sctp/associola.c b/net/sctp/associola.c index d2709e2b7be6..b893aa6862f4 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -105,7 +105,6 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a /* Initialize the object handling fields. */ atomic_set(&asoc->base.refcnt, 1); asoc->base.dead = 0; - asoc->base.malloced = 0; /* Initialize the bind addr area. */ sctp_bind_addr_init(&asoc->base.bind_addr, ep->base.bind_addr.port); @@ -371,7 +370,6 @@ struct sctp_association *sctp_association_new(const struct sctp_endpoint *ep, if (!sctp_association_init(asoc, ep, sk, scope, gfp)) goto fail_init; - asoc->base.malloced = 1; SCTP_DBG_OBJCNT_INC(assoc); SCTP_DEBUG_PRINTK("Created asoc %p\n", asoc); @@ -484,10 +482,8 @@ static void sctp_association_destroy(struct sctp_association *asoc) WARN_ON(atomic_read(&asoc->rmem_alloc)); - if (asoc->base.malloced) { - kfree(asoc); - SCTP_DBG_OBJCNT_DEC(assoc); - } + kfree(asoc); + SCTP_DBG_OBJCNT_DEC(assoc); } /* Change the primary destination address for the peer. */ diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c index 12ed45dbe75d..46bbfc266efc 100644 --- a/net/sctp/endpointola.c +++ b/net/sctp/endpointola.c @@ -122,7 +122,6 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep, /* Initialize the basic object fields. */ atomic_set(&ep->base.refcnt, 1); ep->base.dead = 0; - ep->base.malloced = 1; /* Create an input queue. */ sctp_inq_init(&ep->base.inqueue); @@ -198,7 +197,7 @@ struct sctp_endpoint *sctp_endpoint_new(struct sock *sk, gfp_t gfp) goto fail; if (!sctp_endpoint_init(ep, sk, gfp)) goto fail_init; - ep->base.malloced = 1; + SCTP_DBG_OBJCNT_INC(ep); return ep; @@ -279,11 +278,8 @@ static void sctp_endpoint_destroy(struct sctp_endpoint *ep) if (ep->base.sk) sock_put(ep->base.sk); - /* Finally, free up our memory. */ - if (ep->base.malloced) { - kfree(ep); - SCTP_DBG_OBJCNT_DEC(ep); - } + kfree(ep); + SCTP_DBG_OBJCNT_DEC(ep); } /* Hold a reference to an endpoint. */ -- cgit v1.2.3 From 0022d2dd4d76e0e7d5c241c343a5016fdfa2ad4f Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 15 Apr 2013 03:27:18 +0000 Subject: net: sctp: minor: make sctp_ep_common's member 'dead' a bool Since dead only holds two states (0,1), make it a bool instead of a 'char', which is more appropriate for its purpose. Signed-off-by: Daniel Borkmann Acked-by: Vlad Yasevich Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 2 +- net/sctp/associola.c | 4 ++-- net/sctp/endpointola.c | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include/net') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 3e80eedab17d..e12aa77abc59 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1176,7 +1176,7 @@ struct sctp_ep_common { * dead - Do not attempt to use this object. */ atomic_t refcnt; - char dead; + bool dead; /* What socket does this endpoint belong to? */ struct sock *sk; diff --git a/net/sctp/associola.c b/net/sctp/associola.c index b893aa6862f4..423549a714e5 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -104,7 +104,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a /* Initialize the object handling fields. */ atomic_set(&asoc->base.refcnt, 1); - asoc->base.dead = 0; + asoc->base.dead = false; /* Initialize the bind addr area. */ sctp_bind_addr_init(&asoc->base.bind_addr, ep->base.bind_addr.port); @@ -407,7 +407,7 @@ void sctp_association_free(struct sctp_association *asoc) /* Mark as dead, so other users can know this structure is * going away. */ - asoc->base.dead = 1; + asoc->base.dead = true; /* Dispose of any data lying around in the outqueue. */ sctp_outq_free(&asoc->outqueue); diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c index 46bbfc266efc..5fbd7bc6bb11 100644 --- a/net/sctp/endpointola.c +++ b/net/sctp/endpointola.c @@ -121,7 +121,7 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep, /* Initialize the basic object fields. */ atomic_set(&ep->base.refcnt, 1); - ep->base.dead = 0; + ep->base.dead = false; /* Create an input queue. */ sctp_inq_init(&ep->base.inqueue); @@ -233,7 +233,7 @@ void sctp_endpoint_add_asoc(struct sctp_endpoint *ep, */ void sctp_endpoint_free(struct sctp_endpoint *ep) { - ep->base.dead = 1; + ep->base.dead = true; ep->base.sk->sk_state = SCTP_SS_CLOSED; -- cgit v1.2.3 From 1ce3e82b0eb472161313183be0033e46d5c4bbaf Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 1 Aug 2012 17:00:55 +0200 Subject: cfg80211: add ieee80211_operating_class_to_band This function converts a (global only!) operating class to an internal band identifier. This will be needed for extended channel switch support. Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 11 +++++++++++ net/wireless/util.c | 20 ++++++++++++++++++++ 2 files changed, 31 insertions(+) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 57870b646974..dff96d8cafcd 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -4024,6 +4024,17 @@ bool cfg80211_reg_can_beacon(struct wiphy *wiphy, void cfg80211_ch_switch_notify(struct net_device *dev, struct cfg80211_chan_def *chandef); +/** + * ieee80211_operating_class_to_band - convert operating class to band + * + * @operating_class: the operating class to convert + * @band: band pointer to fill + * + * Returns %true if the conversion was successful, %false otherwise. + */ +bool ieee80211_operating_class_to_band(u8 operating_class, + enum ieee80211_band *band); + /* * cfg80211_tdls_oper_request - request userspace to perform TDLS operation * @dev: the device on which the operation is requested diff --git a/net/wireless/util.c b/net/wireless/util.c index 37a56ee1e1ed..3d8a1334f4a9 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -1155,6 +1155,26 @@ int cfg80211_get_p2p_attr(const u8 *ies, unsigned int len, } EXPORT_SYMBOL(cfg80211_get_p2p_attr); +bool ieee80211_operating_class_to_band(u8 operating_class, + enum ieee80211_band *band) +{ + switch (operating_class) { + case 112: + case 115 ... 127: + *band = IEEE80211_BAND_5GHZ; + return true; + case 81: + case 82: + case 83: + case 84: + *band = IEEE80211_BAND_2GHZ; + return true; + } + + return false; +} +EXPORT_SYMBOL(ieee80211_operating_class_to_band); + int cfg80211_validate_beacon_int(struct cfg80211_registered_device *rdev, u32 beacon_int) { -- cgit v1.2.3 From 85220d71bf3ca1ba9129e0744247ae5f61bec559 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 25 Mar 2013 18:29:27 +0100 Subject: mac80211: support secondary channel offset in CSA Add support for the secondary channel offset IE in channel switch announcements. This is necessary for proper handling of CSA on HT access points. For this to work it is also necessary to convert everything here to use chandef structs instead of just channels. The driver updates aren't really correct though. In particular, the TI wl18xx driver update can't possibly be right since it just ignores the new channel width for lack of firmware API. Signed-off-by: Johannes Berg --- drivers/net/wireless/iwlegacy/4965-mac.c | 32 ++++++------- drivers/net/wireless/iwlegacy/4965.c | 2 +- drivers/net/wireless/iwlwifi/dvm/devices.c | 10 ++-- drivers/net/wireless/iwlwifi/dvm/mac80211.c | 20 ++++++-- drivers/net/wireless/iwlwifi/dvm/rxon.c | 2 +- drivers/net/wireless/ti/wl12xx/cmd.c | 2 +- drivers/net/wireless/ti/wl18xx/cmd.c | 6 +-- include/linux/ieee80211.h | 11 +++++ include/net/mac80211.h | 4 +- net/mac80211/ieee80211_i.h | 3 +- net/mac80211/mlme.c | 71 +++++++++++++++++++++++------ net/mac80211/trace.h | 8 ++-- net/mac80211/util.c | 8 ++++ 13 files changed, 125 insertions(+), 54 deletions(-) (limited to 'include/net') diff --git a/drivers/net/wireless/iwlegacy/4965-mac.c b/drivers/net/wireless/iwlegacy/4965-mac.c index c092fcbbe965..cb5882ea5f3a 100644 --- a/drivers/net/wireless/iwlegacy/4965-mac.c +++ b/drivers/net/wireless/iwlegacy/4965-mac.c @@ -6057,7 +6057,7 @@ il4965_mac_channel_switch(struct ieee80211_hw *hw, struct il_priv *il = hw->priv; const struct il_channel_info *ch_info; struct ieee80211_conf *conf = &hw->conf; - struct ieee80211_channel *channel = ch_switch->channel; + struct ieee80211_channel *channel = ch_switch->chandef.chan; struct il_ht_config *ht_conf = &il->current_ht_config; u16 ch; @@ -6094,23 +6094,21 @@ il4965_mac_channel_switch(struct ieee80211_hw *hw, il->current_ht_config.smps = conf->smps_mode; /* Configure HT40 channels */ - il->ht.enabled = conf_is_ht(conf); - if (il->ht.enabled) { - if (conf_is_ht40_minus(conf)) { - il->ht.extension_chan_offset = - IEEE80211_HT_PARAM_CHA_SEC_BELOW; - il->ht.is_40mhz = true; - } else if (conf_is_ht40_plus(conf)) { - il->ht.extension_chan_offset = - IEEE80211_HT_PARAM_CHA_SEC_ABOVE; - il->ht.is_40mhz = true; - } else { - il->ht.extension_chan_offset = - IEEE80211_HT_PARAM_CHA_SEC_NONE; - il->ht.is_40mhz = false; - } - } else + switch (cfg80211_get_chandef_type(&ch_switch->chandef)) { + case NL80211_CHAN_NO_HT: + case NL80211_CHAN_HT20: il->ht.is_40mhz = false; + il->ht.extension_chan_offset = IEEE80211_HT_PARAM_CHA_SEC_NONE; + break; + case NL80211_CHAN_HT40MINUS: + il->ht.extension_chan_offset = IEEE80211_HT_PARAM_CHA_SEC_BELOW; + il->ht.is_40mhz = true; + break; + case NL80211_CHAN_HT40PLUS: + il->ht.extension_chan_offset = IEEE80211_HT_PARAM_CHA_SEC_ABOVE; + il->ht.is_40mhz = true; + break; + } if ((le16_to_cpu(il->staging.channel) != ch)) il->staging.flags = 0; diff --git a/drivers/net/wireless/iwlegacy/4965.c b/drivers/net/wireless/iwlegacy/4965.c index 91eb2d07fdb8..777a578294bd 100644 --- a/drivers/net/wireless/iwlegacy/4965.c +++ b/drivers/net/wireless/iwlegacy/4965.c @@ -1493,7 +1493,7 @@ il4965_hw_channel_switch(struct il_priv *il, cmd.band = band; cmd.expect_beacon = 0; - ch = ch_switch->channel->hw_value; + ch = ch_switch->chandef.chan->hw_value; cmd.channel = cpu_to_le16(ch); cmd.rxon_flags = il->staging.flags; cmd.rxon_filter_flags = il->staging.filter_flags; diff --git a/drivers/net/wireless/iwlwifi/dvm/devices.c b/drivers/net/wireless/iwlwifi/dvm/devices.c index 15cca2ef9294..c48907c8ab43 100644 --- a/drivers/net/wireless/iwlwifi/dvm/devices.c +++ b/drivers/net/wireless/iwlwifi/dvm/devices.c @@ -379,7 +379,7 @@ static int iwl5000_hw_channel_switch(struct iwl_priv *priv, }; cmd.band = priv->band == IEEE80211_BAND_2GHZ; - ch = ch_switch->channel->hw_value; + ch = ch_switch->chandef.chan->hw_value; IWL_DEBUG_11H(priv, "channel switch from %d to %d\n", ctx->active.channel, ch); cmd.channel = cpu_to_le16(ch); @@ -414,7 +414,8 @@ static int iwl5000_hw_channel_switch(struct iwl_priv *priv, } IWL_DEBUG_11H(priv, "uCode time for the switch is 0x%x\n", cmd.switch_time); - cmd.expect_beacon = ch_switch->channel->flags & IEEE80211_CHAN_RADAR; + cmd.expect_beacon = + ch_switch->chandef.chan->flags & IEEE80211_CHAN_RADAR; return iwl_dvm_send_cmd(priv, &hcmd); } @@ -540,7 +541,7 @@ static int iwl6000_hw_channel_switch(struct iwl_priv *priv, hcmd.data[0] = cmd; cmd->band = priv->band == IEEE80211_BAND_2GHZ; - ch = ch_switch->channel->hw_value; + ch = ch_switch->chandef.chan->hw_value; IWL_DEBUG_11H(priv, "channel switch from %u to %u\n", ctx->active.channel, ch); cmd->channel = cpu_to_le16(ch); @@ -575,7 +576,8 @@ static int iwl6000_hw_channel_switch(struct iwl_priv *priv, } IWL_DEBUG_11H(priv, "uCode time for the switch is 0x%x\n", cmd->switch_time); - cmd->expect_beacon = ch_switch->channel->flags & IEEE80211_CHAN_RADAR; + cmd->expect_beacon = + ch_switch->chandef.chan->flags & IEEE80211_CHAN_RADAR; err = iwl_dvm_send_cmd(priv, &hcmd); kfree(cmd); diff --git a/drivers/net/wireless/iwlwifi/dvm/mac80211.c b/drivers/net/wireless/iwlwifi/dvm/mac80211.c index a7294fa4d7e5..2dc101fe0d24 100644 --- a/drivers/net/wireless/iwlwifi/dvm/mac80211.c +++ b/drivers/net/wireless/iwlwifi/dvm/mac80211.c @@ -967,7 +967,7 @@ static void iwlagn_mac_channel_switch(struct ieee80211_hw *hw, { struct iwl_priv *priv = IWL_MAC80211_GET_DVM(hw); struct ieee80211_conf *conf = &hw->conf; - struct ieee80211_channel *channel = ch_switch->channel; + struct ieee80211_channel *channel = ch_switch->chandef.chan; struct iwl_ht_config *ht_conf = &priv->current_ht_config; /* * MULTI-FIXME @@ -1005,11 +1005,21 @@ static void iwlagn_mac_channel_switch(struct ieee80211_hw *hw, priv->current_ht_config.smps = conf->smps_mode; /* Configure HT40 channels */ - ctx->ht.enabled = conf_is_ht(conf); - if (ctx->ht.enabled) - iwlagn_config_ht40(conf, ctx); - else + switch (cfg80211_get_chandef_type(&ch_switch->chandef)) { + case NL80211_CHAN_NO_HT: + case NL80211_CHAN_HT20: ctx->ht.is_40mhz = false; + ctx->ht.extension_chan_offset = IEEE80211_HT_PARAM_CHA_SEC_NONE; + break; + case NL80211_CHAN_HT40MINUS: + ctx->ht.extension_chan_offset = IEEE80211_HT_PARAM_CHA_SEC_BELOW; + ctx->ht.is_40mhz = true; + break; + case NL80211_CHAN_HT40PLUS: + ctx->ht.extension_chan_offset = IEEE80211_HT_PARAM_CHA_SEC_ABOVE; + ctx->ht.is_40mhz = true; + break; + } if ((le16_to_cpu(ctx->staging.channel) != ch)) ctx->staging.flags = 0; diff --git a/drivers/net/wireless/iwlwifi/dvm/rxon.c b/drivers/net/wireless/iwlwifi/dvm/rxon.c index 085c589e7149..acbb50b5f1e8 100644 --- a/drivers/net/wireless/iwlwifi/dvm/rxon.c +++ b/drivers/net/wireless/iwlwifi/dvm/rxon.c @@ -1160,7 +1160,7 @@ int iwlagn_commit_rxon(struct iwl_priv *priv, struct iwl_rxon_context *ctx) } void iwlagn_config_ht40(struct ieee80211_conf *conf, - struct iwl_rxon_context *ctx) + struct iwl_rxon_context *ctx) { if (conf_is_ht40_minus(conf)) { ctx->ht.extension_chan_offset = diff --git a/drivers/net/wireless/ti/wl12xx/cmd.c b/drivers/net/wireless/ti/wl12xx/cmd.c index 7dc9f965037d..7485dbae8c4b 100644 --- a/drivers/net/wireless/ti/wl12xx/cmd.c +++ b/drivers/net/wireless/ti/wl12xx/cmd.c @@ -301,7 +301,7 @@ int wl12xx_cmd_channel_switch(struct wl1271 *wl, } cmd->role_id = wlvif->role_id; - cmd->channel = ch_switch->channel->hw_value; + cmd->channel = ch_switch->chandef.chan->hw_value; cmd->switch_time = ch_switch->count; cmd->stop_tx = ch_switch->block_tx; diff --git a/drivers/net/wireless/ti/wl18xx/cmd.c b/drivers/net/wireless/ti/wl18xx/cmd.c index 1d1f6cc7a50a..7649c75cd68d 100644 --- a/drivers/net/wireless/ti/wl18xx/cmd.c +++ b/drivers/net/wireless/ti/wl18xx/cmd.c @@ -42,11 +42,11 @@ int wl18xx_cmd_channel_switch(struct wl1271 *wl, } cmd->role_id = wlvif->role_id; - cmd->channel = ch_switch->channel->hw_value; + cmd->channel = ch_switch->chandef.chan->hw_value; cmd->switch_time = ch_switch->count; cmd->stop_tx = ch_switch->block_tx; - switch (ch_switch->channel->band) { + switch (ch_switch->chandef.chan->band) { case IEEE80211_BAND_2GHZ: cmd->band = WLCORE_BAND_2_4GHZ; break; @@ -55,7 +55,7 @@ int wl18xx_cmd_channel_switch(struct wl1271 *wl, break; default: wl1271_error("invalid channel switch band: %d", - ch_switch->channel->band); + ch_switch->chandef.chan->band); ret = -EINVAL; goto out_free; } diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 2a10acc65a54..95621528436c 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -684,6 +684,16 @@ struct ieee80211_ext_chansw_ie { u8 count; } __packed; +/** + * struct ieee80211_sec_chan_offs_ie - secondary channel offset IE + * @sec_chan_offs: secondary channel offset, uses IEEE80211_HT_PARAM_CHA_SEC_* + * values here + * This structure represents the "Secondary Channel Offset element" + */ +struct ieee80211_sec_chan_offs_ie { + u8 sec_chan_offs; +} __packed; + /** * struct ieee80211_tim * @@ -1648,6 +1658,7 @@ enum ieee80211_eid { WLAN_EID_HT_CAPABILITY = 45, WLAN_EID_HT_OPERATION = 61, + WLAN_EID_SECONDARY_CHANNEL_OFFSET = 62, WLAN_EID_RSN = 48, WLAN_EID_MMIE = 76, diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 0dde213dd3b6..9ff10b33b711 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1017,13 +1017,13 @@ struct ieee80211_conf { * the driver passed into mac80211. * @block_tx: Indicates whether transmission must be blocked before the * scheduled channel switch, as indicated by the AP. - * @channel: the new channel to switch to + * @chandef: the new channel to switch to * @count: the number of TBTT's until the channel switch event */ struct ieee80211_channel_switch { u64 timestamp; bool block_tx; - struct ieee80211_channel *channel; + struct cfg80211_chan_def chandef; u8 count; }; diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 10c3180b165e..8f240c0ec304 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1019,7 +1019,7 @@ struct ieee80211_local { enum mac80211_scan_state next_scan_state; struct delayed_work scan_work; struct ieee80211_sub_if_data __rcu *scan_sdata; - struct ieee80211_channel *csa_channel; + struct cfg80211_chan_def csa_chandef; /* For backward compatibility only -- do not use */ struct cfg80211_chan_def _oper_chandef; @@ -1183,6 +1183,7 @@ struct ieee802_11_elems { const u8 *pwr_constr_elem; const struct ieee80211_timeout_interval_ie *timeout_int; const u8 *opmode_notif; + const struct ieee80211_sec_chan_offs_ie *sec_chan_offs; /* length of them, respectively */ u8 ssid_len; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index bc6f87edc624..bd581a80e4b7 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -289,6 +289,8 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata, } else { /* 40 MHz (and 80 MHz) must be supported for VHT */ ret = IEEE80211_STA_DISABLE_VHT; + /* also mark 40 MHz disabled */ + ret |= IEEE80211_STA_DISABLE_40MHZ; goto out; } @@ -964,16 +966,7 @@ static void ieee80211_chswitch_work(struct work_struct *work) if (!ifmgd->associated) goto out; - /* - * FIXME: Here we are downgrading to NL80211_CHAN_WIDTH_20_NOHT - * and don't adjust our ht/vht settings - * This is wrong - we should behave according to the CSA params - */ - local->_oper_chandef.chan = local->csa_channel; - local->_oper_chandef.width = NL80211_CHAN_WIDTH_20_NOHT; - local->_oper_chandef.center_freq1 = - local->_oper_chandef.chan->center_freq; - local->_oper_chandef.center_freq2 = 0; + local->_oper_chandef = local->csa_chandef; if (!local->ops->channel_switch) { /* call "hw_config" only if doing sw channel switch */ @@ -1028,13 +1021,14 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct cfg80211_bss *cbss = ifmgd->associated; struct ieee80211_bss *bss; - struct ieee80211_channel *new_ch; struct ieee80211_chanctx *chanctx; enum ieee80211_band new_band; int new_freq; u8 new_chan_no; u8 count; u8 mode; + struct cfg80211_chan_def new_chandef = {}; + int secondary_channel_offset = -1; ASSERT_MGD_MTX(ifmgd); @@ -1048,6 +1042,19 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, if (ifmgd->flags & IEEE80211_STA_CSA_RECEIVED) return; + if (!(ifmgd->flags & IEEE80211_STA_DISABLE_HT)) { + /* if HT is enabled and the IE not present, it's still HT */ + secondary_channel_offset = IEEE80211_HT_PARAM_CHA_SEC_NONE; + if (elems->sec_chan_offs) + secondary_channel_offset = + elems->sec_chan_offs->sec_chan_offs; + } + + if (ifmgd->flags & IEEE80211_STA_DISABLE_40MHZ && + (secondary_channel_offset == IEEE80211_HT_PARAM_CHA_SEC_ABOVE || + secondary_channel_offset == IEEE80211_HT_PARAM_CHA_SEC_BELOW)) + secondary_channel_offset = IEEE80211_HT_PARAM_CHA_SEC_NONE; + if (elems->ext_chansw_ie) { if (!ieee80211_operating_class_to_band( elems->ext_chansw_ie->new_operating_class, @@ -1074,8 +1081,9 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, bss = (void *)cbss->priv; new_freq = ieee80211_channel_to_frequency(new_chan_no, new_band); - new_ch = ieee80211_get_channel(local->hw.wiphy, new_freq); - if (!new_ch || new_ch->flags & IEEE80211_CHAN_DISABLED) { + new_chandef.chan = ieee80211_get_channel(sdata->local->hw.wiphy, new_freq); + if (!new_chandef.chan || + new_chandef.chan->flags & IEEE80211_CHAN_DISABLED) { sdata_info(sdata, "AP %pM switches to unsupported channel (%d MHz), disconnecting\n", ifmgd->associated->bssid, new_freq); @@ -1084,6 +1092,39 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, return; } + switch (secondary_channel_offset) { + default: + /* secondary_channel_offset was present but is invalid */ + case IEEE80211_HT_PARAM_CHA_SEC_NONE: + cfg80211_chandef_create(&new_chandef, new_chandef.chan, + NL80211_CHAN_HT20); + break; + case IEEE80211_HT_PARAM_CHA_SEC_ABOVE: + cfg80211_chandef_create(&new_chandef, new_chandef.chan, + NL80211_CHAN_HT40PLUS); + break; + case IEEE80211_HT_PARAM_CHA_SEC_BELOW: + cfg80211_chandef_create(&new_chandef, new_chandef.chan, + NL80211_CHAN_HT40MINUS); + break; + case -1: + cfg80211_chandef_create(&new_chandef, new_chandef.chan, + NL80211_CHAN_NO_HT); + break; + } + + if (!cfg80211_chandef_usable(local->hw.wiphy, &new_chandef, + IEEE80211_CHAN_DISABLED)) { + sdata_info(sdata, + "AP %pM switches to unsupported channel (%d MHz, width:%d, CF1/2: %d/%d MHz), disconnecting\n", + ifmgd->associated->bssid, new_freq, + new_chandef.width, new_chandef.center_freq1, + new_chandef.center_freq2); + ieee80211_queue_work(&local->hw, + &ifmgd->csa_connection_drop_work); + return; + } + ifmgd->flags |= IEEE80211_STA_CSA_RECEIVED; if (local->use_chanctx) { @@ -1111,7 +1152,7 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, } mutex_unlock(&local->chanctx_mtx); - local->csa_channel = new_ch; + local->csa_chandef = new_chandef; if (mode) ieee80211_stop_queues_by_reason(&local->hw, @@ -1123,7 +1164,7 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, struct ieee80211_channel_switch ch_switch = { .timestamp = timestamp, .block_tx = mode, - .channel = new_ch, + .chandef = new_chandef, .count = count, }; diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h index 8286dcef228b..c215fafd7a2f 100644 --- a/net/mac80211/trace.h +++ b/net/mac80211/trace.h @@ -990,23 +990,23 @@ TRACE_EVENT(drv_channel_switch, TP_STRUCT__entry( LOCAL_ENTRY + CHANDEF_ENTRY __field(u64, timestamp) __field(bool, block_tx) - __field(u16, freq) __field(u8, count) ), TP_fast_assign( LOCAL_ASSIGN; + CHANDEF_ASSIGN(&ch_switch->chandef) __entry->timestamp = ch_switch->timestamp; __entry->block_tx = ch_switch->block_tx; - __entry->freq = ch_switch->channel->center_freq; __entry->count = ch_switch->count; ), TP_printk( - LOCAL_PR_FMT " new freq:%u count:%d", - LOCAL_PR_ARG, __entry->freq, __entry->count + LOCAL_PR_FMT " new " CHANDEF_PR_FMT " count:%d", + LOCAL_PR_ARG, CHANDEF_PR_ARG, __entry->count ) ); diff --git a/net/mac80211/util.c b/net/mac80211/util.c index e4a6d559372d..155056c90edf 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -716,6 +716,7 @@ u32 ieee802_11_parse_elems_crc(u8 *start, size_t len, case WLAN_EID_COUNTRY: case WLAN_EID_PWR_CONSTRAINT: case WLAN_EID_TIMEOUT_INTERVAL: + case WLAN_EID_SECONDARY_CHANNEL_OFFSET: if (test_bit(id, seen_elems)) { elems->parse_error = true; left -= elen; @@ -870,6 +871,13 @@ u32 ieee802_11_parse_elems_crc(u8 *start, size_t len, } elems->ext_chansw_ie = (void *)pos; break; + case WLAN_EID_SECONDARY_CHANNEL_OFFSET: + if (elen != sizeof(struct ieee80211_sec_chan_offs_ie)) { + elem_parse_failed = true; + break; + } + elems->sec_chan_offs = (void *)pos; + break; case WLAN_EID_COUNTRY: elems->country_elem = pos; elems->country_elem_len = elen; -- cgit v1.2.3 From 6bc8312f95f982c0a6f26e87d0a6c299a697ed53 Mon Sep 17 00:00:00 2001 From: Karl Beldan Date: Mon, 15 Apr 2013 17:09:29 +0200 Subject: mac80211: VHT off-by-one NSS The number of VHT spatial streams (NSS) is found in: - s8 ieee80211_tx_rate.rate.idx[6:4] (tx - filled by rate control) - u8 ieee80211_rx_status.vht_nss (rx - filled by driver) Tx discriminates valid rates indexes with the sign bit and encodes NSS starting from 0 to 7 (note this matches some hw encodings e.g IWLMVM). Rx does not have the same constraints, and encodes NSS starting from 1 to 8 (note this matches what wireshark expects in the radiotap header). To handle ieee80211_tx_rate.rate.idx[6:4] ieee80211_rate_set_vht() and ieee80211_rate_get_vht_nss() assume their nss parameter and return value respectively runs from 0 to 7. ATM, there are only 2 users of these: cfg.c:sta_set_rate_info_t() and iwlwifi/mvm/tx.c:iwl_mvm_hwrate_to_tx_control(), but both assume nss runs from 1 to 8. This patch fixes this inconsistency by making ieee80211_rate_set_vht() and ieee80211_rate_get_vht_nss() handle an nss running from 1 to 8. Signed-off-by: Karl Beldan Signed-off-by: Johannes Berg --- include/net/mac80211.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 9ff10b33b711..bc5d8184c489 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -601,8 +601,8 @@ static inline void ieee80211_rate_set_vht(struct ieee80211_tx_rate *rate, u8 mcs, u8 nss) { WARN_ON(mcs & ~0xF); - WARN_ON(nss & ~0x7); - rate->idx = (nss << 4) | mcs; + WARN_ON((nss - 1) & ~0x7); + rate->idx = ((nss - 1) << 4) | mcs; } static inline u8 @@ -614,7 +614,7 @@ ieee80211_rate_get_vht_mcs(const struct ieee80211_tx_rate *rate) static inline u8 ieee80211_rate_get_vht_nss(const struct ieee80211_tx_rate *rate) { - return rate->idx >> 4; + return (rate->idx >> 4) + 1; } /** -- cgit v1.2.3 From 2ffbe6d333664a089f17b13aa79eefe38f794bb7 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Tue, 16 Apr 2013 13:38:42 +0200 Subject: mac80211: fix and optimize MCS mask handling Currently the code always copies the configured MCS mask (even if it is set to default), but only uses it if legacy rates were also masked out. Fix this by adding a flag that tracks whether the configured MCS mask is set to default or not. Optimize the code further by storing a pointer to the configured rate mask in txrc instead of using memcpy. Signed-off-by: Felix Fietkau Signed-off-by: Johannes Berg --- include/net/mac80211.h | 4 ++-- net/mac80211/cfg.c | 13 +++++++++++++ net/mac80211/ieee80211_i.h | 2 ++ net/mac80211/rate.c | 9 ++++++--- net/mac80211/tx.c | 10 +++++----- 5 files changed, 28 insertions(+), 10 deletions(-) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index bc5d8184c489..05dbb9788504 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -4107,7 +4107,7 @@ void ieee80211_send_bar(struct ieee80211_vif *vif, u8 *ra, u16 tid, u16 ssn); * (deprecated; this will be removed once drivers get updated to use * rate_idx_mask) * @rate_idx_mask: user-requested (legacy) rate mask - * @rate_idx_mcs_mask: user-requested MCS rate mask + * @rate_idx_mcs_mask: user-requested MCS rate mask (NULL if not in use) * @bss: whether this frame is sent out in AP or IBSS mode */ struct ieee80211_tx_rate_control { @@ -4119,7 +4119,7 @@ struct ieee80211_tx_rate_control { bool rts, short_preamble; u8 max_rate_idx; u32 rate_idx_mask; - u8 rate_idx_mcs_mask[IEEE80211_HT_MCS_MASK_LEN]; + u8 *rate_idx_mcs_mask; bool bss; }; diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index fdd95bd751a1..72ab1c0e3ca7 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -2417,9 +2417,22 @@ static int ieee80211_set_bitrate_mask(struct wiphy *wiphy, } for (i = 0; i < IEEE80211_NUM_BANDS; i++) { + struct ieee80211_supported_band *sband = wiphy->bands[i]; + int j; + sdata->rc_rateidx_mask[i] = mask->control[i].legacy; memcpy(sdata->rc_rateidx_mcs_mask[i], mask->control[i].mcs, sizeof(mask->control[i].mcs)); + + sdata->rc_has_mcs_mask[i] = false; + if (!sband) + continue; + + for (j = 0; j < IEEE80211_HT_MCS_MASK_LEN; j++) + if (~sdata->rc_rateidx_mcs_mask[i][j]) { + sdata->rc_has_mcs_mask[i] = true; + break; + } } return 0; diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index f4a65a340a52..21c1720eee00 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -739,6 +739,8 @@ struct ieee80211_sub_if_data { /* bitmap of allowed (non-MCS) rate indexes for rate control */ u32 rc_rateidx_mask[IEEE80211_NUM_BANDS]; + + bool rc_has_mcs_mask[IEEE80211_NUM_BANDS]; u8 rc_rateidx_mcs_mask[IEEE80211_NUM_BANDS][IEEE80211_HT_MCS_MASK_LEN]; union { diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c index dd88381c53b7..5d545dd2d050 100644 --- a/net/mac80211/rate.c +++ b/net/mac80211/rate.c @@ -460,9 +460,12 @@ void rate_control_get_rate(struct ieee80211_sub_if_data *sdata, * the common case. */ mask = sdata->rc_rateidx_mask[info->band]; - memcpy(mcs_mask, sdata->rc_rateidx_mcs_mask[info->band], - sizeof(mcs_mask)); - if (mask != (1 << txrc->sband->n_bitrates) - 1) { + if (mask != (1 << txrc->sband->n_bitrates) - 1 || txrc->rate_idx_mcs_mask) { + if (txrc->rate_idx_mcs_mask) + memcpy(mcs_mask, txrc->rate_idx_mcs_mask, sizeof(mcs_mask)); + else + memset(mcs_mask, 0xff, sizeof(mcs_mask)); + if (sta) { /* Filter out rates that the STA does not support */ mask &= sta->sta.supp_rates[info->band]; diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index bb82c873f774..15c1b286e280 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -642,9 +642,11 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx) txrc.max_rate_idx = -1; else txrc.max_rate_idx = fls(txrc.rate_idx_mask) - 1; - memcpy(txrc.rate_idx_mcs_mask, - tx->sdata->rc_rateidx_mcs_mask[info->band], - sizeof(txrc.rate_idx_mcs_mask)); + + if (tx->sdata->rc_has_mcs_mask[info->band]) + txrc.rate_idx_mcs_mask = + tx->sdata->rc_rateidx_mcs_mask[info->band]; + txrc.bss = (tx->sdata->vif.type == NL80211_IFTYPE_AP || tx->sdata->vif.type == NL80211_IFTYPE_MESH_POINT || tx->sdata->vif.type == NL80211_IFTYPE_ADHOC); @@ -2508,8 +2510,6 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, txrc.max_rate_idx = -1; else txrc.max_rate_idx = fls(txrc.rate_idx_mask) - 1; - memcpy(txrc.rate_idx_mcs_mask, sdata->rc_rateidx_mcs_mask[band], - sizeof(txrc.rate_idx_mcs_mask)); txrc.bss = true; rate_control_get_rate(sdata, NULL, &txrc); -- cgit v1.2.3 From 991fec091061b901e4fdcc8af4fd25d24a5a7bab Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Tue, 16 Apr 2013 13:38:43 +0200 Subject: mac80211: fix CTS protection handling The rates[0] CTS and RTS flags are only set after rate control has been called, so minstrel cannot use them to for setting the number of retries. This patch adds two new flags to explicitly indicate RTS/CTS use. Signed-off-by: Felix Fietkau Signed-off-by: Johannes Berg --- include/net/mac80211.h | 4 +++- net/mac80211/rc80211_minstrel.c | 6 ++++-- net/mac80211/tx.c | 8 ++++++++ 3 files changed, 15 insertions(+), 3 deletions(-) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 05dbb9788504..4f693a5c54de 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -655,7 +655,9 @@ struct ieee80211_tx_info { struct ieee80211_tx_rate rates[ IEEE80211_TX_MAX_RATES]; s8 rts_cts_rate_idx; - /* 3 bytes free */ + u8 use_rts:1; + u8 use_cts_prot:1; + /* 2 bytes free */ }; /* only needed before rate control */ unsigned long jiffies; diff --git a/net/mac80211/rc80211_minstrel.c b/net/mac80211/rc80211_minstrel.c index 1c36c9b4fa4a..eda290fb8bd2 100644 --- a/net/mac80211/rc80211_minstrel.c +++ b/net/mac80211/rc80211_minstrel.c @@ -209,9 +209,9 @@ minstrel_get_retry_count(struct minstrel_rate *mr, { unsigned int retry = mr->adjusted_retry_count; - if (info->control.rates[0].flags & IEEE80211_TX_RC_USE_RTS_CTS) + if (info->control.use_rts) retry = max(2U, min(mr->retry_count_rtscts, retry)); - else if (info->control.rates[0].flags & IEEE80211_TX_RC_USE_CTS_PROTECT) + else if (info->control.use_cts_prot) retry = max(2U, min(mr->retry_count_cts, retry)); return retry; } @@ -460,6 +460,8 @@ minstrel_rate_init(void *priv, struct ieee80211_supported_band *sband, } while ((tx_time < mp->segment_size) && (++mr->retry_count < mp->max_retry)); mr->adjusted_retry_count = mr->retry_count; + if (!(sband->bitrates[i].flags & IEEE80211_RATE_ERP_G)) + mr->retry_count_cts = mr->retry_count; } for (i = n; i < sband->n_bitrates; i++) { diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 15c1b286e280..6ca857f8f424 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -656,6 +656,9 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx) txrc.rts = rts = true; } + info->control.use_rts = rts; + info->control.use_cts_prot = tx->sdata->vif.bss_conf.use_cts_prot; + /* * Use short preamble if the BSS can handle it, but not for * management frames unless we know the receiver can handle @@ -766,6 +769,11 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx) */ if (rc_rate->flags & IEEE80211_TX_RC_MCS) { WARN_ON(rc_rate->idx > 76); + + if (!(rc_rate->flags & IEEE80211_TX_RC_USE_RTS_CTS) && + tx->sdata->vif.bss_conf.use_cts_prot) + rc_rate->flags |= + IEEE80211_TX_RC_USE_CTS_PROTECT; continue; } -- cgit v1.2.3 From fc225c3f5d1b6aa6f99c5c300af4605e4923ce79 Mon Sep 17 00:00:00 2001 From: David Herrmann Date: Sat, 6 Apr 2013 20:28:38 +0200 Subject: Bluetooth: remove unneeded hci_conn_hold/put_device() hci_conn_hold/put_device() is used to control when hci_conn->dev is no longer needed and can be deleted from the system. Lets first look how they are currently used throughout the code (excluding HIDP!). All code that uses hci_conn_hold_device() looks like this: ... hci_conn_hold_device(); hci_conn_add_sysfs(); ... On the other side, hci_conn_put_device() is exclusively used in hci_conn_del(). So, considering that hci_conn_del() must not be called twice (which would fail horribly), we know that hci_conn_put_device() is only called _once_ (which is in hci_conn_del()). On the other hand, hci_conn_add_sysfs() must not be called twice, either (it would call device_add twice, which breaks the device, see drivers/base/core.c). So we know that hci_conn_hold_device() is also called only once (it's only called directly before hci_conn_add_sysfs()). So hold and put are known to be called only once. That means we can safely remove them and directly call hci_conn_del_sysfs() in hci_conn_del(). But there is one issue left: HIDP also uses hci_conn_hold/put_device(). However, this case can be ignored and simply removed as it is totally broken. The issue is, the only thing HIDP delays with hci_conn_hold_device() is the removal of the hci_conn->dev from sysfs. But, the hci_conn device has no mechanism to get notified when its own parent (hci_dev) gets removed from sysfs. hci_dev_hold/put() does _not_ control when it is removed but only when the device object is created and destroyed. And hci_dev calls hci_conn_flush_*() when it removes itself from sysfs, which itself causes hci_conn_del() to be called, but it does _not_ cause hci_conn_del_sysfs() to be called, which is wrong. Hence, we fix it to call hci_conn_del_sysfs() in hci_conn_del(). This guarantees that a hci_conn object is removed from sysfs _before_ its parent hci_dev is removed. The changes to HIDP look scary, wrong and broken. However, if you look at the HIDP session management, you will notice they're already broken in the exact _same_ way (ever tried "unplugging" HIDP devices? Breaks _all_ the time). So this patch only makes HIDP look _scary_ and _obviously broken_. It does not break HIDP itself, it already is! See later patches in this series which fix HIDP to use proper session-management. Signed-off-by: David Herrmann Acked-by: Marcel Holtmann Signed-off-by: Gustavo Padovan --- include/net/bluetooth/hci_core.h | 4 ---- net/bluetooth/hci_conn.c | 17 +---------------- net/bluetooth/hci_event.c | 4 ---- net/bluetooth/hidp/core.c | 20 +++----------------- 4 files changed, 4 insertions(+), 41 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 78ea9c7c202c..5590cc4412c6 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -345,7 +345,6 @@ struct hci_conn { struct timer_list auto_accept_timer; struct device dev; - atomic_t devref; struct hci_dev *hdev; void *l2cap_data; @@ -601,9 +600,6 @@ int hci_conn_switch_role(struct hci_conn *conn, __u8 role); void hci_conn_enter_active_mode(struct hci_conn *conn, __u8 force_active); -void hci_conn_hold_device(struct hci_conn *conn); -void hci_conn_put_device(struct hci_conn *conn); - static inline void hci_conn_hold(struct hci_conn *conn) { BT_DBG("hcon %p orig refcnt %d", conn, atomic_read(&conn->refcnt)); diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index b1a02ce39a20..6b5b8e77cf0b 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -410,8 +410,6 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst) if (hdev->notify) hdev->notify(hdev, HCI_NOTIFY_CONN_ADD); - atomic_set(&conn->devref, 0); - hci_conn_init_sysfs(conn); return conn; @@ -460,7 +458,7 @@ int hci_conn_del(struct hci_conn *conn) skb_queue_purge(&conn->data_q); - hci_conn_put_device(conn); + hci_conn_del_sysfs(conn); hci_dev_put(hdev); @@ -847,19 +845,6 @@ void hci_conn_check_pending(struct hci_dev *hdev) hci_dev_unlock(hdev); } -void hci_conn_hold_device(struct hci_conn *conn) -{ - atomic_inc(&conn->devref); -} -EXPORT_SYMBOL(hci_conn_hold_device); - -void hci_conn_put_device(struct hci_conn *conn) -{ - if (atomic_dec_and_test(&conn->devref)) - hci_conn_del_sysfs(conn); -} -EXPORT_SYMBOL(hci_conn_put_device); - int hci_get_conn_list(void __user *arg) { struct hci_conn *c; diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index f6ea3c734269..688c1a9949cc 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -1706,7 +1706,6 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) } else conn->state = BT_CONNECTED; - hci_conn_hold_device(conn); hci_conn_add_sysfs(conn); if (test_bit(HCI_AUTH, &hdev->flags)) @@ -2987,7 +2986,6 @@ static void hci_sync_conn_complete_evt(struct hci_dev *hdev, conn->handle = __le16_to_cpu(ev->handle); conn->state = BT_CONNECTED; - hci_conn_hold_device(conn); hci_conn_add_sysfs(conn); break; @@ -3452,7 +3450,6 @@ static void hci_phy_link_complete_evt(struct hci_dev *hdev, hcon->disc_timeout = HCI_DISCONN_TIMEOUT; hci_conn_drop(hcon); - hci_conn_hold_device(hcon); hci_conn_add_sysfs(hcon); amp_physical_cfm(bredr_hcon, hcon); @@ -3586,7 +3583,6 @@ static void hci_le_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) conn->handle = __le16_to_cpu(ev->handle); conn->state = BT_CONNECTED; - hci_conn_hold_device(conn); hci_conn_add_sysfs(conn); hci_proto_connect_cfm(conn, ev->status); diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index 4ab82cb3eac3..9734136d6431 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c @@ -73,18 +73,6 @@ static struct hidp_session *__hidp_get_session(bdaddr_t *bdaddr) return NULL; } -static void __hidp_link_session(struct hidp_session *session) -{ - list_add(&session->list, &hidp_session_list); -} - -static void __hidp_unlink_session(struct hidp_session *session) -{ - hci_conn_put_device(session->conn); - - list_del(&session->list); -} - static void __hidp_copy_session(struct hidp_session *session, struct hidp_conninfo *ci) { memset(ci, 0, sizeof(*ci)); @@ -760,7 +748,7 @@ static int hidp_session(void *arg) fput(session->ctrl_sock->file); - __hidp_unlink_session(session); + list_del(&session->list); up_write(&hidp_session_sem); @@ -783,8 +771,6 @@ static struct hci_conn *hidp_get_connection(struct hidp_session *session) hci_dev_lock(hdev); conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, dst); - if (conn) - hci_conn_hold_device(conn); hci_dev_unlock(hdev); hci_dev_put(hdev); @@ -1026,7 +1012,7 @@ int hidp_add_connection(struct hidp_connadd_req *req, struct socket *ctrl_sock, session->flags = req->flags & (1 << HIDP_BLUETOOTH_VENDOR_ID); session->idle_to = req->idle_to; - __hidp_link_session(session); + list_add(&session->list, &hidp_session_list); if (req->rd_size > 0) { err = hidp_setup_hid(session, req); @@ -1106,7 +1092,7 @@ unlink: session->rd_data = NULL; purge: - __hidp_unlink_session(session); + list_del(&session->list); skb_queue_purge(&session->ctrl_transmit); skb_queue_purge(&session->intr_transmit); -- cgit v1.2.3 From 8d12356f33f819ec0d064e233f7ca8e59eaa38ef Mon Sep 17 00:00:00 2001 From: David Herrmann Date: Sat, 6 Apr 2013 20:28:39 +0200 Subject: Bluetooth: introduce hci_conn ref-counting We currently do not allow using hci_conn from outside of HCI-core. However, several other users could make great use of it. This includes HIDP, rfcomm and all other sub-protocols that rely on an active connection. Hence, we now introduce hci_conn ref-counting. We currently never call get_device(). put_device() is exclusively used in hci_conn_del_sysfs(). Hence, we currently never have a greater device-refcnt than 1. Therefore, it is safe to move the put_device() call from hci_conn_del_sysfs() to hci_conn_del() (it's the only caller). In fact, this even fixes a "use-after-free" bug as we access hci_conn after calling hci_conn_del_sysfs() in hci_conn_del(). From now on we can add references to hci_conn objects in other layers (like l2cap_sock, HIDP, rfcomm, ...) and grab a reference via hci_conn_get(). This does _not_ guarantee, that the connection is still alive. But, this isn't what we want. We can simply lock the hci_conn device and use "device_is_registered(hci_conn->dev)" to test that. However, this is hardly necessary as outside users should never rely on the HCI connection to be alive, anyway. Instead, they should solely rely on the device-object to be available. But if sub-devices want the hci_conn object as sysfs parent, they need to be notified when the connection drops. This will be introduced in later patches with l2cap_users. Signed-off-by: David Herrmann Acked-by: Marcel Holtmann Signed-off-by: Gustavo Padovan --- include/net/bluetooth/hci_core.h | 31 +++++++++++++++++++++++++++++++ net/bluetooth/hci_conn.c | 3 +-- net/bluetooth/hci_sysfs.c | 1 - 3 files changed, 32 insertions(+), 3 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 5590cc4412c6..d324b11a0c8f 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -600,6 +600,37 @@ int hci_conn_switch_role(struct hci_conn *conn, __u8 role); void hci_conn_enter_active_mode(struct hci_conn *conn, __u8 force_active); +/* + * hci_conn_get() and hci_conn_put() are used to control the life-time of an + * "hci_conn" object. They do not guarantee that the hci_conn object is running, + * working or anything else. They just guarantee that the object is available + * and can be dereferenced. So you can use its locks, local variables and any + * other constant data. + * Before accessing runtime data, you _must_ lock the object and then check that + * it is still running. As soon as you release the locks, the connection might + * get dropped, though. + * + * On the other hand, hci_conn_hold() and hci_conn_drop() are used to control + * how long the underlying connection is held. So every channel that runs on the + * hci_conn object calls this to prevent the connection from disappearing. As + * long as you hold a device, you must also guarantee that you have a valid + * reference to the device via hci_conn_get() (or the initial reference from + * hci_conn_add()). + * The hold()/drop() ref-count is known to drop below 0 sometimes, which doesn't + * break because nobody cares for that. But this means, we cannot use + * _get()/_drop() in it, but require the caller to have a valid ref (FIXME). + */ + +static inline void hci_conn_get(struct hci_conn *conn) +{ + get_device(&conn->dev); +} + +static inline void hci_conn_put(struct hci_conn *conn) +{ + put_device(&conn->dev); +} + static inline void hci_conn_hold(struct hci_conn *conn) { BT_DBG("hcon %p orig refcnt %d", conn, atomic_read(&conn->refcnt)); diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 6b5b8e77cf0b..6c7f36379722 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -462,8 +462,7 @@ int hci_conn_del(struct hci_conn *conn) hci_dev_put(hdev); - if (conn->handle == 0) - kfree(conn); + hci_conn_put(conn); return 0; } diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c index ff38561385de..6fe15c822847 100644 --- a/net/bluetooth/hci_sysfs.c +++ b/net/bluetooth/hci_sysfs.c @@ -146,7 +146,6 @@ void hci_conn_del_sysfs(struct hci_conn *conn) } device_del(&conn->dev); - put_device(&conn->dev); hci_dev_put(hdev); } -- cgit v1.2.3 From f53c20e93612f708ed3b378ec9735b779dcd7d59 Mon Sep 17 00:00:00 2001 From: David Herrmann Date: Sat, 6 Apr 2013 20:28:42 +0200 Subject: Bluetooth: allow constant arguments for bacmp()/bacpy() There is no reason to require the source arguments to be writeable so fix this to allow constant source addresses. Signed-off-by: David Herrmann Acked-by: Marcel Holtmann Signed-off-by: Gustavo Padovan --- include/net/bluetooth/bluetooth.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index 591fee7d0060..6912ef9a1881 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -193,11 +193,11 @@ static inline bool bdaddr_type_is_le(__u8 type) #define BDADDR_LOCAL (&(bdaddr_t) {{0, 0, 0, 0xff, 0xff, 0xff} }) /* Copy, swap, convert BD Address */ -static inline int bacmp(bdaddr_t *ba1, bdaddr_t *ba2) +static inline int bacmp(const bdaddr_t *ba1, const bdaddr_t *ba2) { return memcmp(ba1, ba2, sizeof(bdaddr_t)); } -static inline void bacpy(bdaddr_t *dst, bdaddr_t *src) +static inline void bacpy(bdaddr_t *dst, const bdaddr_t *src) { memcpy(dst, src, sizeof(bdaddr_t)); } -- cgit v1.2.3 From 9c903e373c11f62d62bce1209f662ca92589a075 Mon Sep 17 00:00:00 2001 From: David Herrmann Date: Sat, 6 Apr 2013 20:28:44 +0200 Subject: Bluetooth: l2cap: introduce l2cap_conn ref-counting If we want to use l2cap_conn outside of l2cap_core.c, we need refcounting for these objects. Otherwise, we cannot synchronize l2cap locks with outside locks and end up with deadlocks. Hence, introduce ref-counting for l2cap_conn objects. This doesn't affect l2cap internals at all, as they use a direct synchronization. We also keep a reference to the parent hci_conn for locking purposes as l2cap_conn depends on this. This doesn't affect the connection itself but only the lifetime of the (dead) object. Signed-off-by: David Herrmann Acked-by: Marcel Holtmann Signed-off-by: Gustavo Padovan --- include/net/bluetooth/l2cap.h | 4 ++++ net/bluetooth/l2cap_core.c | 25 ++++++++++++++++++++++++- 2 files changed, 28 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index 278830ef92cd..7b4cc5b98535 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -583,6 +583,7 @@ struct l2cap_conn { struct list_head chan_l; struct mutex chan_lock; + struct kref ref; }; #define L2CAP_INFO_CL_MTU_REQ_SENT 0x01 @@ -813,4 +814,7 @@ void l2cap_logical_cfm(struct l2cap_chan *chan, struct hci_chan *hchan, u8 status); void __l2cap_physical_cfm(struct l2cap_chan *chan, int result); +void l2cap_conn_get(struct l2cap_conn *conn); +void l2cap_conn_put(struct l2cap_conn *conn); + #endif /* __L2CAP_H */ diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index e09b89be1c4d..be9ad89339cd 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -1486,7 +1486,8 @@ static void l2cap_conn_del(struct hci_conn *hcon, int err) } hcon->l2cap_data = NULL; - kfree(conn); + conn->hchan = NULL; + l2cap_conn_put(conn); } static void security_timeout(struct work_struct *work) @@ -1520,8 +1521,10 @@ static struct l2cap_conn *l2cap_conn_add(struct hci_conn *hcon) return NULL; } + kref_init(&conn->ref); hcon->l2cap_data = conn; conn->hcon = hcon; + hci_conn_get(conn->hcon); conn->hchan = hchan; BT_DBG("hcon %p conn %p hchan %p", hcon, conn, hchan); @@ -1558,6 +1561,26 @@ static struct l2cap_conn *l2cap_conn_add(struct hci_conn *hcon) return conn; } +static void l2cap_conn_free(struct kref *ref) +{ + struct l2cap_conn *conn = container_of(ref, struct l2cap_conn, ref); + + hci_conn_put(conn->hcon); + kfree(conn); +} + +void l2cap_conn_get(struct l2cap_conn *conn) +{ + kref_get(&conn->ref); +} +EXPORT_SYMBOL(l2cap_conn_get); + +void l2cap_conn_put(struct l2cap_conn *conn) +{ + kref_put(&conn->ref, l2cap_conn_free); +} +EXPORT_SYMBOL(l2cap_conn_put); + /* ---- Socket interface ---- */ /* Find socket with psm and source / destination bdaddr. -- cgit v1.2.3 From 2c8e1411e93391c5a78f55b09697a997474a4707 Mon Sep 17 00:00:00 2001 From: David Herrmann Date: Sat, 6 Apr 2013 20:28:45 +0200 Subject: Bluetooth: l2cap: add l2cap_user sub-modules Several sub-modules like HIDP, rfcomm, ... need to track l2cap connections. The l2cap_conn->hcon->dev object is used as parent for sysfs devices so the sub-modules need to be notified when the hci_conn object is removed from sysfs. As submodules normally use the l2cap layer, the l2cap_user objects are registered there instead of on the underlying hci_conn object. This avoids any direct dependency on the HCI layer and lets the l2cap core handle any specifics. This patch introduces l2cap_user objects which contain a "probe" and "remove" callback. You can register them on any l2cap_conn object and if it is active, the "probe" callback will get called. Otherwise, an error is returned. The l2cap_conn object will call your "remove" callback directly before it is removed from user-space. This allows you to remove your submodules _before_ the parent l2cap_conn and hci_conn object is removed. At any time you can asynchronously unregister your l2cap_user object if your submodule vanishes before the l2cap_conn object does. There is no way around l2cap_user. If we want wire-protocols in the kernel, we always want the hci_conn object as parent in the sysfs tree. We cannot use a channel here since we might need multiple channels for a single protocol. But the problem is, we _must_ get notified when an l2cap_conn object is removed. We cannot use reference-counting for object-removal! This is not how it works. If a hardware is removed, we should immediately remove the object from sysfs. Any other behavior would be inconsistent with the rest of the system. Also note that device_del() might sleep, but it doesn't wait for user-space or block very long. It only _unlinks_ the object from sysfs and the whole device-tree. Everything else is handled by ref-counts! This is exactly what the other sub-modules must do: unlink their devices when the "remove" l2cap_user callback is called. They should not do any cleanup or synchronous shutdowns. Signed-off-by: David Herrmann Acked-by: Marcel Holtmann Signed-off-by: Gustavo Padovan --- include/net/bluetooth/l2cap.h | 10 +++++ net/bluetooth/l2cap_core.c | 86 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 96 insertions(+) (limited to 'include/net') diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index 7b4cc5b98535..fb94cf13c777 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -584,6 +584,13 @@ struct l2cap_conn { struct list_head chan_l; struct mutex chan_lock; struct kref ref; + struct list_head users; +}; + +struct l2cap_user { + struct list_head list; + int (*probe) (struct l2cap_conn *conn, struct l2cap_user *user); + void (*remove) (struct l2cap_conn *conn, struct l2cap_user *user); }; #define L2CAP_INFO_CL_MTU_REQ_SENT 0x01 @@ -817,4 +824,7 @@ void __l2cap_physical_cfm(struct l2cap_chan *chan, int result); void l2cap_conn_get(struct l2cap_conn *conn); void l2cap_conn_put(struct l2cap_conn *conn); +int l2cap_register_user(struct l2cap_conn *conn, struct l2cap_user *user); +void l2cap_unregister_user(struct l2cap_conn *conn, struct l2cap_user *user); + #endif /* __L2CAP_H */ diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index be9ad89339cd..eae1d9f90b68 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -1446,6 +1446,89 @@ static void l2cap_info_timeout(struct work_struct *work) l2cap_conn_start(conn); } +/* + * l2cap_user + * External modules can register l2cap_user objects on l2cap_conn. The ->probe + * callback is called during registration. The ->remove callback is called + * during unregistration. + * An l2cap_user object can either be explicitly unregistered or when the + * underlying l2cap_conn object is deleted. This guarantees that l2cap->hcon, + * l2cap->hchan, .. are valid as long as the remove callback hasn't been called. + * External modules must own a reference to the l2cap_conn object if they intend + * to call l2cap_unregister_user(). The l2cap_conn object might get destroyed at + * any time if they don't. + */ + +int l2cap_register_user(struct l2cap_conn *conn, struct l2cap_user *user) +{ + struct hci_dev *hdev = conn->hcon->hdev; + int ret; + + /* We need to check whether l2cap_conn is registered. If it is not, we + * must not register the l2cap_user. l2cap_conn_del() is unregisters + * l2cap_conn objects, but doesn't provide its own locking. Instead, it + * relies on the parent hci_conn object to be locked. This itself relies + * on the hci_dev object to be locked. So we must lock the hci device + * here, too. */ + + hci_dev_lock(hdev); + + if (user->list.next || user->list.prev) { + ret = -EINVAL; + goto out_unlock; + } + + /* conn->hchan is NULL after l2cap_conn_del() was called */ + if (!conn->hchan) { + ret = -ENODEV; + goto out_unlock; + } + + ret = user->probe(conn, user); + if (ret) + goto out_unlock; + + list_add(&user->list, &conn->users); + ret = 0; + +out_unlock: + hci_dev_unlock(hdev); + return ret; +} +EXPORT_SYMBOL(l2cap_register_user); + +void l2cap_unregister_user(struct l2cap_conn *conn, struct l2cap_user *user) +{ + struct hci_dev *hdev = conn->hcon->hdev; + + hci_dev_lock(hdev); + + if (!user->list.next || !user->list.prev) + goto out_unlock; + + list_del(&user->list); + user->list.next = NULL; + user->list.prev = NULL; + user->remove(conn, user); + +out_unlock: + hci_dev_unlock(hdev); +} +EXPORT_SYMBOL(l2cap_unregister_user); + +static void l2cap_unregister_all_users(struct l2cap_conn *conn) +{ + struct l2cap_user *user; + + while (!list_empty(&conn->users)) { + user = list_first_entry(&conn->users, struct l2cap_user, list); + list_del(&user->list); + user->list.next = NULL; + user->list.prev = NULL; + user->remove(conn, user); + } +} + static void l2cap_conn_del(struct hci_conn *hcon, int err) { struct l2cap_conn *conn = hcon->l2cap_data; @@ -1458,6 +1541,8 @@ static void l2cap_conn_del(struct hci_conn *hcon, int err) kfree_skb(conn->rx_skb); + l2cap_unregister_all_users(conn); + mutex_lock(&conn->chan_lock); /* Kill channels */ @@ -1550,6 +1635,7 @@ static struct l2cap_conn *l2cap_conn_add(struct hci_conn *hcon) mutex_init(&conn->chan_lock); INIT_LIST_HEAD(&conn->chan_l); + INIT_LIST_HEAD(&conn->users); if (hcon->type == LE_LINK) INIT_DELAYED_WORK(&conn->security_timer, security_timeout); -- cgit v1.2.3 From 542c2d832087aa78566be49aa4284779a0a687b3 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 16 Apr 2013 11:07:10 +0000 Subject: net: sctp: sctp_ssnmap: remove 'malloced' element from struct sctp_ssnmap_init() can only be called from sctp_ssnmap_new() where malloced is always set to 1. Thus, when we call sctp_ssnmap_free() the test for map->malloced evaluates always to true. Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 1 - net/sctp/ssnmap.c | 23 ++++++++++++----------- 2 files changed, 12 insertions(+), 12 deletions(-) (limited to 'include/net') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index e12aa77abc59..3c1bb8dd1e26 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -399,7 +399,6 @@ struct sctp_stream { struct sctp_ssnmap { struct sctp_stream in; struct sctp_stream out; - int malloced; }; struct sctp_ssnmap *sctp_ssnmap_new(__u16 in, __u16 out, diff --git a/net/sctp/ssnmap.c b/net/sctp/ssnmap.c index 825ea94415b3..da8603523808 100644 --- a/net/sctp/ssnmap.c +++ b/net/sctp/ssnmap.c @@ -74,7 +74,6 @@ struct sctp_ssnmap *sctp_ssnmap_new(__u16 in, __u16 out, if (!sctp_ssnmap_init(retval, in, out)) goto fail_map; - retval->malloced = 1; SCTP_DBG_OBJCNT_INC(ssnmap); return retval; @@ -118,14 +117,16 @@ void sctp_ssnmap_clear(struct sctp_ssnmap *map) /* Dispose of a ssnmap. */ void sctp_ssnmap_free(struct sctp_ssnmap *map) { - if (map && map->malloced) { - int size; - - size = sctp_ssnmap_size(map->in.len, map->out.len); - if (size <= KMALLOC_MAX_SIZE) - kfree(map); - else - free_pages((unsigned long)map, get_order(size)); - SCTP_DBG_OBJCNT_DEC(ssnmap); - } + int size; + + if (unlikely(!map)) + return; + + size = sctp_ssnmap_size(map->in.len, map->out.len); + if (size <= KMALLOC_MAX_SIZE) + kfree(map); + else + free_pages((unsigned long)map, get_order(size)); + + SCTP_DBG_OBJCNT_DEC(ssnmap); } -- cgit v1.2.3 From ee16371e6c737684215ee10b4b9756b610d81272 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 16 Apr 2013 11:07:11 +0000 Subject: net: sctp: sctp_inq: remove dead code sctp_inq is never kmalloced, since it's integrated into sctp_ep_common and only initialized from eps and assocs. Therefore, remove the dead code from there. Signed-off-by: Daniel Borkmann Acked-by: Neil Horman Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 2 -- net/sctp/inqueue.c | 7 ------- 2 files changed, 9 deletions(-) (limited to 'include/net') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 3c1bb8dd1e26..125a19c73531 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -991,8 +991,6 @@ struct sctp_inq { * messages. */ struct work_struct immediate; - - int malloced; /* Is this structure kfree()able? */ }; void sctp_inq_init(struct sctp_inq *); diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c index 2d5ad280de38..3221d073448c 100644 --- a/net/sctp/inqueue.c +++ b/net/sctp/inqueue.c @@ -58,8 +58,6 @@ void sctp_inq_init(struct sctp_inq *queue) /* Create a task for delivering data. */ INIT_WORK(&queue->immediate, NULL); - - queue->malloced = 0; } /* Release the memory associated with an SCTP inqueue. */ @@ -80,11 +78,6 @@ void sctp_inq_free(struct sctp_inq *queue) sctp_chunk_free(queue->in_progress); queue->in_progress = NULL; } - - if (queue->malloced) { - /* Dump the master memory segment. */ - kfree(queue); - } } /* Put a new packet in an SCTP inqueue. -- cgit v1.2.3 From 165a4c31278c980862b2c2ddec408cf30341f3ec Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 16 Apr 2013 11:07:12 +0000 Subject: net: sctp: sctp_outq: remove 'malloced' from its struct sctp_outq is embedded into sctp_association, and thus never kmalloced in any way. Also, malloced is always 0, thus kfree() is never called. Therefore, remove that dead piece of code. Signed-off-by: Daniel Borkmann Acked-by: Neil Horman Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 3 --- net/sctp/outqueue.c | 6 ------ 2 files changed, 9 deletions(-) (limited to 'include/net') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 125a19c73531..73fd5de4d4cc 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1059,9 +1059,6 @@ struct sctp_outq { /* Is this structure empty? */ char empty; - - /* Are we kfree()able? */ - char malloced; }; void sctp_outq_init(struct sctp_association *, struct sctp_outq *); diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 01dca753db16..d4c137e1ab85 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -217,8 +217,6 @@ void sctp_outq_init(struct sctp_association *asoc, struct sctp_outq *q) q->outstanding_bytes = 0; q->empty = 1; q->cork = 0; - - q->malloced = 0; q->out_qlen = 0; } @@ -295,10 +293,6 @@ void sctp_outq_free(struct sctp_outq *q) { /* Throw away leftover chunks. */ __sctp_outq_teardown(q); - - /* If we were kmalloc()'d, free the memory. */ - if (q->malloced) - kfree(q); } /* Put a new chunk in an sctp_outq. */ -- cgit v1.2.3 From 8fa5df6d210a09241876b74d156c57d833dd057b Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 16 Apr 2013 11:07:15 +0000 Subject: net: sctp: sctp_transport: remove unused variable sctp_transport's member 'malloced' is set to 1, never evaluated and the structure is kfreed anyway. So just remove it. Signed-off-by: Daniel Borkmann Acked-by: Neil Horman Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 5 +---- net/sctp/transport.c | 1 - 2 files changed, 1 insertion(+), 5 deletions(-) (limited to 'include/net') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 73fd5de4d4cc..d581af00120b 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -779,10 +779,7 @@ struct sctp_transport { hb_sent:1, /* Is the Path MTU update pending on this tranport */ - pmtu_pending:1, - - /* Is this structure kfree()able? */ - malloced:1; + pmtu_pending:1; /* Has this transport moved the ctsn since we last sacked */ __u32 sack_generation; diff --git a/net/sctp/transport.c b/net/sctp/transport.c index fafd2a461ba0..098f1d5f769e 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -123,7 +123,6 @@ struct sctp_transport *sctp_transport_new(struct net *net, if (!sctp_transport_init(net, transport, addr, gfp)) goto fail_init; - transport->malloced = 1; SCTP_DBG_OBJCNT_INC(transport); return transport; -- cgit v1.2.3 From 50181c07cbde370986c4925b830ca291a2fc31ab Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 16 Apr 2013 11:07:16 +0000 Subject: net: sctp: sctp_bind_addr: remove dead code The sctp_bind_addr structure has a 'malloced' member that is always set to 0, thus in sctp_bind_addr_free() the kfree() part can never be called. This part is embedded into sctp_ep_common anyway and never alloced. Signed-off-by: Daniel Borkmann Acked-by: Neil Horman Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 2 -- net/sctp/bind_addr.c | 7 ------- 2 files changed, 9 deletions(-) (limited to 'include/net') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index d581af00120b..64d469845f25 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1093,8 +1093,6 @@ struct sctp_bind_addr { * peer(s) in INIT and INIT ACK chunks. */ struct list_head address_list; - - int malloced; /* Are we kfree()able? */ }; void sctp_bind_addr_init(struct sctp_bind_addr *, __u16 port); diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c index d886b3bf84f5..41145fe31813 100644 --- a/net/sctp/bind_addr.c +++ b/net/sctp/bind_addr.c @@ -131,8 +131,6 @@ int sctp_bind_addr_dup(struct sctp_bind_addr *dest, */ void sctp_bind_addr_init(struct sctp_bind_addr *bp, __u16 port) { - bp->malloced = 0; - INIT_LIST_HEAD(&bp->address_list); bp->port = port; } @@ -155,11 +153,6 @@ void sctp_bind_addr_free(struct sctp_bind_addr *bp) { /* Empty the bind address list. */ sctp_bind_addr_clean(bp); - - if (bp->malloced) { - kfree(bp); - SCTP_DBG_OBJCNT_DEC(bind_addr); - } } /* Add an address to the bind address list in the SCTP_bind_addr structure. */ -- cgit v1.2.3 From c1db7a26ac3f7223a38eaeb46a77d0cf9e6a0d8f Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 16 Apr 2013 11:07:17 +0000 Subject: net: sctp: sctp_ulpq: remove 'malloced' struct member The structure sctp_ulpq is embedded into sctp_association and never separately allocated, also ulpq->malloced is always 0, so that kfree() is never called. Therefore, remove this code. Signed-off-by: Daniel Borkmann Acked-by: Neil Horman Signed-off-by: David S. Miller --- include/net/sctp/ulpqueue.h | 1 - net/sctp/ulpqueue.c | 3 --- 2 files changed, 4 deletions(-) (limited to 'include/net') diff --git a/include/net/sctp/ulpqueue.h b/include/net/sctp/ulpqueue.h index ff1b8ba73ab1..00e50ba3f24b 100644 --- a/include/net/sctp/ulpqueue.h +++ b/include/net/sctp/ulpqueue.h @@ -49,7 +49,6 @@ /* A structure to carry information to the ULP (e.g. Sockets API) */ struct sctp_ulpq { - char malloced; char pd_mode; struct sctp_association *asoc; struct sk_buff_head reasm; diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c index 0fd5b3d2df03..04e3d470f877 100644 --- a/net/sctp/ulpqueue.c +++ b/net/sctp/ulpqueue.c @@ -68,7 +68,6 @@ struct sctp_ulpq *sctp_ulpq_init(struct sctp_ulpq *ulpq, skb_queue_head_init(&ulpq->reasm); skb_queue_head_init(&ulpq->lobby); ulpq->pd_mode = 0; - ulpq->malloced = 0; return ulpq; } @@ -96,8 +95,6 @@ void sctp_ulpq_flush(struct sctp_ulpq *ulpq) void sctp_ulpq_free(struct sctp_ulpq *ulpq) { sctp_ulpq_flush(ulpq); - if (ulpq->malloced) - kfree(ulpq); } /* Process an incoming DATA chunk. */ -- cgit v1.2.3 From fa5513be2b709c8ce6ff0b11d0715760a9a70ffd Mon Sep 17 00:00:00 2001 From: Frédéric Dalleau Date: Tue, 16 Apr 2013 17:28:58 +0200 Subject: Bluetooth: Move and rename hci_conn_accept MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since this function is only used by sco, move it from hci_event.c to sco.c and rename to sco_conn_defer_accept. Make it static. Signed-off-by: Frédéric Dalleau Acked-by: Marcel Holtmann Signed-off-by: Gustavo Padovan --- include/net/bluetooth/hci_core.h | 1 - net/bluetooth/hci_event.c | 36 ------------------------------------ net/bluetooth/sco.c | 38 +++++++++++++++++++++++++++++++++++++- 3 files changed, 37 insertions(+), 38 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index d324b11a0c8f..74f77b7ef136 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -583,7 +583,6 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst); int hci_conn_del(struct hci_conn *conn); void hci_conn_hash_flush(struct hci_dev *hdev); void hci_conn_check_pending(struct hci_dev *hdev); -void hci_conn_accept(struct hci_conn *conn, int mask); struct hci_chan *hci_chan_create(struct hci_conn *conn); void hci_chan_del(struct hci_chan *chan); diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 6fa909075ceb..e4d4d231fd8a 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -1752,42 +1752,6 @@ unlock: hci_conn_check_pending(hdev); } -void hci_conn_accept(struct hci_conn *conn, int mask) -{ - struct hci_dev *hdev = conn->hdev; - - BT_DBG("conn %p", conn); - - conn->state = BT_CONFIG; - - if (!lmp_esco_capable(hdev)) { - struct hci_cp_accept_conn_req cp; - - bacpy(&cp.bdaddr, &conn->dst); - - if (lmp_rswitch_capable(hdev) && (mask & HCI_LM_MASTER)) - cp.role = 0x00; /* Become master */ - else - cp.role = 0x01; /* Remain slave */ - - hci_send_cmd(hdev, HCI_OP_ACCEPT_CONN_REQ, sizeof(cp), &cp); - } else /* lmp_esco_capable(hdev)) */ { - struct hci_cp_accept_sync_conn_req cp; - - bacpy(&cp.bdaddr, &conn->dst); - cp.pkt_type = cpu_to_le16(conn->pkt_type); - - cp.tx_bandwidth = __constant_cpu_to_le32(0x00001f40); - cp.rx_bandwidth = __constant_cpu_to_le32(0x00001f40); - cp.max_latency = __constant_cpu_to_le16(0xffff); - cp.content_format = cpu_to_le16(hdev->voice_setting); - cp.retrans_effort = 0xff; - - hci_send_cmd(hdev, HCI_OP_ACCEPT_SYNC_CONN_REQ, - sizeof(cp), &cp); - } -} - static void hci_conn_request_evt(struct hci_dev *hdev, struct sk_buff *skb) { struct hci_ev_conn_request *ev = (void *) skb->data; diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index d8836802526c..9e62102443dc 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -652,6 +652,42 @@ static int sco_sock_sendmsg(struct kiocb *iocb, struct socket *sock, return err; } +static void sco_conn_defer_accept(struct hci_conn *conn, int mask) +{ + struct hci_dev *hdev = conn->hdev; + + BT_DBG("conn %p", conn); + + conn->state = BT_CONFIG; + + if (!lmp_esco_capable(hdev)) { + struct hci_cp_accept_conn_req cp; + + bacpy(&cp.bdaddr, &conn->dst); + + if (lmp_rswitch_capable(hdev) && (mask & HCI_LM_MASTER)) + cp.role = 0x00; /* Become master */ + else + cp.role = 0x01; /* Remain slave */ + + hci_send_cmd(hdev, HCI_OP_ACCEPT_CONN_REQ, sizeof(cp), &cp); + } else { + struct hci_cp_accept_sync_conn_req cp; + + bacpy(&cp.bdaddr, &conn->dst); + cp.pkt_type = cpu_to_le16(conn->pkt_type); + + cp.tx_bandwidth = __constant_cpu_to_le32(0x00001f40); + cp.rx_bandwidth = __constant_cpu_to_le32(0x00001f40); + cp.max_latency = __constant_cpu_to_le16(0xffff); + cp.content_format = cpu_to_le16(hdev->voice_setting); + cp.retrans_effort = 0xff; + + hci_send_cmd(hdev, HCI_OP_ACCEPT_SYNC_CONN_REQ, + sizeof(cp), &cp); + } +} + static int sco_sock_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t len, int flags) { @@ -662,7 +698,7 @@ static int sco_sock_recvmsg(struct kiocb *iocb, struct socket *sock, if (sk->sk_state == BT_CONNECT2 && test_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags)) { - hci_conn_accept(pi->conn->hcon, 0); + sco_conn_defer_accept(pi->conn->hcon, 0); sk->sk_state = BT_CONFIG; release_sock(sk); -- cgit v1.2.3 From cad718ed2f6fd204b2c5cac6b611fc3fcde7b183 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Wed, 17 Apr 2013 15:00:51 +0300 Subject: Bluetooth: Track feature pages in a single table The local and remote features are organized by page number. Page 0 are the LMP features, page 1 the host features, and any pages beyond 1 features that future core specification versions may define. So far we've only had the first two pages and two separate variables has been convenient enough, however with the introduction of Core Specification Addendum 4 there are features defined on page 2. Instead of requiring the addition of a new variable each time a new page number is defined, this patch refactors the code to use a single table for the features. The patch needs to update both the hci_dev and hci_conn structures since there are macros that depend on the features being represented in the same way in both of them. Signed-off-by: Johan Hedberg Signed-off-by: Gustavo Padovan --- include/net/bluetooth/hci_core.h | 49 ++++++++++++++++----------------- net/bluetooth/hci_event.c | 58 +++++++++++++++++++++------------------- net/bluetooth/hci_sysfs.c | 16 +++++------ 3 files changed, 63 insertions(+), 60 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 74f77b7ef136..26822967e29c 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -134,6 +134,8 @@ struct amp_assoc { __u8 data[HCI_MAX_AMP_ASSOC_SIZE]; }; +#define HCI_MAX_PAGES 2 + #define NUM_REASSEMBLY 4 struct hci_dev { struct list_head list; @@ -151,8 +153,7 @@ struct hci_dev { __u8 dev_class[3]; __u8 major_class; __u8 minor_class; - __u8 features[8]; - __u8 host_features[8]; + __u8 features[HCI_MAX_PAGES][8]; __u8 le_features[8]; __u8 le_white_list_size; __u8 le_states[8]; @@ -313,7 +314,7 @@ struct hci_conn { bool out; __u8 attempt; __u8 dev_class[3]; - __u8 features[8]; + __u8 features[HCI_MAX_PAGES][8]; __u16 interval; __u16 pkt_type; __u16 link_policy; @@ -786,29 +787,29 @@ void hci_conn_del_sysfs(struct hci_conn *conn); #define SET_HCIDEV_DEV(hdev, pdev) ((hdev)->dev.parent = (pdev)) /* ----- LMP capabilities ----- */ -#define lmp_encrypt_capable(dev) ((dev)->features[0] & LMP_ENCRYPT) -#define lmp_rswitch_capable(dev) ((dev)->features[0] & LMP_RSWITCH) -#define lmp_hold_capable(dev) ((dev)->features[0] & LMP_HOLD) -#define lmp_sniff_capable(dev) ((dev)->features[0] & LMP_SNIFF) -#define lmp_park_capable(dev) ((dev)->features[1] & LMP_PARK) -#define lmp_inq_rssi_capable(dev) ((dev)->features[3] & LMP_RSSI_INQ) -#define lmp_esco_capable(dev) ((dev)->features[3] & LMP_ESCO) -#define lmp_bredr_capable(dev) (!((dev)->features[4] & LMP_NO_BREDR)) -#define lmp_le_capable(dev) ((dev)->features[4] & LMP_LE) -#define lmp_sniffsubr_capable(dev) ((dev)->features[5] & LMP_SNIFF_SUBR) -#define lmp_pause_enc_capable(dev) ((dev)->features[5] & LMP_PAUSE_ENC) -#define lmp_ext_inq_capable(dev) ((dev)->features[6] & LMP_EXT_INQ) -#define lmp_le_br_capable(dev) !!((dev)->features[6] & LMP_SIMUL_LE_BR) -#define lmp_ssp_capable(dev) ((dev)->features[6] & LMP_SIMPLE_PAIR) -#define lmp_no_flush_capable(dev) ((dev)->features[6] & LMP_NO_FLUSH) -#define lmp_lsto_capable(dev) ((dev)->features[7] & LMP_LSTO) -#define lmp_inq_tx_pwr_capable(dev) ((dev)->features[7] & LMP_INQ_TX_PWR) -#define lmp_ext_feat_capable(dev) ((dev)->features[7] & LMP_EXTFEATURES) +#define lmp_encrypt_capable(dev) ((dev)->features[0][0] & LMP_ENCRYPT) +#define lmp_rswitch_capable(dev) ((dev)->features[0][0] & LMP_RSWITCH) +#define lmp_hold_capable(dev) ((dev)->features[0][0] & LMP_HOLD) +#define lmp_sniff_capable(dev) ((dev)->features[0][0] & LMP_SNIFF) +#define lmp_park_capable(dev) ((dev)->features[0][1] & LMP_PARK) +#define lmp_inq_rssi_capable(dev) ((dev)->features[0][3] & LMP_RSSI_INQ) +#define lmp_esco_capable(dev) ((dev)->features[0][3] & LMP_ESCO) +#define lmp_bredr_capable(dev) (!((dev)->features[0][4] & LMP_NO_BREDR)) +#define lmp_le_capable(dev) ((dev)->features[0][4] & LMP_LE) +#define lmp_sniffsubr_capable(dev) ((dev)->features[0][5] & LMP_SNIFF_SUBR) +#define lmp_pause_enc_capable(dev) ((dev)->features[0][5] & LMP_PAUSE_ENC) +#define lmp_ext_inq_capable(dev) ((dev)->features[0][6] & LMP_EXT_INQ) +#define lmp_le_br_capable(dev) (!!((dev)->features[0][6] & LMP_SIMUL_LE_BR)) +#define lmp_ssp_capable(dev) ((dev)->features[0][6] & LMP_SIMPLE_PAIR) +#define lmp_no_flush_capable(dev) ((dev)->features[0][6] & LMP_NO_FLUSH) +#define lmp_lsto_capable(dev) ((dev)->features[0][7] & LMP_LSTO) +#define lmp_inq_tx_pwr_capable(dev) ((dev)->features[0][7] & LMP_INQ_TX_PWR) +#define lmp_ext_feat_capable(dev) ((dev)->features[0][7] & LMP_EXTFEATURES) /* ----- Extended LMP capabilities ----- */ -#define lmp_host_ssp_capable(dev) ((dev)->host_features[0] & LMP_HOST_SSP) -#define lmp_host_le_capable(dev) !!((dev)->host_features[0] & LMP_HOST_LE) -#define lmp_host_le_br_capable(dev) !!((dev)->host_features[0] & LMP_HOST_LE_BREDR) +#define lmp_host_ssp_capable(dev) ((dev)->features[1][0] & LMP_HOST_SSP) +#define lmp_host_le_capable(dev) (!!((dev)->features[1][0] & LMP_HOST_LE)) +#define lmp_host_le_br_capable(dev) (!!((dev)->features[1][0] & LMP_HOST_LE_BREDR)) /* returns true if at least one AMP active */ static inline bool hci_amp_capable(void) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index e4d4d231fd8a..8adc3915ece4 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -433,9 +433,9 @@ static void hci_cc_write_ssp_mode(struct hci_dev *hdev, struct sk_buff *skb) if (!status) { if (sent->mode) - hdev->host_features[0] |= LMP_HOST_SSP; + hdev->features[1][0] |= LMP_HOST_SSP; else - hdev->host_features[0] &= ~LMP_HOST_SSP; + hdev->features[1][0] &= ~LMP_HOST_SSP; } if (test_bit(HCI_MGMT, &hdev->dev_flags)) @@ -493,18 +493,18 @@ static void hci_cc_read_local_features(struct hci_dev *hdev, /* Adjust default settings according to features * supported by device. */ - if (hdev->features[0] & LMP_3SLOT) + if (hdev->features[0][0] & LMP_3SLOT) hdev->pkt_type |= (HCI_DM3 | HCI_DH3); - if (hdev->features[0] & LMP_5SLOT) + if (hdev->features[0][0] & LMP_5SLOT) hdev->pkt_type |= (HCI_DM5 | HCI_DH5); - if (hdev->features[1] & LMP_HV2) { + if (hdev->features[0][1] & LMP_HV2) { hdev->pkt_type |= (HCI_HV2); hdev->esco_type |= (ESCO_HV2); } - if (hdev->features[1] & LMP_HV3) { + if (hdev->features[0][1] & LMP_HV3) { hdev->pkt_type |= (HCI_HV3); hdev->esco_type |= (ESCO_HV3); } @@ -512,26 +512,26 @@ static void hci_cc_read_local_features(struct hci_dev *hdev, if (lmp_esco_capable(hdev)) hdev->esco_type |= (ESCO_EV3); - if (hdev->features[4] & LMP_EV4) + if (hdev->features[0][4] & LMP_EV4) hdev->esco_type |= (ESCO_EV4); - if (hdev->features[4] & LMP_EV5) + if (hdev->features[0][4] & LMP_EV5) hdev->esco_type |= (ESCO_EV5); - if (hdev->features[5] & LMP_EDR_ESCO_2M) + if (hdev->features[0][5] & LMP_EDR_ESCO_2M) hdev->esco_type |= (ESCO_2EV3); - if (hdev->features[5] & LMP_EDR_ESCO_3M) + if (hdev->features[0][5] & LMP_EDR_ESCO_3M) hdev->esco_type |= (ESCO_3EV3); - if (hdev->features[5] & LMP_EDR_3S_ESCO) + if (hdev->features[0][5] & LMP_EDR_3S_ESCO) hdev->esco_type |= (ESCO_2EV5 | ESCO_3EV5); BT_DBG("%s features 0x%.2x%.2x%.2x%.2x%.2x%.2x%.2x%.2x", hdev->name, - hdev->features[0], hdev->features[1], - hdev->features[2], hdev->features[3], - hdev->features[4], hdev->features[5], - hdev->features[6], hdev->features[7]); + hdev->features[0][0], hdev->features[0][1], + hdev->features[0][2], hdev->features[0][3], + hdev->features[0][4], hdev->features[0][5], + hdev->features[0][6], hdev->features[0][7]); } static void hci_cc_read_local_ext_features(struct hci_dev *hdev, @@ -544,14 +544,8 @@ static void hci_cc_read_local_ext_features(struct hci_dev *hdev, if (rp->status) return; - switch (rp->page) { - case 0: - memcpy(hdev->features, rp->features, 8); - break; - case 1: - memcpy(hdev->host_features, rp->features, 8); - break; - } + if (rp->page < HCI_MAX_PAGES) + memcpy(hdev->features[rp->page], rp->features, 8); } static void hci_cc_read_flow_control_mode(struct hci_dev *hdev, @@ -1046,14 +1040,14 @@ static void hci_cc_write_le_host_supported(struct hci_dev *hdev, if (!status) { if (sent->le) - hdev->host_features[0] |= LMP_HOST_LE; + hdev->features[1][0] |= LMP_HOST_LE; else - hdev->host_features[0] &= ~LMP_HOST_LE; + hdev->features[1][0] &= ~LMP_HOST_LE; if (sent->simul) - hdev->host_features[0] |= LMP_HOST_LE_BREDR; + hdev->features[1][0] |= LMP_HOST_LE_BREDR; else - hdev->host_features[0] &= ~LMP_HOST_LE_BREDR; + hdev->features[1][0] &= ~LMP_HOST_LE_BREDR; } if (test_bit(HCI_MGMT, &hdev->dev_flags) && @@ -2076,7 +2070,7 @@ static void hci_remote_features_evt(struct hci_dev *hdev, goto unlock; if (!ev->status) - memcpy(conn->features, ev->features, 8); + memcpy(conn->features[0], ev->features, 8); if (conn->state != BT_CONFIG) goto unlock; @@ -2888,6 +2882,9 @@ static void hci_remote_ext_features_evt(struct hci_dev *hdev, if (!conn) goto unlock; + if (ev->page < HCI_MAX_PAGES) + memcpy(conn->features[ev->page], ev->features, 8); + if (!ev->status && ev->page == 0x01) { struct inquiry_entry *ie; @@ -3346,11 +3343,16 @@ static void hci_remote_host_features_evt(struct hci_dev *hdev, { struct hci_ev_remote_host_features *ev = (void *) skb->data; struct inquiry_entry *ie; + struct hci_conn *conn; BT_DBG("%s", hdev->name); hci_dev_lock(hdev); + conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr); + if (conn) + memcpy(conn->features[1], ev->features, 8); + ie = hci_inquiry_cache_lookup(hdev, &ev->bdaddr); if (ie) ie->data.ssp_mode = (ev->features[0] & LMP_HOST_SSP); diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c index 6fe15c822847..7ad6ecf36f20 100644 --- a/net/bluetooth/hci_sysfs.c +++ b/net/bluetooth/hci_sysfs.c @@ -48,10 +48,10 @@ static ssize_t show_link_features(struct device *dev, struct hci_conn *conn = to_hci_conn(dev); return sprintf(buf, "0x%02x%02x%02x%02x%02x%02x%02x%02x\n", - conn->features[0], conn->features[1], - conn->features[2], conn->features[3], - conn->features[4], conn->features[5], - conn->features[6], conn->features[7]); + conn->features[0][0], conn->features[0][1], + conn->features[0][2], conn->features[0][3], + conn->features[0][4], conn->features[0][5], + conn->features[0][6], conn->features[0][7]); } #define LINK_ATTR(_name, _mode, _show, _store) \ @@ -233,10 +233,10 @@ static ssize_t show_features(struct device *dev, struct hci_dev *hdev = to_hci_dev(dev); return sprintf(buf, "0x%02x%02x%02x%02x%02x%02x%02x%02x\n", - hdev->features[0], hdev->features[1], - hdev->features[2], hdev->features[3], - hdev->features[4], hdev->features[5], - hdev->features[6], hdev->features[7]); + hdev->features[0][0], hdev->features[0][1], + hdev->features[0][2], hdev->features[0][3], + hdev->features[0][4], hdev->features[0][5], + hdev->features[0][6], hdev->features[0][7]); } static ssize_t show_manufacturer(struct device *dev, -- cgit v1.2.3 From d2c5d77fff6ac0f43fc36f4fde020f726f773c1d Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Wed, 17 Apr 2013 15:00:52 +0300 Subject: Bluetooth: Add reading of all local feature pages With the introduction of CSA4 there is now also a features page number 2 available. This patch increments the maximum supported page number to 2 and adds code for reading all available pages (as long as we have support for them - indicated by HCI_MAX_PAGES). Signed-off-by: Johan Hedberg Signed-off-by: Gustavo Padovan --- include/net/bluetooth/hci_core.h | 3 ++- net/bluetooth/hci_core.c | 10 ++++++++++ net/bluetooth/hci_event.c | 2 ++ 3 files changed, 14 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 26822967e29c..80d718a9b31f 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -134,7 +134,7 @@ struct amp_assoc { __u8 data[HCI_MAX_AMP_ASSOC_SIZE]; }; -#define HCI_MAX_PAGES 2 +#define HCI_MAX_PAGES 3 #define NUM_REASSEMBLY 4 struct hci_dev { @@ -153,6 +153,7 @@ struct hci_dev { __u8 dev_class[3]; __u8 major_class; __u8 minor_class; + __u8 max_page; __u8 features[HCI_MAX_PAGES][8]; __u8 le_features[8]; __u8 le_white_list_size; diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 9570358adb77..e246d3782ac2 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -589,6 +589,7 @@ static void hci_set_le_support(struct hci_request *req) static void hci_init3_req(struct hci_request *req, unsigned long opt) { struct hci_dev *hdev = req->hdev; + u8 p; if (hdev->commands[5] & 0x10) hci_setup_link_policy(req); @@ -597,6 +598,15 @@ static void hci_init3_req(struct hci_request *req, unsigned long opt) hci_set_le_support(req); hci_update_ad(req); } + + /* Read features beyond page 1 if available */ + for (p = 2; p < HCI_MAX_PAGES && p <= hdev->max_page; p++) { + struct hci_cp_read_local_ext_features cp; + + cp.page = p; + hci_req_add(req, HCI_OP_READ_LOCAL_EXT_FEATURES, + sizeof(cp), &cp); + } } static int __hci_init(struct hci_dev *hdev) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 8adc3915ece4..3b2c0e07b25b 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -544,6 +544,8 @@ static void hci_cc_read_local_ext_features(struct hci_dev *hdev, if (rp->status) return; + hdev->max_page = rp->max_page; + if (rp->page < HCI_MAX_PAGES) memcpy(hdev->features[rp->page], rp->features, 8); } -- cgit v1.2.3 From 5df480b56e427d83830576862463226c8fcc95d7 Mon Sep 17 00:00:00 2001 From: Andre Guedes Date: Thu, 4 Apr 2013 20:21:00 -0300 Subject: Bluetooth: Add LE scan type macros This patch adds macros for active and passive LE scan type values. The LE_SCAN_PASSIVE was also defined since it will be used in future by LE connection routine and GAP Observer Role support. Signed-off-by: Andre Guedes Acked-by: Johan Hedberg Signed-off-by: Gustavo Padovan --- include/net/bluetooth/hci.h | 3 +++ net/bluetooth/mgmt.c | 7 +++---- 2 files changed, 6 insertions(+), 4 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index b3308927a0a1..3f4266b3c3ba 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -984,6 +984,9 @@ struct hci_cp_le_set_adv_data { #define HCI_OP_LE_SET_ADV_ENABLE 0x200a +#define LE_SCAN_PASSIVE 0x00 +#define LE_SCAN_ACTIVE 0x01 + #define HCI_OP_LE_SET_SCAN_PARAM 0x200b struct hci_cp_le_set_scan_param { __u8 type; diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index cd2332f6cecb..4c830c62ef74 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -106,7 +106,6 @@ static const u16 mgmt_events[] = { * These LE scan and inquiry parameters were chosen according to LE General * Discovery Procedure specification. */ -#define LE_SCAN_TYPE 0x01 #define LE_SCAN_WIN 0x12 #define LE_SCAN_INT 0x12 #define LE_SCAN_TIMEOUT_LE_ONLY msecs_to_jiffies(10240) @@ -2703,7 +2702,7 @@ static int start_discovery(struct sock *sk, struct hci_dev *hdev, goto failed; } - err = hci_le_scan(hdev, LE_SCAN_TYPE, LE_SCAN_INT, + err = hci_le_scan(hdev, LE_SCAN_ACTIVE, LE_SCAN_INT, LE_SCAN_WIN, LE_SCAN_TIMEOUT_LE_ONLY); break; @@ -2715,8 +2714,8 @@ static int start_discovery(struct sock *sk, struct hci_dev *hdev, goto failed; } - err = hci_le_scan(hdev, LE_SCAN_TYPE, LE_SCAN_INT, LE_SCAN_WIN, - LE_SCAN_TIMEOUT_BREDR_LE); + err = hci_le_scan(hdev, LE_SCAN_ACTIVE, LE_SCAN_INT, + LE_SCAN_WIN, LE_SCAN_TIMEOUT_BREDR_LE); break; default: -- cgit v1.2.3 From 525e296a28561659d85a63befb694f36e6ec3429 Mon Sep 17 00:00:00 2001 From: Andre Guedes Date: Thu, 4 Apr 2013 20:21:01 -0300 Subject: Bluetooth: Add macros for filter duplicates values This patch adds macros for filter_duplicates parameter values from HCI LE Set Scan Enable command. It also fixes le_scan_enable_req function so it uses the LE_SCAN_FILTER_DUP_ENABLE macro instead of a magic number. The LE_SCAN_FILTER_DUP_DISABLE was also defined since it will be required to properly support the GAP Observer Role. Signed-off-by: Andre Guedes Acked-by: Johan Hedberg Signed-off-by: Gustavo Padovan --- include/net/bluetooth/hci.h | 2 ++ net/bluetooth/hci_core.c | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 3f4266b3c3ba..84c37abc48c1 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -998,6 +998,8 @@ struct hci_cp_le_set_scan_param { #define LE_SCANNING_DISABLED 0x00 #define LE_SCANNING_ENABLED 0x01 +#define LE_SCAN_FILTER_DUP_DISABLE 0x00 +#define LE_SCAN_FILTER_DUP_ENABLE 0x01 #define HCI_OP_LE_SET_SCAN_ENABLE 0x200c struct hci_cp_le_set_scan_enable { diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 00dcb74954d3..d0ae237ff819 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -1990,7 +1990,7 @@ static void le_scan_enable_req(struct hci_request *req, unsigned long opt) memset(&cp, 0, sizeof(cp)); cp.enable = 1; - cp.filter_dup = 1; + cp.filter_dup = LE_SCAN_FILTER_DUP_ENABLE; hci_req_add(req, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(cp), &cp); } -- cgit v1.2.3 From 76a388beaf92cc75b829d4a0b7d69afaaeaa4b0a Mon Sep 17 00:00:00 2001 From: Andre Guedes Date: Thu, 4 Apr 2013 20:21:02 -0300 Subject: Bluetooth: Rename LE_SCANNING_* macros This patch renames LE_SCANNING_ENABLED and LE_SCANNING_DISABLED macros to LE_SCAN_ENABLE and LE_SCAN_DISABLE in order to keep the same prefix others LE scan macros have. It also fixes le_scan_enable_req function so it uses the LE_SCAN_ ENABLE macro instead of a magic number. Signed-off-by: Andre Guedes Acked-by: Johan Hedberg Signed-off-by: Gustavo Padovan --- include/net/bluetooth/hci.h | 4 ++-- net/bluetooth/hci_core.c | 2 +- net/bluetooth/hci_event.c | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 84c37abc48c1..e0512aaef4b8 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -996,8 +996,8 @@ struct hci_cp_le_set_scan_param { __u8 filter_policy; } __packed; -#define LE_SCANNING_DISABLED 0x00 -#define LE_SCANNING_ENABLED 0x01 +#define LE_SCAN_DISABLE 0x00 +#define LE_SCAN_ENABLE 0x01 #define LE_SCAN_FILTER_DUP_DISABLE 0x00 #define LE_SCAN_FILTER_DUP_ENABLE 0x01 diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index d0ae237ff819..ce82265f5619 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -1989,7 +1989,7 @@ static void le_scan_enable_req(struct hci_request *req, unsigned long opt) struct hci_cp_le_set_scan_enable cp; memset(&cp, 0, sizeof(cp)); - cp.enable = 1; + cp.enable = LE_SCAN_ENABLE; cp.filter_dup = LE_SCAN_FILTER_DUP_ENABLE; hci_req_add(req, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(cp), &cp); diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 3b2c0e07b25b..b93cd2eb5d58 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -964,7 +964,7 @@ static void hci_cc_le_set_scan_enable(struct hci_dev *hdev, return; switch (cp->enable) { - case LE_SCANNING_ENABLED: + case LE_SCAN_ENABLE: if (status) { hci_dev_lock(hdev); mgmt_start_discovery_failed(hdev, status); @@ -979,7 +979,7 @@ static void hci_cc_le_set_scan_enable(struct hci_dev *hdev, hci_dev_unlock(hdev); break; - case LE_SCANNING_DISABLED: + case LE_SCAN_DISABLE: if (status) { hci_dev_lock(hdev); mgmt_stop_discovery_failed(hdev, status); -- cgit v1.2.3 From 07dc93dd14957dc1faba08f0aadd27b082e35ba2 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Fri, 19 Apr 2013 10:14:51 +0300 Subject: Bluetooth: Fix HCI command send functions to use const specifier All HCI command send functions that take a pointer to the command parameters do not need to modify the content in any way (they merely copy the data to an skb). Therefore, the parameter type should be declared const. This also allows passing already const parameters to these APIs which previously would have generated a compiler warning. Signed-off-by: Johan Hedberg Signed-off-by: Gustavo Padovan --- include/net/bluetooth/hci_core.h | 14 ++++++++------ net/bluetooth/hci_core.c | 16 +++++++++------- 2 files changed, 17 insertions(+), 13 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 80d718a9b31f..35a57cd1704c 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1081,17 +1081,19 @@ struct hci_request { void hci_req_init(struct hci_request *req, struct hci_dev *hdev); int hci_req_run(struct hci_request *req, hci_req_complete_t complete); -void hci_req_add(struct hci_request *req, u16 opcode, u32 plen, void *param); -void hci_req_add_ev(struct hci_request *req, u16 opcode, u32 plen, void *param, - u8 event); +void hci_req_add(struct hci_request *req, u16 opcode, u32 plen, + const void *param); +void hci_req_add_ev(struct hci_request *req, u16 opcode, u32 plen, + const void *param, u8 event); void hci_req_cmd_complete(struct hci_dev *hdev, u16 opcode, u8 status); struct sk_buff *__hci_cmd_sync(struct hci_dev *hdev, u16 opcode, u32 plen, - void *param, u32 timeout); + const void *param, u32 timeout); struct sk_buff *__hci_cmd_sync_ev(struct hci_dev *hdev, u16 opcode, u32 plen, - void *param, u8 event, u32 timeout); + const void *param, u8 event, u32 timeout); -int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen, void *param); +int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen, + const void *param); void hci_send_acl(struct hci_chan *chan, struct sk_buff *skb, __u16 flags); void hci_send_sco(struct hci_conn *conn, struct sk_buff *skb); diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index ce82265f5619..215db0801a65 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -134,7 +134,7 @@ failed: } struct sk_buff *__hci_cmd_sync_ev(struct hci_dev *hdev, u16 opcode, u32 plen, - void *param, u8 event, u32 timeout) + const void *param, u8 event, u32 timeout) { DECLARE_WAITQUEUE(wait, current); struct hci_request req; @@ -188,7 +188,7 @@ struct sk_buff *__hci_cmd_sync_ev(struct hci_dev *hdev, u16 opcode, u32 plen, EXPORT_SYMBOL(__hci_cmd_sync_ev); struct sk_buff *__hci_cmd_sync(struct hci_dev *hdev, u16 opcode, u32 plen, - void *param, u32 timeout) + const void *param, u32 timeout) { return __hci_cmd_sync_ev(hdev, opcode, plen, param, 0, timeout); } @@ -2602,7 +2602,7 @@ int hci_req_run(struct hci_request *req, hci_req_complete_t complete) } static struct sk_buff *hci_prepare_cmd(struct hci_dev *hdev, u16 opcode, - u32 plen, void *param) + u32 plen, const void *param) { int len = HCI_COMMAND_HDR_SIZE + plen; struct hci_command_hdr *hdr; @@ -2628,7 +2628,8 @@ static struct sk_buff *hci_prepare_cmd(struct hci_dev *hdev, u16 opcode, } /* Send HCI command */ -int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen, void *param) +int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen, + const void *param) { struct sk_buff *skb; @@ -2652,8 +2653,8 @@ int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen, void *param) } /* Queue a command to an asynchronous HCI request */ -void hci_req_add_ev(struct hci_request *req, u16 opcode, u32 plen, void *param, - u8 event) +void hci_req_add_ev(struct hci_request *req, u16 opcode, u32 plen, + const void *param, u8 event) { struct hci_dev *hdev = req->hdev; struct sk_buff *skb; @@ -2682,7 +2683,8 @@ void hci_req_add_ev(struct hci_request *req, u16 opcode, u32 plen, void *param, skb_queue_tail(&req->cmd_q, skb); } -void hci_req_add(struct hci_request *req, u16 opcode, u32 plen, void *param) +void hci_req_add(struct hci_request *req, u16 opcode, u32 plen, + const void *param) { hci_req_add_ev(req, opcode, plen, param, 0); } -- cgit v1.2.3 From ec464e5dc504a164c5dbff4a06812d495e44e34d Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 17 Apr 2013 06:47:08 +0000 Subject: netfilter: rename netlink related "pid" variables to "portid" Get rid of the confusing mix of pid and portid and use portid consistently for all netlink related socket identities. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter/nfnetlink.h | 9 +++++---- include/net/netfilter/nf_conntrack.h | 2 +- include/net/netfilter/nf_conntrack_expect.h | 4 ++-- net/netfilter/nf_conntrack_core.c | 8 ++++---- net/netfilter/nf_conntrack_expect.c | 8 ++++---- net/netfilter/nfnetlink.c | 13 +++++++------ 6 files changed, 23 insertions(+), 21 deletions(-) (limited to 'include/net') diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h index ecbb8e495912..60b164171daf 100644 --- a/include/linux/netfilter/nfnetlink.h +++ b/include/linux/netfilter/nfnetlink.h @@ -29,10 +29,11 @@ extern int nfnetlink_subsys_register(const struct nfnetlink_subsystem *n); extern int nfnetlink_subsys_unregister(const struct nfnetlink_subsystem *n); extern int nfnetlink_has_listeners(struct net *net, unsigned int group); -extern int nfnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, unsigned int group, - int echo, gfp_t flags); -extern int nfnetlink_set_err(struct net *net, u32 pid, u32 group, int error); -extern int nfnetlink_unicast(struct sk_buff *skb, struct net *net, u_int32_t pid, int flags); +extern int nfnetlink_send(struct sk_buff *skb, struct net *net, u32 portid, + unsigned int group, int echo, gfp_t flags); +extern int nfnetlink_set_err(struct net *net, u32 portid, u32 group, int error); +extern int nfnetlink_unicast(struct sk_buff *skb, struct net *net, + u32 portid, int flags); extern void nfnl_lock(__u8 subsys_id); extern void nfnl_unlock(__u8 subsys_id); diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index caca0c4d6b4b..644d9c223d24 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -184,7 +184,7 @@ extern int nf_conntrack_hash_check_insert(struct nf_conn *ct); extern void nf_ct_delete_from_lists(struct nf_conn *ct); extern void nf_ct_dying_timeout(struct nf_conn *ct); -extern void nf_conntrack_flush_report(struct net *net, u32 pid, int report); +extern void nf_conntrack_flush_report(struct net *net, u32 portid, int report); extern bool nf_ct_get_tuplepr(const struct sk_buff *skb, unsigned int nhoff, u_int16_t l3num, diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h index cbbae7621e22..3f3aecbc8632 100644 --- a/include/net/netfilter/nf_conntrack_expect.h +++ b/include/net/netfilter/nf_conntrack_expect.h @@ -88,7 +88,7 @@ nf_ct_find_expectation(struct net *net, u16 zone, const struct nf_conntrack_tuple *tuple); void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp, - u32 pid, int report); + u32 portid, int report); static inline void nf_ct_unlink_expect(struct nf_conntrack_expect *exp) { nf_ct_unlink_expect_report(exp, 0, 0); @@ -106,7 +106,7 @@ void nf_ct_expect_init(struct nf_conntrack_expect *, unsigned int, u_int8_t, u_int8_t, const __be16 *, const __be16 *); void nf_ct_expect_put(struct nf_conntrack_expect *exp); int nf_ct_expect_related_report(struct nf_conntrack_expect *expect, - u32 pid, int report); + u32 portid, int report); static inline int nf_ct_expect_related(struct nf_conntrack_expect *expect) { return nf_ct_expect_related_report(expect, 0, 0); diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 007e8c43d19a..54ddc2f8e7c9 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1260,7 +1260,7 @@ void nf_ct_iterate_cleanup(struct net *net, EXPORT_SYMBOL_GPL(nf_ct_iterate_cleanup); struct __nf_ct_flush_report { - u32 pid; + u32 portid; int report; }; @@ -1275,7 +1275,7 @@ static int kill_report(struct nf_conn *i, void *data) /* If we fail to deliver the event, death_by_timeout() will retry */ if (nf_conntrack_event_report(IPCT_DESTROY, i, - fr->pid, fr->report) < 0) + fr->portid, fr->report) < 0) return 1; /* Avoid the delivery of the destroy event in death_by_timeout(). */ @@ -1298,10 +1298,10 @@ void nf_ct_free_hashtable(void *hash, unsigned int size) } EXPORT_SYMBOL_GPL(nf_ct_free_hashtable); -void nf_conntrack_flush_report(struct net *net, u32 pid, int report) +void nf_conntrack_flush_report(struct net *net, u32 portid, int report) { struct __nf_ct_flush_report fr = { - .pid = pid, + .portid = portid, .report = report, }; nf_ct_iterate_cleanup(net, kill_report, &fr); diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index 8c10e3db3d9b..0adfdcc68bae 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -40,7 +40,7 @@ static struct kmem_cache *nf_ct_expect_cachep __read_mostly; /* nf_conntrack_expect helper functions */ void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp, - u32 pid, int report) + u32 portid, int report) { struct nf_conn_help *master_help = nfct_help(exp->master); struct net *net = nf_ct_exp_net(exp); @@ -54,7 +54,7 @@ void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp, hlist_del(&exp->lnode); master_help->expecting[exp->class]--; - nf_ct_expect_event_report(IPEXP_DESTROY, exp, pid, report); + nf_ct_expect_event_report(IPEXP_DESTROY, exp, portid, report); nf_ct_expect_put(exp); NF_CT_STAT_INC(net, expect_delete); @@ -412,7 +412,7 @@ out: } int nf_ct_expect_related_report(struct nf_conntrack_expect *expect, - u32 pid, int report) + u32 portid, int report) { int ret; @@ -425,7 +425,7 @@ int nf_ct_expect_related_report(struct nf_conntrack_expect *expect, if (ret < 0) goto out; spin_unlock_bh(&nf_conntrack_lock); - nf_ct_expect_event_report(IPEXP_NEW, expect, pid, report); + nf_ct_expect_event_report(IPEXP_NEW, expect, portid, report); return ret; out: spin_unlock_bh(&nf_conntrack_lock); diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index bc4c499adb13..1640bd7dcdf4 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -112,22 +112,23 @@ int nfnetlink_has_listeners(struct net *net, unsigned int group) } EXPORT_SYMBOL_GPL(nfnetlink_has_listeners); -int nfnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, +int nfnetlink_send(struct sk_buff *skb, struct net *net, u32 portid, unsigned int group, int echo, gfp_t flags) { - return nlmsg_notify(net->nfnl, skb, pid, group, echo, flags); + return nlmsg_notify(net->nfnl, skb, portid, group, echo, flags); } EXPORT_SYMBOL_GPL(nfnetlink_send); -int nfnetlink_set_err(struct net *net, u32 pid, u32 group, int error) +int nfnetlink_set_err(struct net *net, u32 portid, u32 group, int error) { - return netlink_set_err(net->nfnl, pid, group, error); + return netlink_set_err(net->nfnl, portid, group, error); } EXPORT_SYMBOL_GPL(nfnetlink_set_err); -int nfnetlink_unicast(struct sk_buff *skb, struct net *net, u_int32_t pid, int flags) +int nfnetlink_unicast(struct sk_buff *skb, struct net *net, u32 portid, + int flags) { - return netlink_unicast(net->nfnl, skb, pid, flags); + return netlink_unicast(net->nfnl, skb, portid, flags); } EXPORT_SYMBOL_GPL(nfnetlink_unicast); -- cgit v1.2.3 From 908f8d07e9774c2476e0683f6a0ce50562a2da45 Mon Sep 17 00:00:00 2001 From: Alexander Bondar Date: Sun, 7 Apr 2013 09:53:30 +0300 Subject: mac80211: indicate admission control in TX queue parameters Some driver implementations need to know whether mandatory admission control is required by the AP for some ACs. Add a parameter to the TX queue parameters indicating this. As there's currently no support for admission control in mac80211's AP implementation, it's only ever set for the client implementation. Signed-off-by: Alexander Bondar Signed-off-by: Johannes Berg --- include/net/mac80211.h | 2 ++ net/mac80211/mlme.c | 1 + 2 files changed, 3 insertions(+) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 4f693a5c54de..b77d57a070e5 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -128,6 +128,7 @@ enum ieee80211_ac_numbers { * 2^n-1 in the range 1..32767] * @cw_max: maximum contention window [like @cw_min] * @txop: maximum burst time in units of 32 usecs, 0 meaning disabled + * @acm: is mandatory admission control required for the access category * @uapsd: is U-APSD mode enabled for the queue */ struct ieee80211_tx_queue_params { @@ -135,6 +136,7 @@ struct ieee80211_tx_queue_params { u16 cw_min; u16 cw_max; u8 aifs; + bool acm; bool uapsd; }; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index f7beb12abde2..13bb81402e1f 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1661,6 +1661,7 @@ static bool ieee80211_sta_wmm_params(struct ieee80211_local *local, params.cw_max = ecw2cw((pos[1] & 0xf0) >> 4); params.cw_min = ecw2cw(pos[1] & 0x0f); params.txop = get_unaligned_le16(pos + 2); + params.acm = acm; params.uapsd = uapsd; mlme_dbg(sdata, -- cgit v1.2.3 From 5de17984898c5758fc6ebe08eccea9f4b6548914 Mon Sep 17 00:00:00 2001 From: Arend van Spriel Date: Thu, 18 Apr 2013 15:49:00 +0200 Subject: cfg80211: introduce critical protocol indication from user-space Some protocols need a more reliable connection to complete successful in reasonable time. This patch adds a user-space API to indicate the wireless driver that a critical protocol is about to commence and when it is done, using nl80211 primitives NL80211_CMD_CRIT_PROTOCOL_START and NL80211_CRIT_PROTOCOL_STOP. There can be only on critical protocol session started per registered cfg80211 device. The driver can support this by implementing the cfg80211 callbacks .crit_proto_start() and .crit_proto_stop(). Examples of protocols that can benefit from this are DHCP, EAPOL, APIPA. Exactly how the link can/should be made more reliable is up to the driver. Things to consider are avoid scanning, no multi-channel operations, and alter coexistence schemes. Reviewed-by: Pieter-Paul Giesberts Reviewed-by: Franky (Zhenhui) Lin Signed-off-by: Arend van Spriel Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 23 +++++++++ include/uapi/linux/nl80211.h | 39 +++++++++++++++ net/wireless/core.h | 3 ++ net/wireless/mlme.c | 5 ++ net/wireless/nl80211.c | 117 +++++++++++++++++++++++++++++++++++++++++++ net/wireless/rdev-ops.h | 24 ++++++++- net/wireless/trace.h | 35 +++++++++++++ 7 files changed, 245 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index dff96d8cafcd..26b5b692c22b 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -2002,6 +2002,12 @@ struct cfg80211_update_ft_ies_params { * @update_ft_ies: Provide updated Fast BSS Transition information to the * driver. If the SME is in the driver/firmware, this information can be * used in building Authentication and Reassociation Request frames. + * + * @crit_proto_start: Indicates a critical protocol needs more link reliability + * for a given duration (milliseconds). The protocol is provided so the + * driver can take the most appropriate actions. + * @crit_proto_stop: Indicates critical protocol no longer needs increased link + * reliability. This operation can not fail. */ struct cfg80211_ops { int (*suspend)(struct wiphy *wiphy, struct cfg80211_wowlan *wow); @@ -2231,6 +2237,12 @@ struct cfg80211_ops { struct cfg80211_chan_def *chandef); int (*update_ft_ies)(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_update_ft_ies_params *ftie); + int (*crit_proto_start)(struct wiphy *wiphy, + struct wireless_dev *wdev, + enum nl80211_crit_proto_id protocol, + u16 duration); + void (*crit_proto_stop)(struct wiphy *wiphy, + struct wireless_dev *wdev); }; /* @@ -4137,6 +4149,17 @@ void cfg80211_report_wowlan_wakeup(struct wireless_dev *wdev, struct cfg80211_wowlan_wakeup *wakeup, gfp_t gfp); +/** + * cfg80211_crit_proto_stopped() - indicate critical protocol stopped by driver. + * + * @wdev: the wireless device for which critical protocol is stopped. + * + * This function can be called by the driver to indicate it has reverted + * operation back to normal. One reason could be that the duration given + * by .crit_proto_start() has expired. + */ +void cfg80211_crit_proto_stopped(struct wireless_dev *wdev, gfp_t gfp); + /* Logging, debugging and troubleshooting/diagnostic helpers. */ /* wiphy_printk helpers, similar to dev_printk */ diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 79da8710448e..d1e48b5e348f 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -639,6 +639,13 @@ * with the relevant Information Elements. This event is used to report * received FT IEs (MDIE, FTIE, RSN IE, TIE, RICIE). * + * @NL80211_CMD_CRIT_PROTOCOL_START: Indicates user-space will start running + * a critical protocol that needs more reliability in the connection to + * complete. + * + * @NL80211_CMD_CRIT_PROTOCOL_STOP: Indicates the connection reliability can + * return back to normal. + * * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use */ @@ -798,6 +805,9 @@ enum nl80211_commands { NL80211_CMD_UPDATE_FT_IES, NL80211_CMD_FT_EVENT, + NL80211_CMD_CRIT_PROTOCOL_START, + NL80211_CMD_CRIT_PROTOCOL_STOP, + /* add new commands above here */ /* used to define NL80211_CMD_MAX below */ @@ -1414,6 +1424,11 @@ enum nl80211_commands { * @NL80211_ATTR_IE_RIC: Resource Information Container Information * Element * + * @NL80211_ATTR_CRIT_PROT_ID: critical protocol identifier requiring increased + * reliability, see &enum nl80211_crit_proto_id (u16). + * @NL80211_ATTR_MAX_CRIT_PROT_DURATION: duration in milliseconds in which + * the connection should have increased reliability (u16). + * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use */ @@ -1709,6 +1724,9 @@ enum nl80211_attrs { NL80211_ATTR_MDID, NL80211_ATTR_IE_RIC, + NL80211_ATTR_CRIT_PROT_ID, + NL80211_ATTR_MAX_CRIT_PROT_DURATION, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -3682,4 +3700,25 @@ enum nl80211_protocol_features { NL80211_PROTOCOL_FEATURE_SPLIT_WIPHY_DUMP = 1 << 0, }; +/** + * enum nl80211_crit_proto_id - nl80211 critical protocol identifiers + * + * @NL80211_CRIT_PROTO_UNSPEC: protocol unspecified. + * @NL80211_CRIT_PROTO_DHCP: BOOTP or DHCPv6 protocol. + * @NL80211_CRIT_PROTO_EAPOL: EAPOL protocol. + * @NL80211_CRIT_PROTO_APIPA: APIPA protocol. + * @NUM_NL80211_CRIT_PROTO: must be kept last. + */ +enum nl80211_crit_proto_id { + NL80211_CRIT_PROTO_UNSPEC, + NL80211_CRIT_PROTO_DHCP, + NL80211_CRIT_PROTO_EAPOL, + NL80211_CRIT_PROTO_APIPA, + /* add other protocols before this one */ + NUM_NL80211_CRIT_PROTO +}; + +/* maximum duration for critical protocol measures */ +#define NL80211_CRIT_PROTO_MAX_DURATION 5000 /* msec */ + #endif /* __LINUX_NL80211_H */ diff --git a/net/wireless/core.h b/net/wireless/core.h index 124e5e773fbc..fd35dae547c4 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -88,6 +88,9 @@ struct cfg80211_registered_device { struct delayed_work dfs_update_channels_wk; + /* netlink port which started critical protocol (0 means not started) */ + u32 crit_proto_nlportid; + /* must be last because of the way we do wiphy_priv(), * and it should at least be aligned to NETDEV_ALIGN */ struct wiphy wiphy __aligned(NETDEV_ALIGN); diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index 390198bf4b36..0c7b7dd855f6 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -648,6 +648,11 @@ void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlportid) spin_unlock_bh(&wdev->mgmt_registrations_lock); + if (nlportid && rdev->crit_proto_nlportid == nlportid) { + rdev->crit_proto_nlportid = 0; + rdev_crit_proto_stop(rdev, wdev); + } + if (nlportid == wdev->ap_unexpected_nlportid) wdev->ap_unexpected_nlportid = 0; } diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 3abcbbada6d4..afa283841e8c 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -1424,6 +1424,10 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev, } CMD(start_p2p_device, START_P2P_DEVICE); CMD(set_mcast_rate, SET_MCAST_RATE); + if (split) { + CMD(crit_proto_start, CRIT_PROTOCOL_START); + CMD(crit_proto_stop, CRIT_PROTOCOL_STOP); + } #ifdef CONFIG_NL80211_TESTMODE CMD(testmode_cmd, TESTMODE); @@ -8216,6 +8220,64 @@ static int nl80211_update_ft_ies(struct sk_buff *skb, struct genl_info *info) return rdev_update_ft_ies(rdev, dev, &ft_params); } +static int nl80211_crit_protocol_start(struct sk_buff *skb, + struct genl_info *info) +{ + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct wireless_dev *wdev = info->user_ptr[1]; + enum nl80211_crit_proto_id proto = NL80211_CRIT_PROTO_UNSPEC; + u16 duration; + int ret; + + if (!rdev->ops->crit_proto_start) + return -EOPNOTSUPP; + + if (WARN_ON(!rdev->ops->crit_proto_stop)) + return -EINVAL; + + if (rdev->crit_proto_nlportid) + return -EBUSY; + + /* determine protocol if provided */ + if (info->attrs[NL80211_ATTR_CRIT_PROT_ID]) + proto = nla_get_u16(info->attrs[NL80211_ATTR_CRIT_PROT_ID]); + + if (proto >= NUM_NL80211_CRIT_PROTO) + return -EINVAL; + + /* timeout must be provided */ + if (!info->attrs[NL80211_ATTR_MAX_CRIT_PROT_DURATION]) + return -EINVAL; + + duration = + nla_get_u16(info->attrs[NL80211_ATTR_MAX_CRIT_PROT_DURATION]); + + if (duration > NL80211_CRIT_PROTO_MAX_DURATION) + return -ERANGE; + + ret = rdev_crit_proto_start(rdev, wdev, proto, duration); + if (!ret) + rdev->crit_proto_nlportid = info->snd_portid; + + return ret; +} + +static int nl80211_crit_protocol_stop(struct sk_buff *skb, + struct genl_info *info) +{ + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct wireless_dev *wdev = info->user_ptr[1]; + + if (!rdev->ops->crit_proto_stop) + return -EOPNOTSUPP; + + if (rdev->crit_proto_nlportid) { + rdev->crit_proto_nlportid = 0; + rdev_crit_proto_stop(rdev, wdev); + } + return 0; +} + #define NL80211_FLAG_NEED_WIPHY 0x01 #define NL80211_FLAG_NEED_NETDEV 0x02 #define NL80211_FLAG_NEED_RTNL 0x04 @@ -8905,6 +8967,22 @@ static struct genl_ops nl80211_ops[] = { .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, }, + { + .cmd = NL80211_CMD_CRIT_PROTOCOL_START, + .doit = nl80211_crit_protocol_start, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = NL80211_FLAG_NEED_WDEV_UP | + NL80211_FLAG_NEED_RTNL, + }, + { + .cmd = NL80211_CMD_CRIT_PROTOCOL_STOP, + .doit = nl80211_crit_protocol_stop, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = NL80211_FLAG_NEED_WDEV_UP | + NL80211_FLAG_NEED_RTNL, + } }; static struct genl_multicast_group nl80211_mlme_mcgrp = { @@ -10650,6 +10728,45 @@ void cfg80211_ft_event(struct net_device *netdev, } EXPORT_SYMBOL(cfg80211_ft_event); +void cfg80211_crit_proto_stopped(struct wireless_dev *wdev, gfp_t gfp) +{ + struct cfg80211_registered_device *rdev; + struct sk_buff *msg; + void *hdr; + u32 nlportid; + + rdev = wiphy_to_dev(wdev->wiphy); + if (!rdev->crit_proto_nlportid) + return; + + nlportid = rdev->crit_proto_nlportid; + rdev->crit_proto_nlportid = 0; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); + if (!msg) + return; + + hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_CRIT_PROTOCOL_STOP); + if (!hdr) + goto nla_put_failure; + + if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || + nla_put_u64(msg, NL80211_ATTR_WDEV, wdev_id(wdev))) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + + genlmsg_unicast(wiphy_net(&rdev->wiphy), msg, nlportid); + return; + + nla_put_failure: + if (hdr) + genlmsg_cancel(msg, hdr); + nlmsg_free(msg); + +} +EXPORT_SYMBOL(cfg80211_crit_proto_stopped); + /* initialisation/exit functions */ int nl80211_init(void) diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h index d77e1c1d3a0e..9f15f0ac824d 100644 --- a/net/wireless/rdev-ops.h +++ b/net/wireless/rdev-ops.h @@ -875,7 +875,7 @@ static inline void rdev_stop_p2p_device(struct cfg80211_registered_device *rdev, trace_rdev_stop_p2p_device(&rdev->wiphy, wdev); rdev->ops->stop_p2p_device(&rdev->wiphy, wdev); trace_rdev_return_void(&rdev->wiphy); -} +} static inline int rdev_set_mac_acl(struct cfg80211_registered_device *rdev, struct net_device *dev, @@ -901,4 +901,26 @@ static inline int rdev_update_ft_ies(struct cfg80211_registered_device *rdev, return ret; } +static inline int rdev_crit_proto_start(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev, + enum nl80211_crit_proto_id protocol, + u16 duration) +{ + int ret; + + trace_rdev_crit_proto_start(&rdev->wiphy, wdev, protocol, duration); + ret = rdev->ops->crit_proto_start(&rdev->wiphy, wdev, + protocol, duration); + trace_rdev_return_int(&rdev->wiphy, ret); + return ret; +} + +static inline void rdev_crit_proto_stop(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev) +{ + trace_rdev_crit_proto_stop(&rdev->wiphy, wdev); + rdev->ops->crit_proto_stop(&rdev->wiphy, wdev); + trace_rdev_return_void(&rdev->wiphy); +} + #endif /* __CFG80211_RDEV_OPS */ diff --git a/net/wireless/trace.h b/net/wireless/trace.h index 3c2033b8f596..ecd4fcec3c94 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -1806,6 +1806,41 @@ TRACE_EVENT(rdev_update_ft_ies, WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->md) ); +TRACE_EVENT(rdev_crit_proto_start, + TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, + enum nl80211_crit_proto_id protocol, u16 duration), + TP_ARGS(wiphy, wdev, protocol, duration), + TP_STRUCT__entry( + WIPHY_ENTRY + WDEV_ENTRY + __field(u16, proto) + __field(u16, duration) + ), + TP_fast_assign( + WIPHY_ASSIGN; + WDEV_ASSIGN; + __entry->proto = protocol; + __entry->duration = duration; + ), + TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT ", proto=%x, duration=%u", + WIPHY_PR_ARG, WDEV_PR_ARG, __entry->proto, __entry->duration) +); + +TRACE_EVENT(rdev_crit_proto_stop, + TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev), + TP_ARGS(wiphy, wdev), + TP_STRUCT__entry( + WIPHY_ENTRY + WDEV_ENTRY + ), + TP_fast_assign( + WIPHY_ASSIGN; + WDEV_ASSIGN; + ), + TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT, + WIPHY_PR_ARG, WDEV_PR_ARG) +); + /************************************************************* * cfg80211 exported functions traces * *************************************************************/ -- cgit v1.2.3 From 0d528d85c519b755b6f4e1bafa3a39984370e1c1 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Mon, 22 Apr 2013 16:14:41 +0200 Subject: mac80211: improve the rate control API Allow rate control modules to pass a rate selection table to mac80211 and the driver. This allows drivers to fetch the most recent rate selection from the sta pointer for already buffered frames. This allows rate control to respond faster to sudden link changes and it is also a step towards adding minstrel_ht support to drivers like iwlwifi. When a driver sets IEEE80211_HW_SUPPORTS_RC_TABLE, mac80211 will not fill info->control.rates with rates from the rate table (to preserve explicit overrides by the rate control module). The driver then explicitly calls ieee80211_get_tx_rates to merge overrides from info->control.rates with defaults from the sta rate table. Signed-off-by: Felix Fietkau Signed-off-by: Johannes Berg --- include/net/mac80211.h | 66 +++++++++ net/mac80211/ieee80211_i.h | 1 + net/mac80211/rate.c | 327 +++++++++++++++++++++++++++++++++++++-------- net/mac80211/tx.c | 142 +++++--------------- 4 files changed, 369 insertions(+), 167 deletions(-) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index b77d57a070e5..04c2d4670dc6 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -563,6 +563,9 @@ enum mac80211_rate_control_flags { /* maximum number of rate stages */ #define IEEE80211_TX_MAX_RATES 4 +/* maximum number of rate table entries */ +#define IEEE80211_TX_RATE_TABLE_SIZE 4 + /** * struct ieee80211_tx_rate - rate selection/status * @@ -659,6 +662,8 @@ struct ieee80211_tx_info { s8 rts_cts_rate_idx; u8 use_rts:1; u8 use_cts_prot:1; + u8 short_preamble:1; + u8 skip_table:1; /* 2 bytes free */ }; /* only needed before rate control */ @@ -680,6 +685,8 @@ struct ieee80211_tx_info { struct { struct ieee80211_tx_rate driver_rates[ IEEE80211_TX_MAX_RATES]; + u8 pad[4]; + void *rate_driver_data[ IEEE80211_TX_INFO_RATE_DRIVER_DATA_SIZE / sizeof(void *)]; }; @@ -1224,6 +1231,24 @@ enum ieee80211_sta_rx_bandwidth { IEEE80211_STA_RX_BW_160, }; +/** + * struct ieee80211_sta_rates - station rate selection table + * + * @rcu_head: RCU head used for freeing the table on update + * @rates: transmit rates/flags to be used by default. + * Overriding entries per-packet is possible by using cb tx control. + */ +struct ieee80211_sta_rates { + struct rcu_head rcu_head; + struct { + s8 idx; + u8 count; + u8 count_cts; + u8 count_rts; + u16 flags; + } rate[IEEE80211_TX_RATE_TABLE_SIZE]; +}; + /** * struct ieee80211_sta - station table entry * @@ -1251,6 +1276,7 @@ enum ieee80211_sta_rx_bandwidth { * notifications and capabilities. The value is only valid after * the station moves to associated state. * @smps_mode: current SMPS mode (off, static or dynamic) + * @tx_rates: rate control selection table */ struct ieee80211_sta { u32 supp_rates[IEEE80211_NUM_BANDS]; @@ -1264,6 +1290,7 @@ struct ieee80211_sta { u8 rx_nss; enum ieee80211_sta_rx_bandwidth bandwidth; enum ieee80211_smps_mode smps_mode; + struct ieee80211_sta_rates __rcu *rates; /* must be last */ u8 drv_priv[0] __aligned(sizeof(void *)); @@ -1419,6 +1446,9 @@ struct ieee80211_tx_control { * for different virtual interfaces. See the doc section on HW queue * control for more details. * + * @IEEE80211_HW_SUPPORTS_RC_TABLE: The driver supports using a rate + * selection table provided by the rate control algorithm. + * * @IEEE80211_HW_P2P_DEV_ADDR_FOR_INTF: Use the P2P Device address for any * P2P Interface. This will be honoured even if more than one interface * is supported. @@ -1451,6 +1481,7 @@ enum ieee80211_hw_flags { IEEE80211_HW_SUPPORTS_PER_STA_GTK = 1<<21, IEEE80211_HW_AP_LINK_PS = 1<<22, IEEE80211_HW_TX_AMPDU_SETUP_IN_HW = 1<<23, + IEEE80211_HW_SUPPORTS_RC_TABLE = 1<<24, IEEE80211_HW_P2P_DEV_ADDR_FOR_INTF = 1<<25, IEEE80211_HW_TIMING_BEACON_ONLY = 1<<26, }; @@ -3136,6 +3167,25 @@ static inline int ieee80211_sta_ps_transition_ni(struct ieee80211_sta *sta, void ieee80211_sta_set_buffered(struct ieee80211_sta *sta, u8 tid, bool buffered); +/** + * ieee80211_get_tx_rates - get the selected transmit rates for a packet + * + * Call this function in a driver with per-packet rate selection support + * to combine the rate info in the packet tx info with the most recent + * rate selection table for the station entry. + * + * @vif: &struct ieee80211_vif pointer from the add_interface callback. + * @sta: the receiver station to which this packet is sent. + * @skb: the frame to be transmitted. + * @dest: buffer for extracted rate/retry information + * @max_rates: maximum number of rates to fetch + */ +void ieee80211_get_tx_rates(struct ieee80211_vif *vif, + struct ieee80211_sta *sta, + struct sk_buff *skb, + struct ieee80211_tx_rate *dest, + int max_rates); + /** * ieee80211_tx_status - transmit status callback * @@ -4212,6 +4262,22 @@ bool rate_usable_index_exists(struct ieee80211_supported_band *sband, return false; } +/** + * rate_control_set_rates - pass the sta rate selection to mac80211/driver + * + * When not doing a rate control probe to test rates, rate control should pass + * its rate selection to mac80211. If the driver supports receiving a station + * rate table, it will use it to ensure that frames are always sent based on + * the most recent rate control module decision. + * + * @hw: pointer as obtained from ieee80211_alloc_hw() + * @pubsta: &struct ieee80211_sta pointer to the target destination. + * @rates: new tx rate set to be used for this station. + */ +int rate_control_set_rates(struct ieee80211_hw *hw, + struct ieee80211_sta *pubsta, + struct ieee80211_sta_rates *rates); + int ieee80211_rate_control_register(struct rate_control_ops *ops); void ieee80211_rate_control_unregister(struct rate_control_ops *ops); diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index af8410e1291e..158e6eb188d3 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -156,6 +156,7 @@ struct ieee80211_tx_data { struct ieee80211_sub_if_data *sdata; struct sta_info *sta; struct ieee80211_key *key; + struct ieee80211_tx_rate rate; unsigned int flags; }; diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c index 5d545dd2d050..0d51877efdb7 100644 --- a/net/mac80211/rate.c +++ b/net/mac80211/rate.c @@ -252,6 +252,25 @@ rate_lowest_non_cck_index(struct ieee80211_supported_band *sband, return 0; } +static void __rate_control_send_low(struct ieee80211_hw *hw, + struct ieee80211_supported_band *sband, + struct ieee80211_sta *sta, + struct ieee80211_tx_info *info) +{ + if ((sband->band != IEEE80211_BAND_2GHZ) || + !(info->flags & IEEE80211_TX_CTL_NO_CCK_RATE)) + info->control.rates[0].idx = rate_lowest_index(sband, sta); + else + info->control.rates[0].idx = + rate_lowest_non_cck_index(sband, sta); + + info->control.rates[0].count = + (info->flags & IEEE80211_TX_CTL_NO_ACK) ? + 1 : hw->max_rate_tries; + + info->control.skip_table = 1; +} + bool rate_control_send_low(struct ieee80211_sta *sta, void *priv_sta, @@ -262,16 +281,8 @@ bool rate_control_send_low(struct ieee80211_sta *sta, int mcast_rate; if (!sta || !priv_sta || rc_no_data_or_no_ack_use_min(txrc)) { - if ((sband->band != IEEE80211_BAND_2GHZ) || - !(info->flags & IEEE80211_TX_CTL_NO_CCK_RATE)) - info->control.rates[0].idx = - rate_lowest_index(txrc->sband, sta); - else - info->control.rates[0].idx = - rate_lowest_non_cck_index(txrc->sband, sta); - info->control.rates[0].count = - (info->flags & IEEE80211_TX_CTL_NO_ACK) ? - 1 : txrc->hw->max_rate_tries; + __rate_control_send_low(txrc->hw, sband, sta, info); + if (!sta && txrc->bss) { mcast_rate = txrc->bss_conf->mcast_rate[sband->band]; if (mcast_rate > 0) { @@ -355,7 +366,8 @@ static bool rate_idx_match_mcs_mask(struct ieee80211_tx_rate *rate, static void rate_idx_match_mask(struct ieee80211_tx_rate *rate, - struct ieee80211_tx_rate_control *txrc, + struct ieee80211_supported_band *sband, + enum nl80211_chan_width chan_width, u32 mask, u8 mcs_mask[IEEE80211_HT_MCS_MASK_LEN]) { @@ -375,27 +387,17 @@ static void rate_idx_match_mask(struct ieee80211_tx_rate *rate, IEEE80211_TX_RC_USE_SHORT_PREAMBLE); alt_rate.count = rate->count; if (rate_idx_match_legacy_mask(&alt_rate, - txrc->sband->n_bitrates, - mask)) { + sband->n_bitrates, mask)) { *rate = alt_rate; return; } } else { - struct sk_buff *skb = txrc->skb; - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; - __le16 fc; - /* handle legacy rates */ - if (rate_idx_match_legacy_mask(rate, txrc->sband->n_bitrates, - mask)) + if (rate_idx_match_legacy_mask(rate, sband->n_bitrates, mask)) return; /* if HT BSS, and we handle a data frame, also try HT rates */ - if (txrc->bss_conf->chandef.width == NL80211_CHAN_WIDTH_20_NOHT) - return; - - fc = hdr->frame_control; - if (!ieee80211_is_data(fc)) + if (chan_width == NL80211_CHAN_WIDTH_20_NOHT) return; alt_rate.idx = 0; @@ -408,7 +410,7 @@ static void rate_idx_match_mask(struct ieee80211_tx_rate *rate, alt_rate.flags |= IEEE80211_TX_RC_MCS; - if (txrc->bss_conf->chandef.width == NL80211_CHAN_WIDTH_40) + if (chan_width == NL80211_CHAN_WIDTH_40) alt_rate.flags |= IEEE80211_TX_RC_40_MHZ_WIDTH; if (rate_idx_match_mcs_mask(&alt_rate, mcs_mask)) { @@ -426,6 +428,228 @@ static void rate_idx_match_mask(struct ieee80211_tx_rate *rate, */ } +static void rate_fixup_ratelist(struct ieee80211_vif *vif, + struct ieee80211_supported_band *sband, + struct ieee80211_tx_info *info, + struct ieee80211_tx_rate *rates, + int max_rates) +{ + struct ieee80211_rate *rate; + bool inval = false; + int i; + + /* + * Set up the RTS/CTS rate as the fastest basic rate + * that is not faster than the data rate unless there + * is no basic rate slower than the data rate, in which + * case we pick the slowest basic rate + * + * XXX: Should this check all retry rates? + */ + if (!(rates[0].flags & IEEE80211_TX_RC_MCS)) { + u32 basic_rates = vif->bss_conf.basic_rates; + s8 baserate = basic_rates ? ffs(basic_rates - 1) : 0; + + rate = &sband->bitrates[rates[0].idx]; + + for (i = 0; i < sband->n_bitrates; i++) { + /* must be a basic rate */ + if (!(basic_rates & BIT(i))) + continue; + /* must not be faster than the data rate */ + if (sband->bitrates[i].bitrate > rate->bitrate) + continue; + /* maximum */ + if (sband->bitrates[baserate].bitrate < + sband->bitrates[i].bitrate) + baserate = i; + } + + info->control.rts_cts_rate_idx = baserate; + } + + for (i = 0; i < max_rates; i++) { + /* + * make sure there's no valid rate following + * an invalid one, just in case drivers don't + * take the API seriously to stop at -1. + */ + if (inval) { + rates[i].idx = -1; + continue; + } + if (rates[i].idx < 0) { + inval = true; + continue; + } + + /* + * For now assume MCS is already set up correctly, this + * needs to be fixed. + */ + if (rates[i].flags & IEEE80211_TX_RC_MCS) { + WARN_ON(rates[i].idx > 76); + + if (!(rates[i].flags & IEEE80211_TX_RC_USE_RTS_CTS) && + info->control.use_cts_prot) + rates[i].flags |= + IEEE80211_TX_RC_USE_CTS_PROTECT; + continue; + } + + if (rates[i].flags & IEEE80211_TX_RC_VHT_MCS) { + WARN_ON(ieee80211_rate_get_vht_mcs(&rates[i]) > 9); + continue; + } + + /* set up RTS protection if desired */ + if (info->control.use_rts) { + rates[i].flags |= IEEE80211_TX_RC_USE_RTS_CTS; + info->control.use_cts_prot = false; + } + + /* RC is busted */ + if (WARN_ON_ONCE(rates[i].idx >= sband->n_bitrates)) { + rates[i].idx = -1; + continue; + } + + rate = &sband->bitrates[rates[i].idx]; + + /* set up short preamble */ + if (info->control.short_preamble && + rate->flags & IEEE80211_RATE_SHORT_PREAMBLE) + rates[i].flags |= IEEE80211_TX_RC_USE_SHORT_PREAMBLE; + + /* set up G protection */ + if (!(rates[i].flags & IEEE80211_TX_RC_USE_RTS_CTS) && + info->control.use_cts_prot && + rate->flags & IEEE80211_RATE_ERP_G) + rates[i].flags |= IEEE80211_TX_RC_USE_CTS_PROTECT; + } +} + + +static void rate_control_fill_sta_table(struct ieee80211_sta *sta, + struct ieee80211_tx_info *info, + struct ieee80211_tx_rate *rates, + int max_rates) +{ + struct ieee80211_sta_rates *ratetbl = NULL; + int i; + + if (sta && !info->control.skip_table) + ratetbl = rcu_dereference(sta->rates); + + /* Fill remaining rate slots with data from the sta rate table. */ + max_rates = min_t(int, max_rates, IEEE80211_TX_RATE_TABLE_SIZE); + for (i = 0; i < max_rates; i++) { + if (i < ARRAY_SIZE(info->control.rates) && + info->control.rates[i].idx >= 0 && + info->control.rates[i].count) { + if (rates != info->control.rates) + rates[i] = info->control.rates[i]; + } else if (ratetbl) { + rates[i].idx = ratetbl->rate[i].idx; + rates[i].flags = ratetbl->rate[i].flags; + if (info->control.use_rts) + rates[i].count = ratetbl->rate[i].count_rts; + else if (info->control.use_cts_prot) + rates[i].count = ratetbl->rate[i].count_cts; + else + rates[i].count = ratetbl->rate[i].count; + } else { + rates[i].idx = -1; + rates[i].count = 0; + } + + if (rates[i].idx < 0 || !rates[i].count) + break; + } +} + +static void rate_control_apply_mask(struct ieee80211_sub_if_data *sdata, + struct ieee80211_sta *sta, + struct ieee80211_supported_band *sband, + struct ieee80211_tx_info *info, + struct ieee80211_tx_rate *rates, + int max_rates) +{ + enum nl80211_chan_width chan_width; + u8 mcs_mask[IEEE80211_HT_MCS_MASK_LEN]; + bool has_mcs_mask; + u32 mask; + int i; + + /* + * Try to enforce the rateidx mask the user wanted. skip this if the + * default mask (allow all rates) is used to save some processing for + * the common case. + */ + mask = sdata->rc_rateidx_mask[info->band]; + has_mcs_mask = sdata->rc_has_mcs_mask[info->band]; + if (mask == (1 << sband->n_bitrates) - 1 && !has_mcs_mask) + return; + + if (has_mcs_mask) + memcpy(mcs_mask, sdata->rc_rateidx_mcs_mask[info->band], + sizeof(mcs_mask)); + else + memset(mcs_mask, 0xff, sizeof(mcs_mask)); + + if (sta) { + /* Filter out rates that the STA does not support */ + mask &= sta->supp_rates[info->band]; + for (i = 0; i < sizeof(mcs_mask); i++) + mcs_mask[i] &= sta->ht_cap.mcs.rx_mask[i]; + } + + /* + * Make sure the rate index selected for each TX rate is + * included in the configured mask and change the rate indexes + * if needed. + */ + chan_width = sdata->vif.bss_conf.chandef.width; + for (i = 0; i < max_rates; i++) { + /* Skip invalid rates */ + if (rates[i].idx < 0) + break; + + rate_idx_match_mask(&rates[i], sband, mask, chan_width, + mcs_mask); + } +} + +void ieee80211_get_tx_rates(struct ieee80211_vif *vif, + struct ieee80211_sta *sta, + struct sk_buff *skb, + struct ieee80211_tx_rate *dest, + int max_rates) +{ + struct ieee80211_sub_if_data *sdata; + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + struct ieee80211_supported_band *sband; + + rate_control_fill_sta_table(sta, info, dest, max_rates); + + if (!vif) + return; + + sdata = vif_to_sdata(vif); + sband = sdata->local->hw.wiphy->bands[info->band]; + + if (ieee80211_is_data(hdr->frame_control)) + rate_control_apply_mask(sdata, sta, sband, info, dest, max_rates); + + if (dest[0].idx < 0) + __rate_control_send_low(&sdata->local->hw, sband, sta, info); + + if (sta) + rate_fixup_ratelist(vif, sband, info, dest, max_rates); +} +EXPORT_SYMBOL(ieee80211_get_tx_rates); + void rate_control_get_rate(struct ieee80211_sub_if_data *sdata, struct sta_info *sta, struct ieee80211_tx_rate_control *txrc) @@ -435,8 +659,6 @@ void rate_control_get_rate(struct ieee80211_sub_if_data *sdata, struct ieee80211_sta *ista = NULL; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(txrc->skb); int i; - u32 mask; - u8 mcs_mask[IEEE80211_HT_MCS_MASK_LEN]; if (sta && test_sta_flag(sta, WLAN_STA_RATE_CONTROL)) { ista = &sta->sta; @@ -454,40 +676,27 @@ void rate_control_get_rate(struct ieee80211_sub_if_data *sdata, ref->ops->get_rate(ref->priv, ista, priv_sta, txrc); - /* - * Try to enforce the rateidx mask the user wanted. skip this if the - * default mask (allow all rates) is used to save some processing for - * the common case. - */ - mask = sdata->rc_rateidx_mask[info->band]; - if (mask != (1 << txrc->sband->n_bitrates) - 1 || txrc->rate_idx_mcs_mask) { - if (txrc->rate_idx_mcs_mask) - memcpy(mcs_mask, txrc->rate_idx_mcs_mask, sizeof(mcs_mask)); - else - memset(mcs_mask, 0xff, sizeof(mcs_mask)); - - if (sta) { - /* Filter out rates that the STA does not support */ - mask &= sta->sta.supp_rates[info->band]; - for (i = 0; i < sizeof(mcs_mask); i++) - mcs_mask[i] &= sta->sta.ht_cap.mcs.rx_mask[i]; - } - /* - * Make sure the rate index selected for each TX rate is - * included in the configured mask and change the rate indexes - * if needed. - */ - for (i = 0; i < IEEE80211_TX_MAX_RATES; i++) { - /* Skip invalid rates */ - if (info->control.rates[i].idx < 0) - break; - rate_idx_match_mask(&info->control.rates[i], txrc, - mask, mcs_mask); - } - } + if (sdata->local->hw.flags & IEEE80211_HW_SUPPORTS_RC_TABLE) + return; + + ieee80211_get_tx_rates(&sdata->vif, ista, txrc->skb, + info->control.rates, + ARRAY_SIZE(info->control.rates)); +} - BUG_ON(info->control.rates[0].idx < 0); +int rate_control_set_rates(struct ieee80211_hw *hw, + struct ieee80211_sta *pubsta, + struct ieee80211_sta_rates *rates) +{ + struct ieee80211_sta_rates *old = rcu_dereference(pubsta->rates); + + rcu_assign_pointer(pubsta->rates, rates); + if (old) + kfree_rcu(old, rcu_head); + + return 0; } +EXPORT_SYMBOL(rate_control_set_rates); int ieee80211_init_rate_ctrl_alg(struct ieee80211_local *local, const char *name) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 6ca857f8f424..4a5fbf83cd1e 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -48,15 +48,15 @@ static __le16 ieee80211_duration(struct ieee80211_tx_data *tx, struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); /* assume HW handles this */ - if (info->control.rates[0].flags & IEEE80211_TX_RC_MCS) + if (tx->rate.flags & IEEE80211_TX_RC_MCS) return 0; /* uh huh? */ - if (WARN_ON_ONCE(info->control.rates[0].idx < 0)) + if (WARN_ON_ONCE(tx->rate.idx < 0)) return 0; sband = local->hw.wiphy->bands[info->band]; - txrate = &sband->bitrates[info->control.rates[0].idx]; + txrate = &sband->bitrates[tx->rate.idx]; erp = txrate->flags & IEEE80211_RATE_ERP_G; @@ -617,11 +617,9 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx) struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb); struct ieee80211_hdr *hdr = (void *)tx->skb->data; struct ieee80211_supported_band *sband; - struct ieee80211_rate *rate; - int i; u32 len; - bool inval = false, rts = false, short_preamble = false; struct ieee80211_tx_rate_control txrc; + struct ieee80211_sta_rates *ratetbl = NULL; bool assoc = false; memset(&txrc, 0, sizeof(txrc)); @@ -653,10 +651,10 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx) /* set up RTS protection if desired */ if (len > tx->local->hw.wiphy->rts_threshold) { - txrc.rts = rts = true; + txrc.rts = true; } - info->control.use_rts = rts; + info->control.use_rts = txrc.rts; info->control.use_cts_prot = tx->sdata->vif.bss_conf.use_cts_prot; /* @@ -668,7 +666,9 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx) if (tx->sdata->vif.bss_conf.use_short_preamble && (ieee80211_is_data(hdr->frame_control) || (tx->sta && test_sta_flag(tx->sta, WLAN_STA_SHORT_PREAMBLE)))) - txrc.short_preamble = short_preamble = true; + txrc.short_preamble = true; + + info->control.short_preamble = txrc.short_preamble; if (tx->sta) assoc = test_sta_flag(tx->sta, WLAN_STA_ASSOC); @@ -692,16 +692,38 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx) */ rate_control_get_rate(tx->sdata, tx->sta, &txrc); - if (unlikely(info->control.rates[0].idx < 0)) - return TX_DROP; + if (tx->sta && !info->control.skip_table) + ratetbl = rcu_dereference(tx->sta->sta.rates); + + if (unlikely(info->control.rates[0].idx < 0)) { + if (ratetbl) { + struct ieee80211_tx_rate rate = { + .idx = ratetbl->rate[0].idx, + .flags = ratetbl->rate[0].flags, + .count = ratetbl->rate[0].count + }; + + if (ratetbl->rate[0].idx < 0) + return TX_DROP; + + tx->rate = rate; + } else { + return TX_DROP; + } + } else { + tx->rate = info->control.rates[0]; + } if (txrc.reported_rate.idx < 0) { - txrc.reported_rate = info->control.rates[0]; + txrc.reported_rate = tx->rate; if (tx->sta && ieee80211_is_data(hdr->frame_control)) tx->sta->last_tx_rate = txrc.reported_rate; } else if (tx->sta) tx->sta->last_tx_rate = txrc.reported_rate; + if (ratetbl) + return TX_CONTINUE; + if (unlikely(!info->control.rates[0].count)) info->control.rates[0].count = 1; @@ -709,102 +731,6 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx) (info->flags & IEEE80211_TX_CTL_NO_ACK))) info->control.rates[0].count = 1; - if (is_multicast_ether_addr(hdr->addr1)) { - /* - * XXX: verify the rate is in the basic rateset - */ - return TX_CONTINUE; - } - - /* - * Set up the RTS/CTS rate as the fastest basic rate - * that is not faster than the data rate unless there - * is no basic rate slower than the data rate, in which - * case we pick the slowest basic rate - * - * XXX: Should this check all retry rates? - */ - if (!(info->control.rates[0].flags & IEEE80211_TX_RC_MCS)) { - u32 basic_rates = tx->sdata->vif.bss_conf.basic_rates; - s8 baserate = basic_rates ? ffs(basic_rates - 1) : 0; - - rate = &sband->bitrates[info->control.rates[0].idx]; - - for (i = 0; i < sband->n_bitrates; i++) { - /* must be a basic rate */ - if (!(basic_rates & BIT(i))) - continue; - /* must not be faster than the data rate */ - if (sband->bitrates[i].bitrate > rate->bitrate) - continue; - /* maximum */ - if (sband->bitrates[baserate].bitrate < - sband->bitrates[i].bitrate) - baserate = i; - } - - info->control.rts_cts_rate_idx = baserate; - } - - for (i = 0; i < IEEE80211_TX_MAX_RATES; i++) { - struct ieee80211_tx_rate *rc_rate = &info->control.rates[i]; - - /* - * make sure there's no valid rate following - * an invalid one, just in case drivers don't - * take the API seriously to stop at -1. - */ - if (inval) { - rc_rate->idx = -1; - continue; - } - if (rc_rate->idx < 0) { - inval = true; - continue; - } - - /* - * For now assume MCS is already set up correctly, this - * needs to be fixed. - */ - if (rc_rate->flags & IEEE80211_TX_RC_MCS) { - WARN_ON(rc_rate->idx > 76); - - if (!(rc_rate->flags & IEEE80211_TX_RC_USE_RTS_CTS) && - tx->sdata->vif.bss_conf.use_cts_prot) - rc_rate->flags |= - IEEE80211_TX_RC_USE_CTS_PROTECT; - continue; - } - - if (rc_rate->flags & IEEE80211_TX_RC_VHT_MCS) { - WARN_ON(ieee80211_rate_get_vht_mcs(rc_rate) > 9); - continue; - } - - /* set up RTS protection if desired */ - if (rts) - rc_rate->flags |= IEEE80211_TX_RC_USE_RTS_CTS; - - /* RC is busted */ - if (WARN_ON_ONCE(rc_rate->idx >= sband->n_bitrates)) { - rc_rate->idx = -1; - continue; - } - - rate = &sband->bitrates[rc_rate->idx]; - - /* set up short preamble */ - if (short_preamble && - rate->flags & IEEE80211_RATE_SHORT_PREAMBLE) - rc_rate->flags |= IEEE80211_TX_RC_USE_SHORT_PREAMBLE; - - /* set up G protection */ - if (!rts && tx->sdata->vif.bss_conf.use_cts_prot && - rate->flags & IEEE80211_RATE_ERP_G) - rc_rate->flags |= IEEE80211_TX_RC_USE_CTS_PROTECT; - } - return TX_CONTINUE; } -- cgit v1.2.3 From 3fb62c5d3fc1821f50c6003e582713857a520f6b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 19 Apr 2013 14:29:25 -0700 Subject: net: remove a stale comment for dl_next dl_next member in struct request_sock doesn't need to be first. We expect to insert a "struct common_sock" or a subset of it, so this claim had to be verified. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/request_sock.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/request_sock.h b/include/net/request_sock.h index 9069e65c1c56..59795e42c8b6 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -48,7 +48,7 @@ extern int inet_rtx_syn_ack(struct sock *parent, struct request_sock *req); /* struct request_sock - mini sock to represent a connection request */ struct request_sock { - struct request_sock *dl_next; /* Must be first member! */ + struct request_sock *dl_next; u16 mss; u8 num_retrans; /* number of retransmits */ u8 cookie_ts:1; /* syncookie: encode tcpopts in timestamp */ -- cgit v1.2.3 From 3e3251b3f289528732edab386ddf73ac428359b7 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 18 Apr 2013 21:59:37 +0000 Subject: net: sctp: minor: remove dead code from sctp_packet struct sctp_packet is currently embedded into sctp_transport or sits on the stack as 'singleton' in sctp_outq_flush(). Therefore, its member 'malloced' is always 0, thus a kfree() is never called. Because of that, we can just remove this code. Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 3 +-- net/sctp/output.c | 5 +---- 2 files changed, 2 insertions(+), 6 deletions(-) (limited to 'include/net') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 64d469845f25..1bd4c4144fe8 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -714,8 +714,7 @@ struct sctp_packet { has_sack:1, /* This packet contains a SACK chunk. */ has_auth:1, /* This packet contains an AUTH chunk */ has_data:1, /* This packet contains at least 1 DATA chunk */ - ipfragok:1, /* So let ip fragment this packet */ - malloced:1; /* Is it malloced? */ + ipfragok:1; /* So let ip fragment this packet */ }; struct sctp_packet *sctp_packet_init(struct sctp_packet *, diff --git a/net/sctp/output.c b/net/sctp/output.c index f5200a2ad852..bbef4a7a9b56 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -136,7 +136,7 @@ struct sctp_packet *sctp_packet_init(struct sctp_packet *packet, packet->overhead = overhead; sctp_packet_reset(packet); packet->vtag = 0; - packet->malloced = 0; + return packet; } @@ -151,9 +151,6 @@ void sctp_packet_free(struct sctp_packet *packet) list_del_init(&chunk->list); sctp_chunk_free(chunk); } - - if (packet->malloced) - kfree(packet); } /* This routine tries to append the chunk to the offered packet. If adding -- cgit v1.2.3 From 0a925864c1038a78fd1cc9b048d9a2b1ae04b63e Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Wed, 17 Apr 2013 23:50:49 +0300 Subject: ipvs: fix sparse warnings for some parameters Some service fields are in network order: - netmask: used once in network order and also as prefix len for IPv6 - port Other parameters are in host order: - struct ip_vs_flags: flags and mask moved between user and kernel only - sync state: moved between user and kernel only - syncid: sent over network as single octet Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman --- include/net/ip_vs.h | 8 ++++---- include/uapi/linux/ip_vs.h | 4 ++-- net/netfilter/ipvs/ip_vs_core.c | 3 ++- net/netfilter/ipvs/ip_vs_ctl.c | 40 ++++++++++++++++++++++++---------------- 4 files changed, 32 insertions(+), 23 deletions(-) (limited to 'include/net') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index f9f5b057b480..4c062ccff9aa 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -678,7 +678,7 @@ struct ip_vs_service_user_kern { u16 af; u16 protocol; union nf_inet_addr addr; /* virtual ip address */ - u16 port; + __be16 port; u32 fwmark; /* firwall mark of service */ /* virtual service options */ @@ -686,14 +686,14 @@ struct ip_vs_service_user_kern { char *pe_name; unsigned int flags; /* virtual service flags */ unsigned int timeout; /* persistent timeout in sec */ - u32 netmask; /* persistent netmask */ + __be32 netmask; /* persistent netmask or plen */ }; struct ip_vs_dest_user_kern { /* destination server address */ union nf_inet_addr addr; - u16 port; + __be16 port; /* real server options */ unsigned int conn_flags; /* connection flags */ @@ -721,7 +721,7 @@ struct ip_vs_service { __u32 fwmark; /* firewall mark of the service */ unsigned int flags; /* service status flags */ unsigned int timeout; /* persistent timeout in ticks */ - __be32 netmask; /* grouping granularity */ + __be32 netmask; /* grouping granularity, mask/plen */ struct net *net; struct list_head destinations; /* real server d-linked list */ diff --git a/include/uapi/linux/ip_vs.h b/include/uapi/linux/ip_vs.h index 8a2d438dc499..a24537725e80 100644 --- a/include/uapi/linux/ip_vs.h +++ b/include/uapi/linux/ip_vs.h @@ -280,8 +280,8 @@ struct ip_vs_daemon_user { #define IPVS_GENL_VERSION 0x1 struct ip_vs_flags { - __be32 flags; - __be32 mask; + __u32 flags; + __u32 mask; }; /* Generic Netlink command attributes */ diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index f26fe3353a30..a0d7bd342775 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -235,7 +235,8 @@ ip_vs_sched_persist(struct ip_vs_service *svc, /* Mask saddr with the netmask to adjust template granularity */ #ifdef CONFIG_IP_VS_IPV6 if (svc->af == AF_INET6) - ipv6_addr_prefix(&snet.in6, &iph->saddr.in6, svc->netmask); + ipv6_addr_prefix(&snet.in6, &iph->saddr.in6, + (__force __u32) svc->netmask); else #endif snet.ip = iph->saddr.ip & svc->netmask; diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 64075a775e35..5b142fb16480 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -1164,9 +1164,13 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u, } #ifdef CONFIG_IP_VS_IPV6 - if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) { - ret = -EINVAL; - goto out_err; + if (u->af == AF_INET6) { + __u32 plen = (__force __u32) u->netmask; + + if (plen < 1 || plen > 128) { + ret = -EINVAL; + goto out_err; + } } #endif @@ -1277,9 +1281,13 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u) } #ifdef CONFIG_IP_VS_IPV6 - if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) { - ret = -EINVAL; - goto out; + if (u->af == AF_INET6) { + __u32 plen = (__force __u32) u->netmask; + + if (plen < 1 || plen > 128) { + ret = -EINVAL; + goto out; + } } #endif @@ -2892,7 +2900,7 @@ static int ip_vs_genl_fill_service(struct sk_buff *skb, } else { if (nla_put_u16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol) || nla_put(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr) || - nla_put_u16(skb, IPVS_SVC_ATTR_PORT, svc->port)) + nla_put_be16(skb, IPVS_SVC_ATTR_PORT, svc->port)) goto nla_put_failure; } @@ -2902,7 +2910,7 @@ static int ip_vs_genl_fill_service(struct sk_buff *skb, (pe && nla_put_string(skb, IPVS_SVC_ATTR_PE_NAME, pe->name)) || nla_put(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags) || nla_put_u32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ) || - nla_put_u32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask)) + nla_put_be32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask)) goto nla_put_failure; if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats)) goto nla_put_failure; @@ -3015,7 +3023,7 @@ static int ip_vs_genl_parse_service(struct net *net, } else { usvc->protocol = nla_get_u16(nla_protocol); nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr)); - usvc->port = nla_get_u16(nla_port); + usvc->port = nla_get_be16(nla_port); usvc->fwmark = 0; } @@ -3055,7 +3063,7 @@ static int ip_vs_genl_parse_service(struct net *net, usvc->sched_name = nla_data(nla_sched); usvc->pe_name = nla_pe ? nla_data(nla_pe) : NULL; usvc->timeout = nla_get_u32(nla_timeout); - usvc->netmask = nla_get_u32(nla_netmask); + usvc->netmask = nla_get_be32(nla_netmask); } return 0; @@ -3081,7 +3089,7 @@ static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest) return -EMSGSIZE; if (nla_put(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr) || - nla_put_u16(skb, IPVS_DEST_ATTR_PORT, dest->port) || + nla_put_be16(skb, IPVS_DEST_ATTR_PORT, dest->port) || nla_put_u32(skb, IPVS_DEST_ATTR_FWD_METHOD, (atomic_read(&dest->conn_flags) & IP_VS_CONN_F_FWD_MASK)) || @@ -3190,7 +3198,7 @@ static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest, memset(udest, 0, sizeof(*udest)); nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr)); - udest->port = nla_get_u16(nla_port); + udest->port = nla_get_be16(nla_port); /* If a full entry was requested, check for the additional fields */ if (full_entry) { @@ -3215,8 +3223,8 @@ static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest, return 0; } -static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __be32 state, - const char *mcast_ifn, __be32 syncid) +static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __u32 state, + const char *mcast_ifn, __u32 syncid) { struct nlattr *nl_daemon; @@ -3237,8 +3245,8 @@ nla_put_failure: return -EMSGSIZE; } -static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state, - const char *mcast_ifn, __be32 syncid, +static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __u32 state, + const char *mcast_ifn, __u32 syncid, struct netlink_callback *cb) { void *hdr; -- cgit v1.2.3 From 26ee65e680f4a2291f6258e11beceae0ad4eeba3 Mon Sep 17 00:00:00 2001 From: "sjur.brandeland@stericsson.com" Date: Mon, 22 Apr 2013 23:57:01 +0000 Subject: caif: Remove my bouncing email address. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove my soon bouncing email address. Also remove the "Contact:" line in file header. The MAINTAINERS file is a better place to find the contact person anyway. Signed-off-by: Sjur Brændeland Signed-off-by: David S. Miller --- drivers/net/caif/caif_hsi.c | 1 - drivers/net/caif/caif_serial.c | 4 ++-- drivers/net/caif/caif_spi.c | 1 - drivers/net/caif/caif_spi_slave.c | 1 - include/net/caif/caif_dev.h | 2 +- include/net/caif/caif_device.h | 2 +- include/net/caif/caif_hsi.h | 1 - include/net/caif/caif_layer.h | 2 +- include/net/caif/cfcnfg.h | 2 +- include/net/caif/cfctrl.h | 2 +- include/net/caif/cffrml.h | 2 +- include/net/caif/cfmuxl.h | 2 +- include/net/caif/cfpkt.h | 2 +- include/net/caif/cfserl.h | 2 +- include/net/caif/cfsrvl.h | 2 +- include/uapi/linux/caif/caif_socket.h | 2 +- include/uapi/linux/caif/if_caif.h | 2 +- net/caif/caif_dev.c | 2 +- net/caif/caif_socket.c | 2 +- net/caif/caif_usb.c | 2 +- net/caif/cfcnfg.c | 2 +- net/caif/cfctrl.c | 2 +- net/caif/cfdbgl.c | 2 +- net/caif/cfdgml.c | 2 +- net/caif/cffrml.c | 2 +- net/caif/cfmuxl.c | 2 +- net/caif/cfpkt_skbuff.c | 2 +- net/caif/cfrfml.c | 2 +- net/caif/cfserl.c | 2 +- net/caif/cfsrvl.c | 2 +- net/caif/cfutill.c | 2 +- net/caif/cfveil.c | 2 +- net/caif/cfvidl.c | 2 +- net/caif/chnl_net.c | 2 +- 34 files changed, 31 insertions(+), 35 deletions(-) (limited to 'include/net') diff --git a/drivers/net/caif/caif_hsi.c b/drivers/net/caif/caif_hsi.c index 0def8b3106f4..77f50749acb1 100644 --- a/drivers/net/caif/caif_hsi.c +++ b/drivers/net/caif/caif_hsi.c @@ -1,6 +1,5 @@ /* * Copyright (C) ST-Ericsson AB 2010 - * Contact: Sjur Brendeland / sjur.brandeland@stericsson.com * Author: Daniel Martensson / daniel.martensson@stericsson.com * Dmitry.Tarnyagin / dmitry.tarnyagin@stericsson.com * License terms: GNU General Public License (GPL) version 2. diff --git a/drivers/net/caif/caif_serial.c b/drivers/net/caif/caif_serial.c index 666891a9a248..e56b56c08b27 100644 --- a/drivers/net/caif/caif_serial.c +++ b/drivers/net/caif/caif_serial.c @@ -1,6 +1,6 @@ /* * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur Brendeland / sjur.brandeland@stericsson.com + * Author: Sjur Brendeland * License terms: GNU General Public License (GPL) version 2 */ @@ -21,7 +21,7 @@ #include MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Sjur Brendeland"); +MODULE_AUTHOR("Sjur Brendeland"); MODULE_DESCRIPTION("CAIF serial device TTY line discipline"); MODULE_LICENSE("GPL"); MODULE_ALIAS_LDISC(N_CAIF); diff --git a/drivers/net/caif/caif_spi.c b/drivers/net/caif/caif_spi.c index b71ce9bf0afb..c2cb852e13b8 100644 --- a/drivers/net/caif/caif_spi.c +++ b/drivers/net/caif/caif_spi.c @@ -1,6 +1,5 @@ /* * Copyright (C) ST-Ericsson AB 2010 - * Contact: Sjur Brendeland / sjur.brandeland@stericsson.com * Author: Daniel Martensson / Daniel.Martensson@stericsson.com * License terms: GNU General Public License (GPL) version 2. */ diff --git a/drivers/net/caif/caif_spi_slave.c b/drivers/net/caif/caif_spi_slave.c index e139e133fc79..98f03663aa22 100644 --- a/drivers/net/caif/caif_spi_slave.c +++ b/drivers/net/caif/caif_spi_slave.c @@ -1,6 +1,5 @@ /* * Copyright (C) ST-Ericsson AB 2010 - * Contact: Sjur Brendeland / sjur.brandeland@stericsson.com * Author: Daniel Martensson / Daniel.Martensson@stericsson.com * License terms: GNU General Public License (GPL) version 2. */ diff --git a/include/net/caif/caif_dev.h b/include/net/caif/caif_dev.h index ef2dd9438bb1..028b754ae9b1 100644 --- a/include/net/caif/caif_dev.h +++ b/include/net/caif/caif_dev.h @@ -1,6 +1,6 @@ /* * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur Brendeland/ sjur.brandeland@stericsson.com + * Author: Sjur Brendeland * License terms: GNU General Public License (GPL) version 2 */ diff --git a/include/net/caif/caif_device.h b/include/net/caif/caif_device.h index d02f044adb8a..d6e3c4267c81 100644 --- a/include/net/caif/caif_device.h +++ b/include/net/caif/caif_device.h @@ -1,6 +1,6 @@ /* * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur Brendeland/ sjur.brandeland@stericsson.com + * Author: Sjur Brendeland * License terms: GNU General Public License (GPL) version 2 */ diff --git a/include/net/caif/caif_hsi.h b/include/net/caif/caif_hsi.h index bcb9cc3ce98b..4795e817afe5 100644 --- a/include/net/caif/caif_hsi.h +++ b/include/net/caif/caif_hsi.h @@ -1,6 +1,5 @@ /* * Copyright (C) ST-Ericsson AB 2010 - * Contact: Sjur Brendeland / sjur.brandeland@stericsson.com * Author: Daniel Martensson / daniel.martensson@stericsson.com * Dmitry.Tarnyagin / dmitry.tarnyagin@stericsson.com * License terms: GNU General Public License (GPL) version 2 diff --git a/include/net/caif/caif_layer.h b/include/net/caif/caif_layer.h index 0f3a39125f90..94e5ed64dc6d 100644 --- a/include/net/caif/caif_layer.h +++ b/include/net/caif/caif_layer.h @@ -1,6 +1,6 @@ /* * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur Brendeland / sjur.brandeland@stericsson.com + * Author: Sjur Brendeland * License terms: GNU General Public License (GPL) version 2 */ diff --git a/include/net/caif/cfcnfg.h b/include/net/caif/cfcnfg.h index 90b4ff8bad83..70bfd017581f 100644 --- a/include/net/caif/cfcnfg.h +++ b/include/net/caif/cfcnfg.h @@ -1,6 +1,6 @@ /* * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur Brendeland/sjur.brandeland@stericsson.com + * Author: Sjur Brendeland * License terms: GNU General Public License (GPL) version 2 */ diff --git a/include/net/caif/cfctrl.h b/include/net/caif/cfctrl.h index 9e5425b4a1d7..f2ae33d23baf 100644 --- a/include/net/caif/cfctrl.h +++ b/include/net/caif/cfctrl.h @@ -1,6 +1,6 @@ /* * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur Brendeland/sjur.brandeland@stericsson.com + * Author: Sjur Brendeland * License terms: GNU General Public License (GPL) version 2 */ diff --git a/include/net/caif/cffrml.h b/include/net/caif/cffrml.h index afac1a48cce7..a06e33fbaa8b 100644 --- a/include/net/caif/cffrml.h +++ b/include/net/caif/cffrml.h @@ -1,6 +1,6 @@ /* * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur Brendeland/sjur.brandeland@stericsson.com + * Author: Sjur Brendeland * License terms: GNU General Public License (GPL) version 2 */ diff --git a/include/net/caif/cfmuxl.h b/include/net/caif/cfmuxl.h index 5847a196b8ad..752999572f21 100644 --- a/include/net/caif/cfmuxl.h +++ b/include/net/caif/cfmuxl.h @@ -1,6 +1,6 @@ /* * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur Brendeland/sjur.brandeland@stericsson.com + * Author: Sjur Brendeland * License terms: GNU General Public License (GPL) version 2 */ diff --git a/include/net/caif/cfpkt.h b/include/net/caif/cfpkt.h index 83a89ba3005b..1c1ad46250d5 100644 --- a/include/net/caif/cfpkt.h +++ b/include/net/caif/cfpkt.h @@ -1,6 +1,6 @@ /* * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur Brendeland/sjur.brandeland@stericsson.com + * Author: Sjur Brendeland * License terms: GNU General Public License (GPL) version 2 */ diff --git a/include/net/caif/cfserl.h b/include/net/caif/cfserl.h index f121299a3427..b5b020f3c72e 100644 --- a/include/net/caif/cfserl.h +++ b/include/net/caif/cfserl.h @@ -1,6 +1,6 @@ /* * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur Brendeland/sjur.brandeland@stericsson.com + * Author: Sjur Brendeland * License terms: GNU General Public License (GPL) version 2 */ diff --git a/include/net/caif/cfsrvl.h b/include/net/caif/cfsrvl.h index 0f5905241843..cd47705c2cc3 100644 --- a/include/net/caif/cfsrvl.h +++ b/include/net/caif/cfsrvl.h @@ -1,6 +1,6 @@ /* * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur Brendeland/sjur.brandeland@stericsson.com + * Author: Sjur Brendeland * License terms: GNU General Public License (GPL) version 2 */ diff --git a/include/uapi/linux/caif/caif_socket.h b/include/uapi/linux/caif/caif_socket.h index 3f3bac6af7bc..586e9f98184f 100644 --- a/include/uapi/linux/caif/caif_socket.h +++ b/include/uapi/linux/caif/caif_socket.h @@ -1,7 +1,7 @@ /* linux/caif_socket.h * CAIF Definitions for CAIF socket and network layer * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur Brendeland/ sjur.brandeland@stericsson.com + * Author: Sjur Brendeland * License terms: GNU General Public License (GPL) version 2 */ diff --git a/include/uapi/linux/caif/if_caif.h b/include/uapi/linux/caif/if_caif.h index 5e7eed4edf51..7618aabe8c6b 100644 --- a/include/uapi/linux/caif/if_caif.h +++ b/include/uapi/linux/caif/if_caif.h @@ -1,6 +1,6 @@ /* * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur Brendeland/ sjur.brandeland@stericsson.com + * Author: Sjur Brendeland * License terms: GNU General Public License (GPL) version 2 */ diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c index df6d56d8689a..1f9ece1a9c34 100644 --- a/net/caif/caif_dev.c +++ b/net/caif/caif_dev.c @@ -1,7 +1,7 @@ /* * CAIF Interface registration. * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur Brendeland/sjur.brandeland@stericsson.com + * Author: Sjur Brendeland * License terms: GNU General Public License (GPL) version 2 * * Borrowed heavily from file: pn_dev.c. Thanks to Remi Denis-Courmont diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c index 630b8be6e748..05a41c7ec304 100644 --- a/net/caif/caif_socket.c +++ b/net/caif/caif_socket.c @@ -1,6 +1,6 @@ /* * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur Brendeland sjur.brandeland@stericsson.com + * Author: Sjur Brendeland * License terms: GNU General Public License (GPL) version 2 */ diff --git a/net/caif/caif_usb.c b/net/caif/caif_usb.c index d76278d644b8..942e00a425fd 100644 --- a/net/caif/caif_usb.c +++ b/net/caif/caif_usb.c @@ -1,7 +1,7 @@ /* * CAIF USB handler * Copyright (C) ST-Ericsson AB 2011 - * Author: Sjur Brendeland/sjur.brandeland@stericsson.com + * Author: Sjur Brendeland * License terms: GNU General Public License (GPL) version 2 * */ diff --git a/net/caif/cfcnfg.c b/net/caif/cfcnfg.c index 246ac3aa8de5..fa39fc298708 100644 --- a/net/caif/cfcnfg.c +++ b/net/caif/cfcnfg.c @@ -1,6 +1,6 @@ /* * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur Brendeland/sjur.brandeland@stericsson.com + * Author: Sjur Brendeland * License terms: GNU General Public License (GPL) version 2 */ diff --git a/net/caif/cfctrl.c b/net/caif/cfctrl.c index 9cd057c59c59..2bd4b58f4372 100644 --- a/net/caif/cfctrl.c +++ b/net/caif/cfctrl.c @@ -1,6 +1,6 @@ /* * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur Brendeland/sjur.brandeland@stericsson.com + * Author: Sjur Brendeland * License terms: GNU General Public License (GPL) version 2 */ diff --git a/net/caif/cfdbgl.c b/net/caif/cfdbgl.c index 2914659eb9b2..7aae0b56829e 100644 --- a/net/caif/cfdbgl.c +++ b/net/caif/cfdbgl.c @@ -1,6 +1,6 @@ /* * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur Brendeland/sjur.brandeland@stericsson.com + * Author: Sjur Brendeland * License terms: GNU General Public License (GPL) version 2 */ diff --git a/net/caif/cfdgml.c b/net/caif/cfdgml.c index a63f4a5f5aff..3bdddb32d55a 100644 --- a/net/caif/cfdgml.c +++ b/net/caif/cfdgml.c @@ -1,6 +1,6 @@ /* * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur Brendeland/sjur.brandeland@stericsson.com + * Author: Sjur Brendeland * License terms: GNU General Public License (GPL) version 2 */ diff --git a/net/caif/cffrml.c b/net/caif/cffrml.c index 204c5e226a61..8bc7caa28e64 100644 --- a/net/caif/cffrml.c +++ b/net/caif/cffrml.c @@ -2,7 +2,7 @@ * CAIF Framing Layer. * * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur Brendeland/sjur.brandeland@stericsson.com + * Author: Sjur Brendeland * License terms: GNU General Public License (GPL) version 2 */ diff --git a/net/caif/cfmuxl.c b/net/caif/cfmuxl.c index 154d9f8f964c..8c5d6386319f 100644 --- a/net/caif/cfmuxl.c +++ b/net/caif/cfmuxl.c @@ -1,6 +1,6 @@ /* * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur Brendeland/sjur.brandeland@stericsson.com + * Author: Sjur Brendeland * License terms: GNU General Public License (GPL) version 2 */ diff --git a/net/caif/cfpkt_skbuff.c b/net/caif/cfpkt_skbuff.c index e8f9c149504d..6493351f39c6 100644 --- a/net/caif/cfpkt_skbuff.c +++ b/net/caif/cfpkt_skbuff.c @@ -1,6 +1,6 @@ /* * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur Brendeland/sjur.brandeland@stericsson.com + * Author: Sjur Brendeland * License terms: GNU General Public License (GPL) version 2 */ diff --git a/net/caif/cfrfml.c b/net/caif/cfrfml.c index db51830c8587..61d7617d9249 100644 --- a/net/caif/cfrfml.c +++ b/net/caif/cfrfml.c @@ -1,6 +1,6 @@ /* * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur Brendeland/sjur.brandeland@stericsson.com + * Author: Sjur Brendeland * License terms: GNU General Public License (GPL) version 2 */ diff --git a/net/caif/cfserl.c b/net/caif/cfserl.c index 147c232b1285..ce60f06d76de 100644 --- a/net/caif/cfserl.c +++ b/net/caif/cfserl.c @@ -1,6 +1,6 @@ /* * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur Brendeland/sjur.brandeland@stericsson.com + * Author: Sjur Brendeland * License terms: GNU General Public License (GPL) version 2 */ diff --git a/net/caif/cfsrvl.c b/net/caif/cfsrvl.c index 95f7f5ea30ef..353f793d1b3b 100644 --- a/net/caif/cfsrvl.c +++ b/net/caif/cfsrvl.c @@ -1,6 +1,6 @@ /* * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur Brendeland/sjur.brandeland@stericsson.com + * Author: Sjur Brendeland * License terms: GNU General Public License (GPL) version 2 */ diff --git a/net/caif/cfutill.c b/net/caif/cfutill.c index 86d2dadb4b73..1728fa4471cf 100644 --- a/net/caif/cfutill.c +++ b/net/caif/cfutill.c @@ -1,6 +1,6 @@ /* * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur Brendeland/sjur.brandeland@stericsson.com + * Author: Sjur Brendeland * License terms: GNU General Public License (GPL) version 2 */ diff --git a/net/caif/cfveil.c b/net/caif/cfveil.c index 910ab0661f66..262224581efa 100644 --- a/net/caif/cfveil.c +++ b/net/caif/cfveil.c @@ -1,6 +1,6 @@ /* * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur Brendeland/sjur.brandeland@stericsson.com + * Author: Sjur Brendeland * License terms: GNU General Public License (GPL) version 2 */ diff --git a/net/caif/cfvidl.c b/net/caif/cfvidl.c index a8e2a2d758a5..b3b110e8a350 100644 --- a/net/caif/cfvidl.c +++ b/net/caif/cfvidl.c @@ -1,6 +1,6 @@ /* * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur Brendeland/sjur.brandeland@stericsson.com + * Author: Sjur Brendeland * License terms: GNU General Public License (GPL) version 2 */ diff --git a/net/caif/chnl_net.c b/net/caif/chnl_net.c index 26a4e4e3a767..89586c05a479 100644 --- a/net/caif/chnl_net.c +++ b/net/caif/chnl_net.c @@ -1,6 +1,6 @@ /* * Copyright (C) ST-Ericsson AB 2010 - * Authors: Sjur Brendeland/sjur.brandeland@stericsson.com + * Authors: Sjur Brendeland * Daniel Martensson / Daniel.Martensson@stericsson.com * License terms: GNU General Public License (GPL) version 2 */ -- cgit v1.2.3 From def3117493eafd9dfa1f809d861e0031b2cc8a07 Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Tue, 23 Apr 2013 07:48:30 +0000 Subject: genl: Allow concurrent genl callbacks. All genl callbacks are serialized by genl-mutex. This can become bottleneck in multi threaded case. Following patch adds an parameter to genl_family so that a particular family can get concurrent netlink callback without genl_lock held. New rw-sem is used to protect genl callback from genl family unregister. in case of parallel_ops genl-family read-lock is taken for callbacks and write lock is taken for register or unregistration for any family. In case of locked genl family semaphore and gel-mutex is locked for any openration. Signed-off-by: Pravin B Shelar Signed-off-by: David S. Miller --- include/net/genetlink.h | 1 + net/netlink/genetlink.c | 114 ++++++++++++++++++++++++++++++++---------------- 2 files changed, 77 insertions(+), 38 deletions(-) (limited to 'include/net') diff --git a/include/net/genetlink.h b/include/net/genetlink.h index bdfbe68c1c3b..93024a47e0e2 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -50,6 +50,7 @@ struct genl_family { unsigned int version; unsigned int maxattr; bool netnsok; + bool parallel_ops; int (*pre_doit)(struct genl_ops *ops, struct sk_buff *skb, struct genl_info *info); diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 5a55be3f17a5..2f72598dd8fe 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -16,10 +16,12 @@ #include #include #include +#include #include #include static DEFINE_MUTEX(genl_mutex); /* serialization of message processing */ +static DECLARE_RWSEM(cb_lock); void genl_lock(void) { @@ -41,6 +43,18 @@ int lockdep_genl_is_held(void) EXPORT_SYMBOL(lockdep_genl_is_held); #endif +static void genl_lock_all(void) +{ + down_write(&cb_lock); + genl_lock(); +} + +static void genl_unlock_all(void) +{ + genl_unlock(); + up_write(&cb_lock); +} + #define GENL_FAM_TAB_SIZE 16 #define GENL_FAM_TAB_MASK (GENL_FAM_TAB_SIZE - 1) @@ -144,7 +158,7 @@ int genl_register_mc_group(struct genl_family *family, BUG_ON(grp->name[0] == '\0'); BUG_ON(memchr(grp->name, '\0', GENL_NAMSIZ) == NULL); - genl_lock(); + genl_lock_all(); /* special-case our own group */ if (grp == ¬ify_grp) @@ -213,7 +227,7 @@ int genl_register_mc_group(struct genl_family *family, genl_ctrl_event(CTRL_CMD_NEWMCAST_GRP, grp); out: - genl_unlock(); + genl_unlock_all(); return err; } EXPORT_SYMBOL(genl_register_mc_group); @@ -255,9 +269,9 @@ static void __genl_unregister_mc_group(struct genl_family *family, void genl_unregister_mc_group(struct genl_family *family, struct genl_multicast_group *grp) { - genl_lock(); + genl_lock_all(); __genl_unregister_mc_group(family, grp); - genl_unlock(); + genl_unlock_all(); } EXPORT_SYMBOL(genl_unregister_mc_group); @@ -303,9 +317,9 @@ int genl_register_ops(struct genl_family *family, struct genl_ops *ops) if (ops->policy) ops->flags |= GENL_CMD_CAP_HASPOL; - genl_lock(); + genl_lock_all(); list_add_tail(&ops->ops_list, &family->ops_list); - genl_unlock(); + genl_unlock_all(); genl_ctrl_event(CTRL_CMD_NEWOPS, ops); err = 0; @@ -334,16 +348,16 @@ int genl_unregister_ops(struct genl_family *family, struct genl_ops *ops) { struct genl_ops *rc; - genl_lock(); + genl_lock_all(); list_for_each_entry(rc, &family->ops_list, ops_list) { if (rc == ops) { list_del(&ops->ops_list); - genl_unlock(); + genl_unlock_all(); genl_ctrl_event(CTRL_CMD_DELOPS, ops); return 0; } } - genl_unlock(); + genl_unlock_all(); return -ENOENT; } @@ -373,7 +387,7 @@ int genl_register_family(struct genl_family *family) INIT_LIST_HEAD(&family->ops_list); INIT_LIST_HEAD(&family->mcast_groups); - genl_lock(); + genl_lock_all(); if (genl_family_find_byname(family->name)) { err = -EEXIST; @@ -394,7 +408,7 @@ int genl_register_family(struct genl_family *family) goto errout_locked; } - if (family->maxattr) { + if (family->maxattr && !family->parallel_ops) { family->attrbuf = kmalloc((family->maxattr+1) * sizeof(struct nlattr *), GFP_KERNEL); if (family->attrbuf == NULL) { @@ -405,14 +419,14 @@ int genl_register_family(struct genl_family *family) family->attrbuf = NULL; list_add_tail(&family->family_list, genl_family_chain(family->id)); - genl_unlock(); + genl_unlock_all(); genl_ctrl_event(CTRL_CMD_NEWFAMILY, family); return 0; errout_locked: - genl_unlock(); + genl_unlock_all(); errout: return err; } @@ -476,7 +490,7 @@ int genl_unregister_family(struct genl_family *family) { struct genl_family *rc; - genl_lock(); + genl_lock_all(); genl_unregister_mc_groups(family); @@ -486,14 +500,14 @@ int genl_unregister_family(struct genl_family *family) list_del(&rc->family_list); INIT_LIST_HEAD(&family->ops_list); - genl_unlock(); + genl_unlock_all(); kfree(family->attrbuf); genl_ctrl_event(CTRL_CMD_DELFAMILY, family); return 0; } - genl_unlock(); + genl_unlock_all(); return -ENOENT; } @@ -530,19 +544,17 @@ void *genlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, } EXPORT_SYMBOL(genlmsg_put); -static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) +static int genl_family_rcv_msg(struct genl_family *family, + struct sk_buff *skb, + struct nlmsghdr *nlh) { struct genl_ops *ops; - struct genl_family *family; struct net *net = sock_net(skb->sk); struct genl_info info; struct genlmsghdr *hdr = nlmsg_data(nlh); + struct nlattr **attrbuf; int hdrlen, err; - family = genl_family_find_byid(nlh->nlmsg_type); - if (family == NULL) - return -ENOENT; - /* this family doesn't exist in this netns */ if (!family->netnsok && !net_eq(net, &init_net)) return -ENOENT; @@ -560,26 +572,30 @@ static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) return -EPERM; if (nlh->nlmsg_flags & NLM_F_DUMP) { + struct netlink_dump_control c = { + .dump = ops->dumpit, + .done = ops->done, + }; + if (ops->dumpit == NULL) return -EOPNOTSUPP; - genl_unlock(); - { - struct netlink_dump_control c = { - .dump = ops->dumpit, - .done = ops->done, - }; - err = netlink_dump_start(net->genl_sock, skb, nlh, &c); - } - genl_lock(); - return err; + return netlink_dump_start(net->genl_sock, skb, nlh, &c); } if (ops->doit == NULL) return -EOPNOTSUPP; - if (family->attrbuf) { - err = nlmsg_parse(nlh, hdrlen, family->attrbuf, family->maxattr, + if (family->maxattr && family->parallel_ops) { + attrbuf = kmalloc((family->maxattr+1) * + sizeof(struct nlattr *), GFP_KERNEL); + if (attrbuf == NULL) + return -ENOMEM; + } else + attrbuf = family->attrbuf; + + if (attrbuf) { + err = nlmsg_parse(nlh, hdrlen, attrbuf, family->maxattr, ops->policy); if (err < 0) return err; @@ -590,7 +606,7 @@ static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) info.nlhdr = nlh; info.genlhdr = nlmsg_data(nlh); info.userhdr = nlmsg_data(nlh) + GENL_HDRLEN; - info.attrs = family->attrbuf; + info.attrs = attrbuf; genl_info_net_set(&info, net); memset(&info.user_ptr, 0, sizeof(info.user_ptr)); @@ -605,14 +621,37 @@ static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) if (family->post_doit) family->post_doit(ops, skb, &info); + if (family->parallel_ops) + kfree(attrbuf); + + return err; +} + +static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) +{ + struct genl_family *family; + int err; + + family = genl_family_find_byid(nlh->nlmsg_type); + if (family == NULL) + return -ENOENT; + + if (!family->parallel_ops) + genl_lock(); + + err = genl_family_rcv_msg(family, skb, nlh); + + if (!family->parallel_ops) + genl_unlock(); + return err; } static void genl_rcv(struct sk_buff *skb) { - genl_lock(); + down_read(&cb_lock); netlink_rcv_skb(skb, &genl_rcv_msg); - genl_unlock(); + up_read(&cb_lock); } /************************************************************************** @@ -918,7 +957,6 @@ static int __net_init genl_pernet_init(struct net *net) { struct netlink_kernel_cfg cfg = { .input = genl_rcv, - .cb_mutex = &genl_mutex, .flags = NL_CFG_F_NONROOT_RECV, }; -- cgit v1.2.3 From a4c4009f4f54dabaaea1bb2b2c3c8930e93cd409 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Thu, 25 Apr 2013 09:52:25 +0000 Subject: net: increase frag hash size Increase fragmentation hash bucket size to 1024 from old 64 elems. After we increased the frag mem limits commit c2a93660 (net: increase fragment memory usage limits) the hash size of 64 elements is simply too small. Also considering the mem limit is per netns and the hash table is shared for all netns. For the embedded people, note that this increase will change the hash table/array from using approx 1 Kbytes to 16 Kbytes. Signed-off-by: Jesper Dangaard Brouer Acked-by: Hannes Frederic Sowa Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/inet_frag.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index 6f41b45e819e..4182c9be8bb5 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -41,7 +41,7 @@ struct inet_frag_queue { struct netns_frags *net; }; -#define INETFRAGS_HASHSZ 64 +#define INETFRAGS_HASHSZ 1024 /* averaged: * max_depth = default ipfrag_high_thresh / INETFRAGS_HASHSZ / -- cgit v1.2.3 From a5fedd43d5f6c94c71053a66e4c3d2e35f1731a2 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 19 Apr 2013 04:58:25 +0000 Subject: netfilter: move skb_gso_segment into nfnetlink_queue module skb_gso_segment is expensive, so it would be nice if we could avoid it in the future. However, userspace needs to be prepared to receive larger-than-mtu-packets (which will also have incorrect l3/l4 checksums), so we cannot simply remove it. The plan is to add a per-queue feature flag that userspace can set when binding the queue. The problem is that in nf_queue, we only have a queue number, not the queue context/configuration settings. This patch should have no impact other than the skb_gso_segment call now being in a function that has access to the queue config data. A new size attribute in nf_queue_entry is needed so nfnetlink_queue can duplicate the entry of the gso skb when segmenting the skb while also copying the route key. The follow up patch adds switch to disable skb_gso_segment when queue config says so. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_queue.h | 6 ++ net/netfilter/nf_queue.c | 96 ++-------------------- net/netfilter/nfnetlink_queue_core.c | 154 +++++++++++++++++++++++++++++++---- 3 files changed, 152 insertions(+), 104 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h index fb1c0be38b6d..aaba4bbcdda0 100644 --- a/include/net/netfilter/nf_queue.h +++ b/include/net/netfilter/nf_queue.h @@ -9,10 +9,13 @@ struct nf_queue_entry { struct nf_hook_ops *elem; u_int8_t pf; + u16 size; /* sizeof(entry) + saved route keys */ unsigned int hook; struct net_device *indev; struct net_device *outdev; int (*okfn)(struct sk_buff *); + + /* extra space to store route keys */ }; #define nf_queue_entry_reroute(x) ((void *)x + sizeof(struct nf_queue_entry)) @@ -27,4 +30,7 @@ void nf_register_queue_handler(const struct nf_queue_handler *qh); void nf_unregister_queue_handler(void); extern void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict); +bool nf_queue_entry_get_refs(struct nf_queue_entry *entry); +void nf_queue_entry_release_refs(struct nf_queue_entry *entry); + #endif /* _NF_QUEUE_H */ diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index 1d91e77ba4c2..5d24b1fdb593 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -45,7 +45,7 @@ void nf_unregister_queue_handler(void) } EXPORT_SYMBOL(nf_unregister_queue_handler); -static void nf_queue_entry_release_refs(struct nf_queue_entry *entry) +void nf_queue_entry_release_refs(struct nf_queue_entry *entry) { /* Release those devices we held, or Alexey will kill me. */ if (entry->indev) @@ -65,9 +65,10 @@ static void nf_queue_entry_release_refs(struct nf_queue_entry *entry) /* Drop reference to owner of hook which queued us. */ module_put(entry->elem->owner); } +EXPORT_SYMBOL_GPL(nf_queue_entry_release_refs); /* Bump dev refs so they don't vanish while packet is out */ -static bool nf_queue_entry_get_refs(struct nf_queue_entry *entry) +bool nf_queue_entry_get_refs(struct nf_queue_entry *entry) { if (!try_module_get(entry->elem->owner)) return false; @@ -92,12 +93,13 @@ static bool nf_queue_entry_get_refs(struct nf_queue_entry *entry) return true; } +EXPORT_SYMBOL_GPL(nf_queue_entry_get_refs); /* * Any packet that leaves via this function must come back * through nf_reinject(). */ -static int __nf_queue(struct sk_buff *skb, +int nf_queue(struct sk_buff *skb, struct nf_hook_ops *elem, u_int8_t pf, unsigned int hook, struct net_device *indev, @@ -137,6 +139,7 @@ static int __nf_queue(struct sk_buff *skb, .indev = indev, .outdev = outdev, .okfn = okfn, + .size = sizeof(*entry) + afinfo->route_key_size, }; if (!nf_queue_entry_get_refs(entry)) { @@ -163,87 +166,6 @@ err: return status; } -#ifdef CONFIG_BRIDGE_NETFILTER -/* When called from bridge netfilter, skb->data must point to MAC header - * before calling skb_gso_segment(). Else, original MAC header is lost - * and segmented skbs will be sent to wrong destination. - */ -static void nf_bridge_adjust_skb_data(struct sk_buff *skb) -{ - if (skb->nf_bridge) - __skb_push(skb, skb->network_header - skb->mac_header); -} - -static void nf_bridge_adjust_segmented_data(struct sk_buff *skb) -{ - if (skb->nf_bridge) - __skb_pull(skb, skb->network_header - skb->mac_header); -} -#else -#define nf_bridge_adjust_skb_data(s) do {} while (0) -#define nf_bridge_adjust_segmented_data(s) do {} while (0) -#endif - -int nf_queue(struct sk_buff *skb, - struct nf_hook_ops *elem, - u_int8_t pf, unsigned int hook, - struct net_device *indev, - struct net_device *outdev, - int (*okfn)(struct sk_buff *), - unsigned int queuenum) -{ - struct sk_buff *segs; - int err = -EINVAL; - unsigned int queued; - - if (!skb_is_gso(skb)) - return __nf_queue(skb, elem, pf, hook, indev, outdev, okfn, - queuenum); - - switch (pf) { - case NFPROTO_IPV4: - skb->protocol = htons(ETH_P_IP); - break; - case NFPROTO_IPV6: - skb->protocol = htons(ETH_P_IPV6); - break; - } - - nf_bridge_adjust_skb_data(skb); - segs = skb_gso_segment(skb, 0); - /* Does not use PTR_ERR to limit the number of error codes that can be - * returned by nf_queue. For instance, callers rely on -ECANCELED to mean - * 'ignore this hook'. - */ - if (IS_ERR(segs)) - goto out_err; - queued = 0; - err = 0; - do { - struct sk_buff *nskb = segs->next; - - segs->next = NULL; - if (err == 0) { - nf_bridge_adjust_segmented_data(segs); - err = __nf_queue(segs, elem, pf, hook, indev, - outdev, okfn, queuenum); - } - if (err == 0) - queued++; - else - kfree_skb(segs); - segs = nskb; - } while (segs); - - if (queued) { - kfree_skb(skb); - return 0; - } - out_err: - nf_bridge_adjust_segmented_data(skb); - return err; -} - void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict) { struct sk_buff *skb = entry->skb; @@ -283,9 +205,9 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict) local_bh_enable(); break; case NF_QUEUE: - err = __nf_queue(skb, elem, entry->pf, entry->hook, - entry->indev, entry->outdev, entry->okfn, - verdict >> NF_VERDICT_QBITS); + err = nf_queue(skb, elem, entry->pf, entry->hook, + entry->indev, entry->outdev, entry->okfn, + verdict >> NF_VERDICT_QBITS); if (err < 0) { if (err == -ECANCELED) goto next_hook; diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c index ef3cdb4bfeea..edbae4caf753 100644 --- a/net/netfilter/nfnetlink_queue_core.c +++ b/net/netfilter/nfnetlink_queue_core.c @@ -477,28 +477,13 @@ nla_put_failure: } static int -nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum) +__nfqnl_enqueue_packet(struct net *net, struct nfqnl_instance *queue, + struct nf_queue_entry *entry) { struct sk_buff *nskb; - struct nfqnl_instance *queue; int err = -ENOBUFS; __be32 *packet_id_ptr; int failopen = 0; - struct net *net = dev_net(entry->indev ? - entry->indev : entry->outdev); - struct nfnl_queue_net *q = nfnl_queue_pernet(net); - - /* rcu_read_lock()ed by nf_hook_slow() */ - queue = instance_lookup(q, queuenum); - if (!queue) { - err = -ESRCH; - goto err_out; - } - - if (queue->copy_mode == NFQNL_COPY_NONE) { - err = -EINVAL; - goto err_out; - } nskb = nfqnl_build_packet_message(queue, entry, &packet_id_ptr); if (nskb == NULL) { @@ -547,6 +532,141 @@ err_out: return err; } +static struct nf_queue_entry * +nf_queue_entry_dup(struct nf_queue_entry *e) +{ + struct nf_queue_entry *entry = kmemdup(e, e->size, GFP_ATOMIC); + if (entry) { + if (nf_queue_entry_get_refs(entry)) + return entry; + kfree(entry); + } + return NULL; +} + +#ifdef CONFIG_BRIDGE_NETFILTER +/* When called from bridge netfilter, skb->data must point to MAC header + * before calling skb_gso_segment(). Else, original MAC header is lost + * and segmented skbs will be sent to wrong destination. + */ +static void nf_bridge_adjust_skb_data(struct sk_buff *skb) +{ + if (skb->nf_bridge) + __skb_push(skb, skb->network_header - skb->mac_header); +} + +static void nf_bridge_adjust_segmented_data(struct sk_buff *skb) +{ + if (skb->nf_bridge) + __skb_pull(skb, skb->network_header - skb->mac_header); +} +#else +#define nf_bridge_adjust_skb_data(s) do {} while (0) +#define nf_bridge_adjust_segmented_data(s) do {} while (0) +#endif + +static void free_entry(struct nf_queue_entry *entry) +{ + nf_queue_entry_release_refs(entry); + kfree(entry); +} + +static int +__nfqnl_enqueue_packet_gso(struct net *net, struct nfqnl_instance *queue, + struct sk_buff *skb, struct nf_queue_entry *entry) +{ + int ret = -ENOMEM; + struct nf_queue_entry *entry_seg; + + nf_bridge_adjust_segmented_data(skb); + + if (skb->next == NULL) { /* last packet, no need to copy entry */ + struct sk_buff *gso_skb = entry->skb; + entry->skb = skb; + ret = __nfqnl_enqueue_packet(net, queue, entry); + if (ret) + entry->skb = gso_skb; + return ret; + } + + skb->next = NULL; + + entry_seg = nf_queue_entry_dup(entry); + if (entry_seg) { + entry_seg->skb = skb; + ret = __nfqnl_enqueue_packet(net, queue, entry_seg); + if (ret) + free_entry(entry_seg); + } + return ret; +} + +static int +nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum) +{ + unsigned int queued; + struct nfqnl_instance *queue; + struct sk_buff *skb, *segs; + int err = -ENOBUFS; + struct net *net = dev_net(entry->indev ? + entry->indev : entry->outdev); + struct nfnl_queue_net *q = nfnl_queue_pernet(net); + + /* rcu_read_lock()ed by nf_hook_slow() */ + queue = instance_lookup(q, queuenum); + if (!queue) + return -ESRCH; + + if (queue->copy_mode == NFQNL_COPY_NONE) + return -EINVAL; + + if (!skb_is_gso(entry->skb)) + return __nfqnl_enqueue_packet(net, queue, entry); + + skb = entry->skb; + + switch (entry->pf) { + case NFPROTO_IPV4: + skb->protocol = htons(ETH_P_IP); + break; + case NFPROTO_IPV6: + skb->protocol = htons(ETH_P_IPV6); + break; + } + + nf_bridge_adjust_skb_data(skb); + segs = skb_gso_segment(skb, 0); + /* Does not use PTR_ERR to limit the number of error codes that can be + * returned by nf_queue. For instance, callers rely on -ECANCELED to + * mean 'ignore this hook'. + */ + if (IS_ERR(segs)) + goto out_err; + queued = 0; + err = 0; + do { + struct sk_buff *nskb = segs->next; + if (err == 0) + err = __nfqnl_enqueue_packet_gso(net, queue, + segs, entry); + if (err == 0) + queued++; + else + kfree_skb(segs); + segs = nskb; + } while (segs); + + if (queued) { + if (err) /* some segments are already queued */ + free_entry(entry); + kfree_skb(skb); + return 0; + } + out_err: + nf_bridge_adjust_segmented_data(skb); + return err; +} + static int nfqnl_mangle(void *data, int data_len, struct nf_queue_entry *e, int diff) { -- cgit v1.2.3 From eee1d5a14780b9391ec51f3feaf4cffb521ddbb1 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Fri, 19 Apr 2013 10:54:58 +0900 Subject: sctp: Correct type and usage of sctp_end_cksum() Change the type of the crc32 parameter of sctp_end_cksum() from __be32 to __u32 to reflect that fact that it is passed to cpu_to_le32(). There are five in-tree users of sctp_end_cksum(). The following four had warnings flagged by sparse which are no longer present with this change. net/netfilter/ipvs/ip_vs_proto_sctp.c:sctp_nat_csum() net/netfilter/ipvs/ip_vs_proto_sctp.c:sctp_csum_check() net/sctp/input.c:sctp_rcv_checksum() net/sctp/output.c:sctp_packet_transmit() The fifth user is net/netfilter/nf_nat_proto_sctp.c:sctp_manip_pkt(). It has been updated to pass a __u32 instead of a __be32, the value in question was already calculated in cpu byte-order. net/netfilter/nf_nat_proto_sctp.c:sctp_manip_pkt() has also been updated to assign the return value of sctp_end_cksum() directly to a variable of type __le32, matching the type of the return value. Previously the return value was assigned to a variable of type __be32 and then that variable was finally assigned to another variable of type __le32. Problems flagged by sparse. Compile and sparse tested only. Signed-off-by: Simon Horman Signed-off-by: Pablo Neira Ayuso --- include/net/sctp/checksum.h | 2 +- net/netfilter/nf_nat_proto_sctp.c | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) (limited to 'include/net') diff --git a/include/net/sctp/checksum.h b/include/net/sctp/checksum.h index befc8d2a1b9f..5a2110d3176d 100644 --- a/include/net/sctp/checksum.h +++ b/include/net/sctp/checksum.h @@ -77,7 +77,7 @@ static inline __u32 sctp_update_cksum(__u8 *buffer, __u16 length, __u32 crc32) return sctp_crc32c(crc32, buffer, length); } -static inline __le32 sctp_end_cksum(__be32 crc32) +static inline __le32 sctp_end_cksum(__u32 crc32) { return cpu_to_le32(~crc32); } diff --git a/net/netfilter/nf_nat_proto_sctp.c b/net/netfilter/nf_nat_proto_sctp.c index e64faa5ca893..396e55d46f90 100644 --- a/net/netfilter/nf_nat_proto_sctp.c +++ b/net/netfilter/nf_nat_proto_sctp.c @@ -36,7 +36,7 @@ sctp_manip_pkt(struct sk_buff *skb, { struct sk_buff *frag; sctp_sctphdr_t *hdr; - __be32 crc32; + __u32 crc32; if (!skb_make_writable(skb, hdroff + sizeof(*hdr))) return false; @@ -55,8 +55,7 @@ sctp_manip_pkt(struct sk_buff *skb, skb_walk_frags(skb, frag) crc32 = sctp_update_cksum((u8 *)frag->data, skb_headlen(frag), crc32); - crc32 = sctp_end_cksum(crc32); - hdr->checksum = crc32; + hdr->checksum = sctp_end_cksum(crc32); return true; } -- cgit v1.2.3 From aebda156a570782a86fc4426842152237a19427d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 29 Apr 2013 05:58:52 +0000 Subject: net: defer net_secret[] initialization Instead of feeding net_secret[] at boot time, defer the init at the point first socket is created. This permits some platforms to use better entropy sources than the ones available at boot time. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/secure_seq.h | 1 + net/core/secure_seq.c | 4 +--- net/ipv4/af_inet.c | 5 ++++- 3 files changed, 6 insertions(+), 4 deletions(-) (limited to 'include/net') diff --git a/include/net/secure_seq.h b/include/net/secure_seq.h index c2e542b27a5a..6ca975bebd37 100644 --- a/include/net/secure_seq.h +++ b/include/net/secure_seq.h @@ -3,6 +3,7 @@ #include +extern void net_secret_init(void); extern __u32 secure_ip_id(__be32 daddr); extern __u32 secure_ipv6_id(const __be32 daddr[4]); extern u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport); diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c index e61a8bb7fce7..6a2f13cee86a 100644 --- a/net/core/secure_seq.c +++ b/net/core/secure_seq.c @@ -12,12 +12,10 @@ static u32 net_secret[MD5_MESSAGE_BYTES / 4] ____cacheline_aligned; -static int __init net_secret_init(void) +void net_secret_init(void) { get_random_bytes(net_secret, sizeof(net_secret)); - return 0; } -late_initcall(net_secret_init); #ifdef CONFIG_INET static u32 seq_scale(u32 seq) diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 93824c57b108..c61b3bb87a16 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -114,6 +114,7 @@ #include #include #include +#include #ifdef CONFIG_IP_MROUTE #include #endif @@ -262,8 +263,10 @@ void build_ehash_secret(void) get_random_bytes(&rnd, sizeof(rnd)); } while (rnd == 0); - if (cmpxchg(&inet_ehash_secret, 0, rnd) == 0) + if (cmpxchg(&inet_ehash_secret, 0, rnd) == 0) { get_random_bytes(&ipv6_hash_secret, sizeof(ipv6_hash_secret)); + net_secret_init(); + } } EXPORT_SYMBOL(build_ehash_secret); -- cgit v1.2.3 From 60bc851ae59bfe99be6ee89d6bc50008c85ec75d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 1 May 2013 05:24:03 +0000 Subject: af_unix: fix a fatal race with bit fields Using bit fields is dangerous on ppc64/sparc64, as the compiler [1] uses 64bit instructions to manipulate them. If the 64bit word includes any atomic_t or spinlock_t, we can lose critical concurrent changes. This is happening in af_unix, where unix_sk(sk)->gc_candidate/ gc_maybe_cycle/lock share the same 64bit word. This leads to fatal deadlock, as one/several cpus spin forever on a spinlock that will never be available again. A safer way would be to use a long to store flags. This way we are sure compiler/arch wont do bad things. As we own unix_gc_lock spinlock when clearing or setting bits, we can use the non atomic __set_bit()/__clear_bit(). recursion_level can share the same 64bit location with the spinlock, as it is set only with this spinlock held. [1] bug fixed in gcc-4.8.0 : http://gcc.gnu.org/bugzilla/show_bug.cgi?id=52080 Reported-by: Ambrose Feinstein Signed-off-by: Eric Dumazet Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Signed-off-by: David S. Miller --- include/net/af_unix.h | 5 +++-- net/unix/garbage.c | 12 ++++++------ 2 files changed, 9 insertions(+), 8 deletions(-) (limited to 'include/net') diff --git a/include/net/af_unix.h b/include/net/af_unix.h index a8836e8445cc..dbdfd2b0f3b3 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -57,9 +57,10 @@ struct unix_sock { struct list_head link; atomic_long_t inflight; spinlock_t lock; - unsigned int gc_candidate : 1; - unsigned int gc_maybe_cycle : 1; unsigned char recursion_level; + unsigned long gc_flags; +#define UNIX_GC_CANDIDATE 0 +#define UNIX_GC_MAYBE_CYCLE 1 struct socket_wq peer_wq; }; #define unix_sk(__sk) ((struct unix_sock *)__sk) diff --git a/net/unix/garbage.c b/net/unix/garbage.c index d0f6545b0010..9bc73f87f64a 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -185,7 +185,7 @@ static void scan_inflight(struct sock *x, void (*func)(struct unix_sock *), * have been added to the queues after * starting the garbage collection */ - if (u->gc_candidate) { + if (test_bit(UNIX_GC_CANDIDATE, &u->gc_flags)) { hit = true; func(u); } @@ -254,7 +254,7 @@ static void inc_inflight_move_tail(struct unix_sock *u) * of the list, so that it's checked even if it was already * passed over */ - if (u->gc_maybe_cycle) + if (test_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags)) list_move_tail(&u->link, &gc_candidates); } @@ -315,8 +315,8 @@ void unix_gc(void) BUG_ON(total_refs < inflight_refs); if (total_refs == inflight_refs) { list_move_tail(&u->link, &gc_candidates); - u->gc_candidate = 1; - u->gc_maybe_cycle = 1; + __set_bit(UNIX_GC_CANDIDATE, &u->gc_flags); + __set_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags); } } @@ -344,7 +344,7 @@ void unix_gc(void) if (atomic_long_read(&u->inflight) > 0) { list_move_tail(&u->link, ¬_cycle_list); - u->gc_maybe_cycle = 0; + __clear_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags); scan_children(&u->sk, inc_inflight_move_tail, NULL); } } @@ -356,7 +356,7 @@ void unix_gc(void) */ while (!list_empty(¬_cycle_list)) { u = list_entry(not_cycle_list.next, struct unix_sock, link); - u->gc_candidate = 0; + __clear_bit(UNIX_GC_CANDIDATE, &u->gc_flags); list_move_tail(&u->link, &gc_inflight_list); } -- cgit v1.2.3