From bf37d2b3fdc9e451f9e376a3922ac8df5aa24128 Mon Sep 17 00:00:00 2001
From: Eliezer Tamir <eliezer.tamir@linux.intel.com>
Date: Sun, 4 Aug 2013 12:55:48 +0300
Subject: busy_poll: cleanup do-nothing placeholders

When renaming ll_poll to busy poll, I introduced a typo
in the name of the do-nothing placeholder for sk_busy_loop
and called it sk_busy_poll.
This broke compile when busy poll was not configured.
Cong Wang submitted a patch to fixed that.
This patch removes the now redundant, misspelled placeholder.

Signed-off-by: Eliezer Tamir <eliezer.tamir@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/busy_poll.h | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'include')

diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h
index f18b91966d3d..8e2dfc106aed 100644
--- a/include/net/busy_poll.h
+++ b/include/net/busy_poll.h
@@ -162,11 +162,6 @@ static inline bool sk_can_busy_loop(struct sock *sk)
 	return false;
 }
 
-static inline bool sk_busy_poll(struct sock *sk, int nonblock)
-{
-	return false;
-}
-
 static inline void skb_mark_napi_id(struct sk_buff *skb,
 				    struct napi_struct *napi)
 {
-- 
cgit v1.2.3


From 7d46daba8dd5df1aa45724518a041ef7163d3ad5 Mon Sep 17 00:00:00 2001
From: Eli Cohen <eli@dev.mellanox.co.il>
Date: Mon, 5 Aug 2013 16:05:32 +0300
Subject: mlx5: remove health handler plugin

Remove this code, per Dave Miller's request, since it is not being used
anywhere in the kernel.

Signed-off-by: Eli Cohen <eli@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx5/core/health.c | 29 +-----------------------
 include/linux/mlx5/driver.h                      |  3 ---
 2 files changed, 1 insertion(+), 31 deletions(-)

(limited to 'include')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c
index 748f10a155c4..3e6670c4a7cd 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/health.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c
@@ -55,33 +55,9 @@ enum {
 };
 
 static DEFINE_SPINLOCK(health_lock);
-
 static LIST_HEAD(health_list);
 static struct work_struct health_work;
 
-static health_handler_t reg_handler;
-int mlx5_register_health_report_handler(health_handler_t handler)
-{
-	spin_lock_irq(&health_lock);
-	if (reg_handler) {
-		spin_unlock_irq(&health_lock);
-		return -EEXIST;
-	}
-	reg_handler = handler;
-	spin_unlock_irq(&health_lock);
-
-	return 0;
-}
-EXPORT_SYMBOL(mlx5_register_health_report_handler);
-
-void mlx5_unregister_health_report_handler(void)
-{
-	spin_lock_irq(&health_lock);
-	reg_handler = NULL;
-	spin_unlock_irq(&health_lock);
-}
-EXPORT_SYMBOL(mlx5_unregister_health_report_handler);
-
 static void health_care(struct work_struct *work)
 {
 	struct mlx5_core_health *health, *n;
@@ -98,11 +74,8 @@ static void health_care(struct work_struct *work)
 		priv = container_of(health, struct mlx5_priv, health);
 		dev = container_of(priv, struct mlx5_core_dev, priv);
 		mlx5_core_warn(dev, "handling bad device here\n");
+		/* nothing yet */
 		spin_lock_irq(&health_lock);
-		if (reg_handler)
-			reg_handler(dev->pdev, health->health,
-				    sizeof(health->health));
-
 		list_del_init(&health->list);
 		spin_unlock_irq(&health_lock);
 	}
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 2aa258b0ced1..611e65e76b00 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -731,9 +731,6 @@ void mlx5_cq_debugfs_cleanup(struct mlx5_core_dev *dev);
 int mlx5_db_alloc(struct mlx5_core_dev *dev, struct mlx5_db *db);
 void mlx5_db_free(struct mlx5_core_dev *dev, struct mlx5_db *db);
 
-typedef void (*health_handler_t)(struct pci_dev *pdev, struct health_buffer __iomem *buf, int size);
-int mlx5_register_health_report_handler(health_handler_t handler);
-void mlx5_unregister_health_report_handler(void);
 const char *mlx5_command_str(int command);
 int mlx5_cmdif_debugfs_init(struct mlx5_core_dev *dev);
 void mlx5_cmdif_debugfs_cleanup(struct mlx5_core_dev *dev);
-- 
cgit v1.2.3


From 288a9376371d425edeeea41a0310922c5fb2092d Mon Sep 17 00:00:00 2001
From: Eliezer Tamir <eliezer.tamir@linux.intel.com>
Date: Wed, 7 Aug 2013 11:33:25 +0300
Subject: net: rename busy poll MIB counter

Rename mib counter from "low latency" to "busy poll"

v1 also moved the counter to the ip MIB (suggested by Shawn Bohrer)
Eric Dumazet suggested that the current location is better.

So v2 just renames the counter to fit the new naming convention.

Signed-off-by: Eliezer Tamir <eliezer.tamir@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/busy_poll.h   | 2 +-
 include/uapi/linux/snmp.h | 2 +-
 net/ipv4/proc.c           | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h
index 8e2dfc106aed..8a358a2c97e6 100644
--- a/include/net/busy_poll.h
+++ b/include/net/busy_poll.h
@@ -122,7 +122,7 @@ static inline bool sk_busy_loop(struct sock *sk, int nonblock)
 		if (rc > 0)
 			/* local bh are disabled so it is ok to use _BH */
 			NET_ADD_STATS_BH(sock_net(sk),
-					 LINUX_MIB_LOWLATENCYRXPACKETS, rc);
+					 LINUX_MIB_BUSYPOLLRXPACKETS, rc);
 
 	} while (!nonblock && skb_queue_empty(&sk->sk_receive_queue) &&
 		 !need_resched() && !busy_loop_timeout(end_time));
diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h
index af0a674cc677..a1356d3b54df 100644
--- a/include/uapi/linux/snmp.h
+++ b/include/uapi/linux/snmp.h
@@ -253,7 +253,7 @@ enum
 	LINUX_MIB_TCPFASTOPENLISTENOVERFLOW,	/* TCPFastOpenListenOverflow */
 	LINUX_MIB_TCPFASTOPENCOOKIEREQD,	/* TCPFastOpenCookieReqd */
 	LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES, /* TCPSpuriousRtxHostQueues */
-	LINUX_MIB_LOWLATENCYRXPACKETS,		/* LowLatencyRxPackets */
+	LINUX_MIB_BUSYPOLLRXPACKETS,		/* BusyPollRxPackets */
 	__LINUX_MIB_MAX
 };
 
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 6577a1149a47..463bd1273346 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -273,7 +273,7 @@ static const struct snmp_mib snmp4_net_list[] = {
 	SNMP_MIB_ITEM("TCPFastOpenListenOverflow", LINUX_MIB_TCPFASTOPENLISTENOVERFLOW),
 	SNMP_MIB_ITEM("TCPFastOpenCookieReqd", LINUX_MIB_TCPFASTOPENCOOKIEREQD),
 	SNMP_MIB_ITEM("TCPSpuriousRtxHostQueues", LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES),
-	SNMP_MIB_ITEM("LowLatencyRxPackets", LINUX_MIB_LOWLATENCYRXPACKETS),
+	SNMP_MIB_ITEM("BusyPollRxPackets", LINUX_MIB_BUSYPOLLRXPACKETS),
 	SNMP_MIB_SENTINEL
 };
 
-- 
cgit v1.2.3


From 4221f40513233fa8edeef7fc82e44163fde03b9b Mon Sep 17 00:00:00 2001
From: Pravin B Shelar <pshelar@nicira.com>
Date: Tue, 13 Aug 2013 01:41:06 -0700
Subject: ip_tunnel: Do not use inner ip-header-id for tunnel ip-header-id.

Using inner-id for tunnel id is not safe in some rare cases.
E.g. packets coming from multiple sources entering same tunnel
can have same id. Therefore on tunnel packet receive we
could have packets from two different stream but with same
source and dst IP with same ip-id which could confuse ip packet
reassembly.

Following patch reverts optimization from commit
490ab08127 (IP_GRE: Fix IP-Identification.)

CC: Jarno Rajahalme <jrajahalme@nicira.com>
CC: Ansis Atteka <aatteka@nicira.com>
Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip_tunnels.h  | 14 --------------
 net/ipv4/ip_tunnel_core.c |  4 +---
 2 files changed, 1 insertion(+), 17 deletions(-)

(limited to 'include')

diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index 781b3cf86a2f..a354db5b7662 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -145,20 +145,6 @@ static inline u8 ip_tunnel_ecn_encap(u8 tos, const struct iphdr *iph,
 	return INET_ECN_encapsulate(tos, inner);
 }
 
-static inline void tunnel_ip_select_ident(struct sk_buff *skb,
-					  const struct iphdr  *old_iph,
-					  struct dst_entry *dst)
-{
-	struct iphdr *iph = ip_hdr(skb);
-
-	/* Use inner packet iph-id if possible. */
-	if (skb->protocol == htons(ETH_P_IP) && old_iph->id)
-		iph->id	= old_iph->id;
-	else
-		__ip_select_ident(iph, dst,
-				  (skb_shinfo(skb)->gso_segs ?: 1) - 1);
-}
-
 int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto);
 int iptunnel_xmit(struct net *net, struct rtable *rt,
 		  struct sk_buff *skb,
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index 7167b08977df..850525b34899 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -76,9 +76,7 @@ int iptunnel_xmit(struct net *net, struct rtable *rt,
 	iph->daddr	=	dst;
 	iph->saddr	=	src;
 	iph->ttl	=	ttl;
-	tunnel_ip_select_ident(skb,
-			       (const struct iphdr *)skb_inner_network_header(skb),
-			       &rt->dst);
+	__ip_select_ident(iph, &rt->dst, (skb_shinfo(skb)->gso_segs ?: 1) - 1);
 
 	err = ip_local_out(skb);
 	if (unlikely(net_xmit_eval(err)))
-- 
cgit v1.2.3


From 8a8e3d84b1719a56f9151909e80ea6ebc5b8e318 Mon Sep 17 00:00:00 2001
From: Jesper Dangaard Brouer <brouer@redhat.com>
Date: Wed, 14 Aug 2013 23:47:11 +0200
Subject: net_sched: restore "linklayer atm" handling

commit 56b765b79 ("htb: improved accuracy at high rates")
broke the "linklayer atm" handling.

 tc class add ... htb rate X ceil Y linklayer atm

The linklayer setting is implemented by modifying the rate table
which is send to the kernel.  No direct parameter were
transferred to the kernel indicating the linklayer setting.

The commit 56b765b79 ("htb: improved accuracy at high rates")
removed the use of the rate table system.

To keep compatible with older iproute2 utils, this patch detects
the linklayer by parsing the rate table.  It also supports future
versions of iproute2 to send this linklayer parameter to the
kernel directly. This is done by using the __reserved field in
struct tc_ratespec, to convey the choosen linklayer option, but
only using the lower 4 bits of this field.

Linklayer detection is limited to speeds below 100Mbit/s, because
at high rates the rtab is gets too inaccurate, so bad that
several fields contain the same values, this resembling the ATM
detect.  Fields even start to contain "0" time to send, e.g. at
1000Mbit/s sending a 96 bytes packet cost "0", thus the rtab have
been more broken than we first realized.

Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sch_generic.h      |  9 ++++++++-
 include/uapi/linux/pkt_sched.h | 10 +++++++++-
 net/sched/sch_api.c            | 41 +++++++++++++++++++++++++++++++++++++++++
 net/sched/sch_generic.c        |  1 +
 net/sched/sch_htb.c            | 13 +++++++++++++
 5 files changed, 72 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 6eab63363e59..e5ae0c50fa9c 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -683,13 +683,19 @@ struct psched_ratecfg {
 	u64	rate_bytes_ps; /* bytes per second */
 	u32	mult;
 	u16	overhead;
+	u8	linklayer;
 	u8	shift;
 };
 
 static inline u64 psched_l2t_ns(const struct psched_ratecfg *r,
 				unsigned int len)
 {
-	return ((u64)(len + r->overhead) * r->mult) >> r->shift;
+	len += r->overhead;
+
+	if (unlikely(r->linklayer == TC_LINKLAYER_ATM))
+		return ((u64)(DIV_ROUND_UP(len,48)*53) * r->mult) >> r->shift;
+
+	return ((u64)len * r->mult) >> r->shift;
 }
 
 extern void psched_ratecfg_precompute(struct psched_ratecfg *r, const struct tc_ratespec *conf);
@@ -700,6 +706,7 @@ static inline void psched_ratecfg_getrate(struct tc_ratespec *res,
 	memset(res, 0, sizeof(*res));
 	res->rate = r->rate_bytes_ps;
 	res->overhead = r->overhead;
+	res->linklayer = (r->linklayer & TC_LINKLAYER_MASK);
 }
 
 #endif
diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h
index dbd71b0c7d8c..09d62b9228ff 100644
--- a/include/uapi/linux/pkt_sched.h
+++ b/include/uapi/linux/pkt_sched.h
@@ -73,9 +73,17 @@ struct tc_estimator {
 #define TC_H_ROOT	(0xFFFFFFFFU)
 #define TC_H_INGRESS    (0xFFFFFFF1U)
 
+/* Need to corrospond to iproute2 tc/tc_core.h "enum link_layer" */
+enum tc_link_layer {
+	TC_LINKLAYER_UNAWARE, /* Indicate unaware old iproute2 util */
+	TC_LINKLAYER_ETHERNET,
+	TC_LINKLAYER_ATM,
+};
+#define TC_LINKLAYER_MASK 0x0F /* limit use to lower 4 bits */
+
 struct tc_ratespec {
 	unsigned char	cell_log;
-	unsigned char	__reserved;
+	__u8		linklayer; /* lower 4 bits */
 	unsigned short	overhead;
 	short		cell_align;
 	unsigned short	mpu;
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 281c1bded1f6..51b968d3febb 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -285,6 +285,45 @@ static struct Qdisc_ops *qdisc_lookup_ops(struct nlattr *kind)
 	return q;
 }
 
+/* The linklayer setting were not transferred from iproute2, in older
+ * versions, and the rate tables lookup systems have been dropped in
+ * the kernel. To keep backward compatible with older iproute2 tc
+ * utils, we detect the linklayer setting by detecting if the rate
+ * table were modified.
+ *
+ * For linklayer ATM table entries, the rate table will be aligned to
+ * 48 bytes, thus some table entries will contain the same value.  The
+ * mpu (min packet unit) is also encoded into the old rate table, thus
+ * starting from the mpu, we find low and high table entries for
+ * mapping this cell.  If these entries contain the same value, when
+ * the rate tables have been modified for linklayer ATM.
+ *
+ * This is done by rounding mpu to the nearest 48 bytes cell/entry,
+ * and then roundup to the next cell, calc the table entry one below,
+ * and compare.
+ */
+static __u8 __detect_linklayer(struct tc_ratespec *r, __u32 *rtab)
+{
+	int low       = roundup(r->mpu, 48);
+	int high      = roundup(low+1, 48);
+	int cell_low  = low >> r->cell_log;
+	int cell_high = (high >> r->cell_log) - 1;
+
+	/* rtab is too inaccurate at rates > 100Mbit/s */
+	if ((r->rate > (100000000/8)) || (rtab[0] == 0)) {
+		pr_debug("TC linklayer: Giving up ATM detection\n");
+		return TC_LINKLAYER_ETHERNET;
+	}
+
+	if ((cell_high > cell_low) && (cell_high < 256)
+	    && (rtab[cell_low] == rtab[cell_high])) {
+		pr_debug("TC linklayer: Detected ATM, low(%d)=high(%d)=%u\n",
+			 cell_low, cell_high, rtab[cell_high]);
+		return TC_LINKLAYER_ATM;
+	}
+	return TC_LINKLAYER_ETHERNET;
+}
+
 static struct qdisc_rate_table *qdisc_rtab_list;
 
 struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct nlattr *tab)
@@ -308,6 +347,8 @@ struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct nlattr *ta
 		rtab->rate = *r;
 		rtab->refcnt = 1;
 		memcpy(rtab->data, nla_data(tab), 1024);
+		if (r->linklayer == TC_LINKLAYER_UNAWARE)
+			r->linklayer = __detect_linklayer(r, rtab->data);
 		rtab->next = qdisc_rtab_list;
 		qdisc_rtab_list = rtab;
 	}
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index eeb8276d7a89..48be3d5c0d92 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -909,6 +909,7 @@ void psched_ratecfg_precompute(struct psched_ratecfg *r,
 	memset(r, 0, sizeof(*r));
 	r->overhead = conf->overhead;
 	r->rate_bytes_ps = conf->rate;
+	r->linklayer = (conf->linklayer & TC_LINKLAYER_MASK);
 	r->mult = 1;
 	/*
 	 * The deal here is to replace a divide by a reciprocal one
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 45e751527dfc..c2178b15ca6e 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -1329,6 +1329,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
 	struct htb_sched *q = qdisc_priv(sch);
 	struct htb_class *cl = (struct htb_class *)*arg, *parent;
 	struct nlattr *opt = tca[TCA_OPTIONS];
+	struct qdisc_rate_table *rtab = NULL, *ctab = NULL;
 	struct nlattr *tb[TCA_HTB_MAX + 1];
 	struct tc_htb_opt *hopt;
 
@@ -1350,6 +1351,18 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
 	if (!hopt->rate.rate || !hopt->ceil.rate)
 		goto failure;
 
+	/* Keeping backward compatible with rate_table based iproute2 tc */
+	if (hopt->rate.linklayer == TC_LINKLAYER_UNAWARE) {
+		rtab = qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB]);
+		if (rtab)
+			qdisc_put_rtab(rtab);
+	}
+	if (hopt->ceil.linklayer == TC_LINKLAYER_UNAWARE) {
+		ctab = qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB]);
+		if (ctab)
+			qdisc_put_rtab(ctab);
+	}
+
 	if (!cl) {		/* new class */
 		struct Qdisc *new_q;
 		int prio;
-- 
cgit v1.2.3


From 0a324f3189ed9c78b1aaf48d88e93cb18643c655 Mon Sep 17 00:00:00 2001
From: Moshe Lazer <moshel@mellanox.com>
Date: Wed, 14 Aug 2013 17:46:48 +0300
Subject: net/mlx5_core: Support MANAGE_PAGES and QUERY_PAGES firmware command
 changes

In the previous QUERY_PAGES command version we used one command to get the
required amount of boot, init and post init pages.  The new version uses the
op_mod field to specify whether the query is for the required amount of boot,
init or post init pages. In addition the output field size for the required
amount of pages increased from 16 to 32 bits.

In MANAGE_PAGES command the input_num_entries and output_num_entries fields
sizes changed from 16 to 32 bits and the PAS tables offset changed to 0x10.

In the pages request event the num_pages field also changed to 32 bits.

In the HCA-capabilities-layout the size and location of max_qp_mcg field has
been changed to support 24 bits.

This patch isn't compatible with firmware versions < 5; however, it  turns out that the
first GA firmware we will publish will not support previous versions so this should be OK.

Signed-off-by: Moshe Lazer <moshel@mellanox.com>
Signed-off-by: Eli Cohen <eli@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx5/core/cmd.c      |  2 +-
 drivers/net/ethernet/mellanox/mlx5/core/eq.c       |  2 +-
 drivers/net/ethernet/mellanox/mlx5/core/fw.c       |  2 +-
 .../net/ethernet/mellanox/mlx5/core/pagealloc.c    | 58 ++++++++++------------
 include/linux/mlx5/device.h                        | 22 ++++----
 include/linux/mlx5/driver.h                        |  4 +-
 6 files changed, 41 insertions(+), 49 deletions(-)

(limited to 'include')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index c571de85d0f9..5472cbd34028 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -46,7 +46,7 @@
 #include "mlx5_core.h"
 
 enum {
-	CMD_IF_REV = 4,
+	CMD_IF_REV = 5,
 };
 
 enum {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index c02cbcfd0fb8..443cc4d7b024 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -268,7 +268,7 @@ static int mlx5_eq_int(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
 		case MLX5_EVENT_TYPE_PAGE_REQUEST:
 			{
 				u16 func_id = be16_to_cpu(eqe->data.req_pages.func_id);
-				s16 npages = be16_to_cpu(eqe->data.req_pages.num_pages);
+				s32 npages = be32_to_cpu(eqe->data.req_pages.num_pages);
 
 				mlx5_core_dbg(dev, "page request for func 0x%x, napges %d\n", func_id, npages);
 				mlx5_core_req_pages_handler(dev, func_id, npages);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
index 72a5222447f5..f012658b6a92 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
@@ -113,7 +113,7 @@ int mlx5_cmd_query_hca_cap(struct mlx5_core_dev *dev,
 	caps->log_max_srq = out->hca_cap.log_max_srqs & 0x1f;
 	caps->local_ca_ack_delay = out->hca_cap.local_ca_ack_delay & 0x1f;
 	caps->log_max_mcg = out->hca_cap.log_max_mcg;
-	caps->max_qp_mcg = be16_to_cpu(out->hca_cap.max_qp_mcg);
+	caps->max_qp_mcg = be32_to_cpu(out->hca_cap.max_qp_mcg) & 0xffffff;
 	caps->max_ra_res_qp = 1 << (out->hca_cap.log_max_ra_res_qp & 0x3f);
 	caps->max_ra_req_qp = 1 << (out->hca_cap.log_max_ra_req_qp & 0x3f);
 	caps->max_srq_wqes = 1 << out->hca_cap.log_max_srq_sz;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
index 4a3e137931a3..3a2408d44820 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
@@ -43,10 +43,16 @@ enum {
 	MLX5_PAGES_TAKE		= 2
 };
 
+enum {
+	MLX5_BOOT_PAGES		= 1,
+	MLX5_INIT_PAGES		= 2,
+	MLX5_POST_INIT_PAGES	= 3
+};
+
 struct mlx5_pages_req {
 	struct mlx5_core_dev *dev;
 	u32	func_id;
-	s16	npages;
+	s32	npages;
 	struct work_struct work;
 };
 
@@ -64,27 +70,23 @@ struct mlx5_query_pages_inbox {
 
 struct mlx5_query_pages_outbox {
 	struct mlx5_outbox_hdr	hdr;
-	__be16			num_boot_pages;
+	__be16			rsvd;
 	__be16			func_id;
-	__be16			init_pages;
-	__be16			num_pages;
+	__be32			num_pages;
 };
 
 struct mlx5_manage_pages_inbox {
 	struct mlx5_inbox_hdr	hdr;
-	__be16			rsvd0;
+	__be16			rsvd;
 	__be16			func_id;
-	__be16			rsvd1;
-	__be16			num_entries;
-	u8			rsvd2[16];
+	__be32			num_entries;
 	__be64			pas[0];
 };
 
 struct mlx5_manage_pages_outbox {
 	struct mlx5_outbox_hdr	hdr;
-	u8			rsvd0[2];
-	__be16			num_entries;
-	u8			rsvd1[20];
+	__be32			num_entries;
+	u8			rsvd[4];
 	__be64			pas[0];
 };
 
@@ -146,7 +148,7 @@ static struct page *remove_page(struct mlx5_core_dev *dev, u64 addr)
 }
 
 static int mlx5_cmd_query_pages(struct mlx5_core_dev *dev, u16 *func_id,
-				s16 *pages, s16 *init_pages, u16 *boot_pages)
+				s32 *npages, int boot)
 {
 	struct mlx5_query_pages_inbox	in;
 	struct mlx5_query_pages_outbox	out;
@@ -155,6 +157,8 @@ static int mlx5_cmd_query_pages(struct mlx5_core_dev *dev, u16 *func_id,
 	memset(&in, 0, sizeof(in));
 	memset(&out, 0, sizeof(out));
 	in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_PAGES);
+	in.hdr.opmod = boot ? cpu_to_be16(MLX5_BOOT_PAGES) : cpu_to_be16(MLX5_INIT_PAGES);
+
 	err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
 	if (err)
 		return err;
@@ -162,15 +166,7 @@ static int mlx5_cmd_query_pages(struct mlx5_core_dev *dev, u16 *func_id,
 	if (out.hdr.status)
 		return mlx5_cmd_status_to_err(&out.hdr);
 
-	if (pages)
-		*pages = be16_to_cpu(out.num_pages);
-
-	if (init_pages)
-		*init_pages = be16_to_cpu(out.init_pages);
-
-	if (boot_pages)
-		*boot_pages = be16_to_cpu(out.num_boot_pages);
-
+	*npages = be32_to_cpu(out.num_pages);
 	*func_id = be16_to_cpu(out.func_id);
 
 	return err;
@@ -224,7 +220,7 @@ static int give_pages(struct mlx5_core_dev *dev, u16 func_id, int npages,
 	in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_MANAGE_PAGES);
 	in->hdr.opmod = cpu_to_be16(MLX5_PAGES_GIVE);
 	in->func_id = cpu_to_be16(func_id);
-	in->num_entries = cpu_to_be16(npages);
+	in->num_entries = cpu_to_be32(npages);
 	err = mlx5_cmd_exec(dev, in, inlen, &out, sizeof(out));
 	mlx5_core_dbg(dev, "err %d\n", err);
 	if (err) {
@@ -292,7 +288,7 @@ static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages,
 	in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_MANAGE_PAGES);
 	in.hdr.opmod = cpu_to_be16(MLX5_PAGES_TAKE);
 	in.func_id = cpu_to_be16(func_id);
-	in.num_entries = cpu_to_be16(npages);
+	in.num_entries = cpu_to_be32(npages);
 	mlx5_core_dbg(dev, "npages %d, outlen %d\n", npages, outlen);
 	err = mlx5_cmd_exec(dev, &in, sizeof(in), out, outlen);
 	if (err) {
@@ -306,7 +302,7 @@ static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages,
 		goto out_free;
 	}
 
-	num_claimed = be16_to_cpu(out->num_entries);
+	num_claimed = be32_to_cpu(out->num_entries);
 	if (nclaimed)
 		*nclaimed = num_claimed;
 
@@ -345,7 +341,7 @@ static void pages_work_handler(struct work_struct *work)
 }
 
 void mlx5_core_req_pages_handler(struct mlx5_core_dev *dev, u16 func_id,
-				 s16 npages)
+				 s32 npages)
 {
 	struct mlx5_pages_req *req;
 
@@ -364,20 +360,18 @@ void mlx5_core_req_pages_handler(struct mlx5_core_dev *dev, u16 func_id,
 
 int mlx5_satisfy_startup_pages(struct mlx5_core_dev *dev, int boot)
 {
-	u16 uninitialized_var(boot_pages);
-	s16 uninitialized_var(init_pages);
 	u16 uninitialized_var(func_id);
+	s32 uninitialized_var(npages);
 	int err;
 
-	err = mlx5_cmd_query_pages(dev, &func_id, NULL, &init_pages,
-				   &boot_pages);
+	err = mlx5_cmd_query_pages(dev, &func_id, &npages, boot);
 	if (err)
 		return err;
 
+	mlx5_core_dbg(dev, "requested %d %s pages for func_id 0x%x\n",
+		      npages, boot ? "boot" : "init", func_id);
 
-	mlx5_core_dbg(dev, "requested %d init pages and %d boot pages for func_id 0x%x\n",
-		      init_pages, boot_pages, func_id);
-	return give_pages(dev, func_id, boot ? boot_pages : init_pages, 0);
+	return give_pages(dev, func_id, npages, 0);
 }
 
 static int optimal_reclaimed_pages(void)
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index 737685e9e852..68029b30c3dc 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -309,21 +309,20 @@ struct mlx5_hca_cap {
 	__be16	max_desc_sz_rq;
 	u8	rsvd21[2];
 	__be16	max_desc_sz_sq_dc;
-	u8	rsvd22[4];
-	__be16	max_qp_mcg;
-	u8	rsvd23;
+	__be32	max_qp_mcg;
+	u8	rsvd22[3];
 	u8	log_max_mcg;
-	u8	rsvd24;
+	u8	rsvd23;
 	u8	log_max_pd;
-	u8	rsvd25;
+	u8	rsvd24;
 	u8	log_max_xrcd;
-	u8	rsvd26[42];
+	u8	rsvd25[42];
 	__be16  log_uar_page_sz;
-	u8	rsvd27[28];
+	u8	rsvd26[28];
 	u8	log_msx_atomic_size_qp;
-	u8	rsvd28[2];
+	u8	rsvd27[2];
 	u8	log_msx_atomic_size_dc;
-	u8	rsvd29[76];
+	u8	rsvd28[76];
 };
 
 
@@ -472,9 +471,8 @@ struct mlx5_eqe_cmd {
 struct mlx5_eqe_page_req {
 	u8		rsvd0[2];
 	__be16		func_id;
-	u8		rsvd1[2];
-	__be16		num_pages;
-	__be32		rsvd2[5];
+	__be32		num_pages;
+	__be32		rsvd1[5];
 };
 
 union ev_data {
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 611e65e76b00..8888381fc150 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -358,7 +358,7 @@ struct mlx5_caps {
 	u32	reserved_lkey;
 	u8	local_ca_ack_delay;
 	u8	log_max_mcg;
-	u16	max_qp_mcg;
+	u32	max_qp_mcg;
 	int	min_page_sz;
 };
 
@@ -691,7 +691,7 @@ void mlx5_pagealloc_cleanup(struct mlx5_core_dev *dev);
 int mlx5_pagealloc_start(struct mlx5_core_dev *dev);
 void mlx5_pagealloc_stop(struct mlx5_core_dev *dev);
 void mlx5_core_req_pages_handler(struct mlx5_core_dev *dev, u16 func_id,
-				 s16 npages);
+				 s32 npages);
 int mlx5_satisfy_startup_pages(struct mlx5_core_dev *dev, int boot);
 int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev);
 void mlx5_register_debugfs(void);
-- 
cgit v1.2.3