From bafe68034e3ef5e9f512bd0468001caf34981c41 Mon Sep 17 00:00:00 2001 From: Haavard Skinnemoen Date: Mon, 10 Mar 2008 12:56:12 +0100 Subject: avr32: Work around byteswap bug in gcc < 4.2 gcc versions earlier than 4.2 sign-extend the result of le16_to_cpu() and friends when we implement __arch__swabX() using __builtin_bswap_X(). Disable our arch-specific optimizations when those gcc versions are being used. Signed-off-by: Haavard Skinnemoen --- include/asm-avr32/byteorder.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/asm-avr32/byteorder.h b/include/asm-avr32/byteorder.h index 402ff4125cdc..d77b48ba7338 100644 --- a/include/asm-avr32/byteorder.h +++ b/include/asm-avr32/byteorder.h @@ -12,8 +12,14 @@ extern unsigned long __builtin_bswap_32(unsigned long x); extern unsigned short __builtin_bswap_16(unsigned short x); #endif +/* + * avr32-linux-gcc versions earlier than 4.2 improperly sign-extends + * the result. + */ +#if !(__GNUC__ == 4 && __GNUC_MINOR__ < 2) #define __arch__swab32(x) __builtin_bswap_32(x) #define __arch__swab16(x) __builtin_bswap_16(x) +#endif #if !defined(__STRICT_ANSI__) || defined(__KERNEL__) # define __BYTEORDER_HAS_U64__ -- cgit v1.2.3 From fa86d322d89995fef1bfb5cc768b89d8c22ea0d9 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Mon, 24 Mar 2008 14:48:59 -0700 Subject: [NEIGH]: Fix race between pneigh deletion and ipv6's ndisc_recv_ns (v3). Proxy neighbors do not have any reference counting, so any caller of pneigh_lookup (unless it's a netlink triggered add/del routine) should _not_ perform any actions on the found proxy entry. There's one exception to this rule - the ipv6's ndisc_recv_ns() uses the found entry to check the flags for NTF_ROUTER. This creates a race between the ndisc and pneigh_delete - after the pneigh is returned to the caller, the nd_tbl.lock is dropped and the deleting procedure may proceed. One of the fixes would be to add reference counting, but this problem exists for ndisc only. Besides, such a patch would be too big for -rc4. So I propose to introduce a __pneigh_lookup() which is supposed to be called with the lock held, and use it in ndisc code to check the flags on a live pneigh entry. Changes from v2: As David noticed, exported __pneigh_lookup() to the ipv6 module. checkpatch generates a warning on it, since the EXPORT_SYMBOL does not follow the symbol itself, but in this file all the exports come at the end, so I decided not to break this harmony. Changes from v1: Fixed comments from YOSHIFUJI - indentation of the prototype in the header and the pndisc_check_router() name - and a compilation fix, pointed out by Daniel - the is_router variable was (falsely) considered uninitialized by gcc. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. 
Miller --- include/net/neighbour.h | 4 ++++ net/core/neighbour.c | 23 +++++++++++++++++++++++ net/ipv6/ndisc.c | 22 ++++++++++++++++++---- 3 files changed, 45 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/neighbour.h b/include/net/neighbour.h index ebbfb509822e..64a5f0120b52 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -218,6 +218,10 @@ extern unsigned long neigh_rand_reach_time(unsigned long base); extern void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p, struct sk_buff *skb); extern struct pneigh_entry *pneigh_lookup(struct neigh_table *tbl, struct net *net, const void *key, struct net_device *dev, int creat); +extern struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl, + struct net *net, + const void *key, + struct net_device *dev); extern int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *key, struct net_device *dev); extern void neigh_app_ns(struct neighbour *n); diff --git a/net/core/neighbour.c b/net/core/neighbour.c index d9a02b2cc289..19b8e003f150 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -466,6 +466,28 @@ out_neigh_release: goto out; } +struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl, + struct net *net, const void *pkey, struct net_device *dev) +{ + struct pneigh_entry *n; + int key_len = tbl->key_len; + u32 hash_val = *(u32 *)(pkey + key_len - 4); + + hash_val ^= (hash_val >> 16); + hash_val ^= hash_val >> 8; + hash_val ^= hash_val >> 4; + hash_val &= PNEIGH_HASHMASK; + + for (n = tbl->phash_buckets[hash_val]; n; n = n->next) { + if (!memcmp(n->key, pkey, key_len) && + (n->net == net) && + (n->dev == dev || !n->dev)) + break; + } + + return n; +} + struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl, struct net *net, const void *pkey, struct net_device *dev, int creat) @@ -2803,6 +2825,7 @@ EXPORT_SYMBOL(neigh_table_init_no_netlink); EXPORT_SYMBOL(neigh_update); EXPORT_SYMBOL(pneigh_enqueue); EXPORT_SYMBOL(pneigh_lookup); +EXPORT_SYMBOL_GPL(__pneigh_lookup); #ifdef CONFIG_ARPD EXPORT_SYMBOL(neigh_app_ns); diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 51557c27a0cd..452a2ac4eec8 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -676,6 +676,20 @@ static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb) } } +static struct pneigh_entry *pndisc_check_router(struct net_device *dev, + struct in6_addr *addr, int *is_router) +{ + struct pneigh_entry *n; + + read_lock_bh(&nd_tbl.lock); + n = __pneigh_lookup(&nd_tbl, &init_net, addr, dev); + if (n != NULL) + *is_router = (n->flags & NTF_ROUTER); + read_unlock_bh(&nd_tbl.lock); + + return n; +} + static void ndisc_recv_ns(struct sk_buff *skb) { struct nd_msg *msg = (struct nd_msg *)skb_transport_header(skb); @@ -692,7 +706,7 @@ static void ndisc_recv_ns(struct sk_buff *skb) struct pneigh_entry *pneigh = NULL; int dad = ipv6_addr_any(saddr); int inc; - int is_router; + int is_router = 0; if (ipv6_addr_is_multicast(&msg->target)) { ND_PRINTK2(KERN_WARNING @@ -790,8 +804,8 @@ static void ndisc_recv_ns(struct sk_buff *skb) if (ipv6_chk_acast_addr(dev, &msg->target) || (idev->cnf.forwarding && (ipv6_devconf.proxy_ndp || idev->cnf.proxy_ndp) && - (pneigh = pneigh_lookup(&nd_tbl, &init_net, - &msg->target, dev, 0)) != NULL)) { + (pneigh = pndisc_check_router(dev, &msg->target, + &is_router)) != NULL)) { if (!(NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED) && skb->pkt_type != PACKET_HOST && inc != 0 && @@ -812,7 +826,7 @@ static void ndisc_recv_ns(struct sk_buff *skb) goto out; } - 
is_router = !!(pneigh ? pneigh->flags & NTF_ROUTER : idev->cnf.forwarding); + is_router = !!(pneigh ? is_router : idev->cnf.forwarding); if (dad) { struct in6_addr maddr; -- cgit v1.2.3 From df9dcb4588aca9cc243cf1f3f454361a84e1cbdb Mon Sep 17 00:00:00 2001 From: Kazunori MIYAZAWA Date: Mon, 24 Mar 2008 14:51:51 -0700 Subject: [IPSEC]: Fix inter address family IPsec tunnel handling. Signed-off-by: Kazunori MIYAZAWA Signed-off-by: David S. Miller --- include/net/xfrm.h | 23 +++++++++++++++++++ net/ipv4/xfrm4_mode_tunnel.c | 2 +- net/ipv4/xfrm4_output.c | 2 +- net/ipv6/xfrm6_mode_tunnel.c | 2 +- net/ipv6/xfrm6_output.c | 2 +- net/key/af_key.c | 2 +- net/xfrm/xfrm_input.c | 22 +++++++++++++++--- net/xfrm/xfrm_output.c | 18 ++++++++++++++- net/xfrm/xfrm_state.c | 54 ++++++++++++++++++++++++++++++++++++++------ net/xfrm/xfrm_user.c | 7 ++---- 10 files changed, 113 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 619c53bc3cd2..4e6f9568cbe7 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -204,6 +204,7 @@ struct xfrm_state * transformer. */ const struct xfrm_type *type; struct xfrm_mode *inner_mode; + struct xfrm_mode *inner_mode_iaf; struct xfrm_mode *outer_mode; /* Security context */ @@ -387,6 +388,27 @@ enum { extern int xfrm_register_mode(struct xfrm_mode *mode, int family); extern int xfrm_unregister_mode(struct xfrm_mode *mode, int family); +static inline int xfrm_af2proto(unsigned int family) +{ + switch(family) { + case AF_INET: + return IPPROTO_IPIP; + case AF_INET6: + return IPPROTO_IPV6; + default: + return 0; + } +} + +static inline struct xfrm_mode *xfrm_ip2inner_mode(struct xfrm_state *x, int ipproto) +{ + if ((ipproto == IPPROTO_IPIP && x->props.family == AF_INET) || + (ipproto == IPPROTO_IPV6 && x->props.family == AF_INET6)) + return x->inner_mode; + else + return x->inner_mode_iaf; +} + struct xfrm_tmpl { /* id in template is interpreted as: @@ -1253,6 +1275,7 @@ extern int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, extern int xfrm_input_resume(struct sk_buff *skb, int nexthdr); extern int xfrm_output_resume(struct sk_buff *skb, int err); extern int xfrm_output(struct sk_buff *skb); +extern int xfrm_inner_extract_output(struct xfrm_state *x, struct sk_buff *skb); extern int xfrm4_extract_header(struct sk_buff *skb); extern int xfrm4_extract_input(struct xfrm_state *x, struct sk_buff *skb); extern int xfrm4_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi, diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c index 8dee617ee900..584e6d74e3a9 100644 --- a/net/ipv4/xfrm4_mode_tunnel.c +++ b/net/ipv4/xfrm4_mode_tunnel.c @@ -41,7 +41,7 @@ static int xfrm4_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) top_iph->ihl = 5; top_iph->version = 4; - top_iph->protocol = x->inner_mode->afinfo->proto; + top_iph->protocol = xfrm_af2proto(skb->dst->ops->family); /* DS disclosed */ top_iph->tos = INET_ECN_encapsulate(XFRM_MODE_SKB_CB(skb)->tos, diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c index d5a58a818021..8c3180adddbf 100644 --- a/net/ipv4/xfrm4_output.c +++ b/net/ipv4/xfrm4_output.c @@ -56,7 +56,7 @@ int xfrm4_prepare_output(struct xfrm_state *x, struct sk_buff *skb) { int err; - err = x->inner_mode->afinfo->extract_output(x, skb); + err = xfrm_inner_extract_output(x, skb); if (err) return err; diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c index 0c742faaa30b..e20529b4c825 100644 --- a/net/ipv6/xfrm6_mode_tunnel.c +++ 
b/net/ipv6/xfrm6_mode_tunnel.c @@ -45,7 +45,7 @@ static int xfrm6_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) memcpy(top_iph->flow_lbl, XFRM_MODE_SKB_CB(skb)->flow_lbl, sizeof(top_iph->flow_lbl)); - top_iph->nexthdr = x->inner_mode->afinfo->proto; + top_iph->nexthdr = xfrm_af2proto(skb->dst->ops->family); dsfield = XFRM_MODE_SKB_CB(skb)->tos; dsfield = INET_ECN_encapsulate(dsfield, dsfield); diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index 79ccfb080733..0af823cf7f1f 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -62,7 +62,7 @@ int xfrm6_prepare_output(struct xfrm_state *x, struct sk_buff *skb) { int err; - err = x->inner_mode->afinfo->extract_output(x, skb); + err = xfrm_inner_extract_output(x, skb); if (err) return err; diff --git a/net/key/af_key.c b/net/key/af_key.c index 8b5f486ac80f..e9ef9af4a53b 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -1219,7 +1219,7 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct sadb_msg *hdr, x->sel.prefixlen_s = addr->sadb_address_prefixlen; } - if (!x->sel.family) + if (x->props.mode == XFRM_MODE_TRANSPORT) x->sel.family = x->props.family; if (ext_hdrs[SADB_X_EXT_NAT_T_TYPE-1]) { diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index 62188c6a06dd..75279402ccf4 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -84,14 +84,21 @@ int xfrm_parse_spi(struct sk_buff *skb, u8 nexthdr, __be32 *spi, __be32 *seq) int xfrm_prepare_input(struct xfrm_state *x, struct sk_buff *skb) { + struct xfrm_mode *inner_mode = x->inner_mode; int err; err = x->outer_mode->afinfo->extract_input(x, skb); if (err) return err; - skb->protocol = x->inner_mode->afinfo->eth_proto; - return x->inner_mode->input2(x, skb); + if (x->sel.family == AF_UNSPEC) { + inner_mode = xfrm_ip2inner_mode(x, XFRM_MODE_SKB_CB(skb)->protocol); + if (inner_mode == NULL) + return -EAFNOSUPPORT; + } + + skb->protocol = inner_mode->afinfo->eth_proto; + return inner_mode->input2(x, skb); } EXPORT_SYMBOL(xfrm_prepare_input); @@ -101,6 +108,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) __be32 seq; struct xfrm_state *x; xfrm_address_t *daddr; + struct xfrm_mode *inner_mode; unsigned int family; int decaps = 0; int async = 0; @@ -207,7 +215,15 @@ resume: XFRM_MODE_SKB_CB(skb)->protocol = nexthdr; - if (x->inner_mode->input(x, skb)) { + inner_mode = x->inner_mode; + + if (x->sel.family == AF_UNSPEC) { + inner_mode = xfrm_ip2inner_mode(x, XFRM_MODE_SKB_CB(skb)->protocol); + if (inner_mode == NULL) + goto drop; + } + + if (inner_mode->input(x, skb)) { XFRM_INC_STATS(LINUX_MIB_XFRMINSTATEMODEERROR); goto drop; } diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index 569d377932c4..2519129c6d21 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -124,7 +124,7 @@ int xfrm_output_resume(struct sk_buff *skb, int err) if (!x) return dst_output(skb); - err = nf_hook(x->inner_mode->afinfo->family, + err = nf_hook(skb->dst->ops->family, NF_INET_POST_ROUTING, skb, NULL, skb->dst->dev, xfrm_output2); if (unlikely(err != 1)) @@ -193,4 +193,20 @@ int xfrm_output(struct sk_buff *skb) return xfrm_output2(skb); } + +int xfrm_inner_extract_output(struct xfrm_state *x, struct sk_buff *skb) +{ + struct xfrm_mode *inner_mode; + if (x->sel.family == AF_UNSPEC) + inner_mode = xfrm_ip2inner_mode(x, + xfrm_af2proto(skb->dst->ops->family)); + else + inner_mode = x->inner_mode; + + if (inner_mode == NULL) + return -EAFNOSUPPORT; + return inner_mode->afinfo->extract_output(x, 
skb); +} + EXPORT_SYMBOL_GPL(xfrm_output); +EXPORT_SYMBOL_GPL(xfrm_inner_extract_output); diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 7ba65e82941c..58f1f9347b54 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -388,6 +388,8 @@ static void xfrm_state_gc_destroy(struct xfrm_state *x) kfree(x->coaddr); if (x->inner_mode) xfrm_put_mode(x->inner_mode); + if (x->inner_mode_iaf) + xfrm_put_mode(x->inner_mode_iaf); if (x->outer_mode) xfrm_put_mode(x->outer_mode); if (x->type) { @@ -523,6 +525,8 @@ struct xfrm_state *xfrm_state_alloc(void) x->lft.hard_packet_limit = XFRM_INF; x->replay_maxage = 0; x->replay_maxdiff = 0; + x->inner_mode = NULL; + x->inner_mode_iaf = NULL; spin_lock_init(&x->lock); } return x; @@ -796,7 +800,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, selector. */ if (x->km.state == XFRM_STATE_VALID) { - if (!xfrm_selector_match(&x->sel, fl, x->sel.family) || + if ((x->sel.family && !xfrm_selector_match(&x->sel, fl, x->sel.family)) || !security_xfrm_state_pol_flow_match(x, pol, fl)) continue; if (!best || @@ -1944,6 +1948,7 @@ int xfrm_state_mtu(struct xfrm_state *x, int mtu) int xfrm_init_state(struct xfrm_state *x) { struct xfrm_state_afinfo *afinfo; + struct xfrm_mode *inner_mode; int family = x->props.family; int err; @@ -1962,13 +1967,48 @@ int xfrm_init_state(struct xfrm_state *x) goto error; err = -EPROTONOSUPPORT; - x->inner_mode = xfrm_get_mode(x->props.mode, x->sel.family); - if (x->inner_mode == NULL) - goto error; - if (!(x->inner_mode->flags & XFRM_MODE_FLAG_TUNNEL) && - family != x->sel.family) - goto error; + if (x->sel.family != AF_UNSPEC) { + inner_mode = xfrm_get_mode(x->props.mode, x->sel.family); + if (inner_mode == NULL) + goto error; + + if (!(inner_mode->flags & XFRM_MODE_FLAG_TUNNEL) && + family != x->sel.family) { + xfrm_put_mode(inner_mode); + goto error; + } + + x->inner_mode = inner_mode; + } else { + struct xfrm_mode *inner_mode_iaf; + + inner_mode = xfrm_get_mode(x->props.mode, AF_INET); + if (inner_mode == NULL) + goto error; + + if (!(inner_mode->flags & XFRM_MODE_FLAG_TUNNEL)) { + xfrm_put_mode(inner_mode); + goto error; + } + + inner_mode_iaf = xfrm_get_mode(x->props.mode, AF_INET6); + if (inner_mode_iaf == NULL) + goto error; + + if (!(inner_mode_iaf->flags & XFRM_MODE_FLAG_TUNNEL)) { + xfrm_put_mode(inner_mode_iaf); + goto error; + } + + if (x->props.family == AF_INET) { + x->inner_mode = inner_mode; + x->inner_mode_iaf = inner_mode_iaf; + } else { + x->inner_mode = inner_mode_iaf; + x->inner_mode_iaf = inner_mode; + } + } x->type = xfrm_get_type(x->id.proto, family); if (x->type == NULL) diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index f971ca5645f8..5d96f2728dc6 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -288,12 +288,9 @@ static void copy_from_user_state(struct xfrm_state *x, struct xfrm_usersa_info * memcpy(&x->props.saddr, &p->saddr, sizeof(x->props.saddr)); x->props.flags = p->flags; - /* - * Set inner address family if the KM left it as zero. - * See comment in validate_tmpl. - */ - if (!x->sel.family) + if (x->props.mode == XFRM_MODE_TRANSPORT) x->sel.family = p->family; + } /* -- cgit v1.2.3 From 85a793533524f333e8d630dc22450e574b7e08d2 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 24 Mar 2008 20:06:24 -0700 Subject: [SPARC64]: Make save_stack_trace() more efficient. Doing a 'flushw' every stack trace capture creates so much overhead that it makes lockdep next to unusable. 
We only care about the frame pointer chain and the function caller program counters, so flush those by hand to the stack frame. This is significantly more efficient than a 'flushw' because: 1) We only save 16 bytes per active register window to the stack. 2) This doesn't push the entire register window context of the current call chain out of the cpu, forcing register window fill traps as we return back down. Note that we can't use 'restore' and 'save' instructions to move around the register windows because that wouldn't work on Niagara processors. They optimize 'save' into a new register window by simply clearing out the registers instead of pulling them in from the on-chip register window backing store. Based upon a report by Tom Callaway. Signed-off-by: David S. Miller --- arch/sparc64/kernel/entry.S | 30 ++++++++++++++++++++++++++++++ arch/sparc64/kernel/stacktrace.c | 4 +++- include/asm-sparc64/stacktrace.h | 6 ++++++ 3 files changed, 39 insertions(+), 1 deletion(-) create mode 100644 include/asm-sparc64/stacktrace.h (limited to 'include') diff --git a/arch/sparc64/kernel/entry.S b/arch/sparc64/kernel/entry.S index 6be4d2d2904e..49eca4b1cf25 100644 --- a/arch/sparc64/kernel/entry.S +++ b/arch/sparc64/kernel/entry.S @@ -1705,6 +1705,36 @@ __flushw_user: 2: retl nop + /* Flush %fp and %i7 to the stack for all register + * windows active inside of the cpu. This allows + * show_stack_trace() to avoid using an expensive + * 'flushw'. + */ + .globl stack_trace_flush + .type stack_trace_flush,#function +stack_trace_flush: + rdpr %pstate, %o0 + wrpr %o0, PSTATE_IE, %pstate + + rdpr %cwp, %g1 + rdpr %canrestore, %g2 + sub %g1, 1, %g3 + +1: brz,pn %g2, 2f + sub %g2, 1, %g2 + wrpr %g3, %cwp + stx %fp, [%sp + STACK_BIAS + RW_V9_I6] + stx %i7, [%sp + STACK_BIAS + RW_V9_I7] + ba,pt %xcc, 1b + sub %g3, 1, %g3 + +2: wrpr %g1, %cwp + wrpr %o0, %pstate + + retl + nop + .size stack_trace_flush,.-stack_trace_flush + #ifdef CONFIG_SMP .globl hard_smp_processor_id hard_smp_processor_id: diff --git a/arch/sparc64/kernel/stacktrace.c b/arch/sparc64/kernel/stacktrace.c index 47f92a59be18..84d39e873e88 100644 --- a/arch/sparc64/kernel/stacktrace.c +++ b/arch/sparc64/kernel/stacktrace.c @@ -2,13 +2,15 @@ #include #include #include +#include void save_stack_trace(struct stack_trace *trace) { unsigned long ksp, fp, thread_base; struct thread_info *tp = task_thread_info(current); - flushw_all(); + stack_trace_flush(); + __asm__ __volatile__( "mov %%fp, %0" : "=r" (ksp) diff --git a/include/asm-sparc64/stacktrace.h b/include/asm-sparc64/stacktrace.h new file mode 100644 index 000000000000..6cee39adf6d6 --- /dev/null +++ b/include/asm-sparc64/stacktrace.h @@ -0,0 +1,6 @@ +#ifndef _SPARC64_STACKTRACE_H +#define _SPARC64_STACKTRACE_H + +extern void stack_trace_flush(void); + +#endif /* _SPARC64_STACKTRACE_H */ -- cgit v1.2.3 From 606d5b19391476f71e10ccce5b376f7071d11aba Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 25 Mar 2008 21:13:22 -0700 Subject: [SPARC64]: Adjust {TLBTEMP,TSBMAP}_BASE. Move them further from the main kernel image area to facilitate larger kernel sizes. Adjust comments to match. Signed-off-by: David S. 
Miller --- include/asm-sparc64/pgtable.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/asm-sparc64/pgtable.h b/include/asm-sparc64/pgtable.h index 3167ccff64f8..95303f5be86b 100644 --- a/include/asm-sparc64/pgtable.h +++ b/include/asm-sparc64/pgtable.h @@ -23,9 +23,9 @@ #include #include -/* The kernel image occupies 0x4000000 to 0x1000000 (4MB --> 32MB). - * The page copy blockops can use 0x2000000 to 0x4000000. - * The TSB is mapped in the 0x4000000 to 0x6000000 range. +/* The kernel image occupies 0x4000000 to 0x6000000 (4MB --> 96MB). + * The page copy blockops can use 0x6000000 to 0x8000000. + * The TSB is mapped in the 0x8000000 to 0xa000000 range. * The PROM resides in an area spanning 0xf0000000 to 0x100000000. * The vmalloc area spans 0x100000000 to 0x200000000. * Since modules need to be in the lowest 32-bits of the address space, @@ -33,8 +33,8 @@ * There is a single static kernel PMD which maps from 0x0 to address * 0x400000000. */ -#define TLBTEMP_BASE _AC(0x0000000002000000,UL) -#define TSBMAP_BASE _AC(0x0000000004000000,UL) +#define TLBTEMP_BASE _AC(0x0000000006000000,UL) +#define TSBMAP_BASE _AC(0x0000000008000000,UL) #define MODULES_VADDR _AC(0x0000000010000000,UL) #define MODULES_LEN _AC(0x00000000e0000000,UL) #define MODULES_END _AC(0x00000000f0000000,UL) -- cgit v1.2.3 From 8b78cf602fd3bd97c0080edd22fe8fd5d0fa7832 Mon Sep 17 00:00:00 2001 From: Yi Yang Date: Mon, 25 Feb 2008 08:46:12 +0800 Subject: cpuidle: fix cpuidle time and usage overflow The cpuidle C-state sysfs nodes time and usage are very easy to overflow because they are both of unsigned int type: time will overflow within about two hours, usage will take longer to overflow, but both keep increasing forever. This patch will convert them to unsigned long long. 
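As a quick sanity check on those numbers, the arithmetic can be reproduced with a standalone user-space program (an illustrative sketch only, not part of this patch): a 32-bit microsecond counter wraps after roughly 72 minutes, while a 64-bit counter accumulating at the same rate would take on the order of 585,000 years to wrap.

	#include <stdio.h>
	#include <stdint.h>

	int main(void)
	{
		/* 32-bit residency counter in microseconds, as in the old cpuidle_state */
		double wrap32_minutes = (double)UINT32_MAX / 1e6 / 60.0;
		/* 64-bit counter accumulating at the same rate */
		double wrap64_years = (double)UINT64_MAX / 1e6 / 3600.0 / 24.0 / 365.0;

		printf("u32 microsecond counter wraps after ~%.1f minutes\n", wrap32_minutes);
		printf("u64 microsecond counter wraps after ~%.0f years\n", wrap64_years);
		return 0;
	}
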
Signed-off-by: Yi Yang Acked-by: Venkatesh Pallipadi Signed-off-by: Len Brown --- drivers/cpuidle/cpuidle.c | 2 +- drivers/cpuidle/sysfs.c | 10 ++++++++-- include/linux/cpuidle.h | 4 ++-- 3 files changed, 11 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index d73663a52324..d42deb310ac7 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -67,7 +67,7 @@ static void cpuidle_idle_call(void) /* enter the state and update stats */ dev->last_residency = target_state->enter(dev, target_state); dev->last_state = target_state; - target_state->time += dev->last_residency; + target_state->time += (unsigned long long)dev->last_residency; target_state->usage++; /* give the governor an opportunity to reflect on the outcome */ diff --git a/drivers/cpuidle/sysfs.c b/drivers/cpuidle/sysfs.c index 69102ca05685..e949618b9be0 100644 --- a/drivers/cpuidle/sysfs.c +++ b/drivers/cpuidle/sysfs.c @@ -218,6 +218,12 @@ static ssize_t show_state_##_name(struct cpuidle_state *state, char *buf) \ return sprintf(buf, "%u\n", state->_name);\ } +#define define_show_state_ull_function(_name) \ +static ssize_t show_state_##_name(struct cpuidle_state *state, char *buf) \ +{ \ + return sprintf(buf, "%llu\n", state->_name);\ +} + #define define_show_state_str_function(_name) \ static ssize_t show_state_##_name(struct cpuidle_state *state, char *buf) \ { \ @@ -228,8 +234,8 @@ static ssize_t show_state_##_name(struct cpuidle_state *state, char *buf) \ define_show_state_function(exit_latency) define_show_state_function(power_usage) -define_show_state_function(usage) -define_show_state_function(time) +define_show_state_ull_function(usage) +define_show_state_ull_function(time) define_show_state_str_function(name) define_show_state_str_function(desc) diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index 6b72a4584086..51e6b1e520e6 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -38,8 +38,8 @@ struct cpuidle_state { unsigned int power_usage; /* in mW */ unsigned int target_residency; /* in US */ - unsigned int usage; - unsigned int time; /* in US */ + unsigned long long usage; + unsigned long long time; /* in US */ int (*enter) (struct cpuidle_device *dev, struct cpuidle_state *state); -- cgit v1.2.3 From 3d5ae6b69eacfac025021998d2ce159768edcfe1 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 25 Mar 2008 21:51:40 -0700 Subject: [SPARC64]: Fix sparse warnings in arch/sparc64/kernel/{cpu,setup}.c We create a local header file entry.h, under arch/sparc64/kernel/, that we can use to declare routines either defined in assembler or only invoked from assembler. As well as other data objects which are private to the inner sparc64 kernel arch code. Signed-off-by: David S. 
Miller --- arch/sparc64/kernel/cpu.c | 10 ++++++---- arch/sparc64/kernel/entry.h | 15 +++++++++++++++ arch/sparc64/kernel/setup.c | 5 ++--- arch/sparc64/mm/init.c | 1 + include/asm-sparc64/cpudata.h | 2 ++ 5 files changed, 26 insertions(+), 7 deletions(-) create mode 100644 arch/sparc64/kernel/entry.h (limited to 'include') diff --git a/arch/sparc64/kernel/cpu.c b/arch/sparc64/kernel/cpu.c index dd5d28e3d798..0097c08dc600 100644 --- a/arch/sparc64/kernel/cpu.c +++ b/arch/sparc64/kernel/cpu.c @@ -15,6 +15,8 @@ #include #include +#include "entry.h" + DEFINE_PER_CPU(cpuinfo_sparc, __cpu_data) = { 0 }; struct cpu_iu_info { @@ -65,8 +67,6 @@ static struct cpu_iu_info linux_sparc_chips[] = { char *sparc_cpu_type; char *sparc_fpu_type; -unsigned int fsr_storage; - static void __init sun4v_cpu_probe(void) { switch (sun4v_chip_type) { @@ -94,8 +94,10 @@ void __init cpu_probe(void) unsigned long ver, fpu_vers, manuf, impl, fprs; int i; - if (tlb_type == hypervisor) - return sun4v_cpu_probe(); + if (tlb_type == hypervisor) { + sun4v_cpu_probe(); + return; + } fprs = fprs_read(); fprs_write(FPRS_FEF); diff --git a/arch/sparc64/kernel/entry.h b/arch/sparc64/kernel/entry.h new file mode 100644 index 000000000000..bfcd1b8d23dc --- /dev/null +++ b/arch/sparc64/kernel/entry.h @@ -0,0 +1,15 @@ +#ifndef _ENTRY_H +#define _ENTRY_H + +#include + +extern char *sparc_cpu_type; +extern char *sparc_fpu_type; + +extern void __init per_cpu_patch(void); +extern void __init sun4v_patch(void); +extern void __init boot_cpu_id_too_large(int cpu); +extern unsigned int dcache_parity_tl1_occurred; +extern unsigned int icache_parity_tl1_occurred; + +#endif /* _ENTRY_H */ diff --git a/arch/sparc64/kernel/setup.c b/arch/sparc64/kernel/setup.c index d036dbe72864..6acb4c51cfe4 100644 --- a/arch/sparc64/kernel/setup.c +++ b/arch/sparc64/kernel/setup.c @@ -51,6 +51,8 @@ #include #endif +#include "entry.h" + /* Used to synchronize accesses to NatSemi SUPER I/O chip configure * operations in asm/ns87303.h */ @@ -335,9 +337,6 @@ void __init setup_arch(char **cmdline_p) /* BUFFER is PAGE_SIZE bytes long. */ -extern char *sparc_cpu_type; -extern char *sparc_fpu_type; - extern void smp_info(struct seq_file *); extern void smp_bogo(struct seq_file *); extern void mmu_info(struct seq_file *); diff --git a/arch/sparc64/mm/init.c b/arch/sparc64/mm/init.c index 466fd6cffac9..fced6dfe77a8 100644 --- a/arch/sparc64/mm/init.c +++ b/arch/sparc64/mm/init.c @@ -46,6 +46,7 @@ #include #include #include +#include #define MAX_PHYS_ADDRESS (1UL << 42UL) #define KPTE_BITMAP_CHUNK_SZ (256UL * 1024UL * 1024UL) diff --git a/include/asm-sparc64/cpudata.h b/include/asm-sparc64/cpudata.h index 542421460a12..532975ecfe10 100644 --- a/include/asm-sparc64/cpudata.h +++ b/include/asm-sparc64/cpudata.h @@ -86,6 +86,8 @@ extern struct trap_per_cpu trap_block[NR_CPUS]; extern void init_cur_cpu_trap(struct thread_info *); extern void setup_tba(void); extern int ncpus_probed; +extern void __init cpu_probe(void); +extern const struct seq_operations cpuinfo_op; extern unsigned long real_hard_smp_processor_id(void); -- cgit v1.2.3 From 99cd220133cdf2a559529d522a78b2ebc1bef2d8 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 26 Mar 2008 00:19:43 -0700 Subject: [SPARC64]: Fix sparse errors in arch/sparc64/kernel/traps.c Add 'UL' markers to DCU_* macros. Declare C functions called from assembler in entry.h Declare C functions called from within the sparc64 arch code in include/asm-sparc64/*.h headers as appropriate. 
Remove unused routines in traps.c Signed-off-by: David S. Miller --- arch/sparc64/kernel/entry.h | 141 ++++++++++++++++++++++++++++++++++++++++++ arch/sparc64/kernel/traps.c | 49 +-------------- arch/sparc64/mm/init.c | 4 -- include/asm-sparc64/dcu.h | 41 ++++++------ include/asm-sparc64/pgtable.h | 2 + 5 files changed, 167 insertions(+), 70 deletions(-) (limited to 'include') diff --git a/arch/sparc64/kernel/entry.h b/arch/sparc64/kernel/entry.h index bfcd1b8d23dc..a5084d6821ba 100644 --- a/arch/sparc64/kernel/entry.h +++ b/arch/sparc64/kernel/entry.h @@ -2,6 +2,7 @@ #define _ENTRY_H #include +#include extern char *sparc_cpu_type; extern char *sparc_fpu_type; @@ -12,4 +13,144 @@ extern void __init boot_cpu_id_too_large(int cpu); extern unsigned int dcache_parity_tl1_occurred; extern unsigned int icache_parity_tl1_occurred; +extern void bad_trap_tl1(struct pt_regs *regs, long lvl); + +extern void do_fpe_common(struct pt_regs *regs); +extern void do_fpieee(struct pt_regs *regs); +extern void do_fpother(struct pt_regs *regs); +extern void do_tof(struct pt_regs *regs); +extern void do_div0(struct pt_regs *regs); +extern void do_illegal_instruction(struct pt_regs *regs); +extern void mem_address_unaligned(struct pt_regs *regs, + unsigned long sfar, + unsigned long sfsr); +extern void sun4v_do_mna(struct pt_regs *regs, + unsigned long addr, + unsigned long type_ctx); +extern void do_privop(struct pt_regs *regs); +extern void do_privact(struct pt_regs *regs); +extern void do_cee(struct pt_regs *regs); +extern void do_cee_tl1(struct pt_regs *regs); +extern void do_dae_tl1(struct pt_regs *regs); +extern void do_iae_tl1(struct pt_regs *regs); +extern void do_div0_tl1(struct pt_regs *regs); +extern void do_fpdis_tl1(struct pt_regs *regs); +extern void do_fpieee_tl1(struct pt_regs *regs); +extern void do_fpother_tl1(struct pt_regs *regs); +extern void do_ill_tl1(struct pt_regs *regs); +extern void do_irq_tl1(struct pt_regs *regs); +extern void do_lddfmna_tl1(struct pt_regs *regs); +extern void do_stdfmna_tl1(struct pt_regs *regs); +extern void do_paw(struct pt_regs *regs); +extern void do_paw_tl1(struct pt_regs *regs); +extern void do_vaw(struct pt_regs *regs); +extern void do_vaw_tl1(struct pt_regs *regs); +extern void do_tof_tl1(struct pt_regs *regs); +extern void do_getpsr(struct pt_regs *regs); + +extern void spitfire_insn_access_exception(struct pt_regs *regs, + unsigned long sfsr, + unsigned long sfar); +extern void spitfire_insn_access_exception_tl1(struct pt_regs *regs, + unsigned long sfsr, + unsigned long sfar); +extern void spitfire_data_access_exception(struct pt_regs *regs, + unsigned long sfsr, + unsigned long sfar); +extern void spitfire_data_access_exception_tl1(struct pt_regs *regs, + unsigned long sfsr, + unsigned long sfar); +extern void spitfire_access_error(struct pt_regs *regs, + unsigned long status_encoded, + unsigned long afar); + +extern void cheetah_fecc_handler(struct pt_regs *regs, + unsigned long afsr, + unsigned long afar); +extern void cheetah_cee_handler(struct pt_regs *regs, + unsigned long afsr, + unsigned long afar); +extern void cheetah_deferred_handler(struct pt_regs *regs, + unsigned long afsr, + unsigned long afar); +extern void cheetah_plus_parity_error(int type, struct pt_regs *regs); + +extern void sun4v_insn_access_exception(struct pt_regs *regs, + unsigned long addr, + unsigned long type_ctx); +extern void sun4v_insn_access_exception_tl1(struct pt_regs *regs, + unsigned long addr, + unsigned long type_ctx); +extern void sun4v_data_access_exception(struct 
pt_regs *regs, + unsigned long addr, + unsigned long type_ctx); +extern void sun4v_data_access_exception_tl1(struct pt_regs *regs, + unsigned long addr, + unsigned long type_ctx); +extern void sun4v_resum_error(struct pt_regs *regs, + unsigned long offset); +extern void sun4v_resum_overflow(struct pt_regs *regs); +extern void sun4v_nonresum_error(struct pt_regs *regs, + unsigned long offset); +extern void sun4v_nonresum_overflow(struct pt_regs *regs); + +extern unsigned long sun4v_err_itlb_vaddr; +extern unsigned long sun4v_err_itlb_ctx; +extern unsigned long sun4v_err_itlb_pte; +extern unsigned long sun4v_err_itlb_error; + +extern void sun4v_itlb_error_report(struct pt_regs *regs, int tl); + +extern unsigned long sun4v_err_dtlb_vaddr; +extern unsigned long sun4v_err_dtlb_ctx; +extern unsigned long sun4v_err_dtlb_pte; +extern unsigned long sun4v_err_dtlb_error; + +extern void sun4v_dtlb_error_report(struct pt_regs *regs, int tl); +extern void hypervisor_tlbop_error(unsigned long err, + unsigned long op); +extern void hypervisor_tlbop_error_xcall(unsigned long err, + unsigned long op); + +/* WARNING: The error trap handlers in assembly know the precise + * layout of the following structure. + * + * C-level handlers in traps.c use this information to log the + * error and then determine how to recover (if possible). + */ +struct cheetah_err_info { +/*0x00*/u64 afsr; +/*0x08*/u64 afar; + + /* D-cache state */ +/*0x10*/u64 dcache_data[4]; /* The actual data */ +/*0x30*/u64 dcache_index; /* D-cache index */ +/*0x38*/u64 dcache_tag; /* D-cache tag/valid */ +/*0x40*/u64 dcache_utag; /* D-cache microtag */ +/*0x48*/u64 dcache_stag; /* D-cache snooptag */ + + /* I-cache state */ +/*0x50*/u64 icache_data[8]; /* The actual insns + predecode */ +/*0x90*/u64 icache_index; /* I-cache index */ +/*0x98*/u64 icache_tag; /* I-cache phys tag */ +/*0xa0*/u64 icache_utag; /* I-cache microtag */ +/*0xa8*/u64 icache_stag; /* I-cache snooptag */ +/*0xb0*/u64 icache_upper; /* I-cache upper-tag */ +/*0xb8*/u64 icache_lower; /* I-cache lower-tag */ + + /* E-cache state */ +/*0xc0*/u64 ecache_data[4]; /* 32 bytes from staging registers */ +/*0xe0*/u64 ecache_index; /* E-cache index */ +/*0xe8*/u64 ecache_tag; /* E-cache tag/state */ + +/*0xf0*/u64 __pad[32 - 30]; +}; +#define CHAFSR_INVALID ((u64)-1L) + +/* This is allocated at boot time based upon the largest hardware + * cpu ID in the system. We allocate two entries per cpu, one for + * TL==0 logging and one for TL >= 1 logging. + */ +extern struct cheetah_err_info *cheetah_error_log; + #endif /* _ENTRY_H */ diff --git a/arch/sparc64/kernel/traps.c b/arch/sparc64/kernel/traps.c index 007f5317c0de..96da847023f3 100644 --- a/arch/sparc64/kernel/traps.c +++ b/arch/sparc64/kernel/traps.c @@ -42,6 +42,7 @@ #endif #include +#include "entry.h" /* When an irrecoverable trap occurs at tl > 0, the trap entry * code logs the trap state registers at every level in the trap @@ -77,11 +78,6 @@ static void dump_tl1_traplog(struct tl1_traplog *p) } } -void do_call_debug(struct pt_regs *regs) -{ - notify_die(DIE_CALL, "debug call", regs, 0, 255, SIGINT); -} - void bad_trap(struct pt_regs *regs, long lvl) { char buffer[32]; @@ -550,41 +546,6 @@ static unsigned long ecache_flush_physbase; static unsigned long ecache_flush_linesize; static unsigned long ecache_flush_size; -/* WARNING: The error trap handlers in assembly know the precise - * layout of the following structure. 
- * - * C-level handlers below use this information to log the error - * and then determine how to recover (if possible). - */ -struct cheetah_err_info { -/*0x00*/u64 afsr; -/*0x08*/u64 afar; - - /* D-cache state */ -/*0x10*/u64 dcache_data[4]; /* The actual data */ -/*0x30*/u64 dcache_index; /* D-cache index */ -/*0x38*/u64 dcache_tag; /* D-cache tag/valid */ -/*0x40*/u64 dcache_utag; /* D-cache microtag */ -/*0x48*/u64 dcache_stag; /* D-cache snooptag */ - - /* I-cache state */ -/*0x50*/u64 icache_data[8]; /* The actual insns + predecode */ -/*0x90*/u64 icache_index; /* I-cache index */ -/*0x98*/u64 icache_tag; /* I-cache phys tag */ -/*0xa0*/u64 icache_utag; /* I-cache microtag */ -/*0xa8*/u64 icache_stag; /* I-cache snooptag */ -/*0xb0*/u64 icache_upper; /* I-cache upper-tag */ -/*0xb8*/u64 icache_lower; /* I-cache lower-tag */ - - /* E-cache state */ -/*0xc0*/u64 ecache_data[4]; /* 32 bytes from staging registers */ -/*0xe0*/u64 ecache_index; /* E-cache index */ -/*0xe8*/u64 ecache_tag; /* E-cache tag/state */ - -/*0xf0*/u64 __pad[32 - 30]; -}; -#define CHAFSR_INVALID ((u64)-1L) - /* This table is ordered in priority of errors and matches the * AFAR overwrite policy as well. */ @@ -758,10 +719,6 @@ static struct afsr_error_table __jalapeno_error_table[] = { static struct afsr_error_table *cheetah_error_table; static unsigned long cheetah_afsr_errors; -/* This is allocated at boot time based upon the largest hardware - * cpu ID in the system. We allocate two entries per cpu, one for - * TL==0 logging and one for TL >= 1 logging. - */ struct cheetah_err_info *cheetah_error_log; static inline struct cheetah_err_info *cheetah_get_error_log(unsigned long afsr) @@ -2102,7 +2059,7 @@ void do_div0(struct pt_regs *regs) force_sig_info(SIGFPE, &info, current); } -void instruction_dump (unsigned int *pc) +static void instruction_dump(unsigned int *pc) { int i; @@ -2115,7 +2072,7 @@ void instruction_dump (unsigned int *pc) printk("\n"); } -static void user_instruction_dump (unsigned int __user *pc) +static void user_instruction_dump(unsigned int __user *pc) { int i; unsigned int buf[9]; diff --git a/arch/sparc64/mm/init.c b/arch/sparc64/mm/init.c index fced6dfe77a8..f37078d96407 100644 --- a/arch/sparc64/mm/init.c +++ b/arch/sparc64/mm/init.c @@ -1274,10 +1274,6 @@ void __cpuinit sun4v_ktsb_register(void) /* paging_init() sets up the page tables */ -extern void cheetah_ecache_flush_init(void); -extern void sun4v_patch_tlb_handlers(void); - -extern void cpu_probe(void); extern void central_probe(void); static unsigned long last_valid_pfn; diff --git a/include/asm-sparc64/dcu.h b/include/asm-sparc64/dcu.h index ecbed2ae548f..0f704e106a1b 100644 --- a/include/asm-sparc64/dcu.h +++ b/include/asm-sparc64/dcu.h @@ -1,26 +1,27 @@ -/* $Id: dcu.h,v 1.2 2001/03/01 23:23:33 davem Exp $ */ #ifndef _SPARC64_DCU_H #define _SPARC64_DCU_H +#include + /* UltraSparc-III Data Cache Unit Control Register */ -#define DCU_CP 0x0002000000000000 /* Physical Cache Enable w/o mmu*/ -#define DCU_CV 0x0001000000000000 /* Virtual Cache Enable w/o mmu */ -#define DCU_ME 0x0000800000000000 /* NC-store Merging Enable */ -#define DCU_RE 0x0000400000000000 /* RAW bypass Enable */ -#define DCU_PE 0x0000200000000000 /* PCache Enable */ -#define DCU_HPE 0x0000100000000000 /* HW prefetch Enable */ -#define DCU_SPE 0x0000080000000000 /* SW prefetch Enable */ -#define DCU_SL 0x0000040000000000 /* Secondary load steering Enab */ -#define DCU_WE 0x0000020000000000 /* WCache enable */ -#define DCU_PM 0x000001fe00000000 /* PA Watchpoint 
Byte Mask */ -#define DCU_VM 0x00000001fe000000 /* VA Watchpoint Byte Mask */ -#define DCU_PR 0x0000000001000000 /* PA Watchpoint Read Enable */ -#define DCU_PW 0x0000000000800000 /* PA Watchpoint Write Enable */ -#define DCU_VR 0x0000000000400000 /* VA Watchpoint Read Enable */ -#define DCU_VW 0x0000000000200000 /* VA Watchpoint Write Enable */ -#define DCU_DM 0x0000000000000008 /* DMMU Enable */ -#define DCU_IM 0x0000000000000004 /* IMMU Enable */ -#define DCU_DC 0x0000000000000002 /* Data Cache Enable */ -#define DCU_IC 0x0000000000000001 /* Instruction Cache Enable */ +#define DCU_CP _AC(0x0002000000000000,UL) /* Phys Cache Enable w/o mmu */ +#define DCU_CV _AC(0x0001000000000000,UL) /* Virt Cache Enable w/o mmu */ +#define DCU_ME _AC(0x0000800000000000,UL) /* NC-store Merging Enable */ +#define DCU_RE _AC(0x0000400000000000,UL) /* RAW bypass Enable */ +#define DCU_PE _AC(0x0000200000000000,UL) /* PCache Enable */ +#define DCU_HPE _AC(0x0000100000000000,UL) /* HW prefetch Enable */ +#define DCU_SPE _AC(0x0000080000000000,UL) /* SW prefetch Enable */ +#define DCU_SL _AC(0x0000040000000000,UL) /* Secondary ld-steering Enab*/ +#define DCU_WE _AC(0x0000020000000000,UL) /* WCache enable */ +#define DCU_PM _AC(0x000001fe00000000,UL) /* PA Watchpoint Byte Mask */ +#define DCU_VM _AC(0x00000001fe000000,UL) /* VA Watchpoint Byte Mask */ +#define DCU_PR _AC(0x0000000001000000,UL) /* PA Watchpoint Read Enable */ +#define DCU_PW _AC(0x0000000000800000,UL) /* PA Watchpoint Write Enable*/ +#define DCU_VR _AC(0x0000000000400000,UL) /* VA Watchpoint Read Enable */ +#define DCU_VW _AC(0x0000000000200000,UL) /* VA Watchpoint Write Enable*/ +#define DCU_DM _AC(0x0000000000000008,UL) /* DMMU Enable */ +#define DCU_IM _AC(0x0000000000000004,UL) /* IMMU Enable */ +#define DCU_DC _AC(0x0000000000000002,UL) /* Data Cache Enable */ +#define DCU_IC _AC(0x0000000000000001,UL) /* Instruction Cache Enable */ #endif /* _SPARC64_DCU_H */ diff --git a/include/asm-sparc64/pgtable.h b/include/asm-sparc64/pgtable.h index 95303f5be86b..549e45266b68 100644 --- a/include/asm-sparc64/pgtable.h +++ b/include/asm-sparc64/pgtable.h @@ -761,6 +761,8 @@ extern unsigned long get_fb_unmapped_area(struct file *filp, unsigned long, extern void pgtable_cache_init(void); extern void sun4v_register_fault_status(void); extern void sun4v_ktsb_register(void); +extern void __init cheetah_ecache_flush_init(void); +extern void sun4v_patch_tlb_handlers(void); extern unsigned long cmdline_memory_size; -- cgit v1.2.3 From 06d8308c61e54346585b2691c13ee3f90cb6fb2f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 22 Mar 2008 09:20:24 +0100 Subject: NOHZ: reevaluate idle sleep length after add_timer_on() add_timer_on() can add a timer on a CPU which is currently in a long idle sleep, but the timer wheel is not reevaluated by the nohz code on that CPU. So a timer can be delayed for quite a long time. This triggered a false positive in the clocksource watchdog code. To avoid this we need to wake up the idle CPU and enforce the reevaluation of the timer wheel for the next timer event. Add a function, which checks a given CPU for idle state, marks the idle task with NEED_RESCHED and sends a reschedule IPI to notify the other CPU of the change in the timer wheel. Call this function from add_timer_on(). 
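For context, a minimal caller of the affected API might look like the fragment below; the timer name and callback are invented for illustration and are not taken from this patch. Before this change, if 'cpu' was deep in a nohz idle sleep, demo_timer_fn() could run long after the requested expiry; with wake_up_idle_cpu() called from add_timer_on(), an idle target CPU is kicked and re-evaluates its timer wheel.

	#include <linux/timer.h>
	#include <linux/jiffies.h>

	static struct timer_list demo_timer;		/* hypothetical example timer */

	static void demo_timer_fn(unsigned long data)
	{
		/* runs on the CPU the timer was enqueued on */
	}

	static void demo_arm_on_cpu(int cpu)
	{
		setup_timer(&demo_timer, demo_timer_fn, 0);
		demo_timer.expires = jiffies + 10 * HZ;
		/* enqueue on 'cpu'; after this patch an idle target CPU is woken
		 * so a long nohz sleep cannot delay demo_timer_fn() */
		add_timer_on(&demo_timer, cpu);
	}
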
Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra Acked-by: Ingo Molnar Cc: stable@kernel.org -- include/linux/sched.h | 6 ++++++ kernel/sched.c | 43 +++++++++++++++++++++++++++++++++++++++++++ kernel/timer.c | 10 +++++++++- 3 files changed, 58 insertions(+), 1 deletion(-) --- include/linux/sched.h | 6 ++++++ kernel/sched.c | 43 +++++++++++++++++++++++++++++++++++++++++++ kernel/timer.c | 10 +++++++++- 3 files changed, 58 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index fed07d03364e..6a1e7afb099b 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1541,6 +1541,12 @@ static inline void idle_task_exit(void) {} extern void sched_idle_next(void); +#if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP) +extern void wake_up_idle_cpu(int cpu); +#else +static inline void wake_up_idle_cpu(int cpu) { } +#endif + #ifdef CONFIG_SCHED_DEBUG extern unsigned int sysctl_sched_latency; extern unsigned int sysctl_sched_min_granularity; diff --git a/kernel/sched.c b/kernel/sched.c index 28c73f07efb2..8dcdec6fe0fe 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -1052,6 +1052,49 @@ static void resched_cpu(int cpu) resched_task(cpu_curr(cpu)); spin_unlock_irqrestore(&rq->lock, flags); } + +#ifdef CONFIG_NO_HZ +/* + * When add_timer_on() enqueues a timer into the timer wheel of an + * idle CPU then this timer might expire before the next timer event + * which is scheduled to wake up that CPU. In case of a completely + * idle system the next event might even be infinite time into the + * future. wake_up_idle_cpu() ensures that the CPU is woken up and + * leaves the inner idle loop so the newly added timer is taken into + * account when the CPU goes back to idle and evaluates the timer + * wheel for the next timer event. + */ +void wake_up_idle_cpu(int cpu) +{ + struct rq *rq = cpu_rq(cpu); + + if (cpu == smp_processor_id()) + return; + + /* + * This is safe, as this function is called with the timer + * wheel base lock of (cpu) held. When the CPU is on the way + * to idle and has not yet set rq->curr to idle then it will + * be serialized on the timer wheel base lock and take the new + * timer into account automatically. + */ + if (rq->curr != rq->idle) + return; + + /* + * We can set TIF_RESCHED on the idle task of the other CPU + * lockless. The worst case is that the other CPU runs the + * idle task through an additional NOOP schedule() + */ + set_tsk_thread_flag(rq->idle, TIF_NEED_RESCHED); + + /* NEED_RESCHED must be visible before we test polling */ + smp_mb(); + if (!tsk_is_polling(rq->idle)) + smp_send_reschedule(cpu); +} +#endif + #else static void __resched_task(struct task_struct *p, int tif_bit) { diff --git a/kernel/timer.c b/kernel/timer.c index 99b00a25f88b..b024106daa70 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -451,10 +451,18 @@ void add_timer_on(struct timer_list *timer, int cpu) spin_lock_irqsave(&base->lock, flags); timer_set_base(timer, base); internal_add_timer(base, timer); + /* + * Check whether the other CPU is idle and needs to be + * triggered to reevaluate the timer wheel when nohz is + * active. We are protected against the other CPU fiddling + * with the timer by holding the timer base lock. This also + * makes sure that a CPU on the way to idle can not evaluate + * the timer wheel. 
+ */ + wake_up_idle_cpu(cpu); spin_unlock_irqrestore(&base->lock, flags); } - /** * mod_timer - modify a timer's timeout * @timer: the timer to be modified -- cgit v1.2.3 From d91aa123b4b96e57680a39fb9dfd9722f8df3c7e Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 26 Mar 2008 00:37:51 -0700 Subject: [SPARC64]: Fix sparse warnings in arch/sparc64/kernel/irq.c Signed-off-by: David S. Miller --- arch/sparc64/kernel/entry.h | 27 +++++++++++++++++++++++++++ arch/sparc64/kernel/irq.c | 21 ++------------------- include/asm-sparc64/irq.h | 1 + 3 files changed, 30 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/arch/sparc64/kernel/entry.h b/arch/sparc64/kernel/entry.h index a5084d6821ba..e66d94c7caff 100644 --- a/arch/sparc64/kernel/entry.h +++ b/arch/sparc64/kernel/entry.h @@ -153,4 +153,31 @@ struct cheetah_err_info { */ extern struct cheetah_err_info *cheetah_error_log; +/* UPA nodes send interrupt packet to UltraSparc with first data reg + * value low 5 (7 on Starfire) bits holding the IRQ identifier being + * delivered. We must translate this into a non-vector IRQ so we can + * set the softint on this cpu. + * + * To make processing these packets efficient and race free we use + * an array of irq buckets below. The interrupt vector handler in + * entry.S feeds incoming packets into per-cpu pil-indexed lists. + * + * If you make changes to ino_bucket, please update hand coded assembler + * of the vectored interrupt trap handler(s) in entry.S and sun4v_ivec.S + */ +struct ino_bucket { +/*0x00*/unsigned long __irq_chain_pa; + + /* Virtual interrupt number assigned to this INO. */ +/*0x08*/unsigned int __virt_irq; +/*0x0c*/unsigned int __pad; +}; + +extern struct ino_bucket *ivector_table; +extern unsigned long ivector_table_pa; + +extern void handler_irq(int irq, struct pt_regs *regs); +extern void init_irqwork_curcpu(void); +extern void __cpuinit sun4v_register_mondo_queues(int this_cpu); + #endif /* _ENTRY_H */ diff --git a/arch/sparc64/kernel/irq.c b/arch/sparc64/kernel/irq.c index 5ec06c8c7fea..eb88bd6e674e 100644 --- a/arch/sparc64/kernel/irq.c +++ b/arch/sparc64/kernel/irq.c @@ -44,27 +44,10 @@ #include #include -/* UPA nodes send interrupt packet to UltraSparc with first data reg - * value low 5 (7 on Starfire) bits holding the IRQ identifier being - * delivered. We must translate this into a non-vector IRQ so we can - * set the softint on this cpu. - * - * To make processing these packets efficient and race free we use - * an array of irq buckets below. The interrupt vector handler in - * entry.S feeds incoming packets into per-cpu pil-indexed lists. - * - * If you make changes to ino_bucket, please update hand coded assembler - * of the vectored interrupt trap handler(s) in entry.S and sun4v_ivec.S - */ -struct ino_bucket { -/*0x00*/unsigned long __irq_chain_pa; - - /* Virtual interrupt number assigned to this INO. 
*/ -/*0x08*/unsigned int __virt_irq; -/*0x0c*/unsigned int __pad; -}; +#include "entry.h" #define NUM_IVECS (IMAP_INR + 1) + struct ino_bucket *ivector_table; unsigned long ivector_table_pa; diff --git a/include/asm-sparc64/irq.h b/include/asm-sparc64/irq.h index 30cb76b47be1..0bb9bf531745 100644 --- a/include/asm-sparc64/irq.h +++ b/include/asm-sparc64/irq.h @@ -64,6 +64,7 @@ extern unsigned char virt_irq_alloc(unsigned int dev_handle, extern void virt_irq_free(unsigned int virt_irq); #endif +extern void __init init_IRQ(void); extern void fixup_irqs(void); static inline void set_softint(unsigned long bits) -- cgit v1.2.3 From cf3d7c1ef418863376d556c48c214cb828623584 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 26 Mar 2008 01:11:55 -0700 Subject: [SPARC64]: Fix sparse warnings in arch/sparc64/kernel/time.c Signed-off-by: David S. Miller --- arch/sparc64/kernel/entry.h | 2 ++ arch/sparc64/kernel/smp.c | 3 ++- arch/sparc64/kernel/time.c | 66 +++++++++++++++++++++++++++------------------ include/asm-sparc64/timer.h | 9 ++++--- 4 files changed, 49 insertions(+), 31 deletions(-) (limited to 'include') diff --git a/arch/sparc64/kernel/entry.h b/arch/sparc64/kernel/entry.h index 0d0de9c32a80..4e238a11bdfe 100644 --- a/arch/sparc64/kernel/entry.h +++ b/arch/sparc64/kernel/entry.h @@ -14,6 +14,8 @@ extern void __init boot_cpu_id_too_large(int cpu); extern unsigned int dcache_parity_tl1_occurred; extern unsigned int icache_parity_tl1_occurred; +extern void timer_interrupt(int irq, struct pt_regs *regs); + extern asmlinkage void syscall_trace(struct pt_regs *regs, int syscall_exit_p); diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c index 5a1126b363a4..59f020d69d4c 100644 --- a/arch/sparc64/kernel/smp.c +++ b/arch/sparc64/kernel/smp.c @@ -1,6 +1,6 @@ /* smp.c: Sparc64 SMP support. * - * Copyright (C) 1997, 2007 David S. Miller (davem@davemloft.net) + * Copyright (C) 1997, 2007, 2008 David S. Miller (davem@davemloft.net) */ #include @@ -30,6 +30,7 @@ #include #include #include +#include #include #include diff --git a/arch/sparc64/kernel/time.c b/arch/sparc64/kernel/time.c index d204f1ab1d4c..e5d238970c7e 100644 --- a/arch/sparc64/kernel/time.c +++ b/arch/sparc64/kernel/time.c @@ -1,7 +1,6 @@ -/* $Id: time.c,v 1.42 2002/01/23 14:33:55 davem Exp $ - * time.c: UltraSparc timer and TOD clock support. +/* time.c: UltraSparc timer and TOD clock support. * - * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu) + * Copyright (C) 1997, 2008 David S. Miller (davem@davemloft.net) * Copyright (C) 1998 Eddie C. Dost (ecd@skynet.be) * * Based largely on code which is: @@ -48,6 +47,8 @@ #include #include +#include "entry.h" + DEFINE_SPINLOCK(mostek_lock); DEFINE_SPINLOCK(rtc_lock); void __iomem *mstk48t02_regs = NULL; @@ -508,6 +509,37 @@ static int __init has_low_battery(void) return (data1 == data2); /* Was the write blocked? */ } +static void __init mostek_set_system_time(void __iomem *mregs) +{ + unsigned int year, mon, day, hour, min, sec; + u8 tmp; + + spin_lock_irq(&mostek_lock); + + /* Traditional Mostek chip. 
*/ + tmp = mostek_read(mregs + MOSTEK_CREG); + tmp |= MSTK_CREG_READ; + mostek_write(mregs + MOSTEK_CREG, tmp); + + sec = MSTK_REG_SEC(mregs); + min = MSTK_REG_MIN(mregs); + hour = MSTK_REG_HOUR(mregs); + day = MSTK_REG_DOM(mregs); + mon = MSTK_REG_MONTH(mregs); + year = MSTK_CVT_YEAR( MSTK_REG_YEAR(mregs) ); + + xtime.tv_sec = mktime(year, mon, day, hour, min, sec); + xtime.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ); + set_normalized_timespec(&wall_to_monotonic, + -xtime.tv_sec, -xtime.tv_nsec); + + tmp = mostek_read(mregs + MOSTEK_CREG); + tmp &= ~MSTK_CREG_READ; + mostek_write(mregs + MOSTEK_CREG, tmp); + + spin_unlock_irq(&mostek_lock); +} + /* Probe for the real time clock chip. */ static void __init set_system_time(void) { @@ -520,7 +552,6 @@ static void __init set_system_time(void) unsigned long dregs = 0UL; void __iomem *bregs = 0UL; #endif - u8 tmp; if (!mregs && !dregs && !bregs) { prom_printf("Something wrong, clock regs not mapped yet.\n"); @@ -528,20 +559,11 @@ static void __init set_system_time(void) } if (mregs) { - spin_lock_irq(&mostek_lock); - - /* Traditional Mostek chip. */ - tmp = mostek_read(mregs + MOSTEK_CREG); - tmp |= MSTK_CREG_READ; - mostek_write(mregs + MOSTEK_CREG, tmp); + mostek_set_system_time(mregs); + return; + } - sec = MSTK_REG_SEC(mregs); - min = MSTK_REG_MIN(mregs); - hour = MSTK_REG_HOUR(mregs); - day = MSTK_REG_DOM(mregs); - mon = MSTK_REG_MONTH(mregs); - year = MSTK_CVT_YEAR( MSTK_REG_YEAR(mregs) ); - } else if (bregs) { + if (bregs) { unsigned char val = readb(bregs + 0x0e); unsigned int century; @@ -596,14 +618,6 @@ static void __init set_system_time(void) xtime.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ); set_normalized_timespec(&wall_to_monotonic, -xtime.tv_sec, -xtime.tv_nsec); - - if (mregs) { - tmp = mostek_read(mregs + MOSTEK_CREG); - tmp &= ~MSTK_CREG_READ; - mostek_write(mregs + MOSTEK_CREG, tmp); - - spin_unlock_irq(&mostek_lock); - } } /* davem suggests we keep this within the 4M locked kernel image */ @@ -1027,7 +1041,7 @@ void __init time_init(void) setup_clockevent_multiplier(clock); sparc64_clockevent.max_delta_ns = - clockevent_delta2ns(0x7fffffffffffffff, &sparc64_clockevent); + clockevent_delta2ns(0x7fffffffffffffffUL, &sparc64_clockevent); sparc64_clockevent.min_delta_ns = clockevent_delta2ns(0xF, &sparc64_clockevent); diff --git a/include/asm-sparc64/timer.h b/include/asm-sparc64/timer.h index ccbd69448866..5b779fd1f788 100644 --- a/include/asm-sparc64/timer.h +++ b/include/asm-sparc64/timer.h @@ -1,14 +1,13 @@ -/* $Id: timer.h,v 1.3 2000/05/09 17:40:15 davem Exp $ - * timer.h: System timer definitions for sun5. +/* timer.h: System timer definitions for sun5. * - * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu) + * Copyright (C) 1997, 2008 David S. Miller (davem@davemloft.net) */ #ifndef _SPARC64_TIMER_H #define _SPARC64_TIMER_H #include - +#include struct sparc64_tick_ops { unsigned long (*get_tick)(void); @@ -25,5 +24,7 @@ struct sparc64_tick_ops { extern struct sparc64_tick_ops *tick_ops; extern unsigned long sparc64_get_clock_tick(unsigned int cpu); +extern void __devinit setup_sparc64_timer(void); +extern void __init time_init(void); #endif /* _SPARC64_TIMER_H */ -- cgit v1.2.3 From 9bbafce2eec190ef7e44b0eb1095ba17ce6ad3af Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Wed, 26 Mar 2008 19:02:47 +0900 Subject: sh: Fix occasional FPU register corruption under preempt. 
Presently with preempt enabled there's the possibility to be preempted after the TIF_USEDFPU test and the register save, leading to bogus state post-__switch_to(). Use an explicit preempt_disable()/enable() pair around unlazy_fpu()/clear_fpu() to avoid this. Follows the x86 change. Reported-by: Takuo Koguchi Signed-off-by: Paul Mundt --- arch/sh/kernel/cpu/sh2a/fpu.c | 1 + arch/sh/kernel/cpu/sh4/fpu.c | 1 + arch/sh/kernel/cpu/sh5/fpu.c | 1 + arch/sh/kernel/dump_task.c | 1 + arch/sh/kernel/process_32.c | 1 + arch/sh/kernel/signal_32.c | 1 + include/asm-sh/fpu.h | 32 ++++++++++++++++++-------------- include/asm-sh/processor.h | 1 - include/asm-sh/processor_32.h | 1 + include/asm-sh/processor_64.h | 1 + 10 files changed, 26 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/arch/sh/kernel/cpu/sh2a/fpu.c b/arch/sh/kernel/cpu/sh2a/fpu.c index ff99562456fb..5627c0b3ffa8 100644 --- a/arch/sh/kernel/cpu/sh2a/fpu.c +++ b/arch/sh/kernel/cpu/sh2a/fpu.c @@ -13,6 +13,7 @@ #include #include #include +#include /* The PR (precision) bit in the FP Status Register must be clear when * an frchg instruction is executed, otherwise the instruction is undefined. diff --git a/arch/sh/kernel/cpu/sh4/fpu.c b/arch/sh/kernel/cpu/sh4/fpu.c index 817f9939cda6..8020796139f1 100644 --- a/arch/sh/kernel/cpu/sh4/fpu.c +++ b/arch/sh/kernel/cpu/sh4/fpu.c @@ -16,6 +16,7 @@ #include #include #include +#include /* The PR (precision) bit in the FP Status Register must be clear when * an frchg instruction is executed, otherwise the instruction is undefined. diff --git a/arch/sh/kernel/cpu/sh5/fpu.c b/arch/sh/kernel/cpu/sh5/fpu.c index 30b76a94abf2..dd4f51ffb50e 100644 --- a/arch/sh/kernel/cpu/sh5/fpu.c +++ b/arch/sh/kernel/cpu/sh5/fpu.c @@ -17,6 +17,7 @@ #include #include #include +#include /* * Initially load the FPU with signalling NANS. 
This bit pattern diff --git a/arch/sh/kernel/dump_task.c b/arch/sh/kernel/dump_task.c index 4a8a4083ff0b..1db7ce0f25d4 100644 --- a/arch/sh/kernel/dump_task.c +++ b/arch/sh/kernel/dump_task.c @@ -1,5 +1,6 @@ #include #include +#include /* * Capture the user space registers if the task is not running (in user space) diff --git a/arch/sh/kernel/process_32.c b/arch/sh/kernel/process_32.c index 9ab1926b9d10..b98e37a1f54c 100644 --- a/arch/sh/kernel/process_32.c +++ b/arch/sh/kernel/process_32.c @@ -25,6 +25,7 @@ #include #include #include +#include static int hlt_counter; int ubc_usercnt = 0; diff --git a/arch/sh/kernel/signal_32.c b/arch/sh/kernel/signal_32.c index f6b5fbfe75c4..f311551d9a05 100644 --- a/arch/sh/kernel/signal_32.c +++ b/arch/sh/kernel/signal_32.c @@ -29,6 +29,7 @@ #include #include #include +#include #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) diff --git a/include/asm-sh/fpu.h b/include/asm-sh/fpu.h index f8429880a270..f89abf5920d8 100644 --- a/include/asm-sh/fpu.h +++ b/include/asm-sh/fpu.h @@ -1,9 +1,8 @@ #ifndef __ASM_SH_FPU_H #define __ASM_SH_FPU_H -#define SR_FD 0x00008000 - #ifndef __ASSEMBLY__ +#include #include #ifdef CONFIG_SH_FPU @@ -28,18 +27,23 @@ extern void save_fpu(struct task_struct *__tsk, struct pt_regs *regs); extern int do_fpu_inst(unsigned short, struct pt_regs *); -#define unlazy_fpu(tsk, regs) do { \ - if (test_tsk_thread_flag(tsk, TIF_USEDFPU)) { \ - save_fpu(tsk, regs); \ - } \ -} while (0) - -#define clear_fpu(tsk, regs) do { \ - if (test_tsk_thread_flag(tsk, TIF_USEDFPU)) { \ - clear_tsk_thread_flag(tsk, TIF_USEDFPU); \ - release_fpu(regs); \ - } \ -} while (0) +static inline void unlazy_fpu(struct task_struct *tsk, struct pt_regs *regs) +{ + preempt_disable(); + if (test_tsk_thread_flag(tsk, TIF_USEDFPU)) + save_fpu(tsk, regs); + preempt_enable(); +} + +static inline void clear_fpu(struct task_struct *tsk, struct pt_regs *regs) +{ + preempt_disable(); + if (test_tsk_thread_flag(tsk, TIF_USEDFPU)) { + clear_tsk_thread_flag(tsk, TIF_USEDFPU); + release_fpu(regs); + } + preempt_enable(); +} #endif /* __ASSEMBLY__ */ diff --git a/include/asm-sh/processor.h b/include/asm-sh/processor.h index 19fe47c1ca17..ec707b98e5b9 100644 --- a/include/asm-sh/processor.h +++ b/include/asm-sh/processor.h @@ -2,7 +2,6 @@ #define __ASM_SH_PROCESSOR_H #include -#include #ifndef __ASSEMBLY__ /* diff --git a/include/asm-sh/processor_32.h b/include/asm-sh/processor_32.h index df2d5b039ef4..c09305d6a9d9 100644 --- a/include/asm-sh/processor_32.h +++ b/include/asm-sh/processor_32.h @@ -70,6 +70,7 @@ extern struct sh_cpuinfo cpu_data[]; */ #define SR_DSP 0x00001000 #define SR_IMASK 0x000000f0 +#define SR_FD 0x00008000 /* * FPU structure and data diff --git a/include/asm-sh/processor_64.h b/include/asm-sh/processor_64.h index eda4bef448e9..88a2edf8fa5d 100644 --- a/include/asm-sh/processor_64.h +++ b/include/asm-sh/processor_64.h @@ -112,6 +112,7 @@ extern struct sh_cpuinfo cpu_data[]; #endif #define SR_IMASK 0x000000f0 +#define SR_FD 0x00008000 #define SR_SSTEP 0x08000000 #ifndef __ASSEMBLY__ -- cgit v1.2.3 From 138bed154eab2205b4ef93f02f5feb1edf6d8552 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Wed, 26 Mar 2008 19:09:21 +0900 Subject: sh: Fix TIF_USEDFPU clearing under FPU emulation. The unlazy_fpu() path calls in to save_fpu() if the task has TIF_USEDFPU set. save_fpu() being the crap API that it is has the side effect of clearing the flag itself, which presently doesn't happen if we're using FPU emulation. 
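Condensed, the pre-patch !CONFIG_SH_FPU combination behaves roughly as in the sketch below (a paraphrase of the two header fragments involved, not verbatim kernel code): save_fpu() is a no-op macro, so the flag test in unlazy_fpu() succeeds but nothing ever clears TIF_USEDFPU.

	/* Pre-patch emulation path, paraphrased */
	#define save_fpu(tsk, regs)	do { } while (0)

	static inline void unlazy_fpu(struct task_struct *tsk, struct pt_regs *regs)
	{
		preempt_disable();
		if (test_tsk_thread_flag(tsk, TIF_USEDFPU))
			save_fpu(tsk, regs);	/* no-op here: TIF_USEDFPU stays set */
		preempt_enable();
	}
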
Fix this up for now, pending an overhaul in 2.6.26. Signed-off-by: Paul Mundt --- include/asm-sh/fpu.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-sh/fpu.h b/include/asm-sh/fpu.h index f89abf5920d8..91462fea1507 100644 --- a/include/asm-sh/fpu.h +++ b/include/asm-sh/fpu.h @@ -20,9 +20,14 @@ struct task_struct; extern void save_fpu(struct task_struct *__tsk, struct pt_regs *regs); #else + #define release_fpu(regs) do { } while (0) #define grab_fpu(regs) do { } while (0) -#define save_fpu(tsk, regs) do { } while (0) + +static inline void save_fpu(struct task_struct *tsk, struct pt_regs *regs) +{ + clear_tsk_thread_flag(tsk, TIF_USEDFPU); +} #endif extern int do_fpu_inst(unsigned short, struct pt_regs *); -- cgit v1.2.3 From d546b67a940eb42a99f56b86c5cd8d47c8348c2a Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Tue, 25 Mar 2008 17:39:12 -0700 Subject: x86: fix performance drop for glx fix the 3D performance drop reported at: http://bugzilla.kernel.org/show_bug.cgi?id=10328 fb drivers are using ioremap()/ioremap_nocache(), followed by mtrr_add with WC attribute. Recent changes in page attribute code made both ioremap()/ioremap_nocache() mappings as UC (instead of previous UC-). This breaks the graphics performance, as the effective memory type is UC instead of expected WC. The correct way to fix this is to add ioremap_wc() (which uses UC- in the absence of PAT kernel support and WC with PAT) and change all the fb drivers to use this new ioremap_wc() API. We can take this correct and longer route for post 2.6.25. For now, revert back to the UC- behavior for ioremap/ioremap_nocache. Signed-off-by: Suresh Siddha Signed-off-by: Venkatesh Pallipadi Cc: Arjan van de Ven Signed-off-by: Ingo Molnar --- arch/x86/mm/ioremap.c | 6 +++++- arch/x86/mm/pageattr.c | 2 +- include/asm-x86/pgtable.h | 2 ++ 3 files changed, 8 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 4afaba0ed722..794895c6dcc9 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -137,7 +137,11 @@ static void __iomem *__ioremap(resource_size_t phys_addr, unsigned long size, switch (mode) { case IOR_MODE_UNCACHED: default: - prot = PAGE_KERNEL_NOCACHE; + /* + * FIXME: we will use UC MINUS for now, as video fb drivers + * depend on it. Upcoming ioremap_wc() will fix this behavior. 
+ */ + prot = PAGE_KERNEL_UC_MINUS; break; case IOR_MODE_CACHED: prot = PAGE_KERNEL; diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 14e48b5a94ba..7b79f6be4e7d 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -771,7 +771,7 @@ static inline int change_page_attr_clear(unsigned long addr, int numpages, int set_memory_uc(unsigned long addr, int numpages) { return change_page_attr_set(addr, numpages, - __pgprot(_PAGE_PCD | _PAGE_PWT)); + __pgprot(_PAGE_PCD)); } EXPORT_SYMBOL(set_memory_uc); diff --git a/include/asm-x86/pgtable.h b/include/asm-x86/pgtable.h index 174b87738714..9cf472aeb9ce 100644 --- a/include/asm-x86/pgtable.h +++ b/include/asm-x86/pgtable.h @@ -85,6 +85,7 @@ extern pteval_t __PAGE_KERNEL, __PAGE_KERNEL_EXEC; #define __PAGE_KERNEL_RX (__PAGE_KERNEL_EXEC & ~_PAGE_RW) #define __PAGE_KERNEL_EXEC_NOCACHE (__PAGE_KERNEL_EXEC | _PAGE_PCD | _PAGE_PWT) #define __PAGE_KERNEL_NOCACHE (__PAGE_KERNEL | _PAGE_PCD | _PAGE_PWT) +#define __PAGE_KERNEL_UC_MINUS (__PAGE_KERNEL | _PAGE_PCD) #define __PAGE_KERNEL_VSYSCALL (__PAGE_KERNEL_RX | _PAGE_USER) #define __PAGE_KERNEL_VSYSCALL_NOCACHE (__PAGE_KERNEL_VSYSCALL | _PAGE_PCD | _PAGE_PWT) #define __PAGE_KERNEL_LARGE (__PAGE_KERNEL | _PAGE_PSE) @@ -101,6 +102,7 @@ extern pteval_t __PAGE_KERNEL, __PAGE_KERNEL_EXEC; #define PAGE_KERNEL_EXEC MAKE_GLOBAL(__PAGE_KERNEL_EXEC) #define PAGE_KERNEL_RX MAKE_GLOBAL(__PAGE_KERNEL_RX) #define PAGE_KERNEL_NOCACHE MAKE_GLOBAL(__PAGE_KERNEL_NOCACHE) +#define PAGE_KERNEL_UC_MINUS MAKE_GLOBAL(__PAGE_KERNEL_UC_MINUS) #define PAGE_KERNEL_EXEC_NOCACHE MAKE_GLOBAL(__PAGE_KERNEL_EXEC_NOCACHE) #define PAGE_KERNEL_LARGE MAKE_GLOBAL(__PAGE_KERNEL_LARGE) #define PAGE_KERNEL_LARGE_EXEC MAKE_GLOBAL(__PAGE_KERNEL_LARGE_EXEC) -- cgit v1.2.3 From 732c8bd590625e8bc0b88313b82930e336b2bec4 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 26 Mar 2008 16:51:09 -0700 Subject: [IPSEC]: Fix BEET output The IPv6 BEET output function is incorrectly including the inner header in the payload to be protected. This causes a crash as the packet doesn't actually have that many bytes for a second header. The IPv4 BEET output on the other hand is broken when it comes to handling an inner IPv6 header since it always assumes an inner IPv4 header. This patch fixes both by making sure that neither BEET output function touches the inner header at all. All access is now done through the protocol-independent cb structure. Two new attributes are added to make this work, the IP header length and the IPv4 option length. They're filled in by the inner mode's output function. Thanks to Joakim Koskela for finding this problem. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/net/xfrm.h | 6 ++++++ net/ipv4/xfrm4_mode_beet.c | 11 +++++------ net/ipv4/xfrm4_state.c | 2 ++ net/ipv6/xfrm6_mode_beet.c | 1 + net/ipv6/xfrm6_state.c | 2 ++ 5 files changed, 16 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 4e6f9568cbe7..0d255ae008b6 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -552,6 +552,9 @@ struct xfrm_mode_skb_cb { __be16 id; __be16 frag_off; + /* IP header length (excluding options or extension headers). */ + u8 ihl; + /* TOS for IPv4, class for IPv6. */ u8 tos; @@ -561,6 +564,9 @@ struct xfrm_mode_skb_cb { /* Protocol for IPv4, NH for IPv6. */ u8 protocol; + /* Option length for IPv4, zero for IPv6. */ + u8 optlen; + /* Used by IPv6 only, zero for IPv4. 
*/ u8 flow_lbl[3]; }; diff --git a/net/ipv4/xfrm4_mode_beet.c b/net/ipv4/xfrm4_mode_beet.c index b47030ba162b..9c798abce736 100644 --- a/net/ipv4/xfrm4_mode_beet.c +++ b/net/ipv4/xfrm4_mode_beet.c @@ -39,13 +39,11 @@ static void xfrm4_beet_make_header(struct sk_buff *skb) static int xfrm4_beet_output(struct xfrm_state *x, struct sk_buff *skb) { struct ip_beet_phdr *ph; - struct iphdr *iph, *top_iph; + struct iphdr *top_iph; int hdrlen, optlen; - iph = ip_hdr(skb); - hdrlen = 0; - optlen = iph->ihl * 4 - sizeof(*iph); + optlen = XFRM_MODE_SKB_CB(skb)->optlen; if (unlikely(optlen)) hdrlen += IPV4_BEET_PHMAXLEN - (optlen & 4); @@ -53,11 +51,12 @@ static int xfrm4_beet_output(struct xfrm_state *x, struct sk_buff *skb) hdrlen); skb->mac_header = skb->network_header + offsetof(struct iphdr, protocol); - skb->transport_header = skb->network_header + sizeof(*iph); + skb->transport_header = skb->network_header + sizeof(*top_iph); xfrm4_beet_make_header(skb); - ph = (struct ip_beet_phdr *)__skb_pull(skb, sizeof(*iph) - hdrlen); + ph = (struct ip_beet_phdr *) + __skb_pull(skb, XFRM_MODE_SKB_CB(skb)->ihl - hdrlen); top_iph = ip_hdr(skb); diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c index fdeebe68a379..07735ed280d7 100644 --- a/net/ipv4/xfrm4_state.c +++ b/net/ipv4/xfrm4_state.c @@ -52,10 +52,12 @@ int xfrm4_extract_header(struct sk_buff *skb) { struct iphdr *iph = ip_hdr(skb); + XFRM_MODE_SKB_CB(skb)->ihl = sizeof(*iph); XFRM_MODE_SKB_CB(skb)->id = iph->id; XFRM_MODE_SKB_CB(skb)->frag_off = iph->frag_off; XFRM_MODE_SKB_CB(skb)->tos = iph->tos; XFRM_MODE_SKB_CB(skb)->ttl = iph->ttl; + XFRM_MODE_SKB_CB(skb)->optlen = iph->ihl * 4 - sizeof(*iph); memset(XFRM_MODE_SKB_CB(skb)->flow_lbl, 0, sizeof(XFRM_MODE_SKB_CB(skb)->flow_lbl)); diff --git a/net/ipv6/xfrm6_mode_beet.c b/net/ipv6/xfrm6_mode_beet.c index 0527d11c1ae3..d6ce400f585f 100644 --- a/net/ipv6/xfrm6_mode_beet.c +++ b/net/ipv6/xfrm6_mode_beet.c @@ -45,6 +45,7 @@ static int xfrm6_beet_output(struct xfrm_state *x, struct sk_buff *skb) skb->mac_header = skb->network_header + offsetof(struct ipv6hdr, nexthdr); skb->transport_header = skb->network_header + sizeof(*top_iph); + __skb_pull(skb, XFRM_MODE_SKB_CB(skb)->ihl); xfrm6_beet_make_header(skb); diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c index dc817e035e23..ff1e1db8e236 100644 --- a/net/ipv6/xfrm6_state.c +++ b/net/ipv6/xfrm6_state.c @@ -174,10 +174,12 @@ int xfrm6_extract_header(struct sk_buff *skb) { struct ipv6hdr *iph = ipv6_hdr(skb); + XFRM_MODE_SKB_CB(skb)->ihl = sizeof(*iph); XFRM_MODE_SKB_CB(skb)->id = 0; XFRM_MODE_SKB_CB(skb)->frag_off = htons(IP_DF); XFRM_MODE_SKB_CB(skb)->tos = ipv6_get_dsfield(iph); XFRM_MODE_SKB_CB(skb)->ttl = iph->hop_limit; + XFRM_MODE_SKB_CB(skb)->optlen = 0; memcpy(XFRM_MODE_SKB_CB(skb)->flow_lbl, iph->flow_lbl, sizeof(XFRM_MODE_SKB_CB(skb)->flow_lbl)); -- cgit v1.2.3 From c101b088ba0ed16d7109b2f3c2d16798d162a535 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 26 Mar 2008 17:32:33 -0700 Subject: [SPARC64]: Define TASK_SIZE_OF() This makes "cat /proc/${PID}/pagemap" more efficient for 32-bit tasks. Based upon a report by Mariusz Kozlowski. Signed-off-by: David S. 
Miller --- include/asm-sparc64/processor.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/asm-sparc64/processor.h b/include/asm-sparc64/processor.h index 8da484c19822..885b6a1dcae4 100644 --- a/include/asm-sparc64/processor.h +++ b/include/asm-sparc64/processor.h @@ -37,6 +37,9 @@ #endif #define TASK_SIZE ((unsigned long)-VPTE_SIZE) +#define TASK_SIZE_OF(tsk) \ + (test_tsk_thread_flag(tsk,TIF_32BIT) ? \ + (1UL << 32UL) : TASK_SIZE) #ifdef __KERNEL__ #define STACK_TOP32 ((1UL << 32UL) - PAGE_SIZE) -- cgit v1.2.3 From b2ef749720a97053d60605a7456772a1752164cc Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 26 Mar 2008 22:39:15 +0100 Subject: rdc321x: GPIO routines bugfixes This patch fixes the use of GPIO routines which are in the PCI configuration space of the RDC321x, therefore reading/writing to this space without spinlock protection can be problematic. We also now request and free GPIOs and support the MGB100 board, previous code was very AR525W-centric. Signed-off-by: Volker Weiss Signed-off-by: Florian Fainelli Signed-off-by: Ingo Molnar --- arch/x86/mach-rdc321x/gpio.c | 199 +++++++++++++++++++++------- arch/x86/mach-rdc321x/platform.c | 2 + include/asm-x86/mach-rdc321x/gpio.h | 9 +- include/asm-x86/mach-rdc321x/rdc321x_defs.h | 8 +- 4 files changed, 165 insertions(+), 53 deletions(-) (limited to 'include') diff --git a/arch/x86/mach-rdc321x/gpio.c b/arch/x86/mach-rdc321x/gpio.c index 031269163bd6..247f33d3a407 100644 --- a/arch/x86/mach-rdc321x/gpio.c +++ b/arch/x86/mach-rdc321x/gpio.c @@ -1,91 +1,194 @@ /* - * Copyright (C) 2007, OpenWrt.org, Florian Fainelli - * RDC321x architecture specific GPIO support + * GPIO support for RDC SoC R3210/R8610 + * + * Copyright (C) 2007, Florian Fainelli + * Copyright (C) 2008, Volker Weiss + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. */ -#include -#include + +#include #include #include #include -#include +#include #include -static inline int rdc_gpio_is_valid(unsigned gpio) + +/* spin lock to protect our private copy of GPIO data register plus + the access to PCI conf registers. 
*/ +static DEFINE_SPINLOCK(gpio_lock); + +/* copy of GPIO data registers */ +static u32 gpio_data_reg1; +static u32 gpio_data_reg2; + +static u32 gpio_request_data[2]; + + +static inline void rdc321x_conf_write(unsigned addr, u32 value) { - return (gpio <= RDC_MAX_GPIO); + outl((1 << 31) | (7 << 11) | addr, RDC3210_CFGREG_ADDR); + outl(value, RDC3210_CFGREG_DATA); } -static unsigned int rdc_gpio_read(unsigned gpio) +static inline void rdc321x_conf_or(unsigned addr, u32 value) { - unsigned int val; - - val = 0x80000000 | (7 << 11) | ((gpio&0x20?0x84:0x48)); - outl(val, RDC3210_CFGREG_ADDR); - udelay(10); - val = inl(RDC3210_CFGREG_DATA); - val |= (0x1 << (gpio & 0x1F)); - outl(val, RDC3210_CFGREG_DATA); - udelay(10); - val = 0x80000000 | (7 << 11) | ((gpio&0x20?0x88:0x4C)); - outl(val, RDC3210_CFGREG_ADDR); - udelay(10); - val = inl(RDC3210_CFGREG_DATA); - - return val; + outl((1 << 31) | (7 << 11) | addr, RDC3210_CFGREG_ADDR); + value |= inl(RDC3210_CFGREG_DATA); + outl(value, RDC3210_CFGREG_DATA); } -static void rdc_gpio_write(unsigned int val) +static inline u32 rdc321x_conf_read(unsigned addr) { - if (val) { - outl(val, RDC3210_CFGREG_DATA); - udelay(10); - } + outl((1 << 31) | (7 << 11) | addr, RDC3210_CFGREG_ADDR); + + return inl(RDC3210_CFGREG_DATA); } -int rdc_gpio_get_value(unsigned gpio) +/* configure pin as GPIO */ +static void rdc321x_configure_gpio(unsigned gpio) +{ + unsigned long flags; + + spin_lock_irqsave(&gpio_lock, flags); + rdc321x_conf_or(gpio < 32 + ? RDC321X_GPIO_CTRL_REG1 : RDC321X_GPIO_CTRL_REG2, + 1 << (gpio & 0x1f)); + spin_unlock_irqrestore(&gpio_lock, flags); +} + +/* initially setup the 2 copies of the gpio data registers. + This function must be called by the platform setup code. */ +void __init rdc321x_gpio_setup() +{ + /* this might not be, what others (BIOS, bootloader, etc.) + wrote to these registers before, but it's a good guess. Still + better than just using 0xffffffff. */ + + gpio_data_reg1 = rdc321x_conf_read(RDC321X_GPIO_DATA_REG1); + gpio_data_reg2 = rdc321x_conf_read(RDC321X_GPIO_DATA_REG2); +} + +/* determine, if gpio number is valid */ +static inline int rdc321x_is_gpio(unsigned gpio) +{ + return gpio <= RDC321X_MAX_GPIO; +} + +/* request GPIO */ +int rdc_gpio_request(unsigned gpio, const char *label) { - if (rdc_gpio_is_valid(gpio)) - return (int)rdc_gpio_read(gpio); - else + unsigned long flags; + + if (!rdc321x_is_gpio(gpio)) return -EINVAL; + + spin_lock_irqsave(&gpio_lock, flags); + if (gpio_request_data[(gpio & 0x20) ? 1 : 0] & (1 << (gpio & 0x1f))) + goto inuse; + gpio_request_data[(gpio & 0x20) ? 1 : 0] |= (1 << (gpio & 0x1f)); + spin_unlock_irqrestore(&gpio_lock, flags); + + return 0; +inuse: + spin_unlock_irqrestore(&gpio_lock, flags); + return -EINVAL; } -EXPORT_SYMBOL(rdc_gpio_get_value); +EXPORT_SYMBOL(rdc_gpio_request); -void rdc_gpio_set_value(unsigned gpio, int value) +/* release previously-claimed GPIO */ +void rdc_gpio_free(unsigned gpio) { - unsigned int val; + unsigned long flags; - if (!rdc_gpio_is_valid(gpio)) + if (!rdc321x_is_gpio(gpio)) return; - val = rdc_gpio_read(gpio); + spin_lock_irqsave(&gpio_lock, flags); + gpio_request_data[(gpio & 0x20) ? 1 : 0] &= ~(1 << (gpio & 0x1f)); + spin_unlock_irqrestore(&gpio_lock, flags); +} +EXPORT_SYMBOL(rdc_gpio_free); + +/* read GPIO pin */ +int rdc_gpio_get_value(unsigned gpio) +{ + u32 reg; + unsigned long flags; + + spin_lock_irqsave(&gpio_lock, flags); + reg = rdc321x_conf_read(gpio < 32 + ? 
RDC321X_GPIO_DATA_REG1 : RDC321X_GPIO_DATA_REG2); + spin_unlock_irqrestore(&gpio_lock, flags); - if (value) - val &= ~(0x1 << (gpio & 0x1F)); - else - val |= (0x1 << (gpio & 0x1F)); + return (1 << (gpio & 0x1f)) & reg ? 1 : 0; +} +EXPORT_SYMBOL(rdc_gpio_get_value); - rdc_gpio_write(val); +/* set GPIO pin to value */ +void rdc_gpio_set_value(unsigned gpio, int value) +{ + unsigned long flags; + u32 reg; + + reg = 1 << (gpio & 0x1f); + if (gpio < 32) { + spin_lock_irqsave(&gpio_lock, flags); + if (value) + gpio_data_reg1 |= reg; + else + gpio_data_reg1 &= ~reg; + rdc321x_conf_write(RDC321X_GPIO_DATA_REG1, gpio_data_reg1); + spin_unlock_irqrestore(&gpio_lock, flags); + } else { + spin_lock_irqsave(&gpio_lock, flags); + if (value) + gpio_data_reg2 |= reg; + else + gpio_data_reg2 &= ~reg; + rdc321x_conf_write(RDC321X_GPIO_DATA_REG2, gpio_data_reg2); + spin_unlock_irqrestore(&gpio_lock, flags); + } } EXPORT_SYMBOL(rdc_gpio_set_value); +/* configure GPIO pin as input */ int rdc_gpio_direction_input(unsigned gpio) { + if (!rdc321x_is_gpio(gpio)) + return -EINVAL; + + rdc321x_configure_gpio(gpio); + return 0; } EXPORT_SYMBOL(rdc_gpio_direction_input); +/* configure GPIO pin as output and set value */ int rdc_gpio_direction_output(unsigned gpio, int value) { + if (!rdc321x_is_gpio(gpio)) + return -EINVAL; + + gpio_set_value(gpio, value); + rdc321x_configure_gpio(gpio); + return 0; } EXPORT_SYMBOL(rdc_gpio_direction_output); - - diff --git a/arch/x86/mach-rdc321x/platform.c b/arch/x86/mach-rdc321x/platform.c index dda6024a5862..a037041817c7 100644 --- a/arch/x86/mach-rdc321x/platform.c +++ b/arch/x86/mach-rdc321x/platform.c @@ -62,6 +62,8 @@ static struct platform_device *rdc321x_devs[] = { static int __init rdc_board_setup(void) { + rdc321x_gpio_setup(); + return platform_add_devices(rdc321x_devs, ARRAY_SIZE(rdc321x_devs)); } diff --git a/include/asm-x86/mach-rdc321x/gpio.h b/include/asm-x86/mach-rdc321x/gpio.h index db31b929b990..acce0b7d397b 100644 --- a/include/asm-x86/mach-rdc321x/gpio.h +++ b/include/asm-x86/mach-rdc321x/gpio.h @@ -5,19 +5,20 @@ extern int rdc_gpio_get_value(unsigned gpio); extern void rdc_gpio_set_value(unsigned gpio, int value); extern int rdc_gpio_direction_input(unsigned gpio); extern int rdc_gpio_direction_output(unsigned gpio, int value); - +extern int rdc_gpio_request(unsigned gpio, const char *label); +extern void rdc_gpio_free(unsigned gpio); +extern void __init rdc321x_gpio_setup(void); /* Wrappers for the arch-neutral GPIO API */ static inline int gpio_request(unsigned gpio, const char *label) { - /* Not yet implemented */ - return 0; + return rdc_gpio_request(gpio, label); } static inline void gpio_free(unsigned gpio) { - /* Not yet implemented */ + rdc_gpio_free(gpio); } static inline int gpio_direction_input(unsigned gpio) diff --git a/include/asm-x86/mach-rdc321x/rdc321x_defs.h b/include/asm-x86/mach-rdc321x/rdc321x_defs.h index 838ba8f64fd3..c8e9c8bed3d0 100644 --- a/include/asm-x86/mach-rdc321x/rdc321x_defs.h +++ b/include/asm-x86/mach-rdc321x/rdc321x_defs.h @@ -3,4 +3,10 @@ /* General purpose configuration and data registers */ #define RDC3210_CFGREG_ADDR 0x0CF8 #define RDC3210_CFGREG_DATA 0x0CFC -#define RDC_MAX_GPIO 0x3A + +#define RDC321X_GPIO_CTRL_REG1 0x48 +#define RDC321X_GPIO_CTRL_REG2 0x84 +#define RDC321X_GPIO_DATA_REG1 0x4c +#define RDC321X_GPIO_DATA_REG2 0x88 + +#define RDC321X_MAX_GPIO 58 -- cgit v1.2.3 From a6bd8e13034dd7d60b6f14217096efa192d0adc1 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Fri, 28 Mar 2008 11:05:53 -0500 Subject: lguest: 
comment documentation update. Took some cycles to re-read the Lguest Journey end-to-end, fix some rot and tighten some phrases. Only comments change. No new jokes, but a couple of recycled old jokes. Signed-off-by: Rusty Russell --- Documentation/lguest/lguest.c | 69 ++++++++++++---------- arch/x86/lguest/boot.c | 108 +++++++++++++++++++--------------- arch/x86/lguest/i386_head.S | 15 +++-- drivers/lguest/core.c | 18 +++--- drivers/lguest/hypercalls.c | 11 +++- drivers/lguest/interrupts_and_traps.c | 7 +-- drivers/lguest/lguest_device.c | 11 ++-- drivers/lguest/lguest_user.c | 30 +++++++--- drivers/lguest/page_tables.c | 32 +++++----- drivers/lguest/x86/core.c | 33 +++++++---- drivers/lguest/x86/switcher_32.S | 8 +-- include/asm-x86/lguest_hcall.h | 2 +- include/linux/lguest_launcher.h | 6 +- 13 files changed, 208 insertions(+), 142 deletions(-) (limited to 'include') diff --git a/Documentation/lguest/lguest.c b/Documentation/lguest/lguest.c index d45c7f682b1b..4c1fc65a8b3d 100644 --- a/Documentation/lguest/lguest.c +++ b/Documentation/lguest/lguest.c @@ -1,7 +1,7 @@ /*P:100 This is the Launcher code, a simple program which lays out the - * "physical" memory for the new Guest by mapping the kernel image and the - * virtual devices, then reads repeatedly from /dev/lguest to run the Guest. -:*/ + * "physical" memory for the new Guest by mapping the kernel image and + * the virtual devices, then opens /dev/lguest to tell the kernel + * about the Guest and control it. :*/ #define _LARGEFILE64_SOURCE #define _GNU_SOURCE #include @@ -43,7 +43,7 @@ #include "linux/virtio_console.h" #include "linux/virtio_ring.h" #include "asm-x86/bootparam.h" -/*L:110 We can ignore the 38 include files we need for this program, but I do +/*L:110 We can ignore the 39 include files we need for this program, but I do * want to draw attention to the use of kernel-style types. * * As Linus said, "C is a Spartan language, and so should your naming be." I @@ -320,7 +320,7 @@ static unsigned long map_elf(int elf_fd, const Elf32_Ehdr *ehdr) err(1, "Reading program headers"); /* Try all the headers: there are usually only three. A read-only one, - * a read-write one, and a "note" section which isn't loadable. */ + * a read-write one, and a "note" section which we don't load. */ for (i = 0; i < ehdr->e_phnum; i++) { /* If this isn't a loadable segment, we ignore it */ if (phdr[i].p_type != PT_LOAD) @@ -387,7 +387,7 @@ static unsigned long load_kernel(int fd) if (memcmp(hdr.e_ident, ELFMAG, SELFMAG) == 0) return map_elf(fd, &hdr); - /* Otherwise we assume it's a bzImage, and try to unpack it */ + /* Otherwise we assume it's a bzImage, and try to load it. */ return load_bzimage(fd); } @@ -433,12 +433,12 @@ static unsigned long load_initrd(const char *name, unsigned long mem) return len; } -/* Once we know how much memory we have, we can construct simple linear page +/* Once we know how much memory we have we can construct simple linear page * tables which set virtual == physical which will get the Guest far enough * into the boot to create its own. * * We lay them out of the way, just below the initrd (which is why we need to - * know its size). */ + * know its size here). */ static unsigned long setup_pagetables(unsigned long mem, unsigned long initrd_size) { @@ -850,7 +850,8 @@ static void handle_console_output(int fd, struct virtqueue *vq) * * Handling output for network is also simple: we get all the output buffers * and write them (ignoring the first element) to this device's file descriptor - * (stdout). 
*/ + * (/dev/net/tun). + */ static void handle_net_output(int fd, struct virtqueue *vq) { unsigned int head, out, in; @@ -924,7 +925,7 @@ static void enable_fd(int fd, struct virtqueue *vq) write(waker_fd, &vq->dev->fd, sizeof(vq->dev->fd)); } -/* Resetting a device is fairly easy. */ +/* When the Guest asks us to reset a device, it's is fairly easy. */ static void reset_device(struct device *dev) { struct virtqueue *vq; @@ -1003,8 +1004,8 @@ static void handle_input(int fd) if (select(devices.max_infd+1, &fds, NULL, NULL, &poll) == 0) break; - /* Otherwise, call the device(s) which have readable - * file descriptors and a method of handling them. */ + /* Otherwise, call the device(s) which have readable file + * descriptors and a method of handling them. */ for (i = devices.dev; i; i = i->next) { if (i->handle_input && FD_ISSET(i->fd, &fds)) { int dev_fd; @@ -1015,8 +1016,7 @@ static void handle_input(int fd) * should no longer service it. Networking and * console do this when there's no input * buffers to deliver into. Console also uses - * it when it discovers that stdin is - * closed. */ + * it when it discovers that stdin is closed. */ FD_CLR(i->fd, &devices.infds); /* Tell waker to ignore it too, by sending a * negative fd number (-1, since 0 is a valid @@ -1033,7 +1033,8 @@ static void handle_input(int fd) * * All devices need a descriptor so the Guest knows it exists, and a "struct * device" so the Launcher can keep track of it. We have common helper - * routines to allocate and manage them. */ + * routines to allocate and manage them. + */ /* The layout of the device page is a "struct lguest_device_desc" followed by a * number of virtqueue descriptors, then two sets of feature bits, then an @@ -1078,7 +1079,7 @@ static void add_virtqueue(struct device *dev, unsigned int num_descs, struct virtqueue **i, *vq = malloc(sizeof(*vq)); void *p; - /* First we need some pages for this virtqueue. */ + /* First we need some memory for this virtqueue. */ pages = (vring_size(num_descs, getpagesize()) + getpagesize() - 1) / getpagesize(); p = get_pages(pages); @@ -1122,7 +1123,7 @@ static void add_virtqueue(struct device *dev, unsigned int num_descs, } /* The first half of the feature bitmask is for us to advertise features. The - * second half if for the Guest to accept features. */ + * second half is for the Guest to accept features. */ static void add_feature(struct device *dev, unsigned bit) { u8 *features = get_feature_bits(dev); @@ -1151,7 +1152,9 @@ static void set_config(struct device *dev, unsigned len, const void *conf) } /* This routine does all the creation and setup of a new device, including - * calling new_dev_desc() to allocate the descriptor and device memory. */ + * calling new_dev_desc() to allocate the descriptor and device memory. + * + * See what I mean about userspace being boring? */ static struct device *new_device(const char *name, u16 type, int fd, bool (*handle_input)(int, struct device *)) { @@ -1492,7 +1495,10 @@ static int io_thread(void *_dev) while (read(vblk->workpipe[0], &c, 1) == 1) { /* We acknowledge each request immediately to reduce latency, * rather than waiting until we've done them all. I haven't - * measured to see if it makes any difference. */ + * measured to see if it makes any difference. + * + * That would be an interesting test, wouldn't it? You could + * also try having more than one I/O thread. 
*/ while (service_io(dev)) write(vblk->done_fd, &c, 1); } @@ -1500,7 +1506,7 @@ static int io_thread(void *_dev) } /* Now we've seen the I/O thread, we return to the Launcher to see what happens - * when the thread tells us it's completed some I/O. */ + * when that thread tells us it's completed some I/O. */ static bool handle_io_finish(int fd, struct device *dev) { char c; @@ -1572,11 +1578,12 @@ static void setup_block_file(const char *filename) * more work. */ pipe(vblk->workpipe); - /* Create stack for thread and run it */ + /* Create stack for thread and run it. Since stack grows upwards, we + * point the stack pointer to the end of this region. */ stack = malloc(32768); /* SIGCHLD - We dont "wait" for our cloned thread, so prevent it from * becoming a zombie. */ - if (clone(io_thread, stack + 32768, CLONE_VM | SIGCHLD, dev) == -1) + if (clone(io_thread, stack + 32768, CLONE_VM | SIGCHLD, dev) == -1) err(1, "Creating clone"); /* We don't need to keep the I/O thread's end of the pipes open. */ @@ -1586,14 +1593,14 @@ static void setup_block_file(const char *filename) verbose("device %u: virtblock %llu sectors\n", devices.device_num, le64_to_cpu(conf.capacity)); } -/* That's the end of device setup. :*/ +/* That's the end of device setup. */ -/* Reboot */ +/*L:230 Reboot is pretty easy: clean up and exec() the Launcher afresh. */ static void __attribute__((noreturn)) restart_guest(void) { unsigned int i; - /* Closing pipes causes the waker thread and io_threads to die, and + /* Closing pipes causes the Waker thread and io_threads to die, and * closing /dev/lguest cleans up the Guest. Since we don't track all * open fds, we simply close everything beyond stderr. */ for (i = 3; i < FD_SETSIZE; i++) @@ -1602,7 +1609,7 @@ static void __attribute__((noreturn)) restart_guest(void) err(1, "Could not exec %s", main_args[0]); } -/*L:220 Finally we reach the core of the Launcher, which runs the Guest, serves +/*L:220 Finally we reach the core of the Launcher which runs the Guest, serves * its input and output, and finally, lays it to rest. */ static void __attribute__((noreturn)) run_guest(int lguest_fd) { @@ -1643,7 +1650,7 @@ static void __attribute__((noreturn)) run_guest(int lguest_fd) err(1, "Resetting break"); } } -/* +/*L:240 * This is the end of the Launcher. The good news: we are over halfway * through! The bad news: the most fiendish part of the code still lies ahead * of us. @@ -1690,8 +1697,8 @@ int main(int argc, char *argv[]) * device receive input from a file descriptor, we keep an fdset * (infds) and the maximum fd number (max_infd) with the head of the * list. We also keep a pointer to the last device. Finally, we keep - * the next interrupt number to hand out (1: remember that 0 is used by - * the timer). */ + * the next interrupt number to use for devices (1: remember that 0 is + * used by the timer). */ FD_ZERO(&devices.infds); devices.max_infd = -1; devices.lastdev = NULL; @@ -1792,8 +1799,8 @@ int main(int argc, char *argv[]) lguest_fd = tell_kernel(pgdir, start); /* We fork off a child process, which wakes the Launcher whenever one - * of the input file descriptors needs attention. Otherwise we would - * run the Guest until it tries to output something. */ + * of the input file descriptors needs attention. We call this the + * Waker, and we'll cover it in a moment. */ waker_fd = setup_waker(lguest_fd); /* Finally, run the Guest. This doesn't return. 
*/ diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index a104c532ff70..3335b4595efd 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -10,21 +10,19 @@ * (such as the example in Documentation/lguest/lguest.c) is called the * Launcher. * - * Secondly, we only run specially modified Guests, not normal kernels. When - * you set CONFIG_LGUEST to 'y' or 'm', this automatically sets - * CONFIG_LGUEST_GUEST=y, which compiles this file into the kernel so it knows - * how to be a Guest. This means that you can use the same kernel you boot - * normally (ie. as a Host) as a Guest. + * Secondly, we only run specially modified Guests, not normal kernels: setting + * CONFIG_LGUEST_GUEST to "y" compiles this file into the kernel so it knows + * how to be a Guest at boot time. This means that you can use the same kernel + * you boot normally (ie. as a Host) as a Guest. * * These Guests know that they cannot do privileged operations, such as disable * interrupts, and that they have to ask the Host to do such things explicitly. * This file consists of all the replacements for such low-level native * hardware operations: these special Guest versions call the Host. * - * So how does the kernel know it's a Guest? The Guest starts at a special - * entry point marked with a magic string, which sets up a few things then - * calls here. We replace the native functions various "paravirt" structures - * with our Guest versions, then boot like normal. :*/ + * So how does the kernel know it's a Guest? We'll see that later, but let's + * just say that we end up here where we replace the native functions various + * "paravirt" structures with our Guest versions, then boot like normal. :*/ /* * Copyright (C) 2006, Rusty Russell IBM Corporation. @@ -134,7 +132,7 @@ static void async_hcall(unsigned long call, unsigned long arg1, * lguest_leave_lazy_mode(). * * So, when we're in lazy mode, we call async_hcall() to store the call for - * future processing. */ + * future processing: */ static void lazy_hcall(unsigned long call, unsigned long arg1, unsigned long arg2, @@ -147,7 +145,7 @@ static void lazy_hcall(unsigned long call, } /* When lazy mode is turned off reset the per-cpu lazy mode variable and then - * issue a hypercall to flush any stored calls. */ + * issue the do-nothing hypercall to flush any stored calls. */ static void lguest_leave_lazy_mode(void) { paravirt_leave_lazy(paravirt_get_lazy_mode()); @@ -164,7 +162,7 @@ static void lguest_leave_lazy_mode(void) * * So instead we keep an "irq_enabled" field inside our "struct lguest_data", * which the Guest can update with a single instruction. The Host knows to - * check there when it wants to deliver an interrupt. + * check there before it tries to deliver an interrupt. */ /* save_flags() is expected to return the processor state (ie. "flags"). The @@ -196,10 +194,15 @@ static void irq_enable(void) /*M:003 Note that we don't check for outstanding interrupts when we re-enable * them (or when we unmask an interrupt). This seems to work for the moment, * since interrupts are rare and we'll just get the interrupt on the next timer - * tick, but when we turn on CONFIG_NO_HZ, we should revisit this. One way + * tick, but now we can run with CONFIG_NO_HZ, we should revisit this. One way * would be to put the "irq_enabled" field in a page by itself, and have the * Host write-protect it when an interrupt comes in when irqs are disabled. - * There will then be a page fault as soon as interrupts are re-enabled. 
:*/ + * There will then be a page fault as soon as interrupts are re-enabled. + * + * A better method is to implement soft interrupt disable generally for x86: + * instead of disabling interrupts, we set a flag. If an interrupt does come + * in, we then disable them for real. This is uncommon, so we could simply use + * a hypercall for interrupt control and not worry about efficiency. :*/ /*G:034 * The Interrupt Descriptor Table (IDT). @@ -212,6 +215,10 @@ static void irq_enable(void) static void lguest_write_idt_entry(gate_desc *dt, int entrynum, const gate_desc *g) { + /* The gate_desc structure is 8 bytes long: we hand it to the Host in + * two 32-bit chunks. The whole 32-bit kernel used to hand descriptors + * around like this; typesafety wasn't a big concern in Linux's early + * years. */ u32 *desc = (u32 *)g; /* Keep the local copy up to date. */ native_write_idt_entry(dt, entrynum, g); @@ -243,7 +250,8 @@ static void lguest_load_idt(const struct desc_ptr *desc) * * This is the opposite of the IDT code where we have a LOAD_IDT_ENTRY * hypercall and use that repeatedly to load a new IDT. I don't think it - * really matters, but wouldn't it be nice if they were the same? + * really matters, but wouldn't it be nice if they were the same? Wouldn't + * it be even better if you were the one to send the patch to fix it? */ static void lguest_load_gdt(const struct desc_ptr *desc) { @@ -298,9 +306,9 @@ static void lguest_load_tr_desc(void) /* The "cpuid" instruction is a way of querying both the CPU identity * (manufacturer, model, etc) and its features. It was introduced before the - * Pentium in 1993 and keeps getting extended by both Intel and AMD. As you - * might imagine, after a decade and a half this treatment, it is now a giant - * ball of hair. Its entry in the current Intel manual runs to 28 pages. + * Pentium in 1993 and keeps getting extended by both Intel, AMD and others. + * As you might imagine, after a decade and a half this treatment, it is now a + * giant ball of hair. Its entry in the current Intel manual runs to 28 pages. * * This instruction even it has its own Wikipedia entry. The Wikipedia entry * has been translated into 4 languages. I am not making this up! @@ -594,17 +602,17 @@ static unsigned long lguest_get_wallclock(void) return lguest_data.time.tv_sec; } -/* The TSC is a Time Stamp Counter. The Host tells us what speed it runs at, - * or 0 if it's unusable as a reliable clock source. This matches what we want - * here: if we return 0 from this function, the x86 TSC clock will not register - * itself. */ +/* The TSC is an Intel thing called the Time Stamp Counter. The Host tells us + * what speed it runs at, or 0 if it's unusable as a reliable clock source. + * This matches what we want here: if we return 0 from this function, the x86 + * TSC clock will give up and not register itself. */ static unsigned long lguest_cpu_khz(void) { return lguest_data.tsc_khz; } -/* If we can't use the TSC, the kernel falls back to our "lguest_clock", where - * we read the time value given to us by the Host. */ +/* If we can't use the TSC, the kernel falls back to our lower-priority + * "lguest_clock", where we read the time value given to us by the Host. */ static cycle_t lguest_clock_read(void) { unsigned long sec, nsec; @@ -648,12 +656,16 @@ static struct clocksource lguest_clock = { static int lguest_clockevent_set_next_event(unsigned long delta, struct clock_event_device *evt) { + /* FIXME: I don't think this can ever happen, but James tells me he had + * to put this code in. 
Maybe we should remove it now. Anyone? */ if (delta < LG_CLOCK_MIN_DELTA) { if (printk_ratelimit()) printk(KERN_DEBUG "%s: small delta %lu ns\n", __FUNCTION__, delta); return -ETIME; } + + /* Please wake us this far in the future. */ hcall(LHCALL_SET_CLOCKEVENT, delta, 0, 0); return 0; } @@ -738,7 +750,7 @@ static void lguest_time_init(void) * will not tolerate us trying to use that), the stack pointer, and the number * of pages in the stack. */ static void lguest_load_sp0(struct tss_struct *tss, - struct thread_struct *thread) + struct thread_struct *thread) { lazy_hcall(LHCALL_SET_STACK, __KERNEL_DS|0x1, thread->sp0, THREAD_SIZE/PAGE_SIZE); @@ -786,9 +798,8 @@ static void lguest_safe_halt(void) hcall(LHCALL_HALT, 0, 0, 0); } -/* Perhaps CRASH isn't the best name for this hypercall, but we use it to get a - * message out when we're crashing as well as elegant termination like powering - * off. +/* The SHUTDOWN hypercall takes a string to describe what's happening, and + * an argument which says whether this to restart (reboot) the Guest or not. * * Note that the Host always prefers that the Guest speak in physical addresses * rather than virtual addresses, so we use __pa() here. */ @@ -816,8 +827,9 @@ static struct notifier_block paniced = { /* Setting up memory is fairly easy. */ static __init char *lguest_memory_setup(void) { - /* We do this here and not earlier because lockcheck barfs if we do it - * before start_kernel() */ + /* We do this here and not earlier because lockcheck used to barf if we + * did it before start_kernel(). I think we fixed that, so it'd be + * nice to move it back to lguest_init. Patch welcome... */ atomic_notifier_chain_register(&panic_notifier_list, &paniced); /* The Linux bootloader header contains an "e820" memory map: the @@ -850,12 +862,19 @@ static __init int early_put_chars(u32 vtermno, const char *buf, int count) return len; } +/* Rebooting also tells the Host we're finished, but the RESTART flag tells the + * Launcher to reboot us. */ +static void lguest_restart(char *reason) +{ + hcall(LHCALL_SHUTDOWN, __pa(reason), LGUEST_SHUTDOWN_RESTART, 0); +} + /*G:050 * Patching (Powerfully Placating Performance Pedants) * - * We have already seen that pv_ops structures let us replace simple - * native instructions with calls to the appropriate back end all throughout - * the kernel. This allows the same kernel to run as a Guest and as a native + * We have already seen that pv_ops structures let us replace simple native + * instructions with calls to the appropriate back end all throughout the + * kernel. This allows the same kernel to run as a Guest and as a native * kernel, but it's slow because of all the indirect branches. * * Remember that David Wheeler quote about "Any problem in computer science can @@ -908,14 +927,9 @@ static unsigned lguest_patch(u8 type, u16 clobber, void *ibuf, return insn_len; } -static void lguest_restart(char *reason) -{ - hcall(LHCALL_SHUTDOWN, __pa(reason), LGUEST_SHUTDOWN_RESTART, 0); -} - -/*G:030 Once we get to lguest_init(), we know we're a Guest. The pv_ops - * structures in the kernel provide points for (almost) every routine we have - * to override to avoid privileged instructions. */ +/*G:030 Once we get to lguest_init(), we know we're a Guest. The various + * pv_ops structures in the kernel provide points for (almost) every routine we + * have to override to avoid privileged instructions. 
*/ __init void lguest_init(void) { /* We're under lguest, paravirt is enabled, and we're running at @@ -1003,9 +1017,9 @@ __init void lguest_init(void) * the normal data segment to get through booting. */ asm volatile ("mov %0, %%fs" : : "r" (__KERNEL_DS) : "memory"); - /* The Host uses the top of the Guest's virtual address space for the - * Host<->Guest Switcher, and it tells us how big that is in - * lguest_data.reserve_mem, set up on the LGUEST_INIT hypercall. */ + /* The Host<->Guest Switcher lives at the top of our address space, and + * the Host told us how big it is when we made LGUEST_INIT hypercall: + * it put the answer in lguest_data.reserve_mem */ reserve_top_address(lguest_data.reserve_mem); /* If we don't initialize the lock dependency checker now, it crashes @@ -1027,6 +1041,7 @@ __init void lguest_init(void) /* Math is always hard! */ new_cpu_data.hard_math = 1; + /* We don't have features. We have puppies! Puppies! */ #ifdef CONFIG_X86_MCE mce_disabled = 1; #endif @@ -1044,10 +1059,11 @@ __init void lguest_init(void) virtio_cons_early_init(early_put_chars); /* Last of all, we set the power management poweroff hook to point to - * the Guest routine to power off. */ + * the Guest routine to power off, and the reboot hook to our restart + * routine. */ pm_power_off = lguest_power_off; - machine_ops.restart = lguest_restart; + /* Now we're set up, call start_kernel() in init/main.c and we proceed * to boot as normal. It never returns. */ start_kernel(); diff --git a/arch/x86/lguest/i386_head.S b/arch/x86/lguest/i386_head.S index 95b6fbcded63..5c7cef34c9e7 100644 --- a/arch/x86/lguest/i386_head.S +++ b/arch/x86/lguest/i386_head.S @@ -5,13 +5,20 @@ #include #include -/*G:020 This is where we begin: head.S notes that the boot header's platform - * type field is "1" (lguest), so calls us here. +/*G:020 Our story starts with the kernel booting into startup_32 in + * arch/x86/kernel/head_32.S. It expects a boot header, which is created by + * the bootloader (the Launcher in our case). + * + * The startup_32 function does very little: it clears the uninitialized global + * C variables which we expect to be zero (ie. BSS) and then copies the boot + * header and kernel command line somewhere safe. Finally it checks the + * 'hardware_subarch' field. This was introduced in 2.6.24 for lguest and Xen: + * if it's set to '1' (lguest's assigned number), then it calls us here. * * WARNING: be very careful here! We're running at addresses equal to physical * addesses (around 0), not above PAGE_OFFSET as most code expectes * (eg. 0xC0000000). Jumps are relative, so they're OK, but we can't touch any - * data. + * data without remembering to subtract __PAGE_OFFSET! * * The .section line puts this code in .init.text so it will be discarded after * boot. */ @@ -24,7 +31,7 @@ ENTRY(lguest_entry) int $LGUEST_TRAP_ENTRY /* The Host put the toplevel pagetable in lguest_data.pgdir. The movsl - * instruction uses %esi implicitly as the source for the copy we' + * instruction uses %esi implicitly as the source for the copy we're * about to do. */ movl lguest_data - __PAGE_OFFSET + LGUEST_DATA_pgdir, %esi diff --git a/drivers/lguest/core.c b/drivers/lguest/core.c index c632c08cbbdc..5eea4356d703 100644 --- a/drivers/lguest/core.c +++ b/drivers/lguest/core.c @@ -1,8 +1,6 @@ /*P:400 This contains run_guest() which actually calls into the Host<->Guest * Switcher and analyzes the return, such as determining if the Guest wants the - * Host to do something. 
This file also contains useful helper routines, and a - * couple of non-obvious setup and teardown pieces which were implemented after - * days of debugging pain. :*/ + * Host to do something. This file also contains useful helper routines. :*/ #include #include #include @@ -49,8 +47,8 @@ static __init int map_switcher(void) * easy. */ - /* We allocate an array of "struct page"s. map_vm_area() wants the - * pages in this form, rather than just an array of pointers. */ + /* We allocate an array of struct page pointers. map_vm_area() wants + * this, rather than just an array of pages. */ switcher_page = kmalloc(sizeof(switcher_page[0])*TOTAL_SWITCHER_PAGES, GFP_KERNEL); if (!switcher_page) { @@ -172,7 +170,7 @@ void __lgread(struct lg_cpu *cpu, void *b, unsigned long addr, unsigned bytes) } } -/* This is the write (copy into guest) version. */ +/* This is the write (copy into Guest) version. */ void __lgwrite(struct lg_cpu *cpu, unsigned long addr, const void *b, unsigned bytes) { @@ -209,9 +207,9 @@ int run_guest(struct lg_cpu *cpu, unsigned long __user *user) if (cpu->break_out) return -EAGAIN; - /* Check if there are any interrupts which can be delivered - * now: if so, this sets up the hander to be executed when we - * next run the Guest. */ + /* Check if there are any interrupts which can be delivered now: + * if so, this sets up the hander to be executed when we next + * run the Guest. */ maybe_do_interrupt(cpu); /* All long-lived kernel loops need to check with this horrible @@ -246,8 +244,10 @@ int run_guest(struct lg_cpu *cpu, unsigned long __user *user) lguest_arch_handle_trap(cpu); } + /* Special case: Guest is 'dead' but wants a reboot. */ if (cpu->lg->dead == ERR_PTR(-ERESTART)) return -ERESTART; + /* The Guest is dead => "No such file or directory" */ return -ENOENT; } diff --git a/drivers/lguest/hypercalls.c b/drivers/lguest/hypercalls.c index 0f2cb4fd7c69..54d66f05fefa 100644 --- a/drivers/lguest/hypercalls.c +++ b/drivers/lguest/hypercalls.c @@ -29,7 +29,7 @@ #include "lg.h" /*H:120 This is the core hypercall routine: where the Guest gets what it wants. - * Or gets killed. Or, in the case of LHCALL_CRASH, both. */ + * Or gets killed. Or, in the case of LHCALL_SHUTDOWN, both. */ static void do_hcall(struct lg_cpu *cpu, struct hcall_args *args) { switch (args->arg0) { @@ -190,6 +190,13 @@ static void initialize(struct lg_cpu *cpu) * pagetable. */ guest_pagetable_clear_all(cpu); } +/*:*/ + +/*M:013 If a Guest reads from a page (so creates a mapping) that it has never + * written to, and then the Launcher writes to it (ie. the output of a virtual + * device), the Guest will still see the old page. In practice, this never + * happens: why would the Guest read a page which it has never written to? But + * a similar scenario might one day bite us, so it's worth mentioning. :*/ /*H:100 * Hypercalls @@ -227,7 +234,7 @@ void do_hypercalls(struct lg_cpu *cpu) * However, if we are signalled or the Guest sends I/O to the * Launcher, the run_guest() loop will exit without running the * Guest. When it comes back it would try to re-run the - * hypercall. */ + * hypercall. Finding that bug sucked. 
*/ cpu->hcall = NULL; } } diff --git a/drivers/lguest/interrupts_and_traps.c b/drivers/lguest/interrupts_and_traps.c index 32e97c1858e5..0414ddf87587 100644 --- a/drivers/lguest/interrupts_and_traps.c +++ b/drivers/lguest/interrupts_and_traps.c @@ -144,7 +144,6 @@ void maybe_do_interrupt(struct lg_cpu *cpu) if (copy_from_user(&blk, cpu->lg->lguest_data->blocked_interrupts, sizeof(blk))) return; - bitmap_andnot(blk, cpu->irqs_pending, blk, LGUEST_IRQS); /* Find the first interrupt. */ @@ -237,9 +236,9 @@ void free_interrupts(void) clear_bit(syscall_vector, used_vectors); } -/*H:220 Now we've got the routines to deliver interrupts, delivering traps - * like page fault is easy. The only trick is that Intel decided that some - * traps should have error codes: */ +/*H:220 Now we've got the routines to deliver interrupts, delivering traps like + * page fault is easy. The only trick is that Intel decided that some traps + * should have error codes: */ static int has_err(unsigned int trap) { return (trap == 8 || (trap >= 10 && trap <= 14) || trap == 17); diff --git a/drivers/lguest/lguest_device.c b/drivers/lguest/lguest_device.c index 1b2ec0bf5eb1..2bc9bf7e88e5 100644 --- a/drivers/lguest/lguest_device.c +++ b/drivers/lguest/lguest_device.c @@ -1,10 +1,10 @@ /*P:050 Lguest guests use a very simple method to describe devices. It's a - * series of device descriptors contained just above the top of normal + * series of device descriptors contained just above the top of normal Guest * memory. * * We use the standard "virtio" device infrastructure, which provides us with a * console, a network and a block driver. Each one expects some configuration - * information and a "virtqueue" mechanism to send and receive data. :*/ + * information and a "virtqueue" or two to send and receive data. :*/ #include #include #include @@ -53,7 +53,7 @@ struct lguest_device { * Device configurations * * The configuration information for a device consists of one or more - * virtqueues, a feature bitmaks, and some configuration bytes. The + * virtqueues, a feature bitmap, and some configuration bytes. The * configuration bytes don't really matter to us: the Launcher sets them up, and * the driver will look at them during setup. * @@ -179,7 +179,7 @@ struct lguest_vq_info }; /* When the virtio_ring code wants to prod the Host, it calls us here and we - * make a hypercall. We hand the page number of the virtqueue so the Host + * make a hypercall. We hand the physical address of the virtqueue so the Host * knows which virtqueue we're talking about. */ static void lg_notify(struct virtqueue *vq) { @@ -199,7 +199,8 @@ static void lg_notify(struct virtqueue *vq) * allocate its own pages and tell the Host where they are, but for lguest it's * simpler for the Host to simply tell us where the pages are. * - * So we provide devices with a "find virtqueue and set it up" function. */ + * So we provide drivers with a "find the Nth virtqueue and set it up" + * function. 
*/ static struct virtqueue *lg_find_vq(struct virtio_device *vdev, unsigned index, void (*callback)(struct virtqueue *vq)) diff --git a/drivers/lguest/lguest_user.c b/drivers/lguest/lguest_user.c index 2221485b0773..564e425d71dd 100644 --- a/drivers/lguest/lguest_user.c +++ b/drivers/lguest/lguest_user.c @@ -73,7 +73,7 @@ static ssize_t read(struct file *file, char __user *user, size_t size,loff_t*o) if (current != cpu->tsk) return -EPERM; - /* If the guest is already dead, we indicate why */ + /* If the Guest is already dead, we indicate why */ if (lg->dead) { size_t len; @@ -88,7 +88,7 @@ static ssize_t read(struct file *file, char __user *user, size_t size,loff_t*o) return len; } - /* If we returned from read() last time because the Guest notified, + /* If we returned from read() last time because the Guest sent I/O, * clear the flag. */ if (cpu->pending_notify) cpu->pending_notify = 0; @@ -97,14 +97,20 @@ static ssize_t read(struct file *file, char __user *user, size_t size,loff_t*o) return run_guest(cpu, (unsigned long __user *)user); } +/*L:025 This actually initializes a CPU. For the moment, a Guest is only + * uniprocessor, so "id" is always 0. */ static int lg_cpu_start(struct lg_cpu *cpu, unsigned id, unsigned long start_ip) { + /* We have a limited number the number of CPUs in the lguest struct. */ if (id >= NR_CPUS) return -EINVAL; + /* Set up this CPU's id, and pointer back to the lguest struct. */ cpu->id = id; cpu->lg = container_of((cpu - id), struct lguest, cpus[0]); cpu->lg->nr_cpus++; + + /* Each CPU has a timer it can set. */ init_clockdev(cpu); /* We need a complete page for the Guest registers: they are accessible @@ -120,11 +126,11 @@ static int lg_cpu_start(struct lg_cpu *cpu, unsigned id, unsigned long start_ip) * address. */ lguest_arch_setup_regs(cpu, start_ip); - /* Initialize the queue for the waker to wait on */ + /* Initialize the queue for the Waker to wait on */ init_waitqueue_head(&cpu->break_wq); /* We keep a pointer to the Launcher task (ie. current task) for when - * other Guests want to wake this one (inter-Guest I/O). */ + * other Guests want to wake this one (eg. console input). */ cpu->tsk = current; /* We need to keep a pointer to the Launcher's memory map, because if @@ -136,6 +142,7 @@ static int lg_cpu_start(struct lg_cpu *cpu, unsigned id, unsigned long start_ip) * when the same Guest runs on the same CPU twice. */ cpu->last_pages = NULL; + /* No error == success. */ return 0; } @@ -185,14 +192,13 @@ static int initialize(struct file *file, const unsigned long __user *input) lg->mem_base = (void __user *)(long)args[0]; lg->pfn_limit = args[1]; - /* This is the first cpu */ + /* This is the first cpu (cpu 0) and it will start booting at args[3] */ err = lg_cpu_start(&lg->cpus[0], 0, args[3]); if (err) goto release_guest; /* Initialize the Guest's shadow page tables, using the toplevel - * address the Launcher gave us. This allocates memory, so can - * fail. */ + * address the Launcher gave us. This allocates memory, so can fail. */ err = init_guest_pagetable(lg, args[2]); if (err) goto free_regs; @@ -218,11 +224,16 @@ unlock: /*L:010 The first operation the Launcher does must be a write. All writes * start with an unsigned long number: for the first write this must be * LHREQ_INITIALIZE to set up the Guest. After that the Launcher can use - * writes of other values to send interrupts. */ + * writes of other values to send interrupts. 
+ * + * Note that we overload the "offset" in the /dev/lguest file to indicate what + * CPU number we're dealing with. Currently this is always 0, since we only + * support uniprocessor Guests, but you can see the beginnings of SMP support + * here. */ static ssize_t write(struct file *file, const char __user *in, size_t size, loff_t *off) { - /* Once the guest is initialized, we hold the "struct lguest" in the + /* Once the Guest is initialized, we hold the "struct lguest" in the * file private data. */ struct lguest *lg = file->private_data; const unsigned long __user *input = (const unsigned long __user *)in; @@ -230,6 +241,7 @@ static ssize_t write(struct file *file, const char __user *in, struct lg_cpu *uninitialized_var(cpu); unsigned int cpu_id = *off; + /* The first value tells us what this request is. */ if (get_user(req, input) != 0) return -EFAULT; input++; diff --git a/drivers/lguest/page_tables.c b/drivers/lguest/page_tables.c index a7f64a9d67e0..d93500f24fbb 100644 --- a/drivers/lguest/page_tables.c +++ b/drivers/lguest/page_tables.c @@ -2,8 +2,8 @@ * previous encounters. It's functional, and as neat as it can be in the * circumstances, but be wary, for these things are subtle and break easily. * The Guest provides a virtual to physical mapping, but we can neither trust - * it nor use it: we verify and convert it here to point the hardware to the - * actual Guest pages when running the Guest. :*/ + * it nor use it: we verify and convert it here then point the CPU to the + * converted Guest pages when running the Guest. :*/ /* Copyright (C) Rusty Russell IBM Corporation 2006. * GPL v2 and any later version */ @@ -106,6 +106,11 @@ static unsigned long gpte_addr(pgd_t gpgd, unsigned long vaddr) BUG_ON(!(pgd_flags(gpgd) & _PAGE_PRESENT)); return gpage + ((vaddr>>PAGE_SHIFT) % PTRS_PER_PTE) * sizeof(pte_t); } +/*:*/ + +/*M:014 get_pfn is slow; it takes the mmap sem and calls get_user_pages. We + * could probably try to grab batches of pages here as an optimization + * (ie. pre-faulting). :*/ /*H:350 This routine takes a page number given by the Guest and converts it to * an actual, physical page number. It can fail for several reasons: the @@ -113,8 +118,8 @@ static unsigned long gpte_addr(pgd_t gpgd, unsigned long vaddr) * and the page is read-only, or the write flag was set and the page was * shared so had to be copied, but we ran out of memory. * - * This holds a reference to the page, so release_pte() is careful to - * put that back. */ + * This holds a reference to the page, so release_pte() is careful to put that + * back. */ static unsigned long get_pfn(unsigned long virtpfn, int write) { struct page *page; @@ -532,13 +537,13 @@ static void do_set_pte(struct lg_cpu *cpu, int idx, * all processes. So when the page table above that address changes, we update * all the page tables, not just the current one. This is rare. * - * The benefit is that when we have to track a new page table, we can copy keep - * all the kernel mappings. This speeds up context switch immensely. */ + * The benefit is that when we have to track a new page table, we can keep all + * the kernel mappings. This speeds up context switch immensely. */ void guest_set_pte(struct lg_cpu *cpu, unsigned long gpgdir, unsigned long vaddr, pte_t gpte) { - /* Kernel mappings must be changed on all top levels. Slow, but - * doesn't happen often. */ + /* Kernel mappings must be changed on all top levels. Slow, but doesn't + * happen often. 
*/ if (vaddr >= cpu->lg->kernel_address) { unsigned int i; for (i = 0; i < ARRAY_SIZE(cpu->lg->pgdirs); i++) @@ -704,12 +709,11 @@ static __init void populate_switcher_pte_page(unsigned int cpu, /* We've made it through the page table code. Perhaps our tired brains are * still processing the details, or perhaps we're simply glad it's over. * - * If nothing else, note that all this complexity in juggling shadow page - * tables in sync with the Guest's page tables is for one reason: for most - * Guests this page table dance determines how bad performance will be. This - * is why Xen uses exotic direct Guest pagetable manipulation, and why both - * Intel and AMD have implemented shadow page table support directly into - * hardware. + * If nothing else, note that all this complexity in juggling shadow page tables + * in sync with the Guest's page tables is for one reason: for most Guests this + * page table dance determines how bad performance will be. This is why Xen + * uses exotic direct Guest pagetable manipulation, and why both Intel and AMD + * have implemented shadow page table support directly into hardware. * * There is just one file remaining in the Host. */ diff --git a/drivers/lguest/x86/core.c b/drivers/lguest/x86/core.c index 635187812d52..5126d5d9ea0e 100644 --- a/drivers/lguest/x86/core.c +++ b/drivers/lguest/x86/core.c @@ -17,6 +17,13 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ +/*P:450 This file contains the x86-specific lguest code. It used to be all + * mixed in with drivers/lguest/core.c but several foolhardy code slashers + * wrestled most of the dependencies out to here in preparation for porting + * lguest to other architectures (see what I mean by foolhardy?). + * + * This also contains a couple of non-obvious setup and teardown pieces which + * were implemented after days of debugging pain. :*/ #include #include #include @@ -157,6 +164,8 @@ static void run_guest_once(struct lg_cpu *cpu, struct lguest_pages *pages) * also simplify copy_in_guest_info(). Note that we'd still need to restore * things when we exit to Launcher userspace, but that's fairly easy. * + * We could also try using this hooks for PGE, but that might be too expensive. + * * The hooks were designed for KVM, but we can also put them to good use. :*/ /*H:040 This is the i386-specific code to setup and run the Guest. Interrupts @@ -182,7 +191,7 @@ void lguest_arch_run_guest(struct lg_cpu *cpu) * was doing. */ run_guest_once(cpu, lguest_pages(raw_smp_processor_id())); - /* Note that the "regs" pointer contains two extra entries which are + /* Note that the "regs" structure contains two extra entries which are * not really registers: a trap number which says what interrupt or * trap made the switcher code come back, and an error code which some * traps set. */ @@ -293,11 +302,10 @@ void lguest_arch_handle_trap(struct lg_cpu *cpu) break; case 14: /* We've intercepted a Page Fault. */ /* The Guest accessed a virtual address that wasn't mapped. - * This happens a lot: we don't actually set up most of the - * page tables for the Guest at all when we start: as it runs - * it asks for more and more, and we set them up as - * required. In this case, we don't even tell the Guest that - * the fault happened. + * This happens a lot: we don't actually set up most of the page + * tables for the Guest at all when we start: as it runs it asks + * for more and more, and we set them up as required. 
In this + * case, we don't even tell the Guest that the fault happened. * * The errcode tells whether this was a read or a write, and * whether kernel or userspace code. */ @@ -342,7 +350,7 @@ void lguest_arch_handle_trap(struct lg_cpu *cpu) if (!deliver_trap(cpu, cpu->regs->trapnum)) /* If the Guest doesn't have a handler (either it hasn't * registered any yet, or it's one of the faults we don't let - * it handle), it dies with a cryptic error message. */ + * it handle), it dies with this cryptic error message. */ kill_guest(cpu, "unhandled trap %li at %#lx (%#lx)", cpu->regs->trapnum, cpu->regs->eip, cpu->regs->trapnum == 14 ? cpu->arch.last_pagefault @@ -375,8 +383,8 @@ void __init lguest_arch_host_init(void) * The only exception is the interrupt handlers in switcher.S: their * addresses are placed in a table (default_idt_entries), so we need to * update the table with the new addresses. switcher_offset() is a - * convenience function which returns the distance between the builtin - * switcher code and the high-mapped copy we just made. */ + * convenience function which returns the distance between the + * compiled-in switcher code and the high-mapped copy we just made. */ for (i = 0; i < IDT_ENTRIES; i++) default_idt_entries[i] += switcher_offset(); @@ -416,7 +424,7 @@ void __init lguest_arch_host_init(void) state->guest_gdt_desc.address = (long)&state->guest_gdt; /* We know where we want the stack to be when the Guest enters - * the switcher: in pages->regs. The stack grows upwards, so + * the Switcher: in pages->regs. The stack grows upwards, so * we start it at the end of that structure. */ state->guest_tss.sp0 = (long)(&pages->regs + 1); /* And this is the GDT entry to use for the stack: we keep a @@ -513,8 +521,8 @@ int lguest_arch_init_hypercalls(struct lg_cpu *cpu) { u32 tsc_speed; - /* The pointer to the Guest's "struct lguest_data" is the only - * argument. We check that address now. */ + /* The pointer to the Guest's "struct lguest_data" is the only argument. + * We check that address now. */ if (!lguest_address_ok(cpu->lg, cpu->hcall->arg1, sizeof(*cpu->lg->lguest_data))) return -EFAULT; @@ -546,6 +554,7 @@ int lguest_arch_init_hypercalls(struct lg_cpu *cpu) return 0; } +/*:*/ /*L:030 lguest_arch_setup_regs() * diff --git a/drivers/lguest/x86/switcher_32.S b/drivers/lguest/x86/switcher_32.S index 0af8baaa0d4a..3fc15318a80f 100644 --- a/drivers/lguest/x86/switcher_32.S +++ b/drivers/lguest/x86/switcher_32.S @@ -1,6 +1,6 @@ -/*P:900 This is the Switcher: code which sits at 0xFFC00000 to do the low-level - * Guest<->Host switch. It is as simple as it can be made, but it's naturally - * very specific to x86. +/*P:900 This is the Switcher: code which sits at 0xFFC00000 astride both the + * Host and Guest to do the low-level Guest<->Host switch. It is as simple as + * it can be made, but it's naturally very specific to x86. * * You have now completed Preparation. If this has whet your appetite; if you * are feeling invigorated and refreshed then the next, more challenging stage @@ -189,7 +189,7 @@ ENTRY(switch_to_guest) // Interrupts are turned back on: we are Guest. iret -// We treat two paths to switch back to the Host +// We tread two paths to switch back to the Host // Yet both must save Guest state and restore Host // So we put the routine in a macro. 
#define SWITCH_TO_HOST \ diff --git a/include/asm-x86/lguest_hcall.h b/include/asm-x86/lguest_hcall.h index 758b9a5d4539..f239e7069cab 100644 --- a/include/asm-x86/lguest_hcall.h +++ b/include/asm-x86/lguest_hcall.h @@ -27,7 +27,7 @@ #ifndef __ASSEMBLY__ #include -/*G:031 First, how does our Guest contact the Host to ask for privileged +/*G:031 But first, how does our Guest contact the Host to ask for privileged * operations? There are two ways: the direct way is to make a "hypercall", * to make requests of the Host Itself. * diff --git a/include/linux/lguest_launcher.h b/include/linux/lguest_launcher.h index 589be3e1f3ac..e7217dc58f39 100644 --- a/include/linux/lguest_launcher.h +++ b/include/linux/lguest_launcher.h @@ -16,6 +16,10 @@ * a new device, we simply need to write a new virtio driver and create support * for it in the Launcher: this code won't need to change. * + * Virtio devices are also used by kvm, so we can simply reuse their optimized + * device drivers. And one day when everyone uses virtio, my plan will be + * complete. Bwahahahah! + * * Devices are described by a simplified ID, a status byte, and some "config" * bytes which describe this device's configuration. This is placed by the * Launcher just above the top of physical memory: @@ -26,7 +30,7 @@ struct lguest_device_desc { /* The number of virtqueues (first in config array) */ __u8 num_vq; /* The number of bytes of feature bits. Multiply by 2: one for host - * features and one for guest acknowledgements. */ + * features and one for Guest acknowledgements. */ __u8 feature_len; /* The number of bytes of the config array after virtqueues. */ __u8 config_len; -- cgit v1.2.3 From 50fd4407b8bfbde7c1a0bfe4f24de7df37164342 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 27 Mar 2008 17:42:50 -0700 Subject: [NET]: Use local_irq_{save,restore}() in napi_complete(). Based upon a lockdep report. Since ->poll() can be invoked from netpoll with interrupts disabled, we must not unconditionally enable interrupts in napi_complete(). Instead we must use local_irq_{save,restore}(). Noticed by Peter Zijlstra: netpoll_poll() poll_napi() spin_trylock(&napi->poll_lock) poll_one_napi() napi->poll() := sky2_poll() napi_complete() local_irq_disable() local_irq_enable() <--- *BUG* irq_exit() do_softirq() net_rx_action() spin_lock(&napi->poll_lock) <--- Deadlock! Because we still hold the lock.... Signed-off-by: David S. Miller --- include/linux/netdevice.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index a2f003239c85..fae6a7ececdb 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -383,9 +383,11 @@ static inline void __napi_complete(struct napi_struct *n) static inline void napi_complete(struct napi_struct *n) { - local_irq_disable(); + unsigned long flags; + + local_irq_save(flags); __napi_complete(n); - local_irq_enable(); + local_irq_restore(flags); } /** -- cgit v1.2.3 From 7c4b93d8269b9d35971a8239426b1f6ddc3d5ef7 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 21 Mar 2008 23:59:49 -0400 Subject: [PATCH] count ghost references to vfsmounts make propagate_mount_busy() exclude references from the vfsmounts that had been isolated by umount_tree() and are just waiting for release_mounts() to dispose of their ->mnt_parent/->mnt_mountpoint. 
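In other words (illustrative sketch only, condensed from the hunks below): a child detached by umount_tree() keeps pinning its parent through ->mnt_parent until release_mounts() finally drops it, so the parent counts such "ghosts" and the busy check subtracts them:

/*
 * umount_tree():    child cut loose     -> p->mnt_parent->mnt_ghosts++;
 * release_mounts(): child finally freed -> m->mnt_ghosts--;
 */
static inline int do_refcount_check(struct vfsmount *mnt, int count)
{
	/* references held only by not-yet-released ghosts don't count */
	int mycount = atomic_read(&mnt->mnt_count) - mnt->mnt_ghosts;

	return mycount > count;
}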
Signed-off-by: Al Viro --- fs/namespace.c | 5 ++++- fs/pnode.c | 2 +- include/linux/mount.h | 1 + 3 files changed, 6 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/fs/namespace.c b/fs/namespace.c index 6324dfc80dc6..c175218ebae1 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -548,6 +548,7 @@ void release_mounts(struct list_head *head) m = mnt->mnt_parent; mnt->mnt_mountpoint = mnt->mnt_root; mnt->mnt_parent = mnt; + m->mnt_ghosts--; spin_unlock(&vfsmount_lock); dput(dentry); mntput(m); @@ -572,8 +573,10 @@ void umount_tree(struct vfsmount *mnt, int propagate, struct list_head *kill) __touch_mnt_namespace(p->mnt_ns); p->mnt_ns = NULL; list_del_init(&p->mnt_child); - if (p->mnt_parent != p) + if (p->mnt_parent != p) { + p->mnt_parent->mnt_ghosts++; p->mnt_mountpoint->d_mounted--; + } change_mnt_propagation(p, MS_PRIVATE); } } diff --git a/fs/pnode.c b/fs/pnode.c index 05ba692bc540..1d8f5447f3f7 100644 --- a/fs/pnode.c +++ b/fs/pnode.c @@ -225,7 +225,7 @@ out: */ static inline int do_refcount_check(struct vfsmount *mnt, int count) { - int mycount = atomic_read(&mnt->mnt_count); + int mycount = atomic_read(&mnt->mnt_count) - mnt->mnt_ghosts; return (mycount > count); } diff --git a/include/linux/mount.h b/include/linux/mount.h index 6d3047d8c91c..dac5e67ff3ee 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -61,6 +61,7 @@ struct vfsmount { atomic_t mnt_count; int mnt_expiry_mark; /* true if marked for expiry */ int mnt_pinned; + int mnt_ghosts; }; static inline struct vfsmount *mntget(struct vfsmount *mnt) -- cgit v1.2.3 From c35038becad0adb0e25261fff66d85b1a6ddd0c2 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 22 Mar 2008 00:46:23 -0400 Subject: [PATCH] do shrink_submounts() for all fs types ... and take it out of ->umount_begin() instances. Call with all locks already taken (by do_umount()) and leave calling release_mounts() to caller (it will do release_mounts() anyway, so we can just put into the same list). 
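The resulting contract, schematically (a sketch of the intent, not a new hunk): shrink_submounts() becomes static to fs/namespace.c, is invoked by do_umount() under the namespace_sem and vfsmount_lock it already holds, and only collects the doomed mounts onto the caller's list; the caller's eventual release_mounts() disposes of them.

/* called with namespace_sem and vfsmount_lock held; appends expired
 * submounts to *umounts for the caller's later release_mounts() */
static void shrink_submounts(struct vfsmount *mnt, struct list_head *umounts);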
Signed-off-by: Al Viro --- fs/afs/internal.h | 1 - fs/afs/mntpt.c | 8 -------- fs/afs/super.c | 1 - fs/cifs/cifs_dfs_ref.c | 1 - fs/namespace.c | 23 ++++++++++------------- fs/nfs/super.c | 2 -- include/linux/mount.h | 1 - 7 files changed, 10 insertions(+), 27 deletions(-) (limited to 'include') diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 5ca3625cd39e..9ba16edc0af2 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -573,7 +573,6 @@ extern const struct file_operations afs_mntpt_file_operations; extern int afs_mntpt_check_symlink(struct afs_vnode *, struct key *); extern void afs_mntpt_kill_timer(void); -extern void afs_umount_begin(struct vfsmount *, int); /* * proc.c diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c index a3510b8ba3e7..2f5503902c37 100644 --- a/fs/afs/mntpt.c +++ b/fs/afs/mntpt.c @@ -283,11 +283,3 @@ void afs_mntpt_kill_timer(void) cancel_delayed_work(&afs_mntpt_expiry_timer); flush_scheduled_work(); } - -/* - * begin unmount by attempting to remove all automounted mountpoints we added - */ -void afs_umount_begin(struct vfsmount *vfsmnt, int flags) -{ - shrink_submounts(vfsmnt, &afs_vfsmounts); -} diff --git a/fs/afs/super.c b/fs/afs/super.c index 36bbce45f44b..4b572b801d8d 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -50,7 +50,6 @@ static const struct super_operations afs_super_ops = { .write_inode = afs_write_inode, .destroy_inode = afs_destroy_inode, .clear_inode = afs_clear_inode, - .umount_begin = afs_umount_begin, .put_super = afs_put_super, .show_options = generic_show_options, }; diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c index a1a95b027136..56c924033b78 100644 --- a/fs/cifs/cifs_dfs_ref.c +++ b/fs/cifs/cifs_dfs_ref.c @@ -33,7 +33,6 @@ void dfs_shrink_umount_helper(struct vfsmount *vfsmnt) { mark_mounts_for_expiry(&cifs_dfs_automount_list); mark_mounts_for_expiry(&cifs_dfs_automount_list); - shrink_submounts(vfsmnt, &cifs_dfs_automount_list); } /** diff --git a/fs/namespace.c b/fs/namespace.c index 1c78917ec930..7bd74b25930c 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -581,6 +581,8 @@ void umount_tree(struct vfsmount *mnt, int propagate, struct list_head *kill) } } +static void shrink_submounts(struct vfsmount *mnt, struct list_head *umounts); + static int do_umount(struct vfsmount *mnt, int flags) { struct super_block *sb = mnt->mnt_sb; @@ -653,6 +655,9 @@ static int do_umount(struct vfsmount *mnt, int flags) spin_lock(&vfsmount_lock); event++; + if (!(flags & MNT_DETACH)) + shrink_submounts(mnt, &umount_list); + retval = -EBUSY; if (flags & MNT_DETACH || !propagate_mount_busy(mnt, 2)) { if (!list_empty(&mnt->mnt_list)) @@ -1302,30 +1307,22 @@ resume: * process a list of expirable mountpoints with the intent of discarding any * submounts of a specific parent mountpoint */ -void shrink_submounts(struct vfsmount *mountpoint, struct list_head *mounts) +static void shrink_submounts(struct vfsmount *mnt, struct list_head *umounts) { LIST_HEAD(graveyard); - LIST_HEAD(umounts); - struct vfsmount *mnt; + struct vfsmount *m; - down_write(&namespace_sem); - spin_lock(&vfsmount_lock); /* extract submounts of 'mountpoint' from the expiration list */ - while (select_submounts(mountpoint, &graveyard)) { + while (select_submounts(mnt, &graveyard)) { while (!list_empty(&graveyard)) { - mnt = list_first_entry(&graveyard, struct vfsmount, + m = list_first_entry(&graveyard, struct vfsmount, mnt_expire); touch_mnt_namespace(mnt->mnt_ns); - umount_tree(mnt, 1, &umounts); + umount_tree(mnt, 1, umounts); } } - spin_unlock(&vfsmount_lock); 
- up_write(&namespace_sem); - release_mounts(&umounts); } -EXPORT_SYMBOL_GPL(shrink_submounts); - /* * Some copy_from_user() implementations do not return the exact number of * bytes remaining to copy on a fault. But copy_mount_options() requires that. diff --git a/fs/nfs/super.c b/fs/nfs/super.c index dd4dfcd632ec..f9219024f31a 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -589,8 +589,6 @@ static void nfs_umount_begin(struct vfsmount *vfsmnt, int flags) struct nfs_server *server = NFS_SB(vfsmnt->mnt_sb); struct rpc_clnt *rpc; - shrink_submounts(vfsmnt, &nfs_automount_list); - if (!(flags & MNT_FORCE)) return; /* -EIO all pending I/O */ diff --git a/include/linux/mount.h b/include/linux/mount.h index dac5e67ff3ee..5ee2df217cdf 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -99,7 +99,6 @@ extern int do_add_mount(struct vfsmount *newmnt, struct nameidata *nd, int mnt_flags, struct list_head *fslist); extern void mark_mounts_for_expiry(struct list_head *mounts); -extern void shrink_submounts(struct vfsmount *mountpoint, struct list_head *mounts); extern spinlock_t vfsmount_lock; extern dev_t name_to_dev_t(char *name); -- cgit v1.2.3 From 8c703d35fa91911dd92a18c31a718853f483ad80 Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Fri, 28 Mar 2008 14:15:49 -0700 Subject: in_atomic(): document why it is unsuitable for general use Discourage people from inappropriately using in_atomic() Signed-off-by: Jonathan Corbet Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hardirq.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index 49829988bfa0..897f723bd222 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h @@ -72,6 +72,13 @@ #define in_softirq() (softirq_count()) #define in_interrupt() (irq_count()) +/* + * Are we running in atomic context? WARNING: this macro cannot + * always detect atomic context; in particular, it cannot know about + * held spinlocks in non-preemptible kernels. Thus it should not be + * used in the general case to determine whether sleeping is possible. + * Do not use in_atomic() in driver code. + */ #define in_atomic() ((preempt_count() & ~PREEMPT_ACTIVE) != 0) #ifdef CONFIG_PREEMPT -- cgit v1.2.3 From 3afe3925987adc3fc052abe404e44520c2072fc8 Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Fri, 28 Mar 2008 14:16:01 -0700 Subject: kernel: add bit rotation helpers for 16 and 8 bit Will replace open-coded variants elsewhere. Done in the same style as the 32-bit versions. Signed-off-by: Harvey Harrison Acked-by: Randy Dunlap Cc: Johannes Berg Cc: John W. 
Linville Cc: Joe Perches Cc: Jiri Benc Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bitops.h | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) (limited to 'include') diff --git a/include/linux/bitops.h b/include/linux/bitops.h index 69c1edb9fe54..40d54731de7e 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -65,6 +65,46 @@ static inline __u32 ror32(__u32 word, unsigned int shift) return (word >> shift) | (word << (32 - shift)); } +/** + * rol16 - rotate a 16-bit value left + * @word: value to rotate + * @shift: bits to roll + */ +static inline __u16 rol16(__u16 word, unsigned int shift) +{ + return (word << shift) | (word >> (16 - shift)); +} + +/** + * ror16 - rotate a 16-bit value right + * @word: value to rotate + * @shift: bits to roll + */ +static inline __u16 ror16(__u16 word, unsigned int shift) +{ + return (word >> shift) | (word << (16 - shift)); +} + +/** + * rol8 - rotate an 8-bit value left + * @word: value to rotate + * @shift: bits to roll + */ +static inline __u8 rol8(__u8 word, unsigned int shift) +{ + return (word << shift) | (word >> (8 - shift)); +} + +/** + * ror8 - rotate an 8-bit value right + * @word: value to rotate + * @shift: bits to roll + */ +static inline __u8 ror8(__u8 word, unsigned int shift) +{ + return (word >> shift) | (word << (8 - shift)); +} + static inline unsigned fls_long(unsigned long l) { if (sizeof(l) == 4) -- cgit v1.2.3 From 5ac7ec85bcc70ef605657fb2d1106d27ab3bd131 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 28 Mar 2008 14:16:03 -0700 Subject: ext3: don't export ext3_fs.h and jbd.h Neither of the headers actually compiles when included from userpsace nor should it be made available as userspace tools should be using the libraries or at least headers from e2fsprogs. Signed-off-by: Christoph Hellwig Acked-by: "Theodore Ts'o" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/Kbuild | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/Kbuild b/include/linux/Kbuild index 4108b38ebb16..4a446a19295e 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild @@ -195,7 +195,6 @@ unifdef-y += ethtool.h unifdef-y += eventpoll.h unifdef-y += signalfd.h unifdef-y += ext2_fs.h -unifdef-y += ext3_fs.h unifdef-y += fb.h unifdef-y += fcntl.h unifdef-y += filter.h @@ -248,7 +247,6 @@ unifdef-y += isdn.h unifdef-y += isdnif.h unifdef-y += isdn_divertif.h unifdef-y += isdn_ppp.h -unifdef-y += jbd.h unifdef-y += joystick.h unifdef-y += kdev_t.h unifdef-y += kd.h -- cgit v1.2.3 From a5a04819c5740cb1aa217af2cc8f5ef26f33d744 Mon Sep 17 00:00:00 2001 From: Joonwoo Park Date: Fri, 28 Mar 2008 16:28:36 -0700 Subject: [LLC]: station source mac address kill unnecessary llc_station_mac_sa. Signed-off-by: Joonwoo Park Acked-by: Arnaldo Carvalho de Melo Signed-off-by: David S. 
Miller --- include/net/llc.h | 1 - net/llc/llc_core.c | 8 +------- net/llc/llc_station.c | 6 +++--- 3 files changed, 4 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/net/llc.h b/include/net/llc.h index f5024583fc8b..7940da1606e7 100644 --- a/include/net/llc.h +++ b/include/net/llc.h @@ -65,7 +65,6 @@ struct llc_sap { extern struct list_head llc_sap_list; extern rwlock_t llc_sap_list_lock; -extern unsigned char llc_station_mac_sa[ETH_ALEN]; extern int llc_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev); diff --git a/net/llc/llc_core.c b/net/llc/llc_core.c index 248b5903bb13..00de27cef46b 100644 --- a/net/llc/llc_core.c +++ b/net/llc/llc_core.c @@ -25,8 +25,6 @@ LIST_HEAD(llc_sap_list); DEFINE_RWLOCK(llc_sap_list_lock); -unsigned char llc_station_mac_sa[ETH_ALEN]; - /** * llc_sap_alloc - allocates and initializes sap. * @@ -37,8 +35,8 @@ static struct llc_sap *llc_sap_alloc(void) struct llc_sap *sap = kzalloc(sizeof(*sap), GFP_ATOMIC); if (sap) { + /* sap->laddr.mac - leave as a null, it's filled by bind */ sap->state = LLC_SAP_STATE_ACTIVE; - memcpy(sap->laddr.mac, llc_station_mac_sa, ETH_ALEN); rwlock_init(&sap->sk_list.lock); atomic_set(&sap->refcnt, 1); } @@ -167,10 +165,6 @@ static int __init llc_init(void) if (dev != NULL) dev = next_net_device(dev); - if (dev != NULL) - memcpy(llc_station_mac_sa, dev->dev_addr, ETH_ALEN); - else - memset(llc_station_mac_sa, 0, ETH_ALEN); dev_add_pack(&llc_packet_type); dev_add_pack(&llc_tr_packet_type); return 0; diff --git a/net/llc/llc_station.c b/net/llc/llc_station.c index 6f2ea2090322..959e7f31833b 100644 --- a/net/llc/llc_station.c +++ b/net/llc/llc_station.c @@ -259,7 +259,7 @@ static int llc_station_ac_send_null_dsap_xid_c(struct sk_buff *skb) goto out; llc_pdu_header_init(nskb, LLC_PDU_TYPE_U, 0, 0, LLC_PDU_CMD); llc_pdu_init_as_xid_cmd(nskb, LLC_XID_NULL_CLASS_2, 127); - rc = llc_mac_hdr_init(nskb, llc_station_mac_sa, llc_station_mac_sa); + rc = llc_mac_hdr_init(nskb, skb->dev->dev_addr, skb->dev->dev_addr); if (unlikely(rc)) goto free; llc_station_send_pdu(nskb); @@ -283,7 +283,7 @@ static int llc_station_ac_send_xid_r(struct sk_buff *skb) llc_pdu_decode_ssap(skb, &dsap); llc_pdu_header_init(nskb, LLC_PDU_TYPE_U, 0, dsap, LLC_PDU_RSP); llc_pdu_init_as_xid_rsp(nskb, LLC_XID_NULL_CLASS_2, 127); - rc = llc_mac_hdr_init(nskb, llc_station_mac_sa, mac_da); + rc = llc_mac_hdr_init(nskb, skb->dev->dev_addr, mac_da); if (unlikely(rc)) goto free; llc_station_send_pdu(nskb); @@ -307,7 +307,7 @@ static int llc_station_ac_send_test_r(struct sk_buff *skb) llc_pdu_decode_ssap(skb, &dsap); llc_pdu_header_init(nskb, LLC_PDU_TYPE_U, 0, dsap, LLC_PDU_RSP); llc_pdu_init_as_test_rsp(nskb, skb); - rc = llc_mac_hdr_init(nskb, llc_station_mac_sa, mac_da); + rc = llc_mac_hdr_init(nskb, skb->dev->dev_addr, mac_da); if (unlikely(rc)) goto free; llc_station_send_pdu(nskb); -- cgit v1.2.3 From 0ef4730927c54cac5006759fa0cf50fbeff9d191 Mon Sep 17 00:00:00 2001 From: Matti Linnanvuori Date: Fri, 28 Mar 2008 16:33:00 -0700 Subject: net: Comment dev_kfree_skb_irq and dev_kfree_skb_any better Comment dev_kfree_skb_irq and dev_kfree_skb_any better. Signed-off-by: Matti Linnanvuori Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index fae6a7ececdb..ee81906b5164 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1074,12 +1074,14 @@ static inline int netif_is_multiqueue(const struct net_device *dev) } /* Use this variant when it is known for sure that it - * is executing from interrupt context. + * is executing from hardware interrupt context or with hardware interrupts + * disabled. */ extern void dev_kfree_skb_irq(struct sk_buff *skb); /* Use this variant in places where it could be invoked - * either from interrupt or non-interrupt context. + * from either hardware interrupt or other context, with hardware interrupts + * either disabled or enabled. */ extern void dev_kfree_skb_any(struct sk_buff *skb); -- cgit v1.2.3 From 3ec25ebd69dc120d0590e64caaf1477aa88c8a93 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 27 Mar 2008 18:37:14 +0900 Subject: libata: ATA_EHI_LPM should be ATA_EH_LPM EH actions are ATA_EH_* not ATA_EHI_*. Rename ATA_EHI_LPM to ATA_EH_LPM. Signed-off-by: Tejun Heo Cc: Kristen Carlson Accardi Signed-off-by: Jeff Garzik --- drivers/ata/libata-core.c | 2 +- drivers/ata/libata-eh.c | 2 +- include/linux/libata.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index c4248b37ff64..48519887f94a 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -972,7 +972,7 @@ static void ata_dev_disable_pm(struct ata_device *dev) void ata_lpm_schedule(struct ata_port *ap, enum link_pm policy) { ap->pm_policy = policy; - ap->link.eh_info.action |= ATA_EHI_LPM; + ap->link.eh_info.action |= ATA_EH_LPM; ap->link.eh_info.flags |= ATA_EHI_NO_AUTOPSY; ata_port_schedule_eh(ap); } diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c index 681252fd8143..a5830329eda4 100644 --- a/drivers/ata/libata-eh.c +++ b/drivers/ata/libata-eh.c @@ -2748,7 +2748,7 @@ int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset, ehc->i.flags &= ~ATA_EHI_SETMODE; } - if (ehc->i.action & ATA_EHI_LPM) + if (ehc->i.action & ATA_EH_LPM) ata_link_for_each_dev(dev, link) ata_dev_enable_pm(dev, ap->pm_policy); diff --git a/include/linux/libata.h b/include/linux/libata.h index 269cdba09578..b064bfeb69ee 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -295,6 +295,7 @@ enum { ATA_EH_SOFTRESET = (1 << 1), ATA_EH_HARDRESET = (1 << 2), ATA_EH_ENABLE_LINK = (1 << 3), + ATA_EH_LPM = (1 << 4), /* link power management action */ ATA_EH_RESET_MASK = ATA_EH_SOFTRESET | ATA_EH_HARDRESET, ATA_EH_PERDEV_MASK = ATA_EH_REVALIDATE, @@ -304,7 +305,6 @@ enum { ATA_EHI_RESUME_LINK = (1 << 1), /* resume link (reset modifier) */ ATA_EHI_NO_AUTOPSY = (1 << 2), /* no autopsy */ ATA_EHI_QUIET = (1 << 3), /* be quiet */ - ATA_EHI_LPM = (1 << 4), /* link power management action */ ATA_EHI_DID_SOFTRESET = (1 << 16), /* already soft-reset this port */ ATA_EHI_DID_HARDRESET = (1 << 17), /* already soft-reset this port */ -- cgit v1.2.3 From 729d4de96a5c090e40a918a41f63b7fb1b27c240 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Sat, 29 Mar 2008 19:55:17 +0100 Subject: ide: fix defining SUPPORT_VLB_SYNC We need to check for CONFIG_{CRIS,FRV} not {CRIS,FRV}. 
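For context (illustrative note, not part of the patch): Kconfig symbols reach C code only as CONFIG_-prefixed macros generated into include/linux/autoconf.h, so a test on the bare names can never fire:

/* "config CRIS" in the arch Kconfig ends up as
 *	#define CONFIG_CRIS 1
 * in include/linux/autoconf.h; no plain CRIS (or FRV) macro is ever
 * defined, so the old defined(CRIS) || defined(FRV) test was always
 * false and SUPPORT_VLB_SYNC silently stayed 1 on those platforms. */
#if defined(CONFIG_CRIS) || defined(CONFIG_FRV)
# define SUPPORT_VLB_SYNC 0
#else
# define SUPPORT_VLB_SYNC 1
#endif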
Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ide.h b/include/linux/ide.h index a3b69c10d667..bc26b2f27359 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -26,7 +26,7 @@ #include #include -#if defined(CRIS) || defined(FRV) +#if defined(CONFIG_CRIS) || defined(CONFIG_FRV) # define SUPPORT_VLB_SYNC 0 #else # define SUPPORT_VLB_SYNC 1 -- cgit v1.2.3 From 7c43f2b888f9ca1fcb7b07abc4cbff4ca1b8e03b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 29 Mar 2008 03:08:18 +0000 Subject: NULL noise: frv cmpxchg() Signed-off-by: Al Viro Acked-by: Harvey Harrison Signed-off-by: Linus Torvalds --- include/asm-frv/system.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/asm-frv/system.h b/include/asm-frv/system.h index b400cea81487..2c57f4734746 100644 --- a/include/asm-frv/system.h +++ b/include/asm-frv/system.h @@ -234,7 +234,7 @@ extern void free_initmem(void); break; \ \ default: \ - __xg_orig = 0; \ + __xg_orig = (__typeof__(__xg_orig))0; \ asm volatile("break"); \ break; \ } \ @@ -259,7 +259,7 @@ extern uint32_t __cmpxchg_32(uint32_t *v, uint32_t test, uint32_t new); (__force uint32_t)__xg_test, \ (__force uint32_t)__xg_new); break; \ default: \ - __xg_orig = 0; \ + __xg_orig = (__typeof__(__xg_orig))0; \ asm volatile("break"); \ break; \ } \ -- cgit v1.2.3 From 7d61c4596d11d624efb4bbcbad01f9cf2b321162 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 29 Mar 2008 03:09:28 +0000 Subject: compat_sys_wait4() prototype misannotation Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- include/linux/compat.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/compat.h b/include/linux/compat.h index a671dbff7a1f..8fa7857e153b 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -192,8 +192,8 @@ asmlinkage long compat_sys_select(int n, compat_ulong_t __user *inp, struct compat_timeval __user *tvp); asmlinkage long compat_sys_wait4(compat_pid_t pid, - compat_uint_t *stat_addr, int options, - struct compat_rusage *ru); + compat_uint_t __user *stat_addr, int options, + struct compat_rusage __user *ru); #define BITS_PER_COMPAT_LONG (8*sizeof(compat_long_t)) -- cgit v1.2.3 From b2ddb9019ea13fb7b62d8e45adcc468376af0de7 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 29 Mar 2008 03:09:38 +0000 Subject: dma_page_list ->base_address is a userland pointer Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- include/linux/dmaengine.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 261e43a4c873..34d440698293 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -423,7 +423,7 @@ void dma_async_device_unregister(struct dma_device *device); /* --- Helper iov-locking functions --- */ struct dma_page_list { - char *base_address; + char __user *base_address; int nr_pages; struct page **pages; }; -- cgit v1.2.3 From 7d7f7c3ed2c519a462a4ae989ad3d55cc7f7f6ec Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Mon, 31 Mar 2008 01:53:43 +0300 Subject: remove include/asm-sh/floppy.h This patch removes the unused include/asm-sh/floppy.h (ARCH_MAY_HAVE_PC_FDC was not enabled). 
Signed-off-by: Adrian Bunk Signed-off-by: Paul Mundt --- arch/sh/Kconfig | 3 - include/asm-sh/floppy.h | 268 ------------------------------------------------ 2 files changed, 271 deletions(-) delete mode 100644 include/asm-sh/floppy.h (limited to 'include') diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index 12720489e458..8d2cd1de5726 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -72,9 +72,6 @@ config SYS_SUPPORTS_NUMA config SYS_SUPPORTS_PCI bool -config ARCH_MAY_HAVE_PC_FDC - bool - config STACKTRACE_SUPPORT def_bool y diff --git a/include/asm-sh/floppy.h b/include/asm-sh/floppy.h deleted file mode 100644 index 59fbfdc90dfb..000000000000 --- a/include/asm-sh/floppy.h +++ /dev/null @@ -1,268 +0,0 @@ -/* - * Architecture specific parts of the Floppy driver - * include/asm-i386/floppy.h - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (C) 1995 - */ -#ifndef __ASM_SH_FLOPPY_H -#define __ASM_SH_FLOPPY_H - -#include - - -/* - * The DMA channel used by the floppy controller cannot access data at - * addresses >= 16MB - * - * Went back to the 1MB limit, as some people had problems with the floppy - * driver otherwise. It doesn't matter much for performance anyway, as most - * floppy accesses go through the track buffer. - */ -#define _CROSS_64KB(a,s,vdma) \ -(!vdma && ((unsigned long)(a)/K_64 != ((unsigned long)(a) + (s) - 1) / K_64)) - -#define CROSS_64KB(a,s) _CROSS_64KB(a,s,use_virtual_dma & 1) - - -#define SW fd_routine[use_virtual_dma&1] -#define CSW fd_routine[can_use_virtual_dma & 1] - - -#define fd_inb(port) inb_p(port) -#define fd_outb(value,port) outb_p(value,port) - -#define fd_request_dma() CSW._request_dma(FLOPPY_DMA,"floppy") -#define fd_free_dma() CSW._free_dma(FLOPPY_DMA) -#define fd_enable_irq() enable_irq(FLOPPY_IRQ) -#define fd_disable_irq() disable_irq(FLOPPY_IRQ) -#define fd_free_irq() free_irq(FLOPPY_IRQ, NULL) -#define fd_get_dma_residue() SW._get_dma_residue(FLOPPY_DMA) -#define fd_dma_mem_alloc(size) SW._dma_mem_alloc(size) -#define fd_dma_setup(addr, size, mode, io) SW._dma_setup(addr, size, mode, io) - -#define FLOPPY_CAN_FALLBACK_ON_NODMA - -static int virtual_dma_count; -static int virtual_dma_residue; -static char *virtual_dma_addr; -static int virtual_dma_mode; -static int doing_pdma; - -static void floppy_hardint(int irq, void *dev_id, struct pt_regs * regs) -{ - register unsigned char st; - -#undef TRACE_FLPY_INT - -#ifdef TRACE_FLPY_INT - static int calls=0; - static int bytes=0; - static int dma_wait=0; -#endif - if(!doing_pdma) { - floppy_interrupt(irq, dev_id, regs); - return; - } - -#ifdef TRACE_FLPY_INT - if(!calls) - bytes = virtual_dma_count; -#endif - - { - register int lcount; - register char *lptr; - - st = 1; - for(lcount=virtual_dma_count, lptr=virtual_dma_addr; - lcount; lcount--, lptr++) { - st=inb(virtual_dma_port+4) & 0xa0 ; - if(st != 0xa0) - break; - if(virtual_dma_mode) - outb_p(*lptr, virtual_dma_port+5); - else - *lptr = inb_p(virtual_dma_port+5); - } - virtual_dma_count = lcount; - virtual_dma_addr = lptr; - st = inb(virtual_dma_port+4); - } - -#ifdef TRACE_FLPY_INT - calls++; -#endif - if(st == 0x20) - return; - if(!(st & 0x20)) { - virtual_dma_residue += virtual_dma_count; - virtual_dma_count=0; -#ifdef TRACE_FLPY_INT - printk("count=%x, residue=%x calls=%d bytes=%d dma_wait=%d\n", - virtual_dma_count, virtual_dma_residue, calls, bytes, - dma_wait); - calls = 0; - dma_wait=0; -#endif 
- doing_pdma = 0; - floppy_interrupt(irq, dev_id, regs); - return; - } -#ifdef TRACE_FLPY_INT - if(!virtual_dma_count) - dma_wait++; -#endif -} - -static void fd_disable_dma(void) -{ - if(! (can_use_virtual_dma & 1)) - disable_dma(FLOPPY_DMA); - doing_pdma = 0; - virtual_dma_residue += virtual_dma_count; - virtual_dma_count=0; -} - -static int vdma_request_dma(unsigned int dmanr, const char * device_id) -{ - return 0; -} - -static void vdma_nop(unsigned int dummy) -{ -} - - -static int vdma_get_dma_residue(unsigned int dummy) -{ - return virtual_dma_count + virtual_dma_residue; -} - - -static int fd_request_irq(void) -{ - if(can_use_virtual_dma) - return request_irq(FLOPPY_IRQ, floppy_hardint, - IRQF_DISABLED, "floppy", NULL); - else - return request_irq(FLOPPY_IRQ, floppy_interrupt, - IRQF_DISABLED, "floppy", NULL); -} - -static unsigned long dma_mem_alloc(unsigned long size) -{ - return __get_dma_pages(GFP_KERNEL,get_order(size)); -} - - -static unsigned long vdma_mem_alloc(unsigned long size) -{ - return (unsigned long) vmalloc(size); - -} - -#define nodma_mem_alloc(size) vdma_mem_alloc(size) - -static void _fd_dma_mem_free(unsigned long addr, unsigned long size) -{ - if((unsigned int) addr >= (unsigned int) high_memory) - return vfree((void *)addr); - else - free_pages(addr, get_order(size)); -} - -#define fd_dma_mem_free(addr, size) _fd_dma_mem_free(addr, size) - -static void _fd_chose_dma_mode(char *addr, unsigned long size) -{ - if(can_use_virtual_dma == 2) { - if((unsigned int) addr >= (unsigned int) high_memory || - virt_to_phys(addr) >= 0x10000000) - use_virtual_dma = 1; - else - use_virtual_dma = 0; - } else { - use_virtual_dma = can_use_virtual_dma & 1; - } -} - -#define fd_chose_dma_mode(addr, size) _fd_chose_dma_mode(addr, size) - - -static int vdma_dma_setup(char *addr, unsigned long size, int mode, int io) -{ - doing_pdma = 1; - virtual_dma_port = io; - virtual_dma_mode = (mode == DMA_MODE_WRITE); - virtual_dma_addr = addr; - virtual_dma_count = size; - virtual_dma_residue = 0; - return 0; -} - -static int hard_dma_setup(char *addr, unsigned long size, int mode, int io) -{ -#ifdef FLOPPY_SANITY_CHECK - if (CROSS_64KB(addr, size)) { - printk("DMA crossing 64-K boundary %p-%p\n", addr, addr+size); - return -1; - } -#endif - - __flush_purge_region(addr, size); - - /* actual, physical DMA */ - doing_pdma = 0; - clear_dma_ff(FLOPPY_DMA); - set_dma_mode(FLOPPY_DMA,mode); - set_dma_addr(FLOPPY_DMA,virt_to_phys(addr)); - set_dma_count(FLOPPY_DMA,size); - enable_dma(FLOPPY_DMA); - return 0; -} - -static struct fd_routine_l { - int (*_request_dma)(unsigned int dmanr, const char * device_id); - void (*_free_dma)(unsigned int dmanr); - int (*_get_dma_residue)(unsigned int dummy); - unsigned long (*_dma_mem_alloc) (unsigned long size); - int (*_dma_setup)(char *addr, unsigned long size, int mode, int io); -} fd_routine[] = { - { - request_dma, - free_dma, - get_dma_residue, - dma_mem_alloc, - hard_dma_setup - }, - { - vdma_request_dma, - vdma_nop, - vdma_get_dma_residue, - vdma_mem_alloc, - vdma_dma_setup - } -}; - - -static int FDC1 = 0x3f0; -static int FDC2 = -1; - -/* - * Floppy types are stored in the rtc's CMOS RAM and so rtc_lock - * is needed to prevent corrupted CMOS RAM in case "insmod floppy" - * coincides with another rtc CMOS user. Paul G. 
- */ -#define FLOPPY0_TYPE (4) -#define FLOPPY1_TYPE (0) - -#define N_FDC 2 -#define N_DRIVE 8 - -#define EXTRA_FLOPPY_PARAMS - -#endif /* __ASM_SH_FLOPPY_H */ -- cgit v1.2.3 From f83f1768f833cb45bc93429fdc552252a4f55ac3 Mon Sep 17 00:00:00 2001 From: Joonwoo Park Date: Mon, 31 Mar 2008 21:02:47 -0700 Subject: [LLC]: skb allocation size for responses Allocate the skb for llc responses with the received packet size by using the size adjustable llc_frame_alloc. Don't allocate useless extra payload. Cleanup magic numbers. So, this fixes oops. Reported by Jim Westfall: kernel: skb_over_panic: text:c0541fc7 len:1000 put:997 head:c166ac00 data:c166ac2f tail:0xc166b017 end:0xc166ac80 dev:eth0 kernel: ------------[ cut here ]------------ kernel: kernel BUG at net/core/skbuff.c:95! Signed-off-by: Joonwoo Park Acked-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/net/llc_pdu.h | 4 ++-- include/net/llc_sap.h | 7 +++++-- net/llc/llc_c_ac.c | 47 +++++++++++++++++++++++++---------------------- net/llc/llc_pdu.c | 2 +- net/llc/llc_s_ac.c | 9 +++++++-- net/llc/llc_sap.c | 27 ++++++++++++++++++++++++--- net/llc/llc_station.c | 13 ++++++++++--- 7 files changed, 74 insertions(+), 35 deletions(-) (limited to 'include') diff --git a/include/net/llc_pdu.h b/include/net/llc_pdu.h index 4a8f58b17e43..75b8e2968c9b 100644 --- a/include/net/llc_pdu.h +++ b/include/net/llc_pdu.h @@ -381,7 +381,7 @@ static inline void llc_pdu_init_as_xid_cmd(struct sk_buff *skb, xid_info->fmt_id = LLC_XID_FMT_ID; /* 0x81 */ xid_info->type = svcs_supported; xid_info->rw = rx_window << 1; /* size of receive window */ - skb_put(skb, 3); + skb_put(skb, sizeof(struct llc_xid_info)); } /** @@ -406,7 +406,7 @@ static inline void llc_pdu_init_as_xid_rsp(struct sk_buff *skb, xid_info->fmt_id = LLC_XID_FMT_ID; xid_info->type = svcs_supported; xid_info->rw = rx_window << 1; - skb_put(skb, 3); + skb_put(skb, sizeof(struct llc_xid_info)); } /* LLC Type 2 FRMR response information field format */ diff --git a/include/net/llc_sap.h b/include/net/llc_sap.h index 2c56dbece729..ed25bec2f648 100644 --- a/include/net/llc_sap.h +++ b/include/net/llc_sap.h @@ -1,5 +1,8 @@ #ifndef LLC_SAP_H #define LLC_SAP_H + +#include + /* * Copyright (c) 1997 by Procom Technology,Inc. 
* 2001-2003 by Arnaldo Carvalho de Melo @@ -19,8 +22,8 @@ struct sock; extern void llc_sap_rtn_pdu(struct llc_sap *sap, struct sk_buff *skb); extern void llc_save_primitive(struct sock *sk, struct sk_buff* skb, unsigned char prim); -extern struct sk_buff *llc_alloc_frame(struct sock *sk, - struct net_device *dev); +extern struct sk_buff *llc_alloc_frame(struct sock *sk, struct net_device *dev, + u8 type, u32 data_size); extern void llc_build_and_send_test_pkt(struct llc_sap *sap, struct sk_buff *skb, diff --git a/net/llc/llc_c_ac.c b/net/llc/llc_c_ac.c index 860140caa6e0..71a00225bdb3 100644 --- a/net/llc/llc_c_ac.c +++ b/net/llc/llc_c_ac.c @@ -198,7 +198,7 @@ int llc_conn_ac_send_disc_cmd_p_set_x(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; struct llc_sock *llc = llc_sk(sk); - struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev); + struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev, LLC_PDU_TYPE_U, 0); if (nskb) { struct llc_sap *sap = llc->sap; @@ -223,7 +223,7 @@ int llc_conn_ac_send_dm_rsp_f_set_p(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; struct llc_sock *llc = llc_sk(sk); - struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev); + struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev, LLC_PDU_TYPE_U, 0); if (nskb) { struct llc_sap *sap = llc->sap; @@ -249,7 +249,7 @@ int llc_conn_ac_send_dm_rsp_f_set_1(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; struct llc_sock *llc = llc_sk(sk); - struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev); + struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev, LLC_PDU_TYPE_U, 0); if (nskb) { struct llc_sap *sap = llc->sap; @@ -282,7 +282,8 @@ int llc_conn_ac_send_frmr_rsp_f_set_x(struct sock *sk, struct sk_buff *skb) llc_pdu_decode_pf_bit(skb, &f_bit); else f_bit = 0; - nskb = llc_alloc_frame(sk, llc->dev); + nskb = llc_alloc_frame(sk, llc->dev, LLC_PDU_TYPE_U, + sizeof(struct llc_frmr_info)); if (nskb) { struct llc_sap *sap = llc->sap; @@ -306,7 +307,8 @@ int llc_conn_ac_resend_frmr_rsp_f_set_0(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; struct llc_sock *llc = llc_sk(sk); - struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev); + struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev, LLC_PDU_TYPE_U, + sizeof(struct llc_frmr_info)); if (nskb) { struct llc_sap *sap = llc->sap; @@ -336,7 +338,8 @@ int llc_conn_ac_resend_frmr_rsp_f_set_p(struct sock *sk, struct sk_buff *skb) struct llc_sock *llc = llc_sk(sk); llc_pdu_decode_pf_bit(skb, &f_bit); - nskb = llc_alloc_frame(sk, llc->dev); + nskb = llc_alloc_frame(sk, llc->dev, LLC_PDU_TYPE_U, + sizeof(struct llc_frmr_info)); if (nskb) { struct llc_sap *sap = llc->sap; struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); @@ -424,7 +427,7 @@ int llc_conn_ac_resend_i_xxx_x_set_0_or_send_rr(struct sock *sk, struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); int rc = -ENOBUFS; struct llc_sock *llc = llc_sk(sk); - struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev); + struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev, LLC_PDU_TYPE_U, 0); if (nskb) { struct llc_sap *sap = llc->sap; @@ -459,7 +462,7 @@ int llc_conn_ac_send_rej_cmd_p_set_1(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; struct llc_sock *llc = llc_sk(sk); - struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev); + struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev, LLC_PDU_TYPE_S, 0); if (nskb) { struct llc_sap *sap = llc->sap; @@ -483,7 +486,7 @@ int llc_conn_ac_send_rej_rsp_f_set_1(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; struct llc_sock *llc = llc_sk(sk); - struct sk_buff *nskb = 
llc_alloc_frame(sk, llc->dev); + struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev, LLC_PDU_TYPE_S, 0); if (nskb) { struct llc_sap *sap = llc->sap; @@ -507,7 +510,7 @@ int llc_conn_ac_send_rej_xxx_x_set_0(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; struct llc_sock *llc = llc_sk(sk); - struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev); + struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev, LLC_PDU_TYPE_S, 0); if (nskb) { struct llc_sap *sap = llc->sap; @@ -531,7 +534,7 @@ int llc_conn_ac_send_rnr_cmd_p_set_1(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; struct llc_sock *llc = llc_sk(sk); - struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev); + struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev, LLC_PDU_TYPE_S, 0); if (nskb) { struct llc_sap *sap = llc->sap; @@ -555,7 +558,7 @@ int llc_conn_ac_send_rnr_rsp_f_set_1(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; struct llc_sock *llc = llc_sk(sk); - struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev); + struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev, LLC_PDU_TYPE_S, 0); if (nskb) { struct llc_sap *sap = llc->sap; @@ -579,7 +582,7 @@ int llc_conn_ac_send_rnr_xxx_x_set_0(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; struct llc_sock *llc = llc_sk(sk); - struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev); + struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev, LLC_PDU_TYPE_S, 0); if (nskb) { struct llc_sap *sap = llc->sap; @@ -615,7 +618,7 @@ int llc_conn_ac_opt_send_rnr_xxx_x_set_0(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; struct llc_sock *llc = llc_sk(sk); - struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev); + struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev, LLC_PDU_TYPE_S, 0); if (nskb) { struct llc_sap *sap = llc->sap; @@ -639,7 +642,7 @@ int llc_conn_ac_send_rr_cmd_p_set_1(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; struct llc_sock *llc = llc_sk(sk); - struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev); + struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev, LLC_PDU_TYPE_S, 0); if (nskb) { struct llc_sap *sap = llc->sap; @@ -663,7 +666,7 @@ int llc_conn_ac_send_rr_rsp_f_set_1(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; struct llc_sock *llc = llc_sk(sk); - struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev); + struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev, LLC_PDU_TYPE_S, 0); if (nskb) { struct llc_sap *sap = llc->sap; @@ -688,7 +691,7 @@ int llc_conn_ac_send_ack_rsp_f_set_1(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; struct llc_sock *llc = llc_sk(sk); - struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev); + struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev, LLC_PDU_TYPE_S, 0); if (nskb) { struct llc_sap *sap = llc->sap; @@ -712,7 +715,7 @@ int llc_conn_ac_send_rr_xxx_x_set_0(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; struct llc_sock *llc = llc_sk(sk); - struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev); + struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev, LLC_PDU_TYPE_S, 0); if (nskb) { struct llc_sap *sap = llc->sap; @@ -736,7 +739,7 @@ int llc_conn_ac_send_ack_xxx_x_set_0(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; struct llc_sock *llc = llc_sk(sk); - struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev); + struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev, LLC_PDU_TYPE_S, 0); if (nskb) { struct llc_sap *sap = llc->sap; @@ -770,7 +773,7 @@ int llc_conn_ac_send_sabme_cmd_p_set_x(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; struct llc_sock *llc = 
llc_sk(sk); - struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev); + struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev, LLC_PDU_TYPE_U, 0); if (nskb) { struct llc_sap *sap = llc->sap; @@ -799,7 +802,7 @@ int llc_conn_ac_send_ua_rsp_f_set_p(struct sock *sk, struct sk_buff *skb) u8 f_bit; int rc = -ENOBUFS; struct llc_sock *llc = llc_sk(sk); - struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev); + struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev, LLC_PDU_TYPE_U, 0); llc_pdu_decode_pf_bit(skb, &f_bit); if (nskb) { @@ -956,7 +959,7 @@ static int llc_conn_ac_send_rr_rsp_f_set_ackpf(struct sock *sk, { int rc = -ENOBUFS; struct llc_sock *llc = llc_sk(sk); - struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev); + struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev, LLC_PDU_TYPE_S, 0); if (nskb) { struct llc_sap *sap = llc->sap; diff --git a/net/llc/llc_pdu.c b/net/llc/llc_pdu.c index fa8324396db3..2e6cb79196bb 100644 --- a/net/llc/llc_pdu.c +++ b/net/llc/llc_pdu.c @@ -241,7 +241,7 @@ void llc_pdu_init_as_frmr_rsp(struct sk_buff *skb, struct llc_pdu_sn *prev_pdu, FRMR_INFO_SET_PDU_INFO_2LONG_IND(frmr_info, vzyxw); FRMR_INFO_SET_PDU_INVALID_Nr_IND(frmr_info, vzyxw); FRMR_INFO_SET_PDU_INVALID_Ns_IND(frmr_info, vzyxw); - skb_put(skb, 5); + skb_put(skb, sizeof(struct llc_frmr_info)); } /** diff --git a/net/llc/llc_s_ac.c b/net/llc/llc_s_ac.c index ac3d93b210d2..a94bd56bcac6 100644 --- a/net/llc/llc_s_ac.c +++ b/net/llc/llc_s_ac.c @@ -103,7 +103,8 @@ int llc_sap_action_send_xid_r(struct llc_sap *sap, struct sk_buff *skb) llc_pdu_decode_sa(skb, mac_da); llc_pdu_decode_da(skb, mac_sa); llc_pdu_decode_ssap(skb, &dsap); - nskb = llc_alloc_frame(NULL, skb->dev); + nskb = llc_alloc_frame(NULL, skb->dev, LLC_PDU_TYPE_U, + sizeof(struct llc_xid_info)); if (!nskb) goto out; llc_pdu_header_init(nskb, LLC_PDU_TYPE_U, sap->laddr.lsap, dsap, @@ -144,11 +145,15 @@ int llc_sap_action_send_test_r(struct llc_sap *sap, struct sk_buff *skb) u8 mac_da[ETH_ALEN], mac_sa[ETH_ALEN], dsap; struct sk_buff *nskb; int rc = 1; + u32 data_size; llc_pdu_decode_sa(skb, mac_da); llc_pdu_decode_da(skb, mac_sa); llc_pdu_decode_ssap(skb, &dsap); - nskb = llc_alloc_frame(NULL, skb->dev); + + /* The test request command is type U (llc_len = 3) */ + data_size = ntohs(eth_hdr(skb)->h_proto) - 3; + nskb = llc_alloc_frame(NULL, skb->dev, LLC_PDU_TYPE_U, data_size); if (!nskb) goto out; llc_pdu_header_init(nskb, LLC_PDU_TYPE_U, sap->laddr.lsap, dsap, diff --git a/net/llc/llc_sap.c b/net/llc/llc_sap.c index 2525165e2e8f..e2ddde755019 100644 --- a/net/llc/llc_sap.c +++ b/net/llc/llc_sap.c @@ -24,20 +24,41 @@ #include #include +static int llc_mac_header_len(unsigned short devtype) +{ + switch (devtype) { + case ARPHRD_ETHER: + case ARPHRD_LOOPBACK: + return sizeof(struct ethhdr); +#ifdef CONFIG_TR + case ARPHRD_IEEE802_TR: + return sizeof(struct trh_hdr); +#endif + } + return 0; +} + /** * llc_alloc_frame - allocates sk_buff for frame * @dev: network device this skb will be sent over + * @type: pdu type to allocate + * @data_size: data size to allocate * * Allocates an sk_buff for frame and initializes sk_buff fields. * Returns allocated skb or %NULL when out of memory. */ -struct sk_buff *llc_alloc_frame(struct sock *sk, struct net_device *dev) +struct sk_buff *llc_alloc_frame(struct sock *sk, struct net_device *dev, + u8 type, u32 data_size) { - struct sk_buff *skb = alloc_skb(128, GFP_ATOMIC); + int hlen = type == LLC_PDU_TYPE_U ? 
3 : 4; + struct sk_buff *skb; + + hlen += llc_mac_header_len(dev->type); + skb = alloc_skb(hlen + data_size, GFP_ATOMIC); if (skb) { skb_reset_mac_header(skb); - skb_reserve(skb, 50); + skb_reserve(skb, hlen); skb_reset_network_header(skb); skb_reset_transport_header(skb); skb->protocol = htons(ETH_P_802_2); diff --git a/net/llc/llc_station.c b/net/llc/llc_station.c index 959e7f31833b..83da13339490 100644 --- a/net/llc/llc_station.c +++ b/net/llc/llc_station.c @@ -253,7 +253,8 @@ static int llc_station_ac_inc_xid_r_cnt_by_1(struct sk_buff *skb) static int llc_station_ac_send_null_dsap_xid_c(struct sk_buff *skb) { int rc = 1; - struct sk_buff *nskb = llc_alloc_frame(NULL, skb->dev); + struct sk_buff *nskb = llc_alloc_frame(NULL, skb->dev, LLC_PDU_TYPE_U, + sizeof(struct llc_xid_info)); if (!nskb) goto out; @@ -274,7 +275,8 @@ static int llc_station_ac_send_xid_r(struct sk_buff *skb) { u8 mac_da[ETH_ALEN], dsap; int rc = 1; - struct sk_buff* nskb = llc_alloc_frame(NULL, skb->dev); + struct sk_buff *nskb = llc_alloc_frame(NULL, skb->dev, LLC_PDU_TYPE_U, + sizeof(struct llc_xid_info)); if (!nskb) goto out; @@ -298,7 +300,12 @@ static int llc_station_ac_send_test_r(struct sk_buff *skb) { u8 mac_da[ETH_ALEN], dsap; int rc = 1; - struct sk_buff *nskb = llc_alloc_frame(NULL, skb->dev); + u32 data_size; + struct sk_buff *nskb; + + /* The test request command is type U (llc_len = 3) */ + data_size = ntohs(eth_hdr(skb)->h_proto) - 3; + nskb = llc_alloc_frame(NULL, skb->dev, LLC_PDU_TYPE_U, data_size); if (!nskb) goto out; -- cgit v1.2.3 From a7097ff89c3204737a07eecbc83f9ae6002cc534 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Tue, 1 Apr 2008 00:22:53 -0400 Subject: Input: make sure input interfaces pin parent input devices Recent driver core change causes references to parent devices being dropped early, at device_del() time, as opposed to when all children are freed. This causes oops in evdev with grabbed devices. Take the reference to the parent input device ourselves to ensure that it stays around long enough. 
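The pattern, as a minimal sketch (the "xdev" names are hypothetical stand-ins; evdev, joydev and mousedev each get the equivalent change):

#include <linux/input.h>
#include <linux/slab.h>

struct xdev {				/* hypothetical interface */
	struct input_handle handle;
	struct device dev;
};

/* connect() stores a pinned reference to the parent:
 *	xdev->handle.dev = input_get_device(dev);
 * and the interface's ->release handler drops it again, which may now
 * be what finally frees the input device: */
static void xdev_free(struct device *dev)
{
	struct xdev *xdev = container_of(dev, struct xdev, dev);

	input_put_device(xdev->handle.dev);
	kfree(xdev);
}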
Signed-off-by: Dmitry Torokhov --- drivers/input/evdev.c | 6 ++---- drivers/input/joydev.c | 3 ++- drivers/input/mousedev.c | 3 ++- include/linux/input.h | 5 +++-- 4 files changed, 9 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/drivers/input/evdev.c b/drivers/input/evdev.c index 99562cee827e..b32984bc516f 100644 --- a/drivers/input/evdev.c +++ b/drivers/input/evdev.c @@ -124,6 +124,7 @@ static void evdev_free(struct device *dev) { struct evdev *evdev = container_of(dev, struct evdev, dev); + input_put_device(evdev->handle.dev); kfree(evdev); } @@ -853,9 +854,6 @@ static void evdev_cleanup(struct evdev *evdev) evdev_hangup(evdev); evdev_remove_chrdev(evdev); - if (evdev->grab) - evdev_ungrab(evdev, evdev->grab); - /* evdev is marked dead so no one else accesses evdev->open */ if (evdev->open) { input_flush_device(handle, NULL); @@ -896,7 +894,7 @@ static int evdev_connect(struct input_handler *handler, struct input_dev *dev, evdev->exist = 1; evdev->minor = minor; - evdev->handle.dev = dev; + evdev->handle.dev = input_get_device(dev); evdev->handle.name = evdev->name; evdev->handle.handler = handler; evdev->handle.private = evdev; diff --git a/drivers/input/joydev.c b/drivers/input/joydev.c index 22b2789ef58a..65d7077a75a1 100644 --- a/drivers/input/joydev.c +++ b/drivers/input/joydev.c @@ -171,6 +171,7 @@ static void joydev_free(struct device *dev) { struct joydev *joydev = container_of(dev, struct joydev, dev); + input_put_device(joydev->handle.dev); kfree(joydev); } @@ -750,7 +751,7 @@ static int joydev_connect(struct input_handler *handler, struct input_dev *dev, joydev->minor = minor; joydev->exist = 1; - joydev->handle.dev = dev; + joydev->handle.dev = input_get_device(dev); joydev->handle.name = joydev->name; joydev->handle.handler = handler; joydev->handle.private = joydev; diff --git a/drivers/input/mousedev.c b/drivers/input/mousedev.c index bbbe5e81adc1..b989748598ae 100644 --- a/drivers/input/mousedev.c +++ b/drivers/input/mousedev.c @@ -414,6 +414,7 @@ static void mousedev_free(struct device *dev) { struct mousedev *mousedev = container_of(dev, struct mousedev, dev); + input_put_device(mousedev->handle.dev); kfree(mousedev); } @@ -865,7 +866,7 @@ static struct mousedev *mousedev_create(struct input_dev *dev, mousedev->minor = minor; mousedev->exist = 1; - mousedev->handle.dev = dev; + mousedev->handle.dev = input_get_device(dev); mousedev->handle.name = mousedev->name; mousedev->handle.handler = handler; mousedev->handle.private = mousedev; diff --git a/include/linux/input.h b/include/linux/input.h index 1bdc39a8c76c..cae2c35d1206 100644 --- a/include/linux/input.h +++ b/include/linux/input.h @@ -1227,12 +1227,13 @@ void input_free_device(struct input_dev *dev); static inline struct input_dev *input_get_device(struct input_dev *dev) { - return to_input_dev(get_device(&dev->dev)); + return dev ? to_input_dev(get_device(&dev->dev)) : NULL; } static inline void input_put_device(struct input_dev *dev) { - put_device(&dev->dev); + if (dev) + put_device(&dev->dev); } static inline void *input_get_drvdata(struct input_dev *dev) -- cgit v1.2.3 From 758e285faca4db948ecddefb523007255b29cdb7 Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Thu, 27 Mar 2008 16:09:31 +0300 Subject: [MIPS] Alchemy: work around clock misdetection on early Au1000 Work around the CPU clock miscalculation on Au1000DA/HA/HB due the sys_cpupll register being write-only, i.e. actually do what the comment before cal_r4off() function advertised for years but the code failed at. 
This is achieved by just giving user a chance to define the clock explicitly in the board config. via CONFIG_SOC_AU1000_FREQUENCY option, defaulting to 396 MHz if the option is not given... The patch is based on the AMD's big unpublished patch, the issue seems to be an undocumented errata (or feature :-)... Signed-off-by: Sergei Shtylyov Signed-off-by: Ralf Baechle --- arch/mips/au1000/common/cputable.c | 36 +++++++++++++++++------------------ arch/mips/au1000/common/setup.c | 13 ++++++++++--- arch/mips/au1000/common/time.c | 24 +++++++++++++---------- include/asm-mips/mach-au1x00/au1000.h | 1 + 4 files changed, 43 insertions(+), 31 deletions(-) (limited to 'include') diff --git a/arch/mips/au1000/common/cputable.c b/arch/mips/au1000/common/cputable.c index d8df5fdb045f..5c0d35d6e22a 100644 --- a/arch/mips/au1000/common/cputable.c +++ b/arch/mips/au1000/common/cputable.c @@ -22,24 +22,24 @@ struct cpu_spec* cur_cpu_spec[NR_CPUS]; /* With some thought, we can probably use the mask to reduce the * size of the table. */ -struct cpu_spec cpu_specs[] = { - { 0xffffffff, 0x00030100, "Au1000 DA", 1, 0 }, - { 0xffffffff, 0x00030201, "Au1000 HA", 1, 0 }, - { 0xffffffff, 0x00030202, "Au1000 HB", 1, 0 }, - { 0xffffffff, 0x00030203, "Au1000 HC", 1, 1 }, - { 0xffffffff, 0x00030204, "Au1000 HD", 1, 1 }, - { 0xffffffff, 0x01030200, "Au1500 AB", 1, 1 }, - { 0xffffffff, 0x01030201, "Au1500 AC", 0, 1 }, - { 0xffffffff, 0x01030202, "Au1500 AD", 0, 1 }, - { 0xffffffff, 0x02030200, "Au1100 AB", 1, 1 }, - { 0xffffffff, 0x02030201, "Au1100 BA", 1, 1 }, - { 0xffffffff, 0x02030202, "Au1100 BC", 1, 1 }, - { 0xffffffff, 0x02030203, "Au1100 BD", 0, 1 }, - { 0xffffffff, 0x02030204, "Au1100 BE", 0, 1 }, - { 0xffffffff, 0x03030200, "Au1550 AA", 0, 1 }, - { 0xffffffff, 0x04030200, "Au1200 AB", 0, 0 }, - { 0xffffffff, 0x04030201, "Au1200 AC", 1, 0 }, - { 0x00000000, 0x00000000, "Unknown Au1xxx", 1, 0 }, +struct cpu_spec cpu_specs[] = { + { 0xffffffff, 0x00030100, "Au1000 DA", 1, 0, 1 }, + { 0xffffffff, 0x00030201, "Au1000 HA", 1, 0, 1 }, + { 0xffffffff, 0x00030202, "Au1000 HB", 1, 0, 1 }, + { 0xffffffff, 0x00030203, "Au1000 HC", 1, 1, 0 }, + { 0xffffffff, 0x00030204, "Au1000 HD", 1, 1, 0 }, + { 0xffffffff, 0x01030200, "Au1500 AB", 1, 1, 0 }, + { 0xffffffff, 0x01030201, "Au1500 AC", 0, 1, 0 }, + { 0xffffffff, 0x01030202, "Au1500 AD", 0, 1, 0 }, + { 0xffffffff, 0x02030200, "Au1100 AB", 1, 1, 0 }, + { 0xffffffff, 0x02030201, "Au1100 BA", 1, 1, 0 }, + { 0xffffffff, 0x02030202, "Au1100 BC", 1, 1, 0 }, + { 0xffffffff, 0x02030203, "Au1100 BD", 0, 1, 0 }, + { 0xffffffff, 0x02030204, "Au1100 BE", 0, 1, 0 }, + { 0xffffffff, 0x03030200, "Au1550 AA", 0, 1, 0 }, + { 0xffffffff, 0x04030200, "Au1200 AB", 0, 0, 0 }, + { 0xffffffff, 0x04030201, "Au1200 AC", 1, 0, 0 }, + { 0x00000000, 0x00000000, "Unknown Au1xxx", 1, 0, 0 } }; void diff --git a/arch/mips/au1000/common/setup.c b/arch/mips/au1000/common/setup.c index d885e3848ec6..9e4ab80caab6 100644 --- a/arch/mips/au1000/common/setup.c +++ b/arch/mips/au1000/common/setup.c @@ -57,7 +57,7 @@ void __init plat_mem_setup(void) { struct cpu_spec *sp; char *argptr; - unsigned long prid, cpupll, bclk = 1; + unsigned long prid, cpufreq, bclk = 1; set_cpuspec(); sp = cur_cpu_spec[0]; @@ -65,8 +65,15 @@ void __init plat_mem_setup(void) board_setup(); /* board specific setup */ prid = read_c0_prid(); - cpupll = (au_readl(0xB1900060) & 0x3F) * 12; - printk("(PRId %08lx) @ %ldMHZ\n", prid, cpupll); + if (sp->cpu_pll_wo) +#ifdef CONFIG_SOC_AU1000_FREQUENCY + cpufreq = CONFIG_SOC_AU1000_FREQUENCY / 
1000000; +#else + cpufreq = 396; +#endif + else + cpufreq = (au_readl(SYS_CPUPLL) & 0x3F) * 12; + printk(KERN_INFO "(PRID %08lx) @ %ld MHz\n", prid, cpufreq); bclk = sp->cpu_bclk; if (bclk) diff --git a/arch/mips/au1000/common/time.c b/arch/mips/au1000/common/time.c index f113b512d7b1..e122bbc6cd88 100644 --- a/arch/mips/au1000/common/time.c +++ b/arch/mips/au1000/common/time.c @@ -209,18 +209,22 @@ unsigned long cal_r4koff(void) while (au_readl(SYS_COUNTER_CNTRL) & SYS_CNTRL_C1S); au_writel(0, SYS_TOYWRITE); while (au_readl(SYS_COUNTER_CNTRL) & SYS_CNTRL_C1S); + } else + no_au1xxx_32khz = 1; - cpu_speed = (au_readl(SYS_CPUPLL) & 0x0000003f) * - AU1000_SRC_CLK; - } - else { - /* The 32KHz oscillator isn't running, so assume there - * isn't one and grab the processor speed from the PLL. - * NOTE: some old silicon doesn't allow reading the PLL. - */ + /* + * On early Au1000, sys_cpupll was write-only. Since these + * silicon versions of Au1000 are not sold by AMD, we don't bend + * over backwards trying to determine the frequency. + */ + if (cur_cpu_spec[0]->cpu_pll_wo) +#ifdef CONFIG_SOC_AU1000_FREQUENCY + cpu_speed = CONFIG_SOC_AU1000_FREQUENCY; +#else + cpu_speed = 396000000; +#endif + else cpu_speed = (au_readl(SYS_CPUPLL) & 0x0000003f) * AU1000_SRC_CLK; - no_au1xxx_32khz = 1; - } mips_hpt_frequency = cpu_speed; // Equation: Baudrate = CPU / (SD * 2 * CLKDIV * 16) set_au1x00_uart_baud_base(cpu_speed / (2 * ((int)(au_readl(SYS_POWERCTRL)&0x03) + 2) * 16)); diff --git a/include/asm-mips/mach-au1x00/au1000.h b/include/asm-mips/mach-au1x00/au1000.h index cb18af989645..5bb57bf2b9d7 100644 --- a/include/asm-mips/mach-au1x00/au1000.h +++ b/include/asm-mips/mach-au1x00/au1000.h @@ -1786,6 +1786,7 @@ struct cpu_spec { char *cpu_name; unsigned char cpu_od; /* Set Config[OD] */ unsigned char cpu_bclk; /* Enable BCLK switching */ + unsigned char cpu_pll_wo; /* sys_cpupll reg. write-only */ }; extern struct cpu_spec cpu_specs[]; -- cgit v1.2.3 From 34e6bbf23c8f43e8713d9bd092680f1660494b4a Mon Sep 17 00:00:00 2001 From: Fabio Checconi Date: Wed, 2 Apr 2008 14:31:02 +0200 Subject: cfq-iosched: fix rcu freeing of cfq io contexts SLAB_DESTROY_BY_RCU is not a direct substitute for normal call_rcu() freeing, since it'll page freeing but NOT object freeing. So change cfq to do the freeing on its own. Signed-off-by: Fabio Checconi Acked-by: Paul E. McKenney Signed-off-by: Jens Axboe --- block/cfq-iosched.c | 57 ++++++++++++++++++++++------------------------- include/linux/iocontext.h | 3 +++ 2 files changed, 30 insertions(+), 30 deletions(-) (limited to 'include') diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 0f962ecae91f..f26da2bfcc15 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -1143,24 +1143,37 @@ static void cfq_put_queue(struct cfq_queue *cfqq) } /* - * Call func for each cic attached to this ioc. Returns number of cic's seen. + * Call func for each cic attached to this ioc. 
*/ -static unsigned int +static void call_for_each_cic(struct io_context *ioc, void (*func)(struct io_context *, struct cfq_io_context *)) { struct cfq_io_context *cic; struct hlist_node *n; - int called = 0; rcu_read_lock(); - hlist_for_each_entry_rcu(cic, n, &ioc->cic_list, cic_list) { + hlist_for_each_entry_rcu(cic, n, &ioc->cic_list, cic_list) func(ioc, cic); - called++; - } rcu_read_unlock(); +} + +static void cfq_cic_free_rcu(struct rcu_head *head) +{ + struct cfq_io_context *cic; + + cic = container_of(head, struct cfq_io_context, rcu_head); + + kmem_cache_free(cfq_ioc_pool, cic); + elv_ioc_count_dec(ioc_count); + + if (ioc_gone && !elv_ioc_count_read(ioc_count)) + complete(ioc_gone); +} - return called; +static void cfq_cic_free(struct cfq_io_context *cic) +{ + call_rcu(&cic->rcu_head, cfq_cic_free_rcu); } static void cic_free_func(struct io_context *ioc, struct cfq_io_context *cic) @@ -1174,24 +1187,18 @@ static void cic_free_func(struct io_context *ioc, struct cfq_io_context *cic) hlist_del_rcu(&cic->cic_list); spin_unlock_irqrestore(&ioc->lock, flags); - kmem_cache_free(cfq_ioc_pool, cic); + cfq_cic_free(cic); } static void cfq_free_io_context(struct io_context *ioc) { - int freed; - /* - * ioc->refcount is zero here, so no more cic's are allowed to be - * linked into this ioc. So it should be ok to iterate over the known - * list, we will see all cic's since no new ones are added. + * ioc->refcount is zero here, or we are called from elv_unregister(), + * so no more cic's are allowed to be linked into this ioc. So it + * should be ok to iterate over the known list, we will see all cic's + * since no new ones are added. */ - freed = call_for_each_cic(ioc, cic_free_func); - - elv_ioc_count_mod(ioc_count, -freed); - - if (ioc_gone && !elv_ioc_count_read(ioc_count)) - complete(ioc_gone); + call_for_each_cic(ioc, cic_free_func); } static void cfq_exit_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq) @@ -1458,15 +1465,6 @@ cfq_get_queue(struct cfq_data *cfqd, int is_sync, struct io_context *ioc, return cfqq; } -static void cfq_cic_free(struct cfq_io_context *cic) -{ - kmem_cache_free(cfq_ioc_pool, cic); - elv_ioc_count_dec(ioc_count); - - if (ioc_gone && !elv_ioc_count_read(ioc_count)) - complete(ioc_gone); -} - /* * We drop cfq io contexts lazily, so we may find a dead one. */ @@ -2138,7 +2136,7 @@ static int __init cfq_slab_setup(void) if (!cfq_pool) goto fail; - cfq_ioc_pool = KMEM_CACHE(cfq_io_context, SLAB_DESTROY_BY_RCU); + cfq_ioc_pool = KMEM_CACHE(cfq_io_context, 0); if (!cfq_ioc_pool) goto fail; @@ -2286,7 +2284,6 @@ static void __exit cfq_exit(void) smp_wmb(); if (elv_ioc_count_read(ioc_count)) wait_for_completion(ioc_gone); - synchronize_rcu(); cfq_slab_kill(); } diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h index 1b4ccf25b4d2..cac4b364cd40 100644 --- a/include/linux/iocontext.h +++ b/include/linux/iocontext.h @@ -2,6 +2,7 @@ #define IOCONTEXT_H #include +#include /* * This is the per-process anticipatory I/O scheduler state. 
@@ -54,6 +55,8 @@ struct cfq_io_context { void (*dtor)(struct io_context *); /* destructor */ void (*exit)(struct io_context *); /* called on task exit */ + + struct rcu_head rcu_head; }; /* -- cgit v1.2.3 From dd135ebbd2a6b5e07dadb66c4dd033bb69531051 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Wed, 2 Apr 2008 13:04:40 -0700 Subject: kvm: provide kvm.h for all architecture: fixes headers_install MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently include/linux/kvm.h is not considered by make headers_install, because Kbuild cannot handle " unifdef-$(CONFIG_FOO) += foo.h. This problem was introduced by commit fb56dbb31c4738a3918db81fd24da732ce3b4ae6 Author: Avi Kivity Date: Sun Dec 2 10:50:06 2007 +0200 KVM: Export include/linux/kvm.h only if $ARCH actually supports KVM Currently, make headers_check barfs due to , which includes, not existing. Rather than add a zillion s, export kvm. only if the arch actually supports it. Signed-off-by: Avi Kivity which makes this an 2.6.25 regression. One way of solving the issue is to enhance Kbuild, but Avi and David conviced me, that changing headers_install is not the way to go. This patch changes the definition for linux/kvm.h to unifdef-y. If  unifdef-y is used for linux/kvm.h "make headers_check" will fail on all architectures without asm/kvm.h. Therefore, this patch also provides asm/kvm.h on all architectures. Signed-off-by: Christian Borntraeger Acked-by: Avi Kivity Cc: Sam Ravnborg Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-alpha/kvm.h | 6 ++++++ include/asm-arm/kvm.h | 6 ++++++ include/asm-avr32/kvm.h | 6 ++++++ include/asm-blackfin/kvm.h | 6 ++++++ include/asm-cris/kvm.h | 6 ++++++ include/asm-frv/kvm.h | 6 ++++++ include/asm-generic/Kbuild.asm | 2 ++ include/asm-h8300/kvm.h | 6 ++++++ include/asm-ia64/kvm.h | 6 ++++++ include/asm-m32r/kvm.h | 6 ++++++ include/asm-m68k/kvm.h | 6 ++++++ include/asm-m68knommu/kvm.h | 6 ++++++ include/asm-mips/kvm.h | 6 ++++++ include/asm-mn10300/kvm.h | 6 ++++++ include/asm-parisc/kvm.h | 6 ++++++ include/asm-powerpc/kvm.h | 6 ++++++ include/asm-s390/kvm.h | 6 ++++++ include/asm-sh/kvm.h | 6 ++++++ include/asm-sparc/kvm.h | 6 ++++++ include/asm-sparc64/kvm.h | 6 ++++++ include/asm-um/kvm.h | 6 ++++++ include/asm-v850/kvm.h | 6 ++++++ include/asm-xtensa/kvm.h | 6 ++++++ include/linux/Kbuild | 2 +- 24 files changed, 135 insertions(+), 1 deletion(-) create mode 100644 include/asm-alpha/kvm.h create mode 100644 include/asm-arm/kvm.h create mode 100644 include/asm-avr32/kvm.h create mode 100644 include/asm-blackfin/kvm.h create mode 100644 include/asm-cris/kvm.h create mode 100644 include/asm-frv/kvm.h create mode 100644 include/asm-h8300/kvm.h create mode 100644 include/asm-ia64/kvm.h create mode 100644 include/asm-m32r/kvm.h create mode 100644 include/asm-m68k/kvm.h create mode 100644 include/asm-m68knommu/kvm.h create mode 100644 include/asm-mips/kvm.h create mode 100644 include/asm-mn10300/kvm.h create mode 100644 include/asm-parisc/kvm.h create mode 100644 include/asm-powerpc/kvm.h create mode 100644 include/asm-s390/kvm.h create mode 100644 include/asm-sh/kvm.h create mode 100644 include/asm-sparc/kvm.h create mode 100644 include/asm-sparc64/kvm.h create mode 100644 include/asm-um/kvm.h create mode 100644 include/asm-v850/kvm.h create mode 100644 include/asm-xtensa/kvm.h (limited to 'include') diff --git a/include/asm-alpha/kvm.h b/include/asm-alpha/kvm.h new file mode 100644 index 000000000000..b9daec429689 --- 
/dev/null +++ b/include/asm-alpha/kvm.h @@ -0,0 +1,6 @@ +#ifndef __LINUX_KVM_ALPHA_H +#define __LINUX_KVM_ALPHA_H + +/* alpha does not support KVM */ + +#endif diff --git a/include/asm-arm/kvm.h b/include/asm-arm/kvm.h new file mode 100644 index 000000000000..cb3c08cbcb9e --- /dev/null +++ b/include/asm-arm/kvm.h @@ -0,0 +1,6 @@ +#ifndef __LINUX_KVM_ARM_H +#define __LINUX_KVM_ARM_H + +/* arm does not support KVM */ + +#endif diff --git a/include/asm-avr32/kvm.h b/include/asm-avr32/kvm.h new file mode 100644 index 000000000000..8c5777020e2c --- /dev/null +++ b/include/asm-avr32/kvm.h @@ -0,0 +1,6 @@ +#ifndef __LINUX_KVM_AVR32_H +#define __LINUX_KVM_AVR32_H + +/* avr32 does not support KVM */ + +#endif diff --git a/include/asm-blackfin/kvm.h b/include/asm-blackfin/kvm.h new file mode 100644 index 000000000000..e3477d77c014 --- /dev/null +++ b/include/asm-blackfin/kvm.h @@ -0,0 +1,6 @@ +#ifndef __LINUX_KVM_BLACKFIN_H +#define __LINUX_KVM_BLACKFIN_H + +/* blackfin does not support KVM */ + +#endif diff --git a/include/asm-cris/kvm.h b/include/asm-cris/kvm.h new file mode 100644 index 000000000000..c860f51149f0 --- /dev/null +++ b/include/asm-cris/kvm.h @@ -0,0 +1,6 @@ +#ifndef __LINUX_KVM_CRIS_H +#define __LINUX_KVM_CRIS_H + +/* cris does not support KVM */ + +#endif diff --git a/include/asm-frv/kvm.h b/include/asm-frv/kvm.h new file mode 100644 index 000000000000..9c8a4f08d0a9 --- /dev/null +++ b/include/asm-frv/kvm.h @@ -0,0 +1,6 @@ +#ifndef __LINUX_KVM_FRV_H +#define __LINUX_KVM_FRV_H + +/* frv does not support KVM */ + +#endif diff --git a/include/asm-generic/Kbuild.asm b/include/asm-generic/Kbuild.asm index fd9dcfd91c39..92a6d91d0c1a 100644 --- a/include/asm-generic/Kbuild.asm +++ b/include/asm-generic/Kbuild.asm @@ -1,3 +1,5 @@ +header-y += kvm.h + ifeq ($(wildcard include/asm-$(SRCARCH)/a.out.h),include/asm-$(SRCARCH)/a.out.h) unifdef-y += a.out.h endif diff --git a/include/asm-h8300/kvm.h b/include/asm-h8300/kvm.h new file mode 100644 index 000000000000..bdbed7b987e1 --- /dev/null +++ b/include/asm-h8300/kvm.h @@ -0,0 +1,6 @@ +#ifndef __LINUX_KVM_H8300_H +#define __LINUX_KVM_H8300_H + +/* h8300 does not support KVM */ + +#endif diff --git a/include/asm-ia64/kvm.h b/include/asm-ia64/kvm.h new file mode 100644 index 000000000000..030d29b4b26b --- /dev/null +++ b/include/asm-ia64/kvm.h @@ -0,0 +1,6 @@ +#ifndef __LINUX_KVM_IA64_H +#define __LINUX_KVM_IA64_H + +/* ia64 does not support KVM */ + +#endif diff --git a/include/asm-m32r/kvm.h b/include/asm-m32r/kvm.h new file mode 100644 index 000000000000..99a40515b77e --- /dev/null +++ b/include/asm-m32r/kvm.h @@ -0,0 +1,6 @@ +#ifndef __LINUX_KVM_M32R_H +#define __LINUX_KVM_M32R_H + +/* m32r does not support KVM */ + +#endif diff --git a/include/asm-m68k/kvm.h b/include/asm-m68k/kvm.h new file mode 100644 index 000000000000..7ed27fce5240 --- /dev/null +++ b/include/asm-m68k/kvm.h @@ -0,0 +1,6 @@ +#ifndef __LINUX_KVM_M68K_H +#define __LINUX_KVM_M68K_H + +/* m68k does not support KVM */ + +#endif diff --git a/include/asm-m68knommu/kvm.h b/include/asm-m68knommu/kvm.h new file mode 100644 index 000000000000..b49d4258dabb --- /dev/null +++ b/include/asm-m68knommu/kvm.h @@ -0,0 +1,6 @@ +#ifndef __LINUX_KVM_M68KNOMMU_H +#define __LINUX_KVM_M68KNOMMU_H + +/* m68knommu does not support KVM */ + +#endif diff --git a/include/asm-mips/kvm.h b/include/asm-mips/kvm.h new file mode 100644 index 000000000000..093a5b7f796b --- /dev/null +++ b/include/asm-mips/kvm.h @@ -0,0 +1,6 @@ +#ifndef __LINUX_KVM_MIPS_H +#define __LINUX_KVM_MIPS_H + +/* mips does not 
support KVM */ + +#endif diff --git a/include/asm-mn10300/kvm.h b/include/asm-mn10300/kvm.h new file mode 100644 index 000000000000..f6b609ff4a57 --- /dev/null +++ b/include/asm-mn10300/kvm.h @@ -0,0 +1,6 @@ +#ifndef __LINUX_KVM_MN10300_H +#define __LINUX_KVM_MN10300_H + +/* mn10300 does not support KVM */ + +#endif diff --git a/include/asm-parisc/kvm.h b/include/asm-parisc/kvm.h new file mode 100644 index 000000000000..00cc45812547 --- /dev/null +++ b/include/asm-parisc/kvm.h @@ -0,0 +1,6 @@ +#ifndef __LINUX_KVM_PARISC_H +#define __LINUX_KVM_PARISC_H + +/* parisc does not support KVM */ + +#endif diff --git a/include/asm-powerpc/kvm.h b/include/asm-powerpc/kvm.h new file mode 100644 index 000000000000..d1b530fbf8dd --- /dev/null +++ b/include/asm-powerpc/kvm.h @@ -0,0 +1,6 @@ +#ifndef __LINUX_KVM_POWERPC_H +#define __LINUX_KVM_POWERPC_H + +/* powerpc does not support KVM */ + +#endif diff --git a/include/asm-s390/kvm.h b/include/asm-s390/kvm.h new file mode 100644 index 000000000000..573f2a351386 --- /dev/null +++ b/include/asm-s390/kvm.h @@ -0,0 +1,6 @@ +#ifndef __LINUX_KVM_S390_H +#define __LINUX_KVM_S390_H + +/* s390 does not support KVM */ + +#endif diff --git a/include/asm-sh/kvm.h b/include/asm-sh/kvm.h new file mode 100644 index 000000000000..6af51dbab2d0 --- /dev/null +++ b/include/asm-sh/kvm.h @@ -0,0 +1,6 @@ +#ifndef __LINUX_KVM_SH_H +#define __LINUX_KVM_SH_H + +/* sh does not support KVM */ + +#endif diff --git a/include/asm-sparc/kvm.h b/include/asm-sparc/kvm.h new file mode 100644 index 000000000000..2e5478da3819 --- /dev/null +++ b/include/asm-sparc/kvm.h @@ -0,0 +1,6 @@ +#ifndef __LINUX_KVM_SPARC_H +#define __LINUX_KVM_SPARC_H + +/* sparc does not support KVM */ + +#endif diff --git a/include/asm-sparc64/kvm.h b/include/asm-sparc64/kvm.h new file mode 100644 index 000000000000..380537a77bf9 --- /dev/null +++ b/include/asm-sparc64/kvm.h @@ -0,0 +1,6 @@ +#ifndef __LINUX_KVM_SPARC64_H +#define __LINUX_KVM_SPARC64_H + +/* sparc64 does not support KVM */ + +#endif diff --git a/include/asm-um/kvm.h b/include/asm-um/kvm.h new file mode 100644 index 000000000000..66aa77094551 --- /dev/null +++ b/include/asm-um/kvm.h @@ -0,0 +1,6 @@ +#ifndef __LINUX_KVM_UM_H +#define __LINUX_KVM_UM_H + +/* um does not support KVM */ + +#endif diff --git a/include/asm-v850/kvm.h b/include/asm-v850/kvm.h new file mode 100644 index 000000000000..3f729b79febc --- /dev/null +++ b/include/asm-v850/kvm.h @@ -0,0 +1,6 @@ +#ifndef __LINUX_KVM_V850_H +#define __LINUX_KVM_V850_H + +/* v850 does not support KVM */ + +#endif diff --git a/include/asm-xtensa/kvm.h b/include/asm-xtensa/kvm.h new file mode 100644 index 000000000000..bda4e331e98c --- /dev/null +++ b/include/asm-xtensa/kvm.h @@ -0,0 +1,6 @@ +#ifndef __LINUX_KVM_XTENSA_H +#define __LINUX_KVM_XTENSA_H + +/* xtensa does not support KVM */ + +#endif diff --git a/include/linux/Kbuild b/include/linux/Kbuild index 4a446a19295e..9cdd12a9e843 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild @@ -253,7 +253,7 @@ unifdef-y += kd.h unifdef-y += kernelcapi.h unifdef-y += kernel.h unifdef-y += keyboard.h -unifdef-$(CONFIG_HAVE_KVM) += kvm.h +unifdef-y += kvm.h unifdef-y += llc.h unifdef-y += loop.h unifdef-y += lp.h -- cgit v1.2.3 From c143d43aa3149b83e4b40624a27aa2b18638afec Mon Sep 17 00:00:00 2001 From: Ivan Kokshaysky Date: Wed, 2 Apr 2008 13:04:43 -0700 Subject: alpha: fix ALSA DMA mmap crash Make dma_alloc_coherent respect gfp flags (__GFP_COMP is one that matters). 
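For context, a minimal usage sketch (not part of the patch; the helper name is hypothetical): with the change below, the gfp flags a driver passes to dma_alloc_coherent() are forwarded to the alpha allocator instead of being replaced by GFP_ATOMIC, so a flag such as __GFP_COMP survives down to __get_free_pages() and the resulting buffer can be mmap()ed by ALSA without crashing.

    #include <linux/dma-mapping.h>
    #include <linux/gfp.h>

    /* Hypothetical helper, for illustration only: allocate a coherent
     * buffer intended to be handed to an mmap path.  On alpha this now
     * works as expected because the gfp argument reaches
     * __pci_alloc_consistent() unmodified. */
    static void *alloc_mmapable_dma(struct device *dev, size_t size,
                                    dma_addr_t *dma)
    {
            return dma_alloc_coherent(dev, size, dma,
                                      GFP_KERNEL | __GFP_COMP);
    }
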
Signed-off-by: Ivan Kokshaysky Tested-by: Michael Cree Cc: Richard Henderson Cc: Jaroslav Kysela Cc: Takashi Iwai Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/alpha/kernel/pci_iommu.c | 8 +++++--- include/asm-alpha/dma-mapping.h | 2 +- include/asm-alpha/pci.h | 8 +++++++- 3 files changed, 13 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/arch/alpha/kernel/pci_iommu.c b/arch/alpha/kernel/pci_iommu.c index 4e1c08636edd..dd6e334ab9e1 100644 --- a/arch/alpha/kernel/pci_iommu.c +++ b/arch/alpha/kernel/pci_iommu.c @@ -424,11 +424,13 @@ EXPORT_SYMBOL(pci_unmap_page); else DMA_ADDRP is undefined. */ void * -pci_alloc_consistent(struct pci_dev *pdev, size_t size, dma_addr_t *dma_addrp) +__pci_alloc_consistent(struct pci_dev *pdev, size_t size, + dma_addr_t *dma_addrp, gfp_t gfp) { void *cpu_addr; long order = get_order(size); - gfp_t gfp = GFP_ATOMIC; + + gfp &= ~GFP_DMA; try_again: cpu_addr = (void *)__get_free_pages(gfp, order); @@ -458,7 +460,7 @@ try_again: return cpu_addr; } -EXPORT_SYMBOL(pci_alloc_consistent); +EXPORT_SYMBOL(__pci_alloc_consistent); /* Free and unmap a consistent DMA buffer. CPU_ADDR and DMA_ADDR must be values that were returned from pci_alloc_consistent. SIZE must diff --git a/include/asm-alpha/dma-mapping.h b/include/asm-alpha/dma-mapping.h index 75a1aff5b57b..db351d1296f4 100644 --- a/include/asm-alpha/dma-mapping.h +++ b/include/asm-alpha/dma-mapping.h @@ -11,7 +11,7 @@ #define dma_unmap_single(dev, addr, size, dir) \ pci_unmap_single(alpha_gendev_to_pci(dev), addr, size, dir) #define dma_alloc_coherent(dev, size, addr, gfp) \ - pci_alloc_consistent(alpha_gendev_to_pci(dev), size, addr) + __pci_alloc_consistent(alpha_gendev_to_pci(dev), size, addr, gfp) #define dma_free_coherent(dev, size, va, addr) \ pci_free_consistent(alpha_gendev_to_pci(dev), size, va, addr) #define dma_map_page(dev, page, off, size, dir) \ diff --git a/include/asm-alpha/pci.h b/include/asm-alpha/pci.h index d5b10ef64364..d31fd49ff79a 100644 --- a/include/asm-alpha/pci.h +++ b/include/asm-alpha/pci.h @@ -76,7 +76,13 @@ extern inline void pcibios_penalize_isa_irq(int irq, int active) successful and sets *DMA_ADDRP to the pci side dma address as well, else DMA_ADDRP is undefined. */ -extern void *pci_alloc_consistent(struct pci_dev *, size_t, dma_addr_t *); +extern void *__pci_alloc_consistent(struct pci_dev *, size_t, + dma_addr_t *, gfp_t); +static inline void * +pci_alloc_consistent(struct pci_dev *dev, size_t size, dma_addr_t *dma) +{ + return __pci_alloc_consistent(dev, size, dma, GFP_ATOMIC); +} /* Free and unmap a consistent DMA buffer. CPU_ADDR and DMA_ADDR must be values that were returned from pci_alloc_consistent. SIZE must -- cgit v1.2.3 From 06f11f37aa84e83b48cdf36037c4414f5a83c13f Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 2 Apr 2008 13:04:52 -0700 Subject: alpha: get_current(): don't add zero to current_thread_info()->task A nasty compile error: In file included from security/keys/internal.h:16, from security/keys/sysctl.c:14: include/linux/key-ui.h: In function 'key_permission': include/linux/key-ui.h:51: error: invalid use of undefined type 'struct task_struct' apparently the compiler has decided that it needs to know sizeof(task_struct) so that it can add zero to a task_struct* (which is rather dumb of it). Getting task_struct in scope in these deeply-nested headers is scary-looking, so let's just remove the "+ 0". 
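A stand-alone reduction of the compiler issue (illustrative only, not kernel code):

    struct task_struct;     /* incomplete type, as in the deeply-nested headers */

    static inline struct task_struct *demo(struct task_struct *t)
    {
            /* "return t + 0;" would not compile here: pointer arithmetic
             * requires sizeof(struct task_struct), which is unknown for
             * an incomplete type.  Returning the pointer unchanged is
             * fine, which is all the "+ 0" removal below amounts to. */
            return t;
    }
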
Cc: David Howells Cc: Richard Henderson Cc: Ivan Kokshaysky Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-alpha/current.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-alpha/current.h b/include/asm-alpha/current.h index 8d88a13c1bec..094d285a1b34 100644 --- a/include/asm-alpha/current.h +++ b/include/asm-alpha/current.h @@ -3,7 +3,7 @@ #include -#define get_current() (current_thread_info()->task + 0) +#define get_current() (current_thread_info()->task) #define current get_current() #endif /* _ALPHA_CURRENT_H */ -- cgit v1.2.3 From 436d34b36202ef724778ded1e9cb10f8c37b32bc Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 2 Apr 2008 17:28:46 +0900 Subject: libata: uninline atapi_cmd_type() Uninline atapi_cmd_type(). It doesn't really have to be inline and more case will be added which need to access unexported libata variable. Signed-off-by: Tejun Heo Signed-off-by: Jeff Garzik --- drivers/ata/libata-core.c | 34 ++++++++++++++++++++++++++++++++++ include/linux/libata.h | 22 +--------------------- 2 files changed, 35 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 2db5c9c9ca10..e9b69ba489d2 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -325,6 +325,39 @@ static void ata_force_horkage(struct ata_device *dev) } } +/** + * atapi_cmd_type - Determine ATAPI command type from SCSI opcode + * @opcode: SCSI opcode + * + * Determine ATAPI command type from @opcode. + * + * LOCKING: + * None. + * + * RETURNS: + * ATAPI_{READ|WRITE|READ_CD|PASS_THRU|MISC} + */ +int atapi_cmd_type(u8 opcode) +{ + switch (opcode) { + case GPCMD_READ_10: + case GPCMD_READ_12: + return ATAPI_READ; + + case GPCMD_WRITE_10: + case GPCMD_WRITE_12: + case GPCMD_WRITE_AND_VERIFY_10: + return ATAPI_WRITE; + + case GPCMD_READ_CD: + case GPCMD_READ_CD_MSF: + return ATAPI_READ_CD; + + default: + return ATAPI_MISC; + } +} + /** * ata_tf_to_fis - Convert ATA taskfile to SATA FIS structure * @tf: Taskfile to convert @@ -7774,6 +7807,7 @@ EXPORT_SYMBOL_GPL(ata_tf_read); EXPORT_SYMBOL_GPL(ata_noop_dev_select); EXPORT_SYMBOL_GPL(ata_std_dev_select); EXPORT_SYMBOL_GPL(sata_print_link_status); +EXPORT_SYMBOL_GPL(atapi_cmd_type); EXPORT_SYMBOL_GPL(ata_tf_to_fis); EXPORT_SYMBOL_GPL(ata_tf_from_fis); EXPORT_SYMBOL_GPL(ata_pack_xfermask); diff --git a/include/linux/libata.h b/include/linux/libata.h index b064bfeb69ee..92c64909ed25 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -849,6 +849,7 @@ extern unsigned int ata_dev_try_classify(struct ata_device *dev, int present, */ extern void ata_tf_load(struct ata_port *ap, const struct ata_taskfile *tf); extern void ata_tf_read(struct ata_port *ap, struct ata_taskfile *tf); +extern int atapi_cmd_type(u8 opcode); extern void ata_tf_to_fis(const struct ata_taskfile *tf, u8 pmp, int is_cmd, u8 *fis); extern void ata_tf_from_fis(const u8 *fis, struct ata_taskfile *tf); @@ -1379,27 +1380,6 @@ static inline int ata_try_flush_cache(const struct ata_device *dev) ata_id_has_flush_ext(dev->id); } -static inline int atapi_cmd_type(u8 opcode) -{ - switch (opcode) { - case GPCMD_READ_10: - case GPCMD_READ_12: - return ATAPI_READ; - - case GPCMD_WRITE_10: - case GPCMD_WRITE_12: - case GPCMD_WRITE_AND_VERIFY_10: - return ATAPI_WRITE; - - case GPCMD_READ_CD: - case GPCMD_READ_CD_MSF: - return ATAPI_READ_CD; - - default: - return ATAPI_MISC; - } -} - static inline unsigned int ac_err_mask(u8 status) { if 
(status & (ATA_BUSY | ATA_DRQ)) -- cgit v1.2.3 From e52dcc4899cf1b7601379c31542bd91cd2997a64 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 2 Apr 2008 17:35:19 +0900 Subject: libata: ATA_12/16 doesn't fall into ATAPI_MISC SAT passthrus don't really fit into ATAPI_MISC class. SAT passthru commands always transfer multiple of 512 bytes and variable length response is not allowed. This patch creates a separate category - ATAPI_PASS_THRU - for these. This fixes HSM violation on "hdparm -I". Signed-off-by: Tejun Heo Signed-off-by: Jeff Garzik --- drivers/ata/libata-core.c | 5 +++++ include/linux/libata.h | 3 ++- 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index e9b69ba489d2..be95fdb69726 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -353,6 +353,11 @@ int atapi_cmd_type(u8 opcode) case GPCMD_READ_CD_MSF: return ATAPI_READ_CD; + case ATA_16: + case ATA_12: + if (atapi_passthru16) + return ATAPI_PASS_THRU; + /* fall thru */ default: return ATAPI_MISC; } diff --git a/include/linux/libata.h b/include/linux/libata.h index 92c64909ed25..37ee881c42ac 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -350,7 +350,8 @@ enum { ATAPI_READ = 0, /* READs */ ATAPI_WRITE = 1, /* WRITEs */ ATAPI_READ_CD = 2, /* READ CD [MSF] */ - ATAPI_MISC = 3, /* the rest */ + ATAPI_PASS_THRU = 3, /* SAT pass-thru */ + ATAPI_MISC = 4, /* the rest */ }; enum ata_xfer_mask { -- cgit v1.2.3 From bae1d2507e44417455eda76d4435352fee14cf51 Mon Sep 17 00:00:00 2001 From: Ravikiran G Thirumalai Date: Fri, 4 Apr 2008 03:06:29 -0700 Subject: x86: fix breakage of vSMP irq operations 25-rc* stopped working with CONFIG_X86_VSMP on vSMP machines. Looks like the vsmp irq ops got accidentally removed during merge of x86_64 pvops in 2.6.25. -- commit 6abcd98ffafbff81f0bfd7ee1d129e634af13245 removed vsmp irq ops. Tested with both CONFIG_X86_VSMP and without CONFIG_X86_VSMP, on vSMP and non vSMP x86_64 machines. Please apply. 
Signed-off-by: Ravikiran Thirumalai Signed-off-by: Ingo Molnar --- include/asm-x86/irqflags.h | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'include') diff --git a/include/asm-x86/irqflags.h b/include/asm-x86/irqflags.h index 92021c1ffa3a..0e2292483b35 100644 --- a/include/asm-x86/irqflags.h +++ b/include/asm-x86/irqflags.h @@ -70,6 +70,26 @@ static inline void raw_local_irq_restore(unsigned long flags) native_restore_fl(flags); } +#ifdef CONFIG_X86_VSMP + +/* + * Interrupt control for the VSMP architecture: + */ + +static inline void raw_local_irq_disable(void) +{ + unsigned long flags = __raw_local_save_flags(); + raw_local_irq_restore((flags & ~X86_EFLAGS_IF) | X86_EFLAGS_AC); +} + +static inline void raw_local_irq_enable(void) +{ + unsigned long flags = __raw_local_save_flags(); + raw_local_irq_restore((flags | X86_EFLAGS_IF) & (~X86_EFLAGS_AC)); +} + +#else + static inline void raw_local_irq_disable(void) { native_irq_disable(); @@ -80,6 +100,8 @@ static inline void raw_local_irq_enable(void) native_irq_enable(); } +#endif + /* * Used in the idle loop; sti takes one instruction cycle * to complete: @@ -137,10 +159,17 @@ static inline unsigned long __raw_local_irq_save(void) #define raw_local_irq_save(flags) \ do { (flags) = __raw_local_irq_save(); } while (0) +#ifdef CONFIG_X86_VSMP +static inline int raw_irqs_disabled_flags(unsigned long flags) +{ + return !(flags & X86_EFLAGS_IF) || (flags & X86_EFLAGS_AC); +} +#else static inline int raw_irqs_disabled_flags(unsigned long flags) { return !(flags & X86_EFLAGS_IF); } +#endif static inline int raw_irqs_disabled(void) { -- cgit v1.2.3 From 5761d64b277c287a7520b868c32d656ef03374b4 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 4 Apr 2008 16:26:10 +0200 Subject: x86: revert assign IRQs to hpet timer The commits: commit 37a47db8d7f0f38dac5acf5a13abbc8f401707fa Author: Balaji Rao Date: Wed Jan 30 13:30:03 2008 +0100 x86: assign IRQs to HPET timers, fix and commit e3f37a54f690d3e64995ea7ecea08c5ab3070faf Author: Balaji Rao Date: Wed Jan 30 13:30:03 2008 +0100 x86: assign IRQs to HPET timers have been identified to cause a regression on some platforms due to the assignement of legacy IRQs which makes the legacy devices connected to those IRQs disfunctional. Revert them. 
This fixes http://bugzilla.kernel.org/show_bug.cgi?id=10382 Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/kernel/hpet.c | 9 ++++++--- drivers/char/hpet.c | 51 +++++++------------------------------------------- include/linux/hpet.h | 2 +- 3 files changed, 14 insertions(+), 48 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 235fd6c77504..36652ea1a265 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -133,13 +133,16 @@ static void hpet_reserve_platform_timers(unsigned long id) #ifdef CONFIG_HPET_EMULATE_RTC hpet_reserve_timer(&hd, 1); #endif + hd.hd_irq[0] = HPET_LEGACY_8254; hd.hd_irq[1] = HPET_LEGACY_RTC; - for (i = 2; i < nrtimers; timer++, i++) - hd.hd_irq[i] = (timer->hpet_config & Tn_INT_ROUTE_CNF_MASK) >> - Tn_INT_ROUTE_CNF_SHIFT; + for (i = 2; i < nrtimers; timer++, i++) + hd.hd_irq[i] = (timer->hpet_config & Tn_INT_ROUTE_CNF_MASK) >> + Tn_INT_ROUTE_CNF_SHIFT; + hpet_alloc(&hd); + } #else static void hpet_reserve_platform_timers(unsigned long id) { } diff --git a/drivers/char/hpet.c b/drivers/char/hpet.c index 465ad35ed38f..1399971be689 100644 --- a/drivers/char/hpet.c +++ b/drivers/char/hpet.c @@ -731,14 +731,14 @@ static unsigned long hpet_calibrate(struct hpets *hpetp) int hpet_alloc(struct hpet_data *hdp) { - u64 cap, mcfg, hpet_config; + u64 cap, mcfg; struct hpet_dev *devp; - u32 i, ntimer, irq; + u32 i, ntimer; struct hpets *hpetp; size_t siz; struct hpet __iomem *hpet; static struct hpets *last = NULL; - unsigned long period, irq_bitmap; + unsigned long period; unsigned long long temp; /* @@ -765,47 +765,11 @@ int hpet_alloc(struct hpet_data *hdp) hpetp->hp_hpet_phys = hdp->hd_phys_address; hpetp->hp_ntimer = hdp->hd_nirqs; - hpet = hpetp->hp_hpet; - - /* Assign IRQs statically for legacy devices */ - hpetp->hp_dev[0].hd_hdwirq = hdp->hd_irq[0]; - hpetp->hp_dev[1].hd_hdwirq = hdp->hd_irq[1]; - - /* Assign IRQs dynamically for the others */ - for (i = 2, devp = &hpetp->hp_dev[2]; i < hdp->hd_nirqs; i++, devp++) { - struct hpet_timer __iomem *timer; - timer = &hpet->hpet_timers[devp - hpetp->hp_dev]; + for (i = 0; i < hdp->hd_nirqs; i++) + hpetp->hp_dev[i].hd_hdwirq = hdp->hd_irq[i]; - /* Check if there's already an IRQ assigned to the timer */ - if (hdp->hd_irq[i]) { - hpetp->hp_dev[i].hd_hdwirq = hdp->hd_irq[i]; - continue; - } - - hpet_config = readq(&timer->hpet_config); - irq_bitmap = (hpet_config & Tn_INT_ROUTE_CAP_MASK) - >> Tn_INT_ROUTE_CAP_SHIFT; - if (!irq_bitmap) - irq = 0; /* No valid IRQ Assignable */ - else { - irq = find_first_bit(&irq_bitmap, 32); - do { - hpet_config |= irq << Tn_INT_ROUTE_CNF_SHIFT; - writeq(hpet_config, &timer->hpet_config); - - /* - * Verify whether we have written a valid - * IRQ number by reading it back again - */ - hpet_config = readq(&timer->hpet_config); - if (irq == (hpet_config & Tn_INT_ROUTE_CNF_MASK) - >> Tn_INT_ROUTE_CNF_SHIFT) - break; /* Success */ - } while ((irq = (find_next_bit(&irq_bitmap, 32, irq)))); - } - hpetp->hp_dev[i].hd_hdwirq = irq; - } + hpet = hpetp->hp_hpet; cap = readq(&hpet->hpet_cap); @@ -836,8 +800,7 @@ int hpet_alloc(struct hpet_data *hdp) hpetp->hp_which, hdp->hd_phys_address, hpetp->hp_ntimer > 1 ? "s" : ""); for (i = 0; i < hpetp->hp_ntimer; i++) - printk("%s %d", i > 0 ? "," : "", - hpetp->hp_dev[i].hd_hdwirq); + printk("%s %d", i > 0 ? 
"," : "", hdp->hd_irq[i]); printk("\n"); printk(KERN_INFO "hpet%u: %u %d-bit timers, %Lu Hz\n", diff --git a/include/linux/hpet.h b/include/linux/hpet.h index 9cd94bfd07e5..2dc29ce6c8e4 100644 --- a/include/linux/hpet.h +++ b/include/linux/hpet.h @@ -64,7 +64,7 @@ struct hpet { */ #define Tn_INT_ROUTE_CAP_MASK (0xffffffff00000000ULL) -#define Tn_INT_ROUTE_CAP_SHIFT (32UL) +#define Tn_INI_ROUTE_CAP_SHIFT (32UL) #define Tn_FSB_INT_DELCAP_MASK (0x8000UL) #define Tn_FSB_INT_DELCAP_SHIFT (15) #define Tn_FSB_EN_CNF_MASK (0x4000UL) -- cgit v1.2.3 From 865ab8753837b73fe1e800fc88462c5914eec549 Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Wed, 2 Apr 2008 23:53:19 +0400 Subject: [MIPS] Pb1200: Fix header breakage Signed-off-by: Sergei Shtylyov Signed-off-by: Ralf Baechle --- include/asm-mips/mach-pb1x00/pb1200.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-mips/mach-pb1x00/pb1200.h b/include/asm-mips/mach-pb1x00/pb1200.h index ed5fd7390678..72213e3d02c7 100644 --- a/include/asm-mips/mach-pb1x00/pb1200.h +++ b/include/asm-mips/mach-pb1x00/pb1200.h @@ -245,7 +245,7 @@ enum external_pb1200_ints { PB1200_SD1_INSERT_INT, PB1200_SD1_EJECT_INT, - PB1200_INT_END (PB1200_INT_BEGIN + 15) + PB1200_INT_END = PB1200_INT_BEGIN + 15 }; /* For drivers/pcmcia/au1000_db1x00.c */ -- cgit v1.2.3 From 8bab8dded67d026c39367bbd5e27d2f6c556c38e Mon Sep 17 00:00:00 2001 From: Paul Menage Date: Fri, 4 Apr 2008 14:29:57 -0700 Subject: cgroups: add cgroup support for enabling controllers at boot time The effects of cgroup_disable=foo are: - foo isn't auto-mounted if you mount all cgroups in a single hierarchy - foo isn't visible as an individually mountable subsystem As a result there will only ever be one call to foo->create(), at init time; all processes will stay in this group, and the group will never be mounted on a visible hierarchy. Any additional effects (e.g. not allocating metadata) are up to the foo subsystem. This doesn't handle early_init subsystems (their "disabled" bit isn't set be, but it could easily be extended to do so if any of the early_init systems wanted it - I think it would just involve some nastier parameter processing since it would occur before the command-line argument parser had been run. Hugh said: Ballpark figures, I'm trying to get this question out rather than processing the exact numbers: CONFIG_CGROUP_MEM_RES_CTLR adds 15% overhead to the affected paths, booting with cgroup_disable=memory cuts that back to 1% overhead (due to slightly bigger struct page). I'm no expert on distros, they may have no interest whatever in CONFIG_CGROUP_MEM_RES_CTLR=y; and the rest of us can easily build with or without it, or apply the cgroup_disable=memory patches. 
Unix bench's execl test result on x86_64 was == just after boot without mounting any cgroup fs.== mem_cgorup=off : Execl Throughput 43.0 3150.1 732.6 mem_cgroup=on : Execl Throughput 43.0 2932.6 682.0 == [lizf@cn.fujitsu.com: fix boot option parsing] Signed-off-by: Balbir Singh Cc: Paul Menage Cc: Balbir Singh Cc: Pavel Emelyanov Cc: KAMEZAWA Hiroyuki Cc: Hugh Dickins Cc: Sudhir Kumar Cc: YAMAMOTO Takashi Cc: David Rientjes Signed-off-by: Li Zefan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/kernel-parameters.txt | 4 ++++ include/linux/cgroup.h | 1 + kernel/cgroup.c | 42 ++++++++++++++++++++++++++++++++----- 3 files changed, 42 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 4cd1a5da80a4..32e9297ef747 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -375,6 +375,10 @@ and is between 256 and 4096 characters. It is defined in the file ccw_timeout_log [S390] See Documentation/s390/CommonIO for details. + cgroup_disable= [KNL] Disable a particular controller + Format: {name of the controller(s) to disable} + {Currently supported controllers - "memory"} + checkreqprot [SELINUX] Set initial checkreqprot flag value. Format: { "0" | "1" } See security/selinux/Kconfig help text. diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 028ba3b523b1..a6a6035a4e1e 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -256,6 +256,7 @@ struct cgroup_subsys { void (*bind)(struct cgroup_subsys *ss, struct cgroup *root); int subsys_id; int active; + int disabled; int early_init; #define MAX_CGROUP_TYPE_NAMELEN 32 const char *name; diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 53d86b4b0ce0..62f1a5231fe9 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -782,7 +782,14 @@ static int parse_cgroupfs_options(char *data, if (!*token) return -EINVAL; if (!strcmp(token, "all")) { - opts->subsys_bits = (1 << CGROUP_SUBSYS_COUNT) - 1; + /* Add all non-disabled subsystems */ + int i; + opts->subsys_bits = 0; + for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { + struct cgroup_subsys *ss = subsys[i]; + if (!ss->disabled) + opts->subsys_bits |= 1ul << i; + } } else if (!strcmp(token, "noprefix")) { set_bit(ROOT_NOPREFIX, &opts->flags); } else if (!strncmp(token, "release_agent=", 14)) { @@ -800,7 +807,8 @@ static int parse_cgroupfs_options(char *data, for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { ss = subsys[i]; if (!strcmp(token, ss->name)) { - set_bit(i, &opts->subsys_bits); + if (!ss->disabled) + set_bit(i, &opts->subsys_bits); break; } } @@ -2600,13 +2608,13 @@ static int proc_cgroupstats_show(struct seq_file *m, void *v) { int i; - seq_puts(m, "#subsys_name\thierarchy\tnum_cgroups\n"); + seq_puts(m, "#subsys_name\thierarchy\tnum_cgroups\tenabled\n"); mutex_lock(&cgroup_mutex); for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { struct cgroup_subsys *ss = subsys[i]; - seq_printf(m, "%s\t%lu\t%d\n", + seq_printf(m, "%s\t%lu\t%d\t%d\n", ss->name, ss->root->subsys_bits, - ss->root->number_of_cgroups); + ss->root->number_of_cgroups, !ss->disabled); } mutex_unlock(&cgroup_mutex); return 0; @@ -3010,3 +3018,27 @@ static void cgroup_release_agent(struct work_struct *work) spin_unlock(&release_list_lock); mutex_unlock(&cgroup_mutex); } + +static int __init cgroup_disable(char *str) +{ + int i; + char *token; + + while ((token = strsep(&str, ",")) != NULL) { + if (!*token) + continue; + + for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { + 
struct cgroup_subsys *ss = subsys[i]; + + if (!strcmp(token, ss->name)) { + ss->disabled = 1; + printk(KERN_INFO "Disabling %s control group" + " subsystem\n", ss->name); + break; + } + } + } + return 1; +} +__setup("cgroup_disable=", cgroup_disable); -- cgit v1.2.3 From 871de939030c903fd5ed50a7c4c88e02998e1cbc Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Mon, 7 Apr 2008 11:56:34 -0700 Subject: x86: fix 64-bit asm NOPS for CONFIG_GENERIC_CPU ASM_NOP's for 64-bit kernel with CONFIG_GENERIC_CPU is broken with the recent x86 nops merge. They were using GENERIC_NOPS which will truncate the upper 32bits of %rsi, because of the missing 64bit rex prefix. For now, fall back ASM NOPS for generic cpu to K8 NOPS, similar to the code before the wrong x86 nop merge. This should resolve the crash seen by Ingo on a test-system: BUG: unable to handle kernel paging request at 00000000d80d8ee8 IP: [] save_i387_ia32+0x61/0xd8 PGD b8e0067 PUD 51490067 PMD 0 Oops: 0000 [1] SMP CPU 2 Modules linked in: Pid: 3871, comm: distcc Not tainted 2.6.25-rc7-sched-devel.git-x86-latest.git #359 RIP: 0010:[] [] save_i387_ia32+0x61/0xd8 RSP: 0000:ffff81003abd3cb8 EFLAGS: 00010246 RAX: ffff810082e93400 RBX: 00000000ffc37f84 RCX: ffff8100d80d8ee0 RDX: 0000000000000000 RSI: 00000000d80d8ee0 RDI: ffff810082e93400 RBP: 00000000ffc37fdc R08: 00000000ffc37f88 R09: 0000000000000008 R10: ffff81003abd2000 R11: 0000000000000000 R12: ffff810082e93400 R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 FS: 0000000000000000(0000) GS:ffff81011fb12dc0(0063) knlGS:00000000f7f1a6c0 CS: 0010 DS: 002b ES: 002b CR0: 0000000080050033 CR2: 00000000d80d8ee8 CR3: 0000000076922000 CR4: 00000000000006e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process distcc (pid: 3871, threadinfo ffff81003abd2000, task ffff8100d80d8ee0) Stack: ffff8100bb670380 ffffffff8026de50 0000000000000118 0000000000000002 0000000000000002 ffff81003abd3e68 ffff81003abd3ed8 ffff81003abd3de8 ffff81003abd3d18 ffffffff80229785 ffff8100d80d8ee0 ffff810001041280 Call Trace: [] ? __generic_file_aio_write_nolock+0x343/0x377 [] ? update_curr+0x54/0x64 [] ? ia32_setup_sigcontext+0x125/0x1d2 [] ? ia32_setup_frame+0x73/0x1a5 [] ? do_notify_resume+0x1aa/0x7db [] ? getnstimeofday+0x31/0x85 [] ? ktime_get_ts+0x17/0x48 [] ? ktime_get+0xc/0x41 [] ? hrtimer_nanosleep+0x75/0xd5 [] ? hrtimer_wakeup+0x0/0x21 [] ? int_signal+0x12/0x17 [] ? 
dummy_file_free_security+0x0/0x1 Code: a6 08 05 00 00 f6 40 14 01 74 34 4c 89 e7 48 0f ae 07 48 8b 86 08 05 00 00 80 78 02 00 79 02 db e2 90 8d b4 26 00 00 00 00 89 f6 <48> 8b 46 08 83 60 14 fe 0f 20 c0 48 83 c8 08 0f 22 c0 eb 07 c6 Signed-off-by: Suresh Siddha Signed-off-by: Ingo Molnar --- include/asm-x86/nops.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/asm-x86/nops.h b/include/asm-x86/nops.h index e3b2bce0aff8..b3930ae539b3 100644 --- a/include/asm-x86/nops.h +++ b/include/asm-x86/nops.h @@ -73,16 +73,7 @@ #define P6_NOP7 ".byte 0x0f,0x1f,0x80,0,0,0,0\n" #define P6_NOP8 ".byte 0x0f,0x1f,0x84,0x00,0,0,0,0\n" -#if defined(CONFIG_MK8) -#define ASM_NOP1 K8_NOP1 -#define ASM_NOP2 K8_NOP2 -#define ASM_NOP3 K8_NOP3 -#define ASM_NOP4 K8_NOP4 -#define ASM_NOP5 K8_NOP5 -#define ASM_NOP6 K8_NOP6 -#define ASM_NOP7 K8_NOP7 -#define ASM_NOP8 K8_NOP8 -#elif defined(CONFIG_MK7) +#if defined(CONFIG_MK7) #define ASM_NOP1 K7_NOP1 #define ASM_NOP2 K7_NOP2 #define ASM_NOP3 K7_NOP3 @@ -100,6 +91,15 @@ #define ASM_NOP6 P6_NOP6 #define ASM_NOP7 P6_NOP7 #define ASM_NOP8 P6_NOP8 +#elif defined(CONFIG_X86_64) +#define ASM_NOP1 K8_NOP1 +#define ASM_NOP2 K8_NOP2 +#define ASM_NOP3 K8_NOP3 +#define ASM_NOP4 K8_NOP4 +#define ASM_NOP5 K8_NOP5 +#define ASM_NOP6 K8_NOP6 +#define ASM_NOP7 K8_NOP7 +#define ASM_NOP8 K8_NOP8 #else #define ASM_NOP1 GENERIC_NOP1 #define ASM_NOP2 GENERIC_NOP2 -- cgit v1.2.3 From 2557a933b795c1988c721ebb871cd735128bb9cb Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Mon, 7 Apr 2008 14:30:28 +1000 Subject: virtio: remove overzealous BUG_ON. The 'disable_cb' callback is designed as an optimization to tell the host we don't need callbacks now. As it is not reliable, the debug check is overzealous: it can happen on two CPUs at the same time. Document this. Even if it were reliable, the virtio_net driver doesn't disable callbacks on transmit so the START_USE/END_USE debugging reentrance protection can be easily tripped even on UP. Thanks to Balaji Rao for the bug report and testing. Signed-off-by: Rusty Russell CC: Balaji Rao Signed-off-by: Linus Torvalds --- drivers/virtio/virtio_ring.c | 3 --- include/linux/virtio.h | 5 ++++- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index aa714028641e..c2fa5c630813 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -214,10 +214,7 @@ static void vring_disable_cb(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); - START_USE(vq); - BUG_ON(vq->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT); vq->vring.avail->flags |= VRING_AVAIL_F_NO_INTERRUPT; - END_USE(vq); } static bool vring_enable_cb(struct virtqueue *_vq) diff --git a/include/linux/virtio.h b/include/linux/virtio.h index 12c18ac1b973..e7d10845b3c1 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -41,6 +41,8 @@ struct virtqueue * Returns NULL or the "data" token handed to add_buf. * @disable_cb: disable callbacks * vq: the struct virtqueue we're talking about. + * Note that this is not necessarily synchronous, hence unreliable and only + * useful as an optimization. * @enable_cb: restart callbacks after disable_cb. * vq: the struct virtqueue we're talking about. * This re-enables callbacks; it returns "false" if there are pending @@ -48,7 +50,8 @@ struct virtqueue * checking for more work, and enabling callbacks. 
* * Locking rules are straightforward: the driver is responsible for - * locking. No two operations may be invoked simultaneously. + * locking. No two operations may be invoked simultaneously, with the exception + * of @disable_cb. * * All operations can be called in any context. */ -- cgit v1.2.3 From 9c5a3d729cf430609d091ff610a7db363aafcd47 Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Sat, 5 Apr 2008 15:13:23 +0100 Subject: [MIPS] Handle aliases in vmalloc correctly. flush_cache_vmap / flush_cache_vunmap were calling flush_cache_all which - having been deprecated - turned into a nop ... Signed-off-by: Ralf Baechle --- arch/mips/mm/c-r4k.c | 14 ++++++++++++++ arch/mips/mm/c-tx39.c | 15 +++++++++++++++ arch/mips/mm/cache.c | 3 +++ include/asm-mips/cacheflush.h | 18 ++++++++++++++++-- 4 files changed, 48 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c index 6496925b5e29..77aefb4ebedd 100644 --- a/arch/mips/mm/c-r4k.c +++ b/arch/mips/mm/c-r4k.c @@ -361,6 +361,16 @@ static inline int has_valid_asid(const struct mm_struct *mm) #endif } +static void r4k__flush_cache_vmap(void) +{ + r4k_blast_dcache(); +} + +static void r4k__flush_cache_vunmap(void) +{ + r4k_blast_dcache(); +} + static inline void local_r4k_flush_cache_range(void * args) { struct vm_area_struct *vma = args; @@ -1281,6 +1291,10 @@ void __cpuinit r4k_cache_init(void) PAGE_SIZE - 1); else shm_align_mask = PAGE_SIZE-1; + + __flush_cache_vmap = r4k__flush_cache_vmap; + __flush_cache_vunmap = r4k__flush_cache_vunmap; + flush_cache_all = cache_noop; __flush_cache_all = r4k___flush_cache_all; flush_cache_mm = r4k_flush_cache_mm; diff --git a/arch/mips/mm/c-tx39.c b/arch/mips/mm/c-tx39.c index b09d56981d53..a9f7f1f5e9b4 100644 --- a/arch/mips/mm/c-tx39.c +++ b/arch/mips/mm/c-tx39.c @@ -122,6 +122,16 @@ static inline void tx39_blast_icache(void) local_irq_restore(flags); } +static void tx39__flush_cache_vmap(void) +{ + tx39_blast_dcache(); +} + +static void tx39__flush_cache_vunmap(void) +{ + tx39_blast_dcache(); +} + static inline void tx39_flush_cache_all(void) { if (!cpu_has_dc_aliases) @@ -344,6 +354,8 @@ void __cpuinit tx39_cache_init(void) switch (current_cpu_type()) { case CPU_TX3912: /* TX39/H core (writethru direct-map cache) */ + __flush_cache_vmap = tx39__flush_cache_vmap; + __flush_cache_vunmap = tx39__flush_cache_vunmap; flush_cache_all = tx39h_flush_icache_all; __flush_cache_all = tx39h_flush_icache_all; flush_cache_mm = (void *) tx39h_flush_icache_all; @@ -369,6 +381,9 @@ void __cpuinit tx39_cache_init(void) write_c0_wired(0); /* set 8 on reset... 
*/ /* board-dependent init code may set WBON */ + __flush_cache_vmap = tx39__flush_cache_vmap; + __flush_cache_vunmap = tx39__flush_cache_vunmap; + flush_cache_all = tx39_flush_cache_all; __flush_cache_all = tx39___flush_cache_all; flush_cache_mm = tx39_flush_cache_mm; diff --git a/arch/mips/mm/cache.c b/arch/mips/mm/cache.c index 51ab1faa027d..f5903679ee6a 100644 --- a/arch/mips/mm/cache.c +++ b/arch/mips/mm/cache.c @@ -30,6 +30,9 @@ void (*flush_cache_page)(struct vm_area_struct *vma, unsigned long page, unsigned long pfn); void (*flush_icache_range)(unsigned long start, unsigned long end); +void (*__flush_cache_vmap)(void); +void (*__flush_cache_vunmap)(void); + /* MIPS specific cache operations */ void (*flush_cache_sigtramp)(unsigned long addr); void (*local_flush_data_cache_page)(void * addr); diff --git a/include/asm-mips/cacheflush.h b/include/asm-mips/cacheflush.h index 01e7eadc97e2..d5c0f2fda51b 100644 --- a/include/asm-mips/cacheflush.h +++ b/include/asm-mips/cacheflush.h @@ -63,8 +63,22 @@ static inline void flush_icache_page(struct vm_area_struct *vma, } extern void (*flush_icache_range)(unsigned long start, unsigned long end); -#define flush_cache_vmap(start, end) flush_cache_all() -#define flush_cache_vunmap(start, end) flush_cache_all() + +extern void (*__flush_cache_vmap)(void); + +static inline void flush_cache_vmap(unsigned long start, unsigned long end) +{ + if (cpu_has_dc_aliases) + __flush_cache_vmap(); +} + +extern void (*__flush_cache_vunmap)(void); + +static inline void flush_cache_vunmap(unsigned long start, unsigned long end) +{ + if (cpu_has_dc_aliases) + __flush_cache_vunmap(); +} extern void copy_to_user_page(struct vm_area_struct *vma, struct page *page, unsigned long vaddr, void *dst, const void *src, -- cgit v1.2.3 From 882bebaaca4bb1484078d44ef011f918c0e1e14e Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Mon, 7 Apr 2008 22:33:07 -0700 Subject: [TCP]: tcp_simple_retransmit can cause S+L MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This fixes Bugzilla #10384 tcp_simple_retransmit does L increment without any checking whatsoever for overflowing S+L when Reno is in use. The simplest scenario I can currently think of is rather complex in practice (there might be some more straightforward cases though). Ie., if mss is reduced during mtu probing, it may end up marking everything lost and if some duplicate ACKs arrived prior to that sacked_out will be non-zero as well, leading to S+L > packets_out, tcp_clean_rtx_queue on the next cumulative ACK or tcp_fastretrans_alert on the next duplicate ACK will fix the S counter. More straightforward (but questionable) solution would be to just call tcp_reset_reno_sack() in tcp_simple_retransmit but it would negatively impact the probe's retransmission, ie., the retransmissions would not occur if some duplicate ACKs had arrived. So I had to add reno sacked_out reseting to CA_Loss state when the first cumulative ACK arrives (this stale sacked_out might actually be the explanation for the reports of left_out overflows in kernel prior to 2.6.23 and S+L overflow reports of 2.6.24). However, this alone won't be enough to fix kernel before 2.6.24 because it is building on top of the commit 1b6d427bb7e ([TCP]: Reduce sacked_out with reno when purging write_queue) to keep the sacked_out from overflowing. Signed-off-by: Ilpo Järvinen Reported-by: Alessandro Suardi Signed-off-by: David S. 
Miller --- include/net/tcp.h | 2 ++ net/ipv4/tcp_input.c | 24 ++++++++++++++++++------ net/ipv4/tcp_output.c | 3 +++ 3 files changed, 23 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index 7de4ea3a04d9..4fd3eb2f8ec2 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -752,6 +752,8 @@ static inline unsigned int tcp_packets_in_flight(const struct tcp_sock *tp) return tp->packets_out - tcp_left_out(tp) + tp->retrans_out; } +extern int tcp_limit_reno_sacked(struct tcp_sock *tp); + /* If cwnd > ssthresh, we may raise ssthresh to be half-way to cwnd. * The exception is rate halving phase, when cwnd is decreasing towards * ssthresh. diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 7d0958785bfb..b4812c3cbbcf 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1625,13 +1625,11 @@ out: return flag; } -/* If we receive more dupacks than we expected counting segments - * in assumption of absent reordering, interpret this as reordering. - * The only another reason could be bug in receiver TCP. +/* Limits sacked_out so that sum with lost_out isn't ever larger than + * packets_out. Returns zero if sacked_out adjustement wasn't necessary. */ -static void tcp_check_reno_reordering(struct sock *sk, const int addend) +int tcp_limit_reno_sacked(struct tcp_sock *tp) { - struct tcp_sock *tp = tcp_sk(sk); u32 holes; holes = max(tp->lost_out, 1U); @@ -1639,8 +1637,20 @@ static void tcp_check_reno_reordering(struct sock *sk, const int addend) if ((tp->sacked_out + holes) > tp->packets_out) { tp->sacked_out = tp->packets_out - holes; - tcp_update_reordering(sk, tp->packets_out + addend, 0); + return 1; } + return 0; +} + +/* If we receive more dupacks than we expected counting segments + * in assumption of absent reordering, interpret this as reordering. + * The only another reason could be bug in receiver TCP. + */ +static void tcp_check_reno_reordering(struct sock *sk, const int addend) +{ + struct tcp_sock *tp = tcp_sk(sk); + if (tcp_limit_reno_sacked(tp)) + tcp_update_reordering(sk, tp->packets_out + addend, 0); } /* Emulate SACKs for SACKless connection: account for a new dupack. */ @@ -2600,6 +2610,8 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag) case TCP_CA_Loss: if (flag & FLAG_DATA_ACKED) icsk->icsk_retransmits = 0; + if (tcp_is_reno(tp) && flag & FLAG_SND_UNA_ADVANCED) + tcp_reset_reno_sack(tp); if (!tcp_try_undo_loss(sk)) { tcp_moderate_cwnd(tp); tcp_xmit_retransmit_queue(sk); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 72b9350006fe..d29ef79c00ca 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1808,6 +1808,9 @@ void tcp_simple_retransmit(struct sock *sk) if (!lost) return; + if (tcp_is_reno(tp)) + tcp_limit_reno_sacked(tp); + tcp_verify_left_out(tp); /* Don't muck with the congestion window here. -- cgit v1.2.3 From a31b9dd8edc6e4e75b5299fee6093b3c54548446 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 10 Apr 2008 16:10:45 +0100 Subject: FRV: Handle update_mmu_cache() being called when current->mm is NULL [try #2] Handle update_mmu_cache() being called when current->mm is NULL. We cache static TLB mappings for the current page table in DAMPR4 and DAMPR5 on the theory that the next data lookup is likely to be in the same general region, and thus is likely to be mapped by the same page table. However, we can't get this information if we can't access the appropriate mm_struct. 
If current->mm is NULL, we just clear the cache in the knowledge that the TLB miss handlers will load it. Signed-off-by: David Howells Signed-off-by: Linus Torvalds --- include/asm-frv/pgtable.h | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/asm-frv/pgtable.h b/include/asm-frv/pgtable.h index 6c0682ed5fc9..4e219046fe42 100644 --- a/include/asm-frv/pgtable.h +++ b/include/asm-frv/pgtable.h @@ -507,13 +507,22 @@ static inline int pte_file(pte_t pte) */ static inline void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t pte) { + struct mm_struct *mm; unsigned long ampr; - pgd_t *pge = pgd_offset(current->mm, address); - pud_t *pue = pud_offset(pge, address); - pmd_t *pme = pmd_offset(pue, address); - ampr = pme->ste[0] & 0xffffff00; - ampr |= xAMPRx_L | xAMPRx_SS_16Kb | xAMPRx_S | xAMPRx_C | xAMPRx_V; + mm = current->mm; + if (mm) { + pgd_t *pge = pgd_offset(mm, address); + pud_t *pue = pud_offset(pge, address); + pmd_t *pme = pmd_offset(pue, address); + + ampr = pme->ste[0] & 0xffffff00; + ampr |= xAMPRx_L | xAMPRx_SS_16Kb | xAMPRx_S | xAMPRx_C | + xAMPRx_V; + } else { + address = ULONG_MAX; + ampr = 0; + } asm volatile("movgs %0,scr0\n" "movgs %0,scr1\n" -- cgit v1.2.3 From 0c93d8e4d342b1b5cda1037f2527fcf443c80fbc Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 10 Apr 2008 16:10:50 +0100 Subject: FRV: Move STACK_TOP_MAX up [try #2] Move STACK_TOP_MAX up so that we don't try moving the stack above it as that causes setup_arg_pages() to malfunction. Signed-off-by: David Howells Signed-off-by: Linus Torvalds --- include/asm-frv/mem-layout.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-frv/mem-layout.h b/include/asm-frv/mem-layout.h index 83532252b8be..734a1d0583b6 100644 --- a/include/asm-frv/mem-layout.h +++ b/include/asm-frv/mem-layout.h @@ -60,7 +60,7 @@ */ #define BRK_BASE __UL(2 * 1024 * 1024 + PAGE_SIZE) #define STACK_TOP __UL(2 * 1024 * 1024) -#define STACK_TOP_MAX STACK_TOP +#define STACK_TOP_MAX __UL(0xc0000000) /* userspace process size */ #ifdef CONFIG_MMU -- cgit v1.2.3 From e31c243f984628d02f045dc4b622f1e2827860dc Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 10 Apr 2008 16:10:55 +0100 Subject: FRV: Add support for emulation of userspace atomic ops [try #2] Use traps 120-126 to emulate atomic cmpxchg32, xchg32, and XOR-, OR-, AND-, SUB- and ADD-to-memory operations for userspace. 
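In C terms, the trap 120 path implemented below has roughly this compare-and-swap semantic (a sketch of the semantics only, not the real userspace stub, which issues a TIRA with its arguments in gr8-gr10 and picks up the results from gr5/gr9):

    typedef unsigned int u32;

    /* What the handler does for TBR_TT_ATOMIC_CMPXCHG32: return the old
     * memory value, and store 'new' only if the old value equals 'test'.
     * In the kernel the read-compare-write is done under a spinlock with
     * the user access performed via __get_user()/__put_user(). */
    static u32 emulated_cmpxchg32(u32 *ptr, u32 test, u32 new)
    {
            u32 old = *ptr;
            if (old == test)
                    *ptr = new;
            return old;
    }
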
Signed-off-by: David Howells Signed-off-by: Linus Torvalds --- arch/frv/kernel/entry-table.S | 8 +- arch/frv/kernel/entry.S | 20 ++++ arch/frv/kernel/traps.c | 227 ++++++++++++++++++++++++++++++++++++++++++ include/asm-frv/spr-regs.h | 14 +++ 4 files changed, 268 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/arch/frv/kernel/entry-table.S b/arch/frv/kernel/entry-table.S index d3b9253d862a..bf35f33e48c9 100644 --- a/arch/frv/kernel/entry-table.S +++ b/arch/frv/kernel/entry-table.S @@ -316,8 +316,14 @@ __trap_fixup_kernel_data_tlb_miss: .section .trap.vector .org TBR_TT_TRAP0 >> 2 .long system_call - .rept 126 + .rept 119 .long __entry_unsupported_trap .endr + + # userspace atomic op emulation, traps 120-126 + .rept 7 + .long __entry_atomic_op + .endr + .org TBR_TT_BREAK >> 2 .long __entry_debug_exception diff --git a/arch/frv/kernel/entry.S b/arch/frv/kernel/entry.S index f36d7f4a7c25..b8a4b94779b1 100644 --- a/arch/frv/kernel/entry.S +++ b/arch/frv/kernel/entry.S @@ -654,6 +654,26 @@ __entry_debug_exception: movgs gr4,psr jmpl @(gr5,gr0) ; call ill_insn(esfr1,epcr0,esr0) +############################################################################### +# +# handle atomic operation emulation for userspace +# +############################################################################### + .globl __entry_atomic_op +__entry_atomic_op: + LEDS 0x6012 + sethi.p %hi(atomic_operation),gr5 + setlo %lo(atomic_operation),gr5 + movsg esfr1,gr8 + movsg epcr0,gr9 + movsg esr0,gr10 + + # now that we've accessed the exception regs, we can enable exceptions + movsg psr,gr4 + ori gr4,#PSR_ET,gr4 + movgs gr4,psr + jmpl @(gr5,gr0) ; call atomic_operation(esfr1,epcr0,esr0) + ############################################################################### # # handle media exception diff --git a/arch/frv/kernel/traps.c b/arch/frv/kernel/traps.c index 2e6098c85578..2f7e66877f3b 100644 --- a/arch/frv/kernel/traps.c +++ b/arch/frv/kernel/traps.c @@ -100,6 +100,233 @@ asmlinkage void illegal_instruction(unsigned long esfr1, unsigned long epcr0, un force_sig_info(info.si_signo, &info, current); } /* end illegal_instruction() */ +/*****************************************************************************/ +/* + * handle atomic operations with errors + * - arguments in gr8, gr9, gr10 + * - original memory value placed in gr5 + * - replacement memory value placed in gr9 + */ +asmlinkage void atomic_operation(unsigned long esfr1, unsigned long epcr0, + unsigned long esr0) +{ + static DEFINE_SPINLOCK(atomic_op_lock); + unsigned long x, y, z, *p; + mm_segment_t oldfs; + siginfo_t info; + int ret; + + y = 0; + z = 0; + + oldfs = get_fs(); + if (!user_mode(__frame)) + set_fs(KERNEL_DS); + + switch (__frame->tbr & TBR_TT) { + /* TIRA gr0,#120 + * u32 __atomic_user_cmpxchg32(u32 *ptr, u32 test, u32 new) + */ + case TBR_TT_ATOMIC_CMPXCHG32: + p = (unsigned long *) __frame->gr8; + x = __frame->gr9; + y = __frame->gr10; + + for (;;) { + ret = get_user(z, p); + if (ret < 0) + goto error; + + if (z != x) + goto done; + + spin_lock_irq(&atomic_op_lock); + + if (__get_user(z, p) == 0) { + if (z != x) + goto done2; + + if (__put_user(y, p) == 0) + goto done2; + goto error2; + } + + spin_unlock_irq(&atomic_op_lock); + } + + /* TIRA gr0,#121 + * u32 __atomic_kernel_xchg32(void *v, u32 new) + */ + case TBR_TT_ATOMIC_XCHG32: + p = (unsigned long *) __frame->gr8; + y = __frame->gr9; + + for (;;) { + ret = get_user(z, p); + if (ret < 0) + goto error; + + spin_lock_irq(&atomic_op_lock); + + if (__get_user(z, p) == 0) { + 
if (__put_user(y, p) == 0) + goto done2; + goto error2; + } + + spin_unlock_irq(&atomic_op_lock); + } + + /* TIRA gr0,#122 + * ulong __atomic_kernel_XOR_return(ulong i, ulong *v) + */ + case TBR_TT_ATOMIC_XOR: + p = (unsigned long *) __frame->gr8; + x = __frame->gr9; + + for (;;) { + ret = get_user(z, p); + if (ret < 0) + goto error; + + spin_lock_irq(&atomic_op_lock); + + if (__get_user(z, p) == 0) { + y = x ^ z; + if (__put_user(y, p) == 0) + goto done2; + goto error2; + } + + spin_unlock_irq(&atomic_op_lock); + } + + /* TIRA gr0,#123 + * ulong __atomic_kernel_OR_return(ulong i, ulong *v) + */ + case TBR_TT_ATOMIC_OR: + p = (unsigned long *) __frame->gr8; + x = __frame->gr9; + + for (;;) { + ret = get_user(z, p); + if (ret < 0) + goto error; + + spin_lock_irq(&atomic_op_lock); + + if (__get_user(z, p) == 0) { + y = x ^ z; + if (__put_user(y, p) == 0) + goto done2; + goto error2; + } + + spin_unlock_irq(&atomic_op_lock); + } + + /* TIRA gr0,#124 + * ulong __atomic_kernel_AND_return(ulong i, ulong *v) + */ + case TBR_TT_ATOMIC_AND: + p = (unsigned long *) __frame->gr8; + x = __frame->gr9; + + for (;;) { + ret = get_user(z, p); + if (ret < 0) + goto error; + + spin_lock_irq(&atomic_op_lock); + + if (__get_user(z, p) == 0) { + y = x & z; + if (__put_user(y, p) == 0) + goto done2; + goto error2; + } + + spin_unlock_irq(&atomic_op_lock); + } + + /* TIRA gr0,#125 + * int __atomic_user_sub_return(atomic_t *v, int i) + */ + case TBR_TT_ATOMIC_SUB: + p = (unsigned long *) __frame->gr8; + x = __frame->gr9; + + for (;;) { + ret = get_user(z, p); + if (ret < 0) + goto error; + + spin_lock_irq(&atomic_op_lock); + + if (__get_user(z, p) == 0) { + y = z - x; + if (__put_user(y, p) == 0) + goto done2; + goto error2; + } + + spin_unlock_irq(&atomic_op_lock); + } + + /* TIRA gr0,#126 + * int __atomic_user_add_return(atomic_t *v, int i) + */ + case TBR_TT_ATOMIC_ADD: + p = (unsigned long *) __frame->gr8; + x = __frame->gr9; + + for (;;) { + ret = get_user(z, p); + if (ret < 0) + goto error; + + spin_lock_irq(&atomic_op_lock); + + if (__get_user(z, p) == 0) { + y = z + x; + if (__put_user(y, p) == 0) + goto done2; + goto error2; + } + + spin_unlock_irq(&atomic_op_lock); + } + + default: + BUG(); + } + +done2: + spin_unlock_irq(&atomic_op_lock); +done: + if (!user_mode(__frame)) + set_fs(oldfs); + __frame->gr5 = z; + __frame->gr9 = y; + return; + +error2: + spin_unlock_irq(&atomic_op_lock); +error: + if (!user_mode(__frame)) + set_fs(oldfs); + __frame->pc -= 4; + + die_if_kernel("-- Atomic Op Error --\n"); + + info.si_signo = SIGSEGV; + info.si_code = SEGV_ACCERR; + info.si_errno = 0; + info.si_addr = (void *) __frame->pc; + + force_sig_info(info.si_signo, &info, current); +} + /*****************************************************************************/ /* * diff --git a/include/asm-frv/spr-regs.h b/include/asm-frv/spr-regs.h index c2a541ef828d..01e6af5e99b8 100644 --- a/include/asm-frv/spr-regs.h +++ b/include/asm-frv/spr-regs.h @@ -99,9 +99,23 @@ #define TBR_TT_TRAP1 (0x81 << 4) #define TBR_TT_TRAP2 (0x82 << 4) #define TBR_TT_TRAP3 (0x83 << 4) +#define TBR_TT_TRAP120 (0xf8 << 4) +#define TBR_TT_TRAP121 (0xf9 << 4) +#define TBR_TT_TRAP122 (0xfa << 4) +#define TBR_TT_TRAP123 (0xfb << 4) +#define TBR_TT_TRAP124 (0xfc << 4) +#define TBR_TT_TRAP125 (0xfd << 4) #define TBR_TT_TRAP126 (0xfe << 4) #define TBR_TT_BREAK (0xff << 4) +#define TBR_TT_ATOMIC_CMPXCHG32 TBR_TT_TRAP120 +#define TBR_TT_ATOMIC_XCHG32 TBR_TT_TRAP121 +#define TBR_TT_ATOMIC_XOR TBR_TT_TRAP122 +#define TBR_TT_ATOMIC_OR TBR_TT_TRAP123 +#define 
TBR_TT_ATOMIC_AND TBR_TT_TRAP124 +#define TBR_TT_ATOMIC_SUB TBR_TT_TRAP125 +#define TBR_TT_ATOMIC_ADD TBR_TT_TRAP126 + #define __get_TBR() ({ unsigned long x; asm volatile("movsg tbr,%0" : "=r"(x)); x; }) /* -- cgit v1.2.3 From f17520e1f19172057328e50ffed01a42534921e9 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 10 Apr 2008 16:11:05 +0100 Subject: FRV: Don't make smp_{r, w, }mb() interpolate MEMBAR when CONFIG_SMP=n [try #2] Don't make smp_{r,w,}mb() interpolate a MEMBAR instruction when CONFIG_SMP=n as SMP memory barriers on UP systems should interpolate a compiler barrier only. Signed-off-by: David Howells Signed-off-by: Linus Torvalds --- include/asm-frv/system.h | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/asm-frv/system.h b/include/asm-frv/system.h index 2c57f4734746..30a67a9da11a 100644 --- a/include/asm-frv/system.h +++ b/include/asm-frv/system.h @@ -179,14 +179,23 @@ do { \ #define mb() asm volatile ("membar" : : :"memory") #define rmb() asm volatile ("membar" : : :"memory") #define wmb() asm volatile ("membar" : : :"memory") -#define set_mb(var, value) do { var = value; mb(); } while (0) +#define read_barrier_depends() barrier() -#define smp_mb() mb() -#define smp_rmb() rmb() -#define smp_wmb() wmb() - -#define read_barrier_depends() do {} while(0) +#ifdef CONFIG_SMP +#define smp_mb() mb() +#define smp_rmb() rmb() +#define smp_wmb() wmb() #define smp_read_barrier_depends() read_barrier_depends() +#define set_mb(var, value) \ + do { xchg(&var, (value)); } while (0) +#else +#define smp_mb() barrier() +#define smp_rmb() barrier() +#define smp_wmb() barrier() +#define smp_read_barrier_depends() do {} while(0) +#define set_mb(var, value) \ + do { var = (value); barrier(); } while (0) +#endif #define HARD_RESET_NOW() \ do { \ -- cgit v1.2.3 From 54a015104136974262afa4b8ddd943ea70dec8a2 Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Thu, 10 Apr 2008 15:37:38 -0700 Subject: asmlinkage_protect replaces prevent_tail_call The prevent_tail_call() macro works around the problem of the compiler clobbering argument words on the stack, which for asmlinkage functions is the caller's (user's) struct pt_regs. The tail/sibling-call optimization is not the only way that the compiler can decide to use stack argument words as scratch space, which we have to prevent. Other optimizations can do it too. Until we have new compiler support to make "asmlinkage" binding on the compiler's own use of the stack argument frame, we have to work around all the manifestations of this issue that crop up. More cases seem to be prevented by also keeping the incoming argument variables live at the end of the function. This makes their original stack slots attractive places to leave those variables, so the compiler tends not to clobber them for something else. It's still no guarantee, but it handles some observed cases that prevent_tail_call() did not. 
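The idiom applied in every wrapper touched by the diff that follows is the same: compute the result into a local, pin the return value and the incoming arguments with asmlinkage_protect(), then return immediately. A minimal sketch of a hypothetical 32-bit wrapper (sys_example() and do_example() are made-up names used only for illustration, not part of this patch):

asmlinkage long sys_example(unsigned int fd, unsigned long arg)
{
	long ret = do_example(fd, arg);

	/*
	 * Keep "ret" and the stack-passed arguments live until the
	 * return, so gcc neither tail-calls do_example() nor reuses
	 * the caller-owned argument slots as scratch space.
	 */
	asmlinkage_protect(2, ret, fd, arg);
	return ret;
}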
Signed-off-by: Roland McGrath Signed-off-by: Linus Torvalds --- arch/x86/kernel/tls.c | 4 ++-- fs/open.c | 8 ++++---- include/asm-x86/linkage.h | 24 +++++++++++++++++++++++- include/linux/linkage.h | 4 ++-- kernel/exit.c | 4 ++-- kernel/uid16.c | 22 +++++++++++----------- 6 files changed, 44 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/tls.c b/arch/x86/kernel/tls.c index 022bcaa3b42e..ab6bf375a307 100644 --- a/arch/x86/kernel/tls.c +++ b/arch/x86/kernel/tls.c @@ -92,7 +92,7 @@ int do_set_thread_area(struct task_struct *p, int idx, asmlinkage int sys_set_thread_area(struct user_desc __user *u_info) { int ret = do_set_thread_area(current, -1, u_info, 1); - prevent_tail_call(ret); + asmlinkage_protect(1, ret, u_info); return ret; } @@ -142,7 +142,7 @@ int do_get_thread_area(struct task_struct *p, int idx, asmlinkage int sys_get_thread_area(struct user_desc __user *u_info) { int ret = do_get_thread_area(current, -1, u_info); - prevent_tail_call(ret); + asmlinkage_protect(1, ret, u_info); return ret; } diff --git a/fs/open.c b/fs/open.c index a4b12022edaa..3fa4e4ffce4c 100644 --- a/fs/open.c +++ b/fs/open.c @@ -335,7 +335,7 @@ asmlinkage long sys_ftruncate(unsigned int fd, unsigned long length) { long ret = do_sys_ftruncate(fd, length, 1); /* avoid REGPARM breakage on x86: */ - prevent_tail_call(ret); + asmlinkage_protect(2, ret, fd, length); return ret; } @@ -350,7 +350,7 @@ asmlinkage long sys_ftruncate64(unsigned int fd, loff_t length) { long ret = do_sys_ftruncate(fd, length, 0); /* avoid REGPARM breakage on x86: */ - prevent_tail_call(ret); + asmlinkage_protect(2, ret, fd, length); return ret; } #endif @@ -1067,7 +1067,7 @@ asmlinkage long sys_open(const char __user *filename, int flags, int mode) ret = do_sys_open(AT_FDCWD, filename, flags, mode); /* avoid REGPARM breakage on x86: */ - prevent_tail_call(ret); + asmlinkage_protect(3, ret, filename, flags, mode); return ret; } @@ -1081,7 +1081,7 @@ asmlinkage long sys_openat(int dfd, const char __user *filename, int flags, ret = do_sys_open(dfd, filename, flags, mode); /* avoid REGPARM breakage on x86: */ - prevent_tail_call(ret); + asmlinkage_protect(4, ret, dfd, filename, flags, mode); return ret; } diff --git a/include/asm-x86/linkage.h b/include/asm-x86/linkage.h index 31739c7d66a9..d605eeba0f70 100644 --- a/include/asm-x86/linkage.h +++ b/include/asm-x86/linkage.h @@ -8,12 +8,34 @@ #ifdef CONFIG_X86_32 #define asmlinkage CPP_ASMLINKAGE __attribute__((regparm(0))) -#define prevent_tail_call(ret) __asm__ ("" : "=r" (ret) : "0" (ret)) /* * For 32-bit UML - mark functions implemented in assembly that use * regparm input parameters: */ #define asmregparm __attribute__((regparm(3))) + +#define asmlinkage_protect(n, ret, args...) \ + __asmlinkage_protect##n(ret, ##args) +#define __asmlinkage_protect_n(ret, args...) 
\ + __asm__ __volatile__ ("" : "=r" (ret) : "0" (ret), ##args) +#define __asmlinkage_protect0(ret) \ + __asmlinkage_protect_n(ret) +#define __asmlinkage_protect1(ret, arg1) \ + __asmlinkage_protect_n(ret, "g" (arg1)) +#define __asmlinkage_protect2(ret, arg1, arg2) \ + __asmlinkage_protect_n(ret, "g" (arg1), "g" (arg2)) +#define __asmlinkage_protect3(ret, arg1, arg2, arg3) \ + __asmlinkage_protect_n(ret, "g" (arg1), "g" (arg2), "g" (arg3)) +#define __asmlinkage_protect4(ret, arg1, arg2, arg3, arg4) \ + __asmlinkage_protect_n(ret, "g" (arg1), "g" (arg2), "g" (arg3), \ + "g" (arg4)) +#define __asmlinkage_protect5(ret, arg1, arg2, arg3, arg4, arg5) \ + __asmlinkage_protect_n(ret, "g" (arg1), "g" (arg2), "g" (arg3), \ + "g" (arg4), "g" (arg5)) +#define __asmlinkage_protect6(ret, arg1, arg2, arg3, arg4, arg5, arg6) \ + __asmlinkage_protect_n(ret, "g" (arg1), "g" (arg2), "g" (arg3), \ + "g" (arg4), "g" (arg5), "g" (arg6)) + #endif #ifdef CONFIG_X86_ALIGNMENT_16 diff --git a/include/linux/linkage.h b/include/linux/linkage.h index 0592936344c4..fe2a39c489b6 100644 --- a/include/linux/linkage.h +++ b/include/linux/linkage.h @@ -17,8 +17,8 @@ # define asmregparm #endif -#ifndef prevent_tail_call -# define prevent_tail_call(ret) do { } while (0) +#ifndef asmlinkage_protect +# define asmlinkage_protect(n, ret, args...) do { } while (0) #endif #ifndef __ALIGN diff --git a/kernel/exit.c b/kernel/exit.c index 53872bf993fa..073005b1cfb2 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -1608,7 +1608,7 @@ asmlinkage long sys_waitid(int which, pid_t upid, put_pid(pid); /* avoid REGPARM breakage on x86: */ - prevent_tail_call(ret); + asmlinkage_protect(5, ret, which, upid, infop, options, ru); return ret; } @@ -1640,7 +1640,7 @@ asmlinkage long sys_wait4(pid_t upid, int __user *stat_addr, put_pid(pid); /* avoid REGPARM breakage on x86: */ - prevent_tail_call(ret); + asmlinkage_protect(4, ret, upid, stat_addr, options, ru); return ret; } diff --git a/kernel/uid16.c b/kernel/uid16.c index dd308ba4e03b..3e41c1673e2f 100644 --- a/kernel/uid16.c +++ b/kernel/uid16.c @@ -21,7 +21,7 @@ asmlinkage long sys_chown16(const char __user * filename, old_uid_t user, old_gi { long ret = sys_chown(filename, low2highuid(user), low2highgid(group)); /* avoid REGPARM breakage on x86: */ - prevent_tail_call(ret); + asmlinkage_protect(3, ret, filename, user, group); return ret; } @@ -29,7 +29,7 @@ asmlinkage long sys_lchown16(const char __user * filename, old_uid_t user, old_g { long ret = sys_lchown(filename, low2highuid(user), low2highgid(group)); /* avoid REGPARM breakage on x86: */ - prevent_tail_call(ret); + asmlinkage_protect(3, ret, filename, user, group); return ret; } @@ -37,7 +37,7 @@ asmlinkage long sys_fchown16(unsigned int fd, old_uid_t user, old_gid_t group) { long ret = sys_fchown(fd, low2highuid(user), low2highgid(group)); /* avoid REGPARM breakage on x86: */ - prevent_tail_call(ret); + asmlinkage_protect(3, ret, fd, user, group); return ret; } @@ -45,7 +45,7 @@ asmlinkage long sys_setregid16(old_gid_t rgid, old_gid_t egid) { long ret = sys_setregid(low2highgid(rgid), low2highgid(egid)); /* avoid REGPARM breakage on x86: */ - prevent_tail_call(ret); + asmlinkage_protect(2, ret, rgid, egid); return ret; } @@ -53,7 +53,7 @@ asmlinkage long sys_setgid16(old_gid_t gid) { long ret = sys_setgid(low2highgid(gid)); /* avoid REGPARM breakage on x86: */ - prevent_tail_call(ret); + asmlinkage_protect(1, ret, gid); return ret; } @@ -61,7 +61,7 @@ asmlinkage long sys_setreuid16(old_uid_t ruid, old_uid_t euid) { long ret = 
sys_setreuid(low2highuid(ruid), low2highuid(euid)); /* avoid REGPARM breakage on x86: */ - prevent_tail_call(ret); + asmlinkage_protect(2, ret, ruid, euid); return ret; } @@ -69,7 +69,7 @@ asmlinkage long sys_setuid16(old_uid_t uid) { long ret = sys_setuid(low2highuid(uid)); /* avoid REGPARM breakage on x86: */ - prevent_tail_call(ret); + asmlinkage_protect(1, ret, uid); return ret; } @@ -78,7 +78,7 @@ asmlinkage long sys_setresuid16(old_uid_t ruid, old_uid_t euid, old_uid_t suid) long ret = sys_setresuid(low2highuid(ruid), low2highuid(euid), low2highuid(suid)); /* avoid REGPARM breakage on x86: */ - prevent_tail_call(ret); + asmlinkage_protect(3, ret, ruid, euid, suid); return ret; } @@ -98,7 +98,7 @@ asmlinkage long sys_setresgid16(old_gid_t rgid, old_gid_t egid, old_gid_t sgid) long ret = sys_setresgid(low2highgid(rgid), low2highgid(egid), low2highgid(sgid)); /* avoid REGPARM breakage on x86: */ - prevent_tail_call(ret); + asmlinkage_protect(3, ret, rgid, egid, sgid); return ret; } @@ -117,7 +117,7 @@ asmlinkage long sys_setfsuid16(old_uid_t uid) { long ret = sys_setfsuid(low2highuid(uid)); /* avoid REGPARM breakage on x86: */ - prevent_tail_call(ret); + asmlinkage_protect(1, ret, uid); return ret; } @@ -125,7 +125,7 @@ asmlinkage long sys_setfsgid16(old_gid_t gid) { long ret = sys_setfsgid(low2highgid(gid)); /* avoid REGPARM breakage on x86: */ - prevent_tail_call(ret); + asmlinkage_protect(1, ret, gid); return ret; } -- cgit v1.2.3 From d10d89ec78114f925f63c5126a2b2490f501a462 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 10 Apr 2008 17:35:23 -0700 Subject: Add commentary about the new "asmlinkage_protect()" macro It's really a pretty ugly thing to need, and some day it will hopefully be obviated by teaching gcc about the magic calling conventions for the low-level system call code, but in the meantime we can at least add big honking comments about why we need these insane and strange macros. I took my comments from my version of the macro, but I ended up deciding to just pick Roland's version of the actual code instead (with his prettier syntax that uses vararg macros). Thus the previous two commits that actually implement it. Signed-off-by: Linus Torvalds --- include/asm-x86/linkage.h | 11 +++++++++++ include/linux/linkage.h | 13 +++++++++++++ 2 files changed, 24 insertions(+) (limited to 'include') diff --git a/include/asm-x86/linkage.h b/include/asm-x86/linkage.h index d605eeba0f70..c048353f4b85 100644 --- a/include/asm-x86/linkage.h +++ b/include/asm-x86/linkage.h @@ -14,6 +14,17 @@ */ #define asmregparm __attribute__((regparm(3))) +/* + * Make sure the compiler doesn't do anything stupid with the + * arguments on the stack - they are owned by the *caller*, not + * the callee. This just fools gcc into not spilling into them, + * and keeps it from doing tailcall recursion and/or using the + * stack slots for temporaries, since they are live and "used" + * all the way to the end of the function. + * + * NOTE! On x86-64, all the arguments are in registers, so this + * only matters on a 32-bit kernel. + */ #define asmlinkage_protect(n, ret, args...) \ __asmlinkage_protect##n(ret, ##args) #define __asmlinkage_protect_n(ret, args...) \ diff --git a/include/linux/linkage.h b/include/linux/linkage.h index fe2a39c489b6..b163c5c40dbc 100644 --- a/include/linux/linkage.h +++ b/include/linux/linkage.h @@ -17,6 +17,19 @@ # define asmregparm #endif +/* + * This is used by architectures to keep arguments on the stack + * untouched by the compiler by keeping them live until the end. 
+ * The argument stack may be owned by the assembly-language + * caller, not the callee, and gcc doesn't always understand + * that. + * + * We have the return value, and a maximum of six arguments. + * + * This should always be followed by a "return ret" for the + * protection to work (ie no more work that the compiler might + * end up needing stack temporaries for). + */ #ifndef asmlinkage_protect # define asmlinkage_protect(n, ret, args...) do { } while (0) #endif -- cgit v1.2.3 From 544451a1a36b06f43fc67112ede3b92e6203eb9d Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Thu, 10 Apr 2008 21:29:28 -0700 Subject: pnp: increase number of devices supported per protocol Increase the PNP "number of devices" limit. We currently use an unsigned char, which limits us to 256 devices per protocol. This patch changes that to an unsigned int. Not all backends can take advantage of this: we limit ISAPNP to 10 devices in isapnp_cfg_begin(), and PNPBIOS is limited to 256 devices because the BIOS interfaces use a one-byte device node number. But there is no limit on the number of PNPACPI devices we may have. Large HP Integrity machines have more than 256, which causes the current "unsigned char number" to wrap around. This causes errors like this: pnp: PnP ACPI init kobject_add failed for 00:00 with -EEXIST, don't try to register things with the same name in the same directory. Call Trace: [] show_stack+0x40/0xa0 [] dump_stack+0x30/0x60 [] kobject_add+0x290/0x2c0 [] device_add+0x160/0x860 [] device_register+0x30/0x60 [] __pnp_add_device+0x130/0x180 [] pnp_add_device+0xb0/0xe0 [] pnpacpi_add_device+0x510/0x5a0 [] pnpacpi_add_device_handler+0x50/0x80 This patch increases the limit to fix this PNPACPI problem. It should not have any adverse effect on ISAPNP or PNPBIOS because their limits are still enforced in the backends. Signed-off-by: Bjorn Helgaas Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/pnp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/pnp.h b/include/linux/pnp.h index 29dd55838e84..b2f05c230f4b 100644 --- a/include/linux/pnp.h +++ b/include/linux/pnp.h @@ -175,7 +175,7 @@ static inline void pnp_set_card_drvdata(struct pnp_card_link *pcard, void *data) struct pnp_dev { struct device dev; /* Driver Model device interface */ u64 dma_mask; - unsigned char number; /* used as an index, must be unique */ + unsigned int number; /* used as an index, must be unique */ int status; struct list_head global_list; /* node in global list of devices */ -- cgit v1.2.3 From b0fac02370cffad956ff3de5e8ed4df7e7b875d7 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 11 Apr 2008 13:46:54 +0200 Subject: Fix "$(AS) -traditional" compile breakage caused by asmlinkage_protect git commit 54a015104136974262afa4b8ddd943ea70dec8a2 ("asmlinkage_protect replaces prevent_tail_call") causes this build failure on s390: AS arch/s390/kernel/entry64.o In file included from arch/s390/kernel/entry64.S:14: include/linux/linkage.h:34: error: syntax error in macro parameter list make[1]: *** [arch/s390/kernel/entry64.o] Error 1 make: *** [arch/s390/kernel] Error 2 and some other architectures. The reason is that some architectures add the "-traditional" flag to the invocation of $(AS), which disables variadic macro argument support. So just surround the new define with an #ifndef __ASSEMBLY__ to prevent any side effects on asm code. 
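The fix below follows the standard pattern for headers shared between C and assembly sources: anything the traditional preprocessor cannot parse (here, a variadic macro) is hidden from the assembler entirely. A minimal sketch of the idiom, using example_protect as a made-up stand-in for the real macro:

#ifndef __ASSEMBLY__
/* Variadic macros break "$(AS) -traditional", so only C code sees this. */
#ifndef example_protect
# define example_protect(n, ret, args...) do { } while (0)
#endif
#endif /* __ASSEMBLY__ */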
Cc: Roland McGrath Cc: Martin Schwidefsky Signed-off-by: Heiko Carstens Signed-off-by: Linus Torvalds --- include/linux/linkage.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/linkage.h b/include/linux/linkage.h index b163c5c40dbc..2119610b24f8 100644 --- a/include/linux/linkage.h +++ b/include/linux/linkage.h @@ -30,9 +30,12 @@ * protection to work (ie no more work that the compiler might * end up needing stack temporaries for). */ +/* Assembly files may be compiled with -traditional .. */ +#ifndef __ASSEMBLY__ #ifndef asmlinkage_protect # define asmlinkage_protect(n, ret, args...) do { } while (0) #endif +#endif #ifndef __ALIGN #define __ALIGN .align 4,0x90 -- cgit v1.2.3 From dc07e721a26ec7e0adb66340f1763d220cfbbd0c Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 7 Apr 2008 15:59:05 -0400 Subject: Spell out behavior of atomic_dec_and_lock() in kerneldoc A little more detail here wouldn't hurt. Signed-off-by: J. Bruce Fields Signed-off-by: Jonathan Corbet --- include/linux/spinlock.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index 576a5f77d3bd..1129ee0a7180 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -341,6 +341,9 @@ static inline void double_spin_unlock(spinlock_t *l1, spinlock_t *l2, * atomic_dec_and_lock - lock on reaching reference count zero * @atomic: the atomic counter * @lock: the spinlock in question + * + * Decrements @atomic by 1. If the result is 0, returns true and locks + * @lock. Returns false for all other cases. */ extern int _atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock); #define atomic_dec_and_lock(atomic, lock) \ -- cgit v1.2.3
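For context, the behavior spelled out in that kerneldoc matches the canonical use of atomic_dec_and_lock(): drop a reference, and only take the lock (to unlink and free the object) when the count actually reaches zero. A minimal sketch with made-up names (struct example_obj, example_lock and example_free() are illustrative, not kernel symbols):

static DEFINE_SPINLOCK(example_lock);

struct example_obj {
	atomic_t		refcount;
	struct list_head	node;
};

static void example_put(struct example_obj *obj)
{
	/* Returns true, with example_lock held, only when refcount hits 0. */
	if (atomic_dec_and_lock(&obj->refcount, &example_lock)) {
		list_del(&obj->node);
		spin_unlock(&example_lock);
		example_free(obj);	/* hypothetical destructor */
	}
}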