summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/ioctl/ioctl-number.txt1
-rw-r--r--Documentation/kernel-parameters.txt4
-rw-r--r--Documentation/lockdep-design.txt6
-rw-r--r--Makefile2
-rw-r--r--arch/ia64/Makefile5
-rw-r--r--arch/ia64/include/asm/bitops.h2
-rw-r--r--arch/ia64/include/asm/pgtable.h1
-rw-r--r--arch/ia64/kernel/ia64_ksyms.c4
-rw-r--r--arch/ia64/kernel/iosapic.c4
-rw-r--r--arch/ia64/kernel/pci-dma.c5
-rw-r--r--arch/ia64/kernel/topology.c6
-rw-r--r--arch/ia64/kvm/mmio.c6
-rw-r--r--arch/ia64/kvm/vcpu.c6
-rw-r--r--arch/ia64/kvm/vcpu.h13
-rw-r--r--arch/mn10300/include/asm/pci.h1
-rw-r--r--arch/powerpc/include/asm/kvm_host.h2
-rw-r--r--arch/powerpc/kernel/dma.c6
-rw-r--r--arch/powerpc/kernel/perf_counter.c8
-rw-r--r--arch/s390/kvm/interrupt.c2
-rw-r--r--arch/sh/boards/board-ap325rxa.c2
-rw-r--r--arch/sh/boards/mach-migor/setup.c2
-rw-r--r--arch/sh/kernel/cpu/sh2/setup-sh7619.c2
-rw-r--r--arch/sh/kernel/cpu/sh2a/setup-mxg.c2
-rw-r--r--arch/sh/kernel/cpu/sh2a/setup-sh7201.c2
-rw-r--r--arch/sh/kernel/cpu/sh2a/setup-sh7203.c2
-rw-r--r--arch/sh/kernel/cpu/sh2a/setup-sh7206.c2
-rw-r--r--arch/sh/kernel/cpu/sh3/setup-sh7705.c2
-rw-r--r--arch/sh/kernel/cpu/sh3/setup-sh770x.c2
-rw-r--r--arch/sh/kernel/cpu/sh3/setup-sh7710.c2
-rw-r--r--arch/sh/kernel/cpu/sh3/setup-sh7720.c2
-rw-r--r--arch/sh/kernel/cpu/sh4/setup-sh4-202.c2
-rw-r--r--arch/sh/kernel/cpu/sh4/setup-sh7750.c2
-rw-r--r--arch/sh/kernel/cpu/sh4/setup-sh7760.c2
-rw-r--r--arch/sh/kernel/cpu/sh4a/setup-sh7343.c2
-rw-r--r--arch/sh/kernel/cpu/sh4a/setup-sh7366.c2
-rw-r--r--arch/sh/kernel/cpu/sh4a/setup-sh7722.c2
-rw-r--r--arch/sh/kernel/cpu/sh4a/setup-sh7723.c2
-rw-r--r--arch/sh/kernel/cpu/sh4a/setup-sh7724.c2
-rw-r--r--arch/sh/kernel/cpu/sh4a/setup-sh7763.c2
-rw-r--r--arch/sh/kernel/cpu/sh4a/setup-sh7770.c2
-rw-r--r--arch/sh/kernel/cpu/sh4a/setup-sh7780.c2
-rw-r--r--arch/sh/kernel/cpu/sh4a/setup-sh7785.c2
-rw-r--r--arch/sh/kernel/cpu/sh4a/setup-sh7786.c2
-rw-r--r--arch/sh/kernel/cpu/sh4a/setup-shx3.c2
-rw-r--r--arch/sh/kernel/cpu/sh5/setup-sh5.c2
-rw-r--r--arch/x86/Kconfig2
-rw-r--r--arch/x86/kernel/apic/x2apic_cluster.c8
-rw-r--r--arch/x86/kernel/apic/x2apic_phys.c8
-rw-r--r--arch/x86/kernel/cpu/amd.c7
-rw-r--r--arch/x86/kernel/cpu/common.c48
-rw-r--r--arch/x86/kernel/cpu/mcheck/therm_throt.c18
-rw-r--r--arch/x86/kernel/cpu/perf_counter.c40
-rw-r--r--arch/x86/kernel/efi.c2
-rw-r--r--arch/x86/kernel/reboot.c16
-rw-r--r--arch/x86/kernel/tsc.c29
-rw-r--r--arch/x86/kernel/vmi_32.c2
-rw-r--r--arch/x86/kvm/i8254.c3
-rw-r--r--arch/x86/kvm/mmu.c48
-rw-r--r--arch/x86/kvm/svm.c6
-rw-r--r--arch/x86/kvm/vmx.c6
-rw-r--r--arch/x86/kvm/x86.c44
-rw-r--r--drivers/ata/ahci.c79
-rw-r--r--drivers/ata/libata-core.c3
-rw-r--r--drivers/ata/pata_at91.c17
-rw-r--r--drivers/ata/pata_atiixp.c19
-rw-r--r--drivers/ata/sata_nv.c8
-rw-r--r--drivers/base/platform.c3
-rw-r--r--drivers/char/pty.c2
-rw-r--r--drivers/gpu/drm/drm_irq.c2
-rw-r--r--drivers/gpu/drm/drm_modes.c2
-rw-r--r--drivers/gpu/drm/i915/i915_irq.c4
-rw-r--r--drivers/md/md.c32
-rw-r--r--drivers/md/md.h10
-rw-r--r--drivers/md/raid5.c34
-rw-r--r--drivers/mtd/maps/sbc8240.c0
-rw-r--r--drivers/mtd/ubi/eba.c1
-rw-r--r--drivers/mtd/ubi/scan.c13
-rw-r--r--drivers/pci/hotplug/sgi_hotplug.c7
-rw-r--r--fs/nfs/direct.c20
-rw-r--r--fs/nfs/nfs4xdr.c1374
-rw-r--r--fs/nfs/read.c6
-rw-r--r--fs/nfs/write.c6
-rw-r--r--fs/ocfs2/alloc.c47
-rw-r--r--fs/ocfs2/aops.c69
-rw-r--r--fs/ocfs2/dcache.c35
-rw-r--r--fs/ocfs2/dcache.h3
-rw-r--r--fs/ocfs2/dlm/dlmast.c1
-rw-r--r--fs/ocfs2/dlm/dlmrecovery.c2
-rw-r--r--fs/ocfs2/file.c5
-rw-r--r--fs/ocfs2/journal.c8
-rw-r--r--fs/ocfs2/journal.h19
-rw-r--r--fs/ocfs2/ocfs2.h22
-rw-r--r--fs/ocfs2/quota.h1
-rw-r--r--fs/ocfs2/quota_global.c134
-rw-r--r--fs/ocfs2/quota_local.c110
-rw-r--r--fs/ocfs2/stack_o2cb.c3
-rw-r--r--fs/ocfs2/super.c30
-rw-r--r--fs/ocfs2/xattr.c3
-rw-r--r--fs/proc/base.c27
-rw-r--r--fs/proc/task_mmu.c1
-rw-r--r--fs/proc/task_nommu.c1
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.c2
-rw-r--r--fs/xfs/xfs_attr.c8
-rw-r--r--fs/xfs/xfs_bmap.c2
-rw-r--r--fs/xfs/xfs_btree.c4
-rw-r--r--fs/xfs/xfs_da_btree.c6
-rw-r--r--fs/xfs/xfs_dir2.c2
-rw-r--r--fs/xfs/xfs_fsops.c20
-rw-r--r--fs/xfs/xfs_inode.c10
-rw-r--r--fs/xfs/xfs_log.c2
-rw-r--r--fs/xfs/xfs_vnodeops.c4
-rw-r--r--include/linux/ftrace_event.h4
-rw-r--r--include/linux/kvm_host.h1
-rw-r--r--include/linux/nfs_fs.h5
-rw-r--r--include/linux/perf_counter.h60
-rw-r--r--include/linux/sunrpc/xdr.h10
-rw-r--r--include/linux/wait.h9
-rw-r--r--include/trace/ftrace.h183
-rw-r--r--kernel/futex.c28
-rw-r--r--kernel/futex_compat.c6
-rw-r--r--kernel/irq/manage.c17
-rw-r--r--kernel/irq/numa_migrate.c4
-rw-r--r--kernel/lockdep_proc.c3
-rw-r--r--kernel/perf_counter.c581
-rw-r--r--kernel/posix-cpu-timers.c7
-rw-r--r--kernel/rtmutex.c4
-rw-r--r--kernel/trace/blktrace.c12
-rw-r--r--kernel/trace/ring_buffer.c15
-rw-r--r--kernel/trace/trace.c1
-rw-r--r--kernel/trace/trace.h4
-rw-r--r--kernel/trace/trace_events_filter.c20
-rw-r--r--kernel/wait.c5
-rw-r--r--mm/mempool.c4
-rw-r--r--net/socket.c2
-rw-r--r--net/sunrpc/xdr.c12
-rwxr-xr-xscripts/recordmcount.pl9
-rw-r--r--security/selinux/hooks.c3
-rw-r--r--sound/pci/hda/patch_realtek.c20
-rw-r--r--sound/soc/fsl/efika-audio-fabric.c2
-rw-r--r--sound/soc/fsl/pcm030-audio-fabric.c2
-rw-r--r--tools/perf/Documentation/perf-examples.txt225
-rw-r--r--tools/perf/Documentation/perf-stat.txt2
-rw-r--r--tools/perf/Documentation/perf-top.txt112
-rw-r--r--tools/perf/Makefile25
-rw-r--r--tools/perf/builtin-list.c3
-rw-r--r--tools/perf/builtin-record.c108
-rw-r--r--tools/perf/builtin-report.c111
-rw-r--r--tools/perf/builtin-stat.c2
-rw-r--r--tools/perf/builtin-top.c552
-rw-r--r--tools/perf/util/callchain.c32
-rw-r--r--tools/perf/util/callchain.h8
-rw-r--r--tools/perf/util/header.c5
-rw-r--r--tools/perf/util/parse-events.c36
-rw-r--r--tools/perf/util/parse-events.h1
-rw-r--r--tools/perf/util/symbol.c72
-rw-r--r--tools/perf/util/symbol.h26
-rw-r--r--virt/kvm/ioapic.c10
-rw-r--r--virt/kvm/irq_comm.c4
158 files changed, 3577 insertions, 1436 deletions
diff --git a/Documentation/ioctl/ioctl-number.txt b/Documentation/ioctl/ioctl-number.txt
index 7bb0d934b6d8..dbea4f95fc85 100644
--- a/Documentation/ioctl/ioctl-number.txt
+++ b/Documentation/ioctl/ioctl-number.txt
@@ -139,6 +139,7 @@ Code Seq# Include File Comments
'm' all linux/synclink.h conflict!
'm' 00-1F net/irda/irmod.h conflict!
'n' 00-7F linux/ncp_fs.h
+'n' 80-8F linux/nilfs2_fs.h NILFS2
'n' E0-FF video/matrox.h matroxfb
'o' 00-1F fs/ocfs2/ocfs2_fs.h OCFS2
'o' 00-03 include/mtd/ubi-user.h conflict! (OCFS2 and UBI overlaps)
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 2f1820683b69..c08813dbfce2 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1115,6 +1115,10 @@ and is between 256 and 4096 characters. It is defined in the file
libata.dma=4 Compact Flash DMA only
Combinations also work, so libata.dma=3 enables DMA
for disks and CDROMs, but not CFs.
+
+ libata.ignore_hpa= [LIBATA] Ignore HPA limit
+ libata.ignore_hpa=0 keep BIOS limits (default)
+ libata.ignore_hpa=1 ignore limits, using full disk
libata.noacpi [LIBATA] Disables use of ACPI in libata suspend/resume
when set.
diff --git a/Documentation/lockdep-design.txt b/Documentation/lockdep-design.txt
index e20d913d5914..abf768c681e2 100644
--- a/Documentation/lockdep-design.txt
+++ b/Documentation/lockdep-design.txt
@@ -30,9 +30,9 @@ State
The validator tracks lock-class usage history into 4n + 1 separate state bits:
- 'ever held in STATE context'
-- 'ever head as readlock in STATE context'
-- 'ever head with STATE enabled'
-- 'ever head as readlock with STATE enabled'
+- 'ever held as readlock in STATE context'
+- 'ever held with STATE enabled'
+- 'ever held as readlock with STATE enabled'
Where STATE can be either one of (kernel/lockdep_states.h)
- hardirq
diff --git a/Makefile b/Makefile
index 0d46615bffe5..abcfa85f8f82 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
VERSION = 2
PATCHLEVEL = 6
SUBLEVEL = 31
-EXTRAVERSION = -rc5
+EXTRAVERSION = -rc6
NAME = Man-Eating Seals of Antiquity
# *DOCUMENTATION*
diff --git a/arch/ia64/Makefile b/arch/ia64/Makefile
index 58a7e46affda..e7cbaa02cd0b 100644
--- a/arch/ia64/Makefile
+++ b/arch/ia64/Makefile
@@ -41,11 +41,6 @@ $(error Sorry, you need a newer version of the assember, one that is built from
ftp://ftp.hpl.hp.com/pub/linux-ia64/gas-030124.tar.gz)
endif
-ifeq ($(call cc-version),0304)
- cflags-$(CONFIG_ITANIUM) += -mtune=merced
- cflags-$(CONFIG_MCKINLEY) += -mtune=mckinley
-endif
-
KBUILD_CFLAGS += $(cflags-y)
head-y := arch/ia64/kernel/head.o arch/ia64/kernel/init_task.o
diff --git a/arch/ia64/include/asm/bitops.h b/arch/ia64/include/asm/bitops.h
index e2ca80037335..57a2787bc9fb 100644
--- a/arch/ia64/include/asm/bitops.h
+++ b/arch/ia64/include/asm/bitops.h
@@ -286,7 +286,7 @@ __test_and_clear_bit(int nr, volatile void * addr)
{
__u32 *p = (__u32 *) addr + (nr >> 5);
__u32 m = 1 << (nr & 31);
- int oldbitset = *p & m;
+ int oldbitset = (*p & m) != 0;
*p &= ~m;
return oldbitset;
diff --git a/arch/ia64/include/asm/pgtable.h b/arch/ia64/include/asm/pgtable.h
index 0a9cc73d35c7..8840a690d1e7 100644
--- a/arch/ia64/include/asm/pgtable.h
+++ b/arch/ia64/include/asm/pgtable.h
@@ -155,7 +155,6 @@
#include <linux/bitops.h>
#include <asm/cacheflush.h>
#include <asm/mmu_context.h>
-#include <asm/processor.h>
/*
* Next come the mappings that determine how mmap() protection bits
diff --git a/arch/ia64/kernel/ia64_ksyms.c b/arch/ia64/kernel/ia64_ksyms.c
index 2d311864e359..8ebccb589e1c 100644
--- a/arch/ia64/kernel/ia64_ksyms.c
+++ b/arch/ia64/kernel/ia64_ksyms.c
@@ -21,6 +21,7 @@ EXPORT_SYMBOL(csum_ipv6_magic);
#include <asm/page.h>
EXPORT_SYMBOL(clear_page);
+EXPORT_SYMBOL(copy_page);
#ifdef CONFIG_VIRTUAL_MEM_MAP
#include <linux/bootmem.h>
@@ -60,9 +61,6 @@ EXPORT_SYMBOL(__udivdi3);
EXPORT_SYMBOL(__moddi3);
EXPORT_SYMBOL(__umoddi3);
-#include <asm/page.h>
-EXPORT_SYMBOL(copy_page);
-
#if defined(CONFIG_MD_RAID456) || defined(CONFIG_MD_RAID456_MODULE)
extern void xor_ia64_2(void);
extern void xor_ia64_3(void);
diff --git a/arch/ia64/kernel/iosapic.c b/arch/ia64/kernel/iosapic.c
index c48b03f2b61d..dab4d393908c 100644
--- a/arch/ia64/kernel/iosapic.c
+++ b/arch/ia64/kernel/iosapic.c
@@ -1072,6 +1072,10 @@ iosapic_init (unsigned long phys_addr, unsigned int gsi_base)
}
addr = ioremap(phys_addr, 0);
+ if (addr == NULL) {
+ spin_unlock_irqrestore(&iosapic_lock, flags);
+ return -ENOMEM;
+ }
ver = iosapic_version(addr);
if ((err = iosapic_check_gsi_range(gsi_base, ver))) {
iounmap(addr);
diff --git a/arch/ia64/kernel/pci-dma.c b/arch/ia64/kernel/pci-dma.c
index 05695962fe44..f6b1ff0aea76 100644
--- a/arch/ia64/kernel/pci-dma.c
+++ b/arch/ia64/kernel/pci-dma.c
@@ -69,11 +69,6 @@ iommu_dma_init(void)
int iommu_dma_supported(struct device *dev, u64 mask)
{
- struct dma_map_ops *ops = platform_dma_get_ops(dev);
-
- if (ops->dma_supported)
- return ops->dma_supported(dev, mask);
-
/* Copied from i386. Doesn't make much sense, because it will
only work for pci_alloc_coherent.
The caller just has to use GFP_DMA in this case. */
diff --git a/arch/ia64/kernel/topology.c b/arch/ia64/kernel/topology.c
index bc80dff1df7a..8f060352e129 100644
--- a/arch/ia64/kernel/topology.c
+++ b/arch/ia64/kernel/topology.c
@@ -372,6 +372,10 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
retval = kobject_init_and_add(&all_cpu_cache_info[cpu].kobj,
&cache_ktype_percpu_entry, &sys_dev->kobj,
"%s", "cache");
+ if (unlikely(retval < 0)) {
+ cpu_cache_sysfs_exit(cpu);
+ return retval;
+ }
for (i = 0; i < all_cpu_cache_info[cpu].num_cache_leaves; i++) {
this_object = LEAF_KOBJECT_PTR(cpu,i);
@@ -385,7 +389,7 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
}
kobject_put(&all_cpu_cache_info[cpu].kobj);
cpu_cache_sysfs_exit(cpu);
- break;
+ return retval;
}
kobject_uevent(&(this_object->kobj), KOBJ_ADD);
}
diff --git a/arch/ia64/kvm/mmio.c b/arch/ia64/kvm/mmio.c
index 21f63fffc379..9bf55afd08d0 100644
--- a/arch/ia64/kvm/mmio.c
+++ b/arch/ia64/kvm/mmio.c
@@ -247,7 +247,8 @@ void emulate_io_inst(struct kvm_vcpu *vcpu, u64 padr, u64 ma)
vcpu_get_fpreg(vcpu, inst.M9.f2, &v);
/* Write high word. FIXME: this is a kludge! */
v.u.bits[1] &= 0x3ffff;
- mmio_access(vcpu, padr + 8, &v.u.bits[1], 8, ma, IOREQ_WRITE);
+ mmio_access(vcpu, padr + 8, (u64 *)&v.u.bits[1], 8,
+ ma, IOREQ_WRITE);
data = v.u.bits[0];
size = 3;
} else if (inst.M10.major == 7 && inst.M10.x6 == 0x3B) {
@@ -265,7 +266,8 @@ void emulate_io_inst(struct kvm_vcpu *vcpu, u64 padr, u64 ma)
/* Write high word.FIXME: this is a kludge! */
v.u.bits[1] &= 0x3ffff;
- mmio_access(vcpu, padr + 8, &v.u.bits[1], 8, ma, IOREQ_WRITE);
+ mmio_access(vcpu, padr + 8, (u64 *)&v.u.bits[1],
+ 8, ma, IOREQ_WRITE);
data = v.u.bits[0];
size = 3;
} else if (inst.M10.major == 7 && inst.M10.x6 == 0x31) {
diff --git a/arch/ia64/kvm/vcpu.c b/arch/ia64/kvm/vcpu.c
index 46b02cbcc874..cc406d064a09 100644
--- a/arch/ia64/kvm/vcpu.c
+++ b/arch/ia64/kvm/vcpu.c
@@ -461,7 +461,7 @@ void setreg(unsigned long regnum, unsigned long val,
u64 vcpu_get_gr(struct kvm_vcpu *vcpu, unsigned long reg)
{
struct kvm_pt_regs *regs = vcpu_regs(vcpu);
- u64 val;
+ unsigned long val;
if (!reg)
return 0;
@@ -469,7 +469,7 @@ u64 vcpu_get_gr(struct kvm_vcpu *vcpu, unsigned long reg)
return val;
}
-void vcpu_set_gr(struct kvm_vcpu *vcpu, u64 reg, u64 value, int nat)
+void vcpu_set_gr(struct kvm_vcpu *vcpu, unsigned long reg, u64 value, int nat)
{
struct kvm_pt_regs *regs = vcpu_regs(vcpu);
long sof = (regs->cr_ifs) & 0x7f;
@@ -1072,7 +1072,7 @@ void kvm_ttag(struct kvm_vcpu *vcpu, INST64 inst)
vcpu_set_gr(vcpu, inst.M46.r1, tag, 0);
}
-int vcpu_tpa(struct kvm_vcpu *vcpu, u64 vadr, u64 *padr)
+int vcpu_tpa(struct kvm_vcpu *vcpu, u64 vadr, unsigned long *padr)
{
struct thash_data *data;
union ia64_isr visr, pt_isr;
diff --git a/arch/ia64/kvm/vcpu.h b/arch/ia64/kvm/vcpu.h
index 042af92ced83..360724d3ae69 100644
--- a/arch/ia64/kvm/vcpu.h
+++ b/arch/ia64/kvm/vcpu.h
@@ -686,14 +686,15 @@ static inline int highest_inservice_irq(struct kvm_vcpu *vcpu)
return highest_bits((int *)&(VMX(vcpu, insvc[0])));
}
-extern void vcpu_get_fpreg(struct kvm_vcpu *vcpu, u64 reg,
+extern void vcpu_get_fpreg(struct kvm_vcpu *vcpu, unsigned long reg,
struct ia64_fpreg *val);
-extern void vcpu_set_fpreg(struct kvm_vcpu *vcpu, u64 reg,
+extern void vcpu_set_fpreg(struct kvm_vcpu *vcpu, unsigned long reg,
struct ia64_fpreg *val);
-extern u64 vcpu_get_gr(struct kvm_vcpu *vcpu, u64 reg);
-extern void vcpu_set_gr(struct kvm_vcpu *vcpu, u64 reg, u64 val, int nat);
-extern u64 vcpu_get_psr(struct kvm_vcpu *vcpu);
-extern void vcpu_set_psr(struct kvm_vcpu *vcpu, u64 val);
+extern u64 vcpu_get_gr(struct kvm_vcpu *vcpu, unsigned long reg);
+extern void vcpu_set_gr(struct kvm_vcpu *vcpu, unsigned long reg,
+ u64 val, int nat);
+extern unsigned long vcpu_get_psr(struct kvm_vcpu *vcpu);
+extern void vcpu_set_psr(struct kvm_vcpu *vcpu, unsigned long val);
extern u64 vcpu_thash(struct kvm_vcpu *vcpu, u64 vadr);
extern void vcpu_bsw0(struct kvm_vcpu *vcpu);
extern void thash_vhpt_insert(struct kvm_vcpu *v, u64 pte,
diff --git a/arch/mn10300/include/asm/pci.h b/arch/mn10300/include/asm/pci.h
index 35d2ed6396f6..19aecc90f7a4 100644
--- a/arch/mn10300/include/asm/pci.h
+++ b/arch/mn10300/include/asm/pci.h
@@ -59,7 +59,6 @@ void pcibios_penalize_isa_irq(int irq);
#include <linux/slab.h>
#include <asm/scatterlist.h>
#include <linux/string.h>
-#include <linux/mm.h>
#include <asm/io.h>
struct pci_dev;
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index dfdf13c9fefd..fddc3ed715fa 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -34,7 +34,7 @@
#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
/* We don't currently support large pages. */
-#define KVM_PAGES_PER_HPAGE (1<<31)
+#define KVM_PAGES_PER_HPAGE (1UL << 31)
struct kvm;
struct kvm_run;
diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c
index 20a60d661ba8..ccf129d47d84 100644
--- a/arch/powerpc/kernel/dma.c
+++ b/arch/powerpc/kernel/dma.c
@@ -7,6 +7,7 @@
#include <linux/device.h>
#include <linux/dma-mapping.h>
+#include <linux/lmb.h>
#include <asm/bug.h>
#include <asm/abs_addr.h>
@@ -90,11 +91,10 @@ static void dma_direct_unmap_sg(struct device *dev, struct scatterlist *sg,
static int dma_direct_dma_supported(struct device *dev, u64 mask)
{
#ifdef CONFIG_PPC64
- /* Could be improved to check for memory though it better be
- * done via some global so platforms can set the limit in case
+ /* Could be improved so platforms can set the limit in case
* they have limited DMA windows
*/
- return mask >= DMA_BIT_MASK(32);
+ return mask >= (lmb_end_of_DRAM() - 1);
#else
return 1;
#endif
diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c
index 809fdf94b95f..70e1f57f7dd8 100644
--- a/arch/powerpc/kernel/perf_counter.c
+++ b/arch/powerpc/kernel/perf_counter.c
@@ -518,6 +518,8 @@ void hw_perf_disable(void)
struct cpu_hw_counters *cpuhw;
unsigned long flags;
+ if (!ppmu)
+ return;
local_irq_save(flags);
cpuhw = &__get_cpu_var(cpu_hw_counters);
@@ -572,6 +574,8 @@ void hw_perf_enable(void)
int n_lim;
int idx;
+ if (!ppmu)
+ return;
local_irq_save(flags);
cpuhw = &__get_cpu_var(cpu_hw_counters);
if (!cpuhw->disabled) {
@@ -737,6 +741,8 @@ int hw_perf_group_sched_in(struct perf_counter *group_leader,
long i, n, n0;
struct perf_counter *sub;
+ if (!ppmu)
+ return 0;
cpuhw = &__get_cpu_var(cpu_hw_counters);
n0 = cpuhw->n_counters;
n = collect_events(group_leader, ppmu->n_counter - n0,
@@ -1281,6 +1287,8 @@ void hw_perf_counter_setup(int cpu)
{
struct cpu_hw_counters *cpuhw = &per_cpu(cpu_hw_counters, cpu);
+ if (!ppmu)
+ return;
memset(cpuhw, 0, sizeof(*cpuhw));
cpuhw->mmcr[0] = MMCR0_FC;
}
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index f04f5301b1b4..4d613415c435 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -386,7 +386,7 @@ no_timer:
}
__unset_cpu_idle(vcpu);
__set_current_state(TASK_RUNNING);
- remove_wait_queue(&vcpu->wq, &wait);
+ remove_wait_queue(&vcpu->arch.local_int.wq, &wait);
spin_unlock_bh(&vcpu->arch.local_int.lock);
spin_unlock(&vcpu->arch.local_int.float_int->lock);
hrtimer_try_to_cancel(&vcpu->arch.ckc_timer);
diff --git a/arch/sh/boards/board-ap325rxa.c b/arch/sh/boards/board-ap325rxa.c
index 7ffd1b4315bd..b9c88cc519e2 100644
--- a/arch/sh/boards/board-ap325rxa.c
+++ b/arch/sh/boards/board-ap325rxa.c
@@ -547,7 +547,7 @@ static int __init ap325rxa_devices_setup(void)
return platform_add_devices(ap325rxa_devices,
ARRAY_SIZE(ap325rxa_devices));
}
-device_initcall(ap325rxa_devices_setup);
+arch_initcall(ap325rxa_devices_setup);
/* Return the board specific boot mode pin configuration */
static int ap325rxa_mode_pins(void)
diff --git a/arch/sh/boards/mach-migor/setup.c b/arch/sh/boards/mach-migor/setup.c
index f70f4644deb4..f9b2e4df35b9 100644
--- a/arch/sh/boards/mach-migor/setup.c
+++ b/arch/sh/boards/mach-migor/setup.c
@@ -608,7 +608,7 @@ static int __init migor_devices_setup(void)
return platform_add_devices(migor_devices, ARRAY_SIZE(migor_devices));
}
-__initcall(migor_devices_setup);
+arch_initcall(migor_devices_setup);
/* Return the board specific boot mode pin configuration */
static int migor_mode_pins(void)
diff --git a/arch/sh/kernel/cpu/sh2/setup-sh7619.c b/arch/sh/kernel/cpu/sh2/setup-sh7619.c
index 13798733f2db..8555c05e8667 100644
--- a/arch/sh/kernel/cpu/sh2/setup-sh7619.c
+++ b/arch/sh/kernel/cpu/sh2/setup-sh7619.c
@@ -187,7 +187,7 @@ static int __init sh7619_devices_setup(void)
return platform_add_devices(sh7619_devices,
ARRAY_SIZE(sh7619_devices));
}
-__initcall(sh7619_devices_setup);
+arch_initcall(sh7619_devices_setup);
void __init plat_irq_setup(void)
{
diff --git a/arch/sh/kernel/cpu/sh2a/setup-mxg.c b/arch/sh/kernel/cpu/sh2a/setup-mxg.c
index 869c2da4820b..b67376445315 100644
--- a/arch/sh/kernel/cpu/sh2a/setup-mxg.c
+++ b/arch/sh/kernel/cpu/sh2a/setup-mxg.c
@@ -238,7 +238,7 @@ static int __init mxg_devices_setup(void)
return platform_add_devices(mxg_devices,
ARRAY_SIZE(mxg_devices));
}
-__initcall(mxg_devices_setup);
+arch_initcall(mxg_devices_setup);
void __init plat_irq_setup(void)
{
diff --git a/arch/sh/kernel/cpu/sh2a/setup-sh7201.c b/arch/sh/kernel/cpu/sh2a/setup-sh7201.c
index d8febe128066..fbde5b75deb9 100644
--- a/arch/sh/kernel/cpu/sh2a/setup-sh7201.c
+++ b/arch/sh/kernel/cpu/sh2a/setup-sh7201.c
@@ -357,7 +357,7 @@ static int __init sh7201_devices_setup(void)
return platform_add_devices(sh7201_devices,
ARRAY_SIZE(sh7201_devices));
}
-__initcall(sh7201_devices_setup);
+arch_initcall(sh7201_devices_setup);
void __init plat_irq_setup(void)
{
diff --git a/arch/sh/kernel/cpu/sh2a/setup-sh7203.c b/arch/sh/kernel/cpu/sh2a/setup-sh7203.c
index 62e3039d2398..d3fd536c9a84 100644
--- a/arch/sh/kernel/cpu/sh2a/setup-sh7203.c
+++ b/arch/sh/kernel/cpu/sh2a/setup-sh7203.c
@@ -367,7 +367,7 @@ static int __init sh7203_devices_setup(void)
return platform_add_devices(sh7203_devices,
ARRAY_SIZE(sh7203_devices));
}
-__initcall(sh7203_devices_setup);
+arch_initcall(sh7203_devices_setup);
void __init plat_irq_setup(void)
{
diff --git a/arch/sh/kernel/cpu/sh2a/setup-sh7206.c b/arch/sh/kernel/cpu/sh2a/setup-sh7206.c
index 3e6f3d7a58be..a9ccc5e8d9e9 100644
--- a/arch/sh/kernel/cpu/sh2a/setup-sh7206.c
+++ b/arch/sh/kernel/cpu/sh2a/setup-sh7206.c
@@ -338,7 +338,7 @@ static int __init sh7206_devices_setup(void)
return platform_add_devices(sh7206_devices,
ARRAY_SIZE(sh7206_devices));
}
-__initcall(sh7206_devices_setup);
+arch_initcall(sh7206_devices_setup);
void __init plat_irq_setup(void)
{
diff --git a/arch/sh/kernel/cpu/sh3/setup-sh7705.c b/arch/sh/kernel/cpu/sh3/setup-sh7705.c
index 88f742fed9ed..c23105983878 100644
--- a/arch/sh/kernel/cpu/sh3/setup-sh7705.c
+++ b/arch/sh/kernel/cpu/sh3/setup-sh7705.c
@@ -222,7 +222,7 @@ static int __init sh7705_devices_setup(void)
return platform_add_devices(sh7705_devices,
ARRAY_SIZE(sh7705_devices));
}
-__initcall(sh7705_devices_setup);
+arch_initcall(sh7705_devices_setup);
static struct platform_device *sh7705_early_devices[] __initdata = {
&tmu0_device,
diff --git a/arch/sh/kernel/cpu/sh3/setup-sh770x.c b/arch/sh/kernel/cpu/sh3/setup-sh770x.c
index c56306798584..347ab35d0697 100644
--- a/arch/sh/kernel/cpu/sh3/setup-sh770x.c
+++ b/arch/sh/kernel/cpu/sh3/setup-sh770x.c
@@ -250,7 +250,7 @@ static int __init sh770x_devices_setup(void)
return platform_add_devices(sh770x_devices,
ARRAY_SIZE(sh770x_devices));
}
-__initcall(sh770x_devices_setup);
+arch_initcall(sh770x_devices_setup);
static struct platform_device *sh770x_early_devices[] __initdata = {
&tmu0_device,
diff --git a/arch/sh/kernel/cpu/sh3/setup-sh7710.c b/arch/sh/kernel/cpu/sh3/setup-sh7710.c
index efa76c8148f4..717e90ae1097 100644
--- a/arch/sh/kernel/cpu/sh3/setup-sh7710.c
+++ b/arch/sh/kernel/cpu/sh3/setup-sh7710.c
@@ -226,7 +226,7 @@ static int __init sh7710_devices_setup(void)
return platform_add_devices(sh7710_devices,
ARRAY_SIZE(sh7710_devices));
}
-__initcall(sh7710_devices_setup);
+arch_initcall(sh7710_devices_setup);
static struct platform_device *sh7710_early_devices[] __initdata = {
&tmu0_device,
diff --git a/arch/sh/kernel/cpu/sh3/setup-sh7720.c b/arch/sh/kernel/cpu/sh3/setup-sh7720.c
index 5b2107798edb..74d8baaf8e96 100644
--- a/arch/sh/kernel/cpu/sh3/setup-sh7720.c
+++ b/arch/sh/kernel/cpu/sh3/setup-sh7720.c
@@ -388,7 +388,7 @@ static int __init sh7720_devices_setup(void)
return platform_add_devices(sh7720_devices,
ARRAY_SIZE(sh7720_devices));
}
-__initcall(sh7720_devices_setup);
+arch_initcall(sh7720_devices_setup);
static struct platform_device *sh7720_early_devices[] __initdata = {
&cmt0_device,
diff --git a/arch/sh/kernel/cpu/sh4/setup-sh4-202.c b/arch/sh/kernel/cpu/sh4/setup-sh4-202.c
index 6d088d123591..de4827df19aa 100644
--- a/arch/sh/kernel/cpu/sh4/setup-sh4-202.c
+++ b/arch/sh/kernel/cpu/sh4/setup-sh4-202.c
@@ -138,7 +138,7 @@ static int __init sh4202_devices_setup(void)
return platform_add_devices(sh4202_devices,
ARRAY_SIZE(sh4202_devices));
}
-__initcall(sh4202_devices_setup);
+arch_initcall(sh4202_devices_setup);
static struct platform_device *sh4202_early_devices[] __initdata = {
&tmu0_device,
diff --git a/arch/sh/kernel/cpu/sh4/setup-sh7750.c b/arch/sh/kernel/cpu/sh4/setup-sh7750.c
index 851672d15cf4..1b8b122e8f3d 100644
--- a/arch/sh/kernel/cpu/sh4/setup-sh7750.c
+++ b/arch/sh/kernel/cpu/sh4/setup-sh7750.c
@@ -239,7 +239,7 @@ static int __init sh7750_devices_setup(void)
return platform_add_devices(sh7750_devices,
ARRAY_SIZE(sh7750_devices));
}
-__initcall(sh7750_devices_setup);
+arch_initcall(sh7750_devices_setup);
static struct platform_device *sh7750_early_devices[] __initdata = {
&tmu0_device,
diff --git a/arch/sh/kernel/cpu/sh4/setup-sh7760.c b/arch/sh/kernel/cpu/sh4/setup-sh7760.c
index 5b822519bd90..7fbb7be9284c 100644
--- a/arch/sh/kernel/cpu/sh4/setup-sh7760.c
+++ b/arch/sh/kernel/cpu/sh4/setup-sh7760.c
@@ -265,7 +265,7 @@ static int __init sh7760_devices_setup(void)
return platform_add_devices(sh7760_devices,
ARRAY_SIZE(sh7760_devices));
}
-__initcall(sh7760_devices_setup);
+arch_initcall(sh7760_devices_setup);
static struct platform_device *sh7760_early_devices[] __initdata = {
&tmu0_device,
diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7343.c b/arch/sh/kernel/cpu/sh4a/setup-sh7343.c
index 6307e087c864..ac4d5672ec1a 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7343.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7343.c
@@ -325,7 +325,7 @@ static int __init sh7343_devices_setup(void)
return platform_add_devices(sh7343_devices,
ARRAY_SIZE(sh7343_devices));
}
-__initcall(sh7343_devices_setup);
+arch_initcall(sh7343_devices_setup);
static struct platform_device *sh7343_early_devices[] __initdata = {
&cmt_device,
diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7366.c b/arch/sh/kernel/cpu/sh4a/setup-sh7366.c
index c18f7d09281b..1a956b1beccc 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7366.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7366.c
@@ -318,7 +318,7 @@ static int __init sh7366_devices_setup(void)
return platform_add_devices(sh7366_devices,
ARRAY_SIZE(sh7366_devices));
}
-__initcall(sh7366_devices_setup);
+arch_initcall(sh7366_devices_setup);
static struct platform_device *sh7366_early_devices[] __initdata = {
&cmt_device,
diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7722.c b/arch/sh/kernel/cpu/sh4a/setup-sh7722.c
index ea524a2da3e4..cda76ebf87c3 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7722.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7722.c
@@ -359,7 +359,7 @@ static int __init sh7722_devices_setup(void)
return platform_add_devices(sh7722_devices,
ARRAY_SIZE(sh7722_devices));
}
-__initcall(sh7722_devices_setup);
+arch_initcall(sh7722_devices_setup);
static struct platform_device *sh7722_early_devices[] __initdata = {
&cmt_device,
diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7723.c b/arch/sh/kernel/cpu/sh4a/setup-sh7723.c
index e1bb80b2a27b..b45dace9539f 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7723.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7723.c
@@ -473,7 +473,7 @@ static int __init sh7723_devices_setup(void)
return platform_add_devices(sh7723_devices,
ARRAY_SIZE(sh7723_devices));
}
-__initcall(sh7723_devices_setup);
+arch_initcall(sh7723_devices_setup);
static struct platform_device *sh7723_early_devices[] __initdata = {
&cmt_device,
diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7724.c b/arch/sh/kernel/cpu/sh4a/setup-sh7724.c
index e5ac9eb11c63..a04edaab9a29 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7724.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7724.c
@@ -508,7 +508,7 @@ static int __init sh7724_devices_setup(void)
return platform_add_devices(sh7724_devices,
ARRAY_SIZE(sh7724_devices));
}
-device_initcall(sh7724_devices_setup);
+arch_initcall(sh7724_devices_setup);
static struct platform_device *sh7724_early_devices[] __initdata = {
&cmt_device,
diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7763.c b/arch/sh/kernel/cpu/sh4a/setup-sh7763.c
index f1e0c0d36da7..4659fff6b842 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7763.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7763.c
@@ -314,7 +314,7 @@ static int __init sh7763_devices_setup(void)
return platform_add_devices(sh7763_devices,
ARRAY_SIZE(sh7763_devices));
}
-__initcall(sh7763_devices_setup);
+arch_initcall(sh7763_devices_setup);
static struct platform_device *sh7763_early_devices[] __initdata = {
&tmu0_device,
diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7770.c b/arch/sh/kernel/cpu/sh4a/setup-sh7770.c
index 1e86209db284..eead08d89d32 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7770.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7770.c
@@ -368,7 +368,7 @@ static int __init sh7770_devices_setup(void)
return platform_add_devices(sh7770_devices,
ARRAY_SIZE(sh7770_devices));
}
-__initcall(sh7770_devices_setup);
+arch_initcall(sh7770_devices_setup);
static struct platform_device *sh7770_early_devices[] __initdata = {
&tmu0_device,
diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7780.c b/arch/sh/kernel/cpu/sh4a/setup-sh7780.c
index 715e05b431e5..2c901f446959 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7780.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7780.c
@@ -256,7 +256,7 @@ static int __init sh7780_devices_setup(void)
return platform_add_devices(sh7780_devices,
ARRAY_SIZE(sh7780_devices));
}
-__initcall(sh7780_devices_setup);
+arch_initcall(sh7780_devices_setup);
static struct platform_device *sh7780_early_devices[] __initdata = {
&tmu0_device,
diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7785.c b/arch/sh/kernel/cpu/sh4a/setup-sh7785.c
index af561402570b..7f6c718b6c36 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7785.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7785.c
@@ -263,7 +263,7 @@ static int __init sh7785_devices_setup(void)
return platform_add_devices(sh7785_devices,
ARRAY_SIZE(sh7785_devices));
}
-__initcall(sh7785_devices_setup);
+arch_initcall(sh7785_devices_setup);
static struct platform_device *sh7785_early_devices[] __initdata = {
&tmu0_device,
diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7786.c b/arch/sh/kernel/cpu/sh4a/setup-sh7786.c
index b70049470a0b..0104a8ec5369 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7786.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7786.c
@@ -547,7 +547,7 @@ static int __init sh7786_devices_setup(void)
return platform_add_devices(sh7786_devices,
ARRAY_SIZE(sh7786_devices));
}
-device_initcall(sh7786_devices_setup);
+arch_initcall(sh7786_devices_setup);
void __init plat_early_device_setup(void)
{
diff --git a/arch/sh/kernel/cpu/sh4a/setup-shx3.c b/arch/sh/kernel/cpu/sh4a/setup-shx3.c
index 53c65fd9ccef..07f078961c71 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-shx3.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-shx3.c
@@ -256,7 +256,7 @@ static int __init shx3_devices_setup(void)
return platform_add_devices(shx3_devices,
ARRAY_SIZE(shx3_devices));
}
-__initcall(shx3_devices_setup);
+arch_initcall(shx3_devices_setup);
void __init plat_early_device_setup(void)
{
diff --git a/arch/sh/kernel/cpu/sh5/setup-sh5.c b/arch/sh/kernel/cpu/sh5/setup-sh5.c
index f5ff1ac57fc2..6a0f82f70032 100644
--- a/arch/sh/kernel/cpu/sh5/setup-sh5.c
+++ b/arch/sh/kernel/cpu/sh5/setup-sh5.c
@@ -186,7 +186,7 @@ static int __init sh5_devices_setup(void)
return platform_add_devices(sh5_devices,
ARRAY_SIZE(sh5_devices));
}
-__initcall(sh5_devices_setup);
+arch_initcall(sh5_devices_setup);
void __init plat_early_device_setup(void)
{
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 738bdc6b0f8b..13ffa5df37d7 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -24,6 +24,7 @@ config X86
select HAVE_UNSTABLE_SCHED_CLOCK
select HAVE_IDE
select HAVE_OPROFILE
+ select HAVE_PERF_COUNTERS if (!M386 && !M486)
select HAVE_IOREMAP_PROT
select HAVE_KPROBES
select ARCH_WANT_OPTIONAL_GPIOLIB
@@ -742,7 +743,6 @@ config X86_UP_IOAPIC
config X86_LOCAL_APIC
def_bool y
depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_APIC
- select HAVE_PERF_COUNTERS if (!M386 && !M486)
config X86_IO_APIC
def_bool y
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index 2ed4e2bb3b32..a5371ec36776 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -17,11 +17,13 @@ static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
return x2apic_enabled();
}
-/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */
-
+/*
+ * Need to use more than CPU 0, because we need more vectors when
+ * MSI-X is used.
+ */
static const struct cpumask *x2apic_target_cpus(void)
{
- return cpumask_of(0);
+ return cpu_online_mask;
}
/*
diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c
index 0b631c6a2e00..a8989aadc99a 100644
--- a/arch/x86/kernel/apic/x2apic_phys.c
+++ b/arch/x86/kernel/apic/x2apic_phys.c
@@ -27,11 +27,13 @@ static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
return 0;
}
-/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */
-
+/*
+ * Need to use more than CPU 0, because we need more vectors when
+ * MSI-X is used.
+ */
static const struct cpumask *x2apic_target_cpus(void)
{
- return cpumask_of(0);
+ return cpu_online_mask;
}
static void x2apic_vector_allocation_domain(int cpu, struct cpumask *retmask)
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index e2485b03f1cf..63fddcd082cd 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -400,6 +400,13 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
level = cpuid_eax(1);
if((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58)
set_cpu_cap(c, X86_FEATURE_REP_GOOD);
+
+ /*
+ * Some BIOSes incorrectly force this feature, but only K8
+ * revision D (model = 0x14) and later actually support it.
+ */
+ if (c->x86_model < 0x14)
+ clear_cpu_cap(c, X86_FEATURE_LAHF_LM);
}
if (c->x86 == 0x10 || c->x86 == 0x11)
set_cpu_cap(c, X86_FEATURE_REP_GOOD);
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index f1961c07af9a..5ce60a88027b 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -59,7 +59,30 @@ void __init setup_cpu_local_masks(void)
alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask);
}
-static const struct cpu_dev *this_cpu __cpuinitdata;
+static void __cpuinit default_init(struct cpuinfo_x86 *c)
+{
+#ifdef CONFIG_X86_64
+ display_cacheinfo(c);
+#else
+ /* Not much we can do here... */
+ /* Check if at least it has cpuid */
+ if (c->cpuid_level == -1) {
+ /* No cpuid. It must be an ancient CPU */
+ if (c->x86 == 4)
+ strcpy(c->x86_model_id, "486");
+ else if (c->x86 == 3)
+ strcpy(c->x86_model_id, "386");
+ }
+#endif
+}
+
+static const struct cpu_dev __cpuinitconst default_cpu = {
+ .c_init = default_init,
+ .c_vendor = "Unknown",
+ .c_x86_vendor = X86_VENDOR_UNKNOWN,
+};
+
+static const struct cpu_dev *this_cpu __cpuinitdata = &default_cpu;
DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
#ifdef CONFIG_X86_64
@@ -332,29 +355,6 @@ void switch_to_new_gdt(int cpu)
static const struct cpu_dev *__cpuinitdata cpu_devs[X86_VENDOR_NUM] = {};
-static void __cpuinit default_init(struct cpuinfo_x86 *c)
-{
-#ifdef CONFIG_X86_64
- display_cacheinfo(c);
-#else
- /* Not much we can do here... */
- /* Check if at least it has cpuid */
- if (c->cpuid_level == -1) {
- /* No cpuid. It must be an ancient CPU */
- if (c->x86 == 4)
- strcpy(c->x86_model_id, "486");
- else if (c->x86 == 3)
- strcpy(c->x86_model_id, "386");
- }
-#endif
-}
-
-static const struct cpu_dev __cpuinitconst default_cpu = {
- .c_init = default_init,
- .c_vendor = "Unknown",
- .c_x86_vendor = X86_VENDOR_UNKNOWN,
-};
-
static void __cpuinit get_model_name(struct cpuinfo_x86 *c)
{
unsigned int *v;
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index bff8dd191dd5..8bc64cfbe936 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -36,6 +36,7 @@
static DEFINE_PER_CPU(__u64, next_check) = INITIAL_JIFFIES;
static DEFINE_PER_CPU(unsigned long, thermal_throttle_count);
+static DEFINE_PER_CPU(bool, thermal_throttle_active);
static atomic_t therm_throt_en = ATOMIC_INIT(0);
@@ -96,24 +97,27 @@ static int therm_throt_process(int curr)
{
unsigned int cpu = smp_processor_id();
__u64 tmp_jiffs = get_jiffies_64();
+ bool was_throttled = __get_cpu_var(thermal_throttle_active);
+ bool is_throttled = __get_cpu_var(thermal_throttle_active) = curr;
- if (curr)
+ if (is_throttled)
__get_cpu_var(thermal_throttle_count)++;
- if (time_before64(tmp_jiffs, __get_cpu_var(next_check)))
+ if (!(was_throttled ^ is_throttled) &&
+ time_before64(tmp_jiffs, __get_cpu_var(next_check)))
return 0;
__get_cpu_var(next_check) = tmp_jiffs + CHECK_INTERVAL;
/* if we just entered the thermal event */
- if (curr) {
+ if (is_throttled) {
printk(KERN_CRIT "CPU%d: Temperature above threshold, "
- "cpu clock throttled (total events = %lu)\n", cpu,
- __get_cpu_var(thermal_throttle_count));
+ "cpu clock throttled (total events = %lu)\n",
+ cpu, __get_cpu_var(thermal_throttle_count));
add_taint(TAINT_MACHINE_CHECK);
- } else {
- printk(KERN_CRIT "CPU%d: Temperature/speed normal\n", cpu);
+ } else if (was_throttled) {
+ printk(KERN_INFO "CPU%d: Temperature/speed normal\n", cpu);
}
return 1;
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index a7aa8f900954..900332b800f8 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -55,6 +55,7 @@ struct x86_pmu {
int num_counters_fixed;
int counter_bits;
u64 counter_mask;
+ int apic;
u64 max_period;
u64 intel_ctrl;
};
@@ -72,8 +73,8 @@ static const u64 p6_perfmon_event_map[] =
{
[PERF_COUNT_HW_CPU_CYCLES] = 0x0079,
[PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
- [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0000,
- [PERF_COUNT_HW_CACHE_MISSES] = 0x0000,
+ [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0f2e,
+ [PERF_COUNT_HW_CACHE_MISSES] = 0x012e,
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4,
[PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5,
[PERF_COUNT_HW_BUS_CYCLES] = 0x0062,
@@ -613,6 +614,7 @@ static DEFINE_MUTEX(pmc_reserve_mutex);
static bool reserve_pmc_hardware(void)
{
+#ifdef CONFIG_X86_LOCAL_APIC
int i;
if (nmi_watchdog == NMI_LOCAL_APIC)
@@ -627,9 +629,11 @@ static bool reserve_pmc_hardware(void)
if (!reserve_evntsel_nmi(x86_pmu.eventsel + i))
goto eventsel_fail;
}
+#endif
return true;
+#ifdef CONFIG_X86_LOCAL_APIC
eventsel_fail:
for (i--; i >= 0; i--)
release_evntsel_nmi(x86_pmu.eventsel + i);
@@ -644,10 +648,12 @@ perfctr_fail:
enable_lapic_nmi_watchdog();
return false;
+#endif
}
static void release_pmc_hardware(void)
{
+#ifdef CONFIG_X86_LOCAL_APIC
int i;
for (i = 0; i < x86_pmu.num_counters; i++) {
@@ -657,6 +663,7 @@ static void release_pmc_hardware(void)
if (nmi_watchdog == NMI_LOCAL_APIC)
enable_lapic_nmi_watchdog();
+#endif
}
static void hw_perf_counter_destroy(struct perf_counter *counter)
@@ -748,6 +755,15 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
hwc->sample_period = x86_pmu.max_period;
hwc->last_period = hwc->sample_period;
atomic64_set(&hwc->period_left, hwc->sample_period);
+ } else {
+ /*
+ * If we have a PMU initialized but no APIC
+ * interrupts, we cannot sample hardware
+ * counters (user-space has to fall back and
+ * sample via a hrtimer based software counter):
+ */
+ if (!x86_pmu.apic)
+ return -EOPNOTSUPP;
}
counter->destroy = hw_perf_counter_destroy;
@@ -1449,18 +1465,22 @@ void smp_perf_pending_interrupt(struct pt_regs *regs)
void set_perf_counter_pending(void)
{
+#ifdef CONFIG_X86_LOCAL_APIC
apic->send_IPI_self(LOCAL_PENDING_VECTOR);
+#endif
}
void perf_counters_lapic_init(void)
{
- if (!x86_pmu_initialized())
+#ifdef CONFIG_X86_LOCAL_APIC
+ if (!x86_pmu.apic || !x86_pmu_initialized())
return;
/*
* Always use NMI for PMU
*/
apic_write(APIC_LVTPC, APIC_DM_NMI);
+#endif
}
static int __kprobes
@@ -1484,7 +1504,9 @@ perf_counter_nmi_handler(struct notifier_block *self,
regs = args->regs;
+#ifdef CONFIG_X86_LOCAL_APIC
apic_write(APIC_LVTPC, APIC_DM_NMI);
+#endif
/*
* Can't rely on the handled return value to say it was our NMI, two
* counters could trigger 'simultaneously' raising two back-to-back NMIs.
@@ -1515,6 +1537,7 @@ static struct x86_pmu p6_pmu = {
.event_map = p6_pmu_event_map,
.raw_event = p6_pmu_raw_event,
.max_events = ARRAY_SIZE(p6_perfmon_event_map),
+ .apic = 1,
.max_period = (1ULL << 31) - 1,
.version = 0,
.num_counters = 2,
@@ -1541,6 +1564,7 @@ static struct x86_pmu intel_pmu = {
.event_map = intel_pmu_event_map,
.raw_event = intel_pmu_raw_event,
.max_events = ARRAY_SIZE(intel_perfmon_event_map),
+ .apic = 1,
/*
* Intel PMCs cannot be accessed sanely above 32 bit width,
* so we install an artificial 1<<31 period regardless of
@@ -1564,6 +1588,7 @@ static struct x86_pmu amd_pmu = {
.num_counters = 4,
.counter_bits = 48,
.counter_mask = (1ULL << 48) - 1,
+ .apic = 1,
/* use highest bit to detect overflow */
.max_period = (1ULL << 47) - 1,
};
@@ -1589,13 +1614,14 @@ static int p6_pmu_init(void)
return -ENODEV;
}
+ x86_pmu = p6_pmu;
+
if (!cpu_has_apic) {
- pr_info("no Local APIC, try rebooting with lapic");
- return -ENODEV;
+ pr_info("no APIC, boot with the \"lapic\" boot parameter to force-enable it.\n");
+ pr_info("no hardware sampling interrupt available.\n");
+ x86_pmu.apic = 0;
}
- x86_pmu = p6_pmu;
-
return 0;
}
diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c
index 19ccf6d0dccf..fe26ba3e3451 100644
--- a/arch/x86/kernel/efi.c
+++ b/arch/x86/kernel/efi.c
@@ -354,7 +354,7 @@ void __init efi_init(void)
*/
c16 = tmp = early_ioremap(efi.systab->fw_vendor, 2);
if (c16) {
- for (i = 0; i < sizeof(vendor) && *c16; ++i)
+ for (i = 0; i < sizeof(vendor) - 1 && *c16; ++i)
vendor[i] = *c16++;
vendor[i] = '\0';
} else
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 834c9da8bf9d..a06e8d101844 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -405,7 +405,7 @@ EXPORT_SYMBOL(machine_real_restart);
#endif /* CONFIG_X86_32 */
/*
- * Apple MacBook5,2 (2009 MacBook) needs reboot=p
+ * Some Apple MacBook and MacBookPro models need reboot=p to be able to reboot
*/
static int __init set_pci_reboot(const struct dmi_system_id *d)
{
@@ -418,12 +418,20 @@ static int __init set_pci_reboot(const struct dmi_system_id *d)
}
static struct dmi_system_id __initdata pci_reboot_dmi_table[] = {
- { /* Handle problems with rebooting on Apple MacBook5,2 */
+ { /* Handle problems with rebooting on Apple MacBook5 */
.callback = set_pci_reboot,
- .ident = "Apple MacBook",
+ .ident = "Apple MacBook5",
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."),
- DMI_MATCH(DMI_PRODUCT_NAME, "MacBook5,2"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "MacBook5"),
+ },
+ },
+ { /* Handle problems with rebooting on Apple MacBookPro5 */
+ .callback = set_pci_reboot,
+ .ident = "Apple MacBookPro5",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "MacBookPro5"),
},
},
{ }
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 6e1a368d21d4..71f4368b357e 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -275,15 +275,20 @@ static unsigned long pit_calibrate_tsc(u32 latch, unsigned long ms, int loopmin)
* use the TSC value at the transitions to calculate a pretty
* good value for the TSC frequencty.
*/
+static inline int pit_verify_msb(unsigned char val)
+{
+ /* Ignore LSB */
+ inb(0x42);
+ return inb(0x42) == val;
+}
+
static inline int pit_expect_msb(unsigned char val, u64 *tscp, unsigned long *deltap)
{
int count;
u64 tsc = 0;
for (count = 0; count < 50000; count++) {
- /* Ignore LSB */
- inb(0x42);
- if (inb(0x42) != val)
+ if (!pit_verify_msb(val))
break;
tsc = get_cycles();
}
@@ -336,8 +341,7 @@ static unsigned long quick_pit_calibrate(void)
* to do that is to just read back the 16-bit counter
* once from the PIT.
*/
- inb(0x42);
- inb(0x42);
+ pit_verify_msb(0);
if (pit_expect_msb(0xff, &tsc, &d1)) {
for (i = 1; i <= MAX_QUICK_PIT_ITERATIONS; i++) {
@@ -348,8 +352,19 @@ static unsigned long quick_pit_calibrate(void)
* Iterate until the error is less than 500 ppm
*/
delta -= tsc;
- if (d1+d2 < delta >> 11)
- goto success;
+ if (d1+d2 >= delta >> 11)
+ continue;
+
+ /*
+ * Check the PIT one more time to verify that
+ * all TSC reads were stable wrt the PIT.
+ *
+ * This also guarantees serialization of the
+ * last cycle read ('d2') in pit_expect_msb.
+ */
+ if (!pit_verify_msb(0xfe - i))
+ break;
+ goto success;
}
}
printk("Fast TSC calibration failed\n");
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c
index b263423fbe2a..95a7289e4b0c 100644
--- a/arch/x86/kernel/vmi_32.c
+++ b/arch/x86/kernel/vmi_32.c
@@ -441,7 +441,7 @@ vmi_startup_ipi_hook(int phys_apicid, unsigned long start_eip,
ap.ds = __USER_DS;
ap.es = __USER_DS;
ap.fs = __KERNEL_PERCPU;
- ap.gs = 0;
+ ap.gs = __KERNEL_STACK_CANARY;
ap.eflags = 0;
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 4d6f0d293ee2..21f68e00524f 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -104,6 +104,9 @@ static s64 __kpit_elapsed(struct kvm *kvm)
ktime_t remaining;
struct kvm_kpit_state *ps = &kvm->arch.vpit->pit_state;
+ if (!ps->pit_timer.period)
+ return 0;
+
/*
* The Counter does not stop when it reaches zero. In
* Modes 0, 1, 4, and 5 the Counter ``wraps around'' to
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 7030b5f911bf..0ef5bb2b4043 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -489,16 +489,20 @@ static unsigned long *gfn_to_rmap(struct kvm *kvm, gfn_t gfn, int lpage)
*
* If rmapp bit zero is one, (then rmap & ~1) points to a struct kvm_rmap_desc
* containing more mappings.
+ *
+ * Returns the number of rmap entries before the spte was added or zero if
+ * the spte was not added.
+ *
*/
-static void rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn, int lpage)
+static int rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn, int lpage)
{
struct kvm_mmu_page *sp;
struct kvm_rmap_desc *desc;
unsigned long *rmapp;
- int i;
+ int i, count = 0;
if (!is_rmap_pte(*spte))
- return;
+ return count;
gfn = unalias_gfn(vcpu->kvm, gfn);
sp = page_header(__pa(spte));
sp->gfns[spte - sp->spt] = gfn;
@@ -515,8 +519,10 @@ static void rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn, int lpage)
} else {
rmap_printk("rmap_add: %p %llx many->many\n", spte, *spte);
desc = (struct kvm_rmap_desc *)(*rmapp & ~1ul);
- while (desc->shadow_ptes[RMAP_EXT-1] && desc->more)
+ while (desc->shadow_ptes[RMAP_EXT-1] && desc->more) {
desc = desc->more;
+ count += RMAP_EXT;
+ }
if (desc->shadow_ptes[RMAP_EXT-1]) {
desc->more = mmu_alloc_rmap_desc(vcpu);
desc = desc->more;
@@ -525,6 +531,7 @@ static void rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn, int lpage)
;
desc->shadow_ptes[i] = spte;
}
+ return count;
}
static void rmap_desc_remove_entry(unsigned long *rmapp,
@@ -754,6 +761,19 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp)
return young;
}
+#define RMAP_RECYCLE_THRESHOLD 1000
+
+static void rmap_recycle(struct kvm_vcpu *vcpu, gfn_t gfn, int lpage)
+{
+ unsigned long *rmapp;
+
+ gfn = unalias_gfn(vcpu->kvm, gfn);
+ rmapp = gfn_to_rmap(vcpu->kvm, gfn, lpage);
+
+ kvm_unmap_rmapp(vcpu->kvm, rmapp);
+ kvm_flush_remote_tlbs(vcpu->kvm);
+}
+
int kvm_age_hva(struct kvm *kvm, unsigned long hva)
{
return kvm_handle_hva(kvm, hva, kvm_age_rmapp);
@@ -1407,24 +1427,25 @@ static int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp)
*/
void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages)
{
+ int used_pages;
+
+ used_pages = kvm->arch.n_alloc_mmu_pages - kvm->arch.n_free_mmu_pages;
+ used_pages = max(0, used_pages);
+
/*
* If we set the number of mmu pages to be smaller be than the
* number of actived pages , we must to free some mmu pages before we
* change the value
*/
- if ((kvm->arch.n_alloc_mmu_pages - kvm->arch.n_free_mmu_pages) >
- kvm_nr_mmu_pages) {
- int n_used_mmu_pages = kvm->arch.n_alloc_mmu_pages
- - kvm->arch.n_free_mmu_pages;
-
- while (n_used_mmu_pages > kvm_nr_mmu_pages) {
+ if (used_pages > kvm_nr_mmu_pages) {
+ while (used_pages > kvm_nr_mmu_pages) {
struct kvm_mmu_page *page;
page = container_of(kvm->arch.active_mmu_pages.prev,
struct kvm_mmu_page, link);
kvm_mmu_zap_page(kvm, page);
- n_used_mmu_pages--;
+ used_pages--;
}
kvm->arch.n_free_mmu_pages = 0;
}
@@ -1740,6 +1761,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
{
int was_rmapped = 0;
int was_writeble = is_writeble_pte(*shadow_pte);
+ int rmap_count;
pgprintk("%s: spte %llx access %x write_fault %d"
" user_fault %d gfn %lx\n",
@@ -1781,9 +1803,11 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
page_header_update_slot(vcpu->kvm, shadow_pte, gfn);
if (!was_rmapped) {
- rmap_add(vcpu, shadow_pte, gfn, largepage);
+ rmap_count = rmap_add(vcpu, shadow_pte, gfn, largepage);
if (!is_rmap_pte(*shadow_pte))
kvm_release_pfn_clean(pfn);
+ if (rmap_count > RMAP_RECYCLE_THRESHOLD)
+ rmap_recycle(vcpu, gfn, largepage);
} else {
if (was_writeble)
kvm_release_pfn_dirty(pfn);
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 71510e07e69e..b1f658ad2f06 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -711,6 +711,7 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
svm->vmcb->control.tsc_offset += delta;
vcpu->cpu = cpu;
kvm_migrate_timers(vcpu);
+ svm->asid_generation = 0;
}
for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++)
@@ -1031,7 +1032,6 @@ static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *svm_data)
svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ALL_ASID;
}
- svm->vcpu.cpu = svm_data->cpu;
svm->asid_generation = svm_data->asid_generation;
svm->vmcb->control.asid = svm_data->next_asid++;
}
@@ -2300,8 +2300,8 @@ static void pre_svm_run(struct vcpu_svm *svm)
struct svm_cpu_data *svm_data = per_cpu(svm_data, cpu);
svm->vmcb->control.tlb_ctl = TLB_CONTROL_DO_NOTHING;
- if (svm->vcpu.cpu != cpu ||
- svm->asid_generation != svm_data->asid_generation)
+ /* FIXME: handle wraparound of asid_generation */
+ if (svm->asid_generation != svm_data->asid_generation)
new_asid(svm, svm_data);
}
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 356a0ce85c68..29f912927a58 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3157,8 +3157,8 @@ static void handle_invalid_guest_state(struct kvm_vcpu *vcpu,
struct vcpu_vmx *vmx = to_vmx(vcpu);
enum emulation_result err = EMULATE_DONE;
- preempt_enable();
local_irq_enable();
+ preempt_enable();
while (!guest_state_valid(vcpu)) {
err = emulate_instruction(vcpu, kvm_run, 0, 0, 0);
@@ -3168,7 +3168,7 @@ static void handle_invalid_guest_state(struct kvm_vcpu *vcpu,
if (err != EMULATE_DONE) {
kvm_report_emulation_failure(vcpu, "emulation failure");
- return;
+ break;
}
if (signal_pending(current))
@@ -3177,8 +3177,8 @@ static void handle_invalid_guest_state(struct kvm_vcpu *vcpu,
schedule();
}
- local_irq_disable();
preempt_disable();
+ local_irq_disable();
vmx->invalid_state_emulation_result = err;
}
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index fe5474aec41a..3d4529011828 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -704,11 +704,48 @@ static bool msr_mtrr_valid(unsigned msr)
return false;
}
+static bool valid_pat_type(unsigned t)
+{
+ return t < 8 && (1 << t) & 0xf3; /* 0, 1, 4, 5, 6, 7 */
+}
+
+static bool valid_mtrr_type(unsigned t)
+{
+ return t < 8 && (1 << t) & 0x73; /* 0, 1, 4, 5, 6 */
+}
+
+static bool mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data)
+{
+ int i;
+
+ if (!msr_mtrr_valid(msr))
+ return false;
+
+ if (msr == MSR_IA32_CR_PAT) {
+ for (i = 0; i < 8; i++)
+ if (!valid_pat_type((data >> (i * 8)) & 0xff))
+ return false;
+ return true;
+ } else if (msr == MSR_MTRRdefType) {
+ if (data & ~0xcff)
+ return false;
+ return valid_mtrr_type(data & 0xff);
+ } else if (msr >= MSR_MTRRfix64K_00000 && msr <= MSR_MTRRfix4K_F8000) {
+ for (i = 0; i < 8 ; i++)
+ if (!valid_mtrr_type((data >> (i * 8)) & 0xff))
+ return false;
+ return true;
+ }
+
+ /* variable MTRRs */
+ return valid_mtrr_type(data & 0xff);
+}
+
static int set_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
{
u64 *p = (u64 *)&vcpu->arch.mtrr_state.fixed_ranges;
- if (!msr_mtrr_valid(msr))
+ if (!mtrr_valid(vcpu, msr, data))
return 1;
if (msr == MSR_MTRRdefType) {
@@ -1079,14 +1116,13 @@ long kvm_arch_dev_ioctl(struct file *filp,
if (copy_to_user(user_msr_list, &msr_list, sizeof msr_list))
goto out;
r = -E2BIG;
- if (n < num_msrs_to_save)
+ if (n < msr_list.nmsrs)
goto out;
r = -EFAULT;
if (copy_to_user(user_msr_list->indices, &msrs_to_save,
num_msrs_to_save * sizeof(u32)))
goto out;
- if (copy_to_user(user_msr_list->indices
- + num_msrs_to_save * sizeof(u32),
+ if (copy_to_user(user_msr_list->indices + num_msrs_to_save,
&emulated_msrs,
ARRAY_SIZE(emulated_msrs) * sizeof(u32)))
goto out;
diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c
index 958c1fa41900..fe3eba5d6b3e 100644
--- a/drivers/ata/ahci.c
+++ b/drivers/ata/ahci.c
@@ -219,6 +219,8 @@ enum {
AHCI_HFLAG_SECT255 = (1 << 8), /* max 255 sectors */
AHCI_HFLAG_YES_NCQ = (1 << 9), /* force NCQ cap on */
AHCI_HFLAG_NO_SUSPEND = (1 << 10), /* don't suspend */
+ AHCI_HFLAG_SRST_TOUT_IS_OFFLINE = (1 << 11), /* treat SRST timeout as
+ link offline */
/* ap->flags bits */
@@ -1663,6 +1665,7 @@ static int ahci_do_softreset(struct ata_link *link, unsigned int *class,
int (*check_ready)(struct ata_link *link))
{
struct ata_port *ap = link->ap;
+ struct ahci_host_priv *hpriv = ap->host->private_data;
const char *reason = NULL;
unsigned long now, msecs;
struct ata_taskfile tf;
@@ -1701,12 +1704,21 @@ static int ahci_do_softreset(struct ata_link *link, unsigned int *class,
/* wait for link to become ready */
rc = ata_wait_after_reset(link, deadline, check_ready);
- /* link occupied, -ENODEV too is an error */
- if (rc) {
+ if (rc == -EBUSY && hpriv->flags & AHCI_HFLAG_SRST_TOUT_IS_OFFLINE) {
+ /*
+ * Workaround for cases where link online status can't
+ * be trusted. Treat device readiness timeout as link
+ * offline.
+ */
+ ata_link_printk(link, KERN_INFO,
+ "device not ready, treating as offline\n");
+ *class = ATA_DEV_NONE;
+ } else if (rc) {
+ /* link occupied, -ENODEV too is an error */
reason = "device not ready";
goto fail;
- }
- *class = ahci_dev_classify(ap);
+ } else
+ *class = ahci_dev_classify(ap);
DPRINTK("EXIT, class=%u\n", *class);
return 0;
@@ -1773,7 +1785,8 @@ static int ahci_sb600_softreset(struct ata_link *link, unsigned int *class,
irq_sts = readl(port_mmio + PORT_IRQ_STAT);
if (irq_sts & PORT_IRQ_BAD_PMP) {
ata_link_printk(link, KERN_WARNING,
- "failed due to HW bug, retry pmp=0\n");
+ "applying SB600 PMP SRST workaround "
+ "and retrying\n");
rc = ahci_do_softreset(link, class, 0, deadline,
ahci_check_ready);
}
@@ -2726,6 +2739,56 @@ static bool ahci_broken_suspend(struct pci_dev *pdev)
return !ver || strcmp(ver, dmi->driver_data) < 0;
}
+static bool ahci_broken_online(struct pci_dev *pdev)
+{
+#define ENCODE_BUSDEVFN(bus, slot, func) \
+ (void *)(unsigned long)(((bus) << 8) | PCI_DEVFN((slot), (func)))
+ static const struct dmi_system_id sysids[] = {
+ /*
+ * There are several Gigabyte boards which use
+ * SIMG5723s configured as hardware RAID. Certain
+ * 5723 firmware revisions shipped there keep the link
+ * online but fail to answer properly to SRST or
+ * IDENTIFY when no device is attached downstream
+ * causing libata to retry quite a few times leading
+ * to excessive detection delay.
+ *
+ * As these firmwares respond to the second reset try
+ * with invalid device signature, considering unknown
+ * sig as offline works around the problem acceptably.
+ */
+ {
+ .ident = "EP45-DQ6",
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR,
+ "Gigabyte Technology Co., Ltd."),
+ DMI_MATCH(DMI_BOARD_NAME, "EP45-DQ6"),
+ },
+ .driver_data = ENCODE_BUSDEVFN(0x0a, 0x00, 0),
+ },
+ {
+ .ident = "EP45-DS5",
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR,
+ "Gigabyte Technology Co., Ltd."),
+ DMI_MATCH(DMI_BOARD_NAME, "EP45-DS5"),
+ },
+ .driver_data = ENCODE_BUSDEVFN(0x03, 0x00, 0),
+ },
+ { } /* terminate list */
+ };
+#undef ENCODE_BUSDEVFN
+ const struct dmi_system_id *dmi = dmi_first_match(sysids);
+ unsigned int val;
+
+ if (!dmi)
+ return false;
+
+ val = (unsigned long)dmi->driver_data;
+
+ return pdev->bus->number == (val >> 8) && pdev->devfn == (val & 0xff);
+}
+
static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
{
static int printed_version;
@@ -2841,6 +2904,12 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
"BIOS update required for suspend/resume\n");
}
+ if (ahci_broken_online(pdev)) {
+ hpriv->flags |= AHCI_HFLAG_SRST_TOUT_IS_OFFLINE;
+ dev_info(&pdev->dev,
+ "online status unreliable, applying workaround\n");
+ }
+
/* CAP.NP sometimes indicate the index of the last enabled
* port, at other times, that of the last possible port, so
* determining the maximum port number requires looking at
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 8ac98ff16d7d..072ba5ea138f 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -4302,6 +4302,9 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = {
{ "WDC WD2500JD-00HBB0", "WD-WMAL71490727", ATA_HORKAGE_BROKEN_HPA },
{ "MAXTOR 6L080L4", "A93.0500", ATA_HORKAGE_BROKEN_HPA },
+ /* this one allows HPA unlocking but fails IOs on the area */
+ { "OCZ-VERTEX", "1.30", ATA_HORKAGE_BROKEN_HPA },
+
/* Devices which report 1 sector over size HPA */
{ "ST340823A", NULL, ATA_HORKAGE_HPA_SIZE, },
{ "ST320413A", NULL, ATA_HORKAGE_HPA_SIZE, },
diff --git a/drivers/ata/pata_at91.c b/drivers/ata/pata_at91.c
index 5702affcb325..41c94b1ae493 100644
--- a/drivers/ata/pata_at91.c
+++ b/drivers/ata/pata_at91.c
@@ -250,7 +250,7 @@ static int __devinit pata_at91_probe(struct platform_device *pdev)
ata_port_desc(ap, "no IRQ, using PIO polling");
}
- info = kzalloc(sizeof(*info), GFP_KERNEL);
+ info = devm_kzalloc(dev, sizeof(*info), GFP_KERNEL);
if (!info) {
dev_err(dev, "failed to allocate memory for private data\n");
@@ -275,7 +275,7 @@ static int __devinit pata_at91_probe(struct platform_device *pdev)
if (!info->ide_addr) {
dev_err(dev, "failed to map IO base\n");
ret = -ENOMEM;
- goto err_ide_ioremap;
+ goto err_put;
}
info->alt_addr = devm_ioremap(dev,
@@ -284,7 +284,7 @@ static int __devinit pata_at91_probe(struct platform_device *pdev)
if (!info->alt_addr) {
dev_err(dev, "failed to map CTL base\n");
ret = -ENOMEM;
- goto err_alt_ioremap;
+ goto err_put;
}
ap->ioaddr.cmd_addr = info->ide_addr;
@@ -303,13 +303,8 @@ static int __devinit pata_at91_probe(struct platform_device *pdev)
irq ? ata_sff_interrupt : NULL,
irq_flags, &pata_at91_sht);
-err_alt_ioremap:
- devm_iounmap(dev, info->ide_addr);
-
-err_ide_ioremap:
+err_put:
clk_put(info->mck);
- kfree(info);
-
return ret;
}
@@ -317,7 +312,6 @@ static int __devexit pata_at91_remove(struct platform_device *pdev)
{
struct ata_host *host = dev_get_drvdata(&pdev->dev);
struct at91_ide_info *info;
- struct device *dev = &pdev->dev;
if (!host)
return 0;
@@ -328,11 +322,8 @@ static int __devexit pata_at91_remove(struct platform_device *pdev)
if (!info)
return 0;
- devm_iounmap(dev, info->ide_addr);
- devm_iounmap(dev, info->alt_addr);
clk_put(info->mck);
- kfree(info);
return 0;
}
diff --git a/drivers/ata/pata_atiixp.c b/drivers/ata/pata_atiixp.c
index bec0b8ade66d..45915566e4e9 100644
--- a/drivers/ata/pata_atiixp.c
+++ b/drivers/ata/pata_atiixp.c
@@ -1,6 +1,7 @@
/*
* pata_atiixp.c - ATI PATA for new ATA layer
* (C) 2005 Red Hat Inc
+ * (C) 2009 Bartlomiej Zolnierkiewicz
*
* Based on
*
@@ -61,20 +62,19 @@ static void atiixp_set_pio_timing(struct ata_port *ap, struct ata_device *adev,
struct pci_dev *pdev = to_pci_dev(ap->host->dev);
int dn = 2 * ap->port_no + adev->devno;
-
- /* Check this is correct - the order is odd in both drivers */
int timing_shift = (16 * ap->port_no) + 8 * (adev->devno ^ 1);
- u16 pio_mode_data, pio_timing_data;
+ u32 pio_timing_data;
+ u16 pio_mode_data;
pci_read_config_word(pdev, ATIIXP_IDE_PIO_MODE, &pio_mode_data);
pio_mode_data &= ~(0x7 << (4 * dn));
pio_mode_data |= pio << (4 * dn);
pci_write_config_word(pdev, ATIIXP_IDE_PIO_MODE, pio_mode_data);
- pci_read_config_word(pdev, ATIIXP_IDE_PIO_TIMING, &pio_timing_data);
+ pci_read_config_dword(pdev, ATIIXP_IDE_PIO_TIMING, &pio_timing_data);
pio_timing_data &= ~(0xFF << timing_shift);
pio_timing_data |= (pio_timings[pio] << timing_shift);
- pci_write_config_word(pdev, ATIIXP_IDE_PIO_TIMING, pio_timing_data);
+ pci_write_config_dword(pdev, ATIIXP_IDE_PIO_TIMING, pio_timing_data);
}
/**
@@ -119,16 +119,17 @@ static void atiixp_set_dmamode(struct ata_port *ap, struct ata_device *adev)
udma_mode_data |= dma << (4 * dn);
pci_write_config_word(pdev, ATIIXP_IDE_UDMA_MODE, udma_mode_data);
} else {
- u16 mwdma_timing_data;
- /* Check this is correct - the order is odd in both drivers */
int timing_shift = (16 * ap->port_no) + 8 * (adev->devno ^ 1);
+ u32 mwdma_timing_data;
dma -= XFER_MW_DMA_0;
- pci_read_config_word(pdev, ATIIXP_IDE_MWDMA_TIMING, &mwdma_timing_data);
+ pci_read_config_dword(pdev, ATIIXP_IDE_MWDMA_TIMING,
+ &mwdma_timing_data);
mwdma_timing_data &= ~(0xFF << timing_shift);
mwdma_timing_data |= (mwdma_timings[dma] << timing_shift);
- pci_write_config_word(pdev, ATIIXP_IDE_MWDMA_TIMING, mwdma_timing_data);
+ pci_write_config_dword(pdev, ATIIXP_IDE_MWDMA_TIMING,
+ mwdma_timing_data);
}
/*
* We must now look at the PIO mode situation. We may need to
diff --git a/drivers/ata/sata_nv.c b/drivers/ata/sata_nv.c
index b2d11f300c39..86a40582999c 100644
--- a/drivers/ata/sata_nv.c
+++ b/drivers/ata/sata_nv.c
@@ -602,6 +602,7 @@ MODULE_VERSION(DRV_VERSION);
static int adma_enabled;
static int swncq_enabled = 1;
+static int msi_enabled;
static void nv_adma_register_mode(struct ata_port *ap)
{
@@ -2459,6 +2460,11 @@ static int nv_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
} else if (type == SWNCQ)
nv_swncq_host_init(host);
+ if (msi_enabled) {
+ dev_printk(KERN_NOTICE, &pdev->dev, "Using MSI\n");
+ pci_enable_msi(pdev);
+ }
+
pci_set_master(pdev);
return ata_host_activate(host, pdev->irq, ipriv->irq_handler,
IRQF_SHARED, ipriv->sht);
@@ -2558,4 +2564,6 @@ module_param_named(adma, adma_enabled, bool, 0444);
MODULE_PARM_DESC(adma, "Enable use of ADMA (Default: false)");
module_param_named(swncq, swncq_enabled, bool, 0444);
MODULE_PARM_DESC(swncq, "Enable use of SWNCQ (Default: true)");
+module_param_named(msi, msi_enabled, bool, 0444);
+MODULE_PARM_DESC(msi, "Enable use of MSI (Default: false)");
diff --git a/drivers/base/platform.c b/drivers/base/platform.c
index 81cb01bfc356..456594bd97bc 100644
--- a/drivers/base/platform.c
+++ b/drivers/base/platform.c
@@ -483,9 +483,6 @@ int platform_driver_register(struct platform_driver *drv)
drv->driver.remove = platform_drv_remove;
if (drv->shutdown)
drv->driver.shutdown = platform_drv_shutdown;
- if (drv->suspend || drv->resume)
- pr_warning("Platform driver '%s' needs updating - please use "
- "dev_pm_ops\n", drv->driver.name);
return driver_register(&drv->driver);
}
diff --git a/drivers/char/pty.c b/drivers/char/pty.c
index 6e6942c45f5b..d083c73d784a 100644
--- a/drivers/char/pty.c
+++ b/drivers/char/pty.c
@@ -144,6 +144,8 @@ static int pty_write(struct tty_struct *tty, const unsigned char *buf,
static int pty_write_room(struct tty_struct *tty)
{
+ if (tty->stopped)
+ return 0;
return pty_space(tty->link);
}
diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c
index b4a3dbcebe9b..f85aaf21e783 100644
--- a/drivers/gpu/drm/drm_irq.c
+++ b/drivers/gpu/drm/drm_irq.c
@@ -566,7 +566,7 @@ int drm_wait_vblank(struct drm_device *dev, void *data,
ret = drm_vblank_get(dev, crtc);
if (ret) {
- DRM_ERROR("failed to acquire vblank counter, %d\n", ret);
+ DRM_DEBUG("failed to acquire vblank counter, %d\n", ret);
return ret;
}
seq = drm_vblank_count(dev, crtc);
diff --git a/drivers/gpu/drm/drm_modes.c b/drivers/gpu/drm/drm_modes.c
index 54f492a488a9..7914097b09c6 100644
--- a/drivers/gpu/drm/drm_modes.c
+++ b/drivers/gpu/drm/drm_modes.c
@@ -566,6 +566,8 @@ void drm_mode_connector_list_update(struct drm_connector *connector)
found_it = 1;
/* if equal delete the probed mode */
mode->status = pmode->status;
+ /* Merge type bits together */
+ mode->type |= pmode->type;
list_del(&pmode->head);
drm_mode_destroy(connector->dev, pmode);
break;
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 83aee80e77a6..7ebc84c2881e 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -190,7 +190,7 @@ u32 i915_get_vblank_counter(struct drm_device *dev, int pipe)
low_frame = pipe ? PIPEBFRAMEPIXEL : PIPEAFRAMEPIXEL;
if (!i915_pipe_enabled(dev, pipe)) {
- DRM_ERROR("trying to get vblank count for disabled pipe %d\n", pipe);
+ DRM_DEBUG("trying to get vblank count for disabled pipe %d\n", pipe);
return 0;
}
@@ -219,7 +219,7 @@ u32 gm45_get_vblank_counter(struct drm_device *dev, int pipe)
int reg = pipe ? PIPEB_FRMCOUNT_GM45 : PIPEA_FRMCOUNT_GM45;
if (!i915_pipe_enabled(dev, pipe)) {
- DRM_ERROR("trying to get vblank count for disabled pipe %d\n", pipe);
+ DRM_DEBUG("trying to get vblank count for disabled pipe %d\n", pipe);
return 0;
}
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 5b98bea4ff9b..103f2d33fa89 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -359,6 +359,7 @@ static mddev_t * mddev_find(dev_t unit)
else
new->md_minor = MINOR(unit) >> MdpMinorShift;
+ mutex_init(&new->open_mutex);
mutex_init(&new->reconfig_mutex);
INIT_LIST_HEAD(&new->disks);
INIT_LIST_HEAD(&new->all_mddevs);
@@ -1974,17 +1975,14 @@ repeat:
/* otherwise we have to go forward and ... */
mddev->events ++;
if (!mddev->in_sync || mddev->recovery_cp != MaxSector) { /* not clean */
- /* .. if the array isn't clean, insist on an odd 'events' */
- if ((mddev->events&1)==0) {
- mddev->events++;
+ /* .. if the array isn't clean, an 'even' event must also go
+ * to spares. */
+ if ((mddev->events&1)==0)
nospares = 0;
- }
} else {
- /* otherwise insist on an even 'events' (for clean states) */
- if ((mddev->events&1)) {
- mddev->events++;
+ /* otherwise an 'odd' event must go to spares */
+ if ((mddev->events&1))
nospares = 0;
- }
}
}
@@ -3601,6 +3599,7 @@ max_sync_store(mddev_t *mddev, const char *buf, size_t len)
if (max < mddev->resync_min)
return -EINVAL;
if (max < mddev->resync_max &&
+ mddev->ro == 0 &&
test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
return -EBUSY;
@@ -4304,12 +4303,11 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
struct gendisk *disk = mddev->gendisk;
mdk_rdev_t *rdev;
+ mutex_lock(&mddev->open_mutex);
if (atomic_read(&mddev->openers) > is_open) {
printk("md: %s still in use.\n",mdname(mddev));
- return -EBUSY;
- }
-
- if (mddev->pers) {
+ err = -EBUSY;
+ } else if (mddev->pers) {
if (mddev->sync_thread) {
set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
@@ -4367,7 +4365,10 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
set_disk_ro(disk, 1);
clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
}
-
+out:
+ mutex_unlock(&mddev->open_mutex);
+ if (err)
+ return err;
/*
* Free resources if final stop
*/
@@ -4433,7 +4434,6 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
blk_integrity_unregister(disk);
md_new_event(mddev);
sysfs_notify_dirent(mddev->sysfs_state);
-out:
return err;
}
@@ -5518,12 +5518,12 @@ static int md_open(struct block_device *bdev, fmode_t mode)
}
BUG_ON(mddev != bdev->bd_disk->private_data);
- if ((err = mutex_lock_interruptible_nested(&mddev->reconfig_mutex, 1)))
+ if ((err = mutex_lock_interruptible(&mddev->open_mutex)))
goto out;
err = 0;
atomic_inc(&mddev->openers);
- mddev_unlock(mddev);
+ mutex_unlock(&mddev->open_mutex);
check_disk_change(bdev);
out:
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 78f03168baf9..f8fc188bc762 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -223,6 +223,16 @@ struct mddev_s
* so we don't loop trying */
int in_sync; /* know to not need resync */
+ /* 'open_mutex' avoids races between 'md_open' and 'do_md_stop', so
+ * that we are never stopping an array while it is open.
+ * 'reconfig_mutex' protects all other reconfiguration.
+ * These locks are separate due to conflicting interactions
+ * with bdev->bd_mutex.
+ * Lock ordering is:
+ * reconfig_mutex -> bd_mutex : e.g. do_md_run -> revalidate_disk
+ * bd_mutex -> open_mutex: e.g. __blkdev_get -> md_open
+ */
+ struct mutex open_mutex;
struct mutex reconfig_mutex;
atomic_t active; /* general refcount */
atomic_t openers; /* number of active opens */
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 2b521ee67dfa..b8a2c5dc67ba 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -3785,7 +3785,7 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped
conf->reshape_progress < raid5_size(mddev, 0, 0)) {
sector_nr = raid5_size(mddev, 0, 0)
- conf->reshape_progress;
- } else if (mddev->delta_disks > 0 &&
+ } else if (mddev->delta_disks >= 0 &&
conf->reshape_progress > 0)
sector_nr = conf->reshape_progress;
sector_div(sector_nr, new_data_disks);
@@ -4509,7 +4509,26 @@ static int run(mddev_t *mddev)
(old_disks-max_degraded));
/* here_old is the first stripe that we might need to read
* from */
- if (here_new >= here_old) {
+ if (mddev->delta_disks == 0) {
+ /* We cannot be sure it is safe to start an in-place
+ * reshape. It is only safe if user-space is monitoring
+ * and taking constant backups.
+ * mdadm always starts a situation like this in
+ * readonly mode so it can take control before
+ * allowing any writes. So just check for that.
+ */
+ if ((here_new * mddev->new_chunk_sectors !=
+ here_old * mddev->chunk_sectors) ||
+ mddev->ro == 0) {
+ printk(KERN_ERR "raid5: in-place reshape must be started"
+ " in read-only mode - aborting\n");
+ return -EINVAL;
+ }
+ } else if (mddev->delta_disks < 0
+ ? (here_new * mddev->new_chunk_sectors <=
+ here_old * mddev->chunk_sectors)
+ : (here_new * mddev->new_chunk_sectors >=
+ here_old * mddev->chunk_sectors)) {
/* Reading from the same stripe as writing to - bad */
printk(KERN_ERR "raid5: reshape_position too early for "
"auto-recovery - aborting.\n");
@@ -5078,8 +5097,15 @@ static void raid5_finish_reshape(mddev_t *mddev)
mddev->degraded--;
for (d = conf->raid_disks ;
d < conf->raid_disks - mddev->delta_disks;
- d++)
- raid5_remove_disk(mddev, d);
+ d++) {
+ mdk_rdev_t *rdev = conf->disks[d].rdev;
+ if (rdev && raid5_remove_disk(mddev, d) == 0) {
+ char nm[20];
+ sprintf(nm, "rd%d", rdev->raid_disk);
+ sysfs_remove_link(&mddev->kobj, nm);
+ rdev->raid_disk = -1;
+ }
+ }
}
mddev->layout = conf->algorithm;
mddev->chunk_sectors = conf->chunk_sectors;
diff --git a/drivers/mtd/maps/sbc8240.c b/drivers/mtd/maps/sbc8240.c
deleted file mode 100644
index e69de29bb2d1..000000000000
--- a/drivers/mtd/maps/sbc8240.c
+++ /dev/null
diff --git a/drivers/mtd/ubi/eba.c b/drivers/mtd/ubi/eba.c
index 0f2034c3ed2f..e4d9ef0c965a 100644
--- a/drivers/mtd/ubi/eba.c
+++ b/drivers/mtd/ubi/eba.c
@@ -1254,6 +1254,7 @@ out_free:
if (!ubi->volumes[i])
continue;
kfree(ubi->volumes[i]->eba_tbl);
+ ubi->volumes[i]->eba_tbl = NULL;
}
return err;
}
diff --git a/drivers/mtd/ubi/scan.c b/drivers/mtd/ubi/scan.c
index a423131b6171..b847745394b4 100644
--- a/drivers/mtd/ubi/scan.c
+++ b/drivers/mtd/ubi/scan.c
@@ -781,11 +781,22 @@ static int process_eb(struct ubi_device *ubi, struct ubi_scan_info *si,
return -EINVAL;
}
+ /*
+ * Make sure that all PEBs have the same image sequence number.
+ * This allows us to detect situations when users flash UBI
+ * images incorrectly, so that the flash has the new UBI image
+ * and leftovers from the old one. This feature was added
+ * relatively recently, and the sequence number was always
+ * zero, because old UBI implementations always set it to zero.
+ * For this reason, we do not panic if some PEBs have zero
+ * sequence number, while other PEBs have non-zero sequence
+ * number.
+ */
image_seq = be32_to_cpu(ech->image_seq);
if (!si->image_seq_set) {
ubi->image_seq = image_seq;
si->image_seq_set = 1;
- } else if (ubi->image_seq != image_seq) {
+ } else if (ubi->image_seq && ubi->image_seq != image_seq) {
ubi_err("bad image sequence number %d in PEB %d, "
"expected %d", image_seq, pnum, ubi->image_seq);
ubi_dbg_dump_ec_hdr(ech);
diff --git a/drivers/pci/hotplug/sgi_hotplug.c b/drivers/pci/hotplug/sgi_hotplug.c
index a4494d78e7c2..8aebe1e9d3d6 100644
--- a/drivers/pci/hotplug/sgi_hotplug.c
+++ b/drivers/pci/hotplug/sgi_hotplug.c
@@ -90,11 +90,10 @@ static struct hotplug_slot_ops sn_hotplug_slot_ops = {
static DEFINE_MUTEX(sn_hotplug_mutex);
-static ssize_t path_show (struct hotplug_slot *bss_hotplug_slot,
- char *buf)
+static ssize_t path_show(struct pci_slot *pci_slot, char *buf)
{
int retval = -ENOENT;
- struct slot *slot = bss_hotplug_slot->private;
+ struct slot *slot = pci_slot->hotplug->private;
if (!slot)
return retval;
@@ -103,7 +102,7 @@ static ssize_t path_show (struct hotplug_slot *bss_hotplug_slot,
return retval;
}
-static struct hotplug_slot_attribute sn_slot_path_attr = __ATTR_RO(path);
+static struct pci_slot_attribute sn_slot_path_attr = __ATTR_RO(path);
static int sn_pci_slot_valid(struct pci_bus *pci_bus, int device)
{
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 489fc01a3204..e4e089a8f294 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -255,7 +255,7 @@ static void nfs_direct_read_release(void *calldata)
if (put_dreq(dreq))
nfs_direct_complete(dreq);
- nfs_readdata_release(calldata);
+ nfs_readdata_free(data);
}
static const struct rpc_call_ops nfs_read_direct_ops = {
@@ -314,14 +314,14 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq,
data->npages, 1, 0, data->pagevec, NULL);
up_read(&current->mm->mmap_sem);
if (result < 0) {
- nfs_readdata_release(data);
+ nfs_readdata_free(data);
break;
}
if ((unsigned)result < data->npages) {
bytes = result * PAGE_SIZE;
if (bytes <= pgbase) {
nfs_direct_release_pages(data->pagevec, result);
- nfs_readdata_release(data);
+ nfs_readdata_free(data);
break;
}
bytes -= pgbase;
@@ -334,7 +334,7 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq,
data->inode = inode;
data->cred = msg.rpc_cred;
data->args.fh = NFS_FH(inode);
- data->args.context = get_nfs_open_context(ctx);
+ data->args.context = ctx;
data->args.offset = pos;
data->args.pgbase = pgbase;
data->args.pages = data->pagevec;
@@ -441,7 +441,7 @@ static void nfs_direct_free_writedata(struct nfs_direct_req *dreq)
struct nfs_write_data *data = list_entry(dreq->rewrite_list.next, struct nfs_write_data, pages);
list_del(&data->pages);
nfs_direct_release_pages(data->pagevec, data->npages);
- nfs_writedata_release(data);
+ nfs_writedata_free(data);
}
}
@@ -534,7 +534,7 @@ static void nfs_direct_commit_release(void *calldata)
dprintk("NFS: %5u commit returned %d\n", data->task.tk_pid, status);
nfs_direct_write_complete(dreq, data->inode);
- nfs_commitdata_release(calldata);
+ nfs_commit_free(data);
}
static const struct rpc_call_ops nfs_commit_direct_ops = {
@@ -570,7 +570,7 @@ static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq)
data->args.fh = NFS_FH(data->inode);
data->args.offset = 0;
data->args.count = 0;
- data->args.context = get_nfs_open_context(dreq->ctx);
+ data->args.context = dreq->ctx;
data->res.count = 0;
data->res.fattr = &data->fattr;
data->res.verf = &data->verf;
@@ -734,14 +734,14 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq,
data->npages, 0, 0, data->pagevec, NULL);
up_read(&current->mm->mmap_sem);
if (result < 0) {
- nfs_writedata_release(data);
+ nfs_writedata_free(data);
break;
}
if ((unsigned)result < data->npages) {
bytes = result * PAGE_SIZE;
if (bytes <= pgbase) {
nfs_direct_release_pages(data->pagevec, result);
- nfs_writedata_release(data);
+ nfs_writedata_free(data);
break;
}
bytes -= pgbase;
@@ -756,7 +756,7 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq,
data->inode = inode;
data->cred = msg.rpc_cred;
data->args.fh = NFS_FH(inode);
- data->args.context = get_nfs_open_context(ctx);
+ data->args.context = ctx;
data->args.offset = pos;
data->args.pgbase = pgbase;
data->args.pages = data->pagevec;
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index e65cc2e650c8..cfc30d362f94 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -702,29 +702,12 @@ struct compound_hdr {
u32 minorversion;
};
-/*
- * START OF "GENERIC" ENCODE ROUTINES.
- * These may look a little ugly since they are imported from a "generic"
- * set of XDR encode/decode routines which are intended to be shared by
- * all of our NFSv4 implementations (OpenBSD, MacOS X...).
- *
- * If the pain of reading these is too great, it should be a straightforward
- * task to translate them into Linux-specific versions which are more
- * consistent with the style used in NFSv2/v3...
- */
-#define WRITE32(n) *p++ = htonl(n)
-#define WRITE64(n) do { \
- *p++ = htonl((uint32_t)((n) >> 32)); \
- *p++ = htonl((uint32_t)(n)); \
-} while (0)
-#define WRITEMEM(ptr,nbytes) do { \
- p = xdr_encode_opaque_fixed(p, ptr, nbytes); \
-} while (0)
-
-#define RESERVE_SPACE(nbytes) do { \
- p = xdr_reserve_space(xdr, nbytes); \
- BUG_ON(!p); \
-} while (0)
+static __be32 *reserve_space(struct xdr_stream *xdr, size_t nbytes)
+{
+ __be32 *p = xdr_reserve_space(xdr, nbytes);
+ BUG_ON(!p);
+ return p;
+}
static void encode_string(struct xdr_stream *xdr, unsigned int len, const char *str)
{
@@ -749,12 +732,11 @@ static void encode_compound_hdr(struct xdr_stream *xdr,
dprintk("encode_compound: tag=%.*s\n", (int)hdr->taglen, hdr->tag);
BUG_ON(hdr->taglen > NFS4_MAXTAGLEN);
- RESERVE_SPACE(12+(XDR_QUADLEN(hdr->taglen)<<2));
- WRITE32(hdr->taglen);
- WRITEMEM(hdr->tag, hdr->taglen);
- WRITE32(hdr->minorversion);
+ p = reserve_space(xdr, 4 + hdr->taglen + 8);
+ p = xdr_encode_opaque(p, hdr->tag, hdr->taglen);
+ *p++ = cpu_to_be32(hdr->minorversion);
hdr->nops_p = p;
- WRITE32(hdr->nops);
+ *p = cpu_to_be32(hdr->nops);
}
static void encode_nops(struct compound_hdr *hdr)
@@ -829,55 +811,53 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const
len += 16;
else if (iap->ia_valid & ATTR_MTIME)
len += 4;
- RESERVE_SPACE(len);
+ p = reserve_space(xdr, len);
/*
* We write the bitmap length now, but leave the bitmap and the attribute
* buffer length to be backfilled at the end of this routine.
*/
- WRITE32(2);
+ *p++ = cpu_to_be32(2);
q = p;
p += 3;
if (iap->ia_valid & ATTR_SIZE) {
bmval0 |= FATTR4_WORD0_SIZE;
- WRITE64(iap->ia_size);
+ p = xdr_encode_hyper(p, iap->ia_size);
}
if (iap->ia_valid & ATTR_MODE) {
bmval1 |= FATTR4_WORD1_MODE;
- WRITE32(iap->ia_mode & S_IALLUGO);
+ *p++ = cpu_to_be32(iap->ia_mode & S_IALLUGO);
}
if (iap->ia_valid & ATTR_UID) {
bmval1 |= FATTR4_WORD1_OWNER;
- WRITE32(owner_namelen);
- WRITEMEM(owner_name, owner_namelen);
+ p = xdr_encode_opaque(p, owner_name, owner_namelen);
}
if (iap->ia_valid & ATTR_GID) {
bmval1 |= FATTR4_WORD1_OWNER_GROUP;
- WRITE32(owner_grouplen);
- WRITEMEM(owner_group, owner_grouplen);
+ p = xdr_encode_opaque(p, owner_group, owner_grouplen);
}
if (iap->ia_valid & ATTR_ATIME_SET) {
bmval1 |= FATTR4_WORD1_TIME_ACCESS_SET;
- WRITE32(NFS4_SET_TO_CLIENT_TIME);
- WRITE32(0);
- WRITE32(iap->ia_mtime.tv_sec);
- WRITE32(iap->ia_mtime.tv_nsec);
+ *p++ = cpu_to_be32(NFS4_SET_TO_CLIENT_TIME);
+ *p++ = cpu_to_be32(0);
+ *p++ = cpu_to_be32(iap->ia_mtime.tv_sec);
+ *p++ = cpu_to_be32(iap->ia_mtime.tv_nsec);
}
else if (iap->ia_valid & ATTR_ATIME) {
bmval1 |= FATTR4_WORD1_TIME_ACCESS_SET;
- WRITE32(NFS4_SET_TO_SERVER_TIME);
+ *p++ = cpu_to_be32(NFS4_SET_TO_SERVER_TIME);
}
if (iap->ia_valid & ATTR_MTIME_SET) {
bmval1 |= FATTR4_WORD1_TIME_MODIFY_SET;
- WRITE32(NFS4_SET_TO_CLIENT_TIME);
- WRITE32(0);
- WRITE32(iap->ia_mtime.tv_sec);
- WRITE32(iap->ia_mtime.tv_nsec);
+ *p++ = cpu_to_be32(NFS4_SET_TO_CLIENT_TIME);
+ *p++ = cpu_to_be32(0);
+ *p++ = cpu_to_be32(iap->ia_mtime.tv_sec);
+ *p++ = cpu_to_be32(iap->ia_mtime.tv_nsec);
}
else if (iap->ia_valid & ATTR_MTIME) {
bmval1 |= FATTR4_WORD1_TIME_MODIFY_SET;
- WRITE32(NFS4_SET_TO_SERVER_TIME);
+ *p++ = cpu_to_be32(NFS4_SET_TO_SERVER_TIME);
}
/*
@@ -891,7 +871,7 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const
len = (char *)p - (char *)q - 12;
*q++ = htonl(bmval0);
*q++ = htonl(bmval1);
- *q++ = htonl(len);
+ *q = htonl(len);
/* out: */
}
@@ -900,9 +880,9 @@ static void encode_access(struct xdr_stream *xdr, u32 access, struct compound_hd
{
__be32 *p;
- RESERVE_SPACE(8);
- WRITE32(OP_ACCESS);
- WRITE32(access);
+ p = reserve_space(xdr, 8);
+ *p++ = cpu_to_be32(OP_ACCESS);
+ *p = cpu_to_be32(access);
hdr->nops++;
hdr->replen += decode_access_maxsz;
}
@@ -911,10 +891,10 @@ static void encode_close(struct xdr_stream *xdr, const struct nfs_closeargs *arg
{
__be32 *p;
- RESERVE_SPACE(8+NFS4_STATEID_SIZE);
- WRITE32(OP_CLOSE);
- WRITE32(arg->seqid->sequence->counter);
- WRITEMEM(arg->stateid->data, NFS4_STATEID_SIZE);
+ p = reserve_space(xdr, 8+NFS4_STATEID_SIZE);
+ *p++ = cpu_to_be32(OP_CLOSE);
+ *p++ = cpu_to_be32(arg->seqid->sequence->counter);
+ xdr_encode_opaque_fixed(p, arg->stateid->data, NFS4_STATEID_SIZE);
hdr->nops++;
hdr->replen += decode_close_maxsz;
}
@@ -923,10 +903,10 @@ static void encode_commit(struct xdr_stream *xdr, const struct nfs_writeargs *ar
{
__be32 *p;
- RESERVE_SPACE(16);
- WRITE32(OP_COMMIT);
- WRITE64(args->offset);
- WRITE32(args->count);
+ p = reserve_space(xdr, 16);
+ *p++ = cpu_to_be32(OP_COMMIT);
+ p = xdr_encode_hyper(p, args->offset);
+ *p = cpu_to_be32(args->count);
hdr->nops++;
hdr->replen += decode_commit_maxsz;
}
@@ -935,30 +915,28 @@ static void encode_create(struct xdr_stream *xdr, const struct nfs4_create_arg *
{
__be32 *p;
- RESERVE_SPACE(8);
- WRITE32(OP_CREATE);
- WRITE32(create->ftype);
+ p = reserve_space(xdr, 8);
+ *p++ = cpu_to_be32(OP_CREATE);
+ *p = cpu_to_be32(create->ftype);
switch (create->ftype) {
case NF4LNK:
- RESERVE_SPACE(4);
- WRITE32(create->u.symlink.len);
+ p = reserve_space(xdr, 4);
+ *p = cpu_to_be32(create->u.symlink.len);
xdr_write_pages(xdr, create->u.symlink.pages, 0, create->u.symlink.len);
break;
case NF4BLK: case NF4CHR:
- RESERVE_SPACE(8);
- WRITE32(create->u.device.specdata1);
- WRITE32(create->u.device.specdata2);
+ p = reserve_space(xdr, 8);
+ *p++ = cpu_to_be32(create->u.device.specdata1);
+ *p = cpu_to_be32(create->u.device.specdata2);
break;
default:
break;
}
- RESERVE_SPACE(4 + create->name->len);
- WRITE32(create->name->len);
- WRITEMEM(create->name->name, create->name->len);
+ encode_string(xdr, create->name->len, create->name->name);
hdr->nops++;
hdr->replen += decode_create_maxsz;
@@ -969,10 +947,10 @@ static void encode_getattr_one(struct xdr_stream *xdr, uint32_t bitmap, struct c
{
__be32 *p;
- RESERVE_SPACE(12);
- WRITE32(OP_GETATTR);
- WRITE32(1);
- WRITE32(bitmap);
+ p = reserve_space(xdr, 12);
+ *p++ = cpu_to_be32(OP_GETATTR);
+ *p++ = cpu_to_be32(1);
+ *p = cpu_to_be32(bitmap);
hdr->nops++;
hdr->replen += decode_getattr_maxsz;
}
@@ -981,11 +959,11 @@ static void encode_getattr_two(struct xdr_stream *xdr, uint32_t bm0, uint32_t bm
{
__be32 *p;
- RESERVE_SPACE(16);
- WRITE32(OP_GETATTR);
- WRITE32(2);
- WRITE32(bm0);
- WRITE32(bm1);
+ p = reserve_space(xdr, 16);
+ *p++ = cpu_to_be32(OP_GETATTR);
+ *p++ = cpu_to_be32(2);
+ *p++ = cpu_to_be32(bm0);
+ *p = cpu_to_be32(bm1);
hdr->nops++;
hdr->replen += decode_getattr_maxsz;
}
@@ -1012,8 +990,8 @@ static void encode_getfh(struct xdr_stream *xdr, struct compound_hdr *hdr)
{
__be32 *p;
- RESERVE_SPACE(4);
- WRITE32(OP_GETFH);
+ p = reserve_space(xdr, 4);
+ *p = cpu_to_be32(OP_GETFH);
hdr->nops++;
hdr->replen += decode_getfh_maxsz;
}
@@ -1022,10 +1000,9 @@ static void encode_link(struct xdr_stream *xdr, const struct qstr *name, struct
{
__be32 *p;
- RESERVE_SPACE(8 + name->len);
- WRITE32(OP_LINK);
- WRITE32(name->len);
- WRITEMEM(name->name, name->len);
+ p = reserve_space(xdr, 8 + name->len);
+ *p++ = cpu_to_be32(OP_LINK);
+ xdr_encode_opaque(p, name->name, name->len);
hdr->nops++;
hdr->replen += decode_link_maxsz;
}
@@ -1052,27 +1029,27 @@ static void encode_lock(struct xdr_stream *xdr, const struct nfs_lock_args *args
{
__be32 *p;
- RESERVE_SPACE(32);
- WRITE32(OP_LOCK);
- WRITE32(nfs4_lock_type(args->fl, args->block));
- WRITE32(args->reclaim);
- WRITE64(args->fl->fl_start);
- WRITE64(nfs4_lock_length(args->fl));
- WRITE32(args->new_lock_owner);
+ p = reserve_space(xdr, 32);
+ *p++ = cpu_to_be32(OP_LOCK);
+ *p++ = cpu_to_be32(nfs4_lock_type(args->fl, args->block));
+ *p++ = cpu_to_be32(args->reclaim);
+ p = xdr_encode_hyper(p, args->fl->fl_start);
+ p = xdr_encode_hyper(p, nfs4_lock_length(args->fl));
+ *p = cpu_to_be32(args->new_lock_owner);
if (args->new_lock_owner){
- RESERVE_SPACE(4+NFS4_STATEID_SIZE+32);
- WRITE32(args->open_seqid->sequence->counter);
- WRITEMEM(args->open_stateid->data, NFS4_STATEID_SIZE);
- WRITE32(args->lock_seqid->sequence->counter);
- WRITE64(args->lock_owner.clientid);
- WRITE32(16);
- WRITEMEM("lock id:", 8);
- WRITE64(args->lock_owner.id);
+ p = reserve_space(xdr, 4+NFS4_STATEID_SIZE+32);
+ *p++ = cpu_to_be32(args->open_seqid->sequence->counter);
+ p = xdr_encode_opaque_fixed(p, args->open_stateid->data, NFS4_STATEID_SIZE);
+ *p++ = cpu_to_be32(args->lock_seqid->sequence->counter);
+ p = xdr_encode_hyper(p, args->lock_owner.clientid);
+ *p++ = cpu_to_be32(16);
+ p = xdr_encode_opaque_fixed(p, "lock id:", 8);
+ xdr_encode_hyper(p, args->lock_owner.id);
}
else {
- RESERVE_SPACE(NFS4_STATEID_SIZE+4);
- WRITEMEM(args->lock_stateid->data, NFS4_STATEID_SIZE);
- WRITE32(args->lock_seqid->sequence->counter);
+ p = reserve_space(xdr, NFS4_STATEID_SIZE+4);
+ p = xdr_encode_opaque_fixed(p, args->lock_stateid->data, NFS4_STATEID_SIZE);
+ *p = cpu_to_be32(args->lock_seqid->sequence->counter);
}
hdr->nops++;
hdr->replen += decode_lock_maxsz;
@@ -1082,15 +1059,15 @@ static void encode_lockt(struct xdr_stream *xdr, const struct nfs_lockt_args *ar
{
__be32 *p;
- RESERVE_SPACE(52);
- WRITE32(OP_LOCKT);
- WRITE32(nfs4_lock_type(args->fl, 0));
- WRITE64(args->fl->fl_start);
- WRITE64(nfs4_lock_length(args->fl));
- WRITE64(args->lock_owner.clientid);
- WRITE32(16);
- WRITEMEM("lock id:", 8);
- WRITE64(args->lock_owner.id);
+ p = reserve_space(xdr, 52);
+ *p++ = cpu_to_be32(OP_LOCKT);
+ *p++ = cpu_to_be32(nfs4_lock_type(args->fl, 0));
+ p = xdr_encode_hyper(p, args->fl->fl_start);
+ p = xdr_encode_hyper(p, nfs4_lock_length(args->fl));
+ p = xdr_encode_hyper(p, args->lock_owner.clientid);
+ *p++ = cpu_to_be32(16);
+ p = xdr_encode_opaque_fixed(p, "lock id:", 8);
+ xdr_encode_hyper(p, args->lock_owner.id);
hdr->nops++;
hdr->replen += decode_lockt_maxsz;
}
@@ -1099,13 +1076,13 @@ static void encode_locku(struct xdr_stream *xdr, const struct nfs_locku_args *ar
{
__be32 *p;
- RESERVE_SPACE(12+NFS4_STATEID_SIZE+16);
- WRITE32(OP_LOCKU);
- WRITE32(nfs4_lock_type(args->fl, 0));
- WRITE32(args->seqid->sequence->counter);
- WRITEMEM(args->stateid->data, NFS4_STATEID_SIZE);
- WRITE64(args->fl->fl_start);
- WRITE64(nfs4_lock_length(args->fl));
+ p = reserve_space(xdr, 12+NFS4_STATEID_SIZE+16);
+ *p++ = cpu_to_be32(OP_LOCKU);
+ *p++ = cpu_to_be32(nfs4_lock_type(args->fl, 0));
+ *p++ = cpu_to_be32(args->seqid->sequence->counter);
+ p = xdr_encode_opaque_fixed(p, args->stateid->data, NFS4_STATEID_SIZE);
+ p = xdr_encode_hyper(p, args->fl->fl_start);
+ xdr_encode_hyper(p, nfs4_lock_length(args->fl));
hdr->nops++;
hdr->replen += decode_locku_maxsz;
}
@@ -1115,10 +1092,9 @@ static void encode_lookup(struct xdr_stream *xdr, const struct qstr *name, struc
int len = name->len;
__be32 *p;
- RESERVE_SPACE(8 + len);
- WRITE32(OP_LOOKUP);
- WRITE32(len);
- WRITEMEM(name->name, len);
+ p = reserve_space(xdr, 8 + len);
+ *p++ = cpu_to_be32(OP_LOOKUP);
+ xdr_encode_opaque(p, name->name, len);
hdr->nops++;
hdr->replen += decode_lookup_maxsz;
}
@@ -1127,21 +1103,21 @@ static void encode_share_access(struct xdr_stream *xdr, fmode_t fmode)
{
__be32 *p;
- RESERVE_SPACE(8);
+ p = reserve_space(xdr, 8);
switch (fmode & (FMODE_READ|FMODE_WRITE)) {
case FMODE_READ:
- WRITE32(NFS4_SHARE_ACCESS_READ);
+ *p++ = cpu_to_be32(NFS4_SHARE_ACCESS_READ);
break;
case FMODE_WRITE:
- WRITE32(NFS4_SHARE_ACCESS_WRITE);
+ *p++ = cpu_to_be32(NFS4_SHARE_ACCESS_WRITE);
break;
case FMODE_READ|FMODE_WRITE:
- WRITE32(NFS4_SHARE_ACCESS_BOTH);
+ *p++ = cpu_to_be32(NFS4_SHARE_ACCESS_BOTH);
break;
default:
- WRITE32(0);
+ *p++ = cpu_to_be32(0);
}
- WRITE32(0); /* for linux, share_deny = 0 always */
+ *p = cpu_to_be32(0); /* for linux, share_deny = 0 always */
}
static inline void encode_openhdr(struct xdr_stream *xdr, const struct nfs_openargs *arg)
@@ -1151,29 +1127,29 @@ static inline void encode_openhdr(struct xdr_stream *xdr, const struct nfs_opena
* opcode 4, seqid 4, share_access 4, share_deny 4, clientid 8, ownerlen 4,
* owner 4 = 32
*/
- RESERVE_SPACE(8);
- WRITE32(OP_OPEN);
- WRITE32(arg->seqid->sequence->counter);
+ p = reserve_space(xdr, 8);
+ *p++ = cpu_to_be32(OP_OPEN);
+ *p = cpu_to_be32(arg->seqid->sequence->counter);
encode_share_access(xdr, arg->fmode);
- RESERVE_SPACE(28);
- WRITE64(arg->clientid);
- WRITE32(16);
- WRITEMEM("open id:", 8);
- WRITE64(arg->id);
+ p = reserve_space(xdr, 28);
+ p = xdr_encode_hyper(p, arg->clientid);
+ *p++ = cpu_to_be32(16);
+ p = xdr_encode_opaque_fixed(p, "open id:", 8);
+ xdr_encode_hyper(p, arg->id);
}
static inline void encode_createmode(struct xdr_stream *xdr, const struct nfs_openargs *arg)
{
__be32 *p;
- RESERVE_SPACE(4);
+ p = reserve_space(xdr, 4);
switch(arg->open_flags & O_EXCL) {
case 0:
- WRITE32(NFS4_CREATE_UNCHECKED);
+ *p = cpu_to_be32(NFS4_CREATE_UNCHECKED);
encode_attrs(xdr, arg->u.attrs, arg->server);
break;
default:
- WRITE32(NFS4_CREATE_EXCLUSIVE);
+ *p = cpu_to_be32(NFS4_CREATE_EXCLUSIVE);
encode_nfs4_verifier(xdr, &arg->u.verifier);
}
}
@@ -1182,14 +1158,14 @@ static void encode_opentype(struct xdr_stream *xdr, const struct nfs_openargs *a
{
__be32 *p;
- RESERVE_SPACE(4);
+ p = reserve_space(xdr, 4);
switch (arg->open_flags & O_CREAT) {
case 0:
- WRITE32(NFS4_OPEN_NOCREATE);
+ *p = cpu_to_be32(NFS4_OPEN_NOCREATE);
break;
default:
BUG_ON(arg->claim != NFS4_OPEN_CLAIM_NULL);
- WRITE32(NFS4_OPEN_CREATE);
+ *p = cpu_to_be32(NFS4_OPEN_CREATE);
encode_createmode(xdr, arg);
}
}
@@ -1198,16 +1174,16 @@ static inline void encode_delegation_type(struct xdr_stream *xdr, fmode_t delega
{
__be32 *p;
- RESERVE_SPACE(4);
+ p = reserve_space(xdr, 4);
switch (delegation_type) {
case 0:
- WRITE32(NFS4_OPEN_DELEGATE_NONE);
+ *p = cpu_to_be32(NFS4_OPEN_DELEGATE_NONE);
break;
case FMODE_READ:
- WRITE32(NFS4_OPEN_DELEGATE_READ);
+ *p = cpu_to_be32(NFS4_OPEN_DELEGATE_READ);
break;
case FMODE_WRITE|FMODE_READ:
- WRITE32(NFS4_OPEN_DELEGATE_WRITE);
+ *p = cpu_to_be32(NFS4_OPEN_DELEGATE_WRITE);
break;
default:
BUG();
@@ -1218,8 +1194,8 @@ static inline void encode_claim_null(struct xdr_stream *xdr, const struct qstr *
{
__be32 *p;
- RESERVE_SPACE(4);
- WRITE32(NFS4_OPEN_CLAIM_NULL);
+ p = reserve_space(xdr, 4);
+ *p = cpu_to_be32(NFS4_OPEN_CLAIM_NULL);
encode_string(xdr, name->len, name->name);
}
@@ -1227,8 +1203,8 @@ static inline void encode_claim_previous(struct xdr_stream *xdr, fmode_t type)
{
__be32 *p;
- RESERVE_SPACE(4);
- WRITE32(NFS4_OPEN_CLAIM_PREVIOUS);
+ p = reserve_space(xdr, 4);
+ *p = cpu_to_be32(NFS4_OPEN_CLAIM_PREVIOUS);
encode_delegation_type(xdr, type);
}
@@ -1236,9 +1212,9 @@ static inline void encode_claim_delegate_cur(struct xdr_stream *xdr, const struc
{
__be32 *p;
- RESERVE_SPACE(4+NFS4_STATEID_SIZE);
- WRITE32(NFS4_OPEN_CLAIM_DELEGATE_CUR);
- WRITEMEM(stateid->data, NFS4_STATEID_SIZE);
+ p = reserve_space(xdr, 4+NFS4_STATEID_SIZE);
+ *p++ = cpu_to_be32(NFS4_OPEN_CLAIM_DELEGATE_CUR);
+ xdr_encode_opaque_fixed(p, stateid->data, NFS4_STATEID_SIZE);
encode_string(xdr, name->len, name->name);
}
@@ -1267,10 +1243,10 @@ static void encode_open_confirm(struct xdr_stream *xdr, const struct nfs_open_co
{
__be32 *p;
- RESERVE_SPACE(4+NFS4_STATEID_SIZE+4);
- WRITE32(OP_OPEN_CONFIRM);
- WRITEMEM(arg->stateid->data, NFS4_STATEID_SIZE);
- WRITE32(arg->seqid->sequence->counter);
+ p = reserve_space(xdr, 4+NFS4_STATEID_SIZE+4);
+ *p++ = cpu_to_be32(OP_OPEN_CONFIRM);
+ p = xdr_encode_opaque_fixed(p, arg->stateid->data, NFS4_STATEID_SIZE);
+ *p = cpu_to_be32(arg->seqid->sequence->counter);
hdr->nops++;
hdr->replen += decode_open_confirm_maxsz;
}
@@ -1279,10 +1255,10 @@ static void encode_open_downgrade(struct xdr_stream *xdr, const struct nfs_close
{
__be32 *p;
- RESERVE_SPACE(4+NFS4_STATEID_SIZE+4);
- WRITE32(OP_OPEN_DOWNGRADE);
- WRITEMEM(arg->stateid->data, NFS4_STATEID_SIZE);
- WRITE32(arg->seqid->sequence->counter);
+ p = reserve_space(xdr, 4+NFS4_STATEID_SIZE+4);
+ *p++ = cpu_to_be32(OP_OPEN_DOWNGRADE);
+ p = xdr_encode_opaque_fixed(p, arg->stateid->data, NFS4_STATEID_SIZE);
+ *p = cpu_to_be32(arg->seqid->sequence->counter);
encode_share_access(xdr, arg->fmode);
hdr->nops++;
hdr->replen += decode_open_downgrade_maxsz;
@@ -1294,10 +1270,9 @@ encode_putfh(struct xdr_stream *xdr, const struct nfs_fh *fh, struct compound_hd
int len = fh->size;
__be32 *p;
- RESERVE_SPACE(8 + len);
- WRITE32(OP_PUTFH);
- WRITE32(len);
- WRITEMEM(fh->data, len);
+ p = reserve_space(xdr, 8 + len);
+ *p++ = cpu_to_be32(OP_PUTFH);
+ xdr_encode_opaque(p, fh->data, len);
hdr->nops++;
hdr->replen += decode_putfh_maxsz;
}
@@ -1306,8 +1281,8 @@ static void encode_putrootfh(struct xdr_stream *xdr, struct compound_hdr *hdr)
{
__be32 *p;
- RESERVE_SPACE(4);
- WRITE32(OP_PUTROOTFH);
+ p = reserve_space(xdr, 4);
+ *p = cpu_to_be32(OP_PUTROOTFH);
hdr->nops++;
hdr->replen += decode_putrootfh_maxsz;
}
@@ -1317,26 +1292,26 @@ static void encode_stateid(struct xdr_stream *xdr, const struct nfs_open_context
nfs4_stateid stateid;
__be32 *p;
- RESERVE_SPACE(NFS4_STATEID_SIZE);
+ p = reserve_space(xdr, NFS4_STATEID_SIZE);
if (ctx->state != NULL) {
nfs4_copy_stateid(&stateid, ctx->state, ctx->lockowner);
- WRITEMEM(stateid.data, NFS4_STATEID_SIZE);
+ xdr_encode_opaque_fixed(p, stateid.data, NFS4_STATEID_SIZE);
} else
- WRITEMEM(zero_stateid.data, NFS4_STATEID_SIZE);
+ xdr_encode_opaque_fixed(p, zero_stateid.data, NFS4_STATEID_SIZE);
}
static void encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args, struct compound_hdr *hdr)
{
__be32 *p;
- RESERVE_SPACE(4);
- WRITE32(OP_READ);
+ p = reserve_space(xdr, 4);
+ *p = cpu_to_be32(OP_READ);
encode_stateid(xdr, args->context);
- RESERVE_SPACE(12);
- WRITE64(args->offset);
- WRITE32(args->count);
+ p = reserve_space(xdr, 12);
+ p = xdr_encode_hyper(p, args->offset);
+ *p = cpu_to_be32(args->count);
hdr->nops++;
hdr->replen += decode_read_maxsz;
}
@@ -1349,20 +1324,20 @@ static void encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg
};
__be32 *p;
- RESERVE_SPACE(12+NFS4_VERIFIER_SIZE+20);
- WRITE32(OP_READDIR);
- WRITE64(readdir->cookie);
- WRITEMEM(readdir->verifier.data, NFS4_VERIFIER_SIZE);
- WRITE32(readdir->count >> 1); /* We're not doing readdirplus */
- WRITE32(readdir->count);
- WRITE32(2);
+ p = reserve_space(xdr, 12+NFS4_VERIFIER_SIZE+20);
+ *p++ = cpu_to_be32(OP_READDIR);
+ p = xdr_encode_hyper(p, readdir->cookie);
+ p = xdr_encode_opaque_fixed(p, readdir->verifier.data, NFS4_VERIFIER_SIZE);
+ *p++ = cpu_to_be32(readdir->count >> 1); /* We're not doing readdirplus */
+ *p++ = cpu_to_be32(readdir->count);
+ *p++ = cpu_to_be32(2);
/* Switch to mounted_on_fileid if the server supports it */
if (readdir->bitmask[1] & FATTR4_WORD1_MOUNTED_ON_FILEID)
attrs[0] &= ~FATTR4_WORD0_FILEID;
else
attrs[1] &= ~FATTR4_WORD1_MOUNTED_ON_FILEID;
- WRITE32(attrs[0] & readdir->bitmask[0]);
- WRITE32(attrs[1] & readdir->bitmask[1]);
+ *p++ = cpu_to_be32(attrs[0] & readdir->bitmask[0]);
+ *p = cpu_to_be32(attrs[1] & readdir->bitmask[1]);
hdr->nops++;
hdr->replen += decode_readdir_maxsz;
dprintk("%s: cookie = %Lu, verifier = %08x:%08x, bitmap = %08x:%08x\n",
@@ -1378,8 +1353,8 @@ static void encode_readlink(struct xdr_stream *xdr, const struct nfs4_readlink *
{
__be32 *p;
- RESERVE_SPACE(4);
- WRITE32(OP_READLINK);
+ p = reserve_space(xdr, 4);
+ *p = cpu_to_be32(OP_READLINK);
hdr->nops++;
hdr->replen += decode_readlink_maxsz;
}
@@ -1388,10 +1363,9 @@ static void encode_remove(struct xdr_stream *xdr, const struct qstr *name, struc
{
__be32 *p;
- RESERVE_SPACE(8 + name->len);
- WRITE32(OP_REMOVE);
- WRITE32(name->len);
- WRITEMEM(name->name, name->len);
+ p = reserve_space(xdr, 8 + name->len);
+ *p++ = cpu_to_be32(OP_REMOVE);
+ xdr_encode_opaque(p, name->name, name->len);
hdr->nops++;
hdr->replen += decode_remove_maxsz;
}
@@ -1400,14 +1374,10 @@ static void encode_rename(struct xdr_stream *xdr, const struct qstr *oldname, co
{
__be32 *p;
- RESERVE_SPACE(8 + oldname->len);
- WRITE32(OP_RENAME);
- WRITE32(oldname->len);
- WRITEMEM(oldname->name, oldname->len);
-
- RESERVE_SPACE(4 + newname->len);
- WRITE32(newname->len);
- WRITEMEM(newname->name, newname->len);
+ p = reserve_space(xdr, 4);
+ *p = cpu_to_be32(OP_RENAME);
+ encode_string(xdr, oldname->len, oldname->name);
+ encode_string(xdr, newname->len, newname->name);
hdr->nops++;
hdr->replen += decode_rename_maxsz;
}
@@ -1416,9 +1386,9 @@ static void encode_renew(struct xdr_stream *xdr, const struct nfs_client *client
{
__be32 *p;
- RESERVE_SPACE(12);
- WRITE32(OP_RENEW);
- WRITE64(client_stateid->cl_clientid);
+ p = reserve_space(xdr, 12);
+ *p++ = cpu_to_be32(OP_RENEW);
+ xdr_encode_hyper(p, client_stateid->cl_clientid);
hdr->nops++;
hdr->replen += decode_renew_maxsz;
}
@@ -1428,8 +1398,8 @@ encode_restorefh(struct xdr_stream *xdr, struct compound_hdr *hdr)
{
__be32 *p;
- RESERVE_SPACE(4);
- WRITE32(OP_RESTOREFH);
+ p = reserve_space(xdr, 4);
+ *p = cpu_to_be32(OP_RESTOREFH);
hdr->nops++;
hdr->replen += decode_restorefh_maxsz;
}
@@ -1439,16 +1409,16 @@ encode_setacl(struct xdr_stream *xdr, struct nfs_setaclargs *arg, struct compoun
{
__be32 *p;
- RESERVE_SPACE(4+NFS4_STATEID_SIZE);
- WRITE32(OP_SETATTR);
- WRITEMEM(zero_stateid.data, NFS4_STATEID_SIZE);
- RESERVE_SPACE(2*4);
- WRITE32(1);
- WRITE32(FATTR4_WORD0_ACL);
+ p = reserve_space(xdr, 4+NFS4_STATEID_SIZE);
+ *p++ = cpu_to_be32(OP_SETATTR);
+ xdr_encode_opaque_fixed(p, zero_stateid.data, NFS4_STATEID_SIZE);
+ p = reserve_space(xdr, 2*4);
+ *p++ = cpu_to_be32(1);
+ *p = cpu_to_be32(FATTR4_WORD0_ACL);
if (arg->acl_len % 4)
return -EINVAL;
- RESERVE_SPACE(4);
- WRITE32(arg->acl_len);
+ p = reserve_space(xdr, 4);
+ *p = cpu_to_be32(arg->acl_len);
xdr_write_pages(xdr, arg->acl_pages, arg->acl_pgbase, arg->acl_len);
hdr->nops++;
hdr->replen += decode_setacl_maxsz;
@@ -1460,8 +1430,8 @@ encode_savefh(struct xdr_stream *xdr, struct compound_hdr *hdr)
{
__be32 *p;
- RESERVE_SPACE(4);
- WRITE32(OP_SAVEFH);
+ p = reserve_space(xdr, 4);
+ *p = cpu_to_be32(OP_SAVEFH);
hdr->nops++;
hdr->replen += decode_savefh_maxsz;
}
@@ -1470,9 +1440,9 @@ static void encode_setattr(struct xdr_stream *xdr, const struct nfs_setattrargs
{
__be32 *p;
- RESERVE_SPACE(4+NFS4_STATEID_SIZE);
- WRITE32(OP_SETATTR);
- WRITEMEM(arg->stateid.data, NFS4_STATEID_SIZE);
+ p = reserve_space(xdr, 4+NFS4_STATEID_SIZE);
+ *p++ = cpu_to_be32(OP_SETATTR);
+ xdr_encode_opaque_fixed(p, arg->stateid.data, NFS4_STATEID_SIZE);
hdr->nops++;
hdr->replen += decode_setattr_maxsz;
encode_attrs(xdr, arg->iap, server);
@@ -1482,17 +1452,17 @@ static void encode_setclientid(struct xdr_stream *xdr, const struct nfs4_setclie
{
__be32 *p;
- RESERVE_SPACE(4 + NFS4_VERIFIER_SIZE);
- WRITE32(OP_SETCLIENTID);
- WRITEMEM(setclientid->sc_verifier->data, NFS4_VERIFIER_SIZE);
+ p = reserve_space(xdr, 4 + NFS4_VERIFIER_SIZE);
+ *p++ = cpu_to_be32(OP_SETCLIENTID);
+ xdr_encode_opaque_fixed(p, setclientid->sc_verifier->data, NFS4_VERIFIER_SIZE);
encode_string(xdr, setclientid->sc_name_len, setclientid->sc_name);
- RESERVE_SPACE(4);
- WRITE32(setclientid->sc_prog);
+ p = reserve_space(xdr, 4);
+ *p = cpu_to_be32(setclientid->sc_prog);
encode_string(xdr, setclientid->sc_netid_len, setclientid->sc_netid);
encode_string(xdr, setclientid->sc_uaddr_len, setclientid->sc_uaddr);
- RESERVE_SPACE(4);
- WRITE32(setclientid->sc_cb_ident);
+ p = reserve_space(xdr, 4);
+ *p = cpu_to_be32(setclientid->sc_cb_ident);
hdr->nops++;
hdr->replen += decode_setclientid_maxsz;
}
@@ -1501,10 +1471,10 @@ static void encode_setclientid_confirm(struct xdr_stream *xdr, const struct nfs_
{
__be32 *p;
- RESERVE_SPACE(12 + NFS4_VERIFIER_SIZE);
- WRITE32(OP_SETCLIENTID_CONFIRM);
- WRITE64(client_state->cl_clientid);
- WRITEMEM(client_state->cl_confirm.data, NFS4_VERIFIER_SIZE);
+ p = reserve_space(xdr, 12 + NFS4_VERIFIER_SIZE);
+ *p++ = cpu_to_be32(OP_SETCLIENTID_CONFIRM);
+ p = xdr_encode_hyper(p, client_state->cl_clientid);
+ xdr_encode_opaque_fixed(p, client_state->cl_confirm.data, NFS4_VERIFIER_SIZE);
hdr->nops++;
hdr->replen += decode_setclientid_confirm_maxsz;
}
@@ -1513,15 +1483,15 @@ static void encode_write(struct xdr_stream *xdr, const struct nfs_writeargs *arg
{
__be32 *p;
- RESERVE_SPACE(4);
- WRITE32(OP_WRITE);
+ p = reserve_space(xdr, 4);
+ *p = cpu_to_be32(OP_WRITE);
encode_stateid(xdr, args->context);
- RESERVE_SPACE(16);
- WRITE64(args->offset);
- WRITE32(args->stable);
- WRITE32(args->count);
+ p = reserve_space(xdr, 16);
+ p = xdr_encode_hyper(p, args->offset);
+ *p++ = cpu_to_be32(args->stable);
+ *p = cpu_to_be32(args->count);
xdr_write_pages(xdr, args->pages, args->pgbase, args->count);
hdr->nops++;
@@ -1532,10 +1502,10 @@ static void encode_delegreturn(struct xdr_stream *xdr, const nfs4_stateid *state
{
__be32 *p;
- RESERVE_SPACE(4+NFS4_STATEID_SIZE);
+ p = reserve_space(xdr, 4+NFS4_STATEID_SIZE);
- WRITE32(OP_DELEGRETURN);
- WRITEMEM(stateid->data, NFS4_STATEID_SIZE);
+ *p++ = cpu_to_be32(OP_DELEGRETURN);
+ xdr_encode_opaque_fixed(p, stateid->data, NFS4_STATEID_SIZE);
hdr->nops++;
hdr->replen += decode_delegreturn_maxsz;
}
@@ -1548,16 +1518,16 @@ static void encode_exchange_id(struct xdr_stream *xdr,
{
__be32 *p;
- RESERVE_SPACE(4 + sizeof(args->verifier->data));
- WRITE32(OP_EXCHANGE_ID);
- WRITEMEM(args->verifier->data, sizeof(args->verifier->data));
+ p = reserve_space(xdr, 4 + sizeof(args->verifier->data));
+ *p++ = cpu_to_be32(OP_EXCHANGE_ID);
+ xdr_encode_opaque_fixed(p, args->verifier->data, sizeof(args->verifier->data));
encode_string(xdr, args->id_len, args->id);
- RESERVE_SPACE(12);
- WRITE32(args->flags);
- WRITE32(0); /* zero length state_protect4_a */
- WRITE32(0); /* zero length implementation id array */
+ p = reserve_space(xdr, 12);
+ *p++ = cpu_to_be32(args->flags);
+ *p++ = cpu_to_be32(0); /* zero length state_protect4_a */
+ *p = cpu_to_be32(0); /* zero length implementation id array */
hdr->nops++;
hdr->replen += decode_exchange_id_maxsz;
}
@@ -1571,55 +1541,43 @@ static void encode_create_session(struct xdr_stream *xdr,
uint32_t len;
struct nfs_client *clp = args->client;
- RESERVE_SPACE(4);
- WRITE32(OP_CREATE_SESSION);
-
- RESERVE_SPACE(8);
- WRITE64(clp->cl_ex_clid);
+ len = scnprintf(machine_name, sizeof(machine_name), "%s",
+ clp->cl_ipaddr);
- RESERVE_SPACE(8);
- WRITE32(clp->cl_seqid); /*Sequence id */
- WRITE32(args->flags); /*flags */
+ p = reserve_space(xdr, 20 + 2*28 + 20 + len + 12);
+ *p++ = cpu_to_be32(OP_CREATE_SESSION);
+ p = xdr_encode_hyper(p, clp->cl_ex_clid);
+ *p++ = cpu_to_be32(clp->cl_seqid); /*Sequence id */
+ *p++ = cpu_to_be32(args->flags); /*flags */
- RESERVE_SPACE(2*28); /* 2 channel_attrs */
/* Fore Channel */
- WRITE32(args->fc_attrs.headerpadsz); /* header padding size */
- WRITE32(args->fc_attrs.max_rqst_sz); /* max req size */
- WRITE32(args->fc_attrs.max_resp_sz); /* max resp size */
- WRITE32(args->fc_attrs.max_resp_sz_cached); /* Max resp sz cached */
- WRITE32(args->fc_attrs.max_ops); /* max operations */
- WRITE32(args->fc_attrs.max_reqs); /* max requests */
- WRITE32(0); /* rdmachannel_attrs */
+ *p++ = cpu_to_be32(args->fc_attrs.headerpadsz); /* header padding size */
+ *p++ = cpu_to_be32(args->fc_attrs.max_rqst_sz); /* max req size */
+ *p++ = cpu_to_be32(args->fc_attrs.max_resp_sz); /* max resp size */
+ *p++ = cpu_to_be32(args->fc_attrs.max_resp_sz_cached); /* Max resp sz cached */
+ *p++ = cpu_to_be32(args->fc_attrs.max_ops); /* max operations */
+ *p++ = cpu_to_be32(args->fc_attrs.max_reqs); /* max requests */
+ *p++ = cpu_to_be32(0); /* rdmachannel_attrs */
/* Back Channel */
- WRITE32(args->fc_attrs.headerpadsz); /* header padding size */
- WRITE32(args->bc_attrs.max_rqst_sz); /* max req size */
- WRITE32(args->bc_attrs.max_resp_sz); /* max resp size */
- WRITE32(args->bc_attrs.max_resp_sz_cached); /* Max resp sz cached */
- WRITE32(args->bc_attrs.max_ops); /* max operations */
- WRITE32(args->bc_attrs.max_reqs); /* max requests */
- WRITE32(0); /* rdmachannel_attrs */
-
- RESERVE_SPACE(4);
- WRITE32(args->cb_program); /* cb_program */
-
- RESERVE_SPACE(4); /* # of security flavors */
- WRITE32(1);
-
- RESERVE_SPACE(4);
- WRITE32(RPC_AUTH_UNIX); /* auth_sys */
+ *p++ = cpu_to_be32(args->fc_attrs.headerpadsz); /* header padding size */
+ *p++ = cpu_to_be32(args->bc_attrs.max_rqst_sz); /* max req size */
+ *p++ = cpu_to_be32(args->bc_attrs.max_resp_sz); /* max resp size */
+ *p++ = cpu_to_be32(args->bc_attrs.max_resp_sz_cached); /* Max resp sz cached */
+ *p++ = cpu_to_be32(args->bc_attrs.max_ops); /* max operations */
+ *p++ = cpu_to_be32(args->bc_attrs.max_reqs); /* max requests */
+ *p++ = cpu_to_be32(0); /* rdmachannel_attrs */
+
+ *p++ = cpu_to_be32(args->cb_program); /* cb_program */
+ *p++ = cpu_to_be32(1);
+ *p++ = cpu_to_be32(RPC_AUTH_UNIX); /* auth_sys */
/* authsys_parms rfc1831 */
- RESERVE_SPACE(4);
- WRITE32((u32)clp->cl_boot_time.tv_nsec); /* stamp */
- len = scnprintf(machine_name, sizeof(machine_name), "%s",
- clp->cl_ipaddr);
- RESERVE_SPACE(16 + len);
- WRITE32(len);
- WRITEMEM(machine_name, len);
- WRITE32(0); /* UID */
- WRITE32(0); /* GID */
- WRITE32(0); /* No more gids */
+ *p++ = cpu_to_be32((u32)clp->cl_boot_time.tv_nsec); /* stamp */
+ p = xdr_encode_opaque(p, machine_name, len);
+ *p++ = cpu_to_be32(0); /* UID */
+ *p++ = cpu_to_be32(0); /* GID */
+ *p = cpu_to_be32(0); /* No more gids */
hdr->nops++;
hdr->replen += decode_create_session_maxsz;
}
@@ -1629,9 +1587,9 @@ static void encode_destroy_session(struct xdr_stream *xdr,
struct compound_hdr *hdr)
{
__be32 *p;
- RESERVE_SPACE(4 + NFS4_MAX_SESSIONID_LEN);
- WRITE32(OP_DESTROY_SESSION);
- WRITEMEM(session->sess_id.data, NFS4_MAX_SESSIONID_LEN);
+ p = reserve_space(xdr, 4 + NFS4_MAX_SESSIONID_LEN);
+ *p++ = cpu_to_be32(OP_DESTROY_SESSION);
+ xdr_encode_opaque_fixed(p, session->sess_id.data, NFS4_MAX_SESSIONID_LEN);
hdr->nops++;
hdr->replen += decode_destroy_session_maxsz;
}
@@ -1655,8 +1613,8 @@ static void encode_sequence(struct xdr_stream *xdr,
WARN_ON(args->sa_slotid == NFS4_MAX_SLOT_TABLE);
slot = tp->slots + args->sa_slotid;
- RESERVE_SPACE(4);
- WRITE32(OP_SEQUENCE);
+ p = reserve_space(xdr, 4 + NFS4_MAX_SESSIONID_LEN + 16);
+ *p++ = cpu_to_be32(OP_SEQUENCE);
/*
* Sessionid + seqid + slotid + max slotid + cache_this
@@ -1670,12 +1628,11 @@ static void encode_sequence(struct xdr_stream *xdr,
((u32 *)session->sess_id.data)[3],
slot->seq_nr, args->sa_slotid,
tp->highest_used_slotid, args->sa_cache_this);
- RESERVE_SPACE(NFS4_MAX_SESSIONID_LEN + 16);
- WRITEMEM(session->sess_id.data, NFS4_MAX_SESSIONID_LEN);
- WRITE32(slot->seq_nr);
- WRITE32(args->sa_slotid);
- WRITE32(tp->highest_used_slotid);
- WRITE32(args->sa_cache_this);
+ p = xdr_encode_opaque_fixed(p, session->sess_id.data, NFS4_MAX_SESSIONID_LEN);
+ *p++ = cpu_to_be32(slot->seq_nr);
+ *p++ = cpu_to_be32(args->sa_slotid);
+ *p++ = cpu_to_be32(tp->highest_used_slotid);
+ *p = cpu_to_be32(args->sa_cache_this);
hdr->nops++;
hdr->replen += decode_sequence_maxsz;
#endif /* CONFIG_NFS_V4_1 */
@@ -2466,68 +2423,53 @@ static int nfs4_xdr_enc_get_lease_time(struct rpc_rqst *req, uint32_t *p,
}
#endif /* CONFIG_NFS_V4_1 */
-/*
- * START OF "GENERIC" DECODE ROUTINES.
- * These may look a little ugly since they are imported from a "generic"
- * set of XDR encode/decode routines which are intended to be shared by
- * all of our NFSv4 implementations (OpenBSD, MacOS X...).
- *
- * If the pain of reading these is too great, it should be a straightforward
- * task to translate them into Linux-specific versions which are more
- * consistent with the style used in NFSv2/v3...
- */
-#define READ32(x) (x) = ntohl(*p++)
-#define READ64(x) do { \
- (x) = (u64)ntohl(*p++) << 32; \
- (x) |= ntohl(*p++); \
-} while (0)
-#define READTIME(x) do { \
- p++; \
- (x.tv_sec) = ntohl(*p++); \
- (x.tv_nsec) = ntohl(*p++); \
-} while (0)
-#define COPYMEM(x,nbytes) do { \
- memcpy((x), p, nbytes); \
- p += XDR_QUADLEN(nbytes); \
-} while (0)
-
-#define READ_BUF(nbytes) do { \
- p = xdr_inline_decode(xdr, nbytes); \
- if (unlikely(!p)) { \
- dprintk("nfs: %s: prematurely hit end of receive" \
- " buffer\n", __func__); \
- dprintk("nfs: %s: xdr->p=%p, bytes=%u, xdr->end=%p\n", \
- __func__, xdr->p, nbytes, xdr->end); \
- return -EIO; \
- } \
-} while (0)
+static void print_overflow_msg(const char *func, const struct xdr_stream *xdr)
+{
+ dprintk("nfs: %s: prematurely hit end of receive buffer. "
+ "Remaining buffer length is %tu words.\n",
+ func, xdr->end - xdr->p);
+}
static int decode_opaque_inline(struct xdr_stream *xdr, unsigned int *len, char **string)
{
__be32 *p;
- READ_BUF(4);
- READ32(*len);
- READ_BUF(*len);
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(!p))
+ goto out_overflow;
+ *len = be32_to_cpup(p);
+ p = xdr_inline_decode(xdr, *len);
+ if (unlikely(!p))
+ goto out_overflow;
*string = (char *)p;
return 0;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
}
static int decode_compound_hdr(struct xdr_stream *xdr, struct compound_hdr *hdr)
{
__be32 *p;
- READ_BUF(8);
- READ32(hdr->status);
- READ32(hdr->taglen);
+ p = xdr_inline_decode(xdr, 8);
+ if (unlikely(!p))
+ goto out_overflow;
+ hdr->status = be32_to_cpup(p++);
+ hdr->taglen = be32_to_cpup(p);
- READ_BUF(hdr->taglen + 4);
+ p = xdr_inline_decode(xdr, hdr->taglen + 4);
+ if (unlikely(!p))
+ goto out_overflow;
hdr->tag = (char *)p;
p += XDR_QUADLEN(hdr->taglen);
- READ32(hdr->nops);
+ hdr->nops = be32_to_cpup(p);
if (unlikely(hdr->nops < 1))
return nfs4_stat_to_errno(hdr->status);
return 0;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
}
static int decode_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected)
@@ -2536,18 +2478,23 @@ static int decode_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected)
uint32_t opnum;
int32_t nfserr;
- READ_BUF(8);
- READ32(opnum);
+ p = xdr_inline_decode(xdr, 8);
+ if (unlikely(!p))
+ goto out_overflow;
+ opnum = be32_to_cpup(p++);
if (opnum != expected) {
dprintk("nfs: Server returned operation"
" %d but we issued a request for %d\n",
opnum, expected);
return -EIO;
}
- READ32(nfserr);
+ nfserr = be32_to_cpup(p);
if (nfserr != NFS_OK)
return nfs4_stat_to_errno(nfserr);
return 0;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
}
/* Dummy routine */
@@ -2557,8 +2504,11 @@ static int decode_ace(struct xdr_stream *xdr, void *ace, struct nfs_client *clp)
unsigned int strlen;
char *str;
- READ_BUF(12);
- return decode_opaque_inline(xdr, &strlen, &str);
+ p = xdr_inline_decode(xdr, 12);
+ if (likely(p))
+ return decode_opaque_inline(xdr, &strlen, &str);
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
}
static int decode_attr_bitmap(struct xdr_stream *xdr, uint32_t *bitmap)
@@ -2566,27 +2516,39 @@ static int decode_attr_bitmap(struct xdr_stream *xdr, uint32_t *bitmap)
uint32_t bmlen;
__be32 *p;
- READ_BUF(4);
- READ32(bmlen);
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(!p))
+ goto out_overflow;
+ bmlen = be32_to_cpup(p);
bitmap[0] = bitmap[1] = 0;
- READ_BUF((bmlen << 2));
+ p = xdr_inline_decode(xdr, (bmlen << 2));
+ if (unlikely(!p))
+ goto out_overflow;
if (bmlen > 0) {
- READ32(bitmap[0]);
+ bitmap[0] = be32_to_cpup(p++);
if (bmlen > 1)
- READ32(bitmap[1]);
+ bitmap[1] = be32_to_cpup(p);
}
return 0;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
}
static inline int decode_attr_length(struct xdr_stream *xdr, uint32_t *attrlen, __be32 **savep)
{
__be32 *p;
- READ_BUF(4);
- READ32(*attrlen);
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(!p))
+ goto out_overflow;
+ *attrlen = be32_to_cpup(p);
*savep = xdr->p;
return 0;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
}
static int decode_attr_supported(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *bitmask)
@@ -2609,8 +2571,10 @@ static int decode_attr_type(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *
if (unlikely(bitmap[0] & (FATTR4_WORD0_TYPE - 1U)))
return -EIO;
if (likely(bitmap[0] & FATTR4_WORD0_TYPE)) {
- READ_BUF(4);
- READ32(*type);
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(!p))
+ goto out_overflow;
+ *type = be32_to_cpup(p);
if (*type < NF4REG || *type > NF4NAMEDATTR) {
dprintk("%s: bad type %d\n", __func__, *type);
return -EIO;
@@ -2620,6 +2584,9 @@ static int decode_attr_type(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *
}
dprintk("%s: type=0%o\n", __func__, nfs_type2fmt[*type]);
return ret;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
}
static int decode_attr_change(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *change)
@@ -2631,14 +2598,19 @@ static int decode_attr_change(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t
if (unlikely(bitmap[0] & (FATTR4_WORD0_CHANGE - 1U)))
return -EIO;
if (likely(bitmap[0] & FATTR4_WORD0_CHANGE)) {
- READ_BUF(8);
- READ64(*change);
+ p = xdr_inline_decode(xdr, 8);
+ if (unlikely(!p))
+ goto out_overflow;
+ xdr_decode_hyper(p, change);
bitmap[0] &= ~FATTR4_WORD0_CHANGE;
ret = NFS_ATTR_FATTR_CHANGE;
}
dprintk("%s: change attribute=%Lu\n", __func__,
(unsigned long long)*change);
return ret;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
}
static int decode_attr_size(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *size)
@@ -2650,13 +2622,18 @@ static int decode_attr_size(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *
if (unlikely(bitmap[0] & (FATTR4_WORD0_SIZE - 1U)))
return -EIO;
if (likely(bitmap[0] & FATTR4_WORD0_SIZE)) {
- READ_BUF(8);
- READ64(*size);
+ p = xdr_inline_decode(xdr, 8);
+ if (unlikely(!p))
+ goto out_overflow;
+ xdr_decode_hyper(p, size);
bitmap[0] &= ~FATTR4_WORD0_SIZE;
ret = NFS_ATTR_FATTR_SIZE;
}
dprintk("%s: file size=%Lu\n", __func__, (unsigned long long)*size);
return ret;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
}
static int decode_attr_link_support(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res)
@@ -2667,12 +2644,17 @@ static int decode_attr_link_support(struct xdr_stream *xdr, uint32_t *bitmap, ui
if (unlikely(bitmap[0] & (FATTR4_WORD0_LINK_SUPPORT - 1U)))
return -EIO;
if (likely(bitmap[0] & FATTR4_WORD0_LINK_SUPPORT)) {
- READ_BUF(4);
- READ32(*res);
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(!p))
+ goto out_overflow;
+ *res = be32_to_cpup(p);
bitmap[0] &= ~FATTR4_WORD0_LINK_SUPPORT;
}
dprintk("%s: link support=%s\n", __func__, *res == 0 ? "false" : "true");
return 0;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
}
static int decode_attr_symlink_support(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res)
@@ -2683,12 +2665,17 @@ static int decode_attr_symlink_support(struct xdr_stream *xdr, uint32_t *bitmap,
if (unlikely(bitmap[0] & (FATTR4_WORD0_SYMLINK_SUPPORT - 1U)))
return -EIO;
if (likely(bitmap[0] & FATTR4_WORD0_SYMLINK_SUPPORT)) {
- READ_BUF(4);
- READ32(*res);
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(!p))
+ goto out_overflow;
+ *res = be32_to_cpup(p);
bitmap[0] &= ~FATTR4_WORD0_SYMLINK_SUPPORT;
}
dprintk("%s: symlink support=%s\n", __func__, *res == 0 ? "false" : "true");
return 0;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
}
static int decode_attr_fsid(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs_fsid *fsid)
@@ -2701,9 +2688,11 @@ static int decode_attr_fsid(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs
if (unlikely(bitmap[0] & (FATTR4_WORD0_FSID - 1U)))
return -EIO;
if (likely(bitmap[0] & FATTR4_WORD0_FSID)) {
- READ_BUF(16);
- READ64(fsid->major);
- READ64(fsid->minor);
+ p = xdr_inline_decode(xdr, 16);
+ if (unlikely(!p))
+ goto out_overflow;
+ p = xdr_decode_hyper(p, &fsid->major);
+ xdr_decode_hyper(p, &fsid->minor);
bitmap[0] &= ~FATTR4_WORD0_FSID;
ret = NFS_ATTR_FATTR_FSID;
}
@@ -2711,6 +2700,9 @@ static int decode_attr_fsid(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs
(unsigned long long)fsid->major,
(unsigned long long)fsid->minor);
return ret;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
}
static int decode_attr_lease_time(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res)
@@ -2721,12 +2713,17 @@ static int decode_attr_lease_time(struct xdr_stream *xdr, uint32_t *bitmap, uint
if (unlikely(bitmap[0] & (FATTR4_WORD0_LEASE_TIME - 1U)))
return -EIO;
if (likely(bitmap[0] & FATTR4_WORD0_LEASE_TIME)) {
- READ_BUF(4);
- READ32(*res);
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(!p))
+ goto out_overflow;
+ *res = be32_to_cpup(p);
bitmap[0] &= ~FATTR4_WORD0_LEASE_TIME;
}
dprintk("%s: file size=%u\n", __func__, (unsigned int)*res);
return 0;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
}
static int decode_attr_aclsupport(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res)
@@ -2737,12 +2734,17 @@ static int decode_attr_aclsupport(struct xdr_stream *xdr, uint32_t *bitmap, uint
if (unlikely(bitmap[0] & (FATTR4_WORD0_ACLSUPPORT - 1U)))
return -EIO;
if (likely(bitmap[0] & FATTR4_WORD0_ACLSUPPORT)) {
- READ_BUF(4);
- READ32(*res);
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(!p))
+ goto out_overflow;
+ *res = be32_to_cpup(p);
bitmap[0] &= ~FATTR4_WORD0_ACLSUPPORT;
}
dprintk("%s: ACLs supported=%u\n", __func__, (unsigned int)*res);
return 0;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
}
static int decode_attr_fileid(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *fileid)
@@ -2754,13 +2756,18 @@ static int decode_attr_fileid(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t
if (unlikely(bitmap[0] & (FATTR4_WORD0_FILEID - 1U)))
return -EIO;
if (likely(bitmap[0] & FATTR4_WORD0_FILEID)) {
- READ_BUF(8);
- READ64(*fileid);
+ p = xdr_inline_decode(xdr, 8);
+ if (unlikely(!p))
+ goto out_overflow;
+ xdr_decode_hyper(p, fileid);
bitmap[0] &= ~FATTR4_WORD0_FILEID;
ret = NFS_ATTR_FATTR_FILEID;
}
dprintk("%s: fileid=%Lu\n", __func__, (unsigned long long)*fileid);
return ret;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
}
static int decode_attr_mounted_on_fileid(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *fileid)
@@ -2772,13 +2779,18 @@ static int decode_attr_mounted_on_fileid(struct xdr_stream *xdr, uint32_t *bitma
if (unlikely(bitmap[1] & (FATTR4_WORD1_MOUNTED_ON_FILEID - 1U)))
return -EIO;
if (likely(bitmap[1] & FATTR4_WORD1_MOUNTED_ON_FILEID)) {
- READ_BUF(8);
- READ64(*fileid);
+ p = xdr_inline_decode(xdr, 8);
+ if (unlikely(!p))
+ goto out_overflow;
+ xdr_decode_hyper(p, fileid);
bitmap[1] &= ~FATTR4_WORD1_MOUNTED_ON_FILEID;
ret = NFS_ATTR_FATTR_FILEID;
}
dprintk("%s: fileid=%Lu\n", __func__, (unsigned long long)*fileid);
return ret;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
}
static int decode_attr_files_avail(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res)
@@ -2790,12 +2802,17 @@ static int decode_attr_files_avail(struct xdr_stream *xdr, uint32_t *bitmap, uin
if (unlikely(bitmap[0] & (FATTR4_WORD0_FILES_AVAIL - 1U)))
return -EIO;
if (likely(bitmap[0] & FATTR4_WORD0_FILES_AVAIL)) {
- READ_BUF(8);
- READ64(*res);
+ p = xdr_inline_decode(xdr, 8);
+ if (unlikely(!p))
+ goto out_overflow;
+ xdr_decode_hyper(p, res);
bitmap[0] &= ~FATTR4_WORD0_FILES_AVAIL;
}
dprintk("%s: files avail=%Lu\n", __func__, (unsigned long long)*res);
return status;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
}
static int decode_attr_files_free(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res)
@@ -2807,12 +2824,17 @@ static int decode_attr_files_free(struct xdr_stream *xdr, uint32_t *bitmap, uint
if (unlikely(bitmap[0] & (FATTR4_WORD0_FILES_FREE - 1U)))
return -EIO;
if (likely(bitmap[0] & FATTR4_WORD0_FILES_FREE)) {
- READ_BUF(8);
- READ64(*res);
+ p = xdr_inline_decode(xdr, 8);
+ if (unlikely(!p))
+ goto out_overflow;
+ xdr_decode_hyper(p, res);
bitmap[0] &= ~FATTR4_WORD0_FILES_FREE;
}
dprintk("%s: files free=%Lu\n", __func__, (unsigned long long)*res);
return status;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
}
static int decode_attr_files_total(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res)
@@ -2824,12 +2846,17 @@ static int decode_attr_files_total(struct xdr_stream *xdr, uint32_t *bitmap, uin
if (unlikely(bitmap[0] & (FATTR4_WORD0_FILES_TOTAL - 1U)))
return -EIO;
if (likely(bitmap[0] & FATTR4_WORD0_FILES_TOTAL)) {
- READ_BUF(8);
- READ64(*res);
+ p = xdr_inline_decode(xdr, 8);
+ if (unlikely(!p))
+ goto out_overflow;
+ xdr_decode_hyper(p, res);
bitmap[0] &= ~FATTR4_WORD0_FILES_TOTAL;
}
dprintk("%s: files total=%Lu\n", __func__, (unsigned long long)*res);
return status;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
}
static int decode_pathname(struct xdr_stream *xdr, struct nfs4_pathname *path)
@@ -2838,8 +2865,10 @@ static int decode_pathname(struct xdr_stream *xdr, struct nfs4_pathname *path)
__be32 *p;
int status = 0;
- READ_BUF(4);
- READ32(n);
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(!p))
+ goto out_overflow;
+ n = be32_to_cpup(p);
if (n == 0)
goto root_path;
dprintk("path ");
@@ -2873,6 +2902,9 @@ out_eio:
dprintk(" status %d", status);
status = -EIO;
goto out;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
}
static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs4_fs_locations *res)
@@ -2890,8 +2922,10 @@ static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, st
status = decode_pathname(xdr, &res->fs_path);
if (unlikely(status != 0))
goto out;
- READ_BUF(4);
- READ32(n);
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(!p))
+ goto out_overflow;
+ n = be32_to_cpup(p);
if (n <= 0)
goto out_eio;
res->nlocations = 0;
@@ -2899,8 +2933,10 @@ static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, st
u32 m;
struct nfs4_fs_location *loc = &res->locations[res->nlocations];
- READ_BUF(4);
- READ32(m);
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(!p))
+ goto out_overflow;
+ m = be32_to_cpup(p);
loc->nservers = 0;
dprintk("%s: servers ", __func__);
@@ -2939,6 +2975,8 @@ static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, st
out:
dprintk("%s: fs_locations done, error = %d\n", __func__, status);
return status;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
out_eio:
status = -EIO;
goto out;
@@ -2953,12 +2991,17 @@ static int decode_attr_maxfilesize(struct xdr_stream *xdr, uint32_t *bitmap, uin
if (unlikely(bitmap[0] & (FATTR4_WORD0_MAXFILESIZE - 1U)))
return -EIO;
if (likely(bitmap[0] & FATTR4_WORD0_MAXFILESIZE)) {
- READ_BUF(8);
- READ64(*res);
+ p = xdr_inline_decode(xdr, 8);
+ if (unlikely(!p))
+ goto out_overflow;
+ xdr_decode_hyper(p, res);
bitmap[0] &= ~FATTR4_WORD0_MAXFILESIZE;
}
dprintk("%s: maxfilesize=%Lu\n", __func__, (unsigned long long)*res);
return status;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
}
static int decode_attr_maxlink(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *maxlink)
@@ -2970,12 +3013,17 @@ static int decode_attr_maxlink(struct xdr_stream *xdr, uint32_t *bitmap, uint32_
if (unlikely(bitmap[0] & (FATTR4_WORD0_MAXLINK - 1U)))
return -EIO;
if (likely(bitmap[0] & FATTR4_WORD0_MAXLINK)) {
- READ_BUF(4);
- READ32(*maxlink);
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(!p))
+ goto out_overflow;
+ *maxlink = be32_to_cpup(p);
bitmap[0] &= ~FATTR4_WORD0_MAXLINK;
}
dprintk("%s: maxlink=%u\n", __func__, *maxlink);
return status;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
}
static int decode_attr_maxname(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *maxname)
@@ -2987,12 +3035,17 @@ static int decode_attr_maxname(struct xdr_stream *xdr, uint32_t *bitmap, uint32_
if (unlikely(bitmap[0] & (FATTR4_WORD0_MAXNAME - 1U)))
return -EIO;
if (likely(bitmap[0] & FATTR4_WORD0_MAXNAME)) {
- READ_BUF(4);
- READ32(*maxname);
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(!p))
+ goto out_overflow;
+ *maxname = be32_to_cpup(p);
bitmap[0] &= ~FATTR4_WORD0_MAXNAME;
}
dprintk("%s: maxname=%u\n", __func__, *maxname);
return status;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
}
static int decode_attr_maxread(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res)
@@ -3005,8 +3058,10 @@ static int decode_attr_maxread(struct xdr_stream *xdr, uint32_t *bitmap, uint32_
return -EIO;
if (likely(bitmap[0] & FATTR4_WORD0_MAXREAD)) {
uint64_t maxread;
- READ_BUF(8);
- READ64(maxread);
+ p = xdr_inline_decode(xdr, 8);
+ if (unlikely(!p))
+ goto out_overflow;
+ xdr_decode_hyper(p, &maxread);
if (maxread > 0x7FFFFFFF)
maxread = 0x7FFFFFFF;
*res = (uint32_t)maxread;
@@ -3014,6 +3069,9 @@ static int decode_attr_maxread(struct xdr_stream *xdr, uint32_t *bitmap, uint32_
}
dprintk("%s: maxread=%lu\n", __func__, (unsigned long)*res);
return status;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
}
static int decode_attr_maxwrite(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res)
@@ -3026,8 +3084,10 @@ static int decode_attr_maxwrite(struct xdr_stream *xdr, uint32_t *bitmap, uint32
return -EIO;
if (likely(bitmap[0] & FATTR4_WORD0_MAXWRITE)) {
uint64_t maxwrite;
- READ_BUF(8);
- READ64(maxwrite);
+ p = xdr_inline_decode(xdr, 8);
+ if (unlikely(!p))
+ goto out_overflow;
+ xdr_decode_hyper(p, &maxwrite);
if (maxwrite > 0x7FFFFFFF)
maxwrite = 0x7FFFFFFF;
*res = (uint32_t)maxwrite;
@@ -3035,6 +3095,9 @@ static int decode_attr_maxwrite(struct xdr_stream *xdr, uint32_t *bitmap, uint32
}
dprintk("%s: maxwrite=%lu\n", __func__, (unsigned long)*res);
return status;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
}
static int decode_attr_mode(struct xdr_stream *xdr, uint32_t *bitmap, umode_t *mode)
@@ -3047,14 +3110,19 @@ static int decode_attr_mode(struct xdr_stream *xdr, uint32_t *bitmap, umode_t *m
if (unlikely(bitmap[1] & (FATTR4_WORD1_MODE - 1U)))
return -EIO;
if (likely(bitmap[1] & FATTR4_WORD1_MODE)) {
- READ_BUF(4);
- READ32(tmp);
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(!p))
+ goto out_overflow;
+ tmp = be32_to_cpup(p);
*mode = tmp & ~S_IFMT;
bitmap[1] &= ~FATTR4_WORD1_MODE;
ret = NFS_ATTR_FATTR_MODE;
}
dprintk("%s: file mode=0%o\n", __func__, (unsigned int)*mode);
return ret;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
}
static int decode_attr_nlink(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *nlink)
@@ -3066,13 +3134,18 @@ static int decode_attr_nlink(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t
if (unlikely(bitmap[1] & (FATTR4_WORD1_NUMLINKS - 1U)))
return -EIO;
if (likely(bitmap[1] & FATTR4_WORD1_NUMLINKS)) {
- READ_BUF(4);
- READ32(*nlink);
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(!p))
+ goto out_overflow;
+ *nlink = be32_to_cpup(p);
bitmap[1] &= ~FATTR4_WORD1_NUMLINKS;
ret = NFS_ATTR_FATTR_NLINK;
}
dprintk("%s: nlink=%u\n", __func__, (unsigned int)*nlink);
return ret;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
}
static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap,
@@ -3086,9 +3159,13 @@ static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap,
if (unlikely(bitmap[1] & (FATTR4_WORD1_OWNER - 1U)))
return -EIO;
if (likely(bitmap[1] & FATTR4_WORD1_OWNER)) {
- READ_BUF(4);
- READ32(len);
- READ_BUF(len);
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(!p))
+ goto out_overflow;
+ len = be32_to_cpup(p);
+ p = xdr_inline_decode(xdr, len);
+ if (unlikely(!p))
+ goto out_overflow;
if (!may_sleep) {
/* do nothing */
} else if (len < XDR_MAX_NETOBJ) {
@@ -3104,6 +3181,9 @@ static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap,
}
dprintk("%s: uid=%d\n", __func__, (int)*uid);
return ret;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
}
static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap,
@@ -3117,9 +3197,13 @@ static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap,
if (unlikely(bitmap[1] & (FATTR4_WORD1_OWNER_GROUP - 1U)))
return -EIO;
if (likely(bitmap[1] & FATTR4_WORD1_OWNER_GROUP)) {
- READ_BUF(4);
- READ32(len);
- READ_BUF(len);
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(!p))
+ goto out_overflow;
+ len = be32_to_cpup(p);
+ p = xdr_inline_decode(xdr, len);
+ if (unlikely(!p))
+ goto out_overflow;
if (!may_sleep) {
/* do nothing */
} else if (len < XDR_MAX_NETOBJ) {
@@ -3135,6 +3219,9 @@ static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap,
}
dprintk("%s: gid=%d\n", __func__, (int)*gid);
return ret;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
}
static int decode_attr_rdev(struct xdr_stream *xdr, uint32_t *bitmap, dev_t *rdev)
@@ -3149,9 +3236,11 @@ static int decode_attr_rdev(struct xdr_stream *xdr, uint32_t *bitmap, dev_t *rde
if (likely(bitmap[1] & FATTR4_WORD1_RAWDEV)) {
dev_t tmp;
- READ_BUF(8);
- READ32(major);
- READ32(minor);
+ p = xdr_inline_decode(xdr, 8);
+ if (unlikely(!p))
+ goto out_overflow;
+ major = be32_to_cpup(p++);
+ minor = be32_to_cpup(p);
tmp = MKDEV(major, minor);
if (MAJOR(tmp) == major && MINOR(tmp) == minor)
*rdev = tmp;
@@ -3160,6 +3249,9 @@ static int decode_attr_rdev(struct xdr_stream *xdr, uint32_t *bitmap, dev_t *rde
}
dprintk("%s: rdev=(0x%x:0x%x)\n", __func__, major, minor);
return ret;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
}
static int decode_attr_space_avail(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res)
@@ -3171,12 +3263,17 @@ static int decode_attr_space_avail(struct xdr_stream *xdr, uint32_t *bitmap, uin
if (unlikely(bitmap[1] & (FATTR4_WORD1_SPACE_AVAIL - 1U)))
return -EIO;
if (likely(bitmap[1] & FATTR4_WORD1_SPACE_AVAIL)) {
- READ_BUF(8);
- READ64(*res);
+ p = xdr_inline_decode(xdr, 8);
+ if (unlikely(!p))
+ goto out_overflow;
+ xdr_decode_hyper(p, res);
bitmap[1] &= ~FATTR4_WORD1_SPACE_AVAIL;
}
dprintk("%s: space avail=%Lu\n", __func__, (unsigned long long)*res);
return status;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
}
static int decode_attr_space_free(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res)
@@ -3188,12 +3285,17 @@ static int decode_attr_space_free(struct xdr_stream *xdr, uint32_t *bitmap, uint
if (unlikely(bitmap[1] & (FATTR4_WORD1_SPACE_FREE - 1U)))
return -EIO;
if (likely(bitmap[1] & FATTR4_WORD1_SPACE_FREE)) {
- READ_BUF(8);
- READ64(*res);
+ p = xdr_inline_decode(xdr, 8);
+ if (unlikely(!p))
+ goto out_overflow;
+ xdr_decode_hyper(p, res);
bitmap[1] &= ~FATTR4_WORD1_SPACE_FREE;
}
dprintk("%s: space free=%Lu\n", __func__, (unsigned long long)*res);
return status;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
}
static int decode_attr_space_total(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res)
@@ -3205,12 +3307,17 @@ static int decode_attr_space_total(struct xdr_stream *xdr, uint32_t *bitmap, uin
if (unlikely(bitmap[1] & (FATTR4_WORD1_SPACE_TOTAL - 1U)))
return -EIO;
if (likely(bitmap[1] & FATTR4_WORD1_SPACE_TOTAL)) {
- READ_BUF(8);
- READ64(*res);
+ p = xdr_inline_decode(xdr, 8);
+ if (unlikely(!p))
+ goto out_overflow;
+ xdr_decode_hyper(p, res);
bitmap[1] &= ~FATTR4_WORD1_SPACE_TOTAL;
}
dprintk("%s: space total=%Lu\n", __func__, (unsigned long long)*res);
return status;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
}
static int decode_attr_space_used(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *used)
@@ -3222,14 +3329,19 @@ static int decode_attr_space_used(struct xdr_stream *xdr, uint32_t *bitmap, uint
if (unlikely(bitmap[1] & (FATTR4_WORD1_SPACE_USED - 1U)))
return -EIO;
if (likely(bitmap[1] & FATTR4_WORD1_SPACE_USED)) {
- READ_BUF(8);
- READ64(*used);
+ p = xdr_inline_decode(xdr, 8);
+ if (unlikely(!p))
+ goto out_overflow;
+ xdr_decode_hyper(p, used);
bitmap[1] &= ~FATTR4_WORD1_SPACE_USED;
ret = NFS_ATTR_FATTR_SPACE_USED;
}
dprintk("%s: space used=%Lu\n", __func__,
(unsigned long long)*used);
return ret;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
}
static int decode_attr_time(struct xdr_stream *xdr, struct timespec *time)
@@ -3238,12 +3350,17 @@ static int decode_attr_time(struct xdr_stream *xdr, struct timespec *time)
uint64_t sec;
uint32_t nsec;
- READ_BUF(12);
- READ64(sec);
- READ32(nsec);
+ p = xdr_inline_decode(xdr, 12);
+ if (unlikely(!p))
+ goto out_overflow;
+ p = xdr_decode_hyper(p, &sec);
+ nsec = be32_to_cpup(p);
time->tv_sec = (time_t)sec;
time->tv_nsec = (long)nsec;
return 0;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
}
static int decode_attr_time_access(struct xdr_stream *xdr, uint32_t *bitmap, struct timespec *time)
@@ -3321,11 +3438,16 @@ static int decode_change_info(struct xdr_stream *xdr, struct nfs4_change_info *c
{
__be32 *p;
- READ_BUF(20);
- READ32(cinfo->atomic);
- READ64(cinfo->before);
- READ64(cinfo->after);
+ p = xdr_inline_decode(xdr, 20);
+ if (unlikely(!p))
+ goto out_overflow;
+ cinfo->atomic = be32_to_cpup(p++);
+ p = xdr_decode_hyper(p, &cinfo->before);
+ xdr_decode_hyper(p, &cinfo->after);
return 0;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
}
static int decode_access(struct xdr_stream *xdr, struct nfs4_accessres *access)
@@ -3337,40 +3459,62 @@ static int decode_access(struct xdr_stream *xdr, struct nfs4_accessres *access)
status = decode_op_hdr(xdr, OP_ACCESS);
if (status)
return status;
- READ_BUF(8);
- READ32(supp);
- READ32(acc);
+ p = xdr_inline_decode(xdr, 8);
+ if (unlikely(!p))
+ goto out_overflow;
+ supp = be32_to_cpup(p++);
+ acc = be32_to_cpup(p);
access->supported = supp;
access->access = acc;
return 0;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
}
-static int decode_close(struct xdr_stream *xdr, struct nfs_closeres *res)
+static int decode_opaque_fixed(struct xdr_stream *xdr, void *buf, size_t len)
{
__be32 *p;
+
+ p = xdr_inline_decode(xdr, len);
+ if (likely(p)) {
+ memcpy(buf, p, len);
+ return 0;
+ }
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
+}
+
+static int decode_stateid(struct xdr_stream *xdr, nfs4_stateid *stateid)
+{
+ return decode_opaque_fixed(xdr, stateid->data, NFS4_STATEID_SIZE);
+}
+
+static int decode_close(struct xdr_stream *xdr, struct nfs_closeres *res)
+{
int status;
status = decode_op_hdr(xdr, OP_CLOSE);
if (status != -EIO)
nfs_increment_open_seqid(status, res->seqid);
- if (status)
- return status;
- READ_BUF(NFS4_STATEID_SIZE);
- COPYMEM(res->stateid.data, NFS4_STATEID_SIZE);
- return 0;
+ if (!status)
+ status = decode_stateid(xdr, &res->stateid);
+ return status;
+}
+
+static int decode_verifier(struct xdr_stream *xdr, void *verifier)
+{
+ return decode_opaque_fixed(xdr, verifier, 8);
}
static int decode_commit(struct xdr_stream *xdr, struct nfs_writeres *res)
{
- __be32 *p;
int status;
status = decode_op_hdr(xdr, OP_COMMIT);
- if (status)
- return status;
- READ_BUF(8);
- COPYMEM(res->verf->verifier, 8);
- return 0;
+ if (!status)
+ status = decode_verifier(xdr, res->verf->verifier);
+ return status;
}
static int decode_create(struct xdr_stream *xdr, struct nfs4_change_info *cinfo)
@@ -3384,10 +3528,16 @@ static int decode_create(struct xdr_stream *xdr, struct nfs4_change_info *cinfo)
return status;
if ((status = decode_change_info(xdr, cinfo)))
return status;
- READ_BUF(4);
- READ32(bmlen);
- READ_BUF(bmlen << 2);
- return 0;
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(!p))
+ goto out_overflow;
+ bmlen = be32_to_cpup(p);
+ p = xdr_inline_decode(xdr, bmlen << 2);
+ if (likely(p))
+ return 0;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
}
static int decode_server_caps(struct xdr_stream *xdr, struct nfs4_server_caps_res *res)
@@ -3642,14 +3792,21 @@ static int decode_getfh(struct xdr_stream *xdr, struct nfs_fh *fh)
if (status)
return status;
- READ_BUF(4);
- READ32(len);
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(!p))
+ goto out_overflow;
+ len = be32_to_cpup(p);
if (len > NFS4_FHSIZE)
return -EIO;
fh->size = len;
- READ_BUF(len);
- COPYMEM(fh->data, len);
+ p = xdr_inline_decode(xdr, len);
+ if (unlikely(!p))
+ goto out_overflow;
+ memcpy(fh->data, p, len);
return 0;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
}
static int decode_link(struct xdr_stream *xdr, struct nfs4_change_info *cinfo)
@@ -3671,10 +3828,12 @@ static int decode_lock_denied (struct xdr_stream *xdr, struct file_lock *fl)
__be32 *p;
uint32_t namelen, type;
- READ_BUF(32);
- READ64(offset);
- READ64(length);
- READ32(type);
+ p = xdr_inline_decode(xdr, 32);
+ if (unlikely(!p))
+ goto out_overflow;
+ p = xdr_decode_hyper(p, &offset);
+ p = xdr_decode_hyper(p, &length);
+ type = be32_to_cpup(p++);
if (fl != NULL) {
fl->fl_start = (loff_t)offset;
fl->fl_end = fl->fl_start + (loff_t)length - 1;
@@ -3685,23 +3844,27 @@ static int decode_lock_denied (struct xdr_stream *xdr, struct file_lock *fl)
fl->fl_type = F_RDLCK;
fl->fl_pid = 0;
}
- READ64(clientid);
- READ32(namelen);
- READ_BUF(namelen);
- return -NFS4ERR_DENIED;
+ p = xdr_decode_hyper(p, &clientid);
+ namelen = be32_to_cpup(p);
+ p = xdr_inline_decode(xdr, namelen);
+ if (likely(p))
+ return -NFS4ERR_DENIED;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
}
static int decode_lock(struct xdr_stream *xdr, struct nfs_lock_res *res)
{
- __be32 *p;
int status;
status = decode_op_hdr(xdr, OP_LOCK);
if (status == -EIO)
goto out;
if (status == 0) {
- READ_BUF(NFS4_STATEID_SIZE);
- COPYMEM(res->stateid.data, NFS4_STATEID_SIZE);
+ status = decode_stateid(xdr, &res->stateid);
+ if (unlikely(status))
+ goto out;
} else if (status == -NFS4ERR_DENIED)
status = decode_lock_denied(xdr, NULL);
if (res->open_seqid != NULL)
@@ -3722,16 +3885,13 @@ static int decode_lockt(struct xdr_stream *xdr, struct nfs_lockt_res *res)
static int decode_locku(struct xdr_stream *xdr, struct nfs_locku_res *res)
{
- __be32 *p;
int status;
status = decode_op_hdr(xdr, OP_LOCKU);
if (status != -EIO)
nfs_increment_lock_seqid(status, res->seqid);
- if (status == 0) {
- READ_BUF(NFS4_STATEID_SIZE);
- COPYMEM(res->stateid.data, NFS4_STATEID_SIZE);
- }
+ if (status == 0)
+ status = decode_stateid(xdr, &res->stateid);
return status;
}
@@ -3746,34 +3906,46 @@ static int decode_space_limit(struct xdr_stream *xdr, u64 *maxsize)
__be32 *p;
uint32_t limit_type, nblocks, blocksize;
- READ_BUF(12);
- READ32(limit_type);
+ p = xdr_inline_decode(xdr, 12);
+ if (unlikely(!p))
+ goto out_overflow;
+ limit_type = be32_to_cpup(p++);
switch (limit_type) {
case 1:
- READ64(*maxsize);
+ xdr_decode_hyper(p, maxsize);
break;
case 2:
- READ32(nblocks);
- READ32(blocksize);
+ nblocks = be32_to_cpup(p++);
+ blocksize = be32_to_cpup(p);
*maxsize = (uint64_t)nblocks * (uint64_t)blocksize;
}
return 0;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
}
static int decode_delegation(struct xdr_stream *xdr, struct nfs_openres *res)
{
__be32 *p;
uint32_t delegation_type;
+ int status;
- READ_BUF(4);
- READ32(delegation_type);
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(!p))
+ goto out_overflow;
+ delegation_type = be32_to_cpup(p);
if (delegation_type == NFS4_OPEN_DELEGATE_NONE) {
res->delegation_type = 0;
return 0;
}
- READ_BUF(NFS4_STATEID_SIZE+4);
- COPYMEM(res->delegation.data, NFS4_STATEID_SIZE);
- READ32(res->do_recall);
+ status = decode_stateid(xdr, &res->delegation);
+ if (unlikely(status))
+ return status;
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(!p))
+ goto out_overflow;
+ res->do_recall = be32_to_cpup(p);
switch (delegation_type) {
case NFS4_OPEN_DELEGATE_READ:
@@ -3785,6 +3957,9 @@ static int decode_delegation(struct xdr_stream *xdr, struct nfs_openres *res)
return -EIO;
}
return decode_ace(xdr, NULL, res->server->nfs_client);
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
}
static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res)
@@ -3796,23 +3971,27 @@ static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res)
status = decode_op_hdr(xdr, OP_OPEN);
if (status != -EIO)
nfs_increment_open_seqid(status, res->seqid);
- if (status)
+ if (!status)
+ status = decode_stateid(xdr, &res->stateid);
+ if (unlikely(status))
return status;
- READ_BUF(NFS4_STATEID_SIZE);
- COPYMEM(res->stateid.data, NFS4_STATEID_SIZE);
decode_change_info(xdr, &res->cinfo);
- READ_BUF(8);
- READ32(res->rflags);
- READ32(bmlen);
+ p = xdr_inline_decode(xdr, 8);
+ if (unlikely(!p))
+ goto out_overflow;
+ res->rflags = be32_to_cpup(p++);
+ bmlen = be32_to_cpup(p);
if (bmlen > 10)
goto xdr_error;
- READ_BUF(bmlen << 2);
+ p = xdr_inline_decode(xdr, bmlen << 2);
+ if (unlikely(!p))
+ goto out_overflow;
savewords = min_t(uint32_t, bmlen, NFS4_BITMAP_SIZE);
for (i = 0; i < savewords; ++i)
- READ32(res->attrset[i]);
+ res->attrset[i] = be32_to_cpup(p++);
for (; i < NFS4_BITMAP_SIZE; i++)
res->attrset[i] = 0;
@@ -3820,36 +3999,33 @@ static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res)
xdr_error:
dprintk("%s: Bitmap too large! Length = %u\n", __func__, bmlen);
return -EIO;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
}
static int decode_open_confirm(struct xdr_stream *xdr, struct nfs_open_confirmres *res)
{
- __be32 *p;
int status;
status = decode_op_hdr(xdr, OP_OPEN_CONFIRM);
if (status != -EIO)
nfs_increment_open_seqid(status, res->seqid);
- if (status)
- return status;
- READ_BUF(NFS4_STATEID_SIZE);
- COPYMEM(res->stateid.data, NFS4_STATEID_SIZE);
- return 0;
+ if (!status)
+ status = decode_stateid(xdr, &res->stateid);
+ return status;
}
static int decode_open_downgrade(struct xdr_stream *xdr, struct nfs_closeres *res)
{
- __be32 *p;
int status;
status = decode_op_hdr(xdr, OP_OPEN_DOWNGRADE);
if (status != -EIO)
nfs_increment_open_seqid(status, res->seqid);
- if (status)
- return status;
- READ_BUF(NFS4_STATEID_SIZE);
- COPYMEM(res->stateid.data, NFS4_STATEID_SIZE);
- return 0;
+ if (!status)
+ status = decode_stateid(xdr, &res->stateid);
+ return status;
}
static int decode_putfh(struct xdr_stream *xdr)
@@ -3872,9 +4048,11 @@ static int decode_read(struct xdr_stream *xdr, struct rpc_rqst *req, struct nfs_
status = decode_op_hdr(xdr, OP_READ);
if (status)
return status;
- READ_BUF(8);
- READ32(eof);
- READ32(count);
+ p = xdr_inline_decode(xdr, 8);
+ if (unlikely(!p))
+ goto out_overflow;
+ eof = be32_to_cpup(p++);
+ count = be32_to_cpup(p);
hdrlen = (u8 *) p - (u8 *) iov->iov_base;
recvd = req->rq_rcv_buf.len - hdrlen;
if (count > recvd) {
@@ -3887,6 +4065,9 @@ static int decode_read(struct xdr_stream *xdr, struct rpc_rqst *req, struct nfs_
res->eof = eof;
res->count = count;
return 0;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
}
static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct nfs4_readdir_res *readdir)
@@ -3901,17 +4082,17 @@ static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct n
int status;
status = decode_op_hdr(xdr, OP_READDIR);
- if (status)
+ if (!status)
+ status = decode_verifier(xdr, readdir->verifier.data);
+ if (unlikely(status))
return status;
- READ_BUF(8);
- COPYMEM(readdir->verifier.data, 8);
dprintk("%s: verifier = %08x:%08x\n",
__func__,
((u32 *)readdir->verifier.data)[0],
((u32 *)readdir->verifier.data)[1]);
- hdrlen = (char *) p - (char *) iov->iov_base;
+ hdrlen = (char *) xdr->p - (char *) iov->iov_base;
recvd = rcvbuf->len - hdrlen;
if (pglen > recvd)
pglen = recvd;
@@ -3999,8 +4180,10 @@ static int decode_readlink(struct xdr_stream *xdr, struct rpc_rqst *req)
return status;
/* Convert length of symlink */
- READ_BUF(4);
- READ32(len);
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(!p))
+ goto out_overflow;
+ len = be32_to_cpup(p);
if (len >= rcvbuf->page_len || len <= 0) {
dprintk("nfs: server returned giant symlink!\n");
return -ENAMETOOLONG;
@@ -4024,6 +4207,9 @@ static int decode_readlink(struct xdr_stream *xdr, struct rpc_rqst *req)
kaddr[len+rcvbuf->page_base] = '\0';
kunmap_atomic(kaddr, KM_USER0);
return 0;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
}
static int decode_remove(struct xdr_stream *xdr, struct nfs4_change_info *cinfo)
@@ -4121,10 +4307,16 @@ static int decode_setattr(struct xdr_stream *xdr)
status = decode_op_hdr(xdr, OP_SETATTR);
if (status)
return status;
- READ_BUF(4);
- READ32(bmlen);
- READ_BUF(bmlen << 2);
- return 0;
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(!p))
+ goto out_overflow;
+ bmlen = be32_to_cpup(p);
+ p = xdr_inline_decode(xdr, bmlen << 2);
+ if (likely(p))
+ return 0;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
}
static int decode_setclientid(struct xdr_stream *xdr, struct nfs_client *clp)
@@ -4133,35 +4325,50 @@ static int decode_setclientid(struct xdr_stream *xdr, struct nfs_client *clp)
uint32_t opnum;
int32_t nfserr;
- READ_BUF(8);
- READ32(opnum);
+ p = xdr_inline_decode(xdr, 8);
+ if (unlikely(!p))
+ goto out_overflow;
+ opnum = be32_to_cpup(p++);
if (opnum != OP_SETCLIENTID) {
dprintk("nfs: decode_setclientid: Server returned operation"
" %d\n", opnum);
return -EIO;
}
- READ32(nfserr);
+ nfserr = be32_to_cpup(p);
if (nfserr == NFS_OK) {
- READ_BUF(8 + NFS4_VERIFIER_SIZE);
- READ64(clp->cl_clientid);
- COPYMEM(clp->cl_confirm.data, NFS4_VERIFIER_SIZE);
+ p = xdr_inline_decode(xdr, 8 + NFS4_VERIFIER_SIZE);
+ if (unlikely(!p))
+ goto out_overflow;
+ p = xdr_decode_hyper(p, &clp->cl_clientid);
+ memcpy(clp->cl_confirm.data, p, NFS4_VERIFIER_SIZE);
} else if (nfserr == NFSERR_CLID_INUSE) {
uint32_t len;
/* skip netid string */
- READ_BUF(4);
- READ32(len);
- READ_BUF(len);
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(!p))
+ goto out_overflow;
+ len = be32_to_cpup(p);
+ p = xdr_inline_decode(xdr, len);
+ if (unlikely(!p))
+ goto out_overflow;
/* skip uaddr string */
- READ_BUF(4);
- READ32(len);
- READ_BUF(len);
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(!p))
+ goto out_overflow;
+ len = be32_to_cpup(p);
+ p = xdr_inline_decode(xdr, len);
+ if (unlikely(!p))
+ goto out_overflow;
return -NFSERR_CLID_INUSE;
} else
return nfs4_stat_to_errno(nfserr);
return 0;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
}
static int decode_setclientid_confirm(struct xdr_stream *xdr)
@@ -4178,11 +4385,16 @@ static int decode_write(struct xdr_stream *xdr, struct nfs_writeres *res)
if (status)
return status;
- READ_BUF(16);
- READ32(res->count);
- READ32(res->verf->committed);
- COPYMEM(res->verf->verifier, 8);
+ p = xdr_inline_decode(xdr, 16);
+ if (unlikely(!p))
+ goto out_overflow;
+ res->count = be32_to_cpup(p++);
+ res->verf->committed = be32_to_cpup(p++);
+ memcpy(res->verf->verifier, p, 8);
return 0;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
}
static int decode_delegreturn(struct xdr_stream *xdr)
@@ -4196,6 +4408,7 @@ static int decode_exchange_id(struct xdr_stream *xdr,
{
__be32 *p;
uint32_t dummy;
+ char *dummy_str;
int status;
struct nfs_client *clp = res->client;
@@ -4203,36 +4416,45 @@ static int decode_exchange_id(struct xdr_stream *xdr,
if (status)
return status;
- READ_BUF(8);
- READ64(clp->cl_ex_clid);
- READ_BUF(12);
- READ32(clp->cl_seqid);
- READ32(clp->cl_exchange_flags);
+ p = xdr_inline_decode(xdr, 8);
+ if (unlikely(!p))
+ goto out_overflow;
+ xdr_decode_hyper(p, &clp->cl_ex_clid);
+ p = xdr_inline_decode(xdr, 12);
+ if (unlikely(!p))
+ goto out_overflow;
+ clp->cl_seqid = be32_to_cpup(p++);
+ clp->cl_exchange_flags = be32_to_cpup(p++);
/* We ask for SP4_NONE */
- READ32(dummy);
+ dummy = be32_to_cpup(p);
if (dummy != SP4_NONE)
return -EIO;
/* Throw away minor_id */
- READ_BUF(8);
+ p = xdr_inline_decode(xdr, 8);
+ if (unlikely(!p))
+ goto out_overflow;
/* Throw away Major id */
- READ_BUF(4);
- READ32(dummy);
- READ_BUF(dummy);
+ status = decode_opaque_inline(xdr, &dummy, &dummy_str);
+ if (unlikely(status))
+ return status;
/* Throw away server_scope */
- READ_BUF(4);
- READ32(dummy);
- READ_BUF(dummy);
+ status = decode_opaque_inline(xdr, &dummy, &dummy_str);
+ if (unlikely(status))
+ return status;
/* Throw away Implementation id array */
- READ_BUF(4);
- READ32(dummy);
- READ_BUF(dummy);
+ status = decode_opaque_inline(xdr, &dummy, &dummy_str);
+ if (unlikely(status))
+ return status;
return 0;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
}
static int decode_chan_attrs(struct xdr_stream *xdr,
@@ -4241,22 +4463,35 @@ static int decode_chan_attrs(struct xdr_stream *xdr,
__be32 *p;
u32 nr_attrs;
- READ_BUF(28);
- READ32(attrs->headerpadsz);
- READ32(attrs->max_rqst_sz);
- READ32(attrs->max_resp_sz);
- READ32(attrs->max_resp_sz_cached);
- READ32(attrs->max_ops);
- READ32(attrs->max_reqs);
- READ32(nr_attrs);
+ p = xdr_inline_decode(xdr, 28);
+ if (unlikely(!p))
+ goto out_overflow;
+ attrs->headerpadsz = be32_to_cpup(p++);
+ attrs->max_rqst_sz = be32_to_cpup(p++);
+ attrs->max_resp_sz = be32_to_cpup(p++);
+ attrs->max_resp_sz_cached = be32_to_cpup(p++);
+ attrs->max_ops = be32_to_cpup(p++);
+ attrs->max_reqs = be32_to_cpup(p++);
+ nr_attrs = be32_to_cpup(p);
if (unlikely(nr_attrs > 1)) {
printk(KERN_WARNING "%s: Invalid rdma channel attrs count %u\n",
__func__, nr_attrs);
return -EINVAL;
}
- if (nr_attrs == 1)
- READ_BUF(4); /* skip rdma_attrs */
+ if (nr_attrs == 1) {
+ p = xdr_inline_decode(xdr, 4); /* skip rdma_attrs */
+ if (unlikely(!p))
+ goto out_overflow;
+ }
return 0;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
+}
+
+static int decode_sessionid(struct xdr_stream *xdr, struct nfs4_sessionid *sid)
+{
+ return decode_opaque_fixed(xdr, sid->data, NFS4_MAX_SESSIONID_LEN);
}
static int decode_create_session(struct xdr_stream *xdr,
@@ -4268,24 +4503,26 @@ static int decode_create_session(struct xdr_stream *xdr,
struct nfs4_session *session = clp->cl_session;
status = decode_op_hdr(xdr, OP_CREATE_SESSION);
-
- if (status)
+ if (!status)
+ status = decode_sessionid(xdr, &session->sess_id);
+ if (unlikely(status))
return status;
- /* sessionid */
- READ_BUF(NFS4_MAX_SESSIONID_LEN);
- COPYMEM(&session->sess_id, NFS4_MAX_SESSIONID_LEN);
-
/* seqid, flags */
- READ_BUF(8);
- READ32(clp->cl_seqid);
- READ32(session->flags);
+ p = xdr_inline_decode(xdr, 8);
+ if (unlikely(!p))
+ goto out_overflow;
+ clp->cl_seqid = be32_to_cpup(p++);
+ session->flags = be32_to_cpup(p);
/* Channel attributes */
status = decode_chan_attrs(xdr, &session->fc_attrs);
if (!status)
status = decode_chan_attrs(xdr, &session->bc_attrs);
return status;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
}
static int decode_destroy_session(struct xdr_stream *xdr, void *dummy)
@@ -4309,7 +4546,9 @@ static int decode_sequence(struct xdr_stream *xdr,
return 0;
status = decode_op_hdr(xdr, OP_SEQUENCE);
- if (status)
+ if (!status)
+ status = decode_sessionid(xdr, &id);
+ if (unlikely(status))
goto out_err;
/*
@@ -4318,36 +4557,43 @@ static int decode_sequence(struct xdr_stream *xdr,
*/
status = -ESERVERFAULT;
- slot = &res->sr_session->fc_slot_table.slots[res->sr_slotid];
- READ_BUF(NFS4_MAX_SESSIONID_LEN + 20);
- COPYMEM(id.data, NFS4_MAX_SESSIONID_LEN);
if (memcmp(id.data, res->sr_session->sess_id.data,
NFS4_MAX_SESSIONID_LEN)) {
dprintk("%s Invalid session id\n", __func__);
goto out_err;
}
+
+ p = xdr_inline_decode(xdr, 20);
+ if (unlikely(!p))
+ goto out_overflow;
+
/* seqid */
- READ32(dummy);
+ slot = &res->sr_session->fc_slot_table.slots[res->sr_slotid];
+ dummy = be32_to_cpup(p++);
if (dummy != slot->seq_nr) {
dprintk("%s Invalid sequence number\n", __func__);
goto out_err;
}
/* slot id */
- READ32(dummy);
+ dummy = be32_to_cpup(p++);
if (dummy != res->sr_slotid) {
dprintk("%s Invalid slot id\n", __func__);
goto out_err;
}
/* highest slot id - currently not processed */
- READ32(dummy);
+ dummy = be32_to_cpup(p++);
/* target highest slot id - currently not processed */
- READ32(dummy);
+ dummy = be32_to_cpup(p++);
/* result flags - currently not processed */
- READ32(dummy);
+ dummy = be32_to_cpup(p);
status = 0;
out_err:
res->sr_status = status;
return status;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ status = -EIO;
+ goto out_err;
#else /* CONFIG_NFS_V4_1 */
return 0;
#endif /* CONFIG_NFS_V4_1 */
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 73ea5e8d66ce..12c9e66d3f1d 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -60,17 +60,15 @@ struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount)
return p;
}
-static void nfs_readdata_free(struct nfs_read_data *p)
+void nfs_readdata_free(struct nfs_read_data *p)
{
if (p && (p->pagevec != &p->page_array[0]))
kfree(p->pagevec);
mempool_free(p, nfs_rdata_mempool);
}
-void nfs_readdata_release(void *data)
+static void nfs_readdata_release(struct nfs_read_data *rdata)
{
- struct nfs_read_data *rdata = data;
-
put_nfs_open_context(rdata->args.context);
nfs_readdata_free(rdata);
}
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 6240e644f249..120acadc6a84 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -89,17 +89,15 @@ struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount)
return p;
}
-static void nfs_writedata_free(struct nfs_write_data *p)
+void nfs_writedata_free(struct nfs_write_data *p)
{
if (p && (p->pagevec != &p->page_array[0]))
kfree(p->pagevec);
mempool_free(p, nfs_wdata_mempool);
}
-void nfs_writedata_release(void *data)
+static void nfs_writedata_release(struct nfs_write_data *wdata)
{
- struct nfs_write_data *wdata = data;
-
put_nfs_open_context(wdata->args.context);
nfs_writedata_free(wdata);
}
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 9edcde4974aa..f9a3e8942669 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -1914,7 +1914,8 @@ static void ocfs2_adjust_adjacent_records(struct ocfs2_extent_rec *left_rec,
* immediately to their right.
*/
left_clusters = le32_to_cpu(right_child_el->l_recs[0].e_cpos);
- if (ocfs2_is_empty_extent(&right_child_el->l_recs[0])) {
+ if (!ocfs2_rec_clusters(right_child_el, &right_child_el->l_recs[0])) {
+ BUG_ON(right_child_el->l_tree_depth);
BUG_ON(le16_to_cpu(right_child_el->l_next_free_rec) <= 1);
left_clusters = le32_to_cpu(right_child_el->l_recs[1].e_cpos);
}
@@ -2476,15 +2477,37 @@ out_ret_path:
return ret;
}
-static void ocfs2_update_edge_lengths(struct inode *inode, handle_t *handle,
- struct ocfs2_path *path)
+static int ocfs2_update_edge_lengths(struct inode *inode, handle_t *handle,
+ int subtree_index, struct ocfs2_path *path)
{
- int i, idx;
+ int i, idx, ret;
struct ocfs2_extent_rec *rec;
struct ocfs2_extent_list *el;
struct ocfs2_extent_block *eb;
u32 range;
+ /*
+ * In normal tree rotation process, we will never touch the
+ * tree branch above subtree_index and ocfs2_extend_rotate_transaction
+ * doesn't reserve the credits for them either.
+ *
+ * But we do have a special case here which will update the rightmost
+ * records for all the bh in the path.
+ * So we have to allocate extra credits and access them.
+ */
+ ret = ocfs2_extend_trans(handle,
+ handle->h_buffer_credits + subtree_index);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+
+ ret = ocfs2_journal_access_path(inode, handle, path);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+
/* Path should always be rightmost. */
eb = (struct ocfs2_extent_block *)path_leaf_bh(path)->b_data;
BUG_ON(eb->h_next_leaf_blk != 0ULL);
@@ -2505,6 +2528,8 @@ static void ocfs2_update_edge_lengths(struct inode *inode, handle_t *handle,
ocfs2_journal_dirty(handle, path->p_node[i].bh);
}
+out:
+ return ret;
}
static void ocfs2_unlink_path(struct inode *inode, handle_t *handle,
@@ -2717,7 +2742,12 @@ static int ocfs2_rotate_subtree_left(struct inode *inode, handle_t *handle,
if (del_right_subtree) {
ocfs2_unlink_subtree(inode, handle, left_path, right_path,
subtree_index, dealloc);
- ocfs2_update_edge_lengths(inode, handle, left_path);
+ ret = ocfs2_update_edge_lengths(inode, handle, subtree_index,
+ left_path);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
eb = (struct ocfs2_extent_block *)path_leaf_bh(left_path)->b_data;
ocfs2_et_set_last_eb_blk(et, le64_to_cpu(eb->h_blkno));
@@ -3034,7 +3064,12 @@ static int ocfs2_remove_rightmost_path(struct inode *inode, handle_t *handle,
ocfs2_unlink_subtree(inode, handle, left_path, path,
subtree_index, dealloc);
- ocfs2_update_edge_lengths(inode, handle, left_path);
+ ret = ocfs2_update_edge_lengths(inode, handle, subtree_index,
+ left_path);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
eb = (struct ocfs2_extent_block *)path_leaf_bh(left_path)->b_data;
ocfs2_et_set_last_eb_blk(et, le64_to_cpu(eb->h_blkno));
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index b2c52b3a1484..b401654011a2 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -193,6 +193,7 @@ static int ocfs2_get_block(struct inode *inode, sector_t iblock,
(unsigned long long)OCFS2_I(inode)->ip_blkno);
mlog(ML_ERROR, "Size %llu, clusters %u\n", (unsigned long long)i_size_read(inode), OCFS2_I(inode)->ip_clusters);
dump_stack();
+ goto bail;
}
past_eof = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode));
@@ -894,18 +895,17 @@ struct ocfs2_write_cluster_desc {
*/
unsigned c_new;
unsigned c_unwritten;
+ unsigned c_needs_zero;
};
-static inline int ocfs2_should_zero_cluster(struct ocfs2_write_cluster_desc *d)
-{
- return d->c_new || d->c_unwritten;
-}
-
struct ocfs2_write_ctxt {
/* Logical cluster position / len of write */
u32 w_cpos;
u32 w_clen;
+ /* First cluster allocated in a nonsparse extend */
+ u32 w_first_new_cpos;
+
struct ocfs2_write_cluster_desc w_desc[OCFS2_MAX_CLUSTERS_PER_PAGE];
/*
@@ -983,6 +983,7 @@ static int ocfs2_alloc_write_ctxt(struct ocfs2_write_ctxt **wcp,
return -ENOMEM;
wc->w_cpos = pos >> osb->s_clustersize_bits;
+ wc->w_first_new_cpos = UINT_MAX;
cend = (pos + len - 1) >> osb->s_clustersize_bits;
wc->w_clen = cend - wc->w_cpos + 1;
get_bh(di_bh);
@@ -1217,20 +1218,18 @@ out:
*/
static int ocfs2_write_cluster(struct address_space *mapping,
u32 phys, unsigned int unwritten,
+ unsigned int should_zero,
struct ocfs2_alloc_context *data_ac,
struct ocfs2_alloc_context *meta_ac,
struct ocfs2_write_ctxt *wc, u32 cpos,
loff_t user_pos, unsigned user_len)
{
- int ret, i, new, should_zero = 0;
+ int ret, i, new;
u64 v_blkno, p_blkno;
struct inode *inode = mapping->host;
struct ocfs2_extent_tree et;
new = phys == 0 ? 1 : 0;
- if (new || unwritten)
- should_zero = 1;
-
if (new) {
u32 tmp_pos;
@@ -1301,7 +1300,7 @@ static int ocfs2_write_cluster(struct address_space *mapping,
if (tmpret) {
mlog_errno(tmpret);
if (ret == 0)
- tmpret = ret;
+ ret = tmpret;
}
}
@@ -1341,7 +1340,9 @@ static int ocfs2_write_cluster_by_desc(struct address_space *mapping,
local_len = osb->s_clustersize - cluster_off;
ret = ocfs2_write_cluster(mapping, desc->c_phys,
- desc->c_unwritten, data_ac, meta_ac,
+ desc->c_unwritten,
+ desc->c_needs_zero,
+ data_ac, meta_ac,
wc, desc->c_cpos, pos, local_len);
if (ret) {
mlog_errno(ret);
@@ -1391,14 +1392,14 @@ static void ocfs2_set_target_boundaries(struct ocfs2_super *osb,
* newly allocated cluster.
*/
desc = &wc->w_desc[0];
- if (ocfs2_should_zero_cluster(desc))
+ if (desc->c_needs_zero)
ocfs2_figure_cluster_boundaries(osb,
desc->c_cpos,
&wc->w_target_from,
NULL);
desc = &wc->w_desc[wc->w_clen - 1];
- if (ocfs2_should_zero_cluster(desc))
+ if (desc->c_needs_zero)
ocfs2_figure_cluster_boundaries(osb,
desc->c_cpos,
NULL,
@@ -1466,13 +1467,28 @@ static int ocfs2_populate_write_desc(struct inode *inode,
phys++;
}
+ /*
+ * If w_first_new_cpos is < UINT_MAX, we have a non-sparse
+ * file that got extended. w_first_new_cpos tells us
+ * where the newly allocated clusters are so we can
+ * zero them.
+ */
+ if (desc->c_cpos >= wc->w_first_new_cpos) {
+ BUG_ON(phys == 0);
+ desc->c_needs_zero = 1;
+ }
+
desc->c_phys = phys;
if (phys == 0) {
desc->c_new = 1;
+ desc->c_needs_zero = 1;
*clusters_to_alloc = *clusters_to_alloc + 1;
}
- if (ext_flags & OCFS2_EXT_UNWRITTEN)
+
+ if (ext_flags & OCFS2_EXT_UNWRITTEN) {
desc->c_unwritten = 1;
+ desc->c_needs_zero = 1;
+ }
num_clusters--;
}
@@ -1632,10 +1648,13 @@ static int ocfs2_expand_nonsparse_inode(struct inode *inode, loff_t pos,
if (newsize <= i_size_read(inode))
return 0;
- ret = ocfs2_extend_no_holes(inode, newsize, newsize - len);
+ ret = ocfs2_extend_no_holes(inode, newsize, pos);
if (ret)
mlog_errno(ret);
+ wc->w_first_new_cpos =
+ ocfs2_clusters_for_bytes(inode->i_sb, i_size_read(inode));
+
return ret;
}
@@ -1644,7 +1663,7 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
struct page **pagep, void **fsdata,
struct buffer_head *di_bh, struct page *mmap_page)
{
- int ret, credits = OCFS2_INODE_UPDATE_CREDITS;
+ int ret, cluster_of_pages, credits = OCFS2_INODE_UPDATE_CREDITS;
unsigned int clusters_to_alloc, extents_to_split;
struct ocfs2_write_ctxt *wc;
struct inode *inode = mapping->host;
@@ -1722,8 +1741,19 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
}
- ocfs2_set_target_boundaries(osb, wc, pos, len,
- clusters_to_alloc + extents_to_split);
+ /*
+ * We have to zero sparse allocated clusters, unwritten extent clusters,
+ * and non-sparse clusters we just extended. For non-sparse writes,
+ * we know zeros will only be needed in the first and/or last cluster.
+ */
+ if (clusters_to_alloc || extents_to_split ||
+ wc->w_desc[0].c_needs_zero ||
+ wc->w_desc[wc->w_clen - 1].c_needs_zero)
+ cluster_of_pages = 1;
+ else
+ cluster_of_pages = 0;
+
+ ocfs2_set_target_boundaries(osb, wc, pos, len, cluster_of_pages);
handle = ocfs2_start_trans(osb, credits);
if (IS_ERR(handle)) {
@@ -1756,8 +1786,7 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
* extent.
*/
ret = ocfs2_grab_pages_for_write(mapping, wc, wc->w_cpos, pos,
- clusters_to_alloc + extents_to_split,
- mmap_page);
+ cluster_of_pages, mmap_page);
if (ret) {
mlog_errno(ret);
goto out_quota;
diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c
index b574431a031d..2f28b7de2c8d 100644
--- a/fs/ocfs2/dcache.c
+++ b/fs/ocfs2/dcache.c
@@ -310,22 +310,19 @@ out_attach:
return ret;
}
-static DEFINE_SPINLOCK(dentry_list_lock);
+DEFINE_SPINLOCK(dentry_list_lock);
/* We limit the number of dentry locks to drop in one go. We have
* this limit so that we don't starve other users of ocfs2_wq. */
#define DL_INODE_DROP_COUNT 64
/* Drop inode references from dentry locks */
-void ocfs2_drop_dl_inodes(struct work_struct *work)
+static void __ocfs2_drop_dl_inodes(struct ocfs2_super *osb, int drop_count)
{
- struct ocfs2_super *osb = container_of(work, struct ocfs2_super,
- dentry_lock_work);
struct ocfs2_dentry_lock *dl;
- int drop_count = DL_INODE_DROP_COUNT;
spin_lock(&dentry_list_lock);
- while (osb->dentry_lock_list && drop_count--) {
+ while (osb->dentry_lock_list && (drop_count < 0 || drop_count--)) {
dl = osb->dentry_lock_list;
osb->dentry_lock_list = dl->dl_next;
spin_unlock(&dentry_list_lock);
@@ -333,11 +330,32 @@ void ocfs2_drop_dl_inodes(struct work_struct *work)
kfree(dl);
spin_lock(&dentry_list_lock);
}
- if (osb->dentry_lock_list)
+ spin_unlock(&dentry_list_lock);
+}
+
+void ocfs2_drop_dl_inodes(struct work_struct *work)
+{
+ struct ocfs2_super *osb = container_of(work, struct ocfs2_super,
+ dentry_lock_work);
+
+ __ocfs2_drop_dl_inodes(osb, DL_INODE_DROP_COUNT);
+ /*
+ * Don't queue dropping if umount is in progress. We flush the
+ * list in ocfs2_dismount_volume
+ */
+ spin_lock(&dentry_list_lock);
+ if (osb->dentry_lock_list &&
+ !ocfs2_test_osb_flag(osb, OCFS2_OSB_DROP_DENTRY_LOCK_IMMED))
queue_work(ocfs2_wq, &osb->dentry_lock_work);
spin_unlock(&dentry_list_lock);
}
+/* Flush the whole work queue */
+void ocfs2_drop_all_dl_inodes(struct ocfs2_super *osb)
+{
+ __ocfs2_drop_dl_inodes(osb, -1);
+}
+
/*
* ocfs2_dentry_iput() and friends.
*
@@ -368,7 +386,8 @@ static void ocfs2_drop_dentry_lock(struct ocfs2_super *osb,
/* We leave dropping of inode reference to ocfs2_wq as that can
* possibly lead to inode deletion which gets tricky */
spin_lock(&dentry_list_lock);
- if (!osb->dentry_lock_list)
+ if (!osb->dentry_lock_list &&
+ !ocfs2_test_osb_flag(osb, OCFS2_OSB_DROP_DENTRY_LOCK_IMMED))
queue_work(ocfs2_wq, &osb->dentry_lock_work);
dl->dl_next = osb->dentry_lock_list;
osb->dentry_lock_list = dl;
diff --git a/fs/ocfs2/dcache.h b/fs/ocfs2/dcache.h
index faa12e75f98d..f5dd1789acf1 100644
--- a/fs/ocfs2/dcache.h
+++ b/fs/ocfs2/dcache.h
@@ -49,10 +49,13 @@ struct ocfs2_dentry_lock {
int ocfs2_dentry_attach_lock(struct dentry *dentry, struct inode *inode,
u64 parent_blkno);
+extern spinlock_t dentry_list_lock;
+
void ocfs2_dentry_lock_put(struct ocfs2_super *osb,
struct ocfs2_dentry_lock *dl);
void ocfs2_drop_dl_inodes(struct work_struct *work);
+void ocfs2_drop_all_dl_inodes(struct ocfs2_super *osb);
struct dentry *ocfs2_find_local_alias(struct inode *inode, u64 parent_blkno,
int skip_unhashed);
diff --git a/fs/ocfs2/dlm/dlmast.c b/fs/ocfs2/dlm/dlmast.c
index d07ddbe4b283..81eff8e58322 100644
--- a/fs/ocfs2/dlm/dlmast.c
+++ b/fs/ocfs2/dlm/dlmast.c
@@ -103,7 +103,6 @@ static void __dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock)
lock->ast_pending, lock->ml.type);
BUG();
}
- BUG_ON(!list_empty(&lock->ast_list));
if (lock->ast_pending)
mlog(0, "lock has an ast getting flushed right now\n");
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index bcb9260c3735..43e6e3280569 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -1118,7 +1118,7 @@ static int dlm_send_mig_lockres_msg(struct dlm_ctxt *dlm,
mlog(0, "%s:%.*s: sending mig lockres (%s) to %u\n",
dlm->name, res->lockname.len, res->lockname.name,
- orig_flags & DLM_MRES_MIGRATION ? "migrate" : "recovery",
+ orig_flags & DLM_MRES_MIGRATION ? "migration" : "recovery",
send_to);
/* send it */
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 62442e413a00..aa501d3f93f1 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1851,6 +1851,7 @@ relock:
if (ret)
goto out_dio;
+ count = ocount;
ret = generic_write_checks(file, ppos, &count,
S_ISBLK(inode->i_mode));
if (ret)
@@ -1918,8 +1919,10 @@ out_sems:
mutex_unlock(&inode->i_mutex);
+ if (written)
+ ret = written;
mlog_exit(ret);
- return written ? written : ret;
+ return ret;
}
static int ocfs2_splice_to_file(struct pipe_inode_info *pipe,
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index f033760ecbea..c48b93ac6b65 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -1954,10 +1954,16 @@ void ocfs2_orphan_scan_init(struct ocfs2_super *osb)
os->os_osb = osb;
os->os_count = 0;
os->os_seqno = 0;
- os->os_scantime = CURRENT_TIME;
mutex_init(&os->os_lock);
INIT_DELAYED_WORK(&os->os_orphan_scan_work, ocfs2_orphan_scan_work);
+}
+void ocfs2_orphan_scan_start(struct ocfs2_super *osb)
+{
+ struct ocfs2_orphan_scan *os;
+
+ os = &osb->osb_orphan_scan;
+ os->os_scantime = CURRENT_TIME;
if (ocfs2_is_hard_readonly(osb) || ocfs2_mount_local(osb))
atomic_set(&os->os_state, ORPHAN_SCAN_INACTIVE);
else {
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index 5432c7f79cc6..2c3222aec622 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -145,6 +145,7 @@ static inline void ocfs2_inode_set_new(struct ocfs2_super *osb,
/* Exported only for the journal struct init code in super.c. Do not call. */
void ocfs2_orphan_scan_init(struct ocfs2_super *osb);
+void ocfs2_orphan_scan_start(struct ocfs2_super *osb);
void ocfs2_orphan_scan_stop(struct ocfs2_super *osb);
void ocfs2_orphan_scan_exit(struct ocfs2_super *osb);
@@ -329,20 +330,27 @@ int ocfs2_journal_dirty(handle_t *handle,
/* extended attribute block update */
#define OCFS2_XATTR_BLOCK_UPDATE_CREDITS 1
+/* Update of a single quota block */
+#define OCFS2_QUOTA_BLOCK_UPDATE_CREDITS 1
+
/* global quotafile inode update, data block */
-#define OCFS2_QINFO_WRITE_CREDITS (OCFS2_INODE_UPDATE_CREDITS + 1)
+#define OCFS2_QINFO_WRITE_CREDITS (OCFS2_INODE_UPDATE_CREDITS + \
+ OCFS2_QUOTA_BLOCK_UPDATE_CREDITS)
+#define OCFS2_LOCAL_QINFO_WRITE_CREDITS OCFS2_QUOTA_BLOCK_UPDATE_CREDITS
/*
* The two writes below can accidentally see global info dirty due
* to set_info() quotactl so make them prepared for the writes.
*/
/* quota data block, global info */
/* Write to local quota file */
-#define OCFS2_QWRITE_CREDITS (OCFS2_QINFO_WRITE_CREDITS + 1)
+#define OCFS2_QWRITE_CREDITS (OCFS2_QINFO_WRITE_CREDITS + \
+ OCFS2_QUOTA_BLOCK_UPDATE_CREDITS)
/* global quota data block, local quota data block, global quota inode,
* global quota info */
-#define OCFS2_QSYNC_CREDITS (OCFS2_INODE_UPDATE_CREDITS + 3)
+#define OCFS2_QSYNC_CREDITS (OCFS2_QINFO_WRITE_CREDITS + \
+ 2 * OCFS2_QUOTA_BLOCK_UPDATE_CREDITS)
static inline int ocfs2_quota_trans_credits(struct super_block *sb)
{
@@ -355,11 +363,6 @@ static inline int ocfs2_quota_trans_credits(struct super_block *sb)
return credits;
}
-/* Number of credits needed for removing quota structure from file */
-int ocfs2_calc_qdel_credits(struct super_block *sb, int type);
-/* Number of credits needed for initialization of new quota structure */
-int ocfs2_calc_qinit_credits(struct super_block *sb, int type);
-
/* group extend. inode update and last group update. */
#define OCFS2_GROUP_EXTEND_CREDITS (OCFS2_INODE_UPDATE_CREDITS + 1)
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index c9345ebb8493..39e1d5a39505 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -224,10 +224,12 @@ enum ocfs2_mount_options
OCFS2_MOUNT_GRPQUOTA = 1 << 10, /* We support group quotas */
};
-#define OCFS2_OSB_SOFT_RO 0x0001
-#define OCFS2_OSB_HARD_RO 0x0002
-#define OCFS2_OSB_ERROR_FS 0x0004
-#define OCFS2_DEFAULT_ATIME_QUANTUM 60
+#define OCFS2_OSB_SOFT_RO 0x0001
+#define OCFS2_OSB_HARD_RO 0x0002
+#define OCFS2_OSB_ERROR_FS 0x0004
+#define OCFS2_OSB_DROP_DENTRY_LOCK_IMMED 0x0008
+
+#define OCFS2_DEFAULT_ATIME_QUANTUM 60
struct ocfs2_journal;
struct ocfs2_slot_info;
@@ -490,6 +492,18 @@ static inline void ocfs2_set_osb_flag(struct ocfs2_super *osb,
spin_unlock(&osb->osb_lock);
}
+
+static inline unsigned long ocfs2_test_osb_flag(struct ocfs2_super *osb,
+ unsigned long flag)
+{
+ unsigned long ret;
+
+ spin_lock(&osb->osb_lock);
+ ret = osb->osb_flags & flag;
+ spin_unlock(&osb->osb_lock);
+ return ret;
+}
+
static inline void ocfs2_set_ro_flag(struct ocfs2_super *osb,
int hard)
{
diff --git a/fs/ocfs2/quota.h b/fs/ocfs2/quota.h
index 7365e2e08706..3fb96fcd4c81 100644
--- a/fs/ocfs2/quota.h
+++ b/fs/ocfs2/quota.h
@@ -50,7 +50,6 @@ struct ocfs2_mem_dqinfo {
unsigned int dqi_chunks; /* Number of chunks in local quota file */
unsigned int dqi_blocks; /* Number of blocks allocated for local quota file */
unsigned int dqi_syncms; /* How often should we sync with other nodes */
- unsigned int dqi_syncjiff; /* Precomputed dqi_syncms in jiffies */
struct list_head dqi_chunk; /* List of chunks */
struct inode *dqi_gqinode; /* Global quota file inode */
struct ocfs2_lock_res dqi_gqlock; /* Lock protecting quota information structure */
diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c
index edfa60cd155c..bf7742d0ee3b 100644
--- a/fs/ocfs2/quota_global.c
+++ b/fs/ocfs2/quota_global.c
@@ -69,6 +69,7 @@ static void ocfs2_global_mem2diskdqb(void *dp, struct dquot *dquot)
d->dqb_curspace = cpu_to_le64(m->dqb_curspace);
d->dqb_btime = cpu_to_le64(m->dqb_btime);
d->dqb_itime = cpu_to_le64(m->dqb_itime);
+ d->dqb_pad1 = d->dqb_pad2 = 0;
}
static int ocfs2_global_is_id(void *dp, struct dquot *dquot)
@@ -211,14 +212,13 @@ ssize_t ocfs2_quota_write(struct super_block *sb, int type,
mutex_lock_nested(&gqinode->i_mutex, I_MUTEX_QUOTA);
if (gqinode->i_size < off + len) {
- down_write(&OCFS2_I(gqinode)->ip_alloc_sem);
- err = ocfs2_extend_no_holes(gqinode, off + len, off);
- up_write(&OCFS2_I(gqinode)->ip_alloc_sem);
- if (err < 0)
- goto out;
+ loff_t rounded_end =
+ ocfs2_align_bytes_to_blocks(sb, off + len);
+
+ /* Space is already allocated in ocfs2_global_read_dquot() */
err = ocfs2_simple_size_update(gqinode,
oinfo->dqi_gqi_bh,
- off + len);
+ rounded_end);
if (err < 0)
goto out;
new = 1;
@@ -234,7 +234,7 @@ ssize_t ocfs2_quota_write(struct super_block *sb, int type,
}
if (err) {
mlog_errno(err);
- return err;
+ goto out;
}
lock_buffer(bh);
if (new)
@@ -342,7 +342,6 @@ int ocfs2_global_read_info(struct super_block *sb, int type)
info->dqi_bgrace = le32_to_cpu(dinfo.dqi_bgrace);
info->dqi_igrace = le32_to_cpu(dinfo.dqi_igrace);
oinfo->dqi_syncms = le32_to_cpu(dinfo.dqi_syncms);
- oinfo->dqi_syncjiff = msecs_to_jiffies(oinfo->dqi_syncms);
oinfo->dqi_gi.dqi_blocks = le32_to_cpu(dinfo.dqi_blocks);
oinfo->dqi_gi.dqi_free_blk = le32_to_cpu(dinfo.dqi_free_blk);
oinfo->dqi_gi.dqi_free_entry = le32_to_cpu(dinfo.dqi_free_entry);
@@ -352,7 +351,7 @@ int ocfs2_global_read_info(struct super_block *sb, int type)
oinfo->dqi_gi.dqi_qtree_depth = qtree_depth(&oinfo->dqi_gi);
INIT_DELAYED_WORK(&oinfo->dqi_sync_work, qsync_work_fn);
queue_delayed_work(ocfs2_quota_wq, &oinfo->dqi_sync_work,
- oinfo->dqi_syncjiff);
+ msecs_to_jiffies(oinfo->dqi_syncms));
out_err:
mlog_exit(status);
@@ -402,13 +401,36 @@ int ocfs2_global_write_info(struct super_block *sb, int type)
return err;
}
+static int ocfs2_global_qinit_alloc(struct super_block *sb, int type)
+{
+ struct ocfs2_mem_dqinfo *oinfo = sb_dqinfo(sb, type)->dqi_priv;
+
+ /*
+ * We may need to allocate tree blocks and a leaf block but not the
+ * root block
+ */
+ return oinfo->dqi_gi.dqi_qtree_depth;
+}
+
+static int ocfs2_calc_global_qinit_credits(struct super_block *sb, int type)
+{
+ /* We modify all the allocated blocks, tree root, and info block */
+ return (ocfs2_global_qinit_alloc(sb, type) + 2) *
+ OCFS2_QUOTA_BLOCK_UPDATE_CREDITS;
+}
+
/* Read in information from global quota file and acquire a reference to it.
* dquot_acquire() has already started the transaction and locked quota file */
int ocfs2_global_read_dquot(struct dquot *dquot)
{
int err, err2, ex = 0;
- struct ocfs2_mem_dqinfo *info =
- sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv;
+ struct super_block *sb = dquot->dq_sb;
+ int type = dquot->dq_type;
+ struct ocfs2_mem_dqinfo *info = sb_dqinfo(sb, type)->dqi_priv;
+ struct ocfs2_super *osb = OCFS2_SB(sb);
+ struct inode *gqinode = info->dqi_gqinode;
+ int need_alloc = ocfs2_global_qinit_alloc(sb, type);
+ handle_t *handle = NULL;
err = ocfs2_qinfo_lock(info, 0);
if (err < 0)
@@ -419,14 +441,33 @@ int ocfs2_global_read_dquot(struct dquot *dquot)
OCFS2_DQUOT(dquot)->dq_use_count++;
OCFS2_DQUOT(dquot)->dq_origspace = dquot->dq_dqb.dqb_curspace;
OCFS2_DQUOT(dquot)->dq_originodes = dquot->dq_dqb.dqb_curinodes;
+ ocfs2_qinfo_unlock(info, 0);
+
if (!dquot->dq_off) { /* No real quota entry? */
- /* Upgrade to exclusive lock for allocation */
- ocfs2_qinfo_unlock(info, 0);
- err = ocfs2_qinfo_lock(info, 1);
- if (err < 0)
- goto out_qlock;
ex = 1;
+ /*
+ * Add blocks to quota file before we start a transaction since
+ * locking allocators ranks above a transaction start
+ */
+ WARN_ON(journal_current_handle());
+ down_write(&OCFS2_I(gqinode)->ip_alloc_sem);
+ err = ocfs2_extend_no_holes(gqinode,
+ gqinode->i_size + (need_alloc << sb->s_blocksize_bits),
+ gqinode->i_size);
+ up_write(&OCFS2_I(gqinode)->ip_alloc_sem);
+ if (err < 0)
+ goto out;
}
+
+ handle = ocfs2_start_trans(osb,
+ ocfs2_calc_global_qinit_credits(sb, type));
+ if (IS_ERR(handle)) {
+ err = PTR_ERR(handle);
+ goto out;
+ }
+ err = ocfs2_qinfo_lock(info, ex);
+ if (err < 0)
+ goto out_trans;
err = qtree_write_dquot(&info->dqi_gi, dquot);
if (ex && info_dirty(sb_dqinfo(dquot->dq_sb, dquot->dq_type))) {
err2 = __ocfs2_global_write_info(dquot->dq_sb, dquot->dq_type);
@@ -438,6 +479,9 @@ out_qlock:
ocfs2_qinfo_unlock(info, 1);
else
ocfs2_qinfo_unlock(info, 0);
+out_trans:
+ if (handle)
+ ocfs2_commit_trans(osb, handle);
out:
if (err < 0)
mlog_errno(err);
@@ -607,7 +651,7 @@ static void qsync_work_fn(struct work_struct *work)
dquot_scan_active(sb, ocfs2_sync_dquot_helper, oinfo->dqi_type);
queue_delayed_work(ocfs2_quota_wq, &oinfo->dqi_sync_work,
- oinfo->dqi_syncjiff);
+ msecs_to_jiffies(oinfo->dqi_syncms));
}
/*
@@ -635,20 +679,18 @@ out:
return status;
}
-int ocfs2_calc_qdel_credits(struct super_block *sb, int type)
+static int ocfs2_calc_qdel_credits(struct super_block *sb, int type)
{
- struct ocfs2_mem_dqinfo *oinfo;
- int features[MAXQUOTAS] = { OCFS2_FEATURE_RO_COMPAT_USRQUOTA,
- OCFS2_FEATURE_RO_COMPAT_GRPQUOTA };
-
- if (!OCFS2_HAS_RO_COMPAT_FEATURE(sb, features[type]))
- return 0;
-
- oinfo = sb_dqinfo(sb, type)->dqi_priv;
- /* We modify tree, leaf block, global info, local chunk header,
- * global and local inode */
- return oinfo->dqi_gi.dqi_qtree_depth + 2 + 1 +
- 2 * OCFS2_INODE_UPDATE_CREDITS;
+ struct ocfs2_mem_dqinfo *oinfo = sb_dqinfo(sb, type)->dqi_priv;
+ /*
+ * We modify tree, leaf block, global info, local chunk header,
+ * global and local inode; OCFS2_QINFO_WRITE_CREDITS already
+ * accounts for inode update
+ */
+ return (oinfo->dqi_gi.dqi_qtree_depth + 2) *
+ OCFS2_QUOTA_BLOCK_UPDATE_CREDITS +
+ OCFS2_QINFO_WRITE_CREDITS +
+ OCFS2_INODE_UPDATE_CREDITS;
}
static int ocfs2_release_dquot(struct dquot *dquot)
@@ -680,33 +722,10 @@ out:
return status;
}
-int ocfs2_calc_qinit_credits(struct super_block *sb, int type)
-{
- struct ocfs2_mem_dqinfo *oinfo;
- int features[MAXQUOTAS] = { OCFS2_FEATURE_RO_COMPAT_USRQUOTA,
- OCFS2_FEATURE_RO_COMPAT_GRPQUOTA };
- struct ocfs2_dinode *lfe, *gfe;
-
- if (!OCFS2_HAS_RO_COMPAT_FEATURE(sb, features[type]))
- return 0;
-
- oinfo = sb_dqinfo(sb, type)->dqi_priv;
- gfe = (struct ocfs2_dinode *)oinfo->dqi_gqi_bh->b_data;
- lfe = (struct ocfs2_dinode *)oinfo->dqi_lqi_bh->b_data;
- /* We can extend local file + global file. In local file we
- * can modify info, chunk header block and dquot block. In
- * global file we can modify info, tree and leaf block */
- return ocfs2_calc_extend_credits(sb, &lfe->id2.i_list, 0) +
- ocfs2_calc_extend_credits(sb, &gfe->id2.i_list, 0) +
- 3 + oinfo->dqi_gi.dqi_qtree_depth + 2;
-}
-
static int ocfs2_acquire_dquot(struct dquot *dquot)
{
- handle_t *handle;
struct ocfs2_mem_dqinfo *oinfo =
sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv;
- struct ocfs2_super *osb = OCFS2_SB(dquot->dq_sb);
int status = 0;
mlog_entry("id=%u, type=%d", dquot->dq_id, dquot->dq_type);
@@ -715,16 +734,7 @@ static int ocfs2_acquire_dquot(struct dquot *dquot)
status = ocfs2_lock_global_qf(oinfo, 1);
if (status < 0)
goto out;
- handle = ocfs2_start_trans(osb,
- ocfs2_calc_qinit_credits(dquot->dq_sb, dquot->dq_type));
- if (IS_ERR(handle)) {
- status = PTR_ERR(handle);
- mlog_errno(status);
- goto out_ilock;
- }
status = dquot_acquire(dquot);
- ocfs2_commit_trans(osb, handle);
-out_ilock:
ocfs2_unlock_global_qf(oinfo, 1);
out:
mlog_exit(status);
diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c
index 5a460fa82553..bdb09cb6e1fe 100644
--- a/fs/ocfs2/quota_local.c
+++ b/fs/ocfs2/quota_local.c
@@ -20,6 +20,7 @@
#include "sysfile.h"
#include "dlmglue.h"
#include "quota.h"
+#include "uptodate.h"
/* Number of local quota structures per block */
static inline unsigned int ol_quota_entries_per_block(struct super_block *sb)
@@ -100,7 +101,8 @@ static int ocfs2_modify_bh(struct inode *inode, struct buffer_head *bh,
handle_t *handle;
int status;
- handle = ocfs2_start_trans(OCFS2_SB(sb), 1);
+ handle = ocfs2_start_trans(OCFS2_SB(sb),
+ OCFS2_QUOTA_BLOCK_UPDATE_CREDITS);
if (IS_ERR(handle)) {
status = PTR_ERR(handle);
mlog_errno(status);
@@ -610,7 +612,8 @@ int ocfs2_finish_quota_recovery(struct ocfs2_super *osb,
goto out_bh;
/* Mark quota file as clean if we are recovering quota file of
* some other node. */
- handle = ocfs2_start_trans(osb, 1);
+ handle = ocfs2_start_trans(osb,
+ OCFS2_LOCAL_QINFO_WRITE_CREDITS);
if (IS_ERR(handle)) {
status = PTR_ERR(handle);
mlog_errno(status);
@@ -940,7 +943,7 @@ static struct ocfs2_quota_chunk *ocfs2_local_quota_add_chunk(
struct ocfs2_local_disk_chunk *dchunk;
int status;
handle_t *handle;
- struct buffer_head *bh = NULL;
+ struct buffer_head *bh = NULL, *dbh = NULL;
u64 p_blkno;
/* We are protected by dqio_sem so no locking needed */
@@ -964,32 +967,35 @@ static struct ocfs2_quota_chunk *ocfs2_local_quota_add_chunk(
mlog_errno(status);
goto out;
}
+ /* Local quota info and two new blocks we initialize */
+ handle = ocfs2_start_trans(OCFS2_SB(sb),
+ OCFS2_LOCAL_QINFO_WRITE_CREDITS +
+ 2 * OCFS2_QUOTA_BLOCK_UPDATE_CREDITS);
+ if (IS_ERR(handle)) {
+ status = PTR_ERR(handle);
+ mlog_errno(status);
+ goto out;
+ }
+ /* Initialize chunk header */
down_read(&OCFS2_I(lqinode)->ip_alloc_sem);
status = ocfs2_extent_map_get_blocks(lqinode, oinfo->dqi_blocks,
&p_blkno, NULL, NULL);
up_read(&OCFS2_I(lqinode)->ip_alloc_sem);
if (status < 0) {
mlog_errno(status);
- goto out;
+ goto out_trans;
}
bh = sb_getblk(sb, p_blkno);
if (!bh) {
status = -ENOMEM;
mlog_errno(status);
- goto out;
+ goto out_trans;
}
dchunk = (struct ocfs2_local_disk_chunk *)bh->b_data;
-
- handle = ocfs2_start_trans(OCFS2_SB(sb), 2);
- if (IS_ERR(handle)) {
- status = PTR_ERR(handle);
- mlog_errno(status);
- goto out;
- }
-
+ ocfs2_set_new_buffer_uptodate(lqinode, bh);
status = ocfs2_journal_access_dq(handle, lqinode, bh,
- OCFS2_JOURNAL_ACCESS_WRITE);
+ OCFS2_JOURNAL_ACCESS_CREATE);
if (status < 0) {
mlog_errno(status);
goto out_trans;
@@ -999,7 +1005,6 @@ static struct ocfs2_quota_chunk *ocfs2_local_quota_add_chunk(
memset(dchunk->dqc_bitmap, 0,
sb->s_blocksize - sizeof(struct ocfs2_local_disk_chunk) -
OCFS2_QBLK_RESERVED_SPACE);
- set_buffer_uptodate(bh);
unlock_buffer(bh);
status = ocfs2_journal_dirty(handle, bh);
if (status < 0) {
@@ -1007,6 +1012,38 @@ static struct ocfs2_quota_chunk *ocfs2_local_quota_add_chunk(
goto out_trans;
}
+ /* Initialize new block with structures */
+ down_read(&OCFS2_I(lqinode)->ip_alloc_sem);
+ status = ocfs2_extent_map_get_blocks(lqinode, oinfo->dqi_blocks + 1,
+ &p_blkno, NULL, NULL);
+ up_read(&OCFS2_I(lqinode)->ip_alloc_sem);
+ if (status < 0) {
+ mlog_errno(status);
+ goto out_trans;
+ }
+ dbh = sb_getblk(sb, p_blkno);
+ if (!dbh) {
+ status = -ENOMEM;
+ mlog_errno(status);
+ goto out_trans;
+ }
+ ocfs2_set_new_buffer_uptodate(lqinode, dbh);
+ status = ocfs2_journal_access_dq(handle, lqinode, dbh,
+ OCFS2_JOURNAL_ACCESS_CREATE);
+ if (status < 0) {
+ mlog_errno(status);
+ goto out_trans;
+ }
+ lock_buffer(dbh);
+ memset(dbh->b_data, 0, sb->s_blocksize - OCFS2_QBLK_RESERVED_SPACE);
+ unlock_buffer(dbh);
+ status = ocfs2_journal_dirty(handle, dbh);
+ if (status < 0) {
+ mlog_errno(status);
+ goto out_trans;
+ }
+
+ /* Update local quotafile info */
oinfo->dqi_blocks += 2;
oinfo->dqi_chunks++;
status = ocfs2_local_write_info(sb, type);
@@ -1031,6 +1068,7 @@ out_trans:
ocfs2_commit_trans(OCFS2_SB(sb), handle);
out:
brelse(bh);
+ brelse(dbh);
kmem_cache_free(ocfs2_qf_chunk_cachep, chunk);
return ERR_PTR(status);
}
@@ -1048,6 +1086,8 @@ static struct ocfs2_quota_chunk *ocfs2_extend_local_quota_file(
struct ocfs2_local_disk_chunk *dchunk;
int epb = ol_quota_entries_per_block(sb);
unsigned int chunk_blocks;
+ struct buffer_head *bh;
+ u64 p_blkno;
int status;
handle_t *handle;
@@ -1075,12 +1115,49 @@ static struct ocfs2_quota_chunk *ocfs2_extend_local_quota_file(
mlog_errno(status);
goto out;
}
- handle = ocfs2_start_trans(OCFS2_SB(sb), 2);
+
+ /* Get buffer from the just added block */
+ down_read(&OCFS2_I(lqinode)->ip_alloc_sem);
+ status = ocfs2_extent_map_get_blocks(lqinode, oinfo->dqi_blocks,
+ &p_blkno, NULL, NULL);
+ up_read(&OCFS2_I(lqinode)->ip_alloc_sem);
+ if (status < 0) {
+ mlog_errno(status);
+ goto out;
+ }
+ bh = sb_getblk(sb, p_blkno);
+ if (!bh) {
+ status = -ENOMEM;
+ mlog_errno(status);
+ goto out;
+ }
+ ocfs2_set_new_buffer_uptodate(lqinode, bh);
+
+ /* Local quota info, chunk header and the new block we initialize */
+ handle = ocfs2_start_trans(OCFS2_SB(sb),
+ OCFS2_LOCAL_QINFO_WRITE_CREDITS +
+ 2 * OCFS2_QUOTA_BLOCK_UPDATE_CREDITS);
if (IS_ERR(handle)) {
status = PTR_ERR(handle);
mlog_errno(status);
goto out;
}
+ /* Zero created block */
+ status = ocfs2_journal_access_dq(handle, lqinode, bh,
+ OCFS2_JOURNAL_ACCESS_CREATE);
+ if (status < 0) {
+ mlog_errno(status);
+ goto out_trans;
+ }
+ lock_buffer(bh);
+ memset(bh->b_data, 0, sb->s_blocksize);
+ unlock_buffer(bh);
+ status = ocfs2_journal_dirty(handle, bh);
+ if (status < 0) {
+ mlog_errno(status);
+ goto out_trans;
+ }
+ /* Update chunk header */
status = ocfs2_journal_access_dq(handle, lqinode, chunk->qc_headerbh,
OCFS2_JOURNAL_ACCESS_WRITE);
if (status < 0) {
@@ -1097,6 +1174,7 @@ static struct ocfs2_quota_chunk *ocfs2_extend_local_quota_file(
mlog_errno(status);
goto out_trans;
}
+ /* Update file header */
oinfo->dqi_blocks++;
status = ocfs2_local_write_info(sb, type);
if (status < 0) {
diff --git a/fs/ocfs2/stack_o2cb.c b/fs/ocfs2/stack_o2cb.c
index 3f661376a2de..e49c41050264 100644
--- a/fs/ocfs2/stack_o2cb.c
+++ b/fs/ocfs2/stack_o2cb.c
@@ -17,6 +17,7 @@
* General Public License for more details.
*/
+#include <linux/kernel.h>
#include <linux/crc32.h>
#include <linux/module.h>
@@ -153,7 +154,7 @@ static int status_map[] = {
static int dlm_status_to_errno(enum dlm_status status)
{
- BUG_ON(status > (sizeof(status_map) / sizeof(status_map[0])));
+ BUG_ON(status < 0 || status >= ARRAY_SIZE(status_map));
return status_map[status];
}
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 7efb349fb9bd..b0ee0fdf799a 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -777,6 +777,7 @@ static int ocfs2_sb_probe(struct super_block *sb,
}
di = (struct ocfs2_dinode *) (*bh)->b_data;
memset(stats, 0, sizeof(struct ocfs2_blockcheck_stats));
+ spin_lock_init(&stats->b_lock);
status = ocfs2_verify_volume(di, *bh, blksize, stats);
if (status >= 0)
goto bail;
@@ -1182,7 +1183,7 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
wake_up(&osb->osb_mount_event);
/* Start this when the mount is almost sure of being successful */
- ocfs2_orphan_scan_init(osb);
+ ocfs2_orphan_scan_start(osb);
mlog_exit(status);
return status;
@@ -1213,14 +1214,27 @@ static int ocfs2_get_sb(struct file_system_type *fs_type,
mnt);
}
+static void ocfs2_kill_sb(struct super_block *sb)
+{
+ struct ocfs2_super *osb = OCFS2_SB(sb);
+
+ /* Prevent further queueing of inode drop events */
+ spin_lock(&dentry_list_lock);
+ ocfs2_set_osb_flag(osb, OCFS2_OSB_DROP_DENTRY_LOCK_IMMED);
+ spin_unlock(&dentry_list_lock);
+ /* Wait for work to finish and/or remove it */
+ cancel_work_sync(&osb->dentry_lock_work);
+
+ kill_block_super(sb);
+}
+
static struct file_system_type ocfs2_fs_type = {
.owner = THIS_MODULE,
.name = "ocfs2",
.get_sb = ocfs2_get_sb, /* is this called when we mount
* the fs? */
- .kill_sb = kill_block_super, /* set to the generic one
- * right now, but do we
- * need to change that? */
+ .kill_sb = ocfs2_kill_sb,
+
.fs_flags = FS_REQUIRES_DEV|FS_RENAME_DOES_D_MOVE,
.next = NULL
};
@@ -1819,6 +1833,12 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err)
debugfs_remove(osb->osb_ctxt);
+ /*
+ * Flush inode dropping work queue so that deletes are
+ * performed while the filesystem is still working
+ */
+ ocfs2_drop_all_dl_inodes(osb);
+
/* Orphan scan should be stopped as early as possible */
ocfs2_orphan_scan_stop(osb);
@@ -1981,6 +2001,8 @@ static int ocfs2_initialize_super(struct super_block *sb,
snprintf(osb->dev_str, sizeof(osb->dev_str), "%u,%u",
MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev));
+ ocfs2_orphan_scan_init(osb);
+
status = ocfs2_recovery_init(osb);
if (status) {
mlog(ML_ERROR, "Unable to initialize recovery state\n");
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index ba320e250747..d1a27cda984f 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -1052,7 +1052,8 @@ static int ocfs2_xattr_block_get(struct inode *inode,
struct ocfs2_xattr_block *xb;
struct ocfs2_xattr_value_root *xv;
size_t size;
- int ret = -ENODATA, name_offset, name_len, block_off, i;
+ int ret = -ENODATA, name_offset, name_len, i;
+ int uninitialized_var(block_off);
xs->bucket = ocfs2_xattr_bucket_new(inode);
if (!xs->bucket) {
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 3ce5ae9e3d2d..175db258942f 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -234,23 +234,20 @@ static int check_mem_permission(struct task_struct *task)
struct mm_struct *mm_for_maps(struct task_struct *task)
{
- struct mm_struct *mm = get_task_mm(task);
- if (!mm)
+ struct mm_struct *mm;
+
+ if (mutex_lock_killable(&task->cred_guard_mutex))
return NULL;
- down_read(&mm->mmap_sem);
- task_lock(task);
- if (task->mm != mm)
- goto out;
- if (task->mm != current->mm &&
- __ptrace_may_access(task, PTRACE_MODE_READ) < 0)
- goto out;
- task_unlock(task);
+
+ mm = get_task_mm(task);
+ if (mm && mm != current->mm &&
+ !ptrace_may_access(task, PTRACE_MODE_READ)) {
+ mmput(mm);
+ mm = NULL;
+ }
+ mutex_unlock(&task->cred_guard_mutex);
+
return mm;
-out:
- task_unlock(task);
- up_read(&mm->mmap_sem);
- mmput(mm);
- return NULL;
}
static int proc_pid_cmdline(struct task_struct *task, char * buffer)
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 6f61b7cc32e0..9bd8be1d235c 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -119,6 +119,7 @@ static void *m_start(struct seq_file *m, loff_t *pos)
mm = mm_for_maps(priv->task);
if (!mm)
return NULL;
+ down_read(&mm->mmap_sem);
tail_vma = get_gate_vma(priv->task);
priv->tail_vma = tail_vma;
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c
index 64a72e2e7650..8f5c05d3dbd3 100644
--- a/fs/proc/task_nommu.c
+++ b/fs/proc/task_nommu.c
@@ -189,6 +189,7 @@ static void *m_start(struct seq_file *m, loff_t *pos)
priv->task = NULL;
return NULL;
}
+ down_read(&mm->mmap_sem);
/* start from the Nth VMA */
for (p = rb_first(&mm->mm_rb); p; p = rb_next(p))
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 0c93c7ef3d18..965df1227d64 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -770,7 +770,7 @@ xfs_buf_associate_memory(
bp->b_pages = NULL;
bp->b_addr = mem;
- rval = _xfs_buf_get_pages(bp, page_count, 0);
+ rval = _xfs_buf_get_pages(bp, page_count, XBF_DONT_BLOCK);
if (rval)
return rval;
diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c
index db15feb906ff..4ece1906bd41 100644
--- a/fs/xfs/xfs_attr.c
+++ b/fs/xfs/xfs_attr.c
@@ -2010,7 +2010,9 @@ xfs_attr_rmtval_get(xfs_da_args_t *args)
dblkno = XFS_FSB_TO_DADDR(mp, map[i].br_startblock);
blkcnt = XFS_FSB_TO_BB(mp, map[i].br_blockcount);
error = xfs_read_buf(mp, mp->m_ddev_targp, dblkno,
- blkcnt, XFS_BUF_LOCK, &bp);
+ blkcnt,
+ XFS_BUF_LOCK | XBF_DONT_BLOCK,
+ &bp);
if (error)
return(error);
@@ -2141,8 +2143,8 @@ xfs_attr_rmtval_set(xfs_da_args_t *args)
dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock),
blkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount);
- bp = xfs_buf_get_flags(mp->m_ddev_targp, dblkno,
- blkcnt, XFS_BUF_LOCK);
+ bp = xfs_buf_get_flags(mp->m_ddev_targp, dblkno, blkcnt,
+ XFS_BUF_LOCK | XBF_DONT_BLOCK);
ASSERT(bp);
ASSERT(!XFS_BUF_GETERROR(bp));
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 7928b9983c1d..8ee5b5a76a2a 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -6009,7 +6009,7 @@ xfs_getbmap(
*/
error = ENOMEM;
subnex = 16;
- map = kmem_alloc(subnex * sizeof(*map), KM_MAYFAIL);
+ map = kmem_alloc(subnex * sizeof(*map), KM_MAYFAIL | KM_NOFS);
if (!map)
goto out_unlock_ilock;
diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c
index e9df99574829..26717388acf5 100644
--- a/fs/xfs/xfs_btree.c
+++ b/fs/xfs/xfs_btree.c
@@ -120,8 +120,8 @@ xfs_btree_check_sblock(
XFS_RANDOM_BTREE_CHECK_SBLOCK))) {
if (bp)
xfs_buftrace("SBTREE ERROR", bp);
- XFS_ERROR_REPORT("xfs_btree_check_sblock", XFS_ERRLEVEL_LOW,
- cur->bc_mp);
+ XFS_CORRUPTION_ERROR("xfs_btree_check_sblock",
+ XFS_ERRLEVEL_LOW, cur->bc_mp, block);
return XFS_ERROR(EFSCORRUPTED);
}
return 0;
diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c
index 9ff6e57a5075..2847bbc1c534 100644
--- a/fs/xfs/xfs_da_btree.c
+++ b/fs/xfs/xfs_da_btree.c
@@ -2201,7 +2201,7 @@ kmem_zone_t *xfs_dabuf_zone; /* dabuf zone */
xfs_da_state_t *
xfs_da_state_alloc(void)
{
- return kmem_zone_zalloc(xfs_da_state_zone, KM_SLEEP);
+ return kmem_zone_zalloc(xfs_da_state_zone, KM_NOFS);
}
/*
@@ -2261,9 +2261,9 @@ xfs_da_buf_make(int nbuf, xfs_buf_t **bps, inst_t *ra)
int off;
if (nbuf == 1)
- dabuf = kmem_zone_alloc(xfs_dabuf_zone, KM_SLEEP);
+ dabuf = kmem_zone_alloc(xfs_dabuf_zone, KM_NOFS);
else
- dabuf = kmem_alloc(XFS_DA_BUF_SIZE(nbuf), KM_SLEEP);
+ dabuf = kmem_alloc(XFS_DA_BUF_SIZE(nbuf), KM_NOFS);
dabuf->dirty = 0;
#ifdef XFS_DABUF_DEBUG
dabuf->ra = ra;
diff --git a/fs/xfs/xfs_dir2.c b/fs/xfs/xfs_dir2.c
index c657bec6d951..bb1d58eb3982 100644
--- a/fs/xfs/xfs_dir2.c
+++ b/fs/xfs/xfs_dir2.c
@@ -256,7 +256,7 @@ xfs_dir_cilookup_result(
!(args->op_flags & XFS_DA_OP_CILOOKUP))
return EEXIST;
- args->value = kmem_alloc(len, KM_MAYFAIL);
+ args->value = kmem_alloc(len, KM_NOFS | KM_MAYFAIL);
if (!args->value)
return ENOMEM;
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index cbd451bb4848..2d0b3e1da9e6 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -167,17 +167,25 @@ xfs_growfs_data_private(
new = nb - mp->m_sb.sb_dblocks;
oagcount = mp->m_sb.sb_agcount;
if (nagcount > oagcount) {
+ void *new_perag, *old_perag;
+
xfs_filestream_flush(mp);
+
+ new_perag = kmem_zalloc(sizeof(xfs_perag_t) * nagcount,
+ KM_MAYFAIL);
+ if (!new_perag)
+ return XFS_ERROR(ENOMEM);
+
down_write(&mp->m_peraglock);
- mp->m_perag = kmem_realloc(mp->m_perag,
- sizeof(xfs_perag_t) * nagcount,
- sizeof(xfs_perag_t) * oagcount,
- KM_SLEEP);
- memset(&mp->m_perag[oagcount], 0,
- (nagcount - oagcount) * sizeof(xfs_perag_t));
+ memcpy(new_perag, mp->m_perag, sizeof(xfs_perag_t) * oagcount);
+ old_perag = mp->m_perag;
+ mp->m_perag = new_perag;
+
mp->m_flags |= XFS_MOUNT_32BITINODES;
nagimax = xfs_initialize_perag(mp, nagcount);
up_write(&mp->m_peraglock);
+
+ kmem_free(old_perag);
}
tp = xfs_trans_alloc(mp, XFS_TRANS_GROWFS);
tp->t_flags |= XFS_TRANS_RESERVE;
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 1f22d65fed0a..da428b3fe0f5 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -343,6 +343,16 @@ xfs_iformat(
return XFS_ERROR(EFSCORRUPTED);
}
+ if (unlikely((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) &&
+ !ip->i_mount->m_rtdev_targp)) {
+ xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount,
+ "corrupt dinode %Lu, has realtime flag set.",
+ ip->i_ino);
+ XFS_CORRUPTION_ERROR("xfs_iformat(realtime)",
+ XFS_ERRLEVEL_LOW, ip->i_mount, dip);
+ return XFS_ERROR(EFSCORRUPTED);
+ }
+
switch (ip->i_d.di_mode & S_IFMT) {
case S_IFIFO:
case S_IFCHR:
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 3750f04ede0b..9dbdff3ea484 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -3180,7 +3180,7 @@ try_again:
STATIC void
xlog_state_want_sync(xlog_t *log, xlog_in_core_t *iclog)
{
- ASSERT(spin_is_locked(&log->l_icloglock));
+ assert_spin_locked(&log->l_icloglock);
if (iclog->ic_state == XLOG_STATE_ACTIVE) {
xlog_state_switch_iclogs(log, iclog, 0);
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index c4eca5ed5dab..492d75bae2bf 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -538,7 +538,9 @@ xfs_readlink_bmap(
d = XFS_FSB_TO_DADDR(mp, mval[n].br_startblock);
byte_cnt = XFS_FSB_TO_B(mp, mval[n].br_blockcount);
- bp = xfs_buf_read(mp->m_ddev_targp, d, BTOBB(byte_cnt), 0);
+ bp = xfs_buf_read_flags(mp->m_ddev_targp, d, BTOBB(byte_cnt),
+ XBF_LOCK | XBF_MAPPED |
+ XBF_DONT_BLOCK);
error = XFS_BUF_GETERROR(bp);
if (error) {
xfs_ioerror_alert("xfs_readlink",
diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index d7cd193c2277..a81170de7f6b 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -89,7 +89,9 @@ enum print_line_t {
TRACE_TYPE_NO_CONSUME = 3 /* Handled but ask to not consume */
};
-
+void tracing_generic_entry_update(struct trace_entry *entry,
+ unsigned long flags,
+ int pc);
struct ring_buffer_event *
trace_current_buffer_lock_reserve(int type, unsigned long len,
unsigned long flags, int pc);
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 16713dc672e4..3060bdc35ffe 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -110,6 +110,7 @@ struct kvm_memory_slot {
struct kvm_kernel_irq_routing_entry {
u32 gsi;
+ u32 type;
int (*set)(struct kvm_kernel_irq_routing_entry *e,
struct kvm *kvm, int level);
union {
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index fdffb413b192..f6b90240dd41 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -473,7 +473,6 @@ extern int nfs_writepages(struct address_space *, struct writeback_control *);
extern int nfs_flush_incompatible(struct file *file, struct page *page);
extern int nfs_updatepage(struct file *, struct page *, unsigned int, unsigned int);
extern int nfs_writeback_done(struct rpc_task *, struct nfs_write_data *);
-extern void nfs_writedata_release(void *);
/*
* Try to write back everything synchronously (but check the
@@ -488,7 +487,6 @@ extern int nfs_wb_page_cancel(struct inode *inode, struct page* page);
extern int nfs_commit_inode(struct inode *, int);
extern struct nfs_write_data *nfs_commitdata_alloc(void);
extern void nfs_commit_free(struct nfs_write_data *wdata);
-extern void nfs_commitdata_release(void *wdata);
#else
static inline int
nfs_commit_inode(struct inode *inode, int how)
@@ -507,6 +505,7 @@ nfs_have_writebacks(struct inode *inode)
* Allocate nfs_write_data structures
*/
extern struct nfs_write_data *nfs_writedata_alloc(unsigned int npages);
+extern void nfs_writedata_free(struct nfs_write_data *);
/*
* linux/fs/nfs/read.c
@@ -515,7 +514,6 @@ extern int nfs_readpage(struct file *, struct page *);
extern int nfs_readpages(struct file *, struct address_space *,
struct list_head *, unsigned);
extern int nfs_readpage_result(struct rpc_task *, struct nfs_read_data *);
-extern void nfs_readdata_release(void *data);
extern int nfs_readpage_async(struct nfs_open_context *, struct inode *,
struct page *);
@@ -523,6 +521,7 @@ extern int nfs_readpage_async(struct nfs_open_context *, struct inode *,
* Allocate nfs_read_data structures
*/
extern struct nfs_read_data *nfs_readdata_alloc(unsigned int npages);
+extern void nfs_readdata_free(struct nfs_read_data *);
/*
* linux/fs/nfs3proc.c
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index e604e6ef72dd..b53f7006cc4e 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -115,27 +115,44 @@ enum perf_counter_sample_format {
PERF_SAMPLE_TID = 1U << 1,
PERF_SAMPLE_TIME = 1U << 2,
PERF_SAMPLE_ADDR = 1U << 3,
- PERF_SAMPLE_GROUP = 1U << 4,
+ PERF_SAMPLE_READ = 1U << 4,
PERF_SAMPLE_CALLCHAIN = 1U << 5,
PERF_SAMPLE_ID = 1U << 6,
PERF_SAMPLE_CPU = 1U << 7,
PERF_SAMPLE_PERIOD = 1U << 8,
PERF_SAMPLE_STREAM_ID = 1U << 9,
+ PERF_SAMPLE_RAW = 1U << 10,
- PERF_SAMPLE_MAX = 1U << 10, /* non-ABI */
+ PERF_SAMPLE_MAX = 1U << 11, /* non-ABI */
};
/*
- * Bits that can be set in attr.read_format to request that
- * reads on the counter should return the indicated quantities,
- * in increasing order of bit value, after the counter value.
+ * The format of the data returned by read() on a perf counter fd,
+ * as specified by attr.read_format:
+ *
+ * struct read_format {
+ * { u64 value;
+ * { u64 time_enabled; } && PERF_FORMAT_ENABLED
+ * { u64 time_running; } && PERF_FORMAT_RUNNING
+ * { u64 id; } && PERF_FORMAT_ID
+ * } && !PERF_FORMAT_GROUP
+ *
+ * { u64 nr;
+ * { u64 time_enabled; } && PERF_FORMAT_ENABLED
+ * { u64 time_running; } && PERF_FORMAT_RUNNING
+ * { u64 value;
+ * { u64 id; } && PERF_FORMAT_ID
+ * } cntr[nr];
+ * } && PERF_FORMAT_GROUP
+ * };
*/
enum perf_counter_read_format {
PERF_FORMAT_TOTAL_TIME_ENABLED = 1U << 0,
PERF_FORMAT_TOTAL_TIME_RUNNING = 1U << 1,
PERF_FORMAT_ID = 1U << 2,
+ PERF_FORMAT_GROUP = 1U << 3,
- PERF_FORMAT_MAX = 1U << 3, /* non-ABI */
+ PERF_FORMAT_MAX = 1U << 4, /* non-ABI */
};
#define PERF_ATTR_SIZE_VER0 64 /* sizeof first published struct */
@@ -342,10 +359,8 @@ enum perf_event_type {
* struct {
* struct perf_event_header header;
* u32 pid, tid;
- * u64 value;
- * { u64 time_enabled; } && PERF_FORMAT_ENABLED
- * { u64 time_running; } && PERF_FORMAT_RUNNING
- * { u64 parent_id; } && PERF_FORMAT_ID
+ *
+ * struct read_format values;
* };
*/
PERF_EVENT_READ = 8,
@@ -363,11 +378,24 @@ enum perf_event_type {
* { u32 cpu, res; } && PERF_SAMPLE_CPU
* { u64 period; } && PERF_SAMPLE_PERIOD
*
- * { u64 nr;
- * { u64 id, val; } cnt[nr]; } && PERF_SAMPLE_GROUP
+ * { struct read_format values; } && PERF_SAMPLE_READ
*
* { u64 nr,
* u64 ips[nr]; } && PERF_SAMPLE_CALLCHAIN
+ *
+ * #
+ * # The RAW record below is opaque data wrt the ABI
+ * #
+ * # That is, the ABI doesn't make any promises wrt
+ * # the stability of its content, it may vary depending
+ * # on event, hardware, kernel version and phase of
+ * # the moon.
+ * #
+ * # In other words, PERF_SAMPLE_RAW contents are not an ABI.
+ * #
+ *
+ * { u32 size;
+ * char data[size];}&& PERF_SAMPLE_RAW
* };
*/
PERF_EVENT_SAMPLE = 9,
@@ -413,6 +441,11 @@ struct perf_callchain_entry {
__u64 ip[PERF_MAX_STACK_DEPTH];
};
+/*
+ * Raw sample payload referenced from struct perf_sample_data (->raw).
+ * When PERF_SAMPLE_RAW is requested, perf_counter_output() emits 'size'
+ * followed by 'size' bytes copied from 'data'.
+ */
+struct perf_raw_record {
+ u32 size; /* length of the payload at 'data', in bytes */
+ void *data; /* payload; copied verbatim into the sample */
+};
+
struct task_struct;
/**
@@ -681,10 +714,13 @@ struct perf_sample_data {
struct pt_regs *regs;
u64 addr;
u64 period;
+ struct perf_raw_record *raw;
};
extern int perf_counter_overflow(struct perf_counter *counter, int nmi,
struct perf_sample_data *data);
+extern void perf_counter_output(struct perf_counter *counter, int nmi,
+ struct perf_sample_data *data);
/*
* Return 1 for a software counter, 0 for a hardware counter
diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
index b99c625fddfe..7da466ba4b0d 100644
--- a/include/linux/sunrpc/xdr.h
+++ b/include/linux/sunrpc/xdr.h
@@ -117,17 +117,15 @@ static inline __be32 *xdr_encode_array(__be32 *p, const void *s, unsigned int le
static inline __be32 *
xdr_encode_hyper(__be32 *p, __u64 val)
{
- *p++ = htonl(val >> 32);
- *p++ = htonl(val & 0xFFFFFFFF);
- return p;
+ *(__be64 *)p = cpu_to_be64(val);
+ return p + 2;
}
static inline __be32 *
xdr_decode_hyper(__be32 *p, __u64 *valp)
{
- *valp = ((__u64) ntohl(*p++)) << 32;
- *valp |= ntohl(*p++);
- return p;
+ *valp = be64_to_cpup((__be64 *)p);
+ return p + 2;
}
/*
diff --git a/include/linux/wait.h b/include/linux/wait.h
index 6788e1a4d4ca..cf3c2f5dba51 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -77,7 +77,14 @@ struct task_struct;
#define __WAIT_BIT_KEY_INITIALIZER(word, bit) \
{ .flags = word, .bit_nr = bit, }
-extern void init_waitqueue_head(wait_queue_head_t *q);
+extern void __init_waitqueue_head(wait_queue_head_t *q, struct lock_class_key *);
+
+/*
+ * Initialise a wait queue head.  Each expansion of this macro declares its
+ * own static lock_class_key and passes it to __init_waitqueue_head(), so
+ * every call site supplies a distinct key.
+ */
+#define init_waitqueue_head(q) \
+ do { \
+ static struct lock_class_key __key; \
+ \
+ __init_waitqueue_head((q), &__key); \
+ } while (0)
#ifdef CONFIG_LOCKDEP
# define __WAIT_QUEUE_HEAD_INIT_ONSTACK(name) \
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index 1867553c61e5..f64fbaae781a 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -144,6 +144,9 @@
#undef TP_fast_assign
#define TP_fast_assign(args...) args
+#undef TP_perf_assign
+#define TP_perf_assign(args...)
+
#undef TRACE_EVENT
#define TRACE_EVENT(call, proto, args, tstruct, func, print) \
static int \
@@ -345,6 +348,56 @@ static inline int ftrace_get_offsets_##call( \
#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
+#ifdef CONFIG_EVENT_PROFILE
+
+/*
+ * Generate the functions needed for tracepoint perf_counter support.
+ *
+ * NOTE: The insertion profile callback (ftrace_profile_<call>) is defined later
+ *
+ * static int ftrace_profile_enable_<call>(struct ftrace_event_call *event_call)
+ * {
+ * int ret = 0;
+ *
+ * if (!atomic_inc_return(&event_call->profile_count))
+ * ret = register_trace_<call>(ftrace_profile_<call>);
+ *
+ * return ret;
+ * }
+ *
+ * static void ftrace_profile_disable_<call>(struct ftrace_event_call *event_call)
+ * {
+ * if (atomic_add_negative(-1, &event_call->profile_count))
+ * unregister_trace_<call>(ftrace_profile_<call>);
+ * }
+ *
+ * profile_count is initialised to -1 by _TRACE_PROFILE_INIT, so the probe
+ * is registered when the count goes -1 -> 0 and unregistered when it drops
+ * back from 0 to -1.
+ *
+ */
+
+#undef TRACE_EVENT
+#define TRACE_EVENT(call, proto, args, tstruct, assign, print) \
+ \
+static void ftrace_profile_##call(proto); \
+ \
+static int ftrace_profile_enable_##call(struct ftrace_event_call *event_call) \
+{ \
+ int ret = 0; \
+ \
+ if (!atomic_inc_return(&event_call->profile_count)) \
+ ret = register_trace_##call(ftrace_profile_##call); \
+ \
+ return ret; \
+} \
+ \
+static void ftrace_profile_disable_##call(struct ftrace_event_call *event_call)\
+{ \
+ if (atomic_add_negative(-1, &event_call->profile_count)) \
+ unregister_trace_##call(ftrace_profile_##call); \
+}
+
+#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
+
+#endif
+
/*
* Stage 4 of the trace events.
*
@@ -447,28 +500,6 @@ static inline int ftrace_get_offsets_##call( \
#define TP_FMT(fmt, args...) fmt "\n", ##args
#ifdef CONFIG_EVENT_PROFILE
-#define _TRACE_PROFILE(call, proto, args) \
-static void ftrace_profile_##call(proto) \
-{ \
- extern void perf_tpcounter_event(int); \
- perf_tpcounter_event(event_##call.id); \
-} \
- \
-static int ftrace_profile_enable_##call(struct ftrace_event_call *event_call) \
-{ \
- int ret = 0; \
- \
- if (!atomic_inc_return(&event_call->profile_count)) \
- ret = register_trace_##call(ftrace_profile_##call); \
- \
- return ret; \
-} \
- \
-static void ftrace_profile_disable_##call(struct ftrace_event_call *event_call)\
-{ \
- if (atomic_add_negative(-1, &event_call->profile_count)) \
- unregister_trace_##call(ftrace_profile_##call); \
-}
#define _TRACE_PROFILE_INIT(call) \
.profile_count = ATOMIC_INIT(-1), \
@@ -476,7 +507,6 @@ static void ftrace_profile_disable_##call(struct ftrace_event_call *event_call)\
.profile_disable = ftrace_profile_disable_##call,
#else
-#define _TRACE_PROFILE(call, proto, args)
#define _TRACE_PROFILE_INIT(call)
#endif
@@ -502,7 +532,6 @@ static void ftrace_profile_disable_##call(struct ftrace_event_call *event_call)\
#undef TRACE_EVENT
#define TRACE_EVENT(call, proto, args, tstruct, assign, print) \
-_TRACE_PROFILE(call, PARAMS(proto), PARAMS(args)) \
\
static struct ftrace_event_call event_##call; \
\
@@ -586,6 +615,110 @@ __attribute__((section("_ftrace_events"))) event_##call = { \
#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
-#undef _TRACE_PROFILE
+/*
+ * Define the insertion callback to profile events
+ *
+ * The job is very similar to ftrace_raw_event_<call> except that we don't
+ * insert in the ring buffer but in a perf counter.
+ *
+ * static void ftrace_profile_<call>(proto)
+ * {
+ * struct ftrace_data_offsets_<call> __maybe_unused __data_offsets;
+ * struct ftrace_event_call *event_call = &event_<call>;
+ * extern void perf_tpcounter_event(int, u64, u64, void *, int);
+ * struct ftrace_raw_##call *entry;
+ * u64 __addr = 0, __count = 1;
+ * unsigned long irq_flags;
+ * int __entry_size;
+ * int __data_size;
+ * int pc;
+ *
+ * local_save_flags(irq_flags);
+ * pc = preempt_count();
+ *
+ * __data_size = ftrace_get_offsets_<call>(&__data_offsets, args);
+ *
+ * // Below we want to get the aligned size by taking into account
+ * // the u32 field that will later store the buffer size
+ * __entry_size = ALIGN(__data_size + sizeof(*entry) + sizeof(u32),
+ * sizeof(u64));
+ * __entry_size -= sizeof(u32);
+ *
+ * do {
+ * char raw_data[__entry_size]; <- allocate our sample in the stack
+ * struct trace_entry *ent;
+ *
+ * zero dead bytes from alignment to avoid stack leak to userspace:
+ *
+ * *(u64 *)(&raw_data[__entry_size - sizeof(u64)]) = 0ULL;
+ * entry = (struct ftrace_raw_<call> *)raw_data;
+ * ent = &entry->ent;
+ * tracing_generic_entry_update(ent, irq_flags, pc);
+ * ent->type = event_call->id;
+ *
+ * <tstruct> <- do some jobs with dynamic arrays
+ *
+ * <assign> <- assign our values
+ *
+ * perf_tpcounter_event(event_call->id, __addr, __count, entry,
+ * __entry_size); <- submit them to perf counter
+ * } while (0);
+ *
+ * }
+ */
+
+#ifdef CONFIG_EVENT_PROFILE
+
+/*
+ * Helpers usable from an event's assign block: they overwrite the local
+ * __addr / __count variables of ftrace_profile_<call>() (which default to
+ * 0 and 1) that are later handed to perf_tpcounter_event().
+ */
+#undef __perf_addr
+#define __perf_addr(a) __addr = (a)
+
+#undef __perf_count
+#define __perf_count(c) __count = (c)
+
+#undef TRACE_EVENT
+#define TRACE_EVENT(call, proto, args, tstruct, assign, print) \
+static void ftrace_profile_##call(proto) \
+{ \
+ struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\
+ struct ftrace_event_call *event_call = &event_##call; \
+ extern void perf_tpcounter_event(int, u64, u64, void *, int); \
+ struct ftrace_raw_##call *entry; \
+ u64 __addr = 0, __count = 1; \
+ unsigned long irq_flags; \
+ int __entry_size; \
+ int __data_size; \
+ int pc; \
+ \
+ local_save_flags(irq_flags); \
+ pc = preempt_count(); \
+ \
+ __data_size = ftrace_get_offsets_##call(&__data_offsets, args); \
+ __entry_size = ALIGN(__data_size + sizeof(*entry) + sizeof(u32),\
+ sizeof(u64)); \
+ __entry_size -= sizeof(u32); \
+ \
+ do { \
+ char raw_data[__entry_size]; \
+ struct trace_entry *ent; \
+ \
+ *(u64 *)(&raw_data[__entry_size - sizeof(u64)]) = 0ULL; \
+ entry = (struct ftrace_raw_##call *)raw_data; \
+ ent = &entry->ent; \
+ tracing_generic_entry_update(ent, irq_flags, pc); \
+ ent->type = event_call->id; \
+ \
+ tstruct \
+ \
+ { assign; } \
+ \
+ perf_tpcounter_event(event_call->id, __addr, __count, entry,\
+ __entry_size); \
+ } while (0); \
+ \
+}
+
+#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
+#endif /* CONFIG_EVENT_PROFILE */
+
#undef _TRACE_PROFILE_INIT
diff --git a/kernel/futex.c b/kernel/futex.c
index 0672ff88f159..e18cfbdc7190 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1010,15 +1010,19 @@ void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1,
* requeue_pi_wake_futex() - Wake a task that acquired the lock during requeue
* q: the futex_q
* key: the key of the requeue target futex
+ * hb: the hash_bucket of the requeue target futex
*
* During futex_requeue, with requeue_pi=1, it is possible to acquire the
* target futex if it is uncontended or via a lock steal. Set the futex_q key
* to the requeue target futex so the waiter can detect the wakeup on the right
* futex, but remove it from the hb and NULL the rt_waiter so it can detect
- * atomic lock acquisition. Must be called with the q->lock_ptr held.
+ * atomic lock acquisition. Set the q->lock_ptr to the requeue target hb->lock
+ * to protect access to the pi_state to fixup the owner later. Must be called
+ * with both q->lock_ptr and hb->lock held.
*/
static inline
-void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key)
+void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key,
+ struct futex_hash_bucket *hb)
{
drop_futex_key_refs(&q->key);
get_futex_key_refs(key);
@@ -1030,6 +1034,11 @@ void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key)
WARN_ON(!q->rt_waiter);
q->rt_waiter = NULL;
+ q->lock_ptr = &hb->lock;
+#ifdef CONFIG_DEBUG_PI_LIST
+ q->list.plist.lock = &hb->lock;
+#endif
+
wake_up_state(q->task, TASK_NORMAL);
}
@@ -1088,7 +1097,7 @@ static int futex_proxy_trylock_atomic(u32 __user *pifutex,
ret = futex_lock_pi_atomic(pifutex, hb2, key2, ps, top_waiter->task,
set_waiters);
if (ret == 1)
- requeue_pi_wake_futex(top_waiter, key2);
+ requeue_pi_wake_futex(top_waiter, key2, hb2);
return ret;
}
@@ -1247,8 +1256,15 @@ retry_private:
if (!match_futex(&this->key, &key1))
continue;
- WARN_ON(!requeue_pi && this->rt_waiter);
- WARN_ON(requeue_pi && !this->rt_waiter);
+ /*
+ * FUTEX_WAIT_REQUEUE_PI and FUTEX_CMP_REQUEUE_PI should always
+ * be paired with each other and no other futex ops.
+ */
+ if ((requeue_pi && !this->rt_waiter) ||
+ (!requeue_pi && this->rt_waiter)) {
+ ret = -EINVAL;
+ break;
+ }
/*
* Wake nr_wake waiters. For requeue_pi, if we acquired the
@@ -1273,7 +1289,7 @@ retry_private:
this->task, 1);
if (ret == 1) {
/* We got the lock. */
- requeue_pi_wake_futex(this, &key2);
+ requeue_pi_wake_futex(this, &key2, hb2);
continue;
} else if (ret) {
/* -EDEADLK */
diff --git a/kernel/futex_compat.c b/kernel/futex_compat.c
index d607a5b9ee29..235716556bf1 100644
--- a/kernel/futex_compat.c
+++ b/kernel/futex_compat.c
@@ -180,7 +180,8 @@ asmlinkage long compat_sys_futex(u32 __user *uaddr, int op, u32 val,
int cmd = op & FUTEX_CMD_MASK;
if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI ||
- cmd == FUTEX_WAIT_BITSET)) {
+ cmd == FUTEX_WAIT_BITSET ||
+ cmd == FUTEX_WAIT_REQUEUE_PI)) {
if (get_compat_timespec(&ts, utime))
return -EFAULT;
if (!timespec_valid(&ts))
@@ -191,7 +192,8 @@ asmlinkage long compat_sys_futex(u32 __user *uaddr, int op, u32 val,
t = ktime_add_safe(ktime_get(), t);
tp = &t;
}
- if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE)
+ if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE ||
+ cmd == FUTEX_CMP_REQUEUE_PI || cmd == FUTEX_WAKE_OP)
val2 = (int) (unsigned long) utime;
return do_futex(uaddr, op, val, tp, uaddr2, val2, val3);
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 61c679db4687..d222515a5a06 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -761,7 +761,6 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
{
struct irq_desc *desc = irq_to_desc(irq);
struct irqaction *action, **action_ptr;
- struct task_struct *irqthread;
unsigned long flags;
WARN(in_interrupt(), "Trying to free IRQ %d from IRQ context!\n", irq);
@@ -809,9 +808,6 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
desc->chip->disable(irq);
}
- irqthread = action->thread;
- action->thread = NULL;
-
spin_unlock_irqrestore(&desc->lock, flags);
unregister_handler_proc(irq, action);
@@ -819,12 +815,6 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
/* Make sure it's not being used on another CPU: */
synchronize_irq(irq);
- if (irqthread) {
- if (!test_bit(IRQTF_DIED, &action->thread_flags))
- kthread_stop(irqthread);
- put_task_struct(irqthread);
- }
-
#ifdef CONFIG_DEBUG_SHIRQ
/*
* It's a shared IRQ -- the driver ought to be prepared for an IRQ
@@ -840,6 +830,13 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
local_irq_restore(flags);
}
#endif
+
+ if (action->thread) {
+ if (!test_bit(IRQTF_DIED, &action->thread_flags))
+ kthread_stop(action->thread);
+ put_task_struct(action->thread);
+ }
+
return action;
}
diff --git a/kernel/irq/numa_migrate.c b/kernel/irq/numa_migrate.c
index 2f69bee57bf2..3fd30197da2e 100644
--- a/kernel/irq/numa_migrate.c
+++ b/kernel/irq/numa_migrate.c
@@ -107,8 +107,8 @@ out_unlock:
struct irq_desc *move_irq_desc(struct irq_desc *desc, int node)
{
- /* those all static, do move them */
- if (desc->irq < NR_IRQS_LEGACY)
+ /* those static or target node is -1, do not move them */
+ if (desc->irq < NR_IRQS_LEGACY || node == -1)
return desc;
if (desc->node != node)
diff --git a/kernel/lockdep_proc.c b/kernel/lockdep_proc.c
index d7135aa2d2c4..e94caa666dba 100644
--- a/kernel/lockdep_proc.c
+++ b/kernel/lockdep_proc.c
@@ -758,7 +758,8 @@ static int __init lockdep_proc_init(void)
&proc_lockdep_stats_operations);
#ifdef CONFIG_LOCK_STAT
- proc_create("lock_stat", S_IRUSR, NULL, &proc_lock_stat_operations);
+ proc_create("lock_stat", S_IRUSR | S_IWUSR, NULL,
+ &proc_lock_stat_operations);
#endif
return 0;
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 673c1aaf7332..534e20d14d63 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -88,6 +88,7 @@ void __weak hw_perf_disable(void) { barrier(); }
void __weak hw_perf_enable(void) { barrier(); }
void __weak hw_perf_counter_setup(int cpu) { barrier(); }
+void __weak hw_perf_counter_setup_online(int cpu) { barrier(); }
int __weak
hw_perf_group_sched_in(struct perf_counter *group_leader,
@@ -306,6 +307,10 @@ counter_sched_out(struct perf_counter *counter,
return;
counter->state = PERF_COUNTER_STATE_INACTIVE;
+ if (counter->pending_disable) {
+ counter->pending_disable = 0;
+ counter->state = PERF_COUNTER_STATE_OFF;
+ }
counter->tstamp_stopped = ctx->time;
counter->pmu->disable(counter);
counter->oncpu = -1;
@@ -1691,7 +1696,32 @@ static int perf_release(struct inode *inode, struct file *file)
return 0;
}
-static u64 perf_counter_read_tree(struct perf_counter *counter)
+/*
+ * Compute the number of bytes that the read_format of @counter describes:
+ * the optional time_enabled / time_running words, an extra 'nr' word when
+ * PERF_FORMAT_GROUP is set, plus one entry (value and optional id) per
+ * reported counter -- the group leader and its siblings for a group read,
+ * otherwise a single entry.
+ */
+static int perf_counter_read_size(struct perf_counter *counter)
+{
+ int entry = sizeof(u64); /* value */
+ int size = 0;
+ int nr = 1;
+
+ if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
+ size += sizeof(u64);
+
+ if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
+ size += sizeof(u64);
+
+ if (counter->attr.read_format & PERF_FORMAT_ID)
+ entry += sizeof(u64);
+
+ if (counter->attr.read_format & PERF_FORMAT_GROUP) {
+ nr += counter->group_leader->nr_siblings;
+ size += sizeof(u64); /* the 'nr' word itself */
+ }
+
+ size += entry * nr;
+
+ return size;
+}
+
+static u64 perf_counter_read_value(struct perf_counter *counter)
{
struct perf_counter *child;
u64 total = 0;
@@ -1703,14 +1733,96 @@ static u64 perf_counter_read_tree(struct perf_counter *counter)
return total;
}
+/*
+ * Copy one counter entry to user space at @buf: the counter's value,
+ * followed by its id when PERF_FORMAT_ID is set in @read_format.
+ *
+ * Returns the number of bytes copied, or -EFAULT if copy_to_user() fails.
+ */
+static int perf_counter_read_entry(struct perf_counter *counter,
+ u64 read_format, char __user *buf)
+{
+ int n = 0, count = 0;
+ u64 values[2];
+
+ values[n++] = perf_counter_read_value(counter);
+ if (read_format & PERF_FORMAT_ID)
+ values[n++] = primary_counter_id(counter);
+
+ count = n * sizeof(u64);
+
+ if (copy_to_user(buf, values, count))
+ return -EFAULT;
+
+ return count;
+}
+
+/*
+ * read() back-end for PERF_FORMAT_GROUP: write the group header to @buf
+ * (nr, then the optional time_enabled / time_running words taken from the
+ * group leader), followed by one entry for the leader and one entry for
+ * each of its siblings.
+ *
+ * Returns the number of bytes written, or a negative error (-EFAULT) when
+ * a copy to user space fails.
+ */
+static int perf_counter_read_group(struct perf_counter *counter,
+ u64 read_format, char __user *buf)
+{
+ struct perf_counter *leader = counter->group_leader, *sub;
+ int n = 0, size = 0, err = -EFAULT;
+ u64 values[3];
+
+ values[n++] = 1 + leader->nr_siblings;
+ if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
+ values[n++] = leader->total_time_enabled +
+ atomic64_read(&leader->child_total_time_enabled);
+ }
+ if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
+ values[n++] = leader->total_time_running +
+ atomic64_read(&leader->child_total_time_running);
+ }
+
+ size = n * sizeof(u64);
+
+ if (copy_to_user(buf, values, size))
+ return -EFAULT;
+
+ err = perf_counter_read_entry(leader, read_format, buf + size);
+ if (err < 0)
+ return err;
+
+ size += err;
+
+ list_for_each_entry(sub, &leader->sibling_list, list_entry) {
+ /* report each sibling itself, not the counter that was read */
+ err = perf_counter_read_entry(sub, read_format,
+ buf + size);
+ if (err < 0)
+ return err;
+
+ size += err;
+ }
+
+ return size;
+}
+
+/*
+ * read() back-end for a single counter (no PERF_FORMAT_GROUP): write the
+ * counter value to @buf, followed by time_enabled, time_running and id as
+ * selected by @read_format.  The times include the child_total_time_*
+ * contributions.
+ *
+ * Returns the number of bytes written, or -EFAULT if copy_to_user() fails.
+ */
+static int perf_counter_read_one(struct perf_counter *counter,
+ u64 read_format, char __user *buf)
+{
+ u64 values[4];
+ int n = 0;
+
+ values[n++] = perf_counter_read_value(counter);
+ if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
+ values[n++] = counter->total_time_enabled +
+ atomic64_read(&counter->child_total_time_enabled);
+ }
+ if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
+ values[n++] = counter->total_time_running +
+ atomic64_read(&counter->child_total_time_running);
+ }
+ if (read_format & PERF_FORMAT_ID)
+ values[n++] = primary_counter_id(counter);
+
+ if (copy_to_user(buf, values, n * sizeof(u64)))
+ return -EFAULT;
+
+ return n * sizeof(u64);
+}
+
/*
* Read the performance counter - simple non blocking version for now
*/
static ssize_t
perf_read_hw(struct perf_counter *counter, char __user *buf, size_t count)
{
- u64 values[4];
- int n;
+ u64 read_format = counter->attr.read_format;
+ int ret;
/*
* Return end-of-file for a read on a counter that is in
@@ -1720,28 +1832,18 @@ perf_read_hw(struct perf_counter *counter, char __user *buf, size_t count)
if (counter->state == PERF_COUNTER_STATE_ERROR)
return 0;
+ if (count < perf_counter_read_size(counter))
+ return -ENOSPC;
+
WARN_ON_ONCE(counter->ctx->parent_ctx);
mutex_lock(&counter->child_mutex);
- values[0] = perf_counter_read_tree(counter);
- n = 1;
- if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
- values[n++] = counter->total_time_enabled +
- atomic64_read(&counter->child_total_time_enabled);
- if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
- values[n++] = counter->total_time_running +
- atomic64_read(&counter->child_total_time_running);
- if (counter->attr.read_format & PERF_FORMAT_ID)
- values[n++] = primary_counter_id(counter);
+ if (read_format & PERF_FORMAT_GROUP)
+ ret = perf_counter_read_group(counter, read_format, buf);
+ else
+ ret = perf_counter_read_one(counter, read_format, buf);
mutex_unlock(&counter->child_mutex);
- if (count < n * sizeof(u64))
- return -EINVAL;
- count = n * sizeof(u64);
-
- if (copy_to_user(buf, values, count))
- return -EFAULT;
-
- return count;
+ return ret;
}
static ssize_t
@@ -2245,7 +2347,7 @@ static void perf_pending_counter(struct perf_pending_entry *entry)
if (counter->pending_disable) {
counter->pending_disable = 0;
- perf_counter_disable(counter);
+ __perf_counter_disable(counter);
}
if (counter->pending_wakeup) {
@@ -2630,7 +2732,80 @@ static u32 perf_counter_tid(struct perf_counter *counter, struct task_struct *p)
return task_pid_nr_ns(p, counter->ns);
}
-static void perf_counter_output(struct perf_counter *counter, int nmi,
+/*
+ * Emit the read_format data of a single counter into the output buffer
+ * behind @handle: the counter's own count, followed by time_enabled,
+ * time_running and id as selected by attr.read_format.
+ */
+static void perf_output_read_one(struct perf_output_handle *handle,
+ struct perf_counter *counter)
+{
+ u64 read_format = counter->attr.read_format;
+ u64 values[4];
+ int n = 0;
+
+ values[n++] = atomic64_read(&counter->count);
+ if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
+ values[n++] = counter->total_time_enabled +
+ atomic64_read(&counter->child_total_time_enabled);
+ }
+ if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
+ values[n++] = counter->total_time_running +
+ atomic64_read(&counter->child_total_time_running);
+ }
+ if (read_format & PERF_FORMAT_ID)
+ values[n++] = primary_counter_id(counter);
+
+ perf_output_copy(handle, values, n * sizeof(u64));
+}
+
+/*
+ * Emit the PERF_FORMAT_GROUP read data into the output buffer behind
+ * @handle: nr, the optional time_enabled / time_running words of the group
+ * leader, then one entry (count and optional id) for the leader and for
+ * each sibling.  Unlike perf_output_read_one(), the times written here do
+ * not add the child_total_time_* values.
+ *
+ * XXX PERF_FORMAT_GROUP vs inherited counters seems difficult.
+ */
+static void perf_output_read_group(struct perf_output_handle *handle,
+ struct perf_counter *counter)
+{
+ struct perf_counter *leader = counter->group_leader, *sub;
+ u64 read_format = counter->attr.read_format;
+ u64 values[5];
+ int n = 0;
+
+ values[n++] = 1 + leader->nr_siblings;
+
+ if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
+ values[n++] = leader->total_time_enabled;
+
+ if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
+ values[n++] = leader->total_time_running;
+
+ /* NOTE(review): presumably @counter's own count is already current here -- confirm */
+ if (leader != counter)
+ leader->pmu->read(leader);
+
+ values[n++] = atomic64_read(&leader->count);
+ if (read_format & PERF_FORMAT_ID)
+ values[n++] = primary_counter_id(leader);
+
+ perf_output_copy(handle, values, n * sizeof(u64));
+
+ list_for_each_entry(sub, &leader->sibling_list, list_entry) {
+ n = 0; /* values[] is reused for each sibling entry */
+
+ if (sub != counter)
+ sub->pmu->read(sub);
+
+ values[n++] = atomic64_read(&sub->count);
+ if (read_format & PERF_FORMAT_ID)
+ values[n++] = primary_counter_id(sub);
+
+ perf_output_copy(handle, values, n * sizeof(u64));
+ }
+}
+
+/*
+ * Emit the read_format data for @counter into @handle, using the group
+ * layout when PERF_FORMAT_GROUP is set and the single-counter layout
+ * otherwise.
+ */
+static void perf_output_read(struct perf_output_handle *handle,
+ struct perf_counter *counter)
+{
+ if (counter->attr.read_format & PERF_FORMAT_GROUP)
+ perf_output_read_group(handle, counter);
+ else
+ perf_output_read_one(handle, counter);
+}
+
+void perf_counter_output(struct perf_counter *counter, int nmi,
struct perf_sample_data *data)
{
int ret;
@@ -2641,10 +2816,6 @@ static void perf_counter_output(struct perf_counter *counter, int nmi,
struct {
u32 pid, tid;
} tid_entry;
- struct {
- u64 id;
- u64 counter;
- } group_entry;
struct perf_callchain_entry *callchain = NULL;
int callchain_size = 0;
u64 time;
@@ -2699,10 +2870,8 @@ static void perf_counter_output(struct perf_counter *counter, int nmi,
if (sample_type & PERF_SAMPLE_PERIOD)
header.size += sizeof(u64);
- if (sample_type & PERF_SAMPLE_GROUP) {
- header.size += sizeof(u64) +
- counter->nr_siblings * sizeof(group_entry);
- }
+ if (sample_type & PERF_SAMPLE_READ)
+ header.size += perf_counter_read_size(counter);
if (sample_type & PERF_SAMPLE_CALLCHAIN) {
callchain = perf_callchain(data->regs);
@@ -2714,6 +2883,18 @@ static void perf_counter_output(struct perf_counter *counter, int nmi,
header.size += sizeof(u64);
}
+ if (sample_type & PERF_SAMPLE_RAW) {
+ int size = sizeof(u32);
+
+ if (data->raw)
+ size += data->raw->size;
+ else
+ size += sizeof(u32);
+
+ WARN_ON_ONCE(size & (sizeof(u64)-1));
+ header.size += size;
+ }
+
ret = perf_output_begin(&handle, counter, header.size, nmi, 1);
if (ret)
return;
@@ -2747,26 +2928,8 @@ static void perf_counter_output(struct perf_counter *counter, int nmi,
if (sample_type & PERF_SAMPLE_PERIOD)
perf_output_put(&handle, data->period);
- /*
- * XXX PERF_SAMPLE_GROUP vs inherited counters seems difficult.
- */
- if (sample_type & PERF_SAMPLE_GROUP) {
- struct perf_counter *leader, *sub;
- u64 nr = counter->nr_siblings;
-
- perf_output_put(&handle, nr);
-
- leader = counter->group_leader;
- list_for_each_entry(sub, &leader->sibling_list, list_entry) {
- if (sub != counter)
- sub->pmu->read(sub);
-
- group_entry.id = primary_counter_id(sub);
- group_entry.counter = atomic64_read(&sub->count);
-
- perf_output_put(&handle, group_entry);
- }
- }
+ if (sample_type & PERF_SAMPLE_READ)
+ perf_output_read(&handle, counter);
if (sample_type & PERF_SAMPLE_CALLCHAIN) {
if (callchain)
@@ -2777,6 +2940,22 @@ static void perf_counter_output(struct perf_counter *counter, int nmi,
}
}
+ if (sample_type & PERF_SAMPLE_RAW) {
+ if (data->raw) {
+ perf_output_put(&handle, data->raw->size);
+ perf_output_copy(&handle, data->raw->data, data->raw->size);
+ } else {
+ struct {
+ u32 size;
+ u32 data;
+ } raw = {
+ .size = sizeof(u32),
+ .data = 0,
+ };
+ perf_output_put(&handle, raw);
+ }
+ }
+
perf_output_end(&handle);
}
@@ -2789,8 +2968,6 @@ struct perf_read_event {
u32 pid;
u32 tid;
- u64 value;
- u64 format[3];
};
static void
@@ -2802,34 +2979,20 @@ perf_counter_read_event(struct perf_counter *counter,
.header = {
.type = PERF_EVENT_READ,
.misc = 0,
- .size = sizeof(event) - sizeof(event.format),
+ .size = sizeof(event) + perf_counter_read_size(counter),
},
.pid = perf_counter_pid(counter, task),
.tid = perf_counter_tid(counter, task),
- .value = atomic64_read(&counter->count),
};
- int ret, i = 0;
-
- if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
- event.header.size += sizeof(u64);
- event.format[i++] = counter->total_time_enabled;
- }
-
- if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
- event.header.size += sizeof(u64);
- event.format[i++] = counter->total_time_running;
- }
-
- if (counter->attr.read_format & PERF_FORMAT_ID) {
- event.header.size += sizeof(u64);
- event.format[i++] = primary_counter_id(counter);
- }
+ int ret;
ret = perf_output_begin(&handle, counter, event.header.size, 0, 0);
if (ret)
return;
- perf_output_copy(&handle, &event, event.header.size);
+ perf_output_put(&handle, event);
+ perf_output_read(&handle, counter);
+
perf_output_end(&handle);
}
@@ -2840,7 +3003,8 @@ perf_counter_read_event(struct perf_counter *counter,
*/
struct perf_task_event {
- struct task_struct *task;
+ struct task_struct *task;
+ struct perf_counter_context *task_ctx;
struct {
struct perf_event_header header;
@@ -2864,10 +3028,10 @@ static void perf_counter_task_output(struct perf_counter *counter,
return;
task_event->event.pid = perf_counter_pid(counter, task);
- task_event->event.ppid = perf_counter_pid(counter, task->real_parent);
+ task_event->event.ppid = perf_counter_pid(counter, current);
task_event->event.tid = perf_counter_tid(counter, task);
- task_event->event.ptid = perf_counter_tid(counter, task->real_parent);
+ task_event->event.ptid = perf_counter_tid(counter, current);
perf_output_put(&handle, task_event->event);
perf_output_end(&handle);
@@ -2900,24 +3064,23 @@ static void perf_counter_task_ctx(struct perf_counter_context *ctx,
static void perf_counter_task_event(struct perf_task_event *task_event)
{
struct perf_cpu_context *cpuctx;
- struct perf_counter_context *ctx;
+ struct perf_counter_context *ctx = task_event->task_ctx;
cpuctx = &get_cpu_var(perf_cpu_context);
perf_counter_task_ctx(&cpuctx->ctx, task_event);
put_cpu_var(perf_cpu_context);
rcu_read_lock();
- /*
- * doesn't really matter which of the child contexts the
- * events ends up in.
- */
- ctx = rcu_dereference(current->perf_counter_ctxp);
+ if (!ctx)
+ ctx = rcu_dereference(task_event->task->perf_counter_ctxp);
if (ctx)
perf_counter_task_ctx(ctx, task_event);
rcu_read_unlock();
}
-static void perf_counter_task(struct task_struct *task, int new)
+static void perf_counter_task(struct task_struct *task,
+ struct perf_counter_context *task_ctx,
+ int new)
{
struct perf_task_event task_event;
@@ -2927,8 +3090,9 @@ static void perf_counter_task(struct task_struct *task, int new)
return;
task_event = (struct perf_task_event){
- .task = task,
- .event = {
+ .task = task,
+ .task_ctx = task_ctx,
+ .event = {
.header = {
.type = new ? PERF_EVENT_FORK : PERF_EVENT_EXIT,
.misc = 0,
@@ -2946,7 +3110,7 @@ static void perf_counter_task(struct task_struct *task, int new)
void perf_counter_fork(struct task_struct *task)
{
- perf_counter_task(task, 1);
+ perf_counter_task(task, NULL, 1);
}
/*
@@ -3335,125 +3499,111 @@ int perf_counter_overflow(struct perf_counter *counter, int nmi,
* Generic software counter infrastructure
*/
-static void perf_swcounter_update(struct perf_counter *counter)
+/*
+ * We directly increment counter->count and keep a second value in
+ * counter->hw.period_left to count intervals. This period counter
+ * is kept in the range [-sample_period, 0] so that we can use the
+ * sign as trigger.
+ */
+
+static u64 perf_swcounter_set_period(struct perf_counter *counter)
{
struct hw_perf_counter *hwc = &counter->hw;
- u64 prev, now;
- s64 delta;
+ u64 period = hwc->last_period;
+ u64 nr, offset;
+ s64 old, val;
+
+ hwc->last_period = hwc->sample_period;
again:
- prev = atomic64_read(&hwc->prev_count);
- now = atomic64_read(&hwc->count);
- if (atomic64_cmpxchg(&hwc->prev_count, prev, now) != prev)
- goto again;
+ old = val = atomic64_read(&hwc->period_left);
+ if (val < 0)
+ return 0;
- delta = now - prev;
+ nr = div64_u64(period + val, period);
+ offset = nr * period;
+ val -= offset;
+ if (atomic64_cmpxchg(&hwc->period_left, old, val) != old)
+ goto again;
- atomic64_add(delta, &counter->count);
- atomic64_sub(delta, &hwc->period_left);
+ return nr;
}
-static void perf_swcounter_set_period(struct perf_counter *counter)
+static void perf_swcounter_overflow(struct perf_counter *counter,
+ int nmi, struct perf_sample_data *data)
{
struct hw_perf_counter *hwc = &counter->hw;
- s64 left = atomic64_read(&hwc->period_left);
- s64 period = hwc->sample_period;
+ u64 overflow;
- if (unlikely(left <= -period)) {
- left = period;
- atomic64_set(&hwc->period_left, left);
- hwc->last_period = period;
- }
+ data->period = counter->hw.last_period;
+ overflow = perf_swcounter_set_period(counter);
- if (unlikely(left <= 0)) {
- left += period;
- atomic64_add(period, &hwc->period_left);
- hwc->last_period = period;
- }
+ if (hwc->interrupts == MAX_INTERRUPTS)
+ return;
- atomic64_set(&hwc->prev_count, -left);
- atomic64_set(&hwc->count, -left);
+ for (; overflow; overflow--) {
+ if (perf_counter_overflow(counter, nmi, data)) {
+ /*
+ * We inhibit the overflow from happening when
+ * hwc->interrupts == MAX_INTERRUPTS.
+ */
+ break;
+ }
+ }
}
-static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer)
+static void perf_swcounter_unthrottle(struct perf_counter *counter)
{
- enum hrtimer_restart ret = HRTIMER_RESTART;
- struct perf_sample_data data;
- struct perf_counter *counter;
- u64 period;
-
- counter = container_of(hrtimer, struct perf_counter, hw.hrtimer);
- counter->pmu->read(counter);
-
- data.addr = 0;
- data.regs = get_irq_regs();
/*
- * In case we exclude kernel IPs or are somehow not in interrupt
- * context, provide the next best thing, the user IP.
+ * Nothing to do, we already reset hwc->interrupts.
*/
- if ((counter->attr.exclude_kernel || !data.regs) &&
- !counter->attr.exclude_user)
- data.regs = task_pt_regs(current);
+}
- if (data.regs) {
- if (perf_counter_overflow(counter, 0, &data))
- ret = HRTIMER_NORESTART;
- }
+static void perf_swcounter_add(struct perf_counter *counter, u64 nr,
+ int nmi, struct perf_sample_data *data)
+{
+ struct hw_perf_counter *hwc = &counter->hw;
- period = max_t(u64, 10000, counter->hw.sample_period);
- hrtimer_forward_now(hrtimer, ns_to_ktime(period));
+ atomic64_add(nr, &counter->count);
- return ret;
-}
+ if (!hwc->sample_period)
+ return;
-static void perf_swcounter_overflow(struct perf_counter *counter,
- int nmi, struct perf_sample_data *data)
-{
- data->period = counter->hw.last_period;
+ if (!data->regs)
+ return;
- perf_swcounter_update(counter);
- perf_swcounter_set_period(counter);
- if (perf_counter_overflow(counter, nmi, data))
- /* soft-disable the counter */
- ;
+ if (!atomic64_add_negative(nr, &hwc->period_left))
+ perf_swcounter_overflow(counter, nmi, data);
}
static int perf_swcounter_is_counting(struct perf_counter *counter)
{
- struct perf_counter_context *ctx;
- unsigned long flags;
- int count;
-
+ /*
+ * The counter is active, we're good!
+ */
if (counter->state == PERF_COUNTER_STATE_ACTIVE)
return 1;
+ /*
+ * The counter is off/error, not counting.
+ */
if (counter->state != PERF_COUNTER_STATE_INACTIVE)
return 0;
/*
- * If the counter is inactive, it could be just because
- * its task is scheduled out, or because it's in a group
- * which could not go on the PMU. We want to count in
- * the first case but not the second. If the context is
- * currently active then an inactive software counter must
- * be the second case. If it's not currently active then
- * we need to know whether the counter was active when the
- * context was last active, which we can determine by
- * comparing counter->tstamp_stopped with ctx->time.
- *
- * We are within an RCU read-side critical section,
- * which protects the existence of *ctx.
+ * The counter is inactive, if the context is active
+ * we're part of a group that didn't make it on the 'pmu',
+ * not counting.
*/
- ctx = counter->ctx;
- spin_lock_irqsave(&ctx->lock, flags);
- count = 1;
- /* Re-check state now we have the lock */
- if (counter->state < PERF_COUNTER_STATE_INACTIVE ||
- counter->ctx->is_active ||
- counter->tstamp_stopped < ctx->time)
- count = 0;
- spin_unlock_irqrestore(&ctx->lock, flags);
- return count;
+ if (counter->ctx->is_active)
+ return 0;
+
+ /*
+ * We're inactive and the context is too, this means the
+ * task is scheduled out, we're counting events that happen
+ * to us, like migration events.
+ */
+ return 1;
}
static int perf_swcounter_match(struct perf_counter *counter,
@@ -3479,15 +3629,6 @@ static int perf_swcounter_match(struct perf_counter *counter,
return 1;
}
-static void perf_swcounter_add(struct perf_counter *counter, u64 nr,
- int nmi, struct perf_sample_data *data)
-{
- int neg = atomic64_add_negative(nr, &counter->hw.count);
-
- if (counter->hw.sample_period && !neg && data->regs)
- perf_swcounter_overflow(counter, nmi, data);
-}
-
static void perf_swcounter_ctx_event(struct perf_counter_context *ctx,
enum perf_type_id type,
u32 event, u64 nr, int nmi,
@@ -3566,27 +3707,66 @@ void __perf_swcounter_event(u32 event, u64 nr, int nmi,
static void perf_swcounter_read(struct perf_counter *counter)
{
- perf_swcounter_update(counter);
}
static int perf_swcounter_enable(struct perf_counter *counter)
{
- perf_swcounter_set_period(counter);
+ struct hw_perf_counter *hwc = &counter->hw;
+
+ if (hwc->sample_period) {
+ hwc->last_period = hwc->sample_period;
+ perf_swcounter_set_period(counter);
+ }
return 0;
}
static void perf_swcounter_disable(struct perf_counter *counter)
{
- perf_swcounter_update(counter);
}
static const struct pmu perf_ops_generic = {
.enable = perf_swcounter_enable,
.disable = perf_swcounter_disable,
.read = perf_swcounter_read,
+ .unthrottle = perf_swcounter_unthrottle,
};
/*
+ * hrtimer based swcounter callback
+ */
+
+static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer)
+{
+ enum hrtimer_restart ret = HRTIMER_RESTART;
+ struct perf_sample_data data;
+ struct perf_counter *counter;
+ u64 period;
+
+ counter = container_of(hrtimer, struct perf_counter, hw.hrtimer);
+ counter->pmu->read(counter);
+
+ data.addr = 0;
+ data.regs = get_irq_regs();
+ /*
+ * In case we exclude kernel IPs or are somehow not in interrupt
+ * context, provide the next best thing, the user IP.
+ */
+ if ((counter->attr.exclude_kernel || !data.regs) &&
+ !counter->attr.exclude_user)
+ data.regs = task_pt_regs(current);
+
+ if (data.regs) {
+ if (perf_counter_overflow(counter, 0, &data))
+ ret = HRTIMER_NORESTART;
+ }
+
+ period = max_t(u64, 10000, counter->hw.sample_period);
+ hrtimer_forward_now(hrtimer, ns_to_ktime(period));
+
+ return ret;
+}
+
+/*
* Software counter: cpu wall time clock
*/
@@ -3703,17 +3883,24 @@ static const struct pmu perf_ops_task_clock = {
};
#ifdef CONFIG_EVENT_PROFILE
-void perf_tpcounter_event(int event_id)
+void perf_tpcounter_event(int event_id, u64 addr, u64 count, void *record,
+ int entry_size)
{
+ struct perf_raw_record raw = {
+ .size = entry_size,
+ .data = record,
+ };
+
struct perf_sample_data data = {
.regs = get_irq_regs(),
- .addr = 0,
+ .addr = addr,
+ .raw = &raw,
};
if (!data.regs)
data.regs = task_pt_regs(current);
- do_perf_swcounter_event(PERF_TYPE_TRACEPOINT, event_id, 1, 1, &data);
+ do_perf_swcounter_event(PERF_TYPE_TRACEPOINT, event_id, count, 1, &data);
}
EXPORT_SYMBOL_GPL(perf_tpcounter_event);
@@ -3727,6 +3914,14 @@ static void tp_perf_counter_destroy(struct perf_counter *counter)
static const struct pmu *tp_perf_counter_init(struct perf_counter *counter)
{
+ /*
+ * Raw tracepoint data is a severe data leak, only allow root to
+ * have these.
+ */
+ if ((counter->attr.sample_type & PERF_SAMPLE_RAW) &&
+ !capable(CAP_SYS_ADMIN))
+ return ERR_PTR(-EPERM);
+
if (ftrace_profile_enable(counter->attr.config))
return NULL;
@@ -3860,9 +4055,9 @@ perf_counter_alloc(struct perf_counter_attr *attr,
atomic64_set(&hwc->period_left, hwc->sample_period);
/*
- * we currently do not support PERF_SAMPLE_GROUP on inherited counters
+ * we currently do not support PERF_FORMAT_GROUP on inherited counters
*/
- if (attr->inherit && (attr->sample_type & PERF_SAMPLE_GROUP))
+ if (attr->inherit && (attr->read_format & PERF_FORMAT_GROUP))
goto done;
switch (attr->type) {
@@ -4269,7 +4464,7 @@ void perf_counter_exit_task(struct task_struct *child)
unsigned long flags;
if (likely(!child->perf_counter_ctxp)) {
- perf_counter_task(child, 0);
+ perf_counter_task(child, NULL, 0);
return;
}
@@ -4289,6 +4484,7 @@ void perf_counter_exit_task(struct task_struct *child)
* incremented the context's refcount before we do put_ctx below.
*/
spin_lock(&child_ctx->lock);
+ child->perf_counter_ctxp = NULL;
/*
* If this context is a clone; unclone it so it can't get
* swapped to another process while we're removing all
@@ -4302,9 +4498,7 @@ void perf_counter_exit_task(struct task_struct *child)
* won't get any samples after PERF_EVENT_EXIT. We can however still
* get a few PERF_EVENT_READ events.
*/
- perf_counter_task(child, 0);
-
- child->perf_counter_ctxp = NULL;
+ perf_counter_task(child, child_ctx, 0);
/*
* We can recurse on the same lock type through:
@@ -4525,6 +4719,11 @@ perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu)
perf_counter_init_cpu(cpu);
break;
+ case CPU_ONLINE:
+ case CPU_ONLINE_FROZEN:
+ hw_perf_counter_setup_online(cpu);
+ break;
+
case CPU_DOWN_PREPARE:
case CPU_DOWN_PREPARE_FROZEN:
perf_counter_exit_cpu(cpu);
@@ -4549,6 +4748,8 @@ void __init perf_counter_init(void)
{
perf_cpu_notify(&perf_cpu_nb, (unsigned long)CPU_UP_PREPARE,
(void *)(long)smp_processor_id());
+ perf_cpu_notify(&perf_cpu_nb, (unsigned long)CPU_ONLINE,
+ (void *)(long)smp_processor_id());
register_cpu_notifier(&perf_cpu_nb);
}
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index bece7c0b67b2..e33a21cb9407 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -521,11 +521,12 @@ void posix_cpu_timers_exit(struct task_struct *tsk)
}
void posix_cpu_timers_exit_group(struct task_struct *tsk)
{
- struct task_cputime cputime;
+ struct signal_struct *const sig = tsk->signal;
- thread_group_cputimer(tsk, &cputime);
cleanup_timers(tsk->signal->cpu_timers,
- cputime.utime, cputime.stime, cputime.sum_exec_runtime);
+ cputime_add(tsk->utime, sig->utime),
+ cputime_add(tsk->stime, sig->stime),
+ tsk->se.sum_exec_runtime + sig->sum_sched_runtime);
}
static void clear_dead_task(struct k_itimer *timer, union cpu_time_count now)
diff --git a/kernel/rtmutex.c b/kernel/rtmutex.c
index fcd107a78c5a..29bd4baf9e75 100644
--- a/kernel/rtmutex.c
+++ b/kernel/rtmutex.c
@@ -1039,16 +1039,14 @@ int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
if (!rt_mutex_owner(lock) || try_to_steal_lock(lock, task)) {
/* We got the lock for task. */
debug_rt_mutex_lock(lock);
-
rt_mutex_set_owner(lock, task, 0);
-
+ spin_unlock(&lock->wait_lock);
rt_mutex_deadlock_account_lock(lock, task);
return 1;
}
ret = task_blocks_on_rt_mutex(lock, waiter, task, detect_deadlock);
-
if (ret && !waiter->task) {
/*
* Reset the return value. We might have
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 1090b0aed9ba..7a34cb563fec 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -267,8 +267,8 @@ static void blk_trace_free(struct blk_trace *bt)
{
debugfs_remove(bt->msg_file);
debugfs_remove(bt->dropped_file);
- debugfs_remove(bt->dir);
relay_close(bt->rchan);
+ debugfs_remove(bt->dir);
free_percpu(bt->sequence);
free_percpu(bt->msg_data);
kfree(bt);
@@ -378,18 +378,8 @@ static int blk_subbuf_start_callback(struct rchan_buf *buf, void *subbuf,
static int blk_remove_buf_file_callback(struct dentry *dentry)
{
- struct dentry *parent = dentry->d_parent;
debugfs_remove(dentry);
- /*
- * this will fail for all but the last file, but that is ok. what we
- * care about is the top level buts->name directory going away, when
- * the last trace file is gone. Then we don't have to rmdir() that
- * manually on trace stop, so it nicely solves the issue with
- * force killing of running traces.
- */
-
- debugfs_remove(parent);
return 0;
}
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index bf27bb7a63e2..a330513d96ce 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -735,6 +735,7 @@ ring_buffer_free(struct ring_buffer *buffer)
put_online_cpus();
+ kfree(buffer->buffers);
free_cpumask_var(buffer->cpumask);
kfree(buffer);
@@ -1785,7 +1786,7 @@ void ring_buffer_discard_commit(struct ring_buffer *buffer,
*/
RB_WARN_ON(buffer, !local_read(&cpu_buffer->committing));
- if (!rb_try_to_discard(cpu_buffer, event))
+ if (rb_try_to_discard(cpu_buffer, event))
goto out;
/*
@@ -2383,7 +2384,6 @@ rb_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
* the box. Return the padding, and we will release
* the current locks, and try again.
*/
- rb_advance_reader(cpu_buffer);
return event;
case RINGBUF_TYPE_TIME_EXTEND:
@@ -2486,7 +2486,7 @@ static inline int rb_ok_to_lock(void)
* buffer too. A one time deal is all you get from reading
* the ring buffer from an NMI.
*/
- if (likely(!in_nmi() && !oops_in_progress))
+ if (likely(!in_nmi()))
return 1;
tracing_off_permanent();
@@ -2519,6 +2519,8 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
if (dolock)
spin_lock(&cpu_buffer->reader_lock);
event = rb_buffer_peek(buffer, cpu, ts);
+ if (event && event->type_len == RINGBUF_TYPE_PADDING)
+ rb_advance_reader(cpu_buffer);
if (dolock)
spin_unlock(&cpu_buffer->reader_lock);
local_irq_restore(flags);
@@ -2590,12 +2592,9 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts)
spin_lock(&cpu_buffer->reader_lock);
event = rb_buffer_peek(buffer, cpu, ts);
- if (!event)
- goto out_unlock;
-
- rb_advance_reader(cpu_buffer);
+ if (event)
+ rb_advance_reader(cpu_buffer);
- out_unlock:
if (dolock)
spin_unlock(&cpu_buffer->reader_lock);
local_irq_restore(flags);
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 8930e39b9d8c..c22b40f8f576 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -848,6 +848,7 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
(need_resched() ? TRACE_FLAG_NEED_RESCHED : 0);
}
+EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
struct ring_buffer_event *trace_buffer_lock_reserve(struct trace_array *tr,
int type,
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 3548ae5cc780..8b9f4f6e9559 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -438,10 +438,6 @@ struct trace_entry *tracing_get_trace_entry(struct trace_array *tr,
struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
int *ent_cpu, u64 *ent_ts);
-void tracing_generic_entry_update(struct trace_entry *entry,
- unsigned long flags,
- int pc);
-
void default_wait_pipe(struct trace_iterator *iter);
void poll_wait_pipe(struct trace_iterator *iter);
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 936c621bbf46..f32dc9d1ea7b 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -624,9 +624,6 @@ static int filter_add_subsystem_pred(struct filter_parse_state *ps,
return -ENOSPC;
}
- filter->preds[filter->n_preds] = pred;
- filter->n_preds++;
-
list_for_each_entry(call, &ftrace_events, list) {
if (!call->define_fields)
@@ -643,6 +640,9 @@ static int filter_add_subsystem_pred(struct filter_parse_state *ps,
}
replace_filter_string(call->filter, filter_string);
}
+
+ filter->preds[filter->n_preds] = pred;
+ filter->n_preds++;
out:
return err;
}
@@ -1029,12 +1029,17 @@ static int replace_preds(struct event_subsystem *system,
if (elt->op == OP_AND || elt->op == OP_OR) {
pred = create_logical_pred(elt->op);
+ if (!pred)
+ return -ENOMEM;
if (call) {
err = filter_add_pred(ps, call, pred);
filter_free_pred(pred);
- } else
+ } else {
err = filter_add_subsystem_pred(ps, system,
pred, filter_string);
+ if (err)
+ filter_free_pred(pred);
+ }
if (err)
return err;
@@ -1048,12 +1053,17 @@ static int replace_preds(struct event_subsystem *system,
}
pred = create_pred(elt->op, operand1, operand2);
+ if (!pred)
+ return -ENOMEM;
if (call) {
err = filter_add_pred(ps, call, pred);
filter_free_pred(pred);
- } else
+ } else {
err = filter_add_subsystem_pred(ps, system, pred,
filter_string);
+ if (err)
+ filter_free_pred(pred);
+ }
if (err)
return err;
diff --git a/kernel/wait.c b/kernel/wait.c
index ea7c3b4275cf..c4bd3d825f35 100644
--- a/kernel/wait.c
+++ b/kernel/wait.c
@@ -10,13 +10,14 @@
#include <linux/wait.h>
#include <linux/hash.h>
-void init_waitqueue_head(wait_queue_head_t *q)
+void __init_waitqueue_head(wait_queue_head_t *q, struct lock_class_key *key)
{
spin_lock_init(&q->lock);
+ lockdep_set_class(&q->lock, key);
INIT_LIST_HEAD(&q->task_list);
}
-EXPORT_SYMBOL(init_waitqueue_head);
+EXPORT_SYMBOL(__init_waitqueue_head);
void add_wait_queue(wait_queue_head_t *q, wait_queue_t *wait)
{
diff --git a/mm/mempool.c b/mm/mempool.c
index a46eb1b4bb66..32e75d400503 100644
--- a/mm/mempool.c
+++ b/mm/mempool.c
@@ -303,14 +303,14 @@ EXPORT_SYMBOL(mempool_free_slab);
*/
void *mempool_kmalloc(gfp_t gfp_mask, void *pool_data)
{
- size_t size = (size_t)(long)pool_data;
+ size_t size = (size_t)pool_data;
return kmalloc(size, gfp_mask);
}
EXPORT_SYMBOL(mempool_kmalloc);
void *mempool_kzalloc(gfp_t gfp_mask, void *pool_data)
{
- size_t size = (size_t) pool_data;
+ size_t size = (size_t)pool_data;
return kzalloc(size, gfp_mask);
}
EXPORT_SYMBOL(mempool_kzalloc);
diff --git a/net/socket.c b/net/socket.c
index 791d71a36a93..6d4716559047 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -736,7 +736,7 @@ static ssize_t sock_sendpage(struct file *file, struct page *page,
if (more)
flags |= MSG_MORE;
- return sock->ops->sendpage(sock, page, offset, size, flags);
+ return kernel_sendpage(sock, page, offset, size, flags);
}
static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index 406e26de584e..8bd690c48b69 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -24,7 +24,7 @@ xdr_encode_netobj(__be32 *p, const struct xdr_netobj *obj)
unsigned int quadlen = XDR_QUADLEN(obj->len);
p[quadlen] = 0; /* zero trailing bytes */
- *p++ = htonl(obj->len);
+ *p++ = cpu_to_be32(obj->len);
memcpy(p, obj->data, obj->len);
return p + XDR_QUADLEN(obj->len);
}
@@ -35,7 +35,7 @@ xdr_decode_netobj(__be32 *p, struct xdr_netobj *obj)
{
unsigned int len;
- if ((len = ntohl(*p++)) > XDR_MAX_NETOBJ)
+ if ((len = be32_to_cpu(*p++)) > XDR_MAX_NETOBJ)
return NULL;
obj->len = len;
obj->data = (u8 *) p;
@@ -83,7 +83,7 @@ EXPORT_SYMBOL_GPL(xdr_encode_opaque_fixed);
*/
__be32 *xdr_encode_opaque(__be32 *p, const void *ptr, unsigned int nbytes)
{
- *p++ = htonl(nbytes);
+ *p++ = cpu_to_be32(nbytes);
return xdr_encode_opaque_fixed(p, ptr, nbytes);
}
EXPORT_SYMBOL_GPL(xdr_encode_opaque);
@@ -101,7 +101,7 @@ xdr_decode_string_inplace(__be32 *p, char **sp,
{
u32 len;
- len = ntohl(*p++);
+ len = be32_to_cpu(*p++);
if (len > maxlen)
return NULL;
*lenp = len;
@@ -771,7 +771,7 @@ xdr_decode_word(struct xdr_buf *buf, unsigned int base, u32 *obj)
status = read_bytes_from_xdr_buf(buf, base, &raw, sizeof(*obj));
if (status)
return status;
- *obj = ntohl(raw);
+ *obj = be32_to_cpu(raw);
return 0;
}
EXPORT_SYMBOL_GPL(xdr_decode_word);
@@ -779,7 +779,7 @@ EXPORT_SYMBOL_GPL(xdr_decode_word);
int
xdr_encode_word(struct xdr_buf *buf, unsigned int base, u32 obj)
{
- __be32 raw = htonl(obj);
+ __be32 raw = cpu_to_be32(obj);
return write_bytes_to_xdr_buf(buf, base, &raw, sizeof(obj));
}
diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl
index d29baa2e063a..911ba7ffab84 100755
--- a/scripts/recordmcount.pl
+++ b/scripts/recordmcount.pl
@@ -393,7 +393,7 @@ while (<IN>) {
$read_function = 0;
}
# print out any recorded offsets
- update_funcs() if ($text_found);
+ update_funcs() if (defined($ref_func));
# reset all markers and arrays
$text_found = 0;
@@ -414,7 +414,10 @@ while (<IN>) {
$offset = hex $1;
} else {
# if we already have a function, and this is weak, skip it
- if (!defined($ref_func) && !defined($weak{$text})) {
+ if (!defined($ref_func) && !defined($weak{$text}) &&
+ # PPC64 can have symbols that start with .L and
+ # gcc considers these special. Don't use them!
+ $text !~ /^\.L/) {
$ref_func = $text;
$offset = hex $1;
}
@@ -441,7 +444,7 @@ while (<IN>) {
}
# dump out anymore offsets that may have been found
-update_funcs() if ($text_found);
+update_funcs() if (defined($ref_func));
# If we did not find any mcount callers, we are done (do nothing).
if (!$opened) {
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 15c2a08a66f1..1e8cfc4c2ed6 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -1285,6 +1285,8 @@ static int inode_doinit_with_dentry(struct inode *inode, struct dentry *opt_dent
rc = inode->i_op->getxattr(dentry, XATTR_NAME_SELINUX,
context, len);
if (rc == -ERANGE) {
+ kfree(context);
+
/* Need a larger buffer. Query for the right size. */
rc = inode->i_op->getxattr(dentry, XATTR_NAME_SELINUX,
NULL, 0);
@@ -1292,7 +1294,6 @@ static int inode_doinit_with_dentry(struct inode *inode, struct dentry *opt_dent
dput(dentry);
goto out_unlock;
}
- kfree(context);
len = rc;
context = kmalloc(len+1, GFP_NOFS);
if (!context) {
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 51c44fdbc0f0..fea976793ae5 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -13563,6 +13563,8 @@ static int patch_alc269(struct hda_codec *codec)
set_capture_mixer(spec);
set_beep_amp(spec, 0x0b, 0x04, HDA_INPUT);
+ spec->vmaster_nid = 0x02;
+
codec->patch_ops = alc_patch_ops;
if (board_config == ALC269_AUTO)
spec->init_hook = alc269_auto_init;
@@ -15577,9 +15579,12 @@ static int patch_alc861vd(struct hda_codec *codec)
spec->stream_digital_playback = &alc861vd_pcm_digital_playback;
spec->stream_digital_capture = &alc861vd_pcm_digital_capture;
- spec->adc_nids = alc861vd_adc_nids;
- spec->num_adc_nids = ARRAY_SIZE(alc861vd_adc_nids);
- spec->capsrc_nids = alc861vd_capsrc_nids;
+ if (!spec->adc_nids) {
+ spec->adc_nids = alc861vd_adc_nids;
+ spec->num_adc_nids = ARRAY_SIZE(alc861vd_adc_nids);
+ }
+ if (!spec->capsrc_nids)
+ spec->capsrc_nids = alc861vd_capsrc_nids;
set_capture_mixer(spec);
set_beep_amp(spec, 0x0b, 0x05, HDA_INPUT);
@@ -17496,9 +17501,12 @@ static int patch_alc662(struct hda_codec *codec)
spec->stream_digital_playback = &alc662_pcm_digital_playback;
spec->stream_digital_capture = &alc662_pcm_digital_capture;
- spec->adc_nids = alc662_adc_nids;
- spec->num_adc_nids = ARRAY_SIZE(alc662_adc_nids);
- spec->capsrc_nids = alc662_capsrc_nids;
+ if (!spec->adc_nids) {
+ spec->adc_nids = alc662_adc_nids;
+ spec->num_adc_nids = ARRAY_SIZE(alc662_adc_nids);
+ }
+ if (!spec->capsrc_nids)
+ spec->capsrc_nids = alc662_capsrc_nids;
if (!spec->cap_mixer)
set_capture_mixer(spec);
diff --git a/sound/soc/fsl/efika-audio-fabric.c b/sound/soc/fsl/efika-audio-fabric.c
index 85b0e7569504..3326e2a1e863 100644
--- a/sound/soc/fsl/efika-audio-fabric.c
+++ b/sound/soc/fsl/efika-audio-fabric.c
@@ -30,6 +30,8 @@
#include "mpc5200_psc_ac97.h"
#include "../codecs/stac9766.h"
+#define DRV_NAME "efika-audio-fabric"
+
static struct snd_soc_device device;
static struct snd_soc_card card;
diff --git a/sound/soc/fsl/pcm030-audio-fabric.c b/sound/soc/fsl/pcm030-audio-fabric.c
index 8766f7a3893d..b928ef7d28eb 100644
--- a/sound/soc/fsl/pcm030-audio-fabric.c
+++ b/sound/soc/fsl/pcm030-audio-fabric.c
@@ -30,6 +30,8 @@
#include "mpc5200_psc_ac97.h"
#include "../codecs/wm9712.h"
+#define DRV_NAME "pcm030-audio-fabric"
+
static struct snd_soc_device device;
static struct snd_soc_card card;
diff --git a/tools/perf/Documentation/perf-examples.txt b/tools/perf/Documentation/perf-examples.txt
new file mode 100644
index 000000000000..8eb6c489fb15
--- /dev/null
+++ b/tools/perf/Documentation/perf-examples.txt
@@ -0,0 +1,225 @@
+
+ ------------------------------
+ ****** perf by examples ******
+ ------------------------------
+
+[ From an e-mail by Ingo Molnar, http://lkml.org/lkml/2009/8/4/346 ]
+
+
+First, discovery/enumeration of available counters can be done via
+'perf list':
+
+titan:~> perf list
+ [...]
+ kmem:kmalloc [Tracepoint event]
+ kmem:kmem_cache_alloc [Tracepoint event]
+ kmem:kmalloc_node [Tracepoint event]
+ kmem:kmem_cache_alloc_node [Tracepoint event]
+ kmem:kfree [Tracepoint event]
+ kmem:kmem_cache_free [Tracepoint event]
+ kmem:mm_page_free_direct [Tracepoint event]
+ kmem:mm_pagevec_free [Tracepoint event]
+ kmem:mm_page_alloc [Tracepoint event]
+ kmem:mm_page_alloc_zone_locked [Tracepoint event]
+ kmem:mm_page_pcpu_drain [Tracepoint event]
+ kmem:mm_page_alloc_extfrag [Tracepoint event]
+
+Then any (or all) of the above event sources can be activated and
+measured. For example the page alloc/free properties of a 'hackbench
+run' are:
+
+ titan:~> perf stat -e kmem:mm_page_pcpu_drain -e kmem:mm_page_alloc
+ -e kmem:mm_pagevec_free -e kmem:mm_page_free_direct ./hackbench 10
+ Time: 0.575
+
+ Performance counter stats for './hackbench 10':
+
+ 13857 kmem:mm_page_pcpu_drain
+ 27576 kmem:mm_page_alloc
+ 6025 kmem:mm_pagevec_free
+ 20934 kmem:mm_page_free_direct
+
+ 0.613972165 seconds time elapsed
+
+You can observe the statistical properties as well, by using the
+'repeat the workload N times' feature of perf stat:
+
+ titan:~> perf stat --repeat 5 -e kmem:mm_page_pcpu_drain -e
+ kmem:mm_page_alloc -e kmem:mm_pagevec_free -e
+ kmem:mm_page_free_direct ./hackbench 10
+ Time: 0.627
+ Time: 0.644
+ Time: 0.564
+ Time: 0.559
+ Time: 0.626
+
+ Performance counter stats for './hackbench 10' (5 runs):
+
+ 12920 kmem:mm_page_pcpu_drain ( +- 3.359% )
+ 25035 kmem:mm_page_alloc ( +- 3.783% )
+ 6104 kmem:mm_pagevec_free ( +- 0.934% )
+ 18376 kmem:mm_page_free_direct ( +- 4.941% )
+
+ 0.643954516 seconds time elapsed ( +- 2.363% )
+
+Furthermore, these tracepoints can be used to sample the workload as
+well. For example the page allocations done by a 'git gc' can be
+captured the following way:
+
+ titan:~/git> perf record -f -e kmem:mm_page_alloc -c 1 ./git gc
+ Counting objects: 1148, done.
+ Delta compression using up to 2 threads.
+ Compressing objects: 100% (450/450), done.
+ Writing objects: 100% (1148/1148), done.
+ Total 1148 (delta 690), reused 1148 (delta 690)
+ [ perf record: Captured and wrote 0.267 MB perf.data (~11679 samples) ]
+
+To check which functions generated page allocations:
+
+ titan:~/git> perf report
+ # Samples: 10646
+ #
+ # Overhead Command Shared Object
+ # ........ ............... ..........................
+ #
+ 23.57% git-repack /lib64/libc-2.5.so
+ 21.81% git /lib64/libc-2.5.so
+ 14.59% git ./git
+ 11.79% git-repack ./git
+ 7.12% git /lib64/ld-2.5.so
+ 3.16% git-repack /lib64/libpthread-2.5.so
+ 2.09% git-repack /bin/bash
+ 1.97% rm /lib64/libc-2.5.so
+ 1.39% mv /lib64/ld-2.5.so
+ 1.37% mv /lib64/libc-2.5.so
+ 1.12% git-repack /lib64/ld-2.5.so
+ 0.95% rm /lib64/ld-2.5.so
+ 0.90% git-update-serv /lib64/libc-2.5.so
+ 0.73% git-update-serv /lib64/ld-2.5.so
+ 0.68% perf /lib64/libpthread-2.5.so
+ 0.64% git-repack /usr/lib64/libz.so.1.2.3
+
+Or to see it on a more fine-grained level:
+
+titan:~/git> perf report --sort comm,dso,symbol
+# Samples: 10646
+#
+# Overhead Command Shared Object Symbol
+# ........ ............... .......................... ......
+#
+ 9.35% git-repack ./git [.] insert_obj_hash
+ 9.12% git ./git [.] insert_obj_hash
+ 7.31% git /lib64/libc-2.5.so [.] memcpy
+ 6.34% git-repack /lib64/libc-2.5.so [.] _int_malloc
+ 6.24% git-repack /lib64/libc-2.5.so [.] memcpy
+ 5.82% git-repack /lib64/libc-2.5.so [.] __GI___fork
+ 5.47% git /lib64/libc-2.5.so [.] _int_malloc
+ 2.99% git /lib64/libc-2.5.so [.] memset
+
+Furthermore, call-graph sampling can be done too, of page
+allocations - to see precisely what kind of page allocations there
+are:
+
+ titan:~/git> perf record -f -g -e kmem:mm_page_alloc -c 1 ./git gc
+ Counting objects: 1148, done.
+ Delta compression using up to 2 threads.
+ Compressing objects: 100% (450/450), done.
+ Writing objects: 100% (1148/1148), done.
+ Total 1148 (delta 690), reused 1148 (delta 690)
+ [ perf record: Captured and wrote 0.963 MB perf.data (~42069 samples) ]
+
+ titan:~/git> perf report -g
+ # Samples: 10686
+ #
+ # Overhead Command Shared Object
+ # ........ ............... ..........................
+ #
+ 23.25% git-repack /lib64/libc-2.5.so
+ |
+ |--50.00%-- _int_free
+ |
+ |--37.50%-- __GI___fork
+ | make_child
+ |
+ |--12.50%-- ptmalloc_unlock_all2
+ | make_child
+ |
+ --6.25%-- __GI_strcpy
+ 21.61% git /lib64/libc-2.5.so
+ |
+ |--30.00%-- __GI_read
+ | |
+ | --83.33%-- git_config_from_file
+ | git_config
+ | |
+ [...]
+
+Or you can observe the whole system's page allocations for 10
+seconds:
+
+titan:~/git> perf stat -a -e kmem:mm_page_pcpu_drain -e
+kmem:mm_page_alloc -e kmem:mm_pagevec_free -e
+kmem:mm_page_free_direct sleep 10
+
+ Performance counter stats for 'sleep 10':
+
+ 171585 kmem:mm_page_pcpu_drain
+ 322114 kmem:mm_page_alloc
+ 73623 kmem:mm_pagevec_free
+ 254115 kmem:mm_page_free_direct
+
+ 10.000591410 seconds time elapsed
+
+Or observe how fluctuating the page allocations are, via statistical
+analysis done over ten 1-second intervals:
+
+ titan:~/git> perf stat --repeat 10 -a -e kmem:mm_page_pcpu_drain -e
+ kmem:mm_page_alloc -e kmem:mm_pagevec_free -e
+ kmem:mm_page_free_direct sleep 1
+
+ Performance counter stats for 'sleep 1' (10 runs):
+
+ 17254 kmem:mm_page_pcpu_drain ( +- 3.709% )
+ 34394 kmem:mm_page_alloc ( +- 4.617% )
+ 7509 kmem:mm_pagevec_free ( +- 4.820% )
+ 25653 kmem:mm_page_free_direct ( +- 3.672% )
+
+ 1.058135029 seconds time elapsed ( +- 3.089% )
+
+Or you can annotate the recorded 'git gc' run on a per symbol basis
+and check which instructions/source-code generated page allocations:
+
+ titan:~/git> perf annotate __GI___fork
+ ------------------------------------------------
+ Percent | Source code & Disassembly of libc-2.5.so
+ ------------------------------------------------
+ :
+ :
+ : Disassembly of section .plt:
+ : Disassembly of section .text:
+ :
+ : 00000031a2e95560 <__fork>:
+ [...]
+ 0.00 : 31a2e95602: b8 38 00 00 00 mov $0x38,%eax
+ 0.00 : 31a2e95607: 0f 05 syscall
+ 83.42 : 31a2e95609: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax
+ 0.00 : 31a2e9560f: 0f 87 4d 01 00 00 ja 31a2e95762 <__fork+0x202>
+ 0.00 : 31a2e95615: 85 c0 test %eax,%eax
+
+( this shows that 83.42% of __GI___fork's page allocations come from
+ the 0x38 system call it performs. )
+
+etc. etc. - a lot more is possible. I could list a dozen
+other different use cases straight away - none of which is
+possible via /proc/vmstat.
+
+/proc/vmstat is not in the same league really, in terms of
+expressive power of system analysis and performance
+analysis.
+
+All that the above results needed were those new tracepoints
+in include/tracing/events/kmem.h.
+
+ Ingo
+
+
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index 0d74346d21ab..484080dd5b6f 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -40,7 +40,7 @@ OPTIONS
-a::
system-wide collection
--S::
+-c::
scale counter values
EXAMPLES
diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt
index 539d01289725..4a7d558dc309 100644
--- a/tools/perf/Documentation/perf-top.txt
+++ b/tools/perf/Documentation/perf-top.txt
@@ -3,36 +3,122 @@ perf-top(1)
NAME
----
-perf-top - Run a command and profile it
+perf-top - System profiling tool.
SYNOPSIS
--------
[verse]
-'perf top' [-e <EVENT> | --event=EVENT] [-l] [-a] <command>
+'perf top' [-e <EVENT> | --event=EVENT] [<options>]
DESCRIPTION
-----------
-This command runs a command and gathers a performance counter profile
-from it.
+This command generates and displays a performance counter profile in realtime.
OPTIONS
-------
-<command>...::
- Any command you can specify in a shell.
+-a::
+--all-cpus::
+ System-wide collection. (default)
+
+-c <count>::
+--count=<count>::
+ Event period to sample.
+
+-C <cpu>::
+--CPU=<cpu>::
+ CPU to profile.
+
+-d <seconds>::
+--delay=<seconds>::
+ Number of seconds to delay between refreshes.
--e::
---event=::
+-e <event>::
+--event=<event>::
Select the PMU event. Selection can be a symbolic event name
(use 'perf list' to list all events) or a raw PMU
event (eventsel+umask) in the form of rNNN where NNN is a
- hexadecimal event descriptor.
+ hexadecimal event descriptor.
--a::
- system-wide collection
+-E <entries>::
+--entries=<entries>::
+ Display this many functions.
+
+-f <count>::
+--count-filter=<count>::
+ Only display functions with more events than this.
+
+-F <freq>::
+--freq=<freq>::
+ Profile at this frequency.
+
+-i::
+--inherit::
+ Child tasks inherit counters, only makes sense with -p option.
+
+-k <path>::
+--vmlinux=<path>::
+ Path to vmlinux. Required for annotation functionality.
+
+-m <pages>::
+--mmap-pages=<pages>::
+ Number of mmapped data pages.
+
+-p <pid>::
+--pid=<pid>::
+ Profile events on existing pid.
+
+-r <priority>::
+--realtime=<priority>::
+ Collect data with this RT SCHED_FIFO priority.
+
+-s <symbol>::
+--sym-annotate=<symbol>::
+ Annotate this symbol. Requires -k option.
+
+-v::
+--verbose::
+ Be more verbose (show counter open errors, etc).
+
+-z::
+--zero::
+ Zero history across display updates.
+
+INTERACTIVE PROMPTING KEYS
+--------------------------
+
+[d]::
+ Display refresh delay.
+
+[e]::
+ Number of entries to display.
+
+[E]::
+ Event to display when multiple counters are active.
+
+[f]::
+ Profile display filter (>= hit count).
+
+[F]::
+ Annotation display filter (>= % of total).
+
+[s]::
+ Annotate symbol.
+
+[S]::
+ Stop annotation, return to full profile display.
+
+[w]::
+ Toggle between weighted sum and individual count[E]r profile.
+
+[z]::
+ Toggle event count zeroing across display updates.
+
+[qQ]::
+ Quit.
+
+Pressing any unmapped key displays a menu, and prompts for input.
--l::
- scale counter values
SEE ALSO
--------
diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index 1916e44b9bb0..c045b4271e57 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -382,18 +382,29 @@ endif
ifdef NO_DEMANGLE
BASIC_CFLAGS += -DNO_DEMANGLE
else
-
has_bfd := $(shell sh -c "(echo '\#include <bfd.h>'; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) -lbfd > /dev/null 2>&1 && echo y")
- has_bfd_iberty := $(shell sh -c "(echo '\#include <bfd.h>'; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) -lbfd -liberty > /dev/null 2>&1 && echo y")
-
ifeq ($(has_bfd),y)
EXTLIBS += -lbfd
- else ifeq ($(has_bfd_iberty),y)
- EXTLIBS += -lbfd -liberty
else
- msg := $(warning No bfd.h/libbfd found, install binutils-dev[el] to gain symbol demangling)
- BASIC_CFLAGS += -DNO_DEMANGLE
+ has_bfd_iberty := $(shell sh -c "(echo '\#include <bfd.h>'; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) -lbfd -liberty > /dev/null 2>&1 && echo y")
+ ifeq ($(has_bfd_iberty),y)
+ EXTLIBS += -lbfd -liberty
+ else
+ has_bfd_iberty_z := $(shell sh -c "(echo '\#include <bfd.h>'; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) -lbfd -liberty -lz > /dev/null 2>&1 && echo y")
+ ifeq ($(has_bfd_iberty_z),y)
+ EXTLIBS += -lbfd -liberty -lz
+ else
+ has_cplus_demangle := $(shell sh -c "(echo 'extern char *cplus_demangle(const char *, int);'; echo 'int main(void) { cplus_demangle(0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) -liberty > /dev/null 2>&1 && echo y")
+ ifeq ($(has_cplus_demangle),y)
+ EXTLIBS += -liberty
+ BASIC_CFLAGS += -DHAVE_CPLUS_DEMANGLE
+ else
+ msg := $(warning No bfd.h/libbfd found, install binutils-dev[el] to gain symbol demangling)
+ BASIC_CFLAGS += -DNO_DEMANGLE
+ endif
+ endif
+ endif
endif
endif
diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c
index f990fa8a35c9..d88c6961274c 100644
--- a/tools/perf/builtin-list.c
+++ b/tools/perf/builtin-list.c
@@ -10,11 +10,12 @@
#include "perf.h"
-#include "util/parse-options.h"
#include "util/parse-events.h"
+#include "util/cache.h"
int cmd_list(int argc __used, const char **argv __used, const char *prefix __used)
{
+ setup_pager();
print_events();
return 0;
}
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 6da09928130f..3d051b9cf25f 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -34,7 +34,9 @@ static int output;
static const char *output_name = "perf.data";
static int group = 0;
static unsigned int realtime_prio = 0;
+static int raw_samples = 0;
static int system_wide = 0;
+static int profile_cpu = -1;
static pid_t target_pid = -1;
static int inherit = 1;
static int force = 0;
@@ -203,46 +205,48 @@ static void sig_atexit(void)
kill(getpid(), signr);
}
-static void pid_synthesize_comm_event(pid_t pid, int full)
+/*
+ * Emit a PERF_EVENT_COMM record for 'pid', taking the command name and
+ * the thread group id from the "Name:" and "Tgid:" lines of
+ * /proc/<pid>/status.  Returns the tgid, or 0 when the status file could
+ * not be opened (the task exited in the meantime).
+ */
+static pid_t pid_synthesize_comm_event(pid_t pid, int full)
{
struct comm_event comm_ev;
char filename[PATH_MAX];
char bf[BUFSIZ];
- int fd;
- size_t size;
- char *field, *sep;
+ FILE *fp;
+ size_t size = 0;
DIR *tasks;
struct dirent dirent, *next;
+ pid_t tgid = 0;
- snprintf(filename, sizeof(filename), "/proc/%d/stat", pid);
+ snprintf(filename, sizeof(filename), "/proc/%d/status", pid);
- fd = open(filename, O_RDONLY);
- if (fd < 0) {
+ fp = fopen(filename, "r");
+ if (fp == NULL) {
/*
* We raced with a task exiting - just return:
*/
if (verbose)
fprintf(stderr, "couldn't open %s\n", filename);
- return;
+ return 0;
}
- if (read(fd, bf, sizeof(bf)) < 0) {
- fprintf(stderr, "couldn't read %s\n", filename);
- exit(EXIT_FAILURE);
- }
- close(fd);
- /* 9027 (cat) R 6747 9027 6747 34816 9027 ... */
memset(&comm_ev, 0, sizeof(comm_ev));
- field = strchr(bf, '(');
- if (field == NULL)
- goto out_failure;
- sep = strchr(++field, ')');
- if (sep == NULL)
- goto out_failure;
- size = sep - field;
- memcpy(comm_ev.comm, field, size++);
-
- comm_ev.pid = pid;
+ while (!comm_ev.comm[0] || !comm_ev.pid) {
+ if (fgets(bf, sizeof(bf), fp) == NULL)
+ goto out_failure;
+
+ if (memcmp(bf, "Name:", 5) == 0) {
+ char *name = bf + 5;
+ while (*name && isspace(*name))
+ ++name;
+ size = strlen(name) - 1;
+ memcpy(comm_ev.comm, name, size++);
+ } else if (memcmp(bf, "Tgid:", 5) == 0) {
+ char *tgids = bf + 5;
+ while (*tgids && isspace(*tgids))
+ ++tgids;
+ tgid = comm_ev.pid = atoi(tgids);
+ }
+ }
+
comm_ev.header.type = PERF_EVENT_COMM;
size = ALIGN(size, sizeof(u64));
comm_ev.header.size = sizeof(comm_ev) - (sizeof(comm_ev.comm) - size);
@@ -251,7 +255,7 @@ static void pid_synthesize_comm_event(pid_t pid, int full)
comm_ev.tid = pid;
write_output(&comm_ev, comm_ev.header.size);
- return;
+ goto out_fclose;
}
snprintf(filename, sizeof(filename), "/proc/%d/task", pid);
@@ -268,7 +272,10 @@ static void pid_synthesize_comm_event(pid_t pid, int full)
write_output(&comm_ev, comm_ev.header.size);
}
closedir(tasks);
- return;
+
+out_fclose:
+ fclose(fp);
+ return tgid;
out_failure:
fprintf(stderr, "couldn't get COMM and pgid, malformed %s\n",
@@ -276,7 +283,7 @@ out_failure:
exit(EXIT_FAILURE);
}
-static void pid_synthesize_mmap_samples(pid_t pid)
+static void pid_synthesize_mmap_samples(pid_t pid, pid_t tgid)
{
char filename[PATH_MAX];
FILE *fp;
@@ -328,7 +335,7 @@ static void pid_synthesize_mmap_samples(pid_t pid)
mmap_ev.len -= mmap_ev.start;
mmap_ev.header.size = (sizeof(mmap_ev) -
(sizeof(mmap_ev.filename) - size));
- mmap_ev.pid = pid;
+ mmap_ev.pid = tgid;
mmap_ev.tid = pid;
write_output(&mmap_ev, mmap_ev.header.size);
@@ -347,14 +354,14 @@ static void synthesize_all(void)
while (!readdir_r(proc, &dirent, &next) && next) {
char *end;
- pid_t pid;
+ pid_t pid, tgid;
pid = strtol(dirent.d_name, &end, 10);
if (*end) /* only interested in proper numerical dirents */
continue;
- pid_synthesize_comm_event(pid, 1);
- pid_synthesize_mmap_samples(pid);
+ tgid = pid_synthesize_comm_event(pid, 1);
+ pid_synthesize_mmap_samples(pid, tgid);
}
closedir(proc);
@@ -392,7 +399,7 @@ static void create_counter(int counter, int cpu, pid_t pid)
PERF_FORMAT_TOTAL_TIME_RUNNING |
PERF_FORMAT_ID;
- attr->sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID;
+ attr->sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID;
if (freq) {
attr->sample_type |= PERF_SAMPLE_PERIOD;
@@ -412,6 +419,9 @@ static void create_counter(int counter, int cpu, pid_t pid)
if (call_graph)
attr->sample_type |= PERF_SAMPLE_CALLCHAIN;
+ if (raw_samples)
+ attr->sample_type |= PERF_SAMPLE_RAW;
+
attr->mmap = track;
attr->comm = track;
attr->inherit = (cpu < 0) && inherit;
@@ -425,6 +435,8 @@ try_again:
if (err == EPERM)
die("Permission error - are you root?\n");
+ else if (err == ENODEV && profile_cpu != -1)
+ die("No such device - did you specify an out-of-range profile CPU?\n");
/*
* If it's cycles then fall back to hrtimer
@@ -524,10 +536,14 @@ static int __cmd_record(int argc, const char **argv)
signal(SIGCHLD, sig_handler);
signal(SIGINT, sig_handler);
- if (!stat(output_name, &st) && !force && !append_file) {
- fprintf(stderr, "Error, output file %s exists, use -A to append or -f to overwrite.\n",
- output_name);
- exit(-1);
+ if (!stat(output_name, &st) && st.st_size) {
+ if (!force && !append_file) {
+ fprintf(stderr, "Error, output file %s exists, use -A to append or -f to overwrite.\n",
+ output_name);
+ exit(-1);
+ }
+ } else {
+ append_file = 0;
}
flags = O_CREAT|O_RDWR;
@@ -554,16 +570,22 @@ static int __cmd_record(int argc, const char **argv)
if (pid == -1)
pid = getpid();
- open_counters(-1, pid);
- } else for (i = 0; i < nr_cpus; i++)
- open_counters(i, target_pid);
+ open_counters(profile_cpu, pid);
+ } else {
+ if (profile_cpu != -1) {
+ open_counters(profile_cpu, target_pid);
+ } else {
+ for (i = 0; i < nr_cpus; i++)
+ open_counters(i, target_pid);
+ }
+ }
if (file_new)
perf_header__write(header, output);
if (!system_wide) {
- pid_synthesize_comm_event(pid, 0);
- pid_synthesize_mmap_samples(pid);
+ pid_t tgid = pid_synthesize_comm_event(pid, 0);
+ pid_synthesize_mmap_samples(pid, tgid);
} else
synthesize_all();
@@ -631,10 +653,14 @@ static const struct option options[] = {
"record events on existing pid"),
OPT_INTEGER('r', "realtime", &realtime_prio,
"collect data with this RT SCHED_FIFO priority"),
+ OPT_BOOLEAN('R', "raw-samples", &raw_samples,
+ "collect raw sample records from all opened counters"),
OPT_BOOLEAN('a', "all-cpus", &system_wide,
"system-wide collection from all CPUs"),
OPT_BOOLEAN('A', "append", &append_file,
"append to the output file to do incremental profiling"),
+ OPT_INTEGER('C', "profile_cpu", &profile_cpu,
+ "CPU to profile on"),
OPT_BOOLEAN('f', "force", &force,
"overwrite existing data file"),
OPT_LONG('c', "count", &default_interval,
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 8cb58d68a006..b53a60fc12de 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -68,7 +68,7 @@ static int callchain;
static
struct callchain_param callchain_param = {
- .mode = CHAIN_GRAPH_ABS,
+ .mode = CHAIN_GRAPH_REL,
.min_percent = 0.5
};
@@ -112,7 +112,9 @@ struct read_event {
struct perf_event_header header;
u32 pid,tid;
u64 value;
- u64 format[3];
+ u64 time_enabled;
+ u64 time_running;
+ u64 id;
};
typedef union event_union {
@@ -698,7 +700,8 @@ sort__sym_print(FILE *fp, struct hist_entry *self, unsigned int width __used)
size_t ret = 0;
if (verbose)
- ret += repsep_fprintf(fp, "%#018llx ", (u64)self->ip);
+ ret += repsep_fprintf(fp, "%#018llx %c ", (u64)self->ip,
+ dso__symtab_origin(self->dso));
ret += repsep_fprintf(fp, "[%c] ", self->level);
if (self->sym) {
@@ -888,6 +891,21 @@ ipchain__fprintf_graph(FILE *fp, struct callchain_list *chain, int depth,
return ret;
}
+static struct symbol *rem_sq_bracket;
+static struct callchain_list rem_hits;
+
+/*
+ * Allocate the placeholder symbol named "[...]" and hook it into rem_hits.
+ * On allocation failure a message is printed and rem_sq_bracket stays NULL.
+ */
+static void init_rem_hits(void)
+{
+	static const char label[] = "[...]";
+
+	/* sizeof(label) == 6: five characters plus the terminating NUL */
+	rem_sq_bracket = malloc(sizeof(*rem_sq_bracket) + sizeof(label));
+	if (rem_sq_bracket == NULL) {
+		fprintf(stderr, "Not enough memory to display remaining hits\n");
+		return;
+	}
+
+	memcpy(rem_sq_bracket->name, label, sizeof(label));
+	rem_hits.sym = rem_sq_bracket;
+}
+
static size_t
callchain__fprintf_graph(FILE *fp, struct callchain_node *self,
u64 total_samples, int depth, int depth_mask)
@@ -897,25 +915,34 @@ callchain__fprintf_graph(FILE *fp, struct callchain_node *self,
struct callchain_list *chain;
int new_depth_mask = depth_mask;
u64 new_total;
+ u64 remaining;
size_t ret = 0;
int i;
if (callchain_param.mode == CHAIN_GRAPH_REL)
- new_total = self->cumul_hit;
+ new_total = self->children_hit;
else
new_total = total_samples;
+ remaining = new_total;
+
node = rb_first(&self->rb_root);
while (node) {
+ u64 cumul;
+
child = rb_entry(node, struct callchain_node, rb_node);
+ cumul = cumul_hits(child);
+ remaining -= cumul;
/*
* The depth mask manages the output of pipes that show
* the depth. We don't want to keep the pipes of the current
- * level for the last child of this depth
+ * level for the last child of this depth.
+ * Except if we have remaining filtered hits. They will
+ * supersede the last child
*/
next = rb_next(node);
- if (!next)
+ if (!next && (callchain_param.mode != CHAIN_GRAPH_REL || !remaining))
new_depth_mask &= ~(1 << (depth - 1));
/*
@@ -930,7 +957,7 @@ callchain__fprintf_graph(FILE *fp, struct callchain_node *self,
ret += ipchain__fprintf_graph(fp, chain, depth,
new_depth_mask, i++,
new_total,
- child->cumul_hit);
+ cumul);
}
ret += callchain__fprintf_graph(fp, child, new_total,
depth + 1,
@@ -938,6 +965,19 @@ callchain__fprintf_graph(FILE *fp, struct callchain_node *self,
node = next;
}
+ if (callchain_param.mode == CHAIN_GRAPH_REL &&
+ remaining && remaining != new_total) {
+
+ if (!rem_sq_bracket)
+ return ret;
+
+ new_depth_mask &= ~(1 << (depth - 1));
+
+ ret += ipchain__fprintf_graph(fp, &rem_hits, depth,
+ new_depth_mask, 0, new_total,
+ remaining);
+ }
+
return ret;
}
@@ -1358,6 +1398,8 @@ static size_t output__fprintf(FILE *fp, u64 total_samples)
unsigned int width;
char *col_width = col_width_list_str;
+ init_rem_hits();
+
fprintf(fp, "# Samples: %Ld\n", (u64)total_samples);
fprintf(fp, "#\n");
@@ -1429,6 +1471,8 @@ print_entries:
}
fprintf(fp, "\n");
+ free(rem_sq_bracket);
+
return ret;
}
@@ -1482,11 +1526,11 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head)
more_data += sizeof(u64);
}
- dprintf("%p [%p]: PERF_EVENT_SAMPLE (IP, %d): %d: %p period: %Ld\n",
+ dprintf("%p [%p]: PERF_EVENT_SAMPLE (IP, %d): %d/%d: %p period: %Ld\n",
(void *)(offset + head),
(void *)(long)(event->header.size),
event->header.misc,
- event->ip.pid,
+ event->ip.pid, event->ip.tid,
(void *)(long)ip,
(long long)period);
@@ -1546,10 +1590,11 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head)
if (show & show_mask) {
struct symbol *sym = resolve_symbol(thread, &map, &dso, &ip);
- if (dso_list && dso && dso->name && !strlist__has_entry(dso_list, dso->name))
+ if (dso_list && (!dso || !dso->name ||
+ !strlist__has_entry(dso_list, dso->name)))
return 0;
- if (sym_list && sym && !strlist__has_entry(sym_list, sym->name))
+ if (sym_list && (!sym || !strlist__has_entry(sym_list, sym->name)))
return 0;
if (hist_entry__add(thread, map, dso, sym, ip, chain, level, period)) {
@@ -1568,10 +1613,11 @@ process_mmap_event(event_t *event, unsigned long offset, unsigned long head)
struct thread *thread = threads__findnew(event->mmap.pid);
struct map *map = map__new(&event->mmap);
- dprintf("%p [%p]: PERF_EVENT_MMAP %d: [%p(%p) @ %p]: %s\n",
+ dprintf("%p [%p]: PERF_EVENT_MMAP %d/%d: [%p(%p) @ %p]: %s\n",
(void *)(offset + head),
(void *)(long)(event->header.size),
event->mmap.pid,
+ event->mmap.tid,
(void *)(long)event->mmap.start,
(void *)(long)event->mmap.len,
(void *)(long)event->mmap.pgoff,
@@ -1690,14 +1736,37 @@ static void trace_event(event_t *event)
dprintf(".\n");
}
+static struct perf_header *header;
+
+/*
+ * Find the counter attribute whose id list contains 'id'.
+ * Walks every attr of the file-level 'header'; returns a pointer to the
+ * matching perf_counter_attr, or NULL when no attr lists that id.
+ */
+static struct perf_counter_attr *perf_header__find_attr(u64 id)
+{
+	int a;
+
+	for (a = 0; a < header->attrs; a++) {
+		struct perf_header_attr *cur = header->attr[a];
+		int k = 0;
+
+		while (k < cur->ids) {
+			if (cur->id[k] == id)
+				return &cur->attr;
+			k++;
+		}
+	}
+
+	return NULL;
+}
+
static int
process_read_event(event_t *event, unsigned long offset, unsigned long head)
{
- dprintf("%p [%p]: PERF_EVENT_READ: %d %d %Lu\n",
+ struct perf_counter_attr *attr = perf_header__find_attr(event->read.id);
+
+ dprintf("%p [%p]: PERF_EVENT_READ: %d %d %s %Lu\n",
(void *)(offset + head),
(void *)(long)(event->header.size),
event->read.pid,
event->read.tid,
+ attr ? __event_name(attr->type, attr->config)
+ : "FAIL",
event->read.value);
return 0;
@@ -1743,8 +1812,6 @@ process_event(event_t *event, unsigned long offset, unsigned long head)
return 0;
}
-static struct perf_header *header;
-
static u64 perf_header__sample_type(void)
{
u64 sample_type = 0;
@@ -1812,6 +1879,13 @@ static int __cmd_report(void)
" -g?\n");
exit(-1);
}
+ } else if (callchain_param.mode != CHAIN_NONE && !callchain) {
+ callchain = 1;
+ if (register_callchain_param(&callchain_param) < 0) {
+ fprintf(stderr, "Can't register callchain"
+ " params\n");
+ exit(-1);
+ }
}
if (load_kernel() < 0) {
@@ -1950,6 +2024,13 @@ parse_callchain_opt(const struct option *opt __used, const char *arg,
else if (!strncmp(tok, "fractal", strlen(arg)))
callchain_param.mode = CHAIN_GRAPH_REL;
+ else if (!strncmp(tok, "none", strlen(arg))) {
+ callchain_param.mode = CHAIN_NONE;
+ callchain = 0;
+
+ return 0;
+ }
+
else
return -1;
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index f9510eeeb6c7..b4b06c7903e1 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -496,7 +496,7 @@ static const struct option options[] = {
"stat events on existing pid"),
OPT_BOOLEAN('a', "all-cpus", &system_wide,
"system-wide collection from all CPUs"),
- OPT_BOOLEAN('S', "scale", &scale,
+ OPT_BOOLEAN('c', "scale", &scale,
"scale/normalize counters"),
OPT_BOOLEAN('v', "verbose", &verbose,
"be more verbose (show counter open errors, etc)"),
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index f139f1ab9333..7de28ce9ca26 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -31,6 +31,8 @@
#include <fcntl.h>
#include <stdio.h>
+#include <termios.h>
+#include <unistd.h>
#include <errno.h>
#include <time.h>
@@ -54,7 +56,7 @@ static int system_wide = 0;
static int default_interval = 100000;
-static u64 count_filter = 5;
+static int count_filter = 5;
static int print_entries = 15;
static int target_pid = -1;
@@ -69,15 +71,28 @@ static int freq = 0;
static int verbose = 0;
static char *vmlinux = NULL;
-static char *sym_filter;
-static unsigned long filter_start;
-static unsigned long filter_end;
-
static int delay_secs = 2;
static int zero;
static int dump_symtab;
/*
+ * Source
+ */
+
+struct source_line { /* one line of objdump output, kept in a singly linked list */
+ u64 eip; /* address parsed from the start of the line by parse_source(); stays 0 when none was parsed */
+ unsigned long count[MAX_COUNTERS]; /* hits recorded on this line, indexed by counter */
+ char *line; /* text of the line as read by getline(), trailing newline removed */
+ struct source_line *next; /* following line, NULL at the end of the list */
+};
+
+static char *sym_filter = NULL;
+struct sym_entry *sym_filter_entry = NULL;
+static int sym_pcnt_filter = 5;
+static int sym_counter = 0;
+static int display_weighted = -1;
+
+/*
* Symbols
*/
@@ -91,9 +106,237 @@ struct sym_entry {
unsigned long snap_count;
double weight;
int skip;
+ struct source_line *source;
+ struct source_line *lines;
+ struct source_line **lines_tail;
+ pthread_mutex_t source_lock;
};
-struct sym_entry *sym_filter_entry;
+/*
+ * Source functions
+ */
+
+/*
+ * Disassemble the symbol that follows 'syme' in memory with
+ * "objdump -dS" and cache the output as a list of source_line entries
+ * in syme->lines, then make 'syme' the active annotation target
+ * (sym_filter_entry).
+ *
+ * If syme->lines is already populated only sym_filter_entry is assigned.
+ * Returns without doing anything when 'syme' is NULL, when neither a
+ * module path nor a vmlinux path is available, or when popen() fails.
+ */
+static void parse_source(struct sym_entry *syme)
+{
+	struct symbol *sym;
+	struct module *module;
+	struct section *section = NULL;
+	FILE *file;
+	char command[PATH_MAX*2], *path = vmlinux;
+	u64 start, end;
+
+	if (!syme)
+		return;
+
+	if (syme->lines) {
+		pthread_mutex_lock(&syme->source_lock);
+		goto out_assign;
+	}
+
+	sym = (struct symbol *)(syme + 1);
+	module = sym->module;
+
+	if (module)
+		path = module->path;
+	if (!path)
+		return;
+
+	start = sym->obj_start;
+	if (!start)
+		start = sym->start;
+
+	if (module) {
+		section = module->sections->find_section(module->sections, ".text");
+		if (section)
+			start -= section->vma;
+	}
+
+	end = start + sym->end - sym->start + 1;
+
+	/* bounded formatting: never write past 'command' */
+	snprintf(command, sizeof(command), "objdump --start-address=0x%016Lx --stop-address=0x%016Lx -dS %s", start, end, path);
+
+	file = popen(command, "r");
+	if (!file)
+		return;
+
+	pthread_mutex_lock(&syme->source_lock);
+	syme->lines_tail = &syme->lines;
+	while (!feof(file)) {
+		struct source_line *src;
+		size_t dummy = 0;
+		size_t line_len;
+		char *c;
+
+		src = calloc(1, sizeof(*src));
+		assert(src != NULL);
+
+		if (getline(&src->line, &dummy, file) < 0 || !src->line) {
+			/*
+			 * EOF or read error: this node never joins the list,
+			 * so release it here.  getline() may have allocated
+			 * a buffer even though it failed.
+			 */
+			free(src->line);
+			free(src);
+			break;
+		}
+
+		c = strchr(src->line, '\n');
+		if (c)
+			*c = 0;
+
+		src->next = NULL;
+		*syme->lines_tail = src;
+		syme->lines_tail = &src->next;
+
+		/*
+		 * A ':' at column 8 or column 16 marks a line that starts
+		 * with a hex address.  Each index is only inspected when the
+		 * line is long enough to contain it.
+		 */
+		line_len = strlen(src->line);
+		if ((line_len > 8 && src->line[8] == ':') ||
+		    (line_len > 16 && src->line[16] == ':')) {
+			src->eip = strtoull(src->line, NULL, 16);
+			if (section)
+				src->eip += section->vma;
+		}
+	}
+	pclose(file);
+out_assign:
+	sym_filter_entry = syme;
+	pthread_mutex_unlock(&syme->source_lock);
+}
+
+/*
+ * Clear the hit counts of the first nr_counters counters on every cached
+ * source line of 'syme'.  No locking is done here.
+ */
+static void __zero_source_counters(struct sym_entry *syme)
+{
+	struct source_line *cur;
+	int c;
+
+	for (cur = syme->lines; cur != NULL; cur = cur->next) {
+		for (c = 0; c < nr_counters; c++)
+			cur->count[c] = 0;
+	}
+}
+
+/*
+ * Credit one hit of 'counter' to the cached source line whose address
+ * equals 'ip'.  Only acts when 'syme' is the symbol currently being
+ * annotated; the sample is dropped if source_lock cannot be taken
+ * immediately or if syme->source has not been set yet.
+ */
+static void record_precise_ip(struct sym_entry *syme, int counter, u64 ip)
+{
+	struct source_line *cur;
+
+	if (syme != sym_filter_entry)
+		return;
+
+	if (pthread_mutex_trylock(&syme->source_lock) != 0)
+		return;
+
+	if (syme->source) {
+		/* skip every line below ip; stop at the first one at or above it */
+		cur = syme->lines;
+		while (cur && cur->eip < ip)
+			cur = cur->next;
+
+		if (cur && cur->eip == ip)
+			cur->count[counter]++;
+	}
+
+	pthread_mutex_unlock(&syme->source_lock);
+}
+
+/*
+ * Locate, among the cached lines of 'syme', the first one containing the
+ * "<name>:" label of the symbol and remember it in syme->source.
+ * syme->source is left untouched when no line matches.
+ */
+static void lookup_sym_source(struct sym_entry *syme)
+{
+	struct symbol *sym = (struct symbol *)(syme + 1);
+	struct source_line *cur;
+	char needle[PATH_MAX];
+
+	sprintf(needle, "<%s>:", sym->name);
+
+	if (sym->module) {
+		/* for module symbols, cut the pattern at the first tab */
+		char *tab = strchr(needle, '\t');
+
+		if (tab)
+			*tab = 0;
+	}
+
+	pthread_mutex_lock(&syme->source_lock);
+	cur = syme->lines;
+	while (cur && !strstr(cur->line, needle))
+		cur = cur->next;
+	if (cur)
+		syme->source = cur;
+	pthread_mutex_unlock(&syme->source_lock);
+}
+
+/*
+ * Print 'count' consecutive source lines starting at 'queue', each with
+ * its hit count for sym_counter and that count as a percentage of 'total'.
+ */
+static void show_lines(struct source_line *queue, int count, int total)
+{
+	struct source_line *cur = queue;
+	int left;
+
+	for (left = count; left > 0; left--) {
+		unsigned long hits = cur->count[sym_counter];
+		float pcnt = 100.0*(float)hits/(float)total;
+
+		printf("%8li %4.1f%%\t%s\n", hits, pcnt, cur->line);
+		cur = cur->next;
+	}
+}
+
+#define TRACE_COUNT 3
+
+/*
+ * Print the annotated listing of 'syme' for the counter sym_counter.
+ *
+ * Lines whose share of the symbol's hits is at least sym_pcnt_filter
+ * percent are printed together with up to TRACE_COUNT lines preceding
+ * them.  Once more than print_entries lines have been shown, further hot
+ * lines are only counted and reported in a final hint.  Every visited
+ * line's count is then either zeroed (when 'zero' is set) or scaled by 7/8.
+ */
+static void show_details(struct sym_entry *syme)
+{
+	struct symbol *sym;
+	struct source_line *cur;
+	struct source_line *window = NULL;
+	int window_len = 0;
+	int shown = 0, total = 0, hidden = 0;
+
+	if (!syme)
+		return;
+
+	if (!syme->source) {
+		lookup_sym_source(syme);
+		if (!syme->source)
+			return;
+	}
+
+	sym = (struct symbol *)(syme + 1);
+	printf("Showing %s for %s\n", event_name(sym_counter), sym->name);
+	printf(" Events Pcnt (>=%d%%)\n", sym_pcnt_filter);
+
+	pthread_mutex_lock(&syme->source_lock);
+
+	/* first pass: sum of hits over all lines from syme->source on */
+	for (cur = syme->source; cur; cur = cur->next)
+		total += cur->count[sym_counter];
+
+	/* second pass: print hot lines with their leading context */
+	for (cur = syme->source; cur; cur = cur->next) {
+		float pcnt = 0.0;
+
+		if (window_len == 0)
+			window = cur;
+		window_len++;
+
+		if (cur->count[sym_counter])
+			pcnt = 100.0 * cur->count[sym_counter] / (float)total;
+
+		if (pcnt >= (float)sym_pcnt_filter) {
+			if (shown > print_entries)
+				hidden++;
+			else
+				show_lines(window, window_len, total);
+			shown += window_len;
+			window_len = 0;
+			window = NULL;
+		} else if (window_len > TRACE_COUNT) {
+			/* keep only the most recent TRACE_COUNT lines as context */
+			window = window->next;
+			window_len--;
+		}
+
+		if (zero)
+			cur->count[sym_counter] = 0;
+		else
+			cur->count[sym_counter] = cur->count[sym_counter] * 7 / 8;
+	}
+
+	pthread_mutex_unlock(&syme->source_lock);
+
+	if (hidden)
+		printf("%d lines not displayed, maybe increase display entries [e]\n", hidden);
+}
struct dso *kernel_dso;
@@ -112,6 +355,9 @@ static double sym_weight(const struct sym_entry *sym)
double weight = sym->snap_count;
int counter;
+ if (!display_weighted)
+ return weight;
+
for (counter = 1; counter < nr_counters-1; counter++)
weight *= sym->count[counter];
@@ -159,7 +405,7 @@ static void rb_insert_active_sym(struct rb_root *tree, struct sym_entry *se)
static void print_sym_table(void)
{
int printed = 0, j;
- int counter;
+ int counter, snap = !display_weighted ? sym_counter : 0;
float samples_per_sec = samples/delay_secs;
float ksamples_per_sec = (samples-userspace_samples)/delay_secs;
float sum_ksamples = 0.0;
@@ -175,7 +421,7 @@ static void print_sym_table(void)
pthread_mutex_unlock(&active_symbols_lock);
list_for_each_entry_safe_from(syme, n, &active_symbols, node) {
- syme->snap_count = syme->count[0];
+ syme->snap_count = syme->count[snap];
if (syme->snap_count != 0) {
syme->weight = sym_weight(syme);
rb_insert_active_sym(&tmp, syme);
@@ -195,7 +441,7 @@ static void print_sym_table(void)
samples_per_sec,
100.0 - (100.0*((samples_per_sec-ksamples_per_sec)/samples_per_sec)));
- if (nr_counters == 1) {
+ if (nr_counters == 1 || !display_weighted) {
printf("%Ld", (u64)attrs[0].sample_period);
if (freq)
printf("Hz ");
@@ -203,7 +449,9 @@ static void print_sym_table(void)
printf(" ");
}
- for (counter = 0; counter < nr_counters; counter++) {
+ if (!display_weighted)
+ printf("%s", event_name(sym_counter));
+ else for (counter = 0; counter < nr_counters; counter++) {
if (counter)
printf("/");
@@ -228,6 +476,11 @@ static void print_sym_table(void)
printf("------------------------------------------------------------------------------\n\n");
+ if (sym_filter_entry) {
+ show_details(sym_filter_entry);
+ return;
+ }
+
if (nr_counters == 1)
printf(" samples pcnt");
else
@@ -242,13 +495,13 @@ static void print_sym_table(void)
struct symbol *sym = (struct symbol *)(syme + 1);
double pcnt;
- if (++printed > print_entries || syme->snap_count < count_filter)
+ if (++printed > print_entries || (int)syme->snap_count < count_filter)
continue;
pcnt = 100.0 - (100.0 * ((sum_ksamples - syme->snap_count) /
sum_ksamples));
- if (nr_counters == 1)
+ if (nr_counters == 1 || !display_weighted)
printf("%20.2f - ", syme->weight);
else
printf("%9.1f %10ld - ", syme->weight, syme->snap_count);
@@ -261,19 +514,250 @@ static void print_sym_table(void)
}
}
+/*
+ * Prompt on stdout with 'msg' and read one line from stdin.  If the line is
+ * a non-empty string of decimal digits whose value fits in an int, store
+ * that value in *target.  On any other input, or on a read error, *target
+ * is left unchanged.
+ */
+static void prompt_integer(int *target, const char *msg)
+{
+	/* getline() allocates the buffer itself when handed NULL and 0. */
+	char *buf = NULL, *p;
+	size_t dummy = 0;
+	unsigned long val;
+	int tmp;
+
+	fprintf(stdout, "\n%s: ", msg);
+	/* getline() may have allocated even on failure, so always free buf. */
+	if (getline(&buf, &dummy, stdin) < 0)
+		goto out_free;
+
+	p = strchr(buf, '\n');
+	if (p)
+		*p = 0;
+
+	/* An empty line is not a number: keep the current setting. */
+	if (!*buf)
+		goto out_free;
+
+	for (p = buf; *p; p++) {
+		/* <ctype.h> classifiers require an unsigned char value. */
+		if (!isdigit((unsigned char)*p))
+			goto out_free;
+	}
+
+	val = strtoul(buf, NULL, 10);
+	tmp = (int)val;
+	/* Reject values that do not survive the conversion to int. */
+	if (tmp < 0 || (unsigned long)tmp != val)
+		goto out_free;
+	*target = tmp;
+out_free:
+	free(buf);
+}
+
+/*
+ * Prompt for a percentage with 'msg' and store it in *target when the
+ * entered value lies in 0..100.  *target is left unchanged otherwise.
+ */
+static void prompt_percent(int *target, const char *msg)
+{
+	/*
+	 * Start outside 0..100 so that input prompt_integer() rejects is not
+	 * mistaken for an explicit "0".
+	 */
+	int tmp = -1;
+
+	prompt_integer(&tmp, msg);
+	if (tmp >= 0 && tmp <= 100)
+		*target = tmp;
+}
+
+/*
+ * Ask the user (prompt 'msg') for a symbol name and start annotating it.
+ *
+ * Any symbol currently referenced by *target has its source counters zeroed
+ * and *target is cleared first.  The entered name is then looked up among
+ * the active symbols; on a match parse_source() is called for it, otherwise
+ * an error is printed.  This function never stores the match in *target
+ * itself; presumably parse_source() records the new annotation target
+ * (it is not visible here -- TODO confirm).
+ */
+static void prompt_symbol(struct sym_entry **target, const char *msg)
+{
+	/* getline() allocates the buffer itself when handed NULL and 0. */
+	char *buf = NULL, *p;
+	struct sym_entry *syme = *target, *n, *found = NULL;
+	size_t dummy = 0;
+
+	/* zero counters of active symbol */
+	if (syme) {
+		pthread_mutex_lock(&syme->source_lock);
+		__zero_source_counters(syme);
+		*target = NULL;
+		pthread_mutex_unlock(&syme->source_lock);
+	}
+
+	fprintf(stdout, "\n%s: ", msg);
+	if (getline(&buf, &dummy, stdin) < 0)
+		goto out_free;
+
+	p = strchr(buf, '\n');
+	if (p)
+		*p = 0;
+
+	/* Only the read of the list head is done under the lock. */
+	pthread_mutex_lock(&active_symbols_lock);
+	syme = list_entry(active_symbols.next, struct sym_entry, node);
+	pthread_mutex_unlock(&active_symbols_lock);
+
+	list_for_each_entry_safe_from(syme, n, &active_symbols, node) {
+		/* The struct symbol is read from the memory right after the sym_entry. */
+		struct symbol *sym = (struct symbol *)(syme + 1);
+
+		if (!strcmp(buf, sym->name)) {
+			found = syme;
+			break;
+		}
+	}
+
+	if (!found) {
+		/* Report the name that was typed, and fall through to free buf. */
+		fprintf(stderr, "Sorry, %s is not active.\n", buf);
+		sleep(1);
+		goto out_free;
+	}
+
+	parse_source(found);
+
+out_free:
+	free(buf);
+}
+
+/*
+ * Print the interactive key help to stdout, with the current value of each
+ * setting in parentheses.  Keys tied to multiple counters or to vmlinux are
+ * listed only when nr_counters > 1 or vmlinux is set, respectively.
+ */
+static void print_mapped_keys(void)
+{
+	char *name = NULL;
+
+	/* The struct symbol is read from the memory right after the sym_entry. */
+	if (sym_filter_entry)
+		name = ((struct symbol *)(sym_filter_entry+1))->name;
+
+	fputs("\nMapped keys:\n", stdout);
+	printf("\t[d] display refresh delay. \t(%d)\n", delay_secs);
+	printf("\t[e] display entries (lines). \t(%d)\n", print_entries);
+
+	if (nr_counters > 1)
+		printf("\t[E] active event counter. \t(%s)\n", event_name(sym_counter));
+
+	printf("\t[f] profile display filter (count). \t(%d)\n", count_filter);
+
+	if (vmlinux) {
+		printf("\t[F] annotate display filter (percent). \t(%d%%)\n", sym_pcnt_filter);
+		printf("\t[s] annotate symbol. \t(%s)\n", name ? name : "NULL");
+		fputs("\t[S] stop annotation.\n", stdout);
+	}
+
+	if (nr_counters > 1)
+		printf("\t[w] toggle display weighted/count[E]r. \t(%d)\n", !!display_weighted);
+
+	printf("\t[z] toggle sample zeroing. \t(%d)\n", !!zero);
+	fputs("\t[qQ] quit.\n", stdout);
+}
+
+/*
+ * Return 1 if key 'c' currently triggers an action, 0 otherwise.
+ */
+static int key_mapped(int c)
+{
+	/* Keys that are always available. */
+	if (c == 'd' || c == 'e' || c == 'f' || c == 'z' || c == 'q' || c == 'Q')
+		return 1;
+
+	/* Counter selection and weighting need more than one counter. */
+	if (c == 'E' || c == 'w')
+		return nr_counters > 1;
+
+	/* Annotation keys are offered only when vmlinux is set. */
+	if (c == 'F' || c == 's' || c == 'S')
+		return vmlinux ? 1 : 0;
+
+	return 0;
+}
+
+/*
+ * Act on the interactive key 'c'.  If 'c' is not a currently mapped key,
+ * print the key help and read one more key in non-canonical, no-echo mode;
+ * if that key is unmapped as well, return without doing anything.
+ */
+static void handle_keypress(int c)
+{
+	if (!key_mapped(c)) {
+		struct pollfd stdin_poll = { .fd = 0, .events = POLLIN };
+		struct termios tc, save;
+
+		print_mapped_keys();
+		fprintf(stdout, "\nEnter selection, or unmapped key to continue: ");
+		fflush(stdout);
+
+		/* Read a single key without waiting for Enter and without echo. */
+		tcgetattr(0, &save);
+		tc = save;
+		tc.c_lflag &= ~(ICANON | ECHO);
+		tc.c_cc[VMIN] = 0;
+		tc.c_cc[VTIME] = 0;
+		tcsetattr(0, TCSANOW, &tc);
+
+		poll(&stdin_poll, 1, -1);
+		c = getc(stdin);
+
+		/* Restore the saved settings before any line-based prompt. */
+		tcsetattr(0, TCSAFLUSH, &save);
+		if (!key_mapped(c))
+			return;
+	}
+
+	switch (c) {
+	case 'd':
+		prompt_integer(&delay_secs, "Enter display delay");
+		break;
+	case 'e':
+		prompt_integer(&print_entries, "Enter display entries (lines)");
+		break;
+	case 'E':
+		if (nr_counters > 1) {
+			int i;
+
+			fprintf(stderr, "\nAvailable events:");
+			for (i = 0; i < nr_counters; i++)
+				fprintf(stderr, "\n\t%d %s", i, event_name(i));
+
+			prompt_integer(&sym_counter, "Enter details event counter");
+
+			/* Fall back to the first event on an out-of-range index. */
+			if (sym_counter >= nr_counters) {
+				fprintf(stderr, "Sorry, no such event, using %s.\n", event_name(0));
+				sym_counter = 0;
+				sleep(1);
+			}
+		} else sym_counter = 0;
+		break;
+	case 'f':
+		prompt_integer(&count_filter, "Enter display event count filter");
+		break;
+	case 'F':
+		prompt_percent(&sym_pcnt_filter, "Enter details display event filter (percent)");
+		break;
+	case 'q':
+	case 'Q':
+		printf("exiting.\n");
+		exit(0);
+	case 's':
+		prompt_symbol(&sym_filter_entry, "Enter details symbol");
+		break;
+	case 'S':
+		if (!sym_filter_entry)
+			break;
+		else {
+			struct sym_entry *syme = sym_filter_entry;
+
+			/* Stop annotating and clear the symbol's per-line counters. */
+			pthread_mutex_lock(&syme->source_lock);
+			sym_filter_entry = NULL;
+			__zero_source_counters(syme);
+			pthread_mutex_unlock(&syme->source_lock);
+		}
+		break;
+	case 'w':
+		/*
+		 * Toggle between 0 and all-ones.  Unlike a bare '~', this also
+		 * turns the flag off when it holds some other non-zero value.
+		 */
+		display_weighted = display_weighted ? 0 : -1;
+		break;
+	case 'z':
+		/*
+		 * Logical toggle: '~' would map 1 to -2 and back, so the flag
+		 * could never be cleared once it had been set to 1.
+		 */
+		zero = !zero;
+		break;
+	default:
+		break;
+	}
+}
+
static void *display_thread(void *arg __used)
{
struct pollfd stdin_poll = { .fd = 0, .events = POLLIN };
- int delay_msecs = delay_secs * 1000;
+ struct termios tc, save;
+ int delay_msecs, c;
+
+ tcgetattr(0, &save);
+ tc = save;
+ tc.c_lflag &= ~(ICANON | ECHO);
+ tc.c_cc[VMIN] = 0;
+ tc.c_cc[VTIME] = 0;
- printf("PerfTop refresh period: %d seconds\n", delay_secs);
+repeat:
+ delay_msecs = delay_secs * 1000;
+ tcsetattr(0, TCSANOW, &tc);
+ /* trash return*/
+ getc(stdin);
do {
print_sym_table();
} while (!poll(&stdin_poll, 1, delay_msecs) == 1);
- printf("key pressed - exiting.\n");
- exit(0);
+ c = getc(stdin);
+ tcsetattr(0, TCSAFLUSH, &save);
+
+ handle_keypress(c);
+ goto repeat;
return NULL;
}
@@ -293,7 +777,6 @@ static const char *skip_symbols[] = {
static int symbol_filter(struct dso *self, struct symbol *sym)
{
- static int filter_match;
struct sym_entry *syme;
const char *name = sym->name;
int i;
@@ -315,6 +798,10 @@ static int symbol_filter(struct dso *self, struct symbol *sym)
return 1;
syme = dso__sym_priv(self, sym);
+ pthread_mutex_init(&syme->source_lock, NULL);
+ if (!sym_filter_entry && sym_filter && !strcmp(name, sym_filter))
+ sym_filter_entry = syme;
+
for (i = 0; skip_symbols[i]; i++) {
if (!strcmp(skip_symbols[i], name)) {
syme->skip = 1;
@@ -322,29 +809,6 @@ static int symbol_filter(struct dso *self, struct symbol *sym)
}
}
- if (filter_match == 1) {
- filter_end = sym->start;
- filter_match = -1;
- if (filter_end - filter_start > 10000) {
- fprintf(stderr,
- "hm, too large filter symbol <%s> - skipping.\n",
- sym_filter);
- fprintf(stderr, "symbol filter start: %016lx\n",
- filter_start);
- fprintf(stderr, " end: %016lx\n",
- filter_end);
- filter_end = filter_start = 0;
- sym_filter = NULL;
- sleep(1);
- }
- }
-
- if (filter_match == 0 && sym_filter && !strcmp(name, sym_filter)) {
- filter_match = 1;
- filter_start = sym->start;
- }
-
-
return 0;
}
@@ -380,8 +844,6 @@ out_delete_dso:
return -1;
}
-#define TRACE_COUNT 3
-
/*
* Binary search in the histogram table and record the hit:
*/
@@ -394,6 +856,7 @@ static void record_ip(u64 ip, int counter)
if (!syme->skip) {
syme->count[counter]++;
+ record_precise_ip(syme, counter, ip);
pthread_mutex_lock(&active_symbols_lock);
if (list_empty(&syme->node) || !syme->node.next)
__list_insert_active_sym(syme);
@@ -690,8 +1153,8 @@ static const struct option options[] = {
"put the counters into a counter group"),
OPT_BOOLEAN('i', "inherit", &inherit,
"child tasks inherit counters"),
- OPT_STRING('s', "sym-filter", &sym_filter, "pattern",
- "only display symbols matchig this pattern"),
+ OPT_STRING('s', "sym-annotate", &sym_filter, "symbol name",
+ "symbol to annotate - requires -k option"),
OPT_BOOLEAN('z', "zero", &zero,
"zero history across updates"),
OPT_INTEGER('F', "freq", &freq,
@@ -734,6 +1197,7 @@ int cmd_top(int argc, const char **argv, const char *prefix __used)
delay_secs = 1;
parse_symbols();
+ parse_source(sym_filter_entry);
/*
* Fill in the ones not specifically initialized via -c:
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index 9d3c8141b8c1..011473411642 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -13,6 +13,7 @@
#include <stdio.h>
#include <stdbool.h>
#include <errno.h>
+#include <math.h>
#include "callchain.h"
@@ -26,10 +27,14 @@ rb_insert_callchain(struct rb_root *root, struct callchain_node *chain,
struct rb_node **p = &root->rb_node;
struct rb_node *parent = NULL;
struct callchain_node *rnode;
+ u64 chain_cumul = cumul_hits(chain);
while (*p) {
+ u64 rnode_cumul;
+
parent = *p;
rnode = rb_entry(parent, struct callchain_node, rb_node);
+ rnode_cumul = cumul_hits(rnode);
switch (mode) {
case CHAIN_FLAT:
@@ -40,7 +45,7 @@ rb_insert_callchain(struct rb_root *root, struct callchain_node *chain,
break;
case CHAIN_GRAPH_ABS: /* Falldown */
case CHAIN_GRAPH_REL:
- if (rnode->cumul_hit < chain->cumul_hit)
+ if (rnode_cumul < chain_cumul)
p = &(*p)->rb_left;
else
p = &(*p)->rb_right;
@@ -87,7 +92,7 @@ static void __sort_chain_graph_abs(struct callchain_node *node,
chain_for_each_child(child, node) {
__sort_chain_graph_abs(child, min_hit);
- if (child->cumul_hit >= min_hit)
+ if (cumul_hits(child) >= min_hit)
rb_insert_callchain(&node->rb_root, child,
CHAIN_GRAPH_ABS);
}
@@ -108,11 +113,11 @@ static void __sort_chain_graph_rel(struct callchain_node *node,
u64 min_hit;
node->rb_root = RB_ROOT;
- min_hit = node->cumul_hit * min_percent / 100.0;
+ min_hit = ceil(node->children_hit * min_percent);
chain_for_each_child(child, node) {
__sort_chain_graph_rel(child, min_percent);
- if (child->cumul_hit >= min_hit)
+ if (cumul_hits(child) >= min_hit)
rb_insert_callchain(&node->rb_root, child,
CHAIN_GRAPH_REL);
}
@@ -122,7 +127,7 @@ static void
sort_chain_graph_rel(struct rb_root *rb_root, struct callchain_node *chain_root,
u64 min_hit __used, struct callchain_param *param)
{
- __sort_chain_graph_rel(chain_root, param->min_percent);
+ __sort_chain_graph_rel(chain_root, param->min_percent / 100.0);
rb_root->rb_node = chain_root->rb_root.rb_node;
}
@@ -211,7 +216,8 @@ add_child(struct callchain_node *parent, struct ip_callchain *chain,
new = create_child(parent, false);
fill_node(new, chain, start, syms);
- new->cumul_hit = new->hit = 1;
+ new->children_hit = 0;
+ new->hit = 1;
}
/*
@@ -241,7 +247,8 @@ split_add_child(struct callchain_node *parent, struct ip_callchain *chain,
/* split the hits */
new->hit = parent->hit;
- new->cumul_hit = parent->cumul_hit;
+ new->children_hit = parent->children_hit;
+ parent->children_hit = cumul_hits(new);
new->val_nr = parent->val_nr - idx_local;
parent->val_nr = idx_local;
@@ -249,6 +256,7 @@ split_add_child(struct callchain_node *parent, struct ip_callchain *chain,
if (idx_total < chain->nr) {
parent->hit = 0;
add_child(parent, chain, idx_total, syms);
+ parent->children_hit++;
} else {
parent->hit = 1;
}
@@ -269,13 +277,13 @@ __append_chain_children(struct callchain_node *root, struct ip_callchain *chain,
unsigned int ret = __append_chain(rnode, chain, start, syms);
if (!ret)
- goto cumul;
+ goto inc_children_hit;
}
/* nothing in children, add to the current node */
add_child(root, chain, start, syms);
-cumul:
- root->cumul_hit++;
+inc_children_hit:
+ root->children_hit++;
}
static int
@@ -317,8 +325,6 @@ __append_chain(struct callchain_node *root, struct ip_callchain *chain,
/* we match 100% of the path, increment the hit */
if (i - start == root->val_nr && i == chain->nr) {
root->hit++;
- root->cumul_hit++;
-
return 0;
}
@@ -331,5 +337,7 @@ __append_chain(struct callchain_node *root, struct ip_callchain *chain,
void append_chain(struct callchain_node *root, struct ip_callchain *chain,
struct symbol **syms)
{
+ if (!chain->nr)
+ return;
__append_chain_children(root, chain, syms, 0);
}
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index 7812122bea1d..a926ae4f5a16 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -7,6 +7,7 @@
#include "symbol.h"
enum chain_mode {
+ CHAIN_NONE,
CHAIN_FLAT,
CHAIN_GRAPH_ABS,
CHAIN_GRAPH_REL
@@ -21,7 +22,7 @@ struct callchain_node {
struct rb_root rb_root; /* sorted tree of children */
unsigned int val_nr;
u64 hit;
- u64 cumul_hit; /* hit + hits of children */
+ u64 children_hit;
};
struct callchain_param;
@@ -48,6 +49,11 @@ static inline void callchain_init(struct callchain_node *node)
INIT_LIST_HEAD(&node->val);
}
+/* Return the node's own hit count plus its children_hit count. */
+static inline u64 cumul_hits(struct callchain_node *node)
+{
+	u64 total = node->hit;
+
+	total += node->children_hit;
+	return total;
+}
+
int register_callchain_param(struct callchain_param *param);
void append_chain(struct callchain_node *root, struct ip_callchain *chain,
struct symbol **syms);
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 450384b3bbe5..b92a457ca32e 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -185,6 +185,8 @@ static void do_read(int fd, void *buf, size_t size)
if (ret < 0)
die("failed to read");
+ if (ret == 0)
+ die("failed to read: missing data");
size -= ret;
buf += ret;
@@ -213,9 +215,10 @@ struct perf_header *perf_header__read(int fd)
for (i = 0; i < nr_attrs; i++) {
struct perf_header_attr *attr;
- off_t tmp = lseek(fd, 0, SEEK_CUR);
+ off_t tmp;
do_read(fd, &f_attr, sizeof(f_attr));
+ tmp = lseek(fd, 0, SEEK_CUR);
attr = perf_header_attr__new(&f_attr.attr);
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 7bdad8df22a6..044178408783 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -121,13 +121,29 @@ static unsigned long hw_cache_stat[C(MAX)] = {
(strcmp(sys_dirent.d_name, ".")) && \
(strcmp(sys_dirent.d_name, "..")))
+/*
+ * Check whether "<debugfs_path>/<sys>/<event>/id" can be opened for reading.
+ * Returns 0 when it can, -EINVAL otherwise.
+ */
+static int tp_event_has_id(struct dirent *sys_dir, struct dirent *evt_dir)
+{
+	char id_path[MAXPATHLEN];
+	int id_fd;
+
+	snprintf(id_path, sizeof(id_path), "%s/%s/%s/id", debugfs_path,
+		 sys_dir->d_name, evt_dir->d_name);
+
+	/* The file is only probed; its contents are not read here. */
+	id_fd = open(id_path, O_RDONLY);
+	if (id_fd >= 0) {
+		close(id_fd);
+		return 0;
+	}
+
+	return -EINVAL;
+}
+
#define for_each_event(sys_dirent, evt_dir, evt_dirent, evt_next, file, st) \
while (!readdir_r(evt_dir, &evt_dirent, &evt_next) && evt_next) \
if (snprintf(file, MAXPATHLEN, "%s/%s/%s", debugfs_path, \
sys_dirent.d_name, evt_dirent.d_name) && \
(!stat(file, &st)) && (S_ISDIR(st.st_mode)) && \
(strcmp(evt_dirent.d_name, ".")) && \
- (strcmp(evt_dirent.d_name, "..")))
+ (strcmp(evt_dirent.d_name, "..")) && \
+ (!tp_event_has_id(&sys_dirent, &evt_dirent)))
#define MAX_EVENT_LENGTH 30
@@ -223,9 +239,15 @@ char *event_name(int counter)
{
u64 config = attrs[counter].config;
int type = attrs[counter].type;
+
+ return __event_name(type, config);
+}
+
+char *__event_name(int type, u64 config)
+{
static char buf[32];
- if (attrs[counter].type == PERF_TYPE_RAW) {
+ if (type == PERF_TYPE_RAW) {
sprintf(buf, "raw 0x%llx", config);
return buf;
}
@@ -357,6 +379,7 @@ static int parse_tracepoint_event(const char **strp,
struct perf_counter_attr *attr)
{
const char *evt_name;
+ char *flags;
char sys_name[MAX_EVENT_LENGTH];
char id_buf[4];
int fd;
@@ -378,6 +401,15 @@ static int parse_tracepoint_event(const char **strp,
strncpy(sys_name, *strp, sys_length);
sys_name[sys_length] = '\0';
evt_name = evt_name + 1;
+
+ flags = strchr(evt_name, ':');
+ if (flags) {
+ *flags = '\0';
+ flags++;
+ if (!strncmp(flags, "record", strlen(flags)))
+ attr->sample_type |= PERF_SAMPLE_RAW;
+ }
+
evt_length = strlen(evt_name);
if (evt_length >= MAX_EVENT_LENGTH)
return 0;
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index 1ea5d09b6eb1..192a962e3a0f 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -10,6 +10,7 @@ extern int nr_counters;
extern struct perf_counter_attr attrs[MAX_COUNTERS];
extern char *event_name(int ctr);
+extern char *__event_name(int type, u64 config);
extern int parse_events(const struct option *opt, const char *str, int unset);
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 16ddca202948..5c0f42e6b33b 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -7,22 +7,17 @@
#include <gelf.h>
#include <elf.h>
-#ifndef NO_DEMANGLE
-#include <bfd.h>
-#else
-static inline
-char *bfd_demangle(void __used *v, const char __used *c, int __used i)
-{
- return NULL;
-}
-#endif
-
const char *sym_hist_filter;
-#ifndef DMGL_PARAMS
-#define DMGL_PARAMS (1 << 0) /* Include function args */
-#define DMGL_ANSI (1 << 1) /* Include const, volatile, etc */
-#endif
+/*
+ * Where a dso's symbol table came from.
+ *
+ * The order and values matter: dso__symtab_origin() uses them as indices
+ * into its letter table, and dso__load() walks DSO__ORIG_FEDORA through
+ * DSO__ORIG_DSO by incrementing the stored value until a file opens.
+ */
+enum dso_origin {
+ DSO__ORIG_KERNEL = 0,
+ DSO__ORIG_JAVA_JIT,
+ DSO__ORIG_FEDORA,
+ DSO__ORIG_UBUNTU,
+ DSO__ORIG_BUILDID,
+ DSO__ORIG_DSO,
+ DSO__ORIG_NOT_FOUND,
+};
static struct symbol *symbol__new(u64 start, u64 len,
const char *name, unsigned int priv_size,
@@ -81,6 +76,7 @@ struct dso *dso__new(const char *name, unsigned int sym_priv_size)
self->sym_priv_size = sym_priv_size;
self->find_symbol = dso__find_symbol;
self->slen_calculated = 0;
+ self->origin = DSO__ORIG_NOT_FOUND;
}
return self;
@@ -710,7 +706,7 @@ static char *dso__read_build_id(struct dso *self, int verbose)
++raw;
bid += 2;
}
- if (verbose)
+ if (verbose >= 2)
printf("%s(%s): %s\n", __func__, self->name, build_id);
out_elf_end:
elf_end(elf);
@@ -720,11 +716,26 @@ out:
return build_id;
}
+/*
+ * Return a one-letter code for where the dso's symbol table was loaded
+ * from, or '!' when 'self' is NULL or its origin has no letter assigned
+ * (DSO__ORIG_NOT_FOUND or any value past the table).
+ */
+char dso__symtab_origin(const struct dso *self)
+{
+	static const char origin[] = {
+		[DSO__ORIG_KERNEL] = 'k',
+		[DSO__ORIG_JAVA_JIT] = 'j',
+		[DSO__ORIG_FEDORA] = 'f',
+		[DSO__ORIG_UBUNTU] = 'u',
+		[DSO__ORIG_BUILDID] = 'b',
+		[DSO__ORIG_DSO] = 'd',
+	};
+
+	/*
+	 * The table ends at DSO__ORIG_DSO, so this bound covers
+	 * DSO__ORIG_NOT_FOUND and also keeps larger values from indexing
+	 * past the end of the array.
+	 */
+	if (self == NULL || self->origin >= sizeof(origin))
+		return '!';
+	return origin[self->origin];
+}
+
int dso__load(struct dso *self, symbol_filter_t filter, int verbose)
{
int size = PATH_MAX;
char *name = malloc(size), *build_id = NULL;
- int variant = 0;
int ret = -1;
int fd;
@@ -733,19 +744,26 @@ int dso__load(struct dso *self, symbol_filter_t filter, int verbose)
self->adjust_symbols = 0;
- if (strncmp(self->name, "/tmp/perf-", 10) == 0)
- return dso__load_perf_map(self, filter, verbose);
+ if (strncmp(self->name, "/tmp/perf-", 10) == 0) {
+ ret = dso__load_perf_map(self, filter, verbose);
+ self->origin = ret > 0 ? DSO__ORIG_JAVA_JIT :
+ DSO__ORIG_NOT_FOUND;
+ return ret;
+ }
+
+ self->origin = DSO__ORIG_FEDORA - 1;
more:
do {
- switch (variant) {
- case 0: /* Fedora */
+ self->origin++;
+ switch (self->origin) {
+ case DSO__ORIG_FEDORA:
snprintf(name, size, "/usr/lib/debug%s.debug", self->name);
break;
- case 1: /* Ubuntu */
+ case DSO__ORIG_UBUNTU:
snprintf(name, size, "/usr/lib/debug%s", self->name);
break;
- case 2:
+ case DSO__ORIG_BUILDID:
build_id = dso__read_build_id(self, verbose);
if (build_id != NULL) {
snprintf(name, size,
@@ -754,16 +772,15 @@ more:
free(build_id);
break;
}
- variant++;
+ self->origin++;
/* Fall thru */
- case 3: /* Sane people */
+ case DSO__ORIG_DSO:
snprintf(name, size, "%s", self->name);
break;
default:
goto out;
}
- variant++;
fd = open(name, O_RDONLY);
} while (fd < 0);
@@ -784,6 +801,8 @@ more:
}
out:
free(name);
+ if (ret < 0 && strstr(self->name, " (deleted)") != NULL)
+ return 0;
return ret;
}
@@ -899,6 +918,9 @@ int dso__load_kernel(struct dso *self, const char *vmlinux,
if (err <= 0)
err = dso__load_kallsyms(self, filter, verbose);
+ if (err > 0)
+ self->origin = DSO__ORIG_KERNEL;
+
return err;
}
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index 2f92b21c712d..b53bf0125c1b 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -7,6 +7,30 @@
#include <linux/rbtree.h>
#include "module.h"
+/*
+ * Select the bfd_demangle() implementation:
+ *  - HAVE_CPLUS_DEMANGLE: wrap cplus_demangle(), ignoring the first argument;
+ *  - NO_DEMANGLE: a stub that always returns NULL;
+ *  - otherwise: use the declaration from <bfd.h>.
+ */
+#ifdef HAVE_CPLUS_DEMANGLE
+extern char *cplus_demangle(const char *, int);
+
+static inline char *bfd_demangle(void __used *v, const char *c, int i)
+{
+ return cplus_demangle(c, i);
+}
+#else
+#ifdef NO_DEMANGLE
+static inline char *bfd_demangle(void __used *v, const char __used *c,
+ int __used i)
+{
+ return NULL;
+}
+#else
+#include <bfd.h>
+#endif
+#endif
+
+/* Fallback definitions for when no included header provides the DMGL_* flags. */
+#ifndef DMGL_PARAMS
+#define DMGL_PARAMS (1 << 0) /* Include function args */
+#define DMGL_ANSI (1 << 1) /* Include const, volatile, etc */
+#endif
+
+
struct symbol {
struct rb_node rb_node;
u64 start;
@@ -26,6 +50,7 @@ struct dso {
unsigned int sym_priv_size;
unsigned char adjust_symbols;
unsigned char slen_calculated;
+ unsigned char origin;
char name[0];
};
@@ -49,6 +74,7 @@ int dso__load_modules(struct dso *self, symbol_filter_t filter, int verbose);
int dso__load(struct dso *self, symbol_filter_t filter, int verbose);
size_t dso__fprintf(struct dso *self, FILE *fp);
+char dso__symtab_origin(const struct dso *self);
void symbol__init(void);
#endif /* _PERF_SYMBOL_ */
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index 1eddae94bab3..1150c6d5c7b8 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -95,8 +95,6 @@ static int ioapic_service(struct kvm_ioapic *ioapic, unsigned int idx)
if (injected && pent->fields.trig_mode == IOAPIC_LEVEL_TRIG)
pent->fields.remote_irr = 1;
}
- if (!pent->fields.trig_mode)
- ioapic->irr &= ~(1 << idx);
return injected;
}
@@ -136,7 +134,8 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
mask_after = ioapic->redirtbl[index].fields.mask;
if (mask_before != mask_after)
kvm_fire_mask_notifiers(ioapic->kvm, index, mask_after);
- if (ioapic->irr & (1 << index))
+ if (ioapic->redirtbl[index].fields.trig_mode == IOAPIC_LEVEL_TRIG
+ && ioapic->irr & (1 << index))
ioapic_service(ioapic, index);
break;
}
@@ -184,9 +183,10 @@ int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level)
if (!level)
ioapic->irr &= ~mask;
else {
+ int edge = (entry.fields.trig_mode == IOAPIC_EDGE_TRIG);
ioapic->irr |= mask;
- if ((!entry.fields.trig_mode && old_irr != ioapic->irr)
- || !entry.fields.remote_irr)
+ if ((edge && old_irr != ioapic->irr) ||
+ (!edge && !entry.fields.remote_irr))
ret = ioapic_service(ioapic, irq);
}
}
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index a8bd466d00cc..ddc17f0e2f35 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -160,7 +160,8 @@ void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
unsigned gsi = pin;
list_for_each_entry(e, &kvm->irq_routing, link)
- if (e->irqchip.irqchip == irqchip &&
+ if (e->type == KVM_IRQ_ROUTING_IRQCHIP &&
+ e->irqchip.irqchip == irqchip &&
e->irqchip.pin == pin) {
gsi = e->gsi;
break;
@@ -259,6 +260,7 @@ static int setup_routing_entry(struct kvm_kernel_irq_routing_entry *e,
int delta;
e->gsi = ue->gsi;
+ e->type = ue->type;
switch (ue->type) {
case KVM_IRQ_ROUTING_IRQCHIP:
delta = 0;