author     David S. Miller <davem@davemloft.net>  2018-10-06 14:43:42 -0700
committer  David S. Miller <davem@davemloft.net>  2018-10-06 14:43:42 -0700
commit     72438f8cef4e75a22140853baa4c68392c721b22 (patch)
tree       78b7b6dafd827e0d55150a227db3978d91c0b6dd
parent     fb4ee67529ff3e4c5874768477887c2df5714c96 (diff)
parent     c1d84a1b42ef70d8ae601df9cadedc7ed4f1beb1 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
-rw-r--r--  MAINTAINERS | 2
-rw-r--r--  arch/arm/mm/ioremap.c | 2
-rw-r--r--  arch/arm/tools/syscall.tbl | 1
-rw-r--r--  arch/powerpc/kvm/book3s_64_mmu_radix.c | 10
-rw-r--r--  arch/riscv/kernel/setup.c | 2
-rw-r--r--  arch/x86/entry/vdso/Makefile | 16
-rw-r--r--  arch/x86/entry/vdso/vclock_gettime.c | 26
-rw-r--r--  arch/x86/events/amd/uncore.c | 10
-rw-r--r--  arch/x86/events/intel/uncore_snbep.c | 14
-rw-r--r--  arch/x86/include/asm/perf_event.h | 8
-rw-r--r--  arch/x86/include/asm/uv/uv.h | 6
-rw-r--r--  arch/x86/kernel/cpu/amd.c | 2
-rw-r--r--  arch/x86/kernel/tsc.c | 4
-rw-r--r--  arch/x86/kvm/mmu.c | 24
-rw-r--r--  arch/x86/kvm/vmx.c | 137
-rw-r--r--  arch/x86/kvm/x86.c | 2
-rw-r--r--  drivers/base/power/main.c | 5
-rw-r--r--  drivers/crypto/caam/caamalg.c | 8
-rw-r--r--  drivers/crypto/chelsio/chcr_algo.c | 32
-rw-r--r--  drivers/crypto/chelsio/chcr_crypto.h | 2
-rw-r--r--  drivers/crypto/mxs-dcp.c | 53
-rw-r--r--  drivers/crypto/qat/qat_c3xxx/adf_drv.c | 6
-rw-r--r--  drivers/crypto/qat/qat_c3xxxvf/adf_drv.c | 6
-rw-r--r--  drivers/crypto/qat/qat_c62x/adf_drv.c | 6
-rw-r--r--  drivers/crypto/qat/qat_c62xvf/adf_drv.c | 6
-rw-r--r--  drivers/crypto/qat/qat_dh895xcc/adf_drv.c | 6
-rw-r--r--  drivers/crypto/qat/qat_dh895xccvf/adf_drv.c | 6
-rw-r--r--  drivers/gpio/gpiolib.c | 2
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 37
-rw-r--r--  drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 10
-rw-r--r--  drivers/gpu/drm/drm_client.c | 35
-rw-r--r--  drivers/gpu/drm/drm_fb_cma_helper.c | 4
-rw-r--r--  drivers/gpu/drm/drm_fb_helper.c | 4
-rw-r--r--  drivers/gpu/drm/drm_lease.c | 6
-rw-r--r--  drivers/gpu/drm/exynos/exynos_drm_iommu.h | 34
-rw-r--r--  drivers/gpu/drm/i2c/tda9950.c | 5
-rw-r--r--  drivers/gpu/drm/i915/i915_gpu_error.c | 88
-rw-r--r--  drivers/gpu/drm/i915/i915_gpu_error.h | 1
-rw-r--r--  drivers/gpu/drm/i915/i915_irq.c | 33
-rw-r--r--  drivers/gpu/drm/i915/i915_pci.c | 1
-rw-r--r--  drivers/iommu/amd_iommu.c | 2
-rw-r--r--  drivers/md/dm-cache-metadata.c | 4
-rw-r--r--  drivers/md/dm-cache-target.c | 9
-rw-r--r--  drivers/md/dm-mpath.c | 14
-rw-r--r--  drivers/md/dm-raid.c | 2
-rw-r--r--  drivers/md/dm-thin-metadata.c | 6
-rw-r--r--  drivers/net/dsa/b53/b53_common.c | 4
-rw-r--r--  drivers/net/ethernet/broadcom/bnxt/bnxt.c | 14
-rw-r--r--  drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c | 6
-rw-r--r--  drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c | 17
-rw-r--r--  drivers/net/ethernet/emulex/benet/be_main.c | 5
-rw-r--r--  drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c | 9
-rw-r--r--  drivers/net/ethernet/mellanox/mlxsw/pci.c | 11
-rw-r--r--  drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 2
-rw-r--r--  drivers/net/hamradio/yam.c | 4
-rw-r--r--  drivers/net/phy/phylink.c | 48
-rw-r--r--  drivers/net/team/team.c | 6
-rw-r--r--  drivers/net/usb/smsc75xx.c | 1
-rw-r--r--  drivers/pci/controller/pci-mvebu.c | 52
-rw-r--r--  drivers/pci/pci.c | 27
-rw-r--r--  fs/cifs/cifsglob.h | 1
-rw-r--r--  fs/cifs/connect.c | 13
-rw-r--r--  fs/cifs/smb2ops.c | 2
-rw-r--r--  fs/cifs/transport.c | 21
-rw-r--r--  fs/ioctl.c | 2
-rw-r--r--  fs/iomap.c | 2
-rw-r--r--  fs/nfsd/vfs.c | 3
-rw-r--r--  fs/ocfs2/dlm/dlmmaster.c | 4
-rw-r--r--  fs/ocfs2/refcounttree.c | 16
-rw-r--r--  fs/overlayfs/copy_up.c | 2
-rw-r--r--  fs/overlayfs/file.c | 2
-rw-r--r--  fs/overlayfs/inode.c | 2
-rw-r--r--  fs/overlayfs/namei.c | 2
-rw-r--r--  fs/overlayfs/overlayfs.h | 4
-rw-r--r--  fs/overlayfs/util.c | 3
-rw-r--r--  fs/proc/base.c | 14
-rw-r--r--  fs/read_write.c | 17
-rw-r--r--  fs/xfs/libxfs/xfs_attr.c | 28
-rw-r--r--  fs/xfs/libxfs/xfs_attr_remote.c | 10
-rw-r--r--  fs/xfs/libxfs/xfs_bmap.c | 24
-rw-r--r--  fs/xfs/libxfs/xfs_format.h | 2
-rw-r--r--  fs/xfs/libxfs/xfs_inode_buf.c | 30
-rw-r--r--  fs/xfs/scrub/alloc.c | 1
-rw-r--r--  fs/xfs/scrub/inode.c | 4
-rw-r--r--  fs/xfs/xfs_bmap_util.c | 20
-rw-r--r--  fs/xfs/xfs_buf_item.c | 119
-rw-r--r--  fs/xfs/xfs_buf_item.h | 1
-rw-r--r--  fs/xfs/xfs_inode.c | 10
-rw-r--r--  fs/xfs/xfs_iops.c | 12
-rw-r--r--  fs/xfs/xfs_log_recover.c | 10
-rw-r--r--  fs/xfs/xfs_reflink.c | 137
-rw-r--r--  fs/xfs/xfs_trace.h | 1
-rw-r--r--  fs/xfs/xfs_trans.c | 10
-rw-r--r--  fs/xfs/xfs_trans_buf.c | 99
-rw-r--r--  include/drm/drm_client.h | 5
-rw-r--r--  include/linux/fs.h | 17
-rw-r--r--  include/linux/hugetlb.h | 14
-rw-r--r--  include/linux/mm.h | 6
-rw-r--r--  include/linux/mmzone.h | 6
-rw-r--r--  include/linux/virtio_net.h | 18
-rw-r--r--  include/trace/events/migrate.h | 27
-rw-r--r--  include/uapi/asm-generic/hugetlb_encode.h | 2
-rw-r--r--  include/uapi/linux/memfd.h | 2
-rw-r--r--  include/uapi/linux/mman.h | 2
-rw-r--r--  include/uapi/linux/shm.h | 2
-rw-r--r--  ipc/shm.c | 2
-rw-r--r--  kernel/bpf/local_storage.c | 5
-rw-r--r--  kernel/bpf/verifier.c | 10
-rw-r--r--  kernel/events/core.c | 11
-rw-r--r--  kernel/locking/test-ww_mutex.c | 10
-rw-r--r--  kernel/sched/core.c | 2
-rw-r--r--  kernel/sched/deadline.c | 2
-rw-r--r--  kernel/sched/fair.c | 104
-rw-r--r--  kernel/sched/sched.h | 3
-rw-r--r--  mm/gup_benchmark.c | 3
-rw-r--r--  mm/huge_memory.c | 2
-rw-r--r--  mm/hugetlb.c | 90
-rw-r--r--  mm/madvise.c | 2
-rw-r--r--  mm/migrate.c | 62
-rw-r--r--  mm/page_alloc.c | 2
-rw-r--r--  mm/rmap.c | 42
-rw-r--r--  mm/vmscan.c | 7
-rw-r--r--  mm/vmstat.c | 4
-rw-r--r--  net/bpfilter/bpfilter_kern.c | 4
-rw-r--r--  net/core/rtnetlink.c | 29
-rw-r--r--  net/ipv6/raw.c | 29
-rw-r--r--  net/mac80211/cfg.c | 2
-rw-r--r--  net/openvswitch/conntrack.c | 4
-rw-r--r--  net/packet/af_packet.c | 11
-rw-r--r--  net/sched/sch_api.c | 24
-rw-r--r--  net/wireless/reg.c | 7
-rw-r--r--  net/wireless/wext-compat.c | 14
-rw-r--r--  sound/hda/hdac_i915.c | 4
-rw-r--r--  sound/pci/hda/patch_realtek.c | 1
-rwxr-xr-x  tools/kvm/kvm_stat/kvm_stat | 2
-rw-r--r--  tools/testing/selftests/x86/test_vdso.c | 172
136 files changed, 1494 insertions(+), 806 deletions(-)
diff --git a/MAINTAINERS b/MAINTAINERS
index bb5f431043f7..86e83ea6ad08 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -324,7 +324,6 @@ F: Documentation/ABI/testing/sysfs-bus-acpi
F: Documentation/ABI/testing/configfs-acpi
F: drivers/pci/*acpi*
F: drivers/pci/*/*acpi*
-F: drivers/pci/*/*/*acpi*
F: tools/power/acpi/
ACPI APEI
@@ -8608,7 +8607,6 @@ F: include/linux/spinlock*.h
F: arch/*/include/asm/spinlock*.h
F: include/linux/rwlock*.h
F: include/linux/mutex*.h
-F: arch/*/include/asm/mutex*.h
F: include/linux/rwsem*.h
F: arch/*/include/asm/rwsem.h
F: include/linux/seqlock.h
diff --git a/arch/arm/mm/ioremap.c b/arch/arm/mm/ioremap.c
index fc91205ff46c..5bf9443cfbaa 100644
--- a/arch/arm/mm/ioremap.c
+++ b/arch/arm/mm/ioremap.c
@@ -473,7 +473,7 @@ void pci_ioremap_set_mem_type(int mem_type)
int pci_ioremap_io(unsigned int offset, phys_addr_t phys_addr)
{
- BUG_ON(offset + SZ_64K > IO_SPACE_LIMIT);
+ BUG_ON(offset + SZ_64K - 1 > IO_SPACE_LIMIT);
return ioremap_page_range(PCI_IO_VIRT_BASE + offset,
PCI_IO_VIRT_BASE + offset + SZ_64K,
diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl
index fbc74b5fa3ed..8edf93b4490f 100644
--- a/arch/arm/tools/syscall.tbl
+++ b/arch/arm/tools/syscall.tbl
@@ -413,3 +413,4 @@
396 common pkey_free sys_pkey_free
397 common statx sys_statx
398 common rseq sys_rseq
+399 common io_pgetevents sys_io_pgetevents
diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
index 933c574e1cf7..998f8d089ac7 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
@@ -646,6 +646,16 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
*/
local_irq_disable();
ptep = __find_linux_pte(vcpu->arch.pgdir, hva, NULL, &shift);
+ /*
+ * If the PTE disappeared temporarily due to a THP
+ * collapse, just return and let the guest try again.
+ */
+ if (!ptep) {
+ local_irq_enable();
+ if (page)
+ put_page(page);
+ return RESUME_GUEST;
+ }
pte = *ptep;
local_irq_enable();
diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
index aee603123030..b2d26d9d8489 100644
--- a/arch/riscv/kernel/setup.c
+++ b/arch/riscv/kernel/setup.c
@@ -186,7 +186,7 @@ static void __init setup_bootmem(void)
BUG_ON(mem_size == 0);
set_max_mapnr(PFN_DOWN(mem_size));
- max_low_pfn = pfn_base + PFN_DOWN(mem_size);
+ max_low_pfn = memblock_end_of_DRAM();
#ifdef CONFIG_BLK_DEV_INITRD
setup_initrd();
diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile
index fa3f439f0a92..141d415a8c80 100644
--- a/arch/x86/entry/vdso/Makefile
+++ b/arch/x86/entry/vdso/Makefile
@@ -68,7 +68,13 @@ $(obj)/vdso-image-%.c: $(obj)/vdso%.so.dbg $(obj)/vdso%.so $(obj)/vdso2c FORCE
CFL := $(PROFILING) -mcmodel=small -fPIC -O2 -fasynchronous-unwind-tables -m64 \
$(filter -g%,$(KBUILD_CFLAGS)) $(call cc-option, -fno-stack-protector) \
-fno-omit-frame-pointer -foptimize-sibling-calls \
- -DDISABLE_BRANCH_PROFILING -DBUILD_VDSO $(RETPOLINE_VDSO_CFLAGS)
+ -DDISABLE_BRANCH_PROFILING -DBUILD_VDSO
+
+ifdef CONFIG_RETPOLINE
+ifneq ($(RETPOLINE_VDSO_CFLAGS),)
+ CFL += $(RETPOLINE_VDSO_CFLAGS)
+endif
+endif
$(vobjs): KBUILD_CFLAGS := $(filter-out $(GCC_PLUGINS_CFLAGS) $(RETPOLINE_CFLAGS),$(KBUILD_CFLAGS)) $(CFL)
@@ -138,7 +144,13 @@ KBUILD_CFLAGS_32 += $(call cc-option, -fno-stack-protector)
KBUILD_CFLAGS_32 += $(call cc-option, -foptimize-sibling-calls)
KBUILD_CFLAGS_32 += -fno-omit-frame-pointer
KBUILD_CFLAGS_32 += -DDISABLE_BRANCH_PROFILING
-KBUILD_CFLAGS_32 += $(RETPOLINE_VDSO_CFLAGS)
+
+ifdef CONFIG_RETPOLINE
+ifneq ($(RETPOLINE_VDSO_CFLAGS),)
+ KBUILD_CFLAGS_32 += $(RETPOLINE_VDSO_CFLAGS)
+endif
+endif
+
$(obj)/vdso32.so.dbg: KBUILD_CFLAGS = $(KBUILD_CFLAGS_32)
$(obj)/vdso32.so.dbg: FORCE \
diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c
index f19856d95c60..e48ca3afa091 100644
--- a/arch/x86/entry/vdso/vclock_gettime.c
+++ b/arch/x86/entry/vdso/vclock_gettime.c
@@ -43,8 +43,9 @@ extern u8 hvclock_page
notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
{
long ret;
- asm("syscall" : "=a" (ret) :
- "0" (__NR_clock_gettime), "D" (clock), "S" (ts) : "memory");
+ asm ("syscall" : "=a" (ret), "=m" (*ts) :
+ "0" (__NR_clock_gettime), "D" (clock), "S" (ts) :
+ "memory", "rcx", "r11");
return ret;
}
@@ -52,8 +53,9 @@ notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz)
{
long ret;
- asm("syscall" : "=a" (ret) :
- "0" (__NR_gettimeofday), "D" (tv), "S" (tz) : "memory");
+ asm ("syscall" : "=a" (ret), "=m" (*tv), "=m" (*tz) :
+ "0" (__NR_gettimeofday), "D" (tv), "S" (tz) :
+ "memory", "rcx", "r11");
return ret;
}
@@ -64,13 +66,13 @@ notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
{
long ret;
- asm(
+ asm (
"mov %%ebx, %%edx \n"
- "mov %2, %%ebx \n"
+ "mov %[clock], %%ebx \n"
"call __kernel_vsyscall \n"
"mov %%edx, %%ebx \n"
- : "=a" (ret)
- : "0" (__NR_clock_gettime), "g" (clock), "c" (ts)
+ : "=a" (ret), "=m" (*ts)
+ : "0" (__NR_clock_gettime), [clock] "g" (clock), "c" (ts)
: "memory", "edx");
return ret;
}
@@ -79,13 +81,13 @@ notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz)
{
long ret;
- asm(
+ asm (
"mov %%ebx, %%edx \n"
- "mov %2, %%ebx \n"
+ "mov %[tv], %%ebx \n"
"call __kernel_vsyscall \n"
"mov %%edx, %%ebx \n"
- : "=a" (ret)
- : "0" (__NR_gettimeofday), "g" (tv), "c" (tz)
+ : "=a" (ret), "=m" (*tv), "=m" (*tz)
+ : "0" (__NR_gettimeofday), [tv] "g" (tv), "c" (tz)
: "memory", "edx");
return ret;
}
diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c
index 981ba5e8241b..8671de126eac 100644
--- a/arch/x86/events/amd/uncore.c
+++ b/arch/x86/events/amd/uncore.c
@@ -36,6 +36,7 @@
static int num_counters_llc;
static int num_counters_nb;
+static bool l3_mask;
static HLIST_HEAD(uncore_unused_list);
@@ -209,6 +210,13 @@ static int amd_uncore_event_init(struct perf_event *event)
hwc->config = event->attr.config & AMD64_RAW_EVENT_MASK_NB;
hwc->idx = -1;
+ /*
+ * SliceMask and ThreadMask need to be set for certain L3 events in
+ * Family 17h. For other events, the two fields do not affect the count.
+ */
+ if (l3_mask)
+ hwc->config |= (AMD64_L3_SLICE_MASK | AMD64_L3_THREAD_MASK);
+
if (event->cpu < 0)
return -EINVAL;
@@ -525,6 +533,7 @@ static int __init amd_uncore_init(void)
amd_llc_pmu.name = "amd_l3";
format_attr_event_df.show = &event_show_df;
format_attr_event_l3.show = &event_show_l3;
+ l3_mask = true;
} else {
num_counters_nb = NUM_COUNTERS_NB;
num_counters_llc = NUM_COUNTERS_L2;
@@ -532,6 +541,7 @@ static int __init amd_uncore_init(void)
amd_llc_pmu.name = "amd_l2";
format_attr_event_df = format_attr_event;
format_attr_event_l3 = format_attr_event;
+ l3_mask = false;
}
amd_nb_pmu.attr_groups = amd_uncore_attr_groups_df;
diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c
index 51d7c117e3c7..c07bee31abe8 100644
--- a/arch/x86/events/intel/uncore_snbep.c
+++ b/arch/x86/events/intel/uncore_snbep.c
@@ -3061,7 +3061,7 @@ static struct event_constraint bdx_uncore_pcu_constraints[] = {
void bdx_uncore_cpu_init(void)
{
- int pkg = topology_phys_to_logical_pkg(0);
+ int pkg = topology_phys_to_logical_pkg(boot_cpu_data.phys_proc_id);
if (bdx_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores)
bdx_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores;
@@ -3931,16 +3931,16 @@ static const struct pci_device_id skx_uncore_pci_ids[] = {
.driver_data = UNCORE_PCI_DEV_FULL_DATA(21, 5, SKX_PCI_UNCORE_M2PCIE, 3),
},
{ /* M3UPI0 Link 0 */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x204C),
- .driver_data = UNCORE_PCI_DEV_FULL_DATA(18, 0, SKX_PCI_UNCORE_M3UPI, 0),
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x204D),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(18, 1, SKX_PCI_UNCORE_M3UPI, 0),
},
{ /* M3UPI0 Link 1 */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x204D),
- .driver_data = UNCORE_PCI_DEV_FULL_DATA(18, 1, SKX_PCI_UNCORE_M3UPI, 1),
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x204E),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(18, 2, SKX_PCI_UNCORE_M3UPI, 1),
},
{ /* M3UPI1 Link 2 */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x204C),
- .driver_data = UNCORE_PCI_DEV_FULL_DATA(18, 4, SKX_PCI_UNCORE_M3UPI, 2),
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x204D),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(18, 5, SKX_PCI_UNCORE_M3UPI, 2),
},
{ /* end: all zeroes */ }
};
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 12f54082f4c8..78241b736f2a 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -46,6 +46,14 @@
#define INTEL_ARCH_EVENT_MASK \
(ARCH_PERFMON_EVENTSEL_UMASK | ARCH_PERFMON_EVENTSEL_EVENT)
+#define AMD64_L3_SLICE_SHIFT 48
+#define AMD64_L3_SLICE_MASK \
+ ((0xFULL) << AMD64_L3_SLICE_SHIFT)
+
+#define AMD64_L3_THREAD_SHIFT 56
+#define AMD64_L3_THREAD_MASK \
+ ((0xFFULL) << AMD64_L3_THREAD_SHIFT)
+
#define X86_RAW_EVENT_MASK \
(ARCH_PERFMON_EVENTSEL_EVENT | \
ARCH_PERFMON_EVENTSEL_UMASK | \
diff --git a/arch/x86/include/asm/uv/uv.h b/arch/x86/include/asm/uv/uv.h
index a80c0673798f..e60c45fd3679 100644
--- a/arch/x86/include/asm/uv/uv.h
+++ b/arch/x86/include/asm/uv/uv.h
@@ -10,8 +10,13 @@ struct cpumask;
struct mm_struct;
#ifdef CONFIG_X86_UV
+#include <linux/efi.h>
extern enum uv_system_type get_uv_system_type(void);
+static inline bool is_early_uv_system(void)
+{
+ return !((efi.uv_systab == EFI_INVALID_TABLE_ADDR) || !efi.uv_systab);
+}
extern int is_uv_system(void);
extern int is_uv_hubless(void);
extern void uv_cpu_init(void);
@@ -23,6 +28,7 @@ extern const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
#else /* X86_UV */
static inline enum uv_system_type get_uv_system_type(void) { return UV_NONE; }
+static inline bool is_early_uv_system(void) { return 0; }
static inline int is_uv_system(void) { return 0; }
static inline int is_uv_hubless(void) { return 0; }
static inline void uv_cpu_init(void) { }
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 22ab408177b2..eeea634bee0a 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -922,7 +922,7 @@ static void init_amd(struct cpuinfo_x86 *c)
static unsigned int amd_size_cache(struct cpuinfo_x86 *c, unsigned int size)
{
/* AMD errata T13 (order #21922) */
- if ((c->x86 == 6)) {
+ if (c->x86 == 6) {
/* Duron Rev A0 */
if (c->x86_model == 3 && c->x86_stepping == 0)
size = 64;
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 6490f618e096..b52bd2b6cdb4 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -26,6 +26,7 @@
#include <asm/apic.h>
#include <asm/intel-family.h>
#include <asm/i8259.h>
+#include <asm/uv/uv.h>
unsigned int __read_mostly cpu_khz; /* TSC clocks / usec, not used here */
EXPORT_SYMBOL(cpu_khz);
@@ -1433,6 +1434,9 @@ void __init tsc_early_init(void)
{
if (!boot_cpu_has(X86_FEATURE_TSC))
return;
+ /* Don't change UV TSC multi-chassis synchronization */
+ if (is_early_uv_system())
+ return;
if (!determine_cpu_tsc_frequencies(true))
return;
loops_per_jiffy = get_loops_per_jiffy();
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index d7e9bce6ff61..51b953ad9d4e 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -249,6 +249,17 @@ static u64 __read_mostly shadow_nonpresent_or_rsvd_mask;
*/
static const u64 shadow_nonpresent_or_rsvd_mask_len = 5;
+/*
+ * In some cases, we need to preserve the GFN of a non-present or reserved
+ * SPTE when we usurp the upper five bits of the physical address space to
+ * defend against L1TF, e.g. for MMIO SPTEs. To preserve the GFN, we'll
+ * shift bits of the GFN that overlap with shadow_nonpresent_or_rsvd_mask
+ * left into the reserved bits, i.e. the GFN in the SPTE will be split into
+ * high and low parts. This mask covers the lower bits of the GFN.
+ */
+static u64 __read_mostly shadow_nonpresent_or_rsvd_lower_gfn_mask;
+
+
static void mmu_spte_set(u64 *sptep, u64 spte);
static union kvm_mmu_page_role
kvm_mmu_calc_root_page_role(struct kvm_vcpu *vcpu);
@@ -357,9 +368,7 @@ static bool is_mmio_spte(u64 spte)
static gfn_t get_mmio_spte_gfn(u64 spte)
{
- u64 mask = generation_mmio_spte_mask(MMIO_GEN_MASK) | shadow_mmio_mask |
- shadow_nonpresent_or_rsvd_mask;
- u64 gpa = spte & ~mask;
+ u64 gpa = spte & shadow_nonpresent_or_rsvd_lower_gfn_mask;
gpa |= (spte >> shadow_nonpresent_or_rsvd_mask_len)
& shadow_nonpresent_or_rsvd_mask;
@@ -423,6 +432,8 @@ EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes);
static void kvm_mmu_reset_all_pte_masks(void)
{
+ u8 low_phys_bits;
+
shadow_user_mask = 0;
shadow_accessed_mask = 0;
shadow_dirty_mask = 0;
@@ -437,12 +448,17 @@ static void kvm_mmu_reset_all_pte_masks(void)
* appropriate mask to guard against L1TF attacks. Otherwise, it is
* assumed that the CPU is not vulnerable to L1TF.
*/
+ low_phys_bits = boot_cpu_data.x86_phys_bits;
if (boot_cpu_data.x86_phys_bits <
- 52 - shadow_nonpresent_or_rsvd_mask_len)
+ 52 - shadow_nonpresent_or_rsvd_mask_len) {
shadow_nonpresent_or_rsvd_mask =
rsvd_bits(boot_cpu_data.x86_phys_bits -
shadow_nonpresent_or_rsvd_mask_len,
boot_cpu_data.x86_phys_bits - 1);
+ low_phys_bits -= shadow_nonpresent_or_rsvd_mask_len;
+ }
+ shadow_nonpresent_or_rsvd_lower_gfn_mask =
+ GENMASK_ULL(low_phys_bits - 1, PAGE_SHIFT);
}
static int is_cpuid_PSE36(void)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 06412ba46aa3..612fd17be635 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -121,7 +121,6 @@ module_param_named(pml, enable_pml, bool, S_IRUGO);
#define MSR_BITMAP_MODE_X2APIC 1
#define MSR_BITMAP_MODE_X2APIC_APICV 2
-#define MSR_BITMAP_MODE_LM 4
#define KVM_VMX_TSC_MULTIPLIER_MAX 0xffffffffffffffffULL
@@ -857,6 +856,7 @@ struct nested_vmx {
/* to migrate it to L2 if VM_ENTRY_LOAD_DEBUG_CONTROLS is off */
u64 vmcs01_debugctl;
+ u64 vmcs01_guest_bndcfgs;
u16 vpid02;
u16 last_vpid;
@@ -2899,8 +2899,7 @@ static void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
vmx->msr_host_kernel_gs_base = read_msr(MSR_KERNEL_GS_BASE);
}
- if (is_long_mode(&vmx->vcpu))
- wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
+ wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
#else
savesegment(fs, fs_sel);
savesegment(gs, gs_sel);
@@ -2951,8 +2950,7 @@ static void vmx_prepare_switch_to_host(struct vcpu_vmx *vmx)
vmx->loaded_cpu_state = NULL;
#ifdef CONFIG_X86_64
- if (is_long_mode(&vmx->vcpu))
- rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
+ rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
#endif
if (host_state->ldt_sel || (host_state->gs_sel & 7)) {
kvm_load_ldt(host_state->ldt_sel);
@@ -2980,24 +2978,19 @@ static void vmx_prepare_switch_to_host(struct vcpu_vmx *vmx)
#ifdef CONFIG_X86_64
static u64 vmx_read_guest_kernel_gs_base(struct vcpu_vmx *vmx)
{
- if (is_long_mode(&vmx->vcpu)) {
- preempt_disable();
- if (vmx->loaded_cpu_state)
- rdmsrl(MSR_KERNEL_GS_BASE,
- vmx->msr_guest_kernel_gs_base);
- preempt_enable();
- }
+ preempt_disable();
+ if (vmx->loaded_cpu_state)
+ rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
+ preempt_enable();
return vmx->msr_guest_kernel_gs_base;
}
static void vmx_write_guest_kernel_gs_base(struct vcpu_vmx *vmx, u64 data)
{
- if (is_long_mode(&vmx->vcpu)) {
- preempt_disable();
- if (vmx->loaded_cpu_state)
- wrmsrl(MSR_KERNEL_GS_BASE, data);
- preempt_enable();
- }
+ preempt_disable();
+ if (vmx->loaded_cpu_state)
+ wrmsrl(MSR_KERNEL_GS_BASE, data);
+ preempt_enable();
vmx->msr_guest_kernel_gs_base = data;
}
#endif
@@ -3533,9 +3526,6 @@ static void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, bool apicv)
VM_EXIT_LOAD_IA32_EFER | VM_EXIT_SAVE_IA32_EFER |
VM_EXIT_SAVE_VMX_PREEMPTION_TIMER | VM_EXIT_ACK_INTR_ON_EXIT;
- if (kvm_mpx_supported())
- msrs->exit_ctls_high |= VM_EXIT_CLEAR_BNDCFGS;
-
/* We support free control of debug control saving. */
msrs->exit_ctls_low &= ~VM_EXIT_SAVE_DEBUG_CONTROLS;
@@ -3552,8 +3542,6 @@ static void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, bool apicv)
VM_ENTRY_LOAD_IA32_PAT;
msrs->entry_ctls_high |=
(VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR | VM_ENTRY_LOAD_IA32_EFER);
- if (kvm_mpx_supported())
- msrs->entry_ctls_high |= VM_ENTRY_LOAD_BNDCFGS;
/* We support free control of debug control loading. */
msrs->entry_ctls_low &= ~VM_ENTRY_LOAD_DEBUG_CONTROLS;
@@ -3601,12 +3589,12 @@ static void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, bool apicv)
msrs->secondary_ctls_high);
msrs->secondary_ctls_low = 0;
msrs->secondary_ctls_high &=
- SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
SECONDARY_EXEC_DESC |
SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
SECONDARY_EXEC_APIC_REGISTER_VIRT |
SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
SECONDARY_EXEC_WBINVD_EXITING;
+
/*
* We can emulate "VMCS shadowing," even if the hardware
* doesn't support it.
@@ -3663,6 +3651,10 @@ static void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, bool apicv)
msrs->secondary_ctls_high |=
SECONDARY_EXEC_UNRESTRICTED_GUEST;
+ if (flexpriority_enabled)
+ msrs->secondary_ctls_high |=
+ SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
+
/* miscellaneous data */
rdmsr(MSR_IA32_VMX_MISC,
msrs->misc_low,
@@ -5073,19 +5065,6 @@ static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer)
if (!msr)
return;
- /*
- * MSR_KERNEL_GS_BASE is not intercepted when the guest is in
- * 64-bit mode as a 64-bit kernel may frequently access the
- * MSR. This means we need to manually save/restore the MSR
- * when switching between guest and host state, but only if
- * the guest is in 64-bit mode. Sync our cached value if the
- * guest is transitioning to 32-bit mode and the CPU contains
- * guest state, i.e. the cache is stale.
- */
-#ifdef CONFIG_X86_64
- if (!(efer & EFER_LMA))
- (void)vmx_read_guest_kernel_gs_base(vmx);
-#endif
vcpu->arch.efer = efer;
if (efer & EFER_LMA) {
vm_entry_controls_setbit(to_vmx(vcpu), VM_ENTRY_IA32E_MODE);
@@ -6078,9 +6057,6 @@ static u8 vmx_msr_bitmap_mode(struct kvm_vcpu *vcpu)
mode |= MSR_BITMAP_MODE_X2APIC_APICV;
}
- if (is_long_mode(vcpu))
- mode |= MSR_BITMAP_MODE_LM;
-
return mode;
}
@@ -6121,9 +6097,6 @@ static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu)
if (!changed)
return;
- vmx_set_intercept_for_msr(msr_bitmap, MSR_KERNEL_GS_BASE, MSR_TYPE_RW,
- !(mode & MSR_BITMAP_MODE_LM));
-
if (changed & (MSR_BITMAP_MODE_X2APIC | MSR_BITMAP_MODE_X2APIC_APICV))
vmx_update_msr_bitmap_x2apic(msr_bitmap, mode);
@@ -6189,6 +6162,11 @@ static void vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu)
nested_mark_vmcs12_pages_dirty(vcpu);
}
+static u8 vmx_get_rvi(void)
+{
+ return vmcs_read16(GUEST_INTR_STATUS) & 0xff;
+}
+
static bool vmx_guest_apic_has_interrupt(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -6201,7 +6179,7 @@ static bool vmx_guest_apic_has_interrupt(struct kvm_vcpu *vcpu)
WARN_ON_ONCE(!vmx->nested.virtual_apic_page))
return false;
- rvi = vmcs_read16(GUEST_INTR_STATUS) & 0xff;
+ rvi = vmx_get_rvi();
vapic_page = kmap(vmx->nested.virtual_apic_page);
vppr = *((u32 *)(vapic_page + APIC_PROCPRI));
@@ -10245,15 +10223,16 @@ static void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu)
if (!lapic_in_kernel(vcpu))
return;
+ if (!flexpriority_enabled &&
+ !cpu_has_vmx_virtualize_x2apic_mode())
+ return;
+
/* Postpone execution until vmcs01 is the current VMCS. */
if (is_guest_mode(vcpu)) {
to_vmx(vcpu)->nested.change_vmcs01_virtual_apic_mode = true;
return;
}
- if (!cpu_need_tpr_shadow(vcpu))
- return;
-
sec_exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
sec_exec_control &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE);
@@ -10375,6 +10354,14 @@ static int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu)
return max_irr;
}
+static u8 vmx_has_apicv_interrupt(struct kvm_vcpu *vcpu)
+{
+ u8 rvi = vmx_get_rvi();
+ u8 vppr = kvm_lapic_get_reg(vcpu->arch.apic, APIC_PROCPRI);
+
+ return ((rvi & 0xf0) > (vppr & 0xf0));
+}
+
static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
{
if (!kvm_vcpu_apicv_active(vcpu))
@@ -11264,6 +11251,23 @@ static void nested_vmx_cr_fixed1_bits_update(struct kvm_vcpu *vcpu)
#undef cr4_fixed1_update
}
+static void nested_vmx_entry_exit_ctls_update(struct kvm_vcpu *vcpu)
+{
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+ if (kvm_mpx_supported()) {
+ bool mpx_enabled = guest_cpuid_has(vcpu, X86_FEATURE_MPX);
+
+ if (mpx_enabled) {
+ vmx->nested.msrs.entry_ctls_high |= VM_ENTRY_LOAD_BNDCFGS;
+ vmx->nested.msrs.exit_ctls_high |= VM_EXIT_CLEAR_BNDCFGS;
+ } else {
+ vmx->nested.msrs.entry_ctls_high &= ~VM_ENTRY_LOAD_BNDCFGS;
+ vmx->nested.msrs.exit_ctls_high &= ~VM_EXIT_CLEAR_BNDCFGS;
+ }
+ }
+}
+
static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -11280,8 +11284,10 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
to_vmx(vcpu)->msr_ia32_feature_control_valid_bits &=
~FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;
- if (nested_vmx_allowed(vcpu))
+ if (nested_vmx_allowed(vcpu)) {
nested_vmx_cr_fixed1_bits_update(vcpu);
+ nested_vmx_entry_exit_ctls_update(vcpu);
+ }
}
static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
@@ -12049,8 +12055,13 @@ static void prepare_vmcs02_full(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
set_cr4_guest_host_mask(vmx);
- if (vmx_mpx_supported())
- vmcs_write64(GUEST_BNDCFGS, vmcs12->guest_bndcfgs);
+ if (kvm_mpx_supported()) {
+ if (vmx->nested.nested_run_pending &&
+ (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS))
+ vmcs_write64(GUEST_BNDCFGS, vmcs12->guest_bndcfgs);
+ else
+ vmcs_write64(GUEST_BNDCFGS, vmx->nested.vmcs01_guest_bndcfgs);
+ }
if (enable_vpid) {
if (nested_cpu_has_vpid(vmcs12) && vmx->nested.vpid02)
@@ -12595,15 +12606,21 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, u32 *exit_qual)
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
bool from_vmentry = !!exit_qual;
u32 dummy_exit_qual;
- u32 vmcs01_cpu_exec_ctrl;
+ bool evaluate_pending_interrupts;
int r = 0;
- vmcs01_cpu_exec_ctrl = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
+ evaluate_pending_interrupts = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL) &
+ (CPU_BASED_VIRTUAL_INTR_PENDING | CPU_BASED_VIRTUAL_NMI_PENDING);
+ if (likely(!evaluate_pending_interrupts) && kvm_vcpu_apicv_active(vcpu))
+ evaluate_pending_interrupts |= vmx_has_apicv_interrupt(vcpu);
enter_guest_mode(vcpu);
if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS))
vmx->nested.vmcs01_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL);
+ if (kvm_mpx_supported() &&
+ !(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS))
+ vmx->nested.vmcs01_guest_bndcfgs = vmcs_read64(GUEST_BNDCFGS);
vmx_switch_vmcs(vcpu, &vmx->nested.vmcs02);
vmx_segment_cache_clear(vmx);
@@ -12643,16 +12660,14 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, u32 *exit_qual)
* to L1 or delivered directly to L2 (e.g. In case L1 don't
* intercept EXTERNAL_INTERRUPT).
*
- * Usually this would be handled by L0 requesting a
- * IRQ/NMI window by setting VMCS accordingly. However,
- * this setting was done on VMCS01 and now VMCS02 is active
- * instead. Thus, we force L0 to perform pending event
- * evaluation by requesting a KVM_REQ_EVENT.
- */
- if (vmcs01_cpu_exec_ctrl &
- (CPU_BASED_VIRTUAL_INTR_PENDING | CPU_BASED_VIRTUAL_NMI_PENDING)) {
+ * Usually this would be handled by the processor noticing an
+ * IRQ/NMI window request, or checking RVI during evaluation of
+ * pending virtual interrupts. However, this setting was done
+ * on VMCS01 and now VMCS02 is active instead. Thus, we force L0
+ * to perform pending event evaluation by requesting a KVM_REQ_EVENT.
+ */
+ if (unlikely(evaluate_pending_interrupts))
kvm_make_request(KVM_REQ_EVENT, vcpu);
- }
/*
* Note no nested_vmx_succeed or nested_vmx_fail here. At this point
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index edbf00ec56b3..ca717737347e 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4698,7 +4698,7 @@ static void kvm_init_msr_list(void)
*/
switch (msrs_to_save[i]) {
case MSR_IA32_BNDCFGS:
- if (!kvm_x86_ops->mpx_supported())
+ if (!kvm_mpx_supported())
continue;
break;
case MSR_TSC_AUX:
diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index 3f68e2919dc5..a690fd400260 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -1713,8 +1713,10 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async)
dpm_wait_for_subordinate(dev, async);
- if (async_error)
+ if (async_error) {
+ dev->power.direct_complete = false;
goto Complete;
+ }
/*
* If a device configured to wake up the system from sleep states
@@ -1726,6 +1728,7 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async)
pm_wakeup_event(dev, 0);
if (pm_wakeup_pending()) {
+ dev->power.direct_complete = false;
async_error = -EBUSY;
goto Complete;
}
diff --git a/drivers/crypto/caam/caamalg.c b/drivers/crypto/caam/caamalg.c
index d67667970f7e..ec40f991e6c6 100644
--- a/drivers/crypto/caam/caamalg.c
+++ b/drivers/crypto/caam/caamalg.c
@@ -1553,8 +1553,8 @@ static struct ablkcipher_edesc *ablkcipher_edesc_alloc(struct ablkcipher_request
edesc->src_nents = src_nents;
edesc->dst_nents = dst_nents;
edesc->sec4_sg_bytes = sec4_sg_bytes;
- edesc->sec4_sg = (void *)edesc + sizeof(struct ablkcipher_edesc) +
- desc_bytes;
+ edesc->sec4_sg = (struct sec4_sg_entry *)((u8 *)edesc->hw_desc +
+ desc_bytes);
edesc->iv_dir = DMA_TO_DEVICE;
/* Make sure IV is located in a DMAable area */
@@ -1757,8 +1757,8 @@ static struct ablkcipher_edesc *ablkcipher_giv_edesc_alloc(
edesc->src_nents = src_nents;
edesc->dst_nents = dst_nents;
edesc->sec4_sg_bytes = sec4_sg_bytes;
- edesc->sec4_sg = (void *)edesc + sizeof(struct ablkcipher_edesc) +
- desc_bytes;
+ edesc->sec4_sg = (struct sec4_sg_entry *)((u8 *)edesc->hw_desc +
+ desc_bytes);
edesc->iv_dir = DMA_FROM_DEVICE;
/* Make sure IV is located in a DMAable area */
diff --git a/drivers/crypto/chelsio/chcr_algo.c b/drivers/crypto/chelsio/chcr_algo.c
index 5c539af8ed60..010bbf607797 100644
--- a/drivers/crypto/chelsio/chcr_algo.c
+++ b/drivers/crypto/chelsio/chcr_algo.c
@@ -367,7 +367,8 @@ static inline void dsgl_walk_init(struct dsgl_walk *walk,
walk->to = (struct phys_sge_pairs *)(dsgl + 1);
}
-static inline void dsgl_walk_end(struct dsgl_walk *walk, unsigned short qid)
+static inline void dsgl_walk_end(struct dsgl_walk *walk, unsigned short qid,
+ int pci_chan_id)
{
struct cpl_rx_phys_dsgl *phys_cpl;
@@ -385,6 +386,7 @@ static inline void dsgl_walk_end(struct dsgl_walk *walk, unsigned short qid)
phys_cpl->rss_hdr_int.opcode = CPL_RX_PHYS_ADDR;
phys_cpl->rss_hdr_int.qid = htons(qid);
phys_cpl->rss_hdr_int.hash_val = 0;
+ phys_cpl->rss_hdr_int.channel = pci_chan_id;
}
static inline void dsgl_walk_add_page(struct dsgl_walk *walk,
@@ -718,7 +720,7 @@ static inline void create_wreq(struct chcr_context *ctx,
FILL_WR_RX_Q_ID(ctx->dev->rx_channel_id, qid,
!!lcb, ctx->tx_qidx);
- chcr_req->ulptx.cmd_dest = FILL_ULPTX_CMD_DEST(ctx->dev->tx_channel_id,
+ chcr_req->ulptx.cmd_dest = FILL_ULPTX_CMD_DEST(ctx->tx_chan_id,
qid);
chcr_req->ulptx.len = htonl((DIV_ROUND_UP(len16, 16) -
((sizeof(chcr_req->wreq)) >> 4)));
@@ -1339,16 +1341,23 @@ static int chcr_device_init(struct chcr_context *ctx)
adap->vres.ncrypto_fc);
rxq_perchan = u_ctx->lldi.nrxq / u_ctx->lldi.nchan;
txq_perchan = ntxq / u_ctx->lldi.nchan;
- rxq_idx = ctx->dev->tx_channel_id * rxq_perchan;
- rxq_idx += id % rxq_perchan;
- txq_idx = ctx->dev->tx_channel_id * txq_perchan;
- txq_idx += id % txq_perchan;
spin_lock(&ctx->dev->lock_chcr_dev);
- ctx->rx_qidx = rxq_idx;
- ctx->tx_qidx = txq_idx;
+ ctx->tx_chan_id = ctx->dev->tx_channel_id;
ctx->dev->tx_channel_id = !ctx->dev->tx_channel_id;
ctx->dev->rx_channel_id = 0;
spin_unlock(&ctx->dev->lock_chcr_dev);
+ rxq_idx = ctx->tx_chan_id * rxq_perchan;
+ rxq_idx += id % rxq_perchan;
+ txq_idx = ctx->tx_chan_id * txq_perchan;
+ txq_idx += id % txq_perchan;
+ ctx->rx_qidx = rxq_idx;
+ ctx->tx_qidx = txq_idx;
+ /* Channel Id used by SGE to forward packet to Host.
+ * Same value should be used in cpl_fw6_pld RSS_CH field
+ * by FW. Driver programs PCI channel ID to be used in fw
+ * at the time of queue allocation with value "pi->tx_chan"
+ */
+ ctx->pci_chan_id = txq_idx / txq_perchan;
}
out:
return err;
@@ -2503,6 +2512,7 @@ void chcr_add_aead_dst_ent(struct aead_request *req,
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
struct dsgl_walk dsgl_walk;
unsigned int authsize = crypto_aead_authsize(tfm);
+ struct chcr_context *ctx = a_ctx(tfm);
u32 temp;
dsgl_walk_init(&dsgl_walk, phys_cpl);
@@ -2512,7 +2522,7 @@ void chcr_add_aead_dst_ent(struct aead_request *req,
dsgl_walk_add_page(&dsgl_walk, IV, &reqctx->iv_dma);
temp = req->cryptlen + (reqctx->op ? -authsize : authsize);
dsgl_walk_add_sg(&dsgl_walk, req->dst, temp, req->assoclen);
- dsgl_walk_end(&dsgl_walk, qid);
+ dsgl_walk_end(&dsgl_walk, qid, ctx->pci_chan_id);
}
void chcr_add_cipher_src_ent(struct ablkcipher_request *req,
@@ -2544,6 +2554,8 @@ void chcr_add_cipher_dst_ent(struct ablkcipher_request *req,
unsigned short qid)
{
struct chcr_blkcipher_req_ctx *reqctx = ablkcipher_request_ctx(req);
+ struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(wrparam->req);
+ struct chcr_context *ctx = c_ctx(tfm);
struct dsgl_walk dsgl_walk;
dsgl_walk_init(&dsgl_walk, phys_cpl);
@@ -2552,7 +2564,7 @@ void chcr_add_cipher_dst_ent(struct ablkcipher_request *req,
reqctx->dstsg = dsgl_walk.last_sg;
reqctx->dst_ofst = dsgl_walk.last_sg_len;
- dsgl_walk_end(&dsgl_walk, qid);
+ dsgl_walk_end(&dsgl_walk, qid, ctx->pci_chan_id);
}
void chcr_add_hash_src_ent(struct ahash_request *req,
diff --git a/drivers/crypto/chelsio/chcr_crypto.h b/drivers/crypto/chelsio/chcr_crypto.h
index 54835cb109e5..0d2c70c344f3 100644
--- a/drivers/crypto/chelsio/chcr_crypto.h
+++ b/drivers/crypto/chelsio/chcr_crypto.h
@@ -255,6 +255,8 @@ struct chcr_context {
struct chcr_dev *dev;
unsigned char tx_qidx;
unsigned char rx_qidx;
+ unsigned char tx_chan_id;
+ unsigned char pci_chan_id;
struct __crypto_ctx crypto_ctx[0];
};
diff --git a/drivers/crypto/mxs-dcp.c b/drivers/crypto/mxs-dcp.c
index a10c418d4e5c..56bd28174f52 100644
--- a/drivers/crypto/mxs-dcp.c
+++ b/drivers/crypto/mxs-dcp.c
@@ -63,7 +63,7 @@ struct dcp {
struct dcp_coherent_block *coh;
struct completion completion[DCP_MAX_CHANS];
- struct mutex mutex[DCP_MAX_CHANS];
+ spinlock_t lock[DCP_MAX_CHANS];
struct task_struct *thread[DCP_MAX_CHANS];
struct crypto_queue queue[DCP_MAX_CHANS];
};
@@ -349,13 +349,20 @@ static int dcp_chan_thread_aes(void *data)
int ret;
- do {
- __set_current_state(TASK_INTERRUPTIBLE);
+ while (!kthread_should_stop()) {
+ set_current_state(TASK_INTERRUPTIBLE);
- mutex_lock(&sdcp->mutex[chan]);
+ spin_lock(&sdcp->lock[chan]);
backlog = crypto_get_backlog(&sdcp->queue[chan]);
arq = crypto_dequeue_request(&sdcp->queue[chan]);
- mutex_unlock(&sdcp->mutex[chan]);
+ spin_unlock(&sdcp->lock[chan]);
+
+ if (!backlog && !arq) {
+ schedule();
+ continue;
+ }
+
+ set_current_state(TASK_RUNNING);
if (backlog)
backlog->complete(backlog, -EINPROGRESS);
@@ -363,11 +370,8 @@ static int dcp_chan_thread_aes(void *data)
if (arq) {
ret = mxs_dcp_aes_block_crypt(arq);
arq->complete(arq, ret);
- continue;
}
-
- schedule();
- } while (!kthread_should_stop());
+ }
return 0;
}
@@ -409,9 +413,9 @@ static int mxs_dcp_aes_enqueue(struct ablkcipher_request *req, int enc, int ecb)
rctx->ecb = ecb;
actx->chan = DCP_CHAN_CRYPTO;
- mutex_lock(&sdcp->mutex[actx->chan]);
+ spin_lock(&sdcp->lock[actx->chan]);
ret = crypto_enqueue_request(&sdcp->queue[actx->chan], &req->base);
- mutex_unlock(&sdcp->mutex[actx->chan]);
+ spin_unlock(&sdcp->lock[actx->chan]);
wake_up_process(sdcp->thread[actx->chan]);
@@ -640,13 +644,20 @@ static int dcp_chan_thread_sha(void *data)
struct ahash_request *req;
int ret, fini;
- do {
- __set_current_state(TASK_INTERRUPTIBLE);
+ while (!kthread_should_stop()) {
+ set_current_state(TASK_INTERRUPTIBLE);
- mutex_lock(&sdcp->mutex[chan]);
+ spin_lock(&sdcp->lock[chan]);
backlog = crypto_get_backlog(&sdcp->queue[chan]);
arq = crypto_dequeue_request(&sdcp->queue[chan]);
- mutex_unlock(&sdcp->mutex[chan]);
+ spin_unlock(&sdcp->lock[chan]);
+
+ if (!backlog && !arq) {
+ schedule();
+ continue;
+ }
+
+ set_current_state(TASK_RUNNING);
if (backlog)
backlog->complete(backlog, -EINPROGRESS);
@@ -658,12 +669,8 @@ static int dcp_chan_thread_sha(void *data)
ret = dcp_sha_req_to_buf(arq);
fini = rctx->fini;
arq->complete(arq, ret);
- if (!fini)
- continue;
}
-
- schedule();
- } while (!kthread_should_stop());
+ }
return 0;
}
@@ -721,9 +728,9 @@ static int dcp_sha_update_fx(struct ahash_request *req, int fini)
rctx->init = 1;
}
- mutex_lock(&sdcp->mutex[actx->chan]);
+ spin_lock(&sdcp->lock[actx->chan]);
ret = crypto_enqueue_request(&sdcp->queue[actx->chan], &req->base);
- mutex_unlock(&sdcp->mutex[actx->chan]);
+ spin_unlock(&sdcp->lock[actx->chan]);
wake_up_process(sdcp->thread[actx->chan]);
mutex_unlock(&actx->mutex);
@@ -997,7 +1004,7 @@ static int mxs_dcp_probe(struct platform_device *pdev)
platform_set_drvdata(pdev, sdcp);
for (i = 0; i < DCP_MAX_CHANS; i++) {
- mutex_init(&sdcp->mutex[i]);
+ spin_lock_init(&sdcp->lock[i]);
init_completion(&sdcp->completion[i]);
crypto_init_queue(&sdcp->queue[i], 50);
}
diff --git a/drivers/crypto/qat/qat_c3xxx/adf_drv.c b/drivers/crypto/qat/qat_c3xxx/adf_drv.c
index ba197f34c252..763c2166ee0e 100644
--- a/drivers/crypto/qat/qat_c3xxx/adf_drv.c
+++ b/drivers/crypto/qat/qat_c3xxx/adf_drv.c
@@ -123,7 +123,8 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
struct adf_hw_device_data *hw_data;
char name[ADF_DEVICE_NAME_LENGTH];
unsigned int i, bar_nr;
- int ret, bar_mask;
+ unsigned long bar_mask;
+ int ret;
switch (ent->device) {
case ADF_C3XXX_PCI_DEVICE_ID:
@@ -235,8 +236,7 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
/* Find and map all the device's BARS */
i = 0;
bar_mask = pci_select_bars(pdev, IORESOURCE_MEM);
- for_each_set_bit(bar_nr, (const unsigned long *)&bar_mask,
- ADF_PCI_MAX_BARS * 2) {
+ for_each_set_bit(bar_nr, &bar_mask, ADF_PCI_MAX_BARS * 2) {
struct adf_bar *bar = &accel_pci_dev->pci_bars[i++];
bar->base_addr = pci_resource_start(pdev, bar_nr);
diff --git a/drivers/crypto/qat/qat_c3xxxvf/adf_drv.c b/drivers/crypto/qat/qat_c3xxxvf/adf_drv.c
index 24ec908eb26c..613c7d5644ce 100644
--- a/drivers/crypto/qat/qat_c3xxxvf/adf_drv.c
+++ b/drivers/crypto/qat/qat_c3xxxvf/adf_drv.c
@@ -125,7 +125,8 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
struct adf_hw_device_data *hw_data;
char name[ADF_DEVICE_NAME_LENGTH];
unsigned int i, bar_nr;
- int ret, bar_mask;
+ unsigned long bar_mask;
+ int ret;
switch (ent->device) {
case ADF_C3XXXIOV_PCI_DEVICE_ID:
@@ -215,8 +216,7 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
/* Find and map all the device's BARS */
i = 0;
bar_mask = pci_select_bars(pdev, IORESOURCE_MEM);
- for_each_set_bit(bar_nr, (const unsigned long *)&bar_mask,
- ADF_PCI_MAX_BARS * 2) {
+ for_each_set_bit(bar_nr, &bar_mask, ADF_PCI_MAX_BARS * 2) {
struct adf_bar *bar = &accel_pci_dev->pci_bars[i++];
bar->base_addr = pci_resource_start(pdev, bar_nr);
diff --git a/drivers/crypto/qat/qat_c62x/adf_drv.c b/drivers/crypto/qat/qat_c62x/adf_drv.c
index 59a5a0df50b6..9cb832963357 100644
--- a/drivers/crypto/qat/qat_c62x/adf_drv.c
+++ b/drivers/crypto/qat/qat_c62x/adf_drv.c
@@ -123,7 +123,8 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
struct adf_hw_device_data *hw_data;
char name[ADF_DEVICE_NAME_LENGTH];
unsigned int i, bar_nr;
- int ret, bar_mask;
+ unsigned long bar_mask;
+ int ret;
switch (ent->device) {
case ADF_C62X_PCI_DEVICE_ID:
@@ -235,8 +236,7 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
/* Find and map all the device's BARS */
i = (hw_data->fuses & ADF_DEVICE_FUSECTL_MASK) ? 1 : 0;
bar_mask = pci_select_bars(pdev, IORESOURCE_MEM);
- for_each_set_bit(bar_nr, (const unsigned long *)&bar_mask,
- ADF_PCI_MAX_BARS * 2) {
+ for_each_set_bit(bar_nr, &bar_mask, ADF_PCI_MAX_BARS * 2) {
struct adf_bar *bar = &accel_pci_dev->pci_bars[i++];
bar->base_addr = pci_resource_start(pdev, bar_nr);
diff --git a/drivers/crypto/qat/qat_c62xvf/adf_drv.c b/drivers/crypto/qat/qat_c62xvf/adf_drv.c
index b9f3e0e4fde9..278452b8ef81 100644
--- a/drivers/crypto/qat/qat_c62xvf/adf_drv.c
+++ b/drivers/crypto/qat/qat_c62xvf/adf_drv.c
@@ -125,7 +125,8 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
struct adf_hw_device_data *hw_data;
char name[ADF_DEVICE_NAME_LENGTH];
unsigned int i, bar_nr;
- int ret, bar_mask;
+ unsigned long bar_mask;
+ int ret;
switch (ent->device) {
case ADF_C62XIOV_PCI_DEVICE_ID:
@@ -215,8 +216,7 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
/* Find and map all the device's BARS */
i = 0;
bar_mask = pci_select_bars(pdev, IORESOURCE_MEM);
- for_each_set_bit(bar_nr, (const unsigned long *)&bar_mask,
- ADF_PCI_MAX_BARS * 2) {
+ for_each_set_bit(bar_nr, &bar_mask, ADF_PCI_MAX_BARS * 2) {
struct adf_bar *bar = &accel_pci_dev->pci_bars[i++];
bar->base_addr = pci_resource_start(pdev, bar_nr);
diff --git a/drivers/crypto/qat/qat_dh895xcc/adf_drv.c b/drivers/crypto/qat/qat_dh895xcc/adf_drv.c
index be5c5a988ca5..3a9708ef4ce2 100644
--- a/drivers/crypto/qat/qat_dh895xcc/adf_drv.c
+++ b/drivers/crypto/qat/qat_dh895xcc/adf_drv.c
@@ -123,7 +123,8 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
struct adf_hw_device_data *hw_data;
char name[ADF_DEVICE_NAME_LENGTH];
unsigned int i, bar_nr;
- int ret, bar_mask;
+ unsigned long bar_mask;
+ int ret;
switch (ent->device) {
case ADF_DH895XCC_PCI_DEVICE_ID:
@@ -237,8 +238,7 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
/* Find and map all the device's BARS */
i = 0;
bar_mask = pci_select_bars(pdev, IORESOURCE_MEM);
- for_each_set_bit(bar_nr, (const unsigned long *)&bar_mask,
- ADF_PCI_MAX_BARS * 2) {
+ for_each_set_bit(bar_nr, &bar_mask, ADF_PCI_MAX_BARS * 2) {
struct adf_bar *bar = &accel_pci_dev->pci_bars[i++];
bar->base_addr = pci_resource_start(pdev, bar_nr);
diff --git a/drivers/crypto/qat/qat_dh895xccvf/adf_drv.c b/drivers/crypto/qat/qat_dh895xccvf/adf_drv.c
index 26ab17bfc6da..3da0f951cb59 100644
--- a/drivers/crypto/qat/qat_dh895xccvf/adf_drv.c
+++ b/drivers/crypto/qat/qat_dh895xccvf/adf_drv.c
@@ -125,7 +125,8 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
struct adf_hw_device_data *hw_data;
char name[ADF_DEVICE_NAME_LENGTH];
unsigned int i, bar_nr;
- int ret, bar_mask;
+ unsigned long bar_mask;
+ int ret;
switch (ent->device) {
case ADF_DH895XCCIOV_PCI_DEVICE_ID:
@@ -215,8 +216,7 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
/* Find and map all the device's BARS */
i = 0;
bar_mask = pci_select_bars(pdev, IORESOURCE_MEM);
- for_each_set_bit(bar_nr, (const unsigned long *)&bar_mask,
- ADF_PCI_MAX_BARS * 2) {
+ for_each_set_bit(bar_nr, &bar_mask, ADF_PCI_MAX_BARS * 2) {
struct adf_bar *bar = &accel_pci_dev->pci_bars[i++];
bar->base_addr = pci_resource_start(pdev, bar_nr);
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index e8f8a1999393..a57300c1d649 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -571,7 +571,7 @@ static int linehandle_create(struct gpio_device *gdev, void __user *ip)
if (ret)
goto out_free_descs;
lh->descs[i] = desc;
- count = i;
+ count = i + 1;
if (lflags & GPIOHANDLE_REQUEST_ACTIVE_LOW)
set_bit(FLAG_ACTIVE_LOW, &desc->flags);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index ec0d62a16e53..4f22e745df51 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -358,8 +358,8 @@ static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
struct queue *q,
struct qcm_process_device *qpd)
{
- int retval;
struct mqd_manager *mqd_mgr;
+ int retval;
mqd_mgr = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE);
if (!mqd_mgr)
@@ -387,8 +387,12 @@ static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
if (!q->properties.is_active)
return 0;
- retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe, q->queue,
- &q->properties, q->process->mm);
+ if (WARN(q->process->mm != current->mm,
+ "should only run in user thread"))
+ retval = -EFAULT;
+ else
+ retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe, q->queue,
+ &q->properties, current->mm);
if (retval)
goto out_uninit_mqd;
@@ -545,9 +549,15 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q)
retval = map_queues_cpsch(dqm);
else if (q->properties.is_active &&
(q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
- q->properties.type == KFD_QUEUE_TYPE_SDMA))
- retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe, q->queue,
- &q->properties, q->process->mm);
+ q->properties.type == KFD_QUEUE_TYPE_SDMA)) {
+ if (WARN(q->process->mm != current->mm,
+ "should only run in user thread"))
+ retval = -EFAULT;
+ else
+ retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd,
+ q->pipe, q->queue,
+ &q->properties, current->mm);
+ }
out_unlock:
dqm_unlock(dqm);
@@ -653,6 +663,7 @@ out:
static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
struct qcm_process_device *qpd)
{
+ struct mm_struct *mm = NULL;
struct queue *q;
struct mqd_manager *mqd_mgr;
struct kfd_process_device *pdd;
@@ -686,6 +697,15 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
kfd_flush_tlb(pdd);
}
+ /* Take a safe reference to the mm_struct, which may otherwise
+ * disappear even while the kfd_process is still referenced.
+ */
+ mm = get_task_mm(pdd->process->lead_thread);
+ if (!mm) {
+ retval = -EFAULT;
+ goto out;
+ }
+
/* activate all active queues on the qpd */
list_for_each_entry(q, &qpd->queues_list, list) {
if (!q->properties.is_evicted)
@@ -700,14 +720,15 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
q->properties.is_evicted = false;
q->properties.is_active = true;
retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe,
- q->queue, &q->properties,
- q->process->mm);
+ q->queue, &q->properties, mm);
if (retval)
goto out;
dqm->queue_count++;
}
qpd->evicted = 0;
out:
+ if (mm)
+ mmput(mm);
dqm_unlock(dqm);
return retval;
}
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 96875950845a..6903fe6c894b 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -4633,12 +4633,18 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
}
spin_unlock_irqrestore(&adev->ddev->event_lock, flags);
- /* Signal HW programming completion */
- drm_atomic_helper_commit_hw_done(state);
if (wait_for_vblank)
drm_atomic_helper_wait_for_flip_done(dev, state);
+ /*
+ * FIXME:
+ * Delay hw_done() until flip_done() is signaled. This is to block
+ * another commit from freeing the CRTC state while we're still
+ * waiting on flip_done.
+ */
+ drm_atomic_helper_commit_hw_done(state);
+
drm_atomic_helper_cleanup_planes(dev, state);
/* Finally, drop a runtime PM reference for each newly disabled CRTC,
diff --git a/drivers/gpu/drm/drm_client.c b/drivers/gpu/drm/drm_client.c
index baff50a4c234..df31c3815092 100644
--- a/drivers/gpu/drm/drm_client.c
+++ b/drivers/gpu/drm/drm_client.c
@@ -63,20 +63,21 @@ static void drm_client_close(struct drm_client_dev *client)
EXPORT_SYMBOL(drm_client_close);
/**
- * drm_client_new - Create a DRM client
+ * drm_client_init - Initialise a DRM client
* @dev: DRM device
* @client: DRM client
* @name: Client name
* @funcs: DRM client functions (optional)
*
+ * This initialises the client and opens a &drm_file. Use drm_client_add() to complete the process.
* The caller needs to hold a reference on @dev before calling this function.
* The client is freed when the &drm_device is unregistered. See drm_client_release().
*
* Returns:
* Zero on success or negative error code on failure.
*/
-int drm_client_new(struct drm_device *dev, struct drm_client_dev *client,
- const char *name, const struct drm_client_funcs *funcs)
+int drm_client_init(struct drm_device *dev, struct drm_client_dev *client,
+ const char *name, const struct drm_client_funcs *funcs)
{
int ret;
@@ -95,10 +96,6 @@ int drm_client_new(struct drm_device *dev, struct drm_client_dev *client,
if (ret)
goto err_put_module;
- mutex_lock(&dev->clientlist_mutex);
- list_add(&client->list, &dev->clientlist);
- mutex_unlock(&dev->clientlist_mutex);
-
drm_dev_get(dev);
return 0;
@@ -109,13 +106,33 @@ err_put_module:
return ret;
}
-EXPORT_SYMBOL(drm_client_new);
+EXPORT_SYMBOL(drm_client_init);
+
+/**
+ * drm_client_add - Add client to the device list
+ * @client: DRM client
+ *
+ * Add the client to the &drm_device client list to activate its callbacks.
+ * @client must be initialized by a call to drm_client_init(). After
+ * drm_client_add() it is no longer permissible to call drm_client_release()
+ * directly (outside the unregister callback), instead cleanup will happen
+ * automatically on driver unload.
+ */
+void drm_client_add(struct drm_client_dev *client)
+{
+ struct drm_device *dev = client->dev;
+
+ mutex_lock(&dev->clientlist_mutex);
+ list_add(&client->list, &dev->clientlist);
+ mutex_unlock(&dev->clientlist_mutex);
+}
+EXPORT_SYMBOL(drm_client_add);
/**
* drm_client_release - Release DRM client resources
* @client: DRM client
*
- * Releases resources by closing the &drm_file that was opened by drm_client_new().
+ * Releases resources by closing the &drm_file that was opened by drm_client_init().
* It is called automatically if the &drm_client_funcs.unregister callback is _not_ set.
*
* This function should only be called from the unregister callback. An exception
diff --git a/drivers/gpu/drm/drm_fb_cma_helper.c b/drivers/gpu/drm/drm_fb_cma_helper.c
index 9da36a6271d3..9ac1f2e0f064 100644
--- a/drivers/gpu/drm/drm_fb_cma_helper.c
+++ b/drivers/gpu/drm/drm_fb_cma_helper.c
@@ -160,7 +160,7 @@ struct drm_fbdev_cma *drm_fbdev_cma_init(struct drm_device *dev,
fb_helper = &fbdev_cma->fb_helper;
- ret = drm_client_new(dev, &fb_helper->client, "fbdev", NULL);
+ ret = drm_client_init(dev, &fb_helper->client, "fbdev", NULL);
if (ret)
goto err_free;
@@ -169,6 +169,8 @@ struct drm_fbdev_cma *drm_fbdev_cma_init(struct drm_device *dev,
if (ret)
goto err_client_put;
+ drm_client_add(&fb_helper->client);
+
return fbdev_cma;
err_client_put:
diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c
index 16ec93b75dbf..515a7aec57ac 100644
--- a/drivers/gpu/drm/drm_fb_helper.c
+++ b/drivers/gpu/drm/drm_fb_helper.c
@@ -3218,12 +3218,14 @@ int drm_fbdev_generic_setup(struct drm_device *dev, unsigned int preferred_bpp)
if (!fb_helper)
return -ENOMEM;
- ret = drm_client_new(dev, &fb_helper->client, "fbdev", &drm_fbdev_client_funcs);
+ ret = drm_client_init(dev, &fb_helper->client, "fbdev", &drm_fbdev_client_funcs);
if (ret) {
kfree(fb_helper);
return ret;
}
+ drm_client_add(&fb_helper->client);
+
fb_helper->preferred_bpp = preferred_bpp;
drm_fbdev_client_hotplug(&fb_helper->client);
diff --git a/drivers/gpu/drm/drm_lease.c b/drivers/gpu/drm/drm_lease.c
index b54fb78a283c..b82da96ded5c 100644
--- a/drivers/gpu/drm/drm_lease.c
+++ b/drivers/gpu/drm/drm_lease.c
@@ -566,14 +566,14 @@ int drm_mode_create_lease_ioctl(struct drm_device *dev,
lessee_priv->is_master = 1;
lessee_priv->authenticated = 1;
- /* Hook up the fd */
- fd_install(fd, lessee_file);
-
/* Pass fd back to userspace */
DRM_DEBUG_LEASE("Returning fd %d id %d\n", fd, lessee->lessee_id);
cl->fd = fd;
cl->lessee_id = lessee->lessee_id;
+ /* Hook up the fd */
+ fd_install(fd, lessee_file);
+
DRM_DEBUG_LEASE("drm_mode_create_lease_ioctl succeeded\n");
return 0;
diff --git a/drivers/gpu/drm/exynos/exynos_drm_iommu.h b/drivers/gpu/drm/exynos/exynos_drm_iommu.h
index 87f6b5672e11..797d9ee5f15a 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_iommu.h
+++ b/drivers/gpu/drm/exynos/exynos_drm_iommu.h
@@ -55,37 +55,12 @@ static inline void __exynos_iommu_detach(struct exynos_drm_private *priv,
static inline int __exynos_iommu_create_mapping(struct exynos_drm_private *priv,
unsigned long start, unsigned long size)
{
- struct iommu_domain *domain;
- int ret;
-
- domain = iommu_domain_alloc(priv->dma_dev->bus);
- if (!domain)
- return -ENOMEM;
-
- ret = iommu_get_dma_cookie(domain);
- if (ret)
- goto free_domain;
-
- ret = iommu_dma_init_domain(domain, start, size, NULL);
- if (ret)
- goto put_cookie;
-
- priv->mapping = domain;
+ priv->mapping = iommu_get_domain_for_dev(priv->dma_dev);
return 0;
-
-put_cookie:
- iommu_put_dma_cookie(domain);
-free_domain:
- iommu_domain_free(domain);
- return ret;
}
static inline void __exynos_iommu_release_mapping(struct exynos_drm_private *priv)
{
- struct iommu_domain *domain = priv->mapping;
-
- iommu_put_dma_cookie(domain);
- iommu_domain_free(domain);
priv->mapping = NULL;
}
@@ -94,7 +69,9 @@ static inline int __exynos_iommu_attach(struct exynos_drm_private *priv,
{
struct iommu_domain *domain = priv->mapping;
- return iommu_attach_device(domain, dev);
+ if (dev != priv->dma_dev)
+ return iommu_attach_device(domain, dev);
+ return 0;
}
static inline void __exynos_iommu_detach(struct exynos_drm_private *priv,
@@ -102,7 +79,8 @@ static inline void __exynos_iommu_detach(struct exynos_drm_private *priv,
{
struct iommu_domain *domain = priv->mapping;
- iommu_detach_device(domain, dev);
+ if (dev != priv->dma_dev)
+ iommu_detach_device(domain, dev);
}
#else
#error Unsupported architecture and IOMMU/DMA-mapping glue code
diff --git a/drivers/gpu/drm/i2c/tda9950.c b/drivers/gpu/drm/i2c/tda9950.c
index 5d2f0d548469..250b5e02a314 100644
--- a/drivers/gpu/drm/i2c/tda9950.c
+++ b/drivers/gpu/drm/i2c/tda9950.c
@@ -191,7 +191,8 @@ static irqreturn_t tda9950_irq(int irq, void *data)
break;
}
/* TDA9950 executes all retries for us */
- tx_status |= CEC_TX_STATUS_MAX_RETRIES;
+ if (tx_status != CEC_TX_STATUS_OK)
+ tx_status |= CEC_TX_STATUS_MAX_RETRIES;
cec_transmit_done(priv->adap, tx_status, arb_lost_cnt,
nack_cnt, 0, err_cnt);
break;
@@ -310,7 +311,7 @@ static void tda9950_release(struct tda9950_priv *priv)
/* Wait up to .5s for it to signal non-busy */
do {
csr = tda9950_read(client, REG_CSR);
- if (!(csr & CSR_BUSY) || --timeout)
+ if (!(csr & CSR_BUSY) || !--timeout)
break;
msleep(10);
} while (1);
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index f7f2aa71d8d9..a262a64f5625 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -232,6 +232,20 @@ static bool compress_init(struct compress *c)
return true;
}
+static void *compress_next_page(struct drm_i915_error_object *dst)
+{
+ unsigned long page;
+
+ if (dst->page_count >= dst->num_pages)
+ return ERR_PTR(-ENOSPC);
+
+ page = __get_free_page(GFP_ATOMIC | __GFP_NOWARN);
+ if (!page)
+ return ERR_PTR(-ENOMEM);
+
+ return dst->pages[dst->page_count++] = (void *)page;
+}
+
static int compress_page(struct compress *c,
void *src,
struct drm_i915_error_object *dst)
@@ -245,19 +259,14 @@ static int compress_page(struct compress *c,
do {
if (zstream->avail_out == 0) {
- unsigned long page;
-
- page = __get_free_page(GFP_ATOMIC | __GFP_NOWARN);
- if (!page)
- return -ENOMEM;
+ zstream->next_out = compress_next_page(dst);
+ if (IS_ERR(zstream->next_out))
+ return PTR_ERR(zstream->next_out);
- dst->pages[dst->page_count++] = (void *)page;
-
- zstream->next_out = (void *)page;
zstream->avail_out = PAGE_SIZE;
}
- if (zlib_deflate(zstream, Z_SYNC_FLUSH) != Z_OK)
+ if (zlib_deflate(zstream, Z_NO_FLUSH) != Z_OK)
return -EIO;
} while (zstream->avail_in);
@@ -268,19 +277,42 @@ static int compress_page(struct compress *c,
return 0;
}
-static void compress_fini(struct compress *c,
+static int compress_flush(struct compress *c,
struct drm_i915_error_object *dst)
{
struct z_stream_s *zstream = &c->zstream;
- if (dst) {
- zlib_deflate(zstream, Z_FINISH);
- dst->unused = zstream->avail_out;
- }
+ do {
+ switch (zlib_deflate(zstream, Z_FINISH)) {
+ case Z_OK: /* more space requested */
+ zstream->next_out = compress_next_page(dst);
+ if (IS_ERR(zstream->next_out))
+ return PTR_ERR(zstream->next_out);
+
+ zstream->avail_out = PAGE_SIZE;
+ break;
+
+ case Z_STREAM_END:
+ goto end;
+
+ default: /* any error */
+ return -EIO;
+ }
+ } while (1);
+
+end:
+ memset(zstream->next_out, 0, zstream->avail_out);
+ dst->unused = zstream->avail_out;
+ return 0;
+}
+
+static void compress_fini(struct compress *c,
+ struct drm_i915_error_object *dst)
+{
+ struct z_stream_s *zstream = &c->zstream;
zlib_deflateEnd(zstream);
kfree(zstream->workspace);
-
if (c->tmp)
free_page((unsigned long)c->tmp);
}
@@ -319,6 +351,12 @@ static int compress_page(struct compress *c,
return 0;
}
+static int compress_flush(struct compress *c,
+ struct drm_i915_error_object *dst)
+{
+ return 0;
+}
+
static void compress_fini(struct compress *c,
struct drm_i915_error_object *dst)
{
@@ -917,6 +955,7 @@ i915_error_object_create(struct drm_i915_private *i915,
unsigned long num_pages;
struct sgt_iter iter;
dma_addr_t dma;
+ int ret;
if (!vma)
return NULL;
@@ -930,6 +969,7 @@ i915_error_object_create(struct drm_i915_private *i915,
dst->gtt_offset = vma->node.start;
dst->gtt_size = vma->node.size;
+ dst->num_pages = num_pages;
dst->page_count = 0;
dst->unused = 0;
@@ -938,28 +978,26 @@ i915_error_object_create(struct drm_i915_private *i915,
return NULL;
}
+ ret = -EINVAL;
for_each_sgt_dma(dma, iter, vma->pages) {
void __iomem *s;
- int ret;
ggtt->vm.insert_page(&ggtt->vm, dma, slot, I915_CACHE_NONE, 0);
s = io_mapping_map_atomic_wc(&ggtt->iomap, slot);
ret = compress_page(&compress, (void __force *)s, dst);
io_mapping_unmap_atomic(s);
-
if (ret)
- goto unwind;
+ break;
}
- goto out;
-unwind:
- while (dst->page_count--)
- free_page((unsigned long)dst->pages[dst->page_count]);
- kfree(dst);
- dst = NULL;
+ if (ret || compress_flush(&compress, dst)) {
+ while (dst->page_count--)
+ free_page((unsigned long)dst->pages[dst->page_count]);
+ kfree(dst);
+ dst = NULL;
+ }
-out:
compress_fini(&compress, dst);
ggtt->vm.clear_range(&ggtt->vm, slot, PAGE_SIZE);
return dst;
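/*
 * Illustrative sketch, not taken from the patch: the ERR_PTR convention that
 * compress_next_page() relies on.  The error code travels inside the returned
 * pointer, so the caller needs only one variable and a single IS_ERR() check
 * before handing the buffer to zlib.  grab_buffer() is a hypothetical helper.
 */
static void *grab_buffer(gfp_t gfp)
{
	void *p = (void *)__get_free_page(gfp);

	if (!p)
		return ERR_PTR(-ENOMEM);
	return p;
}

	/* caller side */
	zstream->next_out = grab_buffer(GFP_ATOMIC | __GFP_NOWARN);
	if (IS_ERR(zstream->next_out))
		return PTR_ERR(zstream->next_out);
	zstream->avail_out = PAGE_SIZE;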
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h
index f893a4e8b783..8710fb18ed74 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.h
+++ b/drivers/gpu/drm/i915/i915_gpu_error.h
@@ -135,6 +135,7 @@ struct i915_gpu_state {
struct drm_i915_error_object {
u64 gtt_offset;
u64 gtt_size;
+ int num_pages;
int page_count;
int unused;
u32 *pages[0];
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 90628a47ae17..29877969310d 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -3091,36 +3091,27 @@ gen11_gt_irq_handler(struct drm_i915_private * const i915,
spin_unlock(&i915->irq_lock);
}
-static void
-gen11_gu_misc_irq_ack(struct drm_i915_private *dev_priv, const u32 master_ctl,
- u32 *iir)
+static u32
+gen11_gu_misc_irq_ack(struct drm_i915_private *dev_priv, const u32 master_ctl)
{
void __iomem * const regs = dev_priv->regs;
+ u32 iir;
if (!(master_ctl & GEN11_GU_MISC_IRQ))
- return;
+ return 0;
+
+ iir = raw_reg_read(regs, GEN11_GU_MISC_IIR);
+ if (likely(iir))
+ raw_reg_write(regs, GEN11_GU_MISC_IIR, iir);
- *iir = raw_reg_read(regs, GEN11_GU_MISC_IIR);
- if (likely(*iir))
- raw_reg_write(regs, GEN11_GU_MISC_IIR, *iir);
+ return iir;
}
static void
-gen11_gu_misc_irq_handler(struct drm_i915_private *dev_priv,
- const u32 master_ctl, const u32 iir)
+gen11_gu_misc_irq_handler(struct drm_i915_private *dev_priv, const u32 iir)
{
- if (!(master_ctl & GEN11_GU_MISC_IRQ))
- return;
-
- if (unlikely(!iir)) {
- DRM_ERROR("GU_MISC iir blank!\n");
- return;
- }
-
if (iir & GEN11_GU_MISC_GSE)
intel_opregion_asle_intr(dev_priv);
- else
- DRM_ERROR("Unexpected GU_MISC interrupt 0x%x\n", iir);
}
static irqreturn_t gen11_irq_handler(int irq, void *arg)
@@ -3157,12 +3148,12 @@ static irqreturn_t gen11_irq_handler(int irq, void *arg)
enable_rpm_wakeref_asserts(i915);
}
- gen11_gu_misc_irq_ack(i915, master_ctl, &gu_misc_iir);
+ gu_misc_iir = gen11_gu_misc_irq_ack(i915, master_ctl);
/* Acknowledge and enable interrupts. */
raw_reg_write(regs, GEN11_GFX_MSTR_IRQ, GEN11_MASTER_IRQ | master_ctl);
- gen11_gu_misc_irq_handler(i915, master_ctl, gu_misc_iir);
+ gen11_gu_misc_irq_handler(i915, gu_misc_iir);
return IRQ_HANDLED;
}
diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index 6a4d1388ad2d..1df3ce134cd0 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -592,7 +592,6 @@ static const struct intel_device_info intel_cannonlake_info = {
GEN10_FEATURES, \
GEN(11), \
.ddb_size = 2048, \
- .has_csr = 0, \
.has_logical_ring_elsq = 1
static const struct intel_device_info intel_icelake_11_info = {
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 73e47d93e7a0..bee0dfb7b93b 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -3069,7 +3069,7 @@ static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom,
return 0;
offset_mask = pte_pgsize - 1;
- __pte = *pte & PM_ADDR_MASK;
+ __pte = __sme_clr(*pte & PM_ADDR_MASK);
return (__pte & ~offset_mask) | (iova & offset_mask);
}
diff --git a/drivers/md/dm-cache-metadata.c b/drivers/md/dm-cache-metadata.c
index 69dddeab124c..5936de71883f 100644
--- a/drivers/md/dm-cache-metadata.c
+++ b/drivers/md/dm-cache-metadata.c
@@ -1455,8 +1455,8 @@ static int __load_mappings(struct dm_cache_metadata *cmd,
if (hints_valid) {
r = dm_array_cursor_next(&cmd->hint_cursor);
if (r) {
- DMERR("dm_array_cursor_next for hint failed");
- goto out;
+ dm_array_cursor_end(&cmd->hint_cursor);
+ hints_valid = false;
}
}
diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c
index a53413371725..e13d991e9fb5 100644
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -3009,8 +3009,13 @@ static dm_cblock_t get_cache_dev_size(struct cache *cache)
static bool can_resize(struct cache *cache, dm_cblock_t new_size)
{
- if (from_cblock(new_size) > from_cblock(cache->cache_size))
- return true;
+ if (from_cblock(new_size) > from_cblock(cache->cache_size)) {
+ if (cache->sized) {
+ DMERR("%s: unable to extend cache due to missing cache table reload",
+ cache_device_name(cache));
+ return false;
+ }
+ }
/*
* We can't drop a dirty block when shrinking the cache.
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index d94ba6f72ff5..419362c2d8ac 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -806,19 +806,19 @@ static int parse_path_selector(struct dm_arg_set *as, struct priority_group *pg,
}
static int setup_scsi_dh(struct block_device *bdev, struct multipath *m,
- const char *attached_handler_name, char **error)
+ const char **attached_handler_name, char **error)
{
struct request_queue *q = bdev_get_queue(bdev);
int r;
if (test_bit(MPATHF_RETAIN_ATTACHED_HW_HANDLER, &m->flags)) {
retain:
- if (attached_handler_name) {
+ if (*attached_handler_name) {
/*
* Clear any hw_handler_params associated with a
* handler that isn't already attached.
*/
- if (m->hw_handler_name && strcmp(attached_handler_name, m->hw_handler_name)) {
+ if (m->hw_handler_name && strcmp(*attached_handler_name, m->hw_handler_name)) {
kfree(m->hw_handler_params);
m->hw_handler_params = NULL;
}
@@ -830,7 +830,8 @@ retain:
* handler instead of the original table passed in.
*/
kfree(m->hw_handler_name);
- m->hw_handler_name = attached_handler_name;
+ m->hw_handler_name = *attached_handler_name;
+ *attached_handler_name = NULL;
}
}
@@ -867,7 +868,7 @@ static struct pgpath *parse_path(struct dm_arg_set *as, struct path_selector *ps
struct pgpath *p;
struct multipath *m = ti->private;
struct request_queue *q;
- const char *attached_handler_name;
+ const char *attached_handler_name = NULL;
/* we need at least a path arg */
if (as->argc < 1) {
@@ -890,7 +891,7 @@ static struct pgpath *parse_path(struct dm_arg_set *as, struct path_selector *ps
attached_handler_name = scsi_dh_attached_handler_name(q, GFP_KERNEL);
if (attached_handler_name || m->hw_handler_name) {
INIT_DELAYED_WORK(&p->activate_path, activate_path_work);
- r = setup_scsi_dh(p->path.dev->bdev, m, attached_handler_name, &ti->error);
+ r = setup_scsi_dh(p->path.dev->bdev, m, &attached_handler_name, &ti->error);
if (r) {
dm_put_device(ti, p->path.dev);
goto bad;
@@ -905,6 +906,7 @@ static struct pgpath *parse_path(struct dm_arg_set *as, struct path_selector *ps
return p;
bad:
+ kfree(attached_handler_name);
free_pgpath(p);
return ERR_PTR(r);
}
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index 5ba067fa0c72..c44925e4e481 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -3353,7 +3353,7 @@ static const char *sync_str(enum sync_state state)
};
/* Return enum sync_state for @mddev derived from @recovery flags */
-static const enum sync_state decipher_sync_action(struct mddev *mddev, unsigned long recovery)
+static enum sync_state decipher_sync_action(struct mddev *mddev, unsigned long recovery)
{
if (test_bit(MD_RECOVERY_FROZEN, &recovery))
return st_frozen;
diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c
index 74f6770c70b1..20b0776e39ef 100644
--- a/drivers/md/dm-thin-metadata.c
+++ b/drivers/md/dm-thin-metadata.c
@@ -832,10 +832,8 @@ static void __set_metadata_reserve(struct dm_pool_metadata *pmd)
if (r) {
DMERR("could not get size of metadata device");
pmd->metadata_reserve = max_blocks;
- } else {
- sector_div(total, 10);
- pmd->metadata_reserve = min(max_blocks, total);
- }
+ } else
+ pmd->metadata_reserve = min(max_blocks, div_u64(total, 10));
}
struct dm_pool_metadata *dm_pool_metadata_open(struct block_device *bdev,
diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c
index 700d86dd5e13..0e4bbdcc614f 100644
--- a/drivers/net/dsa/b53/b53_common.c
+++ b/drivers/net/dsa/b53/b53_common.c
@@ -1291,7 +1291,7 @@ void b53_vlan_add(struct dsa_switch *ds, int port,
b53_get_vlan_entry(dev, vid, vl);
vl->members |= BIT(port);
- if (untagged)
+ if (untagged && !dsa_is_cpu_port(ds, port))
vl->untag |= BIT(port);
else
vl->untag &= ~BIT(port);
@@ -1333,7 +1333,7 @@ int b53_vlan_del(struct dsa_switch *ds, int port,
pvid = 0;
}
- if (untagged)
+ if (untagged && !dsa_is_cpu_port(ds, port))
vl->untag &= ~(BIT(port));
b53_set_vlan_entry(dev, vid, vl);
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 0478e562abac..e2d92548226a 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -3017,10 +3017,11 @@ static void bnxt_free_hwrm_resources(struct bnxt *bp)
{
struct pci_dev *pdev = bp->pdev;
- dma_free_coherent(&pdev->dev, PAGE_SIZE, bp->hwrm_cmd_resp_addr,
- bp->hwrm_cmd_resp_dma_addr);
-
- bp->hwrm_cmd_resp_addr = NULL;
+ if (bp->hwrm_cmd_resp_addr) {
+ dma_free_coherent(&pdev->dev, PAGE_SIZE, bp->hwrm_cmd_resp_addr,
+ bp->hwrm_cmd_resp_dma_addr);
+ bp->hwrm_cmd_resp_addr = NULL;
+ }
}
static int bnxt_alloc_hwrm_resources(struct bnxt *bp)
@@ -4650,7 +4651,7 @@ __bnxt_hwrm_reserve_pf_rings(struct bnxt *bp, struct hwrm_func_cfg_input *req,
FUNC_CFG_REQ_ENABLES_NUM_STAT_CTXS : 0;
enables |= ring_grps ?
FUNC_CFG_REQ_ENABLES_NUM_HW_RING_GRPS : 0;
- enables |= vnics ? FUNC_VF_CFG_REQ_ENABLES_NUM_VNICS : 0;
+ enables |= vnics ? FUNC_CFG_REQ_ENABLES_NUM_VNICS : 0;
req->num_rx_rings = cpu_to_le16(rx_rings);
req->num_hw_ring_grps = cpu_to_le16(ring_grps);
@@ -8621,7 +8622,7 @@ static void _bnxt_get_max_rings(struct bnxt *bp, int *max_rx, int *max_tx,
*max_tx = hw_resc->max_tx_rings;
*max_rx = hw_resc->max_rx_rings;
*max_cp = min_t(int, bnxt_get_max_func_cp_rings_for_en(bp),
- hw_resc->max_irqs);
+ hw_resc->max_irqs - bnxt_get_ulp_msix_num(bp));
*max_cp = min_t(int, *max_cp, hw_resc->max_stat_ctxs);
max_ring_grps = hw_resc->max_hw_ring_grps;
if (BNXT_CHIP_TYPE_NITRO_A0(bp) && BNXT_PF(bp)) {
@@ -9057,6 +9058,7 @@ init_err_cleanup_tc:
bnxt_clear_int_mode(bp);
init_err_pci_clean:
+ bnxt_free_hwrm_resources(bp);
bnxt_cleanup_pci(bp);
init_err_free:
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c
index ddc98c359488..a85d2be986af 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c
@@ -98,13 +98,13 @@ static int bnxt_hwrm_queue_cos2bw_cfg(struct bnxt *bp, struct ieee_ets *ets,
bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_QUEUE_COS2BW_CFG, -1, -1);
for (i = 0; i < max_tc; i++) {
- u8 qidx;
+ u8 qidx = bp->tc_to_qidx[i];
req.enables |= cpu_to_le32(
- QUEUE_COS2BW_CFG_REQ_ENABLES_COS_QUEUE_ID0_VALID << i);
+ QUEUE_COS2BW_CFG_REQ_ENABLES_COS_QUEUE_ID0_VALID <<
+ qidx);
memset(&cos2bw, 0, sizeof(cos2bw));
- qidx = bp->tc_to_qidx[i];
cos2bw.queue_id = bp->q_info[qidx].queue_id;
if (ets->tc_tsa[i] == IEEE_8021QAZ_TSA_STRICT) {
cos2bw.tsa =
diff --git a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c
index 8b0a253a18d8..1e82b9efe447 100644
--- a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c
@@ -2158,6 +2158,8 @@ static int cxgb_extension_ioctl(struct net_device *dev, void __user *useraddr)
return -EPERM;
if (copy_from_user(&t, useraddr, sizeof(t)))
return -EFAULT;
+ if (t.cmd != CHELSIO_SET_QSET_PARAMS)
+ return -EINVAL;
if (t.qset_idx >= SGE_QSETS)
return -EINVAL;
if (!in_range(t.intr_lat, 0, M_NEWTIMER) ||
@@ -2257,6 +2259,9 @@ static int cxgb_extension_ioctl(struct net_device *dev, void __user *useraddr)
if (copy_from_user(&t, useraddr, sizeof(t)))
return -EFAULT;
+ if (t.cmd != CHELSIO_GET_QSET_PARAMS)
+ return -EINVAL;
+
/* Display qsets for all ports when offload enabled */
if (test_bit(OFFLOAD_DEVMAP_BIT, &adapter->open_device_map)) {
q1 = 0;
@@ -2302,6 +2307,8 @@ static int cxgb_extension_ioctl(struct net_device *dev, void __user *useraddr)
return -EBUSY;
if (copy_from_user(&edata, useraddr, sizeof(edata)))
return -EFAULT;
+ if (edata.cmd != CHELSIO_SET_QSET_NUM)
+ return -EINVAL;
if (edata.val < 1 ||
(edata.val > 1 && !(adapter->flags & USING_MSIX)))
return -EINVAL;
@@ -2342,6 +2349,8 @@ static int cxgb_extension_ioctl(struct net_device *dev, void __user *useraddr)
return -EPERM;
if (copy_from_user(&t, useraddr, sizeof(t)))
return -EFAULT;
+ if (t.cmd != CHELSIO_LOAD_FW)
+ return -EINVAL;
/* Check t.len sanity ? */
fw_data = memdup_user(useraddr + sizeof(t), t.len);
if (IS_ERR(fw_data))
@@ -2365,6 +2374,8 @@ static int cxgb_extension_ioctl(struct net_device *dev, void __user *useraddr)
return -EBUSY;
if (copy_from_user(&m, useraddr, sizeof(m)))
return -EFAULT;
+ if (m.cmd != CHELSIO_SETMTUTAB)
+ return -EINVAL;
if (m.nmtus != NMTUS)
return -EINVAL;
if (m.mtus[0] < 81) /* accommodate SACK */
@@ -2406,6 +2417,8 @@ static int cxgb_extension_ioctl(struct net_device *dev, void __user *useraddr)
return -EBUSY;
if (copy_from_user(&m, useraddr, sizeof(m)))
return -EFAULT;
+ if (m.cmd != CHELSIO_SET_PM)
+ return -EINVAL;
if (!is_power_of_2(m.rx_pg_sz) ||
!is_power_of_2(m.tx_pg_sz))
return -EINVAL; /* not power of 2 */
@@ -2439,6 +2452,8 @@ static int cxgb_extension_ioctl(struct net_device *dev, void __user *useraddr)
return -EIO; /* need the memory controllers */
if (copy_from_user(&t, useraddr, sizeof(t)))
return -EFAULT;
+ if (t.cmd != CHELSIO_GET_MEM)
+ return -EINVAL;
if ((t.addr & 7) || (t.len & 7))
return -EINVAL;
if (t.mem_id == MEM_CM)
@@ -2491,6 +2506,8 @@ static int cxgb_extension_ioctl(struct net_device *dev, void __user *useraddr)
return -EAGAIN;
if (copy_from_user(&t, useraddr, sizeof(t)))
return -EFAULT;
+ if (t.cmd != CHELSIO_SET_TRACE_FILTER)
+ return -EINVAL;
tp = (const struct trace_params *)&t.sip;
if (t.config_tx)
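/*
 * Illustrative sketch, not from the driver: the validation pattern the cxgb3
 * hunks above apply to each sub-ioctl.  After copy_from_user(), the embedded
 * sub-command is checked against the branch being executed, so a request
 * structure crafted for one sub-ioctl can never be parsed as another.  The
 * struct layout, EXAMPLE_SET_VAL and apply_val() are hypothetical.
 */
struct example_req {
	u32 cmd;	/* sub-command chosen by userspace */
	u32 val;
};

static int example_ioctl(void __user *useraddr)
{
	struct example_req req;

	if (copy_from_user(&req, useraddr, sizeof(req)))
		return -EFAULT;
	if (req.cmd != EXAMPLE_SET_VAL)	/* reject mismatched sub-commands */
		return -EINVAL;
	return apply_val(req.val);
}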
diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c
index 74d122616e76..534787291b44 100644
--- a/drivers/net/ethernet/emulex/benet/be_main.c
+++ b/drivers/net/ethernet/emulex/benet/be_main.c
@@ -4002,8 +4002,6 @@ static int be_enable_vxlan_offloads(struct be_adapter *adapter)
netdev->hw_enc_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
NETIF_F_TSO | NETIF_F_TSO6 |
NETIF_F_GSO_UDP_TUNNEL;
- netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL;
- netdev->features |= NETIF_F_GSO_UDP_TUNNEL;
dev_info(dev, "Enabled VxLAN offloads for UDP port %d\n",
be16_to_cpu(port));
@@ -4025,8 +4023,6 @@ static void be_disable_vxlan_offloads(struct be_adapter *adapter)
adapter->vxlan_port = 0;
netdev->hw_enc_features = 0;
- netdev->hw_features &= ~(NETIF_F_GSO_UDP_TUNNEL);
- netdev->features &= ~(NETIF_F_GSO_UDP_TUNNEL);
}
static void be_calculate_vf_res(struct be_adapter *adapter, u16 num_vfs,
@@ -5320,6 +5316,7 @@ static void be_netdev_init(struct net_device *netdev)
struct be_adapter *adapter = netdev_priv(netdev);
netdev->hw_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
+ NETIF_F_GSO_UDP_TUNNEL |
NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM |
NETIF_F_HW_VLAN_CTAG_TX;
if ((be_if_cap_flags(adapter) & BE_IF_FLAGS_RSS))
diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
index 2373cd41a625..14f9679c957c 100644
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
@@ -1755,7 +1755,7 @@ static void mvpp2_txq_desc_put(struct mvpp2_tx_queue *txq)
}
/* Set Tx descriptors fields relevant for CSUM calculation */
-static u32 mvpp2_txq_desc_csum(int l3_offs, int l3_proto,
+static u32 mvpp2_txq_desc_csum(int l3_offs, __be16 l3_proto,
int ip_hdr_len, int l4_proto)
{
u32 command;
@@ -2645,14 +2645,15 @@ static u32 mvpp2_skb_tx_csum(struct mvpp2_port *port, struct sk_buff *skb)
if (skb->ip_summed == CHECKSUM_PARTIAL) {
int ip_hdr_len = 0;
u8 l4_proto;
+ __be16 l3_proto = vlan_get_protocol(skb);
- if (skb->protocol == htons(ETH_P_IP)) {
+ if (l3_proto == htons(ETH_P_IP)) {
struct iphdr *ip4h = ip_hdr(skb);
/* Calculate IPv4 checksum and L4 checksum */
ip_hdr_len = ip4h->ihl;
l4_proto = ip4h->protocol;
- } else if (skb->protocol == htons(ETH_P_IPV6)) {
+ } else if (l3_proto == htons(ETH_P_IPV6)) {
struct ipv6hdr *ip6h = ipv6_hdr(skb);
/* Read l4_protocol from one of IPv6 extra headers */
@@ -2664,7 +2665,7 @@ static u32 mvpp2_skb_tx_csum(struct mvpp2_port *port, struct sk_buff *skb)
}
return mvpp2_txq_desc_csum(skb_network_offset(skb),
- skb->protocol, ip_hdr_len, l4_proto);
+ l3_proto, ip_hdr_len, l4_proto);
}
return MVPP2_TXD_L4_CSUM_NOT | MVPP2_TXD_IP_CSUM_DISABLE;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c
index 4d271fb3de3d..5890fdfd62c3 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/pci.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c
@@ -718,14 +718,17 @@ static void mlxsw_pci_eq_tasklet(unsigned long data)
memset(&active_cqns, 0, sizeof(active_cqns));
while ((eqe = mlxsw_pci_eq_sw_eqe_get(q))) {
- u8 event_type = mlxsw_pci_eqe_event_type_get(eqe);
- switch (event_type) {
- case MLXSW_PCI_EQE_EVENT_TYPE_CMD:
+ /* Command interface completion events are always received on
+ * queue MLXSW_PCI_EQ_ASYNC_NUM (EQ0) and completion events
+ * are mapped to queue MLXSW_PCI_EQ_COMP_NUM (EQ1).
+ */
+ switch (q->num) {
+ case MLXSW_PCI_EQ_ASYNC_NUM:
mlxsw_pci_eq_cmd_event(mlxsw_pci, eqe);
q->u.eq.ev_cmd_count++;
break;
- case MLXSW_PCI_EQE_EVENT_TYPE_COMP:
+ case MLXSW_PCI_EQ_COMP_NUM:
cqn = mlxsw_pci_eqe_cqn_get(eqe);
set_bit(cqn, active_cqns);
cq_handle = true;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index 88c33a8474eb..2b14fd0dcc42 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -4855,6 +4855,8 @@ static int mlxsw_sp_netdevice_bridge_event(struct net_device *br_dev,
upper_dev = info->upper_dev;
if (info->linking)
break;
+ if (is_vlan_dev(upper_dev))
+ mlxsw_sp_rif_destroy_by_dev(mlxsw_sp, upper_dev);
if (netif_is_macvlan(upper_dev))
mlxsw_sp_rif_macvlan_del(mlxsw_sp, upper_dev);
break;
diff --git a/drivers/net/hamradio/yam.c b/drivers/net/hamradio/yam.c
index 16ec7af6ab7b..ba9df430fca6 100644
--- a/drivers/net/hamradio/yam.c
+++ b/drivers/net/hamradio/yam.c
@@ -966,6 +966,8 @@ static int yam_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
sizeof(struct yamdrv_ioctl_mcs));
if (IS_ERR(ym))
return PTR_ERR(ym);
+ if (ym->cmd != SIOCYAMSMCS)
+ return -EINVAL;
if (ym->bitrate > YAM_MAXBITRATE) {
kfree(ym);
return -EINVAL;
@@ -981,6 +983,8 @@ static int yam_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
if (copy_from_user(&yi, ifr->ifr_data, sizeof(struct yamdrv_ioctl_cfg)))
return -EFAULT;
+ if (yi.cmd != SIOCYAMSCFG)
+ return -EINVAL;
if ((yi.cfg.mask & YAM_IOBASE) && netif_running(dev))
return -EINVAL; /* Cannot change this parameter when up */
if ((yi.cfg.mask & YAM_IRQ) && netif_running(dev))
diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c
index b6993af5c9e4..9b8dd0d0ee42 100644
--- a/drivers/net/phy/phylink.c
+++ b/drivers/net/phy/phylink.c
@@ -690,6 +690,30 @@ static int phylink_bringup_phy(struct phylink *pl, struct phy_device *phy)
return 0;
}
+static int __phylink_connect_phy(struct phylink *pl, struct phy_device *phy,
+ phy_interface_t interface)
+{
+ int ret;
+
+ if (WARN_ON(pl->link_an_mode == MLO_AN_FIXED ||
+ (pl->link_an_mode == MLO_AN_INBAND &&
+ phy_interface_mode_is_8023z(interface))))
+ return -EINVAL;
+
+ if (pl->phydev)
+ return -EBUSY;
+
+ ret = phy_attach_direct(pl->netdev, phy, 0, interface);
+ if (ret)
+ return ret;
+
+ ret = phylink_bringup_phy(pl, phy);
+ if (ret)
+ phy_detach(phy);
+
+ return ret;
+}
+
/**
* phylink_connect_phy() - connect a PHY to the phylink instance
* @pl: a pointer to a &struct phylink returned from phylink_create()
@@ -707,31 +731,13 @@ static int phylink_bringup_phy(struct phylink *pl, struct phy_device *phy)
*/
int phylink_connect_phy(struct phylink *pl, struct phy_device *phy)
{
- int ret;
-
- if (WARN_ON(pl->link_an_mode == MLO_AN_FIXED ||
- (pl->link_an_mode == MLO_AN_INBAND &&
- phy_interface_mode_is_8023z(pl->link_interface))))
- return -EINVAL;
-
- if (pl->phydev)
- return -EBUSY;
-
/* Use PHY device/driver interface */
if (pl->link_interface == PHY_INTERFACE_MODE_NA) {
pl->link_interface = phy->interface;
pl->link_config.interface = pl->link_interface;
}
- ret = phy_attach_direct(pl->netdev, phy, 0, pl->link_interface);
- if (ret)
- return ret;
-
- ret = phylink_bringup_phy(pl, phy);
- if (ret)
- phy_detach(phy);
-
- return ret;
+ return __phylink_connect_phy(pl, phy, pl->link_interface);
}
EXPORT_SYMBOL_GPL(phylink_connect_phy);
@@ -1648,7 +1654,9 @@ static void phylink_sfp_link_up(void *upstream)
static int phylink_sfp_connect_phy(void *upstream, struct phy_device *phy)
{
- return phylink_connect_phy(upstream, phy);
+ struct phylink *pl = upstream;
+
+ return __phylink_connect_phy(upstream, phy, pl->link_config.interface);
}
static void phylink_sfp_disconnect_phy(void *upstream)
diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c
index 6a047d30e8c6..d887016e54b6 100644
--- a/drivers/net/team/team.c
+++ b/drivers/net/team/team.c
@@ -1167,6 +1167,12 @@ static int team_port_add(struct team *team, struct net_device *port_dev,
return -EBUSY;
}
+ if (dev == port_dev) {
+ NL_SET_ERR_MSG(extack, "Cannot enslave team device to itself");
+ netdev_err(dev, "Cannot enslave team device to itself\n");
+ return -EINVAL;
+ }
+
if (port_dev->features & NETIF_F_VLAN_CHALLENGED &&
vlan_uses_dev(dev)) {
NL_SET_ERR_MSG(extack, "Device is VLAN challenged and team device has VLAN set up");
diff --git a/drivers/net/usb/smsc75xx.c b/drivers/net/usb/smsc75xx.c
index e5a4cbb366dc..ec287c9741e8 100644
--- a/drivers/net/usb/smsc75xx.c
+++ b/drivers/net/usb/smsc75xx.c
@@ -1520,6 +1520,7 @@ static void smsc75xx_unbind(struct usbnet *dev, struct usb_interface *intf)
{
struct smsc75xx_priv *pdata = (struct smsc75xx_priv *)(dev->data[0]);
if (pdata) {
+ cancel_work_sync(&pdata->set_multicast);
netif_dbg(dev, ifdown, dev->net, "free pdata\n");
kfree(pdata);
pdata = NULL;
diff --git a/drivers/pci/controller/pci-mvebu.c b/drivers/pci/controller/pci-mvebu.c
index 50eb0729385b..a41d79b8d46a 100644
--- a/drivers/pci/controller/pci-mvebu.c
+++ b/drivers/pci/controller/pci-mvebu.c
@@ -1145,7 +1145,6 @@ static int mvebu_pcie_parse_request_resources(struct mvebu_pcie *pcie)
{
struct device *dev = &pcie->pdev->dev;
struct device_node *np = dev->of_node;
- unsigned int i;
int ret;
INIT_LIST_HEAD(&pcie->resources);
@@ -1179,13 +1178,58 @@ static int mvebu_pcie_parse_request_resources(struct mvebu_pcie *pcie)
resource_size(&pcie->io) - 1);
pcie->realio.name = "PCI I/O";
+ pci_add_resource(&pcie->resources, &pcie->realio);
+ }
+
+ return devm_request_pci_bus_resources(dev, &pcie->resources);
+}
+
+/*
+ * This is a copy of pci_host_probe(), except that it does the I/O
+ * remap as the last step, once we are sure we won't fail.
+ *
+ * It should be removed once the I/O remap error handling issue has
+ * been sorted out.
+ */
+static int mvebu_pci_host_probe(struct pci_host_bridge *bridge)
+{
+ struct mvebu_pcie *pcie;
+ struct pci_bus *bus, *child;
+ int ret;
+
+ ret = pci_scan_root_bus_bridge(bridge);
+ if (ret < 0) {
+ dev_err(bridge->dev.parent, "Scanning root bridge failed");
+ return ret;
+ }
+
+ pcie = pci_host_bridge_priv(bridge);
+ if (resource_size(&pcie->io) != 0) {
+ unsigned int i;
+
for (i = 0; i < resource_size(&pcie->realio); i += SZ_64K)
pci_ioremap_io(i, pcie->io.start + i);
+ }
- pci_add_resource(&pcie->resources, &pcie->realio);
+ bus = bridge->bus;
+
+ /*
+ * We insert PCI resources into the iomem_resource and
+ * ioport_resource trees in either pci_bus_claim_resources()
+ * or pci_bus_assign_resources().
+ */
+ if (pci_has_flag(PCI_PROBE_ONLY)) {
+ pci_bus_claim_resources(bus);
+ } else {
+ pci_bus_size_bridges(bus);
+ pci_bus_assign_resources(bus);
+
+ list_for_each_entry(child, &bus->children, node)
+ pcie_bus_configure_settings(child);
}
- return devm_request_pci_bus_resources(dev, &pcie->resources);
+ pci_bus_add_devices(bus);
+ return 0;
}
static int mvebu_pcie_probe(struct platform_device *pdev)
@@ -1268,7 +1312,7 @@ static int mvebu_pcie_probe(struct platform_device *pdev)
bridge->align_resource = mvebu_pcie_align_resource;
bridge->msi = pcie->msi;
- return pci_host_probe(bridge);
+ return mvebu_pci_host_probe(bridge);
}
static const struct of_device_id mvebu_pcie_of_match_table[] = {
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 1835f3a7aa8d..51b6c81671c1 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -1289,12 +1289,12 @@ int pci_save_state(struct pci_dev *dev)
EXPORT_SYMBOL(pci_save_state);
static void pci_restore_config_dword(struct pci_dev *pdev, int offset,
- u32 saved_val, int retry)
+ u32 saved_val, int retry, bool force)
{
u32 val;
pci_read_config_dword(pdev, offset, &val);
- if (val == saved_val)
+ if (!force && val == saved_val)
return;
for (;;) {
@@ -1313,25 +1313,36 @@ static void pci_restore_config_dword(struct pci_dev *pdev, int offset,
}
static void pci_restore_config_space_range(struct pci_dev *pdev,
- int start, int end, int retry)
+ int start, int end, int retry,
+ bool force)
{
int index;
for (index = end; index >= start; index--)
pci_restore_config_dword(pdev, 4 * index,
pdev->saved_config_space[index],
- retry);
+ retry, force);
}
static void pci_restore_config_space(struct pci_dev *pdev)
{
if (pdev->hdr_type == PCI_HEADER_TYPE_NORMAL) {
- pci_restore_config_space_range(pdev, 10, 15, 0);
+ pci_restore_config_space_range(pdev, 10, 15, 0, false);
/* Restore BARs before the command register. */
- pci_restore_config_space_range(pdev, 4, 9, 10);
- pci_restore_config_space_range(pdev, 0, 3, 0);
+ pci_restore_config_space_range(pdev, 4, 9, 10, false);
+ pci_restore_config_space_range(pdev, 0, 3, 0, false);
+ } else if (pdev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
+ pci_restore_config_space_range(pdev, 12, 15, 0, false);
+
+ /*
+ * Force rewriting of prefetch registers to avoid S3 resume
+ * issues on Intel PCI bridges that occur when these
+ * registers are not explicitly written.
+ */
+ pci_restore_config_space_range(pdev, 9, 11, 0, true);
+ pci_restore_config_space_range(pdev, 0, 8, 0, false);
} else {
- pci_restore_config_space_range(pdev, 0, 15, 0);
+ pci_restore_config_space_range(pdev, 0, 15, 0, false);
}
}
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 0c9ab62c3df4..9dcaed031843 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -1553,6 +1553,7 @@ static inline void free_dfs_info_array(struct dfs_info3_param *param,
/* Flags */
#define MID_WAIT_CANCELLED 1 /* Cancelled while waiting for response */
+#define MID_DELETED 2 /* Mid has been dequeued/deleted */
/* Types of response buffer returned from SendReceive2 */
#define CIFS_NO_BUFFER 0 /* Response buffer not returned */
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 7aa08dba4719..52d71b64c0c6 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -659,7 +659,15 @@ dequeue_mid(struct mid_q_entry *mid, bool malformed)
mid->mid_state = MID_RESPONSE_RECEIVED;
else
mid->mid_state = MID_RESPONSE_MALFORMED;
- list_del_init(&mid->qhead);
+ /*
+ * Trying to handle/dequeue a mid after the send_recv()
+ * function has finished processing it is a bug.
+ */
+ if (mid->mid_flags & MID_DELETED)
+ printk_once(KERN_WARNING
+ "trying to dequeue a deleted mid\n");
+ else
+ list_del_init(&mid->qhead);
spin_unlock(&GlobalMid_Lock);
}
@@ -938,8 +946,7 @@ next_pdu:
} else {
mids[0] = server->ops->find_mid(server, buf);
bufs[0] = buf;
- if (mids[0])
- num_mids = 1;
+ num_mids = 1;
if (!mids[0] || !mids[0]->receive)
length = standard_receive3(server, mids[0]);
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index d954ce36b473..89985a0a6819 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -1477,7 +1477,7 @@ smb2_query_dir_first(const unsigned int xid, struct cifs_tcon *tcon,
}
srch_inf->entries_in_buffer = 0;
- srch_inf->index_of_last_entry = 0;
+ srch_inf->index_of_last_entry = 2;
rc = SMB2_query_directory(xid, tcon, fid->persistent_fid,
fid->volatile_fid, 0, srch_inf);
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 78f96fa3d7d9..b48f43963da6 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -142,7 +142,8 @@ void
cifs_delete_mid(struct mid_q_entry *mid)
{
spin_lock(&GlobalMid_Lock);
- list_del(&mid->qhead);
+ list_del_init(&mid->qhead);
+ mid->mid_flags |= MID_DELETED;
spin_unlock(&GlobalMid_Lock);
DeleteMidQEntry(mid);
@@ -772,6 +773,11 @@ cifs_setup_request(struct cifs_ses *ses, struct smb_rqst *rqst)
return mid;
}
+static void
+cifs_noop_callback(struct mid_q_entry *mid)
+{
+}
+
int
compound_send_recv(const unsigned int xid, struct cifs_ses *ses,
const int flags, const int num_rqst, struct smb_rqst *rqst,
@@ -826,8 +832,13 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses,
}
midQ[i]->mid_state = MID_REQUEST_SUBMITTED;
+ /*
+		 * We don't invoke the callback for compounds unless it is
+		 * the last request.
+ */
+ if (i < num_rqst - 1)
+ midQ[i]->callback = cifs_noop_callback;
}
-
cifs_in_send_inc(ses->server);
rc = smb_send_rqst(ses->server, num_rqst, rqst, flags);
cifs_in_send_dec(ses->server);
@@ -908,6 +919,12 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses,
midQ[i]->resp_buf = NULL;
}
out:
+ /*
+ * This will dequeue all mids. After this it is important that the
+	 * demultiplex_thread will not process any of these mids any further.
+ * This is prevented above by using a noop callback that will not
+ * wake this thread except for the very last PDU.
+ */
for (i = 0; i < num_rqst; i++)
cifs_delete_mid(midQ[i]);
add_credits(ses->server, credits, optype);
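/*
 * Illustrative sketch, not from the cifs code: the double-dequeue guard the
 * MID_DELETED flag provides.  The flag is set under the same lock that
 * protects the list, so a racing reader can detect an entry that has already
 * been unlinked and warn instead of unlinking it a second time.  The names
 * ENTRY_DELETED, entry and entry_lock are hypothetical.
 */
	/* deleting path */
	spin_lock(&entry_lock);
	list_del_init(&entry->list);
	entry->flags |= ENTRY_DELETED;
	spin_unlock(&entry_lock);

	/* racing dequeue path */
	spin_lock(&entry_lock);
	if (entry->flags & ENTRY_DELETED)
		printk_once(KERN_WARNING "trying to dequeue a deleted entry\n");
	else
		list_del_init(&entry->list);
	spin_unlock(&entry_lock);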
diff --git a/fs/ioctl.c b/fs/ioctl.c
index 3212c29235ce..2005529af560 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -230,7 +230,7 @@ static long ioctl_file_clone(struct file *dst_file, unsigned long srcfd,
ret = -EXDEV;
if (src_file.file->f_path.mnt != dst_file->f_path.mnt)
goto fdput;
- ret = do_clone_file_range(src_file.file, off, dst_file, destoff, olen);
+ ret = vfs_clone_file_range(src_file.file, off, dst_file, destoff, olen);
fdput:
fdput(src_file);
return ret;
diff --git a/fs/iomap.c b/fs/iomap.c
index 74762b1ec233..ec15cf2ec696 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -1051,6 +1051,7 @@ iomap_page_mkwrite_actor(struct inode *inode, loff_t pos, loff_t length,
} else {
WARN_ON_ONCE(!PageUptodate(page));
iomap_page_create(inode, page);
+ set_page_dirty(page);
}
return length;
@@ -1090,7 +1091,6 @@ int iomap_page_mkwrite(struct vm_fault *vmf, const struct iomap_ops *ops)
length -= ret;
}
- set_page_dirty(page);
wait_for_stable_page(page);
return VM_FAULT_LOCKED;
out_unlock:
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 55a099e47ba2..b53e76391e52 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -541,7 +541,8 @@ __be32 nfsd4_set_nfs4_label(struct svc_rqst *rqstp, struct svc_fh *fhp,
__be32 nfsd4_clone_file_range(struct file *src, u64 src_pos, struct file *dst,
u64 dst_pos, u64 count)
{
- return nfserrno(do_clone_file_range(src, src_pos, dst, dst_pos, count));
+ return nfserrno(vfs_clone_file_range(src, src_pos, dst, dst_pos,
+ count));
}
ssize_t nfsd_copy_file_range(struct file *src, u64 src_pos, struct file *dst,
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index aaca0949fe53..826f0567ec43 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -584,9 +584,9 @@ static void dlm_init_lockres(struct dlm_ctxt *dlm,
res->last_used = 0;
- spin_lock(&dlm->spinlock);
+ spin_lock(&dlm->track_lock);
list_add_tail(&res->tracking, &dlm->tracking_list);
- spin_unlock(&dlm->spinlock);
+ spin_unlock(&dlm->track_lock);
memset(res->lvb, 0, DLM_LVB_LEN);
memset(res->refmap, 0, sizeof(res->refmap));
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 7869622af22a..7a5ee145c733 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -2946,6 +2946,7 @@ int ocfs2_duplicate_clusters_by_page(handle_t *handle,
if (map_end & (PAGE_SIZE - 1))
to = map_end & (PAGE_SIZE - 1);
+retry:
page = find_or_create_page(mapping, page_index, GFP_NOFS);
if (!page) {
ret = -ENOMEM;
@@ -2954,11 +2955,18 @@ int ocfs2_duplicate_clusters_by_page(handle_t *handle,
}
/*
- * In case PAGE_SIZE <= CLUSTER_SIZE, This page
- * can't be dirtied before we CoW it out.
+ * In case PAGE_SIZE <= CLUSTER_SIZE, we do not expect a dirty
+ * page, so write it back.
*/
- if (PAGE_SIZE <= OCFS2_SB(sb)->s_clustersize)
- BUG_ON(PageDirty(page));
+ if (PAGE_SIZE <= OCFS2_SB(sb)->s_clustersize) {
+ if (PageDirty(page)) {
+ /*
+				 * write_one_page() will unlock the page on return
+ */
+ ret = write_one_page(page);
+ goto retry;
+ }
+ }
if (!PageUptodate(page)) {
ret = block_read_full_page(page, ocfs2_get_block);
diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index 296037afecdb..1cc797a08a5b 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -141,7 +141,7 @@ static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len)
}
/* Try to use clone_file_range to clone up within the same fs */
- error = vfs_clone_file_range(old_file, 0, new_file, 0, len);
+ error = do_clone_file_range(old_file, 0, new_file, 0, len);
if (!error)
goto out;
/* Couldn't clone, so now we try to copy the data */
diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
index aeaefd2a551b..986313da0c88 100644
--- a/fs/overlayfs/file.c
+++ b/fs/overlayfs/file.c
@@ -240,8 +240,10 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter)
goto out_unlock;
old_cred = ovl_override_creds(file_inode(file)->i_sb);
+ file_start_write(real.file);
ret = vfs_iter_write(real.file, iter, &iocb->ki_pos,
ovl_iocb_to_rwf(iocb));
+ file_end_write(real.file);
revert_creds(old_cred);
/* Update size */
diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
index b6ac545b5a32..3b7ed5d2279c 100644
--- a/fs/overlayfs/inode.c
+++ b/fs/overlayfs/inode.c
@@ -504,7 +504,7 @@ static const struct inode_operations ovl_special_inode_operations = {
.update_time = ovl_update_time,
};
-const struct address_space_operations ovl_aops = {
+static const struct address_space_operations ovl_aops = {
/* For O_DIRECT dentry_open() checks f_mapping->a_ops->direct_IO */
.direct_IO = noop_direct_IO,
};
diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c
index f28711846dd6..9c0ca6a7becf 100644
--- a/fs/overlayfs/namei.c
+++ b/fs/overlayfs/namei.c
@@ -686,7 +686,7 @@ struct dentry *ovl_lookup_index(struct ovl_fs *ofs, struct dentry *upper,
index = NULL;
goto out;
}
- pr_warn_ratelimited("overlayfs: failed inode index lookup (ino=%lu, key=%*s, err=%i);\n"
+ pr_warn_ratelimited("overlayfs: failed inode index lookup (ino=%lu, key=%.*s, err=%i);\n"
"overlayfs: mount with '-o index=off' to disable inodes index.\n",
d_inode(origin)->i_ino, name.len, name.name,
err);
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index f61839e1054c..a3c0d9584312 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -152,8 +152,8 @@ static inline int ovl_do_setxattr(struct dentry *dentry, const char *name,
const void *value, size_t size, int flags)
{
int err = vfs_setxattr(dentry, name, value, size, flags);
- pr_debug("setxattr(%pd2, \"%s\", \"%*s\", 0x%x) = %i\n",
- dentry, name, (int) size, (char *) value, flags, err);
+ pr_debug("setxattr(%pd2, \"%s\", \"%*pE\", %zu, 0x%x) = %i\n",
+ dentry, name, min((int)size, 48), value, size, flags, err);
return err;
}
diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
index 8cfb62cc8672..ace4fe4c39a9 100644
--- a/fs/overlayfs/util.c
+++ b/fs/overlayfs/util.c
@@ -683,7 +683,7 @@ static void ovl_cleanup_index(struct dentry *dentry)
struct dentry *upperdentry = ovl_dentry_upper(dentry);
struct dentry *index = NULL;
struct inode *inode;
- struct qstr name;
+ struct qstr name = { };
int err;
err = ovl_get_index_name(lowerdentry, &name);
@@ -726,6 +726,7 @@ static void ovl_cleanup_index(struct dentry *dentry)
goto fail;
out:
+ kfree(name.name);
dput(index);
return;
diff --git a/fs/proc/base.c b/fs/proc/base.c
index ccf86f16d9f0..7e9f07bf260d 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -407,6 +407,20 @@ static int proc_pid_stack(struct seq_file *m, struct pid_namespace *ns,
unsigned long *entries;
int err;
+ /*
+ * The ability to racily run the kernel stack unwinder on a running task
+ * and then observe the unwinder output is scary; while it is useful for
+ * debugging kernel issues, it can also allow an attacker to leak kernel
+ * stack contents.
+ * Doing this in a manner that is at least safe from races would require
+ * some work to ensure that the remote task can not be scheduled; and
+ * even then, this would still expose the unwinder as local attack
+ * surface.
+ * Therefore, this interface is restricted to root.
+ */
+ if (!file_ns_capable(m->file, &init_user_ns, CAP_SYS_ADMIN))
+ return -EACCES;
+
entries = kmalloc_array(MAX_STACK_TRACE_DEPTH, sizeof(*entries),
GFP_KERNEL);
if (!entries)
diff --git a/fs/read_write.c b/fs/read_write.c
index 39b4a21dd933..8a2737f0d61d 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -1818,8 +1818,8 @@ int vfs_clone_file_prep_inodes(struct inode *inode_in, loff_t pos_in,
}
EXPORT_SYMBOL(vfs_clone_file_prep_inodes);
-int vfs_clone_file_range(struct file *file_in, loff_t pos_in,
- struct file *file_out, loff_t pos_out, u64 len)
+int do_clone_file_range(struct file *file_in, loff_t pos_in,
+ struct file *file_out, loff_t pos_out, u64 len)
{
struct inode *inode_in = file_inode(file_in);
struct inode *inode_out = file_inode(file_out);
@@ -1866,6 +1866,19 @@ int vfs_clone_file_range(struct file *file_in, loff_t pos_in,
return ret;
}
+EXPORT_SYMBOL(do_clone_file_range);
+
+int vfs_clone_file_range(struct file *file_in, loff_t pos_in,
+ struct file *file_out, loff_t pos_out, u64 len)
+{
+ int ret;
+
+ file_start_write(file_out);
+ ret = do_clone_file_range(file_in, pos_in, file_out, pos_out, len);
+ file_end_write(file_out);
+
+ return ret;
+}
EXPORT_SYMBOL(vfs_clone_file_range);
/*
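/*
 * Illustrative sketch, not from the VFS: the layering the hunk above
 * introduces.  vfs_clone_file_range() becomes the external entry point and
 * takes sb freeze/write protection around the work, while
 * do_clone_file_range() is left without that protection for callers that
 * arrange it themselves -- presumably why the overlayfs copy-up hunk above
 * switches to the unprotected helper.  do_unlocked_clone() is a hypothetical
 * stand-in for the inner helper.
 */
ssize_t locked_clone(struct file *in, struct file *out, u64 len)
{
	ssize_t ret;

	file_start_write(out);			/* take freeze protection */
	ret = do_unlocked_clone(in, out, len);
	file_end_write(out);
	return ret;
}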
diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c
index 1e671d4eb6fa..c6299f82a6e4 100644
--- a/fs/xfs/libxfs/xfs_attr.c
+++ b/fs/xfs/libxfs/xfs_attr.c
@@ -587,7 +587,7 @@ xfs_attr_leaf_addname(
*/
error = xfs_attr3_leaf_to_node(args);
if (error)
- goto out_defer_cancel;
+ return error;
error = xfs_defer_finish(&args->trans);
if (error)
return error;
@@ -675,7 +675,7 @@ xfs_attr_leaf_addname(
error = xfs_attr3_leaf_to_shortform(bp, args, forkoff);
/* bp is gone due to xfs_da_shrink_inode */
if (error)
- goto out_defer_cancel;
+ return error;
error = xfs_defer_finish(&args->trans);
if (error)
return error;
@@ -693,9 +693,6 @@ xfs_attr_leaf_addname(
error = xfs_attr3_leaf_clearflag(args);
}
return error;
-out_defer_cancel:
- xfs_defer_cancel(args->trans);
- return error;
}
/*
@@ -738,15 +735,12 @@ xfs_attr_leaf_removename(
error = xfs_attr3_leaf_to_shortform(bp, args, forkoff);
/* bp is gone due to xfs_da_shrink_inode */
if (error)
- goto out_defer_cancel;
+ return error;
error = xfs_defer_finish(&args->trans);
if (error)
return error;
}
return 0;
-out_defer_cancel:
- xfs_defer_cancel(args->trans);
- return error;
}
/*
@@ -864,7 +858,7 @@ restart:
state = NULL;
error = xfs_attr3_leaf_to_node(args);
if (error)
- goto out_defer_cancel;
+ goto out;
error = xfs_defer_finish(&args->trans);
if (error)
goto out;
@@ -888,7 +882,7 @@ restart:
*/
error = xfs_da3_split(state);
if (error)
- goto out_defer_cancel;
+ goto out;
error = xfs_defer_finish(&args->trans);
if (error)
goto out;
@@ -984,7 +978,7 @@ restart:
if (retval && (state->path.active > 1)) {
error = xfs_da3_join(state);
if (error)
- goto out_defer_cancel;
+ goto out;
error = xfs_defer_finish(&args->trans);
if (error)
goto out;
@@ -1013,9 +1007,6 @@ out:
if (error)
return error;
return retval;
-out_defer_cancel:
- xfs_defer_cancel(args->trans);
- goto out;
}
/*
@@ -1107,7 +1098,7 @@ xfs_attr_node_removename(
if (retval && (state->path.active > 1)) {
error = xfs_da3_join(state);
if (error)
- goto out_defer_cancel;
+ goto out;
error = xfs_defer_finish(&args->trans);
if (error)
goto out;
@@ -1138,7 +1129,7 @@ xfs_attr_node_removename(
error = xfs_attr3_leaf_to_shortform(bp, args, forkoff);
/* bp is gone due to xfs_da_shrink_inode */
if (error)
- goto out_defer_cancel;
+ goto out;
error = xfs_defer_finish(&args->trans);
if (error)
goto out;
@@ -1150,9 +1141,6 @@ xfs_attr_node_removename(
out:
xfs_da_state_free(state);
return error;
-out_defer_cancel:
- xfs_defer_cancel(args->trans);
- goto out;
}
/*
diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c
index af094063e402..d89363c6b523 100644
--- a/fs/xfs/libxfs/xfs_attr_remote.c
+++ b/fs/xfs/libxfs/xfs_attr_remote.c
@@ -485,7 +485,7 @@ xfs_attr_rmtval_set(
blkcnt, XFS_BMAPI_ATTRFORK, args->total, &map,
&nmap);
if (error)
- goto out_defer_cancel;
+ return error;
error = xfs_defer_finish(&args->trans);
if (error)
return error;
@@ -553,9 +553,6 @@ xfs_attr_rmtval_set(
}
ASSERT(valuelen == 0);
return 0;
-out_defer_cancel:
- xfs_defer_cancel(args->trans);
- return error;
}
/*
@@ -625,7 +622,7 @@ xfs_attr_rmtval_remove(
error = xfs_bunmapi(args->trans, args->dp, lblkno, blkcnt,
XFS_BMAPI_ATTRFORK, 1, &done);
if (error)
- goto out_defer_cancel;
+ return error;
error = xfs_defer_finish(&args->trans);
if (error)
return error;
@@ -638,7 +635,4 @@ xfs_attr_rmtval_remove(
return error;
}
return 0;
-out_defer_cancel:
- xfs_defer_cancel(args->trans);
- return error;
}
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 2760314fdf7f..a47670332326 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -673,7 +673,8 @@ xfs_bmap_extents_to_btree(
ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS);
/*
- * Make space in the inode incore.
+ * Make space in the inode incore. This needs to be undone if we fail
+ * to expand the root.
*/
xfs_iroot_realloc(ip, 1, whichfork);
ifp->if_flags |= XFS_IFBROOT;
@@ -711,16 +712,15 @@ xfs_bmap_extents_to_btree(
args.minlen = args.maxlen = args.prod = 1;
args.wasdel = wasdel;
*logflagsp = 0;
- if ((error = xfs_alloc_vextent(&args))) {
- ASSERT(ifp->if_broot == NULL);
- goto err1;
- }
+ error = xfs_alloc_vextent(&args);
+ if (error)
+ goto out_root_realloc;
if (WARN_ON_ONCE(args.fsbno == NULLFSBLOCK)) {
- ASSERT(ifp->if_broot == NULL);
error = -ENOSPC;
- goto err1;
+ goto out_root_realloc;
}
+
/*
* Allocation can't fail, the space was reserved.
*/
@@ -732,9 +732,10 @@ xfs_bmap_extents_to_btree(
xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, 1L);
abp = xfs_btree_get_bufl(mp, tp, args.fsbno, 0);
if (!abp) {
- error = -ENOSPC;
- goto err2;
+ error = -EFSCORRUPTED;
+ goto out_unreserve_dquot;
}
+
/*
* Fill in the child block.
*/
@@ -775,11 +776,12 @@ xfs_bmap_extents_to_btree(
*logflagsp = XFS_ILOG_CORE | xfs_ilog_fbroot(whichfork);
return 0;
-err2:
+out_unreserve_dquot:
xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L);
-err1:
+out_root_realloc:
xfs_iroot_realloc(ip, -1, whichfork);
XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS);
+ ASSERT(ifp->if_broot == NULL);
xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
return error;
diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
index 059bc44c27e8..afbe336600e1 100644
--- a/fs/xfs/libxfs/xfs_format.h
+++ b/fs/xfs/libxfs/xfs_format.h
@@ -1016,6 +1016,8 @@ static inline void xfs_dinode_put_rdev(struct xfs_dinode *dip, xfs_dev_t rdev)
#define XFS_DIFLAG_EXTSZINHERIT_BIT 12 /* inherit inode extent size */
#define XFS_DIFLAG_NODEFRAG_BIT 13 /* do not reorganize/defragment */
#define XFS_DIFLAG_FILESTREAM_BIT 14 /* use filestream allocator */
+/* Do not use bit 15, di_flags is legacy and unchanging now */
+
#define XFS_DIFLAG_REALTIME (1 << XFS_DIFLAG_REALTIME_BIT)
#define XFS_DIFLAG_PREALLOC (1 << XFS_DIFLAG_PREALLOC_BIT)
#define XFS_DIFLAG_NEWRTBM (1 << XFS_DIFLAG_NEWRTBM_BIT)
diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c
index 30d1d60f1d46..09d9c8cfa4a0 100644
--- a/fs/xfs/libxfs/xfs_inode_buf.c
+++ b/fs/xfs/libxfs/xfs_inode_buf.c
@@ -415,6 +415,31 @@ xfs_dinode_verify_fork(
return NULL;
}
+static xfs_failaddr_t
+xfs_dinode_verify_forkoff(
+ struct xfs_dinode *dip,
+ struct xfs_mount *mp)
+{
+ if (!XFS_DFORK_Q(dip))
+ return NULL;
+
+ switch (dip->di_format) {
+ case XFS_DINODE_FMT_DEV:
+ if (dip->di_forkoff != (roundup(sizeof(xfs_dev_t), 8) >> 3))
+ return __this_address;
+ break;
+ case XFS_DINODE_FMT_LOCAL: /* fall through ... */
+ case XFS_DINODE_FMT_EXTENTS: /* fall through ... */
+ case XFS_DINODE_FMT_BTREE:
+ if (dip->di_forkoff >= (XFS_LITINO(mp, dip->di_version) >> 3))
+ return __this_address;
+ break;
+ default:
+ return __this_address;
+ }
+ return NULL;
+}
+
xfs_failaddr_t
xfs_dinode_verify(
struct xfs_mount *mp,
@@ -470,6 +495,11 @@ xfs_dinode_verify(
if (mode && (flags & XFS_DIFLAG_REALTIME) && !mp->m_rtdev_targp)
return __this_address;
+ /* check for illegal values of forkoff */
+ fa = xfs_dinode_verify_forkoff(dip, mp);
+ if (fa)
+ return fa;
+
/* Do we have appropriate data fork formats for the mode? */
switch (mode & S_IFMT) {
case S_IFIFO:
diff --git a/fs/xfs/scrub/alloc.c b/fs/xfs/scrub/alloc.c
index 036b5c7021eb..376bcb585ae6 100644
--- a/fs/xfs/scrub/alloc.c
+++ b/fs/xfs/scrub/alloc.c
@@ -17,7 +17,6 @@
#include "xfs_sb.h"
#include "xfs_alloc.h"
#include "xfs_rmap.h"
-#include "xfs_alloc.h"
#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
diff --git a/fs/xfs/scrub/inode.c b/fs/xfs/scrub/inode.c
index 5b3b177c0fc9..e386c9b0b4ab 100644
--- a/fs/xfs/scrub/inode.c
+++ b/fs/xfs/scrub/inode.c
@@ -126,6 +126,7 @@ xchk_inode_flags(
{
struct xfs_mount *mp = sc->mp;
+ /* di_flags are all taken, last bit cannot be used */
if (flags & ~XFS_DIFLAG_ANY)
goto bad;
@@ -172,8 +173,9 @@ xchk_inode_flags2(
{
struct xfs_mount *mp = sc->mp;
+ /* Unknown di_flags2 could be from a future kernel */
if (flags2 & ~XFS_DIFLAG2_ANY)
- goto bad;
+ xchk_ino_set_warning(sc, ino);
/* reflink flag requires reflink feature */
if ((flags2 & XFS_DIFLAG2_REFLINK) &&
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index addbd74ecd8e..6de8d90041ff 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -702,13 +702,9 @@ xfs_bmap_punch_delalloc_range(
struct xfs_iext_cursor icur;
int error = 0;
- xfs_ilock(ip, XFS_ILOCK_EXCL);
- if (!(ifp->if_flags & XFS_IFEXTENTS)) {
- error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK);
- if (error)
- goto out_unlock;
- }
+ ASSERT(ifp->if_flags & XFS_IFEXTENTS);
+ xfs_ilock(ip, XFS_ILOCK_EXCL);
if (!xfs_iext_lookup_extent_before(ip, ifp, &end_fsb, &icur, &got))
goto out_unlock;
@@ -1584,7 +1580,7 @@ xfs_swap_extent_rmap(
tirec.br_blockcount, &irec,
&nimaps, 0);
if (error)
- goto out_defer;
+ goto out;
ASSERT(nimaps == 1);
ASSERT(tirec.br_startoff == irec.br_startoff);
trace_xfs_swap_extent_rmap_remap_piece(ip, &irec);
@@ -1599,22 +1595,22 @@ xfs_swap_extent_rmap(
/* Remove the mapping from the donor file. */
error = xfs_bmap_unmap_extent(tp, tip, &uirec);
if (error)
- goto out_defer;
+ goto out;
/* Remove the mapping from the source file. */
error = xfs_bmap_unmap_extent(tp, ip, &irec);
if (error)
- goto out_defer;
+ goto out;
/* Map the donor file's blocks into the source file. */
error = xfs_bmap_map_extent(tp, ip, &uirec);
if (error)
- goto out_defer;
+ goto out;
/* Map the source file's blocks into the donor file. */
error = xfs_bmap_map_extent(tp, tip, &irec);
if (error)
- goto out_defer;
+ goto out;
error = xfs_defer_finish(tpp);
tp = *tpp;
@@ -1636,8 +1632,6 @@ xfs_swap_extent_rmap(
tip->i_d.di_flags2 = tip_flags2;
return 0;
-out_defer:
- xfs_defer_cancel(tp);
out:
trace_xfs_swap_extent_rmap_error(ip, error, _RET_IP_);
tip->i_d.di_flags2 = tip_flags2;
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 1c9d1398980b..12d8455bfbb2 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -532,6 +532,49 @@ xfs_buf_item_push(
}
/*
+ * Drop the buffer log item refcount and take appropriate action. This helper
+ * determines whether the bli must be freed or not, since a decrement to zero
+ * does not necessarily mean the bli is unused.
+ *
+ * Return true if the bli is freed, false otherwise.
+ */
+bool
+xfs_buf_item_put(
+ struct xfs_buf_log_item *bip)
+{
+ struct xfs_log_item *lip = &bip->bli_item;
+ bool aborted;
+ bool dirty;
+
+ /* drop the bli ref and return if it wasn't the last one */
+ if (!atomic_dec_and_test(&bip->bli_refcount))
+ return false;
+
+ /*
+ * We dropped the last ref and must free the item if clean or aborted.
+ * If the bli is dirty and non-aborted, the buffer was clean in the
+ * transaction but still awaiting writeback from previous changes. In
+ * that case, the bli is freed on buffer writeback completion.
+ */
+ aborted = test_bit(XFS_LI_ABORTED, &lip->li_flags) ||
+ XFS_FORCED_SHUTDOWN(lip->li_mountp);
+ dirty = bip->bli_flags & XFS_BLI_DIRTY;
+ if (dirty && !aborted)
+ return false;
+
+ /*
+ * The bli is aborted or clean. An aborted item may be in the AIL
+ * regardless of dirty state. For example, consider an aborted
+ * transaction that invalidated a dirty bli and cleared the dirty
+ * state.
+ */
+ if (aborted)
+ xfs_trans_ail_remove(lip, SHUTDOWN_LOG_IO_ERROR);
+ xfs_buf_item_relse(bip->bli_buf);
+ return true;
+}
+
+/*
* Release the buffer associated with the buf log item. If there is no dirty
* logged data associated with the buffer recorded in the buf log item, then
* free the buf log item and remove the reference to it in the buffer.
@@ -556,76 +599,42 @@ xfs_buf_item_unlock(
{
struct xfs_buf_log_item *bip = BUF_ITEM(lip);
struct xfs_buf *bp = bip->bli_buf;
- bool aborted;
- bool hold = !!(bip->bli_flags & XFS_BLI_HOLD);
- bool dirty = !!(bip->bli_flags & XFS_BLI_DIRTY);
+ bool released;
+ bool hold = bip->bli_flags & XFS_BLI_HOLD;
+ bool stale = bip->bli_flags & XFS_BLI_STALE;
#if defined(DEBUG) || defined(XFS_WARN)
- bool ordered = !!(bip->bli_flags & XFS_BLI_ORDERED);
+ bool ordered = bip->bli_flags & XFS_BLI_ORDERED;
+ bool dirty = bip->bli_flags & XFS_BLI_DIRTY;
#endif
- aborted = test_bit(XFS_LI_ABORTED, &lip->li_flags);
-
- /* Clear the buffer's association with this transaction. */
- bp->b_transp = NULL;
-
- /*
- * The per-transaction state has been copied above so clear it from the
- * bli.
- */
- bip->bli_flags &= ~(XFS_BLI_LOGGED | XFS_BLI_HOLD | XFS_BLI_ORDERED);
-
- /*
- * If the buf item is marked stale, then don't do anything. We'll
- * unlock the buffer and free the buf item when the buffer is unpinned
- * for the last time.
- */
- if (bip->bli_flags & XFS_BLI_STALE) {
- trace_xfs_buf_item_unlock_stale(bip);
- ASSERT(bip->__bli_format.blf_flags & XFS_BLF_CANCEL);
- if (!aborted) {
- atomic_dec(&bip->bli_refcount);
- return;
- }
- }
-
trace_xfs_buf_item_unlock(bip);
/*
- * If the buf item isn't tracking any data, free it, otherwise drop the
- * reference we hold to it. If we are aborting the transaction, this may
- * be the only reference to the buf item, so we free it anyway
- * regardless of whether it is dirty or not. A dirty abort implies a
- * shutdown, anyway.
- *
* The bli dirty state should match whether the blf has logged segments
* except for ordered buffers, where only the bli should be dirty.
*/
ASSERT((!ordered && dirty == xfs_buf_item_dirty_format(bip)) ||
(ordered && dirty && !xfs_buf_item_dirty_format(bip)));
+ ASSERT(!stale || (bip->__bli_format.blf_flags & XFS_BLF_CANCEL));
/*
- * Clean buffers, by definition, cannot be in the AIL. However, aborted
- * buffers may be in the AIL regardless of dirty state. An aborted
- * transaction that invalidates a buffer already in the AIL may have
- * marked it stale and cleared the dirty state, for example.
- *
- * Therefore if we are aborting a buffer and we've just taken the last
- * reference away, we have to check if it is in the AIL before freeing
- * it. We need to free it in this case, because an aborted transaction
- * has already shut the filesystem down and this is the last chance we
- * will have to do so.
+ * Clear the buffer's association with this transaction and
+ * per-transaction state from the bli, which has been copied above.
*/
- if (atomic_dec_and_test(&bip->bli_refcount)) {
- if (aborted) {
- ASSERT(XFS_FORCED_SHUTDOWN(lip->li_mountp));
- xfs_trans_ail_remove(lip, SHUTDOWN_LOG_IO_ERROR);
- xfs_buf_item_relse(bp);
- } else if (!dirty)
- xfs_buf_item_relse(bp);
- }
+ bp->b_transp = NULL;
+ bip->bli_flags &= ~(XFS_BLI_LOGGED | XFS_BLI_HOLD | XFS_BLI_ORDERED);
- if (!hold)
- xfs_buf_relse(bp);
+ /*
+ * Unref the item and unlock the buffer unless held or stale. Stale
+ * buffers remain locked until final unpin unless the bli is freed by
+ * the unref call. The latter implies shutdown because buffer
+ * invalidation dirties the bli and transaction.
+ */
+ released = xfs_buf_item_put(bip);
+ if (hold || (stale && !released))
+ return;
+ ASSERT(!stale || test_bit(XFS_LI_ABORTED, &lip->li_flags));
+ xfs_buf_relse(bp);
}
/*
diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h
index 3f7d7b72e7e6..90f65f891fab 100644
--- a/fs/xfs/xfs_buf_item.h
+++ b/fs/xfs/xfs_buf_item.h
@@ -51,6 +51,7 @@ struct xfs_buf_log_item {
int xfs_buf_item_init(struct xfs_buf *, struct xfs_mount *);
void xfs_buf_item_relse(struct xfs_buf *);
+bool xfs_buf_item_put(struct xfs_buf_log_item *);
void xfs_buf_item_log(struct xfs_buf_log_item *, uint, uint);
bool xfs_buf_item_dirty_format(struct xfs_buf_log_item *);
void xfs_buf_attach_iodone(struct xfs_buf *,
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index d957a46dc1cb..05db9540e459 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1563,7 +1563,7 @@ xfs_itruncate_extents_flags(
error = xfs_bunmapi(tp, ip, first_unmap_block, unmap_len, flags,
XFS_ITRUNC_MAX_EXTENTS, &done);
if (error)
- goto out_bmap_cancel;
+ goto out;
/*
* Duplicate the transaction that has the permanent
@@ -1599,14 +1599,6 @@ xfs_itruncate_extents_flags(
out:
*tpp = tp;
return error;
-out_bmap_cancel:
- /*
- * If the bunmapi call encounters an error, return to the caller where
- * the transaction can be properly aborted. We just need to make sure
- * we're not holding any resources that we were not when we came in.
- */
- xfs_defer_cancel(tp);
- goto out;
}
int
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index c3e74f9128e8..f48ffd7a8d3e 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -471,8 +471,18 @@ xfs_vn_get_link_inline(
struct inode *inode,
struct delayed_call *done)
{
+ char *link;
+
ASSERT(XFS_I(inode)->i_df.if_flags & XFS_IFINLINE);
- return XFS_I(inode)->i_df.if_u1.if_data;
+
+ /*
+ * The VFS crashes on a NULL pointer, so return -EFSCORRUPTED if
+ * if_data is junk.
+ */
+ link = XFS_I(inode)->i_df.if_u1.if_data;
+ if (!link)
+ return ERR_PTR(-EFSCORRUPTED);
+ return link;
}
STATIC int
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index a21dc61ec09e..1fc9e9042e0e 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -1570,16 +1570,6 @@ xlog_find_zeroed(
if (last_cycle != 0) { /* log completely written to */
xlog_put_bp(bp);
return 0;
- } else if (first_cycle != 1) {
- /*
- * If the cycle of the last block is zero, the cycle of
- * the first block must be 1. If it's not, maybe we're
- * not looking at a log... Bail out.
- */
- xfs_warn(log->l_mp,
- "Log inconsistent or not a log (last==0, first!=1)");
- error = -EINVAL;
- goto bp_err;
}
/* we have a partially zeroed log */
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index 38f405415b88..5289e22cb081 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -352,6 +352,47 @@ xfs_reflink_convert_cow(
return error;
}
+/*
+ * Find the extent that maps the given range in the COW fork. Even if the extent
+ * is not shared we might have a preallocation for it in the COW fork. If so we
+ * use it rather than trigger a new allocation.
+ */
+static int
+xfs_find_trim_cow_extent(
+ struct xfs_inode *ip,
+ struct xfs_bmbt_irec *imap,
+ bool *shared,
+ bool *found)
+{
+ xfs_fileoff_t offset_fsb = imap->br_startoff;
+ xfs_filblks_t count_fsb = imap->br_blockcount;
+ struct xfs_iext_cursor icur;
+ struct xfs_bmbt_irec got;
+ bool trimmed;
+
+ *found = false;
+
+ /*
+ * If we don't find an overlapping extent, trim the range we need to
+ * allocate to fit the hole we found.
+ */
+ if (!xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &icur, &got) ||
+ got.br_startoff > offset_fsb)
+ return xfs_reflink_trim_around_shared(ip, imap, shared, &trimmed);
+
+ *shared = true;
+ if (isnullstartblock(got.br_startblock)) {
+ xfs_trim_extent(imap, got.br_startoff, got.br_blockcount);
+ return 0;
+ }
+
+ /* real extent found - no need to allocate */
+ xfs_trim_extent(&got, offset_fsb, count_fsb);
+ *imap = got;
+ *found = true;
+ return 0;
+}
+
/* Allocate all CoW reservations covering a range of blocks in a file. */
int
xfs_reflink_allocate_cow(
@@ -363,78 +404,64 @@ xfs_reflink_allocate_cow(
struct xfs_mount *mp = ip->i_mount;
xfs_fileoff_t offset_fsb = imap->br_startoff;
xfs_filblks_t count_fsb = imap->br_blockcount;
- struct xfs_bmbt_irec got;
- struct xfs_trans *tp = NULL;
+ struct xfs_trans *tp;
int nimaps, error = 0;
- bool trimmed;
+ bool found;
xfs_filblks_t resaligned;
xfs_extlen_t resblks = 0;
- struct xfs_iext_cursor icur;
-retry:
- ASSERT(xfs_is_reflink_inode(ip));
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
+ ASSERT(xfs_is_reflink_inode(ip));
- /*
- * Even if the extent is not shared we might have a preallocation for
- * it in the COW fork. If so use it.
- */
- if (xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &icur, &got) &&
- got.br_startoff <= offset_fsb) {
- *shared = true;
-
- /* If we have a real allocation in the COW fork we're done. */
- if (!isnullstartblock(got.br_startblock)) {
- xfs_trim_extent(&got, offset_fsb, count_fsb);
- *imap = got;
- goto convert;
- }
+ error = xfs_find_trim_cow_extent(ip, imap, shared, &found);
+ if (error || !*shared)
+ return error;
+ if (found)
+ goto convert;
- xfs_trim_extent(imap, got.br_startoff, got.br_blockcount);
- } else {
- error = xfs_reflink_trim_around_shared(ip, imap, shared, &trimmed);
- if (error || !*shared)
- goto out;
- }
+ resaligned = xfs_aligned_fsb_count(imap->br_startoff,
+ imap->br_blockcount, xfs_get_cowextsz_hint(ip));
+ resblks = XFS_DIOSTRAT_SPACE_RES(mp, resaligned);
- if (!tp) {
- resaligned = xfs_aligned_fsb_count(imap->br_startoff,
- imap->br_blockcount, xfs_get_cowextsz_hint(ip));
- resblks = XFS_DIOSTRAT_SPACE_RES(mp, resaligned);
+ xfs_iunlock(ip, *lockmode);
+ error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0, &tp);
+ *lockmode = XFS_ILOCK_EXCL;
+ xfs_ilock(ip, *lockmode);
- xfs_iunlock(ip, *lockmode);
- error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0, &tp);
- *lockmode = XFS_ILOCK_EXCL;
- xfs_ilock(ip, *lockmode);
+ if (error)
+ return error;
- if (error)
- return error;
+ error = xfs_qm_dqattach_locked(ip, false);
+ if (error)
+ goto out_trans_cancel;
- error = xfs_qm_dqattach_locked(ip, false);
- if (error)
- goto out;
- goto retry;
+ /*
+ * Check for an overlapping extent again now that we dropped the ilock.
+ */
+ error = xfs_find_trim_cow_extent(ip, imap, shared, &found);
+ if (error || !*shared)
+ goto out_trans_cancel;
+ if (found) {
+ xfs_trans_cancel(tp);
+ goto convert;
}
error = xfs_trans_reserve_quota_nblks(tp, ip, resblks, 0,
XFS_QMOPT_RES_REGBLKS);
if (error)
- goto out;
+ goto out_trans_cancel;
xfs_trans_ijoin(tp, ip, 0);
- nimaps = 1;
-
/* Allocate the entire reservation as unwritten blocks. */
+ nimaps = 1;
error = xfs_bmapi_write(tp, ip, imap->br_startoff, imap->br_blockcount,
XFS_BMAPI_COWFORK | XFS_BMAPI_PREALLOC,
resblks, imap, &nimaps);
if (error)
- goto out_trans_cancel;
+ goto out_unreserve;
xfs_inode_set_cowblocks_tag(ip);
-
- /* Finish up. */
error = xfs_trans_commit(tp);
if (error)
return error;
@@ -447,12 +474,12 @@ retry:
return -ENOSPC;
convert:
return xfs_reflink_convert_cow_extent(ip, imap, offset_fsb, count_fsb);
-out_trans_cancel:
+
+out_unreserve:
xfs_trans_unreserve_quota_nblks(tp, ip, (long)resblks, 0,
XFS_QMOPT_RES_REGBLKS);
-out:
- if (tp)
- xfs_trans_cancel(tp);
+out_trans_cancel:
+ xfs_trans_cancel(tp);
return error;
}
@@ -666,14 +693,12 @@ xfs_reflink_end_cow(
if (!del.br_blockcount)
goto prev_extent;
- ASSERT(!isnullstartblock(got.br_startblock));
-
/*
- * Don't remap unwritten extents; these are
- * speculatively preallocated CoW extents that have been
- * allocated but have not yet been involved in a write.
+ * Only remap real extents that contain data. With AIO,
+ * speculative preallocations can leak into the range we
+ * are called upon, and we need to skip them.
*/
- if (got.br_state == XFS_EXT_UNWRITTEN)
+ if (!xfs_bmap_is_real_extent(&got))
goto prev_extent;
/* Unmap the old blocks in the data fork. */
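The reflink rework above drops the ilock to allocate a transaction and then calls xfs_find_trim_cow_extent() a second time, since another thread may have created the COW mapping while the lock was not held. A minimal sketch of that unlock/allocate/relock/re-check shape using pthreads and hypothetical helper names (not XFS code):

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

static pthread_mutex_t ilock = PTHREAD_MUTEX_INITIALIZER;
static bool cow_mapping_present;	/* stands in for the COW fork state */

/* Hypothetical stand-in for xfs_find_trim_cow_extent(); lock must be held. */
static bool find_cow_extent(void)
{
	return cow_mapping_present;
}

static void allocate_cow(void)
{
	void *tp;

	pthread_mutex_lock(&ilock);
	if (find_cow_extent()) {		/* fast path: already mapped */
		pthread_mutex_unlock(&ilock);
		return;
	}

	/* Transaction allocation may sleep, so drop the lock around it. */
	pthread_mutex_unlock(&ilock);
	tp = malloc(64);			/* stands in for xfs_trans_alloc() */
	pthread_mutex_lock(&ilock);

	/* Re-check: another thread may have allocated while we slept. */
	if (find_cow_extent()) {
		free(tp);			/* cancel the now-unneeded transaction */
		pthread_mutex_unlock(&ilock);
		return;
	}

	cow_mapping_present = true;		/* do the allocation ourselves */
	free(tp);				/* a real commit would go here */
	pthread_mutex_unlock(&ilock);
}

int main(void)
{
	allocate_cow();
	printf("cow mapping present: %d\n", cow_mapping_present);
	return 0;
}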
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index ad315e83bc02..3043e5ed6495 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -473,7 +473,6 @@ DEFINE_BUF_ITEM_EVENT(xfs_buf_item_pin);
DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unpin);
DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unpin_stale);
DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unlock);
-DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unlock_stale);
DEFINE_BUF_ITEM_EVENT(xfs_buf_item_committed);
DEFINE_BUF_ITEM_EVENT(xfs_buf_item_push);
DEFINE_BUF_ITEM_EVENT(xfs_trans_get_buf);
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index bedc5a5133a5..912b42f5fe4a 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -259,6 +259,14 @@ xfs_trans_alloc(
struct xfs_trans *tp;
int error;
+ /*
+ * Allocate the handle before we do our freeze accounting and set up the
+ * GFP_NOFS allocation context so that we avoid lockdep false positives
+ * from doing GFP_KERNEL allocations inside sb_start_intwrite().
+ */
+ tp = kmem_zone_zalloc(xfs_trans_zone,
+ (flags & XFS_TRANS_NOFS) ? KM_NOFS : KM_SLEEP);
+
if (!(flags & XFS_TRANS_NO_WRITECOUNT))
sb_start_intwrite(mp->m_super);
@@ -270,8 +278,6 @@ xfs_trans_alloc(
mp->m_super->s_writers.frozen == SB_FREEZE_COMPLETE);
atomic_inc(&mp->m_active_trans);
- tp = kmem_zone_zalloc(xfs_trans_zone,
- (flags & XFS_TRANS_NOFS) ? KM_NOFS : KM_SLEEP);
tp->t_magic = XFS_TRANS_HEADER_MAGIC;
tp->t_flags = flags;
tp->t_mountp = mp;
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c
index 15919f67a88f..286a287ac57a 100644
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c
@@ -322,49 +322,38 @@ xfs_trans_read_buf_map(
}
/*
- * Release the buffer bp which was previously acquired with one of the
- * xfs_trans_... buffer allocation routines if the buffer has not
- * been modified within this transaction. If the buffer is modified
- * within this transaction, do decrement the recursion count but do
- * not release the buffer even if the count goes to 0. If the buffer is not
- * modified within the transaction, decrement the recursion count and
- * release the buffer if the recursion count goes to 0.
+ * Release a buffer previously joined to the transaction. If the buffer is
+ * modified within this transaction, decrement the recursion count but do not
+ * release the buffer even if the count goes to 0. If the buffer is not modified
+ * within the transaction, decrement the recursion count and release the buffer
+ * if the recursion count goes to 0.
*
- * If the buffer is to be released and it was not modified before
- * this transaction began, then free the buf_log_item associated with it.
+ * If the buffer is to be released and it was not already dirty before this
+ * transaction began, then also free the buf_log_item associated with it.
*
- * If the transaction pointer is NULL, make this just a normal
- * brelse() call.
+ * If the transaction pointer is NULL, this is a normal xfs_buf_relse() call.
*/
void
xfs_trans_brelse(
- xfs_trans_t *tp,
- xfs_buf_t *bp)
+ struct xfs_trans *tp,
+ struct xfs_buf *bp)
{
- struct xfs_buf_log_item *bip;
- int freed;
+ struct xfs_buf_log_item *bip = bp->b_log_item;
- /*
- * Default to a normal brelse() call if the tp is NULL.
- */
- if (tp == NULL) {
- ASSERT(bp->b_transp == NULL);
+ ASSERT(bp->b_transp == tp);
+
+ if (!tp) {
xfs_buf_relse(bp);
return;
}
- ASSERT(bp->b_transp == tp);
- bip = bp->b_log_item;
+ trace_xfs_trans_brelse(bip);
ASSERT(bip->bli_item.li_type == XFS_LI_BUF);
- ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
- ASSERT(!(bip->__bli_format.blf_flags & XFS_BLF_CANCEL));
ASSERT(atomic_read(&bip->bli_refcount) > 0);
- trace_xfs_trans_brelse(bip);
-
/*
- * If the release is just for a recursive lock,
- * then decrement the count and return.
+ * If the release is for a recursive lookup, then decrement the count
+ * and return.
*/
if (bip->bli_recur > 0) {
bip->bli_recur--;
@@ -372,64 +361,24 @@ xfs_trans_brelse(
}
/*
- * If the buffer is dirty within this transaction, we can't
+ * If the buffer is invalidated or dirty in this transaction, we can't
* release it until we commit.
*/
if (test_bit(XFS_LI_DIRTY, &bip->bli_item.li_flags))
return;
-
- /*
- * If the buffer has been invalidated, then we can't release
- * it until the transaction commits to disk unless it is re-dirtied
- * as part of this transaction. This prevents us from pulling
- * the item from the AIL before we should.
- */
if (bip->bli_flags & XFS_BLI_STALE)
return;
- ASSERT(!(bip->bli_flags & XFS_BLI_LOGGED));
-
/*
- * Free up the log item descriptor tracking the released item.
+ * Unlink the log item from the transaction and clear the hold flag, if
+ * set. We wouldn't want the next user of the buffer to get confused.
*/
+ ASSERT(!(bip->bli_flags & XFS_BLI_LOGGED));
xfs_trans_del_item(&bip->bli_item);
+ bip->bli_flags &= ~XFS_BLI_HOLD;
- /*
- * Clear the hold flag in the buf log item if it is set.
- * We wouldn't want the next user of the buffer to
- * get confused.
- */
- if (bip->bli_flags & XFS_BLI_HOLD) {
- bip->bli_flags &= ~XFS_BLI_HOLD;
- }
-
- /*
- * Drop our reference to the buf log item.
- */
- freed = atomic_dec_and_test(&bip->bli_refcount);
-
- /*
- * If the buf item is not tracking data in the log, then we must free it
- * before releasing the buffer back to the free pool.
- *
- * If the fs has shutdown and we dropped the last reference, it may fall
- * on us to release a (possibly dirty) bli if it never made it to the
- * AIL (e.g., the aborted unpin already happened and didn't release it
- * due to our reference). Since we're already shutdown and need
- * ail_lock, just force remove from the AIL and release the bli here.
- */
- if (XFS_FORCED_SHUTDOWN(tp->t_mountp) && freed) {
- xfs_trans_ail_remove(&bip->bli_item, SHUTDOWN_LOG_IO_ERROR);
- xfs_buf_item_relse(bp);
- } else if (!(bip->bli_flags & XFS_BLI_DIRTY)) {
-/***
- ASSERT(bp->b_pincount == 0);
-***/
- ASSERT(atomic_read(&bip->bli_refcount) == 0);
- ASSERT(!test_bit(XFS_LI_IN_AIL, &bip->bli_item.li_flags));
- ASSERT(!(bip->bli_flags & XFS_BLI_INODE_ALLOC_BUF));
- xfs_buf_item_relse(bp);
- }
+ /* drop the reference to the bli */
+ xfs_buf_item_put(bip);
bp->b_transp = NULL;
xfs_buf_relse(bp);
diff --git a/include/drm/drm_client.h b/include/drm/drm_client.h
index 989f8e52864d..971bb7853776 100644
--- a/include/drm/drm_client.h
+++ b/include/drm/drm_client.h
@@ -87,9 +87,10 @@ struct drm_client_dev {
struct drm_file *file;
};
-int drm_client_new(struct drm_device *dev, struct drm_client_dev *client,
- const char *name, const struct drm_client_funcs *funcs);
+int drm_client_init(struct drm_device *dev, struct drm_client_dev *client,
+ const char *name, const struct drm_client_funcs *funcs);
void drm_client_release(struct drm_client_dev *client);
+void drm_client_add(struct drm_client_dev *client);
void drm_client_dev_unregister(struct drm_device *dev);
void drm_client_dev_hotplug(struct drm_device *dev);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 6c0b4a1c22ff..897eae8faee1 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1828,8 +1828,10 @@ extern ssize_t vfs_copy_file_range(struct file *, loff_t , struct file *,
extern int vfs_clone_file_prep_inodes(struct inode *inode_in, loff_t pos_in,
struct inode *inode_out, loff_t pos_out,
u64 *len, bool is_dedupe);
+extern int do_clone_file_range(struct file *file_in, loff_t pos_in,
+ struct file *file_out, loff_t pos_out, u64 len);
extern int vfs_clone_file_range(struct file *file_in, loff_t pos_in,
- struct file *file_out, loff_t pos_out, u64 len);
+ struct file *file_out, loff_t pos_out, u64 len);
extern int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff,
struct inode *dest, loff_t destoff,
loff_t len, bool *is_same);
@@ -2773,19 +2775,6 @@ static inline void file_end_write(struct file *file)
__sb_end_write(file_inode(file)->i_sb, SB_FREEZE_WRITE);
}
-static inline int do_clone_file_range(struct file *file_in, loff_t pos_in,
- struct file *file_out, loff_t pos_out,
- u64 len)
-{
- int ret;
-
- file_start_write(file_out);
- ret = vfs_clone_file_range(file_in, pos_in, file_out, pos_out, len);
- file_end_write(file_out);
-
- return ret;
-}
-
/*
* get_write_access() gets write permission for a file.
* put_write_access() releases this write permission.
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 6b68e345f0ca..087fd5f48c91 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -140,6 +140,8 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
pte_t *huge_pte_offset(struct mm_struct *mm,
unsigned long addr, unsigned long sz);
int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep);
+void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma,
+ unsigned long *start, unsigned long *end);
struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address,
int write);
struct page *follow_huge_pd(struct vm_area_struct *vma,
@@ -170,6 +172,18 @@ static inline unsigned long hugetlb_total_pages(void)
return 0;
}
+static inline int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr,
+ pte_t *ptep)
+{
+ return 0;
+}
+
+static inline void adjust_range_if_pmd_sharing_possible(
+ struct vm_area_struct *vma,
+ unsigned long *start, unsigned long *end)
+{
+}
+
#define follow_hugetlb_page(m,v,p,vs,a,b,i,w,n) ({ BUG(); 0; })
#define follow_huge_addr(mm, addr, write) ERR_PTR(-EINVAL)
#define copy_hugetlb_page_range(src, dst, vma) ({ BUG(); 0; })
diff --git a/include/linux/mm.h b/include/linux/mm.h
index a61ebe8ad4ca..0416a7204be3 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2455,6 +2455,12 @@ static inline struct vm_area_struct *find_exact_vma(struct mm_struct *mm,
return vma;
}
+static inline bool range_in_vma(struct vm_area_struct *vma,
+ unsigned long start, unsigned long end)
+{
+ return (vma && vma->vm_start <= start && end <= vma->vm_end);
+}
+
#ifdef CONFIG_MMU
pgprot_t vm_get_page_prot(unsigned long vm_flags);
void vma_set_page_prot(struct vm_area_struct *vma);
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 1e22d96734e0..3f4c0b167333 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -671,12 +671,6 @@ typedef struct pglist_data {
#ifdef CONFIG_NUMA_BALANCING
/* Lock serializing the migrate rate limiting window */
spinlock_t numabalancing_migrate_lock;
-
- /* Rate limiting time interval */
- unsigned long numabalancing_migrate_next_window;
-
- /* Number of pages migrated during the rate limiting time interval */
- unsigned long numabalancing_migrate_nr_pages;
#endif
/*
* This is a per-node reserve of pages that are not available
diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h
index 9397628a1967..cb462f9ab7dd 100644
--- a/include/linux/virtio_net.h
+++ b/include/linux/virtio_net.h
@@ -5,6 +5,24 @@
#include <linux/if_vlan.h>
#include <uapi/linux/virtio_net.h>
+static inline int virtio_net_hdr_set_proto(struct sk_buff *skb,
+ const struct virtio_net_hdr *hdr)
+{
+ switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
+ case VIRTIO_NET_HDR_GSO_TCPV4:
+ case VIRTIO_NET_HDR_GSO_UDP:
+ skb->protocol = cpu_to_be16(ETH_P_IP);
+ break;
+ case VIRTIO_NET_HDR_GSO_TCPV6:
+ skb->protocol = cpu_to_be16(ETH_P_IPV6);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
static inline int virtio_net_hdr_to_skb(struct sk_buff *skb,
const struct virtio_net_hdr *hdr,
bool little_endian)
diff --git a/include/trace/events/migrate.h b/include/trace/events/migrate.h
index 711372845945..705b33d1e395 100644
--- a/include/trace/events/migrate.h
+++ b/include/trace/events/migrate.h
@@ -70,33 +70,6 @@ TRACE_EVENT(mm_migrate_pages,
__print_symbolic(__entry->mode, MIGRATE_MODE),
__print_symbolic(__entry->reason, MIGRATE_REASON))
);
-
-TRACE_EVENT(mm_numa_migrate_ratelimit,
-
- TP_PROTO(struct task_struct *p, int dst_nid, unsigned long nr_pages),
-
- TP_ARGS(p, dst_nid, nr_pages),
-
- TP_STRUCT__entry(
- __array( char, comm, TASK_COMM_LEN)
- __field( pid_t, pid)
- __field( int, dst_nid)
- __field( unsigned long, nr_pages)
- ),
-
- TP_fast_assign(
- memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
- __entry->pid = p->pid;
- __entry->dst_nid = dst_nid;
- __entry->nr_pages = nr_pages;
- ),
-
- TP_printk("comm=%s pid=%d dst_nid=%d nr_pages=%lu",
- __entry->comm,
- __entry->pid,
- __entry->dst_nid,
- __entry->nr_pages)
-);
#endif /* _TRACE_MIGRATE_H */
/* This part must be outside protection */
diff --git a/include/uapi/asm-generic/hugetlb_encode.h b/include/uapi/asm-generic/hugetlb_encode.h
index e4732d3c2998..b0f8e87235bd 100644
--- a/include/uapi/asm-generic/hugetlb_encode.h
+++ b/include/uapi/asm-generic/hugetlb_encode.h
@@ -26,7 +26,9 @@
#define HUGETLB_FLAG_ENCODE_2MB (21 << HUGETLB_FLAG_ENCODE_SHIFT)
#define HUGETLB_FLAG_ENCODE_8MB (23 << HUGETLB_FLAG_ENCODE_SHIFT)
#define HUGETLB_FLAG_ENCODE_16MB (24 << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_32MB (25 << HUGETLB_FLAG_ENCODE_SHIFT)
#define HUGETLB_FLAG_ENCODE_256MB (28 << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_512MB (29 << HUGETLB_FLAG_ENCODE_SHIFT)
#define HUGETLB_FLAG_ENCODE_1GB (30 << HUGETLB_FLAG_ENCODE_SHIFT)
#define HUGETLB_FLAG_ENCODE_2GB (31 << HUGETLB_FLAG_ENCODE_SHIFT)
#define HUGETLB_FLAG_ENCODE_16GB (34 << HUGETLB_FLAG_ENCODE_SHIFT)
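The new 32MB and 512MB entries follow the existing convention: the value placed above HUGETLB_FLAG_ENCODE_SHIFT is log2 of the huge page size (25 for 32MB, 29 for 512MB). A small userspace sketch of the arithmetic, assuming the shift value of 26 from this uapi header:

#include <stdio.h>

#define ENCODE_SHIFT	26U	/* HUGETLB_FLAG_ENCODE_SHIFT in the header above */

/* The encoded value is log2(huge page size), shifted into the top bits. */
static unsigned long encode(unsigned int log2_size)
{
	return (unsigned long)log2_size << ENCODE_SHIFT;
}

int main(void)
{
	/* 32MB = 2^25 and 512MB = 2^29, matching the new defines above */
	printf("HUGETLB_FLAG_ENCODE_32MB  -> %#lx\n", encode(25));
	printf("HUGETLB_FLAG_ENCODE_512MB -> %#lx\n", encode(29));
	return 0;
}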
diff --git a/include/uapi/linux/memfd.h b/include/uapi/linux/memfd.h
index 015a4c0bbb47..7a8a26751c23 100644
--- a/include/uapi/linux/memfd.h
+++ b/include/uapi/linux/memfd.h
@@ -25,7 +25,9 @@
#define MFD_HUGE_2MB HUGETLB_FLAG_ENCODE_2MB
#define MFD_HUGE_8MB HUGETLB_FLAG_ENCODE_8MB
#define MFD_HUGE_16MB HUGETLB_FLAG_ENCODE_16MB
+#define MFD_HUGE_32MB HUGETLB_FLAG_ENCODE_32MB
#define MFD_HUGE_256MB HUGETLB_FLAG_ENCODE_256MB
+#define MFD_HUGE_512MB HUGETLB_FLAG_ENCODE_512MB
#define MFD_HUGE_1GB HUGETLB_FLAG_ENCODE_1GB
#define MFD_HUGE_2GB HUGETLB_FLAG_ENCODE_2GB
#define MFD_HUGE_16GB HUGETLB_FLAG_ENCODE_16GB
diff --git a/include/uapi/linux/mman.h b/include/uapi/linux/mman.h
index bfd5938fede6..d0f515d53299 100644
--- a/include/uapi/linux/mman.h
+++ b/include/uapi/linux/mman.h
@@ -28,7 +28,9 @@
#define MAP_HUGE_2MB HUGETLB_FLAG_ENCODE_2MB
#define MAP_HUGE_8MB HUGETLB_FLAG_ENCODE_8MB
#define MAP_HUGE_16MB HUGETLB_FLAG_ENCODE_16MB
+#define MAP_HUGE_32MB HUGETLB_FLAG_ENCODE_32MB
#define MAP_HUGE_256MB HUGETLB_FLAG_ENCODE_256MB
+#define MAP_HUGE_512MB HUGETLB_FLAG_ENCODE_512MB
#define MAP_HUGE_1GB HUGETLB_FLAG_ENCODE_1GB
#define MAP_HUGE_2GB HUGETLB_FLAG_ENCODE_2GB
#define MAP_HUGE_16GB HUGETLB_FLAG_ENCODE_16GB
diff --git a/include/uapi/linux/shm.h b/include/uapi/linux/shm.h
index dde1344f047c..6507ad0afc81 100644
--- a/include/uapi/linux/shm.h
+++ b/include/uapi/linux/shm.h
@@ -65,7 +65,9 @@ struct shmid_ds {
#define SHM_HUGE_2MB HUGETLB_FLAG_ENCODE_2MB
#define SHM_HUGE_8MB HUGETLB_FLAG_ENCODE_8MB
#define SHM_HUGE_16MB HUGETLB_FLAG_ENCODE_16MB
+#define SHM_HUGE_32MB HUGETLB_FLAG_ENCODE_32MB
#define SHM_HUGE_256MB HUGETLB_FLAG_ENCODE_256MB
+#define SHM_HUGE_512MB HUGETLB_FLAG_ENCODE_512MB
#define SHM_HUGE_1GB HUGETLB_FLAG_ENCODE_1GB
#define SHM_HUGE_2GB HUGETLB_FLAG_ENCODE_2GB
#define SHM_HUGE_16GB HUGETLB_FLAG_ENCODE_16GB
diff --git a/ipc/shm.c b/ipc/shm.c
index 4cd402e4cfeb..1c65fb357395 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -206,7 +206,7 @@ err:
* Callers of shm_lock() must validate the status of the returned ipc
* object pointer and error out as appropriate.
*/
- return (void *)ipcp;
+ return ERR_CAST(ipcp);
}
static inline void shm_lock_by_ptr(struct shmid_kernel *ipcp)
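ERR_CAST() used above re-encodes an error pointer of one pointer type as another while keeping the errno, instead of an opaque (void *) cast. A simplified userspace re-implementation of the ERR_PTR/IS_ERR/ERR_CAST pattern, for illustration only (not the kernel's err.h):

#include <errno.h>
#include <stdio.h>

/* Simplified userspace take on the kernel's ERR_PTR/IS_ERR/ERR_CAST idea. */
#define MAX_ERRNO	4095

static inline void *ERR_PTR(long error)	{ return (void *)error; }
static inline long PTR_ERR(const void *ptr)	{ return (long)ptr; }
static inline int IS_ERR(const void *ptr)
{
	return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}
/* ERR_CAST: same bits, different pointer type, errno preserved. */
static inline void *ERR_CAST(const void *ptr)	{ return (void *)ptr; }

struct perm_like { int id; };
struct shmid_like { struct perm_like perm; };

/* Hypothetical lookup that fails, mirroring the shm_lock() error path. */
static struct shmid_like *lookup(int id)
{
	struct perm_like *ipcp = ERR_PTR(-EINVAL);

	(void)id;
	if (IS_ERR(ipcp))
		return ERR_CAST(ipcp);	/* instead of a bare (void *) cast */
	return (struct shmid_like *)ipcp;
}

int main(void)
{
	struct shmid_like *shp = lookup(42);

	if (IS_ERR(shp))
		printf("lookup failed: %ld\n", PTR_ERR(shp));
	return 0;
}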
diff --git a/kernel/bpf/local_storage.c b/kernel/bpf/local_storage.c
index 22ad967d1e5f..830d7f095748 100644
--- a/kernel/bpf/local_storage.c
+++ b/kernel/bpf/local_storage.c
@@ -129,7 +129,7 @@ static int cgroup_storage_update_elem(struct bpf_map *map, void *_key,
struct bpf_cgroup_storage *storage;
struct bpf_storage_buffer *new;
- if (flags & BPF_NOEXIST)
+ if (flags != BPF_ANY && flags != BPF_EXIST)
return -EINVAL;
storage = cgroup_storage_lookup((struct bpf_cgroup_storage_map *)map,
@@ -195,6 +195,9 @@ static struct bpf_map *cgroup_storage_map_alloc(union bpf_attr *attr)
if (attr->key_size != sizeof(struct bpf_cgroup_storage_key))
return ERR_PTR(-EINVAL);
+ if (attr->value_size == 0)
+ return ERR_PTR(-EINVAL);
+
if (attr->value_size > PAGE_SIZE)
return ERR_PTR(-E2BIG);
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index e986518d7bc3..3584ab27d25c 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -2842,6 +2842,15 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
u64 umin_val, umax_val;
u64 insn_bitness = (BPF_CLASS(insn->code) == BPF_ALU64) ? 64 : 32;
+ if (insn_bitness == 32) {
+ /* Relevant for 32-bit RSH: Information can propagate towards
+ * LSB, so it isn't sufficient to only truncate the output to
+ * 32 bits.
+ */
+ coerce_reg_to_size(dst_reg, 4);
+ coerce_reg_to_size(&src_reg, 4);
+ }
+
smin_val = src_reg.smin_value;
smax_val = src_reg.smax_value;
umin_val = src_reg.umin_value;
@@ -3077,7 +3086,6 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
if (BPF_CLASS(insn->code) != BPF_ALU64) {
/* 32-bit ALU ops are (32,32)->32 */
coerce_reg_to_size(dst_reg, 4);
- coerce_reg_to_size(&src_reg, 4);
}
__reg_deduce_bounds(dst_reg);
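The verifier change above coerces both operands to 32 bits before the bounds arithmetic because, for a right shift, stale high bits of the input propagate toward the LSB; truncating only the output is not enough. A plain C demonstration of the effect:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t reg = 0xffffffff00000010ULL;	/* upper 32 bits are stale */
	unsigned int shift = 4;

	/* Truncating only the output keeps garbage shifted in from above. */
	uint32_t wrong = (uint32_t)(reg >> shift);
	/* Coercing the input to 32 bits first models the 32-bit ALU op. */
	uint32_t right = (uint32_t)((uint32_t)reg >> shift);

	printf("truncate output only: %#x\n", wrong);	/* 0xf0000001 */
	printf("coerce input first  : %#x\n", right);	/* 0x00000001 */
	return 0;
}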
diff --git a/kernel/events/core.c b/kernel/events/core.c
index dcb093e7b377..5a97f34bc14c 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -8314,6 +8314,8 @@ void perf_tp_event(u16 event_type, u64 count, void *record, int entry_size,
goto unlock;
list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
+ if (event->cpu != smp_processor_id())
+ continue;
if (event->attr.type != PERF_TYPE_TRACEPOINT)
continue;
if (event->attr.config != entry->type)
@@ -9431,9 +9433,7 @@ static void free_pmu_context(struct pmu *pmu)
if (pmu->task_ctx_nr > perf_invalid_context)
return;
- mutex_lock(&pmus_lock);
free_percpu(pmu->pmu_cpu_context);
- mutex_unlock(&pmus_lock);
}
/*
@@ -9689,12 +9689,8 @@ EXPORT_SYMBOL_GPL(perf_pmu_register);
void perf_pmu_unregister(struct pmu *pmu)
{
- int remove_device;
-
mutex_lock(&pmus_lock);
- remove_device = pmu_bus_running;
list_del_rcu(&pmu->entry);
- mutex_unlock(&pmus_lock);
/*
* We dereference the pmu list under both SRCU and regular RCU, so
@@ -9706,13 +9702,14 @@ void perf_pmu_unregister(struct pmu *pmu)
free_percpu(pmu->pmu_disable_count);
if (pmu->type >= PERF_TYPE_MAX)
idr_remove(&pmu_idr, pmu->type);
- if (remove_device) {
+ if (pmu_bus_running) {
if (pmu->nr_addr_filters)
device_remove_file(pmu->dev, &dev_attr_nr_addr_filters);
device_del(pmu->dev);
put_device(pmu->dev);
}
free_pmu_context(pmu);
+ mutex_unlock(&pmus_lock);
}
EXPORT_SYMBOL_GPL(perf_pmu_unregister);
diff --git a/kernel/locking/test-ww_mutex.c b/kernel/locking/test-ww_mutex.c
index 0be047dbd897..65a3b7e55b9f 100644
--- a/kernel/locking/test-ww_mutex.c
+++ b/kernel/locking/test-ww_mutex.c
@@ -260,7 +260,7 @@ static void test_cycle_work(struct work_struct *work)
{
struct test_cycle *cycle = container_of(work, typeof(*cycle), work);
struct ww_acquire_ctx ctx;
- int err;
+ int err, erra = 0;
ww_acquire_init(&ctx, &ww_class);
ww_mutex_lock(&cycle->a_mutex, &ctx);
@@ -270,17 +270,19 @@ static void test_cycle_work(struct work_struct *work)
err = ww_mutex_lock(cycle->b_mutex, &ctx);
if (err == -EDEADLK) {
+ err = 0;
ww_mutex_unlock(&cycle->a_mutex);
ww_mutex_lock_slow(cycle->b_mutex, &ctx);
- err = ww_mutex_lock(&cycle->a_mutex, &ctx);
+ erra = ww_mutex_lock(&cycle->a_mutex, &ctx);
}
if (!err)
ww_mutex_unlock(cycle->b_mutex);
- ww_mutex_unlock(&cycle->a_mutex);
+ if (!erra)
+ ww_mutex_unlock(&cycle->a_mutex);
ww_acquire_fini(&ctx);
- cycle->result = err;
+ cycle->result = err ?: erra;
}
static int __test_cycle(unsigned int nthreads)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 625bc9897f62..ad97f3ba5ec5 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1167,7 +1167,7 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
if (task_cpu(p) != new_cpu) {
if (p->sched_class->migrate_task_rq)
- p->sched_class->migrate_task_rq(p);
+ p->sched_class->migrate_task_rq(p, new_cpu);
p->se.nr_migrations++;
rseq_migrate(p);
perf_event_task_migrate(p);
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 997ea7b839fa..91e4202b0634 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -1607,7 +1607,7 @@ out:
return cpu;
}
-static void migrate_task_rq_dl(struct task_struct *p)
+static void migrate_task_rq_dl(struct task_struct *p, int new_cpu __maybe_unused)
{
struct rq *rq;
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index f808ddf2a868..7fc4a371bdd2 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1392,6 +1392,17 @@ bool should_numa_migrate_memory(struct task_struct *p, struct page * page,
int last_cpupid, this_cpupid;
this_cpupid = cpu_pid_to_cpupid(dst_cpu, current->pid);
+ last_cpupid = page_cpupid_xchg_last(page, this_cpupid);
+
+ /*
+ * Allow first faults or private faults to migrate immediately early in
+ * the lifetime of a task. The magic number 4 is based on waiting for
+ * two full passes of the "multi-stage node selection" test that is
+ * executed below.
+ */
+ if ((p->numa_preferred_nid == -1 || p->numa_scan_seq <= 4) &&
+ (cpupid_pid_unset(last_cpupid) || cpupid_match_pid(p, last_cpupid)))
+ return true;
/*
* Multi-stage node selection is used in conjunction with a periodic
@@ -1410,7 +1421,6 @@ bool should_numa_migrate_memory(struct task_struct *p, struct page * page,
* This quadric squishes small probabilities, making it less likely we
* act on an unlikely task<->page relation.
*/
- last_cpupid = page_cpupid_xchg_last(page, this_cpupid);
if (!cpupid_pid_unset(last_cpupid) &&
cpupid_to_nid(last_cpupid) != dst_nid)
return false;
@@ -1514,6 +1524,21 @@ struct task_numa_env {
static void task_numa_assign(struct task_numa_env *env,
struct task_struct *p, long imp)
{
+ struct rq *rq = cpu_rq(env->dst_cpu);
+
+ /* Bail out if the run queue is already part of an active NUMA balance. */
+ if (xchg(&rq->numa_migrate_on, 1))
+ return;
+
+ /*
+ * Clear the previous best_cpu/rq numa-migrate flag, since the task has
+ * now found a better CPU to move to or swap with.
+ */
+ if (env->best_cpu != -1) {
+ rq = cpu_rq(env->best_cpu);
+ WRITE_ONCE(rq->numa_migrate_on, 0);
+ }
+
if (env->best_task)
put_task_struct(env->best_task);
if (p)
@@ -1553,6 +1578,13 @@ static bool load_too_imbalanced(long src_load, long dst_load,
}
/*
+ * Maximum NUMA importance can be 1998 (2*999);
+ * SMALLIMP @ 30 would be close to 1998/64.
+ * Used to deter task migration.
+ */
+#define SMALLIMP 30
+
+/*
* This checks if the overall compute and NUMA accesses of the system would
* be improved if the source tasks was migrated to the target dst_cpu taking
* into account that it might be best if task running on the dst_cpu should
@@ -1569,6 +1601,9 @@ static void task_numa_compare(struct task_numa_env *env,
long moveimp = imp;
int dist = env->dist;
+ if (READ_ONCE(dst_rq->numa_migrate_on))
+ return;
+
rcu_read_lock();
cur = task_rcu_dereference(&dst_rq->curr);
if (cur && ((cur->flags & PF_EXITING) || is_idle_task(cur)))
@@ -1582,7 +1617,7 @@ static void task_numa_compare(struct task_numa_env *env,
goto unlock;
if (!cur) {
- if (maymove || imp > env->best_imp)
+ if (maymove && moveimp >= env->best_imp)
goto assign;
else
goto unlock;
@@ -1625,16 +1660,22 @@ static void task_numa_compare(struct task_numa_env *env,
task_weight(cur, env->dst_nid, dist);
}
- if (imp <= env->best_imp)
- goto unlock;
-
if (maymove && moveimp > imp && moveimp > env->best_imp) {
- imp = moveimp - 1;
+ imp = moveimp;
cur = NULL;
goto assign;
}
/*
+ * If the NUMA importance is less than SMALLIMP,
+ * task migration might only result in ping-ponging
+ * of tasks and also hurt performance due to cache
+ * misses.
+ */
+ if (imp < SMALLIMP || imp <= env->best_imp + SMALLIMP / 2)
+ goto unlock;
+
+ /*
* In the overloaded case, try and keep the load balanced.
*/
load = task_h_load(env->p) - task_h_load(cur);
@@ -1710,6 +1751,7 @@ static int task_numa_migrate(struct task_struct *p)
.best_cpu = -1,
};
struct sched_domain *sd;
+ struct rq *best_rq;
unsigned long taskweight, groupweight;
int nid, ret, dist;
long taskimp, groupimp;
@@ -1805,20 +1847,17 @@ static int task_numa_migrate(struct task_struct *p)
if (env.best_cpu == -1)
return -EAGAIN;
- /*
- * Reset the scan period if the task is being rescheduled on an
- * alternative node to recheck if the tasks is now properly placed.
- */
- p->numa_scan_period = task_scan_start(p);
-
+ best_rq = cpu_rq(env.best_cpu);
if (env.best_task == NULL) {
ret = migrate_task_to(p, env.best_cpu);
+ WRITE_ONCE(best_rq->numa_migrate_on, 0);
if (ret != 0)
trace_sched_stick_numa(p, env.src_cpu, env.best_cpu);
return ret;
}
ret = migrate_swap(p, env.best_task, env.best_cpu, env.src_cpu);
+ WRITE_ONCE(best_rq->numa_migrate_on, 0);
if (ret != 0)
trace_sched_stick_numa(p, env.src_cpu, task_cpu(env.best_task));
@@ -2596,6 +2635,39 @@ void task_tick_numa(struct rq *rq, struct task_struct *curr)
}
}
+static void update_scan_period(struct task_struct *p, int new_cpu)
+{
+ int src_nid = cpu_to_node(task_cpu(p));
+ int dst_nid = cpu_to_node(new_cpu);
+
+ if (!static_branch_likely(&sched_numa_balancing))
+ return;
+
+ if (!p->mm || !p->numa_faults || (p->flags & PF_EXITING))
+ return;
+
+ if (src_nid == dst_nid)
+ return;
+
+ /*
+ * Allow resets if faults have been trapped before one scan
+ * has completed. This is most likely due to a new task that
+ * is pulled cross-node due to wakeups or load balancing.
+ */
+ if (p->numa_scan_seq) {
+ /*
+ * Avoid scan adjustments if moving to the preferred
+ * node or if the task was not previously running on
+ * the preferred node.
+ */
+ if (dst_nid == p->numa_preferred_nid ||
+ (p->numa_preferred_nid != -1 && src_nid != p->numa_preferred_nid))
+ return;
+ }
+
+ p->numa_scan_period = task_scan_start(p);
+}
+
#else
static void task_tick_numa(struct rq *rq, struct task_struct *curr)
{
@@ -2609,6 +2681,10 @@ static inline void account_numa_dequeue(struct rq *rq, struct task_struct *p)
{
}
+static inline void update_scan_period(struct task_struct *p, int new_cpu)
+{
+}
+
#endif /* CONFIG_NUMA_BALANCING */
static void
@@ -6275,7 +6351,7 @@ static void detach_entity_cfs_rq(struct sched_entity *se);
* cfs_rq_of(p) references at time of call are still valid and identify the
* previous CPU. The caller guarantees p->pi_lock or task_rq(p)->lock is held.
*/
-static void migrate_task_rq_fair(struct task_struct *p)
+static void migrate_task_rq_fair(struct task_struct *p, int new_cpu)
{
/*
* As blocked tasks retain absolute vruntime the migration needs to
@@ -6328,6 +6404,8 @@ static void migrate_task_rq_fair(struct task_struct *p)
/* We have migrated, no longer consider this task hot */
p->se.exec_start = 0;
+
+ update_scan_period(p, new_cpu);
}
static void task_dead_fair(struct task_struct *p)
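The new rq->numa_migrate_on flag is claimed with xchg() so that only one NUMA balancing attempt can target a run queue at a time, and cleared with WRITE_ONCE() once the migration or swap has been issued. A minimal C11 analogue of that claim/release pattern (userspace sketch, not scheduler code):

#include <stdatomic.h>
#include <stdio.h>

struct rq_like {
	atomic_int numa_migrate_on;	/* 0 = free, 1 = claimed */
};

/* Mirrors the xchg(&rq->numa_migrate_on, 1) claim in task_numa_assign(). */
static int try_claim(struct rq_like *rq)
{
	return atomic_exchange(&rq->numa_migrate_on, 1) == 0;
}

/* Mirrors clearing the flag once the migration/swap has been issued. */
static void release(struct rq_like *rq)
{
	atomic_store(&rq->numa_migrate_on, 0);
}

int main(void)
{
	struct rq_like rq;

	atomic_init(&rq.numa_migrate_on, 0);
	printf("first claim:   %s\n", try_claim(&rq) ? "ok" : "busy");
	printf("second claim:  %s\n", try_claim(&rq) ? "ok" : "busy");
	release(&rq);
	printf("after release: %s\n", try_claim(&rq) ? "ok" : "busy");
	return 0;
}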
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 4a2e8cae63c4..455fa330de04 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -783,6 +783,7 @@ struct rq {
#ifdef CONFIG_NUMA_BALANCING
unsigned int nr_numa_running;
unsigned int nr_preferred_running;
+ unsigned int numa_migrate_on;
#endif
#define CPU_LOAD_IDX_MAX 5
unsigned long cpu_load[CPU_LOAD_IDX_MAX];
@@ -1523,7 +1524,7 @@ struct sched_class {
#ifdef CONFIG_SMP
int (*select_task_rq)(struct task_struct *p, int task_cpu, int sd_flag, int flags);
- void (*migrate_task_rq)(struct task_struct *p);
+ void (*migrate_task_rq)(struct task_struct *p, int new_cpu);
void (*task_woken)(struct rq *this_rq, struct task_struct *task);
diff --git a/mm/gup_benchmark.c b/mm/gup_benchmark.c
index 6a473709e9b6..7405c9d89d65 100644
--- a/mm/gup_benchmark.c
+++ b/mm/gup_benchmark.c
@@ -19,7 +19,8 @@ static int __gup_benchmark_ioctl(unsigned int cmd,
struct gup_benchmark *gup)
{
ktime_t start_time, end_time;
- unsigned long i, nr, nr_pages, addr, next;
+ unsigned long i, nr_pages, addr, next;
+ int nr;
struct page **pages;
nr_pages = gup->size / PAGE_SIZE;
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 533f9b00147d..00704060b7f7 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2931,7 +2931,7 @@ void remove_migration_pmd(struct page_vma_mapped_walk *pvmw, struct page *new)
else
page_add_file_rmap(new, true);
set_pmd_at(mm, mmun_start, pvmw->pmd, pmde);
- if (vma->vm_flags & VM_LOCKED)
+ if ((vma->vm_flags & VM_LOCKED) && !PageDoubleMap(new))
mlock_vma_page(new);
update_mmu_cache_pmd(vma, address, pvmw->pmd);
}
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 3c21775f196b..5c390f5a5207 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -3326,8 +3326,8 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
struct page *page;
struct hstate *h = hstate_vma(vma);
unsigned long sz = huge_page_size(h);
- const unsigned long mmun_start = start; /* For mmu_notifiers */
- const unsigned long mmun_end = end; /* For mmu_notifiers */
+ unsigned long mmun_start = start; /* For mmu_notifiers */
+ unsigned long mmun_end = end; /* For mmu_notifiers */
WARN_ON(!is_vm_hugetlb_page(vma));
BUG_ON(start & ~huge_page_mask(h));
@@ -3339,6 +3339,11 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
*/
tlb_remove_check_page_size_change(tlb, sz);
tlb_start_vma(tlb, vma);
+
+ /*
+ * If sharing is possible, alert mmu notifiers of the worst case.
+ */
+ adjust_range_if_pmd_sharing_possible(vma, &mmun_start, &mmun_end);
mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
address = start;
for (; address < end; address += sz) {
@@ -3349,6 +3354,10 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
ptl = huge_pte_lock(h, mm, ptep);
if (huge_pmd_unshare(mm, &address, ptep)) {
spin_unlock(ptl);
+ /*
+ * We just unmapped a page of PMDs by clearing a PUD.
+ * The caller's TLB flush range should cover this area.
+ */
continue;
}
@@ -3431,12 +3440,23 @@ void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
{
struct mm_struct *mm;
struct mmu_gather tlb;
+ unsigned long tlb_start = start;
+ unsigned long tlb_end = end;
+
+ /*
+ * If shared PMDs were possibly used within this vma range, adjust
+ * start/end for worst case tlb flushing.
+ * Note that we cannot be sure if PMDs are shared until we try to
+ * unmap pages. However, we want to make sure TLB flushing covers
+ * the largest possible range.
+ */
+ adjust_range_if_pmd_sharing_possible(vma, &tlb_start, &tlb_end);
mm = vma->vm_mm;
- tlb_gather_mmu(&tlb, mm, start, end);
+ tlb_gather_mmu(&tlb, mm, tlb_start, tlb_end);
__unmap_hugepage_range(&tlb, vma, start, end, ref_page);
- tlb_finish_mmu(&tlb, start, end);
+ tlb_finish_mmu(&tlb, tlb_start, tlb_end);
}
/*
@@ -4298,11 +4318,21 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
pte_t pte;
struct hstate *h = hstate_vma(vma);
unsigned long pages = 0;
+ unsigned long f_start = start;
+ unsigned long f_end = end;
+ bool shared_pmd = false;
+
+ /*
+ * In the case of shared PMDs, the area to flush could be beyond
+ * start/end. Set f_start/f_end to cover the maximum possible
+ * range if PMD sharing is possible.
+ */
+ adjust_range_if_pmd_sharing_possible(vma, &f_start, &f_end);
BUG_ON(address >= end);
- flush_cache_range(vma, address, end);
+ flush_cache_range(vma, f_start, f_end);
- mmu_notifier_invalidate_range_start(mm, start, end);
+ mmu_notifier_invalidate_range_start(mm, f_start, f_end);
i_mmap_lock_write(vma->vm_file->f_mapping);
for (; address < end; address += huge_page_size(h)) {
spinlock_t *ptl;
@@ -4313,6 +4343,7 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
if (huge_pmd_unshare(mm, &address, ptep)) {
pages++;
spin_unlock(ptl);
+ shared_pmd = true;
continue;
}
pte = huge_ptep_get(ptep);
@@ -4348,9 +4379,13 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
* Must flush TLB before releasing i_mmap_rwsem: x86's huge_pmd_unshare
* may have cleared our pud entry and done put_page on the page table:
* once we release i_mmap_rwsem, another task can do the final put_page
- * and that page table be reused and filled with junk.
+ * and that page table be reused and filled with junk. If we actually
+ * did unshare a page of pmds, flush the range corresponding to the pud.
*/
- flush_hugetlb_tlb_range(vma, start, end);
+ if (shared_pmd)
+ flush_hugetlb_tlb_range(vma, f_start, f_end);
+ else
+ flush_hugetlb_tlb_range(vma, start, end);
/*
* No need to call mmu_notifier_invalidate_range() we are downgrading
* page table protection not changing it to point to a new page.
@@ -4358,7 +4393,7 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
* See Documentation/vm/mmu_notifier.rst
*/
i_mmap_unlock_write(vma->vm_file->f_mapping);
- mmu_notifier_invalidate_range_end(mm, start, end);
+ mmu_notifier_invalidate_range_end(mm, f_start, f_end);
return pages << h->order;
}
@@ -4545,13 +4580,41 @@ static bool vma_shareable(struct vm_area_struct *vma, unsigned long addr)
/*
* check on proper vm_flags and page table alignment
*/
- if (vma->vm_flags & VM_MAYSHARE &&
- vma->vm_start <= base && end <= vma->vm_end)
+ if (vma->vm_flags & VM_MAYSHARE && range_in_vma(vma, base, end))
return true;
return false;
}
/*
+ * Determine if start,end range within vma could be mapped by shared pmd.
+ * If yes, adjust start and end to cover range associated with possible
+ * shared pmd mappings.
+ */
+void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma,
+ unsigned long *start, unsigned long *end)
+{
+ unsigned long check_addr = *start;
+
+ if (!(vma->vm_flags & VM_MAYSHARE))
+ return;
+
+ for (check_addr = *start; check_addr < *end; check_addr += PUD_SIZE) {
+ unsigned long a_start = check_addr & PUD_MASK;
+ unsigned long a_end = a_start + PUD_SIZE;
+
+ /*
+ * If sharing is possible, adjust start/end if necessary.
+ */
+ if (range_in_vma(vma, a_start, a_end)) {
+ if (a_start < *start)
+ *start = a_start;
+ if (a_end > *end)
+ *end = a_end;
+ }
+ }
+}
+
+/*
* Search for a shareable pmd page for hugetlb. In any case calls pmd_alloc()
* and returns the corresponding pte. While this is not necessary for the
* !shared pmd case because we can allocate the pmd later as well, it makes the
@@ -4648,6 +4711,11 @@ int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
{
return 0;
}
+
+void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma,
+ unsigned long *start, unsigned long *end)
+{
+}
#define want_pmd_share() (0)
#endif /* CONFIG_ARCH_WANT_HUGE_PMD_SHARE */
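adjust_range_if_pmd_sharing_possible() above widens the flush range outward to PUD boundaries, but only for PUD-sized slots that lie entirely within the VMA. A userspace sketch of the same rounding, assuming an x86-64-style 1GiB PUD_SIZE:

#include <stdio.h>

#define PUD_SHIFT	30			/* assumption: x86-64 with 4K pages */
#define PUD_SIZE	(1UL << PUD_SHIFT)
#define PUD_MASK	(~(PUD_SIZE - 1))

struct vma { unsigned long vm_start, vm_end; };

static int range_in_vma(const struct vma *vma, unsigned long s, unsigned long e)
{
	return vma->vm_start <= s && e <= vma->vm_end;
}

/* Widen [*start, *end) to PUD boundaries where the PUD slot fits in the VMA. */
static void adjust_range(const struct vma *vma,
			 unsigned long *start, unsigned long *end)
{
	unsigned long addr;

	for (addr = *start; addr < *end; addr += PUD_SIZE) {
		unsigned long a_start = addr & PUD_MASK;
		unsigned long a_end = a_start + PUD_SIZE;

		if (range_in_vma(vma, a_start, a_end)) {
			if (a_start < *start)
				*start = a_start;
			if (a_end > *end)
				*end = a_end;
		}
	}
}

int main(void)
{
	struct vma vma = { .vm_start = 0x40000000UL, .vm_end = 0xc0000000UL };
	unsigned long s = 0x48000000UL, e = 0x49000000UL;

	adjust_range(&vma, &s, &e);
	printf("flush range: %#lx-%#lx\n", s, e);	/* 0x40000000-0x80000000 */
	return 0;
}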
diff --git a/mm/madvise.c b/mm/madvise.c
index 972a9eaa898b..71d21df2a3f3 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -96,7 +96,7 @@ static long madvise_behavior(struct vm_area_struct *vma,
new_flags |= VM_DONTDUMP;
break;
case MADV_DODUMP:
- if (new_flags & VM_SPECIAL) {
+ if (!is_vm_hugetlb_page(vma) && new_flags & VM_SPECIAL) {
error = -EINVAL;
goto out;
}
diff --git a/mm/migrate.c b/mm/migrate.c
index d6a2e89b086a..84381b55b2bd 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -275,6 +275,9 @@ static bool remove_migration_pte(struct page *page, struct vm_area_struct *vma,
if (vma->vm_flags & VM_LOCKED && !PageTransCompound(new))
mlock_vma_page(new);
+ if (PageTransHuge(page) && PageMlocked(page))
+ clear_page_mlock(page);
+
/* No need to invalidate - it was non-present before */
update_mmu_cache(vma, pvmw.address, pvmw.pte);
}
@@ -1411,7 +1414,7 @@ retry:
* we encounter them after the rest of the list
* is processed.
*/
- if (PageTransHuge(page)) {
+ if (PageTransHuge(page) && !PageHuge(page)) {
lock_page(page);
rc = split_huge_page_to_list(page, from);
unlock_page(page);
@@ -1855,46 +1858,6 @@ static struct page *alloc_misplaced_dst_page(struct page *page,
return newpage;
}
-/*
- * page migration rate limiting control.
- * Do not migrate more than @pages_to_migrate in a @migrate_interval_millisecs
- * window of time. Default here says do not migrate more than 1280M per second.
- */
-static unsigned int migrate_interval_millisecs __read_mostly = 100;
-static unsigned int ratelimit_pages __read_mostly = 128 << (20 - PAGE_SHIFT);
-
-/* Returns true if the node is migrate rate-limited after the update */
-static bool numamigrate_update_ratelimit(pg_data_t *pgdat,
- unsigned long nr_pages)
-{
- /*
- * Rate-limit the amount of data that is being migrated to a node.
- * Optimal placement is no good if the memory bus is saturated and
- * all the time is being spent migrating!
- */
- if (time_after(jiffies, pgdat->numabalancing_migrate_next_window)) {
- spin_lock(&pgdat->numabalancing_migrate_lock);
- pgdat->numabalancing_migrate_nr_pages = 0;
- pgdat->numabalancing_migrate_next_window = jiffies +
- msecs_to_jiffies(migrate_interval_millisecs);
- spin_unlock(&pgdat->numabalancing_migrate_lock);
- }
- if (pgdat->numabalancing_migrate_nr_pages > ratelimit_pages) {
- trace_mm_numa_migrate_ratelimit(current, pgdat->node_id,
- nr_pages);
- return true;
- }
-
- /*
- * This is an unlocked non-atomic update so errors are possible.
- * The consequences are failing to migrate when we potentiall should
- * have which is not severe enough to warrant locking. If it is ever
- * a problem, it can be converted to a per-cpu counter.
- */
- pgdat->numabalancing_migrate_nr_pages += nr_pages;
- return false;
-}
-
static int numamigrate_isolate_page(pg_data_t *pgdat, struct page *page)
{
int page_lru;
@@ -1967,14 +1930,6 @@ int migrate_misplaced_page(struct page *page, struct vm_area_struct *vma,
if (page_is_file_cache(page) && PageDirty(page))
goto out;
- /*
- * Rate-limit the amount of data that is being migrated to a node.
- * Optimal placement is no good if the memory bus is saturated and
- * all the time is being spent migrating!
- */
- if (numamigrate_update_ratelimit(pgdat, 1))
- goto out;
-
isolated = numamigrate_isolate_page(pgdat, page);
if (!isolated)
goto out;
@@ -2021,14 +1976,6 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
unsigned long mmun_start = address & HPAGE_PMD_MASK;
unsigned long mmun_end = mmun_start + HPAGE_PMD_SIZE;
- /*
- * Rate-limit the amount of data that is being migrated to a node.
- * Optimal placement is no good if the memory bus is saturated and
- * all the time is being spent migrating!
- */
- if (numamigrate_update_ratelimit(pgdat, HPAGE_PMD_NR))
- goto out_dropref;
-
new_page = alloc_pages_node(node,
(GFP_TRANSHUGE_LIGHT | __GFP_THISNODE),
HPAGE_PMD_ORDER);
@@ -2125,7 +2072,6 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
out_fail:
count_vm_events(PGMIGRATE_FAIL, HPAGE_PMD_NR);
-out_dropref:
ptl = pmd_lock(mm, pmd);
if (pmd_same(*pmd, entry)) {
entry = pmd_modify(entry, vma->vm_page_prot);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 89d2a2ab3fe6..706a738c0aee 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -6197,8 +6197,6 @@ static unsigned long __init calc_memmap_size(unsigned long spanned_pages,
static void pgdat_init_numabalancing(struct pglist_data *pgdat)
{
spin_lock_init(&pgdat->numabalancing_migrate_lock);
- pgdat->numabalancing_migrate_nr_pages = 0;
- pgdat->numabalancing_migrate_next_window = jiffies;
}
#else
static void pgdat_init_numabalancing(struct pglist_data *pgdat) {}
diff --git a/mm/rmap.c b/mm/rmap.c
index eb477809a5c0..1e79fac3186b 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1362,11 +1362,21 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
}
/*
- * We have to assume the worse case ie pmd for invalidation. Note that
- * the page can not be free in this function as call of try_to_unmap()
- * must hold a reference on the page.
+ * For THP, we have to assume the worst case, i.e. pmd, for invalidation.
+ * For hugetlb, it could be much worse if we need to do pud
+ * invalidation in the case of pmd sharing.
+ *
+ * Note that the page cannot be freed in this function, as the caller
+ * of try_to_unmap() must hold a reference on the page.
*/
end = min(vma->vm_end, start + (PAGE_SIZE << compound_order(page)));
+ if (PageHuge(page)) {
+ /*
+ * If sharing is possible, start and end will be adjusted
+ * accordingly.
+ */
+ adjust_range_if_pmd_sharing_possible(vma, &start, &end);
+ }
mmu_notifier_invalidate_range_start(vma->vm_mm, start, end);
while (page_vma_mapped_walk(&pvmw)) {
@@ -1409,6 +1419,32 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
subpage = page - page_to_pfn(page) + pte_pfn(*pvmw.pte);
address = pvmw.address;
+ if (PageHuge(page)) {
+ if (huge_pmd_unshare(mm, &address, pvmw.pte)) {
+ /*
+ * huge_pmd_unshare unmapped an entire PMD
+ * page. There is no way of knowing exactly
+ * which PMDs may be cached for this mm, so
+ * we must flush them all. start/end were
+ * already adjusted above to cover this range.
+ */
+ flush_cache_range(vma, start, end);
+ flush_tlb_range(vma, start, end);
+ mmu_notifier_invalidate_range(mm, start, end);
+
+ /*
+ * The ref count of the PMD page was dropped
+ * which is part of the way map counting
+ * is done for shared PMDs. Return 'true'
+ * here. When there is no other sharing,
+ * huge_pmd_unshare returns false and we will
+ * unmap the actual page and drop map count
+ * to zero.
+ */
+ page_vma_mapped_walk_done(&pvmw);
+ break;
+ }
+ }
if (IS_ENABLED(CONFIG_MIGRATION) &&
(flags & TTU_MIGRATION) &&
diff --git a/mm/vmscan.c b/mm/vmscan.c
index c7ce2c161225..c5ef7240cbcb 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -580,8 +580,8 @@ static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid,
struct mem_cgroup *memcg, int priority)
{
struct memcg_shrinker_map *map;
- unsigned long freed = 0;
- int ret, i;
+ unsigned long ret, freed = 0;
+ int i;
if (!memcg_kmem_enabled() || !mem_cgroup_online(memcg))
return 0;
@@ -677,9 +677,8 @@ static unsigned long shrink_slab(gfp_t gfp_mask, int nid,
struct mem_cgroup *memcg,
int priority)
{
+ unsigned long ret, freed = 0;
struct shrinker *shrinker;
- unsigned long freed = 0;
- int ret;
if (!mem_cgroup_is_root(memcg))
return shrink_slab_memcg(gfp_mask, nid, memcg, priority);
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 8ba0870ecddd..7878da76abf2 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1275,6 +1275,9 @@ const char * const vmstat_text[] = {
#ifdef CONFIG_SMP
"nr_tlb_remote_flush",
"nr_tlb_remote_flush_received",
+#else
+ "", /* nr_tlb_remote_flush */
+ "", /* nr_tlb_remote_flush_received */
#endif /* CONFIG_SMP */
"nr_tlb_local_flush_all",
"nr_tlb_local_flush_one",
@@ -1283,7 +1286,6 @@ const char * const vmstat_text[] = {
#ifdef CONFIG_DEBUG_VM_VMACACHE
"vmacache_find_calls",
"vmacache_find_hits",
- "vmacache_full_flushes",
#endif
#ifdef CONFIG_SWAP
"swap_ra",
diff --git a/net/bpfilter/bpfilter_kern.c b/net/bpfilter/bpfilter_kern.c
index f0fc182d3db7..b64e1649993b 100644
--- a/net/bpfilter/bpfilter_kern.c
+++ b/net/bpfilter/bpfilter_kern.c
@@ -59,7 +59,7 @@ static int __bpfilter_process_sockopt(struct sock *sk, int optname,
req.is_set = is_set;
req.pid = current->pid;
req.cmd = optname;
- req.addr = (long)optval;
+ req.addr = (long __force __user)optval;
req.len = optlen;
mutex_lock(&bpfilter_lock);
if (!info.pid)
@@ -98,7 +98,7 @@ static int __init load_umh(void)
pr_info("Loaded bpfilter_umh pid %d\n", info.pid);
/* health check that usermode process started correctly */
- if (__bpfilter_process_sockopt(NULL, 0, 0, 0, 0) != 0) {
+ if (__bpfilter_process_sockopt(NULL, 0, NULL, 0, 0) != 0) {
stop_umh();
return -EFAULT;
}
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 57bf96d73e3b..5564eee1e980 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -3763,16 +3763,27 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb)
int err = 0;
int fidx = 0;
- err = nlmsg_parse(cb->nlh, sizeof(struct ifinfomsg), tb,
- IFLA_MAX, ifla_policy, NULL);
- if (err < 0) {
- return -EINVAL;
- } else if (err == 0) {
- if (tb[IFLA_MASTER])
- br_idx = nla_get_u32(tb[IFLA_MASTER]);
- }
+ /* A hack to preserve kernel<->userspace interface.
+ * Before Linux v4.12 this code accepted an ndmsg header, which iproute2
+ * has sent since v3.3.0. However, ndmsg is shorter than ifinfomsg, so
+ * nlmsg_parse() bails on it.
+ * So, check for ndmsg with an optional u32 attribute (not used here).
+ * Fortunately these sizes don't conflict with the size of ifinfomsg
+ * with an optional attribute.
+ */
+ if (nlmsg_len(cb->nlh) != sizeof(struct ndmsg) &&
+ (nlmsg_len(cb->nlh) != sizeof(struct ndmsg) +
+ nla_attr_size(sizeof(u32)))) {
+ err = nlmsg_parse(cb->nlh, sizeof(struct ifinfomsg), tb,
+ IFLA_MAX, ifla_policy, NULL);
+ if (err < 0) {
+ return -EINVAL;
+ } else if (err == 0) {
+ if (tb[IFLA_MASTER])
+ br_idx = nla_get_u32(tb[IFLA_MASTER]);
+ }
- brport_idx = ifm->ifi_index;
+ brport_idx = ifm->ifi_index;
+ }
if (br_idx) {
br_dev = __dev_get_by_index(net, br_idx);
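The fdb dump hunk above distinguishes the legacy ndmsg-based request from an ifinfomsg-based one purely by payload length; the check works because the two structs have different sizes even with one u32 attribute appended. A quick size check, assuming the Linux uapi headers are available:

#include <stdio.h>
#include <stdint.h>
#include <linux/netlink.h>	/* NLA_HDRLEN */
#include <linux/neighbour.h>	/* struct ndmsg */
#include <linux/rtnetlink.h>	/* struct ifinfomsg */

int main(void)
{
	/* nla_attr_size(sizeof(u32)) in the kernel is NLA_HDRLEN + payload. */
	size_t attr = NLA_HDRLEN + sizeof(uint32_t);

	printf("ndmsg            : %zu\n", sizeof(struct ndmsg));
	printf("ndmsg + u32 attr : %zu\n", sizeof(struct ndmsg) + attr);
	printf("ifinfomsg        : %zu\n", sizeof(struct ifinfomsg));
	printf("ifinfomsg + attr : %zu\n", sizeof(struct ifinfomsg) + attr);
	return 0;
}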
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 413d98bf24f4..5e0efd3954e9 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -651,8 +651,6 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length,
skb->priority = sk->sk_priority;
skb->mark = sk->sk_mark;
skb->tstamp = sockc->transmit_time;
- skb_dst_set(skb, &rt->dst);
- *dstp = NULL;
skb_put(skb, length);
skb_reset_network_header(skb);
@@ -665,8 +663,14 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length,
skb->transport_header = skb->network_header;
err = memcpy_from_msg(iph, msg, length);
- if (err)
- goto error_fault;
+ if (err) {
+ err = -EFAULT;
+ kfree_skb(skb);
+ goto error;
+ }
+
+ skb_dst_set(skb, &rt->dst);
+ *dstp = NULL;
/* if egress device is enslaved to an L3 master device pass the
* skb to its handler for processing
@@ -675,21 +679,28 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length,
if (unlikely(!skb))
return 0;
+ /* Acquire rcu_read_lock() in case we need to use rt->rt6i_idev
+ * in the error path. Since skb has been freed, the dst could
+ * have been queued for deletion.
+ */
+ rcu_read_lock();
IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb,
NULL, rt->dst.dev, dst_output);
if (err > 0)
err = net_xmit_errno(err);
- if (err)
- goto error;
+ if (err) {
+ IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
+ rcu_read_unlock();
+ goto error_check;
+ }
+ rcu_read_unlock();
out:
return 0;
-error_fault:
- err = -EFAULT;
- kfree_skb(skb);
error:
IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
+error_check:
if (err == -ENOBUFS && !np->recverr)
err = 0;
return err;
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 504627e2117f..914aef7e7afd 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -425,7 +425,7 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev,
case NL80211_IFTYPE_AP:
case NL80211_IFTYPE_AP_VLAN:
/* Keys without a station are used for TX only */
- if (key->sta && test_sta_flag(key->sta, WLAN_STA_MFP))
+ if (sta && test_sta_flag(sta, WLAN_STA_MFP))
key->conf.flags |= IEEE80211_KEY_FLAG_RX_MGMT;
break;
case NL80211_IFTYPE_ADHOC:
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index 0aeb34c6389d..35ae64cbef33 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -1312,6 +1312,10 @@ static int ovs_ct_add_helper(struct ovs_conntrack_info *info, const char *name,
rcu_assign_pointer(help->helper, helper);
info->helper = helper;
+
+ if (info->nat)
+ request_module("ip_nat_%s", name);
+
return 0;
}
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index f85f67b5c1f4..ec3095f13aae 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -2715,10 +2715,12 @@ tpacket_error:
}
}
- if (po->has_vnet_hdr && virtio_net_hdr_to_skb(skb, vnet_hdr,
- vio_le())) {
- tp_len = -EINVAL;
- goto tpacket_error;
+ if (po->has_vnet_hdr) {
+ if (virtio_net_hdr_to_skb(skb, vnet_hdr, vio_le())) {
+ tp_len = -EINVAL;
+ goto tpacket_error;
+ }
+ virtio_net_hdr_set_proto(skb, vnet_hdr);
}
skb->destructor = tpacket_destruct_skb;
@@ -2915,6 +2917,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
if (err)
goto out_free;
len += sizeof(vnet_hdr);
+ virtio_net_hdr_set_proto(skb, &vnet_hdr);
}
skb_probe_transport_header(skb, reserve);
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 22e9799e5b69..da1963b19dec 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -1322,6 +1322,18 @@ check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w)
* Delete/get qdisc.
*/
+const struct nla_policy rtm_tca_policy[TCA_MAX + 1] = {
+ [TCA_KIND] = { .type = NLA_STRING },
+ [TCA_OPTIONS] = { .type = NLA_NESTED },
+ [TCA_RATE] = { .type = NLA_BINARY,
+ .len = sizeof(struct tc_estimator) },
+ [TCA_STAB] = { .type = NLA_NESTED },
+ [TCA_DUMP_INVISIBLE] = { .type = NLA_FLAG },
+ [TCA_CHAIN] = { .type = NLA_U32 },
+ [TCA_INGRESS_BLOCK] = { .type = NLA_U32 },
+ [TCA_EGRESS_BLOCK] = { .type = NLA_U32 },
+};
+
static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
struct netlink_ext_ack *extack)
{
@@ -1338,7 +1350,8 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
return -EPERM;
- err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL, extack);
+ err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, rtm_tca_policy,
+ extack);
if (err < 0)
return err;
@@ -1422,7 +1435,8 @@ static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
replay:
/* Reinit, just in case something touches this. */
- err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL, extack);
+ err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, rtm_tca_policy,
+ extack);
if (err < 0)
return err;
@@ -1656,7 +1670,8 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
idx = 0;
ASSERT_RTNL();
- err = nlmsg_parse(nlh, sizeof(struct tcmsg), tca, TCA_MAX, NULL, NULL);
+ err = nlmsg_parse(nlh, sizeof(struct tcmsg), tca, TCA_MAX,
+ rtm_tca_policy, NULL);
if (err < 0)
return err;
@@ -1875,7 +1890,8 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n,
!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
return -EPERM;
- err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL, extack);
+ err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, rtm_tca_policy,
+ extack);
if (err < 0)
return err;
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 5ad5b9f98e8f..148c229fe84f 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -2667,11 +2667,12 @@ static void reg_process_hint(struct regulatory_request *reg_request)
{
struct wiphy *wiphy = NULL;
enum reg_request_treatment treatment;
+ enum nl80211_reg_initiator initiator = reg_request->initiator;
if (reg_request->wiphy_idx != WIPHY_IDX_INVALID)
wiphy = wiphy_idx_to_wiphy(reg_request->wiphy_idx);
- switch (reg_request->initiator) {
+ switch (initiator) {
case NL80211_REGDOM_SET_BY_CORE:
treatment = reg_process_hint_core(reg_request);
break;
@@ -2689,7 +2690,7 @@ static void reg_process_hint(struct regulatory_request *reg_request)
treatment = reg_process_hint_country_ie(wiphy, reg_request);
break;
default:
- WARN(1, "invalid initiator %d\n", reg_request->initiator);
+ WARN(1, "invalid initiator %d\n", initiator);
goto out_free;
}
@@ -2704,7 +2705,7 @@ static void reg_process_hint(struct regulatory_request *reg_request)
*/
if (treatment == REG_REQ_ALREADY_SET && wiphy &&
wiphy->regulatory_flags & REGULATORY_STRICT_REG) {
- wiphy_update_regulatory(wiphy, reg_request->initiator);
+ wiphy_update_regulatory(wiphy, initiator);
wiphy_all_share_dfs_chan_state(wiphy);
reg_check_channels();
}
diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c
index 167f7025ac98..06943d9c9835 100644
--- a/net/wireless/wext-compat.c
+++ b/net/wireless/wext-compat.c
@@ -1278,12 +1278,16 @@ static int cfg80211_wext_giwrate(struct net_device *dev,
if (err)
return err;
- if (!(sinfo.filled & BIT_ULL(NL80211_STA_INFO_TX_BITRATE)))
- return -EOPNOTSUPP;
+ if (!(sinfo.filled & BIT_ULL(NL80211_STA_INFO_TX_BITRATE))) {
+ err = -EOPNOTSUPP;
+ goto free;
+ }
rate->value = 100000 * cfg80211_calculate_bitrate(&sinfo.txrate);
- return 0;
+free:
+ cfg80211_sinfo_release_content(&sinfo);
+ return err;
}
/* Get wireless statistics. Called by /proc/net/wireless and by SIOCGIWSTATS */
@@ -1293,7 +1297,7 @@ static struct iw_statistics *cfg80211_wireless_stats(struct net_device *dev)
struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
/* we are under RTNL - globally locked - so can use static structs */
static struct iw_statistics wstats;
- static struct station_info sinfo;
+ static struct station_info sinfo = {};
u8 bssid[ETH_ALEN];
if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION)
@@ -1352,6 +1356,8 @@ static struct iw_statistics *cfg80211_wireless_stats(struct net_device *dev)
if (sinfo.filled & BIT_ULL(NL80211_STA_INFO_TX_FAILED))
wstats.discard.retries = sinfo.tx_failed;
+ cfg80211_sinfo_release_content(&sinfo);
+
return &wstats;
}
diff --git a/sound/hda/hdac_i915.c b/sound/hda/hdac_i915.c
index b5282cbbe489..617ff1aa818f 100644
--- a/sound/hda/hdac_i915.c
+++ b/sound/hda/hdac_i915.c
@@ -145,9 +145,11 @@ int snd_hdac_i915_init(struct hdac_bus *bus)
if (!acomp->ops) {
request_module("i915");
/* 10s timeout */
- wait_for_completion_timeout(&bind_complete, 10 * 1000);
+ wait_for_completion_timeout(&bind_complete,
+ msecs_to_jiffies(10 * 1000));
}
if (!acomp->ops) {
+ dev_info(bus->dev, "couldn't bind with audio component\n");
snd_hdac_acomp_exit(bus);
return -ENODEV;
}
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 1d117f00d04d..3ac7ba9b342d 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -6409,6 +6409,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x1028, 0x0706, "Dell Inspiron 7559", ALC256_FIXUP_DELL_INSPIRON_7559_SUBWOOFER),
SND_PCI_QUIRK(0x1028, 0x0725, "Dell Inspiron 3162", ALC255_FIXUP_DELL_SPK_NOISE),
SND_PCI_QUIRK(0x1028, 0x075b, "Dell XPS 13 9360", ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE),
+ SND_PCI_QUIRK(0x1028, 0x075c, "Dell XPS 27 7760", ALC298_FIXUP_SPK_VOLUME),
SND_PCI_QUIRK(0x1028, 0x075d, "Dell AIO", ALC298_FIXUP_SPK_VOLUME),
SND_PCI_QUIRK(0x1028, 0x07b0, "Dell Precision 7520", ALC295_FIXUP_DISABLE_DAC3),
SND_PCI_QUIRK(0x1028, 0x0798, "Dell Inspiron 17 7000 Gaming", ALC256_FIXUP_DELL_INSPIRON_7559_SUBWOOFER),
diff --git a/tools/kvm/kvm_stat/kvm_stat b/tools/kvm/kvm_stat/kvm_stat
index 439b8a27488d..195ba486640f 100755
--- a/tools/kvm/kvm_stat/kvm_stat
+++ b/tools/kvm/kvm_stat/kvm_stat
@@ -1325,7 +1325,7 @@ class Tui(object):
msg = ''
while True:
self.screen.erase()
- self.screen.addstr(0, 0, 'Set update interval (defaults to %fs).' %
+ self.screen.addstr(0, 0, 'Set update interval (defaults to %.1fs).' %
DELAY_DEFAULT, curses.A_BOLD)
self.screen.addstr(4, 0, msg)
self.screen.addstr(2, 0, 'Change delay from %.1fs to ' %
diff --git a/tools/testing/selftests/x86/test_vdso.c b/tools/testing/selftests/x86/test_vdso.c
index 235259011704..35edd61d1663 100644
--- a/tools/testing/selftests/x86/test_vdso.c
+++ b/tools/testing/selftests/x86/test_vdso.c
@@ -17,6 +17,7 @@
#include <errno.h>
#include <sched.h>
#include <stdbool.h>
+#include <limits.h>
#ifndef SYS_getcpu
# ifdef __x86_64__
@@ -31,6 +32,14 @@
int nerrs = 0;
+typedef int (*vgettime_t)(clockid_t, struct timespec *);
+
+vgettime_t vdso_clock_gettime;
+
+typedef long (*vgtod_t)(struct timeval *tv, struct timezone *tz);
+
+vgtod_t vdso_gettimeofday;
+
typedef long (*getcpu_t)(unsigned *, unsigned *, void *);
getcpu_t vgetcpu;
@@ -95,6 +104,15 @@ static void fill_function_pointers()
printf("Warning: failed to find getcpu in vDSO\n");
vgetcpu = (getcpu_t) vsyscall_getcpu();
+
+ vdso_clock_gettime = (vgettime_t)dlsym(vdso, "__vdso_clock_gettime");
+ if (!vdso_clock_gettime)
+ printf("Warning: failed to find clock_gettime in vDSO\n");
+
+ vdso_gettimeofday = (vgtod_t)dlsym(vdso, "__vdso_gettimeofday");
+ if (!vdso_gettimeofday)
+ printf("Warning: failed to find gettimeofday in vDSO\n");
+
}
static long sys_getcpu(unsigned * cpu, unsigned * node,
@@ -103,6 +121,16 @@ static long sys_getcpu(unsigned * cpu, unsigned * node,
return syscall(__NR_getcpu, cpu, node, cache);
}
+static inline int sys_clock_gettime(clockid_t id, struct timespec *ts)
+{
+ return syscall(__NR_clock_gettime, id, ts);
+}
+
+static inline int sys_gettimeofday(struct timeval *tv, struct timezone *tz)
+{
+ return syscall(__NR_gettimeofday, tv, tz);
+}
+
static void test_getcpu(void)
{
printf("[RUN]\tTesting getcpu...\n");
@@ -155,10 +183,154 @@ static void test_getcpu(void)
}
}
+static bool ts_leq(const struct timespec *a, const struct timespec *b)
+{
+ if (a->tv_sec != b->tv_sec)
+ return a->tv_sec < b->tv_sec;
+ else
+ return a->tv_nsec <= b->tv_nsec;
+}
+
+static bool tv_leq(const struct timeval *a, const struct timeval *b)
+{
+ if (a->tv_sec != b->tv_sec)
+ return a->tv_sec < b->tv_sec;
+ else
+ return a->tv_usec <= b->tv_usec;
+}
+
+static char const * const clocknames[] = {
+ [0] = "CLOCK_REALTIME",
+ [1] = "CLOCK_MONOTONIC",
+ [2] = "CLOCK_PROCESS_CPUTIME_ID",
+ [3] = "CLOCK_THREAD_CPUTIME_ID",
+ [4] = "CLOCK_MONOTONIC_RAW",
+ [5] = "CLOCK_REALTIME_COARSE",
+ [6] = "CLOCK_MONOTONIC_COARSE",
+ [7] = "CLOCK_BOOTTIME",
+ [8] = "CLOCK_REALTIME_ALARM",
+ [9] = "CLOCK_BOOTTIME_ALARM",
+ [10] = "CLOCK_SGI_CYCLE",
+ [11] = "CLOCK_TAI",
+};
+
+static void test_one_clock_gettime(int clock, const char *name)
+{
+ struct timespec start, vdso, end;
+ int vdso_ret, end_ret;
+
+ printf("[RUN]\tTesting clock_gettime for clock %s (%d)...\n", name, clock);
+
+ if (sys_clock_gettime(clock, &start) < 0) {
+ if (errno == EINVAL) {
+ vdso_ret = vdso_clock_gettime(clock, &vdso);
+ if (vdso_ret == -EINVAL) {
+ printf("[OK]\tNo such clock.\n");
+ } else {
+ printf("[FAIL]\tNo such clock, but __vdso_clock_gettime returned %d\n", vdso_ret);
+ nerrs++;
+ }
+ } else {
+ printf("[WARN]\t clock_gettime(%d) syscall returned error %d\n", clock, errno);
+ }
+ return;
+ }
+
+ vdso_ret = vdso_clock_gettime(clock, &vdso);
+ end_ret = sys_clock_gettime(clock, &end);
+
+ if (vdso_ret != 0 || end_ret != 0) {
+ printf("[FAIL]\tvDSO returned %d, syscall errno=%d\n",
+ vdso_ret, errno);
+ nerrs++;
+ return;
+ }
+
+ printf("\t%llu.%09ld %llu.%09ld %llu.%09ld\n",
+ (unsigned long long)start.tv_sec, start.tv_nsec,
+ (unsigned long long)vdso.tv_sec, vdso.tv_nsec,
+ (unsigned long long)end.tv_sec, end.tv_nsec);
+
+ if (!ts_leq(&start, &vdso) || !ts_leq(&vdso, &end)) {
+ printf("[FAIL]\tTimes are out of sequence\n");
+ nerrs++;
+ }
+}
+
+static void test_clock_gettime(void)
+{
+ for (int clock = 0; clock < sizeof(clocknames) / sizeof(clocknames[0]);
+ clock++) {
+ test_one_clock_gettime(clock, clocknames[clock]);
+ }
+
+ /* Also test some invalid clock ids */
+ test_one_clock_gettime(-1, "invalid");
+ test_one_clock_gettime(INT_MIN, "invalid");
+ test_one_clock_gettime(INT_MAX, "invalid");
+}
+
+static void test_gettimeofday(void)
+{
+ struct timeval start, vdso, end;
+ struct timezone sys_tz, vdso_tz;
+ int vdso_ret, end_ret;
+
+ if (!vdso_gettimeofday)
+ return;
+
+ printf("[RUN]\tTesting gettimeofday...\n");
+
+ if (sys_gettimeofday(&start, &sys_tz) < 0) {
+ printf("[FAIL]\tsys_gettimeofday failed (%d)\n", errno);
+ nerrs++;
+ return;
+ }
+
+ vdso_ret = vdso_gettimeofday(&vdso, &vdso_tz);
+ end_ret = sys_gettimeofday(&end, NULL);
+
+ if (vdso_ret != 0 || end_ret != 0) {
+ printf("[FAIL]\tvDSO returned %d, syscall errno=%d\n",
+ vdso_ret, errno);
+ nerrs++;
+ return;
+ }
+
+ printf("\t%llu.%06ld %llu.%06ld %llu.%06ld\n",
+ (unsigned long long)start.tv_sec, start.tv_usec,
+ (unsigned long long)vdso.tv_sec, vdso.tv_usec,
+ (unsigned long long)end.tv_sec, end.tv_usec);
+
+ if (!tv_leq(&start, &vdso) || !tv_leq(&vdso, &end)) {
+ printf("[FAIL]\tTimes are out of sequence\n");
+ nerrs++;
+ }
+
+ if (sys_tz.tz_minuteswest == vdso_tz.tz_minuteswest &&
+ sys_tz.tz_dsttime == vdso_tz.tz_dsttime) {
+ printf("[OK]\ttimezones match: minuteswest=%d, dsttime=%d\n",
+ sys_tz.tz_minuteswest, sys_tz.tz_dsttime);
+ } else {
+ printf("[FAIL]\ttimezones do not match\n");
+ nerrs++;
+ }
+
+ /* And make sure that passing NULL for tz doesn't crash. */
+ vdso_gettimeofday(&vdso, NULL);
+}
+
int main(int argc, char **argv)
{
fill_function_pointers();
+ test_clock_gettime();
+ test_gettimeofday();
+
+ /*
+ * Test getcpu() last so that, if something goes wrong setting affinity,
+ * we still run the other tests.
+ */
test_getcpu();
return nerrs ? 1 : 0;