diff options
190 files changed, 4546 insertions, 862 deletions
@@ -3949,8 +3949,6 @@ E: gwingerde@gmail.com D: Ralink rt2x00 WLAN driver D: Minix V2 file-system D: Misc fixes -S: Geessinkweg 177 -S: 7544 TX Enschede S: The Netherlands N: Lars Wirzenius diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu index 498741737055..2a4a423d08e0 100644 --- a/Documentation/ABI/testing/sysfs-devices-system-cpu +++ b/Documentation/ABI/testing/sysfs-devices-system-cpu @@ -272,6 +272,22 @@ Description: Parameters for the CPU cache attributes the modified cache line is written to main memory only when it is replaced + +What: /sys/devices/system/cpu/cpu*/cache/index*/id +Date: September 2016 +Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org> +Description: Cache id + + The id provides a unique number for a specific instance of + a cache of a particular type. E.g. there may be a level + 3 unified cache on each socket in a server and we may + assign them ids 0, 1, 2, ... + + Note that id value can be non-contiguous. E.g. level 1 + caches typically exist per core, but there may not be a + power of two cores on a socket, so these caches may be + numbered 0, 1, 2, 3, 4, 5, 8, 9, 10, ... + What: /sys/devices/system/cpu/cpuX/cpufreq/throttle_stats /sys/devices/system/cpu/cpuX/cpufreq/throttle_stats/turbo_stat /sys/devices/system/cpu/cpuX/cpufreq/throttle_stats/sub_turbo_stat diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index be2d6d0a03a4..21e2d8863705 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1441,6 +1441,10 @@ The builtin appraise policy appraises all files owned by uid=0. + ima_canonical_fmt [IMA] + Use the canonical format for the binary runtime + measurements, instead of host native format. + ima_hash= [IMA] Format: { md5 | sha1 | rmd160 | sha256 | sha384 | sha512 | ... } diff --git a/Documentation/devicetree/bindings/net/hisilicon-hix5hd2-gmac.txt b/Documentation/devicetree/bindings/net/hisilicon-hix5hd2-gmac.txt index 063c02da018a..eea73adc678f 100644 --- a/Documentation/devicetree/bindings/net/hisilicon-hix5hd2-gmac.txt +++ b/Documentation/devicetree/bindings/net/hisilicon-hix5hd2-gmac.txt @@ -2,11 +2,14 @@ Hisilicon hix5hd2 gmac controller Required properties: - compatible: should contain one of the following SoC strings: - * "hisilicon,hix5hd2-gemac" - * "hisilicon,hi3798cv200-gemac" + * "hisilicon,hix5hd2-gmac" + * "hisilicon,hi3798cv200-gmac" + * "hisilicon,hi3516a-gmac" and one of the following version string: - * "hisilicon,hisi-gemac-v1" - * "hisilicon,hisi-gemac-v2" + * "hisilicon,hisi-gmac-v1" + * "hisilicon,hisi-gmac-v2" + The version v1 includes SoCs hix5hd2. + The version v2 includes SoCs hi3798cv200, hi3516a. - reg: specifies base physical address(s) and size of the device registers. The first region is the MAC register base and size. The second region is external interface control register. @@ -35,7 +38,7 @@ Required properties: Example: gmac0: ethernet@f9840000 { - compatible = "hisilicon,hi3798cv200-gemac", "hisilicon,hisi-gemac-v2"; + compatible = "hisilicon,hi3798cv200-gmac", "hisilicon,hisi-gmac-v2"; reg = <0xf9840000 0x1000>,<0xf984300c 0x4>; interrupts = <0 71 4>; #address-cells = <1>; diff --git a/Documentation/devicetree/bindings/net/phy.txt b/Documentation/devicetree/bindings/net/phy.txt index 54749b60a466..ff1bc4b1bb3b 100644 --- a/Documentation/devicetree/bindings/net/phy.txt +++ b/Documentation/devicetree/bindings/net/phy.txt @@ -38,8 +38,14 @@ Optional Properties: - enet-phy-lane-swap: If set, indicates the PHY will swap the TX/RX lanes to compensate for the board being designed with the lanes swapped. -- eee-broken-modes: Bits to clear in the MDIO_AN_EEE_ADV register to - disable EEE broken modes. +- eee-broken-100tx: +- eee-broken-1000t: +- eee-broken-10gt: +- eee-broken-1000kx: +- eee-broken-10gkx4: +- eee-broken-10gkr: + Mark the corresponding energy efficient ethernet mode as broken and + request the ethernet to stop advertising it. Example: diff --git a/Documentation/features/io/dma-api-debug/arch-support.txt b/Documentation/features/io/dma-api-debug/arch-support.txt index 4f4a3443b114..ffa522a9bdfd 100644 --- a/Documentation/features/io/dma-api-debug/arch-support.txt +++ b/Documentation/features/io/dma-api-debug/arch-support.txt @@ -36,5 +36,5 @@ | um: | TODO | | unicore32: | TODO | | x86: | ok | - | xtensa: | TODO | + | xtensa: | ok | ----------------------- diff --git a/Documentation/features/io/dma-contiguous/arch-support.txt b/Documentation/features/io/dma-contiguous/arch-support.txt index a97e8e3f4ebb..83d2cf989ea3 100644 --- a/Documentation/features/io/dma-contiguous/arch-support.txt +++ b/Documentation/features/io/dma-contiguous/arch-support.txt @@ -36,5 +36,5 @@ | um: | TODO | | unicore32: | TODO | | x86: | ok | - | xtensa: | TODO | + | xtensa: | ok | ----------------------- diff --git a/Documentation/sphinx/rstFlatTable.py b/Documentation/sphinx/rstFlatTable.py index 55f275793028..25feb0d35e7a 100755 --- a/Documentation/sphinx/rstFlatTable.py +++ b/Documentation/sphinx/rstFlatTable.py @@ -157,6 +157,11 @@ class ListTableBuilder(object): def buildTableNode(self): colwidths = self.directive.get_column_widths(self.max_cols) + if isinstance(colwidths, tuple): + # Since docutils 0.13, get_column_widths returns a (widths, + # colwidths) tuple, where widths is a string (i.e. 'auto'). + # See https://sourceforge.net/p/docutils/patches/120/. + colwidths = colwidths[1] stub_columns = self.directive.options.get('stub-columns', 0) header_rows = self.directive.options.get('header-rows', 0) diff --git a/Documentation/x86/intel_rdt_ui.txt b/Documentation/x86/intel_rdt_ui.txt new file mode 100644 index 000000000000..d918d268cd72 --- /dev/null +++ b/Documentation/x86/intel_rdt_ui.txt @@ -0,0 +1,214 @@ +User Interface for Resource Allocation in Intel Resource Director Technology + +Copyright (C) 2016 Intel Corporation + +Fenghua Yu <fenghua.yu@intel.com> +Tony Luck <tony.luck@intel.com> + +This feature is enabled by the CONFIG_INTEL_RDT_A Kconfig and the +X86 /proc/cpuinfo flag bits "rdt", "cat_l3" and "cdp_l3". + +To use the feature mount the file system: + + # mount -t resctrl resctrl [-o cdp] /sys/fs/resctrl + +mount options are: + +"cdp": Enable code/data prioritization in L3 cache allocations. + + +Info directory +-------------- + +The 'info' directory contains information about the enabled +resources. Each resource has its own subdirectory. The subdirectory +names reflect the resource names. Each subdirectory contains the +following files: + +"num_closids": The number of CLOSIDs which are valid for this + resource. The kernel uses the smallest number of + CLOSIDs of all enabled resources as limit. + +"cbm_mask": The bitmask which is valid for this resource. This + mask is equivalent to 100%. + +"min_cbm_bits": The minimum number of consecutive bits which must be + set when writing a mask. + + +Resource groups +--------------- +Resource groups are represented as directories in the resctrl file +system. The default group is the root directory. Other groups may be +created as desired by the system administrator using the "mkdir(1)" +command, and removed using "rmdir(1)". + +There are three files associated with each group: + +"tasks": A list of tasks that belongs to this group. Tasks can be + added to a group by writing the task ID to the "tasks" file + (which will automatically remove them from the previous + group to which they belonged). New tasks created by fork(2) + and clone(2) are added to the same group as their parent. + If a pid is not in any sub partition, it is in root partition + (i.e. default partition). + +"cpus": A bitmask of logical CPUs assigned to this group. Writing + a new mask can add/remove CPUs from this group. Added CPUs + are removed from their previous group. Removed ones are + given to the default (root) group. You cannot remove CPUs + from the default group. + +"schemata": A list of all the resources available to this group. + Each resource has its own line and format - see below for + details. + +When a task is running the following rules define which resources +are available to it: + +1) If the task is a member of a non-default group, then the schemata +for that group is used. + +2) Else if the task belongs to the default group, but is running on a +CPU that is assigned to some specific group, then the schemata for +the CPU's group is used. + +3) Otherwise the schemata for the default group is used. + + +Schemata files - general concepts +--------------------------------- +Each line in the file describes one resource. The line starts with +the name of the resource, followed by specific values to be applied +in each of the instances of that resource on the system. + +Cache IDs +--------- +On current generation systems there is one L3 cache per socket and L2 +caches are generally just shared by the hyperthreads on a core, but this +isn't an architectural requirement. We could have multiple separate L3 +caches on a socket, multiple cores could share an L2 cache. So instead +of using "socket" or "core" to define the set of logical cpus sharing +a resource we use a "Cache ID". At a given cache level this will be a +unique number across the whole system (but it isn't guaranteed to be a +contiguous sequence, there may be gaps). To find the ID for each logical +CPU look in /sys/devices/system/cpu/cpu*/cache/index*/id + +Cache Bit Masks (CBM) +--------------------- +For cache resources we describe the portion of the cache that is available +for allocation using a bitmask. The maximum value of the mask is defined +by each cpu model (and may be different for different cache levels). It +is found using CPUID, but is also provided in the "info" directory of +the resctrl file system in "info/{resource}/cbm_mask". X86 hardware +requires that these masks have all the '1' bits in a contiguous block. So +0x3, 0x6 and 0xC are legal 4-bit masks with two bits set, but 0x5, 0x9 +and 0xA are not. On a system with a 20-bit mask each bit represents 5% +of the capacity of the cache. You could partition the cache into four +equal parts with masks: 0x1f, 0x3e0, 0x7c00, 0xf8000. + + +L3 details (code and data prioritization disabled) +-------------------------------------------------- +With CDP disabled the L3 schemata format is: + + L3:<cache_id0>=<cbm>;<cache_id1>=<cbm>;... + +L3 details (CDP enabled via mount option to resctrl) +---------------------------------------------------- +When CDP is enabled L3 control is split into two separate resources +so you can specify independent masks for code and data like this: + + L3data:<cache_id0>=<cbm>;<cache_id1>=<cbm>;... + L3code:<cache_id0>=<cbm>;<cache_id1>=<cbm>;... + +L2 details +---------- +L2 cache does not support code and data prioritization, so the +schemata format is always: + + L2:<cache_id0>=<cbm>;<cache_id1>=<cbm>;... + +Example 1 +--------- +On a two socket machine (one L3 cache per socket) with just four bits +for cache bit masks + +# mount -t resctrl resctrl /sys/fs/resctrl +# cd /sys/fs/resctrl +# mkdir p0 p1 +# echo "L3:0=3;1=c" > /sys/fs/resctrl/p0/schemata +# echo "L3:0=3;1=3" > /sys/fs/resctrl/p1/schemata + +The default resource group is unmodified, so we have access to all parts +of all caches (its schemata file reads "L3:0=f;1=f"). + +Tasks that are under the control of group "p0" may only allocate from the +"lower" 50% on cache ID 0, and the "upper" 50% of cache ID 1. +Tasks in group "p1" use the "lower" 50% of cache on both sockets. + +Example 2 +--------- +Again two sockets, but this time with a more realistic 20-bit mask. + +Two real time tasks pid=1234 running on processor 0 and pid=5678 running on +processor 1 on socket 0 on a 2-socket and dual core machine. To avoid noisy +neighbors, each of the two real-time tasks exclusively occupies one quarter +of L3 cache on socket 0. + +# mount -t resctrl resctrl /sys/fs/resctrl +# cd /sys/fs/resctrl + +First we reset the schemata for the default group so that the "upper" +50% of the L3 cache on socket 0 cannot be used by ordinary tasks: + +# echo "L3:0=3ff;1=fffff" > schemata + +Next we make a resource group for our first real time task and give +it access to the "top" 25% of the cache on socket 0. + +# mkdir p0 +# echo "L3:0=f8000;1=fffff" > p0/schemata + +Finally we move our first real time task into this resource group. We +also use taskset(1) to ensure the task always runs on a dedicated CPU +on socket 0. Most uses of resource groups will also constrain which +processors tasks run on. + +# echo 1234 > p0/tasks +# taskset -cp 1 1234 + +Ditto for the second real time task (with the remaining 25% of cache): + +# mkdir p1 +# echo "L3:0=7c00;1=fffff" > p1/schemata +# echo 5678 > p1/tasks +# taskset -cp 2 5678 + +Example 3 +--------- + +A single socket system which has real-time tasks running on core 4-7 and +non real-time workload assigned to core 0-3. The real-time tasks share text +and data, so a per task association is not required and due to interaction +with the kernel it's desired that the kernel on these cores shares L3 with +the tasks. + +# mount -t resctrl resctrl /sys/fs/resctrl +# cd /sys/fs/resctrl + +First we reset the schemata for the default group so that the "upper" +50% of the L3 cache on socket 0 cannot be used by ordinary tasks: + +# echo "L3:0=3ff" > schemata + +Next we make a resource group for our real time cores and give +it access to the "top" 50% of the cache on socket 0. + +# mkdir p0 +# echo "L3:0=ffc00;" > p0/schemata + +Finally we move core 4-7 over to the new group and make sure that the +kernel and the tasks running there get 50% of the cache. + +# echo C0 > p0/cpus diff --git a/MAINTAINERS b/MAINTAINERS index f6eb97b35e0f..7c21c7638bb5 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10327,6 +10327,14 @@ L: linux-rdma@vger.kernel.org S: Supported F: drivers/infiniband/sw/rdmavt +RDT - RESOURCE ALLOCATION +M: Fenghua Yu <fenghua.yu@intel.com> +L: linux-kernel@vger.kernel.org +S: Supported +F: arch/x86/kernel/cpu/intel_rdt* +F: arch/x86/include/asm/intel_rdt* +F: Documentation/x86/intel_rdt* + READ-COPY UPDATE (RCU) M: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com> M: Josh Triplett <josh@joshtriplett.org> diff --git a/arch/Kconfig b/arch/Kconfig index 19483aea4bbc..99839c23d453 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -5,6 +5,9 @@ config KEXEC_CORE bool +config HAVE_IMA_KEXEC + bool + config OPROFILE tristate "OProfile system profiling" depends on PROFILING diff --git a/arch/arm/boot/dts/hisi-x5hd2.dtsi b/arch/arm/boot/dts/hisi-x5hd2.dtsi index c02e092fad8b..6c712a97e1fe 100644 --- a/arch/arm/boot/dts/hisi-x5hd2.dtsi +++ b/arch/arm/boot/dts/hisi-x5hd2.dtsi @@ -438,7 +438,7 @@ }; gmac0: ethernet@1840000 { - compatible = "hisilicon,hix5hd2-gemac", "hisilicon,hisi-gemac-v1"; + compatible = "hisilicon,hix5hd2-gmac", "hisilicon,hisi-gmac-v1"; reg = <0x1840000 0x1000>,<0x184300c 0x4>; interrupts = <0 71 4>; clocks = <&clock HIX5HD2_MAC0_CLK>; @@ -447,7 +447,7 @@ }; gmac1: ethernet@1841000 { - compatible = "hisilicon,hix5hd2-gemac", "hisilicon,hisi-gemac-v1"; + compatible = "hisilicon,hix5hd2-gmac", "hisilicon,hisi-gmac-v1"; reg = <0x1841000 0x1000>,<0x1843010 0x4>; interrupts = <0 72 4>; clocks = <&clock HIX5HD2_MAC1_CLK>; diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index b71086d25195..bfe632808d77 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -165,6 +165,11 @@ extern u64 kimage_vaddr; /* the offset between the kernel virtual and physical mappings */ extern u64 kimage_voffset; +static inline unsigned long kaslr_offset(void) +{ + return kimage_vaddr - KIMAGE_VADDR; +} + /* * Allow all memory at the discovery stage. We will clip it later. */ diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index a53f52ac81c6..b051367e2149 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -338,11 +338,11 @@ subsys_initcall(topology_init); static int dump_kernel_offset(struct notifier_block *self, unsigned long v, void *p) { - u64 const kaslr_offset = kimage_vaddr - KIMAGE_VADDR; + const unsigned long offset = kaslr_offset(); - if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && kaslr_offset > 0) { - pr_emerg("Kernel Offset: 0x%llx from 0x%lx\n", - kaslr_offset, KIMAGE_VADDR); + if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && offset > 0) { + pr_emerg("Kernel Offset: 0x%lx from 0x%lx\n", + offset, KIMAGE_VADDR); } else { pr_emerg("Kernel Offset: disabled\n"); } diff --git a/arch/microblaze/include/asm/unistd.h b/arch/microblaze/include/asm/unistd.h index 805ae5d712e8..032fed71223f 100644 --- a/arch/microblaze/include/asm/unistd.h +++ b/arch/microblaze/include/asm/unistd.h @@ -38,6 +38,6 @@ #endif /* __ASSEMBLY__ */ -#define __NR_syscalls 392 +#define __NR_syscalls 398 #endif /* _ASM_MICROBLAZE_UNISTD_H */ diff --git a/arch/microblaze/include/uapi/asm/unistd.h b/arch/microblaze/include/uapi/asm/unistd.h index a8bd3fa28bc7..d8086159d996 100644 --- a/arch/microblaze/include/uapi/asm/unistd.h +++ b/arch/microblaze/include/uapi/asm/unistd.h @@ -407,5 +407,11 @@ #define __NR_userfaultfd 389 #define __NR_membarrier 390 #define __NR_mlock2 391 +#define __NR_copy_file_range 392 +#define __NR_preadv2 393 +#define __NR_pwritev2 394 +#define __NR_pkey_mprotect 395 +#define __NR_pkey_alloc 396 +#define __NR_pkey_free 397 #endif /* _UAPI_ASM_MICROBLAZE_UNISTD_H */ diff --git a/arch/microblaze/kernel/cpu/cpuinfo.c b/arch/microblaze/kernel/cpu/cpuinfo.c index b70bb538f001..96b3f26d16be 100644 --- a/arch/microblaze/kernel/cpu/cpuinfo.c +++ b/arch/microblaze/kernel/cpu/cpuinfo.c @@ -49,6 +49,8 @@ const struct cpu_ver_key cpu_ver_lookup[] = { {"9.3", 0x20}, {"9.4", 0x21}, {"9.5", 0x22}, + {"9.6", 0x23}, + {"10.0", 0x24}, {NULL, 0}, }; @@ -75,6 +77,10 @@ const struct family_string_key family_string_lookup[] = { {"zynq7000", 0x12}, {"UltraScale Virtex", 0x13}, {"UltraScale Kintex", 0x14}, + {"UltraScale+ Zynq", 0x15}, + {"UltraScale+ Virtex", 0x16}, + {"UltraScale+ Kintex", 0x17}, + {"Spartan7", 0x18}, {NULL, 0}, }; diff --git a/arch/microblaze/kernel/syscall_table.S b/arch/microblaze/kernel/syscall_table.S index 6b3dd99126d7..6841c2df14d9 100644 --- a/arch/microblaze/kernel/syscall_table.S +++ b/arch/microblaze/kernel/syscall_table.S @@ -392,3 +392,9 @@ ENTRY(sys_call_table) .long sys_userfaultfd .long sys_membarrier /* 390 */ .long sys_mlock2 + .long sys_copy_file_range + .long sys_preadv2 + .long sys_pwritev2 + .long sys_pkey_mprotect /* 395 */ + .long sys_pkey_alloc + .long sys_pkey_free diff --git a/arch/microblaze/kernel/timer.c b/arch/microblaze/kernel/timer.c index 5bbf38b916ef..9e954959f605 100644 --- a/arch/microblaze/kernel/timer.c +++ b/arch/microblaze/kernel/timer.c @@ -259,7 +259,7 @@ static int __init xilinx_timer_init(struct device_node *timer) int ret; if (initialized) - return; + return -EINVAL; initialized = 1; diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index a14b86587013..3a71f38cdc05 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -7,6 +7,7 @@ config PARISC select HAVE_FUNCTION_GRAPH_TRACER select HAVE_SYSCALL_TRACEPOINTS select ARCH_WANT_FRAME_POINTERS + select ARCH_HAS_ELF_RANDOMIZE select RTC_CLASS select RTC_DRV_GENERIC select INIT_ALL_POSSIBLE diff --git a/arch/parisc/include/asm/elf.h b/arch/parisc/include/asm/elf.h index 78c9fd32c554..a6b2a421571e 100644 --- a/arch/parisc/include/asm/elf.h +++ b/arch/parisc/include/asm/elf.h @@ -348,9 +348,10 @@ struct pt_regs; /* forward declaration... */ #define ELF_HWCAP 0 -#define STACK_RND_MASK (is_32bit_task() ? \ - 0x7ff >> (PAGE_SHIFT - 12) : \ - 0x3ffff >> (PAGE_SHIFT - 12)) +/* Masks for stack and mmap randomization */ +#define BRK_RND_MASK (is_32bit_task() ? 0x07ffUL : 0x3ffffUL) +#define MMAP_RND_MASK (is_32bit_task() ? 0x1fffUL : 0x3ffffUL) +#define STACK_RND_MASK MMAP_RND_MASK struct mm_struct; extern unsigned long arch_randomize_brk(struct mm_struct *); diff --git a/arch/parisc/include/asm/pdcpat.h b/arch/parisc/include/asm/pdcpat.h index 47539f117958..e1d289092705 100644 --- a/arch/parisc/include/asm/pdcpat.h +++ b/arch/parisc/include/asm/pdcpat.h @@ -289,7 +289,7 @@ extern int pdc_pat_cell_get_number(struct pdc_pat_cell_num *cell_info); extern int pdc_pat_cell_module(unsigned long *actcnt, unsigned long ploc, unsigned long mod, unsigned long view_type, void *mem_addr); extern int pdc_pat_cell_num_to_loc(void *, unsigned long); -extern int pdc_pat_cpu_get_number(struct pdc_pat_cpu_num *cpu_info, void *hpa); +extern int pdc_pat_cpu_get_number(struct pdc_pat_cpu_num *cpu_info, unsigned long hpa); extern int pdc_pat_pd_get_addr_map(unsigned long *actual_len, void *mem_addr, unsigned long count, unsigned long offset); diff --git a/arch/parisc/include/asm/processor.h b/arch/parisc/include/asm/processor.h index ca40741378be..a3661ee6b060 100644 --- a/arch/parisc/include/asm/processor.h +++ b/arch/parisc/include/asm/processor.h @@ -93,9 +93,7 @@ struct system_cpuinfo_parisc { /* Per CPU data structure - ie varies per CPU. */ struct cpuinfo_parisc { unsigned long it_value; /* Interval Timer at last timer Intr */ - unsigned long it_delta; /* Interval delta (tic_10ms / HZ * 100) */ unsigned long irq_count; /* number of IRQ's since boot */ - unsigned long irq_max_cr16; /* longest time to handle a single IRQ */ unsigned long cpuid; /* aka slot_number or set to NO_PROC_ID */ unsigned long hpa; /* Host Physical address */ unsigned long txn_addr; /* MMIO addr of EIR or id_eid */ @@ -103,8 +101,6 @@ struct cpuinfo_parisc { unsigned long pending_ipi; /* bitmap of type ipi_message_type */ #endif unsigned long bh_count; /* number of times bh was invoked */ - unsigned long prof_counter; /* per CPU profiling support */ - unsigned long prof_multiplier; /* per CPU profiling support */ unsigned long fp_rev; unsigned long fp_model; unsigned int state; diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S index 4fcff2dcc9c3..ad4cb1613c57 100644 --- a/arch/parisc/kernel/entry.S +++ b/arch/parisc/kernel/entry.S @@ -878,6 +878,9 @@ ENTRY_CFI(syscall_exit_rfi) STREG %r19,PT_SR7(%r16) intr_return: + /* NOTE: Need to enable interrupts incase we schedule. */ + ssm PSW_SM_I, %r0 + /* check for reschedule */ mfctl %cr30,%r1 LDREG TI_FLAGS(%r1),%r19 /* sched.h: TIF_NEED_RESCHED */ @@ -904,11 +907,6 @@ intr_check_sig: LDREG PT_IASQ1(%r16), %r20 cmpib,COND(=),n 0,%r20,intr_restore /* backward */ - /* NOTE: We need to enable interrupts if we have to deliver - * signals. We used to do this earlier but it caused kernel - * stack overflows. */ - ssm PSW_SM_I, %r0 - copy %r0, %r25 /* long in_syscall = 0 */ #ifdef CONFIG_64BIT ldo -16(%r30),%r29 /* Reference param save area */ @@ -960,10 +958,6 @@ intr_do_resched: cmpib,COND(=) 0, %r20, intr_do_preempt nop - /* NOTE: We need to enable interrupts if we schedule. We used - * to do this earlier but it caused kernel stack overflows. */ - ssm PSW_SM_I, %r0 - #ifdef CONFIG_64BIT ldo -16(%r30),%r29 /* Reference param save area */ #endif diff --git a/arch/parisc/kernel/firmware.c b/arch/parisc/kernel/firmware.c index e5d71905cad5..9d797ae4fa22 100644 --- a/arch/parisc/kernel/firmware.c +++ b/arch/parisc/kernel/firmware.c @@ -1258,7 +1258,7 @@ int pdc_pat_cell_module(unsigned long *actcnt, unsigned long ploc, unsigned long * * Retrieve the cpu number for the cpu at the specified HPA. */ -int pdc_pat_cpu_get_number(struct pdc_pat_cpu_num *cpu_info, void *hpa) +int pdc_pat_cpu_get_number(struct pdc_pat_cpu_num *cpu_info, unsigned long hpa) { int retval; unsigned long flags; diff --git a/arch/parisc/kernel/inventory.c b/arch/parisc/kernel/inventory.c index c05d1876d27c..c9789d9c73b4 100644 --- a/arch/parisc/kernel/inventory.c +++ b/arch/parisc/kernel/inventory.c @@ -216,9 +216,9 @@ pat_query_module(ulong pcell_loc, ulong mod_index) register_parisc_device(dev); /* advertise device */ #ifdef DEBUG_PAT - pdc_pat_cell_mod_maddr_block_t io_pdc_cell; /* dump what we see so far... */ switch (PAT_GET_ENTITY(dev->mod_info)) { + pdc_pat_cell_mod_maddr_block_t io_pdc_cell; unsigned long i; case PAT_ENTITY_PROC: @@ -259,9 +259,9 @@ pat_query_module(ulong pcell_loc, ulong mod_index) pa_pdc_cell->mod[4 + i * 3]); /* finish (ie end) */ printk(KERN_DEBUG " IO_VIEW %ld: 0x%016lx 0x%016lx 0x%016lx\n", - i, io_pdc_cell->mod[2 + i * 3], /* type */ - io_pdc_cell->mod[3 + i * 3], /* start */ - io_pdc_cell->mod[4 + i * 3]); /* finish (ie end) */ + i, io_pdc_cell.mod[2 + i * 3], /* type */ + io_pdc_cell.mod[3 + i * 3], /* start */ + io_pdc_cell.mod[4 + i * 3]); /* finish (ie end) */ } printk(KERN_DEBUG "\n"); break; diff --git a/arch/parisc/kernel/perf.c b/arch/parisc/kernel/perf.c index 518f4f5f1f43..6eabce62463b 100644 --- a/arch/parisc/kernel/perf.c +++ b/arch/parisc/kernel/perf.c @@ -301,7 +301,6 @@ static ssize_t perf_read(struct file *file, char __user *buf, size_t cnt, loff_t static ssize_t perf_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { - int err; size_t image_size; uint32_t image_type; uint32_t interface_type; @@ -320,8 +319,8 @@ static ssize_t perf_write(struct file *file, const char __user *buf, size_t coun if (count != sizeof(uint32_t)) return -EIO; - if ((err = copy_from_user(&image_type, buf, sizeof(uint32_t))) != 0) - return err; + if (copy_from_user(&image_type, buf, sizeof(uint32_t))) + return -EFAULT; /* Get the interface type and test type */ interface_type = (image_type >> 16) & 0xffff; diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c index 40639439d8b3..ea6603ee8d24 100644 --- a/arch/parisc/kernel/process.c +++ b/arch/parisc/kernel/process.c @@ -276,11 +276,7 @@ void *dereference_function_descriptor(void *ptr) static inline unsigned long brk_rnd(void) { - /* 8MB for 32bit, 1GB for 64bit */ - if (is_32bit_task()) - return (get_random_int() & 0x7ffUL) << PAGE_SHIFT; - else - return (get_random_int() & 0x3ffffUL) << PAGE_SHIFT; + return (get_random_int() & BRK_RND_MASK) << PAGE_SHIFT; } unsigned long arch_randomize_brk(struct mm_struct *mm) diff --git a/arch/parisc/kernel/processor.c b/arch/parisc/kernel/processor.c index 0c2a94a0f751..85de47f4eb59 100644 --- a/arch/parisc/kernel/processor.c +++ b/arch/parisc/kernel/processor.c @@ -78,11 +78,6 @@ DEFINE_PER_CPU(struct cpuinfo_parisc, cpu_data); static void init_percpu_prof(unsigned long cpunum) { - struct cpuinfo_parisc *p; - - p = &per_cpu(cpu_data, cpunum); - p->prof_counter = 1; - p->prof_multiplier = 1; } @@ -99,6 +94,7 @@ static int processor_probe(struct parisc_device *dev) unsigned long txn_addr; unsigned long cpuid; struct cpuinfo_parisc *p; + struct pdc_pat_cpu_num cpu_info __maybe_unused; #ifdef CONFIG_SMP if (num_online_cpus() >= nr_cpu_ids) { @@ -123,10 +119,6 @@ static int processor_probe(struct parisc_device *dev) ulong status; unsigned long bytecnt; pdc_pat_cell_mod_maddr_block_t *pa_pdc_cell; -#undef USE_PAT_CPUID -#ifdef USE_PAT_CPUID - struct pdc_pat_cpu_num cpu_info; -#endif pa_pdc_cell = kmalloc(sizeof (*pa_pdc_cell), GFP_KERNEL); if (!pa_pdc_cell) @@ -145,22 +137,27 @@ static int processor_probe(struct parisc_device *dev) kfree(pa_pdc_cell); + /* get the cpu number */ + status = pdc_pat_cpu_get_number(&cpu_info, dev->hpa.start); + BUG_ON(PDC_OK != status); + + pr_info("Logical CPU #%lu is physical cpu #%lu at location " + "0x%lx with hpa %pa\n", + cpuid, cpu_info.cpu_num, cpu_info.cpu_loc, + &dev->hpa.start); + +#undef USE_PAT_CPUID #ifdef USE_PAT_CPUID /* We need contiguous numbers for cpuid. Firmware's notion * of cpuid is for physical CPUs and we just don't care yet. * We'll care when we need to query PAT PDC about a CPU *after* * boot time (ie shutdown a CPU from an OS perspective). */ - /* get the cpu number */ - status = pdc_pat_cpu_get_number(&cpu_info, dev->hpa.start); - - BUG_ON(PDC_OK != status); - if (cpu_info.cpu_num >= NR_CPUS) { - printk(KERN_WARNING "IGNORING CPU at 0x%x," + printk(KERN_WARNING "IGNORING CPU at %pa," " cpu_slot_id > NR_CPUS" " (%ld > %d)\n", - dev->hpa.start, cpu_info.cpu_num, NR_CPUS); + &dev->hpa.start, cpu_info.cpu_num, NR_CPUS); /* Ignore CPU since it will only crash */ boot_cpu_data.cpu_count--; return 1; diff --git a/arch/parisc/kernel/sys_parisc.c b/arch/parisc/kernel/sys_parisc.c index 0a393a04e891..a81e177cac7b 100644 --- a/arch/parisc/kernel/sys_parisc.c +++ b/arch/parisc/kernel/sys_parisc.c @@ -225,19 +225,17 @@ static unsigned long mmap_rnd(void) { unsigned long rnd = 0; - /* - * 8 bits of randomness in 32bit mmaps, 20 address space bits - * 28 bits of randomness in 64bit mmaps, 40 address space bits - */ - if (current->flags & PF_RANDOMIZE) { - if (is_32bit_task()) - rnd = get_random_int() % (1<<8); - else - rnd = get_random_int() % (1<<28); - } + if (current->flags & PF_RANDOMIZE) + rnd = get_random_int() & MMAP_RND_MASK; + return rnd << PAGE_SHIFT; } +unsigned long arch_mmap_rnd(void) +{ + return (get_random_int() & MMAP_RND_MASK) << PAGE_SHIFT; +} + static unsigned long mmap_legacy_base(void) { return TASK_UNMAPPED_BASE + mmap_rnd(); diff --git a/arch/parisc/kernel/time.c b/arch/parisc/kernel/time.c index 325f30d82b64..4215f5596c8b 100644 --- a/arch/parisc/kernel/time.c +++ b/arch/parisc/kernel/time.c @@ -59,10 +59,9 @@ static unsigned long clocktick __read_mostly; /* timer cycles per tick */ */ irqreturn_t __irq_entry timer_interrupt(int irq, void *dev_id) { - unsigned long now, now2; + unsigned long now; unsigned long next_tick; - unsigned long cycles_elapsed, ticks_elapsed = 1; - unsigned long cycles_remainder; + unsigned long ticks_elapsed = 0; unsigned int cpu = smp_processor_id(); struct cpuinfo_parisc *cpuinfo = &per_cpu(cpu_data, cpu); @@ -71,102 +70,49 @@ irqreturn_t __irq_entry timer_interrupt(int irq, void *dev_id) profile_tick(CPU_PROFILING); - /* Initialize next_tick to the expected tick time. */ + /* Initialize next_tick to the old expected tick time. */ next_tick = cpuinfo->it_value; - /* Get current cycle counter (Control Register 16). */ - now = mfctl(16); - - cycles_elapsed = now - next_tick; - - if ((cycles_elapsed >> 6) < cpt) { - /* use "cheap" math (add/subtract) instead - * of the more expensive div/mul method - */ - cycles_remainder = cycles_elapsed; - while (cycles_remainder > cpt) { - cycles_remainder -= cpt; - ticks_elapsed++; - } - } else { - /* TODO: Reduce this to one fdiv op */ - cycles_remainder = cycles_elapsed % cpt; - ticks_elapsed += cycles_elapsed / cpt; - } - - /* convert from "division remainder" to "remainder of clock tick" */ - cycles_remainder = cpt - cycles_remainder; - - /* Determine when (in CR16 cycles) next IT interrupt will fire. - * We want IT to fire modulo clocktick even if we miss/skip some. - * But those interrupts don't in fact get delivered that regularly. - */ - next_tick = now + cycles_remainder; + /* Calculate how many ticks have elapsed. */ + do { + ++ticks_elapsed; + next_tick += cpt; + now = mfctl(16); + } while (next_tick - now > cpt); + /* Store (in CR16 cycles) up to when we are accounting right now. */ cpuinfo->it_value = next_tick; - /* Program the IT when to deliver the next interrupt. - * Only bottom 32-bits of next_tick are writable in CR16! - */ - mtctl(next_tick, 16); + /* Go do system house keeping. */ + if (cpu == 0) + xtime_update(ticks_elapsed); + + update_process_times(user_mode(get_irq_regs())); - /* Skip one clocktick on purpose if we missed next_tick. + /* Skip clockticks on purpose if we know we would miss those. * The new CR16 must be "later" than current CR16 otherwise * itimer would not fire until CR16 wrapped - e.g 4 seconds * later on a 1Ghz processor. We'll account for the missed - * tick on the next timer interrupt. + * ticks on the next timer interrupt. + * We want IT to fire modulo clocktick even if we miss/skip some. + * But those interrupts don't in fact get delivered that regularly. * * "next_tick - now" will always give the difference regardless * if one or the other wrapped. If "now" is "bigger" we'll end up * with a very large unsigned number. */ - now2 = mfctl(16); - if (next_tick - now2 > cpt) - mtctl(next_tick+cpt, 16); + while (next_tick - mfctl(16) > cpt) + next_tick += cpt; -#if 1 -/* - * GGG: DEBUG code for how many cycles programming CR16 used. - */ - if (unlikely(now2 - now > 0x3000)) /* 12K cycles */ - printk (KERN_CRIT "timer_interrupt(CPU %d): SLOW! 0x%lx cycles!" - " cyc %lX rem %lX " - " next/now %lX/%lX\n", - cpu, now2 - now, cycles_elapsed, cycles_remainder, - next_tick, now ); -#endif - - /* Can we differentiate between "early CR16" (aka Scenario 1) and - * "long delay" (aka Scenario 3)? I don't think so. - * - * Timer_interrupt will be delivered at least a few hundred cycles - * after the IT fires. But it's arbitrary how much time passes - * before we call it "late". I've picked one second. - * - * It's important NO printk's are between reading CR16 and - * setting up the next value. May introduce huge variance. - */ - if (unlikely(ticks_elapsed > HZ)) { - /* Scenario 3: very long delay? bad in any case */ - printk (KERN_CRIT "timer_interrupt(CPU %d): delayed!" - " cycles %lX rem %lX " - " next/now %lX/%lX\n", - cpu, - cycles_elapsed, cycles_remainder, - next_tick, now ); - } - - /* Done mucking with unreliable delivery of interrupts. - * Go do system house keeping. + /* Program the IT when to deliver the next interrupt. + * Only bottom 32-bits of next_tick are writable in CR16! + * Timer interrupt will be delivered at least a few hundred cycles + * after the IT fires, so if we are too close (<= 500 cycles) to the + * next cycle, simply skip it. */ - - if (!--cpuinfo->prof_counter) { - cpuinfo->prof_counter = cpuinfo->prof_multiplier; - update_process_times(user_mode(get_irq_regs())); - } - - if (cpu == 0) - xtime_update(ticks_elapsed); + if (next_tick - mfctl(16) <= 500) + next_tick += cpt; + mtctl(next_tick, 16); return IRQ_HANDLED; } diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 3da87e198878..a8ee573fe610 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -469,6 +469,7 @@ config KEXEC config KEXEC_FILE bool "kexec file based system call" select KEXEC_CORE + select HAVE_IMA_KEXEC select BUILD_BIN2C depends on PPC64 depends on CRYPTO=y diff --git a/arch/powerpc/include/asm/ima.h b/arch/powerpc/include/asm/ima.h new file mode 100644 index 000000000000..2313bdface34 --- /dev/null +++ b/arch/powerpc/include/asm/ima.h @@ -0,0 +1,29 @@ +#ifndef _ASM_POWERPC_IMA_H +#define _ASM_POWERPC_IMA_H + +struct kimage; + +int ima_get_kexec_buffer(void **addr, size_t *size); +int ima_free_kexec_buffer(void); + +#ifdef CONFIG_IMA +void remove_ima_buffer(void *fdt, int chosen_node); +#else +static inline void remove_ima_buffer(void *fdt, int chosen_node) {} +#endif + +#ifdef CONFIG_IMA_KEXEC +int arch_ima_add_kexec_buffer(struct kimage *image, unsigned long load_addr, + size_t size); + +int setup_ima_buffer(const struct kimage *image, void *fdt, int chosen_node); +#else +static inline int setup_ima_buffer(const struct kimage *image, void *fdt, + int chosen_node) +{ + remove_ima_buffer(fdt, chosen_node); + return 0; +} +#endif /* CONFIG_IMA_KEXEC */ + +#endif /* _ASM_POWERPC_IMA_H */ diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h index 6c3b71502fbc..25668bc8cb2a 100644 --- a/arch/powerpc/include/asm/kexec.h +++ b/arch/powerpc/include/asm/kexec.h @@ -94,11 +94,22 @@ static inline bool kdump_in_progress(void) #ifdef CONFIG_KEXEC_FILE extern struct kexec_file_ops kexec_elf64_ops; +#ifdef CONFIG_IMA_KEXEC +#define ARCH_HAS_KIMAGE_ARCH + +struct kimage_arch { + phys_addr_t ima_buffer_addr; + size_t ima_buffer_size; +}; +#endif + int setup_purgatory(struct kimage *image, const void *slave_code, const void *fdt, unsigned long kernel_load_addr, unsigned long fdt_load_addr); -int setup_new_fdt(void *fdt, unsigned long initrd_load_addr, - unsigned long initrd_len, const char *cmdline); +int setup_new_fdt(const struct kimage *image, void *fdt, + unsigned long initrd_load_addr, unsigned long initrd_len, + const char *cmdline); +int delete_fdt_mem_rsv(void *fdt, unsigned long start, unsigned long size); #endif /* CONFIG_KEXEC_FILE */ #else /* !CONFIG_KEXEC_CORE */ diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index a3a6047fd395..23f8082d7bfa 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -112,6 +112,10 @@ obj-$(CONFIG_PCI_MSI) += msi.o obj-$(CONFIG_KEXEC_CORE) += machine_kexec.o crash.o \ machine_kexec_$(BITS).o obj-$(CONFIG_KEXEC_FILE) += machine_kexec_file_$(BITS).o kexec_elf_$(BITS).o +ifeq ($(CONFIG_HAVE_IMA_KEXEC)$(CONFIG_IMA),yy) +obj-y += ima_kexec.o +endif + obj-$(CONFIG_AUDIT) += audit.o obj64-$(CONFIG_AUDIT) += compat_audit.o diff --git a/arch/powerpc/kernel/ima_kexec.c b/arch/powerpc/kernel/ima_kexec.c new file mode 100644 index 000000000000..5ea42c937ca9 --- /dev/null +++ b/arch/powerpc/kernel/ima_kexec.c @@ -0,0 +1,223 @@ +/* + * Copyright (C) 2016 IBM Corporation + * + * Authors: + * Thiago Jung Bauermann <bauerman@linux.vnet.ibm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include <linux/slab.h> +#include <linux/kexec.h> +#include <linux/of.h> +#include <linux/memblock.h> +#include <linux/libfdt.h> + +static int get_addr_size_cells(int *addr_cells, int *size_cells) +{ + struct device_node *root; + + root = of_find_node_by_path("/"); + if (!root) + return -EINVAL; + + *addr_cells = of_n_addr_cells(root); + *size_cells = of_n_size_cells(root); + + of_node_put(root); + + return 0; +} + +static int do_get_kexec_buffer(const void *prop, int len, unsigned long *addr, + size_t *size) +{ + int ret, addr_cells, size_cells; + + ret = get_addr_size_cells(&addr_cells, &size_cells); + if (ret) + return ret; + + if (len < 4 * (addr_cells + size_cells)) + return -ENOENT; + + *addr = of_read_number(prop, addr_cells); + *size = of_read_number(prop + 4 * addr_cells, size_cells); + + return 0; +} + +/** + * ima_get_kexec_buffer - get IMA buffer from the previous kernel + * @addr: On successful return, set to point to the buffer contents. + * @size: On successful return, set to the buffer size. + * + * Return: 0 on success, negative errno on error. + */ +int ima_get_kexec_buffer(void **addr, size_t *size) +{ + int ret, len; + unsigned long tmp_addr; + size_t tmp_size; + const void *prop; + + prop = of_get_property(of_chosen, "linux,ima-kexec-buffer", &len); + if (!prop) + return -ENOENT; + + ret = do_get_kexec_buffer(prop, len, &tmp_addr, &tmp_size); + if (ret) + return ret; + + *addr = __va(tmp_addr); + *size = tmp_size; + + return 0; +} + +/** + * ima_free_kexec_buffer - free memory used by the IMA buffer + */ +int ima_free_kexec_buffer(void) +{ + int ret; + unsigned long addr; + size_t size; + struct property *prop; + + prop = of_find_property(of_chosen, "linux,ima-kexec-buffer", NULL); + if (!prop) + return -ENOENT; + + ret = do_get_kexec_buffer(prop->value, prop->length, &addr, &size); + if (ret) + return ret; + + ret = of_remove_property(of_chosen, prop); + if (ret) + return ret; + + return memblock_free(addr, size); + +} + +/** + * remove_ima_buffer - remove the IMA buffer property and reservation from @fdt + * + * The IMA measurement buffer is of no use to a subsequent kernel, so we always + * remove it from the device tree. + */ +void remove_ima_buffer(void *fdt, int chosen_node) +{ + int ret, len; + unsigned long addr; + size_t size; + const void *prop; + + prop = fdt_getprop(fdt, chosen_node, "linux,ima-kexec-buffer", &len); + if (!prop) + return; + + ret = do_get_kexec_buffer(prop, len, &addr, &size); + fdt_delprop(fdt, chosen_node, "linux,ima-kexec-buffer"); + if (ret) + return; + + ret = delete_fdt_mem_rsv(fdt, addr, size); + if (!ret) + pr_debug("Removed old IMA buffer reservation.\n"); +} + +#ifdef CONFIG_IMA_KEXEC +/** + * arch_ima_add_kexec_buffer - do arch-specific steps to add the IMA buffer + * + * Architectures should use this function to pass on the IMA buffer + * information to the next kernel. + * + * Return: 0 on success, negative errno on error. + */ +int arch_ima_add_kexec_buffer(struct kimage *image, unsigned long load_addr, + size_t size) +{ + image->arch.ima_buffer_addr = load_addr; + image->arch.ima_buffer_size = size; + + return 0; +} + +static int write_number(void *p, u64 value, int cells) +{ + if (cells == 1) { + u32 tmp; + + if (value > U32_MAX) + return -EINVAL; + + tmp = cpu_to_be32(value); + memcpy(p, &tmp, sizeof(tmp)); + } else if (cells == 2) { + u64 tmp; + + tmp = cpu_to_be64(value); + memcpy(p, &tmp, sizeof(tmp)); + } else + return -EINVAL; + + return 0; +} + +/** + * setup_ima_buffer - add IMA buffer information to the fdt + * @image: kexec image being loaded. + * @fdt: Flattened device tree for the next kernel. + * @chosen_node: Offset to the chosen node. + * + * Return: 0 on success, or negative errno on error. + */ +int setup_ima_buffer(const struct kimage *image, void *fdt, int chosen_node) +{ + int ret, addr_cells, size_cells, entry_size; + u8 value[16]; + + remove_ima_buffer(fdt, chosen_node); + if (!image->arch.ima_buffer_size) + return 0; + + ret = get_addr_size_cells(&addr_cells, &size_cells); + if (ret) + return ret; + + entry_size = 4 * (addr_cells + size_cells); + + if (entry_size > sizeof(value)) + return -EINVAL; + + ret = write_number(value, image->arch.ima_buffer_addr, addr_cells); + if (ret) + return ret; + + ret = write_number(value + 4 * addr_cells, image->arch.ima_buffer_size, + size_cells); + if (ret) + return ret; + + ret = fdt_setprop(fdt, chosen_node, "linux,ima-kexec-buffer", value, + entry_size); + if (ret < 0) + return -EINVAL; + + ret = fdt_add_mem_rsv(fdt, image->arch.ima_buffer_addr, + image->arch.ima_buffer_size); + if (ret) + return -EINVAL; + + pr_debug("IMA buffer at 0x%llx, size = 0x%zx\n", + image->arch.ima_buffer_addr, image->arch.ima_buffer_size); + + return 0; +} +#endif /* CONFIG_IMA_KEXEC */ diff --git a/arch/powerpc/kernel/kexec_elf_64.c b/arch/powerpc/kernel/kexec_elf_64.c index 6acffd34a70f..9a42309b091a 100644 --- a/arch/powerpc/kernel/kexec_elf_64.c +++ b/arch/powerpc/kernel/kexec_elf_64.c @@ -627,7 +627,7 @@ static void *elf64_load(struct kimage *image, char *kernel_buf, goto out; } - ret = setup_new_fdt(fdt, initrd_load_addr, initrd_len, cmdline); + ret = setup_new_fdt(image, fdt, initrd_load_addr, initrd_len, cmdline); if (ret) goto out; diff --git a/arch/powerpc/kernel/machine_kexec_file_64.c b/arch/powerpc/kernel/machine_kexec_file_64.c index 7abc8a75ee48..992c0d258e5d 100644 --- a/arch/powerpc/kernel/machine_kexec_file_64.c +++ b/arch/powerpc/kernel/machine_kexec_file_64.c @@ -27,6 +27,7 @@ #include <linux/memblock.h> #include <linux/of_fdt.h> #include <linux/libfdt.h> +#include <asm/ima.h> #define SLAVE_CODE_SIZE 256 @@ -180,7 +181,7 @@ int setup_purgatory(struct kimage *image, const void *slave_code, * * Return: 0 on success, or negative errno on error. */ -static int delete_fdt_mem_rsv(void *fdt, unsigned long start, unsigned long size) +int delete_fdt_mem_rsv(void *fdt, unsigned long start, unsigned long size) { int i, ret, num_rsvs = fdt_num_mem_rsv(fdt); @@ -209,6 +210,7 @@ static int delete_fdt_mem_rsv(void *fdt, unsigned long start, unsigned long size /* * setup_new_fdt - modify /chosen and memory reservation for the next kernel + * @image: kexec image being loaded. * @fdt: Flattened device tree for the next kernel. * @initrd_load_addr: Address where the next initrd will be loaded. * @initrd_len: Size of the next initrd, or 0 if there will be none. @@ -217,8 +219,9 @@ static int delete_fdt_mem_rsv(void *fdt, unsigned long start, unsigned long size * * Return: 0 on success, or negative errno on error. */ -int setup_new_fdt(void *fdt, unsigned long initrd_load_addr, - unsigned long initrd_len, const char *cmdline) +int setup_new_fdt(const struct kimage *image, void *fdt, + unsigned long initrd_load_addr, unsigned long initrd_len, + const char *cmdline) { int ret, chosen_node; const void *prop; @@ -328,6 +331,12 @@ int setup_new_fdt(void *fdt, unsigned long initrd_load_addr, } } + ret = setup_ima_buffer(image, fdt, chosen_node); + if (ret) { + pr_err("Error setting up the new device tree.\n"); + return ret; + } + ret = fdt_setprop(fdt, chosen_node, "linux,booted-from-kexec", NULL, 0); if (ret) { pr_err("Error setting up the new device tree.\n"); diff --git a/arch/powerpc/platforms/85xx/corenet_generic.c b/arch/powerpc/platforms/85xx/corenet_generic.c index 3803b0addf65..6c0ba75fb256 100644 --- a/arch/powerpc/platforms/85xx/corenet_generic.c +++ b/arch/powerpc/platforms/85xx/corenet_generic.c @@ -117,9 +117,6 @@ static const struct of_device_id of_device_ids[] = { { .compatible = "fsl,qe", }, - { - .compatible = "fsl,fman", - }, /* The following two are for the Freescale hypervisor */ { .name = "hypervisor", diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 64024c999531..e487493bbd47 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -412,6 +412,19 @@ config GOLDFISH def_bool y depends on X86_GOLDFISH +config INTEL_RDT_A + bool "Intel Resource Director Technology Allocation support" + default n + depends on X86 && CPU_SUP_INTEL + select KERNFS + help + Select to enable resource allocation which is a sub-feature of + Intel Resource Director Technology(RDT). More information about + RDT can be found in the Intel x86 Architecture Software + Developer Manual. + + Say N if unsure. + if X86_32 config X86_EXTENDED_PLATFORM bool "Support for extended (non-PC) x86 platforms" diff --git a/arch/x86/events/intel/cqm.c b/arch/x86/events/intel/cqm.c index 8f82b02934fa..0c45cc8e64ba 100644 --- a/arch/x86/events/intel/cqm.c +++ b/arch/x86/events/intel/cqm.c @@ -7,9 +7,9 @@ #include <linux/perf_event.h> #include <linux/slab.h> #include <asm/cpu_device_id.h> +#include <asm/intel_rdt_common.h> #include "../perf_event.h" -#define MSR_IA32_PQR_ASSOC 0x0c8f #define MSR_IA32_QM_CTR 0x0c8e #define MSR_IA32_QM_EVTSEL 0x0c8d @@ -24,32 +24,13 @@ static unsigned int cqm_l3_scale; /* supposedly cacheline size */ static bool cqm_enabled, mbm_enabled; unsigned int mbm_socket_max; -/** - * struct intel_pqr_state - State cache for the PQR MSR - * @rmid: The cached Resource Monitoring ID - * @closid: The cached Class Of Service ID - * @rmid_usecnt: The usage counter for rmid - * - * The upper 32 bits of MSR_IA32_PQR_ASSOC contain closid and the - * lower 10 bits rmid. The update to MSR_IA32_PQR_ASSOC always - * contains both parts, so we need to cache them. - * - * The cache also helps to avoid pointless updates if the value does - * not change. - */ -struct intel_pqr_state { - u32 rmid; - u32 closid; - int rmid_usecnt; -}; - /* * The cached intel_pqr_state is strictly per CPU and can never be * updated from a remote CPU. Both functions which modify the state * (intel_cqm_event_start and intel_cqm_event_stop) are called with * interrupts disabled, which is sufficient for the protection. */ -static DEFINE_PER_CPU(struct intel_pqr_state, pqr_state); +DEFINE_PER_CPU(struct intel_pqr_state, pqr_state); static struct hrtimer *mbm_timers; /** * struct sample - mbm event's (local or total) data diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 6ccbf1aaa7ce..eafee3161d1c 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -189,6 +189,9 @@ #define X86_FEATURE_CPB ( 7*32+ 2) /* AMD Core Performance Boost */ #define X86_FEATURE_EPB ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */ +#define X86_FEATURE_CAT_L3 ( 7*32+ 4) /* Cache Allocation Technology L3 */ +#define X86_FEATURE_CAT_L2 ( 7*32+ 5) /* Cache Allocation Technology L2 */ +#define X86_FEATURE_CDP_L3 ( 7*32+ 6) /* Code and Data Prioritization L3 */ #define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */ #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ @@ -222,6 +225,7 @@ #define X86_FEATURE_RTM ( 9*32+11) /* Restricted Transactional Memory */ #define X86_FEATURE_CQM ( 9*32+12) /* Cache QoS Monitoring */ #define X86_FEATURE_MPX ( 9*32+14) /* Memory Protection Extension */ +#define X86_FEATURE_RDT_A ( 9*32+15) /* Resource Director Technology Allocation */ #define X86_FEATURE_AVX512F ( 9*32+16) /* AVX-512 Foundation */ #define X86_FEATURE_AVX512DQ ( 9*32+17) /* AVX-512 DQ (Double/Quad granular) Instructions */ #define X86_FEATURE_RDSEED ( 9*32+18) /* The RDSEED instruction */ diff --git a/arch/x86/include/asm/intel_rdt.h b/arch/x86/include/asm/intel_rdt.h new file mode 100644 index 000000000000..95ce5c85b009 --- /dev/null +++ b/arch/x86/include/asm/intel_rdt.h @@ -0,0 +1,224 @@ +#ifndef _ASM_X86_INTEL_RDT_H +#define _ASM_X86_INTEL_RDT_H + +#ifdef CONFIG_INTEL_RDT_A + +#include <linux/kernfs.h> +#include <linux/jump_label.h> + +#include <asm/intel_rdt_common.h> + +#define IA32_L3_QOS_CFG 0xc81 +#define IA32_L3_CBM_BASE 0xc90 +#define IA32_L2_CBM_BASE 0xd10 + +#define L3_QOS_CDP_ENABLE 0x01ULL + +/** + * struct rdtgroup - store rdtgroup's data in resctrl file system. + * @kn: kernfs node + * @rdtgroup_list: linked list for all rdtgroups + * @closid: closid for this rdtgroup + * @cpu_mask: CPUs assigned to this rdtgroup + * @flags: status bits + * @waitcount: how many cpus expect to find this + * group when they acquire rdtgroup_mutex + */ +struct rdtgroup { + struct kernfs_node *kn; + struct list_head rdtgroup_list; + int closid; + struct cpumask cpu_mask; + int flags; + atomic_t waitcount; +}; + +/* rdtgroup.flags */ +#define RDT_DELETED 1 + +/* List of all resource groups */ +extern struct list_head rdt_all_groups; + +int __init rdtgroup_init(void); + +/** + * struct rftype - describe each file in the resctrl file system + * @name: file name + * @mode: access mode + * @kf_ops: operations + * @seq_show: show content of the file + * @write: write to the file + */ +struct rftype { + char *name; + umode_t mode; + struct kernfs_ops *kf_ops; + + int (*seq_show)(struct kernfs_open_file *of, + struct seq_file *sf, void *v); + /* + * write() is the generic write callback which maps directly to + * kernfs write operation and overrides all other operations. + * Maximum write size is determined by ->max_write_len. + */ + ssize_t (*write)(struct kernfs_open_file *of, + char *buf, size_t nbytes, loff_t off); +}; + +/** + * struct rdt_resource - attributes of an RDT resource + * @enabled: Is this feature enabled on this machine + * @capable: Is this feature available on this machine + * @name: Name to use in "schemata" file + * @num_closid: Number of CLOSIDs available + * @max_cbm: Largest Cache Bit Mask allowed + * @min_cbm_bits: Minimum number of consecutive bits to be set + * in a cache bit mask + * @domains: All domains for this resource + * @num_domains: Number of domains active + * @msr_base: Base MSR address for CBMs + * @tmp_cbms: Scratch space when updating schemata + * @num_tmp_cbms: Number of CBMs in tmp_cbms + * @cache_level: Which cache level defines scope of this domain + * @cbm_idx_multi: Multiplier of CBM index + * @cbm_idx_offset: Offset of CBM index. CBM index is computed by: + * closid * cbm_idx_multi + cbm_idx_offset + */ +struct rdt_resource { + bool enabled; + bool capable; + char *name; + int num_closid; + int cbm_len; + int min_cbm_bits; + u32 max_cbm; + struct list_head domains; + int num_domains; + int msr_base; + u32 *tmp_cbms; + int num_tmp_cbms; + int cache_level; + int cbm_idx_multi; + int cbm_idx_offset; +}; + +/** + * struct rdt_domain - group of cpus sharing an RDT resource + * @list: all instances of this resource + * @id: unique id for this instance + * @cpu_mask: which cpus share this resource + * @cbm: array of cache bit masks (indexed by CLOSID) + */ +struct rdt_domain { + struct list_head list; + int id; + struct cpumask cpu_mask; + u32 *cbm; +}; + +/** + * struct msr_param - set a range of MSRs from a domain + * @res: The resource to use + * @low: Beginning index from base MSR + * @high: End index + */ +struct msr_param { + struct rdt_resource *res; + int low; + int high; +}; + +extern struct mutex rdtgroup_mutex; + +extern struct rdt_resource rdt_resources_all[]; +extern struct rdtgroup rdtgroup_default; +DECLARE_STATIC_KEY_FALSE(rdt_enable_key); + +int __init rdtgroup_init(void); + +enum { + RDT_RESOURCE_L3, + RDT_RESOURCE_L3DATA, + RDT_RESOURCE_L3CODE, + RDT_RESOURCE_L2, + + /* Must be the last */ + RDT_NUM_RESOURCES, +}; + +#define for_each_capable_rdt_resource(r) \ + for (r = rdt_resources_all; r < rdt_resources_all + RDT_NUM_RESOURCES;\ + r++) \ + if (r->capable) + +#define for_each_enabled_rdt_resource(r) \ + for (r = rdt_resources_all; r < rdt_resources_all + RDT_NUM_RESOURCES;\ + r++) \ + if (r->enabled) + +/* CPUID.(EAX=10H, ECX=ResID=1).EAX */ +union cpuid_0x10_1_eax { + struct { + unsigned int cbm_len:5; + } split; + unsigned int full; +}; + +/* CPUID.(EAX=10H, ECX=ResID=1).EDX */ +union cpuid_0x10_1_edx { + struct { + unsigned int cos_max:16; + } split; + unsigned int full; +}; + +DECLARE_PER_CPU_READ_MOSTLY(int, cpu_closid); + +void rdt_cbm_update(void *arg); +struct rdtgroup *rdtgroup_kn_lock_live(struct kernfs_node *kn); +void rdtgroup_kn_unlock(struct kernfs_node *kn); +ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of, + char *buf, size_t nbytes, loff_t off); +int rdtgroup_schemata_show(struct kernfs_open_file *of, + struct seq_file *s, void *v); + +/* + * intel_rdt_sched_in() - Writes the task's CLOSid to IA32_PQR_MSR + * + * Following considerations are made so that this has minimal impact + * on scheduler hot path: + * - This will stay as no-op unless we are running on an Intel SKU + * which supports resource control and we enable by mounting the + * resctrl file system. + * - Caches the per cpu CLOSid values and does the MSR write only + * when a task with a different CLOSid is scheduled in. + * + * Must be called with preemption disabled. + */ +static inline void intel_rdt_sched_in(void) +{ + if (static_branch_likely(&rdt_enable_key)) { + struct intel_pqr_state *state = this_cpu_ptr(&pqr_state); + int closid; + + /* + * If this task has a closid assigned, use it. + * Else use the closid assigned to this cpu. + */ + closid = current->closid; + if (closid == 0) + closid = this_cpu_read(cpu_closid); + + if (closid != state->closid) { + state->closid = closid; + wrmsr(MSR_IA32_PQR_ASSOC, state->rmid, closid); + } + } +} + +#else + +static inline void intel_rdt_sched_in(void) {} + +#endif /* CONFIG_INTEL_RDT_A */ +#endif /* _ASM_X86_INTEL_RDT_H */ diff --git a/arch/x86/include/asm/intel_rdt_common.h b/arch/x86/include/asm/intel_rdt_common.h new file mode 100644 index 000000000000..b31081b89407 --- /dev/null +++ b/arch/x86/include/asm/intel_rdt_common.h @@ -0,0 +1,27 @@ +#ifndef _ASM_X86_INTEL_RDT_COMMON_H +#define _ASM_X86_INTEL_RDT_COMMON_H + +#define MSR_IA32_PQR_ASSOC 0x0c8f + +/** + * struct intel_pqr_state - State cache for the PQR MSR + * @rmid: The cached Resource Monitoring ID + * @closid: The cached Class Of Service ID + * @rmid_usecnt: The usage counter for rmid + * + * The upper 32 bits of MSR_IA32_PQR_ASSOC contain closid and the + * lower 10 bits rmid. The update to MSR_IA32_PQR_ASSOC always + * contains both parts, so we need to cache them. + * + * The cache also helps to avoid pointless updates if the value does + * not change. + */ +struct intel_pqr_state { + u32 rmid; + u32 closid; + int rmid_usecnt; +}; + +DECLARE_PER_CPU(struct intel_pqr_state, pqr_state); + +#endif /* _ASM_X86_INTEL_RDT_COMMON_H */ diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 33b63670bf09..52000010c62e 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -32,6 +32,8 @@ obj-$(CONFIG_CPU_SUP_CENTAUR) += centaur.o obj-$(CONFIG_CPU_SUP_TRANSMETA_32) += transmeta.o obj-$(CONFIG_CPU_SUP_UMC_32) += umc.o +obj-$(CONFIG_INTEL_RDT_A) += intel_rdt.o intel_rdt_rdtgroup.o intel_rdt_schemata.o + obj-$(CONFIG_X86_MCE) += mcheck/ obj-$(CONFIG_MTRR) += mtrr/ obj-$(CONFIG_MICROCODE) += microcode/ diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index be6337156502..0282b0df004a 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -153,6 +153,7 @@ struct _cpuid4_info_regs { union _cpuid4_leaf_eax eax; union _cpuid4_leaf_ebx ebx; union _cpuid4_leaf_ecx ecx; + unsigned int id; unsigned long size; struct amd_northbridge *nb; }; @@ -894,6 +895,8 @@ static void __cache_cpumap_setup(unsigned int cpu, int index, static void ci_leaf_init(struct cacheinfo *this_leaf, struct _cpuid4_info_regs *base) { + this_leaf->id = base->id; + this_leaf->attributes = CACHE_ID; this_leaf->level = base->eax.split.level; this_leaf->type = cache_type_map[base->eax.split.type]; this_leaf->coherency_line_size = @@ -920,6 +923,22 @@ static int __init_cache_level(unsigned int cpu) return 0; } +/* + * The max shared threads number comes from CPUID.4:EAX[25-14] with input + * ECX as cache index. Then right shift apicid by the number's order to get + * cache id for this cache node. + */ +static void get_cache_id(int cpu, struct _cpuid4_info_regs *id4_regs) +{ + struct cpuinfo_x86 *c = &cpu_data(cpu); + unsigned long num_threads_sharing; + int index_msb; + + num_threads_sharing = 1 + id4_regs->eax.split.num_threads_sharing; + index_msb = get_count_order(num_threads_sharing); + id4_regs->id = c->apicid >> index_msb; +} + static int __populate_cache_leaves(unsigned int cpu) { unsigned int idx, ret; @@ -931,6 +950,7 @@ static int __populate_cache_leaves(unsigned int cpu) ret = cpuid4_cache_lookup_regs(idx, &id4_regs); if (ret) return ret; + get_cache_id(cpu, &id4_regs); ci_leaf_init(this_leaf++, &id4_regs); __cache_cpumap_setup(cpu, idx, &id4_regs); } diff --git a/arch/x86/kernel/cpu/intel_rdt.c b/arch/x86/kernel/cpu/intel_rdt.c new file mode 100644 index 000000000000..5a533fefefa0 --- /dev/null +++ b/arch/x86/kernel/cpu/intel_rdt.c @@ -0,0 +1,403 @@ +/* + * Resource Director Technology(RDT) + * - Cache Allocation code. + * + * Copyright (C) 2016 Intel Corporation + * + * Authors: + * Fenghua Yu <fenghua.yu@intel.com> + * Tony Luck <tony.luck@intel.com> + * Vikas Shivappa <vikas.shivappa@intel.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * More information about RDT be found in the Intel (R) x86 Architecture + * Software Developer Manual June 2016, volume 3, section 17.17. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/slab.h> +#include <linux/err.h> +#include <linux/cacheinfo.h> +#include <linux/cpuhotplug.h> + +#include <asm/intel-family.h> +#include <asm/intel_rdt.h> + +/* Mutex to protect rdtgroup access. */ +DEFINE_MUTEX(rdtgroup_mutex); + +DEFINE_PER_CPU_READ_MOSTLY(int, cpu_closid); + +#define domain_init(id) LIST_HEAD_INIT(rdt_resources_all[id].domains) + +struct rdt_resource rdt_resources_all[] = { + { + .name = "L3", + .domains = domain_init(RDT_RESOURCE_L3), + .msr_base = IA32_L3_CBM_BASE, + .min_cbm_bits = 1, + .cache_level = 3, + .cbm_idx_multi = 1, + .cbm_idx_offset = 0 + }, + { + .name = "L3DATA", + .domains = domain_init(RDT_RESOURCE_L3DATA), + .msr_base = IA32_L3_CBM_BASE, + .min_cbm_bits = 1, + .cache_level = 3, + .cbm_idx_multi = 2, + .cbm_idx_offset = 0 + }, + { + .name = "L3CODE", + .domains = domain_init(RDT_RESOURCE_L3CODE), + .msr_base = IA32_L3_CBM_BASE, + .min_cbm_bits = 1, + .cache_level = 3, + .cbm_idx_multi = 2, + .cbm_idx_offset = 1 + }, + { + .name = "L2", + .domains = domain_init(RDT_RESOURCE_L2), + .msr_base = IA32_L2_CBM_BASE, + .min_cbm_bits = 1, + .cache_level = 2, + .cbm_idx_multi = 1, + .cbm_idx_offset = 0 + }, +}; + +static int cbm_idx(struct rdt_resource *r, int closid) +{ + return closid * r->cbm_idx_multi + r->cbm_idx_offset; +} + +/* + * cache_alloc_hsw_probe() - Have to probe for Intel haswell server CPUs + * as they do not have CPUID enumeration support for Cache allocation. + * The check for Vendor/Family/Model is not enough to guarantee that + * the MSRs won't #GP fault because only the following SKUs support + * CAT: + * Intel(R) Xeon(R) CPU E5-2658 v3 @ 2.20GHz + * Intel(R) Xeon(R) CPU E5-2648L v3 @ 1.80GHz + * Intel(R) Xeon(R) CPU E5-2628L v3 @ 2.00GHz + * Intel(R) Xeon(R) CPU E5-2618L v3 @ 2.30GHz + * Intel(R) Xeon(R) CPU E5-2608L v3 @ 2.00GHz + * Intel(R) Xeon(R) CPU E5-2658A v3 @ 2.20GHz + * + * Probe by trying to write the first of the L3 cach mask registers + * and checking that the bits stick. Max CLOSids is always 4 and max cbm length + * is always 20 on hsw server parts. The minimum cache bitmask length + * allowed for HSW server is always 2 bits. Hardcode all of them. + */ +static inline bool cache_alloc_hsw_probe(void) +{ + if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && + boot_cpu_data.x86 == 6 && + boot_cpu_data.x86_model == INTEL_FAM6_HASWELL_X) { + struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3]; + u32 l, h, max_cbm = BIT_MASK(20) - 1; + + if (wrmsr_safe(IA32_L3_CBM_BASE, max_cbm, 0)) + return false; + rdmsr(IA32_L3_CBM_BASE, l, h); + + /* If all the bits were set in MSR, return success */ + if (l != max_cbm) + return false; + + r->num_closid = 4; + r->cbm_len = 20; + r->max_cbm = max_cbm; + r->min_cbm_bits = 2; + r->capable = true; + r->enabled = true; + + return true; + } + + return false; +} + +static void rdt_get_config(int idx, struct rdt_resource *r) +{ + union cpuid_0x10_1_eax eax; + union cpuid_0x10_1_edx edx; + u32 ebx, ecx; + + cpuid_count(0x00000010, idx, &eax.full, &ebx, &ecx, &edx.full); + r->num_closid = edx.split.cos_max + 1; + r->cbm_len = eax.split.cbm_len + 1; + r->max_cbm = BIT_MASK(eax.split.cbm_len + 1) - 1; + r->capable = true; + r->enabled = true; +} + +static void rdt_get_cdp_l3_config(int type) +{ + struct rdt_resource *r_l3 = &rdt_resources_all[RDT_RESOURCE_L3]; + struct rdt_resource *r = &rdt_resources_all[type]; + + r->num_closid = r_l3->num_closid / 2; + r->cbm_len = r_l3->cbm_len; + r->max_cbm = r_l3->max_cbm; + r->capable = true; + /* + * By default, CDP is disabled. CDP can be enabled by mount parameter + * "cdp" during resctrl file system mount time. + */ + r->enabled = false; +} + +static inline bool get_rdt_resources(void) +{ + bool ret = false; + + if (cache_alloc_hsw_probe()) + return true; + + if (!boot_cpu_has(X86_FEATURE_RDT_A)) + return false; + + if (boot_cpu_has(X86_FEATURE_CAT_L3)) { + rdt_get_config(1, &rdt_resources_all[RDT_RESOURCE_L3]); + if (boot_cpu_has(X86_FEATURE_CDP_L3)) { + rdt_get_cdp_l3_config(RDT_RESOURCE_L3DATA); + rdt_get_cdp_l3_config(RDT_RESOURCE_L3CODE); + } + ret = true; + } + if (boot_cpu_has(X86_FEATURE_CAT_L2)) { + /* CPUID 0x10.2 fields are same format at 0x10.1 */ + rdt_get_config(2, &rdt_resources_all[RDT_RESOURCE_L2]); + ret = true; + } + + return ret; +} + +static int get_cache_id(int cpu, int level) +{ + struct cpu_cacheinfo *ci = get_cpu_cacheinfo(cpu); + int i; + + for (i = 0; i < ci->num_leaves; i++) { + if (ci->info_list[i].level == level) + return ci->info_list[i].id; + } + + return -1; +} + +void rdt_cbm_update(void *arg) +{ + struct msr_param *m = (struct msr_param *)arg; + struct rdt_resource *r = m->res; + int i, cpu = smp_processor_id(); + struct rdt_domain *d; + + list_for_each_entry(d, &r->domains, list) { + /* Find the domain that contains this CPU */ + if (cpumask_test_cpu(cpu, &d->cpu_mask)) + goto found; + } + pr_info_once("cpu %d not found in any domain for resource %s\n", + cpu, r->name); + + return; + +found: + for (i = m->low; i < m->high; i++) { + int idx = cbm_idx(r, i); + + wrmsrl(r->msr_base + idx, d->cbm[i]); + } +} + +/* + * rdt_find_domain - Find a domain in a resource that matches input resource id + * + * Search resource r's domain list to find the resource id. If the resource + * id is found in a domain, return the domain. Otherwise, if requested by + * caller, return the first domain whose id is bigger than the input id. + * The domain list is sorted by id in ascending order. + */ +static struct rdt_domain *rdt_find_domain(struct rdt_resource *r, int id, + struct list_head **pos) +{ + struct rdt_domain *d; + struct list_head *l; + + if (id < 0) + return ERR_PTR(id); + + list_for_each(l, &r->domains) { + d = list_entry(l, struct rdt_domain, list); + /* When id is found, return its domain. */ + if (id == d->id) + return d; + /* Stop searching when finding id's position in sorted list. */ + if (id < d->id) + break; + } + + if (pos) + *pos = l; + + return NULL; +} + +/* + * domain_add_cpu - Add a cpu to a resource's domain list. + * + * If an existing domain in the resource r's domain list matches the cpu's + * resource id, add the cpu in the domain. + * + * Otherwise, a new domain is allocated and inserted into the right position + * in the domain list sorted by id in ascending order. + * + * The order in the domain list is visible to users when we print entries + * in the schemata file and schemata input is validated to have the same order + * as this list. + */ +static void domain_add_cpu(int cpu, struct rdt_resource *r) +{ + int i, id = get_cache_id(cpu, r->cache_level); + struct list_head *add_pos = NULL; + struct rdt_domain *d; + + d = rdt_find_domain(r, id, &add_pos); + if (IS_ERR(d)) { + pr_warn("Could't find cache id for cpu %d\n", cpu); + return; + } + + if (d) { + cpumask_set_cpu(cpu, &d->cpu_mask); + return; + } + + d = kzalloc_node(sizeof(*d), GFP_KERNEL, cpu_to_node(cpu)); + if (!d) + return; + + d->id = id; + + d->cbm = kmalloc_array(r->num_closid, sizeof(*d->cbm), GFP_KERNEL); + if (!d->cbm) { + kfree(d); + return; + } + + for (i = 0; i < r->num_closid; i++) { + int idx = cbm_idx(r, i); + + d->cbm[i] = r->max_cbm; + wrmsrl(r->msr_base + idx, d->cbm[i]); + } + + cpumask_set_cpu(cpu, &d->cpu_mask); + list_add_tail(&d->list, add_pos); + r->num_domains++; +} + +static void domain_remove_cpu(int cpu, struct rdt_resource *r) +{ + int id = get_cache_id(cpu, r->cache_level); + struct rdt_domain *d; + + d = rdt_find_domain(r, id, NULL); + if (IS_ERR_OR_NULL(d)) { + pr_warn("Could't find cache id for cpu %d\n", cpu); + return; + } + + cpumask_clear_cpu(cpu, &d->cpu_mask); + if (cpumask_empty(&d->cpu_mask)) { + r->num_domains--; + kfree(d->cbm); + list_del(&d->list); + kfree(d); + } +} + +static void clear_closid(int cpu) +{ + struct intel_pqr_state *state = this_cpu_ptr(&pqr_state); + + per_cpu(cpu_closid, cpu) = 0; + state->closid = 0; + wrmsr(MSR_IA32_PQR_ASSOC, state->rmid, 0); +} + +static int intel_rdt_online_cpu(unsigned int cpu) +{ + struct rdt_resource *r; + + mutex_lock(&rdtgroup_mutex); + for_each_capable_rdt_resource(r) + domain_add_cpu(cpu, r); + /* The cpu is set in default rdtgroup after online. */ + cpumask_set_cpu(cpu, &rdtgroup_default.cpu_mask); + clear_closid(cpu); + mutex_unlock(&rdtgroup_mutex); + + return 0; +} + +static int intel_rdt_offline_cpu(unsigned int cpu) +{ + struct rdtgroup *rdtgrp; + struct rdt_resource *r; + + mutex_lock(&rdtgroup_mutex); + for_each_capable_rdt_resource(r) + domain_remove_cpu(cpu, r); + list_for_each_entry(rdtgrp, &rdt_all_groups, rdtgroup_list) { + if (cpumask_test_and_clear_cpu(cpu, &rdtgrp->cpu_mask)) + break; + } + clear_closid(cpu); + mutex_unlock(&rdtgroup_mutex); + + return 0; +} + +static int __init intel_rdt_late_init(void) +{ + struct rdt_resource *r; + int state, ret; + + if (!get_rdt_resources()) + return -ENODEV; + + state = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, + "x86/rdt/cat:online:", + intel_rdt_online_cpu, intel_rdt_offline_cpu); + if (state < 0) + return state; + + ret = rdtgroup_init(); + if (ret) { + cpuhp_remove_state(state); + return ret; + } + + for_each_capable_rdt_resource(r) + pr_info("Intel RDT %s allocation detected\n", r->name); + + return 0; +} + +late_initcall(intel_rdt_late_init); diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c new file mode 100644 index 000000000000..8af04afdfcb9 --- /dev/null +++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c @@ -0,0 +1,1115 @@ +/* + * User interface for Resource Alloction in Resource Director Technology(RDT) + * + * Copyright (C) 2016 Intel Corporation + * + * Author: Fenghua Yu <fenghua.yu@intel.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * More information about RDT be found in the Intel (R) x86 Architecture + * Software Developer Manual. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/cpu.h> +#include <linux/fs.h> +#include <linux/sysfs.h> +#include <linux/kernfs.h> +#include <linux/seq_file.h> +#include <linux/sched.h> +#include <linux/slab.h> +#include <linux/cpu.h> +#include <linux/task_work.h> + +#include <uapi/linux/magic.h> + +#include <asm/intel_rdt.h> +#include <asm/intel_rdt_common.h> + +DEFINE_STATIC_KEY_FALSE(rdt_enable_key); +struct kernfs_root *rdt_root; +struct rdtgroup rdtgroup_default; +LIST_HEAD(rdt_all_groups); + +/* Kernel fs node for "info" directory under root */ +static struct kernfs_node *kn_info; + +/* + * Trivial allocator for CLOSIDs. Since h/w only supports a small number, + * we can keep a bitmap of free CLOSIDs in a single integer. + * + * Using a global CLOSID across all resources has some advantages and + * some drawbacks: + * + We can simply set "current->closid" to assign a task to a resource + * group. + * + Context switch code can avoid extra memory references deciding which + * CLOSID to load into the PQR_ASSOC MSR + * - We give up some options in configuring resource groups across multi-socket + * systems. + * - Our choices on how to configure each resource become progressively more + * limited as the number of resources grows. + */ +static int closid_free_map; + +static void closid_init(void) +{ + struct rdt_resource *r; + int rdt_min_closid = 32; + + /* Compute rdt_min_closid across all resources */ + for_each_enabled_rdt_resource(r) + rdt_min_closid = min(rdt_min_closid, r->num_closid); + + closid_free_map = BIT_MASK(rdt_min_closid) - 1; + + /* CLOSID 0 is always reserved for the default group */ + closid_free_map &= ~1; +} + +int closid_alloc(void) +{ + int closid = ffs(closid_free_map); + + if (closid == 0) + return -ENOSPC; + closid--; + closid_free_map &= ~(1 << closid); + + return closid; +} + +static void closid_free(int closid) +{ + closid_free_map |= 1 << closid; +} + +/* set uid and gid of rdtgroup dirs and files to that of the creator */ +static int rdtgroup_kn_set_ugid(struct kernfs_node *kn) +{ + struct iattr iattr = { .ia_valid = ATTR_UID | ATTR_GID, + .ia_uid = current_fsuid(), + .ia_gid = current_fsgid(), }; + + if (uid_eq(iattr.ia_uid, GLOBAL_ROOT_UID) && + gid_eq(iattr.ia_gid, GLOBAL_ROOT_GID)) + return 0; + + return kernfs_setattr(kn, &iattr); +} + +static int rdtgroup_add_file(struct kernfs_node *parent_kn, struct rftype *rft) +{ + struct kernfs_node *kn; + int ret; + + kn = __kernfs_create_file(parent_kn, rft->name, rft->mode, + 0, rft->kf_ops, rft, NULL, NULL); + if (IS_ERR(kn)) + return PTR_ERR(kn); + + ret = rdtgroup_kn_set_ugid(kn); + if (ret) { + kernfs_remove(kn); + return ret; + } + + return 0; +} + +static int rdtgroup_add_files(struct kernfs_node *kn, struct rftype *rfts, + int len) +{ + struct rftype *rft; + int ret; + + lockdep_assert_held(&rdtgroup_mutex); + + for (rft = rfts; rft < rfts + len; rft++) { + ret = rdtgroup_add_file(kn, rft); + if (ret) + goto error; + } + + return 0; +error: + pr_warn("Failed to add %s, err=%d\n", rft->name, ret); + while (--rft >= rfts) + kernfs_remove_by_name(kn, rft->name); + return ret; +} + +static int rdtgroup_seqfile_show(struct seq_file *m, void *arg) +{ + struct kernfs_open_file *of = m->private; + struct rftype *rft = of->kn->priv; + + if (rft->seq_show) + return rft->seq_show(of, m, arg); + return 0; +} + +static ssize_t rdtgroup_file_write(struct kernfs_open_file *of, char *buf, + size_t nbytes, loff_t off) +{ + struct rftype *rft = of->kn->priv; + + if (rft->write) + return rft->write(of, buf, nbytes, off); + + return -EINVAL; +} + +static struct kernfs_ops rdtgroup_kf_single_ops = { + .atomic_write_len = PAGE_SIZE, + .write = rdtgroup_file_write, + .seq_show = rdtgroup_seqfile_show, +}; + +static int rdtgroup_cpus_show(struct kernfs_open_file *of, + struct seq_file *s, void *v) +{ + struct rdtgroup *rdtgrp; + int ret = 0; + + rdtgrp = rdtgroup_kn_lock_live(of->kn); + + if (rdtgrp) + seq_printf(s, "%*pb\n", cpumask_pr_args(&rdtgrp->cpu_mask)); + else + ret = -ENOENT; + rdtgroup_kn_unlock(of->kn); + + return ret; +} + +/* + * This is safe against intel_rdt_sched_in() called from __switch_to() + * because __switch_to() is executed with interrupts disabled. A local call + * from rdt_update_closid() is proteced against __switch_to() because + * preemption is disabled. + */ +static void rdt_update_cpu_closid(void *closid) +{ + if (closid) + this_cpu_write(cpu_closid, *(int *)closid); + /* + * We cannot unconditionally write the MSR because the current + * executing task might have its own closid selected. Just reuse + * the context switch code. + */ + intel_rdt_sched_in(); +} + +/* + * Update the PGR_ASSOC MSR on all cpus in @cpu_mask, + * + * Per task closids must have been set up before calling this function. + * + * The per cpu closids are updated with the smp function call, when @closid + * is not NULL. If @closid is NULL then all affected percpu closids must + * have been set up before calling this function. + */ +static void +rdt_update_closid(const struct cpumask *cpu_mask, int *closid) +{ + int cpu = get_cpu(); + + if (cpumask_test_cpu(cpu, cpu_mask)) + rdt_update_cpu_closid(closid); + smp_call_function_many(cpu_mask, rdt_update_cpu_closid, closid, 1); + put_cpu(); +} + +static ssize_t rdtgroup_cpus_write(struct kernfs_open_file *of, + char *buf, size_t nbytes, loff_t off) +{ + cpumask_var_t tmpmask, newmask; + struct rdtgroup *rdtgrp, *r; + int ret; + + if (!buf) + return -EINVAL; + + if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL)) + return -ENOMEM; + if (!zalloc_cpumask_var(&newmask, GFP_KERNEL)) { + free_cpumask_var(tmpmask); + return -ENOMEM; + } + + rdtgrp = rdtgroup_kn_lock_live(of->kn); + if (!rdtgrp) { + ret = -ENOENT; + goto unlock; + } + + ret = cpumask_parse(buf, newmask); + if (ret) + goto unlock; + + /* check that user didn't specify any offline cpus */ + cpumask_andnot(tmpmask, newmask, cpu_online_mask); + if (cpumask_weight(tmpmask)) { + ret = -EINVAL; + goto unlock; + } + + /* Check whether cpus are dropped from this group */ + cpumask_andnot(tmpmask, &rdtgrp->cpu_mask, newmask); + if (cpumask_weight(tmpmask)) { + /* Can't drop from default group */ + if (rdtgrp == &rdtgroup_default) { + ret = -EINVAL; + goto unlock; + } + /* Give any dropped cpus to rdtgroup_default */ + cpumask_or(&rdtgroup_default.cpu_mask, + &rdtgroup_default.cpu_mask, tmpmask); + rdt_update_closid(tmpmask, &rdtgroup_default.closid); + } + + /* + * If we added cpus, remove them from previous group that owned them + * and update per-cpu closid + */ + cpumask_andnot(tmpmask, newmask, &rdtgrp->cpu_mask); + if (cpumask_weight(tmpmask)) { + list_for_each_entry(r, &rdt_all_groups, rdtgroup_list) { + if (r == rdtgrp) + continue; + cpumask_andnot(&r->cpu_mask, &r->cpu_mask, tmpmask); + } + rdt_update_closid(tmpmask, &rdtgrp->closid); + } + + /* Done pushing/pulling - update this group with new mask */ + cpumask_copy(&rdtgrp->cpu_mask, newmask); + +unlock: + rdtgroup_kn_unlock(of->kn); + free_cpumask_var(tmpmask); + free_cpumask_var(newmask); + + return ret ?: nbytes; +} + +struct task_move_callback { + struct callback_head work; + struct rdtgroup *rdtgrp; +}; + +static void move_myself(struct callback_head *head) +{ + struct task_move_callback *callback; + struct rdtgroup *rdtgrp; + + callback = container_of(head, struct task_move_callback, work); + rdtgrp = callback->rdtgrp; + + /* + * If resource group was deleted before this task work callback + * was invoked, then assign the task to root group and free the + * resource group. + */ + if (atomic_dec_and_test(&rdtgrp->waitcount) && + (rdtgrp->flags & RDT_DELETED)) { + current->closid = 0; + kfree(rdtgrp); + } + + preempt_disable(); + /* update PQR_ASSOC MSR to make resource group go into effect */ + intel_rdt_sched_in(); + preempt_enable(); + + kfree(callback); +} + +static int __rdtgroup_move_task(struct task_struct *tsk, + struct rdtgroup *rdtgrp) +{ + struct task_move_callback *callback; + int ret; + + callback = kzalloc(sizeof(*callback), GFP_KERNEL); + if (!callback) + return -ENOMEM; + callback->work.func = move_myself; + callback->rdtgrp = rdtgrp; + + /* + * Take a refcount, so rdtgrp cannot be freed before the + * callback has been invoked. + */ + atomic_inc(&rdtgrp->waitcount); + ret = task_work_add(tsk, &callback->work, true); + if (ret) { + /* + * Task is exiting. Drop the refcount and free the callback. + * No need to check the refcount as the group cannot be + * deleted before the write function unlocks rdtgroup_mutex. + */ + atomic_dec(&rdtgrp->waitcount); + kfree(callback); + } else { + tsk->closid = rdtgrp->closid; + } + return ret; +} + +static int rdtgroup_task_write_permission(struct task_struct *task, + struct kernfs_open_file *of) +{ + const struct cred *tcred = get_task_cred(task); + const struct cred *cred = current_cred(); + int ret = 0; + + /* + * Even if we're attaching all tasks in the thread group, we only + * need to check permissions on one of them. + */ + if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) && + !uid_eq(cred->euid, tcred->uid) && + !uid_eq(cred->euid, tcred->suid)) + ret = -EPERM; + + put_cred(tcred); + return ret; +} + +static int rdtgroup_move_task(pid_t pid, struct rdtgroup *rdtgrp, + struct kernfs_open_file *of) +{ + struct task_struct *tsk; + int ret; + + rcu_read_lock(); + if (pid) { + tsk = find_task_by_vpid(pid); + if (!tsk) { + rcu_read_unlock(); + return -ESRCH; + } + } else { + tsk = current; + } + + get_task_struct(tsk); + rcu_read_unlock(); + + ret = rdtgroup_task_write_permission(tsk, of); + if (!ret) + ret = __rdtgroup_move_task(tsk, rdtgrp); + + put_task_struct(tsk); + return ret; +} + +static ssize_t rdtgroup_tasks_write(struct kernfs_open_file *of, + char *buf, size_t nbytes, loff_t off) +{ + struct rdtgroup *rdtgrp; + int ret = 0; + pid_t pid; + + if (kstrtoint(strstrip(buf), 0, &pid) || pid < 0) + return -EINVAL; + rdtgrp = rdtgroup_kn_lock_live(of->kn); + + if (rdtgrp) + ret = rdtgroup_move_task(pid, rdtgrp, of); + else + ret = -ENOENT; + + rdtgroup_kn_unlock(of->kn); + + return ret ?: nbytes; +} + +static void show_rdt_tasks(struct rdtgroup *r, struct seq_file *s) +{ + struct task_struct *p, *t; + + rcu_read_lock(); + for_each_process_thread(p, t) { + if (t->closid == r->closid) + seq_printf(s, "%d\n", t->pid); + } + rcu_read_unlock(); +} + +static int rdtgroup_tasks_show(struct kernfs_open_file *of, + struct seq_file *s, void *v) +{ + struct rdtgroup *rdtgrp; + int ret = 0; + + rdtgrp = rdtgroup_kn_lock_live(of->kn); + if (rdtgrp) + show_rdt_tasks(rdtgrp, s); + else + ret = -ENOENT; + rdtgroup_kn_unlock(of->kn); + + return ret; +} + +/* Files in each rdtgroup */ +static struct rftype rdtgroup_base_files[] = { + { + .name = "cpus", + .mode = 0644, + .kf_ops = &rdtgroup_kf_single_ops, + .write = rdtgroup_cpus_write, + .seq_show = rdtgroup_cpus_show, + }, + { + .name = "tasks", + .mode = 0644, + .kf_ops = &rdtgroup_kf_single_ops, + .write = rdtgroup_tasks_write, + .seq_show = rdtgroup_tasks_show, + }, + { + .name = "schemata", + .mode = 0644, + .kf_ops = &rdtgroup_kf_single_ops, + .write = rdtgroup_schemata_write, + .seq_show = rdtgroup_schemata_show, + }, +}; + +static int rdt_num_closids_show(struct kernfs_open_file *of, + struct seq_file *seq, void *v) +{ + struct rdt_resource *r = of->kn->parent->priv; + + seq_printf(seq, "%d\n", r->num_closid); + + return 0; +} + +static int rdt_cbm_mask_show(struct kernfs_open_file *of, + struct seq_file *seq, void *v) +{ + struct rdt_resource *r = of->kn->parent->priv; + + seq_printf(seq, "%x\n", r->max_cbm); + + return 0; +} + +static int rdt_min_cbm_bits_show(struct kernfs_open_file *of, + struct seq_file *seq, void *v) +{ + struct rdt_resource *r = of->kn->parent->priv; + + seq_printf(seq, "%d\n", r->min_cbm_bits); + + return 0; +} + +/* rdtgroup information files for one cache resource. */ +static struct rftype res_info_files[] = { + { + .name = "num_closids", + .mode = 0444, + .kf_ops = &rdtgroup_kf_single_ops, + .seq_show = rdt_num_closids_show, + }, + { + .name = "cbm_mask", + .mode = 0444, + .kf_ops = &rdtgroup_kf_single_ops, + .seq_show = rdt_cbm_mask_show, + }, + { + .name = "min_cbm_bits", + .mode = 0444, + .kf_ops = &rdtgroup_kf_single_ops, + .seq_show = rdt_min_cbm_bits_show, + }, +}; + +static int rdtgroup_create_info_dir(struct kernfs_node *parent_kn) +{ + struct kernfs_node *kn_subdir; + struct rdt_resource *r; + int ret; + + /* create the directory */ + kn_info = kernfs_create_dir(parent_kn, "info", parent_kn->mode, NULL); + if (IS_ERR(kn_info)) + return PTR_ERR(kn_info); + kernfs_get(kn_info); + + for_each_enabled_rdt_resource(r) { + kn_subdir = kernfs_create_dir(kn_info, r->name, + kn_info->mode, r); + if (IS_ERR(kn_subdir)) { + ret = PTR_ERR(kn_subdir); + goto out_destroy; + } + kernfs_get(kn_subdir); + ret = rdtgroup_kn_set_ugid(kn_subdir); + if (ret) + goto out_destroy; + ret = rdtgroup_add_files(kn_subdir, res_info_files, + ARRAY_SIZE(res_info_files)); + if (ret) + goto out_destroy; + kernfs_activate(kn_subdir); + } + + /* + * This extra ref will be put in kernfs_remove() and guarantees + * that @rdtgrp->kn is always accessible. + */ + kernfs_get(kn_info); + + ret = rdtgroup_kn_set_ugid(kn_info); + if (ret) + goto out_destroy; + + kernfs_activate(kn_info); + + return 0; + +out_destroy: + kernfs_remove(kn_info); + return ret; +} + +static void l3_qos_cfg_update(void *arg) +{ + bool *enable = arg; + + wrmsrl(IA32_L3_QOS_CFG, *enable ? L3_QOS_CDP_ENABLE : 0ULL); +} + +static int set_l3_qos_cfg(struct rdt_resource *r, bool enable) +{ + cpumask_var_t cpu_mask; + struct rdt_domain *d; + int cpu; + + if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL)) + return -ENOMEM; + + list_for_each_entry(d, &r->domains, list) { + /* Pick one CPU from each domain instance to update MSR */ + cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask); + } + cpu = get_cpu(); + /* Update QOS_CFG MSR on this cpu if it's in cpu_mask. */ + if (cpumask_test_cpu(cpu, cpu_mask)) + l3_qos_cfg_update(&enable); + /* Update QOS_CFG MSR on all other cpus in cpu_mask. */ + smp_call_function_many(cpu_mask, l3_qos_cfg_update, &enable, 1); + put_cpu(); + + free_cpumask_var(cpu_mask); + + return 0; +} + +static int cdp_enable(void) +{ + struct rdt_resource *r_l3data = &rdt_resources_all[RDT_RESOURCE_L3DATA]; + struct rdt_resource *r_l3code = &rdt_resources_all[RDT_RESOURCE_L3CODE]; + struct rdt_resource *r_l3 = &rdt_resources_all[RDT_RESOURCE_L3]; + int ret; + + if (!r_l3->capable || !r_l3data->capable || !r_l3code->capable) + return -EINVAL; + + ret = set_l3_qos_cfg(r_l3, true); + if (!ret) { + r_l3->enabled = false; + r_l3data->enabled = true; + r_l3code->enabled = true; + } + return ret; +} + +static void cdp_disable(void) +{ + struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3]; + + r->enabled = r->capable; + + if (rdt_resources_all[RDT_RESOURCE_L3DATA].enabled) { + rdt_resources_all[RDT_RESOURCE_L3DATA].enabled = false; + rdt_resources_all[RDT_RESOURCE_L3CODE].enabled = false; + set_l3_qos_cfg(r, false); + } +} + +static int parse_rdtgroupfs_options(char *data) +{ + char *token, *o = data; + int ret = 0; + + while ((token = strsep(&o, ",")) != NULL) { + if (!*token) + return -EINVAL; + + if (!strcmp(token, "cdp")) + ret = cdp_enable(); + } + + return ret; +} + +/* + * We don't allow rdtgroup directories to be created anywhere + * except the root directory. Thus when looking for the rdtgroup + * structure for a kernfs node we are either looking at a directory, + * in which case the rdtgroup structure is pointed at by the "priv" + * field, otherwise we have a file, and need only look to the parent + * to find the rdtgroup. + */ +static struct rdtgroup *kernfs_to_rdtgroup(struct kernfs_node *kn) +{ + if (kernfs_type(kn) == KERNFS_DIR) { + /* + * All the resource directories use "kn->priv" + * to point to the "struct rdtgroup" for the + * resource. "info" and its subdirectories don't + * have rdtgroup structures, so return NULL here. + */ + if (kn == kn_info || kn->parent == kn_info) + return NULL; + else + return kn->priv; + } else { + return kn->parent->priv; + } +} + +struct rdtgroup *rdtgroup_kn_lock_live(struct kernfs_node *kn) +{ + struct rdtgroup *rdtgrp = kernfs_to_rdtgroup(kn); + + if (!rdtgrp) + return NULL; + + atomic_inc(&rdtgrp->waitcount); + kernfs_break_active_protection(kn); + + mutex_lock(&rdtgroup_mutex); + + /* Was this group deleted while we waited? */ + if (rdtgrp->flags & RDT_DELETED) + return NULL; + + return rdtgrp; +} + +void rdtgroup_kn_unlock(struct kernfs_node *kn) +{ + struct rdtgroup *rdtgrp = kernfs_to_rdtgroup(kn); + + if (!rdtgrp) + return; + + mutex_unlock(&rdtgroup_mutex); + + if (atomic_dec_and_test(&rdtgrp->waitcount) && + (rdtgrp->flags & RDT_DELETED)) { + kernfs_unbreak_active_protection(kn); + kernfs_put(kn); + kfree(rdtgrp); + } else { + kernfs_unbreak_active_protection(kn); + } +} + +static struct dentry *rdt_mount(struct file_system_type *fs_type, + int flags, const char *unused_dev_name, + void *data) +{ + struct dentry *dentry; + int ret; + + mutex_lock(&rdtgroup_mutex); + /* + * resctrl file system can only be mounted once. + */ + if (static_branch_unlikely(&rdt_enable_key)) { + dentry = ERR_PTR(-EBUSY); + goto out; + } + + ret = parse_rdtgroupfs_options(data); + if (ret) { + dentry = ERR_PTR(ret); + goto out_cdp; + } + + closid_init(); + + ret = rdtgroup_create_info_dir(rdtgroup_default.kn); + if (ret) { + dentry = ERR_PTR(ret); + goto out_cdp; + } + + dentry = kernfs_mount(fs_type, flags, rdt_root, + RDTGROUP_SUPER_MAGIC, NULL); + if (IS_ERR(dentry)) + goto out_cdp; + + static_branch_enable(&rdt_enable_key); + goto out; + +out_cdp: + cdp_disable(); +out: + mutex_unlock(&rdtgroup_mutex); + + return dentry; +} + +static int reset_all_cbms(struct rdt_resource *r) +{ + struct msr_param msr_param; + cpumask_var_t cpu_mask; + struct rdt_domain *d; + int i, cpu; + + if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL)) + return -ENOMEM; + + msr_param.res = r; + msr_param.low = 0; + msr_param.high = r->num_closid; + + /* + * Disable resource control for this resource by setting all + * CBMs in all domains to the maximum mask value. Pick one CPU + * from each domain to update the MSRs below. + */ + list_for_each_entry(d, &r->domains, list) { + cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask); + + for (i = 0; i < r->num_closid; i++) + d->cbm[i] = r->max_cbm; + } + cpu = get_cpu(); + /* Update CBM on this cpu if it's in cpu_mask. */ + if (cpumask_test_cpu(cpu, cpu_mask)) + rdt_cbm_update(&msr_param); + /* Update CBM on all other cpus in cpu_mask. */ + smp_call_function_many(cpu_mask, rdt_cbm_update, &msr_param, 1); + put_cpu(); + + free_cpumask_var(cpu_mask); + + return 0; +} + +/* + * Move tasks from one to the other group. If @from is NULL, then all tasks + * in the systems are moved unconditionally (used for teardown). + * + * If @mask is not NULL the cpus on which moved tasks are running are set + * in that mask so the update smp function call is restricted to affected + * cpus. + */ +static void rdt_move_group_tasks(struct rdtgroup *from, struct rdtgroup *to, + struct cpumask *mask) +{ + struct task_struct *p, *t; + + read_lock(&tasklist_lock); + for_each_process_thread(p, t) { + if (!from || t->closid == from->closid) { + t->closid = to->closid; +#ifdef CONFIG_SMP + /* + * This is safe on x86 w/o barriers as the ordering + * of writing to task_cpu() and t->on_cpu is + * reverse to the reading here. The detection is + * inaccurate as tasks might move or schedule + * before the smp function call takes place. In + * such a case the function call is pointless, but + * there is no other side effect. + */ + if (mask && t->on_cpu) + cpumask_set_cpu(task_cpu(t), mask); +#endif + } + } + read_unlock(&tasklist_lock); +} + +/* + * Forcibly remove all of subdirectories under root. + */ +static void rmdir_all_sub(void) +{ + struct rdtgroup *rdtgrp, *tmp; + + /* Move all tasks to the default resource group */ + rdt_move_group_tasks(NULL, &rdtgroup_default, NULL); + + list_for_each_entry_safe(rdtgrp, tmp, &rdt_all_groups, rdtgroup_list) { + /* Remove each rdtgroup other than root */ + if (rdtgrp == &rdtgroup_default) + continue; + + /* + * Give any CPUs back to the default group. We cannot copy + * cpu_online_mask because a CPU might have executed the + * offline callback already, but is still marked online. + */ + cpumask_or(&rdtgroup_default.cpu_mask, + &rdtgroup_default.cpu_mask, &rdtgrp->cpu_mask); + + kernfs_remove(rdtgrp->kn); + list_del(&rdtgrp->rdtgroup_list); + kfree(rdtgrp); + } + /* Notify online CPUs to update per cpu storage and PQR_ASSOC MSR */ + get_online_cpus(); + rdt_update_closid(cpu_online_mask, &rdtgroup_default.closid); + put_online_cpus(); + + kernfs_remove(kn_info); +} + +static void rdt_kill_sb(struct super_block *sb) +{ + struct rdt_resource *r; + + mutex_lock(&rdtgroup_mutex); + + /*Put everything back to default values. */ + for_each_enabled_rdt_resource(r) + reset_all_cbms(r); + cdp_disable(); + rmdir_all_sub(); + static_branch_disable(&rdt_enable_key); + kernfs_kill_sb(sb); + mutex_unlock(&rdtgroup_mutex); +} + +static struct file_system_type rdt_fs_type = { + .name = "resctrl", + .mount = rdt_mount, + .kill_sb = rdt_kill_sb, +}; + +static int rdtgroup_mkdir(struct kernfs_node *parent_kn, const char *name, + umode_t mode) +{ + struct rdtgroup *parent, *rdtgrp; + struct kernfs_node *kn; + int ret, closid; + + /* Only allow mkdir in the root directory */ + if (parent_kn != rdtgroup_default.kn) + return -EPERM; + + /* Do not accept '\n' to avoid unparsable situation. */ + if (strchr(name, '\n')) + return -EINVAL; + + parent = rdtgroup_kn_lock_live(parent_kn); + if (!parent) { + ret = -ENODEV; + goto out_unlock; + } + + ret = closid_alloc(); + if (ret < 0) + goto out_unlock; + closid = ret; + + /* allocate the rdtgroup. */ + rdtgrp = kzalloc(sizeof(*rdtgrp), GFP_KERNEL); + if (!rdtgrp) { + ret = -ENOSPC; + goto out_closid_free; + } + rdtgrp->closid = closid; + list_add(&rdtgrp->rdtgroup_list, &rdt_all_groups); + + /* kernfs creates the directory for rdtgrp */ + kn = kernfs_create_dir(parent->kn, name, mode, rdtgrp); + if (IS_ERR(kn)) { + ret = PTR_ERR(kn); + goto out_cancel_ref; + } + rdtgrp->kn = kn; + + /* + * kernfs_remove() will drop the reference count on "kn" which + * will free it. But we still need it to stick around for the + * rdtgroup_kn_unlock(kn} call below. Take one extra reference + * here, which will be dropped inside rdtgroup_kn_unlock(). + */ + kernfs_get(kn); + + ret = rdtgroup_kn_set_ugid(kn); + if (ret) + goto out_destroy; + + ret = rdtgroup_add_files(kn, rdtgroup_base_files, + ARRAY_SIZE(rdtgroup_base_files)); + if (ret) + goto out_destroy; + + kernfs_activate(kn); + + ret = 0; + goto out_unlock; + +out_destroy: + kernfs_remove(rdtgrp->kn); +out_cancel_ref: + list_del(&rdtgrp->rdtgroup_list); + kfree(rdtgrp); +out_closid_free: + closid_free(closid); +out_unlock: + rdtgroup_kn_unlock(parent_kn); + return ret; +} + +static int rdtgroup_rmdir(struct kernfs_node *kn) +{ + int ret, cpu, closid = rdtgroup_default.closid; + struct rdtgroup *rdtgrp; + cpumask_var_t tmpmask; + + if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL)) + return -ENOMEM; + + rdtgrp = rdtgroup_kn_lock_live(kn); + if (!rdtgrp) { + ret = -EPERM; + goto out; + } + + /* Give any tasks back to the default group */ + rdt_move_group_tasks(rdtgrp, &rdtgroup_default, tmpmask); + + /* Give any CPUs back to the default group */ + cpumask_or(&rdtgroup_default.cpu_mask, + &rdtgroup_default.cpu_mask, &rdtgrp->cpu_mask); + + /* Update per cpu closid of the moved CPUs first */ + for_each_cpu(cpu, &rdtgrp->cpu_mask) + per_cpu(cpu_closid, cpu) = closid; + /* + * Update the MSR on moved CPUs and CPUs which have moved + * task running on them. + */ + cpumask_or(tmpmask, tmpmask, &rdtgrp->cpu_mask); + rdt_update_closid(tmpmask, NULL); + + rdtgrp->flags = RDT_DELETED; + closid_free(rdtgrp->closid); + list_del(&rdtgrp->rdtgroup_list); + + /* + * one extra hold on this, will drop when we kfree(rdtgrp) + * in rdtgroup_kn_unlock() + */ + kernfs_get(kn); + kernfs_remove(rdtgrp->kn); + ret = 0; +out: + rdtgroup_kn_unlock(kn); + free_cpumask_var(tmpmask); + return ret; +} + +static int rdtgroup_show_options(struct seq_file *seq, struct kernfs_root *kf) +{ + if (rdt_resources_all[RDT_RESOURCE_L3DATA].enabled) + seq_puts(seq, ",cdp"); + return 0; +} + +static struct kernfs_syscall_ops rdtgroup_kf_syscall_ops = { + .mkdir = rdtgroup_mkdir, + .rmdir = rdtgroup_rmdir, + .show_options = rdtgroup_show_options, +}; + +static int __init rdtgroup_setup_root(void) +{ + int ret; + + rdt_root = kernfs_create_root(&rdtgroup_kf_syscall_ops, + KERNFS_ROOT_CREATE_DEACTIVATED, + &rdtgroup_default); + if (IS_ERR(rdt_root)) + return PTR_ERR(rdt_root); + + mutex_lock(&rdtgroup_mutex); + + rdtgroup_default.closid = 0; + list_add(&rdtgroup_default.rdtgroup_list, &rdt_all_groups); + + ret = rdtgroup_add_files(rdt_root->kn, rdtgroup_base_files, + ARRAY_SIZE(rdtgroup_base_files)); + if (ret) { + kernfs_destroy_root(rdt_root); + goto out; + } + + rdtgroup_default.kn = rdt_root->kn; + kernfs_activate(rdtgroup_default.kn); + +out: + mutex_unlock(&rdtgroup_mutex); + + return ret; +} + +/* + * rdtgroup_init - rdtgroup initialization + * + * Setup resctrl file system including set up root, create mount point, + * register rdtgroup filesystem, and initialize files under root directory. + * + * Return: 0 on success or -errno + */ +int __init rdtgroup_init(void) +{ + int ret = 0; + + ret = rdtgroup_setup_root(); + if (ret) + return ret; + + ret = sysfs_create_mount_point(fs_kobj, "resctrl"); + if (ret) + goto cleanup_root; + + ret = register_filesystem(&rdt_fs_type); + if (ret) + goto cleanup_mountpoint; + + return 0; + +cleanup_mountpoint: + sysfs_remove_mount_point(fs_kobj, "resctrl"); +cleanup_root: + kernfs_destroy_root(rdt_root); + + return ret; +} diff --git a/arch/x86/kernel/cpu/intel_rdt_schemata.c b/arch/x86/kernel/cpu/intel_rdt_schemata.c new file mode 100644 index 000000000000..f369cb8db0d5 --- /dev/null +++ b/arch/x86/kernel/cpu/intel_rdt_schemata.c @@ -0,0 +1,245 @@ +/* + * Resource Director Technology(RDT) + * - Cache Allocation code. + * + * Copyright (C) 2016 Intel Corporation + * + * Authors: + * Fenghua Yu <fenghua.yu@intel.com> + * Tony Luck <tony.luck@intel.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * More information about RDT be found in the Intel (R) x86 Architecture + * Software Developer Manual June 2016, volume 3, section 17.17. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/kernfs.h> +#include <linux/seq_file.h> +#include <linux/slab.h> +#include <asm/intel_rdt.h> + +/* + * Check whether a cache bit mask is valid. The SDM says: + * Please note that all (and only) contiguous '1' combinations + * are allowed (e.g. FFFFH, 0FF0H, 003CH, etc.). + * Additionally Haswell requires at least two bits set. + */ +static bool cbm_validate(unsigned long var, struct rdt_resource *r) +{ + unsigned long first_bit, zero_bit; + + if (var == 0 || var > r->max_cbm) + return false; + + first_bit = find_first_bit(&var, r->cbm_len); + zero_bit = find_next_zero_bit(&var, r->cbm_len, first_bit); + + if (find_next_bit(&var, r->cbm_len, zero_bit) < r->cbm_len) + return false; + + if ((zero_bit - first_bit) < r->min_cbm_bits) + return false; + return true; +} + +/* + * Read one cache bit mask (hex). Check that it is valid for the current + * resource type. + */ +static int parse_cbm(char *buf, struct rdt_resource *r) +{ + unsigned long data; + int ret; + + ret = kstrtoul(buf, 16, &data); + if (ret) + return ret; + if (!cbm_validate(data, r)) + return -EINVAL; + r->tmp_cbms[r->num_tmp_cbms++] = data; + + return 0; +} + +/* + * For each domain in this resource we expect to find a series of: + * id=mask + * separated by ";". The "id" is in decimal, and must appear in the + * right order. + */ +static int parse_line(char *line, struct rdt_resource *r) +{ + char *dom = NULL, *id; + struct rdt_domain *d; + unsigned long dom_id; + + list_for_each_entry(d, &r->domains, list) { + dom = strsep(&line, ";"); + if (!dom) + return -EINVAL; + id = strsep(&dom, "="); + if (kstrtoul(id, 10, &dom_id) || dom_id != d->id) + return -EINVAL; + if (parse_cbm(dom, r)) + return -EINVAL; + } + + /* Any garbage at the end of the line? */ + if (line && line[0]) + return -EINVAL; + return 0; +} + +static int update_domains(struct rdt_resource *r, int closid) +{ + struct msr_param msr_param; + cpumask_var_t cpu_mask; + struct rdt_domain *d; + int cpu, idx = 0; + + if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL)) + return -ENOMEM; + + msr_param.low = closid; + msr_param.high = msr_param.low + 1; + msr_param.res = r; + + list_for_each_entry(d, &r->domains, list) { + cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask); + d->cbm[msr_param.low] = r->tmp_cbms[idx++]; + } + cpu = get_cpu(); + /* Update CBM on this cpu if it's in cpu_mask. */ + if (cpumask_test_cpu(cpu, cpu_mask)) + rdt_cbm_update(&msr_param); + /* Update CBM on other cpus. */ + smp_call_function_many(cpu_mask, rdt_cbm_update, &msr_param, 1); + put_cpu(); + + free_cpumask_var(cpu_mask); + + return 0; +} + +ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of, + char *buf, size_t nbytes, loff_t off) +{ + struct rdtgroup *rdtgrp; + struct rdt_resource *r; + char *tok, *resname; + int closid, ret = 0; + u32 *l3_cbms = NULL; + + /* Valid input requires a trailing newline */ + if (nbytes == 0 || buf[nbytes - 1] != '\n') + return -EINVAL; + buf[nbytes - 1] = '\0'; + + rdtgrp = rdtgroup_kn_lock_live(of->kn); + if (!rdtgrp) { + rdtgroup_kn_unlock(of->kn); + return -ENOENT; + } + + closid = rdtgrp->closid; + + /* get scratch space to save all the masks while we validate input */ + for_each_enabled_rdt_resource(r) { + r->tmp_cbms = kcalloc(r->num_domains, sizeof(*l3_cbms), + GFP_KERNEL); + if (!r->tmp_cbms) { + ret = -ENOMEM; + goto out; + } + r->num_tmp_cbms = 0; + } + + while ((tok = strsep(&buf, "\n")) != NULL) { + resname = strsep(&tok, ":"); + if (!tok) { + ret = -EINVAL; + goto out; + } + for_each_enabled_rdt_resource(r) { + if (!strcmp(resname, r->name) && + closid < r->num_closid) { + ret = parse_line(tok, r); + if (ret) + goto out; + break; + } + } + if (!r->name) { + ret = -EINVAL; + goto out; + } + } + + /* Did the parser find all the masks we need? */ + for_each_enabled_rdt_resource(r) { + if (r->num_tmp_cbms != r->num_domains) { + ret = -EINVAL; + goto out; + } + } + + for_each_enabled_rdt_resource(r) { + ret = update_domains(r, closid); + if (ret) + goto out; + } + +out: + rdtgroup_kn_unlock(of->kn); + for_each_enabled_rdt_resource(r) { + kfree(r->tmp_cbms); + r->tmp_cbms = NULL; + } + return ret ?: nbytes; +} + +static void show_doms(struct seq_file *s, struct rdt_resource *r, int closid) +{ + struct rdt_domain *dom; + bool sep = false; + + seq_printf(s, "%s:", r->name); + list_for_each_entry(dom, &r->domains, list) { + if (sep) + seq_puts(s, ";"); + seq_printf(s, "%d=%x", dom->id, dom->cbm[closid]); + sep = true; + } + seq_puts(s, "\n"); +} + +int rdtgroup_schemata_show(struct kernfs_open_file *of, + struct seq_file *s, void *v) +{ + struct rdtgroup *rdtgrp; + struct rdt_resource *r; + int closid, ret = 0; + + rdtgrp = rdtgroup_kn_lock_live(of->kn); + if (rdtgrp) { + closid = rdtgrp->closid; + for_each_enabled_rdt_resource(r) { + if (closid < r->num_closid) + show_doms(s, r, closid); + } + } else { + ret = -ENOENT; + } + rdtgroup_kn_unlock(of->kn); + return ret; +} diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index d1316f9c8329..d9794060fe22 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -20,12 +20,15 @@ struct cpuid_bit { /* Please keep the leaf sorted by cpuid_bit.level for faster search. */ static const struct cpuid_bit cpuid_bits[] = { { X86_FEATURE_APERFMPERF, CPUID_ECX, 0, 0x00000006, 0 }, - { X86_FEATURE_EPB, CPUID_ECX, 3, 0x00000006, 0 }, - { X86_FEATURE_INTEL_PT, CPUID_EBX, 25, 0x00000007, 0 }, + { X86_FEATURE_EPB, CPUID_ECX, 3, 0x00000006, 0 }, + { X86_FEATURE_INTEL_PT, CPUID_EBX, 25, 0x00000007, 0 }, { X86_FEATURE_AVX512_4VNNIW, CPUID_EDX, 2, 0x00000007, 0 }, { X86_FEATURE_AVX512_4FMAPS, CPUID_EDX, 3, 0x00000007, 0 }, - { X86_FEATURE_HW_PSTATE, CPUID_EDX, 7, 0x80000007, 0 }, - { X86_FEATURE_CPB, CPUID_EDX, 9, 0x80000007, 0 }, + { X86_FEATURE_CAT_L3, CPUID_EBX, 1, 0x00000010, 0 }, + { X86_FEATURE_CAT_L2, CPUID_EBX, 2, 0x00000010, 0 }, + { X86_FEATURE_CDP_L3, CPUID_ECX, 2, 0x00000010, 1 }, + { X86_FEATURE_HW_PSTATE, CPUID_EDX, 7, 0x80000007, 0 }, + { X86_FEATURE_CPB, CPUID_EDX, 9, 0x80000007, 0 }, { X86_FEATURE_PROC_FEEDBACK, CPUID_EDX, 11, 0x80000007, 0 }, { 0, 0, 0, 0, 0 } }; diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index d0d744108594..a0ac3e81518a 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -53,6 +53,7 @@ #include <asm/debugreg.h> #include <asm/switch_to.h> #include <asm/vm86.h> +#include <asm/intel_rdt.h> void __show_regs(struct pt_regs *regs, int all) { @@ -296,5 +297,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) this_cpu_write(current_task, next_p); + /* Load the Intel cache allocation PQR MSR. */ + intel_rdt_sched_in(); + return prev_p; } diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index a76b65e3e615..a61e141b6891 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -49,6 +49,7 @@ #include <asm/switch_to.h> #include <asm/xen/hypervisor.h> #include <asm/vdso.h> +#include <asm/intel_rdt.h> __visible DEFINE_PER_CPU(unsigned long, rsp_scratch); @@ -476,6 +477,9 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) loadsegment(ss, __KERNEL_DS); } + /* Load the Intel cache allocation PQR MSR. */ + intel_rdt_sched_in(); + return prev_p; } diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig index f61058617ada..f4126cf997a4 100644 --- a/arch/xtensa/Kconfig +++ b/arch/xtensa/Kconfig @@ -15,6 +15,7 @@ config XTENSA select GENERIC_SCHED_CLOCK select HAVE_DEBUG_KMEMLEAK select HAVE_DMA_API_DEBUG + select HAVE_DMA_CONTIGUOUS select HAVE_EXIT_THREAD select HAVE_FUNCTION_TRACER select HAVE_FUTEX_CMPXCHG if !MMU diff --git a/arch/xtensa/boot/dts/kc705.dts b/arch/xtensa/boot/dts/kc705.dts index b1f4ee8c9a22..6106bdc097ad 100644 --- a/arch/xtensa/boot/dts/kc705.dts +++ b/arch/xtensa/boot/dts/kc705.dts @@ -11,4 +11,20 @@ device_type = "memory"; reg = <0x00000000 0x38000000>; }; + + reserved-memory { + #address-cells = <1>; + #size-cells = <1>; + ranges; + + /* global autoconfigured region for contiguous allocations */ + linux,cma { + compatible = "shared-dma-pool"; + reusable; + size = <0x04000000>; + alignment = <0x2000>; + alloc-ranges = <0x00000000 0x20000000>; + linux,cma-default; + }; + }; }; diff --git a/arch/xtensa/include/asm/Kbuild b/arch/xtensa/include/asm/Kbuild index 28cf4c5d65ef..b7fbaa56b51a 100644 --- a/arch/xtensa/include/asm/Kbuild +++ b/arch/xtensa/include/asm/Kbuild @@ -3,6 +3,7 @@ generic-y += bug.h generic-y += clkdev.h generic-y += cputime.h generic-y += div64.h +generic-y += dma-contiguous.h generic-y += emergency-restart.h generic-y += errno.h generic-y += exec.h diff --git a/arch/xtensa/kernel/Makefile b/arch/xtensa/kernel/Makefile index c31f5d5afc7d..264fb89c444e 100644 --- a/arch/xtensa/kernel/Makefile +++ b/arch/xtensa/kernel/Makefile @@ -14,6 +14,7 @@ obj-$(CONFIG_FUNCTION_TRACER) += mcount.o obj-$(CONFIG_SMP) += smp.o mxhead.o obj-$(CONFIG_XTENSA_VARIANT_HAVE_PERF_EVENTS) += perf_event.o obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o +obj-$(CONFIG_S32C1I_SELFTEST) += s32c1i_selftest.o AFLAGS_head.o += -mtext-section-literals AFLAGS_mxhead.o += -mtext-section-literals diff --git a/arch/xtensa/kernel/pci-dma.c b/arch/xtensa/kernel/pci-dma.c index 6a16decf278f..70e362e6038e 100644 --- a/arch/xtensa/kernel/pci-dma.c +++ b/arch/xtensa/kernel/pci-dma.c @@ -15,6 +15,7 @@ * Joe Taylor <joe@tensilica.com, joetylr@yahoo.com> */ +#include <linux/dma-contiguous.h> #include <linux/gfp.h> #include <linux/highmem.h> #include <linux/mm.h> @@ -146,6 +147,8 @@ static void *xtensa_dma_alloc(struct device *dev, size_t size, { unsigned long ret; unsigned long uncached = 0; + unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT; + struct page *page = NULL; /* ignore region speicifiers */ @@ -153,11 +156,18 @@ static void *xtensa_dma_alloc(struct device *dev, size_t size, if (dev == NULL || (dev->coherent_dma_mask < 0xffffffff)) flag |= GFP_DMA; - ret = (unsigned long)__get_free_pages(flag, get_order(size)); - if (ret == 0) + if (gfpflags_allow_blocking(flag)) + page = dma_alloc_from_contiguous(dev, count, get_order(size)); + + if (!page) + page = alloc_pages(flag, get_order(size)); + + if (!page) return NULL; + ret = (unsigned long)page_address(page); + /* We currently don't support coherent memory outside KSEG */ BUG_ON(ret < XCHAL_KSEG_CACHED_VADDR || @@ -170,16 +180,19 @@ static void *xtensa_dma_alloc(struct device *dev, size_t size, return (void *)uncached; } -static void xtensa_dma_free(struct device *hwdev, size_t size, void *vaddr, +static void xtensa_dma_free(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle, unsigned long attrs) { unsigned long addr = (unsigned long)vaddr + XCHAL_KSEG_CACHED_VADDR - XCHAL_KSEG_BYPASS_VADDR; + struct page *page = virt_to_page(addr); + unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT; BUG_ON(addr < XCHAL_KSEG_CACHED_VADDR || addr > XCHAL_KSEG_CACHED_VADDR + XCHAL_KSEG_SIZE - 1); - free_pages(addr, get_order(size)); + if (!dma_release_from_contiguous(dev, page, count)) + __free_pages(page, get_order(size)); } static dma_addr_t xtensa_map_page(struct device *dev, struct page *page, diff --git a/arch/xtensa/kernel/s32c1i_selftest.c b/arch/xtensa/kernel/s32c1i_selftest.c new file mode 100644 index 000000000000..07e56e3a9a8b --- /dev/null +++ b/arch/xtensa/kernel/s32c1i_selftest.c @@ -0,0 +1,128 @@ +/* + * S32C1I selftest. + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2016 Cadence Design Systems Inc. + */ + +#include <linux/init.h> +#include <linux/kernel.h> + +#include <asm/traps.h> + +#if XCHAL_HAVE_S32C1I + +static int __initdata rcw_word, rcw_probe_pc, rcw_exc; + +/* + * Basic atomic compare-and-swap, that records PC of S32C1I for probing. + * + * If *v == cmp, set *v = set. Return previous *v. + */ +static inline int probed_compare_swap(int *v, int cmp, int set) +{ + int tmp; + + __asm__ __volatile__( + " movi %1, 1f\n" + " s32i %1, %4, 0\n" + " wsr %2, scompare1\n" + "1: s32c1i %0, %3, 0\n" + : "=a" (set), "=&a" (tmp) + : "a" (cmp), "a" (v), "a" (&rcw_probe_pc), "0" (set) + : "memory" + ); + return set; +} + +/* Handle probed exception */ + +static void __init do_probed_exception(struct pt_regs *regs, + unsigned long exccause) +{ + if (regs->pc == rcw_probe_pc) { /* exception on s32c1i ? */ + regs->pc += 3; /* skip the s32c1i instruction */ + rcw_exc = exccause; + } else { + do_unhandled(regs, exccause); + } +} + +/* Simple test of S32C1I (soc bringup assist) */ + +static int __init check_s32c1i(void) +{ + int n, cause1, cause2; + void *handbus, *handdata, *handaddr; /* temporarily saved handlers */ + + rcw_probe_pc = 0; + handbus = trap_set_handler(EXCCAUSE_LOAD_STORE_ERROR, + do_probed_exception); + handdata = trap_set_handler(EXCCAUSE_LOAD_STORE_DATA_ERROR, + do_probed_exception); + handaddr = trap_set_handler(EXCCAUSE_LOAD_STORE_ADDR_ERROR, + do_probed_exception); + + /* First try an S32C1I that does not store: */ + rcw_exc = 0; + rcw_word = 1; + n = probed_compare_swap(&rcw_word, 0, 2); + cause1 = rcw_exc; + + /* took exception? */ + if (cause1 != 0) { + /* unclean exception? */ + if (n != 2 || rcw_word != 1) + panic("S32C1I exception error"); + } else if (rcw_word != 1 || n != 1) { + panic("S32C1I compare error"); + } + + /* Then an S32C1I that stores: */ + rcw_exc = 0; + rcw_word = 0x1234567; + n = probed_compare_swap(&rcw_word, 0x1234567, 0xabcde); + cause2 = rcw_exc; + + if (cause2 != 0) { + /* unclean exception? */ + if (n != 0xabcde || rcw_word != 0x1234567) + panic("S32C1I exception error (b)"); + } else if (rcw_word != 0xabcde || n != 0x1234567) { + panic("S32C1I store error"); + } + + /* Verify consistency of exceptions: */ + if (cause1 || cause2) { + pr_warn("S32C1I took exception %d, %d\n", cause1, cause2); + /* If emulation of S32C1I upon bus error gets implemented, + * we can get rid of this panic for single core (not SMP) + */ + panic("S32C1I exceptions not currently supported"); + } + if (cause1 != cause2) + panic("inconsistent S32C1I exceptions"); + + trap_set_handler(EXCCAUSE_LOAD_STORE_ERROR, handbus); + trap_set_handler(EXCCAUSE_LOAD_STORE_DATA_ERROR, handdata); + trap_set_handler(EXCCAUSE_LOAD_STORE_ADDR_ERROR, handaddr); + return 0; +} + +#else /* XCHAL_HAVE_S32C1I */ + +/* This condition should not occur with a commercially deployed processor. + * Display reminder for early engr test or demo chips / FPGA bitstreams + */ +static int __init check_s32c1i(void) +{ + pr_warn("Processor configuration lacks atomic compare-and-swap support!\n"); + return 0; +} + +#endif /* XCHAL_HAVE_S32C1I */ + +early_initcall(check_s32c1i); diff --git a/arch/xtensa/kernel/setup.c b/arch/xtensa/kernel/setup.c index 88a044af7504..848e8568fb3c 100644 --- a/arch/xtensa/kernel/setup.c +++ b/arch/xtensa/kernel/setup.c @@ -31,10 +31,6 @@ # include <linux/console.h> #endif -#ifdef CONFIG_RTC -# include <linux/timex.h> -#endif - #ifdef CONFIG_PROC_FS # include <linux/seq_file.h> #endif @@ -48,24 +44,22 @@ #include <asm/page.h> #include <asm/setup.h> #include <asm/param.h> -#include <asm/traps.h> #include <asm/smp.h> #include <asm/sysmem.h> #include <platform/hardware.h> #if defined(CONFIG_VGA_CONSOLE) || defined(CONFIG_DUMMY_CONSOLE) -struct screen_info screen_info = { 0, 24, 0, 0, 0, 80, 0, 0, 0, 24, 1, 16}; -#endif - -#ifdef CONFIG_BLK_DEV_FD -extern struct fd_ops no_fd_ops; -struct fd_ops *fd_ops; +struct screen_info screen_info = { + .orig_x = 0, + .orig_y = 24, + .orig_video_cols = 80, + .orig_video_lines = 24, + .orig_video_isVGA = 1, + .orig_video_points = 16, +}; #endif -extern struct rtc_ops no_rtc_ops; -struct rtc_ops *rtc_ops; - #ifdef CONFIG_BLK_DEV_INITRD extern unsigned long initrd_start; extern unsigned long initrd_end; @@ -77,7 +71,6 @@ extern int initrd_below_start_ok; void *dtb_start = __dtb_start; #endif -unsigned char aux_device_present; extern unsigned long loops_per_jiffy; /* Command line specified as configuration option. */ @@ -317,120 +310,6 @@ extern char _SecondaryResetVector_text_start; extern char _SecondaryResetVector_text_end; #endif - -#ifdef CONFIG_S32C1I_SELFTEST -#if XCHAL_HAVE_S32C1I - -static int __initdata rcw_word, rcw_probe_pc, rcw_exc; - -/* - * Basic atomic compare-and-swap, that records PC of S32C1I for probing. - * - * If *v == cmp, set *v = set. Return previous *v. - */ -static inline int probed_compare_swap(int *v, int cmp, int set) -{ - int tmp; - - __asm__ __volatile__( - " movi %1, 1f\n" - " s32i %1, %4, 0\n" - " wsr %2, scompare1\n" - "1: s32c1i %0, %3, 0\n" - : "=a" (set), "=&a" (tmp) - : "a" (cmp), "a" (v), "a" (&rcw_probe_pc), "0" (set) - : "memory" - ); - return set; -} - -/* Handle probed exception */ - -static void __init do_probed_exception(struct pt_regs *regs, - unsigned long exccause) -{ - if (regs->pc == rcw_probe_pc) { /* exception on s32c1i ? */ - regs->pc += 3; /* skip the s32c1i instruction */ - rcw_exc = exccause; - } else { - do_unhandled(regs, exccause); - } -} - -/* Simple test of S32C1I (soc bringup assist) */ - -static int __init check_s32c1i(void) -{ - int n, cause1, cause2; - void *handbus, *handdata, *handaddr; /* temporarily saved handlers */ - - rcw_probe_pc = 0; - handbus = trap_set_handler(EXCCAUSE_LOAD_STORE_ERROR, - do_probed_exception); - handdata = trap_set_handler(EXCCAUSE_LOAD_STORE_DATA_ERROR, - do_probed_exception); - handaddr = trap_set_handler(EXCCAUSE_LOAD_STORE_ADDR_ERROR, - do_probed_exception); - - /* First try an S32C1I that does not store: */ - rcw_exc = 0; - rcw_word = 1; - n = probed_compare_swap(&rcw_word, 0, 2); - cause1 = rcw_exc; - - /* took exception? */ - if (cause1 != 0) { - /* unclean exception? */ - if (n != 2 || rcw_word != 1) - panic("S32C1I exception error"); - } else if (rcw_word != 1 || n != 1) { - panic("S32C1I compare error"); - } - - /* Then an S32C1I that stores: */ - rcw_exc = 0; - rcw_word = 0x1234567; - n = probed_compare_swap(&rcw_word, 0x1234567, 0xabcde); - cause2 = rcw_exc; - - if (cause2 != 0) { - /* unclean exception? */ - if (n != 0xabcde || rcw_word != 0x1234567) - panic("S32C1I exception error (b)"); - } else if (rcw_word != 0xabcde || n != 0x1234567) { - panic("S32C1I store error"); - } - - /* Verify consistency of exceptions: */ - if (cause1 || cause2) { - pr_warn("S32C1I took exception %d, %d\n", cause1, cause2); - /* If emulation of S32C1I upon bus error gets implemented, - we can get rid of this panic for single core (not SMP) */ - panic("S32C1I exceptions not currently supported"); - } - if (cause1 != cause2) - panic("inconsistent S32C1I exceptions"); - - trap_set_handler(EXCCAUSE_LOAD_STORE_ERROR, handbus); - trap_set_handler(EXCCAUSE_LOAD_STORE_DATA_ERROR, handdata); - trap_set_handler(EXCCAUSE_LOAD_STORE_ADDR_ERROR, handaddr); - return 0; -} - -#else /* XCHAL_HAVE_S32C1I */ - -/* This condition should not occur with a commercially deployed processor. - Display reminder for early engr test or demo chips / FPGA bitstreams */ -static int __init check_s32c1i(void) -{ - pr_warn("Processor configuration lacks atomic compare-and-swap support!\n"); - return 0; -} - -#endif /* XCHAL_HAVE_S32C1I */ -early_initcall(check_s32c1i); -#endif /* CONFIG_S32C1I_SELFTEST */ - static inline int mem_reserve(unsigned long start, unsigned long end) { return memblock_reserve(start, end - start); diff --git a/arch/xtensa/mm/init.c b/arch/xtensa/mm/init.c index 80e4cfb2471a..720fe4e8b497 100644 --- a/arch/xtensa/mm/init.c +++ b/arch/xtensa/mm/init.c @@ -26,6 +26,7 @@ #include <linux/nodemask.h> #include <linux/mm.h> #include <linux/of_fdt.h> +#include <linux/dma-contiguous.h> #include <asm/bootparam.h> #include <asm/page.h> @@ -60,6 +61,7 @@ void __init bootmem_init(void) max_low_pfn = min(max_pfn, MAX_LOW_PFN); memblock_set_current_limit(PFN_PHYS(max_low_pfn)); + dma_contiguous_reserve(PFN_PHYS(max_low_pfn)); memblock_dump_all(); } diff --git a/drivers/base/cacheinfo.c b/drivers/base/cacheinfo.c index 1e3903d0d994..eb3af2739537 100644 --- a/drivers/base/cacheinfo.c +++ b/drivers/base/cacheinfo.c @@ -363,6 +363,7 @@ static ssize_t file_name##_show(struct device *dev, \ return sprintf(buf, "%u\n", this_leaf->object); \ } +show_one(id, id); show_one(level, level); show_one(coherency_line_size, coherency_line_size); show_one(number_of_sets, number_of_sets); @@ -444,6 +445,7 @@ static ssize_t write_policy_show(struct device *dev, return n; } +static DEVICE_ATTR_RO(id); static DEVICE_ATTR_RO(level); static DEVICE_ATTR_RO(type); static DEVICE_ATTR_RO(coherency_line_size); @@ -457,6 +459,7 @@ static DEVICE_ATTR_RO(shared_cpu_list); static DEVICE_ATTR_RO(physical_line_partition); static struct attribute *cache_default_attrs[] = { + &dev_attr_id.attr, &dev_attr_type.attr, &dev_attr_level.attr, &dev_attr_shared_cpu_map.attr, @@ -480,6 +483,8 @@ cache_default_attrs_is_visible(struct kobject *kobj, const struct cpumask *mask = &this_leaf->shared_cpu_map; umode_t mode = attr->mode; + if ((attr == &dev_attr_id.attr) && (this_leaf->attributes & CACHE_ID)) + return mode; if ((attr == &dev_attr_type.attr) && this_leaf->type) return mode; if ((attr == &dev_attr_level.attr) && this_leaf->level) diff --git a/drivers/i2c/busses/i2c-designware-platdrv.c b/drivers/i2c/busses/i2c-designware-platdrv.c index 08153ea4d848..6ce431323125 100644 --- a/drivers/i2c/busses/i2c-designware-platdrv.c +++ b/drivers/i2c/busses/i2c-designware-platdrv.c @@ -150,6 +150,29 @@ static int i2c_dw_plat_prepare_clk(struct dw_i2c_dev *i_dev, bool prepare) return 0; } +static void dw_i2c_set_fifo_size(struct dw_i2c_dev *dev, int id) +{ + u32 param, tx_fifo_depth, rx_fifo_depth; + + /* + * Try to detect the FIFO depth if not set by interface driver, + * the depth could be from 2 to 256 from HW spec. + */ + param = i2c_dw_read_comp_param(dev); + tx_fifo_depth = ((param >> 16) & 0xff) + 1; + rx_fifo_depth = ((param >> 8) & 0xff) + 1; + if (!dev->tx_fifo_depth) { + dev->tx_fifo_depth = tx_fifo_depth; + dev->rx_fifo_depth = rx_fifo_depth; + dev->adapter.nr = id; + } else if (tx_fifo_depth >= 2) { + dev->tx_fifo_depth = min_t(u32, dev->tx_fifo_depth, + tx_fifo_depth); + dev->rx_fifo_depth = min_t(u32, dev->rx_fifo_depth, + rx_fifo_depth); + } +} + static int dw_i2c_plat_probe(struct platform_device *pdev) { struct dw_i2c_platform_data *pdata = dev_get_platdata(&pdev->dev); @@ -245,13 +268,7 @@ static int dw_i2c_plat_probe(struct platform_device *pdev) 1000000); } - if (!dev->tx_fifo_depth) { - u32 param1 = i2c_dw_read_comp_param(dev); - - dev->tx_fifo_depth = ((param1 >> 16) & 0xff) + 1; - dev->rx_fifo_depth = ((param1 >> 8) & 0xff) + 1; - dev->adapter.nr = pdev->id; - } + dw_i2c_set_fifo_size(dev, pdev->id); adap = &dev->adapter; adap->owner = THIS_MODULE; diff --git a/drivers/i2c/busses/i2c-octeon-core.c b/drivers/i2c/busses/i2c-octeon-core.c index 3d10f1a802be..1d8775799056 100644 --- a/drivers/i2c/busses/i2c-octeon-core.c +++ b/drivers/i2c/busses/i2c-octeon-core.c @@ -342,7 +342,9 @@ static int octeon_i2c_read(struct octeon_i2c *i2c, int target, if (result) return result; - data[i] = octeon_i2c_data_read(i2c); + data[i] = octeon_i2c_data_read(i2c, &result); + if (result) + return result; if (recv_len && i == 0) { if (data[i] > I2C_SMBUS_BLOCK_MAX + 1) return -EPROTO; diff --git a/drivers/i2c/busses/i2c-octeon-core.h b/drivers/i2c/busses/i2c-octeon-core.h index 87151ea74acd..e160f838c254 100644 --- a/drivers/i2c/busses/i2c-octeon-core.h +++ b/drivers/i2c/busses/i2c-octeon-core.h @@ -141,11 +141,14 @@ static inline void octeon_i2c_writeq_flush(u64 val, void __iomem *addr) */ static inline void octeon_i2c_reg_write(struct octeon_i2c *i2c, u64 eop_reg, u8 data) { + int tries = 1000; u64 tmp; __raw_writeq(SW_TWSI_V | eop_reg | data, i2c->twsi_base + SW_TWSI(i2c)); do { tmp = __raw_readq(i2c->twsi_base + SW_TWSI(i2c)); + if (--tries < 0) + return; } while ((tmp & SW_TWSI_V) != 0); } @@ -163,24 +166,32 @@ static inline void octeon_i2c_reg_write(struct octeon_i2c *i2c, u64 eop_reg, u8 * * The I2C core registers are accessed indirectly via the SW_TWSI CSR. */ -static inline u8 octeon_i2c_reg_read(struct octeon_i2c *i2c, u64 eop_reg) +static inline int octeon_i2c_reg_read(struct octeon_i2c *i2c, u64 eop_reg, + int *error) { + int tries = 1000; u64 tmp; __raw_writeq(SW_TWSI_V | eop_reg | SW_TWSI_R, i2c->twsi_base + SW_TWSI(i2c)); do { tmp = __raw_readq(i2c->twsi_base + SW_TWSI(i2c)); + if (--tries < 0) { + /* signal that the returned data is invalid */ + if (error) + *error = -EIO; + return 0; + } } while ((tmp & SW_TWSI_V) != 0); return tmp & 0xFF; } #define octeon_i2c_ctl_read(i2c) \ - octeon_i2c_reg_read(i2c, SW_TWSI_EOP_TWSI_CTL) -#define octeon_i2c_data_read(i2c) \ - octeon_i2c_reg_read(i2c, SW_TWSI_EOP_TWSI_DATA) + octeon_i2c_reg_read(i2c, SW_TWSI_EOP_TWSI_CTL, NULL) +#define octeon_i2c_data_read(i2c, error) \ + octeon_i2c_reg_read(i2c, SW_TWSI_EOP_TWSI_DATA, error) #define octeon_i2c_stat_read(i2c) \ - octeon_i2c_reg_read(i2c, SW_TWSI_EOP_TWSI_STAT) + octeon_i2c_reg_read(i2c, SW_TWSI_EOP_TWSI_STAT, NULL) /** * octeon_i2c_read_int - read the TWSI_INT register diff --git a/drivers/i2c/busses/i2c-xgene-slimpro.c b/drivers/i2c/busses/i2c-xgene-slimpro.c index 05cf192ef1ac..0ab1e55558bc 100644 --- a/drivers/i2c/busses/i2c-xgene-slimpro.c +++ b/drivers/i2c/busses/i2c-xgene-slimpro.c @@ -415,6 +415,7 @@ static int xgene_slimpro_i2c_probe(struct platform_device *pdev) adapter->algo = &xgene_slimpro_i2c_algorithm; adapter->class = I2C_CLASS_HWMON; adapter->dev.parent = &pdev->dev; + adapter->dev.of_node = pdev->dev.of_node; i2c_set_adapdata(adapter, ctx); rc = i2c_add_adapter(adapter); if (rc) { diff --git a/drivers/i2c/muxes/i2c-mux-mlxcpld.c b/drivers/i2c/muxes/i2c-mux-mlxcpld.c index 3ab654bbfab5..b7ca249ec9c3 100644 --- a/drivers/i2c/muxes/i2c-mux-mlxcpld.c +++ b/drivers/i2c/muxes/i2c-mux-mlxcpld.c @@ -95,6 +95,7 @@ static int mlxcpld_mux_reg_write(struct i2c_adapter *adap, struct i2c_client *client, u8 val) { struct mlxcpld_mux_plat_data *pdata = dev_get_platdata(&client->dev); + int ret = -ENODEV; if (adap->algo->master_xfer) { struct i2c_msg msg; @@ -104,17 +105,21 @@ static int mlxcpld_mux_reg_write(struct i2c_adapter *adap, msg.flags = 0; msg.len = 2; msg.buf = msgbuf; - return __i2c_transfer(adap, &msg, 1); + ret = __i2c_transfer(adap, &msg, 1); + + if (ret >= 0 && ret != 1) + ret = -EREMOTEIO; } else if (adap->algo->smbus_xfer) { union i2c_smbus_data data; data.byte = val; - return adap->algo->smbus_xfer(adap, client->addr, - client->flags, I2C_SMBUS_WRITE, - pdata->sel_reg_addr, - I2C_SMBUS_BYTE_DATA, &data); - } else - return -ENODEV; + ret = adap->algo->smbus_xfer(adap, client->addr, + client->flags, I2C_SMBUS_WRITE, + pdata->sel_reg_addr, + I2C_SMBUS_BYTE_DATA, &data); + } + + return ret; } static int mlxcpld_mux_select_chan(struct i2c_mux_core *muxc, u32 chan) @@ -127,10 +132,7 @@ static int mlxcpld_mux_select_chan(struct i2c_mux_core *muxc, u32 chan) /* Only select the channel if its different from the last channel */ if (data->last_chan != regval) { err = mlxcpld_mux_reg_write(muxc->parent, client, regval); - if (err) - data->last_chan = 0; - else - data->last_chan = regval; + data->last_chan = err < 0 ? 0 : regval; } return err; diff --git a/drivers/i2c/muxes/i2c-mux-pca954x.c b/drivers/i2c/muxes/i2c-mux-pca954x.c index 9a348ee4dc14..dd18b9ccb1f4 100644 --- a/drivers/i2c/muxes/i2c-mux-pca954x.c +++ b/drivers/i2c/muxes/i2c-mux-pca954x.c @@ -167,6 +167,9 @@ static int pca954x_reg_write(struct i2c_adapter *adap, buf[0] = val; msg.buf = buf; ret = __i2c_transfer(adap, &msg, 1); + + if (ret >= 0 && ret != 1) + ret = -EREMOTEIO; } else { union i2c_smbus_data data; ret = adap->algo->smbus_xfer(adap, client->addr, @@ -195,7 +198,7 @@ static int pca954x_select_chan(struct i2c_mux_core *muxc, u32 chan) /* Only select the channel if its different from the last channel */ if (data->last_chan != regval) { ret = pca954x_reg_write(muxc->parent, client, regval); - data->last_chan = ret ? 0 : regval; + data->last_chan = ret < 0 ? 0 : regval; } return ret; diff --git a/drivers/mailbox/bcm-pdc-mailbox.c b/drivers/mailbox/bcm-pdc-mailbox.c index c19dd820ea9b..2aeb034d5fb9 100644 --- a/drivers/mailbox/bcm-pdc-mailbox.c +++ b/drivers/mailbox/bcm-pdc-mailbox.c @@ -60,7 +60,13 @@ #define RING_ENTRY_SIZE sizeof(struct dma64dd) /* # entries in PDC dma ring */ -#define PDC_RING_ENTRIES 128 +#define PDC_RING_ENTRIES 512 +/* + * Minimum number of ring descriptor entries that must be free to tell mailbox + * framework that it can submit another request + */ +#define PDC_RING_SPACE_MIN 15 + #define PDC_RING_SIZE (PDC_RING_ENTRIES * RING_ENTRY_SIZE) /* Rings are 8k aligned */ #define RING_ALIGN_ORDER 13 @@ -93,11 +99,9 @@ * Interrupt mask and status definitions. Enable interrupts for tx and rx on * ring 0 */ -#define PDC_XMTINT_0 (24 + PDC_RINGSET) #define PDC_RCVINT_0 (16 + PDC_RINGSET) -#define PDC_XMTINTEN_0 BIT(PDC_XMTINT_0) #define PDC_RCVINTEN_0 BIT(PDC_RCVINT_0) -#define PDC_INTMASK (PDC_XMTINTEN_0 | PDC_RCVINTEN_0) +#define PDC_INTMASK (PDC_RCVINTEN_0) #define PDC_LAZY_FRAMECOUNT 1 #define PDC_LAZY_TIMEOUT 10000 #define PDC_LAZY_INT (PDC_LAZY_TIMEOUT | (PDC_LAZY_FRAMECOUNT << 24)) @@ -117,15 +121,16 @@ /* * Sets the following bits for write to transmit control reg: - * 0 - XmtEn - enable activity on the tx channel * 11 - PtyChkDisable - parity check is disabled * 20:18 - BurstLen = 3 -> 2^7 = 128 byte data reads from memory */ -#define PDC_TX_CTL 0x000C0801 +#define PDC_TX_CTL 0x000C0800 + +/* Bit in tx control reg to enable tx channel */ +#define PDC_TX_ENABLE 0x1 /* * Sets the following bits for write to receive control reg: - * 0 - RcvEn - enable activity on the rx channel * 7:1 - RcvOffset - size in bytes of status region at start of rx frame buf * 9 - SepRxHdrDescEn - place start of new frames only in descriptors * that have StartOfFrame set @@ -135,7 +140,10 @@ * 11 - PtyChkDisable - parity check is disabled * 20:18 - BurstLen = 3 -> 2^7 = 128 byte data reads from memory */ -#define PDC_RX_CTL 0x000C0E01 +#define PDC_RX_CTL 0x000C0E00 + +/* Bit in rx control reg to enable rx channel */ +#define PDC_RX_ENABLE 0x1 #define CRYPTO_D64_RS0_CD_MASK ((PDC_RING_ENTRIES * RING_ENTRY_SIZE) - 1) @@ -252,11 +260,29 @@ struct pdc_ring_alloc { u32 size; /* ring allocation size in bytes */ }; +/* + * context associated with a receive descriptor. + * @rxp_ctx: opaque context associated with frame that starts at each + * rx ring index. + * @dst_sg: Scatterlist used to form reply frames beginning at a given ring + * index. Retained in order to unmap each sg after reply is processed. + * @rxin_numd: Number of rx descriptors associated with the message that starts + * at a descriptor index. Not set for every index. For example, + * if descriptor index i points to a scatterlist with 4 entries, + * then the next three descriptor indexes don't have a value set. + * @resp_hdr: Virtual address of buffer used to catch DMA rx status + * @resp_hdr_daddr: physical address of DMA rx status buffer + */ +struct pdc_rx_ctx { + void *rxp_ctx; + struct scatterlist *dst_sg; + u32 rxin_numd; + void *resp_hdr; + dma_addr_t resp_hdr_daddr; +}; + /* PDC state structure */ struct pdc_state { - /* synchronize access to this PDC state structure */ - spinlock_t pdc_lock; - /* Index of the PDC whose state is in this structure instance */ u8 pdc_idx; @@ -272,13 +298,8 @@ struct pdc_state { unsigned int pdc_irq; - /* - * Last interrupt status read from PDC device. Saved in interrupt - * handler so the handler can clear the interrupt in the device, - * and the interrupt thread called later can know which interrupt - * bits are active. - */ - unsigned long intstatus; + /* tasklet for deferred processing after DMA rx interrupt */ + struct tasklet_struct rx_tasklet; /* Number of bytes of receive status prior to each rx frame */ u32 rx_status_len; @@ -369,11 +390,7 @@ struct pdc_state { /* Index of next rx descriptor to post. */ u32 rxout; - /* - * opaque context associated with frame that starts at each - * rx ring index. - */ - void *rxp_ctx[PDC_RING_ENTRIES]; + struct pdc_rx_ctx rx_ctx[PDC_RING_ENTRIES]; /* * Scatterlists used to form request and reply frames beginning at a @@ -381,27 +398,18 @@ struct pdc_state { * is processed */ struct scatterlist *src_sg[PDC_RING_ENTRIES]; - struct scatterlist *dst_sg[PDC_RING_ENTRIES]; - - /* - * Number of rx descriptors associated with the message that starts - * at this descriptor index. Not set for every index. For example, - * if descriptor index i points to a scatterlist with 4 entries, then - * the next three descriptor indexes don't have a value set. - */ - u32 rxin_numd[PDC_RING_ENTRIES]; - - void *resp_hdr[PDC_RING_ENTRIES]; - dma_addr_t resp_hdr_daddr[PDC_RING_ENTRIES]; struct dentry *debugfs_stats; /* debug FS stats file for this PDC */ /* counters */ - u32 pdc_requests; /* number of request messages submitted */ - u32 pdc_replies; /* number of reply messages received */ - u32 txnobuf; /* count of tx ring full */ - u32 rxnobuf; /* count of rx ring full */ - u32 rx_oflow; /* count of rx overflows */ + u32 pdc_requests; /* number of request messages submitted */ + u32 pdc_replies; /* number of reply messages received */ + u32 last_tx_not_done; /* too few tx descriptors to indicate done */ + u32 tx_ring_full; /* unable to accept msg because tx ring full */ + u32 rx_ring_full; /* unable to accept msg because rx ring full */ + u32 txnobuf; /* unable to create tx descriptor */ + u32 rxnobuf; /* unable to create rx descriptor */ + u32 rx_oflow; /* count of rx overflows */ }; /* Global variables */ @@ -434,20 +442,33 @@ static ssize_t pdc_debugfs_read(struct file *filp, char __user *ubuf, out_offset += snprintf(buf + out_offset, out_count - out_offset, "SPU %u stats:\n", pdcs->pdc_idx); out_offset += snprintf(buf + out_offset, out_count - out_offset, - "PDC requests............%u\n", + "PDC requests....................%u\n", pdcs->pdc_requests); out_offset += snprintf(buf + out_offset, out_count - out_offset, - "PDC responses...........%u\n", + "PDC responses...................%u\n", pdcs->pdc_replies); out_offset += snprintf(buf + out_offset, out_count - out_offset, - "Tx err ring full........%u\n", + "Tx not done.....................%u\n", + pdcs->last_tx_not_done); + out_offset += snprintf(buf + out_offset, out_count - out_offset, + "Tx ring full....................%u\n", + pdcs->tx_ring_full); + out_offset += snprintf(buf + out_offset, out_count - out_offset, + "Rx ring full....................%u\n", + pdcs->rx_ring_full); + out_offset += snprintf(buf + out_offset, out_count - out_offset, + "Tx desc write fail. Ring full...%u\n", pdcs->txnobuf); out_offset += snprintf(buf + out_offset, out_count - out_offset, - "Rx err ring full........%u\n", + "Rx desc write fail. Ring full...%u\n", pdcs->rxnobuf); out_offset += snprintf(buf + out_offset, out_count - out_offset, - "Receive overflow........%u\n", + "Receive overflow................%u\n", pdcs->rx_oflow); + out_offset += snprintf(buf + out_offset, out_count - out_offset, + "Num frags in rx ring............%u\n", + NRXDACTIVE(pdcs->rxin, pdcs->last_rx_curr, + pdcs->nrxpost)); if (out_offset > out_count) out_offset = out_count; @@ -480,17 +501,16 @@ static void pdc_setup_debugfs(struct pdc_state *pdcs) if (!debugfs_dir) debugfs_dir = debugfs_create_dir(KBUILD_MODNAME, NULL); - pdcs->debugfs_stats = debugfs_create_file(spu_stats_name, S_IRUSR, + /* S_IRUSR == 0400 */ + pdcs->debugfs_stats = debugfs_create_file(spu_stats_name, 0400, debugfs_dir, pdcs, &pdc_debugfs_stats); } static void pdc_free_debugfs(void) { - if (debugfs_dir && simple_empty(debugfs_dir)) { - debugfs_remove_recursive(debugfs_dir); - debugfs_dir = NULL; - } + debugfs_remove_recursive(debugfs_dir); + debugfs_dir = NULL; } /** @@ -505,17 +525,17 @@ pdc_build_rxd(struct pdc_state *pdcs, dma_addr_t dma_addr, u32 buf_len, u32 flags) { struct device *dev = &pdcs->pdev->dev; + struct dma64dd *rxd = &pdcs->rxd_64[pdcs->rxout]; dev_dbg(dev, "Writing rx descriptor for PDC %u at index %u with length %u. flags %#x\n", pdcs->pdc_idx, pdcs->rxout, buf_len, flags); - iowrite32(lower_32_bits(dma_addr), - (void *)&pdcs->rxd_64[pdcs->rxout].addrlow); - iowrite32(upper_32_bits(dma_addr), - (void *)&pdcs->rxd_64[pdcs->rxout].addrhigh); - iowrite32(flags, (void *)&pdcs->rxd_64[pdcs->rxout].ctrl1); - iowrite32(buf_len, (void *)&pdcs->rxd_64[pdcs->rxout].ctrl2); + rxd->addrlow = cpu_to_le32(lower_32_bits(dma_addr)); + rxd->addrhigh = cpu_to_le32(upper_32_bits(dma_addr)); + rxd->ctrl1 = cpu_to_le32(flags); + rxd->ctrl2 = cpu_to_le32(buf_len); + /* bump ring index and return */ pdcs->rxout = NEXTRXD(pdcs->rxout, pdcs->nrxpost); } @@ -533,53 +553,50 @@ pdc_build_txd(struct pdc_state *pdcs, dma_addr_t dma_addr, u32 buf_len, u32 flags) { struct device *dev = &pdcs->pdev->dev; + struct dma64dd *txd = &pdcs->txd_64[pdcs->txout]; dev_dbg(dev, "Writing tx descriptor for PDC %u at index %u with length %u, flags %#x\n", pdcs->pdc_idx, pdcs->txout, buf_len, flags); - iowrite32(lower_32_bits(dma_addr), - (void *)&pdcs->txd_64[pdcs->txout].addrlow); - iowrite32(upper_32_bits(dma_addr), - (void *)&pdcs->txd_64[pdcs->txout].addrhigh); - iowrite32(flags, (void *)&pdcs->txd_64[pdcs->txout].ctrl1); - iowrite32(buf_len, (void *)&pdcs->txd_64[pdcs->txout].ctrl2); + txd->addrlow = cpu_to_le32(lower_32_bits(dma_addr)); + txd->addrhigh = cpu_to_le32(upper_32_bits(dma_addr)); + txd->ctrl1 = cpu_to_le32(flags); + txd->ctrl2 = cpu_to_le32(buf_len); /* bump ring index and return */ pdcs->txout = NEXTTXD(pdcs->txout, pdcs->ntxpost); } /** - * pdc_receive() - Receive a response message from a given SPU. + * pdc_receive_one() - Receive a response message from a given SPU. * @pdcs: PDC state for the SPU to receive from - * @mssg: mailbox message to be returned to client * * When the return code indicates success, the response message is available in * the receive buffers provided prior to submission of the request. * - * Input: - * pdcs - PDC state structure for the SPU to be polled - * mssg - mailbox message to be returned to client. This function sets the - * context pointer on the message to help the client associate the - * response with a request. - * * Return: PDC_SUCCESS if one or more receive descriptors was processed * -EAGAIN indicates that no response message is available * -EIO an error occurred */ static int -pdc_receive(struct pdc_state *pdcs, struct brcm_message *mssg) +pdc_receive_one(struct pdc_state *pdcs) { struct device *dev = &pdcs->pdev->dev; + struct mbox_controller *mbc; + struct mbox_chan *chan; + struct brcm_message mssg; u32 len, rx_status; u32 num_frags; - int i; u8 *resp_hdr; /* virtual addr of start of resp message DMA header */ u32 frags_rdy; /* number of fragments ready to read */ u32 rx_idx; /* ring index of start of receive frame */ dma_addr_t resp_hdr_daddr; + struct pdc_rx_ctx *rx_ctx; - spin_lock(&pdcs->pdc_lock); + mbc = &pdcs->mbc; + chan = &mbc->chans[0]; + mssg.type = BRCM_MESSAGE_SPU; /* * return if a complete response message is not yet ready. @@ -587,47 +604,34 @@ pdc_receive(struct pdc_state *pdcs, struct brcm_message *mssg) * to read. */ frags_rdy = NRXDACTIVE(pdcs->rxin, pdcs->last_rx_curr, pdcs->nrxpost); - if ((frags_rdy == 0) || (frags_rdy < pdcs->rxin_numd[pdcs->rxin])) { - /* See if the hw has written more fragments than we know */ - pdcs->last_rx_curr = - (ioread32((void *)&pdcs->rxregs_64->status0) & - CRYPTO_D64_RS0_CD_MASK) / RING_ENTRY_SIZE; - frags_rdy = NRXDACTIVE(pdcs->rxin, pdcs->last_rx_curr, - pdcs->nrxpost); - if ((frags_rdy == 0) || - (frags_rdy < pdcs->rxin_numd[pdcs->rxin])) { - /* No response ready */ - spin_unlock(&pdcs->pdc_lock); - return -EAGAIN; - } - /* can't read descriptors/data until write index is read */ - rmb(); - } + if ((frags_rdy == 0) || + (frags_rdy < pdcs->rx_ctx[pdcs->rxin].rxin_numd)) + /* No response ready */ + return -EAGAIN; num_frags = pdcs->txin_numd[pdcs->txin]; + WARN_ON(num_frags == 0); + dma_unmap_sg(dev, pdcs->src_sg[pdcs->txin], sg_nents(pdcs->src_sg[pdcs->txin]), DMA_TO_DEVICE); - for (i = 0; i < num_frags; i++) - pdcs->txin = NEXTTXD(pdcs->txin, pdcs->ntxpost); + pdcs->txin = (pdcs->txin + num_frags) & pdcs->ntxpost; dev_dbg(dev, "PDC %u reclaimed %d tx descriptors", pdcs->pdc_idx, num_frags); rx_idx = pdcs->rxin; - num_frags = pdcs->rxin_numd[rx_idx]; + rx_ctx = &pdcs->rx_ctx[rx_idx]; + num_frags = rx_ctx->rxin_numd; /* Return opaque context with result */ - mssg->ctx = pdcs->rxp_ctx[rx_idx]; - pdcs->rxp_ctx[rx_idx] = NULL; - resp_hdr = pdcs->resp_hdr[rx_idx]; - resp_hdr_daddr = pdcs->resp_hdr_daddr[rx_idx]; - dma_unmap_sg(dev, pdcs->dst_sg[rx_idx], - sg_nents(pdcs->dst_sg[rx_idx]), DMA_FROM_DEVICE); - - for (i = 0; i < num_frags; i++) - pdcs->rxin = NEXTRXD(pdcs->rxin, pdcs->nrxpost); + mssg.ctx = rx_ctx->rxp_ctx; + rx_ctx->rxp_ctx = NULL; + resp_hdr = rx_ctx->resp_hdr; + resp_hdr_daddr = rx_ctx->resp_hdr_daddr; + dma_unmap_sg(dev, rx_ctx->dst_sg, sg_nents(rx_ctx->dst_sg), + DMA_FROM_DEVICE); - spin_unlock(&pdcs->pdc_lock); + pdcs->rxin = (pdcs->rxin + num_frags) & pdcs->nrxpost; dev_dbg(dev, "PDC %u reclaimed %d rx descriptors", pdcs->pdc_idx, num_frags); @@ -659,12 +663,35 @@ pdc_receive(struct pdc_state *pdcs, struct brcm_message *mssg) dma_pool_free(pdcs->rx_buf_pool, resp_hdr, resp_hdr_daddr); + mbox_chan_received_data(chan, &mssg); + pdcs->pdc_replies++; - /* if we read one or more rx descriptors, claim success */ - if (num_frags > 0) - return PDC_SUCCESS; - else - return -EIO; + return PDC_SUCCESS; +} + +/** + * pdc_receive() - Process as many responses as are available in the rx ring. + * @pdcs: PDC state + * + * Called within the hard IRQ. + * Return: + */ +static int +pdc_receive(struct pdc_state *pdcs) +{ + int rx_status; + + /* read last_rx_curr from register once */ + pdcs->last_rx_curr = + (ioread32(&pdcs->rxregs_64->status0) & + CRYPTO_D64_RS0_CD_MASK) / RING_ENTRY_SIZE; + + do { + /* Could be many frames ready */ + rx_status = pdc_receive_one(pdcs); + } while (rx_status == PDC_SUCCESS); + + return 0; } /** @@ -766,8 +793,8 @@ static int pdc_tx_list_final(struct pdc_state *pdcs) * before chip starts to process new request */ wmb(); - iowrite32(pdcs->rxout << 4, (void *)&pdcs->rxregs_64->ptr); - iowrite32(pdcs->txout << 4, (void *)&pdcs->txregs_64->ptr); + iowrite32(pdcs->rxout << 4, &pdcs->rxregs_64->ptr); + iowrite32(pdcs->txout << 4, &pdcs->txregs_64->ptr); pdcs->pdc_requests++; return PDC_SUCCESS; @@ -796,6 +823,7 @@ static int pdc_rx_list_init(struct pdc_state *pdcs, struct scatterlist *dst_sg, u32 rx_pkt_cnt = 1; /* Adding a single rx buffer */ dma_addr_t daddr; void *vaddr; + struct pdc_rx_ctx *rx_ctx; rx_avail = pdcs->nrxpost - NRXDACTIVE(pdcs->rxin, pdcs->rxout, pdcs->nrxpost); @@ -806,7 +834,7 @@ static int pdc_rx_list_init(struct pdc_state *pdcs, struct scatterlist *dst_sg, /* allocate a buffer for the dma rx status */ vaddr = dma_pool_zalloc(pdcs->rx_buf_pool, GFP_ATOMIC, &daddr); - if (!vaddr) + if (unlikely(!vaddr)) return -ENOMEM; /* @@ -819,15 +847,16 @@ static int pdc_rx_list_init(struct pdc_state *pdcs, struct scatterlist *dst_sg, /* This is always the first descriptor in the receive sequence */ flags = D64_CTRL1_SOF; - pdcs->rxin_numd[pdcs->rx_msg_start] = 1; + pdcs->rx_ctx[pdcs->rx_msg_start].rxin_numd = 1; if (unlikely(pdcs->rxout == (pdcs->nrxd - 1))) flags |= D64_CTRL1_EOT; - pdcs->rxp_ctx[pdcs->rxout] = ctx; - pdcs->dst_sg[pdcs->rxout] = dst_sg; - pdcs->resp_hdr[pdcs->rxout] = vaddr; - pdcs->resp_hdr_daddr[pdcs->rxout] = daddr; + rx_ctx = &pdcs->rx_ctx[pdcs->rxout]; + rx_ctx->rxp_ctx = ctx; + rx_ctx->dst_sg = dst_sg; + rx_ctx->resp_hdr = vaddr; + rx_ctx->resp_hdr_daddr = daddr; pdc_build_rxd(pdcs, daddr, pdcs->pdc_resp_hdr_len, flags); return PDC_SUCCESS; } @@ -895,7 +924,7 @@ static int pdc_rx_list_sg_add(struct pdc_state *pdcs, struct scatterlist *sg) desc_w++; sg = sg_next(sg); } - pdcs->rxin_numd[pdcs->rx_msg_start] += desc_w; + pdcs->rx_ctx[pdcs->rx_msg_start].rxin_numd += desc_w; return PDC_SUCCESS; } @@ -903,7 +932,7 @@ static int pdc_rx_list_sg_add(struct pdc_state *pdcs, struct scatterlist *sg) /** * pdc_irq_handler() - Interrupt handler called in interrupt context. * @irq: Interrupt number that has fired - * @cookie: PDC state for DMA engine that generated the interrupt + * @data: device struct for DMA engine that generated the interrupt * * We have to clear the device interrupt status flags here. So cache the * status for later use in the thread function. Other than that, just return @@ -912,88 +941,39 @@ static int pdc_rx_list_sg_add(struct pdc_state *pdcs, struct scatterlist *sg) * Return: IRQ_WAKE_THREAD if interrupt is ours * IRQ_NONE otherwise */ -static irqreturn_t pdc_irq_handler(int irq, void *cookie) +static irqreturn_t pdc_irq_handler(int irq, void *data) { - struct pdc_state *pdcs = cookie; + struct device *dev = (struct device *)data; + struct pdc_state *pdcs = dev_get_drvdata(dev); u32 intstatus = ioread32(pdcs->pdc_reg_vbase + PDC_INTSTATUS_OFFSET); - if (intstatus & PDC_XMTINTEN_0) - set_bit(PDC_XMTINT_0, &pdcs->intstatus); - if (intstatus & PDC_RCVINTEN_0) - set_bit(PDC_RCVINT_0, &pdcs->intstatus); + if (unlikely(intstatus == 0)) + return IRQ_NONE; + + /* Disable interrupts until soft handler runs */ + iowrite32(0, pdcs->pdc_reg_vbase + PDC_INTMASK_OFFSET); /* Clear interrupt flags in device */ iowrite32(intstatus, pdcs->pdc_reg_vbase + PDC_INTSTATUS_OFFSET); /* Wakeup IRQ thread */ - if (pdcs && (irq == pdcs->pdc_irq) && (intstatus & PDC_INTMASK)) - return IRQ_WAKE_THREAD; - - return IRQ_NONE; + tasklet_schedule(&pdcs->rx_tasklet); + return IRQ_HANDLED; } /** - * pdc_irq_thread() - Function invoked on deferred thread when a DMA tx has - * completed or data is available to receive. - * @irq: Interrupt number - * @cookie: PDC state for PDC that generated the interrupt - * - * On DMA tx complete, notify the mailbox client. On DMA rx complete, process - * as many SPU response messages as are available and send each to the mailbox - * client. - * - * Return: IRQ_HANDLED if we recognized and handled the interrupt - * IRQ_NONE otherwise + * pdc_tasklet_cb() - Tasklet callback that runs the deferred processing after + * a DMA receive interrupt. Reenables the receive interrupt. + * @data: PDC state structure */ -static irqreturn_t pdc_irq_thread(int irq, void *cookie) +static void pdc_tasklet_cb(unsigned long data) { - struct pdc_state *pdcs = cookie; - struct mbox_controller *mbc; - struct mbox_chan *chan; - bool tx_int; - bool rx_int; - int rx_status; - struct brcm_message mssg; + struct pdc_state *pdcs = (struct pdc_state *)data; - tx_int = test_and_clear_bit(PDC_XMTINT_0, &pdcs->intstatus); - rx_int = test_and_clear_bit(PDC_RCVINT_0, &pdcs->intstatus); + pdc_receive(pdcs); - if (pdcs && (tx_int || rx_int)) { - dev_dbg(&pdcs->pdev->dev, - "%s() got irq %d with tx_int %s, rx_int %s", - __func__, irq, - tx_int ? "set" : "clear", rx_int ? "set" : "clear"); - - mbc = &pdcs->mbc; - chan = &mbc->chans[0]; - - if (tx_int) { - dev_dbg(&pdcs->pdev->dev, "%s(): tx done", __func__); - /* only one frame in flight at a time */ - mbox_chan_txdone(chan, PDC_SUCCESS); - } - if (rx_int) { - while (1) { - /* Could be many frames ready */ - memset(&mssg, 0, sizeof(mssg)); - mssg.type = BRCM_MESSAGE_SPU; - rx_status = pdc_receive(pdcs, &mssg); - if (rx_status >= 0) { - dev_dbg(&pdcs->pdev->dev, - "%s(): invoking client rx cb", - __func__); - mbox_chan_received_data(chan, &mssg); - } else { - dev_dbg(&pdcs->pdev->dev, - "%s(): no SPU response available", - __func__); - break; - } - } - } - return IRQ_HANDLED; - } - return IRQ_NONE; + /* reenable interrupts */ + iowrite32(PDC_INTMASK, pdcs->pdc_reg_vbase + PDC_INTMASK_OFFSET); } /** @@ -1016,14 +996,14 @@ static int pdc_ring_init(struct pdc_state *pdcs, int ringset) /* Allocate tx ring */ tx.vbase = dma_pool_zalloc(pdcs->ring_pool, GFP_KERNEL, &tx.dmabase); - if (!tx.vbase) { + if (unlikely(!tx.vbase)) { err = -ENOMEM; goto done; } /* Allocate rx ring */ rx.vbase = dma_pool_zalloc(pdcs->ring_pool, GFP_KERNEL, &rx.dmabase); - if (!rx.vbase) { + if (unlikely(!rx.vbase)) { err = -ENOMEM; goto fail_dealloc; } @@ -1033,9 +1013,6 @@ static int pdc_ring_init(struct pdc_state *pdcs, int ringset) dev_dbg(dev, " - base DMA addr of rx ring %pad", &rx.dmabase); dev_dbg(dev, " - base virtual addr of rx ring %p", rx.vbase); - /* lock after ring allocation to avoid scheduling while atomic */ - spin_lock(&pdcs->pdc_lock); - memcpy(&pdcs->tx_ring_alloc, &tx, sizeof(tx)); memcpy(&pdcs->rx_ring_alloc, &rx, sizeof(rx)); @@ -1053,40 +1030,52 @@ static int pdc_ring_init(struct pdc_state *pdcs, int ringset) /* Tell device the base DMA address of each ring */ dma_reg = &pdcs->regs->dmaregs[ringset]; + + /* But first disable DMA and set curptr to 0 for both TX & RX */ + iowrite32(PDC_TX_CTL, &dma_reg->dmaxmt.control); + iowrite32((PDC_RX_CTL + (pdcs->rx_status_len << 1)), + &dma_reg->dmarcv.control); + iowrite32(0, &dma_reg->dmaxmt.ptr); + iowrite32(0, &dma_reg->dmarcv.ptr); + + /* Set base DMA addresses */ iowrite32(lower_32_bits(pdcs->tx_ring_alloc.dmabase), - (void *)&dma_reg->dmaxmt.addrlow); + &dma_reg->dmaxmt.addrlow); iowrite32(upper_32_bits(pdcs->tx_ring_alloc.dmabase), - (void *)&dma_reg->dmaxmt.addrhigh); + &dma_reg->dmaxmt.addrhigh); iowrite32(lower_32_bits(pdcs->rx_ring_alloc.dmabase), - (void *)&dma_reg->dmarcv.addrlow); + &dma_reg->dmarcv.addrlow); iowrite32(upper_32_bits(pdcs->rx_ring_alloc.dmabase), - (void *)&dma_reg->dmarcv.addrhigh); + &dma_reg->dmarcv.addrhigh); + + /* Re-enable DMA */ + iowrite32(PDC_TX_CTL | PDC_TX_ENABLE, &dma_reg->dmaxmt.control); + iowrite32((PDC_RX_CTL | PDC_RX_ENABLE | (pdcs->rx_status_len << 1)), + &dma_reg->dmarcv.control); /* Initialize descriptors */ for (i = 0; i < PDC_RING_ENTRIES; i++) { /* Every tx descriptor can be used for start of frame. */ if (i != pdcs->ntxpost) { iowrite32(D64_CTRL1_SOF | D64_CTRL1_EOF, - (void *)&pdcs->txd_64[i].ctrl1); + &pdcs->txd_64[i].ctrl1); } else { /* Last descriptor in ringset. Set End of Table. */ iowrite32(D64_CTRL1_SOF | D64_CTRL1_EOF | - D64_CTRL1_EOT, - (void *)&pdcs->txd_64[i].ctrl1); + D64_CTRL1_EOT, &pdcs->txd_64[i].ctrl1); } /* Every rx descriptor can be used for start of frame */ if (i != pdcs->nrxpost) { iowrite32(D64_CTRL1_SOF, - (void *)&pdcs->rxd_64[i].ctrl1); + &pdcs->rxd_64[i].ctrl1); } else { /* Last descriptor in ringset. Set End of Table. */ iowrite32(D64_CTRL1_SOF | D64_CTRL1_EOT, - (void *)&pdcs->rxd_64[i].ctrl1); + &pdcs->rxd_64[i].ctrl1); } } - spin_unlock(&pdcs->pdc_lock); return PDC_SUCCESS; fail_dealloc: @@ -1111,6 +1100,80 @@ static void pdc_ring_free(struct pdc_state *pdcs) } /** + * pdc_desc_count() - Count the number of DMA descriptors that will be required + * for a given scatterlist. Account for the max length of a DMA buffer. + * @sg: Scatterlist to be DMA'd + * Return: Number of descriptors required + */ +static u32 pdc_desc_count(struct scatterlist *sg) +{ + u32 cnt = 0; + + while (sg) { + cnt += ((sg->length / PDC_DMA_BUF_MAX) + 1); + sg = sg_next(sg); + } + return cnt; +} + +/** + * pdc_rings_full() - Check whether the tx ring has room for tx_cnt descriptors + * and the rx ring has room for rx_cnt descriptors. + * @pdcs: PDC state + * @tx_cnt: The number of descriptors required in the tx ring + * @rx_cnt: The number of descriptors required i the rx ring + * + * Return: true if one of the rings does not have enough space + * false if sufficient space is available in both rings + */ +static bool pdc_rings_full(struct pdc_state *pdcs, int tx_cnt, int rx_cnt) +{ + u32 rx_avail; + u32 tx_avail; + bool full = false; + + /* Check if the tx and rx rings are likely to have enough space */ + rx_avail = pdcs->nrxpost - NRXDACTIVE(pdcs->rxin, pdcs->rxout, + pdcs->nrxpost); + if (unlikely(rx_cnt > rx_avail)) { + pdcs->rx_ring_full++; + full = true; + } + + if (likely(!full)) { + tx_avail = pdcs->ntxpost - NTXDACTIVE(pdcs->txin, pdcs->txout, + pdcs->ntxpost); + if (unlikely(tx_cnt > tx_avail)) { + pdcs->tx_ring_full++; + full = true; + } + } + return full; +} + +/** + * pdc_last_tx_done() - If both the tx and rx rings have at least + * PDC_RING_SPACE_MIN descriptors available, then indicate that the mailbox + * framework can submit another message. + * @chan: mailbox channel to check + * Return: true if PDC can accept another message on this channel + */ +static bool pdc_last_tx_done(struct mbox_chan *chan) +{ + struct pdc_state *pdcs = chan->con_priv; + bool ret; + + if (unlikely(pdc_rings_full(pdcs, PDC_RING_SPACE_MIN, + PDC_RING_SPACE_MIN))) { + pdcs->last_tx_not_done++; + ret = false; + } else { + ret = true; + } + return ret; +} + +/** * pdc_send_data() - mailbox send_data function * @chan: The mailbox channel on which the data is sent. The channel * corresponds to a DMA ringset. @@ -1141,29 +1204,43 @@ static int pdc_send_data(struct mbox_chan *chan, void *data) int src_nent; int dst_nent; int nent; + u32 tx_desc_req; + u32 rx_desc_req; - if (mssg->type != BRCM_MESSAGE_SPU) + if (unlikely(mssg->type != BRCM_MESSAGE_SPU)) return -ENOTSUPP; src_nent = sg_nents(mssg->spu.src); - if (src_nent) { + if (likely(src_nent)) { nent = dma_map_sg(dev, mssg->spu.src, src_nent, DMA_TO_DEVICE); - if (nent == 0) + if (unlikely(nent == 0)) return -EIO; } dst_nent = sg_nents(mssg->spu.dst); - if (dst_nent) { + if (likely(dst_nent)) { nent = dma_map_sg(dev, mssg->spu.dst, dst_nent, DMA_FROM_DEVICE); - if (nent == 0) { + if (unlikely(nent == 0)) { dma_unmap_sg(dev, mssg->spu.src, src_nent, DMA_TO_DEVICE); return -EIO; } } - spin_lock(&pdcs->pdc_lock); + /* + * Check if the tx and rx rings have enough space. Do this prior to + * writing any tx or rx descriptors. Need to ensure that we do not write + * a partial set of descriptors, or write just rx descriptors but + * corresponding tx descriptors don't fit. Note that we want this check + * and the entire sequence of descriptor to happen without another + * thread getting in. The channel spin lock in the mailbox framework + * ensures this. + */ + tx_desc_req = pdc_desc_count(mssg->spu.src); + rx_desc_req = pdc_desc_count(mssg->spu.dst); + if (unlikely(pdc_rings_full(pdcs, tx_desc_req, rx_desc_req + 1))) + return -ENOSPC; /* Create rx descriptors to SPU catch response */ err = pdc_rx_list_init(pdcs, mssg->spu.dst, mssg->ctx); @@ -1173,9 +1250,7 @@ static int pdc_send_data(struct mbox_chan *chan, void *data) err |= pdc_tx_list_sg_add(pdcs, mssg->spu.src); err |= pdc_tx_list_final(pdcs); /* initiate transfer */ - spin_unlock(&pdcs->pdc_lock); - - if (err) + if (unlikely(err)) dev_err(&pdcs->pdev->dev, "%s failed with error %d", __func__, err); @@ -1224,26 +1299,29 @@ void pdc_hw_init(struct pdc_state *pdcs) /* initialize data structures */ pdcs->regs = (struct pdc_regs *)pdcs->pdc_reg_vbase; pdcs->txregs_64 = (struct dma64_regs *) - (void *)(((u8 *)pdcs->pdc_reg_vbase) + + (((u8 *)pdcs->pdc_reg_vbase) + PDC_TXREGS_OFFSET + (sizeof(struct dma64) * ringset)); pdcs->rxregs_64 = (struct dma64_regs *) - (void *)(((u8 *)pdcs->pdc_reg_vbase) + + (((u8 *)pdcs->pdc_reg_vbase) + PDC_RXREGS_OFFSET + (sizeof(struct dma64) * ringset)); pdcs->ntxd = PDC_RING_ENTRIES; pdcs->nrxd = PDC_RING_ENTRIES; pdcs->ntxpost = PDC_RING_ENTRIES - 1; pdcs->nrxpost = PDC_RING_ENTRIES - 1; - pdcs->regs->intmask = 0; + iowrite32(0, &pdcs->regs->intmask); dma_reg = &pdcs->regs->dmaregs[ringset]; - iowrite32(0, (void *)&dma_reg->dmaxmt.ptr); - iowrite32(0, (void *)&dma_reg->dmarcv.ptr); - iowrite32(PDC_TX_CTL, (void *)&dma_reg->dmaxmt.control); + /* Configure DMA but will enable later in pdc_ring_init() */ + iowrite32(PDC_TX_CTL, &dma_reg->dmaxmt.control); iowrite32(PDC_RX_CTL + (pdcs->rx_status_len << 1), - (void *)&dma_reg->dmarcv.control); + &dma_reg->dmarcv.control); + + /* Reset current index pointers after making sure DMA is disabled */ + iowrite32(0, &dma_reg->dmaxmt.ptr); + iowrite32(0, &dma_reg->dmarcv.ptr); if (pdcs->pdc_resp_hdr_len == PDC_SPU2_RESP_HDR_LEN) iowrite32(PDC_CKSUM_CTRL, @@ -1251,6 +1329,21 @@ void pdc_hw_init(struct pdc_state *pdcs) } /** + * pdc_hw_disable() - Disable the tx and rx control in the hw. + * @pdcs: PDC state structure + * + */ +static void pdc_hw_disable(struct pdc_state *pdcs) +{ + struct dma64 *dma_reg; + + dma_reg = &pdcs->regs->dmaregs[PDC_RINGSET]; + iowrite32(PDC_TX_CTL, &dma_reg->dmaxmt.control); + iowrite32(PDC_RX_CTL + (pdcs->rx_status_len << 1), + &dma_reg->dmarcv.control); +} + +/** * pdc_rx_buf_pool_create() - Pool of receive buffers used to catch the metadata * header returned with each response message. * @pdcs: PDC state structure @@ -1301,8 +1394,6 @@ static int pdc_interrupts_init(struct pdc_state *pdcs) struct device_node *dn = pdev->dev.of_node; int err; - pdcs->intstatus = 0; - /* interrupt configuration */ iowrite32(PDC_INTMASK, pdcs->pdc_reg_vbase + PDC_INTMASK_OFFSET); iowrite32(PDC_LAZY_INT, pdcs->pdc_reg_vbase + PDC_RCVLAZY0_OFFSET); @@ -1311,11 +1402,11 @@ static int pdc_interrupts_init(struct pdc_state *pdcs) pdcs->pdc_irq = irq_of_parse_and_map(dn, 0); dev_dbg(dev, "pdc device %s irq %u for pdcs %p", dev_name(dev), pdcs->pdc_irq, pdcs); - err = devm_request_threaded_irq(dev, pdcs->pdc_irq, - pdc_irq_handler, - pdc_irq_thread, 0, dev_name(dev), pdcs); + + err = devm_request_irq(dev, pdcs->pdc_irq, pdc_irq_handler, 0, + dev_name(dev), dev); if (err) { - dev_err(dev, "threaded tx IRQ %u request failed with err %d\n", + dev_err(dev, "IRQ %u request failed with err %d\n", pdcs->pdc_irq, err); return err; } @@ -1324,6 +1415,7 @@ static int pdc_interrupts_init(struct pdc_state *pdcs) static const struct mbox_chan_ops pdc_mbox_chan_ops = { .send_data = pdc_send_data, + .last_tx_done = pdc_last_tx_done, .startup = pdc_startup, .shutdown = pdc_shutdown }; @@ -1356,8 +1448,9 @@ static int pdc_mb_init(struct pdc_state *pdcs) if (!mbc->chans) return -ENOMEM; - mbc->txdone_irq = true; - mbc->txdone_poll = false; + mbc->txdone_irq = false; + mbc->txdone_poll = true; + mbc->txpoll_period = 1; for (chan_index = 0; chan_index < mbc->num_chans; chan_index++) mbc->chans[chan_index].con_priv = pdcs; @@ -1427,7 +1520,6 @@ static int pdc_probe(struct platform_device *pdev) goto cleanup; } - spin_lock_init(&pdcs->pdc_lock); pdcs->pdev = pdev; platform_set_drvdata(pdev, pdcs); pdcs->pdc_idx = pdcg.num_spu; @@ -1473,6 +1565,9 @@ static int pdc_probe(struct platform_device *pdev) pdc_hw_init(pdcs); + /* Init tasklet for deferred DMA rx processing */ + tasklet_init(&pdcs->rx_tasklet, pdc_tasklet_cb, (unsigned long)pdcs); + err = pdc_interrupts_init(pdcs); if (err) goto cleanup_buf_pool; @@ -1489,6 +1584,7 @@ static int pdc_probe(struct platform_device *pdev) return PDC_SUCCESS; cleanup_buf_pool: + tasklet_kill(&pdcs->rx_tasklet); dma_pool_destroy(pdcs->rx_buf_pool); cleanup_ring_pool: @@ -1504,6 +1600,10 @@ static int pdc_remove(struct platform_device *pdev) pdc_free_debugfs(); + tasklet_kill(&pdcs->rx_tasklet); + + pdc_hw_disable(pdcs); + mbox_controller_unregister(&pdcs->mbc); dma_pool_destroy(pdcs->rx_buf_pool); diff --git a/drivers/mailbox/mailbox-sti.c b/drivers/mailbox/mailbox-sti.c index a334db5c9f1c..41bcd339b68a 100644 --- a/drivers/mailbox/mailbox-sti.c +++ b/drivers/mailbox/mailbox-sti.c @@ -403,6 +403,7 @@ static const struct of_device_id sti_mailbox_match[] = { }, { } }; +MODULE_DEVICE_TABLE(of, sti_mailbox_match); static int sti_mbox_probe(struct platform_device *pdev) { diff --git a/drivers/mailbox/mailbox-test.c b/drivers/mailbox/mailbox-test.c index 9ca96e9db6bf..9c79f8019d2a 100644 --- a/drivers/mailbox/mailbox-test.c +++ b/drivers/mailbox/mailbox-test.c @@ -11,12 +11,14 @@ #include <linux/debugfs.h> #include <linux/err.h> +#include <linux/fs.h> #include <linux/io.h> #include <linux/kernel.h> #include <linux/mailbox_client.h> #include <linux/module.h> #include <linux/of.h> #include <linux/platform_device.h> +#include <linux/poll.h> #include <linux/slab.h> #include <linux/uaccess.h> @@ -39,6 +41,8 @@ struct mbox_test_device { char *signal; char *message; spinlock_t lock; + wait_queue_head_t waitq; + struct fasync_struct *async_queue; }; static ssize_t mbox_test_signal_write(struct file *filp, @@ -81,6 +85,13 @@ static const struct file_operations mbox_test_signal_ops = { .llseek = generic_file_llseek, }; +static int mbox_test_message_fasync(int fd, struct file *filp, int on) +{ + struct mbox_test_device *tdev = filp->private_data; + + return fasync_helper(fd, filp, on, &tdev->async_queue); +} + static ssize_t mbox_test_message_write(struct file *filp, const char __user *userbuf, size_t count, loff_t *ppos) @@ -138,6 +149,20 @@ out: return ret < 0 ? ret : count; } +static bool mbox_test_message_data_ready(struct mbox_test_device *tdev) +{ + unsigned char data; + unsigned long flags; + + spin_lock_irqsave(&tdev->lock, flags); + data = tdev->rx_buffer[0]; + spin_unlock_irqrestore(&tdev->lock, flags); + + if (data != '\0') + return true; + return false; +} + static ssize_t mbox_test_message_read(struct file *filp, char __user *userbuf, size_t count, loff_t *ppos) { @@ -147,6 +172,8 @@ static ssize_t mbox_test_message_read(struct file *filp, char __user *userbuf, int l = 0; int ret; + DECLARE_WAITQUEUE(wait, current); + touser = kzalloc(MBOX_HEXDUMP_MAX_LEN + 1, GFP_KERNEL); if (!touser) return -ENOMEM; @@ -155,15 +182,29 @@ static ssize_t mbox_test_message_read(struct file *filp, char __user *userbuf, ret = snprintf(touser, 20, "<NO RX CAPABILITY>\n"); ret = simple_read_from_buffer(userbuf, count, ppos, touser, ret); - goto out; + goto kfree_err; } - if (tdev->rx_buffer[0] == '\0') { - ret = snprintf(touser, 9, "<EMPTY>\n"); - ret = simple_read_from_buffer(userbuf, count, ppos, - touser, ret); - goto out; - } + add_wait_queue(&tdev->waitq, &wait); + + do { + __set_current_state(TASK_INTERRUPTIBLE); + + if (mbox_test_message_data_ready(tdev)) + break; + + if (filp->f_flags & O_NONBLOCK) { + ret = -EAGAIN; + goto waitq_err; + } + + if (signal_pending(current)) { + ret = -ERESTARTSYS; + goto waitq_err; + } + schedule(); + + } while (1); spin_lock_irqsave(&tdev->lock, flags); @@ -185,14 +226,31 @@ static ssize_t mbox_test_message_read(struct file *filp, char __user *userbuf, spin_unlock_irqrestore(&tdev->lock, flags); ret = simple_read_from_buffer(userbuf, count, ppos, touser, MBOX_HEXDUMP_MAX_LEN); -out: +waitq_err: + __set_current_state(TASK_RUNNING); + remove_wait_queue(&tdev->waitq, &wait); +kfree_err: kfree(touser); return ret; } +static unsigned int +mbox_test_message_poll(struct file *filp, struct poll_table_struct *wait) +{ + struct mbox_test_device *tdev = filp->private_data; + + poll_wait(filp, &tdev->waitq, wait); + + if (mbox_test_message_data_ready(tdev)) + return POLLIN | POLLRDNORM; + return 0; +} + static const struct file_operations mbox_test_message_ops = { .write = mbox_test_message_write, .read = mbox_test_message_read, + .fasync = mbox_test_message_fasync, + .poll = mbox_test_message_poll, .open = simple_open, .llseek = generic_file_llseek, }; @@ -234,6 +292,10 @@ static void mbox_test_receive_message(struct mbox_client *client, void *message) memcpy(tdev->rx_buffer, message, MBOX_MAX_MSG_LEN); } spin_unlock_irqrestore(&tdev->lock, flags); + + wake_up_interruptible(&tdev->waitq); + + kill_fasync(&tdev->async_queue, SIGIO, POLL_IN); } static void mbox_test_prepare_message(struct mbox_client *client, void *message) @@ -290,6 +352,7 @@ static int mbox_test_probe(struct platform_device *pdev) { struct mbox_test_device *tdev; struct resource *res; + resource_size_t size; int ret; tdev = devm_kzalloc(&pdev->dev, sizeof(*tdev), GFP_KERNEL); @@ -298,14 +361,21 @@ static int mbox_test_probe(struct platform_device *pdev) /* It's okay for MMIO to be NULL */ res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + size = resource_size(res); tdev->tx_mmio = devm_ioremap_resource(&pdev->dev, res); - if (IS_ERR(tdev->tx_mmio)) + if (PTR_ERR(tdev->tx_mmio) == -EBUSY) + /* if reserved area in SRAM, try just ioremap */ + tdev->tx_mmio = devm_ioremap(&pdev->dev, res->start, size); + else if (IS_ERR(tdev->tx_mmio)) tdev->tx_mmio = NULL; /* If specified, second reg entry is Rx MMIO */ res = platform_get_resource(pdev, IORESOURCE_MEM, 1); + size = resource_size(res); tdev->rx_mmio = devm_ioremap_resource(&pdev->dev, res); - if (IS_ERR(tdev->rx_mmio)) + if (PTR_ERR(tdev->rx_mmio) == -EBUSY) + tdev->rx_mmio = devm_ioremap(&pdev->dev, res->start, size); + else if (IS_ERR(tdev->rx_mmio)) tdev->rx_mmio = tdev->tx_mmio; tdev->tx_channel = mbox_test_request_channel(pdev, "tx"); @@ -334,6 +404,7 @@ static int mbox_test_probe(struct platform_device *pdev) if (ret) return ret; + init_waitqueue_head(&tdev->waitq); dev_info(&pdev->dev, "Successfully registered\n"); return 0; @@ -357,6 +428,7 @@ static const struct of_device_id mbox_test_match[] = { { .compatible = "mailbox-test" }, {}, }; +MODULE_DEVICE_TABLE(of, mbox_test_match); static struct platform_driver mbox_test_driver = { .driver = { diff --git a/drivers/net/ethernet/Kconfig b/drivers/net/ethernet/Kconfig index 6e16e441f85e..e4c28fed61d5 100644 --- a/drivers/net/ethernet/Kconfig +++ b/drivers/net/ethernet/Kconfig @@ -166,7 +166,6 @@ source "drivers/net/ethernet/seeq/Kconfig" source "drivers/net/ethernet/silan/Kconfig" source "drivers/net/ethernet/sis/Kconfig" source "drivers/net/ethernet/sfc/Kconfig" -source "drivers/net/ethernet/sfc/falcon/Kconfig" source "drivers/net/ethernet/sgi/Kconfig" source "drivers/net/ethernet/smsc/Kconfig" source "drivers/net/ethernet/stmicro/Kconfig" diff --git a/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c b/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c index 16e12c45904b..21f80f5744ba 100644 --- a/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c +++ b/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c @@ -1469,6 +1469,12 @@ static int octeon_mgmt_probe(struct platform_device *pdev) p->agl = (u64)devm_ioremap(&pdev->dev, p->agl_phys, p->agl_size); p->agl_prt_ctl = (u64)devm_ioremap(&pdev->dev, p->agl_prt_ctl_phys, p->agl_prt_ctl_size); + if (!p->mix || !p->agl || !p->agl_prt_ctl) { + dev_err(&pdev->dev, "failed to map I/O memory\n"); + result = -ENOMEM; + goto err; + } + spin_lock_init(&p->lock); skb_queue_head_init(&p->tx_list); diff --git a/drivers/net/ethernet/freescale/fman/Kconfig b/drivers/net/ethernet/freescale/fman/Kconfig index 79b7c84b7869..dc0850b3b517 100644 --- a/drivers/net/ethernet/freescale/fman/Kconfig +++ b/drivers/net/ethernet/freescale/fman/Kconfig @@ -1,6 +1,6 @@ config FSL_FMAN tristate "FMan support" - depends on FSL_SOC || COMPILE_TEST + depends on FSL_SOC || ARCH_LAYERSCAPE || COMPILE_TEST select GENERIC_ALLOCATOR select PHYLIB default n diff --git a/drivers/net/ethernet/freescale/fman/fman.c b/drivers/net/ethernet/freescale/fman/fman.c index dafd9e1baba2..f60845f0c6ca 100644 --- a/drivers/net/ethernet/freescale/fman/fman.c +++ b/drivers/net/ethernet/freescale/fman/fman.c @@ -1890,6 +1890,7 @@ static int fman_reset(struct fman *fman) goto _return; } else { +#ifdef CONFIG_PPC struct device_node *guts_node; struct ccsr_guts __iomem *guts_regs; u32 devdisr2, reg; @@ -1921,6 +1922,7 @@ static int fman_reset(struct fman *fman) /* Enable all MACs */ iowrite32be(reg, &guts_regs->devdisr2); +#endif /* Perform FMan reset */ iowrite32be(FPM_RSTC_FM_RESET, &fman->fpm_regs->fm_rstc); @@ -1932,25 +1934,31 @@ static int fman_reset(struct fman *fman) } while (((ioread32be(&fman->fpm_regs->fm_rstc)) & FPM_RSTC_FM_RESET) && --count); if (count == 0) { +#ifdef CONFIG_PPC iounmap(guts_regs); of_node_put(guts_node); +#endif err = -EBUSY; goto _return; } +#ifdef CONFIG_PPC /* Restore devdisr2 value */ iowrite32be(devdisr2, &guts_regs->devdisr2); iounmap(guts_regs); of_node_put(guts_node); +#endif goto _return; +#ifdef CONFIG_PPC guts_regs: of_node_put(guts_node); guts_node: dev_dbg(fman->dev, "%s: Didn't perform FManV3 reset due to Errata A007273!\n", __func__); +#endif } _return: return err; @@ -2868,6 +2876,13 @@ static struct fman *read_dts_node(struct platform_device *of_dev) fman->dev = &of_dev->dev; + err = of_platform_populate(fm_node, NULL, NULL, &of_dev->dev); + if (err) { + dev_err(&of_dev->dev, "%s: of_platform_populate() failed\n", + __func__); + goto fman_free; + } + return fman; fman_node_put: diff --git a/drivers/net/ethernet/freescale/fman/mac.c b/drivers/net/ethernet/freescale/fman/mac.c index 69ca42ce5dd5..0b31f8502ada 100644 --- a/drivers/net/ethernet/freescale/fman/mac.c +++ b/drivers/net/ethernet/freescale/fman/mac.c @@ -594,6 +594,7 @@ static const u16 phy2speed[] = { [PHY_INTERFACE_MODE_RGMII_RXID] = SPEED_1000, [PHY_INTERFACE_MODE_RGMII_TXID] = SPEED_1000, [PHY_INTERFACE_MODE_RTBI] = SPEED_1000, + [PHY_INTERFACE_MODE_QSGMII] = SPEED_1000, [PHY_INTERFACE_MODE_XGMII] = SPEED_10000 }; diff --git a/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c b/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c index ee7e9ce2f5b3..418ca1f3774a 100644 --- a/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c +++ b/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c @@ -1316,10 +1316,11 @@ static int hix5hd2_dev_remove(struct platform_device *pdev) } static const struct of_device_id hix5hd2_of_match[] = { - { .compatible = "hisilicon,hisi-gemac-v1", .data = (void *)GEMAC_V1 }, - { .compatible = "hisilicon,hisi-gemac-v2", .data = (void *)GEMAC_V2 }, - { .compatible = "hisilicon,hix5hd2-gemac", .data = (void *)GEMAC_V1 }, - { .compatible = "hisilicon,hi3798cv200-gemac", .data = (void *)GEMAC_V2 }, + { .compatible = "hisilicon,hisi-gmac-v1", .data = (void *)GEMAC_V1 }, + { .compatible = "hisilicon,hisi-gmac-v2", .data = (void *)GEMAC_V2 }, + { .compatible = "hisilicon,hix5hd2-gmac", .data = (void *)GEMAC_V1 }, + { .compatible = "hisilicon,hi3798cv200-gmac", .data = (void *)GEMAC_V2 }, + { .compatible = "hisilicon,hi3516a-gmac", .data = (void *)GEMAC_V2 }, {}, }; @@ -1327,7 +1328,7 @@ MODULE_DEVICE_TABLE(of, hix5hd2_of_match); static struct platform_driver hix5hd2_dev_driver = { .driver = { - .name = "hisi-gemac", + .name = "hisi-gmac", .of_match_table = hix5hd2_of_match, }, .probe = hix5hd2_dev_probe, @@ -1338,4 +1339,4 @@ module_platform_driver(hix5hd2_dev_driver); MODULE_DESCRIPTION("HISILICON Gigabit Ethernet MAC driver"); MODULE_LICENSE("GPL v2"); -MODULE_ALIAS("platform:hisi-gemac"); +MODULE_ALIAS("platform:hisi-gmac"); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c index 3b026c151cf2..7431f633de31 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c @@ -75,7 +75,7 @@ static void mlx5_fc_stats_insert(struct rb_root *root, struct mlx5_fc *counter) struct rb_node *parent = NULL; while (*new) { - struct mlx5_fc *this = container_of(*new, struct mlx5_fc, node); + struct mlx5_fc *this = rb_entry(*new, struct mlx5_fc, node); int result = counter->id - this->id; parent = *new; diff --git a/drivers/net/ethernet/sfc/Kconfig b/drivers/net/ethernet/sfc/Kconfig index 46f7be85f5a3..2c032629c369 100644 --- a/drivers/net/ethernet/sfc/Kconfig +++ b/drivers/net/ethernet/sfc/Kconfig @@ -1,3 +1,20 @@ +# +# Solarflare device configuration +# + +config NET_VENDOR_SOLARFLARE + bool "Solarflare devices" + default y + ---help--- + If you have a network (Ethernet) card belonging to this class, say Y. + + Note that the answer to this question doesn't directly affect the + kernel: saying N will just cause the configurator to skip all + the questions about Solarflare devices. If you say Y, you will be asked + for your specific card in the following questions. + +if NET_VENDOR_SOLARFLARE + config SFC tristate "Solarflare SFC9000/SFC9100-family support" depends on PCI @@ -44,3 +61,7 @@ config SFC_MCDI_LOGGING Driver-Interface) commands and responses, allowing debugging of driver/firmware interaction. The tracing is actually enabled by a sysfs file 'mcdi_logging' under the PCI device. + +source "drivers/net/ethernet/sfc/falcon/Kconfig" + +endif # NET_VENDOR_SOLARFLARE diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c index a340fc8bd0de..8816515e1bbb 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c @@ -334,7 +334,7 @@ static void dwmac4_rd_prepare_tx_desc(struct dma_desc *p, int is_fs, int len, * descriptors for the same frame has to be set before, to * avoid race condition. */ - wmb(); + dma_wmb(); p->des3 = cpu_to_le32(tdes3); } @@ -377,7 +377,7 @@ static void dwmac4_rd_prepare_tso_tx_desc(struct dma_desc *p, int is_fs, * descriptors for the same frame has to be set before, to * avoid race condition. */ - wmb(); + dma_wmb(); p->des3 = cpu_to_le32(tdes3); } diff --git a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c index ce97e522566a..f0d86321dfe2 100644 --- a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c +++ b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c @@ -350,7 +350,7 @@ static void enh_desc_prepare_tx_desc(struct dma_desc *p, int is_fs, int len, * descriptors for the same frame has to be set before, to * avoid race condition. */ - wmb(); + dma_wmb(); p->des0 = cpu_to_le32(tdes0); } diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 3e405785b81c..bb40382e205d 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -2125,7 +2125,7 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev) * descriptor and then barrier is needed to make sure that * all is coherent before granting the DMA engine. */ - smp_wmb(); + dma_wmb(); if (netif_msg_pktdata(priv)) { pr_info("%s: curr=%d dirty=%d f=%d, e=%d, f_p=%p, nfrags %d\n", @@ -2338,7 +2338,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) * descriptor and then barrier is needed to make sure that * all is coherent before granting the DMA engine. */ - smp_wmb(); + dma_wmb(); } netdev_sent_queue(dev, skb->len); @@ -2443,14 +2443,14 @@ static inline void stmmac_rx_refill(struct stmmac_priv *priv) netif_dbg(priv, rx_status, priv->dev, "refill entry #%d\n", entry); } - wmb(); + dma_wmb(); if (unlikely(priv->synopsys_id >= DWMAC_CORE_4_00)) priv->hw->desc->init_rx_desc(p, priv->use_riwt, 0, 0); else priv->hw->desc->set_rx_owner(p); - wmb(); + dma_wmb(); entry = STMMAC_GET_ENTRY(entry, DMA_RX_SIZE); } diff --git a/drivers/net/ethernet/ti/netcp_ethss.c b/drivers/net/ethernet/ti/netcp_ethss.c index c7e547e4f2b1..7d9e36f66735 100644 --- a/drivers/net/ethernet/ti/netcp_ethss.c +++ b/drivers/net/ethernet/ti/netcp_ethss.c @@ -94,6 +94,7 @@ /* offset relative to base of XGBE_SS_REG_INDEX */ #define XGBE10_SGMII_MODULE_OFFSET 0x100 +#define IS_SS_ID_XGBE(d) ((d)->ss_version == XGBE_SS_VERSION_10) /* offset relative to base of XGBE_SM_REG_INDEX */ #define XGBE10_HOST_PORT_OFFSET 0x34 #define XGBE10_SLAVE_PORT_OFFSET 0x64 @@ -1746,6 +1747,17 @@ static void keystone_set_msglevel(struct net_device *ndev, u32 value) netcp->msg_enable = value; } +static struct gbe_intf *keystone_get_intf_data(struct netcp_intf *netcp) +{ + struct gbe_intf *gbe_intf; + + gbe_intf = netcp_module_get_intf_data(&gbe_module, netcp); + if (!gbe_intf) + gbe_intf = netcp_module_get_intf_data(&xgbe_module, netcp); + + return gbe_intf; +} + static void keystone_get_stat_strings(struct net_device *ndev, uint32_t stringset, uint8_t *data) { @@ -1754,7 +1766,7 @@ static void keystone_get_stat_strings(struct net_device *ndev, struct gbe_priv *gbe_dev; int i; - gbe_intf = netcp_module_get_intf_data(&gbe_module, netcp); + gbe_intf = keystone_get_intf_data(netcp); if (!gbe_intf) return; gbe_dev = gbe_intf->gbe_dev; @@ -1778,7 +1790,7 @@ static int keystone_get_sset_count(struct net_device *ndev, int stringset) struct gbe_intf *gbe_intf; struct gbe_priv *gbe_dev; - gbe_intf = netcp_module_get_intf_data(&gbe_module, netcp); + gbe_intf = keystone_get_intf_data(netcp); if (!gbe_intf) return -EINVAL; gbe_dev = gbe_intf->gbe_dev; @@ -1896,7 +1908,7 @@ static void keystone_get_ethtool_stats(struct net_device *ndev, struct gbe_intf *gbe_intf; struct gbe_priv *gbe_dev; - gbe_intf = netcp_module_get_intf_data(&gbe_module, netcp); + gbe_intf = keystone_get_intf_data(netcp); if (!gbe_intf) return; @@ -1920,7 +1932,7 @@ static int keystone_get_link_ksettings(struct net_device *ndev, if (!phy) return -EINVAL; - gbe_intf = netcp_module_get_intf_data(&gbe_module, netcp); + gbe_intf = keystone_get_intf_data(netcp); if (!gbe_intf) return -EINVAL; @@ -1953,7 +1965,7 @@ static int keystone_set_link_ksettings(struct net_device *ndev, if (!phy) return -EINVAL; - gbe_intf = netcp_module_get_intf_data(&gbe_module, netcp); + gbe_intf = keystone_get_intf_data(netcp); if (!gbe_intf) return -EINVAL; @@ -2311,7 +2323,7 @@ static void gbe_init_host_port(struct gbe_priv *priv) int bypass_en = 1; /* Host Tx Pri */ - if (IS_SS_ID_NU(priv)) + if (IS_SS_ID_NU(priv) || IS_SS_ID_XGBE(priv)) writel(HOST_TX_PRI_MAP_DEFAULT, GBE_REG_ADDR(priv, host_port_regs, tx_pri_map)); diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 9c06f8028f0c..92b08383cafa 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -1187,8 +1187,8 @@ static int genphy_config_advert(struct phy_device *phydev) */ static int genphy_config_eee_advert(struct phy_device *phydev) { - u32 broken = phydev->eee_broken_modes; - u32 old_adv, adv; + int broken = phydev->eee_broken_modes; + int old_adv, adv; /* Nothing to disable */ if (!broken) @@ -1665,7 +1665,7 @@ static void of_set_phy_supported(struct phy_device *phydev) static void of_set_phy_eee_broken(struct phy_device *phydev) { struct device_node *node = phydev->mdio.dev.of_node; - u32 broken; + u32 broken = 0; if (!IS_ENABLED(CONFIG_OF_MDIO)) return; @@ -1673,8 +1673,20 @@ static void of_set_phy_eee_broken(struct phy_device *phydev) if (!node) return; - if (!of_property_read_u32(node, "eee-broken-modes", &broken)) - phydev->eee_broken_modes = broken; + if (of_property_read_bool(node, "eee-broken-100tx")) + broken |= MDIO_EEE_100TX; + if (of_property_read_bool(node, "eee-broken-1000t")) + broken |= MDIO_EEE_1000T; + if (of_property_read_bool(node, "eee-broken-10gt")) + broken |= MDIO_EEE_10GT; + if (of_property_read_bool(node, "eee-broken-1000kx")) + broken |= MDIO_EEE_1000KX; + if (of_property_read_bool(node, "eee-broken-10gkx4")) + broken |= MDIO_EEE_10GKX4; + if (of_property_read_bool(node, "eee-broken-10gkr")) + broken |= MDIO_EEE_10GKR; + + phydev->eee_broken_modes = broken; } /** diff --git a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c index c9fa3565c671..2583e8b50b21 100644 --- a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c +++ b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c @@ -30,6 +30,7 @@ #include <linux/types.h> #include <linux/list.h> #include <linux/string.h> +#include <linux/delay.h> #include <target/target_core_base.h> #include <target/target_core_fabric.h> diff --git a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.h b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.h index 98b0ca79a5c5..65c6189885ab 100644 --- a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.h +++ b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.h @@ -26,6 +26,7 @@ #ifndef __H_IBMVSCSI_TGT #define __H_IBMVSCSI_TGT +#include <linux/interrupt.h> #include "libsrp.h" #define SYS_ID_NAME_LEN 64 diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c index 19f18485a854..d8efddf6f312 100644 --- a/drivers/scsi/qla2xxx/qla_isr.c +++ b/drivers/scsi/qla2xxx/qla_isr.c @@ -9,6 +9,7 @@ #include <linux/delay.h> #include <linux/slab.h> +#include <linux/t10-pi.h> #include <scsi/scsi_tcq.h> #include <scsi/scsi_bsg_fc.h> #include <scsi/scsi_eh.h> diff --git a/drivers/target/iscsi/cxgbit/cxgbit_target.c b/drivers/target/iscsi/cxgbit/cxgbit_target.c index d02bf58aea6d..8bcb9b71f764 100644 --- a/drivers/target/iscsi/cxgbit/cxgbit_target.c +++ b/drivers/target/iscsi/cxgbit/cxgbit_target.c @@ -9,6 +9,7 @@ #include <linux/workqueue.h> #include <linux/kthread.h> #include <asm/unaligned.h> +#include <net/tcp.h> #include <target/target_core_base.h> #include <target/target_core_fabric.h> #include "cxgbit.h" diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index b7d747e92c7a..da2c73a255de 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -23,7 +23,9 @@ #include <linux/module.h> #include <linux/vmalloc.h> #include <linux/idr.h> +#include <linux/delay.h> #include <asm/unaligned.h> +#include <net/ipv6.h> #include <scsi/scsi_proto.h> #include <scsi/iscsi_proto.h> #include <scsi/scsi_tcq.h> diff --git a/drivers/target/iscsi/iscsi_target.h b/drivers/target/iscsi/iscsi_target.h index 4cf2c0f2ba2f..e0db2ceb0f87 100644 --- a/drivers/target/iscsi/iscsi_target.h +++ b/drivers/target/iscsi/iscsi_target.h @@ -1,6 +1,18 @@ #ifndef ISCSI_TARGET_H #define ISCSI_TARGET_H +#include <linux/types.h> +#include <linux/spinlock.h> + +struct iscsi_cmd; +struct iscsi_conn; +struct iscsi_np; +struct iscsi_portal_group; +struct iscsi_session; +struct iscsi_tpg_np; +struct kref; +struct sockaddr_storage; + extern struct iscsi_tiqn *iscsit_get_tiqn_for_login(unsigned char *); extern struct iscsi_tiqn *iscsit_get_tiqn(unsigned char *, int); extern void iscsit_put_tiqn_for_login(struct iscsi_tiqn *); diff --git a/drivers/target/iscsi/iscsi_target_auth.c b/drivers/target/iscsi/iscsi_target_auth.c index e116f0e845c0..903b667f8e01 100644 --- a/drivers/target/iscsi/iscsi_target_auth.c +++ b/drivers/target/iscsi/iscsi_target_auth.c @@ -20,8 +20,8 @@ #include <linux/kernel.h> #include <linux/string.h> #include <linux/err.h> +#include <linux/random.h> #include <linux/scatterlist.h> - #include <target/iscsi/iscsi_target_core.h> #include "iscsi_target_nego.h" #include "iscsi_target_auth.h" diff --git a/drivers/target/iscsi/iscsi_target_auth.h b/drivers/target/iscsi/iscsi_target_auth.h index d22f7b96a06c..1b91c13cc965 100644 --- a/drivers/target/iscsi/iscsi_target_auth.h +++ b/drivers/target/iscsi/iscsi_target_auth.h @@ -1,6 +1,8 @@ #ifndef _ISCSI_CHAP_H_ #define _ISCSI_CHAP_H_ +#include <linux/types.h> + #define CHAP_DIGEST_UNKNOWN 0 #define CHAP_DIGEST_MD5 5 #define CHAP_DIGEST_SHA 6 @@ -18,6 +20,9 @@ #define CHAP_STAGE_CLIENT_NRIC 4 #define CHAP_STAGE_SERVER_NR 5 +struct iscsi_node_auth; +struct iscsi_conn; + extern u32 chap_main_loop(struct iscsi_conn *, struct iscsi_node_auth *, char *, char *, int *, int *); diff --git a/drivers/target/iscsi/iscsi_target_configfs.c b/drivers/target/iscsi/iscsi_target_configfs.c index 923c032f0b95..bf40f03755dd 100644 --- a/drivers/target/iscsi/iscsi_target_configfs.c +++ b/drivers/target/iscsi/iscsi_target_configfs.c @@ -21,10 +21,11 @@ #include <linux/ctype.h> #include <linux/export.h> #include <linux/inet.h> +#include <linux/module.h> +#include <net/ipv6.h> #include <target/target_core_base.h> #include <target/target_core_fabric.h> #include <target/iscsi/iscsi_transport.h> - #include <target/iscsi/iscsi_target_core.h> #include "iscsi_target_parameters.h" #include "iscsi_target_device.h" @@ -100,8 +101,10 @@ static ssize_t lio_target_np_driver_store(struct config_item *item, tpg_np_new = iscsit_tpg_add_network_portal(tpg, &np->np_sockaddr, tpg_np, type); - if (IS_ERR(tpg_np_new)) + if (IS_ERR(tpg_np_new)) { + rc = PTR_ERR(tpg_np_new); goto out; + } } else { tpg_np_new = iscsit_tpg_locate_child_np(tpg_np, type); if (tpg_np_new) { diff --git a/drivers/target/iscsi/iscsi_target_datain_values.c b/drivers/target/iscsi/iscsi_target_datain_values.c index 647d4a5dca52..173ddd93c757 100644 --- a/drivers/target/iscsi/iscsi_target_datain_values.c +++ b/drivers/target/iscsi/iscsi_target_datain_values.c @@ -16,8 +16,8 @@ * GNU General Public License for more details. ******************************************************************************/ +#include <linux/slab.h> #include <scsi/iscsi_proto.h> - #include <target/iscsi/iscsi_target_core.h> #include "iscsi_target_seq_pdu_list.h" #include "iscsi_target_erl1.h" diff --git a/drivers/target/iscsi/iscsi_target_datain_values.h b/drivers/target/iscsi/iscsi_target_datain_values.h index 646429ac5a02..16edeeeb7777 100644 --- a/drivers/target/iscsi/iscsi_target_datain_values.h +++ b/drivers/target/iscsi/iscsi_target_datain_values.h @@ -1,6 +1,9 @@ #ifndef ISCSI_TARGET_DATAIN_VALUES_H #define ISCSI_TARGET_DATAIN_VALUES_H +struct iscsi_cmd; +struct iscsi_datain; + extern struct iscsi_datain_req *iscsit_allocate_datain_req(void); extern void iscsit_attach_datain_req(struct iscsi_cmd *, struct iscsi_datain_req *); extern void iscsit_free_datain_req(struct iscsi_cmd *, struct iscsi_datain_req *); diff --git a/drivers/target/iscsi/iscsi_target_device.h b/drivers/target/iscsi/iscsi_target_device.h index a0e2df9e8090..06dbff5cd520 100644 --- a/drivers/target/iscsi/iscsi_target_device.h +++ b/drivers/target/iscsi/iscsi_target_device.h @@ -1,6 +1,9 @@ #ifndef ISCSI_TARGET_DEVICE_H #define ISCSI_TARGET_DEVICE_H +struct iscsi_cmd; +struct iscsi_session; + extern void iscsit_determine_maxcmdsn(struct iscsi_session *); extern void iscsit_increment_maxcmdsn(struct iscsi_cmd *, struct iscsi_session *); diff --git a/drivers/target/iscsi/iscsi_target_erl0.h b/drivers/target/iscsi/iscsi_target_erl0.h index a9e2f9497fb2..60e69e2af6ed 100644 --- a/drivers/target/iscsi/iscsi_target_erl0.h +++ b/drivers/target/iscsi/iscsi_target_erl0.h @@ -1,6 +1,12 @@ #ifndef ISCSI_TARGET_ERL0_H #define ISCSI_TARGET_ERL0_H +#include <linux/types.h> + +struct iscsi_cmd; +struct iscsi_conn; +struct iscsi_session; + extern void iscsit_set_dataout_sequence_values(struct iscsi_cmd *); extern int iscsit_check_pre_dataout(struct iscsi_cmd *, unsigned char *); extern int iscsit_check_post_dataout(struct iscsi_cmd *, unsigned char *, u8); diff --git a/drivers/target/iscsi/iscsi_target_erl1.c b/drivers/target/iscsi/iscsi_target_erl1.c index 9214c9dafa2b..fe9b7f1e44ac 100644 --- a/drivers/target/iscsi/iscsi_target_erl1.c +++ b/drivers/target/iscsi/iscsi_target_erl1.c @@ -17,6 +17,7 @@ ******************************************************************************/ #include <linux/list.h> +#include <linux/slab.h> #include <scsi/iscsi_proto.h> #include <target/target_core_base.h> #include <target/target_core_fabric.h> diff --git a/drivers/target/iscsi/iscsi_target_erl1.h b/drivers/target/iscsi/iscsi_target_erl1.h index 2a3ebf118a34..54d36bd25bea 100644 --- a/drivers/target/iscsi/iscsi_target_erl1.h +++ b/drivers/target/iscsi/iscsi_target_erl1.h @@ -1,6 +1,16 @@ #ifndef ISCSI_TARGET_ERL1_H #define ISCSI_TARGET_ERL1_H +#include <linux/types.h> +#include <scsi/iscsi_proto.h> /* itt_t */ + +struct iscsi_cmd; +struct iscsi_conn; +struct iscsi_datain_req; +struct iscsi_ooo_cmdsn; +struct iscsi_pdu; +struct iscsi_session; + extern int iscsit_dump_data_payload(struct iscsi_conn *, u32, int); extern int iscsit_create_recovery_datain_values_datasequenceinorder_yes( struct iscsi_cmd *, struct iscsi_datain_req *); diff --git a/drivers/target/iscsi/iscsi_target_erl2.c b/drivers/target/iscsi/iscsi_target_erl2.c index e24f1c7c5862..faf9ae014b30 100644 --- a/drivers/target/iscsi/iscsi_target_erl2.c +++ b/drivers/target/iscsi/iscsi_target_erl2.c @@ -17,6 +17,7 @@ * GNU General Public License for more details. ******************************************************************************/ +#include <linux/slab.h> #include <scsi/iscsi_proto.h> #include <target/target_core_base.h> #include <target/target_core_fabric.h> diff --git a/drivers/target/iscsi/iscsi_target_erl2.h b/drivers/target/iscsi/iscsi_target_erl2.h index 63f2501f3fe0..7965f1e86506 100644 --- a/drivers/target/iscsi/iscsi_target_erl2.h +++ b/drivers/target/iscsi/iscsi_target_erl2.h @@ -1,6 +1,13 @@ #ifndef ISCSI_TARGET_ERL2_H #define ISCSI_TARGET_ERL2_H +#include <linux/types.h> + +struct iscsi_cmd; +struct iscsi_conn; +struct iscsi_conn_recovery; +struct iscsi_session; + extern void iscsit_create_conn_recovery_datain_values(struct iscsi_cmd *, __be32); extern void iscsit_create_conn_recovery_dataout_values(struct iscsi_cmd *); extern struct iscsi_conn_recovery *iscsit_get_inactive_connection_recovery_entry( diff --git a/drivers/target/iscsi/iscsi_target_login.c b/drivers/target/iscsi/iscsi_target_login.c index 15f79a2ca34a..450f51deb2a2 100644 --- a/drivers/target/iscsi/iscsi_target_login.c +++ b/drivers/target/iscsi/iscsi_target_login.c @@ -20,6 +20,8 @@ #include <linux/string.h> #include <linux/kthread.h> #include <linux/idr.h> +#include <linux/tcp.h> /* TCP_NODELAY */ +#include <net/ipv6.h> /* ipv6_addr_v4mapped() */ #include <scsi/iscsi_proto.h> #include <target/target_core_base.h> #include <target/target_core_fabric.h> diff --git a/drivers/target/iscsi/iscsi_target_login.h b/drivers/target/iscsi/iscsi_target_login.h index b597aa2c61a1..0e1fd6cedd54 100644 --- a/drivers/target/iscsi/iscsi_target_login.h +++ b/drivers/target/iscsi/iscsi_target_login.h @@ -1,6 +1,13 @@ #ifndef ISCSI_TARGET_LOGIN_H #define ISCSI_TARGET_LOGIN_H +#include <linux/types.h> + +struct iscsi_conn; +struct iscsi_login; +struct iscsi_np; +struct sockaddr_storage; + extern int iscsi_login_setup_crypto(struct iscsi_conn *); extern int iscsi_check_for_session_reinstatement(struct iscsi_conn *); extern int iscsi_login_post_auth_non_zero_tsih(struct iscsi_conn *, u16, u32); diff --git a/drivers/target/iscsi/iscsi_target_nego.c b/drivers/target/iscsi/iscsi_target_nego.c index 89d34bd6d87f..46388c9e08da 100644 --- a/drivers/target/iscsi/iscsi_target_nego.c +++ b/drivers/target/iscsi/iscsi_target_nego.c @@ -18,6 +18,8 @@ #include <linux/ctype.h> #include <linux/kthread.h> +#include <linux/slab.h> +#include <net/sock.h> #include <scsi/iscsi_proto.h> #include <target/target_core_base.h> #include <target/target_core_fabric.h> diff --git a/drivers/target/iscsi/iscsi_target_nego.h b/drivers/target/iscsi/iscsi_target_nego.h index f021cbd330e5..53438bfca4c6 100644 --- a/drivers/target/iscsi/iscsi_target_nego.h +++ b/drivers/target/iscsi/iscsi_target_nego.h @@ -4,6 +4,10 @@ #define DECIMAL 0 #define HEX 1 +struct iscsi_conn; +struct iscsi_login; +struct iscsi_np; + extern void convert_null_to_semi(char *, int); extern int extract_param(const char *, const char *, unsigned int, char *, unsigned char *); diff --git a/drivers/target/iscsi/iscsi_target_nodeattrib.h b/drivers/target/iscsi/iscsi_target_nodeattrib.h index 0c69a46a62ec..79cdf06ade48 100644 --- a/drivers/target/iscsi/iscsi_target_nodeattrib.h +++ b/drivers/target/iscsi/iscsi_target_nodeattrib.h @@ -1,6 +1,11 @@ #ifndef ISCSI_TARGET_NODEATTRIB_H #define ISCSI_TARGET_NODEATTRIB_H +#include <linux/types.h> + +struct iscsi_node_acl; +struct iscsi_portal_group; + extern void iscsit_set_default_node_attribues(struct iscsi_node_acl *, struct iscsi_portal_group *); extern int iscsit_na_dataout_timeout(struct iscsi_node_acl *, u32); diff --git a/drivers/target/iscsi/iscsi_target_parameters.c b/drivers/target/iscsi/iscsi_target_parameters.c index 0efa80bb8962..e65bf78ceef3 100644 --- a/drivers/target/iscsi/iscsi_target_parameters.c +++ b/drivers/target/iscsi/iscsi_target_parameters.c @@ -17,7 +17,7 @@ ******************************************************************************/ #include <linux/slab.h> - +#include <linux/uio.h> /* struct kvec */ #include <target/iscsi/iscsi_target_core.h> #include "iscsi_target_util.h" #include "iscsi_target_parameters.h" diff --git a/drivers/target/iscsi/iscsi_target_parameters.h b/drivers/target/iscsi/iscsi_target_parameters.h index a0751e3f0813..9962ccf0ccd7 100644 --- a/drivers/target/iscsi/iscsi_target_parameters.h +++ b/drivers/target/iscsi/iscsi_target_parameters.h @@ -1,6 +1,7 @@ #ifndef ISCSI_PARAMETERS_H #define ISCSI_PARAMETERS_H +#include <linux/types.h> #include <scsi/iscsi_proto.h> struct iscsi_extra_response { @@ -23,6 +24,11 @@ struct iscsi_param { struct list_head p_list; } ____cacheline_aligned; +struct iscsi_conn; +struct iscsi_conn_ops; +struct iscsi_param_list; +struct iscsi_sess_ops; + extern int iscsi_login_rx_data(struct iscsi_conn *, char *, int); extern int iscsi_login_tx_data(struct iscsi_conn *, char *, char *, int); extern void iscsi_dump_conn_ops(struct iscsi_conn_ops *); diff --git a/drivers/target/iscsi/iscsi_target_seq_pdu_list.h b/drivers/target/iscsi/iscsi_target_seq_pdu_list.h index d5b153751a8d..be1234362271 100644 --- a/drivers/target/iscsi/iscsi_target_seq_pdu_list.h +++ b/drivers/target/iscsi/iscsi_target_seq_pdu_list.h @@ -1,6 +1,9 @@ #ifndef ISCSI_SEQ_AND_PDU_LIST_H #define ISCSI_SEQ_AND_PDU_LIST_H +#include <linux/types.h> +#include <linux/cache.h> + /* struct iscsi_pdu->status */ #define DATAOUT_PDU_SENT 1 @@ -78,6 +81,8 @@ struct iscsi_seq { u32 xfer_len; } ____cacheline_aligned; +struct iscsi_cmd; + extern int iscsit_build_pdu_and_seq_lists(struct iscsi_cmd *, u32); extern struct iscsi_pdu *iscsit_get_pdu_holder(struct iscsi_cmd *, u32, u32); extern struct iscsi_pdu *iscsit_get_pdu_holder_for_seq(struct iscsi_cmd *, struct iscsi_seq *); diff --git a/drivers/target/iscsi/iscsi_target_tmr.h b/drivers/target/iscsi/iscsi_target_tmr.h index 142e992cb097..64cc5c07e47c 100644 --- a/drivers/target/iscsi/iscsi_target_tmr.h +++ b/drivers/target/iscsi/iscsi_target_tmr.h @@ -1,6 +1,12 @@ #ifndef ISCSI_TARGET_TMR_H #define ISCSI_TARGET_TMR_H +#include <linux/types.h> + +struct iscsi_cmd; +struct iscsi_conn; +struct iscsi_tmr_req; + extern u8 iscsit_tmr_abort_task(struct iscsi_cmd *, unsigned char *); extern int iscsit_tmr_task_warm_reset(struct iscsi_conn *, struct iscsi_tmr_req *, unsigned char *); diff --git a/drivers/target/iscsi/iscsi_target_tpg.c b/drivers/target/iscsi/iscsi_target_tpg.c index 0814e5894a96..2e7e08dbda48 100644 --- a/drivers/target/iscsi/iscsi_target_tpg.c +++ b/drivers/target/iscsi/iscsi_target_tpg.c @@ -16,9 +16,9 @@ * GNU General Public License for more details. ******************************************************************************/ +#include <linux/slab.h> #include <target/target_core_base.h> #include <target/target_core_fabric.h> - #include <target/iscsi/iscsi_target_core.h> #include "iscsi_target_erl0.h" #include "iscsi_target_login.h" @@ -260,7 +260,6 @@ err_out: iscsi_release_param_list(tpg->param_list); tpg->param_list = NULL; } - kfree(tpg); return -ENOMEM; } diff --git a/drivers/target/iscsi/iscsi_target_tpg.h b/drivers/target/iscsi/iscsi_target_tpg.h index 2da211920c18..ceba29851167 100644 --- a/drivers/target/iscsi/iscsi_target_tpg.h +++ b/drivers/target/iscsi/iscsi_target_tpg.h @@ -1,6 +1,15 @@ #ifndef ISCSI_TARGET_TPG_H #define ISCSI_TARGET_TPG_H +#include <linux/types.h> + +struct iscsi_np; +struct iscsi_session; +struct iscsi_tiqn; +struct iscsi_tpg_np; +struct se_node_acl; +struct sockaddr_storage; + extern struct iscsi_portal_group *iscsit_alloc_portal_group(struct iscsi_tiqn *, u16); extern int iscsit_load_discovery_tpg(void); extern void iscsit_release_discovery_tpg(void); diff --git a/drivers/target/iscsi/iscsi_target_transport.c b/drivers/target/iscsi/iscsi_target_transport.c index 08217d62fb0d..c4eb141c6435 100644 --- a/drivers/target/iscsi/iscsi_target_transport.c +++ b/drivers/target/iscsi/iscsi_target_transport.c @@ -1,5 +1,6 @@ #include <linux/spinlock.h> #include <linux/list.h> +#include <linux/module.h> #include <target/iscsi/iscsi_transport.h> static LIST_HEAD(g_transport_list); diff --git a/drivers/target/iscsi/iscsi_target_util.c b/drivers/target/iscsi/iscsi_target_util.c index 1f38177207e0..b5a1b4ccba12 100644 --- a/drivers/target/iscsi/iscsi_target_util.c +++ b/drivers/target/iscsi/iscsi_target_util.c @@ -18,6 +18,7 @@ #include <linux/list.h> #include <linux/percpu_ida.h> +#include <net/ipv6.h> /* ipv6_addr_equal() */ #include <scsi/scsi_tcq.h> #include <scsi/iscsi_proto.h> #include <target/target_core_base.h> diff --git a/drivers/target/iscsi/iscsi_target_util.h b/drivers/target/iscsi/iscsi_target_util.h index 995f1cb29d0e..8ff08856516a 100644 --- a/drivers/target/iscsi/iscsi_target_util.h +++ b/drivers/target/iscsi/iscsi_target_util.h @@ -1,8 +1,16 @@ #ifndef ISCSI_TARGET_UTIL_H #define ISCSI_TARGET_UTIL_H +#include <linux/types.h> +#include <scsi/iscsi_proto.h> /* itt_t */ + #define MARKER_SIZE 8 +struct iscsi_cmd; +struct iscsi_conn; +struct iscsi_conn_recovery; +struct iscsi_session; + extern int iscsit_add_r2t_to_list(struct iscsi_cmd *, u32, u32, int, u32); extern struct iscsi_r2t *iscsit_get_r2t_for_eos(struct iscsi_cmd *, u32, u32); extern struct iscsi_r2t *iscsit_get_r2t_from_list(struct iscsi_cmd *); diff --git a/drivers/target/loopback/tcm_loop.h b/drivers/target/loopback/tcm_loop.h index 4346462094a1..a8a230b4e6b5 100644 --- a/drivers/target/loopback/tcm_loop.h +++ b/drivers/target/loopback/tcm_loop.h @@ -1,3 +1,7 @@ +#include <linux/types.h> +#include <linux/device.h> +#include <target/target_core_base.h> /* struct se_cmd */ + #define TCM_LOOP_VERSION "v2.1-rc2" #define TL_WWN_ADDR_LEN 256 #define TL_TPGS_PER_HBA 32 diff --git a/drivers/target/sbp/sbp_target.c b/drivers/target/sbp/sbp_target.c index 58bb6ed18185..e5c3e5f827d0 100644 --- a/drivers/target/sbp/sbp_target.c +++ b/drivers/target/sbp/sbp_target.c @@ -28,6 +28,7 @@ #include <linux/string.h> #include <linux/configfs.h> #include <linux/ctype.h> +#include <linux/delay.h> #include <linux/firewire.h> #include <linux/firewire-constants.h> #include <scsi/scsi_proto.h> @@ -928,7 +929,7 @@ static struct sbp_target_request *sbp_mgt_get_req(struct sbp_session *sess, struct sbp_target_request *req; int tag; - tag = percpu_ida_alloc(&se_sess->sess_tag_pool, GFP_ATOMIC); + tag = percpu_ida_alloc(&se_sess->sess_tag_pool, TASK_RUNNING); if (tag < 0) return ERR_PTR(-ENOMEM); diff --git a/drivers/target/target_core_alua.c b/drivers/target/target_core_alua.c index 4c82bbe19003..f5e330099bfc 100644 --- a/drivers/target/target_core_alua.c +++ b/drivers/target/target_core_alua.c @@ -26,8 +26,11 @@ #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/configfs.h> +#include <linux/delay.h> #include <linux/export.h> +#include <linux/fcntl.h> #include <linux/file.h> +#include <linux/fs.h> #include <scsi/scsi_proto.h> #include <asm/unaligned.h> diff --git a/drivers/target/target_core_alua.h b/drivers/target/target_core_alua.h index 9b250f9b33bf..c69c11baf07f 100644 --- a/drivers/target/target_core_alua.h +++ b/drivers/target/target_core_alua.h @@ -1,6 +1,8 @@ #ifndef TARGET_CORE_ALUA_H #define TARGET_CORE_ALUA_H +#include <target/target_core_base.h> + /* * INQUIRY response data, TPGS Field * diff --git a/drivers/target/target_core_configfs.c b/drivers/target/target_core_configfs.c index a35a347ec357..54b36c9835be 100644 --- a/drivers/target/target_core_configfs.c +++ b/drivers/target/target_core_configfs.c @@ -144,12 +144,12 @@ static ssize_t target_core_item_dbroot_store(struct config_item *item, return -EINVAL; } if (!S_ISDIR(file_inode(fp)->i_mode)) { - filp_close(fp, 0); + filp_close(fp, NULL); mutex_unlock(&g_tf_lock); pr_err("db_root: not a directory: %s\n", db_root_stage); return -EINVAL; } - filp_close(fp, 0); + filp_close(fp, NULL); strncpy(db_root, db_root_stage, read_bytes); diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c index 6b423485c5d6..1ebd13ef7bd3 100644 --- a/drivers/target/target_core_device.c +++ b/drivers/target/target_core_device.c @@ -33,6 +33,7 @@ #include <linux/kthread.h> #include <linux/in.h> #include <linux/export.h> +#include <linux/t10-pi.h> #include <asm/unaligned.h> #include <net/sock.h> #include <net/tcp.h> diff --git a/drivers/target/target_core_file.c b/drivers/target/target_core_file.c index d545993df18b..87aa376a1a1a 100644 --- a/drivers/target/target_core_file.c +++ b/drivers/target/target_core_file.c @@ -32,6 +32,7 @@ #include <linux/module.h> #include <linux/vmalloc.h> #include <linux/falloc.h> +#include <linux/uio.h> #include <scsi/scsi_proto.h> #include <asm/unaligned.h> diff --git a/drivers/target/target_core_file.h b/drivers/target/target_core_file.h index 068966fce308..526595a072de 100644 --- a/drivers/target/target_core_file.h +++ b/drivers/target/target_core_file.h @@ -1,6 +1,8 @@ #ifndef TARGET_CORE_FILE_H #define TARGET_CORE_FILE_H +#include <target/target_core_base.h> + #define FD_VERSION "4.0" #define FD_MAX_DEV_NAME 256 diff --git a/drivers/target/target_core_iblock.h b/drivers/target/target_core_iblock.h index 01c2afd81500..718d3fcd3e7c 100644 --- a/drivers/target/target_core_iblock.h +++ b/drivers/target/target_core_iblock.h @@ -1,6 +1,9 @@ #ifndef TARGET_CORE_IBLOCK_H #define TARGET_CORE_IBLOCK_H +#include <linux/atomic.h> +#include <target/target_core_base.h> + #define IBLOCK_VERSION "4.0" #define IBLOCK_MAX_CDBS 16 diff --git a/drivers/target/target_core_internal.h b/drivers/target/target_core_internal.h index e2c970a9d61c..9ab7090f7c83 100644 --- a/drivers/target/target_core_internal.h +++ b/drivers/target/target_core_internal.h @@ -1,6 +1,11 @@ #ifndef TARGET_CORE_INTERNAL_H #define TARGET_CORE_INTERNAL_H +#include <linux/configfs.h> +#include <linux/list.h> +#include <linux/types.h> +#include <target/target_core_base.h> + #define TARGET_CORE_NAME_MAX_LEN 64 #define TARGET_FABRIC_NAME_SIZE 32 diff --git a/drivers/target/target_core_pr.c b/drivers/target/target_core_pr.c index 47463c99c318..d761025144f9 100644 --- a/drivers/target/target_core_pr.c +++ b/drivers/target/target_core_pr.c @@ -29,6 +29,8 @@ #include <linux/list.h> #include <linux/vmalloc.h> #include <linux/file.h> +#include <linux/fcntl.h> +#include <linux/fs.h> #include <scsi/scsi_proto.h> #include <asm/unaligned.h> @@ -253,8 +255,7 @@ target_scsi2_reservation_reserve(struct se_cmd *cmd) if ((cmd->t_task_cdb[1] & 0x01) && (cmd->t_task_cdb[1] & 0x02)) { - pr_err("LongIO and Obselete Bits set, returning" - " ILLEGAL_REQUEST\n"); + pr_err("LongIO and Obsolete Bits set, returning ILLEGAL_REQUEST\n"); return TCM_UNSUPPORTED_SCSI_OPCODE; } /* diff --git a/drivers/target/target_core_pr.h b/drivers/target/target_core_pr.h index e3d26e9126a0..847bd470339c 100644 --- a/drivers/target/target_core_pr.h +++ b/drivers/target/target_core_pr.h @@ -1,5 +1,9 @@ #ifndef TARGET_CORE_PR_H #define TARGET_CORE_PR_H + +#include <linux/types.h> +#include <target/target_core_base.h> + /* * PERSISTENT_RESERVE_OUT service action codes * diff --git a/drivers/target/target_core_pscsi.h b/drivers/target/target_core_pscsi.h index 6d2007e35df6..8a02fa47c7e8 100644 --- a/drivers/target/target_core_pscsi.h +++ b/drivers/target/target_core_pscsi.h @@ -15,11 +15,12 @@ #define PS_TIMEOUT_DISK (15*HZ) #define PS_TIMEOUT_OTHER (500*HZ) -#include <linux/device.h> -#include <linux/kref.h> -#include <linux/kobject.h> +#include <linux/cache.h> /* ___cacheline_aligned */ +#include <target/target_core_base.h> /* struct se_device */ +struct block_device; struct scsi_device; +struct Scsi_Host; struct pscsi_plugin_task { unsigned char pscsi_sense[TRANSPORT_SENSE_BUFFER]; diff --git a/drivers/target/target_core_rd.c b/drivers/target/target_core_rd.c index 24b36fd785f1..ddc216c9f1f6 100644 --- a/drivers/target/target_core_rd.c +++ b/drivers/target/target_core_rd.c @@ -26,7 +26,9 @@ #include <linux/string.h> #include <linux/parser.h> +#include <linux/highmem.h> #include <linux/timer.h> +#include <linux/scatterlist.h> #include <linux/slab.h> #include <linux/spinlock.h> #include <scsi/scsi_proto.h> diff --git a/drivers/target/target_core_rd.h b/drivers/target/target_core_rd.h index cc46a6a89b38..91fc1a34791d 100644 --- a/drivers/target/target_core_rd.h +++ b/drivers/target/target_core_rd.h @@ -1,6 +1,10 @@ #ifndef TARGET_CORE_RD_H #define TARGET_CORE_RD_H +#include <linux/module.h> +#include <linux/types.h> +#include <target/target_core_base.h> + #define RD_HBA_VERSION "v4.0" #define RD_MCP_VERSION "4.0" diff --git a/drivers/target/target_core_sbc.c b/drivers/target/target_core_sbc.c index 04f616b3ba0a..4879e70e2eef 100644 --- a/drivers/target/target_core_sbc.c +++ b/drivers/target/target_core_sbc.c @@ -24,6 +24,7 @@ #include <linux/module.h> #include <linux/ratelimit.h> #include <linux/crc-t10dif.h> +#include <linux/t10-pi.h> #include <asm/unaligned.h> #include <scsi/scsi_proto.h> #include <scsi/scsi_tcq.h> diff --git a/drivers/target/target_core_ua.h b/drivers/target/target_core_ua.h index bd6e78ba153d..97402856a8f0 100644 --- a/drivers/target/target_core_ua.h +++ b/drivers/target/target_core_ua.h @@ -1,6 +1,8 @@ #ifndef TARGET_CORE_UA_H #define TARGET_CORE_UA_H +#include <target/target_core_base.h> + /* * From spc4r17, Table D.1: ASC and ASCQ Assignement */ diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index 2b3c8564ace8..8041710b6972 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -27,6 +27,7 @@ #include <linux/uio_driver.h> #include <linux/stringify.h> #include <linux/bitops.h> +#include <linux/highmem.h> #include <net/genetlink.h> #include <scsi/scsi_common.h> #include <scsi/scsi_proto.h> @@ -537,7 +538,7 @@ tcmu_queue_cmd(struct se_cmd *se_cmd) struct se_device *se_dev = se_cmd->se_dev; struct tcmu_dev *udev = TCMU_DEV(se_dev); struct tcmu_cmd *tcmu_cmd; - int ret; + sense_reason_t ret; tcmu_cmd = tcmu_alloc_cmd(se_cmd); if (!tcmu_cmd) @@ -685,8 +686,6 @@ static int tcmu_check_expired_cmd(int id, void *p, void *data) target_complete_cmd(cmd->se_cmd, SAM_STAT_CHECK_CONDITION); cmd->se_cmd = NULL; - kmem_cache_free(tcmu_cmd_cache, cmd); - return 0; } diff --git a/drivers/target/target_core_xcopy.c b/drivers/target/target_core_xcopy.c index 094a1440eacb..37d5caebffa6 100644 --- a/drivers/target/target_core_xcopy.c +++ b/drivers/target/target_core_xcopy.c @@ -25,6 +25,7 @@ #include <linux/spinlock.h> #include <linux/list.h> #include <linux/configfs.h> +#include <linux/ratelimit.h> #include <scsi/scsi_proto.h> #include <asm/unaligned.h> diff --git a/drivers/target/target_core_xcopy.h b/drivers/target/target_core_xcopy.h index 700a981c7b41..4d3d4dd060f2 100644 --- a/drivers/target/target_core_xcopy.h +++ b/drivers/target/target_core_xcopy.h @@ -1,3 +1,5 @@ +#include <target/target_core_base.h> + #define XCOPY_TARGET_DESC_LEN 32 #define XCOPY_SEGMENT_DESC_LEN 28 #define XCOPY_NAA_IEEE_REGEX_LEN 16 diff --git a/drivers/target/tcm_fc/tcm_fc.h b/drivers/target/tcm_fc/tcm_fc.h index e28209b99b59..11d27b93b413 100644 --- a/drivers/target/tcm_fc/tcm_fc.h +++ b/drivers/target/tcm_fc/tcm_fc.h @@ -17,6 +17,9 @@ #ifndef __TCM_FC_H__ #define __TCM_FC_H__ +#include <linux/types.h> +#include <target/target_core_base.h> + #define FT_VERSION "0.4" #define FT_NAMELEN 32 /* length of ASCII WWPNs including pad */ diff --git a/drivers/usb/gadget/function/f_tcm.c b/drivers/usb/gadget/function/f_tcm.c index 197f73386fac..d2351139342f 100644 --- a/drivers/usb/gadget/function/f_tcm.c +++ b/drivers/usb/gadget/function/f_tcm.c @@ -1073,7 +1073,7 @@ static struct usbg_cmd *usbg_get_cmd(struct f_uas *fu, struct usbg_cmd *cmd; int tag; - tag = percpu_ida_alloc(&se_sess->sess_tag_pool, GFP_ATOMIC); + tag = percpu_ida_alloc(&se_sess->sess_tag_pool, TASK_RUNNING); if (tag < 0) return ERR_PTR(-ENOMEM); diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index cb22a9f9ae7e..fad81041f5ab 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1273,8 +1273,8 @@ out_error: */ static int nfs_weak_revalidate(struct dentry *dentry, unsigned int flags) { - int error; struct inode *inode = d_inode(dentry); + int error = 0; /* * I believe we can only get a negative dentry here in the case of a @@ -1293,7 +1293,8 @@ static int nfs_weak_revalidate(struct dentry *dentry, unsigned int flags) return 0; } - error = nfs_revalidate_inode(NFS_SERVER(inode), inode); + if (nfs_mapping_need_revalidate_inode(inode)) + error = __nfs_revalidate_inode(NFS_SERVER(inode), inode); dfprintk(LOOKUPCACHE, "NFS: %s: inode %lu is %s\n", __func__, inode->i_ino, error ? "invalid" : "valid"); return !error; @@ -2285,8 +2286,7 @@ static int nfs_access_get_cached(struct inode *inode, struct rpc_cred *cred, str if (cache == NULL) goto out; /* Found an entry, is our attribute cache valid? */ - if (!nfs_attribute_cache_expired(inode) && - !(nfsi->cache_validity & NFS_INO_INVALID_ATTR)) + if (!nfs_check_cache_invalid(inode, NFS_INO_INVALID_ACCESS)) break; err = -ECHILD; if (!may_block) @@ -2334,12 +2334,12 @@ static int nfs_access_get_cached_rcu(struct inode *inode, struct rpc_cred *cred, cache = NULL; if (cache == NULL) goto out; - err = nfs_revalidate_inode_rcu(NFS_SERVER(inode), inode); - if (err) + if (nfs_check_cache_invalid(inode, NFS_INO_INVALID_ACCESS)) goto out; res->jiffies = cache->jiffies; res->cred = cache->cred; res->mask = cache->mask; + err = 0; out: rcu_read_unlock(); return err; @@ -2491,12 +2491,13 @@ EXPORT_SYMBOL_GPL(nfs_may_open); static int nfs_execute_ok(struct inode *inode, int mask) { struct nfs_server *server = NFS_SERVER(inode); - int ret; + int ret = 0; - if (mask & MAY_NOT_BLOCK) - ret = nfs_revalidate_inode_rcu(server, inode); - else - ret = nfs_revalidate_inode(server, inode); + if (nfs_check_cache_invalid(inode, NFS_INO_INVALID_ACCESS)) { + if (mask & MAY_NOT_BLOCK) + return -ECHILD; + ret = __nfs_revalidate_inode(server, inode); + } if (ret == 0 && !execute_ok(inode)) ret = -EACCES; return ret; diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 55208b9b3c11..157cb43ce9db 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -101,21 +101,11 @@ EXPORT_SYMBOL_GPL(nfs_file_release); static int nfs_revalidate_file_size(struct inode *inode, struct file *filp) { struct nfs_server *server = NFS_SERVER(inode); - struct nfs_inode *nfsi = NFS_I(inode); - const unsigned long force_reval = NFS_INO_REVAL_PAGECACHE|NFS_INO_REVAL_FORCED; - unsigned long cache_validity = nfsi->cache_validity; - - if (NFS_PROTO(inode)->have_delegation(inode, FMODE_READ) && - (cache_validity & force_reval) != force_reval) - goto out_noreval; if (filp->f_flags & O_DIRECT) goto force_reval; - if (nfsi->cache_validity & NFS_INO_REVAL_PAGECACHE) - goto force_reval; - if (nfs_attribute_timeout(inode)) + if (nfs_check_cache_invalid(inode, NFS_INO_REVAL_PAGECACHE)) goto force_reval; -out_noreval: return 0; force_reval: return __nfs_revalidate_inode(server, inode); diff --git a/fs/nfs/filelayout/filelayoutdev.c b/fs/nfs/filelayout/filelayoutdev.c index a5589b791439..f956ca20a8a3 100644 --- a/fs/nfs/filelayout/filelayoutdev.c +++ b/fs/nfs/filelayout/filelayoutdev.c @@ -282,7 +282,8 @@ nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx) s->nfs_client->cl_minorversion); out_test_devid: - if (filelayout_test_devid_unavailable(devid)) + if (ret->ds_clp == NULL || + filelayout_test_devid_unavailable(devid)) ret = NULL; out: return ret; diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 9e111d07f667..45962fe5098c 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -1126,7 +1126,8 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task, case -EPIPE: dprintk("%s DS connection error %d\n", __func__, task->tk_status); - nfs4_mark_deviceid_unavailable(devid); + nfs4_delete_deviceid(devid->ld, devid->nfs_client, + &devid->deviceid); rpc_wake_up(&tbl->slot_tbl_waitq); /* fall through */ default: @@ -1175,7 +1176,8 @@ static int ff_layout_async_handle_error_v3(struct rpc_task *task, default: dprintk("%s DS connection error %d\n", __func__, task->tk_status); - nfs4_mark_deviceid_unavailable(devid); + nfs4_delete_deviceid(devid->ld, devid->nfs_client, + &devid->deviceid); } /* FIXME: Need to prevent infinite looping here. */ return -NFS4ERR_RESET_TO_PNFS; diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c index 3cc39d1c1206..e5a6f248697b 100644 --- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c +++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c @@ -177,7 +177,7 @@ out_err: static void ff_layout_mark_devid_invalid(struct pnfs_layout_segment *lseg, struct nfs4_deviceid_node *devid) { - nfs4_mark_deviceid_unavailable(devid); + nfs4_delete_deviceid(devid->ld, devid->nfs_client, &devid->deviceid); if (!ff_layout_has_available_ds(lseg)) pnfs_error_mark_layout_for_return(lseg->pls_layout->plh_inode, lseg); diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 5864146e05e6..011e4f8c1e01 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -160,6 +160,43 @@ int nfs_sync_mapping(struct address_space *mapping) return ret; } +static int nfs_attribute_timeout(struct inode *inode) +{ + struct nfs_inode *nfsi = NFS_I(inode); + + return !time_in_range_open(jiffies, nfsi->read_cache_jiffies, nfsi->read_cache_jiffies + nfsi->attrtimeo); +} + +static bool nfs_check_cache_invalid_delegated(struct inode *inode, unsigned long flags) +{ + unsigned long cache_validity = READ_ONCE(NFS_I(inode)->cache_validity); + + /* Special case for the pagecache or access cache */ + if (flags == NFS_INO_REVAL_PAGECACHE && + !(cache_validity & NFS_INO_REVAL_FORCED)) + return false; + return (cache_validity & flags) != 0; +} + +static bool nfs_check_cache_invalid_not_delegated(struct inode *inode, unsigned long flags) +{ + unsigned long cache_validity = READ_ONCE(NFS_I(inode)->cache_validity); + + if ((cache_validity & flags) != 0) + return true; + if (nfs_attribute_timeout(inode)) + return true; + return false; +} + +bool nfs_check_cache_invalid(struct inode *inode, unsigned long flags) +{ + if (NFS_PROTO(inode)->have_delegation(inode, FMODE_READ)) + return nfs_check_cache_invalid_delegated(inode, flags); + + return nfs_check_cache_invalid_not_delegated(inode, flags); +} + static void nfs_set_cache_invalid(struct inode *inode, unsigned long flags) { struct nfs_inode *nfsi = NFS_I(inode); @@ -795,6 +832,8 @@ void nfs_close_context(struct nfs_open_context *ctx, int is_sync) if (!is_sync) return; inode = d_inode(ctx->dentry); + if (NFS_PROTO(inode)->have_delegation(inode, FMODE_READ)) + return; nfsi = NFS_I(inode); if (inode->i_mapping->nrpages == 0) return; @@ -1044,13 +1083,6 @@ out: return status; } -int nfs_attribute_timeout(struct inode *inode) -{ - struct nfs_inode *nfsi = NFS_I(inode); - - return !time_in_range_open(jiffies, nfsi->read_cache_jiffies, nfsi->read_cache_jiffies + nfsi->attrtimeo); -} - int nfs_attribute_cache_expired(struct inode *inode) { if (nfs_have_delegated_attributes(inode)) @@ -1073,15 +1105,6 @@ int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) } EXPORT_SYMBOL_GPL(nfs_revalidate_inode); -int nfs_revalidate_inode_rcu(struct nfs_server *server, struct inode *inode) -{ - if (!(NFS_I(inode)->cache_validity & - (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_LABEL)) - && !nfs_attribute_cache_expired(inode)) - return NFS_STALE(inode) ? -ESTALE : 0; - return -ECHILD; -} - static int nfs_invalidate_mapping(struct inode *inode, struct address_space *mapping) { struct nfs_inode *nfsi = NFS_I(inode); @@ -1114,17 +1137,8 @@ static int nfs_invalidate_mapping(struct inode *inode, struct address_space *map bool nfs_mapping_need_revalidate_inode(struct inode *inode) { - unsigned long cache_validity = NFS_I(inode)->cache_validity; - - if (NFS_PROTO(inode)->have_delegation(inode, FMODE_READ)) { - const unsigned long force_reval = - NFS_INO_REVAL_PAGECACHE|NFS_INO_REVAL_FORCED; - return (cache_validity & force_reval) == force_reval; - } - - return (cache_validity & NFS_INO_REVAL_PAGECACHE) - || nfs_attribute_timeout(inode) - || NFS_STALE(inode); + return nfs_check_cache_invalid(inode, NFS_INO_REVAL_PAGECACHE) || + NFS_STALE(inode); } int nfs_revalidate_mapping_rcu(struct inode *inode) @@ -1536,13 +1550,6 @@ static int nfs_post_op_update_inode_locked(struct inode *inode, struct nfs_fattr { unsigned long invalid = NFS_INO_INVALID_ATTR; - /* - * Don't revalidate the pagecache if we hold a delegation, but do - * force an attribute update - */ - if (NFS_PROTO(inode)->have_delegation(inode, FMODE_READ)) - invalid = NFS_INO_INVALID_ATTR|NFS_INO_REVAL_FORCED; - if (S_ISDIR(inode->i_mode)) invalid |= NFS_INO_INVALID_DATA; nfs_set_cache_invalid(inode, invalid); diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 6b79c2ca9b9a..09ca5095c04e 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -381,6 +381,7 @@ extern int nfs_drop_inode(struct inode *); extern void nfs_clear_inode(struct inode *); extern void nfs_evict_inode(struct inode *); void nfs_zap_acl_cache(struct inode *inode); +extern bool nfs_check_cache_invalid(struct inode *, unsigned long); extern int nfs_wait_bit_killable(struct wait_bit_key *key, int mode); extern int nfs_wait_atomic_killable(atomic_t *p); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index d33242c8d95d..6dcbc5defb7a 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1089,8 +1089,15 @@ static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo) spin_lock(&dir->i_lock); nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; - if (!cinfo->atomic || cinfo->before != dir->i_version) + if (cinfo->atomic && cinfo->before == dir->i_version) { + nfsi->cache_validity &= ~NFS_INO_REVAL_PAGECACHE; + nfsi->attrtimeo_timestamp = jiffies; + } else { nfs_force_lookup_revalidate(dir); + if (cinfo->before != dir->i_version) + nfsi->cache_validity |= NFS_INO_INVALID_ACCESS | + NFS_INO_INVALID_ACL; + } dir->i_version = cinfo->after; nfsi->attr_gencount = nfs_inc_attr_generation_counter(); nfs_fscache_invalidate(dir); @@ -3115,6 +3122,16 @@ static void nfs4_close_done(struct rpc_task *task, void *data) res_stateid = &calldata->res.stateid; renew_lease(server, calldata->timestamp); break; + case -NFS4ERR_ACCESS: + if (calldata->arg.bitmask != NULL) { + calldata->arg.bitmask = NULL; + calldata->res.fattr = NULL; + task->tk_status = 0; + rpc_restart_call_prepare(task); + goto out_release; + + } + break; case -NFS4ERR_ADMIN_REVOKED: case -NFS4ERR_STALE_STATEID: case -NFS4ERR_EXPIRED: @@ -3140,7 +3157,7 @@ static void nfs4_close_done(struct rpc_task *task, void *data) res_stateid, calldata->arg.fmode); out_release: nfs_release_seqid(calldata->arg.seqid); - nfs_refresh_inode(calldata->inode, calldata->res.fattr); + nfs_refresh_inode(calldata->inode, &calldata->fattr); dprintk("%s: done, ret = %d!\n", __func__, task->tk_status); } @@ -3193,9 +3210,10 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data) goto out_wait; } - if (calldata->arg.fmode == 0) { + if (calldata->arg.fmode == 0) task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLOSE]; + if (calldata->arg.fmode == 0 || calldata->arg.fmode == FMODE_READ) { /* Close-to-open cache consistency revalidation */ if (!nfs4_have_delegation(inode, FMODE_READ)) calldata->arg.bitmask = NFS_SERVER(inode)->cache_consistency_bitmask; @@ -3207,7 +3225,10 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data) nfs4_map_atomic_open_share(NFS_SERVER(inode), calldata->arg.fmode, 0); - nfs_fattr_init(calldata->res.fattr); + if (calldata->res.fattr == NULL) + calldata->arg.bitmask = NULL; + else if (calldata->arg.bitmask == NULL) + calldata->res.fattr = NULL; calldata->timestamp = jiffies; if (nfs4_setup_sequence(NFS_SERVER(inode), &calldata->arg.seq_args, @@ -3274,6 +3295,7 @@ int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait) calldata->arg.seqid = alloc_seqid(&state->owner->so_seqid, gfp_mask); if (IS_ERR(calldata->arg.seqid)) goto out_free_calldata; + nfs_fattr_init(&calldata->fattr); calldata->arg.fmode = 0; calldata->lr.arg.ld_private = &calldata->lr.ld_private; calldata->res.fattr = &calldata->fattr; @@ -5673,6 +5695,14 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata) case -NFS4ERR_STALE_STATEID: task->tk_status = 0; break; + case -NFS4ERR_ACCESS: + if (data->args.bitmask) { + data->args.bitmask = NULL; + data->res.fattr = NULL; + task->tk_status = 0; + rpc_restart_call_prepare(task); + return; + } default: if (nfs4_async_handle_error(task, data->res.server, NULL, NULL) == -EAGAIN) { @@ -5692,6 +5722,7 @@ static void nfs4_delegreturn_release(void *calldata) if (data->lr.roc) pnfs_roc_release(&data->lr.arg, &data->lr.res, data->res.lr_ret); + nfs_post_op_update_inode_force_wcc(inode, &data->fattr); nfs_iput_and_deactive(inode); } kfree(calldata); @@ -5780,10 +5811,6 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co if (status != 0) goto out; status = data->rpc_status; - if (status == 0) - nfs_post_op_update_inode_force_wcc(inode, &data->fattr); - else - nfs_refresh_inode(inode, &data->fattr); out: rpc_put_task(task); return status; diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 95baf7d340f0..1d152f4470cd 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -494,21 +494,18 @@ nfs4_alloc_state_owner(struct nfs_server *server, } static void -nfs4_drop_state_owner(struct nfs4_state_owner *sp) -{ - struct rb_node *rb_node = &sp->so_server_node; - - if (!RB_EMPTY_NODE(rb_node)) { - struct nfs_server *server = sp->so_server; - struct nfs_client *clp = server->nfs_client; - - spin_lock(&clp->cl_lock); - if (!RB_EMPTY_NODE(rb_node)) { - rb_erase(rb_node, &server->state_owners); - RB_CLEAR_NODE(rb_node); - } - spin_unlock(&clp->cl_lock); - } +nfs4_reset_state_owner(struct nfs4_state_owner *sp) +{ + /* This state_owner is no longer usable, but must + * remain in place so that state recovery can find it + * and the opens associated with it. + * It may also be used for new 'open' request to + * return a delegation to the server. + * So update the 'create_time' so that it looks like + * a new state_owner. This will cause the server to + * request an OPEN_CONFIRM to start a new sequence. + */ + sp->so_seqid.create_time = ktime_get(); } static void nfs4_free_state_owner(struct nfs4_state_owner *sp) @@ -797,21 +794,33 @@ void nfs4_close_sync(struct nfs4_state *state, fmode_t fmode) /* * Search the state->lock_states for an existing lock_owner - * that is compatible with current->files + * that is compatible with either of the given owners. + * If the second is non-zero, then the first refers to a Posix-lock + * owner (current->files) and the second refers to a flock/OFD + * owner (struct file*). In that case, prefer a match for the first + * owner. + * If both sorts of locks are held on the one file we cannot know + * which stateid was intended to be used, so a "correct" choice cannot + * be made. Failing that, a "consistent" choice is preferable. The + * consistent choice we make is to prefer the first owner, that of a + * Posix lock. */ static struct nfs4_lock_state * __nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner, fl_owner_t fl_owner2) { - struct nfs4_lock_state *pos; + struct nfs4_lock_state *pos, *ret = NULL; list_for_each_entry(pos, &state->lock_states, ls_locks) { - if (pos->ls_owner != fl_owner && - pos->ls_owner != fl_owner2) - continue; - atomic_inc(&pos->ls_count); - return pos; + if (pos->ls_owner == fl_owner) { + ret = pos; + break; + } + if (pos->ls_owner == fl_owner2) + ret = pos; } - return NULL; + if (ret) + atomic_inc(&ret->ls_count); + return ret; } /* @@ -1101,7 +1110,7 @@ void nfs_increment_open_seqid(int status, struct nfs_seqid *seqid) sp = container_of(seqid->sequence, struct nfs4_state_owner, so_seqid); if (status == -NFS4ERR_BAD_SEQID) - nfs4_drop_state_owner(sp); + nfs4_reset_state_owner(sp); if (!nfs4_has_session(sp->so_server->nfs_client)) nfs_increment_seqid(status, seqid); } diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 1af6268a7d8c..e9255cb453e6 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -502,11 +502,13 @@ static int nfs4_stat_to_errno(int); (compound_encode_hdr_maxsz + \ encode_sequence_maxsz + \ encode_putfh_maxsz + \ + encode_layoutreturn_maxsz + \ encode_open_downgrade_maxsz) #define NFS4_dec_open_downgrade_sz \ (compound_decode_hdr_maxsz + \ decode_sequence_maxsz + \ decode_putfh_maxsz + \ + decode_layoutreturn_maxsz + \ decode_open_downgrade_maxsz) #define NFS4_enc_close_sz (compound_encode_hdr_maxsz + \ encode_sequence_maxsz + \ @@ -2277,9 +2279,9 @@ static void nfs4_xdr_enc_close(struct rpc_rqst *req, struct xdr_stream *xdr, encode_putfh(xdr, args->fh, &hdr); if (args->lr_args) encode_layoutreturn(xdr, args->lr_args, &hdr); - encode_close(xdr, args, &hdr); if (args->bitmask != NULL) encode_getfattr(xdr, args->bitmask, &hdr); + encode_close(xdr, args, &hdr); encode_nops(&hdr); } @@ -2356,6 +2358,8 @@ static void nfs4_xdr_enc_open_downgrade(struct rpc_rqst *req, encode_compound_hdr(xdr, req, &hdr); encode_sequence(xdr, &args->seq_args, &hdr); encode_putfh(xdr, args->fh, &hdr); + if (args->lr_args) + encode_layoutreturn(xdr, args->lr_args, &hdr); encode_open_downgrade(xdr, args, &hdr); encode_nops(&hdr); } @@ -2701,7 +2705,8 @@ static void nfs4_xdr_enc_delegreturn(struct rpc_rqst *req, encode_putfh(xdr, args->fhandle, &hdr); if (args->lr_args) encode_layoutreturn(xdr, args->lr_args, &hdr); - encode_getfattr(xdr, args->bitmask, &hdr); + if (args->bitmask) + encode_getfattr(xdr, args->bitmask, &hdr); encode_delegreturn(xdr, args->stateid, &hdr); encode_nops(&hdr); } @@ -6151,6 +6156,12 @@ static int nfs4_xdr_dec_open_downgrade(struct rpc_rqst *rqstp, status = decode_putfh(xdr); if (status) goto out; + if (res->lr_res) { + status = decode_layoutreturn(xdr, res->lr_res); + res->lr_ret = status; + if (status) + goto out; + } status = decode_open_downgrade(xdr, res); out: return status; @@ -6484,16 +6495,12 @@ static int nfs4_xdr_dec_close(struct rpc_rqst *rqstp, struct xdr_stream *xdr, if (status) goto out; } + if (res->fattr != NULL) { + status = decode_getfattr(xdr, res->fattr, res->server); + if (status != 0) + goto out; + } status = decode_close(xdr, res); - if (status != 0) - goto out; - /* - * Note: Server may do delete on close for this file - * in which case the getattr call will fail with - * an ESTALE error. Shouldn't be a problem, - * though, since fattr->valid will remain unset. - */ - decode_getfattr(xdr, res->fattr, res->server); out: return status; } @@ -6966,9 +6973,11 @@ static int nfs4_xdr_dec_delegreturn(struct rpc_rqst *rqstp, if (status) goto out; } - status = decode_getfattr(xdr, res->fattr, res->server); - if (status != 0) - goto out; + if (res->fattr) { + status = decode_getfattr(xdr, res->fattr, res->server); + if (status != 0) + goto out; + } status = decode_delegreturn(xdr); out: return status; diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 896df7bdf85f..59554f3adf29 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1251,6 +1251,7 @@ bool pnfs_roc(struct inode *ino, nfs4_stateid stateid; enum pnfs_iomode iomode = 0; bool layoutreturn = false, roc = false; + bool skip_read = false; if (!nfs_have_layout(ino)) return false; @@ -1270,18 +1271,27 @@ retry: } /* no roc if we hold a delegation */ - if (nfs4_check_delegation(ino, FMODE_READ)) - goto out_noroc; + if (nfs4_check_delegation(ino, FMODE_READ)) { + if (nfs4_check_delegation(ino, FMODE_WRITE)) + goto out_noroc; + skip_read = true; + } list_for_each_entry(ctx, &nfsi->open_files, list) { state = ctx->state; + if (state == NULL) + continue; /* Don't return layout if there is open file state */ - if (state != NULL && state->state != 0) + if (state->state & FMODE_WRITE) goto out_noroc; + if (state->state & FMODE_READ) + skip_read = true; } list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list) { + if (skip_read && lseg->pls_range.iomode == IOMODE_READ) + continue; /* If we are sending layoutreturn, invalidate all valid lsegs */ if (!test_and_clear_bit(NFS_LSEG_ROC, &lseg->pls_flags)) continue; diff --git a/fs/splice.c b/fs/splice.c index 8ed7c9d8c0fb..873d83104e79 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -1087,7 +1087,13 @@ EXPORT_SYMBOL(do_splice_direct); static int wait_for_space(struct pipe_inode_info *pipe, unsigned flags) { - while (pipe->nrbufs == pipe->buffers) { + for (;;) { + if (unlikely(!pipe->readers)) { + send_sig(SIGPIPE, current, 0); + return -EPIPE; + } + if (pipe->nrbufs != pipe->buffers) + return 0; if (flags & SPLICE_F_NONBLOCK) return -EAGAIN; if (signal_pending(current)) @@ -1096,7 +1102,6 @@ static int wait_for_space(struct pipe_inode_info *pipe, unsigned flags) pipe_wait(pipe); pipe->waiting_writers--; } - return 0; } static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe, diff --git a/include/dt-bindings/net/mdio.h b/include/dt-bindings/net/mdio.h deleted file mode 100644 index 99c6d903d439..000000000000 --- a/include/dt-bindings/net/mdio.h +++ /dev/null @@ -1,19 +0,0 @@ -/* - * This header provides generic constants for ethernet MDIO bindings - */ - -#ifndef _DT_BINDINGS_NET_MDIO_H -#define _DT_BINDINGS_NET_MDIO_H - -/* - * EEE capability Advertisement - */ - -#define MDIO_EEE_100TX 0x0002 /* 100TX EEE cap */ -#define MDIO_EEE_1000T 0x0004 /* 1000T EEE cap */ -#define MDIO_EEE_10GT 0x0008 /* 10GT EEE cap */ -#define MDIO_EEE_1000KX 0x0010 /* 1000KX EEE cap */ -#define MDIO_EEE_10GKX4 0x0020 /* 10G KX4 EEE cap */ -#define MDIO_EEE_10GKR 0x0040 /* 10G KR EEE cap */ - -#endif diff --git a/include/linux/cacheinfo.h b/include/linux/cacheinfo.h index a951fd10aaaa..6a524bf6a06d 100644 --- a/include/linux/cacheinfo.h +++ b/include/linux/cacheinfo.h @@ -18,6 +18,7 @@ enum cache_type { /** * struct cacheinfo - represent a cache leaf node + * @id: This cache's id. It is unique among caches with the same (type, level). * @type: type of the cache - data, inst or unified * @level: represents the hierarchy in the multi-level cache * @coherency_line_size: size of each cache line usually representing @@ -44,6 +45,7 @@ enum cache_type { * keeping, the remaining members form the core properties of the cache */ struct cacheinfo { + unsigned int id; enum cache_type type; unsigned int level; unsigned int coherency_line_size; @@ -61,6 +63,7 @@ struct cacheinfo { #define CACHE_WRITE_ALLOCATE BIT(3) #define CACHE_ALLOCATE_POLICY_MASK \ (CACHE_READ_ALLOCATE | CACHE_WRITE_ALLOCATE) +#define CACHE_ID BIT(4) struct device_node *of_node; bool disable_sysfs; diff --git a/include/linux/configfs.h b/include/linux/configfs.h index 9a30b921f740..2319b8c108e8 100644 --- a/include/linux/configfs.h +++ b/include/linux/configfs.h @@ -35,14 +35,11 @@ #ifndef _CONFIGFS_H_ #define _CONFIGFS_H_ -#include <linux/kernel.h> -#include <linux/types.h> -#include <linux/list.h> -#include <linux/kref.h> -#include <linux/mutex.h> -#include <linux/err.h> - -#include <linux/atomic.h> +#include <linux/stat.h> /* S_IRUGO */ +#include <linux/types.h> /* ssize_t */ +#include <linux/list.h> /* struct list_head */ +#include <linux/kref.h> /* struct kref */ +#include <linux/mutex.h> /* struct mutex */ #define CONFIGFS_ITEM_NAME_LEN 20 diff --git a/include/linux/ima.h b/include/linux/ima.h index 0eb7c2e7f0d6..7f6952f8d6aa 100644 --- a/include/linux/ima.h +++ b/include/linux/ima.h @@ -11,6 +11,7 @@ #define _LINUX_IMA_H #include <linux/fs.h> +#include <linux/kexec.h> struct linux_binprm; #ifdef CONFIG_IMA @@ -23,6 +24,10 @@ extern int ima_post_read_file(struct file *file, void *buf, loff_t size, enum kernel_read_file_id id); extern void ima_post_path_mknod(struct dentry *dentry); +#ifdef CONFIG_IMA_KEXEC +extern void ima_add_kexec_buffer(struct kimage *image); +#endif + #else static inline int ima_bprm_check(struct linux_binprm *bprm) { @@ -62,6 +67,13 @@ static inline void ima_post_path_mknod(struct dentry *dentry) #endif /* CONFIG_IMA */ +#ifndef CONFIG_IMA_KEXEC +struct kimage; + +static inline void ima_add_kexec_buffer(struct kimage *image) +{} +#endif + #ifdef CONFIG_IMA_APPRAISE extern void ima_inode_post_setattr(struct dentry *dentry); extern int ima_inode_setxattr(struct dentry *dentry, const char *xattr_name, diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index cb631973839a..f1da8c8dd473 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -340,10 +340,8 @@ extern void nfs_access_add_cache(struct inode *, struct nfs_access_entry *); extern void nfs_access_set_mask(struct nfs_access_entry *, u32); extern int nfs_permission(struct inode *, int); extern int nfs_open(struct inode *, struct file *); -extern int nfs_attribute_timeout(struct inode *inode); extern int nfs_attribute_cache_expired(struct inode *inode); extern int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode); -extern int nfs_revalidate_inode_rcu(struct nfs_server *server, struct inode *inode); extern int __nfs_revalidate_inode(struct nfs_server *, struct inode *); extern bool nfs_mapping_need_revalidate_inode(struct inode *inode); extern int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping); diff --git a/include/linux/ratelimit.h b/include/linux/ratelimit.h index 57c9e0622a38..56375edf2ed2 100644 --- a/include/linux/ratelimit.h +++ b/include/linux/ratelimit.h @@ -77,8 +77,11 @@ extern int ___ratelimit(struct ratelimit_state *rs, const char *func); #ifdef CONFIG_PRINTK -#define WARN_ON_RATELIMIT(condition, state) \ - WARN_ON((condition) && __ratelimit(state)) +#define WARN_ON_RATELIMIT(condition, state) ({ \ + bool __rtn_cond = !!(condition); \ + WARN_ON(__rtn_cond && __ratelimit(state)); \ + __rtn_cond; \ +}) #define WARN_RATELIMIT(condition, format, ...) \ ({ \ diff --git a/include/linux/sched.h b/include/linux/sched.h index a440cf178191..4d1905245c7a 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1821,6 +1821,9 @@ struct task_struct { /* cg_list protected by css_set_lock and tsk->alloc_lock */ struct list_head cg_list; #endif +#ifdef CONFIG_INTEL_RDT_A + int closid; +#endif #ifdef CONFIG_FUTEX struct robust_list_head __user *robust_list; #ifdef CONFIG_COMPAT diff --git a/include/target/iscsi/iscsi_target_core.h b/include/target/iscsi/iscsi_target_core.h index 4ac24f5a3308..275581d483dd 100644 --- a/include/target/iscsi/iscsi_target_core.h +++ b/include/target/iscsi/iscsi_target_core.h @@ -1,12 +1,14 @@ #ifndef ISCSI_TARGET_CORE_H #define ISCSI_TARGET_CORE_H -#include <linux/in.h> -#include <linux/configfs.h> -#include <net/sock.h> -#include <net/tcp.h> -#include <scsi/iscsi_proto.h> -#include <target/target_core_base.h> +#include <linux/dma-direction.h> /* enum dma_data_direction */ +#include <linux/list.h> /* struct list_head */ +#include <linux/socket.h> /* struct sockaddr_storage */ +#include <linux/types.h> /* u8 */ +#include <scsi/iscsi_proto.h> /* itt_t */ +#include <target/target_core_base.h> /* struct se_cmd */ + +struct sock; #define ISCSIT_VERSION "v4.1.0" #define ISCSI_MAX_DATASN_MISSING_COUNT 16 diff --git a/include/target/iscsi/iscsi_target_stat.h b/include/target/iscsi/iscsi_target_stat.h index e615bb485d0b..c27dd471656d 100644 --- a/include/target/iscsi/iscsi_target_stat.h +++ b/include/target/iscsi/iscsi_target_stat.h @@ -1,6 +1,10 @@ #ifndef ISCSI_TARGET_STAT_H #define ISCSI_TARGET_STAT_H +#include <linux/types.h> +#include <linux/spinlock.h> +#include <linux/socket.h> + /* * For struct iscsi_tiqn->tiqn_wwn default groups */ diff --git a/include/target/iscsi/iscsi_transport.h b/include/target/iscsi/iscsi_transport.h index 40ac7cd80150..1277e9ba0318 100644 --- a/include/target/iscsi/iscsi_transport.h +++ b/include/target/iscsi/iscsi_transport.h @@ -1,6 +1,6 @@ -#include <linux/module.h> -#include <linux/list.h> -#include "iscsi_target_core.h" +#include "iscsi_target_core.h" /* struct iscsi_cmd */ + +struct sockaddr_storage; struct iscsit_transport { #define ISCSIT_TRANSPORT_NAME 16 diff --git a/include/target/target_core_backend.h b/include/target/target_core_backend.h index f6f3bc52c1ac..b54b98dc2d4a 100644 --- a/include/target/target_core_backend.h +++ b/include/target/target_core_backend.h @@ -1,8 +1,14 @@ #ifndef TARGET_CORE_BACKEND_H #define TARGET_CORE_BACKEND_H +#include <linux/types.h> +#include <target/target_core_base.h> + #define TRANSPORT_FLAG_PASSTHROUGH 1 +struct request_queue; +struct scatterlist; + struct target_backend_ops { char name[16]; char inquiry_prod[16]; diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index 00558287936d..29e6858bb164 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -1,14 +1,10 @@ #ifndef TARGET_CORE_BASE_H #define TARGET_CORE_BASE_H -#include <linux/in.h> -#include <linux/configfs.h> -#include <linux/dma-mapping.h> -#include <linux/blkdev.h> -#include <linux/percpu_ida.h> -#include <linux/t10-pi.h> -#include <net/sock.h> -#include <net/tcp.h> +#include <linux/configfs.h> /* struct config_group */ +#include <linux/dma-direction.h> /* enum dma_data_direction */ +#include <linux/percpu_ida.h> /* struct percpu_ida */ +#include <linux/semaphore.h> /* struct semaphore */ #define TARGET_CORE_VERSION "v5.0" diff --git a/include/target/target_core_fabric.h b/include/target/target_core_fabric.h index 5cd6faa6e0d1..358041bad1da 100644 --- a/include/target/target_core_fabric.h +++ b/include/target/target_core_fabric.h @@ -1,6 +1,10 @@ #ifndef TARGET_CORE_FABRIC_H #define TARGET_CORE_FABRIC_H +#include <linux/configfs.h> +#include <linux/types.h> +#include <target/target_core_base.h> + struct target_core_fabric_ops { struct module *module; const char *name; diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h index 9bd559472c92..e230af2e6855 100644 --- a/include/uapi/linux/magic.h +++ b/include/uapi/linux/magic.h @@ -57,6 +57,7 @@ #define CGROUP_SUPER_MAGIC 0x27e0eb #define CGROUP2_SUPER_MAGIC 0x63677270 +#define RDTGROUP_SUPER_MAGIC 0x7655821 #define STACK_END_MAGIC 0x57AC6E9D diff --git a/kernel/kcov.c b/kernel/kcov.c index cc2fa35ca480..85e5546cd791 100644 --- a/kernel/kcov.c +++ b/kernel/kcov.c @@ -19,6 +19,7 @@ #include <linux/debugfs.h> #include <linux/uaccess.h> #include <linux/kcov.h> +#include <asm/setup.h> /* * kcov descriptor (one per opened debugfs file). @@ -73,6 +74,11 @@ void notrace __sanitizer_cov_trace_pc(void) if (mode == KCOV_MODE_TRACE) { unsigned long *area; unsigned long pos; + unsigned long ip = _RET_IP_; + +#ifdef CONFIG_RANDOMIZE_BASE + ip -= kaslr_offset(); +#endif /* * There is some code that runs in interrupts but for which @@ -86,7 +92,7 @@ void notrace __sanitizer_cov_trace_pc(void) /* The first word is number of subsequent PCs. */ pos = READ_ONCE(area[0]) + 1; if (likely(pos < t->kcov_size)) { - area[pos] = _RET_IP_; + area[pos] = ip; WRITE_ONCE(area[0], pos); } } diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c index 0c2df7f73792..b56a558e406d 100644 --- a/kernel/kexec_file.c +++ b/kernel/kexec_file.c @@ -19,6 +19,7 @@ #include <linux/mutex.h> #include <linux/list.h> #include <linux/fs.h> +#include <linux/ima.h> #include <crypto/hash.h> #include <crypto/sha.h> #include <linux/syscalls.h> @@ -132,6 +133,9 @@ kimage_file_prepare_segments(struct kimage *image, int kernel_fd, int initrd_fd, return ret; image->kernel_buf_len = size; + /* IMA needs to pass the measurement list to the next kernel. */ + ima_add_kexec_buffer(image); + /* Call arch image probe handlers */ ret = arch_kexec_kernel_image_probe(image, image->kernel_buf, image->kernel_buf_len); diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 7446097f72bd..cb66a4648840 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -26,7 +26,7 @@ config CONSOLE_LOGLEVEL_DEFAULT the kernel bootargs. loglevel=<x> continues to override whatever value is specified here as well. - Note: This does not affect the log level of un-prefixed prink() + Note: This does not affect the log level of un-prefixed printk() usage in the kernel. That is controlled by the MESSAGE_LOGLEVEL_DEFAULT option. diff --git a/mm/fadvise.c b/mm/fadvise.c index 6c707bfe02fd..a43013112581 100644 --- a/mm/fadvise.c +++ b/mm/fadvise.c @@ -139,7 +139,20 @@ SYSCALL_DEFINE4(fadvise64_64, int, fd, loff_t, offset, loff_t, len, int, advice) } if (end_index >= start_index) { - unsigned long count = invalidate_mapping_pages(mapping, + unsigned long count; + + /* + * It's common to FADV_DONTNEED right after + * the read or write that instantiates the + * pages, in which case there will be some + * sitting on the local LRU cache. Try to + * avoid the expensive remote drain and the + * second cache tree walk below by flushing + * them out right away. + */ + lru_add_drain(); + + count = invalidate_mapping_pages(mapping, start_index, end_index); /* diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 6c9615c90f37..618ab5079816 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -958,7 +958,7 @@ static int __ip_append_data(struct sock *sk, csummode = CHECKSUM_PARTIAL; cork->length += length; - if (((length > mtu) || (skb && skb_is_gso(skb))) && + if ((((length + fragheaderlen) > mtu) || (skb && skb_is_gso(skb))) && (sk->sk_protocol == IPPROTO_UDP) && (rt->dst.dev->features & NETIF_F_UFO) && !rt->dst.header_len && (sk->sk_type == SOCK_DGRAM) && !sk->sk_no_check_tx) { diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index d19044f2b1f4..c87d359b9b37 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -2195,6 +2195,7 @@ static int validate_set(const struct nlattr *a, case OVS_KEY_ATTR_ETHERNET: if (mac_proto != MAC_PROTO_ETHERNET) return -EINVAL; + break; case OVS_KEY_ATTR_TUNNEL: if (masked) diff --git a/net/rds/rdma.c b/net/rds/rdma.c index 4c93badeabf2..ea961144084f 100644 --- a/net/rds/rdma.c +++ b/net/rds/rdma.c @@ -135,7 +135,7 @@ void rds_rdma_drop_keys(struct rds_sock *rs) /* Release any MRs associated with this socket */ spin_lock_irqsave(&rs->rs_rdma_lock, flags); while ((node = rb_first(&rs->rs_rdma_keys))) { - mr = container_of(node, struct rds_mr, r_rb_node); + mr = rb_entry(node, struct rds_mr, r_rb_node); if (mr->r_trans == rs->rs_transport) mr->r_invalidate = 0; rb_erase(&mr->r_rb_node, &rs->rs_rdma_keys); diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index 86309a3156a5..a4f738ac7728 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -136,7 +136,7 @@ static void fq_flow_set_throttled(struct fq_sched_data *q, struct fq_flow *f) struct fq_flow *aux; parent = *p; - aux = container_of(parent, struct fq_flow, rate_node); + aux = rb_entry(parent, struct fq_flow, rate_node); if (f->time_next_packet >= aux->time_next_packet) p = &parent->rb_right; else @@ -188,7 +188,7 @@ static void fq_gc(struct fq_sched_data *q, while (*p) { parent = *p; - f = container_of(parent, struct fq_flow, fq_node); + f = rb_entry(parent, struct fq_flow, fq_node); if (f->sk == sk) break; @@ -256,7 +256,7 @@ static struct fq_flow *fq_classify(struct sk_buff *skb, struct fq_sched_data *q) while (*p) { parent = *p; - f = container_of(parent, struct fq_flow, fq_node); + f = rb_entry(parent, struct fq_flow, fq_node); if (f->sk == sk) { /* socket might have been reallocated, so check * if its sk_hash is the same. @@ -424,7 +424,7 @@ static void fq_check_throttled(struct fq_sched_data *q, u64 now) q->time_next_delayed_flow = ~0ULL; while ((p = rb_first(&q->delayed)) != NULL) { - struct fq_flow *f = container_of(p, struct fq_flow, rate_node); + struct fq_flow *f = rb_entry(p, struct fq_flow, rate_node); if (f->time_next_packet > now) { q->time_next_delayed_flow = f->time_next_packet; @@ -563,7 +563,7 @@ static void fq_reset(struct Qdisc *sch) for (idx = 0; idx < (1U << q->fq_trees_log); idx++) { root = &q->fq_root[idx]; while ((p = rb_first(root)) != NULL) { - f = container_of(p, struct fq_flow, fq_node); + f = rb_entry(p, struct fq_flow, fq_node); rb_erase(p, root); fq_flow_purge(f); @@ -593,7 +593,7 @@ static void fq_rehash(struct fq_sched_data *q, oroot = &old_array[idx]; while ((op = rb_first(oroot)) != NULL) { rb_erase(op, oroot); - of = container_of(op, struct fq_flow, fq_node); + of = rb_entry(op, struct fq_flow, fq_node); if (fq_gc_candidate(of)) { fcnt++; kmem_cache_free(fq_flow_cachep, of); @@ -606,7 +606,7 @@ static void fq_rehash(struct fq_sched_data *q, while (*np) { parent = *np; - nf = container_of(parent, struct fq_flow, fq_node); + nf = rb_entry(parent, struct fq_flow, fq_node); BUG_ON(nf->sk == of->sk); if (nf->sk > of->sk) diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 9f7b380cf0a3..b7e4097bfdab 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -152,7 +152,7 @@ struct netem_skb_cb { static struct sk_buff *netem_rb_to_skb(struct rb_node *rb) { - return container_of(rb, struct sk_buff, rbnode); + return rb_entry(rb, struct sk_buff, rbnode); } static inline struct netem_skb_cb *netem_skb_cb(struct sk_buff *skb) diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c index 401c60750b20..1ebc184a0e23 100644 --- a/net/sctp/bind_addr.c +++ b/net/sctp/bind_addr.c @@ -292,6 +292,8 @@ int sctp_raw_to_bind_addrs(struct sctp_bind_addr *bp, __u8 *raw_addr_list, } af->from_addr_param(&addr, rawaddr, htons(port), 0); + if (sctp_bind_addr_state(bp, &addr) != -1) + goto next; retval = sctp_add_bind_addr(bp, &addr, sizeof(addr), SCTP_ADDR_SRC, gfp); if (retval) { @@ -300,6 +302,7 @@ int sctp_raw_to_bind_addrs(struct sctp_bind_addr *bp, __u8 *raw_addr_list, break; } +next: len = ntohs(param->length); addrs_len -= len; raw_addr_list += len; diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 7b523e3f551f..616a9428e0c4 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -205,26 +205,30 @@ int sctp_copy_local_addr_list(struct net *net, struct sctp_bind_addr *bp, list_for_each_entry_rcu(addr, &net->sctp.local_addr_list, list) { if (!addr->valid) continue; - if (sctp_in_scope(net, &addr->a, scope)) { - /* Now that the address is in scope, check to see if - * the address type is really supported by the local - * sock as well as the remote peer. - */ - if ((((AF_INET == addr->a.sa.sa_family) && - (copy_flags & SCTP_ADDR4_PEERSUPP))) || - (((AF_INET6 == addr->a.sa.sa_family) && - (copy_flags & SCTP_ADDR6_ALLOWED) && - (copy_flags & SCTP_ADDR6_PEERSUPP)))) { - error = sctp_add_bind_addr(bp, &addr->a, - sizeof(addr->a), - SCTP_ADDR_SRC, GFP_ATOMIC); - if (error) - goto end_copy; - } - } + if (!sctp_in_scope(net, &addr->a, scope)) + continue; + + /* Now that the address is in scope, check to see if + * the address type is really supported by the local + * sock as well as the remote peer. + */ + if (addr->a.sa.sa_family == AF_INET && + !(copy_flags & SCTP_ADDR4_PEERSUPP)) + continue; + if (addr->a.sa.sa_family == AF_INET6 && + (!(copy_flags & SCTP_ADDR6_ALLOWED) || + !(copy_flags & SCTP_ADDR6_PEERSUPP))) + continue; + + if (sctp_bind_addr_state(bp, &addr->a) != -1) + continue; + + error = sctp_add_bind_addr(bp, &addr->a, sizeof(addr->a), + SCTP_ADDR_SRC, GFP_ATOMIC); + if (error) + break; } -end_copy: rcu_read_unlock(); return error; } diff --git a/scripts/selinux/genheaders/Makefile b/scripts/selinux/genheaders/Makefile index 1d1ac51359e3..6fc2b8789a0b 100644 --- a/scripts/selinux/genheaders/Makefile +++ b/scripts/selinux/genheaders/Makefile @@ -1,4 +1,6 @@ hostprogs-y := genheaders -HOST_EXTRACFLAGS += -Isecurity/selinux/include +HOST_EXTRACFLAGS += \ + -I$(srctree)/include/uapi -I$(srctree)/include \ + -I$(srctree)/security/selinux/include always := $(hostprogs-y) diff --git a/scripts/selinux/genheaders/genheaders.c b/scripts/selinux/genheaders/genheaders.c index 539855ff31f9..f4dd41f900d5 100644 --- a/scripts/selinux/genheaders/genheaders.c +++ b/scripts/selinux/genheaders/genheaders.c @@ -1,3 +1,7 @@ + +/* NOTE: we really do want to use the kernel headers here */ +#define __EXPORTED_HEADERS__ + #include <stdio.h> #include <stdlib.h> #include <unistd.h> diff --git a/scripts/selinux/mdp/Makefile b/scripts/selinux/mdp/Makefile index dba7eff69a00..d6a83cafe59f 100644 --- a/scripts/selinux/mdp/Makefile +++ b/scripts/selinux/mdp/Makefile @@ -1,5 +1,7 @@ hostprogs-y := mdp -HOST_EXTRACFLAGS += -Isecurity/selinux/include +HOST_EXTRACFLAGS += \ + -I$(srctree)/include/uapi -I$(srctree)/include \ + -I$(srctree)/security/selinux/include always := $(hostprogs-y) clean-files := policy.* file_contexts diff --git a/scripts/selinux/mdp/mdp.c b/scripts/selinux/mdp/mdp.c index e10beb11b696..c29fa4a6228d 100644 --- a/scripts/selinux/mdp/mdp.c +++ b/scripts/selinux/mdp/mdp.c @@ -24,6 +24,10 @@ * Authors: Serge E. Hallyn <serue@us.ibm.com> */ + +/* NOTE: we really do want to use the kernel headers here */ +#define __EXPORTED_HEADERS__ + #include <stdio.h> #include <stdlib.h> #include <unistd.h> diff --git a/security/integrity/ima/Kconfig b/security/integrity/ima/Kconfig index 5487827fa86c..370eb2f4dd37 100644 --- a/security/integrity/ima/Kconfig +++ b/security/integrity/ima/Kconfig @@ -27,6 +27,18 @@ config IMA to learn more about IMA. If unsure, say N. +config IMA_KEXEC + bool "Enable carrying the IMA measurement list across a soft boot" + depends on IMA && TCG_TPM && HAVE_IMA_KEXEC + default n + help + TPM PCRs are only reset on a hard reboot. In order to validate + a TPM's quote after a soft boot, the IMA measurement list of the + running kernel must be saved and restored on boot. + + Depending on the IMA policy, the measurement list can grow to + be very large. + config IMA_MEASURE_PCR_IDX int depends on IMA diff --git a/security/integrity/ima/Makefile b/security/integrity/ima/Makefile index 9aeaedad1e2b..29f198bde02b 100644 --- a/security/integrity/ima/Makefile +++ b/security/integrity/ima/Makefile @@ -8,4 +8,5 @@ obj-$(CONFIG_IMA) += ima.o ima-y := ima_fs.o ima_queue.o ima_init.o ima_main.o ima_crypto.o ima_api.o \ ima_policy.o ima_template.o ima_template_lib.o ima-$(CONFIG_IMA_APPRAISE) += ima_appraise.o +ima-$(CONFIG_HAVE_IMA_KEXEC) += ima_kexec.o obj-$(CONFIG_IMA_BLACKLIST_KEYRING) += ima_mok.o diff --git a/security/integrity/ima/ima.h b/security/integrity/ima/ima.h index db25f54a04fe..5e6180a4da7d 100644 --- a/security/integrity/ima/ima.h +++ b/security/integrity/ima/ima.h @@ -28,6 +28,10 @@ #include "../integrity.h" +#ifdef CONFIG_HAVE_IMA_KEXEC +#include <asm/ima.h> +#endif + enum ima_show_type { IMA_SHOW_BINARY, IMA_SHOW_BINARY_NO_FIELD_LEN, IMA_SHOW_BINARY_OLD_STRING_FMT, IMA_SHOW_ASCII }; enum tpm_pcrs { TPM_PCR0 = 0, TPM_PCR8 = 8 }; @@ -81,6 +85,7 @@ struct ima_template_field { /* IMA template descriptor definition */ struct ima_template_desc { + struct list_head list; char *name; char *fmt; int num_fields; @@ -102,6 +107,27 @@ struct ima_queue_entry { }; extern struct list_head ima_measurements; /* list of all measurements */ +/* Some details preceding the binary serialized measurement list */ +struct ima_kexec_hdr { + u16 version; + u16 _reserved0; + u32 _reserved1; + u64 buffer_size; + u64 count; +}; + +#ifdef CONFIG_HAVE_IMA_KEXEC +void ima_load_kexec_buffer(void); +#else +static inline void ima_load_kexec_buffer(void) {} +#endif /* CONFIG_HAVE_IMA_KEXEC */ + +/* + * The default binary_runtime_measurements list format is defined as the + * platform native format. The canonical format is defined as little-endian. + */ +extern bool ima_canonical_fmt; + /* Internal IMA function definitions */ int ima_init(void); int ima_fs_init(void); @@ -122,7 +148,12 @@ int ima_init_crypto(void); void ima_putc(struct seq_file *m, void *data, int datalen); void ima_print_digest(struct seq_file *m, u8 *digest, u32 size); struct ima_template_desc *ima_template_desc_current(void); +int ima_restore_measurement_entry(struct ima_template_entry *entry); +int ima_restore_measurement_list(loff_t bufsize, void *buf); +int ima_measurements_show(struct seq_file *m, void *v); +unsigned long ima_get_binary_runtime_size(void); int ima_init_template(void); +void ima_init_template_list(void); /* * used to protect h_table and sha_table diff --git a/security/integrity/ima/ima_crypto.c b/security/integrity/ima/ima_crypto.c index 38f2ed830dd6..802d5d20f36f 100644 --- a/security/integrity/ima/ima_crypto.c +++ b/security/integrity/ima/ima_crypto.c @@ -477,11 +477,13 @@ static int ima_calc_field_array_hash_tfm(struct ima_field_data *field_data, u8 buffer[IMA_EVENT_NAME_LEN_MAX + 1] = { 0 }; u8 *data_to_hash = field_data[i].data; u32 datalen = field_data[i].len; + u32 datalen_to_hash = + !ima_canonical_fmt ? datalen : cpu_to_le32(datalen); if (strcmp(td->name, IMA_TEMPLATE_IMA_NAME) != 0) { rc = crypto_shash_update(shash, - (const u8 *) &field_data[i].len, - sizeof(field_data[i].len)); + (const u8 *) &datalen_to_hash, + sizeof(datalen_to_hash)); if (rc) break; } else if (strcmp(td->fields[i]->field_id, "n") == 0) { diff --git a/security/integrity/ima/ima_fs.c b/security/integrity/ima/ima_fs.c index 3df46906492d..ca303e5d2b94 100644 --- a/security/integrity/ima/ima_fs.c +++ b/security/integrity/ima/ima_fs.c @@ -28,6 +28,16 @@ static DEFINE_MUTEX(ima_write_mutex); +bool ima_canonical_fmt; +static int __init default_canonical_fmt_setup(char *str) +{ +#ifdef __BIG_ENDIAN + ima_canonical_fmt = 1; +#endif + return 1; +} +__setup("ima_canonical_fmt", default_canonical_fmt_setup); + static int valid_policy = 1; #define TMPBUFLEN 12 static ssize_t ima_show_htable_value(char __user *buf, size_t count, @@ -116,13 +126,13 @@ void ima_putc(struct seq_file *m, void *data, int datalen) * [eventdata length] * eventdata[n]=template specific data */ -static int ima_measurements_show(struct seq_file *m, void *v) +int ima_measurements_show(struct seq_file *m, void *v) { /* the list never shrinks, so we don't need a lock here */ struct ima_queue_entry *qe = v; struct ima_template_entry *e; char *template_name; - int namelen; + u32 pcr, namelen, template_data_len; /* temporary fields */ bool is_ima_template = false; int i; @@ -139,25 +149,29 @@ static int ima_measurements_show(struct seq_file *m, void *v) * PCR used defaults to the same (config option) in * little-endian format, unless set in policy */ - ima_putc(m, &e->pcr, sizeof(e->pcr)); + pcr = !ima_canonical_fmt ? e->pcr : cpu_to_le32(e->pcr); + ima_putc(m, &pcr, sizeof(e->pcr)); /* 2nd: template digest */ ima_putc(m, e->digest, TPM_DIGEST_SIZE); /* 3rd: template name size */ - namelen = strlen(template_name); + namelen = !ima_canonical_fmt ? strlen(template_name) : + cpu_to_le32(strlen(template_name)); ima_putc(m, &namelen, sizeof(namelen)); /* 4th: template name */ - ima_putc(m, template_name, namelen); + ima_putc(m, template_name, strlen(template_name)); /* 5th: template length (except for 'ima' template) */ if (strcmp(template_name, IMA_TEMPLATE_IMA_NAME) == 0) is_ima_template = true; - if (!is_ima_template) - ima_putc(m, &e->template_data_len, - sizeof(e->template_data_len)); + if (!is_ima_template) { + template_data_len = !ima_canonical_fmt ? e->template_data_len : + cpu_to_le32(e->template_data_len); + ima_putc(m, &template_data_len, sizeof(e->template_data_len)); + } /* 6th: template specific data */ for (i = 0; i < e->template_desc->num_fields; i++) { diff --git a/security/integrity/ima/ima_init.c b/security/integrity/ima/ima_init.c index 2ac1f41db5c0..2967d497a665 100644 --- a/security/integrity/ima/ima_init.c +++ b/security/integrity/ima/ima_init.c @@ -129,6 +129,8 @@ int __init ima_init(void) if (rc != 0) return rc; + ima_load_kexec_buffer(); + rc = ima_add_boot_aggregate(); /* boot aggregate must be first entry */ if (rc != 0) return rc; diff --git a/security/integrity/ima/ima_kexec.c b/security/integrity/ima/ima_kexec.c new file mode 100644 index 000000000000..e473eee913cb --- /dev/null +++ b/security/integrity/ima/ima_kexec.c @@ -0,0 +1,168 @@ +/* + * Copyright (C) 2016 IBM Corporation + * + * Authors: + * Thiago Jung Bauermann <bauerman@linux.vnet.ibm.com> + * Mimi Zohar <zohar@linux.vnet.ibm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ +#include <linux/seq_file.h> +#include <linux/vmalloc.h> +#include <linux/kexec.h> +#include "ima.h" + +#ifdef CONFIG_IMA_KEXEC +static int ima_dump_measurement_list(unsigned long *buffer_size, void **buffer, + unsigned long segment_size) +{ + struct ima_queue_entry *qe; + struct seq_file file; + struct ima_kexec_hdr khdr; + int ret = 0; + + /* segment size can't change between kexec load and execute */ + file.buf = vmalloc(segment_size); + if (!file.buf) { + ret = -ENOMEM; + goto out; + } + + file.size = segment_size; + file.read_pos = 0; + file.count = sizeof(khdr); /* reserved space */ + + memset(&khdr, 0, sizeof(khdr)); + khdr.version = 1; + list_for_each_entry_rcu(qe, &ima_measurements, later) { + if (file.count < file.size) { + khdr.count++; + ima_measurements_show(&file, qe); + } else { + ret = -EINVAL; + break; + } + } + + if (ret < 0) + goto out; + + /* + * fill in reserved space with some buffer details + * (eg. version, buffer size, number of measurements) + */ + khdr.buffer_size = file.count; + if (ima_canonical_fmt) { + khdr.version = cpu_to_le16(khdr.version); + khdr.count = cpu_to_le64(khdr.count); + khdr.buffer_size = cpu_to_le64(khdr.buffer_size); + } + memcpy(file.buf, &khdr, sizeof(khdr)); + + print_hex_dump(KERN_DEBUG, "ima dump: ", DUMP_PREFIX_NONE, + 16, 1, file.buf, + file.count < 100 ? file.count : 100, true); + + *buffer_size = file.count; + *buffer = file.buf; +out: + if (ret == -EINVAL) + vfree(file.buf); + return ret; +} + +/* + * Called during kexec_file_load so that IMA can add a segment to the kexec + * image for the measurement list for the next kernel. + * + * This function assumes that kexec_mutex is held. + */ +void ima_add_kexec_buffer(struct kimage *image) +{ + struct kexec_buf kbuf = { .image = image, .buf_align = PAGE_SIZE, + .buf_min = 0, .buf_max = ULONG_MAX, + .top_down = true }; + unsigned long binary_runtime_size; + + /* use more understandable variable names than defined in kbuf */ + void *kexec_buffer = NULL; + size_t kexec_buffer_size; + size_t kexec_segment_size; + int ret; + + /* + * Reserve an extra half page of memory for additional measurements + * added during the kexec load. + */ + binary_runtime_size = ima_get_binary_runtime_size(); + if (binary_runtime_size >= ULONG_MAX - PAGE_SIZE) + kexec_segment_size = ULONG_MAX; + else + kexec_segment_size = ALIGN(ima_get_binary_runtime_size() + + PAGE_SIZE / 2, PAGE_SIZE); + if ((kexec_segment_size == ULONG_MAX) || + ((kexec_segment_size >> PAGE_SHIFT) > totalram_pages / 2)) { + pr_err("Binary measurement list too large.\n"); + return; + } + + ima_dump_measurement_list(&kexec_buffer_size, &kexec_buffer, + kexec_segment_size); + if (!kexec_buffer) { + pr_err("Not enough memory for the kexec measurement buffer.\n"); + return; + } + + kbuf.buffer = kexec_buffer; + kbuf.bufsz = kexec_buffer_size; + kbuf.memsz = kexec_segment_size; + ret = kexec_add_buffer(&kbuf); + if (ret) { + pr_err("Error passing over kexec measurement buffer.\n"); + return; + } + + ret = arch_ima_add_kexec_buffer(image, kbuf.mem, kexec_segment_size); + if (ret) { + pr_err("Error passing over kexec measurement buffer.\n"); + return; + } + + pr_debug("kexec measurement buffer for the loaded kernel at 0x%lx.\n", + kbuf.mem); +} +#endif /* IMA_KEXEC */ + +/* + * Restore the measurement list from the previous kernel. + */ +void ima_load_kexec_buffer(void) +{ + void *kexec_buffer = NULL; + size_t kexec_buffer_size = 0; + int rc; + + rc = ima_get_kexec_buffer(&kexec_buffer, &kexec_buffer_size); + switch (rc) { + case 0: + rc = ima_restore_measurement_list(kexec_buffer_size, + kexec_buffer); + if (rc != 0) + pr_err("Failed to restore the measurement list: %d\n", + rc); + + ima_free_kexec_buffer(); + break; + case -ENOTSUPP: + pr_debug("Restoring the measurement list not supported\n"); + break; + case -ENOENT: + pr_debug("No measurement list to restore\n"); + break; + default: + pr_debug("Error restoring the measurement list: %d\n", rc); + } +} diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c index 423d111b3b94..50818c60538b 100644 --- a/security/integrity/ima/ima_main.c +++ b/security/integrity/ima/ima_main.c @@ -418,6 +418,7 @@ static int __init init_ima(void) { int error; + ima_init_template_list(); hash_setup(CONFIG_IMA_DEFAULT_HASH); error = ima_init(); if (!error) { diff --git a/security/integrity/ima/ima_queue.c b/security/integrity/ima/ima_queue.c index 32f6ac0f96df..d9aa5ab71204 100644 --- a/security/integrity/ima/ima_queue.c +++ b/security/integrity/ima/ima_queue.c @@ -29,6 +29,11 @@ #define AUDIT_CAUSE_LEN_MAX 32 LIST_HEAD(ima_measurements); /* list of all measurements */ +#ifdef CONFIG_IMA_KEXEC +static unsigned long binary_runtime_size; +#else +static unsigned long binary_runtime_size = ULONG_MAX; +#endif /* key: inode (before secure-hashing a file) */ struct ima_h_table ima_htable = { @@ -64,12 +69,32 @@ static struct ima_queue_entry *ima_lookup_digest_entry(u8 *digest_value, return ret; } +/* + * Calculate the memory required for serializing a single + * binary_runtime_measurement list entry, which contains a + * couple of variable length fields (e.g template name and data). + */ +static int get_binary_runtime_size(struct ima_template_entry *entry) +{ + int size = 0; + + size += sizeof(u32); /* pcr */ + size += sizeof(entry->digest); + size += sizeof(int); /* template name size field */ + size += strlen(entry->template_desc->name) + 1; + size += sizeof(entry->template_data_len); + size += entry->template_data_len; + return size; +} + /* ima_add_template_entry helper function: - * - Add template entry to measurement list and hash table. + * - Add template entry to the measurement list and hash table, for + * all entries except those carried across kexec. * * (Called with ima_extend_list_mutex held.) */ -static int ima_add_digest_entry(struct ima_template_entry *entry) +static int ima_add_digest_entry(struct ima_template_entry *entry, + bool update_htable) { struct ima_queue_entry *qe; unsigned int key; @@ -85,11 +110,34 @@ static int ima_add_digest_entry(struct ima_template_entry *entry) list_add_tail_rcu(&qe->later, &ima_measurements); atomic_long_inc(&ima_htable.len); - key = ima_hash_key(entry->digest); - hlist_add_head_rcu(&qe->hnext, &ima_htable.queue[key]); + if (update_htable) { + key = ima_hash_key(entry->digest); + hlist_add_head_rcu(&qe->hnext, &ima_htable.queue[key]); + } + + if (binary_runtime_size != ULONG_MAX) { + int size; + + size = get_binary_runtime_size(entry); + binary_runtime_size = (binary_runtime_size < ULONG_MAX - size) ? + binary_runtime_size + size : ULONG_MAX; + } return 0; } +/* + * Return the amount of memory required for serializing the + * entire binary_runtime_measurement list, including the ima_kexec_hdr + * structure. + */ +unsigned long ima_get_binary_runtime_size(void) +{ + if (binary_runtime_size >= (ULONG_MAX - sizeof(struct ima_kexec_hdr))) + return ULONG_MAX; + else + return binary_runtime_size + sizeof(struct ima_kexec_hdr); +}; + static int ima_pcr_extend(const u8 *hash, int pcr) { int result = 0; @@ -103,8 +151,13 @@ static int ima_pcr_extend(const u8 *hash, int pcr) return result; } -/* Add template entry to the measurement list and hash table, - * and extend the pcr. +/* + * Add template entry to the measurement list and hash table, and + * extend the pcr. + * + * On systems which support carrying the IMA measurement list across + * kexec, maintain the total memory size required for serializing the + * binary_runtime_measurements. */ int ima_add_template_entry(struct ima_template_entry *entry, int violation, const char *op, struct inode *inode, @@ -126,7 +179,7 @@ int ima_add_template_entry(struct ima_template_entry *entry, int violation, } } - result = ima_add_digest_entry(entry); + result = ima_add_digest_entry(entry, 1); if (result < 0) { audit_cause = "ENOMEM"; audit_info = 0; @@ -149,3 +202,13 @@ out: op, audit_cause, result, audit_info); return result; } + +int ima_restore_measurement_entry(struct ima_template_entry *entry) +{ + int result = 0; + + mutex_lock(&ima_extend_list_mutex); + result = ima_add_digest_entry(entry, 0); + mutex_unlock(&ima_extend_list_mutex); + return result; +} diff --git a/security/integrity/ima/ima_template.c b/security/integrity/ima/ima_template.c index febd12ed9b55..cebb37c63629 100644 --- a/security/integrity/ima/ima_template.c +++ b/security/integrity/ima/ima_template.c @@ -15,16 +15,20 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include <linux/rculist.h> #include "ima.h" #include "ima_template_lib.h" -static struct ima_template_desc defined_templates[] = { +static struct ima_template_desc builtin_templates[] = { {.name = IMA_TEMPLATE_IMA_NAME, .fmt = IMA_TEMPLATE_IMA_FMT}, {.name = "ima-ng", .fmt = "d-ng|n-ng"}, {.name = "ima-sig", .fmt = "d-ng|n-ng|sig"}, {.name = "", .fmt = ""}, /* placeholder for a custom format */ }; +static LIST_HEAD(defined_templates); +static DEFINE_SPINLOCK(template_list); + static struct ima_template_field supported_fields[] = { {.field_id = "d", .field_init = ima_eventdigest_init, .field_show = ima_show_template_digest}, @@ -37,6 +41,7 @@ static struct ima_template_field supported_fields[] = { {.field_id = "sig", .field_init = ima_eventsig_init, .field_show = ima_show_template_sig}, }; +#define MAX_TEMPLATE_NAME_LEN 15 static struct ima_template_desc *ima_template; static struct ima_template_desc *lookup_template_desc(const char *name); @@ -52,6 +57,8 @@ static int __init ima_template_setup(char *str) if (ima_template) return 1; + ima_init_template_list(); + /* * Verify that a template with the supplied name exists. * If not, use CONFIG_IMA_DEFAULT_TEMPLATE. @@ -80,7 +87,7 @@ __setup("ima_template=", ima_template_setup); static int __init ima_template_fmt_setup(char *str) { - int num_templates = ARRAY_SIZE(defined_templates); + int num_templates = ARRAY_SIZE(builtin_templates); if (ima_template) return 1; @@ -91,22 +98,28 @@ static int __init ima_template_fmt_setup(char *str) return 1; } - defined_templates[num_templates - 1].fmt = str; - ima_template = defined_templates + num_templates - 1; + builtin_templates[num_templates - 1].fmt = str; + ima_template = builtin_templates + num_templates - 1; + return 1; } __setup("ima_template_fmt=", ima_template_fmt_setup); static struct ima_template_desc *lookup_template_desc(const char *name) { - int i; - - for (i = 0; i < ARRAY_SIZE(defined_templates); i++) { - if (strcmp(defined_templates[i].name, name) == 0) - return defined_templates + i; + struct ima_template_desc *template_desc; + int found = 0; + + rcu_read_lock(); + list_for_each_entry_rcu(template_desc, &defined_templates, list) { + if ((strcmp(template_desc->name, name) == 0) || + (strcmp(template_desc->fmt, name) == 0)) { + found = 1; + break; + } } - - return NULL; + rcu_read_unlock(); + return found ? template_desc : NULL; } static struct ima_template_field *lookup_template_field(const char *field_id) @@ -142,9 +155,14 @@ static int template_desc_init_fields(const char *template_fmt, { const char *template_fmt_ptr; struct ima_template_field *found_fields[IMA_TEMPLATE_NUM_FIELDS_MAX]; - int template_num_fields = template_fmt_size(template_fmt); + int template_num_fields; int i, len; + if (num_fields && *num_fields > 0) /* already initialized? */ + return 0; + + template_num_fields = template_fmt_size(template_fmt); + if (template_num_fields > IMA_TEMPLATE_NUM_FIELDS_MAX) { pr_err("format string '%s' contains too many fields\n", template_fmt); @@ -182,11 +200,28 @@ static int template_desc_init_fields(const char *template_fmt, return 0; } +void ima_init_template_list(void) +{ + int i; + + if (!list_empty(&defined_templates)) + return; + + spin_lock(&template_list); + for (i = 0; i < ARRAY_SIZE(builtin_templates); i++) { + list_add_tail_rcu(&builtin_templates[i].list, + &defined_templates); + } + spin_unlock(&template_list); +} + struct ima_template_desc *ima_template_desc_current(void) { - if (!ima_template) + if (!ima_template) { + ima_init_template_list(); ima_template = lookup_template_desc(CONFIG_IMA_DEFAULT_TEMPLATE); + } return ima_template; } @@ -205,3 +240,239 @@ int __init ima_init_template(void) return result; } + +static struct ima_template_desc *restore_template_fmt(char *template_name) +{ + struct ima_template_desc *template_desc = NULL; + int ret; + + ret = template_desc_init_fields(template_name, NULL, NULL); + if (ret < 0) { + pr_err("attempting to initialize the template \"%s\" failed\n", + template_name); + goto out; + } + + template_desc = kzalloc(sizeof(*template_desc), GFP_KERNEL); + if (!template_desc) + goto out; + + template_desc->name = ""; + template_desc->fmt = kstrdup(template_name, GFP_KERNEL); + if (!template_desc->fmt) + goto out; + + spin_lock(&template_list); + list_add_tail_rcu(&template_desc->list, &defined_templates); + spin_unlock(&template_list); +out: + return template_desc; +} + +static int ima_restore_template_data(struct ima_template_desc *template_desc, + void *template_data, + int template_data_size, + struct ima_template_entry **entry) +{ + struct binary_field_data { + u32 len; + u8 data[0]; + } __packed; + + struct binary_field_data *field_data; + int offset = 0; + int ret = 0; + int i; + + *entry = kzalloc(sizeof(**entry) + + template_desc->num_fields * sizeof(struct ima_field_data), + GFP_NOFS); + if (!*entry) + return -ENOMEM; + + (*entry)->template_desc = template_desc; + for (i = 0; i < template_desc->num_fields; i++) { + field_data = template_data + offset; + + /* Each field of the template data is prefixed with a length. */ + if (offset > (template_data_size - sizeof(*field_data))) { + pr_err("Restoring the template field failed\n"); + ret = -EINVAL; + break; + } + offset += sizeof(*field_data); + + if (ima_canonical_fmt) + field_data->len = le32_to_cpu(field_data->len); + + if (offset > (template_data_size - field_data->len)) { + pr_err("Restoring the template field data failed\n"); + ret = -EINVAL; + break; + } + offset += field_data->len; + + (*entry)->template_data[i].len = field_data->len; + (*entry)->template_data_len += sizeof(field_data->len); + + (*entry)->template_data[i].data = + kzalloc(field_data->len + 1, GFP_KERNEL); + if (!(*entry)->template_data[i].data) { + ret = -ENOMEM; + break; + } + memcpy((*entry)->template_data[i].data, field_data->data, + field_data->len); + (*entry)->template_data_len += field_data->len; + } + + if (ret < 0) { + ima_free_template_entry(*entry); + *entry = NULL; + } + + return ret; +} + +/* Restore the serialized binary measurement list without extending PCRs. */ +int ima_restore_measurement_list(loff_t size, void *buf) +{ + struct binary_hdr_v1 { + u32 pcr; + u8 digest[TPM_DIGEST_SIZE]; + u32 template_name_len; + char template_name[0]; + } __packed; + char template_name[MAX_TEMPLATE_NAME_LEN]; + + struct binary_data_v1 { + u32 template_data_size; + char template_data[0]; + } __packed; + + struct ima_kexec_hdr *khdr = buf; + struct binary_hdr_v1 *hdr_v1; + struct binary_data_v1 *data_v1; + + void *bufp = buf + sizeof(*khdr); + void *bufendp; + struct ima_template_entry *entry; + struct ima_template_desc *template_desc; + unsigned long count = 0; + int ret = 0; + + if (!buf || size < sizeof(*khdr)) + return 0; + + if (ima_canonical_fmt) { + khdr->version = le16_to_cpu(khdr->version); + khdr->count = le64_to_cpu(khdr->count); + khdr->buffer_size = le64_to_cpu(khdr->buffer_size); + } + + if (khdr->version != 1) { + pr_err("attempting to restore a incompatible measurement list"); + return -EINVAL; + } + + if (khdr->count > ULONG_MAX - 1) { + pr_err("attempting to restore too many measurements"); + return -EINVAL; + } + + /* + * ima kexec buffer prefix: version, buffer size, count + * v1 format: pcr, digest, template-name-len, template-name, + * template-data-size, template-data + */ + bufendp = buf + khdr->buffer_size; + while ((bufp < bufendp) && (count++ < khdr->count)) { + hdr_v1 = bufp; + if (bufp > (bufendp - sizeof(*hdr_v1))) { + pr_err("attempting to restore partial measurement\n"); + ret = -EINVAL; + break; + } + bufp += sizeof(*hdr_v1); + + if (ima_canonical_fmt) + hdr_v1->template_name_len = + le32_to_cpu(hdr_v1->template_name_len); + + if ((hdr_v1->template_name_len >= MAX_TEMPLATE_NAME_LEN) || + (bufp > (bufendp - hdr_v1->template_name_len))) { + pr_err("attempting to restore a template name \ + that is too long\n"); + ret = -EINVAL; + break; + } + data_v1 = bufp += (u_int8_t)hdr_v1->template_name_len; + + /* template name is not null terminated */ + memcpy(template_name, hdr_v1->template_name, + hdr_v1->template_name_len); + template_name[hdr_v1->template_name_len] = 0; + + if (strcmp(template_name, "ima") == 0) { + pr_err("attempting to restore an unsupported \ + template \"%s\" failed\n", template_name); + ret = -EINVAL; + break; + } + + template_desc = lookup_template_desc(template_name); + if (!template_desc) { + template_desc = restore_template_fmt(template_name); + if (!template_desc) + break; + } + + /* + * Only the running system's template format is initialized + * on boot. As needed, initialize the other template formats. + */ + ret = template_desc_init_fields(template_desc->fmt, + &(template_desc->fields), + &(template_desc->num_fields)); + if (ret < 0) { + pr_err("attempting to restore the template fmt \"%s\" \ + failed\n", template_desc->fmt); + ret = -EINVAL; + break; + } + + if (bufp > (bufendp - sizeof(data_v1->template_data_size))) { + pr_err("restoring the template data size failed\n"); + ret = -EINVAL; + break; + } + bufp += (u_int8_t) sizeof(data_v1->template_data_size); + + if (ima_canonical_fmt) + data_v1->template_data_size = + le32_to_cpu(data_v1->template_data_size); + + if (bufp > (bufendp - data_v1->template_data_size)) { + pr_err("restoring the template data failed\n"); + ret = -EINVAL; + break; + } + bufp += data_v1->template_data_size; + + ret = ima_restore_template_data(template_desc, + data_v1->template_data, + data_v1->template_data_size, + &entry); + if (ret < 0) + break; + + memcpy(entry->digest, hdr_v1->digest, TPM_DIGEST_SIZE); + entry->pcr = + !ima_canonical_fmt ? hdr_v1->pcr : le32_to_cpu(hdr_v1->pcr); + ret = ima_restore_measurement_entry(entry); + if (ret < 0) + break; + + } + return ret; +} diff --git a/security/integrity/ima/ima_template_lib.c b/security/integrity/ima/ima_template_lib.c index f9bae04ba176..f9ba37b3928d 100644 --- a/security/integrity/ima/ima_template_lib.c +++ b/security/integrity/ima/ima_template_lib.c @@ -103,8 +103,11 @@ static void ima_show_template_data_binary(struct seq_file *m, u32 len = (show == IMA_SHOW_BINARY_OLD_STRING_FMT) ? strlen(field_data->data) : field_data->len; - if (show != IMA_SHOW_BINARY_NO_FIELD_LEN) - ima_putc(m, &len, sizeof(len)); + if (show != IMA_SHOW_BINARY_NO_FIELD_LEN) { + u32 field_len = !ima_canonical_fmt ? len : cpu_to_le32(len); + + ima_putc(m, &field_len, sizeof(field_len)); + } if (!len) return; diff --git a/security/selinux/include/classmap.h b/security/selinux/include/classmap.h index e2d4ad3a4b4c..13ae49b0baa0 100644 --- a/security/selinux/include/classmap.h +++ b/security/selinux/include/classmap.h @@ -1,3 +1,5 @@ +#include <linux/capability.h> + #define COMMON_FILE_SOCK_PERMS "ioctl", "read", "write", "create", \ "getattr", "setattr", "lock", "relabelfrom", "relabelto", "append" diff --git a/sound/usb/endpoint.c b/sound/usb/endpoint.c index a2cdf3370afe..15d1d5c63c3c 100644 --- a/sound/usb/endpoint.c +++ b/sound/usb/endpoint.c @@ -384,9 +384,6 @@ static void snd_complete_urb(struct urb *urb) if (unlikely(atomic_read(&ep->chip->shutdown))) goto exit_clear; - if (unlikely(!test_bit(EP_FLAG_RUNNING, &ep->flags))) - goto exit_clear; - if (usb_pipeout(ep->pipe)) { retire_outbound_urb(ep, ctx); /* can be stopped during retire callback */ @@ -537,11 +534,6 @@ static int wait_clear_urbs(struct snd_usb_endpoint *ep) alive, ep->ep_num); clear_bit(EP_FLAG_STOPPING, &ep->flags); - ep->data_subs = NULL; - ep->sync_slave = NULL; - ep->retire_data_urb = NULL; - ep->prepare_data_urb = NULL; - return 0; } @@ -1028,6 +1020,10 @@ void snd_usb_endpoint_stop(struct snd_usb_endpoint *ep) if (--ep->use_count == 0) { deactivate_urbs(ep, false); + ep->data_subs = NULL; + ep->sync_slave = NULL; + ep->retire_data_urb = NULL; + ep->prepare_data_urb = NULL; set_bit(EP_FLAG_STOPPING, &ep->flags); } } |