diff options
304 files changed, 16969 insertions, 6408 deletions
diff --git a/Documentation/devicetree/bindings/leds/leds-lp55xx.txt b/Documentation/devicetree/bindings/leds/leds-lp55xx.txt index d5176882d8b9..a61727f9a6d1 100644 --- a/Documentation/devicetree/bindings/leds/leds-lp55xx.txt +++ b/Documentation/devicetree/bindings/leds/leds-lp55xx.txt @@ -1,7 +1,7 @@ Binding for TI/National Semiconductor LP55xx Led Drivers Required properties: -- compatible: "national,lp5521" or "national,lp5523" or "ti,lp5562" +- compatible: "national,lp5521" or "national,lp5523" or "ti,lp5562" or "ti,lp8501" - reg: I2C slave address - clock-mode: Input clock mode, (0: automode, 1: internal, 2: external) @@ -11,6 +11,11 @@ Each child has own specific current settings Optional properties: - label: Used for naming LEDs +- pwr-sel: LP8501 specific property. Power selection for output channels. + 0: D1~9 are connected to VDD + 1: D1~6 with VDD, D7~9 with VOUT + 2: D1~6 with VOUT, D7~9 with VDD + 3: D1~9 are connected to VOUT Alternatively, each child can have specific channel name - chan-name: Name of each channel name @@ -145,3 +150,68 @@ lp5562@30 { max-cur = /bits/ 8 <0x60>; }; }; + +example 4) LP8501 +9 channels are defined. The 'pwr-sel' is LP8501 specific property. +Others are same as LP5523. + +lp8501@32 { + compatible = "ti,lp8501"; + reg = <0x32>; + clock-mode = /bits/ 8 <2>; + pwr-sel = /bits/ 8 <3>; /* D1~9 connected to VOUT */ + + chan0 { + chan-name = "d1"; + led-cur = /bits/ 8 <0x14>; + max-cur = /bits/ 8 <0x20>; + }; + + chan1 { + chan-name = "d2"; + led-cur = /bits/ 8 <0x14>; + max-cur = /bits/ 8 <0x20>; + }; + + chan2 { + chan-name = "d3"; + led-cur = /bits/ 8 <0x14>; + max-cur = /bits/ 8 <0x20>; + }; + + chan3 { + chan-name = "d4"; + led-cur = /bits/ 8 <0x14>; + max-cur = /bits/ 8 <0x20>; + }; + + chan4 { + chan-name = "d5"; + led-cur = /bits/ 8 <0x14>; + max-cur = /bits/ 8 <0x20>; + }; + + chan5 { + chan-name = "d6"; + led-cur = /bits/ 8 <0x14>; + max-cur = /bits/ 8 <0x20>; + }; + + chan6 { + chan-name = "d7"; + led-cur = /bits/ 8 <0x14>; + max-cur = /bits/ 8 <0x20>; + }; + + chan7 { + chan-name = "d8"; + led-cur = /bits/ 8 <0x14>; + max-cur = /bits/ 8 <0x20>; + }; + + chan8 { + chan-name = "d9"; + led-cur = /bits/ 8 <0x14>; + max-cur = /bits/ 8 <0x20>; + }; +}; diff --git a/Documentation/devicetree/bindings/leds/pca963x.txt b/Documentation/devicetree/bindings/leds/pca963x.txt new file mode 100644 index 000000000000..aece3eac1b63 --- /dev/null +++ b/Documentation/devicetree/bindings/leds/pca963x.txt @@ -0,0 +1,47 @@ +LEDs connected to pca9632, pca9633 or pca9634 + +Required properties: +- compatible : should be : "nxp,pca9632", "nxp,pca9633" or "nxp,pca9634" + +Optional properties: +- nxp,totem-pole : use totem pole (push-pull) instead of default open-drain +- nxp,hw-blink : use hardware blinking instead of software blinking + +Each led is represented as a sub-node of the nxp,pca963x device. + +LED sub-node properties: +- label : (optional) see Documentation/devicetree/bindings/leds/common.txt +- reg : number of LED line (could be from 0 to 3 in pca9632 or pca9633 + or 0 to 7 in pca9634) +- linux,default-trigger : (optional) + see Documentation/devicetree/bindings/leds/common.txt + +Examples: + +pca9632: pca9632 { + compatible = "nxp,pca9632"; + #address-cells = <1>; + #size-cells = <0>; + reg = <0x62>; + + red@0 { + label = "red"; + reg = <0>; + linux,default-trigger = "none"; + }; + green@1 { + label = "green"; + reg = <1>; + linux,default-trigger = "none"; + }; + blue@2 { + label = "blue"; + reg = <2>; + linux,default-trigger = "none"; + }; + unused@3 { + label = "unused"; + reg = <3>; + linux,default-trigger = "none"; + }; +}; diff --git a/Documentation/devicetree/bindings/sound/mvebu-audio.txt b/Documentation/devicetree/bindings/sound/mvebu-audio.txt index 7e5fd37c1b3f..f0062c5871b4 100644 --- a/Documentation/devicetree/bindings/sound/mvebu-audio.txt +++ b/Documentation/devicetree/bindings/sound/mvebu-audio.txt @@ -2,13 +2,17 @@ Required properties: -- compatible: "marvell,mvebu-audio" +- compatible: + "marvell,kirkwood-audio" for Kirkwood platforms + "marvell,dove-audio" for Dove platforms - reg: physical base address of the controller and length of memory mapped region. -- interrupts: list of two irq numbers. - The first irq is used for data flow and the second one is used for errors. +- interrupts: + with "marvell,kirkwood-audio", the audio interrupt + with "marvell,dove-audio", a list of two interrupts, the first for + the data flow, and the second for errors. - clocks: one or two phandles. The first one is mandatory and defines the internal clock. @@ -21,7 +25,7 @@ Required properties: Example: i2s1: audio-controller@b4000 { - compatible = "marvell,mvebu-audio"; + compatible = "marvell,dove-audio"; reg = <0xb4000 0x2210>; interrupts = <21>, <22>; clocks = <&gate_clk 13>; diff --git a/Documentation/devicetree/bindings/thermal/exynos-thermal.txt b/Documentation/devicetree/bindings/thermal/exynos-thermal.txt new file mode 100644 index 000000000000..284f5300fd8b --- /dev/null +++ b/Documentation/devicetree/bindings/thermal/exynos-thermal.txt @@ -0,0 +1,55 @@ +* Exynos Thermal Management Unit (TMU) + +** Required properties: + +- compatible : One of the following: + "samsung,exynos4412-tmu" + "samsung,exynos4210-tmu" + "samsung,exynos5250-tmu" + "samsung,exynos5440-tmu" +- interrupt-parent : The phandle for the interrupt controller +- reg : Address range of the thermal registers. For soc's which has multiple + instances of TMU and some registers are shared across all TMU's like + interrupt related then 2 set of register has to supplied. First set + belongs to each instance of TMU and second set belongs to common TMU + registers. +- interrupts : Should contain interrupt for thermal system +- clocks : The main clock for TMU device +- clock-names : Thermal system clock name +- vtmu-supply: This entry is optional and provides the regulator node supplying + voltage to TMU. If needed this entry can be placed inside + board/platform specific dts file. + +Example 1): + + tmu@100C0000 { + compatible = "samsung,exynos4412-tmu"; + interrupt-parent = <&combiner>; + reg = <0x100C0000 0x100>; + interrupts = <2 4>; + clocks = <&clock 383>; + clock-names = "tmu_apbif"; + status = "disabled"; + vtmu-supply = <&tmu_regulator_node>; + }; + +Example 2): + + tmuctrl_0: tmuctrl@160118 { + compatible = "samsung,exynos5440-tmu"; + reg = <0x160118 0x230>, <0x160368 0x10>; + interrupts = <0 58 0>; + clocks = <&clock 21>; + clock-names = "tmu_apbif"; + }; + +Note: For multi-instance tmu each instance should have an alias correctly +numbered in "aliases" node. + +Example: + +aliases { + tmuctrl0 = &tmuctrl_0; + tmuctrl1 = &tmuctrl_1; + tmuctrl2 = &tmuctrl_2; +}; diff --git a/Documentation/devicetree/bindings/thermal/imx-thermal.txt b/Documentation/devicetree/bindings/thermal/imx-thermal.txt new file mode 100644 index 000000000000..541c25e49abf --- /dev/null +++ b/Documentation/devicetree/bindings/thermal/imx-thermal.txt @@ -0,0 +1,17 @@ +* Temperature Monitor (TEMPMON) on Freescale i.MX SoCs + +Required properties: +- compatible : "fsl,imx6q-thermal" +- fsl,tempmon : phandle pointer to system controller that contains TEMPMON + control registers, e.g. ANATOP on imx6q. +- fsl,tempmon-data : phandle pointer to fuse controller that contains TEMPMON + calibration data, e.g. OCOTP on imx6q. The details about calibration data + can be found in SoC Reference Manual. + +Example: + +tempmon { + compatible = "fsl,imx6q-tempmon"; + fsl,tempmon = <&anatop>; + fsl,tempmon-data = <&ocotp>; +}; diff --git a/fs/cifs/AUTHORS b/Documentation/filesystems/cifs/AUTHORS index ea940b1db77b..ca4a67a0bb1e 100644 --- a/fs/cifs/AUTHORS +++ b/Documentation/filesystems/cifs/AUTHORS @@ -39,6 +39,7 @@ Shaggy (Dave Kleikamp) for innumerable small fs suggestions and some good cleanu Gunter Kukkukk (testing and suggestions for support of old servers) Igor Mammedov (DFS support) Jeff Layton (many, many fixes, as well as great work on the cifs Kerberos code) +Scott Lovenberg Test case and Bug Report contributors ------------------------------------- diff --git a/fs/cifs/CHANGES b/Documentation/filesystems/cifs/CHANGES index bc0025cdd1c9..bc0025cdd1c9 100644 --- a/fs/cifs/CHANGES +++ b/Documentation/filesystems/cifs/CHANGES diff --git a/fs/cifs/README b/Documentation/filesystems/cifs/README index 2d5622f60e11..2d5622f60e11 100644 --- a/fs/cifs/README +++ b/Documentation/filesystems/cifs/README diff --git a/fs/cifs/TODO b/Documentation/filesystems/cifs/TODO index 355abcdcda98..355abcdcda98 100644 --- a/fs/cifs/TODO +++ b/Documentation/filesystems/cifs/TODO diff --git a/Documentation/filesystems/cifs.txt b/Documentation/filesystems/cifs/cifs.txt index 49cc923a93e3..49cc923a93e3 100644 --- a/Documentation/filesystems/cifs.txt +++ b/Documentation/filesystems/cifs/cifs.txt diff --git a/Documentation/filesystems/cifs/winucase_convert.pl b/Documentation/filesystems/cifs/winucase_convert.pl new file mode 100755 index 000000000000..322a9c833f23 --- /dev/null +++ b/Documentation/filesystems/cifs/winucase_convert.pl @@ -0,0 +1,62 @@ +#!/usr/bin/perl -w +# +# winucase_convert.pl -- convert "Windows 8 Upper Case Mapping Table.txt" to +# a two-level set of C arrays. +# +# Copyright 2013: Jeff Layton <jlayton@redhat.com> +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + +while(<>) { + next if (!/^0x(..)(..)\t0x(....)\t/); + $firstchar = hex($1); + $secondchar = hex($2); + $uppercase = hex($3); + + $top[$firstchar][$secondchar] = $uppercase; +} + +for ($i = 0; $i < 256; $i++) { + next if (!$top[$i]); + + printf("static const wchar_t t2_%2.2x[256] = {", $i); + for ($j = 0; $j < 256; $j++) { + if (($j % 8) == 0) { + print "\n\t"; + } else { + print " "; + } + printf("0x%4.4x,", $top[$i][$j] ? $top[$i][$j] : 0); + } + print "\n};\n\n"; +} + +printf("static const wchar_t *const toplevel[256] = {", $i); +for ($i = 0; $i < 256; $i++) { + if (($i % 8) == 0) { + print "\n\t"; + } elsif ($top[$i]) { + print " "; + } else { + print " "; + } + + if ($top[$i]) { + printf("t2_%2.2x,", $i); + } else { + print "NULL,"; + } +} +print "\n};\n\n"; diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting index 206a1bdc7321..f0890581f7f6 100644 --- a/Documentation/filesystems/porting +++ b/Documentation/filesystems/porting @@ -451,3 +451,7 @@ in your dentry operations instead. -- [mandatory] ->readdir() is gone now; switch to ->iterate() +[mandatory] + vfs_follow_link has been removed. Filesystems must use nd_set_link + from ->follow_link for normal symlinks, or nd_jump_link for magic + /proc/<pid> style links. diff --git a/Documentation/leds/leds-lp5521.txt b/Documentation/leds/leds-lp5521.txt index 79e4c2e6e5e8..d08d8c179f85 100644 --- a/Documentation/leds/leds-lp5521.txt +++ b/Documentation/leds/leds-lp5521.txt @@ -18,7 +18,25 @@ All three channels can be also controlled using the engine micro programs. More details of the instructions can be found from the public data sheet. LP5521 has the internal program memory for running various LED patterns. -For the details, please refer to 'firmware' section in leds-lp55xx.txt +There are two ways to run LED patterns. + +1) Legacy interface - enginex_mode and enginex_load + Control interface for the engines: + x is 1 .. 3 + enginex_mode : disabled, load, run + enginex_load : store program (visible only in engine load mode) + + Example (start to blink the channel 2 led): + cd /sys/class/leds/lp5521:channel2/device + echo "load" > engine3_mode + echo "037f4d0003ff6000" > engine3_load + echo "run" > engine3_mode + + To stop the engine: + echo "disabled" > engine3_mode + +2) Firmware interface - LP55xx common interface + For the details, please refer to 'firmware' section in leds-lp55xx.txt sysfs contains a selftest entry. The test communicates with the chip and checks that diff --git a/Documentation/leds/leds-lp5523.txt b/Documentation/leds/leds-lp5523.txt index 899fdad509fe..5b3e91d4ac59 100644 --- a/Documentation/leds/leds-lp5523.txt +++ b/Documentation/leds/leds-lp5523.txt @@ -28,7 +28,26 @@ If both fields are NULL, 'lp5523' is used by default. /sys/class/leds/lp5523:channelN (N: 0 ~ 8) LP5523 has the internal program memory for running various LED patterns. -For the details, please refer to 'firmware' section in leds-lp55xx.txt +There are two ways to run LED patterns. + +1) Legacy interface - enginex_mode, enginex_load and enginex_leds + Control interface for the engines: + x is 1 .. 3 + enginex_mode : disabled, load, run + enginex_load : microcode load (visible only in load mode) + enginex_leds : led mux control (visible only in load mode) + + cd /sys/class/leds/lp5523:channel2/device + echo "load" > engine3_mode + echo "9d80400004ff05ff437f0000" > engine3_load + echo "111111111" > engine3_leds + echo "run" > engine3_mode + + To stop the engine: + echo "disabled" > engine3_mode + +2) Firmware interface - LP55xx common interface + For the details, please refer to 'firmware' section in leds-lp55xx.txt Selftest uses always the current from the platform data. diff --git a/Documentation/leds/leds-lp55xx.txt b/Documentation/leds/leds-lp55xx.txt index eec8fa2ffe4e..82713ff92eb3 100644 --- a/Documentation/leds/leds-lp55xx.txt +++ b/Documentation/leds/leds-lp55xx.txt @@ -1,11 +1,11 @@ -LP5521/LP5523/LP55231 Common Driver -=================================== +LP5521/LP5523/LP55231/LP5562/LP8501 Common Driver +================================================= Authors: Milo(Woogyom) Kim <milo.kim@ti.com> Description ----------- -LP5521, LP5523/55231 and LP5562 have common features as below. +LP5521, LP5523/55231, LP5562 and LP8501 have common features as below. Register access via the I2C Device initialization/deinitialization @@ -109,6 +109,30 @@ As soon as 'loading' is set to 0, registered callback is called. Inside the callback, the selected engine is loaded and memory is updated. To run programmed pattern, 'run_engine' attribute should be enabled. +The pattern sqeuence of LP8501 is same as LP5523. +However pattern data is specific. +Ex 1) Engine 1 is used +echo 1 > /sys/bus/i2c/devices/xxxx/select_engine +echo 1 > /sys/class/firmware/lp8501/loading +echo "9d0140ff7e0040007e00a001c000" > /sys/class/firmware/lp8501/data +echo 0 > /sys/class/firmware/lp8501/loading +echo 1 > /sys/bus/i2c/devices/xxxx/run_engine + +Ex 2) Engine 2 and 3 are used at the same time +echo 2 > /sys/bus/i2c/devices/xxxx/select_engine +sleep 1 +echo 1 > /sys/class/firmware/lp8501/loading +echo "9d0140ff7e0040007e00a001c000" > /sys/class/firmware/lp8501/data +echo 0 > /sys/class/firmware/lp8501/loading +sleep 1 +echo 3 > /sys/bus/i2c/devices/xxxx/select_engine +sleep 1 +echo 1 > /sys/class/firmware/lp8501/loading +echo "9d0340ff7e0040007e00a001c000" > /sys/class/firmware/lp8501/data +echo 0 > /sys/class/firmware/lp8501/loading +sleep 1 +echo 1 > /sys/class/leds/d1/device/run_engine + ( 'run_engine' and 'firmware_cb' ) The sequence of running the program data is common. But each device has own specific register addresses for commands. diff --git a/Documentation/thermal/exynos_thermal b/Documentation/thermal/exynos_thermal index 2b46f67b1ccb..9010c4416967 100644 --- a/Documentation/thermal/exynos_thermal +++ b/Documentation/thermal/exynos_thermal @@ -1,17 +1,17 @@ -Kernel driver exynos4_tmu +Kernel driver exynos_tmu ================= Supported chips: -* ARM SAMSUNG EXYNOS4 series of SoC - Prefix: 'exynos4-tmu' +* ARM SAMSUNG EXYNOS4, EXYNOS5 series of SoC Datasheet: Not publicly available Authors: Donggeun Kim <dg77.kim@samsung.com> +Authors: Amit Daniel <amit.daniel@samsung.com> -Description ------------ +TMU controller Description: +--------------------------- -This driver allows to read temperature inside SAMSUNG EXYNOS4 series of SoC. +This driver allows to read temperature inside SAMSUNG EXYNOS4/5 series of SoC. The chip only exposes the measured 8-bit temperature code value through a register. @@ -34,9 +34,9 @@ The three equations are: TI2: Trimming info for 85 degree Celsius (stored at TRIMINFO register) Temperature code measured at 85 degree Celsius which is unchanged -TMU(Thermal Management Unit) in EXYNOS4 generates interrupt +TMU(Thermal Management Unit) in EXYNOS4/5 generates interrupt when temperature exceeds pre-defined levels. -The maximum number of configurable threshold is four. +The maximum number of configurable threshold is five. The threshold levels are defined as follows: Level_0: current temperature > trigger_level_0 + threshold Level_1: current temperature > trigger_level_1 + threshold @@ -47,6 +47,31 @@ The threshold levels are defined as follows: through the corresponding registers. When an interrupt occurs, this driver notify kernel thermal framework -with the function exynos4_report_trigger. +with the function exynos_report_trigger. Although an interrupt condition for level_0 can be set, it can be used to synchronize the cooling action. + +TMU driver description: +----------------------- + +The exynos thermal driver is structured as, + + Kernel Core thermal framework + (thermal_core.c, step_wise.c, cpu_cooling.c) + ^ + | + | +TMU configuration data -------> TMU Driver <------> Exynos Core thermal wrapper +(exynos_tmu_data.c) (exynos_tmu.c) (exynos_thermal_common.c) +(exynos_tmu_data.h) (exynos_tmu.h) (exynos_thermal_common.h) + +a) TMU configuration data: This consist of TMU register offsets/bitfields + described through structure exynos_tmu_registers. Also several + other platform data (struct exynos_tmu_platform_data) members + are used to configure the TMU. +b) TMU driver: This component initialises the TMU controller and sets different + thresholds. It invokes core thermal implementation with the call + exynos_report_trigger. +c) Exynos Core thermal wrapper: This provides 3 wrapper function to use the + Kernel core thermal framework. They are exynos_unregister_thermal, + exynos_register_thermal and exynos_report_trigger. diff --git a/Documentation/thermal/sysfs-api.txt b/Documentation/thermal/sysfs-api.txt index a71bd5b90fe8..87519cb379ee 100644 --- a/Documentation/thermal/sysfs-api.txt +++ b/Documentation/thermal/sysfs-api.txt @@ -134,6 +134,13 @@ temperature) and throttle appropriate devices. this thermal zone and cdev, for a particular trip point. If nth bit is set, then the cdev and thermal zone are bound for trip point n. + .limits: This is an array of cooling state limits. Must have exactly + 2 * thermal_zone.number_of_trip_points. It is an array consisting + of tuples <lower-state upper-state> of state limits. Each trip + will be associated with one state limit tuple when binding. + A NULL pointer means <THERMAL_NO_LIMITS THERMAL_NO_LIMITS> + on all trips. These limits are used when binding a cdev to a + trip point. .match: This call back returns success(0) if the 'tz and cdev' need to be bound, as per platform data. 1.4.2 struct thermal_zone_params @@ -142,6 +149,11 @@ temperature) and throttle appropriate devices. This is an optional feature where some platforms can choose not to provide this data. .governor_name: Name of the thermal governor used for this zone + .no_hwmon: a boolean to indicate if the thermal to hwmon sysfs interface + is required. when no_hwmon == false, a hwmon sysfs interface + will be created. when no_hwmon == true, nothing will be done. + In case the thermal_zone_params is NULL, the hwmon interface + will be created (for backward compatibility). .num_tbps: Number of thermal_bind_params entries for this zone .tbp: thermal_bind_params entries diff --git a/arch/arc/include/asm/sections.h b/arch/arc/include/asm/sections.h index 6fc1159dfefe..764f1e3ba752 100644 --- a/arch/arc/include/asm/sections.h +++ b/arch/arc/include/asm/sections.h @@ -11,7 +11,6 @@ #include <asm-generic/sections.h> -extern char _int_vec_base_lds[]; extern char __arc_dccm_base[]; extern char __dtb_start[]; diff --git a/arch/arc/kernel/head.S b/arch/arc/kernel/head.S index 2a913f85a747..0f944f024513 100644 --- a/arch/arc/kernel/head.S +++ b/arch/arc/kernel/head.S @@ -34,6 +34,9 @@ stext: ; IDENTITY Reg [ 3 2 1 0 ] ; (cpu-id) ^^^ => Zero for UP ARC700 ; => #Core-ID if SMP (Master 0) + ; Note that non-boot CPUs might not land here if halt-on-reset and + ; instead breath life from @first_lines_of_secondary, but we still + ; need to make sure only boot cpu takes this path. GET_CPU_ID r5 cmp r5, 0 jnz arc_platform_smp_wait_to_boot @@ -98,6 +101,8 @@ stext: first_lines_of_secondary: + sr @_int_vec_base_lds, [AUX_INTR_VEC_BASE] + ; setup per-cpu idle task as "current" on this CPU ld r0, [@secondary_idle_tsk] SET_CURR_TASK_ON_CPU r0, r1 diff --git a/arch/arc/kernel/irq.c b/arch/arc/kernel/irq.c index 305b3f866aa7..5fc92455da36 100644 --- a/arch/arc/kernel/irq.c +++ b/arch/arc/kernel/irq.c @@ -24,7 +24,6 @@ * -Needed for each CPU (hence not foldable into init_IRQ) * * what it does ? - * -setup Vector Table Base Reg - in case Linux not linked at 0x8000_0000 * -Disable all IRQs (on CPU side) * -Optionally, setup the High priority Interrupts as Level 2 IRQs */ diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c index b011f8c164a1..2c68bc7e6a78 100644 --- a/arch/arc/kernel/setup.c +++ b/arch/arc/kernel/setup.c @@ -47,10 +47,7 @@ void read_arc_build_cfg_regs(void) READ_BCR(AUX_IDENTITY, cpu->core); cpu->timers = read_aux_reg(ARC_REG_TIMERS_BCR); - cpu->vec_base = read_aux_reg(AUX_INTR_VEC_BASE); - if (cpu->vec_base == 0) - cpu->vec_base = (unsigned int)_int_vec_base_lds; READ_BCR(ARC_REG_D_UNCACH_BCR, uncached_space); cpu->uncached_base = uncached_space.start << 24; diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index c8a916fcd54b..d13f6743df4b 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -3,20 +3,21 @@ config ARM default y select ARCH_BINFMT_ELF_RANDOMIZE_PIE select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE - select ARCH_HAVE_CUSTOM_GPIO_H select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST + select ARCH_HAVE_CUSTOM_GPIO_H select ARCH_WANT_IPC_PARSE_VERSION select BUILDTIME_EXTABLE_SORT if MMU + select CLONE_BACKWARDS select CPU_PM if (SUSPEND || CPU_IDLE) select DCACHE_WORD_ACCESS if (CPU_V6 || CPU_V6K || CPU_V7) && !CPU_BIG_ENDIAN && MMU select GENERIC_ATOMIC64 if (CPU_V7M || CPU_V6 || !CPU_32v6K || !AEABI) select GENERIC_CLOCKEVENTS_BROADCAST if SMP + select GENERIC_IDLE_POLL_SETUP select GENERIC_IRQ_PROBE select GENERIC_IRQ_SHOW select GENERIC_PCI_IOMAP select GENERIC_SCHED_CLOCK select GENERIC_SMP_IDLE_THREAD - select GENERIC_IDLE_POLL_SETUP select GENERIC_STRNCPY_FROM_USER select GENERIC_STRNLEN_USER select HARDIRQS_SW_RESEND @@ -25,6 +26,7 @@ config ARM select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_TRACEHOOK select HAVE_BPF_JIT + select HAVE_CONTEXT_TRACKING select HAVE_C_RECORDMCOUNT select HAVE_DEBUG_KMEMLEAK select HAVE_DMA_API_DEBUG @@ -47,6 +49,7 @@ config ARM select HAVE_KPROBES if !XIP_KERNEL select HAVE_KRETPROBES if (HAVE_KPROBES) select HAVE_MEMBLOCK + select HAVE_MOD_ARCH_SPECIFIC if ARM_UNWIND select HAVE_OPROFILE if (HAVE_PERF_EVENTS) select HAVE_PERF_EVENTS select HAVE_REGS_AND_STACK_ACCESS_API @@ -54,15 +57,14 @@ config ARM select HAVE_UID16 select IRQ_FORCED_THREADING select KTIME_SCALAR + select MODULES_USE_ELF_REL + select OLD_SIGACTION + select OLD_SIGSUSPEND3 select PERF_USE_VMALLOC select RTC_LIB select SYS_SUPPORTS_APM_EMULATION - select HAVE_MOD_ARCH_SPECIFIC if ARM_UNWIND - select MODULES_USE_ELF_REL - select CLONE_BACKWARDS - select OLD_SIGSUSPEND3 - select OLD_SIGACTION - select HAVE_CONTEXT_TRACKING + # Above selects are sorted alphabetically; please add new ones + # according to that. Thanks. help The ARM series is a line of low-power-consumption RISC chip designs licensed by ARM Ltd and targeted at embedded applications and @@ -386,8 +388,8 @@ config ARCH_GEMINI bool "Cortina Systems Gemini" select ARCH_REQUIRE_GPIOLIB select ARCH_USES_GETTIMEOFFSET - select NEED_MACH_GPIO_H select CPU_FA526 + select NEED_MACH_GPIO_H help Support for the Cortina Systems Gemini family SoCs @@ -487,8 +489,8 @@ config ARCH_IXP4XX select GENERIC_CLOCKEVENTS select MIGHT_HAVE_PCI select NEED_MACH_IO_H - select USB_EHCI_BIG_ENDIAN_MMIO select USB_EHCI_BIG_ENDIAN_DESC + select USB_EHCI_BIG_ENDIAN_MMIO help Support for Intel's IXP4XX (XScale) family of processors. @@ -498,11 +500,11 @@ config ARCH_DOVE select CPU_PJ4 select GENERIC_CLOCKEVENTS select MIGHT_HAVE_PCI + select MVEBU_MBUS select PINCTRL select PINCTRL_DOVE select PLAT_ORION_LEGACY select USB_ARCH_HAS_EHCI - select MVEBU_MBUS help Support for the Marvell Dove SoC 88AP510 @@ -512,12 +514,12 @@ config ARCH_KIRKWOOD select ARCH_REQUIRE_GPIOLIB select CPU_FEROCEON select GENERIC_CLOCKEVENTS + select MVEBU_MBUS select PCI select PCI_QUIRKS select PINCTRL select PINCTRL_KIRKWOOD select PLAT_ORION_LEGACY - select MVEBU_MBUS help Support for the following Marvell Kirkwood series SoCs: 88F6180, 88F6192 and 88F6281. @@ -527,9 +529,9 @@ config ARCH_MV78XX0 select ARCH_REQUIRE_GPIOLIB select CPU_FEROCEON select GENERIC_CLOCKEVENTS + select MVEBU_MBUS select PCI select PLAT_ORION_LEGACY - select MVEBU_MBUS help Support for the following Marvell MV78xx0 series SoCs: MV781x0, MV782x0. @@ -540,9 +542,9 @@ config ARCH_ORION5X select ARCH_REQUIRE_GPIOLIB select CPU_FEROCEON select GENERIC_CLOCKEVENTS + select MVEBU_MBUS select PCI select PLAT_ORION_LEGACY - select MVEBU_MBUS help Support for the following Marvell Orion 5x series SoCs: Orion-1 (5181), Orion-VoIP (5181L), Orion-NAS (5182), @@ -758,8 +760,8 @@ config ARCH_S5P64X0 select HAVE_S3C2410_WATCHDOG if WATCHDOG select HAVE_S3C_RTC if RTC_CLASS select NEED_MACH_GPIO_H - select SAMSUNG_WDT_RESET select SAMSUNG_ATAGS + select SAMSUNG_WDT_RESET help Samsung S5P64X0 CPU based systems, such as the Samsung SMDK6440, SMDK6450. @@ -777,8 +779,8 @@ config ARCH_S5PC100 select HAVE_S3C2410_WATCHDOG if WATCHDOG select HAVE_S3C_RTC if RTC_CLASS select NEED_MACH_GPIO_H - select SAMSUNG_WDT_RESET select SAMSUNG_ATAGS + select SAMSUNG_WDT_RESET help Samsung S5PC100 series based systems @@ -1619,9 +1621,10 @@ config HZ_FIXED ARCH_S5PV210 || ARCH_EXYNOS4 default AT91_TIMER_HZ if ARCH_AT91 default SHMOBILE_TIMER_HZ if ARCH_SHMOBILE + default 0 choice - depends on !HZ_FIXED + depends on HZ_FIXED = 0 prompt "Timer frequency" config HZ_100 @@ -1646,7 +1649,7 @@ endchoice config HZ int - default HZ_FIXED if HZ_FIXED + default HZ_FIXED if HZ_FIXED != 0 default 100 if HZ_100 default 200 if HZ_200 default 250 if HZ_250 diff --git a/arch/arm/boot/dts/sun7i-a20-cubieboard2.dts b/arch/arm/boot/dts/sun7i-a20-cubieboard2.dts index 31b76f08b3ad..15e625eca312 100644 --- a/arch/arm/boot/dts/sun7i-a20-cubieboard2.dts +++ b/arch/arm/boot/dts/sun7i-a20-cubieboard2.dts @@ -19,6 +19,21 @@ compatible = "cubietech,cubieboard2", "allwinner,sun7i-a20"; soc@01c00000 { + emac: ethernet@01c0b000 { + pinctrl-names = "default"; + pinctrl-0 = <&emac_pins_a>; + phy = <&phy1>; + status = "okay"; + }; + + mdio@01c0b080 { + status = "okay"; + + phy1: ethernet-phy@1 { + reg = <1>; + }; + }; + pinctrl@01c20800 { led_pins_cubieboard2: led_pins@0 { allwinner,pins = "PH20", "PH21"; diff --git a/arch/arm/boot/dts/sun7i-a20-olinuxino-micro.dts b/arch/arm/boot/dts/sun7i-a20-olinuxino-micro.dts index 34a6c02a7c72..9e778557fadb 100644 --- a/arch/arm/boot/dts/sun7i-a20-olinuxino-micro.dts +++ b/arch/arm/boot/dts/sun7i-a20-olinuxino-micro.dts @@ -19,6 +19,21 @@ compatible = "olimex,a20-olinuxino-micro", "allwinner,sun7i-a20"; soc@01c00000 { + emac: ethernet@01c0b000 { + pinctrl-names = "default"; + pinctrl-0 = <&emac_pins_a>; + phy = <&phy1>; + status = "okay"; + }; + + mdio@01c0b080 { + status = "okay"; + + phy1: ethernet-phy@1 { + reg = <1>; + }; + }; + pinctrl@01c20800 { led_pins_olinuxino: led_pins@0 { allwinner,pins = "PH2"; diff --git a/arch/arm/boot/dts/sun7i-a20.dtsi b/arch/arm/boot/dts/sun7i-a20.dtsi index 999ff45cb77e..80559cbdbc87 100644 --- a/arch/arm/boot/dts/sun7i-a20.dtsi +++ b/arch/arm/boot/dts/sun7i-a20.dtsi @@ -167,6 +167,22 @@ #size-cells = <1>; ranges; + emac: ethernet@01c0b000 { + compatible = "allwinner,sun4i-emac"; + reg = <0x01c0b000 0x1000>; + interrupts = <0 55 1>; + clocks = <&ahb_gates 17>; + status = "disabled"; + }; + + mdio@01c0b080 { + compatible = "allwinner,sun4i-mdio"; + reg = <0x01c0b080 0x14>; + status = "disabled"; + #address-cells = <1>; + #size-cells = <0>; + }; + pio: pinctrl@01c20800 { compatible = "allwinner,sun7i-a20-pinctrl"; reg = <0x01c20800 0x400>; @@ -198,6 +214,17 @@ allwinner,drive = <0>; allwinner,pull = <0>; }; + + emac_pins_a: emac0@0 { + allwinner,pins = "PA0", "PA1", "PA2", + "PA3", "PA4", "PA5", "PA6", + "PA7", "PA8", "PA9", "PA10", + "PA11", "PA12", "PA13", "PA14", + "PA15", "PA16"; + allwinner,function = "emac"; + allwinner,drive = <0>; + allwinner,pull = <0>; + }; }; timer@01c20c00 { diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile index aaf3a8731136..bd454b09133e 100644 --- a/arch/arm/lib/Makefile +++ b/arch/arm/lib/Makefile @@ -49,5 +49,5 @@ $(obj)/csumpartialcopyuser.o: $(obj)/csumpartialcopygeneric.S ifeq ($(CONFIG_KERNEL_MODE_NEON),y) NEON_FLAGS := -mfloat-abi=softfp -mfpu=neon CFLAGS_xor-neon.o += $(NEON_FLAGS) - lib-$(CONFIG_XOR_BLOCKS) += xor-neon.o + obj-$(CONFIG_XOR_BLOCKS) += xor-neon.o endif diff --git a/arch/arm/lib/xor-neon.c b/arch/arm/lib/xor-neon.c index f485e5a2af4b..2c40aeab3eaa 100644 --- a/arch/arm/lib/xor-neon.c +++ b/arch/arm/lib/xor-neon.c @@ -9,6 +9,9 @@ */ #include <linux/raid/xor.h> +#include <linux/module.h> + +MODULE_LICENSE("GPL"); #ifndef __ARM_NEON__ #error You should compile this file with '-mfloat-abi=softfp -mfpu=neon' @@ -40,3 +43,4 @@ struct xor_block_template const xor_block_neon_inner = { .do_4 = xor_8regs_4, .do_5 = xor_8regs_5, }; +EXPORT_SYMBOL(xor_block_neon_inner); diff --git a/arch/arm/mach-ep93xx/core.c b/arch/arm/mach-ep93xx/core.c index df8612fbbc9c..3f12b885c083 100644 --- a/arch/arm/mach-ep93xx/core.c +++ b/arch/arm/mach-ep93xx/core.c @@ -281,7 +281,7 @@ static AMBA_APB_DEVICE(uart1, "apb:uart1", 0x00041010, EP93XX_UART1_PHYS_BASE, { IRQ_EP93XX_UART1 }, &ep93xx_uart_data); static AMBA_APB_DEVICE(uart2, "apb:uart2", 0x00041010, EP93XX_UART2_PHYS_BASE, - { IRQ_EP93XX_UART2 }, &ep93xx_uart_data); + { IRQ_EP93XX_UART2 }, NULL); static AMBA_APB_DEVICE(uart3, "apb:uart3", 0x00041010, EP93XX_UART3_PHYS_BASE, { IRQ_EP93XX_UART3 }, &ep93xx_uart_data); diff --git a/arch/arm/mach-shmobile/board-lager.c b/arch/arm/mach-shmobile/board-lager.c index 4872939cdba2..ffb6f0ac7606 100644 --- a/arch/arm/mach-shmobile/board-lager.c +++ b/arch/arm/mach-shmobile/board-lager.c @@ -96,7 +96,6 @@ static struct resource mmcif1_resources[] __initdata = { static struct sh_eth_plat_data ether_pdata __initdata = { .phy = 0x1, .edmac_endian = EDMAC_LITTLE_ENDIAN, - .register_type = SH_ETH_REG_FAST_RCAR, .phy_interface = PHY_INTERFACE_MODE_RMII, .ether_link_active_low = 1, }; diff --git a/arch/arm/mach-shmobile/setup-r8a7779.c b/arch/arm/mach-shmobile/setup-r8a7779.c index b5b2f787da2e..ecd0148ee1e1 100644 --- a/arch/arm/mach-shmobile/setup-r8a7779.c +++ b/arch/arm/mach-shmobile/setup-r8a7779.c @@ -691,8 +691,8 @@ void __init __weak r8a7779_register_twd(void) { } void __init r8a7779_earlytimer_init(void) { r8a7779_clock_init(); - shmobile_earlytimer_init(); r8a7779_register_twd(); + shmobile_earlytimer_init(); } void __init r8a7779_add_early_devices(void) diff --git a/arch/arm/mach-versatile/include/mach/platform.h b/arch/arm/mach-versatile/include/mach/platform.h index ec087407b163..6f938ccb0c54 100644 --- a/arch/arm/mach-versatile/include/mach/platform.h +++ b/arch/arm/mach-versatile/include/mach/platform.h @@ -231,12 +231,14 @@ /* PCI space */ #define VERSATILE_PCI_BASE 0x41000000 /* PCI Interface */ #define VERSATILE_PCI_CFG_BASE 0x42000000 +#define VERSATILE_PCI_IO_BASE 0x43000000 #define VERSATILE_PCI_MEM_BASE0 0x44000000 #define VERSATILE_PCI_MEM_BASE1 0x50000000 #define VERSATILE_PCI_MEM_BASE2 0x60000000 /* Sizes of above maps */ #define VERSATILE_PCI_BASE_SIZE 0x01000000 #define VERSATILE_PCI_CFG_BASE_SIZE 0x02000000 +#define VERSATILE_PCI_IO_BASE_SIZE 0x01000000 #define VERSATILE_PCI_MEM_BASE0_SIZE 0x0c000000 /* 32Mb */ #define VERSATILE_PCI_MEM_BASE1_SIZE 0x10000000 /* 256Mb */ #define VERSATILE_PCI_MEM_BASE2_SIZE 0x10000000 /* 256Mb */ diff --git a/arch/arm/mach-versatile/pci.c b/arch/arm/mach-versatile/pci.c index e92e5e0705bc..c97be4ea76d2 100644 --- a/arch/arm/mach-versatile/pci.c +++ b/arch/arm/mach-versatile/pci.c @@ -43,9 +43,9 @@ #define PCI_IMAP0 __IO_ADDRESS(VERSATILE_PCI_CORE_BASE+0x0) #define PCI_IMAP1 __IO_ADDRESS(VERSATILE_PCI_CORE_BASE+0x4) #define PCI_IMAP2 __IO_ADDRESS(VERSATILE_PCI_CORE_BASE+0x8) -#define PCI_SMAP0 __IO_ADDRESS(VERSATILE_PCI_CORE_BASE+0x10) -#define PCI_SMAP1 __IO_ADDRESS(VERSATILE_PCI_CORE_BASE+0x14) -#define PCI_SMAP2 __IO_ADDRESS(VERSATILE_PCI_CORE_BASE+0x18) +#define PCI_SMAP0 __IO_ADDRESS(VERSATILE_PCI_CORE_BASE+0x14) +#define PCI_SMAP1 __IO_ADDRESS(VERSATILE_PCI_CORE_BASE+0x18) +#define PCI_SMAP2 __IO_ADDRESS(VERSATILE_PCI_CORE_BASE+0x1c) #define PCI_SELFID __IO_ADDRESS(VERSATILE_PCI_CORE_BASE+0xc) #define DEVICE_ID_OFFSET 0x00 @@ -170,8 +170,8 @@ static struct pci_ops pci_versatile_ops = { .write = versatile_write_config, }; -static struct resource io_mem = { - .name = "PCI I/O space", +static struct resource unused_mem = { + .name = "PCI unused", .start = VERSATILE_PCI_MEM_BASE0, .end = VERSATILE_PCI_MEM_BASE0+VERSATILE_PCI_MEM_BASE0_SIZE-1, .flags = IORESOURCE_MEM, @@ -195,9 +195,9 @@ static int __init pci_versatile_setup_resources(struct pci_sys_data *sys) { int ret = 0; - ret = request_resource(&iomem_resource, &io_mem); + ret = request_resource(&iomem_resource, &unused_mem); if (ret) { - printk(KERN_ERR "PCI: unable to allocate I/O " + printk(KERN_ERR "PCI: unable to allocate unused " "memory region (%d)\n", ret); goto out; } @@ -205,7 +205,7 @@ static int __init pci_versatile_setup_resources(struct pci_sys_data *sys) if (ret) { printk(KERN_ERR "PCI: unable to allocate non-prefetchable " "memory region (%d)\n", ret); - goto release_io_mem; + goto release_unused_mem; } ret = request_resource(&iomem_resource, &pre_mem); if (ret) { @@ -225,8 +225,8 @@ static int __init pci_versatile_setup_resources(struct pci_sys_data *sys) release_non_mem: release_resource(&non_mem); - release_io_mem: - release_resource(&io_mem); + release_unused_mem: + release_resource(&unused_mem); out: return ret; } @@ -246,7 +246,7 @@ int __init pci_versatile_setup(int nr, struct pci_sys_data *sys) goto out; } - ret = pci_ioremap_io(0, VERSATILE_PCI_MEM_BASE0); + ret = pci_ioremap_io(0, VERSATILE_PCI_IO_BASE); if (ret) goto out; @@ -295,6 +295,19 @@ int __init pci_versatile_setup(int nr, struct pci_sys_data *sys) __raw_writel(PHYS_OFFSET, local_pci_cfg_base + PCI_BASE_ADDRESS_2); /* + * For many years the kernel and QEMU were symbiotically buggy + * in that they both assumed the same broken IRQ mapping. + * QEMU therefore attempts to auto-detect old broken kernels + * so that they still work on newer QEMU as they did on old + * QEMU. Since we now use the correct (ie matching-hardware) + * IRQ mapping we write a definitely different value to a + * PCI_INTERRUPT_LINE register to tell QEMU that we expect + * real hardware behaviour and it need not be backwards + * compatible for us. This write is harmless on real hardware. + */ + __raw_writel(0, VERSATILE_PCI_VIRT_BASE+PCI_INTERRUPT_LINE); + + /* * Do not to map Versatile FPGA PCI device into memory space */ pci_slot_ignore |= (1 << myslot); @@ -327,13 +340,13 @@ static int __init versatile_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { int irq; - /* slot, pin, irq - * 24 1 IRQ_SIC_PCI0 - * 25 1 IRQ_SIC_PCI1 - * 26 1 IRQ_SIC_PCI2 - * 27 1 IRQ_SIC_PCI3 + /* + * Slot INTA INTB INTC INTD + * 31 PCI1 PCI2 PCI3 PCI0 + * 30 PCI0 PCI1 PCI2 PCI3 + * 29 PCI3 PCI0 PCI1 PCI2 */ - irq = IRQ_SIC_PCI0 + ((slot - 24 + pin - 1) & 3); + irq = IRQ_SIC_PCI0 + ((slot + 2 + pin - 1) & 3); return irq; } diff --git a/arch/arm/mach-vexpress/Makefile b/arch/arm/mach-vexpress/Makefile index 36ea8247123a..505e64ab3eae 100644 --- a/arch/arm/mach-vexpress/Makefile +++ b/arch/arm/mach-vexpress/Makefile @@ -7,6 +7,8 @@ ccflags-$(CONFIG_ARCH_MULTIPLATFORM) := -I$(srctree)/$(src)/include \ obj-y := v2m.o obj-$(CONFIG_ARCH_VEXPRESS_CA9X4) += ct-ca9x4.o obj-$(CONFIG_ARCH_VEXPRESS_DCSCB) += dcscb.o dcscb_setup.o +CFLAGS_dcscb.o += -march=armv7-a obj-$(CONFIG_ARCH_VEXPRESS_TC2_PM) += tc2_pm.o spc.o +CFLAGS_tc2_pm.o += -march=armv7-a obj-$(CONFIG_SMP) += platsmp.o obj-$(CONFIG_HOTPLUG_CPU) += hotplug.o diff --git a/arch/arm/plat-pxa/ssp.c b/arch/arm/plat-pxa/ssp.c index c83f27b6bdda..3ea02903d75a 100644 --- a/arch/arm/plat-pxa/ssp.c +++ b/arch/arm/plat-pxa/ssp.c @@ -132,6 +132,7 @@ static int pxa_ssp_probe(struct platform_device *pdev) if (dev->of_node) { struct of_phandle_args dma_spec; struct device_node *np = dev->of_node; + int ret; /* * FIXME: we should allocate the DMA channel from this @@ -140,14 +141,23 @@ static int pxa_ssp_probe(struct platform_device *pdev) */ /* rx */ - of_parse_phandle_with_args(np, "dmas", "#dma-cells", - 0, &dma_spec); + ret = of_parse_phandle_with_args(np, "dmas", "#dma-cells", + 0, &dma_spec); + + if (ret) { + dev_err(dev, "Can't parse dmas property\n"); + return -ENODEV; + } ssp->drcmr_rx = dma_spec.args[0]; of_node_put(dma_spec.np); /* tx */ - of_parse_phandle_with_args(np, "dmas", "#dma-cells", - 1, &dma_spec); + ret = of_parse_phandle_with_args(np, "dmas", "#dma-cells", + 1, &dma_spec); + if (ret) { + dev_err(dev, "Can't parse dmas property\n"); + return -ENODEV; + } ssp->drcmr_tx = dma_spec.args[0]; of_node_put(dma_spec.np); } else { diff --git a/arch/powerpc/include/asm/device.h b/arch/powerpc/include/asm/device.h index 77e97dd0c15d..38faeded7d59 100644 --- a/arch/powerpc/include/asm/device.h +++ b/arch/powerpc/include/asm/device.h @@ -28,6 +28,9 @@ struct dev_archdata { void *iommu_table_base; } dma_data; +#ifdef CONFIG_IOMMU_API + void *iommu_domain; +#endif #ifdef CONFIG_SWIOTLB dma_addr_t max_direct_dma_addr; #endif diff --git a/arch/powerpc/include/asm/fsl_pamu_stash.h b/arch/powerpc/include/asm/fsl_pamu_stash.h new file mode 100644 index 000000000000..caa1b21c25cd --- /dev/null +++ b/arch/powerpc/include/asm/fsl_pamu_stash.h @@ -0,0 +1,39 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright (C) 2013 Freescale Semiconductor, Inc. + * + */ + +#ifndef __FSL_PAMU_STASH_H +#define __FSL_PAMU_STASH_H + +/* cache stash targets */ +enum pamu_stash_target { + PAMU_ATTR_CACHE_L1 = 1, + PAMU_ATTR_CACHE_L2, + PAMU_ATTR_CACHE_L3, +}; + +/* + * This attribute allows configuring stashig specific parameters + * in the PAMU hardware. + */ + +struct pamu_stash_attribute { + u32 cpu; /* cpu number */ + u32 cache; /* cache to stash to: L1,L2,L3 */ +}; + +#endif /* __FSL_PAMU_STASH_H */ diff --git a/arch/powerpc/sysdev/fsl_pci.h b/arch/powerpc/sysdev/fsl_pci.h index defc422a375f..8d455df58471 100644 --- a/arch/powerpc/sysdev/fsl_pci.h +++ b/arch/powerpc/sysdev/fsl_pci.h @@ -16,6 +16,11 @@ struct platform_device; + +/* FSL PCI controller BRR1 register */ +#define PCI_FSL_BRR1 0xbf8 +#define PCI_FSL_BRR1_VER 0xffff + #define PCIE_LTSSM 0x0404 /* PCIE Link Training and Status */ #define PCIE_LTSSM_L0 0x16 /* L0 state */ #define PCIE_IP_REV_2_2 0x02080202 /* PCIE IP block version Rev2.2 */ diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 0abf6742a8b0..c62d88396ad5 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -124,6 +124,7 @@ static struct event_constraint intel_ivb_event_constraints[] __read_mostly = INTEL_UEVENT_CONSTRAINT(0x0148, 0x4), /* L1D_PEND_MISS.PENDING */ INTEL_UEVENT_CONSTRAINT(0x0279, 0xf), /* IDQ.EMTPY */ INTEL_UEVENT_CONSTRAINT(0x019c, 0xf), /* IDQ_UOPS_NOT_DELIVERED.CORE */ + INTEL_UEVENT_CONSTRAINT(0x02a3, 0xf), /* CYCLE_ACTIVITY.CYCLES_LDM_PENDING */ INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf), /* CYCLE_ACTIVITY.CYCLES_NO_EXECUTE */ INTEL_UEVENT_CONSTRAINT(0x05a3, 0xf), /* CYCLE_ACTIVITY.STALLS_L2_PENDING */ INTEL_UEVENT_CONSTRAINT(0x06a3, 0xf), /* CYCLE_ACTIVITY.STALLS_LDM_PENDING */ diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c index fd8011ed4dcd..8ed44589b0e4 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c @@ -2808,7 +2808,7 @@ uncore_get_event_constraint(struct intel_uncore_box *box, struct perf_event *eve return c; } - if (event->hw.config == ~0ULL) + if (event->attr.config == UNCORE_FIXED_EVENT) return &constraint_fixed; if (type->constraints) { @@ -3112,7 +3112,9 @@ static int uncore_pmu_event_init(struct perf_event *event) */ if (pmu->type->single_fixed && pmu->pmu_idx > 0) return -EINVAL; - hwc->config = ~0ULL; + + /* fixed counters have event field hardcoded to zero */ + hwc->config = 0ULL; } else { hwc->config = event->attr.config & pmu->type->event_mask; if (pmu->type->ops->hw_config) { diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 6e2d2c8f230b..dce0df8150df 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -4421,13 +4421,12 @@ void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm) } } -static int mmu_shrink(struct shrinker *shrink, struct shrink_control *sc) +static unsigned long +mmu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) { struct kvm *kvm; int nr_to_scan = sc->nr_to_scan; - - if (nr_to_scan == 0) - goto out; + unsigned long freed = 0; raw_spin_lock(&kvm_lock); @@ -4462,25 +4461,37 @@ static int mmu_shrink(struct shrinker *shrink, struct shrink_control *sc) goto unlock; } - prepare_zap_oldest_mmu_page(kvm, &invalid_list); + if (prepare_zap_oldest_mmu_page(kvm, &invalid_list)) + freed++; kvm_mmu_commit_zap_page(kvm, &invalid_list); unlock: spin_unlock(&kvm->mmu_lock); srcu_read_unlock(&kvm->srcu, idx); + /* + * unfair on small ones + * per-vm shrinkers cry out + * sadness comes quickly + */ list_move_tail(&kvm->vm_list, &vm_list); break; } raw_spin_unlock(&kvm_lock); + return freed; -out: +} + +static unsigned long +mmu_shrink_count(struct shrinker *shrink, struct shrink_control *sc) +{ return percpu_counter_read_positive(&kvm_total_used_mmu_pages); } static struct shrinker mmu_shrinker = { - .shrink = mmu_shrink, + .count_objects = mmu_shrink_count, + .scan_objects = mmu_shrink_scan, .seeks = DEFAULT_SEEKS * 10, }; diff --git a/drivers/acpi/acpi_lpss.c b/drivers/acpi/acpi_lpss.c index 6a382188fa20..fb78bb9ad8f6 100644 --- a/drivers/acpi/acpi_lpss.c +++ b/drivers/acpi/acpi_lpss.c @@ -257,12 +257,13 @@ static int acpi_lpss_create_device(struct acpi_device *adev, pdata->mmio_size = resource_size(&rentry->res); pdata->mmio_base = ioremap(rentry->res.start, pdata->mmio_size); - pdata->dev_desc = dev_desc; break; } acpi_dev_free_resource_list(&resource_list); + pdata->dev_desc = dev_desc; + if (dev_desc->clk_required) { ret = register_device_clock(adev, pdata); if (ret) { diff --git a/drivers/acpi/acpica/exstore.c b/drivers/acpi/acpica/exstore.c index 2bdba6f7d762..f0b09bf9887d 100644 --- a/drivers/acpi/acpica/exstore.c +++ b/drivers/acpi/acpica/exstore.c @@ -57,6 +57,11 @@ acpi_ex_store_object_to_index(union acpi_operand_object *val_desc, union acpi_operand_object *dest_desc, struct acpi_walk_state *walk_state); +static acpi_status +acpi_ex_store_direct_to_node(union acpi_operand_object *source_desc, + struct acpi_namespace_node *node, + struct acpi_walk_state *walk_state); + /******************************************************************************* * * FUNCTION: acpi_ex_store @@ -375,7 +380,11 @@ acpi_ex_store_object_to_index(union acpi_operand_object *source_desc, * When storing into an object the data is converted to the * target object type then stored in the object. This means * that the target object type (for an initialized target) will - * not be changed by a store operation. + * not be changed by a store operation. A copy_object can change + * the target type, however. + * + * The implicit_conversion flag is set to NO/FALSE only when + * storing to an arg_x -- as per the rules of the ACPI spec. * * Assumes parameters are already validated. * @@ -399,7 +408,7 @@ acpi_ex_store_object_to_node(union acpi_operand_object *source_desc, target_type = acpi_ns_get_type(node); target_desc = acpi_ns_get_attached_object(node); - ACPI_DEBUG_PRINT((ACPI_DB_EXEC, "Storing %p(%s) into node %p(%s)\n", + ACPI_DEBUG_PRINT((ACPI_DB_EXEC, "Storing %p (%s) to node %p (%s)\n", source_desc, acpi_ut_get_object_type_name(source_desc), node, acpi_ut_get_type_name(target_type))); @@ -413,45 +422,30 @@ acpi_ex_store_object_to_node(union acpi_operand_object *source_desc, return_ACPI_STATUS(status); } - /* If no implicit conversion, drop into the default case below */ - - if ((!implicit_conversion) || - ((walk_state->opcode == AML_COPY_OP) && - (target_type != ACPI_TYPE_LOCAL_REGION_FIELD) && - (target_type != ACPI_TYPE_LOCAL_BANK_FIELD) && - (target_type != ACPI_TYPE_LOCAL_INDEX_FIELD))) { - /* - * Force execution of default (no implicit conversion). Note: - * copy_object does not perform an implicit conversion, as per the ACPI - * spec -- except in case of region/bank/index fields -- because these - * objects must retain their original type permanently. - */ - target_type = ACPI_TYPE_ANY; - } - /* Do the actual store operation */ switch (target_type) { - case ACPI_TYPE_BUFFER_FIELD: - case ACPI_TYPE_LOCAL_REGION_FIELD: - case ACPI_TYPE_LOCAL_BANK_FIELD: - case ACPI_TYPE_LOCAL_INDEX_FIELD: - - /* For fields, copy the source data to the target field. */ - - status = acpi_ex_write_data_to_field(source_desc, target_desc, - &walk_state->result_obj); - break; - case ACPI_TYPE_INTEGER: case ACPI_TYPE_STRING: case ACPI_TYPE_BUFFER: /* - * These target types are all of type Integer/String/Buffer, and - * therefore support implicit conversion before the store. - * - * Copy and/or convert the source object to a new target object + * The simple data types all support implicit source operand + * conversion before the store. */ + + if ((walk_state->opcode == AML_COPY_OP) || !implicit_conversion) { + /* + * However, copy_object and Stores to arg_x do not perform + * an implicit conversion, as per the ACPI specification. + * A direct store is performed instead. + */ + status = acpi_ex_store_direct_to_node(source_desc, node, + walk_state); + break; + } + + /* Store with implicit source operand conversion support */ + status = acpi_ex_store_object_to_object(source_desc, target_desc, &new_desc, walk_state); @@ -465,13 +459,12 @@ acpi_ex_store_object_to_node(union acpi_operand_object *source_desc, * the Name's type to that of the value being stored in it. * source_desc reference count is incremented by attach_object. * - * Note: This may change the type of the node if an explicit store - * has been performed such that the node/object type has been - * changed. + * Note: This may change the type of the node if an explicit + * store has been performed such that the node/object type + * has been changed. */ - status = - acpi_ns_attach_object(node, new_desc, - new_desc->common.type); + status = acpi_ns_attach_object(node, new_desc, + new_desc->common.type); ACPI_DEBUG_PRINT((ACPI_DB_EXEC, "Store %s into %s via Convert/Attach\n", @@ -482,38 +475,83 @@ acpi_ex_store_object_to_node(union acpi_operand_object *source_desc, } break; - default: - - ACPI_DEBUG_PRINT((ACPI_DB_EXEC, - "Storing [%s] (%p) directly into node [%s] (%p)" - " with no implicit conversion\n", - acpi_ut_get_object_type_name(source_desc), - source_desc, - acpi_ut_get_object_type_name(target_desc), - node)); + case ACPI_TYPE_BUFFER_FIELD: + case ACPI_TYPE_LOCAL_REGION_FIELD: + case ACPI_TYPE_LOCAL_BANK_FIELD: + case ACPI_TYPE_LOCAL_INDEX_FIELD: + /* + * For all fields, always write the source data to the target + * field. Any required implicit source operand conversion is + * performed in the function below as necessary. Note, field + * objects must retain their original type permanently. + */ + status = acpi_ex_write_data_to_field(source_desc, target_desc, + &walk_state->result_obj); + break; + default: /* * No conversions for all other types. Directly store a copy of - * the source object. NOTE: This is a departure from the ACPI - * spec, which states "If conversion is impossible, abort the - * running control method". + * the source object. This is the ACPI spec-defined behavior for + * the copy_object operator. * - * This code implements "If conversion is impossible, treat the - * Store operation as a CopyObject". + * NOTE: For the Store operator, this is a departure from the + * ACPI spec, which states "If conversion is impossible, abort + * the running control method". Instead, this code implements + * "If conversion is impossible, treat the Store operation as + * a CopyObject". */ - status = - acpi_ut_copy_iobject_to_iobject(source_desc, &new_desc, - walk_state); - if (ACPI_FAILURE(status)) { - return_ACPI_STATUS(status); - } - - status = - acpi_ns_attach_object(node, new_desc, - new_desc->common.type); - acpi_ut_remove_reference(new_desc); + status = acpi_ex_store_direct_to_node(source_desc, node, + walk_state); break; } return_ACPI_STATUS(status); } + +/******************************************************************************* + * + * FUNCTION: acpi_ex_store_direct_to_node + * + * PARAMETERS: source_desc - Value to be stored + * node - Named object to receive the value + * walk_state - Current walk state + * + * RETURN: Status + * + * DESCRIPTION: "Store" an object directly to a node. This involves a copy + * and an attach. + * + ******************************************************************************/ + +static acpi_status +acpi_ex_store_direct_to_node(union acpi_operand_object *source_desc, + struct acpi_namespace_node *node, + struct acpi_walk_state *walk_state) +{ + acpi_status status; + union acpi_operand_object *new_desc; + + ACPI_FUNCTION_TRACE(ex_store_direct_to_node); + + ACPI_DEBUG_PRINT((ACPI_DB_EXEC, + "Storing [%s] (%p) directly into node [%s] (%p)" + " with no implicit conversion\n", + acpi_ut_get_object_type_name(source_desc), + source_desc, acpi_ut_get_type_name(node->type), + node)); + + /* Copy the source object to a new object */ + + status = + acpi_ut_copy_iobject_to_iobject(source_desc, &new_desc, walk_state); + if (ACPI_FAILURE(status)) { + return_ACPI_STATUS(status); + } + + /* Attach the new object to the node */ + + status = acpi_ns_attach_object(node, new_desc, new_desc->common.type); + acpi_ut_remove_reference(new_desc); + return_ACPI_STATUS(status); +} diff --git a/drivers/acpi/glue.c b/drivers/acpi/glue.c index 94672297e1b1..10f0f40587bb 100644 --- a/drivers/acpi/glue.c +++ b/drivers/acpi/glue.c @@ -79,6 +79,9 @@ static struct acpi_bus_type *acpi_get_bus_type(struct device *dev) return ret; } +#define FIND_CHILD_MIN_SCORE 1 +#define FIND_CHILD_MAX_SCORE 2 + static acpi_status acpi_dev_present(acpi_handle handle, u32 lvl_not_used, void *not_used, void **ret_p) { @@ -92,14 +95,17 @@ static acpi_status acpi_dev_present(acpi_handle handle, u32 lvl_not_used, return AE_OK; } -static bool acpi_extra_checks_passed(acpi_handle handle, bool is_bridge) +static int do_find_child_checks(acpi_handle handle, bool is_bridge) { + bool sta_present = true; unsigned long long sta; acpi_status status; - status = acpi_bus_get_status_handle(handle, &sta); - if (ACPI_FAILURE(status) || !(sta & ACPI_STA_DEVICE_ENABLED)) - return false; + status = acpi_evaluate_integer(handle, "_STA", NULL, &sta); + if (status == AE_NOT_FOUND) + sta_present = false; + else if (ACPI_FAILURE(status) || !(sta & ACPI_STA_DEVICE_ENABLED)) + return -ENODEV; if (is_bridge) { void *test = NULL; @@ -107,16 +113,17 @@ static bool acpi_extra_checks_passed(acpi_handle handle, bool is_bridge) /* Check if this object has at least one child device. */ acpi_walk_namespace(ACPI_TYPE_DEVICE, handle, 1, acpi_dev_present, NULL, NULL, &test); - return !!test; + if (!test) + return -ENODEV; } - return true; + return sta_present ? FIND_CHILD_MAX_SCORE : FIND_CHILD_MIN_SCORE; } struct find_child_context { u64 addr; bool is_bridge; acpi_handle ret; - bool ret_checked; + int ret_score; }; static acpi_status do_find_child(acpi_handle handle, u32 lvl_not_used, @@ -125,6 +132,7 @@ static acpi_status do_find_child(acpi_handle handle, u32 lvl_not_used, struct find_child_context *context = data; unsigned long long addr; acpi_status status; + int score; status = acpi_evaluate_integer(handle, METHOD_NAME__ADR, NULL, &addr); if (ACPI_FAILURE(status) || addr != context->addr) @@ -144,15 +152,20 @@ static acpi_status do_find_child(acpi_handle handle, u32 lvl_not_used, * its handle if so. Second, check the same for the object that we've * just found. */ - if (!context->ret_checked) { - if (acpi_extra_checks_passed(context->ret, context->is_bridge)) + if (!context->ret_score) { + score = do_find_child_checks(context->ret, context->is_bridge); + if (score == FIND_CHILD_MAX_SCORE) return AE_CTRL_TERMINATE; else - context->ret_checked = true; + context->ret_score = score; } - if (acpi_extra_checks_passed(handle, context->is_bridge)) { + score = do_find_child_checks(handle, context->is_bridge); + if (score == FIND_CHILD_MAX_SCORE) { context->ret = handle; return AE_CTRL_TERMINATE; + } else if (score > context->ret_score) { + context->ret = handle; + context->ret_score = score; } return AE_OK; } diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 61d090b6ce25..fbdb82e70d10 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -204,8 +204,6 @@ static int acpi_scan_hot_remove(struct acpi_device *device) return -EINVAL; } - lock_device_hotplug(); - /* * Carry out two passes here and ignore errors in the first pass, * because if the devices in question are memory blocks and @@ -236,9 +234,6 @@ static int acpi_scan_hot_remove(struct acpi_device *device) ACPI_UINT32_MAX, acpi_bus_online_companions, NULL, NULL, NULL); - - unlock_device_hotplug(); - put_device(&device->dev); return -EBUSY; } @@ -249,8 +244,6 @@ static int acpi_scan_hot_remove(struct acpi_device *device) acpi_bus_trim(device); - unlock_device_hotplug(); - /* Device node has been unregistered. */ put_device(&device->dev); device = NULL; @@ -289,6 +282,7 @@ static void acpi_bus_device_eject(void *context) u32 ost_code = ACPI_OST_SC_NON_SPECIFIC_FAILURE; int error; + lock_device_hotplug(); mutex_lock(&acpi_scan_lock); acpi_bus_get_device(handle, &device); @@ -312,6 +306,7 @@ static void acpi_bus_device_eject(void *context) out: mutex_unlock(&acpi_scan_lock); + unlock_device_hotplug(); return; err_out: @@ -326,8 +321,8 @@ static void acpi_scan_bus_device_check(acpi_handle handle, u32 ost_source) u32 ost_code = ACPI_OST_SC_NON_SPECIFIC_FAILURE; int error; - mutex_lock(&acpi_scan_lock); lock_device_hotplug(); + mutex_lock(&acpi_scan_lock); if (ost_source != ACPI_NOTIFY_BUS_CHECK) { acpi_bus_get_device(handle, &device); @@ -353,9 +348,9 @@ static void acpi_scan_bus_device_check(acpi_handle handle, u32 ost_source) kobject_uevent(&device->dev.kobj, KOBJ_ONLINE); out: - unlock_device_hotplug(); acpi_evaluate_hotplug_ost(handle, ost_source, ost_code, NULL); mutex_unlock(&acpi_scan_lock); + unlock_device_hotplug(); } static void acpi_scan_bus_check(void *context) @@ -446,6 +441,7 @@ void acpi_bus_hot_remove_device(void *context) acpi_handle handle = device->handle; int error; + lock_device_hotplug(); mutex_lock(&acpi_scan_lock); error = acpi_scan_hot_remove(device); @@ -455,6 +451,7 @@ void acpi_bus_hot_remove_device(void *context) NULL); mutex_unlock(&acpi_scan_lock); + unlock_device_hotplug(); kfree(context); } EXPORT_SYMBOL(acpi_bus_hot_remove_device); diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 5c75e3147a60..43c24aa756f6 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -280,13 +280,6 @@ static void __cpufreq_notify_transition(struct cpufreq_policy *policy, switch (state) { case CPUFREQ_PRECHANGE: - if (WARN(policy->transition_ongoing == - cpumask_weight(policy->cpus), - "In middle of another frequency transition\n")) - return; - - policy->transition_ongoing++; - /* detect if the driver reported a value as "old frequency" * which is not equal to what the cpufreq core thinks is * "old frequency". @@ -306,12 +299,6 @@ static void __cpufreq_notify_transition(struct cpufreq_policy *policy, break; case CPUFREQ_POSTCHANGE: - if (WARN(!policy->transition_ongoing, - "No frequency transition in progress\n")) - return; - - policy->transition_ongoing--; - adjust_jiffies(CPUFREQ_POSTCHANGE, freqs); pr_debug("FREQ: %lu - CPU: %lu", (unsigned long)freqs->new, (unsigned long)freqs->cpu); @@ -437,7 +424,7 @@ static int __cpufreq_set_policy(struct cpufreq_policy *policy, static ssize_t store_##file_name \ (struct cpufreq_policy *policy, const char *buf, size_t count) \ { \ - unsigned int ret; \ + int ret; \ struct cpufreq_policy new_policy; \ \ ret = cpufreq_get_policy(&new_policy, policy->cpu); \ @@ -490,7 +477,7 @@ static ssize_t show_scaling_governor(struct cpufreq_policy *policy, char *buf) static ssize_t store_scaling_governor(struct cpufreq_policy *policy, const char *buf, size_t count) { - unsigned int ret; + int ret; char str_governor[16]; struct cpufreq_policy new_policy; @@ -694,8 +681,13 @@ static ssize_t store(struct kobject *kobj, struct attribute *attr, struct freq_attr *fattr = to_attr(attr); ssize_t ret = -EINVAL; + get_online_cpus(); + + if (!cpu_online(policy->cpu)) + goto unlock; + if (!down_read_trylock(&cpufreq_rwsem)) - goto exit; + goto unlock; if (lock_policy_rwsem_write(policy->cpu) < 0) goto up_read; @@ -709,7 +701,9 @@ static ssize_t store(struct kobject *kobj, struct attribute *attr, up_read: up_read(&cpufreq_rwsem); -exit: +unlock: + put_online_cpus(); + return ret; } @@ -912,11 +906,11 @@ static struct cpufreq_policy *cpufreq_policy_restore(unsigned int cpu) struct cpufreq_policy *policy; unsigned long flags; - write_lock_irqsave(&cpufreq_driver_lock, flags); + read_lock_irqsave(&cpufreq_driver_lock, flags); policy = per_cpu(cpufreq_cpu_data_fallback, cpu); - write_unlock_irqrestore(&cpufreq_driver_lock, flags); + read_unlock_irqrestore(&cpufreq_driver_lock, flags); return policy; } @@ -953,6 +947,21 @@ static void cpufreq_policy_free(struct cpufreq_policy *policy) kfree(policy); } +static void update_policy_cpu(struct cpufreq_policy *policy, unsigned int cpu) +{ + if (cpu == policy->cpu) + return; + + policy->last_cpu = policy->cpu; + policy->cpu = cpu; + +#ifdef CONFIG_CPU_FREQ_TABLE + cpufreq_frequency_table_update_policy_cpu(policy); +#endif + blocking_notifier_call_chain(&cpufreq_policy_notifier_list, + CPUFREQ_UPDATE_POLICY_CPU, policy); +} + static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif, bool frozen) { @@ -1006,7 +1015,18 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif, if (!policy) goto nomem_out; - policy->cpu = cpu; + + /* + * In the resume path, since we restore a saved policy, the assignment + * to policy->cpu is like an update of the existing policy, rather than + * the creation of a brand new one. So we need to perform this update + * by invoking update_policy_cpu(). + */ + if (frozen && cpu != policy->cpu) + update_policy_cpu(policy, cpu); + else + policy->cpu = cpu; + policy->governor = CPUFREQ_DEFAULT_GOVERNOR; cpumask_copy(policy->cpus, cpumask_of(cpu)); @@ -1098,18 +1118,6 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) return __cpufreq_add_dev(dev, sif, false); } -static void update_policy_cpu(struct cpufreq_policy *policy, unsigned int cpu) -{ - policy->last_cpu = policy->cpu; - policy->cpu = cpu; - -#ifdef CONFIG_CPU_FREQ_TABLE - cpufreq_frequency_table_update_policy_cpu(policy); -#endif - blocking_notifier_call_chain(&cpufreq_policy_notifier_list, - CPUFREQ_UPDATE_POLICY_CPU, policy); -} - static int cpufreq_nominate_new_policy_cpu(struct cpufreq_policy *policy, unsigned int old_cpu, bool frozen) { @@ -1141,22 +1149,14 @@ static int cpufreq_nominate_new_policy_cpu(struct cpufreq_policy *policy, return cpu_dev->id; } -/** - * __cpufreq_remove_dev - remove a CPU device - * - * Removes the cpufreq interface for a CPU device. - * Caller should already have policy_rwsem in write mode for this CPU. - * This routine frees the rwsem before returning. - */ -static int __cpufreq_remove_dev(struct device *dev, - struct subsys_interface *sif, bool frozen) +static int __cpufreq_remove_dev_prepare(struct device *dev, + struct subsys_interface *sif, + bool frozen) { unsigned int cpu = dev->id, cpus; int new_cpu, ret; unsigned long flags; struct cpufreq_policy *policy; - struct kobject *kobj; - struct completion *cmp; pr_debug("%s: unregistering CPU %u\n", __func__, cpu); @@ -1196,8 +1196,9 @@ static int __cpufreq_remove_dev(struct device *dev, cpumask_clear_cpu(cpu, policy->cpus); unlock_policy_rwsem_write(cpu); - if (cpu != policy->cpu && !frozen) { - sysfs_remove_link(&dev->kobj, "cpufreq"); + if (cpu != policy->cpu) { + if (!frozen) + sysfs_remove_link(&dev->kobj, "cpufreq"); } else if (cpus > 1) { new_cpu = cpufreq_nominate_new_policy_cpu(policy, cpu, frozen); @@ -1213,6 +1214,33 @@ static int __cpufreq_remove_dev(struct device *dev, } } + return 0; +} + +static int __cpufreq_remove_dev_finish(struct device *dev, + struct subsys_interface *sif, + bool frozen) +{ + unsigned int cpu = dev->id, cpus; + int ret; + unsigned long flags; + struct cpufreq_policy *policy; + struct kobject *kobj; + struct completion *cmp; + + read_lock_irqsave(&cpufreq_driver_lock, flags); + policy = per_cpu(cpufreq_cpu_data, cpu); + read_unlock_irqrestore(&cpufreq_driver_lock, flags); + + if (!policy) { + pr_debug("%s: No cpu_data found\n", __func__); + return -EINVAL; + } + + lock_policy_rwsem_read(cpu); + cpus = cpumask_weight(policy->cpus); + unlock_policy_rwsem_read(cpu); + /* If cpu is last user of policy, free policy */ if (cpus == 1) { if (cpufreq_driver->target) { @@ -1272,6 +1300,27 @@ static int __cpufreq_remove_dev(struct device *dev, return 0; } +/** + * __cpufreq_remove_dev - remove a CPU device + * + * Removes the cpufreq interface for a CPU device. + * Caller should already have policy_rwsem in write mode for this CPU. + * This routine frees the rwsem before returning. + */ +static inline int __cpufreq_remove_dev(struct device *dev, + struct subsys_interface *sif, + bool frozen) +{ + int ret; + + ret = __cpufreq_remove_dev_prepare(dev, sif, frozen); + + if (!ret) + ret = __cpufreq_remove_dev_finish(dev, sif, frozen); + + return ret; +} + static int cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif) { unsigned int cpu = dev->id; @@ -1610,8 +1659,6 @@ int __cpufreq_driver_target(struct cpufreq_policy *policy, if (cpufreq_disabled()) return -ENODEV; - if (policy->transition_ongoing) - return -EBUSY; /* Make sure that target_freq is within supported range */ if (target_freq > policy->max) @@ -1692,8 +1739,9 @@ static int __cpufreq_governor(struct cpufreq_policy *policy, policy->cpu, event); mutex_lock(&cpufreq_governor_lock); - if ((!policy->governor_enabled && (event == CPUFREQ_GOV_STOP)) || - (policy->governor_enabled && (event == CPUFREQ_GOV_START))) { + if ((policy->governor_enabled && event == CPUFREQ_GOV_START) + || (!policy->governor_enabled + && (event == CPUFREQ_GOV_LIMITS || event == CPUFREQ_GOV_STOP))) { mutex_unlock(&cpufreq_governor_lock); return -EBUSY; } @@ -1994,7 +2042,11 @@ static int cpufreq_cpu_callback(struct notifier_block *nfb, break; case CPU_DOWN_PREPARE: - __cpufreq_remove_dev(dev, NULL, frozen); + __cpufreq_remove_dev_prepare(dev, NULL, frozen); + break; + + case CPU_POST_DEAD: + __cpufreq_remove_dev_finish(dev, NULL, frozen); break; case CPU_DOWN_FAILED: diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c index 04452f026ed0..4cf0d2805cb2 100644 --- a/drivers/cpufreq/cpufreq_stats.c +++ b/drivers/cpufreq/cpufreq_stats.c @@ -74,7 +74,7 @@ static ssize_t show_time_in_state(struct cpufreq_policy *policy, char *buf) for (i = 0; i < stat->state_num; i++) { len += sprintf(buf + len, "%u %llu\n", stat->freq_table[i], (unsigned long long) - cputime64_to_clock_t(stat->time_in_state[i])); + jiffies_64_to_clock_t(stat->time_in_state[i])); } return len; } diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 6efd96c196b2..9733f29ed148 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -522,6 +522,11 @@ static const struct x86_cpu_id intel_pstate_cpu_ids[] = { ICPU(0x2a, default_policy), ICPU(0x2d, default_policy), ICPU(0x3a, default_policy), + ICPU(0x3c, default_policy), + ICPU(0x3e, default_policy), + ICPU(0x3f, default_policy), + ICPU(0x45, default_policy), + ICPU(0x46, default_policy), {} }; MODULE_DEVICE_TABLE(x86cpu, intel_pstate_cpu_ids); diff --git a/drivers/cpuidle/driver.c b/drivers/cpuidle/driver.c index 3ac499d5a207..6e11701f0fca 100644 --- a/drivers/cpuidle/driver.c +++ b/drivers/cpuidle/driver.c @@ -331,7 +331,8 @@ struct cpuidle_driver *cpuidle_driver_ref(void) spin_lock(&cpuidle_driver_lock); drv = cpuidle_get_driver(); - drv->refcnt++; + if (drv) + drv->refcnt++; spin_unlock(&cpuidle_driver_lock); return drv; diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 9b265a4c6a3d..c27a21034a5e 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -1676,7 +1676,7 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags) return 0; out_gem_unload: - if (dev_priv->mm.inactive_shrinker.shrink) + if (dev_priv->mm.inactive_shrinker.scan_objects) unregister_shrinker(&dev_priv->mm.inactive_shrinker); if (dev->pdev->msi_enabled) @@ -1715,7 +1715,7 @@ int i915_driver_unload(struct drm_device *dev) i915_teardown_sysfs(dev); - if (dev_priv->mm.inactive_shrinker.shrink) + if (dev_priv->mm.inactive_shrinker.scan_objects) unregister_shrinker(&dev_priv->mm.inactive_shrinker); mutex_lock(&dev->struct_mutex); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index d9e337feef14..8507c6d1e642 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -57,10 +57,12 @@ static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj, struct drm_i915_fence_reg *fence, bool enable); -static int i915_gem_inactive_shrink(struct shrinker *shrinker, - struct shrink_control *sc); +static unsigned long i915_gem_inactive_count(struct shrinker *shrinker, + struct shrink_control *sc); +static unsigned long i915_gem_inactive_scan(struct shrinker *shrinker, + struct shrink_control *sc); static long i915_gem_purge(struct drm_i915_private *dev_priv, long target); -static void i915_gem_shrink_all(struct drm_i915_private *dev_priv); +static long i915_gem_shrink_all(struct drm_i915_private *dev_priv); static void i915_gem_object_truncate(struct drm_i915_gem_object *obj); static bool cpu_cache_is_coherent(struct drm_device *dev, @@ -1769,16 +1771,21 @@ i915_gem_purge(struct drm_i915_private *dev_priv, long target) return __i915_gem_shrink(dev_priv, target, true); } -static void +static long i915_gem_shrink_all(struct drm_i915_private *dev_priv) { struct drm_i915_gem_object *obj, *next; + long freed = 0; i915_gem_evict_everything(dev_priv->dev); list_for_each_entry_safe(obj, next, &dev_priv->mm.unbound_list, - global_list) + global_list) { + if (obj->pages_pin_count == 0) + freed += obj->base.size >> PAGE_SHIFT; i915_gem_object_put_pages(obj); + } + return freed; } static int @@ -4558,7 +4565,8 @@ i915_gem_load(struct drm_device *dev) dev_priv->mm.interruptible = true; - dev_priv->mm.inactive_shrinker.shrink = i915_gem_inactive_shrink; + dev_priv->mm.inactive_shrinker.scan_objects = i915_gem_inactive_scan; + dev_priv->mm.inactive_shrinker.count_objects = i915_gem_inactive_count; dev_priv->mm.inactive_shrinker.seeks = DEFAULT_SEEKS; register_shrinker(&dev_priv->mm.inactive_shrinker); } @@ -4781,8 +4789,8 @@ static bool mutex_is_locked_by(struct mutex *mutex, struct task_struct *task) #endif } -static int -i915_gem_inactive_shrink(struct shrinker *shrinker, struct shrink_control *sc) +static unsigned long +i915_gem_inactive_count(struct shrinker *shrinker, struct shrink_control *sc) { struct drm_i915_private *dev_priv = container_of(shrinker, @@ -4790,45 +4798,35 @@ i915_gem_inactive_shrink(struct shrinker *shrinker, struct shrink_control *sc) mm.inactive_shrinker); struct drm_device *dev = dev_priv->dev; struct drm_i915_gem_object *obj; - int nr_to_scan = sc->nr_to_scan; bool unlock = true; - int cnt; + unsigned long count; if (!mutex_trylock(&dev->struct_mutex)) { if (!mutex_is_locked_by(&dev->struct_mutex, current)) - return 0; + return SHRINK_STOP; if (dev_priv->mm.shrinker_no_lock_stealing) - return 0; + return SHRINK_STOP; unlock = false; } - if (nr_to_scan) { - nr_to_scan -= i915_gem_purge(dev_priv, nr_to_scan); - if (nr_to_scan > 0) - nr_to_scan -= __i915_gem_shrink(dev_priv, nr_to_scan, - false); - if (nr_to_scan > 0) - i915_gem_shrink_all(dev_priv); - } - - cnt = 0; + count = 0; list_for_each_entry(obj, &dev_priv->mm.unbound_list, global_list) if (obj->pages_pin_count == 0) - cnt += obj->base.size >> PAGE_SHIFT; + count += obj->base.size >> PAGE_SHIFT; list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) { if (obj->active) continue; if (obj->pin_count == 0 && obj->pages_pin_count == 0) - cnt += obj->base.size >> PAGE_SHIFT; + count += obj->base.size >> PAGE_SHIFT; } if (unlock) mutex_unlock(&dev->struct_mutex); - return cnt; + return count; } /* All the new VM stuff */ @@ -4892,6 +4890,40 @@ unsigned long i915_gem_obj_size(struct drm_i915_gem_object *o, return 0; } +static unsigned long +i915_gem_inactive_scan(struct shrinker *shrinker, struct shrink_control *sc) +{ + struct drm_i915_private *dev_priv = + container_of(shrinker, + struct drm_i915_private, + mm.inactive_shrinker); + struct drm_device *dev = dev_priv->dev; + int nr_to_scan = sc->nr_to_scan; + unsigned long freed; + bool unlock = true; + + if (!mutex_trylock(&dev->struct_mutex)) { + if (!mutex_is_locked_by(&dev->struct_mutex, current)) + return 0; + + if (dev_priv->mm.shrinker_no_lock_stealing) + return 0; + + unlock = false; + } + + freed = i915_gem_purge(dev_priv, nr_to_scan); + if (freed < nr_to_scan) + freed += __i915_gem_shrink(dev_priv, nr_to_scan, + false); + if (freed < nr_to_scan) + freed += i915_gem_shrink_all(dev_priv); + + if (unlock) + mutex_unlock(&dev->struct_mutex); + return freed; +} + struct i915_vma *i915_gem_obj_to_vma(struct drm_i915_gem_object *obj, struct i915_address_space *vm) { diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.c b/drivers/gpu/drm/ttm/ttm_page_alloc.c index bd2a3b40cd12..863bef9f9234 100644 --- a/drivers/gpu/drm/ttm/ttm_page_alloc.c +++ b/drivers/gpu/drm/ttm/ttm_page_alloc.c @@ -377,28 +377,26 @@ out: return nr_free; } -/* Get good estimation how many pages are free in pools */ -static int ttm_pool_get_num_unused_pages(void) -{ - unsigned i; - int total = 0; - for (i = 0; i < NUM_POOLS; ++i) - total += _manager->pools[i].npages; - - return total; -} - /** * Callback for mm to request pool to reduce number of page held. + * + * XXX: (dchinner) Deadlock warning! + * + * ttm_page_pool_free() does memory allocation using GFP_KERNEL. that means + * this can deadlock when called a sc->gfp_mask that is not equal to + * GFP_KERNEL. + * + * This code is crying out for a shrinker per pool.... */ -static int ttm_pool_mm_shrink(struct shrinker *shrink, - struct shrink_control *sc) +static unsigned long +ttm_pool_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) { static atomic_t start_pool = ATOMIC_INIT(0); unsigned i; unsigned pool_offset = atomic_add_return(1, &start_pool); struct ttm_page_pool *pool; int shrink_pages = sc->nr_to_scan; + unsigned long freed = 0; pool_offset = pool_offset % NUM_POOLS; /* select start pool in round robin fashion */ @@ -408,14 +406,28 @@ static int ttm_pool_mm_shrink(struct shrinker *shrink, break; pool = &_manager->pools[(i + pool_offset)%NUM_POOLS]; shrink_pages = ttm_page_pool_free(pool, nr_free); + freed += nr_free - shrink_pages; } - /* return estimated number of unused pages in pool */ - return ttm_pool_get_num_unused_pages(); + return freed; +} + + +static unsigned long +ttm_pool_shrink_count(struct shrinker *shrink, struct shrink_control *sc) +{ + unsigned i; + unsigned long count = 0; + + for (i = 0; i < NUM_POOLS; ++i) + count += _manager->pools[i].npages; + + return count; } static void ttm_pool_mm_shrink_init(struct ttm_pool_manager *manager) { - manager->mm_shrink.shrink = &ttm_pool_mm_shrink; + manager->mm_shrink.count_objects = ttm_pool_shrink_count; + manager->mm_shrink.scan_objects = ttm_pool_shrink_scan; manager->mm_shrink.seeks = 1; register_shrinker(&manager->mm_shrink); } diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c b/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c index b8b394319b45..7957beeeaf73 100644 --- a/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c +++ b/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c @@ -918,19 +918,6 @@ int ttm_dma_populate(struct ttm_dma_tt *ttm_dma, struct device *dev) } EXPORT_SYMBOL_GPL(ttm_dma_populate); -/* Get good estimation how many pages are free in pools */ -static int ttm_dma_pool_get_num_unused_pages(void) -{ - struct device_pools *p; - unsigned total = 0; - - mutex_lock(&_manager->lock); - list_for_each_entry(p, &_manager->pools, pools) - total += p->pool->npages_free; - mutex_unlock(&_manager->lock); - return total; -} - /* Put all pages in pages list to correct pool to wait for reuse */ void ttm_dma_unpopulate(struct ttm_dma_tt *ttm_dma, struct device *dev) { @@ -1002,18 +989,29 @@ EXPORT_SYMBOL_GPL(ttm_dma_unpopulate); /** * Callback for mm to request pool to reduce number of page held. + * + * XXX: (dchinner) Deadlock warning! + * + * ttm_dma_page_pool_free() does GFP_KERNEL memory allocation, and so attention + * needs to be paid to sc->gfp_mask to determine if this can be done or not. + * GFP_KERNEL memory allocation in a GFP_ATOMIC reclaim context woul dbe really + * bad. + * + * I'm getting sadder as I hear more pathetical whimpers about needing per-pool + * shrinkers */ -static int ttm_dma_pool_mm_shrink(struct shrinker *shrink, - struct shrink_control *sc) +static unsigned long +ttm_dma_pool_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) { static atomic_t start_pool = ATOMIC_INIT(0); unsigned idx = 0; unsigned pool_offset = atomic_add_return(1, &start_pool); unsigned shrink_pages = sc->nr_to_scan; struct device_pools *p; + unsigned long freed = 0; if (list_empty(&_manager->pools)) - return 0; + return SHRINK_STOP; mutex_lock(&_manager->lock); pool_offset = pool_offset % _manager->npools; @@ -1029,18 +1027,33 @@ static int ttm_dma_pool_mm_shrink(struct shrinker *shrink, continue; nr_free = shrink_pages; shrink_pages = ttm_dma_page_pool_free(p->pool, nr_free); + freed += nr_free - shrink_pages; + pr_debug("%s: (%s:%d) Asked to shrink %d, have %d more to go\n", p->pool->dev_name, p->pool->name, current->pid, nr_free, shrink_pages); } mutex_unlock(&_manager->lock); - /* return estimated number of unused pages in pool */ - return ttm_dma_pool_get_num_unused_pages(); + return freed; +} + +static unsigned long +ttm_dma_pool_shrink_count(struct shrinker *shrink, struct shrink_control *sc) +{ + struct device_pools *p; + unsigned long count = 0; + + mutex_lock(&_manager->lock); + list_for_each_entry(p, &_manager->pools, pools) + count += p->pool->npages_free; + mutex_unlock(&_manager->lock); + return count; } static void ttm_dma_pool_mm_shrink_init(struct ttm_pool_manager *manager) { - manager->mm_shrink.shrink = &ttm_dma_pool_mm_shrink; + manager->mm_shrink.count_objects = ttm_dma_pool_shrink_count; + manager->mm_shrink.scan_objects = &ttm_dma_pool_shrink_scan; manager->mm_shrink.seeks = 1; register_shrinker(&manager->mm_shrink); } diff --git a/drivers/i2c/busses/i2c-davinci.c b/drivers/i2c/busses/i2c-davinci.c index 57473415be10..132369fad4e0 100644 --- a/drivers/i2c/busses/i2c-davinci.c +++ b/drivers/i2c/busses/i2c-davinci.c @@ -662,7 +662,7 @@ static int davinci_i2c_probe(struct platform_device *pdev) #endif dev->dev = &pdev->dev; dev->irq = irq->start; - dev->pdata = dev_get_platdata(&dev->dev); + dev->pdata = dev_get_platdata(&pdev->dev); platform_set_drvdata(pdev, dev); if (!dev->pdata && pdev->dev.of_node) { diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index 820d85c4a4a0..fe302e33f72e 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -17,6 +17,16 @@ config OF_IOMMU def_bool y depends on OF +config FSL_PAMU + bool "Freescale IOMMU support" + depends on PPC_E500MC + select IOMMU_API + select GENERIC_ALLOCATOR + help + Freescale PAMU support. PAMU is the IOMMU present on Freescale QorIQ platforms. + PAMU can authorize memory access, remap the memory address, and remap I/O + transaction types. + # MSM IOMMU support config MSM_IOMMU bool "MSM IOMMU Support" diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile index bbe7041212dd..14c1f474cf11 100644 --- a/drivers/iommu/Makefile +++ b/drivers/iommu/Makefile @@ -16,3 +16,4 @@ obj-$(CONFIG_TEGRA_IOMMU_SMMU) += tegra-smmu.o obj-$(CONFIG_EXYNOS_IOMMU) += exynos-iommu.o obj-$(CONFIG_SHMOBILE_IOMMU) += shmobile-iommu.o obj-$(CONFIG_SHMOBILE_IPMMU) += shmobile-ipmmu.o +obj-$(CONFIG_FSL_PAMU) += fsl_pamu.o fsl_pamu_domain.o diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 6dc659426a51..72531f008a5e 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -456,8 +456,10 @@ static int iommu_init_device(struct device *dev) } ret = init_iommu_group(dev); - if (ret) + if (ret) { + free_dev_data(dev_data); return ret; + } if (pci_iommuv2_capable(pdev)) { struct amd_iommu *iommu; diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c index 7acbf351e9af..8f798be6e398 100644 --- a/drivers/iommu/amd_iommu_init.c +++ b/drivers/iommu/amd_iommu_init.c @@ -1384,7 +1384,7 @@ static int iommu_init_msi(struct amd_iommu *iommu) if (iommu->int_enabled) goto enable_faults; - if (pci_find_capability(iommu->dev, PCI_CAP_ID_MSI)) + if (iommu->dev->msi_cap) ret = iommu_setup_msi(iommu); else ret = -ENODEV; diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index ebd0a4cff049..f417e89e1e7e 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -56,9 +56,6 @@ /* Maximum number of mapping groups per SMMU */ #define ARM_SMMU_MAX_SMRS 128 -/* Number of VMIDs per SMMU */ -#define ARM_SMMU_NUM_VMIDS 256 - /* SMMU global address space */ #define ARM_SMMU_GR0(smmu) ((smmu)->base) #define ARM_SMMU_GR1(smmu) ((smmu)->base + (smmu)->pagesize) @@ -87,6 +84,7 @@ #define ARM_SMMU_PTE_AP_UNPRIV (((pteval_t)1) << 6) #define ARM_SMMU_PTE_AP_RDONLY (((pteval_t)2) << 6) #define ARM_SMMU_PTE_ATTRINDX_SHIFT 2 +#define ARM_SMMU_PTE_nG (((pteval_t)1) << 11) /* Stage-2 PTE */ #define ARM_SMMU_PTE_HAP_FAULT (((pteval_t)0) << 6) @@ -223,6 +221,7 @@ #define ARM_SMMU_CB_FAR_LO 0x60 #define ARM_SMMU_CB_FAR_HI 0x64 #define ARM_SMMU_CB_FSYNR0 0x68 +#define ARM_SMMU_CB_S1_TLBIASID 0x610 #define SCTLR_S1_ASIDPNE (1 << 12) #define SCTLR_CFCFG (1 << 7) @@ -282,6 +281,8 @@ #define TTBCR2_ADDR_44 4 #define TTBCR2_ADDR_48 5 +#define TTBRn_HI_ASID_SHIFT 16 + #define MAIR_ATTR_SHIFT(n) ((n) << 3) #define MAIR_ATTR_MASK 0xff #define MAIR_ATTR_DEVICE 0x04 @@ -305,7 +306,7 @@ #define FSR_IGN (FSR_AFF | FSR_ASF | FSR_TLBMCF | \ FSR_TLBLKF) #define FSR_FAULT (FSR_MULTI | FSR_SS | FSR_UUT | \ - FSR_EF | FSR_PF | FSR_TF) + FSR_EF | FSR_PF | FSR_TF | FSR_IGN) #define FSYNR0_WNR (1 << 4) @@ -365,21 +366,21 @@ struct arm_smmu_device { u32 num_context_irqs; unsigned int *irqs; - DECLARE_BITMAP(vmid_map, ARM_SMMU_NUM_VMIDS); - struct list_head list; struct rb_root masters; }; struct arm_smmu_cfg { struct arm_smmu_device *smmu; - u8 vmid; u8 cbndx; u8 irptndx; u32 cbar; pgd_t *pgd; }; +#define ARM_SMMU_CB_ASID(cfg) ((cfg)->cbndx) +#define ARM_SMMU_CB_VMID(cfg) ((cfg)->cbndx + 1) + struct arm_smmu_domain { /* * A domain can span across multiple, chained SMMUs and requires @@ -533,6 +534,25 @@ static void arm_smmu_tlb_sync(struct arm_smmu_device *smmu) } } +static void arm_smmu_tlb_inv_context(struct arm_smmu_cfg *cfg) +{ + struct arm_smmu_device *smmu = cfg->smmu; + void __iomem *base = ARM_SMMU_GR0(smmu); + bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS; + + if (stage1) { + base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx); + writel_relaxed(ARM_SMMU_CB_ASID(cfg), + base + ARM_SMMU_CB_S1_TLBIASID); + } else { + base = ARM_SMMU_GR0(smmu); + writel_relaxed(ARM_SMMU_CB_VMID(cfg), + base + ARM_SMMU_GR0_TLBIVMID); + } + + arm_smmu_tlb_sync(smmu); +} + static irqreturn_t arm_smmu_context_fault(int irq, void *dev) { int flags, ret; @@ -590,6 +610,9 @@ static irqreturn_t arm_smmu_global_fault(int irq, void *dev) void __iomem *gr0_base = ARM_SMMU_GR0(smmu); gfsr = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSR); + if (!gfsr) + return IRQ_NONE; + gfsynr0 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR0); gfsynr1 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR1); gfsynr2 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR2); @@ -601,7 +624,7 @@ static irqreturn_t arm_smmu_global_fault(int irq, void *dev) gfsr, gfsynr0, gfsynr1, gfsynr2); writel(gfsr, gr0_base + ARM_SMMU_GR0_sGFSR); - return IRQ_NONE; + return IRQ_HANDLED; } static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain) @@ -618,14 +641,15 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain) cb_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, root_cfg->cbndx); /* CBAR */ - reg = root_cfg->cbar | - (root_cfg->vmid << CBAR_VMID_SHIFT); + reg = root_cfg->cbar; if (smmu->version == 1) reg |= root_cfg->irptndx << CBAR_IRPTNDX_SHIFT; /* Use the weakest memory type, so it is overridden by the pte */ if (stage1) reg |= (CBAR_S1_MEMATTR_WB << CBAR_S1_MEMATTR_SHIFT); + else + reg |= ARM_SMMU_CB_VMID(root_cfg) << CBAR_VMID_SHIFT; writel_relaxed(reg, gr1_base + ARM_SMMU_GR1_CBAR(root_cfg->cbndx)); if (smmu->version > 1) { @@ -687,15 +711,11 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain) /* TTBR0 */ reg = __pa(root_cfg->pgd); -#ifndef __BIG_ENDIAN writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR0_LO); reg = (phys_addr_t)__pa(root_cfg->pgd) >> 32; + if (stage1) + reg |= ARM_SMMU_CB_ASID(root_cfg) << TTBRn_HI_ASID_SHIFT; writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR0_HI); -#else - writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR0_HI); - reg = (phys_addr_t)__pa(root_cfg->pgd) >> 32; - writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR0_LO); -#endif /* * TTBCR @@ -750,10 +770,6 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain) writel_relaxed(reg, cb_base + ARM_SMMU_CB_S1_MAIR0); } - /* Nuke the TLB */ - writel_relaxed(root_cfg->vmid, gr0_base + ARM_SMMU_GR0_TLBIVMID); - arm_smmu_tlb_sync(smmu); - /* SCTLR */ reg = SCTLR_CFCFG | SCTLR_CFIE | SCTLR_CFRE | SCTLR_M | SCTLR_EAE_SBOP; if (stage1) @@ -790,11 +806,6 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain, return -ENODEV; } - ret = __arm_smmu_alloc_bitmap(smmu->vmid_map, 0, ARM_SMMU_NUM_VMIDS); - if (IS_ERR_VALUE(ret)) - return ret; - - root_cfg->vmid = ret; if (smmu->features & ARM_SMMU_FEAT_TRANS_NESTED) { /* * We will likely want to change this if/when KVM gets @@ -813,10 +824,9 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain, ret = __arm_smmu_alloc_bitmap(smmu->context_map, start, smmu->num_context_banks); if (IS_ERR_VALUE(ret)) - goto out_free_vmid; + return ret; root_cfg->cbndx = ret; - if (smmu->version == 1) { root_cfg->irptndx = atomic_inc_return(&smmu->irptndx); root_cfg->irptndx %= smmu->num_context_irqs; @@ -840,8 +850,6 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain, out_free_context: __arm_smmu_free_bitmap(smmu->context_map, root_cfg->cbndx); -out_free_vmid: - __arm_smmu_free_bitmap(smmu->vmid_map, root_cfg->vmid); return ret; } @@ -850,17 +858,22 @@ static void arm_smmu_destroy_domain_context(struct iommu_domain *domain) struct arm_smmu_domain *smmu_domain = domain->priv; struct arm_smmu_cfg *root_cfg = &smmu_domain->root_cfg; struct arm_smmu_device *smmu = root_cfg->smmu; + void __iomem *cb_base; int irq; if (!smmu) return; + /* Disable the context bank and nuke the TLB before freeing it. */ + cb_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, root_cfg->cbndx); + writel_relaxed(0, cb_base + ARM_SMMU_CB_SCTLR); + arm_smmu_tlb_inv_context(root_cfg); + if (root_cfg->irptndx != -1) { irq = smmu->irqs[smmu->num_global_irqs + root_cfg->irptndx]; free_irq(irq, domain); } - __arm_smmu_free_bitmap(smmu->vmid_map, root_cfg->vmid); __arm_smmu_free_bitmap(smmu->context_map, root_cfg->cbndx); } @@ -959,6 +972,11 @@ static void arm_smmu_free_pgtables(struct arm_smmu_domain *smmu_domain) static void arm_smmu_domain_destroy(struct iommu_domain *domain) { struct arm_smmu_domain *smmu_domain = domain->priv; + + /* + * Free the domain resources. We assume that all devices have + * already been detached. + */ arm_smmu_destroy_domain_context(domain); arm_smmu_free_pgtables(smmu_domain); kfree(smmu_domain); @@ -1199,7 +1217,7 @@ static int arm_smmu_alloc_init_pte(struct arm_smmu_device *smmu, pmd_t *pmd, } if (stage == 1) { - pteval |= ARM_SMMU_PTE_AP_UNPRIV; + pteval |= ARM_SMMU_PTE_AP_UNPRIV | ARM_SMMU_PTE_nG; if (!(flags & IOMMU_WRITE) && (flags & IOMMU_READ)) pteval |= ARM_SMMU_PTE_AP_RDONLY; @@ -1415,13 +1433,9 @@ static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova, { int ret; struct arm_smmu_domain *smmu_domain = domain->priv; - struct arm_smmu_cfg *root_cfg = &smmu_domain->root_cfg; - struct arm_smmu_device *smmu = root_cfg->smmu; - void __iomem *gr0_base = ARM_SMMU_GR0(smmu); ret = arm_smmu_handle_mapping(smmu_domain, iova, 0, size, 0); - writel_relaxed(root_cfg->vmid, gr0_base + ARM_SMMU_GR0_TLBIVMID); - arm_smmu_tlb_sync(smmu); + arm_smmu_tlb_inv_context(&smmu_domain->root_cfg); return ret ? ret : size; } @@ -1544,6 +1558,7 @@ static struct iommu_ops arm_smmu_ops = { static void arm_smmu_device_reset(struct arm_smmu_device *smmu) { void __iomem *gr0_base = ARM_SMMU_GR0(smmu); + void __iomem *sctlr_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB_SCTLR; int i = 0; u32 scr0 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sCR0); @@ -1553,6 +1568,10 @@ static void arm_smmu_device_reset(struct arm_smmu_device *smmu) writel_relaxed(S2CR_TYPE_BYPASS, gr0_base + ARM_SMMU_GR0_S2CR(i)); } + /* Make sure all context banks are disabled */ + for (i = 0; i < smmu->num_context_banks; ++i) + writel_relaxed(0, sctlr_base + ARM_SMMU_CB(smmu, i)); + /* Invalidate the TLB, just in case */ writel_relaxed(0, gr0_base + ARM_SMMU_GR0_STLBIALL); writel_relaxed(0, gr0_base + ARM_SMMU_GR0_TLBIALLH); @@ -1906,7 +1925,7 @@ static int arm_smmu_device_remove(struct platform_device *pdev) of_node_put(master->of_node); } - if (!bitmap_empty(smmu->vmid_map, ARM_SMMU_NUM_VMIDS)) + if (!bitmap_empty(smmu->context_map, ARM_SMMU_MAX_CBS)) dev_err(dev, "removing device with active domains!\n"); for (i = 0; i < smmu->num_global_irqs; ++i) diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c index 3f32d64ab87a..074018979cdf 100644 --- a/drivers/iommu/exynos-iommu.c +++ b/drivers/iommu/exynos-iommu.c @@ -247,50 +247,6 @@ static void __sysmmu_set_prefbuf(void __iomem *sfrbase, unsigned long base, __raw_writel(size - 1 + base, sfrbase + REG_PB0_EADDR + idx * 8); } -void exynos_sysmmu_set_prefbuf(struct device *dev, - unsigned long base0, unsigned long size0, - unsigned long base1, unsigned long size1) -{ - struct sysmmu_drvdata *data = dev_get_drvdata(dev->archdata.iommu); - unsigned long flags; - int i; - - BUG_ON((base0 + size0) <= base0); - BUG_ON((size1 > 0) && ((base1 + size1) <= base1)); - - read_lock_irqsave(&data->lock, flags); - if (!is_sysmmu_active(data)) - goto finish; - - for (i = 0; i < data->nsfrs; i++) { - if ((readl(data->sfrbases[i] + REG_MMU_VERSION) >> 28) == 3) { - if (!sysmmu_block(data->sfrbases[i])) - continue; - - if (size1 == 0) { - if (size0 <= SZ_128K) { - base1 = base0; - size1 = size0; - } else { - size1 = size0 - - ALIGN(size0 / 2, SZ_64K); - size0 = size0 - size1; - base1 = base0 + size0; - } - } - - __sysmmu_set_prefbuf( - data->sfrbases[i], base0, size0, 0); - __sysmmu_set_prefbuf( - data->sfrbases[i], base1, size1, 1); - - sysmmu_unblock(data->sfrbases[i]); - } - } -finish: - read_unlock_irqrestore(&data->lock, flags); -} - static void __set_fault_handler(struct sysmmu_drvdata *data, sysmmu_fault_handler_t handler) { diff --git a/drivers/iommu/fsl_pamu.c b/drivers/iommu/fsl_pamu.c new file mode 100644 index 000000000000..cba0498eb011 --- /dev/null +++ b/drivers/iommu/fsl_pamu.c @@ -0,0 +1,1309 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright (C) 2013 Freescale Semiconductor, Inc. + * + */ + +#define pr_fmt(fmt) "fsl-pamu: %s: " fmt, __func__ + +#include <linux/init.h> +#include <linux/iommu.h> +#include <linux/slab.h> +#include <linux/module.h> +#include <linux/types.h> +#include <linux/mm.h> +#include <linux/interrupt.h> +#include <linux/device.h> +#include <linux/of_platform.h> +#include <linux/bootmem.h> +#include <linux/genalloc.h> +#include <asm/io.h> +#include <asm/bitops.h> +#include <asm/fsl_guts.h> + +#include "fsl_pamu.h" + +/* define indexes for each operation mapping scenario */ +#define OMI_QMAN 0x00 +#define OMI_FMAN 0x01 +#define OMI_QMAN_PRIV 0x02 +#define OMI_CAAM 0x03 + +#define make64(high, low) (((u64)(high) << 32) | (low)) + +struct pamu_isr_data { + void __iomem *pamu_reg_base; /* Base address of PAMU regs*/ + unsigned int count; /* The number of PAMUs */ +}; + +static struct paace *ppaact; +static struct paace *spaact; +static struct ome *omt; + +/* + * Table for matching compatible strings, for device tree + * guts node, for QorIQ SOCs. + * "fsl,qoriq-device-config-2.0" corresponds to T4 & B4 + * SOCs. For the older SOCs "fsl,qoriq-device-config-1.0" + * string would be used. +*/ +static const struct of_device_id guts_device_ids[] = { + { .compatible = "fsl,qoriq-device-config-1.0", }, + { .compatible = "fsl,qoriq-device-config-2.0", }, + {} +}; + + +/* + * Table for matching compatible strings, for device tree + * L3 cache controller node. + * "fsl,t4240-l3-cache-controller" corresponds to T4, + * "fsl,b4860-l3-cache-controller" corresponds to B4 & + * "fsl,p4080-l3-cache-controller" corresponds to other, + * SOCs. +*/ +static const struct of_device_id l3_device_ids[] = { + { .compatible = "fsl,t4240-l3-cache-controller", }, + { .compatible = "fsl,b4860-l3-cache-controller", }, + { .compatible = "fsl,p4080-l3-cache-controller", }, + {} +}; + +/* maximum subwindows permitted per liodn */ +static u32 max_subwindow_count; + +/* Pool for fspi allocation */ +struct gen_pool *spaace_pool; + +/** + * pamu_get_max_subwin_cnt() - Return the maximum supported + * subwindow count per liodn. + * + */ +u32 pamu_get_max_subwin_cnt() +{ + return max_subwindow_count; +} + +/** + * pamu_get_ppaace() - Return the primary PACCE + * @liodn: liodn PAACT index for desired PAACE + * + * Returns the ppace pointer upon success else return + * null. + */ +static struct paace *pamu_get_ppaace(int liodn) +{ + if (!ppaact || liodn >= PAACE_NUMBER_ENTRIES) { + pr_debug("PPAACT doesn't exist\n"); + return NULL; + } + + return &ppaact[liodn]; +} + +/** + * pamu_enable_liodn() - Set valid bit of PACCE + * @liodn: liodn PAACT index for desired PAACE + * + * Returns 0 upon success else error code < 0 returned + */ +int pamu_enable_liodn(int liodn) +{ + struct paace *ppaace; + + ppaace = pamu_get_ppaace(liodn); + if (!ppaace) { + pr_debug("Invalid primary paace entry\n"); + return -ENOENT; + } + + if (!get_bf(ppaace->addr_bitfields, PPAACE_AF_WSE)) { + pr_debug("liodn %d not configured\n", liodn); + return -EINVAL; + } + + /* Ensure that all other stores to the ppaace complete first */ + mb(); + + set_bf(ppaace->addr_bitfields, PAACE_AF_V, PAACE_V_VALID); + mb(); + + return 0; +} + +/** + * pamu_disable_liodn() - Clears valid bit of PACCE + * @liodn: liodn PAACT index for desired PAACE + * + * Returns 0 upon success else error code < 0 returned + */ +int pamu_disable_liodn(int liodn) +{ + struct paace *ppaace; + + ppaace = pamu_get_ppaace(liodn); + if (!ppaace) { + pr_debug("Invalid primary paace entry\n"); + return -ENOENT; + } + + set_bf(ppaace->addr_bitfields, PAACE_AF_V, PAACE_V_INVALID); + mb(); + + return 0; +} + +/* Derive the window size encoding for a particular PAACE entry */ +static unsigned int map_addrspace_size_to_wse(phys_addr_t addrspace_size) +{ + /* Bug if not a power of 2 */ + BUG_ON(!is_power_of_2(addrspace_size)); + + /* window size is 2^(WSE+1) bytes */ + return __ffs(addrspace_size) - 1; +} + +/* Derive the PAACE window count encoding for the subwindow count */ +static unsigned int map_subwindow_cnt_to_wce(u32 subwindow_cnt) +{ + /* window count is 2^(WCE+1) bytes */ + return __ffs(subwindow_cnt) - 1; +} + +/* + * Set the PAACE type as primary and set the coherency required domain + * attribute + */ +static void pamu_init_ppaace(struct paace *ppaace) +{ + set_bf(ppaace->addr_bitfields, PAACE_AF_PT, PAACE_PT_PRIMARY); + + set_bf(ppaace->domain_attr.to_host.coherency_required, PAACE_DA_HOST_CR, + PAACE_M_COHERENCE_REQ); +} + +/* + * Set the PAACE type as secondary and set the coherency required domain + * attribute. + */ +static void pamu_init_spaace(struct paace *spaace) +{ + set_bf(spaace->addr_bitfields, PAACE_AF_PT, PAACE_PT_SECONDARY); + set_bf(spaace->domain_attr.to_host.coherency_required, PAACE_DA_HOST_CR, + PAACE_M_COHERENCE_REQ); +} + +/* + * Return the spaace (corresponding to the secondary window index) + * for a particular ppaace. + */ +static struct paace *pamu_get_spaace(struct paace *paace, u32 wnum) +{ + u32 subwin_cnt; + struct paace *spaace = NULL; + + subwin_cnt = 1UL << (get_bf(paace->impl_attr, PAACE_IA_WCE) + 1); + + if (wnum < subwin_cnt) + spaace = &spaact[paace->fspi + wnum]; + else + pr_debug("secondary paace out of bounds\n"); + + return spaace; +} + +/** + * pamu_get_fspi_and_allocate() - Allocates fspi index and reserves subwindows + * required for primary PAACE in the secondary + * PAACE table. + * @subwin_cnt: Number of subwindows to be reserved. + * + * A PPAACE entry may have a number of associated subwindows. A subwindow + * corresponds to a SPAACE entry in the SPAACT table. Each PAACE entry stores + * the index (fspi) of the first SPAACE entry in the SPAACT table. This + * function returns the index of the first SPAACE entry. The remaining + * SPAACE entries are reserved contiguously from that index. + * + * Returns a valid fspi index in the range of 0 - SPAACE_NUMBER_ENTRIES on success. + * If no SPAACE entry is available or the allocator can not reserve the required + * number of contiguous entries function returns ULONG_MAX indicating a failure. + * +*/ +static unsigned long pamu_get_fspi_and_allocate(u32 subwin_cnt) +{ + unsigned long spaace_addr; + + spaace_addr = gen_pool_alloc(spaace_pool, subwin_cnt * sizeof(struct paace)); + if (!spaace_addr) + return ULONG_MAX; + + return (spaace_addr - (unsigned long)spaact) / (sizeof(struct paace)); +} + +/* Release the subwindows reserved for a particular LIODN */ +void pamu_free_subwins(int liodn) +{ + struct paace *ppaace; + u32 subwin_cnt, size; + + ppaace = pamu_get_ppaace(liodn); + if (!ppaace) { + pr_debug("Invalid liodn entry\n"); + return; + } + + if (get_bf(ppaace->addr_bitfields, PPAACE_AF_MW)) { + subwin_cnt = 1UL << (get_bf(ppaace->impl_attr, PAACE_IA_WCE) + 1); + size = (subwin_cnt - 1) * sizeof(struct paace); + gen_pool_free(spaace_pool, (unsigned long)&spaact[ppaace->fspi], size); + set_bf(ppaace->addr_bitfields, PPAACE_AF_MW, 0); + } +} + +/* + * Function used for updating stash destination for the coressponding + * LIODN. + */ +int pamu_update_paace_stash(int liodn, u32 subwin, u32 value) +{ + struct paace *paace; + + paace = pamu_get_ppaace(liodn); + if (!paace) { + pr_debug("Invalid liodn entry\n"); + return -ENOENT; + } + if (subwin) { + paace = pamu_get_spaace(paace, subwin - 1); + if (!paace) { + return -ENOENT; + } + } + set_bf(paace->impl_attr, PAACE_IA_CID, value); + + mb(); + + return 0; +} + +/* Disable a subwindow corresponding to the LIODN */ +int pamu_disable_spaace(int liodn, u32 subwin) +{ + struct paace *paace; + + paace = pamu_get_ppaace(liodn); + if (!paace) { + pr_debug("Invalid liodn entry\n"); + return -ENOENT; + } + if (subwin) { + paace = pamu_get_spaace(paace, subwin - 1); + if (!paace) { + return -ENOENT; + } + set_bf(paace->addr_bitfields, PAACE_AF_V, + PAACE_V_INVALID); + } else { + set_bf(paace->addr_bitfields, PAACE_AF_AP, + PAACE_AP_PERMS_DENIED); + } + + mb(); + + return 0; +} + + +/** + * pamu_config_paace() - Sets up PPAACE entry for specified liodn + * + * @liodn: Logical IO device number + * @win_addr: starting address of DSA window + * @win-size: size of DSA window + * @omi: Operation mapping index -- if ~omi == 0 then omi not defined + * @rpn: real (true physical) page number + * @stashid: cache stash id for associated cpu -- if ~stashid == 0 then + * stashid not defined + * @snoopid: snoop id for hardware coherency -- if ~snoopid == 0 then + * snoopid not defined + * @subwin_cnt: number of sub-windows + * @prot: window permissions + * + * Returns 0 upon success else error code < 0 returned + */ +int pamu_config_ppaace(int liodn, phys_addr_t win_addr, phys_addr_t win_size, + u32 omi, unsigned long rpn, u32 snoopid, u32 stashid, + u32 subwin_cnt, int prot) +{ + struct paace *ppaace; + unsigned long fspi; + + if (!is_power_of_2(win_size) || win_size < PAMU_PAGE_SIZE) { + pr_debug("window size too small or not a power of two %llx\n", win_size); + return -EINVAL; + } + + if (win_addr & (win_size - 1)) { + pr_debug("window address is not aligned with window size\n"); + return -EINVAL; + } + + ppaace = pamu_get_ppaace(liodn); + if (!ppaace) { + return -ENOENT; + } + + /* window size is 2^(WSE+1) bytes */ + set_bf(ppaace->addr_bitfields, PPAACE_AF_WSE, + map_addrspace_size_to_wse(win_size)); + + pamu_init_ppaace(ppaace); + + ppaace->wbah = win_addr >> (PAMU_PAGE_SHIFT + 20); + set_bf(ppaace->addr_bitfields, PPAACE_AF_WBAL, + (win_addr >> PAMU_PAGE_SHIFT)); + + /* set up operation mapping if it's configured */ + if (omi < OME_NUMBER_ENTRIES) { + set_bf(ppaace->impl_attr, PAACE_IA_OTM, PAACE_OTM_INDEXED); + ppaace->op_encode.index_ot.omi = omi; + } else if (~omi != 0) { + pr_debug("bad operation mapping index: %d\n", omi); + return -EINVAL; + } + + /* configure stash id */ + if (~stashid != 0) + set_bf(ppaace->impl_attr, PAACE_IA_CID, stashid); + + /* configure snoop id */ + if (~snoopid != 0) + ppaace->domain_attr.to_host.snpid = snoopid; + + if (subwin_cnt) { + /* The first entry is in the primary PAACE instead */ + fspi = pamu_get_fspi_and_allocate(subwin_cnt - 1); + if (fspi == ULONG_MAX) { + pr_debug("spaace indexes exhausted\n"); + return -EINVAL; + } + + /* window count is 2^(WCE+1) bytes */ + set_bf(ppaace->impl_attr, PAACE_IA_WCE, + map_subwindow_cnt_to_wce(subwin_cnt)); + set_bf(ppaace->addr_bitfields, PPAACE_AF_MW, 0x1); + ppaace->fspi = fspi; + } else { + set_bf(ppaace->impl_attr, PAACE_IA_ATM, PAACE_ATM_WINDOW_XLATE); + ppaace->twbah = rpn >> 20; + set_bf(ppaace->win_bitfields, PAACE_WIN_TWBAL, rpn); + set_bf(ppaace->addr_bitfields, PAACE_AF_AP, prot); + set_bf(ppaace->impl_attr, PAACE_IA_WCE, 0); + set_bf(ppaace->addr_bitfields, PPAACE_AF_MW, 0); + } + mb(); + + return 0; +} + +/** + * pamu_config_spaace() - Sets up SPAACE entry for specified subwindow + * + * @liodn: Logical IO device number + * @subwin_cnt: number of sub-windows associated with dma-window + * @subwin: subwindow index + * @subwin_size: size of subwindow + * @omi: Operation mapping index + * @rpn: real (true physical) page number + * @snoopid: snoop id for hardware coherency -- if ~snoopid == 0 then + * snoopid not defined + * @stashid: cache stash id for associated cpu + * @enable: enable/disable subwindow after reconfiguration + * @prot: sub window permissions + * + * Returns 0 upon success else error code < 0 returned + */ +int pamu_config_spaace(int liodn, u32 subwin_cnt, u32 subwin, + phys_addr_t subwin_size, u32 omi, unsigned long rpn, + u32 snoopid, u32 stashid, int enable, int prot) +{ + struct paace *paace; + + + /* setup sub-windows */ + if (!subwin_cnt) { + pr_debug("Invalid subwindow count\n"); + return -EINVAL; + } + + paace = pamu_get_ppaace(liodn); + if (subwin > 0 && subwin < subwin_cnt && paace) { + paace = pamu_get_spaace(paace, subwin - 1); + + if (paace && !(paace->addr_bitfields & PAACE_V_VALID)) { + pamu_init_spaace(paace); + set_bf(paace->addr_bitfields, SPAACE_AF_LIODN, liodn); + } + } + + if (!paace) { + pr_debug("Invalid liodn entry\n"); + return -ENOENT; + } + + if (!is_power_of_2(subwin_size) || subwin_size < PAMU_PAGE_SIZE) { + pr_debug("subwindow size out of range, or not a power of 2\n"); + return -EINVAL; + } + + if (rpn == ULONG_MAX) { + pr_debug("real page number out of range\n"); + return -EINVAL; + } + + /* window size is 2^(WSE+1) bytes */ + set_bf(paace->win_bitfields, PAACE_WIN_SWSE, + map_addrspace_size_to_wse(subwin_size)); + + set_bf(paace->impl_attr, PAACE_IA_ATM, PAACE_ATM_WINDOW_XLATE); + paace->twbah = rpn >> 20; + set_bf(paace->win_bitfields, PAACE_WIN_TWBAL, rpn); + set_bf(paace->addr_bitfields, PAACE_AF_AP, prot); + + /* configure snoop id */ + if (~snoopid != 0) + paace->domain_attr.to_host.snpid = snoopid; + + /* set up operation mapping if it's configured */ + if (omi < OME_NUMBER_ENTRIES) { + set_bf(paace->impl_attr, PAACE_IA_OTM, PAACE_OTM_INDEXED); + paace->op_encode.index_ot.omi = omi; + } else if (~omi != 0) { + pr_debug("bad operation mapping index: %d\n", omi); + return -EINVAL; + } + + if (~stashid != 0) + set_bf(paace->impl_attr, PAACE_IA_CID, stashid); + + smp_wmb(); + + if (enable) + set_bf(paace->addr_bitfields, PAACE_AF_V, PAACE_V_VALID); + + mb(); + + return 0; +} + +/** +* get_ome_index() - Returns the index in the operation mapping table +* for device. +* @*omi_index: pointer for storing the index value +* +*/ +void get_ome_index(u32 *omi_index, struct device *dev) +{ + if (of_device_is_compatible(dev->of_node, "fsl,qman-portal")) + *omi_index = OMI_QMAN; + if (of_device_is_compatible(dev->of_node, "fsl,qman")) + *omi_index = OMI_QMAN_PRIV; +} + +/** + * get_stash_id - Returns stash destination id corresponding to a + * cache type and vcpu. + * @stash_dest_hint: L1, L2 or L3 + * @vcpu: vpcu target for a particular cache type. + * + * Returs stash on success or ~(u32)0 on failure. + * + */ +u32 get_stash_id(u32 stash_dest_hint, u32 vcpu) +{ + const u32 *prop; + struct device_node *node; + u32 cache_level; + int len, found = 0; + int i; + + /* Fastpath, exit early if L3/CPC cache is target for stashing */ + if (stash_dest_hint == PAMU_ATTR_CACHE_L3) { + node = of_find_matching_node(NULL, l3_device_ids); + if (node) { + prop = of_get_property(node, "cache-stash-id", 0); + if (!prop) { + pr_debug("missing cache-stash-id at %s\n", node->full_name); + of_node_put(node); + return ~(u32)0; + } + of_node_put(node); + return be32_to_cpup(prop); + } + return ~(u32)0; + } + + for_each_node_by_type(node, "cpu") { + prop = of_get_property(node, "reg", &len); + for (i = 0; i < len / sizeof(u32); i++) { + if (be32_to_cpup(&prop[i]) == vcpu) { + found = 1; + goto found_cpu_node; + } + } + } +found_cpu_node: + + /* find the hwnode that represents the cache */ + for (cache_level = PAMU_ATTR_CACHE_L1; (cache_level < PAMU_ATTR_CACHE_L3) && found; cache_level++) { + if (stash_dest_hint == cache_level) { + prop = of_get_property(node, "cache-stash-id", 0); + if (!prop) { + pr_debug("missing cache-stash-id at %s\n", node->full_name); + of_node_put(node); + return ~(u32)0; + } + of_node_put(node); + return be32_to_cpup(prop); + } + + prop = of_get_property(node, "next-level-cache", 0); + if (!prop) { + pr_debug("can't find next-level-cache at %s\n", + node->full_name); + of_node_put(node); + return ~(u32)0; /* can't traverse any further */ + } + of_node_put(node); + + /* advance to next node in cache hierarchy */ + node = of_find_node_by_phandle(*prop); + if (!node) { + pr_debug("Invalid node for cache hierarchy %s\n", + node->full_name); + return ~(u32)0; + } + } + + pr_debug("stash dest not found for %d on vcpu %d\n", + stash_dest_hint, vcpu); + return ~(u32)0; +} + +/* Identify if the PAACT table entry belongs to QMAN, BMAN or QMAN Portal */ +#define QMAN_PAACE 1 +#define QMAN_PORTAL_PAACE 2 +#define BMAN_PAACE 3 + +/** + * Setup operation mapping and stash destinations for QMAN and QMAN portal. + * Memory accesses to QMAN and BMAN private memory need not be coherent, so + * clear the PAACE entry coherency attribute for them. + */ +static void setup_qbman_paace(struct paace *ppaace, int paace_type) +{ + switch (paace_type) { + case QMAN_PAACE: + set_bf(ppaace->impl_attr, PAACE_IA_OTM, PAACE_OTM_INDEXED); + ppaace->op_encode.index_ot.omi = OMI_QMAN_PRIV; + /* setup QMAN Private data stashing for the L3 cache */ + set_bf(ppaace->impl_attr, PAACE_IA_CID, get_stash_id(PAMU_ATTR_CACHE_L3, 0)); + set_bf(ppaace->domain_attr.to_host.coherency_required, PAACE_DA_HOST_CR, + 0); + break; + case QMAN_PORTAL_PAACE: + set_bf(ppaace->impl_attr, PAACE_IA_OTM, PAACE_OTM_INDEXED); + ppaace->op_encode.index_ot.omi = OMI_QMAN; + /*Set DQRR and Frame stashing for the L3 cache */ + set_bf(ppaace->impl_attr, PAACE_IA_CID, get_stash_id(PAMU_ATTR_CACHE_L3, 0)); + break; + case BMAN_PAACE: + set_bf(ppaace->domain_attr.to_host.coherency_required, PAACE_DA_HOST_CR, + 0); + break; + } +} + +/** + * Setup the operation mapping table for various devices. This is a static + * table where each table index corresponds to a particular device. PAMU uses + * this table to translate device transaction to appropriate corenet + * transaction. + */ +static void __init setup_omt(struct ome *omt) +{ + struct ome *ome; + + /* Configure OMI_QMAN */ + ome = &omt[OMI_QMAN]; + + ome->moe[IOE_READ_IDX] = EOE_VALID | EOE_READ; + ome->moe[IOE_EREAD0_IDX] = EOE_VALID | EOE_RSA; + ome->moe[IOE_WRITE_IDX] = EOE_VALID | EOE_WRITE; + ome->moe[IOE_EWRITE0_IDX] = EOE_VALID | EOE_WWSAO; + + ome->moe[IOE_DIRECT0_IDX] = EOE_VALID | EOE_LDEC; + ome->moe[IOE_DIRECT1_IDX] = EOE_VALID | EOE_LDECPE; + + /* Configure OMI_FMAN */ + ome = &omt[OMI_FMAN]; + ome->moe[IOE_READ_IDX] = EOE_VALID | EOE_READI; + ome->moe[IOE_WRITE_IDX] = EOE_VALID | EOE_WRITE; + + /* Configure OMI_QMAN private */ + ome = &omt[OMI_QMAN_PRIV]; + ome->moe[IOE_READ_IDX] = EOE_VALID | EOE_READ; + ome->moe[IOE_WRITE_IDX] = EOE_VALID | EOE_WRITE; + ome->moe[IOE_EREAD0_IDX] = EOE_VALID | EOE_RSA; + ome->moe[IOE_EWRITE0_IDX] = EOE_VALID | EOE_WWSA; + + /* Configure OMI_CAAM */ + ome = &omt[OMI_CAAM]; + ome->moe[IOE_READ_IDX] = EOE_VALID | EOE_READI; + ome->moe[IOE_WRITE_IDX] = EOE_VALID | EOE_WRITE; +} + +/* + * Get the maximum number of PAACT table entries + * and subwindows supported by PAMU + */ +static void get_pamu_cap_values(unsigned long pamu_reg_base) +{ + u32 pc_val; + + pc_val = in_be32((u32 *)(pamu_reg_base + PAMU_PC3)); + /* Maximum number of subwindows per liodn */ + max_subwindow_count = 1 << (1 + PAMU_PC3_MWCE(pc_val)); +} + +/* Setup PAMU registers pointing to PAACT, SPAACT and OMT */ +int setup_one_pamu(unsigned long pamu_reg_base, unsigned long pamu_reg_size, + phys_addr_t ppaact_phys, phys_addr_t spaact_phys, + phys_addr_t omt_phys) +{ + u32 *pc; + struct pamu_mmap_regs *pamu_regs; + + pc = (u32 *) (pamu_reg_base + PAMU_PC); + pamu_regs = (struct pamu_mmap_regs *) + (pamu_reg_base + PAMU_MMAP_REGS_BASE); + + /* set up pointers to corenet control blocks */ + + out_be32(&pamu_regs->ppbah, upper_32_bits(ppaact_phys)); + out_be32(&pamu_regs->ppbal, lower_32_bits(ppaact_phys)); + ppaact_phys = ppaact_phys + PAACT_SIZE; + out_be32(&pamu_regs->pplah, upper_32_bits(ppaact_phys)); + out_be32(&pamu_regs->pplal, lower_32_bits(ppaact_phys)); + + out_be32(&pamu_regs->spbah, upper_32_bits(spaact_phys)); + out_be32(&pamu_regs->spbal, lower_32_bits(spaact_phys)); + spaact_phys = spaact_phys + SPAACT_SIZE; + out_be32(&pamu_regs->splah, upper_32_bits(spaact_phys)); + out_be32(&pamu_regs->splal, lower_32_bits(spaact_phys)); + + out_be32(&pamu_regs->obah, upper_32_bits(omt_phys)); + out_be32(&pamu_regs->obal, lower_32_bits(omt_phys)); + omt_phys = omt_phys + OMT_SIZE; + out_be32(&pamu_regs->olah, upper_32_bits(omt_phys)); + out_be32(&pamu_regs->olal, lower_32_bits(omt_phys)); + + /* + * set PAMU enable bit, + * allow ppaact & omt to be cached + * & enable PAMU access violation interrupts. + */ + + out_be32((u32 *)(pamu_reg_base + PAMU_PICS), + PAMU_ACCESS_VIOLATION_ENABLE); + out_be32(pc, PAMU_PC_PE | PAMU_PC_OCE | PAMU_PC_SPCC | PAMU_PC_PPCC); + return 0; +} + +/* Enable all device LIODNS */ +static void __init setup_liodns(void) +{ + int i, len; + struct paace *ppaace; + struct device_node *node = NULL; + const u32 *prop; + + for_each_node_with_property(node, "fsl,liodn") { + prop = of_get_property(node, "fsl,liodn", &len); + for (i = 0; i < len / sizeof(u32); i++) { + int liodn; + + liodn = be32_to_cpup(&prop[i]); + if (liodn >= PAACE_NUMBER_ENTRIES) { + pr_debug("Invalid LIODN value %d\n", liodn); + continue; + } + ppaace = pamu_get_ppaace(liodn); + pamu_init_ppaace(ppaace); + /* window size is 2^(WSE+1) bytes */ + set_bf(ppaace->addr_bitfields, PPAACE_AF_WSE, 35); + ppaace->wbah = 0; + set_bf(ppaace->addr_bitfields, PPAACE_AF_WBAL, 0); + set_bf(ppaace->impl_attr, PAACE_IA_ATM, + PAACE_ATM_NO_XLATE); + set_bf(ppaace->addr_bitfields, PAACE_AF_AP, + PAACE_AP_PERMS_ALL); + if (of_device_is_compatible(node, "fsl,qman-portal")) + setup_qbman_paace(ppaace, QMAN_PORTAL_PAACE); + if (of_device_is_compatible(node, "fsl,qman")) + setup_qbman_paace(ppaace, QMAN_PAACE); + if (of_device_is_compatible(node, "fsl,bman")) + setup_qbman_paace(ppaace, BMAN_PAACE); + mb(); + pamu_enable_liodn(liodn); + } + } +} + +irqreturn_t pamu_av_isr(int irq, void *arg) +{ + struct pamu_isr_data *data = arg; + phys_addr_t phys; + unsigned int i, j, ret; + + pr_emerg("access violation interrupt\n"); + + for (i = 0; i < data->count; i++) { + void __iomem *p = data->pamu_reg_base + i * PAMU_OFFSET; + u32 pics = in_be32(p + PAMU_PICS); + + if (pics & PAMU_ACCESS_VIOLATION_STAT) { + u32 avs1 = in_be32(p + PAMU_AVS1); + struct paace *paace; + + pr_emerg("POES1=%08x\n", in_be32(p + PAMU_POES1)); + pr_emerg("POES2=%08x\n", in_be32(p + PAMU_POES2)); + pr_emerg("AVS1=%08x\n", avs1); + pr_emerg("AVS2=%08x\n", in_be32(p + PAMU_AVS2)); + pr_emerg("AVA=%016llx\n", make64(in_be32(p + PAMU_AVAH), + in_be32(p + PAMU_AVAL))); + pr_emerg("UDAD=%08x\n", in_be32(p + PAMU_UDAD)); + pr_emerg("POEA=%016llx\n", make64(in_be32(p + PAMU_POEAH), + in_be32(p + PAMU_POEAL))); + + phys = make64(in_be32(p + PAMU_POEAH), + in_be32(p + PAMU_POEAL)); + + /* Assume that POEA points to a PAACE */ + if (phys) { + u32 *paace = phys_to_virt(phys); + + /* Only the first four words are relevant */ + for (j = 0; j < 4; j++) + pr_emerg("PAACE[%u]=%08x\n", j, in_be32(paace + j)); + } + + /* clear access violation condition */ + out_be32((p + PAMU_AVS1), avs1 & PAMU_AV_MASK); + paace = pamu_get_ppaace(avs1 >> PAMU_AVS1_LIODN_SHIFT); + BUG_ON(!paace); + /* check if we got a violation for a disabled LIODN */ + if (!get_bf(paace->addr_bitfields, PAACE_AF_V)) { + /* + * As per hardware erratum A-003638, access + * violation can be reported for a disabled + * LIODN. If we hit that condition, disable + * access violation reporting. + */ + pics &= ~PAMU_ACCESS_VIOLATION_ENABLE; + } else { + /* Disable the LIODN */ + ret = pamu_disable_liodn(avs1 >> PAMU_AVS1_LIODN_SHIFT); + BUG_ON(ret); + pr_emerg("Disabling liodn %x\n", avs1 >> PAMU_AVS1_LIODN_SHIFT); + } + out_be32((p + PAMU_PICS), pics); + } + } + + + return IRQ_HANDLED; +} + +#define LAWAR_EN 0x80000000 +#define LAWAR_TARGET_MASK 0x0FF00000 +#define LAWAR_TARGET_SHIFT 20 +#define LAWAR_SIZE_MASK 0x0000003F +#define LAWAR_CSDID_MASK 0x000FF000 +#define LAWAR_CSDID_SHIFT 12 + +#define LAW_SIZE_4K 0xb + +struct ccsr_law { + u32 lawbarh; /* LAWn base address high */ + u32 lawbarl; /* LAWn base address low */ + u32 lawar; /* LAWn attributes */ + u32 reserved; +}; + +/* + * Create a coherence subdomain for a given memory block. + */ +static int __init create_csd(phys_addr_t phys, size_t size, u32 csd_port_id) +{ + struct device_node *np; + const __be32 *iprop; + void __iomem *lac = NULL; /* Local Access Control registers */ + struct ccsr_law __iomem *law; + void __iomem *ccm = NULL; + u32 __iomem *csdids; + unsigned int i, num_laws, num_csds; + u32 law_target = 0; + u32 csd_id = 0; + int ret = 0; + + np = of_find_compatible_node(NULL, NULL, "fsl,corenet-law"); + if (!np) + return -ENODEV; + + iprop = of_get_property(np, "fsl,num-laws", NULL); + if (!iprop) { + ret = -ENODEV; + goto error; + } + + num_laws = be32_to_cpup(iprop); + if (!num_laws) { + ret = -ENODEV; + goto error; + } + + lac = of_iomap(np, 0); + if (!lac) { + ret = -ENODEV; + goto error; + } + + /* LAW registers are at offset 0xC00 */ + law = lac + 0xC00; + + of_node_put(np); + + np = of_find_compatible_node(NULL, NULL, "fsl,corenet-cf"); + if (!np) { + ret = -ENODEV; + goto error; + } + + iprop = of_get_property(np, "fsl,ccf-num-csdids", NULL); + if (!iprop) { + ret = -ENODEV; + goto error; + } + + num_csds = be32_to_cpup(iprop); + if (!num_csds) { + ret = -ENODEV; + goto error; + } + + ccm = of_iomap(np, 0); + if (!ccm) { + ret = -ENOMEM; + goto error; + } + + /* The undocumented CSDID registers are at offset 0x600 */ + csdids = ccm + 0x600; + + of_node_put(np); + np = NULL; + + /* Find an unused coherence subdomain ID */ + for (csd_id = 0; csd_id < num_csds; csd_id++) { + if (!csdids[csd_id]) + break; + } + + /* Store the Port ID in the (undocumented) proper CIDMRxx register */ + csdids[csd_id] = csd_port_id; + + /* Find the DDR LAW that maps to our buffer. */ + for (i = 0; i < num_laws; i++) { + if (law[i].lawar & LAWAR_EN) { + phys_addr_t law_start, law_end; + + law_start = make64(law[i].lawbarh, law[i].lawbarl); + law_end = law_start + + (2ULL << (law[i].lawar & LAWAR_SIZE_MASK)); + + if (law_start <= phys && phys < law_end) { + law_target = law[i].lawar & LAWAR_TARGET_MASK; + break; + } + } + } + + if (i == 0 || i == num_laws) { + /* This should never happen*/ + ret = -ENOENT; + goto error; + } + + /* Find a free LAW entry */ + while (law[--i].lawar & LAWAR_EN) { + if (i == 0) { + /* No higher priority LAW slots available */ + ret = -ENOENT; + goto error; + } + } + + law[i].lawbarh = upper_32_bits(phys); + law[i].lawbarl = lower_32_bits(phys); + wmb(); + law[i].lawar = LAWAR_EN | law_target | (csd_id << LAWAR_CSDID_SHIFT) | + (LAW_SIZE_4K + get_order(size)); + wmb(); + +error: + if (ccm) + iounmap(ccm); + + if (lac) + iounmap(lac); + + if (np) + of_node_put(np); + + return ret; +} + +/* + * Table of SVRs and the corresponding PORT_ID values. Port ID corresponds to a + * bit map of snoopers for a given range of memory mapped by a LAW. + * + * All future CoreNet-enabled SOCs will have this erratum(A-004510) fixed, so this + * table should never need to be updated. SVRs are guaranteed to be unique, so + * there is no worry that a future SOC will inadvertently have one of these + * values. + */ +static const struct { + u32 svr; + u32 port_id; +} port_id_map[] = { + {0x82100010, 0xFF000000}, /* P2040 1.0 */ + {0x82100011, 0xFF000000}, /* P2040 1.1 */ + {0x82100110, 0xFF000000}, /* P2041 1.0 */ + {0x82100111, 0xFF000000}, /* P2041 1.1 */ + {0x82110310, 0xFF000000}, /* P3041 1.0 */ + {0x82110311, 0xFF000000}, /* P3041 1.1 */ + {0x82010020, 0xFFF80000}, /* P4040 2.0 */ + {0x82000020, 0xFFF80000}, /* P4080 2.0 */ + {0x82210010, 0xFC000000}, /* P5010 1.0 */ + {0x82210020, 0xFC000000}, /* P5010 2.0 */ + {0x82200010, 0xFC000000}, /* P5020 1.0 */ + {0x82050010, 0xFF800000}, /* P5021 1.0 */ + {0x82040010, 0xFF800000}, /* P5040 1.0 */ +}; + +#define SVR_SECURITY 0x80000 /* The Security (E) bit */ + +static int __init fsl_pamu_probe(struct platform_device *pdev) +{ + void __iomem *pamu_regs = NULL; + struct ccsr_guts __iomem *guts_regs = NULL; + u32 pamubypenr, pamu_counter; + unsigned long pamu_reg_off; + unsigned long pamu_reg_base; + struct pamu_isr_data *data = NULL; + struct device_node *guts_node; + u64 size; + struct page *p; + int ret = 0; + int irq; + phys_addr_t ppaact_phys; + phys_addr_t spaact_phys; + phys_addr_t omt_phys; + size_t mem_size = 0; + unsigned int order = 0; + u32 csd_port_id = 0; + unsigned i; + /* + * enumerate all PAMUs and allocate and setup PAMU tables + * for each of them, + * NOTE : All PAMUs share the same LIODN tables. + */ + + pamu_regs = of_iomap(pdev->dev.of_node, 0); + if (!pamu_regs) { + dev_err(&pdev->dev, "ioremap of PAMU node failed\n"); + return -ENOMEM; + } + of_get_address(pdev->dev.of_node, 0, &size, NULL); + + irq = irq_of_parse_and_map(pdev->dev.of_node, 0); + if (irq == NO_IRQ) { + dev_warn(&pdev->dev, "no interrupts listed in PAMU node\n"); + goto error; + } + + data = kzalloc(sizeof(struct pamu_isr_data), GFP_KERNEL); + if (!data) { + dev_err(&pdev->dev, "PAMU isr data memory allocation failed\n"); + ret = -ENOMEM; + goto error; + } + data->pamu_reg_base = pamu_regs; + data->count = size / PAMU_OFFSET; + + /* The ISR needs access to the regs, so we won't iounmap them */ + ret = request_irq(irq, pamu_av_isr, 0, "pamu", data); + if (ret < 0) { + dev_err(&pdev->dev, "error %i installing ISR for irq %i\n", + ret, irq); + goto error; + } + + guts_node = of_find_matching_node(NULL, guts_device_ids); + if (!guts_node) { + dev_err(&pdev->dev, "could not find GUTS node %s\n", + pdev->dev.of_node->full_name); + ret = -ENODEV; + goto error; + } + + guts_regs = of_iomap(guts_node, 0); + of_node_put(guts_node); + if (!guts_regs) { + dev_err(&pdev->dev, "ioremap of GUTS node failed\n"); + ret = -ENODEV; + goto error; + } + + /* read in the PAMU capability registers */ + get_pamu_cap_values((unsigned long)pamu_regs); + /* + * To simplify the allocation of a coherency domain, we allocate the + * PAACT and the OMT in the same memory buffer. Unfortunately, this + * wastes more memory compared to allocating the buffers separately. + */ + /* Determine how much memory we need */ + mem_size = (PAGE_SIZE << get_order(PAACT_SIZE)) + + (PAGE_SIZE << get_order(SPAACT_SIZE)) + + (PAGE_SIZE << get_order(OMT_SIZE)); + order = get_order(mem_size); + + p = alloc_pages(GFP_KERNEL | __GFP_ZERO, order); + if (!p) { + dev_err(&pdev->dev, "unable to allocate PAACT/SPAACT/OMT block\n"); + ret = -ENOMEM; + goto error; + } + + ppaact = page_address(p); + ppaact_phys = page_to_phys(p); + + /* Make sure the memory is naturally aligned */ + if (ppaact_phys & ((PAGE_SIZE << order) - 1)) { + dev_err(&pdev->dev, "PAACT/OMT block is unaligned\n"); + ret = -ENOMEM; + goto error; + } + + spaact = (void *)ppaact + (PAGE_SIZE << get_order(PAACT_SIZE)); + omt = (void *)spaact + (PAGE_SIZE << get_order(SPAACT_SIZE)); + + dev_dbg(&pdev->dev, "ppaact virt=%p phys=0x%llx\n", ppaact, + (unsigned long long) ppaact_phys); + + /* Check to see if we need to implement the work-around on this SOC */ + + /* Determine the Port ID for our coherence subdomain */ + for (i = 0; i < ARRAY_SIZE(port_id_map); i++) { + if (port_id_map[i].svr == (mfspr(SPRN_SVR) & ~SVR_SECURITY)) { + csd_port_id = port_id_map[i].port_id; + dev_dbg(&pdev->dev, "found matching SVR %08x\n", + port_id_map[i].svr); + break; + } + } + + if (csd_port_id) { + dev_dbg(&pdev->dev, "creating coherency subdomain at address " + "0x%llx, size %zu, port id 0x%08x", ppaact_phys, + mem_size, csd_port_id); + + ret = create_csd(ppaact_phys, mem_size, csd_port_id); + if (ret) { + dev_err(&pdev->dev, "could not create coherence " + "subdomain\n"); + return ret; + } + } + + spaact_phys = virt_to_phys(spaact); + omt_phys = virt_to_phys(omt); + + spaace_pool = gen_pool_create(ilog2(sizeof(struct paace)), -1); + if (!spaace_pool) { + ret = -ENOMEM; + dev_err(&pdev->dev, "PAMU : failed to allocate spaace gen pool\n"); + goto error; + } + + ret = gen_pool_add(spaace_pool, (unsigned long)spaact, SPAACT_SIZE, -1); + if (ret) + goto error_genpool; + + pamubypenr = in_be32(&guts_regs->pamubypenr); + + for (pamu_reg_off = 0, pamu_counter = 0x80000000; pamu_reg_off < size; + pamu_reg_off += PAMU_OFFSET, pamu_counter >>= 1) { + + pamu_reg_base = (unsigned long) pamu_regs + pamu_reg_off; + setup_one_pamu(pamu_reg_base, pamu_reg_off, ppaact_phys, + spaact_phys, omt_phys); + /* Disable PAMU bypass for this PAMU */ + pamubypenr &= ~pamu_counter; + } + + setup_omt(omt); + + /* Enable all relevant PAMU(s) */ + out_be32(&guts_regs->pamubypenr, pamubypenr); + + iounmap(guts_regs); + + /* Enable DMA for the LIODNs in the device tree*/ + + setup_liodns(); + + return 0; + +error_genpool: + gen_pool_destroy(spaace_pool); + +error: + if (irq != NO_IRQ) + free_irq(irq, data); + + if (data) { + memset(data, 0, sizeof(struct pamu_isr_data)); + kfree(data); + } + + if (pamu_regs) + iounmap(pamu_regs); + + if (guts_regs) + iounmap(guts_regs); + + if (ppaact) + free_pages((unsigned long)ppaact, order); + + ppaact = NULL; + + return ret; +} + +static const struct of_device_id fsl_of_pamu_ids[] = { + { + .compatible = "fsl,p4080-pamu", + }, + { + .compatible = "fsl,pamu", + }, + {}, +}; + +static struct platform_driver fsl_of_pamu_driver = { + .driver = { + .name = "fsl-of-pamu", + .owner = THIS_MODULE, + }, + .probe = fsl_pamu_probe, +}; + +static __init int fsl_pamu_init(void) +{ + struct platform_device *pdev = NULL; + struct device_node *np; + int ret; + + /* + * The normal OF process calls the probe function at some + * indeterminate later time, after most drivers have loaded. This is + * too late for us, because PAMU clients (like the Qman driver) + * depend on PAMU being initialized early. + * + * So instead, we "manually" call our probe function by creating the + * platform devices ourselves. + */ + + /* + * We assume that there is only one PAMU node in the device tree. A + * single PAMU node represents all of the PAMU devices in the SOC + * already. Everything else already makes that assumption, and the + * binding for the PAMU nodes doesn't allow for any parent-child + * relationships anyway. In other words, support for more than one + * PAMU node would require significant changes to a lot of code. + */ + + np = of_find_compatible_node(NULL, NULL, "fsl,pamu"); + if (!np) { + pr_err("could not find a PAMU node\n"); + return -ENODEV; + } + + ret = platform_driver_register(&fsl_of_pamu_driver); + if (ret) { + pr_err("could not register driver (err=%i)\n", ret); + goto error_driver_register; + } + + pdev = platform_device_alloc("fsl-of-pamu", 0); + if (!pdev) { + pr_err("could not allocate device %s\n", + np->full_name); + ret = -ENOMEM; + goto error_device_alloc; + } + pdev->dev.of_node = of_node_get(np); + + ret = pamu_domain_init(); + if (ret) + goto error_device_add; + + ret = platform_device_add(pdev); + if (ret) { + pr_err("could not add device %s (err=%i)\n", + np->full_name, ret); + goto error_device_add; + } + + return 0; + +error_device_add: + of_node_put(pdev->dev.of_node); + pdev->dev.of_node = NULL; + + platform_device_put(pdev); + +error_device_alloc: + platform_driver_unregister(&fsl_of_pamu_driver); + +error_driver_register: + of_node_put(np); + + return ret; +} +arch_initcall(fsl_pamu_init); diff --git a/drivers/iommu/fsl_pamu.h b/drivers/iommu/fsl_pamu.h new file mode 100644 index 000000000000..8fc1a125b16e --- /dev/null +++ b/drivers/iommu/fsl_pamu.h @@ -0,0 +1,410 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright (C) 2013 Freescale Semiconductor, Inc. + * + */ + +#ifndef __FSL_PAMU_H +#define __FSL_PAMU_H + +#include <asm/fsl_pamu_stash.h> + +/* Bit Field macros + * v = bit field variable; m = mask, m##_SHIFT = shift, x = value to load + */ +#define set_bf(v, m, x) (v = ((v) & ~(m)) | (((x) << (m##_SHIFT)) & (m))) +#define get_bf(v, m) (((v) & (m)) >> (m##_SHIFT)) + +/* PAMU CCSR space */ +#define PAMU_PGC 0x00000000 /* Allows all peripheral accesses */ +#define PAMU_PE 0x40000000 /* enable PAMU */ + +/* PAMU_OFFSET to the next pamu space in ccsr */ +#define PAMU_OFFSET 0x1000 + +#define PAMU_MMAP_REGS_BASE 0 + +struct pamu_mmap_regs { + u32 ppbah; + u32 ppbal; + u32 pplah; + u32 pplal; + u32 spbah; + u32 spbal; + u32 splah; + u32 splal; + u32 obah; + u32 obal; + u32 olah; + u32 olal; +}; + +/* PAMU Error Registers */ +#define PAMU_POES1 0x0040 +#define PAMU_POES2 0x0044 +#define PAMU_POEAH 0x0048 +#define PAMU_POEAL 0x004C +#define PAMU_AVS1 0x0050 +#define PAMU_AVS1_AV 0x1 +#define PAMU_AVS1_OTV 0x6 +#define PAMU_AVS1_APV 0x78 +#define PAMU_AVS1_WAV 0x380 +#define PAMU_AVS1_LAV 0x1c00 +#define PAMU_AVS1_GCV 0x2000 +#define PAMU_AVS1_PDV 0x4000 +#define PAMU_AV_MASK (PAMU_AVS1_AV | PAMU_AVS1_OTV | PAMU_AVS1_APV | PAMU_AVS1_WAV \ + | PAMU_AVS1_LAV | PAMU_AVS1_GCV | PAMU_AVS1_PDV) +#define PAMU_AVS1_LIODN_SHIFT 16 +#define PAMU_LAV_LIODN_NOT_IN_PPAACT 0x400 + +#define PAMU_AVS2 0x0054 +#define PAMU_AVAH 0x0058 +#define PAMU_AVAL 0x005C +#define PAMU_EECTL 0x0060 +#define PAMU_EEDIS 0x0064 +#define PAMU_EEINTEN 0x0068 +#define PAMU_EEDET 0x006C +#define PAMU_EEATTR 0x0070 +#define PAMU_EEAHI 0x0074 +#define PAMU_EEALO 0x0078 +#define PAMU_EEDHI 0X007C +#define PAMU_EEDLO 0x0080 +#define PAMU_EECC 0x0084 +#define PAMU_UDAD 0x0090 + +/* PAMU Revision Registers */ +#define PAMU_PR1 0x0BF8 +#define PAMU_PR2 0x0BFC + +/* PAMU version mask */ +#define PAMU_PR1_MASK 0xffff + +/* PAMU Capabilities Registers */ +#define PAMU_PC1 0x0C00 +#define PAMU_PC2 0x0C04 +#define PAMU_PC3 0x0C08 +#define PAMU_PC4 0x0C0C + +/* PAMU Control Register */ +#define PAMU_PC 0x0C10 + +/* PAMU control defs */ +#define PAMU_CONTROL 0x0C10 +#define PAMU_PC_PGC 0x80000000 /* PAMU gate closed bit */ +#define PAMU_PC_PE 0x40000000 /* PAMU enable bit */ +#define PAMU_PC_SPCC 0x00000010 /* sPAACE cache enable */ +#define PAMU_PC_PPCC 0x00000001 /* pPAACE cache enable */ +#define PAMU_PC_OCE 0x00001000 /* OMT cache enable */ + +#define PAMU_PFA1 0x0C14 +#define PAMU_PFA2 0x0C18 + +#define PAMU_PC2_MLIODN(X) ((X) >> 16) +#define PAMU_PC3_MWCE(X) (((X) >> 21) & 0xf) + +/* PAMU Interrupt control and Status Register */ +#define PAMU_PICS 0x0C1C +#define PAMU_ACCESS_VIOLATION_STAT 0x8 +#define PAMU_ACCESS_VIOLATION_ENABLE 0x4 + +/* PAMU Debug Registers */ +#define PAMU_PD1 0x0F00 +#define PAMU_PD2 0x0F04 +#define PAMU_PD3 0x0F08 +#define PAMU_PD4 0x0F0C + +#define PAACE_AP_PERMS_DENIED 0x0 +#define PAACE_AP_PERMS_QUERY 0x1 +#define PAACE_AP_PERMS_UPDATE 0x2 +#define PAACE_AP_PERMS_ALL 0x3 + +#define PAACE_DD_TO_HOST 0x0 +#define PAACE_DD_TO_IO 0x1 +#define PAACE_PT_PRIMARY 0x0 +#define PAACE_PT_SECONDARY 0x1 +#define PAACE_V_INVALID 0x0 +#define PAACE_V_VALID 0x1 +#define PAACE_MW_SUBWINDOWS 0x1 + +#define PAACE_WSE_4K 0xB +#define PAACE_WSE_8K 0xC +#define PAACE_WSE_16K 0xD +#define PAACE_WSE_32K 0xE +#define PAACE_WSE_64K 0xF +#define PAACE_WSE_128K 0x10 +#define PAACE_WSE_256K 0x11 +#define PAACE_WSE_512K 0x12 +#define PAACE_WSE_1M 0x13 +#define PAACE_WSE_2M 0x14 +#define PAACE_WSE_4M 0x15 +#define PAACE_WSE_8M 0x16 +#define PAACE_WSE_16M 0x17 +#define PAACE_WSE_32M 0x18 +#define PAACE_WSE_64M 0x19 +#define PAACE_WSE_128M 0x1A +#define PAACE_WSE_256M 0x1B +#define PAACE_WSE_512M 0x1C +#define PAACE_WSE_1G 0x1D +#define PAACE_WSE_2G 0x1E +#define PAACE_WSE_4G 0x1F + +#define PAACE_DID_PCI_EXPRESS_1 0x00 +#define PAACE_DID_PCI_EXPRESS_2 0x01 +#define PAACE_DID_PCI_EXPRESS_3 0x02 +#define PAACE_DID_PCI_EXPRESS_4 0x03 +#define PAACE_DID_LOCAL_BUS 0x04 +#define PAACE_DID_SRIO 0x0C +#define PAACE_DID_MEM_1 0x10 +#define PAACE_DID_MEM_2 0x11 +#define PAACE_DID_MEM_3 0x12 +#define PAACE_DID_MEM_4 0x13 +#define PAACE_DID_MEM_1_2 0x14 +#define PAACE_DID_MEM_3_4 0x15 +#define PAACE_DID_MEM_1_4 0x16 +#define PAACE_DID_BM_SW_PORTAL 0x18 +#define PAACE_DID_PAMU 0x1C +#define PAACE_DID_CAAM 0x21 +#define PAACE_DID_QM_SW_PORTAL 0x3C +#define PAACE_DID_CORE0_INST 0x80 +#define PAACE_DID_CORE0_DATA 0x81 +#define PAACE_DID_CORE1_INST 0x82 +#define PAACE_DID_CORE1_DATA 0x83 +#define PAACE_DID_CORE2_INST 0x84 +#define PAACE_DID_CORE2_DATA 0x85 +#define PAACE_DID_CORE3_INST 0x86 +#define PAACE_DID_CORE3_DATA 0x87 +#define PAACE_DID_CORE4_INST 0x88 +#define PAACE_DID_CORE4_DATA 0x89 +#define PAACE_DID_CORE5_INST 0x8A +#define PAACE_DID_CORE5_DATA 0x8B +#define PAACE_DID_CORE6_INST 0x8C +#define PAACE_DID_CORE6_DATA 0x8D +#define PAACE_DID_CORE7_INST 0x8E +#define PAACE_DID_CORE7_DATA 0x8F +#define PAACE_DID_BROADCAST 0xFF + +#define PAACE_ATM_NO_XLATE 0x00 +#define PAACE_ATM_WINDOW_XLATE 0x01 +#define PAACE_ATM_PAGE_XLATE 0x02 +#define PAACE_ATM_WIN_PG_XLATE \ + (PAACE_ATM_WINDOW_XLATE | PAACE_ATM_PAGE_XLATE) +#define PAACE_OTM_NO_XLATE 0x00 +#define PAACE_OTM_IMMEDIATE 0x01 +#define PAACE_OTM_INDEXED 0x02 +#define PAACE_OTM_RESERVED 0x03 + +#define PAACE_M_COHERENCE_REQ 0x01 + +#define PAACE_PID_0 0x0 +#define PAACE_PID_1 0x1 +#define PAACE_PID_2 0x2 +#define PAACE_PID_3 0x3 +#define PAACE_PID_4 0x4 +#define PAACE_PID_5 0x5 +#define PAACE_PID_6 0x6 +#define PAACE_PID_7 0x7 + +#define PAACE_TCEF_FORMAT0_8B 0x00 +#define PAACE_TCEF_FORMAT1_RSVD 0x01 +/* + * Hard coded value for the PAACT size to accomodate + * maximum LIODN value generated by u-boot. + */ +#define PAACE_NUMBER_ENTRIES 0x500 +/* Hard coded value for the SPAACT size */ +#define SPAACE_NUMBER_ENTRIES 0x800 + +#define OME_NUMBER_ENTRIES 16 + +/* PAACE Bit Field Defines */ +#define PPAACE_AF_WBAL 0xfffff000 +#define PPAACE_AF_WBAL_SHIFT 12 +#define PPAACE_AF_WSE 0x00000fc0 +#define PPAACE_AF_WSE_SHIFT 6 +#define PPAACE_AF_MW 0x00000020 +#define PPAACE_AF_MW_SHIFT 5 + +#define SPAACE_AF_LIODN 0xffff0000 +#define SPAACE_AF_LIODN_SHIFT 16 + +#define PAACE_AF_AP 0x00000018 +#define PAACE_AF_AP_SHIFT 3 +#define PAACE_AF_DD 0x00000004 +#define PAACE_AF_DD_SHIFT 2 +#define PAACE_AF_PT 0x00000002 +#define PAACE_AF_PT_SHIFT 1 +#define PAACE_AF_V 0x00000001 +#define PAACE_AF_V_SHIFT 0 + +#define PAACE_DA_HOST_CR 0x80 +#define PAACE_DA_HOST_CR_SHIFT 7 + +#define PAACE_IA_CID 0x00FF0000 +#define PAACE_IA_CID_SHIFT 16 +#define PAACE_IA_WCE 0x000000F0 +#define PAACE_IA_WCE_SHIFT 4 +#define PAACE_IA_ATM 0x0000000C +#define PAACE_IA_ATM_SHIFT 2 +#define PAACE_IA_OTM 0x00000003 +#define PAACE_IA_OTM_SHIFT 0 + +#define PAACE_WIN_TWBAL 0xfffff000 +#define PAACE_WIN_TWBAL_SHIFT 12 +#define PAACE_WIN_SWSE 0x00000fc0 +#define PAACE_WIN_SWSE_SHIFT 6 + +/* PAMU Data Structures */ +/* primary / secondary paact structure */ +struct paace { + /* PAACE Offset 0x00 */ + u32 wbah; /* only valid for Primary PAACE */ + u32 addr_bitfields; /* See P/S PAACE_AF_* */ + + /* PAACE Offset 0x08 */ + /* Interpretation of first 32 bits dependent on DD above */ + union { + struct { + /* Destination ID, see PAACE_DID_* defines */ + u8 did; + /* Partition ID */ + u8 pid; + /* Snoop ID */ + u8 snpid; + /* coherency_required : 1 reserved : 7 */ + u8 coherency_required; /* See PAACE_DA_* */ + } to_host; + struct { + /* Destination ID, see PAACE_DID_* defines */ + u8 did; + u8 reserved1; + u16 reserved2; + } to_io; + } domain_attr; + + /* Implementation attributes + window count + address & operation translation modes */ + u32 impl_attr; /* See PAACE_IA_* */ + + /* PAACE Offset 0x10 */ + /* Translated window base address */ + u32 twbah; + u32 win_bitfields; /* See PAACE_WIN_* */ + + /* PAACE Offset 0x18 */ + /* first secondary paace entry */ + u32 fspi; /* only valid for Primary PAACE */ + union { + struct { + u8 ioea; + u8 moea; + u8 ioeb; + u8 moeb; + } immed_ot; + struct { + u16 reserved; + u16 omi; + } index_ot; + } op_encode; + + /* PAACE Offsets 0x20-0x38 */ + u32 reserved[8]; /* not currently implemented */ +}; + +/* OME : Operation mapping entry + * MOE : Mapped Operation Encodings + * The operation mapping table is table containing operation mapping entries (OME). + * The index of a particular OME is programmed in the PAACE entry for translation + * in bound I/O operations corresponding to an LIODN. The OMT is used for translation + * specifically in case of the indexed translation mode. Each OME contains a 128 + * byte mapped operation encoding (MOE), where each byte represents an MOE. + */ +#define NUM_MOE 128 +struct ome { + u8 moe[NUM_MOE]; +} __attribute__((packed)); + +#define PAACT_SIZE (sizeof(struct paace) * PAACE_NUMBER_ENTRIES) +#define SPAACT_SIZE (sizeof(struct paace) * SPAACE_NUMBER_ENTRIES) +#define OMT_SIZE (sizeof(struct ome) * OME_NUMBER_ENTRIES) + +#define PAMU_PAGE_SHIFT 12 +#define PAMU_PAGE_SIZE 4096ULL + +#define IOE_READ 0x00 +#define IOE_READ_IDX 0x00 +#define IOE_WRITE 0x81 +#define IOE_WRITE_IDX 0x01 +#define IOE_EREAD0 0x82 /* Enhanced read type 0 */ +#define IOE_EREAD0_IDX 0x02 /* Enhanced read type 0 */ +#define IOE_EWRITE0 0x83 /* Enhanced write type 0 */ +#define IOE_EWRITE0_IDX 0x03 /* Enhanced write type 0 */ +#define IOE_DIRECT0 0x84 /* Directive type 0 */ +#define IOE_DIRECT0_IDX 0x04 /* Directive type 0 */ +#define IOE_EREAD1 0x85 /* Enhanced read type 1 */ +#define IOE_EREAD1_IDX 0x05 /* Enhanced read type 1 */ +#define IOE_EWRITE1 0x86 /* Enhanced write type 1 */ +#define IOE_EWRITE1_IDX 0x06 /* Enhanced write type 1 */ +#define IOE_DIRECT1 0x87 /* Directive type 1 */ +#define IOE_DIRECT1_IDX 0x07 /* Directive type 1 */ +#define IOE_RAC 0x8c /* Read with Atomic clear */ +#define IOE_RAC_IDX 0x0c /* Read with Atomic clear */ +#define IOE_RAS 0x8d /* Read with Atomic set */ +#define IOE_RAS_IDX 0x0d /* Read with Atomic set */ +#define IOE_RAD 0x8e /* Read with Atomic decrement */ +#define IOE_RAD_IDX 0x0e /* Read with Atomic decrement */ +#define IOE_RAI 0x8f /* Read with Atomic increment */ +#define IOE_RAI_IDX 0x0f /* Read with Atomic increment */ + +#define EOE_READ 0x00 +#define EOE_WRITE 0x01 +#define EOE_RAC 0x0c /* Read with Atomic clear */ +#define EOE_RAS 0x0d /* Read with Atomic set */ +#define EOE_RAD 0x0e /* Read with Atomic decrement */ +#define EOE_RAI 0x0f /* Read with Atomic increment */ +#define EOE_LDEC 0x10 /* Load external cache */ +#define EOE_LDECL 0x11 /* Load external cache with stash lock */ +#define EOE_LDECPE 0x12 /* Load external cache with preferred exclusive */ +#define EOE_LDECPEL 0x13 /* Load external cache with preferred exclusive and lock */ +#define EOE_LDECFE 0x14 /* Load external cache with forced exclusive */ +#define EOE_LDECFEL 0x15 /* Load external cache with forced exclusive and lock */ +#define EOE_RSA 0x16 /* Read with stash allocate */ +#define EOE_RSAU 0x17 /* Read with stash allocate and unlock */ +#define EOE_READI 0x18 /* Read with invalidate */ +#define EOE_RWNITC 0x19 /* Read with no intention to cache */ +#define EOE_WCI 0x1a /* Write cache inhibited */ +#define EOE_WWSA 0x1b /* Write with stash allocate */ +#define EOE_WWSAL 0x1c /* Write with stash allocate and lock */ +#define EOE_WWSAO 0x1d /* Write with stash allocate only */ +#define EOE_WWSAOL 0x1e /* Write with stash allocate only and lock */ +#define EOE_VALID 0x80 + +/* Function prototypes */ +int pamu_domain_init(void); +int pamu_enable_liodn(int liodn); +int pamu_disable_liodn(int liodn); +void pamu_free_subwins(int liodn); +int pamu_config_ppaace(int liodn, phys_addr_t win_addr, phys_addr_t win_size, + u32 omi, unsigned long rpn, u32 snoopid, uint32_t stashid, + u32 subwin_cnt, int prot); +int pamu_config_spaace(int liodn, u32 subwin_cnt, u32 subwin_addr, + phys_addr_t subwin_size, u32 omi, unsigned long rpn, + uint32_t snoopid, u32 stashid, int enable, int prot); + +u32 get_stash_id(u32 stash_dest_hint, u32 vcpu); +void get_ome_index(u32 *omi_index, struct device *dev); +int pamu_update_paace_stash(int liodn, u32 subwin, u32 value); +int pamu_disable_spaace(int liodn, u32 subwin); +u32 pamu_get_max_subwin_cnt(void); + +#endif /* __FSL_PAMU_H */ diff --git a/drivers/iommu/fsl_pamu_domain.c b/drivers/iommu/fsl_pamu_domain.c new file mode 100644 index 000000000000..c857c30da979 --- /dev/null +++ b/drivers/iommu/fsl_pamu_domain.c @@ -0,0 +1,1172 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright (C) 2013 Freescale Semiconductor, Inc. + * Author: Varun Sethi <varun.sethi@freescale.com> + * + */ + +#define pr_fmt(fmt) "fsl-pamu-domain: %s: " fmt, __func__ + +#include <linux/init.h> +#include <linux/iommu.h> +#include <linux/notifier.h> +#include <linux/slab.h> +#include <linux/module.h> +#include <linux/types.h> +#include <linux/mm.h> +#include <linux/interrupt.h> +#include <linux/device.h> +#include <linux/of_platform.h> +#include <linux/bootmem.h> +#include <linux/err.h> +#include <asm/io.h> +#include <asm/bitops.h> + +#include <asm/pci-bridge.h> +#include <sysdev/fsl_pci.h> + +#include "fsl_pamu_domain.h" +#include "pci.h" + +/* + * Global spinlock that needs to be held while + * configuring PAMU. + */ +static DEFINE_SPINLOCK(iommu_lock); + +static struct kmem_cache *fsl_pamu_domain_cache; +static struct kmem_cache *iommu_devinfo_cache; +static DEFINE_SPINLOCK(device_domain_lock); + +static int __init iommu_init_mempool(void) +{ + + fsl_pamu_domain_cache = kmem_cache_create("fsl_pamu_domain", + sizeof(struct fsl_dma_domain), + 0, + SLAB_HWCACHE_ALIGN, + + NULL); + if (!fsl_pamu_domain_cache) { + pr_debug("Couldn't create fsl iommu_domain cache\n"); + return -ENOMEM; + } + + iommu_devinfo_cache = kmem_cache_create("iommu_devinfo", + sizeof(struct device_domain_info), + 0, + SLAB_HWCACHE_ALIGN, + NULL); + if (!iommu_devinfo_cache) { + pr_debug("Couldn't create devinfo cache\n"); + kmem_cache_destroy(fsl_pamu_domain_cache); + return -ENOMEM; + } + + return 0; +} + +static phys_addr_t get_phys_addr(struct fsl_dma_domain *dma_domain, dma_addr_t iova) +{ + u32 win_cnt = dma_domain->win_cnt; + struct dma_window *win_ptr = + &dma_domain->win_arr[0]; + struct iommu_domain_geometry *geom; + + geom = &dma_domain->iommu_domain->geometry; + + if (!win_cnt || !dma_domain->geom_size) { + pr_debug("Number of windows/geometry not configured for the domain\n"); + return 0; + } + + if (win_cnt > 1) { + u64 subwin_size; + dma_addr_t subwin_iova; + u32 wnd; + + subwin_size = dma_domain->geom_size >> ilog2(win_cnt); + subwin_iova = iova & ~(subwin_size - 1); + wnd = (subwin_iova - geom->aperture_start) >> ilog2(subwin_size); + win_ptr = &dma_domain->win_arr[wnd]; + } + + if (win_ptr->valid) + return (win_ptr->paddr + (iova & (win_ptr->size - 1))); + + return 0; +} + +static int map_subwins(int liodn, struct fsl_dma_domain *dma_domain) +{ + struct dma_window *sub_win_ptr = + &dma_domain->win_arr[0]; + int i, ret; + unsigned long rpn, flags; + + for (i = 0; i < dma_domain->win_cnt; i++) { + if (sub_win_ptr[i].valid) { + rpn = sub_win_ptr[i].paddr >> + PAMU_PAGE_SHIFT; + spin_lock_irqsave(&iommu_lock, flags); + ret = pamu_config_spaace(liodn, dma_domain->win_cnt, i, + sub_win_ptr[i].size, + ~(u32)0, + rpn, + dma_domain->snoop_id, + dma_domain->stash_id, + (i > 0) ? 1 : 0, + sub_win_ptr[i].prot); + spin_unlock_irqrestore(&iommu_lock, flags); + if (ret) { + pr_debug("PAMU SPAACE configuration failed for liodn %d\n", + liodn); + return ret; + } + } + } + + return ret; +} + +static int map_win(int liodn, struct fsl_dma_domain *dma_domain) +{ + int ret; + struct dma_window *wnd = &dma_domain->win_arr[0]; + phys_addr_t wnd_addr = dma_domain->iommu_domain->geometry.aperture_start; + unsigned long flags; + + spin_lock_irqsave(&iommu_lock, flags); + ret = pamu_config_ppaace(liodn, wnd_addr, + wnd->size, + ~(u32)0, + wnd->paddr >> PAMU_PAGE_SHIFT, + dma_domain->snoop_id, dma_domain->stash_id, + 0, wnd->prot); + spin_unlock_irqrestore(&iommu_lock, flags); + if (ret) + pr_debug("PAMU PAACE configuration failed for liodn %d\n", + liodn); + + return ret; +} + +/* Map the DMA window corresponding to the LIODN */ +static int map_liodn(int liodn, struct fsl_dma_domain *dma_domain) +{ + if (dma_domain->win_cnt > 1) + return map_subwins(liodn, dma_domain); + else + return map_win(liodn, dma_domain); + +} + +/* Update window/subwindow mapping for the LIODN */ +static int update_liodn(int liodn, struct fsl_dma_domain *dma_domain, u32 wnd_nr) +{ + int ret; + struct dma_window *wnd = &dma_domain->win_arr[wnd_nr]; + unsigned long flags; + + spin_lock_irqsave(&iommu_lock, flags); + if (dma_domain->win_cnt > 1) { + ret = pamu_config_spaace(liodn, dma_domain->win_cnt, wnd_nr, + wnd->size, + ~(u32)0, + wnd->paddr >> PAMU_PAGE_SHIFT, + dma_domain->snoop_id, + dma_domain->stash_id, + (wnd_nr > 0) ? 1 : 0, + wnd->prot); + if (ret) + pr_debug("Subwindow reconfiguration failed for liodn %d\n", liodn); + } else { + phys_addr_t wnd_addr; + + wnd_addr = dma_domain->iommu_domain->geometry.aperture_start; + + ret = pamu_config_ppaace(liodn, wnd_addr, + wnd->size, + ~(u32)0, + wnd->paddr >> PAMU_PAGE_SHIFT, + dma_domain->snoop_id, dma_domain->stash_id, + 0, wnd->prot); + if (ret) + pr_debug("Window reconfiguration failed for liodn %d\n", liodn); + } + + spin_unlock_irqrestore(&iommu_lock, flags); + + return ret; +} + +static int update_liodn_stash(int liodn, struct fsl_dma_domain *dma_domain, + u32 val) +{ + int ret = 0, i; + unsigned long flags; + + spin_lock_irqsave(&iommu_lock, flags); + if (!dma_domain->win_arr) { + pr_debug("Windows not configured, stash destination update failed for liodn %d\n", liodn); + spin_unlock_irqrestore(&iommu_lock, flags); + return -EINVAL; + } + + for (i = 0; i < dma_domain->win_cnt; i++) { + ret = pamu_update_paace_stash(liodn, i, val); + if (ret) { + pr_debug("Failed to update SPAACE %d field for liodn %d\n ", i, liodn); + spin_unlock_irqrestore(&iommu_lock, flags); + return ret; + } + } + + spin_unlock_irqrestore(&iommu_lock, flags); + + return ret; +} + +/* Set the geometry parameters for a LIODN */ +static int pamu_set_liodn(int liodn, struct device *dev, + struct fsl_dma_domain *dma_domain, + struct iommu_domain_geometry *geom_attr, + u32 win_cnt) +{ + phys_addr_t window_addr, window_size; + phys_addr_t subwin_size; + int ret = 0, i; + u32 omi_index = ~(u32)0; + unsigned long flags; + + /* + * Configure the omi_index at the geometry setup time. + * This is a static value which depends on the type of + * device and would not change thereafter. + */ + get_ome_index(&omi_index, dev); + + window_addr = geom_attr->aperture_start; + window_size = dma_domain->geom_size; + + spin_lock_irqsave(&iommu_lock, flags); + ret = pamu_disable_liodn(liodn); + if (!ret) + ret = pamu_config_ppaace(liodn, window_addr, window_size, omi_index, + 0, dma_domain->snoop_id, + dma_domain->stash_id, win_cnt, 0); + spin_unlock_irqrestore(&iommu_lock, flags); + if (ret) { + pr_debug("PAMU PAACE configuration failed for liodn %d, win_cnt =%d\n", liodn, win_cnt); + return ret; + } + + if (win_cnt > 1) { + subwin_size = window_size >> ilog2(win_cnt); + for (i = 0; i < win_cnt; i++) { + spin_lock_irqsave(&iommu_lock, flags); + ret = pamu_disable_spaace(liodn, i); + if (!ret) + ret = pamu_config_spaace(liodn, win_cnt, i, + subwin_size, omi_index, + 0, dma_domain->snoop_id, + dma_domain->stash_id, + 0, 0); + spin_unlock_irqrestore(&iommu_lock, flags); + if (ret) { + pr_debug("PAMU SPAACE configuration failed for liodn %d\n", liodn); + return ret; + } + } + } + + return ret; +} + +static int check_size(u64 size, dma_addr_t iova) +{ + /* + * Size must be a power of two and at least be equal + * to PAMU page size. + */ + if (!is_power_of_2(size) || size < PAMU_PAGE_SIZE) { + pr_debug("%s: size too small or not a power of two\n", __func__); + return -EINVAL; + } + + /* iova must be page size aligned*/ + if (iova & (size - 1)) { + pr_debug("%s: address is not aligned with window size\n", __func__); + return -EINVAL; + } + + return 0; +} + +static struct fsl_dma_domain *iommu_alloc_dma_domain(void) +{ + struct fsl_dma_domain *domain; + + domain = kmem_cache_zalloc(fsl_pamu_domain_cache, GFP_KERNEL); + if (!domain) + return NULL; + + domain->stash_id = ~(u32)0; + domain->snoop_id = ~(u32)0; + domain->win_cnt = pamu_get_max_subwin_cnt(); + domain->geom_size = 0; + + INIT_LIST_HEAD(&domain->devices); + + spin_lock_init(&domain->domain_lock); + + return domain; +} + +static inline struct device_domain_info *find_domain(struct device *dev) +{ + return dev->archdata.iommu_domain; +} + +static void remove_device_ref(struct device_domain_info *info, u32 win_cnt) +{ + unsigned long flags; + + list_del(&info->link); + spin_lock_irqsave(&iommu_lock, flags); + if (win_cnt > 1) + pamu_free_subwins(info->liodn); + pamu_disable_liodn(info->liodn); + spin_unlock_irqrestore(&iommu_lock, flags); + spin_lock_irqsave(&device_domain_lock, flags); + info->dev->archdata.iommu_domain = NULL; + kmem_cache_free(iommu_devinfo_cache, info); + spin_unlock_irqrestore(&device_domain_lock, flags); +} + +static void detach_device(struct device *dev, struct fsl_dma_domain *dma_domain) +{ + struct device_domain_info *info, *tmp; + unsigned long flags; + + spin_lock_irqsave(&dma_domain->domain_lock, flags); + /* Remove the device from the domain device list */ + list_for_each_entry_safe(info, tmp, &dma_domain->devices, link) { + if (!dev || (info->dev == dev)) + remove_device_ref(info, dma_domain->win_cnt); + } + spin_unlock_irqrestore(&dma_domain->domain_lock, flags); +} + +static void attach_device(struct fsl_dma_domain *dma_domain, int liodn, struct device *dev) +{ + struct device_domain_info *info, *old_domain_info; + unsigned long flags; + + spin_lock_irqsave(&device_domain_lock, flags); + /* + * Check here if the device is already attached to domain or not. + * If the device is already attached to a domain detach it. + */ + old_domain_info = find_domain(dev); + if (old_domain_info && old_domain_info->domain != dma_domain) { + spin_unlock_irqrestore(&device_domain_lock, flags); + detach_device(dev, old_domain_info->domain); + spin_lock_irqsave(&device_domain_lock, flags); + } + + info = kmem_cache_zalloc(iommu_devinfo_cache, GFP_ATOMIC); + + info->dev = dev; + info->liodn = liodn; + info->domain = dma_domain; + + list_add(&info->link, &dma_domain->devices); + /* + * In case of devices with multiple LIODNs just store + * the info for the first LIODN as all + * LIODNs share the same domain + */ + if (!old_domain_info) + dev->archdata.iommu_domain = info; + spin_unlock_irqrestore(&device_domain_lock, flags); + +} + +static phys_addr_t fsl_pamu_iova_to_phys(struct iommu_domain *domain, + dma_addr_t iova) +{ + struct fsl_dma_domain *dma_domain = domain->priv; + + if ((iova < domain->geometry.aperture_start) || + iova > (domain->geometry.aperture_end)) + return 0; + + return get_phys_addr(dma_domain, iova); +} + +static int fsl_pamu_domain_has_cap(struct iommu_domain *domain, + unsigned long cap) +{ + return cap == IOMMU_CAP_CACHE_COHERENCY; +} + +static void fsl_pamu_domain_destroy(struct iommu_domain *domain) +{ + struct fsl_dma_domain *dma_domain = domain->priv; + + domain->priv = NULL; + + /* remove all the devices from the device list */ + detach_device(NULL, dma_domain); + + dma_domain->enabled = 0; + dma_domain->mapped = 0; + + kmem_cache_free(fsl_pamu_domain_cache, dma_domain); +} + +static int fsl_pamu_domain_init(struct iommu_domain *domain) +{ + struct fsl_dma_domain *dma_domain; + + dma_domain = iommu_alloc_dma_domain(); + if (!dma_domain) { + pr_debug("dma_domain allocation failed\n"); + return -ENOMEM; + } + domain->priv = dma_domain; + dma_domain->iommu_domain = domain; + /* defaul geometry 64 GB i.e. maximum system address */ + domain->geometry.aperture_start = 0; + domain->geometry.aperture_end = (1ULL << 36) - 1; + domain->geometry.force_aperture = true; + + return 0; +} + +/* Configure geometry settings for all LIODNs associated with domain */ +static int pamu_set_domain_geometry(struct fsl_dma_domain *dma_domain, + struct iommu_domain_geometry *geom_attr, + u32 win_cnt) +{ + struct device_domain_info *info; + int ret = 0; + + list_for_each_entry(info, &dma_domain->devices, link) { + ret = pamu_set_liodn(info->liodn, info->dev, dma_domain, + geom_attr, win_cnt); + if (ret) + break; + } + + return ret; +} + +/* Update stash destination for all LIODNs associated with the domain */ +static int update_domain_stash(struct fsl_dma_domain *dma_domain, u32 val) +{ + struct device_domain_info *info; + int ret = 0; + + list_for_each_entry(info, &dma_domain->devices, link) { + ret = update_liodn_stash(info->liodn, dma_domain, val); + if (ret) + break; + } + + return ret; +} + +/* Update domain mappings for all LIODNs associated with the domain */ +static int update_domain_mapping(struct fsl_dma_domain *dma_domain, u32 wnd_nr) +{ + struct device_domain_info *info; + int ret = 0; + + list_for_each_entry(info, &dma_domain->devices, link) { + ret = update_liodn(info->liodn, dma_domain, wnd_nr); + if (ret) + break; + } + return ret; +} + +static int disable_domain_win(struct fsl_dma_domain *dma_domain, u32 wnd_nr) +{ + struct device_domain_info *info; + int ret = 0; + + list_for_each_entry(info, &dma_domain->devices, link) { + if (dma_domain->win_cnt == 1 && dma_domain->enabled) { + ret = pamu_disable_liodn(info->liodn); + if (!ret) + dma_domain->enabled = 0; + } else { + ret = pamu_disable_spaace(info->liodn, wnd_nr); + } + } + + return ret; +} + +static void fsl_pamu_window_disable(struct iommu_domain *domain, u32 wnd_nr) +{ + struct fsl_dma_domain *dma_domain = domain->priv; + unsigned long flags; + int ret; + + spin_lock_irqsave(&dma_domain->domain_lock, flags); + if (!dma_domain->win_arr) { + pr_debug("Number of windows not configured\n"); + spin_unlock_irqrestore(&dma_domain->domain_lock, flags); + return; + } + + if (wnd_nr >= dma_domain->win_cnt) { + pr_debug("Invalid window index\n"); + spin_unlock_irqrestore(&dma_domain->domain_lock, flags); + return; + } + + if (dma_domain->win_arr[wnd_nr].valid) { + ret = disable_domain_win(dma_domain, wnd_nr); + if (!ret) { + dma_domain->win_arr[wnd_nr].valid = 0; + dma_domain->mapped--; + } + } + + spin_unlock_irqrestore(&dma_domain->domain_lock, flags); + +} + +static int fsl_pamu_window_enable(struct iommu_domain *domain, u32 wnd_nr, + phys_addr_t paddr, u64 size, int prot) +{ + struct fsl_dma_domain *dma_domain = domain->priv; + struct dma_window *wnd; + int pamu_prot = 0; + int ret; + unsigned long flags; + u64 win_size; + + if (prot & IOMMU_READ) + pamu_prot |= PAACE_AP_PERMS_QUERY; + if (prot & IOMMU_WRITE) + pamu_prot |= PAACE_AP_PERMS_UPDATE; + + spin_lock_irqsave(&dma_domain->domain_lock, flags); + if (!dma_domain->win_arr) { + pr_debug("Number of windows not configured\n"); + spin_unlock_irqrestore(&dma_domain->domain_lock, flags); + return -ENODEV; + } + + if (wnd_nr >= dma_domain->win_cnt) { + pr_debug("Invalid window index\n"); + spin_unlock_irqrestore(&dma_domain->domain_lock, flags); + return -EINVAL; + } + + win_size = dma_domain->geom_size >> ilog2(dma_domain->win_cnt); + if (size > win_size) { + pr_debug("Invalid window size \n"); + spin_unlock_irqrestore(&dma_domain->domain_lock, flags); + return -EINVAL; + } + + if (dma_domain->win_cnt == 1) { + if (dma_domain->enabled) { + pr_debug("Disable the window before updating the mapping\n"); + spin_unlock_irqrestore(&dma_domain->domain_lock, flags); + return -EBUSY; + } + + ret = check_size(size, domain->geometry.aperture_start); + if (ret) { + pr_debug("Aperture start not aligned to the size\n"); + spin_unlock_irqrestore(&dma_domain->domain_lock, flags); + return -EINVAL; + } + } + + wnd = &dma_domain->win_arr[wnd_nr]; + if (!wnd->valid) { + wnd->paddr = paddr; + wnd->size = size; + wnd->prot = pamu_prot; + + ret = update_domain_mapping(dma_domain, wnd_nr); + if (!ret) { + wnd->valid = 1; + dma_domain->mapped++; + } + } else { + pr_debug("Disable the window before updating the mapping\n"); + ret = -EBUSY; + } + + spin_unlock_irqrestore(&dma_domain->domain_lock, flags); + + return ret; +} + +/* + * Attach the LIODN to the DMA domain and configure the geometry + * and window mappings. + */ +static int handle_attach_device(struct fsl_dma_domain *dma_domain, + struct device *dev, const u32 *liodn, + int num) +{ + unsigned long flags; + struct iommu_domain *domain = dma_domain->iommu_domain; + int ret = 0; + int i; + + spin_lock_irqsave(&dma_domain->domain_lock, flags); + for (i = 0; i < num; i++) { + + /* Ensure that LIODN value is valid */ + if (liodn[i] >= PAACE_NUMBER_ENTRIES) { + pr_debug("Invalid liodn %d, attach device failed for %s\n", + liodn[i], dev->of_node->full_name); + ret = -EINVAL; + break; + } + + attach_device(dma_domain, liodn[i], dev); + /* + * Check if geometry has already been configured + * for the domain. If yes, set the geometry for + * the LIODN. + */ + if (dma_domain->win_arr) { + u32 win_cnt = dma_domain->win_cnt > 1 ? dma_domain->win_cnt : 0; + ret = pamu_set_liodn(liodn[i], dev, dma_domain, + &domain->geometry, + win_cnt); + if (ret) + break; + if (dma_domain->mapped) { + /* + * Create window/subwindow mapping for + * the LIODN. + */ + ret = map_liodn(liodn[i], dma_domain); + if (ret) + break; + } + } + } + spin_unlock_irqrestore(&dma_domain->domain_lock, flags); + + return ret; +} + +static int fsl_pamu_attach_device(struct iommu_domain *domain, + struct device *dev) +{ + struct fsl_dma_domain *dma_domain = domain->priv; + const u32 *liodn; + u32 liodn_cnt; + int len, ret = 0; + struct pci_dev *pdev = NULL; + struct pci_controller *pci_ctl; + + /* + * Use LIODN of the PCI controller while attaching a + * PCI device. + */ + if (dev->bus == &pci_bus_type) { + pdev = to_pci_dev(dev); + pci_ctl = pci_bus_to_host(pdev->bus); + /* + * make dev point to pci controller device + * so we can get the LIODN programmed by + * u-boot. + */ + dev = pci_ctl->parent; + } + + liodn = of_get_property(dev->of_node, "fsl,liodn", &len); + if (liodn) { + liodn_cnt = len / sizeof(u32); + ret = handle_attach_device(dma_domain, dev, + liodn, liodn_cnt); + } else { + pr_debug("missing fsl,liodn property at %s\n", + dev->of_node->full_name); + ret = -EINVAL; + } + + return ret; +} + +static void fsl_pamu_detach_device(struct iommu_domain *domain, + struct device *dev) +{ + struct fsl_dma_domain *dma_domain = domain->priv; + const u32 *prop; + int len; + struct pci_dev *pdev = NULL; + struct pci_controller *pci_ctl; + + /* + * Use LIODN of the PCI controller while detaching a + * PCI device. + */ + if (dev->bus == &pci_bus_type) { + pdev = to_pci_dev(dev); + pci_ctl = pci_bus_to_host(pdev->bus); + /* + * make dev point to pci controller device + * so we can get the LIODN programmed by + * u-boot. + */ + dev = pci_ctl->parent; + } + + prop = of_get_property(dev->of_node, "fsl,liodn", &len); + if (prop) + detach_device(dev, dma_domain); + else + pr_debug("missing fsl,liodn property at %s\n", + dev->of_node->full_name); +} + +static int configure_domain_geometry(struct iommu_domain *domain, void *data) +{ + struct iommu_domain_geometry *geom_attr = data; + struct fsl_dma_domain *dma_domain = domain->priv; + dma_addr_t geom_size; + unsigned long flags; + + geom_size = geom_attr->aperture_end - geom_attr->aperture_start + 1; + /* + * Sanity check the geometry size. Also, we do not support + * DMA outside of the geometry. + */ + if (check_size(geom_size, geom_attr->aperture_start) || + !geom_attr->force_aperture) { + pr_debug("Invalid PAMU geometry attributes\n"); + return -EINVAL; + } + + spin_lock_irqsave(&dma_domain->domain_lock, flags); + if (dma_domain->enabled) { + pr_debug("Can't set geometry attributes as domain is active\n"); + spin_unlock_irqrestore(&dma_domain->domain_lock, flags); + return -EBUSY; + } + + /* Copy the domain geometry information */ + memcpy(&domain->geometry, geom_attr, + sizeof(struct iommu_domain_geometry)); + dma_domain->geom_size = geom_size; + + spin_unlock_irqrestore(&dma_domain->domain_lock, flags); + + return 0; +} + +/* Set the domain stash attribute */ +static int configure_domain_stash(struct fsl_dma_domain *dma_domain, void *data) +{ + struct pamu_stash_attribute *stash_attr = data; + unsigned long flags; + int ret; + + spin_lock_irqsave(&dma_domain->domain_lock, flags); + + memcpy(&dma_domain->dma_stash, stash_attr, + sizeof(struct pamu_stash_attribute)); + + dma_domain->stash_id = get_stash_id(stash_attr->cache, + stash_attr->cpu); + if (dma_domain->stash_id == ~(u32)0) { + pr_debug("Invalid stash attributes\n"); + spin_unlock_irqrestore(&dma_domain->domain_lock, flags); + return -EINVAL; + } + + ret = update_domain_stash(dma_domain, dma_domain->stash_id); + + spin_unlock_irqrestore(&dma_domain->domain_lock, flags); + + return ret; +} + +/* Configure domain dma state i.e. enable/disable DMA*/ +static int configure_domain_dma_state(struct fsl_dma_domain *dma_domain, bool enable) +{ + struct device_domain_info *info; + unsigned long flags; + int ret; + + spin_lock_irqsave(&dma_domain->domain_lock, flags); + + if (enable && !dma_domain->mapped) { + pr_debug("Can't enable DMA domain without valid mapping\n"); + spin_unlock_irqrestore(&dma_domain->domain_lock, flags); + return -ENODEV; + } + + dma_domain->enabled = enable; + list_for_each_entry(info, &dma_domain->devices, + link) { + ret = (enable) ? pamu_enable_liodn(info->liodn) : + pamu_disable_liodn(info->liodn); + if (ret) + pr_debug("Unable to set dma state for liodn %d", + info->liodn); + } + spin_unlock_irqrestore(&dma_domain->domain_lock, flags); + + return 0; +} + +static int fsl_pamu_set_domain_attr(struct iommu_domain *domain, + enum iommu_attr attr_type, void *data) +{ + struct fsl_dma_domain *dma_domain = domain->priv; + int ret = 0; + + + switch (attr_type) { + case DOMAIN_ATTR_GEOMETRY: + ret = configure_domain_geometry(domain, data); + break; + case DOMAIN_ATTR_FSL_PAMU_STASH: + ret = configure_domain_stash(dma_domain, data); + break; + case DOMAIN_ATTR_FSL_PAMU_ENABLE: + ret = configure_domain_dma_state(dma_domain, *(int *)data); + break; + default: + pr_debug("Unsupported attribute type\n"); + ret = -EINVAL; + break; + }; + + return ret; +} + +static int fsl_pamu_get_domain_attr(struct iommu_domain *domain, + enum iommu_attr attr_type, void *data) +{ + struct fsl_dma_domain *dma_domain = domain->priv; + int ret = 0; + + + switch (attr_type) { + case DOMAIN_ATTR_FSL_PAMU_STASH: + memcpy((struct pamu_stash_attribute *) data, &dma_domain->dma_stash, + sizeof(struct pamu_stash_attribute)); + break; + case DOMAIN_ATTR_FSL_PAMU_ENABLE: + *(int *)data = dma_domain->enabled; + break; + case DOMAIN_ATTR_FSL_PAMUV1: + *(int *)data = DOMAIN_ATTR_FSL_PAMUV1; + break; + default: + pr_debug("Unsupported attribute type\n"); + ret = -EINVAL; + break; + }; + + return ret; +} + +#define REQ_ACS_FLAGS (PCI_ACS_SV | PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_UF) + +static struct iommu_group *get_device_iommu_group(struct device *dev) +{ + struct iommu_group *group; + + group = iommu_group_get(dev); + if (!group) + group = iommu_group_alloc(); + + return group; +} + +static bool check_pci_ctl_endpt_part(struct pci_controller *pci_ctl) +{ + u32 version; + + /* Check the PCI controller version number by readding BRR1 register */ + version = in_be32(pci_ctl->cfg_addr + (PCI_FSL_BRR1 >> 2)); + version &= PCI_FSL_BRR1_VER; + /* If PCI controller version is >= 0x204 we can partition endpoints*/ + if (version >= 0x204) + return 1; + + return 0; +} + +/* Get iommu group information from peer devices or devices on the parent bus */ +static struct iommu_group *get_shared_pci_device_group(struct pci_dev *pdev) +{ + struct pci_dev *tmp; + struct iommu_group *group; + struct pci_bus *bus = pdev->bus; + + /* + * Traverese the pci bus device list to get + * the shared iommu group. + */ + while (bus) { + list_for_each_entry(tmp, &bus->devices, bus_list) { + if (tmp == pdev) + continue; + group = iommu_group_get(&tmp->dev); + if (group) + return group; + } + + bus = bus->parent; + } + + return NULL; +} + +static struct iommu_group *get_pci_device_group(struct pci_dev *pdev) +{ + struct pci_controller *pci_ctl; + bool pci_endpt_partioning; + struct iommu_group *group = NULL; + struct pci_dev *bridge, *dma_pdev = NULL; + + pci_ctl = pci_bus_to_host(pdev->bus); + pci_endpt_partioning = check_pci_ctl_endpt_part(pci_ctl); + /* We can partition PCIe devices so assign device group to the device */ + if (pci_endpt_partioning) { + bridge = pci_find_upstream_pcie_bridge(pdev); + if (bridge) { + if (pci_is_pcie(bridge)) + dma_pdev = pci_get_domain_bus_and_slot( + pci_domain_nr(pdev->bus), + bridge->subordinate->number, 0); + if (!dma_pdev) + dma_pdev = pci_dev_get(bridge); + } else + dma_pdev = pci_dev_get(pdev); + + /* Account for quirked devices */ + swap_pci_ref(&dma_pdev, pci_get_dma_source(dma_pdev)); + + /* + * If it's a multifunction device that does not support our + * required ACS flags, add to the same group as lowest numbered + * function that also does not suport the required ACS flags. + */ + if (dma_pdev->multifunction && + !pci_acs_enabled(dma_pdev, REQ_ACS_FLAGS)) { + u8 i, slot = PCI_SLOT(dma_pdev->devfn); + + for (i = 0; i < 8; i++) { + struct pci_dev *tmp; + + tmp = pci_get_slot(dma_pdev->bus, PCI_DEVFN(slot, i)); + if (!tmp) + continue; + + if (!pci_acs_enabled(tmp, REQ_ACS_FLAGS)) { + swap_pci_ref(&dma_pdev, tmp); + break; + } + pci_dev_put(tmp); + } + } + + /* + * Devices on the root bus go through the iommu. If that's not us, + * find the next upstream device and test ACS up to the root bus. + * Finding the next device may require skipping virtual buses. + */ + while (!pci_is_root_bus(dma_pdev->bus)) { + struct pci_bus *bus = dma_pdev->bus; + + while (!bus->self) { + if (!pci_is_root_bus(bus)) + bus = bus->parent; + else + goto root_bus; + } + + if (pci_acs_path_enabled(bus->self, NULL, REQ_ACS_FLAGS)) + break; + + swap_pci_ref(&dma_pdev, pci_dev_get(bus->self)); + } + +root_bus: + group = get_device_iommu_group(&dma_pdev->dev); + pci_dev_put(dma_pdev); + /* + * PCIe controller is not a paritionable entity + * free the controller device iommu_group. + */ + if (pci_ctl->parent->iommu_group) + iommu_group_remove_device(pci_ctl->parent); + } else { + /* + * All devices connected to the controller will share the + * PCI controllers device group. If this is the first + * device to be probed for the pci controller, copy the + * device group information from the PCI controller device + * node and remove the PCI controller iommu group. + * For subsequent devices, the iommu group information can + * be obtained from sibling devices (i.e. from the bus_devices + * link list). + */ + if (pci_ctl->parent->iommu_group) { + group = get_device_iommu_group(pci_ctl->parent); + iommu_group_remove_device(pci_ctl->parent); + } else + group = get_shared_pci_device_group(pdev); + } + + return group; +} + +static int fsl_pamu_add_device(struct device *dev) +{ + struct iommu_group *group = NULL; + struct pci_dev *pdev; + const u32 *prop; + int ret, len; + + /* + * For platform devices we allocate a separate group for + * each of the devices. + */ + if (dev->bus == &pci_bus_type) { + pdev = to_pci_dev(dev); + /* Don't create device groups for virtual PCI bridges */ + if (pdev->subordinate) + return 0; + + group = get_pci_device_group(pdev); + + } else { + prop = of_get_property(dev->of_node, "fsl,liodn", &len); + if (prop) + group = get_device_iommu_group(dev); + } + + if (!group || IS_ERR(group)) + return PTR_ERR(group); + + ret = iommu_group_add_device(group, dev); + + iommu_group_put(group); + return ret; +} + +static void fsl_pamu_remove_device(struct device *dev) +{ + iommu_group_remove_device(dev); +} + +static int fsl_pamu_set_windows(struct iommu_domain *domain, u32 w_count) +{ + struct fsl_dma_domain *dma_domain = domain->priv; + unsigned long flags; + int ret; + + spin_lock_irqsave(&dma_domain->domain_lock, flags); + /* Ensure domain is inactive i.e. DMA should be disabled for the domain */ + if (dma_domain->enabled) { + pr_debug("Can't set geometry attributes as domain is active\n"); + spin_unlock_irqrestore(&dma_domain->domain_lock, flags); + return -EBUSY; + } + + /* Ensure that the geometry has been set for the domain */ + if (!dma_domain->geom_size) { + pr_debug("Please configure geometry before setting the number of windows\n"); + spin_unlock_irqrestore(&dma_domain->domain_lock, flags); + return -EINVAL; + } + + /* + * Ensure we have valid window count i.e. it should be less than + * maximum permissible limit and should be a power of two. + */ + if (w_count > pamu_get_max_subwin_cnt() || !is_power_of_2(w_count)) { + pr_debug("Invalid window count\n"); + spin_unlock_irqrestore(&dma_domain->domain_lock, flags); + return -EINVAL; + } + + ret = pamu_set_domain_geometry(dma_domain, &domain->geometry, + ((w_count > 1) ? w_count : 0)); + if (!ret) { + if (dma_domain->win_arr) + kfree(dma_domain->win_arr); + dma_domain->win_arr = kzalloc(sizeof(struct dma_window) * + w_count, GFP_ATOMIC); + if (!dma_domain->win_arr) { + spin_unlock_irqrestore(&dma_domain->domain_lock, flags); + return -ENOMEM; + } + dma_domain->win_cnt = w_count; + } + spin_unlock_irqrestore(&dma_domain->domain_lock, flags); + + return ret; +} + +static u32 fsl_pamu_get_windows(struct iommu_domain *domain) +{ + struct fsl_dma_domain *dma_domain = domain->priv; + + return dma_domain->win_cnt; +} + +static struct iommu_ops fsl_pamu_ops = { + .domain_init = fsl_pamu_domain_init, + .domain_destroy = fsl_pamu_domain_destroy, + .attach_dev = fsl_pamu_attach_device, + .detach_dev = fsl_pamu_detach_device, + .domain_window_enable = fsl_pamu_window_enable, + .domain_window_disable = fsl_pamu_window_disable, + .domain_get_windows = fsl_pamu_get_windows, + .domain_set_windows = fsl_pamu_set_windows, + .iova_to_phys = fsl_pamu_iova_to_phys, + .domain_has_cap = fsl_pamu_domain_has_cap, + .domain_set_attr = fsl_pamu_set_domain_attr, + .domain_get_attr = fsl_pamu_get_domain_attr, + .add_device = fsl_pamu_add_device, + .remove_device = fsl_pamu_remove_device, +}; + +int pamu_domain_init() +{ + int ret = 0; + + ret = iommu_init_mempool(); + if (ret) + return ret; + + bus_set_iommu(&platform_bus_type, &fsl_pamu_ops); + bus_set_iommu(&pci_bus_type, &fsl_pamu_ops); + + return ret; +} diff --git a/drivers/iommu/fsl_pamu_domain.h b/drivers/iommu/fsl_pamu_domain.h new file mode 100644 index 000000000000..c90293f99709 --- /dev/null +++ b/drivers/iommu/fsl_pamu_domain.h @@ -0,0 +1,85 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright (C) 2013 Freescale Semiconductor, Inc. + * + */ + +#ifndef __FSL_PAMU_DOMAIN_H +#define __FSL_PAMU_DOMAIN_H + +#include "fsl_pamu.h" + +struct dma_window { + phys_addr_t paddr; + u64 size; + int valid; + int prot; +}; + +struct fsl_dma_domain { + /* + * Indicates the geometry size for the domain. + * This would be set when the geometry is + * configured for the domain. + */ + dma_addr_t geom_size; + /* + * Number of windows assocaited with this domain. + * During domain initialization, it is set to the + * the maximum number of subwindows allowed for a LIODN. + * Minimum value for this is 1 indicating a single PAMU + * window, without any sub windows. Value can be set/ + * queried by set_attr/get_attr API for DOMAIN_ATTR_WINDOWS. + * Value can only be set once the geometry has been configured. + */ + u32 win_cnt; + /* + * win_arr contains information of the configured + * windows for a domain. This is allocated only + * when the number of windows for the domain are + * set. + */ + struct dma_window *win_arr; + /* list of devices associated with the domain */ + struct list_head devices; + /* dma_domain states: + * mapped - A particular mapping has been created + * within the configured geometry. + * enabled - DMA has been enabled for the given + * domain. This translates to setting of the + * valid bit for the primary PAACE in the PAMU + * PAACT table. Domain geometry should be set and + * it must have a valid mapping before DMA can be + * enabled for it. + * + */ + int mapped; + int enabled; + /* stash_id obtained from the stash attribute details */ + u32 stash_id; + struct pamu_stash_attribute dma_stash; + u32 snoop_id; + struct iommu_domain *iommu_domain; + spinlock_t domain_lock; +}; + +/* domain-device relationship */ +struct device_domain_info { + struct list_head link; /* link to domain siblings */ + struct device *dev; + u32 liodn; + struct fsl_dma_domain *domain; /* pointer to domain */ +}; +#endif /* __FSL_PAMU_DOMAIN_H */ diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index eec0d3e04bf5..15e9b57e9cf0 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -890,56 +890,54 @@ static int dma_pte_clear_range(struct dmar_domain *domain, return order; } +static void dma_pte_free_level(struct dmar_domain *domain, int level, + struct dma_pte *pte, unsigned long pfn, + unsigned long start_pfn, unsigned long last_pfn) +{ + pfn = max(start_pfn, pfn); + pte = &pte[pfn_level_offset(pfn, level)]; + + do { + unsigned long level_pfn; + struct dma_pte *level_pte; + + if (!dma_pte_present(pte) || dma_pte_superpage(pte)) + goto next; + + level_pfn = pfn & level_mask(level - 1); + level_pte = phys_to_virt(dma_pte_addr(pte)); + + if (level > 2) + dma_pte_free_level(domain, level - 1, level_pte, + level_pfn, start_pfn, last_pfn); + + /* If range covers entire pagetable, free it */ + if (!(start_pfn > level_pfn || + last_pfn < level_pfn + level_size(level))) { + dma_clear_pte(pte); + domain_flush_cache(domain, pte, sizeof(*pte)); + free_pgtable_page(level_pte); + } +next: + pfn += level_size(level); + } while (!first_pte_in_page(++pte) && pfn <= last_pfn); +} + /* free page table pages. last level pte should already be cleared */ static void dma_pte_free_pagetable(struct dmar_domain *domain, unsigned long start_pfn, unsigned long last_pfn) { int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT; - struct dma_pte *first_pte, *pte; - int total = agaw_to_level(domain->agaw); - int level; - unsigned long tmp; - int large_page = 2; BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width); BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width); BUG_ON(start_pfn > last_pfn); /* We don't need lock here; nobody else touches the iova range */ - level = 2; - while (level <= total) { - tmp = align_to_level(start_pfn, level); - - /* If we can't even clear one PTE at this level, we're done */ - if (tmp + level_size(level) - 1 > last_pfn) - return; - - do { - large_page = level; - first_pte = pte = dma_pfn_level_pte(domain, tmp, level, &large_page); - if (large_page > level) - level = large_page + 1; - if (!pte) { - tmp = align_to_level(tmp + 1, level + 1); - continue; - } - do { - if (dma_pte_present(pte)) { - free_pgtable_page(phys_to_virt(dma_pte_addr(pte))); - dma_clear_pte(pte); - } - pte++; - tmp += level_size(level); - } while (!first_pte_in_page(pte) && - tmp + level_size(level) - 1 <= last_pfn); + dma_pte_free_level(domain, agaw_to_level(domain->agaw), + domain->pgd, 0, start_pfn, last_pfn); - domain_flush_cache(domain, first_pte, - (void *)pte - (void *)first_pte); - - } while (tmp && tmp + level_size(level) - 1 <= last_pfn); - level++; - } /* free pgd */ if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) { free_pgtable_page(domain->pgd); diff --git a/drivers/leds/Kconfig b/drivers/leds/Kconfig index 074bcb3892b5..875bbe4c962e 100644 --- a/drivers/leds/Kconfig +++ b/drivers/leds/Kconfig @@ -194,11 +194,11 @@ config LEDS_LP3944 module will be called leds-lp3944. config LEDS_LP55XX_COMMON - tristate "Common Driver for TI/National LP5521, LP5523/55231 and LP5562" - depends on LEDS_LP5521 || LEDS_LP5523 || LEDS_LP5562 + tristate "Common Driver for TI/National LP5521/5523/55231/5562/8501" + depends on LEDS_LP5521 || LEDS_LP5523 || LEDS_LP5562 || LEDS_LP8501 select FW_LOADER help - This option supports common operations for LP5521 and LP5523/55231 + This option supports common operations for LP5521/5523/55231/5562/8501 devices. config LEDS_LP5521 @@ -232,6 +232,18 @@ config LEDS_LP5562 Driver provides direct control via LED class and interface for programming the engines. +config LEDS_LP8501 + tristate "LED Support for TI LP8501 LED driver chip" + depends on LEDS_CLASS && I2C + select LEDS_LP55XX_COMMON + help + If you say yes here you get support for TI LP8501 LED driver. + It is 9 channel chip with programmable engines. + Driver provides direct control via LED class and interface for + programming the engines. + It is similar as LP5523, but output power selection is available. + And register layout and engine program schemes are different. + config LEDS_LP8788 tristate "LED support for the TI LP8788 PMIC" depends on LEDS_CLASS @@ -279,13 +291,14 @@ config LEDS_PCA955X LED driver chips accessed via the I2C bus. Supported devices include PCA9550, PCA9551, PCA9552, and PCA9553. -config LEDS_PCA9633 - tristate "LED support for PCA9633 I2C chip" +config LEDS_PCA963X + tristate "LED support for PCA963x I2C chip" depends on LEDS_CLASS depends on I2C help - This option enables support for LEDs connected to the PCA9633 - LED driver chip accessed via the I2C bus. + This option enables support for LEDs connected to the PCA963x + LED driver chip accessed via the I2C bus. Supported + devices include PCA9633 and PCA9634 config LEDS_WM831X_STATUS tristate "LED support for status LEDs on WM831x PMICs" @@ -398,10 +411,7 @@ config LEDS_MC13783 config LEDS_NS2 tristate "LED support for Network Space v2 GPIO LEDs" depends on LEDS_CLASS - depends on MACH_NETSPACE_V2 || MACH_INETSPACE_V2 || \ - MACH_NETSPACE_MAX_V2 || MACH_D2NET_V2 || \ - MACH_NETSPACE_V2_DT || MACH_INETSPACE_V2_DT || \ - MACH_NETSPACE_MAX_V2_DT || MACH_NETSPACE_MINI_V2_DT + depends on ARCH_KIRKWOOD default y help This option enable support for the dual-GPIO LED found on the @@ -410,8 +420,8 @@ config LEDS_NS2 config LEDS_NETXBIG tristate "LED support for Big Network series LEDs" - depends on MACH_NET2BIG_V2 || MACH_NET5BIG_V2 depends on LEDS_CLASS + depends on ARCH_KIRKWOOD default y help This option enable support for LEDs found on the LaCie 2Big diff --git a/drivers/leds/Makefile b/drivers/leds/Makefile index ae4b6135f665..8979b0b2c85e 100644 --- a/drivers/leds/Makefile +++ b/drivers/leds/Makefile @@ -27,6 +27,7 @@ obj-$(CONFIG_LEDS_LP55XX_COMMON) += leds-lp55xx-common.o obj-$(CONFIG_LEDS_LP5521) += leds-lp5521.o obj-$(CONFIG_LEDS_LP5523) += leds-lp5523.o obj-$(CONFIG_LEDS_LP5562) += leds-lp5562.o +obj-$(CONFIG_LEDS_LP8501) += leds-lp8501.o obj-$(CONFIG_LEDS_LP8788) += leds-lp8788.o obj-$(CONFIG_LEDS_TCA6507) += leds-tca6507.o obj-$(CONFIG_LEDS_CLEVO_MAIL) += leds-clevo-mail.o @@ -34,7 +35,7 @@ obj-$(CONFIG_LEDS_HP6XX) += leds-hp6xx.o obj-$(CONFIG_LEDS_OT200) += leds-ot200.o obj-$(CONFIG_LEDS_FSG) += leds-fsg.o obj-$(CONFIG_LEDS_PCA955X) += leds-pca955x.o -obj-$(CONFIG_LEDS_PCA9633) += leds-pca9633.o +obj-$(CONFIG_LEDS_PCA963X) += leds-pca963x.o obj-$(CONFIG_LEDS_DA903X) += leds-da903x.o obj-$(CONFIG_LEDS_DA9052) += leds-da9052.o obj-$(CONFIG_LEDS_WM831X_STATUS) += leds-wm831x-status.o diff --git a/drivers/leds/leds-88pm860x.c b/drivers/leds/leds-88pm860x.c index 232b3ce902e5..5f588c0a376e 100644 --- a/drivers/leds/leds-88pm860x.c +++ b/drivers/leds/leds-88pm860x.c @@ -157,7 +157,7 @@ static int pm860x_led_dt_init(struct platform_device *pdev, static int pm860x_led_probe(struct platform_device *pdev) { struct pm860x_chip *chip = dev_get_drvdata(pdev->dev.parent); - struct pm860x_led_pdata *pdata = pdev->dev.platform_data; + struct pm860x_led_pdata *pdata = dev_get_platdata(&pdev->dev); struct pm860x_led *data; struct resource *res; int ret = 0; diff --git a/drivers/leds/leds-adp5520.c b/drivers/leds/leds-adp5520.c index e8072abe76e5..7e311a120b11 100644 --- a/drivers/leds/leds-adp5520.c +++ b/drivers/leds/leds-adp5520.c @@ -87,7 +87,7 @@ static int adp5520_led_setup(struct adp5520_led *led) static int adp5520_led_prepare(struct platform_device *pdev) { - struct adp5520_leds_platform_data *pdata = pdev->dev.platform_data; + struct adp5520_leds_platform_data *pdata = dev_get_platdata(&pdev->dev); struct device *dev = pdev->dev.parent; int ret = 0; @@ -103,7 +103,7 @@ static int adp5520_led_prepare(struct platform_device *pdev) static int adp5520_led_probe(struct platform_device *pdev) { - struct adp5520_leds_platform_data *pdata = pdev->dev.platform_data; + struct adp5520_leds_platform_data *pdata = dev_get_platdata(&pdev->dev); struct adp5520_led *led, *led_dat; struct led_info *cur_led; int ret, i; @@ -185,7 +185,7 @@ err: static int adp5520_led_remove(struct platform_device *pdev) { - struct adp5520_leds_platform_data *pdata = pdev->dev.platform_data; + struct adp5520_leds_platform_data *pdata = dev_get_platdata(&pdev->dev); struct adp5520_led *led; int i; diff --git a/drivers/leds/leds-asic3.c b/drivers/leds/leds-asic3.c index cf9efe421c2b..6de216a89a0c 100644 --- a/drivers/leds/leds-asic3.c +++ b/drivers/leds/leds-asic3.c @@ -94,7 +94,7 @@ static int blink_set(struct led_classdev *cdev, static int asic3_led_probe(struct platform_device *pdev) { - struct asic3_led *led = pdev->dev.platform_data; + struct asic3_led *led = dev_get_platdata(&pdev->dev); int ret; ret = mfd_cell_enable(pdev); @@ -127,7 +127,7 @@ out: static int asic3_led_remove(struct platform_device *pdev) { - struct asic3_led *led = pdev->dev.platform_data; + struct asic3_led *led = dev_get_platdata(&pdev->dev); led_classdev_unregister(led->cdev); diff --git a/drivers/leds/leds-atmel-pwm.c b/drivers/leds/leds-atmel-pwm.c index 90518f84b9c0..56cec8d6a2ac 100644 --- a/drivers/leds/leds-atmel-pwm.c +++ b/drivers/leds/leds-atmel-pwm.c @@ -42,7 +42,7 @@ static int pwmled_probe(struct platform_device *pdev) int i; int status; - pdata = pdev->dev.platform_data; + pdata = dev_get_platdata(&pdev->dev); if (!pdata || pdata->num_leds < 1) return -ENODEV; @@ -119,7 +119,7 @@ static int pwmled_remove(struct platform_device *pdev) struct pwmled *leds; unsigned i; - pdata = pdev->dev.platform_data; + pdata = dev_get_platdata(&pdev->dev); leds = platform_get_drvdata(pdev); for (i = 0; i < pdata->num_leds; i++) { diff --git a/drivers/leds/leds-bd2802.c b/drivers/leds/leds-bd2802.c index 2db04231a792..fb5a3472d614 100644 --- a/drivers/leds/leds-bd2802.c +++ b/drivers/leds/leds-bd2802.c @@ -684,7 +684,7 @@ static int bd2802_probe(struct i2c_client *client, } led->client = client; - pdata = led->pdata = client->dev.platform_data; + pdata = led->pdata = dev_get_platdata(&client->dev); i2c_set_clientdata(client, led); /* Configure RESET GPIO (L: RESET, H: RESET cancel) */ diff --git a/drivers/leds/leds-clevo-mail.c b/drivers/leds/leds-clevo-mail.c index 6a8405df76a3..d93e2455da5c 100644 --- a/drivers/leds/leds-clevo-mail.c +++ b/drivers/leds/leds-clevo-mail.c @@ -40,7 +40,7 @@ static int __init clevo_mail_led_dmi_callback(const struct dmi_system_id *id) * detected as working, but in reality it is not) as low as * possible. */ -static struct dmi_system_id __initdata clevo_mail_led_dmi_table[] = { +static struct dmi_system_id clevo_mail_led_dmi_table[] __initdata = { { .callback = clevo_mail_led_dmi_callback, .ident = "Clevo D410J", diff --git a/drivers/leds/leds-da903x.c b/drivers/leds/leds-da903x.c index c263a21db829..2a4b87f8091a 100644 --- a/drivers/leds/leds-da903x.c +++ b/drivers/leds/leds-da903x.c @@ -93,7 +93,7 @@ static void da903x_led_set(struct led_classdev *led_cdev, static int da903x_led_probe(struct platform_device *pdev) { - struct led_info *pdata = pdev->dev.platform_data; + struct led_info *pdata = dev_get_platdata(&pdev->dev); struct da903x_led *led; int id, ret; diff --git a/drivers/leds/leds-da9052.c b/drivers/leds/leds-da9052.c index efec43344e9f..865d4faf874a 100644 --- a/drivers/leds/leds-da9052.c +++ b/drivers/leds/leds-da9052.c @@ -112,7 +112,7 @@ static int da9052_led_probe(struct platform_device *pdev) int i; da9052 = dev_get_drvdata(pdev->dev.parent); - pdata = da9052->dev->platform_data; + pdata = dev_get_platdata(da9052->dev); if (pdata == NULL) { dev_err(&pdev->dev, "No platform data\n"); goto err; @@ -185,7 +185,7 @@ static int da9052_led_remove(struct platform_device *pdev) int i; da9052 = dev_get_drvdata(pdev->dev.parent); - pdata = da9052->dev->platform_data; + pdata = dev_get_platdata(da9052->dev); pled = pdata->pled; for (i = 0; i < pled->num_leds; i++) { diff --git a/drivers/leds/leds-gpio.c b/drivers/leds/leds-gpio.c index 84d74c373cae..e8b01e57348d 100644 --- a/drivers/leds/leds-gpio.c +++ b/drivers/leds/leds-gpio.c @@ -233,7 +233,7 @@ static struct gpio_leds_priv *gpio_leds_create_of(struct platform_device *pdev) static int gpio_led_probe(struct platform_device *pdev) { - struct gpio_led_platform_data *pdata = pdev->dev.platform_data; + struct gpio_led_platform_data *pdata = dev_get_platdata(&pdev->dev); struct gpio_leds_priv *priv; int i, ret = 0; diff --git a/drivers/leds/leds-lm3530.c b/drivers/leds/leds-lm3530.c index a036a19040fe..652368c2ea9a 100644 --- a/drivers/leds/leds-lm3530.c +++ b/drivers/leds/leds-lm3530.c @@ -403,7 +403,7 @@ static DEVICE_ATTR(mode, 0644, lm3530_mode_get, lm3530_mode_set); static int lm3530_probe(struct i2c_client *client, const struct i2c_device_id *id) { - struct lm3530_platform_data *pdata = client->dev.platform_data; + struct lm3530_platform_data *pdata = dev_get_platdata(&client->dev); struct lm3530_data *drvdata; int err = 0; diff --git a/drivers/leds/leds-lm3533.c b/drivers/leds/leds-lm3533.c index bbf24d038a7f..027ede73b80d 100644 --- a/drivers/leds/leds-lm3533.c +++ b/drivers/leds/leds-lm3533.c @@ -671,7 +671,7 @@ static int lm3533_led_probe(struct platform_device *pdev) if (!lm3533) return -EINVAL; - pdata = pdev->dev.platform_data; + pdata = dev_get_platdata(&pdev->dev); if (!pdata) { dev_err(&pdev->dev, "no platform data\n"); return -EINVAL; diff --git a/drivers/leds/leds-lm355x.c b/drivers/leds/leds-lm355x.c index d81a8e7afd6c..591eb5e58ae3 100644 --- a/drivers/leds/leds-lm355x.c +++ b/drivers/leds/leds-lm355x.c @@ -423,7 +423,7 @@ static const struct regmap_config lm355x_regmap = { static int lm355x_probe(struct i2c_client *client, const struct i2c_device_id *id) { - struct lm355x_platform_data *pdata = client->dev.platform_data; + struct lm355x_platform_data *pdata = dev_get_platdata(&client->dev); struct lm355x_chip_data *chip; int err; diff --git a/drivers/leds/leds-lm3642.c b/drivers/leds/leds-lm3642.c index f361bbef2dec..ceb6b3cde6fe 100644 --- a/drivers/leds/leds-lm3642.c +++ b/drivers/leds/leds-lm3642.c @@ -316,7 +316,7 @@ static const struct regmap_config lm3642_regmap = { static int lm3642_probe(struct i2c_client *client, const struct i2c_device_id *id) { - struct lm3642_platform_data *pdata = client->dev.platform_data; + struct lm3642_platform_data *pdata = dev_get_platdata(&client->dev); struct lm3642_chip_data *chip; int err; diff --git a/drivers/leds/leds-lp3944.c b/drivers/leds/leds-lp3944.c index 0c4386e656c1..8e1abdcd4c9d 100644 --- a/drivers/leds/leds-lp3944.c +++ b/drivers/leds/leds-lp3944.c @@ -289,7 +289,7 @@ static void lp3944_led_set_brightness(struct led_classdev *led_cdev, dev_dbg(&led->client->dev, "%s: %s, %d\n", __func__, led_cdev->name, brightness); - led->status = brightness; + led->status = !!brightness; schedule_work(&led->work); } @@ -377,7 +377,8 @@ exit: static int lp3944_probe(struct i2c_client *client, const struct i2c_device_id *id) { - struct lp3944_platform_data *lp3944_pdata = client->dev.platform_data; + struct lp3944_platform_data *lp3944_pdata = + dev_get_platdata(&client->dev); struct lp3944_data *data; int err; @@ -413,7 +414,7 @@ static int lp3944_probe(struct i2c_client *client, static int lp3944_remove(struct i2c_client *client) { - struct lp3944_platform_data *pdata = client->dev.platform_data; + struct lp3944_platform_data *pdata = dev_get_platdata(&client->dev); struct lp3944_data *data = i2c_get_clientdata(client); int i; diff --git a/drivers/leds/leds-lp5521.c b/drivers/leds/leds-lp5521.c index 1392feb1bcf7..05188351711d 100644 --- a/drivers/leds/leds-lp5521.c +++ b/drivers/leds/leds-lp5521.c @@ -220,17 +220,11 @@ static int lp5521_update_program_memory(struct lp55xx_chip *chip, }; unsigned cmd; char c[3]; - int program_size; int nrchars; - int offset = 0; int ret; - int i; - - /* clear program memory before updating */ - for (i = 0; i < LP5521_PROGRAM_LENGTH; i++) - lp55xx_write(chip, addr[idx] + i, 0); + int offset = 0; + int i = 0; - i = 0; while ((offset < size - 1) && (i < LP5521_PROGRAM_LENGTH)) { /* separate sscanfs because length is working only for %s */ ret = sscanf(data + offset, "%2s%n ", c, &nrchars); @@ -250,11 +244,19 @@ static int lp5521_update_program_memory(struct lp55xx_chip *chip, if (i % 2) goto err; - program_size = i; - for (i = 0; i < program_size; i++) - lp55xx_write(chip, addr[idx] + i, pattern[i]); + mutex_lock(&chip->lock); - return 0; + for (i = 0; i < LP5521_PROGRAM_LENGTH; i++) { + ret = lp55xx_write(chip, addr[idx] + i, pattern[i]); + if (ret) { + mutex_unlock(&chip->lock); + return -EINVAL; + } + } + + mutex_unlock(&chip->lock); + + return size; err: dev_err(&chip->cl->dev, "wrong pattern format\n"); @@ -365,6 +367,80 @@ static void lp5521_led_brightness_work(struct work_struct *work) mutex_unlock(&chip->lock); } +static ssize_t show_engine_mode(struct device *dev, + struct device_attribute *attr, + char *buf, int nr) +{ + struct lp55xx_led *led = i2c_get_clientdata(to_i2c_client(dev)); + struct lp55xx_chip *chip = led->chip; + enum lp55xx_engine_mode mode = chip->engines[nr - 1].mode; + + switch (mode) { + case LP55XX_ENGINE_RUN: + return sprintf(buf, "run\n"); + case LP55XX_ENGINE_LOAD: + return sprintf(buf, "load\n"); + case LP55XX_ENGINE_DISABLED: + default: + return sprintf(buf, "disabled\n"); + } +} +show_mode(1) +show_mode(2) +show_mode(3) + +static ssize_t store_engine_mode(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len, int nr) +{ + struct lp55xx_led *led = i2c_get_clientdata(to_i2c_client(dev)); + struct lp55xx_chip *chip = led->chip; + struct lp55xx_engine *engine = &chip->engines[nr - 1]; + + mutex_lock(&chip->lock); + + chip->engine_idx = nr; + + if (!strncmp(buf, "run", 3)) { + lp5521_run_engine(chip, true); + engine->mode = LP55XX_ENGINE_RUN; + } else if (!strncmp(buf, "load", 4)) { + lp5521_stop_engine(chip); + lp5521_load_engine(chip); + engine->mode = LP55XX_ENGINE_LOAD; + } else if (!strncmp(buf, "disabled", 8)) { + lp5521_stop_engine(chip); + engine->mode = LP55XX_ENGINE_DISABLED; + } + + mutex_unlock(&chip->lock); + + return len; +} +store_mode(1) +store_mode(2) +store_mode(3) + +static ssize_t store_engine_load(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len, int nr) +{ + struct lp55xx_led *led = i2c_get_clientdata(to_i2c_client(dev)); + struct lp55xx_chip *chip = led->chip; + + mutex_lock(&chip->lock); + + chip->engine_idx = nr; + lp5521_load_engine(chip); + + mutex_unlock(&chip->lock); + + return lp5521_update_program_memory(chip, buf, len); +} +store_load(1) +store_load(2) +store_load(3) + static ssize_t lp5521_selftest(struct device *dev, struct device_attribute *attr, char *buf) @@ -381,9 +457,21 @@ static ssize_t lp5521_selftest(struct device *dev, } /* device attributes */ -static DEVICE_ATTR(selftest, S_IRUGO, lp5521_selftest, NULL); +static LP55XX_DEV_ATTR_RW(engine1_mode, show_engine1_mode, store_engine1_mode); +static LP55XX_DEV_ATTR_RW(engine2_mode, show_engine2_mode, store_engine2_mode); +static LP55XX_DEV_ATTR_RW(engine3_mode, show_engine3_mode, store_engine3_mode); +static LP55XX_DEV_ATTR_WO(engine1_load, store_engine1_load); +static LP55XX_DEV_ATTR_WO(engine2_load, store_engine2_load); +static LP55XX_DEV_ATTR_WO(engine3_load, store_engine3_load); +static LP55XX_DEV_ATTR_RO(selftest, lp5521_selftest); static struct attribute *lp5521_attributes[] = { + &dev_attr_engine1_mode.attr, + &dev_attr_engine2_mode.attr, + &dev_attr_engine3_mode.attr, + &dev_attr_engine1_load.attr, + &dev_attr_engine2_load.attr, + &dev_attr_engine3_load.attr, &dev_attr_selftest.attr, NULL }; @@ -420,7 +508,7 @@ static int lp5521_probe(struct i2c_client *client, struct lp55xx_platform_data *pdata; struct device_node *np = client->dev.of_node; - if (!client->dev.platform_data) { + if (!dev_get_platdata(&client->dev)) { if (np) { ret = lp55xx_of_populate_pdata(&client->dev, np); if (ret < 0) @@ -430,7 +518,7 @@ static int lp5521_probe(struct i2c_client *client, return -EINVAL; } } - pdata = client->dev.platform_data; + pdata = dev_get_platdata(&client->dev); chip = devm_kzalloc(&client->dev, sizeof(*chip), GFP_KERNEL); if (!chip) diff --git a/drivers/leds/leds-lp5523.c b/drivers/leds/leds-lp5523.c index 3979428f3100..fe3bcbb5747f 100644 --- a/drivers/leds/leds-lp5523.c +++ b/drivers/leds/leds-lp5523.c @@ -49,6 +49,9 @@ #define LP5523_REG_RESET 0x3D #define LP5523_REG_LED_TEST_CTRL 0x41 #define LP5523_REG_LED_TEST_ADC 0x42 +#define LP5523_REG_CH1_PROG_START 0x4C +#define LP5523_REG_CH2_PROG_START 0x4D +#define LP5523_REG_CH3_PROG_START 0x4E #define LP5523_REG_PROG_PAGE_SEL 0x4F #define LP5523_REG_PROG_MEM 0x50 @@ -65,11 +68,15 @@ #define LP5523_RESET 0xFF #define LP5523_ADC_SHORTCIRC_LIM 80 #define LP5523_EXT_CLK_USED 0x08 +#define LP5523_ENG_STATUS_MASK 0x07 /* Memory Page Selection */ #define LP5523_PAGE_ENG1 0 #define LP5523_PAGE_ENG2 1 #define LP5523_PAGE_ENG3 2 +#define LP5523_PAGE_MUX1 3 +#define LP5523_PAGE_MUX2 4 +#define LP5523_PAGE_MUX3 5 /* Program Memory Operations */ #define LP5523_MODE_ENG1_M 0x30 /* Operation Mode Register */ @@ -94,11 +101,15 @@ #define LP5523_RUN_ENG2 0x08 #define LP5523_RUN_ENG3 0x02 +#define LED_ACTIVE(mux, led) (!!(mux & (0x0001 << led))) + enum lp5523_chip_id { LP5523, LP55231, }; +static int lp5523_init_program_engine(struct lp55xx_chip *chip); + static inline void lp5523_wait_opmode_done(void) { usleep_range(1000, 2000); @@ -134,7 +145,11 @@ static int lp5523_post_init_device(struct lp55xx_chip *chip) if (ret) return ret; - return lp55xx_write(chip, LP5523_REG_ENABLE_LEDS_LSB, 0xff); + ret = lp55xx_write(chip, LP5523_REG_ENABLE_LEDS_LSB, 0xff); + if (ret) + return ret; + + return lp5523_init_program_engine(chip); } static void lp5523_load_engine(struct lp55xx_chip *chip) @@ -152,15 +167,21 @@ static void lp5523_load_engine(struct lp55xx_chip *chip) [LP55XX_ENGINE_3] = LP5523_LOAD_ENG3, }; + lp55xx_update_bits(chip, LP5523_REG_OP_MODE, mask[idx], val[idx]); + + lp5523_wait_opmode_done(); +} + +static void lp5523_load_engine_and_select_page(struct lp55xx_chip *chip) +{ + enum lp55xx_engine_index idx = chip->engine_idx; u8 page_sel[] = { [LP55XX_ENGINE_1] = LP5523_PAGE_ENG1, [LP55XX_ENGINE_2] = LP5523_PAGE_ENG2, [LP55XX_ENGINE_3] = LP5523_PAGE_ENG3, }; - lp55xx_update_bits(chip, LP5523_REG_OP_MODE, mask[idx], val[idx]); - - lp5523_wait_opmode_done(); + lp5523_load_engine(chip); lp55xx_write(chip, LP5523_REG_PROG_PAGE_SEL, page_sel[idx]); } @@ -227,23 +248,75 @@ static void lp5523_run_engine(struct lp55xx_chip *chip, bool start) lp55xx_update_bits(chip, LP5523_REG_ENABLE, LP5523_EXEC_M, exec); } +static int lp5523_init_program_engine(struct lp55xx_chip *chip) +{ + int i; + int j; + int ret; + u8 status; + /* one pattern per engine setting LED MUX start and stop addresses */ + static const u8 pattern[][LP5523_PROGRAM_LENGTH] = { + { 0x9c, 0x30, 0x9c, 0xb0, 0x9d, 0x80, 0xd8, 0x00, 0}, + { 0x9c, 0x40, 0x9c, 0xc0, 0x9d, 0x80, 0xd8, 0x00, 0}, + { 0x9c, 0x50, 0x9c, 0xd0, 0x9d, 0x80, 0xd8, 0x00, 0}, + }; + + /* hardcode 32 bytes of memory for each engine from program memory */ + ret = lp55xx_write(chip, LP5523_REG_CH1_PROG_START, 0x00); + if (ret) + return ret; + + ret = lp55xx_write(chip, LP5523_REG_CH2_PROG_START, 0x10); + if (ret) + return ret; + + ret = lp55xx_write(chip, LP5523_REG_CH3_PROG_START, 0x20); + if (ret) + return ret; + + /* write LED MUX address space for each engine */ + for (i = LP55XX_ENGINE_1; i <= LP55XX_ENGINE_3; i++) { + chip->engine_idx = i; + lp5523_load_engine_and_select_page(chip); + + for (j = 0; j < LP5523_PROGRAM_LENGTH; j++) { + ret = lp55xx_write(chip, LP5523_REG_PROG_MEM + j, + pattern[i - 1][j]); + if (ret) + goto out; + } + } + + lp5523_run_engine(chip, true); + + /* Let the programs run for couple of ms and check the engine status */ + usleep_range(3000, 6000); + lp55xx_read(chip, LP5523_REG_STATUS, &status); + status &= LP5523_ENG_STATUS_MASK; + + if (status != LP5523_ENG_STATUS_MASK) { + dev_err(&chip->cl->dev, + "cound not configure LED engine, status = 0x%.2x\n", + status); + ret = -1; + } + +out: + lp5523_stop_engine(chip); + return ret; +} + static int lp5523_update_program_memory(struct lp55xx_chip *chip, const u8 *data, size_t size) { u8 pattern[LP5523_PROGRAM_LENGTH] = {0}; unsigned cmd; char c[3]; - int update_size; int nrchars; - int offset = 0; int ret; - int i; - - /* clear program memory before updating */ - for (i = 0; i < LP5523_PROGRAM_LENGTH; i++) - lp55xx_write(chip, LP5523_REG_PROG_MEM + i, 0); + int offset = 0; + int i = 0; - i = 0; while ((offset < size - 1) && (i < LP5523_PROGRAM_LENGTH)) { /* separate sscanfs because length is working only for %s */ ret = sscanf(data + offset, "%2s%n ", c, &nrchars); @@ -263,11 +336,19 @@ static int lp5523_update_program_memory(struct lp55xx_chip *chip, if (i % 2) goto err; - update_size = i; - for (i = 0; i < update_size; i++) - lp55xx_write(chip, LP5523_REG_PROG_MEM + i, pattern[i]); + mutex_lock(&chip->lock); - return 0; + for (i = 0; i < LP5523_PROGRAM_LENGTH; i++) { + ret = lp55xx_write(chip, LP5523_REG_PROG_MEM + i, pattern[i]); + if (ret) { + mutex_unlock(&chip->lock); + return -EINVAL; + } + } + + mutex_unlock(&chip->lock); + + return size; err: dev_err(&chip->cl->dev, "wrong pattern format\n"); @@ -290,10 +371,196 @@ static void lp5523_firmware_loaded(struct lp55xx_chip *chip) * 2) write firmware data into program memory */ - lp5523_load_engine(chip); + lp5523_load_engine_and_select_page(chip); lp5523_update_program_memory(chip, fw->data, fw->size); } +static ssize_t show_engine_mode(struct device *dev, + struct device_attribute *attr, + char *buf, int nr) +{ + struct lp55xx_led *led = i2c_get_clientdata(to_i2c_client(dev)); + struct lp55xx_chip *chip = led->chip; + enum lp55xx_engine_mode mode = chip->engines[nr - 1].mode; + + switch (mode) { + case LP55XX_ENGINE_RUN: + return sprintf(buf, "run\n"); + case LP55XX_ENGINE_LOAD: + return sprintf(buf, "load\n"); + case LP55XX_ENGINE_DISABLED: + default: + return sprintf(buf, "disabled\n"); + } +} +show_mode(1) +show_mode(2) +show_mode(3) + +static ssize_t store_engine_mode(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len, int nr) +{ + struct lp55xx_led *led = i2c_get_clientdata(to_i2c_client(dev)); + struct lp55xx_chip *chip = led->chip; + struct lp55xx_engine *engine = &chip->engines[nr - 1]; + + mutex_lock(&chip->lock); + + chip->engine_idx = nr; + + if (!strncmp(buf, "run", 3)) { + lp5523_run_engine(chip, true); + engine->mode = LP55XX_ENGINE_RUN; + } else if (!strncmp(buf, "load", 4)) { + lp5523_stop_engine(chip); + lp5523_load_engine(chip); + engine->mode = LP55XX_ENGINE_LOAD; + } else if (!strncmp(buf, "disabled", 8)) { + lp5523_stop_engine(chip); + engine->mode = LP55XX_ENGINE_DISABLED; + } + + mutex_unlock(&chip->lock); + + return len; +} +store_mode(1) +store_mode(2) +store_mode(3) + +static int lp5523_mux_parse(const char *buf, u16 *mux, size_t len) +{ + u16 tmp_mux = 0; + int i; + + len = min_t(int, len, LP5523_MAX_LEDS); + + for (i = 0; i < len; i++) { + switch (buf[i]) { + case '1': + tmp_mux |= (1 << i); + break; + case '0': + break; + case '\n': + i = len; + break; + default: + return -1; + } + } + *mux = tmp_mux; + + return 0; +} + +static void lp5523_mux_to_array(u16 led_mux, char *array) +{ + int i, pos = 0; + for (i = 0; i < LP5523_MAX_LEDS; i++) + pos += sprintf(array + pos, "%x", LED_ACTIVE(led_mux, i)); + + array[pos] = '\0'; +} + +static ssize_t show_engine_leds(struct device *dev, + struct device_attribute *attr, + char *buf, int nr) +{ + struct lp55xx_led *led = i2c_get_clientdata(to_i2c_client(dev)); + struct lp55xx_chip *chip = led->chip; + char mux[LP5523_MAX_LEDS + 1]; + + lp5523_mux_to_array(chip->engines[nr - 1].led_mux, mux); + + return sprintf(buf, "%s\n", mux); +} +show_leds(1) +show_leds(2) +show_leds(3) + +static int lp5523_load_mux(struct lp55xx_chip *chip, u16 mux, int nr) +{ + struct lp55xx_engine *engine = &chip->engines[nr - 1]; + int ret; + u8 mux_page[] = { + [LP55XX_ENGINE_1] = LP5523_PAGE_MUX1, + [LP55XX_ENGINE_2] = LP5523_PAGE_MUX2, + [LP55XX_ENGINE_3] = LP5523_PAGE_MUX3, + }; + + lp5523_load_engine(chip); + + ret = lp55xx_write(chip, LP5523_REG_PROG_PAGE_SEL, mux_page[nr]); + if (ret) + return ret; + + ret = lp55xx_write(chip, LP5523_REG_PROG_MEM , (u8)(mux >> 8)); + if (ret) + return ret; + + ret = lp55xx_write(chip, LP5523_REG_PROG_MEM + 1, (u8)(mux)); + if (ret) + return ret; + + engine->led_mux = mux; + return 0; +} + +static ssize_t store_engine_leds(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len, int nr) +{ + struct lp55xx_led *led = i2c_get_clientdata(to_i2c_client(dev)); + struct lp55xx_chip *chip = led->chip; + struct lp55xx_engine *engine = &chip->engines[nr - 1]; + u16 mux = 0; + ssize_t ret; + + if (lp5523_mux_parse(buf, &mux, len)) + return -EINVAL; + + mutex_lock(&chip->lock); + + chip->engine_idx = nr; + ret = -EINVAL; + + if (engine->mode != LP55XX_ENGINE_LOAD) + goto leave; + + if (lp5523_load_mux(chip, mux, nr)) + goto leave; + + ret = len; +leave: + mutex_unlock(&chip->lock); + return ret; +} +store_leds(1) +store_leds(2) +store_leds(3) + +static ssize_t store_engine_load(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len, int nr) +{ + struct lp55xx_led *led = i2c_get_clientdata(to_i2c_client(dev)); + struct lp55xx_chip *chip = led->chip; + + mutex_lock(&chip->lock); + + chip->engine_idx = nr; + lp5523_load_engine_and_select_page(chip); + + mutex_unlock(&chip->lock); + + return lp5523_update_program_memory(chip, buf, len); +} +store_load(1) +store_load(2) +store_load(3) + static ssize_t lp5523_selftest(struct device *dev, struct device_attribute *attr, char *buf) @@ -393,9 +660,27 @@ static void lp5523_led_brightness_work(struct work_struct *work) mutex_unlock(&chip->lock); } -static DEVICE_ATTR(selftest, S_IRUGO, lp5523_selftest, NULL); +static LP55XX_DEV_ATTR_RW(engine1_mode, show_engine1_mode, store_engine1_mode); +static LP55XX_DEV_ATTR_RW(engine2_mode, show_engine2_mode, store_engine2_mode); +static LP55XX_DEV_ATTR_RW(engine3_mode, show_engine3_mode, store_engine3_mode); +static LP55XX_DEV_ATTR_RW(engine1_leds, show_engine1_leds, store_engine1_leds); +static LP55XX_DEV_ATTR_RW(engine2_leds, show_engine2_leds, store_engine2_leds); +static LP55XX_DEV_ATTR_RW(engine3_leds, show_engine3_leds, store_engine3_leds); +static LP55XX_DEV_ATTR_WO(engine1_load, store_engine1_load); +static LP55XX_DEV_ATTR_WO(engine2_load, store_engine2_load); +static LP55XX_DEV_ATTR_WO(engine3_load, store_engine3_load); +static LP55XX_DEV_ATTR_RO(selftest, lp5523_selftest); static struct attribute *lp5523_attributes[] = { + &dev_attr_engine1_mode.attr, + &dev_attr_engine2_mode.attr, + &dev_attr_engine3_mode.attr, + &dev_attr_engine1_load.attr, + &dev_attr_engine2_load.attr, + &dev_attr_engine3_load.attr, + &dev_attr_engine1_leds.attr, + &dev_attr_engine2_leds.attr, + &dev_attr_engine3_leds.attr, &dev_attr_selftest.attr, NULL, }; @@ -432,7 +717,7 @@ static int lp5523_probe(struct i2c_client *client, struct lp55xx_platform_data *pdata; struct device_node *np = client->dev.of_node; - if (!client->dev.platform_data) { + if (!dev_get_platdata(&client->dev)) { if (np) { ret = lp55xx_of_populate_pdata(&client->dev, np); if (ret < 0) @@ -442,7 +727,7 @@ static int lp5523_probe(struct i2c_client *client, return -EINVAL; } } - pdata = client->dev.platform_data; + pdata = dev_get_platdata(&client->dev); chip = devm_kzalloc(&client->dev, sizeof(*chip), GFP_KERNEL); if (!chip) diff --git a/drivers/leds/leds-lp5562.c b/drivers/leds/leds-lp5562.c index cbd856dac150..2585cfd57711 100644 --- a/drivers/leds/leds-lp5562.c +++ b/drivers/leds/leds-lp5562.c @@ -477,8 +477,8 @@ static ssize_t lp5562_store_engine_mux(struct device *dev, return len; } -static DEVICE_ATTR(led_pattern, S_IWUSR, NULL, lp5562_store_pattern); -static DEVICE_ATTR(engine_mux, S_IWUSR, NULL, lp5562_store_engine_mux); +static LP55XX_DEV_ATTR_WO(led_pattern, lp5562_store_pattern); +static LP55XX_DEV_ATTR_WO(engine_mux, lp5562_store_engine_mux); static struct attribute *lp5562_attributes[] = { &dev_attr_led_pattern.attr, @@ -518,7 +518,7 @@ static int lp5562_probe(struct i2c_client *client, struct lp55xx_platform_data *pdata; struct device_node *np = client->dev.of_node; - if (!client->dev.platform_data) { + if (!dev_get_platdata(&client->dev)) { if (np) { ret = lp55xx_of_populate_pdata(&client->dev, np); if (ret < 0) @@ -528,7 +528,7 @@ static int lp5562_probe(struct i2c_client *client, return -EINVAL; } } - pdata = client->dev.platform_data; + pdata = dev_get_platdata(&client->dev); chip = devm_kzalloc(&client->dev, sizeof(*chip), GFP_KERNEL); if (!chip) diff --git a/drivers/leds/leds-lp55xx-common.c b/drivers/leds/leds-lp55xx-common.c index c2fecd4d391c..351825b96f16 100644 --- a/drivers/leds/leds-lp55xx-common.c +++ b/drivers/leds/leds-lp55xx-common.c @@ -593,6 +593,9 @@ int lp55xx_of_populate_pdata(struct device *dev, struct device_node *np) of_property_read_string(np, "label", &pdata->label); of_property_read_u8(np, "clock-mode", &pdata->clock_mode); + /* LP8501 specific */ + of_property_read_u8(np, "pwr-sel", (u8 *)&pdata->pwr_sel); + dev->platform_data = pdata; return 0; diff --git a/drivers/leds/leds-lp55xx-common.h b/drivers/leds/leds-lp55xx-common.h index dbbf86df0f1f..cceab483edd0 100644 --- a/drivers/leds/leds-lp55xx-common.h +++ b/drivers/leds/leds-lp55xx-common.h @@ -20,8 +20,62 @@ enum lp55xx_engine_index { LP55XX_ENGINE_1, LP55XX_ENGINE_2, LP55XX_ENGINE_3, + LP55XX_ENGINE_MAX = LP55XX_ENGINE_3, }; +enum lp55xx_engine_mode { + LP55XX_ENGINE_DISABLED, + LP55XX_ENGINE_LOAD, + LP55XX_ENGINE_RUN, +}; + +#define LP55XX_DEV_ATTR_RW(name, show, store) \ + DEVICE_ATTR(name, S_IRUGO | S_IWUSR, show, store) +#define LP55XX_DEV_ATTR_RO(name, show) \ + DEVICE_ATTR(name, S_IRUGO, show, NULL) +#define LP55XX_DEV_ATTR_WO(name, store) \ + DEVICE_ATTR(name, S_IWUSR, NULL, store) + +#define show_mode(nr) \ +static ssize_t show_engine##nr##_mode(struct device *dev, \ + struct device_attribute *attr, \ + char *buf) \ +{ \ + return show_engine_mode(dev, attr, buf, nr); \ +} + +#define store_mode(nr) \ +static ssize_t store_engine##nr##_mode(struct device *dev, \ + struct device_attribute *attr, \ + const char *buf, size_t len) \ +{ \ + return store_engine_mode(dev, attr, buf, len, nr); \ +} + +#define show_leds(nr) \ +static ssize_t show_engine##nr##_leds(struct device *dev, \ + struct device_attribute *attr, \ + char *buf) \ +{ \ + return show_engine_leds(dev, attr, buf, nr); \ +} + +#define store_leds(nr) \ +static ssize_t store_engine##nr##_leds(struct device *dev, \ + struct device_attribute *attr, \ + const char *buf, size_t len) \ +{ \ + return store_engine_leds(dev, attr, buf, len, nr); \ +} + +#define store_load(nr) \ +static ssize_t store_engine##nr##_load(struct device *dev, \ + struct device_attribute *attr, \ + const char *buf, size_t len) \ +{ \ + return store_engine_load(dev, attr, buf, len, nr); \ +} + struct lp55xx_led; struct lp55xx_chip; @@ -72,6 +126,16 @@ struct lp55xx_device_config { }; /* + * struct lp55xx_engine + * @mode : Engine mode + * @led_mux : Mux bits for LED selection. Only used in LP5523 + */ +struct lp55xx_engine { + enum lp55xx_engine_mode mode; + u16 led_mux; +}; + +/* * struct lp55xx_chip * @cl : I2C communication for access registers * @pdata : Platform specific data @@ -79,6 +143,7 @@ struct lp55xx_device_config { * @num_leds : Number of registered LEDs * @cfg : Device specific configuration data * @engine_idx : Selected engine number + * @engines : Engine structure for the device attribute R/W interface * @fw : Firmware data for running a LED pattern */ struct lp55xx_chip { @@ -89,6 +154,7 @@ struct lp55xx_chip { int num_leds; struct lp55xx_device_config *cfg; enum lp55xx_engine_index engine_idx; + struct lp55xx_engine engines[LP55XX_ENGINE_MAX]; const struct firmware *fw; }; diff --git a/drivers/leds/leds-lp8501.c b/drivers/leds/leds-lp8501.c new file mode 100644 index 000000000000..8d55a780ca46 --- /dev/null +++ b/drivers/leds/leds-lp8501.c @@ -0,0 +1,410 @@ +/* + * TI LP8501 9 channel LED Driver + * + * Copyright (C) 2013 Texas Instruments + * + * Author: Milo(Woogyom) Kim <milo.kim@ti.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + * + */ + +#include <linux/delay.h> +#include <linux/firmware.h> +#include <linux/i2c.h> +#include <linux/init.h> +#include <linux/leds.h> +#include <linux/module.h> +#include <linux/mutex.h> +#include <linux/platform_data/leds-lp55xx.h> +#include <linux/slab.h> + +#include "leds-lp55xx-common.h" + +#define LP8501_PROGRAM_LENGTH 32 +#define LP8501_MAX_LEDS 9 + +/* Registers */ +#define LP8501_REG_ENABLE 0x00 +#define LP8501_ENABLE BIT(6) +#define LP8501_EXEC_M 0x3F +#define LP8501_EXEC_ENG1_M 0x30 +#define LP8501_EXEC_ENG2_M 0x0C +#define LP8501_EXEC_ENG3_M 0x03 +#define LP8501_RUN_ENG1 0x20 +#define LP8501_RUN_ENG2 0x08 +#define LP8501_RUN_ENG3 0x02 + +#define LP8501_REG_OP_MODE 0x01 +#define LP8501_MODE_ENG1_M 0x30 +#define LP8501_MODE_ENG2_M 0x0C +#define LP8501_MODE_ENG3_M 0x03 +#define LP8501_LOAD_ENG1 0x10 +#define LP8501_LOAD_ENG2 0x04 +#define LP8501_LOAD_ENG3 0x01 + +#define LP8501_REG_PWR_CONFIG 0x05 +#define LP8501_PWR_CONFIG_M 0x03 + +#define LP8501_REG_LED_PWM_BASE 0x16 + +#define LP8501_REG_LED_CURRENT_BASE 0x26 + +#define LP8501_REG_CONFIG 0x36 +#define LP8501_PWM_PSAVE BIT(7) +#define LP8501_AUTO_INC BIT(6) +#define LP8501_PWR_SAVE BIT(5) +#define LP8501_CP_AUTO 0x18 +#define LP8501_INT_CLK BIT(0) +#define LP8501_DEFAULT_CFG \ + (LP8501_PWM_PSAVE | LP8501_AUTO_INC | LP8501_PWR_SAVE | LP8501_CP_AUTO) + +#define LP8501_REG_RESET 0x3D +#define LP8501_RESET 0xFF + +#define LP8501_REG_PROG_PAGE_SEL 0x4F +#define LP8501_PAGE_ENG1 0 +#define LP8501_PAGE_ENG2 1 +#define LP8501_PAGE_ENG3 2 + +#define LP8501_REG_PROG_MEM 0x50 + +#define LP8501_ENG1_IS_LOADING(mode) \ + ((mode & LP8501_MODE_ENG1_M) == LP8501_LOAD_ENG1) +#define LP8501_ENG2_IS_LOADING(mode) \ + ((mode & LP8501_MODE_ENG2_M) == LP8501_LOAD_ENG2) +#define LP8501_ENG3_IS_LOADING(mode) \ + ((mode & LP8501_MODE_ENG3_M) == LP8501_LOAD_ENG3) + +static inline void lp8501_wait_opmode_done(void) +{ + usleep_range(1000, 2000); +} + +static void lp8501_set_led_current(struct lp55xx_led *led, u8 led_current) +{ + led->led_current = led_current; + lp55xx_write(led->chip, LP8501_REG_LED_CURRENT_BASE + led->chan_nr, + led_current); +} + +static int lp8501_post_init_device(struct lp55xx_chip *chip) +{ + int ret; + u8 val = LP8501_DEFAULT_CFG; + + ret = lp55xx_write(chip, LP8501_REG_ENABLE, LP8501_ENABLE); + if (ret) + return ret; + + /* Chip startup time is 500 us, 1 - 2 ms gives some margin */ + usleep_range(1000, 2000); + + if (chip->pdata->clock_mode != LP55XX_CLOCK_EXT) + val |= LP8501_INT_CLK; + + ret = lp55xx_write(chip, LP8501_REG_CONFIG, val); + if (ret) + return ret; + + /* Power selection for each output */ + return lp55xx_update_bits(chip, LP8501_REG_PWR_CONFIG, + LP8501_PWR_CONFIG_M, chip->pdata->pwr_sel); +} + +static void lp8501_load_engine(struct lp55xx_chip *chip) +{ + enum lp55xx_engine_index idx = chip->engine_idx; + u8 mask[] = { + [LP55XX_ENGINE_1] = LP8501_MODE_ENG1_M, + [LP55XX_ENGINE_2] = LP8501_MODE_ENG2_M, + [LP55XX_ENGINE_3] = LP8501_MODE_ENG3_M, + }; + + u8 val[] = { + [LP55XX_ENGINE_1] = LP8501_LOAD_ENG1, + [LP55XX_ENGINE_2] = LP8501_LOAD_ENG2, + [LP55XX_ENGINE_3] = LP8501_LOAD_ENG3, + }; + + u8 page_sel[] = { + [LP55XX_ENGINE_1] = LP8501_PAGE_ENG1, + [LP55XX_ENGINE_2] = LP8501_PAGE_ENG2, + [LP55XX_ENGINE_3] = LP8501_PAGE_ENG3, + }; + + lp55xx_update_bits(chip, LP8501_REG_OP_MODE, mask[idx], val[idx]); + + lp8501_wait_opmode_done(); + + lp55xx_write(chip, LP8501_REG_PROG_PAGE_SEL, page_sel[idx]); +} + +static void lp8501_stop_engine(struct lp55xx_chip *chip) +{ + lp55xx_write(chip, LP8501_REG_OP_MODE, 0); + lp8501_wait_opmode_done(); +} + +static void lp8501_turn_off_channels(struct lp55xx_chip *chip) +{ + int i; + + for (i = 0; i < LP8501_MAX_LEDS; i++) + lp55xx_write(chip, LP8501_REG_LED_PWM_BASE + i, 0); +} + +static void lp8501_run_engine(struct lp55xx_chip *chip, bool start) +{ + int ret; + u8 mode; + u8 exec; + + /* stop engine */ + if (!start) { + lp8501_stop_engine(chip); + lp8501_turn_off_channels(chip); + return; + } + + /* + * To run the engine, + * operation mode and enable register should updated at the same time + */ + + ret = lp55xx_read(chip, LP8501_REG_OP_MODE, &mode); + if (ret) + return; + + ret = lp55xx_read(chip, LP8501_REG_ENABLE, &exec); + if (ret) + return; + + /* change operation mode to RUN only when each engine is loading */ + if (LP8501_ENG1_IS_LOADING(mode)) { + mode = (mode & ~LP8501_MODE_ENG1_M) | LP8501_RUN_ENG1; + exec = (exec & ~LP8501_EXEC_ENG1_M) | LP8501_RUN_ENG1; + } + + if (LP8501_ENG2_IS_LOADING(mode)) { + mode = (mode & ~LP8501_MODE_ENG2_M) | LP8501_RUN_ENG2; + exec = (exec & ~LP8501_EXEC_ENG2_M) | LP8501_RUN_ENG2; + } + + if (LP8501_ENG3_IS_LOADING(mode)) { + mode = (mode & ~LP8501_MODE_ENG3_M) | LP8501_RUN_ENG3; + exec = (exec & ~LP8501_EXEC_ENG3_M) | LP8501_RUN_ENG3; + } + + lp55xx_write(chip, LP8501_REG_OP_MODE, mode); + lp8501_wait_opmode_done(); + + lp55xx_update_bits(chip, LP8501_REG_ENABLE, LP8501_EXEC_M, exec); +} + +static int lp8501_update_program_memory(struct lp55xx_chip *chip, + const u8 *data, size_t size) +{ + u8 pattern[LP8501_PROGRAM_LENGTH] = {0}; + unsigned cmd; + char c[3]; + int update_size; + int nrchars; + int offset = 0; + int ret; + int i; + + /* clear program memory before updating */ + for (i = 0; i < LP8501_PROGRAM_LENGTH; i++) + lp55xx_write(chip, LP8501_REG_PROG_MEM + i, 0); + + i = 0; + while ((offset < size - 1) && (i < LP8501_PROGRAM_LENGTH)) { + /* separate sscanfs because length is working only for %s */ + ret = sscanf(data + offset, "%2s%n ", c, &nrchars); + if (ret != 1) + goto err; + + ret = sscanf(c, "%2x", &cmd); + if (ret != 1) + goto err; + + pattern[i] = (u8)cmd; + offset += nrchars; + i++; + } + + /* Each instruction is 16bit long. Check that length is even */ + if (i % 2) + goto err; + + update_size = i; + for (i = 0; i < update_size; i++) + lp55xx_write(chip, LP8501_REG_PROG_MEM + i, pattern[i]); + + return 0; + +err: + dev_err(&chip->cl->dev, "wrong pattern format\n"); + return -EINVAL; +} + +static void lp8501_firmware_loaded(struct lp55xx_chip *chip) +{ + const struct firmware *fw = chip->fw; + + if (fw->size > LP8501_PROGRAM_LENGTH) { + dev_err(&chip->cl->dev, "firmware data size overflow: %zu\n", + fw->size); + return; + } + + /* + * Program momery sequence + * 1) set engine mode to "LOAD" + * 2) write firmware data into program memory + */ + + lp8501_load_engine(chip); + lp8501_update_program_memory(chip, fw->data, fw->size); +} + +static void lp8501_led_brightness_work(struct work_struct *work) +{ + struct lp55xx_led *led = container_of(work, struct lp55xx_led, + brightness_work); + struct lp55xx_chip *chip = led->chip; + + mutex_lock(&chip->lock); + lp55xx_write(chip, LP8501_REG_LED_PWM_BASE + led->chan_nr, + led->brightness); + mutex_unlock(&chip->lock); +} + +/* Chip specific configurations */ +static struct lp55xx_device_config lp8501_cfg = { + .reset = { + .addr = LP8501_REG_RESET, + .val = LP8501_RESET, + }, + .enable = { + .addr = LP8501_REG_ENABLE, + .val = LP8501_ENABLE, + }, + .max_channel = LP8501_MAX_LEDS, + .post_init_device = lp8501_post_init_device, + .brightness_work_fn = lp8501_led_brightness_work, + .set_led_current = lp8501_set_led_current, + .firmware_cb = lp8501_firmware_loaded, + .run_engine = lp8501_run_engine, +}; + +static int lp8501_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + int ret; + struct lp55xx_chip *chip; + struct lp55xx_led *led; + struct lp55xx_platform_data *pdata; + struct device_node *np = client->dev.of_node; + + if (!dev_get_platdata(&client->dev)) { + if (np) { + ret = lp55xx_of_populate_pdata(&client->dev, np); + if (ret < 0) + return ret; + } else { + dev_err(&client->dev, "no platform data\n"); + return -EINVAL; + } + } + pdata = dev_get_platdata(&client->dev); + + chip = devm_kzalloc(&client->dev, sizeof(*chip), GFP_KERNEL); + if (!chip) + return -ENOMEM; + + led = devm_kzalloc(&client->dev, + sizeof(*led) * pdata->num_channels, GFP_KERNEL); + if (!led) + return -ENOMEM; + + chip->cl = client; + chip->pdata = pdata; + chip->cfg = &lp8501_cfg; + + mutex_init(&chip->lock); + + i2c_set_clientdata(client, led); + + ret = lp55xx_init_device(chip); + if (ret) + goto err_init; + + dev_info(&client->dev, "%s Programmable led chip found\n", id->name); + + ret = lp55xx_register_leds(led, chip); + if (ret) + goto err_register_leds; + + ret = lp55xx_register_sysfs(chip); + if (ret) { + dev_err(&client->dev, "registering sysfs failed\n"); + goto err_register_sysfs; + } + + return 0; + +err_register_sysfs: + lp55xx_unregister_leds(led, chip); +err_register_leds: + lp55xx_deinit_device(chip); +err_init: + return ret; +} + +static int lp8501_remove(struct i2c_client *client) +{ + struct lp55xx_led *led = i2c_get_clientdata(client); + struct lp55xx_chip *chip = led->chip; + + lp8501_stop_engine(chip); + lp55xx_unregister_sysfs(chip); + lp55xx_unregister_leds(led, chip); + lp55xx_deinit_device(chip); + + return 0; +} + +static const struct i2c_device_id lp8501_id[] = { + { "lp8501", 0 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, lp8501_id); + +#ifdef CONFIG_OF +static const struct of_device_id of_lp8501_leds_match[] = { + { .compatible = "ti,lp8501", }, + {}, +}; + +MODULE_DEVICE_TABLE(of, of_lp8501_leds_match); +#endif + +static struct i2c_driver lp8501_driver = { + .driver = { + .name = "lp8501", + .of_match_table = of_match_ptr(of_lp8501_leds_match), + }, + .probe = lp8501_probe, + .remove = lp8501_remove, + .id_table = lp8501_id, +}; + +module_i2c_driver(lp8501_driver); + +MODULE_DESCRIPTION("Texas Instruments LP8501 LED drvier"); +MODULE_AUTHOR("Milo Kim"); +MODULE_LICENSE("GPL"); diff --git a/drivers/leds/leds-lt3593.c b/drivers/leds/leds-lt3593.c index ca48a7d5502d..3417e5be7b57 100644 --- a/drivers/leds/leds-lt3593.c +++ b/drivers/leds/leds-lt3593.c @@ -135,7 +135,7 @@ static void delete_lt3593_led(struct lt3593_led_data *led) static int lt3593_led_probe(struct platform_device *pdev) { - struct gpio_led_platform_data *pdata = pdev->dev.platform_data; + struct gpio_led_platform_data *pdata = dev_get_platdata(&pdev->dev); struct lt3593_led_data *leds_data; int i, ret = 0; @@ -169,7 +169,7 @@ err: static int lt3593_led_remove(struct platform_device *pdev) { int i; - struct gpio_led_platform_data *pdata = pdev->dev.platform_data; + struct gpio_led_platform_data *pdata = dev_get_platdata(&pdev->dev); struct lt3593_led_data *leds_data; leds_data = platform_get_drvdata(pdev); diff --git a/drivers/leds/leds-netxbig.c b/drivers/leds/leds-netxbig.c index c61c5ebcc08e..2f9f141084ba 100644 --- a/drivers/leds/leds-netxbig.c +++ b/drivers/leds/leds-netxbig.c @@ -306,7 +306,7 @@ create_netxbig_led(struct platform_device *pdev, struct netxbig_led_data *led_dat, const struct netxbig_led *template) { - struct netxbig_led_platform_data *pdata = pdev->dev.platform_data; + struct netxbig_led_platform_data *pdata = dev_get_platdata(&pdev->dev); int ret; spin_lock_init(&led_dat->lock); @@ -354,7 +354,7 @@ create_netxbig_led(struct platform_device *pdev, static int netxbig_led_probe(struct platform_device *pdev) { - struct netxbig_led_platform_data *pdata = pdev->dev.platform_data; + struct netxbig_led_platform_data *pdata = dev_get_platdata(&pdev->dev); struct netxbig_led_data *leds_data; int i; int ret; @@ -391,7 +391,7 @@ err_free_leds: static int netxbig_led_remove(struct platform_device *pdev) { - struct netxbig_led_platform_data *pdata = pdev->dev.platform_data; + struct netxbig_led_platform_data *pdata = dev_get_platdata(&pdev->dev); struct netxbig_led_data *leds_data; int i; diff --git a/drivers/leds/leds-ns2.c b/drivers/leds/leds-ns2.c index e7df9875c400..141f13438e80 100644 --- a/drivers/leds/leds-ns2.c +++ b/drivers/leds/leds-ns2.c @@ -321,7 +321,7 @@ static inline int sizeof_ns2_led_priv(int num_leds) static int ns2_led_probe(struct platform_device *pdev) { - struct ns2_led_platform_data *pdata = pdev->dev.platform_data; + struct ns2_led_platform_data *pdata = dev_get_platdata(&pdev->dev); struct ns2_led_priv *priv; int i; int ret; diff --git a/drivers/leds/leds-pca9532.c b/drivers/leds/leds-pca9532.c index 0c597bdd23f9..4a0e786b7832 100644 --- a/drivers/leds/leds-pca9532.c +++ b/drivers/leds/leds-pca9532.c @@ -446,7 +446,8 @@ static int pca9532_probe(struct i2c_client *client, const struct i2c_device_id *id) { struct pca9532_data *data = i2c_get_clientdata(client); - struct pca9532_platform_data *pca9532_pdata = client->dev.platform_data; + struct pca9532_platform_data *pca9532_pdata = + dev_get_platdata(&client->dev); if (!pca9532_pdata) return -EIO; diff --git a/drivers/leds/leds-pca955x.c b/drivers/leds/leds-pca955x.c index edf485b773c8..c3a08b60535b 100644 --- a/drivers/leds/leds-pca955x.c +++ b/drivers/leds/leds-pca955x.c @@ -267,7 +267,7 @@ static int pca955x_probe(struct i2c_client *client, chip = &pca955x_chipdefs[id->driver_data]; adapter = to_i2c_adapter(client->dev.parent); - pdata = client->dev.platform_data; + pdata = dev_get_platdata(&client->dev); /* Make sure the slave address / chip type combo given is possible */ if ((client->addr & ~((1 << chip->slv_addr_shift) - 1)) != diff --git a/drivers/leds/leds-pca9633.c b/drivers/leds/leds-pca9633.c deleted file mode 100644 index 9aae5679ffb2..000000000000 --- a/drivers/leds/leds-pca9633.c +++ /dev/null @@ -1,194 +0,0 @@ -/* - * Copyright 2011 bct electronic GmbH - * - * Author: Peter Meerwald <p.meerwald@bct-electronic.com> - * - * Based on leds-pca955x.c - * - * This file is subject to the terms and conditions of version 2 of - * the GNU General Public License. See the file COPYING in the main - * directory of this archive for more details. - * - * LED driver for the PCA9633 I2C LED driver (7-bit slave address 0x62) - * - */ - -#include <linux/module.h> -#include <linux/delay.h> -#include <linux/string.h> -#include <linux/ctype.h> -#include <linux/leds.h> -#include <linux/err.h> -#include <linux/i2c.h> -#include <linux/workqueue.h> -#include <linux/slab.h> -#include <linux/platform_data/leds-pca9633.h> - -/* LED select registers determine the source that drives LED outputs */ -#define PCA9633_LED_OFF 0x0 /* LED driver off */ -#define PCA9633_LED_ON 0x1 /* LED driver on */ -#define PCA9633_LED_PWM 0x2 /* Controlled through PWM */ -#define PCA9633_LED_GRP_PWM 0x3 /* Controlled through PWM/GRPPWM */ - -#define PCA9633_MODE1 0x00 -#define PCA9633_MODE2 0x01 -#define PCA9633_PWM_BASE 0x02 -#define PCA9633_LEDOUT 0x08 - -static const struct i2c_device_id pca9633_id[] = { - { "pca9633", 0 }, - { } -}; -MODULE_DEVICE_TABLE(i2c, pca9633_id); - -struct pca9633_led { - struct i2c_client *client; - struct work_struct work; - enum led_brightness brightness; - struct led_classdev led_cdev; - int led_num; /* 0 .. 3 potentially */ - char name[32]; -}; - -static void pca9633_led_work(struct work_struct *work) -{ - struct pca9633_led *pca9633 = container_of(work, - struct pca9633_led, work); - u8 ledout = i2c_smbus_read_byte_data(pca9633->client, PCA9633_LEDOUT); - int shift = 2 * pca9633->led_num; - u8 mask = 0x3 << shift; - - switch (pca9633->brightness) { - case LED_FULL: - i2c_smbus_write_byte_data(pca9633->client, PCA9633_LEDOUT, - (ledout & ~mask) | (PCA9633_LED_ON << shift)); - break; - case LED_OFF: - i2c_smbus_write_byte_data(pca9633->client, PCA9633_LEDOUT, - ledout & ~mask); - break; - default: - i2c_smbus_write_byte_data(pca9633->client, - PCA9633_PWM_BASE + pca9633->led_num, - pca9633->brightness); - i2c_smbus_write_byte_data(pca9633->client, PCA9633_LEDOUT, - (ledout & ~mask) | (PCA9633_LED_PWM << shift)); - break; - } -} - -static void pca9633_led_set(struct led_classdev *led_cdev, - enum led_brightness value) -{ - struct pca9633_led *pca9633; - - pca9633 = container_of(led_cdev, struct pca9633_led, led_cdev); - - pca9633->brightness = value; - - /* - * Must use workqueue for the actual I/O since I2C operations - * can sleep. - */ - schedule_work(&pca9633->work); -} - -static int pca9633_probe(struct i2c_client *client, - const struct i2c_device_id *id) -{ - struct pca9633_led *pca9633; - struct pca9633_platform_data *pdata; - int i, err; - - pdata = client->dev.platform_data; - - if (pdata) { - if (pdata->leds.num_leds <= 0 || pdata->leds.num_leds > 4) { - dev_err(&client->dev, "board info must claim at most 4 LEDs"); - return -EINVAL; - } - } - - pca9633 = devm_kzalloc(&client->dev, 4 * sizeof(*pca9633), GFP_KERNEL); - if (!pca9633) - return -ENOMEM; - - i2c_set_clientdata(client, pca9633); - - for (i = 0; i < 4; i++) { - pca9633[i].client = client; - pca9633[i].led_num = i; - - /* Platform data can specify LED names and default triggers */ - if (pdata && i < pdata->leds.num_leds) { - if (pdata->leds.leds[i].name) - snprintf(pca9633[i].name, - sizeof(pca9633[i].name), "pca9633:%s", - pdata->leds.leds[i].name); - if (pdata->leds.leds[i].default_trigger) - pca9633[i].led_cdev.default_trigger = - pdata->leds.leds[i].default_trigger; - } else { - snprintf(pca9633[i].name, sizeof(pca9633[i].name), - "pca9633:%d", i); - } - - pca9633[i].led_cdev.name = pca9633[i].name; - pca9633[i].led_cdev.brightness_set = pca9633_led_set; - - INIT_WORK(&pca9633[i].work, pca9633_led_work); - - err = led_classdev_register(&client->dev, &pca9633[i].led_cdev); - if (err < 0) - goto exit; - } - - /* Disable LED all-call address and set normal mode */ - i2c_smbus_write_byte_data(client, PCA9633_MODE1, 0x00); - - /* Configure output: open-drain or totem pole (push-pull) */ - if (pdata && pdata->outdrv == PCA9633_OPEN_DRAIN) - i2c_smbus_write_byte_data(client, PCA9633_MODE2, 0x01); - - /* Turn off LEDs */ - i2c_smbus_write_byte_data(client, PCA9633_LEDOUT, 0x00); - - return 0; - -exit: - while (i--) { - led_classdev_unregister(&pca9633[i].led_cdev); - cancel_work_sync(&pca9633[i].work); - } - - return err; -} - -static int pca9633_remove(struct i2c_client *client) -{ - struct pca9633_led *pca9633 = i2c_get_clientdata(client); - int i; - - for (i = 0; i < 4; i++) { - led_classdev_unregister(&pca9633[i].led_cdev); - cancel_work_sync(&pca9633[i].work); - } - - return 0; -} - -static struct i2c_driver pca9633_driver = { - .driver = { - .name = "leds-pca9633", - .owner = THIS_MODULE, - }, - .probe = pca9633_probe, - .remove = pca9633_remove, - .id_table = pca9633_id, -}; - -module_i2c_driver(pca9633_driver); - -MODULE_AUTHOR("Peter Meerwald <p.meerwald@bct-electronic.com>"); -MODULE_DESCRIPTION("PCA9633 LED driver"); -MODULE_LICENSE("GPL v2"); diff --git a/drivers/leds/leds-pca963x.c b/drivers/leds/leds-pca963x.c new file mode 100644 index 000000000000..82589c0a5689 --- /dev/null +++ b/drivers/leds/leds-pca963x.c @@ -0,0 +1,461 @@ +/* + * Copyright 2011 bct electronic GmbH + * Copyright 2013 Qtechnology/AS + * + * Author: Peter Meerwald <p.meerwald@bct-electronic.com> + * Author: Ricardo Ribalda <ricardo.ribalda@gmail.com> + * + * Based on leds-pca955x.c + * + * This file is subject to the terms and conditions of version 2 of + * the GNU General Public License. See the file COPYING in the main + * directory of this archive for more details. + * + * LED driver for the PCA9633 I2C LED driver (7-bit slave address 0x62) + * LED driver for the PCA9634 I2C LED driver (7-bit slave address set by hw.) + * + * Note that hardware blinking violates the leds infrastructure driver + * interface since the hardware only supports blinking all LEDs with the + * same delay_on/delay_off rates. That is, only the LEDs that are set to + * blink will actually blink but all LEDs that are set to blink will blink + * in identical fashion. The delay_on/delay_off values of the last LED + * that is set to blink will be used for all of the blinking LEDs. + * Hardware blinking is disabled by default but can be enabled by setting + * the 'blink_type' member in the platform_data struct to 'PCA963X_HW_BLINK' + * or by adding the 'nxp,hw-blink' property to the DTS. + */ + +#include <linux/module.h> +#include <linux/delay.h> +#include <linux/string.h> +#include <linux/ctype.h> +#include <linux/leds.h> +#include <linux/err.h> +#include <linux/i2c.h> +#include <linux/workqueue.h> +#include <linux/slab.h> +#include <linux/of.h> +#include <linux/platform_data/leds-pca963x.h> + +/* LED select registers determine the source that drives LED outputs */ +#define PCA963X_LED_OFF 0x0 /* LED driver off */ +#define PCA963X_LED_ON 0x1 /* LED driver on */ +#define PCA963X_LED_PWM 0x2 /* Controlled through PWM */ +#define PCA963X_LED_GRP_PWM 0x3 /* Controlled through PWM/GRPPWM */ + +#define PCA963X_MODE2_DMBLNK 0x20 /* Enable blinking */ + +#define PCA963X_MODE1 0x00 +#define PCA963X_MODE2 0x01 +#define PCA963X_PWM_BASE 0x02 + +enum pca963x_type { + pca9633, + pca9634, +}; + +struct pca963x_chipdef { + u8 grppwm; + u8 grpfreq; + u8 ledout_base; + int n_leds; +}; + +static struct pca963x_chipdef pca963x_chipdefs[] = { + [pca9633] = { + .grppwm = 0x6, + .grpfreq = 0x7, + .ledout_base = 0x8, + .n_leds = 4, + }, + [pca9634] = { + .grppwm = 0xa, + .grpfreq = 0xb, + .ledout_base = 0xc, + .n_leds = 8, + }, +}; + +/* Total blink period in milliseconds */ +#define PCA963X_BLINK_PERIOD_MIN 42 +#define PCA963X_BLINK_PERIOD_MAX 10667 + +static const struct i2c_device_id pca963x_id[] = { + { "pca9632", pca9633 }, + { "pca9633", pca9633 }, + { "pca9634", pca9634 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, pca963x_id); + +enum pca963x_cmd { + BRIGHTNESS_SET, + BLINK_SET, +}; + +struct pca963x_led; + +struct pca963x { + struct pca963x_chipdef *chipdef; + struct mutex mutex; + struct i2c_client *client; + struct pca963x_led *leds; +}; + +struct pca963x_led { + struct pca963x *chip; + struct work_struct work; + enum led_brightness brightness; + struct led_classdev led_cdev; + int led_num; /* 0 .. 7 potentially */ + enum pca963x_cmd cmd; + char name[32]; + u8 gdc; + u8 gfrq; +}; + +static void pca963x_brightness_work(struct pca963x_led *pca963x) +{ + u8 ledout_addr = pca963x->chip->chipdef->ledout_base + + (pca963x->led_num / 4); + u8 ledout; + int shift = 2 * (pca963x->led_num % 4); + u8 mask = 0x3 << shift; + + mutex_lock(&pca963x->chip->mutex); + ledout = i2c_smbus_read_byte_data(pca963x->chip->client, ledout_addr); + switch (pca963x->brightness) { + case LED_FULL: + i2c_smbus_write_byte_data(pca963x->chip->client, ledout_addr, + (ledout & ~mask) | (PCA963X_LED_ON << shift)); + break; + case LED_OFF: + i2c_smbus_write_byte_data(pca963x->chip->client, ledout_addr, + ledout & ~mask); + break; + default: + i2c_smbus_write_byte_data(pca963x->chip->client, + PCA963X_PWM_BASE + pca963x->led_num, + pca963x->brightness); + i2c_smbus_write_byte_data(pca963x->chip->client, ledout_addr, + (ledout & ~mask) | (PCA963X_LED_PWM << shift)); + break; + } + mutex_unlock(&pca963x->chip->mutex); +} + +static void pca963x_blink_work(struct pca963x_led *pca963x) +{ + u8 ledout_addr = pca963x->chip->chipdef->ledout_base + + (pca963x->led_num / 4); + u8 ledout; + u8 mode2 = i2c_smbus_read_byte_data(pca963x->chip->client, + PCA963X_MODE2); + int shift = 2 * (pca963x->led_num % 4); + u8 mask = 0x3 << shift; + + i2c_smbus_write_byte_data(pca963x->chip->client, + pca963x->chip->chipdef->grppwm, pca963x->gdc); + + i2c_smbus_write_byte_data(pca963x->chip->client, + pca963x->chip->chipdef->grpfreq, pca963x->gfrq); + + if (!(mode2 & PCA963X_MODE2_DMBLNK)) + i2c_smbus_write_byte_data(pca963x->chip->client, PCA963X_MODE2, + mode2 | PCA963X_MODE2_DMBLNK); + + mutex_lock(&pca963x->chip->mutex); + ledout = i2c_smbus_read_byte_data(pca963x->chip->client, ledout_addr); + if ((ledout & mask) != (PCA963X_LED_GRP_PWM << shift)) + i2c_smbus_write_byte_data(pca963x->chip->client, ledout_addr, + (ledout & ~mask) | (PCA963X_LED_GRP_PWM << shift)); + mutex_unlock(&pca963x->chip->mutex); +} + +static void pca963x_work(struct work_struct *work) +{ + struct pca963x_led *pca963x = container_of(work, + struct pca963x_led, work); + + switch (pca963x->cmd) { + case BRIGHTNESS_SET: + pca963x_brightness_work(pca963x); + break; + case BLINK_SET: + pca963x_blink_work(pca963x); + break; + } +} + +static void pca963x_led_set(struct led_classdev *led_cdev, + enum led_brightness value) +{ + struct pca963x_led *pca963x; + + pca963x = container_of(led_cdev, struct pca963x_led, led_cdev); + + pca963x->cmd = BRIGHTNESS_SET; + pca963x->brightness = value; + + /* + * Must use workqueue for the actual I/O since I2C operations + * can sleep. + */ + schedule_work(&pca963x->work); +} + +static int pca963x_blink_set(struct led_classdev *led_cdev, + unsigned long *delay_on, unsigned long *delay_off) +{ + struct pca963x_led *pca963x; + unsigned long time_on, time_off, period; + u8 gdc, gfrq; + + pca963x = container_of(led_cdev, struct pca963x_led, led_cdev); + + time_on = *delay_on; + time_off = *delay_off; + + /* If both zero, pick reasonable defaults of 500ms each */ + if (!time_on && !time_off) { + time_on = 500; + time_off = 500; + } + + period = time_on + time_off; + + /* If period not supported by hardware, default to someting sane. */ + if ((period < PCA963X_BLINK_PERIOD_MIN) || + (period > PCA963X_BLINK_PERIOD_MAX)) { + time_on = 500; + time_off = 500; + period = time_on + time_off; + } + + /* + * From manual: duty cycle = (GDC / 256) -> + * (time_on / period) = (GDC / 256) -> + * GDC = ((time_on * 256) / period) + */ + gdc = (time_on * 256) / period; + + /* + * From manual: period = ((GFRQ + 1) / 24) in seconds. + * So, period (in ms) = (((GFRQ + 1) / 24) * 1000) -> + * GFRQ = ((period * 24 / 1000) - 1) + */ + gfrq = (period * 24 / 1000) - 1; + + pca963x->cmd = BLINK_SET; + pca963x->gdc = gdc; + pca963x->gfrq = gfrq; + + /* + * Must use workqueue for the actual I/O since I2C operations + * can sleep. + */ + schedule_work(&pca963x->work); + + *delay_on = time_on; + *delay_off = time_off; + + return 0; +} + +#if IS_ENABLED(CONFIG_OF) +static struct pca963x_platform_data * +pca963x_dt_init(struct i2c_client *client, struct pca963x_chipdef *chip) +{ + struct device_node *np = client->dev.of_node, *child; + struct pca963x_platform_data *pdata; + struct led_info *pca963x_leds; + int count; + + count = of_get_child_count(np); + if (!count || count > chip->n_leds) + return ERR_PTR(-ENODEV); + + pca963x_leds = devm_kzalloc(&client->dev, + sizeof(struct led_info) * chip->n_leds, GFP_KERNEL); + if (!pca963x_leds) + return ERR_PTR(-ENOMEM); + + for_each_child_of_node(np, child) { + struct led_info led; + u32 reg; + int res; + + res = of_property_read_u32(child, "reg", ®); + if ((res != 0) || (reg >= chip->n_leds)) + continue; + led.name = + of_get_property(child, "label", NULL) ? : child->name; + led.default_trigger = + of_get_property(child, "linux,default-trigger", NULL); + pca963x_leds[reg] = led; + } + pdata = devm_kzalloc(&client->dev, + sizeof(struct pca963x_platform_data), GFP_KERNEL); + if (!pdata) + return ERR_PTR(-ENOMEM); + + pdata->leds.leds = pca963x_leds; + pdata->leds.num_leds = chip->n_leds; + + /* default to open-drain unless totem pole (push-pull) is specified */ + if (of_property_read_bool(np, "nxp,totem-pole")) + pdata->outdrv = PCA963X_TOTEM_POLE; + else + pdata->outdrv = PCA963X_OPEN_DRAIN; + + /* default to software blinking unless hardware blinking is specified */ + if (of_property_read_bool(np, "nxp,hw-blink")) + pdata->blink_type = PCA963X_HW_BLINK; + else + pdata->blink_type = PCA963X_SW_BLINK; + + return pdata; +} + +static const struct of_device_id of_pca963x_match[] = { + { .compatible = "nxp,pca9632", }, + { .compatible = "nxp,pca9633", }, + { .compatible = "nxp,pca9634", }, + {}, +}; +#else +static struct pca963x_platform_data * +pca963x_dt_init(struct i2c_client *client, struct pca963x_chipdef *chip) +{ + return ERR_PTR(-ENODEV); +} +#endif + +static int pca963x_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct pca963x *pca963x_chip; + struct pca963x_led *pca963x; + struct pca963x_platform_data *pdata; + struct pca963x_chipdef *chip; + int i, err; + + chip = &pca963x_chipdefs[id->driver_data]; + pdata = dev_get_platdata(&client->dev); + + if (!pdata) { + pdata = pca963x_dt_init(client, chip); + if (IS_ERR(pdata)) { + dev_warn(&client->dev, "could not parse configuration\n"); + pdata = NULL; + } + } + + if (pdata && (pdata->leds.num_leds < 1 || + pdata->leds.num_leds > chip->n_leds)) { + dev_err(&client->dev, "board info must claim 1-%d LEDs", + chip->n_leds); + return -EINVAL; + } + + pca963x_chip = devm_kzalloc(&client->dev, sizeof(*pca963x_chip), + GFP_KERNEL); + if (!pca963x_chip) + return -ENOMEM; + pca963x = devm_kzalloc(&client->dev, chip->n_leds * sizeof(*pca963x), + GFP_KERNEL); + if (!pca963x) + return -ENOMEM; + + i2c_set_clientdata(client, pca963x_chip); + + mutex_init(&pca963x_chip->mutex); + pca963x_chip->chipdef = chip; + pca963x_chip->client = client; + pca963x_chip->leds = pca963x; + + /* Turn off LEDs by default*/ + i2c_smbus_write_byte_data(client, chip->ledout_base, 0x00); + if (chip->n_leds > 4) + i2c_smbus_write_byte_data(client, chip->ledout_base + 1, 0x00); + + for (i = 0; i < chip->n_leds; i++) { + pca963x[i].led_num = i; + pca963x[i].chip = pca963x_chip; + + /* Platform data can specify LED names and default triggers */ + if (pdata && i < pdata->leds.num_leds) { + if (pdata->leds.leds[i].name) + snprintf(pca963x[i].name, + sizeof(pca963x[i].name), "pca963x:%s", + pdata->leds.leds[i].name); + if (pdata->leds.leds[i].default_trigger) + pca963x[i].led_cdev.default_trigger = + pdata->leds.leds[i].default_trigger; + } + if (!pdata || i >= pdata->leds.num_leds || + !pdata->leds.leds[i].name) + snprintf(pca963x[i].name, sizeof(pca963x[i].name), + "pca963x:%d:%.2x:%d", client->adapter->nr, + client->addr, i); + + pca963x[i].led_cdev.name = pca963x[i].name; + pca963x[i].led_cdev.brightness_set = pca963x_led_set; + + if (pdata && pdata->blink_type == PCA963X_HW_BLINK) + pca963x[i].led_cdev.blink_set = pca963x_blink_set; + + INIT_WORK(&pca963x[i].work, pca963x_work); + + err = led_classdev_register(&client->dev, &pca963x[i].led_cdev); + if (err < 0) + goto exit; + } + + /* Disable LED all-call address and set normal mode */ + i2c_smbus_write_byte_data(client, PCA963X_MODE1, 0x00); + + /* Configure output: open-drain or totem pole (push-pull) */ + if (pdata && pdata->outdrv == PCA963X_OPEN_DRAIN) + i2c_smbus_write_byte_data(client, PCA963X_MODE2, 0x01); + + return 0; + +exit: + while (i--) { + led_classdev_unregister(&pca963x[i].led_cdev); + cancel_work_sync(&pca963x[i].work); + } + + return err; +} + +static int pca963x_remove(struct i2c_client *client) +{ + struct pca963x *pca963x = i2c_get_clientdata(client); + int i; + + for (i = 0; i < pca963x->chipdef->n_leds; i++) { + led_classdev_unregister(&pca963x->leds[i].led_cdev); + cancel_work_sync(&pca963x->leds[i].work); + } + + return 0; +} + +static struct i2c_driver pca963x_driver = { + .driver = { + .name = "leds-pca963x", + .owner = THIS_MODULE, + .of_match_table = of_match_ptr(of_pca963x_match), + }, + .probe = pca963x_probe, + .remove = pca963x_remove, + .id_table = pca963x_id, +}; + +module_i2c_driver(pca963x_driver); + +MODULE_AUTHOR("Peter Meerwald <p.meerwald@bct-electronic.com>"); +MODULE_DESCRIPTION("PCA963X LED driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/leds/leds-pwm.c b/drivers/leds/leds-pwm.c index faf52c005e8c..bb6f94898541 100644 --- a/drivers/leds/leds-pwm.c +++ b/drivers/leds/leds-pwm.c @@ -147,7 +147,7 @@ err: static int led_pwm_probe(struct platform_device *pdev) { - struct led_pwm_platform_data *pdata = pdev->dev.platform_data; + struct led_pwm_platform_data *pdata = dev_get_platdata(&pdev->dev); struct led_pwm_priv *priv; int i, ret = 0; diff --git a/drivers/leds/leds-regulator.c b/drivers/leds/leds-regulator.c index 4253a9b03dbf..358430db6e66 100644 --- a/drivers/leds/leds-regulator.c +++ b/drivers/leds/leds-regulator.c @@ -142,7 +142,8 @@ static void regulator_led_brightness_set(struct led_classdev *led_cdev, static int regulator_led_probe(struct platform_device *pdev) { - struct led_regulator_platform_data *pdata = pdev->dev.platform_data; + struct led_regulator_platform_data *pdata = + dev_get_platdata(&pdev->dev); struct regulator_led *led; struct regulator *vcc; int ret = 0; diff --git a/drivers/leds/leds-s3c24xx.c b/drivers/leds/leds-s3c24xx.c index e1a0df63a37f..76483fb5ee45 100644 --- a/drivers/leds/leds-s3c24xx.c +++ b/drivers/leds/leds-s3c24xx.c @@ -71,7 +71,7 @@ static int s3c24xx_led_remove(struct platform_device *dev) static int s3c24xx_led_probe(struct platform_device *dev) { - struct s3c24xx_led_platdata *pdata = dev->dev.platform_data; + struct s3c24xx_led_platdata *pdata = dev_get_platdata(&dev->dev); struct s3c24xx_gpio_led *led; int ret; diff --git a/drivers/leds/leds-ss4200.c b/drivers/leds/leds-ss4200.c index 64e204e714f6..5b8f938a8d73 100644 --- a/drivers/leds/leds-ss4200.c +++ b/drivers/leds/leds-ss4200.c @@ -91,7 +91,7 @@ MODULE_PARM_DESC(nodetect, "Skip DMI-based hardware detection"); * detected as working, but in reality it is not) as low as * possible. */ -static struct dmi_system_id __initdata nas_led_whitelist[] = { +static struct dmi_system_id nas_led_whitelist[] __initdata = { { .callback = ss4200_led_dmi_callback, .ident = "Intel SS4200-E", @@ -197,7 +197,7 @@ static void nasgpio_led_set_attr(struct led_classdev *led_cdev, spin_unlock(&nasgpio_gpio_lock); } -u32 nasgpio_led_get_attr(struct led_classdev *led_cdev, u32 port) +static u32 nasgpio_led_get_attr(struct led_classdev *led_cdev, u32 port) { struct nasgpio_led *led = led_classdev_to_nasgpio_led(led_cdev); u32 gpio_in; diff --git a/drivers/leds/leds-tca6507.c b/drivers/leds/leds-tca6507.c index 98fe021ba276..8cc304f36728 100644 --- a/drivers/leds/leds-tca6507.c +++ b/drivers/leds/leds-tca6507.c @@ -737,7 +737,7 @@ static int tca6507_probe(struct i2c_client *client, int i = 0; adapter = to_i2c_adapter(client->dev.parent); - pdata = client->dev.platform_data; + pdata = dev_get_platdata(&client->dev); if (!i2c_check_functionality(adapter, I2C_FUNC_I2C)) return -EIO; diff --git a/drivers/leds/leds-wm831x-status.c b/drivers/leds/leds-wm831x-status.c index 120815a42701..0a1a13f3a6a5 100644 --- a/drivers/leds/leds-wm831x-status.c +++ b/drivers/leds/leds-wm831x-status.c @@ -230,9 +230,9 @@ static int wm831x_status_probe(struct platform_device *pdev) int id = pdev->id % ARRAY_SIZE(chip_pdata->status); int ret; - res = platform_get_resource(pdev, IORESOURCE_IO, 0); + res = platform_get_resource(pdev, IORESOURCE_REG, 0); if (res == NULL) { - dev_err(&pdev->dev, "No I/O resource\n"); + dev_err(&pdev->dev, "No register resource\n"); ret = -EINVAL; goto err; } @@ -246,8 +246,8 @@ static int wm831x_status_probe(struct platform_device *pdev) drvdata->wm831x = wm831x; drvdata->reg = res->start; - if (wm831x->dev->platform_data) - chip_pdata = wm831x->dev->platform_data; + if (dev_get_platdata(wm831x->dev)) + chip_pdata = dev_get_platdata(wm831x->dev); else chip_pdata = NULL; diff --git a/drivers/leds/leds-wm8350.c b/drivers/leds/leds-wm8350.c index 8a181d56602d..3f75fd22fd49 100644 --- a/drivers/leds/leds-wm8350.c +++ b/drivers/leds/leds-wm8350.c @@ -203,7 +203,7 @@ static int wm8350_led_probe(struct platform_device *pdev) { struct regulator *isink, *dcdc; struct wm8350_led *led; - struct wm8350_led_platform_data *pdata = pdev->dev.platform_data; + struct wm8350_led_platform_data *pdata = dev_get_platdata(&pdev->dev); int i; if (pdata == NULL) { diff --git a/drivers/leds/trigger/ledtrig-backlight.c b/drivers/leds/trigger/ledtrig-backlight.c index 3c9c88a07eb8..47e55aa9eefa 100644 --- a/drivers/leds/trigger/ledtrig-backlight.c +++ b/drivers/leds/trigger/ledtrig-backlight.c @@ -36,26 +36,28 @@ static int fb_notifier_callback(struct notifier_block *p, struct bl_trig_notifier, notifier); struct led_classdev *led = n->led; struct fb_event *fb_event = data; - int *blank = fb_event->data; - int new_status = *blank ? BLANK : UNBLANK; + int *blank; + int new_status; - switch (event) { - case FB_EVENT_BLANK: - if (new_status == n->old_status) - break; + /* If we aren't interested in this event, skip it immediately ... */ + if (event != FB_EVENT_BLANK) + return 0; - if ((n->old_status == UNBLANK) ^ n->invert) { - n->brightness = led->brightness; - __led_set_brightness(led, LED_OFF); - } else { - __led_set_brightness(led, n->brightness); - } + blank = fb_event->data; + new_status = *blank ? BLANK : UNBLANK; - n->old_status = new_status; + if (new_status == n->old_status) + return 0; - break; + if ((n->old_status == UNBLANK) ^ n->invert) { + n->brightness = led->brightness; + __led_set_brightness(led, LED_OFF); + } else { + __led_set_brightness(led, n->brightness); } + n->old_status = new_status; + return 0; } diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index ee372884c405..f9764e61978b 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -597,24 +597,19 @@ static int mca_reap(struct btree *b, struct closure *cl, unsigned min_order) return 0; } -static int bch_mca_shrink(struct shrinker *shrink, struct shrink_control *sc) +static unsigned long bch_mca_scan(struct shrinker *shrink, + struct shrink_control *sc) { struct cache_set *c = container_of(shrink, struct cache_set, shrink); struct btree *b, *t; unsigned long i, nr = sc->nr_to_scan; + unsigned long freed = 0; if (c->shrinker_disabled) - return 0; + return SHRINK_STOP; if (c->try_harder) - return 0; - - /* - * If nr == 0, we're supposed to return the number of items we have - * cached. Not allowed to return -1. - */ - if (!nr) - return mca_can_free(c) * c->btree_pages; + return SHRINK_STOP; /* Return -1 if we can't do anything right now */ if (sc->gfp_mask & __GFP_WAIT) @@ -634,14 +629,14 @@ static int bch_mca_shrink(struct shrinker *shrink, struct shrink_control *sc) i = 0; list_for_each_entry_safe(b, t, &c->btree_cache_freeable, list) { - if (!nr) + if (freed >= nr) break; if (++i > 3 && !mca_reap(b, NULL, 0)) { mca_data_free(b); rw_unlock(true, b); - --nr; + freed++; } } @@ -652,7 +647,7 @@ static int bch_mca_shrink(struct shrinker *shrink, struct shrink_control *sc) if (list_empty(&c->btree_cache)) goto out; - for (i = 0; nr && i < c->bucket_cache_used; i++) { + for (i = 0; (nr--) && i < c->bucket_cache_used; i++) { b = list_first_entry(&c->btree_cache, struct btree, list); list_rotate_left(&c->btree_cache); @@ -661,14 +656,27 @@ static int bch_mca_shrink(struct shrinker *shrink, struct shrink_control *sc) mca_bucket_free(b); mca_data_free(b); rw_unlock(true, b); - --nr; + freed++; } else b->accessed = 0; } out: - nr = mca_can_free(c) * c->btree_pages; mutex_unlock(&c->bucket_lock); - return nr; + return freed; +} + +static unsigned long bch_mca_count(struct shrinker *shrink, + struct shrink_control *sc) +{ + struct cache_set *c = container_of(shrink, struct cache_set, shrink); + + if (c->shrinker_disabled) + return 0; + + if (c->try_harder) + return 0; + + return mca_can_free(c) * c->btree_pages; } void bch_btree_cache_free(struct cache_set *c) @@ -737,7 +745,8 @@ int bch_btree_cache_alloc(struct cache_set *c) c->verify_data = NULL; #endif - c->shrink.shrink = bch_mca_shrink; + c->shrink.count_objects = bch_mca_count; + c->shrink.scan_objects = bch_mca_scan; c->shrink.seeks = 4; c->shrink.batch = c->btree_pages * 2; register_shrinker(&c->shrink); diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c index 12a2c2846f99..4fe6ab2fbe2e 100644 --- a/drivers/md/bcache/sysfs.c +++ b/drivers/md/bcache/sysfs.c @@ -556,7 +556,7 @@ STORE(__bch_cache_set) struct shrink_control sc; sc.gfp_mask = GFP_KERNEL; sc.nr_to_scan = strtoul_or_return(buf); - c->shrink.shrink(&c->shrink, &sc); + c->shrink.scan_objects(&c->shrink, &sc); } sysfs_strtoul(congested_read_threshold_us, diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index 5227e079a6e3..173cbb20d104 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -1425,62 +1425,75 @@ static int __cleanup_old_buffer(struct dm_buffer *b, gfp_t gfp, unsigned long max_jiffies) { if (jiffies - b->last_accessed < max_jiffies) - return 1; + return 0; if (!(gfp & __GFP_IO)) { if (test_bit(B_READING, &b->state) || test_bit(B_WRITING, &b->state) || test_bit(B_DIRTY, &b->state)) - return 1; + return 0; } if (b->hold_count) - return 1; + return 0; __make_buffer_clean(b); __unlink_buffer(b); __free_buffer_wake(b); - return 0; + return 1; } -static void __scan(struct dm_bufio_client *c, unsigned long nr_to_scan, - struct shrink_control *sc) +static long __scan(struct dm_bufio_client *c, unsigned long nr_to_scan, + gfp_t gfp_mask) { int l; struct dm_buffer *b, *tmp; + long freed = 0; for (l = 0; l < LIST_SIZE; l++) { - list_for_each_entry_safe_reverse(b, tmp, &c->lru[l], lru_list) - if (!__cleanup_old_buffer(b, sc->gfp_mask, 0) && - !--nr_to_scan) - return; + list_for_each_entry_safe_reverse(b, tmp, &c->lru[l], lru_list) { + freed += __cleanup_old_buffer(b, gfp_mask, 0); + if (!--nr_to_scan) + break; + } dm_bufio_cond_resched(); } + return freed; } -static int shrink(struct shrinker *shrinker, struct shrink_control *sc) +static unsigned long +dm_bufio_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) { - struct dm_bufio_client *c = - container_of(shrinker, struct dm_bufio_client, shrinker); - unsigned long r; - unsigned long nr_to_scan = sc->nr_to_scan; + struct dm_bufio_client *c; + unsigned long freed; + c = container_of(shrink, struct dm_bufio_client, shrinker); if (sc->gfp_mask & __GFP_IO) dm_bufio_lock(c); else if (!dm_bufio_trylock(c)) - return !nr_to_scan ? 0 : -1; + return SHRINK_STOP; - if (nr_to_scan) - __scan(c, nr_to_scan, sc); + freed = __scan(c, sc->nr_to_scan, sc->gfp_mask); + dm_bufio_unlock(c); + return freed; +} - r = c->n_buffers[LIST_CLEAN] + c->n_buffers[LIST_DIRTY]; - if (r > INT_MAX) - r = INT_MAX; +static unsigned long +dm_bufio_shrink_count(struct shrinker *shrink, struct shrink_control *sc) +{ + struct dm_bufio_client *c; + unsigned long count; - dm_bufio_unlock(c); + c = container_of(shrink, struct dm_bufio_client, shrinker); + if (sc->gfp_mask & __GFP_IO) + dm_bufio_lock(c); + else if (!dm_bufio_trylock(c)) + return 0; - return r; + count = c->n_buffers[LIST_CLEAN] + c->n_buffers[LIST_DIRTY]; + dm_bufio_unlock(c); + return count; } /* @@ -1582,7 +1595,8 @@ struct dm_bufio_client *dm_bufio_client_create(struct block_device *bdev, unsign __cache_size_refresh(); mutex_unlock(&dm_bufio_clients_lock); - c->shrinker.shrink = shrink; + c->shrinker.count_objects = dm_bufio_shrink_count; + c->shrinker.scan_objects = dm_bufio_shrink_scan; c->shrinker.seeks = 1; c->shrinker.batch = 0; register_shrinker(&c->shrinker); @@ -1669,7 +1683,7 @@ static void cleanup_old_buffers(void) struct dm_buffer *b; b = list_entry(c->lru[LIST_CLEAN].prev, struct dm_buffer, lru_list); - if (__cleanup_old_buffer(b, 0, max_age * HZ)) + if (!__cleanup_old_buffer(b, 0, max_age * HZ)) break; dm_bufio_cond_resched(); } diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c index f6488adf3af1..0b7d23b4ad95 100644 --- a/drivers/pci/hotplug/acpiphp_glue.c +++ b/drivers/pci/hotplug/acpiphp_glue.c @@ -487,7 +487,6 @@ static void acpiphp_bus_add(acpi_handle handle) { struct acpi_device *adev = NULL; - acpiphp_bus_trim(handle); acpi_bus_scan(handle); acpi_bus_get_device(handle, &adev); if (adev) @@ -529,6 +528,16 @@ static void check_hotplug_bridge(struct acpiphp_slot *slot, struct pci_dev *dev) } } +static int acpiphp_rescan_slot(struct acpiphp_slot *slot) +{ + struct acpiphp_func *func; + + list_for_each_entry(func, &slot->funcs, sibling) + acpiphp_bus_add(func_to_handle(func)); + + return pci_scan_slot(slot->bus, PCI_DEVFN(slot->device, 0)); +} + /** * enable_slot - enable, configure a slot * @slot: slot to be enabled @@ -543,12 +552,9 @@ static void __ref enable_slot(struct acpiphp_slot *slot) struct acpiphp_func *func; int max, pass; LIST_HEAD(add_list); + int nr_found; - list_for_each_entry(func, &slot->funcs, sibling) - acpiphp_bus_add(func_to_handle(func)); - - pci_scan_slot(bus, PCI_DEVFN(slot->device, 0)); - + nr_found = acpiphp_rescan_slot(slot); max = acpiphp_max_busnr(bus); for (pass = 0; pass < 2; pass++) { list_for_each_entry(dev, &bus->devices, bus_list) { @@ -567,8 +573,11 @@ static void __ref enable_slot(struct acpiphp_slot *slot) } } } - __pci_bus_assign_resources(bus, &add_list, NULL); + /* Nothing more to do here if there are no new devices on this bus. */ + if (!nr_found && (slot->flags & SLOT_ENABLED)) + return; + acpiphp_sanitize_bus(bus); acpiphp_set_hpp_values(bus); acpiphp_set_acpi_region(slot); @@ -837,11 +846,22 @@ static void hotplug_event(acpi_handle handle, u32 type, void *data) case ACPI_NOTIFY_DEVICE_CHECK: /* device check */ dbg("%s: Device check notify on %s\n", __func__, objname); - if (bridge) + if (bridge) { acpiphp_check_bridge(bridge); - else - acpiphp_check_bridge(func->parent); + } else { + struct acpiphp_slot *slot = func->slot; + int ret; + /* + * Check if anything has changed in the slot and rescan + * from the parent if that's the case. + */ + mutex_lock(&slot->crit_sect); + ret = acpiphp_rescan_slot(slot); + mutex_unlock(&slot->crit_sect); + if (ret) + acpiphp_check_bridge(func->parent); + } break; case ACPI_NOTIFY_EJECT_REQUEST: @@ -867,6 +887,8 @@ static void hotplug_event_work(struct work_struct *work) hotplug_event(hp_work->handle, hp_work->type, context); acpi_scan_lock_release(); + acpi_evaluate_hotplug_ost(hp_work->handle, hp_work->type, + ACPI_OST_SC_SUCCESS, NULL); kfree(hp_work); /* allocated in handle_hotplug_event() */ put_bridge(context->func.parent); } @@ -882,11 +904,15 @@ static void hotplug_event_work(struct work_struct *work) static void handle_hotplug_event(acpi_handle handle, u32 type, void *data) { struct acpiphp_context *context; + u32 ost_code = ACPI_OST_SC_SUCCESS; switch (type) { case ACPI_NOTIFY_BUS_CHECK: case ACPI_NOTIFY_DEVICE_CHECK: + break; case ACPI_NOTIFY_EJECT_REQUEST: + ost_code = ACPI_OST_SC_EJECT_IN_PROGRESS; + acpi_evaluate_hotplug_ost(handle, type, ost_code, NULL); break; case ACPI_NOTIFY_DEVICE_WAKE: @@ -895,20 +921,21 @@ static void handle_hotplug_event(acpi_handle handle, u32 type, void *data) case ACPI_NOTIFY_FREQUENCY_MISMATCH: acpi_handle_err(handle, "Device cannot be configured due " "to a frequency mismatch\n"); - return; + goto out; case ACPI_NOTIFY_BUS_MODE_MISMATCH: acpi_handle_err(handle, "Device cannot be configured due " "to a bus mode mismatch\n"); - return; + goto out; case ACPI_NOTIFY_POWER_FAULT: acpi_handle_err(handle, "Device has suffered a power fault\n"); - return; + goto out; default: acpi_handle_warn(handle, "Unsupported event type 0x%x\n", type); - return; + ost_code = ACPI_OST_SC_UNRECOGNIZED_NOTIFY; + goto out; } mutex_lock(&acpiphp_context_lock); @@ -917,8 +944,14 @@ static void handle_hotplug_event(acpi_handle handle, u32 type, void *data) get_bridge(context->func.parent); acpiphp_put_context(context); alloc_acpi_hp_work(handle, type, context, hotplug_event_work); + mutex_unlock(&acpiphp_context_lock); + return; } mutex_unlock(&acpiphp_context_lock); + ost_code = ACPI_OST_SC_NON_SPECIFIC_FAILURE; + + out: + acpi_evaluate_hotplug_ost(handle, type, ost_code, NULL); } /* diff --git a/drivers/staging/android/ashmem.c b/drivers/staging/android/ashmem.c index 21a3f7250531..8e76ddca0999 100644 --- a/drivers/staging/android/ashmem.c +++ b/drivers/staging/android/ashmem.c @@ -341,27 +341,26 @@ out: /* * ashmem_shrink - our cache shrinker, called from mm/vmscan.c :: shrink_slab * - * 'nr_to_scan' is the number of objects (pages) to prune, or 0 to query how - * many objects (pages) we have in total. + * 'nr_to_scan' is the number of objects to scan for freeing. * * 'gfp_mask' is the mask of the allocation that got us into this mess. * - * Return value is the number of objects (pages) remaining, or -1 if we cannot + * Return value is the number of objects freed or -1 if we cannot * proceed without risk of deadlock (due to gfp_mask). * * We approximate LRU via least-recently-unpinned, jettisoning unpinned partial * chunks of ashmem regions LRU-wise one-at-a-time until we hit 'nr_to_scan' * pages freed. */ -static int ashmem_shrink(struct shrinker *s, struct shrink_control *sc) +static unsigned long +ashmem_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) { struct ashmem_range *range, *next; + unsigned long freed = 0; /* We might recurse into filesystem code, so bail out if necessary */ - if (sc->nr_to_scan && !(sc->gfp_mask & __GFP_FS)) - return -1; - if (!sc->nr_to_scan) - return lru_count; + if (!(sc->gfp_mask & __GFP_FS)) + return SHRINK_STOP; mutex_lock(&ashmem_mutex); list_for_each_entry_safe(range, next, &ashmem_lru_list, lru) { @@ -374,17 +373,32 @@ static int ashmem_shrink(struct shrinker *s, struct shrink_control *sc) range->purged = ASHMEM_WAS_PURGED; lru_del(range); - sc->nr_to_scan -= range_size(range); - if (sc->nr_to_scan <= 0) + freed += range_size(range); + if (--sc->nr_to_scan <= 0) break; } mutex_unlock(&ashmem_mutex); + return freed; +} +static unsigned long +ashmem_shrink_count(struct shrinker *shrink, struct shrink_control *sc) +{ + /* + * note that lru_count is count of pages on the lru, not a count of + * objects on the list. This means the scan function needs to return the + * number of pages freed, not the number of objects scanned. + */ return lru_count; } static struct shrinker ashmem_shrinker = { - .shrink = ashmem_shrink, + .count_objects = ashmem_shrink_count, + .scan_objects = ashmem_shrink_scan, + /* + * XXX (dchinner): I wish people would comment on why they need on + * significant changes to the default value here + */ .seeks = DEFAULT_SEEKS * 4, }; @@ -690,11 +704,11 @@ static long ashmem_ioctl(struct file *file, unsigned int cmd, unsigned long arg) if (capable(CAP_SYS_ADMIN)) { struct shrink_control sc = { .gfp_mask = GFP_KERNEL, - .nr_to_scan = 0, + .nr_to_scan = LONG_MAX, }; - ret = ashmem_shrink(&ashmem_shrinker, &sc); - sc.nr_to_scan = ret; - ashmem_shrink(&ashmem_shrinker, &sc); + + nodes_setall(sc.nodes_to_scan); + ashmem_shrink_scan(&ashmem_shrinker, &sc); } break; } diff --git a/drivers/staging/android/lowmemorykiller.c b/drivers/staging/android/lowmemorykiller.c index fe74494868ef..6f094b37f1f1 100644 --- a/drivers/staging/android/lowmemorykiller.c +++ b/drivers/staging/android/lowmemorykiller.c @@ -66,11 +66,20 @@ static unsigned long lowmem_deathpending_timeout; pr_info(x); \ } while (0) -static int lowmem_shrink(struct shrinker *s, struct shrink_control *sc) +static unsigned long lowmem_count(struct shrinker *s, + struct shrink_control *sc) +{ + return global_page_state(NR_ACTIVE_ANON) + + global_page_state(NR_ACTIVE_FILE) + + global_page_state(NR_INACTIVE_ANON) + + global_page_state(NR_INACTIVE_FILE); +} + +static unsigned long lowmem_scan(struct shrinker *s, struct shrink_control *sc) { struct task_struct *tsk; struct task_struct *selected = NULL; - int rem = 0; + unsigned long rem = 0; int tasksize; int i; short min_score_adj = OOM_SCORE_ADJ_MAX + 1; @@ -92,19 +101,17 @@ static int lowmem_shrink(struct shrinker *s, struct shrink_control *sc) break; } } - if (sc->nr_to_scan > 0) - lowmem_print(3, "lowmem_shrink %lu, %x, ofree %d %d, ma %hd\n", - sc->nr_to_scan, sc->gfp_mask, other_free, - other_file, min_score_adj); - rem = global_page_state(NR_ACTIVE_ANON) + - global_page_state(NR_ACTIVE_FILE) + - global_page_state(NR_INACTIVE_ANON) + - global_page_state(NR_INACTIVE_FILE); - if (sc->nr_to_scan <= 0 || min_score_adj == OOM_SCORE_ADJ_MAX + 1) { - lowmem_print(5, "lowmem_shrink %lu, %x, return %d\n", - sc->nr_to_scan, sc->gfp_mask, rem); - return rem; + + lowmem_print(3, "lowmem_scan %lu, %x, ofree %d %d, ma %hd\n", + sc->nr_to_scan, sc->gfp_mask, other_free, + other_file, min_score_adj); + + if (min_score_adj == OOM_SCORE_ADJ_MAX + 1) { + lowmem_print(5, "lowmem_scan %lu, %x, return 0\n", + sc->nr_to_scan, sc->gfp_mask); + return 0; } + selected_oom_score_adj = min_score_adj; rcu_read_lock(); @@ -154,16 +161,18 @@ static int lowmem_shrink(struct shrinker *s, struct shrink_control *sc) lowmem_deathpending_timeout = jiffies + HZ; send_sig(SIGKILL, selected, 0); set_tsk_thread_flag(selected, TIF_MEMDIE); - rem -= selected_tasksize; + rem += selected_tasksize; } - lowmem_print(4, "lowmem_shrink %lu, %x, return %d\n", + + lowmem_print(4, "lowmem_scan %lu, %x, return %lu\n", sc->nr_to_scan, sc->gfp_mask, rem); rcu_read_unlock(); return rem; } static struct shrinker lowmem_shrinker = { - .shrink = lowmem_shrink, + .scan_objects = lowmem_scan, + .count_objects = lowmem_count, .seeks = DEFAULT_SEEKS * 16 }; diff --git a/drivers/staging/lustre/include/linux/libcfs/linux/linux-mem.h b/drivers/staging/lustre/include/linux/libcfs/linux/linux-mem.h index 63efb7b456c6..2af15d41e77a 100644 --- a/drivers/staging/lustre/include/linux/libcfs/linux/linux-mem.h +++ b/drivers/staging/lustre/include/linux/libcfs/linux/linux-mem.h @@ -79,42 +79,4 @@ do { __oldfs = get_fs(); set_fs(get_ds());} while(0) #define MMSPACE_CLOSE set_fs(__oldfs) -/* - * Shrinker - */ - -# define SHRINKER_ARGS(sc, nr_to_scan, gfp_mask) \ - struct shrinker *shrinker, \ - struct shrink_control *sc -# define shrink_param(sc, var) ((sc)->var) - -typedef int (*shrinker_t)(SHRINKER_ARGS(sc, nr_to_scan, gfp_mask)); - -static inline -struct shrinker *set_shrinker(int seek, shrinker_t func) -{ - struct shrinker *s; - - s = kmalloc(sizeof(*s), GFP_KERNEL); - if (s == NULL) - return (NULL); - - s->shrink = func; - s->seeks = seek; - - register_shrinker(s); - - return s; -} - -static inline -void remove_shrinker(struct shrinker *shrinker) -{ - if (shrinker == NULL) - return; - - unregister_shrinker(shrinker); - kfree(shrinker); -} - #endif /* __LINUX_CFS_MEM_H__ */ diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_pool.c b/drivers/staging/lustre/lustre/ldlm/ldlm_pool.c index 454027d68d54..0025ee6356da 100644 --- a/drivers/staging/lustre/lustre/ldlm/ldlm_pool.c +++ b/drivers/staging/lustre/lustre/ldlm/ldlm_pool.c @@ -521,7 +521,7 @@ static int ldlm_cli_pool_shrink(struct ldlm_pool *pl, int nr, unsigned int gfp_mask) { struct ldlm_namespace *ns; - int canceled = 0, unused; + int unused; ns = ldlm_pl2ns(pl); @@ -540,14 +540,10 @@ static int ldlm_cli_pool_shrink(struct ldlm_pool *pl, unused = ns->ns_nr_unused; spin_unlock(&ns->ns_lock); - if (nr) { - canceled = ldlm_cancel_lru(ns, nr, LCF_ASYNC, - LDLM_CANCEL_SHRINK); - } - /* - * Return the number of potentially reclaimable locks. - */ - return ((unused - canceled) / 100) * sysctl_vfs_cache_pressure; + if (nr == 0) + return (unused / 100) * sysctl_vfs_cache_pressure; + else + return ldlm_cancel_lru(ns, nr, LCF_ASYNC, LDLM_CANCEL_SHRINK); } struct ldlm_pool_ops ldlm_srv_pool_ops = { @@ -601,9 +597,10 @@ int ldlm_pool_recalc(struct ldlm_pool *pl) return recalc_interval_sec; } -/** +/* * Pool shrink wrapper. Will call either client or server pool recalc callback - * depending what pool \a pl is used. + * depending what pool pl is used. When nr == 0, just return the number of + * freeable locks. Otherwise, return the number of canceled locks. */ int ldlm_pool_shrink(struct ldlm_pool *pl, int nr, unsigned int gfp_mask) @@ -1017,29 +1014,24 @@ static int ldlm_pool_granted(struct ldlm_pool *pl) } static struct ptlrpc_thread *ldlm_pools_thread; -static struct shrinker *ldlm_pools_srv_shrinker; -static struct shrinker *ldlm_pools_cli_shrinker; static struct completion ldlm_pools_comp; /* - * Cancel \a nr locks from all namespaces (if possible). Returns number of - * cached locks after shrink is finished. All namespaces are asked to - * cancel approximately equal amount of locks to keep balancing. + * count locks from all namespaces (if possible). Returns number of + * cached locks. */ -static int ldlm_pools_shrink(ldlm_side_t client, int nr, - unsigned int gfp_mask) +static unsigned long ldlm_pools_count(ldlm_side_t client, unsigned int gfp_mask) { - int total = 0, cached = 0, nr_ns; + int total = 0, nr_ns; struct ldlm_namespace *ns; struct ldlm_namespace *ns_old = NULL; /* loop detection */ void *cookie; - if (client == LDLM_NAMESPACE_CLIENT && nr != 0 && - !(gfp_mask & __GFP_FS)) - return -1; + if (client == LDLM_NAMESPACE_CLIENT && !(gfp_mask & __GFP_FS)) + return 0; - CDEBUG(D_DLMTRACE, "Request to shrink %d %s locks from all pools\n", - nr, client == LDLM_NAMESPACE_CLIENT ? "client" : "server"); + CDEBUG(D_DLMTRACE, "Request to count %s locks from all pools\n", + client == LDLM_NAMESPACE_CLIENT ? "client" : "server"); cookie = cl_env_reenter(); @@ -1047,8 +1039,7 @@ static int ldlm_pools_shrink(ldlm_side_t client, int nr, * Find out how many resources we may release. */ for (nr_ns = ldlm_namespace_nr_read(client); - nr_ns > 0; nr_ns--) - { + nr_ns > 0; nr_ns--) { mutex_lock(ldlm_namespace_lock(client)); if (list_empty(ldlm_namespace_list(client))) { mutex_unlock(ldlm_namespace_lock(client)); @@ -1078,17 +1069,27 @@ static int ldlm_pools_shrink(ldlm_side_t client, int nr, ldlm_namespace_put(ns); } - if (nr == 0 || total == 0) { - cl_env_reexit(cookie); - return total; - } + cl_env_reexit(cookie); + return total; +} + +static unsigned long ldlm_pools_scan(ldlm_side_t client, int nr, unsigned int gfp_mask) +{ + unsigned long freed = 0; + int tmp, nr_ns; + struct ldlm_namespace *ns; + void *cookie; + + if (client == LDLM_NAMESPACE_CLIENT && !(gfp_mask & __GFP_FS)) + return -1; + + cookie = cl_env_reenter(); /* - * Shrink at least ldlm_namespace_nr(client) namespaces. + * Shrink at least ldlm_namespace_nr_read(client) namespaces. */ - for (nr_ns = ldlm_namespace_nr_read(client) - nr_ns; - nr_ns > 0; nr_ns--) - { + for (tmp = nr_ns = ldlm_namespace_nr_read(client); + tmp > 0; tmp--) { int cancel, nr_locks; /* @@ -1097,12 +1098,6 @@ static int ldlm_pools_shrink(ldlm_side_t client, int nr, mutex_lock(ldlm_namespace_lock(client)); if (list_empty(ldlm_namespace_list(client))) { mutex_unlock(ldlm_namespace_lock(client)); - /* - * If list is empty, we can't return any @cached > 0, - * that probably would cause needless shrinker - * call. - */ - cached = 0; break; } ns = ldlm_namespace_first_locked(client); @@ -1111,29 +1106,42 @@ static int ldlm_pools_shrink(ldlm_side_t client, int nr, mutex_unlock(ldlm_namespace_lock(client)); nr_locks = ldlm_pool_granted(&ns->ns_pool); - cancel = 1 + nr_locks * nr / total; - ldlm_pool_shrink(&ns->ns_pool, cancel, gfp_mask); - cached += ldlm_pool_granted(&ns->ns_pool); + /* + * We use to shrink propotionally but with new shrinker API, + * we lost the total number of freeable locks. + */ + cancel = 1 + min_t(int, nr_locks, nr / nr_ns); + freed += ldlm_pool_shrink(&ns->ns_pool, cancel, gfp_mask); ldlm_namespace_put(ns); } cl_env_reexit(cookie); - /* we only decrease the SLV in server pools shrinker, return -1 to - * kernel to avoid needless loop. LU-1128 */ - return (client == LDLM_NAMESPACE_SERVER) ? -1 : cached; + /* + * we only decrease the SLV in server pools shrinker, return + * SHRINK_STOP to kernel to avoid needless loop. LU-1128 + */ + return (client == LDLM_NAMESPACE_SERVER) ? SHRINK_STOP : freed; +} + +static unsigned long ldlm_pools_srv_count(struct shrinker *s, struct shrink_control *sc) +{ + return ldlm_pools_count(LDLM_NAMESPACE_SERVER, sc->gfp_mask); } -static int ldlm_pools_srv_shrink(SHRINKER_ARGS(sc, nr_to_scan, gfp_mask)) +static unsigned long ldlm_pools_srv_scan(struct shrinker *s, struct shrink_control *sc) { - return ldlm_pools_shrink(LDLM_NAMESPACE_SERVER, - shrink_param(sc, nr_to_scan), - shrink_param(sc, gfp_mask)); + return ldlm_pools_scan(LDLM_NAMESPACE_SERVER, sc->nr_to_scan, + sc->gfp_mask); } -static int ldlm_pools_cli_shrink(SHRINKER_ARGS(sc, nr_to_scan, gfp_mask)) +static unsigned long ldlm_pools_cli_count(struct shrinker *s, struct shrink_control *sc) { - return ldlm_pools_shrink(LDLM_NAMESPACE_CLIENT, - shrink_param(sc, nr_to_scan), - shrink_param(sc, gfp_mask)); + return ldlm_pools_count(LDLM_NAMESPACE_CLIENT, sc->gfp_mask); +} + +static unsigned long ldlm_pools_cli_scan(struct shrinker *s, struct shrink_control *sc) +{ + return ldlm_pools_scan(LDLM_NAMESPACE_CLIENT, sc->nr_to_scan, + sc->gfp_mask); } int ldlm_pools_recalc(ldlm_side_t client) @@ -1216,7 +1224,7 @@ int ldlm_pools_recalc(ldlm_side_t client) } /* - * Recalc at least ldlm_namespace_nr(client) namespaces. + * Recalc at least ldlm_namespace_nr_read(client) namespaces. */ for (nr = ldlm_namespace_nr_read(client); nr > 0; nr--) { int skip; @@ -1383,18 +1391,26 @@ static void ldlm_pools_thread_stop(void) ldlm_pools_thread = NULL; } +static struct shrinker ldlm_pools_srv_shrinker = { + .count_objects = ldlm_pools_srv_count, + .scan_objects = ldlm_pools_srv_scan, + .seeks = DEFAULT_SEEKS, +}; + +static struct shrinker ldlm_pools_cli_shrinker = { + .count_objects = ldlm_pools_cli_count, + .scan_objects = ldlm_pools_cli_scan, + .seeks = DEFAULT_SEEKS, +}; + int ldlm_pools_init(void) { int rc; rc = ldlm_pools_thread_start(); if (rc == 0) { - ldlm_pools_srv_shrinker = - set_shrinker(DEFAULT_SEEKS, - ldlm_pools_srv_shrink); - ldlm_pools_cli_shrinker = - set_shrinker(DEFAULT_SEEKS, - ldlm_pools_cli_shrink); + register_shrinker(&ldlm_pools_srv_shrinker); + register_shrinker(&ldlm_pools_cli_shrinker); } return rc; } @@ -1402,14 +1418,8 @@ EXPORT_SYMBOL(ldlm_pools_init); void ldlm_pools_fini(void) { - if (ldlm_pools_srv_shrinker != NULL) { - remove_shrinker(ldlm_pools_srv_shrinker); - ldlm_pools_srv_shrinker = NULL; - } - if (ldlm_pools_cli_shrinker != NULL) { - remove_shrinker(ldlm_pools_cli_shrinker); - ldlm_pools_cli_shrinker = NULL; - } + unregister_shrinker(&ldlm_pools_srv_shrinker); + unregister_shrinker(&ldlm_pools_cli_shrinker); ldlm_pools_thread_stop(); } EXPORT_SYMBOL(ldlm_pools_fini); diff --git a/drivers/staging/lustre/lustre/obdclass/lu_object.c b/drivers/staging/lustre/lustre/obdclass/lu_object.c index c29ac1c2defd..3a3d5bc5a628 100644 --- a/drivers/staging/lustre/lustre/obdclass/lu_object.c +++ b/drivers/staging/lustre/lustre/obdclass/lu_object.c @@ -1779,7 +1779,6 @@ int lu_env_refill_by_tags(struct lu_env *env, __u32 ctags, } EXPORT_SYMBOL(lu_env_refill_by_tags); -static struct shrinker *lu_site_shrinker = NULL; typedef struct lu_site_stats{ unsigned lss_populated; @@ -1835,61 +1834,68 @@ static void lu_site_stats_get(cfs_hash_t *hs, * objects without taking the lu_sites_guard lock, but this is not * possible in the current implementation. */ -static int lu_cache_shrink(SHRINKER_ARGS(sc, nr_to_scan, gfp_mask)) +static unsigned long lu_cache_shrink_count(struct shrinker *sk, + struct shrink_control *sc) { lu_site_stats_t stats; struct lu_site *s; struct lu_site *tmp; - int cached = 0; - int remain = shrink_param(sc, nr_to_scan); - LIST_HEAD(splice); - - if (!(shrink_param(sc, gfp_mask) & __GFP_FS)) { - if (remain != 0) - return -1; - else - /* We must not take the lu_sites_guard lock when - * __GFP_FS is *not* set because of the deadlock - * possibility detailed above. Additionally, - * since we cannot determine the number of - * objects in the cache without taking this - * lock, we're in a particularly tough spot. As - * a result, we'll just lie and say our cache is - * empty. This _should_ be ok, as we can't - * reclaim objects when __GFP_FS is *not* set - * anyways. - */ - return 0; - } + unsigned long cached = 0; - CDEBUG(D_INODE, "Shrink %d objects\n", remain); + if (!(sc->gfp_mask & __GFP_FS)) + return 0; mutex_lock(&lu_sites_guard); list_for_each_entry_safe(s, tmp, &lu_sites, ls_linkage) { - if (shrink_param(sc, nr_to_scan) != 0) { - remain = lu_site_purge(&lu_shrink_env, s, remain); - /* - * Move just shrunk site to the tail of site list to - * assure shrinking fairness. - */ - list_move_tail(&s->ls_linkage, &splice); - } - memset(&stats, 0, sizeof(stats)); lu_site_stats_get(s->ls_obj_hash, &stats, 0); cached += stats.lss_total - stats.lss_busy; - if (shrink_param(sc, nr_to_scan) && remain <= 0) - break; } - list_splice(&splice, lu_sites.prev); mutex_unlock(&lu_sites_guard); cached = (cached / 100) * sysctl_vfs_cache_pressure; - if (shrink_param(sc, nr_to_scan) == 0) - CDEBUG(D_INODE, "%d objects cached\n", cached); + CDEBUG(D_INODE, "%ld objects cached\n", cached); return cached; } +static unsigned long lu_cache_shrink_scan(struct shrinker *sk, + struct shrink_control *sc) +{ + struct lu_site *s; + struct lu_site *tmp; + unsigned long remain = sc->nr_to_scan, freed = 0; + LIST_HEAD(splice); + + if (!(sc->gfp_mask & __GFP_FS)) + /* We must not take the lu_sites_guard lock when + * __GFP_FS is *not* set because of the deadlock + * possibility detailed above. Additionally, + * since we cannot determine the number of + * objects in the cache without taking this + * lock, we're in a particularly tough spot. As + * a result, we'll just lie and say our cache is + * empty. This _should_ be ok, as we can't + * reclaim objects when __GFP_FS is *not* set + * anyways. + */ + return SHRINK_STOP; + + mutex_lock(&lu_sites_guard); + list_for_each_entry_safe(s, tmp, &lu_sites, ls_linkage) { + freed = lu_site_purge(&lu_shrink_env, s, remain); + remain -= freed; + /* + * Move just shrunk site to the tail of site list to + * assure shrinking fairness. + */ + list_move_tail(&s->ls_linkage, &splice); + } + list_splice(&splice, lu_sites.prev); + mutex_unlock(&lu_sites_guard); + + return sc->nr_to_scan - remain; +} + /* * Debugging stuff. */ @@ -1913,6 +1919,12 @@ int lu_printk_printer(const struct lu_env *env, return 0; } +static struct shrinker lu_site_shrinker = { + .count_objects = lu_cache_shrink_count, + .scan_objects = lu_cache_shrink_scan, + .seeks = DEFAULT_SEEKS, +}; + /** * Initialization of global lu_* data. */ @@ -1947,9 +1959,7 @@ int lu_global_init(void) * inode, one for ea. Unfortunately setting this high value results in * lu_object/inode cache consuming all the memory. */ - lu_site_shrinker = set_shrinker(DEFAULT_SEEKS, lu_cache_shrink); - if (lu_site_shrinker == NULL) - return -ENOMEM; + register_shrinker(&lu_site_shrinker); return result; } @@ -1959,11 +1969,7 @@ int lu_global_init(void) */ void lu_global_fini(void) { - if (lu_site_shrinker != NULL) { - remove_shrinker(lu_site_shrinker); - lu_site_shrinker = NULL; - } - + unregister_shrinker(&lu_site_shrinker); lu_context_key_degister(&lu_global_key); /* diff --git a/drivers/staging/lustre/lustre/ptlrpc/sec_bulk.c b/drivers/staging/lustre/lustre/ptlrpc/sec_bulk.c index 9013745ab105..e90c8fb7da6a 100644 --- a/drivers/staging/lustre/lustre/ptlrpc/sec_bulk.c +++ b/drivers/staging/lustre/lustre/ptlrpc/sec_bulk.c @@ -121,13 +121,6 @@ static struct ptlrpc_enc_page_pool { } page_pools; /* - * memory shrinker - */ -const int pools_shrinker_seeks = DEFAULT_SEEKS; -static struct shrinker *pools_shrinker = NULL; - - -/* * /proc/fs/lustre/sptlrpc/encrypt_page_pools */ int sptlrpc_proc_enc_pool_seq_show(struct seq_file *m, void *v) @@ -226,30 +219,46 @@ static void enc_pools_release_free_pages(long npages) } /* - * could be called frequently for query (@nr_to_scan == 0). * we try to keep at least PTLRPC_MAX_BRW_PAGES pages in the pool. */ -static int enc_pools_shrink(SHRINKER_ARGS(sc, nr_to_scan, gfp_mask)) +static unsigned long enc_pools_shrink_count(struct shrinker *s, + struct shrink_control *sc) { - if (unlikely(shrink_param(sc, nr_to_scan) != 0)) { + /* + * if no pool access for a long time, we consider it's fully idle. + * a little race here is fine. + */ + if (unlikely(cfs_time_current_sec() - page_pools.epp_last_access > + CACHE_QUIESCENT_PERIOD)) { spin_lock(&page_pools.epp_lock); - shrink_param(sc, nr_to_scan) = min_t(unsigned long, - shrink_param(sc, nr_to_scan), - page_pools.epp_free_pages - - PTLRPC_MAX_BRW_PAGES); - if (shrink_param(sc, nr_to_scan) > 0) { - enc_pools_release_free_pages(shrink_param(sc, - nr_to_scan)); - CDEBUG(D_SEC, "released %ld pages, %ld left\n", - (long)shrink_param(sc, nr_to_scan), - page_pools.epp_free_pages); - - page_pools.epp_st_shrinks++; - page_pools.epp_last_shrink = cfs_time_current_sec(); - } + page_pools.epp_idle_idx = IDLE_IDX_MAX; spin_unlock(&page_pools.epp_lock); } + LASSERT(page_pools.epp_idle_idx <= IDLE_IDX_MAX); + return max((int)page_pools.epp_free_pages - PTLRPC_MAX_BRW_PAGES, 0) * + (IDLE_IDX_MAX - page_pools.epp_idle_idx) / IDLE_IDX_MAX; +} + +/* + * we try to keep at least PTLRPC_MAX_BRW_PAGES pages in the pool. + */ +static unsigned long enc_pools_shrink_scan(struct shrinker *s, + struct shrink_control *sc) +{ + spin_lock(&page_pools.epp_lock); + sc->nr_to_scan = min_t(unsigned long, sc->nr_to_scan, + page_pools.epp_free_pages - PTLRPC_MAX_BRW_PAGES); + if (sc->nr_to_scan > 0) { + enc_pools_release_free_pages(sc->nr_to_scan); + CDEBUG(D_SEC, "released %ld pages, %ld left\n", + (long)sc->nr_to_scan, page_pools.epp_free_pages); + + page_pools.epp_st_shrinks++; + page_pools.epp_last_shrink = cfs_time_current_sec(); + } + spin_unlock(&page_pools.epp_lock); + /* * if no pool access for a long time, we consider it's fully idle. * a little race here is fine. @@ -262,8 +271,7 @@ static int enc_pools_shrink(SHRINKER_ARGS(sc, nr_to_scan, gfp_mask)) } LASSERT(page_pools.epp_idle_idx <= IDLE_IDX_MAX); - return max((int)page_pools.epp_free_pages - PTLRPC_MAX_BRW_PAGES, 0) * - (IDLE_IDX_MAX - page_pools.epp_idle_idx) / IDLE_IDX_MAX; + return sc->nr_to_scan; } static inline @@ -699,6 +707,12 @@ static inline void enc_pools_free(void) sizeof(*page_pools.epp_pools)); } +static struct shrinker pools_shrinker = { + .count_objects = enc_pools_shrink_count, + .scan_objects = enc_pools_shrink_scan, + .seeks = DEFAULT_SEEKS, +}; + int sptlrpc_enc_pool_init(void) { /* @@ -736,12 +750,7 @@ int sptlrpc_enc_pool_init(void) if (page_pools.epp_pools == NULL) return -ENOMEM; - pools_shrinker = set_shrinker(pools_shrinker_seeks, - enc_pools_shrink); - if (pools_shrinker == NULL) { - enc_pools_free(); - return -ENOMEM; - } + register_shrinker(&pools_shrinker); return 0; } @@ -750,11 +759,10 @@ void sptlrpc_enc_pool_fini(void) { unsigned long cleaned, npools; - LASSERT(pools_shrinker); LASSERT(page_pools.epp_pools); LASSERT(page_pools.epp_total_pages == page_pools.epp_free_pages); - remove_shrinker(pools_shrinker); + unregister_shrinker(&pools_shrinker); npools = npages_to_npools(page_pools.epp_total_pages); cleaned = enc_pools_cleanup(page_pools.epp_pools, npools); diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig index e988c81d763c..dbfc390330ac 100644 --- a/drivers/thermal/Kconfig +++ b/drivers/thermal/Kconfig @@ -17,8 +17,17 @@ if THERMAL config THERMAL_HWMON bool + prompt "Expose thermal sensors as hwmon device" depends on HWMON=y || HWMON=THERMAL default y + help + In case a sensor is registered with the thermal + framework, this option will also register it + as a hwmon. The sensor will then have the common + hwmon sysfs interface. + + Say 'Y' here if you want all thermal sensors to + have hwmon sysfs interface too. choice prompt "Default Thermal governor" @@ -91,6 +100,17 @@ config THERMAL_EMULATION because userland can easily disable the thermal policy by simply flooding this sysfs node with low temperature values. +config IMX_THERMAL + tristate "Temperature sensor driver for Freescale i.MX SoCs" + depends on CPU_THERMAL + depends on MFD_SYSCON + depends on OF + help + Support for Temperature Monitor (TEMPMON) found on Freescale i.MX SoCs. + It supports one critical trip point and one passive trip point. The + cpufreq is used as the cooling device to throttle CPUs when the + passive trip is crossed. + config SPEAR_THERMAL bool "SPEAr thermal sensor driver" depends on PLAT_SPEAR @@ -114,14 +134,6 @@ config KIRKWOOD_THERMAL Support for the Kirkwood thermal sensor driver into the Linux thermal framework. Only kirkwood 88F6282 and 88F6283 have this sensor. -config EXYNOS_THERMAL - tristate "Temperature sensor on Samsung EXYNOS" - depends on (ARCH_EXYNOS4 || ARCH_EXYNOS5) - depends on CPU_THERMAL - help - If you say yes here you get support for TMU (Thermal Management - Unit) on SAMSUNG EXYNOS series of SoC. - config DOVE_THERMAL tristate "Temperature sensor on Marvell Dove SoCs" depends on ARCH_DOVE @@ -184,4 +196,9 @@ menu "Texas Instruments thermal drivers" source "drivers/thermal/ti-soc-thermal/Kconfig" endmenu +menu "Samsung thermal drivers" +depends on PLAT_SAMSUNG +source "drivers/thermal/samsung/Kconfig" +endmenu + endif diff --git a/drivers/thermal/Makefile b/drivers/thermal/Makefile index 67184a293e3f..584b36319d51 100644 --- a/drivers/thermal/Makefile +++ b/drivers/thermal/Makefile @@ -5,6 +5,9 @@ obj-$(CONFIG_THERMAL) += thermal_sys.o thermal_sys-y += thermal_core.o +# interface to/from other layers providing sensors +thermal_sys-$(CONFIG_THERMAL_HWMON) += thermal_hwmon.o + # governors thermal_sys-$(CONFIG_THERMAL_GOV_FAIR_SHARE) += fair_share.o thermal_sys-$(CONFIG_THERMAL_GOV_STEP_WISE) += step_wise.o @@ -17,10 +20,11 @@ thermal_sys-$(CONFIG_CPU_THERMAL) += cpu_cooling.o obj-$(CONFIG_SPEAR_THERMAL) += spear_thermal.o obj-$(CONFIG_RCAR_THERMAL) += rcar_thermal.o obj-$(CONFIG_KIRKWOOD_THERMAL) += kirkwood_thermal.o -obj-$(CONFIG_EXYNOS_THERMAL) += exynos_thermal.o +obj-y += samsung/ obj-$(CONFIG_DOVE_THERMAL) += dove_thermal.o obj-$(CONFIG_DB8500_THERMAL) += db8500_thermal.o obj-$(CONFIG_ARMADA_THERMAL) += armada_thermal.o +obj-$(CONFIG_IMX_THERMAL) += imx_thermal.o obj-$(CONFIG_DB8500_CPUFREQ_COOLING) += db8500_cpufreq_cooling.o obj-$(CONFIG_INTEL_POWERCLAMP) += intel_powerclamp.o obj-$(CONFIG_X86_PKG_TEMP_THERMAL) += x86_pkg_temp_thermal.o diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c index 82e15dbb3ac7..d17902886c3f 100644 --- a/drivers/thermal/cpu_cooling.c +++ b/drivers/thermal/cpu_cooling.c @@ -322,6 +322,8 @@ static int cpufreq_thermal_notifier(struct notifier_block *nb, if (cpumask_test_cpu(policy->cpu, ¬ify_device->allowed_cpus)) max_freq = notify_device->cpufreq_val; + else + return 0; /* Never exceed user_policy.max */ if (max_freq > policy->user_policy.max) @@ -496,8 +498,12 @@ EXPORT_SYMBOL_GPL(cpufreq_cooling_register); */ void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev) { - struct cpufreq_cooling_device *cpufreq_dev = cdev->devdata; + struct cpufreq_cooling_device *cpufreq_dev; + + if (!cdev) + return; + cpufreq_dev = cdev->devdata; mutex_lock(&cooling_cpufreq_lock); cpufreq_dev_count--; diff --git a/drivers/thermal/exynos_thermal.c b/drivers/thermal/exynos_thermal.c deleted file mode 100644 index 9af4b93c9f86..000000000000 --- a/drivers/thermal/exynos_thermal.c +++ /dev/null @@ -1,1059 +0,0 @@ -/* - * exynos_thermal.c - Samsung EXYNOS TMU (Thermal Management Unit) - * - * Copyright (C) 2011 Samsung Electronics - * Donggeun Kim <dg77.kim@samsung.com> - * Amit Daniel Kachhap <amit.kachhap@linaro.org> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - */ - -#include <linux/module.h> -#include <linux/err.h> -#include <linux/kernel.h> -#include <linux/slab.h> -#include <linux/platform_device.h> -#include <linux/interrupt.h> -#include <linux/clk.h> -#include <linux/workqueue.h> -#include <linux/sysfs.h> -#include <linux/kobject.h> -#include <linux/io.h> -#include <linux/mutex.h> -#include <linux/platform_data/exynos_thermal.h> -#include <linux/thermal.h> -#include <linux/cpufreq.h> -#include <linux/cpu_cooling.h> -#include <linux/of.h> - -/* Exynos generic registers */ -#define EXYNOS_TMU_REG_TRIMINFO 0x0 -#define EXYNOS_TMU_REG_CONTROL 0x20 -#define EXYNOS_TMU_REG_STATUS 0x28 -#define EXYNOS_TMU_REG_CURRENT_TEMP 0x40 -#define EXYNOS_TMU_REG_INTEN 0x70 -#define EXYNOS_TMU_REG_INTSTAT 0x74 -#define EXYNOS_TMU_REG_INTCLEAR 0x78 - -#define EXYNOS_TMU_TRIM_TEMP_MASK 0xff -#define EXYNOS_TMU_GAIN_SHIFT 8 -#define EXYNOS_TMU_REF_VOLTAGE_SHIFT 24 -#define EXYNOS_TMU_CORE_ON 3 -#define EXYNOS_TMU_CORE_OFF 2 -#define EXYNOS_TMU_DEF_CODE_TO_TEMP_OFFSET 50 - -/* Exynos4210 specific registers */ -#define EXYNOS4210_TMU_REG_THRESHOLD_TEMP 0x44 -#define EXYNOS4210_TMU_REG_TRIG_LEVEL0 0x50 -#define EXYNOS4210_TMU_REG_TRIG_LEVEL1 0x54 -#define EXYNOS4210_TMU_REG_TRIG_LEVEL2 0x58 -#define EXYNOS4210_TMU_REG_TRIG_LEVEL3 0x5C -#define EXYNOS4210_TMU_REG_PAST_TEMP0 0x60 -#define EXYNOS4210_TMU_REG_PAST_TEMP1 0x64 -#define EXYNOS4210_TMU_REG_PAST_TEMP2 0x68 -#define EXYNOS4210_TMU_REG_PAST_TEMP3 0x6C - -#define EXYNOS4210_TMU_TRIG_LEVEL0_MASK 0x1 -#define EXYNOS4210_TMU_TRIG_LEVEL1_MASK 0x10 -#define EXYNOS4210_TMU_TRIG_LEVEL2_MASK 0x100 -#define EXYNOS4210_TMU_TRIG_LEVEL3_MASK 0x1000 -#define EXYNOS4210_TMU_INTCLEAR_VAL 0x1111 - -/* Exynos5250 and Exynos4412 specific registers */ -#define EXYNOS_TMU_TRIMINFO_CON 0x14 -#define EXYNOS_THD_TEMP_RISE 0x50 -#define EXYNOS_THD_TEMP_FALL 0x54 -#define EXYNOS_EMUL_CON 0x80 - -#define EXYNOS_TRIMINFO_RELOAD 0x1 -#define EXYNOS_TMU_CLEAR_RISE_INT 0x111 -#define EXYNOS_TMU_CLEAR_FALL_INT (0x111 << 12) -#define EXYNOS_MUX_ADDR_VALUE 6 -#define EXYNOS_MUX_ADDR_SHIFT 20 -#define EXYNOS_TMU_TRIP_MODE_SHIFT 13 - -#define EFUSE_MIN_VALUE 40 -#define EFUSE_MAX_VALUE 100 - -/* In-kernel thermal framework related macros & definations */ -#define SENSOR_NAME_LEN 16 -#define MAX_TRIP_COUNT 8 -#define MAX_COOLING_DEVICE 4 -#define MAX_THRESHOLD_LEVS 4 - -#define ACTIVE_INTERVAL 500 -#define IDLE_INTERVAL 10000 -#define MCELSIUS 1000 - -#ifdef CONFIG_THERMAL_EMULATION -#define EXYNOS_EMUL_TIME 0x57F0 -#define EXYNOS_EMUL_TIME_SHIFT 16 -#define EXYNOS_EMUL_DATA_SHIFT 8 -#define EXYNOS_EMUL_DATA_MASK 0xFF -#define EXYNOS_EMUL_ENABLE 0x1 -#endif /* CONFIG_THERMAL_EMULATION */ - -/* CPU Zone information */ -#define PANIC_ZONE 4 -#define WARN_ZONE 3 -#define MONITOR_ZONE 2 -#define SAFE_ZONE 1 - -#define GET_ZONE(trip) (trip + 2) -#define GET_TRIP(zone) (zone - 2) - -#define EXYNOS_ZONE_COUNT 3 - -struct exynos_tmu_data { - struct exynos_tmu_platform_data *pdata; - struct resource *mem; - void __iomem *base; - int irq; - enum soc_type soc; - struct work_struct irq_work; - struct mutex lock; - struct clk *clk; - u8 temp_error1, temp_error2; -}; - -struct thermal_trip_point_conf { - int trip_val[MAX_TRIP_COUNT]; - int trip_count; - u8 trigger_falling; -}; - -struct thermal_cooling_conf { - struct freq_clip_table freq_data[MAX_TRIP_COUNT]; - int freq_clip_count; -}; - -struct thermal_sensor_conf { - char name[SENSOR_NAME_LEN]; - int (*read_temperature)(void *data); - int (*write_emul_temp)(void *drv_data, unsigned long temp); - struct thermal_trip_point_conf trip_data; - struct thermal_cooling_conf cooling_data; - void *private_data; -}; - -struct exynos_thermal_zone { - enum thermal_device_mode mode; - struct thermal_zone_device *therm_dev; - struct thermal_cooling_device *cool_dev[MAX_COOLING_DEVICE]; - unsigned int cool_dev_size; - struct platform_device *exynos4_dev; - struct thermal_sensor_conf *sensor_conf; - bool bind; -}; - -static struct exynos_thermal_zone *th_zone; -static void exynos_unregister_thermal(void); -static int exynos_register_thermal(struct thermal_sensor_conf *sensor_conf); - -/* Get mode callback functions for thermal zone */ -static int exynos_get_mode(struct thermal_zone_device *thermal, - enum thermal_device_mode *mode) -{ - if (th_zone) - *mode = th_zone->mode; - return 0; -} - -/* Set mode callback functions for thermal zone */ -static int exynos_set_mode(struct thermal_zone_device *thermal, - enum thermal_device_mode mode) -{ - if (!th_zone->therm_dev) { - pr_notice("thermal zone not registered\n"); - return 0; - } - - mutex_lock(&th_zone->therm_dev->lock); - - if (mode == THERMAL_DEVICE_ENABLED && - !th_zone->sensor_conf->trip_data.trigger_falling) - th_zone->therm_dev->polling_delay = IDLE_INTERVAL; - else - th_zone->therm_dev->polling_delay = 0; - - mutex_unlock(&th_zone->therm_dev->lock); - - th_zone->mode = mode; - thermal_zone_device_update(th_zone->therm_dev); - pr_info("thermal polling set for duration=%d msec\n", - th_zone->therm_dev->polling_delay); - return 0; -} - - -/* Get trip type callback functions for thermal zone */ -static int exynos_get_trip_type(struct thermal_zone_device *thermal, int trip, - enum thermal_trip_type *type) -{ - switch (GET_ZONE(trip)) { - case MONITOR_ZONE: - case WARN_ZONE: - *type = THERMAL_TRIP_ACTIVE; - break; - case PANIC_ZONE: - *type = THERMAL_TRIP_CRITICAL; - break; - default: - return -EINVAL; - } - return 0; -} - -/* Get trip temperature callback functions for thermal zone */ -static int exynos_get_trip_temp(struct thermal_zone_device *thermal, int trip, - unsigned long *temp) -{ - if (trip < GET_TRIP(MONITOR_ZONE) || trip > GET_TRIP(PANIC_ZONE)) - return -EINVAL; - - *temp = th_zone->sensor_conf->trip_data.trip_val[trip]; - /* convert the temperature into millicelsius */ - *temp = *temp * MCELSIUS; - - return 0; -} - -/* Get critical temperature callback functions for thermal zone */ -static int exynos_get_crit_temp(struct thermal_zone_device *thermal, - unsigned long *temp) -{ - int ret; - /* Panic zone */ - ret = exynos_get_trip_temp(thermal, GET_TRIP(PANIC_ZONE), temp); - return ret; -} - -/* Bind callback functions for thermal zone */ -static int exynos_bind(struct thermal_zone_device *thermal, - struct thermal_cooling_device *cdev) -{ - int ret = 0, i, tab_size, level; - struct freq_clip_table *tab_ptr, *clip_data; - struct thermal_sensor_conf *data = th_zone->sensor_conf; - - tab_ptr = (struct freq_clip_table *)data->cooling_data.freq_data; - tab_size = data->cooling_data.freq_clip_count; - - if (tab_ptr == NULL || tab_size == 0) - return -EINVAL; - - /* find the cooling device registered*/ - for (i = 0; i < th_zone->cool_dev_size; i++) - if (cdev == th_zone->cool_dev[i]) - break; - - /* No matching cooling device */ - if (i == th_zone->cool_dev_size) - return 0; - - /* Bind the thermal zone to the cpufreq cooling device */ - for (i = 0; i < tab_size; i++) { - clip_data = (struct freq_clip_table *)&(tab_ptr[i]); - level = cpufreq_cooling_get_level(0, clip_data->freq_clip_max); - if (level == THERMAL_CSTATE_INVALID) - return 0; - switch (GET_ZONE(i)) { - case MONITOR_ZONE: - case WARN_ZONE: - if (thermal_zone_bind_cooling_device(thermal, i, cdev, - level, 0)) { - pr_err("error binding cdev inst %d\n", i); - ret = -EINVAL; - } - th_zone->bind = true; - break; - default: - ret = -EINVAL; - } - } - - return ret; -} - -/* Unbind callback functions for thermal zone */ -static int exynos_unbind(struct thermal_zone_device *thermal, - struct thermal_cooling_device *cdev) -{ - int ret = 0, i, tab_size; - struct thermal_sensor_conf *data = th_zone->sensor_conf; - - if (th_zone->bind == false) - return 0; - - tab_size = data->cooling_data.freq_clip_count; - - if (tab_size == 0) - return -EINVAL; - - /* find the cooling device registered*/ - for (i = 0; i < th_zone->cool_dev_size; i++) - if (cdev == th_zone->cool_dev[i]) - break; - - /* No matching cooling device */ - if (i == th_zone->cool_dev_size) - return 0; - - /* Bind the thermal zone to the cpufreq cooling device */ - for (i = 0; i < tab_size; i++) { - switch (GET_ZONE(i)) { - case MONITOR_ZONE: - case WARN_ZONE: - if (thermal_zone_unbind_cooling_device(thermal, i, - cdev)) { - pr_err("error unbinding cdev inst=%d\n", i); - ret = -EINVAL; - } - th_zone->bind = false; - break; - default: - ret = -EINVAL; - } - } - return ret; -} - -/* Get temperature callback functions for thermal zone */ -static int exynos_get_temp(struct thermal_zone_device *thermal, - unsigned long *temp) -{ - void *data; - - if (!th_zone->sensor_conf) { - pr_info("Temperature sensor not initialised\n"); - return -EINVAL; - } - data = th_zone->sensor_conf->private_data; - *temp = th_zone->sensor_conf->read_temperature(data); - /* convert the temperature into millicelsius */ - *temp = *temp * MCELSIUS; - return 0; -} - -/* Get temperature callback functions for thermal zone */ -static int exynos_set_emul_temp(struct thermal_zone_device *thermal, - unsigned long temp) -{ - void *data; - int ret = -EINVAL; - - if (!th_zone->sensor_conf) { - pr_info("Temperature sensor not initialised\n"); - return -EINVAL; - } - data = th_zone->sensor_conf->private_data; - if (th_zone->sensor_conf->write_emul_temp) - ret = th_zone->sensor_conf->write_emul_temp(data, temp); - return ret; -} - -/* Get the temperature trend */ -static int exynos_get_trend(struct thermal_zone_device *thermal, - int trip, enum thermal_trend *trend) -{ - int ret; - unsigned long trip_temp; - - ret = exynos_get_trip_temp(thermal, trip, &trip_temp); - if (ret < 0) - return ret; - - if (thermal->temperature >= trip_temp) - *trend = THERMAL_TREND_RAISE_FULL; - else - *trend = THERMAL_TREND_DROP_FULL; - - return 0; -} -/* Operation callback functions for thermal zone */ -static struct thermal_zone_device_ops const exynos_dev_ops = { - .bind = exynos_bind, - .unbind = exynos_unbind, - .get_temp = exynos_get_temp, - .set_emul_temp = exynos_set_emul_temp, - .get_trend = exynos_get_trend, - .get_mode = exynos_get_mode, - .set_mode = exynos_set_mode, - .get_trip_type = exynos_get_trip_type, - .get_trip_temp = exynos_get_trip_temp, - .get_crit_temp = exynos_get_crit_temp, -}; - -/* - * This function may be called from interrupt based temperature sensor - * when threshold is changed. - */ -static void exynos_report_trigger(void) -{ - unsigned int i; - char data[10]; - char *envp[] = { data, NULL }; - - if (!th_zone || !th_zone->therm_dev) - return; - if (th_zone->bind == false) { - for (i = 0; i < th_zone->cool_dev_size; i++) { - if (!th_zone->cool_dev[i]) - continue; - exynos_bind(th_zone->therm_dev, - th_zone->cool_dev[i]); - } - } - - thermal_zone_device_update(th_zone->therm_dev); - - mutex_lock(&th_zone->therm_dev->lock); - /* Find the level for which trip happened */ - for (i = 0; i < th_zone->sensor_conf->trip_data.trip_count; i++) { - if (th_zone->therm_dev->last_temperature < - th_zone->sensor_conf->trip_data.trip_val[i] * MCELSIUS) - break; - } - - if (th_zone->mode == THERMAL_DEVICE_ENABLED && - !th_zone->sensor_conf->trip_data.trigger_falling) { - if (i > 0) - th_zone->therm_dev->polling_delay = ACTIVE_INTERVAL; - else - th_zone->therm_dev->polling_delay = IDLE_INTERVAL; - } - - snprintf(data, sizeof(data), "%u", i); - kobject_uevent_env(&th_zone->therm_dev->device.kobj, KOBJ_CHANGE, envp); - mutex_unlock(&th_zone->therm_dev->lock); -} - -/* Register with the in-kernel thermal management */ -static int exynos_register_thermal(struct thermal_sensor_conf *sensor_conf) -{ - int ret; - struct cpumask mask_val; - - if (!sensor_conf || !sensor_conf->read_temperature) { - pr_err("Temperature sensor not initialised\n"); - return -EINVAL; - } - - th_zone = kzalloc(sizeof(struct exynos_thermal_zone), GFP_KERNEL); - if (!th_zone) - return -ENOMEM; - - th_zone->sensor_conf = sensor_conf; - cpumask_set_cpu(0, &mask_val); - th_zone->cool_dev[0] = cpufreq_cooling_register(&mask_val); - if (IS_ERR(th_zone->cool_dev[0])) { - pr_err("Failed to register cpufreq cooling device\n"); - ret = -EINVAL; - goto err_unregister; - } - th_zone->cool_dev_size++; - - th_zone->therm_dev = thermal_zone_device_register(sensor_conf->name, - EXYNOS_ZONE_COUNT, 0, NULL, &exynos_dev_ops, NULL, 0, - sensor_conf->trip_data.trigger_falling ? - 0 : IDLE_INTERVAL); - - if (IS_ERR(th_zone->therm_dev)) { - pr_err("Failed to register thermal zone device\n"); - ret = PTR_ERR(th_zone->therm_dev); - goto err_unregister; - } - th_zone->mode = THERMAL_DEVICE_ENABLED; - - pr_info("Exynos: Kernel Thermal management registered\n"); - - return 0; - -err_unregister: - exynos_unregister_thermal(); - return ret; -} - -/* Un-Register with the in-kernel thermal management */ -static void exynos_unregister_thermal(void) -{ - int i; - - if (!th_zone) - return; - - if (th_zone->therm_dev) - thermal_zone_device_unregister(th_zone->therm_dev); - - for (i = 0; i < th_zone->cool_dev_size; i++) { - if (th_zone->cool_dev[i]) - cpufreq_cooling_unregister(th_zone->cool_dev[i]); - } - - kfree(th_zone); - pr_info("Exynos: Kernel Thermal management unregistered\n"); -} - -/* - * TMU treats temperature as a mapped temperature code. - * The temperature is converted differently depending on the calibration type. - */ -static int temp_to_code(struct exynos_tmu_data *data, u8 temp) -{ - struct exynos_tmu_platform_data *pdata = data->pdata; - int temp_code; - - if (data->soc == SOC_ARCH_EXYNOS4210) - /* temp should range between 25 and 125 */ - if (temp < 25 || temp > 125) { - temp_code = -EINVAL; - goto out; - } - - switch (pdata->cal_type) { - case TYPE_TWO_POINT_TRIMMING: - temp_code = (temp - 25) * - (data->temp_error2 - data->temp_error1) / - (85 - 25) + data->temp_error1; - break; - case TYPE_ONE_POINT_TRIMMING: - temp_code = temp + data->temp_error1 - 25; - break; - default: - temp_code = temp + EXYNOS_TMU_DEF_CODE_TO_TEMP_OFFSET; - break; - } -out: - return temp_code; -} - -/* - * Calculate a temperature value from a temperature code. - * The unit of the temperature is degree Celsius. - */ -static int code_to_temp(struct exynos_tmu_data *data, u8 temp_code) -{ - struct exynos_tmu_platform_data *pdata = data->pdata; - int temp; - - if (data->soc == SOC_ARCH_EXYNOS4210) - /* temp_code should range between 75 and 175 */ - if (temp_code < 75 || temp_code > 175) { - temp = -ENODATA; - goto out; - } - - switch (pdata->cal_type) { - case TYPE_TWO_POINT_TRIMMING: - temp = (temp_code - data->temp_error1) * (85 - 25) / - (data->temp_error2 - data->temp_error1) + 25; - break; - case TYPE_ONE_POINT_TRIMMING: - temp = temp_code - data->temp_error1 + 25; - break; - default: - temp = temp_code - EXYNOS_TMU_DEF_CODE_TO_TEMP_OFFSET; - break; - } -out: - return temp; -} - -static int exynos_tmu_initialize(struct platform_device *pdev) -{ - struct exynos_tmu_data *data = platform_get_drvdata(pdev); - struct exynos_tmu_platform_data *pdata = data->pdata; - unsigned int status, trim_info; - unsigned int rising_threshold = 0, falling_threshold = 0; - int ret = 0, threshold_code, i, trigger_levs = 0; - - mutex_lock(&data->lock); - clk_enable(data->clk); - - status = readb(data->base + EXYNOS_TMU_REG_STATUS); - if (!status) { - ret = -EBUSY; - goto out; - } - - if (data->soc == SOC_ARCH_EXYNOS) { - __raw_writel(EXYNOS_TRIMINFO_RELOAD, - data->base + EXYNOS_TMU_TRIMINFO_CON); - } - /* Save trimming info in order to perform calibration */ - trim_info = readl(data->base + EXYNOS_TMU_REG_TRIMINFO); - data->temp_error1 = trim_info & EXYNOS_TMU_TRIM_TEMP_MASK; - data->temp_error2 = ((trim_info >> 8) & EXYNOS_TMU_TRIM_TEMP_MASK); - - if ((EFUSE_MIN_VALUE > data->temp_error1) || - (data->temp_error1 > EFUSE_MAX_VALUE) || - (data->temp_error2 != 0)) - data->temp_error1 = pdata->efuse_value; - - /* Count trigger levels to be enabled */ - for (i = 0; i < MAX_THRESHOLD_LEVS; i++) - if (pdata->trigger_levels[i]) - trigger_levs++; - - if (data->soc == SOC_ARCH_EXYNOS4210) { - /* Write temperature code for threshold */ - threshold_code = temp_to_code(data, pdata->threshold); - if (threshold_code < 0) { - ret = threshold_code; - goto out; - } - writeb(threshold_code, - data->base + EXYNOS4210_TMU_REG_THRESHOLD_TEMP); - for (i = 0; i < trigger_levs; i++) - writeb(pdata->trigger_levels[i], - data->base + EXYNOS4210_TMU_REG_TRIG_LEVEL0 + i * 4); - - writel(EXYNOS4210_TMU_INTCLEAR_VAL, - data->base + EXYNOS_TMU_REG_INTCLEAR); - } else if (data->soc == SOC_ARCH_EXYNOS) { - /* Write temperature code for rising and falling threshold */ - for (i = 0; i < trigger_levs; i++) { - threshold_code = temp_to_code(data, - pdata->trigger_levels[i]); - if (threshold_code < 0) { - ret = threshold_code; - goto out; - } - rising_threshold |= threshold_code << 8 * i; - if (pdata->threshold_falling) { - threshold_code = temp_to_code(data, - pdata->trigger_levels[i] - - pdata->threshold_falling); - if (threshold_code > 0) - falling_threshold |= - threshold_code << 8 * i; - } - } - - writel(rising_threshold, - data->base + EXYNOS_THD_TEMP_RISE); - writel(falling_threshold, - data->base + EXYNOS_THD_TEMP_FALL); - - writel(EXYNOS_TMU_CLEAR_RISE_INT | EXYNOS_TMU_CLEAR_FALL_INT, - data->base + EXYNOS_TMU_REG_INTCLEAR); - } -out: - clk_disable(data->clk); - mutex_unlock(&data->lock); - - return ret; -} - -static void exynos_tmu_control(struct platform_device *pdev, bool on) -{ - struct exynos_tmu_data *data = platform_get_drvdata(pdev); - struct exynos_tmu_platform_data *pdata = data->pdata; - unsigned int con, interrupt_en; - - mutex_lock(&data->lock); - clk_enable(data->clk); - - con = pdata->reference_voltage << EXYNOS_TMU_REF_VOLTAGE_SHIFT | - pdata->gain << EXYNOS_TMU_GAIN_SHIFT; - - if (data->soc == SOC_ARCH_EXYNOS) { - con |= pdata->noise_cancel_mode << EXYNOS_TMU_TRIP_MODE_SHIFT; - con |= (EXYNOS_MUX_ADDR_VALUE << EXYNOS_MUX_ADDR_SHIFT); - } - - if (on) { - con |= EXYNOS_TMU_CORE_ON; - interrupt_en = pdata->trigger_level3_en << 12 | - pdata->trigger_level2_en << 8 | - pdata->trigger_level1_en << 4 | - pdata->trigger_level0_en; - if (pdata->threshold_falling) - interrupt_en |= interrupt_en << 16; - } else { - con |= EXYNOS_TMU_CORE_OFF; - interrupt_en = 0; /* Disable all interrupts */ - } - writel(interrupt_en, data->base + EXYNOS_TMU_REG_INTEN); - writel(con, data->base + EXYNOS_TMU_REG_CONTROL); - - clk_disable(data->clk); - mutex_unlock(&data->lock); -} - -static int exynos_tmu_read(struct exynos_tmu_data *data) -{ - u8 temp_code; - int temp; - - mutex_lock(&data->lock); - clk_enable(data->clk); - - temp_code = readb(data->base + EXYNOS_TMU_REG_CURRENT_TEMP); - temp = code_to_temp(data, temp_code); - - clk_disable(data->clk); - mutex_unlock(&data->lock); - - return temp; -} - -#ifdef CONFIG_THERMAL_EMULATION -static int exynos_tmu_set_emulation(void *drv_data, unsigned long temp) -{ - struct exynos_tmu_data *data = drv_data; - unsigned int reg; - int ret = -EINVAL; - - if (data->soc == SOC_ARCH_EXYNOS4210) - goto out; - - if (temp && temp < MCELSIUS) - goto out; - - mutex_lock(&data->lock); - clk_enable(data->clk); - - reg = readl(data->base + EXYNOS_EMUL_CON); - - if (temp) { - temp /= MCELSIUS; - - reg = (EXYNOS_EMUL_TIME << EXYNOS_EMUL_TIME_SHIFT) | - (temp_to_code(data, temp) - << EXYNOS_EMUL_DATA_SHIFT) | EXYNOS_EMUL_ENABLE; - } else { - reg &= ~EXYNOS_EMUL_ENABLE; - } - - writel(reg, data->base + EXYNOS_EMUL_CON); - - clk_disable(data->clk); - mutex_unlock(&data->lock); - return 0; -out: - return ret; -} -#else -static int exynos_tmu_set_emulation(void *drv_data, unsigned long temp) - { return -EINVAL; } -#endif/*CONFIG_THERMAL_EMULATION*/ - -static void exynos_tmu_work(struct work_struct *work) -{ - struct exynos_tmu_data *data = container_of(work, - struct exynos_tmu_data, irq_work); - - exynos_report_trigger(); - mutex_lock(&data->lock); - clk_enable(data->clk); - if (data->soc == SOC_ARCH_EXYNOS) - writel(EXYNOS_TMU_CLEAR_RISE_INT | - EXYNOS_TMU_CLEAR_FALL_INT, - data->base + EXYNOS_TMU_REG_INTCLEAR); - else - writel(EXYNOS4210_TMU_INTCLEAR_VAL, - data->base + EXYNOS_TMU_REG_INTCLEAR); - clk_disable(data->clk); - mutex_unlock(&data->lock); - - enable_irq(data->irq); -} - -static irqreturn_t exynos_tmu_irq(int irq, void *id) -{ - struct exynos_tmu_data *data = id; - - disable_irq_nosync(irq); - schedule_work(&data->irq_work); - - return IRQ_HANDLED; -} -static struct thermal_sensor_conf exynos_sensor_conf = { - .name = "exynos-therm", - .read_temperature = (int (*)(void *))exynos_tmu_read, - .write_emul_temp = exynos_tmu_set_emulation, -}; - -#if defined(CONFIG_CPU_EXYNOS4210) -static struct exynos_tmu_platform_data const exynos4210_default_tmu_data = { - .threshold = 80, - .trigger_levels[0] = 5, - .trigger_levels[1] = 20, - .trigger_levels[2] = 30, - .trigger_level0_en = 1, - .trigger_level1_en = 1, - .trigger_level2_en = 1, - .trigger_level3_en = 0, - .gain = 15, - .reference_voltage = 7, - .cal_type = TYPE_ONE_POINT_TRIMMING, - .freq_tab[0] = { - .freq_clip_max = 800 * 1000, - .temp_level = 85, - }, - .freq_tab[1] = { - .freq_clip_max = 200 * 1000, - .temp_level = 100, - }, - .freq_tab_count = 2, - .type = SOC_ARCH_EXYNOS4210, -}; -#define EXYNOS4210_TMU_DRV_DATA (&exynos4210_default_tmu_data) -#else -#define EXYNOS4210_TMU_DRV_DATA (NULL) -#endif - -#if defined(CONFIG_SOC_EXYNOS5250) || defined(CONFIG_SOC_EXYNOS4412) || \ - defined(CONFIG_SOC_EXYNOS4212) -static struct exynos_tmu_platform_data const exynos_default_tmu_data = { - .threshold_falling = 10, - .trigger_levels[0] = 85, - .trigger_levels[1] = 103, - .trigger_levels[2] = 110, - .trigger_level0_en = 1, - .trigger_level1_en = 1, - .trigger_level2_en = 1, - .trigger_level3_en = 0, - .gain = 8, - .reference_voltage = 16, - .noise_cancel_mode = 4, - .cal_type = TYPE_ONE_POINT_TRIMMING, - .efuse_value = 55, - .freq_tab[0] = { - .freq_clip_max = 800 * 1000, - .temp_level = 85, - }, - .freq_tab[1] = { - .freq_clip_max = 200 * 1000, - .temp_level = 103, - }, - .freq_tab_count = 2, - .type = SOC_ARCH_EXYNOS, -}; -#define EXYNOS_TMU_DRV_DATA (&exynos_default_tmu_data) -#else -#define EXYNOS_TMU_DRV_DATA (NULL) -#endif - -#ifdef CONFIG_OF -static const struct of_device_id exynos_tmu_match[] = { - { - .compatible = "samsung,exynos4210-tmu", - .data = (void *)EXYNOS4210_TMU_DRV_DATA, - }, - { - .compatible = "samsung,exynos4412-tmu", - .data = (void *)EXYNOS_TMU_DRV_DATA, - }, - { - .compatible = "samsung,exynos5250-tmu", - .data = (void *)EXYNOS_TMU_DRV_DATA, - }, - {}, -}; -MODULE_DEVICE_TABLE(of, exynos_tmu_match); -#endif - -static struct platform_device_id exynos_tmu_driver_ids[] = { - { - .name = "exynos4210-tmu", - .driver_data = (kernel_ulong_t)EXYNOS4210_TMU_DRV_DATA, - }, - { - .name = "exynos5250-tmu", - .driver_data = (kernel_ulong_t)EXYNOS_TMU_DRV_DATA, - }, - { }, -}; -MODULE_DEVICE_TABLE(platform, exynos_tmu_driver_ids); - -static inline struct exynos_tmu_platform_data *exynos_get_driver_data( - struct platform_device *pdev) -{ -#ifdef CONFIG_OF - if (pdev->dev.of_node) { - const struct of_device_id *match; - match = of_match_node(exynos_tmu_match, pdev->dev.of_node); - if (!match) - return NULL; - return (struct exynos_tmu_platform_data *) match->data; - } -#endif - return (struct exynos_tmu_platform_data *) - platform_get_device_id(pdev)->driver_data; -} - -static int exynos_tmu_probe(struct platform_device *pdev) -{ - struct exynos_tmu_data *data; - struct exynos_tmu_platform_data *pdata = pdev->dev.platform_data; - int ret, i; - - if (!pdata) - pdata = exynos_get_driver_data(pdev); - - if (!pdata) { - dev_err(&pdev->dev, "No platform init data supplied.\n"); - return -ENODEV; - } - data = devm_kzalloc(&pdev->dev, sizeof(struct exynos_tmu_data), - GFP_KERNEL); - if (!data) { - dev_err(&pdev->dev, "Failed to allocate driver structure\n"); - return -ENOMEM; - } - - data->irq = platform_get_irq(pdev, 0); - if (data->irq < 0) { - dev_err(&pdev->dev, "Failed to get platform irq\n"); - return data->irq; - } - - INIT_WORK(&data->irq_work, exynos_tmu_work); - - data->mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); - data->base = devm_ioremap_resource(&pdev->dev, data->mem); - if (IS_ERR(data->base)) - return PTR_ERR(data->base); - - ret = devm_request_irq(&pdev->dev, data->irq, exynos_tmu_irq, - IRQF_TRIGGER_RISING, "exynos-tmu", data); - if (ret) { - dev_err(&pdev->dev, "Failed to request irq: %d\n", data->irq); - return ret; - } - - data->clk = devm_clk_get(&pdev->dev, "tmu_apbif"); - if (IS_ERR(data->clk)) { - dev_err(&pdev->dev, "Failed to get clock\n"); - return PTR_ERR(data->clk); - } - - ret = clk_prepare(data->clk); - if (ret) - return ret; - - if (pdata->type == SOC_ARCH_EXYNOS || - pdata->type == SOC_ARCH_EXYNOS4210) - data->soc = pdata->type; - else { - ret = -EINVAL; - dev_err(&pdev->dev, "Platform not supported\n"); - goto err_clk; - } - - data->pdata = pdata; - platform_set_drvdata(pdev, data); - mutex_init(&data->lock); - - ret = exynos_tmu_initialize(pdev); - if (ret) { - dev_err(&pdev->dev, "Failed to initialize TMU\n"); - goto err_clk; - } - - exynos_tmu_control(pdev, true); - - /* Register the sensor with thermal management interface */ - (&exynos_sensor_conf)->private_data = data; - exynos_sensor_conf.trip_data.trip_count = pdata->trigger_level0_en + - pdata->trigger_level1_en + pdata->trigger_level2_en + - pdata->trigger_level3_en; - - for (i = 0; i < exynos_sensor_conf.trip_data.trip_count; i++) - exynos_sensor_conf.trip_data.trip_val[i] = - pdata->threshold + pdata->trigger_levels[i]; - - exynos_sensor_conf.trip_data.trigger_falling = pdata->threshold_falling; - - exynos_sensor_conf.cooling_data.freq_clip_count = - pdata->freq_tab_count; - for (i = 0; i < pdata->freq_tab_count; i++) { - exynos_sensor_conf.cooling_data.freq_data[i].freq_clip_max = - pdata->freq_tab[i].freq_clip_max; - exynos_sensor_conf.cooling_data.freq_data[i].temp_level = - pdata->freq_tab[i].temp_level; - } - - ret = exynos_register_thermal(&exynos_sensor_conf); - if (ret) { - dev_err(&pdev->dev, "Failed to register thermal interface\n"); - goto err_clk; - } - - return 0; -err_clk: - clk_unprepare(data->clk); - return ret; -} - -static int exynos_tmu_remove(struct platform_device *pdev) -{ - struct exynos_tmu_data *data = platform_get_drvdata(pdev); - - exynos_tmu_control(pdev, false); - - exynos_unregister_thermal(); - - clk_unprepare(data->clk); - - return 0; -} - -#ifdef CONFIG_PM_SLEEP -static int exynos_tmu_suspend(struct device *dev) -{ - exynos_tmu_control(to_platform_device(dev), false); - - return 0; -} - -static int exynos_tmu_resume(struct device *dev) -{ - struct platform_device *pdev = to_platform_device(dev); - - exynos_tmu_initialize(pdev); - exynos_tmu_control(pdev, true); - - return 0; -} - -static SIMPLE_DEV_PM_OPS(exynos_tmu_pm, - exynos_tmu_suspend, exynos_tmu_resume); -#define EXYNOS_TMU_PM (&exynos_tmu_pm) -#else -#define EXYNOS_TMU_PM NULL -#endif - -static struct platform_driver exynos_tmu_driver = { - .driver = { - .name = "exynos-tmu", - .owner = THIS_MODULE, - .pm = EXYNOS_TMU_PM, - .of_match_table = of_match_ptr(exynos_tmu_match), - }, - .probe = exynos_tmu_probe, - .remove = exynos_tmu_remove, - .id_table = exynos_tmu_driver_ids, -}; - -module_platform_driver(exynos_tmu_driver); - -MODULE_DESCRIPTION("EXYNOS TMU Driver"); -MODULE_AUTHOR("Donggeun Kim <dg77.kim@samsung.com>"); -MODULE_LICENSE("GPL"); -MODULE_ALIAS("platform:exynos-tmu"); diff --git a/drivers/thermal/imx_thermal.c b/drivers/thermal/imx_thermal.c new file mode 100644 index 000000000000..1d6c801c1eb9 --- /dev/null +++ b/drivers/thermal/imx_thermal.c @@ -0,0 +1,541 @@ +/* + * Copyright 2013 Freescale Semiconductor, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include <linux/cpu_cooling.h> +#include <linux/cpufreq.h> +#include <linux/delay.h> +#include <linux/device.h> +#include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/io.h> +#include <linux/kernel.h> +#include <linux/mfd/syscon.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/platform_device.h> +#include <linux/regmap.h> +#include <linux/slab.h> +#include <linux/thermal.h> +#include <linux/types.h> + +#define REG_SET 0x4 +#define REG_CLR 0x8 +#define REG_TOG 0xc + +#define MISC0 0x0150 +#define MISC0_REFTOP_SELBIASOFF (1 << 3) + +#define TEMPSENSE0 0x0180 +#define TEMPSENSE0_ALARM_VALUE_SHIFT 20 +#define TEMPSENSE0_ALARM_VALUE_MASK (0xfff << TEMPSENSE0_ALARM_VALUE_SHIFT) +#define TEMPSENSE0_TEMP_CNT_SHIFT 8 +#define TEMPSENSE0_TEMP_CNT_MASK (0xfff << TEMPSENSE0_TEMP_CNT_SHIFT) +#define TEMPSENSE0_FINISHED (1 << 2) +#define TEMPSENSE0_MEASURE_TEMP (1 << 1) +#define TEMPSENSE0_POWER_DOWN (1 << 0) + +#define TEMPSENSE1 0x0190 +#define TEMPSENSE1_MEASURE_FREQ 0xffff + +#define OCOTP_ANA1 0x04e0 + +/* The driver supports 1 passive trip point and 1 critical trip point */ +enum imx_thermal_trip { + IMX_TRIP_PASSIVE, + IMX_TRIP_CRITICAL, + IMX_TRIP_NUM, +}; + +/* + * It defines the temperature in millicelsius for passive trip point + * that will trigger cooling action when crossed. + */ +#define IMX_TEMP_PASSIVE 85000 + +#define IMX_POLLING_DELAY 2000 /* millisecond */ +#define IMX_PASSIVE_DELAY 1000 + +struct imx_thermal_data { + struct thermal_zone_device *tz; + struct thermal_cooling_device *cdev; + enum thermal_device_mode mode; + struct regmap *tempmon; + int c1, c2; /* See formula in imx_get_sensor_data() */ + unsigned long temp_passive; + unsigned long temp_critical; + unsigned long alarm_temp; + unsigned long last_temp; + bool irq_enabled; + int irq; +}; + +static void imx_set_alarm_temp(struct imx_thermal_data *data, + signed long alarm_temp) +{ + struct regmap *map = data->tempmon; + int alarm_value; + + data->alarm_temp = alarm_temp; + alarm_value = (alarm_temp - data->c2) / data->c1; + regmap_write(map, TEMPSENSE0 + REG_CLR, TEMPSENSE0_ALARM_VALUE_MASK); + regmap_write(map, TEMPSENSE0 + REG_SET, alarm_value << + TEMPSENSE0_ALARM_VALUE_SHIFT); +} + +static int imx_get_temp(struct thermal_zone_device *tz, unsigned long *temp) +{ + struct imx_thermal_data *data = tz->devdata; + struct regmap *map = data->tempmon; + unsigned int n_meas; + bool wait; + u32 val; + + if (data->mode == THERMAL_DEVICE_ENABLED) { + /* Check if a measurement is currently in progress */ + regmap_read(map, TEMPSENSE0, &val); + wait = !(val & TEMPSENSE0_FINISHED); + } else { + /* + * Every time we measure the temperature, we will power on the + * temperature sensor, enable measurements, take a reading, + * disable measurements, power off the temperature sensor. + */ + regmap_write(map, TEMPSENSE0 + REG_CLR, TEMPSENSE0_POWER_DOWN); + regmap_write(map, TEMPSENSE0 + REG_SET, TEMPSENSE0_MEASURE_TEMP); + + wait = true; + } + + /* + * According to the temp sensor designers, it may require up to ~17us + * to complete a measurement. + */ + if (wait) + usleep_range(20, 50); + + regmap_read(map, TEMPSENSE0, &val); + + if (data->mode != THERMAL_DEVICE_ENABLED) { + regmap_write(map, TEMPSENSE0 + REG_CLR, TEMPSENSE0_MEASURE_TEMP); + regmap_write(map, TEMPSENSE0 + REG_SET, TEMPSENSE0_POWER_DOWN); + } + + if ((val & TEMPSENSE0_FINISHED) == 0) { + dev_dbg(&tz->device, "temp measurement never finished\n"); + return -EAGAIN; + } + + n_meas = (val & TEMPSENSE0_TEMP_CNT_MASK) >> TEMPSENSE0_TEMP_CNT_SHIFT; + + /* See imx_get_sensor_data() for formula derivation */ + *temp = data->c2 + data->c1 * n_meas; + + /* Update alarm value to next higher trip point */ + if (data->alarm_temp == data->temp_passive && *temp >= data->temp_passive) + imx_set_alarm_temp(data, data->temp_critical); + if (data->alarm_temp == data->temp_critical && *temp < data->temp_passive) { + imx_set_alarm_temp(data, data->temp_passive); + dev_dbg(&tz->device, "thermal alarm off: T < %lu\n", + data->alarm_temp / 1000); + } + + if (*temp != data->last_temp) { + dev_dbg(&tz->device, "millicelsius: %ld\n", *temp); + data->last_temp = *temp; + } + + /* Reenable alarm IRQ if temperature below alarm temperature */ + if (!data->irq_enabled && *temp < data->alarm_temp) { + data->irq_enabled = true; + enable_irq(data->irq); + } + + return 0; +} + +static int imx_get_mode(struct thermal_zone_device *tz, + enum thermal_device_mode *mode) +{ + struct imx_thermal_data *data = tz->devdata; + + *mode = data->mode; + + return 0; +} + +static int imx_set_mode(struct thermal_zone_device *tz, + enum thermal_device_mode mode) +{ + struct imx_thermal_data *data = tz->devdata; + struct regmap *map = data->tempmon; + + if (mode == THERMAL_DEVICE_ENABLED) { + tz->polling_delay = IMX_POLLING_DELAY; + tz->passive_delay = IMX_PASSIVE_DELAY; + + regmap_write(map, TEMPSENSE0 + REG_CLR, TEMPSENSE0_POWER_DOWN); + regmap_write(map, TEMPSENSE0 + REG_SET, TEMPSENSE0_MEASURE_TEMP); + + if (!data->irq_enabled) { + data->irq_enabled = true; + enable_irq(data->irq); + } + } else { + regmap_write(map, TEMPSENSE0 + REG_CLR, TEMPSENSE0_MEASURE_TEMP); + regmap_write(map, TEMPSENSE0 + REG_SET, TEMPSENSE0_POWER_DOWN); + + tz->polling_delay = 0; + tz->passive_delay = 0; + + if (data->irq_enabled) { + disable_irq(data->irq); + data->irq_enabled = false; + } + } + + data->mode = mode; + thermal_zone_device_update(tz); + + return 0; +} + +static int imx_get_trip_type(struct thermal_zone_device *tz, int trip, + enum thermal_trip_type *type) +{ + *type = (trip == IMX_TRIP_PASSIVE) ? THERMAL_TRIP_PASSIVE : + THERMAL_TRIP_CRITICAL; + return 0; +} + +static int imx_get_crit_temp(struct thermal_zone_device *tz, + unsigned long *temp) +{ + struct imx_thermal_data *data = tz->devdata; + + *temp = data->temp_critical; + return 0; +} + +static int imx_get_trip_temp(struct thermal_zone_device *tz, int trip, + unsigned long *temp) +{ + struct imx_thermal_data *data = tz->devdata; + + *temp = (trip == IMX_TRIP_PASSIVE) ? data->temp_passive : + data->temp_critical; + return 0; +} + +static int imx_set_trip_temp(struct thermal_zone_device *tz, int trip, + unsigned long temp) +{ + struct imx_thermal_data *data = tz->devdata; + + if (trip == IMX_TRIP_CRITICAL) + return -EPERM; + + if (temp > IMX_TEMP_PASSIVE) + return -EINVAL; + + data->temp_passive = temp; + + imx_set_alarm_temp(data, temp); + + return 0; +} + +static int imx_bind(struct thermal_zone_device *tz, + struct thermal_cooling_device *cdev) +{ + int ret; + + ret = thermal_zone_bind_cooling_device(tz, IMX_TRIP_PASSIVE, cdev, + THERMAL_NO_LIMIT, + THERMAL_NO_LIMIT); + if (ret) { + dev_err(&tz->device, + "binding zone %s with cdev %s failed:%d\n", + tz->type, cdev->type, ret); + return ret; + } + + return 0; +} + +static int imx_unbind(struct thermal_zone_device *tz, + struct thermal_cooling_device *cdev) +{ + int ret; + + ret = thermal_zone_unbind_cooling_device(tz, IMX_TRIP_PASSIVE, cdev); + if (ret) { + dev_err(&tz->device, + "unbinding zone %s with cdev %s failed:%d\n", + tz->type, cdev->type, ret); + return ret; + } + + return 0; +} + +static const struct thermal_zone_device_ops imx_tz_ops = { + .bind = imx_bind, + .unbind = imx_unbind, + .get_temp = imx_get_temp, + .get_mode = imx_get_mode, + .set_mode = imx_set_mode, + .get_trip_type = imx_get_trip_type, + .get_trip_temp = imx_get_trip_temp, + .get_crit_temp = imx_get_crit_temp, + .set_trip_temp = imx_set_trip_temp, +}; + +static int imx_get_sensor_data(struct platform_device *pdev) +{ + struct imx_thermal_data *data = platform_get_drvdata(pdev); + struct regmap *map; + int t1, t2, n1, n2; + int ret; + u32 val; + + map = syscon_regmap_lookup_by_phandle(pdev->dev.of_node, + "fsl,tempmon-data"); + if (IS_ERR(map)) { + ret = PTR_ERR(map); + dev_err(&pdev->dev, "failed to get sensor regmap: %d\n", ret); + return ret; + } + + ret = regmap_read(map, OCOTP_ANA1, &val); + if (ret) { + dev_err(&pdev->dev, "failed to read sensor data: %d\n", ret); + return ret; + } + + if (val == 0 || val == ~0) { + dev_err(&pdev->dev, "invalid sensor calibration data\n"); + return -EINVAL; + } + + /* + * Sensor data layout: + * [31:20] - sensor value @ 25C + * [19:8] - sensor value of hot + * [7:0] - hot temperature value + */ + n1 = val >> 20; + n2 = (val & 0xfff00) >> 8; + t2 = val & 0xff; + t1 = 25; /* t1 always 25C */ + + /* + * Derived from linear interpolation, + * Tmeas = T2 + (Nmeas - N2) * (T1 - T2) / (N1 - N2) + * We want to reduce this down to the minimum computation necessary + * for each temperature read. Also, we want Tmeas in millicelsius + * and we don't want to lose precision from integer division. So... + * milli_Tmeas = 1000 * T2 + 1000 * (Nmeas - N2) * (T1 - T2) / (N1 - N2) + * Let constant c1 = 1000 * (T1 - T2) / (N1 - N2) + * milli_Tmeas = (1000 * T2) + c1 * (Nmeas - N2) + * milli_Tmeas = (1000 * T2) + (c1 * Nmeas) - (c1 * N2) + * Let constant c2 = (1000 * T2) - (c1 * N2) + * milli_Tmeas = c2 + (c1 * Nmeas) + */ + data->c1 = 1000 * (t1 - t2) / (n1 - n2); + data->c2 = 1000 * t2 - data->c1 * n2; + + /* + * Set the default passive cooling trip point to 20 °C below the + * maximum die temperature. Can be changed from userspace. + */ + data->temp_passive = 1000 * (t2 - 20); + + /* + * The maximum die temperature is t2, let's give 5 °C cushion + * for noise and possible temperature rise between measurements. + */ + data->temp_critical = 1000 * (t2 - 5); + + return 0; +} + +static irqreturn_t imx_thermal_alarm_irq(int irq, void *dev) +{ + struct imx_thermal_data *data = dev; + + disable_irq_nosync(irq); + data->irq_enabled = false; + + return IRQ_WAKE_THREAD; +} + +static irqreturn_t imx_thermal_alarm_irq_thread(int irq, void *dev) +{ + struct imx_thermal_data *data = dev; + + dev_dbg(&data->tz->device, "THERMAL ALARM: T > %lu\n", + data->alarm_temp / 1000); + + thermal_zone_device_update(data->tz); + + return IRQ_HANDLED; +} + +static int imx_thermal_probe(struct platform_device *pdev) +{ + struct imx_thermal_data *data; + struct cpumask clip_cpus; + struct regmap *map; + int measure_freq; + int ret; + + data = devm_kzalloc(&pdev->dev, sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + + map = syscon_regmap_lookup_by_phandle(pdev->dev.of_node, "fsl,tempmon"); + if (IS_ERR(map)) { + ret = PTR_ERR(map); + dev_err(&pdev->dev, "failed to get tempmon regmap: %d\n", ret); + return ret; + } + data->tempmon = map; + + data->irq = platform_get_irq(pdev, 0); + if (data->irq < 0) + return data->irq; + + ret = devm_request_threaded_irq(&pdev->dev, data->irq, + imx_thermal_alarm_irq, imx_thermal_alarm_irq_thread, + 0, "imx_thermal", data); + if (ret < 0) { + dev_err(&pdev->dev, "failed to request alarm irq: %d\n", ret); + return ret; + } + + platform_set_drvdata(pdev, data); + + ret = imx_get_sensor_data(pdev); + if (ret) { + dev_err(&pdev->dev, "failed to get sensor data\n"); + return ret; + } + + /* Make sure sensor is in known good state for measurements */ + regmap_write(map, TEMPSENSE0 + REG_CLR, TEMPSENSE0_POWER_DOWN); + regmap_write(map, TEMPSENSE0 + REG_CLR, TEMPSENSE0_MEASURE_TEMP); + regmap_write(map, TEMPSENSE1 + REG_CLR, TEMPSENSE1_MEASURE_FREQ); + regmap_write(map, MISC0 + REG_SET, MISC0_REFTOP_SELBIASOFF); + regmap_write(map, TEMPSENSE0 + REG_SET, TEMPSENSE0_POWER_DOWN); + + cpumask_set_cpu(0, &clip_cpus); + data->cdev = cpufreq_cooling_register(&clip_cpus); + if (IS_ERR(data->cdev)) { + ret = PTR_ERR(data->cdev); + dev_err(&pdev->dev, + "failed to register cpufreq cooling device: %d\n", ret); + return ret; + } + + data->tz = thermal_zone_device_register("imx_thermal_zone", + IMX_TRIP_NUM, + BIT(IMX_TRIP_PASSIVE), data, + &imx_tz_ops, NULL, + IMX_PASSIVE_DELAY, + IMX_POLLING_DELAY); + if (IS_ERR(data->tz)) { + ret = PTR_ERR(data->tz); + dev_err(&pdev->dev, + "failed to register thermal zone device %d\n", ret); + cpufreq_cooling_unregister(data->cdev); + return ret; + } + + /* Enable measurements at ~ 10 Hz */ + regmap_write(map, TEMPSENSE1 + REG_CLR, TEMPSENSE1_MEASURE_FREQ); + measure_freq = DIV_ROUND_UP(32768, 10); /* 10 Hz */ + regmap_write(map, TEMPSENSE1 + REG_SET, measure_freq); + imx_set_alarm_temp(data, data->temp_passive); + regmap_write(map, TEMPSENSE0 + REG_CLR, TEMPSENSE0_POWER_DOWN); + regmap_write(map, TEMPSENSE0 + REG_SET, TEMPSENSE0_MEASURE_TEMP); + + data->irq_enabled = true; + data->mode = THERMAL_DEVICE_ENABLED; + + return 0; +} + +static int imx_thermal_remove(struct platform_device *pdev) +{ + struct imx_thermal_data *data = platform_get_drvdata(pdev); + struct regmap *map = data->tempmon; + + /* Disable measurements */ + regmap_write(map, TEMPSENSE0 + REG_SET, TEMPSENSE0_POWER_DOWN); + + thermal_zone_device_unregister(data->tz); + cpufreq_cooling_unregister(data->cdev); + + return 0; +} + +#ifdef CONFIG_PM_SLEEP +static int imx_thermal_suspend(struct device *dev) +{ + struct imx_thermal_data *data = dev_get_drvdata(dev); + struct regmap *map = data->tempmon; + u32 val; + + regmap_read(map, TEMPSENSE0, &val); + if ((val & TEMPSENSE0_POWER_DOWN) == 0) { + /* + * If a measurement is taking place, wait for a long enough + * time for it to finish, and then check again. If it still + * does not finish, something must go wrong. + */ + udelay(50); + regmap_read(map, TEMPSENSE0, &val); + if ((val & TEMPSENSE0_POWER_DOWN) == 0) + return -ETIMEDOUT; + } + + return 0; +} + +static int imx_thermal_resume(struct device *dev) +{ + /* Nothing to do for now */ + return 0; +} +#endif + +static SIMPLE_DEV_PM_OPS(imx_thermal_pm_ops, + imx_thermal_suspend, imx_thermal_resume); + +static const struct of_device_id of_imx_thermal_match[] = { + { .compatible = "fsl,imx6q-tempmon", }, + { /* end */ } +}; + +static struct platform_driver imx_thermal = { + .driver = { + .name = "imx_thermal", + .owner = THIS_MODULE, + .pm = &imx_thermal_pm_ops, + .of_match_table = of_imx_thermal_match, + }, + .probe = imx_thermal_probe, + .remove = imx_thermal_remove, +}; +module_platform_driver(imx_thermal); + +MODULE_AUTHOR("Freescale Semiconductor, Inc."); +MODULE_DESCRIPTION("Thermal driver for Freescale i.MX SoCs"); +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS("platform:imx-thermal"); diff --git a/drivers/thermal/samsung/Kconfig b/drivers/thermal/samsung/Kconfig new file mode 100644 index 000000000000..f760389a204c --- /dev/null +++ b/drivers/thermal/samsung/Kconfig @@ -0,0 +1,18 @@ +config EXYNOS_THERMAL + tristate "Exynos thermal management unit driver" + depends on ARCH_HAS_BANDGAP && OF + help + If you say yes here you get support for the TMU (Thermal Management + Unit) driver for SAMSUNG EXYNOS series of SoCs. This driver initialises + the TMU, reports temperature and handles cooling action if defined. + This driver uses the Exynos core thermal APIs and TMU configuration + data from the supported SoCs. + +config EXYNOS_THERMAL_CORE + bool "Core thermal framework support for EXYNOS SOCs" + depends on EXYNOS_THERMAL + help + If you say yes here you get support for EXYNOS TMU + (Thermal Management Unit) common registration/unregistration + functions to the core thermal layer and also to use the generic + CPU cooling APIs. diff --git a/drivers/thermal/samsung/Makefile b/drivers/thermal/samsung/Makefile new file mode 100644 index 000000000000..c09d83095dc2 --- /dev/null +++ b/drivers/thermal/samsung/Makefile @@ -0,0 +1,7 @@ +# +# Samsung thermal specific Makefile +# +obj-$(CONFIG_EXYNOS_THERMAL) += exynos_thermal.o +exynos_thermal-y := exynos_tmu.o +exynos_thermal-y += exynos_tmu_data.o +exynos_thermal-$(CONFIG_EXYNOS_THERMAL_CORE) += exynos_thermal_common.o diff --git a/drivers/thermal/samsung/exynos_thermal_common.c b/drivers/thermal/samsung/exynos_thermal_common.c new file mode 100644 index 000000000000..f10a6ad37c06 --- /dev/null +++ b/drivers/thermal/samsung/exynos_thermal_common.c @@ -0,0 +1,432 @@ +/* + * exynos_thermal_common.c - Samsung EXYNOS common thermal file + * + * Copyright (C) 2013 Samsung Electronics + * Amit Daniel Kachhap <amit.daniel@samsung.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#include <linux/cpu_cooling.h> +#include <linux/err.h> +#include <linux/slab.h> +#include <linux/thermal.h> + +#include "exynos_thermal_common.h" + +struct exynos_thermal_zone { + enum thermal_device_mode mode; + struct thermal_zone_device *therm_dev; + struct thermal_cooling_device *cool_dev[MAX_COOLING_DEVICE]; + unsigned int cool_dev_size; + struct platform_device *exynos4_dev; + struct thermal_sensor_conf *sensor_conf; + bool bind; +}; + +/* Get mode callback functions for thermal zone */ +static int exynos_get_mode(struct thermal_zone_device *thermal, + enum thermal_device_mode *mode) +{ + struct exynos_thermal_zone *th_zone = thermal->devdata; + if (th_zone) + *mode = th_zone->mode; + return 0; +} + +/* Set mode callback functions for thermal zone */ +static int exynos_set_mode(struct thermal_zone_device *thermal, + enum thermal_device_mode mode) +{ + struct exynos_thermal_zone *th_zone = thermal->devdata; + if (!th_zone) { + dev_err(&thermal->device, + "thermal zone not registered\n"); + return 0; + } + + mutex_lock(&thermal->lock); + + if (mode == THERMAL_DEVICE_ENABLED && + !th_zone->sensor_conf->trip_data.trigger_falling) + thermal->polling_delay = IDLE_INTERVAL; + else + thermal->polling_delay = 0; + + mutex_unlock(&thermal->lock); + + th_zone->mode = mode; + thermal_zone_device_update(thermal); + dev_dbg(th_zone->sensor_conf->dev, + "thermal polling set for duration=%d msec\n", + thermal->polling_delay); + return 0; +} + + +/* Get trip type callback functions for thermal zone */ +static int exynos_get_trip_type(struct thermal_zone_device *thermal, int trip, + enum thermal_trip_type *type) +{ + struct exynos_thermal_zone *th_zone = thermal->devdata; + int max_trip = th_zone->sensor_conf->trip_data.trip_count; + int trip_type; + + if (trip < 0 || trip >= max_trip) + return -EINVAL; + + trip_type = th_zone->sensor_conf->trip_data.trip_type[trip]; + + if (trip_type == SW_TRIP) + *type = THERMAL_TRIP_CRITICAL; + else if (trip_type == THROTTLE_ACTIVE) + *type = THERMAL_TRIP_ACTIVE; + else if (trip_type == THROTTLE_PASSIVE) + *type = THERMAL_TRIP_PASSIVE; + else + return -EINVAL; + + return 0; +} + +/* Get trip temperature callback functions for thermal zone */ +static int exynos_get_trip_temp(struct thermal_zone_device *thermal, int trip, + unsigned long *temp) +{ + struct exynos_thermal_zone *th_zone = thermal->devdata; + int max_trip = th_zone->sensor_conf->trip_data.trip_count; + + if (trip < 0 || trip >= max_trip) + return -EINVAL; + + *temp = th_zone->sensor_conf->trip_data.trip_val[trip]; + /* convert the temperature into millicelsius */ + *temp = *temp * MCELSIUS; + + return 0; +} + +/* Get critical temperature callback functions for thermal zone */ +static int exynos_get_crit_temp(struct thermal_zone_device *thermal, + unsigned long *temp) +{ + struct exynos_thermal_zone *th_zone = thermal->devdata; + int max_trip = th_zone->sensor_conf->trip_data.trip_count; + /* Get the temp of highest trip*/ + return exynos_get_trip_temp(thermal, max_trip - 1, temp); +} + +/* Bind callback functions for thermal zone */ +static int exynos_bind(struct thermal_zone_device *thermal, + struct thermal_cooling_device *cdev) +{ + int ret = 0, i, tab_size, level; + struct freq_clip_table *tab_ptr, *clip_data; + struct exynos_thermal_zone *th_zone = thermal->devdata; + struct thermal_sensor_conf *data = th_zone->sensor_conf; + + tab_ptr = (struct freq_clip_table *)data->cooling_data.freq_data; + tab_size = data->cooling_data.freq_clip_count; + + if (tab_ptr == NULL || tab_size == 0) + return 0; + + /* find the cooling device registered*/ + for (i = 0; i < th_zone->cool_dev_size; i++) + if (cdev == th_zone->cool_dev[i]) + break; + + /* No matching cooling device */ + if (i == th_zone->cool_dev_size) + return 0; + + /* Bind the thermal zone to the cpufreq cooling device */ + for (i = 0; i < tab_size; i++) { + clip_data = (struct freq_clip_table *)&(tab_ptr[i]); + level = cpufreq_cooling_get_level(0, clip_data->freq_clip_max); + if (level == THERMAL_CSTATE_INVALID) + return 0; + switch (GET_ZONE(i)) { + case MONITOR_ZONE: + case WARN_ZONE: + if (thermal_zone_bind_cooling_device(thermal, i, cdev, + level, 0)) { + dev_err(data->dev, + "error unbinding cdev inst=%d\n", i); + ret = -EINVAL; + } + th_zone->bind = true; + break; + default: + ret = -EINVAL; + } + } + + return ret; +} + +/* Unbind callback functions for thermal zone */ +static int exynos_unbind(struct thermal_zone_device *thermal, + struct thermal_cooling_device *cdev) +{ + int ret = 0, i, tab_size; + struct exynos_thermal_zone *th_zone = thermal->devdata; + struct thermal_sensor_conf *data = th_zone->sensor_conf; + + if (th_zone->bind == false) + return 0; + + tab_size = data->cooling_data.freq_clip_count; + + if (tab_size == 0) + return 0; + + /* find the cooling device registered*/ + for (i = 0; i < th_zone->cool_dev_size; i++) + if (cdev == th_zone->cool_dev[i]) + break; + + /* No matching cooling device */ + if (i == th_zone->cool_dev_size) + return 0; + + /* Bind the thermal zone to the cpufreq cooling device */ + for (i = 0; i < tab_size; i++) { + switch (GET_ZONE(i)) { + case MONITOR_ZONE: + case WARN_ZONE: + if (thermal_zone_unbind_cooling_device(thermal, i, + cdev)) { + dev_err(data->dev, + "error unbinding cdev inst=%d\n", i); + ret = -EINVAL; + } + th_zone->bind = false; + break; + default: + ret = -EINVAL; + } + } + return ret; +} + +/* Get temperature callback functions for thermal zone */ +static int exynos_get_temp(struct thermal_zone_device *thermal, + unsigned long *temp) +{ + struct exynos_thermal_zone *th_zone = thermal->devdata; + void *data; + + if (!th_zone->sensor_conf) { + dev_err(&thermal->device, + "Temperature sensor not initialised\n"); + return -EINVAL; + } + data = th_zone->sensor_conf->driver_data; + *temp = th_zone->sensor_conf->read_temperature(data); + /* convert the temperature into millicelsius */ + *temp = *temp * MCELSIUS; + return 0; +} + +/* Get temperature callback functions for thermal zone */ +static int exynos_set_emul_temp(struct thermal_zone_device *thermal, + unsigned long temp) +{ + void *data; + int ret = -EINVAL; + struct exynos_thermal_zone *th_zone = thermal->devdata; + + if (!th_zone->sensor_conf) { + dev_err(&thermal->device, + "Temperature sensor not initialised\n"); + return -EINVAL; + } + data = th_zone->sensor_conf->driver_data; + if (th_zone->sensor_conf->write_emul_temp) + ret = th_zone->sensor_conf->write_emul_temp(data, temp); + return ret; +} + +/* Get the temperature trend */ +static int exynos_get_trend(struct thermal_zone_device *thermal, + int trip, enum thermal_trend *trend) +{ + int ret; + unsigned long trip_temp; + + ret = exynos_get_trip_temp(thermal, trip, &trip_temp); + if (ret < 0) + return ret; + + if (thermal->temperature >= trip_temp) + *trend = THERMAL_TREND_RAISE_FULL; + else + *trend = THERMAL_TREND_DROP_FULL; + + return 0; +} +/* Operation callback functions for thermal zone */ +static struct thermal_zone_device_ops const exynos_dev_ops = { + .bind = exynos_bind, + .unbind = exynos_unbind, + .get_temp = exynos_get_temp, + .set_emul_temp = exynos_set_emul_temp, + .get_trend = exynos_get_trend, + .get_mode = exynos_get_mode, + .set_mode = exynos_set_mode, + .get_trip_type = exynos_get_trip_type, + .get_trip_temp = exynos_get_trip_temp, + .get_crit_temp = exynos_get_crit_temp, +}; + +/* + * This function may be called from interrupt based temperature sensor + * when threshold is changed. + */ +void exynos_report_trigger(struct thermal_sensor_conf *conf) +{ + unsigned int i; + char data[10]; + char *envp[] = { data, NULL }; + struct exynos_thermal_zone *th_zone; + + if (!conf || !conf->pzone_data) { + pr_err("Invalid temperature sensor configuration data\n"); + return; + } + + th_zone = conf->pzone_data; + if (th_zone->therm_dev) + return; + + if (th_zone->bind == false) { + for (i = 0; i < th_zone->cool_dev_size; i++) { + if (!th_zone->cool_dev[i]) + continue; + exynos_bind(th_zone->therm_dev, + th_zone->cool_dev[i]); + } + } + + thermal_zone_device_update(th_zone->therm_dev); + + mutex_lock(&th_zone->therm_dev->lock); + /* Find the level for which trip happened */ + for (i = 0; i < th_zone->sensor_conf->trip_data.trip_count; i++) { + if (th_zone->therm_dev->last_temperature < + th_zone->sensor_conf->trip_data.trip_val[i] * MCELSIUS) + break; + } + + if (th_zone->mode == THERMAL_DEVICE_ENABLED && + !th_zone->sensor_conf->trip_data.trigger_falling) { + if (i > 0) + th_zone->therm_dev->polling_delay = ACTIVE_INTERVAL; + else + th_zone->therm_dev->polling_delay = IDLE_INTERVAL; + } + + snprintf(data, sizeof(data), "%u", i); + kobject_uevent_env(&th_zone->therm_dev->device.kobj, KOBJ_CHANGE, envp); + mutex_unlock(&th_zone->therm_dev->lock); +} + +/* Register with the in-kernel thermal management */ +int exynos_register_thermal(struct thermal_sensor_conf *sensor_conf) +{ + int ret; + struct cpumask mask_val; + struct exynos_thermal_zone *th_zone; + + if (!sensor_conf || !sensor_conf->read_temperature) { + pr_err("Temperature sensor not initialised\n"); + return -EINVAL; + } + + th_zone = devm_kzalloc(sensor_conf->dev, + sizeof(struct exynos_thermal_zone), GFP_KERNEL); + if (!th_zone) + return -ENOMEM; + + th_zone->sensor_conf = sensor_conf; + /* + * TODO: 1) Handle multiple cooling devices in a thermal zone + * 2) Add a flag/name in cooling info to map to specific + * sensor + */ + if (sensor_conf->cooling_data.freq_clip_count > 0) { + cpumask_set_cpu(0, &mask_val); + th_zone->cool_dev[th_zone->cool_dev_size] = + cpufreq_cooling_register(&mask_val); + if (IS_ERR(th_zone->cool_dev[th_zone->cool_dev_size])) { + dev_err(sensor_conf->dev, + "Failed to register cpufreq cooling device\n"); + ret = -EINVAL; + goto err_unregister; + } + th_zone->cool_dev_size++; + } + + th_zone->therm_dev = thermal_zone_device_register( + sensor_conf->name, sensor_conf->trip_data.trip_count, + 0, th_zone, &exynos_dev_ops, NULL, 0, + sensor_conf->trip_data.trigger_falling ? 0 : + IDLE_INTERVAL); + + if (IS_ERR(th_zone->therm_dev)) { + dev_err(sensor_conf->dev, + "Failed to register thermal zone device\n"); + ret = PTR_ERR(th_zone->therm_dev); + goto err_unregister; + } + th_zone->mode = THERMAL_DEVICE_ENABLED; + sensor_conf->pzone_data = th_zone; + + dev_info(sensor_conf->dev, + "Exynos: Thermal zone(%s) registered\n", sensor_conf->name); + + return 0; + +err_unregister: + exynos_unregister_thermal(sensor_conf); + return ret; +} + +/* Un-Register with the in-kernel thermal management */ +void exynos_unregister_thermal(struct thermal_sensor_conf *sensor_conf) +{ + int i; + struct exynos_thermal_zone *th_zone; + + if (!sensor_conf || !sensor_conf->pzone_data) { + pr_err("Invalid temperature sensor configuration data\n"); + return; + } + + th_zone = sensor_conf->pzone_data; + + if (th_zone->therm_dev) + thermal_zone_device_unregister(th_zone->therm_dev); + + for (i = 0; i < th_zone->cool_dev_size; i++) { + if (th_zone->cool_dev[i]) + cpufreq_cooling_unregister(th_zone->cool_dev[i]); + } + + dev_info(sensor_conf->dev, + "Exynos: Kernel Thermal management unregistered\n"); +} diff --git a/drivers/thermal/samsung/exynos_thermal_common.h b/drivers/thermal/samsung/exynos_thermal_common.h new file mode 100644 index 000000000000..3eb2ed9ea3a4 --- /dev/null +++ b/drivers/thermal/samsung/exynos_thermal_common.h @@ -0,0 +1,107 @@ +/* + * exynos_thermal_common.h - Samsung EXYNOS common header file + * + * Copyright (C) 2013 Samsung Electronics + * Amit Daniel Kachhap <amit.daniel@samsung.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#ifndef _EXYNOS_THERMAL_COMMON_H +#define _EXYNOS_THERMAL_COMMON_H + +/* In-kernel thermal framework related macros & definations */ +#define SENSOR_NAME_LEN 16 +#define MAX_TRIP_COUNT 8 +#define MAX_COOLING_DEVICE 4 +#define MAX_THRESHOLD_LEVS 5 + +#define ACTIVE_INTERVAL 500 +#define IDLE_INTERVAL 10000 +#define MCELSIUS 1000 + +/* CPU Zone information */ +#define PANIC_ZONE 4 +#define WARN_ZONE 3 +#define MONITOR_ZONE 2 +#define SAFE_ZONE 1 + +#define GET_ZONE(trip) (trip + 2) +#define GET_TRIP(zone) (zone - 2) + +enum trigger_type { + THROTTLE_ACTIVE = 1, + THROTTLE_PASSIVE, + SW_TRIP, + HW_TRIP, +}; + +/** + * struct freq_clip_table + * @freq_clip_max: maximum frequency allowed for this cooling state. + * @temp_level: Temperature level at which the temperature clipping will + * happen. + * @mask_val: cpumask of the allowed cpu's where the clipping will take place. + * + * This structure is required to be filled and passed to the + * cpufreq_cooling_unregister function. + */ +struct freq_clip_table { + unsigned int freq_clip_max; + unsigned int temp_level; + const struct cpumask *mask_val; +}; + +struct thermal_trip_point_conf { + int trip_val[MAX_TRIP_COUNT]; + int trip_type[MAX_TRIP_COUNT]; + int trip_count; + unsigned char trigger_falling; +}; + +struct thermal_cooling_conf { + struct freq_clip_table freq_data[MAX_TRIP_COUNT]; + int freq_clip_count; +}; + +struct thermal_sensor_conf { + char name[SENSOR_NAME_LEN]; + int (*read_temperature)(void *data); + int (*write_emul_temp)(void *drv_data, unsigned long temp); + struct thermal_trip_point_conf trip_data; + struct thermal_cooling_conf cooling_data; + void *driver_data; + void *pzone_data; + struct device *dev; +}; + +/*Functions used exynos based thermal sensor driver*/ +#ifdef CONFIG_EXYNOS_THERMAL_CORE +void exynos_unregister_thermal(struct thermal_sensor_conf *sensor_conf); +int exynos_register_thermal(struct thermal_sensor_conf *sensor_conf); +void exynos_report_trigger(struct thermal_sensor_conf *sensor_conf); +#else +static inline void +exynos_unregister_thermal(struct thermal_sensor_conf *sensor_conf) { return; } + +static inline int +exynos_register_thermal(struct thermal_sensor_conf *sensor_conf) { return 0; } + +static inline void +exynos_report_trigger(struct thermal_sensor_conf *sensor_conf) { return; } + +#endif /* CONFIG_EXYNOS_THERMAL_CORE */ +#endif /* _EXYNOS_THERMAL_COMMON_H */ diff --git a/drivers/thermal/samsung/exynos_tmu.c b/drivers/thermal/samsung/exynos_tmu.c new file mode 100644 index 000000000000..b43afda8acd1 --- /dev/null +++ b/drivers/thermal/samsung/exynos_tmu.c @@ -0,0 +1,762 @@ +/* + * exynos_tmu.c - Samsung EXYNOS TMU (Thermal Management Unit) + * + * Copyright (C) 2011 Samsung Electronics + * Donggeun Kim <dg77.kim@samsung.com> + * Amit Daniel Kachhap <amit.kachhap@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#include <linux/clk.h> +#include <linux/io.h> +#include <linux/interrupt.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/of_address.h> +#include <linux/of_irq.h> +#include <linux/platform_device.h> +#include <linux/regulator/consumer.h> + +#include "exynos_thermal_common.h" +#include "exynos_tmu.h" +#include "exynos_tmu_data.h" + +/** + * struct exynos_tmu_data : A structure to hold the private data of the TMU + driver + * @id: identifier of the one instance of the TMU controller. + * @pdata: pointer to the tmu platform/configuration data + * @base: base address of the single instance of the TMU controller. + * @base_common: base address of the common registers of the TMU controller. + * @irq: irq number of the TMU controller. + * @soc: id of the SOC type. + * @irq_work: pointer to the irq work structure. + * @lock: lock to implement synchronization. + * @clk: pointer to the clock structure. + * @temp_error1: fused value of the first point trim. + * @temp_error2: fused value of the second point trim. + * @regulator: pointer to the TMU regulator structure. + * @reg_conf: pointer to structure to register with core thermal. + */ +struct exynos_tmu_data { + int id; + struct exynos_tmu_platform_data *pdata; + void __iomem *base; + void __iomem *base_common; + int irq; + enum soc_type soc; + struct work_struct irq_work; + struct mutex lock; + struct clk *clk; + u8 temp_error1, temp_error2; + struct regulator *regulator; + struct thermal_sensor_conf *reg_conf; +}; + +/* + * TMU treats temperature as a mapped temperature code. + * The temperature is converted differently depending on the calibration type. + */ +static int temp_to_code(struct exynos_tmu_data *data, u8 temp) +{ + struct exynos_tmu_platform_data *pdata = data->pdata; + int temp_code; + + if (pdata->cal_mode == HW_MODE) + return temp; + + if (data->soc == SOC_ARCH_EXYNOS4210) + /* temp should range between 25 and 125 */ + if (temp < 25 || temp > 125) { + temp_code = -EINVAL; + goto out; + } + + switch (pdata->cal_type) { + case TYPE_TWO_POINT_TRIMMING: + temp_code = (temp - pdata->first_point_trim) * + (data->temp_error2 - data->temp_error1) / + (pdata->second_point_trim - pdata->first_point_trim) + + data->temp_error1; + break; + case TYPE_ONE_POINT_TRIMMING: + temp_code = temp + data->temp_error1 - pdata->first_point_trim; + break; + default: + temp_code = temp + pdata->default_temp_offset; + break; + } +out: + return temp_code; +} + +/* + * Calculate a temperature value from a temperature code. + * The unit of the temperature is degree Celsius. + */ +static int code_to_temp(struct exynos_tmu_data *data, u8 temp_code) +{ + struct exynos_tmu_platform_data *pdata = data->pdata; + int temp; + + if (pdata->cal_mode == HW_MODE) + return temp_code; + + if (data->soc == SOC_ARCH_EXYNOS4210) + /* temp_code should range between 75 and 175 */ + if (temp_code < 75 || temp_code > 175) { + temp = -ENODATA; + goto out; + } + + switch (pdata->cal_type) { + case TYPE_TWO_POINT_TRIMMING: + temp = (temp_code - data->temp_error1) * + (pdata->second_point_trim - pdata->first_point_trim) / + (data->temp_error2 - data->temp_error1) + + pdata->first_point_trim; + break; + case TYPE_ONE_POINT_TRIMMING: + temp = temp_code - data->temp_error1 + pdata->first_point_trim; + break; + default: + temp = temp_code - pdata->default_temp_offset; + break; + } +out: + return temp; +} + +static int exynos_tmu_initialize(struct platform_device *pdev) +{ + struct exynos_tmu_data *data = platform_get_drvdata(pdev); + struct exynos_tmu_platform_data *pdata = data->pdata; + const struct exynos_tmu_registers *reg = pdata->registers; + unsigned int status, trim_info = 0, con; + unsigned int rising_threshold = 0, falling_threshold = 0; + int ret = 0, threshold_code, i, trigger_levs = 0; + + mutex_lock(&data->lock); + clk_enable(data->clk); + + if (TMU_SUPPORTS(pdata, READY_STATUS)) { + status = readb(data->base + reg->tmu_status); + if (!status) { + ret = -EBUSY; + goto out; + } + } + + if (TMU_SUPPORTS(pdata, TRIM_RELOAD)) + __raw_writel(1, data->base + reg->triminfo_ctrl); + + if (pdata->cal_mode == HW_MODE) + goto skip_calib_data; + + /* Save trimming info in order to perform calibration */ + if (data->soc == SOC_ARCH_EXYNOS5440) { + /* + * For exynos5440 soc triminfo value is swapped between TMU0 and + * TMU2, so the below logic is needed. + */ + switch (data->id) { + case 0: + trim_info = readl(data->base + + EXYNOS5440_EFUSE_SWAP_OFFSET + reg->triminfo_data); + break; + case 1: + trim_info = readl(data->base + reg->triminfo_data); + break; + case 2: + trim_info = readl(data->base - + EXYNOS5440_EFUSE_SWAP_OFFSET + reg->triminfo_data); + } + } else { + trim_info = readl(data->base + reg->triminfo_data); + } + data->temp_error1 = trim_info & EXYNOS_TMU_TEMP_MASK; + data->temp_error2 = ((trim_info >> reg->triminfo_85_shift) & + EXYNOS_TMU_TEMP_MASK); + + if (!data->temp_error1 || + (pdata->min_efuse_value > data->temp_error1) || + (data->temp_error1 > pdata->max_efuse_value)) + data->temp_error1 = pdata->efuse_value & EXYNOS_TMU_TEMP_MASK; + + if (!data->temp_error2) + data->temp_error2 = + (pdata->efuse_value >> reg->triminfo_85_shift) & + EXYNOS_TMU_TEMP_MASK; + +skip_calib_data: + if (pdata->max_trigger_level > MAX_THRESHOLD_LEVS) { + dev_err(&pdev->dev, "Invalid max trigger level\n"); + goto out; + } + + for (i = 0; i < pdata->max_trigger_level; i++) { + if (!pdata->trigger_levels[i]) + continue; + + if ((pdata->trigger_type[i] == HW_TRIP) && + (!pdata->trigger_levels[pdata->max_trigger_level - 1])) { + dev_err(&pdev->dev, "Invalid hw trigger level\n"); + ret = -EINVAL; + goto out; + } + + /* Count trigger levels except the HW trip*/ + if (!(pdata->trigger_type[i] == HW_TRIP)) + trigger_levs++; + } + + if (data->soc == SOC_ARCH_EXYNOS4210) { + /* Write temperature code for threshold */ + threshold_code = temp_to_code(data, pdata->threshold); + if (threshold_code < 0) { + ret = threshold_code; + goto out; + } + writeb(threshold_code, + data->base + reg->threshold_temp); + for (i = 0; i < trigger_levs; i++) + writeb(pdata->trigger_levels[i], data->base + + reg->threshold_th0 + i * sizeof(reg->threshold_th0)); + + writel(reg->inten_rise_mask, data->base + reg->tmu_intclear); + } else { + /* Write temperature code for rising and falling threshold */ + for (i = 0; + i < trigger_levs && i < EXYNOS_MAX_TRIGGER_PER_REG; i++) { + threshold_code = temp_to_code(data, + pdata->trigger_levels[i]); + if (threshold_code < 0) { + ret = threshold_code; + goto out; + } + rising_threshold |= threshold_code << 8 * i; + if (pdata->threshold_falling) { + threshold_code = temp_to_code(data, + pdata->trigger_levels[i] - + pdata->threshold_falling); + if (threshold_code > 0) + falling_threshold |= + threshold_code << 8 * i; + } + } + + writel(rising_threshold, + data->base + reg->threshold_th0); + writel(falling_threshold, + data->base + reg->threshold_th1); + + writel((reg->inten_rise_mask << reg->inten_rise_shift) | + (reg->inten_fall_mask << reg->inten_fall_shift), + data->base + reg->tmu_intclear); + + /* if last threshold limit is also present */ + i = pdata->max_trigger_level - 1; + if (pdata->trigger_levels[i] && + (pdata->trigger_type[i] == HW_TRIP)) { + threshold_code = temp_to_code(data, + pdata->trigger_levels[i]); + if (threshold_code < 0) { + ret = threshold_code; + goto out; + } + if (i == EXYNOS_MAX_TRIGGER_PER_REG - 1) { + /* 1-4 level to be assigned in th0 reg */ + rising_threshold |= threshold_code << 8 * i; + writel(rising_threshold, + data->base + reg->threshold_th0); + } else if (i == EXYNOS_MAX_TRIGGER_PER_REG) { + /* 5th level to be assigned in th2 reg */ + rising_threshold = + threshold_code << reg->threshold_th3_l0_shift; + writel(rising_threshold, + data->base + reg->threshold_th2); + } + con = readl(data->base + reg->tmu_ctrl); + con |= (1 << reg->therm_trip_en_shift); + writel(con, data->base + reg->tmu_ctrl); + } + } + /*Clear the PMIN in the common TMU register*/ + if (reg->tmu_pmin && !data->id) + writel(0, data->base_common + reg->tmu_pmin); +out: + clk_disable(data->clk); + mutex_unlock(&data->lock); + + return ret; +} + +static void exynos_tmu_control(struct platform_device *pdev, bool on) +{ + struct exynos_tmu_data *data = platform_get_drvdata(pdev); + struct exynos_tmu_platform_data *pdata = data->pdata; + const struct exynos_tmu_registers *reg = pdata->registers; + unsigned int con, interrupt_en, cal_val; + + mutex_lock(&data->lock); + clk_enable(data->clk); + + con = readl(data->base + reg->tmu_ctrl); + + if (pdata->reference_voltage) { + con &= ~(reg->buf_vref_sel_mask << reg->buf_vref_sel_shift); + con |= pdata->reference_voltage << reg->buf_vref_sel_shift; + } + + if (pdata->gain) { + con &= ~(reg->buf_slope_sel_mask << reg->buf_slope_sel_shift); + con |= (pdata->gain << reg->buf_slope_sel_shift); + } + + if (pdata->noise_cancel_mode) { + con &= ~(reg->therm_trip_mode_mask << + reg->therm_trip_mode_shift); + con |= (pdata->noise_cancel_mode << reg->therm_trip_mode_shift); + } + + if (pdata->cal_mode == HW_MODE) { + con &= ~(reg->calib_mode_mask << reg->calib_mode_shift); + cal_val = 0; + switch (pdata->cal_type) { + case TYPE_TWO_POINT_TRIMMING: + cal_val = 3; + break; + case TYPE_ONE_POINT_TRIMMING_85: + cal_val = 2; + break; + case TYPE_ONE_POINT_TRIMMING_25: + cal_val = 1; + break; + case TYPE_NONE: + break; + default: + dev_err(&pdev->dev, "Invalid calibration type, using none\n"); + } + con |= cal_val << reg->calib_mode_shift; + } + + if (on) { + con |= (1 << reg->core_en_shift); + interrupt_en = + pdata->trigger_enable[3] << reg->inten_rise3_shift | + pdata->trigger_enable[2] << reg->inten_rise2_shift | + pdata->trigger_enable[1] << reg->inten_rise1_shift | + pdata->trigger_enable[0] << reg->inten_rise0_shift; + if (TMU_SUPPORTS(pdata, FALLING_TRIP)) + interrupt_en |= + interrupt_en << reg->inten_fall0_shift; + } else { + con &= ~(1 << reg->core_en_shift); + interrupt_en = 0; /* Disable all interrupts */ + } + writel(interrupt_en, data->base + reg->tmu_inten); + writel(con, data->base + reg->tmu_ctrl); + + clk_disable(data->clk); + mutex_unlock(&data->lock); +} + +static int exynos_tmu_read(struct exynos_tmu_data *data) +{ + struct exynos_tmu_platform_data *pdata = data->pdata; + const struct exynos_tmu_registers *reg = pdata->registers; + u8 temp_code; + int temp; + + mutex_lock(&data->lock); + clk_enable(data->clk); + + temp_code = readb(data->base + reg->tmu_cur_temp); + temp = code_to_temp(data, temp_code); + + clk_disable(data->clk); + mutex_unlock(&data->lock); + + return temp; +} + +#ifdef CONFIG_THERMAL_EMULATION +static int exynos_tmu_set_emulation(void *drv_data, unsigned long temp) +{ + struct exynos_tmu_data *data = drv_data; + struct exynos_tmu_platform_data *pdata = data->pdata; + const struct exynos_tmu_registers *reg = pdata->registers; + unsigned int val; + int ret = -EINVAL; + + if (!TMU_SUPPORTS(pdata, EMULATION)) + goto out; + + if (temp && temp < MCELSIUS) + goto out; + + mutex_lock(&data->lock); + clk_enable(data->clk); + + val = readl(data->base + reg->emul_con); + + if (temp) { + temp /= MCELSIUS; + + if (TMU_SUPPORTS(pdata, EMUL_TIME)) { + val &= ~(EXYNOS_EMUL_TIME_MASK << reg->emul_time_shift); + val |= (EXYNOS_EMUL_TIME << reg->emul_time_shift); + } + val &= ~(EXYNOS_EMUL_DATA_MASK << reg->emul_temp_shift); + val |= (temp_to_code(data, temp) << reg->emul_temp_shift) | + EXYNOS_EMUL_ENABLE; + } else { + val &= ~EXYNOS_EMUL_ENABLE; + } + + writel(val, data->base + reg->emul_con); + + clk_disable(data->clk); + mutex_unlock(&data->lock); + return 0; +out: + return ret; +} +#else +static int exynos_tmu_set_emulation(void *drv_data, unsigned long temp) + { return -EINVAL; } +#endif/*CONFIG_THERMAL_EMULATION*/ + +static void exynos_tmu_work(struct work_struct *work) +{ + struct exynos_tmu_data *data = container_of(work, + struct exynos_tmu_data, irq_work); + struct exynos_tmu_platform_data *pdata = data->pdata; + const struct exynos_tmu_registers *reg = pdata->registers; + unsigned int val_irq, val_type; + + /* Find which sensor generated this interrupt */ + if (reg->tmu_irqstatus) { + val_type = readl(data->base_common + reg->tmu_irqstatus); + if (!((val_type >> data->id) & 0x1)) + goto out; + } + + exynos_report_trigger(data->reg_conf); + mutex_lock(&data->lock); + clk_enable(data->clk); + + /* TODO: take action based on particular interrupt */ + val_irq = readl(data->base + reg->tmu_intstat); + /* clear the interrupts */ + writel(val_irq, data->base + reg->tmu_intclear); + + clk_disable(data->clk); + mutex_unlock(&data->lock); +out: + enable_irq(data->irq); +} + +static irqreturn_t exynos_tmu_irq(int irq, void *id) +{ + struct exynos_tmu_data *data = id; + + disable_irq_nosync(irq); + schedule_work(&data->irq_work); + + return IRQ_HANDLED; +} + +static const struct of_device_id exynos_tmu_match[] = { + { + .compatible = "samsung,exynos4210-tmu", + .data = (void *)EXYNOS4210_TMU_DRV_DATA, + }, + { + .compatible = "samsung,exynos4412-tmu", + .data = (void *)EXYNOS5250_TMU_DRV_DATA, + }, + { + .compatible = "samsung,exynos5250-tmu", + .data = (void *)EXYNOS5250_TMU_DRV_DATA, + }, + { + .compatible = "samsung,exynos5440-tmu", + .data = (void *)EXYNOS5440_TMU_DRV_DATA, + }, + {}, +}; +MODULE_DEVICE_TABLE(of, exynos_tmu_match); + +static inline struct exynos_tmu_platform_data *exynos_get_driver_data( + struct platform_device *pdev, int id) +{ + struct exynos_tmu_init_data *data_table; + struct exynos_tmu_platform_data *tmu_data; + const struct of_device_id *match; + + match = of_match_node(exynos_tmu_match, pdev->dev.of_node); + if (!match) + return NULL; + data_table = (struct exynos_tmu_init_data *) match->data; + if (!data_table || id >= data_table->tmu_count) + return NULL; + tmu_data = data_table->tmu_data; + return (struct exynos_tmu_platform_data *) (tmu_data + id); +} + +static int exynos_map_dt_data(struct platform_device *pdev) +{ + struct exynos_tmu_data *data = platform_get_drvdata(pdev); + struct exynos_tmu_platform_data *pdata; + struct resource res; + int ret; + + if (!data || !pdev->dev.of_node) + return -ENODEV; + + /* + * Try enabling the regulator if found + * TODO: Add regulator as an SOC feature, so that regulator enable + * is a compulsory call. + */ + data->regulator = devm_regulator_get(&pdev->dev, "vtmu"); + if (!IS_ERR(data->regulator)) { + ret = regulator_enable(data->regulator); + if (ret) { + dev_err(&pdev->dev, "failed to enable vtmu\n"); + return ret; + } + } else { + dev_info(&pdev->dev, "Regulator node (vtmu) not found\n"); + } + + data->id = of_alias_get_id(pdev->dev.of_node, "tmuctrl"); + if (data->id < 0) + data->id = 0; + + data->irq = irq_of_parse_and_map(pdev->dev.of_node, 0); + if (data->irq <= 0) { + dev_err(&pdev->dev, "failed to get IRQ\n"); + return -ENODEV; + } + + if (of_address_to_resource(pdev->dev.of_node, 0, &res)) { + dev_err(&pdev->dev, "failed to get Resource 0\n"); + return -ENODEV; + } + + data->base = devm_ioremap(&pdev->dev, res.start, resource_size(&res)); + if (!data->base) { + dev_err(&pdev->dev, "Failed to ioremap memory\n"); + return -EADDRNOTAVAIL; + } + + pdata = exynos_get_driver_data(pdev, data->id); + if (!pdata) { + dev_err(&pdev->dev, "No platform init data supplied.\n"); + return -ENODEV; + } + data->pdata = pdata; + /* + * Check if the TMU shares some registers and then try to map the + * memory of common registers. + */ + if (!TMU_SUPPORTS(pdata, SHARED_MEMORY)) + return 0; + + if (of_address_to_resource(pdev->dev.of_node, 1, &res)) { + dev_err(&pdev->dev, "failed to get Resource 1\n"); + return -ENODEV; + } + + data->base_common = devm_ioremap(&pdev->dev, res.start, + resource_size(&res)); + if (!data->base_common) { + dev_err(&pdev->dev, "Failed to ioremap memory\n"); + return -ENOMEM; + } + + return 0; +} + +static int exynos_tmu_probe(struct platform_device *pdev) +{ + struct exynos_tmu_data *data; + struct exynos_tmu_platform_data *pdata; + struct thermal_sensor_conf *sensor_conf; + int ret, i; + + data = devm_kzalloc(&pdev->dev, sizeof(struct exynos_tmu_data), + GFP_KERNEL); + if (!data) { + dev_err(&pdev->dev, "Failed to allocate driver structure\n"); + return -ENOMEM; + } + + platform_set_drvdata(pdev, data); + mutex_init(&data->lock); + + ret = exynos_map_dt_data(pdev); + if (ret) + return ret; + + pdata = data->pdata; + + INIT_WORK(&data->irq_work, exynos_tmu_work); + + data->clk = devm_clk_get(&pdev->dev, "tmu_apbif"); + if (IS_ERR(data->clk)) { + dev_err(&pdev->dev, "Failed to get clock\n"); + return PTR_ERR(data->clk); + } + + ret = clk_prepare(data->clk); + if (ret) + return ret; + + if (pdata->type == SOC_ARCH_EXYNOS || + pdata->type == SOC_ARCH_EXYNOS4210 || + pdata->type == SOC_ARCH_EXYNOS5440) + data->soc = pdata->type; + else { + ret = -EINVAL; + dev_err(&pdev->dev, "Platform not supported\n"); + goto err_clk; + } + + ret = exynos_tmu_initialize(pdev); + if (ret) { + dev_err(&pdev->dev, "Failed to initialize TMU\n"); + goto err_clk; + } + + exynos_tmu_control(pdev, true); + + /* Allocate a structure to register with the exynos core thermal */ + sensor_conf = devm_kzalloc(&pdev->dev, + sizeof(struct thermal_sensor_conf), GFP_KERNEL); + if (!sensor_conf) { + dev_err(&pdev->dev, "Failed to allocate registration struct\n"); + ret = -ENOMEM; + goto err_clk; + } + sprintf(sensor_conf->name, "therm_zone%d", data->id); + sensor_conf->read_temperature = (int (*)(void *))exynos_tmu_read; + sensor_conf->write_emul_temp = + (int (*)(void *, unsigned long))exynos_tmu_set_emulation; + sensor_conf->driver_data = data; + sensor_conf->trip_data.trip_count = pdata->trigger_enable[0] + + pdata->trigger_enable[1] + pdata->trigger_enable[2]+ + pdata->trigger_enable[3]; + + for (i = 0; i < sensor_conf->trip_data.trip_count; i++) { + sensor_conf->trip_data.trip_val[i] = + pdata->threshold + pdata->trigger_levels[i]; + sensor_conf->trip_data.trip_type[i] = + pdata->trigger_type[i]; + } + + sensor_conf->trip_data.trigger_falling = pdata->threshold_falling; + + sensor_conf->cooling_data.freq_clip_count = pdata->freq_tab_count; + for (i = 0; i < pdata->freq_tab_count; i++) { + sensor_conf->cooling_data.freq_data[i].freq_clip_max = + pdata->freq_tab[i].freq_clip_max; + sensor_conf->cooling_data.freq_data[i].temp_level = + pdata->freq_tab[i].temp_level; + } + sensor_conf->dev = &pdev->dev; + /* Register the sensor with thermal management interface */ + ret = exynos_register_thermal(sensor_conf); + if (ret) { + dev_err(&pdev->dev, "Failed to register thermal interface\n"); + goto err_clk; + } + data->reg_conf = sensor_conf; + + ret = devm_request_irq(&pdev->dev, data->irq, exynos_tmu_irq, + IRQF_TRIGGER_RISING | IRQF_SHARED, dev_name(&pdev->dev), data); + if (ret) { + dev_err(&pdev->dev, "Failed to request irq: %d\n", data->irq); + goto err_clk; + } + + return 0; +err_clk: + clk_unprepare(data->clk); + return ret; +} + +static int exynos_tmu_remove(struct platform_device *pdev) +{ + struct exynos_tmu_data *data = platform_get_drvdata(pdev); + + exynos_tmu_control(pdev, false); + + exynos_unregister_thermal(data->reg_conf); + + clk_unprepare(data->clk); + + if (!IS_ERR(data->regulator)) + regulator_disable(data->regulator); + + return 0; +} + +#ifdef CONFIG_PM_SLEEP +static int exynos_tmu_suspend(struct device *dev) +{ + exynos_tmu_control(to_platform_device(dev), false); + + return 0; +} + +static int exynos_tmu_resume(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + + exynos_tmu_initialize(pdev); + exynos_tmu_control(pdev, true); + + return 0; +} + +static SIMPLE_DEV_PM_OPS(exynos_tmu_pm, + exynos_tmu_suspend, exynos_tmu_resume); +#define EXYNOS_TMU_PM (&exynos_tmu_pm) +#else +#define EXYNOS_TMU_PM NULL +#endif + +static struct platform_driver exynos_tmu_driver = { + .driver = { + .name = "exynos-tmu", + .owner = THIS_MODULE, + .pm = EXYNOS_TMU_PM, + .of_match_table = exynos_tmu_match, + }, + .probe = exynos_tmu_probe, + .remove = exynos_tmu_remove, +}; + +module_platform_driver(exynos_tmu_driver); + +MODULE_DESCRIPTION("EXYNOS TMU Driver"); +MODULE_AUTHOR("Donggeun Kim <dg77.kim@samsung.com>"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("platform:exynos-tmu"); diff --git a/drivers/thermal/samsung/exynos_tmu.h b/drivers/thermal/samsung/exynos_tmu.h new file mode 100644 index 000000000000..b364c9eee701 --- /dev/null +++ b/drivers/thermal/samsung/exynos_tmu.h @@ -0,0 +1,311 @@ +/* + * exynos_tmu.h - Samsung EXYNOS TMU (Thermal Management Unit) + * + * Copyright (C) 2011 Samsung Electronics + * Donggeun Kim <dg77.kim@samsung.com> + * Amit Daniel Kachhap <amit.daniel@samsung.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _EXYNOS_TMU_H +#define _EXYNOS_TMU_H +#include <linux/cpu_cooling.h> + +#include "exynos_thermal_common.h" + +enum calibration_type { + TYPE_ONE_POINT_TRIMMING, + TYPE_ONE_POINT_TRIMMING_25, + TYPE_ONE_POINT_TRIMMING_85, + TYPE_TWO_POINT_TRIMMING, + TYPE_NONE, +}; + +enum calibration_mode { + SW_MODE, + HW_MODE, +}; + +enum soc_type { + SOC_ARCH_EXYNOS4210 = 1, + SOC_ARCH_EXYNOS, + SOC_ARCH_EXYNOS5440, +}; + +/** + * EXYNOS TMU supported features. + * TMU_SUPPORT_EMULATION - This features is used to set user defined + * temperature to the TMU controller. + * TMU_SUPPORT_MULTI_INST - This features denotes that the soc + * has many instances of TMU. + * TMU_SUPPORT_TRIM_RELOAD - This features shows that trimming can + * be reloaded. + * TMU_SUPPORT_FALLING_TRIP - This features shows that interrupt can + * be registered for falling trips also. + * TMU_SUPPORT_READY_STATUS - This feature tells that the TMU current + * state(active/idle) can be checked. + * TMU_SUPPORT_EMUL_TIME - This features allows to set next temp emulation + * sample time. + * TMU_SUPPORT_SHARED_MEMORY - This feature tells that the different TMU + * sensors shares some common registers. + * TMU_SUPPORT - macro to compare the above features with the supplied. + */ +#define TMU_SUPPORT_EMULATION BIT(0) +#define TMU_SUPPORT_MULTI_INST BIT(1) +#define TMU_SUPPORT_TRIM_RELOAD BIT(2) +#define TMU_SUPPORT_FALLING_TRIP BIT(3) +#define TMU_SUPPORT_READY_STATUS BIT(4) +#define TMU_SUPPORT_EMUL_TIME BIT(5) +#define TMU_SUPPORT_SHARED_MEMORY BIT(6) + +#define TMU_SUPPORTS(a, b) (a->features & TMU_SUPPORT_ ## b) + +/** + * struct exynos_tmu_register - register descriptors to access registers and + * bitfields. The register validity, offsets and bitfield values may vary + * slightly across different exynos SOC's. + * @triminfo_data: register containing 2 pont trimming data + * @triminfo_25_shift: shift bit of the 25 C trim value in triminfo_data reg. + * @triminfo_85_shift: shift bit of the 85 C trim value in triminfo_data reg. + * @triminfo_ctrl: trim info controller register. + * @triminfo_reload_shift: shift of triminfo reload enable bit in triminfo_ctrl + reg. + * @tmu_ctrl: TMU main controller register. + * @buf_vref_sel_shift: shift bits of reference voltage in tmu_ctrl register. + * @buf_vref_sel_mask: mask bits of reference voltage in tmu_ctrl register. + * @therm_trip_mode_shift: shift bits of tripping mode in tmu_ctrl register. + * @therm_trip_mode_mask: mask bits of tripping mode in tmu_ctrl register. + * @therm_trip_en_shift: shift bits of tripping enable in tmu_ctrl register. + * @buf_slope_sel_shift: shift bits of amplifier gain value in tmu_ctrl + register. + * @buf_slope_sel_mask: mask bits of amplifier gain value in tmu_ctrl register. + * @calib_mode_shift: shift bits of calibration mode value in tmu_ctrl + register. + * @calib_mode_mask: mask bits of calibration mode value in tmu_ctrl + register. + * @therm_trip_tq_en_shift: shift bits of thermal trip enable by TQ pin in + tmu_ctrl register. + * @core_en_shift: shift bits of TMU core enable bit in tmu_ctrl register. + * @tmu_status: register drescribing the TMU status. + * @tmu_cur_temp: register containing the current temperature of the TMU. + * @tmu_cur_temp_shift: shift bits of current temp value in tmu_cur_temp + register. + * @threshold_temp: register containing the base threshold level. + * @threshold_th0: Register containing first set of rising levels. + * @threshold_th0_l0_shift: shift bits of level0 threshold temperature. + * @threshold_th0_l1_shift: shift bits of level1 threshold temperature. + * @threshold_th0_l2_shift: shift bits of level2 threshold temperature. + * @threshold_th0_l3_shift: shift bits of level3 threshold temperature. + * @threshold_th1: Register containing second set of rising levels. + * @threshold_th1_l0_shift: shift bits of level0 threshold temperature. + * @threshold_th1_l1_shift: shift bits of level1 threshold temperature. + * @threshold_th1_l2_shift: shift bits of level2 threshold temperature. + * @threshold_th1_l3_shift: shift bits of level3 threshold temperature. + * @threshold_th2: Register containing third set of rising levels. + * @threshold_th2_l0_shift: shift bits of level0 threshold temperature. + * @threshold_th3: Register containing fourth set of rising levels. + * @threshold_th3_l0_shift: shift bits of level0 threshold temperature. + * @tmu_inten: register containing the different threshold interrupt + enable bits. + * @inten_rise_shift: shift bits of all rising interrupt bits. + * @inten_rise_mask: mask bits of all rising interrupt bits. + * @inten_fall_shift: shift bits of all rising interrupt bits. + * @inten_fall_mask: mask bits of all rising interrupt bits. + * @inten_rise0_shift: shift bits of rising 0 interrupt bits. + * @inten_rise1_shift: shift bits of rising 1 interrupt bits. + * @inten_rise2_shift: shift bits of rising 2 interrupt bits. + * @inten_rise3_shift: shift bits of rising 3 interrupt bits. + * @inten_fall0_shift: shift bits of falling 0 interrupt bits. + * @inten_fall1_shift: shift bits of falling 1 interrupt bits. + * @inten_fall2_shift: shift bits of falling 2 interrupt bits. + * @inten_fall3_shift: shift bits of falling 3 interrupt bits. + * @tmu_intstat: Register containing the interrupt status values. + * @tmu_intclear: Register for clearing the raised interrupt status. + * @emul_con: TMU emulation controller register. + * @emul_temp_shift: shift bits of emulation temperature. + * @emul_time_shift: shift bits of emulation time. + * @emul_time_mask: mask bits of emulation time. + * @tmu_irqstatus: register to find which TMU generated interrupts. + * @tmu_pmin: register to get/set the Pmin value. + */ +struct exynos_tmu_registers { + u32 triminfo_data; + u32 triminfo_25_shift; + u32 triminfo_85_shift; + + u32 triminfo_ctrl; + u32 triminfo_reload_shift; + + u32 tmu_ctrl; + u32 buf_vref_sel_shift; + u32 buf_vref_sel_mask; + u32 therm_trip_mode_shift; + u32 therm_trip_mode_mask; + u32 therm_trip_en_shift; + u32 buf_slope_sel_shift; + u32 buf_slope_sel_mask; + u32 calib_mode_shift; + u32 calib_mode_mask; + u32 therm_trip_tq_en_shift; + u32 core_en_shift; + + u32 tmu_status; + + u32 tmu_cur_temp; + u32 tmu_cur_temp_shift; + + u32 threshold_temp; + + u32 threshold_th0; + u32 threshold_th0_l0_shift; + u32 threshold_th0_l1_shift; + u32 threshold_th0_l2_shift; + u32 threshold_th0_l3_shift; + + u32 threshold_th1; + u32 threshold_th1_l0_shift; + u32 threshold_th1_l1_shift; + u32 threshold_th1_l2_shift; + u32 threshold_th1_l3_shift; + + u32 threshold_th2; + u32 threshold_th2_l0_shift; + + u32 threshold_th3; + u32 threshold_th3_l0_shift; + + u32 tmu_inten; + u32 inten_rise_shift; + u32 inten_rise_mask; + u32 inten_fall_shift; + u32 inten_fall_mask; + u32 inten_rise0_shift; + u32 inten_rise1_shift; + u32 inten_rise2_shift; + u32 inten_rise3_shift; + u32 inten_fall0_shift; + u32 inten_fall1_shift; + u32 inten_fall2_shift; + u32 inten_fall3_shift; + + u32 tmu_intstat; + + u32 tmu_intclear; + + u32 emul_con; + u32 emul_temp_shift; + u32 emul_time_shift; + u32 emul_time_mask; + + u32 tmu_irqstatus; + u32 tmu_pmin; +}; + +/** + * struct exynos_tmu_platform_data + * @threshold: basic temperature for generating interrupt + * 25 <= threshold <= 125 [unit: degree Celsius] + * @threshold_falling: differntial value for setting threshold + * of temperature falling interrupt. + * @trigger_levels: array for each interrupt levels + * [unit: degree Celsius] + * 0: temperature for trigger_level0 interrupt + * condition for trigger_level0 interrupt: + * current temperature > threshold + trigger_levels[0] + * 1: temperature for trigger_level1 interrupt + * condition for trigger_level1 interrupt: + * current temperature > threshold + trigger_levels[1] + * 2: temperature for trigger_level2 interrupt + * condition for trigger_level2 interrupt: + * current temperature > threshold + trigger_levels[2] + * 3: temperature for trigger_level3 interrupt + * condition for trigger_level3 interrupt: + * current temperature > threshold + trigger_levels[3] + * @trigger_type: defines the type of trigger. Possible values are, + * THROTTLE_ACTIVE trigger type + * THROTTLE_PASSIVE trigger type + * SW_TRIP trigger type + * HW_TRIP + * @trigger_enable[]: array to denote which trigger levels are enabled. + * 1 = enable trigger_level[] interrupt, + * 0 = disable trigger_level[] interrupt + * @max_trigger_level: max trigger level supported by the TMU + * @gain: gain of amplifier in the positive-TC generator block + * 0 <= gain <= 15 + * @reference_voltage: reference voltage of amplifier + * in the positive-TC generator block + * 0 <= reference_voltage <= 31 + * @noise_cancel_mode: noise cancellation mode + * 000, 100, 101, 110 and 111 can be different modes + * @type: determines the type of SOC + * @efuse_value: platform defined fuse value + * @min_efuse_value: minimum valid trimming data + * @max_efuse_value: maximum valid trimming data + * @first_point_trim: temp value of the first point trimming + * @second_point_trim: temp value of the second point trimming + * @default_temp_offset: default temperature offset in case of no trimming + * @cal_type: calibration type for temperature + * @cal_mode: calibration mode for temperature + * @freq_clip_table: Table representing frequency reduction percentage. + * @freq_tab_count: Count of the above table as frequency reduction may + * applicable to only some of the trigger levels. + * @registers: Pointer to structure containing all the TMU controller registers + * and bitfields shifts and masks. + * @features: a bitfield value indicating the features supported in SOC like + * emulation, multi instance etc + * + * This structure is required for configuration of exynos_tmu driver. + */ +struct exynos_tmu_platform_data { + u8 threshold; + u8 threshold_falling; + u8 trigger_levels[MAX_TRIP_COUNT]; + enum trigger_type trigger_type[MAX_TRIP_COUNT]; + bool trigger_enable[MAX_TRIP_COUNT]; + u8 max_trigger_level; + u8 gain; + u8 reference_voltage; + u8 noise_cancel_mode; + + u32 efuse_value; + u32 min_efuse_value; + u32 max_efuse_value; + u8 first_point_trim; + u8 second_point_trim; + u8 default_temp_offset; + + enum calibration_type cal_type; + enum calibration_mode cal_mode; + enum soc_type type; + struct freq_clip_table freq_tab[4]; + unsigned int freq_tab_count; + const struct exynos_tmu_registers *registers; + unsigned int features; +}; + +/** + * struct exynos_tmu_init_data + * @tmu_count: number of TMU instances. + * @tmu_data: platform data of all TMU instances. + * This structure is required to store data for multi-instance exynos tmu + * driver. + */ +struct exynos_tmu_init_data { + int tmu_count; + struct exynos_tmu_platform_data tmu_data[]; +}; + +#endif /* _EXYNOS_TMU_H */ diff --git a/drivers/thermal/samsung/exynos_tmu_data.c b/drivers/thermal/samsung/exynos_tmu_data.c new file mode 100644 index 000000000000..9002499c1f69 --- /dev/null +++ b/drivers/thermal/samsung/exynos_tmu_data.c @@ -0,0 +1,250 @@ +/* + * exynos_tmu_data.c - Samsung EXYNOS tmu data file + * + * Copyright (C) 2013 Samsung Electronics + * Amit Daniel Kachhap <amit.daniel@samsung.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#include "exynos_thermal_common.h" +#include "exynos_tmu.h" +#include "exynos_tmu_data.h" + +#if defined(CONFIG_CPU_EXYNOS4210) +static const struct exynos_tmu_registers exynos4210_tmu_registers = { + .triminfo_data = EXYNOS_TMU_REG_TRIMINFO, + .triminfo_25_shift = EXYNOS_TRIMINFO_25_SHIFT, + .triminfo_85_shift = EXYNOS_TRIMINFO_85_SHIFT, + .tmu_ctrl = EXYNOS_TMU_REG_CONTROL, + .buf_vref_sel_shift = EXYNOS_TMU_REF_VOLTAGE_SHIFT, + .buf_vref_sel_mask = EXYNOS_TMU_REF_VOLTAGE_MASK, + .buf_slope_sel_shift = EXYNOS_TMU_BUF_SLOPE_SEL_SHIFT, + .buf_slope_sel_mask = EXYNOS_TMU_BUF_SLOPE_SEL_MASK, + .core_en_shift = EXYNOS_TMU_CORE_EN_SHIFT, + .tmu_status = EXYNOS_TMU_REG_STATUS, + .tmu_cur_temp = EXYNOS_TMU_REG_CURRENT_TEMP, + .threshold_temp = EXYNOS4210_TMU_REG_THRESHOLD_TEMP, + .threshold_th0 = EXYNOS4210_TMU_REG_TRIG_LEVEL0, + .tmu_inten = EXYNOS_TMU_REG_INTEN, + .inten_rise_mask = EXYNOS4210_TMU_TRIG_LEVEL_MASK, + .inten_rise0_shift = EXYNOS_TMU_INTEN_RISE0_SHIFT, + .inten_rise1_shift = EXYNOS_TMU_INTEN_RISE1_SHIFT, + .inten_rise2_shift = EXYNOS_TMU_INTEN_RISE2_SHIFT, + .inten_rise3_shift = EXYNOS_TMU_INTEN_RISE3_SHIFT, + .tmu_intstat = EXYNOS_TMU_REG_INTSTAT, + .tmu_intclear = EXYNOS_TMU_REG_INTCLEAR, +}; + +struct exynos_tmu_init_data const exynos4210_default_tmu_data = { + .tmu_data = { + { + .threshold = 80, + .trigger_levels[0] = 5, + .trigger_levels[1] = 20, + .trigger_levels[2] = 30, + .trigger_enable[0] = true, + .trigger_enable[1] = true, + .trigger_enable[2] = true, + .trigger_enable[3] = false, + .trigger_type[0] = THROTTLE_ACTIVE, + .trigger_type[1] = THROTTLE_ACTIVE, + .trigger_type[2] = SW_TRIP, + .max_trigger_level = 4, + .gain = 15, + .reference_voltage = 7, + .cal_type = TYPE_ONE_POINT_TRIMMING, + .min_efuse_value = 40, + .max_efuse_value = 100, + .first_point_trim = 25, + .second_point_trim = 85, + .default_temp_offset = 50, + .freq_tab[0] = { + .freq_clip_max = 800 * 1000, + .temp_level = 85, + }, + .freq_tab[1] = { + .freq_clip_max = 200 * 1000, + .temp_level = 100, + }, + .freq_tab_count = 2, + .type = SOC_ARCH_EXYNOS4210, + .registers = &exynos4210_tmu_registers, + .features = TMU_SUPPORT_READY_STATUS, + }, + }, + .tmu_count = 1, +}; +#endif + +#if defined(CONFIG_SOC_EXYNOS5250) || defined(CONFIG_SOC_EXYNOS4412) +static const struct exynos_tmu_registers exynos5250_tmu_registers = { + .triminfo_data = EXYNOS_TMU_REG_TRIMINFO, + .triminfo_25_shift = EXYNOS_TRIMINFO_25_SHIFT, + .triminfo_85_shift = EXYNOS_TRIMINFO_85_SHIFT, + .triminfo_ctrl = EXYNOS_TMU_TRIMINFO_CON, + .triminfo_reload_shift = EXYNOS_TRIMINFO_RELOAD_SHIFT, + .tmu_ctrl = EXYNOS_TMU_REG_CONTROL, + .buf_vref_sel_shift = EXYNOS_TMU_REF_VOLTAGE_SHIFT, + .buf_vref_sel_mask = EXYNOS_TMU_REF_VOLTAGE_MASK, + .therm_trip_mode_shift = EXYNOS_TMU_TRIP_MODE_SHIFT, + .therm_trip_mode_mask = EXYNOS_TMU_TRIP_MODE_MASK, + .therm_trip_en_shift = EXYNOS_TMU_THERM_TRIP_EN_SHIFT, + .buf_slope_sel_shift = EXYNOS_TMU_BUF_SLOPE_SEL_SHIFT, + .buf_slope_sel_mask = EXYNOS_TMU_BUF_SLOPE_SEL_MASK, + .core_en_shift = EXYNOS_TMU_CORE_EN_SHIFT, + .tmu_status = EXYNOS_TMU_REG_STATUS, + .tmu_cur_temp = EXYNOS_TMU_REG_CURRENT_TEMP, + .threshold_th0 = EXYNOS_THD_TEMP_RISE, + .threshold_th1 = EXYNOS_THD_TEMP_FALL, + .tmu_inten = EXYNOS_TMU_REG_INTEN, + .inten_rise_mask = EXYNOS_TMU_RISE_INT_MASK, + .inten_rise_shift = EXYNOS_TMU_RISE_INT_SHIFT, + .inten_fall_mask = EXYNOS_TMU_FALL_INT_MASK, + .inten_fall_shift = EXYNOS_TMU_FALL_INT_SHIFT, + .inten_rise0_shift = EXYNOS_TMU_INTEN_RISE0_SHIFT, + .inten_rise1_shift = EXYNOS_TMU_INTEN_RISE1_SHIFT, + .inten_rise2_shift = EXYNOS_TMU_INTEN_RISE2_SHIFT, + .inten_rise3_shift = EXYNOS_TMU_INTEN_RISE3_SHIFT, + .inten_fall0_shift = EXYNOS_TMU_INTEN_FALL0_SHIFT, + .tmu_intstat = EXYNOS_TMU_REG_INTSTAT, + .tmu_intclear = EXYNOS_TMU_REG_INTCLEAR, + .emul_con = EXYNOS_EMUL_CON, + .emul_temp_shift = EXYNOS_EMUL_DATA_SHIFT, + .emul_time_shift = EXYNOS_EMUL_TIME_SHIFT, + .emul_time_mask = EXYNOS_EMUL_TIME_MASK, +}; + +#define EXYNOS5250_TMU_DATA \ + .threshold_falling = 10, \ + .trigger_levels[0] = 85, \ + .trigger_levels[1] = 103, \ + .trigger_levels[2] = 110, \ + .trigger_levels[3] = 120, \ + .trigger_enable[0] = true, \ + .trigger_enable[1] = true, \ + .trigger_enable[2] = true, \ + .trigger_enable[3] = false, \ + .trigger_type[0] = THROTTLE_ACTIVE, \ + .trigger_type[1] = THROTTLE_ACTIVE, \ + .trigger_type[2] = SW_TRIP, \ + .trigger_type[3] = HW_TRIP, \ + .max_trigger_level = 4, \ + .gain = 8, \ + .reference_voltage = 16, \ + .noise_cancel_mode = 4, \ + .cal_type = TYPE_ONE_POINT_TRIMMING, \ + .efuse_value = 55, \ + .min_efuse_value = 40, \ + .max_efuse_value = 100, \ + .first_point_trim = 25, \ + .second_point_trim = 85, \ + .default_temp_offset = 50, \ + .freq_tab[0] = { \ + .freq_clip_max = 800 * 1000, \ + .temp_level = 85, \ + }, \ + .freq_tab[1] = { \ + .freq_clip_max = 200 * 1000, \ + .temp_level = 103, \ + }, \ + .freq_tab_count = 2, \ + .type = SOC_ARCH_EXYNOS, \ + .registers = &exynos5250_tmu_registers, \ + .features = (TMU_SUPPORT_EMULATION | TMU_SUPPORT_TRIM_RELOAD | \ + TMU_SUPPORT_FALLING_TRIP | TMU_SUPPORT_READY_STATUS | \ + TMU_SUPPORT_EMUL_TIME) + +struct exynos_tmu_init_data const exynos5250_default_tmu_data = { + .tmu_data = { + { EXYNOS5250_TMU_DATA }, + }, + .tmu_count = 1, +}; +#endif + +#if defined(CONFIG_SOC_EXYNOS5440) +static const struct exynos_tmu_registers exynos5440_tmu_registers = { + .triminfo_data = EXYNOS5440_TMU_S0_7_TRIM, + .triminfo_25_shift = EXYNOS_TRIMINFO_25_SHIFT, + .triminfo_85_shift = EXYNOS_TRIMINFO_85_SHIFT, + .tmu_ctrl = EXYNOS5440_TMU_S0_7_CTRL, + .buf_vref_sel_shift = EXYNOS_TMU_REF_VOLTAGE_SHIFT, + .buf_vref_sel_mask = EXYNOS_TMU_REF_VOLTAGE_MASK, + .therm_trip_mode_shift = EXYNOS_TMU_TRIP_MODE_SHIFT, + .therm_trip_mode_mask = EXYNOS_TMU_TRIP_MODE_MASK, + .therm_trip_en_shift = EXYNOS_TMU_THERM_TRIP_EN_SHIFT, + .buf_slope_sel_shift = EXYNOS_TMU_BUF_SLOPE_SEL_SHIFT, + .buf_slope_sel_mask = EXYNOS_TMU_BUF_SLOPE_SEL_MASK, + .calib_mode_shift = EXYNOS_TMU_CALIB_MODE_SHIFT, + .calib_mode_mask = EXYNOS_TMU_CALIB_MODE_MASK, + .core_en_shift = EXYNOS_TMU_CORE_EN_SHIFT, + .tmu_status = EXYNOS5440_TMU_S0_7_STATUS, + .tmu_cur_temp = EXYNOS5440_TMU_S0_7_TEMP, + .threshold_th0 = EXYNOS5440_TMU_S0_7_TH0, + .threshold_th1 = EXYNOS5440_TMU_S0_7_TH1, + .threshold_th2 = EXYNOS5440_TMU_S0_7_TH2, + .threshold_th3_l0_shift = EXYNOS5440_TMU_TH_RISE4_SHIFT, + .tmu_inten = EXYNOS5440_TMU_S0_7_IRQEN, + .inten_rise_mask = EXYNOS5440_TMU_RISE_INT_MASK, + .inten_rise_shift = EXYNOS5440_TMU_RISE_INT_SHIFT, + .inten_fall_mask = EXYNOS5440_TMU_FALL_INT_MASK, + .inten_fall_shift = EXYNOS5440_TMU_FALL_INT_SHIFT, + .inten_rise0_shift = EXYNOS5440_TMU_INTEN_RISE0_SHIFT, + .inten_rise1_shift = EXYNOS5440_TMU_INTEN_RISE1_SHIFT, + .inten_rise2_shift = EXYNOS5440_TMU_INTEN_RISE2_SHIFT, + .inten_rise3_shift = EXYNOS5440_TMU_INTEN_RISE3_SHIFT, + .inten_fall0_shift = EXYNOS5440_TMU_INTEN_FALL0_SHIFT, + .tmu_intstat = EXYNOS5440_TMU_S0_7_IRQ, + .tmu_intclear = EXYNOS5440_TMU_S0_7_IRQ, + .tmu_irqstatus = EXYNOS5440_TMU_IRQ_STATUS, + .emul_con = EXYNOS5440_TMU_S0_7_DEBUG, + .emul_temp_shift = EXYNOS_EMUL_DATA_SHIFT, + .tmu_pmin = EXYNOS5440_TMU_PMIN, +}; + +#define EXYNOS5440_TMU_DATA \ + .trigger_levels[0] = 100, \ + .trigger_levels[4] = 105, \ + .trigger_enable[0] = 1, \ + .trigger_type[0] = SW_TRIP, \ + .trigger_type[4] = HW_TRIP, \ + .max_trigger_level = 5, \ + .gain = 5, \ + .reference_voltage = 16, \ + .noise_cancel_mode = 4, \ + .cal_type = TYPE_ONE_POINT_TRIMMING, \ + .cal_mode = 0, \ + .efuse_value = 0x5b2d, \ + .min_efuse_value = 16, \ + .max_efuse_value = 76, \ + .first_point_trim = 25, \ + .second_point_trim = 70, \ + .default_temp_offset = 25, \ + .type = SOC_ARCH_EXYNOS5440, \ + .registers = &exynos5440_tmu_registers, \ + .features = (TMU_SUPPORT_EMULATION | TMU_SUPPORT_FALLING_TRIP | \ + TMU_SUPPORT_MULTI_INST | TMU_SUPPORT_SHARED_MEMORY), + +struct exynos_tmu_init_data const exynos5440_default_tmu_data = { + .tmu_data = { + { EXYNOS5440_TMU_DATA } , + { EXYNOS5440_TMU_DATA } , + { EXYNOS5440_TMU_DATA } , + }, + .tmu_count = 3, +}; +#endif diff --git a/drivers/thermal/samsung/exynos_tmu_data.h b/drivers/thermal/samsung/exynos_tmu_data.h new file mode 100644 index 000000000000..dc7feb51099b --- /dev/null +++ b/drivers/thermal/samsung/exynos_tmu_data.h @@ -0,0 +1,155 @@ +/* + * exynos_tmu_data.h - Samsung EXYNOS tmu data header file + * + * Copyright (C) 2013 Samsung Electronics + * Amit Daniel Kachhap <amit.daniel@samsung.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#ifndef _EXYNOS_TMU_DATA_H +#define _EXYNOS_TMU_DATA_H + +/* Exynos generic registers */ +#define EXYNOS_TMU_REG_TRIMINFO 0x0 +#define EXYNOS_TMU_REG_CONTROL 0x20 +#define EXYNOS_TMU_REG_STATUS 0x28 +#define EXYNOS_TMU_REG_CURRENT_TEMP 0x40 +#define EXYNOS_TMU_REG_INTEN 0x70 +#define EXYNOS_TMU_REG_INTSTAT 0x74 +#define EXYNOS_TMU_REG_INTCLEAR 0x78 + +#define EXYNOS_TMU_TEMP_MASK 0xff +#define EXYNOS_TMU_REF_VOLTAGE_SHIFT 24 +#define EXYNOS_TMU_REF_VOLTAGE_MASK 0x1f +#define EXYNOS_TMU_BUF_SLOPE_SEL_MASK 0xf +#define EXYNOS_TMU_BUF_SLOPE_SEL_SHIFT 8 +#define EXYNOS_TMU_CORE_EN_SHIFT 0 + +/* Exynos4210 specific registers */ +#define EXYNOS4210_TMU_REG_THRESHOLD_TEMP 0x44 +#define EXYNOS4210_TMU_REG_TRIG_LEVEL0 0x50 +#define EXYNOS4210_TMU_REG_TRIG_LEVEL1 0x54 +#define EXYNOS4210_TMU_REG_TRIG_LEVEL2 0x58 +#define EXYNOS4210_TMU_REG_TRIG_LEVEL3 0x5C +#define EXYNOS4210_TMU_REG_PAST_TEMP0 0x60 +#define EXYNOS4210_TMU_REG_PAST_TEMP1 0x64 +#define EXYNOS4210_TMU_REG_PAST_TEMP2 0x68 +#define EXYNOS4210_TMU_REG_PAST_TEMP3 0x6C + +#define EXYNOS4210_TMU_TRIG_LEVEL0_MASK 0x1 +#define EXYNOS4210_TMU_TRIG_LEVEL1_MASK 0x10 +#define EXYNOS4210_TMU_TRIG_LEVEL2_MASK 0x100 +#define EXYNOS4210_TMU_TRIG_LEVEL3_MASK 0x1000 +#define EXYNOS4210_TMU_TRIG_LEVEL_MASK 0x1111 +#define EXYNOS4210_TMU_INTCLEAR_VAL 0x1111 + +/* Exynos5250 and Exynos4412 specific registers */ +#define EXYNOS_TMU_TRIMINFO_CON 0x14 +#define EXYNOS_THD_TEMP_RISE 0x50 +#define EXYNOS_THD_TEMP_FALL 0x54 +#define EXYNOS_EMUL_CON 0x80 + +#define EXYNOS_TRIMINFO_RELOAD_SHIFT 1 +#define EXYNOS_TRIMINFO_25_SHIFT 0 +#define EXYNOS_TRIMINFO_85_SHIFT 8 +#define EXYNOS_TMU_RISE_INT_MASK 0x111 +#define EXYNOS_TMU_RISE_INT_SHIFT 0 +#define EXYNOS_TMU_FALL_INT_MASK 0x111 +#define EXYNOS_TMU_FALL_INT_SHIFT 12 +#define EXYNOS_TMU_CLEAR_RISE_INT 0x111 +#define EXYNOS_TMU_CLEAR_FALL_INT (0x111 << 12) +#define EXYNOS_TMU_TRIP_MODE_SHIFT 13 +#define EXYNOS_TMU_TRIP_MODE_MASK 0x7 +#define EXYNOS_TMU_THERM_TRIP_EN_SHIFT 12 +#define EXYNOS_TMU_CALIB_MODE_SHIFT 4 +#define EXYNOS_TMU_CALIB_MODE_MASK 0x3 + +#define EXYNOS_TMU_INTEN_RISE0_SHIFT 0 +#define EXYNOS_TMU_INTEN_RISE1_SHIFT 4 +#define EXYNOS_TMU_INTEN_RISE2_SHIFT 8 +#define EXYNOS_TMU_INTEN_RISE3_SHIFT 12 +#define EXYNOS_TMU_INTEN_FALL0_SHIFT 16 +#define EXYNOS_TMU_INTEN_FALL1_SHIFT 20 +#define EXYNOS_TMU_INTEN_FALL2_SHIFT 24 + +#define EXYNOS_EMUL_TIME 0x57F0 +#define EXYNOS_EMUL_TIME_MASK 0xffff +#define EXYNOS_EMUL_TIME_SHIFT 16 +#define EXYNOS_EMUL_DATA_SHIFT 8 +#define EXYNOS_EMUL_DATA_MASK 0xFF +#define EXYNOS_EMUL_ENABLE 0x1 + +#define EXYNOS_MAX_TRIGGER_PER_REG 4 + +/*exynos5440 specific registers*/ +#define EXYNOS5440_TMU_S0_7_TRIM 0x000 +#define EXYNOS5440_TMU_S0_7_CTRL 0x020 +#define EXYNOS5440_TMU_S0_7_DEBUG 0x040 +#define EXYNOS5440_TMU_S0_7_STATUS 0x060 +#define EXYNOS5440_TMU_S0_7_TEMP 0x0f0 +#define EXYNOS5440_TMU_S0_7_TH0 0x110 +#define EXYNOS5440_TMU_S0_7_TH1 0x130 +#define EXYNOS5440_TMU_S0_7_TH2 0x150 +#define EXYNOS5440_TMU_S0_7_EVTEN 0x1F0 +#define EXYNOS5440_TMU_S0_7_IRQEN 0x210 +#define EXYNOS5440_TMU_S0_7_IRQ 0x230 +/* exynos5440 common registers */ +#define EXYNOS5440_TMU_IRQ_STATUS 0x000 +#define EXYNOS5440_TMU_PMIN 0x004 +#define EXYNOS5440_TMU_TEMP 0x008 + +#define EXYNOS5440_TMU_RISE_INT_MASK 0xf +#define EXYNOS5440_TMU_RISE_INT_SHIFT 0 +#define EXYNOS5440_TMU_FALL_INT_MASK 0xf +#define EXYNOS5440_TMU_FALL_INT_SHIFT 4 +#define EXYNOS5440_TMU_INTEN_RISE0_SHIFT 0 +#define EXYNOS5440_TMU_INTEN_RISE1_SHIFT 1 +#define EXYNOS5440_TMU_INTEN_RISE2_SHIFT 2 +#define EXYNOS5440_TMU_INTEN_RISE3_SHIFT 3 +#define EXYNOS5440_TMU_INTEN_FALL0_SHIFT 4 +#define EXYNOS5440_TMU_INTEN_FALL1_SHIFT 5 +#define EXYNOS5440_TMU_INTEN_FALL2_SHIFT 6 +#define EXYNOS5440_TMU_INTEN_FALL3_SHIFT 7 +#define EXYNOS5440_TMU_TH_RISE0_SHIFT 0 +#define EXYNOS5440_TMU_TH_RISE1_SHIFT 8 +#define EXYNOS5440_TMU_TH_RISE2_SHIFT 16 +#define EXYNOS5440_TMU_TH_RISE3_SHIFT 24 +#define EXYNOS5440_TMU_TH_RISE4_SHIFT 24 +#define EXYNOS5440_EFUSE_SWAP_OFFSET 8 + +#if defined(CONFIG_CPU_EXYNOS4210) +extern struct exynos_tmu_init_data const exynos4210_default_tmu_data; +#define EXYNOS4210_TMU_DRV_DATA (&exynos4210_default_tmu_data) +#else +#define EXYNOS4210_TMU_DRV_DATA (NULL) +#endif + +#if (defined(CONFIG_SOC_EXYNOS5250) || defined(CONFIG_SOC_EXYNOS4412)) +extern struct exynos_tmu_init_data const exynos5250_default_tmu_data; +#define EXYNOS5250_TMU_DRV_DATA (&exynos5250_default_tmu_data) +#else +#define EXYNOS5250_TMU_DRV_DATA (NULL) +#endif + +#if defined(CONFIG_SOC_EXYNOS5440) +extern struct exynos_tmu_init_data const exynos5440_default_tmu_data; +#define EXYNOS5440_TMU_DRV_DATA (&exynos5440_default_tmu_data) +#else +#define EXYNOS5440_TMU_DRV_DATA (NULL) +#endif + +#endif /*_EXYNOS_TMU_DATA_H*/ diff --git a/drivers/thermal/step_wise.c b/drivers/thermal/step_wise.c index 4d4ddae1a991..d89e781b0a18 100644 --- a/drivers/thermal/step_wise.c +++ b/drivers/thermal/step_wise.c @@ -51,44 +51,51 @@ static unsigned long get_target_state(struct thermal_instance *instance, { struct thermal_cooling_device *cdev = instance->cdev; unsigned long cur_state; + unsigned long next_target; + /* + * We keep this instance the way it is by default. + * Otherwise, we use the current state of the + * cdev in use to determine the next_target. + */ cdev->ops->get_cur_state(cdev, &cur_state); + next_target = instance->target; switch (trend) { case THERMAL_TREND_RAISING: if (throttle) { - cur_state = cur_state < instance->upper ? + next_target = cur_state < instance->upper ? (cur_state + 1) : instance->upper; - if (cur_state < instance->lower) - cur_state = instance->lower; + if (next_target < instance->lower) + next_target = instance->lower; } break; case THERMAL_TREND_RAISE_FULL: if (throttle) - cur_state = instance->upper; + next_target = instance->upper; break; case THERMAL_TREND_DROPPING: if (cur_state == instance->lower) { if (!throttle) - cur_state = -1; + next_target = THERMAL_NO_TARGET; } else { - cur_state -= 1; - if (cur_state > instance->upper) - cur_state = instance->upper; + next_target = cur_state - 1; + if (next_target > instance->upper) + next_target = instance->upper; } break; case THERMAL_TREND_DROP_FULL: if (cur_state == instance->lower) { if (!throttle) - cur_state = -1; + next_target = THERMAL_NO_TARGET; } else - cur_state = instance->lower; + next_target = instance->lower; break; default: break; } - return cur_state; + return next_target; } static void update_passive_instance(struct thermal_zone_device *tz, @@ -133,6 +140,9 @@ static void thermal_zone_trip_update(struct thermal_zone_device *tz, int trip) old_target = instance->target; instance->target = get_target_state(instance, trend, throttle); + if (old_target == instance->target) + continue; + /* Activate a passive thermal instance */ if (old_target == THERMAL_NO_TARGET && instance->target != THERMAL_NO_TARGET) diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c index 1f02e8edb45c..4962a6aaf295 100644 --- a/drivers/thermal/thermal_core.c +++ b/drivers/thermal/thermal_core.c @@ -38,6 +38,7 @@ #include <net/genetlink.h> #include "thermal_core.h" +#include "thermal_hwmon.h" MODULE_AUTHOR("Zhang Rui"); MODULE_DESCRIPTION("Generic thermal management sysfs support"); @@ -201,14 +202,23 @@ static void print_bind_err_msg(struct thermal_zone_device *tz, } static void __bind(struct thermal_zone_device *tz, int mask, - struct thermal_cooling_device *cdev) + struct thermal_cooling_device *cdev, + unsigned long *limits) { int i, ret; for (i = 0; i < tz->trips; i++) { if (mask & (1 << i)) { + unsigned long upper, lower; + + upper = THERMAL_NO_LIMIT; + lower = THERMAL_NO_LIMIT; + if (limits) { + lower = limits[i * 2]; + upper = limits[i * 2 + 1]; + } ret = thermal_zone_bind_cooling_device(tz, i, cdev, - THERMAL_NO_LIMIT, THERMAL_NO_LIMIT); + upper, lower); if (ret) print_bind_err_msg(tz, cdev, ret); } @@ -253,7 +263,8 @@ static void bind_cdev(struct thermal_cooling_device *cdev) if (tzp->tbp[i].match(pos, cdev)) continue; tzp->tbp[i].cdev = cdev; - __bind(pos, tzp->tbp[i].trip_mask, cdev); + __bind(pos, tzp->tbp[i].trip_mask, cdev, + tzp->tbp[i].binding_limits); } } @@ -291,7 +302,8 @@ static void bind_tz(struct thermal_zone_device *tz) if (tzp->tbp[i].match(tz, pos)) continue; tzp->tbp[i].cdev = pos; - __bind(tz, tzp->tbp[i].trip_mask, pos); + __bind(tz, tzp->tbp[i].trip_mask, pos, + tzp->tbp[i].binding_limits); } } exit: @@ -859,260 +871,6 @@ thermal_cooling_device_trip_point_show(struct device *dev, /* Device management */ -#if defined(CONFIG_THERMAL_HWMON) - -/* hwmon sys I/F */ -#include <linux/hwmon.h> - -/* thermal zone devices with the same type share one hwmon device */ -struct thermal_hwmon_device { - char type[THERMAL_NAME_LENGTH]; - struct device *device; - int count; - struct list_head tz_list; - struct list_head node; -}; - -struct thermal_hwmon_attr { - struct device_attribute attr; - char name[16]; -}; - -/* one temperature input for each thermal zone */ -struct thermal_hwmon_temp { - struct list_head hwmon_node; - struct thermal_zone_device *tz; - struct thermal_hwmon_attr temp_input; /* hwmon sys attr */ - struct thermal_hwmon_attr temp_crit; /* hwmon sys attr */ -}; - -static LIST_HEAD(thermal_hwmon_list); - -static ssize_t -name_show(struct device *dev, struct device_attribute *attr, char *buf) -{ - struct thermal_hwmon_device *hwmon = dev_get_drvdata(dev); - return sprintf(buf, "%s\n", hwmon->type); -} -static DEVICE_ATTR(name, 0444, name_show, NULL); - -static ssize_t -temp_input_show(struct device *dev, struct device_attribute *attr, char *buf) -{ - long temperature; - int ret; - struct thermal_hwmon_attr *hwmon_attr - = container_of(attr, struct thermal_hwmon_attr, attr); - struct thermal_hwmon_temp *temp - = container_of(hwmon_attr, struct thermal_hwmon_temp, - temp_input); - struct thermal_zone_device *tz = temp->tz; - - ret = thermal_zone_get_temp(tz, &temperature); - - if (ret) - return ret; - - return sprintf(buf, "%ld\n", temperature); -} - -static ssize_t -temp_crit_show(struct device *dev, struct device_attribute *attr, - char *buf) -{ - struct thermal_hwmon_attr *hwmon_attr - = container_of(attr, struct thermal_hwmon_attr, attr); - struct thermal_hwmon_temp *temp - = container_of(hwmon_attr, struct thermal_hwmon_temp, - temp_crit); - struct thermal_zone_device *tz = temp->tz; - long temperature; - int ret; - - ret = tz->ops->get_trip_temp(tz, 0, &temperature); - if (ret) - return ret; - - return sprintf(buf, "%ld\n", temperature); -} - - -static struct thermal_hwmon_device * -thermal_hwmon_lookup_by_type(const struct thermal_zone_device *tz) -{ - struct thermal_hwmon_device *hwmon; - - mutex_lock(&thermal_list_lock); - list_for_each_entry(hwmon, &thermal_hwmon_list, node) - if (!strcmp(hwmon->type, tz->type)) { - mutex_unlock(&thermal_list_lock); - return hwmon; - } - mutex_unlock(&thermal_list_lock); - - return NULL; -} - -/* Find the temperature input matching a given thermal zone */ -static struct thermal_hwmon_temp * -thermal_hwmon_lookup_temp(const struct thermal_hwmon_device *hwmon, - const struct thermal_zone_device *tz) -{ - struct thermal_hwmon_temp *temp; - - mutex_lock(&thermal_list_lock); - list_for_each_entry(temp, &hwmon->tz_list, hwmon_node) - if (temp->tz == tz) { - mutex_unlock(&thermal_list_lock); - return temp; - } - mutex_unlock(&thermal_list_lock); - - return NULL; -} - -static int -thermal_add_hwmon_sysfs(struct thermal_zone_device *tz) -{ - struct thermal_hwmon_device *hwmon; - struct thermal_hwmon_temp *temp; - int new_hwmon_device = 1; - int result; - - hwmon = thermal_hwmon_lookup_by_type(tz); - if (hwmon) { - new_hwmon_device = 0; - goto register_sys_interface; - } - - hwmon = kzalloc(sizeof(struct thermal_hwmon_device), GFP_KERNEL); - if (!hwmon) - return -ENOMEM; - - INIT_LIST_HEAD(&hwmon->tz_list); - strlcpy(hwmon->type, tz->type, THERMAL_NAME_LENGTH); - hwmon->device = hwmon_device_register(NULL); - if (IS_ERR(hwmon->device)) { - result = PTR_ERR(hwmon->device); - goto free_mem; - } - dev_set_drvdata(hwmon->device, hwmon); - result = device_create_file(hwmon->device, &dev_attr_name); - if (result) - goto free_mem; - - register_sys_interface: - temp = kzalloc(sizeof(struct thermal_hwmon_temp), GFP_KERNEL); - if (!temp) { - result = -ENOMEM; - goto unregister_name; - } - - temp->tz = tz; - hwmon->count++; - - snprintf(temp->temp_input.name, sizeof(temp->temp_input.name), - "temp%d_input", hwmon->count); - temp->temp_input.attr.attr.name = temp->temp_input.name; - temp->temp_input.attr.attr.mode = 0444; - temp->temp_input.attr.show = temp_input_show; - sysfs_attr_init(&temp->temp_input.attr.attr); - result = device_create_file(hwmon->device, &temp->temp_input.attr); - if (result) - goto free_temp_mem; - - if (tz->ops->get_crit_temp) { - unsigned long temperature; - if (!tz->ops->get_crit_temp(tz, &temperature)) { - snprintf(temp->temp_crit.name, - sizeof(temp->temp_crit.name), - "temp%d_crit", hwmon->count); - temp->temp_crit.attr.attr.name = temp->temp_crit.name; - temp->temp_crit.attr.attr.mode = 0444; - temp->temp_crit.attr.show = temp_crit_show; - sysfs_attr_init(&temp->temp_crit.attr.attr); - result = device_create_file(hwmon->device, - &temp->temp_crit.attr); - if (result) - goto unregister_input; - } - } - - mutex_lock(&thermal_list_lock); - if (new_hwmon_device) - list_add_tail(&hwmon->node, &thermal_hwmon_list); - list_add_tail(&temp->hwmon_node, &hwmon->tz_list); - mutex_unlock(&thermal_list_lock); - - return 0; - - unregister_input: - device_remove_file(hwmon->device, &temp->temp_input.attr); - free_temp_mem: - kfree(temp); - unregister_name: - if (new_hwmon_device) { - device_remove_file(hwmon->device, &dev_attr_name); - hwmon_device_unregister(hwmon->device); - } - free_mem: - if (new_hwmon_device) - kfree(hwmon); - - return result; -} - -static void -thermal_remove_hwmon_sysfs(struct thermal_zone_device *tz) -{ - struct thermal_hwmon_device *hwmon; - struct thermal_hwmon_temp *temp; - - hwmon = thermal_hwmon_lookup_by_type(tz); - if (unlikely(!hwmon)) { - /* Should never happen... */ - dev_dbg(&tz->device, "hwmon device lookup failed!\n"); - return; - } - - temp = thermal_hwmon_lookup_temp(hwmon, tz); - if (unlikely(!temp)) { - /* Should never happen... */ - dev_dbg(&tz->device, "temperature input lookup failed!\n"); - return; - } - - device_remove_file(hwmon->device, &temp->temp_input.attr); - if (tz->ops->get_crit_temp) - device_remove_file(hwmon->device, &temp->temp_crit.attr); - - mutex_lock(&thermal_list_lock); - list_del(&temp->hwmon_node); - kfree(temp); - if (!list_empty(&hwmon->tz_list)) { - mutex_unlock(&thermal_list_lock); - return; - } - list_del(&hwmon->node); - mutex_unlock(&thermal_list_lock); - - device_remove_file(hwmon->device, &dev_attr_name); - hwmon_device_unregister(hwmon->device); - kfree(hwmon); -} -#else -static int -thermal_add_hwmon_sysfs(struct thermal_zone_device *tz) -{ - return 0; -} - -static void -thermal_remove_hwmon_sysfs(struct thermal_zone_device *tz) -{ -} -#endif - /** * thermal_zone_bind_cooling_device() - bind a cooling device to a thermal zone * @tz: pointer to struct thermal_zone_device @@ -1715,9 +1473,11 @@ struct thermal_zone_device *thermal_zone_device_register(const char *type, mutex_unlock(&thermal_governor_lock); - result = thermal_add_hwmon_sysfs(tz); - if (result) - goto unregister; + if (!tz->tzp || !tz->tzp->no_hwmon) { + result = thermal_add_hwmon_sysfs(tz); + if (result) + goto unregister; + } mutex_lock(&thermal_list_lock); list_add_tail(&tz->node, &thermal_tz_list); diff --git a/drivers/thermal/thermal_hwmon.c b/drivers/thermal/thermal_hwmon.c new file mode 100644 index 000000000000..eeef0e2498ca --- /dev/null +++ b/drivers/thermal/thermal_hwmon.c @@ -0,0 +1,269 @@ +/* + * thermal_hwmon.c - Generic Thermal Management hwmon support. + * + * Code based on Intel thermal_core.c. Copyrights of the original code: + * Copyright (C) 2008 Intel Corp + * Copyright (C) 2008 Zhang Rui <rui.zhang@intel.com> + * Copyright (C) 2008 Sujith Thomas <sujith.thomas@intel.com> + * + * Copyright (C) 2013 Texas Instruments + * Copyright (C) 2013 Eduardo Valentin <eduardo.valentin@ti.com> + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + */ +#include <linux/hwmon.h> +#include <linux/thermal.h> +#include <linux/slab.h> +#include <linux/err.h> +#include "thermal_hwmon.h" + +/* hwmon sys I/F */ +/* thermal zone devices with the same type share one hwmon device */ +struct thermal_hwmon_device { + char type[THERMAL_NAME_LENGTH]; + struct device *device; + int count; + struct list_head tz_list; + struct list_head node; +}; + +struct thermal_hwmon_attr { + struct device_attribute attr; + char name[16]; +}; + +/* one temperature input for each thermal zone */ +struct thermal_hwmon_temp { + struct list_head hwmon_node; + struct thermal_zone_device *tz; + struct thermal_hwmon_attr temp_input; /* hwmon sys attr */ + struct thermal_hwmon_attr temp_crit; /* hwmon sys attr */ +}; + +static LIST_HEAD(thermal_hwmon_list); + +static DEFINE_MUTEX(thermal_hwmon_list_lock); + +static ssize_t +name_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct thermal_hwmon_device *hwmon = dev_get_drvdata(dev); + return sprintf(buf, "%s\n", hwmon->type); +} +static DEVICE_ATTR(name, 0444, name_show, NULL); + +static ssize_t +temp_input_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + long temperature; + int ret; + struct thermal_hwmon_attr *hwmon_attr + = container_of(attr, struct thermal_hwmon_attr, attr); + struct thermal_hwmon_temp *temp + = container_of(hwmon_attr, struct thermal_hwmon_temp, + temp_input); + struct thermal_zone_device *tz = temp->tz; + + ret = thermal_zone_get_temp(tz, &temperature); + + if (ret) + return ret; + + return sprintf(buf, "%ld\n", temperature); +} + +static ssize_t +temp_crit_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct thermal_hwmon_attr *hwmon_attr + = container_of(attr, struct thermal_hwmon_attr, attr); + struct thermal_hwmon_temp *temp + = container_of(hwmon_attr, struct thermal_hwmon_temp, + temp_crit); + struct thermal_zone_device *tz = temp->tz; + long temperature; + int ret; + + ret = tz->ops->get_trip_temp(tz, 0, &temperature); + if (ret) + return ret; + + return sprintf(buf, "%ld\n", temperature); +} + + +static struct thermal_hwmon_device * +thermal_hwmon_lookup_by_type(const struct thermal_zone_device *tz) +{ + struct thermal_hwmon_device *hwmon; + + mutex_lock(&thermal_hwmon_list_lock); + list_for_each_entry(hwmon, &thermal_hwmon_list, node) + if (!strcmp(hwmon->type, tz->type)) { + mutex_unlock(&thermal_hwmon_list_lock); + return hwmon; + } + mutex_unlock(&thermal_hwmon_list_lock); + + return NULL; +} + +/* Find the temperature input matching a given thermal zone */ +static struct thermal_hwmon_temp * +thermal_hwmon_lookup_temp(const struct thermal_hwmon_device *hwmon, + const struct thermal_zone_device *tz) +{ + struct thermal_hwmon_temp *temp; + + mutex_lock(&thermal_hwmon_list_lock); + list_for_each_entry(temp, &hwmon->tz_list, hwmon_node) + if (temp->tz == tz) { + mutex_unlock(&thermal_hwmon_list_lock); + return temp; + } + mutex_unlock(&thermal_hwmon_list_lock); + + return NULL; +} + +int thermal_add_hwmon_sysfs(struct thermal_zone_device *tz) +{ + struct thermal_hwmon_device *hwmon; + struct thermal_hwmon_temp *temp; + int new_hwmon_device = 1; + int result; + + hwmon = thermal_hwmon_lookup_by_type(tz); + if (hwmon) { + new_hwmon_device = 0; + goto register_sys_interface; + } + + hwmon = kzalloc(sizeof(*hwmon), GFP_KERNEL); + if (!hwmon) + return -ENOMEM; + + INIT_LIST_HEAD(&hwmon->tz_list); + strlcpy(hwmon->type, tz->type, THERMAL_NAME_LENGTH); + hwmon->device = hwmon_device_register(&tz->device); + if (IS_ERR(hwmon->device)) { + result = PTR_ERR(hwmon->device); + goto free_mem; + } + dev_set_drvdata(hwmon->device, hwmon); + result = device_create_file(hwmon->device, &dev_attr_name); + if (result) + goto free_mem; + + register_sys_interface: + temp = kzalloc(sizeof(*temp), GFP_KERNEL); + if (!temp) { + result = -ENOMEM; + goto unregister_name; + } + + temp->tz = tz; + hwmon->count++; + + snprintf(temp->temp_input.name, sizeof(temp->temp_input.name), + "temp%d_input", hwmon->count); + temp->temp_input.attr.attr.name = temp->temp_input.name; + temp->temp_input.attr.attr.mode = 0444; + temp->temp_input.attr.show = temp_input_show; + sysfs_attr_init(&temp->temp_input.attr.attr); + result = device_create_file(hwmon->device, &temp->temp_input.attr); + if (result) + goto free_temp_mem; + + if (tz->ops->get_crit_temp) { + unsigned long temperature; + if (!tz->ops->get_crit_temp(tz, &temperature)) { + snprintf(temp->temp_crit.name, + sizeof(temp->temp_crit.name), + "temp%d_crit", hwmon->count); + temp->temp_crit.attr.attr.name = temp->temp_crit.name; + temp->temp_crit.attr.attr.mode = 0444; + temp->temp_crit.attr.show = temp_crit_show; + sysfs_attr_init(&temp->temp_crit.attr.attr); + result = device_create_file(hwmon->device, + &temp->temp_crit.attr); + if (result) + goto unregister_input; + } + } + + mutex_lock(&thermal_hwmon_list_lock); + if (new_hwmon_device) + list_add_tail(&hwmon->node, &thermal_hwmon_list); + list_add_tail(&temp->hwmon_node, &hwmon->tz_list); + mutex_unlock(&thermal_hwmon_list_lock); + + return 0; + + unregister_input: + device_remove_file(hwmon->device, &temp->temp_input.attr); + free_temp_mem: + kfree(temp); + unregister_name: + if (new_hwmon_device) { + device_remove_file(hwmon->device, &dev_attr_name); + hwmon_device_unregister(hwmon->device); + } + free_mem: + if (new_hwmon_device) + kfree(hwmon); + + return result; +} + +void thermal_remove_hwmon_sysfs(struct thermal_zone_device *tz) +{ + struct thermal_hwmon_device *hwmon; + struct thermal_hwmon_temp *temp; + + hwmon = thermal_hwmon_lookup_by_type(tz); + if (unlikely(!hwmon)) { + /* Should never happen... */ + dev_dbg(&tz->device, "hwmon device lookup failed!\n"); + return; + } + + temp = thermal_hwmon_lookup_temp(hwmon, tz); + if (unlikely(!temp)) { + /* Should never happen... */ + dev_dbg(&tz->device, "temperature input lookup failed!\n"); + return; + } + + device_remove_file(hwmon->device, &temp->temp_input.attr); + if (tz->ops->get_crit_temp) + device_remove_file(hwmon->device, &temp->temp_crit.attr); + + mutex_lock(&thermal_hwmon_list_lock); + list_del(&temp->hwmon_node); + kfree(temp); + if (!list_empty(&hwmon->tz_list)) { + mutex_unlock(&thermal_hwmon_list_lock); + return; + } + list_del(&hwmon->node); + mutex_unlock(&thermal_hwmon_list_lock); + + device_remove_file(hwmon->device, &dev_attr_name); + hwmon_device_unregister(hwmon->device); + kfree(hwmon); +} diff --git a/drivers/thermal/thermal_hwmon.h b/drivers/thermal/thermal_hwmon.h new file mode 100644 index 000000000000..c798fdb2ae43 --- /dev/null +++ b/drivers/thermal/thermal_hwmon.h @@ -0,0 +1,49 @@ +/* + * thermal_hwmon.h - Generic Thermal Management hwmon support. + * + * Code based on Intel thermal_core.c. Copyrights of the original code: + * Copyright (C) 2008 Intel Corp + * Copyright (C) 2008 Zhang Rui <rui.zhang@intel.com> + * Copyright (C) 2008 Sujith Thomas <sujith.thomas@intel.com> + * + * Copyright (C) 2013 Texas Instruments + * Copyright (C) 2013 Eduardo Valentin <eduardo.valentin@ti.com> + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + */ +#ifndef __THERMAL_HWMON_H__ +#define __THERMAL_HWMON_H__ + +#include <linux/thermal.h> + +#ifdef CONFIG_THERMAL_HWMON +int thermal_add_hwmon_sysfs(struct thermal_zone_device *tz); +void thermal_remove_hwmon_sysfs(struct thermal_zone_device *tz); +#else +static int +thermal_add_hwmon_sysfs(struct thermal_zone_device *tz) +{ + return 0; +} + +static void +thermal_remove_hwmon_sysfs(struct thermal_zone_device *tz) +{ +} +#endif + +#endif /* __THERMAL_HWMON_H__ */ diff --git a/drivers/thermal/ti-soc-thermal/dra752-thermal-data.c b/drivers/thermal/ti-soc-thermal/dra752-thermal-data.c index e5d8326a54d6..a4929272074f 100644 --- a/drivers/thermal/ti-soc-thermal/dra752-thermal-data.c +++ b/drivers/thermal/ti-soc-thermal/dra752-thermal-data.c @@ -42,6 +42,7 @@ dra752_core_temp_sensor_registers = { .mask_hot_mask = DRA752_BANDGAP_CTRL_1_MASK_HOT_CORE_MASK, .mask_cold_mask = DRA752_BANDGAP_CTRL_1_MASK_COLD_CORE_MASK, .mask_sidlemode_mask = DRA752_BANDGAP_CTRL_1_SIDLEMODE_MASK, + .mask_counter_delay_mask = DRA752_BANDGAP_CTRL_1_COUNTER_DELAY_MASK, .mask_freeze_mask = DRA752_BANDGAP_CTRL_1_FREEZE_CORE_MASK, .mask_clear_mask = DRA752_BANDGAP_CTRL_1_CLEAR_CORE_MASK, .mask_clear_accum_mask = DRA752_BANDGAP_CTRL_1_CLEAR_ACCUM_CORE_MASK, @@ -77,6 +78,7 @@ dra752_iva_temp_sensor_registers = { .mask_hot_mask = DRA752_BANDGAP_CTRL_2_MASK_HOT_IVA_MASK, .mask_cold_mask = DRA752_BANDGAP_CTRL_2_MASK_COLD_IVA_MASK, .mask_sidlemode_mask = DRA752_BANDGAP_CTRL_1_SIDLEMODE_MASK, + .mask_counter_delay_mask = DRA752_BANDGAP_CTRL_1_COUNTER_DELAY_MASK, .mask_freeze_mask = DRA752_BANDGAP_CTRL_2_FREEZE_IVA_MASK, .mask_clear_mask = DRA752_BANDGAP_CTRL_2_CLEAR_IVA_MASK, .mask_clear_accum_mask = DRA752_BANDGAP_CTRL_2_CLEAR_ACCUM_IVA_MASK, @@ -112,6 +114,7 @@ dra752_mpu_temp_sensor_registers = { .mask_hot_mask = DRA752_BANDGAP_CTRL_1_MASK_HOT_MPU_MASK, .mask_cold_mask = DRA752_BANDGAP_CTRL_1_MASK_COLD_MPU_MASK, .mask_sidlemode_mask = DRA752_BANDGAP_CTRL_1_SIDLEMODE_MASK, + .mask_counter_delay_mask = DRA752_BANDGAP_CTRL_1_COUNTER_DELAY_MASK, .mask_freeze_mask = DRA752_BANDGAP_CTRL_1_FREEZE_MPU_MASK, .mask_clear_mask = DRA752_BANDGAP_CTRL_1_CLEAR_MPU_MASK, .mask_clear_accum_mask = DRA752_BANDGAP_CTRL_1_CLEAR_ACCUM_MPU_MASK, @@ -147,6 +150,7 @@ dra752_dspeve_temp_sensor_registers = { .mask_hot_mask = DRA752_BANDGAP_CTRL_2_MASK_HOT_DSPEVE_MASK, .mask_cold_mask = DRA752_BANDGAP_CTRL_2_MASK_COLD_DSPEVE_MASK, .mask_sidlemode_mask = DRA752_BANDGAP_CTRL_1_SIDLEMODE_MASK, + .mask_counter_delay_mask = DRA752_BANDGAP_CTRL_1_COUNTER_DELAY_MASK, .mask_freeze_mask = DRA752_BANDGAP_CTRL_2_FREEZE_DSPEVE_MASK, .mask_clear_mask = DRA752_BANDGAP_CTRL_2_CLEAR_DSPEVE_MASK, .mask_clear_accum_mask = DRA752_BANDGAP_CTRL_2_CLEAR_ACCUM_DSPEVE_MASK, @@ -182,6 +186,7 @@ dra752_gpu_temp_sensor_registers = { .mask_hot_mask = DRA752_BANDGAP_CTRL_1_MASK_HOT_GPU_MASK, .mask_cold_mask = DRA752_BANDGAP_CTRL_1_MASK_COLD_GPU_MASK, .mask_sidlemode_mask = DRA752_BANDGAP_CTRL_1_SIDLEMODE_MASK, + .mask_counter_delay_mask = DRA752_BANDGAP_CTRL_1_COUNTER_DELAY_MASK, .mask_freeze_mask = DRA752_BANDGAP_CTRL_1_FREEZE_GPU_MASK, .mask_clear_mask = DRA752_BANDGAP_CTRL_1_CLEAR_GPU_MASK, .mask_clear_accum_mask = DRA752_BANDGAP_CTRL_1_CLEAR_ACCUM_GPU_MASK, diff --git a/drivers/thermal/ti-soc-thermal/ti-bandgap.c b/drivers/thermal/ti-soc-thermal/ti-bandgap.c index 9dfd47196e63..74c0e3474d6e 100644 --- a/drivers/thermal/ti-soc-thermal/ti-bandgap.c +++ b/drivers/thermal/ti-soc-thermal/ti-bandgap.c @@ -1020,9 +1020,13 @@ int ti_bandgap_get_trend(struct ti_bandgap *bgp, int id, int *trend) /* Fetch the update interval */ ret = ti_bandgap_read_update_interval(bgp, id, &interval); - if (ret || !interval) + if (ret) goto unfreeze; + /* Set the interval to 1 ms if bandgap counter delay is not set */ + if (interval == 0) + interval = 1; + *trend = (t1 - t2) / interval; dev_dbg(bgp->dev, "The temperatures are t1 = %d and t2 = %d and trend =%d\n", diff --git a/drivers/thermal/ti-soc-thermal/ti-thermal-common.c b/drivers/thermal/ti-soc-thermal/ti-thermal-common.c index 4c5f55c37349..4f8b9af54a5a 100644 --- a/drivers/thermal/ti-soc-thermal/ti-thermal-common.c +++ b/drivers/thermal/ti-soc-thermal/ti-thermal-common.c @@ -174,6 +174,9 @@ static int ti_thermal_set_mode(struct thermal_zone_device *thermal, enum thermal_device_mode mode) { struct ti_thermal_data *data = thermal->devdata; + struct ti_bandgap *bgp; + + bgp = data->bgp; if (!data->ti_thermal) { dev_notice(&thermal->device, "thermal zone not registered\n"); @@ -190,6 +193,8 @@ static int ti_thermal_set_mode(struct thermal_zone_device *thermal, mutex_unlock(&data->ti_thermal->lock); data->mode = mode; + ti_bandgap_write_update_interval(bgp, data->sensor_id, + data->ti_thermal->polling_delay); thermal_zone_device_update(data->ti_thermal); dev_dbg(&thermal->device, "thermal polling set for duration=%d msec\n", data->ti_thermal->polling_delay); @@ -313,6 +318,8 @@ int ti_thermal_expose_sensor(struct ti_bandgap *bgp, int id, } data->ti_thermal->polling_delay = FAST_TEMP_MONITORING_RATE; ti_bandgap_set_sensor_data(bgp, id, data); + ti_bandgap_write_update_interval(bgp, data->sensor_id, + data->ti_thermal->polling_delay); return 0; } diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig index 362085d7ad8f..d1d53f301de7 100644 --- a/drivers/watchdog/Kconfig +++ b/drivers/watchdog/Kconfig @@ -290,6 +290,16 @@ config ORION_WATCHDOG To compile this driver as a module, choose M here: the module will be called orion_wdt. +config SUNXI_WATCHDOG + tristate "Allwinner SoCs watchdog support" + depends on ARCH_SUNXI + select WATCHDOG_CORE + help + Say Y here to include support for the watchdog timer + in Allwinner SoCs. + To compile this driver as a module, choose M here: the + module will be called sunxi_wdt. + config COH901327_WATCHDOG bool "ST-Ericsson COH 901 327 watchdog" depends on ARCH_U300 diff --git a/drivers/watchdog/Makefile b/drivers/watchdog/Makefile index 2f26a0b47ddc..6c5bb274d3cd 100644 --- a/drivers/watchdog/Makefile +++ b/drivers/watchdog/Makefile @@ -46,6 +46,7 @@ obj-$(CONFIG_PNX4008_WATCHDOG) += pnx4008_wdt.o obj-$(CONFIG_IOP_WATCHDOG) += iop_wdt.o obj-$(CONFIG_DAVINCI_WATCHDOG) += davinci_wdt.o obj-$(CONFIG_ORION_WATCHDOG) += orion_wdt.o +obj-$(CONFIG_SUNXI_WATCHDOG) += sunxi_wdt.o obj-$(CONFIG_COH901327_WATCHDOG) += coh901327_wdt.o obj-$(CONFIG_STMP3XXX_RTC_WATCHDOG) += stmp3xxx_rtc_wdt.o obj-$(CONFIG_NUC900_WATCHDOG) += nuc900_wdt.o diff --git a/drivers/watchdog/ar7_wdt.c b/drivers/watchdog/ar7_wdt.c index 2f3cc8fb471a..b3709f9cf5be 100644 --- a/drivers/watchdog/ar7_wdt.c +++ b/drivers/watchdog/ar7_wdt.c @@ -280,11 +280,6 @@ static int ar7_wdt_probe(struct platform_device *pdev) ar7_regs_wdt = platform_get_resource_byname(pdev, IORESOURCE_MEM, "regs"); - if (!ar7_regs_wdt) { - pr_err("could not get registers resource\n"); - return -ENODEV; - } - ar7_wdt = devm_ioremap_resource(&pdev->dev, ar7_regs_wdt); if (IS_ERR(ar7_wdt)) return PTR_ERR(ar7_wdt); diff --git a/drivers/watchdog/nuc900_wdt.c b/drivers/watchdog/nuc900_wdt.c index e2b6d2cf5c9d..b15b6efd91a1 100644 --- a/drivers/watchdog/nuc900_wdt.c +++ b/drivers/watchdog/nuc900_wdt.c @@ -256,11 +256,6 @@ static int nuc900wdt_probe(struct platform_device *pdev) spin_lock_init(&nuc900_wdt->wdt_lock); res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (res == NULL) { - dev_err(&pdev->dev, "no memory resource specified\n"); - return -ENOENT; - } - nuc900_wdt->wdt_base = devm_ioremap_resource(&pdev->dev, res); if (IS_ERR(nuc900_wdt->wdt_base)) return PTR_ERR(nuc900_wdt->wdt_base); diff --git a/drivers/watchdog/s3c2410_wdt.c b/drivers/watchdog/s3c2410_wdt.c index 6a22cf5d35bd..23aad7c6bf5d 100644 --- a/drivers/watchdog/s3c2410_wdt.c +++ b/drivers/watchdog/s3c2410_wdt.c @@ -84,13 +84,17 @@ MODULE_PARM_DESC(soft_noboot, "Watchdog action, set to 1 to ignore reboots, " "0 to reboot (default 0)"); MODULE_PARM_DESC(debug, "Watchdog debug, set to >1 for debug (default 0)"); -static struct device *wdt_dev; /* platform device attached to */ -static struct resource *wdt_mem; -static struct resource *wdt_irq; -static struct clk *wdt_clock; -static void __iomem *wdt_base; -static unsigned int wdt_count; -static DEFINE_SPINLOCK(wdt_lock); +struct s3c2410_wdt { + struct device *dev; + struct clk *clock; + void __iomem *reg_base; + unsigned int count; + spinlock_t lock; + unsigned long wtcon_save; + unsigned long wtdat_save; + struct watchdog_device wdt_device; + struct notifier_block freq_transition; +}; /* watchdog control routines */ @@ -102,29 +106,38 @@ do { \ /* functions */ +static inline struct s3c2410_wdt *freq_to_wdt(struct notifier_block *nb) +{ + return container_of(nb, struct s3c2410_wdt, freq_transition); +} + static int s3c2410wdt_keepalive(struct watchdog_device *wdd) { - spin_lock(&wdt_lock); - writel(wdt_count, wdt_base + S3C2410_WTCNT); - spin_unlock(&wdt_lock); + struct s3c2410_wdt *wdt = watchdog_get_drvdata(wdd); + + spin_lock(&wdt->lock); + writel(wdt->count, wdt->reg_base + S3C2410_WTCNT); + spin_unlock(&wdt->lock); return 0; } -static void __s3c2410wdt_stop(void) +static void __s3c2410wdt_stop(struct s3c2410_wdt *wdt) { unsigned long wtcon; - wtcon = readl(wdt_base + S3C2410_WTCON); + wtcon = readl(wdt->reg_base + S3C2410_WTCON); wtcon &= ~(S3C2410_WTCON_ENABLE | S3C2410_WTCON_RSTEN); - writel(wtcon, wdt_base + S3C2410_WTCON); + writel(wtcon, wdt->reg_base + S3C2410_WTCON); } static int s3c2410wdt_stop(struct watchdog_device *wdd) { - spin_lock(&wdt_lock); - __s3c2410wdt_stop(); - spin_unlock(&wdt_lock); + struct s3c2410_wdt *wdt = watchdog_get_drvdata(wdd); + + spin_lock(&wdt->lock); + __s3c2410wdt_stop(wdt); + spin_unlock(&wdt->lock); return 0; } @@ -132,12 +145,13 @@ static int s3c2410wdt_stop(struct watchdog_device *wdd) static int s3c2410wdt_start(struct watchdog_device *wdd) { unsigned long wtcon; + struct s3c2410_wdt *wdt = watchdog_get_drvdata(wdd); - spin_lock(&wdt_lock); + spin_lock(&wdt->lock); - __s3c2410wdt_stop(); + __s3c2410wdt_stop(wdt); - wtcon = readl(wdt_base + S3C2410_WTCON); + wtcon = readl(wdt->reg_base + S3C2410_WTCON); wtcon |= S3C2410_WTCON_ENABLE | S3C2410_WTCON_DIV128; if (soft_noboot) { @@ -148,25 +162,26 @@ static int s3c2410wdt_start(struct watchdog_device *wdd) wtcon |= S3C2410_WTCON_RSTEN; } - DBG("%s: wdt_count=0x%08x, wtcon=%08lx\n", - __func__, wdt_count, wtcon); + DBG("%s: count=0x%08x, wtcon=%08lx\n", + __func__, wdt->count, wtcon); - writel(wdt_count, wdt_base + S3C2410_WTDAT); - writel(wdt_count, wdt_base + S3C2410_WTCNT); - writel(wtcon, wdt_base + S3C2410_WTCON); - spin_unlock(&wdt_lock); + writel(wdt->count, wdt->reg_base + S3C2410_WTDAT); + writel(wdt->count, wdt->reg_base + S3C2410_WTCNT); + writel(wtcon, wdt->reg_base + S3C2410_WTCON); + spin_unlock(&wdt->lock); return 0; } -static inline int s3c2410wdt_is_running(void) +static inline int s3c2410wdt_is_running(struct s3c2410_wdt *wdt) { - return readl(wdt_base + S3C2410_WTCON) & S3C2410_WTCON_ENABLE; + return readl(wdt->reg_base + S3C2410_WTCON) & S3C2410_WTCON_ENABLE; } static int s3c2410wdt_set_heartbeat(struct watchdog_device *wdd, unsigned timeout) { - unsigned long freq = clk_get_rate(wdt_clock); + struct s3c2410_wdt *wdt = watchdog_get_drvdata(wdd); + unsigned long freq = clk_get_rate(wdt->clock); unsigned int count; unsigned int divisor = 1; unsigned long wtcon; @@ -192,7 +207,7 @@ static int s3c2410wdt_set_heartbeat(struct watchdog_device *wdd, unsigned timeou } if ((count / divisor) >= 0x10000) { - dev_err(wdt_dev, "timeout %d too big\n", timeout); + dev_err(wdt->dev, "timeout %d too big\n", timeout); return -EINVAL; } } @@ -201,15 +216,15 @@ static int s3c2410wdt_set_heartbeat(struct watchdog_device *wdd, unsigned timeou __func__, timeout, divisor, count, count/divisor); count /= divisor; - wdt_count = count; + wdt->count = count; /* update the pre-scaler */ - wtcon = readl(wdt_base + S3C2410_WTCON); + wtcon = readl(wdt->reg_base + S3C2410_WTCON); wtcon &= ~S3C2410_WTCON_PRESCALE_MASK; wtcon |= S3C2410_WTCON_PRESCALE(divisor-1); - writel(count, wdt_base + S3C2410_WTDAT); - writel(wtcon, wdt_base + S3C2410_WTCON); + writel(count, wdt->reg_base + S3C2410_WTDAT); + writel(wtcon, wdt->reg_base + S3C2410_WTCON); wdd->timeout = (count * divisor) / freq; @@ -242,21 +257,23 @@ static struct watchdog_device s3c2410_wdd = { static irqreturn_t s3c2410wdt_irq(int irqno, void *param) { - dev_info(wdt_dev, "watchdog timer expired (irq)\n"); + struct s3c2410_wdt *wdt = platform_get_drvdata(param); + + dev_info(wdt->dev, "watchdog timer expired (irq)\n"); - s3c2410wdt_keepalive(&s3c2410_wdd); + s3c2410wdt_keepalive(&wdt->wdt_device); return IRQ_HANDLED; } - #ifdef CONFIG_CPU_FREQ static int s3c2410wdt_cpufreq_transition(struct notifier_block *nb, unsigned long val, void *data) { int ret; + struct s3c2410_wdt *wdt = freq_to_wdt(nb); - if (!s3c2410wdt_is_running()) + if (!s3c2410wdt_is_running(wdt)) goto done; if (val == CPUFREQ_PRECHANGE) { @@ -265,14 +282,15 @@ static int s3c2410wdt_cpufreq_transition(struct notifier_block *nb, * the watchdog is running. */ - s3c2410wdt_keepalive(&s3c2410_wdd); + s3c2410wdt_keepalive(&wdt->wdt_device); } else if (val == CPUFREQ_POSTCHANGE) { - s3c2410wdt_stop(&s3c2410_wdd); + s3c2410wdt_stop(&wdt->wdt_device); - ret = s3c2410wdt_set_heartbeat(&s3c2410_wdd, s3c2410_wdd.timeout); + ret = s3c2410wdt_set_heartbeat(&wdt->wdt_device, + wdt->wdt_device.timeout); if (ret >= 0) - s3c2410wdt_start(&s3c2410_wdd); + s3c2410wdt_start(&wdt->wdt_device); else goto err; } @@ -281,34 +299,35 @@ done: return 0; err: - dev_err(wdt_dev, "cannot set new value for timeout %d\n", - s3c2410_wdd.timeout); + dev_err(wdt->dev, "cannot set new value for timeout %d\n", + wdt->wdt_device.timeout); return ret; } -static struct notifier_block s3c2410wdt_cpufreq_transition_nb = { - .notifier_call = s3c2410wdt_cpufreq_transition, -}; - -static inline int s3c2410wdt_cpufreq_register(void) +static inline int s3c2410wdt_cpufreq_register(struct s3c2410_wdt *wdt) { - return cpufreq_register_notifier(&s3c2410wdt_cpufreq_transition_nb, + wdt->freq_transition.notifier_call = s3c2410wdt_cpufreq_transition; + + return cpufreq_register_notifier(&wdt->freq_transition, CPUFREQ_TRANSITION_NOTIFIER); } -static inline void s3c2410wdt_cpufreq_deregister(void) +static inline void s3c2410wdt_cpufreq_deregister(struct s3c2410_wdt *wdt) { - cpufreq_unregister_notifier(&s3c2410wdt_cpufreq_transition_nb, + wdt->freq_transition.notifier_call = s3c2410wdt_cpufreq_transition; + + cpufreq_unregister_notifier(&wdt->freq_transition, CPUFREQ_TRANSITION_NOTIFIER); } #else -static inline int s3c2410wdt_cpufreq_register(void) + +static inline int s3c2410wdt_cpufreq_register(struct s3c2410_wdt *wdt) { return 0; } -static inline void s3c2410wdt_cpufreq_deregister(void) +static inline void s3c2410wdt_cpufreq_deregister(struct s3c2410_wdt *wdt) { } #endif @@ -316,6 +335,9 @@ static inline void s3c2410wdt_cpufreq_deregister(void) static int s3c2410wdt_probe(struct platform_device *pdev) { struct device *dev; + struct s3c2410_wdt *wdt; + struct resource *wdt_mem; + struct resource *wdt_irq; unsigned int wtcon; int started = 0; int ret; @@ -323,13 +345,14 @@ static int s3c2410wdt_probe(struct platform_device *pdev) DBG("%s: probe=%p\n", __func__, pdev); dev = &pdev->dev; - wdt_dev = &pdev->dev; - wdt_mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (wdt_mem == NULL) { - dev_err(dev, "no memory resource specified\n"); - return -ENOENT; - } + wdt = devm_kzalloc(dev, sizeof(*wdt), GFP_KERNEL); + if (!wdt) + return -ENOMEM; + + wdt->dev = &pdev->dev; + spin_lock_init(&wdt->lock); + wdt->wdt_device = s3c2410_wdd; wdt_irq = platform_get_resource(pdev, IORESOURCE_IRQ, 0); if (wdt_irq == NULL) { @@ -339,35 +362,40 @@ static int s3c2410wdt_probe(struct platform_device *pdev) } /* get the memory region for the watchdog timer */ - wdt_base = devm_ioremap_resource(dev, wdt_mem); - if (IS_ERR(wdt_base)) { - ret = PTR_ERR(wdt_base); + wdt_mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); + wdt->reg_base = devm_ioremap_resource(dev, wdt_mem); + if (IS_ERR(wdt->reg_base)) { + ret = PTR_ERR(wdt->reg_base); goto err; } - DBG("probe: mapped wdt_base=%p\n", wdt_base); + DBG("probe: mapped reg_base=%p\n", wdt->reg_base); - wdt_clock = devm_clk_get(dev, "watchdog"); - if (IS_ERR(wdt_clock)) { + wdt->clock = devm_clk_get(dev, "watchdog"); + if (IS_ERR(wdt->clock)) { dev_err(dev, "failed to find watchdog clock source\n"); - ret = PTR_ERR(wdt_clock); + ret = PTR_ERR(wdt->clock); goto err; } - clk_prepare_enable(wdt_clock); + clk_prepare_enable(wdt->clock); - ret = s3c2410wdt_cpufreq_register(); + ret = s3c2410wdt_cpufreq_register(wdt); if (ret < 0) { dev_err(dev, "failed to register cpufreq\n"); goto err_clk; } + watchdog_set_drvdata(&wdt->wdt_device, wdt); + /* see if we can actually set the requested timer margin, and if * not, try the default value */ - watchdog_init_timeout(&s3c2410_wdd, tmr_margin, &pdev->dev); - if (s3c2410wdt_set_heartbeat(&s3c2410_wdd, s3c2410_wdd.timeout)) { - started = s3c2410wdt_set_heartbeat(&s3c2410_wdd, + watchdog_init_timeout(&wdt->wdt_device, tmr_margin, &pdev->dev); + ret = s3c2410wdt_set_heartbeat(&wdt->wdt_device, + wdt->wdt_device.timeout); + if (ret) { + started = s3c2410wdt_set_heartbeat(&wdt->wdt_device, CONFIG_S3C2410_WATCHDOG_DEFAULT_TIME); if (started == 0) @@ -386,9 +414,9 @@ static int s3c2410wdt_probe(struct platform_device *pdev) goto err_cpufreq; } - watchdog_set_nowayout(&s3c2410_wdd, nowayout); + watchdog_set_nowayout(&wdt->wdt_device, nowayout); - ret = watchdog_register_device(&s3c2410_wdd); + ret = watchdog_register_device(&wdt->wdt_device); if (ret) { dev_err(dev, "cannot register watchdog (%d)\n", ret); goto err_cpufreq; @@ -396,18 +424,20 @@ static int s3c2410wdt_probe(struct platform_device *pdev) if (tmr_atboot && started == 0) { dev_info(dev, "starting watchdog timer\n"); - s3c2410wdt_start(&s3c2410_wdd); + s3c2410wdt_start(&wdt->wdt_device); } else if (!tmr_atboot) { /* if we're not enabling the watchdog, then ensure it is * disabled if it has been left running from the bootloader * or other source */ - s3c2410wdt_stop(&s3c2410_wdd); + s3c2410wdt_stop(&wdt->wdt_device); } + platform_set_drvdata(pdev, wdt); + /* print out a statement of readiness */ - wtcon = readl(wdt_base + S3C2410_WTCON); + wtcon = readl(wdt->reg_base + S3C2410_WTCON); dev_info(dev, "watchdog %sactive, reset %sabled, irq %sabled\n", (wtcon & S3C2410_WTCON_ENABLE) ? "" : "in", @@ -417,64 +447,64 @@ static int s3c2410wdt_probe(struct platform_device *pdev) return 0; err_cpufreq: - s3c2410wdt_cpufreq_deregister(); + s3c2410wdt_cpufreq_deregister(wdt); err_clk: - clk_disable_unprepare(wdt_clock); - wdt_clock = NULL; + clk_disable_unprepare(wdt->clock); + wdt->clock = NULL; err: - wdt_irq = NULL; - wdt_mem = NULL; return ret; } static int s3c2410wdt_remove(struct platform_device *dev) { - watchdog_unregister_device(&s3c2410_wdd); + struct s3c2410_wdt *wdt = platform_get_drvdata(dev); - s3c2410wdt_cpufreq_deregister(); + watchdog_unregister_device(&wdt->wdt_device); - clk_disable_unprepare(wdt_clock); - wdt_clock = NULL; + s3c2410wdt_cpufreq_deregister(wdt); + + clk_disable_unprepare(wdt->clock); + wdt->clock = NULL; - wdt_irq = NULL; - wdt_mem = NULL; return 0; } static void s3c2410wdt_shutdown(struct platform_device *dev) { - s3c2410wdt_stop(&s3c2410_wdd); + struct s3c2410_wdt *wdt = platform_get_drvdata(dev); + + s3c2410wdt_stop(&wdt->wdt_device); } #ifdef CONFIG_PM_SLEEP -static unsigned long wtcon_save; -static unsigned long wtdat_save; - static int s3c2410wdt_suspend(struct device *dev) { + struct s3c2410_wdt *wdt = dev_get_drvdata(dev); + /* Save watchdog state, and turn it off. */ - wtcon_save = readl(wdt_base + S3C2410_WTCON); - wtdat_save = readl(wdt_base + S3C2410_WTDAT); + wdt->wtcon_save = readl(wdt->reg_base + S3C2410_WTCON); + wdt->wtdat_save = readl(wdt->reg_base + S3C2410_WTDAT); /* Note that WTCNT doesn't need to be saved. */ - s3c2410wdt_stop(&s3c2410_wdd); + s3c2410wdt_stop(&wdt->wdt_device); return 0; } static int s3c2410wdt_resume(struct device *dev) { - /* Restore watchdog state. */ + struct s3c2410_wdt *wdt = dev_get_drvdata(dev); - writel(wtdat_save, wdt_base + S3C2410_WTDAT); - writel(wtdat_save, wdt_base + S3C2410_WTCNT); /* Reset count */ - writel(wtcon_save, wdt_base + S3C2410_WTCON); + /* Restore watchdog state. */ + writel(wdt->wtdat_save, wdt->reg_base + S3C2410_WTDAT); + writel(wdt->wtdat_save, wdt->reg_base + S3C2410_WTCNT);/* Reset count */ + writel(wdt->wtcon_save, wdt->reg_base + S3C2410_WTCON); dev_info(dev, "watchdog %sabled\n", - (wtcon_save & S3C2410_WTCON_ENABLE) ? "en" : "dis"); + (wdt->wtcon_save & S3C2410_WTCON_ENABLE) ? "en" : "dis"); return 0; } diff --git a/drivers/watchdog/sunxi_wdt.c b/drivers/watchdog/sunxi_wdt.c new file mode 100644 index 000000000000..1f94b42764aa --- /dev/null +++ b/drivers/watchdog/sunxi_wdt.c @@ -0,0 +1,237 @@ +/* + * sunxi Watchdog Driver + * + * Copyright (c) 2013 Carlo Caione + * 2012 Henrik Nordstrom + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Based on xen_wdt.c + * (c) Copyright 2010 Novell, Inc. + */ + +#include <linux/clk.h> +#include <linux/err.h> +#include <linux/init.h> +#include <linux/io.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/moduleparam.h> +#include <linux/of.h> +#include <linux/platform_device.h> +#include <linux/types.h> +#include <linux/watchdog.h> + +#define WDT_MAX_TIMEOUT 16 +#define WDT_MIN_TIMEOUT 1 +#define WDT_MODE_TIMEOUT(n) ((n) << 3) +#define WDT_TIMEOUT_MASK WDT_MODE_TIMEOUT(0x0F) + +#define WDT_CTRL 0x00 +#define WDT_CTRL_RELOAD ((1 << 0) | (0x0a57 << 1)) + +#define WDT_MODE 0x04 +#define WDT_MODE_EN (1 << 0) +#define WDT_MODE_RST_EN (1 << 1) + +#define DRV_NAME "sunxi-wdt" +#define DRV_VERSION "1.0" + +static bool nowayout = WATCHDOG_NOWAYOUT; +static unsigned int timeout = WDT_MAX_TIMEOUT; + +struct sunxi_wdt_dev { + struct watchdog_device wdt_dev; + void __iomem *wdt_base; +}; + +/* + * wdt_timeout_map maps the watchdog timer interval value in seconds to + * the value of the register WDT_MODE bit 3:6 + * + * [timeout seconds] = register value + * + */ + +static const int wdt_timeout_map[] = { + [1] = 0b0001, /* 1s */ + [2] = 0b0010, /* 2s */ + [3] = 0b0011, /* 3s */ + [4] = 0b0100, /* 4s */ + [5] = 0b0101, /* 5s */ + [6] = 0b0110, /* 6s */ + [8] = 0b0111, /* 8s */ + [10] = 0b1000, /* 10s */ + [12] = 0b1001, /* 12s */ + [14] = 0b1010, /* 14s */ + [16] = 0b1011, /* 16s */ +}; + +static int sunxi_wdt_ping(struct watchdog_device *wdt_dev) +{ + struct sunxi_wdt_dev *sunxi_wdt = watchdog_get_drvdata(wdt_dev); + void __iomem *wdt_base = sunxi_wdt->wdt_base; + + iowrite32(WDT_CTRL_RELOAD, wdt_base + WDT_CTRL); + + return 0; +} + +static int sunxi_wdt_set_timeout(struct watchdog_device *wdt_dev, + unsigned int timeout) +{ + struct sunxi_wdt_dev *sunxi_wdt = watchdog_get_drvdata(wdt_dev); + void __iomem *wdt_base = sunxi_wdt->wdt_base; + u32 reg; + + if (wdt_timeout_map[timeout] == 0) + timeout++; + + sunxi_wdt->wdt_dev.timeout = timeout; + + reg = ioread32(wdt_base + WDT_MODE); + reg &= ~WDT_TIMEOUT_MASK; + reg |= WDT_MODE_TIMEOUT(wdt_timeout_map[timeout]); + iowrite32(reg, wdt_base + WDT_MODE); + + sunxi_wdt_ping(wdt_dev); + + return 0; +} + +static int sunxi_wdt_stop(struct watchdog_device *wdt_dev) +{ + struct sunxi_wdt_dev *sunxi_wdt = watchdog_get_drvdata(wdt_dev); + void __iomem *wdt_base = sunxi_wdt->wdt_base; + + iowrite32(0, wdt_base + WDT_MODE); + + return 0; +} + +static int sunxi_wdt_start(struct watchdog_device *wdt_dev) +{ + u32 reg; + struct sunxi_wdt_dev *sunxi_wdt = watchdog_get_drvdata(wdt_dev); + void __iomem *wdt_base = sunxi_wdt->wdt_base; + int ret; + + ret = sunxi_wdt_set_timeout(&sunxi_wdt->wdt_dev, + sunxi_wdt->wdt_dev.timeout); + if (ret < 0) + return ret; + + reg = ioread32(wdt_base + WDT_MODE); + reg |= (WDT_MODE_RST_EN | WDT_MODE_EN); + iowrite32(reg, wdt_base + WDT_MODE); + + return 0; +} + +static const struct watchdog_info sunxi_wdt_info = { + .identity = DRV_NAME, + .options = WDIOF_SETTIMEOUT | + WDIOF_KEEPALIVEPING | + WDIOF_MAGICCLOSE, +}; + +static const struct watchdog_ops sunxi_wdt_ops = { + .owner = THIS_MODULE, + .start = sunxi_wdt_start, + .stop = sunxi_wdt_stop, + .ping = sunxi_wdt_ping, + .set_timeout = sunxi_wdt_set_timeout, +}; + +static int __init sunxi_wdt_probe(struct platform_device *pdev) +{ + struct sunxi_wdt_dev *sunxi_wdt; + struct resource *res; + int err; + + sunxi_wdt = devm_kzalloc(&pdev->dev, sizeof(*sunxi_wdt), GFP_KERNEL); + if (!sunxi_wdt) + return -EINVAL; + + platform_set_drvdata(pdev, sunxi_wdt); + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + sunxi_wdt->wdt_base = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(sunxi_wdt->wdt_base)) + return PTR_ERR(sunxi_wdt->wdt_base); + + sunxi_wdt->wdt_dev.info = &sunxi_wdt_info; + sunxi_wdt->wdt_dev.ops = &sunxi_wdt_ops; + sunxi_wdt->wdt_dev.timeout = WDT_MAX_TIMEOUT; + sunxi_wdt->wdt_dev.max_timeout = WDT_MAX_TIMEOUT; + sunxi_wdt->wdt_dev.min_timeout = WDT_MIN_TIMEOUT; + sunxi_wdt->wdt_dev.parent = &pdev->dev; + + watchdog_init_timeout(&sunxi_wdt->wdt_dev, timeout, &pdev->dev); + watchdog_set_nowayout(&sunxi_wdt->wdt_dev, nowayout); + + watchdog_set_drvdata(&sunxi_wdt->wdt_dev, sunxi_wdt); + + sunxi_wdt_stop(&sunxi_wdt->wdt_dev); + + err = watchdog_register_device(&sunxi_wdt->wdt_dev); + if (unlikely(err)) + return err; + + dev_info(&pdev->dev, "Watchdog enabled (timeout=%d sec, nowayout=%d)", + sunxi_wdt->wdt_dev.timeout, nowayout); + + return 0; +} + +static int __exit sunxi_wdt_remove(struct platform_device *pdev) +{ + struct sunxi_wdt_dev *sunxi_wdt = platform_get_drvdata(pdev); + + watchdog_unregister_device(&sunxi_wdt->wdt_dev); + watchdog_set_drvdata(&sunxi_wdt->wdt_dev, NULL); + + return 0; +} + +static void sunxi_wdt_shutdown(struct platform_device *pdev) +{ + struct sunxi_wdt_dev *sunxi_wdt = platform_get_drvdata(pdev); + + sunxi_wdt_stop(&sunxi_wdt->wdt_dev); +} + +static const struct of_device_id sunxi_wdt_dt_ids[] = { + { .compatible = "allwinner,sun4i-wdt" }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(of, sunxi_wdt_dt_ids); + +static struct platform_driver sunxi_wdt_driver = { + .probe = sunxi_wdt_probe, + .remove = sunxi_wdt_remove, + .shutdown = sunxi_wdt_shutdown, + .driver = { + .owner = THIS_MODULE, + .name = DRV_NAME, + .of_match_table = of_match_ptr(sunxi_wdt_dt_ids) + }, +}; + +module_platform_driver(sunxi_wdt_driver); + +module_param(timeout, uint, 0); +MODULE_PARM_DESC(timeout, "Watchdog heartbeat in seconds"); + +module_param(nowayout, bool, 0); +MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started " + "(default=" __MODULE_STRING(WATCHDOG_NOWAYOUT) ")"); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Carlo Caione <carlo.caione@gmail.com>"); +MODULE_AUTHOR("Henrik Nordstrom <henrik@henriknordstrom.net>"); +MODULE_DESCRIPTION("sunxi WatchDog Timer Driver"); +MODULE_VERSION(DRV_VERSION); diff --git a/drivers/watchdog/ts72xx_wdt.c b/drivers/watchdog/ts72xx_wdt.c index 4da59b4d73f0..42913f131dc2 100644 --- a/drivers/watchdog/ts72xx_wdt.c +++ b/drivers/watchdog/ts72xx_wdt.c @@ -403,21 +403,11 @@ static int ts72xx_wdt_probe(struct platform_device *pdev) } r1 = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (!r1) { - dev_err(&pdev->dev, "failed to get memory resource\n"); - return -ENODEV; - } - wdt->control_reg = devm_ioremap_resource(&pdev->dev, r1); if (IS_ERR(wdt->control_reg)) return PTR_ERR(wdt->control_reg); r2 = platform_get_resource(pdev, IORESOURCE_MEM, 1); - if (!r2) { - dev_err(&pdev->dev, "failed to get memory resource\n"); - return -ENODEV; - } - wdt->feed_reg = devm_ioremap_resource(&pdev->dev, r2); if (IS_ERR(wdt->feed_reg)) return PTR_ERR(wdt->feed_reg); diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index 3101cf6daf56..a50c6e3a7cc4 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c @@ -349,8 +349,6 @@ static enum bp_state increase_reservation(unsigned long nr_pages) BUG_ON(page == NULL); pfn = page_to_pfn(page); - BUG_ON(!xen_feature(XENFEAT_auto_translated_physmap) && - phys_to_machine_mapping_valid(pfn)); set_phys_to_machine(pfn, frame_list[i]); @@ -380,6 +378,7 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp) enum bp_state state = BP_DONE; unsigned long pfn, i; struct page *page; + struct page *scratch_page; int ret; struct xen_memory_reservation reservation = { .address_bits = 0, @@ -399,6 +398,8 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp) if (nr_pages > ARRAY_SIZE(frame_list)) nr_pages = ARRAY_SIZE(frame_list); + scratch_page = get_balloon_scratch_page(); + for (i = 0; i < nr_pages; i++) { page = alloc_page(gfp); if (page == NULL) { @@ -416,7 +417,7 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp) if (xen_pv_domain() && !PageHighMem(page)) { ret = HYPERVISOR_update_va_mapping( (unsigned long)__va(pfn << PAGE_SHIFT), - pfn_pte(page_to_pfn(__get_cpu_var(balloon_scratch_page)), + pfn_pte(page_to_pfn(scratch_page), PAGE_KERNEL_RO), 0); BUG_ON(ret); } @@ -432,14 +433,14 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp) pfn = mfn_to_pfn(frame_list[i]); if (!xen_feature(XENFEAT_auto_translated_physmap)) { unsigned long p; - struct page *pg; - pg = __get_cpu_var(balloon_scratch_page); - p = page_to_pfn(pg); + p = page_to_pfn(scratch_page); __set_phys_to_machine(pfn, pfn_to_mfn(p)); } balloon_append(pfn_to_page(pfn)); } + put_balloon_scratch_page(); + set_xen_guest_handle(reservation.extent_start, frame_list); reservation.nr_extents = nr_pages; ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation); diff --git a/fs/btrfs/Kconfig b/fs/btrfs/Kconfig index 2b3b83296977..398cbd517be2 100644 --- a/fs/btrfs/Kconfig +++ b/fs/btrfs/Kconfig @@ -72,3 +72,12 @@ config BTRFS_DEBUG performance, or export extra information via sysfs. If unsure, say N. + +config BTRFS_ASSERT + bool "Btrfs assert support" + depends on BTRFS_FS + help + Enable run-time assertion checking. This will result in panics if + any of the assertions trip. This is meant for btrfs developers only. + + If unsure, say N. diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index 3932224f99e9..a91a6a355cc5 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -8,7 +8,10 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \ export.o tree-log.o free-space-cache.o zlib.o lzo.o \ compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o \ - reada.o backref.o ulist.o qgroup.o send.o dev-replace.o raid56.o + reada.o backref.o ulist.o qgroup.o send.o dev-replace.o raid56.o \ + uuid-tree.o btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o + +btrfs-$(CONFIG_BTRFS_FS_RUN_SANITY_TESTS) += tests/free-space-tests.o diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 8bc5e8ccb091..0552a599b28f 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -119,6 +119,26 @@ struct __prelim_ref { u64 wanted_disk_byte; }; +static struct kmem_cache *btrfs_prelim_ref_cache; + +int __init btrfs_prelim_ref_init(void) +{ + btrfs_prelim_ref_cache = kmem_cache_create("btrfs_prelim_ref", + sizeof(struct __prelim_ref), + 0, + SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, + NULL); + if (!btrfs_prelim_ref_cache) + return -ENOMEM; + return 0; +} + +void btrfs_prelim_ref_exit(void) +{ + if (btrfs_prelim_ref_cache) + kmem_cache_destroy(btrfs_prelim_ref_cache); +} + /* * the rules for all callers of this function are: * - obtaining the parent is the goal @@ -160,12 +180,12 @@ struct __prelim_ref { static int __add_prelim_ref(struct list_head *head, u64 root_id, struct btrfs_key *key, int level, - u64 parent, u64 wanted_disk_byte, int count) + u64 parent, u64 wanted_disk_byte, int count, + gfp_t gfp_mask) { struct __prelim_ref *ref; - /* in case we're adding delayed refs, we're holding the refs spinlock */ - ref = kmalloc(sizeof(*ref), GFP_ATOMIC); + ref = kmem_cache_alloc(btrfs_prelim_ref_cache, gfp_mask); if (!ref) return -ENOMEM; @@ -295,10 +315,9 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info, ret = btrfs_search_old_slot(root, &ref->key_for_search, path, time_seq); pr_debug("search slot in root %llu (level %d, ref count %d) returned " "%d for key (%llu %u %llu)\n", - (unsigned long long)ref->root_id, level, ref->count, ret, - (unsigned long long)ref->key_for_search.objectid, - ref->key_for_search.type, - (unsigned long long)ref->key_for_search.offset); + ref->root_id, level, ref->count, ret, + ref->key_for_search.objectid, ref->key_for_search.type, + ref->key_for_search.offset); if (ret < 0) goto out; @@ -365,11 +384,12 @@ static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info, node = ulist_next(parents, &uiter); ref->parent = node ? node->val : 0; ref->inode_list = node ? - (struct extent_inode_elem *)(uintptr_t)node->aux : 0; + (struct extent_inode_elem *)(uintptr_t)node->aux : NULL; /* additional parents require new refs being added here */ while ((node = ulist_next(parents, &uiter))) { - new_ref = kmalloc(sizeof(*new_ref), GFP_NOFS); + new_ref = kmem_cache_alloc(btrfs_prelim_ref_cache, + GFP_NOFS); if (!new_ref) { ret = -ENOMEM; goto out; @@ -493,7 +513,7 @@ static void __merge_refs(struct list_head *head, int mode) ref1->count += ref2->count; list_del(&ref2->list); - kfree(ref2); + kmem_cache_free(btrfs_prelim_ref_cache, ref2); } } @@ -548,7 +568,7 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq, ref = btrfs_delayed_node_to_tree_ref(node); ret = __add_prelim_ref(prefs, ref->root, &op_key, ref->level + 1, 0, node->bytenr, - node->ref_mod * sgn); + node->ref_mod * sgn, GFP_ATOMIC); break; } case BTRFS_SHARED_BLOCK_REF_KEY: { @@ -558,7 +578,7 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq, ret = __add_prelim_ref(prefs, ref->root, NULL, ref->level + 1, ref->parent, node->bytenr, - node->ref_mod * sgn); + node->ref_mod * sgn, GFP_ATOMIC); break; } case BTRFS_EXTENT_DATA_REF_KEY: { @@ -570,7 +590,7 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq, key.offset = ref->offset; ret = __add_prelim_ref(prefs, ref->root, &key, 0, 0, node->bytenr, - node->ref_mod * sgn); + node->ref_mod * sgn, GFP_ATOMIC); break; } case BTRFS_SHARED_DATA_REF_KEY: { @@ -583,7 +603,7 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq, key.offset = ref->offset; ret = __add_prelim_ref(prefs, ref->root, &key, 0, ref->parent, node->bytenr, - node->ref_mod * sgn); + node->ref_mod * sgn, GFP_ATOMIC); break; } default: @@ -657,7 +677,7 @@ static int __add_inline_refs(struct btrfs_fs_info *fs_info, case BTRFS_SHARED_BLOCK_REF_KEY: ret = __add_prelim_ref(prefs, 0, NULL, *info_level + 1, offset, - bytenr, 1); + bytenr, 1, GFP_NOFS); break; case BTRFS_SHARED_DATA_REF_KEY: { struct btrfs_shared_data_ref *sdref; @@ -666,13 +686,13 @@ static int __add_inline_refs(struct btrfs_fs_info *fs_info, sdref = (struct btrfs_shared_data_ref *)(iref + 1); count = btrfs_shared_data_ref_count(leaf, sdref); ret = __add_prelim_ref(prefs, 0, NULL, 0, offset, - bytenr, count); + bytenr, count, GFP_NOFS); break; } case BTRFS_TREE_BLOCK_REF_KEY: ret = __add_prelim_ref(prefs, offset, NULL, *info_level + 1, 0, - bytenr, 1); + bytenr, 1, GFP_NOFS); break; case BTRFS_EXTENT_DATA_REF_KEY: { struct btrfs_extent_data_ref *dref; @@ -687,7 +707,7 @@ static int __add_inline_refs(struct btrfs_fs_info *fs_info, key.offset = btrfs_extent_data_ref_offset(leaf, dref); root = btrfs_extent_data_ref_root(leaf, dref); ret = __add_prelim_ref(prefs, root, &key, 0, 0, - bytenr, count); + bytenr, count, GFP_NOFS); break; } default: @@ -738,7 +758,7 @@ static int __add_keyed_refs(struct btrfs_fs_info *fs_info, case BTRFS_SHARED_BLOCK_REF_KEY: ret = __add_prelim_ref(prefs, 0, NULL, info_level + 1, key.offset, - bytenr, 1); + bytenr, 1, GFP_NOFS); break; case BTRFS_SHARED_DATA_REF_KEY: { struct btrfs_shared_data_ref *sdref; @@ -748,13 +768,13 @@ static int __add_keyed_refs(struct btrfs_fs_info *fs_info, struct btrfs_shared_data_ref); count = btrfs_shared_data_ref_count(leaf, sdref); ret = __add_prelim_ref(prefs, 0, NULL, 0, key.offset, - bytenr, count); + bytenr, count, GFP_NOFS); break; } case BTRFS_TREE_BLOCK_REF_KEY: ret = __add_prelim_ref(prefs, key.offset, NULL, info_level + 1, 0, - bytenr, 1); + bytenr, 1, GFP_NOFS); break; case BTRFS_EXTENT_DATA_REF_KEY: { struct btrfs_extent_data_ref *dref; @@ -770,7 +790,7 @@ static int __add_keyed_refs(struct btrfs_fs_info *fs_info, key.offset = btrfs_extent_data_ref_offset(leaf, dref); root = btrfs_extent_data_ref_root(leaf, dref); ret = __add_prelim_ref(prefs, root, &key, 0, 0, - bytenr, count); + bytenr, count, GFP_NOFS); break; } default: @@ -911,7 +931,6 @@ again: while (!list_empty(&prefs)) { ref = list_first_entry(&prefs, struct __prelim_ref, list); - list_del(&ref->list); WARN_ON(ref->count < 0); if (ref->count && ref->root_id && ref->parent == 0) { /* no parent == root of tree */ @@ -935,8 +954,10 @@ again: } ret = find_extent_in_eb(eb, bytenr, *extent_item_pos, &eie); - ref->inode_list = eie; free_extent_buffer(eb); + if (ret < 0) + goto out; + ref->inode_list = eie; } ret = ulist_add_merge(refs, ref->parent, (uintptr_t)ref->inode_list, @@ -954,7 +975,8 @@ again: eie->next = ref->inode_list; } } - kfree(ref); + list_del(&ref->list); + kmem_cache_free(btrfs_prelim_ref_cache, ref); } out: @@ -962,13 +984,13 @@ out: while (!list_empty(&prefs)) { ref = list_first_entry(&prefs, struct __prelim_ref, list); list_del(&ref->list); - kfree(ref); + kmem_cache_free(btrfs_prelim_ref_cache, ref); } while (!list_empty(&prefs_delayed)) { ref = list_first_entry(&prefs_delayed, struct __prelim_ref, list); list_del(&ref->list); - kfree(ref); + kmem_cache_free(btrfs_prelim_ref_cache, ref); } return ret; @@ -1326,8 +1348,7 @@ int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical, found_key->type != BTRFS_METADATA_ITEM_KEY) || found_key->objectid > logical || found_key->objectid + size <= logical) { - pr_debug("logical %llu is not within any extent\n", - (unsigned long long)logical); + pr_debug("logical %llu is not within any extent\n", logical); return -ENOENT; } @@ -1340,11 +1361,8 @@ int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical, pr_debug("logical %llu is at position %llu within the extent (%llu " "EXTENT_ITEM %llu) flags %#llx size %u\n", - (unsigned long long)logical, - (unsigned long long)(logical - found_key->objectid), - (unsigned long long)found_key->objectid, - (unsigned long long)found_key->offset, - (unsigned long long)flags, item_size); + logical, logical - found_key->objectid, found_key->objectid, + found_key->offset, flags, item_size); WARN_ON(!flags_ret); if (flags_ret) { @@ -1516,7 +1534,7 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info, while (!ret && (root_node = ulist_next(roots, &root_uiter))) { pr_debug("root %llu references leaf %llu, data list " "%#llx\n", root_node->val, ref_node->val, - (long long)ref_node->aux); + ref_node->aux); ret = iterate_leaf_refs((struct extent_inode_elem *) (uintptr_t)ref_node->aux, root_node->val, @@ -1608,9 +1626,8 @@ static int iterate_inode_refs(u64 inum, struct btrfs_root *fs_root, name_len = btrfs_inode_ref_name_len(eb, iref); /* path must be released before calling iterate()! */ pr_debug("following ref at offset %u for inode %llu in " - "tree %llu\n", cur, - (unsigned long long)found_key.objectid, - (unsigned long long)fs_root->objectid); + "tree %llu\n", cur, found_key.objectid, + fs_root->objectid); ret = iterate(parent, name_len, (unsigned long)(iref + 1), eb, ctx); if (ret) diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h index 8f2e76702932..a910b27a8ad9 100644 --- a/fs/btrfs/backref.h +++ b/fs/btrfs/backref.h @@ -72,4 +72,6 @@ int btrfs_find_one_extref(struct btrfs_root *root, u64 inode_objectid, struct btrfs_inode_extref **ret_extref, u64 *found_off); +int __init btrfs_prelim_ref_init(void); +void btrfs_prelim_ref_exit(void); #endif diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 08b286b2a2c5..d0ae226926ee 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -218,6 +218,27 @@ static inline int btrfs_inode_in_log(struct inode *inode, u64 generation) return 0; } +struct btrfs_dio_private { + struct inode *inode; + u64 logical_offset; + u64 disk_bytenr; + u64 bytes; + void *private; + + /* number of bios pending for this dio */ + atomic_t pending_bios; + + /* IO errors */ + int errors; + + /* orig_bio is our btrfs_io_bio */ + struct bio *orig_bio; + + /* dio_bio came from fs/direct-io.c */ + struct bio *dio_bio; + u8 csum[0]; +}; + /* * Disable DIO read nolock optimization, so new dio readers will be forced * to grab i_mutex. It is used to avoid the endless truncate due to diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c index 1431a6965017..1c47be187240 100644 --- a/fs/btrfs/check-integrity.c +++ b/fs/btrfs/check-integrity.c @@ -701,15 +701,13 @@ static int btrfsic_process_superblock(struct btrfsic_state *state, next_bytenr = btrfs_super_root(selected_super); if (state->print_mask & BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION) - printk(KERN_INFO "root@%llu\n", - (unsigned long long)next_bytenr); + printk(KERN_INFO "root@%llu\n", next_bytenr); break; case 1: next_bytenr = btrfs_super_chunk_root(selected_super); if (state->print_mask & BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION) - printk(KERN_INFO "chunk@%llu\n", - (unsigned long long)next_bytenr); + printk(KERN_INFO "chunk@%llu\n", next_bytenr); break; case 2: next_bytenr = btrfs_super_log_root(selected_super); @@ -717,8 +715,7 @@ static int btrfsic_process_superblock(struct btrfsic_state *state, continue; if (state->print_mask & BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION) - printk(KERN_INFO "log@%llu\n", - (unsigned long long)next_bytenr); + printk(KERN_INFO "log@%llu\n", next_bytenr); break; } @@ -727,7 +724,7 @@ static int btrfsic_process_superblock(struct btrfsic_state *state, next_bytenr, state->metablock_size); if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", - (unsigned long long)next_bytenr, num_copies); + next_bytenr, num_copies); for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) { struct btrfsic_block *next_block; @@ -742,8 +739,7 @@ static int btrfsic_process_superblock(struct btrfsic_state *state, printk(KERN_INFO "btrfsic:" " btrfsic_map_block(root @%llu," " mirror %d) failed!\n", - (unsigned long long)next_bytenr, - mirror_num); + next_bytenr, mirror_num); kfree(selected_super); return -1; } @@ -767,7 +763,6 @@ static int btrfsic_process_superblock(struct btrfsic_state *state, if (ret < (int)PAGE_CACHE_SIZE) { printk(KERN_INFO "btrfsic: read @logical %llu failed!\n", - (unsigned long long) tmp_next_block_ctx.start); btrfsic_release_block_ctx(&tmp_next_block_ctx); kfree(selected_super); @@ -813,7 +808,7 @@ static int btrfsic_process_superblock_dev_mirror( (bh->b_data + (dev_bytenr & 4095)); if (btrfs_super_bytenr(super_tmp) != dev_bytenr || - super_tmp->magic != cpu_to_le64(BTRFS_MAGIC) || + btrfs_super_magic(super_tmp) != BTRFS_MAGIC || memcmp(device->uuid, super_tmp->dev_item.uuid, BTRFS_UUID_SIZE) || btrfs_super_nodesize(super_tmp) != state->metablock_size || btrfs_super_leafsize(super_tmp) != state->metablock_size || @@ -847,10 +842,8 @@ static int btrfsic_process_superblock_dev_mirror( printk_in_rcu(KERN_INFO "New initial S-block (bdev %p, %s)" " @%llu (%s/%llu/%d)\n", superblock_bdev, - rcu_str_deref(device->name), - (unsigned long long)dev_bytenr, - dev_state->name, - (unsigned long long)dev_bytenr, + rcu_str_deref(device->name), dev_bytenr, + dev_state->name, dev_bytenr, superblock_mirror_num); list_add(&superblock_tmp->all_blocks_node, &state->all_blocks_list); @@ -880,20 +873,20 @@ static int btrfsic_process_superblock_dev_mirror( tmp_disk_key.offset = 0; switch (pass) { case 0: - tmp_disk_key.objectid = - cpu_to_le64(BTRFS_ROOT_TREE_OBJECTID); + btrfs_set_disk_key_objectid(&tmp_disk_key, + BTRFS_ROOT_TREE_OBJECTID); additional_string = "initial root "; next_bytenr = btrfs_super_root(super_tmp); break; case 1: - tmp_disk_key.objectid = - cpu_to_le64(BTRFS_CHUNK_TREE_OBJECTID); + btrfs_set_disk_key_objectid(&tmp_disk_key, + BTRFS_CHUNK_TREE_OBJECTID); additional_string = "initial chunk "; next_bytenr = btrfs_super_chunk_root(super_tmp); break; case 2: - tmp_disk_key.objectid = - cpu_to_le64(BTRFS_TREE_LOG_OBJECTID); + btrfs_set_disk_key_objectid(&tmp_disk_key, + BTRFS_TREE_LOG_OBJECTID); additional_string = "initial log "; next_bytenr = btrfs_super_log_root(super_tmp); if (0 == next_bytenr) @@ -906,7 +899,7 @@ static int btrfsic_process_superblock_dev_mirror( next_bytenr, state->metablock_size); if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", - (unsigned long long)next_bytenr, num_copies); + next_bytenr, num_copies); for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) { struct btrfsic_block *next_block; struct btrfsic_block_data_ctx tmp_next_block_ctx; @@ -918,8 +911,7 @@ static int btrfsic_process_superblock_dev_mirror( mirror_num)) { printk(KERN_INFO "btrfsic: btrfsic_map_block(" "bytenr @%llu, mirror %d) failed!\n", - (unsigned long long)next_bytenr, - mirror_num); + next_bytenr, mirror_num); brelse(bh); return -1; } @@ -1003,19 +995,17 @@ continue_with_new_stack_frame: (struct btrfs_leaf *)sf->hdr; if (-1 == sf->i) { - sf->nr = le32_to_cpu(leafhdr->header.nritems); + sf->nr = btrfs_stack_header_nritems(&leafhdr->header); if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) printk(KERN_INFO "leaf %llu items %d generation %llu" " owner %llu\n", - (unsigned long long) - sf->block_ctx->start, - sf->nr, - (unsigned long long) - le64_to_cpu(leafhdr->header.generation), - (unsigned long long) - le64_to_cpu(leafhdr->header.owner)); + sf->block_ctx->start, sf->nr, + btrfs_stack_header_generation( + &leafhdr->header), + btrfs_stack_header_owner( + &leafhdr->header)); } continue_with_current_leaf_stack_frame: @@ -1047,10 +1037,10 @@ leaf_item_out_of_bounce_error: &disk_item, disk_item_offset, sizeof(struct btrfs_item)); - item_offset = le32_to_cpu(disk_item.offset); - item_size = le32_to_cpu(disk_item.size); + item_offset = btrfs_stack_item_offset(&disk_item); + item_size = btrfs_stack_item_offset(&disk_item); disk_key = &disk_item.key; - type = disk_key->type; + type = btrfs_disk_key_type(disk_key); if (BTRFS_ROOT_ITEM_KEY == type) { struct btrfs_root_item root_item; @@ -1066,7 +1056,7 @@ leaf_item_out_of_bounce_error: sf->block_ctx, &root_item, root_item_offset, item_size); - next_bytenr = le64_to_cpu(root_item.bytenr); + next_bytenr = btrfs_root_bytenr(&root_item); sf->error = btrfsic_create_link_to_next_block( @@ -1081,8 +1071,8 @@ leaf_item_out_of_bounce_error: &sf->num_copies, &sf->mirror_num, disk_key, - le64_to_cpu(root_item. - generation)); + btrfs_root_generation( + &root_item)); if (sf->error) goto one_stack_frame_backwards; @@ -1130,18 +1120,17 @@ leaf_item_out_of_bounce_error: struct btrfs_node *const nodehdr = (struct btrfs_node *)sf->hdr; if (-1 == sf->i) { - sf->nr = le32_to_cpu(nodehdr->header.nritems); + sf->nr = btrfs_stack_header_nritems(&nodehdr->header); if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) printk(KERN_INFO "node %llu level %d items %d" " generation %llu owner %llu\n", - (unsigned long long) sf->block_ctx->start, nodehdr->header.level, sf->nr, - (unsigned long long) - le64_to_cpu(nodehdr->header.generation), - (unsigned long long) - le64_to_cpu(nodehdr->header.owner)); + btrfs_stack_header_generation( + &nodehdr->header), + btrfs_stack_header_owner( + &nodehdr->header)); } continue_with_current_node_stack_frame: @@ -1168,7 +1157,7 @@ continue_with_current_node_stack_frame: btrfsic_read_from_block_data( sf->block_ctx, &key_ptr, key_ptr_offset, sizeof(struct btrfs_key_ptr)); - next_bytenr = le64_to_cpu(key_ptr.blockptr); + next_bytenr = btrfs_stack_key_blockptr(&key_ptr); sf->error = btrfsic_create_link_to_next_block( state, @@ -1182,7 +1171,7 @@ continue_with_current_node_stack_frame: &sf->num_copies, &sf->mirror_num, &key_ptr.key, - le64_to_cpu(key_ptr.generation)); + btrfs_stack_key_generation(&key_ptr)); if (sf->error) goto one_stack_frame_backwards; @@ -1247,8 +1236,7 @@ static void btrfsic_read_from_block_data( unsigned long i = (start_offset + offset) >> PAGE_CACHE_SHIFT; WARN_ON(offset + len > block_ctx->len); - offset_in_page = (start_offset + offset) & - ((unsigned long)PAGE_CACHE_SIZE - 1); + offset_in_page = (start_offset + offset) & (PAGE_CACHE_SIZE - 1); while (len > 0) { cur = min(len, ((size_t)PAGE_CACHE_SIZE - offset_in_page)); @@ -1290,7 +1278,7 @@ static int btrfsic_create_link_to_next_block( next_bytenr, state->metablock_size); if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", - (unsigned long long)next_bytenr, *num_copiesp); + next_bytenr, *num_copiesp); *mirror_nump = 1; } @@ -1307,7 +1295,7 @@ static int btrfsic_create_link_to_next_block( if (ret) { printk(KERN_INFO "btrfsic: btrfsic_map_block(@%llu, mirror=%d) failed!\n", - (unsigned long long)next_bytenr, *mirror_nump); + next_bytenr, *mirror_nump); btrfsic_release_block_ctx(next_block_ctx); *next_blockp = NULL; return -1; @@ -1335,20 +1323,16 @@ static int btrfsic_create_link_to_next_block( "Referenced block @%llu (%s/%llu/%d)" " found in hash table, %c," " bytenr mismatch (!= stored %llu).\n", - (unsigned long long)next_bytenr, - next_block_ctx->dev->name, - (unsigned long long)next_block_ctx->dev_bytenr, - *mirror_nump, + next_bytenr, next_block_ctx->dev->name, + next_block_ctx->dev_bytenr, *mirror_nump, btrfsic_get_block_type(state, next_block), - (unsigned long long)next_block->logical_bytenr); + next_block->logical_bytenr); } else if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) printk(KERN_INFO "Referenced block @%llu (%s/%llu/%d)" " found in hash table, %c.\n", - (unsigned long long)next_bytenr, - next_block_ctx->dev->name, - (unsigned long long)next_block_ctx->dev_bytenr, - *mirror_nump, + next_bytenr, next_block_ctx->dev->name, + next_block_ctx->dev_bytenr, *mirror_nump, btrfsic_get_block_type(state, next_block)); next_block->logical_bytenr = next_bytenr; @@ -1400,7 +1384,7 @@ static int btrfsic_create_link_to_next_block( if (ret < (int)next_block_ctx->len) { printk(KERN_INFO "btrfsic: read block @logical %llu failed!\n", - (unsigned long long)next_bytenr); + next_bytenr); btrfsic_release_block_ctx(next_block_ctx); *next_blockp = NULL; return -1; @@ -1444,12 +1428,12 @@ static int btrfsic_handle_extent_data( file_extent_item_offset, offsetof(struct btrfs_file_extent_item, disk_num_bytes)); if (BTRFS_FILE_EXTENT_REG != file_extent_item.type || - ((u64)0) == le64_to_cpu(file_extent_item.disk_bytenr)) { + btrfs_stack_file_extent_disk_bytenr(&file_extent_item) == 0) { if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE) printk(KERN_INFO "extent_data: type %u, disk_bytenr = %llu\n", file_extent_item.type, - (unsigned long long) - le64_to_cpu(file_extent_item.disk_bytenr)); + btrfs_stack_file_extent_disk_bytenr( + &file_extent_item)); return 0; } @@ -1463,20 +1447,19 @@ static int btrfsic_handle_extent_data( btrfsic_read_from_block_data(block_ctx, &file_extent_item, file_extent_item_offset, sizeof(struct btrfs_file_extent_item)); - next_bytenr = le64_to_cpu(file_extent_item.disk_bytenr) + - le64_to_cpu(file_extent_item.offset); - generation = le64_to_cpu(file_extent_item.generation); - num_bytes = le64_to_cpu(file_extent_item.num_bytes); - generation = le64_to_cpu(file_extent_item.generation); + next_bytenr = btrfs_stack_file_extent_disk_bytenr(&file_extent_item) + + btrfs_stack_file_extent_offset(&file_extent_item); + generation = btrfs_stack_file_extent_generation(&file_extent_item); + num_bytes = btrfs_stack_file_extent_num_bytes(&file_extent_item); + generation = btrfs_stack_file_extent_generation(&file_extent_item); if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE) printk(KERN_INFO "extent_data: type %u, disk_bytenr = %llu," " offset = %llu, num_bytes = %llu\n", file_extent_item.type, - (unsigned long long) - le64_to_cpu(file_extent_item.disk_bytenr), - (unsigned long long)le64_to_cpu(file_extent_item.offset), - (unsigned long long)num_bytes); + btrfs_stack_file_extent_disk_bytenr(&file_extent_item), + btrfs_stack_file_extent_offset(&file_extent_item), + num_bytes); while (num_bytes > 0) { u32 chunk_len; int num_copies; @@ -1492,7 +1475,7 @@ static int btrfsic_handle_extent_data( next_bytenr, state->datablock_size); if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", - (unsigned long long)next_bytenr, num_copies); + next_bytenr, num_copies); for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) { struct btrfsic_block_data_ctx next_block_ctx; struct btrfsic_block *next_block; @@ -1504,8 +1487,7 @@ static int btrfsic_handle_extent_data( if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE) printk(KERN_INFO "\tdisk_bytenr = %llu, num_bytes %u\n", - (unsigned long long)next_bytenr, - chunk_len); + next_bytenr, chunk_len); ret = btrfsic_map_block(state, next_bytenr, chunk_len, &next_block_ctx, mirror_num); @@ -1513,8 +1495,7 @@ static int btrfsic_handle_extent_data( printk(KERN_INFO "btrfsic: btrfsic_map_block(@%llu," " mirror=%d) failed!\n", - (unsigned long long)next_bytenr, - mirror_num); + next_bytenr, mirror_num); return -1; } @@ -1543,12 +1524,10 @@ static int btrfsic_handle_extent_data( " found in hash table, D," " bytenr mismatch" " (!= stored %llu).\n", - (unsigned long long)next_bytenr, + next_bytenr, next_block_ctx.dev->name, - (unsigned long long) next_block_ctx.dev_bytenr, mirror_num, - (unsigned long long) next_block->logical_bytenr); } next_block->logical_bytenr = next_bytenr; @@ -1675,7 +1654,7 @@ static int btrfsic_read_block(struct btrfsic_state *state, if (block_ctx->dev_bytenr & ((u64)PAGE_CACHE_SIZE - 1)) { printk(KERN_INFO "btrfsic: read_block() with unaligned bytenr %llu\n", - (unsigned long long)block_ctx->dev_bytenr); + block_ctx->dev_bytenr); return -1; } @@ -1772,10 +1751,8 @@ static void btrfsic_dump_database(struct btrfsic_state *state) printk(KERN_INFO "%c-block @%llu (%s/%llu/%d)\n", btrfsic_get_block_type(state, b_all), - (unsigned long long)b_all->logical_bytenr, - b_all->dev_state->name, - (unsigned long long)b_all->dev_bytenr, - b_all->mirror_num); + b_all->logical_bytenr, b_all->dev_state->name, + b_all->dev_bytenr, b_all->mirror_num); list_for_each(elem_ref_to, &b_all->ref_to_list) { const struct btrfsic_block_link *const l = @@ -1787,16 +1764,13 @@ static void btrfsic_dump_database(struct btrfsic_state *state) " refers %u* to" " %c @%llu (%s/%llu/%d)\n", btrfsic_get_block_type(state, b_all), - (unsigned long long)b_all->logical_bytenr, - b_all->dev_state->name, - (unsigned long long)b_all->dev_bytenr, - b_all->mirror_num, + b_all->logical_bytenr, b_all->dev_state->name, + b_all->dev_bytenr, b_all->mirror_num, l->ref_cnt, btrfsic_get_block_type(state, l->block_ref_to), - (unsigned long long) l->block_ref_to->logical_bytenr, l->block_ref_to->dev_state->name, - (unsigned long long)l->block_ref_to->dev_bytenr, + l->block_ref_to->dev_bytenr, l->block_ref_to->mirror_num); } @@ -1810,16 +1784,12 @@ static void btrfsic_dump_database(struct btrfsic_state *state) " is ref %u* from" " %c @%llu (%s/%llu/%d)\n", btrfsic_get_block_type(state, b_all), - (unsigned long long)b_all->logical_bytenr, - b_all->dev_state->name, - (unsigned long long)b_all->dev_bytenr, - b_all->mirror_num, + b_all->logical_bytenr, b_all->dev_state->name, + b_all->dev_bytenr, b_all->mirror_num, l->ref_cnt, btrfsic_get_block_type(state, l->block_ref_from), - (unsigned long long) l->block_ref_from->logical_bytenr, l->block_ref_from->dev_state->name, - (unsigned long long) l->block_ref_from->dev_bytenr, l->block_ref_from->mirror_num); } @@ -1896,8 +1866,8 @@ again: struct list_head *tmp_ref_to; if (block->is_superblock) { - bytenr = le64_to_cpu(((struct btrfs_super_block *) - mapped_datav[0])->bytenr); + bytenr = btrfs_super_bytenr((struct btrfs_super_block *) + mapped_datav[0]); if (num_pages * PAGE_CACHE_SIZE < BTRFS_SUPER_INFO_SIZE) { printk(KERN_INFO @@ -1923,8 +1893,9 @@ again: return; } processed_len = state->metablock_size; - bytenr = le64_to_cpu(((struct btrfs_header *) - mapped_datav[0])->bytenr); + bytenr = btrfs_stack_header_bytenr( + (struct btrfs_header *) + mapped_datav[0]); btrfsic_cmp_log_and_dev_bytenr(state, bytenr, dev_state, dev_bytenr); @@ -1935,12 +1906,9 @@ again: " found in hash table, %c," " bytenr mismatch" " (!= stored %llu).\n", - (unsigned long long)bytenr, - dev_state->name, - (unsigned long long)dev_bytenr, + bytenr, dev_state->name, dev_bytenr, block->mirror_num, btrfsic_get_block_type(state, block), - (unsigned long long) block->logical_bytenr); block->logical_bytenr = bytenr; } else if (state->print_mask & @@ -1948,9 +1916,7 @@ again: printk(KERN_INFO "Written block @%llu (%s/%llu/%d)" " found in hash table, %c.\n", - (unsigned long long)bytenr, - dev_state->name, - (unsigned long long)dev_bytenr, + bytenr, dev_state->name, dev_bytenr, block->mirror_num, btrfsic_get_block_type(state, block)); } else { @@ -1966,9 +1932,7 @@ again: printk(KERN_INFO "Written block @%llu (%s/%llu/%d)" " found in hash table, %c.\n", - (unsigned long long)bytenr, - dev_state->name, - (unsigned long long)dev_bytenr, + bytenr, dev_state->name, dev_bytenr, block->mirror_num, btrfsic_get_block_type(state, block)); } @@ -1985,21 +1949,14 @@ again: " new(gen=%llu)," " which is referenced by most recent superblock" " (superblockgen=%llu)!\n", - btrfsic_get_block_type(state, block), - (unsigned long long)bytenr, - dev_state->name, - (unsigned long long)dev_bytenr, - block->mirror_num, - (unsigned long long)block->generation, - (unsigned long long) - le64_to_cpu(block->disk_key.objectid), + btrfsic_get_block_type(state, block), bytenr, + dev_state->name, dev_bytenr, block->mirror_num, + block->generation, + btrfs_disk_key_objectid(&block->disk_key), block->disk_key.type, - (unsigned long long) - le64_to_cpu(block->disk_key.offset), - (unsigned long long) - le64_to_cpu(((struct btrfs_header *) - mapped_datav[0])->generation), - (unsigned long long) + btrfs_disk_key_offset(&block->disk_key), + btrfs_stack_header_generation( + (struct btrfs_header *) mapped_datav[0]), state->max_superblock_generation); btrfsic_dump_tree(state); } @@ -2008,15 +1965,12 @@ again: printk(KERN_INFO "btrfs: attempt to overwrite %c-block" " @%llu (%s/%llu/%d), oldgen=%llu, newgen=%llu," " which is not yet iodone!\n", - btrfsic_get_block_type(state, block), - (unsigned long long)bytenr, - dev_state->name, - (unsigned long long)dev_bytenr, - block->mirror_num, - (unsigned long long)block->generation, - (unsigned long long) - le64_to_cpu(((struct btrfs_header *) - mapped_datav[0])->generation)); + btrfsic_get_block_type(state, block), bytenr, + dev_state->name, dev_bytenr, block->mirror_num, + block->generation, + btrfs_stack_header_generation( + (struct btrfs_header *) + mapped_datav[0])); /* it would not be safe to go on */ btrfsic_dump_tree(state); goto continue_loop; @@ -2056,7 +2010,7 @@ again: if (ret) { printk(KERN_INFO "btrfsic: btrfsic_map_block(root @%llu)" - " failed!\n", (unsigned long long)bytenr); + " failed!\n", bytenr); goto continue_loop; } block_ctx.datav = mapped_datav; @@ -2140,7 +2094,7 @@ again: printk(KERN_INFO "btrfsic: btrfsic_process_metablock" "(root @%llu) failed!\n", - (unsigned long long)dev_bytenr); + dev_bytenr); } else { block->is_metadata = 0; block->mirror_num = 0; /* unknown */ @@ -2168,8 +2122,7 @@ again: if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) printk(KERN_INFO "Written block (%s/%llu/?)" " !found in hash table, D.\n", - dev_state->name, - (unsigned long long)dev_bytenr); + dev_state->name, dev_bytenr); if (!state->include_extent_data) { /* ignore that written D block */ goto continue_loop; @@ -2184,17 +2137,16 @@ again: block_ctx.pagev = NULL; } else { processed_len = state->metablock_size; - bytenr = le64_to_cpu(((struct btrfs_header *) - mapped_datav[0])->bytenr); + bytenr = btrfs_stack_header_bytenr( + (struct btrfs_header *) + mapped_datav[0]); btrfsic_cmp_log_and_dev_bytenr(state, bytenr, dev_state, dev_bytenr); if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) printk(KERN_INFO "Written block @%llu (%s/%llu/?)" " !found in hash table, M.\n", - (unsigned long long)bytenr, - dev_state->name, - (unsigned long long)dev_bytenr); + bytenr, dev_state->name, dev_bytenr); ret = btrfsic_map_block(state, bytenr, processed_len, &block_ctx, 0); @@ -2202,7 +2154,7 @@ again: printk(KERN_INFO "btrfsic: btrfsic_map_block(root @%llu)" " failed!\n", - (unsigned long long)dev_bytenr); + dev_bytenr); goto continue_loop; } } @@ -2267,10 +2219,8 @@ again: printk(KERN_INFO "New written %c-block @%llu (%s/%llu/%d)\n", is_metadata ? 'M' : 'D', - (unsigned long long)block->logical_bytenr, - block->dev_state->name, - (unsigned long long)block->dev_bytenr, - block->mirror_num); + block->logical_bytenr, block->dev_state->name, + block->dev_bytenr, block->mirror_num); list_add(&block->all_blocks_node, &state->all_blocks_list); btrfsic_block_hashtable_add(block, &state->block_hashtable); @@ -2281,7 +2231,7 @@ again: printk(KERN_INFO "btrfsic: process_metablock(root @%llu)" " failed!\n", - (unsigned long long)dev_bytenr); + dev_bytenr); } btrfsic_release_block_ctx(&block_ctx); } @@ -2319,10 +2269,8 @@ static void btrfsic_bio_end_io(struct bio *bp, int bio_error_status) "bio_end_io(err=%d) for %c @%llu (%s/%llu/%d)\n", bio_error_status, btrfsic_get_block_type(dev_state->state, block), - (unsigned long long)block->logical_bytenr, - dev_state->name, - (unsigned long long)block->dev_bytenr, - block->mirror_num); + block->logical_bytenr, dev_state->name, + block->dev_bytenr, block->mirror_num); next_block = block->next_in_same_bio; block->iodone_w_error = iodone_w_error; if (block->submit_bio_bh_rw & REQ_FLUSH) { @@ -2332,7 +2280,6 @@ static void btrfsic_bio_end_io(struct bio *bp, int bio_error_status) printk(KERN_INFO "bio_end_io() new %s flush_gen=%llu\n", dev_state->name, - (unsigned long long) dev_state->last_flush_gen); } if (block->submit_bio_bh_rw & REQ_FUA) @@ -2358,10 +2305,8 @@ static void btrfsic_bh_end_io(struct buffer_head *bh, int uptodate) "bh_end_io(error=%d) for %c @%llu (%s/%llu/%d)\n", iodone_w_error, btrfsic_get_block_type(dev_state->state, block), - (unsigned long long)block->logical_bytenr, - block->dev_state->name, - (unsigned long long)block->dev_bytenr, - block->mirror_num); + block->logical_bytenr, block->dev_state->name, + block->dev_bytenr, block->mirror_num); block->iodone_w_error = iodone_w_error; if (block->submit_bio_bh_rw & REQ_FLUSH) { @@ -2370,8 +2315,7 @@ static void btrfsic_bh_end_io(struct buffer_head *bh, int uptodate) BTRFSIC_PRINT_MASK_END_IO_BIO_BH)) printk(KERN_INFO "bh_end_io() new %s flush_gen=%llu\n", - dev_state->name, - (unsigned long long)dev_state->last_flush_gen); + dev_state->name, dev_state->last_flush_gen); } if (block->submit_bio_bh_rw & REQ_FUA) block->flush_gen = 0; /* FUA completed means block is on disk */ @@ -2396,26 +2340,20 @@ static int btrfsic_process_written_superblock( printk(KERN_INFO "btrfsic: superblock @%llu (%s/%llu/%d)" " with old gen %llu <= %llu\n", - (unsigned long long)superblock->logical_bytenr, + superblock->logical_bytenr, superblock->dev_state->name, - (unsigned long long)superblock->dev_bytenr, - superblock->mirror_num, - (unsigned long long) + superblock->dev_bytenr, superblock->mirror_num, btrfs_super_generation(super_hdr), - (unsigned long long) state->max_superblock_generation); } else { if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE) printk(KERN_INFO "btrfsic: got new superblock @%llu (%s/%llu/%d)" " with new gen %llu > %llu\n", - (unsigned long long)superblock->logical_bytenr, + superblock->logical_bytenr, superblock->dev_state->name, - (unsigned long long)superblock->dev_bytenr, - superblock->mirror_num, - (unsigned long long) + superblock->dev_bytenr, superblock->mirror_num, btrfs_super_generation(super_hdr), - (unsigned long long) state->max_superblock_generation); state->max_superblock_generation = @@ -2432,43 +2370,41 @@ static int btrfsic_process_written_superblock( int num_copies; int mirror_num; const char *additional_string = NULL; - struct btrfs_disk_key tmp_disk_key; + struct btrfs_disk_key tmp_disk_key = {0}; - tmp_disk_key.type = BTRFS_ROOT_ITEM_KEY; - tmp_disk_key.offset = 0; + btrfs_set_disk_key_objectid(&tmp_disk_key, + BTRFS_ROOT_ITEM_KEY); + btrfs_set_disk_key_objectid(&tmp_disk_key, 0); switch (pass) { case 0: - tmp_disk_key.objectid = - cpu_to_le64(BTRFS_ROOT_TREE_OBJECTID); + btrfs_set_disk_key_objectid(&tmp_disk_key, + BTRFS_ROOT_TREE_OBJECTID); additional_string = "root "; next_bytenr = btrfs_super_root(super_hdr); if (state->print_mask & BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION) - printk(KERN_INFO "root@%llu\n", - (unsigned long long)next_bytenr); + printk(KERN_INFO "root@%llu\n", next_bytenr); break; case 1: - tmp_disk_key.objectid = - cpu_to_le64(BTRFS_CHUNK_TREE_OBJECTID); + btrfs_set_disk_key_objectid(&tmp_disk_key, + BTRFS_CHUNK_TREE_OBJECTID); additional_string = "chunk "; next_bytenr = btrfs_super_chunk_root(super_hdr); if (state->print_mask & BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION) - printk(KERN_INFO "chunk@%llu\n", - (unsigned long long)next_bytenr); + printk(KERN_INFO "chunk@%llu\n", next_bytenr); break; case 2: - tmp_disk_key.objectid = - cpu_to_le64(BTRFS_TREE_LOG_OBJECTID); + btrfs_set_disk_key_objectid(&tmp_disk_key, + BTRFS_TREE_LOG_OBJECTID); additional_string = "log "; next_bytenr = btrfs_super_log_root(super_hdr); if (0 == next_bytenr) continue; if (state->print_mask & BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION) - printk(KERN_INFO "log@%llu\n", - (unsigned long long)next_bytenr); + printk(KERN_INFO "log@%llu\n", next_bytenr); break; } @@ -2477,7 +2413,7 @@ static int btrfsic_process_written_superblock( next_bytenr, BTRFS_SUPER_INFO_SIZE); if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", - (unsigned long long)next_bytenr, num_copies); + next_bytenr, num_copies); for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) { int was_created; @@ -2493,8 +2429,7 @@ static int btrfsic_process_written_superblock( printk(KERN_INFO "btrfsic: btrfsic_map_block(@%llu," " mirror=%d) failed!\n", - (unsigned long long)next_bytenr, - mirror_num); + next_bytenr, mirror_num); return -1; } @@ -2579,26 +2514,22 @@ static int btrfsic_check_all_ref_blocks(struct btrfsic_state *state, " %u* refers to %c @%llu (%s/%llu/%d)\n", recursion_level, btrfsic_get_block_type(state, block), - (unsigned long long)block->logical_bytenr, - block->dev_state->name, - (unsigned long long)block->dev_bytenr, - block->mirror_num, + block->logical_bytenr, block->dev_state->name, + block->dev_bytenr, block->mirror_num, l->ref_cnt, btrfsic_get_block_type(state, l->block_ref_to), - (unsigned long long) l->block_ref_to->logical_bytenr, l->block_ref_to->dev_state->name, - (unsigned long long)l->block_ref_to->dev_bytenr, + l->block_ref_to->dev_bytenr, l->block_ref_to->mirror_num); if (l->block_ref_to->never_written) { printk(KERN_INFO "btrfs: attempt to write superblock" " which references block %c @%llu (%s/%llu/%d)" " which is never written!\n", btrfsic_get_block_type(state, l->block_ref_to), - (unsigned long long) l->block_ref_to->logical_bytenr, l->block_ref_to->dev_state->name, - (unsigned long long)l->block_ref_to->dev_bytenr, + l->block_ref_to->dev_bytenr, l->block_ref_to->mirror_num); ret = -1; } else if (!l->block_ref_to->is_iodone) { @@ -2606,10 +2537,9 @@ static int btrfsic_check_all_ref_blocks(struct btrfsic_state *state, " which references block %c @%llu (%s/%llu/%d)" " which is not yet iodone!\n", btrfsic_get_block_type(state, l->block_ref_to), - (unsigned long long) l->block_ref_to->logical_bytenr, l->block_ref_to->dev_state->name, - (unsigned long long)l->block_ref_to->dev_bytenr, + l->block_ref_to->dev_bytenr, l->block_ref_to->mirror_num); ret = -1; } else if (l->block_ref_to->iodone_w_error) { @@ -2617,10 +2547,9 @@ static int btrfsic_check_all_ref_blocks(struct btrfsic_state *state, " which references block %c @%llu (%s/%llu/%d)" " which has write error!\n", btrfsic_get_block_type(state, l->block_ref_to), - (unsigned long long) l->block_ref_to->logical_bytenr, l->block_ref_to->dev_state->name, - (unsigned long long)l->block_ref_to->dev_bytenr, + l->block_ref_to->dev_bytenr, l->block_ref_to->mirror_num); ret = -1; } else if (l->parent_generation != @@ -2634,13 +2563,12 @@ static int btrfsic_check_all_ref_blocks(struct btrfsic_state *state, " with generation %llu !=" " parent generation %llu!\n", btrfsic_get_block_type(state, l->block_ref_to), - (unsigned long long) l->block_ref_to->logical_bytenr, l->block_ref_to->dev_state->name, - (unsigned long long)l->block_ref_to->dev_bytenr, + l->block_ref_to->dev_bytenr, l->block_ref_to->mirror_num, - (unsigned long long)l->block_ref_to->generation, - (unsigned long long)l->parent_generation); + l->block_ref_to->generation, + l->parent_generation); ret = -1; } else if (l->block_ref_to->flush_gen > l->block_ref_to->dev_state->last_flush_gen) { @@ -2650,13 +2578,10 @@ static int btrfsic_check_all_ref_blocks(struct btrfsic_state *state, " (block flush_gen=%llu," " dev->flush_gen=%llu)!\n", btrfsic_get_block_type(state, l->block_ref_to), - (unsigned long long) l->block_ref_to->logical_bytenr, l->block_ref_to->dev_state->name, - (unsigned long long)l->block_ref_to->dev_bytenr, - l->block_ref_to->mirror_num, - (unsigned long long)block->flush_gen, - (unsigned long long) + l->block_ref_to->dev_bytenr, + l->block_ref_to->mirror_num, block->flush_gen, l->block_ref_to->dev_state->last_flush_gen); ret = -1; } else if (-1 == btrfsic_check_all_ref_blocks(state, @@ -2701,16 +2626,12 @@ static int btrfsic_is_block_ref_by_superblock( " is ref %u* from %c @%llu (%s/%llu/%d)\n", recursion_level, btrfsic_get_block_type(state, block), - (unsigned long long)block->logical_bytenr, - block->dev_state->name, - (unsigned long long)block->dev_bytenr, - block->mirror_num, + block->logical_bytenr, block->dev_state->name, + block->dev_bytenr, block->mirror_num, l->ref_cnt, btrfsic_get_block_type(state, l->block_ref_from), - (unsigned long long) l->block_ref_from->logical_bytenr, l->block_ref_from->dev_state->name, - (unsigned long long) l->block_ref_from->dev_bytenr, l->block_ref_from->mirror_num); if (l->block_ref_from->is_superblock && @@ -2737,14 +2658,12 @@ static void btrfsic_print_add_link(const struct btrfsic_state *state, " to %c @%llu (%s/%llu/%d).\n", l->ref_cnt, btrfsic_get_block_type(state, l->block_ref_from), - (unsigned long long)l->block_ref_from->logical_bytenr, + l->block_ref_from->logical_bytenr, l->block_ref_from->dev_state->name, - (unsigned long long)l->block_ref_from->dev_bytenr, - l->block_ref_from->mirror_num, + l->block_ref_from->dev_bytenr, l->block_ref_from->mirror_num, btrfsic_get_block_type(state, l->block_ref_to), - (unsigned long long)l->block_ref_to->logical_bytenr, - l->block_ref_to->dev_state->name, - (unsigned long long)l->block_ref_to->dev_bytenr, + l->block_ref_to->logical_bytenr, + l->block_ref_to->dev_state->name, l->block_ref_to->dev_bytenr, l->block_ref_to->mirror_num); } @@ -2756,14 +2675,12 @@ static void btrfsic_print_rem_link(const struct btrfsic_state *state, " to %c @%llu (%s/%llu/%d).\n", l->ref_cnt, btrfsic_get_block_type(state, l->block_ref_from), - (unsigned long long)l->block_ref_from->logical_bytenr, + l->block_ref_from->logical_bytenr, l->block_ref_from->dev_state->name, - (unsigned long long)l->block_ref_from->dev_bytenr, - l->block_ref_from->mirror_num, + l->block_ref_from->dev_bytenr, l->block_ref_from->mirror_num, btrfsic_get_block_type(state, l->block_ref_to), - (unsigned long long)l->block_ref_to->logical_bytenr, - l->block_ref_to->dev_state->name, - (unsigned long long)l->block_ref_to->dev_bytenr, + l->block_ref_to->logical_bytenr, + l->block_ref_to->dev_state->name, l->block_ref_to->dev_bytenr, l->block_ref_to->mirror_num); } @@ -2807,10 +2724,8 @@ static void btrfsic_dump_tree_sub(const struct btrfsic_state *state, */ indent_add = sprintf(buf, "%c-%llu(%s/%llu/%d)", btrfsic_get_block_type(state, block), - (unsigned long long)block->logical_bytenr, - block->dev_state->name, - (unsigned long long)block->dev_bytenr, - block->mirror_num); + block->logical_bytenr, block->dev_state->name, + block->dev_bytenr, block->mirror_num); if (indent_level + indent_add > BTRFSIC_TREE_DUMP_MAX_INDENT_LEVEL) { printk("[...]\n"); return; @@ -2943,10 +2858,8 @@ static struct btrfsic_block *btrfsic_block_lookup_or_add( "New %s%c-block @%llu (%s/%llu/%d)\n", additional_string, btrfsic_get_block_type(state, block), - (unsigned long long)block->logical_bytenr, - dev_state->name, - (unsigned long long)block->dev_bytenr, - mirror_num); + block->logical_bytenr, dev_state->name, + block->dev_bytenr, mirror_num); list_add(&block->all_blocks_node, &state->all_blocks_list); btrfsic_block_hashtable_add(block, &state->block_hashtable); if (NULL != was_created) @@ -2980,7 +2893,7 @@ static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state, printk(KERN_INFO "btrfsic:" " btrfsic_map_block(logical @%llu," " mirror %d) failed!\n", - (unsigned long long)bytenr, mirror_num); + bytenr, mirror_num); continue; } @@ -2997,8 +2910,7 @@ static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state, printk(KERN_INFO "btrfs: attempt to write M-block which contains logical bytenr that doesn't map to dev+physical bytenr of submit_bio," " buffer->log_bytenr=%llu, submit_bio(bdev=%s," " phys_bytenr=%llu)!\n", - (unsigned long long)bytenr, dev_state->name, - (unsigned long long)dev_bytenr); + bytenr, dev_state->name, dev_bytenr); for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) { ret = btrfsic_map_block(state, bytenr, state->metablock_size, @@ -3008,10 +2920,8 @@ static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state, printk(KERN_INFO "Read logical bytenr @%llu maps to" " (%s/%llu/%d)\n", - (unsigned long long)bytenr, - block_ctx.dev->name, - (unsigned long long)block_ctx.dev_bytenr, - mirror_num); + bytenr, block_ctx.dev->name, + block_ctx.dev_bytenr, mirror_num); } WARN_ON(1); } @@ -3048,12 +2958,10 @@ int btrfsic_submit_bh(int rw, struct buffer_head *bh) if (dev_state->state->print_mask & BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH) printk(KERN_INFO - "submit_bh(rw=0x%x, blocknr=%lu (bytenr %llu)," - " size=%lu, data=%p, bdev=%p)\n", - rw, (unsigned long)bh->b_blocknr, - (unsigned long long)dev_bytenr, - (unsigned long)bh->b_size, bh->b_data, - bh->b_bdev); + "submit_bh(rw=0x%x, blocknr=%llu (bytenr %llu)," + " size=%zu, data=%p, bdev=%p)\n", + rw, (unsigned long long)bh->b_blocknr, + dev_bytenr, bh->b_size, bh->b_data, bh->b_bdev); btrfsic_process_written_block(dev_state, dev_bytenr, &bh->b_data, 1, NULL, NULL, bh, rw); @@ -3118,9 +3026,9 @@ void btrfsic_submit_bio(int rw, struct bio *bio) BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH) printk(KERN_INFO "submit_bio(rw=0x%x, bi_vcnt=%u," - " bi_sector=%lu (bytenr %llu), bi_bdev=%p)\n", - rw, bio->bi_vcnt, (unsigned long)bio->bi_sector, - (unsigned long long)dev_bytenr, + " bi_sector=%llu (bytenr %llu), bi_bdev=%p)\n", + rw, bio->bi_vcnt, + (unsigned long long)bio->bi_sector, dev_bytenr, bio->bi_bdev); mapped_datav = kmalloc(sizeof(*mapped_datav) * bio->bi_vcnt, @@ -3213,19 +3121,19 @@ int btrfsic_mount(struct btrfs_root *root, if (root->nodesize & ((u64)PAGE_CACHE_SIZE - 1)) { printk(KERN_INFO "btrfsic: cannot handle nodesize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n", - root->nodesize, (unsigned long)PAGE_CACHE_SIZE); + root->nodesize, PAGE_CACHE_SIZE); return -1; } if (root->leafsize & ((u64)PAGE_CACHE_SIZE - 1)) { printk(KERN_INFO "btrfsic: cannot handle leafsize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n", - root->leafsize, (unsigned long)PAGE_CACHE_SIZE); + root->leafsize, PAGE_CACHE_SIZE); return -1; } if (root->sectorsize & ((u64)PAGE_CACHE_SIZE - 1)) { printk(KERN_INFO "btrfsic: cannot handle sectorsize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n", - root->sectorsize, (unsigned long)PAGE_CACHE_SIZE); + root->sectorsize, PAGE_CACHE_SIZE); return -1; } state = kzalloc(sizeof(*state), GFP_NOFS); @@ -3369,10 +3277,8 @@ void btrfsic_unmount(struct btrfs_root *root, " @%llu (%s/%llu/%d) on umount which is" " not yet iodone!\n", btrfsic_get_block_type(state, b_all), - (unsigned long long)b_all->logical_bytenr, - b_all->dev_state->name, - (unsigned long long)b_all->dev_bytenr, - b_all->mirror_num); + b_all->logical_bytenr, b_all->dev_state->name, + b_all->dev_bytenr, b_all->mirror_num); } mutex_unlock(&btrfsic_mutex); diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index b189bd1e7a3e..6aad98cb343f 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -132,9 +132,8 @@ static int check_compressed_csum(struct inode *inode, printk(KERN_INFO "btrfs csum failed ino %llu " "extent %llu csum %u " "wanted %u mirror %d\n", - (unsigned long long)btrfs_ino(inode), - (unsigned long long)disk_start, - csum, *cb_sum, cb->mirror_num); + btrfs_ino(inode), disk_start, csum, *cb_sum, + cb->mirror_num); ret = -EIO; goto fail; } @@ -639,7 +638,11 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, faili = nr_pages - 1; cb->nr_pages = nr_pages; - add_ra_bio_pages(inode, em_start + em_len, cb); + /* In the parent-locked case, we only locked the range we are + * interested in. In all other cases, we can opportunistically + * cache decompressed data that goes beyond the requested range. */ + if (!(bio_flags & EXTENT_BIO_PARENT_LOCKED)) + add_ra_bio_pages(inode, em_start + em_len, cb); /* include any pages we added in add_ra-bio_pages */ uncompressed_len = bio->bi_vcnt * PAGE_CACHE_SIZE; diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index ed504607d8ec..64346721173f 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -274,8 +274,7 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans, else btrfs_set_header_owner(cow, new_root_objectid); - write_extent_buffer(cow, root->fs_info->fsid, - (unsigned long)btrfs_header_fsid(cow), + write_extent_buffer(cow, root->fs_info->fsid, btrfs_header_fsid(cow), BTRFS_FSID_SIZE); WARN_ON(btrfs_header_generation(buf) > trans->transid); @@ -484,8 +483,27 @@ __tree_mod_log_insert(struct btrfs_fs_info *fs_info, struct tree_mod_elem *tm) struct rb_node **new; struct rb_node *parent = NULL; struct tree_mod_elem *cur; + int ret = 0; + + BUG_ON(!tm); + + tree_mod_log_write_lock(fs_info); + if (list_empty(&fs_info->tree_mod_seq_list)) { + tree_mod_log_write_unlock(fs_info); + /* + * Ok we no longer care about logging modifications, free up tm + * and return 0. Any callers shouldn't be using tm after + * calling tree_mod_log_insert, but if they do we can just + * change this to return a special error code to let the callers + * do their own thing. + */ + kfree(tm); + return 0; + } - BUG_ON(!tm || !tm->seq); + spin_lock(&fs_info->tree_mod_seq_lock); + tm->seq = btrfs_inc_tree_mod_seq_minor(fs_info); + spin_unlock(&fs_info->tree_mod_seq_lock); tm_root = &fs_info->tree_mod_log; new = &tm_root->rb_node; @@ -501,14 +519,17 @@ __tree_mod_log_insert(struct btrfs_fs_info *fs_info, struct tree_mod_elem *tm) else if (cur->seq > tm->seq) new = &((*new)->rb_right); else { + ret = -EEXIST; kfree(tm); - return -EEXIST; + goto out; } } rb_link_node(&tm->node, parent, new); rb_insert_color(&tm->node, tm_root); - return 0; +out: + tree_mod_log_write_unlock(fs_info); + return ret; } /* @@ -524,57 +545,19 @@ static inline int tree_mod_dont_log(struct btrfs_fs_info *fs_info, return 1; if (eb && btrfs_header_level(eb) == 0) return 1; - - tree_mod_log_write_lock(fs_info); - if (list_empty(&fs_info->tree_mod_seq_list)) { - /* - * someone emptied the list while we were waiting for the lock. - * we must not add to the list when no blocker exists. - */ - tree_mod_log_write_unlock(fs_info); - return 1; - } - return 0; } -/* - * This allocates memory and gets a tree modification sequence number. - * - * Returns <0 on error. - * Returns >0 (the added sequence number) on success. - */ -static inline int tree_mod_alloc(struct btrfs_fs_info *fs_info, gfp_t flags, - struct tree_mod_elem **tm_ret) -{ - struct tree_mod_elem *tm; - - /* - * once we switch from spin locks to something different, we should - * honor the flags parameter here. - */ - tm = *tm_ret = kzalloc(sizeof(*tm), GFP_ATOMIC); - if (!tm) - return -ENOMEM; - - spin_lock(&fs_info->tree_mod_seq_lock); - tm->seq = btrfs_inc_tree_mod_seq_minor(fs_info); - spin_unlock(&fs_info->tree_mod_seq_lock); - - return tm->seq; -} - static inline int __tree_mod_log_insert_key(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, int slot, enum mod_log_op op, gfp_t flags) { - int ret; struct tree_mod_elem *tm; - ret = tree_mod_alloc(fs_info, flags, &tm); - if (ret < 0) - return ret; + tm = kzalloc(sizeof(*tm), flags); + if (!tm) + return -ENOMEM; tm->index = eb->start >> PAGE_CACHE_SHIFT; if (op != MOD_LOG_KEY_ADD) { @@ -589,34 +572,14 @@ __tree_mod_log_insert_key(struct btrfs_fs_info *fs_info, } static noinline int -tree_mod_log_insert_key_mask(struct btrfs_fs_info *fs_info, - struct extent_buffer *eb, int slot, - enum mod_log_op op, gfp_t flags) +tree_mod_log_insert_key(struct btrfs_fs_info *fs_info, + struct extent_buffer *eb, int slot, + enum mod_log_op op, gfp_t flags) { - int ret; - if (tree_mod_dont_log(fs_info, eb)) return 0; - ret = __tree_mod_log_insert_key(fs_info, eb, slot, op, flags); - - tree_mod_log_write_unlock(fs_info); - return ret; -} - -static noinline int -tree_mod_log_insert_key(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, - int slot, enum mod_log_op op) -{ - return tree_mod_log_insert_key_mask(fs_info, eb, slot, op, GFP_NOFS); -} - -static noinline int -tree_mod_log_insert_key_locked(struct btrfs_fs_info *fs_info, - struct extent_buffer *eb, int slot, - enum mod_log_op op) -{ - return __tree_mod_log_insert_key(fs_info, eb, slot, op, GFP_NOFS); + return __tree_mod_log_insert_key(fs_info, eb, slot, op, flags); } static noinline int @@ -637,14 +600,14 @@ tree_mod_log_insert_move(struct btrfs_fs_info *fs_info, * buffer, i.e. dst_slot < src_slot. */ for (i = 0; i + dst_slot < src_slot && i < nr_items; i++) { - ret = tree_mod_log_insert_key_locked(fs_info, eb, i + dst_slot, - MOD_LOG_KEY_REMOVE_WHILE_MOVING); + ret = __tree_mod_log_insert_key(fs_info, eb, i + dst_slot, + MOD_LOG_KEY_REMOVE_WHILE_MOVING, GFP_NOFS); BUG_ON(ret < 0); } - ret = tree_mod_alloc(fs_info, flags, &tm); - if (ret < 0) - goto out; + tm = kzalloc(sizeof(*tm), flags); + if (!tm) + return -ENOMEM; tm->index = eb->start >> PAGE_CACHE_SHIFT; tm->slot = src_slot; @@ -652,10 +615,7 @@ tree_mod_log_insert_move(struct btrfs_fs_info *fs_info, tm->move.nr_items = nr_items; tm->op = MOD_LOG_MOVE_KEYS; - ret = __tree_mod_log_insert(fs_info, tm); -out: - tree_mod_log_write_unlock(fs_info); - return ret; + return __tree_mod_log_insert(fs_info, tm); } static inline void @@ -670,8 +630,8 @@ __tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, struct extent_buffer *eb) nritems = btrfs_header_nritems(eb); for (i = nritems - 1; i >= 0; i--) { - ret = tree_mod_log_insert_key_locked(fs_info, eb, i, - MOD_LOG_KEY_REMOVE_WHILE_FREEING); + ret = __tree_mod_log_insert_key(fs_info, eb, i, + MOD_LOG_KEY_REMOVE_WHILE_FREEING, GFP_NOFS); BUG_ON(ret < 0); } } @@ -683,7 +643,6 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info, int log_removal) { struct tree_mod_elem *tm; - int ret; if (tree_mod_dont_log(fs_info, NULL)) return 0; @@ -691,9 +650,9 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info, if (log_removal) __tree_mod_log_free_eb(fs_info, old_root); - ret = tree_mod_alloc(fs_info, flags, &tm); - if (ret < 0) - goto out; + tm = kzalloc(sizeof(*tm), flags); + if (!tm) + return -ENOMEM; tm->index = new_root->start >> PAGE_CACHE_SHIFT; tm->old_root.logical = old_root->start; @@ -701,10 +660,7 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info, tm->generation = btrfs_header_generation(old_root); tm->op = MOD_LOG_ROOT_REPLACE; - ret = __tree_mod_log_insert(fs_info, tm); -out: - tree_mod_log_write_unlock(fs_info); - return ret; + return __tree_mod_log_insert(fs_info, tm); } static struct tree_mod_elem * @@ -784,23 +740,20 @@ tree_mod_log_eb_copy(struct btrfs_fs_info *fs_info, struct extent_buffer *dst, if (tree_mod_dont_log(fs_info, NULL)) return; - if (btrfs_header_level(dst) == 0 && btrfs_header_level(src) == 0) { - tree_mod_log_write_unlock(fs_info); + if (btrfs_header_level(dst) == 0 && btrfs_header_level(src) == 0) return; - } for (i = 0; i < nr_items; i++) { - ret = tree_mod_log_insert_key_locked(fs_info, src, + ret = __tree_mod_log_insert_key(fs_info, src, i + src_offset, - MOD_LOG_KEY_REMOVE); + MOD_LOG_KEY_REMOVE, GFP_NOFS); BUG_ON(ret < 0); - ret = tree_mod_log_insert_key_locked(fs_info, dst, + ret = __tree_mod_log_insert_key(fs_info, dst, i + dst_offset, - MOD_LOG_KEY_ADD); + MOD_LOG_KEY_ADD, + GFP_NOFS); BUG_ON(ret < 0); } - - tree_mod_log_write_unlock(fs_info); } static inline void @@ -819,9 +772,9 @@ tree_mod_log_set_node_key(struct btrfs_fs_info *fs_info, { int ret; - ret = tree_mod_log_insert_key_mask(fs_info, eb, slot, - MOD_LOG_KEY_REPLACE, - atomic ? GFP_ATOMIC : GFP_NOFS); + ret = __tree_mod_log_insert_key(fs_info, eb, slot, + MOD_LOG_KEY_REPLACE, + atomic ? GFP_ATOMIC : GFP_NOFS); BUG_ON(ret < 0); } @@ -830,10 +783,7 @@ tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, struct extent_buffer *eb) { if (tree_mod_dont_log(fs_info, eb)) return; - __tree_mod_log_free_eb(fs_info, eb); - - tree_mod_log_write_unlock(fs_info); } static noinline void @@ -1046,8 +996,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, else btrfs_set_header_owner(cow, root->root_key.objectid); - write_extent_buffer(cow, root->fs_info->fsid, - (unsigned long)btrfs_header_fsid(cow), + write_extent_buffer(cow, root->fs_info->fsid, btrfs_header_fsid(cow), BTRFS_FSID_SIZE); ret = update_ref_for_cow(trans, root, buf, cow, &last_ref); @@ -1083,7 +1032,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, WARN_ON(trans->transid != btrfs_header_generation(parent)); tree_mod_log_insert_key(root->fs_info, parent, parent_slot, - MOD_LOG_KEY_REPLACE); + MOD_LOG_KEY_REPLACE, GFP_NOFS); btrfs_set_node_blockptr(parent, parent_slot, cow->start); btrfs_set_node_ptr_generation(parent, parent_slot, @@ -1116,7 +1065,7 @@ __tree_mod_log_oldest_root(struct btrfs_fs_info *fs_info, int looped = 0; if (!time_seq) - return 0; + return NULL; /* * the very last operation that's logged for a root is the replacement @@ -1127,7 +1076,7 @@ __tree_mod_log_oldest_root(struct btrfs_fs_info *fs_info, tm = tree_mod_log_search_oldest(fs_info, root_logical, time_seq); if (!looped && !tm) - return 0; + return NULL; /* * if there are no tree operation for the oldest root, we simply * return it. this should only happen if that (old) root is at @@ -1240,8 +1189,8 @@ __tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, * is freed (its refcount is decremented). */ static struct extent_buffer * -tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, - u64 time_seq) +tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct btrfs_path *path, + struct extent_buffer *eb, u64 time_seq) { struct extent_buffer *eb_rewin; struct tree_mod_elem *tm; @@ -1256,11 +1205,18 @@ tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, if (!tm) return eb; + btrfs_set_path_blocking(path); + btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK); + if (tm->op == MOD_LOG_KEY_REMOVE_WHILE_FREEING) { BUG_ON(tm->slot != 0); eb_rewin = alloc_dummy_extent_buffer(eb->start, fs_info->tree_root->nodesize); - BUG_ON(!eb_rewin); + if (!eb_rewin) { + btrfs_tree_read_unlock_blocking(eb); + free_extent_buffer(eb); + return NULL; + } btrfs_set_header_bytenr(eb_rewin, eb->start); btrfs_set_header_backref_rev(eb_rewin, btrfs_header_backref_rev(eb)); @@ -1268,10 +1224,15 @@ tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, btrfs_set_header_level(eb_rewin, btrfs_header_level(eb)); } else { eb_rewin = btrfs_clone_extent_buffer(eb); - BUG_ON(!eb_rewin); + if (!eb_rewin) { + btrfs_tree_read_unlock_blocking(eb); + free_extent_buffer(eb); + return NULL; + } } - btrfs_tree_read_unlock(eb); + btrfs_clear_path_blocking(path, NULL, BTRFS_READ_LOCK); + btrfs_tree_read_unlock_blocking(eb); free_extent_buffer(eb); extent_buffer_get(eb_rewin); @@ -1335,8 +1296,9 @@ get_old_root(struct btrfs_root *root, u64 time_seq) free_extent_buffer(eb_root); eb = alloc_dummy_extent_buffer(logical, root->nodesize); } else { + btrfs_set_lock_blocking_rw(eb_root, BTRFS_READ_LOCK); eb = btrfs_clone_extent_buffer(eb_root); - btrfs_tree_read_unlock(eb_root); + btrfs_tree_read_unlock_blocking(eb_root); free_extent_buffer(eb_root); } @@ -1419,14 +1381,12 @@ noinline int btrfs_cow_block(struct btrfs_trans_handle *trans, if (trans->transaction != root->fs_info->running_transaction) WARN(1, KERN_CRIT "trans %llu running %llu\n", - (unsigned long long)trans->transid, - (unsigned long long) + trans->transid, root->fs_info->running_transaction->transid); if (trans->transid != root->fs_info->generation) WARN(1, KERN_CRIT "trans %llu running %llu\n", - (unsigned long long)trans->transid, - (unsigned long long)root->fs_info->generation); + trans->transid, root->fs_info->generation); if (!should_cow_block(trans, root, buf)) { *cow_ret = buf; @@ -2466,6 +2426,40 @@ done: return ret; } +static void key_search_validate(struct extent_buffer *b, + struct btrfs_key *key, + int level) +{ +#ifdef CONFIG_BTRFS_ASSERT + struct btrfs_disk_key disk_key; + + btrfs_cpu_key_to_disk(&disk_key, key); + + if (level == 0) + ASSERT(!memcmp_extent_buffer(b, &disk_key, + offsetof(struct btrfs_leaf, items[0].key), + sizeof(disk_key))); + else + ASSERT(!memcmp_extent_buffer(b, &disk_key, + offsetof(struct btrfs_node, ptrs[0].key), + sizeof(disk_key))); +#endif +} + +static int key_search(struct extent_buffer *b, struct btrfs_key *key, + int level, int *prev_cmp, int *slot) +{ + if (*prev_cmp != 0) { + *prev_cmp = bin_search(b, key, level, slot); + return *prev_cmp; + } + + key_search_validate(b, key, level); + *slot = 0; + + return 0; +} + /* * look for key in the tree. path is filled in with nodes along the way * if key is found, we return zero and you can find the item in the leaf @@ -2494,6 +2488,7 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root int write_lock_level = 0; u8 lowest_level = 0; int min_write_lock_level; + int prev_cmp; lowest_level = p->lowest_level; WARN_ON(lowest_level && ins_len > 0); @@ -2524,6 +2519,7 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root min_write_lock_level = write_lock_level; again: + prev_cmp = -1; /* * we try very hard to do read locks on the root */ @@ -2624,7 +2620,7 @@ cow_done: if (!cow) btrfs_unlock_up_safe(p, level + 1); - ret = bin_search(b, key, level, &slot); + ret = key_search(b, key, level, &prev_cmp, &slot); if (level != 0) { int dec = 0; @@ -2759,6 +2755,7 @@ int btrfs_search_old_slot(struct btrfs_root *root, struct btrfs_key *key, int level; int lowest_unlock = 1; u8 lowest_level = 0; + int prev_cmp; lowest_level = p->lowest_level; WARN_ON(p->nodes[0] != NULL); @@ -2769,6 +2766,7 @@ int btrfs_search_old_slot(struct btrfs_root *root, struct btrfs_key *key, } again: + prev_cmp = -1; b = get_old_root(root, time_seq); level = btrfs_header_level(b); p->locks[level] = BTRFS_READ_LOCK; @@ -2786,7 +2784,7 @@ again: */ btrfs_unlock_up_safe(p, level + 1); - ret = bin_search(b, key, level, &slot); + ret = key_search(b, key, level, &prev_cmp, &slot); if (level != 0) { int dec = 0; @@ -2820,7 +2818,11 @@ again: btrfs_clear_path_blocking(p, b, BTRFS_READ_LOCK); } - b = tree_mod_log_rewind(root->fs_info, b, time_seq); + b = tree_mod_log_rewind(root->fs_info, p, b, time_seq); + if (!b) { + ret = -ENOMEM; + goto done; + } p->locks[level] = BTRFS_READ_LOCK; p->nodes[level] = b; } else { @@ -3143,13 +3145,11 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans, btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV); btrfs_set_header_owner(c, root->root_key.objectid); - write_extent_buffer(c, root->fs_info->fsid, - (unsigned long)btrfs_header_fsid(c), + write_extent_buffer(c, root->fs_info->fsid, btrfs_header_fsid(c), BTRFS_FSID_SIZE); write_extent_buffer(c, root->fs_info->chunk_tree_uuid, - (unsigned long)btrfs_header_chunk_tree_uuid(c), - BTRFS_UUID_SIZE); + btrfs_header_chunk_tree_uuid(c), BTRFS_UUID_SIZE); btrfs_set_node_key(c, &lower_key, 0); btrfs_set_node_blockptr(c, 0, lower->start); @@ -3208,7 +3208,7 @@ static void insert_ptr(struct btrfs_trans_handle *trans, } if (level) { ret = tree_mod_log_insert_key(root->fs_info, lower, slot, - MOD_LOG_KEY_ADD); + MOD_LOG_KEY_ADD, GFP_NOFS); BUG_ON(ret < 0); } btrfs_set_node_key(lower, key, slot); @@ -3284,10 +3284,9 @@ static noinline int split_node(struct btrfs_trans_handle *trans, btrfs_set_header_backref_rev(split, BTRFS_MIXED_BACKREF_REV); btrfs_set_header_owner(split, root->root_key.objectid); write_extent_buffer(split, root->fs_info->fsid, - (unsigned long)btrfs_header_fsid(split), - BTRFS_FSID_SIZE); + btrfs_header_fsid(split), BTRFS_FSID_SIZE); write_extent_buffer(split, root->fs_info->chunk_tree_uuid, - (unsigned long)btrfs_header_chunk_tree_uuid(split), + btrfs_header_chunk_tree_uuid(split), BTRFS_UUID_SIZE); tree_mod_log_eb_copy(root->fs_info, split, c, 0, mid, c_nritems - mid); @@ -4040,11 +4039,10 @@ again: btrfs_set_header_owner(right, root->root_key.objectid); btrfs_set_header_level(right, 0); write_extent_buffer(right, root->fs_info->fsid, - (unsigned long)btrfs_header_fsid(right), - BTRFS_FSID_SIZE); + btrfs_header_fsid(right), BTRFS_FSID_SIZE); write_extent_buffer(right, root->fs_info->chunk_tree_uuid, - (unsigned long)btrfs_header_chunk_tree_uuid(right), + btrfs_header_chunk_tree_uuid(right), BTRFS_UUID_SIZE); if (split == 0) { @@ -4642,7 +4640,7 @@ static void del_ptr(struct btrfs_root *root, struct btrfs_path *path, (nritems - slot - 1)); } else if (level) { ret = tree_mod_log_insert_key(root->fs_info, parent, slot, - MOD_LOG_KEY_REMOVE); + MOD_LOG_KEY_REMOVE, GFP_NOFS); BUG_ON(ret < 0); } @@ -4814,7 +4812,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, * This may release the path, and so you may lose any locks held at the * time you call it. */ -int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path) +static int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path) { struct btrfs_key key; struct btrfs_disk_key found_key; @@ -5329,19 +5327,20 @@ int btrfs_compare_trees(struct btrfs_root *left_root, goto out; advance_right = ADVANCE; } else { + enum btrfs_compare_tree_result cmp; + WARN_ON(!extent_buffer_uptodate(left_path->nodes[0])); ret = tree_compare_item(left_root, left_path, right_path, tmp_buf); - if (ret) { - WARN_ON(!extent_buffer_uptodate(left_path->nodes[0])); - ret = changed_cb(left_root, right_root, - left_path, right_path, - &left_key, - BTRFS_COMPARE_TREE_CHANGED, - ctx); - if (ret < 0) - goto out; - } + if (ret) + cmp = BTRFS_COMPARE_TREE_CHANGED; + else + cmp = BTRFS_COMPARE_TREE_SAME; + ret = changed_cb(left_root, right_root, + left_path, right_path, + &left_key, cmp, ctx); + if (ret < 0) + goto out; advance_left = ADVANCE; advance_right = ADVANCE; } diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index e795bf135e80..3c1da6f98a4d 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -23,6 +23,7 @@ #include <linux/highmem.h> #include <linux/fs.h> #include <linux/rwsem.h> +#include <linux/semaphore.h> #include <linux/completion.h> #include <linux/backing-dev.h> #include <linux/wait.h> @@ -91,6 +92,9 @@ struct btrfs_ordered_sum; /* holds quota configuration and tracking */ #define BTRFS_QUOTA_TREE_OBJECTID 8ULL +/* for storing items that use the BTRFS_UUID_KEY* types */ +#define BTRFS_UUID_TREE_OBJECTID 9ULL + /* for storing balance parameters in the root tree */ #define BTRFS_BALANCE_OBJECTID -4ULL @@ -142,7 +146,7 @@ struct btrfs_ordered_sum; #define BTRFS_EMPTY_SUBVOL_DIR_OBJECTID 2 -#define BTRFS_DEV_REPLACE_DEVID 0 +#define BTRFS_DEV_REPLACE_DEVID 0ULL /* * the max metadata block size. This limit is somewhat artificial, @@ -478,9 +482,10 @@ struct btrfs_super_block { char label[BTRFS_LABEL_SIZE]; __le64 cache_generation; + __le64 uuid_tree_generation; /* future expansion */ - __le64 reserved[31]; + __le64 reserved[30]; u8 sys_chunk_array[BTRFS_SYSTEM_CHUNK_ARRAY_SIZE]; struct btrfs_root_backup super_roots[BTRFS_NUM_BACKUP_ROOTS]; } __attribute__ ((__packed__)); @@ -1188,6 +1193,7 @@ enum btrfs_caching_type { BTRFS_CACHE_STARTED = 1, BTRFS_CACHE_FAST = 2, BTRFS_CACHE_FINISHED = 3, + BTRFS_CACHE_ERROR = 4, }; enum btrfs_disk_cache_state { @@ -1302,6 +1308,7 @@ struct btrfs_fs_info { struct btrfs_root *fs_root; struct btrfs_root *csum_root; struct btrfs_root *quota_root; + struct btrfs_root *uuid_root; /* the log root tree is a directory of all the other log roots */ struct btrfs_root *log_root_tree; @@ -1350,6 +1357,7 @@ struct btrfs_fs_info { u64 last_trans_log_full_commit; unsigned long mount_opt; unsigned long compress_type:4; + int commit_interval; /* * It is a suggestive number, the read side is safe even it gets a * wrong number because we will write out the data into a regular @@ -1411,6 +1419,13 @@ struct btrfs_fs_info { * before jumping into the main commit. */ struct mutex ordered_operations_mutex; + + /* + * Same as ordered_operations_mutex except this is for ordered extents + * and not the operations. + */ + struct mutex ordered_extent_flush_mutex; + struct rw_semaphore extent_commit_sem; struct rw_semaphore cleanup_work_sem; @@ -1641,6 +1656,9 @@ struct btrfs_fs_info { struct btrfs_dev_replace dev_replace; atomic_t mutually_exclusive_operation_running; + + struct semaphore uuid_tree_rescan_sem; + unsigned int update_uuid_tree_gen:1; }; /* @@ -1934,6 +1952,19 @@ struct btrfs_ioctl_defrag_range_args { #define BTRFS_DEV_REPLACE_KEY 250 /* + * Stores items that allow to quickly map UUIDs to something else. + * These items are part of the filesystem UUID tree. + * The key is built like this: + * (UUID_upper_64_bits, BTRFS_UUID_KEY*, UUID_lower_64_bits). + */ +#if BTRFS_UUID_SIZE != 16 +#error "UUID items require BTRFS_UUID_SIZE == 16!" +#endif +#define BTRFS_UUID_KEY_SUBVOL 251 /* for UUIDs assigned to subvols */ +#define BTRFS_UUID_KEY_RECEIVED_SUBVOL 252 /* for UUIDs assigned to + * received subvols */ + +/* * string items are for debugging. They just store a short string of * data in the FS */ @@ -1967,6 +1998,9 @@ struct btrfs_ioctl_defrag_range_args { #define BTRFS_MOUNT_CHECK_INTEGRITY (1 << 20) #define BTRFS_MOUNT_CHECK_INTEGRITY_INCLUDING_EXTENT_DATA (1 << 21) #define BTRFS_MOUNT_PANIC_ON_FATAL_ERROR (1 << 22) +#define BTRFS_MOUNT_RESCAN_UUID_TREE (1 << 23) + +#define BTRFS_DEFAULT_COMMIT_INTERVAL (30) #define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt) #define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt) @@ -2130,14 +2164,14 @@ BTRFS_SETGET_STACK_FUNCS(stack_device_bandwidth, struct btrfs_dev_item, BTRFS_SETGET_STACK_FUNCS(stack_device_generation, struct btrfs_dev_item, generation, 64); -static inline char *btrfs_device_uuid(struct btrfs_dev_item *d) +static inline unsigned long btrfs_device_uuid(struct btrfs_dev_item *d) { - return (char *)d + offsetof(struct btrfs_dev_item, uuid); + return (unsigned long)d + offsetof(struct btrfs_dev_item, uuid); } -static inline char *btrfs_device_fsid(struct btrfs_dev_item *d) +static inline unsigned long btrfs_device_fsid(struct btrfs_dev_item *d) { - return (char *)d + offsetof(struct btrfs_dev_item, fsid); + return (unsigned long)d + offsetof(struct btrfs_dev_item, fsid); } BTRFS_SETGET_FUNCS(chunk_length, struct btrfs_chunk, length, 64); @@ -2240,6 +2274,23 @@ BTRFS_SETGET_FUNCS(inode_gid, struct btrfs_inode_item, gid, 32); BTRFS_SETGET_FUNCS(inode_mode, struct btrfs_inode_item, mode, 32); BTRFS_SETGET_FUNCS(inode_rdev, struct btrfs_inode_item, rdev, 64); BTRFS_SETGET_FUNCS(inode_flags, struct btrfs_inode_item, flags, 64); +BTRFS_SETGET_STACK_FUNCS(stack_inode_generation, struct btrfs_inode_item, + generation, 64); +BTRFS_SETGET_STACK_FUNCS(stack_inode_sequence, struct btrfs_inode_item, + sequence, 64); +BTRFS_SETGET_STACK_FUNCS(stack_inode_transid, struct btrfs_inode_item, + transid, 64); +BTRFS_SETGET_STACK_FUNCS(stack_inode_size, struct btrfs_inode_item, size, 64); +BTRFS_SETGET_STACK_FUNCS(stack_inode_nbytes, struct btrfs_inode_item, + nbytes, 64); +BTRFS_SETGET_STACK_FUNCS(stack_inode_block_group, struct btrfs_inode_item, + block_group, 64); +BTRFS_SETGET_STACK_FUNCS(stack_inode_nlink, struct btrfs_inode_item, nlink, 32); +BTRFS_SETGET_STACK_FUNCS(stack_inode_uid, struct btrfs_inode_item, uid, 32); +BTRFS_SETGET_STACK_FUNCS(stack_inode_gid, struct btrfs_inode_item, gid, 32); +BTRFS_SETGET_STACK_FUNCS(stack_inode_mode, struct btrfs_inode_item, mode, 32); +BTRFS_SETGET_STACK_FUNCS(stack_inode_rdev, struct btrfs_inode_item, rdev, 64); +BTRFS_SETGET_STACK_FUNCS(stack_inode_flags, struct btrfs_inode_item, flags, 64); static inline struct btrfs_timespec * btrfs_inode_atime(struct btrfs_inode_item *inode_item) @@ -2267,6 +2318,8 @@ btrfs_inode_ctime(struct btrfs_inode_item *inode_item) BTRFS_SETGET_FUNCS(timespec_sec, struct btrfs_timespec, sec, 64); BTRFS_SETGET_FUNCS(timespec_nsec, struct btrfs_timespec, nsec, 32); +BTRFS_SETGET_STACK_FUNCS(stack_timespec_sec, struct btrfs_timespec, sec, 64); +BTRFS_SETGET_STACK_FUNCS(stack_timespec_nsec, struct btrfs_timespec, nsec, 32); /* struct btrfs_dev_extent */ BTRFS_SETGET_FUNCS(dev_extent_chunk_tree, struct btrfs_dev_extent, @@ -2277,10 +2330,10 @@ BTRFS_SETGET_FUNCS(dev_extent_chunk_offset, struct btrfs_dev_extent, chunk_offset, 64); BTRFS_SETGET_FUNCS(dev_extent_length, struct btrfs_dev_extent, length, 64); -static inline u8 *btrfs_dev_extent_chunk_tree_uuid(struct btrfs_dev_extent *dev) +static inline unsigned long btrfs_dev_extent_chunk_tree_uuid(struct btrfs_dev_extent *dev) { unsigned long ptr = offsetof(struct btrfs_dev_extent, chunk_tree_uuid); - return (u8 *)((unsigned long)dev + ptr); + return (unsigned long)dev + ptr; } BTRFS_SETGET_FUNCS(extent_refs, struct btrfs_extent_item, refs, 64); @@ -2348,6 +2401,10 @@ BTRFS_SETGET_FUNCS(ref_count_v0, struct btrfs_extent_ref_v0, count, 32); /* struct btrfs_node */ BTRFS_SETGET_FUNCS(key_blockptr, struct btrfs_key_ptr, blockptr, 64); BTRFS_SETGET_FUNCS(key_generation, struct btrfs_key_ptr, generation, 64); +BTRFS_SETGET_STACK_FUNCS(stack_key_blockptr, struct btrfs_key_ptr, + blockptr, 64); +BTRFS_SETGET_STACK_FUNCS(stack_key_generation, struct btrfs_key_ptr, + generation, 64); static inline u64 btrfs_node_blockptr(struct extent_buffer *eb, int nr) { @@ -2404,6 +2461,8 @@ static inline void btrfs_set_node_key(struct extent_buffer *eb, /* struct btrfs_item */ BTRFS_SETGET_FUNCS(item_offset, struct btrfs_item, offset, 32); BTRFS_SETGET_FUNCS(item_size, struct btrfs_item, size, 32); +BTRFS_SETGET_STACK_FUNCS(stack_item_offset, struct btrfs_item, offset, 32); +BTRFS_SETGET_STACK_FUNCS(stack_item_size, struct btrfs_item, size, 32); static inline unsigned long btrfs_item_nr_offset(int nr) { @@ -2466,6 +2525,13 @@ BTRFS_SETGET_FUNCS(dir_data_len, struct btrfs_dir_item, data_len, 16); BTRFS_SETGET_FUNCS(dir_type, struct btrfs_dir_item, type, 8); BTRFS_SETGET_FUNCS(dir_name_len, struct btrfs_dir_item, name_len, 16); BTRFS_SETGET_FUNCS(dir_transid, struct btrfs_dir_item, transid, 64); +BTRFS_SETGET_STACK_FUNCS(stack_dir_type, struct btrfs_dir_item, type, 8); +BTRFS_SETGET_STACK_FUNCS(stack_dir_data_len, struct btrfs_dir_item, + data_len, 16); +BTRFS_SETGET_STACK_FUNCS(stack_dir_name_len, struct btrfs_dir_item, + name_len, 16); +BTRFS_SETGET_STACK_FUNCS(stack_dir_transid, struct btrfs_dir_item, + transid, 64); static inline void btrfs_dir_item_key(struct extent_buffer *eb, struct btrfs_dir_item *item, @@ -2568,6 +2634,12 @@ BTRFS_SETGET_HEADER_FUNCS(header_owner, struct btrfs_header, owner, 64); BTRFS_SETGET_HEADER_FUNCS(header_nritems, struct btrfs_header, nritems, 32); BTRFS_SETGET_HEADER_FUNCS(header_flags, struct btrfs_header, flags, 64); BTRFS_SETGET_HEADER_FUNCS(header_level, struct btrfs_header, level, 8); +BTRFS_SETGET_STACK_FUNCS(stack_header_generation, struct btrfs_header, + generation, 64); +BTRFS_SETGET_STACK_FUNCS(stack_header_owner, struct btrfs_header, owner, 64); +BTRFS_SETGET_STACK_FUNCS(stack_header_nritems, struct btrfs_header, + nritems, 32); +BTRFS_SETGET_STACK_FUNCS(stack_header_bytenr, struct btrfs_header, bytenr, 64); static inline int btrfs_header_flag(struct extent_buffer *eb, u64 flag) { @@ -2603,16 +2675,14 @@ static inline void btrfs_set_header_backref_rev(struct extent_buffer *eb, btrfs_set_header_flags(eb, flags); } -static inline u8 *btrfs_header_fsid(struct extent_buffer *eb) +static inline unsigned long btrfs_header_fsid(struct extent_buffer *eb) { - unsigned long ptr = offsetof(struct btrfs_header, fsid); - return (u8 *)ptr; + return offsetof(struct btrfs_header, fsid); } -static inline u8 *btrfs_header_chunk_tree_uuid(struct extent_buffer *eb) +static inline unsigned long btrfs_header_chunk_tree_uuid(struct extent_buffer *eb) { - unsigned long ptr = offsetof(struct btrfs_header, chunk_tree_uuid); - return (u8 *)ptr; + return offsetof(struct btrfs_header, chunk_tree_uuid); } static inline int btrfs_is_leaf(struct extent_buffer *eb) @@ -2830,6 +2900,9 @@ BTRFS_SETGET_STACK_FUNCS(super_csum_type, struct btrfs_super_block, csum_type, 16); BTRFS_SETGET_STACK_FUNCS(super_cache_generation, struct btrfs_super_block, cache_generation, 64); +BTRFS_SETGET_STACK_FUNCS(super_magic, struct btrfs_super_block, magic, 64); +BTRFS_SETGET_STACK_FUNCS(super_uuid_tree_generation, struct btrfs_super_block, + uuid_tree_generation, 64); static inline int btrfs_super_csum_size(struct btrfs_super_block *s) { @@ -2847,6 +2920,14 @@ static inline unsigned long btrfs_leaf_data(struct extent_buffer *l) /* struct btrfs_file_extent_item */ BTRFS_SETGET_FUNCS(file_extent_type, struct btrfs_file_extent_item, type, 8); +BTRFS_SETGET_STACK_FUNCS(stack_file_extent_disk_bytenr, + struct btrfs_file_extent_item, disk_bytenr, 64); +BTRFS_SETGET_STACK_FUNCS(stack_file_extent_offset, + struct btrfs_file_extent_item, offset, 64); +BTRFS_SETGET_STACK_FUNCS(stack_file_extent_generation, + struct btrfs_file_extent_item, generation, 64); +BTRFS_SETGET_STACK_FUNCS(stack_file_extent_num_bytes, + struct btrfs_file_extent_item, num_bytes, 64); static inline unsigned long btrfs_file_extent_inline_start(struct btrfs_file_extent_item *e) @@ -3107,11 +3188,9 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 root_objectid, u64 owner, u64 offset, struct btrfs_key *ins); -int btrfs_reserve_extent(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - u64 num_bytes, u64 min_alloc_size, - u64 empty_size, u64 hint_byte, - struct btrfs_key *ins, int is_data); +int btrfs_reserve_extent(struct btrfs_root *root, u64 num_bytes, + u64 min_alloc_size, u64 empty_size, u64 hint_byte, + struct btrfs_key *ins, int is_data); int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf, int full_backref, int for_cow); int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, @@ -3175,7 +3254,7 @@ void btrfs_orphan_release_metadata(struct inode *inode); int btrfs_subvolume_reserve_metadata(struct btrfs_root *root, struct btrfs_block_rsv *rsv, int nitems, - u64 *qgroup_reserved); + u64 *qgroup_reserved, bool use_global_rsv); void btrfs_subvolume_release_metadata(struct btrfs_root *root, struct btrfs_block_rsv *rsv, u64 qgroup_reserved); @@ -3245,6 +3324,7 @@ enum btrfs_compare_tree_result { BTRFS_COMPARE_TREE_NEW, BTRFS_COMPARE_TREE_DELETED, BTRFS_COMPARE_TREE_CHANGED, + BTRFS_COMPARE_TREE_SAME, }; typedef int (*btrfs_changed_cb_t)(struct btrfs_root *left_root, struct btrfs_root *right_root, @@ -3380,6 +3460,7 @@ static inline void free_fs_info(struct btrfs_fs_info *fs_info) kfree(fs_info->dev_root); kfree(fs_info->csum_root); kfree(fs_info->quota_root); + kfree(fs_info->uuid_root); kfree(fs_info->super_copy); kfree(fs_info->super_for_commit); kfree(fs_info); @@ -3414,8 +3495,6 @@ int __must_check btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_key *key, struct btrfs_root_item *item); -void btrfs_read_root_item(struct extent_buffer *eb, int slot, - struct btrfs_root_item *item); int btrfs_find_root(struct btrfs_root *root, struct btrfs_key *search_key, struct btrfs_path *path, struct btrfs_root_item *root_item, struct btrfs_key *root_key); @@ -3426,6 +3505,17 @@ void btrfs_check_and_init_root_item(struct btrfs_root_item *item); void btrfs_update_root_times(struct btrfs_trans_handle *trans, struct btrfs_root *root); +/* uuid-tree.c */ +int btrfs_uuid_tree_add(struct btrfs_trans_handle *trans, + struct btrfs_root *uuid_root, u8 *uuid, u8 type, + u64 subid); +int btrfs_uuid_tree_rem(struct btrfs_trans_handle *trans, + struct btrfs_root *uuid_root, u8 *uuid, u8 type, + u64 subid); +int btrfs_uuid_tree_iterate(struct btrfs_fs_info *fs_info, + int (*check_func)(struct btrfs_fs_info *, u8 *, u8, + u64)); + /* dir-item.c */ int btrfs_check_dir_item_collision(struct btrfs_root *root, u64 dir, const char *name, int name_len); @@ -3509,12 +3599,14 @@ int btrfs_find_name_in_ext_backref(struct btrfs_path *path, struct btrfs_inode_extref **extref_ret); /* file-item.c */ +struct btrfs_dio_private; int btrfs_del_csums(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u64 len); int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode, struct bio *bio, u32 *dst); int btrfs_lookup_bio_sums_dio(struct btrfs_root *root, struct inode *inode, - struct bio *bio, u64 logical_offset); + struct btrfs_dio_private *dip, struct bio *bio, + u64 logical_offset); int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 objectid, u64 pos, @@ -3552,8 +3644,7 @@ void btrfs_wait_and_free_delalloc_work(struct btrfs_delalloc_work *work); struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *page, size_t pg_offset, u64 start, u64 len, int create); -noinline int can_nocow_extent(struct btrfs_trans_handle *trans, - struct inode *inode, u64 offset, u64 *len, +noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len, u64 *orig_start, u64 *orig_block_len, u64 *ram_bytes); @@ -3643,11 +3734,15 @@ extern const struct dentry_operations btrfs_dentry_operations; long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg); void btrfs_update_iflags(struct inode *inode); void btrfs_inherit_iflags(struct inode *inode, struct inode *dir); +int btrfs_is_empty_uuid(u8 *uuid); int btrfs_defrag_file(struct inode *inode, struct file *file, struct btrfs_ioctl_defrag_range_args *range, u64 newer_than, unsigned long max_pages); void btrfs_get_block_group_info(struct list_head *groups_list, struct btrfs_ioctl_space_info *space); +void update_ioctl_balance_args(struct btrfs_fs_info *fs_info, int lock, + struct btrfs_ioctl_balance_args *bargs); + /* file.c */ int btrfs_auto_defrag_init(void); @@ -3720,6 +3815,22 @@ void btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...) #define btrfs_debug(fs_info, fmt, args...) \ btrfs_printk(fs_info, KERN_DEBUG fmt, ##args) +#ifdef CONFIG_BTRFS_ASSERT + +static inline void assfail(char *expr, char *file, int line) +{ + printk(KERN_ERR "BTRFS assertion failed: %s, file: %s, line: %d", + expr, file, line); + BUG(); +} + +#define ASSERT(expr) \ + (likely(expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__)) +#else +#define ASSERT(expr) ((void)0) +#endif + +#define btrfs_assert() __printf(5, 6) void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function, unsigned int line, int errno, const char *fmt, ...); diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index 375510913fe7..cbd9523ad09c 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -21,6 +21,7 @@ #include "delayed-inode.h" #include "disk-io.h" #include "transaction.h" +#include "ctree.h" #define BTRFS_DELAYED_WRITEBACK 512 #define BTRFS_DELAYED_BACKGROUND 128 @@ -1453,10 +1454,10 @@ int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans, dir_item = (struct btrfs_dir_item *)delayed_item->data; dir_item->location = *disk_key; - dir_item->transid = cpu_to_le64(trans->transid); - dir_item->data_len = 0; - dir_item->name_len = cpu_to_le16(name_len); - dir_item->type = type; + btrfs_set_stack_dir_transid(dir_item, trans->transid); + btrfs_set_stack_dir_data_len(dir_item, 0); + btrfs_set_stack_dir_name_len(dir_item, name_len); + btrfs_set_stack_dir_type(dir_item, type); memcpy((char *)(dir_item + 1), name, name_len); ret = btrfs_delayed_item_reserve_metadata(trans, root, delayed_item); @@ -1470,13 +1471,11 @@ int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans, mutex_lock(&delayed_node->mutex); ret = __btrfs_add_delayed_insertion_item(delayed_node, delayed_item); if (unlikely(ret)) { - printk(KERN_ERR "err add delayed dir index item(name: %s) into " - "the insertion tree of the delayed node" + printk(KERN_ERR "err add delayed dir index item(name: %.*s) " + "into the insertion tree of the delayed node" "(root id: %llu, inode id: %llu, errno: %d)\n", - name, - (unsigned long long)delayed_node->root->objectid, - (unsigned long long)delayed_node->inode_id, - ret); + name_len, name, delayed_node->root->objectid, + delayed_node->inode_id, ret); BUG(); } mutex_unlock(&delayed_node->mutex); @@ -1547,9 +1546,7 @@ int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans, printk(KERN_ERR "err add delayed dir index item(index: %llu) " "into the deletion tree of the delayed node" "(root id: %llu, inode id: %llu, errno: %d)\n", - (unsigned long long)index, - (unsigned long long)node->root->objectid, - (unsigned long long)node->inode_id, + index, node->root->objectid, node->inode_id, ret); BUG(); } @@ -1699,7 +1696,7 @@ int btrfs_readdir_delayed_dir_index(struct dir_context *ctx, di = (struct btrfs_dir_item *)curr->data; name = (char *)(di + 1); - name_len = le16_to_cpu(di->name_len); + name_len = btrfs_stack_dir_name_len(di); d_type = btrfs_filetype_table[di->type]; btrfs_disk_key_to_cpu(&location, &di->location); @@ -1716,27 +1713,6 @@ int btrfs_readdir_delayed_dir_index(struct dir_context *ctx, return 0; } -BTRFS_SETGET_STACK_FUNCS(stack_inode_generation, struct btrfs_inode_item, - generation, 64); -BTRFS_SETGET_STACK_FUNCS(stack_inode_sequence, struct btrfs_inode_item, - sequence, 64); -BTRFS_SETGET_STACK_FUNCS(stack_inode_transid, struct btrfs_inode_item, - transid, 64); -BTRFS_SETGET_STACK_FUNCS(stack_inode_size, struct btrfs_inode_item, size, 64); -BTRFS_SETGET_STACK_FUNCS(stack_inode_nbytes, struct btrfs_inode_item, - nbytes, 64); -BTRFS_SETGET_STACK_FUNCS(stack_inode_block_group, struct btrfs_inode_item, - block_group, 64); -BTRFS_SETGET_STACK_FUNCS(stack_inode_nlink, struct btrfs_inode_item, nlink, 32); -BTRFS_SETGET_STACK_FUNCS(stack_inode_uid, struct btrfs_inode_item, uid, 32); -BTRFS_SETGET_STACK_FUNCS(stack_inode_gid, struct btrfs_inode_item, gid, 32); -BTRFS_SETGET_STACK_FUNCS(stack_inode_mode, struct btrfs_inode_item, mode, 32); -BTRFS_SETGET_STACK_FUNCS(stack_inode_rdev, struct btrfs_inode_item, rdev, 64); -BTRFS_SETGET_STACK_FUNCS(stack_inode_flags, struct btrfs_inode_item, flags, 64); - -BTRFS_SETGET_STACK_FUNCS(stack_timespec_sec, struct btrfs_timespec, sec, 64); -BTRFS_SETGET_STACK_FUNCS(stack_timespec_nsec, struct btrfs_timespec, nsec, 32); - static void fill_stack_inode_item(struct btrfs_trans_handle *trans, struct btrfs_inode_item *inode_item, struct inode *inode) diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index c219463fb1fd..e4d467be2dd4 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c @@ -241,7 +241,7 @@ int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans, return 0; } -static void inline drop_delayed_ref(struct btrfs_trans_handle *trans, +static inline void drop_delayed_ref(struct btrfs_trans_handle *trans, struct btrfs_delayed_ref_root *delayed_refs, struct btrfs_delayed_ref_node *ref) { @@ -600,7 +600,7 @@ static noinline void add_delayed_ref_head(struct btrfs_fs_info *fs_info, INIT_LIST_HEAD(&head_ref->cluster); mutex_init(&head_ref->mutex); - trace_btrfs_delayed_ref_head(ref, head_ref, action); + trace_add_delayed_ref_head(ref, head_ref, action); existing = tree_insert(&delayed_refs->root, &ref->rb_node); @@ -661,7 +661,7 @@ static noinline void add_delayed_tree_ref(struct btrfs_fs_info *fs_info, ref->type = BTRFS_TREE_BLOCK_REF_KEY; full_ref->level = level; - trace_btrfs_delayed_tree_ref(ref, full_ref, action); + trace_add_delayed_tree_ref(ref, full_ref, action); existing = tree_insert(&delayed_refs->root, &ref->rb_node); @@ -722,7 +722,7 @@ static noinline void add_delayed_data_ref(struct btrfs_fs_info *fs_info, full_ref->objectid = owner; full_ref->offset = offset; - trace_btrfs_delayed_data_ref(ref, full_ref, action); + trace_add_delayed_data_ref(ref, full_ref, action); existing = tree_insert(&delayed_refs->root, &ref->rb_node); diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index 5f8f3341c099..a64435359385 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c @@ -148,13 +148,13 @@ no_valid_dev_replace_entry_found: !btrfs_test_opt(dev_root, DEGRADED)) { ret = -EIO; pr_warn("btrfs: cannot mount because device replace operation is ongoing and\n" "srcdev (devid %llu) is missing, need to run 'btrfs dev scan'?\n", - (unsigned long long)src_devid); + src_devid); } if (!dev_replace->tgtdev && !btrfs_test_opt(dev_root, DEGRADED)) { ret = -EIO; pr_warn("btrfs: cannot mount because device replace operation is ongoing and\n" "tgtdev (devid %llu) is missing, need to run btrfs dev scan?\n", - (unsigned long long)BTRFS_DEV_REPLACE_DEVID); + BTRFS_DEV_REPLACE_DEVID); } if (dev_replace->tgtdev) { if (dev_replace->srcdev) { diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 6b092a1c4e37..4cbb00af92ff 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -31,6 +31,7 @@ #include <linux/migrate.h> #include <linux/ratelimit.h> #include <linux/uuid.h> +#include <linux/semaphore.h> #include <asm/unaligned.h> #include "compat.h" #include "ctree.h" @@ -302,9 +303,8 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, printk_ratelimited(KERN_INFO "btrfs: %s checksum verify " "failed on %llu wanted %X found %X " "level %d\n", - root->fs_info->sb->s_id, - (unsigned long long)buf->start, val, found, - btrfs_header_level(buf)); + root->fs_info->sb->s_id, buf->start, + val, found, btrfs_header_level(buf)); if (result != (char *)&inline_result) kfree(result); return 1; @@ -345,9 +345,7 @@ static int verify_parent_transid(struct extent_io_tree *io_tree, } printk_ratelimited("parent transid verify failed on %llu wanted %llu " "found %llu\n", - (unsigned long long)eb->start, - (unsigned long long)parent_transid, - (unsigned long long)btrfs_header_generation(eb)); + eb->start, parent_transid, btrfs_header_generation(eb)); ret = 1; clear_extent_buffer_uptodate(eb); out: @@ -497,8 +495,7 @@ static int check_tree_block_fsid(struct btrfs_root *root, u8 fsid[BTRFS_UUID_SIZE]; int ret = 1; - read_extent_buffer(eb, fsid, (unsigned long)btrfs_header_fsid(eb), - BTRFS_FSID_SIZE); + read_extent_buffer(eb, fsid, btrfs_header_fsid(eb), BTRFS_FSID_SIZE); while (fs_devices) { if (!memcmp(fsid, fs_devices->fsid, BTRFS_FSID_SIZE)) { ret = 0; @@ -512,8 +509,7 @@ static int check_tree_block_fsid(struct btrfs_root *root, #define CORRUPT(reason, eb, root, slot) \ printk(KERN_CRIT "btrfs: corrupt leaf, %s: block=%llu," \ "root=%llu, slot=%d\n", reason, \ - (unsigned long long)btrfs_header_bytenr(eb), \ - (unsigned long long)root->objectid, slot) + btrfs_header_bytenr(eb), root->objectid, slot) static noinline int check_leaf(struct btrfs_root *root, struct extent_buffer *leaf) @@ -576,8 +572,9 @@ static noinline int check_leaf(struct btrfs_root *root, return 0; } -static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, - struct extent_state *state, int mirror) +static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio, + u64 phy_offset, struct page *page, + u64 start, u64 end, int mirror) { struct extent_io_tree *tree; u64 found_start; @@ -612,14 +609,13 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, if (found_start != eb->start) { printk_ratelimited(KERN_INFO "btrfs bad tree block start " "%llu %llu\n", - (unsigned long long)found_start, - (unsigned long long)eb->start); + found_start, eb->start); ret = -EIO; goto err; } if (check_tree_block_fsid(root, eb)) { printk_ratelimited(KERN_INFO "btrfs bad fsid on block %llu\n", - (unsigned long long)eb->start); + eb->start); ret = -EIO; goto err; } @@ -1148,6 +1144,10 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, return NULL; ret = btree_read_extent_buffer_pages(root, buf, 0, parent_transid); + if (ret) { + free_extent_buffer(buf); + return NULL; + } return buf; } @@ -1291,11 +1291,10 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans, btrfs_set_header_owner(leaf, objectid); root->node = leaf; - write_extent_buffer(leaf, fs_info->fsid, - (unsigned long)btrfs_header_fsid(leaf), + write_extent_buffer(leaf, fs_info->fsid, btrfs_header_fsid(leaf), BTRFS_FSID_SIZE); write_extent_buffer(leaf, fs_info->chunk_tree_uuid, - (unsigned long)btrfs_header_chunk_tree_uuid(leaf), + btrfs_header_chunk_tree_uuid(leaf), BTRFS_UUID_SIZE); btrfs_mark_buffer_dirty(leaf); @@ -1379,8 +1378,7 @@ static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans, root->node = leaf; write_extent_buffer(root->node, root->fs_info->fsid, - (unsigned long)btrfs_header_fsid(root->node), - BTRFS_FSID_SIZE); + btrfs_header_fsid(root->node), BTRFS_FSID_SIZE); btrfs_mark_buffer_dirty(root->node); btrfs_tree_unlock(root->node); return root; @@ -1413,11 +1411,11 @@ int btrfs_add_log_tree(struct btrfs_trans_handle *trans, log_root->root_key.offset = root->root_key.objectid; inode_item = &log_root->root_item.inode; - inode_item->generation = cpu_to_le64(1); - inode_item->size = cpu_to_le64(3); - inode_item->nlink = cpu_to_le32(1); - inode_item->nbytes = cpu_to_le64(root->leafsize); - inode_item->mode = cpu_to_le32(S_IFDIR | 0755); + btrfs_set_stack_inode_generation(inode_item, 1); + btrfs_set_stack_inode_size(inode_item, 3); + btrfs_set_stack_inode_nlink(inode_item, 1); + btrfs_set_stack_inode_nbytes(inode_item, root->leafsize); + btrfs_set_stack_inode_mode(inode_item, S_IFDIR | 0755); btrfs_set_root_node(&log_root->root_item, log_root->node); @@ -1428,8 +1426,8 @@ int btrfs_add_log_tree(struct btrfs_trans_handle *trans, return 0; } -struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root, - struct btrfs_key *key) +static struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root, + struct btrfs_key *key) { struct btrfs_root *root; struct btrfs_fs_info *fs_info = tree_root->fs_info; @@ -1529,8 +1527,8 @@ fail: return ret; } -struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info, - u64 root_id) +static struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info, + u64 root_id) { struct btrfs_root *root; @@ -1581,10 +1579,16 @@ struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info, if (location->objectid == BTRFS_QUOTA_TREE_OBJECTID) return fs_info->quota_root ? fs_info->quota_root : ERR_PTR(-ENOENT); + if (location->objectid == BTRFS_UUID_TREE_OBJECTID) + return fs_info->uuid_root ? fs_info->uuid_root : + ERR_PTR(-ENOENT); again: root = btrfs_lookup_fs_root(fs_info, location->objectid); - if (root) + if (root) { + if (btrfs_root_refs(&root->root_item) == 0) + return ERR_PTR(-ENOENT); return root; + } root = btrfs_read_fs_root(fs_info->tree_root, location); if (IS_ERR(root)) @@ -1737,7 +1741,7 @@ static int transaction_kthread(void *arg) do { cannot_commit = false; - delay = HZ * 30; + delay = HZ * root->fs_info->commit_interval; mutex_lock(&root->fs_info->transaction_kthread_mutex); spin_lock(&root->fs_info->trans_lock); @@ -1749,7 +1753,8 @@ static int transaction_kthread(void *arg) now = get_seconds(); if (cur->state < TRANS_STATE_BLOCKED && - (now < cur->start_time || now - cur->start_time < 30)) { + (now < cur->start_time || + now - cur->start_time < root->fs_info->commit_interval)) { spin_unlock(&root->fs_info->trans_lock); delay = HZ * 5; goto sleep; @@ -2038,6 +2043,12 @@ static void free_root_pointers(struct btrfs_fs_info *info, int chunk_root) info->quota_root->node = NULL; info->quota_root->commit_root = NULL; } + if (info->uuid_root) { + free_extent_buffer(info->uuid_root->node); + free_extent_buffer(info->uuid_root->commit_root); + info->uuid_root->node = NULL; + info->uuid_root->commit_root = NULL; + } if (chunk_root) { free_extent_buffer(info->chunk_root->node); free_extent_buffer(info->chunk_root->commit_root); @@ -2098,11 +2109,14 @@ int open_ctree(struct super_block *sb, struct btrfs_root *chunk_root; struct btrfs_root *dev_root; struct btrfs_root *quota_root; + struct btrfs_root *uuid_root; struct btrfs_root *log_tree_root; int ret; int err = -EINVAL; int num_backups_tried = 0; int backup_index = 0; + bool create_uuid_tree; + bool check_uuid_tree; tree_root = fs_info->tree_root = btrfs_alloc_root(fs_info); chunk_root = fs_info->chunk_root = btrfs_alloc_root(fs_info); @@ -2189,6 +2203,7 @@ int open_ctree(struct super_block *sb, fs_info->defrag_inodes = RB_ROOT; fs_info->free_chunk_space = 0; fs_info->tree_mod_log = RB_ROOT; + fs_info->commit_interval = BTRFS_DEFAULT_COMMIT_INTERVAL; /* readahead state */ INIT_RADIX_TREE(&fs_info->reada_tree, GFP_NOFS & ~__GFP_WAIT); @@ -2270,6 +2285,7 @@ int open_ctree(struct super_block *sb, mutex_init(&fs_info->ordered_operations_mutex); + mutex_init(&fs_info->ordered_extent_flush_mutex); mutex_init(&fs_info->tree_log_mutex); mutex_init(&fs_info->chunk_mutex); mutex_init(&fs_info->transaction_kthread_mutex); @@ -2278,6 +2294,7 @@ int open_ctree(struct super_block *sb, init_rwsem(&fs_info->extent_commit_sem); init_rwsem(&fs_info->cleanup_work_sem); init_rwsem(&fs_info->subvol_sem); + sema_init(&fs_info->uuid_tree_rescan_sem, 1); fs_info->dev_replace.lock_owner = 0; atomic_set(&fs_info->dev_replace.nesting_level, 0); mutex_init(&fs_info->dev_replace.lock_finishing_cancel_unmount); @@ -2383,7 +2400,7 @@ int open_ctree(struct super_block *sb, if (features) { printk(KERN_ERR "BTRFS: couldn't mount because of " "unsupported optional features (%Lx).\n", - (unsigned long long)features); + features); err = -EINVAL; goto fail_alloc; } @@ -2453,7 +2470,7 @@ int open_ctree(struct super_block *sb, if (!(sb->s_flags & MS_RDONLY) && features) { printk(KERN_ERR "BTRFS: couldn't mount RDWR because of " "unsupported option features (%Lx).\n", - (unsigned long long)features); + features); err = -EINVAL; goto fail_alloc; } @@ -2466,20 +2483,17 @@ int open_ctree(struct super_block *sb, &fs_info->generic_worker); btrfs_init_workers(&fs_info->delalloc_workers, "delalloc", - fs_info->thread_pool_size, - &fs_info->generic_worker); + fs_info->thread_pool_size, NULL); btrfs_init_workers(&fs_info->flush_workers, "flush_delalloc", - fs_info->thread_pool_size, - &fs_info->generic_worker); + fs_info->thread_pool_size, NULL); btrfs_init_workers(&fs_info->submit_workers, "submit", min_t(u64, fs_devices->num_devices, - fs_info->thread_pool_size), - &fs_info->generic_worker); + fs_info->thread_pool_size), NULL); btrfs_init_workers(&fs_info->caching_workers, "cache", - 2, &fs_info->generic_worker); + fs_info->thread_pool_size, NULL); /* a higher idle thresh on the submit workers makes it much more * likely that bios will be send down in a sane order to the @@ -2575,7 +2589,7 @@ int open_ctree(struct super_block *sb, sb->s_blocksize = sectorsize; sb->s_blocksize_bits = blksize_bits(sectorsize); - if (disk_super->magic != cpu_to_le64(BTRFS_MAGIC)) { + if (btrfs_super_magic(disk_super) != BTRFS_MAGIC) { printk(KERN_INFO "btrfs: valid FS not found on %s\n", sb->s_id); goto fail_sb_buffer; } @@ -2615,8 +2629,7 @@ int open_ctree(struct super_block *sb, chunk_root->commit_root = btrfs_root_node(chunk_root); read_extent_buffer(chunk_root->node, fs_info->chunk_tree_uuid, - (unsigned long)btrfs_header_chunk_tree_uuid(chunk_root->node), - BTRFS_UUID_SIZE); + btrfs_header_chunk_tree_uuid(chunk_root->node), BTRFS_UUID_SIZE); ret = btrfs_read_chunk_tree(chunk_root); if (ret) { @@ -2696,6 +2709,22 @@ retry_root_backup: fs_info->quota_root = quota_root; } + location.objectid = BTRFS_UUID_TREE_OBJECTID; + uuid_root = btrfs_read_tree_root(tree_root, &location); + if (IS_ERR(uuid_root)) { + ret = PTR_ERR(uuid_root); + if (ret != -ENOENT) + goto recovery_tree_root; + create_uuid_tree = true; + check_uuid_tree = false; + } else { + uuid_root->track_dirty = 1; + fs_info->uuid_root = uuid_root; + create_uuid_tree = false; + check_uuid_tree = + generation != btrfs_super_uuid_tree_generation(disk_super); + } + fs_info->generation = generation; fs_info->last_trans_committed = generation; @@ -2882,6 +2911,29 @@ retry_root_backup: btrfs_qgroup_rescan_resume(fs_info); + if (create_uuid_tree) { + pr_info("btrfs: creating UUID tree\n"); + ret = btrfs_create_uuid_tree(fs_info); + if (ret) { + pr_warn("btrfs: failed to create the UUID tree %d\n", + ret); + close_ctree(tree_root); + return ret; + } + } else if (check_uuid_tree || + btrfs_test_opt(tree_root, RESCAN_UUID_TREE)) { + pr_info("btrfs: checking UUID tree\n"); + ret = btrfs_check_uuid_tree(fs_info); + if (ret) { + pr_warn("btrfs: failed to check the UUID tree %d\n", + ret); + close_ctree(tree_root); + return ret; + } + } else { + fs_info->update_uuid_tree_gen = 1; + } + return 0; fail_qgroup: @@ -2983,15 +3035,17 @@ struct buffer_head *btrfs_read_dev_super(struct block_device *bdev) */ for (i = 0; i < 1; i++) { bytenr = btrfs_sb_offset(i); - if (bytenr + 4096 >= i_size_read(bdev->bd_inode)) + if (bytenr + BTRFS_SUPER_INFO_SIZE >= + i_size_read(bdev->bd_inode)) break; - bh = __bread(bdev, bytenr / 4096, 4096); + bh = __bread(bdev, bytenr / 4096, + BTRFS_SUPER_INFO_SIZE); if (!bh) continue; super = (struct btrfs_super_block *)bh->b_data; if (btrfs_super_bytenr(super) != bytenr || - super->magic != cpu_to_le64(BTRFS_MAGIC)) { + btrfs_super_magic(super) != BTRFS_MAGIC) { brelse(bh); continue; } @@ -3311,7 +3365,6 @@ static int write_all_supers(struct btrfs_root *root, int max_mirrors) int total_errors = 0; u64 flags; - max_errors = btrfs_super_num_devices(root->fs_info->super_copy) - 1; do_barriers = !btrfs_test_opt(root, NOBARRIER); backup_super_roots(root->fs_info); @@ -3320,6 +3373,7 @@ static int write_all_supers(struct btrfs_root *root, int max_mirrors) mutex_lock(&root->fs_info->fs_devices->device_list_mutex); head = &root->fs_info->fs_devices->devices; + max_errors = btrfs_super_num_devices(root->fs_info->super_copy) - 1; if (do_barriers) { ret = barrier_all_devices(root->fs_info); @@ -3362,8 +3416,10 @@ static int write_all_supers(struct btrfs_root *root, int max_mirrors) printk(KERN_ERR "btrfs: %d errors while writing supers\n", total_errors); - /* This shouldn't happen. FUA is masked off if unsupported */ - BUG(); + /* FUA is masked off if unsupported and can't be the reason */ + btrfs_error(root->fs_info, -EIO, + "%d errors while writing supers", total_errors); + return -EIO; } total_errors = 0; @@ -3421,6 +3477,8 @@ static void free_fs_root(struct btrfs_root *root) { iput(root->cache_inode); WARN_ON(!RB_EMPTY_ROOT(&root->inode_tree)); + btrfs_free_block_rsv(root, root->orphan_block_rsv); + root->orphan_block_rsv = NULL; if (root->anon_dev) free_anon_bdev(root->anon_dev); free_extent_buffer(root->node); @@ -3510,6 +3568,11 @@ int close_ctree(struct btrfs_root *root) fs_info->closing = 1; smp_mb(); + /* wait for the uuid_scan task to finish */ + down(&fs_info->uuid_tree_rescan_sem); + /* avoid complains from lockdep et al., set sem back to initial state */ + up(&fs_info->uuid_tree_rescan_sem); + /* pause restriper - we want to resume on mount */ btrfs_pause_balance(fs_info); @@ -3573,6 +3636,9 @@ int close_ctree(struct btrfs_root *root) btrfs_free_stripe_hash_table(fs_info); + btrfs_free_block_rsv(root, root->orphan_block_rsv); + root->orphan_block_rsv = NULL; + return 0; } @@ -3608,9 +3674,7 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf) if (transid != root->fs_info->generation) WARN(1, KERN_CRIT "btrfs transid mismatch buffer %llu, " "found %llu running %llu\n", - (unsigned long long)buf->start, - (unsigned long long)transid, - (unsigned long long)root->fs_info->generation); + buf->start, transid, root->fs_info->generation); was_dirty = set_extent_buffer_dirty(buf); if (!was_dirty) __percpu_counter_add(&root->fs_info->dirty_metadata_bytes, @@ -3744,8 +3808,8 @@ static void btrfs_destroy_all_ordered_extents(struct btrfs_fs_info *fs_info) spin_unlock(&fs_info->ordered_root_lock); } -int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, - struct btrfs_root *root) +static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, + struct btrfs_root *root) { struct rb_node *node; struct btrfs_delayed_ref_root *delayed_refs; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 1204c8ef6f32..cfb3cf711b34 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -113,7 +113,8 @@ static noinline int block_group_cache_done(struct btrfs_block_group_cache *cache) { smp_mb(); - return cache->cached == BTRFS_CACHE_FINISHED; + return cache->cached == BTRFS_CACHE_FINISHED || + cache->cached == BTRFS_CACHE_ERROR; } static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits) @@ -389,7 +390,7 @@ static noinline void caching_thread(struct btrfs_work *work) u64 total_found = 0; u64 last = 0; u32 nritems; - int ret = 0; + int ret = -ENOMEM; caching_ctl = container_of(work, struct btrfs_caching_control, work); block_group = caching_ctl->block_group; @@ -420,6 +421,7 @@ again: /* need to make sure the commit_root doesn't disappear */ down_read(&fs_info->extent_commit_sem); +next: ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0); if (ret < 0) goto err; @@ -459,6 +461,16 @@ again: continue; } + if (key.objectid < last) { + key.objectid = last; + key.offset = 0; + key.type = BTRFS_EXTENT_ITEM_KEY; + + caching_ctl->progress = last; + btrfs_release_path(path); + goto next; + } + if (key.objectid < block_group->key.objectid) { path->slots[0]++; continue; @@ -506,6 +518,12 @@ err: mutex_unlock(&caching_ctl->mutex); out: + if (ret) { + spin_lock(&block_group->lock); + block_group->caching_ctl = NULL; + block_group->cached = BTRFS_CACHE_ERROR; + spin_unlock(&block_group->lock); + } wake_up(&caching_ctl->wait); put_caching_control(caching_ctl); @@ -771,10 +789,23 @@ again: goto out_free; if (ret > 0 && metadata && key.type == BTRFS_METADATA_ITEM_KEY) { - key.type = BTRFS_EXTENT_ITEM_KEY; - key.offset = root->leafsize; - btrfs_release_path(path); - goto again; + metadata = 0; + if (path->slots[0]) { + path->slots[0]--; + btrfs_item_key_to_cpu(path->nodes[0], &key, + path->slots[0]); + if (key.objectid == bytenr && + key.type == BTRFS_EXTENT_ITEM_KEY && + key.offset == root->leafsize) + ret = 0; + } + if (ret) { + key.objectid = bytenr; + key.type = BTRFS_EXTENT_ITEM_KEY; + key.offset = root->leafsize; + btrfs_release_path(path); + goto again; + } } if (ret == 0) { @@ -2011,6 +2042,8 @@ static int run_delayed_data_ref(struct btrfs_trans_handle *trans, ins.type = BTRFS_EXTENT_ITEM_KEY; ref = btrfs_delayed_node_to_data_ref(node); + trace_run_delayed_data_ref(node, ref, node->action); + if (node->type == BTRFS_SHARED_DATA_REF_KEY) parent = ref->parent; else @@ -2154,6 +2187,8 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans, SKINNY_METADATA); ref = btrfs_delayed_node_to_tree_ref(node); + trace_run_delayed_tree_ref(node, ref, node->action); + if (node->type == BTRFS_SHARED_BLOCK_REF_KEY) parent = ref->parent; else @@ -2212,6 +2247,8 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans, */ BUG_ON(extent_op); head = btrfs_delayed_node_to_head(node); + trace_run_delayed_ref_head(node, head, node->action); + if (insert_reserved) { btrfs_pin_extent(root, node->bytenr, node->num_bytes, 1); @@ -2403,6 +2440,8 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans, default: WARN_ON(1); } + } else { + list_del_init(&locked_ref->cluster); } spin_unlock(&delayed_refs->lock); @@ -2425,7 +2464,6 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans, * list before we release it. */ if (btrfs_delayed_ref_is_head(ref)) { - list_del_init(&locked_ref->cluster); btrfs_delayed_ref_unlock(locked_ref); locked_ref = NULL; } @@ -3799,8 +3837,12 @@ again: if (force < space_info->force_alloc) force = space_info->force_alloc; if (space_info->full) { + if (should_alloc_chunk(extent_root, space_info, force)) + ret = -ENOSPC; + else + ret = 0; spin_unlock(&space_info->lock); - return 0; + return ret; } if (!should_alloc_chunk(extent_root, space_info, force)) { @@ -4320,6 +4362,9 @@ static struct btrfs_block_rsv *get_block_rsv( if (root == root->fs_info->csum_root && trans->adding_csums) block_rsv = trans->block_rsv; + if (root == root->fs_info->uuid_root) + block_rsv = trans->block_rsv; + if (!block_rsv) block_rsv = root->block_rsv; @@ -4729,10 +4774,12 @@ void btrfs_orphan_release_metadata(struct inode *inode) int btrfs_subvolume_reserve_metadata(struct btrfs_root *root, struct btrfs_block_rsv *rsv, int items, - u64 *qgroup_reserved) + u64 *qgroup_reserved, + bool use_global_rsv) { u64 num_bytes; int ret; + struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv; if (root->fs_info->quota_enabled) { /* One for parent inode, two for dir entries */ @@ -4751,6 +4798,10 @@ int btrfs_subvolume_reserve_metadata(struct btrfs_root *root, BTRFS_BLOCK_GROUP_METADATA); ret = btrfs_block_rsv_add(root, rsv, num_bytes, BTRFS_RESERVE_FLUSH_ALL); + + if (ret == -ENOSPC && use_global_rsv) + ret = btrfs_block_rsv_migrate(global_rsv, rsv, num_bytes); + if (ret) { if (*qgroup_reserved) btrfs_qgroup_free(root, *qgroup_reserved); @@ -5668,7 +5719,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, if (ret) { btrfs_err(info, "umm, got %d back from search, was looking for %llu", - ret, (unsigned long long)bytenr); + ret, bytenr); if (ret > 0) btrfs_print_leaf(extent_root, path->nodes[0]); @@ -5684,11 +5735,8 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, WARN_ON(1); btrfs_err(info, "unable to find ref byte nr %llu parent %llu root %llu owner %llu offset %llu", - (unsigned long long)bytenr, - (unsigned long long)parent, - (unsigned long long)root_objectid, - (unsigned long long)owner_objectid, - (unsigned long long)owner_offset); + bytenr, parent, root_objectid, owner_objectid, + owner_offset); } else { btrfs_abort_transaction(trans, extent_root, ret); goto out; @@ -5717,7 +5765,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, -1, 1); if (ret) { btrfs_err(info, "umm, got %d back from search, was looking for %llu", - ret, (unsigned long long)bytenr); + ret, bytenr); btrfs_print_leaf(extent_root, path->nodes[0]); } if (ret < 0) { @@ -5999,8 +6047,11 @@ static u64 stripe_align(struct btrfs_root *root, * for our min num_bytes. Another option is to have it go ahead * and look in the rbtree for a free extent of a given size, but this * is a good start. + * + * Callers of this must check if cache->cached == BTRFS_CACHE_ERROR before using + * any of the information in this block group. */ -static noinline int +static noinline void wait_block_group_cache_progress(struct btrfs_block_group_cache *cache, u64 num_bytes) { @@ -6008,28 +6059,29 @@ wait_block_group_cache_progress(struct btrfs_block_group_cache *cache, caching_ctl = get_caching_control(cache); if (!caching_ctl) - return 0; + return; wait_event(caching_ctl->wait, block_group_cache_done(cache) || (cache->free_space_ctl->free_space >= num_bytes)); put_caching_control(caching_ctl); - return 0; } static noinline int wait_block_group_cache_done(struct btrfs_block_group_cache *cache) { struct btrfs_caching_control *caching_ctl; + int ret = 0; caching_ctl = get_caching_control(cache); if (!caching_ctl) - return 0; + return (cache->cached == BTRFS_CACHE_ERROR) ? -EIO : 0; wait_event(caching_ctl->wait, block_group_cache_done(cache)); - + if (cache->cached == BTRFS_CACHE_ERROR) + ret = -EIO; put_caching_control(caching_ctl); - return 0; + return ret; } int __get_raid_index(u64 flags) @@ -6070,8 +6122,7 @@ enum btrfs_loop_type { * ins->offset == number of blocks * Any available blocks before search_start are skipped. */ -static noinline int find_free_extent(struct btrfs_trans_handle *trans, - struct btrfs_root *orig_root, +static noinline int find_free_extent(struct btrfs_root *orig_root, u64 num_bytes, u64 empty_size, u64 hint_byte, struct btrfs_key *ins, u64 flags) @@ -6212,6 +6263,8 @@ have_block_group: ret = 0; } + if (unlikely(block_group->cached == BTRFS_CACHE_ERROR)) + goto loop; if (unlikely(block_group->ro)) goto loop; @@ -6292,10 +6345,10 @@ refill_cluster: block_group->full_stripe_len); /* allocate a cluster in this block group */ - ret = btrfs_find_space_cluster(trans, root, - block_group, last_ptr, - search_start, num_bytes, - aligned_cluster); + ret = btrfs_find_space_cluster(root, block_group, + last_ptr, search_start, + num_bytes, + aligned_cluster); if (ret == 0) { /* * now pull our allocation out of this @@ -6426,17 +6479,28 @@ loop: index = 0; loop++; if (loop == LOOP_ALLOC_CHUNK) { + struct btrfs_trans_handle *trans; + + trans = btrfs_join_transaction(root); + if (IS_ERR(trans)) { + ret = PTR_ERR(trans); + goto out; + } + ret = do_chunk_alloc(trans, root, flags, CHUNK_ALLOC_FORCE); /* * Do not bail out on ENOSPC since we * can do more things. */ - if (ret < 0 && ret != -ENOSPC) { + if (ret < 0 && ret != -ENOSPC) btrfs_abort_transaction(trans, root, ret); + else + ret = 0; + btrfs_end_transaction(trans, root); + if (ret) goto out; - } } if (loop == LOOP_NO_EMPTY_SIZE) { @@ -6463,19 +6527,15 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes, spin_lock(&info->lock); printk(KERN_INFO "space_info %llu has %llu free, is %sfull\n", - (unsigned long long)info->flags, - (unsigned long long)(info->total_bytes - info->bytes_used - - info->bytes_pinned - info->bytes_reserved - - info->bytes_readonly), + info->flags, + info->total_bytes - info->bytes_used - info->bytes_pinned - + info->bytes_reserved - info->bytes_readonly, (info->full) ? "" : "not "); printk(KERN_INFO "space_info total=%llu, used=%llu, pinned=%llu, " "reserved=%llu, may_use=%llu, readonly=%llu\n", - (unsigned long long)info->total_bytes, - (unsigned long long)info->bytes_used, - (unsigned long long)info->bytes_pinned, - (unsigned long long)info->bytes_reserved, - (unsigned long long)info->bytes_may_use, - (unsigned long long)info->bytes_readonly); + info->total_bytes, info->bytes_used, info->bytes_pinned, + info->bytes_reserved, info->bytes_may_use, + info->bytes_readonly); spin_unlock(&info->lock); if (!dump_block_groups) @@ -6486,12 +6546,9 @@ again: list_for_each_entry(cache, &info->block_groups[index], list) { spin_lock(&cache->lock); printk(KERN_INFO "block group %llu has %llu bytes, %llu used %llu pinned %llu reserved %s\n", - (unsigned long long)cache->key.objectid, - (unsigned long long)cache->key.offset, - (unsigned long long)btrfs_block_group_used(&cache->item), - (unsigned long long)cache->pinned, - (unsigned long long)cache->reserved, - cache->ro ? "[readonly]" : ""); + cache->key.objectid, cache->key.offset, + btrfs_block_group_used(&cache->item), cache->pinned, + cache->reserved, cache->ro ? "[readonly]" : ""); btrfs_dump_free_space(cache, bytes); spin_unlock(&cache->lock); } @@ -6500,8 +6557,7 @@ again: up_read(&info->groups_sem); } -int btrfs_reserve_extent(struct btrfs_trans_handle *trans, - struct btrfs_root *root, +int btrfs_reserve_extent(struct btrfs_root *root, u64 num_bytes, u64 min_alloc_size, u64 empty_size, u64 hint_byte, struct btrfs_key *ins, int is_data) @@ -6513,8 +6569,8 @@ int btrfs_reserve_extent(struct btrfs_trans_handle *trans, flags = btrfs_get_alloc_profile(root, is_data); again: WARN_ON(num_bytes < root->sectorsize); - ret = find_free_extent(trans, root, num_bytes, empty_size, - hint_byte, ins, flags); + ret = find_free_extent(root, num_bytes, empty_size, hint_byte, ins, + flags); if (ret == -ENOSPC) { if (!final_tried) { @@ -6529,8 +6585,7 @@ again: sinfo = __find_space_info(root->fs_info, flags); btrfs_err(root->fs_info, "allocation failed flags %llu, wanted %llu", - (unsigned long long)flags, - (unsigned long long)num_bytes); + flags, num_bytes); if (sinfo) dump_space_info(sinfo, num_bytes, 1); } @@ -6550,7 +6605,7 @@ static int __btrfs_free_reserved_extent(struct btrfs_root *root, cache = btrfs_lookup_block_group(root->fs_info, start); if (!cache) { btrfs_err(root->fs_info, "Unable to find block group for %llu", - (unsigned long long)start); + start); return -ENOSPC; } @@ -6646,8 +6701,7 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans, ret = update_block_group(root, ins->objectid, ins->offset, 1); if (ret) { /* -ENOENT, logic error */ btrfs_err(fs_info, "update block group failed for %llu %llu", - (unsigned long long)ins->objectid, - (unsigned long long)ins->offset); + ins->objectid, ins->offset); BUG(); } return ret; @@ -6719,8 +6773,7 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, ret = update_block_group(root, ins->objectid, root->leafsize, 1); if (ret) { /* -ENOENT, logic error */ btrfs_err(fs_info, "update block group failed for %llu %llu", - (unsigned long long)ins->objectid, - (unsigned long long)ins->offset); + ins->objectid, ins->offset); BUG(); } return ret; @@ -6902,7 +6955,7 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, if (IS_ERR(block_rsv)) return ERR_CAST(block_rsv); - ret = btrfs_reserve_extent(trans, root, blocksize, blocksize, + ret = btrfs_reserve_extent(root, blocksize, blocksize, empty_size, hint, &ins, 0); if (ret) { unuse_block_rsv(root->fs_info, block_rsv, blocksize); @@ -7173,6 +7226,8 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans, next = btrfs_find_create_tree_block(root, bytenr, blocksize); if (!next) return -ENOMEM; + btrfs_set_buffer_lockdep_class(root->root_key.objectid, next, + level - 1); reada = 1; } btrfs_tree_lock(next); @@ -7658,7 +7713,7 @@ out: * don't have it in the radix (like when we recover after a power fail * or unmount) so we don't leak memory. */ - if (root_dropped == false) + if (!for_reloc && root_dropped == false) btrfs_add_dead_root(root); if (err) btrfs_std_error(root->fs_info, err); @@ -8192,7 +8247,8 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) * We haven't cached this block group, which means we could * possibly have excluded extents on this block group. */ - if (block_group->cached == BTRFS_CACHE_NO) + if (block_group->cached == BTRFS_CACHE_NO || + block_group->cached == BTRFS_CACHE_ERROR) free_excluded_extents(info->extent_root, block_group); btrfs_remove_free_space_cache(block_group); @@ -8409,9 +8465,13 @@ int btrfs_read_block_groups(struct btrfs_root *root) * avoid allocating from un-mirrored block group if there are * mirrored block groups. */ - list_for_each_entry(cache, &space_info->block_groups[3], list) + list_for_each_entry(cache, + &space_info->block_groups[BTRFS_RAID_RAID0], + list) set_block_group_ro(cache, 1); - list_for_each_entry(cache, &space_info->block_groups[4], list) + list_for_each_entry(cache, + &space_info->block_groups[BTRFS_RAID_SINGLE], + list) set_block_group_ro(cache, 1); } diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index fe443fece851..09582b81640c 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -61,9 +61,8 @@ void btrfs_leak_debug_check(void) state = list_entry(states.next, struct extent_state, leak_list); printk(KERN_ERR "btrfs state leak: start %llu end %llu " "state %lu in tree %p refs %d\n", - (unsigned long long)state->start, - (unsigned long long)state->end, - state->state, state->tree, atomic_read(&state->refs)); + state->start, state->end, state->state, state->tree, + atomic_read(&state->refs)); list_del(&state->leak_list); kmem_cache_free(extent_state_cache, state); } @@ -71,8 +70,8 @@ void btrfs_leak_debug_check(void) while (!list_empty(&buffers)) { eb = list_entry(buffers.next, struct extent_buffer, leak_list); printk(KERN_ERR "btrfs buffer leak start %llu len %lu " - "refs %d\n", (unsigned long long)eb->start, - eb->len, atomic_read(&eb->refs)); + "refs %d\n", + eb->start, eb->len, atomic_read(&eb->refs)); list_del(&eb->leak_list); kmem_cache_free(extent_buffer_cache, eb); } @@ -88,11 +87,7 @@ static inline void __btrfs_debug_check_extent_io_range(const char *caller, if (end >= PAGE_SIZE && (end % 2) == 0 && end != isize - 1) { printk_ratelimited(KERN_DEBUG "btrfs: %s: ino %llu isize %llu odd range [%llu,%llu]\n", - caller, - (unsigned long long)btrfs_ino(inode), - (unsigned long long)isize, - (unsigned long long)start, - (unsigned long long)end); + caller, btrfs_ino(inode), isize, start, end); } } #else @@ -388,8 +383,7 @@ static int insert_state(struct extent_io_tree *tree, if (end < start) WARN(1, KERN_ERR "btrfs end < start %llu %llu\n", - (unsigned long long)end, - (unsigned long long)start); + end, start); state->start = start; state->end = end; @@ -400,9 +394,8 @@ static int insert_state(struct extent_io_tree *tree, struct extent_state *found; found = rb_entry(node, struct extent_state, rb_node); printk(KERN_ERR "btrfs found node %llu %llu on insert of " - "%llu %llu\n", (unsigned long long)found->start, - (unsigned long long)found->end, - (unsigned long long)start, (unsigned long long)end); + "%llu %llu\n", + found->start, found->end, start, end); return -EEXIST; } state->tree = tree; @@ -762,15 +755,6 @@ static void cache_state(struct extent_state *state, } } -static void uncache_state(struct extent_state **cached_ptr) -{ - if (cached_ptr && (*cached_ptr)) { - struct extent_state *state = *cached_ptr; - *cached_ptr = NULL; - free_extent_state(state); - } -} - /* * set some bits on a range in the tree. This may require allocations or * sleeping, so the gfp mask is used to indicate what is allowed. @@ -1687,31 +1671,21 @@ out_failed: return found; } -int extent_clear_unlock_delalloc(struct inode *inode, - struct extent_io_tree *tree, - u64 start, u64 end, struct page *locked_page, - unsigned long op) +int extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end, + struct page *locked_page, + unsigned long clear_bits, + unsigned long page_ops) { + struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree; int ret; struct page *pages[16]; unsigned long index = start >> PAGE_CACHE_SHIFT; unsigned long end_index = end >> PAGE_CACHE_SHIFT; unsigned long nr_pages = end_index - index + 1; int i; - unsigned long clear_bits = 0; - - if (op & EXTENT_CLEAR_UNLOCK) - clear_bits |= EXTENT_LOCKED; - if (op & EXTENT_CLEAR_DIRTY) - clear_bits |= EXTENT_DIRTY; - - if (op & EXTENT_CLEAR_DELALLOC) - clear_bits |= EXTENT_DELALLOC; clear_extent_bit(tree, start, end, clear_bits, 1, 0, NULL, GFP_NOFS); - if (!(op & (EXTENT_CLEAR_UNLOCK_PAGE | EXTENT_CLEAR_DIRTY | - EXTENT_SET_WRITEBACK | EXTENT_END_WRITEBACK | - EXTENT_SET_PRIVATE2))) + if (page_ops == 0) return 0; while (nr_pages > 0) { @@ -1720,20 +1694,20 @@ int extent_clear_unlock_delalloc(struct inode *inode, nr_pages, ARRAY_SIZE(pages)), pages); for (i = 0; i < ret; i++) { - if (op & EXTENT_SET_PRIVATE2) + if (page_ops & PAGE_SET_PRIVATE2) SetPagePrivate2(pages[i]); if (pages[i] == locked_page) { page_cache_release(pages[i]); continue; } - if (op & EXTENT_CLEAR_DIRTY) + if (page_ops & PAGE_CLEAR_DIRTY) clear_page_dirty_for_io(pages[i]); - if (op & EXTENT_SET_WRITEBACK) + if (page_ops & PAGE_SET_WRITEBACK) set_page_writeback(pages[i]); - if (op & EXTENT_END_WRITEBACK) + if (page_ops & PAGE_END_WRITEBACK) end_page_writeback(pages[i]); - if (op & EXTENT_CLEAR_UNLOCK_PAGE) + if (page_ops & PAGE_UNLOCK) unlock_page(pages[i]); page_cache_release(pages[i]); } @@ -1810,7 +1784,7 @@ out: * set the private field for a given byte offset in the tree. If there isn't * an extent_state there already, this does nothing. */ -int set_state_private(struct extent_io_tree *tree, u64 start, u64 private) +static int set_state_private(struct extent_io_tree *tree, u64 start, u64 private) { struct rb_node *node; struct extent_state *state; @@ -1837,64 +1811,6 @@ out: return ret; } -void extent_cache_csums_dio(struct extent_io_tree *tree, u64 start, u32 csums[], - int count) -{ - struct rb_node *node; - struct extent_state *state; - - spin_lock(&tree->lock); - /* - * this search will find all the extents that end after - * our range starts. - */ - node = tree_search(tree, start); - BUG_ON(!node); - - state = rb_entry(node, struct extent_state, rb_node); - BUG_ON(state->start != start); - - while (count) { - state->private = *csums++; - count--; - state = next_state(state); - } - spin_unlock(&tree->lock); -} - -static inline u64 __btrfs_get_bio_offset(struct bio *bio, int bio_index) -{ - struct bio_vec *bvec = bio->bi_io_vec + bio_index; - - return page_offset(bvec->bv_page) + bvec->bv_offset; -} - -void extent_cache_csums(struct extent_io_tree *tree, struct bio *bio, int bio_index, - u32 csums[], int count) -{ - struct rb_node *node; - struct extent_state *state = NULL; - u64 start; - - spin_lock(&tree->lock); - do { - start = __btrfs_get_bio_offset(bio, bio_index); - if (state == NULL || state->start != start) { - node = tree_search(tree, start); - BUG_ON(!node); - - state = rb_entry(node, struct extent_state, rb_node); - BUG_ON(state->start != start); - } - state->private = *csums++; - count--; - bio_index++; - - state = next_state(state); - } while (count); - spin_unlock(&tree->lock); -} - int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private) { struct rb_node *node; @@ -2173,7 +2089,8 @@ static int clean_io_failure(u64 start, struct page *page) EXTENT_LOCKED); spin_unlock(&BTRFS_I(inode)->io_tree.lock); - if (state && state->start == failrec->start) { + if (state && state->start <= failrec->start && + state->end >= failrec->start + failrec->len - 1) { fs_info = BTRFS_I(inode)->root->fs_info; num_copies = btrfs_num_copies(fs_info, failrec->logical, failrec->len); @@ -2201,9 +2118,9 @@ out: * needed */ -static int bio_readpage_error(struct bio *failed_bio, struct page *page, - u64 start, u64 end, int failed_mirror, - struct extent_state *state) +static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset, + struct page *page, u64 start, u64 end, + int failed_mirror) { struct io_failure_record *failrec = NULL; u64 private; @@ -2213,6 +2130,8 @@ static int bio_readpage_error(struct bio *failed_bio, struct page *page, struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree; struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; struct bio *bio; + struct btrfs_io_bio *btrfs_failed_bio; + struct btrfs_io_bio *btrfs_bio; int num_copies; int ret; int read_mode; @@ -2296,23 +2215,12 @@ static int bio_readpage_error(struct bio *failed_bio, struct page *page, * all the retry and error correction code that follows. no * matter what the error is, it is very likely to persist. */ - pr_debug("bio_readpage_error: cannot repair, num_copies == 1. " - "state=%p, num_copies=%d, next_mirror %d, " - "failed_mirror %d\n", state, num_copies, - failrec->this_mirror, failed_mirror); + pr_debug("bio_readpage_error: cannot repair, num_copies=%d, next_mirror %d, failed_mirror %d\n", + num_copies, failrec->this_mirror, failed_mirror); free_io_failure(inode, failrec, 0); return -EIO; } - if (!state) { - spin_lock(&tree->lock); - state = find_first_extent_bit_state(tree, failrec->start, - EXTENT_LOCKED); - if (state && state->start != failrec->start) - state = NULL; - spin_unlock(&tree->lock); - } - /* * there are two premises: * a) deliver good data to the caller @@ -2349,9 +2257,8 @@ static int bio_readpage_error(struct bio *failed_bio, struct page *page, read_mode = READ_SYNC; } - if (!state || failrec->this_mirror > num_copies) { - pr_debug("bio_readpage_error: (fail) state=%p, num_copies=%d, " - "next_mirror %d, failed_mirror %d\n", state, + if (failrec->this_mirror > num_copies) { + pr_debug("bio_readpage_error: (fail) num_copies=%d, next_mirror %d, failed_mirror %d\n", num_copies, failrec->this_mirror, failed_mirror); free_io_failure(inode, failrec, 0); return -EIO; @@ -2362,12 +2269,24 @@ static int bio_readpage_error(struct bio *failed_bio, struct page *page, free_io_failure(inode, failrec, 0); return -EIO; } - bio->bi_private = state; bio->bi_end_io = failed_bio->bi_end_io; bio->bi_sector = failrec->logical >> 9; bio->bi_bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev; bio->bi_size = 0; + btrfs_failed_bio = btrfs_io_bio(failed_bio); + if (btrfs_failed_bio->csum) { + struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info; + u16 csum_size = btrfs_super_csum_size(fs_info->super_copy); + + btrfs_bio = btrfs_io_bio(bio); + btrfs_bio->csum = btrfs_bio->csum_inline; + phy_offset >>= inode->i_sb->s_blocksize_bits; + phy_offset *= csum_size; + memcpy(btrfs_bio->csum, btrfs_failed_bio->csum + phy_offset, + csum_size); + } + bio_add_page(bio, page, failrec->len, start - page_offset(page)); pr_debug("bio_readpage_error: submitting new read[%#x] to " @@ -2450,6 +2369,18 @@ static void end_bio_extent_writepage(struct bio *bio, int err) bio_put(bio); } +static void +endio_readpage_release_extent(struct extent_io_tree *tree, u64 start, u64 len, + int uptodate) +{ + struct extent_state *cached = NULL; + u64 end = start + len - 1; + + if (uptodate && tree->track_uptodate) + set_extent_uptodate(tree, start, end, &cached, GFP_ATOMIC); + unlock_extent_cached(tree, start, end, &cached, GFP_ATOMIC); +} + /* * after a readpage IO is done, we need to: * clear the uptodate bits on error @@ -2466,9 +2397,14 @@ static void end_bio_extent_readpage(struct bio *bio, int err) int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); struct bio_vec *bvec_end = bio->bi_io_vec + bio->bi_vcnt - 1; struct bio_vec *bvec = bio->bi_io_vec; + struct btrfs_io_bio *io_bio = btrfs_io_bio(bio); struct extent_io_tree *tree; + u64 offset = 0; u64 start; u64 end; + u64 len; + u64 extent_start = 0; + u64 extent_len = 0; int mirror; int ret; @@ -2477,9 +2413,6 @@ static void end_bio_extent_readpage(struct bio *bio, int err) do { struct page *page = bvec->bv_page; - struct extent_state *cached = NULL; - struct extent_state *state; - struct btrfs_io_bio *io_bio = btrfs_io_bio(bio); struct inode *inode = page->mapping->host; pr_debug("end_bio_extent_readpage: bi_sector=%llu, err=%d, " @@ -2500,37 +2433,32 @@ static void end_bio_extent_readpage(struct bio *bio, int err) start = page_offset(page); end = start + bvec->bv_offset + bvec->bv_len - 1; + len = bvec->bv_len; if (++bvec <= bvec_end) prefetchw(&bvec->bv_page->flags); - spin_lock(&tree->lock); - state = find_first_extent_bit_state(tree, start, EXTENT_LOCKED); - if (state && state->start == start) { - /* - * take a reference on the state, unlock will drop - * the ref - */ - cache_state(state, &cached); - } - spin_unlock(&tree->lock); - mirror = io_bio->mirror_num; - if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) { - ret = tree->ops->readpage_end_io_hook(page, start, end, - state, mirror); + if (likely(uptodate && tree->ops && + tree->ops->readpage_end_io_hook)) { + ret = tree->ops->readpage_end_io_hook(io_bio, offset, + page, start, end, + mirror); if (ret) uptodate = 0; else clean_io_failure(start, page); } - if (!uptodate && tree->ops && tree->ops->readpage_io_failed_hook) { + if (likely(uptodate)) + goto readpage_ok; + + if (tree->ops && tree->ops->readpage_io_failed_hook) { ret = tree->ops->readpage_io_failed_hook(page, mirror); if (!ret && !err && test_bit(BIO_UPTODATE, &bio->bi_flags)) uptodate = 1; - } else if (!uptodate) { + } else { /* * The generic bio_readpage_error handles errors the * following way: If possible, new read requests are @@ -2541,24 +2469,18 @@ static void end_bio_extent_readpage(struct bio *bio, int err) * can't handle the error it will return -EIO and we * remain responsible for that page. */ - ret = bio_readpage_error(bio, page, start, end, mirror, NULL); + ret = bio_readpage_error(bio, offset, page, start, end, + mirror); if (ret == 0) { uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); if (err) uptodate = 0; - uncache_state(&cached); continue; } } - - if (uptodate && tree->track_uptodate) { - set_extent_uptodate(tree, start, end, &cached, - GFP_ATOMIC); - } - unlock_extent_cached(tree, start, end, &cached, GFP_ATOMIC); - - if (uptodate) { +readpage_ok: + if (likely(uptodate)) { loff_t i_size = i_size_read(inode); pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT; unsigned offset; @@ -2573,8 +2495,36 @@ static void end_bio_extent_readpage(struct bio *bio, int err) SetPageError(page); } unlock_page(page); + offset += len; + + if (unlikely(!uptodate)) { + if (extent_len) { + endio_readpage_release_extent(tree, + extent_start, + extent_len, 1); + extent_start = 0; + extent_len = 0; + } + endio_readpage_release_extent(tree, start, + end - start + 1, 0); + } else if (!extent_len) { + extent_start = start; + extent_len = end + 1 - start; + } else if (extent_start + extent_len == start) { + extent_len += end + 1 - start; + } else { + endio_readpage_release_extent(tree, extent_start, + extent_len, uptodate); + extent_start = start; + extent_len = end + 1 - start; + } } while (bvec <= bvec_end); + if (extent_len) + endio_readpage_release_extent(tree, extent_start, extent_len, + uptodate); + if (io_bio->end_io) + io_bio->end_io(io_bio, err); bio_put(bio); } @@ -2586,6 +2536,7 @@ struct bio * btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs, gfp_t gfp_flags) { + struct btrfs_io_bio *btrfs_bio; struct bio *bio; bio = bio_alloc_bioset(gfp_flags, nr_vecs, btrfs_bioset); @@ -2601,6 +2552,10 @@ btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs, bio->bi_size = 0; bio->bi_bdev = bdev; bio->bi_sector = first_sector; + btrfs_bio = btrfs_io_bio(bio); + btrfs_bio->csum = NULL; + btrfs_bio->csum_allocated = NULL; + btrfs_bio->end_io = NULL; } return bio; } @@ -2614,7 +2569,17 @@ struct bio *btrfs_bio_clone(struct bio *bio, gfp_t gfp_mask) /* this also allocates from the btrfs_bioset */ struct bio *btrfs_io_bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs) { - return bio_alloc_bioset(gfp_mask, nr_iovecs, btrfs_bioset); + struct btrfs_io_bio *btrfs_bio; + struct bio *bio; + + bio = bio_alloc_bioset(gfp_mask, nr_iovecs, btrfs_bioset); + if (bio) { + btrfs_bio = btrfs_io_bio(bio); + btrfs_bio->csum = NULL; + btrfs_bio->csum_allocated = NULL; + btrfs_bio->end_io = NULL; + } + return bio; } @@ -2738,17 +2703,45 @@ void set_page_extent_mapped(struct page *page) } } +static struct extent_map * +__get_extent_map(struct inode *inode, struct page *page, size_t pg_offset, + u64 start, u64 len, get_extent_t *get_extent, + struct extent_map **em_cached) +{ + struct extent_map *em; + + if (em_cached && *em_cached) { + em = *em_cached; + if (em->in_tree && start >= em->start && + start < extent_map_end(em)) { + atomic_inc(&em->refs); + return em; + } + + free_extent_map(em); + *em_cached = NULL; + } + + em = get_extent(inode, page, pg_offset, start, len, 0); + if (em_cached && !IS_ERR_OR_NULL(em)) { + BUG_ON(*em_cached); + atomic_inc(&em->refs); + *em_cached = em; + } + return em; +} /* * basic readpage implementation. Locked extent state structs are inserted * into the tree that are removed when the IO is done (by the end_io * handlers) * XXX JDM: This needs looking at to ensure proper page locking */ -static int __extent_read_full_page(struct extent_io_tree *tree, - struct page *page, - get_extent_t *get_extent, - struct bio **bio, int mirror_num, - unsigned long *bio_flags, int rw) +static int __do_readpage(struct extent_io_tree *tree, + struct page *page, + get_extent_t *get_extent, + struct extent_map **em_cached, + struct bio **bio, int mirror_num, + unsigned long *bio_flags, int rw) { struct inode *inode = page->mapping->host; u64 start = page_offset(page); @@ -2762,35 +2755,26 @@ static int __extent_read_full_page(struct extent_io_tree *tree, sector_t sector; struct extent_map *em; struct block_device *bdev; - struct btrfs_ordered_extent *ordered; int ret; int nr = 0; + int parent_locked = *bio_flags & EXTENT_BIO_PARENT_LOCKED; size_t pg_offset = 0; size_t iosize; size_t disk_io_size; size_t blocksize = inode->i_sb->s_blocksize; - unsigned long this_bio_flag = 0; + unsigned long this_bio_flag = *bio_flags & EXTENT_BIO_PARENT_LOCKED; set_page_extent_mapped(page); + end = page_end; if (!PageUptodate(page)) { if (cleancache_get_page(page) == 0) { BUG_ON(blocksize != PAGE_SIZE); + unlock_extent(tree, start, end); goto out; } } - end = page_end; - while (1) { - lock_extent(tree, start, end); - ordered = btrfs_lookup_ordered_extent(inode, start); - if (!ordered) - break; - unlock_extent(tree, start, end); - btrfs_start_ordered_extent(inode, ordered, 1); - btrfs_put_ordered_extent(ordered); - } - if (page->index == last_byte >> PAGE_CACHE_SHIFT) { char *userpage; size_t zero_offset = last_byte & (PAGE_CACHE_SIZE - 1); @@ -2817,15 +2801,18 @@ static int __extent_read_full_page(struct extent_io_tree *tree, kunmap_atomic(userpage); set_extent_uptodate(tree, cur, cur + iosize - 1, &cached, GFP_NOFS); - unlock_extent_cached(tree, cur, cur + iosize - 1, - &cached, GFP_NOFS); + if (!parent_locked) + unlock_extent_cached(tree, cur, + cur + iosize - 1, + &cached, GFP_NOFS); break; } - em = get_extent(inode, page, pg_offset, cur, - end - cur + 1, 0); + em = __get_extent_map(inode, page, pg_offset, cur, + end - cur + 1, get_extent, em_cached); if (IS_ERR_OR_NULL(em)) { SetPageError(page); - unlock_extent(tree, cur, end); + if (!parent_locked) + unlock_extent(tree, cur, end); break; } extent_offset = cur - em->start; @@ -2833,7 +2820,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree, BUG_ON(end < cur); if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) { - this_bio_flag = EXTENT_BIO_COMPRESSED; + this_bio_flag |= EXTENT_BIO_COMPRESSED; extent_set_compress_type(&this_bio_flag, em->compress_type); } @@ -2877,7 +2864,8 @@ static int __extent_read_full_page(struct extent_io_tree *tree, if (test_range_bit(tree, cur, cur_end, EXTENT_UPTODATE, 1, NULL)) { check_page_uptodate(tree, page); - unlock_extent(tree, cur, cur + iosize - 1); + if (!parent_locked) + unlock_extent(tree, cur, cur + iosize - 1); cur = cur + iosize; pg_offset += iosize; continue; @@ -2887,7 +2875,8 @@ static int __extent_read_full_page(struct extent_io_tree *tree, */ if (block_start == EXTENT_MAP_INLINE) { SetPageError(page); - unlock_extent(tree, cur, cur + iosize - 1); + if (!parent_locked) + unlock_extent(tree, cur, cur + iosize - 1); cur = cur + iosize; pg_offset += iosize; continue; @@ -2905,7 +2894,8 @@ static int __extent_read_full_page(struct extent_io_tree *tree, *bio_flags = this_bio_flag; } else { SetPageError(page); - unlock_extent(tree, cur, cur + iosize - 1); + if (!parent_locked) + unlock_extent(tree, cur, cur + iosize - 1); } cur = cur + iosize; pg_offset += iosize; @@ -2919,6 +2909,104 @@ out: return 0; } +static inline void __do_contiguous_readpages(struct extent_io_tree *tree, + struct page *pages[], int nr_pages, + u64 start, u64 end, + get_extent_t *get_extent, + struct extent_map **em_cached, + struct bio **bio, int mirror_num, + unsigned long *bio_flags, int rw) +{ + struct inode *inode; + struct btrfs_ordered_extent *ordered; + int index; + + inode = pages[0]->mapping->host; + while (1) { + lock_extent(tree, start, end); + ordered = btrfs_lookup_ordered_range(inode, start, + end - start + 1); + if (!ordered) + break; + unlock_extent(tree, start, end); + btrfs_start_ordered_extent(inode, ordered, 1); + btrfs_put_ordered_extent(ordered); + } + + for (index = 0; index < nr_pages; index++) { + __do_readpage(tree, pages[index], get_extent, em_cached, bio, + mirror_num, bio_flags, rw); + page_cache_release(pages[index]); + } +} + +static void __extent_readpages(struct extent_io_tree *tree, + struct page *pages[], + int nr_pages, get_extent_t *get_extent, + struct extent_map **em_cached, + struct bio **bio, int mirror_num, + unsigned long *bio_flags, int rw) +{ + u64 start = 0; + u64 end = 0; + u64 page_start; + int index; + int first_index = 0; + + for (index = 0; index < nr_pages; index++) { + page_start = page_offset(pages[index]); + if (!end) { + start = page_start; + end = start + PAGE_CACHE_SIZE - 1; + first_index = index; + } else if (end + 1 == page_start) { + end += PAGE_CACHE_SIZE; + } else { + __do_contiguous_readpages(tree, &pages[first_index], + index - first_index, start, + end, get_extent, em_cached, + bio, mirror_num, bio_flags, + rw); + start = page_start; + end = start + PAGE_CACHE_SIZE - 1; + first_index = index; + } + } + + if (end) + __do_contiguous_readpages(tree, &pages[first_index], + index - first_index, start, + end, get_extent, em_cached, bio, + mirror_num, bio_flags, rw); +} + +static int __extent_read_full_page(struct extent_io_tree *tree, + struct page *page, + get_extent_t *get_extent, + struct bio **bio, int mirror_num, + unsigned long *bio_flags, int rw) +{ + struct inode *inode = page->mapping->host; + struct btrfs_ordered_extent *ordered; + u64 start = page_offset(page); + u64 end = start + PAGE_CACHE_SIZE - 1; + int ret; + + while (1) { + lock_extent(tree, start, end); + ordered = btrfs_lookup_ordered_extent(inode, start); + if (!ordered) + break; + unlock_extent(tree, start, end); + btrfs_start_ordered_extent(inode, ordered, 1); + btrfs_put_ordered_extent(ordered); + } + + ret = __do_readpage(tree, page, get_extent, NULL, bio, mirror_num, + bio_flags, rw); + return ret; +} + int extent_read_full_page(struct extent_io_tree *tree, struct page *page, get_extent_t *get_extent, int mirror_num) { @@ -2933,6 +3021,20 @@ int extent_read_full_page(struct extent_io_tree *tree, struct page *page, return ret; } +int extent_read_full_page_nolock(struct extent_io_tree *tree, struct page *page, + get_extent_t *get_extent, int mirror_num) +{ + struct bio *bio = NULL; + unsigned long bio_flags = EXTENT_BIO_PARENT_LOCKED; + int ret; + + ret = __do_readpage(tree, page, get_extent, NULL, &bio, mirror_num, + &bio_flags, READ); + if (bio) + ret = submit_one_bio(READ, bio, mirror_num, bio_flags); + return ret; +} + static noinline void update_nr_written(struct page *page, struct writeback_control *wbc, unsigned long nr_written) @@ -3189,8 +3291,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, if (!PageWriteback(page)) { printk(KERN_ERR "btrfs warning page %lu not " "writeback, cur %llu end %llu\n", - page->index, (unsigned long long)cur, - (unsigned long long)end); + page->index, cur, end); } ret = submit_extent_page(write_flags, tree, page, @@ -3769,7 +3870,7 @@ int extent_readpages(struct extent_io_tree *tree, unsigned long bio_flags = 0; struct page *pagepool[16]; struct page *page; - int i = 0; + struct extent_map *em_cached = NULL; int nr = 0; for (page_idx = 0; page_idx < nr_pages; page_idx++) { @@ -3786,18 +3887,16 @@ int extent_readpages(struct extent_io_tree *tree, pagepool[nr++] = page; if (nr < ARRAY_SIZE(pagepool)) continue; - for (i = 0; i < nr; i++) { - __extent_read_full_page(tree, pagepool[i], get_extent, - &bio, 0, &bio_flags, READ); - page_cache_release(pagepool[i]); - } + __extent_readpages(tree, pagepool, nr, get_extent, &em_cached, + &bio, 0, &bio_flags, READ); nr = 0; } - for (i = 0; i < nr; i++) { - __extent_read_full_page(tree, pagepool[i], get_extent, - &bio, 0, &bio_flags, READ); - page_cache_release(pagepool[i]); - } + if (nr) + __extent_readpages(tree, pagepool, nr, get_extent, &em_cached, + &bio, 0, &bio_flags, READ); + + if (em_cached) + free_extent_map(em_cached); BUG_ON(!list_empty(pages)); if (bio) @@ -4136,6 +4235,76 @@ static void __free_extent_buffer(struct extent_buffer *eb) kmem_cache_free(extent_buffer_cache, eb); } +static int extent_buffer_under_io(struct extent_buffer *eb) +{ + return (atomic_read(&eb->io_pages) || + test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags) || + test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)); +} + +/* + * Helper for releasing extent buffer page. + */ +static void btrfs_release_extent_buffer_page(struct extent_buffer *eb, + unsigned long start_idx) +{ + unsigned long index; + unsigned long num_pages; + struct page *page; + int mapped = !test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags); + + BUG_ON(extent_buffer_under_io(eb)); + + num_pages = num_extent_pages(eb->start, eb->len); + index = start_idx + num_pages; + if (start_idx >= index) + return; + + do { + index--; + page = extent_buffer_page(eb, index); + if (page && mapped) { + spin_lock(&page->mapping->private_lock); + /* + * We do this since we'll remove the pages after we've + * removed the eb from the radix tree, so we could race + * and have this page now attached to the new eb. So + * only clear page_private if it's still connected to + * this eb. + */ + if (PagePrivate(page) && + page->private == (unsigned long)eb) { + BUG_ON(test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)); + BUG_ON(PageDirty(page)); + BUG_ON(PageWriteback(page)); + /* + * We need to make sure we haven't be attached + * to a new eb. + */ + ClearPagePrivate(page); + set_page_private(page, 0); + /* One for the page private */ + page_cache_release(page); + } + spin_unlock(&page->mapping->private_lock); + + } + if (page) { + /* One for when we alloced the page */ + page_cache_release(page); + } + } while (index != start_idx); +} + +/* + * Helper for releasing the extent buffer. + */ +static inline void btrfs_release_extent_buffer(struct extent_buffer *eb) +{ + btrfs_release_extent_buffer_page(eb, 0); + __free_extent_buffer(eb); +} + static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree, u64 start, unsigned long len, @@ -4184,13 +4353,16 @@ struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src) struct extent_buffer *new; unsigned long num_pages = num_extent_pages(src->start, src->len); - new = __alloc_extent_buffer(NULL, src->start, src->len, GFP_ATOMIC); + new = __alloc_extent_buffer(NULL, src->start, src->len, GFP_NOFS); if (new == NULL) return NULL; for (i = 0; i < num_pages; i++) { - p = alloc_page(GFP_ATOMIC); - BUG_ON(!p); + p = alloc_page(GFP_NOFS); + if (!p) { + btrfs_release_extent_buffer(new); + return NULL; + } attach_extent_buffer_page(new, p); WARN_ON(PageDirty(p)); SetPageUptodate(p); @@ -4210,12 +4382,12 @@ struct extent_buffer *alloc_dummy_extent_buffer(u64 start, unsigned long len) unsigned long num_pages = num_extent_pages(0, len); unsigned long i; - eb = __alloc_extent_buffer(NULL, start, len, GFP_ATOMIC); + eb = __alloc_extent_buffer(NULL, start, len, GFP_NOFS); if (!eb) return NULL; for (i = 0; i < num_pages; i++) { - eb->pages[i] = alloc_page(GFP_ATOMIC); + eb->pages[i] = alloc_page(GFP_NOFS); if (!eb->pages[i]) goto err; } @@ -4231,76 +4403,6 @@ err: return NULL; } -static int extent_buffer_under_io(struct extent_buffer *eb) -{ - return (atomic_read(&eb->io_pages) || - test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags) || - test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)); -} - -/* - * Helper for releasing extent buffer page. - */ -static void btrfs_release_extent_buffer_page(struct extent_buffer *eb, - unsigned long start_idx) -{ - unsigned long index; - unsigned long num_pages; - struct page *page; - int mapped = !test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags); - - BUG_ON(extent_buffer_under_io(eb)); - - num_pages = num_extent_pages(eb->start, eb->len); - index = start_idx + num_pages; - if (start_idx >= index) - return; - - do { - index--; - page = extent_buffer_page(eb, index); - if (page && mapped) { - spin_lock(&page->mapping->private_lock); - /* - * We do this since we'll remove the pages after we've - * removed the eb from the radix tree, so we could race - * and have this page now attached to the new eb. So - * only clear page_private if it's still connected to - * this eb. - */ - if (PagePrivate(page) && - page->private == (unsigned long)eb) { - BUG_ON(test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)); - BUG_ON(PageDirty(page)); - BUG_ON(PageWriteback(page)); - /* - * We need to make sure we haven't be attached - * to a new eb. - */ - ClearPagePrivate(page); - set_page_private(page, 0); - /* One for the page private */ - page_cache_release(page); - } - spin_unlock(&page->mapping->private_lock); - - } - if (page) { - /* One for when we alloced the page */ - page_cache_release(page); - } - } while (index != start_idx); -} - -/* - * Helper for releasing the extent buffer. - */ -static inline void btrfs_release_extent_buffer(struct extent_buffer *eb) -{ - btrfs_release_extent_buffer_page(eb, 0); - __free_extent_buffer(eb); -} - static void check_buffer_tree_ref(struct extent_buffer *eb) { int refs; @@ -4771,7 +4873,7 @@ void read_extent_buffer(struct extent_buffer *eb, void *dstv, WARN_ON(start > eb->len); WARN_ON(start + len > eb->start + eb->len); - offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1); + offset = (start_offset + start) & (PAGE_CACHE_SIZE - 1); while (len > 0) { page = extent_buffer_page(eb, i); @@ -4813,8 +4915,8 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start, if (start + min_len > eb->len) { WARN(1, KERN_ERR "btrfs bad mapping eb start %llu len %lu, " - "wanted %lu %lu\n", (unsigned long long)eb->start, - eb->len, start, min_len); + "wanted %lu %lu\n", + eb->start, eb->len, start, min_len); return -EINVAL; } @@ -4841,7 +4943,7 @@ int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv, WARN_ON(start > eb->len); WARN_ON(start + len > eb->start + eb->len); - offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1); + offset = (start_offset + start) & (PAGE_CACHE_SIZE - 1); while (len > 0) { page = extent_buffer_page(eb, i); @@ -4875,7 +4977,7 @@ void write_extent_buffer(struct extent_buffer *eb, const void *srcv, WARN_ON(start > eb->len); WARN_ON(start + len > eb->start + eb->len); - offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1); + offset = (start_offset + start) & (PAGE_CACHE_SIZE - 1); while (len > 0) { page = extent_buffer_page(eb, i); @@ -4905,7 +5007,7 @@ void memset_extent_buffer(struct extent_buffer *eb, char c, WARN_ON(start > eb->len); WARN_ON(start + len > eb->start + eb->len); - offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1); + offset = (start_offset + start) & (PAGE_CACHE_SIZE - 1); while (len > 0) { page = extent_buffer_page(eb, i); @@ -4936,7 +5038,7 @@ void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src, WARN_ON(src->len != dst_len); offset = (start_offset + dst_offset) & - ((unsigned long)PAGE_CACHE_SIZE - 1); + (PAGE_CACHE_SIZE - 1); while (len > 0) { page = extent_buffer_page(dst, i); @@ -5022,9 +5124,9 @@ void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, while (len > 0) { dst_off_in_page = (start_offset + dst_offset) & - ((unsigned long)PAGE_CACHE_SIZE - 1); + (PAGE_CACHE_SIZE - 1); src_off_in_page = (start_offset + src_offset) & - ((unsigned long)PAGE_CACHE_SIZE - 1); + (PAGE_CACHE_SIZE - 1); dst_i = (start_offset + dst_offset) >> PAGE_CACHE_SHIFT; src_i = (start_offset + src_offset) >> PAGE_CACHE_SHIFT; @@ -5075,9 +5177,9 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, src_i = (start_offset + src_end) >> PAGE_CACHE_SHIFT; dst_off_in_page = (start_offset + dst_end) & - ((unsigned long)PAGE_CACHE_SIZE - 1); + (PAGE_CACHE_SIZE - 1); src_off_in_page = (start_offset + src_end) & - ((unsigned long)PAGE_CACHE_SIZE - 1); + (PAGE_CACHE_SIZE - 1); cur = min_t(unsigned long, len, src_off_in_page + 1); cur = min(cur, dst_off_in_page + 1); diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 3b8c4e26e1da..6dbc645f1f3d 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -29,6 +29,7 @@ */ #define EXTENT_BIO_COMPRESSED 1 #define EXTENT_BIO_TREE_LOG 2 +#define EXTENT_BIO_PARENT_LOCKED 4 #define EXTENT_BIO_FLAG_SHIFT 16 /* these are bit numbers for test/set bit */ @@ -44,14 +45,11 @@ #define EXTENT_BUFFER_DUMMY 9 /* these are flags for extent_clear_unlock_delalloc */ -#define EXTENT_CLEAR_UNLOCK_PAGE 0x1 -#define EXTENT_CLEAR_UNLOCK 0x2 -#define EXTENT_CLEAR_DELALLOC 0x4 -#define EXTENT_CLEAR_DIRTY 0x8 -#define EXTENT_SET_WRITEBACK 0x10 -#define EXTENT_END_WRITEBACK 0x20 -#define EXTENT_SET_PRIVATE2 0x40 -#define EXTENT_CLEAR_ACCOUNTING 0x80 +#define PAGE_UNLOCK (1 << 0) +#define PAGE_CLEAR_DIRTY (1 << 1) +#define PAGE_SET_WRITEBACK (1 << 2) +#define PAGE_END_WRITEBACK (1 << 3) +#define PAGE_SET_PRIVATE2 (1 << 4) /* * page->private values. Every page that is controlled by the extent @@ -62,6 +60,7 @@ struct extent_state; struct btrfs_root; +struct btrfs_io_bio; typedef int (extent_submit_bio_hook_t)(struct inode *inode, int rw, struct bio *bio, int mirror_num, @@ -77,8 +76,9 @@ struct extent_io_ops { size_t size, struct bio *bio, unsigned long bio_flags); int (*readpage_io_failed_hook)(struct page *page, int failed_mirror); - int (*readpage_end_io_hook)(struct page *page, u64 start, u64 end, - struct extent_state *state, int mirror); + int (*readpage_end_io_hook)(struct btrfs_io_bio *io_bio, u64 phy_offset, + struct page *page, u64 start, u64 end, + int mirror); int (*writepage_end_io_hook)(struct page *page, u64 start, u64 end, struct extent_state *state, int uptodate); void (*set_bit_hook)(struct inode *inode, struct extent_state *state, @@ -200,6 +200,8 @@ int unlock_extent_cached(struct extent_io_tree *tree, u64 start, u64 end, int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end); int extent_read_full_page(struct extent_io_tree *tree, struct page *page, get_extent_t *get_extent, int mirror_num); +int extent_read_full_page_nolock(struct extent_io_tree *tree, struct page *page, + get_extent_t *get_extent, int mirror_num); int __init extent_io_init(void); void extent_io_exit(void); @@ -261,11 +263,6 @@ int extent_readpages(struct extent_io_tree *tree, get_extent_t get_extent); int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, __u64 start, __u64 len, get_extent_t *get_extent); -int set_state_private(struct extent_io_tree *tree, u64 start, u64 private); -void extent_cache_csums_dio(struct extent_io_tree *tree, u64 start, u32 csums[], - int count); -void extent_cache_csums(struct extent_io_tree *tree, struct bio *bio, - int bvec_index, u32 csums[], int count); int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private); void set_page_extent_mapped(struct page *page); @@ -330,10 +327,10 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long offset, unsigned long *map_len); int extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end); int extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end); -int extent_clear_unlock_delalloc(struct inode *inode, - struct extent_io_tree *tree, - u64 start, u64 end, struct page *locked_page, - unsigned long op); +int extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end, + struct page *locked_page, + unsigned long bits_to_clear, + unsigned long page_ops); struct bio * btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs, gfp_t gfp_flags); diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index a7bfc9541803..4f53159bdb9d 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -23,6 +23,7 @@ #include "ctree.h" #include "disk-io.h" #include "transaction.h" +#include "volumes.h" #include "print-tree.h" #define __MAX_CSUM_ITEMS(r, size) ((unsigned long)(((BTRFS_LEAF_DATA_SIZE(r) - \ @@ -152,28 +153,54 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans, return ret; } +static void btrfs_io_bio_endio_readpage(struct btrfs_io_bio *bio, int err) +{ + kfree(bio->csum_allocated); +} + static int __btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode, struct bio *bio, u64 logical_offset, u32 *dst, int dio) { - u32 sum[16]; - int len; struct bio_vec *bvec = bio->bi_io_vec; - int bio_index = 0; + struct btrfs_io_bio *btrfs_bio = btrfs_io_bio(bio); + struct btrfs_csum_item *item = NULL; + struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; + struct btrfs_path *path; + u8 *csum; u64 offset = 0; u64 item_start_offset = 0; u64 item_last_offset = 0; u64 disk_bytenr; u32 diff; - u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy); + int nblocks; + int bio_index = 0; int count; - struct btrfs_path *path; - struct btrfs_csum_item *item = NULL; - struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; + u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy); path = btrfs_alloc_path(); if (!path) return -ENOMEM; + + nblocks = bio->bi_size >> inode->i_sb->s_blocksize_bits; + if (!dst) { + if (nblocks * csum_size > BTRFS_BIO_INLINE_CSUM_SIZE) { + btrfs_bio->csum_allocated = kmalloc(nblocks * csum_size, + GFP_NOFS); + if (!btrfs_bio->csum_allocated) { + btrfs_free_path(path); + return -ENOMEM; + } + btrfs_bio->csum = btrfs_bio->csum_allocated; + btrfs_bio->end_io = btrfs_io_bio_endio_readpage; + } else { + btrfs_bio->csum = btrfs_bio->csum_inline; + } + csum = btrfs_bio->csum; + } else { + csum = (u8 *)dst; + } + if (bio->bi_size > PAGE_CACHE_SIZE * 8) path->reada = 2; @@ -194,11 +221,10 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root, if (dio) offset = logical_offset; while (bio_index < bio->bi_vcnt) { - len = min_t(int, ARRAY_SIZE(sum), bio->bi_vcnt - bio_index); if (!dio) offset = page_offset(bvec->bv_page) + bvec->bv_offset; - count = btrfs_find_ordered_sum(inode, offset, disk_bytenr, sum, - len); + count = btrfs_find_ordered_sum(inode, offset, disk_bytenr, + (u32 *)csum, nblocks); if (count) goto found; @@ -213,7 +239,7 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root, path, disk_bytenr, 0); if (IS_ERR(item)) { count = 1; - sum[0] = 0; + memset(csum, 0, csum_size); if (BTRFS_I(inode)->root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID) { set_extent_bits(io_tree, offset, @@ -222,9 +248,7 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root, } else { printk(KERN_INFO "btrfs no csum found " "for inode %llu start %llu\n", - (unsigned long long) - btrfs_ino(inode), - (unsigned long long)offset); + btrfs_ino(inode), offset); } item = NULL; btrfs_release_path(path); @@ -249,23 +273,14 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root, diff = disk_bytenr - item_start_offset; diff = diff / root->sectorsize; diff = diff * csum_size; - count = min_t(int, len, (item_last_offset - disk_bytenr) >> - inode->i_sb->s_blocksize_bits); - read_extent_buffer(path->nodes[0], sum, + count = min_t(int, nblocks, (item_last_offset - disk_bytenr) >> + inode->i_sb->s_blocksize_bits); + read_extent_buffer(path->nodes[0], csum, ((unsigned long)item) + diff, csum_size * count); found: - if (dst) { - memcpy(dst, sum, count * csum_size); - dst += count; - } else { - if (dio) - extent_cache_csums_dio(io_tree, offset, sum, - count); - else - extent_cache_csums(io_tree, bio, bio_index, sum, - count); - } + csum += count * csum_size; + nblocks -= count; while (count--) { disk_bytenr += bvec->bv_len; offset += bvec->bv_len; @@ -284,9 +299,19 @@ int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode, } int btrfs_lookup_bio_sums_dio(struct btrfs_root *root, struct inode *inode, - struct bio *bio, u64 offset) + struct btrfs_dio_private *dip, struct bio *bio, + u64 offset) { - return __btrfs_lookup_bio_sums(root, inode, bio, offset, NULL, 1); + int len = (bio->bi_sector << 9) - dip->disk_bytenr; + u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy); + int ret; + + len >>= inode->i_sb->s_blocksize_bits; + len *= csum_size; + + ret = __btrfs_lookup_bio_sums(root, inode, bio, offset, + (u32 *)(dip->csum + len), 1); + return ret; } int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 4d2eb6417145..bc5072b2db53 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1334,7 +1334,6 @@ fail: static noinline int check_can_nocow(struct inode *inode, loff_t pos, size_t *write_bytes) { - struct btrfs_trans_handle *trans; struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_ordered_extent *ordered; u64 lockstart, lockend; @@ -1356,16 +1355,8 @@ static noinline int check_can_nocow(struct inode *inode, loff_t pos, btrfs_put_ordered_extent(ordered); } - trans = btrfs_join_transaction(root); - if (IS_ERR(trans)) { - unlock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend); - return PTR_ERR(trans); - } - num_bytes = lockend - lockstart + 1; - ret = can_nocow_extent(trans, inode, lockstart, &num_bytes, NULL, NULL, - NULL); - btrfs_end_transaction(trans, root); + ret = can_nocow_extent(inode, lockstart, &num_bytes, NULL, NULL, NULL); if (ret <= 0) { ret = 0; } else { diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 256d9597f0cf..3f0ddfce96e6 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -306,7 +306,7 @@ static void io_ctl_unmap_page(struct io_ctl *io_ctl) static void io_ctl_map_page(struct io_ctl *io_ctl, int clear) { - BUG_ON(io_ctl->index >= io_ctl->num_pages); + ASSERT(io_ctl->index < io_ctl->num_pages); io_ctl->page = io_ctl->pages[io_ctl->index++]; io_ctl->cur = kmap(io_ctl->page); io_ctl->orig = io_ctl->cur; @@ -671,8 +671,7 @@ static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, btrfs_err(root->fs_info, "free space inode generation (%llu) " "did not match free space cache generation (%llu)", - (unsigned long long)BTRFS_I(inode)->generation, - (unsigned long long)generation); + BTRFS_I(inode)->generation, generation); return 0; } @@ -727,7 +726,7 @@ static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, goto free_cache; } } else { - BUG_ON(!num_bitmaps); + ASSERT(num_bitmaps); num_bitmaps--; e->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS); if (!e->bitmap) { @@ -1027,7 +1026,7 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, leaf = path->nodes[0]; if (ret > 0) { struct btrfs_key found_key; - BUG_ON(!path->slots[0]); + ASSERT(path->slots[0]); path->slots[0]--; btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); if (found_key.objectid != BTRFS_FREE_SPACE_OBJECTID || @@ -1115,7 +1114,7 @@ int btrfs_write_out_cache(struct btrfs_root *root, static inline unsigned long offset_to_bit(u64 bitmap_start, u32 unit, u64 offset) { - BUG_ON(offset < bitmap_start); + ASSERT(offset >= bitmap_start); offset -= bitmap_start; return (unsigned long)(div_u64(offset, unit)); } @@ -1270,7 +1269,7 @@ tree_search_offset(struct btrfs_free_space_ctl *ctl, if (n) { entry = rb_entry(n, struct btrfs_free_space, offset_index); - BUG_ON(entry->offset > offset); + ASSERT(entry->offset <= offset); } else { if (fuzzy) return entry; @@ -1334,7 +1333,7 @@ static int link_free_space(struct btrfs_free_space_ctl *ctl, { int ret = 0; - BUG_ON(!info->bitmap && !info->bytes); + ASSERT(info->bytes || info->bitmap); ret = tree_insert_offset(&ctl->free_space_offset, info->offset, &info->offset_index, (info->bitmap != NULL)); if (ret) @@ -1357,7 +1356,7 @@ static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl) max_bitmaps = max(max_bitmaps, 1); - BUG_ON(ctl->total_bitmaps > max_bitmaps); + ASSERT(ctl->total_bitmaps <= max_bitmaps); /* * The goal is to keep the total amount of memory used per 1gb of space @@ -1401,7 +1400,7 @@ static inline void __bitmap_clear_bits(struct btrfs_free_space_ctl *ctl, start = offset_to_bit(info->offset, ctl->unit, offset); count = bytes_to_bits(bytes, ctl->unit); - BUG_ON(start + count > BITS_PER_BITMAP); + ASSERT(start + count <= BITS_PER_BITMAP); bitmap_clear(info->bitmap, start, count); @@ -1424,7 +1423,7 @@ static void bitmap_set_bits(struct btrfs_free_space_ctl *ctl, start = offset_to_bit(info->offset, ctl->unit, offset); count = bytes_to_bits(bytes, ctl->unit); - BUG_ON(start + count > BITS_PER_BITMAP); + ASSERT(start + count <= BITS_PER_BITMAP); bitmap_set(info->bitmap, start, count); @@ -1740,7 +1739,7 @@ no_cluster_bitmap: bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 1, 0); if (!bitmap_info) { - BUG_ON(added); + ASSERT(added == 0); goto new_bitmap; } @@ -1880,7 +1879,7 @@ out: if (ret) { printk(KERN_CRIT "btrfs: unable to add free space :%d\n", ret); - BUG_ON(ret == -EEXIST); + ASSERT(ret != -EEXIST); } return ret; @@ -1989,8 +1988,7 @@ void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group, if (info->bytes >= bytes && !block_group->ro) count++; printk(KERN_CRIT "entry offset %llu, bytes %llu, bitmap %s\n", - (unsigned long long)info->offset, - (unsigned long long)info->bytes, + info->offset, info->bytes, (info->bitmap) ? "yes" : "no"); } printk(KERN_INFO "block group has cluster?: %s\n", @@ -2369,7 +2367,7 @@ again: rb_erase(&entry->offset_index, &ctl->free_space_offset); ret = tree_insert_offset(&cluster->root, entry->offset, &entry->offset_index, 1); - BUG_ON(ret); /* -EEXIST; Logic error */ + ASSERT(!ret); /* -EEXIST; Logic error */ trace_btrfs_setup_cluster(block_group, cluster, total_found * ctl->unit, 1); @@ -2462,7 +2460,7 @@ setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group, ret = tree_insert_offset(&cluster->root, entry->offset, &entry->offset_index, 0); total_size += entry->bytes; - BUG_ON(ret); /* -EEXIST; Logic error */ + ASSERT(!ret); /* -EEXIST; Logic error */ } while (node && entry != last); cluster->max_size = max_extent; @@ -2523,8 +2521,7 @@ setup_cluster_bitmap(struct btrfs_block_group_cache *block_group, * returns zero and sets up cluster if things worked out, otherwise * it returns -enospc */ -int btrfs_find_space_cluster(struct btrfs_trans_handle *trans, - struct btrfs_root *root, +int btrfs_find_space_cluster(struct btrfs_root *root, struct btrfs_block_group_cache *block_group, struct btrfs_free_cluster *cluster, u64 offset, u64 bytes, u64 empty_size) @@ -2854,7 +2851,7 @@ u64 btrfs_find_ino_for_alloc(struct btrfs_root *fs_root) ret = search_bitmap(ctl, entry, &offset, &count); /* Logic error; Should be empty if it can't find anything */ - BUG_ON(ret); + ASSERT(!ret); ino = offset; bitmap_clear_bits(ctl, entry, offset, 1); @@ -2971,33 +2968,68 @@ int btrfs_write_out_ino_cache(struct btrfs_root *root, } #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS -static struct btrfs_block_group_cache *init_test_block_group(void) +/* + * Use this if you need to make a bitmap or extent entry specifically, it + * doesn't do any of the merging that add_free_space does, this acts a lot like + * how the free space cache loading stuff works, so you can get really weird + * configurations. + */ +int test_add_free_space_entry(struct btrfs_block_group_cache *cache, + u64 offset, u64 bytes, bool bitmap) { - struct btrfs_block_group_cache *cache; + struct btrfs_free_space_ctl *ctl = cache->free_space_ctl; + struct btrfs_free_space *info = NULL, *bitmap_info; + void *map = NULL; + u64 bytes_added; + int ret; - cache = kzalloc(sizeof(*cache), GFP_NOFS); - if (!cache) - return NULL; - cache->free_space_ctl = kzalloc(sizeof(*cache->free_space_ctl), - GFP_NOFS); - if (!cache->free_space_ctl) { - kfree(cache); - return NULL; +again: + if (!info) { + info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS); + if (!info) + return -ENOMEM; } - cache->key.objectid = 0; - cache->key.offset = 1024 * 1024 * 1024; - cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; - cache->sectorsize = 4096; + if (!bitmap) { + spin_lock(&ctl->tree_lock); + info->offset = offset; + info->bytes = bytes; + ret = link_free_space(ctl, info); + spin_unlock(&ctl->tree_lock); + if (ret) + kmem_cache_free(btrfs_free_space_cachep, info); + return ret; + } + + if (!map) { + map = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS); + if (!map) { + kmem_cache_free(btrfs_free_space_cachep, info); + return -ENOMEM; + } + } + + spin_lock(&ctl->tree_lock); + bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), + 1, 0); + if (!bitmap_info) { + info->bitmap = map; + map = NULL; + add_new_bitmap(ctl, info, offset); + bitmap_info = info; + } - spin_lock_init(&cache->lock); - INIT_LIST_HEAD(&cache->list); - INIT_LIST_HEAD(&cache->cluster_list); - INIT_LIST_HEAD(&cache->new_bg_list); + bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes); + bytes -= bytes_added; + offset += bytes_added; + spin_unlock(&ctl->tree_lock); - btrfs_init_free_space_ctl(cache); + if (bytes) + goto again; - return cache; + if (map) + kfree(map); + return 0; } /* @@ -3005,8 +3037,8 @@ static struct btrfs_block_group_cache *init_test_block_group(void) * just used to check the absence of space, so if there is free space in the * range at all we will return 1. */ -static int check_exists(struct btrfs_block_group_cache *cache, u64 offset, - u64 bytes) +int test_check_exists(struct btrfs_block_group_cache *cache, + u64 offset, u64 bytes) { struct btrfs_free_space_ctl *ctl = cache->free_space_ctl; struct btrfs_free_space *info; @@ -3083,411 +3115,4 @@ out: spin_unlock(&ctl->tree_lock); return ret; } - -/* - * Use this if you need to make a bitmap or extent entry specifically, it - * doesn't do any of the merging that add_free_space does, this acts a lot like - * how the free space cache loading stuff works, so you can get really weird - * configurations. - */ -static int add_free_space_entry(struct btrfs_block_group_cache *cache, - u64 offset, u64 bytes, bool bitmap) -{ - struct btrfs_free_space_ctl *ctl = cache->free_space_ctl; - struct btrfs_free_space *info = NULL, *bitmap_info; - void *map = NULL; - u64 bytes_added; - int ret; - -again: - if (!info) { - info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS); - if (!info) - return -ENOMEM; - } - - if (!bitmap) { - spin_lock(&ctl->tree_lock); - info->offset = offset; - info->bytes = bytes; - ret = link_free_space(ctl, info); - spin_unlock(&ctl->tree_lock); - if (ret) - kmem_cache_free(btrfs_free_space_cachep, info); - return ret; - } - - if (!map) { - map = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS); - if (!map) { - kmem_cache_free(btrfs_free_space_cachep, info); - return -ENOMEM; - } - } - - spin_lock(&ctl->tree_lock); - bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), - 1, 0); - if (!bitmap_info) { - info->bitmap = map; - map = NULL; - add_new_bitmap(ctl, info, offset); - bitmap_info = info; - } - - bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes); - bytes -= bytes_added; - offset += bytes_added; - spin_unlock(&ctl->tree_lock); - - if (bytes) - goto again; - - if (map) - kfree(map); - return 0; -} - -#define test_msg(fmt, ...) printk(KERN_INFO "btrfs: selftest: " fmt, ##__VA_ARGS__) - -/* - * This test just does basic sanity checking, making sure we can add an exten - * entry and remove space from either end and the middle, and make sure we can - * remove space that covers adjacent extent entries. - */ -static int test_extents(struct btrfs_block_group_cache *cache) -{ - int ret = 0; - - test_msg("Running extent only tests\n"); - - /* First just make sure we can remove an entire entry */ - ret = btrfs_add_free_space(cache, 0, 4 * 1024 * 1024); - if (ret) { - test_msg("Error adding initial extents %d\n", ret); - return ret; - } - - ret = btrfs_remove_free_space(cache, 0, 4 * 1024 * 1024); - if (ret) { - test_msg("Error removing extent %d\n", ret); - return ret; - } - - if (check_exists(cache, 0, 4 * 1024 * 1024)) { - test_msg("Full remove left some lingering space\n"); - return -1; - } - - /* Ok edge and middle cases now */ - ret = btrfs_add_free_space(cache, 0, 4 * 1024 * 1024); - if (ret) { - test_msg("Error adding half extent %d\n", ret); - return ret; - } - - ret = btrfs_remove_free_space(cache, 3 * 1024 * 1024, 1 * 1024 * 1024); - if (ret) { - test_msg("Error removing tail end %d\n", ret); - return ret; - } - - ret = btrfs_remove_free_space(cache, 0, 1 * 1024 * 1024); - if (ret) { - test_msg("Error removing front end %d\n", ret); - return ret; - } - - ret = btrfs_remove_free_space(cache, 2 * 1024 * 1024, 4096); - if (ret) { - test_msg("Error removing middle piece %d\n", ret); - return ret; - } - - if (check_exists(cache, 0, 1 * 1024 * 1024)) { - test_msg("Still have space at the front\n"); - return -1; - } - - if (check_exists(cache, 2 * 1024 * 1024, 4096)) { - test_msg("Still have space in the middle\n"); - return -1; - } - - if (check_exists(cache, 3 * 1024 * 1024, 1 * 1024 * 1024)) { - test_msg("Still have space at the end\n"); - return -1; - } - - /* Cleanup */ - __btrfs_remove_free_space_cache(cache->free_space_ctl); - - return 0; -} - -static int test_bitmaps(struct btrfs_block_group_cache *cache) -{ - u64 next_bitmap_offset; - int ret; - - test_msg("Running bitmap only tests\n"); - - ret = add_free_space_entry(cache, 0, 4 * 1024 * 1024, 1); - if (ret) { - test_msg("Couldn't create a bitmap entry %d\n", ret); - return ret; - } - - ret = btrfs_remove_free_space(cache, 0, 4 * 1024 * 1024); - if (ret) { - test_msg("Error removing bitmap full range %d\n", ret); - return ret; - } - - if (check_exists(cache, 0, 4 * 1024 * 1024)) { - test_msg("Left some space in bitmap\n"); - return -1; - } - - ret = add_free_space_entry(cache, 0, 4 * 1024 * 1024, 1); - if (ret) { - test_msg("Couldn't add to our bitmap entry %d\n", ret); - return ret; - } - - ret = btrfs_remove_free_space(cache, 1 * 1024 * 1024, 2 * 1024 * 1024); - if (ret) { - test_msg("Couldn't remove middle chunk %d\n", ret); - return ret; - } - - /* - * The first bitmap we have starts at offset 0 so the next one is just - * at the end of the first bitmap. - */ - next_bitmap_offset = (u64)(BITS_PER_BITMAP * 4096); - - /* Test a bit straddling two bitmaps */ - ret = add_free_space_entry(cache, next_bitmap_offset - - (2 * 1024 * 1024), 4 * 1024 * 1024, 1); - if (ret) { - test_msg("Couldn't add space that straddles two bitmaps %d\n", - ret); - return ret; - } - - ret = btrfs_remove_free_space(cache, next_bitmap_offset - - (1 * 1024 * 1024), 2 * 1024 * 1024); - if (ret) { - test_msg("Couldn't remove overlapping space %d\n", ret); - return ret; - } - - if (check_exists(cache, next_bitmap_offset - (1 * 1024 * 1024), - 2 * 1024 * 1024)) { - test_msg("Left some space when removing overlapping\n"); - return -1; - } - - __btrfs_remove_free_space_cache(cache->free_space_ctl); - - return 0; -} - -/* This is the high grade jackassery */ -static int test_bitmaps_and_extents(struct btrfs_block_group_cache *cache) -{ - u64 bitmap_offset = (u64)(BITS_PER_BITMAP * 4096); - int ret; - - test_msg("Running bitmap and extent tests\n"); - - /* - * First let's do something simple, an extent at the same offset as the - * bitmap, but the free space completely in the extent and then - * completely in the bitmap. - */ - ret = add_free_space_entry(cache, 4 * 1024 * 1024, 1 * 1024 * 1024, 1); - if (ret) { - test_msg("Couldn't create bitmap entry %d\n", ret); - return ret; - } - - ret = add_free_space_entry(cache, 0, 1 * 1024 * 1024, 0); - if (ret) { - test_msg("Couldn't add extent entry %d\n", ret); - return ret; - } - - ret = btrfs_remove_free_space(cache, 0, 1 * 1024 * 1024); - if (ret) { - test_msg("Couldn't remove extent entry %d\n", ret); - return ret; - } - - if (check_exists(cache, 0, 1 * 1024 * 1024)) { - test_msg("Left remnants after our remove\n"); - return -1; - } - - /* Now to add back the extent entry and remove from the bitmap */ - ret = add_free_space_entry(cache, 0, 1 * 1024 * 1024, 0); - if (ret) { - test_msg("Couldn't re-add extent entry %d\n", ret); - return ret; - } - - ret = btrfs_remove_free_space(cache, 4 * 1024 * 1024, 1 * 1024 * 1024); - if (ret) { - test_msg("Couldn't remove from bitmap %d\n", ret); - return ret; - } - - if (check_exists(cache, 4 * 1024 * 1024, 1 * 1024 * 1024)) { - test_msg("Left remnants in the bitmap\n"); - return -1; - } - - /* - * Ok so a little more evil, extent entry and bitmap at the same offset, - * removing an overlapping chunk. - */ - ret = add_free_space_entry(cache, 1 * 1024 * 1024, 4 * 1024 * 1024, 1); - if (ret) { - test_msg("Couldn't add to a bitmap %d\n", ret); - return ret; - } - - ret = btrfs_remove_free_space(cache, 512 * 1024, 3 * 1024 * 1024); - if (ret) { - test_msg("Couldn't remove overlapping space %d\n", ret); - return ret; - } - - if (check_exists(cache, 512 * 1024, 3 * 1024 * 1024)) { - test_msg("Left over peices after removing overlapping\n"); - return -1; - } - - __btrfs_remove_free_space_cache(cache->free_space_ctl); - - /* Now with the extent entry offset into the bitmap */ - ret = add_free_space_entry(cache, 4 * 1024 * 1024, 4 * 1024 * 1024, 1); - if (ret) { - test_msg("Couldn't add space to the bitmap %d\n", ret); - return ret; - } - - ret = add_free_space_entry(cache, 2 * 1024 * 1024, 2 * 1024 * 1024, 0); - if (ret) { - test_msg("Couldn't add extent to the cache %d\n", ret); - return ret; - } - - ret = btrfs_remove_free_space(cache, 3 * 1024 * 1024, 4 * 1024 * 1024); - if (ret) { - test_msg("Problem removing overlapping space %d\n", ret); - return ret; - } - - if (check_exists(cache, 3 * 1024 * 1024, 4 * 1024 * 1024)) { - test_msg("Left something behind when removing space"); - return -1; - } - - /* - * This has blown up in the past, the extent entry starts before the - * bitmap entry, but we're trying to remove an offset that falls - * completely within the bitmap range and is in both the extent entry - * and the bitmap entry, looks like this - * - * [ extent ] - * [ bitmap ] - * [ del ] - */ - __btrfs_remove_free_space_cache(cache->free_space_ctl); - ret = add_free_space_entry(cache, bitmap_offset + 4 * 1024 * 1024, - 4 * 1024 * 1024, 1); - if (ret) { - test_msg("Couldn't add bitmap %d\n", ret); - return ret; - } - - ret = add_free_space_entry(cache, bitmap_offset - 1 * 1024 * 1024, - 5 * 1024 * 1024, 0); - if (ret) { - test_msg("Couldn't add extent entry %d\n", ret); - return ret; - } - - ret = btrfs_remove_free_space(cache, bitmap_offset + 1 * 1024 * 1024, - 5 * 1024 * 1024); - if (ret) { - test_msg("Failed to free our space %d\n", ret); - return ret; - } - - if (check_exists(cache, bitmap_offset + 1 * 1024 * 1024, - 5 * 1024 * 1024)) { - test_msg("Left stuff over\n"); - return -1; - } - - __btrfs_remove_free_space_cache(cache->free_space_ctl); - - /* - * This blew up before, we have part of the free space in a bitmap and - * then the entirety of the rest of the space in an extent. This used - * to return -EAGAIN back from btrfs_remove_extent, make sure this - * doesn't happen. - */ - ret = add_free_space_entry(cache, 1 * 1024 * 1024, 2 * 1024 * 1024, 1); - if (ret) { - test_msg("Couldn't add bitmap entry %d\n", ret); - return ret; - } - - ret = add_free_space_entry(cache, 3 * 1024 * 1024, 1 * 1024 * 1024, 0); - if (ret) { - test_msg("Couldn't add extent entry %d\n", ret); - return ret; - } - - ret = btrfs_remove_free_space(cache, 1 * 1024 * 1024, 3 * 1024 * 1024); - if (ret) { - test_msg("Error removing bitmap and extent overlapping %d\n", ret); - return ret; - } - - __btrfs_remove_free_space_cache(cache->free_space_ctl); - return 0; -} - -void btrfs_test_free_space_cache(void) -{ - struct btrfs_block_group_cache *cache; - - test_msg("Running btrfs free space cache tests\n"); - - cache = init_test_block_group(); - if (!cache) { - test_msg("Couldn't run the tests\n"); - return; - } - - if (test_extents(cache)) - goto out; - if (test_bitmaps(cache)) - goto out; - if (test_bitmaps_and_extents(cache)) - goto out; -out: - __btrfs_remove_free_space_cache(cache->free_space_ctl); - kfree(cache->free_space_ctl); - kfree(cache); - test_msg("Free space cache tests finished\n"); -} -#undef test_msg -#else /* !CONFIG_BTRFS_FS_RUN_SANITY_TESTS */ -void btrfs_test_free_space_cache(void) {} -#endif /* !CONFIG_BTRFS_FS_RUN_SANITY_TESTS */ +#endif /* CONFIG_BTRFS_FS_RUN_SANITY_TESTS */ diff --git a/fs/btrfs/free-space-cache.h b/fs/btrfs/free-space-cache.h index 894116b71304..c74904167476 100644 --- a/fs/btrfs/free-space-cache.h +++ b/fs/btrfs/free-space-cache.h @@ -98,8 +98,7 @@ u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group, u64 btrfs_find_ino_for_alloc(struct btrfs_root *fs_root); void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group, u64 bytes); -int btrfs_find_space_cluster(struct btrfs_trans_handle *trans, - struct btrfs_root *root, +int btrfs_find_space_cluster(struct btrfs_root *root, struct btrfs_block_group_cache *block_group, struct btrfs_free_cluster *cluster, u64 offset, u64 bytes, u64 empty_size); @@ -113,6 +112,12 @@ int btrfs_return_cluster_to_free_space( int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group, u64 *trimmed, u64 start, u64 end, u64 minlen); -void btrfs_test_free_space_cache(void); +/* Support functions for runnint our sanity tests */ +#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS +int test_add_free_space_entry(struct btrfs_block_group_cache *cache, + u64 offset, u64 bytes, bool bitmap); +int test_check_exists(struct btrfs_block_group_cache *cache, + u64 offset, u64 bytes); +#endif #endif diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 82fdd7059d10..f338c5672d58 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -230,12 +230,13 @@ fail: * does the checks required to make sure the data is small enough * to fit as an inline extent. */ -static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct inode *inode, u64 start, u64 end, - size_t compressed_size, int compress_type, - struct page **compressed_pages) +static noinline int cow_file_range_inline(struct btrfs_root *root, + struct inode *inode, u64 start, + u64 end, size_t compressed_size, + int compress_type, + struct page **compressed_pages) { + struct btrfs_trans_handle *trans; u64 isize = i_size_read(inode); u64 actual_end = min(end + 1, isize); u64 inline_len = actual_end - start; @@ -256,9 +257,16 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans, return 1; } + trans = btrfs_join_transaction(root); + if (IS_ERR(trans)) + return PTR_ERR(trans); + trans->block_rsv = &root->fs_info->delalloc_block_rsv; + ret = btrfs_drop_extents(trans, root, inode, start, aligned_end, 1); - if (ret) - return ret; + if (ret) { + btrfs_abort_transaction(trans, root, ret); + goto out; + } if (isize > actual_end) inline_len = min_t(u64, isize, actual_end); @@ -267,15 +275,18 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans, compress_type, compressed_pages); if (ret && ret != -ENOSPC) { btrfs_abort_transaction(trans, root, ret); - return ret; + goto out; } else if (ret == -ENOSPC) { - return 1; + ret = 1; + goto out; } set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(inode)->runtime_flags); btrfs_delalloc_release_metadata(inode, end + 1 - start); btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0); - return 0; +out: + btrfs_end_transaction(trans, root); + return ret; } struct async_extent { @@ -343,7 +354,6 @@ static noinline int compress_file_range(struct inode *inode, int *num_added) { struct btrfs_root *root = BTRFS_I(inode)->root; - struct btrfs_trans_handle *trans; u64 num_bytes; u64 blocksize = root->sectorsize; u64 actual_end; @@ -461,45 +471,36 @@ again: } cont: if (start == 0) { - trans = btrfs_join_transaction(root); - if (IS_ERR(trans)) { - ret = PTR_ERR(trans); - trans = NULL; - goto cleanup_and_out; - } - trans->block_rsv = &root->fs_info->delalloc_block_rsv; - /* lets try to make an inline extent */ if (ret || total_in < (actual_end - start)) { /* we didn't compress the entire range, try * to make an uncompressed inline extent. */ - ret = cow_file_range_inline(trans, root, inode, - start, end, 0, 0, NULL); + ret = cow_file_range_inline(root, inode, start, end, + 0, 0, NULL); } else { /* try making a compressed inline extent */ - ret = cow_file_range_inline(trans, root, inode, - start, end, + ret = cow_file_range_inline(root, inode, start, end, total_compressed, compress_type, pages); } if (ret <= 0) { + unsigned long clear_flags = EXTENT_DELALLOC | + EXTENT_DEFRAG; + clear_flags |= (ret < 0) ? EXTENT_DO_ACCOUNTING : 0; + /* * inline extent creation worked or returned error, * we don't need to create any more async work items. * Unlock and free up our temp pages. */ - extent_clear_unlock_delalloc(inode, - &BTRFS_I(inode)->io_tree, - start, end, NULL, - EXTENT_CLEAR_UNLOCK_PAGE | EXTENT_CLEAR_DIRTY | - EXTENT_CLEAR_DELALLOC | - EXTENT_SET_WRITEBACK | EXTENT_END_WRITEBACK); - - btrfs_end_transaction(trans, root); + extent_clear_unlock_delalloc(inode, start, end, NULL, + clear_flags, PAGE_UNLOCK | + PAGE_CLEAR_DIRTY | + PAGE_SET_WRITEBACK | + PAGE_END_WRITEBACK); goto free_pages_out; } - btrfs_end_transaction(trans, root); } if (will_compress) { @@ -590,20 +591,6 @@ free_pages_out: kfree(pages); goto out; - -cleanup_and_out: - extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree, - start, end, NULL, - EXTENT_CLEAR_UNLOCK_PAGE | - EXTENT_CLEAR_DIRTY | - EXTENT_CLEAR_DELALLOC | - EXTENT_SET_WRITEBACK | - EXTENT_END_WRITEBACK); - if (!trans || IS_ERR(trans)) - btrfs_error(root->fs_info, ret, "Failed to join transaction"); - else - btrfs_abort_transaction(trans, root, ret); - goto free_pages_out; } /* @@ -617,7 +604,6 @@ static noinline int submit_compressed_extents(struct inode *inode, { struct async_extent *async_extent; u64 alloc_hint = 0; - struct btrfs_trans_handle *trans; struct btrfs_key ins; struct extent_map *em; struct btrfs_root *root = BTRFS_I(inode)->root; @@ -678,20 +664,10 @@ retry: lock_extent(io_tree, async_extent->start, async_extent->start + async_extent->ram_size - 1); - trans = btrfs_join_transaction(root); - if (IS_ERR(trans)) { - ret = PTR_ERR(trans); - } else { - trans->block_rsv = &root->fs_info->delalloc_block_rsv; - ret = btrfs_reserve_extent(trans, root, + ret = btrfs_reserve_extent(root, async_extent->compressed_size, async_extent->compressed_size, 0, alloc_hint, &ins, 1); - if (ret && ret != -ENOSPC) - btrfs_abort_transaction(trans, root, ret); - btrfs_end_transaction(trans, root); - } - if (ret) { int i; @@ -770,16 +746,12 @@ retry: /* * clear dirty, set writeback and unlock the pages. */ - extent_clear_unlock_delalloc(inode, - &BTRFS_I(inode)->io_tree, - async_extent->start, + extent_clear_unlock_delalloc(inode, async_extent->start, async_extent->start + async_extent->ram_size - 1, - NULL, EXTENT_CLEAR_UNLOCK_PAGE | - EXTENT_CLEAR_UNLOCK | - EXTENT_CLEAR_DELALLOC | - EXTENT_CLEAR_DIRTY | EXTENT_SET_WRITEBACK); - + NULL, EXTENT_LOCKED | EXTENT_DELALLOC, + PAGE_UNLOCK | PAGE_CLEAR_DIRTY | + PAGE_SET_WRITEBACK); ret = btrfs_submit_compressed_write(inode, async_extent->start, async_extent->ram_size, @@ -798,16 +770,13 @@ out: out_free_reserve: btrfs_free_reserved_extent(root, ins.objectid, ins.offset); out_free: - extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree, - async_extent->start, + extent_clear_unlock_delalloc(inode, async_extent->start, async_extent->start + async_extent->ram_size - 1, - NULL, EXTENT_CLEAR_UNLOCK_PAGE | - EXTENT_CLEAR_UNLOCK | - EXTENT_CLEAR_DELALLOC | - EXTENT_CLEAR_DIRTY | - EXTENT_SET_WRITEBACK | - EXTENT_END_WRITEBACK); + NULL, EXTENT_LOCKED | EXTENT_DELALLOC | + EXTENT_DEFRAG | EXTENT_DO_ACCOUNTING, + PAGE_UNLOCK | PAGE_CLEAR_DIRTY | + PAGE_SET_WRITEBACK | PAGE_END_WRITEBACK); kfree(async_extent); goto again; } @@ -857,14 +826,13 @@ static u64 get_extent_allocation_hint(struct inode *inode, u64 start, * required to start IO on it. It may be clean and already done with * IO when we return. */ -static noinline int __cow_file_range(struct btrfs_trans_handle *trans, - struct inode *inode, - struct btrfs_root *root, - struct page *locked_page, - u64 start, u64 end, int *page_started, - unsigned long *nr_written, - int unlock) +static noinline int cow_file_range(struct inode *inode, + struct page *locked_page, + u64 start, u64 end, int *page_started, + unsigned long *nr_written, + int unlock) { + struct btrfs_root *root = BTRFS_I(inode)->root; u64 alloc_hint = 0; u64 num_bytes; unsigned long ram_size; @@ -885,29 +853,24 @@ static noinline int __cow_file_range(struct btrfs_trans_handle *trans, /* if this is a small write inside eof, kick off defrag */ if (num_bytes < 64 * 1024 && (start > 0 || end + 1 < BTRFS_I(inode)->disk_i_size)) - btrfs_add_inode_defrag(trans, inode); + btrfs_add_inode_defrag(NULL, inode); if (start == 0) { /* lets try to make an inline extent */ - ret = cow_file_range_inline(trans, root, inode, - start, end, 0, 0, NULL); + ret = cow_file_range_inline(root, inode, start, end, 0, 0, + NULL); if (ret == 0) { - extent_clear_unlock_delalloc(inode, - &BTRFS_I(inode)->io_tree, - start, end, NULL, - EXTENT_CLEAR_UNLOCK_PAGE | - EXTENT_CLEAR_UNLOCK | - EXTENT_CLEAR_DELALLOC | - EXTENT_CLEAR_DIRTY | - EXTENT_SET_WRITEBACK | - EXTENT_END_WRITEBACK); + extent_clear_unlock_delalloc(inode, start, end, NULL, + EXTENT_LOCKED | EXTENT_DELALLOC | + EXTENT_DEFRAG, PAGE_UNLOCK | + PAGE_CLEAR_DIRTY | PAGE_SET_WRITEBACK | + PAGE_END_WRITEBACK); *nr_written = *nr_written + (end - start + PAGE_CACHE_SIZE) / PAGE_CACHE_SIZE; *page_started = 1; goto out; } else if (ret < 0) { - btrfs_abort_transaction(trans, root, ret); goto out_unlock; } } @@ -922,13 +885,11 @@ static noinline int __cow_file_range(struct btrfs_trans_handle *trans, unsigned long op; cur_alloc_size = disk_num_bytes; - ret = btrfs_reserve_extent(trans, root, cur_alloc_size, + ret = btrfs_reserve_extent(root, cur_alloc_size, root->sectorsize, 0, alloc_hint, &ins, 1); - if (ret < 0) { - btrfs_abort_transaction(trans, root, ret); + if (ret < 0) goto out_unlock; - } em = alloc_extent_map(); if (!em) { @@ -974,10 +935,8 @@ static noinline int __cow_file_range(struct btrfs_trans_handle *trans, BTRFS_DATA_RELOC_TREE_OBJECTID) { ret = btrfs_reloc_clone_csums(inode, start, cur_alloc_size); - if (ret) { - btrfs_abort_transaction(trans, root, ret); + if (ret) goto out_reserve; - } } if (disk_num_bytes < cur_alloc_size) @@ -990,13 +949,13 @@ static noinline int __cow_file_range(struct btrfs_trans_handle *trans, * Do set the Private2 bit so we know this page was properly * setup for writepage */ - op = unlock ? EXTENT_CLEAR_UNLOCK_PAGE : 0; - op |= EXTENT_CLEAR_UNLOCK | EXTENT_CLEAR_DELALLOC | - EXTENT_SET_PRIVATE2; + op = unlock ? PAGE_UNLOCK : 0; + op |= PAGE_SET_PRIVATE2; - extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree, - start, start + ram_size - 1, - locked_page, op); + extent_clear_unlock_delalloc(inode, start, + start + ram_size - 1, locked_page, + EXTENT_LOCKED | EXTENT_DELALLOC, + op); disk_num_bytes -= cur_alloc_size; num_bytes -= cur_alloc_size; alloc_hint = ins.objectid + ins.offset; @@ -1008,52 +967,14 @@ out: out_reserve: btrfs_free_reserved_extent(root, ins.objectid, ins.offset); out_unlock: - extent_clear_unlock_delalloc(inode, - &BTRFS_I(inode)->io_tree, - start, end, locked_page, - EXTENT_CLEAR_UNLOCK_PAGE | - EXTENT_CLEAR_UNLOCK | - EXTENT_CLEAR_DELALLOC | - EXTENT_CLEAR_DIRTY | - EXTENT_SET_WRITEBACK | - EXTENT_END_WRITEBACK); - + extent_clear_unlock_delalloc(inode, start, end, locked_page, + EXTENT_LOCKED | EXTENT_DO_ACCOUNTING | + EXTENT_DELALLOC | EXTENT_DEFRAG, + PAGE_UNLOCK | PAGE_CLEAR_DIRTY | + PAGE_SET_WRITEBACK | PAGE_END_WRITEBACK); goto out; } -static noinline int cow_file_range(struct inode *inode, - struct page *locked_page, - u64 start, u64 end, int *page_started, - unsigned long *nr_written, - int unlock) -{ - struct btrfs_trans_handle *trans; - struct btrfs_root *root = BTRFS_I(inode)->root; - int ret; - - trans = btrfs_join_transaction(root); - if (IS_ERR(trans)) { - extent_clear_unlock_delalloc(inode, - &BTRFS_I(inode)->io_tree, - start, end, locked_page, - EXTENT_CLEAR_UNLOCK_PAGE | - EXTENT_CLEAR_UNLOCK | - EXTENT_CLEAR_DELALLOC | - EXTENT_CLEAR_DIRTY | - EXTENT_SET_WRITEBACK | - EXTENT_END_WRITEBACK); - return PTR_ERR(trans); - } - trans->block_rsv = &root->fs_info->delalloc_block_rsv; - - ret = __cow_file_range(trans, inode, root, locked_page, start, end, - page_started, nr_written, unlock); - - btrfs_end_transaction(trans, root); - - return ret; -} - /* * work queue call back to started compression on a file and pages */ @@ -1221,15 +1142,13 @@ static noinline int run_delalloc_nocow(struct inode *inode, path = btrfs_alloc_path(); if (!path) { - extent_clear_unlock_delalloc(inode, - &BTRFS_I(inode)->io_tree, - start, end, locked_page, - EXTENT_CLEAR_UNLOCK_PAGE | - EXTENT_CLEAR_UNLOCK | - EXTENT_CLEAR_DELALLOC | - EXTENT_CLEAR_DIRTY | - EXTENT_SET_WRITEBACK | - EXTENT_END_WRITEBACK); + extent_clear_unlock_delalloc(inode, start, end, locked_page, + EXTENT_LOCKED | EXTENT_DELALLOC | + EXTENT_DO_ACCOUNTING | + EXTENT_DEFRAG, PAGE_UNLOCK | + PAGE_CLEAR_DIRTY | + PAGE_SET_WRITEBACK | + PAGE_END_WRITEBACK); return -ENOMEM; } @@ -1241,15 +1160,13 @@ static noinline int run_delalloc_nocow(struct inode *inode, trans = btrfs_join_transaction(root); if (IS_ERR(trans)) { - extent_clear_unlock_delalloc(inode, - &BTRFS_I(inode)->io_tree, - start, end, locked_page, - EXTENT_CLEAR_UNLOCK_PAGE | - EXTENT_CLEAR_UNLOCK | - EXTENT_CLEAR_DELALLOC | - EXTENT_CLEAR_DIRTY | - EXTENT_SET_WRITEBACK | - EXTENT_END_WRITEBACK); + extent_clear_unlock_delalloc(inode, start, end, locked_page, + EXTENT_LOCKED | EXTENT_DELALLOC | + EXTENT_DO_ACCOUNTING | + EXTENT_DEFRAG, PAGE_UNLOCK | + PAGE_CLEAR_DIRTY | + PAGE_SET_WRITEBACK | + PAGE_END_WRITEBACK); btrfs_free_path(path); return PTR_ERR(trans); } @@ -1369,9 +1286,9 @@ out_check: btrfs_release_path(path); if (cow_start != (u64)-1) { - ret = __cow_file_range(trans, inode, root, locked_page, - cow_start, found_key.offset - 1, - page_started, nr_written, 1); + ret = cow_file_range(inode, locked_page, + cow_start, found_key.offset - 1, + page_started, nr_written, 1); if (ret) { btrfs_abort_transaction(trans, root, ret); goto error; @@ -1428,11 +1345,11 @@ out_check: } } - extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree, - cur_offset, cur_offset + num_bytes - 1, - locked_page, EXTENT_CLEAR_UNLOCK_PAGE | - EXTENT_CLEAR_UNLOCK | EXTENT_CLEAR_DELALLOC | - EXTENT_SET_PRIVATE2); + extent_clear_unlock_delalloc(inode, cur_offset, + cur_offset + num_bytes - 1, + locked_page, EXTENT_LOCKED | + EXTENT_DELALLOC, PAGE_UNLOCK | + PAGE_SET_PRIVATE2); cur_offset = extent_end; if (cur_offset > end) break; @@ -1445,9 +1362,8 @@ out_check: } if (cow_start != (u64)-1) { - ret = __cow_file_range(trans, inode, root, locked_page, - cow_start, end, - page_started, nr_written, 1); + ret = cow_file_range(inode, locked_page, cow_start, end, + page_started, nr_written, 1); if (ret) { btrfs_abort_transaction(trans, root, ret); goto error; @@ -1460,16 +1376,13 @@ error: ret = err; if (ret && cur_offset < end) - extent_clear_unlock_delalloc(inode, - &BTRFS_I(inode)->io_tree, - cur_offset, end, locked_page, - EXTENT_CLEAR_UNLOCK_PAGE | - EXTENT_CLEAR_UNLOCK | - EXTENT_CLEAR_DELALLOC | - EXTENT_CLEAR_DIRTY | - EXTENT_SET_WRITEBACK | - EXTENT_END_WRITEBACK); - + extent_clear_unlock_delalloc(inode, cur_offset, end, + locked_page, EXTENT_LOCKED | + EXTENT_DELALLOC | EXTENT_DEFRAG | + EXTENT_DO_ACCOUNTING, PAGE_UNLOCK | + PAGE_CLEAR_DIRTY | + PAGE_SET_WRITEBACK | + PAGE_END_WRITEBACK); btrfs_free_path(path); return ret; } @@ -2132,6 +2045,7 @@ static noinline int record_one_backref(u64 inum, u64 offset, u64 root_id, WARN_ON(1); return ret; } + ret = 0; while (1) { cond_resched(); @@ -2181,8 +2095,6 @@ static noinline int record_one_backref(u64 inum, u64 offset, u64 root_id, old->len || extent_offset + num_bytes <= old->extent_offset + old->offset) continue; - - ret = 0; break; } @@ -2238,16 +2150,18 @@ static noinline bool record_extent_backrefs(struct btrfs_path *path, static int relink_is_mergable(struct extent_buffer *leaf, struct btrfs_file_extent_item *fi, - u64 disk_bytenr) + struct new_sa_defrag_extent *new) { - if (btrfs_file_extent_disk_bytenr(leaf, fi) != disk_bytenr) + if (btrfs_file_extent_disk_bytenr(leaf, fi) != new->bytenr) return 0; if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_REG) return 0; - if (btrfs_file_extent_compression(leaf, fi) || - btrfs_file_extent_encryption(leaf, fi) || + if (btrfs_file_extent_compression(leaf, fi) != new->compress_type) + return 0; + + if (btrfs_file_extent_encryption(leaf, fi) || btrfs_file_extent_other_encoding(leaf, fi)) return 0; @@ -2391,8 +2305,8 @@ again: struct btrfs_file_extent_item); extent_len = btrfs_file_extent_num_bytes(leaf, fi); - if (relink_is_mergable(leaf, fi, new->bytenr) && - extent_len + found_key.offset == start) { + if (extent_len + found_key.offset == start && + relink_is_mergable(leaf, fi, new)) { btrfs_set_file_extent_num_bytes(leaf, fi, extent_len + len); btrfs_mark_buffer_dirty(leaf); @@ -2648,8 +2562,10 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent) struct extent_state *cached_state = NULL; struct new_sa_defrag_extent *new = NULL; int compress_type = 0; - int ret; + int ret = 0; + u64 logical_len = ordered_extent->len; bool nolock; + bool truncated = false; nolock = btrfs_is_free_space_inode(inode); @@ -2658,6 +2574,14 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent) goto out; } + if (test_bit(BTRFS_ORDERED_TRUNCATED, &ordered_extent->flags)) { + truncated = true; + logical_len = ordered_extent->truncated_len; + /* Truncated the entire extent, don't bother adding */ + if (!logical_len) + goto out; + } + if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) { BUG_ON(!list_empty(&ordered_extent->list)); /* Logic error */ btrfs_ordered_update_i_size(inode, 0, ordered_extent); @@ -2713,15 +2637,14 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent) ret = btrfs_mark_extent_written(trans, inode, ordered_extent->file_offset, ordered_extent->file_offset + - ordered_extent->len); + logical_len); } else { BUG_ON(root == root->fs_info->tree_root); ret = insert_reserved_file_extent(trans, inode, ordered_extent->file_offset, ordered_extent->start, ordered_extent->disk_len, - ordered_extent->len, - ordered_extent->len, + logical_len, logical_len, compress_type, 0, 0, BTRFS_FILE_EXTENT_REG); } @@ -2753,17 +2676,27 @@ out: if (trans) btrfs_end_transaction(trans, root); - if (ret) { - clear_extent_uptodate(io_tree, ordered_extent->file_offset, - ordered_extent->file_offset + - ordered_extent->len - 1, NULL, GFP_NOFS); + if (ret || truncated) { + u64 start, end; + + if (truncated) + start = ordered_extent->file_offset + logical_len; + else + start = ordered_extent->file_offset; + end = ordered_extent->file_offset + ordered_extent->len - 1; + clear_extent_uptodate(io_tree, start, end, NULL, GFP_NOFS); + + /* Drop the cache for the part of the extent we didn't write. */ + btrfs_drop_extent_cache(inode, start, end, 0); /* * If the ordered extent had an IOERR or something else went * wrong we need to return the space for this ordered extent - * back to the allocator. + * back to the allocator. We only free the extent in the + * truncated case if we didn't write out the extent at all. */ - if (!test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags) && + if ((ret || !logical_len) && + !test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags) && !test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) btrfs_free_reserved_extent(root, ordered_extent->start, ordered_extent->disk_len); @@ -2827,16 +2760,16 @@ static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, * if there's a match, we allow the bio to finish. If not, the code in * extent_io.c will try to find good copies for us. */ -static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end, - struct extent_state *state, int mirror) +static int btrfs_readpage_end_io_hook(struct btrfs_io_bio *io_bio, + u64 phy_offset, struct page *page, + u64 start, u64 end, int mirror) { size_t offset = start - page_offset(page); struct inode *inode = page->mapping->host; struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; char *kaddr; - u64 private = ~(u32)0; - int ret; struct btrfs_root *root = BTRFS_I(inode)->root; + u32 csum_expected; u32 csum = ~(u32)0; static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL, DEFAULT_RATELIMIT_BURST); @@ -2856,19 +2789,13 @@ static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end, return 0; } - if (state && state->start == start) { - private = state->private; - ret = 0; - } else { - ret = get_state_private(io_tree, start, &private); - } - kaddr = kmap_atomic(page); - if (ret) - goto zeroit; + phy_offset >>= inode->i_sb->s_blocksize_bits; + csum_expected = *(((u32 *)io_bio->csum) + phy_offset); + kaddr = kmap_atomic(page); csum = btrfs_csum_data(kaddr + offset, csum, end - start + 1); btrfs_csum_final(csum, (char *)&csum); - if (csum != private) + if (csum != csum_expected) goto zeroit; kunmap_atomic(kaddr); @@ -2877,14 +2804,12 @@ good: zeroit: if (__ratelimit(&_rs)) - btrfs_info(root->fs_info, "csum failed ino %llu off %llu csum %u private %llu", - (unsigned long long)btrfs_ino(page->mapping->host), - (unsigned long long)start, csum, - (unsigned long long)private); + btrfs_info(root->fs_info, "csum failed ino %llu off %llu csum %u expected csum %u", + btrfs_ino(page->mapping->host), start, csum, csum_expected); memset(kaddr + offset, 1, end - start + 1); flush_dcache_page(page); kunmap_atomic(kaddr); - if (private == 0) + if (csum_expected == 0) return 0; return -EIO; } @@ -2971,8 +2896,10 @@ void btrfs_orphan_commit_root(struct btrfs_trans_handle *trans, btrfs_root_refs(&root->root_item) > 0) { ret = btrfs_del_orphan_item(trans, root->fs_info->tree_root, root->root_key.objectid); - BUG_ON(ret); - root->orphan_item_inserted = 0; + if (ret) + btrfs_abort_transaction(trans, root, ret); + else + root->orphan_item_inserted = 0; } if (block_rsv) { @@ -3041,11 +2968,18 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode) /* insert an orphan item to track this unlinked/truncated file */ if (insert >= 1) { ret = btrfs_insert_orphan_item(trans, root, btrfs_ino(inode)); - if (ret && ret != -EEXIST) { - clear_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, - &BTRFS_I(inode)->runtime_flags); - btrfs_abort_transaction(trans, root, ret); - return ret; + if (ret) { + if (reserve) { + clear_bit(BTRFS_INODE_ORPHAN_META_RESERVED, + &BTRFS_I(inode)->runtime_flags); + btrfs_orphan_release_metadata(inode); + } + if (ret != -EEXIST) { + clear_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, + &BTRFS_I(inode)->runtime_flags); + btrfs_abort_transaction(trans, root, ret); + return ret; + } } ret = 0; } @@ -3084,17 +3018,15 @@ static int btrfs_orphan_del(struct btrfs_trans_handle *trans, release_rsv = 1; spin_unlock(&root->orphan_lock); - if (trans && delete_item) { + if (trans && delete_item) ret = btrfs_del_orphan_item(trans, root, btrfs_ino(inode)); - BUG_ON(ret); /* -ENOMEM or corruption (JDM: Recheck) */ - } if (release_rsv) { btrfs_orphan_release_metadata(inode); atomic_dec(&root->orphan_inodes); } - return 0; + return ret; } /* @@ -3224,8 +3156,9 @@ int btrfs_orphan_cleanup(struct btrfs_root *root) found_key.objectid); ret = btrfs_del_orphan_item(trans, root, found_key.objectid); - BUG_ON(ret); /* -ENOMEM or corruption (JDM: Recheck) */ btrfs_end_transaction(trans, root); + if (ret) + goto out; continue; } @@ -3657,8 +3590,7 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans, if (ret) { btrfs_info(root->fs_info, "failed to delete reference to %.*s, inode %llu parent %llu", - name_len, name, - (unsigned long long)ino, (unsigned long long)dir_ino); + name_len, name, ino, dir_ino); btrfs_abort_transaction(trans, root, ret); goto err; } @@ -3929,6 +3861,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, u64 extent_num_bytes = 0; u64 extent_offset = 0; u64 item_end = 0; + u64 last_size = (u64)-1; u32 found_type = (u8)-1; int found_extent; int del_item; @@ -4026,6 +3959,11 @@ search_again: if (found_type != BTRFS_EXTENT_DATA_KEY) goto delete; + if (del_item) + last_size = found_key.offset; + else + last_size = new_size; + if (extent_type != BTRFS_FILE_EXTENT_INLINE) { u64 num_dec; extent_start = btrfs_file_extent_disk_bytenr(leaf, fi); @@ -4137,6 +4075,8 @@ out: btrfs_abort_transaction(trans, root, ret); } error: + if (last_size != (u64)-1) + btrfs_ordered_update_i_size(inode, last_size, NULL); btrfs_free_path(path); return err; } @@ -4465,8 +4405,26 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr) btrfs_inode_resume_unlocked_dio(inode); ret = btrfs_truncate(inode); - if (ret && inode->i_nlink) - btrfs_orphan_del(NULL, inode); + if (ret && inode->i_nlink) { + int err; + + /* + * failed to truncate, disk_i_size is only adjusted down + * as we remove extents, so it should represent the true + * size of the inode, so reset the in memory size and + * delete our orphan entry. + */ + trans = btrfs_join_transaction(root); + if (IS_ERR(trans)) { + btrfs_orphan_del(NULL, inode); + return ret; + } + i_size_write(inode, BTRFS_I(inode)->disk_i_size); + err = btrfs_orphan_del(trans, inode); + if (err) + btrfs_abort_transaction(trans, root, err); + btrfs_end_transaction(trans, root); + } } return ret; @@ -4601,10 +4559,15 @@ void btrfs_evict_inode(struct inode *inode) btrfs_free_block_rsv(root, rsv); + /* + * Errors here aren't a big deal, it just means we leave orphan items + * in the tree. They will be cleaned up on the next mount. + */ if (ret == 0) { trans->block_rsv = root->orphan_block_rsv; - ret = btrfs_orphan_del(trans, inode); - BUG_ON(ret); + btrfs_orphan_del(trans, inode); + } else { + btrfs_orphan_del(NULL, inode); } trans->block_rsv = &root->fs_info->trans_block_rsv; @@ -6161,10 +6124,7 @@ insert: btrfs_release_path(path); if (em->start > start || extent_map_end(em) <= start) { btrfs_err(root->fs_info, "bad extent! em: [%llu %llu] passed [%llu %llu]", - (unsigned long long)em->start, - (unsigned long long)em->len, - (unsigned long long)start, - (unsigned long long)len); + em->start, em->len, start, len); err = -EIO; goto out; } @@ -6362,39 +6322,32 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode, u64 start, u64 len) { struct btrfs_root *root = BTRFS_I(inode)->root; - struct btrfs_trans_handle *trans; struct extent_map *em; struct btrfs_key ins; u64 alloc_hint; int ret; - trans = btrfs_join_transaction(root); - if (IS_ERR(trans)) - return ERR_CAST(trans); - - trans->block_rsv = &root->fs_info->delalloc_block_rsv; - alloc_hint = get_extent_allocation_hint(inode, start, len); - ret = btrfs_reserve_extent(trans, root, len, root->sectorsize, 0, + ret = btrfs_reserve_extent(root, len, root->sectorsize, 0, alloc_hint, &ins, 1); - if (ret) { - em = ERR_PTR(ret); - goto out; - } + if (ret) + return ERR_PTR(ret); em = create_pinned_em(inode, start, ins.offset, start, ins.objectid, ins.offset, ins.offset, ins.offset, 0); - if (IS_ERR(em)) - goto out; + if (IS_ERR(em)) { + btrfs_free_reserved_extent(root, ins.objectid, ins.offset); + return em; + } ret = btrfs_add_ordered_extent_dio(inode, start, ins.objectid, ins.offset, ins.offset, 0); if (ret) { btrfs_free_reserved_extent(root, ins.objectid, ins.offset); - em = ERR_PTR(ret); + free_extent_map(em); + return ERR_PTR(ret); } -out: - btrfs_end_transaction(trans, root); + return em; } @@ -6402,11 +6355,11 @@ out: * returns 1 when the nocow is safe, < 1 on error, 0 if the * block must be cow'd */ -noinline int can_nocow_extent(struct btrfs_trans_handle *trans, - struct inode *inode, u64 offset, u64 *len, +noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len, u64 *orig_start, u64 *orig_block_len, u64 *ram_bytes) { + struct btrfs_trans_handle *trans; struct btrfs_path *path; int ret; struct extent_buffer *leaf; @@ -6424,7 +6377,7 @@ noinline int can_nocow_extent(struct btrfs_trans_handle *trans, if (!path) return -ENOMEM; - ret = btrfs_lookup_file_extent(trans, root, path, btrfs_ino(inode), + ret = btrfs_lookup_file_extent(NULL, root, path, btrfs_ino(inode), offset, 0); if (ret < 0) goto out; @@ -6489,9 +6442,19 @@ noinline int can_nocow_extent(struct btrfs_trans_handle *trans, * look for other files referencing this extent, if we * find any we must cow */ - if (btrfs_cross_ref_exist(trans, root, btrfs_ino(inode), - key.offset - backref_offset, disk_bytenr)) + trans = btrfs_join_transaction(root); + if (IS_ERR(trans)) { + ret = 0; goto out; + } + + ret = btrfs_cross_ref_exist(trans, root, btrfs_ino(inode), + key.offset - backref_offset, disk_bytenr); + btrfs_end_transaction(trans, root); + if (ret) { + ret = 0; + goto out; + } /* * adjust disk_bytenr and num_bytes to cover just the bytes @@ -6633,7 +6596,6 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, u64 start = iblock << inode->i_blkbits; u64 lockstart, lockend; u64 len = bh_result->b_size; - struct btrfs_trans_handle *trans; int unlock_bits = EXTENT_LOCKED; int ret = 0; @@ -6715,16 +6677,7 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, len = min(len, em->len - (start - em->start)); block_start = em->block_start + (start - em->start); - /* - * we're not going to log anything, but we do need - * to make sure the current transaction stays open - * while we look for nocow cross refs - */ - trans = btrfs_join_transaction(root); - if (IS_ERR(trans)) - goto must_cow; - - if (can_nocow_extent(trans, inode, start, &len, &orig_start, + if (can_nocow_extent(inode, start, &len, &orig_start, &orig_block_len, &ram_bytes) == 1) { if (type == BTRFS_ORDERED_PREALLOC) { free_extent_map(em); @@ -6733,24 +6686,20 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, block_start, len, orig_block_len, ram_bytes, type); - if (IS_ERR(em)) { - btrfs_end_transaction(trans, root); + if (IS_ERR(em)) goto unlock_err; - } } ret = btrfs_add_ordered_extent_dio(inode, start, block_start, len, len, type); - btrfs_end_transaction(trans, root); if (ret) { free_extent_map(em); goto unlock_err; } goto unlock; } - btrfs_end_transaction(trans, root); } -must_cow: + /* * this will cow the extent, reset the len in case we changed * it above @@ -6813,26 +6762,6 @@ unlock_err: return ret; } -struct btrfs_dio_private { - struct inode *inode; - u64 logical_offset; - u64 disk_bytenr; - u64 bytes; - void *private; - - /* number of bios pending for this dio */ - atomic_t pending_bios; - - /* IO errors */ - int errors; - - /* orig_bio is our btrfs_io_bio */ - struct bio *orig_bio; - - /* dio_bio came from fs/direct-io.c */ - struct bio *dio_bio; -}; - static void btrfs_endio_direct_read(struct bio *bio, int err) { struct btrfs_dio_private *dip = bio->bi_private; @@ -6841,6 +6770,8 @@ static void btrfs_endio_direct_read(struct bio *bio, int err) struct inode *inode = dip->inode; struct btrfs_root *root = BTRFS_I(inode)->root; struct bio *dio_bio; + u32 *csums = (u32 *)dip->csum; + int index = 0; u64 start; start = dip->logical_offset; @@ -6849,12 +6780,8 @@ static void btrfs_endio_direct_read(struct bio *bio, int err) struct page *page = bvec->bv_page; char *kaddr; u32 csum = ~(u32)0; - u64 private = ~(u32)0; unsigned long flags; - if (get_state_private(&BTRFS_I(inode)->io_tree, - start, &private)) - goto failed; local_irq_save(flags); kaddr = kmap_atomic(page); csum = btrfs_csum_data(kaddr + bvec->bv_offset, @@ -6864,18 +6791,17 @@ static void btrfs_endio_direct_read(struct bio *bio, int err) local_irq_restore(flags); flush_dcache_page(bvec->bv_page); - if (csum != private) { -failed: - btrfs_err(root->fs_info, "csum failed ino %llu off %llu csum %u private %u", - (unsigned long long)btrfs_ino(inode), - (unsigned long long)start, - csum, (unsigned)private); + if (csum != csums[index]) { + btrfs_err(root->fs_info, "csum failed ino %llu off %llu csum %u expected csum %u", + btrfs_ino(inode), start, csum, + csums[index]); err = -EIO; } } start += bvec->bv_len; bvec++; + index++; } while (bvec <= bvec_end); unlock_extent(&BTRFS_I(inode)->io_tree, dip->logical_offset, @@ -6956,7 +6882,7 @@ static void btrfs_end_dio_bio(struct bio *bio, int err) if (err) { printk(KERN_ERR "btrfs direct IO failed ino %llu rw %lu " "sector %#Lx len %u err no %d\n", - (unsigned long long)btrfs_ino(dip->inode), bio->bi_rw, + btrfs_ino(dip->inode), bio->bi_rw, (unsigned long long)bio->bi_sector, bio->bi_size, err); dip->errors = 1; @@ -6992,6 +6918,7 @@ static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode, int rw, u64 file_offset, int skip_sum, int async_submit) { + struct btrfs_dio_private *dip = bio->bi_private; int write = rw & REQ_WRITE; struct btrfs_root *root = BTRFS_I(inode)->root; int ret; @@ -7026,7 +6953,8 @@ static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode, if (ret) goto err; } else if (!skip_sum) { - ret = btrfs_lookup_bio_sums_dio(root, inode, bio, file_offset); + ret = btrfs_lookup_bio_sums_dio(root, inode, dip, bio, + file_offset); if (ret) goto err; } @@ -7061,6 +6989,7 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip, bio_put(orig_bio); return -EIO; } + if (map_length >= orig_bio->bi_size) { bio = orig_bio; goto submit; @@ -7156,19 +7085,28 @@ static void btrfs_submit_direct(int rw, struct bio *dio_bio, struct btrfs_dio_private *dip; struct bio *io_bio; int skip_sum; + int sum_len; int write = rw & REQ_WRITE; int ret = 0; + u16 csum_size; skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; io_bio = btrfs_bio_clone(dio_bio, GFP_NOFS); - if (!io_bio) { ret = -ENOMEM; goto free_ordered; } - dip = kmalloc(sizeof(*dip), GFP_NOFS); + if (!skip_sum && !write) { + csum_size = btrfs_super_csum_size(root->fs_info->super_copy); + sum_len = dio_bio->bi_size >> inode->i_sb->s_blocksize_bits; + sum_len *= csum_size; + } else { + sum_len = 0; + } + + dip = kmalloc(sizeof(*dip) + sum_len, GFP_NOFS); if (!dip) { ret = -ENOMEM; goto free_io_bio; @@ -7443,10 +7381,23 @@ static void btrfs_invalidatepage(struct page *page, unsigned int offset, * whoever cleared the private bit is responsible * for the finish_ordered_io */ - if (TestClearPagePrivate2(page) && - btrfs_dec_test_ordered_pending(inode, &ordered, page_start, - PAGE_CACHE_SIZE, 1)) { - btrfs_finish_ordered_io(ordered); + if (TestClearPagePrivate2(page)) { + struct btrfs_ordered_inode_tree *tree; + u64 new_len; + + tree = &BTRFS_I(inode)->ordered_tree; + + spin_lock_irq(&tree->lock); + set_bit(BTRFS_ORDERED_TRUNCATED, &ordered->flags); + new_len = page_start - ordered->file_offset; + if (new_len < ordered->truncated_len) + ordered->truncated_len = new_len; + spin_unlock_irq(&tree->lock); + + if (btrfs_dec_test_ordered_pending(inode, &ordered, + page_start, + PAGE_CACHE_SIZE, 1)) + btrfs_finish_ordered_io(ordered); } btrfs_put_ordered_extent(ordered); cached_state = NULL; @@ -7612,7 +7563,6 @@ static int btrfs_truncate(struct inode *inode) u64 min_size = btrfs_calc_trunc_metadata_size(root, 1); btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1); - btrfs_ordered_update_i_size(inode, inode->i_size, NULL); /* * Yes ladies and gentelment, this is indeed ugly. The fact is we have @@ -7876,7 +7826,7 @@ void btrfs_destroy_inode(struct inode *inode) if (test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, &BTRFS_I(inode)->runtime_flags)) { btrfs_info(root->fs_info, "inode %llu still on the orphan list", - (unsigned long long)btrfs_ino(inode)); + btrfs_ino(inode)); atomic_dec(&root->orphan_inodes); } @@ -7886,8 +7836,7 @@ void btrfs_destroy_inode(struct inode *inode) break; else { btrfs_err(root->fs_info, "found ordered extent %llu %llu on inode cleanup", - (unsigned long long)ordered->file_offset, - (unsigned long long)ordered->len); + ordered->file_offset, ordered->len); btrfs_remove_ordered_extent(inode, ordered); btrfs_put_ordered_extent(ordered); btrfs_put_ordered_extent(ordered); @@ -8161,10 +8110,8 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, new_dentry->d_name.name, new_dentry->d_name.len); } - if (!ret && new_inode->i_nlink == 0) { + if (!ret && new_inode->i_nlink == 0) ret = btrfs_orphan_add(trans, new_dentry->d_inode); - BUG_ON(ret); - } if (ret) { btrfs_abort_transaction(trans, root, ret); goto out_fail; @@ -8525,8 +8472,8 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode, cur_bytes = min(num_bytes, 256ULL * 1024 * 1024); cur_bytes = max(cur_bytes, min_size); - ret = btrfs_reserve_extent(trans, root, cur_bytes, - min_size, 0, *alloc_hint, &ins, 1); + ret = btrfs_reserve_extent(root, cur_bytes, min_size, 0, + *alloc_hint, &ins, 1); if (ret) { if (own_trans) btrfs_end_transaction(trans, root); diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 238a05545ee2..1a5b9462dd9a 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -43,6 +43,7 @@ #include <linux/blkdev.h> #include <linux/uuid.h> #include <linux/btrfs.h> +#include <linux/uaccess.h> #include "compat.h" #include "ctree.h" #include "disk-io.h" @@ -57,6 +58,9 @@ #include "send.h" #include "dev-replace.h" +static int btrfs_clone(struct inode *src, struct inode *inode, + u64 off, u64 olen, u64 olen_aligned, u64 destoff); + /* Mask out flags that are inappropriate for the given type of inode. */ static inline __u32 btrfs_mask_flags(umode_t mode, __u32 flags) { @@ -363,6 +367,13 @@ static noinline int btrfs_ioctl_fitrim(struct file *file, void __user *arg) return 0; } +int btrfs_is_empty_uuid(u8 *uuid) +{ + static char empty_uuid[BTRFS_UUID_SIZE] = {0}; + + return !memcmp(uuid, empty_uuid, BTRFS_UUID_SIZE); +} + static noinline int create_subvol(struct inode *dir, struct dentry *dentry, char *name, int namelen, @@ -396,7 +407,7 @@ static noinline int create_subvol(struct inode *dir, * of create_snapshot(). */ ret = btrfs_subvolume_reserve_metadata(root, &block_rsv, - 7, &qgroup_reserved); + 8, &qgroup_reserved, false); if (ret) return ret; @@ -425,26 +436,25 @@ static noinline int create_subvol(struct inode *dir, btrfs_set_header_backref_rev(leaf, BTRFS_MIXED_BACKREF_REV); btrfs_set_header_owner(leaf, objectid); - write_extent_buffer(leaf, root->fs_info->fsid, - (unsigned long)btrfs_header_fsid(leaf), + write_extent_buffer(leaf, root->fs_info->fsid, btrfs_header_fsid(leaf), BTRFS_FSID_SIZE); write_extent_buffer(leaf, root->fs_info->chunk_tree_uuid, - (unsigned long)btrfs_header_chunk_tree_uuid(leaf), + btrfs_header_chunk_tree_uuid(leaf), BTRFS_UUID_SIZE); btrfs_mark_buffer_dirty(leaf); memset(&root_item, 0, sizeof(root_item)); inode_item = &root_item.inode; - inode_item->generation = cpu_to_le64(1); - inode_item->size = cpu_to_le64(3); - inode_item->nlink = cpu_to_le32(1); - inode_item->nbytes = cpu_to_le64(root->leafsize); - inode_item->mode = cpu_to_le32(S_IFDIR | 0755); + btrfs_set_stack_inode_generation(inode_item, 1); + btrfs_set_stack_inode_size(inode_item, 3); + btrfs_set_stack_inode_nlink(inode_item, 1); + btrfs_set_stack_inode_nbytes(inode_item, root->leafsize); + btrfs_set_stack_inode_mode(inode_item, S_IFDIR | 0755); - root_item.flags = 0; - root_item.byte_limit = 0; - inode_item->flags = cpu_to_le64(BTRFS_INODE_ROOT_ITEM_INIT); + btrfs_set_root_flags(&root_item, 0); + btrfs_set_root_limit(&root_item, 0); + btrfs_set_stack_inode_flags(inode_item, BTRFS_INODE_ROOT_ITEM_INIT); btrfs_set_root_bytenr(&root_item, leaf->start); btrfs_set_root_generation(&root_item, trans->transid); @@ -457,8 +467,8 @@ static noinline int create_subvol(struct inode *dir, btrfs_root_generation(&root_item)); uuid_le_gen(&new_uuid); memcpy(root_item.uuid, new_uuid.b, BTRFS_UUID_SIZE); - root_item.otime.sec = cpu_to_le64(cur_time.tv_sec); - root_item.otime.nsec = cpu_to_le32(cur_time.tv_nsec); + btrfs_set_stack_timespec_sec(&root_item.otime, cur_time.tv_sec); + btrfs_set_stack_timespec_nsec(&root_item.otime, cur_time.tv_nsec); root_item.ctime = root_item.otime; btrfs_set_root_ctransid(&root_item, trans->transid); btrfs_set_root_otransid(&root_item, trans->transid); @@ -518,9 +528,14 @@ static noinline int create_subvol(struct inode *dir, ret = btrfs_add_root_ref(trans, root->fs_info->tree_root, objectid, root->root_key.objectid, btrfs_ino(dir), index, name, namelen); - BUG_ON(ret); + ret = btrfs_uuid_tree_add(trans, root->fs_info->uuid_root, + root_item.uuid, BTRFS_UUID_KEY_SUBVOL, + objectid); + if (ret) + btrfs_abort_transaction(trans, root, ret); + fail: trans->block_rsv = NULL; trans->bytes_reserved = 0; @@ -573,10 +588,12 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir, * 1 - root item * 2 - root ref/backref * 1 - root of snapshot + * 1 - UUID item */ ret = btrfs_subvolume_reserve_metadata(BTRFS_I(dir)->root, - &pending_snapshot->block_rsv, 7, - &pending_snapshot->qgroup_reserved); + &pending_snapshot->block_rsv, 8, + &pending_snapshot->qgroup_reserved, + false); if (ret) goto out; @@ -1267,9 +1284,6 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, cluster = max_cluster; } - if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) - BTRFS_I(inode)->force_compress = compress_type; - if (i + cluster > ra_index) { ra_index = max(i, ra_index); btrfs_force_ra(inode->i_mapping, ra, file, ra_index, @@ -1278,6 +1292,8 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, } mutex_lock(&inode->i_mutex); + if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) + BTRFS_I(inode)->force_compress = compress_type; ret = cluster_pages_for_defrag(inode, pages, i, cluster); if (ret < 0) { mutex_unlock(&inode->i_mutex); @@ -1334,10 +1350,6 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, atomic_read(&root->fs_info->async_delalloc_pages) == 0)); } atomic_dec(&root->fs_info->async_submit_draining); - - mutex_lock(&inode->i_mutex); - BTRFS_I(inode)->force_compress = BTRFS_COMPRESS_NONE; - mutex_unlock(&inode->i_mutex); } if (range->compress_type == BTRFS_COMPRESS_LZO) { @@ -1347,6 +1359,11 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, ret = defrag_count; out_ra: + if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) { + mutex_lock(&inode->i_mutex); + BTRFS_I(inode)->force_compress = BTRFS_COMPRESS_NONE; + mutex_unlock(&inode->i_mutex); + } if (!file) kfree(ra); kfree(pages); @@ -1377,9 +1394,8 @@ static noinline int btrfs_ioctl_resize(struct file *file, if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running, 1)) { - pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n"); mnt_drop_write_file(file); - return -EINVAL; + return BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS; } mutex_lock(&root->fs_info->volume_mutex); @@ -1403,14 +1419,13 @@ static noinline int btrfs_ioctl_resize(struct file *file, ret = -EINVAL; goto out_free; } - printk(KERN_INFO "btrfs: resizing devid %llu\n", - (unsigned long long)devid); + printk(KERN_INFO "btrfs: resizing devid %llu\n", devid); } device = btrfs_find_device(root->fs_info, devid, NULL, NULL); if (!device) { printk(KERN_INFO "btrfs: resizer unable to find device %llu\n", - (unsigned long long)devid); + devid); ret = -ENODEV; goto out_free; } @@ -1418,7 +1433,7 @@ static noinline int btrfs_ioctl_resize(struct file *file, if (!device->writeable) { printk(KERN_INFO "btrfs: resizer unable to apply on " "readonly device %llu\n", - (unsigned long long)devid); + devid); ret = -EPERM; goto out_free; } @@ -1470,8 +1485,7 @@ static noinline int btrfs_ioctl_resize(struct file *file, new_size *= root->sectorsize; printk_in_rcu(KERN_INFO "btrfs: new size for %s is %llu\n", - rcu_str_deref(device->name), - (unsigned long long)new_size); + rcu_str_deref(device->name), new_size); if (new_size > old_size) { trans = btrfs_start_transaction(root, 0); @@ -1721,13 +1735,28 @@ out: static noinline int may_destroy_subvol(struct btrfs_root *root) { struct btrfs_path *path; + struct btrfs_dir_item *di; struct btrfs_key key; + u64 dir_id; int ret; path = btrfs_alloc_path(); if (!path) return -ENOMEM; + /* Make sure this root isn't set as the default subvol */ + dir_id = btrfs_super_root_dir(root->fs_info->super_copy); + di = btrfs_lookup_dir_item(NULL, root->fs_info->tree_root, path, + dir_id, "default", 7, 0); + if (di && !IS_ERR(di)) { + btrfs_dir_item_key_to_cpu(path->nodes[0], di, &key); + if (key.objectid == root->root_key.objectid) { + ret = -ENOTEMPTY; + goto out; + } + btrfs_release_path(path); + } + key.objectid = root->root_key.objectid; key.type = BTRFS_ROOT_REF_KEY; key.offset = (u64)-1; @@ -1993,25 +2022,29 @@ static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info, ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); if (ret < 0) goto out; + else if (ret > 0) { + ret = btrfs_previous_item(root, path, dirid, + BTRFS_INODE_REF_KEY); + if (ret < 0) + goto out; + else if (ret > 0) { + ret = -ENOENT; + goto out; + } + } l = path->nodes[0]; slot = path->slots[0]; - if (ret > 0 && slot > 0) - slot--; btrfs_item_key_to_cpu(l, &key, slot); - if (ret > 0 && (key.objectid != dirid || - key.type != BTRFS_INODE_REF_KEY)) { - ret = -ENOENT; - goto out; - } - iref = btrfs_item_ptr(l, slot, struct btrfs_inode_ref); len = btrfs_inode_ref_name_len(l, iref); ptr -= len + 1; total_len += len + 1; - if (ptr < name) + if (ptr < name) { + ret = -ENAMETOOLONG; goto out; + } *(ptr + len) = '/'; read_extent_buffer(l, ptr,(unsigned long)(iref + 1), len); @@ -2024,8 +2057,6 @@ static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info, key.offset = (u64)-1; dirid = key.objectid; } - if (ptr < name) - goto out; memmove(name, ptr, total_len); name[total_len]='\0'; ret = 0; @@ -2174,7 +2205,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file, * ref/backref. */ err = btrfs_subvolume_reserve_metadata(root, &block_rsv, - 5, &qgroup_reserved); + 5, &qgroup_reserved, true); if (err) goto out_up_write; @@ -2213,6 +2244,27 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file, goto out_end_trans; } } + + ret = btrfs_uuid_tree_rem(trans, root->fs_info->uuid_root, + dest->root_item.uuid, BTRFS_UUID_KEY_SUBVOL, + dest->root_key.objectid); + if (ret && ret != -ENOENT) { + btrfs_abort_transaction(trans, root, ret); + err = ret; + goto out_end_trans; + } + if (!btrfs_is_empty_uuid(dest->root_item.received_uuid)) { + ret = btrfs_uuid_tree_rem(trans, root->fs_info->uuid_root, + dest->root_item.received_uuid, + BTRFS_UUID_KEY_RECEIVED_SUBVOL, + dest->root_key.objectid); + if (ret && ret != -ENOENT) { + btrfs_abort_transaction(trans, root, ret); + err = ret; + goto out_end_trans; + } + } + out_end_trans: trans->block_rsv = NULL; trans->bytes_reserved = 0; @@ -2326,8 +2378,7 @@ static long btrfs_ioctl_add_dev(struct btrfs_root *root, void __user *arg) if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running, 1)) { - pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n"); - return -EINVAL; + return BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS; } mutex_lock(&root->fs_info->volume_mutex); @@ -2400,10 +2451,10 @@ static long btrfs_ioctl_fs_info(struct btrfs_root *root, void __user *arg) if (!fi_args) return -ENOMEM; + mutex_lock(&fs_devices->device_list_mutex); fi_args->num_devices = fs_devices->num_devices; memcpy(&fi_args->fsid, root->fs_info->fsid, sizeof(fi_args->fsid)); - mutex_lock(&fs_devices->device_list_mutex); list_for_each_entry_safe(device, next, &fs_devices->devices, dev_list) { if (device->devid > fi_args->max_id) fi_args->max_id = device->devid; @@ -2424,7 +2475,6 @@ static long btrfs_ioctl_dev_info(struct btrfs_root *root, void __user *arg) struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices; int ret = 0; char *s_uuid = NULL; - char empty_uuid[BTRFS_UUID_SIZE] = {0}; if (!capable(CAP_SYS_ADMIN)) return -EPERM; @@ -2433,7 +2483,7 @@ static long btrfs_ioctl_dev_info(struct btrfs_root *root, void __user *arg) if (IS_ERR(di_args)) return PTR_ERR(di_args); - if (memcmp(empty_uuid, di_args->uuid, BTRFS_UUID_SIZE) != 0) + if (!btrfs_is_empty_uuid(di_args->uuid)) s_uuid = di_args->uuid; mutex_lock(&fs_devices->device_list_mutex); @@ -2469,150 +2519,336 @@ out: return ret; } -static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, - u64 off, u64 olen, u64 destoff) +static struct page *extent_same_get_page(struct inode *inode, u64 off) +{ + struct page *page; + pgoff_t index; + struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree; + + index = off >> PAGE_CACHE_SHIFT; + + page = grab_cache_page(inode->i_mapping, index); + if (!page) + return NULL; + + if (!PageUptodate(page)) { + if (extent_read_full_page_nolock(tree, page, btrfs_get_extent, + 0)) + return NULL; + lock_page(page); + if (!PageUptodate(page)) { + unlock_page(page); + page_cache_release(page); + return NULL; + } + } + unlock_page(page); + + return page; +} + +static inline void lock_extent_range(struct inode *inode, u64 off, u64 len) +{ + /* do any pending delalloc/csum calc on src, one way or + another, and lock file content */ + while (1) { + struct btrfs_ordered_extent *ordered; + lock_extent(&BTRFS_I(inode)->io_tree, off, off + len - 1); + ordered = btrfs_lookup_first_ordered_extent(inode, + off + len - 1); + if (!ordered && + !test_range_bit(&BTRFS_I(inode)->io_tree, off, + off + len - 1, EXTENT_DELALLOC, 0, NULL)) + break; + unlock_extent(&BTRFS_I(inode)->io_tree, off, off + len - 1); + if (ordered) + btrfs_put_ordered_extent(ordered); + btrfs_wait_ordered_range(inode, off, len); + } +} + +static void btrfs_double_unlock(struct inode *inode1, u64 loff1, + struct inode *inode2, u64 loff2, u64 len) +{ + unlock_extent(&BTRFS_I(inode1)->io_tree, loff1, loff1 + len - 1); + unlock_extent(&BTRFS_I(inode2)->io_tree, loff2, loff2 + len - 1); + + mutex_unlock(&inode1->i_mutex); + mutex_unlock(&inode2->i_mutex); +} + +static void btrfs_double_lock(struct inode *inode1, u64 loff1, + struct inode *inode2, u64 loff2, u64 len) +{ + if (inode1 < inode2) { + swap(inode1, inode2); + swap(loff1, loff2); + } + + mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT); + lock_extent_range(inode1, loff1, len); + if (inode1 != inode2) { + mutex_lock_nested(&inode2->i_mutex, I_MUTEX_CHILD); + lock_extent_range(inode2, loff2, len); + } +} + +static int btrfs_cmp_data(struct inode *src, u64 loff, struct inode *dst, + u64 dst_loff, u64 len) +{ + int ret = 0; + struct page *src_page, *dst_page; + unsigned int cmp_len = PAGE_CACHE_SIZE; + void *addr, *dst_addr; + + while (len) { + if (len < PAGE_CACHE_SIZE) + cmp_len = len; + + src_page = extent_same_get_page(src, loff); + if (!src_page) + return -EINVAL; + dst_page = extent_same_get_page(dst, dst_loff); + if (!dst_page) { + page_cache_release(src_page); + return -EINVAL; + } + addr = kmap_atomic(src_page); + dst_addr = kmap_atomic(dst_page); + + flush_dcache_page(src_page); + flush_dcache_page(dst_page); + + if (memcmp(addr, dst_addr, cmp_len)) + ret = BTRFS_SAME_DATA_DIFFERS; + + kunmap_atomic(addr); + kunmap_atomic(dst_addr); + page_cache_release(src_page); + page_cache_release(dst_page); + + if (ret) + break; + + loff += cmp_len; + dst_loff += cmp_len; + len -= cmp_len; + } + + return ret; +} + +static int extent_same_check_offsets(struct inode *inode, u64 off, u64 len) +{ + u64 bs = BTRFS_I(inode)->root->fs_info->sb->s_blocksize; + + if (off + len > inode->i_size || off + len < off) + return -EINVAL; + /* Check that we are block aligned - btrfs_clone() requires this */ + if (!IS_ALIGNED(off, bs) || !IS_ALIGNED(off + len, bs)) + return -EINVAL; + + return 0; +} + +static int btrfs_extent_same(struct inode *src, u64 loff, u64 len, + struct inode *dst, u64 dst_loff) { - struct inode *inode = file_inode(file); - struct btrfs_root *root = BTRFS_I(inode)->root; - struct fd src_file; - struct inode *src; - struct btrfs_trans_handle *trans; - struct btrfs_path *path; - struct extent_buffer *leaf; - char *buf; - struct btrfs_key key; - u32 nritems; - int slot; int ret; - u64 len = olen; - u64 bs = root->fs_info->sb->s_blocksize; - int same_inode = 0; /* - * TODO: - * - split compressed inline extents. annoying: we need to - * decompress into destination's address_space (the file offset - * may change, so source mapping won't do), then recompress (or - * otherwise reinsert) a subrange. - * - allow ranges within the same file to be cloned (provided - * they don't overlap)? + * btrfs_clone() can't handle extents in the same file + * yet. Once that works, we can drop this check and replace it + * with a check for the same inode, but overlapping extents. */ - - /* the destination must be opened for writing */ - if (!(file->f_mode & FMODE_WRITE) || (file->f_flags & O_APPEND)) + if (src == dst) return -EINVAL; - if (btrfs_root_readonly(root)) - return -EROFS; + btrfs_double_lock(src, loff, dst, dst_loff, len); + + ret = extent_same_check_offsets(src, loff, len); + if (ret) + goto out_unlock; + + ret = extent_same_check_offsets(dst, dst_loff, len); + if (ret) + goto out_unlock; + + /* don't make the dst file partly checksummed */ + if ((BTRFS_I(src)->flags & BTRFS_INODE_NODATASUM) != + (BTRFS_I(dst)->flags & BTRFS_INODE_NODATASUM)) { + ret = -EINVAL; + goto out_unlock; + } + + ret = btrfs_cmp_data(src, loff, dst, dst_loff, len); + if (ret == 0) + ret = btrfs_clone(src, dst, loff, len, len, dst_loff); + +out_unlock: + btrfs_double_unlock(src, loff, dst, dst_loff, len); + + return ret; +} + +#define BTRFS_MAX_DEDUPE_LEN (16 * 1024 * 1024) + +static long btrfs_ioctl_file_extent_same(struct file *file, + void __user *argp) +{ + struct btrfs_ioctl_same_args *args = argp; + struct btrfs_ioctl_same_args same; + struct btrfs_ioctl_same_extent_info info; + struct inode *src = file->f_dentry->d_inode; + struct file *dst_file = NULL; + struct inode *dst; + u64 off; + u64 len; + int i; + int ret; + u64 bs = BTRFS_I(src)->root->fs_info->sb->s_blocksize; + bool is_admin = capable(CAP_SYS_ADMIN); + + if (!(file->f_mode & FMODE_READ)) + return -EINVAL; ret = mnt_want_write_file(file); if (ret) return ret; - src_file = fdget(srcfd); - if (!src_file.file) { - ret = -EBADF; - goto out_drop_write; + if (copy_from_user(&same, + (struct btrfs_ioctl_same_args __user *)argp, + sizeof(same))) { + ret = -EFAULT; + goto out; } - ret = -EXDEV; - if (src_file.file->f_path.mnt != file->f_path.mnt) - goto out_fput; + off = same.logical_offset; + len = same.length; - src = file_inode(src_file.file); + /* + * Limit the total length we will dedupe for each operation. + * This is intended to bound the total time spent in this + * ioctl to something sane. + */ + if (len > BTRFS_MAX_DEDUPE_LEN) + len = BTRFS_MAX_DEDUPE_LEN; - ret = -EINVAL; - if (src == inode) - same_inode = 1; + if (WARN_ON_ONCE(bs < PAGE_CACHE_SIZE)) { + /* + * Btrfs does not support blocksize < page_size. As a + * result, btrfs_cmp_data() won't correctly handle + * this situation without an update. + */ + ret = -EINVAL; + goto out; + } - /* the src must be open for reading */ - if (!(src_file.file->f_mode & FMODE_READ)) - goto out_fput; + ret = -EISDIR; + if (S_ISDIR(src->i_mode)) + goto out; - /* don't make the dst file partly checksummed */ - if ((BTRFS_I(src)->flags & BTRFS_INODE_NODATASUM) != - (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) - goto out_fput; + ret = -EACCES; + if (!S_ISREG(src->i_mode)) + goto out; - ret = -EISDIR; - if (S_ISDIR(src->i_mode) || S_ISDIR(inode->i_mode)) - goto out_fput; + ret = 0; + for (i = 0; i < same.dest_count; i++) { + if (copy_from_user(&info, &args->info[i], sizeof(info))) { + ret = -EFAULT; + goto out; + } - ret = -EXDEV; - if (src->i_sb != inode->i_sb) - goto out_fput; + info.bytes_deduped = 0; - ret = -ENOMEM; - buf = vmalloc(btrfs_level_size(root, 0)); - if (!buf) - goto out_fput; + dst_file = fget(info.fd); + if (!dst_file) { + info.status = -EBADF; + goto next; + } - path = btrfs_alloc_path(); - if (!path) { - vfree(buf); - goto out_fput; - } - path->reada = 2; + if (!(is_admin || (dst_file->f_mode & FMODE_WRITE))) { + info.status = -EINVAL; + goto next; + } - if (!same_inode) { - if (inode < src) { - mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT); - mutex_lock_nested(&src->i_mutex, I_MUTEX_CHILD); - } else { - mutex_lock_nested(&src->i_mutex, I_MUTEX_PARENT); - mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD); + info.status = -EXDEV; + if (file->f_path.mnt != dst_file->f_path.mnt) + goto next; + + dst = dst_file->f_dentry->d_inode; + if (src->i_sb != dst->i_sb) + goto next; + + if (S_ISDIR(dst->i_mode)) { + info.status = -EISDIR; + goto next; } - } else { - mutex_lock(&src->i_mutex); - } - /* determine range to clone */ - ret = -EINVAL; - if (off + len > src->i_size || off + len < off) - goto out_unlock; - if (len == 0) - olen = len = src->i_size - off; - /* if we extend to eof, continue to block boundary */ - if (off + len == src->i_size) - len = ALIGN(src->i_size, bs) - off; + if (!S_ISREG(dst->i_mode)) { + info.status = -EACCES; + goto next; + } - /* verify the end result is block aligned */ - if (!IS_ALIGNED(off, bs) || !IS_ALIGNED(off + len, bs) || - !IS_ALIGNED(destoff, bs)) - goto out_unlock; + info.status = btrfs_extent_same(src, off, len, dst, + info.logical_offset); + if (info.status == 0) + info.bytes_deduped += len; - /* verify if ranges are overlapped within the same file */ - if (same_inode) { - if (destoff + len > off && destoff < off + len) - goto out_unlock; - } +next: + if (dst_file) + fput(dst_file); - if (destoff > inode->i_size) { - ret = btrfs_cont_expand(inode, inode->i_size, destoff); - if (ret) - goto out_unlock; + if (__put_user_unaligned(info.status, &args->info[i].status) || + __put_user_unaligned(info.bytes_deduped, + &args->info[i].bytes_deduped)) { + ret = -EFAULT; + goto out; + } } - /* truncate page cache pages from target inode range */ - truncate_inode_pages_range(&inode->i_data, destoff, - PAGE_CACHE_ALIGN(destoff + len) - 1); +out: + mnt_drop_write_file(file); + return ret; +} - /* do any pending delalloc/csum calc on src, one way or - another, and lock file content */ - while (1) { - struct btrfs_ordered_extent *ordered; - lock_extent(&BTRFS_I(src)->io_tree, off, off + len - 1); - ordered = btrfs_lookup_first_ordered_extent(src, off + len - 1); - if (!ordered && - !test_range_bit(&BTRFS_I(src)->io_tree, off, off + len - 1, - EXTENT_DELALLOC, 0, NULL)) - break; - unlock_extent(&BTRFS_I(src)->io_tree, off, off + len - 1); - if (ordered) - btrfs_put_ordered_extent(ordered); - btrfs_wait_ordered_range(src, off, len); +/** + * btrfs_clone() - clone a range from inode file to another + * + * @src: Inode to clone from + * @inode: Inode to clone to + * @off: Offset within source to start clone from + * @olen: Original length, passed by user, of range to clone + * @olen_aligned: Block-aligned value of olen, extent_same uses + * identical values here + * @destoff: Offset within @inode to start clone + */ +static int btrfs_clone(struct inode *src, struct inode *inode, + u64 off, u64 olen, u64 olen_aligned, u64 destoff) +{ + struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_path *path = NULL; + struct extent_buffer *leaf; + struct btrfs_trans_handle *trans; + char *buf = NULL; + struct btrfs_key key; + u32 nritems; + int slot; + int ret; + u64 len = olen_aligned; + + ret = -ENOMEM; + buf = vmalloc(btrfs_level_size(root, 0)); + if (!buf) + return ret; + + path = btrfs_alloc_path(); + if (!path) { + vfree(buf); + return ret; } + path->reada = 2; /* clone data */ key.objectid = btrfs_ino(src); key.type = BTRFS_EXTENT_DATA_KEY; @@ -2858,15 +3094,132 @@ next: key.offset++; } ret = 0; + out: btrfs_release_path(path); + btrfs_free_path(path); + vfree(buf); + return ret; +} + +static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, + u64 off, u64 olen, u64 destoff) +{ + struct inode *inode = fdentry(file)->d_inode; + struct btrfs_root *root = BTRFS_I(inode)->root; + struct fd src_file; + struct inode *src; + int ret; + u64 len = olen; + u64 bs = root->fs_info->sb->s_blocksize; + int same_inode = 0; + + /* + * TODO: + * - split compressed inline extents. annoying: we need to + * decompress into destination's address_space (the file offset + * may change, so source mapping won't do), then recompress (or + * otherwise reinsert) a subrange. + * - allow ranges within the same file to be cloned (provided + * they don't overlap)? + */ + + /* the destination must be opened for writing */ + if (!(file->f_mode & FMODE_WRITE) || (file->f_flags & O_APPEND)) + return -EINVAL; + + if (btrfs_root_readonly(root)) + return -EROFS; + + ret = mnt_want_write_file(file); + if (ret) + return ret; + + src_file = fdget(srcfd); + if (!src_file.file) { + ret = -EBADF; + goto out_drop_write; + } + + ret = -EXDEV; + if (src_file.file->f_path.mnt != file->f_path.mnt) + goto out_fput; + + src = file_inode(src_file.file); + + ret = -EINVAL; + if (src == inode) + same_inode = 1; + + /* the src must be open for reading */ + if (!(src_file.file->f_mode & FMODE_READ)) + goto out_fput; + + /* don't make the dst file partly checksummed */ + if ((BTRFS_I(src)->flags & BTRFS_INODE_NODATASUM) != + (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) + goto out_fput; + + ret = -EISDIR; + if (S_ISDIR(src->i_mode) || S_ISDIR(inode->i_mode)) + goto out_fput; + + ret = -EXDEV; + if (src->i_sb != inode->i_sb) + goto out_fput; + + if (!same_inode) { + if (inode < src) { + mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT); + mutex_lock_nested(&src->i_mutex, I_MUTEX_CHILD); + } else { + mutex_lock_nested(&src->i_mutex, I_MUTEX_PARENT); + mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD); + } + } else { + mutex_lock(&src->i_mutex); + } + + /* determine range to clone */ + ret = -EINVAL; + if (off + len > src->i_size || off + len < off) + goto out_unlock; + if (len == 0) + olen = len = src->i_size - off; + /* if we extend to eof, continue to block boundary */ + if (off + len == src->i_size) + len = ALIGN(src->i_size, bs) - off; + + /* verify the end result is block aligned */ + if (!IS_ALIGNED(off, bs) || !IS_ALIGNED(off + len, bs) || + !IS_ALIGNED(destoff, bs)) + goto out_unlock; + + /* verify if ranges are overlapped within the same file */ + if (same_inode) { + if (destoff + len > off && destoff < off + len) + goto out_unlock; + } + + if (destoff > inode->i_size) { + ret = btrfs_cont_expand(inode, inode->i_size, destoff); + if (ret) + goto out_unlock; + } + + /* truncate page cache pages from target inode range */ + truncate_inode_pages_range(&inode->i_data, destoff, + PAGE_CACHE_ALIGN(destoff + len) - 1); + + lock_extent_range(src, off, len); + + ret = btrfs_clone(src, inode, off, olen, len, destoff); + unlock_extent(&BTRFS_I(src)->io_tree, off, off + len - 1); out_unlock: mutex_unlock(&src->i_mutex); if (!same_inode) mutex_unlock(&inode->i_mutex); - vfree(buf); - btrfs_free_path(path); out_fput: fdput(src_file); out_drop_write: @@ -3312,11 +3665,13 @@ static long btrfs_ioctl_dev_replace(struct btrfs_root *root, void __user *arg) switch (p->cmd) { case BTRFS_IOCTL_DEV_REPLACE_CMD_START: + if (root->fs_info->sb->s_flags & MS_RDONLY) + return -EROFS; + if (atomic_xchg( &root->fs_info->mutually_exclusive_operation_running, 1)) { - pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n"); - ret = -EINPROGRESS; + ret = BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS; } else { ret = btrfs_dev_replace_start(root, p); atomic_set( @@ -3560,8 +3915,7 @@ again: } else { /* this is (1) */ mutex_unlock(&fs_info->balance_mutex); - pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n"); - ret = -EINVAL; + ret = BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS; goto out; } @@ -3967,6 +4321,7 @@ static long btrfs_ioctl_set_received_subvol(struct file *file, struct btrfs_trans_handle *trans; struct timespec ct = CURRENT_TIME; int ret = 0; + int received_uuid_changed; ret = mnt_want_write_file(file); if (ret < 0) @@ -3996,7 +4351,11 @@ static long btrfs_ioctl_set_received_subvol(struct file *file, goto out; } - trans = btrfs_start_transaction(root, 1); + /* + * 1 - root item + * 2 - uuid items (received uuid + subvol uuid) + */ + trans = btrfs_start_transaction(root, 3); if (IS_ERR(trans)) { ret = PTR_ERR(trans); trans = NULL; @@ -4007,24 +4366,42 @@ static long btrfs_ioctl_set_received_subvol(struct file *file, sa->rtime.sec = ct.tv_sec; sa->rtime.nsec = ct.tv_nsec; + received_uuid_changed = memcmp(root_item->received_uuid, sa->uuid, + BTRFS_UUID_SIZE); + if (received_uuid_changed && + !btrfs_is_empty_uuid(root_item->received_uuid)) + btrfs_uuid_tree_rem(trans, root->fs_info->uuid_root, + root_item->received_uuid, + BTRFS_UUID_KEY_RECEIVED_SUBVOL, + root->root_key.objectid); memcpy(root_item->received_uuid, sa->uuid, BTRFS_UUID_SIZE); btrfs_set_root_stransid(root_item, sa->stransid); btrfs_set_root_rtransid(root_item, sa->rtransid); - root_item->stime.sec = cpu_to_le64(sa->stime.sec); - root_item->stime.nsec = cpu_to_le32(sa->stime.nsec); - root_item->rtime.sec = cpu_to_le64(sa->rtime.sec); - root_item->rtime.nsec = cpu_to_le32(sa->rtime.nsec); + btrfs_set_stack_timespec_sec(&root_item->stime, sa->stime.sec); + btrfs_set_stack_timespec_nsec(&root_item->stime, sa->stime.nsec); + btrfs_set_stack_timespec_sec(&root_item->rtime, sa->rtime.sec); + btrfs_set_stack_timespec_nsec(&root_item->rtime, sa->rtime.nsec); ret = btrfs_update_root(trans, root->fs_info->tree_root, &root->root_key, &root->root_item); if (ret < 0) { btrfs_end_transaction(trans, root); - trans = NULL; goto out; - } else { - ret = btrfs_commit_transaction(trans, root); - if (ret < 0) + } + if (received_uuid_changed && !btrfs_is_empty_uuid(sa->uuid)) { + ret = btrfs_uuid_tree_add(trans, root->fs_info->uuid_root, + sa->uuid, + BTRFS_UUID_KEY_RECEIVED_SUBVOL, + root->root_key.objectid); + if (ret < 0 && ret != -EEXIST) { + btrfs_abort_transaction(trans, root, ret); goto out; + } + } + ret = btrfs_commit_transaction(trans, root); + if (ret < 0) { + btrfs_abort_transaction(trans, root, ret); + goto out; } ret = copy_to_user(arg, sa, sizeof(*sa)); @@ -4041,18 +4418,22 @@ out: static int btrfs_ioctl_get_fslabel(struct file *file, void __user *arg) { struct btrfs_root *root = BTRFS_I(file_inode(file))->root; - const char *label = root->fs_info->super_copy->label; - size_t len = strnlen(label, BTRFS_LABEL_SIZE); + size_t len; int ret; + char label[BTRFS_LABEL_SIZE]; + + spin_lock(&root->fs_info->super_lock); + memcpy(label, root->fs_info->super_copy->label, BTRFS_LABEL_SIZE); + spin_unlock(&root->fs_info->super_lock); + + len = strnlen(label, BTRFS_LABEL_SIZE); if (len == BTRFS_LABEL_SIZE) { pr_warn("btrfs: label is too long, return the first %zu bytes\n", --len); } - mutex_lock(&root->fs_info->volume_mutex); ret = copy_to_user(arg, label, len); - mutex_unlock(&root->fs_info->volume_mutex); return ret ? -EFAULT : 0; } @@ -4081,18 +4462,18 @@ static int btrfs_ioctl_set_fslabel(struct file *file, void __user *arg) if (ret) return ret; - mutex_lock(&root->fs_info->volume_mutex); trans = btrfs_start_transaction(root, 0); if (IS_ERR(trans)) { ret = PTR_ERR(trans); goto out_unlock; } + spin_lock(&root->fs_info->super_lock); strcpy(super_block->label, label); + spin_unlock(&root->fs_info->super_lock); ret = btrfs_end_transaction(trans, root); out_unlock: - mutex_unlock(&root->fs_info->volume_mutex); mnt_drop_write_file(file); return ret; } @@ -4207,6 +4588,8 @@ long btrfs_ioctl(struct file *file, unsigned int return btrfs_ioctl_get_fslabel(file, argp); case BTRFS_IOC_SET_FSLABEL: return btrfs_ioctl_set_fslabel(file, argp); + case BTRFS_IOC_FILE_EXTENT_SAME: + return btrfs_ioctl_file_extent_same(file, argp); } return -ENOTTY; diff --git a/fs/btrfs/lzo.c b/fs/btrfs/lzo.c index f93151a98886..b6a6f07c5ce2 100644 --- a/fs/btrfs/lzo.c +++ b/fs/btrfs/lzo.c @@ -207,8 +207,10 @@ static int lzo_compress_pages(struct list_head *ws, } /* we're making it bigger, give up */ - if (tot_in > 8192 && tot_in < tot_out) + if (tot_in > 8192 && tot_in < tot_out) { + ret = -1; goto out; + } /* we're all done */ if (tot_in >= len) diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 81369827e514..966b413a33b8 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -67,7 +67,7 @@ static void ordered_data_tree_panic(struct inode *inode, int errno, { struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); btrfs_panic(fs_info, errno, "Inconsistency in ordered tree at offset " - "%llu\n", (unsigned long long)offset); + "%llu\n", offset); } /* @@ -205,6 +205,7 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, entry->bytes_left = len; entry->inode = igrab(inode); entry->compress_type = compress_type; + entry->truncated_len = (u64)-1; if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE) set_bit(type, &entry->flags); @@ -336,14 +337,12 @@ int btrfs_dec_test_first_ordered_pending(struct inode *inode, *file_offset = dec_end; if (dec_start > dec_end) { printk(KERN_CRIT "bad ordering dec_start %llu end %llu\n", - (unsigned long long)dec_start, - (unsigned long long)dec_end); + dec_start, dec_end); } to_dec = dec_end - dec_start; if (to_dec > entry->bytes_left) { printk(KERN_CRIT "bad ordered accounting left %llu size %llu\n", - (unsigned long long)entry->bytes_left, - (unsigned long long)to_dec); + entry->bytes_left, to_dec); } entry->bytes_left -= to_dec; if (!uptodate) @@ -403,8 +402,7 @@ have_entry: if (io_size > entry->bytes_left) { printk(KERN_CRIT "bad ordered accounting left %llu size %llu\n", - (unsigned long long)entry->bytes_left, - (unsigned long long)io_size); + entry->bytes_left, io_size); } entry->bytes_left -= io_size; if (!uptodate) @@ -671,7 +669,7 @@ int btrfs_run_ordered_operations(struct btrfs_trans_handle *trans, INIT_LIST_HEAD(&splice); INIT_LIST_HEAD(&works); - mutex_lock(&root->fs_info->ordered_operations_mutex); + mutex_lock(&root->fs_info->ordered_extent_flush_mutex); spin_lock(&root->fs_info->ordered_root_lock); list_splice_init(&cur_trans->ordered_operations, &splice); while (!list_empty(&splice)) { @@ -718,7 +716,7 @@ out: list_del_init(&work->list); btrfs_wait_and_free_delalloc_work(work); } - mutex_unlock(&root->fs_info->ordered_operations_mutex); + mutex_unlock(&root->fs_info->ordered_extent_flush_mutex); return ret; } @@ -923,12 +921,16 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset, struct btrfs_ordered_extent *test; int ret = 1; - if (ordered) + spin_lock_irq(&tree->lock); + if (ordered) { offset = entry_end(ordered); - else + if (test_bit(BTRFS_ORDERED_TRUNCATED, &ordered->flags)) + offset = min(offset, + ordered->file_offset + + ordered->truncated_len); + } else { offset = ALIGN(offset, BTRFS_I(inode)->root->sectorsize); - - spin_lock_irq(&tree->lock); + } disk_i_size = BTRFS_I(inode)->disk_i_size; /* truncate file */ diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index 68844d59ee6f..d9a5aa097b4f 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h @@ -69,6 +69,7 @@ struct btrfs_ordered_sum { * the isize. */ #define BTRFS_ORDERED_LOGGED_CSUM 8 /* We've logged the csums on this ordered ordered extent */ +#define BTRFS_ORDERED_TRUNCATED 9 /* Set when we have to truncate an extent */ struct btrfs_ordered_extent { /* logical offset in the file */ @@ -96,6 +97,12 @@ struct btrfs_ordered_extent { */ u64 outstanding_isize; + /* + * If we get truncated we need to adjust the file extent we enter for + * this ordered extent so that we do not expose stale data. + */ + u64 truncated_len; + /* flags (described above) */ unsigned long flags; diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index dc0024f17c1f..0088bedc8631 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -26,14 +26,12 @@ static void print_chunk(struct extent_buffer *eb, struct btrfs_chunk *chunk) int i; printk(KERN_INFO "\t\tchunk length %llu owner %llu type %llu " "num_stripes %d\n", - (unsigned long long)btrfs_chunk_length(eb, chunk), - (unsigned long long)btrfs_chunk_owner(eb, chunk), - (unsigned long long)btrfs_chunk_type(eb, chunk), - num_stripes); + btrfs_chunk_length(eb, chunk), btrfs_chunk_owner(eb, chunk), + btrfs_chunk_type(eb, chunk), num_stripes); for (i = 0 ; i < num_stripes ; i++) { printk(KERN_INFO "\t\t\tstripe %d devid %llu offset %llu\n", i, - (unsigned long long)btrfs_stripe_devid_nr(eb, chunk, i), - (unsigned long long)btrfs_stripe_offset_nr(eb, chunk, i)); + btrfs_stripe_devid_nr(eb, chunk, i), + btrfs_stripe_offset_nr(eb, chunk, i)); } } static void print_dev_item(struct extent_buffer *eb, @@ -41,18 +39,18 @@ static void print_dev_item(struct extent_buffer *eb, { printk(KERN_INFO "\t\tdev item devid %llu " "total_bytes %llu bytes used %llu\n", - (unsigned long long)btrfs_device_id(eb, dev_item), - (unsigned long long)btrfs_device_total_bytes(eb, dev_item), - (unsigned long long)btrfs_device_bytes_used(eb, dev_item)); + btrfs_device_id(eb, dev_item), + btrfs_device_total_bytes(eb, dev_item), + btrfs_device_bytes_used(eb, dev_item)); } static void print_extent_data_ref(struct extent_buffer *eb, struct btrfs_extent_data_ref *ref) { printk(KERN_INFO "\t\textent data backref root %llu " "objectid %llu offset %llu count %u\n", - (unsigned long long)btrfs_extent_data_ref_root(eb, ref), - (unsigned long long)btrfs_extent_data_ref_objectid(eb, ref), - (unsigned long long)btrfs_extent_data_ref_offset(eb, ref), + btrfs_extent_data_ref_root(eb, ref), + btrfs_extent_data_ref_objectid(eb, ref), + btrfs_extent_data_ref_offset(eb, ref), btrfs_extent_data_ref_count(eb, ref)); } @@ -87,19 +85,17 @@ static void print_extent_item(struct extent_buffer *eb, int slot) flags = btrfs_extent_flags(eb, ei); printk(KERN_INFO "\t\textent refs %llu gen %llu flags %llu\n", - (unsigned long long)btrfs_extent_refs(eb, ei), - (unsigned long long)btrfs_extent_generation(eb, ei), - (unsigned long long)flags); + btrfs_extent_refs(eb, ei), btrfs_extent_generation(eb, ei), + flags); if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { struct btrfs_tree_block_info *info; info = (struct btrfs_tree_block_info *)(ei + 1); btrfs_tree_block_key(eb, info, &key); - printk(KERN_INFO "\t\ttree block key (%llu %x %llu) " + printk(KERN_INFO "\t\ttree block key (%llu %u %llu) " "level %d\n", - (unsigned long long)btrfs_disk_key_objectid(&key), - key.type, - (unsigned long long)btrfs_disk_key_offset(&key), + btrfs_disk_key_objectid(&key), key.type, + btrfs_disk_key_offset(&key), btrfs_tree_block_level(eb, info)); iref = (struct btrfs_extent_inline_ref *)(info + 1); } else { @@ -115,11 +111,11 @@ static void print_extent_item(struct extent_buffer *eb, int slot) switch (type) { case BTRFS_TREE_BLOCK_REF_KEY: printk(KERN_INFO "\t\ttree block backref " - "root %llu\n", (unsigned long long)offset); + "root %llu\n", offset); break; case BTRFS_SHARED_BLOCK_REF_KEY: printk(KERN_INFO "\t\tshared block backref " - "parent %llu\n", (unsigned long long)offset); + "parent %llu\n", offset); break; case BTRFS_EXTENT_DATA_REF_KEY: dref = (struct btrfs_extent_data_ref *)(&iref->offset); @@ -129,8 +125,7 @@ static void print_extent_item(struct extent_buffer *eb, int slot) sref = (struct btrfs_shared_data_ref *)(iref + 1); printk(KERN_INFO "\t\tshared data backref " "parent %llu count %u\n", - (unsigned long long)offset, - btrfs_shared_data_ref_count(eb, sref)); + offset, btrfs_shared_data_ref_count(eb, sref)); break; default: BUG(); @@ -148,13 +143,32 @@ static void print_extent_ref_v0(struct extent_buffer *eb, int slot) ref0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_ref_v0); printk("\t\textent back ref root %llu gen %llu " "owner %llu num_refs %lu\n", - (unsigned long long)btrfs_ref_root_v0(eb, ref0), - (unsigned long long)btrfs_ref_generation_v0(eb, ref0), - (unsigned long long)btrfs_ref_objectid_v0(eb, ref0), + btrfs_ref_root_v0(eb, ref0), + btrfs_ref_generation_v0(eb, ref0), + btrfs_ref_objectid_v0(eb, ref0), (unsigned long)btrfs_ref_count_v0(eb, ref0)); } #endif +static void print_uuid_item(struct extent_buffer *l, unsigned long offset, + u32 item_size) +{ + if (!IS_ALIGNED(item_size, sizeof(u64))) { + pr_warn("btrfs: uuid item with illegal size %lu!\n", + (unsigned long)item_size); + return; + } + while (item_size) { + __le64 subvol_id; + + read_extent_buffer(l, &subvol_id, offset, sizeof(subvol_id)); + printk(KERN_INFO "\t\tsubvol_id %llu\n", + (unsigned long long)le64_to_cpu(subvol_id)); + item_size -= sizeof(u64); + offset += sizeof(u64); + } +} + void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) { int i; @@ -177,39 +191,34 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) nr = btrfs_header_nritems(l); btrfs_info(root->fs_info, "leaf %llu total ptrs %d free space %d", - (unsigned long long)btrfs_header_bytenr(l), nr, - btrfs_leaf_free_space(root, l)); + btrfs_header_bytenr(l), nr, btrfs_leaf_free_space(root, l)); for (i = 0 ; i < nr ; i++) { item = btrfs_item_nr(l, i); btrfs_item_key_to_cpu(l, &key, i); type = btrfs_key_type(&key); - printk(KERN_INFO "\titem %d key (%llu %x %llu) itemoff %d " + printk(KERN_INFO "\titem %d key (%llu %u %llu) itemoff %d " "itemsize %d\n", - i, - (unsigned long long)key.objectid, type, - (unsigned long long)key.offset, + i, key.objectid, type, key.offset, btrfs_item_offset(l, item), btrfs_item_size(l, item)); switch (type) { case BTRFS_INODE_ITEM_KEY: ii = btrfs_item_ptr(l, i, struct btrfs_inode_item); printk(KERN_INFO "\t\tinode generation %llu size %llu " "mode %o\n", - (unsigned long long) btrfs_inode_generation(l, ii), - (unsigned long long)btrfs_inode_size(l, ii), + btrfs_inode_size(l, ii), btrfs_inode_mode(l, ii)); break; case BTRFS_DIR_ITEM_KEY: di = btrfs_item_ptr(l, i, struct btrfs_dir_item); btrfs_dir_item_key_to_cpu(l, di, &found_key); printk(KERN_INFO "\t\tdir oid %llu type %u\n", - (unsigned long long)found_key.objectid, + found_key.objectid, btrfs_dir_type(l, di)); break; case BTRFS_ROOT_ITEM_KEY: ri = btrfs_item_ptr(l, i, struct btrfs_root_item); printk(KERN_INFO "\t\troot data bytenr %llu refs %u\n", - (unsigned long long) btrfs_disk_root_bytenr(l, ri), btrfs_disk_root_refs(l, ri)); break; @@ -245,17 +254,12 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) } printk(KERN_INFO "\t\textent data disk bytenr %llu " "nr %llu\n", - (unsigned long long) btrfs_file_extent_disk_bytenr(l, fi), - (unsigned long long) btrfs_file_extent_disk_num_bytes(l, fi)); printk(KERN_INFO "\t\textent data offset %llu " "nr %llu ram %llu\n", - (unsigned long long) btrfs_file_extent_offset(l, fi), - (unsigned long long) btrfs_file_extent_num_bytes(l, fi), - (unsigned long long) btrfs_file_extent_ram_bytes(l, fi)); break; case BTRFS_EXTENT_REF_V0_KEY: @@ -269,7 +273,6 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) bi = btrfs_item_ptr(l, i, struct btrfs_block_group_item); printk(KERN_INFO "\t\tblock group used %llu\n", - (unsigned long long) btrfs_disk_block_group_used(l, bi)); break; case BTRFS_CHUNK_ITEM_KEY: @@ -286,13 +289,9 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) printk(KERN_INFO "\t\tdev extent chunk_tree %llu\n" "\t\tchunk objectid %llu chunk offset %llu " "length %llu\n", - (unsigned long long) btrfs_dev_extent_chunk_tree(l, dev_extent), - (unsigned long long) btrfs_dev_extent_chunk_objectid(l, dev_extent), - (unsigned long long) btrfs_dev_extent_chunk_offset(l, dev_extent), - (unsigned long long) btrfs_dev_extent_length(l, dev_extent)); break; case BTRFS_DEV_STATS_KEY: @@ -301,6 +300,11 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) case BTRFS_DEV_REPLACE_KEY: printk(KERN_INFO "\t\tdev replace\n"); break; + case BTRFS_UUID_KEY_SUBVOL: + case BTRFS_UUID_KEY_RECEIVED_SUBVOL: + print_uuid_item(l, btrfs_item_ptr_offset(l, i), + btrfs_item_size_nr(l, i)); + break; }; } } @@ -320,16 +324,13 @@ void btrfs_print_tree(struct btrfs_root *root, struct extent_buffer *c) return; } btrfs_info(root->fs_info, "node %llu level %d total ptrs %d free spc %u", - (unsigned long long)btrfs_header_bytenr(c), - level, nr, (u32)BTRFS_NODEPTRS_PER_BLOCK(root) - nr); + btrfs_header_bytenr(c), level, nr, + (u32)BTRFS_NODEPTRS_PER_BLOCK(root) - nr); for (i = 0; i < nr; i++) { btrfs_node_key_to_cpu(c, &key, i); printk(KERN_INFO "\tkey %d (%llu %u %llu) block %llu\n", - i, - (unsigned long long)key.objectid, - key.type, - (unsigned long long)key.offset, - (unsigned long long)btrfs_node_blockptr(c, i)); + i, key.objectid, key.type, key.offset, + btrfs_node_blockptr(c, i)); } for (i = 0; i < nr; i++) { struct extent_buffer *next = read_tree_block(root, diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 1280eff8af56..4e6ef490619e 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -157,18 +157,11 @@ static struct btrfs_qgroup *add_qgroup_rb(struct btrfs_fs_info *fs_info, return qgroup; } -/* must be called with qgroup_lock held */ -static int del_qgroup_rb(struct btrfs_fs_info *fs_info, u64 qgroupid) +static void __del_qgroup_rb(struct btrfs_qgroup *qgroup) { - struct btrfs_qgroup *qgroup = find_qgroup_rb(fs_info, qgroupid); struct btrfs_qgroup_list *list; - if (!qgroup) - return -ENOENT; - - rb_erase(&qgroup->node, &fs_info->qgroup_tree); list_del(&qgroup->dirty); - while (!list_empty(&qgroup->groups)) { list = list_first_entry(&qgroup->groups, struct btrfs_qgroup_list, next_group); @@ -185,7 +178,18 @@ static int del_qgroup_rb(struct btrfs_fs_info *fs_info, u64 qgroupid) kfree(list); } kfree(qgroup); +} +/* must be called with qgroup_lock held */ +static int del_qgroup_rb(struct btrfs_fs_info *fs_info, u64 qgroupid) +{ + struct btrfs_qgroup *qgroup = find_qgroup_rb(fs_info, qgroupid); + + if (!qgroup) + return -ENOENT; + + rb_erase(&qgroup->node, &fs_info->qgroup_tree); + __del_qgroup_rb(qgroup); return 0; } @@ -394,8 +398,7 @@ next1: if (ret == -ENOENT) { printk(KERN_WARNING "btrfs: orphan qgroup relation 0x%llx->0x%llx\n", - (unsigned long long)found_key.objectid, - (unsigned long long)found_key.offset); + found_key.objectid, found_key.offset); ret = 0; /* ignore the error */ } if (ret) @@ -428,39 +431,28 @@ out: } /* - * This is only called from close_ctree() or open_ctree(), both in single- - * treaded paths. Clean up the in-memory structures. No locking needed. + * This is called from close_ctree() or open_ctree() or btrfs_quota_disable(), + * first two are in single-threaded paths.And for the third one, we have set + * quota_root to be null with qgroup_lock held before, so it is safe to clean + * up the in-memory structures without qgroup_lock held. */ void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info) { struct rb_node *n; struct btrfs_qgroup *qgroup; - struct btrfs_qgroup_list *list; while ((n = rb_first(&fs_info->qgroup_tree))) { qgroup = rb_entry(n, struct btrfs_qgroup, node); rb_erase(n, &fs_info->qgroup_tree); - - while (!list_empty(&qgroup->groups)) { - list = list_first_entry(&qgroup->groups, - struct btrfs_qgroup_list, - next_group); - list_del(&list->next_group); - list_del(&list->next_member); - kfree(list); - } - - while (!list_empty(&qgroup->members)) { - list = list_first_entry(&qgroup->members, - struct btrfs_qgroup_list, - next_member); - list_del(&list->next_group); - list_del(&list->next_member); - kfree(list); - } - kfree(qgroup); + __del_qgroup_rb(qgroup); } + /* + * we call btrfs_free_qgroup_config() when umounting + * filesystem and disabling quota, so we set qgroup_ulit + * to be null here to avoid double free. + */ ulist_free(fs_info->qgroup_ulist); + fs_info->qgroup_ulist = NULL; } static int add_qgroup_relation_item(struct btrfs_trans_handle *trans, @@ -946,13 +938,9 @@ int btrfs_quota_disable(struct btrfs_trans_handle *trans, fs_info->pending_quota_state = 0; quota_root = fs_info->quota_root; fs_info->quota_root = NULL; - btrfs_free_qgroup_config(fs_info); spin_unlock(&fs_info->qgroup_lock); - if (!quota_root) { - ret = -EINVAL; - goto out; - } + btrfs_free_qgroup_config(fs_info); ret = btrfs_clean_quota_tree(trans, quota_root); if (ret) @@ -1174,7 +1162,7 @@ int btrfs_limit_qgroup(struct btrfs_trans_handle *trans, if (ret) { fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; printk(KERN_INFO "unable to update quota limit for %llu\n", - (unsigned long long)qgroupid); + qgroupid); } spin_lock(&fs_info->qgroup_lock); @@ -1884,10 +1872,9 @@ qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path, path, 1, 0); pr_debug("current progress key (%llu %u %llu), search_slot ret %d\n", - (unsigned long long)fs_info->qgroup_rescan_progress.objectid, + fs_info->qgroup_rescan_progress.objectid, fs_info->qgroup_rescan_progress.type, - (unsigned long long)fs_info->qgroup_rescan_progress.offset, - ret); + fs_info->qgroup_rescan_progress.offset, ret); if (ret) { /* diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c index 0525e1389f5b..d0ecfbd9cc9f 100644 --- a/fs/btrfs/raid56.c +++ b/fs/btrfs/raid56.c @@ -1540,8 +1540,10 @@ static int full_stripe_write(struct btrfs_raid_bio *rbio) int ret; ret = alloc_rbio_parity_pages(rbio); - if (ret) + if (ret) { + __free_raid_bio(rbio); return ret; + } ret = lock_stripe_add(rbio); if (ret == 0) @@ -1687,11 +1689,8 @@ int raid56_parity_write(struct btrfs_root *root, struct bio *bio, struct blk_plug_cb *cb; rbio = alloc_rbio(root, bbio, raid_map, stripe_len); - if (IS_ERR(rbio)) { - kfree(raid_map); - kfree(bbio); + if (IS_ERR(rbio)) return PTR_ERR(rbio); - } bio_list_add(&rbio->bio_list, bio); rbio->bio_list_bytes = bio->bi_size; @@ -2041,9 +2040,8 @@ int raid56_parity_recover(struct btrfs_root *root, struct bio *bio, int ret; rbio = alloc_rbio(root, bbio, raid_map, stripe_len); - if (IS_ERR(rbio)) { + if (IS_ERR(rbio)) return PTR_ERR(rbio); - } rbio->read_rebuild = 1; bio_list_add(&rbio->bio_list, bio); @@ -2052,6 +2050,8 @@ int raid56_parity_recover(struct btrfs_root *root, struct bio *bio, rbio->faila = find_logical_bio_stripe(rbio, bio); if (rbio->faila == -1) { BUG(); + kfree(raid_map); + kfree(bbio); kfree(rbio); return -EIO; } diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 12096496cc99..aacc2121e87c 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -335,7 +335,7 @@ static void backref_tree_panic(struct rb_node *rb_node, int errno, u64 bytenr) if (bnode->root) fs_info = bnode->root->fs_info; btrfs_panic(fs_info, errno, "Inconsistency in backref cache " - "found at offset %llu\n", (unsigned long long)bytenr); + "found at offset %llu\n", bytenr); } /* @@ -641,6 +641,11 @@ int find_inline_backref(struct extent_buffer *leaf, int slot, WARN_ON(item_size < sizeof(*ei) + sizeof(*bi)); return 1; } + if (key.type == BTRFS_METADATA_ITEM_KEY && + item_size <= sizeof(*ei)) { + WARN_ON(item_size < sizeof(*ei)); + return 1; + } if (key.type == BTRFS_EXTENT_ITEM_KEY) { bi = (struct btrfs_tree_block_info *)(ei + 1); @@ -691,6 +696,7 @@ struct backref_node *build_backref_tree(struct reloc_control *rc, int cowonly; int ret; int err = 0; + bool need_check = true; path1 = btrfs_alloc_path(); path2 = btrfs_alloc_path(); @@ -914,6 +920,7 @@ again: cur->bytenr); lower = cur; + need_check = true; for (; level < BTRFS_MAX_LEVEL; level++) { if (!path2->nodes[level]) { BUG_ON(btrfs_root_bytenr(&root->root_item) != @@ -957,14 +964,12 @@ again: /* * add the block to pending list if we - * need check its backrefs. only block - * at 'cur->level + 1' is added to the - * tail of pending list. this guarantees - * we check backrefs from lower level - * blocks to upper level blocks. + * need check its backrefs, we only do this once + * while walking up a tree as we will catch + * anything else later on. */ - if (!upper->checked && - level == cur->level + 1) { + if (!upper->checked && need_check) { + need_check = false; list_add_tail(&edge->list[UPPER], &list); } else @@ -2314,8 +2319,13 @@ again: BUG_ON(root->reloc_root != reloc_root); ret = merge_reloc_root(rc, root); - if (ret) + if (ret) { + __update_reloc_root(reloc_root, 1); + free_extent_buffer(reloc_root->node); + free_extent_buffer(reloc_root->commit_root); + kfree(reloc_root); goto out; + } } else { list_del_init(&reloc_root->root_list); } @@ -2344,9 +2354,6 @@ again: if (IS_ERR(root)) continue; - if (btrfs_root_refs(&root->root_item) == 0) - continue; - trans = btrfs_join_transaction(root); BUG_ON(IS_ERR(trans)); @@ -3628,7 +3635,7 @@ int add_data_references(struct reloc_control *rc, unsigned long ptr; unsigned long end; u32 blocksize = btrfs_level_size(rc->extent_root, 0); - int ret; + int ret = 0; int err = 0; eb = path->nodes[0]; @@ -3655,6 +3662,10 @@ int add_data_references(struct reloc_control *rc, } else { BUG(); } + if (ret) { + err = ret; + goto out; + } ptr += btrfs_extent_inline_ref_size(key.type); } WARN_ON(ptr > end); @@ -3700,6 +3711,7 @@ int add_data_references(struct reloc_control *rc, } path->slots[0]++; } +out: btrfs_release_path(path); if (err) free_block_list(blocks); @@ -4219,8 +4231,7 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start) } printk(KERN_INFO "btrfs: relocating block group %llu flags %llu\n", - (unsigned long long)rc->block_group->key.objectid, - (unsigned long long)rc->block_group->flags); + rc->block_group->key.objectid, rc->block_group->flags); ret = btrfs_start_all_delalloc_inodes(fs_info, 0); if (ret < 0) { @@ -4242,7 +4253,7 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start) break; printk(KERN_INFO "btrfs: found %llu extents\n", - (unsigned long long)rc->extents_found); + rc->extents_found); if (rc->stage == MOVE_DATA_EXTENTS && rc->found_file_extent) { btrfs_wait_ordered_range(rc->data_inode, 0, (u64)-1); diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index ffb1036ef10d..0b1f4ef8db98 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -29,8 +29,8 @@ * generation numbers as then we know the root was once mounted with an older * kernel that was not aware of the root item structure change. */ -void btrfs_read_root_item(struct extent_buffer *eb, int slot, - struct btrfs_root_item *item) +static void btrfs_read_root_item(struct extent_buffer *eb, int slot, + struct btrfs_root_item *item) { uuid_le uuid; int len; @@ -155,8 +155,7 @@ int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root if (ret != 0) { btrfs_print_leaf(root, path->nodes[0]); printk(KERN_CRIT "unable to update root key %llu %u %llu\n", - (unsigned long long)key->objectid, key->type, - (unsigned long long)key->offset); + key->objectid, key->type, key->offset); BUG_ON(1); } @@ -490,13 +489,13 @@ again: */ void btrfs_check_and_init_root_item(struct btrfs_root_item *root_item) { - u64 inode_flags = le64_to_cpu(root_item->inode.flags); + u64 inode_flags = btrfs_stack_inode_flags(&root_item->inode); if (!(inode_flags & BTRFS_INODE_ROOT_ITEM_INIT)) { inode_flags |= BTRFS_INODE_ROOT_ITEM_INIT; - root_item->inode.flags = cpu_to_le64(inode_flags); - root_item->flags = 0; - root_item->byte_limit = 0; + btrfs_set_stack_inode_flags(&root_item->inode, inode_flags); + btrfs_set_root_flags(root_item, 0); + btrfs_set_root_limit(root_item, 0); } } @@ -507,8 +506,8 @@ void btrfs_update_root_times(struct btrfs_trans_handle *trans, struct timespec ct = CURRENT_TIME; spin_lock(&root->root_item_lock); - item->ctransid = cpu_to_le64(trans->transid); - item->ctime.sec = cpu_to_le64(ct.tv_sec); - item->ctime.nsec = cpu_to_le32(ct.tv_nsec); + btrfs_set_root_ctransid(item, trans->transid); + btrfs_set_stack_timespec_sec(&item->ctime, ct.tv_sec); + btrfs_set_stack_timespec_nsec(&item->ctime, ct.tv_nsec); spin_unlock(&root->root_item_lock); } diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 64a157becbe5..0afcd452fcb3 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -754,8 +754,7 @@ out: num_uncorrectable_read_errors); printk_ratelimited_in_rcu(KERN_ERR "btrfs: unable to fixup (nodatasum) error at logical %llu on dev %s\n", - (unsigned long long)fixup->logical, - rcu_str_deref(fixup->dev->name)); + fixup->logical, rcu_str_deref(fixup->dev->name)); } btrfs_free_path(path); @@ -1154,8 +1153,7 @@ corrected_error: spin_unlock(&sctx->stat_lock); printk_ratelimited_in_rcu(KERN_ERR "btrfs: fixed up error at logical %llu on dev %s\n", - (unsigned long long)logical, - rcu_str_deref(dev->name)); + logical, rcu_str_deref(dev->name)); } } else { did_not_correct_error: @@ -1164,8 +1162,7 @@ did_not_correct_error: spin_unlock(&sctx->stat_lock); printk_ratelimited_in_rcu(KERN_ERR "btrfs: unable to fixup (regular) error at logical %llu on dev %s\n", - (unsigned long long)logical, - rcu_str_deref(dev->name)); + logical, rcu_str_deref(dev->name)); } out: @@ -1345,12 +1342,12 @@ static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info, mapped_buffer = kmap_atomic(sblock->pagev[0]->page); h = (struct btrfs_header *)mapped_buffer; - if (sblock->pagev[0]->logical != le64_to_cpu(h->bytenr) || + if (sblock->pagev[0]->logical != btrfs_stack_header_bytenr(h) || memcmp(h->fsid, fs_info->fsid, BTRFS_UUID_SIZE) || memcmp(h->chunk_tree_uuid, fs_info->chunk_tree_uuid, BTRFS_UUID_SIZE)) { sblock->header_error = 1; - } else if (generation != le64_to_cpu(h->generation)) { + } else if (generation != btrfs_stack_header_generation(h)) { sblock->header_error = 1; sblock->generation_error = 1; } @@ -1720,10 +1717,10 @@ static int scrub_checksum_tree_block(struct scrub_block *sblock) * b) the page is already kmapped */ - if (sblock->pagev[0]->logical != le64_to_cpu(h->bytenr)) + if (sblock->pagev[0]->logical != btrfs_stack_header_bytenr(h)) ++fail; - if (sblock->pagev[0]->generation != le64_to_cpu(h->generation)) + if (sblock->pagev[0]->generation != btrfs_stack_header_generation(h)) ++fail; if (memcmp(h->fsid, fs_info->fsid, BTRFS_UUID_SIZE)) @@ -1786,10 +1783,10 @@ static int scrub_checksum_super(struct scrub_block *sblock) s = (struct btrfs_super_block *)mapped_buffer; memcpy(on_disk_csum, s->csum, sctx->csum_size); - if (sblock->pagev[0]->logical != le64_to_cpu(s->bytenr)) + if (sblock->pagev[0]->logical != btrfs_super_bytenr(s)) ++fail_cor; - if (sblock->pagev[0]->generation != le64_to_cpu(s->generation)) + if (sblock->pagev[0]->generation != btrfs_super_generation(s)) ++fail_gen; if (memcmp(s->fsid, fs_info->fsid, BTRFS_UUID_SIZE)) @@ -2455,8 +2452,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, printk(KERN_ERR "btrfs scrub: tree block %llu spanning " "stripes, ignored. logical=%llu\n", - (unsigned long long)key.objectid, - (unsigned long long)logical); + key.objectid, logical); goto next; } @@ -2863,9 +2859,8 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start, if (fs_info->chunk_root->sectorsize != PAGE_SIZE) { /* not supported for data w/o checksums */ printk(KERN_ERR - "btrfs_scrub: size assumption sectorsize != PAGE_SIZE (%d != %lld) fails\n", - fs_info->chunk_root->sectorsize, - (unsigned long long)PAGE_SIZE); + "btrfs_scrub: size assumption sectorsize != PAGE_SIZE (%d != %lu) fails\n", + fs_info->chunk_root->sectorsize, PAGE_SIZE); return -EINVAL; } @@ -3175,11 +3170,9 @@ static void copy_nocow_pages_worker(struct btrfs_work *work) copy_nocow_pages_for_inode, nocow_ctx); if (ret != 0 && ret != -ENOENT) { - pr_warn("iterate_inodes_from_logical() failed: log %llu, phys %llu, len %llu, mir %llu, ret %d\n", - (unsigned long long)logical, - (unsigned long long)physical_for_dev_replace, - (unsigned long long)len, - (unsigned long long)mirror_num, ret); + pr_warn("iterate_inodes_from_logical() failed: log %llu, phys %llu, len %llu, mir %u, ret %d\n", + logical, physical_for_dev_replace, len, mirror_num, + ret); not_written = 1; goto out; } @@ -3224,11 +3217,6 @@ static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root, void *ctx) return PTR_ERR(local_root); } - if (btrfs_root_refs(&local_root->root_item) == 0) { - srcu_read_unlock(&fs_info->subvol_srcu, srcu_index); - return -ENOENT; - } - key.type = BTRFS_INODE_ITEM_KEY; key.objectid = inum; key.offset = 0; diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 2e14fd89a8b4..e46e0ed74925 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -26,6 +26,7 @@ #include <linux/radix-tree.h> #include <linux/crc32c.h> #include <linux/vmalloc.h> +#include <linux/string.h> #include "send.h" #include "backref.h" @@ -54,8 +55,8 @@ struct fs_path { char *buf; int buf_len; - int reversed:1; - int virtual_mem:1; + unsigned int reversed:1; + unsigned int virtual_mem:1; char inline_buf[]; }; char pad[PAGE_SIZE]; @@ -1668,6 +1669,7 @@ static int will_overwrite_ref(struct send_ctx *sctx, u64 dir, u64 dir_gen, u64 *who_ino, u64 *who_gen) { int ret = 0; + u64 gen; u64 other_inode = 0; u8 other_type = 0; @@ -1678,6 +1680,24 @@ static int will_overwrite_ref(struct send_ctx *sctx, u64 dir, u64 dir_gen, if (ret <= 0) goto out; + /* + * If we have a parent root we need to verify that the parent dir was + * not delted and then re-created, if it was then we have no overwrite + * and we can just unlink this entry. + */ + if (sctx->parent_root) { + ret = get_inode_info(sctx->parent_root, dir, NULL, &gen, NULL, + NULL, NULL, NULL); + if (ret < 0 && ret != -ENOENT) + goto out; + if (ret) { + ret = 0; + goto out; + } + if (gen != dir_gen) + goto out; + } + ret = lookup_dir_item_inode(sctx->parent_root, dir, name, name_len, &other_inode, &other_type); if (ret < 0 && ret != -ENOENT) @@ -2519,7 +2539,8 @@ static int did_create_dir(struct send_ctx *sctx, u64 dir) di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item); btrfs_dir_item_key_to_cpu(eb, di, &di_key); - if (di_key.objectid < sctx->send_progress) { + if (di_key.type != BTRFS_ROOT_ITEM_KEY && + di_key.objectid < sctx->send_progress) { ret = 1; goto out; } @@ -2581,7 +2602,6 @@ static int record_ref(struct list_head *head, u64 dir, u64 dir_gen, struct fs_path *path) { struct recorded_ref *ref; - char *tmp; ref = kmalloc(sizeof(*ref), GFP_NOFS); if (!ref) @@ -2591,25 +2611,35 @@ static int record_ref(struct list_head *head, u64 dir, ref->dir_gen = dir_gen; ref->full_path = path; - tmp = strrchr(ref->full_path->start, '/'); - if (!tmp) { - ref->name_len = ref->full_path->end - ref->full_path->start; - ref->name = ref->full_path->start; + ref->name = (char *)kbasename(ref->full_path->start); + ref->name_len = ref->full_path->end - ref->name; + ref->dir_path = ref->full_path->start; + if (ref->name == ref->full_path->start) ref->dir_path_len = 0; - ref->dir_path = ref->full_path->start; - } else { - tmp++; - ref->name_len = ref->full_path->end - tmp; - ref->name = tmp; - ref->dir_path = ref->full_path->start; + else ref->dir_path_len = ref->full_path->end - ref->full_path->start - 1 - ref->name_len; - } list_add_tail(&ref->list, head); return 0; } +static int dup_ref(struct recorded_ref *ref, struct list_head *list) +{ + struct recorded_ref *new; + + new = kmalloc(sizeof(*ref), GFP_NOFS); + if (!new) + return -ENOMEM; + + new->dir = ref->dir; + new->dir_gen = ref->dir_gen; + new->full_path = NULL; + INIT_LIST_HEAD(&new->list); + list_add_tail(&new->list, list); + return 0; +} + static void __free_recorded_refs(struct list_head *head) { struct recorded_ref *cur; @@ -2724,9 +2754,7 @@ static int process_recorded_refs(struct send_ctx *sctx) int ret = 0; struct recorded_ref *cur; struct recorded_ref *cur2; - struct ulist *check_dirs = NULL; - struct ulist_iterator uit; - struct ulist_node *un; + struct list_head check_dirs; struct fs_path *valid_path = NULL; u64 ow_inode = 0; u64 ow_gen; @@ -2740,6 +2768,7 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); * which is always '..' */ BUG_ON(sctx->cur_ino <= BTRFS_FIRST_FREE_OBJECTID); + INIT_LIST_HEAD(&check_dirs); valid_path = fs_path_alloc(); if (!valid_path) { @@ -2747,12 +2776,6 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); goto out; } - check_dirs = ulist_alloc(GFP_NOFS); - if (!check_dirs) { - ret = -ENOMEM; - goto out; - } - /* * First, check if the first ref of the current inode was overwritten * before. If yes, we know that the current inode was already orphanized @@ -2889,8 +2912,7 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); goto out; } } - ret = ulist_add(check_dirs, cur->dir, cur->dir_gen, - GFP_NOFS); + ret = dup_ref(cur, &check_dirs); if (ret < 0) goto out; } @@ -2918,8 +2940,7 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); } list_for_each_entry(cur, &sctx->deleted_refs, list) { - ret = ulist_add(check_dirs, cur->dir, cur->dir_gen, - GFP_NOFS); + ret = dup_ref(cur, &check_dirs); if (ret < 0) goto out; } @@ -2930,8 +2951,7 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); */ cur = list_entry(sctx->deleted_refs.next, struct recorded_ref, list); - ret = ulist_add(check_dirs, cur->dir, cur->dir_gen, - GFP_NOFS); + ret = dup_ref(cur, &check_dirs); if (ret < 0) goto out; } else if (!S_ISDIR(sctx->cur_inode_mode)) { @@ -2951,12 +2971,10 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); if (ret < 0) goto out; } - ret = ulist_add(check_dirs, cur->dir, cur->dir_gen, - GFP_NOFS); + ret = dup_ref(cur, &check_dirs); if (ret < 0) goto out; } - /* * If the inode is still orphan, unlink the orphan. This may * happen when a previous inode did overwrite the first ref @@ -2978,33 +2996,32 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); * deletion and if it's finally possible to perform the rmdir now. * We also update the inode stats of the parent dirs here. */ - ULIST_ITER_INIT(&uit); - while ((un = ulist_next(check_dirs, &uit))) { + list_for_each_entry(cur, &check_dirs, list) { /* * In case we had refs into dirs that were not processed yet, * we don't need to do the utime and rmdir logic for these dirs. * The dir will be processed later. */ - if (un->val > sctx->cur_ino) + if (cur->dir > sctx->cur_ino) continue; - ret = get_cur_inode_state(sctx, un->val, un->aux); + ret = get_cur_inode_state(sctx, cur->dir, cur->dir_gen); if (ret < 0) goto out; if (ret == inode_state_did_create || ret == inode_state_no_change) { /* TODO delayed utimes */ - ret = send_utimes(sctx, un->val, un->aux); + ret = send_utimes(sctx, cur->dir, cur->dir_gen); if (ret < 0) goto out; } else if (ret == inode_state_did_delete) { - ret = can_rmdir(sctx, un->val, sctx->cur_ino); + ret = can_rmdir(sctx, cur->dir, sctx->cur_ino); if (ret < 0) goto out; if (ret) { - ret = get_cur_path(sctx, un->val, un->aux, - valid_path); + ret = get_cur_path(sctx, cur->dir, + cur->dir_gen, valid_path); if (ret < 0) goto out; ret = send_rmdir(sctx, valid_path); @@ -3017,8 +3034,8 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); ret = 0; out: + __free_recorded_refs(&check_dirs); free_recorded_refs(sctx); - ulist_free(check_dirs); fs_path_free(valid_path); return ret; } @@ -3119,6 +3136,8 @@ out: struct find_ref_ctx { u64 dir; + u64 dir_gen; + struct btrfs_root *root; struct fs_path *name; int found_idx; }; @@ -3128,9 +3147,21 @@ static int __find_iref(int num, u64 dir, int index, void *ctx_) { struct find_ref_ctx *ctx = ctx_; + u64 dir_gen; + int ret; if (dir == ctx->dir && fs_path_len(name) == fs_path_len(ctx->name) && strncmp(name->start, ctx->name->start, fs_path_len(name)) == 0) { + /* + * To avoid doing extra lookups we'll only do this if everything + * else matches. + */ + ret = get_inode_info(ctx->root, dir, NULL, &dir_gen, NULL, + NULL, NULL, NULL); + if (ret) + return ret; + if (dir_gen != ctx->dir_gen) + return 0; ctx->found_idx = num; return 1; } @@ -3140,14 +3171,16 @@ static int __find_iref(int num, u64 dir, int index, static int find_iref(struct btrfs_root *root, struct btrfs_path *path, struct btrfs_key *key, - u64 dir, struct fs_path *name) + u64 dir, u64 dir_gen, struct fs_path *name) { int ret; struct find_ref_ctx ctx; ctx.dir = dir; ctx.name = name; + ctx.dir_gen = dir_gen; ctx.found_idx = -1; + ctx.root = root; ret = iterate_inode_ref(root, path, key, 0, __find_iref, &ctx); if (ret < 0) @@ -3163,11 +3196,17 @@ static int __record_changed_new_ref(int num, u64 dir, int index, struct fs_path *name, void *ctx) { + u64 dir_gen; int ret; struct send_ctx *sctx = ctx; + ret = get_inode_info(sctx->send_root, dir, NULL, &dir_gen, NULL, + NULL, NULL, NULL); + if (ret) + return ret; + ret = find_iref(sctx->parent_root, sctx->right_path, - sctx->cmp_key, dir, name); + sctx->cmp_key, dir, dir_gen, name); if (ret == -ENOENT) ret = __record_new_ref(num, dir, index, name, sctx); else if (ret > 0) @@ -3180,11 +3219,17 @@ static int __record_changed_deleted_ref(int num, u64 dir, int index, struct fs_path *name, void *ctx) { + u64 dir_gen; int ret; struct send_ctx *sctx = ctx; + ret = get_inode_info(sctx->parent_root, dir, NULL, &dir_gen, NULL, + NULL, NULL, NULL); + if (ret) + return ret; + ret = find_iref(sctx->send_root, sctx->left_path, sctx->cmp_key, - dir, name); + dir, dir_gen, name); if (ret == -ENOENT) ret = __record_deleted_ref(num, dir, index, name, sctx); else if (ret > 0) @@ -3869,7 +3914,8 @@ static int is_extent_unchanged(struct send_ctx *sctx, btrfs_item_key_to_cpu(eb, &found_key, slot); if (found_key.objectid != key.objectid || found_key.type != key.type) { - ret = 0; + /* If we're a hole then just pretend nothing changed */ + ret = (left_disknr) ? 0 : 1; goto out; } @@ -3895,7 +3941,8 @@ static int is_extent_unchanged(struct send_ctx *sctx, * This may only happen on the first iteration. */ if (found_key.offset + right_len <= ekey->offset) { - ret = 0; + /* If we're a hole just pretend nothing changed */ + ret = (left_disknr) ? 0 : 1; goto out; } @@ -3960,8 +4007,8 @@ static int process_extent(struct send_ctx *sctx, struct btrfs_path *path, struct btrfs_key *key) { - int ret = 0; struct clone_root *found_clone = NULL; + int ret = 0; if (S_ISLNK(sctx->cur_inode_mode)) return 0; @@ -3974,6 +4021,32 @@ static int process_extent(struct send_ctx *sctx, ret = 0; goto out; } + } else { + struct btrfs_file_extent_item *ei; + u8 type; + + ei = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_file_extent_item); + type = btrfs_file_extent_type(path->nodes[0], ei); + if (type == BTRFS_FILE_EXTENT_PREALLOC || + type == BTRFS_FILE_EXTENT_REG) { + /* + * The send spec does not have a prealloc command yet, + * so just leave a hole for prealloc'ed extents until + * we have enough commands queued up to justify rev'ing + * the send spec. + */ + if (type == BTRFS_FILE_EXTENT_PREALLOC) { + ret = 0; + goto out; + } + + /* Have a hole, just skip it. */ + if (btrfs_file_extent_disk_bytenr(path->nodes[0], ei) == 0) { + ret = 0; + goto out; + } + } } ret = find_extent_clone(sctx, path, key->objectid, key->offset, @@ -4361,6 +4434,64 @@ static int changed_extent(struct send_ctx *sctx, return ret; } +static int dir_changed(struct send_ctx *sctx, u64 dir) +{ + u64 orig_gen, new_gen; + int ret; + + ret = get_inode_info(sctx->send_root, dir, NULL, &new_gen, NULL, NULL, + NULL, NULL); + if (ret) + return ret; + + ret = get_inode_info(sctx->parent_root, dir, NULL, &orig_gen, NULL, + NULL, NULL, NULL); + if (ret) + return ret; + + return (orig_gen != new_gen) ? 1 : 0; +} + +static int compare_refs(struct send_ctx *sctx, struct btrfs_path *path, + struct btrfs_key *key) +{ + struct btrfs_inode_extref *extref; + struct extent_buffer *leaf; + u64 dirid = 0, last_dirid = 0; + unsigned long ptr; + u32 item_size; + u32 cur_offset = 0; + int ref_name_len; + int ret = 0; + + /* Easy case, just check this one dirid */ + if (key->type == BTRFS_INODE_REF_KEY) { + dirid = key->offset; + + ret = dir_changed(sctx, dirid); + goto out; + } + + leaf = path->nodes[0]; + item_size = btrfs_item_size_nr(leaf, path->slots[0]); + ptr = btrfs_item_ptr_offset(leaf, path->slots[0]); + while (cur_offset < item_size) { + extref = (struct btrfs_inode_extref *)(ptr + + cur_offset); + dirid = btrfs_inode_extref_parent(leaf, extref); + ref_name_len = btrfs_inode_extref_name_len(leaf, extref); + cur_offset += ref_name_len + sizeof(*extref); + if (dirid == last_dirid) + continue; + ret = dir_changed(sctx, dirid); + if (ret) + break; + last_dirid = dirid; + } +out: + return ret; +} + /* * Updates compare related fields in sctx and simply forwards to the actual * changed_xxx functions. @@ -4376,6 +4507,19 @@ static int changed_cb(struct btrfs_root *left_root, int ret = 0; struct send_ctx *sctx = ctx; + if (result == BTRFS_COMPARE_TREE_SAME) { + if (key->type != BTRFS_INODE_REF_KEY && + key->type != BTRFS_INODE_EXTREF_KEY) + return 0; + ret = compare_refs(sctx, left_path, key); + if (!ret) + return 0; + if (ret < 0) + return ret; + result = BTRFS_COMPARE_TREE_CHANGED; + ret = 0; + } + sctx->left_path = left_path; sctx->right_path = right_path; sctx->cmp_key = key; diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 8eb6191d86da..3aab10ce63e8 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -56,6 +56,8 @@ #include "rcu-string.h" #include "dev-replace.h" #include "free-space-cache.h" +#include "backref.h" +#include "tests/btrfs-tests.h" #define CREATE_TRACE_POINTS #include <trace/events/btrfs.h> @@ -320,14 +322,15 @@ enum { Opt_enospc_debug, Opt_subvolrootid, Opt_defrag, Opt_inode_cache, Opt_no_space_cache, Opt_recovery, Opt_skip_balance, Opt_check_integrity, Opt_check_integrity_including_extent_data, - Opt_check_integrity_print_mask, Opt_fatal_errors, + Opt_check_integrity_print_mask, Opt_fatal_errors, Opt_rescan_uuid_tree, + Opt_commit_interval, Opt_err, }; static match_table_t tokens = { {Opt_degraded, "degraded"}, {Opt_subvol, "subvol=%s"}, - {Opt_subvolid, "subvolid=%d"}, + {Opt_subvolid, "subvolid=%s"}, {Opt_device, "device=%s"}, {Opt_nodatasum, "nodatasum"}, {Opt_nodatacow, "nodatacow"}, @@ -360,7 +363,9 @@ static match_table_t tokens = { {Opt_check_integrity, "check_int"}, {Opt_check_integrity_including_extent_data, "check_int_data"}, {Opt_check_integrity_print_mask, "check_int_print_mask=%d"}, + {Opt_rescan_uuid_tree, "rescan_uuid_tree"}, {Opt_fatal_errors, "fatal_errors=%s"}, + {Opt_commit_interval, "commit=%d"}, {Opt_err, NULL}, }; @@ -496,10 +501,15 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) btrfs_set_opt(info->mount_opt, NOBARRIER); break; case Opt_thread_pool: - intarg = 0; - match_int(&args[0], &intarg); - if (intarg) + ret = match_int(&args[0], &intarg); + if (ret) { + goto out; + } else if (intarg > 0) { info->thread_pool_size = intarg; + } else { + ret = -EINVAL; + goto out; + } break; case Opt_max_inline: num = match_strdup(&args[0]); @@ -513,7 +523,10 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) root->sectorsize); } printk(KERN_INFO "btrfs: max_inline at %llu\n", - (unsigned long long)info->max_inline); + info->max_inline); + } else { + ret = -ENOMEM; + goto out; } break; case Opt_alloc_start: @@ -525,7 +538,10 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) kfree(num); printk(KERN_INFO "btrfs: allocations start at %llu\n", - (unsigned long long)info->alloc_start); + info->alloc_start); + } else { + ret = -ENOMEM; + goto out; } break; case Opt_noacl: @@ -540,12 +556,16 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) btrfs_set_opt(info->mount_opt, FLUSHONCOMMIT); break; case Opt_ratio: - intarg = 0; - match_int(&args[0], &intarg); - if (intarg) { + ret = match_int(&args[0], &intarg); + if (ret) { + goto out; + } else if (intarg >= 0) { info->metadata_ratio = intarg; printk(KERN_INFO "btrfs: metadata ratio %d\n", info->metadata_ratio); + } else { + ret = -EINVAL; + goto out; } break; case Opt_discard: @@ -554,6 +574,9 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) case Opt_space_cache: btrfs_set_opt(info->mount_opt, SPACE_CACHE); break; + case Opt_rescan_uuid_tree: + btrfs_set_opt(info->mount_opt, RESCAN_UUID_TREE); + break; case Opt_no_space_cache: printk(KERN_INFO "btrfs: disabling disk space caching\n"); btrfs_clear_opt(info->mount_opt, SPACE_CACHE); @@ -596,13 +619,17 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) btrfs_set_opt(info->mount_opt, CHECK_INTEGRITY); break; case Opt_check_integrity_print_mask: - intarg = 0; - match_int(&args[0], &intarg); - if (intarg) { + ret = match_int(&args[0], &intarg); + if (ret) { + goto out; + } else if (intarg >= 0) { info->check_integrity_print_mask = intarg; printk(KERN_INFO "btrfs:" " check_integrity_print_mask 0x%x\n", info->check_integrity_print_mask); + } else { + ret = -EINVAL; + goto out; } break; #else @@ -626,6 +653,29 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) goto out; } break; + case Opt_commit_interval: + intarg = 0; + ret = match_int(&args[0], &intarg); + if (ret < 0) { + printk(KERN_ERR + "btrfs: invalid commit interval\n"); + ret = -EINVAL; + goto out; + } + if (intarg > 0) { + if (intarg > 300) { + printk(KERN_WARNING + "btrfs: excessive commit interval %d\n", + intarg); + } + info->commit_interval = intarg; + } else { + printk(KERN_INFO + "btrfs: using default commit interval %ds\n", + BTRFS_DEFAULT_COMMIT_INTERVAL); + info->commit_interval = BTRFS_DEFAULT_COMMIT_INTERVAL; + } + break; case Opt_err: printk(KERN_INFO "btrfs: unrecognized mount option " "'%s'\n", p); @@ -654,8 +704,8 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags, { substring_t args[MAX_OPT_ARGS]; char *device_name, *opts, *orig, *p; + char *num = NULL; int error = 0; - int intarg; if (!options) return 0; @@ -679,17 +729,23 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags, case Opt_subvol: kfree(*subvol_name); *subvol_name = match_strdup(&args[0]); + if (!*subvol_name) { + error = -ENOMEM; + goto out; + } break; case Opt_subvolid: - intarg = 0; - error = match_int(&args[0], &intarg); - if (!error) { + num = match_strdup(&args[0]); + if (num) { + *subvol_objectid = memparse(num, NULL); + kfree(num); /* we want the original fs_tree */ - if (!intarg) + if (!*subvol_objectid) *subvol_objectid = BTRFS_FS_TREE_OBJECTID; - else - *subvol_objectid = intarg; + } else { + error = -EINVAL; + goto out; } break; case Opt_subvolrootid: @@ -892,11 +948,9 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry) if (btrfs_test_opt(root, NOBARRIER)) seq_puts(seq, ",nobarrier"); if (info->max_inline != 8192 * 1024) - seq_printf(seq, ",max_inline=%llu", - (unsigned long long)info->max_inline); + seq_printf(seq, ",max_inline=%llu", info->max_inline); if (info->alloc_start != 0) - seq_printf(seq, ",alloc_start=%llu", - (unsigned long long)info->alloc_start); + seq_printf(seq, ",alloc_start=%llu", info->alloc_start); if (info->thread_pool_size != min_t(unsigned long, num_online_cpus() + 2, 8)) seq_printf(seq, ",thread_pool=%d", info->thread_pool_size); @@ -928,6 +982,8 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry) seq_puts(seq, ",space_cache"); else seq_puts(seq, ",nospace_cache"); + if (btrfs_test_opt(root, RESCAN_UUID_TREE)) + seq_puts(seq, ",rescan_uuid_tree"); if (btrfs_test_opt(root, CLEAR_CACHE)) seq_puts(seq, ",clear_cache"); if (btrfs_test_opt(root, USER_SUBVOL_RM_ALLOWED)) @@ -940,8 +996,24 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry) seq_puts(seq, ",inode_cache"); if (btrfs_test_opt(root, SKIP_BALANCE)) seq_puts(seq, ",skip_balance"); + if (btrfs_test_opt(root, RECOVERY)) + seq_puts(seq, ",recovery"); +#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY + if (btrfs_test_opt(root, CHECK_INTEGRITY_INCLUDING_EXTENT_DATA)) + seq_puts(seq, ",check_int_data"); + else if (btrfs_test_opt(root, CHECK_INTEGRITY)) + seq_puts(seq, ",check_int"); + if (info->check_integrity_print_mask) + seq_printf(seq, ",check_int_print_mask=%d", + info->check_integrity_print_mask); +#endif + if (info->metadata_ratio) + seq_printf(seq, ",metadata_ratio=%d", + info->metadata_ratio); if (btrfs_test_opt(root, PANIC_ON_FATAL_ERROR)) seq_puts(seq, ",fatal_errors=panic"); + if (info->commit_interval != BTRFS_DEFAULT_COMMIT_INTERVAL) + seq_printf(seq, ",commit=%d", info->commit_interval); return 0; } @@ -1696,6 +1768,11 @@ static void btrfs_print_info(void) "\n"); } +static int btrfs_run_sanity_tests(void) +{ + return btrfs_test_free_space_cache(); +} + static int __init init_btrfs_fs(void) { int err; @@ -1734,23 +1811,32 @@ static int __init init_btrfs_fs(void) if (err) goto free_auto_defrag; - err = btrfs_interface_init(); + err = btrfs_prelim_ref_init(); if (err) - goto free_delayed_ref; + goto free_prelim_ref; - err = register_filesystem(&btrfs_fs_type); + err = btrfs_interface_init(); if (err) - goto unregister_ioctl; + goto free_delayed_ref; btrfs_init_lockdep(); btrfs_print_info(); - btrfs_test_free_space_cache(); + + err = btrfs_run_sanity_tests(); + if (err) + goto unregister_ioctl; + + err = register_filesystem(&btrfs_fs_type); + if (err) + goto unregister_ioctl; return 0; unregister_ioctl: btrfs_interface_exit(); +free_prelim_ref: + btrfs_prelim_ref_exit(); free_delayed_ref: btrfs_delayed_ref_exit(); free_auto_defrag: @@ -1777,6 +1863,7 @@ static void __exit exit_btrfs_fs(void) btrfs_delayed_ref_exit(); btrfs_auto_defrag_exit(); btrfs_delayed_inode_exit(); + btrfs_prelim_ref_exit(); ordered_data_exit(); extent_map_exit(); extent_io_exit(); diff --git a/fs/btrfs/tests/btrfs-tests.h b/fs/btrfs/tests/btrfs-tests.h new file mode 100644 index 000000000000..580877625776 --- /dev/null +++ b/fs/btrfs/tests/btrfs-tests.h @@ -0,0 +1,34 @@ +/* + * Copyright (C) 2013 Fusion IO. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + +#ifndef __BTRFS_TESTS +#define __BTRFS_TESTS + +#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS + +#define test_msg(fmt, ...) pr_info("btrfs: selftest: " fmt, ##__VA_ARGS__) + +int btrfs_test_free_space_cache(void); +#else +static inline int btrfs_test_free_space_cache(void) +{ + return 0; +} +#endif + +#endif diff --git a/fs/btrfs/tests/free-space-tests.c b/fs/btrfs/tests/free-space-tests.c new file mode 100644 index 000000000000..6fc82010dc15 --- /dev/null +++ b/fs/btrfs/tests/free-space-tests.c @@ -0,0 +1,395 @@ +/* + * Copyright (C) 2013 Fusion IO. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + +#include <linux/slab.h> +#include "btrfs-tests.h" +#include "../ctree.h" +#include "../free-space-cache.h" + +#define BITS_PER_BITMAP (PAGE_CACHE_SIZE * 8) +static struct btrfs_block_group_cache *init_test_block_group(void) +{ + struct btrfs_block_group_cache *cache; + + cache = kzalloc(sizeof(*cache), GFP_NOFS); + if (!cache) + return NULL; + cache->free_space_ctl = kzalloc(sizeof(*cache->free_space_ctl), + GFP_NOFS); + if (!cache->free_space_ctl) { + kfree(cache); + return NULL; + } + + cache->key.objectid = 0; + cache->key.offset = 1024 * 1024 * 1024; + cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; + cache->sectorsize = 4096; + + spin_lock_init(&cache->lock); + INIT_LIST_HEAD(&cache->list); + INIT_LIST_HEAD(&cache->cluster_list); + INIT_LIST_HEAD(&cache->new_bg_list); + + btrfs_init_free_space_ctl(cache); + + return cache; +} + +/* + * This test just does basic sanity checking, making sure we can add an exten + * entry and remove space from either end and the middle, and make sure we can + * remove space that covers adjacent extent entries. + */ +static int test_extents(struct btrfs_block_group_cache *cache) +{ + int ret = 0; + + test_msg("Running extent only tests\n"); + + /* First just make sure we can remove an entire entry */ + ret = btrfs_add_free_space(cache, 0, 4 * 1024 * 1024); + if (ret) { + test_msg("Error adding initial extents %d\n", ret); + return ret; + } + + ret = btrfs_remove_free_space(cache, 0, 4 * 1024 * 1024); + if (ret) { + test_msg("Error removing extent %d\n", ret); + return ret; + } + + if (test_check_exists(cache, 0, 4 * 1024 * 1024)) { + test_msg("Full remove left some lingering space\n"); + return -1; + } + + /* Ok edge and middle cases now */ + ret = btrfs_add_free_space(cache, 0, 4 * 1024 * 1024); + if (ret) { + test_msg("Error adding half extent %d\n", ret); + return ret; + } + + ret = btrfs_remove_free_space(cache, 3 * 1024 * 1024, 1 * 1024 * 1024); + if (ret) { + test_msg("Error removing tail end %d\n", ret); + return ret; + } + + ret = btrfs_remove_free_space(cache, 0, 1 * 1024 * 1024); + if (ret) { + test_msg("Error removing front end %d\n", ret); + return ret; + } + + ret = btrfs_remove_free_space(cache, 2 * 1024 * 1024, 4096); + if (ret) { + test_msg("Error removing middle peice %d\n", ret); + return ret; + } + + if (test_check_exists(cache, 0, 1 * 1024 * 1024)) { + test_msg("Still have space at the front\n"); + return -1; + } + + if (test_check_exists(cache, 2 * 1024 * 1024, 4096)) { + test_msg("Still have space in the middle\n"); + return -1; + } + + if (test_check_exists(cache, 3 * 1024 * 1024, 1 * 1024 * 1024)) { + test_msg("Still have space at the end\n"); + return -1; + } + + /* Cleanup */ + __btrfs_remove_free_space_cache(cache->free_space_ctl); + + return 0; +} + +static int test_bitmaps(struct btrfs_block_group_cache *cache) +{ + u64 next_bitmap_offset; + int ret; + + test_msg("Running bitmap only tests\n"); + + ret = test_add_free_space_entry(cache, 0, 4 * 1024 * 1024, 1); + if (ret) { + test_msg("Couldn't create a bitmap entry %d\n", ret); + return ret; + } + + ret = btrfs_remove_free_space(cache, 0, 4 * 1024 * 1024); + if (ret) { + test_msg("Error removing bitmap full range %d\n", ret); + return ret; + } + + if (test_check_exists(cache, 0, 4 * 1024 * 1024)) { + test_msg("Left some space in bitmap\n"); + return -1; + } + + ret = test_add_free_space_entry(cache, 0, 4 * 1024 * 1024, 1); + if (ret) { + test_msg("Couldn't add to our bitmap entry %d\n", ret); + return ret; + } + + ret = btrfs_remove_free_space(cache, 1 * 1024 * 1024, 2 * 1024 * 1024); + if (ret) { + test_msg("Couldn't remove middle chunk %d\n", ret); + return ret; + } + + /* + * The first bitmap we have starts at offset 0 so the next one is just + * at the end of the first bitmap. + */ + next_bitmap_offset = (u64)(BITS_PER_BITMAP * 4096); + + /* Test a bit straddling two bitmaps */ + ret = test_add_free_space_entry(cache, next_bitmap_offset - + (2 * 1024 * 1024), 4 * 1024 * 1024, 1); + if (ret) { + test_msg("Couldn't add space that straddles two bitmaps %d\n", + ret); + return ret; + } + + ret = btrfs_remove_free_space(cache, next_bitmap_offset - + (1 * 1024 * 1024), 2 * 1024 * 1024); + if (ret) { + test_msg("Couldn't remove overlapping space %d\n", ret); + return ret; + } + + if (test_check_exists(cache, next_bitmap_offset - (1 * 1024 * 1024), + 2 * 1024 * 1024)) { + test_msg("Left some space when removing overlapping\n"); + return -1; + } + + __btrfs_remove_free_space_cache(cache->free_space_ctl); + + return 0; +} + +/* This is the high grade jackassery */ +static int test_bitmaps_and_extents(struct btrfs_block_group_cache *cache) +{ + u64 bitmap_offset = (u64)(BITS_PER_BITMAP * 4096); + int ret; + + test_msg("Running bitmap and extent tests\n"); + + /* + * First let's do something simple, an extent at the same offset as the + * bitmap, but the free space completely in the extent and then + * completely in the bitmap. + */ + ret = test_add_free_space_entry(cache, 4 * 1024 * 1024, 1 * 1024 * 1024, 1); + if (ret) { + test_msg("Couldn't create bitmap entry %d\n", ret); + return ret; + } + + ret = test_add_free_space_entry(cache, 0, 1 * 1024 * 1024, 0); + if (ret) { + test_msg("Couldn't add extent entry %d\n", ret); + return ret; + } + + ret = btrfs_remove_free_space(cache, 0, 1 * 1024 * 1024); + if (ret) { + test_msg("Couldn't remove extent entry %d\n", ret); + return ret; + } + + if (test_check_exists(cache, 0, 1 * 1024 * 1024)) { + test_msg("Left remnants after our remove\n"); + return -1; + } + + /* Now to add back the extent entry and remove from the bitmap */ + ret = test_add_free_space_entry(cache, 0, 1 * 1024 * 1024, 0); + if (ret) { + test_msg("Couldn't re-add extent entry %d\n", ret); + return ret; + } + + ret = btrfs_remove_free_space(cache, 4 * 1024 * 1024, 1 * 1024 * 1024); + if (ret) { + test_msg("Couldn't remove from bitmap %d\n", ret); + return ret; + } + + if (test_check_exists(cache, 4 * 1024 * 1024, 1 * 1024 * 1024)) { + test_msg("Left remnants in the bitmap\n"); + return -1; + } + + /* + * Ok so a little more evil, extent entry and bitmap at the same offset, + * removing an overlapping chunk. + */ + ret = test_add_free_space_entry(cache, 1 * 1024 * 1024, 4 * 1024 * 1024, 1); + if (ret) { + test_msg("Couldn't add to a bitmap %d\n", ret); + return ret; + } + + ret = btrfs_remove_free_space(cache, 512 * 1024, 3 * 1024 * 1024); + if (ret) { + test_msg("Couldn't remove overlapping space %d\n", ret); + return ret; + } + + if (test_check_exists(cache, 512 * 1024, 3 * 1024 * 1024)) { + test_msg("Left over peices after removing overlapping\n"); + return -1; + } + + __btrfs_remove_free_space_cache(cache->free_space_ctl); + + /* Now with the extent entry offset into the bitmap */ + ret = test_add_free_space_entry(cache, 4 * 1024 * 1024, 4 * 1024 * 1024, 1); + if (ret) { + test_msg("Couldn't add space to the bitmap %d\n", ret); + return ret; + } + + ret = test_add_free_space_entry(cache, 2 * 1024 * 1024, 2 * 1024 * 1024, 0); + if (ret) { + test_msg("Couldn't add extent to the cache %d\n", ret); + return ret; + } + + ret = btrfs_remove_free_space(cache, 3 * 1024 * 1024, 4 * 1024 * 1024); + if (ret) { + test_msg("Problem removing overlapping space %d\n", ret); + return ret; + } + + if (test_check_exists(cache, 3 * 1024 * 1024, 4 * 1024 * 1024)) { + test_msg("Left something behind when removing space"); + return -1; + } + + /* + * This has blown up in the past, the extent entry starts before the + * bitmap entry, but we're trying to remove an offset that falls + * completely within the bitmap range and is in both the extent entry + * and the bitmap entry, looks like this + * + * [ extent ] + * [ bitmap ] + * [ del ] + */ + __btrfs_remove_free_space_cache(cache->free_space_ctl); + ret = test_add_free_space_entry(cache, bitmap_offset + 4 * 1024 * 1024, + 4 * 1024 * 1024, 1); + if (ret) { + test_msg("Couldn't add bitmap %d\n", ret); + return ret; + } + + ret = test_add_free_space_entry(cache, bitmap_offset - 1 * 1024 * 1024, + 5 * 1024 * 1024, 0); + if (ret) { + test_msg("Couldn't add extent entry %d\n", ret); + return ret; + } + + ret = btrfs_remove_free_space(cache, bitmap_offset + 1 * 1024 * 1024, + 5 * 1024 * 1024); + if (ret) { + test_msg("Failed to free our space %d\n", ret); + return ret; + } + + if (test_check_exists(cache, bitmap_offset + 1 * 1024 * 1024, + 5 * 1024 * 1024)) { + test_msg("Left stuff over\n"); + return -1; + } + + __btrfs_remove_free_space_cache(cache->free_space_ctl); + + /* + * This blew up before, we have part of the free space in a bitmap and + * then the entirety of the rest of the space in an extent. This used + * to return -EAGAIN back from btrfs_remove_extent, make sure this + * doesn't happen. + */ + ret = test_add_free_space_entry(cache, 1 * 1024 * 1024, 2 * 1024 * 1024, 1); + if (ret) { + test_msg("Couldn't add bitmap entry %d\n", ret); + return ret; + } + + ret = test_add_free_space_entry(cache, 3 * 1024 * 1024, 1 * 1024 * 1024, 0); + if (ret) { + test_msg("Couldn't add extent entry %d\n", ret); + return ret; + } + + ret = btrfs_remove_free_space(cache, 1 * 1024 * 1024, 3 * 1024 * 1024); + if (ret) { + test_msg("Error removing bitmap and extent overlapping %d\n", ret); + return ret; + } + + __btrfs_remove_free_space_cache(cache->free_space_ctl); + return 0; +} + +int btrfs_test_free_space_cache(void) +{ + struct btrfs_block_group_cache *cache; + int ret; + + test_msg("Running btrfs free space cache tests\n"); + + cache = init_test_block_group(); + if (!cache) { + test_msg("Couldn't run the tests\n"); + return 0; + } + + ret = test_extents(cache); + if (ret) + goto out; + ret = test_bitmaps(cache); + if (ret) + goto out; + ret = test_bitmaps_and_extents(cache); + if (ret) + goto out; +out: + __btrfs_remove_free_space_cache(cache->free_space_ctl); + kfree(cache->free_space_ctl); + kfree(cache); + test_msg("Free space cache tests finished\n"); + return ret; +} diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index af1931a5960d..cac4a3f76323 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -837,7 +837,7 @@ int btrfs_wait_marked_extents(struct btrfs_root *root, * them in one of two extent_io trees. This is used to make sure all of * those extents are on disk for transaction or log commit */ -int btrfs_write_and_wait_marked_extents(struct btrfs_root *root, +static int btrfs_write_and_wait_marked_extents(struct btrfs_root *root, struct extent_io_tree *dirty_pages, int mark) { int ret; @@ -1225,8 +1225,8 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, btrfs_set_root_stransid(new_root_item, 0); btrfs_set_root_rtransid(new_root_item, 0); } - new_root_item->otime.sec = cpu_to_le64(cur_time.tv_sec); - new_root_item->otime.nsec = cpu_to_le32(cur_time.tv_nsec); + btrfs_set_stack_timespec_sec(&new_root_item->otime, cur_time.tv_sec); + btrfs_set_stack_timespec_nsec(&new_root_item->otime, cur_time.tv_nsec); btrfs_set_root_otransid(new_root_item, trans->transid); old = btrfs_lock_root_node(root); @@ -1311,8 +1311,26 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, dentry->d_name.len * 2); parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME; ret = btrfs_update_inode_fallback(trans, parent_root, parent_inode); - if (ret) + if (ret) { + btrfs_abort_transaction(trans, root, ret); + goto fail; + } + ret = btrfs_uuid_tree_add(trans, fs_info->uuid_root, new_uuid.b, + BTRFS_UUID_KEY_SUBVOL, objectid); + if (ret) { btrfs_abort_transaction(trans, root, ret); + goto fail; + } + if (!btrfs_is_empty_uuid(new_root_item->received_uuid)) { + ret = btrfs_uuid_tree_add(trans, fs_info->uuid_root, + new_root_item->received_uuid, + BTRFS_UUID_KEY_RECEIVED_SUBVOL, + objectid); + if (ret && ret != -EEXIST) { + btrfs_abort_transaction(trans, root, ret); + goto fail; + } + } fail: pending->error = ret; dir_item_existed: @@ -1362,6 +1380,8 @@ static void update_super_roots(struct btrfs_root *root) super->root_level = root_item->level; if (btrfs_test_opt(root, SPACE_CACHE)) super->cache_generation = root_item->generation; + if (root->fs_info->update_uuid_tree_gen) + super->uuid_tree_generation = root_item->generation; } int btrfs_transaction_in_commit(struct btrfs_fs_info *info) @@ -1928,8 +1948,7 @@ int btrfs_clean_one_deleted_snapshot(struct btrfs_root *root) list_del_init(&root->root_list); spin_unlock(&fs_info->trans_lock); - pr_debug("btrfs: cleaner removing %llu\n", - (unsigned long long)root->objectid); + pr_debug("btrfs: cleaner removing %llu\n", root->objectid); btrfs_kill_all_delayed_nodes(root); @@ -1942,6 +1961,5 @@ int btrfs_clean_one_deleted_snapshot(struct btrfs_root *root) * If we encounter a transaction abort during snapshot cleaning, we * don't want to crash here */ - BUG_ON(ret < 0 && ret != -EAGAIN && ret != -EROFS); - return 1; + return (ret < 0) ? 0 : 1; } diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index defbc4269897..5c2af8491621 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -160,8 +160,6 @@ int btrfs_should_end_transaction(struct btrfs_trans_handle *trans, void btrfs_throttle(struct btrfs_root *root); int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans, struct btrfs_root *root); -int btrfs_write_and_wait_marked_extents(struct btrfs_root *root, - struct extent_io_tree *dirty_pages, int mark); int btrfs_write_marked_extents(struct btrfs_root *root, struct extent_io_tree *dirty_pages, int mark); int btrfs_wait_marked_extents(struct btrfs_root *root, diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index ff60d8978ae2..0d9613c3f5e5 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -747,7 +747,8 @@ static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans, ret = btrfs_unlink_inode(trans, root, dir, inode, name, name_len); if (ret) goto out; - btrfs_run_delayed_items(trans, root); + else + ret = btrfs_run_delayed_items(trans, root); out: kfree(name); iput(inode); @@ -923,7 +924,9 @@ again: kfree(victim_name); if (ret) return ret; - btrfs_run_delayed_items(trans, root); + ret = btrfs_run_delayed_items(trans, root); + if (ret) + return ret; *search_done = 1; goto again; } @@ -990,7 +993,9 @@ again: inode, victim_name, victim_name_len); - btrfs_run_delayed_items(trans, root); + if (!ret) + ret = btrfs_run_delayed_items( + trans, root); } iput(victim_parent); kfree(victim_name); @@ -1536,8 +1541,10 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans, name_len = btrfs_dir_name_len(eb, di); name = kmalloc(name_len, GFP_NOFS); - if (!name) - return -ENOMEM; + if (!name) { + ret = -ENOMEM; + goto out; + } log_type = btrfs_dir_type(eb, di); read_extent_buffer(eb, name, (unsigned long)(di + 1), @@ -1810,7 +1817,7 @@ again: ret = btrfs_unlink_inode(trans, root, dir, inode, name, name_len); if (!ret) - btrfs_run_delayed_items(trans, root); + ret = btrfs_run_delayed_items(trans, root); kfree(name); iput(inode); if (ret) diff --git a/fs/btrfs/uuid-tree.c b/fs/btrfs/uuid-tree.c new file mode 100644 index 000000000000..dd0dea3766f7 --- /dev/null +++ b/fs/btrfs/uuid-tree.c @@ -0,0 +1,358 @@ +/* + * Copyright (C) STRATO AG 2013. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ +#include <linux/uuid.h> +#include <asm/unaligned.h> +#include "ctree.h" +#include "transaction.h" +#include "disk-io.h" +#include "print-tree.h" + + +static void btrfs_uuid_to_key(u8 *uuid, u8 type, struct btrfs_key *key) +{ + key->type = type; + key->objectid = get_unaligned_le64(uuid); + key->offset = get_unaligned_le64(uuid + sizeof(u64)); +} + +/* return -ENOENT for !found, < 0 for errors, or 0 if an item was found */ +static int btrfs_uuid_tree_lookup(struct btrfs_root *uuid_root, u8 *uuid, + u8 type, u64 subid) +{ + int ret; + struct btrfs_path *path = NULL; + struct extent_buffer *eb; + int slot; + u32 item_size; + unsigned long offset; + struct btrfs_key key; + + if (WARN_ON_ONCE(!uuid_root)) { + ret = -ENOENT; + goto out; + } + + path = btrfs_alloc_path(); + if (!path) { + ret = -ENOMEM; + goto out; + } + + btrfs_uuid_to_key(uuid, type, &key); + ret = btrfs_search_slot(NULL, uuid_root, &key, path, 0, 0); + if (ret < 0) { + goto out; + } else if (ret > 0) { + ret = -ENOENT; + goto out; + } + + eb = path->nodes[0]; + slot = path->slots[0]; + item_size = btrfs_item_size_nr(eb, slot); + offset = btrfs_item_ptr_offset(eb, slot); + ret = -ENOENT; + + if (!IS_ALIGNED(item_size, sizeof(u64))) { + pr_warn("btrfs: uuid item with illegal size %lu!\n", + (unsigned long)item_size); + goto out; + } + while (item_size) { + __le64 data; + + read_extent_buffer(eb, &data, offset, sizeof(data)); + if (le64_to_cpu(data) == subid) { + ret = 0; + break; + } + offset += sizeof(data); + item_size -= sizeof(data); + } + +out: + btrfs_free_path(path); + return ret; +} + +int btrfs_uuid_tree_add(struct btrfs_trans_handle *trans, + struct btrfs_root *uuid_root, u8 *uuid, u8 type, + u64 subid_cpu) +{ + int ret; + struct btrfs_path *path = NULL; + struct btrfs_key key; + struct extent_buffer *eb; + int slot; + unsigned long offset; + __le64 subid_le; + + ret = btrfs_uuid_tree_lookup(uuid_root, uuid, type, subid_cpu); + if (ret != -ENOENT) + return ret; + + if (WARN_ON_ONCE(!uuid_root)) { + ret = -EINVAL; + goto out; + } + + btrfs_uuid_to_key(uuid, type, &key); + + path = btrfs_alloc_path(); + if (!path) { + ret = -ENOMEM; + goto out; + } + + ret = btrfs_insert_empty_item(trans, uuid_root, path, &key, + sizeof(subid_le)); + if (ret >= 0) { + /* Add an item for the type for the first time */ + eb = path->nodes[0]; + slot = path->slots[0]; + offset = btrfs_item_ptr_offset(eb, slot); + } else if (ret == -EEXIST) { + /* + * An item with that type already exists. + * Extend the item and store the new subid at the end. + */ + btrfs_extend_item(uuid_root, path, sizeof(subid_le)); + eb = path->nodes[0]; + slot = path->slots[0]; + offset = btrfs_item_ptr_offset(eb, slot); + offset += btrfs_item_size_nr(eb, slot) - sizeof(subid_le); + } else if (ret < 0) { + pr_warn("btrfs: insert uuid item failed %d (0x%016llx, 0x%016llx) type %u!\n", + ret, (unsigned long long)key.objectid, + (unsigned long long)key.offset, type); + goto out; + } + + ret = 0; + subid_le = cpu_to_le64(subid_cpu); + write_extent_buffer(eb, &subid_le, offset, sizeof(subid_le)); + btrfs_mark_buffer_dirty(eb); + +out: + btrfs_free_path(path); + return ret; +} + +int btrfs_uuid_tree_rem(struct btrfs_trans_handle *trans, + struct btrfs_root *uuid_root, u8 *uuid, u8 type, + u64 subid) +{ + int ret; + struct btrfs_path *path = NULL; + struct btrfs_key key; + struct extent_buffer *eb; + int slot; + unsigned long offset; + u32 item_size; + unsigned long move_dst; + unsigned long move_src; + unsigned long move_len; + + if (WARN_ON_ONCE(!uuid_root)) { + ret = -EINVAL; + goto out; + } + + btrfs_uuid_to_key(uuid, type, &key); + + path = btrfs_alloc_path(); + if (!path) { + ret = -ENOMEM; + goto out; + } + + ret = btrfs_search_slot(trans, uuid_root, &key, path, -1, 1); + if (ret < 0) { + pr_warn("btrfs: error %d while searching for uuid item!\n", + ret); + goto out; + } + if (ret > 0) { + ret = -ENOENT; + goto out; + } + + eb = path->nodes[0]; + slot = path->slots[0]; + offset = btrfs_item_ptr_offset(eb, slot); + item_size = btrfs_item_size_nr(eb, slot); + if (!IS_ALIGNED(item_size, sizeof(u64))) { + pr_warn("btrfs: uuid item with illegal size %lu!\n", + (unsigned long)item_size); + ret = -ENOENT; + goto out; + } + while (item_size) { + __le64 read_subid; + + read_extent_buffer(eb, &read_subid, offset, sizeof(read_subid)); + if (le64_to_cpu(read_subid) == subid) + break; + offset += sizeof(read_subid); + item_size -= sizeof(read_subid); + } + + if (!item_size) { + ret = -ENOENT; + goto out; + } + + item_size = btrfs_item_size_nr(eb, slot); + if (item_size == sizeof(subid)) { + ret = btrfs_del_item(trans, uuid_root, path); + goto out; + } + + move_dst = offset; + move_src = offset + sizeof(subid); + move_len = item_size - (move_src - btrfs_item_ptr_offset(eb, slot)); + memmove_extent_buffer(eb, move_dst, move_src, move_len); + btrfs_truncate_item(uuid_root, path, item_size - sizeof(subid), 1); + +out: + btrfs_free_path(path); + return ret; +} + +static int btrfs_uuid_iter_rem(struct btrfs_root *uuid_root, u8 *uuid, u8 type, + u64 subid) +{ + struct btrfs_trans_handle *trans; + int ret; + + /* 1 - for the uuid item */ + trans = btrfs_start_transaction(uuid_root, 1); + if (IS_ERR(trans)) { + ret = PTR_ERR(trans); + goto out; + } + + ret = btrfs_uuid_tree_rem(trans, uuid_root, uuid, type, subid); + btrfs_end_transaction(trans, uuid_root); + +out: + return ret; +} + +int btrfs_uuid_tree_iterate(struct btrfs_fs_info *fs_info, + int (*check_func)(struct btrfs_fs_info *, u8 *, u8, + u64)) +{ + struct btrfs_root *root = fs_info->uuid_root; + struct btrfs_key key; + struct btrfs_key max_key; + struct btrfs_path *path; + int ret = 0; + struct extent_buffer *leaf; + int slot; + u32 item_size; + unsigned long offset; + + path = btrfs_alloc_path(); + if (!path) { + ret = -ENOMEM; + goto out; + } + + key.objectid = 0; + key.type = 0; + key.offset = 0; + max_key.objectid = (u64)-1; + max_key.type = (u8)-1; + max_key.offset = (u64)-1; + +again_search_slot: + path->keep_locks = 1; + ret = btrfs_search_forward(root, &key, &max_key, path, 0); + if (ret) { + if (ret > 0) + ret = 0; + goto out; + } + + while (1) { + cond_resched(); + leaf = path->nodes[0]; + slot = path->slots[0]; + btrfs_item_key_to_cpu(leaf, &key, slot); + + if (key.type != BTRFS_UUID_KEY_SUBVOL && + key.type != BTRFS_UUID_KEY_RECEIVED_SUBVOL) + goto skip; + + offset = btrfs_item_ptr_offset(leaf, slot); + item_size = btrfs_item_size_nr(leaf, slot); + if (!IS_ALIGNED(item_size, sizeof(u64))) { + pr_warn("btrfs: uuid item with illegal size %lu!\n", + (unsigned long)item_size); + goto skip; + } + while (item_size) { + u8 uuid[BTRFS_UUID_SIZE]; + __le64 subid_le; + u64 subid_cpu; + + put_unaligned_le64(key.objectid, uuid); + put_unaligned_le64(key.offset, uuid + sizeof(u64)); + read_extent_buffer(leaf, &subid_le, offset, + sizeof(subid_le)); + subid_cpu = le64_to_cpu(subid_le); + ret = check_func(fs_info, uuid, key.type, subid_cpu); + if (ret < 0) + goto out; + if (ret > 0) { + btrfs_release_path(path); + ret = btrfs_uuid_iter_rem(root, uuid, key.type, + subid_cpu); + if (ret == 0) { + /* + * this might look inefficient, but the + * justification is that it is an + * exception that check_func returns 1, + * and that in the regular case only one + * entry per UUID exists. + */ + goto again_search_slot; + } + if (ret < 0 && ret != -ENOENT) + goto out; + } + item_size -= sizeof(subid_le); + offset += sizeof(subid_le); + } + +skip: + ret = btrfs_next_item(root, path); + if (ret == 0) + continue; + else if (ret > 0) + ret = 0; + break; + } + +out: + btrfs_free_path(path); + if (ret) + pr_warn("btrfs: btrfs_uuid_tree_iterate failed %d\n", ret); + return 0; +} diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 67a085381845..0052ca8264d9 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -26,6 +26,7 @@ #include <linux/ratelimit.h> #include <linux/kthread.h> #include <linux/raid/pq.h> +#include <linux/semaphore.h> #include <asm/div64.h> #include "compat.h" #include "ctree.h" @@ -62,6 +63,48 @@ static void unlock_chunks(struct btrfs_root *root) mutex_unlock(&root->fs_info->chunk_mutex); } +static struct btrfs_fs_devices *__alloc_fs_devices(void) +{ + struct btrfs_fs_devices *fs_devs; + + fs_devs = kzalloc(sizeof(*fs_devs), GFP_NOFS); + if (!fs_devs) + return ERR_PTR(-ENOMEM); + + mutex_init(&fs_devs->device_list_mutex); + + INIT_LIST_HEAD(&fs_devs->devices); + INIT_LIST_HEAD(&fs_devs->alloc_list); + INIT_LIST_HEAD(&fs_devs->list); + + return fs_devs; +} + +/** + * alloc_fs_devices - allocate struct btrfs_fs_devices + * @fsid: a pointer to UUID for this FS. If NULL a new UUID is + * generated. + * + * Return: a pointer to a new &struct btrfs_fs_devices on success; + * ERR_PTR() on error. Returned struct is not linked onto any lists and + * can be destroyed with kfree() right away. + */ +static struct btrfs_fs_devices *alloc_fs_devices(const u8 *fsid) +{ + struct btrfs_fs_devices *fs_devs; + + fs_devs = __alloc_fs_devices(); + if (IS_ERR(fs_devs)) + return fs_devs; + + if (fsid) + memcpy(fs_devs->fsid, fsid, BTRFS_FSID_SIZE); + else + generate_random_uuid(fs_devs->fsid); + + return fs_devs; +} + static void free_fs_devices(struct btrfs_fs_devices *fs_devices) { struct btrfs_device *device; @@ -101,6 +144,27 @@ void btrfs_cleanup_fs_uuids(void) } } +static struct btrfs_device *__alloc_device(void) +{ + struct btrfs_device *dev; + + dev = kzalloc(sizeof(*dev), GFP_NOFS); + if (!dev) + return ERR_PTR(-ENOMEM); + + INIT_LIST_HEAD(&dev->dev_list); + INIT_LIST_HEAD(&dev->dev_alloc_list); + + spin_lock_init(&dev->io_lock); + + spin_lock_init(&dev->reada_lock); + atomic_set(&dev->reada_in_flight, 0); + INIT_RADIX_TREE(&dev->reada_zones, GFP_NOFS & ~__GFP_WAIT); + INIT_RADIX_TREE(&dev->reada_extents, GFP_NOFS & ~__GFP_WAIT); + + return dev; +} + static noinline struct btrfs_device *__find_device(struct list_head *head, u64 devid, u8 *uuid) { @@ -395,16 +459,14 @@ static noinline int device_list_add(const char *path, fs_devices = find_fsid(disk_super->fsid); if (!fs_devices) { - fs_devices = kzalloc(sizeof(*fs_devices), GFP_NOFS); - if (!fs_devices) - return -ENOMEM; - INIT_LIST_HEAD(&fs_devices->devices); - INIT_LIST_HEAD(&fs_devices->alloc_list); + fs_devices = alloc_fs_devices(disk_super->fsid); + if (IS_ERR(fs_devices)) + return PTR_ERR(fs_devices); + list_add(&fs_devices->list, &fs_uuids); - memcpy(fs_devices->fsid, disk_super->fsid, BTRFS_FSID_SIZE); fs_devices->latest_devid = devid; fs_devices->latest_trans = found_transid; - mutex_init(&fs_devices->device_list_mutex); + device = NULL; } else { device = __find_device(&fs_devices->devices, devid, @@ -414,17 +476,12 @@ static noinline int device_list_add(const char *path, if (fs_devices->opened) return -EBUSY; - device = kzalloc(sizeof(*device), GFP_NOFS); - if (!device) { + device = btrfs_alloc_device(NULL, &devid, + disk_super->dev_item.uuid); + if (IS_ERR(device)) { /* we can safely leave the fs_devices entry around */ - return -ENOMEM; + return PTR_ERR(device); } - device->devid = devid; - device->dev_stats_valid = 0; - device->work.func = pending_bios_fn; - memcpy(device->uuid, disk_super->dev_item.uuid, - BTRFS_UUID_SIZE); - spin_lock_init(&device->io_lock); name = rcu_string_strdup(path, GFP_NOFS); if (!name) { @@ -432,22 +489,13 @@ static noinline int device_list_add(const char *path, return -ENOMEM; } rcu_assign_pointer(device->name, name); - INIT_LIST_HEAD(&device->dev_alloc_list); - - /* init readahead state */ - spin_lock_init(&device->reada_lock); - device->reada_curr_zone = NULL; - atomic_set(&device->reada_in_flight, 0); - device->reada_next = 0; - INIT_RADIX_TREE(&device->reada_zones, GFP_NOFS & ~__GFP_WAIT); - INIT_RADIX_TREE(&device->reada_extents, GFP_NOFS & ~__GFP_WAIT); mutex_lock(&fs_devices->device_list_mutex); list_add_rcu(&device->dev_list, &fs_devices->devices); + fs_devices->num_devices++; mutex_unlock(&fs_devices->device_list_mutex); device->fs_devices = fs_devices; - fs_devices->num_devices++; } else if (!device->name || strcmp(device->name->str, path)) { name = rcu_string_strdup(path, GFP_NOFS); if (!name) @@ -474,25 +522,21 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig) struct btrfs_device *device; struct btrfs_device *orig_dev; - fs_devices = kzalloc(sizeof(*fs_devices), GFP_NOFS); - if (!fs_devices) - return ERR_PTR(-ENOMEM); + fs_devices = alloc_fs_devices(orig->fsid); + if (IS_ERR(fs_devices)) + return fs_devices; - INIT_LIST_HEAD(&fs_devices->devices); - INIT_LIST_HEAD(&fs_devices->alloc_list); - INIT_LIST_HEAD(&fs_devices->list); - mutex_init(&fs_devices->device_list_mutex); fs_devices->latest_devid = orig->latest_devid; fs_devices->latest_trans = orig->latest_trans; fs_devices->total_devices = orig->total_devices; - memcpy(fs_devices->fsid, orig->fsid, sizeof(fs_devices->fsid)); /* We have held the volume lock, it is safe to get the devices. */ list_for_each_entry(orig_dev, &orig->devices, dev_list) { struct rcu_string *name; - device = kzalloc(sizeof(*device), GFP_NOFS); - if (!device) + device = btrfs_alloc_device(NULL, &orig_dev->devid, + orig_dev->uuid); + if (IS_ERR(device)) goto error; /* @@ -506,13 +550,6 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig) } rcu_assign_pointer(device->name, name); - device->devid = orig_dev->devid; - device->work.func = pending_bios_fn; - memcpy(device->uuid, orig_dev->uuid, sizeof(device->uuid)); - spin_lock_init(&device->io_lock); - INIT_LIST_HEAD(&device->dev_list); - INIT_LIST_HEAD(&device->dev_alloc_list); - list_add(&device->dev_list, &fs_devices->devices); device->fs_devices = fs_devices; fs_devices->num_devices++; @@ -636,23 +673,22 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices) if (device->can_discard) fs_devices->num_can_discard--; + if (device->missing) + fs_devices->missing_devices--; - new_device = kmalloc(sizeof(*new_device), GFP_NOFS); - BUG_ON(!new_device); /* -ENOMEM */ - memcpy(new_device, device, sizeof(*new_device)); + new_device = btrfs_alloc_device(NULL, &device->devid, + device->uuid); + BUG_ON(IS_ERR(new_device)); /* -ENOMEM */ /* Safe because we are under uuid_mutex */ if (device->name) { name = rcu_string_strdup(device->name->str, GFP_NOFS); - BUG_ON(device->name && !name); /* -ENOMEM */ + BUG_ON(!name); /* -ENOMEM */ rcu_assign_pointer(new_device->name, name); } - new_device->bdev = NULL; - new_device->writeable = 0; - new_device->in_fs_metadata = 0; - new_device->can_discard = 0; - spin_lock_init(&new_device->io_lock); + list_replace_rcu(&device->dev_list, &new_device->dev_list); + new_device->fs_devices = device->fs_devices; call_rcu(&device->rcu, free_device); } @@ -865,7 +901,7 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder, disk_super = p + (bytenr & ~PAGE_CACHE_MASK); if (btrfs_super_bytenr(disk_super) != bytenr || - disk_super->magic != cpu_to_le64(BTRFS_MAGIC)) + btrfs_super_magic(disk_super) != BTRFS_MAGIC) goto error_unmap; devid = btrfs_stack_device_id(&disk_super->dev_item); @@ -880,8 +916,7 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder, printk(KERN_INFO "device fsid %pU ", disk_super->fsid); } - printk(KERN_CONT "devid %llu transid %llu %s\n", - (unsigned long long)devid, (unsigned long long)transid, path); + printk(KERN_CONT "devid %llu transid %llu %s\n", devid, transid, path); ret = device_list_add(path, disk_super, devid, fs_devices_ret); if (!ret && fs_devices_ret) @@ -1278,8 +1313,7 @@ static int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans, btrfs_set_dev_extent_chunk_offset(leaf, extent, chunk_offset); write_extent_buffer(leaf, root->fs_info->chunk_tree_uuid, - (unsigned long)btrfs_dev_extent_chunk_tree_uuid(extent), - BTRFS_UUID_SIZE); + btrfs_dev_extent_chunk_tree_uuid(extent), BTRFS_UUID_SIZE); btrfs_set_dev_extent_length(leaf, extent, num_bytes); btrfs_mark_buffer_dirty(leaf); @@ -1307,15 +1341,14 @@ static u64 find_next_chunk(struct btrfs_fs_info *fs_info) return ret; } -static noinline int find_next_devid(struct btrfs_root *root, u64 *objectid) +static noinline int find_next_devid(struct btrfs_fs_info *fs_info, + u64 *devid_ret) { int ret; struct btrfs_key key; struct btrfs_key found_key; struct btrfs_path *path; - root = root->fs_info->chunk_root; - path = btrfs_alloc_path(); if (!path) return -ENOMEM; @@ -1324,20 +1357,21 @@ static noinline int find_next_devid(struct btrfs_root *root, u64 *objectid) key.type = BTRFS_DEV_ITEM_KEY; key.offset = (u64)-1; - ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0); if (ret < 0) goto error; BUG_ON(ret == 0); /* Corruption */ - ret = btrfs_previous_item(root, path, BTRFS_DEV_ITEMS_OBJECTID, + ret = btrfs_previous_item(fs_info->chunk_root, path, + BTRFS_DEV_ITEMS_OBJECTID, BTRFS_DEV_ITEM_KEY); if (ret) { - *objectid = 1; + *devid_ret = 1; } else { btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]); - *objectid = found_key.offset + 1; + *devid_ret = found_key.offset + 1; } ret = 0; error: @@ -1391,9 +1425,9 @@ static int btrfs_add_device(struct btrfs_trans_handle *trans, btrfs_set_device_bandwidth(leaf, dev_item, 0); btrfs_set_device_start_offset(leaf, dev_item, 0); - ptr = (unsigned long)btrfs_device_uuid(dev_item); + ptr = btrfs_device_uuid(dev_item); write_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE); - ptr = (unsigned long)btrfs_device_fsid(dev_item); + ptr = btrfs_device_fsid(dev_item); write_extent_buffer(leaf, root->fs_info->fsid, ptr, BTRFS_UUID_SIZE); btrfs_mark_buffer_dirty(leaf); @@ -1562,7 +1596,9 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) clear_super = true; } + mutex_unlock(&uuid_mutex); ret = btrfs_shrink_device(device, 0); + mutex_lock(&uuid_mutex); if (ret) goto error_undo; @@ -1586,7 +1622,11 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) /* * the device list mutex makes sure that we don't change * the device list while someone else is writing out all - * the device supers. + * the device supers. Whoever is writing all supers, should + * lock the device list mutex before getting the number of + * devices in the super block (super_copy). Conversely, + * whoever updates the number of devices in the super block + * (super_copy) should hold the device list mutex. */ cur_devices = device->fs_devices; @@ -1610,10 +1650,10 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) device->fs_devices->open_devices--; call_rcu(&device->rcu, free_device); - mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); num_devices = btrfs_super_num_devices(root->fs_info->super_copy) - 1; btrfs_set_super_num_devices(root->fs_info->super_copy, num_devices); + mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); if (cur_devices->open_devices == 0) { struct btrfs_fs_devices *fs_devices; @@ -1793,9 +1833,9 @@ static int btrfs_prepare_sprout(struct btrfs_root *root) if (!fs_devices->seeding) return -EINVAL; - seed_devices = kzalloc(sizeof(*fs_devices), GFP_NOFS); - if (!seed_devices) - return -ENOMEM; + seed_devices = __alloc_fs_devices(); + if (IS_ERR(seed_devices)) + return PTR_ERR(seed_devices); old_devices = clone_fs_devices(fs_devices); if (IS_ERR(old_devices)) { @@ -1814,7 +1854,6 @@ static int btrfs_prepare_sprout(struct btrfs_root *root) mutex_lock(&root->fs_info->fs_devices->device_list_mutex); list_splice_init_rcu(&fs_devices->devices, &seed_devices->devices, synchronize_rcu); - mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); list_splice_init(&fs_devices->alloc_list, &seed_devices->alloc_list); list_for_each_entry(device, &seed_devices->devices, dev_list) { @@ -1830,6 +1869,8 @@ static int btrfs_prepare_sprout(struct btrfs_root *root) generate_random_uuid(fs_devices->fsid); memcpy(root->fs_info->fsid, fs_devices->fsid, BTRFS_FSID_SIZE); memcpy(disk_super->fsid, fs_devices->fsid, BTRFS_FSID_SIZE); + mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); + super_flags = btrfs_super_flags(disk_super) & ~BTRFS_SUPER_FLAG_SEEDING; btrfs_set_super_flags(disk_super, super_flags); @@ -1889,11 +1930,9 @@ next_slot: dev_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_item); devid = btrfs_device_id(leaf, dev_item); - read_extent_buffer(leaf, dev_uuid, - (unsigned long)btrfs_device_uuid(dev_item), + read_extent_buffer(leaf, dev_uuid, btrfs_device_uuid(dev_item), BTRFS_UUID_SIZE); - read_extent_buffer(leaf, fs_uuid, - (unsigned long)btrfs_device_fsid(dev_item), + read_extent_buffer(leaf, fs_uuid, btrfs_device_fsid(dev_item), BTRFS_UUID_SIZE); device = btrfs_find_device(root->fs_info, devid, dev_uuid, fs_uuid); @@ -1956,10 +1995,10 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) } mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); - device = kzalloc(sizeof(*device), GFP_NOFS); - if (!device) { + device = btrfs_alloc_device(root->fs_info, NULL, NULL); + if (IS_ERR(device)) { /* we can safely leave the fs_devices entry around */ - ret = -ENOMEM; + ret = PTR_ERR(device); goto error; } @@ -1971,13 +2010,6 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) } rcu_assign_pointer(device->name, name); - ret = find_next_devid(root, &device->devid); - if (ret) { - rcu_string_free(device->name); - kfree(device); - goto error; - } - trans = btrfs_start_transaction(root, 0); if (IS_ERR(trans)) { rcu_string_free(device->name); @@ -1992,9 +2024,6 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) if (blk_queue_discard(q)) device->can_discard = 1; device->writeable = 1; - device->work.func = pending_bios_fn; - generate_random_uuid(device->uuid); - spin_lock_init(&device->io_lock); device->generation = trans->transid; device->io_width = root->sectorsize; device->io_align = root->sectorsize; @@ -2121,6 +2150,7 @@ int btrfs_init_dev_replace_tgtdev(struct btrfs_root *root, char *device_path, struct btrfs_fs_info *fs_info = root->fs_info; struct list_head *devices; struct rcu_string *name; + u64 devid = BTRFS_DEV_REPLACE_DEVID; int ret = 0; *device_out = NULL; @@ -2142,9 +2172,9 @@ int btrfs_init_dev_replace_tgtdev(struct btrfs_root *root, char *device_path, } } - device = kzalloc(sizeof(*device), GFP_NOFS); - if (!device) { - ret = -ENOMEM; + device = btrfs_alloc_device(NULL, &devid, NULL); + if (IS_ERR(device)) { + ret = PTR_ERR(device); goto error; } @@ -2161,10 +2191,6 @@ int btrfs_init_dev_replace_tgtdev(struct btrfs_root *root, char *device_path, device->can_discard = 1; mutex_lock(&root->fs_info->fs_devices->device_list_mutex); device->writeable = 1; - device->work.func = pending_bios_fn; - generate_random_uuid(device->uuid); - device->devid = BTRFS_DEV_REPLACE_DEVID; - spin_lock_init(&device->io_lock); device->generation = 0; device->io_width = root->sectorsize; device->io_align = root->sectorsize; @@ -2971,10 +2997,6 @@ again: if (found_key.objectid != key.objectid) break; - /* chunk zero is special */ - if (found_key.offset == 0) - break; - chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk); if (!counting) { @@ -3010,6 +3032,8 @@ again: spin_unlock(&fs_info->balance_lock); } loop: + if (found_key.offset == 0) + break; key.offset = found_key.offset - 1; } @@ -3074,9 +3098,6 @@ static void __cancel_balance(struct btrfs_fs_info *fs_info) atomic_set(&fs_info->mutually_exclusive_operation_running, 0); } -void update_ioctl_balance_args(struct btrfs_fs_info *fs_info, int lock, - struct btrfs_ioctl_balance_args *bargs); - /* * Should be called with both balance and volume mutexes held */ @@ -3139,7 +3160,7 @@ int btrfs_balance(struct btrfs_balance_control *bctl, (bctl->data.target & ~allowed))) { printk(KERN_ERR "btrfs: unable to start balance with target " "data profile %llu\n", - (unsigned long long)bctl->data.target); + bctl->data.target); ret = -EINVAL; goto out; } @@ -3148,7 +3169,7 @@ int btrfs_balance(struct btrfs_balance_control *bctl, (bctl->meta.target & ~allowed))) { printk(KERN_ERR "btrfs: unable to start balance with target " "metadata profile %llu\n", - (unsigned long long)bctl->meta.target); + bctl->meta.target); ret = -EINVAL; goto out; } @@ -3157,7 +3178,7 @@ int btrfs_balance(struct btrfs_balance_control *bctl, (bctl->sys.target & ~allowed))) { printk(KERN_ERR "btrfs: unable to start balance with target " "system profile %llu\n", - (unsigned long long)bctl->sys.target); + bctl->sys.target); ret = -EINVAL; goto out; } @@ -3430,6 +3451,264 @@ int btrfs_cancel_balance(struct btrfs_fs_info *fs_info) return 0; } +static int btrfs_uuid_scan_kthread(void *data) +{ + struct btrfs_fs_info *fs_info = data; + struct btrfs_root *root = fs_info->tree_root; + struct btrfs_key key; + struct btrfs_key max_key; + struct btrfs_path *path = NULL; + int ret = 0; + struct extent_buffer *eb; + int slot; + struct btrfs_root_item root_item; + u32 item_size; + struct btrfs_trans_handle *trans = NULL; + + path = btrfs_alloc_path(); + if (!path) { + ret = -ENOMEM; + goto out; + } + + key.objectid = 0; + key.type = BTRFS_ROOT_ITEM_KEY; + key.offset = 0; + + max_key.objectid = (u64)-1; + max_key.type = BTRFS_ROOT_ITEM_KEY; + max_key.offset = (u64)-1; + + path->keep_locks = 1; + + while (1) { + ret = btrfs_search_forward(root, &key, &max_key, path, 0); + if (ret) { + if (ret > 0) + ret = 0; + break; + } + + if (key.type != BTRFS_ROOT_ITEM_KEY || + (key.objectid < BTRFS_FIRST_FREE_OBJECTID && + key.objectid != BTRFS_FS_TREE_OBJECTID) || + key.objectid > BTRFS_LAST_FREE_OBJECTID) + goto skip; + + eb = path->nodes[0]; + slot = path->slots[0]; + item_size = btrfs_item_size_nr(eb, slot); + if (item_size < sizeof(root_item)) + goto skip; + + read_extent_buffer(eb, &root_item, + btrfs_item_ptr_offset(eb, slot), + (int)sizeof(root_item)); + if (btrfs_root_refs(&root_item) == 0) + goto skip; + + if (!btrfs_is_empty_uuid(root_item.uuid) || + !btrfs_is_empty_uuid(root_item.received_uuid)) { + if (trans) + goto update_tree; + + btrfs_release_path(path); + /* + * 1 - subvol uuid item + * 1 - received_subvol uuid item + */ + trans = btrfs_start_transaction(fs_info->uuid_root, 2); + if (IS_ERR(trans)) { + ret = PTR_ERR(trans); + break; + } + continue; + } else { + goto skip; + } +update_tree: + if (!btrfs_is_empty_uuid(root_item.uuid)) { + ret = btrfs_uuid_tree_add(trans, fs_info->uuid_root, + root_item.uuid, + BTRFS_UUID_KEY_SUBVOL, + key.objectid); + if (ret < 0) { + pr_warn("btrfs: uuid_tree_add failed %d\n", + ret); + break; + } + } + + if (!btrfs_is_empty_uuid(root_item.received_uuid)) { + ret = btrfs_uuid_tree_add(trans, fs_info->uuid_root, + root_item.received_uuid, + BTRFS_UUID_KEY_RECEIVED_SUBVOL, + key.objectid); + if (ret < 0) { + pr_warn("btrfs: uuid_tree_add failed %d\n", + ret); + break; + } + } + +skip: + if (trans) { + ret = btrfs_end_transaction(trans, fs_info->uuid_root); + trans = NULL; + if (ret) + break; + } + + btrfs_release_path(path); + if (key.offset < (u64)-1) { + key.offset++; + } else if (key.type < BTRFS_ROOT_ITEM_KEY) { + key.offset = 0; + key.type = BTRFS_ROOT_ITEM_KEY; + } else if (key.objectid < (u64)-1) { + key.offset = 0; + key.type = BTRFS_ROOT_ITEM_KEY; + key.objectid++; + } else { + break; + } + cond_resched(); + } + +out: + btrfs_free_path(path); + if (trans && !IS_ERR(trans)) + btrfs_end_transaction(trans, fs_info->uuid_root); + if (ret) + pr_warn("btrfs: btrfs_uuid_scan_kthread failed %d\n", ret); + else + fs_info->update_uuid_tree_gen = 1; + up(&fs_info->uuid_tree_rescan_sem); + return 0; +} + +/* + * Callback for btrfs_uuid_tree_iterate(). + * returns: + * 0 check succeeded, the entry is not outdated. + * < 0 if an error occured. + * > 0 if the check failed, which means the caller shall remove the entry. + */ +static int btrfs_check_uuid_tree_entry(struct btrfs_fs_info *fs_info, + u8 *uuid, u8 type, u64 subid) +{ + struct btrfs_key key; + int ret = 0; + struct btrfs_root *subvol_root; + + if (type != BTRFS_UUID_KEY_SUBVOL && + type != BTRFS_UUID_KEY_RECEIVED_SUBVOL) + goto out; + + key.objectid = subid; + key.type = BTRFS_ROOT_ITEM_KEY; + key.offset = (u64)-1; + subvol_root = btrfs_read_fs_root_no_name(fs_info, &key); + if (IS_ERR(subvol_root)) { + ret = PTR_ERR(subvol_root); + if (ret == -ENOENT) + ret = 1; + goto out; + } + + switch (type) { + case BTRFS_UUID_KEY_SUBVOL: + if (memcmp(uuid, subvol_root->root_item.uuid, BTRFS_UUID_SIZE)) + ret = 1; + break; + case BTRFS_UUID_KEY_RECEIVED_SUBVOL: + if (memcmp(uuid, subvol_root->root_item.received_uuid, + BTRFS_UUID_SIZE)) + ret = 1; + break; + } + +out: + return ret; +} + +static int btrfs_uuid_rescan_kthread(void *data) +{ + struct btrfs_fs_info *fs_info = (struct btrfs_fs_info *)data; + int ret; + + /* + * 1st step is to iterate through the existing UUID tree and + * to delete all entries that contain outdated data. + * 2nd step is to add all missing entries to the UUID tree. + */ + ret = btrfs_uuid_tree_iterate(fs_info, btrfs_check_uuid_tree_entry); + if (ret < 0) { + pr_warn("btrfs: iterating uuid_tree failed %d\n", ret); + up(&fs_info->uuid_tree_rescan_sem); + return ret; + } + return btrfs_uuid_scan_kthread(data); +} + +int btrfs_create_uuid_tree(struct btrfs_fs_info *fs_info) +{ + struct btrfs_trans_handle *trans; + struct btrfs_root *tree_root = fs_info->tree_root; + struct btrfs_root *uuid_root; + struct task_struct *task; + int ret; + + /* + * 1 - root node + * 1 - root item + */ + trans = btrfs_start_transaction(tree_root, 2); + if (IS_ERR(trans)) + return PTR_ERR(trans); + + uuid_root = btrfs_create_tree(trans, fs_info, + BTRFS_UUID_TREE_OBJECTID); + if (IS_ERR(uuid_root)) { + btrfs_abort_transaction(trans, tree_root, + PTR_ERR(uuid_root)); + return PTR_ERR(uuid_root); + } + + fs_info->uuid_root = uuid_root; + + ret = btrfs_commit_transaction(trans, tree_root); + if (ret) + return ret; + + down(&fs_info->uuid_tree_rescan_sem); + task = kthread_run(btrfs_uuid_scan_kthread, fs_info, "btrfs-uuid"); + if (IS_ERR(task)) { + /* fs_info->update_uuid_tree_gen remains 0 in all error case */ + pr_warn("btrfs: failed to start uuid_scan task\n"); + up(&fs_info->uuid_tree_rescan_sem); + return PTR_ERR(task); + } + + return 0; +} + +int btrfs_check_uuid_tree(struct btrfs_fs_info *fs_info) +{ + struct task_struct *task; + + down(&fs_info->uuid_tree_rescan_sem); + task = kthread_run(btrfs_uuid_rescan_kthread, fs_info, "btrfs-uuid"); + if (IS_ERR(task)) { + /* fs_info->update_uuid_tree_gen remains 0 in all error case */ + pr_warn("btrfs: failed to start uuid_rescan task\n"); + up(&fs_info->uuid_tree_rescan_sem); + return PTR_ERR(task); + } + + return 0; +} + /* * shrinking a device means finding all of the device extents past * the new size, and then following the back refs to the chunks. @@ -4194,13 +4473,13 @@ int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len) * and exit, so return 1 so the callers don't try to use other copies. */ if (!em) { - btrfs_emerg(fs_info, "No mapping for %Lu-%Lu\n", logical, + btrfs_crit(fs_info, "No mapping for %Lu-%Lu\n", logical, logical+len); return 1; } if (em->start > logical || em->start + em->len < logical) { - btrfs_emerg(fs_info, "Invalid mapping for %Lu-%Lu, got " + btrfs_crit(fs_info, "Invalid mapping for %Lu-%Lu, got " "%Lu-%Lu\n", logical, logical+len, em->start, em->start + em->len); return 1; @@ -4375,8 +4654,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, if (!em) { btrfs_crit(fs_info, "unable to find logical %llu len %llu", - (unsigned long long)logical, - (unsigned long long)*length); + logical, *length); return -EINVAL; } @@ -4671,6 +4949,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, } bbio = kzalloc(btrfs_bio_size(num_alloc_stripes), GFP_NOFS); if (!bbio) { + kfree(raid_map); ret = -ENOMEM; goto out; } @@ -5246,9 +5525,7 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, if (map_length < length) { btrfs_crit(root->fs_info, "mapping failed logical %llu bio len %llu len %llu", - (unsigned long long)logical, - (unsigned long long)length, - (unsigned long long)map_length); + logical, length, map_length); BUG(); } @@ -5314,23 +5591,72 @@ static struct btrfs_device *add_missing_dev(struct btrfs_root *root, struct btrfs_device *device; struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices; - device = kzalloc(sizeof(*device), GFP_NOFS); - if (!device) + device = btrfs_alloc_device(NULL, &devid, dev_uuid); + if (IS_ERR(device)) return NULL; - list_add(&device->dev_list, - &fs_devices->devices); - device->devid = devid; - device->work.func = pending_bios_fn; + + list_add(&device->dev_list, &fs_devices->devices); device->fs_devices = fs_devices; - device->missing = 1; fs_devices->num_devices++; + + device->missing = 1; fs_devices->missing_devices++; - spin_lock_init(&device->io_lock); - INIT_LIST_HEAD(&device->dev_alloc_list); - memcpy(device->uuid, dev_uuid, BTRFS_UUID_SIZE); + return device; } +/** + * btrfs_alloc_device - allocate struct btrfs_device + * @fs_info: used only for generating a new devid, can be NULL if + * devid is provided (i.e. @devid != NULL). + * @devid: a pointer to devid for this device. If NULL a new devid + * is generated. + * @uuid: a pointer to UUID for this device. If NULL a new UUID + * is generated. + * + * Return: a pointer to a new &struct btrfs_device on success; ERR_PTR() + * on error. Returned struct is not linked onto any lists and can be + * destroyed with kfree() right away. + */ +struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info, + const u64 *devid, + const u8 *uuid) +{ + struct btrfs_device *dev; + u64 tmp; + + if (!devid && !fs_info) { + WARN_ON(1); + return ERR_PTR(-EINVAL); + } + + dev = __alloc_device(); + if (IS_ERR(dev)) + return dev; + + if (devid) + tmp = *devid; + else { + int ret; + + ret = find_next_devid(fs_info, &tmp); + if (ret) { + kfree(dev); + return ERR_PTR(ret); + } + } + dev->devid = tmp; + + if (uuid) + memcpy(dev->uuid, uuid, BTRFS_UUID_SIZE); + else + generate_random_uuid(dev->uuid); + + dev->work.func = pending_bios_fn; + + return dev; +} + static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, struct extent_buffer *leaf, struct btrfs_chunk *chunk) @@ -5437,7 +5763,7 @@ static void fill_device_from_item(struct extent_buffer *leaf, WARN_ON(device->devid == BTRFS_DEV_REPLACE_DEVID); device->is_tgtdev_for_dev_replace = 0; - ptr = (unsigned long)btrfs_device_uuid(dev_item); + ptr = btrfs_device_uuid(dev_item); read_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE); } @@ -5500,11 +5826,9 @@ static int read_one_dev(struct btrfs_root *root, u8 dev_uuid[BTRFS_UUID_SIZE]; devid = btrfs_device_id(leaf, dev_item); - read_extent_buffer(leaf, dev_uuid, - (unsigned long)btrfs_device_uuid(dev_item), + read_extent_buffer(leaf, dev_uuid, btrfs_device_uuid(dev_item), BTRFS_UUID_SIZE); - read_extent_buffer(leaf, fs_uuid, - (unsigned long)btrfs_device_fsid(dev_item), + read_extent_buffer(leaf, fs_uuid, btrfs_device_fsid(dev_item), BTRFS_UUID_SIZE); if (memcmp(fs_uuid, root->fs_info->fsid, BTRFS_UUID_SIZE)) { @@ -5519,8 +5843,7 @@ static int read_one_dev(struct btrfs_root *root, return -EIO; if (!device) { - btrfs_warn(root->fs_info, "devid %llu missing", - (unsigned long long)devid); + btrfs_warn(root->fs_info, "devid %llu missing", devid); device = add_missing_dev(root, devid, dev_uuid); if (!device) return -ENOMEM; @@ -5644,14 +5967,15 @@ int btrfs_read_chunk_tree(struct btrfs_root *root) mutex_lock(&uuid_mutex); lock_chunks(root); - /* first we search for all of the device items, and then we - * read in all of the chunk items. This way we can create chunk - * mappings that reference all of the devices that are afound + /* + * Read all device items, and then all the chunk items. All + * device items are found before any chunk item (their object id + * is smaller than the lowest possible object id for a chunk + * item - BTRFS_FIRST_CHUNK_TREE_OBJECTID). */ key.objectid = BTRFS_DEV_ITEMS_OBJECTID; key.offset = 0; key.type = 0; -again: ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); if (ret < 0) goto error; @@ -5667,17 +5991,13 @@ again: break; } btrfs_item_key_to_cpu(leaf, &found_key, slot); - if (key.objectid == BTRFS_DEV_ITEMS_OBJECTID) { - if (found_key.objectid != BTRFS_DEV_ITEMS_OBJECTID) - break; - if (found_key.type == BTRFS_DEV_ITEM_KEY) { - struct btrfs_dev_item *dev_item; - dev_item = btrfs_item_ptr(leaf, slot, + if (found_key.type == BTRFS_DEV_ITEM_KEY) { + struct btrfs_dev_item *dev_item; + dev_item = btrfs_item_ptr(leaf, slot, struct btrfs_dev_item); - ret = read_one_dev(root, leaf, dev_item); - if (ret) - goto error; - } + ret = read_one_dev(root, leaf, dev_item); + if (ret) + goto error; } else if (found_key.type == BTRFS_CHUNK_ITEM_KEY) { struct btrfs_chunk *chunk; chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk); @@ -5687,11 +6007,6 @@ again: } path->slots[0]++; } - if (key.objectid == BTRFS_DEV_ITEMS_OBJECTID) { - key.objectid = 0; - btrfs_release_path(path); - goto again; - } ret = 0; error: unlock_chunks(root); diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 86705583480d..b72f540c8b29 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -152,6 +152,8 @@ struct btrfs_fs_devices { int rotating; }; +#define BTRFS_BIO_INLINE_CSUM_SIZE 64 + /* * we need the mirror number and stripe index to be passed around * the call chain while we are processing end_io (especially errors). @@ -161,9 +163,14 @@ struct btrfs_fs_devices { * we allocate are actually btrfs_io_bios. We'll cram as much of * struct btrfs_bio as we can into this over time. */ +typedef void (btrfs_io_bio_end_io_t) (struct btrfs_io_bio *bio, int err); struct btrfs_io_bio { unsigned long mirror_num; unsigned long stripe_index; + u8 *csum; + u8 csum_inline[BTRFS_BIO_INLINE_CSUM_SIZE]; + u8 *csum_allocated; + btrfs_io_bio_end_io_t *end_io; struct bio bio; }; @@ -298,6 +305,9 @@ void btrfs_close_extra_devices(struct btrfs_fs_info *fs_info, int btrfs_find_device_missing_or_by_path(struct btrfs_root *root, char *device_path, struct btrfs_device **device); +struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info, + const u64 *devid, + const u8 *uuid); int btrfs_rm_device(struct btrfs_root *root, char *device_path); void btrfs_cleanup_fs_uuids(void); int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len); @@ -315,6 +325,8 @@ int btrfs_resume_balance_async(struct btrfs_fs_info *fs_info); int btrfs_recover_balance(struct btrfs_fs_info *fs_info); int btrfs_pause_balance(struct btrfs_fs_info *fs_info); int btrfs_cancel_balance(struct btrfs_fs_info *fs_info); +int btrfs_create_uuid_tree(struct btrfs_fs_info *fs_info); +int btrfs_check_uuid_tree(struct btrfs_fs_info *fs_info); int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset); int find_free_dev_extent(struct btrfs_trans_handle *trans, struct btrfs_device *device, u64 num_bytes, diff --git a/fs/cifs/Makefile b/fs/cifs/Makefile index aa0d68b086eb..1964d212ab08 100644 --- a/fs/cifs/Makefile +++ b/fs/cifs/Makefile @@ -6,7 +6,7 @@ obj-$(CONFIG_CIFS) += cifs.o cifs-y := cifsfs.o cifssmb.o cifs_debug.o connect.o dir.o file.o inode.o \ link.o misc.o netmisc.o smbencrypt.o transport.o asn1.o \ cifs_unicode.o nterr.o xattr.o cifsencrypt.o \ - readdir.o ioctl.o sess.o export.o smb1ops.o + readdir.o ioctl.o sess.o export.o smb1ops.o winucase.o cifs-$(CONFIG_CIFS_ACL) += cifsacl.o diff --git a/fs/cifs/cifs_unicode.h b/fs/cifs/cifs_unicode.h index fe8d6276410a..d8eac3b6cefb 100644 --- a/fs/cifs/cifs_unicode.h +++ b/fs/cifs/cifs_unicode.h @@ -91,6 +91,8 @@ extern __le16 *cifs_strndup_to_utf16(const char *src, const int maxlen, #endif /* CONFIG_CIFS_SMB2 */ #endif +wchar_t cifs_toupper(wchar_t in); + /* * UniStrcat: Concatenate the second string to the first * diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 85ea98d139fc..a16b4e58bcc6 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -255,6 +255,7 @@ cifs_alloc_inode(struct super_block *sb) cifs_inode->server_eof = 0; cifs_inode->uniqueid = 0; cifs_inode->createtime = 0; + cifs_inode->epoch = 0; #ifdef CONFIG_CIFS_SMB2 get_random_bytes(cifs_inode->lease_key, SMB2_LEASE_KEY_SIZE); #endif @@ -357,6 +358,18 @@ cifs_show_cache_flavor(struct seq_file *s, struct cifs_sb_info *cifs_sb) seq_printf(s, "loose"); } +static void +cifs_show_nls(struct seq_file *s, struct nls_table *cur) +{ + struct nls_table *def; + + /* Display iocharset= option if it's not default charset */ + def = load_nls_default(); + if (def != cur) + seq_printf(s, ",iocharset=%s", cur->charset); + unload_nls(def); +} + /* * cifs_show_options() is for displaying mount options in /proc/mounts. * Not all settable options are displayed but most of the important @@ -418,6 +431,9 @@ cifs_show_options(struct seq_file *s, struct dentry *root) seq_printf(s, ",file_mode=0%ho,dir_mode=0%ho", cifs_sb->mnt_file_mode, cifs_sb->mnt_dir_mode); + + cifs_show_nls(s, cifs_sb->local_nls); + if (tcon->seal) seq_printf(s, ",seal"); if (tcon->nocase) @@ -718,7 +734,7 @@ static ssize_t cifs_file_aio_write(struct kiocb *iocb, const struct iovec *iov, written = generic_file_aio_write(iocb, iov, nr_segs, pos); - if (CIFS_I(inode)->clientCanCacheAll) + if (CIFS_CACHE_WRITE(CIFS_I(inode))) return written; rc = filemap_fdatawrite(inode->i_mapping); @@ -743,7 +759,7 @@ static loff_t cifs_llseek(struct file *file, loff_t offset, int whence) * We need to be sure that all dirty pages are written and the * server has the newest file length. */ - if (!CIFS_I(inode)->clientCanCacheRead && inode->i_mapping && + if (!CIFS_CACHE_READ(CIFS_I(inode)) && inode->i_mapping && inode->i_mapping->nrpages != 0) { rc = filemap_fdatawait(inode->i_mapping); if (rc) { @@ -767,8 +783,10 @@ static loff_t cifs_llseek(struct file *file, loff_t offset, int whence) static int cifs_setlease(struct file *file, long arg, struct file_lock **lease) { - /* note that this is called by vfs setlease with i_lock held - to protect *lease from going away */ + /* + * Note that this is called by vfs setlease with i_lock held to + * protect *lease from going away. + */ struct inode *inode = file_inode(file); struct cifsFileInfo *cfile = file->private_data; @@ -776,20 +794,19 @@ static int cifs_setlease(struct file *file, long arg, struct file_lock **lease) return -EINVAL; /* check if file is oplocked */ - if (((arg == F_RDLCK) && - (CIFS_I(inode)->clientCanCacheRead)) || - ((arg == F_WRLCK) && - (CIFS_I(inode)->clientCanCacheAll))) + if (((arg == F_RDLCK) && CIFS_CACHE_READ(CIFS_I(inode))) || + ((arg == F_WRLCK) && CIFS_CACHE_WRITE(CIFS_I(inode)))) return generic_setlease(file, arg, lease); else if (tlink_tcon(cfile->tlink)->local_lease && - !CIFS_I(inode)->clientCanCacheRead) - /* If the server claims to support oplock on this - file, then we still need to check oplock even - if the local_lease mount option is set, but there - are servers which do not support oplock for which - this mount option may be useful if the user - knows that the file won't be changed on the server - by anyone else */ + !CIFS_CACHE_READ(CIFS_I(inode))) + /* + * If the server claims to support oplock on this file, then we + * still need to check oplock even if the local_lease mount + * option is set, but there are servers which do not support + * oplock for which this mount option may be useful if the user + * knows that the file won't be changed on the server by anyone + * else. + */ return generic_setlease(file, arg, lease); else return -EAGAIN; diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 52ca861ed35e..cfa14c80ef3b 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -28,6 +28,7 @@ #include "cifsacl.h" #include <crypto/internal/hash.h> #include <linux/scatterlist.h> +#include <uapi/linux/cifs/cifs_mount.h> #ifdef CONFIG_CIFS_SMB2 #include "smb2pdu.h" #endif @@ -41,12 +42,7 @@ #define MAX_SES_INFO 2 #define MAX_TCON_INFO 4 -#define MAX_TREE_SIZE (2 + MAX_SERVER_SIZE + 1 + MAX_SHARE_SIZE + 1) -#define MAX_SERVER_SIZE 15 -#define MAX_SHARE_SIZE 80 -#define CIFS_MAX_DOMAINNAME_LEN 256 /* max domain name length */ -#define MAX_USERNAME_SIZE 256 /* reasonable maximum for current servers */ -#define MAX_PASSWORD_SIZE 512 /* max for windows seems to be 256 wide chars */ +#define MAX_TREE_SIZE (2 + CIFS_NI_MAXHOST + 1 + CIFS_MAX_SHARE_LEN + 1) #define CIFS_MIN_RCV_POOL 4 @@ -135,6 +131,7 @@ struct cifs_secmech { /* per smb session structure/fields */ struct ntlmssp_auth { + bool sesskey_per_smbsess; /* whether session key is per smb session */ __u32 client_flags; /* sent by client in type 1 ntlmsssp exchange */ __u32 server_flags; /* sent by server in type 2 ntlmssp exchange */ unsigned char ciphertext[CIFS_CPHTXT_SIZE]; /* sent to server */ @@ -308,6 +305,9 @@ struct smb_version_operations { int (*create_hardlink)(const unsigned int, struct cifs_tcon *, const char *, const char *, struct cifs_sb_info *); + /* query symlink target */ + int (*query_symlink)(const unsigned int, struct cifs_tcon *, + const char *, char **, struct cifs_sb_info *); /* open a file for non-posix mounts */ int (*open)(const unsigned int, struct cifs_open_parms *, __u32 *, FILE_ALL_INFO *); @@ -361,18 +361,24 @@ struct smb_version_operations { /* push brlocks from the cache to the server */ int (*push_mand_locks)(struct cifsFileInfo *); /* get lease key of the inode */ - void (*get_lease_key)(struct inode *, struct cifs_fid *fid); + void (*get_lease_key)(struct inode *, struct cifs_fid *); /* set lease key of the inode */ - void (*set_lease_key)(struct inode *, struct cifs_fid *fid); + void (*set_lease_key)(struct inode *, struct cifs_fid *); /* generate new lease key */ - void (*new_lease_key)(struct cifs_fid *fid); - /* The next two functions will need to be changed to per smb session */ - void (*generate_signingkey)(struct TCP_Server_Info *server); - int (*calc_signature)(struct smb_rqst *rqst, - struct TCP_Server_Info *server); - int (*query_mf_symlink)(const unsigned char *path, char *pbuf, - unsigned int *pbytes_read, struct cifs_sb_info *cifs_sb, - unsigned int xid); + void (*new_lease_key)(struct cifs_fid *); + int (*generate_signingkey)(struct cifs_ses *); + int (*calc_signature)(struct smb_rqst *, struct TCP_Server_Info *); + int (*query_mf_symlink)(const unsigned char *, char *, unsigned int *, + struct cifs_sb_info *, unsigned int); + /* if we can do cache read operations */ + bool (*is_read_op)(__u32); + /* set oplock level for the inode */ + void (*set_oplock_level)(struct cifsInodeInfo *, __u32, unsigned int, + bool *); + /* create lease context buffer for CREATE request */ + char * (*create_lease_buf)(u8 *, u8); + /* parse lease context buffer and return oplock/epoch info */ + __u8 (*parse_lease_buf)(void *, unsigned int *); }; struct smb_version_values { @@ -390,9 +396,9 @@ struct smb_version_values { unsigned int cap_unix; unsigned int cap_nt_find; unsigned int cap_large_files; - unsigned int oplock_read; __u16 signing_enabled; __u16 signing_required; + size_t create_lease_size; }; #define HEADER_SIZE(server) (server->vals->header_size) @@ -548,7 +554,6 @@ struct TCP_Server_Info { int timeAdj; /* Adjust for difference in server time zone in sec */ __u64 CurrentMid; /* multiplex id - rotating counter */ char cryptkey[CIFS_CRYPTO_KEY_SIZE]; /* used by ntlm, ntlmv2 etc */ - char smb3signingkey[SMB3_SIGN_KEY_SIZE]; /* for signing smb3 packets */ /* 16th byte of RFC1001 workstation name is always null */ char workstation_RFC1001_name[RFC1001_NAME_LEN_WITH_NULL]; __u32 sequence_number; /* for signing, protected by srv_mutex */ @@ -731,6 +736,7 @@ struct cifs_ses { bool need_reconnect:1; /* connection reset, uid now invalid */ #ifdef CONFIG_CIFS_SMB2 __u16 session_flags; + char smb3signingkey[SMB3_SIGN_KEY_SIZE]; /* for signing smb3 packets */ #endif /* CONFIG_CIFS_SMB2 */ }; @@ -935,6 +941,8 @@ struct cifs_fid { __u8 lease_key[SMB2_LEASE_KEY_SIZE]; /* lease key for smb2 */ #endif struct cifs_pending_open *pending_open; + unsigned int epoch; + bool purge_cache; }; struct cifs_fid_locks { @@ -1032,6 +1040,17 @@ cifsFileInfo_get_locked(struct cifsFileInfo *cifs_file) struct cifsFileInfo *cifsFileInfo_get(struct cifsFileInfo *cifs_file); void cifsFileInfo_put(struct cifsFileInfo *cifs_file); +#define CIFS_CACHE_READ_FLG 1 +#define CIFS_CACHE_HANDLE_FLG 2 +#define CIFS_CACHE_RH_FLG (CIFS_CACHE_READ_FLG | CIFS_CACHE_HANDLE_FLG) +#define CIFS_CACHE_WRITE_FLG 4 +#define CIFS_CACHE_RW_FLG (CIFS_CACHE_READ_FLG | CIFS_CACHE_WRITE_FLG) +#define CIFS_CACHE_RHW_FLG (CIFS_CACHE_RW_FLG | CIFS_CACHE_HANDLE_FLG) + +#define CIFS_CACHE_READ(cinode) (cinode->oplock & CIFS_CACHE_READ_FLG) +#define CIFS_CACHE_HANDLE(cinode) (cinode->oplock & CIFS_CACHE_HANDLE_FLG) +#define CIFS_CACHE_WRITE(cinode) (cinode->oplock & CIFS_CACHE_WRITE_FLG) + /* * One of these for each file inode */ @@ -1043,8 +1062,8 @@ struct cifsInodeInfo { /* BB add in lists for dirty pages i.e. write caching info for oplock */ struct list_head openFileList; __u32 cifsAttrs; /* e.g. DOS archive bit, sparse, compressed, system */ - bool clientCanCacheRead; /* read oplock */ - bool clientCanCacheAll; /* read and writebehind oplock */ + unsigned int oplock; /* oplock/lease level we have */ + unsigned int epoch; /* used to track lease state changes */ bool delete_pending; /* DELETE_ON_CLOSE is set */ bool invalid_mapping; /* pagecache is invalid */ unsigned long time; /* jiffies of last update of inode */ @@ -1502,7 +1521,7 @@ extern mempool_t *cifs_mid_poolp; extern struct smb_version_operations smb1_operations; extern struct smb_version_values smb1_values; #define SMB20_VERSION_STRING "2.0" -/*extern struct smb_version_operations smb20_operations; */ /* not needed yet */ +extern struct smb_version_operations smb20_operations; extern struct smb_version_values smb20_values; #define SMB21_VERSION_STRING "2.1" extern struct smb_version_operations smb21_operations; diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h index 11ca24a8e054..948676db8e2e 100644 --- a/fs/cifs/cifspdu.h +++ b/fs/cifs/cifspdu.h @@ -1495,11 +1495,12 @@ struct reparse_data { __u32 ReparseTag; __u16 ReparseDataLength; __u16 Reserved; - __u16 AltNameOffset; - __u16 AltNameLen; - __u16 TargetNameOffset; - __u16 TargetNameLen; - char LinkNamesBuf[1]; + __u16 SubstituteNameOffset; + __u16 SubstituteNameLength; + __u16 PrintNameOffset; + __u16 PrintNameLength; + __u32 Flags; + char PathBuffer[0]; } __attribute__((packed)); struct cifs_quota_data { diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index b29a012bed33..b5ec2a268f56 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -357,13 +357,9 @@ extern int CIFSSMBUnixQuerySymLink(const unsigned int xid, struct cifs_tcon *tcon, const unsigned char *searchName, char **syminfo, const struct nls_table *nls_codepage); -#ifdef CONFIG_CIFS_SYMLINK_EXPERIMENTAL -extern int CIFSSMBQueryReparseLinkInfo(const unsigned int xid, - struct cifs_tcon *tcon, - const unsigned char *searchName, - char *symlinkinfo, const int buflen, __u16 fid, - const struct nls_table *nls_codepage); -#endif /* temporarily unused until cifs_symlink fixed */ +extern int CIFSSMBQuerySymLink(const unsigned int xid, struct cifs_tcon *tcon, + __u16 fid, char **symlinkinfo, + const struct nls_table *nls_codepage); extern int CIFSSMBOpen(const unsigned int xid, struct cifs_tcon *tcon, const char *fileName, const int disposition, const int access_flags, const int omode, @@ -435,7 +431,7 @@ extern int setup_ntlm_response(struct cifs_ses *, const struct nls_table *); extern int setup_ntlmv2_rsp(struct cifs_ses *, const struct nls_table *); extern void cifs_crypto_shash_release(struct TCP_Server_Info *); extern int calc_seckey(struct cifs_ses *); -extern void generate_smb3signingkey(struct TCP_Server_Info *); +extern int generate_smb3signingkey(struct cifs_ses *); #ifdef CONFIG_CIFS_WEAK_PW_HASH extern int calc_lanman_hash(const char *password, const char *cryptkey, diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index a89c4cb4e6cf..a3d74fea1623 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -3067,7 +3067,6 @@ querySymLinkRetry: return rc; } -#ifdef CONFIG_CIFS_SYMLINK_EXPERIMENTAL /* * Recent Windows versions now create symlinks more frequently * and they use the "reparse point" mechanism below. We can of course @@ -3079,18 +3078,22 @@ querySymLinkRetry: * it is not compiled in by default until callers fixed up and more tested. */ int -CIFSSMBQueryReparseLinkInfo(const unsigned int xid, struct cifs_tcon *tcon, - const unsigned char *searchName, - char *symlinkinfo, const int buflen, __u16 fid, - const struct nls_table *nls_codepage) +CIFSSMBQuerySymLink(const unsigned int xid, struct cifs_tcon *tcon, + __u16 fid, char **symlinkinfo, + const struct nls_table *nls_codepage) { int rc = 0; int bytes_returned; struct smb_com_transaction_ioctl_req *pSMB; struct smb_com_transaction_ioctl_rsp *pSMBr; + bool is_unicode; + unsigned int sub_len; + char *sub_start; + struct reparse_data *reparse_buf; + __u32 data_offset, data_count; + char *end_of_smb; - cifs_dbg(FYI, "In Windows reparse style QueryLink for path %s\n", - searchName); + cifs_dbg(FYI, "In Windows reparse style QueryLink for fid %u\n", fid); rc = smb_init(SMB_COM_NT_TRANSACT, 23, tcon, (void **) &pSMB, (void **) &pSMBr); if (rc) @@ -3119,66 +3122,55 @@ CIFSSMBQueryReparseLinkInfo(const unsigned int xid, struct cifs_tcon *tcon, (struct smb_hdr *) pSMBr, &bytes_returned, 0); if (rc) { cifs_dbg(FYI, "Send error in QueryReparseLinkInfo = %d\n", rc); - } else { /* decode response */ - __u32 data_offset = le32_to_cpu(pSMBr->DataOffset); - __u32 data_count = le32_to_cpu(pSMBr->DataCount); - if (get_bcc(&pSMBr->hdr) < 2 || data_offset > 512) { - /* BB also check enough total bytes returned */ - rc = -EIO; /* bad smb */ - goto qreparse_out; - } - if (data_count && (data_count < 2048)) { - char *end_of_smb = 2 /* sizeof byte count */ + - get_bcc(&pSMBr->hdr) + (char *)&pSMBr->ByteCount; - - struct reparse_data *reparse_buf = - (struct reparse_data *) - ((char *)&pSMBr->hdr.Protocol - + data_offset); - if ((char *)reparse_buf >= end_of_smb) { - rc = -EIO; - goto qreparse_out; - } - if ((reparse_buf->LinkNamesBuf + - reparse_buf->TargetNameOffset + - reparse_buf->TargetNameLen) > end_of_smb) { - cifs_dbg(FYI, "reparse buf beyond SMB\n"); - rc = -EIO; - goto qreparse_out; - } + goto qreparse_out; + } - if (pSMBr->hdr.Flags2 & SMBFLG2_UNICODE) { - cifs_from_ucs2(symlinkinfo, (__le16 *) - (reparse_buf->LinkNamesBuf + - reparse_buf->TargetNameOffset), - buflen, - reparse_buf->TargetNameLen, - nls_codepage, 0); - } else { /* ASCII names */ - strncpy(symlinkinfo, - reparse_buf->LinkNamesBuf + - reparse_buf->TargetNameOffset, - min_t(const int, buflen, - reparse_buf->TargetNameLen)); - } - } else { - rc = -EIO; - cifs_dbg(FYI, "Invalid return data count on get reparse info ioctl\n"); - } - symlinkinfo[buflen] = 0; /* just in case so the caller - does not go off the end of the buffer */ - cifs_dbg(FYI, "readlink result - %s\n", symlinkinfo); + data_offset = le32_to_cpu(pSMBr->DataOffset); + data_count = le32_to_cpu(pSMBr->DataCount); + if (get_bcc(&pSMBr->hdr) < 2 || data_offset > 512) { + /* BB also check enough total bytes returned */ + rc = -EIO; /* bad smb */ + goto qreparse_out; + } + if (!data_count || (data_count > 2048)) { + rc = -EIO; + cifs_dbg(FYI, "Invalid return data count on get reparse info ioctl\n"); + goto qreparse_out; + } + end_of_smb = 2 + get_bcc(&pSMBr->hdr) + (char *)&pSMBr->ByteCount; + reparse_buf = (struct reparse_data *) + ((char *)&pSMBr->hdr.Protocol + data_offset); + if ((char *)reparse_buf >= end_of_smb) { + rc = -EIO; + goto qreparse_out; } + if ((reparse_buf->PathBuffer + reparse_buf->PrintNameOffset + + reparse_buf->PrintNameLength) > end_of_smb) { + cifs_dbg(FYI, "reparse buf beyond SMB\n"); + rc = -EIO; + goto qreparse_out; + } + sub_start = reparse_buf->SubstituteNameOffset + reparse_buf->PathBuffer; + sub_len = reparse_buf->SubstituteNameLength; + if (pSMBr->hdr.Flags2 & SMBFLG2_UNICODE) + is_unicode = true; + else + is_unicode = false; + /* BB FIXME investigate remapping reserved chars here */ + *symlinkinfo = cifs_strndup_from_utf16(sub_start, sub_len, is_unicode, + nls_codepage); + if (!*symlinkinfo) + rc = -ENOMEM; qreparse_out: cifs_buf_release(pSMB); - /* Note: On -EAGAIN error only caller can retry on handle based calls - since file handle passed in no longer valid */ - + /* + * Note: On -EAGAIN error only caller can retry on handle based calls + * since file handle passed in no longer valid. + */ return rc; } -#endif /* CIFS_SYMLINK_EXPERIMENTAL */ /* BB temporarily unused */ #ifdef CONFIG_CIFS_POSIX diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index d67c550c4980..a279ffc0bc29 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -379,6 +379,7 @@ cifs_reconnect(struct TCP_Server_Info *server) try_to_freeze(); /* we should try only the port we connected to before */ + mutex_lock(&server->srv_mutex); rc = generic_ip_connect(server); if (rc) { cifs_dbg(FYI, "reconnect error %d\n", rc); @@ -390,6 +391,7 @@ cifs_reconnect(struct TCP_Server_Info *server) server->tcpStatus = CifsNeedNegotiate; spin_unlock(&GlobalMid_Lock); } + mutex_unlock(&server->srv_mutex); } while (server->tcpStatus == CifsNeedReconnect); return rc; @@ -1114,7 +1116,7 @@ cifs_parse_smb_version(char *value, struct smb_vol *vol) break; #ifdef CONFIG_CIFS_SMB2 case Smb_20: - vol->ops = &smb21_operations; /* currently identical with 2.1 */ + vol->ops = &smb20_operations; vol->vals = &smb20_values; break; case Smb_21: @@ -1575,8 +1577,8 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, if (string == NULL) goto out_nomem; - if (strnlen(string, MAX_USERNAME_SIZE) > - MAX_USERNAME_SIZE) { + if (strnlen(string, CIFS_MAX_USERNAME_LEN) > + CIFS_MAX_USERNAME_LEN) { printk(KERN_WARNING "CIFS: username too long\n"); goto cifs_parse_mount_err; } @@ -2221,13 +2223,13 @@ static int match_session(struct cifs_ses *ses, struct smb_vol *vol) /* anything else takes username/password */ if (strncmp(ses->user_name, vol->username ? vol->username : "", - MAX_USERNAME_SIZE)) + CIFS_MAX_USERNAME_LEN)) return 0; if (strlen(vol->username) != 0 && ses->password != NULL && strncmp(ses->password, vol->password ? vol->password : "", - MAX_PASSWORD_SIZE)) + CIFS_MAX_PASSWORD_LEN)) return 0; } return 1; @@ -2352,7 +2354,7 @@ cifs_set_cifscreds(struct smb_vol *vol, struct cifs_ses *ses) } len = delim - payload; - if (len > MAX_USERNAME_SIZE || len <= 0) { + if (len > CIFS_MAX_USERNAME_LEN || len <= 0) { cifs_dbg(FYI, "Bad value from username search (len=%zd)\n", len); rc = -EINVAL; @@ -2369,7 +2371,7 @@ cifs_set_cifscreds(struct smb_vol *vol, struct cifs_ses *ses) cifs_dbg(FYI, "%s: username=%s\n", __func__, vol->username); len = key->datalen - (len + 1); - if (len > MAX_PASSWORD_SIZE || len <= 0) { + if (len > CIFS_MAX_PASSWORD_LEN || len <= 0) { cifs_dbg(FYI, "Bad len for password search (len=%zd)\n", len); rc = -EINVAL; kfree(vol->username); @@ -3826,33 +3828,8 @@ cifs_setup_session(const unsigned int xid, struct cifs_ses *ses, if (server->ops->sess_setup) rc = server->ops->sess_setup(xid, ses, nls_info); - if (rc) { + if (rc) cifs_dbg(VFS, "Send error in SessSetup = %d\n", rc); - } else { - mutex_lock(&server->srv_mutex); - if (!server->session_estab) { - server->session_key.response = ses->auth_key.response; - server->session_key.len = ses->auth_key.len; - server->sequence_number = 0x2; - server->session_estab = true; - ses->auth_key.response = NULL; - if (server->ops->generate_signingkey) - server->ops->generate_signingkey(server); - } - mutex_unlock(&server->srv_mutex); - - cifs_dbg(FYI, "CIFS Session Established successfully\n"); - spin_lock(&GlobalMid_Lock); - ses->status = CifsGood; - ses->need_reconnect = false; - spin_unlock(&GlobalMid_Lock); - } - - kfree(ses->auth_key.response); - ses->auth_key.response = NULL; - ses->auth_key.len = 0; - kfree(ses->ntlmssp); - ses->ntlmssp = NULL; return rc; } diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index d62ce0d48141..d3e2eaa503a6 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -32,6 +32,7 @@ #include "cifsproto.h" #include "cifs_debug.h" #include "cifs_fs_sb.h" +#include "cifs_unicode.h" static void renew_parental_timestamps(struct dentry *direntry) @@ -834,12 +835,17 @@ static int cifs_ci_hash(const struct dentry *dentry, struct qstr *q) { struct nls_table *codepage = CIFS_SB(dentry->d_sb)->local_nls; unsigned long hash; - int i; + wchar_t c; + int i, charlen; hash = init_name_hash(); - for (i = 0; i < q->len; i++) - hash = partial_name_hash(nls_tolower(codepage, q->name[i]), - hash); + for (i = 0; i < q->len; i += charlen) { + charlen = codepage->char2uni(&q->name[i], q->len - i, &c); + /* error out if we can't convert the character */ + if (unlikely(charlen < 0)) + return charlen; + hash = partial_name_hash(cifs_toupper(c), hash); + } q->hash = end_name_hash(hash); return 0; @@ -849,11 +855,47 @@ static int cifs_ci_compare(const struct dentry *parent, const struct dentry *den unsigned int len, const char *str, const struct qstr *name) { struct nls_table *codepage = CIFS_SB(parent->d_sb)->local_nls; + wchar_t c1, c2; + int i, l1, l2; - if ((name->len == len) && - (nls_strnicmp(codepage, name->name, str, len) == 0)) - return 0; - return 1; + /* + * We make the assumption here that uppercase characters in the local + * codepage are always the same length as their lowercase counterparts. + * + * If that's ever not the case, then this will fail to match it. + */ + if (name->len != len) + return 1; + + for (i = 0; i < len; i += l1) { + /* Convert characters in both strings to UTF-16. */ + l1 = codepage->char2uni(&str[i], len - i, &c1); + l2 = codepage->char2uni(&name->name[i], name->len - i, &c2); + + /* + * If we can't convert either character, just declare it to + * be 1 byte long and compare the original byte. + */ + if (unlikely(l1 < 0 && l2 < 0)) { + if (str[i] != name->name[i]) + return 1; + l1 = 1; + continue; + } + + /* + * Here, we again ass|u|me that upper/lowercase versions of + * a character are the same length in the local NLS. + */ + if (l1 != l2) + return 1; + + /* Now compare uppercase versions of these characters */ + if (cifs_toupper(c1) != cifs_toupper(c2)) + return 1; + } + + return 0; } const struct dentry_operations cifs_ci_dentry_ops = { diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 9d0dd952ad79..d044b35ce228 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -313,8 +313,7 @@ cifs_new_fileinfo(struct cifs_fid *fid, struct file *file, * If the server returned a read oplock and we have mandatory brlocks, * set oplock level to None. */ - if (oplock == server->vals->oplock_read && - cifs_has_mand_locks(cinode)) { + if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) { cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n"); oplock = 0; } @@ -324,6 +323,7 @@ cifs_new_fileinfo(struct cifs_fid *fid, struct file *file, oplock = fid->pending_open->oplock; list_del(&fid->pending_open->olist); + fid->purge_cache = false; server->ops->set_fid(cfile, fid, oplock); list_add(&cfile->tlist, &tcon->openFileList); @@ -334,6 +334,9 @@ cifs_new_fileinfo(struct cifs_fid *fid, struct file *file, list_add_tail(&cfile->flist, &cinode->openFileList); spin_unlock(&cifs_file_list_lock); + if (fid->purge_cache) + cifs_invalidate_mapping(inode); + file->private_data = cfile; return cfile; } @@ -1524,12 +1527,12 @@ cifs_setlk(struct file *file, struct file_lock *flock, __u32 type, * read won't conflict with non-overlapted locks due to * pagereading. */ - if (!CIFS_I(inode)->clientCanCacheAll && - CIFS_I(inode)->clientCanCacheRead) { + if (!CIFS_CACHE_WRITE(CIFS_I(inode)) && + CIFS_CACHE_READ(CIFS_I(inode))) { cifs_invalidate_mapping(inode); cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n", inode); - CIFS_I(inode)->clientCanCacheRead = false; + CIFS_I(inode)->oplock = 0; } rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, @@ -2213,7 +2216,7 @@ int cifs_strict_fsync(struct file *file, loff_t start, loff_t end, cifs_dbg(FYI, "Sync file - name: %s datasync: 0x%x\n", file->f_path.dentry->d_name.name, datasync); - if (!CIFS_I(inode)->clientCanCacheRead) { + if (!CIFS_CACHE_READ(CIFS_I(inode))) { rc = cifs_invalidate_mapping(inode); if (rc) { cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc); @@ -2577,7 +2580,7 @@ cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov, struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); ssize_t written; - if (cinode->clientCanCacheAll) { + if (CIFS_CACHE_WRITE(cinode)) { if (cap_unix(tcon->ses) && (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) @@ -2591,7 +2594,7 @@ cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov, * these pages but not on the region from pos to ppos+len-1. */ written = cifs_user_writev(iocb, iov, nr_segs, pos); - if (written > 0 && cinode->clientCanCacheRead) { + if (written > 0 && CIFS_CACHE_READ(cinode)) { /* * Windows 7 server can delay breaking level2 oplock if a write * request comes - break it on the client to prevent reading @@ -2600,7 +2603,7 @@ cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov, cifs_invalidate_mapping(inode); cifs_dbg(FYI, "Set no oplock for inode=%p after a write operation\n", inode); - cinode->clientCanCacheRead = false; + cinode->oplock = 0; } return written; } @@ -2957,7 +2960,7 @@ cifs_strict_readv(struct kiocb *iocb, const struct iovec *iov, * on pages affected by this read but not on the region from pos to * pos+len-1. */ - if (!cinode->clientCanCacheRead) + if (!CIFS_CACHE_READ(cinode)) return cifs_user_readv(iocb, iov, nr_segs, pos); if (cap_unix(tcon->ses) && @@ -3093,7 +3096,7 @@ int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma) xid = get_xid(); - if (!CIFS_I(inode)->clientCanCacheRead) { + if (!CIFS_CACHE_READ(CIFS_I(inode))) { rc = cifs_invalidate_mapping(inode); if (rc) return rc; @@ -3526,7 +3529,7 @@ static int cifs_write_begin(struct file *file, struct address_space *mapping, * is, when the page lies beyond the EOF, or straddles the EOF * and the write will cover all of the existing data. */ - if (CIFS_I(mapping->host)->clientCanCacheRead) { + if (CIFS_CACHE_READ(CIFS_I(mapping->host))) { i_size = i_size_read(mapping->host); if (page_start >= i_size || (offset == 0 && (pos + len) >= i_size)) { @@ -3609,20 +3612,20 @@ void cifs_oplock_break(struct work_struct *work) struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); int rc = 0; - if (!cinode->clientCanCacheAll && cinode->clientCanCacheRead && + if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) && cifs_has_mand_locks(cinode)) { cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n", inode); - cinode->clientCanCacheRead = false; + cinode->oplock = 0; } if (inode && S_ISREG(inode->i_mode)) { - if (cinode->clientCanCacheRead) + if (CIFS_CACHE_READ(cinode)) break_lease(inode, O_RDONLY); else break_lease(inode, O_WRONLY); rc = filemap_fdatawrite(inode->i_mapping); - if (cinode->clientCanCacheRead == 0) { + if (!CIFS_CACHE_READ(cinode)) { rc = filemap_fdatawait(inode->i_mapping); mapping_set_error(inode->i_mapping, rc); cifs_invalidate_mapping(inode); diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 2a92c5c6ecfd..f9ff9c173f78 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -101,7 +101,7 @@ cifs_revalidate_cache(struct inode *inode, struct cifs_fattr *fattr) } /* don't bother with revalidation if we have an oplock */ - if (cifs_i->clientCanCacheRead) { + if (CIFS_CACHE_READ(cifs_i)) { cifs_dbg(FYI, "%s: inode %llu is oplocked\n", __func__, cifs_i->uniqueid); return; @@ -549,6 +549,10 @@ cifs_all_info_to_fattr(struct cifs_fattr *fattr, FILE_ALL_INFO *info, * when Unix extensions are disabled - fake it. */ fattr->cf_nlink = 2; + } else if (fattr->cf_cifsattrs & ATTR_REPARSE) { + fattr->cf_mode = S_IFLNK; + fattr->cf_dtype = DT_LNK; + fattr->cf_nlink = le32_to_cpu(info->NumberOfLinks); } else { fattr->cf_mode = S_IFREG | cifs_sb->mnt_file_mode; fattr->cf_dtype = DT_REG; @@ -646,7 +650,7 @@ cifs_get_inode_info(struct inode **inode, const char *full_path, cifs_dbg(FYI, "Getting info on %s\n", full_path); if ((data == NULL) && (*inode != NULL)) { - if (CIFS_I(*inode)->clientCanCacheRead) { + if (CIFS_CACHE_READ(CIFS_I(*inode))) { cifs_dbg(FYI, "No need to revalidate cached inode sizes\n"); goto cgii_exit; } @@ -1657,7 +1661,7 @@ cifs_inode_needs_reval(struct inode *inode) struct cifsInodeInfo *cifs_i = CIFS_I(inode); struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); - if (cifs_i->clientCanCacheRead) + if (CIFS_CACHE_READ(cifs_i)) return false; if (!lookupCacheEnabled) @@ -1800,7 +1804,7 @@ int cifs_getattr(struct vfsmount *mnt, struct dentry *dentry, * We need to be sure that all dirty pages are written and the server * has actual ctime, mtime and file length. */ - if (!CIFS_I(inode)->clientCanCacheRead && inode->i_mapping && + if (!CIFS_CACHE_READ(CIFS_I(inode)) && inode->i_mapping && inode->i_mapping->nrpages != 0) { rc = filemap_fdatawait(inode->i_mapping); if (rc) { diff --git a/fs/cifs/link.c b/fs/cifs/link.c index 562044f700e5..7e36ceba0c7a 100644 --- a/fs/cifs/link.c +++ b/fs/cifs/link.c @@ -509,6 +509,7 @@ cifs_follow_link(struct dentry *direntry, struct nameidata *nd) struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); struct tcon_link *tlink = NULL; struct cifs_tcon *tcon; + struct TCP_Server_Info *server; xid = get_xid(); @@ -519,25 +520,7 @@ cifs_follow_link(struct dentry *direntry, struct nameidata *nd) goto out; } tcon = tlink_tcon(tlink); - - /* - * For now, we just handle symlinks with unix extensions enabled. - * Eventually we should handle NTFS reparse points, and MacOS - * symlink support. For instance... - * - * rc = CIFSSMBQueryReparseLinkInfo(...) - * - * For now, just return -EACCES when the server doesn't support posix - * extensions. Note that we still allow querying symlinks when posix - * extensions are manually disabled. We could disable these as well - * but there doesn't seem to be any harm in allowing the client to - * read them. - */ - if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MF_SYMLINKS) && - !cap_unix(tcon->ses)) { - rc = -EACCES; - goto out; - } + server = tcon->ses->server; full_path = build_path_from_dentry(direntry); if (!full_path) @@ -559,6 +542,9 @@ cifs_follow_link(struct dentry *direntry, struct nameidata *nd) if ((rc != 0) && cap_unix(tcon->ses)) rc = CIFSSMBUnixQuerySymLink(xid, tcon, full_path, &target_path, cifs_sb->local_nls); + else if (rc != 0 && server->ops->query_symlink) + rc = server->ops->query_symlink(xid, tcon, full_path, + &target_path, cifs_sb); kfree(full_path); out: diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index f7d4b2285efe..138a011633fe 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c @@ -105,6 +105,7 @@ sesInfoFree(struct cifs_ses *buf_to_free) } kfree(buf_to_free->user_name); kfree(buf_to_free->domainName); + kfree(buf_to_free->auth_key.response); kfree(buf_to_free); } @@ -545,19 +546,15 @@ void cifs_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock) oplock &= 0xF; if (oplock == OPLOCK_EXCLUSIVE) { - cinode->clientCanCacheAll = true; - cinode->clientCanCacheRead = true; + cinode->oplock = CIFS_CACHE_WRITE_FLG | CIFS_CACHE_READ_FLG; cifs_dbg(FYI, "Exclusive Oplock granted on inode %p\n", &cinode->vfs_inode); } else if (oplock == OPLOCK_READ) { - cinode->clientCanCacheAll = false; - cinode->clientCanCacheRead = true; + cinode->oplock = CIFS_CACHE_READ_FLG; cifs_dbg(FYI, "Level II Oplock granted on inode %p\n", &cinode->vfs_inode); - } else { - cinode->clientCanCacheAll = false; - cinode->clientCanCacheRead = false; - } + } else + cinode->oplock = 0; } bool diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index 69d2c826a23b..42ef03be089f 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -172,6 +172,9 @@ cifs_fill_common_info(struct cifs_fattr *fattr, struct cifs_sb_info *cifs_sb) if (cifs_dfs_is_possible(cifs_sb) && (fattr->cf_cifsattrs & ATTR_REPARSE)) fattr->cf_flags |= CIFS_FATTR_NEED_REVAL; + } else if (fattr->cf_cifsattrs & ATTR_REPARSE) { + fattr->cf_mode = S_IFLNK; + fattr->cf_dtype = DT_LNK; } else { fattr->cf_mode = S_IFREG | cifs_sb->mnt_file_mode; fattr->cf_dtype = DT_REG; diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index 08dd37bb23aa..5f99b7f19e78 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -226,7 +226,7 @@ static void unicode_ssetup_strings(char **pbcc_area, struct cifs_ses *ses, *(bcc_ptr+1) = 0; } else { bytes_ret = cifs_strtoUTF16((__le16 *) bcc_ptr, ses->user_name, - MAX_USERNAME_SIZE, nls_cp); + CIFS_MAX_USERNAME_LEN, nls_cp); } bcc_ptr += 2 * bytes_ret; bcc_ptr += 2; /* account for null termination */ @@ -246,8 +246,8 @@ static void ascii_ssetup_strings(char **pbcc_area, struct cifs_ses *ses, /* BB what about null user mounts - check that we do this BB */ /* copy user */ if (ses->user_name != NULL) { - strncpy(bcc_ptr, ses->user_name, MAX_USERNAME_SIZE); - bcc_ptr += strnlen(ses->user_name, MAX_USERNAME_SIZE); + strncpy(bcc_ptr, ses->user_name, CIFS_MAX_USERNAME_LEN); + bcc_ptr += strnlen(ses->user_name, CIFS_MAX_USERNAME_LEN); } /* else null user mount */ *bcc_ptr = 0; @@ -428,7 +428,8 @@ void build_ntlmssp_negotiate_blob(unsigned char *pbuffer, NTLMSSP_NEGOTIATE_NTLM | NTLMSSP_NEGOTIATE_EXTENDED_SEC; if (ses->server->sign) { flags |= NTLMSSP_NEGOTIATE_SIGN; - if (!ses->server->session_estab) + if (!ses->server->session_estab || + ses->ntlmssp->sesskey_per_smbsess) flags |= NTLMSSP_NEGOTIATE_KEY_XCH; } @@ -466,7 +467,8 @@ int build_ntlmssp_auth_blob(unsigned char *pbuffer, NTLMSSP_NEGOTIATE_NTLM | NTLMSSP_NEGOTIATE_EXTENDED_SEC; if (ses->server->sign) { flags |= NTLMSSP_NEGOTIATE_SIGN; - if (!ses->server->session_estab) + if (!ses->server->session_estab || + ses->ntlmssp->sesskey_per_smbsess) flags |= NTLMSSP_NEGOTIATE_KEY_XCH; } @@ -501,7 +503,7 @@ int build_ntlmssp_auth_blob(unsigned char *pbuffer, } else { int len; len = cifs_strtoUTF16((__le16 *)tmp, ses->domainName, - MAX_USERNAME_SIZE, nls_cp); + CIFS_MAX_USERNAME_LEN, nls_cp); len *= 2; /* unicode is 2 bytes each */ sec_blob->DomainName.BufferOffset = cpu_to_le32(tmp - pbuffer); sec_blob->DomainName.Length = cpu_to_le16(len); @@ -517,7 +519,7 @@ int build_ntlmssp_auth_blob(unsigned char *pbuffer, } else { int len; len = cifs_strtoUTF16((__le16 *)tmp, ses->user_name, - MAX_USERNAME_SIZE, nls_cp); + CIFS_MAX_USERNAME_LEN, nls_cp); len *= 2; /* unicode is 2 bytes each */ sec_blob->UserName.BufferOffset = cpu_to_le32(tmp - pbuffer); sec_blob->UserName.Length = cpu_to_le16(len); @@ -629,7 +631,8 @@ CIFS_SessSetup(const unsigned int xid, struct cifs_ses *ses, type = select_sectype(ses->server, ses->sectype); cifs_dbg(FYI, "sess setup type %d\n", type); if (type == Unspecified) { - cifs_dbg(VFS, "Unable to select appropriate authentication method!"); + cifs_dbg(VFS, + "Unable to select appropriate authentication method!"); return -EINVAL; } @@ -640,6 +643,8 @@ CIFS_SessSetup(const unsigned int xid, struct cifs_ses *ses, ses->ntlmssp = kmalloc(sizeof(struct ntlmssp_auth), GFP_KERNEL); if (!ses->ntlmssp) return -ENOMEM; + ses->ntlmssp->sesskey_per_smbsess = false; + } ssetup_ntlmssp_authenticate: @@ -815,8 +820,9 @@ ssetup_ntlmssp_authenticate: ses->auth_key.response = kmemdup(msg->data, msg->sesskey_len, GFP_KERNEL); if (!ses->auth_key.response) { - cifs_dbg(VFS, "Kerberos can't allocate (%u bytes) memory", - msg->sesskey_len); + cifs_dbg(VFS, + "Kerberos can't allocate (%u bytes) memory", + msg->sesskey_len); rc = -ENOMEM; goto ssetup_exit; } @@ -1005,5 +1011,37 @@ ssetup_exit: if ((phase == NtLmChallenge) && (rc == 0)) goto ssetup_ntlmssp_authenticate; + if (!rc) { + mutex_lock(&ses->server->srv_mutex); + if (!ses->server->session_estab) { + if (ses->server->sign) { + ses->server->session_key.response = + kmemdup(ses->auth_key.response, + ses->auth_key.len, GFP_KERNEL); + if (!ses->server->session_key.response) { + rc = -ENOMEM; + mutex_unlock(&ses->server->srv_mutex); + goto keycp_exit; + } + ses->server->session_key.len = + ses->auth_key.len; + } + ses->server->sequence_number = 0x2; + ses->server->session_estab = true; + } + mutex_unlock(&ses->server->srv_mutex); + + cifs_dbg(FYI, "CIFS session established successfully\n"); + spin_lock(&GlobalMid_Lock); + ses->status = CifsGood; + ses->need_reconnect = false; + spin_unlock(&GlobalMid_Lock); + } + +keycp_exit: + kfree(ses->auth_key.response); + ses->auth_key.response = NULL; + kfree(ses->ntlmssp); + return rc; } diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index 60943978aec3..8233b174de3d 100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c @@ -700,7 +700,7 @@ cifs_set_fid(struct cifsFileInfo *cfile, struct cifs_fid *fid, __u32 oplock) struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode); cfile->fid.netfid = fid->netfid; cifs_set_oplock_level(cinode, oplock); - cinode->can_cache_brlcks = cinode->clientCanCacheAll; + cinode->can_cache_brlcks = CIFS_CACHE_WRITE(cinode); } static void @@ -837,7 +837,7 @@ cifs_oplock_response(struct cifs_tcon *tcon, struct cifs_fid *fid, { return CIFSSMBLock(0, tcon, fid->netfid, current->tgid, 0, 0, 0, 0, LOCKING_ANDX_OPLOCK_RELEASE, false, - cinode->clientCanCacheRead ? 1 : 0); + CIFS_CACHE_READ(cinode) ? 1 : 0); } static int @@ -881,6 +881,43 @@ cifs_mand_lock(const unsigned int xid, struct cifsFileInfo *cfile, __u64 offset, (__u8)type, wait, 0); } +static int +cifs_query_symlink(const unsigned int xid, struct cifs_tcon *tcon, + const char *full_path, char **target_path, + struct cifs_sb_info *cifs_sb) +{ + int rc; + int oplock = 0; + __u16 netfid; + + cifs_dbg(FYI, "%s: path: %s\n", __func__, full_path); + + rc = CIFSSMBOpen(xid, tcon, full_path, FILE_OPEN, + FILE_READ_ATTRIBUTES, OPEN_REPARSE_POINT, &netfid, + &oplock, NULL, cifs_sb->local_nls, + cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); + if (rc) + return rc; + + rc = CIFSSMBQuerySymLink(xid, tcon, netfid, target_path, + cifs_sb->local_nls); + if (rc) { + CIFSSMBClose(xid, tcon, netfid); + return rc; + } + + convert_delimiter(*target_path, '/'); + CIFSSMBClose(xid, tcon, netfid); + cifs_dbg(FYI, "%s: target path: %s\n", __func__, *target_path); + return rc; +} + +static bool +cifs_is_read_op(__u32 oplock) +{ + return oplock == OPLOCK_READ; +} + struct smb_version_operations smb1_operations = { .send_cancel = send_nt_cancel, .compare_fids = cifs_compare_fids, @@ -927,6 +964,7 @@ struct smb_version_operations smb1_operations = { .rename_pending_delete = cifs_rename_pending_delete, .rename = CIFSSMBRename, .create_hardlink = CIFSCreateHardLink, + .query_symlink = cifs_query_symlink, .open = cifs_open_file, .set_fid = cifs_set_fid, .close = cifs_close_file, @@ -945,6 +983,7 @@ struct smb_version_operations smb1_operations = { .mand_unlock_range = cifs_unlock_range, .push_mand_locks = cifs_push_mandatory_locks, .query_mf_symlink = open_query_close_cifs_symlink, + .is_read_op = cifs_is_read_op, }; struct smb_version_values smb1_values = { @@ -960,7 +999,6 @@ struct smb_version_values smb1_values = { .cap_unix = CAP_UNIX, .cap_nt_find = CAP_NT_SMBS | CAP_NT_FIND, .cap_large_files = CAP_LARGE_FILES, - .oplock_read = OPLOCK_READ, .signing_enabled = SECMODE_SIGN_ENABLED, .signing_required = SECMODE_SIGN_REQUIRED, }; diff --git a/fs/cifs/smb2file.c b/fs/cifs/smb2file.c index 04a81a4142c3..3f17b4550831 100644 --- a/fs/cifs/smb2file.c +++ b/fs/cifs/smb2file.c @@ -34,29 +34,6 @@ #include "fscache.h" #include "smb2proto.h" -void -smb2_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock) -{ - oplock &= 0xFF; - if (oplock == SMB2_OPLOCK_LEVEL_NOCHANGE) - return; - if (oplock == SMB2_OPLOCK_LEVEL_EXCLUSIVE || - oplock == SMB2_OPLOCK_LEVEL_BATCH) { - cinode->clientCanCacheAll = true; - cinode->clientCanCacheRead = true; - cifs_dbg(FYI, "Exclusive Oplock granted on inode %p\n", - &cinode->vfs_inode); - } else if (oplock == SMB2_OPLOCK_LEVEL_II) { - cinode->clientCanCacheAll = false; - cinode->clientCanCacheRead = true; - cifs_dbg(FYI, "Level II Oplock granted on inode %p\n", - &cinode->vfs_inode); - } else { - cinode->clientCanCacheAll = false; - cinode->clientCanCacheRead = false; - } -} - int smb2_open_file(const unsigned int xid, struct cifs_open_parms *oparms, __u32 *oplock, FILE_ALL_INFO *buf) @@ -86,7 +63,7 @@ smb2_open_file(const unsigned int xid, struct cifs_open_parms *oparms, if (oparms->tcon->ses->server->capabilities & SMB2_GLOBAL_CAP_LEASING) memcpy(smb2_oplock + 1, fid->lease_key, SMB2_LEASE_KEY_SIZE); - rc = SMB2_open(xid, oparms, smb2_path, smb2_oplock, smb2_data); + rc = SMB2_open(xid, oparms, smb2_path, smb2_oplock, smb2_data, NULL); if (rc) goto out; diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c index c6ec1633309a..78ff88c467b9 100644 --- a/fs/cifs/smb2inode.c +++ b/fs/cifs/smb2inode.c @@ -60,7 +60,7 @@ smb2_open_op_close(const unsigned int xid, struct cifs_tcon *tcon, oparms.fid = &fid; oparms.reconnect = false; - rc = SMB2_open(xid, &oparms, utf16_path, &oplock, NULL); + rc = SMB2_open(xid, &oparms, utf16_path, &oplock, NULL, NULL); if (rc) { kfree(utf16_path); return rc; @@ -136,7 +136,8 @@ smb2_query_path_info(const unsigned int xid, struct cifs_tcon *tcon, return -ENOMEM; rc = smb2_open_op_close(xid, tcon, cifs_sb, full_path, - FILE_READ_ATTRIBUTES, FILE_OPEN, 0, smb2_data, + FILE_READ_ATTRIBUTES, FILE_OPEN, + OPEN_REPARSE_POINT, smb2_data, SMB2_OP_QUERY_INFO); if (rc) goto out; @@ -191,8 +192,8 @@ smb2_unlink(const unsigned int xid, struct cifs_tcon *tcon, const char *name, struct cifs_sb_info *cifs_sb) { return smb2_open_op_close(xid, tcon, cifs_sb, name, DELETE, FILE_OPEN, - CREATE_DELETE_ON_CLOSE, NULL, - SMB2_OP_DELETE); + CREATE_DELETE_ON_CLOSE | OPEN_REPARSE_POINT, + NULL, SMB2_OP_DELETE); } static int diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c index b0c43345cd98..fb3966265b6e 100644 --- a/fs/cifs/smb2misc.c +++ b/fs/cifs/smb2misc.c @@ -171,6 +171,10 @@ smb2_check_message(char *buf, unsigned int length) if (4 + len != clc_len) { cifs_dbg(FYI, "Calculated size %u length %u mismatch mid %llu\n", clc_len, 4 + len, mid); + /* create failed on symlink */ + if (command == SMB2_CREATE_HE && + hdr->Status == STATUS_STOPPED_ON_SYMLINK) + return 0; /* Windows 7 server returns 24 bytes more */ if (clc_len + 20 == len && command == SMB2_OPLOCK_BREAK_HE) return 0; @@ -376,23 +380,15 @@ cifs_convert_path_to_utf16(const char *from, struct cifs_sb_info *cifs_sb) __le32 smb2_get_lease_state(struct cifsInodeInfo *cinode) { - if (cinode->clientCanCacheAll) - return SMB2_LEASE_WRITE_CACHING | SMB2_LEASE_READ_CACHING; - else if (cinode->clientCanCacheRead) - return SMB2_LEASE_READ_CACHING; - return 0; -} - -__u8 smb2_map_lease_to_oplock(__le32 lease_state) -{ - if (lease_state & SMB2_LEASE_WRITE_CACHING) { - if (lease_state & SMB2_LEASE_HANDLE_CACHING) - return SMB2_OPLOCK_LEVEL_BATCH; - else - return SMB2_OPLOCK_LEVEL_EXCLUSIVE; - } else if (lease_state & SMB2_LEASE_READ_CACHING) - return SMB2_OPLOCK_LEVEL_II; - return 0; + __le32 lease = 0; + + if (CIFS_CACHE_WRITE(cinode)) + lease |= SMB2_LEASE_WRITE_CACHING; + if (CIFS_CACHE_HANDLE(cinode)) + lease |= SMB2_LEASE_HANDLE_CACHING; + if (CIFS_CACHE_READ(cinode)) + lease |= SMB2_LEASE_READ_CACHING; + return lease; } struct smb2_lease_break_work { @@ -417,96 +413,109 @@ cifs_ses_oplock_break(struct work_struct *work) } static bool -smb2_is_valid_lease_break(char *buffer, struct TCP_Server_Info *server) +smb2_tcon_has_lease(struct cifs_tcon *tcon, struct smb2_lease_break *rsp, + struct smb2_lease_break_work *lw) { - struct smb2_lease_break *rsp = (struct smb2_lease_break *)buffer; - struct list_head *tmp, *tmp1, *tmp2; - struct cifs_ses *ses; - struct cifs_tcon *tcon; - struct cifsInodeInfo *cinode; + bool found; + __u8 lease_state; + struct list_head *tmp; struct cifsFileInfo *cfile; + struct TCP_Server_Info *server = tcon->ses->server; struct cifs_pending_open *open; - struct smb2_lease_break_work *lw; - bool found; + struct cifsInodeInfo *cinode; int ack_req = le32_to_cpu(rsp->Flags & SMB2_NOTIFY_BREAK_LEASE_FLAG_ACK_REQUIRED); - lw = kmalloc(sizeof(struct smb2_lease_break_work), GFP_KERNEL); - if (!lw) - return false; + lease_state = le32_to_cpu(rsp->NewLeaseState); - INIT_WORK(&lw->lease_break, cifs_ses_oplock_break); - lw->lease_state = rsp->NewLeaseState; + list_for_each(tmp, &tcon->openFileList) { + cfile = list_entry(tmp, struct cifsFileInfo, tlist); + cinode = CIFS_I(cfile->dentry->d_inode); - cifs_dbg(FYI, "Checking for lease break\n"); + if (memcmp(cinode->lease_key, rsp->LeaseKey, + SMB2_LEASE_KEY_SIZE)) + continue; - /* look up tcon based on tid & uid */ - spin_lock(&cifs_tcp_ses_lock); - list_for_each(tmp, &server->smb_ses_list) { - ses = list_entry(tmp, struct cifs_ses, smb_ses_list); + cifs_dbg(FYI, "found in the open list\n"); + cifs_dbg(FYI, "lease key match, lease break 0x%d\n", + le32_to_cpu(rsp->NewLeaseState)); - spin_lock(&cifs_file_list_lock); - list_for_each(tmp1, &ses->tcon_list) { - tcon = list_entry(tmp1, struct cifs_tcon, tcon_list); + server->ops->set_oplock_level(cinode, lease_state, 0, NULL); - cifs_stats_inc(&tcon->stats.cifs_stats.num_oplock_brks); - list_for_each(tmp2, &tcon->openFileList) { - cfile = list_entry(tmp2, struct cifsFileInfo, - tlist); - cinode = CIFS_I(cfile->dentry->d_inode); + if (ack_req) + cfile->oplock_break_cancelled = false; + else + cfile->oplock_break_cancelled = true; - if (memcmp(cinode->lease_key, rsp->LeaseKey, - SMB2_LEASE_KEY_SIZE)) - continue; + queue_work(cifsiod_wq, &cfile->oplock_break); + kfree(lw); + return true; + } - cifs_dbg(FYI, "found in the open list\n"); - cifs_dbg(FYI, "lease key match, lease break 0x%d\n", - le32_to_cpu(rsp->NewLeaseState)); + found = false; + list_for_each_entry(open, &tcon->pending_opens, olist) { + if (memcmp(open->lease_key, rsp->LeaseKey, + SMB2_LEASE_KEY_SIZE)) + continue; + + if (!found && ack_req) { + found = true; + memcpy(lw->lease_key, open->lease_key, + SMB2_LEASE_KEY_SIZE); + lw->tlink = cifs_get_tlink(open->tlink); + queue_work(cifsiod_wq, &lw->lease_break); + } - smb2_set_oplock_level(cinode, - smb2_map_lease_to_oplock(rsp->NewLeaseState)); + cifs_dbg(FYI, "found in the pending open list\n"); + cifs_dbg(FYI, "lease key match, lease break 0x%d\n", + le32_to_cpu(rsp->NewLeaseState)); - if (ack_req) - cfile->oplock_break_cancelled = false; - else - cfile->oplock_break_cancelled = true; + open->oplock = lease_state; + } + return found; +} - queue_work(cifsiod_wq, &cfile->oplock_break); +static bool +smb2_is_valid_lease_break(char *buffer) +{ + struct smb2_lease_break *rsp = (struct smb2_lease_break *)buffer; + struct list_head *tmp, *tmp1, *tmp2; + struct TCP_Server_Info *server; + struct cifs_ses *ses; + struct cifs_tcon *tcon; + struct smb2_lease_break_work *lw; - spin_unlock(&cifs_file_list_lock); - spin_unlock(&cifs_tcp_ses_lock); - return true; - } + lw = kmalloc(sizeof(struct smb2_lease_break_work), GFP_KERNEL); + if (!lw) + return false; - found = false; - list_for_each_entry(open, &tcon->pending_opens, olist) { - if (memcmp(open->lease_key, rsp->LeaseKey, - SMB2_LEASE_KEY_SIZE)) - continue; + INIT_WORK(&lw->lease_break, cifs_ses_oplock_break); + lw->lease_state = rsp->NewLeaseState; - if (!found && ack_req) { - found = true; - memcpy(lw->lease_key, open->lease_key, - SMB2_LEASE_KEY_SIZE); - lw->tlink = cifs_get_tlink(open->tlink); - queue_work(cifsiod_wq, - &lw->lease_break); - } + cifs_dbg(FYI, "Checking for lease break\n"); - cifs_dbg(FYI, "found in the pending open list\n"); - cifs_dbg(FYI, "lease key match, lease break 0x%d\n", - le32_to_cpu(rsp->NewLeaseState)); + /* look up tcon based on tid & uid */ + spin_lock(&cifs_tcp_ses_lock); + list_for_each(tmp, &cifs_tcp_ses_list) { + server = list_entry(tmp, struct TCP_Server_Info, tcp_ses_list); - open->oplock = - smb2_map_lease_to_oplock(rsp->NewLeaseState); - } - if (found) { - spin_unlock(&cifs_file_list_lock); - spin_unlock(&cifs_tcp_ses_lock); - return true; + list_for_each(tmp1, &server->smb_ses_list) { + ses = list_entry(tmp1, struct cifs_ses, smb_ses_list); + + spin_lock(&cifs_file_list_lock); + list_for_each(tmp2, &ses->tcon_list) { + tcon = list_entry(tmp2, struct cifs_tcon, + tcon_list); + cifs_stats_inc( + &tcon->stats.cifs_stats.num_oplock_brks); + if (smb2_tcon_has_lease(tcon, rsp, lw)) { + spin_unlock(&cifs_file_list_lock); + spin_unlock(&cifs_tcp_ses_lock); + return true; + } } + spin_unlock(&cifs_file_list_lock); } - spin_unlock(&cifs_file_list_lock); } spin_unlock(&cifs_tcp_ses_lock); kfree(lw); @@ -532,7 +541,7 @@ smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server) if (rsp->StructureSize != smb2_rsp_struct_sizes[SMB2_OPLOCK_BREAK_HE]) { if (le16_to_cpu(rsp->StructureSize) == 44) - return smb2_is_valid_lease_break(buffer, server); + return smb2_is_valid_lease_break(buffer); else return false; } @@ -560,14 +569,15 @@ smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server) cifs_dbg(FYI, "file id match, oplock break\n"); cinode = CIFS_I(cfile->dentry->d_inode); - if (!cinode->clientCanCacheAll && + if (!CIFS_CACHE_WRITE(cinode) && rsp->OplockLevel == SMB2_OPLOCK_LEVEL_NONE) cfile->oplock_break_cancelled = true; else cfile->oplock_break_cancelled = false; - smb2_set_oplock_level(cinode, - rsp->OplockLevel ? SMB2_OPLOCK_LEVEL_II : 0); + server->ops->set_oplock_level(cinode, + rsp->OplockLevel ? SMB2_OPLOCK_LEVEL_II : 0, + 0, NULL); queue_work(cifsiod_wq, &cfile->oplock_break); diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index f259e6cc8357..861b33214144 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -24,6 +24,7 @@ #include "smb2proto.h" #include "cifsproto.h" #include "cifs_debug.h" +#include "cifs_unicode.h" #include "smb2status.h" #include "smb2glob.h" @@ -229,7 +230,7 @@ smb2_is_path_accessible(const unsigned int xid, struct cifs_tcon *tcon, oparms.fid = &fid; oparms.reconnect = false; - rc = SMB2_open(xid, &oparms, utf16_path, &oplock, NULL); + rc = SMB2_open(xid, &oparms, utf16_path, &oplock, NULL, NULL); if (rc) { kfree(utf16_path); return rc; @@ -376,10 +377,13 @@ static void smb2_set_fid(struct cifsFileInfo *cfile, struct cifs_fid *fid, __u32 oplock) { struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode); + struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server; + cfile->fid.persistent_fid = fid->persistent_fid; cfile->fid.volatile_fid = fid->volatile_fid; - smb2_set_oplock_level(cinode, oplock); - cinode->can_cache_brlcks = cinode->clientCanCacheAll; + server->ops->set_oplock_level(cinode, oplock, fid->epoch, + &fid->purge_cache); + cinode->can_cache_brlcks = CIFS_CACHE_WRITE(cinode); } static void @@ -463,7 +467,7 @@ smb2_query_dir_first(const unsigned int xid, struct cifs_tcon *tcon, oparms.fid = fid; oparms.reconnect = false; - rc = SMB2_open(xid, &oparms, utf16_path, &oplock, NULL); + rc = SMB2_open(xid, &oparms, utf16_path, &oplock, NULL, NULL); kfree(utf16_path); if (rc) { cifs_dbg(VFS, "open dir failed\n"); @@ -530,7 +534,7 @@ smb2_oplock_response(struct cifs_tcon *tcon, struct cifs_fid *fid, return SMB2_oplock_break(0, tcon, fid->persistent_fid, fid->volatile_fid, - cinode->clientCanCacheRead ? 1 : 0); + CIFS_CACHE_READ(cinode) ? 1 : 0); } static int @@ -550,7 +554,7 @@ smb2_queryfs(const unsigned int xid, struct cifs_tcon *tcon, oparms.fid = &fid; oparms.reconnect = false; - rc = SMB2_open(xid, &oparms, &srch_path, &oplock, NULL); + rc = SMB2_open(xid, &oparms, &srch_path, &oplock, NULL, NULL); if (rc) return rc; buf->f_type = SMB2_MAGIC_NUMBER; @@ -596,7 +600,245 @@ smb2_new_lease_key(struct cifs_fid *fid) get_random_bytes(fid->lease_key, SMB2_LEASE_KEY_SIZE); } -struct smb_version_operations smb21_operations = { +static int +smb2_query_symlink(const unsigned int xid, struct cifs_tcon *tcon, + const char *full_path, char **target_path, + struct cifs_sb_info *cifs_sb) +{ + int rc; + __le16 *utf16_path; + __u8 oplock = SMB2_OPLOCK_LEVEL_NONE; + struct cifs_open_parms oparms; + struct cifs_fid fid; + struct smb2_err_rsp *err_buf = NULL; + struct smb2_symlink_err_rsp *symlink; + unsigned int sub_len, sub_offset; + + cifs_dbg(FYI, "%s: path: %s\n", __func__, full_path); + + utf16_path = cifs_convert_path_to_utf16(full_path, cifs_sb); + if (!utf16_path) + return -ENOMEM; + + oparms.tcon = tcon; + oparms.desired_access = FILE_READ_ATTRIBUTES; + oparms.disposition = FILE_OPEN; + oparms.create_options = 0; + oparms.fid = &fid; + oparms.reconnect = false; + + rc = SMB2_open(xid, &oparms, utf16_path, &oplock, NULL, &err_buf); + + if (!rc || !err_buf) { + kfree(utf16_path); + return -ENOENT; + } + /* open must fail on symlink - reset rc */ + rc = 0; + symlink = (struct smb2_symlink_err_rsp *)err_buf->ErrorData; + sub_len = le16_to_cpu(symlink->SubstituteNameLength); + sub_offset = le16_to_cpu(symlink->SubstituteNameOffset); + *target_path = cifs_strndup_from_utf16( + (char *)symlink->PathBuffer + sub_offset, + sub_len, true, cifs_sb->local_nls); + if (!(*target_path)) { + kfree(utf16_path); + return -ENOMEM; + } + convert_delimiter(*target_path, '/'); + cifs_dbg(FYI, "%s: target path: %s\n", __func__, *target_path); + kfree(utf16_path); + return rc; +} + +static void +smb2_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock, + unsigned int epoch, bool *purge_cache) +{ + oplock &= 0xFF; + if (oplock == SMB2_OPLOCK_LEVEL_NOCHANGE) + return; + if (oplock == SMB2_OPLOCK_LEVEL_BATCH) { + cinode->oplock = CIFS_CACHE_RHW_FLG; + cifs_dbg(FYI, "Batch Oplock granted on inode %p\n", + &cinode->vfs_inode); + } else if (oplock == SMB2_OPLOCK_LEVEL_EXCLUSIVE) { + cinode->oplock = CIFS_CACHE_RW_FLG; + cifs_dbg(FYI, "Exclusive Oplock granted on inode %p\n", + &cinode->vfs_inode); + } else if (oplock == SMB2_OPLOCK_LEVEL_II) { + cinode->oplock = CIFS_CACHE_READ_FLG; + cifs_dbg(FYI, "Level II Oplock granted on inode %p\n", + &cinode->vfs_inode); + } else + cinode->oplock = 0; +} + +static void +smb21_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock, + unsigned int epoch, bool *purge_cache) +{ + char message[5] = {0}; + + oplock &= 0xFF; + if (oplock == SMB2_OPLOCK_LEVEL_NOCHANGE) + return; + + cinode->oplock = 0; + if (oplock & SMB2_LEASE_READ_CACHING_HE) { + cinode->oplock |= CIFS_CACHE_READ_FLG; + strcat(message, "R"); + } + if (oplock & SMB2_LEASE_HANDLE_CACHING_HE) { + cinode->oplock |= CIFS_CACHE_HANDLE_FLG; + strcat(message, "H"); + } + if (oplock & SMB2_LEASE_WRITE_CACHING_HE) { + cinode->oplock |= CIFS_CACHE_WRITE_FLG; + strcat(message, "W"); + } + if (!cinode->oplock) + strcat(message, "None"); + cifs_dbg(FYI, "%s Lease granted on inode %p\n", message, + &cinode->vfs_inode); +} + +static void +smb3_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock, + unsigned int epoch, bool *purge_cache) +{ + unsigned int old_oplock = cinode->oplock; + + smb21_set_oplock_level(cinode, oplock, epoch, purge_cache); + + if (purge_cache) { + *purge_cache = false; + if (old_oplock == CIFS_CACHE_READ_FLG) { + if (cinode->oplock == CIFS_CACHE_READ_FLG && + (epoch - cinode->epoch > 0)) + *purge_cache = true; + else if (cinode->oplock == CIFS_CACHE_RH_FLG && + (epoch - cinode->epoch > 1)) + *purge_cache = true; + else if (cinode->oplock == CIFS_CACHE_RHW_FLG && + (epoch - cinode->epoch > 1)) + *purge_cache = true; + else if (cinode->oplock == 0 && + (epoch - cinode->epoch > 0)) + *purge_cache = true; + } else if (old_oplock == CIFS_CACHE_RH_FLG) { + if (cinode->oplock == CIFS_CACHE_RH_FLG && + (epoch - cinode->epoch > 0)) + *purge_cache = true; + else if (cinode->oplock == CIFS_CACHE_RHW_FLG && + (epoch - cinode->epoch > 1)) + *purge_cache = true; + } + cinode->epoch = epoch; + } +} + +static bool +smb2_is_read_op(__u32 oplock) +{ + return oplock == SMB2_OPLOCK_LEVEL_II; +} + +static bool +smb21_is_read_op(__u32 oplock) +{ + return (oplock & SMB2_LEASE_READ_CACHING_HE) && + !(oplock & SMB2_LEASE_WRITE_CACHING_HE); +} + +static __le32 +map_oplock_to_lease(u8 oplock) +{ + if (oplock == SMB2_OPLOCK_LEVEL_EXCLUSIVE) + return SMB2_LEASE_WRITE_CACHING | SMB2_LEASE_READ_CACHING; + else if (oplock == SMB2_OPLOCK_LEVEL_II) + return SMB2_LEASE_READ_CACHING; + else if (oplock == SMB2_OPLOCK_LEVEL_BATCH) + return SMB2_LEASE_HANDLE_CACHING | SMB2_LEASE_READ_CACHING | + SMB2_LEASE_WRITE_CACHING; + return 0; +} + +static char * +smb2_create_lease_buf(u8 *lease_key, u8 oplock) +{ + struct create_lease *buf; + + buf = kzalloc(sizeof(struct create_lease), GFP_KERNEL); + if (!buf) + return NULL; + + buf->lcontext.LeaseKeyLow = cpu_to_le64(*((u64 *)lease_key)); + buf->lcontext.LeaseKeyHigh = cpu_to_le64(*((u64 *)(lease_key + 8))); + buf->lcontext.LeaseState = map_oplock_to_lease(oplock); + + buf->ccontext.DataOffset = cpu_to_le16(offsetof + (struct create_lease, lcontext)); + buf->ccontext.DataLength = cpu_to_le32(sizeof(struct lease_context)); + buf->ccontext.NameOffset = cpu_to_le16(offsetof + (struct create_lease, Name)); + buf->ccontext.NameLength = cpu_to_le16(4); + buf->Name[0] = 'R'; + buf->Name[1] = 'q'; + buf->Name[2] = 'L'; + buf->Name[3] = 's'; + return (char *)buf; +} + +static char * +smb3_create_lease_buf(u8 *lease_key, u8 oplock) +{ + struct create_lease_v2 *buf; + + buf = kzalloc(sizeof(struct create_lease_v2), GFP_KERNEL); + if (!buf) + return NULL; + + buf->lcontext.LeaseKeyLow = cpu_to_le64(*((u64 *)lease_key)); + buf->lcontext.LeaseKeyHigh = cpu_to_le64(*((u64 *)(lease_key + 8))); + buf->lcontext.LeaseState = map_oplock_to_lease(oplock); + + buf->ccontext.DataOffset = cpu_to_le16(offsetof + (struct create_lease_v2, lcontext)); + buf->ccontext.DataLength = cpu_to_le32(sizeof(struct lease_context_v2)); + buf->ccontext.NameOffset = cpu_to_le16(offsetof + (struct create_lease_v2, Name)); + buf->ccontext.NameLength = cpu_to_le16(4); + buf->Name[0] = 'R'; + buf->Name[1] = 'q'; + buf->Name[2] = 'L'; + buf->Name[3] = 's'; + return (char *)buf; +} + +static __u8 +smb2_parse_lease_buf(void *buf, unsigned int *epoch) +{ + struct create_lease *lc = (struct create_lease *)buf; + + *epoch = 0; /* not used */ + if (lc->lcontext.LeaseFlags & SMB2_LEASE_FLAG_BREAK_IN_PROGRESS) + return SMB2_OPLOCK_LEVEL_NOCHANGE; + return le32_to_cpu(lc->lcontext.LeaseState); +} + +static __u8 +smb3_parse_lease_buf(void *buf, unsigned int *epoch) +{ + struct create_lease_v2 *lc = (struct create_lease_v2 *)buf; + + *epoch = le16_to_cpu(lc->lcontext.Epoch); + if (lc->lcontext.LeaseFlags & SMB2_LEASE_FLAG_BREAK_IN_PROGRESS) + return SMB2_OPLOCK_LEVEL_NOCHANGE; + return le32_to_cpu(lc->lcontext.LeaseState); +} + +struct smb_version_operations smb20_operations = { .compare_fids = smb2_compare_fids, .setup_request = smb2_setup_request, .setup_async_request = smb2_setup_async_request, @@ -638,6 +880,7 @@ struct smb_version_operations smb21_operations = { .unlink = smb2_unlink, .rename = smb2_rename_path, .create_hardlink = smb2_create_hardlink, + .query_symlink = smb2_query_symlink, .open = smb2_open_file, .set_fid = smb2_set_fid, .close = smb2_close_file, @@ -660,8 +903,82 @@ struct smb_version_operations smb21_operations = { .set_lease_key = smb2_set_lease_key, .new_lease_key = smb2_new_lease_key, .calc_signature = smb2_calc_signature, + .is_read_op = smb2_is_read_op, + .set_oplock_level = smb2_set_oplock_level, + .create_lease_buf = smb2_create_lease_buf, + .parse_lease_buf = smb2_parse_lease_buf, }; +struct smb_version_operations smb21_operations = { + .compare_fids = smb2_compare_fids, + .setup_request = smb2_setup_request, + .setup_async_request = smb2_setup_async_request, + .check_receive = smb2_check_receive, + .add_credits = smb2_add_credits, + .set_credits = smb2_set_credits, + .get_credits_field = smb2_get_credits_field, + .get_credits = smb2_get_credits, + .get_next_mid = smb2_get_next_mid, + .read_data_offset = smb2_read_data_offset, + .read_data_length = smb2_read_data_length, + .map_error = map_smb2_to_linux_error, + .find_mid = smb2_find_mid, + .check_message = smb2_check_message, + .dump_detail = smb2_dump_detail, + .clear_stats = smb2_clear_stats, + .print_stats = smb2_print_stats, + .is_oplock_break = smb2_is_valid_oplock_break, + .need_neg = smb2_need_neg, + .negotiate = smb2_negotiate, + .negotiate_wsize = smb2_negotiate_wsize, + .negotiate_rsize = smb2_negotiate_rsize, + .sess_setup = SMB2_sess_setup, + .logoff = SMB2_logoff, + .tree_connect = SMB2_tcon, + .tree_disconnect = SMB2_tdis, + .is_path_accessible = smb2_is_path_accessible, + .can_echo = smb2_can_echo, + .echo = SMB2_echo, + .query_path_info = smb2_query_path_info, + .get_srv_inum = smb2_get_srv_inum, + .query_file_info = smb2_query_file_info, + .set_path_size = smb2_set_path_size, + .set_file_size = smb2_set_file_size, + .set_file_info = smb2_set_file_info, + .mkdir = smb2_mkdir, + .mkdir_setinfo = smb2_mkdir_setinfo, + .rmdir = smb2_rmdir, + .unlink = smb2_unlink, + .rename = smb2_rename_path, + .create_hardlink = smb2_create_hardlink, + .query_symlink = smb2_query_symlink, + .open = smb2_open_file, + .set_fid = smb2_set_fid, + .close = smb2_close_file, + .flush = smb2_flush_file, + .async_readv = smb2_async_readv, + .async_writev = smb2_async_writev, + .sync_read = smb2_sync_read, + .sync_write = smb2_sync_write, + .query_dir_first = smb2_query_dir_first, + .query_dir_next = smb2_query_dir_next, + .close_dir = smb2_close_dir, + .calc_smb_size = smb2_calc_size, + .is_status_pending = smb2_is_status_pending, + .oplock_response = smb2_oplock_response, + .queryfs = smb2_queryfs, + .mand_lock = smb2_mand_lock, + .mand_unlock_range = smb2_unlock_range, + .push_mand_locks = smb2_push_mandatory_locks, + .get_lease_key = smb2_get_lease_key, + .set_lease_key = smb2_set_lease_key, + .new_lease_key = smb2_new_lease_key, + .calc_signature = smb2_calc_signature, + .is_read_op = smb21_is_read_op, + .set_oplock_level = smb21_set_oplock_level, + .create_lease_buf = smb2_create_lease_buf, + .parse_lease_buf = smb2_parse_lease_buf, +}; struct smb_version_operations smb30_operations = { .compare_fids = smb2_compare_fids, @@ -706,6 +1023,7 @@ struct smb_version_operations smb30_operations = { .unlink = smb2_unlink, .rename = smb2_rename_path, .create_hardlink = smb2_create_hardlink, + .query_symlink = smb2_query_symlink, .open = smb2_open_file, .set_fid = smb2_set_fid, .close = smb2_close_file, @@ -729,6 +1047,10 @@ struct smb_version_operations smb30_operations = { .new_lease_key = smb2_new_lease_key, .generate_signingkey = generate_smb3signingkey, .calc_signature = smb3_calc_signature, + .is_read_op = smb21_is_read_op, + .set_oplock_level = smb3_set_oplock_level, + .create_lease_buf = smb3_create_lease_buf, + .parse_lease_buf = smb3_parse_lease_buf, }; struct smb_version_values smb20_values = { @@ -746,9 +1068,9 @@ struct smb_version_values smb20_values = { .cap_unix = 0, .cap_nt_find = SMB2_NT_FIND, .cap_large_files = SMB2_LARGE_FILES, - .oplock_read = SMB2_OPLOCK_LEVEL_II, .signing_enabled = SMB2_NEGOTIATE_SIGNING_ENABLED | SMB2_NEGOTIATE_SIGNING_REQUIRED, .signing_required = SMB2_NEGOTIATE_SIGNING_REQUIRED, + .create_lease_size = sizeof(struct create_lease), }; struct smb_version_values smb21_values = { @@ -766,9 +1088,9 @@ struct smb_version_values smb21_values = { .cap_unix = 0, .cap_nt_find = SMB2_NT_FIND, .cap_large_files = SMB2_LARGE_FILES, - .oplock_read = SMB2_OPLOCK_LEVEL_II, .signing_enabled = SMB2_NEGOTIATE_SIGNING_ENABLED | SMB2_NEGOTIATE_SIGNING_REQUIRED, .signing_required = SMB2_NEGOTIATE_SIGNING_REQUIRED, + .create_lease_size = sizeof(struct create_lease), }; struct smb_version_values smb30_values = { @@ -786,9 +1108,9 @@ struct smb_version_values smb30_values = { .cap_unix = 0, .cap_nt_find = SMB2_NT_FIND, .cap_large_files = SMB2_LARGE_FILES, - .oplock_read = SMB2_OPLOCK_LEVEL_II, .signing_enabled = SMB2_NEGOTIATE_SIGNING_ENABLED | SMB2_NEGOTIATE_SIGNING_REQUIRED, .signing_required = SMB2_NEGOTIATE_SIGNING_REQUIRED, + .create_lease_size = sizeof(struct create_lease_v2), }; struct smb_version_values smb302_values = { @@ -806,7 +1128,7 @@ struct smb_version_values smb302_values = { .cap_unix = 0, .cap_nt_find = SMB2_NT_FIND, .cap_large_files = SMB2_LARGE_FILES, - .oplock_read = SMB2_OPLOCK_LEVEL_II, .signing_enabled = SMB2_NEGOTIATE_SIGNING_ENABLED | SMB2_NEGOTIATE_SIGNING_REQUIRED, .signing_required = SMB2_NEGOTIATE_SIGNING_REQUIRED, + .create_lease_size = sizeof(struct create_lease_v2), }; diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index abc9c2809b51..eba0efde66d7 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -478,12 +478,20 @@ SMB2_sess_setup(const unsigned int xid, struct cifs_ses *ses, } /* + * If we are here due to reconnect, free per-smb session key + * in case signing was required. + */ + kfree(ses->auth_key.response); + ses->auth_key.response = NULL; + + /* * If memory allocation is successful, caller of this function * frees it. */ ses->ntlmssp = kmalloc(sizeof(struct ntlmssp_auth), GFP_KERNEL); if (!ses->ntlmssp) return -ENOMEM; + ses->ntlmssp->sesskey_per_smbsess = true; /* FIXME: allow for other auth types besides NTLMSSP (e.g. krb5) */ ses->sectype = RawNTLMSSP; @@ -628,6 +636,40 @@ ssetup_exit: /* if ntlmssp, and negotiate succeeded, proceed to authenticate phase */ if ((phase == NtLmChallenge) && (rc == 0)) goto ssetup_ntlmssp_authenticate; + + if (!rc) { + mutex_lock(&server->srv_mutex); + if (server->sign && server->ops->generate_signingkey) { + rc = server->ops->generate_signingkey(ses); + kfree(ses->auth_key.response); + ses->auth_key.response = NULL; + if (rc) { + cifs_dbg(FYI, + "SMB3 session key generation failed\n"); + mutex_unlock(&server->srv_mutex); + goto keygen_exit; + } + } + if (!server->session_estab) { + server->sequence_number = 0x2; + server->session_estab = true; + } + mutex_unlock(&server->srv_mutex); + + cifs_dbg(FYI, "SMB2/3 session established successfully\n"); + spin_lock(&GlobalMid_Lock); + ses->status = CifsGood; + ses->need_reconnect = false; + spin_unlock(&GlobalMid_Lock); + } + +keygen_exit: + if (!server->sign) { + kfree(ses->auth_key.response); + ses->auth_key.response = NULL; + } + kfree(ses->ntlmssp); + return rc; } @@ -813,39 +855,6 @@ SMB2_tdis(const unsigned int xid, struct cifs_tcon *tcon) return rc; } -static struct create_lease * -create_lease_buf(u8 *lease_key, u8 oplock) -{ - struct create_lease *buf; - - buf = kzalloc(sizeof(struct create_lease), GFP_KERNEL); - if (!buf) - return NULL; - - buf->lcontext.LeaseKeyLow = cpu_to_le64(*((u64 *)lease_key)); - buf->lcontext.LeaseKeyHigh = cpu_to_le64(*((u64 *)(lease_key + 8))); - if (oplock == SMB2_OPLOCK_LEVEL_EXCLUSIVE) - buf->lcontext.LeaseState = SMB2_LEASE_WRITE_CACHING | - SMB2_LEASE_READ_CACHING; - else if (oplock == SMB2_OPLOCK_LEVEL_II) - buf->lcontext.LeaseState = SMB2_LEASE_READ_CACHING; - else if (oplock == SMB2_OPLOCK_LEVEL_BATCH) - buf->lcontext.LeaseState = SMB2_LEASE_HANDLE_CACHING | - SMB2_LEASE_READ_CACHING | - SMB2_LEASE_WRITE_CACHING; - - buf->ccontext.DataOffset = cpu_to_le16(offsetof - (struct create_lease, lcontext)); - buf->ccontext.DataLength = cpu_to_le32(sizeof(struct lease_context)); - buf->ccontext.NameOffset = cpu_to_le16(offsetof - (struct create_lease, Name)); - buf->ccontext.NameLength = cpu_to_le16(4); - buf->Name[0] = 'R'; - buf->Name[1] = 'q'; - buf->Name[2] = 'L'; - buf->Name[3] = 's'; - return buf; -} static struct create_durable * create_durable_buf(void) @@ -894,55 +903,49 @@ create_reconnect_durable_buf(struct cifs_fid *fid) } static __u8 -parse_lease_state(struct smb2_create_rsp *rsp) +parse_lease_state(struct TCP_Server_Info *server, struct smb2_create_rsp *rsp, + unsigned int *epoch) { char *data_offset; - struct create_lease *lc; - bool found = false; + struct create_context *cc; unsigned int next = 0; char *name; data_offset = (char *)rsp + 4 + le32_to_cpu(rsp->CreateContextsOffset); - lc = (struct create_lease *)data_offset; + cc = (struct create_context *)data_offset; do { - lc = (struct create_lease *)((char *)lc + next); - name = le16_to_cpu(lc->ccontext.NameOffset) + (char *)lc; - if (le16_to_cpu(lc->ccontext.NameLength) != 4 || + cc = (struct create_context *)((char *)cc + next); + name = le16_to_cpu(cc->NameOffset) + (char *)cc; + if (le16_to_cpu(cc->NameLength) != 4 || strncmp(name, "RqLs", 4)) { - next = le32_to_cpu(lc->ccontext.Next); + next = le32_to_cpu(cc->Next); continue; } - if (lc->lcontext.LeaseFlags & SMB2_LEASE_FLAG_BREAK_IN_PROGRESS) - return SMB2_OPLOCK_LEVEL_NOCHANGE; - found = true; - break; + return server->ops->parse_lease_buf(cc, epoch); } while (next != 0); - if (!found) - return 0; - - return smb2_map_lease_to_oplock(lc->lcontext.LeaseState); + return 0; } static int -add_lease_context(struct kvec *iov, unsigned int *num_iovec, __u8 *oplock) +add_lease_context(struct TCP_Server_Info *server, struct kvec *iov, + unsigned int *num_iovec, __u8 *oplock) { struct smb2_create_req *req = iov[0].iov_base; unsigned int num = *num_iovec; - iov[num].iov_base = create_lease_buf(oplock+1, *oplock); + iov[num].iov_base = server->ops->create_lease_buf(oplock+1, *oplock); if (iov[num].iov_base == NULL) return -ENOMEM; - iov[num].iov_len = sizeof(struct create_lease); + iov[num].iov_len = server->vals->create_lease_size; req->RequestedOplockLevel = SMB2_OPLOCK_LEVEL_LEASE; if (!req->CreateContextsOffset) req->CreateContextsOffset = cpu_to_le32( sizeof(struct smb2_create_req) - 4 + iov[num - 1].iov_len); - req->CreateContextsLength = cpu_to_le32( - le32_to_cpu(req->CreateContextsLength) + - sizeof(struct create_lease)); - inc_rfc1001_len(&req->hdr, sizeof(struct create_lease)); + le32_add_cpu(&req->CreateContextsLength, + server->vals->create_lease_size); + inc_rfc1001_len(&req->hdr, server->vals->create_lease_size); *num_iovec = num + 1; return 0; } @@ -967,9 +970,7 @@ add_durable_context(struct kvec *iov, unsigned int *num_iovec, req->CreateContextsOffset = cpu_to_le32(sizeof(struct smb2_create_req) - 4 + iov[1].iov_len); - req->CreateContextsLength = - cpu_to_le32(le32_to_cpu(req->CreateContextsLength) + - sizeof(struct create_durable)); + le32_add_cpu(&req->CreateContextsLength, sizeof(struct create_durable)); inc_rfc1001_len(&req->hdr, sizeof(struct create_durable)); *num_iovec = num + 1; return 0; @@ -977,7 +978,8 @@ add_durable_context(struct kvec *iov, unsigned int *num_iovec, int SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path, - __u8 *oplock, struct smb2_file_all_info *buf) + __u8 *oplock, struct smb2_file_all_info *buf, + struct smb2_err_rsp **err_buf) { struct smb2_create_req *req; struct smb2_create_rsp *rsp; @@ -1048,11 +1050,11 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path, if (!server->oplocks) *oplock = SMB2_OPLOCK_LEVEL_NONE; - if (!(tcon->ses->server->capabilities & SMB2_GLOBAL_CAP_LEASING) || + if (!(server->capabilities & SMB2_GLOBAL_CAP_LEASING) || *oplock == SMB2_OPLOCK_LEVEL_NONE) req->RequestedOplockLevel = *oplock; else { - rc = add_lease_context(iov, &num_iovecs, oplock); + rc = add_lease_context(server, iov, &num_iovecs, oplock); if (rc) { cifs_small_buf_release(req); kfree(copy_path); @@ -1062,11 +1064,11 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path, if (*oplock == SMB2_OPLOCK_LEVEL_BATCH) { /* need to set Next field of lease context if we request it */ - if (tcon->ses->server->capabilities & SMB2_GLOBAL_CAP_LEASING) { + if (server->capabilities & SMB2_GLOBAL_CAP_LEASING) { struct create_context *ccontext = (struct create_context *)iov[num_iovecs-1].iov_base; ccontext->Next = - cpu_to_le32(sizeof(struct create_lease)); + cpu_to_le32(server->vals->create_lease_size); } rc = add_durable_context(iov, &num_iovecs, oparms); if (rc) { @@ -1082,6 +1084,9 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path, if (rc != 0) { cifs_stats_fail_inc(tcon, SMB2_CREATE_HE); + if (err_buf) + *err_buf = kmemdup(rsp, get_rfc1002_length(rsp) + 4, + GFP_KERNEL); goto creat_exit; } @@ -1098,7 +1103,7 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path, } if (rsp->OplockLevel == SMB2_OPLOCK_LEVEL_LEASE) - *oplock = parse_lease_state(rsp); + *oplock = parse_lease_state(server, rsp, &oparms->fid->epoch); else *oplock = rsp->OplockLevel; creat_exit: diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index 36b0d37ea69b..b83d0118a757 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -150,6 +150,20 @@ struct smb2_err_rsp { __u8 ErrorData[1]; /* variable length */ } __packed; +struct smb2_symlink_err_rsp { + __le32 SymLinkLength; + __le32 SymLinkErrorTag; + __le32 ReparseTag; + __le16 ReparseDataLength; + __le16 UnparsedPathLength; + __le16 SubstituteNameOffset; + __le16 SubstituteNameLength; + __le16 PrintNameOffset; + __le16 PrintNameLength; + __le32 Flags; + __u8 PathBuffer[0]; +} __packed; + #define SMB2_CLIENT_GUID_SIZE 16 extern __u8 cifs_client_guid[SMB2_CLIENT_GUID_SIZE]; @@ -462,6 +476,10 @@ struct create_context { __u8 Buffer[0]; } __packed; +#define SMB2_LEASE_READ_CACHING_HE 0x01 +#define SMB2_LEASE_HANDLE_CACHING_HE 0x02 +#define SMB2_LEASE_WRITE_CACHING_HE 0x04 + #define SMB2_LEASE_NONE __constant_cpu_to_le32(0x00) #define SMB2_LEASE_READ_CACHING __constant_cpu_to_le32(0x01) #define SMB2_LEASE_HANDLE_CACHING __constant_cpu_to_le32(0x02) @@ -479,12 +497,31 @@ struct lease_context { __le64 LeaseDuration; } __packed; +struct lease_context_v2 { + __le64 LeaseKeyLow; + __le64 LeaseKeyHigh; + __le32 LeaseState; + __le32 LeaseFlags; + __le64 LeaseDuration; + __le64 ParentLeaseKeyLow; + __le64 ParentLeaseKeyHigh; + __le16 Epoch; + __le16 Reserved; +} __packed; + struct create_lease { struct create_context ccontext; __u8 Name[8]; struct lease_context lcontext; } __packed; +struct create_lease_v2 { + struct create_context ccontext; + __u8 Name[8]; + struct lease_context_v2 lcontext; + __u8 Pad[4]; +} __packed; + struct create_durable { struct create_context ccontext; __u8 Name[8]; diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h index 1a5ecbed40ed..e3fb4801ee96 100644 --- a/fs/cifs/smb2proto.h +++ b/fs/cifs/smb2proto.h @@ -53,7 +53,6 @@ extern int smb3_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server); extern void smb2_echo_request(struct work_struct *work); extern __le32 smb2_get_lease_state(struct cifsInodeInfo *cinode); -extern __u8 smb2_map_lease_to_oplock(__le32 lease_state); extern bool smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *srv); @@ -87,7 +86,6 @@ extern int smb2_create_hardlink(const unsigned int xid, struct cifs_tcon *tcon, extern int smb2_open_file(const unsigned int xid, struct cifs_open_parms *oparms, __u32 *oplock, FILE_ALL_INFO *buf); -extern void smb2_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock); extern int smb2_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock, const unsigned int xid); extern int smb2_push_mandatory_locks(struct cifsFileInfo *cfile); @@ -106,7 +104,8 @@ extern int SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, extern int SMB2_tdis(const unsigned int xid, struct cifs_tcon *tcon); extern int SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path, __u8 *oplock, - struct smb2_file_all_info *buf); + struct smb2_file_all_info *buf, + struct smb2_err_rsp **err_buf); extern int SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, u64 volatile_fid, u32 opcode, bool is_fsctl, char *in_data, u32 indatalen, diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c index 4f2300d020c7..340abca3aa52 100644 --- a/fs/cifs/smb2transport.c +++ b/fs/cifs/smb2transport.c @@ -114,6 +114,23 @@ smb3_crypto_shash_allocate(struct TCP_Server_Info *server) return 0; } +static struct cifs_ses * +smb2_find_smb_ses(struct smb2_hdr *smb2hdr, struct TCP_Server_Info *server) +{ + struct cifs_ses *ses; + + spin_lock(&cifs_tcp_ses_lock); + list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) { + if (ses->Suid != smb2hdr->SessionId) + continue; + spin_unlock(&cifs_tcp_ses_lock); + return ses; + } + spin_unlock(&cifs_tcp_ses_lock); + + return NULL; +} + int smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server) @@ -124,6 +141,13 @@ smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server) struct kvec *iov = rqst->rq_iov; int n_vec = rqst->rq_nvec; struct smb2_hdr *smb2_pdu = (struct smb2_hdr *)iov[0].iov_base; + struct cifs_ses *ses; + + ses = smb2_find_smb_ses(smb2_pdu, server); + if (!ses) { + cifs_dbg(VFS, "%s: Could not find session\n", __func__); + return 0; + } memset(smb2_signature, 0x0, SMB2_HMACSHA256_SIZE); memset(smb2_pdu->Signature, 0x0, SMB2_SIGNATURE_SIZE); @@ -135,7 +159,7 @@ smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server) } rc = crypto_shash_setkey(server->secmech.hmacsha256, - server->session_key.response, SMB2_NTLMV2_SESSKEY_SIZE); + ses->auth_key.response, SMB2_NTLMV2_SESSKEY_SIZE); if (rc) { cifs_dbg(VFS, "%s: Could not update with response\n", __func__); return rc; @@ -198,8 +222,8 @@ smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server) return rc; } -void -generate_smb3signingkey(struct TCP_Server_Info *server) +int +generate_smb3signingkey(struct cifs_ses *ses) { unsigned char zero = 0x0; __u8 i[4] = {0, 0, 0, 1}; @@ -209,90 +233,99 @@ generate_smb3signingkey(struct TCP_Server_Info *server) unsigned char *hashptr = prfhash; memset(prfhash, 0x0, SMB2_HMACSHA256_SIZE); - memset(server->smb3signingkey, 0x0, SMB3_SIGNKEY_SIZE); + memset(ses->smb3signingkey, 0x0, SMB3_SIGNKEY_SIZE); - rc = smb3_crypto_shash_allocate(server); + rc = smb3_crypto_shash_allocate(ses->server); if (rc) { cifs_dbg(VFS, "%s: crypto alloc failed\n", __func__); goto smb3signkey_ret; } - rc = crypto_shash_setkey(server->secmech.hmacsha256, - server->session_key.response, SMB2_NTLMV2_SESSKEY_SIZE); + rc = crypto_shash_setkey(ses->server->secmech.hmacsha256, + ses->auth_key.response, SMB2_NTLMV2_SESSKEY_SIZE); if (rc) { cifs_dbg(VFS, "%s: Could not set with session key\n", __func__); goto smb3signkey_ret; } - rc = crypto_shash_init(&server->secmech.sdeschmacsha256->shash); + rc = crypto_shash_init(&ses->server->secmech.sdeschmacsha256->shash); if (rc) { cifs_dbg(VFS, "%s: Could not init sign hmac\n", __func__); goto smb3signkey_ret; } - rc = crypto_shash_update(&server->secmech.sdeschmacsha256->shash, + rc = crypto_shash_update(&ses->server->secmech.sdeschmacsha256->shash, i, 4); if (rc) { cifs_dbg(VFS, "%s: Could not update with n\n", __func__); goto smb3signkey_ret; } - rc = crypto_shash_update(&server->secmech.sdeschmacsha256->shash, + rc = crypto_shash_update(&ses->server->secmech.sdeschmacsha256->shash, "SMB2AESCMAC", 12); if (rc) { cifs_dbg(VFS, "%s: Could not update with label\n", __func__); goto smb3signkey_ret; } - rc = crypto_shash_update(&server->secmech.sdeschmacsha256->shash, + rc = crypto_shash_update(&ses->server->secmech.sdeschmacsha256->shash, &zero, 1); if (rc) { cifs_dbg(VFS, "%s: Could not update with zero\n", __func__); goto smb3signkey_ret; } - rc = crypto_shash_update(&server->secmech.sdeschmacsha256->shash, + rc = crypto_shash_update(&ses->server->secmech.sdeschmacsha256->shash, "SmbSign", 8); if (rc) { cifs_dbg(VFS, "%s: Could not update with context\n", __func__); goto smb3signkey_ret; } - rc = crypto_shash_update(&server->secmech.sdeschmacsha256->shash, + rc = crypto_shash_update(&ses->server->secmech.sdeschmacsha256->shash, L, 4); if (rc) { cifs_dbg(VFS, "%s: Could not update with L\n", __func__); goto smb3signkey_ret; } - rc = crypto_shash_final(&server->secmech.sdeschmacsha256->shash, + rc = crypto_shash_final(&ses->server->secmech.sdeschmacsha256->shash, hashptr); if (rc) { cifs_dbg(VFS, "%s: Could not generate sha256 hash\n", __func__); goto smb3signkey_ret; } - memcpy(server->smb3signingkey, hashptr, SMB3_SIGNKEY_SIZE); + memcpy(ses->smb3signingkey, hashptr, SMB3_SIGNKEY_SIZE); smb3signkey_ret: - return; + return rc; } int smb3_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server) { - int i, rc; + int i; + int rc = 0; unsigned char smb3_signature[SMB2_CMACAES_SIZE]; unsigned char *sigptr = smb3_signature; struct kvec *iov = rqst->rq_iov; int n_vec = rqst->rq_nvec; struct smb2_hdr *smb2_pdu = (struct smb2_hdr *)iov[0].iov_base; + struct cifs_ses *ses; + + ses = smb2_find_smb_ses(smb2_pdu, server); + if (!ses) { + cifs_dbg(VFS, "%s: Could not find session\n", __func__); + return 0; + } memset(smb3_signature, 0x0, SMB2_CMACAES_SIZE); memset(smb2_pdu->Signature, 0x0, SMB2_SIGNATURE_SIZE); rc = crypto_shash_setkey(server->secmech.cmacaes, - server->smb3signingkey, SMB2_CMACAES_SIZE); + ses->smb3signingkey, SMB2_CMACAES_SIZE); + if (rc) { cifs_dbg(VFS, "%s: Could not set key for cmac aes\n", __func__); return rc; @@ -389,6 +422,7 @@ smb2_verify_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server) struct smb2_hdr *smb2_pdu = (struct smb2_hdr *)rqst->rq_iov[0].iov_base; if ((smb2_pdu->Command == SMB2_NEGOTIATE) || + (smb2_pdu->Command == SMB2_SESSION_SETUP) || (smb2_pdu->Command == SMB2_OPLOCK_BREAK) || (!server->session_estab)) return 0; diff --git a/fs/cifs/winucase.c b/fs/cifs/winucase.c new file mode 100644 index 000000000000..1506d4fddb2c --- /dev/null +++ b/fs/cifs/winucase.c @@ -0,0 +1,663 @@ +/* + * fs/cifs/winucase.c + * + * Copyright (c) Jeffrey Layton <jlayton@redhat.com>, 2013 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * The const tables in this file were converted from the following info + * provided by Microsoft: + * + * 3.1.5.3 Mapping UTF-16 Strings to Upper Case: + * + * http://msdn.microsoft.com/en-us/library/hh877830.aspx + * http://www.microsoft.com/en-us/download/details.aspx?displaylang=en&id=10921 + * + * In particular, the table in "Windows 8 Upper Case Mapping Table.txt" was + * post-processed using the winucase_convert.pl script. + */ + +#include <linux/nls.h> + +wchar_t cifs_toupper(wchar_t in); /* quiet sparse */ + +static const wchar_t t2_00[256] = { + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, + 0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f, + 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, + 0x0058, 0x0059, 0x005a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7, + 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, + 0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x0000, + 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x0178, +}; + +static const wchar_t t2_01[256] = { + 0x0000, 0x0100, 0x0000, 0x0102, 0x0000, 0x0104, 0x0000, 0x0106, + 0x0000, 0x0108, 0x0000, 0x010a, 0x0000, 0x010c, 0x0000, 0x010e, + 0x0000, 0x0110, 0x0000, 0x0112, 0x0000, 0x0114, 0x0000, 0x0116, + 0x0000, 0x0118, 0x0000, 0x011a, 0x0000, 0x011c, 0x0000, 0x011e, + 0x0000, 0x0120, 0x0000, 0x0122, 0x0000, 0x0124, 0x0000, 0x0126, + 0x0000, 0x0128, 0x0000, 0x012a, 0x0000, 0x012c, 0x0000, 0x012e, + 0x0000, 0x0000, 0x0000, 0x0132, 0x0000, 0x0134, 0x0000, 0x0136, + 0x0000, 0x0000, 0x0139, 0x0000, 0x013b, 0x0000, 0x013d, 0x0000, + 0x013f, 0x0000, 0x0141, 0x0000, 0x0143, 0x0000, 0x0145, 0x0000, + 0x0147, 0x0000, 0x0000, 0x014a, 0x0000, 0x014c, 0x0000, 0x014e, + 0x0000, 0x0150, 0x0000, 0x0152, 0x0000, 0x0154, 0x0000, 0x0156, + 0x0000, 0x0158, 0x0000, 0x015a, 0x0000, 0x015c, 0x0000, 0x015e, + 0x0000, 0x0160, 0x0000, 0x0162, 0x0000, 0x0164, 0x0000, 0x0166, + 0x0000, 0x0168, 0x0000, 0x016a, 0x0000, 0x016c, 0x0000, 0x016e, + 0x0000, 0x0170, 0x0000, 0x0172, 0x0000, 0x0174, 0x0000, 0x0176, + 0x0000, 0x0000, 0x0179, 0x0000, 0x017b, 0x0000, 0x017d, 0x0000, + 0x0243, 0x0000, 0x0000, 0x0182, 0x0000, 0x0184, 0x0000, 0x0000, + 0x0187, 0x0000, 0x0000, 0x0000, 0x018b, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0191, 0x0000, 0x0000, 0x01f6, 0x0000, 0x0000, + 0x0000, 0x0198, 0x023d, 0x0000, 0x0000, 0x0000, 0x0220, 0x0000, + 0x0000, 0x01a0, 0x0000, 0x01a2, 0x0000, 0x01a4, 0x0000, 0x0000, + 0x01a7, 0x0000, 0x0000, 0x0000, 0x0000, 0x01ac, 0x0000, 0x0000, + 0x01af, 0x0000, 0x0000, 0x0000, 0x01b3, 0x0000, 0x01b5, 0x0000, + 0x0000, 0x01b8, 0x0000, 0x0000, 0x0000, 0x01bc, 0x0000, 0x01f7, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x01c4, 0x0000, + 0x0000, 0x01c7, 0x0000, 0x0000, 0x01ca, 0x0000, 0x01cd, 0x0000, + 0x01cf, 0x0000, 0x01d1, 0x0000, 0x01d3, 0x0000, 0x01d5, 0x0000, + 0x01d7, 0x0000, 0x01d9, 0x0000, 0x01db, 0x018e, 0x0000, 0x01de, + 0x0000, 0x01e0, 0x0000, 0x01e2, 0x0000, 0x01e4, 0x0000, 0x01e6, + 0x0000, 0x01e8, 0x0000, 0x01ea, 0x0000, 0x01ec, 0x0000, 0x01ee, + 0x0000, 0x0000, 0x0000, 0x01f1, 0x0000, 0x01f4, 0x0000, 0x0000, + 0x0000, 0x01f8, 0x0000, 0x01fa, 0x0000, 0x01fc, 0x0000, 0x01fe, +}; + +static const wchar_t t2_02[256] = { + 0x0000, 0x0200, 0x0000, 0x0202, 0x0000, 0x0204, 0x0000, 0x0206, + 0x0000, 0x0208, 0x0000, 0x020a, 0x0000, 0x020c, 0x0000, 0x020e, + 0x0000, 0x0210, 0x0000, 0x0212, 0x0000, 0x0214, 0x0000, 0x0216, + 0x0000, 0x0218, 0x0000, 0x021a, 0x0000, 0x021c, 0x0000, 0x021e, + 0x0000, 0x0000, 0x0000, 0x0222, 0x0000, 0x0224, 0x0000, 0x0226, + 0x0000, 0x0228, 0x0000, 0x022a, 0x0000, 0x022c, 0x0000, 0x022e, + 0x0000, 0x0230, 0x0000, 0x0232, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x023b, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0241, 0x0000, 0x0000, 0x0000, 0x0000, 0x0246, + 0x0000, 0x0248, 0x0000, 0x024a, 0x0000, 0x024c, 0x0000, 0x024e, + 0x2c6f, 0x2c6d, 0x0000, 0x0181, 0x0186, 0x0000, 0x0189, 0x018a, + 0x0000, 0x018f, 0x0000, 0x0190, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0193, 0x0000, 0x0000, 0x0194, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0197, 0x0196, 0x0000, 0x2c62, 0x0000, 0x0000, 0x0000, 0x019c, + 0x0000, 0x2c6e, 0x019d, 0x0000, 0x0000, 0x019f, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2c64, 0x0000, 0x0000, + 0x01a6, 0x0000, 0x0000, 0x01a9, 0x0000, 0x0000, 0x0000, 0x0000, + 0x01ae, 0x0244, 0x01b1, 0x01b2, 0x0245, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x01b7, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, +}; + +static const wchar_t t2_03[256] = { + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0370, 0x0000, 0x0372, 0x0000, 0x0000, 0x0000, 0x0376, + 0x0000, 0x0000, 0x0000, 0x03fd, 0x03fe, 0x03ff, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0386, 0x0388, 0x0389, 0x038a, + 0x0000, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, + 0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f, + 0x03a0, 0x03a1, 0x0000, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7, + 0x03a8, 0x03a9, 0x03aa, 0x03ab, 0x038c, 0x038e, 0x038f, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x03cf, + 0x0000, 0x03d8, 0x0000, 0x03da, 0x0000, 0x03dc, 0x0000, 0x03de, + 0x0000, 0x03e0, 0x0000, 0x03e2, 0x0000, 0x03e4, 0x0000, 0x03e6, + 0x0000, 0x03e8, 0x0000, 0x03ea, 0x0000, 0x03ec, 0x0000, 0x03ee, + 0x0000, 0x0000, 0x03f9, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x03f7, 0x0000, 0x0000, 0x03fa, 0x0000, 0x0000, 0x0000, 0x0000, +}; + +static const wchar_t t2_04[256] = { + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417, + 0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f, + 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427, + 0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f, + 0x0400, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407, + 0x0408, 0x0409, 0x040a, 0x040b, 0x040c, 0x040d, 0x040e, 0x040f, + 0x0000, 0x0460, 0x0000, 0x0462, 0x0000, 0x0464, 0x0000, 0x0466, + 0x0000, 0x0468, 0x0000, 0x046a, 0x0000, 0x046c, 0x0000, 0x046e, + 0x0000, 0x0470, 0x0000, 0x0472, 0x0000, 0x0474, 0x0000, 0x0476, + 0x0000, 0x0478, 0x0000, 0x047a, 0x0000, 0x047c, 0x0000, 0x047e, + 0x0000, 0x0480, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x048a, 0x0000, 0x048c, 0x0000, 0x048e, + 0x0000, 0x0490, 0x0000, 0x0492, 0x0000, 0x0494, 0x0000, 0x0496, + 0x0000, 0x0498, 0x0000, 0x049a, 0x0000, 0x049c, 0x0000, 0x049e, + 0x0000, 0x04a0, 0x0000, 0x04a2, 0x0000, 0x04a4, 0x0000, 0x04a6, + 0x0000, 0x04a8, 0x0000, 0x04aa, 0x0000, 0x04ac, 0x0000, 0x04ae, + 0x0000, 0x04b0, 0x0000, 0x04b2, 0x0000, 0x04b4, 0x0000, 0x04b6, + 0x0000, 0x04b8, 0x0000, 0x04ba, 0x0000, 0x04bc, 0x0000, 0x04be, + 0x0000, 0x0000, 0x04c1, 0x0000, 0x04c3, 0x0000, 0x04c5, 0x0000, + 0x04c7, 0x0000, 0x04c9, 0x0000, 0x04cb, 0x0000, 0x04cd, 0x04c0, + 0x0000, 0x04d0, 0x0000, 0x04d2, 0x0000, 0x04d4, 0x0000, 0x04d6, + 0x0000, 0x04d8, 0x0000, 0x04da, 0x0000, 0x04dc, 0x0000, 0x04de, + 0x0000, 0x04e0, 0x0000, 0x04e2, 0x0000, 0x04e4, 0x0000, 0x04e6, + 0x0000, 0x04e8, 0x0000, 0x04ea, 0x0000, 0x04ec, 0x0000, 0x04ee, + 0x0000, 0x04f0, 0x0000, 0x04f2, 0x0000, 0x04f4, 0x0000, 0x04f6, + 0x0000, 0x04f8, 0x0000, 0x04fa, 0x0000, 0x04fc, 0x0000, 0x04fe, +}; + +static const wchar_t t2_05[256] = { + 0x0000, 0x0500, 0x0000, 0x0502, 0x0000, 0x0504, 0x0000, 0x0506, + 0x0000, 0x0508, 0x0000, 0x050a, 0x0000, 0x050c, 0x0000, 0x050e, + 0x0000, 0x0510, 0x0000, 0x0512, 0x0000, 0x0514, 0x0000, 0x0516, + 0x0000, 0x0518, 0x0000, 0x051a, 0x0000, 0x051c, 0x0000, 0x051e, + 0x0000, 0x0520, 0x0000, 0x0522, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0531, 0x0532, 0x0533, 0x0534, 0x0535, 0x0536, 0x0537, + 0x0538, 0x0539, 0x053a, 0x053b, 0x053c, 0x053d, 0x053e, 0x053f, + 0x0540, 0x0541, 0x0542, 0x0543, 0x0544, 0x0545, 0x0546, 0x0547, + 0x0548, 0x0549, 0x054a, 0x054b, 0x054c, 0x054d, 0x054e, 0x054f, + 0x0550, 0x0551, 0x0552, 0x0553, 0x0554, 0x0555, 0x0556, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, +}; + +static const wchar_t t2_1d[256] = { + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0xa77d, 0x0000, 0x0000, 0x0000, 0x2c63, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, +}; + +static const wchar_t t2_1e[256] = { + 0x0000, 0x1e00, 0x0000, 0x1e02, 0x0000, 0x1e04, 0x0000, 0x1e06, + 0x0000, 0x1e08, 0x0000, 0x1e0a, 0x0000, 0x1e0c, 0x0000, 0x1e0e, + 0x0000, 0x1e10, 0x0000, 0x1e12, 0x0000, 0x1e14, 0x0000, 0x1e16, + 0x0000, 0x1e18, 0x0000, 0x1e1a, 0x0000, 0x1e1c, 0x0000, 0x1e1e, + 0x0000, 0x1e20, 0x0000, 0x1e22, 0x0000, 0x1e24, 0x0000, 0x1e26, + 0x0000, 0x1e28, 0x0000, 0x1e2a, 0x0000, 0x1e2c, 0x0000, 0x1e2e, + 0x0000, 0x1e30, 0x0000, 0x1e32, 0x0000, 0x1e34, 0x0000, 0x1e36, + 0x0000, 0x1e38, 0x0000, 0x1e3a, 0x0000, 0x1e3c, 0x0000, 0x1e3e, + 0x0000, 0x1e40, 0x0000, 0x1e42, 0x0000, 0x1e44, 0x0000, 0x1e46, + 0x0000, 0x1e48, 0x0000, 0x1e4a, 0x0000, 0x1e4c, 0x0000, 0x1e4e, + 0x0000, 0x1e50, 0x0000, 0x1e52, 0x0000, 0x1e54, 0x0000, 0x1e56, + 0x0000, 0x1e58, 0x0000, 0x1e5a, 0x0000, 0x1e5c, 0x0000, 0x1e5e, + 0x0000, 0x1e60, 0x0000, 0x1e62, 0x0000, 0x1e64, 0x0000, 0x1e66, + 0x0000, 0x1e68, 0x0000, 0x1e6a, 0x0000, 0x1e6c, 0x0000, 0x1e6e, + 0x0000, 0x1e70, 0x0000, 0x1e72, 0x0000, 0x1e74, 0x0000, 0x1e76, + 0x0000, 0x1e78, 0x0000, 0x1e7a, 0x0000, 0x1e7c, 0x0000, 0x1e7e, + 0x0000, 0x1e80, 0x0000, 0x1e82, 0x0000, 0x1e84, 0x0000, 0x1e86, + 0x0000, 0x1e88, 0x0000, 0x1e8a, 0x0000, 0x1e8c, 0x0000, 0x1e8e, + 0x0000, 0x1e90, 0x0000, 0x1e92, 0x0000, 0x1e94, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x1ea0, 0x0000, 0x1ea2, 0x0000, 0x1ea4, 0x0000, 0x1ea6, + 0x0000, 0x1ea8, 0x0000, 0x1eaa, 0x0000, 0x1eac, 0x0000, 0x1eae, + 0x0000, 0x1eb0, 0x0000, 0x1eb2, 0x0000, 0x1eb4, 0x0000, 0x1eb6, + 0x0000, 0x1eb8, 0x0000, 0x1eba, 0x0000, 0x1ebc, 0x0000, 0x1ebe, + 0x0000, 0x1ec0, 0x0000, 0x1ec2, 0x0000, 0x1ec4, 0x0000, 0x1ec6, + 0x0000, 0x1ec8, 0x0000, 0x1eca, 0x0000, 0x1ecc, 0x0000, 0x1ece, + 0x0000, 0x1ed0, 0x0000, 0x1ed2, 0x0000, 0x1ed4, 0x0000, 0x1ed6, + 0x0000, 0x1ed8, 0x0000, 0x1eda, 0x0000, 0x1edc, 0x0000, 0x1ede, + 0x0000, 0x1ee0, 0x0000, 0x1ee2, 0x0000, 0x1ee4, 0x0000, 0x1ee6, + 0x0000, 0x1ee8, 0x0000, 0x1eea, 0x0000, 0x1eec, 0x0000, 0x1eee, + 0x0000, 0x1ef0, 0x0000, 0x1ef2, 0x0000, 0x1ef4, 0x0000, 0x1ef6, + 0x0000, 0x1ef8, 0x0000, 0x1efa, 0x0000, 0x1efc, 0x0000, 0x1efe, +}; + +static const wchar_t t2_1f[256] = { + 0x1f08, 0x1f09, 0x1f0a, 0x1f0b, 0x1f0c, 0x1f0d, 0x1f0e, 0x1f0f, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x1f18, 0x1f19, 0x1f1a, 0x1f1b, 0x1f1c, 0x1f1d, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x1f28, 0x1f29, 0x1f2a, 0x1f2b, 0x1f2c, 0x1f2d, 0x1f2e, 0x1f2f, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x1f38, 0x1f39, 0x1f3a, 0x1f3b, 0x1f3c, 0x1f3d, 0x1f3e, 0x1f3f, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x1f48, 0x1f49, 0x1f4a, 0x1f4b, 0x1f4c, 0x1f4d, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x1f59, 0x0000, 0x1f5b, 0x0000, 0x1f5d, 0x0000, 0x1f5f, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x1f68, 0x1f69, 0x1f6a, 0x1f6b, 0x1f6c, 0x1f6d, 0x1f6e, 0x1f6f, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x1fba, 0x1fbb, 0x1fc8, 0x1fc9, 0x1fca, 0x1fcb, 0x1fda, 0x1fdb, + 0x1ff8, 0x1ff9, 0x1fea, 0x1feb, 0x1ffa, 0x1ffb, 0x0000, 0x0000, + 0x1f88, 0x1f89, 0x1f8a, 0x1f8b, 0x1f8c, 0x1f8d, 0x1f8e, 0x1f8f, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x1f98, 0x1f99, 0x1f9a, 0x1f9b, 0x1f9c, 0x1f9d, 0x1f9e, 0x1f9f, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x1fa8, 0x1fa9, 0x1faa, 0x1fab, 0x1fac, 0x1fad, 0x1fae, 0x1faf, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x1fb8, 0x1fb9, 0x0000, 0x1fbc, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x1fcc, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x1fd8, 0x1fd9, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x1fe8, 0x1fe9, 0x0000, 0x0000, 0x0000, 0x1fec, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x1ffc, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, +}; + +static const wchar_t t2_21[256] = { + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2132, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x2160, 0x2161, 0x2162, 0x2163, 0x2164, 0x2165, 0x2166, 0x2167, + 0x2168, 0x2169, 0x216a, 0x216b, 0x216c, 0x216d, 0x216e, 0x216f, + 0x0000, 0x0000, 0x0000, 0x0000, 0x2183, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, +}; + +static const wchar_t t2_24[256] = { + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x24b6, 0x24b7, 0x24b8, 0x24b9, 0x24ba, 0x24bb, 0x24bc, 0x24bd, + 0x24be, 0x24bf, 0x24c0, 0x24c1, 0x24c2, 0x24c3, 0x24c4, 0x24c5, + 0x24c6, 0x24c7, 0x24c8, 0x24c9, 0x24ca, 0x24cb, 0x24cc, 0x24cd, + 0x24ce, 0x24cf, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, +}; + +static const wchar_t t2_2c[256] = { + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x2c00, 0x2c01, 0x2c02, 0x2c03, 0x2c04, 0x2c05, 0x2c06, 0x2c07, + 0x2c08, 0x2c09, 0x2c0a, 0x2c0b, 0x2c0c, 0x2c0d, 0x2c0e, 0x2c0f, + 0x2c10, 0x2c11, 0x2c12, 0x2c13, 0x2c14, 0x2c15, 0x2c16, 0x2c17, + 0x2c18, 0x2c19, 0x2c1a, 0x2c1b, 0x2c1c, 0x2c1d, 0x2c1e, 0x2c1f, + 0x2c20, 0x2c21, 0x2c22, 0x2c23, 0x2c24, 0x2c25, 0x2c26, 0x2c27, + 0x2c28, 0x2c29, 0x2c2a, 0x2c2b, 0x2c2c, 0x2c2d, 0x2c2e, 0x0000, + 0x0000, 0x2c60, 0x0000, 0x0000, 0x0000, 0x023a, 0x023e, 0x0000, + 0x2c67, 0x0000, 0x2c69, 0x0000, 0x2c6b, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x2c72, 0x0000, 0x0000, 0x2c75, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x2c80, 0x0000, 0x2c82, 0x0000, 0x2c84, 0x0000, 0x2c86, + 0x0000, 0x2c88, 0x0000, 0x2c8a, 0x0000, 0x2c8c, 0x0000, 0x2c8e, + 0x0000, 0x2c90, 0x0000, 0x2c92, 0x0000, 0x2c94, 0x0000, 0x2c96, + 0x0000, 0x2c98, 0x0000, 0x2c9a, 0x0000, 0x2c9c, 0x0000, 0x2c9e, + 0x0000, 0x2ca0, 0x0000, 0x2ca2, 0x0000, 0x2ca4, 0x0000, 0x2ca6, + 0x0000, 0x2ca8, 0x0000, 0x2caa, 0x0000, 0x2cac, 0x0000, 0x2cae, + 0x0000, 0x2cb0, 0x0000, 0x2cb2, 0x0000, 0x2cb4, 0x0000, 0x2cb6, + 0x0000, 0x2cb8, 0x0000, 0x2cba, 0x0000, 0x2cbc, 0x0000, 0x2cbe, + 0x0000, 0x2cc0, 0x0000, 0x2cc2, 0x0000, 0x2cc4, 0x0000, 0x2cc6, + 0x0000, 0x2cc8, 0x0000, 0x2cca, 0x0000, 0x2ccc, 0x0000, 0x2cce, + 0x0000, 0x2cd0, 0x0000, 0x2cd2, 0x0000, 0x2cd4, 0x0000, 0x2cd6, + 0x0000, 0x2cd8, 0x0000, 0x2cda, 0x0000, 0x2cdc, 0x0000, 0x2cde, + 0x0000, 0x2ce0, 0x0000, 0x2ce2, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, +}; + +static const wchar_t t2_2d[256] = { + 0x10a0, 0x10a1, 0x10a2, 0x10a3, 0x10a4, 0x10a5, 0x10a6, 0x10a7, + 0x10a8, 0x10a9, 0x10aa, 0x10ab, 0x10ac, 0x10ad, 0x10ae, 0x10af, + 0x10b0, 0x10b1, 0x10b2, 0x10b3, 0x10b4, 0x10b5, 0x10b6, 0x10b7, + 0x10b8, 0x10b9, 0x10ba, 0x10bb, 0x10bc, 0x10bd, 0x10be, 0x10bf, + 0x10c0, 0x10c1, 0x10c2, 0x10c3, 0x10c4, 0x10c5, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, +}; + +static const wchar_t t2_a6[256] = { + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0xa640, 0x0000, 0xa642, 0x0000, 0xa644, 0x0000, 0xa646, + 0x0000, 0xa648, 0x0000, 0xa64a, 0x0000, 0xa64c, 0x0000, 0xa64e, + 0x0000, 0xa650, 0x0000, 0xa652, 0x0000, 0xa654, 0x0000, 0xa656, + 0x0000, 0xa658, 0x0000, 0xa65a, 0x0000, 0xa65c, 0x0000, 0xa65e, + 0x0000, 0x0000, 0x0000, 0xa662, 0x0000, 0xa664, 0x0000, 0xa666, + 0x0000, 0xa668, 0x0000, 0xa66a, 0x0000, 0xa66c, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0xa680, 0x0000, 0xa682, 0x0000, 0xa684, 0x0000, 0xa686, + 0x0000, 0xa688, 0x0000, 0xa68a, 0x0000, 0xa68c, 0x0000, 0xa68e, + 0x0000, 0xa690, 0x0000, 0xa692, 0x0000, 0xa694, 0x0000, 0xa696, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, +}; + +static const wchar_t t2_a7[256] = { + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0xa722, 0x0000, 0xa724, 0x0000, 0xa726, + 0x0000, 0xa728, 0x0000, 0xa72a, 0x0000, 0xa72c, 0x0000, 0xa72e, + 0x0000, 0x0000, 0x0000, 0xa732, 0x0000, 0xa734, 0x0000, 0xa736, + 0x0000, 0xa738, 0x0000, 0xa73a, 0x0000, 0xa73c, 0x0000, 0xa73e, + 0x0000, 0xa740, 0x0000, 0xa742, 0x0000, 0xa744, 0x0000, 0xa746, + 0x0000, 0xa748, 0x0000, 0xa74a, 0x0000, 0xa74c, 0x0000, 0xa74e, + 0x0000, 0xa750, 0x0000, 0xa752, 0x0000, 0xa754, 0x0000, 0xa756, + 0x0000, 0xa758, 0x0000, 0xa75a, 0x0000, 0xa75c, 0x0000, 0xa75e, + 0x0000, 0xa760, 0x0000, 0xa762, 0x0000, 0xa764, 0x0000, 0xa766, + 0x0000, 0xa768, 0x0000, 0xa76a, 0x0000, 0xa76c, 0x0000, 0xa76e, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0xa779, 0x0000, 0xa77b, 0x0000, 0x0000, 0xa77e, + 0x0000, 0xa780, 0x0000, 0xa782, 0x0000, 0xa784, 0x0000, 0xa786, + 0x0000, 0x0000, 0x0000, 0x0000, 0xa78b, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, +}; + +static const wchar_t t2_ff[256] = { + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0xff21, 0xff22, 0xff23, 0xff24, 0xff25, 0xff26, 0xff27, + 0xff28, 0xff29, 0xff2a, 0xff2b, 0xff2c, 0xff2d, 0xff2e, 0xff2f, + 0xff30, 0xff31, 0xff32, 0xff33, 0xff34, 0xff35, 0xff36, 0xff37, + 0xff38, 0xff39, 0xff3a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, +}; + +static const wchar_t *const toplevel[256] = { + t2_00, t2_01, t2_02, t2_03, t2_04, t2_05, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, t2_1d, t2_1e, t2_1f, + NULL, t2_21, NULL, NULL, t2_24, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, t2_2c, t2_2d, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, t2_a6, t2_a7, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, t2_ff, +}; + +/** + * cifs_toupper - convert a wchar_t from lower to uppercase + * @in: character to convert from lower to uppercase + * + * This function consults the static tables above to convert a wchar_t from + * lower to uppercase. In the event that there is no mapping, the original + * "in" character is returned. + */ +wchar_t +cifs_toupper(wchar_t in) +{ + unsigned char idx; + const wchar_t *tbl; + wchar_t out; + + /* grab upper byte */ + idx = (in & 0xff00) >> 8; + + /* find pointer to 2nd layer table */ + tbl = toplevel[idx]; + if (!tbl) + return in; + + /* grab lower byte */ + idx = in & 0xff; + + /* look up character in table */ + out = tbl[idx]; + if (out) + return out; + + return in; +} diff --git a/fs/dcache.c b/fs/dcache.c index 4d9df3c940e6..1bd4614ce93b 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -37,6 +37,7 @@ #include <linux/rculist_bl.h> #include <linux/prefetch.h> #include <linux/ratelimit.h> +#include <linux/list_lru.h> #include "internal.h" #include "mount.h" @@ -48,7 +49,7 @@ * - the dcache hash table * s_anon bl list spinlock protects: * - the s_anon list (see __d_drop) - * dcache_lru_lock protects: + * dentry->d_sb->s_dentry_lru_lock protects: * - the dcache lru lists and counters * d_lock protects: * - d_flags @@ -63,7 +64,7 @@ * Ordering: * dentry->d_inode->i_lock * dentry->d_lock - * dcache_lru_lock + * dentry->d_sb->s_dentry_lru_lock * dcache_hash_bucket lock * s_anon lock * @@ -81,7 +82,6 @@ int sysctl_vfs_cache_pressure __read_mostly = 100; EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure); -static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lru_lock); __cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock); EXPORT_SYMBOL(rename_lock); @@ -90,8 +90,8 @@ static struct kmem_cache *dentry_cache __read_mostly; /** * read_seqbegin_or_lock - begin a sequence number check or locking block - * lock: sequence lock - * seq : sequence number to be checked + * @lock: sequence lock + * @seq : sequence number to be checked * * First try it once optimistically without taking the lock. If that fails, * take the lock. The sequence number is also used as a marker for deciding @@ -103,7 +103,7 @@ static inline void read_seqbegin_or_lock(seqlock_t *lock, int *seq) if (!(*seq & 1)) /* Even */ *seq = read_seqbegin(lock); else /* Odd */ - write_seqlock(lock); + read_seqlock_excl(lock); } static inline int need_seqretry(seqlock_t *lock, int seq) @@ -114,7 +114,7 @@ static inline int need_seqretry(seqlock_t *lock, int seq) static inline void done_seqretry(seqlock_t *lock, int seq) { if (seq & 1) - write_sequnlock(lock); + read_sequnlock_excl(lock); } /* @@ -146,23 +146,47 @@ struct dentry_stat_t dentry_stat = { .age_limit = 45, }; -static DEFINE_PER_CPU(unsigned int, nr_dentry); +static DEFINE_PER_CPU(long, nr_dentry); +static DEFINE_PER_CPU(long, nr_dentry_unused); #if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS) -static int get_nr_dentry(void) + +/* + * Here we resort to our own counters instead of using generic per-cpu counters + * for consistency with what the vfs inode code does. We are expected to harvest + * better code and performance by having our own specialized counters. + * + * Please note that the loop is done over all possible CPUs, not over all online + * CPUs. The reason for this is that we don't want to play games with CPUs going + * on and off. If one of them goes off, we will just keep their counters. + * + * glommer: See cffbc8a for details, and if you ever intend to change this, + * please update all vfs counters to match. + */ +static long get_nr_dentry(void) { int i; - int sum = 0; + long sum = 0; for_each_possible_cpu(i) sum += per_cpu(nr_dentry, i); return sum < 0 ? 0 : sum; } +static long get_nr_dentry_unused(void) +{ + int i; + long sum = 0; + for_each_possible_cpu(i) + sum += per_cpu(nr_dentry_unused, i); + return sum < 0 ? 0 : sum; +} + int proc_nr_dentry(ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { dentry_stat.nr_dentry = get_nr_dentry(); - return proc_dointvec(table, write, buffer, lenp, ppos); + dentry_stat.nr_unused = get_nr_dentry_unused(); + return proc_doulongvec_minmax(table, write, buffer, lenp, ppos); } #endif @@ -333,52 +357,35 @@ static void dentry_unlink_inode(struct dentry * dentry) } /* - * dentry_lru_(add|del|prune|move_tail) must be called with d_lock held. + * dentry_lru_(add|del)_list) must be called with d_lock held. */ static void dentry_lru_add(struct dentry *dentry) { if (unlikely(!(dentry->d_flags & DCACHE_LRU_LIST))) { - spin_lock(&dcache_lru_lock); + if (list_lru_add(&dentry->d_sb->s_dentry_lru, &dentry->d_lru)) + this_cpu_inc(nr_dentry_unused); dentry->d_flags |= DCACHE_LRU_LIST; - list_add(&dentry->d_lru, &dentry->d_sb->s_dentry_lru); - dentry->d_sb->s_nr_dentry_unused++; - dentry_stat.nr_unused++; - spin_unlock(&dcache_lru_lock); } } -static void __dentry_lru_del(struct dentry *dentry) -{ - list_del_init(&dentry->d_lru); - dentry->d_flags &= ~(DCACHE_SHRINK_LIST | DCACHE_LRU_LIST); - dentry->d_sb->s_nr_dentry_unused--; - dentry_stat.nr_unused--; -} - /* * Remove a dentry with references from the LRU. + * + * If we are on the shrink list, then we can get to try_prune_one_dentry() and + * lose our last reference through the parent walk. In this case, we need to + * remove ourselves from the shrink list, not the LRU. */ static void dentry_lru_del(struct dentry *dentry) { - if (!list_empty(&dentry->d_lru)) { - spin_lock(&dcache_lru_lock); - __dentry_lru_del(dentry); - spin_unlock(&dcache_lru_lock); + if (dentry->d_flags & DCACHE_SHRINK_LIST) { + list_del_init(&dentry->d_lru); + dentry->d_flags &= ~DCACHE_SHRINK_LIST; + return; } -} -static void dentry_lru_move_list(struct dentry *dentry, struct list_head *list) -{ - spin_lock(&dcache_lru_lock); - if (list_empty(&dentry->d_lru)) { - dentry->d_flags |= DCACHE_LRU_LIST; - list_add_tail(&dentry->d_lru, list); - dentry->d_sb->s_nr_dentry_unused++; - dentry_stat.nr_unused++; - } else { - list_move_tail(&dentry->d_lru, list); - } - spin_unlock(&dcache_lru_lock); + if (list_lru_del(&dentry->d_sb->s_dentry_lru, &dentry->d_lru)) + this_cpu_dec(nr_dentry_unused); + dentry->d_flags &= ~DCACHE_LRU_LIST; } /** @@ -474,7 +481,8 @@ EXPORT_SYMBOL(d_drop); * If ref is non-zero, then decrement the refcount too. * Returns dentry requiring refcount drop, or NULL if we're done. */ -static inline struct dentry *dentry_kill(struct dentry *dentry) +static inline struct dentry * +dentry_kill(struct dentry *dentry, int unlock_on_failure) __releases(dentry->d_lock) { struct inode *inode; @@ -483,8 +491,10 @@ static inline struct dentry *dentry_kill(struct dentry *dentry) inode = dentry->d_inode; if (inode && !spin_trylock(&inode->i_lock)) { relock: - spin_unlock(&dentry->d_lock); - cpu_relax(); + if (unlock_on_failure) { + spin_unlock(&dentry->d_lock); + cpu_relax(); + } return dentry; /* try again with same dentry */ } if (IS_ROOT(dentry)) @@ -567,7 +577,7 @@ repeat: return; kill_it: - dentry = dentry_kill(dentry); + dentry = dentry_kill(dentry, 1); if (dentry) goto repeat; } @@ -787,12 +797,12 @@ EXPORT_SYMBOL(d_prune_aliases); * * This may fail if locks cannot be acquired no problem, just try again. */ -static void try_prune_one_dentry(struct dentry *dentry) +static struct dentry * try_prune_one_dentry(struct dentry *dentry) __releases(dentry->d_lock) { struct dentry *parent; - parent = dentry_kill(dentry); + parent = dentry_kill(dentry, 0); /* * If dentry_kill returns NULL, we have nothing more to do. * if it returns the same dentry, trylocks failed. In either @@ -804,17 +814,18 @@ static void try_prune_one_dentry(struct dentry *dentry) * fragmentation. */ if (!parent) - return; + return NULL; if (parent == dentry) - return; + return dentry; /* Prune ancestors. */ dentry = parent; while (dentry) { if (lockref_put_or_lock(&dentry->d_lockref)) - return; - dentry = dentry_kill(dentry); + return NULL; + dentry = dentry_kill(dentry, 1); } + return NULL; } static void shrink_dentry_list(struct list_head *list) @@ -833,76 +844,143 @@ static void shrink_dentry_list(struct list_head *list) } /* + * The dispose list is isolated and dentries are not accounted + * to the LRU here, so we can simply remove it from the list + * here regardless of whether it is referenced or not. + */ + list_del_init(&dentry->d_lru); + dentry->d_flags &= ~DCACHE_SHRINK_LIST; + + /* * We found an inuse dentry which was not removed from - * the LRU because of laziness during lookup. Do not free - * it - just keep it off the LRU list. + * the LRU because of laziness during lookup. Do not free it. */ if (dentry->d_lockref.count) { - dentry_lru_del(dentry); spin_unlock(&dentry->d_lock); continue; } - rcu_read_unlock(); - try_prune_one_dentry(dentry); + dentry = try_prune_one_dentry(dentry); rcu_read_lock(); + if (dentry) { + dentry->d_flags |= DCACHE_SHRINK_LIST; + list_add(&dentry->d_lru, list); + spin_unlock(&dentry->d_lock); + } } rcu_read_unlock(); } +static enum lru_status +dentry_lru_isolate(struct list_head *item, spinlock_t *lru_lock, void *arg) +{ + struct list_head *freeable = arg; + struct dentry *dentry = container_of(item, struct dentry, d_lru); + + + /* + * we are inverting the lru lock/dentry->d_lock here, + * so use a trylock. If we fail to get the lock, just skip + * it + */ + if (!spin_trylock(&dentry->d_lock)) + return LRU_SKIP; + + /* + * Referenced dentries are still in use. If they have active + * counts, just remove them from the LRU. Otherwise give them + * another pass through the LRU. + */ + if (dentry->d_lockref.count) { + list_del_init(&dentry->d_lru); + spin_unlock(&dentry->d_lock); + return LRU_REMOVED; + } + + if (dentry->d_flags & DCACHE_REFERENCED) { + dentry->d_flags &= ~DCACHE_REFERENCED; + spin_unlock(&dentry->d_lock); + + /* + * The list move itself will be made by the common LRU code. At + * this point, we've dropped the dentry->d_lock but keep the + * lru lock. This is safe to do, since every list movement is + * protected by the lru lock even if both locks are held. + * + * This is guaranteed by the fact that all LRU management + * functions are intermediated by the LRU API calls like + * list_lru_add and list_lru_del. List movement in this file + * only ever occur through this functions or through callbacks + * like this one, that are called from the LRU API. + * + * The only exceptions to this are functions like + * shrink_dentry_list, and code that first checks for the + * DCACHE_SHRINK_LIST flag. Those are guaranteed to be + * operating only with stack provided lists after they are + * properly isolated from the main list. It is thus, always a + * local access. + */ + return LRU_ROTATE; + } + + dentry->d_flags |= DCACHE_SHRINK_LIST; + list_move_tail(&dentry->d_lru, freeable); + this_cpu_dec(nr_dentry_unused); + spin_unlock(&dentry->d_lock); + + return LRU_REMOVED; +} + /** * prune_dcache_sb - shrink the dcache * @sb: superblock - * @count: number of entries to try to free + * @nr_to_scan : number of entries to try to free + * @nid: which node to scan for freeable entities * - * Attempt to shrink the superblock dcache LRU by @count entries. This is + * Attempt to shrink the superblock dcache LRU by @nr_to_scan entries. This is * done when we need more memory an called from the superblock shrinker * function. * * This function may fail to free any resources if all the dentries are in * use. */ -void prune_dcache_sb(struct super_block *sb, int count) +long prune_dcache_sb(struct super_block *sb, unsigned long nr_to_scan, + int nid) { - struct dentry *dentry; - LIST_HEAD(referenced); - LIST_HEAD(tmp); + LIST_HEAD(dispose); + long freed; -relock: - spin_lock(&dcache_lru_lock); - while (!list_empty(&sb->s_dentry_lru)) { - dentry = list_entry(sb->s_dentry_lru.prev, - struct dentry, d_lru); - BUG_ON(dentry->d_sb != sb); - - if (!spin_trylock(&dentry->d_lock)) { - spin_unlock(&dcache_lru_lock); - cpu_relax(); - goto relock; - } + freed = list_lru_walk_node(&sb->s_dentry_lru, nid, dentry_lru_isolate, + &dispose, &nr_to_scan); + shrink_dentry_list(&dispose); + return freed; +} - if (dentry->d_flags & DCACHE_REFERENCED) { - dentry->d_flags &= ~DCACHE_REFERENCED; - list_move(&dentry->d_lru, &referenced); - spin_unlock(&dentry->d_lock); - } else { - list_move_tail(&dentry->d_lru, &tmp); - dentry->d_flags |= DCACHE_SHRINK_LIST; - spin_unlock(&dentry->d_lock); - if (!--count) - break; - } - cond_resched_lock(&dcache_lru_lock); - } - if (!list_empty(&referenced)) - list_splice(&referenced, &sb->s_dentry_lru); - spin_unlock(&dcache_lru_lock); +static enum lru_status dentry_lru_isolate_shrink(struct list_head *item, + spinlock_t *lru_lock, void *arg) +{ + struct list_head *freeable = arg; + struct dentry *dentry = container_of(item, struct dentry, d_lru); + + /* + * we are inverting the lru lock/dentry->d_lock here, + * so use a trylock. If we fail to get the lock, just skip + * it + */ + if (!spin_trylock(&dentry->d_lock)) + return LRU_SKIP; - shrink_dentry_list(&tmp); + dentry->d_flags |= DCACHE_SHRINK_LIST; + list_move_tail(&dentry->d_lru, freeable); + this_cpu_dec(nr_dentry_unused); + spin_unlock(&dentry->d_lock); + + return LRU_REMOVED; } + /** * shrink_dcache_sb - shrink dcache for a superblock * @sb: superblock @@ -912,16 +990,17 @@ relock: */ void shrink_dcache_sb(struct super_block *sb) { - LIST_HEAD(tmp); + long freed; - spin_lock(&dcache_lru_lock); - while (!list_empty(&sb->s_dentry_lru)) { - list_splice_init(&sb->s_dentry_lru, &tmp); - spin_unlock(&dcache_lru_lock); - shrink_dentry_list(&tmp); - spin_lock(&dcache_lru_lock); - } - spin_unlock(&dcache_lru_lock); + do { + LIST_HEAD(dispose); + + freed = list_lru_walk(&sb->s_dentry_lru, + dentry_lru_isolate_shrink, &dispose, UINT_MAX); + + this_cpu_sub(nr_dentry_unused, freed); + shrink_dentry_list(&dispose); + } while (freed > 0); } EXPORT_SYMBOL(shrink_dcache_sb); @@ -1283,7 +1362,8 @@ static enum d_walk_ret select_collect(void *_data, struct dentry *dentry) if (dentry->d_lockref.count) { dentry_lru_del(dentry); } else if (!(dentry->d_flags & DCACHE_SHRINK_LIST)) { - dentry_lru_move_list(dentry, &data->dispose); + dentry_lru_del(dentry); + list_add_tail(&dentry->d_lru, &data->dispose); dentry->d_flags |= DCACHE_SHRINK_LIST; data->found++; ret = D_WALK_NORETRY; @@ -2673,9 +2753,9 @@ static int prepend(char **buffer, int *buflen, const char *str, int namelen) /** * prepend_name - prepend a pathname in front of current buffer pointer - * buffer: buffer pointer - * buflen: allocated length of the buffer - * name: name string and length qstr structure + * @buffer: buffer pointer + * @buflen: allocated length of the buffer + * @name: name string and length qstr structure * * With RCU path tracing, it may race with d_move(). Use ACCESS_ONCE() to * make sure that either the old or the new name pointer and length are @@ -2713,14 +2793,15 @@ static int prepend_name(char **buffer, int *buflen, struct qstr *name) * @buffer: pointer to the end of the buffer * @buflen: pointer to buffer length * - * The function tries to write out the pathname without taking any lock other - * than the RCU read lock to make sure that dentries won't go away. It only - * checks the sequence number of the global rename_lock as any change in the - * dentry's d_seq will be preceded by changes in the rename_lock sequence - * number. If the sequence number had been change, it will restart the whole - * pathname back-tracing sequence again. It performs a total of 3 trials of - * lockless back-tracing sequences before falling back to take the - * rename_lock. + * The function will first try to write out the pathname without taking any + * lock other than the RCU read lock to make sure that dentries won't go away. + * It only checks the sequence number of the global rename_lock as any change + * in the dentry's d_seq will be preceded by changes in the rename_lock + * sequence number. If the sequence number had been changed, it will restart + * the whole pathname back-tracing sequence again by taking the rename_lock. + * In this case, there is no need to take the RCU read lock as the recursive + * parent pointer references will keep the dentry chain alive as long as no + * rename operation is performed. */ static int prepend_path(const struct path *path, const struct path *root, @@ -2868,6 +2949,16 @@ static int prepend_unreachable(char **buffer, int *buflen) return prepend(buffer, buflen, "(unreachable)", 13); } +static void get_fs_root_rcu(struct fs_struct *fs, struct path *root) +{ + unsigned seq; + + do { + seq = read_seqcount_begin(&fs->seq); + *root = fs->root; + } while (read_seqcount_retry(&fs->seq, seq)); +} + /** * d_path - return the path of a dentry * @path: path to report @@ -2900,13 +2991,15 @@ char *d_path(const struct path *path, char *buf, int buflen) if (path->dentry->d_op && path->dentry->d_op->d_dname) return path->dentry->d_op->d_dname(path->dentry, buf, buflen); - get_fs_root(current->fs, &root); + rcu_read_lock(); + get_fs_root_rcu(current->fs, &root); br_read_lock(&vfsmount_lock); error = path_with_deleted(path, &root, &res, &buflen); br_read_unlock(&vfsmount_lock); + rcu_read_unlock(); + if (error < 0) res = ERR_PTR(error); - path_put(&root); return res; } EXPORT_SYMBOL(d_path); @@ -3014,6 +3107,18 @@ Elong: return ERR_PTR(-ENAMETOOLONG); } +static void get_fs_root_and_pwd_rcu(struct fs_struct *fs, struct path *root, + struct path *pwd) +{ + unsigned seq; + + do { + seq = read_seqcount_begin(&fs->seq); + *root = fs->root; + *pwd = fs->pwd; + } while (read_seqcount_retry(&fs->seq, seq)); +} + /* * NOTE! The user-level library version returns a * character pointer. The kernel system call just @@ -3036,23 +3141,25 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size) { int error; struct path pwd, root; - char *page = (char *) __get_free_page(GFP_USER); + char *page = __getname(); if (!page) return -ENOMEM; - get_fs_root_and_pwd(current->fs, &root, &pwd); + rcu_read_lock(); + get_fs_root_and_pwd_rcu(current->fs, &root, &pwd); error = -ENOENT; br_read_lock(&vfsmount_lock); if (!d_unlinked(pwd.dentry)) { unsigned long len; - char *cwd = page + PAGE_SIZE; - int buflen = PAGE_SIZE; + char *cwd = page + PATH_MAX; + int buflen = PATH_MAX; prepend(&cwd, &buflen, "\0", 1); error = prepend_path(&pwd, &root, &cwd, &buflen); br_read_unlock(&vfsmount_lock); + rcu_read_unlock(); if (error < 0) goto out; @@ -3065,7 +3172,7 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size) } error = -ERANGE; - len = PAGE_SIZE + page - cwd; + len = PATH_MAX + page - cwd; if (len <= size) { error = len; if (copy_to_user(buf, cwd, len)) @@ -3073,12 +3180,11 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size) } } else { br_read_unlock(&vfsmount_lock); + rcu_read_unlock(); } out: - path_put(&pwd); - path_put(&root); - free_page((unsigned long) page); + __putname(page); return error; } diff --git a/fs/drop_caches.c b/fs/drop_caches.c index c00e055b6282..9fd702f5bfb2 100644 --- a/fs/drop_caches.c +++ b/fs/drop_caches.c @@ -44,6 +44,7 @@ static void drop_slab(void) .gfp_mask = GFP_KERNEL, }; + nodes_setall(shrink.nodes_to_scan); do { nr_objects = shrink_slab(&shrink, 1000, 1000); } while (nr_objects > 10); diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c index 2d1bdbe78c04..3981ff783950 100644 --- a/fs/ext4/extents_status.c +++ b/fs/ext4/extents_status.c @@ -931,13 +931,15 @@ static int __ext4_es_shrink(struct ext4_sb_info *sbi, int nr_to_scan, struct ext4_inode_info *ei; struct list_head *cur, *tmp; LIST_HEAD(skipped); - int ret, nr_shrunk = 0; + int nr_shrunk = 0; int retried = 0, skip_precached = 1, nr_skipped = 0; spin_lock(&sbi->s_es_lru_lock); retry: list_for_each_safe(cur, tmp, &sbi->s_es_lru) { + int shrunk; + /* * If we have already reclaimed all extents from extent * status tree, just stop the loop immediately. @@ -964,13 +966,13 @@ retry: continue; write_lock(&ei->i_es_lock); - ret = __es_try_to_reclaim_extents(ei, nr_to_scan); + shrunk = __es_try_to_reclaim_extents(ei, nr_to_scan); if (ei->i_es_lru_nr == 0) list_del_init(&ei->i_es_lru); write_unlock(&ei->i_es_lock); - nr_shrunk += ret; - nr_to_scan -= ret; + nr_shrunk += shrunk; + nr_to_scan -= shrunk; if (nr_to_scan == 0) break; } @@ -1007,7 +1009,20 @@ retry: return nr_shrunk; } -static int ext4_es_shrink(struct shrinker *shrink, struct shrink_control *sc) +static unsigned long ext4_es_count(struct shrinker *shrink, + struct shrink_control *sc) +{ + unsigned long nr; + struct ext4_sb_info *sbi; + + sbi = container_of(shrink, struct ext4_sb_info, s_es_shrinker); + nr = percpu_counter_read_positive(&sbi->s_extent_cache_cnt); + trace_ext4_es_shrink_enter(sbi->s_sb, sc->nr_to_scan, nr); + return nr; +} + +static unsigned long ext4_es_scan(struct shrinker *shrink, + struct shrink_control *sc) { struct ext4_sb_info *sbi = container_of(shrink, struct ext4_sb_info, s_es_shrinker); @@ -1022,9 +1037,8 @@ static int ext4_es_shrink(struct shrinker *shrink, struct shrink_control *sc) nr_shrunk = __ext4_es_shrink(sbi, nr_to_scan, NULL); - ret = percpu_counter_read_positive(&sbi->s_extent_cache_cnt); trace_ext4_es_shrink_exit(sbi->s_sb, nr_shrunk, ret); - return ret; + return nr_shrunk; } void ext4_es_register_shrinker(struct ext4_sb_info *sbi) @@ -1032,7 +1046,8 @@ void ext4_es_register_shrinker(struct ext4_sb_info *sbi) INIT_LIST_HEAD(&sbi->s_es_lru); spin_lock_init(&sbi->s_es_lru_lock); sbi->s_es_last_sorted = 0; - sbi->s_es_shrinker.shrink = ext4_es_shrink; + sbi->s_es_shrinker.scan_objects = ext4_es_scan; + sbi->s_es_shrinker.count_objects = ext4_es_count; sbi->s_es_shrinker.seeks = DEFAULT_SEEKS; register_shrinker(&sbi->s_es_shrinker); } @@ -1076,7 +1091,7 @@ static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei, struct ext4_es_tree *tree = &ei->i_es_tree; struct rb_node *node; struct extent_status *es; - int nr_shrunk = 0; + unsigned long nr_shrunk = 0; static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL, DEFAULT_RATELIMIT_BURST); diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 722329cac98f..c2f41b4d00b9 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -1427,21 +1427,22 @@ __acquires(&lru_lock) * gfs2_dispose_glock_lru() above. */ -static void gfs2_scan_glock_lru(int nr) +static long gfs2_scan_glock_lru(int nr) { struct gfs2_glock *gl; LIST_HEAD(skipped); LIST_HEAD(dispose); + long freed = 0; spin_lock(&lru_lock); - while(nr && !list_empty(&lru_list)) { + while ((nr-- >= 0) && !list_empty(&lru_list)) { gl = list_entry(lru_list.next, struct gfs2_glock, gl_lru); /* Test for being demotable */ if (!test_and_set_bit(GLF_LOCK, &gl->gl_flags)) { list_move(&gl->gl_lru, &dispose); atomic_dec(&lru_count); - nr--; + freed++; continue; } @@ -1451,23 +1452,28 @@ static void gfs2_scan_glock_lru(int nr) if (!list_empty(&dispose)) gfs2_dispose_glock_lru(&dispose); spin_unlock(&lru_lock); + + return freed; } -static int gfs2_shrink_glock_memory(struct shrinker *shrink, - struct shrink_control *sc) +static unsigned long gfs2_glock_shrink_scan(struct shrinker *shrink, + struct shrink_control *sc) { - if (sc->nr_to_scan) { - if (!(sc->gfp_mask & __GFP_FS)) - return -1; - gfs2_scan_glock_lru(sc->nr_to_scan); - } + if (!(sc->gfp_mask & __GFP_FS)) + return SHRINK_STOP; + return gfs2_scan_glock_lru(sc->nr_to_scan); +} - return (atomic_read(&lru_count) / 100) * sysctl_vfs_cache_pressure; +static unsigned long gfs2_glock_shrink_count(struct shrinker *shrink, + struct shrink_control *sc) +{ + return vfs_pressure_ratio(atomic_read(&lru_count)); } static struct shrinker glock_shrinker = { - .shrink = gfs2_shrink_glock_memory, .seeks = DEFAULT_SEEKS, + .count_objects = gfs2_glock_shrink_count, + .scan_objects = gfs2_glock_shrink_scan, }; /** diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c index 7b0f5043cf24..351586e24e30 100644 --- a/fs/gfs2/main.c +++ b/fs/gfs2/main.c @@ -32,7 +32,8 @@ struct workqueue_struct *gfs2_control_wq; static struct shrinker qd_shrinker = { - .shrink = gfs2_shrink_qd_memory, + .count_objects = gfs2_qd_shrink_count, + .scan_objects = gfs2_qd_shrink_scan, .seeks = DEFAULT_SEEKS, }; diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index 3768c2f40e43..db441359ee8c 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -75,17 +75,16 @@ static LIST_HEAD(qd_lru_list); static atomic_t qd_lru_count = ATOMIC_INIT(0); static DEFINE_SPINLOCK(qd_lru_lock); -int gfs2_shrink_qd_memory(struct shrinker *shrink, struct shrink_control *sc) +unsigned long gfs2_qd_shrink_scan(struct shrinker *shrink, + struct shrink_control *sc) { struct gfs2_quota_data *qd; struct gfs2_sbd *sdp; int nr_to_scan = sc->nr_to_scan; - - if (nr_to_scan == 0) - goto out; + long freed = 0; if (!(sc->gfp_mask & __GFP_FS)) - return -1; + return SHRINK_STOP; spin_lock(&qd_lru_lock); while (nr_to_scan && !list_empty(&qd_lru_list)) { @@ -110,11 +109,16 @@ int gfs2_shrink_qd_memory(struct shrinker *shrink, struct shrink_control *sc) kmem_cache_free(gfs2_quotad_cachep, qd); spin_lock(&qd_lru_lock); nr_to_scan--; + freed++; } spin_unlock(&qd_lru_lock); + return freed; +} -out: - return (atomic_read(&qd_lru_count) * sysctl_vfs_cache_pressure) / 100; +unsigned long gfs2_qd_shrink_count(struct shrinker *shrink, + struct shrink_control *sc) +{ + return vfs_pressure_ratio(atomic_read(&qd_lru_count)); } static u64 qd2index(struct gfs2_quota_data *qd) diff --git a/fs/gfs2/quota.h b/fs/gfs2/quota.h index 4f5e6e44ed83..0f64d9deb1b0 100644 --- a/fs/gfs2/quota.h +++ b/fs/gfs2/quota.h @@ -53,8 +53,10 @@ static inline int gfs2_quota_lock_check(struct gfs2_inode *ip) return ret; } -extern int gfs2_shrink_qd_memory(struct shrinker *shrink, - struct shrink_control *sc); +extern unsigned long gfs2_qd_shrink_count(struct shrinker *shrink, + struct shrink_control *sc); +extern unsigned long gfs2_qd_shrink_scan(struct shrinker *shrink, + struct shrink_control *sc); extern const struct quotactl_ops gfs2_quotactl_ops; #endif /* __QUOTA_DOT_H__ */ diff --git a/fs/inode.c b/fs/inode.c index 93a0625b46e4..b33ba8e021cc 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -17,6 +17,7 @@ #include <linux/prefetch.h> #include <linux/buffer_head.h> /* for inode_has_buffers */ #include <linux/ratelimit.h> +#include <linux/list_lru.h> #include "internal.h" /* @@ -24,7 +25,7 @@ * * inode->i_lock protects: * inode->i_state, inode->i_hash, __iget() - * inode->i_sb->s_inode_lru_lock protects: + * Inode LRU list locks protect: * inode->i_sb->s_inode_lru, inode->i_lru * inode_sb_list_lock protects: * sb->s_inodes, inode->i_sb_list @@ -37,7 +38,7 @@ * * inode_sb_list_lock * inode->i_lock - * inode->i_sb->s_inode_lru_lock + * Inode LRU list locks * * bdi->wb.list_lock * inode->i_lock @@ -70,33 +71,33 @@ EXPORT_SYMBOL(empty_aops); */ struct inodes_stat_t inodes_stat; -static DEFINE_PER_CPU(unsigned int, nr_inodes); -static DEFINE_PER_CPU(unsigned int, nr_unused); +static DEFINE_PER_CPU(unsigned long, nr_inodes); +static DEFINE_PER_CPU(unsigned long, nr_unused); static struct kmem_cache *inode_cachep __read_mostly; -static int get_nr_inodes(void) +static long get_nr_inodes(void) { int i; - int sum = 0; + long sum = 0; for_each_possible_cpu(i) sum += per_cpu(nr_inodes, i); return sum < 0 ? 0 : sum; } -static inline int get_nr_inodes_unused(void) +static inline long get_nr_inodes_unused(void) { int i; - int sum = 0; + long sum = 0; for_each_possible_cpu(i) sum += per_cpu(nr_unused, i); return sum < 0 ? 0 : sum; } -int get_nr_dirty_inodes(void) +long get_nr_dirty_inodes(void) { /* not actually dirty inodes, but a wild approximation */ - int nr_dirty = get_nr_inodes() - get_nr_inodes_unused(); + long nr_dirty = get_nr_inodes() - get_nr_inodes_unused(); return nr_dirty > 0 ? nr_dirty : 0; } @@ -109,7 +110,7 @@ int proc_nr_inodes(ctl_table *table, int write, { inodes_stat.nr_inodes = get_nr_inodes(); inodes_stat.nr_unused = get_nr_inodes_unused(); - return proc_dointvec(table, write, buffer, lenp, ppos); + return proc_doulongvec_minmax(table, write, buffer, lenp, ppos); } #endif @@ -401,13 +402,8 @@ EXPORT_SYMBOL(ihold); static void inode_lru_list_add(struct inode *inode) { - spin_lock(&inode->i_sb->s_inode_lru_lock); - if (list_empty(&inode->i_lru)) { - list_add(&inode->i_lru, &inode->i_sb->s_inode_lru); - inode->i_sb->s_nr_inodes_unused++; + if (list_lru_add(&inode->i_sb->s_inode_lru, &inode->i_lru)) this_cpu_inc(nr_unused); - } - spin_unlock(&inode->i_sb->s_inode_lru_lock); } /* @@ -425,13 +421,9 @@ void inode_add_lru(struct inode *inode) static void inode_lru_list_del(struct inode *inode) { - spin_lock(&inode->i_sb->s_inode_lru_lock); - if (!list_empty(&inode->i_lru)) { - list_del_init(&inode->i_lru); - inode->i_sb->s_nr_inodes_unused--; + + if (list_lru_del(&inode->i_sb->s_inode_lru, &inode->i_lru)) this_cpu_dec(nr_unused); - } - spin_unlock(&inode->i_sb->s_inode_lru_lock); } /** @@ -675,24 +667,8 @@ int invalidate_inodes(struct super_block *sb, bool kill_dirty) return busy; } -static int can_unuse(struct inode *inode) -{ - if (inode->i_state & ~I_REFERENCED) - return 0; - if (inode_has_buffers(inode)) - return 0; - if (atomic_read(&inode->i_count)) - return 0; - if (inode->i_data.nrpages) - return 0; - return 1; -} - /* - * Walk the superblock inode LRU for freeable inodes and attempt to free them. - * This is called from the superblock shrinker function with a number of inodes - * to trim from the LRU. Inodes to be freed are moved to a temporary list and - * then are freed outside inode_lock by dispose_list(). + * Isolate the inode from the LRU in preparation for freeing it. * * Any inodes which are pinned purely because of attached pagecache have their * pagecache removed. If the inode has metadata buffers attached to @@ -706,89 +682,82 @@ static int can_unuse(struct inode *inode) * LRU does not have strict ordering. Hence we don't want to reclaim inodes * with this flag set because they are the inodes that are out of order. */ -void prune_icache_sb(struct super_block *sb, int nr_to_scan) +static enum lru_status +inode_lru_isolate(struct list_head *item, spinlock_t *lru_lock, void *arg) { - LIST_HEAD(freeable); - int nr_scanned; - unsigned long reap = 0; + struct list_head *freeable = arg; + struct inode *inode = container_of(item, struct inode, i_lru); - spin_lock(&sb->s_inode_lru_lock); - for (nr_scanned = nr_to_scan; nr_scanned >= 0; nr_scanned--) { - struct inode *inode; + /* + * we are inverting the lru lock/inode->i_lock here, so use a trylock. + * If we fail to get the lock, just skip it. + */ + if (!spin_trylock(&inode->i_lock)) + return LRU_SKIP; - if (list_empty(&sb->s_inode_lru)) - break; + /* + * Referenced or dirty inodes are still in use. Give them another pass + * through the LRU as we canot reclaim them now. + */ + if (atomic_read(&inode->i_count) || + (inode->i_state & ~I_REFERENCED)) { + list_del_init(&inode->i_lru); + spin_unlock(&inode->i_lock); + this_cpu_dec(nr_unused); + return LRU_REMOVED; + } - inode = list_entry(sb->s_inode_lru.prev, struct inode, i_lru); + /* recently referenced inodes get one more pass */ + if (inode->i_state & I_REFERENCED) { + inode->i_state &= ~I_REFERENCED; + spin_unlock(&inode->i_lock); + return LRU_ROTATE; + } - /* - * we are inverting the sb->s_inode_lru_lock/inode->i_lock here, - * so use a trylock. If we fail to get the lock, just move the - * inode to the back of the list so we don't spin on it. - */ - if (!spin_trylock(&inode->i_lock)) { - list_move(&inode->i_lru, &sb->s_inode_lru); - continue; + if (inode_has_buffers(inode) || inode->i_data.nrpages) { + __iget(inode); + spin_unlock(&inode->i_lock); + spin_unlock(lru_lock); + if (remove_inode_buffers(inode)) { + unsigned long reap; + reap = invalidate_mapping_pages(&inode->i_data, 0, -1); + if (current_is_kswapd()) + __count_vm_events(KSWAPD_INODESTEAL, reap); + else + __count_vm_events(PGINODESTEAL, reap); + if (current->reclaim_state) + current->reclaim_state->reclaimed_slab += reap; } + iput(inode); + spin_lock(lru_lock); + return LRU_RETRY; + } - /* - * Referenced or dirty inodes are still in use. Give them - * another pass through the LRU as we canot reclaim them now. - */ - if (atomic_read(&inode->i_count) || - (inode->i_state & ~I_REFERENCED)) { - list_del_init(&inode->i_lru); - spin_unlock(&inode->i_lock); - sb->s_nr_inodes_unused--; - this_cpu_dec(nr_unused); - continue; - } + WARN_ON(inode->i_state & I_NEW); + inode->i_state |= I_FREEING; + list_move(&inode->i_lru, freeable); + spin_unlock(&inode->i_lock); - /* recently referenced inodes get one more pass */ - if (inode->i_state & I_REFERENCED) { - inode->i_state &= ~I_REFERENCED; - list_move(&inode->i_lru, &sb->s_inode_lru); - spin_unlock(&inode->i_lock); - continue; - } - if (inode_has_buffers(inode) || inode->i_data.nrpages) { - __iget(inode); - spin_unlock(&inode->i_lock); - spin_unlock(&sb->s_inode_lru_lock); - if (remove_inode_buffers(inode)) - reap += invalidate_mapping_pages(&inode->i_data, - 0, -1); - iput(inode); - spin_lock(&sb->s_inode_lru_lock); - - if (inode != list_entry(sb->s_inode_lru.next, - struct inode, i_lru)) - continue; /* wrong inode or list_empty */ - /* avoid lock inversions with trylock */ - if (!spin_trylock(&inode->i_lock)) - continue; - if (!can_unuse(inode)) { - spin_unlock(&inode->i_lock); - continue; - } - } - WARN_ON(inode->i_state & I_NEW); - inode->i_state |= I_FREEING; - spin_unlock(&inode->i_lock); + this_cpu_dec(nr_unused); + return LRU_REMOVED; +} - list_move(&inode->i_lru, &freeable); - sb->s_nr_inodes_unused--; - this_cpu_dec(nr_unused); - } - if (current_is_kswapd()) - __count_vm_events(KSWAPD_INODESTEAL, reap); - else - __count_vm_events(PGINODESTEAL, reap); - spin_unlock(&sb->s_inode_lru_lock); - if (current->reclaim_state) - current->reclaim_state->reclaimed_slab += reap; +/* + * Walk the superblock inode LRU for freeable inodes and attempt to free them. + * This is called from the superblock shrinker function with a number of inodes + * to trim from the LRU. Inodes to be freed are moved to a temporary list and + * then are freed outside inode_lock by dispose_list(). + */ +long prune_icache_sb(struct super_block *sb, unsigned long nr_to_scan, + int nid) +{ + LIST_HEAD(freeable); + long freed; + freed = list_lru_walk_node(&sb->s_inode_lru, nid, inode_lru_isolate, + &freeable, &nr_to_scan); dispose_list(&freeable); + return freed; } static void __wait_on_freeing_inode(struct inode *inode); diff --git a/fs/internal.h b/fs/internal.h index 2be46ea5dd0b..513e0d859a6c 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -114,6 +114,8 @@ extern int open_check_o_direct(struct file *f); * inode.c */ extern spinlock_t inode_sb_list_lock; +extern long prune_icache_sb(struct super_block *sb, unsigned long nr_to_scan, + int nid); extern void inode_add_lru(struct inode *inode); /* @@ -121,7 +123,7 @@ extern void inode_add_lru(struct inode *inode); */ extern void inode_wb_list_del(struct inode *inode); -extern int get_nr_dirty_inodes(void); +extern long get_nr_dirty_inodes(void); extern void evict_inodes(struct super_block *); extern int invalidate_inodes(struct super_block *, bool); @@ -130,6 +132,8 @@ extern int invalidate_inodes(struct super_block *, bool); */ extern struct dentry *__d_alloc(struct super_block *, const struct qstr *); extern int d_set_mounted(struct dentry *dentry); +extern long prune_dcache_sb(struct super_block *sb, unsigned long nr_to_scan, + int nid); /* * read_write.c diff --git a/fs/mbcache.c b/fs/mbcache.c index 8c32ef3ba88e..e519e45bf673 100644 --- a/fs/mbcache.c +++ b/fs/mbcache.c @@ -86,18 +86,6 @@ static LIST_HEAD(mb_cache_list); static LIST_HEAD(mb_cache_lru_list); static DEFINE_SPINLOCK(mb_cache_spinlock); -/* - * What the mbcache registers as to get shrunk dynamically. - */ - -static int mb_cache_shrink_fn(struct shrinker *shrink, - struct shrink_control *sc); - -static struct shrinker mb_cache_shrinker = { - .shrink = mb_cache_shrink_fn, - .seeks = DEFAULT_SEEKS, -}; - static inline int __mb_cache_entry_is_hashed(struct mb_cache_entry *ce) { @@ -151,7 +139,7 @@ forget: /* - * mb_cache_shrink_fn() memory pressure callback + * mb_cache_shrink_scan() memory pressure callback * * This function is called by the kernel memory management when memory * gets low. @@ -159,17 +147,16 @@ forget: * @shrink: (ignored) * @sc: shrink_control passed from reclaim * - * Returns the number of objects which are present in the cache. + * Returns the number of objects freed. */ -static int -mb_cache_shrink_fn(struct shrinker *shrink, struct shrink_control *sc) +static unsigned long +mb_cache_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) { LIST_HEAD(free_list); - struct mb_cache *cache; struct mb_cache_entry *entry, *tmp; - int count = 0; int nr_to_scan = sc->nr_to_scan; gfp_t gfp_mask = sc->gfp_mask; + unsigned long freed = 0; mb_debug("trying to free %d entries", nr_to_scan); spin_lock(&mb_cache_spinlock); @@ -179,19 +166,37 @@ mb_cache_shrink_fn(struct shrinker *shrink, struct shrink_control *sc) struct mb_cache_entry, e_lru_list); list_move_tail(&ce->e_lru_list, &free_list); __mb_cache_entry_unhash(ce); + freed++; + } + spin_unlock(&mb_cache_spinlock); + list_for_each_entry_safe(entry, tmp, &free_list, e_lru_list) { + __mb_cache_entry_forget(entry, gfp_mask); } + return freed; +} + +static unsigned long +mb_cache_shrink_count(struct shrinker *shrink, struct shrink_control *sc) +{ + struct mb_cache *cache; + unsigned long count = 0; + + spin_lock(&mb_cache_spinlock); list_for_each_entry(cache, &mb_cache_list, c_cache_list) { mb_debug("cache %s (%d)", cache->c_name, atomic_read(&cache->c_entry_count)); count += atomic_read(&cache->c_entry_count); } spin_unlock(&mb_cache_spinlock); - list_for_each_entry_safe(entry, tmp, &free_list, e_lru_list) { - __mb_cache_entry_forget(entry, gfp_mask); - } - return (count / 100) * sysctl_vfs_cache_pressure; + + return vfs_pressure_ratio(count); } +static struct shrinker mb_cache_shrinker = { + .count_objects = mb_cache_shrink_count, + .scan_objects = mb_cache_shrink_scan, + .seeks = DEFAULT_SEEKS, +}; /* * mb_cache_create() create a new cache diff --git a/fs/namei.c b/fs/namei.c index 409a441ba2ae..0dc4cbf21f37 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -660,29 +660,6 @@ static __always_inline void set_root_rcu(struct nameidata *nd) } } -static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *link) -{ - int ret; - - if (IS_ERR(link)) - goto fail; - - if (*link == '/') { - set_root(nd); - path_put(&nd->path); - nd->path = nd->root; - path_get(&nd->root); - nd->flags |= LOOKUP_JUMPED; - } - nd->inode = nd->path.dentry->d_inode; - - ret = link_path_walk(link, nd); - return ret; -fail: - path_put(&nd->path); - return PTR_ERR(link); -} - static void path_put_conditional(struct path *path, struct nameidata *nd) { dput(path->dentry); @@ -874,7 +851,20 @@ follow_link(struct path *link, struct nameidata *nd, void **p) error = 0; s = nd_get_link(nd); if (s) { - error = __vfs_follow_link(nd, s); + if (unlikely(IS_ERR(s))) { + path_put(&nd->path); + put_link(nd, link, *p); + return PTR_ERR(s); + } + if (*s == '/') { + set_root(nd); + path_put(&nd->path); + nd->path = nd->root; + path_get(&nd->root); + nd->flags |= LOOKUP_JUMPED; + } + nd->inode = nd->path.dentry->d_inode; + error = link_path_walk(s, nd); if (unlikely(error)) put_link(nd, link, *p); } @@ -2271,12 +2261,15 @@ mountpoint_last(struct nameidata *nd, struct path *path) dentry = d_alloc(dir, &nd->last); if (!dentry) { error = -ENOMEM; + mutex_unlock(&dir->d_inode->i_mutex); goto out; } dentry = lookup_real(dir->d_inode, dentry, nd->flags); error = PTR_ERR(dentry); - if (IS_ERR(dentry)) + if (IS_ERR(dentry)) { + mutex_unlock(&dir->d_inode->i_mutex); goto out; + } } mutex_unlock(&dir->d_inode->i_mutex); @@ -4236,11 +4229,6 @@ int generic_readlink(struct dentry *dentry, char __user *buffer, int buflen) return res; } -int vfs_follow_link(struct nameidata *nd, const char *link) -{ - return __vfs_follow_link(nd, link); -} - /* get the link contents into pagecache */ static char *page_getlink(struct dentry * dentry, struct page **ppage) { @@ -4352,7 +4340,6 @@ EXPORT_SYMBOL(vfs_path_lookup); EXPORT_SYMBOL(inode_permission); EXPORT_SYMBOL(unlock_rename); EXPORT_SYMBOL(vfs_create); -EXPORT_SYMBOL(vfs_follow_link); EXPORT_SYMBOL(vfs_link); EXPORT_SYMBOL(vfs_mkdir); EXPORT_SYMBOL(vfs_mknod); diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index e79bc6ce828e..de434f309af0 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -2006,17 +2006,18 @@ static void nfs_access_free_list(struct list_head *head) } } -int nfs_access_cache_shrinker(struct shrinker *shrink, - struct shrink_control *sc) +unsigned long +nfs_access_cache_scan(struct shrinker *shrink, struct shrink_control *sc) { LIST_HEAD(head); struct nfs_inode *nfsi, *next; struct nfs_access_entry *cache; int nr_to_scan = sc->nr_to_scan; gfp_t gfp_mask = sc->gfp_mask; + long freed = 0; if ((gfp_mask & GFP_KERNEL) != GFP_KERNEL) - return (nr_to_scan == 0) ? 0 : -1; + return SHRINK_STOP; spin_lock(&nfs_access_lru_lock); list_for_each_entry_safe(nfsi, next, &nfs_access_lru_list, access_cache_inode_lru) { @@ -2032,6 +2033,7 @@ int nfs_access_cache_shrinker(struct shrinker *shrink, struct nfs_access_entry, lru); list_move(&cache->lru, &head); rb_erase(&cache->rb_node, &nfsi->access_cache); + freed++; if (!list_empty(&nfsi->access_cache_entry_lru)) list_move_tail(&nfsi->access_cache_inode_lru, &nfs_access_lru_list); @@ -2046,7 +2048,13 @@ remove_lru_entry: } spin_unlock(&nfs_access_lru_lock); nfs_access_free_list(&head); - return (atomic_long_read(&nfs_access_nr_entries) / 100) * sysctl_vfs_cache_pressure; + return freed; +} + +unsigned long +nfs_access_cache_count(struct shrinker *shrink, struct shrink_control *sc) +{ + return vfs_pressure_ratio(atomic_long_read(&nfs_access_nr_entries)); } static void __nfs_access_zap_cache(struct nfs_inode *nfsi, struct list_head *head) diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index d388302c005f..38da8c2b81ac 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -273,8 +273,10 @@ extern struct nfs_client *nfs_init_client(struct nfs_client *clp, const char *ip_addr); /* dir.c */ -extern int nfs_access_cache_shrinker(struct shrinker *shrink, - struct shrink_control *sc); +extern unsigned long nfs_access_cache_count(struct shrinker *shrink, + struct shrink_control *sc); +extern unsigned long nfs_access_cache_scan(struct shrinker *shrink, + struct shrink_control *sc); struct dentry *nfs_lookup(struct inode *, struct dentry *, unsigned int); int nfs_create(struct inode *, struct dentry *, umode_t, bool); int nfs_mkdir(struct inode *, struct dentry *, umode_t); diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index f520a1113b38..28842abafab4 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -279,15 +279,15 @@ _nfs4_state_protect(struct nfs_client *clp, unsigned long sp4_mode, if (test_bit(sp4_mode, &clp->cl_sp4_flags)) { spin_lock(&clp->cl_lock); if (clp->cl_machine_cred != NULL) - newcred = get_rpccred(clp->cl_machine_cred); + /* don't call get_rpccred on the machine cred - + * a reference will be held for life of clp */ + newcred = clp->cl_machine_cred; spin_unlock(&clp->cl_lock); - if (msg->rpc_cred) - put_rpccred(msg->rpc_cred); msg->rpc_cred = newcred; flavor = clp->cl_rpcclient->cl_auth->au_flavor; - WARN_ON(flavor != RPC_AUTH_GSS_KRB5I && - flavor != RPC_AUTH_GSS_KRB5P); + WARN_ON_ONCE(flavor != RPC_AUTH_GSS_KRB5I && + flavor != RPC_AUTH_GSS_KRB5P); *clntp = clp->cl_rpcclient; return true; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 39b6cf2d1683..989bb9d3074d 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -6001,10 +6001,12 @@ static int _nfs4_proc_secinfo(struct inode *dir, const struct qstr *name, struct .rpc_resp = &res, }; struct rpc_clnt *clnt = NFS_SERVER(dir)->client; + struct rpc_cred *cred = NULL; if (use_integrity) { clnt = NFS_SERVER(dir)->nfs_client->cl_rpcclient; - msg.rpc_cred = nfs4_get_clid_cred(NFS_SERVER(dir)->nfs_client); + cred = nfs4_get_clid_cred(NFS_SERVER(dir)->nfs_client); + msg.rpc_cred = cred; } dprintk("NFS call secinfo %s\n", name->name); @@ -6016,8 +6018,8 @@ static int _nfs4_proc_secinfo(struct inode *dir, const struct qstr *name, struct &res.seq_res, 0); dprintk("NFS reply secinfo: %d\n", status); - if (msg.rpc_cred) - put_rpccred(msg.rpc_cred); + if (cred) + put_rpccred(cred); return status; } @@ -6151,11 +6153,13 @@ static const struct nfs41_state_protection nfs4_sp4_mach_cred_request = { }, .allow.u.words = { [0] = 1 << (OP_CLOSE) | - 1 << (OP_LOCKU), + 1 << (OP_LOCKU) | + 1 << (OP_COMMIT), [1] = 1 << (OP_SECINFO - 32) | 1 << (OP_SECINFO_NO_NAME - 32) | 1 << (OP_TEST_STATEID - 32) | - 1 << (OP_FREE_STATEID - 32) + 1 << (OP_FREE_STATEID - 32) | + 1 << (OP_WRITE - 32) } }; @@ -7496,11 +7500,13 @@ _nfs41_proc_secinfo_no_name(struct nfs_server *server, struct nfs_fh *fhandle, .rpc_resp = &res, }; struct rpc_clnt *clnt = server->client; + struct rpc_cred *cred = NULL; int status; if (use_integrity) { clnt = server->nfs_client->cl_rpcclient; - msg.rpc_cred = nfs4_get_clid_cred(server->nfs_client); + cred = nfs4_get_clid_cred(server->nfs_client); + msg.rpc_cred = cred; } dprintk("--> %s\n", __func__); @@ -7508,8 +7514,8 @@ _nfs41_proc_secinfo_no_name(struct nfs_server *server, struct nfs_fh *fhandle, &res.seq_res, 0); dprintk("<-- %s status=%d\n", __func__, status); - if (msg.rpc_cred) - put_rpccred(msg.rpc_cred); + if (cred) + put_rpccred(cred); return status; } diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index fbdad9e1719f..79210d23f607 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -414,7 +414,7 @@ static int nfs4_stat_to_errno(int); #define decode_test_stateid_maxsz (op_decode_hdr_maxsz + 2 + 1) #define encode_free_stateid_maxsz (op_encode_hdr_maxsz + 1 + \ XDR_QUADLEN(NFS4_STATEID_SIZE)) -#define decode_free_stateid_maxsz (op_decode_hdr_maxsz + 1) +#define decode_free_stateid_maxsz (op_decode_hdr_maxsz) #else /* CONFIG_NFS_V4_1 */ #define encode_sequence_maxsz 0 #define decode_sequence_maxsz 0 @@ -5966,21 +5966,8 @@ out: static int decode_free_stateid(struct xdr_stream *xdr, struct nfs41_free_stateid_res *res) { - __be32 *p; - int status; - - status = decode_op_hdr(xdr, OP_FREE_STATEID); - if (status) - return status; - - p = xdr_inline_decode(xdr, 4); - if (unlikely(!p)) - goto out_overflow; - res->status = be32_to_cpup(p++); + res->status = decode_op_hdr(xdr, OP_FREE_STATEID); return res->status; -out_overflow: - print_overflow_msg(__func__, xdr); - return -EIO; } #endif /* CONFIG_NFS_V4_1 */ diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 5793f24613c8..a03b9c6f9489 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -360,7 +360,8 @@ static void unregister_nfs4_fs(void) #endif static struct shrinker acl_shrinker = { - .shrink = nfs_access_cache_shrinker, + .count_objects = nfs_access_cache_count, + .scan_objects = nfs_access_cache_scan, .seeks = DEFAULT_SEEKS, }; diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c index e76244edd748..9186c7ce0b14 100644 --- a/fs/nfsd/nfscache.c +++ b/fs/nfsd/nfscache.c @@ -59,11 +59,14 @@ static unsigned int longest_chain_cachesize; static int nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *vec); static void cache_cleaner_func(struct work_struct *unused); -static int nfsd_reply_cache_shrink(struct shrinker *shrink, - struct shrink_control *sc); +static unsigned long nfsd_reply_cache_count(struct shrinker *shrink, + struct shrink_control *sc); +static unsigned long nfsd_reply_cache_scan(struct shrinker *shrink, + struct shrink_control *sc); static struct shrinker nfsd_reply_cache_shrinker = { - .shrink = nfsd_reply_cache_shrink, + .scan_objects = nfsd_reply_cache_scan, + .count_objects = nfsd_reply_cache_count, .seeks = 1, }; @@ -232,16 +235,18 @@ nfsd_cache_entry_expired(struct svc_cacherep *rp) * Walk the LRU list and prune off entries that are older than RC_EXPIRE. * Also prune the oldest ones when the total exceeds the max number of entries. */ -static void +static long prune_cache_entries(void) { struct svc_cacherep *rp, *tmp; + long freed = 0; list_for_each_entry_safe(rp, tmp, &lru_head, c_lru) { if (!nfsd_cache_entry_expired(rp) && num_drc_entries <= max_drc_entries) break; nfsd_reply_cache_free_locked(rp); + freed++; } /* @@ -254,6 +259,7 @@ prune_cache_entries(void) cancel_delayed_work(&cache_cleaner); else mod_delayed_work(system_wq, &cache_cleaner, RC_EXPIRE); + return freed; } static void @@ -264,20 +270,28 @@ cache_cleaner_func(struct work_struct *unused) spin_unlock(&cache_lock); } -static int -nfsd_reply_cache_shrink(struct shrinker *shrink, struct shrink_control *sc) +static unsigned long +nfsd_reply_cache_count(struct shrinker *shrink, struct shrink_control *sc) { - unsigned int num; + unsigned long num; spin_lock(&cache_lock); - if (sc->nr_to_scan) - prune_cache_entries(); num = num_drc_entries; spin_unlock(&cache_lock); return num; } +static unsigned long +nfsd_reply_cache_scan(struct shrinker *shrink, struct shrink_control *sc) +{ + unsigned long freed; + + spin_lock(&cache_lock); + freed = prune_cache_entries(); + spin_unlock(&cache_lock); + return freed; +} /* * Walk an xdr_buf and get a CRC for at most the first RC_CSUMLEN bytes */ diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 9a702e193538..831d49a4111f 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -687,45 +687,37 @@ int dquot_quota_sync(struct super_block *sb, int type) } EXPORT_SYMBOL(dquot_quota_sync); -/* Free unused dquots from cache */ -static void prune_dqcache(int count) +static unsigned long +dqcache_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) { struct list_head *head; struct dquot *dquot; + unsigned long freed = 0; head = free_dquots.prev; - while (head != &free_dquots && count) { + while (head != &free_dquots && sc->nr_to_scan) { dquot = list_entry(head, struct dquot, dq_free); remove_dquot_hash(dquot); remove_free_dquot(dquot); remove_inuse(dquot); do_destroy_dquot(dquot); - count--; + sc->nr_to_scan--; + freed++; head = free_dquots.prev; } + return freed; } -/* - * This is called from kswapd when we think we need some - * more memory - */ -static int shrink_dqcache_memory(struct shrinker *shrink, - struct shrink_control *sc) +static unsigned long +dqcache_shrink_count(struct shrinker *shrink, struct shrink_control *sc) { - int nr = sc->nr_to_scan; - - if (nr) { - spin_lock(&dq_list_lock); - prune_dqcache(nr); - spin_unlock(&dq_list_lock); - } - return ((unsigned) - percpu_counter_read_positive(&dqstats.counter[DQST_FREE_DQUOTS]) - /100) * sysctl_vfs_cache_pressure; + return vfs_pressure_ratio( + percpu_counter_read_positive(&dqstats.counter[DQST_FREE_DQUOTS])); } static struct shrinker dqcache_shrinker = { - .shrink = shrink_dqcache_memory, + .count_objects = dqcache_shrink_count, + .scan_objects = dqcache_shrink_scan, .seeks = DEFAULT_SEEKS, }; diff --git a/fs/super.c b/fs/super.c index f6961ea84c56..3a96c9783a8b 100644 --- a/fs/super.c +++ b/fs/super.c @@ -53,11 +53,15 @@ static char *sb_writers_name[SB_FREEZE_LEVELS] = { * shrinker path and that leads to deadlock on the shrinker_rwsem. Hence we * take a passive reference to the superblock to avoid this from occurring. */ -static int prune_super(struct shrinker *shrink, struct shrink_control *sc) +static unsigned long super_cache_scan(struct shrinker *shrink, + struct shrink_control *sc) { struct super_block *sb; - int fs_objects = 0; - int total_objects; + long fs_objects = 0; + long total_objects; + long freed = 0; + long dentries; + long inodes; sb = container_of(shrink, struct super_block, s_shrink); @@ -65,46 +69,62 @@ static int prune_super(struct shrinker *shrink, struct shrink_control *sc) * Deadlock avoidance. We may hold various FS locks, and we don't want * to recurse into the FS that called us in clear_inode() and friends.. */ - if (sc->nr_to_scan && !(sc->gfp_mask & __GFP_FS)) - return -1; + if (!(sc->gfp_mask & __GFP_FS)) + return SHRINK_STOP; if (!grab_super_passive(sb)) - return -1; + return SHRINK_STOP; if (sb->s_op->nr_cached_objects) - fs_objects = sb->s_op->nr_cached_objects(sb); - - total_objects = sb->s_nr_dentry_unused + - sb->s_nr_inodes_unused + fs_objects + 1; - - if (sc->nr_to_scan) { - int dentries; - int inodes; - - /* proportion the scan between the caches */ - dentries = (sc->nr_to_scan * sb->s_nr_dentry_unused) / - total_objects; - inodes = (sc->nr_to_scan * sb->s_nr_inodes_unused) / - total_objects; - if (fs_objects) - fs_objects = (sc->nr_to_scan * fs_objects) / - total_objects; - /* - * prune the dcache first as the icache is pinned by it, then - * prune the icache, followed by the filesystem specific caches - */ - prune_dcache_sb(sb, dentries); - prune_icache_sb(sb, inodes); + fs_objects = sb->s_op->nr_cached_objects(sb, sc->nid); - if (fs_objects && sb->s_op->free_cached_objects) { - sb->s_op->free_cached_objects(sb, fs_objects); - fs_objects = sb->s_op->nr_cached_objects(sb); - } - total_objects = sb->s_nr_dentry_unused + - sb->s_nr_inodes_unused + fs_objects; + inodes = list_lru_count_node(&sb->s_inode_lru, sc->nid); + dentries = list_lru_count_node(&sb->s_dentry_lru, sc->nid); + total_objects = dentries + inodes + fs_objects + 1; + + /* proportion the scan between the caches */ + dentries = mult_frac(sc->nr_to_scan, dentries, total_objects); + inodes = mult_frac(sc->nr_to_scan, inodes, total_objects); + + /* + * prune the dcache first as the icache is pinned by it, then + * prune the icache, followed by the filesystem specific caches + */ + freed = prune_dcache_sb(sb, dentries, sc->nid); + freed += prune_icache_sb(sb, inodes, sc->nid); + + if (fs_objects) { + fs_objects = mult_frac(sc->nr_to_scan, fs_objects, + total_objects); + freed += sb->s_op->free_cached_objects(sb, fs_objects, + sc->nid); } - total_objects = (total_objects / 100) * sysctl_vfs_cache_pressure; + drop_super(sb); + return freed; +} + +static unsigned long super_cache_count(struct shrinker *shrink, + struct shrink_control *sc) +{ + struct super_block *sb; + long total_objects = 0; + + sb = container_of(shrink, struct super_block, s_shrink); + + if (!grab_super_passive(sb)) + return 0; + + if (sb->s_op && sb->s_op->nr_cached_objects) + total_objects = sb->s_op->nr_cached_objects(sb, + sc->nid); + + total_objects += list_lru_count_node(&sb->s_dentry_lru, + sc->nid); + total_objects += list_lru_count_node(&sb->s_inode_lru, + sc->nid); + + total_objects = vfs_pressure_ratio(total_objects); drop_super(sb); return total_objects; } @@ -175,9 +195,12 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags) INIT_HLIST_NODE(&s->s_instances); INIT_HLIST_BL_HEAD(&s->s_anon); INIT_LIST_HEAD(&s->s_inodes); - INIT_LIST_HEAD(&s->s_dentry_lru); - INIT_LIST_HEAD(&s->s_inode_lru); - spin_lock_init(&s->s_inode_lru_lock); + + if (list_lru_init(&s->s_dentry_lru)) + goto err_out; + if (list_lru_init(&s->s_inode_lru)) + goto err_out_dentry_lru; + INIT_LIST_HEAD(&s->s_mounts); init_rwsem(&s->s_umount); lockdep_set_class(&s->s_umount, &type->s_umount_key); @@ -210,11 +233,16 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags) s->cleancache_poolid = -1; s->s_shrink.seeks = DEFAULT_SEEKS; - s->s_shrink.shrink = prune_super; + s->s_shrink.scan_objects = super_cache_scan; + s->s_shrink.count_objects = super_cache_count; s->s_shrink.batch = 1024; + s->s_shrink.flags = SHRINKER_NUMA_AWARE; } out: return s; + +err_out_dentry_lru: + list_lru_destroy(&s->s_dentry_lru); err_out: security_sb_free(s); #ifdef CONFIG_SMP @@ -295,6 +323,9 @@ void deactivate_locked_super(struct super_block *s) /* caches are now gone, we can safely kill the shrinker now */ unregister_shrinker(&s->s_shrink); + list_lru_destroy(&s->s_dentry_lru); + list_lru_destroy(&s->s_inode_lru); + put_filesystem(fs); put_super(s); } else { diff --git a/fs/ubifs/shrinker.c b/fs/ubifs/shrinker.c index 9e1d05666fed..f35135e28e96 100644 --- a/fs/ubifs/shrinker.c +++ b/fs/ubifs/shrinker.c @@ -277,18 +277,25 @@ static int kick_a_thread(void) return 0; } -int ubifs_shrinker(struct shrinker *shrink, struct shrink_control *sc) +unsigned long ubifs_shrink_count(struct shrinker *shrink, + struct shrink_control *sc) { - int nr = sc->nr_to_scan; - int freed, contention = 0; long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt); - if (nr == 0) - /* - * Due to the way UBIFS updates the clean znode counter it may - * temporarily be negative. - */ - return clean_zn_cnt >= 0 ? clean_zn_cnt : 1; + /* + * Due to the way UBIFS updates the clean znode counter it may + * temporarily be negative. + */ + return clean_zn_cnt >= 0 ? clean_zn_cnt : 1; +} + +unsigned long ubifs_shrink_scan(struct shrinker *shrink, + struct shrink_control *sc) +{ + unsigned long nr = sc->nr_to_scan; + int contention = 0; + unsigned long freed; + long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt); if (!clean_zn_cnt) { /* @@ -316,10 +323,10 @@ int ubifs_shrinker(struct shrinker *shrink, struct shrink_control *sc) if (!freed && contention) { dbg_tnc("freed nothing, but contention"); - return -1; + return SHRINK_STOP; } out: - dbg_tnc("%d znodes were freed, requested %d", freed, nr); + dbg_tnc("%lu znodes were freed, requested %lu", freed, nr); return freed; } diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 879b9976c12b..3e4aa7281e04 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -49,7 +49,8 @@ struct kmem_cache *ubifs_inode_slab; /* UBIFS TNC shrinker description */ static struct shrinker ubifs_shrinker_info = { - .shrink = ubifs_shrinker, + .scan_objects = ubifs_shrink_scan, + .count_objects = ubifs_shrink_count, .seeks = DEFAULT_SEEKS, }; diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index b2babce4d70f..e8c8cfe1435c 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h @@ -1624,7 +1624,10 @@ int ubifs_tnc_start_commit(struct ubifs_info *c, struct ubifs_zbranch *zroot); int ubifs_tnc_end_commit(struct ubifs_info *c); /* shrinker.c */ -int ubifs_shrinker(struct shrinker *shrink, struct shrink_control *sc); +unsigned long ubifs_shrink_scan(struct shrinker *shrink, + struct shrink_control *sc); +unsigned long ubifs_shrink_count(struct shrinker *shrink, + struct shrink_control *sc); /* commit.c */ int ubifs_bg_thread(void *info); diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index c06823fe10d3..263470075ea2 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -81,54 +81,6 @@ xfs_buf_vmap_len( } /* - * xfs_buf_lru_add - add a buffer to the LRU. - * - * The LRU takes a new reference to the buffer so that it will only be freed - * once the shrinker takes the buffer off the LRU. - */ -STATIC void -xfs_buf_lru_add( - struct xfs_buf *bp) -{ - struct xfs_buftarg *btp = bp->b_target; - - spin_lock(&btp->bt_lru_lock); - if (list_empty(&bp->b_lru)) { - atomic_inc(&bp->b_hold); - list_add_tail(&bp->b_lru, &btp->bt_lru); - btp->bt_lru_nr++; - bp->b_lru_flags &= ~_XBF_LRU_DISPOSE; - } - spin_unlock(&btp->bt_lru_lock); -} - -/* - * xfs_buf_lru_del - remove a buffer from the LRU - * - * The unlocked check is safe here because it only occurs when there are not - * b_lru_ref counts left on the inode under the pag->pag_buf_lock. it is there - * to optimise the shrinker removing the buffer from the LRU and calling - * xfs_buf_free(). i.e. it removes an unnecessary round trip on the - * bt_lru_lock. - */ -STATIC void -xfs_buf_lru_del( - struct xfs_buf *bp) -{ - struct xfs_buftarg *btp = bp->b_target; - - if (list_empty(&bp->b_lru)) - return; - - spin_lock(&btp->bt_lru_lock); - if (!list_empty(&bp->b_lru)) { - list_del_init(&bp->b_lru); - btp->bt_lru_nr--; - } - spin_unlock(&btp->bt_lru_lock); -} - -/* * When we mark a buffer stale, we remove the buffer from the LRU and clear the * b_lru_ref count so that the buffer is freed immediately when the buffer * reference count falls to zero. If the buffer is already on the LRU, we need @@ -151,20 +103,14 @@ xfs_buf_stale( */ bp->b_flags &= ~_XBF_DELWRI_Q; - atomic_set(&(bp)->b_lru_ref, 0); - if (!list_empty(&bp->b_lru)) { - struct xfs_buftarg *btp = bp->b_target; + spin_lock(&bp->b_lock); + atomic_set(&bp->b_lru_ref, 0); + if (!(bp->b_state & XFS_BSTATE_DISPOSE) && + (list_lru_del(&bp->b_target->bt_lru, &bp->b_lru))) + atomic_dec(&bp->b_hold); - spin_lock(&btp->bt_lru_lock); - if (!list_empty(&bp->b_lru) && - !(bp->b_lru_flags & _XBF_LRU_DISPOSE)) { - list_del_init(&bp->b_lru); - btp->bt_lru_nr--; - atomic_dec(&bp->b_hold); - } - spin_unlock(&btp->bt_lru_lock); - } ASSERT(atomic_read(&bp->b_hold) >= 1); + spin_unlock(&bp->b_lock); } static int @@ -228,6 +174,7 @@ _xfs_buf_alloc( INIT_LIST_HEAD(&bp->b_list); RB_CLEAR_NODE(&bp->b_rbnode); sema_init(&bp->b_sema, 0); /* held, no waiters */ + spin_lock_init(&bp->b_lock); XB_SET_OWNER(bp); bp->b_target = target; bp->b_flags = flags; @@ -917,12 +864,33 @@ xfs_buf_rele( ASSERT(atomic_read(&bp->b_hold) > 0); if (atomic_dec_and_lock(&bp->b_hold, &pag->pag_buf_lock)) { - if (!(bp->b_flags & XBF_STALE) && - atomic_read(&bp->b_lru_ref)) { - xfs_buf_lru_add(bp); + spin_lock(&bp->b_lock); + if (!(bp->b_flags & XBF_STALE) && atomic_read(&bp->b_lru_ref)) { + /* + * If the buffer is added to the LRU take a new + * reference to the buffer for the LRU and clear the + * (now stale) dispose list state flag + */ + if (list_lru_add(&bp->b_target->bt_lru, &bp->b_lru)) { + bp->b_state &= ~XFS_BSTATE_DISPOSE; + atomic_inc(&bp->b_hold); + } + spin_unlock(&bp->b_lock); spin_unlock(&pag->pag_buf_lock); } else { - xfs_buf_lru_del(bp); + /* + * most of the time buffers will already be removed from + * the LRU, so optimise that case by checking for the + * XFS_BSTATE_DISPOSE flag indicating the last list the + * buffer was on was the disposal list + */ + if (!(bp->b_state & XFS_BSTATE_DISPOSE)) { + list_lru_del(&bp->b_target->bt_lru, &bp->b_lru); + } else { + ASSERT(list_empty(&bp->b_lru)); + } + spin_unlock(&bp->b_lock); + ASSERT(!(bp->b_flags & _XBF_DELWRI_Q)); rb_erase(&bp->b_rbnode, &pag->pag_buf_tree); spin_unlock(&pag->pag_buf_lock); @@ -1502,83 +1470,121 @@ xfs_buf_iomove( * returned. These buffers will have an elevated hold count, so wait on those * while freeing all the buffers only held by the LRU. */ +static enum lru_status +xfs_buftarg_wait_rele( + struct list_head *item, + spinlock_t *lru_lock, + void *arg) + +{ + struct xfs_buf *bp = container_of(item, struct xfs_buf, b_lru); + struct list_head *dispose = arg; + + if (atomic_read(&bp->b_hold) > 1) { + /* need to wait, so skip it this pass */ + trace_xfs_buf_wait_buftarg(bp, _RET_IP_); + return LRU_SKIP; + } + if (!spin_trylock(&bp->b_lock)) + return LRU_SKIP; + + /* + * clear the LRU reference count so the buffer doesn't get + * ignored in xfs_buf_rele(). + */ + atomic_set(&bp->b_lru_ref, 0); + bp->b_state |= XFS_BSTATE_DISPOSE; + list_move(item, dispose); + spin_unlock(&bp->b_lock); + return LRU_REMOVED; +} + void xfs_wait_buftarg( struct xfs_buftarg *btp) { - struct xfs_buf *bp; + LIST_HEAD(dispose); + int loop = 0; -restart: - spin_lock(&btp->bt_lru_lock); - while (!list_empty(&btp->bt_lru)) { - bp = list_first_entry(&btp->bt_lru, struct xfs_buf, b_lru); - if (atomic_read(&bp->b_hold) > 1) { - trace_xfs_buf_wait_buftarg(bp, _RET_IP_); - list_move_tail(&bp->b_lru, &btp->bt_lru); - spin_unlock(&btp->bt_lru_lock); - delay(100); - goto restart; + /* loop until there is nothing left on the lru list. */ + while (list_lru_count(&btp->bt_lru)) { + list_lru_walk(&btp->bt_lru, xfs_buftarg_wait_rele, + &dispose, LONG_MAX); + + while (!list_empty(&dispose)) { + struct xfs_buf *bp; + bp = list_first_entry(&dispose, struct xfs_buf, b_lru); + list_del_init(&bp->b_lru); + xfs_buf_rele(bp); } - /* - * clear the LRU reference count so the buffer doesn't get - * ignored in xfs_buf_rele(). - */ - atomic_set(&bp->b_lru_ref, 0); - spin_unlock(&btp->bt_lru_lock); - xfs_buf_rele(bp); - spin_lock(&btp->bt_lru_lock); + if (loop++ != 0) + delay(100); } - spin_unlock(&btp->bt_lru_lock); } -int -xfs_buftarg_shrink( +static enum lru_status +xfs_buftarg_isolate( + struct list_head *item, + spinlock_t *lru_lock, + void *arg) +{ + struct xfs_buf *bp = container_of(item, struct xfs_buf, b_lru); + struct list_head *dispose = arg; + + /* + * we are inverting the lru lock/bp->b_lock here, so use a trylock. + * If we fail to get the lock, just skip it. + */ + if (!spin_trylock(&bp->b_lock)) + return LRU_SKIP; + /* + * Decrement the b_lru_ref count unless the value is already + * zero. If the value is already zero, we need to reclaim the + * buffer, otherwise it gets another trip through the LRU. + */ + if (!atomic_add_unless(&bp->b_lru_ref, -1, 0)) { + spin_unlock(&bp->b_lock); + return LRU_ROTATE; + } + + bp->b_state |= XFS_BSTATE_DISPOSE; + list_move(item, dispose); + spin_unlock(&bp->b_lock); + return LRU_REMOVED; +} + +static unsigned long +xfs_buftarg_shrink_scan( struct shrinker *shrink, struct shrink_control *sc) { struct xfs_buftarg *btp = container_of(shrink, struct xfs_buftarg, bt_shrinker); - struct xfs_buf *bp; - int nr_to_scan = sc->nr_to_scan; LIST_HEAD(dispose); + unsigned long freed; + unsigned long nr_to_scan = sc->nr_to_scan; - if (!nr_to_scan) - return btp->bt_lru_nr; - - spin_lock(&btp->bt_lru_lock); - while (!list_empty(&btp->bt_lru)) { - if (nr_to_scan-- <= 0) - break; - - bp = list_first_entry(&btp->bt_lru, struct xfs_buf, b_lru); - - /* - * Decrement the b_lru_ref count unless the value is already - * zero. If the value is already zero, we need to reclaim the - * buffer, otherwise it gets another trip through the LRU. - */ - if (!atomic_add_unless(&bp->b_lru_ref, -1, 0)) { - list_move_tail(&bp->b_lru, &btp->bt_lru); - continue; - } - - /* - * remove the buffer from the LRU now to avoid needing another - * lock round trip inside xfs_buf_rele(). - */ - list_move(&bp->b_lru, &dispose); - btp->bt_lru_nr--; - bp->b_lru_flags |= _XBF_LRU_DISPOSE; - } - spin_unlock(&btp->bt_lru_lock); + freed = list_lru_walk_node(&btp->bt_lru, sc->nid, xfs_buftarg_isolate, + &dispose, &nr_to_scan); while (!list_empty(&dispose)) { + struct xfs_buf *bp; bp = list_first_entry(&dispose, struct xfs_buf, b_lru); list_del_init(&bp->b_lru); xfs_buf_rele(bp); } - return btp->bt_lru_nr; + return freed; +} + +static unsigned long +xfs_buftarg_shrink_count( + struct shrinker *shrink, + struct shrink_control *sc) +{ + struct xfs_buftarg *btp = container_of(shrink, + struct xfs_buftarg, bt_shrinker); + return list_lru_count_node(&btp->bt_lru, sc->nid); } void @@ -1587,6 +1593,7 @@ xfs_free_buftarg( struct xfs_buftarg *btp) { unregister_shrinker(&btp->bt_shrinker); + list_lru_destroy(&btp->bt_lru); if (mp->m_flags & XFS_MOUNT_BARRIER) xfs_blkdev_issue_flush(btp); @@ -1660,12 +1667,16 @@ xfs_alloc_buftarg( if (!btp->bt_bdi) goto error; - INIT_LIST_HEAD(&btp->bt_lru); - spin_lock_init(&btp->bt_lru_lock); if (xfs_setsize_buftarg_early(btp, bdev)) goto error; - btp->bt_shrinker.shrink = xfs_buftarg_shrink; + + if (list_lru_init(&btp->bt_lru)) + goto error; + + btp->bt_shrinker.count_objects = xfs_buftarg_shrink_count; + btp->bt_shrinker.scan_objects = xfs_buftarg_shrink_scan; btp->bt_shrinker.seeks = DEFAULT_SEEKS; + btp->bt_shrinker.flags = SHRINKER_NUMA_AWARE; register_shrinker(&btp->bt_shrinker); return btp; diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index 433a12ed7b17..e65683361017 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h @@ -25,6 +25,7 @@ #include <linux/fs.h> #include <linux/buffer_head.h> #include <linux/uio.h> +#include <linux/list_lru.h> /* * Base types @@ -59,7 +60,6 @@ typedef enum { #define _XBF_KMEM (1 << 21)/* backed by heap memory */ #define _XBF_DELWRI_Q (1 << 22)/* buffer on a delwri queue */ #define _XBF_COMPOUND (1 << 23)/* compound buffer */ -#define _XBF_LRU_DISPOSE (1 << 24)/* buffer being discarded */ typedef unsigned int xfs_buf_flags_t; @@ -78,8 +78,12 @@ typedef unsigned int xfs_buf_flags_t; { _XBF_PAGES, "PAGES" }, \ { _XBF_KMEM, "KMEM" }, \ { _XBF_DELWRI_Q, "DELWRI_Q" }, \ - { _XBF_COMPOUND, "COMPOUND" }, \ - { _XBF_LRU_DISPOSE, "LRU_DISPOSE" } + { _XBF_COMPOUND, "COMPOUND" } + +/* + * Internal state flags. + */ +#define XFS_BSTATE_DISPOSE (1 << 0) /* buffer being discarded */ typedef struct xfs_buftarg { dev_t bt_dev; @@ -92,9 +96,7 @@ typedef struct xfs_buftarg { /* LRU control structures */ struct shrinker bt_shrinker; - struct list_head bt_lru; - spinlock_t bt_lru_lock; - unsigned int bt_lru_nr; + struct list_lru bt_lru; } xfs_buftarg_t; struct xfs_buf; @@ -137,7 +139,8 @@ typedef struct xfs_buf { * bt_lru_lock and not by b_sema */ struct list_head b_lru; /* lru list */ - xfs_buf_flags_t b_lru_flags; /* internal lru status flags */ + spinlock_t b_lock; /* internal state lock */ + unsigned int b_state; /* internal state flags */ wait_queue_head_t b_waiters; /* unpin waiters */ struct list_head b_list; struct xfs_perag *b_pag; /* contains rbtree root */ diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c index 251c66632e5e..71520e6e5d65 100644 --- a/fs/xfs/xfs_dquot.c +++ b/fs/xfs/xfs_dquot.c @@ -940,13 +940,8 @@ xfs_qm_dqput_final( trace_xfs_dqput_free(dqp); - mutex_lock(&qi->qi_lru_lock); - if (list_empty(&dqp->q_lru)) { - list_add_tail(&dqp->q_lru, &qi->qi_lru_list); - qi->qi_lru_count++; + if (list_lru_add(&qi->qi_lru, &dqp->q_lru)) XFS_STATS_INC(xs_qm_dquot_unused); - } - mutex_unlock(&qi->qi_lru_lock); /* * If we just added a udquot to the freelist, then we want to release diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index 16219b9c6790..73b62a24ceac 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -1167,7 +1167,7 @@ xfs_reclaim_inodes( * them to be cleaned, which we hope will not be very long due to the * background walker having already kicked the IO off on those dirty inodes. */ -void +long xfs_reclaim_inodes_nr( struct xfs_mount *mp, int nr_to_scan) @@ -1176,7 +1176,7 @@ xfs_reclaim_inodes_nr( xfs_reclaim_work_queue(mp); xfs_ail_push_all(mp->m_ail); - xfs_reclaim_inodes_ag(mp, SYNC_TRYLOCK | SYNC_WAIT, &nr_to_scan); + return xfs_reclaim_inodes_ag(mp, SYNC_TRYLOCK | SYNC_WAIT, &nr_to_scan); } /* diff --git a/fs/xfs/xfs_icache.h b/fs/xfs/xfs_icache.h index 8a89f7d791bd..456f0144e1b6 100644 --- a/fs/xfs/xfs_icache.h +++ b/fs/xfs/xfs_icache.h @@ -46,7 +46,7 @@ void xfs_reclaim_worker(struct work_struct *work); int xfs_reclaim_inodes(struct xfs_mount *mp, int mode); int xfs_reclaim_inodes_count(struct xfs_mount *mp); -void xfs_reclaim_inodes_nr(struct xfs_mount *mp, int nr_to_scan); +long xfs_reclaim_inodes_nr(struct xfs_mount *mp, int nr_to_scan); void xfs_inode_set_reclaim_tag(struct xfs_inode *ip); diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index 6218a0aeeeea..3e6c2e6c9cd2 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -51,8 +51,9 @@ */ STATIC int xfs_qm_init_quotainos(xfs_mount_t *); STATIC int xfs_qm_init_quotainfo(xfs_mount_t *); -STATIC int xfs_qm_shake(struct shrinker *, struct shrink_control *); + +STATIC void xfs_qm_dqfree_one(struct xfs_dquot *dqp); /* * We use the batch lookup interface to iterate over the dquots as it * currently is the only interface into the radix tree code that allows @@ -203,12 +204,9 @@ xfs_qm_dqpurge( * We move dquots to the freelist as soon as their reference count * hits zero, so it really should be on the freelist here. */ - mutex_lock(&qi->qi_lru_lock); ASSERT(!list_empty(&dqp->q_lru)); - list_del_init(&dqp->q_lru); - qi->qi_lru_count--; + list_lru_del(&qi->qi_lru, &dqp->q_lru); XFS_STATS_DEC(xs_qm_dquot_unused); - mutex_unlock(&qi->qi_lru_lock); xfs_qm_dqdestroy(dqp); @@ -680,6 +678,143 @@ xfs_qm_calc_dquots_per_chunk( return ndquots; } +struct xfs_qm_isolate { + struct list_head buffers; + struct list_head dispose; +}; + +static enum lru_status +xfs_qm_dquot_isolate( + struct list_head *item, + spinlock_t *lru_lock, + void *arg) +{ + struct xfs_dquot *dqp = container_of(item, + struct xfs_dquot, q_lru); + struct xfs_qm_isolate *isol = arg; + + if (!xfs_dqlock_nowait(dqp)) + goto out_miss_busy; + + /* + * This dquot has acquired a reference in the meantime remove it from + * the freelist and try again. + */ + if (dqp->q_nrefs) { + xfs_dqunlock(dqp); + XFS_STATS_INC(xs_qm_dqwants); + + trace_xfs_dqreclaim_want(dqp); + list_del_init(&dqp->q_lru); + XFS_STATS_DEC(xs_qm_dquot_unused); + return LRU_REMOVED; + } + + /* + * If the dquot is dirty, flush it. If it's already being flushed, just + * skip it so there is time for the IO to complete before we try to + * reclaim it again on the next LRU pass. + */ + if (!xfs_dqflock_nowait(dqp)) { + xfs_dqunlock(dqp); + goto out_miss_busy; + } + + if (XFS_DQ_IS_DIRTY(dqp)) { + struct xfs_buf *bp = NULL; + int error; + + trace_xfs_dqreclaim_dirty(dqp); + + /* we have to drop the LRU lock to flush the dquot */ + spin_unlock(lru_lock); + + error = xfs_qm_dqflush(dqp, &bp); + if (error) { + xfs_warn(dqp->q_mount, "%s: dquot %p flush failed", + __func__, dqp); + goto out_unlock_dirty; + } + + xfs_buf_delwri_queue(bp, &isol->buffers); + xfs_buf_relse(bp); + goto out_unlock_dirty; + } + xfs_dqfunlock(dqp); + + /* + * Prevent lookups now that we are past the point of no return. + */ + dqp->dq_flags |= XFS_DQ_FREEING; + xfs_dqunlock(dqp); + + ASSERT(dqp->q_nrefs == 0); + list_move_tail(&dqp->q_lru, &isol->dispose); + XFS_STATS_DEC(xs_qm_dquot_unused); + trace_xfs_dqreclaim_done(dqp); + XFS_STATS_INC(xs_qm_dqreclaims); + return LRU_REMOVED; + +out_miss_busy: + trace_xfs_dqreclaim_busy(dqp); + XFS_STATS_INC(xs_qm_dqreclaim_misses); + return LRU_SKIP; + +out_unlock_dirty: + trace_xfs_dqreclaim_busy(dqp); + XFS_STATS_INC(xs_qm_dqreclaim_misses); + xfs_dqunlock(dqp); + spin_lock(lru_lock); + return LRU_RETRY; +} + +static unsigned long +xfs_qm_shrink_scan( + struct shrinker *shrink, + struct shrink_control *sc) +{ + struct xfs_quotainfo *qi = container_of(shrink, + struct xfs_quotainfo, qi_shrinker); + struct xfs_qm_isolate isol; + unsigned long freed; + int error; + unsigned long nr_to_scan = sc->nr_to_scan; + + if ((sc->gfp_mask & (__GFP_FS|__GFP_WAIT)) != (__GFP_FS|__GFP_WAIT)) + return 0; + + INIT_LIST_HEAD(&isol.buffers); + INIT_LIST_HEAD(&isol.dispose); + + freed = list_lru_walk_node(&qi->qi_lru, sc->nid, xfs_qm_dquot_isolate, &isol, + &nr_to_scan); + + error = xfs_buf_delwri_submit(&isol.buffers); + if (error) + xfs_warn(NULL, "%s: dquot reclaim failed", __func__); + + while (!list_empty(&isol.dispose)) { + struct xfs_dquot *dqp; + + dqp = list_first_entry(&isol.dispose, struct xfs_dquot, q_lru); + list_del_init(&dqp->q_lru); + xfs_qm_dqfree_one(dqp); + } + + return freed; +} + +static unsigned long +xfs_qm_shrink_count( + struct shrinker *shrink, + struct shrink_control *sc) +{ + struct xfs_quotainfo *qi = container_of(shrink, + struct xfs_quotainfo, qi_shrinker); + + return list_lru_count_node(&qi->qi_lru, sc->nid); +} + /* * This initializes all the quota information that's kept in the * mount structure @@ -696,11 +831,18 @@ xfs_qm_init_quotainfo( qinf = mp->m_quotainfo = kmem_zalloc(sizeof(xfs_quotainfo_t), KM_SLEEP); + if ((error = list_lru_init(&qinf->qi_lru))) { + kmem_free(qinf); + mp->m_quotainfo = NULL; + return error; + } + /* * See if quotainodes are setup, and if not, allocate them, * and change the superblock accordingly. */ if ((error = xfs_qm_init_quotainos(mp))) { + list_lru_destroy(&qinf->qi_lru); kmem_free(qinf); mp->m_quotainfo = NULL; return error; @@ -711,10 +853,6 @@ xfs_qm_init_quotainfo( INIT_RADIX_TREE(&qinf->qi_pquota_tree, GFP_NOFS); mutex_init(&qinf->qi_tree_lock); - INIT_LIST_HEAD(&qinf->qi_lru_list); - qinf->qi_lru_count = 0; - mutex_init(&qinf->qi_lru_lock); - /* mutex used to serialize quotaoffs */ mutex_init(&qinf->qi_quotaofflock); @@ -779,8 +917,10 @@ xfs_qm_init_quotainfo( qinf->qi_rtbwarnlimit = XFS_QM_RTBWARNLIMIT; } - qinf->qi_shrinker.shrink = xfs_qm_shake; + qinf->qi_shrinker.count_objects = xfs_qm_shrink_count; + qinf->qi_shrinker.scan_objects = xfs_qm_shrink_scan; qinf->qi_shrinker.seeks = DEFAULT_SEEKS; + qinf->qi_shrinker.flags = SHRINKER_NUMA_AWARE; register_shrinker(&qinf->qi_shrinker); return 0; } @@ -801,6 +941,7 @@ xfs_qm_destroy_quotainfo( ASSERT(qi != NULL); unregister_shrinker(&qi->qi_shrinker); + list_lru_destroy(&qi->qi_lru); if (qi->qi_uquotaip) { IRELE(qi->qi_uquotaip); @@ -1599,132 +1740,6 @@ xfs_qm_dqfree_one( xfs_qm_dqdestroy(dqp); } -STATIC void -xfs_qm_dqreclaim_one( - struct xfs_dquot *dqp, - struct list_head *buffer_list, - struct list_head *dispose_list) -{ - struct xfs_mount *mp = dqp->q_mount; - struct xfs_quotainfo *qi = mp->m_quotainfo; - int error; - - if (!xfs_dqlock_nowait(dqp)) - goto out_move_tail; - - /* - * This dquot has acquired a reference in the meantime remove it from - * the freelist and try again. - */ - if (dqp->q_nrefs) { - xfs_dqunlock(dqp); - - trace_xfs_dqreclaim_want(dqp); - XFS_STATS_INC(xs_qm_dqwants); - - list_del_init(&dqp->q_lru); - qi->qi_lru_count--; - XFS_STATS_DEC(xs_qm_dquot_unused); - return; - } - - /* - * Try to grab the flush lock. If this dquot is in the process of - * getting flushed to disk, we don't want to reclaim it. - */ - if (!xfs_dqflock_nowait(dqp)) - goto out_unlock_move_tail; - - if (XFS_DQ_IS_DIRTY(dqp)) { - struct xfs_buf *bp = NULL; - - trace_xfs_dqreclaim_dirty(dqp); - - error = xfs_qm_dqflush(dqp, &bp); - if (error) { - xfs_warn(mp, "%s: dquot %p flush failed", - __func__, dqp); - goto out_unlock_move_tail; - } - - xfs_buf_delwri_queue(bp, buffer_list); - xfs_buf_relse(bp); - /* - * Give the dquot another try on the freelist, as the - * flushing will take some time. - */ - goto out_unlock_move_tail; - } - xfs_dqfunlock(dqp); - - /* - * Prevent lookups now that we are past the point of no return. - */ - dqp->dq_flags |= XFS_DQ_FREEING; - xfs_dqunlock(dqp); - - ASSERT(dqp->q_nrefs == 0); - list_move_tail(&dqp->q_lru, dispose_list); - qi->qi_lru_count--; - XFS_STATS_DEC(xs_qm_dquot_unused); - - trace_xfs_dqreclaim_done(dqp); - XFS_STATS_INC(xs_qm_dqreclaims); - return; - - /* - * Move the dquot to the tail of the list so that we don't spin on it. - */ -out_unlock_move_tail: - xfs_dqunlock(dqp); -out_move_tail: - list_move_tail(&dqp->q_lru, &qi->qi_lru_list); - trace_xfs_dqreclaim_busy(dqp); - XFS_STATS_INC(xs_qm_dqreclaim_misses); -} - -STATIC int -xfs_qm_shake( - struct shrinker *shrink, - struct shrink_control *sc) -{ - struct xfs_quotainfo *qi = - container_of(shrink, struct xfs_quotainfo, qi_shrinker); - int nr_to_scan = sc->nr_to_scan; - LIST_HEAD (buffer_list); - LIST_HEAD (dispose_list); - struct xfs_dquot *dqp; - int error; - - if ((sc->gfp_mask & (__GFP_FS|__GFP_WAIT)) != (__GFP_FS|__GFP_WAIT)) - return 0; - if (!nr_to_scan) - goto out; - - mutex_lock(&qi->qi_lru_lock); - while (!list_empty(&qi->qi_lru_list)) { - if (nr_to_scan-- <= 0) - break; - dqp = list_first_entry(&qi->qi_lru_list, struct xfs_dquot, - q_lru); - xfs_qm_dqreclaim_one(dqp, &buffer_list, &dispose_list); - } - mutex_unlock(&qi->qi_lru_lock); - - error = xfs_buf_delwri_submit(&buffer_list); - if (error) - xfs_warn(NULL, "%s: dquot reclaim failed", __func__); - - while (!list_empty(&dispose_list)) { - dqp = list_first_entry(&dispose_list, struct xfs_dquot, q_lru); - list_del_init(&dqp->q_lru); - xfs_qm_dqfree_one(dqp); - } - -out: - return (qi->qi_lru_count / 100) * sysctl_vfs_cache_pressure; -} - /* * Start a transaction and write the incore superblock changes to * disk. flags parameter indicates which fields have changed. diff --git a/fs/xfs/xfs_qm.h b/fs/xfs/xfs_qm.h index 670cd4464070..2b602df9c242 100644 --- a/fs/xfs/xfs_qm.h +++ b/fs/xfs/xfs_qm.h @@ -49,9 +49,7 @@ typedef struct xfs_quotainfo { struct xfs_inode *qi_uquotaip; /* user quota inode */ struct xfs_inode *qi_gquotaip; /* group quota inode */ struct xfs_inode *qi_pquotaip; /* project quota inode */ - struct list_head qi_lru_list; - struct mutex qi_lru_lock; - int qi_lru_count; + struct list_lru qi_lru; int qi_dquots; time_t qi_btimelimit; /* limit for blks timer */ time_t qi_itimelimit; /* limit for inodes timer */ diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 979a77d4b87d..15188cc99449 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -1535,19 +1535,21 @@ xfs_fs_mount( return mount_bdev(fs_type, flags, dev_name, data, xfs_fs_fill_super); } -static int +static long xfs_fs_nr_cached_objects( - struct super_block *sb) + struct super_block *sb, + int nid) { return xfs_reclaim_inodes_count(XFS_M(sb)); } -static void +static long xfs_fs_free_cached_objects( struct super_block *sb, - int nr_to_scan) + long nr_to_scan, + int nid) { - xfs_reclaim_inodes_nr(XFS_M(sb), nr_to_scan); + return xfs_reclaim_inodes_nr(XFS_M(sb), nr_to_scan); } static const struct super_operations xfs_super_operations = { diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index d568f3975eeb..fcabc42d66ab 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -85,7 +85,6 @@ struct cpufreq_policy { struct list_head policy_list; struct kobject kobj; struct completion kobj_unregister; - int transition_ongoing; /* Tracks transition status */ }; /* Only for ACPI */ diff --git a/include/linux/dcache.h b/include/linux/dcache.h index feaa8d88eef7..59066e0b4ff1 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -55,11 +55,11 @@ struct qstr { #define hashlen_len(hashlen) ((u32)((hashlen) >> 32)) struct dentry_stat_t { - int nr_dentry; - int nr_unused; - int age_limit; /* age in seconds */ - int want_pages; /* pages requested by system */ - int dummy[2]; + long nr_dentry; + long nr_unused; + long age_limit; /* age in seconds */ + long want_pages; /* pages requested by system */ + long dummy[2]; }; extern struct dentry_stat_t dentry_stat; @@ -395,4 +395,8 @@ static inline bool d_mountpoint(const struct dentry *dentry) extern int sysctl_vfs_cache_pressure; +static inline unsigned long vfs_pressure_ratio(unsigned long val) +{ + return mult_frac(val, sysctl_vfs_cache_pressure, 100); +} #endif /* __LINUX_DCACHE_H */ diff --git a/include/linux/fs.h b/include/linux/fs.h index 529d8711baba..a4acd3c61190 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -10,6 +10,7 @@ #include <linux/stat.h> #include <linux/cache.h> #include <linux/list.h> +#include <linux/list_lru.h> #include <linux/llist.h> #include <linux/radix-tree.h> #include <linux/rbtree.h> @@ -1269,15 +1270,6 @@ struct super_block { struct list_head s_files; #endif struct list_head s_mounts; /* list of mounts; _not_ for fs use */ - /* s_dentry_lru, s_nr_dentry_unused protected by dcache.c lru locks */ - struct list_head s_dentry_lru; /* unused dentry lru */ - int s_nr_dentry_unused; /* # of dentry on lru */ - - /* s_inode_lru_lock protects s_inode_lru and s_nr_inodes_unused */ - spinlock_t s_inode_lru_lock ____cacheline_aligned_in_smp; - struct list_head s_inode_lru; /* unused inode lru */ - int s_nr_inodes_unused; /* # of inodes on lru */ - struct block_device *s_bdev; struct backing_dev_info *s_bdi; struct mtd_info *s_mtd; @@ -1331,11 +1323,14 @@ struct super_block { /* AIO completions deferred from interrupt context */ struct workqueue_struct *s_dio_done_wq; -}; -/* superblock cache pruning functions */ -extern void prune_icache_sb(struct super_block *sb, int nr_to_scan); -extern void prune_dcache_sb(struct super_block *sb, int nr_to_scan); + /* + * Keep the lru lists last in the structure so they always sit on their + * own individual cachelines. + */ + struct list_lru s_dentry_lru ____cacheline_aligned_in_smp; + struct list_lru s_inode_lru ____cacheline_aligned_in_smp; +}; extern struct timespec current_fs_time(struct super_block *sb); @@ -1629,8 +1624,8 @@ struct super_operations { ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t); #endif int (*bdev_try_to_free_page)(struct super_block*, struct page*, gfp_t); - int (*nr_cached_objects)(struct super_block *); - void (*free_cached_objects)(struct super_block *, int); + long (*nr_cached_objects)(struct super_block *, int); + long (*free_cached_objects)(struct super_block *, long, int); }; /* @@ -2494,7 +2489,6 @@ extern const struct file_operations generic_ro_fops; #define special_file(m) (S_ISCHR(m)||S_ISBLK(m)||S_ISFIFO(m)||S_ISSOCK(m)) extern int vfs_readlink(struct dentry *, char __user *, int, const char *); -extern int vfs_follow_link(struct nameidata *, const char *); extern int page_readlink(struct dentry *, char __user *, int); extern void *page_follow_link_light(struct dentry *, struct nameidata *); extern void page_put_link(struct dentry *, struct nameidata *, void *); diff --git a/include/linux/fs_struct.h b/include/linux/fs_struct.h index 2b93a9a5a1e6..0efc3e62843a 100644 --- a/include/linux/fs_struct.h +++ b/include/linux/fs_struct.h @@ -39,17 +39,6 @@ static inline void get_fs_pwd(struct fs_struct *fs, struct path *pwd) spin_unlock(&fs->lock); } -static inline void get_fs_root_and_pwd(struct fs_struct *fs, struct path *root, - struct path *pwd) -{ - spin_lock(&fs->lock); - *root = fs->root; - path_get(root); - *pwd = fs->pwd; - path_get(pwd); - spin_unlock(&fs->lock); -} - extern bool current_chrooted(void); #endif /* _LINUX_FS_STRUCT_H */ diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 3aeb7305e2f5..7ea319e95b47 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -58,10 +58,26 @@ struct iommu_domain { #define IOMMU_CAP_CACHE_COHERENCY 0x1 #define IOMMU_CAP_INTR_REMAP 0x2 /* isolates device intrs */ +/* + * Following constraints are specifc to FSL_PAMUV1: + * -aperture must be power of 2, and naturally aligned + * -number of windows must be power of 2, and address space size + * of each window is determined by aperture size / # of windows + * -the actual size of the mapped region of a window must be power + * of 2 starting with 4KB and physical address must be naturally + * aligned. + * DOMAIN_ATTR_FSL_PAMUV1 corresponds to the above mentioned contraints. + * The caller can invoke iommu_domain_get_attr to check if the underlying + * iommu implementation supports these constraints. + */ + enum iommu_attr { DOMAIN_ATTR_GEOMETRY, DOMAIN_ATTR_PAGING, DOMAIN_ATTR_WINDOWS, + DOMAIN_ATTR_FSL_PAMU_STASH, + DOMAIN_ATTR_FSL_PAMU_ENABLE, + DOMAIN_ATTR_FSL_PAMUV1, DOMAIN_ATTR_MAX, }; diff --git a/include/linux/list_lru.h b/include/linux/list_lru.h new file mode 100644 index 000000000000..3ce541753c88 --- /dev/null +++ b/include/linux/list_lru.h @@ -0,0 +1,131 @@ +/* + * Copyright (c) 2013 Red Hat, Inc. and Parallels Inc. All rights reserved. + * Authors: David Chinner and Glauber Costa + * + * Generic LRU infrastructure + */ +#ifndef _LRU_LIST_H +#define _LRU_LIST_H + +#include <linux/list.h> +#include <linux/nodemask.h> + +/* list_lru_walk_cb has to always return one of those */ +enum lru_status { + LRU_REMOVED, /* item removed from list */ + LRU_ROTATE, /* item referenced, give another pass */ + LRU_SKIP, /* item cannot be locked, skip */ + LRU_RETRY, /* item not freeable. May drop the lock + internally, but has to return locked. */ +}; + +struct list_lru_node { + spinlock_t lock; + struct list_head list; + /* kept as signed so we can catch imbalance bugs */ + long nr_items; +} ____cacheline_aligned_in_smp; + +struct list_lru { + struct list_lru_node *node; + nodemask_t active_nodes; +}; + +void list_lru_destroy(struct list_lru *lru); +int list_lru_init(struct list_lru *lru); + +/** + * list_lru_add: add an element to the lru list's tail + * @list_lru: the lru pointer + * @item: the item to be added. + * + * If the element is already part of a list, this function returns doing + * nothing. Therefore the caller does not need to keep state about whether or + * not the element already belongs in the list and is allowed to lazy update + * it. Note however that this is valid for *a* list, not *this* list. If + * the caller organize itself in a way that elements can be in more than + * one type of list, it is up to the caller to fully remove the item from + * the previous list (with list_lru_del() for instance) before moving it + * to @list_lru + * + * Return value: true if the list was updated, false otherwise + */ +bool list_lru_add(struct list_lru *lru, struct list_head *item); + +/** + * list_lru_del: delete an element to the lru list + * @list_lru: the lru pointer + * @item: the item to be deleted. + * + * This function works analogously as list_lru_add in terms of list + * manipulation. The comments about an element already pertaining to + * a list are also valid for list_lru_del. + * + * Return value: true if the list was updated, false otherwise + */ +bool list_lru_del(struct list_lru *lru, struct list_head *item); + +/** + * list_lru_count_node: return the number of objects currently held by @lru + * @lru: the lru pointer. + * @nid: the node id to count from. + * + * Always return a non-negative number, 0 for empty lists. There is no + * guarantee that the list is not updated while the count is being computed. + * Callers that want such a guarantee need to provide an outer lock. + */ +unsigned long list_lru_count_node(struct list_lru *lru, int nid); +static inline unsigned long list_lru_count(struct list_lru *lru) +{ + long count = 0; + int nid; + + for_each_node_mask(nid, lru->active_nodes) + count += list_lru_count_node(lru, nid); + + return count; +} + +typedef enum lru_status +(*list_lru_walk_cb)(struct list_head *item, spinlock_t *lock, void *cb_arg); +/** + * list_lru_walk_node: walk a list_lru, isolating and disposing freeable items. + * @lru: the lru pointer. + * @nid: the node id to scan from. + * @isolate: callback function that is resposible for deciding what to do with + * the item currently being scanned + * @cb_arg: opaque type that will be passed to @isolate + * @nr_to_walk: how many items to scan. + * + * This function will scan all elements in a particular list_lru, calling the + * @isolate callback for each of those items, along with the current list + * spinlock and a caller-provided opaque. The @isolate callback can choose to + * drop the lock internally, but *must* return with the lock held. The callback + * will return an enum lru_status telling the list_lru infrastructure what to + * do with the object being scanned. + * + * Please note that nr_to_walk does not mean how many objects will be freed, + * just how many objects will be scanned. + * + * Return value: the number of objects effectively removed from the LRU. + */ +unsigned long list_lru_walk_node(struct list_lru *lru, int nid, + list_lru_walk_cb isolate, void *cb_arg, + unsigned long *nr_to_walk); + +static inline unsigned long +list_lru_walk(struct list_lru *lru, list_lru_walk_cb isolate, + void *cb_arg, unsigned long nr_to_walk) +{ + long isolated = 0; + int nid; + + for_each_node_mask(nid, lru->active_nodes) { + isolated += list_lru_walk_node(lru, nid, isolate, + cb_arg, &nr_to_walk); + if (nr_to_walk <= 0) + break; + } + return isolated; +} +#endif /* _LRU_LIST_H */ diff --git a/include/linux/platform_data/exynos_thermal.h b/include/linux/platform_data/exynos_thermal.h deleted file mode 100644 index da7e6274b175..000000000000 --- a/include/linux/platform_data/exynos_thermal.h +++ /dev/null @@ -1,119 +0,0 @@ -/* - * exynos_thermal.h - Samsung EXYNOS TMU (Thermal Management Unit) - * - * Copyright (C) 2011 Samsung Electronics - * Donggeun Kim <dg77.kim@samsung.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#ifndef _LINUX_EXYNOS_THERMAL_H -#define _LINUX_EXYNOS_THERMAL_H -#include <linux/cpu_cooling.h> - -enum calibration_type { - TYPE_ONE_POINT_TRIMMING, - TYPE_TWO_POINT_TRIMMING, - TYPE_NONE, -}; - -enum soc_type { - SOC_ARCH_EXYNOS4210 = 1, - SOC_ARCH_EXYNOS, -}; -/** - * struct freq_clip_table - * @freq_clip_max: maximum frequency allowed for this cooling state. - * @temp_level: Temperature level at which the temperature clipping will - * happen. - * @mask_val: cpumask of the allowed cpu's where the clipping will take place. - * - * This structure is required to be filled and passed to the - * cpufreq_cooling_unregister function. - */ -struct freq_clip_table { - unsigned int freq_clip_max; - unsigned int temp_level; - const struct cpumask *mask_val; -}; - -/** - * struct exynos_tmu_platform_data - * @threshold: basic temperature for generating interrupt - * 25 <= threshold <= 125 [unit: degree Celsius] - * @threshold_falling: differntial value for setting threshold - * of temperature falling interrupt. - * @trigger_levels: array for each interrupt levels - * [unit: degree Celsius] - * 0: temperature for trigger_level0 interrupt - * condition for trigger_level0 interrupt: - * current temperature > threshold + trigger_levels[0] - * 1: temperature for trigger_level1 interrupt - * condition for trigger_level1 interrupt: - * current temperature > threshold + trigger_levels[1] - * 2: temperature for trigger_level2 interrupt - * condition for trigger_level2 interrupt: - * current temperature > threshold + trigger_levels[2] - * 3: temperature for trigger_level3 interrupt - * condition for trigger_level3 interrupt: - * current temperature > threshold + trigger_levels[3] - * @trigger_level0_en: - * 1 = enable trigger_level0 interrupt, - * 0 = disable trigger_level0 interrupt - * @trigger_level1_en: - * 1 = enable trigger_level1 interrupt, - * 0 = disable trigger_level1 interrupt - * @trigger_level2_en: - * 1 = enable trigger_level2 interrupt, - * 0 = disable trigger_level2 interrupt - * @trigger_level3_en: - * 1 = enable trigger_level3 interrupt, - * 0 = disable trigger_level3 interrupt - * @gain: gain of amplifier in the positive-TC generator block - * 0 <= gain <= 15 - * @reference_voltage: reference voltage of amplifier - * in the positive-TC generator block - * 0 <= reference_voltage <= 31 - * @noise_cancel_mode: noise cancellation mode - * 000, 100, 101, 110 and 111 can be different modes - * @type: determines the type of SOC - * @efuse_value: platform defined fuse value - * @cal_type: calibration type for temperature - * @freq_clip_table: Table representing frequency reduction percentage. - * @freq_tab_count: Count of the above table as frequency reduction may - * applicable to only some of the trigger levels. - * - * This structure is required for configuration of exynos_tmu driver. - */ -struct exynos_tmu_platform_data { - u8 threshold; - u8 threshold_falling; - u8 trigger_levels[4]; - bool trigger_level0_en; - bool trigger_level1_en; - bool trigger_level2_en; - bool trigger_level3_en; - - u8 gain; - u8 reference_voltage; - u8 noise_cancel_mode; - u32 efuse_value; - - enum calibration_type cal_type; - enum soc_type type; - struct freq_clip_table freq_tab[4]; - unsigned int freq_tab_count; -}; -#endif /* _LINUX_EXYNOS_THERMAL_H */ diff --git a/include/linux/platform_data/leds-lp55xx.h b/include/linux/platform_data/leds-lp55xx.h index 202e290faea8..51a2ff579d60 100644 --- a/include/linux/platform_data/leds-lp55xx.h +++ b/include/linux/platform_data/leds-lp55xx.h @@ -36,6 +36,13 @@ struct lp55xx_predef_pattern { u8 size_b; }; +enum lp8501_pwr_sel { + LP8501_ALL_VDD, /* D1~9 are connected to VDD */ + LP8501_6VDD_3VOUT, /* D1~6 with VDD, D7~9 with VOUT */ + LP8501_3VDD_6VOUT, /* D1~6 with VOUT, D7~9 with VDD */ + LP8501_ALL_VOUT, /* D1~9 are connected to VOUT */ +}; + /* * struct lp55xx_platform_data * @led_config : Configurable led class device @@ -67,6 +74,9 @@ struct lp55xx_platform_data { /* Predefined pattern data */ struct lp55xx_predef_pattern *patterns; unsigned int num_patterns; + + /* LP8501 specific */ + enum lp8501_pwr_sel pwr_sel; }; #endif /* _LEDS_LP55XX_H */ diff --git a/include/linux/platform_data/leds-pca9633.h b/include/linux/platform_data/leds-pca963x.h index c5bf29b6fa7f..e731f0036329 100644 --- a/include/linux/platform_data/leds-pca9633.h +++ b/include/linux/platform_data/leds-pca963x.h @@ -1,7 +1,8 @@ /* - * PCA9633 LED chip driver. + * PCA963X LED chip driver. * * Copyright 2012 bct electronic GmbH + * Copyright 2013 Qtechnology A/S * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -18,18 +19,24 @@ * 02110-1301 USA */ -#ifndef __LINUX_PCA9633_H -#define __LINUX_PCA9633_H +#ifndef __LINUX_PCA963X_H +#define __LINUX_PCA963X_H #include <linux/leds.h> -enum pca9633_outdrv { - PCA9633_OPEN_DRAIN, - PCA9633_TOTEM_POLE, /* aka push-pull */ +enum pca963x_outdrv { + PCA963X_OPEN_DRAIN, + PCA963X_TOTEM_POLE, /* aka push-pull */ }; -struct pca9633_platform_data { +enum pca963x_blink_type { + PCA963X_SW_BLINK, + PCA963X_HW_BLINK, +}; + +struct pca963x_platform_data { struct led_platform_data leds; - enum pca9633_outdrv outdrv; + enum pca963x_outdrv outdrv; + enum pca963x_blink_type blink_type; }; -#endif /* __LINUX_PCA9633_H*/ +#endif /* __LINUX_PCA963X_H*/ diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index 18299057402f..21a209336e79 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -3,15 +3,21 @@ /* * Reader/writer consistent mechanism without starving writers. This type of * lock for data where the reader wants a consistent set of information - * and is willing to retry if the information changes. Readers never - * block but they may have to retry if a writer is in - * progress. Writers do not wait for readers. + * and is willing to retry if the information changes. There are two types + * of readers: + * 1. Sequence readers which never block a writer but they may have to retry + * if a writer is in progress by detecting change in sequence number. + * Writers do not wait for a sequence reader. + * 2. Locking readers which will wait if a writer or another locking reader + * is in progress. A locking reader in progress will also block a writer + * from going forward. Unlike the regular rwlock, the read lock here is + * exclusive so that only one locking reader can get it. * - * This is not as cache friendly as brlock. Also, this will not work + * This is not as cache friendly as brlock. Also, this may not work well * for data that contains pointers, because any writer could * invalidate a pointer that a reader was following. * - * Expected reader usage: + * Expected non-blocking reader usage: * do { * seq = read_seqbegin(&foo); * ... @@ -268,4 +274,56 @@ write_sequnlock_irqrestore(seqlock_t *sl, unsigned long flags) spin_unlock_irqrestore(&sl->lock, flags); } +/* + * A locking reader exclusively locks out other writers and locking readers, + * but doesn't update the sequence number. Acts like a normal spin_lock/unlock. + * Don't need preempt_disable() because that is in the spin_lock already. + */ +static inline void read_seqlock_excl(seqlock_t *sl) +{ + spin_lock(&sl->lock); +} + +static inline void read_sequnlock_excl(seqlock_t *sl) +{ + spin_unlock(&sl->lock); +} + +static inline void read_seqlock_excl_bh(seqlock_t *sl) +{ + spin_lock_bh(&sl->lock); +} + +static inline void read_sequnlock_excl_bh(seqlock_t *sl) +{ + spin_unlock_bh(&sl->lock); +} + +static inline void read_seqlock_excl_irq(seqlock_t *sl) +{ + spin_lock_irq(&sl->lock); +} + +static inline void read_sequnlock_excl_irq(seqlock_t *sl) +{ + spin_unlock_irq(&sl->lock); +} + +static inline unsigned long __read_seqlock_excl_irqsave(seqlock_t *sl) +{ + unsigned long flags; + + spin_lock_irqsave(&sl->lock, flags); + return flags; +} + +#define read_seqlock_excl_irqsave(lock, flags) \ + do { flags = __read_seqlock_excl_irqsave(lock); } while (0) + +static inline void +read_sequnlock_excl_irqrestore(seqlock_t *sl, unsigned long flags) +{ + spin_unlock_irqrestore(&sl->lock, flags); +} + #endif /* __LINUX_SEQLOCK_H */ diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h index ac6b8ee07825..68c097077ef0 100644 --- a/include/linux/shrinker.h +++ b/include/linux/shrinker.h @@ -4,39 +4,67 @@ /* * This struct is used to pass information from page reclaim to the shrinkers. * We consolidate the values for easier extention later. + * + * The 'gfpmask' refers to the allocation we are currently trying to + * fulfil. */ struct shrink_control { gfp_t gfp_mask; - /* How many slab objects shrinker() should scan and try to reclaim */ + /* + * How many objects scan_objects should scan and try to reclaim. + * This is reset before every call, so it is safe for callees + * to modify. + */ unsigned long nr_to_scan; + + /* shrink from these nodes */ + nodemask_t nodes_to_scan; + /* current node being shrunk (for NUMA aware shrinkers) */ + int nid; }; +#define SHRINK_STOP (~0UL) /* * A callback you can register to apply pressure to ageable caches. * - * 'sc' is passed shrink_control which includes a count 'nr_to_scan' - * and a 'gfpmask'. It should look through the least-recently-used - * 'nr_to_scan' entries and attempt to free them up. It should return - * the number of objects which remain in the cache. If it returns -1, it means - * it cannot do any scanning at this time (eg. there is a risk of deadlock). + * @count_objects should return the number of freeable items in the cache. If + * there are no objects to free or the number of freeable items cannot be + * determined, it should return 0. No deadlock checks should be done during the + * count callback - the shrinker relies on aggregating scan counts that couldn't + * be executed due to potential deadlocks to be run at a later call when the + * deadlock condition is no longer pending. * - * The 'gfpmask' refers to the allocation we are currently trying to - * fulfil. + * @scan_objects will only be called if @count_objects returned a non-zero + * value for the number of freeable objects. The callout should scan the cache + * and attempt to free items from the cache. It should then return the number + * of objects freed during the scan, or SHRINK_STOP if progress cannot be made + * due to potential deadlocks. If SHRINK_STOP is returned, then no further + * attempts to call the @scan_objects will be made from the current reclaim + * context. * - * Note that 'shrink' will be passed nr_to_scan == 0 when the VM is - * querying the cache size, so a fastpath for that case is appropriate. + * @flags determine the shrinker abilities, like numa awareness */ struct shrinker { - int (*shrink)(struct shrinker *, struct shrink_control *sc); + unsigned long (*count_objects)(struct shrinker *, + struct shrink_control *sc); + unsigned long (*scan_objects)(struct shrinker *, + struct shrink_control *sc); + int seeks; /* seeks to recreate an obj */ long batch; /* reclaim batch size, 0 = default */ + unsigned long flags; /* These are for internal use */ struct list_head list; - atomic_long_t nr_in_batch; /* objs pending delete */ + /* objs pending delete, per node */ + atomic_long_t *nr_deferred; }; #define DEFAULT_SEEKS 2 /* A good number if you don't know better. */ -extern void register_shrinker(struct shrinker *); + +/* Flags */ +#define SHRINKER_NUMA_AWARE (1 << 0) + +extern int register_shrinker(struct shrinker *); extern void unregister_shrinker(struct shrinker *); #endif diff --git a/include/linux/thermal.h b/include/linux/thermal.h index a386a1cbb6e1..b268d3cf7ae3 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -207,6 +207,16 @@ struct thermal_bind_params { * See Documentation/thermal/sysfs-api.txt for more information. */ int trip_mask; + + /* + * This is an array of cooling state limits. Must have exactly + * 2 * thermal_zone.number_of_trip_points. It is an array consisting + * of tuples <lower-state upper-state> of state limits. Each trip + * will be associated with one state limit tuple when binding. + * A NULL pointer means <THERMAL_NO_LIMITS THERMAL_NO_LIMITS> + * on all trips. + */ + unsigned long *binding_limits; int (*match) (struct thermal_zone_device *tz, struct thermal_cooling_device *cdev); }; @@ -214,6 +224,14 @@ struct thermal_bind_params { /* Structure to define Thermal Zone parameters */ struct thermal_zone_params { char governor_name[THERMAL_NAME_LENGTH]; + + /* + * a boolean to indicate if the thermal to hwmon sysfs interface + * is required. when no_hwmon == false, a hwmon sysfs interface + * will be created. when no_hwmon == true, nothing will be done + */ + bool no_hwmon; + int num_tbps; /* Number of tbp entries */ struct thermal_bind_params *tbp; }; diff --git a/include/sound/rcar_snd.h b/include/sound/rcar_snd.h index d35412ae03b3..fe66533e9b7a 100644 --- a/include/sound/rcar_snd.h +++ b/include/sound/rcar_snd.h @@ -55,7 +55,7 @@ struct rsnd_ssi_platform_info { /* * flags */ -#define RSND_SCU_USB_HPBIF (1 << 31) /* it needs RSND_SSI_DEPENDENT */ +#define RSND_SCU_USE_HPBIF (1 << 31) /* it needs RSND_SSI_DEPENDENT */ struct rsnd_scu_platform_info { u32 flags; diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index 2902657ba766..45702c3c3837 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -439,7 +439,7 @@ TRACE_EVENT(btrfs_sync_fs, { BTRFS_UPDATE_DELAYED_HEAD, "UPDATE_DELAYED_HEAD" }) -TRACE_EVENT(btrfs_delayed_tree_ref, +DECLARE_EVENT_CLASS(btrfs_delayed_tree_ref, TP_PROTO(struct btrfs_delayed_ref_node *ref, struct btrfs_delayed_tree_ref *full_ref, @@ -481,7 +481,25 @@ TRACE_EVENT(btrfs_delayed_tree_ref, (unsigned long long)__entry->seq) ); -TRACE_EVENT(btrfs_delayed_data_ref, +DEFINE_EVENT(btrfs_delayed_tree_ref, add_delayed_tree_ref, + + TP_PROTO(struct btrfs_delayed_ref_node *ref, + struct btrfs_delayed_tree_ref *full_ref, + int action), + + TP_ARGS(ref, full_ref, action) +); + +DEFINE_EVENT(btrfs_delayed_tree_ref, run_delayed_tree_ref, + + TP_PROTO(struct btrfs_delayed_ref_node *ref, + struct btrfs_delayed_tree_ref *full_ref, + int action), + + TP_ARGS(ref, full_ref, action) +); + +DECLARE_EVENT_CLASS(btrfs_delayed_data_ref, TP_PROTO(struct btrfs_delayed_ref_node *ref, struct btrfs_delayed_data_ref *full_ref, @@ -527,7 +545,25 @@ TRACE_EVENT(btrfs_delayed_data_ref, (unsigned long long)__entry->seq) ); -TRACE_EVENT(btrfs_delayed_ref_head, +DEFINE_EVENT(btrfs_delayed_data_ref, add_delayed_data_ref, + + TP_PROTO(struct btrfs_delayed_ref_node *ref, + struct btrfs_delayed_data_ref *full_ref, + int action), + + TP_ARGS(ref, full_ref, action) +); + +DEFINE_EVENT(btrfs_delayed_data_ref, run_delayed_data_ref, + + TP_PROTO(struct btrfs_delayed_ref_node *ref, + struct btrfs_delayed_data_ref *full_ref, + int action), + + TP_ARGS(ref, full_ref, action) +); + +DECLARE_EVENT_CLASS(btrfs_delayed_ref_head, TP_PROTO(struct btrfs_delayed_ref_node *ref, struct btrfs_delayed_ref_head *head_ref, @@ -556,6 +592,24 @@ TRACE_EVENT(btrfs_delayed_ref_head, __entry->is_data) ); +DEFINE_EVENT(btrfs_delayed_ref_head, add_delayed_ref_head, + + TP_PROTO(struct btrfs_delayed_ref_node *ref, + struct btrfs_delayed_ref_head *head_ref, + int action), + + TP_ARGS(ref, head_ref, action) +); + +DEFINE_EVENT(btrfs_delayed_ref_head, run_delayed_ref_head, + + TP_PROTO(struct btrfs_delayed_ref_node *ref, + struct btrfs_delayed_ref_head *head_ref, + int action), + + TP_ARGS(ref, head_ref, action) +); + #define show_chunk_type(type) \ __print_flags(type, "|", \ { BTRFS_BLOCK_GROUP_DATA, "DATA" }, \ diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h index 63cfcccaebb3..132a985aba8b 100644 --- a/include/trace/events/vmscan.h +++ b/include/trace/events/vmscan.h @@ -202,7 +202,7 @@ TRACE_EVENT(mm_shrink_slab_start, TP_fast_assign( __entry->shr = shr; - __entry->shrink = shr->shrink; + __entry->shrink = shr->scan_objects; __entry->nr_objects_to_shrink = nr_objects_to_shrink; __entry->gfp_flags = sc->gfp_mask; __entry->pgs_scanned = pgs_scanned; @@ -241,7 +241,7 @@ TRACE_EVENT(mm_shrink_slab_end, TP_fast_assign( __entry->shr = shr; - __entry->shrink = shr->shrink; + __entry->shrink = shr->scan_objects; __entry->unused_scan = unused_scan_cnt; __entry->new_scan = new_scan_cnt; __entry->retval = shrinker_retval; diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h index 05aed70627e2..45e618921c61 100644 --- a/include/uapi/linux/btrfs.h +++ b/include/uapi/linux/btrfs.h @@ -305,6 +305,31 @@ struct btrfs_ioctl_clone_range_args { #define BTRFS_DEFRAG_RANGE_COMPRESS 1 #define BTRFS_DEFRAG_RANGE_START_IO 2 +#define BTRFS_SAME_DATA_DIFFERS 1 +/* For extent-same ioctl */ +struct btrfs_ioctl_same_extent_info { + __s64 fd; /* in - destination file */ + __u64 logical_offset; /* in - start of extent in destination */ + __u64 bytes_deduped; /* out - total # of bytes we were able + * to dedupe from this file */ + /* status of this dedupe operation: + * 0 if dedup succeeds + * < 0 for error + * == BTRFS_SAME_DATA_DIFFERS if data differs + */ + __s32 status; /* out - see above description */ + __u32 reserved; +}; + +struct btrfs_ioctl_same_args { + __u64 logical_offset; /* in - start of extent in source */ + __u64 length; /* in - length of extent */ + __u16 dest_count; /* in - total elements in info array */ + __u16 reserved1; + __u32 reserved2; + struct btrfs_ioctl_same_extent_info info[0]; +}; + struct btrfs_ioctl_space_info { __u64 flags; __u64 total_bytes; @@ -524,7 +549,7 @@ static inline char *btrfs_err_str(enum btrfs_err_code err_code) struct btrfs_ioctl_search_args) #define BTRFS_IOC_INO_LOOKUP _IOWR(BTRFS_IOCTL_MAGIC, 18, \ struct btrfs_ioctl_ino_lookup_args) -#define BTRFS_IOC_DEFAULT_SUBVOL _IOW(BTRFS_IOCTL_MAGIC, 19, u64) +#define BTRFS_IOC_DEFAULT_SUBVOL _IOW(BTRFS_IOCTL_MAGIC, 19, __u64) #define BTRFS_IOC_SPACE_INFO _IOWR(BTRFS_IOCTL_MAGIC, 20, \ struct btrfs_ioctl_space_args) #define BTRFS_IOC_START_SYNC _IOR(BTRFS_IOCTL_MAGIC, 24, __u64) @@ -579,4 +604,7 @@ static inline char *btrfs_err_str(enum btrfs_err_code err_code) struct btrfs_ioctl_get_dev_stats) #define BTRFS_IOC_DEV_REPLACE _IOWR(BTRFS_IOCTL_MAGIC, 53, \ struct btrfs_ioctl_dev_replace_args) +#define BTRFS_IOC_FILE_EXTENT_SAME _IOWR(BTRFS_IOCTL_MAGIC, 54, \ + struct btrfs_ioctl_same_args) + #endif /* _UAPI_LINUX_BTRFS_H */ diff --git a/include/uapi/linux/cifs/cifs_mount.h b/include/uapi/linux/cifs/cifs_mount.h new file mode 100644 index 000000000000..d7e4c6ce6171 --- /dev/null +++ b/include/uapi/linux/cifs/cifs_mount.h @@ -0,0 +1,27 @@ +/* + * include/uapi/linux/cifs/cifs_mount.h + * + * Author(s): Scott Lovenberg (scott.lovenberg@gmail.com) + * + * This library is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; either version 2.1 of the License, or + * (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU Lesser General Public License for more details. + */ +#ifndef _CIFS_MOUNT_H +#define _CIFS_MOUNT_H + +/* Max string lengths for cifs mounting options. */ +#define CIFS_MAX_DOMAINNAME_LEN 256 /* max fully qualified domain name */ +#define CIFS_MAX_USERNAME_LEN 256 /* reasonable max for current servers */ +#define CIFS_MAX_PASSWORD_LEN 512 /* Windows max seems to be 256 wide chars */ +#define CIFS_MAX_SHARE_LEN 256 /* reasonable max share name length */ +#define CIFS_NI_MAXHOST 1024 /* max host name length (256 * 4 bytes) */ + + +#endif /* _CIFS_MOUNT_H */ diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h index a4ed56cf0eac..6c28b61bb690 100644 --- a/include/uapi/linux/fs.h +++ b/include/uapi/linux/fs.h @@ -49,9 +49,9 @@ struct files_stat_struct { }; struct inodes_stat_t { - int nr_inodes; - int nr_unused; - int dummy[5]; /* padding for sysctl ABI compatibility */ + long nr_inodes; + long nr_unused; + long dummy[5]; /* padding for sysctl ABI compatibility */ }; diff --git a/kernel/events/core.c b/kernel/events/core.c index 2207efc941d1..dd236b66ca3a 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -5039,6 +5039,7 @@ static void perf_event_mmap_output(struct perf_event *event, mmap_event->event_id.header.size += sizeof(mmap_event->maj); mmap_event->event_id.header.size += sizeof(mmap_event->min); mmap_event->event_id.header.size += sizeof(mmap_event->ino); + mmap_event->event_id.header.size += sizeof(mmap_event->ino_generation); } perf_event_header__init_id(&mmap_event->event_id.header, &sample, event); diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index f3569747d629..ad8e1bdca70e 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -1682,12 +1682,10 @@ static bool handle_trampoline(struct pt_regs *regs) tmp = ri; ri = ri->next; kfree(tmp); + utask->depth--; if (!chained) break; - - utask->depth--; - BUG_ON(!ri); } diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 3085e62a80a5..c9c759d5a15c 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -644,22 +644,23 @@ int hibernate(void) if (error) goto Exit; - /* Allocate memory management structures */ - error = create_basic_memory_bitmaps(); - if (error) - goto Exit; - printk(KERN_INFO "PM: Syncing filesystems ... "); sys_sync(); printk("done.\n"); error = freeze_processes(); if (error) - goto Free_bitmaps; + goto Exit; + + lock_device_hotplug(); + /* Allocate memory management structures */ + error = create_basic_memory_bitmaps(); + if (error) + goto Thaw; error = hibernation_snapshot(hibernation_mode == HIBERNATION_PLATFORM); if (error || freezer_test_done) - goto Thaw; + goto Free_bitmaps; if (in_suspend) { unsigned int flags = 0; @@ -682,14 +683,14 @@ int hibernate(void) pr_debug("PM: Image restored successfully.\n"); } + Free_bitmaps: + free_basic_memory_bitmaps(); Thaw: + unlock_device_hotplug(); thaw_processes(); /* Don't bother checking whether freezer_test_done is true */ freezer_test_done = false; - - Free_bitmaps: - free_basic_memory_bitmaps(); Exit: pm_notifier_call_chain(PM_POST_HIBERNATION); pm_restore_console(); @@ -806,21 +807,20 @@ static int software_resume(void) pm_prepare_console(); error = pm_notifier_call_chain(PM_RESTORE_PREPARE); if (error) - goto close_finish; - - error = create_basic_memory_bitmaps(); - if (error) - goto close_finish; + goto Close_Finish; pr_debug("PM: Preparing processes for restore.\n"); error = freeze_processes(); - if (error) { - swsusp_close(FMODE_READ); - goto Done; - } + if (error) + goto Close_Finish; pr_debug("PM: Loading hibernation image.\n"); + lock_device_hotplug(); + error = create_basic_memory_bitmaps(); + if (error) + goto Thaw; + error = swsusp_read(&flags); swsusp_close(FMODE_READ); if (!error) @@ -828,9 +828,10 @@ static int software_resume(void) printk(KERN_ERR "PM: Failed to load hibernation image, recovering.\n"); swsusp_free(); - thaw_processes(); - Done: free_basic_memory_bitmaps(); + Thaw: + unlock_device_hotplug(); + thaw_processes(); Finish: pm_notifier_call_chain(PM_POST_RESTORE); pm_restore_console(); @@ -840,7 +841,7 @@ static int software_resume(void) mutex_unlock(&pm_mutex); pr_debug("PM: Hibernation image not present or could not be loaded.\n"); return error; -close_finish: + Close_Finish: swsusp_close(FMODE_READ); goto Finish; } diff --git a/kernel/power/user.c b/kernel/power/user.c index 4ed81e74f86f..72e8f4fd616d 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c @@ -60,11 +60,6 @@ static int snapshot_open(struct inode *inode, struct file *filp) error = -ENOSYS; goto Unlock; } - if(create_basic_memory_bitmaps()) { - atomic_inc(&snapshot_device_available); - error = -ENOMEM; - goto Unlock; - } nonseekable_open(inode, filp); data = &snapshot_state; filp->private_data = data; @@ -90,10 +85,9 @@ static int snapshot_open(struct inode *inode, struct file *filp) if (error) pm_notifier_call_chain(PM_POST_RESTORE); } - if (error) { - free_basic_memory_bitmaps(); + if (error) atomic_inc(&snapshot_device_available); - } + data->frozen = 0; data->ready = 0; data->platform_support = 0; @@ -111,11 +105,11 @@ static int snapshot_release(struct inode *inode, struct file *filp) lock_system_sleep(); swsusp_free(); - free_basic_memory_bitmaps(); data = filp->private_data; free_all_swap_pages(data->swap); if (data->frozen) { pm_restore_gfp_mask(); + free_basic_memory_bitmaps(); thaw_processes(); } pm_notifier_call_chain(data->mode == O_RDONLY ? @@ -207,6 +201,7 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd, if (!mutex_trylock(&pm_mutex)) return -EBUSY; + lock_device_hotplug(); data = filp->private_data; switch (cmd) { @@ -220,14 +215,22 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd, printk("done.\n"); error = freeze_processes(); - if (!error) + if (error) + break; + + error = create_basic_memory_bitmaps(); + if (error) + thaw_processes(); + else data->frozen = 1; + break; case SNAPSHOT_UNFREEZE: if (!data->frozen || data->ready) break; pm_restore_gfp_mask(); + free_basic_memory_bitmaps(); thaw_processes(); data->frozen = 0; break; @@ -371,6 +374,7 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd, } + unlock_device_hotplug(); mutex_unlock(&pm_mutex); return error; diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 7f0a5e6cdae0..9b3fe1cd8f40 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -5151,7 +5151,7 @@ static int should_we_balance(struct lb_env *env) * First idle cpu or the first cpu(busiest) in this sched group * is eligible for doing load balancing at this and above domains. */ - return balance_cpu != env->dst_cpu; + return balance_cpu == env->dst_cpu; } /* diff --git a/kernel/sysctl.c b/kernel/sysctl.c index dc69093a8ec4..b2f06f3c6a3f 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1471,14 +1471,14 @@ static struct ctl_table fs_table[] = { { .procname = "inode-nr", .data = &inodes_stat, - .maxlen = 2*sizeof(int), + .maxlen = 2*sizeof(long), .mode = 0444, .proc_handler = proc_nr_inodes, }, { .procname = "inode-state", .data = &inodes_stat, - .maxlen = 7*sizeof(int), + .maxlen = 7*sizeof(long), .mode = 0444, .proc_handler = proc_nr_inodes, }, @@ -1508,7 +1508,7 @@ static struct ctl_table fs_table[] = { { .procname = "dentry-state", .data = &dentry_stat, - .maxlen = 6*sizeof(int), + .maxlen = 6*sizeof(long), .mode = 0444, .proc_handler = proc_nr_dentry, }, diff --git a/mm/Makefile b/mm/Makefile index f00803386a67..305d10acd081 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -17,7 +17,7 @@ obj-y := filemap.o mempool.o oom_kill.o fadvise.o \ util.o mmzone.o vmstat.o backing-dev.o \ mm_init.o mmu_context.o percpu.o slab_common.o \ compaction.o balloon_compaction.o \ - interval_tree.o $(mmu-y) + interval_tree.o list_lru.o $(mmu-y) obj-y += init-mm.o diff --git a/mm/huge_memory.c b/mm/huge_memory.c index f60c4ebaa30c..7489884682d8 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -211,24 +211,29 @@ static void put_huge_zero_page(void) BUG_ON(atomic_dec_and_test(&huge_zero_refcount)); } -static int shrink_huge_zero_page(struct shrinker *shrink, - struct shrink_control *sc) +static unsigned long shrink_huge_zero_page_count(struct shrinker *shrink, + struct shrink_control *sc) { - if (!sc->nr_to_scan) - /* we can free zero page only if last reference remains */ - return atomic_read(&huge_zero_refcount) == 1 ? HPAGE_PMD_NR : 0; + /* we can free zero page only if last reference remains */ + return atomic_read(&huge_zero_refcount) == 1 ? HPAGE_PMD_NR : 0; +} +static unsigned long shrink_huge_zero_page_scan(struct shrinker *shrink, + struct shrink_control *sc) +{ if (atomic_cmpxchg(&huge_zero_refcount, 1, 0) == 1) { struct page *zero_page = xchg(&huge_zero_page, NULL); BUG_ON(zero_page == NULL); __free_page(zero_page); + return HPAGE_PMD_NR; } return 0; } static struct shrinker huge_zero_page_shrinker = { - .shrink = shrink_huge_zero_page, + .count_objects = shrink_huge_zero_page_count, + .scan_objects = shrink_huge_zero_page_scan, .seeks = DEFAULT_SEEKS, }; diff --git a/mm/list_lru.c b/mm/list_lru.c new file mode 100644 index 000000000000..72467914b856 --- /dev/null +++ b/mm/list_lru.c @@ -0,0 +1,139 @@ +/* + * Copyright (c) 2013 Red Hat, Inc. and Parallels Inc. All rights reserved. + * Authors: David Chinner and Glauber Costa + * + * Generic LRU infrastructure + */ +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/mm.h> +#include <linux/list_lru.h> +#include <linux/slab.h> + +bool list_lru_add(struct list_lru *lru, struct list_head *item) +{ + int nid = page_to_nid(virt_to_page(item)); + struct list_lru_node *nlru = &lru->node[nid]; + + spin_lock(&nlru->lock); + WARN_ON_ONCE(nlru->nr_items < 0); + if (list_empty(item)) { + list_add_tail(item, &nlru->list); + if (nlru->nr_items++ == 0) + node_set(nid, lru->active_nodes); + spin_unlock(&nlru->lock); + return true; + } + spin_unlock(&nlru->lock); + return false; +} +EXPORT_SYMBOL_GPL(list_lru_add); + +bool list_lru_del(struct list_lru *lru, struct list_head *item) +{ + int nid = page_to_nid(virt_to_page(item)); + struct list_lru_node *nlru = &lru->node[nid]; + + spin_lock(&nlru->lock); + if (!list_empty(item)) { + list_del_init(item); + if (--nlru->nr_items == 0) + node_clear(nid, lru->active_nodes); + WARN_ON_ONCE(nlru->nr_items < 0); + spin_unlock(&nlru->lock); + return true; + } + spin_unlock(&nlru->lock); + return false; +} +EXPORT_SYMBOL_GPL(list_lru_del); + +unsigned long +list_lru_count_node(struct list_lru *lru, int nid) +{ + unsigned long count = 0; + struct list_lru_node *nlru = &lru->node[nid]; + + spin_lock(&nlru->lock); + WARN_ON_ONCE(nlru->nr_items < 0); + count += nlru->nr_items; + spin_unlock(&nlru->lock); + + return count; +} +EXPORT_SYMBOL_GPL(list_lru_count_node); + +unsigned long +list_lru_walk_node(struct list_lru *lru, int nid, list_lru_walk_cb isolate, + void *cb_arg, unsigned long *nr_to_walk) +{ + + struct list_lru_node *nlru = &lru->node[nid]; + struct list_head *item, *n; + unsigned long isolated = 0; + + spin_lock(&nlru->lock); +restart: + list_for_each_safe(item, n, &nlru->list) { + enum lru_status ret; + + /* + * decrement nr_to_walk first so that we don't livelock if we + * get stuck on large numbesr of LRU_RETRY items + */ + if (--(*nr_to_walk) == 0) + break; + + ret = isolate(item, &nlru->lock, cb_arg); + switch (ret) { + case LRU_REMOVED: + if (--nlru->nr_items == 0) + node_clear(nid, lru->active_nodes); + WARN_ON_ONCE(nlru->nr_items < 0); + isolated++; + break; + case LRU_ROTATE: + list_move_tail(item, &nlru->list); + break; + case LRU_SKIP: + break; + case LRU_RETRY: + /* + * The lru lock has been dropped, our list traversal is + * now invalid and so we have to restart from scratch. + */ + goto restart; + default: + BUG(); + } + } + + spin_unlock(&nlru->lock); + return isolated; +} +EXPORT_SYMBOL_GPL(list_lru_walk_node); + +int list_lru_init(struct list_lru *lru) +{ + int i; + size_t size = sizeof(*lru->node) * nr_node_ids; + + lru->node = kzalloc(size, GFP_KERNEL); + if (!lru->node) + return -ENOMEM; + + nodes_clear(lru->active_nodes); + for (i = 0; i < nr_node_ids; i++) { + spin_lock_init(&lru->node[i].lock); + INIT_LIST_HEAD(&lru->node[i].list); + lru->node[i].nr_items = 0; + } + return 0; +} +EXPORT_SYMBOL_GPL(list_lru_init); + +void list_lru_destroy(struct list_lru *lru) +{ + kfree(lru->node); +} +EXPORT_SYMBOL_GPL(list_lru_destroy); diff --git a/mm/memory-failure.c b/mm/memory-failure.c index d472e14c6808..947ed5413279 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -248,10 +248,12 @@ void shake_page(struct page *p, int access) */ if (access) { int nr; + int nid = page_to_nid(p); do { struct shrink_control shrink = { .gfp_mask = GFP_KERNEL, }; + node_set(nid, shrink.nodes_to_scan); nr = shrink_slab(&shrink, 1000, 1000); if (page_count(p) == 1) diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 0eb1a1df649d..ed85fe3870e2 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -52,14 +52,10 @@ DEFINE_MUTEX(mem_hotplug_mutex); void lock_memory_hotplug(void) { mutex_lock(&mem_hotplug_mutex); - - /* for exclusive hibernation if CONFIG_HIBERNATION=y */ - lock_system_sleep(); } void unlock_memory_hotplug(void) { - unlock_system_sleep(); mutex_unlock(&mem_hotplug_mutex); } diff --git a/mm/vmscan.c b/mm/vmscan.c index 76d1d5eaeec3..8ed1b775bdc9 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -186,14 +186,31 @@ static unsigned long get_lru_size(struct lruvec *lruvec, enum lru_list lru) } /* - * Add a shrinker callback to be called from the vm + * Add a shrinker callback to be called from the vm. */ -void register_shrinker(struct shrinker *shrinker) +int register_shrinker(struct shrinker *shrinker) { - atomic_long_set(&shrinker->nr_in_batch, 0); + size_t size = sizeof(*shrinker->nr_deferred); + + /* + * If we only have one possible node in the system anyway, save + * ourselves the trouble and disable NUMA aware behavior. This way we + * will save memory and some small loop time later. + */ + if (nr_node_ids == 1) + shrinker->flags &= ~SHRINKER_NUMA_AWARE; + + if (shrinker->flags & SHRINKER_NUMA_AWARE) + size *= nr_node_ids; + + shrinker->nr_deferred = kzalloc(size, GFP_KERNEL); + if (!shrinker->nr_deferred) + return -ENOMEM; + down_write(&shrinker_rwsem); list_add_tail(&shrinker->list, &shrinker_list); up_write(&shrinker_rwsem); + return 0; } EXPORT_SYMBOL(register_shrinker); @@ -208,15 +225,102 @@ void unregister_shrinker(struct shrinker *shrinker) } EXPORT_SYMBOL(unregister_shrinker); -static inline int do_shrinker_shrink(struct shrinker *shrinker, - struct shrink_control *sc, - unsigned long nr_to_scan) -{ - sc->nr_to_scan = nr_to_scan; - return (*shrinker->shrink)(shrinker, sc); +#define SHRINK_BATCH 128 + +static unsigned long +shrink_slab_node(struct shrink_control *shrinkctl, struct shrinker *shrinker, + unsigned long nr_pages_scanned, unsigned long lru_pages) +{ + unsigned long freed = 0; + unsigned long long delta; + long total_scan; + long max_pass; + long nr; + long new_nr; + int nid = shrinkctl->nid; + long batch_size = shrinker->batch ? shrinker->batch + : SHRINK_BATCH; + + max_pass = shrinker->count_objects(shrinker, shrinkctl); + if (max_pass == 0) + return 0; + + /* + * copy the current shrinker scan count into a local variable + * and zero it so that other concurrent shrinker invocations + * don't also do this scanning work. + */ + nr = atomic_long_xchg(&shrinker->nr_deferred[nid], 0); + + total_scan = nr; + delta = (4 * nr_pages_scanned) / shrinker->seeks; + delta *= max_pass; + do_div(delta, lru_pages + 1); + total_scan += delta; + if (total_scan < 0) { + printk(KERN_ERR + "shrink_slab: %pF negative objects to delete nr=%ld\n", + shrinker->scan_objects, total_scan); + total_scan = max_pass; + } + + /* + * We need to avoid excessive windup on filesystem shrinkers + * due to large numbers of GFP_NOFS allocations causing the + * shrinkers to return -1 all the time. This results in a large + * nr being built up so when a shrink that can do some work + * comes along it empties the entire cache due to nr >>> + * max_pass. This is bad for sustaining a working set in + * memory. + * + * Hence only allow the shrinker to scan the entire cache when + * a large delta change is calculated directly. + */ + if (delta < max_pass / 4) + total_scan = min(total_scan, max_pass / 2); + + /* + * Avoid risking looping forever due to too large nr value: + * never try to free more than twice the estimate number of + * freeable entries. + */ + if (total_scan > max_pass * 2) + total_scan = max_pass * 2; + + trace_mm_shrink_slab_start(shrinker, shrinkctl, nr, + nr_pages_scanned, lru_pages, + max_pass, delta, total_scan); + + while (total_scan >= batch_size) { + unsigned long ret; + + shrinkctl->nr_to_scan = batch_size; + ret = shrinker->scan_objects(shrinker, shrinkctl); + if (ret == SHRINK_STOP) + break; + freed += ret; + + count_vm_events(SLABS_SCANNED, batch_size); + total_scan -= batch_size; + + cond_resched(); + } + + /* + * move the unused scan count back into the shrinker in a + * manner that handles concurrent updates. If we exhausted the + * scan, there is no need to do an update. + */ + if (total_scan > 0) + new_nr = atomic_long_add_return(total_scan, + &shrinker->nr_deferred[nid]); + else + new_nr = atomic_long_read(&shrinker->nr_deferred[nid]); + + trace_mm_shrink_slab_end(shrinker, freed, nr, new_nr); + return freed; } -#define SHRINK_BATCH 128 /* * Call the shrink functions to age shrinkable caches * @@ -236,115 +340,45 @@ static inline int do_shrinker_shrink(struct shrinker *shrinker, * * Returns the number of slab objects which we shrunk. */ -unsigned long shrink_slab(struct shrink_control *shrink, +unsigned long shrink_slab(struct shrink_control *shrinkctl, unsigned long nr_pages_scanned, unsigned long lru_pages) { struct shrinker *shrinker; - unsigned long ret = 0; + unsigned long freed = 0; if (nr_pages_scanned == 0) nr_pages_scanned = SWAP_CLUSTER_MAX; if (!down_read_trylock(&shrinker_rwsem)) { - /* Assume we'll be able to shrink next time */ - ret = 1; + /* + * If we would return 0, our callers would understand that we + * have nothing else to shrink and give up trying. By returning + * 1 we keep it going and assume we'll be able to shrink next + * time. + */ + freed = 1; goto out; } list_for_each_entry(shrinker, &shrinker_list, list) { - unsigned long long delta; - long total_scan; - long max_pass; - int shrink_ret = 0; - long nr; - long new_nr; - long batch_size = shrinker->batch ? shrinker->batch - : SHRINK_BATCH; - - max_pass = do_shrinker_shrink(shrinker, shrink, 0); - if (max_pass <= 0) - continue; - - /* - * copy the current shrinker scan count into a local variable - * and zero it so that other concurrent shrinker invocations - * don't also do this scanning work. - */ - nr = atomic_long_xchg(&shrinker->nr_in_batch, 0); - - total_scan = nr; - delta = (4 * nr_pages_scanned) / shrinker->seeks; - delta *= max_pass; - do_div(delta, lru_pages + 1); - total_scan += delta; - if (total_scan < 0) { - printk(KERN_ERR "shrink_slab: %pF negative objects to " - "delete nr=%ld\n", - shrinker->shrink, total_scan); - total_scan = max_pass; - } - - /* - * We need to avoid excessive windup on filesystem shrinkers - * due to large numbers of GFP_NOFS allocations causing the - * shrinkers to return -1 all the time. This results in a large - * nr being built up so when a shrink that can do some work - * comes along it empties the entire cache due to nr >>> - * max_pass. This is bad for sustaining a working set in - * memory. - * - * Hence only allow the shrinker to scan the entire cache when - * a large delta change is calculated directly. - */ - if (delta < max_pass / 4) - total_scan = min(total_scan, max_pass / 2); - - /* - * Avoid risking looping forever due to too large nr value: - * never try to free more than twice the estimate number of - * freeable entries. - */ - if (total_scan > max_pass * 2) - total_scan = max_pass * 2; - - trace_mm_shrink_slab_start(shrinker, shrink, nr, - nr_pages_scanned, lru_pages, - max_pass, delta, total_scan); - - while (total_scan >= batch_size) { - int nr_before; + for_each_node_mask(shrinkctl->nid, shrinkctl->nodes_to_scan) { + if (!node_online(shrinkctl->nid)) + continue; - nr_before = do_shrinker_shrink(shrinker, shrink, 0); - shrink_ret = do_shrinker_shrink(shrinker, shrink, - batch_size); - if (shrink_ret == -1) + if (!(shrinker->flags & SHRINKER_NUMA_AWARE) && + (shrinkctl->nid != 0)) break; - if (shrink_ret < nr_before) - ret += nr_before - shrink_ret; - count_vm_events(SLABS_SCANNED, batch_size); - total_scan -= batch_size; - cond_resched(); - } + freed += shrink_slab_node(shrinkctl, shrinker, + nr_pages_scanned, lru_pages); - /* - * move the unused scan count back into the shrinker in a - * manner that handles concurrent updates. If we exhausted the - * scan, there is no need to do an update. - */ - if (total_scan > 0) - new_nr = atomic_long_add_return(total_scan, - &shrinker->nr_in_batch); - else - new_nr = atomic_long_read(&shrinker->nr_in_batch); - - trace_mm_shrink_slab_end(shrinker, shrink_ret, nr, new_nr); + } } up_read(&shrinker_rwsem); out: cond_resched(); - return ret; + return freed; } static inline int is_page_cache_freeable(struct page *page) @@ -2400,12 +2434,16 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist, */ if (global_reclaim(sc)) { unsigned long lru_pages = 0; + + nodes_clear(shrink->nodes_to_scan); for_each_zone_zonelist(zone, z, zonelist, gfp_zone(sc->gfp_mask)) { if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL)) continue; lru_pages += zone_reclaimable_pages(zone); + node_set(zone_to_nid(zone), + shrink->nodes_to_scan); } shrink_slab(shrink, sc->nr_scanned, lru_pages); @@ -2861,6 +2899,8 @@ static bool kswapd_shrink_zone(struct zone *zone, return true; shrink_zone(zone, sc); + nodes_clear(shrink.nodes_to_scan); + node_set(zone_to_nid(zone), shrink.nodes_to_scan); reclaim_state->reclaimed_slab = 0; shrink_slab(&shrink, sc->nr_scanned, lru_pages); @@ -3541,10 +3581,9 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) * number of slab pages and shake the slab until it is reduced * by the same nr_pages that we used for reclaiming unmapped * pages. - * - * Note that shrink_slab will free memory on all zones and may - * take a long time. */ + nodes_clear(shrink.nodes_to_scan); + node_set(zone_to_nid(zone), shrink.nodes_to_scan); for (;;) { unsigned long lru_pages = zone_reclaimable_pages(zone); diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index 415159061cd0..5285ead196c0 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -434,12 +434,13 @@ EXPORT_SYMBOL_GPL(rpcauth_destroy_credcache); /* * Remove stale credentials. Avoid sleeping inside the loop. */ -static int +static long rpcauth_prune_expired(struct list_head *free, int nr_to_scan) { spinlock_t *cache_lock; struct rpc_cred *cred, *next; unsigned long expired = jiffies - RPC_AUTH_EXPIRY_MORATORIUM; + long freed = 0; list_for_each_entry_safe(cred, next, &cred_unused, cr_lru) { @@ -451,10 +452,11 @@ rpcauth_prune_expired(struct list_head *free, int nr_to_scan) */ if (time_in_range(cred->cr_expire, expired, jiffies) && test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags) != 0) - return 0; + break; list_del_init(&cred->cr_lru); number_cred_unused--; + freed++; if (atomic_read(&cred->cr_count) != 0) continue; @@ -467,29 +469,39 @@ rpcauth_prune_expired(struct list_head *free, int nr_to_scan) } spin_unlock(cache_lock); } - return (number_cred_unused / 100) * sysctl_vfs_cache_pressure; + return freed; } /* * Run memory cache shrinker. */ -static int -rpcauth_cache_shrinker(struct shrinker *shrink, struct shrink_control *sc) +static unsigned long +rpcauth_cache_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) + { LIST_HEAD(free); - int res; - int nr_to_scan = sc->nr_to_scan; - gfp_t gfp_mask = sc->gfp_mask; + unsigned long freed; + + if ((sc->gfp_mask & GFP_KERNEL) != GFP_KERNEL) + return SHRINK_STOP; - if ((gfp_mask & GFP_KERNEL) != GFP_KERNEL) - return (nr_to_scan == 0) ? 0 : -1; + /* nothing left, don't come back */ if (list_empty(&cred_unused)) - return 0; + return SHRINK_STOP; + spin_lock(&rpc_credcache_lock); - res = rpcauth_prune_expired(&free, nr_to_scan); + freed = rpcauth_prune_expired(&free, sc->nr_to_scan); spin_unlock(&rpc_credcache_lock); rpcauth_destroy_credlist(&free); - return res; + + return freed; +} + +static unsigned long +rpcauth_cache_shrink_count(struct shrinker *shrink, struct shrink_control *sc) + +{ + return (number_cred_unused / 100) * sysctl_vfs_cache_pressure; } /* @@ -805,7 +817,8 @@ rpcauth_uptodatecred(struct rpc_task *task) } static struct shrinker rpc_cred_shrinker = { - .shrink = rpcauth_cache_shrinker, + .count_objects = rpcauth_cache_shrink_count, + .scan_objects = rpcauth_cache_shrink_scan, .seeks = DEFAULT_SEEKS, }; diff --git a/net/sunrpc/auth_generic.c b/net/sunrpc/auth_generic.c index f6d84be49050..ed04869b2d4f 100644 --- a/net/sunrpc/auth_generic.c +++ b/net/sunrpc/auth_generic.c @@ -239,7 +239,7 @@ generic_key_timeout(struct rpc_auth *auth, struct rpc_cred *cred) if (test_and_clear_bit(RPC_CRED_KEY_EXPIRE_SOON, &acred->ac_flags)) dprintk("RPC: UID %d Credential key reset\n", - tcred->cr_uid); + from_kuid(&init_user_ns, tcred->cr_uid)); /* set up fasttrack for the normal case */ set_bit(RPC_CRED_NOTIFY_TIMEOUT, &acred->ac_flags); } diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 30eb502135bb..fcac5d141717 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -75,7 +75,7 @@ static unsigned int gss_key_expire_timeo = GSS_KEY_EXPIRE_TIMEO; * using integrity (two 4-byte integers): */ #define GSS_VERF_SLACK 100 -static DEFINE_HASHTABLE(gss_auth_hash_table, 16); +static DEFINE_HASHTABLE(gss_auth_hash_table, 4); static DEFINE_SPINLOCK(gss_auth_hash_lock); struct gss_pipe { diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index e54ebd530849..6e61a019aa5e 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -3428,6 +3428,7 @@ static struct snd_pci_quirk msi_black_list[] = { SND_PCI_QUIRK(0x1043, 0x81f2, "ASUS", 0), /* Athlon64 X2 + nvidia */ SND_PCI_QUIRK(0x1043, 0x81f6, "ASUS", 0), /* nvidia */ SND_PCI_QUIRK(0x1043, 0x822d, "ASUS", 0), /* Athlon64 X2 + nvidia MCP55 */ + SND_PCI_QUIRK(0x1179, 0xfb44, "Toshiba Satellite C870", 0), /* AMD Hudson */ SND_PCI_QUIRK(0x1849, 0x0888, "ASRock", 0), /* Athlon64 X2 + nvidia */ SND_PCI_QUIRK(0xa0a0, 0x0575, "Aopen MZ915-M", 0), /* ICH6 */ {} diff --git a/sound/pci/hda/patch_cirrus.c b/sound/pci/hda/patch_cirrus.c index cccaf9c7a7bb..b524f89a1f13 100644 --- a/sound/pci/hda/patch_cirrus.c +++ b/sound/pci/hda/patch_cirrus.c @@ -169,7 +169,7 @@ static void cs_automute(struct hda_codec *codec) snd_hda_gen_update_outputs(codec); - if (spec->gpio_eapd_hp) { + if (spec->gpio_eapd_hp || spec->gpio_eapd_speaker) { spec->gpio_data = spec->gen.hp_jack_present ? spec->gpio_eapd_hp : spec->gpio_eapd_speaker; snd_hda_codec_write(codec, 0x01, 0, @@ -291,10 +291,11 @@ static int cs_init(struct hda_codec *codec) { struct cs_spec *spec = codec->spec; - /* init_verb sequence for C0/C1/C2 errata*/ - snd_hda_sequence_write(codec, cs_errata_init_verbs); - - snd_hda_sequence_write(codec, cs_coef_init_verbs); + if (spec->vendor_nid == CS420X_VENDOR_NID) { + /* init_verb sequence for C0/C1/C2 errata*/ + snd_hda_sequence_write(codec, cs_errata_init_verbs); + snd_hda_sequence_write(codec, cs_coef_init_verbs); + } snd_hda_gen_init(codec); @@ -307,8 +308,10 @@ static int cs_init(struct hda_codec *codec) spec->gpio_data); } - init_input_coef(codec); - init_digital_coef(codec); + if (spec->vendor_nid == CS420X_VENDOR_NID) { + init_input_coef(codec); + init_digital_coef(codec); + } return 0; } @@ -552,6 +555,76 @@ static int patch_cs420x(struct hda_codec *codec) } /* + * CS4208 support: + * Its layout is no longer compatible with CS4206/CS4207, and the generic + * parser seems working fairly well, except for trivial fixups. + */ +enum { + CS4208_GPIO0, +}; + +static const struct hda_model_fixup cs4208_models[] = { + { .id = CS4208_GPIO0, .name = "gpio0" }, + {} +}; + +static const struct snd_pci_quirk cs4208_fixup_tbl[] = { + /* codec SSID */ + SND_PCI_QUIRK(0x106b, 0x7100, "MacBookPro 6,1", CS4208_GPIO0), + SND_PCI_QUIRK(0x106b, 0x7200, "MacBookPro 6,2", CS4208_GPIO0), + {} /* terminator */ +}; + +static void cs4208_fixup_gpio0(struct hda_codec *codec, + const struct hda_fixup *fix, int action) +{ + if (action == HDA_FIXUP_ACT_PRE_PROBE) { + struct cs_spec *spec = codec->spec; + spec->gpio_eapd_hp = 0; + spec->gpio_eapd_speaker = 1; + spec->gpio_mask = spec->gpio_dir = + spec->gpio_eapd_hp | spec->gpio_eapd_speaker; + } +} + +static const struct hda_fixup cs4208_fixups[] = { + [CS4208_GPIO0] = { + .type = HDA_FIXUP_FUNC, + .v.func = cs4208_fixup_gpio0, + }, +}; + +static int patch_cs4208(struct hda_codec *codec) +{ + struct cs_spec *spec; + int err; + + spec = cs_alloc_spec(codec, 0); /* no specific w/a */ + if (!spec) + return -ENOMEM; + + spec->gen.automute_hook = cs_automute; + + snd_hda_pick_fixup(codec, cs4208_models, cs4208_fixup_tbl, + cs4208_fixups); + snd_hda_apply_fixup(codec, HDA_FIXUP_ACT_PRE_PROBE); + + err = cs_parse_auto_config(codec); + if (err < 0) + goto error; + + codec->patch_ops = cs_patch_ops; + + snd_hda_apply_fixup(codec, HDA_FIXUP_ACT_PROBE); + + return 0; + + error: + cs_free(codec); + return err; +} + +/* * Cirrus Logic CS4210 * * 1 DAC => HP(sense) / Speakers, @@ -991,6 +1064,7 @@ static int patch_cs4213(struct hda_codec *codec) static const struct hda_codec_preset snd_hda_preset_cirrus[] = { { .id = 0x10134206, .name = "CS4206", .patch = patch_cs420x }, { .id = 0x10134207, .name = "CS4207", .patch = patch_cs420x }, + { .id = 0x10134208, .name = "CS4208", .patch = patch_cs4208 }, { .id = 0x10134210, .name = "CS4210", .patch = patch_cs4210 }, { .id = 0x10134213, .name = "CS4213", .patch = patch_cs4213 }, {} /* terminator */ @@ -998,6 +1072,7 @@ static const struct hda_codec_preset snd_hda_preset_cirrus[] = { MODULE_ALIAS("snd-hda-codec-id:10134206"); MODULE_ALIAS("snd-hda-codec-id:10134207"); +MODULE_ALIAS("snd-hda-codec-id:10134208"); MODULE_ALIAS("snd-hda-codec-id:10134210"); MODULE_ALIAS("snd-hda-codec-id:10134213"); diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c index 9a58893d52a7..3d8cd04455a6 100644 --- a/sound/pci/hda/patch_hdmi.c +++ b/sound/pci/hda/patch_hdmi.c @@ -44,6 +44,8 @@ static bool static_hdmi_pcm; module_param(static_hdmi_pcm, bool, 0644); MODULE_PARM_DESC(static_hdmi_pcm, "Don't restrict PCM parameters per ELD info"); +#define is_haswell(codec) ((codec)->vendor_id == 0x80862807) + struct hdmi_spec_per_cvt { hda_nid_t cvt_nid; int assigned; @@ -894,6 +896,11 @@ static void hdmi_setup_audio_infoframe(struct hda_codec *codec, if (!channels) return; + if (is_haswell(codec)) + snd_hda_codec_write(codec, pin_nid, 0, + AC_VERB_SET_AMP_GAIN_MUTE, + AMP_OUT_UNMUTE); + eld = &per_pin->sink_eld; if (!eld->monitor_present) return; @@ -1033,10 +1040,10 @@ static void hdmi_unsol_event(struct hda_codec *codec, unsigned int res) hdmi_non_intrinsic_event(codec, res); } -static void haswell_verify_pin_D0(struct hda_codec *codec, +static void haswell_verify_D0(struct hda_codec *codec, hda_nid_t cvt_nid, hda_nid_t nid) { - int pwr, lamp, ramp; + int pwr; /* For Haswell, the converter 1/2 may keep in D3 state after bootup, * thus pins could only choose converter 0 for use. Make sure the @@ -1052,25 +1059,6 @@ static void haswell_verify_pin_D0(struct hda_codec *codec, pwr = (pwr & AC_PWRST_ACTUAL) >> AC_PWRST_ACTUAL_SHIFT; snd_printd("Haswell HDMI audio: Power for pin 0x%x is now D%d\n", nid, pwr); } - - lamp = snd_hda_codec_read(codec, nid, 0, - AC_VERB_GET_AMP_GAIN_MUTE, - AC_AMP_GET_LEFT | AC_AMP_GET_OUTPUT); - ramp = snd_hda_codec_read(codec, nid, 0, - AC_VERB_GET_AMP_GAIN_MUTE, - AC_AMP_GET_RIGHT | AC_AMP_GET_OUTPUT); - if (lamp != ramp) { - snd_hda_codec_write(codec, nid, 0, AC_VERB_SET_AMP_GAIN_MUTE, - AC_AMP_SET_RIGHT | AC_AMP_SET_OUTPUT | lamp); - - lamp = snd_hda_codec_read(codec, nid, 0, - AC_VERB_GET_AMP_GAIN_MUTE, - AC_AMP_GET_LEFT | AC_AMP_GET_OUTPUT); - ramp = snd_hda_codec_read(codec, nid, 0, - AC_VERB_GET_AMP_GAIN_MUTE, - AC_AMP_GET_RIGHT | AC_AMP_GET_OUTPUT); - snd_printd("Haswell HDMI audio: Mute after set on pin 0x%x: [0x%x 0x%x]\n", nid, lamp, ramp); - } } /* @@ -1087,8 +1075,8 @@ static int hdmi_setup_stream(struct hda_codec *codec, hda_nid_t cvt_nid, int pinctl; int new_pinctl = 0; - if (codec->vendor_id == 0x80862807) - haswell_verify_pin_D0(codec, cvt_nid, pin_nid); + if (is_haswell(codec)) + haswell_verify_D0(codec, cvt_nid, pin_nid); if (snd_hda_query_pin_caps(codec, pin_nid) & AC_PINCAP_HBR) { pinctl = snd_hda_codec_read(codec, pin_nid, 0, @@ -1227,7 +1215,7 @@ static int hdmi_pcm_open(struct hda_pcm_stream *hinfo, mux_idx); /* configure unused pins to choose other converters */ - if (codec->vendor_id == 0x80862807) + if (is_haswell(codec)) haswell_config_cvts(codec, pin_idx, mux_idx); snd_hda_spdif_ctls_assign(codec, pin_idx, per_cvt->cvt_nid); @@ -1358,14 +1346,10 @@ static void hdmi_present_sense(struct hdmi_spec_per_pin *per_pin, int repoll) /* Haswell-specific workaround: re-setup when the transcoder is * changed during the stream playback */ - if (codec->vendor_id == 0x80862807 && - eld->eld_valid && !old_eld_valid && per_pin->setup) { - snd_hda_codec_write(codec, pin_nid, 0, - AC_VERB_SET_AMP_GAIN_MUTE, - AMP_OUT_UNMUTE); + if (is_haswell(codec) && + eld->eld_valid && !old_eld_valid && per_pin->setup) hdmi_setup_audio_infoframe(codec, per_pin, per_pin->non_pcm); - } } mutex_unlock(&pin_eld->lock); @@ -1405,7 +1389,7 @@ static int hdmi_add_pin(struct hda_codec *codec, hda_nid_t pin_nid) if (get_defcfg_connect(config) == AC_JACK_PORT_NONE) return 0; - if (codec->vendor_id == 0x80862807) + if (is_haswell(codec)) intel_haswell_fixup_connect_list(codec, pin_nid); pin_idx = spec->num_pins; @@ -2014,7 +1998,7 @@ static int patch_generic_hdmi(struct hda_codec *codec) codec->spec = spec; hdmi_array_init(spec, 4); - if (codec->vendor_id == 0x80862807) { + if (is_haswell(codec)) { intel_haswell_enable_all_pins(codec, true); intel_haswell_fixup_enable_dp12(codec); } @@ -2025,7 +2009,7 @@ static int patch_generic_hdmi(struct hda_codec *codec) return -EINVAL; } codec->patch_ops = generic_hdmi_patch_ops; - if (codec->vendor_id == 0x80862807) { + if (is_haswell(codec)) { codec->patch_ops.set_power_state = haswell_set_power_state; codec->dp_mst = true; } diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 9e9378cde8fa..bc07d369fac4 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -3443,6 +3443,56 @@ static void alc283_fixup_chromebook(struct hda_codec *codec, } } +/* mute tablet speaker pin (0x14) via dock plugging in addition */ +static void asus_tx300_automute(struct hda_codec *codec) +{ + struct alc_spec *spec = codec->spec; + snd_hda_gen_update_outputs(codec); + if (snd_hda_jack_detect(codec, 0x1b)) + spec->gen.mute_bits |= (1ULL << 0x14); +} + +static void alc282_fixup_asus_tx300(struct hda_codec *codec, + const struct hda_fixup *fix, int action) +{ + struct alc_spec *spec = codec->spec; + /* TX300 needs to set up GPIO2 for the speaker amp */ + static const struct hda_verb gpio2_verbs[] = { + { 0x01, AC_VERB_SET_GPIO_MASK, 0x04 }, + { 0x01, AC_VERB_SET_GPIO_DIRECTION, 0x04 }, + { 0x01, AC_VERB_SET_GPIO_DATA, 0x04 }, + {} + }; + static const struct hda_pintbl dock_pins[] = { + { 0x1b, 0x21114000 }, /* dock speaker pin */ + {} + }; + struct snd_kcontrol *kctl; + + switch (action) { + case HDA_FIXUP_ACT_PRE_PROBE: + snd_hda_add_verbs(codec, gpio2_verbs); + snd_hda_apply_pincfgs(codec, dock_pins); + spec->gen.auto_mute_via_amp = 1; + spec->gen.automute_hook = asus_tx300_automute; + snd_hda_jack_detect_enable_callback(codec, 0x1b, + HDA_GEN_HP_EVENT, + snd_hda_gen_hp_automute); + break; + case HDA_FIXUP_ACT_BUILD: + /* this is a bit tricky; give more sane names for the main + * (tablet) speaker and the dock speaker, respectively + */ + kctl = snd_hda_find_mixer_ctl(codec, "Speaker Playback Switch"); + if (kctl) + strcpy(kctl->id.name, "Dock Speaker Playback Switch"); + kctl = snd_hda_find_mixer_ctl(codec, "Bass Speaker Playback Switch"); + if (kctl) + strcpy(kctl->id.name, "Speaker Playback Switch"); + break; + } +} + enum { ALC269_FIXUP_SONY_VAIO, ALC275_FIXUP_SONY_VAIO_GPIO2, @@ -3480,6 +3530,7 @@ enum { ALC269_FIXUP_LIMIT_INT_MIC_BOOST, ALC269VB_FIXUP_ORDISSIMO_EVE2, ALC283_FIXUP_CHROME_BOOK, + ALC282_FIXUP_ASUS_TX300, }; static const struct hda_fixup alc269_fixups[] = { @@ -3735,6 +3786,10 @@ static const struct hda_fixup alc269_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = alc283_fixup_chromebook, }, + [ALC282_FIXUP_ASUS_TX300] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc282_fixup_asus_tx300, + }, }; static const struct snd_pci_quirk alc269_fixup_tbl[] = { @@ -3784,6 +3839,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x1983, "HP Pavilion", ALC269_FIXUP_HP_MUTE_LED_MIC1), SND_PCI_QUIRK(0x103c, 0x21ed, "HP Falco Chromebook", ALC283_FIXUP_CHROME_BOOK), SND_PCI_QUIRK_VENDOR(0x103c, "HP", ALC269_FIXUP_HP_MUTE_LED), + SND_PCI_QUIRK(0x1043, 0x103f, "ASUS TX300", ALC282_FIXUP_ASUS_TX300), SND_PCI_QUIRK(0x1043, 0x106d, "Asus K53BE", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), SND_PCI_QUIRK(0x1043, 0x115d, "Asus 1015E", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), SND_PCI_QUIRK(0x1043, 0x1427, "Asus Zenbook UX31E", ALC269VB_FIXUP_DMIC), diff --git a/sound/soc/atmel/atmel_ssc_dai.c b/sound/soc/atmel/atmel_ssc_dai.c index 0ecf356027f6..bb53dea85b17 100644 --- a/sound/soc/atmel/atmel_ssc_dai.c +++ b/sound/soc/atmel/atmel_ssc_dai.c @@ -649,7 +649,7 @@ static int atmel_ssc_prepare(struct snd_pcm_substream *substream, dma_params = ssc_p->dma_params[dir]; ssc_writel(ssc_p->ssc->regs, CR, dma_params->mask->ssc_enable); - ssc_writel(ssc_p->ssc->regs, IER, dma_params->mask->ssc_error); + ssc_writel(ssc_p->ssc->regs, IDR, dma_params->mask->ssc_error); pr_debug("%s enabled SSC_SR=0x%08x\n", dir ? "receive" : "transmit", diff --git a/sound/soc/codecs/mc13783.c b/sound/soc/codecs/mc13783.c index 4d3c8fd8c5db..ea141e1d6f28 100644 --- a/sound/soc/codecs/mc13783.c +++ b/sound/soc/codecs/mc13783.c @@ -125,6 +125,10 @@ static int mc13783_write(struct snd_soc_codec *codec, ret = mc13xxx_reg_write(priv->mc13xxx, reg, value); + /* include errata fix for spi audio problems */ + if (reg == MC13783_AUDIO_CODEC || reg == MC13783_AUDIO_DAC) + ret = mc13xxx_reg_write(priv->mc13xxx, reg, value); + mc13xxx_unlock(priv->mc13xxx); return ret; diff --git a/sound/soc/fsl/Kconfig b/sound/soc/fsl/Kconfig index 704e246f5b1e..b7ab71f2ccc1 100644 --- a/sound/soc/fsl/Kconfig +++ b/sound/soc/fsl/Kconfig @@ -198,6 +198,7 @@ config SND_SOC_IMX_SPDIF select SND_SOC_IMX_PCM_DMA select SND_SOC_FSL_SPDIF select SND_SOC_SPDIF + select REGMAP_MMIO help SoC Audio support for i.MX boards with S/PDIF Say Y if you want to add support for SoC audio on an i.MX board with diff --git a/sound/soc/fsl/imx-audmux.c b/sound/soc/fsl/imx-audmux.c index ab17381cc981..d3bf71a0ec56 100644 --- a/sound/soc/fsl/imx-audmux.c +++ b/sound/soc/fsl/imx-audmux.c @@ -335,7 +335,8 @@ static int imx_audmux_probe(struct platform_device *pdev) if (audmux_type == IMX31_AUDMUX) audmux_debugfs_init(); - imx_audmux_parse_dt_defaults(pdev, pdev->dev.of_node); + if (of_id) + imx_audmux_parse_dt_defaults(pdev, pdev->dev.of_node); return 0; } diff --git a/sound/soc/kirkwood/kirkwood-i2s.c b/sound/soc/kirkwood/kirkwood-i2s.c index 7fce340ab3ef..0f3d73d4ef48 100644 --- a/sound/soc/kirkwood/kirkwood-i2s.c +++ b/sound/soc/kirkwood/kirkwood-i2s.c @@ -559,7 +559,8 @@ static int kirkwood_i2s_dev_remove(struct platform_device *pdev) #ifdef CONFIG_OF static struct of_device_id mvebu_audio_of_match[] = { - { .compatible = "marvell,mvebu-audio" }, + { .compatible = "marvell,kirkwood-audio" }, + { .compatible = "marvell,dove-audio" }, { } }; MODULE_DEVICE_TABLE(of, mvebu_audio_of_match); diff --git a/sound/soc/sh/rcar/scu.c b/sound/soc/sh/rcar/scu.c index 184d9008cecd..2df2e9150b89 100644 --- a/sound/soc/sh/rcar/scu.c +++ b/sound/soc/sh/rcar/scu.c @@ -157,9 +157,9 @@ static int rsnd_scu_start(struct rsnd_mod *mod, int ret; /* - * SCU will be used if it has RSND_SCU_USB_HPBIF flags + * SCU will be used if it has RSND_SCU_USE_HPBIF flags */ - if (!(flags & RSND_SCU_USB_HPBIF)) { + if (!(flags & RSND_SCU_USE_HPBIF)) { /* it use PIO transter */ dev_dbg(dev, "%s%d is not used\n", rsnd_mod_name(mod), rsnd_mod_id(mod)); diff --git a/tools/perf/Makefile b/tools/perf/Makefile index c5dc1ad1b8d7..3a0ff7fb71b6 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -394,6 +394,8 @@ ifeq ($(ARCH),x86) LIB_OBJS += $(OUTPUT)tests/perf-time-to-tsc.o endif LIB_OBJS += $(OUTPUT)tests/code-reading.o +LIB_OBJS += $(OUTPUT)tests/sample-parsing.o +LIB_OBJS += $(OUTPUT)tests/parse-no-sample-id-all.o BUILTIN_OBJS += $(OUTPUT)builtin-annotate.o BUILTIN_OBJS += $(OUTPUT)builtin-bench.o @@ -439,7 +441,6 @@ PERFLIBS = $(LIB_FILE) $(LIBLK) $(LIBTRACEEVENT) ifneq ($(OUTPUT),) CFLAGS += -I$(OUTPUT) endif -LIB_OBJS += $(OUTPUT)tests/sample-parsing.o ifdef NO_LIBELF EXTLIBS := $(filter-out -lelf,$(EXTLIBS)) diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index f988d380c52f..5ebd0c3b71b6 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -277,6 +277,7 @@ int cmd_annotate(int argc, const char **argv, const char *prefix __maybe_unused) .tool = { .sample = process_sample_event, .mmap = perf_event__process_mmap, + .mmap2 = perf_event__process_mmap2, .comm = perf_event__process_comm, .exit = perf_event__process_exit, .fork = perf_event__process_fork, diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index 9b336fdb6f71..423875c999b2 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -123,6 +123,19 @@ static int perf_event__repipe_mmap(struct perf_tool *tool, return err; } +static int perf_event__repipe_mmap2(struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + int err; + + err = perf_event__process_mmap2(tool, event, sample, machine); + perf_event__repipe(tool, event, sample, machine); + + return err; +} + static int perf_event__repipe_fork(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, @@ -339,6 +352,7 @@ static int __cmd_inject(struct perf_inject *inject) if (inject->build_ids || inject->sched_stat) { inject->tool.mmap = perf_event__repipe_mmap; + inject->tool.mmap2 = perf_event__repipe_mmap2; inject->tool.fork = perf_event__repipe_fork; inject->tool.tracing_data = perf_event__repipe_tracing_data; } @@ -390,6 +404,7 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused) .tool = { .sample = perf_event__repipe_sample, .mmap = perf_event__repipe, + .mmap2 = perf_event__repipe, .comm = perf_event__repipe, .fork = perf_event__repipe, .exit = perf_event__repipe, diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index 47b35407c2f2..935d52216c89 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -1165,16 +1165,16 @@ static int kvm_live_open_events(struct perf_kvm_stat *kvm) struct perf_event_attr *attr = &pos->attr; /* make sure these *are* set */ - attr->sample_type |= PERF_SAMPLE_TID; - attr->sample_type |= PERF_SAMPLE_TIME; - attr->sample_type |= PERF_SAMPLE_CPU; - attr->sample_type |= PERF_SAMPLE_RAW; + perf_evsel__set_sample_bit(pos, TID); + perf_evsel__set_sample_bit(pos, TIME); + perf_evsel__set_sample_bit(pos, CPU); + perf_evsel__set_sample_bit(pos, RAW); /* make sure these are *not*; want as small a sample as possible */ - attr->sample_type &= ~PERF_SAMPLE_PERIOD; - attr->sample_type &= ~PERF_SAMPLE_IP; - attr->sample_type &= ~PERF_SAMPLE_CALLCHAIN; - attr->sample_type &= ~PERF_SAMPLE_ADDR; - attr->sample_type &= ~PERF_SAMPLE_READ; + perf_evsel__reset_sample_bit(pos, PERIOD); + perf_evsel__reset_sample_bit(pos, IP); + perf_evsel__reset_sample_bit(pos, CALLCHAIN); + perf_evsel__reset_sample_bit(pos, ADDR); + perf_evsel__reset_sample_bit(pos, READ); attr->mmap = 0; attr->comm = 0; attr->task = 0; diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c index 791b432df847..253133a6251d 100644 --- a/tools/perf/builtin-mem.c +++ b/tools/perf/builtin-mem.c @@ -190,6 +190,7 @@ int cmd_mem(int argc, const char **argv, const char *prefix __maybe_unused) .tool = { .sample = process_sample_event, .mmap = perf_event__process_mmap, + .mmap2 = perf_event__process_mmap2, .comm = perf_event__process_comm, .lost = perf_event__process_lost, .fork = perf_event__process_fork, diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 9725aa375414..8e50d8d77419 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -744,6 +744,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) .tool = { .sample = process_sample_event, .mmap = perf_event__process_mmap, + .mmap2 = perf_event__process_mmap2, .comm = perf_event__process_comm, .exit = perf_event__process_exit, .fork = perf_event__process_fork, diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 93a34cef9676..7f31a3ded1b6 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -542,6 +542,7 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused, static struct perf_tool perf_script = { .sample = process_sample_event, .mmap = perf_event__process_mmap, + .mmap2 = perf_event__process_mmap2, .comm = perf_event__process_comm, .exit = perf_event__process_exit, .fork = perf_event__process_fork, diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index b6f0725068bd..f5aa6375e3e9 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -100,7 +100,9 @@ static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size, P_MMAP_FLAG(SHARED); P_MMAP_FLAG(PRIVATE); +#ifdef MAP_32BIT P_MMAP_FLAG(32BIT); +#endif P_MMAP_FLAG(ANONYMOUS); P_MMAP_FLAG(DENYWRITE); P_MMAP_FLAG(EXECUTABLE); @@ -994,6 +996,9 @@ again: handler = evsel->handler.func; handler(trace, evsel, &sample); + + if (done) + goto out_unmap_evlist; } } diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index 8bbeba322df9..1e67437fb4ca 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -112,6 +112,10 @@ static struct test { .func = test__keep_tracking, }, { + .desc = "Test parsing with no sample_id_all bit set", + .func = test__parse_no_sample_id_all, + }, + { .func = NULL, }, }; diff --git a/tools/perf/tests/parse-no-sample-id-all.c b/tools/perf/tests/parse-no-sample-id-all.c new file mode 100644 index 000000000000..e117b6c6a248 --- /dev/null +++ b/tools/perf/tests/parse-no-sample-id-all.c @@ -0,0 +1,108 @@ +#include <sys/types.h> +#include <stddef.h> + +#include "tests.h" + +#include "event.h" +#include "evlist.h" +#include "header.h" +#include "util.h" + +static int process_event(struct perf_evlist **pevlist, union perf_event *event) +{ + struct perf_sample sample; + + if (event->header.type == PERF_RECORD_HEADER_ATTR) { + if (perf_event__process_attr(NULL, event, pevlist)) { + pr_debug("perf_event__process_attr failed\n"); + return -1; + } + return 0; + } + + if (event->header.type >= PERF_RECORD_USER_TYPE_START) + return -1; + + if (!*pevlist) + return -1; + + if (perf_evlist__parse_sample(*pevlist, event, &sample)) { + pr_debug("perf_evlist__parse_sample failed\n"); + return -1; + } + + return 0; +} + +static int process_events(union perf_event **events, size_t count) +{ + struct perf_evlist *evlist = NULL; + int err = 0; + size_t i; + + for (i = 0; i < count && !err; i++) + err = process_event(&evlist, events[i]); + + if (evlist) + perf_evlist__delete(evlist); + + return err; +} + +struct test_attr_event { + struct attr_event attr; + u64 id; +}; + +/** + * test__parse_no_sample_id_all - test parsing with no sample_id_all bit set. + * + * This function tests parsing data produced on kernel's that do not support the + * sample_id_all bit. Without the sample_id_all bit, non-sample events (such as + * mmap events) do not have an id sample appended, and consequently logic + * designed to determine the id will not work. That case happens when there is + * more than one selected event, so this test processes three events: 2 + * attributes representing the selected events and one mmap event. + * + * Return: %0 on success, %-1 if the test fails. + */ +int test__parse_no_sample_id_all(void) +{ + int err; + + struct test_attr_event event1 = { + .attr = { + .header = { + .type = PERF_RECORD_HEADER_ATTR, + .size = sizeof(struct test_attr_event), + }, + }, + .id = 1, + }; + struct test_attr_event event2 = { + .attr = { + .header = { + .type = PERF_RECORD_HEADER_ATTR, + .size = sizeof(struct test_attr_event), + }, + }, + .id = 2, + }; + struct mmap_event event3 = { + .header = { + .type = PERF_RECORD_MMAP, + .size = sizeof(struct mmap_event), + }, + }; + union perf_event *events[] = { + (union perf_event *)&event1, + (union perf_event *)&event2, + (union perf_event *)&event3, + }; + + err = process_events(events, ARRAY_SIZE(events)); + if (err) + return -1; + + return 0; +} diff --git a/tools/perf/tests/perf-record.c b/tools/perf/tests/perf-record.c index 72d8881873b0..b8a7056519ac 100644 --- a/tools/perf/tests/perf-record.c +++ b/tools/perf/tests/perf-record.c @@ -50,7 +50,7 @@ int test__PERF_RECORD(void) struct perf_sample sample; const char *cmd = "sleep"; const char *argv[] = { cmd, "1", NULL, }; - char *bname; + char *bname, *mmap_filename; u64 prev_time = 0; bool found_cmd_mmap = false, found_libc_mmap = false, @@ -212,6 +212,7 @@ int test__PERF_RECORD(void) if ((type == PERF_RECORD_COMM || type == PERF_RECORD_MMAP || + type == PERF_RECORD_MMAP2 || type == PERF_RECORD_FORK || type == PERF_RECORD_EXIT) && (pid_t)event->comm.pid != evlist->workload.pid) { @@ -220,7 +221,8 @@ int test__PERF_RECORD(void) } if ((type == PERF_RECORD_COMM || - type == PERF_RECORD_MMAP) && + type == PERF_RECORD_MMAP || + type == PERF_RECORD_MMAP2) && event->comm.pid != event->comm.tid) { pr_debug("%s with different pid/tid!\n", name); ++errs; @@ -236,7 +238,12 @@ int test__PERF_RECORD(void) case PERF_RECORD_EXIT: goto found_exit; case PERF_RECORD_MMAP: - bname = strrchr(event->mmap.filename, '/'); + mmap_filename = event->mmap.filename; + goto check_bname; + case PERF_RECORD_MMAP2: + mmap_filename = event->mmap2.filename; + check_bname: + bname = strrchr(mmap_filename, '/'); if (bname != NULL) { if (!found_cmd_mmap) found_cmd_mmap = !strcmp(bname + 1, cmd); @@ -245,7 +252,7 @@ int test__PERF_RECORD(void) if (!found_ld_mmap) found_ld_mmap = !strncmp(bname + 1, "ld", 2); } else if (!found_vdso_mmap) - found_vdso_mmap = !strcmp(event->mmap.filename, "[vdso]"); + found_vdso_mmap = !strcmp(mmap_filename, "[vdso]"); break; case PERF_RECORD_SAMPLE: diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h index c048b589998a..e0ac713857ba 100644 --- a/tools/perf/tests/tests.h +++ b/tools/perf/tests/tests.h @@ -39,5 +39,6 @@ int test__perf_time_to_tsc(void); int test__code_reading(void); int test__sample_parsing(void); int test__keep_tracking(void); +int test__parse_no_sample_id_all(void); #endif /* TESTS_H */ diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c index 5b4fb330f656..194e2f42ff5d 100644 --- a/tools/perf/ui/stdio/hist.c +++ b/tools/perf/ui/stdio/hist.c @@ -350,9 +350,9 @@ static int hist_entry__period_snprintf(struct perf_hpp *hpp, } static int hist_entry__fprintf(struct hist_entry *he, size_t size, - struct hists *hists, FILE *fp) + struct hists *hists, + char *bf, size_t bfsz, FILE *fp) { - char bf[512]; int ret; struct perf_hpp hpp = { .buf = bf, @@ -360,8 +360,8 @@ static int hist_entry__fprintf(struct hist_entry *he, size_t size, }; bool color = !symbol_conf.field_sep; - if (size == 0 || size > sizeof(bf)) - size = hpp.size = sizeof(bf); + if (size == 0 || size > bfsz) + size = hpp.size = bfsz; ret = hist_entry__period_snprintf(&hpp, he, color); hist_entry__sort_snprintf(he, bf + ret, size - ret, hists); @@ -392,6 +392,8 @@ size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows, .ptr = hists_to_evsel(hists), }; bool first = true; + size_t linesz; + char *line = NULL; init_rem_hits(); @@ -479,6 +481,13 @@ size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows, goto out; print_entries: + linesz = hists__sort_list_width(hists) + 3 + 1; + line = malloc(linesz); + if (line == NULL) { + ret = -1; + goto out; + } + for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) { struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); float percent = h->stat.period * 100.0 / @@ -490,10 +499,10 @@ print_entries: if (percent < min_pcnt) continue; - ret += hist_entry__fprintf(h, max_cols, hists, fp); + ret += hist_entry__fprintf(h, max_cols, hists, line, linesz, fp); if (max_rows && ++nr_rows >= max_rows) - goto out; + break; if (h->ms.map == NULL && verbose > 1) { __map_groups__fprintf_maps(&h->thread->mg, @@ -501,6 +510,8 @@ print_entries: fprintf(fp, "%.10s end\n", graph_dotted_line); } } + + free(line); out: free(rem_sq_bracket); diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index fb584092eb88..7ded71d19d75 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -67,6 +67,7 @@ static int perf_event__exit_del_thread(struct perf_tool *tool __maybe_unused, struct perf_tool build_id__mark_dso_hit_ops = { .sample = build_id__mark_dso_hit, .mmap = perf_event__process_mmap, + .mmap2 = perf_event__process_mmap2, .fork = perf_event__process_fork, .exit = perf_event__exit_del_thread, .attr = perf_event__process_attr, diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 8d51f21107aa..9b393e7dca6f 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -11,6 +11,7 @@ static const char *perf_event__names[] = { [0] = "TOTAL", [PERF_RECORD_MMAP] = "MMAP", + [PERF_RECORD_MMAP2] = "MMAP2", [PERF_RECORD_LOST] = "LOST", [PERF_RECORD_COMM] = "COMM", [PERF_RECORD_EXIT] = "EXIT", @@ -186,7 +187,7 @@ static int perf_event__synthesize_mmap_events(struct perf_tool *tool, return -1; } - event->header.type = PERF_RECORD_MMAP; + event->header.type = PERF_RECORD_MMAP2; /* * Just like the kernel, see __perf_event_mmap in kernel/perf_event.c */ @@ -197,7 +198,9 @@ static int perf_event__synthesize_mmap_events(struct perf_tool *tool, char prot[5]; char execname[PATH_MAX]; char anonstr[] = "//anon"; + unsigned int ino; size_t size; + ssize_t n; if (fgets(bf, sizeof(bf), fp) == NULL) break; @@ -206,9 +209,16 @@ static int perf_event__synthesize_mmap_events(struct perf_tool *tool, strcpy(execname, ""); /* 00400000-0040c000 r-xp 00000000 fd:01 41038 /bin/cat */ - sscanf(bf, "%"PRIx64"-%"PRIx64" %s %"PRIx64" %*x:%*x %*u %s\n", - &event->mmap.start, &event->mmap.len, prot, - &event->mmap.pgoff, execname); + n = sscanf(bf, "%"PRIx64"-%"PRIx64" %s %"PRIx64" %x:%x %u %s\n", + &event->mmap2.start, &event->mmap2.len, prot, + &event->mmap2.pgoff, &event->mmap2.maj, + &event->mmap2.min, + &ino, execname); + + event->mmap2.ino = (u64)ino; + + if (n != 8) + continue; if (prot[2] != 'x') continue; @@ -217,15 +227,15 @@ static int perf_event__synthesize_mmap_events(struct perf_tool *tool, strcpy(execname, anonstr); size = strlen(execname) + 1; - memcpy(event->mmap.filename, execname, size); + memcpy(event->mmap2.filename, execname, size); size = PERF_ALIGN(size, sizeof(u64)); - event->mmap.len -= event->mmap.start; - event->mmap.header.size = (sizeof(event->mmap) - - (sizeof(event->mmap.filename) - size)); - memset(event->mmap.filename + size, 0, machine->id_hdr_size); - event->mmap.header.size += machine->id_hdr_size; - event->mmap.pid = tgid; - event->mmap.tid = pid; + event->mmap2.len -= event->mmap.start; + event->mmap2.header.size = (sizeof(event->mmap2) - + (sizeof(event->mmap2.filename) - size)); + memset(event->mmap2.filename + size, 0, machine->id_hdr_size); + event->mmap2.header.size += machine->id_hdr_size; + event->mmap2.pid = tgid; + event->mmap2.tid = pid; if (process(tool, event, &synth_sample, machine) != 0) { rc = -1; @@ -527,6 +537,17 @@ size_t perf_event__fprintf_mmap(union perf_event *event, FILE *fp) event->mmap.len, event->mmap.pgoff, event->mmap.filename); } +size_t perf_event__fprintf_mmap2(union perf_event *event, FILE *fp) +{ + return fprintf(fp, " %d/%d: [%#" PRIx64 "(%#" PRIx64 ") @ %#" PRIx64 + " %02x:%02x %"PRIu64" %"PRIu64"]: %s\n", + event->mmap2.pid, event->mmap2.tid, event->mmap2.start, + event->mmap2.len, event->mmap2.pgoff, event->mmap2.maj, + event->mmap2.min, event->mmap2.ino, + event->mmap2.ino_generation, + event->mmap2.filename); +} + int perf_event__process_mmap(struct perf_tool *tool __maybe_unused, union perf_event *event, struct perf_sample *sample __maybe_unused, @@ -535,6 +556,14 @@ int perf_event__process_mmap(struct perf_tool *tool __maybe_unused, return machine__process_mmap_event(machine, event); } +int perf_event__process_mmap2(struct perf_tool *tool __maybe_unused, + union perf_event *event, + struct perf_sample *sample __maybe_unused, + struct machine *machine) +{ + return machine__process_mmap2_event(machine, event); +} + size_t perf_event__fprintf_task(union perf_event *event, FILE *fp) { return fprintf(fp, "(%d:%d):(%d:%d)\n", @@ -574,6 +603,9 @@ size_t perf_event__fprintf(union perf_event *event, FILE *fp) case PERF_RECORD_MMAP: ret += perf_event__fprintf_mmap(event, fp); break; + case PERF_RECORD_MMAP2: + ret += perf_event__fprintf_mmap2(event, fp); + break; default: ret += fprintf(fp, "\n"); } diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 93130d856bf0..c67ecc457d29 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -17,6 +17,19 @@ struct mmap_event { char filename[PATH_MAX]; }; +struct mmap2_event { + struct perf_event_header header; + u32 pid, tid; + u64 start; + u64 len; + u64 pgoff; + u32 maj; + u32 min; + u64 ino; + u64 ino_generation; + char filename[PATH_MAX]; +}; + struct comm_event { struct perf_event_header header; u32 pid, tid; @@ -159,6 +172,7 @@ struct tracing_data_event { union perf_event { struct perf_event_header header; struct mmap_event mmap; + struct mmap2_event mmap2; struct comm_event comm; struct fork_event fork; struct lost_event lost; @@ -208,6 +222,10 @@ int perf_event__process_mmap(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, struct machine *machine); +int perf_event__process_mmap2(struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine); int perf_event__process_fork(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, @@ -238,6 +256,7 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type, size_t perf_event__fprintf_comm(union perf_event *event, FILE *fp); size_t perf_event__fprintf_mmap(union perf_event *event, FILE *fp); +size_t perf_event__fprintf_mmap2(union perf_event *event, FILE *fp); size_t perf_event__fprintf_task(union perf_event *event, FILE *fp); size_t perf_event__fprintf(union perf_event *event, FILE *fp); diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index b8727ae45e3b..f9f77bee0b1b 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -64,6 +64,16 @@ void perf_evlist__set_id_pos(struct perf_evlist *evlist) evlist->is_pos = first->is_pos; } +static void perf_evlist__update_id_pos(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel; + + list_for_each_entry(evsel, &evlist->entries, node) + perf_evsel__calc_id_pos(evsel); + + perf_evlist__set_id_pos(evlist); +} + static void perf_evlist__purge(struct perf_evlist *evlist) { struct perf_evsel *pos, *n; @@ -446,20 +456,25 @@ static int perf_evlist__event2id(struct perf_evlist *evlist, static struct perf_evsel *perf_evlist__event2evsel(struct perf_evlist *evlist, union perf_event *event) { + struct perf_evsel *first = perf_evlist__first(evlist); struct hlist_head *head; struct perf_sample_id *sid; int hash; u64 id; if (evlist->nr_entries == 1) - return perf_evlist__first(evlist); + return first; + + if (!first->attr.sample_id_all && + event->header.type != PERF_RECORD_SAMPLE) + return first; if (perf_evlist__event2id(evlist, event, &id)) return NULL; /* Synthesized events have an id of zero */ if (!id) - return perf_evlist__first(evlist); + return first; hash = hash_64(id, PERF_EVLIST__HLIST_BITS); head = &evlist->heads[hash]; @@ -915,6 +930,8 @@ int perf_evlist__open(struct perf_evlist *evlist) struct perf_evsel *evsel; int err; + perf_evlist__update_id_pos(evlist); + list_for_each_entry(evsel, &evlist->entries, node) { err = perf_evsel__open(evsel, evlist->cpus, evlist->threads); if (err < 0) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 3612183e2cc5..0ce9febf1ba0 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -27,6 +27,7 @@ static struct { bool sample_id_all; bool exclude_guest; + bool mmap2; } perf_missing_features; #define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y)) @@ -676,8 +677,9 @@ void perf_evsel__config(struct perf_evsel *evsel, if (opts->sample_weight) attr->sample_type |= PERF_SAMPLE_WEIGHT; - attr->mmap = track; - attr->comm = track; + attr->mmap = track; + attr->mmap2 = track && !perf_missing_features.mmap2; + attr->comm = track; /* * XXX see the function comment above @@ -1016,6 +1018,8 @@ static int __perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus, } fallback_missing_features: + if (perf_missing_features.mmap2) + evsel->attr.mmap2 = 0; if (perf_missing_features.exclude_guest) evsel->attr.exclude_guest = evsel->attr.exclude_host = 0; retry_sample_id: @@ -1080,8 +1084,11 @@ try_fallback: if (err != -EINVAL || cpu > 0 || thread > 0) goto out_close; - if (!perf_missing_features.exclude_guest && - (evsel->attr.exclude_guest || evsel->attr.exclude_host)) { + if (!perf_missing_features.mmap2 && evsel->attr.mmap2) { + perf_missing_features.mmap2 = true; + goto fallback_missing_features; + } else if (!perf_missing_features.exclude_guest && + (evsel->attr.exclude_guest || evsel->attr.exclude_host)) { perf_missing_features.exclude_guest = true; goto fallback_missing_features; } else if (!perf_missing_features.sample_id_all) { @@ -1925,6 +1932,7 @@ int perf_evsel__fprintf(struct perf_evsel *evsel, if_print(exclude_hv); if_print(exclude_idle); if_print(mmap); + if_print(mmap2); if_print(comm); if_print(freq); if_print(inherit_stat); diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index a33197a4fd21..26441d0e571b 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -1351,6 +1351,9 @@ static void print_event_desc(struct perf_header *ph, int fd, FILE *fp) fprintf(fp, ", precise_ip = %d", evsel->attr.precise_ip); + fprintf(fp, ", attr_mmap2 = %d", evsel->attr.mmap2); + fprintf(fp, ", attr_mmap = %d", evsel->attr.mmap); + fprintf(fp, ", attr_mmap_data = %d", evsel->attr.mmap_data); if (evsel->ids) { fprintf(fp, ", id = {"); for (j = 0, id = evsel->id; j < evsel->ids; j++, id++) { diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 1dca61f0512d..933d14f287ca 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -997,6 +997,54 @@ out_problem: return -1; } +int machine__process_mmap2_event(struct machine *machine, + union perf_event *event) +{ + u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; + struct thread *thread; + struct map *map; + enum map_type type; + int ret = 0; + + if (dump_trace) + perf_event__fprintf_mmap2(event, stdout); + + if (cpumode == PERF_RECORD_MISC_GUEST_KERNEL || + cpumode == PERF_RECORD_MISC_KERNEL) { + ret = machine__process_kernel_mmap_event(machine, event); + if (ret < 0) + goto out_problem; + return 0; + } + + thread = machine__findnew_thread(machine, event->mmap2.pid, + event->mmap2.pid); + if (thread == NULL) + goto out_problem; + + if (event->header.misc & PERF_RECORD_MISC_MMAP_DATA) + type = MAP__VARIABLE; + else + type = MAP__FUNCTION; + + map = map__new(&machine->user_dsos, event->mmap2.start, + event->mmap2.len, event->mmap2.pgoff, + event->mmap2.pid, event->mmap2.maj, + event->mmap2.min, event->mmap2.ino, + event->mmap2.ino_generation, + event->mmap2.filename, type); + + if (map == NULL) + goto out_problem; + + thread__insert_map(thread, map); + return 0; + +out_problem: + dump_printf("problem processing PERF_RECORD_MMAP2, skipping event.\n"); + return 0; +} + int machine__process_mmap_event(struct machine *machine, union perf_event *event) { u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; @@ -1028,7 +1076,8 @@ int machine__process_mmap_event(struct machine *machine, union perf_event *event map = map__new(&machine->user_dsos, event->mmap.start, event->mmap.len, event->mmap.pgoff, - event->mmap.pid, event->mmap.filename, + event->mmap.pid, 0, 0, 0, 0, + event->mmap.filename, type); if (map == NULL) @@ -1101,6 +1150,8 @@ int machine__process_event(struct machine *machine, union perf_event *event) ret = machine__process_comm_event(machine, event); break; case PERF_RECORD_MMAP: ret = machine__process_mmap_event(machine, event); break; + case PERF_RECORD_MMAP2: + ret = machine__process_mmap2_event(machine, event); break; case PERF_RECORD_FORK: ret = machine__process_fork_event(machine, event); break; case PERF_RECORD_EXIT: diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index 0df925ba6a44..58a6be1fc739 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h @@ -45,6 +45,7 @@ int machine__process_exit_event(struct machine *machine, union perf_event *event int machine__process_fork_event(struct machine *machine, union perf_event *event); int machine__process_lost_event(struct machine *machine, union perf_event *event); int machine__process_mmap_event(struct machine *machine, union perf_event *event); +int machine__process_mmap2_event(struct machine *machine, union perf_event *event); int machine__process_event(struct machine *machine, union perf_event *event); typedef void (*machine__process_t)(struct machine *machine, void *data); diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index 9e8304ca343e..4f6680d2043b 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -48,7 +48,8 @@ void map__init(struct map *map, enum map_type type, } struct map *map__new(struct list_head *dsos__list, u64 start, u64 len, - u64 pgoff, u32 pid, char *filename, + u64 pgoff, u32 pid, u32 d_maj, u32 d_min, u64 ino, + u64 ino_gen, char *filename, enum map_type type) { struct map *map = malloc(sizeof(*map)); @@ -62,6 +63,11 @@ struct map *map__new(struct list_head *dsos__list, u64 start, u64 len, vdso = is_vdso_map(filename); no_dso = is_no_dso_memory(filename); + map->maj = d_maj; + map->min = d_min; + map->ino = ino; + map->ino_generation = ino_gen; + if (anon) { snprintf(newfilename, sizeof(newfilename), "/tmp/perf-%d.map", pid); filename = newfilename; diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h index 2cc93cbf0e17..4886ca280536 100644 --- a/tools/perf/util/map.h +++ b/tools/perf/util/map.h @@ -36,6 +36,9 @@ struct map { bool erange_warned; u32 priv; u64 pgoff; + u32 maj, min; /* only valid for MMAP2 record */ + u64 ino; /* only valid for MMAP2 record */ + u64 ino_generation;/* only valid for MMAP2 record */ /* ip -> dso rip */ u64 (*map_ip)(struct map *, u64); @@ -88,8 +91,9 @@ typedef int (*symbol_filter_t)(struct map *map, struct symbol *sym); void map__init(struct map *map, enum map_type type, u64 start, u64 end, u64 pgoff, struct dso *dso); struct map *map__new(struct list_head *dsos__list, u64 start, u64 len, - u64 pgoff, u32 pid, char *filename, - enum map_type type); + u64 pgoff, u32 pid, u32 d_maj, u32 d_min, u64 ino, + u64 ino_gen, + char *filename, enum map_type type); struct map *map__new2(u64 start, struct dso *dso, enum map_type type); void map__delete(struct map *map); struct map *map__clone(struct map *map); diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 1fc0c628683e..51f5edf2a6d0 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -351,6 +351,25 @@ static void perf_event__mmap_swap(union perf_event *event, } } +static void perf_event__mmap2_swap(union perf_event *event, + bool sample_id_all) +{ + event->mmap2.pid = bswap_32(event->mmap2.pid); + event->mmap2.tid = bswap_32(event->mmap2.tid); + event->mmap2.start = bswap_64(event->mmap2.start); + event->mmap2.len = bswap_64(event->mmap2.len); + event->mmap2.pgoff = bswap_64(event->mmap2.pgoff); + event->mmap2.maj = bswap_32(event->mmap2.maj); + event->mmap2.min = bswap_32(event->mmap2.min); + event->mmap2.ino = bswap_64(event->mmap2.ino); + + if (sample_id_all) { + void *data = &event->mmap2.filename; + + data += PERF_ALIGN(strlen(data) + 1, sizeof(u64)); + swap_sample_id_all(event, data); + } +} static void perf_event__task_swap(union perf_event *event, bool sample_id_all) { event->fork.pid = bswap_32(event->fork.pid); @@ -455,6 +474,7 @@ typedef void (*perf_event__swap_op)(union perf_event *event, static perf_event__swap_op perf_event__swap_ops[] = { [PERF_RECORD_MMAP] = perf_event__mmap_swap, + [PERF_RECORD_MMAP2] = perf_event__mmap2_swap, [PERF_RECORD_COMM] = perf_event__comm_swap, [PERF_RECORD_FORK] = perf_event__task_swap, [PERF_RECORD_EXIT] = perf_event__task_swap, @@ -504,6 +524,7 @@ static int flush_sample_queue(struct perf_session *s, u64 limit = os->next_flush; u64 last_ts = os->last_sample ? os->last_sample->timestamp : 0ULL; unsigned idx = 0, progress_next = os->nr_samples / 16; + bool show_progress = limit == ULLONG_MAX; int ret; if (!tool->ordered_samples || !limit) @@ -526,7 +547,7 @@ static int flush_sample_queue(struct perf_session *s, os->last_flush = iter->timestamp; list_del(&iter->list); list_add(&iter->list, &os->sample_cache); - if (++idx >= progress_next) { + if (show_progress && (++idx >= progress_next)) { progress_next += os->nr_samples / 16; ui_progress__update(idx, os->nr_samples, "Processing time ordered events..."); @@ -850,7 +871,8 @@ static struct machine * (cpumode == PERF_RECORD_MISC_GUEST_USER))) { u32 pid; - if (event->header.type == PERF_RECORD_MMAP) + if (event->header.type == PERF_RECORD_MMAP + || event->header.type == PERF_RECORD_MMAP2) pid = event->mmap.pid; else pid = sample->pid; @@ -977,6 +999,8 @@ static int perf_session_deliver_event(struct perf_session *session, sample, evsel, machine); case PERF_RECORD_MMAP: return tool->mmap(tool, event, sample, machine); + case PERF_RECORD_MMAP2: + return tool->mmap2(tool, event, sample, machine); case PERF_RECORD_COMM: return tool->comm(tool, event, sample, machine); case PERF_RECORD_FORK: @@ -1619,52 +1643,26 @@ int __perf_session__set_tracepoints_handlers(struct perf_session *session, const struct perf_evsel_str_handler *assocs, size_t nr_assocs) { - struct perf_evlist *evlist = session->evlist; - struct event_format *format; struct perf_evsel *evsel; - char *tracepoint, *name; size_t i; int err; for (i = 0; i < nr_assocs; i++) { - err = -ENOMEM; - tracepoint = strdup(assocs[i].name); - if (tracepoint == NULL) - goto out; - - err = -ENOENT; - name = strchr(tracepoint, ':'); - if (name == NULL) - goto out_free; - - *name++ = '\0'; - format = pevent_find_event_by_name(session->pevent, - tracepoint, name); - if (format == NULL) { - /* - * Adding a handler for an event not in the session, - * just ignore it. - */ - goto next; - } - - evsel = perf_evlist__find_tracepoint_by_id(evlist, format->id); + /* + * Adding a handler for an event not in the session, + * just ignore it. + */ + evsel = perf_evlist__find_tracepoint_by_name(session->evlist, assocs[i].name); if (evsel == NULL) - goto next; + continue; err = -EEXIST; if (evsel->handler.func != NULL) - goto out_free; + goto out; evsel->handler.func = assocs[i].handler; -next: - free(tracepoint); } err = 0; out: return err; - -out_free: - free(tracepoint); - goto out; } diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h index 62b16b6165ba..4385816d3d49 100644 --- a/tools/perf/util/tool.h +++ b/tools/perf/util/tool.h @@ -29,6 +29,7 @@ struct perf_tool { event_sample sample, read; event_op mmap, + mmap2, comm, fork, exit, |