diff options
author | Anton Vorontsov <anton.vorontsov@linaro.org> | 2012-12-11 22:15:57 -0800 |
---|---|---|
committer | Anton Vorontsov <anton.vorontsov@linaro.org> | 2012-12-11 22:15:57 -0800 |
commit | 76d8a23b127020472207b281427d3e9f4f1227e4 (patch) | |
tree | e14d7063d96d850fb259115d6fb08cbeb98ccf88 /Documentation | |
parent | eba3b670a9166a91be5a11fe33290dca6b9457a2 (diff) | |
parent | 1ebaf4f4e6912199f8a4e30ba3ab55da2b71bcdf (diff) | |
download | linux-76d8a23b127020472207b281427d3e9f4f1227e4.tar.bz2 |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux
The merge is merely to fix conflicts before sending a pull request.
Conflicts:
drivers/power/ab8500_btemp.c
drivers/power/ab8500_charger.c
drivers/power/ab8500_fg.c
drivers/power/abx500_chargalg.c
drivers/power/max8925_power.c
Signed-off-by: Anton Vorontsov <anton.vorontsov@linaro.org>
Diffstat (limited to 'Documentation')
82 files changed, 1871 insertions, 736 deletions
diff --git a/Documentation/ABI/testing/sysfs-bus-iio b/Documentation/ABI/testing/sysfs-bus-iio index 2f06d40fe07d..2e33dc6b2346 100644 --- a/Documentation/ABI/testing/sysfs-bus-iio +++ b/Documentation/ABI/testing/sysfs-bus-iio @@ -189,6 +189,14 @@ Description: A computed peak value based on the sum squared magnitude of the underlying value in the specified directions. +What: /sys/bus/iio/devices/iio:deviceX/in_pressureY_raw +What: /sys/bus/iio/devices/iio:deviceX/in_pressure_raw +KernelVersion: 3.8 +Contact: linux-iio@vger.kernel.org +Description: + Raw pressure measurement from channel Y. Units after + application of scale and offset are kilopascal. + What: /sys/bus/iio/devices/iio:deviceX/in_accel_offset What: /sys/bus/iio/devices/iio:deviceX/in_accel_x_offset What: /sys/bus/iio/devices/iio:deviceX/in_accel_y_offset @@ -197,6 +205,8 @@ What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_offset What: /sys/bus/iio/devices/iio:deviceX/in_voltage_offset What: /sys/bus/iio/devices/iio:deviceX/in_tempY_offset What: /sys/bus/iio/devices/iio:deviceX/in_temp_offset +What: /sys/bus/iio/devices/iio:deviceX/in_pressureY_offset +What: /sys/bus/iio/devices/iio:deviceX/in_pressure_offset KernelVersion: 2.6.35 Contact: linux-iio@vger.kernel.org Description: @@ -226,6 +236,8 @@ What: /sys/bus/iio/devices/iio:deviceX/in_magn_scale What: /sys/bus/iio/devices/iio:deviceX/in_magn_x_scale What: /sys/bus/iio/devices/iio:deviceX/in_magn_y_scale What: /sys/bus/iio/devices/iio:deviceX/in_magn_z_scale +What: /sys/bus/iio/devices/iio:deviceX/in_pressureY_scale +What: /sys/bus/iio/devices/iio:deviceX/in_pressure_scale KernelVersion: 2.6.35 Contact: linux-iio@vger.kernel.org Description: @@ -245,6 +257,8 @@ What: /sys/bus/iio/devices/iio:deviceX/in_anglvel_y_calibbias What: /sys/bus/iio/devices/iio:deviceX/in_anglvel_z_calibbias What: /sys/bus/iio/devices/iio:deviceX/in_illuminance0_calibbias What: /sys/bus/iio/devices/iio:deviceX/in_proximity0_calibbias +What: /sys/bus/iio/devices/iio:deviceX/in_pressureY_calibbias +What: /sys/bus/iio/devices/iio:deviceX/in_pressure_calibbias KernelVersion: 2.6.35 Contact: linux-iio@vger.kernel.org Description: @@ -262,6 +276,8 @@ What /sys/bus/iio/devices/iio:deviceX/in_anglvel_y_calibscale What /sys/bus/iio/devices/iio:deviceX/in_anglvel_z_calibscale what /sys/bus/iio/devices/iio:deviceX/in_illuminance0_calibscale what /sys/bus/iio/devices/iio:deviceX/in_proximity0_calibscale +What: /sys/bus/iio/devices/iio:deviceX/in_pressureY_calibscale +What: /sys/bus/iio/devices/iio:deviceX/in_pressure_calibscale KernelVersion: 2.6.35 Contact: linux-iio@vger.kernel.org Description: @@ -275,6 +291,8 @@ What: /sys/.../iio:deviceX/in_voltage-voltage_scale_available What: /sys/.../iio:deviceX/out_voltageX_scale_available What: /sys/.../iio:deviceX/out_altvoltageX_scale_available What: /sys/.../iio:deviceX/in_capacitance_scale_available +What: /sys/.../iio:deviceX/in_pressure_scale_available +What: /sys/.../iio:deviceX/in_pressureY_scale_available KernelVersion: 2.6.35 Contact: linux-iio@vger.kernel.org Description: @@ -694,6 +712,8 @@ What: /sys/.../buffer/scan_elements/in_voltageY_en What: /sys/.../buffer/scan_elements/in_voltageY-voltageZ_en What: /sys/.../buffer/scan_elements/in_incli_x_en What: /sys/.../buffer/scan_elements/in_incli_y_en +What: /sys/.../buffer/scan_elements/in_pressureY_en +What: /sys/.../buffer/scan_elements/in_pressure_en KernelVersion: 2.6.37 Contact: linux-iio@vger.kernel.org Description: @@ -707,6 +727,8 @@ What: /sys/.../buffer/scan_elements/in_voltageY_type What: /sys/.../buffer/scan_elements/in_voltage_type What: /sys/.../buffer/scan_elements/in_voltageY_supply_type What: /sys/.../buffer/scan_elements/in_timestamp_type +What: /sys/.../buffer/scan_elements/in_pressureY_type +What: /sys/.../buffer/scan_elements/in_pressure_type KernelVersion: 2.6.37 Contact: linux-iio@vger.kernel.org Description: @@ -751,6 +773,8 @@ What: /sys/.../buffer/scan_elements/in_magn_z_index What: /sys/.../buffer/scan_elements/in_incli_x_index What: /sys/.../buffer/scan_elements/in_incli_y_index What: /sys/.../buffer/scan_elements/in_timestamp_index +What: /sys/.../buffer/scan_elements/in_pressureY_index +What: /sys/.../buffer/scan_elements/in_pressure_index KernelVersion: 2.6.37 Contact: linux-iio@vger.kernel.org Description: diff --git a/Documentation/ABI/testing/sysfs-class-devfreq b/Documentation/ABI/testing/sysfs-class-devfreq index 23d78b5aab11..0ba6ea2f89d9 100644 --- a/Documentation/ABI/testing/sysfs-class-devfreq +++ b/Documentation/ABI/testing/sysfs-class-devfreq @@ -11,7 +11,7 @@ What: /sys/class/devfreq/.../governor Date: September 2011 Contact: MyungJoo Ham <myungjoo.ham@samsung.com> Description: - The /sys/class/devfreq/.../governor shows the name of the + The /sys/class/devfreq/.../governor show or set the name of the governor used by the corresponding devfreq object. What: /sys/class/devfreq/.../cur_freq @@ -19,15 +19,16 @@ Date: September 2011 Contact: MyungJoo Ham <myungjoo.ham@samsung.com> Description: The /sys/class/devfreq/.../cur_freq shows the current - frequency of the corresponding devfreq object. + frequency of the corresponding devfreq object. Same as + target_freq when get_cur_freq() is not implemented by + devfreq driver. -What: /sys/class/devfreq/.../central_polling -Date: September 2011 -Contact: MyungJoo Ham <myungjoo.ham@samsung.com> +What: /sys/class/devfreq/.../target_freq +Date: September 2012 +Contact: Rajagopal Venkat <rajagopal.venkat@linaro.org> Description: - The /sys/class/devfreq/.../central_polling shows whether - the devfreq ojbect is using devfreq-provided central - polling mechanism or not. + The /sys/class/devfreq/.../target_freq shows the next governor + predicted target frequency of the corresponding devfreq object. What: /sys/class/devfreq/.../polling_interval Date: September 2011 @@ -43,6 +44,17 @@ Description: (/sys/class/devfreq/.../central_polling is 0), this value may be useless. +What: /sys/class/devfreq/.../trans_stat +Date: October 2012 +Contact: MyungJoo Ham <myungjoo.ham@samsung.com> +Descrtiption: + This ABI shows the statistics of devfreq behavior on a + specific device. It shows the time spent in each state and + the number of transitions between states. + In order to activate this ABI, the devfreq target device + driver should provide the list of available frequencies + with its profile. + What: /sys/class/devfreq/.../userspace/set_freq Date: September 2011 Contact: MyungJoo Ham <myungjoo.ham@samsung.com> @@ -50,3 +62,19 @@ Description: The /sys/class/devfreq/.../userspace/set_freq shows and sets the requested frequency for the devfreq object if userspace governor is in effect. + +What: /sys/class/devfreq/.../available_frequencies +Date: October 2012 +Contact: Nishanth Menon <nm@ti.com> +Description: + The /sys/class/devfreq/.../available_frequencies shows + the available frequencies of the corresponding devfreq object. + This is a snapshot of available frequencies and not limited + by the min/max frequency restrictions. + +What: /sys/class/devfreq/.../available_governors +Date: October 2012 +Contact: Nishanth Menon <nm@ti.com> +Description: + The /sys/class/devfreq/.../available_governors shows + currently available governors in the system. diff --git a/Documentation/ABI/testing/sysfs-devices-power b/Documentation/ABI/testing/sysfs-devices-power index 45000f0db4d4..7fc2997b23a6 100644 --- a/Documentation/ABI/testing/sysfs-devices-power +++ b/Documentation/ABI/testing/sysfs-devices-power @@ -204,3 +204,34 @@ Description: This attribute has no effect on system-wide suspend/resume and hibernation. + +What: /sys/devices/.../power/pm_qos_no_power_off +Date: September 2012 +Contact: Rafael J. Wysocki <rjw@sisk.pl> +Description: + The /sys/devices/.../power/pm_qos_no_power_off attribute + is used for manipulating the PM QoS "no power off" flag. If + set, this flag indicates to the kernel that power should not + be removed entirely from the device. + + Not all drivers support this attribute. If it isn't supported, + it is not present. + + This attribute has no effect on system-wide suspend/resume and + hibernation. + +What: /sys/devices/.../power/pm_qos_remote_wakeup +Date: September 2012 +Contact: Rafael J. Wysocki <rjw@sisk.pl> +Description: + The /sys/devices/.../power/pm_qos_remote_wakeup attribute + is used for manipulating the PM QoS "remote wakeup required" + flag. If set, this flag indicates to the kernel that the + device is a source of user events that have to be signaled from + its low-power states. + + Not all drivers support this attribute. If it isn't supported, + it is not present. + + This attribute has no effect on system-wide suspend/resume and + hibernation. diff --git a/Documentation/ABI/testing/sysfs-devices-sun b/Documentation/ABI/testing/sysfs-devices-sun new file mode 100644 index 000000000000..86be9848a77e --- /dev/null +++ b/Documentation/ABI/testing/sysfs-devices-sun @@ -0,0 +1,14 @@ +Whatt: /sys/devices/.../sun +Date: October 2012 +Contact: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com> +Description: + The file contains a Slot-unique ID which provided by the _SUN + method in the ACPI namespace. The value is written in Advanced + Configuration and Power Interface Specification as follows: + + "The _SUN value is required to be unique among the slots of + the same type. It is also recommended that this number match + the slot number printed on the physical slot whenever possible." + + So reading the sysfs file, we can identify a physical position + of the slot in the system. diff --git a/Documentation/ABI/testing/sysfs-tty b/Documentation/ABI/testing/sysfs-tty index 0c430150d929..ad22fb0ee765 100644 --- a/Documentation/ABI/testing/sysfs-tty +++ b/Documentation/ABI/testing/sysfs-tty @@ -26,3 +26,115 @@ Description: UART port in serial_core, that is bound to TTY like ttyS0. uartclk = 16 * baud_base + These sysfs values expose the TIOCGSERIAL interface via + sysfs rather than via ioctls. + +What: /sys/class/tty/ttyS0/type +Date: October 2012 +Contact: Alan Cox <alan@linux.intel.com> +Description: + Shows the current tty type for this port. + + These sysfs values expose the TIOCGSERIAL interface via + sysfs rather than via ioctls. + +What: /sys/class/tty/ttyS0/line +Date: October 2012 +Contact: Alan Cox <alan@linux.intel.com> +Description: + Shows the current tty line number for this port. + + These sysfs values expose the TIOCGSERIAL interface via + sysfs rather than via ioctls. + +What: /sys/class/tty/ttyS0/port +Date: October 2012 +Contact: Alan Cox <alan@linux.intel.com> +Description: + Shows the current tty port I/O address for this port. + + These sysfs values expose the TIOCGSERIAL interface via + sysfs rather than via ioctls. + +What: /sys/class/tty/ttyS0/irq +Date: October 2012 +Contact: Alan Cox <alan@linux.intel.com> +Description: + Shows the current primary interrupt for this port. + + These sysfs values expose the TIOCGSERIAL interface via + sysfs rather than via ioctls. + +What: /sys/class/tty/ttyS0/flags +Date: October 2012 +Contact: Alan Cox <alan@linux.intel.com> +Description: + Show the tty port status flags for this port. + + These sysfs values expose the TIOCGSERIAL interface via + sysfs rather than via ioctls. + +What: /sys/class/tty/ttyS0/xmit_fifo_size +Date: October 2012 +Contact: Alan Cox <alan@linux.intel.com> +Description: + Show the transmit FIFO size for this port. + + These sysfs values expose the TIOCGSERIAL interface via + sysfs rather than via ioctls. + +What: /sys/class/tty/ttyS0/close_delay +Date: October 2012 +Contact: Alan Cox <alan@linux.intel.com> +Description: + Show the closing delay time for this port in ms. + + These sysfs values expose the TIOCGSERIAL interface via + sysfs rather than via ioctls. + +What: /sys/class/tty/ttyS0/closing_wait +Date: October 2012 +Contact: Alan Cox <alan@linux.intel.com> +Description: + Show the close wait time for this port in ms. + + These sysfs values expose the TIOCGSERIAL interface via + sysfs rather than via ioctls. + +What: /sys/class/tty/ttyS0/custom_divisor +Date: October 2012 +Contact: Alan Cox <alan@linux.intel.com> +Description: + Show the custom divisor if any that is set on this port. + + These sysfs values expose the TIOCGSERIAL interface via + sysfs rather than via ioctls. + +What: /sys/class/tty/ttyS0/io_type +Date: October 2012 +Contact: Alan Cox <alan@linux.intel.com> +Description: + Show the I/O type that is to be used with the iomem base + address. + + These sysfs values expose the TIOCGSERIAL interface via + sysfs rather than via ioctls. + +What: /sys/class/tty/ttyS0/iomem_base +Date: October 2012 +Contact: Alan Cox <alan@linux.intel.com> +Description: + The I/O memory base for this port. + + These sysfs values expose the TIOCGSERIAL interface via + sysfs rather than via ioctls. + +What: /sys/class/tty/ttyS0/iomem_reg_shift +Date: October 2012 +Contact: Alan Cox <alan@linux.intel.com> +Description: + Show the register shift indicating the spacing to be used + for accesses on this iomem address. + + These sysfs values expose the TIOCGSERIAL interface via + sysfs rather than via ioctls. diff --git a/Documentation/DocBook/gadget.tmpl b/Documentation/DocBook/gadget.tmpl index 6ef2f0073e5a..4017f147ba2f 100644 --- a/Documentation/DocBook/gadget.tmpl +++ b/Documentation/DocBook/gadget.tmpl @@ -671,7 +671,7 @@ than a kernel driver. <para>There's a USB Mass Storage class driver, which provides a different solution for interoperability with systems such as MS-Windows and MacOS. -That <emphasis>File-backed Storage</emphasis> driver uses a +That <emphasis>Mass Storage</emphasis> driver uses a file or block device as backing store for a drive, like the <filename>loop</filename> driver. The USB host uses the BBB, CB, or CBI versions of the mass diff --git a/Documentation/DocBook/uio-howto.tmpl b/Documentation/DocBook/uio-howto.tmpl index ac3d0018140c..ddb05e98af0d 100644 --- a/Documentation/DocBook/uio-howto.tmpl +++ b/Documentation/DocBook/uio-howto.tmpl @@ -719,6 +719,62 @@ framework to set up sysfs files for this region. Simply leave it alone. </para> </sect1> +<sect1 id="using uio_dmem_genirq"> +<title>Using uio_dmem_genirq for platform devices</title> + <para> + In addition to statically allocated memory ranges, they may also be + a desire to use dynamically allocated regions in a user space driver. + In particular, being able to access memory made available through the + dma-mapping API, may be particularly useful. The + <varname>uio_dmem_genirq</varname> driver provides a way to accomplish + this. + </para> + <para> + This driver is used in a similar manner to the + <varname>"uio_pdrv_genirq"</varname> driver with respect to interrupt + configuration and handling. + </para> + <para> + Set the <varname>.name</varname> element of + <varname>struct platform_device</varname> to + <varname>"uio_dmem_genirq"</varname> to use this driver. + </para> + <para> + When using this driver, fill in the <varname>.platform_data</varname> + element of <varname>struct platform_device</varname>, which is of type + <varname>struct uio_dmem_genirq_pdata</varname> and which contains the + following elements: + </para> + <itemizedlist> + <listitem><varname>struct uio_info uioinfo</varname>: The same + structure used as the <varname>uio_pdrv_genirq</varname> platform + data</listitem> + <listitem><varname>unsigned int *dynamic_region_sizes</varname>: + Pointer to list of sizes of dynamic memory regions to be mapped into + user space. + </listitem> + <listitem><varname>unsigned int num_dynamic_regions</varname>: + Number of elements in <varname>dynamic_region_sizes</varname> array. + </listitem> + </itemizedlist> + <para> + The dynamic regions defined in the platform data will be appended to + the <varname> mem[] </varname> array after the platform device + resources, which implies that the total number of static and dynamic + memory regions cannot exceed <varname>MAX_UIO_MAPS</varname>. + </para> + <para> + The dynamic memory regions will be allocated when the UIO device file, + <varname>/dev/uioX</varname> is opened. + Simiar to static memory resources, the memory region information for + dynamic regions is then visible via sysfs at + <varname>/sys/class/uio/uioX/maps/mapY/*</varname>. + The dynmaic memory regions will be freed when the UIO device file is + closed. When no processes are holding the device file open, the address + returned to userspace is ~0. + </para> +</sect1> + </chapter> <chapter id="userspace_driver" xreflabel="Writing a driver in user space"> diff --git a/Documentation/IRQ-domain.txt b/Documentation/IRQ-domain.txt index 1401cece745a..9bc95942ec22 100644 --- a/Documentation/IRQ-domain.txt +++ b/Documentation/IRQ-domain.txt @@ -7,6 +7,21 @@ systems with multiple interrupt controllers the kernel must ensure that each one gets assigned non-overlapping allocations of Linux IRQ numbers. +The number of interrupt controllers registered as unique irqchips +show a rising tendency: for example subdrivers of different kinds +such as GPIO controllers avoid reimplementing identical callback +mechanisms as the IRQ core system by modelling their interrupt +handlers as irqchips, i.e. in effect cascading interrupt controllers. + +Here the interrupt number loose all kind of correspondence to +hardware interrupt numbers: whereas in the past, IRQ numbers could +be chosen so they matched the hardware IRQ line into the root +interrupt controller (i.e. the component actually fireing the +interrupt line to the CPU) nowadays this number is just a number. + +For this reason we need a mechanism to separate controller-local +interrupt numbers, called hardware irq's, from Linux IRQ numbers. + The irq_alloc_desc*() and irq_free_desc*() APIs provide allocation of irq numbers, but they don't provide any support for reverse mapping of the controller-local IRQ (hwirq) number into the Linux IRQ number @@ -40,6 +55,10 @@ required hardware setup. When an interrupt is received, irq_find_mapping() function should be used to find the Linux IRQ number from the hwirq number. +The irq_create_mapping() function must be called *atleast once* +before any call to irq_find_mapping(), lest the descriptor will not +be allocated. + If the driver has the Linux IRQ number or the irq_data pointer, and needs to know the associated hwirq number (such as in the irq_chip callbacks) then it can be directly obtained from irq_data->hwirq. @@ -119,4 +138,17 @@ numbers. Most users of legacy mappings should use irq_domain_add_simple() which will use a legacy domain only if an IRQ range is supplied by the -system and will otherwise use a linear domain mapping. +system and will otherwise use a linear domain mapping. The semantics +of this call are such that if an IRQ range is specified then +descriptors will be allocated on-the-fly for it, and if no range is +specified it will fall through to irq_domain_add_linear() which meand +*no* irq descriptors will be allocated. + +A typical use case for simple domains is where an irqchip provider +is supporting both dynamic and static IRQ assignments. + +In order to avoid ending up in a situation where a linear domain is +used and no descriptor gets allocated it is very important to make sure +that the driver using the simple domain call irq_create_mapping() +before any irq_find_mapping() since the latter will actually work +for the static IRQ assignment case. diff --git a/Documentation/RCU/RTFP.txt b/Documentation/RCU/RTFP.txt index 7c1dfb19fc40..7f40c72a9c51 100644 --- a/Documentation/RCU/RTFP.txt +++ b/Documentation/RCU/RTFP.txt @@ -186,7 +186,7 @@ Bibtex Entries @article{Kung80 ,author="H. T. Kung and Q. Lehman" -,title="Concurrent Maintenance of Binary Search Trees" +,title="Concurrent Manipulation of Binary Search Trees" ,Year="1980" ,Month="September" ,journal="ACM Transactions on Database Systems" diff --git a/Documentation/RCU/checklist.txt b/Documentation/RCU/checklist.txt index cdb20d41a44a..31ef8fe07f82 100644 --- a/Documentation/RCU/checklist.txt +++ b/Documentation/RCU/checklist.txt @@ -271,15 +271,14 @@ over a rather long period of time, but improvements are always welcome! The same cautions apply to call_rcu_bh() and call_rcu_sched(). 9. All RCU list-traversal primitives, which include - rcu_dereference(), list_for_each_entry_rcu(), - list_for_each_continue_rcu(), and list_for_each_safe_rcu(), - must be either within an RCU read-side critical section or - must be protected by appropriate update-side locks. RCU - read-side critical sections are delimited by rcu_read_lock() - and rcu_read_unlock(), or by similar primitives such as - rcu_read_lock_bh() and rcu_read_unlock_bh(), in which case - the matching rcu_dereference() primitive must be used in order - to keep lockdep happy, in this case, rcu_dereference_bh(). + rcu_dereference(), list_for_each_entry_rcu(), and + list_for_each_safe_rcu(), must be either within an RCU read-side + critical section or must be protected by appropriate update-side + locks. RCU read-side critical sections are delimited by + rcu_read_lock() and rcu_read_unlock(), or by similar primitives + such as rcu_read_lock_bh() and rcu_read_unlock_bh(), in which + case the matching rcu_dereference() primitive must be used in + order to keep lockdep happy, in this case, rcu_dereference_bh(). The reason that it is permissible to use RCU list-traversal primitives when the update-side lock is held is that doing so diff --git a/Documentation/RCU/listRCU.txt b/Documentation/RCU/listRCU.txt index 4349c1487e91..adb5a3782846 100644 --- a/Documentation/RCU/listRCU.txt +++ b/Documentation/RCU/listRCU.txt @@ -205,7 +205,7 @@ RCU ("read-copy update") its name. The RCU code is as follows: audit_copy_rule(&ne->rule, &e->rule); ne->rule.action = newaction; ne->rule.file_count = newfield_count; - list_replace_rcu(e, ne); + list_replace_rcu(&e->list, &ne->list); call_rcu(&e->rcu, audit_free_rule); return 0; } diff --git a/Documentation/RCU/rcuref.txt b/Documentation/RCU/rcuref.txt index 4202ad093130..141d531aa14b 100644 --- a/Documentation/RCU/rcuref.txt +++ b/Documentation/RCU/rcuref.txt @@ -20,7 +20,7 @@ release_referenced() delete() { { ... write_lock(&list_lock); atomic_dec(&el->rc, relfunc) ... - ... delete_element + ... remove_element } write_unlock(&list_lock); ... if (atomic_dec_and_test(&el->rc)) @@ -52,7 +52,7 @@ release_referenced() delete() { { ... spin_lock(&list_lock); if (atomic_dec_and_test(&el->rc)) ... - call_rcu(&el->head, el_free); delete_element + call_rcu(&el->head, el_free); remove_element ... spin_unlock(&list_lock); } ... if (atomic_dec_and_test(&el->rc)) @@ -64,3 +64,60 @@ Sometimes, a reference to the element needs to be obtained in the update (write) stream. In such cases, atomic_inc_not_zero() might be overkill, since we hold the update-side spinlock. One might instead use atomic_inc() in such cases. + +It is not always convenient to deal with "FAIL" in the +search_and_reference() code path. In such cases, the +atomic_dec_and_test() may be moved from delete() to el_free() +as follows: + +1. 2. +add() search_and_reference() +{ { + alloc_object rcu_read_lock(); + ... search_for_element + atomic_set(&el->rc, 1); atomic_inc(&el->rc); + spin_lock(&list_lock); ... + + add_element rcu_read_unlock(); + ... } + spin_unlock(&list_lock); 4. +} delete() +3. { +release_referenced() spin_lock(&list_lock); +{ ... + ... remove_element + if (atomic_dec_and_test(&el->rc)) spin_unlock(&list_lock); + kfree(el); ... + ... call_rcu(&el->head, el_free); +} ... +5. } +void el_free(struct rcu_head *rhp) +{ + release_referenced(); +} + +The key point is that the initial reference added by add() is not removed +until after a grace period has elapsed following removal. This means that +search_and_reference() cannot find this element, which means that the value +of el->rc cannot increase. Thus, once it reaches zero, there are no +readers that can or ever will be able to reference the element. The +element can therefore safely be freed. This in turn guarantees that if +any reader finds the element, that reader may safely acquire a reference +without checking the value of the reference counter. + +In cases where delete() can sleep, synchronize_rcu() can be called from +delete(), so that el_free() can be subsumed into delete as follows: + +4. +delete() +{ + spin_lock(&list_lock); + ... + remove_element + spin_unlock(&list_lock); + ... + synchronize_rcu(); + if (atomic_dec_and_test(&el->rc)) + kfree(el); + ... +} diff --git a/Documentation/RCU/trace.txt b/Documentation/RCU/trace.txt index 672d19083252..c776968f4463 100644 --- a/Documentation/RCU/trace.txt +++ b/Documentation/RCU/trace.txt @@ -10,51 +10,63 @@ for rcutree and next for rcutiny. CONFIG_TREE_RCU and CONFIG_TREE_PREEMPT_RCU debugfs Files and Formats -These implementations of RCU provides several debugfs files under the +These implementations of RCU provide several debugfs directories under the top-level directory "rcu": -rcu/rcudata: +rcu/rcu_bh +rcu/rcu_preempt +rcu/rcu_sched + +Each directory contains files for the corresponding flavor of RCU. +Note that rcu/rcu_preempt is only present for CONFIG_TREE_PREEMPT_RCU. +For CONFIG_TREE_RCU, the RCU flavor maps onto the RCU-sched flavor, +so that activity for both appears in rcu/rcu_sched. + +In addition, the following file appears in the top-level directory: +rcu/rcutorture. This file displays rcutorture test progress. The output +of "cat rcu/rcutorture" looks as follows: + +rcutorture test sequence: 0 (test in progress) +rcutorture update version number: 615 + +The first line shows the number of rcutorture tests that have completed +since boot. If a test is currently running, the "(test in progress)" +string will appear as shown above. The second line shows the number of +update cycles that the current test has started, or zero if there is +no test in progress. + + +Within each flavor directory (rcu/rcu_bh, rcu/rcu_sched, and possibly +also rcu/rcu_preempt) the following files will be present: + +rcudata: Displays fields in struct rcu_data. -rcu/rcudata.csv: - Comma-separated values spreadsheet version of rcudata. -rcu/rcugp: +rcuexp: + Displays statistics for expedited grace periods. +rcugp: Displays grace-period counters. -rcu/rcuhier: +rcuhier: Displays the struct rcu_node hierarchy. -rcu/rcu_pending: +rcu_pending: Displays counts of the reasons rcu_pending() decided that RCU had work to do. -rcu/rcutorture: - Displays rcutorture test progress. -rcu/rcuboost: +rcuboost: Displays RCU boosting statistics. Only present if CONFIG_RCU_BOOST=y. -The output of "cat rcu/rcudata" looks as follows: - -rcu_sched: - 0 c=20972 g=20973 pq=1 pgp=20973 qp=0 dt=545/1/0 df=50 of=0 ql=163 qs=NRW. kt=0/W/0 ktl=ebc3 b=10 ci=153737 co=0 ca=0 - 1 c=20972 g=20973 pq=1 pgp=20973 qp=0 dt=967/1/0 df=58 of=0 ql=634 qs=NRW. kt=0/W/1 ktl=58c b=10 ci=191037 co=0 ca=0 - 2 c=20972 g=20973 pq=1 pgp=20973 qp=0 dt=1081/1/0 df=175 of=0 ql=74 qs=N.W. kt=0/W/2 ktl=da94 b=10 ci=75991 co=0 ca=0 - 3 c=20942 g=20943 pq=1 pgp=20942 qp=1 dt=1846/0/0 df=404 of=0 ql=0 qs=.... kt=0/W/3 ktl=d1cd b=10 ci=72261 co=0 ca=0 - 4 c=20972 g=20973 pq=1 pgp=20973 qp=0 dt=369/1/0 df=83 of=0 ql=48 qs=N.W. kt=0/W/4 ktl=e0e7 b=10 ci=128365 co=0 ca=0 - 5 c=20972 g=20973 pq=1 pgp=20973 qp=0 dt=381/1/0 df=64 of=0 ql=169 qs=NRW. kt=0/W/5 ktl=fb2f b=10 ci=164360 co=0 ca=0 - 6 c=20972 g=20973 pq=1 pgp=20973 qp=0 dt=1037/1/0 df=183 of=0 ql=62 qs=N.W. kt=0/W/6 ktl=d2ad b=10 ci=65663 co=0 ca=0 - 7 c=20897 g=20897 pq=1 pgp=20896 qp=0 dt=1572/0/0 df=382 of=0 ql=0 qs=.... kt=0/W/7 ktl=cf15 b=10 ci=75006 co=0 ca=0 -rcu_bh: - 0 c=1480 g=1480 pq=1 pgp=1480 qp=0 dt=545/1/0 df=6 of=0 ql=0 qs=.... kt=0/W/0 ktl=ebc3 b=10 ci=0 co=0 ca=0 - 1 c=1480 g=1480 pq=1 pgp=1480 qp=0 dt=967/1/0 df=3 of=0 ql=0 qs=.... kt=0/W/1 ktl=58c b=10 ci=151 co=0 ca=0 - 2 c=1480 g=1480 pq=1 pgp=1480 qp=0 dt=1081/1/0 df=6 of=0 ql=0 qs=.... kt=0/W/2 ktl=da94 b=10 ci=0 co=0 ca=0 - 3 c=1480 g=1480 pq=1 pgp=1480 qp=0 dt=1846/0/0 df=8 of=0 ql=0 qs=.... kt=0/W/3 ktl=d1cd b=10 ci=0 co=0 ca=0 - 4 c=1480 g=1480 pq=1 pgp=1480 qp=0 dt=369/1/0 df=6 of=0 ql=0 qs=.... kt=0/W/4 ktl=e0e7 b=10 ci=0 co=0 ca=0 - 5 c=1480 g=1480 pq=1 pgp=1480 qp=0 dt=381/1/0 df=4 of=0 ql=0 qs=.... kt=0/W/5 ktl=fb2f b=10 ci=0 co=0 ca=0 - 6 c=1480 g=1480 pq=1 pgp=1480 qp=0 dt=1037/1/0 df=6 of=0 ql=0 qs=.... kt=0/W/6 ktl=d2ad b=10 ci=0 co=0 ca=0 - 7 c=1474 g=1474 pq=1 pgp=1473 qp=0 dt=1572/0/0 df=8 of=0 ql=0 qs=.... kt=0/W/7 ktl=cf15 b=10 ci=0 co=0 ca=0 - -The first section lists the rcu_data structures for rcu_sched, the second -for rcu_bh. Note that CONFIG_TREE_PREEMPT_RCU kernels will have an -additional section for rcu_preempt. Each section has one line per CPU, -or eight for this 8-CPU system. The fields are as follows: +The output of "cat rcu/rcu_preempt/rcudata" looks as follows: + + 0!c=30455 g=30456 pq=1 qp=1 dt=126535/140000000000000/0 df=2002 of=4 ql=0/0 qs=N... b=10 ci=74572 nci=0 co=1131 ca=716 + 1!c=30719 g=30720 pq=1 qp=0 dt=132007/140000000000000/0 df=1874 of=10 ql=0/0 qs=N... b=10 ci=123209 nci=0 co=685 ca=982 + 2!c=30150 g=30151 pq=1 qp=1 dt=138537/140000000000000/0 df=1707 of=8 ql=0/0 qs=N... b=10 ci=80132 nci=0 co=1328 ca=1458 + 3 c=31249 g=31250 pq=1 qp=0 dt=107255/140000000000000/0 df=1749 of=6 ql=0/450 qs=NRW. b=10 ci=151700 nci=0 co=509 ca=622 + 4!c=29502 g=29503 pq=1 qp=1 dt=83647/140000000000000/0 df=965 of=5 ql=0/0 qs=N... b=10 ci=65643 nci=0 co=1373 ca=1521 + 5 c=31201 g=31202 pq=1 qp=1 dt=70422/0/0 df=535 of=7 ql=0/0 qs=.... b=10 ci=58500 nci=0 co=764 ca=698 + 6!c=30253 g=30254 pq=1 qp=1 dt=95363/140000000000000/0 df=780 of=5 ql=0/0 qs=N... b=10 ci=100607 nci=0 co=1414 ca=1353 + 7 c=31178 g=31178 pq=1 qp=0 dt=91536/0/0 df=547 of=4 ql=0/0 qs=.... b=10 ci=109819 nci=0 co=1115 ca=969 + +This file has one line per CPU, or eight for this 8-CPU system. +The fields are as follows: o The number at the beginning of each line is the CPU number. CPUs numbers followed by an exclamation mark are offline, @@ -64,11 +76,13 @@ o The number at the beginning of each line is the CPU number. substantially larger than the number of actual CPUs. o "c" is the count of grace periods that this CPU believes have - completed. Offlined CPUs and CPUs in dynticks idle mode may - lag quite a ways behind, for example, CPU 6 under "rcu_sched" - above, which has been offline through not quite 40,000 RCU grace - periods. It is not unusual to see CPUs lagging by thousands of - grace periods. + completed. Offlined CPUs and CPUs in dynticks idle mode may lag + quite a ways behind, for example, CPU 4 under "rcu_sched" above, + which has been offline through 16 RCU grace periods. It is not + unusual to see offline CPUs lagging by thousands of grace periods. + Note that although the grace-period number is an unsigned long, + it is printed out as a signed long to allow more human-friendly + representation near boot time. o "g" is the count of grace periods that this CPU believes have started. Again, offlined CPUs and CPUs in dynticks idle mode @@ -84,30 +98,25 @@ o "pq" indicates that this CPU has passed through a quiescent state CPU has not yet reported that fact, (2) some other CPU has not yet reported for this grace period, or (3) both. -o "pgp" indicates which grace period the last-observed quiescent - state for this CPU corresponds to. This is important for handling - the race between CPU 0 reporting an extended dynticks-idle - quiescent state for CPU 1 and CPU 1 suddenly waking up and - reporting its own quiescent state. If CPU 1 was the last CPU - for the current grace period, then the CPU that loses this race - will attempt to incorrectly mark CPU 1 as having checked in for - the next grace period! - o "qp" indicates that RCU still expects a quiescent state from this CPU. Offlined CPUs and CPUs in dyntick idle mode might well have qp=1, which is OK: RCU is still ignoring them. o "dt" is the current value of the dyntick counter that is incremented - when entering or leaving dynticks idle state, either by the - scheduler or by irq. This number is even if the CPU is in - dyntick idle mode and odd otherwise. The number after the first - "/" is the interrupt nesting depth when in dyntick-idle state, - or one greater than the interrupt-nesting depth otherwise. - The number after the second "/" is the NMI nesting depth. + when entering or leaving idle, either due to a context switch or + due to an interrupt. This number is even if the CPU is in idle + from RCU's viewpoint and odd otherwise. The number after the + first "/" is the interrupt nesting depth when in idle state, + or a large number added to the interrupt-nesting depth when + running a non-idle task. Some architectures do not accurately + count interrupt nesting when running in non-idle kernel context, + which can result in interesting anomalies such as negative + interrupt-nesting levels. The number after the second "/" + is the NMI nesting depth. o "df" is the number of times that some other CPU has forced a quiescent state on behalf of this CPU due to this CPU being in - dynticks-idle state. + idle state. o "of" is the number of times that some other CPU has forced a quiescent state on behalf of this CPU due to this CPU being @@ -120,9 +129,13 @@ o "of" is the number of times that some other CPU has forced a error, so it makes sense to err conservatively. o "ql" is the number of RCU callbacks currently residing on - this CPU. This is the total number of callbacks, regardless - of what state they are in (new, waiting for grace period to - start, waiting for grace period to end, ready to invoke). + this CPU. The first number is the number of "lazy" callbacks + that are known to RCU to only be freeing memory, and the number + after the "/" is the total number of callbacks, lazy or not. + These counters count callbacks regardless of what phase of + grace-period processing that they are in (new, waiting for + grace period to start, waiting for grace period to end, ready + to invoke). o "qs" gives an indication of the state of the callback queue with four characters: @@ -150,6 +163,43 @@ o "qs" gives an indication of the state of the callback queue If there are no callbacks in a given one of the above states, the corresponding character is replaced by ".". +o "b" is the batch limit for this CPU. If more than this number + of RCU callbacks is ready to invoke, then the remainder will + be deferred. + +o "ci" is the number of RCU callbacks that have been invoked for + this CPU. Note that ci+nci+ql is the number of callbacks that have + been registered in absence of CPU-hotplug activity. + +o "nci" is the number of RCU callbacks that have been offloaded from + this CPU. This will always be zero unless the kernel was built + with CONFIG_RCU_NOCB_CPU=y and the "rcu_nocbs=" kernel boot + parameter was specified. + +o "co" is the number of RCU callbacks that have been orphaned due to + this CPU going offline. These orphaned callbacks have been moved + to an arbitrarily chosen online CPU. + +o "ca" is the number of RCU callbacks that have been adopted by this + CPU due to other CPUs going offline. Note that ci+co-ca+ql is + the number of RCU callbacks registered on this CPU. + + +Kernels compiled with CONFIG_RCU_BOOST=y display the following from +/debug/rcu/rcu_preempt/rcudata: + + 0!c=12865 g=12866 pq=1 qp=1 dt=83113/140000000000000/0 df=288 of=11 ql=0/0 qs=N... kt=0/O ktl=944 b=10 ci=60709 nci=0 co=748 ca=871 + 1 c=14407 g=14408 pq=1 qp=0 dt=100679/140000000000000/0 df=378 of=7 ql=0/119 qs=NRW. kt=0/W ktl=9b6 b=10 ci=109740 nci=0 co=589 ca=485 + 2 c=14407 g=14408 pq=1 qp=0 dt=105486/0/0 df=90 of=9 ql=0/89 qs=NRW. kt=0/W ktl=c0c b=10 ci=83113 nci=0 co=533 ca=490 + 3 c=14407 g=14408 pq=1 qp=0 dt=107138/0/0 df=142 of=8 ql=0/188 qs=NRW. kt=0/W ktl=b96 b=10 ci=121114 nci=0 co=426 ca=290 + 4 c=14405 g=14406 pq=1 qp=1 dt=50238/0/0 df=706 of=7 ql=0/0 qs=.... kt=0/W ktl=812 b=10 ci=34929 nci=0 co=643 ca=114 + 5!c=14168 g=14169 pq=1 qp=0 dt=45465/140000000000000/0 df=161 of=11 ql=0/0 qs=N... kt=0/O ktl=b4d b=10 ci=47712 nci=0 co=677 ca=722 + 6 c=14404 g=14405 pq=1 qp=0 dt=59454/0/0 df=94 of=6 ql=0/0 qs=.... kt=0/W ktl=e57 b=10 ci=55597 nci=0 co=701 ca=811 + 7 c=14407 g=14408 pq=1 qp=1 dt=68850/0/0 df=31 of=8 ql=0/0 qs=.... kt=0/W ktl=14bd b=10 ci=77475 nci=0 co=508 ca=1042 + +This is similar to the output discussed above, but contains the following +additional fields: + o "kt" is the per-CPU kernel-thread state. The digit preceding the first slash is zero if there is no work pending and 1 otherwise. The character between the first pair of slashes is @@ -184,35 +234,51 @@ o "ktl" is the low-order 16 bits (in hexadecimal) of the count of This field is displayed only for CONFIG_RCU_BOOST kernels. -o "b" is the batch limit for this CPU. If more than this number - of RCU callbacks is ready to invoke, then the remainder will - be deferred. -o "ci" is the number of RCU callbacks that have been invoked for - this CPU. Note that ci+ql is the number of callbacks that have - been registered in absence of CPU-hotplug activity. +The output of "cat rcu/rcu_preempt/rcuexp" looks as follows: -o "co" is the number of RCU callbacks that have been orphaned due to - this CPU going offline. These orphaned callbacks have been moved - to an arbitrarily chosen online CPU. +s=21872 d=21872 w=0 tf=0 wd1=0 wd2=0 n=0 sc=21872 dt=21872 dl=0 dx=21872 + +These fields are as follows: + +o "s" is the starting sequence number. -o "ca" is the number of RCU callbacks that have been adopted due to - other CPUs going offline. Note that ci+co-ca+ql is the number of - RCU callbacks registered on this CPU. +o "d" is the ending sequence number. When the starting and ending + numbers differ, there is an expedited grace period in progress. -There is also an rcu/rcudata.csv file with the same information in -comma-separated-variable spreadsheet format. +o "w" is the number of times that the sequence numbers have been + in danger of wrapping. +o "tf" is the number of times that contention has resulted in a + failure to begin an expedited grace period. -The output of "cat rcu/rcugp" looks as follows: +o "wd1" and "wd2" are the number of times that an attempt to + start an expedited grace period found that someone else had + completed an expedited grace period that satisfies the + attempted request. "Our work is done." -rcu_sched: completed=33062 gpnum=33063 -rcu_bh: completed=464 gpnum=464 +o "n" is number of times that contention was so great that + the request was demoted from an expedited grace period to + a normal grace period. + +o "sc" is the number of times that the attempt to start a + new expedited grace period succeeded. + +o "dt" is the number of times that we attempted to update + the "d" counter. + +o "dl" is the number of times that we failed to update the "d" + counter. + +o "dx" is the number of times that we succeeded in updating + the "d" counter. -Again, this output is for both "rcu_sched" and "rcu_bh". Note that -kernels built with CONFIG_TREE_PREEMPT_RCU will have an additional -"rcu_preempt" line. The fields are taken from the rcu_state structure, -and are as follows: + +The output of "cat rcu/rcu_preempt/rcugp" looks as follows: + +completed=31249 gpnum=31250 age=1 max=18 + +These fields are taken from the rcu_state structure, and are as follows: o "completed" is the number of grace periods that have completed. It is comparable to the "c" field from rcu/rcudata in that a @@ -220,44 +286,42 @@ o "completed" is the number of grace periods that have completed. that the corresponding RCU grace period has completed. o "gpnum" is the number of grace periods that have started. It is - comparable to the "g" field from rcu/rcudata in that a CPU - whose "g" field matches the value of "gpnum" is aware that the - corresponding RCU grace period has started. + similarly comparable to the "g" field from rcu/rcudata in that + a CPU whose "g" field matches the value of "gpnum" is aware that + the corresponding RCU grace period has started. + + If these two fields are equal, then there is no grace period + in progress, in other words, RCU is idle. On the other hand, + if the two fields differ (as they are above), then an RCU grace + period is in progress. - If these two fields are equal (as they are for "rcu_bh" above), - then there is no grace period in progress, in other words, RCU - is idle. On the other hand, if the two fields differ (as they - do for "rcu_sched" above), then an RCU grace period is in progress. +o "age" is the number of jiffies that the current grace period + has extended for, or zero if there is no grace period currently + in effect. +o "max" is the age in jiffies of the longest-duration grace period + thus far. -The output of "cat rcu/rcuhier" looks as follows, with very long lines: +The output of "cat rcu/rcu_preempt/rcuhier" looks as follows: -c=6902 g=6903 s=2 jfq=3 j=72c7 nfqs=13142/nfqsng=0(13142) fqlh=6 -1/1 ..>. 0:127 ^0 -3/3 ..>. 0:35 ^0 0/0 ..>. 36:71 ^1 0/0 ..>. 72:107 ^2 0/0 ..>. 108:127 ^3 -3/3f ..>. 0:5 ^0 2/3 ..>. 6:11 ^1 0/0 ..>. 12:17 ^2 0/0 ..>. 18:23 ^3 0/0 ..>. 24:29 ^4 0/0 ..>. 30:35 ^5 0/0 ..>. 36:41 ^0 0/0 ..>. 42:47 ^1 0/0 ..>. 48:53 ^2 0/0 ..>. 54:59 ^3 0/0 ..>. 60:65 ^4 0/0 ..>. 66:71 ^5 0/0 ..>. 72:77 ^0 0/0 ..>. 78:83 ^1 0/0 ..>. 84:89 ^2 0/0 ..>. 90:95 ^3 0/0 ..>. 96:101 ^4 0/0 ..>. 102:107 ^5 0/0 ..>. 108:113 ^0 0/0 ..>. 114:119 ^1 0/0 ..>. 120:125 ^2 0/0 ..>. 126:127 ^3 -rcu_bh: -c=-226 g=-226 s=1 jfq=-5701 j=72c7 nfqs=88/nfqsng=0(88) fqlh=0 -0/1 ..>. 0:127 ^0 -0/3 ..>. 0:35 ^0 0/0 ..>. 36:71 ^1 0/0 ..>. 72:107 ^2 0/0 ..>. 108:127 ^3 -0/3f ..>. 0:5 ^0 0/3 ..>. 6:11 ^1 0/0 ..>. 12:17 ^2 0/0 ..>. 18:23 ^3 0/0 ..>. 24:29 ^4 0/0 ..>. 30:35 ^5 0/0 ..>. 36:41 ^0 0/0 ..>. 42:47 ^1 0/0 ..>. 48:53 ^2 0/0 ..>. 54:59 ^3 0/0 ..>. 60:65 ^4 0/0 ..>. 66:71 ^5 0/0 ..>. 72:77 ^0 0/0 ..>. 78:83 ^1 0/0 ..>. 84:89 ^2 0/0 ..>. 90:95 ^3 0/0 ..>. 96:101 ^4 0/0 ..>. 102:107 ^5 0/0 ..>. 108:113 ^0 0/0 ..>. 114:119 ^1 0/0 ..>. 120:125 ^2 0/0 ..>. 126:127 ^3 +c=14407 g=14408 s=0 jfq=2 j=c863 nfqs=12040/nfqsng=0(12040) fqlh=1051 oqlen=0/0 +3/3 ..>. 0:7 ^0 +e/e ..>. 0:3 ^0 d/d ..>. 4:7 ^1 -This is once again split into "rcu_sched" and "rcu_bh" portions, -and CONFIG_TREE_PREEMPT_RCU kernels will again have an additional -"rcu_preempt" section. The fields are as follows: +The fields are as follows: -o "c" is exactly the same as "completed" under rcu/rcugp. +o "c" is exactly the same as "completed" under rcu/rcu_preempt/rcugp. -o "g" is exactly the same as "gpnum" under rcu/rcugp. +o "g" is exactly the same as "gpnum" under rcu/rcu_preempt/rcugp. -o "s" is the "signaled" state that drives force_quiescent_state()'s +o "s" is the current state of the force_quiescent_state() state machine. o "jfq" is the number of jiffies remaining for this grace period before force_quiescent_state() is invoked to help push things - along. Note that CPUs in dyntick-idle mode throughout the grace - period will not report on their own, but rather must be check by - some other CPU via force_quiescent_state(). + along. Note that CPUs in idle mode throughout the grace period + will not report on their own, but rather must be check by some + other CPU via force_quiescent_state(). o "j" is the low-order four hex digits of the jiffies counter. Yes, Paul did run into a number of problems that turned out to @@ -268,7 +332,8 @@ o "nfqs" is the number of calls to force_quiescent_state() since o "nfqsng" is the number of useless calls to force_quiescent_state(), where there wasn't actually a grace period active. This can - happen due to races. The number in parentheses is the difference + no longer happen due to grace-period processing being pushed + into a kthread. The number in parentheses is the difference between "nfqs" and "nfqsng", or the number of times that force_quiescent_state() actually did some real work. @@ -276,28 +341,27 @@ o "fqlh" is the number of calls to force_quiescent_state() that exited immediately (without even being counted in nfqs above) due to contention on ->fqslock. -o Each element of the form "1/1 0:127 ^0" represents one struct - rcu_node. Each line represents one level of the hierarchy, from - root to leaves. It is best to think of the rcu_data structures - as forming yet another level after the leaves. Note that there - might be either one, two, or three levels of rcu_node structures, - depending on the relationship between CONFIG_RCU_FANOUT and - CONFIG_NR_CPUS. +o Each element of the form "3/3 ..>. 0:7 ^0" represents one rcu_node + structure. Each line represents one level of the hierarchy, + from root to leaves. It is best to think of the rcu_data + structures as forming yet another level after the leaves. + Note that there might be either one, two, three, or even four + levels of rcu_node structures, depending on the relationship + between CONFIG_RCU_FANOUT, CONFIG_RCU_FANOUT_LEAF (possibly + adjusted using the rcu_fanout_leaf kernel boot parameter), and + CONFIG_NR_CPUS (possibly adjusted using the nr_cpu_ids count of + possible CPUs for the booting hardware). o The numbers separated by the "/" are the qsmask followed by the qsmaskinit. The qsmask will have one bit - set for each entity in the next lower level that - has not yet checked in for the current grace period. + set for each entity in the next lower level that has + not yet checked in for the current grace period ("e" + indicating CPUs 5, 6, and 7 in the example above). The qsmaskinit will have one bit for each entity that is currently expected to check in during each grace period. The value of qsmaskinit is assigned to that of qsmask at the beginning of each grace period. - For example, for "rcu_sched", the qsmask of the first - entry of the lowest level is 0x14, meaning that we - are still waiting for CPUs 2 and 4 to check in for the - current grace period. - o The characters separated by the ">" indicate the state of the blocked-tasks lists. A "G" preceding the ">" indicates that at least one task blocked in an RCU @@ -312,48 +376,39 @@ o Each element of the form "1/1 0:127 ^0" represents one struct A "." character appears if the corresponding condition does not hold, so that "..>." indicates that no tasks are blocked. In contrast, "GE>T" indicates maximal - inconvenience from blocked tasks. + inconvenience from blocked tasks. CONFIG_TREE_RCU + builds of the kernel will always show "..>.". o The numbers separated by the ":" are the range of CPUs served by this struct rcu_node. This can be helpful in working out how the hierarchy is wired together. - For example, the first entry at the lowest level shows - "0:5", indicating that it covers CPUs 0 through 5. + For example, the example rcu_node structure shown above + has "0:7", indicating that it covers CPUs 0 through 7. o The number after the "^" indicates the bit in the - next higher level rcu_node structure that this - rcu_node structure corresponds to. - - For example, the first entry at the lowest level shows - "^0", indicating that it corresponds to bit zero in - the first entry at the middle level. - - -The output of "cat rcu/rcu_pending" looks as follows: - -rcu_sched: - 0 np=255892 qsp=53936 rpq=85 cbr=0 cng=14417 gpc=10033 gps=24320 nn=146741 - 1 np=261224 qsp=54638 rpq=33 cbr=0 cng=25723 gpc=16310 gps=2849 nn=155792 - 2 np=237496 qsp=49664 rpq=23 cbr=0 cng=2762 gpc=45478 gps=1762 nn=136629 - 3 np=236249 qsp=48766 rpq=98 cbr=0 cng=286 gpc=48049 gps=1218 nn=137723 - 4 np=221310 qsp=46850 rpq=7 cbr=0 cng=26 gpc=43161 gps=4634 nn=123110 - 5 np=237332 qsp=48449 rpq=9 cbr=0 cng=54 gpc=47920 gps=3252 nn=137456 - 6 np=219995 qsp=46718 rpq=12 cbr=0 cng=50 gpc=42098 gps=6093 nn=120834 - 7 np=249893 qsp=49390 rpq=42 cbr=0 cng=72 gpc=38400 gps=17102 nn=144888 -rcu_bh: - 0 np=146741 qsp=1419 rpq=6 cbr=0 cng=6 gpc=0 gps=0 nn=145314 - 1 np=155792 qsp=12597 rpq=3 cbr=0 cng=0 gpc=4 gps=8 nn=143180 - 2 np=136629 qsp=18680 rpq=1 cbr=0 cng=0 gpc=7 gps=6 nn=117936 - 3 np=137723 qsp=2843 rpq=0 cbr=0 cng=0 gpc=10 gps=7 nn=134863 - 4 np=123110 qsp=12433 rpq=0 cbr=0 cng=0 gpc=4 gps=2 nn=110671 - 5 np=137456 qsp=4210 rpq=1 cbr=0 cng=0 gpc=6 gps=5 nn=133235 - 6 np=120834 qsp=9902 rpq=2 cbr=0 cng=0 gpc=6 gps=3 nn=110921 - 7 np=144888 qsp=26336 rpq=0 cbr=0 cng=0 gpc=8 gps=2 nn=118542 - -As always, this is once again split into "rcu_sched" and "rcu_bh" -portions, with CONFIG_TREE_PREEMPT_RCU kernels having an additional -"rcu_preempt" section. The fields are as follows: + next higher level rcu_node structure that this rcu_node + structure corresponds to. For example, the "d/d ..>. 4:7 + ^1" has a "1" in this position, indicating that it + corresponds to the "1" bit in the "3" shown in the + "3/3 ..>. 0:7 ^0" entry on the next level up. + + +The output of "cat rcu/rcu_sched/rcu_pending" looks as follows: + + 0!np=26111 qsp=29 rpq=5386 cbr=1 cng=570 gpc=3674 gps=577 nn=15903 + 1!np=28913 qsp=35 rpq=6097 cbr=1 cng=448 gpc=3700 gps=554 nn=18113 + 2!np=32740 qsp=37 rpq=6202 cbr=0 cng=476 gpc=4627 gps=546 nn=20889 + 3 np=23679 qsp=22 rpq=5044 cbr=1 cng=415 gpc=3403 gps=347 nn=14469 + 4!np=30714 qsp=4 rpq=5574 cbr=0 cng=528 gpc=3931 gps=639 nn=20042 + 5 np=28910 qsp=2 rpq=5246 cbr=0 cng=428 gpc=4105 gps=709 nn=18422 + 6!np=38648 qsp=5 rpq=7076 cbr=0 cng=840 gpc=4072 gps=961 nn=25699 + 7 np=37275 qsp=2 rpq=6873 cbr=0 cng=868 gpc=3416 gps=971 nn=25147 + +The fields are as follows: + +o The leading number is the CPU number, with "!" indicating + an offline CPU. o "np" is the number of times that __rcu_pending() has been invoked for the corresponding flavor of RCU. @@ -377,38 +432,23 @@ o "gpc" is the number of times that an old grace period had o "gps" is the number of times that a new grace period had started, but this CPU was not yet aware of it. -o "nn" is the number of times that this CPU needed nothing. Alert - readers will note that the rcu "nn" number for a given CPU very - closely matches the rcu_bh "np" number for that same CPU. This - is due to short-circuit evaluation in rcu_pending(). - - -The output of "cat rcu/rcutorture" looks as follows: - -rcutorture test sequence: 0 (test in progress) -rcutorture update version number: 615 - -The first line shows the number of rcutorture tests that have completed -since boot. If a test is currently running, the "(test in progress)" -string will appear as shown above. The second line shows the number of -update cycles that the current test has started, or zero if there is -no test in progress. +o "nn" is the number of times that this CPU needed nothing. The output of "cat rcu/rcuboost" looks as follows: -0:5 tasks=.... kt=W ntb=0 neb=0 nnb=0 j=2f95 bt=300f - balk: nt=0 egt=989 bt=0 nb=0 ny=0 nos=16 -6:7 tasks=.... kt=W ntb=0 neb=0 nnb=0 j=2f95 bt=300f - balk: nt=0 egt=225 bt=0 nb=0 ny=0 nos=6 +0:3 tasks=.... kt=W ntb=0 neb=0 nnb=0 j=c864 bt=c894 + balk: nt=0 egt=4695 bt=0 nb=0 ny=56 nos=0 +4:7 tasks=.... kt=W ntb=0 neb=0 nnb=0 j=c864 bt=c894 + balk: nt=0 egt=6541 bt=0 nb=0 ny=126 nos=0 This information is output only for rcu_preempt. Each two-line entry corresponds to a leaf rcu_node strcuture. The fields are as follows: o "n:m" is the CPU-number range for the corresponding two-line entry. In the sample output above, the first entry covers - CPUs zero through five and the second entry covers CPUs 6 - and 7. + CPUs zero through three and the second entry covers CPUs four + through seven. o "tasks=TNEB" gives the state of the various segments of the rnp->blocked_tasks list: diff --git a/Documentation/RCU/whatisRCU.txt b/Documentation/RCU/whatisRCU.txt index bf0f6de2aa00..0cc7820967f4 100644 --- a/Documentation/RCU/whatisRCU.txt +++ b/Documentation/RCU/whatisRCU.txt @@ -499,6 +499,8 @@ The foo_reclaim() function might appear as follows: { struct foo *fp = container_of(rp, struct foo, rcu); + foo_cleanup(fp->a); + kfree(fp); } @@ -521,6 +523,12 @@ o Use call_rcu() -after- removing a data element from an read-side critical sections that might be referencing that data item. +If the callback for call_rcu() is not doing anything more than calling +kfree() on the structure, you can use kfree_rcu() instead of call_rcu() +to avoid having to write your own callback: + + kfree_rcu(old_fp, rcu); + Again, see checklist.txt for additional rules governing the use of RCU. @@ -773,8 +781,8 @@ a single atomic update, converting to RCU will require special care. Also, the presence of synchronize_rcu() means that the RCU version of delete() can now block. If this is a problem, there is a callback-based -mechanism that never blocks, namely call_rcu(), that can be used in -place of synchronize_rcu(). +mechanism that never blocks, namely call_rcu() or kfree_rcu(), that can +be used in place of synchronize_rcu(). 7. FULL LIST OF RCU APIs @@ -789,9 +797,7 @@ RCU list traversal: list_for_each_entry_rcu hlist_for_each_entry_rcu hlist_nulls_for_each_entry_rcu - - list_for_each_continue_rcu (to be deprecated in favor of new - list_for_each_entry_continue_rcu) + list_for_each_entry_continue_rcu RCU pointer/list update: @@ -813,6 +819,7 @@ RCU: Critical sections Grace period Barrier rcu_read_unlock synchronize_rcu rcu_dereference synchronize_rcu_expedited call_rcu + kfree_rcu bh: Critical sections Grace period Barrier diff --git a/Documentation/acpi/enumeration.txt b/Documentation/acpi/enumeration.txt new file mode 100644 index 000000000000..4f27785ca0c8 --- /dev/null +++ b/Documentation/acpi/enumeration.txt @@ -0,0 +1,227 @@ +ACPI based device enumeration +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +ACPI 5 introduced a set of new resources (UartTSerialBus, I2cSerialBus, +SpiSerialBus, GpioIo and GpioInt) which can be used in enumerating slave +devices behind serial bus controllers. + +In addition we are starting to see peripherals integrated in the +SoC/Chipset to appear only in ACPI namespace. These are typically devices +that are accessed through memory-mapped registers. + +In order to support this and re-use the existing drivers as much as +possible we decided to do following: + + o Devices that have no bus connector resource are represented as + platform devices. + + o Devices behind real busses where there is a connector resource + are represented as struct spi_device or struct i2c_device + (standard UARTs are not busses so there is no struct uart_device). + +As both ACPI and Device Tree represent a tree of devices (and their +resources) this implementation follows the Device Tree way as much as +possible. + +The ACPI implementation enumerates devices behind busses (platform, SPI and +I2C), creates the physical devices and binds them to their ACPI handle in +the ACPI namespace. + +This means that when ACPI_HANDLE(dev) returns non-NULL the device was +enumerated from ACPI namespace. This handle can be used to extract other +device-specific configuration. There is an example of this below. + +Platform bus support +~~~~~~~~~~~~~~~~~~~~ +Since we are using platform devices to represent devices that are not +connected to any physical bus we only need to implement a platform driver +for the device and add supported ACPI IDs. If this same IP-block is used on +some other non-ACPI platform, the driver might work out of the box or needs +some minor changes. + +Adding ACPI support for an existing driver should be pretty +straightforward. Here is the simplest example: + + #ifdef CONFIG_ACPI + static struct acpi_device_id mydrv_acpi_match[] = { + /* ACPI IDs here */ + { } + }; + MODULE_DEVICE_TABLE(acpi, mydrv_acpi_match); + #endif + + static struct platform_driver my_driver = { + ... + .driver = { + .acpi_match_table = ACPI_PTR(mydrv_acpi_match), + }, + }; + +If the driver needs to perform more complex initialization like getting and +configuring GPIOs it can get its ACPI handle and extract this information +from ACPI tables. + +Currently the kernel is not able to automatically determine from which ACPI +device it should make the corresponding platform device so we need to add +the ACPI device explicitly to acpi_platform_device_ids list defined in +drivers/acpi/scan.c. This limitation is only for the platform devices, SPI +and I2C devices are created automatically as described below. + +SPI serial bus support +~~~~~~~~~~~~~~~~~~~~~~ +Slave devices behind SPI bus have SpiSerialBus resource attached to them. +This is extracted automatically by the SPI core and the slave devices are +enumerated once spi_register_master() is called by the bus driver. + +Here is what the ACPI namespace for a SPI slave might look like: + + Device (EEP0) + { + Name (_ADR, 1) + Name (_CID, Package() { + "ATML0025", + "AT25", + }) + ... + Method (_CRS, 0, NotSerialized) + { + SPISerialBus(1, PolarityLow, FourWireMode, 8, + ControllerInitiated, 1000000, ClockPolarityLow, + ClockPhaseFirst, "\\_SB.PCI0.SPI1",) + } + ... + +The SPI device drivers only need to add ACPI IDs in a similar way than with +the platform device drivers. Below is an example where we add ACPI support +to at25 SPI eeprom driver (this is meant for the above ACPI snippet): + + #ifdef CONFIG_ACPI + static struct acpi_device_id at25_acpi_match[] = { + { "AT25", 0 }, + { }, + }; + MODULE_DEVICE_TABLE(acpi, at25_acpi_match); + #endif + + static struct spi_driver at25_driver = { + .driver = { + ... + .acpi_match_table = ACPI_PTR(at25_acpi_match), + }, + }; + +Note that this driver actually needs more information like page size of the +eeprom etc. but at the time writing this there is no standard way of +passing those. One idea is to return this in _DSM method like: + + Device (EEP0) + { + ... + Method (_DSM, 4, NotSerialized) + { + Store (Package (6) + { + "byte-len", 1024, + "addr-mode", 2, + "page-size, 32 + }, Local0) + + // Check UUIDs etc. + + Return (Local0) + } + +Then the at25 SPI driver can get this configation by calling _DSM on its +ACPI handle like: + + struct acpi_buffer output = { ACPI_ALLOCATE_BUFFER, NULL }; + struct acpi_object_list input; + acpi_status status; + + /* Fill in the input buffer */ + + status = acpi_evaluate_object(ACPI_HANDLE(&spi->dev), "_DSM", + &input, &output); + if (ACPI_FAILURE(status)) + /* Handle the error */ + + /* Extract the data here */ + + kfree(output.pointer); + +I2C serial bus support +~~~~~~~~~~~~~~~~~~~~~~ +The slaves behind I2C bus controller only need to add the ACPI IDs like +with the platform and SPI drivers. However the I2C bus controller driver +needs to call acpi_i2c_register_devices() after it has added the adapter. + +An I2C bus (controller) driver does: + + ... + ret = i2c_add_numbered_adapter(adapter); + if (ret) + /* handle error */ + + of_i2c_register_devices(adapter); + /* Enumerate the slave devices behind this bus via ACPI */ + acpi_i2c_register_devices(adapter); + +Below is an example of how to add ACPI support to the existing mpu3050 +input driver: + + #ifdef CONFIG_ACPI + static struct acpi_device_id mpu3050_acpi_match[] = { + { "MPU3050", 0 }, + { }, + }; + MODULE_DEVICE_TABLE(acpi, mpu3050_acpi_match); + #endif + + static struct i2c_driver mpu3050_i2c_driver = { + .driver = { + .name = "mpu3050", + .owner = THIS_MODULE, + .pm = &mpu3050_pm, + .of_match_table = mpu3050_of_match, + .acpi_match_table ACPI_PTR(mpu3050_acpi_match), + }, + .probe = mpu3050_probe, + .remove = __devexit_p(mpu3050_remove), + .id_table = mpu3050_ids, + }; + +GPIO support +~~~~~~~~~~~~ +ACPI 5 introduced two new resources to describe GPIO connections: GpioIo +and GpioInt. These resources are used be used to pass GPIO numbers used by +the device to the driver. For example: + + Method (_CRS, 0, NotSerialized) + { + Name (SBUF, ResourceTemplate() + { + GpioIo (Exclusive, PullDefault, 0x0000, 0x0000, + IoRestrictionOutputOnly, "\\_SB.PCI0.GPI0", + 0x00, ResourceConsumer,,) + { + // Pin List + 0x0055 + } + ... + + Return (SBUF) + } + } + +These GPIO numbers are controller relative and path "\\_SB.PCI0.GPI0" +specifies the path to the controller. In order to use these GPIOs in Linux +we need to translate them to the Linux GPIO numbers. + +The driver can do this by including <linux/acpi_gpio.h> and then calling +acpi_get_gpio(path, gpio). This will return the Linux GPIO number or +negative errno if there was no translation found. + +Other GpioIo parameters must be converted first by the driver to be +suitable to the gpiolib before passing them. + +In case of GpioInt resource an additional call to gpio_to_irq() must be +done before calling request_irq(). diff --git a/Documentation/cgroups/memory.txt b/Documentation/cgroups/memory.txt index c07f7b4fb88d..a25cb3fafeba 100644 --- a/Documentation/cgroups/memory.txt +++ b/Documentation/cgroups/memory.txt @@ -144,9 +144,9 @@ Figure 1 shows the important aspects of the controller 3. Each page has a pointer to the page_cgroup, which in turn knows the cgroup it belongs to -The accounting is done as follows: mem_cgroup_charge() is invoked to set up -the necessary data structures and check if the cgroup that is being charged -is over its limit. If it is, then reclaim is invoked on the cgroup. +The accounting is done as follows: mem_cgroup_charge_common() is invoked to +set up the necessary data structures and check if the cgroup that is being +charged is over its limit. If it is, then reclaim is invoked on the cgroup. More details can be found in the reclaim section of this document. If everything goes well, a page meta-data-structure called page_cgroup is updated. page_cgroup has its own LRU on cgroup. @@ -466,6 +466,10 @@ Note: 5.3 swappiness Similar to /proc/sys/vm/swappiness, but affecting a hierarchy of groups only. +Please note that unlike the global swappiness, memcg knob set to 0 +really prevents from any swapping even if there is a swap storage +available. This might lead to memcg OOM killer if there are no file +pages to reclaim. Following cgroups' swappiness can't be changed. - root cgroup (uses /proc/sys/vm/swappiness). diff --git a/Documentation/cpu-hotplug.txt b/Documentation/cpu-hotplug.txt index 66ef8f35613d..9f401350f502 100644 --- a/Documentation/cpu-hotplug.txt +++ b/Documentation/cpu-hotplug.txt @@ -207,6 +207,30 @@ by making it not-removable. In such cases you will also notice that the online file is missing under cpu0. +Q: Is CPU0 removable on X86? +A: Yes. If kernel is compiled with CONFIG_BOOTPARAM_HOTPLUG_CPU0=y, CPU0 is +removable by default. Otherwise, CPU0 is also removable by kernel option +cpu0_hotplug. + +But some features depend on CPU0. Two known dependencies are: + +1. Resume from hibernate/suspend depends on CPU0. Hibernate/suspend will fail if +CPU0 is offline and you need to online CPU0 before hibernate/suspend can +continue. +2. PIC interrupts also depend on CPU0. CPU0 can't be removed if a PIC interrupt +is detected. + +It's said poweroff/reboot may depend on CPU0 on some machines although I haven't +seen any poweroff/reboot failure so far after CPU0 is offline on a few tested +machines. + +Please let me know if you know or see any other dependencies of CPU0. + +If the dependencies are under your control, you can turn on CPU0 hotplug feature +either by CONFIG_BOOTPARAM_HOTPLUG_CPU0 or by kernel parameter cpu0_hotplug. + +--Fenghua Yu <fenghua.yu@intel.com> + Q: How do i find out if a particular CPU is not removable? A: Depending on the implementation, some architectures may show this by the absence of the "online" file. This is done if it can be determined ahead of diff --git a/Documentation/devices.txt b/Documentation/devices.txt index b6251cca9263..08f01e79c41a 100644 --- a/Documentation/devices.txt +++ b/Documentation/devices.txt @@ -2561,9 +2561,6 @@ Your cooperation is appreciated. 192 = /dev/usb/yurex1 First USB Yurex device ... 209 = /dev/usb/yurex16 16th USB Yurex device - 240 = /dev/usb/dabusb0 First daubusb device - ... - 243 = /dev/usb/dabusb3 Fourth dabusb device 180 block USB block devices 0 = /dev/uba First USB block device diff --git a/Documentation/devicetree/bindings/arm/atmel-at91.txt b/Documentation/devicetree/bindings/arm/atmel-at91.txt index d187e9f7cf1c..1196290082d1 100644 --- a/Documentation/devicetree/bindings/arm/atmel-at91.txt +++ b/Documentation/devicetree/bindings/arm/atmel-at91.txt @@ -7,6 +7,12 @@ PIT Timer required properties: - interrupts: Should contain interrupt for the PIT which is the IRQ line shared across all System Controller members. +System Timer (ST) required properties: +- compatible: Should be "atmel,at91rm9200-st" +- reg: Should contain registers location and length +- interrupts: Should contain interrupt for the ST which is the IRQ line + shared across all System Controller members. + TC/TCLIB Timer required properties: - compatible: Should be "atmel,<chip>-tcb". <chip> can be "at91rm9200" or "at91sam9x5" diff --git a/Documentation/devicetree/bindings/arm/davinci/nand.txt b/Documentation/devicetree/bindings/arm/davinci/nand.txt index e37241f1fdd8..49fc7ada929a 100644 --- a/Documentation/devicetree/bindings/arm/davinci/nand.txt +++ b/Documentation/devicetree/bindings/arm/davinci/nand.txt @@ -23,29 +23,16 @@ Recommended properties : - ti,davinci-nand-buswidth: buswidth 8 or 16 - ti,davinci-nand-use-bbt: use flash based bad block table support. -Example (enbw_cmc board): -aemif@60000000 { - compatible = "ti,davinci-aemif"; - #address-cells = <2>; - #size-cells = <1>; - reg = <0x68000000 0x80000>; - ranges = <2 0 0x60000000 0x02000000 - 3 0 0x62000000 0x02000000 - 4 0 0x64000000 0x02000000 - 5 0 0x66000000 0x02000000 - 6 0 0x68000000 0x02000000>; - nand@3,0 { - compatible = "ti,davinci-nand"; - reg = <3 0x0 0x807ff - 6 0x0 0x8000>; - #address-cells = <1>; - #size-cells = <1>; - ti,davinci-chipselect = <1>; - ti,davinci-mask-ale = <0>; - ti,davinci-mask-cle = <0>; - ti,davinci-mask-chipsel = <0>; - ti,davinci-ecc-mode = "hw"; - ti,davinci-ecc-bits = <4>; - ti,davinci-nand-use-bbt; - }; +Example(da850 EVM ): +nand_cs3@62000000 { + compatible = "ti,davinci-nand"; + reg = <0x62000000 0x807ff + 0x68000000 0x8000>; + ti,davinci-chipselect = <1>; + ti,davinci-mask-ale = <0>; + ti,davinci-mask-cle = <0>; + ti,davinci-mask-chipsel = <0>; + ti,davinci-ecc-mode = "hw"; + ti,davinci-ecc-bits = <4>; + ti,davinci-nand-use-bbt; }; diff --git a/Documentation/devicetree/bindings/arm/l2cc.txt b/Documentation/devicetree/bindings/arm/l2cc.txt index 7ca52161e7ab..7c3ee3aeb7b7 100644 --- a/Documentation/devicetree/bindings/arm/l2cc.txt +++ b/Documentation/devicetree/bindings/arm/l2cc.txt @@ -37,7 +37,7 @@ L2: cache-controller { reg = <0xfff12000 0x1000>; arm,data-latency = <1 1 1>; arm,tag-latency = <2 2 2>; - arm,filter-latency = <0x80000000 0x8000000>; + arm,filter-ranges = <0x80000000 0x8000000>; cache-unified; cache-level = <2>; interrupts = <45>; diff --git a/Documentation/devicetree/bindings/clock/imx23-clock.txt b/Documentation/devicetree/bindings/clock/imx23-clock.txt index a0b867ef8d96..baadbb11fe98 100644 --- a/Documentation/devicetree/bindings/clock/imx23-clock.txt +++ b/Documentation/devicetree/bindings/clock/imx23-clock.txt @@ -52,7 +52,7 @@ clocks and IDs. lcdif 38 etm 39 usb 40 - usb_pwr 41 + usb_phy 41 Examples: diff --git a/Documentation/devicetree/bindings/clock/imx28-clock.txt b/Documentation/devicetree/bindings/clock/imx28-clock.txt index aa2af2866fe8..52a49a4a50b3 100644 --- a/Documentation/devicetree/bindings/clock/imx28-clock.txt +++ b/Documentation/devicetree/bindings/clock/imx28-clock.txt @@ -73,8 +73,8 @@ clocks and IDs. can1 59 usb0 60 usb1 61 - usb0_pwr 62 - usb1_pwr 63 + usb0_phy 62 + usb1_phy 63 enet_out 64 Examples: diff --git a/Documentation/devicetree/bindings/cpufreq/cpufreq-spear.txt b/Documentation/devicetree/bindings/cpufreq/cpufreq-spear.txt new file mode 100644 index 000000000000..f3d44984d91c --- /dev/null +++ b/Documentation/devicetree/bindings/cpufreq/cpufreq-spear.txt @@ -0,0 +1,42 @@ +SPEAr cpufreq driver +------------------- + +SPEAr SoC cpufreq driver for CPU frequency scaling. +It supports both uniprocessor (UP) and symmetric multiprocessor (SMP) systems +which share clock across all CPUs. + +Required properties: +- cpufreq_tbl: Table of frequencies CPU could be transitioned into, in the + increasing order. + +Optional properties: +- clock-latency: Specify the possible maximum transition latency for clock, in + unit of nanoseconds. + +Both required and optional properties listed above must be defined under node +/cpus/cpu@0. + +Examples: +-------- +cpus { + + <...> + + cpu@0 { + compatible = "arm,cortex-a9"; + reg = <0>; + + <...> + + cpufreq_tbl = < 166000 + 200000 + 250000 + 300000 + 400000 + 500000 + 600000 >; + }; + + <...> + +}; diff --git a/Documentation/devicetree/bindings/gpio/gpio-stmpe.txt b/Documentation/devicetree/bindings/gpio/gpio-stmpe.txt new file mode 100644 index 000000000000..a0e4cf885213 --- /dev/null +++ b/Documentation/devicetree/bindings/gpio/gpio-stmpe.txt @@ -0,0 +1,18 @@ +STMPE gpio +---------- + +Required properties: + - compatible: "st,stmpe-gpio" + +Optional properties: + - st,norequest-mask: bitmask specifying which GPIOs should _not_ be requestable + due to different usage (e.g. touch, keypad) + +Node name must be stmpe_gpio and should be child node of stmpe node to which it +belongs. + +Example: + stmpe_gpio { + compatible = "st,stmpe-gpio"; + st,norequest-mask = <0x20>; //gpio 5 can't be used + }; diff --git a/Documentation/devicetree/bindings/gpio/gpio.txt b/Documentation/devicetree/bindings/gpio/gpio.txt index 4e16ba4feab0..a33628759d36 100644 --- a/Documentation/devicetree/bindings/gpio/gpio.txt +++ b/Documentation/devicetree/bindings/gpio/gpio.txt @@ -75,4 +75,40 @@ Example of two SOC GPIO banks defined as gpio-controller nodes: gpio-controller; }; +2.1) gpio-controller and pinctrl subsystem +------------------------------------------ +gpio-controller on a SOC might be tightly coupled with the pinctrl +subsystem, in the sense that the pins can be used by other functions +together with optional gpio feature. + +While the pin allocation is totally managed by the pin ctrl subsystem, +gpio (under gpiolib) is still maintained by gpio drivers. It may happen +that different pin ranges in a SoC is managed by different gpio drivers. + +This makes it logical to let gpio drivers announce their pin ranges to +the pin ctrl subsystem and call 'pinctrl_request_gpio' in order to +request the corresponding pin before any gpio usage. + +For this, the gpio controller can use a pinctrl phandle and pins to +announce the pinrange to the pin ctrl subsystem. For example, + + qe_pio_e: gpio-controller@1460 { + #gpio-cells = <2>; + compatible = "fsl,qe-pario-bank-e", "fsl,qe-pario-bank"; + reg = <0x1460 0x18>; + gpio-controller; + gpio-ranges = <&pinctrl1 20 10>, <&pinctrl2 50 20>; + + } + +where, + &pinctrl1 and &pinctrl2 is the phandle to the pinctrl DT node. + + Next values specify the base pin and number of pins for the range + handled by 'qe_pio_e' gpio. In the given example from base pin 20 to + pin 29 under pinctrl1 and pin 50 to pin 69 under pinctrl2 is handled + by this gpio controller. + +The pinctrl node must have "#gpio-range-cells" property to show number of +arguments to pass with phandle from gpio controllers node. diff --git a/Documentation/devicetree/bindings/gpio/gpio_atmel.txt b/Documentation/devicetree/bindings/gpio/gpio_atmel.txt index 66efc804806a..85f8c0d084fa 100644 --- a/Documentation/devicetree/bindings/gpio/gpio_atmel.txt +++ b/Documentation/devicetree/bindings/gpio/gpio_atmel.txt @@ -9,6 +9,10 @@ Required properties: unused). - gpio-controller: Marks the device node as a GPIO controller. +optional properties: +- #gpio-lines: Number of gpio if absent 32. + + Example: pioA: gpio@fffff200 { compatible = "atmel,at91rm9200-gpio"; @@ -16,5 +20,6 @@ Example: interrupts = <2 4>; #gpio-cells = <2>; gpio-controller; + #gpio-lines = <19>; }; diff --git a/Documentation/devicetree/bindings/gpio/spear_spics.txt b/Documentation/devicetree/bindings/gpio/spear_spics.txt new file mode 100644 index 000000000000..96c37eb15075 --- /dev/null +++ b/Documentation/devicetree/bindings/gpio/spear_spics.txt @@ -0,0 +1,50 @@ +=== ST Microelectronics SPEAr SPI CS Driver === + +SPEAr platform provides a provision to control chipselects of ARM PL022 Prime +Cell spi controller through its system registers, which otherwise remains under +PL022 control. If chipselect remain under PL022 control then they would be +released as soon as transfer is over and TxFIFO becomes empty. This is not +desired by some of the device protocols above spi which expect (multiple) +transfers without releasing their chipselects. + +Chipselects can be controlled by software by turning them as GPIOs. SPEAr +provides another interface through system registers through which software can +directly control each PL022 chipselect. Hence, it is natural for SPEAr to export +the control of this interface as gpio. + +Required properties: + + * compatible: should be defined as "st,spear-spics-gpio" + * reg: mentioning address range of spics controller + * st-spics,peripcfg-reg: peripheral configuration register offset + * st-spics,sw-enable-bit: bit offset to enable sw control + * st-spics,cs-value-bit: bit offset to drive chipselect low or high + * st-spics,cs-enable-mask: chip select number bit mask + * st-spics,cs-enable-shift: chip select number program offset + * gpio-controller: Marks the device node as gpio controller + * #gpio-cells: should be 1 and will mention chip select number + +All the above bit offsets are within peripcfg register. + +Example: +------- +spics: spics@e0700000{ + compatible = "st,spear-spics-gpio"; + reg = <0xe0700000 0x1000>; + st-spics,peripcfg-reg = <0x3b0>; + st-spics,sw-enable-bit = <12>; + st-spics,cs-value-bit = <11>; + st-spics,cs-enable-mask = <3>; + st-spics,cs-enable-shift = <8>; + gpio-controller; + #gpio-cells = <2>; +}; + + +spi0: spi@e0100000 { + status = "okay"; + num-cs = <3>; + cs-gpios = <&gpio1 7 0>, <&spics 0>, + <&spics 1>; + ... +} diff --git a/Documentation/devicetree/bindings/i2c/atmel-i2c.txt b/Documentation/devicetree/bindings/i2c/i2c-at91.txt index b689a0d9441c..b689a0d9441c 100644 --- a/Documentation/devicetree/bindings/i2c/atmel-i2c.txt +++ b/Documentation/devicetree/bindings/i2c/i2c-at91.txt diff --git a/Documentation/devicetree/bindings/i2c/davinci.txt b/Documentation/devicetree/bindings/i2c/i2c-davinci.txt index 2dc935b4113d..2dc935b4113d 100644 --- a/Documentation/devicetree/bindings/i2c/davinci.txt +++ b/Documentation/devicetree/bindings/i2c/i2c-davinci.txt diff --git a/Documentation/devicetree/bindings/i2c/gpio-i2c.txt b/Documentation/devicetree/bindings/i2c/i2c-gpio.txt index 4f8ec947c6bd..4f8ec947c6bd 100644 --- a/Documentation/devicetree/bindings/i2c/gpio-i2c.txt +++ b/Documentation/devicetree/bindings/i2c/i2c-gpio.txt diff --git a/Documentation/devicetree/bindings/i2c/fsl-imx-i2c.txt b/Documentation/devicetree/bindings/i2c/i2c-imx.txt index f3cf43b66f7e..f3cf43b66f7e 100644 --- a/Documentation/devicetree/bindings/i2c/fsl-imx-i2c.txt +++ b/Documentation/devicetree/bindings/i2c/i2c-imx.txt diff --git a/Documentation/devicetree/bindings/i2c/fsl-i2c.txt b/Documentation/devicetree/bindings/i2c/i2c-mpc.txt index 1eacd6b20ed5..1eacd6b20ed5 100644 --- a/Documentation/devicetree/bindings/i2c/fsl-i2c.txt +++ b/Documentation/devicetree/bindings/i2c/i2c-mpc.txt diff --git a/Documentation/devicetree/bindings/i2c/mux.txt b/Documentation/devicetree/bindings/i2c/i2c-mux.txt index af84cce5cd7b..af84cce5cd7b 100644 --- a/Documentation/devicetree/bindings/i2c/mux.txt +++ b/Documentation/devicetree/bindings/i2c/i2c-mux.txt diff --git a/Documentation/devicetree/bindings/i2c/i2c-mv64xxx.txt b/Documentation/devicetree/bindings/i2c/i2c-mv64xxx.txt new file mode 100644 index 000000000000..f46d928aa73d --- /dev/null +++ b/Documentation/devicetree/bindings/i2c/i2c-mv64xxx.txt @@ -0,0 +1,18 @@ + +* Marvell MV64XXX I2C controller + +Required properties : + + - reg : Offset and length of the register set for the device + - compatible : Should be "marvell,mv64xxx-i2c" + - interrupts : The interrupt number + - clock-frequency : Desired I2C bus clock frequency in Hz. + +Examples: + + i2c@11000 { + compatible = "marvell,mv64xxx-i2c"; + reg = <0x11000 0x20>; + interrupts = <29>; + clock-frequency = <100000>; + }; diff --git a/Documentation/devicetree/bindings/i2c/nomadik.txt b/Documentation/devicetree/bindings/i2c/i2c-nomadik.txt index 72065b0ff680..72065b0ff680 100644 --- a/Documentation/devicetree/bindings/i2c/nomadik.txt +++ b/Documentation/devicetree/bindings/i2c/i2c-nomadik.txt diff --git a/Documentation/devicetree/bindings/i2c/cavium-i2c.txt b/Documentation/devicetree/bindings/i2c/i2c-octeon.txt index dced82ebe31d..dced82ebe31d 100644 --- a/Documentation/devicetree/bindings/i2c/cavium-i2c.txt +++ b/Documentation/devicetree/bindings/i2c/i2c-octeon.txt diff --git a/Documentation/devicetree/bindings/i2c/omap-i2c.txt b/Documentation/devicetree/bindings/i2c/i2c-omap.txt index 56564aa4b444..56564aa4b444 100644 --- a/Documentation/devicetree/bindings/i2c/omap-i2c.txt +++ b/Documentation/devicetree/bindings/i2c/i2c-omap.txt diff --git a/Documentation/devicetree/bindings/i2c/pnx.txt b/Documentation/devicetree/bindings/i2c/i2c-pnx.txt index fe98ada33ee4..fe98ada33ee4 100644 --- a/Documentation/devicetree/bindings/i2c/pnx.txt +++ b/Documentation/devicetree/bindings/i2c/i2c-pnx.txt diff --git a/Documentation/devicetree/bindings/i2c/ce4100-i2c.txt b/Documentation/devicetree/bindings/i2c/i2c-pxa-pci-ce4100.txt index 569b16248514..569b16248514 100644 --- a/Documentation/devicetree/bindings/i2c/ce4100-i2c.txt +++ b/Documentation/devicetree/bindings/i2c/i2c-pxa-pci-ce4100.txt diff --git a/Documentation/devicetree/bindings/i2c/mrvl-i2c.txt b/Documentation/devicetree/bindings/i2c/i2c-pxa.txt index 0f7945019f6f..12b78ac507e9 100644 --- a/Documentation/devicetree/bindings/i2c/mrvl-i2c.txt +++ b/Documentation/devicetree/bindings/i2c/i2c-pxa.txt @@ -31,21 +31,3 @@ Examples: reg = <0xd4025000 0x1000>; interrupts = <58>; }; - -* Marvell MV64XXX I2C controller - -Required properties : - - - reg : Offset and length of the register set for the device - - compatible : Should be "marvell,mv64xxx-i2c" - - interrupts : The interrupt number - - clock-frequency : Desired I2C bus clock frequency in Hz. - -Examples: - - i2c@11000 { - compatible = "marvell,mv64xxx-i2c"; - reg = <0x11000 0x20>; - interrupts = <29>; - clock-frequency = <100000>; - }; diff --git a/Documentation/devicetree/bindings/i2c/samsung-i2c.txt b/Documentation/devicetree/bindings/i2c/i2c-s3c2410.txt index b6cb5a12c672..b6cb5a12c672 100644 --- a/Documentation/devicetree/bindings/i2c/samsung-i2c.txt +++ b/Documentation/devicetree/bindings/i2c/i2c-s3c2410.txt diff --git a/Documentation/devicetree/bindings/i2c/sirf-i2c.txt b/Documentation/devicetree/bindings/i2c/i2c-sirf.txt index 7baf9e133fa8..7baf9e133fa8 100644 --- a/Documentation/devicetree/bindings/i2c/sirf-i2c.txt +++ b/Documentation/devicetree/bindings/i2c/i2c-sirf.txt diff --git a/Documentation/devicetree/bindings/i2c/arm-versatile.txt b/Documentation/devicetree/bindings/i2c/i2c-versatile.txt index 361d31c51b6f..361d31c51b6f 100644 --- a/Documentation/devicetree/bindings/i2c/arm-versatile.txt +++ b/Documentation/devicetree/bindings/i2c/i2c-versatile.txt diff --git a/Documentation/devicetree/bindings/i2c/xiic.txt b/Documentation/devicetree/bindings/i2c/i2c-xiic.txt index ceabbe91ae44..ceabbe91ae44 100644 --- a/Documentation/devicetree/bindings/i2c/xiic.txt +++ b/Documentation/devicetree/bindings/i2c/i2c-xiic.txt diff --git a/Documentation/devicetree/bindings/leds/common.txt b/Documentation/devicetree/bindings/leds/common.txt new file mode 100644 index 000000000000..2d88816dd550 --- /dev/null +++ b/Documentation/devicetree/bindings/leds/common.txt @@ -0,0 +1,23 @@ +Common leds properties. + +Optional properties for child nodes: +- label : The label for this LED. If omitted, the label is + taken from the node name (excluding the unit address). + +- linux,default-trigger : This parameter, if present, is a + string defining the trigger assigned to the LED. Current triggers are: + "backlight" - LED will act as a back-light, controlled by the framebuffer + system + "default-on" - LED will turn on (but for leds-gpio see "default-state" + property in Documentation/devicetree/bindings/gpio/led.txt) + "heartbeat" - LED "double" flashes at a load average based rate + "ide-disk" - LED indicates disk activity + "timer" - LED flashes at a fixed, configurable rate + +Examples: + +system-status { + label = "Status"; + linux,default-trigger = "heartbeat"; + ... +}; diff --git a/Documentation/devicetree/bindings/gpio/led.txt b/Documentation/devicetree/bindings/leds/leds-gpio.txt index edc83c1c0d54..df1b3080f6b8 100644 --- a/Documentation/devicetree/bindings/gpio/led.txt +++ b/Documentation/devicetree/bindings/leds/leds-gpio.txt @@ -10,16 +10,10 @@ LED sub-node properties: - gpios : Should specify the LED's GPIO, see "gpios property" in Documentation/devicetree/bindings/gpio/gpio.txt. Active low LEDs should be indicated using flags in the GPIO specifier. -- label : (optional) The label for this LED. If omitted, the label is - taken from the node name (excluding the unit address). -- linux,default-trigger : (optional) This parameter, if present, is a - string defining the trigger assigned to the LED. Current triggers are: - "backlight" - LED will act as a back-light, controlled by the framebuffer - system - "default-on" - LED will turn on, but see "default-state" below - "heartbeat" - LED "double" flashes at a load average based rate - "ide-disk" - LED indicates disk activity - "timer" - LED flashes at a fixed, configurable rate +- label : (optional) + see Documentation/devicetree/bindings/leds/common.txt +- linux,default-trigger : (optional) + see Documentation/devicetree/bindings/leds/common.txt - default-state: (optional) The initial state of the LED. Valid values are "on", "off", and "keep". If the LED is already on or off and the default-state property is set the to same value, then no diff --git a/Documentation/devicetree/bindings/mmc/mmc.txt b/Documentation/devicetree/bindings/mmc/mmc.txt index 8e2e0ba2f486..a591c6741d75 100644 --- a/Documentation/devicetree/bindings/mmc/mmc.txt +++ b/Documentation/devicetree/bindings/mmc/mmc.txt @@ -21,6 +21,12 @@ Optional properties: - cd-inverted: when present, polarity on the cd gpio line is inverted - wp-inverted: when present, polarity on the wp gpio line is inverted - max-frequency: maximum operating clock frequency +- no-1-8-v: when present, denotes that 1.8v card voltage is not supported on + this system, even if the controller claims it is. + +Optional SDIO properties: +- keep-power-in-suspend: Preserves card power during a suspend/resume cycle +- enable-sdio-wakeup: Enables wake up of host system on SDIO IRQ assertion Example: @@ -33,4 +39,6 @@ sdhci@ab000000 { cd-inverted; wp-gpios = <&gpio 70 0>; max-frequency = <50000000>; + keep-power-in-suspend; + enable-sdio-wakeup; } diff --git a/Documentation/devicetree/bindings/mmc/samsung-sdhci.txt b/Documentation/devicetree/bindings/mmc/samsung-sdhci.txt index 630a7d7f4718..97e9e315400d 100644 --- a/Documentation/devicetree/bindings/mmc/samsung-sdhci.txt +++ b/Documentation/devicetree/bindings/mmc/samsung-sdhci.txt @@ -12,10 +12,6 @@ is used. The Samsung's SDHCI controller bindings extends this as listed below. [A] The property "samsung,cd-pinmux-gpio" can be used as stated in the "Optional Board Specific Properties" section below. -[B] If core card-detect bindings and "samsung,cd-pinmux-gpio" property - is not specified, it is assumed that there is no card detection - mechanism used. - Required SoC Specific Properties: - compatible: should be one of the following - "samsung,s3c6410-sdhci": For controllers compatible with s3c6410 sdhci @@ -24,14 +20,18 @@ Required SoC Specific Properties: controller. Required Board Specific Properties: -- gpios: Should specify the gpios used for clock, command and data lines. The - gpio specifier format depends on the gpio controller. +- Samsung GPIO variant (will be completely replaced by pinctrl): + - gpios: Should specify the gpios used for clock, command and data lines. The + gpio specifier format depends on the gpio controller. +- Pinctrl variant (preferred if available): + - pinctrl-0: Should specify pin control groups used for this controller. + - pinctrl-names: Should contain only one value - "default". Optional Board Specific Properties: - samsung,cd-pinmux-gpio: Specifies the card detect line that is routed through a pinmux to the card-detect pin of the card slot. This property should be used only if none of the mmc core card-detect properties are - used. + used. Only for Samsung GPIO variant. Example: sdhci@12530000 { @@ -40,12 +40,18 @@ Example: interrupts = <0 75 0>; bus-width = <4>; cd-gpios = <&gpk2 2 2 3 3>; + + /* Samsung GPIO variant */ gpios = <&gpk2 0 2 0 3>, /* clock line */ <&gpk2 1 2 0 3>, /* command line */ <&gpk2 3 2 3 3>, /* data line 0 */ <&gpk2 4 2 3 3>, /* data line 1 */ <&gpk2 5 2 3 3>, /* data line 2 */ <&gpk2 6 2 3 3>; /* data line 3 */ + + /* Pinctrl variant */ + pinctrl-0 = <&sd0_clk &sd0_cmd &sd0_bus4>; + pinctrl-names = "default"; }; Note: This example shows both SoC specific and board specific properties diff --git a/Documentation/devicetree/bindings/mmc/synposis-dw-mshc.txt b/Documentation/devicetree/bindings/mmc/synopsis-dw-mshc.txt index 06cd32d08052..06cd32d08052 100644 --- a/Documentation/devicetree/bindings/mmc/synposis-dw-mshc.txt +++ b/Documentation/devicetree/bindings/mmc/synopsis-dw-mshc.txt diff --git a/Documentation/devicetree/bindings/mmc/ti-omap-hsmmc.txt b/Documentation/devicetree/bindings/mmc/ti-omap-hsmmc.txt index be76a23b34c4..ed271fc255b2 100644 --- a/Documentation/devicetree/bindings/mmc/ti-omap-hsmmc.txt +++ b/Documentation/devicetree/bindings/mmc/ti-omap-hsmmc.txt @@ -19,6 +19,7 @@ ti,dual-volt: boolean, supports dual voltage cards "supply-name" examples are "vmmc", "vmmc_aux" etc ti,non-removable: non-removable slot (like eMMC) ti,needs-special-reset: Requires a special softreset sequence +ti,needs-special-hs-handling: HSMMC IP needs special setting for handling High Speed Example: mmc1: mmc@0x4809c000 { diff --git a/Documentation/devicetree/bindings/mmc/vt8500-sdmmc.txt b/Documentation/devicetree/bindings/mmc/vt8500-sdmmc.txt new file mode 100644 index 000000000000..d7fb6abb3eb8 --- /dev/null +++ b/Documentation/devicetree/bindings/mmc/vt8500-sdmmc.txt @@ -0,0 +1,23 @@ +* Wondermedia WM8505/WM8650 SD/MMC Host Controller + +This file documents differences between the core properties described +by mmc.txt and the properties used by the wmt-sdmmc driver. + +Required properties: +- compatible: Should be "wm,wm8505-sdhc". +- interrupts: Two interrupts are required - regular irq and dma irq. + +Optional properties: +- sdon-inverted: SD_ON bit is inverted on the controller + +Examples: + +sdhc@d800a000 { + compatible = "wm,wm8505-sdhc"; + reg = <0xd800a000 0x1000>; + interrupts = <20 21>; + clocks = <&sdhc>; + bus-width = <4>; + sdon-inverted; +}; + diff --git a/Documentation/devicetree/bindings/net/mdio-gpio.txt b/Documentation/devicetree/bindings/net/mdio-gpio.txt index bc9549529014..c79bab025369 100644 --- a/Documentation/devicetree/bindings/net/mdio-gpio.txt +++ b/Documentation/devicetree/bindings/net/mdio-gpio.txt @@ -8,9 +8,16 @@ gpios property as described in section VIII.1 in the following order: MDC, MDIO. +Note: Each gpio-mdio bus should have an alias correctly numbered in "aliases" +node. + Example: -mdio { +aliases { + mdio-gpio0 = <&mdio0>; +}; + +mdio0: mdio { compatible = "virtual,mdio-gpio"; #address-cells = <1>; #size-cells = <0>; diff --git a/Documentation/devicetree/bindings/pinctrl/atmel,at91-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/atmel,at91-pinctrl.txt new file mode 100644 index 000000000000..3a268127b054 --- /dev/null +++ b/Documentation/devicetree/bindings/pinctrl/atmel,at91-pinctrl.txt @@ -0,0 +1,141 @@ +* Atmel AT91 Pinmux Controller + +The AT91 Pinmux Controler, enables the IC +to share one PAD to several functional blocks. The sharing is done by +multiplexing the PAD input/output signals. For each PAD there are up to +8 muxing options (called periph modes). Since different modules require +different PAD settings (like pull up, keeper, etc) the contoller controls +also the PAD settings parameters. + +Please refer to pinctrl-bindings.txt in this directory for details of the +common pinctrl bindings used by client devices, including the meaning of the +phrase "pin configuration node". + +Atmel AT91 pin configuration node is a node of a group of pins which can be +used for a specific device or function. This node represents both mux and config +of the pins in that group. The 'pins' selects the function mode(also named pin +mode) this pin can work on and the 'config' configures various pad settings +such as pull-up, multi drive, etc. + +Required properties for iomux controller: +- compatible: "atmel,at91rm9200-pinctrl" +- atmel,mux-mask: array of mask (periph per bank) to describe if a pin can be + configured in this periph mode. All the periph and bank need to be describe. + +How to create such array: + +Each column will represent the possible peripheral of the pinctrl +Each line will represent a pio bank + +Take an example on the 9260 +Peripheral: 2 ( A and B) +Bank: 3 (A, B and C) +=> + + /* A B */ + 0xffffffff 0xffc00c3b /* pioA */ + 0xffffffff 0x7fff3ccf /* pioB */ + 0xffffffff 0x007fffff /* pioC */ + +For each peripheral/bank we will descibe in a u32 if a pin can can be +configured in it by putting 1 to the pin bit (1 << pin) + +Let's take the pioA on peripheral B +From the datasheet Table 10-2. +Peripheral B +PA0 MCDB0 +PA1 MCCDB +PA2 +PA3 MCDB3 +PA4 MCDB2 +PA5 MCDB1 +PA6 +PA7 +PA8 +PA9 +PA10 ETX2 +PA11 ETX3 +PA12 +PA13 +PA14 +PA15 +PA16 +PA17 +PA18 +PA19 +PA20 +PA21 +PA22 ETXER +PA23 ETX2 +PA24 ETX3 +PA25 ERX2 +PA26 ERX3 +PA27 ERXCK +PA28 ECRS +PA29 ECOL +PA30 RXD4 +PA31 TXD4 + +=> 0xffc00c3b + +Required properties for pin configuration node: +- atmel,pins: 4 integers array, represents a group of pins mux and config + setting. The format is atmel,pins = <PIN_BANK PIN_BANK_NUM PERIPH CONFIG>. + The PERIPH 0 means gpio. + +Bits used for CONFIG: +PULL_UP (1 << 0): indicate this pin need a pull up. +MULTIDRIVE (1 << 1): indicate this pin need to be configured as multidrive. +DEGLITCH (1 << 2): indicate this pin need deglitch. +PULL_DOWN (1 << 3): indicate this pin need a pull down. +DIS_SCHMIT (1 << 4): indicate this pin need to disable schmit trigger. +DEBOUNCE (1 << 16): indicate this pin need debounce. +DEBOUNCE_VAL (0x3fff << 17): debounce val. + +NOTE: +Some requirements for using atmel,at91rm9200-pinctrl binding: +1. We have pin function node defined under at91 controller node to represent + what pinmux functions this SoC supports. +2. The driver can use the function node's name and pin configuration node's + name describe the pin function and group hierarchy. + For example, Linux at91 pinctrl driver takes the function node's name + as the function name and pin configuration node's name as group name to + create the map table. +3. Each pin configuration node should have a phandle, devices can set pins + configurations by referring to the phandle of that pin configuration node. +4. The gpio controller must be describe in the pinctrl simple-bus. + +Examples: + +pinctrl@fffff400 { + #address-cells = <1>; + #size-cells = <1>; + ranges; + compatible = "atmel,at91rm9200-pinctrl", "simple-bus"; + reg = <0xfffff400 0x600>; + + atmel,mux-mask = < + /* A B */ + 0xffffffff 0xffc00c3b /* pioA */ + 0xffffffff 0x7fff3ccf /* pioB */ + 0xffffffff 0x007fffff /* pioC */ + >; + + /* shared pinctrl settings */ + dbgu { + pinctrl_dbgu: dbgu-0 { + atmel,pins = + <1 14 0x1 0x0 /* PB14 periph A */ + 1 15 0x1 0x1>; /* PB15 periph with pullup */ + }; + }; +}; + +dbgu: serial@fffff200 { + compatible = "atmel,at91sam9260-usart"; + reg = <0xfffff200 0x200>; + interrupts = <1 4 7>; + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_dbgu>; + status = "disabled"; +}; diff --git a/Documentation/devicetree/bindings/rtc/orion-rtc.txt b/Documentation/devicetree/bindings/rtc/orion-rtc.txt new file mode 100644 index 000000000000..3bf63ffa5160 --- /dev/null +++ b/Documentation/devicetree/bindings/rtc/orion-rtc.txt @@ -0,0 +1,18 @@ +* Mvebu Real Time Clock + +RTC controller for the Kirkwood, the Dove, the Armada 370 and the +Armada XP SoCs + +Required properties: +- compatible : Should be "marvell,orion-rtc" +- reg: physical base address of the controller and length of memory mapped + region. +- interrupts: IRQ line for the RTC. + +Example: + +rtc@10300 { + compatible = "marvell,orion-rtc"; + reg = <0xd0010300 0x20>; + interrupts = <50>; +}; diff --git a/Documentation/devicetree/bindings/tty/serial/fsl-mxs-auart.txt b/Documentation/devicetree/bindings/tty/serial/fsl-mxs-auart.txt index 2ee903fad25c..273a8d5b3300 100644 --- a/Documentation/devicetree/bindings/tty/serial/fsl-mxs-auart.txt +++ b/Documentation/devicetree/bindings/tty/serial/fsl-mxs-auart.txt @@ -6,11 +6,19 @@ Required properties: - reg : Address and length of the register set for the device - interrupts : Should contain the auart interrupt numbers +Optional properties: +- fsl,auart-dma-channel : The DMA channels, the first is for RX, the other + is for TX. If you add this property, it also means that you + will enable the DMA support for the auart. + Note: due to the hardware bug in imx23(see errata : 2836), + only the imx28 can enable the DMA support for the auart. + Example: auart0: serial@8006a000 { compatible = "fsl,imx28-auart", "fsl,imx23-auart"; reg = <0x8006a000 0x2000>; interrupts = <112 70 71>; + fsl,auart-dma-channel = <8 9>; }; Note: Each auart port should have an alias correctly numbered in "aliases" diff --git a/Documentation/devicetree/bindings/tty/serial/of-serial.txt b/Documentation/devicetree/bindings/tty/serial/of-serial.txt index ba385f2e0ddc..1e1145ca4f3c 100644 --- a/Documentation/devicetree/bindings/tty/serial/of-serial.txt +++ b/Documentation/devicetree/bindings/tty/serial/of-serial.txt @@ -14,7 +14,10 @@ Required properties: - "serial" if the port type is unknown. - reg : offset and length of the register set for the device. - interrupts : should contain uart interrupt. -- clock-frequency : the input clock frequency for the UART. +- clock-frequency : the input clock frequency for the UART + or + clocks phandle to refer to the clk used as per Documentation/devicetree + /bindings/clock/clock-bindings.txt Optional properties: - current-speed : the current active speed of the UART. diff --git a/Documentation/devicetree/bindings/usb/am33xx-usb.txt b/Documentation/devicetree/bindings/usb/am33xx-usb.txt index ca8fa56e9f03..a92250512a4e 100644 --- a/Documentation/devicetree/bindings/usb/am33xx-usb.txt +++ b/Documentation/devicetree/bindings/usb/am33xx-usb.txt @@ -3,12 +3,12 @@ AM33XX MUSB GLUE - ti,hwmods : must be "usb_otg_hs" - multipoint : Should be "1" indicating the musb controller supports multipoint. This is a MUSB configuration-specific setting. - - num_eps : Specifies the number of endpoints. This is also a + - num-eps : Specifies the number of endpoints. This is also a MUSB configuration-specific setting. Should be set to "16" - - ram_bits : Specifies the ram address size. Should be set to "12" - - port0_mode : Should be "3" to represent OTG. "1" signifies HOST and "2" + - ram-bits : Specifies the ram address size. Should be set to "12" + - port0-mode : Should be "3" to represent OTG. "1" signifies HOST and "2" represents PERIPHERAL. - - port1_mode : Should be "1" to represent HOST. "3" signifies OTG and "2" + - port1-mode : Should be "1" to represent HOST. "3" signifies OTG and "2" represents PERIPHERAL. - power : Should be "250". This signifies the controller can supply upto 500mA when operating in host mode. diff --git a/Documentation/devicetree/bindings/vendor-prefixes.txt b/Documentation/devicetree/bindings/vendor-prefixes.txt index 9de2b9ff9d6e..770a0193ca1b 100644 --- a/Documentation/devicetree/bindings/vendor-prefixes.txt +++ b/Documentation/devicetree/bindings/vendor-prefixes.txt @@ -5,6 +5,7 @@ using them to avoid name-space collisions. ad Avionic Design GmbH adi Analog Devices, Inc. +ak Asahi Kasei Corp. amcc Applied Micro Circuits Corporation (APM, formally AMCC) apm Applied Micro Circuits Corporation (APM) arm ARM Ltd. @@ -25,6 +26,7 @@ gef GE Fanuc Intelligent Platforms Embedded Systems, Inc. hp Hewlett Packard ibm International Business Machines (IBM) idt Integrated Device Technologies, Inc. +img Imagination Technologies Ltd. intercontrol Inter Control Group linux Linux-specific binding marvell Marvell Technology Group Ltd. @@ -34,8 +36,9 @@ national National Semiconductor nintendo Nintendo nvidia NVIDIA nxp NXP Semiconductors +onnn ON Semiconductor Corp. picochip Picochip Ltd -powervr Imagination Technologies +powervr PowerVR (deprecated, use img) qcom Qualcomm, Inc. ramtron Ramtron International realtek Realtek Semiconductor Corp. @@ -45,6 +48,7 @@ schindler Schindler sil Silicon Image simtek sirf SiRF Technology, Inc. +snps Synopsys, Inc. st STMicroelectronics stericsson ST-Ericsson ti Texas Instruments diff --git a/Documentation/devicetree/usage-model.txt b/Documentation/devicetree/usage-model.txt index dca90fe22a90..ef9d06c9f8fd 100644 --- a/Documentation/devicetree/usage-model.txt +++ b/Documentation/devicetree/usage-model.txt @@ -347,7 +347,7 @@ later), which will happily live at the base of the Linux /sys/devices tree. Therefore, if a DT node is at the root of the tree, then it really probably is best registered as a platform_device. -Linux board support code calls of_platform_populate(NULL, NULL, NULL) +Linux board support code calls of_platform_populate(NULL, NULL, NULL, NULL) to kick off discovery of devices at the root of the tree. The parameters are all NULL because when starting from the root of the tree, there is no need to provide a starting node (the first NULL), a diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index a1793d670cd0..3844d21d6ca3 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt @@ -33,7 +33,7 @@ Table of Contents 2 Modifying System Parameters 3 Per-Process Parameters - 3.1 /proc/<pid>/oom_score_adj - Adjust the oom-killer + 3.1 /proc/<pid>/oom_adj & /proc/<pid>/oom_score_adj - Adjust the oom-killer score 3.2 /proc/<pid>/oom_score - Display current oom-killer score 3.3 /proc/<pid>/io - Display the IO accounting fields @@ -1320,10 +1320,10 @@ of the kernel. CHAPTER 3: PER-PROCESS PARAMETERS ------------------------------------------------------------------------------ -3.1 /proc/<pid>/oom_score_adj- Adjust the oom-killer score +3.1 /proc/<pid>/oom_adj & /proc/<pid>/oom_score_adj- Adjust the oom-killer score -------------------------------------------------------------------------------- -This file can be used to adjust the badness heuristic used to select which +These file can be used to adjust the badness heuristic used to select which process gets killed in out of memory conditions. The badness heuristic assigns a value to each candidate task ranging from 0 @@ -1361,6 +1361,12 @@ same system, cpuset, mempolicy, or memory controller resources to use at least equivalent to discounting 50% of the task's allowed memory from being considered as scoring against the task. +For backwards compatibility with previous kernels, /proc/<pid>/oom_adj may also +be used to tune the badness score. Its acceptable values range from -16 +(OOM_ADJUST_MIN) to +15 (OOM_ADJUST_MAX) and a special value of -17 +(OOM_DISABLE) to disable oom killing entirely for that task. Its value is +scaled linearly with /proc/<pid>/oom_score_adj. + The value of /proc/<pid>/oom_score_adj may be reduced no lower than the last value set by a CAP_SYS_RESOURCE process. To reduce the value any lower requires CAP_SYS_RESOURCE. @@ -1375,7 +1381,9 @@ minimal amount of work. ------------------------------------------------------------- This file can be used to check the current score used by the oom-killer is for -any given <pid>. +any given <pid>. Use it together with /proc/<pid>/oom_score_adj to tune which +process should be killed in an out-of-memory situation. + 3.3 /proc/<pid>/io - Display the IO accounting fields ------------------------------------------------------- diff --git a/Documentation/firmware_class/README b/Documentation/firmware_class/README index 815b711bcd85..43fada989e65 100644 --- a/Documentation/firmware_class/README +++ b/Documentation/firmware_class/README @@ -22,12 +22,17 @@ - calls request_firmware(&fw_entry, $FIRMWARE, device) - kernel searchs the fimware image with name $FIRMWARE directly in the below search path of root filesystem: + User customized search path by module parameter 'path'[1] "/lib/firmware/updates/" UTS_RELEASE, "/lib/firmware/updates", "/lib/firmware/" UTS_RELEASE, "/lib/firmware" - If found, goto 7), else goto 2) + [1], the 'path' is a string parameter which length should be less + than 256, user should pass 'firmware_class.path=$CUSTOMIZED_PATH' + if firmware_class is built in kernel(the general situation) + 2), userspace: - /sys/class/firmware/xxx/{loading,data} appear. - hotplug gets called with a firmware identifier in $FIRMWARE @@ -114,3 +119,10 @@ on the setup, so I think that the choice on what firmware to make persistent should be left to userspace. + about firmware cache: + -------------------- + After firmware cache mechanism is introduced during system sleep, + request_firmware can be called safely inside device's suspend and + resume callback, and callers need't cache the firmware by + themselves any more for dealing with firmware loss during system + resume. diff --git a/Documentation/gpio.txt b/Documentation/gpio.txt index e08a883de36e..77a1d11af723 100644 --- a/Documentation/gpio.txt +++ b/Documentation/gpio.txt @@ -439,6 +439,48 @@ slower clock delays the rising edge of SCK, and the I2C master adjusts its signaling rate accordingly. +GPIO controllers and the pinctrl subsystem +------------------------------------------ + +A GPIO controller on a SOC might be tightly coupled with the pinctrl +subsystem, in the sense that the pins can be used by other functions +together with an optional gpio feature. We have already covered the +case where e.g. a GPIO controller need to reserve a pin or set the +direction of a pin by calling any of: + +pinctrl_request_gpio() +pinctrl_free_gpio() +pinctrl_gpio_direction_input() +pinctrl_gpio_direction_output() + +But how does the pin control subsystem cross-correlate the GPIO +numbers (which are a global business) to a certain pin on a certain +pin controller? + +This is done by registering "ranges" of pins, which are essentially +cross-reference tables. These are described in +Documentation/pinctrl.txt + +While the pin allocation is totally managed by the pinctrl subsystem, +gpio (under gpiolib) is still maintained by gpio drivers. It may happen +that different pin ranges in a SoC is managed by different gpio drivers. + +This makes it logical to let gpio drivers announce their pin ranges to +the pin ctrl subsystem before it will call 'pinctrl_request_gpio' in order +to request the corresponding pin to be prepared by the pinctrl subsystem +before any gpio usage. + +For this, the gpio controller can register its pin range with pinctrl +subsystem. There are two ways of doing it currently: with or without DT. + +For with DT support refer to Documentation/devicetree/bindings/gpio/gpio.txt. + +For non-DT support, user can call gpiochip_add_pin_range() with appropriate +parameters to register a range of gpio pins with a pinctrl driver. For this +exact name string of pinctrl device has to be passed as one of the +argument to this routine. + + What do these conventions omit? =============================== One of the biggest things these conventions omit is pin multiplexing, since diff --git a/Documentation/hwmon/ads7828 b/Documentation/hwmon/ads7828 index 2bbebe6f771f..f6e263e0f607 100644 --- a/Documentation/hwmon/ads7828 +++ b/Documentation/hwmon/ads7828 @@ -4,29 +4,47 @@ Kernel driver ads7828 Supported chips: * Texas Instruments/Burr-Brown ADS7828 Prefix: 'ads7828' - Addresses scanned: I2C 0x48, 0x49, 0x4a, 0x4b - Datasheet: Publicly available at the Texas Instruments website : + Datasheet: Publicly available at the Texas Instruments website: http://focus.ti.com/lit/ds/symlink/ads7828.pdf + * Texas Instruments ADS7830 + Prefix: 'ads7830' + Datasheet: Publicly available at the Texas Instruments website: + http://focus.ti.com/lit/ds/symlink/ads7830.pdf + Authors: Steve Hardy <shardy@redhat.com> + Vivien Didelot <vivien.didelot@savoirfairelinux.com> + Guillaume Roguez <guillaume.roguez@savoirfairelinux.com> + +Platform data +------------- + +The ads7828 driver accepts an optional ads7828_platform_data structure (defined +in include/linux/platform_data/ads7828.h). The structure fields are: -Module Parameters ------------------ +* diff_input: (bool) Differential operation + set to true for differential mode, false for default single ended mode. -* se_input: bool (default Y) - Single ended operation - set to N for differential mode -* int_vref: bool (default Y) - Operate with the internal 2.5V reference - set to N for external reference -* vref_mv: int (default 2500) - If using an external reference, set this to the reference voltage in mV +* ext_vref: (bool) External reference + set to true if it operates with an external reference, false for default + internal reference. + +* vref_mv: (unsigned int) Voltage reference + if using an external reference, set this to the reference voltage in mV, + otherwise it will default to the internal value (2500mV). This value will be + bounded with limits accepted by the chip, described in the datasheet. + + If no structure is provided, the configuration defaults to single ended + operation and internal voltage reference (2.5V). Description ----------- -This driver implements support for the Texas Instruments ADS7828. +This driver implements support for the Texas Instruments ADS7828 and ADS7830. -This device is a 12-bit 8-channel A-D converter. +The ADS7828 device is a 12-bit 8-channel A/D converter, while the ADS7830 does +8-bit sampling. It can operate in single ended mode (8 +ve inputs) or in differential mode, where 4 differential pairs can be measured. @@ -34,3 +52,7 @@ where 4 differential pairs can be measured. The chip also has the facility to use an external voltage reference. This may be required if your hardware supplies the ADS7828 from a 5V supply, see the datasheet for more details. + +There is no reliable way to identify this chip, so the driver will not scan +some addresses to try to auto-detect it. That means that you will have to +statically declare the device in the platform support code. diff --git a/Documentation/hwmon/coretemp b/Documentation/hwmon/coretemp index f17256f069ba..3374c085678d 100644 --- a/Documentation/hwmon/coretemp +++ b/Documentation/hwmon/coretemp @@ -98,8 +98,10 @@ Process Processor TjMax(C) 45nm Atom Processors D525/510/425/410 100 + Z670/650 90 Z560/550/540/530P/530/520PT/520/515/510PT/510P 90 Z510/500 90 + N570/550 100 N475/470/455/450 100 N280/270 90 330/230 125 diff --git a/Documentation/hwmon/da9055 b/Documentation/hwmon/da9055 new file mode 100644 index 000000000000..855c3f536e00 --- /dev/null +++ b/Documentation/hwmon/da9055 @@ -0,0 +1,47 @@ +Supported chips: + * Dialog Semiconductors DA9055 PMIC + Prefix: 'da9055' + Datasheet: Datasheet is not publicly available. + +Authors: David Dajun Chen <dchen@diasemi.com> + +Description +----------- + +The DA9055 provides an Analogue to Digital Converter (ADC) with 10 bits +resolution and track and hold circuitry combined with an analogue input +multiplexer. The analogue input multiplexer will allow conversion of up to 5 +different inputs. The track and hold circuit ensures stable input voltages at +the input of the ADC during the conversion. + +The ADC is used to measure the following inputs: +Channel 0: VDDOUT - measurement of the system voltage +Channel 1: ADC_IN1 - high impedance input (0 - 2.5V) +Channel 2: ADC_IN2 - high impedance input (0 - 2.5V) +Channel 3: ADC_IN3 - high impedance input (0 - 2.5V) +Channel 4: Internal Tjunc. - sense (internal temp. sensor) + +By using sysfs attributes we can measure the system voltage VDDOUT, +chip junction temperature and auxiliary channels voltages. + +Voltage Monitoring +------------------ + +Voltages are sampled in a AUTO mode it can be manually sampled too and results +are stored in a 10 bit ADC. + +The system voltage is calculated as: + Milli volt = ((ADC value * 1000) / 85) + 2500 + +The voltages on ADC channels 1, 2 and 3 are calculated as: + Milli volt = (ADC value * 1000) / 102 + +Temperature Monitoring +---------------------- + +Temperatures are sampled by a 10 bit ADC. Junction temperatures +are monitored by the ADC channels. + +The junction temperature is calculated: + Degrees celsius = -0.4084 * (ADC_RES - T_OFFSET) + 307.6332 +The junction temperature attribute is supported by the driver. diff --git a/Documentation/kbuild/makefiles.txt b/Documentation/kbuild/makefiles.txt index ec9ae6708691..14c3f4f1b617 100644 --- a/Documentation/kbuild/makefiles.txt +++ b/Documentation/kbuild/makefiles.txt @@ -1175,15 +1175,16 @@ When kbuild executes, the following steps are followed (roughly): in an init section in the image. Platform code *must* copy the blob to non-init memory prior to calling unflatten_device_tree(). - Example: - #arch/x86/platform/ce4100/Makefile - clean-files := *dtb.S + To use this command, simply add *.dtb into obj-y or targets, or make + some other target depend on %.dtb - DTC_FLAGS := -p 1024 - obj-y += foo.dtb.o + A central rule exists to create $(obj)/%.dtb from $(src)/%.dts; + architecture Makefiles do no need to explicitly write out that rule. - $(obj)/%.dtb: $(src)/%.dts - $(call cmd,dtc) + Example: + targets += $(dtb-y) + clean-files += *.dtb + DTC_FLAGS ?= -p 1024 --- 6.8 Custom kbuild commands diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 9776f068306b..28bd0f0e32c5 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1304,6 +1304,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted. lapic [X86-32,APIC] Enable the local APIC even if BIOS disabled it. + lapic= [x86,APIC] "notscdeadline" Do not use TSC deadline + value for LAPIC timer one-shot implementation. Default + back to the programmable timer unit in the LAPIC. + lapic_timer_c2_ok [X86,APIC] trust the local apic timer in C2 power state. @@ -1984,6 +1988,20 @@ bytes respectively. Such letter suffixes can also be entirely omitted. nox2apic [X86-64,APIC] Do not enable x2APIC mode. + cpu0_hotplug [X86] Turn on CPU0 hotplug feature when + CONFIG_BOOTPARAM_HOTPLUG_CPU0 is off. + Some features depend on CPU0. Known dependencies are: + 1. Resume from suspend/hibernate depends on CPU0. + Suspend/hibernate will fail if CPU0 is offline and you + need to online CPU0 before suspend/hibernate. + 2. PIC interrupts also depend on CPU0. CPU0 can't be + removed if a PIC interrupt is detected. + It's said poweroff/reboot may depend on CPU0 on some + machines although I haven't seen such issues so far + after CPU0 is offline on a few tested machines. + If the dependencies are under your control, you can + turn on cpu0_hotplug. + nptcg= [IA-64] Override max number of concurrent global TLB purges which is reported from either PAL_VM_SUMMARY or SAL PALO. @@ -2394,6 +2412,27 @@ bytes respectively. Such letter suffixes can also be entirely omitted. ramdisk_size= [RAM] Sizes of RAM disks in kilobytes See Documentation/blockdev/ramdisk.txt. + rcu_nocbs= [KNL,BOOT] + In kernels built with CONFIG_RCU_NOCB_CPU=y, set + the specified list of CPUs to be no-callback CPUs. + Invocation of these CPUs' RCU callbacks will + be offloaded to "rcuoN" kthreads created for + that purpose. This reduces OS jitter on the + offloaded CPUs, which can be useful for HPC and + real-time workloads. It can also improve energy + efficiency for asymmetric multiprocessors. + + rcu_nocbs_poll [KNL,BOOT] + Rather than requiring that offloaded CPUs + (specified by rcu_nocbs= above) explicitly + awaken the corresponding "rcuoN" kthreads, + make these kthreads poll for callbacks. + This improves the real-time response for the + offloaded CPUs by relieving them of the need to + wake up the corresponding kthread, but degrades + energy efficiency by requiring that the kthreads + periodically wake up to do the polling. + rcutree.blimit= [KNL,BOOT] Set maximum number of finished RCU callbacks to process in one batch. @@ -2859,6 +2898,22 @@ bytes respectively. Such letter suffixes can also be entirely omitted. to facilitate early boot debugging. See also Documentation/trace/events.txt + trace_options=[option-list] + [FTRACE] Enable or disable tracer options at boot. + The option-list is a comma delimited list of options + that can be enabled or disabled just as if you were + to echo the option name into + + /sys/kernel/debug/tracing/trace_options + + For example, to enable stacktrace option (to dump the + stack trace of each event), add to the command line: + + trace_options=stacktrace + + See also Documentation/trace/ftrace.txt "trace options" + section. + transparent_hugepage= [KNL] Format: [always|madvise|never] diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt index 2759f7c188f0..3c4e1b3b80a1 100644 --- a/Documentation/memory-barriers.txt +++ b/Documentation/memory-barriers.txt @@ -251,12 +251,13 @@ And there are a number of things that _must_ or _must_not_ be assumed: And for: - *A = X; Y = *A; + *A = X; *(A + 4) = Y; - we may get either of: + we may get any of: - STORE *A = X; Y = LOAD *A; - STORE *A = Y = X; + STORE *A = X; STORE *(A + 4) = Y; + STORE *(A + 4) = Y; STORE *A = X; + STORE {*A, *(A + 4) } = {X, Y}; ========================= diff --git a/Documentation/memory-hotplug.txt b/Documentation/memory-hotplug.txt index 6d0c2519cf47..c6f993d491b5 100644 --- a/Documentation/memory-hotplug.txt +++ b/Documentation/memory-hotplug.txt @@ -161,7 +161,8 @@ a recent addition and not present on older kernels. in the memory block. 'state' : read-write at read: contains online/offline state of memory. - at write: user can specify "online", "offline" command + at write: user can specify "online_kernel", + "online_movable", "online", "offline" command which will be performed on al sections in the block. 'phys_device' : read-only: designed to show the name of physical memory device. This is not well implemented now. @@ -255,6 +256,17 @@ For onlining, you have to write "online" to the section's state file as: % echo online > /sys/devices/system/memory/memoryXXX/state +This onlining will not change the ZONE type of the target memory section, +If the memory section is in ZONE_NORMAL, you can change it to ZONE_MOVABLE: + +% echo online_movable > /sys/devices/system/memory/memoryXXX/state +(NOTE: current limit: this memory section must be adjacent to ZONE_MOVABLE) + +And if the memory section is in ZONE_MOVABLE, you can change it to ZONE_NORMAL: + +% echo online_kernel > /sys/devices/system/memory/memoryXXX/state +(NOTE: current limit: this memory section must be adjacent to ZONE_NORMAL) + After this, section memoryXXX's state will be 'online' and the amount of available memory will be increased. @@ -377,15 +389,18 @@ The third argument is passed by pointer of struct memory_notify. struct memory_notify { unsigned long start_pfn; unsigned long nr_pages; + int status_change_nid_normal; int status_change_nid; } start_pfn is start_pfn of online/offline memory. nr_pages is # of pages of online/offline memory. +status_change_nid_normal is set node id when N_NORMAL_MEMORY of nodemask +is (will be) set/clear, if this is -1, then nodemask status is not changed. status_change_nid is set node id when N_HIGH_MEMORY of nodemask is (will be) set/clear. It means a new(memoryless) node gets new memory by online and a node loses all memory. If this is -1, then nodemask status is not changed. -If status_changed_nid >= 0, callback should create/discard structures for the +If status_changed_nid* >= 0, callback should create/discard structures for the node if necessary. -------------- diff --git a/Documentation/mmc/mmc-dev-attrs.txt b/Documentation/mmc/mmc-dev-attrs.txt index 22ae8441489f..0d98fac8893b 100644 --- a/Documentation/mmc/mmc-dev-attrs.txt +++ b/Documentation/mmc/mmc-dev-attrs.txt @@ -25,6 +25,8 @@ All attributes are read-only. serial Product Serial Number (from CID Register) erase_size Erase group size preferred_erase_size Preferred erase size + raw_rpmb_size_mult RPMB partition size + rel_sectors Reliable write sector count Note on Erase Size and Preferred Erase Size: @@ -65,6 +67,11 @@ Note on Erase Size and Preferred Erase Size: "preferred_erase_size" is in bytes. +Note on raw_rpmb_size_mult: + "raw_rpmb_size_mult" is a mutliple of 128kB block. + RPMB size in byte is calculated by using the following equation: + RPMB partition size = 128kB x raw_rpmb_size_mult + SD/MMC/SDIO Clock Gating Attribute ================================== diff --git a/Documentation/networking/netdev-features.txt b/Documentation/networking/netdev-features.txt index 4164f5c02e4b..f310edec8a77 100644 --- a/Documentation/networking/netdev-features.txt +++ b/Documentation/networking/netdev-features.txt @@ -164,4 +164,4 @@ read the CRC recorded by the NIC on receipt of the packet. This requests that the NIC receive all possible frames, including errored frames (such as bad FCS, etc). This can be helpful when sniffing a link with bad packets on it. Some NICs may receive more packets if also put into normal -PROMISC mdoe. +PROMISC mode. diff --git a/Documentation/networking/vxlan.txt b/Documentation/networking/vxlan.txt index 5b34b762d7d5..6d993510f091 100644 --- a/Documentation/networking/vxlan.txt +++ b/Documentation/networking/vxlan.txt @@ -32,7 +32,7 @@ no entry is in the forwarding table. # ip link delete vxlan0 3. Show vxlan info - # ip -d show vxlan0 + # ip -d link show vxlan0 It is possible to create, destroy and display the vxlan forwarding table using the new bridge command. @@ -41,7 +41,7 @@ forwarding table using the new bridge command. # bridge fdb add to 00:17:42:8a:b4:05 dst 192.19.0.2 dev vxlan0 2. Delete forwarding table entry - # bridge fdb delete 00:17:42:8a:b4:05 + # bridge fdb delete 00:17:42:8a:b4:05 dev vxlan0 3. Show forwarding table # bridge fdb show dev vxlan0 diff --git a/Documentation/pinctrl.txt b/Documentation/pinctrl.txt index 3b4ee5328868..da40efbef6ec 100644 --- a/Documentation/pinctrl.txt +++ b/Documentation/pinctrl.txt @@ -364,6 +364,9 @@ will get an pin number into its handled number range. Further it is also passed the range ID value, so that the pin controller knows which range it should deal with. +Calling pinctrl_add_gpio_range from pinctrl driver is DEPRECATED. Please see +section 2.1 of Documentation/devicetree/bindings/gpio/gpio.txt on how to bind +pinctrl and gpio drivers. PINMUX interfaces ================= @@ -1193,4 +1196,6 @@ foo_switch() ... } -The above has to be done from process context. +The above has to be done from process context. The reservation of the pins +will be done when the state is activated, so in effect one specific pin +can be used by different functions at different times on a running system. diff --git a/Documentation/power/pm_qos_interface.txt b/Documentation/power/pm_qos_interface.txt index 17e130a80347..79a2a58425ee 100644 --- a/Documentation/power/pm_qos_interface.txt +++ b/Documentation/power/pm_qos_interface.txt @@ -99,7 +99,7 @@ reading the aggregated value does not require any locking mechanism. From kernel mode the use of this interface is the following: -int dev_pm_qos_add_request(device, handle, value): +int dev_pm_qos_add_request(device, handle, type, value): Will insert an element into the list for that identified device with the target value. Upon change to this list the new target is recomputed and any registered notifiers are called only if the target value is now different. diff --git a/Documentation/telephony/00-INDEX b/Documentation/telephony/00-INDEX deleted file mode 100644 index 4ffe0ed5b6fb..000000000000 --- a/Documentation/telephony/00-INDEX +++ /dev/null @@ -1,4 +0,0 @@ -00-INDEX - - this file. -ixj.txt - - document describing the Quicknet drivers. diff --git a/Documentation/telephony/ixj.txt b/Documentation/telephony/ixj.txt deleted file mode 100644 index db94fb6c5678..000000000000 --- a/Documentation/telephony/ixj.txt +++ /dev/null @@ -1,394 +0,0 @@ -Linux Quicknet-Drivers-Howto -Quicknet Technologies, Inc. (www.quicknet.net) -Version 0.3.4 December 18, 1999 - -1.0 Introduction - -This document describes the first GPL release version of the Linux -driver for the Quicknet Internet PhoneJACK and Internet LineJACK -cards. More information about these cards is available at -www.quicknet.net. The driver version discussed in this document is -0.3.4. - -These cards offer nice telco style interfaces to use your standard -telephone/key system/PBX as the user interface for VoIP applications. -The Internet LineJACK also offers PSTN connectivity for a single line -Internet to PSTN gateway. Of course, you can add more than one card -to a system to obtain multi-line functionality. At this time, the -driver supports the POTS port on both the Internet PhoneJACK and the -Internet LineJACK, but the PSTN port on the latter card is not yet -supported. - -This document, and the drivers for the cards, are intended for a -limited audience that includes technically capable programmers who -would like to experiment with Quicknet cards. The drivers are -considered in ALPHA status and are not yet considered stable enough -for general, widespread use in an unlimited audience. - -That's worth saying again: - -THE LINUX DRIVERS FOR QUICKNET CARDS ARE PRESENTLY IN A ALPHA STATE -AND SHOULD NOT BE CONSIDERED AS READY FOR NORMAL WIDESPREAD USE. - -They are released early in the spirit of Internet development and to -make this technology available to innovators who would benefit from -early exposure. - -When we promote the device driver to "beta" level it will be -considered ready for non-programmer, non-technical users. Until then, -please be aware that these drivers may not be stable and may affect -the performance of your system. - - -1.1 Latest Additions/Improvements - -The 0.3.4 version of the driver is the first GPL release. Several -features had to be removed from the prior binary only module, mostly -for reasons of Intellectual Property rights. We can't release -information that is not ours - so certain aspects of the driver had to -be removed to protect the rights of others. - -Specifically, very old Internet PhoneJACK cards have non-standard -G.723.1 codecs (due to the early nature of the DSPs in those days). -The auto-conversion code to bring those cards into compliance with -today's standards is available as a binary only module to those people -needing it. If you bought your card after 1997 or so, you are OK - -it's only the very old cards that are affected. - -Also, the code to download G.728/G.729/G.729a codecs to the DSP is -available as a binary only module as well. This IP is not ours to -release. - -Hooks are built into the GPL driver to allow it to work with other -companion modules that are completely separate from this module. - -1.2 Copyright, Trademarks, Disclaimer, & Credits - -Copyright - -Copyright (c) 1999 Quicknet Technologies, Inc. Permission is granted -to freely copy and distribute this document provided you preserve it -in its original form. For corrections and minor changes contact the -maintainer at linux@quicknet.net. - -Trademarks - -Internet PhoneJACK and Internet LineJACK are registered trademarks of -Quicknet Technologies, Inc. - -Disclaimer - -Much of the info in this HOWTO is early information released by -Quicknet Technologies, Inc. for the express purpose of allowing early -testing and use of the Linux drivers developed for their products. -While every attempt has been made to be thorough, complete and -accurate, the information contained here may be unreliable and there -are likely a number of errors in this document. Please let the -maintainer know about them. Since this is free documentation, it -should be obvious that neither I nor previous authors can be held -legally responsible for any errors. - -Credits - -This HOWTO was written by: - - Greg Herlein <gherlein@quicknet.net> - Ed Okerson <eokerson@quicknet.net> - -1.3 Future Plans: You Can Help - -Please let the maintainer know of any errors in facts, opinions, -logic, spelling, grammar, clarity, links, etc. But first, if the date -is over a month old, check to see that you have the latest -version. Please send any info that you think belongs in this document. - -You can also contribute code and/or bug-fixes for the sample -applications. - - -1.4 Where to get things - -Info on latest versions of the driver are here: - -http://web.archive.org/web/*/http://www.quicknet.net/develop.htm - -1.5 Mailing List - -Quicknet operates a mailing list to provide a public forum on using -these drivers. - -To subscribe to the linux-sdk mailing list, send an email to: - - majordomo@linux.quicknet.net - -In the body of the email, type: - - subscribe linux-sdk <your-email-address> - -Please delete any signature block that you would normally add to the -bottom of your email - it tends to confuse majordomo. - -To send mail to the list, address your mail to - - linux-sdk@linux.quicknet.net - -Your message will go out to everyone on the list. - -To unsubscribe to the linux-sdk mailing list, send an email to: - - majordomo@linux.quicknet.net - -In the body of the email, type: - - unsubscribe linux-sdk <your-email-address> - - - -2.0 Requirements - -2.1 Quicknet Card(s) - -You will need at least one Internet PhoneJACK or Internet LineJACK -cards. These are ISA or PCI bus devices that use Plug-n-Play for -configuration, and use no IRQs. The driver will support up to 16 -cards in any one system, of any mix between the two types. - -Note that you will need two cards to do any useful testing alone, since -you will need a card on both ends of the connection. Of course, if -you are doing collaborative work, perhaps your friends or coworkers -have cards too. If not, we'll gladly sell them some! - - -2.2 ISAPNP - -Since the Quicknet cards are Plug-n-Play devices, you will need the -isapnp tools package to configure the cards, or you can use the isapnp -module to autoconfigure them. The former package probably came with -your Linux distribution. Documentation on this package is available -online at: - -http://mailer.wiwi.uni-marburg.de/linux/LDP/HOWTO/Plug-and-Play-HOWTO.html - -The isapnp autoconfiguration is available on the Quicknet website at: - - http://www.quicknet.net/develop.htm - -though it may be in the kernel by the time you read this. - - -3.0 Card Configuration - -If you did not get your drivers as part of the linux kernel, do the -following to install them: - - a. untar the distribution file. We use the following command: - tar -xvzf ixj-0.x.x.tgz - -This creates a subdirectory holding all the necessary files. Go to that -subdirectory. - - b. run the "ixj_dev_create" script to remove any stray device -files left in the /dev directory, and to create the new officially -designated device files. Note that the old devices were called -/dev/ixj, and the new method uses /dev/phone. - - c. type "make;make install" - this will compile and install the -module. - - d. type "depmod -av" to rebuild all your kernel version dependencies. - - e. if you are using the isapnp module to configure the cards - automatically, then skip to step f. Otherwise, ensure that you - have run the isapnp configuration utility to properly configure - the cards. - - e1. The Internet PhoneJACK has one configuration register that - requires 16 IO ports. The Internet LineJACK card has two - configuration registers and isapnp reports that IO 0 - requires 16 IO ports and IO 1 requires 8. The Quicknet - driver assumes that these registers are configured to be - contiguous, i.e. if IO 0 is set to 0x340 then IO 1 should - be set to 0x350. - - Make sure that none of the cards overlap if you have - multiple cards in the system. - - If you are new to the isapnp tools, you can jumpstart - yourself by doing the following: - - e2. go to the /etc directory and run pnpdump to get a blank - isapnp.conf file. - - pnpdump > /etc/isapnp.conf - - e3. edit the /etc/isapnp.conf file to set the IO warnings and - the register IO addresses. The IO warnings means that you - should find the line in the file that looks like this: - - (CONFLICT (IO FATAL)(IRQ FATAL)(DMA FATAL)(MEM FATAL)) # or WARNING - - and you should edit the line to look like this: - - (CONFLICT (IO WARNING)(IRQ FATAL)(DMA FATAL)(MEM FATAL)) # - or WARNING - - The next step is to set the IO port addresses. The issue - here is that isapnp does not identify all of the ports out - there. Specifically any device that does not have a driver - or module loaded by Linux will not be registered. This - includes older sound cards and network cards. We have - found that the IO port 0x300 is often used even though - isapnp claims that no-one is using those ports. We - recommend that for a single card installation that port - 0x340 (and 0x350) be used. The IO port line should change - from this: - - (IO 0 (SIZE 16) (BASE 0x0300) (CHECK)) - - to this: - - (IO 0 (SIZE 16) (BASE 0x0340) ) - - e4. if you have multiple Quicknet cards, make sure that you do - not have any overlaps. Be especially careful if you are - mixing Internet PhoneJACK and Internet LineJACK cards in - the same system. In these cases we recommend moving the - IO port addresses to the 0x400 block. Please note that on - a few machines the 0x400 series are used. Feel free to - experiment with other addresses. Our cards have been - proven to work using IO addresses of up to 0xFF0. - - e5. the last step is to uncomment the activation line so the - drivers will be associated with the port. This means the - line (immediately below) the IO line should go from this: - - # (ACT Y) - - to this: - - (ACT Y) - - Once you have finished editing the isapnp.conf file you - must submit it into the pnp driverconfigure the cards. - This is done using the following command: - - isapnp isapnp.conf - - If this works you should see a line that identifies the - Quicknet device, the IO port(s) chosen, and a message - "Enabled OK". - - f. if you are loading the module by hand, use insmod. An example -of this would look like this: - - insmod phonedev - insmod ixj dspio=0x320,0x310 xio=0,0x330 - -Then verify the module loaded by running lsmod. If you are not using a -module that matches your kernel version, you may need to "force" the -load using the -f option in the insmod command. - - insmod phonedev - insmod -f ixj dspio=0x320,0x310 xio=0,0x330 - - -If you are using isapnp to autoconfigure your card, then you do NOT -need any of the above, though you need to use depmod to load the -driver, like this: - - depmod ixj - -which will result in the needed drivers getting loaded automatically. - - g. if you are planning on having the kernel automatically request -the module for you, then you need to edit /etc/conf.modules and add the -following lines: - - options ixj dspio=0x340 xio=0x330 ixjdebug=0 - -If you do this, then when you execute an application that uses the -module the kernel will request that it is loaded. - - h. if you want non-root users to be able to read and write to the -ixj devices (this is a good idea!) you should do the following: - - - decide upon a group name to use and create that group if - needed. Add the user names to that group that you wish to - have access to the device. For example, we typically will - create a group named "ixj" in /etc/group and add all users - to that group that we want to run software that can use the - ixjX devices. - - - change the permissions on the device files, like this: - - chgrp ixj /dev/ixj* - chmod 660 /dev/ixj* - -Once this is done, then non-root users should be able to use the -devices. If you have enabled autoloading of modules, then the user -should be able to open the device and have the module loaded -automatically for them. - - -4.0 Driver Installation problems. - -We have tested these drivers on the 2.2.9, 2.2.10, 2.2.12, and 2.2.13 kernels -and in all cases have eventually been able to get the drivers to load and -run. We have found four types of problems that prevent this from happening. -The problems and solutions are: - - a. A step was missed in the installation. Go back and use section 3 -as a checklist. Many people miss running the ixj_dev_create script and thus -never load the device names into the filesystem. - - b. The kernel is inconsistently linked. We have found this problem in -the Out Of the Box installation of several distributions. The symptoms -are that neither driver will load, and that the unknown symbols include "jiffy" -and "kmalloc". The solution is to recompile both the kernel and the -modules. The command string for the final compile looks like this: - - In the kernel directory: - 1. cp .config /tmp - 2. make mrproper - 3. cp /tmp/.config . - 4. make clean;make bzImage;make modules;make modules_install - -This rebuilds both the kernel and all the modules and makes sure they all -have the same linkages. This generally solves the problem once the new -kernel is installed and the system rebooted. - - c. The kernel has been patched, then unpatched. This happens when -someone decides to use an earlier kernel after they load a later kernel. -The symptoms are proceeding through all three above steps and still not -being able to load the driver. What has happened is that the generated -header files are out of sync with the kernel itself. The solution is -to recompile (again) using "make mrproper". This will remove and then -regenerate all the necessary header files. Once this is done, then you -need to install and reboot the kernel. We have not seen any problem -loading one of our drivers after this treatment. - -5.0 Known Limitations - -We cannot currently play "dial-tone" and listen for DTMF digits at the -same time using the ISA PhoneJACK. This is a bug in the 8020 DSP chip -used on that product. All other Quicknet products function normally -in this regard. We have a work-around, but it's not done yet. Until -then, if you want dial-tone, you can always play a recorded dial-tone -sound into the audio until you have gathered the DTMF digits. - - - - - - - - - - - - - - - - - diff --git a/Documentation/usb/error-codes.txt b/Documentation/usb/error-codes.txt index b3f606b81a03..9c3eb845ebe5 100644 --- a/Documentation/usb/error-codes.txt +++ b/Documentation/usb/error-codes.txt @@ -21,6 +21,8 @@ Non-USB-specific: USB-specific: +-EBUSY The URB is already active. + -ENODEV specified USB-device or bus doesn't exist -ENOENT specified interface or endpoint does not exist or @@ -35,9 +37,8 @@ USB-specific: d) ISO: number_of_packets is < 0 e) various other cases --EAGAIN a) specified ISO start frame too early - b) (using ISO-ASAP) too much scheduled for the future - wait some time and try again. +-EXDEV ISO: URB_ISO_ASAP wasn't specified and all the frames + the URB would be scheduled in have already expired. -EFBIG Host controller driver can't schedule that many ISO frames. diff --git a/Documentation/usb/mass-storage.txt b/Documentation/usb/mass-storage.txt index e9b9334627bf..59063ad7a60d 100644 --- a/Documentation/usb/mass-storage.txt +++ b/Documentation/usb/mass-storage.txt @@ -20,9 +20,9 @@ This document describes how to use the gadget from user space, its relation to mass storage function (or MSF) and different gadgets - using it, and how it differs from File Storage Gadget (or FSG). It - will talk only briefly about how to use MSF within composite - gadgets. + using it, and how it differs from File Storage Gadget (or FSG) + (which is no longer included in Linux). It will talk only briefly + about how to use MSF within composite gadgets. * Module parameters @@ -198,16 +198,15 @@ The Mass Storage Function and thus the Mass Storage Gadget has been based on the File Storage Gadget. The difference between the two is that MSG is a composite gadget (ie. uses the composite framework) - while file storage gadget is a traditional gadget. From userspace + while file storage gadget was a traditional gadget. From userspace point of view this distinction does not really matter, but from kernel hacker's point of view, this means that (i) MSG does not duplicate code needed for handling basic USB protocol commands and (ii) MSF can be used in any other composite gadget. - Because of that, File Storage Gadget has been deprecated and - scheduled to be removed in Linux 3.8. All users need to transition - to the Mass Storage Gadget by that time. The two gadgets behave - mostly the same from the outside except: + Because of that, File Storage Gadget has been removed in Linux 3.8. + All users need to transition to the Mass Storage Gadget. The two + gadgets behave mostly the same from the outside except: 1. In FSG the “removable” and “cdrom” module parameters set the flag for all logical units whereas in MSG they accept a list of y/n diff --git a/Documentation/x86/boot.txt b/Documentation/x86/boot.txt index 9efceff51bfb..f15cb74c4f78 100644 --- a/Documentation/x86/boot.txt +++ b/Documentation/x86/boot.txt @@ -1013,7 +1013,7 @@ boot_params as that of 16-bit boot protocol, the boot loader should also fill the additional fields of the struct boot_params as that described in zero-page.txt. -After setupping the struct boot_params, the boot loader can load the +After setting up the struct boot_params, the boot loader can load the 32/64-bit kernel in the same way as that of 16-bit boot protocol. In 32-bit boot protocol, the kernel is started by jumping to the @@ -1023,7 +1023,7 @@ In 32-bit boot protocol, the kernel is started by jumping to the At entry, the CPU must be in 32-bit protected mode with paging disabled; a GDT must be loaded with the descriptors for selectors __BOOT_CS(0x10) and __BOOT_DS(0x18); both descriptors must be 4G flat -segment; __BOOS_CS must have execute/read permission, and __BOOT_DS +segment; __BOOT_CS must have execute/read permission, and __BOOT_DS must have read/write permission; CS must be __BOOT_CS and DS, ES, SS must be __BOOT_DS; interrupt must be disabled; %esi must hold the base address of the struct boot_params; %ebp, %edi and %ebx must be zero. diff --git a/Documentation/zh_CN/arm/kernel_user_helpers.txt b/Documentation/zh_CN/arm/kernel_user_helpers.txt new file mode 100644 index 000000000000..cd7fc8f34cf9 --- /dev/null +++ b/Documentation/zh_CN/arm/kernel_user_helpers.txt @@ -0,0 +1,284 @@ +Chinese translated version of Documentation/arm/kernel_user_helpers.txt + +If you have any comment or update to the content, please contact the +original document maintainer directly. However, if you have a problem +communicating in English you can also ask the Chinese maintainer for +help. Contact the Chinese maintainer if this translation is outdated +or if there is a problem with the translation. + +Maintainer: Nicolas Pitre <nicolas.pitre@linaro.org> + Dave Martin <dave.martin@linaro.org> +Chinese maintainer: Fu Wei <tekkamanninja@gmail.com> +--------------------------------------------------------------------- +Documentation/arm/kernel_user_helpers.txt 的中文翻译 + +如果想评论或更新本文的内容,请直接联系原文档的维护者。如果你使用英文 +交流有困难的话,也可以向中文版维护者求助。如果本翻译更新不及时或者翻 +译存在问题,请联系中文版维护者。 +英文版维护者: Nicolas Pitre <nicolas.pitre@linaro.org> + Dave Martin <dave.martin@linaro.org> +中文版维护者: 傅炜 Fu Wei <tekkamanninja@gmail.com> +中文版翻译者: 傅炜 Fu Wei <tekkamanninja@gmail.com> +中文版校译者: 宋冬生 Dongsheng Song <dongshneg.song@gmail.com> + 傅炜 Fu Wei <tekkamanninja@gmail.com> + + +以下为正文 +--------------------------------------------------------------------- +内核提供的用户空间辅助代码 +========================= + +在内核内存空间的固定地址处,有一个由内核提供并可从用户空间访问的代码 +段。它用于向用户空间提供因在许多 ARM CPU 中未实现的特性和/或指令而需 +内核提供帮助的某些操作。这些代码直接在用户模式下执行的想法是为了获得 +最佳效率,但那些与内核计数器联系过于紧密的部分,则被留给了用户库实现。 +事实上,此代码甚至可能因不同的 CPU 而异,这取决于其可用的指令集或它 +是否为 SMP 系统。换句话说,内核保留在不作出警告的情况下根据需要更改 +这些代码的权利。只有本文档描述的入口及其结果是保证稳定的。 + +这与完全成熟的 VDSO 实现不同(但两者并不冲突),尽管如此,VDSO 可阻止 +某些通过常量高效跳转到那些代码段的汇编技巧。且由于那些代码段在返回用户 +代码前仅使用少量的代码周期,则一个 VDSO 间接远程调用将会在这些简单的 +操作上增加一个可测量的开销。 + +在对那些拥有原生支持的新型处理器进行代码优化时,仅在已为其他操作使用 +了类似的新增指令,而导致二进制结果已与早期 ARM 处理器不兼容的情况下, +用户空间才应绕过这些辅助代码,并在内联函数中实现这些操作(无论是通过 +编译器在代码中直接放置,还是作为库函数调用实现的一部分)。也就是说, +如果你编译的代码不会为了其他目的使用新指令,则不要仅为了避免使用这些 +内核辅助代码,导致二进制程序无法在早期处理器上运行。 + +新的辅助代码可能随着时间的推移而增加,所以新内核中的某些辅助代码在旧 +内核中可能不存在。因此,程序必须在对任何辅助代码调用假设是安全之前, +检测 __kuser_helper_version 的值(见下文)。理想情况下,这种检测应该 +只在进程启动时执行一次;如果内核版本不支持所需辅助代码,则该进程可尽早 +中止执行。 + +kuser_helper_version +-------------------- + +位置: 0xffff0ffc + +参考声明: + + extern int32_t __kuser_helper_version; + +定义: + + 这个区域包含了当前运行内核实现的辅助代码版本号。用户空间可以通过读 + 取此版本号以确定特定的辅助代码是否存在。 + +使用范例: + +#define __kuser_helper_version (*(int32_t *)0xffff0ffc) + +void check_kuser_version(void) +{ + if (__kuser_helper_version < 2) { + fprintf(stderr, "can't do atomic operations, kernel too old\n"); + abort(); + } +} + +注意: + + 用户空间可以假设这个域的值不会在任何单个进程的生存期内改变。也就 + 是说,这个域可以仅在库的初始化阶段或进程启动阶段读取一次。 + +kuser_get_tls +------------- + +位置: 0xffff0fe0 + +参考原型: + + void * __kuser_get_tls(void); + +输入: + + lr = 返回地址 + +输出: + + r0 = TLS 值 + +被篡改的寄存器: + + 无 + +定义: + + 获取之前通过 __ARM_NR_set_tls 系统调用设置的 TLS 值。 + +使用范例: + +typedef void * (__kuser_get_tls_t)(void); +#define __kuser_get_tls (*(__kuser_get_tls_t *)0xffff0fe0) + +void foo() +{ + void *tls = __kuser_get_tls(); + printf("TLS = %p\n", tls); +} + +注意: + + - 仅在 __kuser_helper_version >= 1 时,此辅助代码存在 + (从内核版本 2.6.12 开始)。 + +kuser_cmpxchg +------------- + +位置: 0xffff0fc0 + +参考原型: + + int __kuser_cmpxchg(int32_t oldval, int32_t newval, volatile int32_t *ptr); + +输入: + + r0 = oldval + r1 = newval + r2 = ptr + lr = 返回地址 + +输出: + + r0 = 成功代码 (零或非零) + C flag = 如果 r0 == 0 则置 1,如果 r0 != 0 则清零。 + +被篡改的寄存器: + + r3, ip, flags + +定义: + + 仅在 *ptr 为 oldval 时原子保存 newval 于 *ptr 中。 + 如果 *ptr 被改变,则返回值为零,否则为非零值。 + 如果 *ptr 被改变,则 C flag 也会被置 1,以实现调用代码中的汇编 + 优化。 + +使用范例: + +typedef int (__kuser_cmpxchg_t)(int oldval, int newval, volatile int *ptr); +#define __kuser_cmpxchg (*(__kuser_cmpxchg_t *)0xffff0fc0) + +int atomic_add(volatile int *ptr, int val) +{ + int old, new; + + do { + old = *ptr; + new = old + val; + } while(__kuser_cmpxchg(old, new, ptr)); + + return new; +} + +注意: + + - 这个例程已根据需要包含了内存屏障。 + + - 仅在 __kuser_helper_version >= 2 时,此辅助代码存在 + (从内核版本 2.6.12 开始)。 + +kuser_memory_barrier +-------------------- + +位置: 0xffff0fa0 + +参考原型: + + void __kuser_memory_barrier(void); + +输入: + + lr = 返回地址 + +输出: + + 无 + +被篡改的寄存器: + + 无 + +定义: + + 应用于任何需要内存屏障以防止手动数据修改带来的一致性问题,以及 + __kuser_cmpxchg 中。 + +使用范例: + +typedef void (__kuser_dmb_t)(void); +#define __kuser_dmb (*(__kuser_dmb_t *)0xffff0fa0) + +注意: + + - 仅在 __kuser_helper_version >= 3 时,此辅助代码存在 + (从内核版本 2.6.15 开始)。 + +kuser_cmpxchg64 +--------------- + +位置: 0xffff0f60 + +参考原型: + + int __kuser_cmpxchg64(const int64_t *oldval, + const int64_t *newval, + volatile int64_t *ptr); + +输入: + + r0 = 指向 oldval + r1 = 指向 newval + r2 = 指向目标值 + lr = 返回地址 + +输出: + + r0 = 成功代码 (零或非零) + C flag = 如果 r0 == 0 则置 1,如果 r0 != 0 则清零。 + +被篡改的寄存器: + + r3, lr, flags + +定义: + + 仅在 *ptr 等于 *oldval 指向的 64 位值时,原子保存 *newval + 指向的 64 位值于 *ptr 中。如果 *ptr 被改变,则返回值为零, + 否则为非零值。 + + 如果 *ptr 被改变,则 C flag 也会被置 1,以实现调用代码中的汇编 + 优化。 + +使用范例: + +typedef int (__kuser_cmpxchg64_t)(const int64_t *oldval, + const int64_t *newval, + volatile int64_t *ptr); +#define __kuser_cmpxchg64 (*(__kuser_cmpxchg64_t *)0xffff0f60) + +int64_t atomic_add64(volatile int64_t *ptr, int64_t val) +{ + int64_t old, new; + + do { + old = *ptr; + new = old + val; + } while(__kuser_cmpxchg64(&old, &new, ptr)); + + return new; +} + +注意: + + - 这个例程已根据需要包含了内存屏障。 + + - 由于这个过程的代码长度(此辅助代码跨越 2 个常规的 kuser “槽”), + 因此 0xffff0f80 不被作为有效的入口点。 + + - 仅在 __kuser_helper_version >= 5 时,此辅助代码存在 + (从内核版本 3.1 开始)。 diff --git a/Documentation/zh_CN/arm64/memory.txt b/Documentation/zh_CN/arm64/memory.txt index 83b519314706..a5f6283829f9 100644 --- a/Documentation/zh_CN/arm64/memory.txt +++ b/Documentation/zh_CN/arm64/memory.txt @@ -47,21 +47,21 @@ AArch64 Linux 内存布局: ----------------------------------------------------------------------- 0000000000000000 0000007fffffffff 512GB 用户空间 -ffffff8000000000 ffffffbbfffcffff ~240GB vmalloc +ffffff8000000000 ffffffbbfffeffff ~240GB vmalloc -ffffffbbfffd0000 ffffffbcfffdffff 64KB [防护页] +ffffffbbffff0000 ffffffbbffffffff 64KB [防护页] -ffffffbbfffe0000 ffffffbcfffeffff 64KB PCI I/O 空间 +ffffffbc00000000 ffffffbdffffffff 8GB vmemmap -ffffffbbffff0000 ffffffbcffffffff 64KB [防护页] +ffffffbe00000000 ffffffbffbbfffff ~8GB [防护页,未来用于 vmmemap] -ffffffbc00000000 ffffffbdffffffff 8GB vmemmap +ffffffbffbe00000 ffffffbffbe0ffff 64KB PCI I/O 空间 -ffffffbe00000000 ffffffbffbffffff ~8GB [防护页,未来用于 vmmemap] +ffffffbbffff0000 ffffffbcffffffff ~2MB [防护页] ffffffbffc000000 ffffffbfffffffff 64MB 模块 -ffffffc000000000 ffffffffffffffff 256GB 内存空间 +ffffffc000000000 ffffffffffffffff 256GB 内核逻辑内存映射 4KB 页大小的转换表查找: |