summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--CREDITS2
-rw-r--r--Documentation/ABI/obsolete/sysfs-gpio2
-rw-r--r--Documentation/ABI/removed/sysfs-class-rfkill2
-rw-r--r--Documentation/ABI/stable/sysfs-class-rfkill2
-rw-r--r--Documentation/ABI/stable/sysfs-devices-node2
-rw-r--r--Documentation/ABI/testing/procfs-diskstats2
-rw-r--r--Documentation/ABI/testing/sysfs-block2
-rw-r--r--Documentation/ABI/testing/sysfs-block-device2
-rw-r--r--Documentation/ABI/testing/sysfs-class-switchtec2
-rw-r--r--Documentation/ABI/testing/sysfs-devices-system-cpu4
-rw-r--r--Documentation/ABI/testing/sysfs-platform-asus-laptop2
-rw-r--r--Documentation/COPYING-logo (renamed from Documentation/logo.txt)0
-rw-r--r--Documentation/DMA-API-HOWTO.txt2
-rw-r--r--Documentation/accounting/cgroupstats.rst (renamed from Documentation/accounting/cgroupstats.txt)14
-rw-r--r--Documentation/accounting/delay-accounting.rst (renamed from Documentation/accounting/delay-accounting.txt)61
-rw-r--r--Documentation/accounting/index.rst14
-rw-r--r--Documentation/accounting/psi.rst (renamed from Documentation/accounting/psi.txt)42
-rw-r--r--Documentation/accounting/taskstats-struct.rst (renamed from Documentation/accounting/taskstats-struct.txt)79
-rw-r--r--Documentation/accounting/taskstats.rst (renamed from Documentation/accounting/taskstats.txt)15
-rw-r--r--Documentation/admin-guide/aoe/aoe.rst (renamed from Documentation/aoe/aoe.rst)4
-rw-r--r--Documentation/admin-guide/aoe/autoload.sh (renamed from Documentation/aoe/autoload.sh)0
-rw-r--r--Documentation/admin-guide/aoe/examples.rst (renamed from Documentation/aoe/examples.rst)0
-rw-r--r--Documentation/admin-guide/aoe/index.rst (renamed from Documentation/aoe/index.rst)2
-rw-r--r--Documentation/admin-guide/aoe/status.sh (renamed from Documentation/aoe/status.sh)0
-rw-r--r--Documentation/admin-guide/aoe/todo.rst (renamed from Documentation/aoe/todo.rst)0
-rw-r--r--Documentation/admin-guide/aoe/udev-install.sh (renamed from Documentation/aoe/udev-install.sh)0
-rw-r--r--Documentation/admin-guide/aoe/udev.txt (renamed from Documentation/aoe/udev.txt)2
-rw-r--r--Documentation/admin-guide/blockdev/drbd/DRBD-8.3-data-packets.svg (renamed from Documentation/blockdev/drbd/DRBD-8.3-data-packets.svg)0
-rw-r--r--Documentation/admin-guide/blockdev/drbd/DRBD-data-packets.svg (renamed from Documentation/blockdev/drbd/DRBD-data-packets.svg)0
-rw-r--r--Documentation/admin-guide/blockdev/drbd/conn-states-8.dot (renamed from Documentation/blockdev/drbd/conn-states-8.dot)0
-rw-r--r--Documentation/admin-guide/blockdev/drbd/data-structure-v9.rst (renamed from Documentation/blockdev/drbd/data-structure-v9.txt)6
-rw-r--r--Documentation/admin-guide/blockdev/drbd/disk-states-8.dot (renamed from Documentation/blockdev/drbd/disk-states-8.dot)0
-rw-r--r--Documentation/admin-guide/blockdev/drbd/drbd-connection-state-overview.dot (renamed from Documentation/blockdev/drbd/drbd-connection-state-overview.dot)0
-rw-r--r--Documentation/admin-guide/blockdev/drbd/figures.rst30
-rw-r--r--Documentation/admin-guide/blockdev/drbd/index.rst (renamed from Documentation/blockdev/drbd/README.txt)15
-rw-r--r--Documentation/admin-guide/blockdev/drbd/node-states-8.dot (renamed from Documentation/blockdev/drbd/node-states-8.dot)1
-rw-r--r--Documentation/admin-guide/blockdev/floppy.rst (renamed from Documentation/blockdev/floppy.txt)88
-rw-r--r--Documentation/admin-guide/blockdev/index.rst16
-rw-r--r--Documentation/admin-guide/blockdev/nbd.rst (renamed from Documentation/blockdev/nbd.txt)2
-rw-r--r--Documentation/admin-guide/blockdev/paride.rst (renamed from Documentation/blockdev/paride.txt)196
-rw-r--r--Documentation/admin-guide/blockdev/ramdisk.rst (renamed from Documentation/blockdev/ramdisk.txt)55
-rw-r--r--Documentation/admin-guide/blockdev/zram.rst (renamed from Documentation/blockdev/zram.txt)197
-rw-r--r--Documentation/admin-guide/btmrvl.rst (renamed from Documentation/btmrvl.txt)0
-rw-r--r--Documentation/admin-guide/bug-hunting.rst4
-rw-r--r--Documentation/admin-guide/cgroup-v1/blkio-controller.rst (renamed from Documentation/cgroup-v1/blkio-controller.rst)0
-rw-r--r--Documentation/admin-guide/cgroup-v1/cgroups.rst (renamed from Documentation/cgroup-v1/cgroups.rst)4
-rw-r--r--Documentation/admin-guide/cgroup-v1/cpuacct.rst (renamed from Documentation/cgroup-v1/cpuacct.rst)0
-rw-r--r--Documentation/admin-guide/cgroup-v1/cpusets.rst (renamed from Documentation/cgroup-v1/cpusets.rst)2
-rw-r--r--Documentation/admin-guide/cgroup-v1/devices.rst (renamed from Documentation/cgroup-v1/devices.rst)0
-rw-r--r--Documentation/admin-guide/cgroup-v1/freezer-subsystem.rst (renamed from Documentation/cgroup-v1/freezer-subsystem.rst)0
-rw-r--r--Documentation/admin-guide/cgroup-v1/hugetlb.rst (renamed from Documentation/cgroup-v1/hugetlb.rst)0
-rw-r--r--Documentation/admin-guide/cgroup-v1/index.rst (renamed from Documentation/cgroup-v1/index.rst)2
-rw-r--r--Documentation/admin-guide/cgroup-v1/memcg_test.rst (renamed from Documentation/cgroup-v1/memcg_test.rst)4
-rw-r--r--Documentation/admin-guide/cgroup-v1/memory.rst (renamed from Documentation/cgroup-v1/memory.rst)0
-rw-r--r--Documentation/admin-guide/cgroup-v1/net_cls.rst (renamed from Documentation/cgroup-v1/net_cls.rst)0
-rw-r--r--Documentation/admin-guide/cgroup-v1/net_prio.rst (renamed from Documentation/cgroup-v1/net_prio.rst)0
-rw-r--r--Documentation/admin-guide/cgroup-v1/pids.rst (renamed from Documentation/cgroup-v1/pids.rst)0
-rw-r--r--Documentation/admin-guide/cgroup-v1/rdma.rst (renamed from Documentation/cgroup-v1/rdma.rst)0
-rw-r--r--Documentation/admin-guide/cgroup-v2.rst8
-rw-r--r--Documentation/admin-guide/clearing-warn-once.rst (renamed from Documentation/clearing-warn-once.txt)0
-rw-r--r--Documentation/admin-guide/cpu-load.rst (renamed from Documentation/cpu-load.txt)0
-rw-r--r--Documentation/admin-guide/cputopology.rst (renamed from Documentation/cputopology.txt)0
-rw-r--r--Documentation/admin-guide/device-mapper/cache-policies.rst (renamed from Documentation/device-mapper/cache-policies.rst)0
-rw-r--r--Documentation/admin-guide/device-mapper/cache.rst (renamed from Documentation/device-mapper/cache.rst)0
-rw-r--r--Documentation/admin-guide/device-mapper/delay.rst (renamed from Documentation/device-mapper/delay.rst)0
-rw-r--r--Documentation/admin-guide/device-mapper/dm-crypt.rst (renamed from Documentation/device-mapper/dm-crypt.rst)0
-rw-r--r--Documentation/admin-guide/device-mapper/dm-dust.txt (renamed from Documentation/device-mapper/dm-dust.txt)0
-rw-r--r--Documentation/admin-guide/device-mapper/dm-flakey.rst (renamed from Documentation/device-mapper/dm-flakey.rst)0
-rw-r--r--Documentation/admin-guide/device-mapper/dm-init.rst (renamed from Documentation/device-mapper/dm-init.rst)0
-rw-r--r--Documentation/admin-guide/device-mapper/dm-integrity.rst (renamed from Documentation/device-mapper/dm-integrity.rst)0
-rw-r--r--Documentation/admin-guide/device-mapper/dm-io.rst (renamed from Documentation/device-mapper/dm-io.rst)0
-rw-r--r--Documentation/admin-guide/device-mapper/dm-log.rst (renamed from Documentation/device-mapper/dm-log.rst)0
-rw-r--r--Documentation/admin-guide/device-mapper/dm-queue-length.rst (renamed from Documentation/device-mapper/dm-queue-length.rst)0
-rw-r--r--Documentation/admin-guide/device-mapper/dm-raid.rst (renamed from Documentation/device-mapper/dm-raid.rst)0
-rw-r--r--Documentation/admin-guide/device-mapper/dm-service-time.rst (renamed from Documentation/device-mapper/dm-service-time.rst)0
-rw-r--r--Documentation/admin-guide/device-mapper/dm-uevent.rst (renamed from Documentation/device-mapper/dm-uevent.rst)0
-rw-r--r--Documentation/admin-guide/device-mapper/dm-zoned.rst (renamed from Documentation/device-mapper/dm-zoned.rst)0
-rw-r--r--Documentation/admin-guide/device-mapper/era.rst (renamed from Documentation/device-mapper/era.rst)0
-rw-r--r--Documentation/admin-guide/device-mapper/index.rst (renamed from Documentation/device-mapper/index.rst)2
-rw-r--r--Documentation/admin-guide/device-mapper/kcopyd.rst (renamed from Documentation/device-mapper/kcopyd.rst)0
-rw-r--r--Documentation/admin-guide/device-mapper/linear.rst (renamed from Documentation/device-mapper/linear.rst)0
-rw-r--r--Documentation/admin-guide/device-mapper/log-writes.rst (renamed from Documentation/device-mapper/log-writes.rst)0
-rw-r--r--Documentation/admin-guide/device-mapper/persistent-data.rst (renamed from Documentation/device-mapper/persistent-data.rst)0
-rw-r--r--Documentation/admin-guide/device-mapper/snapshot.rst (renamed from Documentation/device-mapper/snapshot.rst)0
-rw-r--r--Documentation/admin-guide/device-mapper/statistics.rst (renamed from Documentation/device-mapper/statistics.rst)4
-rw-r--r--Documentation/admin-guide/device-mapper/striped.rst (renamed from Documentation/device-mapper/striped.rst)0
-rw-r--r--Documentation/admin-guide/device-mapper/switch.rst (renamed from Documentation/device-mapper/switch.rst)0
-rw-r--r--Documentation/admin-guide/device-mapper/thin-provisioning.rst (renamed from Documentation/device-mapper/thin-provisioning.rst)0
-rw-r--r--Documentation/admin-guide/device-mapper/unstriped.rst (renamed from Documentation/device-mapper/unstriped.rst)0
-rw-r--r--Documentation/admin-guide/device-mapper/verity.rst (renamed from Documentation/device-mapper/verity.rst)0
-rw-r--r--Documentation/admin-guide/device-mapper/writecache.rst (renamed from Documentation/device-mapper/writecache.rst)0
-rw-r--r--Documentation/admin-guide/device-mapper/zero.rst (renamed from Documentation/device-mapper/zero.rst)0
-rw-r--r--Documentation/admin-guide/efi-stub.rst (renamed from Documentation/efi-stub.txt)0
-rw-r--r--Documentation/admin-guide/gpio/index.rst (renamed from Documentation/gpio/index.rst)2
-rw-r--r--Documentation/admin-guide/gpio/sysfs.rst (renamed from Documentation/gpio/sysfs.rst)0
-rw-r--r--Documentation/admin-guide/highuid.rst (renamed from Documentation/highuid.txt)0
-rw-r--r--Documentation/admin-guide/hw-vuln/l1tf.rst2
-rw-r--r--Documentation/admin-guide/hw_random.rst (renamed from Documentation/hw_random.txt)0
-rw-r--r--Documentation/admin-guide/index.rst28
-rw-r--r--Documentation/admin-guide/iostats.rst (renamed from Documentation/iostats.txt)0
-rw-r--r--Documentation/admin-guide/kdump/gdbmacros.txt (renamed from Documentation/kdump/gdbmacros.txt)0
-rw-r--r--Documentation/admin-guide/kdump/index.rst (renamed from Documentation/kdump/index.rst)1
-rw-r--r--Documentation/admin-guide/kdump/kdump.rst (renamed from Documentation/kdump/kdump.rst)0
-rw-r--r--Documentation/admin-guide/kdump/vmcoreinfo.rst (renamed from Documentation/kdump/vmcoreinfo.rst)0
-rw-r--r--Documentation/admin-guide/kernel-parameters.rst2
-rw-r--r--Documentation/admin-guide/kernel-parameters.txt44
-rw-r--r--Documentation/admin-guide/kernel-per-CPU-kthreads.rst (renamed from Documentation/kernel-per-CPU-kthreads.txt)2
-rw-r--r--Documentation/admin-guide/laptops/asus-laptop.rst (renamed from Documentation/laptops/asus-laptop.txt)92
-rw-r--r--Documentation/admin-guide/laptops/disk-shock-protection.rst (renamed from Documentation/laptops/disk-shock-protection.txt)32
-rw-r--r--Documentation/admin-guide/laptops/index.rst17
-rw-r--r--Documentation/admin-guide/laptops/laptop-mode.rst (renamed from Documentation/laptops/laptop-mode.txt)579
-rw-r--r--Documentation/admin-guide/laptops/lg-laptop.rst (renamed from Documentation/laptops/lg-laptop.rst)1
-rw-r--r--Documentation/admin-guide/laptops/sony-laptop.rst (renamed from Documentation/laptops/sony-laptop.txt)58
-rw-r--r--Documentation/admin-guide/laptops/sonypi.rst (renamed from Documentation/laptops/sonypi.txt)50
-rw-r--r--Documentation/admin-guide/laptops/thinkpad-acpi.rst (renamed from Documentation/laptops/thinkpad-acpi.txt)369
-rw-r--r--Documentation/admin-guide/laptops/toshiba_haps.rst (renamed from Documentation/laptops/toshiba_haps.txt)49
-rw-r--r--Documentation/admin-guide/lcd-panel-cgram.rst (renamed from Documentation/auxdisplay/lcd-panel-cgram.txt)7
-rw-r--r--Documentation/admin-guide/ldm.rst (renamed from Documentation/ldm.txt)0
-rw-r--r--Documentation/admin-guide/lockup-watchdogs.rst (renamed from Documentation/lockup-watchdogs.txt)0
-rw-r--r--Documentation/admin-guide/mm/cma_debugfs.rst (renamed from Documentation/cma/debugfs.txt)6
-rw-r--r--Documentation/admin-guide/mm/index.rst3
-rw-r--r--Documentation/admin-guide/mm/ksm.rst2
-rw-r--r--Documentation/admin-guide/mm/numa_memory_policy.rst2
-rw-r--r--Documentation/admin-guide/namespaces/compatibility-list.rst (renamed from Documentation/namespaces/compatibility-list.txt)10
-rw-r--r--Documentation/admin-guide/namespaces/index.rst11
-rw-r--r--Documentation/admin-guide/namespaces/resource-control.rst (renamed from Documentation/namespaces/resource-control.txt)4
-rw-r--r--Documentation/admin-guide/numastat.rst (renamed from Documentation/numastat.txt)0
-rw-r--r--Documentation/admin-guide/perf/arm-ccn.rst (renamed from Documentation/perf/arm-ccn.txt)18
-rw-r--r--Documentation/admin-guide/perf/arm_dsu_pmu.rst (renamed from Documentation/perf/arm_dsu_pmu.txt)5
-rw-r--r--Documentation/admin-guide/perf/hisi-pmu.rst (renamed from Documentation/perf/hisi-pmu.txt)37
-rw-r--r--Documentation/admin-guide/perf/index.rst16
-rw-r--r--Documentation/admin-guide/perf/qcom_l2_pmu.rst (renamed from Documentation/perf/qcom_l2_pmu.txt)3
-rw-r--r--Documentation/admin-guide/perf/qcom_l3_pmu.rst (renamed from Documentation/perf/qcom_l3_pmu.txt)3
-rw-r--r--Documentation/admin-guide/perf/thunderx2-pmu.rst (renamed from Documentation/perf/thunderx2-pmu.txt)25
-rw-r--r--Documentation/admin-guide/perf/xgene-pmu.rst (renamed from Documentation/perf/xgene-pmu.txt)3
-rw-r--r--Documentation/admin-guide/pnp.rst (renamed from Documentation/pnp.txt)0
-rw-r--r--Documentation/admin-guide/rapidio.rst (renamed from Documentation/driver-api/rapidio.rst)0
-rw-r--r--Documentation/admin-guide/rtc.rst (renamed from Documentation/rtc.txt)0
-rw-r--r--Documentation/admin-guide/svga.rst (renamed from Documentation/svga.txt)0
-rw-r--r--Documentation/admin-guide/sysctl/abi.rst67
-rw-r--r--Documentation/admin-guide/sysctl/fs.rst (renamed from Documentation/sysctl/fs.txt)146
-rw-r--r--Documentation/admin-guide/sysctl/index.rst (renamed from Documentation/sysctl/README)34
-rw-r--r--Documentation/admin-guide/sysctl/kernel.rst (renamed from Documentation/sysctl/kernel.txt)374
-rw-r--r--Documentation/admin-guide/sysctl/net.rst (renamed from Documentation/sysctl/net.txt)141
-rw-r--r--Documentation/admin-guide/sysctl/sunrpc.rst (renamed from Documentation/sysctl/sunrpc.txt)13
-rw-r--r--Documentation/admin-guide/sysctl/user.rst (renamed from Documentation/sysctl/user.txt)32
-rw-r--r--Documentation/admin-guide/sysctl/vm.rst (renamed from Documentation/sysctl/vm.txt)264
-rw-r--r--Documentation/admin-guide/video-output.rst (renamed from Documentation/video-output.txt)0
-rw-r--r--Documentation/arm/Marvell/README395
-rw-r--r--Documentation/arm/Netwinder78
-rw-r--r--Documentation/arm/SA1100/FreeBird21
-rw-r--r--Documentation/arm/SA1100/empeg2
-rw-r--r--Documentation/arm/SA1100/serial_UART47
-rw-r--r--Documentation/arm/arm.rst (renamed from Documentation/arm/README)50
-rw-r--r--Documentation/arm/booting.rst (renamed from Documentation/arm/Booting)71
-rw-r--r--Documentation/arm/cluster-pm-race-avoidance.rst (renamed from Documentation/arm/cluster-pm-race-avoidance.txt)177
-rw-r--r--Documentation/arm/firmware.rst (renamed from Documentation/arm/firmware.txt)14
-rw-r--r--Documentation/arm/index.rst80
-rw-r--r--Documentation/arm/interrupts.rst (renamed from Documentation/arm/Interrupts)90
-rw-r--r--Documentation/arm/ixp4xx.rst (renamed from Documentation/arm/IXP4xx)61
-rw-r--r--Documentation/arm/kernel_mode_neon.rst (renamed from Documentation/arm/kernel_mode_neon.txt)3
-rw-r--r--Documentation/arm/kernel_user_helpers.rst (renamed from Documentation/arm/kernel_user_helpers.txt)79
-rw-r--r--Documentation/arm/keystone/knav-qmss.rst (renamed from Documentation/arm/keystone/knav-qmss.txt)6
-rw-r--r--Documentation/arm/keystone/overview.rst (renamed from Documentation/arm/keystone/Overview.txt)47
-rw-r--r--Documentation/arm/marvel.rst488
-rw-r--r--Documentation/arm/mem_alignment.rst (renamed from Documentation/arm/mem_alignment)11
-rw-r--r--Documentation/arm/memory.rst (renamed from Documentation/arm/memory.txt)9
-rw-r--r--Documentation/arm/microchip.rst (renamed from Documentation/arm/Microchip/README)63
-rw-r--r--Documentation/arm/netwinder.rst85
-rw-r--r--Documentation/arm/nwfpe/index.rst13
-rw-r--r--Documentation/arm/nwfpe/netwinder-fpe.rst (renamed from Documentation/arm/nwfpe/README.FPE)24
-rw-r--r--Documentation/arm/nwfpe/notes.rst (renamed from Documentation/arm/nwfpe/NOTES)3
-rw-r--r--Documentation/arm/nwfpe/nwfpe.rst (renamed from Documentation/arm/nwfpe/README)10
-rw-r--r--Documentation/arm/nwfpe/todo.rst (renamed from Documentation/arm/nwfpe/TODO)47
-rw-r--r--Documentation/arm/omap/dss.rst (renamed from Documentation/arm/OMAP/DSS)102
-rw-r--r--Documentation/arm/omap/index.rst12
-rw-r--r--Documentation/arm/omap/omap.rst (renamed from Documentation/arm/OMAP/README)7
-rw-r--r--Documentation/arm/omap/omap_pm.rst (renamed from Documentation/arm/OMAP/omap_pm)55
-rw-r--r--Documentation/arm/porting.rst (renamed from Documentation/arm/Porting)14
-rw-r--r--Documentation/arm/pxa/mfp.rst (renamed from Documentation/arm/pxa/mfp.txt)110
-rw-r--r--Documentation/arm/sa1100/adsbitsy.rst (renamed from Documentation/arm/SA1100/ADSBitsy)14
-rw-r--r--Documentation/arm/sa1100/assabet.rst (renamed from Documentation/arm/SA1100/Assabet)193
-rw-r--r--Documentation/arm/sa1100/brutus.rst (renamed from Documentation/arm/SA1100/Brutus)49
-rw-r--r--Documentation/arm/sa1100/cerf.rst (renamed from Documentation/arm/SA1100/CERF)10
-rw-r--r--Documentation/arm/sa1100/freebird.rst25
-rw-r--r--Documentation/arm/sa1100/graphicsclient.rst (renamed from Documentation/arm/SA1100/GraphicsClient)48
-rw-r--r--Documentation/arm/sa1100/graphicsmaster.rst (renamed from Documentation/arm/SA1100/GraphicsMaster)13
-rw-r--r--Documentation/arm/sa1100/huw_webpanel.rst (renamed from Documentation/arm/SA1100/HUW_WEBPANEL)8
-rw-r--r--Documentation/arm/sa1100/index.rst25
-rw-r--r--Documentation/arm/sa1100/itsy.rst (renamed from Documentation/arm/SA1100/Itsy)14
-rw-r--r--Documentation/arm/sa1100/lart.rst (renamed from Documentation/arm/SA1100/LART)3
-rw-r--r--Documentation/arm/sa1100/nanoengine.rst (renamed from Documentation/arm/SA1100/nanoEngine)6
-rw-r--r--Documentation/arm/sa1100/pangolin.rst (renamed from Documentation/arm/SA1100/Pangolin)10
-rw-r--r--Documentation/arm/sa1100/pleb.rst (renamed from Documentation/arm/SA1100/PLEB)6
-rw-r--r--Documentation/arm/sa1100/serial_uart.rst51
-rw-r--r--Documentation/arm/sa1100/tifon.rst (renamed from Documentation/arm/SA1100/Tifon)4
-rw-r--r--Documentation/arm/sa1100/yopy.rst (renamed from Documentation/arm/SA1100/Yopy)5
-rw-r--r--Documentation/arm/samsung-s3c24xx/cpufreq.rst (renamed from Documentation/arm/Samsung-S3C24XX/CPUfreq.txt)5
-rw-r--r--Documentation/arm/samsung-s3c24xx/eb2410itx.rst (renamed from Documentation/arm/Samsung-S3C24XX/EB2410ITX.txt)5
-rw-r--r--Documentation/arm/samsung-s3c24xx/gpio.rst (renamed from Documentation/arm/Samsung-S3C24XX/GPIO.txt)23
-rw-r--r--Documentation/arm/samsung-s3c24xx/h1940.rst (renamed from Documentation/arm/Samsung-S3C24XX/H1940.txt)5
-rw-r--r--Documentation/arm/samsung-s3c24xx/index.rst20
-rw-r--r--Documentation/arm/samsung-s3c24xx/nand.rst (renamed from Documentation/arm/Samsung-S3C24XX/NAND.txt)6
-rw-r--r--Documentation/arm/samsung-s3c24xx/overview.rst (renamed from Documentation/arm/Samsung-S3C24XX/Overview.txt)21
-rw-r--r--Documentation/arm/samsung-s3c24xx/s3c2412.rst (renamed from Documentation/arm/Samsung-S3C24XX/S3C2412.txt)5
-rw-r--r--Documentation/arm/samsung-s3c24xx/s3c2413.rst (renamed from Documentation/arm/Samsung-S3C24XX/S3C2413.txt)7
-rw-r--r--Documentation/arm/samsung-s3c24xx/smdk2440.rst (renamed from Documentation/arm/Samsung-S3C24XX/SMDK2440.txt)5
-rw-r--r--Documentation/arm/samsung-s3c24xx/suspend.rst (renamed from Documentation/arm/Samsung-S3C24XX/Suspend.txt)20
-rw-r--r--Documentation/arm/samsung-s3c24xx/usb-host.rst (renamed from Documentation/arm/Samsung-S3C24XX/USB-Host.txt)16
-rw-r--r--Documentation/arm/samsung/bootloader-interface.rst (renamed from Documentation/arm/Samsung/Bootloader-interface.txt)27
-rwxr-xr-xDocumentation/arm/samsung/clksrc-change-registers.awk (renamed from Documentation/arm/Samsung/clksrc-change-registers.awk)0
-rw-r--r--Documentation/arm/samsung/gpio.rst (renamed from Documentation/arm/Samsung/GPIO.txt)7
-rw-r--r--Documentation/arm/samsung/index.rst12
-rw-r--r--Documentation/arm/samsung/overview.rst (renamed from Documentation/arm/Samsung/Overview.txt)15
-rw-r--r--Documentation/arm/setup.rst (renamed from Documentation/arm/Setup)49
-rw-r--r--Documentation/arm/sh-mobile/.gitignore (renamed from Documentation/arm/SH-Mobile/.gitignore)0
-rw-r--r--Documentation/arm/spear/overview.rst (renamed from Documentation/arm/SPEAr/overview.txt)21
-rw-r--r--Documentation/arm/sti/overview.rst (renamed from Documentation/arm/sti/overview.txt)21
-rw-r--r--Documentation/arm/sti/stih407-overview.rst (renamed from Documentation/arm/sti/stih407-overview.txt)9
-rw-r--r--Documentation/arm/sti/stih415-overview.rst (renamed from Documentation/arm/sti/stih415-overview.txt)8
-rw-r--r--Documentation/arm/sti/stih416-overview.rst (renamed from Documentation/arm/sti/stih416-overview.txt)5
-rw-r--r--Documentation/arm/sti/stih418-overview.rst (renamed from Documentation/arm/sti/stih418-overview.txt)9
-rw-r--r--Documentation/arm/stm32/overview.rst2
-rw-r--r--Documentation/arm/stm32/stm32f429-overview.rst7
-rw-r--r--Documentation/arm/stm32/stm32f746-overview.rst7
-rw-r--r--Documentation/arm/stm32/stm32f769-overview.rst7
-rw-r--r--Documentation/arm/stm32/stm32h743-overview.rst7
-rw-r--r--Documentation/arm/stm32/stm32mp157-overview.rst3
-rw-r--r--Documentation/arm/sunxi.rst (renamed from Documentation/arm/sunxi/README)98
-rw-r--r--Documentation/arm/sunxi/clocks.rst (renamed from Documentation/arm/sunxi/clocks.txt)7
-rw-r--r--Documentation/arm/swp_emulation.rst (renamed from Documentation/arm/swp_emulation)24
-rw-r--r--Documentation/arm/tcm.rst (renamed from Documentation/arm/tcm.txt)54
-rw-r--r--Documentation/arm/uefi.rst (renamed from Documentation/arm/uefi.txt)39
-rw-r--r--Documentation/arm/vfp/release-notes.rst (renamed from Documentation/arm/VFP/release-notes.txt)4
-rw-r--r--Documentation/arm/vlocks.rst (renamed from Documentation/arm/vlocks.txt)9
-rw-r--r--Documentation/arm64/index.rst2
-rw-r--r--Documentation/backlight/lp855x-driver.txt66
-rw-r--r--Documentation/block/bfq-iosched.rst (renamed from Documentation/block/bfq-iosched.txt)68
-rw-r--r--Documentation/block/biodoc.rst (renamed from Documentation/block/biodoc.txt)335
-rw-r--r--Documentation/block/biovecs.rst (renamed from Documentation/block/biovecs.txt)20
-rw-r--r--Documentation/block/capability.rst18
-rw-r--r--Documentation/block/capability.txt15
-rw-r--r--Documentation/block/cmdline-partition.rst (renamed from Documentation/block/cmdline-partition.txt)13
-rw-r--r--Documentation/block/data-integrity.rst (renamed from Documentation/block/data-integrity.txt)60
-rw-r--r--Documentation/block/deadline-iosched.rst (renamed from Documentation/block/deadline-iosched.txt)21
-rw-r--r--Documentation/block/index.rst25
-rw-r--r--Documentation/block/ioprio.rst (renamed from Documentation/block/ioprio.txt)103
-rw-r--r--Documentation/block/kyber-iosched.rst (renamed from Documentation/block/kyber-iosched.txt)3
-rw-r--r--Documentation/block/null_blk.rst (renamed from Documentation/block/null_blk.txt)65
-rw-r--r--Documentation/block/pr.rst (renamed from Documentation/block/pr.txt)18
-rw-r--r--Documentation/block/queue-sysfs.rst (renamed from Documentation/block/queue-sysfs.txt)7
-rw-r--r--Documentation/block/request.rst (renamed from Documentation/block/request.txt)47
-rw-r--r--Documentation/block/stat.rst (renamed from Documentation/block/stat.txt)13
-rw-r--r--Documentation/block/switching-sched.rst (renamed from Documentation/block/switching-sched.txt)28
-rw-r--r--Documentation/block/writeback_cache_control.rst (renamed from Documentation/block/writeback_cache_control.txt)12
-rw-r--r--Documentation/cdrom/index.rst2
-rw-r--r--Documentation/core-api/gcc-plugins.rst (renamed from Documentation/gcc-plugins.txt)0
-rw-r--r--Documentation/core-api/index.rst1
-rw-r--r--Documentation/core-api/printk-formats.rst2
-rw-r--r--Documentation/devicetree/bindings/arm/xen.txt2
-rw-r--r--Documentation/devicetree/bindings/phy/phy-bindings.txt2
-rw-r--r--Documentation/devicetree/bindings/phy/phy-pxa-usb.txt2
-rw-r--r--Documentation/devicetree/booting-without-of.txt4
-rw-r--r--Documentation/driver-api/backlight/lp855x-driver.rst81
-rw-r--r--Documentation/driver-api/bt8xxgpio.rst (renamed from Documentation/bt8xxgpio.txt)0
-rw-r--r--Documentation/driver-api/connector.rst (renamed from Documentation/connector/connector.txt)130
-rw-r--r--Documentation/driver-api/console.rst (renamed from Documentation/console/console.txt)63
-rw-r--r--Documentation/driver-api/dcdbas.rst (renamed from Documentation/dcdbas.txt)0
-rw-r--r--Documentation/driver-api/dell_rbu.rst (renamed from Documentation/dell_rbu.txt)0
-rw-r--r--Documentation/driver-api/driver-model/binding.rst (renamed from Documentation/driver-model/binding.rst)0
-rw-r--r--Documentation/driver-api/driver-model/bus.rst (renamed from Documentation/driver-model/bus.rst)0
-rw-r--r--Documentation/driver-api/driver-model/class.rst (renamed from Documentation/driver-model/class.rst)0
-rw-r--r--Documentation/driver-api/driver-model/design-patterns.rst (renamed from Documentation/driver-model/design-patterns.rst)0
-rw-r--r--Documentation/driver-api/driver-model/device.rst (renamed from Documentation/driver-model/device.rst)0
-rw-r--r--Documentation/driver-api/driver-model/devres.rst (renamed from Documentation/driver-model/devres.rst)0
-rw-r--r--Documentation/driver-api/driver-model/driver.rst (renamed from Documentation/driver-model/driver.rst)0
-rw-r--r--Documentation/driver-api/driver-model/index.rst (renamed from Documentation/driver-model/index.rst)2
-rw-r--r--Documentation/driver-api/driver-model/overview.rst (renamed from Documentation/driver-model/overview.rst)0
-rw-r--r--Documentation/driver-api/driver-model/platform.rst (renamed from Documentation/driver-model/platform.rst)0
-rw-r--r--Documentation/driver-api/driver-model/porting.rst (renamed from Documentation/driver-model/porting.rst)2
-rw-r--r--Documentation/driver-api/early-userspace/buffer-format.rst (renamed from Documentation/early-userspace/buffer-format.txt)19
-rw-r--r--Documentation/driver-api/early-userspace/early_userspace_support.rst (renamed from Documentation/early-userspace/README)3
-rw-r--r--Documentation/driver-api/early-userspace/index.rst18
-rw-r--r--Documentation/driver-api/edid.rst (renamed from Documentation/EDID/howto.rst)2
-rw-r--r--Documentation/driver-api/eisa.rst (renamed from Documentation/eisa.txt)4
-rw-r--r--Documentation/driver-api/gpio/driver.rst2
-rw-r--r--Documentation/driver-api/index.rst43
-rw-r--r--Documentation/driver-api/interconnect.rst (renamed from Documentation/interconnect/interconnect.rst)2
-rw-r--r--Documentation/driver-api/isa.rst (renamed from Documentation/isa.txt)0
-rw-r--r--Documentation/driver-api/isapnp.rst (renamed from Documentation/isapnp.txt)0
-rw-r--r--Documentation/driver-api/lightnvm-pblk.rst (renamed from Documentation/lightnvm/pblk.txt)0
-rw-r--r--Documentation/driver-api/md/index.rst12
-rw-r--r--Documentation/driver-api/md/md-cluster.rst (renamed from Documentation/md/md-cluster.txt)184
-rw-r--r--Documentation/driver-api/md/raid5-cache.rst (renamed from Documentation/md/raid5-cache.txt)28
-rw-r--r--Documentation/driver-api/md/raid5-ppl.rst (renamed from Documentation/md/raid5-ppl.txt)2
-rw-r--r--Documentation/driver-api/memory-devices/index.rst18
-rw-r--r--Documentation/driver-api/memory-devices/ti-emif.rst (renamed from Documentation/memory-devices/ti-emif.txt)27
-rw-r--r--Documentation/driver-api/memory-devices/ti-gpmc.rst (renamed from Documentation/bus-devices/ti-gpmc.txt)163
-rw-r--r--Documentation/driver-api/men-chameleon-bus.rst (renamed from Documentation/men-chameleon-bus.txt)0
-rw-r--r--Documentation/driver-api/mmc/index.rst13
-rw-r--r--Documentation/driver-api/mmc/mmc-async-req.rst (renamed from Documentation/mmc/mmc-async-req.txt)59
-rw-r--r--Documentation/driver-api/mmc/mmc-dev-attrs.rst (renamed from Documentation/mmc/mmc-dev-attrs.txt)32
-rw-r--r--Documentation/driver-api/mmc/mmc-dev-parts.rst (renamed from Documentation/mmc/mmc-dev-parts.txt)13
-rw-r--r--Documentation/driver-api/mmc/mmc-tools.rst (renamed from Documentation/mmc/mmc-tools.txt)5
-rw-r--r--Documentation/driver-api/mtd/index.rst12
-rw-r--r--Documentation/driver-api/mtd/intel-spi.rst (renamed from Documentation/mtd/intel-spi.txt)46
-rw-r--r--Documentation/driver-api/mtd/nand_ecc.rst (renamed from Documentation/mtd/nand_ecc.txt)497
-rw-r--r--Documentation/driver-api/mtd/spi-nor.rst (renamed from Documentation/mtd/spi-nor.txt)7
-rw-r--r--Documentation/driver-api/nfc/index.rst11
-rw-r--r--Documentation/driver-api/nfc/nfc-hci.rst (renamed from Documentation/nfc/nfc-hci.txt)167
-rw-r--r--Documentation/driver-api/nfc/nfc-pn544.rst (renamed from Documentation/nfc/nfc-pn544.txt)6
-rw-r--r--Documentation/driver-api/ntb.rst (renamed from Documentation/ntb.txt)0
-rw-r--r--Documentation/driver-api/nvdimm/btt.rst (renamed from Documentation/nvdimm/btt.txt)144
-rw-r--r--Documentation/driver-api/nvdimm/index.rst12
-rw-r--r--Documentation/driver-api/nvdimm/nvdimm.rst (renamed from Documentation/nvdimm/nvdimm.txt)526
-rw-r--r--Documentation/driver-api/nvdimm/security.rst (renamed from Documentation/nvdimm/security.txt)4
-rw-r--r--Documentation/driver-api/nvmem.rst (renamed from Documentation/nvmem/nvmem.txt)112
-rw-r--r--Documentation/driver-api/parport-lowlevel.rst (renamed from Documentation/parport-lowlevel.txt)0
-rw-r--r--Documentation/driver-api/phy/index.rst18
-rw-r--r--Documentation/driver-api/phy/phy.rst (renamed from Documentation/phy.txt)0
-rw-r--r--Documentation/driver-api/phy/samsung-usb2.rst (renamed from Documentation/phy/samsung-usb2.txt)60
-rw-r--r--Documentation/driver-api/pps.rst2
-rw-r--r--Documentation/driver-api/pti_intel_mid.rst106
-rw-r--r--Documentation/driver-api/ptp.rst2
-rw-r--r--Documentation/driver-api/pwm.rst (renamed from Documentation/pwm.txt)0
-rw-r--r--Documentation/driver-api/rapidio/index.rst15
-rw-r--r--Documentation/driver-api/rapidio/mport_cdev.rst (renamed from Documentation/rapidio/mport_cdev.txt)47
-rw-r--r--Documentation/driver-api/rapidio/rapidio.rst (renamed from Documentation/rapidio/rapidio.txt)39
-rw-r--r--Documentation/driver-api/rapidio/rio_cm.rst (renamed from Documentation/rapidio/rio_cm.txt)66
-rw-r--r--Documentation/driver-api/rapidio/sysfs.rst (renamed from Documentation/rapidio/sysfs.txt)4
-rw-r--r--Documentation/driver-api/rapidio/tsi721.rst (renamed from Documentation/rapidio/tsi721.txt)45
-rw-r--r--Documentation/driver-api/rfkill.rst (renamed from Documentation/rfkill.txt)0
-rw-r--r--Documentation/driver-api/serial/cyclades_z.rst (renamed from Documentation/serial/cyclades_z.rst)0
-rw-r--r--Documentation/driver-api/serial/driver.rst (renamed from Documentation/serial/driver.rst)2
-rw-r--r--Documentation/driver-api/serial/index.rst (renamed from Documentation/serial/index.rst)2
-rw-r--r--Documentation/driver-api/serial/moxa-smartio.rst (renamed from Documentation/serial/moxa-smartio.rst)0
-rw-r--r--Documentation/driver-api/serial/n_gsm.rst (renamed from Documentation/serial/n_gsm.rst)0
-rw-r--r--Documentation/driver-api/serial/rocket.rst (renamed from Documentation/serial/rocket.rst)0
-rw-r--r--Documentation/driver-api/serial/serial-iso7816.rst (renamed from Documentation/serial/serial-iso7816.rst)0
-rw-r--r--Documentation/driver-api/serial/serial-rs485.rst (renamed from Documentation/serial/serial-rs485.rst)0
-rw-r--r--Documentation/driver-api/serial/tty.rst (renamed from Documentation/serial/tty.rst)0
-rw-r--r--Documentation/driver-api/sgi-ioc4.rst (renamed from Documentation/sgi-ioc4.txt)0
-rw-r--r--Documentation/driver-api/sm501.rst (renamed from Documentation/SM501.txt)0
-rw-r--r--Documentation/driver-api/smsc_ece1099.rst (renamed from Documentation/smsc_ece1099.txt)0
-rw-r--r--Documentation/driver-api/switchtec.rst (renamed from Documentation/switchtec.txt)2
-rw-r--r--Documentation/driver-api/sync_file.rst (renamed from Documentation/sync_file.txt)0
-rw-r--r--Documentation/driver-api/vfio-mediated-device.rst (renamed from Documentation/vfio-mediated-device.txt)2
-rw-r--r--Documentation/driver-api/vfio.rst (renamed from Documentation/vfio.txt)0
-rw-r--r--Documentation/driver-api/xilinx/eemi.rst (renamed from Documentation/xilinx/eemi.rst)0
-rw-r--r--Documentation/driver-api/xilinx/index.rst (renamed from Documentation/xilinx/index.rst)1
-rw-r--r--Documentation/driver-api/xillybus.rst (renamed from Documentation/xillybus.txt)0
-rw-r--r--Documentation/driver-api/zorro.rst (renamed from Documentation/zorro.txt)0
-rw-r--r--Documentation/fault-injection/index.rst2
-rw-r--r--Documentation/fb/fbcon.rst4
-rw-r--r--Documentation/fb/index.rst2
-rw-r--r--Documentation/fb/vesafb.rst2
-rw-r--r--Documentation/filesystems/nfs/nfsroot.txt2
-rw-r--r--Documentation/filesystems/proc.txt2
-rw-r--r--Documentation/filesystems/ramfs-rootfs-initramfs.txt4
-rw-r--r--Documentation/filesystems/sysfs.txt2
-rw-r--r--Documentation/filesystems/tmpfs.txt2
-rw-r--r--Documentation/firmware-guide/acpi/enumeration.rst2
-rw-r--r--Documentation/fpga/index.rst2
-rw-r--r--Documentation/hid/index.rst2
-rw-r--r--Documentation/hwmon/submitting-patches.rst2
-rw-r--r--Documentation/ia64/aliasing.rst (renamed from Documentation/ia64/aliasing.txt)73
-rw-r--r--Documentation/ia64/efirtc.rst (renamed from Documentation/ia64/efirtc.txt)120
-rw-r--r--Documentation/ia64/err_inject.rst (renamed from Documentation/ia64/err_inject.txt)359
-rw-r--r--Documentation/ia64/fsys.rst (renamed from Documentation/ia64/fsys.txt)133
-rw-r--r--Documentation/ia64/ia64.rst (renamed from Documentation/ia64/README)26
-rw-r--r--Documentation/ia64/index.rst18
-rw-r--r--Documentation/ia64/irq-redir.rst (renamed from Documentation/ia64/IRQ-redir.txt)31
-rw-r--r--Documentation/ia64/mca.rst (renamed from Documentation/ia64/mca.txt)10
-rw-r--r--Documentation/ia64/serial.rst (renamed from Documentation/ia64/serial.txt)36
-rw-r--r--Documentation/ia64/xen.rst206
-rw-r--r--Documentation/ia64/xen.txt183
-rw-r--r--Documentation/ide/index.rst2
-rw-r--r--Documentation/iio/index.rst2
-rw-r--r--Documentation/index.rst32
-rw-r--r--Documentation/ioctl/botching-up-ioctls.rst (renamed from Documentation/ioctl/botching-up-ioctls.txt)1
-rw-r--r--Documentation/ioctl/cdrom.rst1233
-rw-r--r--Documentation/ioctl/cdrom.txt967
-rw-r--r--Documentation/ioctl/hdio.rst (renamed from Documentation/ioctl/hdio.txt)835
-rw-r--r--Documentation/ioctl/index.rst16
-rw-r--r--Documentation/ioctl/ioctl-decoding.rst (renamed from Documentation/ioctl/ioctl-decoding.txt)13
-rw-r--r--Documentation/ioctl/ioctl-number.rst361
-rw-r--r--Documentation/ioctl/ioctl-number.txt351
-rw-r--r--Documentation/kbuild/index.rst2
-rw-r--r--Documentation/kbuild/issues.rst20
-rw-r--r--Documentation/kbuild/kbuild.rst3
-rw-r--r--Documentation/kbuild/kconfig-language.rst12
-rw-r--r--Documentation/kbuild/kconfig.rst8
-rw-r--r--Documentation/kbuild/makefiles.rst1
-rw-r--r--Documentation/kernel-hacking/locking.rst2
-rw-r--r--Documentation/leds/index.rst2
-rw-r--r--Documentation/livepatch/index.rst2
-rw-r--r--Documentation/locking/index.rst24
-rw-r--r--Documentation/locking/lockdep-design.rst (renamed from Documentation/locking/lockdep-design.txt)51
-rw-r--r--Documentation/locking/lockstat.rst204
-rw-r--r--Documentation/locking/lockstat.txt183
-rw-r--r--Documentation/locking/locktorture.rst (renamed from Documentation/locking/locktorture.txt)105
-rw-r--r--Documentation/locking/mutex-design.rst (renamed from Documentation/locking/mutex-design.txt)26
-rw-r--r--Documentation/locking/rt-mutex-design.rst (renamed from Documentation/locking/rt-mutex-design.txt)139
-rw-r--r--Documentation/locking/rt-mutex.rst (renamed from Documentation/locking/rt-mutex.txt)30
-rw-r--r--Documentation/locking/spinlocks.rst (renamed from Documentation/locking/spinlocks.txt)32
-rw-r--r--Documentation/locking/ww-mutex-design.rst (renamed from Documentation/locking/ww-mutex-design.txt)82
-rw-r--r--Documentation/m68k/index.rst17
-rw-r--r--Documentation/m68k/kernel-options.rst (renamed from Documentation/m68k/kernel-options.txt)319
-rw-r--r--Documentation/mic/index.rst2
-rw-r--r--Documentation/netlabel/index.rst2
-rw-r--r--Documentation/networking/ip-sysctl.txt2
-rw-r--r--Documentation/pcmcia/index.rst2
-rw-r--r--Documentation/pi-futex.txt2
-rw-r--r--Documentation/powerpc/firmware-assisted-dump.txt2
-rw-r--r--Documentation/process/submit-checklist.rst2
-rw-r--r--Documentation/pti/pti_intel_mid.txt99
-rw-r--r--Documentation/rbtree.txt6
-rw-r--r--Documentation/riscv/index.rst2
-rw-r--r--Documentation/s390/debugging390.rst2
-rw-r--r--Documentation/s390/index.rst2
-rw-r--r--Documentation/s390/vfio-ccw.rst6
-rw-r--r--Documentation/scheduler/index.rst2
-rw-r--r--Documentation/scheduler/sched-deadline.rst2
-rw-r--r--Documentation/scheduler/sched-design-CFS.rst2
-rw-r--r--Documentation/scheduler/sched-rt-group.rst2
-rw-r--r--Documentation/security/index.rst5
-rw-r--r--Documentation/security/lsm-development.rst (renamed from Documentation/security/LSM.rst)0
-rw-r--r--Documentation/security/lsm.rst (renamed from Documentation/lsm.txt)0
-rw-r--r--Documentation/security/sak.rst (renamed from Documentation/SAK.txt)0
-rw-r--r--Documentation/security/siphash.rst (renamed from Documentation/siphash.txt)0
-rw-r--r--Documentation/security/tpm/index.rst1
-rw-r--r--Documentation/security/tpm/xen-tpmfront.rst (renamed from Documentation/security/tpm/xen-tpmfront.txt)105
-rw-r--r--Documentation/sparc/index.rst2
-rw-r--r--Documentation/sysctl/abi.txt54
-rw-r--r--Documentation/target/index.rst2
-rw-r--r--Documentation/timers/index.rst2
-rw-r--r--Documentation/translations/it_IT/kernel-hacking/locking.rst2
-rw-r--r--Documentation/translations/it_IT/process/submit-checklist.rst2
-rw-r--r--Documentation/translations/zh_CN/arm/Booting4
-rw-r--r--Documentation/translations/zh_CN/arm/kernel_user_helpers.txt4
-rw-r--r--Documentation/translations/zh_CN/filesystems/sysfs.txt2
-rw-r--r--Documentation/translations/zh_CN/gpio.txt4
-rw-r--r--Documentation/translations/zh_CN/oops-tracing.txt4
-rw-r--r--Documentation/translations/zh_CN/process/submit-checklist.rst2
-rw-r--r--Documentation/userspace-api/accelerators/ocxl.rst (renamed from Documentation/accelerators/ocxl.rst)2
-rw-r--r--Documentation/userspace-api/index.rst1
-rw-r--r--Documentation/vm/numa.rst4
-rw-r--r--Documentation/vm/page_migration.rst2
-rw-r--r--Documentation/vm/unevictable-lru.rst4
-rw-r--r--Documentation/w1/w1.netlink2
-rw-r--r--Documentation/watchdog/index.rst2
-rw-r--r--Documentation/x86/index.rst2
-rw-r--r--Documentation/x86/intel-iommu.rst (renamed from Documentation/Intel-IOMMU.txt)0
-rw-r--r--Documentation/x86/intel_txt.rst (renamed from Documentation/intel_txt.txt)0
-rw-r--r--Documentation/x86/topology.rst2
-rw-r--r--Documentation/x86/x86_64/fake-numa-for-cpusets.rst4
-rw-r--r--Documentation/xtensa/atomctl.rst (renamed from Documentation/xtensa/atomctl.txt)13
-rw-r--r--Documentation/xtensa/booting.rst (renamed from Documentation/xtensa/booting.txt)5
-rw-r--r--Documentation/xtensa/index.rst12
-rw-r--r--Documentation/xtensa/mmu.rst195
-rw-r--r--Documentation/xtensa/mmu.txt189
-rw-r--r--MAINTAINERS101
-rw-r--r--arch/alpha/kernel/syscalls/syscall.tbl1
-rw-r--r--arch/arc/boot/dts/haps_hs.dts30
-rw-r--r--arch/arc/boot/dts/hsdk.dts14
-rw-r--r--arch/arc/configs/haps_hs_defconfig5
-rw-r--r--arch/arc/configs/hsdk_defconfig5
-rw-r--r--arch/arc/include/asm/entry-arcv2.h361
-rw-r--r--arch/arc/include/asm/entry-compact.h4
-rw-r--r--arch/arc/include/asm/linkage.h18
-rw-r--r--arch/arc/kernel/asm-offsets.c7
-rw-r--r--arch/arc/kernel/entry-arcv2.S62
-rw-r--r--arch/arc/kernel/entry-compact.S2
-rw-r--r--arch/arc/kernel/entry.S4
-rw-r--r--arch/arc/kernel/unwind.c9
-rw-r--r--arch/arc/mm/fault.c185
-rw-r--r--arch/arc/mm/tlbex.S11
-rw-r--r--arch/arm/Kconfig6
-rw-r--r--arch/arm/common/mcpm_entry.c2
-rw-r--r--arch/arm/common/mcpm_head.S2
-rw-r--r--arch/arm/common/vlock.S2
-rw-r--r--arch/arm/include/asm/setup.h2
-rw-r--r--arch/arm/include/uapi/asm/setup.h2
-rw-r--r--arch/arm/kernel/entry-armv.S2
-rw-r--r--arch/arm/mach-exynos/common.h2
-rw-r--r--arch/arm/mach-ixp4xx/Kconfig14
-rw-r--r--arch/arm/mach-s3c24xx/pm.c2
-rw-r--r--arch/arm/mm/Kconfig4
-rw-r--r--arch/arm/plat-samsung/Kconfig6
-rw-r--r--arch/arm/tools/mach-types2
-rw-r--r--arch/arm64/Kconfig4
-rw-r--r--arch/arm64/kernel/kuser32.S2
-rw-r--r--arch/ia64/kernel/efi.c2
-rw-r--r--arch/ia64/kernel/fsys.S2
-rw-r--r--arch/ia64/kernel/syscalls/syscall.tbl1
-rw-r--r--arch/ia64/mm/ioremap.c2
-rw-r--r--arch/ia64/pci/pci.c2
-rw-r--r--arch/m68k/kernel/syscalls/syscall.tbl1
-rw-r--r--arch/mips/bmips/setup.c2
-rw-r--r--arch/mips/kernel/syscalls/syscall_n32.tbl1
-rw-r--r--arch/mips/kernel/syscalls/syscall_n64.tbl1
-rw-r--r--arch/mips/kernel/syscalls/syscall_o32.tbl1
-rw-r--r--arch/parisc/Kconfig2
-rw-r--r--arch/parisc/kernel/syscalls/syscall.tbl1
-rw-r--r--arch/powerpc/kernel/syscalls/syscall.tbl1
-rw-r--r--arch/s390/kernel/syscalls/syscall.tbl1
-rw-r--r--arch/sh/Kconfig4
-rw-r--r--arch/sh/kernel/syscalls/syscall.tbl1
-rw-r--r--arch/sparc/Kconfig2
-rw-r--r--arch/sparc/kernel/syscalls/syscall.tbl1
-rw-r--r--arch/x86/Kconfig8
-rw-r--r--arch/x86/ia32/sys_ia32.c4
-rw-r--r--arch/xtensa/boot/dts/virt.dts72
-rw-r--r--arch/xtensa/configs/virt_defconfig113
-rw-r--r--arch/xtensa/include/asm/asmmacro.h46
-rw-r--r--arch/xtensa/include/asm/initialize_mmu.h2
-rw-r--r--arch/xtensa/include/asm/platform.h10
-rw-r--r--arch/xtensa/include/asm/types.h23
-rw-r--r--arch/xtensa/kernel/coprocessor.S7
-rw-r--r--arch/xtensa/kernel/entry.S11
-rw-r--r--arch/xtensa/kernel/mcount.S11
-rw-r--r--arch/xtensa/kernel/pci.c124
-rw-r--r--arch/xtensa/kernel/platform.c2
-rw-r--r--arch/xtensa/kernel/setup.c4
-rw-r--r--arch/xtensa/lib/checksum.S12
-rw-r--r--arch/xtensa/lib/memcopy.S38
-rw-r--r--arch/xtensa/lib/memset.S10
-rw-r--r--arch/xtensa/lib/strncpy_user.S16
-rw-r--r--arch/xtensa/lib/strnlen_user.S14
-rw-r--r--arch/xtensa/lib/usercopy.S12
-rw-r--r--arch/xtensa/mm/init.c5
-rw-r--r--arch/xtensa/mm/misc.S78
-rw-r--r--block/Kconfig4
-rw-r--r--block/Kconfig.iosched2
-rw-r--r--block/bfq-iosched.c2
-rw-r--r--block/blk-integrity.c2
-rw-r--r--block/ioprio.c2
-rw-r--r--block/mq-deadline.c2
-rw-r--r--block/partitions/Kconfig2
-rw-r--r--block/partitions/cmdline.c2
-rw-r--r--drivers/base/platform.c2
-rw-r--r--drivers/block/Kconfig8
-rw-r--r--drivers/block/floppy.c2
-rw-r--r--drivers/block/zram/Kconfig6
-rw-r--r--drivers/char/Kconfig6
-rw-r--r--drivers/char/hw_random/core.c2
-rw-r--r--drivers/crypto/sunxi-ss/sun4i-ss-cipher.c2
-rw-r--r--drivers/crypto/sunxi-ss/sun4i-ss-core.c2
-rw-r--r--drivers/crypto/sunxi-ss/sun4i-ss-hash.c2
-rw-r--r--drivers/crypto/sunxi-ss/sun4i-ss.h2
-rw-r--r--drivers/dma-buf/Kconfig2
-rw-r--r--drivers/gpio/Kconfig2
-rw-r--r--drivers/gpio/gpio-cs5535.c2
-rw-r--r--drivers/gpu/drm/Kconfig2
-rw-r--r--drivers/gpu/drm/drm_ioctl.c2
-rw-r--r--drivers/gpu/drm/drm_modeset_lock.c2
-rw-r--r--drivers/input/touchscreen/sun4i-ts.c2
-rw-r--r--drivers/md/Kconfig2
-rw-r--r--drivers/md/dm-init.c2
-rw-r--r--drivers/md/dm-raid.c2
-rw-r--r--drivers/mtd/nand/raw/nand_ecc.c2
-rw-r--r--drivers/net/ethernet/intel/ice/ice_main.c2
-rw-r--r--drivers/nvdimm/Kconfig2
-rw-r--r--drivers/pci/switch/Kconfig2
-rw-r--r--drivers/perf/qcom_l3_pmu.c2
-rw-r--r--drivers/platform/x86/Kconfig8
-rw-r--r--drivers/platform/x86/dcdbas.c2
-rw-r--r--drivers/platform/x86/dell_rbu.c2
-rw-r--r--drivers/pnp/isapnp/Kconfig2
-rw-r--r--drivers/rapidio/Kconfig2
-rw-r--r--drivers/staging/unisys/Documentation/overview.txt4
-rw-r--r--drivers/tty/Kconfig6
-rw-r--r--drivers/tty/serial/Kconfig2
-rw-r--r--drivers/tty/serial/ucc_uart.c2
-rw-r--r--drivers/vfio/Kconfig2
-rw-r--r--drivers/vfio/mdev/Kconfig2
-rw-r--r--drivers/w1/Kconfig2
-rw-r--r--fs/btrfs/Kconfig3
-rw-r--r--fs/btrfs/Makefile3
-rw-r--r--fs/btrfs/backref.c17
-rw-r--r--fs/btrfs/backref.h3
-rw-r--r--fs/btrfs/block-rsv.c425
-rw-r--r--fs/btrfs/block-rsv.h101
-rw-r--r--fs/btrfs/btrfs_inode.h22
-rw-r--r--fs/btrfs/check-integrity.c11
-rw-r--r--fs/btrfs/compression.c65
-rw-r--r--fs/btrfs/compression.h3
-rw-r--r--fs/btrfs/ctree.h282
-rw-r--r--fs/btrfs/delalloc-space.c494
-rw-r--r--fs/btrfs/delalloc-space.h23
-rw-r--r--fs/btrfs/delayed-ref.c181
-rw-r--r--fs/btrfs/delayed-ref.h10
-rw-r--r--fs/btrfs/dev-replace.c31
-rw-r--r--fs/btrfs/disk-io.c166
-rw-r--r--fs/btrfs/disk-io.h2
-rw-r--r--fs/btrfs/extent-tree.c2503
-rw-r--r--fs/btrfs/extent_io.c149
-rw-r--r--fs/btrfs/extent_io.h10
-rw-r--r--fs/btrfs/file-item.c43
-rw-r--r--fs/btrfs/file.c28
-rw-r--r--fs/btrfs/free-space-cache.c16
-rw-r--r--fs/btrfs/inode-map.c1
-rw-r--r--fs/btrfs/inode.c109
-rw-r--r--fs/btrfs/ioctl.c23
-rw-r--r--fs/btrfs/locking.c62
-rw-r--r--fs/btrfs/ordered-data.c56
-rw-r--r--fs/btrfs/ordered-data.h8
-rw-r--r--fs/btrfs/print-tree.c6
-rw-r--r--fs/btrfs/props.c8
-rw-r--r--fs/btrfs/qgroup.c24
-rw-r--r--fs/btrfs/raid56.h4
-rw-r--r--fs/btrfs/relocation.c1
-rw-r--r--fs/btrfs/root-tree.c56
-rw-r--r--fs/btrfs/scrub.c50
-rw-r--r--fs/btrfs/send.c16
-rw-r--r--fs/btrfs/space-info.c1094
-rw-r--r--fs/btrfs/space-info.h133
-rw-r--r--fs/btrfs/super.c30
-rw-r--r--fs/btrfs/sysfs.c1
-rw-r--r--fs/btrfs/tests/extent-io-tests.c117
-rw-r--r--fs/btrfs/tests/extent-map-tests.c22
-rw-r--r--fs/btrfs/transaction.c18
-rw-r--r--fs/btrfs/transaction.h1
-rw-r--r--fs/btrfs/tree-checker.c11
-rw-r--r--fs/btrfs/tree-log.c40
-rw-r--r--fs/btrfs/volumes.c376
-rw-r--r--fs/btrfs/volumes.h52
-rw-r--r--fs/orangefs/file.c4
-rw-r--r--fs/proc/Kconfig2
-rw-r--r--fs/proc/base.c132
-rw-r--r--include/linux/cgroup-defs.h2
-rw-r--r--include/linux/connector.h63
-rw-r--r--include/linux/device.h2
-rw-r--r--include/linux/hw_random.h2
-rw-r--r--include/linux/lockdep.h2
-rw-r--r--include/linux/mutex.h2
-rw-r--r--include/linux/platform_device.h2
-rw-r--r--include/linux/rwsem.h2
-rw-r--r--include/linux/sched/task.h1
-rw-r--r--include/linux/serial_core.h2
-rw-r--r--include/trace/events/btrfs.h40
-rw-r--r--include/uapi/asm-generic/unistd.h2
-rw-r--r--include/uapi/linux/bpf.h2
-rw-r--r--include/uapi/linux/btrfs_tree.h2
-rw-r--r--include/uapi/rdma/rdma_user_ioctl_cmds.h2
-rw-r--r--init/Kconfig6
-rw-r--r--kernel/cgroup/cpuset.c2
-rw-r--r--kernel/fork.c17
-rw-r--r--kernel/locking/mutex.c2
-rw-r--r--kernel/locking/rtmutex.c2
-rw-r--r--kernel/panic.c2
-rw-r--r--lib/Kconfig.debug4
-rw-r--r--mm/swap.c2
-rw-r--r--samples/Kconfig2
-rw-r--r--scripts/coccinelle/free/devm_free.cocci2
-rw-r--r--scripts/gcc-plugins/Kconfig2
-rw-r--r--security/Kconfig2
-rw-r--r--security/device_cgroup.c2
-rw-r--r--security/safesetid/lsm.c276
-rw-r--r--security/safesetid/lsm.h34
-rw-r--r--security/safesetid/securityfs.c307
-rw-r--r--tools/include/uapi/linux/bpf.h2
-rw-r--r--tools/testing/selftests/safesetid/safesetid-test.c18
-rw-r--r--tools/testing/selftests/zram/README2
-rw-r--r--usr/Kconfig2
665 files changed, 15497 insertions, 11717 deletions
diff --git a/CREDITS b/CREDITS
index beac0c81d081..401c5092bbf9 100644
--- a/CREDITS
+++ b/CREDITS
@@ -3120,7 +3120,7 @@ S: France
N: Rik van Riel
E: riel@redhat.com
W: http://www.surriel.com/
-D: Linux-MM site, Documentation/sysctl/*, swap/mm readaround
+D: Linux-MM site, Documentation/admin-guide/sysctl/*, swap/mm readaround
D: kswapd fixes, random kernel hacker, rmap VM,
D: nl.linux.org administrator, minor scheduler additions
S: Red Hat Boston
diff --git a/Documentation/ABI/obsolete/sysfs-gpio b/Documentation/ABI/obsolete/sysfs-gpio
index 40d41ea1a3f5..e0d4e5e2dd90 100644
--- a/Documentation/ABI/obsolete/sysfs-gpio
+++ b/Documentation/ABI/obsolete/sysfs-gpio
@@ -11,7 +11,7 @@ Description:
Kernel code may export it for complete or partial access.
GPIOs are identified as they are inside the kernel, using integers in
- the range 0..INT_MAX. See Documentation/gpio for more information.
+ the range 0..INT_MAX. See Documentation/admin-guide/gpio for more information.
/sys/class/gpio
/export ... asks the kernel to export a GPIO to userspace
diff --git a/Documentation/ABI/removed/sysfs-class-rfkill b/Documentation/ABI/removed/sysfs-class-rfkill
index 3ce6231f20b2..9c08c7f98ffb 100644
--- a/Documentation/ABI/removed/sysfs-class-rfkill
+++ b/Documentation/ABI/removed/sysfs-class-rfkill
@@ -1,6 +1,6 @@
rfkill - radio frequency (RF) connector kill switch support
-For details to this subsystem look at Documentation/rfkill.txt.
+For details to this subsystem look at Documentation/driver-api/rfkill.rst.
What: /sys/class/rfkill/rfkill[0-9]+/claim
Date: 09-Jul-2007
diff --git a/Documentation/ABI/stable/sysfs-class-rfkill b/Documentation/ABI/stable/sysfs-class-rfkill
index 80151a409d67..5b154f922643 100644
--- a/Documentation/ABI/stable/sysfs-class-rfkill
+++ b/Documentation/ABI/stable/sysfs-class-rfkill
@@ -1,6 +1,6 @@
rfkill - radio frequency (RF) connector kill switch support
-For details to this subsystem look at Documentation/rfkill.txt.
+For details to this subsystem look at Documentation/driver-api/rfkill.rst.
For the deprecated /sys/class/rfkill/*/claim knobs of this interface look in
Documentation/ABI/removed/sysfs-class-rfkill.
diff --git a/Documentation/ABI/stable/sysfs-devices-node b/Documentation/ABI/stable/sysfs-devices-node
index f7ce68fbd4b9..df8413cf1468 100644
--- a/Documentation/ABI/stable/sysfs-devices-node
+++ b/Documentation/ABI/stable/sysfs-devices-node
@@ -61,7 +61,7 @@ Date: October 2002
Contact: Linux Memory Management list <linux-mm@kvack.org>
Description:
The node's hit/miss statistics, in units of pages.
- See Documentation/numastat.txt
+ See Documentation/admin-guide/numastat.rst
What: /sys/devices/system/node/nodeX/distance
Date: October 2002
diff --git a/Documentation/ABI/testing/procfs-diskstats b/Documentation/ABI/testing/procfs-diskstats
index abac31d216de..2c44b4f1b060 100644
--- a/Documentation/ABI/testing/procfs-diskstats
+++ b/Documentation/ABI/testing/procfs-diskstats
@@ -29,4 +29,4 @@ Description:
17 - sectors discarded
18 - time spent discarding
- For more details refer to Documentation/iostats.txt
+ For more details refer to Documentation/admin-guide/iostats.rst
diff --git a/Documentation/ABI/testing/sysfs-block b/Documentation/ABI/testing/sysfs-block
index dfad7427817c..f8c7c7126bb1 100644
--- a/Documentation/ABI/testing/sysfs-block
+++ b/Documentation/ABI/testing/sysfs-block
@@ -15,7 +15,7 @@ Description:
9 - I/Os currently in progress
10 - time spent doing I/Os (ms)
11 - weighted time spent doing I/Os (ms)
- For more details refer Documentation/iostats.txt
+ For more details refer Documentation/admin-guide/iostats.rst
What: /sys/block/<disk>/<part>/stat
diff --git a/Documentation/ABI/testing/sysfs-block-device b/Documentation/ABI/testing/sysfs-block-device
index 82ef6eab042d..17f2bc7dd261 100644
--- a/Documentation/ABI/testing/sysfs-block-device
+++ b/Documentation/ABI/testing/sysfs-block-device
@@ -45,7 +45,7 @@ Description:
- Values below -2 are rejected with -EINVAL
For more information, see
- Documentation/laptops/disk-shock-protection.txt
+ Documentation/admin-guide/laptops/disk-shock-protection.rst
What: /sys/block/*/device/ncq_prio_enable
diff --git a/Documentation/ABI/testing/sysfs-class-switchtec b/Documentation/ABI/testing/sysfs-class-switchtec
index 48cb4c15e430..76c7a661a595 100644
--- a/Documentation/ABI/testing/sysfs-class-switchtec
+++ b/Documentation/ABI/testing/sysfs-class-switchtec
@@ -1,6 +1,6 @@
switchtec - Microsemi Switchtec PCI Switch Management Endpoint
-For details on this subsystem look at Documentation/switchtec.txt.
+For details on this subsystem look at Documentation/driver-api/switchtec.rst.
What: /sys/class/switchtec
Date: 05-Jan-2017
diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu
index d404603c6b52..5f7d7b14fa44 100644
--- a/Documentation/ABI/testing/sysfs-devices-system-cpu
+++ b/Documentation/ABI/testing/sysfs-devices-system-cpu
@@ -34,7 +34,7 @@ Description: CPU topology files that describe kernel limits related to
present: cpus that have been identified as being present in
the system.
- See Documentation/cputopology.txt for more information.
+ See Documentation/admin-guide/cputopology.rst for more information.
What: /sys/devices/system/cpu/probe
@@ -103,7 +103,7 @@ Description: CPU topology files that describe a logical CPU's relationship
thread_siblings_list: human-readable list of cpu#'s hardware
threads within the same core as cpu#
- See Documentation/cputopology.txt for more information.
+ See Documentation/admin-guide/cputopology.rst for more information.
What: /sys/devices/system/cpu/cpuidle/current_driver
diff --git a/Documentation/ABI/testing/sysfs-platform-asus-laptop b/Documentation/ABI/testing/sysfs-platform-asus-laptop
index cd9d667c3da2..8b0e8205a6a2 100644
--- a/Documentation/ABI/testing/sysfs-platform-asus-laptop
+++ b/Documentation/ABI/testing/sysfs-platform-asus-laptop
@@ -31,7 +31,7 @@ Description:
To control the LED display, use the following :
echo 0x0T000DDD > /sys/devices/platform/asus_laptop/
where T control the 3 letters display, and DDD the 3 digits display.
- The DDD table can be found in Documentation/laptops/asus-laptop.txt
+ The DDD table can be found in Documentation/admin-guide/laptops/asus-laptop.rst
What: /sys/devices/platform/asus_laptop/bluetooth
Date: January 2007
diff --git a/Documentation/logo.txt b/Documentation/COPYING-logo
index 296f0f7f67eb..296f0f7f67eb 100644
--- a/Documentation/logo.txt
+++ b/Documentation/COPYING-logo
diff --git a/Documentation/DMA-API-HOWTO.txt b/Documentation/DMA-API-HOWTO.txt
index cb712a02f59f..358d495456d1 100644
--- a/Documentation/DMA-API-HOWTO.txt
+++ b/Documentation/DMA-API-HOWTO.txt
@@ -212,7 +212,7 @@ The standard 64-bit addressing device would do something like this::
If the device only supports 32-bit addressing for descriptors in the
coherent allocations, but supports full 64-bits for streaming mappings
-it would look like this:
+it would look like this::
if (dma_set_mask(dev, DMA_BIT_MASK(64))) {
dev_warn(dev, "mydev: No suitable DMA available\n");
diff --git a/Documentation/accounting/cgroupstats.txt b/Documentation/accounting/cgroupstats.rst
index d16a9849e60e..b9afc48f4ea2 100644
--- a/Documentation/accounting/cgroupstats.txt
+++ b/Documentation/accounting/cgroupstats.rst
@@ -1,3 +1,7 @@
+==================
+Control Groupstats
+==================
+
Control Groupstats is inspired by the discussion at
http://lkml.org/lkml/2007/4/11/187 and implements per cgroup statistics as
suggested by Andrew Morton in http://lkml.org/lkml/2007/4/11/263.
@@ -19,9 +23,9 @@ about tasks blocked on I/O. If CONFIG_TASK_DELAY_ACCT is disabled, this
information will not be available.
To extract cgroup statistics a utility very similar to getdelays.c
-has been developed, the sample output of the utility is shown below
+has been developed, the sample output of the utility is shown below::
-~/balbir/cgroupstats # ./getdelays -C "/sys/fs/cgroup/a"
-sleeping 1, blocked 0, running 1, stopped 0, uninterruptible 0
-~/balbir/cgroupstats # ./getdelays -C "/sys/fs/cgroup"
-sleeping 155, blocked 0, running 1, stopped 0, uninterruptible 2
+ ~/balbir/cgroupstats # ./getdelays -C "/sys/fs/cgroup/a"
+ sleeping 1, blocked 0, running 1, stopped 0, uninterruptible 0
+ ~/balbir/cgroupstats # ./getdelays -C "/sys/fs/cgroup"
+ sleeping 155, blocked 0, running 1, stopped 0, uninterruptible 2
diff --git a/Documentation/accounting/delay-accounting.txt b/Documentation/accounting/delay-accounting.rst
index 042ea59b5853..7cc7f5852da0 100644
--- a/Documentation/accounting/delay-accounting.txt
+++ b/Documentation/accounting/delay-accounting.rst
@@ -1,5 +1,6 @@
+================
Delay accounting
-----------------
+================
Tasks encounter delays in execution when they wait
for some kernel resource to become available e.g. a
@@ -39,7 +40,9 @@ in detail in a separate document in this directory. Taskstats returns a
generic data structure to userspace corresponding to per-pid and per-tgid
statistics. The delay accounting functionality populates specific fields of
this structure. See
+
include/linux/taskstats.h
+
for a description of the fields pertaining to delay accounting.
It will generally be in the form of counters returning the cumulative
delay seen for cpu, sync block I/O, swapin, memory reclaim etc.
@@ -61,13 +64,16 @@ also serves as an example of using the taskstats interface.
Usage
-----
-Compile the kernel with
+Compile the kernel with::
+
CONFIG_TASK_DELAY_ACCT=y
CONFIG_TASKSTATS=y
Delay accounting is enabled by default at boot up.
-To disable, add
+To disable, add::
+
nodelayacct
+
to the kernel boot options. The rest of the instructions
below assume this has not been done.
@@ -78,40 +84,43 @@ The utility also allows a given command to be
executed and the corresponding delays to be
seen.
-General format of the getdelays command
+General format of the getdelays command::
-getdelays [-t tgid] [-p pid] [-c cmd...]
+ getdelays [-t tgid] [-p pid] [-c cmd...]
-Get delays, since system boot, for pid 10
-# ./getdelays -p 10
-(output similar to next case)
+Get delays, since system boot, for pid 10::
-Get sum of delays, since system boot, for all pids with tgid 5
-# ./getdelays -t 5
+ # ./getdelays -p 10
+ (output similar to next case)
+Get sum of delays, since system boot, for all pids with tgid 5::
-CPU count real total virtual total delay total
- 7876 92005750 100000000 24001500
-IO count delay total
- 0 0
-SWAP count delay total
- 0 0
-RECLAIM count delay total
- 0 0
+ # ./getdelays -t 5
+
+
+ CPU count real total virtual total delay total
+ 7876 92005750 100000000 24001500
+ IO count delay total
+ 0 0
+ SWAP count delay total
+ 0 0
+ RECLAIM count delay total
+ 0 0
+
+Get delays seen in executing a given simple command::
-Get delays seen in executing a given simple command
-# ./getdelays -c ls /
+ # ./getdelays -c ls /
-bin data1 data3 data5 dev home media opt root srv sys usr
-boot data2 data4 data6 etc lib mnt proc sbin subdomain tmp var
+ bin data1 data3 data5 dev home media opt root srv sys usr
+ boot data2 data4 data6 etc lib mnt proc sbin subdomain tmp var
-CPU count real total virtual total delay total
+ CPU count real total virtual total delay total
6 4000250 4000000 0
-IO count delay total
+ IO count delay total
0 0
-SWAP count delay total
+ SWAP count delay total
0 0
-RECLAIM count delay total
+ RECLAIM count delay total
0 0
diff --git a/Documentation/accounting/index.rst b/Documentation/accounting/index.rst
new file mode 100644
index 000000000000..9369d8bf32be
--- /dev/null
+++ b/Documentation/accounting/index.rst
@@ -0,0 +1,14 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==========
+Accounting
+==========
+
+.. toctree::
+ :maxdepth: 1
+
+ cgroupstats
+ delay-accounting
+ psi
+ taskstats
+ taskstats-struct
diff --git a/Documentation/accounting/psi.txt b/Documentation/accounting/psi.rst
index 5cbe5659e3b7..621111ce5740 100644
--- a/Documentation/accounting/psi.txt
+++ b/Documentation/accounting/psi.rst
@@ -35,14 +35,14 @@ Pressure interface
Pressure information for each resource is exported through the
respective file in /proc/pressure/ -- cpu, memory, and io.
-The format for CPU is as such:
+The format for CPU is as such::
-some avg10=0.00 avg60=0.00 avg300=0.00 total=0
+ some avg10=0.00 avg60=0.00 avg300=0.00 total=0
-and for memory and IO:
+and for memory and IO::
-some avg10=0.00 avg60=0.00 avg300=0.00 total=0
-full avg10=0.00 avg60=0.00 avg300=0.00 total=0
+ some avg10=0.00 avg60=0.00 avg300=0.00 total=0
+ full avg10=0.00 avg60=0.00 avg300=0.00 total=0
The "some" line indicates the share of time in which at least some
tasks are stalled on a given resource.
@@ -77,9 +77,9 @@ To register a trigger user has to open psi interface file under
/proc/pressure/ representing the resource to be monitored and write the
desired threshold and time window. The open file descriptor should be
used to wait for trigger events using select(), poll() or epoll().
-The following format is used:
+The following format is used::
-<some|full> <stall amount in us> <time window in us>
+ <some|full> <stall amount in us> <time window in us>
For example writing "some 150000 1000000" into /proc/pressure/memory
would add 150ms threshold for partial memory stall measured within
@@ -115,18 +115,20 @@ trigger is closed.
Userspace monitor usage example
===============================
-#include <errno.h>
-#include <fcntl.h>
-#include <stdio.h>
-#include <poll.h>
-#include <string.h>
-#include <unistd.h>
-
-/*
- * Monitor memory partial stall with 1s tracking window size
- * and 150ms threshold.
- */
-int main() {
+::
+
+ #include <errno.h>
+ #include <fcntl.h>
+ #include <stdio.h>
+ #include <poll.h>
+ #include <string.h>
+ #include <unistd.h>
+
+ /*
+ * Monitor memory partial stall with 1s tracking window size
+ * and 150ms threshold.
+ */
+ int main() {
const char trig[] = "some 150000 1000000";
struct pollfd fds;
int n;
@@ -165,7 +167,7 @@ int main() {
}
return 0;
-}
+ }
Cgroup2 interface
=================
diff --git a/Documentation/accounting/taskstats-struct.txt b/Documentation/accounting/taskstats-struct.rst
index e7512c061c15..ca90fd489c9a 100644
--- a/Documentation/accounting/taskstats-struct.txt
+++ b/Documentation/accounting/taskstats-struct.rst
@@ -1,5 +1,6 @@
+====================
The struct taskstats
---------------------
+====================
This document contains an explanation of the struct taskstats fields.
@@ -10,16 +11,24 @@ There are three different groups of fields in the struct taskstats:
the common fields and basic accounting fields are collected for
delivery at do_exit() of a task.
2) Delay accounting fields
- These fields are placed between
- /* Delay accounting fields start */
- and
- /* Delay accounting fields end */
+ These fields are placed between::
+
+ /* Delay accounting fields start */
+
+ and::
+
+ /* Delay accounting fields end */
+
Their values are collected if CONFIG_TASK_DELAY_ACCT is set.
3) Extended accounting fields
- These fields are placed between
- /* Extended accounting fields start */
- and
- /* Extended accounting fields end */
+ These fields are placed between::
+
+ /* Extended accounting fields start */
+
+ and::
+
+ /* Extended accounting fields end */
+
Their values are collected if CONFIG_TASK_XACCT is set.
4) Per-task and per-thread context switch count statistics
@@ -31,31 +40,33 @@ There are three different groups of fields in the struct taskstats:
Future extension should add fields to the end of the taskstats struct, and
should not change the relative position of each field within the struct.
+::
-struct taskstats {
+ struct taskstats {
+
+1) Common and basic accounting fields::
-1) Common and basic accounting fields:
/* The version number of this struct. This field is always set to
* TAKSTATS_VERSION, which is defined in <linux/taskstats.h>.
* Each time the struct is changed, the value should be incremented.
*/
__u16 version;
- /* The exit code of a task. */
+ /* The exit code of a task. */
__u32 ac_exitcode; /* Exit status */
- /* The accounting flags of a task as defined in <linux/acct.h>
+ /* The accounting flags of a task as defined in <linux/acct.h>
* Defined values are AFORK, ASU, ACOMPAT, ACORE, and AXSIG.
*/
__u8 ac_flag; /* Record flags */
- /* The value of task_nice() of a task. */
+ /* The value of task_nice() of a task. */
__u8 ac_nice; /* task_nice */
- /* The name of the command that started this task. */
+ /* The name of the command that started this task. */
char ac_comm[TS_COMM_LEN]; /* Command name */
- /* The scheduling discipline as set in task->policy field. */
+ /* The scheduling discipline as set in task->policy field. */
__u8 ac_sched; /* Scheduling discipline */
__u8 ac_pad[3];
@@ -64,26 +75,27 @@ struct taskstats {
__u32 ac_pid; /* Process ID */
__u32 ac_ppid; /* Parent process ID */
- /* The time when a task begins, in [secs] since 1970. */
+ /* The time when a task begins, in [secs] since 1970. */
__u32 ac_btime; /* Begin time [sec since 1970] */
- /* The elapsed time of a task, in [usec]. */
+ /* The elapsed time of a task, in [usec]. */
__u64 ac_etime; /* Elapsed time [usec] */
- /* The user CPU time of a task, in [usec]. */
+ /* The user CPU time of a task, in [usec]. */
__u64 ac_utime; /* User CPU time [usec] */
- /* The system CPU time of a task, in [usec]. */
+ /* The system CPU time of a task, in [usec]. */
__u64 ac_stime; /* System CPU time [usec] */
- /* The minor page fault count of a task, as set in task->min_flt. */
+ /* The minor page fault count of a task, as set in task->min_flt. */
__u64 ac_minflt; /* Minor Page Fault Count */
/* The major page fault count of a task, as set in task->maj_flt. */
__u64 ac_majflt; /* Major Page Fault Count */
-2) Delay accounting fields:
+2) Delay accounting fields::
+
/* Delay accounting fields start
*
* All values, until the comment "Delay accounting fields end" are
@@ -134,7 +146,8 @@ struct taskstats {
/* version 1 ends here */
-3) Extended accounting fields
+3) Extended accounting fields::
+
/* Extended accounting fields start */
/* Accumulated RSS usage in duration of a task, in MBytes-usecs.
@@ -145,15 +158,15 @@ struct taskstats {
*/
__u64 coremem; /* accumulated RSS usage in MB-usec */
- /* Accumulated virtual memory usage in duration of a task.
+ /* Accumulated virtual memory usage in duration of a task.
* Same as acct_rss_mem1 above except that we keep track of VM usage.
*/
__u64 virtmem; /* accumulated VM usage in MB-usec */
- /* High watermark of RSS usage in duration of a task, in KBytes. */
+ /* High watermark of RSS usage in duration of a task, in KBytes. */
__u64 hiwater_rss; /* High-watermark of RSS usage */
- /* High watermark of VM usage in duration of a task, in KBytes. */
+ /* High watermark of VM usage in duration of a task, in KBytes. */
__u64 hiwater_vm; /* High-water virtual memory usage */
/* The following four fields are I/O statistics of a task. */
@@ -164,17 +177,23 @@ struct taskstats {
/* Extended accounting fields end */
-4) Per-task and per-thread statistics
+4) Per-task and per-thread statistics::
+
__u64 nvcsw; /* Context voluntary switch counter */
__u64 nivcsw; /* Context involuntary switch counter */
-5) Time accounting for SMT machines
+5) Time accounting for SMT machines::
+
__u64 ac_utimescaled; /* utime scaled on frequency etc */
__u64 ac_stimescaled; /* stime scaled on frequency etc */
__u64 cpu_scaled_run_real_total; /* scaled cpu_run_real_total */
-6) Extended delay accounting fields for memory reclaim
+6) Extended delay accounting fields for memory reclaim::
+
/* Delay waiting for memory reclaim */
__u64 freepages_count;
__u64 freepages_delay_total;
-}
+
+::
+
+ }
diff --git a/Documentation/accounting/taskstats.txt b/Documentation/accounting/taskstats.rst
index ff06b738bb88..2a28b7f55c10 100644
--- a/Documentation/accounting/taskstats.txt
+++ b/Documentation/accounting/taskstats.rst
@@ -1,5 +1,6 @@
+=============================
Per-task statistics interface
------------------------------
+=============================
Taskstats is a netlink-based interface for sending per-task and
@@ -65,7 +66,7 @@ taskstats.h file.
The data exchanged between user and kernel space is a netlink message belonging
to the NETLINK_GENERIC family and using the netlink attributes interface.
-The messages are in the format
+The messages are in the format::
+----------+- - -+-------------+-------------------+
| nlmsghdr | Pad | genlmsghdr | taskstats payload |
@@ -167,15 +168,13 @@ extended and the number of cpus grows large.
To avoid losing statistics, userspace should do one or more of the following:
- increase the receive buffer sizes for the netlink sockets opened by
-listeners to receive exit data.
+ listeners to receive exit data.
- create more listeners and reduce the number of cpus being listened to by
-each listener. In the extreme case, there could be one listener for each cpu.
-Users may also consider setting the cpu affinity of the listener to the subset
-of cpus to which it listens, especially if they are listening to just one cpu.
+ each listener. In the extreme case, there could be one listener for each cpu.
+ Users may also consider setting the cpu affinity of the listener to the subset
+ of cpus to which it listens, especially if they are listening to just one cpu.
Despite these measures, if the userspace receives ENOBUFS error messages
indicating overflow of receive buffers, it should take measures to handle the
loss of data.
-
-----
diff --git a/Documentation/aoe/aoe.rst b/Documentation/admin-guide/aoe/aoe.rst
index 58747ecec71d..a05e751363a0 100644
--- a/Documentation/aoe/aoe.rst
+++ b/Documentation/admin-guide/aoe/aoe.rst
@@ -20,7 +20,7 @@ driver. The aoetools are on sourceforge.
http://aoetools.sourceforge.net/
-The scripts in this Documentation/aoe directory are intended to
+The scripts in this Documentation/admin-guide/aoe directory are intended to
document the use of the driver and are not necessary if you install
the aoetools.
@@ -86,7 +86,7 @@ Using sysfs
a convenient way. Users with aoetools should use the aoe-stat
command::
- root@makki root# sh Documentation/aoe/status.sh
+ root@makki root# sh Documentation/admin-guide/aoe/status.sh
e10.0 eth3 up
e10.1 eth3 up
e10.2 eth3 up
diff --git a/Documentation/aoe/autoload.sh b/Documentation/admin-guide/aoe/autoload.sh
index 815dff4691c9..815dff4691c9 100644
--- a/Documentation/aoe/autoload.sh
+++ b/Documentation/admin-guide/aoe/autoload.sh
diff --git a/Documentation/aoe/examples.rst b/Documentation/admin-guide/aoe/examples.rst
index 91f3198e52c1..91f3198e52c1 100644
--- a/Documentation/aoe/examples.rst
+++ b/Documentation/admin-guide/aoe/examples.rst
diff --git a/Documentation/aoe/index.rst b/Documentation/admin-guide/aoe/index.rst
index 4394b9b7913c..d71c5df15922 100644
--- a/Documentation/aoe/index.rst
+++ b/Documentation/admin-guide/aoe/index.rst
@@ -1,5 +1,3 @@
-:orphan:
-
=======================
ATA over Ethernet (AoE)
=======================
diff --git a/Documentation/aoe/status.sh b/Documentation/admin-guide/aoe/status.sh
index eeec7baae57a..eeec7baae57a 100644
--- a/Documentation/aoe/status.sh
+++ b/Documentation/admin-guide/aoe/status.sh
diff --git a/Documentation/aoe/todo.rst b/Documentation/admin-guide/aoe/todo.rst
index dea8db5a33e1..dea8db5a33e1 100644
--- a/Documentation/aoe/todo.rst
+++ b/Documentation/admin-guide/aoe/todo.rst
diff --git a/Documentation/aoe/udev-install.sh b/Documentation/admin-guide/aoe/udev-install.sh
index 15e86f58c036..15e86f58c036 100644
--- a/Documentation/aoe/udev-install.sh
+++ b/Documentation/admin-guide/aoe/udev-install.sh
diff --git a/Documentation/aoe/udev.txt b/Documentation/admin-guide/aoe/udev.txt
index 54feda5a0772..5fb756466bc7 100644
--- a/Documentation/aoe/udev.txt
+++ b/Documentation/admin-guide/aoe/udev.txt
@@ -11,7 +11,7 @@
# udev_rules="/etc/udev/rules.d/"
# bash# ls /etc/udev/rules.d/
# 10-wacom.rules 50-udev.rules
-# bash# cp /path/to/linux/Documentation/aoe/udev.txt \
+# bash# cp /path/to/linux/Documentation/admin-guide/aoe/udev.txt \
# /etc/udev/rules.d/60-aoe.rules
#
diff --git a/Documentation/blockdev/drbd/DRBD-8.3-data-packets.svg b/Documentation/admin-guide/blockdev/drbd/DRBD-8.3-data-packets.svg
index f87cfa0dc2fb..f87cfa0dc2fb 100644
--- a/Documentation/blockdev/drbd/DRBD-8.3-data-packets.svg
+++ b/Documentation/admin-guide/blockdev/drbd/DRBD-8.3-data-packets.svg
diff --git a/Documentation/blockdev/drbd/DRBD-data-packets.svg b/Documentation/admin-guide/blockdev/drbd/DRBD-data-packets.svg
index 48a1e2165fec..48a1e2165fec 100644
--- a/Documentation/blockdev/drbd/DRBD-data-packets.svg
+++ b/Documentation/admin-guide/blockdev/drbd/DRBD-data-packets.svg
diff --git a/Documentation/blockdev/drbd/conn-states-8.dot b/Documentation/admin-guide/blockdev/drbd/conn-states-8.dot
index 025e8cf5e64a..025e8cf5e64a 100644
--- a/Documentation/blockdev/drbd/conn-states-8.dot
+++ b/Documentation/admin-guide/blockdev/drbd/conn-states-8.dot
diff --git a/Documentation/blockdev/drbd/data-structure-v9.txt b/Documentation/admin-guide/blockdev/drbd/data-structure-v9.rst
index 1e52a0e32624..66036b901644 100644
--- a/Documentation/blockdev/drbd/data-structure-v9.txt
+++ b/Documentation/admin-guide/blockdev/drbd/data-structure-v9.rst
@@ -1,3 +1,7 @@
+================================
+kernel data structure for DRBD-9
+================================
+
This describes the in-kernel data structure for DRBD-9. Starting with
Linux v3.14 we are reorganizing DRBD to use this data structure.
@@ -10,7 +14,7 @@ device is represented by a block device locally.
The DRBD objects are interconnected to form a matrix as depicted below; a
drbd_peer_device object sits at each intersection between a drbd_device and a
-drbd_connection:
+drbd_connection::
/--------------+---------------+.....+---------------\
| resource | device | | device |
diff --git a/Documentation/blockdev/drbd/disk-states-8.dot b/Documentation/admin-guide/blockdev/drbd/disk-states-8.dot
index d06cfb46fb98..d06cfb46fb98 100644
--- a/Documentation/blockdev/drbd/disk-states-8.dot
+++ b/Documentation/admin-guide/blockdev/drbd/disk-states-8.dot
diff --git a/Documentation/blockdev/drbd/drbd-connection-state-overview.dot b/Documentation/admin-guide/blockdev/drbd/drbd-connection-state-overview.dot
index 6d9cf0a7b11d..6d9cf0a7b11d 100644
--- a/Documentation/blockdev/drbd/drbd-connection-state-overview.dot
+++ b/Documentation/admin-guide/blockdev/drbd/drbd-connection-state-overview.dot
diff --git a/Documentation/admin-guide/blockdev/drbd/figures.rst b/Documentation/admin-guide/blockdev/drbd/figures.rst
new file mode 100644
index 000000000000..bd9a4901fe46
--- /dev/null
+++ b/Documentation/admin-guide/blockdev/drbd/figures.rst
@@ -0,0 +1,30 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. The files included here are intended to help understand the implementation
+
+Data flows that relate some functions, and write packets
+========================================================
+
+.. kernel-figure:: DRBD-8.3-data-packets.svg
+ :alt: DRBD-8.3-data-packets.svg
+ :align: center
+
+.. kernel-figure:: DRBD-data-packets.svg
+ :alt: DRBD-data-packets.svg
+ :align: center
+
+
+Sub graphs of DRBD's state transitions
+======================================
+
+.. kernel-figure:: conn-states-8.dot
+ :alt: conn-states-8.dot
+ :align: center
+
+.. kernel-figure:: disk-states-8.dot
+ :alt: disk-states-8.dot
+ :align: center
+
+.. kernel-figure:: node-states-8.dot
+ :alt: node-states-8.dot
+ :align: center
diff --git a/Documentation/blockdev/drbd/README.txt b/Documentation/admin-guide/blockdev/drbd/index.rst
index 627b0a1bf35e..68ecd5c113e9 100644
--- a/Documentation/blockdev/drbd/README.txt
+++ b/Documentation/admin-guide/blockdev/drbd/index.rst
@@ -1,4 +1,9 @@
+==========================================
+Distributed Replicated Block Device - DRBD
+==========================================
+
Description
+===========
DRBD is a shared-nothing, synchronously replicated block device. It
is designed to serve as a building block for high availability
@@ -7,10 +12,8 @@ Description
Please visit http://www.drbd.org to find out more.
-The here included files are intended to help understand the implementation
-
-DRBD-8.3-data-packets.svg, DRBD-data-packets.svg
- relates some functions, and write packets.
+.. toctree::
+ :maxdepth: 1
-conn-states-8.dot, disk-states-8.dot, node-states-8.dot
- The sub graphs of DRBD's state transitions
+ data-structure-v9
+ figures
diff --git a/Documentation/blockdev/drbd/node-states-8.dot b/Documentation/admin-guide/blockdev/drbd/node-states-8.dot
index 4a2b00c23547..bfa54e1f8016 100644
--- a/Documentation/blockdev/drbd/node-states-8.dot
+++ b/Documentation/admin-guide/blockdev/drbd/node-states-8.dot
@@ -11,4 +11,3 @@ digraph peer_states {
Unknown -> Primary [ label = "connected" ]
Unknown -> Secondary [ label = "connected" ]
}
-
diff --git a/Documentation/blockdev/floppy.txt b/Documentation/admin-guide/blockdev/floppy.rst
index e2240f5ab64d..4a8f31cf4139 100644
--- a/Documentation/blockdev/floppy.txt
+++ b/Documentation/admin-guide/blockdev/floppy.rst
@@ -1,35 +1,37 @@
-This file describes the floppy driver.
+=============
+Floppy Driver
+=============
FAQ list:
=========
- A FAQ list may be found in the fdutils package (see below), and also
+A FAQ list may be found in the fdutils package (see below), and also
at <http://fdutils.linux.lu/faq.html>.
LILO configuration options (Thinkpad users, read this)
======================================================
- The floppy driver is configured using the 'floppy=' option in
+The floppy driver is configured using the 'floppy=' option in
lilo. This option can be typed at the boot prompt, or entered in the
lilo configuration file.
- Example: If your kernel is called linux-2.6.9, type the following line
-at the lilo boot prompt (if you have a thinkpad):
+Example: If your kernel is called linux-2.6.9, type the following line
+at the lilo boot prompt (if you have a thinkpad)::
linux-2.6.9 floppy=thinkpad
You may also enter the following line in /etc/lilo.conf, in the description
-of linux-2.6.9:
+of linux-2.6.9::
append = "floppy=thinkpad"
- Several floppy related options may be given, example:
+Several floppy related options may be given, example::
linux-2.6.9 floppy=daring floppy=two_fdc
append = "floppy=daring floppy=two_fdc"
- If you give options both in the lilo config file and on the boot
+If you give options both in the lilo config file and on the boot
prompt, the option strings of both places are concatenated, the boot
prompt options coming last. That's why there are also options to
restore the default behavior.
@@ -38,21 +40,23 @@ restore the default behavior.
Module configuration options
============================
- If you use the floppy driver as a module, use the following syntax:
-modprobe floppy floppy="<options>"
+If you use the floppy driver as a module, use the following syntax::
-Example:
- modprobe floppy floppy="omnibook messages"
+ modprobe floppy floppy="<options>"
- If you need certain options enabled every time you load the floppy driver,
-you can put:
+Example::
- options floppy floppy="omnibook messages"
+ modprobe floppy floppy="omnibook messages"
+
+If you need certain options enabled every time you load the floppy driver,
+you can put::
+
+ options floppy floppy="omnibook messages"
in a configuration file in /etc/modprobe.d/.
- The floppy driver related options are:
+The floppy driver related options are:
floppy=asus_pci
Sets the bit mask to allow only units 0 and 1. (default)
@@ -70,8 +74,7 @@ in a configuration file in /etc/modprobe.d/.
Tells the floppy driver that you have only one floppy controller.
(default)
- floppy=two_fdc
- floppy=<address>,two_fdc
+ floppy=two_fdc / floppy=<address>,two_fdc
Tells the floppy driver that you have two floppy controllers.
The second floppy controller is assumed to be at <address>.
This option is not needed if the second controller is at address
@@ -84,8 +87,7 @@ in a configuration file in /etc/modprobe.d/.
floppy=0,thinkpad
Tells the floppy driver that you don't have a Thinkpad.
- floppy=omnibook
- floppy=nodma
+ floppy=omnibook / floppy=nodma
Tells the floppy driver not to use DMA for data transfers.
This is needed on HP Omnibooks, which don't have a workable
DMA channel for the floppy driver. This option is also useful
@@ -144,14 +146,16 @@ in a configuration file in /etc/modprobe.d/.
described in the physical CMOS), or if your BIOS uses
non-standard CMOS types. The CMOS types are:
- 0 - Use the value of the physical CMOS
- 1 - 5 1/4 DD
- 2 - 5 1/4 HD
- 3 - 3 1/2 DD
- 4 - 3 1/2 HD
- 5 - 3 1/2 ED
- 6 - 3 1/2 ED
- 16 - unknown or not installed
+ == ==================================
+ 0 Use the value of the physical CMOS
+ 1 5 1/4 DD
+ 2 5 1/4 HD
+ 3 3 1/2 DD
+ 4 3 1/2 HD
+ 5 3 1/2 ED
+ 6 3 1/2 ED
+ 16 unknown or not installed
+ == ==================================
(Note: there are two valid types for ED drives. This is because 5 was
initially chosen to represent floppy *tapes*, and 6 for ED drives.
@@ -162,8 +166,7 @@ in a configuration file in /etc/modprobe.d/.
Print a warning message when an unexpected interrupt is received.
(default)
- floppy=no_unexpected_interrupts
- floppy=L40SX
+ floppy=no_unexpected_interrupts / floppy=L40SX
Don't print a message when an unexpected interrupt is received. This
is needed on IBM L40SX laptops in certain video modes. (There seems
to be an interaction between video and floppy. The unexpected
@@ -199,47 +202,54 @@ in a configuration file in /etc/modprobe.d/.
Sets the floppy DMA channel to <nr> instead of 2.
floppy=slow
- Use PS/2 stepping rate:
- " PS/2 floppies have much slower step rates than regular floppies.
+ Use PS/2 stepping rate::
+
+ PS/2 floppies have much slower step rates than regular floppies.
It's been recommended that take about 1/4 of the default speed
- in some more extreme cases."
+ in some more extreme cases.
Supporting utilities and additional documentation:
==================================================
- Additional parameters of the floppy driver can be configured at
+Additional parameters of the floppy driver can be configured at
runtime. Utilities which do this can be found in the fdutils package.
This package also contains a new version of mtools which allows you to
access high capacity disks (up to 1992K on a high density 3 1/2 disk!).
It also contains additional documentation about the floppy driver.
The latest version can be found at fdutils homepage:
+
http://fdutils.linux.lu
The fdutils releases can be found at:
+
http://fdutils.linux.lu/download.html
+
http://www.tux.org/pub/knaff/fdutils/
+
ftp://metalab.unc.edu/pub/Linux/utils/disk-management/
Reporting problems about the floppy driver
==========================================
- If you have a question or a bug report about the floppy driver, mail
+If you have a question or a bug report about the floppy driver, mail
me at Alain.Knaff@poboxes.com . If you post to Usenet, preferably use
comp.os.linux.hardware. As the volume in these groups is rather high,
be sure to include the word "floppy" (or "FLOPPY") in the subject
line. If the reported problem happens when mounting floppy disks, be
sure to mention also the type of the filesystem in the subject line.
- Be sure to read the FAQ before mailing/posting any bug reports!
+Be sure to read the FAQ before mailing/posting any bug reports!
- Alain
+Alain
Changelog
=========
-10-30-2004 : Cleanup, updating, add reference to module configuration.
+10-30-2004 :
+ Cleanup, updating, add reference to module configuration.
James Nelson <james4765@gmail.com>
-6-3-2000 : Original Document
+6-3-2000 :
+ Original Document
diff --git a/Documentation/admin-guide/blockdev/index.rst b/Documentation/admin-guide/blockdev/index.rst
new file mode 100644
index 000000000000..b903cf152091
--- /dev/null
+++ b/Documentation/admin-guide/blockdev/index.rst
@@ -0,0 +1,16 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=============
+Block Devices
+=============
+
+.. toctree::
+ :maxdepth: 1
+
+ floppy
+ nbd
+ paride
+ ramdisk
+ zram
+
+ drbd/index
diff --git a/Documentation/blockdev/nbd.txt b/Documentation/admin-guide/blockdev/nbd.rst
index db242ea2bce8..d78dfe559dcf 100644
--- a/Documentation/blockdev/nbd.txt
+++ b/Documentation/admin-guide/blockdev/nbd.rst
@@ -1,3 +1,4 @@
+==================================
Network Block Device (TCP version)
==================================
@@ -28,4 +29,3 @@ max_part
nbds_max
Number of block devices that should be initialized (default: 16).
-
diff --git a/Documentation/blockdev/paride.txt b/Documentation/admin-guide/blockdev/paride.rst
index ee6717e3771d..87b4278bf314 100644
--- a/Documentation/blockdev/paride.txt
+++ b/Documentation/admin-guide/blockdev/paride.rst
@@ -1,15 +1,17 @@
-
- Linux and parallel port IDE devices
+===================================
+Linux and parallel port IDE devices
+===================================
PARIDE v1.03 (c) 1997-8 Grant Guenther <grant@torque.net>
1. Introduction
+===============
Owing to the simplicity and near universality of the parallel port interface
to personal computers, many external devices such as portable hard-disk,
CD-ROM, LS-120 and tape drives use the parallel port to connect to their
host computer. While some devices (notably scanners) use ad-hoc methods
-to pass commands and data through the parallel port interface, most
+to pass commands and data through the parallel port interface, most
external devices are actually identical to an internal model, but with
a parallel-port adapter chip added in. Some of the original parallel port
adapters were little more than mechanisms for multiplexing a SCSI bus.
@@ -28,47 +30,50 @@ were to open up a parallel port CD-ROM drive, for instance, one would
find a standard ATAPI CD-ROM drive, a power supply, and a single adapter
that interconnected a standard PC parallel port cable and a standard
IDE cable. It is usually possible to exchange the CD-ROM device with
-any other device using the IDE interface.
+any other device using the IDE interface.
The document describes the support in Linux for parallel port IDE
devices. It does not cover parallel port SCSI devices, "ditto" tape
-drives or scanners. Many different devices are supported by the
+drives or scanners. Many different devices are supported by the
parallel port IDE subsystem, including:
- MicroSolutions backpack CD-ROM
- MicroSolutions backpack PD/CD
- MicroSolutions backpack hard-drives
- MicroSolutions backpack 8000t tape drive
- SyQuest EZ-135, EZ-230 & SparQ drives
- Avatar Shark
- Imation Superdisk LS-120
- Maxell Superdisk LS-120
- FreeCom Power CD
- Hewlett-Packard 5GB and 8GB tape drives
- Hewlett-Packard 7100 and 7200 CD-RW drives
+ - MicroSolutions backpack CD-ROM
+ - MicroSolutions backpack PD/CD
+ - MicroSolutions backpack hard-drives
+ - MicroSolutions backpack 8000t tape drive
+ - SyQuest EZ-135, EZ-230 & SparQ drives
+ - Avatar Shark
+ - Imation Superdisk LS-120
+ - Maxell Superdisk LS-120
+ - FreeCom Power CD
+ - Hewlett-Packard 5GB and 8GB tape drives
+ - Hewlett-Packard 7100 and 7200 CD-RW drives
as well as most of the clone and no-name products on the market.
To support such a wide range of devices, PARIDE, the parallel port IDE
subsystem, is actually structured in three parts. There is a base
paride module which provides a registry and some common methods for
-accessing the parallel ports. The second component is a set of
-high-level drivers for each of the different types of supported devices:
+accessing the parallel ports. The second component is a set of
+high-level drivers for each of the different types of supported devices:
+ === =============
pd IDE disk
pcd ATAPI CD-ROM
pf ATAPI disk
pt ATAPI tape
pg ATAPI generic
+ === =============
(Currently, the pg driver is only used with CD-R drives).
The high-level drivers function according to the relevant standards.
The third component of PARIDE is a set of low-level protocol drivers
for each of the parallel port IDE adapter chips. Thanks to the interest
-and encouragement of Linux users from many parts of the world,
+and encouragement of Linux users from many parts of the world,
support is available for almost all known adapter protocols:
+ ==== ====================================== ====
aten ATEN EH-100 (HK)
bpck Microsolutions backpack (US)
comm DataStor (old-type) "commuter" adapter (TW)
@@ -83,9 +88,11 @@ support is available for almost all known adapter protocols:
ktti KT Technology PHd adapter (SG)
on20 OnSpec 90c20 (US)
on26 OnSpec 90c26 (US)
+ ==== ====================================== ====
2. Using the PARIDE subsystem
+=============================
While configuring the Linux kernel, you may choose either to build
the PARIDE drivers into your kernel, or to build them as modules.
@@ -94,10 +101,10 @@ In either case, you will need to select "Parallel port IDE device support"
as well as at least one of the high-level drivers and at least one
of the parallel port communication protocols. If you do not know
what kind of parallel port adapter is used in your drive, you could
-begin by checking the file names and any text files on your DOS
+begin by checking the file names and any text files on your DOS
installation floppy. Alternatively, you can look at the markings on
the adapter chip itself. That's usually sufficient to identify the
-correct device.
+correct device.
You can actually select all the protocol modules, and allow the PARIDE
subsystem to try them all for you.
@@ -105,8 +112,9 @@ subsystem to try them all for you.
For the "brand-name" products listed above, here are the protocol
and high-level drivers that you would use:
+ ================ ============ ====== ========
Manufacturer Model Driver Protocol
-
+ ================ ============ ====== ========
MicroSolutions CD-ROM pcd bpck
MicroSolutions PD drive pf bpck
MicroSolutions hard-drive pd bpck
@@ -119,8 +127,10 @@ and high-level drivers that you would use:
Hewlett-Packard 5GB Tape pt epat
Hewlett-Packard 7200e (CD) pcd epat
Hewlett-Packard 7200e (CD-R) pg epat
+ ================ ============ ====== ========
2.1 Configuring built-in drivers
+---------------------------------
We recommend that you get to know how the drivers work and how to
configure them as loadable modules, before attempting to compile a
@@ -143,7 +153,7 @@ protocol identification number and, for some devices, the drive's
chain ID. While your system is booting, a number of messages are
displayed on the console. Like all such messages, they can be
reviewed with the 'dmesg' command. Among those messages will be
-some lines like:
+some lines like::
paride: bpck registered as protocol 0
paride: epat registered as protocol 1
@@ -158,10 +168,10 @@ the last two digits of the drive's serial number (but read MicroSolutions'
documentation about this).
As an example, let's assume that you have a MicroSolutions PD/CD drive
-with unit ID number 36 connected to the parallel port at 0x378, a SyQuest
-EZ-135 connected to the chained port on the PD/CD drive and also an
-Imation Superdisk connected to port 0x278. You could give the following
-options on your boot command:
+with unit ID number 36 connected to the parallel port at 0x378, a SyQuest
+EZ-135 connected to the chained port on the PD/CD drive and also an
+Imation Superdisk connected to port 0x278. You could give the following
+options on your boot command::
pd.drive0=0x378,1 pf.drive0=0x278,1 pf.drive1=0x378,0,36
@@ -169,24 +179,27 @@ In the last option, pf.drive1 configures device /dev/pf1, the 0x378
is the parallel port base address, the 0 is the protocol registration
number and 36 is the chain ID.
-Please note: while PARIDE will work both with and without the
+Please note: while PARIDE will work both with and without the
PARPORT parallel port sharing system that is included by the
"Parallel port support" option, PARPORT must be included and enabled
if you want to use chains of devices on the same parallel port.
2.2 Loading and configuring PARIDE as modules
+----------------------------------------------
It is much faster and simpler to get to understand the PARIDE drivers
-if you use them as loadable kernel modules.
+if you use them as loadable kernel modules.
-Note 1: using these drivers with the "kerneld" automatic module loading
-system is not recommended for beginners, and is not documented here.
+Note 1:
+ using these drivers with the "kerneld" automatic module loading
+ system is not recommended for beginners, and is not documented here.
-Note 2: if you build PARPORT support as a loadable module, PARIDE must
-also be built as loadable modules, and PARPORT must be loaded before the
-PARIDE modules.
+Note 2:
+ if you build PARPORT support as a loadable module, PARIDE must
+ also be built as loadable modules, and PARPORT must be loaded before
+ the PARIDE modules.
-To use PARIDE, you must begin by
+To use PARIDE, you must begin by::
insmod paride
@@ -195,8 +208,8 @@ among other tasks.
Then, load as many of the protocol modules as you think you might need.
As you load each module, it will register the protocols that it supports,
-and print a log message to your kernel log file and your console. For
-example:
+and print a log message to your kernel log file and your console. For
+example::
# insmod epat
paride: epat registered as protocol 0
@@ -205,22 +218,22 @@ example:
paride: k971 registered as protocol 2
Finally, you can load high-level drivers for each kind of device that
-you have connected. By default, each driver will autoprobe for a single
+you have connected. By default, each driver will autoprobe for a single
device, but you can support up to four similar devices by giving their
individual co-ordinates when you load the driver.
For example, if you had two no-name CD-ROM drives both using the
KingByte KBIC-951A adapter, one on port 0x378 and the other on 0x3bc
-you could give the following command:
+you could give the following command::
# insmod pcd drive0=0x378,1 drive1=0x3bc,1
For most adapters, giving a port address and protocol number is sufficient,
-but check the source files in linux/drivers/block/paride for more
+but check the source files in linux/drivers/block/paride for more
information. (Hopefully someone will write some man pages one day !).
As another example, here's what happens when PARPORT is installed, and
-a SyQuest EZ-135 is attached to port 0x378:
+a SyQuest EZ-135 is attached to port 0x378::
# insmod paride
paride: version 1.0 installed
@@ -237,46 +250,47 @@ Note that the last line is the output from the generic partition table
scanner - in this case it reports that it has found a disk with one partition.
2.3 Using a PARIDE device
+--------------------------
Once the drivers have been loaded, you can access PARIDE devices in the
same way as their traditional counterparts. You will probably need to
create the device "special files". Here is a simple script that you can
-cut to a file and execute:
-
-#!/bin/bash
-#
-# mkd -- a script to create the device special files for the PARIDE subsystem
-#
-function mkdev {
- mknod $1 $2 $3 $4 ; chmod 0660 $1 ; chown root:disk $1
-}
-#
-function pd {
- D=$( printf \\$( printf "x%03x" $[ $1 + 97 ] ) )
- mkdev pd$D b 45 $[ $1 * 16 ]
- for P in 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
- do mkdev pd$D$P b 45 $[ $1 * 16 + $P ]
- done
-}
-#
-cd /dev
-#
-for u in 0 1 2 3 ; do pd $u ; done
-for u in 0 1 2 3 ; do mkdev pcd$u b 46 $u ; done
-for u in 0 1 2 3 ; do mkdev pf$u b 47 $u ; done
-for u in 0 1 2 3 ; do mkdev pt$u c 96 $u ; done
-for u in 0 1 2 3 ; do mkdev npt$u c 96 $[ $u + 128 ] ; done
-for u in 0 1 2 3 ; do mkdev pg$u c 97 $u ; done
-#
-# end of mkd
+cut to a file and execute::
+
+ #!/bin/bash
+ #
+ # mkd -- a script to create the device special files for the PARIDE subsystem
+ #
+ function mkdev {
+ mknod $1 $2 $3 $4 ; chmod 0660 $1 ; chown root:disk $1
+ }
+ #
+ function pd {
+ D=$( printf \\$( printf "x%03x" $[ $1 + 97 ] ) )
+ mkdev pd$D b 45 $[ $1 * 16 ]
+ for P in 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
+ do mkdev pd$D$P b 45 $[ $1 * 16 + $P ]
+ done
+ }
+ #
+ cd /dev
+ #
+ for u in 0 1 2 3 ; do pd $u ; done
+ for u in 0 1 2 3 ; do mkdev pcd$u b 46 $u ; done
+ for u in 0 1 2 3 ; do mkdev pf$u b 47 $u ; done
+ for u in 0 1 2 3 ; do mkdev pt$u c 96 $u ; done
+ for u in 0 1 2 3 ; do mkdev npt$u c 96 $[ $u + 128 ] ; done
+ for u in 0 1 2 3 ; do mkdev pg$u c 97 $u ; done
+ #
+ # end of mkd
With the device files and drivers in place, you can access PARIDE devices
-like any other Linux device. For example, to mount a CD-ROM in pcd0, use:
+like any other Linux device. For example, to mount a CD-ROM in pcd0, use::
mount /dev/pcd0 /cdrom
If you have a fresh Avatar Shark cartridge, and the drive is pda, you
-might do something like:
+might do something like::
fdisk /dev/pda -- make a new partition table with
partition 1 of type 83
@@ -289,41 +303,46 @@ might do something like:
Devices like the Imation superdisk work in the same way, except that
they do not have a partition table. For example to make a 120MB
-floppy that you could share with a DOS system:
+floppy that you could share with a DOS system::
mkdosfs /dev/pf0
mount /dev/pf0 /mnt
2.4 The pf driver
+------------------
The pf driver is intended for use with parallel port ATAPI disk
devices. The most common devices in this category are PD drives
and LS-120 drives. Traditionally, media for these devices are not
partitioned. Consequently, the pf driver does not support partitioned
-media. This may be changed in a future version of the driver.
+media. This may be changed in a future version of the driver.
2.5 Using the pt driver
+------------------------
The pt driver for parallel port ATAPI tape drives is a minimal driver.
-It does not yet support many of the standard tape ioctl operations.
+It does not yet support many of the standard tape ioctl operations.
For best performance, a block size of 32KB should be used. You will
probably want to set the parallel port delay to 0, if you can.
2.6 Using the pg driver
+------------------------
The pg driver can be used in conjunction with the cdrecord program
to create CD-ROMs. Please get cdrecord version 1.6.1 or later
-from ftp://ftp.fokus.gmd.de/pub/unix/cdrecord/ . To record CD-R media
-your parallel port should ideally be set to EPP mode, and the "port delay"
-should be set to 0. With those settings it is possible to record at 2x
+from ftp://ftp.fokus.gmd.de/pub/unix/cdrecord/ . To record CD-R media
+your parallel port should ideally be set to EPP mode, and the "port delay"
+should be set to 0. With those settings it is possible to record at 2x
speed without any buffer underruns. If you cannot get the driver to work
in EPP mode, try to use "bidirectional" or "PS/2" mode and 1x speeds only.
3. Troubleshooting
+==================
3.1 Use EPP mode if you can
+----------------------------
The most common problems that people report with the PARIDE drivers
concern the parallel port CMOS settings. At this time, none of the
@@ -332,6 +351,7 @@ If you are able to do so, please set your parallel port into EPP mode
using your CMOS setup procedure.
3.2 Check the port delay
+-------------------------
Some parallel ports cannot reliably transfer data at full speed. To
offset the errors, the PARIDE protocol modules introduce a "port
@@ -347,23 +367,25 @@ read the comments at the beginning of the driver source files in
linux/drivers/block/paride.
3.3 Some drives need a printer reset
+-------------------------------------
There appear to be a number of "noname" external drives on the market
that do not always power up correctly. We have noticed this with some
drives based on OnSpec and older Freecom adapters. In these rare cases,
the adapter can often be reinitialised by issuing a "printer reset" on
-the parallel port. As the reset operation is potentially disruptive in
-multiple device environments, the PARIDE drivers will not do it
-automatically. You can however, force a printer reset by doing:
+the parallel port. As the reset operation is potentially disruptive in
+multiple device environments, the PARIDE drivers will not do it
+automatically. You can, however, force a printer reset by doing::
insmod lp reset=1
rmmod lp
If you have one of these marginal cases, you should probably build
your paride drivers as modules, and arrange to do the printer reset
-before loading the PARIDE drivers.
+before loading the PARIDE drivers.
3.4 Use the verbose option and dmesg if you need help
+------------------------------------------------------
While a lot of testing has gone into these drivers to make them work
as smoothly as possible, problems will arise. If you do have problems,
@@ -373,7 +395,7 @@ clues, then please make sure that only one drive is hooked to your system,
and that either (a) PARPORT is enabled or (b) no other device driver
is using your parallel port (check in /proc/ioports). Then, load the
appropriate drivers (you can load several protocol modules if you want)
-as in:
+as in::
# insmod paride
# insmod epat
@@ -394,12 +416,14 @@ by e-mail to grant@torque.net, or join the linux-parport mailing list
and post your report there.
3.5 For more information or help
+---------------------------------
You can join the linux-parport mailing list by sending a mail message
-to
+to:
+
linux-parport-request@torque.net
-with the single word
+with the single word::
subscribe
@@ -412,6 +436,4 @@ have in your mail headers, when sending mail to the list server.
You might also find some useful information on the linux-parport
web pages (although they are not always up to date) at
- http://web.archive.org/web/*/http://www.torque.net/parport/
-
-
+ http://web.archive.org/web/%2A/http://www.torque.net/parport/
diff --git a/Documentation/blockdev/ramdisk.txt b/Documentation/admin-guide/blockdev/ramdisk.rst
index 501e12e0323e..b7c2268f8dec 100644
--- a/Documentation/blockdev/ramdisk.txt
+++ b/Documentation/admin-guide/blockdev/ramdisk.rst
@@ -1,7 +1,8 @@
+==========================================
Using the RAM disk block device with Linux
-------------------------------------------
+==========================================
-Contents:
+.. Contents:
1) Overview
2) Kernel Command Line Parameters
@@ -42,7 +43,7 @@ rescue floppy disk.
2a) Kernel Command Line Parameters
ramdisk_size=N
- ==============
+ Size of the ramdisk.
This parameter tells the RAM disk driver to set up RAM disks of N k size. The
default is 4096 (4 MB).
@@ -50,16 +51,13 @@ default is 4096 (4 MB).
2b) Module parameters
rd_nr
- =====
- /dev/ramX devices created.
+ /dev/ramX devices created.
max_part
- ========
- Maximum partition number.
+ Maximum partition number.
rd_size
- =======
- See ramdisk_size.
+ See ramdisk_size.
3) Using "rdev -r"
------------------
@@ -71,11 +69,11 @@ to 2 MB (2^11) of where to find the RAM disk (this used to be the size). Bit
prompt/wait sequence is to be given before trying to read the RAM disk. Since
the RAM disk dynamically grows as data is being written into it, a size field
is not required. Bits 11 to 13 are not currently used and may as well be zero.
-These numbers are no magical secrets, as seen below:
+These numbers are no magical secrets, as seen below::
-./arch/x86/kernel/setup.c:#define RAMDISK_IMAGE_START_MASK 0x07FF
-./arch/x86/kernel/setup.c:#define RAMDISK_PROMPT_FLAG 0x8000
-./arch/x86/kernel/setup.c:#define RAMDISK_LOAD_FLAG 0x4000
+ ./arch/x86/kernel/setup.c:#define RAMDISK_IMAGE_START_MASK 0x07FF
+ ./arch/x86/kernel/setup.c:#define RAMDISK_PROMPT_FLAG 0x8000
+ ./arch/x86/kernel/setup.c:#define RAMDISK_LOAD_FLAG 0x4000
Consider a typical two floppy disk setup, where you will have the
kernel on disk one, and have already put a RAM disk image onto disk #2.
@@ -92,20 +90,23 @@ sequence so that you have a chance to switch floppy disks.
The command line equivalent is: "prompt_ramdisk=1"
Putting that together gives 2^15 + 2^14 + 0 = 49152 for an rdev word.
-So to create disk one of the set, you would do:
+So to create disk one of the set, you would do::
/usr/src/linux# cat arch/x86/boot/zImage > /dev/fd0
/usr/src/linux# rdev /dev/fd0 /dev/fd0
/usr/src/linux# rdev -r /dev/fd0 49152
-If you make a boot disk that has LILO, then for the above, you would use:
+If you make a boot disk that has LILO, then for the above, you would use::
+
append = "ramdisk_start=0 load_ramdisk=1 prompt_ramdisk=1"
-Since the default start = 0 and the default prompt = 1, you could use:
+
+Since the default start = 0 and the default prompt = 1, you could use::
+
append = "load_ramdisk=1"
4) An Example of Creating a Compressed RAM Disk
-----------------------------------------------
+-----------------------------------------------
To create a RAM disk image, you will need a spare block device to
construct it on. This can be the RAM disk device itself, or an
@@ -120,11 +121,11 @@ a) Decide on the RAM disk size that you want. Say 2 MB for this example.
Create it by writing to the RAM disk device. (This step is not currently
required, but may be in the future.) It is wise to zero out the
area (esp. for disks) so that maximal compression is achieved for
- the unused blocks of the image that you are about to create.
+ the unused blocks of the image that you are about to create::
dd if=/dev/zero of=/dev/ram0 bs=1k count=2048
-b) Make a filesystem on it. Say ext2fs for this example.
+b) Make a filesystem on it. Say ext2fs for this example::
mke2fs -vm0 /dev/ram0 2048
@@ -133,11 +134,11 @@ c) Mount it, copy the files you want to it (eg: /etc/* /dev/* ...)
d) Compress the contents of the RAM disk. The level of compression
will be approximately 50% of the space used by the files. Unused
- space on the RAM disk will compress to almost nothing.
+ space on the RAM disk will compress to almost nothing::
dd if=/dev/ram0 bs=1k count=2048 | gzip -v9 > /tmp/ram_image.gz
-e) Put the kernel onto the floppy
+e) Put the kernel onto the floppy::
dd if=zImage of=/dev/fd0 bs=1k
@@ -146,13 +147,13 @@ f) Put the RAM disk image onto the floppy, after the kernel. Use an offset
(possibly larger) kernel onto the same floppy later without overlapping
the RAM disk image. An offset of 400 kB for kernels about 350 kB in
size would be reasonable. Make sure offset+size of ram_image.gz is
- not larger than the total space on your floppy (usually 1440 kB).
+ not larger than the total space on your floppy (usually 1440 kB)::
dd if=/tmp/ram_image.gz of=/dev/fd0 bs=1k seek=400
g) Use "rdev" to set the boot device, RAM disk offset, prompt flag, etc.
For prompt_ramdisk=1, load_ramdisk=1, ramdisk_start=400, one would
- have 2^15 + 2^14 + 400 = 49552.
+ have 2^15 + 2^14 + 400 = 49552::
rdev /dev/fd0 /dev/fd0
rdev -r /dev/fd0 49552
@@ -160,15 +161,17 @@ g) Use "rdev" to set the boot device, RAM disk offset, prompt flag, etc.
That is it. You now have your boot/root compressed RAM disk floppy. Some
users may wish to combine steps (d) and (f) by using a pipe.
---------------------------------------------------------------------------
+
Paul Gortmaker 12/95
Changelog:
----------
-10-22-04 : Updated to reflect changes in command line options, remove
+10-22-04 :
+ Updated to reflect changes in command line options, remove
obsolete references, general cleanup.
James Nelson (james4765@gmail.com)
-12-95 : Original Document
+12-95 :
+ Original Document
diff --git a/Documentation/blockdev/zram.txt b/Documentation/admin-guide/blockdev/zram.rst
index 4df0ce271085..6eccf13219ff 100644
--- a/Documentation/blockdev/zram.txt
+++ b/Documentation/admin-guide/blockdev/zram.rst
@@ -1,7 +1,9 @@
+========================================
zram: Compressed RAM based block devices
-----------------------------------------
+========================================
-* Introduction
+Introduction
+============
The zram module creates RAM based block devices named /dev/zram<id>
(<id> = 0, 1, ...). Pages written to these disks are compressed and stored
@@ -12,9 +14,11 @@ use as swap disks, various caches under /var and maybe many more :)
Statistics for individual zram devices are exported through sysfs nodes at
/sys/block/zram<id>/
-* Usage
+Usage
+=====
There are several ways to configure and manage zram device(-s):
+
a) using zram and zram_control sysfs attributes
b) using zramctl utility, provided by util-linux (util-linux@vger.kernel.org).
@@ -22,7 +26,7 @@ In this document we will describe only 'manual' zram configuration steps,
IOW, zram and zram_control sysfs attributes.
In order to get a better idea about zramctl please consult util-linux
-documentation, zramctl man-page or `zramctl --help'. Please be informed
+documentation, zramctl man-page or `zramctl --help`. Please be informed
that zram maintainers do not develop/maintain util-linux or zramctl, should
you have any questions please contact util-linux@vger.kernel.org
@@ -30,19 +34,23 @@ Following shows a typical sequence of steps for using zram.
WARNING
=======
+
For the sake of simplicity we skip error checking parts in most of the
examples below. However, it is your sole responsibility to handle errors.
zram sysfs attributes always return negative values in case of errors.
The list of possible return codes:
--EBUSY -- an attempt to modify an attribute that cannot be changed once
-the device has been initialised. Please reset device first;
--ENOMEM -- zram was not able to allocate enough memory to fulfil your
-needs;
--EINVAL -- invalid input has been provided.
+
+======== =============================================================
+-EBUSY an attempt to modify an attribute that cannot be changed once
+ the device has been initialised. Please reset device first;
+-ENOMEM zram was not able to allocate enough memory to fulfil your
+ needs;
+-EINVAL invalid input has been provided.
+======== =============================================================
If you use 'echo', the returned value is set by the 'echo' utility,
-and, in general case, something like:
+and, in general case, something like::
echo 3 > /sys/block/zram0/max_comp_streams
if [ $? -ne 0 ];
@@ -51,7 +59,11 @@ and, in general case, something like:
should suffice.
-1) Load Module:
+1) Load Module
+==============
+
+::
+
modprobe zram num_devices=4
This creates 4 devices: /dev/zram{0,1,2,3}
@@ -59,6 +71,8 @@ num_devices parameter is optional and tells zram how many devices should be
pre-created. Default: 1.
2) Set max number of compression streams
+========================================
+
Regardless of the value passed to this attribute, ZRAM will always
allocate multiple compression streams - one per online CPU - thus
allowing several concurrent compression operations. The number of
@@ -66,16 +80,20 @@ allocated compression streams goes down when some of the CPUs
become offline. There is no single-compression-stream mode anymore,
unless you are running a UP system or have only 1 CPU online.
-To find out how many streams are currently available:
+To find out how many streams are currently available::
+
cat /sys/block/zram0/max_comp_streams
3) Select compression algorithm
+===============================
+
Using comp_algorithm device attribute one can see available and
currently selected (shown in square brackets) compression algorithms,
change selected compression algorithm (once the device is initialised
there is no way to change compression algorithm).
-Examples:
+Examples::
+
#show supported compression algorithms
cat /sys/block/zram0/comp_algorithm
lzo [lz4]
@@ -83,20 +101,23 @@ Examples:
#select lzo compression algorithm
echo lzo > /sys/block/zram0/comp_algorithm
-For the time being, the `comp_algorithm' content does not necessarily
+For the time being, the `comp_algorithm` content does not necessarily
show every compression algorithm supported by the kernel. We keep this
list primarily to simplify device configuration and one can configure
a new device with a compression algorithm that is not listed in
-`comp_algorithm'. The thing is that, internally, ZRAM uses Crypto API
+`comp_algorithm`. The thing is that, internally, ZRAM uses Crypto API
and, if some of the algorithms were built as modules, it's impossible
to list all of them using, for instance, /proc/crypto or any other
method. This, however, has an advantage of permitting the usage of
custom crypto compression modules (implementing S/W or H/W compression).
4) Set Disksize
+===============
+
Set disk size by writing the value to sysfs node 'disksize'.
The value can be either in bytes or you can use mem suffixes.
-Examples:
+Examples::
+
# Initialize /dev/zram0 with 50MB disksize
echo $((50*1024*1024)) > /sys/block/zram0/disksize
@@ -111,10 +132,13 @@ since we expect a 2:1 compression ratio. Note that zram uses about 0.1% of the
size of the disk when not in use so a huge zram is wasteful.
5) Set memory limit: Optional
+=============================
+
Set memory limit by writing the value to sysfs node 'mem_limit'.
The value can be either in bytes or you can use mem suffixes.
In addition, you could change the value in runtime.
-Examples:
+Examples::
+
# limit /dev/zram0 with 50MB memory
echo $((50*1024*1024)) > /sys/block/zram0/mem_limit
@@ -126,7 +150,11 @@ Examples:
# To disable memory limit
echo 0 > /sys/block/zram0/mem_limit
-6) Activate:
+6) Activate
+===========
+
+::
+
mkswap /dev/zram0
swapon /dev/zram0
@@ -134,6 +162,7 @@ Examples:
mount /dev/zram1 /tmp
7) Add/remove zram devices
+==========================
zram provides a control interface, which enables dynamic (on-demand) device
addition and removal.
@@ -142,44 +171,51 @@ In order to add a new /dev/zramX device, perform read operation on hot_add
attribute. This will return either new device's device id (meaning that you
can use /dev/zram<id>) or error code.
-Example:
+Example::
+
cat /sys/class/zram-control/hot_add
1
To remove the existing /dev/zramX device (where X is a device id)
-execute
+execute::
+
echo X > /sys/class/zram-control/hot_remove
-8) Stats:
+8) Stats
+========
+
Per-device statistics are exported as various nodes under /sys/block/zram<id>/
A brief description of exported device attributes. For more details please
read Documentation/ABI/testing/sysfs-block-zram.
+====================== ====== ===============================================
Name access description
----- ------ -----------
+====================== ====== ===============================================
disksize RW show and set the device's disk size
initstate RO shows the initialization state of the device
reset WO trigger device reset
-mem_used_max WO reset the `mem_used_max' counter (see later)
-mem_limit WO specifies the maximum amount of memory ZRAM can use
- to store the compressed data
-writeback_limit WO specifies the maximum amount of write IO zram can
- write out to backing device as 4KB unit
+mem_used_max WO reset the `mem_used_max` counter (see later)
+mem_limit WO specifies the maximum amount of memory ZRAM can
+ use to store the compressed data
+writeback_limit WO specifies the maximum amount of write IO zram
+ can write out to backing device as 4KB unit
writeback_limit_enable RW show and set writeback_limit feature
-max_comp_streams RW the number of possible concurrent compress operations
+max_comp_streams RW the number of possible concurrent compress
+ operations
comp_algorithm RW show and change the compression algorithm
compact WO trigger memory compaction
debug_stat RO this file is used for zram debugging purposes
backing_dev RW set up backend storage for zram to write out
idle WO mark allocated slot as idle
+====================== ====== ===============================================
User space is advised to use the following files to read the device statistics.
File /sys/block/zram<id>/stat
-Represents block layer statistics. Read Documentation/block/stat.txt for
+Represents block layer statistics. Read Documentation/block/stat.rst for
details.
File /sys/block/zram<id>/io_stat
@@ -188,23 +224,31 @@ The stat file represents device's I/O statistics not accounted by block
layer and, thus, not available in zram<id>/stat file. It consists of a
single line of text and contains the following stats separated by
whitespace:
- failed_reads the number of failed reads
- failed_writes the number of failed writes
- invalid_io the number of non-page-size-aligned I/O requests
+
+ ============= =============================================================
+ failed_reads The number of failed reads
+ failed_writes The number of failed writes
+ invalid_io The number of non-page-size-aligned I/O requests
notify_free Depending on device usage scenario it may account
+
a) the number of pages freed because of swap slot free
- notifications or b) the number of pages freed because of
- REQ_OP_DISCARD requests sent by bio. The former ones are
- sent to a swap block device when a swap slot is freed,
- which implies that this disk is being used as a swap disk.
+ notifications
+ b) the number of pages freed because of
+ REQ_OP_DISCARD requests sent by bio. The former ones are
+ sent to a swap block device when a swap slot is freed,
+ which implies that this disk is being used as a swap disk.
+
The latter ones are sent by filesystem mounted with
discard option, whenever some data blocks are getting
discarded.
+ ============= =============================================================
File /sys/block/zram<id>/mm_stat
The stat file represents device's mm statistics. It consists of a single
line of text and contains the following stats separated by whitespace:
+
+ ================ =============================================================
orig_data_size uncompressed size of data stored in this disk.
This excludes same-element-filled pages (same_pages) since
no memory is allocated for them.
@@ -223,58 +267,71 @@ line of text and contains the following stats separated by whitespace:
No memory is allocated for such pages.
pages_compacted the number of pages freed during compaction
huge_pages the number of incompressible pages
+ ================ =============================================================
File /sys/block/zram<id>/bd_stat
The stat file represents device's backing device statistics. It consists of
a single line of text and contains the following stats separated by whitespace:
+
+ ============== =============================================================
bd_count size of data written in backing device.
Unit: 4K bytes
bd_reads the number of reads from backing device
Unit: 4K bytes
bd_writes the number of writes to backing device
Unit: 4K bytes
+ ============== =============================================================
+
+9) Deactivate
+=============
+
+::
-9) Deactivate:
swapoff /dev/zram0
umount /dev/zram1
-10) Reset:
- Write any positive value to 'reset' sysfs node
- echo 1 > /sys/block/zram0/reset
- echo 1 > /sys/block/zram1/reset
+10) Reset
+=========
+
+ Write any positive value to 'reset' sysfs node::
+
+ echo 1 > /sys/block/zram0/reset
+ echo 1 > /sys/block/zram1/reset
This frees all the memory allocated for the given device and
resets the disksize to zero. You must set the disksize again
before reusing the device.
-* Optional Feature
+Optional Feature
+================
-= writeback
+writeback
+---------
With CONFIG_ZRAM_WRITEBACK, zram can write idle/incompressible page
to backing storage rather than keeping it in memory.
-To use the feature, admin should set up backing device via
+To use the feature, admin should set up backing device via::
- "echo /dev/sda5 > /sys/block/zramX/backing_dev"
+ echo /dev/sda5 > /sys/block/zramX/backing_dev
before disksize setting. It supports only partition at this moment.
-If admin want to use incompressible page writeback, they could do via
+If admin want to use incompressible page writeback, they could do via::
- "echo huge > /sys/block/zramX/write"
+ echo huge > /sys/block/zramX/write
To use idle page writeback, first, user need to declare zram pages
-as idle.
+as idle::
- "echo all > /sys/block/zramX/idle"
+ echo all > /sys/block/zramX/idle
From now on, any pages on zram are idle pages. The idle mark
will not be removed until someone requests access to the block.
IOW, unless there is access request, those pages are still idle pages.
-Admin can request writeback of those idle pages at right timing via
+Admin can request writeback of those idle pages at right timing via::
- "echo idle > /sys/block/zramX/writeback"
+ echo idle > /sys/block/zramX/writeback
With the command, zram writeback idle pages from memory to the storage.
@@ -285,7 +342,7 @@ to guarantee storage health for entire product life.
To overcome the concern, zram supports "writeback_limit" feature.
The "writeback_limit_enable"'s default value is 0 so that it doesn't limit
any writeback. IOW, if admin want to apply writeback budget, he should
-enable writeback_limit_enable via
+enable writeback_limit_enable via::
$ echo 1 > /sys/block/zramX/writeback_limit_enable
@@ -296,7 +353,7 @@ until admin set the budget via /sys/block/zramX/writeback_limit.
assigned via /sys/block/zramX/writeback_limit is meaningless.)
If admin want to limit writeback as per-day 400M, he could do it
-like below.
+like below::
$ MB_SHIFT=20
$ 4K_SHIFT=12
@@ -305,16 +362,16 @@ like below.
$ echo 1 > /sys/block/zram0/writeback_limit_enable
If the admin wants to allow further writes again once the budget is exhausted,
-he could do it like below
+he could do it like below::
$ echo $((400<<MB_SHIFT>>4K_SHIFT)) > \
/sys/block/zram0/writeback_limit
-If admin want to see remaining writeback budget since he set,
+If admin want to see remaining writeback budget since he set::
$ cat /sys/block/zramX/writeback_limit
-If admin want to disable writeback limit, he could do
+If admin want to disable writeback limit, he could do::
$ echo 0 > /sys/block/zramX/writeback_limit_enable
@@ -326,25 +383,35 @@ budget in next setting is user's job.
If admin want to measure writeback count in a certain period, he could
know it via /sys/block/zram0/bd_stat's 3rd column.
-= memory tracking
+memory tracking
+---------------
With CONFIG_ZRAM_MEMORY_TRACKING, user can know information of the
zram block. It could be useful to catch cold or incompressible
pages of the process with pagemap.
+
If you enable the feature, you could see block state via
-/sys/kernel/debug/zram/zram0/block_state". The output is as follows,
+/sys/kernel/debug/zram/zram0/block_state. The output is as follows::
300 75.033841 .wh.
301 63.806904 s...
302 63.806919 ..hi
-First column is zram's block index.
-Second column is access time since the system was booted
-Third column is state of the block.
-(s: same page
-w: written page to backing store
-h: huge page
-i: idle page)
+First column
+ zram's block index.
+Second column
+ access time since the system was booted
+Third column
+ state of the block:
+
+ s:
+ same page
+ w:
+ written page to backing store
+ h:
+ huge page
+ i:
+ idle page
First line of above example says 300th block is accessed at 75.033841sec
and the block's state is huge so it is written back to the backing
diff --git a/Documentation/btmrvl.txt b/Documentation/admin-guide/btmrvl.rst
index ec57740ead0c..ec57740ead0c 100644
--- a/Documentation/btmrvl.txt
+++ b/Documentation/admin-guide/btmrvl.rst
diff --git a/Documentation/admin-guide/bug-hunting.rst b/Documentation/admin-guide/bug-hunting.rst
index b761aa2a51d2..44b8a4edd348 100644
--- a/Documentation/admin-guide/bug-hunting.rst
+++ b/Documentation/admin-guide/bug-hunting.rst
@@ -90,9 +90,9 @@ the disk is not available then you have three options:
run a null modem to a second machine and capture the output there
using your favourite communication program. Minicom works well.
-(3) Use Kdump (see Documentation/kdump/kdump.rst),
+(3) Use Kdump (see Documentation/admin-guide/kdump/kdump.rst),
extract the kernel ring buffer from old memory with using dmesg
- gdbmacro in Documentation/kdump/gdbmacros.txt.
+ gdbmacro in Documentation/admin-guide/kdump/gdbmacros.txt.
Finding the bug's location
--------------------------
diff --git a/Documentation/cgroup-v1/blkio-controller.rst b/Documentation/admin-guide/cgroup-v1/blkio-controller.rst
index 1d7d962933be..1d7d962933be 100644
--- a/Documentation/cgroup-v1/blkio-controller.rst
+++ b/Documentation/admin-guide/cgroup-v1/blkio-controller.rst
diff --git a/Documentation/cgroup-v1/cgroups.rst b/Documentation/admin-guide/cgroup-v1/cgroups.rst
index 46bbe7e022d4..b0688011ed06 100644
--- a/Documentation/cgroup-v1/cgroups.rst
+++ b/Documentation/admin-guide/cgroup-v1/cgroups.rst
@@ -3,7 +3,7 @@ Control Groups
==============
Written by Paul Menage <menage@google.com> based on
-Documentation/cgroup-v1/cpusets.rst
+Documentation/admin-guide/cgroup-v1/cpusets.rst
Original copyright statements from cpusets.txt:
@@ -76,7 +76,7 @@ On their own, the only use for cgroups is for simple job
tracking. The intention is that other subsystems hook into the generic
cgroup support to provide new attributes for cgroups, such as
accounting/limiting the resources which processes in a cgroup can
-access. For example, cpusets (see Documentation/cgroup-v1/cpusets.rst) allow
+access. For example, cpusets (see Documentation/admin-guide/cgroup-v1/cpusets.rst) allow
you to associate a set of CPUs and a set of memory nodes with the
tasks in each cgroup.
diff --git a/Documentation/cgroup-v1/cpuacct.rst b/Documentation/admin-guide/cgroup-v1/cpuacct.rst
index d30ed81d2ad7..d30ed81d2ad7 100644
--- a/Documentation/cgroup-v1/cpuacct.rst
+++ b/Documentation/admin-guide/cgroup-v1/cpuacct.rst
diff --git a/Documentation/cgroup-v1/cpusets.rst b/Documentation/admin-guide/cgroup-v1/cpusets.rst
index b6a42cdea72b..86a6ae995d54 100644
--- a/Documentation/cgroup-v1/cpusets.rst
+++ b/Documentation/admin-guide/cgroup-v1/cpusets.rst
@@ -49,7 +49,7 @@ hooks, beyond what is already present, required to manage dynamic
job placement on large systems.
Cpusets use the generic cgroup subsystem described in
-Documentation/cgroup-v1/cgroups.rst.
+Documentation/admin-guide/cgroup-v1/cgroups.rst.
Requests by a task, using the sched_setaffinity(2) system call to
include CPUs in its CPU affinity mask, and using the mbind(2) and
diff --git a/Documentation/cgroup-v1/devices.rst b/Documentation/admin-guide/cgroup-v1/devices.rst
index e1886783961e..e1886783961e 100644
--- a/Documentation/cgroup-v1/devices.rst
+++ b/Documentation/admin-guide/cgroup-v1/devices.rst
diff --git a/Documentation/cgroup-v1/freezer-subsystem.rst b/Documentation/admin-guide/cgroup-v1/freezer-subsystem.rst
index 582d3427de3f..582d3427de3f 100644
--- a/Documentation/cgroup-v1/freezer-subsystem.rst
+++ b/Documentation/admin-guide/cgroup-v1/freezer-subsystem.rst
diff --git a/Documentation/cgroup-v1/hugetlb.rst b/Documentation/admin-guide/cgroup-v1/hugetlb.rst
index a3902aa253a9..a3902aa253a9 100644
--- a/Documentation/cgroup-v1/hugetlb.rst
+++ b/Documentation/admin-guide/cgroup-v1/hugetlb.rst
diff --git a/Documentation/cgroup-v1/index.rst b/Documentation/admin-guide/cgroup-v1/index.rst
index fe76d42edc11..10bf48bae0b0 100644
--- a/Documentation/cgroup-v1/index.rst
+++ b/Documentation/admin-guide/cgroup-v1/index.rst
@@ -1,5 +1,3 @@
-:orphan:
-
========================
Control Groups version 1
========================
diff --git a/Documentation/cgroup-v1/memcg_test.rst b/Documentation/admin-guide/cgroup-v1/memcg_test.rst
index 91bd18c6a514..3f7115e07b5d 100644
--- a/Documentation/cgroup-v1/memcg_test.rst
+++ b/Documentation/admin-guide/cgroup-v1/memcg_test.rst
@@ -10,7 +10,7 @@ Because VM is getting complex (one of reasons is memcg...), memcg's behavior
is complex. This is a document for memcg's internal behavior.
Please note that implementation details can be changed.
-(*) Topics on API should be in Documentation/cgroup-v1/memory.rst)
+(*) Topics on API should be in Documentation/admin-guide/cgroup-v1/memory.rst)
0. How to record usage ?
========================
@@ -327,7 +327,7 @@ Under below explanation, we assume CONFIG_MEM_RES_CTRL_SWAP=y.
You can see charges have been moved by reading ``*.usage_in_bytes`` or
memory.stat of both A and B.
- See 8.2 of Documentation/cgroup-v1/memory.rst to see what value should
+ See 8.2 of Documentation/admin-guide/cgroup-v1/memory.rst to see what value should
be written to move_charge_at_immigrate.
9.10 Memory thresholds
diff --git a/Documentation/cgroup-v1/memory.rst b/Documentation/admin-guide/cgroup-v1/memory.rst
index 41bdc038dad9..41bdc038dad9 100644
--- a/Documentation/cgroup-v1/memory.rst
+++ b/Documentation/admin-guide/cgroup-v1/memory.rst
diff --git a/Documentation/cgroup-v1/net_cls.rst b/Documentation/admin-guide/cgroup-v1/net_cls.rst
index a2cf272af7a0..a2cf272af7a0 100644
--- a/Documentation/cgroup-v1/net_cls.rst
+++ b/Documentation/admin-guide/cgroup-v1/net_cls.rst
diff --git a/Documentation/cgroup-v1/net_prio.rst b/Documentation/admin-guide/cgroup-v1/net_prio.rst
index b40905871c64..b40905871c64 100644
--- a/Documentation/cgroup-v1/net_prio.rst
+++ b/Documentation/admin-guide/cgroup-v1/net_prio.rst
diff --git a/Documentation/cgroup-v1/pids.rst b/Documentation/admin-guide/cgroup-v1/pids.rst
index 6acebd9e72c8..6acebd9e72c8 100644
--- a/Documentation/cgroup-v1/pids.rst
+++ b/Documentation/admin-guide/cgroup-v1/pids.rst
diff --git a/Documentation/cgroup-v1/rdma.rst b/Documentation/admin-guide/cgroup-v1/rdma.rst
index 2fcb0a9bf790..2fcb0a9bf790 100644
--- a/Documentation/cgroup-v1/rdma.rst
+++ b/Documentation/admin-guide/cgroup-v1/rdma.rst
diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst
index 8269e869cb1e..3b29005aa981 100644
--- a/Documentation/admin-guide/cgroup-v2.rst
+++ b/Documentation/admin-guide/cgroup-v2.rst
@@ -9,7 +9,7 @@ This is the authoritative documentation on the design, interface and
conventions of cgroup v2. It describes all userland-visible aspects
of cgroup including core and specific controller behaviors. All
future changes must be reflected in this document. Documentation for
-v1 is available under Documentation/cgroup-v1/.
+v1 is available under Documentation/admin-guide/cgroup-v1/.
.. CONTENTS
@@ -1014,7 +1014,7 @@ All time durations are in microseconds.
A read-only nested-key file which exists on non-root cgroups.
Shows pressure stall information for CPU. See
- Documentation/accounting/psi.txt for details.
+ Documentation/accounting/psi.rst for details.
Memory
@@ -1355,7 +1355,7 @@ PAGE_SIZE multiple when read back.
A read-only nested-key file which exists on non-root cgroups.
Shows pressure stall information for memory. See
- Documentation/accounting/psi.txt for details.
+ Documentation/accounting/psi.rst for details.
Usage Guidelines
@@ -1498,7 +1498,7 @@ IO Interface Files
A read-only nested-key file which exists on non-root cgroups.
Shows pressure stall information for IO. See
- Documentation/accounting/psi.txt for details.
+ Documentation/accounting/psi.rst for details.
Writeback
diff --git a/Documentation/clearing-warn-once.txt b/Documentation/admin-guide/clearing-warn-once.rst
index 211fd926cf00..211fd926cf00 100644
--- a/Documentation/clearing-warn-once.txt
+++ b/Documentation/admin-guide/clearing-warn-once.rst
diff --git a/Documentation/cpu-load.txt b/Documentation/admin-guide/cpu-load.rst
index 2d01ce43d2a2..2d01ce43d2a2 100644
--- a/Documentation/cpu-load.txt
+++ b/Documentation/admin-guide/cpu-load.rst
diff --git a/Documentation/cputopology.txt b/Documentation/admin-guide/cputopology.rst
index b90dafcc8237..b90dafcc8237 100644
--- a/Documentation/cputopology.txt
+++ b/Documentation/admin-guide/cputopology.rst
diff --git a/Documentation/device-mapper/cache-policies.rst b/Documentation/admin-guide/device-mapper/cache-policies.rst
index b17fe352fc41..b17fe352fc41 100644
--- a/Documentation/device-mapper/cache-policies.rst
+++ b/Documentation/admin-guide/device-mapper/cache-policies.rst
diff --git a/Documentation/device-mapper/cache.rst b/Documentation/admin-guide/device-mapper/cache.rst
index f15e5254d05b..f15e5254d05b 100644
--- a/Documentation/device-mapper/cache.rst
+++ b/Documentation/admin-guide/device-mapper/cache.rst
diff --git a/Documentation/device-mapper/delay.rst b/Documentation/admin-guide/device-mapper/delay.rst
index 917ba8c33359..917ba8c33359 100644
--- a/Documentation/device-mapper/delay.rst
+++ b/Documentation/admin-guide/device-mapper/delay.rst
diff --git a/Documentation/device-mapper/dm-crypt.rst b/Documentation/admin-guide/device-mapper/dm-crypt.rst
index 8f4a3f889d43..8f4a3f889d43 100644
--- a/Documentation/device-mapper/dm-crypt.rst
+++ b/Documentation/admin-guide/device-mapper/dm-crypt.rst
diff --git a/Documentation/device-mapper/dm-dust.txt b/Documentation/admin-guide/device-mapper/dm-dust.txt
index 954d402a1f6a..954d402a1f6a 100644
--- a/Documentation/device-mapper/dm-dust.txt
+++ b/Documentation/admin-guide/device-mapper/dm-dust.txt
diff --git a/Documentation/device-mapper/dm-flakey.rst b/Documentation/admin-guide/device-mapper/dm-flakey.rst
index 86138735879d..86138735879d 100644
--- a/Documentation/device-mapper/dm-flakey.rst
+++ b/Documentation/admin-guide/device-mapper/dm-flakey.rst
diff --git a/Documentation/device-mapper/dm-init.rst b/Documentation/admin-guide/device-mapper/dm-init.rst
index e5242ff17e9b..e5242ff17e9b 100644
--- a/Documentation/device-mapper/dm-init.rst
+++ b/Documentation/admin-guide/device-mapper/dm-init.rst
diff --git a/Documentation/device-mapper/dm-integrity.rst b/Documentation/admin-guide/device-mapper/dm-integrity.rst
index a30aa91b5fbe..a30aa91b5fbe 100644
--- a/Documentation/device-mapper/dm-integrity.rst
+++ b/Documentation/admin-guide/device-mapper/dm-integrity.rst
diff --git a/Documentation/device-mapper/dm-io.rst b/Documentation/admin-guide/device-mapper/dm-io.rst
index d2492917a1f5..d2492917a1f5 100644
--- a/Documentation/device-mapper/dm-io.rst
+++ b/Documentation/admin-guide/device-mapper/dm-io.rst
diff --git a/Documentation/device-mapper/dm-log.rst b/Documentation/admin-guide/device-mapper/dm-log.rst
index ba4fce39bc27..ba4fce39bc27 100644
--- a/Documentation/device-mapper/dm-log.rst
+++ b/Documentation/admin-guide/device-mapper/dm-log.rst
diff --git a/Documentation/device-mapper/dm-queue-length.rst b/Documentation/admin-guide/device-mapper/dm-queue-length.rst
index d8e381c1cb02..d8e381c1cb02 100644
--- a/Documentation/device-mapper/dm-queue-length.rst
+++ b/Documentation/admin-guide/device-mapper/dm-queue-length.rst
diff --git a/Documentation/device-mapper/dm-raid.rst b/Documentation/admin-guide/device-mapper/dm-raid.rst
index 2fe255b130fb..2fe255b130fb 100644
--- a/Documentation/device-mapper/dm-raid.rst
+++ b/Documentation/admin-guide/device-mapper/dm-raid.rst
diff --git a/Documentation/device-mapper/dm-service-time.rst b/Documentation/admin-guide/device-mapper/dm-service-time.rst
index facf277fc13c..facf277fc13c 100644
--- a/Documentation/device-mapper/dm-service-time.rst
+++ b/Documentation/admin-guide/device-mapper/dm-service-time.rst
diff --git a/Documentation/device-mapper/dm-uevent.rst b/Documentation/admin-guide/device-mapper/dm-uevent.rst
index 4a8ee8d069c9..4a8ee8d069c9 100644
--- a/Documentation/device-mapper/dm-uevent.rst
+++ b/Documentation/admin-guide/device-mapper/dm-uevent.rst
diff --git a/Documentation/device-mapper/dm-zoned.rst b/Documentation/admin-guide/device-mapper/dm-zoned.rst
index 07f56ebc1730..07f56ebc1730 100644
--- a/Documentation/device-mapper/dm-zoned.rst
+++ b/Documentation/admin-guide/device-mapper/dm-zoned.rst
diff --git a/Documentation/device-mapper/era.rst b/Documentation/admin-guide/device-mapper/era.rst
index 90dd5c670b9f..90dd5c670b9f 100644
--- a/Documentation/device-mapper/era.rst
+++ b/Documentation/admin-guide/device-mapper/era.rst
diff --git a/Documentation/device-mapper/index.rst b/Documentation/admin-guide/device-mapper/index.rst
index 105e253bc231..c77c58b8f67b 100644
--- a/Documentation/device-mapper/index.rst
+++ b/Documentation/admin-guide/device-mapper/index.rst
@@ -1,5 +1,3 @@
-:orphan:
-
=============
Device Mapper
=============
diff --git a/Documentation/device-mapper/kcopyd.rst b/Documentation/admin-guide/device-mapper/kcopyd.rst
index 7651d395127f..7651d395127f 100644
--- a/Documentation/device-mapper/kcopyd.rst
+++ b/Documentation/admin-guide/device-mapper/kcopyd.rst
diff --git a/Documentation/device-mapper/linear.rst b/Documentation/admin-guide/device-mapper/linear.rst
index 9d17fc6e64a9..9d17fc6e64a9 100644
--- a/Documentation/device-mapper/linear.rst
+++ b/Documentation/admin-guide/device-mapper/linear.rst
diff --git a/Documentation/device-mapper/log-writes.rst b/Documentation/admin-guide/device-mapper/log-writes.rst
index 23141f2ffb7c..23141f2ffb7c 100644
--- a/Documentation/device-mapper/log-writes.rst
+++ b/Documentation/admin-guide/device-mapper/log-writes.rst
diff --git a/Documentation/device-mapper/persistent-data.rst b/Documentation/admin-guide/device-mapper/persistent-data.rst
index 2065c3c5a091..2065c3c5a091 100644
--- a/Documentation/device-mapper/persistent-data.rst
+++ b/Documentation/admin-guide/device-mapper/persistent-data.rst
diff --git a/Documentation/device-mapper/snapshot.rst b/Documentation/admin-guide/device-mapper/snapshot.rst
index ccdd8b587a74..ccdd8b587a74 100644
--- a/Documentation/device-mapper/snapshot.rst
+++ b/Documentation/admin-guide/device-mapper/snapshot.rst
diff --git a/Documentation/device-mapper/statistics.rst b/Documentation/admin-guide/device-mapper/statistics.rst
index 3d80a9f850cc..41ded0bc5933 100644
--- a/Documentation/device-mapper/statistics.rst
+++ b/Documentation/admin-guide/device-mapper/statistics.rst
@@ -13,7 +13,7 @@ the range specified.
The I/O statistics counters for each step-sized area of a region are
in the same format as `/sys/block/*/stat` or `/proc/diskstats` (see:
-Documentation/iostats.txt). But two extra counters (12 and 13) are
+Documentation/admin-guide/iostats.rst). But two extra counters (12 and 13) are
provided: total time spent reading and writing. When the histogram
argument is used, the 14th parameter is reported that represents the
histogram of latencies. All these counters may be accessed by sending
@@ -151,7 +151,7 @@ Messages
The first 11 counters have the same meaning as
`/sys/block/*/stat or /proc/diskstats`.
- Please refer to Documentation/iostats.txt for details.
+ Please refer to Documentation/admin-guide/iostats.rst for details.
1. the number of reads completed
2. the number of reads merged
diff --git a/Documentation/device-mapper/striped.rst b/Documentation/admin-guide/device-mapper/striped.rst
index e9a8da192ae1..e9a8da192ae1 100644
--- a/Documentation/device-mapper/striped.rst
+++ b/Documentation/admin-guide/device-mapper/striped.rst
diff --git a/Documentation/device-mapper/switch.rst b/Documentation/admin-guide/device-mapper/switch.rst
index 7dde06be1a4f..7dde06be1a4f 100644
--- a/Documentation/device-mapper/switch.rst
+++ b/Documentation/admin-guide/device-mapper/switch.rst
diff --git a/Documentation/device-mapper/thin-provisioning.rst b/Documentation/admin-guide/device-mapper/thin-provisioning.rst
index bafebf79da4b..bafebf79da4b 100644
--- a/Documentation/device-mapper/thin-provisioning.rst
+++ b/Documentation/admin-guide/device-mapper/thin-provisioning.rst
diff --git a/Documentation/device-mapper/unstriped.rst b/Documentation/admin-guide/device-mapper/unstriped.rst
index 0a8d3eb3f072..0a8d3eb3f072 100644
--- a/Documentation/device-mapper/unstriped.rst
+++ b/Documentation/admin-guide/device-mapper/unstriped.rst
diff --git a/Documentation/device-mapper/verity.rst b/Documentation/admin-guide/device-mapper/verity.rst
index a4d1c1476d72..a4d1c1476d72 100644
--- a/Documentation/device-mapper/verity.rst
+++ b/Documentation/admin-guide/device-mapper/verity.rst
diff --git a/Documentation/device-mapper/writecache.rst b/Documentation/admin-guide/device-mapper/writecache.rst
index d3d7690f5e8d..d3d7690f5e8d 100644
--- a/Documentation/device-mapper/writecache.rst
+++ b/Documentation/admin-guide/device-mapper/writecache.rst
diff --git a/Documentation/device-mapper/zero.rst b/Documentation/admin-guide/device-mapper/zero.rst
index 11fb5cf4597c..11fb5cf4597c 100644
--- a/Documentation/device-mapper/zero.rst
+++ b/Documentation/admin-guide/device-mapper/zero.rst
diff --git a/Documentation/efi-stub.txt b/Documentation/admin-guide/efi-stub.rst
index 833edb0d0bc4..833edb0d0bc4 100644
--- a/Documentation/efi-stub.txt
+++ b/Documentation/admin-guide/efi-stub.rst
diff --git a/Documentation/gpio/index.rst b/Documentation/admin-guide/gpio/index.rst
index 09a4a553f434..a244ba4e87d5 100644
--- a/Documentation/gpio/index.rst
+++ b/Documentation/admin-guide/gpio/index.rst
@@ -1,4 +1,4 @@
-:orphan:
+.. SPDX-License-Identifier: GPL-2.0
====
gpio
diff --git a/Documentation/gpio/sysfs.rst b/Documentation/admin-guide/gpio/sysfs.rst
index ec09ffd983e7..ec09ffd983e7 100644
--- a/Documentation/gpio/sysfs.rst
+++ b/Documentation/admin-guide/gpio/sysfs.rst
diff --git a/Documentation/highuid.txt b/Documentation/admin-guide/highuid.rst
index 6ee70465c0ea..6ee70465c0ea 100644
--- a/Documentation/highuid.txt
+++ b/Documentation/admin-guide/highuid.rst
diff --git a/Documentation/admin-guide/hw-vuln/l1tf.rst b/Documentation/admin-guide/hw-vuln/l1tf.rst
index 656aee262e23..f83212fae4d5 100644
--- a/Documentation/admin-guide/hw-vuln/l1tf.rst
+++ b/Documentation/admin-guide/hw-vuln/l1tf.rst
@@ -241,7 +241,7 @@ Guest mitigation mechanisms
For further information about confining guests to a single or to a group
of cores consult the cpusets documentation:
- https://www.kernel.org/doc/Documentation/cgroup-v1/cpusets.rst
+ https://www.kernel.org/doc/Documentation/admin-guide/cgroup-v1/cpusets.rst
.. _interrupt_isolation:
diff --git a/Documentation/hw_random.txt b/Documentation/admin-guide/hw_random.rst
index 121de96e395e..121de96e395e 100644
--- a/Documentation/hw_random.txt
+++ b/Documentation/admin-guide/hw_random.rst
diff --git a/Documentation/admin-guide/index.rst b/Documentation/admin-guide/index.rst
index 24fbe0568eff..280355d08af5 100644
--- a/Documentation/admin-guide/index.rst
+++ b/Documentation/admin-guide/index.rst
@@ -16,6 +16,7 @@ etc.
README
kernel-parameters
devices
+ sysctl/index
This section describes CPU vulnerabilities and their mitigations.
@@ -38,6 +39,8 @@ problems and bugs in particular.
ramoops
dynamic-debug-howto
init
+ kdump/index
+ perf/index
This is the beginning of a section with information of interest to
application developers. Documents covering various aspects of the kernel
@@ -56,11 +59,13 @@ configure specific aspects of kernel behavior to your liking.
initrd
cgroup-v2
+ cgroup-v1/index
serial-console
braille-console
parport
md
module-signing
+ rapidio
sysrq
unicode
vga-softcursor
@@ -69,14 +74,37 @@ configure specific aspects of kernel behavior to your liking.
java
ras
bcache
+ blockdev/index
ext4
binderfs
pm/index
thunderbolt
LSM/index
mm/index
+ namespaces/index
perf-security
acpi/index
+ aoe/index
+ btmrvl
+ clearing-warn-once
+ cpu-load
+ cputopology
+ device-mapper/index
+ efi-stub
+ gpio/index
+ highuid
+ hw_random
+ iostats
+ kernel-per-CPU-kthreads
+ laptops/index
+ lcd-panel-cgram
+ ldm
+ lockup-watchdogs
+ numastat
+ pnp
+ rtc
+ svga
+ video-output
.. only:: subproject and html
diff --git a/Documentation/iostats.txt b/Documentation/admin-guide/iostats.rst
index 5d63b18bd6d1..5d63b18bd6d1 100644
--- a/Documentation/iostats.txt
+++ b/Documentation/admin-guide/iostats.rst
diff --git a/Documentation/kdump/gdbmacros.txt b/Documentation/admin-guide/kdump/gdbmacros.txt
index 220d0a80ca2c..220d0a80ca2c 100644
--- a/Documentation/kdump/gdbmacros.txt
+++ b/Documentation/admin-guide/kdump/gdbmacros.txt
diff --git a/Documentation/kdump/index.rst b/Documentation/admin-guide/kdump/index.rst
index 2b17fcf6867a..8e2ebd0383cd 100644
--- a/Documentation/kdump/index.rst
+++ b/Documentation/admin-guide/kdump/index.rst
@@ -1,4 +1,3 @@
-:orphan:
================================================================
Documentation for Kdump - The kexec-based Crash Dumping Solution
diff --git a/Documentation/kdump/kdump.rst b/Documentation/admin-guide/kdump/kdump.rst
index ac7e131d2935..ac7e131d2935 100644
--- a/Documentation/kdump/kdump.rst
+++ b/Documentation/admin-guide/kdump/kdump.rst
diff --git a/Documentation/kdump/vmcoreinfo.rst b/Documentation/admin-guide/kdump/vmcoreinfo.rst
index 007a6b86e0ee..007a6b86e0ee 100644
--- a/Documentation/kdump/vmcoreinfo.rst
+++ b/Documentation/admin-guide/kdump/vmcoreinfo.rst
diff --git a/Documentation/admin-guide/kernel-parameters.rst b/Documentation/admin-guide/kernel-parameters.rst
index 5d29ba5ad88c..d05d531b4ec9 100644
--- a/Documentation/admin-guide/kernel-parameters.rst
+++ b/Documentation/admin-guide/kernel-parameters.rst
@@ -118,7 +118,7 @@ parameter is applicable::
LOOP Loopback device support is enabled.
M68k M68k architecture is enabled.
These options have more detailed description inside of
- Documentation/m68k/kernel-options.txt.
+ Documentation/m68k/kernel-options.rst.
MDA MDA console support is enabled.
MIPS MIPS architecture is enabled.
MOUSE Appropriate mouse support is enabled.
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index bf8221abfe0a..a5f4004e8705 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -430,7 +430,7 @@
blkdevparts= Manual partition parsing of block device(s) for
embedded devices based on command line input.
- See Documentation/block/cmdline-partition.txt
+ See Documentation/block/cmdline-partition.rst
boot_delay= Milliseconds to delay each printk during boot.
Values larger than 10 seconds (10000) are changed to
@@ -708,14 +708,14 @@
[KNL, x86_64] select a region under 4G first, and
fall back to reserve region above 4G when '@offset'
hasn't been specified.
- See Documentation/kdump/kdump.rst for further details.
+ See Documentation/admin-guide/kdump/kdump.rst for further details.
crashkernel=range1:size1[,range2:size2,...][@offset]
[KNL] Same as above, but depends on the memory
in the running system. The syntax of range is
start-[end] where start and end are both
a memory unit (amount[KMG]). See also
- Documentation/kdump/kdump.rst for an example.
+ Documentation/admin-guide/kdump/kdump.rst for an example.
crashkernel=size[KMG],high
[KNL, x86_64] range could be above 4G. Allow kernel
@@ -930,7 +930,7 @@
edid/1680x1050.bin, or edid/1920x1080.bin is given
and no file with the same name exists. Details and
instructions how to build your own EDID data are
- available in Documentation/EDID/howto.rst. An EDID
+ available in Documentation/driver-api/edid.rst. An EDID
data set will only be used for a particular connector,
if its name and a colon are prepended to the EDID
name. Each connector may use a unique EDID data
@@ -1199,15 +1199,15 @@
elevator= [IOSCHED]
Format: { "mq-deadline" | "kyber" | "bfq" }
- See Documentation/block/deadline-iosched.txt,
- Documentation/block/kyber-iosched.txt and
- Documentation/block/bfq-iosched.txt for details.
+ See Documentation/block/deadline-iosched.rst,
+ Documentation/block/kyber-iosched.rst and
+ Documentation/block/bfq-iosched.rst for details.
elfcorehdr=[size[KMG]@]offset[KMG] [IA64,PPC,SH,X86,S390]
Specifies physical address of start of kernel core
image elf header and optionally the size. Generally
kexec loader will pass this option to capture kernel.
- See Documentation/kdump/kdump.rst for details.
+ See Documentation/admin-guide/kdump/kdump.rst for details.
enable_mtrr_cleanup [X86]
The kernel tries to adjust MTRR layout from continuous
@@ -1249,7 +1249,7 @@
See also Documentation/fault-injection/.
floppy= [HW]
- See Documentation/blockdev/floppy.txt.
+ See Documentation/admin-guide/blockdev/floppy.rst.
force_pal_cache_flush
[IA-64] Avoid check_sal_cache_flush which may hang on
@@ -2234,7 +2234,7 @@
memblock=debug [KNL] Enable memblock debug messages.
load_ramdisk= [RAM] List of ramdisks to load from floppy
- See Documentation/blockdev/ramdisk.txt.
+ See Documentation/admin-guide/blockdev/ramdisk.rst.
lockd.nlm_grace_period=P [NFS] Assign grace period.
Format: <integer>
@@ -3155,7 +3155,7 @@
numa_zonelist_order= [KNL, BOOT] Select zonelist order for NUMA.
'node', 'default' can be specified
This can be set from sysctl after boot.
- See Documentation/sysctl/vm.txt for details.
+ See Documentation/admin-guide/sysctl/vm.rst for details.
ohci1394_dma=early [HW] enable debugging via the ohci1394 driver.
See Documentation/debugging-via-ohci1394.txt for more
@@ -3279,7 +3279,7 @@
pcd. [PARIDE]
See header of drivers/block/paride/pcd.c.
- See also Documentation/blockdev/paride.txt.
+ See also Documentation/admin-guide/blockdev/paride.rst.
pci=option[,option...] [PCI] various PCI subsystem options.
@@ -3523,7 +3523,7 @@
needed on a platform with proper driver support.
pd. [PARIDE]
- See Documentation/blockdev/paride.txt.
+ See Documentation/admin-guide/blockdev/paride.rst.
pdcchassis= [PARISC,HW] Disable/Enable PDC Chassis Status codes at
boot time.
@@ -3538,10 +3538,10 @@
and performance comparison.
pf. [PARIDE]
- See Documentation/blockdev/paride.txt.
+ See Documentation/admin-guide/blockdev/paride.rst.
pg. [PARIDE]
- See Documentation/blockdev/paride.txt.
+ See Documentation/admin-guide/blockdev/paride.rst.
pirq= [SMP,APIC] Manual mp-table setup
See Documentation/x86/i386/IO-APIC.rst.
@@ -3653,7 +3653,7 @@
prompt_ramdisk= [RAM] List of RAM disks to prompt for floppy disk
before loading.
- See Documentation/blockdev/ramdisk.txt.
+ See Documentation/admin-guide/blockdev/ramdisk.rst.
psi= [KNL] Enable or disable pressure stall information
tracking.
@@ -3675,7 +3675,7 @@
pstore.backend= Specify the name of the pstore backend to use
pt. [PARIDE]
- See Documentation/blockdev/paride.txt.
+ See Documentation/admin-guide/blockdev/paride.rst.
pti= [X86_64] Control Page Table Isolation of user and
kernel address spaces. Disabling this feature
@@ -3704,7 +3704,7 @@
See Documentation/admin-guide/md.rst.
ramdisk_size= [RAM] Sizes of RAM disks in kilobytes
- See Documentation/blockdev/ramdisk.txt.
+ See Documentation/admin-guide/blockdev/ramdisk.rst.
random.trust_cpu={on,off}
[KNL] Enable or disable trusting the use of the
@@ -4100,7 +4100,7 @@
relax_domain_level=
[KNL, SMP] Set scheduler's default relax_domain_level.
- See Documentation/cgroup-v1/cpusets.rst.
+ See Documentation/admin-guide/cgroup-v1/cpusets.rst.
reserve= [KNL,BUGS] Force kernel to ignore I/O ports or memory
Format: <base1>,<size1>[,<base2>,<size2>,...]
@@ -4358,7 +4358,7 @@
Format: <integer>
sonypi.*= [HW] Sony Programmable I/O Control Device driver
- See Documentation/laptops/sonypi.txt
+ See Documentation/admin-guide/laptops/sonypi.rst
spectre_v2= [X86] Control mitigation of Spectre variant 2
(indirect branch speculation) vulnerability.
@@ -4610,7 +4610,7 @@
swapaccount=[0|1]
[KNL] Enable accounting of swap in memory resource
controller if no parameter or 1 is given or disable
- it if 0 is given (See Documentation/cgroup-v1/memory.rst)
+ it if 0 is given (See Documentation/admin-guide/cgroup-v1/memory.rst)
swiotlb= [ARM,IA-64,PPC,MIPS,X86]
Format: { <int> | force | noforce }
@@ -5077,7 +5077,7 @@
vga= [BOOT,X86-32] Select a particular video mode
See Documentation/x86/boot.rst and
- Documentation/svga.txt.
+ Documentation/admin-guide/svga.rst.
Use vga=ask for menu.
This is actually a boot loader parameter; the value is
passed to the kernel using a special protocol.
diff --git a/Documentation/kernel-per-CPU-kthreads.txt b/Documentation/admin-guide/kernel-per-CPU-kthreads.rst
index 5623b9916411..4f18456dd3b1 100644
--- a/Documentation/kernel-per-CPU-kthreads.txt
+++ b/Documentation/admin-guide/kernel-per-CPU-kthreads.rst
@@ -12,7 +12,7 @@ References
- Documentation/IRQ-affinity.txt: Binding interrupts to sets of CPUs.
-- Documentation/cgroup-v1: Using cgroups to bind tasks to sets of CPUs.
+- Documentation/admin-guide/cgroup-v1: Using cgroups to bind tasks to sets of CPUs.
- man taskset: Using the taskset command to bind tasks to sets
of CPUs.
diff --git a/Documentation/laptops/asus-laptop.txt b/Documentation/admin-guide/laptops/asus-laptop.rst
index 5f2858712aa0..95176321a25a 100644
--- a/Documentation/laptops/asus-laptop.txt
+++ b/Documentation/admin-guide/laptops/asus-laptop.rst
@@ -1,6 +1,9 @@
+==================
Asus Laptop Extras
+==================
Version 0.1
+
August 6, 2009
Corentin Chary <corentincj@iksaif.net>
@@ -10,11 +13,12 @@ http://acpi4asus.sf.net/
It may also support some MEDION, JVC or VICTOR laptops (such as MEDION 9675 or
VICTOR XP7210 for example). It makes all the extra buttons generate input
events (like keyboards).
+
On some models adds support for changing the display brightness and output,
switching the LCD backlight on and off, and most importantly, allows you to
blink those fancy LEDs intended for reporting mail and wireless status.
-This driver supercedes the old asus_acpi driver.
+This driver supersedes the old asus_acpi driver.
Requirements
------------
@@ -49,7 +53,7 @@ Usage
see some lines like this :
Asus Laptop Extras version 0.42
- L2D model detected.
+ - L2D model detected.
If it is not the output you have on your laptop, send it (and the laptop's
DSDT) to me.
@@ -68,9 +72,12 @@ Usage
LEDs
----
- You can modify LEDs be echoing values to /sys/class/leds/asus::*/brightness :
+ You can modify LEDs by echoing values to `/sys/class/leds/asus/*/brightness`::
+
echo 1 > /sys/class/leds/asus::mail/brightness
+
will switch the mail LED on.
+
You can also know if they are on/off by reading their content and use
kernel triggers like disk-activity or heartbeat.
@@ -81,7 +88,7 @@ Backlight
/sys/class/backlight/asus-laptop/. Brightness Values are between 0 and 15.
Wireless devices
----------------
+----------------
You can turn the internal Bluetooth adapter on/off with the bluetooth entry
(only on models with Bluetooth). This usually controls the associated LED.
@@ -93,18 +100,20 @@ Display switching
Note: the display switching code is currently considered EXPERIMENTAL.
Switching works for the following models:
- L3800C
- A2500H
- L5800C
- M5200N
- W1000N (albeit with some glitches)
- M6700R
- A6JC
- F3J
+
+ - L3800C
+ - A2500H
+ - L5800C
+ - M5200N
+ - W1000N (albeit with some glitches)
+ - M6700R
+ - A6JC
+ - F3J
Switching doesn't work for the following:
- M3700N
- L2X00D (locks the laptop under certain conditions)
+
+ - M3700N
+ - L2X00D (locks the laptop under certain conditions)
To switch the displays, echo values from 0 to 15 to
/sys/devices/platform/asus-laptop/display. The significance of those values
@@ -113,48 +122,51 @@ Display switching
+-------+-----+-----+-----+-----+-----+
| Bin | Val | DVI | TV | CRT | LCD |
+-------+-----+-----+-----+-----+-----+
- + 0000 + 0 + + + + +
+ | 0000 | 0 | | | | |
+-------+-----+-----+-----+-----+-----+
- + 0001 + 1 + + + + X +
+ | 0001 | 1 | | | | X |
+-------+-----+-----+-----+-----+-----+
- + 0010 + 2 + + + X + +
+ | 0010 | 2 | | | X | |
+-------+-----+-----+-----+-----+-----+
- + 0011 + 3 + + + X + X +
+ | 0011 | 3 | | | X | X |
+-------+-----+-----+-----+-----+-----+
- + 0100 + 4 + + X + + +
+ | 0100 | 4 | | X | | |
+-------+-----+-----+-----+-----+-----+
- + 0101 + 5 + + X + + X +
+ | 0101 | 5 | | X | | X |
+-------+-----+-----+-----+-----+-----+
- + 0110 + 6 + + X + X + +
+ | 0110 | 6 | | X | X | |
+-------+-----+-----+-----+-----+-----+
- + 0111 + 7 + + X + X + X +
+ | 0111 | 7 | | X | X | X |
+-------+-----+-----+-----+-----+-----+
- + 1000 + 8 + X + + + +
+ | 1000 | 8 | X | | | |
+-------+-----+-----+-----+-----+-----+
- + 1001 + 9 + X + + + X +
+ | 1001 | 9 | X | | | X |
+-------+-----+-----+-----+-----+-----+
- + 1010 + 10 + X + + X + +
+ | 1010 | 10 | X | | X | |
+-------+-----+-----+-----+-----+-----+
- + 1011 + 11 + X + + X + X +
+ | 1011 | 11 | X | | X | X |
+-------+-----+-----+-----+-----+-----+
- + 1100 + 12 + X + X + + +
+ | 1100 | 12 | X | X | | |
+-------+-----+-----+-----+-----+-----+
- + 1101 + 13 + X + X + + X +
+ | 1101 | 13 | X | X | | X |
+-------+-----+-----+-----+-----+-----+
- + 1110 + 14 + X + X + X + +
+ | 1110 | 14 | X | X | X | |
+-------+-----+-----+-----+-----+-----+
- + 1111 + 15 + X + X + X + X +
+ | 1111 | 15 | X | X | X | X |
+-------+-----+-----+-----+-----+-----+
In most cases, the appropriate displays must be plugged in for the above
combinations to work. TV-Out may need to be initialized at boot time.
Debugging:
+
1) Check whether the Fn+F8 key:
+
a) does not lock the laptop (try a boot with noapic / nolapic if it does)
b) generates events (0x6n, where n is the value corresponding to the
configuration above)
c) actually works
+
Record the disp value at every configuration.
2) Echo values from 0 to 15 to /sys/devices/platform/asus-laptop/display.
Record its value, note any change. If nothing changes, try a broader range,
@@ -164,7 +176,7 @@ Display switching
Note: on some machines (e.g. L3C), after the module has been loaded, only 0x6n
events are generated and no actual switching occurs. In such a case, a line
- like:
+ like::
echo $((10#$arg-60)) > /sys/devices/platform/asus-laptop/display
@@ -180,15 +192,16 @@ LED display
several items of information.
LED display works for the following models:
- W1000N
- W1J
- To control the LED display, use the following :
+ - W1000N
+ - W1J
+
+ To control the LED display, use the following::
echo 0x0T000DDD > /sys/devices/platform/asus-laptop/
where T control the 3 letters display, and DDD the 3 digits display,
- according to the tables below.
+ according to the tables below::
DDD (digits)
000 to 999 = display digits
@@ -208,8 +221,8 @@ LED display
For example "echo 0x01000001 >/sys/devices/platform/asus-laptop/ledd"
would display "DVD001".
-Driver options:
----------------
+Driver options
+--------------
Options can be passed to the asus-laptop driver using the standard
module argument syntax (<param>=<value> when passing the option to the
@@ -219,6 +232,7 @@ Driver options:
wapf: WAPF defines the behavior of the Fn+Fx wlan key
The significance of values is yet to be found, but
most of the time:
+
- 0x0 should do nothing
- 0x1 should allow to control the device with Fn+Fx key.
- 0x4 should send an ACPI event (0x88) while pressing the Fn+Fx key
@@ -237,7 +251,7 @@ Unsupported models
- ASUS L7300G
- ASUS L8400
-Patches, Errors, Questions:
+Patches, Errors, Questions
--------------------------
I appreciate any success or failure
@@ -253,5 +267,5 @@ Patches, Errors, Questions:
Any other comments or patches are also more than welcome.
acpi4asus-user@lists.sourceforge.net
- http://sourceforge.net/projects/acpi4asus
+ http://sourceforge.net/projects/acpi4asus
diff --git a/Documentation/laptops/disk-shock-protection.txt b/Documentation/admin-guide/laptops/disk-shock-protection.rst
index 0e6ba2663834..e97c5f78d8c3 100644
--- a/Documentation/laptops/disk-shock-protection.txt
+++ b/Documentation/admin-guide/laptops/disk-shock-protection.rst
@@ -1,17 +1,18 @@
+==========================
Hard disk shock protection
==========================
Author: Elias Oltmanns <eo@nebensachen.de>
+
Last modified: 2008-10-03
-0. Contents
------------
+.. 0. Contents
-1. Intro
-2. The interface
-3. References
-4. CREDITS
+ 1. Intro
+ 2. The interface
+ 3. References
+ 4. CREDITS
1. Intro
@@ -36,8 +37,8 @@ that).
----------------
For each ATA device, the kernel exports the file
-block/*/device/unload_heads in sysfs (here assumed to be mounted under
-/sys). Access to /sys/block/*/device/unload_heads is denied with
+`block/*/device/unload_heads` in sysfs (here assumed to be mounted under
+/sys). Access to `/sys/block/*/device/unload_heads` is denied with
-EOPNOTSUPP if the device does not support the unload feature.
Otherwise, writing an integer value to this file will take the heads
of the respective drive off the platter and block all I/O operations
@@ -54,18 +55,18 @@ cancel a previously set timeout and resume normal operation
immediately by specifying a timeout of 0. Values below -2 are rejected
with -EINVAL (see below for the special meaning of -1 and -2). If the
timeout specified for a recent head park request has not yet expired,
-reading from /sys/block/*/device/unload_heads will report the number
+reading from `/sys/block/*/device/unload_heads` will report the number
of milliseconds remaining until normal operation will be resumed;
otherwise, reading the unload_heads attribute will return 0.
For example, do the following in order to park the heads of drive
-/dev/sda and stop all I/O operations for five seconds:
+/dev/sda and stop all I/O operations for five seconds::
-# echo 5000 > /sys/block/sda/device/unload_heads
+ # echo 5000 > /sys/block/sda/device/unload_heads
-A simple
+A simple::
-# cat /sys/block/sda/device/unload_heads
+ # cat /sys/block/sda/device/unload_heads
will show you how many milliseconds are left before normal operation
will be resumed.
@@ -112,9 +113,9 @@ unload_heads attribute. If you know that your device really does
support the unload feature (for instance, because the vendor of your
laptop or the hard drive itself told you so), then you can tell the
kernel to enable the usage of this feature for that drive by writing
-the special value -1 to the unload_heads attribute:
+the special value -1 to the unload_heads attribute::
-# echo -1 > /sys/block/sda/device/unload_heads
+ # echo -1 > /sys/block/sda/device/unload_heads
will enable the feature for /dev/sda, and giving -2 instead of -1 will
disable it again.
@@ -135,6 +136,7 @@ for use. Please feel free to add projects that have been the victims
of my ignorance.
- http://www.thinkwiki.org/wiki/HDAPS
+
See this page for information about Linux support of the hard disk
active protection system as implemented in IBM/Lenovo Thinkpads.
diff --git a/Documentation/admin-guide/laptops/index.rst b/Documentation/admin-guide/laptops/index.rst
new file mode 100644
index 000000000000..cd9a1c2695fd
--- /dev/null
+++ b/Documentation/admin-guide/laptops/index.rst
@@ -0,0 +1,17 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==============
+Laptop Drivers
+==============
+
+.. toctree::
+ :maxdepth: 1
+
+ asus-laptop
+ disk-shock-protection
+ laptop-mode
+ lg-laptop
+ sony-laptop
+ sonypi
+ thinkpad-acpi
+ toshiba_haps
diff --git a/Documentation/laptops/laptop-mode.txt b/Documentation/admin-guide/laptops/laptop-mode.rst
index 1c707fc9b141..c984c4262f2e 100644
--- a/Documentation/laptops/laptop-mode.txt
+++ b/Documentation/admin-guide/laptops/laptop-mode.rst
@@ -1,8 +1,11 @@
+===============================================
How to conserve battery power using laptop-mode
------------------------------------------------
+===============================================
Document Author: Bart Samwel (bart@samwel.tk)
+
Date created: January 2, 2004
+
Last modified: December 06, 2004
Introduction
@@ -12,17 +15,16 @@ Laptop mode is used to minimize the time that the hard disk needs to be spun up,
to conserve battery power on laptops. It has been reported to cause significant
power savings.
-Contents
---------
+.. Contents
-* Introduction
-* Installation
-* Caveats
-* The Details
-* Tips & Tricks
-* Control script
-* ACPI integration
-* Monitoring tool
+ * Introduction
+ * Installation
+ * Caveats
+ * The Details
+ * Tips & Tricks
+ * Control script
+ * ACPI integration
+ * Monitoring tool
Installation
@@ -33,7 +35,7 @@ or anything. Simply install all the files included in this document, and
laptop mode will automatically be started when you're on battery. For
your convenience, a tarball containing an installer can be downloaded at:
-http://www.samwel.tk/laptop_mode/laptop_mode/
+ http://www.samwel.tk/laptop_mode/laptop_mode/
To configure laptop mode, you need to edit the configuration file, which is
located in /etc/default/laptop-mode on Debian-based systems, or in
@@ -209,7 +211,7 @@ Tips & Tricks
this on powerbooks too. I hope that this is a piece of information that
might be useful to the Laptop Mode patch or its users."
-* In syslog.conf, you can prefix entries with a dash ``-'' to omit syncing the
+* In syslog.conf, you can prefix entries with a dash `-` to omit syncing the
file after every logging. When you're using laptop-mode and your disk doesn't
spin down, this is a likely culprit.
@@ -233,83 +235,82 @@ configuration file
It should be installed as /etc/default/laptop-mode on Debian, and as
/etc/sysconfig/laptop-mode on Red Hat, SUSE, Mandrake, and other work-alikes.
---------------------CONFIG FILE BEGIN-------------------------------------------
-# Maximum time, in seconds, of hard drive spindown time that you are
-# comfortable with. Worst case, it's possible that you could lose this
-# amount of work if your battery fails you while in laptop mode.
-#MAX_AGE=600
-
-# Automatically disable laptop mode when the number of minutes of battery
-# that you have left goes below this threshold.
-MINIMUM_BATTERY_MINUTES=10
-
-# Read-ahead, in 512-byte sectors. You can spin down the disk while playing MP3/OGG
-# by setting the disk readahead to 8MB (READAHEAD=16384). Effectively, the disk
-# will read a complete MP3 at once, and will then spin down while the MP3/OGG is
-# playing.
-#READAHEAD=4096
-
-# Shall we remount journaled fs. with appropriate commit interval? (1=yes)
-#DO_REMOUNTS=1
-
-# And shall we add the "noatime" option to that as well? (1=yes)
-#DO_REMOUNT_NOATIME=1
-
-# Dirty synchronous ratio. At this percentage of dirty pages the process
-# which
-# calls write() does its own writeback
-#DIRTY_RATIO=40
-
-#
-# Allowed dirty background ratio, in percent. Once DIRTY_RATIO has been
-# exceeded, the kernel will wake flusher threads which will then reduce the
-# amount of dirty memory to dirty_background_ratio. Set this nice and low,
-# so once some writeout has commenced, we do a lot of it.
-#
-#DIRTY_BACKGROUND_RATIO=5
-
-# kernel default dirty buffer age
-#DEF_AGE=30
-#DEF_UPDATE=5
-#DEF_DIRTY_BACKGROUND_RATIO=10
-#DEF_DIRTY_RATIO=40
-#DEF_XFS_AGE_BUFFER=15
-#DEF_XFS_SYNC_INTERVAL=30
-#DEF_XFS_BUFD_INTERVAL=1
-
-# This must be adjusted manually to the value of HZ in the running kernel
-# on 2.4, until the XFS people change their 2.4 external interfaces to work in
-# centisecs. This can be automated, but it's a work in progress that still
-# needs# some fixes. On 2.6 kernels, XFS uses USER_HZ instead of HZ for
-# external interfaces, and that is currently always set to 100. So you don't
-# need to change this on 2.6.
-#XFS_HZ=100
-
-# Should the maximum CPU frequency be adjusted down while on battery?
-# Requires CPUFreq to be setup.
-# See Documentation/admin-guide/pm/cpufreq.rst for more info
-#DO_CPU=0
-
-# When on battery what is the maximum CPU speed that the system should
-# use? Legal values are "slowest" for the slowest speed that your
-# CPU is able to operate at, or a value listed in:
-# /sys/devices/system/cpu/cpu0/cpufreq/scaling_available_frequencies
-# Only applicable if DO_CPU=1.
-#CPU_MAXFREQ=slowest
-
-# Idle timeout for your hard drive (man hdparm for valid values, -S option)
-# Default is 2 hours on AC (AC_HD=244) and 20 seconds for battery (BATT_HD=4).
-#AC_HD=244
-#BATT_HD=4
-
-# The drives for which to adjust the idle timeout. Separate them by a space,
-# e.g. HD="/dev/hda /dev/hdb".
-#HD="/dev/hda"
-
-# Set the spindown timeout on a hard drive?
-#DO_HD=1
-
---------------------CONFIG FILE END---------------------------------------------
+Config file::
+
+ # Maximum time, in seconds, of hard drive spindown time that you are
+ # comfortable with. Worst case, it's possible that you could lose this
+ # amount of work if your battery fails you while in laptop mode.
+ #MAX_AGE=600
+
+ # Automatically disable laptop mode when the number of minutes of battery
+ # that you have left goes below this threshold.
+ MINIMUM_BATTERY_MINUTES=10
+
+ # Read-ahead, in 512-byte sectors. You can spin down the disk while playing MP3/OGG
+ # by setting the disk readahead to 8MB (READAHEAD=16384). Effectively, the disk
+ # will read a complete MP3 at once, and will then spin down while the MP3/OGG is
+ # playing.
+ #READAHEAD=4096
+
+ # Shall we remount journaled fs. with appropriate commit interval? (1=yes)
+ #DO_REMOUNTS=1
+
+ # And shall we add the "noatime" option to that as well? (1=yes)
+ #DO_REMOUNT_NOATIME=1
+
+ # Dirty synchronous ratio. At this percentage of dirty pages the process
+ # which
+ # calls write() does its own writeback
+ #DIRTY_RATIO=40
+
+ #
+ # Allowed dirty background ratio, in percent. Once DIRTY_RATIO has been
+ # exceeded, the kernel will wake flusher threads which will then reduce the
+ # amount of dirty memory to dirty_background_ratio. Set this nice and low,
+ # so once some writeout has commenced, we do a lot of it.
+ #
+ #DIRTY_BACKGROUND_RATIO=5
+
+ # kernel default dirty buffer age
+ #DEF_AGE=30
+ #DEF_UPDATE=5
+ #DEF_DIRTY_BACKGROUND_RATIO=10
+ #DEF_DIRTY_RATIO=40
+ #DEF_XFS_AGE_BUFFER=15
+ #DEF_XFS_SYNC_INTERVAL=30
+ #DEF_XFS_BUFD_INTERVAL=1
+
+ # This must be adjusted manually to the value of HZ in the running kernel
+ # on 2.4, until the XFS people change their 2.4 external interfaces to work in
+ # centisecs. This can be automated, but it's a work in progress that still
+ # needs some fixes. On 2.6 kernels, XFS uses USER_HZ instead of HZ for
+ # external interfaces, and that is currently always set to 100. So you don't
+ # need to change this on 2.6.
+ #XFS_HZ=100
+
+ # Should the maximum CPU frequency be adjusted down while on battery?
+ # Requires CPUFreq to be set up.
+ # See Documentation/admin-guide/pm/cpufreq.rst for more info
+ #DO_CPU=0
+
+ # When on battery what is the maximum CPU speed that the system should
+ # use? Legal values are "slowest" for the slowest speed that your
+ # CPU is able to operate at, or a value listed in:
+ # /sys/devices/system/cpu/cpu0/cpufreq/scaling_available_frequencies
+ # Only applicable if DO_CPU=1.
+ #CPU_MAXFREQ=slowest
+
+ # Idle timeout for your hard drive (man hdparm for valid values, -S option)
+ # Default is 2 hours on AC (AC_HD=244) and 20 seconds for battery (BATT_HD=4).
+ #AC_HD=244
+ #BATT_HD=4
+
+ # The drives for which to adjust the idle timeout. Separate them by a space,
+ # e.g. HD="/dev/hda /dev/hdb".
+ #HD="/dev/hda"
+
+ # Set the spindown timeout on a hard drive?
+ #DO_HD=1
Control script
@@ -318,125 +319,126 @@ Control script
Please note that this control script works for the Linux 2.4 and 2.6 series (thanks
to Kiko Piris).
---------------------CONTROL SCRIPT BEGIN----------------------------------------
-#!/bin/bash
-
-# start or stop laptop_mode, best run by a power management daemon when
-# ac gets connected/disconnected from a laptop
-#
-# install as /sbin/laptop_mode
-#
-# Contributors to this script: Kiko Piris
-# Bart Samwel
-# Micha Feigin
-# Andrew Morton
-# Herve Eychenne
-# Dax Kelson
-#
-# Original Linux 2.4 version by: Jens Axboe
-
-#############################################################################
-
-# Source config
-if [ -f /etc/default/laptop-mode ] ; then
+Control script::
+
+ #!/bin/bash
+
+ # start or stop laptop_mode, best run by a power management daemon when
+ # ac gets connected/disconnected from a laptop
+ #
+ # install as /sbin/laptop_mode
+ #
+ # Contributors to this script: Kiko Piris
+ # Bart Samwel
+ # Micha Feigin
+ # Andrew Morton
+ # Herve Eychenne
+ # Dax Kelson
+ #
+ # Original Linux 2.4 version by: Jens Axboe
+
+ #############################################################################
+
+ # Source config
+ if [ -f /etc/default/laptop-mode ] ; then
# Debian
. /etc/default/laptop-mode
-elif [ -f /etc/sysconfig/laptop-mode ] ; then
+ elif [ -f /etc/sysconfig/laptop-mode ] ; then
# Others
- . /etc/sysconfig/laptop-mode
-fi
-
-# Don't raise an error if the config file is incomplete
-# set defaults instead:
-
-# Maximum time, in seconds, of hard drive spindown time that you are
-# comfortable with. Worst case, it's possible that you could lose this
-# amount of work if your battery fails you while in laptop mode.
-MAX_AGE=${MAX_AGE:-'600'}
-
-# Read-ahead, in kilobytes
-READAHEAD=${READAHEAD:-'4096'}
-
-# Shall we remount journaled fs. with appropriate commit interval? (1=yes)
-DO_REMOUNTS=${DO_REMOUNTS:-'1'}
-
-# And shall we add the "noatime" option to that as well? (1=yes)
-DO_REMOUNT_NOATIME=${DO_REMOUNT_NOATIME:-'1'}
-
-# Shall we adjust the idle timeout on a hard drive?
-DO_HD=${DO_HD:-'1'}
-
-# Adjust idle timeout on which hard drive?
-HD="${HD:-'/dev/hda'}"
-
-# spindown time for HD (hdparm -S values)
-AC_HD=${AC_HD:-'244'}
-BATT_HD=${BATT_HD:-'4'}
-
-# Dirty synchronous ratio. At this percentage of dirty pages the process which
-# calls write() does its own writeback
-DIRTY_RATIO=${DIRTY_RATIO:-'40'}
-
-# cpu frequency scaling
-# See Documentation/admin-guide/pm/cpufreq.rst for more info
-DO_CPU=${CPU_MANAGE:-'0'}
-CPU_MAXFREQ=${CPU_MAXFREQ:-'slowest'}
-
-#
-# Allowed dirty background ratio, in percent. Once DIRTY_RATIO has been
-# exceeded, the kernel will wake flusher threads which will then reduce the
-# amount of dirty memory to dirty_background_ratio. Set this nice and low,
-# so once some writeout has commenced, we do a lot of it.
-#
-DIRTY_BACKGROUND_RATIO=${DIRTY_BACKGROUND_RATIO:-'5'}
-
-# kernel default dirty buffer age
-DEF_AGE=${DEF_AGE:-'30'}
-DEF_UPDATE=${DEF_UPDATE:-'5'}
-DEF_DIRTY_BACKGROUND_RATIO=${DEF_DIRTY_BACKGROUND_RATIO:-'10'}
-DEF_DIRTY_RATIO=${DEF_DIRTY_RATIO:-'40'}
-DEF_XFS_AGE_BUFFER=${DEF_XFS_AGE_BUFFER:-'15'}
-DEF_XFS_SYNC_INTERVAL=${DEF_XFS_SYNC_INTERVAL:-'30'}
-DEF_XFS_BUFD_INTERVAL=${DEF_XFS_BUFD_INTERVAL:-'1'}
-
-# This must be adjusted manually to the value of HZ in the running kernel
-# on 2.4, until the XFS people change their 2.4 external interfaces to work in
-# centisecs. This can be automated, but it's a work in progress that still needs
-# some fixes. On 2.6 kernels, XFS uses USER_HZ instead of HZ for external
-# interfaces, and that is currently always set to 100. So you don't need to
-# change this on 2.6.
-XFS_HZ=${XFS_HZ:-'100'}
-
-#############################################################################
-
-KLEVEL="$(uname -r |
- {
+ . /etc/sysconfig/laptop-mode
+ fi
+
+ # Don't raise an error if the config file is incomplete
+ # set defaults instead:
+
+ # Maximum time, in seconds, of hard drive spindown time that you are
+ # comfortable with. Worst case, it's possible that you could lose this
+ # amount of work if your battery fails you while in laptop mode.
+ MAX_AGE=${MAX_AGE:-'600'}
+
+ # Read-ahead, in kilobytes
+ READAHEAD=${READAHEAD:-'4096'}
+
+ # Shall we remount journaled fs. with appropriate commit interval? (1=yes)
+ DO_REMOUNTS=${DO_REMOUNTS:-'1'}
+
+ # And shall we add the "noatime" option to that as well? (1=yes)
+ DO_REMOUNT_NOATIME=${DO_REMOUNT_NOATIME:-'1'}
+
+ # Shall we adjust the idle timeout on a hard drive?
+ DO_HD=${DO_HD:-'1'}
+
+ # Adjust idle timeout on which hard drive?
+ HD="${HD:-'/dev/hda'}"
+
+ # spindown time for HD (hdparm -S values)
+ AC_HD=${AC_HD:-'244'}
+ BATT_HD=${BATT_HD:-'4'}
+
+ # Dirty synchronous ratio. At this percentage of dirty pages the process which
+ # calls write() does its own writeback
+ DIRTY_RATIO=${DIRTY_RATIO:-'40'}
+
+ # cpu frequency scaling
+ # See Documentation/admin-guide/pm/cpufreq.rst for more info
+ DO_CPU=${CPU_MANAGE:-'0'}
+ CPU_MAXFREQ=${CPU_MAXFREQ:-'slowest'}
+
+ #
+ # Allowed dirty background ratio, in percent. Once DIRTY_RATIO has been
+ # exceeded, the kernel will wake flusher threads which will then reduce the
+ # amount of dirty memory to dirty_background_ratio. Set this nice and low,
+ # so once some writeout has commenced, we do a lot of it.
+ #
+ DIRTY_BACKGROUND_RATIO=${DIRTY_BACKGROUND_RATIO:-'5'}
+
+ # kernel default dirty buffer age
+ DEF_AGE=${DEF_AGE:-'30'}
+ DEF_UPDATE=${DEF_UPDATE:-'5'}
+ DEF_DIRTY_BACKGROUND_RATIO=${DEF_DIRTY_BACKGROUND_RATIO:-'10'}
+ DEF_DIRTY_RATIO=${DEF_DIRTY_RATIO:-'40'}
+ DEF_XFS_AGE_BUFFER=${DEF_XFS_AGE_BUFFER:-'15'}
+ DEF_XFS_SYNC_INTERVAL=${DEF_XFS_SYNC_INTERVAL:-'30'}
+ DEF_XFS_BUFD_INTERVAL=${DEF_XFS_BUFD_INTERVAL:-'1'}
+
+ # This must be adjusted manually to the value of HZ in the running kernel
+ # on 2.4, until the XFS people change their 2.4 external interfaces to work in
+ # centisecs. This can be automated, but it's a work in progress that still needs
+ # some fixes. On 2.6 kernels, XFS uses USER_HZ instead of HZ for external
+ # interfaces, and that is currently always set to 100. So you don't need to
+ # change this on 2.6.
+ XFS_HZ=${XFS_HZ:-'100'}
+
+ #############################################################################
+
+ KLEVEL="$(uname -r |
+ {
IFS='.' read a b c
echo $a.$b
}
-)"
-case "$KLEVEL" in
+ )"
+ case "$KLEVEL" in
"2.4"|"2.6")
;;
*)
echo "Unhandled kernel version: $KLEVEL ('uname -r' = '$(uname -r)')" >&2
exit 1
;;
-esac
+ esac
-if [ ! -e /proc/sys/vm/laptop_mode ] ; then
+ if [ ! -e /proc/sys/vm/laptop_mode ] ; then
echo "Kernel is not patched with laptop_mode patch." >&2
exit 1
-fi
+ fi
-if [ ! -w /proc/sys/vm/laptop_mode ] ; then
+ if [ ! -w /proc/sys/vm/laptop_mode ] ; then
echo "You do not have enough privileges to enable laptop_mode." >&2
exit 1
-fi
+ fi
-# Remove an option (the first parameter) of the form option=<number> from
-# a mount options string (the rest of the parameters).
-parse_mount_opts () {
+ # Remove an option (the first parameter) of the form option=<number> from
+ # a mount options string (the rest of the parameters).
+ parse_mount_opts () {
OPT="$1"
shift
echo ",$*," | sed \
@@ -444,11 +446,11 @@ parse_mount_opts () {
-e 's/,,*/,/g' \
-e 's/^,//' \
-e 's/,$//'
-}
+ }
-# Remove an option (the first parameter) without any arguments from
-# a mount option string (the rest of the parameters).
-parse_nonumber_mount_opts () {
+ # Remove an option (the first parameter) without any arguments from
+ # a mount option string (the rest of the parameters).
+ parse_nonumber_mount_opts () {
OPT="$1"
shift
echo ",$*," | sed \
@@ -456,20 +458,20 @@ parse_nonumber_mount_opts () {
-e 's/,,*/,/g' \
-e 's/^,//' \
-e 's/,$//'
-}
-
-# Find out the state of a yes/no option (e.g. "atime"/"noatime") in
-# fstab for a given filesystem, and use this state to replace the
-# value of the option in another mount options string. The device
-# is the first argument, the option name the second, and the default
-# value the third. The remainder is the mount options string.
-#
-# Example:
-# parse_yesno_opts_wfstab /dev/hda1 atime atime defaults,noatime
-#
-# If fstab contains, say, "rw" for this filesystem, then the result
-# will be "defaults,atime".
-parse_yesno_opts_wfstab () {
+ }
+
+ # Find out the state of a yes/no option (e.g. "atime"/"noatime") in
+ # fstab for a given filesystem, and use this state to replace the
+ # value of the option in another mount options string. The device
+ # is the first argument, the option name the second, and the default
+ # value the third. The remainder is the mount options string.
+ #
+ # Example:
+ # parse_yesno_opts_wfstab /dev/hda1 atime atime defaults,noatime
+ #
+ # If fstab contains, say, "rw" for this filesystem, then the result
+ # will be "defaults,atime".
+ parse_yesno_opts_wfstab () {
L_DEV="$1"
OPT="$2"
DEF_OPT="$3"
@@ -491,21 +493,21 @@ parse_yesno_opts_wfstab () {
# option not specified in fstab -- choose the default.
echo "$PARSEDOPTS1,$DEF_OPT"
fi
-}
-
-# Find out the state of a numbered option (e.g. "commit=NNN") in
-# fstab for a given filesystem, and use this state to replace the
-# value of the option in another mount options string. The device
-# is the first argument, and the option name the second. The
-# remainder is the mount options string in which the replacement
-# must be done.
-#
-# Example:
-# parse_mount_opts_wfstab /dev/hda1 commit defaults,commit=7
-#
-# If fstab contains, say, "commit=3,rw" for this filesystem, then the
-# result will be "rw,commit=3".
-parse_mount_opts_wfstab () {
+ }
+
+ # Find out the state of a numbered option (e.g. "commit=NNN") in
+ # fstab for a given filesystem, and use this state to replace the
+ # value of the option in another mount options string. The device
+ # is the first argument, and the option name the second. The
+ # remainder is the mount options string in which the replacement
+ # must be done.
+ #
+ # Example:
+ # parse_mount_opts_wfstab /dev/hda1 commit defaults,commit=7
+ #
+ # If fstab contains, say, "commit=3,rw" for this filesystem, then the
+ # result will be "rw,commit=3".
+ parse_mount_opts_wfstab () {
L_DEV="$1"
OPT="$2"
shift 2
@@ -523,9 +525,9 @@ parse_mount_opts_wfstab () {
# option not specified in fstab: set it to 0
echo "$PARSEDOPTS1,$OPT=0"
fi
-}
+ }
-deduce_fstype () {
+ deduce_fstype () {
MP="$1"
# My root filesystem unfortunately has
# type "unknown" in /etc/mtab. If we encounter
@@ -538,13 +540,13 @@ deduce_fstype () {
exit 0
fi
done
-}
+ }
-if [ $DO_REMOUNT_NOATIME -eq 1 ] ; then
+ if [ $DO_REMOUNT_NOATIME -eq 1 ] ; then
NOATIME_OPT=",noatime"
-fi
+ fi
-case "$1" in
+ case "$1" in
start)
AGE=$((100*$MAX_AGE))
XFS_AGE=$(($XFS_HZ*$MAX_AGE))
@@ -687,10 +689,9 @@ case "$1" in
exit 1
;;
-esac
+ esac
-exit 0
---------------------CONTROL SCRIPT END------------------------------------------
+ exit 0
ACPI integration
@@ -701,78 +702,76 @@ kick off the laptop_mode script and run hdparm. The part that
automatically disables laptop mode when the battery is low was
written by Jan Topinski.
------------------/etc/acpi/events/ac_adapter BEGIN------------------------------
-event=ac_adapter
-action=/etc/acpi/actions/ac.sh %e
-----------------/etc/acpi/events/ac_adapter END---------------------------------
+/etc/acpi/events/ac_adapter::
+
+ event=ac_adapter
+ action=/etc/acpi/actions/ac.sh %e
+
+/etc/acpi/events/battery::
+ event=battery.*
+ action=/etc/acpi/actions/battery.sh %e
------------------/etc/acpi/events/battery BEGIN---------------------------------
-event=battery.*
-action=/etc/acpi/actions/battery.sh %e
-----------------/etc/acpi/events/battery END------------------------------------
+/etc/acpi/actions/ac.sh::
+ #!/bin/bash
-----------------/etc/acpi/actions/ac.sh BEGIN-----------------------------------
-#!/bin/bash
+ # ac on/offline event handler
-# ac on/offline event handler
+ status=`awk '/^state: / { print $2 }' /proc/acpi/ac_adapter/$2/state`
-status=`awk '/^state: / { print $2 }' /proc/acpi/ac_adapter/$2/state`
+ case $status in
+ "on-line")
+ /sbin/laptop_mode stop
+ exit 0
+ ;;
+ "off-line")
+ /sbin/laptop_mode start
+ exit 0
+ ;;
+ esac
-case $status in
- "on-line")
- /sbin/laptop_mode stop
- exit 0
- ;;
- "off-line")
- /sbin/laptop_mode start
- exit 0
- ;;
-esac
----------------------------/etc/acpi/actions/ac.sh END--------------------------
+/etc/acpi/actions/battery.sh::
----------------------------/etc/acpi/actions/battery.sh BEGIN-------------------
-#! /bin/bash
+ #! /bin/bash
-# Automatically disable laptop mode when the battery almost runs out.
+ # Automatically disable laptop mode when the battery almost runs out.
-BATT_INFO=/proc/acpi/battery/$2/state
+ BATT_INFO=/proc/acpi/battery/$2/state
-if [[ -f /proc/sys/vm/laptop_mode ]]
-then
- LM=`cat /proc/sys/vm/laptop_mode`
- if [[ $LM -gt 0 ]]
- then
- if [[ -f $BATT_INFO ]]
+ if [[ -f /proc/sys/vm/laptop_mode ]]
+ then
+ LM=`cat /proc/sys/vm/laptop_mode`
+ if [[ $LM -gt 0 ]]
then
- # Source the config file only now that we know we need
- if [ -f /etc/default/laptop-mode ] ; then
- # Debian
- . /etc/default/laptop-mode
- elif [ -f /etc/sysconfig/laptop-mode ] ; then
- # Others
- . /etc/sysconfig/laptop-mode
- fi
- MINIMUM_BATTERY_MINUTES=${MINIMUM_BATTERY_MINUTES:-'10'}
-
- ACTION="`cat $BATT_INFO | grep charging | cut -c 26-`"
- if [[ ACTION -eq "discharging" ]]
- then
- PRESENT_RATE=`cat $BATT_INFO | grep "present rate:" | sed "s/.* \([0-9][0-9]* \).*/\1/" `
- REMAINING=`cat $BATT_INFO | grep "remaining capacity:" | sed "s/.* \([0-9][0-9]* \).*/\1/" `
- fi
- if (($REMAINING * 60 / $PRESENT_RATE < $MINIMUM_BATTERY_MINUTES))
- then
- /sbin/laptop_mode stop
- fi
- else
- logger -p daemon.warning "You are using laptop mode and your battery interface $BATT_INFO is missing. This may lead to loss of data when the battery runs out. Check kernel ACPI support and /proc/acpi/battery folder, and edit /etc/acpi/battery.sh to set BATT_INFO to the correct path."
+ if [[ -f $BATT_INFO ]]
+ then
+ # Source the config file only now that we know we need
+ if [ -f /etc/default/laptop-mode ] ; then
+ # Debian
+ . /etc/default/laptop-mode
+ elif [ -f /etc/sysconfig/laptop-mode ] ; then
+ # Others
+ . /etc/sysconfig/laptop-mode
+ fi
+ MINIMUM_BATTERY_MINUTES=${MINIMUM_BATTERY_MINUTES:-'10'}
+
+ ACTION="`cat $BATT_INFO | grep charging | cut -c 26-`"
+ if [[ ACTION -eq "discharging" ]]
+ then
+ PRESENT_RATE=`cat $BATT_INFO | grep "present rate:" | sed "s/.* \([0-9][0-9]* \).*/\1/" `
+ REMAINING=`cat $BATT_INFO | grep "remaining capacity:" | sed "s/.* \([0-9][0-9]* \).*/\1/" `
+ fi
+ if (($REMAINING * 60 / $PRESENT_RATE < $MINIMUM_BATTERY_MINUTES))
+ then
+ /sbin/laptop_mode stop
+ fi
+ else
+ logger -p daemon.warning "You are using laptop mode and your battery interface $BATT_INFO is missing. This may lead to loss of data when the battery runs out. Check kernel ACPI support and /proc/acpi/battery folder, and edit /etc/acpi/battery.sh to set BATT_INFO to the correct path."
+ fi
fi
- fi
-fi
----------------------------/etc/acpi/actions/battery.sh END--------------------
+ fi
Monitoring tool
diff --git a/Documentation/laptops/lg-laptop.rst b/Documentation/admin-guide/laptops/lg-laptop.rst
index f2c2ffe31101..ce9b14671cb9 100644
--- a/Documentation/laptops/lg-laptop.rst
+++ b/Documentation/admin-guide/laptops/lg-laptop.rst
@@ -1,6 +1,5 @@
.. SPDX-License-Identifier: GPL-2.0+
-:orphan:
LG Gram laptop extra features
=============================
diff --git a/Documentation/laptops/sony-laptop.txt b/Documentation/admin-guide/laptops/sony-laptop.rst
index 978b1e615155..9edcc7f6612f 100644
--- a/Documentation/laptops/sony-laptop.txt
+++ b/Documentation/admin-guide/laptops/sony-laptop.rst
@@ -1,7 +1,9 @@
+=========================================
Sony Notebook Control Driver (SNC) Readme
------------------------------------------
- Copyright (C) 2004- 2005 Stelian Pop <stelian@popies.net>
- Copyright (C) 2007 Mattia Dongili <malattia@linux.it>
+=========================================
+
+ - Copyright (C) 2004- 2005 Stelian Pop <stelian@popies.net>
+ - Copyright (C) 2007 Mattia Dongili <malattia@linux.it>
This mini-driver drives the SNC and SPIC device present in the ACPI BIOS of the
Sony Vaio laptops. This driver mixes both devices functions under the same
@@ -10,6 +12,7 @@ obsoleted by sony-laptop now.
Fn keys (hotkeys):
------------------
+
Some models report hotkeys through the SNC or SPIC devices, such events are
reported both through the ACPI subsystem as acpi events and through the INPUT
subsystem. See the logs of /proc/bus/input/devices to find out what those
@@ -28,11 +31,14 @@ If your laptop model supports it, you will find sysfs files in the
/sys/class/backlight/sony/
directory. You will be able to query and set the current screen
brightness:
+
+ ====================== =========================================
brightness get/set screen brightness (an integer
between 0 and 7)
actual_brightness reading from this file will query the HW
to get real brightness value
max_brightness the maximum brightness value
+ ====================== =========================================
Platform specific:
@@ -45,6 +51,8 @@ You then read/write integer values from/to those files by using
standard UNIX tools.
The files are:
+
+ ====================== ==========================================
brightness_default screen brightness which will be set
when the laptop will be rebooted
cdpower power on/off the internal CD drive
@@ -53,21 +61,39 @@ The files are:
(only in debug mode)
bluetoothpower power on/off the internal bluetooth device
fanspeed get/set the fan speed
+ ====================== ==========================================
Note that some files may be missing if they are not supported
by your particular laptop model.
-Example usage:
+Example usage::
+
# echo "1" > /sys/devices/platform/sony-laptop/brightness_default
-sets the lowest screen brightness for the next and later reboots,
+
+sets the lowest screen brightness for the next and later reboots
+
+::
+
# echo "8" > /sys/devices/platform/sony-laptop/brightness_default
-sets the highest screen brightness for the next and later reboots,
+
+sets the highest screen brightness for the next and later reboots
+
+::
+
# cat /sys/devices/platform/sony-laptop/brightness_default
-retrieves the value.
+
+retrieves the value
+
+::
# echo "0" > /sys/devices/platform/sony-laptop/audiopower
-powers off the sound card,
+
+powers off the sound card
+
+::
+
# echo "1" > /sys/devices/platform/sony-laptop/audiopower
+
powers on the sound card.
@@ -76,7 +102,8 @@ RFkill control:
More recent Vaio models expose a consistent set of ACPI methods to
control radio frequency emitting devices. If you are a lucky owner of
such a laptop you will find the necessary rfkill devices under
-/sys/class/rfkill. Check those starting with sony-* in
+/sys/class/rfkill. Check those starting with sony-* in::
+
# grep . /sys/class/rfkill/*/{state,name}
@@ -88,26 +115,29 @@ you are not afraid of any side effects doing strange things with
your ACPI BIOS could have on your laptop), load the driver and
pass the option 'debug=1'.
-REPEAT: DON'T DO THIS IF YOU DON'T LIKE RISKY BUSINESS.
+REPEAT:
+ **DON'T DO THIS IF YOU DON'T LIKE RISKY BUSINESS.**
In your kernel logs you will find the list of all ACPI methods
the SNC device has on your laptop.
* For new models you will see a long list of meaningless method names,
-reading the DSDT table source should reveal that:
+ reading the DSDT table source should reveal that:
+
(1) the SNC device uses an internal capability lookup table
(2) SN00 is used to find values in the lookup table
(3) SN06 and SN07 are used to call into the real methods based on
offsets you can obtain iterating the table using SN00
(4) SN02 used to enable events.
+
Some values in the capability lookup table are more or less known, see
the code for all sony_call_snc_handle calls, others are more obscure.
* For old models you can see the GCDP/GCDP methods used to power on/off
-the CD drive, but there are others and they are usually different from
-model to model.
+ the CD drive, but there are others and they are usually different from
+ model to model.
-I HAVE NO IDEA WHAT THOSE METHODS DO.
+**I HAVE NO IDEA WHAT THOSE METHODS DO.**
The sony-laptop driver creates, for some of those methods (the most
current ones found on several Vaio models), an entry under
diff --git a/Documentation/laptops/sonypi.txt b/Documentation/admin-guide/laptops/sonypi.rst
index 606bdb9ce036..c6eaaf48f7c1 100644
--- a/Documentation/laptops/sonypi.txt
+++ b/Documentation/admin-guide/laptops/sonypi.rst
@@ -1,11 +1,13 @@
+==================================================
Sony Programmable I/O Control Device Driver Readme
---------------------------------------------------
- Copyright (C) 2001-2004 Stelian Pop <stelian@popies.net>
- Copyright (C) 2001-2002 Alcôve <www.alcove.com>
- Copyright (C) 2001 Michael Ashley <m.ashley@unsw.edu.au>
- Copyright (C) 2001 Junichi Morita <jun1m@mars.dti.ne.jp>
- Copyright (C) 2000 Takaya Kinjo <t-kinjo@tc4.so-net.ne.jp>
- Copyright (C) 2000 Andrew Tridgell <tridge@samba.org>
+==================================================
+
+ - Copyright (C) 2001-2004 Stelian Pop <stelian@popies.net>
+ - Copyright (C) 2001-2002 Alcôve <www.alcove.com>
+ - Copyright (C) 2001 Michael Ashley <m.ashley@unsw.edu.au>
+ - Copyright (C) 2001 Junichi Morita <jun1m@mars.dti.ne.jp>
+ - Copyright (C) 2000 Takaya Kinjo <t-kinjo@tc4.so-net.ne.jp>
+ - Copyright (C) 2000 Andrew Tridgell <tridge@samba.org>
This driver enables access to the Sony Programmable I/O Control Device which
can be found in many Sony Vaio laptops. Some newer Sony laptops (seems to be
@@ -14,6 +16,7 @@ sonypi device and are not supported at all by this driver.
It will give access (through a user space utility) to some events those laptops
generate, like:
+
- jogdial events (the small wheel on the side of Vaios)
- capture button events (only on Vaio Picturebook series)
- Fn keys
@@ -49,7 +52,8 @@ module argument syntax (<param>=<value> when passing the option to the
module or sonypi.<param>=<value> on the kernel boot line when sonypi is
statically linked into the kernel). Those options are:
- minor: minor number of the misc device /dev/sonypi,
+ =============== =======================================================
+ minor: minor number of the misc device /dev/sonypi,
default is -1 (automatic allocation, see /proc/misc
or kernel logs)
@@ -85,17 +89,18 @@ statically linked into the kernel). Those options are:
set to 0xffffffff, meaning that all possible events
will be tried. You can use the following bits to
construct your own event mask (from
- drivers/char/sonypi.h):
- SONYPI_JOGGER_MASK 0x0001
- SONYPI_CAPTURE_MASK 0x0002
- SONYPI_FNKEY_MASK 0x0004
- SONYPI_BLUETOOTH_MASK 0x0008
- SONYPI_PKEY_MASK 0x0010
- SONYPI_BACK_MASK 0x0020
- SONYPI_HELP_MASK 0x0040
- SONYPI_LID_MASK 0x0080
- SONYPI_ZOOM_MASK 0x0100
- SONYPI_THUMBPHRASE_MASK 0x0200
+ drivers/char/sonypi.h)::
+
+ SONYPI_JOGGER_MASK 0x0001
+ SONYPI_CAPTURE_MASK 0x0002
+ SONYPI_FNKEY_MASK 0x0004
+ SONYPI_BLUETOOTH_MASK 0x0008
+ SONYPI_PKEY_MASK 0x0010
+ SONYPI_BACK_MASK 0x0020
+ SONYPI_HELP_MASK 0x0040
+ SONYPI_LID_MASK 0x0080
+ SONYPI_ZOOM_MASK 0x0100
+ SONYPI_THUMBPHRASE_MASK 0x0200
SONYPI_MEYE_MASK 0x0400
SONYPI_MEMORYSTICK_MASK 0x0800
SONYPI_BATTERY_MASK 0x1000
@@ -105,17 +110,18 @@ statically linked into the kernel). Those options are:
created, one which interprets the jogdial events as
mouse events, the other one which acts like a
keyboard reporting the pressing of the special keys.
+ =============== =======================================================
Module use:
-----------
In order to automatically load the sonypi module on use, you can put those
-lines a configuration file in /etc/modprobe.d/:
+lines in a configuration file in /etc/modprobe.d/::
alias char-major-10-250 sonypi
options sonypi minor=250
-This supposes the use of minor 250 for the sonypi device:
+This supposes the use of minor 250 for the sonypi device::
# mknod /dev/sonypi c 10 250
@@ -148,5 +154,5 @@ Bugs:
http://www.acc.umu.se/~erikw/program/smartdimmer-0.1.tar.bz2
- since all development was done by reverse engineering, there is
- _absolutely no guarantee_ that this driver will not crash your
+ *absolutely no guarantee* that this driver will not crash your
laptop. Permanently.
diff --git a/Documentation/laptops/thinkpad-acpi.txt b/Documentation/admin-guide/laptops/thinkpad-acpi.rst
index 75ef063622d2..adea0bf2acc5 100644
--- a/Documentation/laptops/thinkpad-acpi.txt
+++ b/Documentation/admin-guide/laptops/thinkpad-acpi.rst
@@ -1,12 +1,15 @@
- ThinkPad ACPI Extras Driver
+===========================
+ThinkPad ACPI Extras Driver
+===========================
- Version 0.25
- October 16th, 2013
+Version 0.25
- Borislav Deianov <borislav@users.sf.net>
- Henrique de Moraes Holschuh <hmh@hmh.eng.br>
- http://ibm-acpi.sf.net/
+October 16th, 2013
+- Borislav Deianov <borislav@users.sf.net>
+- Henrique de Moraes Holschuh <hmh@hmh.eng.br>
+
+http://ibm-acpi.sf.net/
This is a Linux driver for the IBM and Lenovo ThinkPad laptops. It
supports various features of these laptops which are accessible
@@ -91,7 +94,8 @@ yet ready or stabilized, it is expected that this interface will change,
and any and all userspace programs must deal with it.
-Notes about the sysfs interface:
+Notes about the sysfs interface
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Unlike what was done with the procfs interface, correctness when talking
to the sysfs interfaces will be enforced, as will correctness in the
@@ -129,6 +133,7 @@ Driver version
--------------
procfs: /proc/acpi/ibm/driver
+
sysfs driver attribute: version
The driver name and version. No commands can be written to this file.
@@ -141,9 +146,13 @@ sysfs driver attribute: interface_version
Version of the thinkpad-acpi sysfs interface, as an unsigned long
(output in hex format: 0xAAAABBCC), where:
- AAAA - major revision
- BB - minor revision
- CC - bugfix revision
+
+ AAAA
+ - major revision
+ BB
+ - minor revision
+ CC
+ - bugfix revision
The sysfs interface version changelog for the driver can be found at the
end of this document. Changes to the sysfs interface done by the kernel
@@ -170,6 +179,7 @@ Hot keys
--------
procfs: /proc/acpi/ibm/hotkey
+
sysfs device attribute: hotkey_*
In a ThinkPad, the ACPI HKEY handler is responsible for communicating
@@ -181,7 +191,7 @@ firmware will behave in many situations.
The driver enables the HKEY ("hot key") event reporting automatically
when loaded, and disables it when it is removed.
-The driver will report HKEY events in the following format:
+The driver will report HKEY events in the following format::
ibm/hotkey HKEY 00000080 0000xxxx
@@ -217,9 +227,10 @@ ThinkPads, it is still possible to support some extra hotkeys by
polling the "CMOS NVRAM" at least 10 times per second. The driver
attempts to enable this functionality automatically when required.
-procfs notes:
+procfs notes
+^^^^^^^^^^^^
-The following commands can be written to the /proc/acpi/ibm/hotkey file:
+The following commands can be written to the /proc/acpi/ibm/hotkey file::
echo 0xffffffff > /proc/acpi/ibm/hotkey -- enable all hot keys
echo 0 > /proc/acpi/ibm/hotkey -- disable all possible hot keys
@@ -227,7 +238,7 @@ The following commands can be written to the /proc/acpi/ibm/hotkey file:
echo reset > /proc/acpi/ibm/hotkey -- restore the recommended mask
The following commands have been deprecated and will cause the kernel
-to log a warning:
+to log a warning::
echo enable > /proc/acpi/ibm/hotkey -- does nothing
echo disable > /proc/acpi/ibm/hotkey -- returns an error
@@ -237,7 +248,8 @@ maintain maximum bug-to-bug compatibility, it does not report any masks,
nor does it allow one to manipulate the hot key mask when the firmware
does not support masks at all, even if NVRAM polling is in use.
-sysfs notes:
+sysfs notes
+^^^^^^^^^^^
hotkey_bios_enabled:
DEPRECATED, WILL BE REMOVED SOON.
@@ -349,7 +361,8 @@ sysfs notes:
This attribute has poll()/select() support.
-input layer notes:
+input layer notes
+^^^^^^^^^^^^^^^^^
A Hot key is mapped to a single input layer EV_KEY event, possibly
followed by an EV_MSC MSC_SCAN event that shall contain that key's scan
@@ -362,11 +375,13 @@ remapping KEY_UNKNOWN keys.
The events are available in an input device, with the following id:
- Bus: BUS_HOST
- vendor: 0x1014 (PCI_VENDOR_ID_IBM) or
+ ============== ==============================
+ Bus BUS_HOST
+ vendor 0x1014 (PCI_VENDOR_ID_IBM) or
0x17aa (PCI_VENDOR_ID_LENOVO)
- product: 0x5054 ("TP")
- version: 0x4101
+ product 0x5054 ("TP")
+ version 0x4101
+ ============== ==============================
The version will have its LSB incremented if the keymap changes in a
backwards-compatible way. The MSB shall always be 0x41 for this input
@@ -380,9 +395,10 @@ backwards-compatible change for this input device.
Thinkpad-acpi Hot Key event map (version 0x4101):
+======= ======= ============== ==============================================
ACPI Scan
event code Key Notes
-
+======= ======= ============== ==============================================
0x1001 0x00 FN+F1 -
0x1002 0x01 FN+F2 IBM: battery (rare)
@@ -426,7 +442,9 @@ event code Key Notes
or toggle screen expand
0x1009 0x08 FN+F9 -
- .. .. ..
+
+... ... ... ...
+
0x100B 0x0A FN+F11 -
0x100C 0x0B FN+F12 Sleep to disk. You are always
@@ -480,8 +498,11 @@ event code Key Notes
0x1018 0x17 THINKPAD ThinkPad/Access IBM/Lenovo key
0x1019 0x18 unknown
-.. .. ..
+
+... ... ...
+
0x1020 0x1F unknown
+======= ======= ============== ==============================================
The ThinkPad firmware does not allow one to differentiate when most hot
keys are pressed or released (either that, or we don't know how to, yet).
@@ -499,14 +520,17 @@ generate input device EV_KEY events.
In addition to the EV_KEY events, thinkpad-acpi may also issue EV_SW
events for switches:
+============== ==============================================
SW_RFKILL_ALL T60 and later hardware rfkill rocker switch
SW_TABLET_MODE Tablet ThinkPads HKEY events 0x5009 and 0x500A
+============== ==============================================
-Non hotkey ACPI HKEY event map:
--------------------------------
+Non hotkey ACPI HKEY event map
+------------------------------
Events that are never propagated by the driver:
+====== ==================================================
0x2304 System is waking up from suspend to undock
0x2305 System is waking up from suspend to eject bay
0x2404 System is waking up from hibernation to undock
@@ -519,10 +543,12 @@ Events that are never propagated by the driver:
0x6000 KEYBOARD: Numlock key pressed
0x6005 KEYBOARD: Fn key pressed (TO BE VERIFIED)
0x7000 Radio Switch may have changed state
+====== ==================================================
Events that are propagated by the driver to userspace:
+====== =====================================================
0x2313 ALARM: System is waking up from suspend because
the battery is nearly empty
0x2413 ALARM: System is waking up from hibernation because
@@ -544,6 +570,7 @@ Events that are propagated by the driver to userspace:
0x6040 Nvidia Optimus/AC adapter related (TO BE VERIFIED)
0x60C0 X1 Yoga 2016, Tablet mode status changed
0x60F0 Thermal Transformation changed (GMTS, Windows)
+====== =====================================================
Battery nearly empty alarms are a last resort attempt to get the
operating system to hibernate or shutdown cleanly (0x2313), or shutdown
@@ -562,7 +589,8 @@ cycle, or a system shutdown. Obviously, something is very wrong if this
happens.
-Brightness hotkey notes:
+Brightness hotkey notes
+^^^^^^^^^^^^^^^^^^^^^^^
Don't mess with the brightness hotkeys in a Thinkpad. If you want
notifications for OSD, use the sysfs backlight class event support.
@@ -579,7 +607,9 @@ Bluetooth
---------
procfs: /proc/acpi/ibm/bluetooth
+
sysfs device attribute: bluetooth_enable (deprecated)
+
sysfs rfkill class: switch "tpacpi_bluetooth_sw"
This feature shows the presence and current state of a ThinkPad
@@ -588,36 +618,39 @@ Bluetooth device in the internal ThinkPad CDC slot.
If the ThinkPad supports it, the Bluetooth state is stored in NVRAM,
so it is kept across reboots and power-off.
-Procfs notes:
+Procfs notes
+^^^^^^^^^^^^
-If Bluetooth is installed, the following commands can be used:
+If Bluetooth is installed, the following commands can be used::
echo enable > /proc/acpi/ibm/bluetooth
echo disable > /proc/acpi/ibm/bluetooth
-Sysfs notes:
+Sysfs notes
+^^^^^^^^^^^
If the Bluetooth CDC card is installed, it can be enabled /
disabled through the "bluetooth_enable" thinkpad-acpi device
attribute, and its current status can also be queried.
enable:
- 0: disables Bluetooth / Bluetooth is disabled
- 1: enables Bluetooth / Bluetooth is enabled.
+
+ - 0: disables Bluetooth / Bluetooth is disabled
+ - 1: enables Bluetooth / Bluetooth is enabled.
Note: this interface has been superseded by the generic rfkill
class. It has been deprecated, and it will be removed in year
2010.
rfkill controller switch "tpacpi_bluetooth_sw": refer to
- Documentation/rfkill.txt for details.
+ Documentation/driver-api/rfkill.rst for details.
Video output control -- /proc/acpi/ibm/video
--------------------------------------------
This feature allows control over the devices used for video output -
-LCD, CRT or DVI (if available). The following commands are available:
+LCD, CRT or DVI (if available). The following commands are available::
echo lcd_enable > /proc/acpi/ibm/video
echo lcd_disable > /proc/acpi/ibm/video
@@ -630,9 +663,10 @@ LCD, CRT or DVI (if available). The following commands are available:
echo expand_toggle > /proc/acpi/ibm/video
echo video_switch > /proc/acpi/ibm/video
-NOTE: Access to this feature is restricted to processes owning the
-CAP_SYS_ADMIN capability for safety reasons, as it can interact badly
-enough with some versions of X.org to crash it.
+NOTE:
+ Access to this feature is restricted to processes owning the
+ CAP_SYS_ADMIN capability for safety reasons, as it can interact badly
+ enough with some versions of X.org to crash it.
Each video output device can be enabled or disabled individually.
Reading /proc/acpi/ibm/video shows the status of each device.
@@ -665,18 +699,21 @@ ThinkLight control
------------------
procfs: /proc/acpi/ibm/light
+
sysfs attributes: as per LED class, for the "tpacpi::thinklight" LED
-procfs notes:
+procfs notes
+^^^^^^^^^^^^
The ThinkLight status can be read and set through the procfs interface. A
few models which do not make the status available will show the ThinkLight
-status as "unknown". The available commands are:
+status as "unknown". The available commands are::
echo on > /proc/acpi/ibm/light
echo off > /proc/acpi/ibm/light
-sysfs notes:
+sysfs notes
+^^^^^^^^^^^
The ThinkLight sysfs interface is documented by the LED class
documentation, in Documentation/leds/leds-class.rst. The ThinkLight LED name
@@ -691,6 +728,7 @@ CMOS/UCMS control
-----------------
procfs: /proc/acpi/ibm/cmos
+
sysfs device attribute: cmos_command
This feature is mostly used internally by the ACPI firmware to keep the legacy
@@ -707,16 +745,16 @@ The range of valid cmos command numbers is 0 to 21, but not all have an
effect and the behavior varies from model to model. Here is the behavior
on the X40 (tpb is the ThinkPad Buttons utility):
- 0 - Related to "Volume down" key press
- 1 - Related to "Volume up" key press
- 2 - Related to "Mute on" key press
- 3 - Related to "Access IBM" key press
- 4 - Related to "LCD brightness up" key press
- 5 - Related to "LCD brightness down" key press
- 11 - Related to "toggle screen expansion" key press/function
- 12 - Related to "ThinkLight on"
- 13 - Related to "ThinkLight off"
- 14 - Related to "ThinkLight" key press (toggle ThinkLight)
+ - 0 - Related to "Volume down" key press
+ - 1 - Related to "Volume up" key press
+ - 2 - Related to "Mute on" key press
+ - 3 - Related to "Access IBM" key press
+ - 4 - Related to "LCD brightness up" key press
+ - 5 - Related to "LCD brightness down" key press
+ - 11 - Related to "toggle screen expansion" key press/function
+ - 12 - Related to "ThinkLight on"
+ - 13 - Related to "ThinkLight off"
+ - 14 - Related to "ThinkLight" key press (toggle ThinkLight)
The cmos command interface is prone to firmware split-brain problems, as
in newer ThinkPads it is just a compatibility layer. Do not use it, it is
@@ -748,9 +786,10 @@ are aware of the consequences are welcome to enabling it.
Audio mute and microphone mute LEDs are supported, but currently not
visible to userspace. They are used by the snd-hda-intel audio driver.
-procfs notes:
+procfs notes
+^^^^^^^^^^^^
-The available commands are:
+The available commands are::
echo '<LED number> on' >/proc/acpi/ibm/led
echo '<LED number> off' >/proc/acpi/ibm/led
@@ -760,23 +799,24 @@ The <LED number> range is 0 to 15. The set of LEDs that can be
controlled varies from model to model. Here is the common ThinkPad
mapping:
- 0 - power
- 1 - battery (orange)
- 2 - battery (green)
- 3 - UltraBase/dock
- 4 - UltraBay
- 5 - UltraBase battery slot
- 6 - (unknown)
- 7 - standby
- 8 - dock status 1
- 9 - dock status 2
- 10, 11 - (unknown)
- 12 - thinkvantage
- 13, 14, 15 - (unknown)
+ - 0 - power
+ - 1 - battery (orange)
+ - 2 - battery (green)
+ - 3 - UltraBase/dock
+ - 4 - UltraBay
+ - 5 - UltraBase battery slot
+ - 6 - (unknown)
+ - 7 - standby
+ - 8 - dock status 1
+ - 9 - dock status 2
+ - 10, 11 - (unknown)
+ - 12 - thinkvantage
+ - 13, 14, 15 - (unknown)
All of the above can be turned on and off and can be made to blink.
-sysfs notes:
+sysfs notes
+^^^^^^^^^^^
The ThinkPad LED sysfs interface is described in detail by the LED class
documentation, in Documentation/leds/leds-class.rst.
@@ -815,7 +855,7 @@ The BEEP method is used internally by the ACPI firmware to provide
audible alerts in various situations. This feature allows the same
sounds to be triggered manually.
-The commands are non-negative integer numbers:
+The commands are non-negative integer numbers::
echo <number> >/proc/acpi/ibm/beep
@@ -823,25 +863,26 @@ The valid <number> range is 0 to 17. Not all numbers trigger sounds
and the sounds vary from model to model. Here is the behavior on the
X40:
- 0 - stop a sound in progress (but use 17 to stop 16)
- 2 - two beeps, pause, third beep ("low battery")
- 3 - single beep
- 4 - high, followed by low-pitched beep ("unable")
- 5 - single beep
- 6 - very high, followed by high-pitched beep ("AC/DC")
- 7 - high-pitched beep
- 9 - three short beeps
- 10 - very long beep
- 12 - low-pitched beep
- 15 - three high-pitched beeps repeating constantly, stop with 0
- 16 - one medium-pitched beep repeating constantly, stop with 17
- 17 - stop 16
+ - 0 - stop a sound in progress (but use 17 to stop 16)
+ - 2 - two beeps, pause, third beep ("low battery")
+ - 3 - single beep
+ - 4 - high, followed by low-pitched beep ("unable")
+ - 5 - single beep
+ - 6 - very high, followed by high-pitched beep ("AC/DC")
+ - 7 - high-pitched beep
+ - 9 - three short beeps
+ - 10 - very long beep
+ - 12 - low-pitched beep
+ - 15 - three high-pitched beeps repeating constantly, stop with 0
+ - 16 - one medium-pitched beep repeating constantly, stop with 17
+ - 17 - stop 16
Temperature sensors
-------------------
procfs: /proc/acpi/ibm/thermal
+
sysfs device attributes: (hwmon "thinkpad") temp*_input
Most ThinkPads include six or more separate temperature sensors but only
@@ -850,10 +891,14 @@ feature shows readings from up to eight different sensors on older
ThinkPads, and up to sixteen different sensors on newer ThinkPads.
For example, on the X40, a typical output may be:
-temperatures: 42 42 45 41 36 -128 33 -128
+
+temperatures:
+ 42 42 45 41 36 -128 33 -128
On the T43/p, a typical output may be:
-temperatures: 48 48 36 52 38 -128 31 -128 48 52 48 -128 -128 -128 -128 -128
+
+temperatures:
+ 48 48 36 52 38 -128 31 -128 48 52 48 -128 -128 -128 -128 -128
The mapping of thermal sensors to physical locations varies depending on
system-board model (and thus, on ThinkPad model).
@@ -863,46 +908,53 @@ tries to track down these locations for various models.
Most (newer?) models seem to follow this pattern:
-1: CPU
-2: (depends on model)
-3: (depends on model)
-4: GPU
-5: Main battery: main sensor
-6: Bay battery: main sensor
-7: Main battery: secondary sensor
-8: Bay battery: secondary sensor
-9-15: (depends on model)
+- 1: CPU
+- 2: (depends on model)
+- 3: (depends on model)
+- 4: GPU
+- 5: Main battery: main sensor
+- 6: Bay battery: main sensor
+- 7: Main battery: secondary sensor
+- 8: Bay battery: secondary sensor
+- 9-15: (depends on model)
For the R51 (source: Thomas Gruber):
-2: Mini-PCI
-3: Internal HDD
+
+- 2: Mini-PCI
+- 3: Internal HDD
For the T43, T43/p (source: Shmidoax/Thinkwiki.org)
http://thinkwiki.org/wiki/Thermal_Sensors#ThinkPad_T43.2C_T43p
-2: System board, left side (near PCMCIA slot), reported as HDAPS temp
-3: PCMCIA slot
-9: MCH (northbridge) to DRAM Bus
-10: Clock-generator, mini-pci card and ICH (southbridge), under Mini-PCI
- card, under touchpad
-11: Power regulator, underside of system board, below F2 key
+
+- 2: System board, left side (near PCMCIA slot), reported as HDAPS temp
+- 3: PCMCIA slot
+- 9: MCH (northbridge) to DRAM Bus
+- 10: Clock-generator, mini-pci card and ICH (southbridge), under Mini-PCI
+ card, under touchpad
+- 11: Power regulator, underside of system board, below F2 key
The A31 has a very atypical layout for the thermal sensors
(source: Milos Popovic, http://thinkwiki.org/wiki/Thermal_Sensors#ThinkPad_A31)
-1: CPU
-2: Main Battery: main sensor
-3: Power Converter
-4: Bay Battery: main sensor
-5: MCH (northbridge)
-6: PCMCIA/ambient
-7: Main Battery: secondary sensor
-8: Bay Battery: secondary sensor
+- 1: CPU
+- 2: Main Battery: main sensor
+- 3: Power Converter
+- 4: Bay Battery: main sensor
+- 5: MCH (northbridge)
+- 6: PCMCIA/ambient
+- 7: Main Battery: secondary sensor
+- 8: Bay Battery: secondary sensor
+
+
+Procfs notes
+^^^^^^^^^^^^
-Procfs notes:
Readings from sensors that are not available return -128.
No commands can be written to this file.
-Sysfs notes:
+Sysfs notes
+^^^^^^^^^^^
+
Sensors that are not available return the ENXIO error. This
status may change at runtime, as there are hotplug thermal
sensors, like those inside the batteries and docks.
@@ -921,6 +973,7 @@ ftp://ftp.suse.com/pub/people/trenn/sources/ec
Use it to determine the register holding the fan
speed on some models. To do that, do the following:
+
- make sure the battery is fully charged
- make sure the fan is running
- use above mentioned tool to read out the EC
@@ -941,6 +994,7 @@ LCD brightness control
----------------------
procfs: /proc/acpi/ibm/brightness
+
sysfs backlight device "thinkpad_screen"
This feature allows software control of the LCD brightness on ThinkPad
@@ -985,15 +1039,17 @@ brightness_enable=0 forces it to be disabled. brightness_enable=1
forces it to be enabled when available, even if the standard ACPI
interface is also available.
-Procfs notes:
+Procfs notes
+^^^^^^^^^^^^
- The available commands are:
+The available commands are::
echo up >/proc/acpi/ibm/brightness
echo down >/proc/acpi/ibm/brightness
echo 'level <level>' >/proc/acpi/ibm/brightness
-Sysfs notes:
+Sysfs notes
+^^^^^^^^^^^
The interface is implemented through the backlight sysfs class, which is
poorly documented at this time.
@@ -1038,6 +1094,7 @@ Volume control (Console Audio control)
--------------------------------------
procfs: /proc/acpi/ibm/volume
+
ALSA: "ThinkPad Console Audio Control", default ID: "ThinkPadEC"
NOTE: by default, the volume control interface operates in read-only
@@ -1053,7 +1110,8 @@ Software volume control should be done only in the main AC97/HDA
mixer.
-About the ThinkPad Console Audio control:
+About the ThinkPad Console Audio control
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ThinkPads have a built-in amplifier and muting circuit that drives the
console headphone and speakers. This circuit is after the main AC97
@@ -1092,13 +1150,14 @@ normal key presses to the operating system (thinkpad-acpi is not
involved).
-The ThinkPad-ACPI volume control:
+The ThinkPad-ACPI volume control
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
The preferred way to interact with the Console Audio control is the
ALSA interface.
The legacy procfs interface allows one to read the current state,
-and if volume control is enabled, accepts the following commands:
+and if volume control is enabled, accepts the following commands::
echo up >/proc/acpi/ibm/volume
echo down >/proc/acpi/ibm/volume
@@ -1137,13 +1196,15 @@ Fan control and monitoring: fan speed, fan enable/disable
---------------------------------------------------------
procfs: /proc/acpi/ibm/fan
-sysfs device attributes: (hwmon "thinkpad") fan1_input, pwm1,
- pwm1_enable, fan2_input
+
+sysfs device attributes: (hwmon "thinkpad") fan1_input, pwm1, pwm1_enable, fan2_input
+
sysfs hwmon driver attributes: fan_watchdog
-NOTE NOTE NOTE: fan control operations are disabled by default for
-safety reasons. To enable them, the module parameter "fan_control=1"
-must be given to thinkpad-acpi.
+NOTE NOTE NOTE:
+ fan control operations are disabled by default for
+ safety reasons. To enable them, the module parameter "fan_control=1"
+ must be given to thinkpad-acpi.
This feature attempts to show the current fan speed, control mode and
other fan data that might be available. The speed is read directly
@@ -1154,7 +1215,8 @@ value on other models.
Some Lenovo ThinkPads support a secondary fan. This fan cannot be
controlled separately, it shares the main fan control.
-Fan levels:
+Fan levels
+^^^^^^^^^^
Most ThinkPad fans work in "levels" at the firmware interface. Level 0
stops the fan. The higher the level, the higher the fan speed, although
@@ -1209,9 +1271,10 @@ therefore, not suitable to protect against fan mode changes made through
means other than the "enable", "disable", and "level" procfs fan
commands, or the hwmon fan control sysfs interface.
-Procfs notes:
+Procfs notes
+^^^^^^^^^^^^
-The fan may be enabled or disabled with the following commands:
+The fan may be enabled or disabled with the following commands::
echo enable >/proc/acpi/ibm/fan
echo disable >/proc/acpi/ibm/fan
@@ -1219,7 +1282,7 @@ The fan may be enabled or disabled with the following commands:
Placing a fan on level 0 is the same as disabling it. Enabling a fan
will try to place it in a safe level if it is too slow or disabled.
-The fan level can be controlled with the command:
+The fan level can be controlled with the command::
echo 'level <level>' > /proc/acpi/ibm/fan
@@ -1231,7 +1294,7 @@ compatibility.
On the X31 and X40 (and ONLY on those models), the fan speed can be
controlled to a certain degree. Once the fan is running, it can be
-forced to run faster or slower with the following command:
+forced to run faster or slower with the following command::
echo 'speed <speed>' > /proc/acpi/ibm/fan
@@ -1241,13 +1304,14 @@ effect or the fan speed eventually settles somewhere in that range. The
fan cannot be stopped or started with this command. This functionality
is incomplete, and not available through the sysfs interface.
-To program the safety watchdog, use the "watchdog" command.
+To program the safety watchdog, use the "watchdog" command::
echo 'watchdog <interval in seconds>' > /proc/acpi/ibm/fan
If you want to disable the watchdog, use 0 as the interval.
-Sysfs notes:
+Sysfs notes
+^^^^^^^^^^^
The sysfs interface follows the hwmon subsystem guidelines for the most
part, and the exception is the fan safety watchdog.
@@ -1261,10 +1325,10 @@ to the firmware).
Features not yet implemented by the driver return ENOSYS.
hwmon device attribute pwm1_enable:
- 0: PWM offline (fan is set to full-speed mode)
- 1: Manual PWM control (use pwm1 to set fan level)
- 2: Hardware PWM control (EC "auto" mode)
- 3: reserved (Software PWM control, not implemented yet)
+ - 0: PWM offline (fan is set to full-speed mode)
+ - 1: Manual PWM control (use pwm1 to set fan level)
+ - 2: Hardware PWM control (EC "auto" mode)
+ - 3: reserved (Software PWM control, not implemented yet)
Modes 0 and 2 are not supported by all ThinkPads, and the
driver is not always able to detect this. If it does know a
@@ -1304,7 +1368,9 @@ WAN
---
procfs: /proc/acpi/ibm/wan
+
sysfs device attribute: wwan_enable (deprecated)
+
sysfs rfkill class: switch "tpacpi_wwan_sw"
This feature shows the presence and current state of the built-in
@@ -1316,29 +1382,31 @@ so it is kept across reboots and power-off.
It was tested on a Lenovo ThinkPad X60. It should probably work on other
ThinkPad models which come with this module installed.
-Procfs notes:
+Procfs notes
+^^^^^^^^^^^^
-If the W-WAN card is installed, the following commands can be used:
+If the W-WAN card is installed, the following commands can be used::
echo enable > /proc/acpi/ibm/wan
echo disable > /proc/acpi/ibm/wan
-Sysfs notes:
+Sysfs notes
+^^^^^^^^^^^
If the W-WAN card is installed, it can be enabled /
disabled through the "wwan_enable" thinkpad-acpi device
attribute, and its current status can also be queried.
enable:
- 0: disables WWAN card / WWAN card is disabled
- 1: enables WWAN card / WWAN card is enabled.
+ - 0: disables WWAN card / WWAN card is disabled
+ - 1: enables WWAN card / WWAN card is enabled.
Note: this interface has been superseded by the generic rfkill
class. It has been deprecated, and it will be removed in year
2010.
rfkill controller switch "tpacpi_wwan_sw": refer to
- Documentation/rfkill.txt for details.
+ Documentation/driver-api/rfkill.rst for details.
EXPERIMENTAL: UWB
@@ -1354,10 +1422,11 @@ sysfs rfkill class: switch "tpacpi_uwb_sw"
This feature exports an rfkill controller for the UWB device, if one is
present and enabled in the BIOS.
-Sysfs notes:
+Sysfs notes
+^^^^^^^^^^^
rfkill controller switch "tpacpi_uwb_sw": refer to
- Documentation/rfkill.txt for details.
+ Documentation/driver-api/rfkill.rst for details.
Adaptive keyboard
-----------------
@@ -1368,11 +1437,11 @@ This sysfs attribute controls the keyboard "face" that will be shown on the
Lenovo X1 Carbon 2nd gen (2014)'s adaptive keyboard. The value can be read
and set.
-1 = Home mode
-2 = Web-browser mode
-3 = Web-conference mode
-4 = Function mode
-5 = Layflat mode
+- 1 = Home mode
+- 2 = Web-browser mode
+- 3 = Web-conference mode
+- 4 = Function mode
+- 5 = Layflat mode
For more details about which buttons will appear depending on the mode, please
review the laptop's user guide:
@@ -1382,13 +1451,13 @@ Multiple Commands, Module Parameters
------------------------------------
Multiple commands can be written to the proc files in one shot by
-separating them with commas, for example:
+separating them with commas, for example::
echo enable,0xffff > /proc/acpi/ibm/hotkey
echo lcd_disable,crt_enable > /proc/acpi/ibm/video
Commands can also be specified when loading the thinkpad-acpi module,
-for example:
+for example::
modprobe thinkpad_acpi hotkey=enable,0xffff video=auto_disable
@@ -1397,14 +1466,16 @@ Enabling debugging output
-------------------------
The module takes a debug parameter which can be used to selectively
-enable various classes of debugging output, for example:
+enable various classes of debugging output, for example::
modprobe thinkpad_acpi debug=0xffff
will enable all debugging output classes. It takes a bitmask, so
to enable more than one output class, just add their values.
+ ============= ======================================
Debug bitmask Description
+ ============= ======================================
0x8000 Disclose PID of userspace programs
accessing some functions of the driver
0x0001 Initialization and probing
@@ -1415,6 +1486,7 @@ to enable more than one output class, just add their values.
0x0010 Fan control
0x0020 Backlight brightness
0x0040 Audio mixer/volume control
+ ============= ======================================
There is also a kernel build option to enable more debugging
information, which may be necessary to debug driver problems.
@@ -1432,8 +1504,10 @@ the module parameter force_load=1. Regardless of whether this works or
not, please contact ibm-acpi-devel@lists.sourceforge.net with a report.
-Sysfs interface changelog:
+Sysfs interface changelog
+^^^^^^^^^^^^^^^^^^^^^^^^^
+========= ===============================================================
0x000100: Initial sysfs support, as a single platform driver and
device.
0x000200: Hot key support for 32 hot keys, and radio slider switch
@@ -1485,3 +1559,4 @@ Sysfs interface changelog:
0x030000: Thermal and fan sysfs attributes were moved to the hwmon
device instead of being attached to the backing platform
device.
+========= ===============================================================
diff --git a/Documentation/laptops/toshiba_haps.txt b/Documentation/admin-guide/laptops/toshiba_haps.rst
index 0c1d88dedbde..d28b6c3f2849 100644
--- a/Documentation/laptops/toshiba_haps.txt
+++ b/Documentation/admin-guide/laptops/toshiba_haps.rst
@@ -1,18 +1,19 @@
-Kernel driver toshiba_haps
+====================================
Toshiba HDD Active Protection Sensor
====================================
+Kernel driver: toshiba_haps
+
Author: Azael Avalos <coproscefalo@gmail.com>
-0. Contents
------------
+.. 0. Contents
-1. Description
-2. Interface
-3. Accelerometer axes
-4. Supported devices
-5. Usage
+ 1. Description
+ 2. Interface
+ 3. Accelerometer axes
+ 4. Supported devices
+ 5. Usage
1. Description
@@ -32,17 +33,20 @@ file to set the desired protection level or sensor sensibility.
------------
This device comes with 3 methods:
-_STA - Checks existence of the device, returning Zero if the device does not
+
+==== =====================================================================
+_STA Checks existence of the device, returning Zero if the device does not
exist or is not supported.
-PTLV - Sets the desired protection level.
-RSSS - Shuts down the HDD protection interface for a few seconds,
+PTLV Sets the desired protection level.
+RSSS Shuts down the HDD protection interface for a few seconds,
then restores normal operation.
+==== =====================================================================
Note:
-The presence of Solid State Drives (SSD) can make this driver to fail loading,
-given the fact that such drives have no movable parts, and thus, not requiring
-any "protection" as well as failing during the evaluation of the _STA method
-found under this device.
+ The presence of Solid State Drives (SSD) can make this driver fail to load,
+ given the fact that such drives have no movable parts, and thus, not requiring
+ any "protection" as well as failing during the evaluation of the _STA method
+ found under this device.
3. Accelerometer axes
@@ -66,11 +70,18 @@ conventional HDD and not only SSD, or a combination of both HDD and SSD.
--------
The sysfs files under /sys/devices/LNXSYSTM:00/LNXSYBUS:00/TOS620A:00/ are:
-protection_level - The protection_level is readable and writeable, and
+
+================ ============================================================
+protection_level The protection_level is readable and writeable, and
provides a way to let userspace query the current protection
level, as well as set the desired protection level, the
- available protection levels are:
- 0 - Disabled | 1 - Low | 2 - Medium | 3 - High
-reset_protection - The reset_protection entry is writeable only, being "1"
+ available protection levels are::
+
+ ============ ======= ========== ========
+ 0 - Disabled 1 - Low 2 - Medium 3 - High
+ ============ ======= ========== ========
+
+reset_protection The reset_protection entry is writeable only, being "1"
the only parameter it accepts, it is used to trigger
a reset of the protection interface.
+================ ============================================================
diff --git a/Documentation/auxdisplay/lcd-panel-cgram.txt b/Documentation/admin-guide/lcd-panel-cgram.rst
index 7f82c905763d..a3eb00c62f53 100644
--- a/Documentation/auxdisplay/lcd-panel-cgram.txt
+++ b/Documentation/admin-guide/lcd-panel-cgram.rst
@@ -1,3 +1,7 @@
+======================================
+Parallel port LCD/Keypad Panel support
+======================================
+
Some LCDs allow you to define up to 8 characters, mapped to ASCII
characters 0 to 7. The escape code to define a new character is
'\e[LG' followed by one digit from 0 to 7, representing the character
@@ -7,7 +11,7 @@ illuminated pixel with LSB on the right. Lines are numbered from the
top of the character to the bottom. On a 5x7 matrix, only the 5 lower
bits of the 7 first bytes are used for each character. If the string
is incomplete, only complete lines will be redefined. Here are some
-examples :
+examples::
printf "\e[LG0010101050D1F0C04;" => 0 = [enter]
printf "\e[LG1040E1F0000000000;" => 1 = [up]
@@ -21,4 +25,3 @@ examples :
printf "\e[LG00002061E1E060200;" => small speaker
Willy
-
diff --git a/Documentation/ldm.txt b/Documentation/admin-guide/ldm.rst
index 12c571368e73..12c571368e73 100644
--- a/Documentation/ldm.txt
+++ b/Documentation/admin-guide/ldm.rst
diff --git a/Documentation/lockup-watchdogs.txt b/Documentation/admin-guide/lockup-watchdogs.rst
index 290840c160af..290840c160af 100644
--- a/Documentation/lockup-watchdogs.txt
+++ b/Documentation/admin-guide/lockup-watchdogs.rst
diff --git a/Documentation/cma/debugfs.txt b/Documentation/admin-guide/mm/cma_debugfs.rst
index 6cef20a8cedc..4e06ffabd78a 100644
--- a/Documentation/cma/debugfs.txt
+++ b/Documentation/admin-guide/mm/cma_debugfs.rst
@@ -1,3 +1,7 @@
+=====================
+CMA Debugfs Interface
+=====================
+
The CMA debugfs interface is useful to retrieve basic information out of the
different CMA areas and to test allocation/release in each of the areas.
@@ -12,7 +16,7 @@ The structure of the files created under that directory is as follows:
- [RO] count: Amount of memory in the CMA area.
- [RO] order_per_bit: Order of pages represented by one bit.
- [RO] bitmap: The bitmap of page states in the zone.
- - [WO] alloc: Allocate N pages from that CMA area. For example:
+ - [WO] alloc: Allocate N pages from that CMA area. For example::
echo 5 > <debugfs>/cma/cma-2/alloc
diff --git a/Documentation/admin-guide/mm/index.rst b/Documentation/admin-guide/mm/index.rst
index ddf8d8d33377..11db46448354 100644
--- a/Documentation/admin-guide/mm/index.rst
+++ b/Documentation/admin-guide/mm/index.rst
@@ -11,7 +11,7 @@ processes address space and many other cool things.
Linux memory management is a complex system with many configurable
settings. Most of these settings are available via ``/proc``
filesystem and can be queried and adjusted using ``sysctl``. These APIs
-are described in Documentation/sysctl/vm.txt and in `man 5 proc`_.
+are described in Documentation/admin-guide/sysctl/vm.rst and in `man 5 proc`_.
.. _man 5 proc: http://man7.org/linux/man-pages/man5/proc.5.html
@@ -26,6 +26,7 @@ the Linux memory management.
:maxdepth: 1
concepts
+ cma_debugfs
hugetlbpage
idle_page_tracking
ksm
diff --git a/Documentation/admin-guide/mm/ksm.rst b/Documentation/admin-guide/mm/ksm.rst
index 9303786632d1..874eb0c77d34 100644
--- a/Documentation/admin-guide/mm/ksm.rst
+++ b/Documentation/admin-guide/mm/ksm.rst
@@ -59,7 +59,7 @@ MADV_UNMERGEABLE is applied to a range which was never MADV_MERGEABLE.
If a region of memory must be split into at least one new MADV_MERGEABLE
or MADV_UNMERGEABLE region, the madvise may return ENOMEM if the process
-will exceed ``vm.max_map_count`` (see Documentation/sysctl/vm.txt).
+will exceed ``vm.max_map_count`` (see Documentation/admin-guide/sysctl/vm.rst).
Like other madvise calls, they are intended for use on mapped areas of
the user address space: they will report ENOMEM if the specified range
diff --git a/Documentation/admin-guide/mm/numa_memory_policy.rst b/Documentation/admin-guide/mm/numa_memory_policy.rst
index 546f174e5d6a..8463f5538fda 100644
--- a/Documentation/admin-guide/mm/numa_memory_policy.rst
+++ b/Documentation/admin-guide/mm/numa_memory_policy.rst
@@ -15,7 +15,7 @@ document attempts to describe the concepts and APIs of the 2.6 memory policy
support.
Memory policies should not be confused with cpusets
-(``Documentation/cgroup-v1/cpusets.rst``)
+(``Documentation/admin-guide/cgroup-v1/cpusets.rst``)
which is an administrative mechanism for restricting the nodes from which
memory may be allocated by a set of processes. Memory policies are a
programming interface that a NUMA-aware application can take advantage of. When
diff --git a/Documentation/namespaces/compatibility-list.txt b/Documentation/admin-guide/namespaces/compatibility-list.rst
index defc5589bfcd..318800b2a943 100644
--- a/Documentation/namespaces/compatibility-list.txt
+++ b/Documentation/admin-guide/namespaces/compatibility-list.rst
@@ -1,4 +1,6 @@
- Namespaces compatibility list
+=============================
+Namespaces compatibility list
+=============================
This document contains the information about the problems user
may have when creating tasks living in different namespaces.
@@ -7,13 +9,16 @@ Here's the summary. This matrix shows the known problems, that
occur when tasks share some namespace (the columns) while living
in different other namespaces (the rows):
- UTS IPC VFS PID User Net
+==== === === === === ==== ===
+- UTS IPC VFS PID User Net
+==== === === === === ==== ===
UTS X
IPC X 1
VFS X
PID 1 1 X
User 2 2 X
Net X
+==== === === === === ==== ===
1. Both the IPC and the PID namespaces provide IDs to address
object inside the kernel. E.g. semaphore with IPCID or
@@ -36,4 +41,3 @@ Net X
even having equal UIDs.
But currently this is not so.
-
diff --git a/Documentation/admin-guide/namespaces/index.rst b/Documentation/admin-guide/namespaces/index.rst
new file mode 100644
index 000000000000..384f2e0f33d2
--- /dev/null
+++ b/Documentation/admin-guide/namespaces/index.rst
@@ -0,0 +1,11 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==========
+Namespaces
+==========
+
+.. toctree::
+ :maxdepth: 1
+
+ compatibility-list
+ resource-control
diff --git a/Documentation/namespaces/resource-control.txt b/Documentation/admin-guide/namespaces/resource-control.rst
index abc13c394738..369556e00f0c 100644
--- a/Documentation/namespaces/resource-control.txt
+++ b/Documentation/admin-guide/namespaces/resource-control.rst
@@ -1,3 +1,7 @@
+===========================
+Namespaces resource control
+===========================
+
There are a lot of kinds of objects in the kernel that don't have
individual limits or that have limits that are ineffective when a set
of processes is allowed to switch user ids. With user namespaces
diff --git a/Documentation/numastat.txt b/Documentation/admin-guide/numastat.rst
index aaf1667489f8..aaf1667489f8 100644
--- a/Documentation/numastat.txt
+++ b/Documentation/admin-guide/numastat.rst
diff --git a/Documentation/perf/arm-ccn.txt b/Documentation/admin-guide/perf/arm-ccn.rst
index 15cdb7bc57c3..832b0c64023a 100644
--- a/Documentation/perf/arm-ccn.txt
+++ b/Documentation/admin-guide/perf/arm-ccn.rst
@@ -1,3 +1,4 @@
+==========================
ARM Cache Coherent Network
==========================
@@ -29,6 +30,7 @@ Crosspoint watchpoint-based events (special "event" value 0xfe)
require "xp" and "vc" as above plus "port" (device port index),
"dir" (transmit/receive direction), comparator values ("cmp_l"
and "cmp_h") and "mask", being index of the comparator mask.
+
Masks are defined separately from the event description
(due to limited number of the config values) in the "cmp_mask"
directory, with first 8 configurable by user and additional
@@ -44,16 +46,16 @@ request the events on this processor (if not, the perf_event->cpu value
will be overwritten anyway). In case of this processor being offlined,
the events are migrated to another one and the attribute is updated.
-Example of perf tool use:
+Example of perf tool use::
-/ # perf list | grep ccn
- ccn/cycles/ [Kernel PMU event]
-<...>
- ccn/xp_valid_flit,xp=?,port=?,vc=?,dir=?/ [Kernel PMU event]
-<...>
+ / # perf list | grep ccn
+ ccn/cycles/ [Kernel PMU event]
+ <...>
+ ccn/xp_valid_flit,xp=?,port=?,vc=?,dir=?/ [Kernel PMU event]
+ <...>
-/ # perf stat -a -e ccn/cycles/,ccn/xp_valid_flit,xp=1,port=0,vc=1,dir=1/ \
- sleep 1
+ / # perf stat -a -e ccn/cycles/,ccn/xp_valid_flit,xp=1,port=0,vc=1,dir=1/ \
+ sleep 1
The driver does not support sampling, therefore "perf record" will
not work. Per-task (without "-a") perf sessions are not supported.
diff --git a/Documentation/perf/arm_dsu_pmu.txt b/Documentation/admin-guide/perf/arm_dsu_pmu.rst
index d611e15f5add..7fd34db75d13 100644
--- a/Documentation/perf/arm_dsu_pmu.txt
+++ b/Documentation/admin-guide/perf/arm_dsu_pmu.rst
@@ -1,3 +1,4 @@
+==================================
ARM DynamIQ Shared Unit (DSU) PMU
==================================
@@ -13,7 +14,7 @@ PMU doesn't support process specific events and cannot be used in sampling mode.
The DSU provides a bitmap for a subset of implemented events via hardware
registers. There is no way for the driver to determine if the other events
are available or not. Hence the driver exposes only those events advertised
-by the DSU, in "events" directory under :
+by the DSU, in "events" directory under::
/sys/bus/event_sources/devices/arm_dsu_<N>/
@@ -23,6 +24,6 @@ and use the raw event code for the unlisted events.
The driver also exposes the CPUs connected to the DSU instance in "associated_cpus".
-e.g usage :
+e.g usage::
perf stat -a -e arm_dsu_0/cycles/
diff --git a/Documentation/perf/hisi-pmu.txt b/Documentation/admin-guide/perf/hisi-pmu.rst
index 267a028b2741..404a5c3d9d00 100644
--- a/Documentation/perf/hisi-pmu.txt
+++ b/Documentation/admin-guide/perf/hisi-pmu.rst
@@ -1,5 +1,7 @@
+======================================================
HiSilicon SoC uncore Performance Monitoring Unit (PMU)
======================================================
+
The HiSilicon SoC chip includes various independent system device PMUs
such as L3 cache (L3C), Hydra Home Agent (HHA) and DDRC. These PMUs are
independent and have hardware logic to gather statistics and performance
@@ -11,11 +13,13 @@ called Super CPU cluster (SCCL) and is made up of 6 CCLs. Each SCCL has
two HHAs (0 - 1) and four DDRCs (0 - 3), respectively.
HiSilicon SoC uncore PMU driver
----------------------------------------
+-------------------------------
+
Each device PMU has separate registers for event counting, control and
interrupt, and the PMU driver shall register perf PMU drivers like L3C,
HHA and DDRC etc. The available events and configuration options shall
-be described in the sysfs, see :
+be described in the sysfs, see:
+
/sys/devices/hisi_sccl{X}_<l3c{Y}/hha{Y}/ddrc{Y}>/, or
/sys/bus/event_source/devices/hisi_sccl{X}_<l3c{Y}/hha{Y}/ddrc{Y}>.
The "perf list" command shall list the available events from sysfs.
@@ -24,27 +28,30 @@ Each L3C, HHA and DDRC is registered as a separate PMU with perf. The PMU
name will appear in event listing as hisi_sccl<sccl-id>_module<index-id>.
where "sccl-id" is the identifier of the SCCL and "index-id" is the index of
module.
+
e.g. hisi_sccl3_l3c0/rd_hit_cpipe is READ_HIT_CPIPE event of L3C index #0 in
SCCL ID #3.
+
e.g. hisi_sccl1_hha0/rx_operations is RX_OPERATIONS event of HHA index #0 in
SCCL ID #1.
The driver also provides a "cpumask" sysfs attribute, which shows the CPU core
ID used to count the uncore PMU event.
-Example usage of perf:
-$# perf list
-hisi_sccl3_l3c0/rd_hit_cpipe/ [kernel PMU event]
-------------------------------------------
-hisi_sccl3_l3c0/wr_hit_cpipe/ [kernel PMU event]
-------------------------------------------
-hisi_sccl1_l3c0/rd_hit_cpipe/ [kernel PMU event]
-------------------------------------------
-hisi_sccl1_l3c0/wr_hit_cpipe/ [kernel PMU event]
-------------------------------------------
-
-$# perf stat -a -e hisi_sccl3_l3c0/rd_hit_cpipe/ sleep 5
-$# perf stat -a -e hisi_sccl3_l3c0/config=0x02/ sleep 5
+Example usage of perf::
+
+ $# perf list
+ hisi_sccl3_l3c0/rd_hit_cpipe/ [kernel PMU event]
+ ------------------------------------------
+ hisi_sccl3_l3c0/wr_hit_cpipe/ [kernel PMU event]
+ ------------------------------------------
+ hisi_sccl1_l3c0/rd_hit_cpipe/ [kernel PMU event]
+ ------------------------------------------
+ hisi_sccl1_l3c0/wr_hit_cpipe/ [kernel PMU event]
+ ------------------------------------------
+
+ $# perf stat -a -e hisi_sccl3_l3c0/rd_hit_cpipe/ sleep 5
+ $# perf stat -a -e hisi_sccl3_l3c0/config=0x02/ sleep 5
The current driver does not support sampling. So "perf record" is unsupported.
Also attach to a task is unsupported as the events are all uncore.
diff --git a/Documentation/admin-guide/perf/index.rst b/Documentation/admin-guide/perf/index.rst
new file mode 100644
index 000000000000..ee4bfd2a740f
--- /dev/null
+++ b/Documentation/admin-guide/perf/index.rst
@@ -0,0 +1,16 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===========================
+Performance monitor support
+===========================
+
+.. toctree::
+ :maxdepth: 1
+
+ hisi-pmu
+ qcom_l2_pmu
+ qcom_l3_pmu
+ arm-ccn
+ xgene-pmu
+ arm_dsu_pmu
+ thunderx2-pmu
diff --git a/Documentation/perf/qcom_l2_pmu.txt b/Documentation/admin-guide/perf/qcom_l2_pmu.rst
index b25b97659ab9..c130178a4a55 100644
--- a/Documentation/perf/qcom_l2_pmu.txt
+++ b/Documentation/admin-guide/perf/qcom_l2_pmu.rst
@@ -1,3 +1,4 @@
+=====================================================================
Qualcomm Technologies Level-2 Cache Performance Monitoring Unit (PMU)
=====================================================================
@@ -28,7 +29,7 @@ The driver provides a "cpumask" sysfs attribute which contains a mask
consisting of one CPU per cluster which will be used to handle all the PMU
events on that cluster.
-Examples for use with perf:
+Examples for use with perf::
perf stat -e l2cache_0/config=0x001/,l2cache_0/config=0x042/ -a sleep 1
diff --git a/Documentation/perf/qcom_l3_pmu.txt b/Documentation/admin-guide/perf/qcom_l3_pmu.rst
index 96b3a9444a0d..a3d014a46bfd 100644
--- a/Documentation/perf/qcom_l3_pmu.txt
+++ b/Documentation/admin-guide/perf/qcom_l3_pmu.rst
@@ -1,3 +1,4 @@
+===========================================================================
Qualcomm Datacenter Technologies L3 Cache Performance Monitoring Unit (PMU)
===========================================================================
@@ -17,7 +18,7 @@ The hardware implements 32bit event counters and has a flat 8bit event space
exposed via the "event" format attribute. In addition to the 32bit physical
counters the driver supports virtual 64bit hardware counters by using hardware
counter chaining. This feature is exposed via the "lc" (long counter) format
-flag. E.g.:
+flag. E.g.::
perf stat -e l3cache_0_0/read-miss,lc/
diff --git a/Documentation/perf/thunderx2-pmu.txt b/Documentation/admin-guide/perf/thunderx2-pmu.rst
index dffc57143736..08e33675853a 100644
--- a/Documentation/perf/thunderx2-pmu.txt
+++ b/Documentation/admin-guide/perf/thunderx2-pmu.rst
@@ -1,3 +1,4 @@
+=============================================================
Cavium ThunderX2 SoC Performance Monitoring Unit (PMU UNCORE)
=============================================================
@@ -24,18 +25,18 @@ and configuration options under sysfs, see
The driver does not support sampling, therefore "perf record" will not
work. Per-task perf sessions are also not supported.
-Examples:
+Examples::
-# perf stat -a -e uncore_dmc_0/cnt_cycles/ sleep 1
+ # perf stat -a -e uncore_dmc_0/cnt_cycles/ sleep 1
-# perf stat -a -e \
-uncore_dmc_0/cnt_cycles/,\
-uncore_dmc_0/data_transfers/,\
-uncore_dmc_0/read_txns/,\
-uncore_dmc_0/write_txns/ sleep 1
+ # perf stat -a -e \
+ uncore_dmc_0/cnt_cycles/,\
+ uncore_dmc_0/data_transfers/,\
+ uncore_dmc_0/read_txns/,\
+ uncore_dmc_0/write_txns/ sleep 1
-# perf stat -a -e \
-uncore_l3c_0/read_request/,\
-uncore_l3c_0/read_hit/,\
-uncore_l3c_0/inv_request/,\
-uncore_l3c_0/inv_hit/ sleep 1
+ # perf stat -a -e \
+ uncore_l3c_0/read_request/,\
+ uncore_l3c_0/read_hit/,\
+ uncore_l3c_0/inv_request/,\
+ uncore_l3c_0/inv_hit/ sleep 1
diff --git a/Documentation/perf/xgene-pmu.txt b/Documentation/admin-guide/perf/xgene-pmu.rst
index d7cff4454e5b..644f8ed89152 100644
--- a/Documentation/perf/xgene-pmu.txt
+++ b/Documentation/admin-guide/perf/xgene-pmu.rst
@@ -1,3 +1,4 @@
+================================================
APM X-Gene SoC Performance Monitoring Unit (PMU)
================================================
@@ -33,7 +34,7 @@ each PMU, please refer to APM X-Gene User Manual.
Each perf driver also provides a "cpumask" sysfs attribute, which contains a
single CPU ID of the processor which will be used to handle all the PMU events.
-Example for perf tool use:
+Example for perf tool use::
/ # perf list | grep -e l3c -e iob -e mcb -e mc
l3c0/ackq-full/ [Kernel PMU event]
diff --git a/Documentation/pnp.txt b/Documentation/admin-guide/pnp.rst
index bab2d10631f0..bab2d10631f0 100644
--- a/Documentation/pnp.txt
+++ b/Documentation/admin-guide/pnp.rst
diff --git a/Documentation/driver-api/rapidio.rst b/Documentation/admin-guide/rapidio.rst
index 71ff658ab78e..71ff658ab78e 100644
--- a/Documentation/driver-api/rapidio.rst
+++ b/Documentation/admin-guide/rapidio.rst
diff --git a/Documentation/rtc.txt b/Documentation/admin-guide/rtc.rst
index 688c95b11919..688c95b11919 100644
--- a/Documentation/rtc.txt
+++ b/Documentation/admin-guide/rtc.rst
diff --git a/Documentation/svga.txt b/Documentation/admin-guide/svga.rst
index b6c2f9acca92..b6c2f9acca92 100644
--- a/Documentation/svga.txt
+++ b/Documentation/admin-guide/svga.rst
diff --git a/Documentation/admin-guide/sysctl/abi.rst b/Documentation/admin-guide/sysctl/abi.rst
new file mode 100644
index 000000000000..599bcde7f0b7
--- /dev/null
+++ b/Documentation/admin-guide/sysctl/abi.rst
@@ -0,0 +1,67 @@
+================================
+Documentation for /proc/sys/abi/
+================================
+
+kernel version 2.6.0.test2
+
+Copyright (c) 2003, Fabian Frederick <ffrederick@users.sourceforge.net>
+
+For general info: index.rst.
+
+------------------------------------------------------------------------------
+
+This path is binary emulation relevant aka personality types aka abi.
+When a process is executed, it's linked to an exec_domain whose
+personality is defined using values available from /proc/sys/abi.
+You can find further details about abi in include/linux/personality.h.
+
+Here are the files featuring in 2.6 kernel:
+
+- defhandler_coff
+- defhandler_elf
+- defhandler_lcall7
+- defhandler_libsco
+- fake_utsname
+- trace
+
+defhandler_coff
+---------------
+
+defined value:
+ PER_SCOSVR3::
+
+ 0x0003 | STICKY_TIMEOUTS | WHOLE_SECONDS | SHORT_INODE
+
+defhandler_elf
+--------------
+
+defined value:
+ PER_LINUX::
+
+ 0
+
+defhandler_lcall7
+-----------------
+
+defined value:
+ PER_SVR4::
+
+ 0x0001 | STICKY_TIMEOUTS | MMAP_PAGE_ZERO,
+
+defhandler_libsco
+-----------------
+
+defined value:
+ PER_SVR4::
+
+ 0x0001 | STICKY_TIMEOUTS | MMAP_PAGE_ZERO,
+
+fake_utsname
+------------
+
+Unused
+
+trace
+-----
+
+Unused
diff --git a/Documentation/sysctl/fs.txt b/Documentation/admin-guide/sysctl/fs.rst
index ebc679bcb2dc..2a45119e3331 100644
--- a/Documentation/sysctl/fs.txt
+++ b/Documentation/admin-guide/sysctl/fs.rst
@@ -1,10 +1,16 @@
-Documentation for /proc/sys/fs/* kernel version 2.2.10
- (c) 1998, 1999, Rik van Riel <riel@nl.linux.org>
- (c) 2009, Shen Feng<shen@cn.fujitsu.com>
+===============================
+Documentation for /proc/sys/fs/
+===============================
-For general info and legal blurb, please look in README.
+kernel version 2.2.10
-==============================================================
+Copyright (c) 1998, 1999, Rik van Riel <riel@nl.linux.org>
+
+Copyright (c) 2009, Shen Feng <shen@cn.fujitsu.com>
+
+For general info and legal blurb, please look in intro.rst.
+
+------------------------------------------------------------------------------
This file contains documentation for the sysctl files in
/proc/sys/fs/ and is valid for Linux kernel version 2.2.
@@ -16,9 +22,10 @@ system, it is advisable to read both documentation and source
before actually making adjustments.
1. /proc/sys/fs
-----------------------------------------------------------
+===============
Currently, these files are in /proc/sys/fs:
+
- aio-max-nr
- aio-nr
- dentry-state
@@ -42,9 +49,9 @@ Currently, these files are in /proc/sys/fs:
- super-max
- super-nr
-==============================================================
-aio-nr & aio-max-nr:
+aio-nr & aio-max-nr
+-------------------
aio-nr is the running total of the number of events specified on the
io_setup system call for all currently active aio contexts. If aio-nr
@@ -52,21 +59,20 @@ reaches aio-max-nr then io_setup will fail with EAGAIN. Note that
raising aio-max-nr does not result in the pre-allocation or re-sizing
of any kernel data structures.
-==============================================================
-dentry-state:
+dentry-state
+------------
-From linux/include/linux/dcache.h:
---------------------------------------------------------------
-struct dentry_stat_t dentry_stat {
+From linux/include/linux/dcache.h::
+
+ struct dentry_stat_t dentry_stat {
int nr_dentry;
int nr_unused;
int age_limit; /* age in seconds */
int want_pages; /* pages requested by system */
int nr_negative; /* # of unused negative dentries */
int dummy; /* Reserved for future use */
-};
---------------------------------------------------------------
+ };
Dentries are dynamically allocated and deallocated.
@@ -84,9 +90,9 @@ negative dentries which do not map to any files. Instead,
they help speeding up rejection of non-existing files provided
by the users.
-==============================================================
-dquot-max & dquot-nr:
+dquot-max & dquot-nr
+--------------------
The file dquot-max shows the maximum number of cached disk
quota entries.
@@ -98,9 +104,9 @@ If the number of free cached disk quotas is very low and
you have some awesome number of simultaneous system users,
you might want to raise the limit.
-==============================================================
-file-max & file-nr:
+file-max & file-nr
+------------------
The value in file-max denotes the maximum number of file-
handles that the Linux kernel will allocate. When you get lots
@@ -119,18 +125,19 @@ used file handles.
Attempts to allocate more file descriptors than file-max are
reported with printk, look for "VFS: file-max limit <number>
reached".
-==============================================================
-nr_open:
+
+nr_open
+-------
This denotes the maximum number of file-handles a process can
allocate. Default value is 1024*1024 (1048576) which should be
enough for most machines. Actual limit depends on RLIMIT_NOFILE
resource limit.
-==============================================================
-inode-max, inode-nr & inode-state:
+inode-max, inode-nr & inode-state
+---------------------------------
As with file handles, the kernel allocates the inode structures
dynamically, but can't free them yet.
@@ -157,9 +164,9 @@ preshrink is nonzero when the nr_inodes > inode-max and the
system needs to prune the inode list instead of allocating
more.
-==============================================================
-overflowgid & overflowuid:
+overflowgid & overflowuid
+-------------------------
Some filesystems only support 16-bit UIDs and GIDs, although in Linux
UIDs and GIDs are 32 bits. When one of these filesystems is mounted
@@ -169,18 +176,18 @@ to a fixed value before being written to disk.
These sysctls allow you to change the value of the fixed UID and GID.
The default is 65534.
-==============================================================
-pipe-user-pages-hard:
+pipe-user-pages-hard
+--------------------
Maximum total number of pages a non-privileged user may allocate for pipes.
Once this limit is reached, no new pipes may be allocated until usage goes
below the limit again. When set to 0, no limit is applied, which is the default
setting.
-==============================================================
-pipe-user-pages-soft:
+pipe-user-pages-soft
+--------------------
Maximum total number of pages a non-privileged user may allocate for pipes
before the pipe size gets limited to a single page. Once this limit is reached,
@@ -190,9 +197,9 @@ denied until usage goes below the limit again. The default value allows to
allocate up to 1024 pipes at their default size. When set to 0, no limit is
applied.
-==============================================================
-protected_fifos:
+protected_fifos
+---------------
The intent of this protection is to avoid unintentional writes to
an attacker-controlled FIFO, where a program expected to create a regular
@@ -208,9 +215,9 @@ When set to "2" it also applies to group writable sticky directories.
This protection is based on the restrictions in Openwall.
-==============================================================
-protected_hardlinks:
+protected_hardlinks
+--------------------
A long-standing class of security issues is the hardlink-based
time-of-check-time-of-use race, most commonly seen in world-writable
@@ -228,9 +235,9 @@ already own the source file, or do not have read/write access to it.
This protection is based on the restrictions in Openwall and grsecurity.
-==============================================================
-protected_regular:
+protected_regular
+-----------------
This protection is similar to protected_fifos, but it
avoids writes to an attacker-controlled regular file, where a program
@@ -244,9 +251,9 @@ owned by the owner of the directory.
When set to "2" it also applies to group writable sticky directories.
-==============================================================
-protected_symlinks:
+protected_symlinks
+------------------
A long-standing class of security issues is the symlink-based
time-of-check-time-of-use race, most commonly seen in world-writable
@@ -264,34 +271,38 @@ follower match, or when the directory owner matches the symlink's owner.
This protection is based on the restrictions in Openwall and grsecurity.
-==============================================================
suid_dumpable:
+--------------
This value can be used to query and set the core dump mode for setuid
or otherwise protected/tainted binaries. The modes are
-0 - (default) - traditional behaviour. Any process which has changed
- privilege levels or is execute only will not be dumped.
-1 - (debug) - all processes dump core when possible. The core dump is
- owned by the current user and no security is applied. This is
- intended for system debugging situations only. Ptrace is unchecked.
- This is insecure as it allows regular users to examine the memory
- contents of privileged processes.
-2 - (suidsafe) - any binary which normally would not be dumped is dumped
- anyway, but only if the "core_pattern" kernel sysctl is set to
- either a pipe handler or a fully qualified path. (For more details
- on this limitation, see CVE-2006-2451.) This mode is appropriate
- when administrators are attempting to debug problems in a normal
- environment, and either have a core dump pipe handler that knows
- to treat privileged core dumps with care, or specific directory
- defined for catching core dumps. If a core dump happens without
- a pipe handler or fully qualifid path, a message will be emitted
- to syslog warning about the lack of a correct setting.
-
-==============================================================
-
-super-max & super-nr:
+= ========== ===============================================================
+0 (default) traditional behaviour. Any process which has changed
+ privilege levels or is execute only will not be dumped.
+1 (debug) all processes dump core when possible. The core dump is
+ owned by the current user and no security is applied. This is
+ intended for system debugging situations only.
+ Ptrace is unchecked.
+ This is insecure as it allows regular users to examine the
+ memory contents of privileged processes.
+2 (suidsafe) any binary which normally would not be dumped is dumped
+ anyway, but only if the "core_pattern" kernel sysctl is set to
+ either a pipe handler or a fully qualified path. (For more
+ details on this limitation, see CVE-2006-2451.) This mode is
+ appropriate when administrators are attempting to debug
+ problems in a normal environment, and either have a core dump
+ pipe handler that knows to treat privileged core dumps with
+ care, or specific directory defined for catching core dumps.
+ If a core dump happens without a pipe handler or fully
+ qualified path, a message will be emitted to syslog warning
+ about the lack of a correct setting.
+= ========== ===============================================================
+
+
+super-max & super-nr
+--------------------
These numbers control the maximum number of superblocks, and
thus the maximum number of mounted filesystems the kernel
@@ -299,33 +310,33 @@ can have. You only need to increase super-max if you need to
mount more filesystems than the current value in super-max
allows you to.
-==============================================================
-aio-nr & aio-max-nr:
+aio-nr & aio-max-nr
+-------------------
aio-nr shows the current system-wide number of asynchronous io
requests. aio-max-nr allows you to change the maximum value
aio-nr can grow to.
-==============================================================
-mount-max:
+mount-max
+---------
This denotes the maximum number of mounts that may exist
in a mount namespace.
-==============================================================
2. /proc/sys/fs/binfmt_misc
-----------------------------------------------------------
+===========================
Documentation for the files in /proc/sys/fs/binfmt_misc is
in Documentation/admin-guide/binfmt-misc.rst.
3. /proc/sys/fs/mqueue - POSIX message queues filesystem
-----------------------------------------------------------
+========================================================
+
The "mqueue" filesystem provides the necessary kernel features to enable the
creation of a user space library that implements the POSIX message queues
@@ -356,7 +367,7 @@ the default message size value if attr parameter of mq_open(2) is NULL. If it
exceeds msgsize_max, the default value is initialized to msgsize_max.
4. /proc/sys/fs/epoll - Configuration options for the epoll interface
---------------------------------------------------------
+=====================================================================
This directory contains configuration options for the epoll(7) interface.
@@ -371,4 +382,3 @@ Each "watch" costs roughly 90 bytes on a 32bit kernel, and roughly 160 bytes
on a 64bit one.
The current default value for max_user_watches is the 1/32 of the available
low memory, divided by the "watch" cost in bytes.
-
diff --git a/Documentation/sysctl/README b/Documentation/admin-guide/sysctl/index.rst
index d5f24ab0ecc3..03346f98c7b9 100644
--- a/Documentation/sysctl/README
+++ b/Documentation/admin-guide/sysctl/index.rst
@@ -1,5 +1,10 @@
-Documentation for /proc/sys/ kernel version 2.2.10
- (c) 1998, 1999, Rik van Riel <riel@nl.linux.org>
+===========================
+Documentation for /proc/sys
+===========================
+
+Copyright (c) 1998, 1999, Rik van Riel <riel@nl.linux.org>
+
+------------------------------------------------------------------------------
'Why', I hear you ask, 'would anyone even _want_ documentation
for them sysctl files? If anybody really needs it, it's all in
@@ -12,11 +17,12 @@ have the time or knowledge to read the source code.
Furthermore, the programmers who built sysctl have built it to
be actually used, not just for the fun of programming it :-)
-==============================================================
+------------------------------------------------------------------------------
Legal blurb:
As usual, there are two main things to consider:
+
1. you get what you pay for
2. it's free
@@ -35,15 +41,17 @@ stories to: <riel@nl.linux.org>
Rik van Riel.
-==============================================================
+--------------------------------------------------------------
-Introduction:
+Introduction
+============
Sysctl is a means of configuring certain aspects of the kernel
at run-time, and the /proc/sys/ directory is there so that you
don't even need special tools to do it!
In fact, there are only four things needed to use these config
facilities:
+
- a running Linux system
- root access
- common sense (this is especially hard to come by these days)
@@ -54,7 +62,9 @@ several (arch-dependent?) subdirs. Each subdir is mainly about
one part of the kernel, so you can do configuration on a piece
by piece basis, or just some 'thematic frobbing'.
-The subdirs are about:
+This documentation is about:
+
+=============== ===============================================================
abi/ execution domains & personalities
debug/ <empty>
dev/ device specific information (eg dev/cdrom/info)
@@ -70,7 +80,19 @@ sunrpc/ SUN Remote Procedure Call (NFS)
vm/ memory management tuning
buffer and cache management
user/ Per user per user namespace limits
+=============== ===============================================================
These are the subdirs I have on my system. There might be more
or other subdirs in another setup. If you see another dir, I'd
really like to hear about it :-)
+
+.. toctree::
+ :maxdepth: 1
+
+ abi
+ fs
+ kernel
+ net
+ sunrpc
+ user
+ vm
diff --git a/Documentation/sysctl/kernel.txt b/Documentation/admin-guide/sysctl/kernel.rst
index 1b2fe17cd2fa..032c7cd3cede 100644
--- a/Documentation/sysctl/kernel.txt
+++ b/Documentation/admin-guide/sysctl/kernel.rst
@@ -1,10 +1,16 @@
-Documentation for /proc/sys/kernel/* kernel version 2.2.10
- (c) 1998, 1999, Rik van Riel <riel@nl.linux.org>
- (c) 2009, Shen Feng<shen@cn.fujitsu.com>
+===================================
+Documentation for /proc/sys/kernel/
+===================================
-For general info and legal blurb, please look in README.
+kernel version 2.2.10
-==============================================================
+Copyright (c) 1998, 1999, Rik van Riel <riel@nl.linux.org>
+
+Copyright (c) 2009, Shen Feng<shen@cn.fujitsu.com>
+
+For general info and legal blurb, please look in index.rst.
+
+------------------------------------------------------------------------------
This file contains documentation for the sysctl files in
/proc/sys/kernel/ and is valid for Linux kernel version 2.2.
@@ -101,9 +107,9 @@ show up in /proc/sys/kernel:
- watchdog_thresh
- version
-==============================================================
acct:
+=====
highwater lowwater frequency
@@ -118,18 +124,18 @@ That is, suspend accounting if there left <= 2% free; resume it
if we got >=4%; consider information about amount of free space
valid for 30 seconds.
-==============================================================
acpi_video_flags:
+=================
flags
See Doc*/kernel/power/video.txt, it allows mode of video boot to be
set during run time.
-==============================================================
auto_msgmni:
+============
This variable has no effect and may be removed in future kernel
releases. Reading it always returns 0.
@@ -139,9 +145,8 @@ Echoing "1" into this file enabled msgmni automatic recomputing.
Echoing "0" turned it off. auto_msgmni default value was 1.
-==============================================================
-
bootloader_type:
+================
x86 bootloader identification
@@ -156,9 +161,9 @@ the value 340 = 0x154.
See the type_of_loader and ext_loader_type fields in
Documentation/x86/boot.rst for additional information.
-==============================================================
bootloader_version:
+===================
x86 bootloader version
@@ -168,27 +173,31 @@ file will contain the value 564 = 0x234.
See the type_of_loader and ext_loader_ver fields in
Documentation/x86/boot.rst for additional information.
-==============================================================
-cap_last_cap
+cap_last_cap:
+=============
Highest valid capability of the running kernel. Exports
CAP_LAST_CAP from the kernel.
-==============================================================
core_pattern:
+=============
core_pattern is used to specify a core dumpfile pattern name.
-. max length 127 characters; default value is "core"
-. core_pattern is used as a pattern template for the output filename;
+
+* max length 127 characters; default value is "core"
+* core_pattern is used as a pattern template for the output filename;
certain string patterns (beginning with '%') are substituted with
their actual values.
-. backward compatibility with core_uses_pid:
+* backward compatibility with core_uses_pid:
+
If core_pattern does not include "%p" (default does not)
and core_uses_pid is set, then .PID will be appended to
the filename.
-. corename format specifiers:
+
+* corename format specifiers::
+
%<NUL> '%' is dropped
%% output one '%'
%p pid
@@ -205,13 +214,14 @@ core_pattern is used to specify a core dumpfile pattern name.
%e executable filename (may be shortened)
%E executable path
%<OTHER> both are dropped
-. If the first character of the pattern is a '|', the kernel will treat
+
+* If the first character of the pattern is a '|', the kernel will treat
the rest of the pattern as a command to run. The core dump will be
written to the standard input of that program instead of to a file.
-==============================================================
core_pipe_limit:
+================
This sysctl is only applicable when core_pattern is configured to pipe
core files to a user space helper (when the first character of
@@ -232,9 +242,9 @@ parallel, but that no waiting will take place (i.e. the collecting
process is not guaranteed access to /proc/<crashing pid>/). This
value defaults to 0.
-==============================================================
core_uses_pid:
+==============
The default coredump filename is "core". By setting
core_uses_pid to 1, the coredump filename becomes core.PID.
@@ -242,9 +252,9 @@ If core_pattern does not include "%p" (default does not)
and core_uses_pid is set, then .PID will be appended to
the filename.
-==============================================================
ctrl-alt-del:
+=============
When the value in this file is 0, ctrl-alt-del is trapped and
sent to the init(1) program to handle a graceful restart.
@@ -252,14 +262,15 @@ When, however, the value is > 0, Linux's reaction to a Vulcan
Nerve Pinch (tm) will be an immediate reboot, without even
syncing its dirty buffers.
-Note: when a program (like dosemu) has the keyboard in 'raw'
-mode, the ctrl-alt-del is intercepted by the program before it
-ever reaches the kernel tty layer, and it's up to the program
-to decide what to do with it.
+Note:
+ when a program (like dosemu) has the keyboard in 'raw'
+ mode, the ctrl-alt-del is intercepted by the program before it
+ ever reaches the kernel tty layer, and it's up to the program
+ to decide what to do with it.
-==============================================================
dmesg_restrict:
+===============
This toggle indicates whether unprivileged users are prevented
from using dmesg(8) to view messages from the kernel's log buffer.
@@ -270,18 +281,21 @@ dmesg(8).
The kernel config option CONFIG_SECURITY_DMESG_RESTRICT sets the
default value of dmesg_restrict.
-==============================================================
domainname & hostname:
+======================
These files can be used to set the NIS/YP domainname and the
hostname of your box in exactly the same way as the commands
-domainname and hostname, i.e.:
-# echo "darkstar" > /proc/sys/kernel/hostname
-# echo "mydomain" > /proc/sys/kernel/domainname
-has the same effect as
-# hostname "darkstar"
-# domainname "mydomain"
+domainname and hostname, i.e.::
+
+ # echo "darkstar" > /proc/sys/kernel/hostname
+ # echo "mydomain" > /proc/sys/kernel/domainname
+
+has the same effect as::
+
+ # hostname "darkstar"
+ # domainname "mydomain"
Note, however, that the classic darkstar.frop.org has the
hostname "darkstar" and DNS (Internet Domain Name Server)
@@ -290,8 +304,9 @@ Information Service) or YP (Yellow Pages) domainname. These two
domain names are in general different. For a detailed discussion
see the hostname(1) man page.
-==============================================================
+
hardlockup_all_cpu_backtrace:
+=============================
This value controls the hard lockup detector behavior when a hard
lockup condition is detected as to whether or not to gather further
@@ -301,9 +316,10 @@ will be initiated.
0: do nothing. This is the default behavior.
1: on detection capture more debug information.
-==============================================================
+
hardlockup_panic:
+=================
This parameter can be used to control whether the kernel panics
when a hard lockup is detected.
@@ -311,19 +327,19 @@ when a hard lockup is detected.
0 - don't panic on hard lockup
1 - panic on hard lockup
-See Documentation/lockup-watchdogs.txt for more information. This can
+See Documentation/admin-guide/lockup-watchdogs.rst for more information. This can
also be set using the nmi_watchdog kernel parameter.
-==============================================================
hotplug:
+========
Path for the hotplug policy agent.
Default value is "/sbin/hotplug".
-==============================================================
hung_task_panic:
+================
Controls the kernel's behavior when a hung task is detected.
This file shows up if CONFIG_DETECT_HUNG_TASK is enabled.
@@ -332,27 +348,28 @@ This file shows up if CONFIG_DETECT_HUNG_TASK is enabled.
1: panic immediately.
-==============================================================
hung_task_check_count:
+======================
The upper bound on the number of tasks that are checked.
This file shows up if CONFIG_DETECT_HUNG_TASK is enabled.
-==============================================================
hung_task_timeout_secs:
+=======================
When a task in D state did not get scheduled
for more than this value report a warning.
This file shows up if CONFIG_DETECT_HUNG_TASK is enabled.
0: means infinite timeout - no checking done.
+
Possible values to set are in range {0..LONG_MAX/HZ}.
-==============================================================
hung_task_check_interval_secs:
+==============================
Hung task check interval. If hung task checking is enabled
(see hung_task_timeout_secs), the check is done every
@@ -362,9 +379,9 @@ This file shows up if CONFIG_DETECT_HUNG_TASK is enabled.
0 (default): means use hung_task_timeout_secs as checking interval.
Possible values to set are in range {0..LONG_MAX/HZ}.
-==============================================================
hung_task_warnings:
+===================
The maximum number of warnings to report. During a check interval
if a hung task is detected, this value is decreased by 1.
@@ -373,9 +390,9 @@ This file shows up if CONFIG_DETECT_HUNG_TASK is enabled.
-1: report an infinite number of warnings.
-==============================================================
hyperv_record_panic_msg:
+========================
Controls whether the panic kmsg data should be reported to Hyper-V.
@@ -383,9 +400,9 @@ Controls whether the panic kmsg data should be reported to Hyper-V.
1: report the panic kmsg data. This is the default behavior.
-==============================================================
kexec_load_disabled:
+====================
A toggle indicating if the kexec_load syscall has been disabled. This
value defaults to 0 (false: kexec_load enabled), but can be set to 1
@@ -395,9 +412,9 @@ loaded before disabling the syscall, allowing a system to set up (and
later use) an image without it being altered. Generally used together
with the "modules_disabled" sysctl.
-==============================================================
kptr_restrict:
+==============
This toggle indicates whether restrictions are placed on
exposing kernel addresses via /proc and other interfaces.
@@ -420,16 +437,16 @@ values to unprivileged users is a concern.
When kptr_restrict is set to (2), kernel pointers printed using
%pK will be replaced with 0's regardless of privileges.
-==============================================================
l2cr: (PPC only)
+================
This flag controls the L2 cache of G3 processor boards. If
0, the cache is disabled. Enabled if nonzero.
-==============================================================
modules_disabled:
+=================
A toggle value indicating if modules are allowed to be loaded
in an otherwise modular kernel. This toggle defaults to off
@@ -437,9 +454,9 @@ in an otherwise modular kernel. This toggle defaults to off
neither loaded nor unloaded, and the toggle cannot be set back
to false. Generally used with the "kexec_load_disabled" toggle.
-==============================================================
msg_next_id, sem_next_id, and shm_next_id:
+==========================================
These three toggles allow specifying the desired id for the next allocated IPC
object: message, semaphore or shared memory respectively.
@@ -448,21 +465,22 @@ By default they are equal to -1, which means generic allocation logic.
Possible values to set are in range {0..INT_MAX}.
Notes:
-1) kernel doesn't guarantee, that new object will have desired id. So,
-it's up to userspace, how to handle an object with "wrong" id.
-2) Toggle with non-default value will be set back to -1 by kernel after
-successful IPC object allocation. If an IPC object allocation syscall
-fails, it is undefined if the value remains unmodified or is reset to -1.
+ 1) kernel doesn't guarantee, that new object will have desired id. So,
+ it's up to userspace, how to handle an object with "wrong" id.
+ 2) Toggle with non-default value will be set back to -1 by kernel after
+ successful IPC object allocation. If an IPC object allocation syscall
+ fails, it is undefined if the value remains unmodified or is reset to -1.
-==============================================================
nmi_watchdog:
+=============
This parameter can be used to control the NMI watchdog
(i.e. the hard lockup detector) on x86 systems.
- 0 - disable the hard lockup detector
- 1 - enable the hard lockup detector
+0 - disable the hard lockup detector
+
+1 - enable the hard lockup detector
The hard lockup detector monitors each CPU for its ability to respond to
timer interrupts. The mechanism utilizes CPU performance counter registers
@@ -470,15 +488,15 @@ that are programmed to generate Non-Maskable Interrupts (NMIs) periodically
while a CPU is busy. Hence, the alternative name 'NMI watchdog'.
The NMI watchdog is disabled by default if the kernel is running as a guest
-in a KVM virtual machine. This default can be overridden by adding
+in a KVM virtual machine. This default can be overridden by adding::
nmi_watchdog=1
to the guest kernel command line (see Documentation/admin-guide/kernel-parameters.rst).
-==============================================================
-numa_balancing
+numa_balancing:
+===============
Enables/disables automatic page fault based NUMA memory
balancing. Memory is moved automatically to nodes
@@ -500,10 +518,9 @@ faults may be controlled by the numa_balancing_scan_period_min_ms,
numa_balancing_scan_delay_ms, numa_balancing_scan_period_max_ms,
numa_balancing_scan_size_mb, and numa_balancing_settle_count sysctls.
-==============================================================
+numa_balancing_scan_period_min_ms, numa_balancing_scan_delay_ms, numa_balancing_scan_period_max_ms, numa_balancing_scan_size_mb
+===============================================================================================================================
-numa_balancing_scan_period_min_ms, numa_balancing_scan_delay_ms,
-numa_balancing_scan_period_max_ms, numa_balancing_scan_size_mb
Automatic NUMA balancing scans tasks address space and unmaps pages to
detect if pages are properly placed or if the data should be migrated to a
@@ -539,16 +556,18 @@ rate for each task.
numa_balancing_scan_size_mb is how many megabytes worth of pages are
scanned for a given scan.
-==============================================================
osrelease, ostype & version:
+============================
+
+::
-# cat osrelease
-2.1.88
-# cat ostype
-Linux
-# cat version
-#5 Wed Feb 25 21:49:24 MET 1998
+ # cat osrelease
+ 2.1.88
+ # cat ostype
+ Linux
+ # cat version
+ #5 Wed Feb 25 21:49:24 MET 1998
The files osrelease and ostype should be clear enough. Version
needs a little more clarification however. The '#5' means that
@@ -556,9 +575,9 @@ this is the fifth kernel built from this source base and the
date behind it indicates the time the kernel was built.
The only way to tune these values is to rebuild the kernel :-)
-==============================================================
overflowgid & overflowuid:
+==========================
if your architecture did not always support 32-bit UIDs (i.e. arm,
i386, m68k, sh, and sparc32), a fixed UID and GID will be returned to
@@ -568,17 +587,17 @@ actual UID or GID would exceed 65535.
These sysctls allow you to change the value of the fixed UID and GID.
The default is 65534.
-==============================================================
panic:
+======
The value in this file represents the number of seconds the kernel
waits before rebooting on a panic. When you use the software watchdog,
the recommended setting is 60.
-==============================================================
panic_on_io_nmi:
+================
Controls the kernel's behavior when a CPU receives an NMI caused by
an IO error.
@@ -591,20 +610,20 @@ an IO error.
servers issue this sort of NMI when the dump button is pushed,
and you can use this option to take a crash dump.
-==============================================================
panic_on_oops:
+==============
Controls the kernel's behaviour when an oops or BUG is encountered.
0: try to continue operation
-1: panic immediately. If the `panic' sysctl is also non-zero then the
+1: panic immediately. If the `panic` sysctl is also non-zero then the
machine will be rebooted.
-==============================================================
panic_on_stackoverflow:
+=======================
Controls the kernel's behavior when detecting the overflows of
kernel, IRQ and exception stacks except a user stack.
@@ -614,9 +633,9 @@ This file shows up if CONFIG_DEBUG_STACKOVERFLOW is enabled.
1: panic immediately.
-==============================================================
panic_on_unrecovered_nmi:
+=========================
The default Linux behaviour on an NMI of either memory or unknown is
to continue operation. For many environments such as scientific
@@ -627,9 +646,9 @@ A small number of systems do generate NMI's for bizarre random reasons
such as power management so the default is off. That sysctl works like
the existing panic controls already in that directory.
-==============================================================
panic_on_warn:
+==============
Calls panic() in the WARN() path when set to 1. This is useful to avoid
a kernel rebuild when attempting to kdump at the location of a WARN().
@@ -638,25 +657,28 @@ a kernel rebuild when attempting to kdump at the location of a WARN().
1: call panic() after printing out WARN() location.
-==============================================================
panic_print:
+============
Bitmask for printing system info when panic happens. User can choose a
combination of the following bits:
-bit 0: print all tasks info
-bit 1: print system memory info
-bit 2: print timer info
-bit 3: print locks info if CONFIG_LOCKDEP is on
-bit 4: print ftrace buffer
+===== ========================================
+bit 0 print all tasks info
+bit 1 print system memory info
+bit 2 print timer info
+bit 3 print locks info if CONFIG_LOCKDEP is on
+bit 4 print ftrace buffer
+===== ========================================
+
+So for example to print tasks and memory info on panic, user can::
-So for example to print tasks and memory info on panic, user can:
echo 3 > /proc/sys/kernel/panic_print
-==============================================================
panic_on_rcu_stall:
+===================
When set to 1, calls panic() after RCU stall detection messages. This
is useful to define the root cause of RCU stalls using a vmcore.
@@ -665,9 +687,9 @@ is useful to define the root cause of RCU stalls using a vmcore.
1: panic() after printing RCU stall messages.
-==============================================================
perf_cpu_time_max_percent:
+==========================
Hints to the kernel how much CPU time it should be allowed to
use to handle perf sampling events. If the perf subsystem
@@ -680,10 +702,12 @@ unexpectedly take too long to execute, the NMIs can become
stacked up next to each other so much that nothing else is
allowed to execute.
-0: disable the mechanism. Do not monitor or correct perf's
+0:
+ disable the mechanism. Do not monitor or correct perf's
sampling rate no matter how CPU time it takes.
-1-100: attempt to throttle perf's sample rate to this
+1-100:
+ attempt to throttle perf's sample rate to this
percentage of CPU. Note: the kernel calculates an
"expected" length of each sample event. 100 here means
100% of that expected length. Even if this is set to
@@ -691,23 +715,30 @@ allowed to execute.
length is exceeded. Set to 0 if you truly do not care
how much CPU is consumed.
-==============================================================
perf_event_paranoid:
+====================
Controls use of the performance events system by unprivileged
users (without CAP_SYS_ADMIN). The default value is 2.
- -1: Allow use of (almost) all events by all users
+=== ==================================================================
+ -1 Allow use of (almost) all events by all users
+
Ignore mlock limit after perf_event_mlock_kb without CAP_IPC_LOCK
->=0: Disallow ftrace function tracepoint by users without CAP_SYS_ADMIN
+
+>=0 Disallow ftrace function tracepoint by users without CAP_SYS_ADMIN
+
Disallow raw tracepoint access by users without CAP_SYS_ADMIN
->=1: Disallow CPU event access by users without CAP_SYS_ADMIN
->=2: Disallow kernel profiling by users without CAP_SYS_ADMIN
-==============================================================
+>=1 Disallow CPU event access by users without CAP_SYS_ADMIN
+
+>=2 Disallow kernel profiling by users without CAP_SYS_ADMIN
+=== ==================================================================
+
perf_event_max_stack:
+=====================
Controls maximum number of stack frames to copy for (attr.sample_type &
PERF_SAMPLE_CALLCHAIN) configured events, for instance, when using
@@ -718,17 +749,17 @@ enabled, otherwise writing to this file will return -EBUSY.
The default value is 127.
-==============================================================
perf_event_mlock_kb:
+====================
Control size of per-cpu ring buffer not counted against mlock limit.
The default value is 512 + 1 page
-==============================================================
perf_event_max_contexts_per_stack:
+==================================
Controls maximum number of stack frame context entries for
(attr.sample_type & PERF_SAMPLE_CALLCHAIN) configured events, for
@@ -739,25 +770,25 @@ enabled, otherwise writing to this file will return -EBUSY.
The default value is 8.
-==============================================================
pid_max:
+========
PID allocation wrap value. When the kernel's next PID value
reaches this value, it wraps back to a minimum PID value.
PIDs of value pid_max or larger are not allocated.
-==============================================================
ns_last_pid:
+============
The last pid allocated in the current (the one task using this sysctl
lives in) pid namespace. When selecting a pid for a next task on fork
kernel tries to allocate a number starting from this one.
-==============================================================
powersave-nap: (PPC only)
+=========================
If set, Linux-PPC will use the 'nap' mode of powersaving,
otherwise the 'doze' mode will be used.
@@ -765,6 +796,7 @@ otherwise the 'doze' mode will be used.
==============================================================
printk:
+=======
The four values in printk denote: console_loglevel,
default_message_loglevel, minimum_console_loglevel and
@@ -774,25 +806,29 @@ These values influence printk() behavior when printing or
logging error messages. See 'man 2 syslog' for more info on
the different loglevels.
-- console_loglevel: messages with a higher priority than
- this will be printed to the console
-- default_message_loglevel: messages without an explicit priority
- will be printed with this priority
-- minimum_console_loglevel: minimum (highest) value to which
- console_loglevel can be set
-- default_console_loglevel: default value for console_loglevel
+- console_loglevel:
+ messages with a higher priority than
+ this will be printed to the console
+- default_message_loglevel:
+ messages without an explicit priority
+ will be printed with this priority
+- minimum_console_loglevel:
+ minimum (highest) value to which
+ console_loglevel can be set
+- default_console_loglevel:
+ default value for console_loglevel
-==============================================================
printk_delay:
+=============
Delay each printk message by printk_delay milliseconds
Value from 0 - 10000 is allowed.
-==============================================================
printk_ratelimit:
+=================
Some warning messages are rate limited. printk_ratelimit specifies
the minimum length of time between these messages (in jiffies), by
@@ -800,48 +836,52 @@ default we allow one every 5 seconds.
A value of 0 will disable rate limiting.
-==============================================================
printk_ratelimit_burst:
+=======================
While long term we enforce one message per printk_ratelimit
seconds, we do allow a burst of messages to pass through.
printk_ratelimit_burst specifies the number of messages we can
send before ratelimiting kicks in.
-==============================================================
printk_devkmsg:
+===============
Control the logging to /dev/kmsg from userspace:
-ratelimit: default, ratelimited
+ratelimit:
+ default, ratelimited
+
on: unlimited logging to /dev/kmsg from userspace
+
off: logging to /dev/kmsg disabled
The kernel command line parameter printk.devkmsg= overrides this and is
a one-time setting until next reboot: once set, it cannot be changed by
this sysctl interface anymore.
-==============================================================
randomize_va_space:
+===================
This option can be used to select the type of process address
space randomization that is used in the system, for architectures
that support this feature.
-0 - Turn the process address space randomization off. This is the
+== ===========================================================================
+0 Turn the process address space randomization off. This is the
default for architectures that do not support this feature anyways,
and kernels that are booted with the "norandmaps" parameter.
-1 - Make the addresses of mmap base, stack and VDSO page randomized.
+1 Make the addresses of mmap base, stack and VDSO page randomized.
This, among other things, implies that shared libraries will be
loaded to random addresses. Also for PIE-linked binaries, the
location of code start is randomized. This is the default if the
CONFIG_COMPAT_BRK option is enabled.
-2 - Additionally enable heap randomization. This is the default if
+2 Additionally enable heap randomization. This is the default if
CONFIG_COMPAT_BRK is disabled.
There are a few legacy applications out there (such as some ancient
@@ -854,18 +894,19 @@ that support this feature.
Systems with ancient and/or broken binaries should be configured
with CONFIG_COMPAT_BRK enabled, which excludes the heap from process
address space randomization.
+== ===========================================================================
-==============================================================
reboot-cmd: (Sparc only)
+========================
??? This seems to be a way to give an argument to the Sparc
ROM/Flash boot loader. Maybe to tell it what to do after
rebooting. ???
-==============================================================
rtsig-max & rtsig-nr:
+=====================
The file rtsig-max can be used to tune the maximum number
of POSIX realtime (queued) signals that can be outstanding
@@ -873,9 +914,9 @@ in the system.
rtsig-nr shows the number of RT signals currently queued.
-==============================================================
sched_energy_aware:
+===================
Enables/disables Energy Aware Scheduling (EAS). EAS starts
automatically on platforms where it can run (that is,
@@ -884,17 +925,17 @@ Model available). If your platform happens to meet the
requirements for EAS but you do not want to use it, change
this value to 0.
-==============================================================
sched_schedstats:
+=================
Enables/disables scheduler statistics. Enabling this feature
incurs a small amount of overhead in the scheduler but is
useful for debugging and performance tuning.
-==============================================================
sg-big-buff:
+============
This file shows the size of the generic SCSI (sg) buffer.
You can't tune it just yet, but you could change it on
@@ -905,9 +946,9 @@ There shouldn't be any reason to change this value. If
you can come up with one, you probably know what you
are doing anyway :)
-==============================================================
shmall:
+=======
This parameter sets the total amount of shared memory pages that
can be used system wide. Hence, SHMALL should always be at least
@@ -916,20 +957,20 @@ ceil(shmmax/PAGE_SIZE).
If you are not sure what the default PAGE_SIZE is on your Linux
system, you can run the following command:
-# getconf PAGE_SIZE
+ # getconf PAGE_SIZE
-==============================================================
shmmax:
+=======
This value can be used to query and set the run time limit
on the maximum shared memory segment size that can be created.
Shared memory segments up to 1Gb are now supported in the
kernel. This value defaults to SHMMAX.
-==============================================================
shm_rmid_forced:
+================
Linux lets you set resource limits, including how much memory one
process can consume, via setrlimit(2). Unfortunately, shared memory
@@ -948,28 +989,30 @@ need this.
Note that if you change this from 0 to 1, already created segments
without users and with a dead originative process will be destroyed.
-==============================================================
sysctl_writes_strict:
+=====================
Control how file position affects the behavior of updating sysctl values
via the /proc/sys interface:
- -1 - Legacy per-write sysctl value handling, with no printk warnings.
+ == ======================================================================
+ -1 Legacy per-write sysctl value handling, with no printk warnings.
Each write syscall must fully contain the sysctl value to be
written, and multiple writes on the same sysctl file descriptor
will rewrite the sysctl value, regardless of file position.
- 0 - Same behavior as above, but warn about processes that perform writes
+ 0 Same behavior as above, but warn about processes that perform writes
to a sysctl file descriptor when the file position is not 0.
- 1 - (default) Respect file position when writing sysctl strings. Multiple
+ 1 (default) Respect file position when writing sysctl strings. Multiple
writes will append to the sysctl value buffer. Anything past the max
length of the sysctl value buffer will be ignored. Writes to numeric
sysctl entries must always be at file position 0 and the value must
be fully contained in the buffer sent in the write syscall.
+ == ======================================================================
-==============================================================
softlockup_all_cpu_backtrace:
+=============================
This value controls the soft lockup detector thread's behavior
when a soft lockup condition is detected as to whether or not
@@ -983,13 +1026,14 @@ NMI.
1: on detection capture more debug information.
-==============================================================
-soft_watchdog
+soft_watchdog:
+==============
This parameter can be used to control the soft lockup detector.
0 - disable the soft lockup detector
+
1 - enable the soft lockup detector
The soft lockup detector monitors CPUs for threads that are hogging the CPUs
@@ -999,9 +1043,9 @@ interrupts which are needed for the 'watchdog/N' threads to be woken up by
the watchdog timer function, otherwise the NMI watchdog - if enabled - can
detect a hard lockup condition.
-==============================================================
-stack_erasing
+stack_erasing:
+==============
This parameter can be used to control kernel stack erasing at the end
of syscalls for kernels built with CONFIG_GCC_PLUGIN_STACKLEAK.
@@ -1015,37 +1059,40 @@ compilation sees a 1% slowdown, other systems and workloads may vary.
1: kernel stack erasing is enabled (default), it is performed before
returning to the userspace at the end of syscalls.
-==============================================================
+
tainted
+=======
Non-zero if the kernel has been tainted. Numeric values, which can be
ORed together. The letters are seen in "Tainted" line of Oops reports.
- 1 (P): proprietary module was loaded
- 2 (F): module was force loaded
- 4 (S): SMP kernel oops on an officially SMP incapable processor
- 8 (R): module was force unloaded
- 16 (M): processor reported a Machine Check Exception (MCE)
- 32 (B): bad page referenced or some unexpected page flags
- 64 (U): taint requested by userspace application
- 128 (D): kernel died recently, i.e. there was an OOPS or BUG
- 256 (A): an ACPI table was overridden by user
- 512 (W): kernel issued warning
- 1024 (C): staging driver was loaded
- 2048 (I): workaround for bug in platform firmware applied
- 4096 (O): externally-built ("out-of-tree") module was loaded
- 8192 (E): unsigned module was loaded
- 16384 (L): soft lockup occurred
- 32768 (K): kernel has been live patched
- 65536 (X): Auxiliary taint, defined and used by for distros
-131072 (T): The kernel was built with the struct randomization plugin
+====== ===== ==============================================================
+ 1 `(P)` proprietary module was loaded
+ 2 `(F)` module was force loaded
+ 4 `(S)` SMP kernel oops on an officially SMP incapable processor
+ 8 `(R)` module was force unloaded
+ 16 `(M)` processor reported a Machine Check Exception (MCE)
+ 32 `(B)` bad page referenced or some unexpected page flags
+ 64 `(U)` taint requested by userspace application
+ 128 `(D)` kernel died recently, i.e. there was an OOPS or BUG
+ 256 `(A)` an ACPI table was overridden by user
+ 512 `(W)` kernel issued warning
+ 1024 `(C)` staging driver was loaded
+ 2048 `(I)` workaround for bug in platform firmware applied
+ 4096 `(O)` externally-built ("out-of-tree") module was loaded
+ 8192 `(E)` unsigned module was loaded
+ 16384 `(L)` soft lockup occurred
+ 32768 `(K)` kernel has been live patched
+ 65536 `(X)` Auxiliary taint, defined for and used by distros
+131072 `(T)` The kernel was built with the struct randomization plugin
+====== ===== ==============================================================
See Documentation/admin-guide/tainted-kernels.rst for more information.
-==============================================================
-threads-max
+threads-max:
+============
This value controls the maximum number of threads that can be created
using fork().
@@ -1055,8 +1102,10 @@ maximum number of threads is created, the thread structures occupy only
a part (1/8th) of the available RAM pages.
The minimum value that can be written to threads-max is 20.
+
The maximum value that can be written to threads-max is given by the
constant FUTEX_TID_MASK (0x3fffffff).
+
If a value outside of this range is written to threads-max an error
EINVAL occurs.
@@ -1064,9 +1113,9 @@ The value written is checked against the available RAM pages. If the
thread structures would occupy too much (more than 1/8th) of the
available RAM pages threads-max is reduced accordingly.
-==============================================================
unknown_nmi_panic:
+==================
The value in this file affects behavior of handling NMI. When the
value is non-zero, unknown NMI is trapped and then panic occurs. At
@@ -1075,28 +1124,29 @@ that time, kernel debugging information is displayed on console.
NMI switch that most IA32 servers have fires unknown NMI up, for
example. If a system hangs up, try pressing the NMI switch.
-==============================================================
watchdog:
+=========
This parameter can be used to disable or enable the soft lockup detector
_and_ the NMI watchdog (i.e. the hard lockup detector) at the same time.
0 - disable both lockup detectors
+
1 - enable both lockup detectors
The soft lockup detector and the NMI watchdog can also be disabled or
enabled individually, using the soft_watchdog and nmi_watchdog parameters.
-If the watchdog parameter is read, for example by executing
+If the watchdog parameter is read, for example by executing::
cat /proc/sys/kernel/watchdog
the output of this command (0 or 1) shows the logical OR of soft_watchdog
and nmi_watchdog.
-==============================================================
watchdog_cpumask:
+=================
This value can be used to control on which cpus the watchdog may run.
The default cpumask is all possible cores, but if NO_HZ_FULL is
@@ -1111,13 +1161,13 @@ if a kernel lockup was suspected on those cores.
The argument value is the standard cpulist format for cpumasks,
so for example to enable the watchdog on cores 0, 2, 3, and 4 you
-might say:
+might say::
echo 0,2-4 > /proc/sys/kernel/watchdog_cpumask
-==============================================================
watchdog_thresh:
+================
This value can be used to control the frequency of hrtimer and NMI
events and the soft and hard lockup thresholds. The default threshold
@@ -1125,5 +1175,3 @@ is 10 seconds.
The softlockup threshold is (2 * watchdog_thresh). Setting this
tunable to zero will disable lockup detection altogether.
-
-==============================================================
diff --git a/Documentation/sysctl/net.txt b/Documentation/admin-guide/sysctl/net.rst
index 2ae91d3873bb..a7d44e71019d 100644
--- a/Documentation/sysctl/net.txt
+++ b/Documentation/admin-guide/sysctl/net.rst
@@ -1,12 +1,25 @@
-Documentation for /proc/sys/net/*
- (c) 1999 Terrehon Bowden <terrehon@pacbell.net>
- Bodo Bauer <bb@ricochet.net>
- (c) 2000 Jorge Nerin <comandante@zaralinux.com>
- (c) 2009 Shen Feng <shen@cn.fujitsu.com>
+================================
+Documentation for /proc/sys/net/
+================================
-For general info and legal blurb, please look in README.
+Copyright
-==============================================================
+Copyright (c) 1999
+
+ - Terrehon Bowden <terrehon@pacbell.net>
+ - Bodo Bauer <bb@ricochet.net>
+
+Copyright (c) 2000
+
+ - Jorge Nerin <comandante@zaralinux.com>
+
+Copyright (c) 2009
+
+ - Shen Feng <shen@cn.fujitsu.com>
+
+For general info and legal blurb, please look in index.rst.
+
+------------------------------------------------------------------------------
This file contains the documentation for the sysctl files in
/proc/sys/net
@@ -17,20 +30,22 @@ see only some of them, depending on your kernel's configuration.
Table : Subdirectories in /proc/sys/net
-..............................................................................
- Directory Content Directory Content
- core General parameter appletalk Appletalk protocol
- unix Unix domain sockets netrom NET/ROM
- 802 E802 protocol ax25 AX25
- ethernet Ethernet protocol rose X.25 PLP layer
- ipv4 IP version 4 x25 X.25 protocol
- ipx IPX token-ring IBM token ring
- bridge Bridging decnet DEC net
- ipv6 IP version 6 tipc TIPC
-..............................................................................
+
+ ========= =================== = ========== ==================
+ Directory Content Directory Content
+ ========= =================== = ========== ==================
+ core General parameter appletalk Appletalk protocol
+ unix Unix domain sockets netrom NET/ROM
+ 802 E802 protocol ax25 AX25
+ ethernet Ethernet protocol rose X.25 PLP layer
+ ipv4 IP version 4 x25 X.25 protocol
+ ipx IPX token-ring IBM token ring
+ bridge Bridging decnet DEC net
+ ipv6 IP version 6 tipc TIPC
+ ========= =================== = ========== ==================
1. /proc/sys/net/core - Network core options
--------------------------------------------------------
+============================================
bpf_jit_enable
--------------
@@ -44,6 +59,7 @@ restricted C into a sequence of BPF instructions. After program load
through bpf(2) and passing a verifier in the kernel, a JIT will then
translate these BPF proglets into native CPU instructions. There are
two flavors of JITs, the newer eBPF JIT currently supported on:
+
- x86_64
- x86_32
- arm64
@@ -55,6 +71,7 @@ two flavors of JITs, the newer eBPF JIT currently supported on:
- riscv
And the older cBPF JIT supported on the following archs:
+
- mips
- ppc
- sparc
@@ -65,10 +82,11 @@ compile them transparently. Older cBPF JITs can only translate
tcpdump filters, seccomp rules, etc, but not mentioned eBPF
programs loaded through bpf(2).
-Values :
- 0 - disable the JIT (default value)
- 1 - enable the JIT
- 2 - enable the JIT and ask the compiler to emit traces on kernel log.
+Values:
+
+ - 0 - disable the JIT (default value)
+ - 1 - enable the JIT
+ - 2 - enable the JIT and ask the compiler to emit traces on kernel log.
bpf_jit_harden
--------------
@@ -76,10 +94,12 @@ bpf_jit_harden
This enables hardening for the BPF JIT compiler. Supported are eBPF
JIT backends. Enabling hardening trades off performance, but can
mitigate JIT spraying.
-Values :
- 0 - disable JIT hardening (default value)
- 1 - enable JIT hardening for unprivileged users only
- 2 - enable JIT hardening for all users
+
+Values:
+
+ - 0 - disable JIT hardening (default value)
+ - 1 - enable JIT hardening for unprivileged users only
+ - 2 - enable JIT hardening for all users
bpf_jit_kallsyms
----------------
@@ -89,9 +109,11 @@ addresses to the kernel, meaning they neither show up in traces nor
in /proc/kallsyms. This enables export of these addresses, which can
be used for debugging/tracing. If bpf_jit_harden is enabled, this
feature is disabled.
+
Values :
- 0 - disable JIT kallsyms export (default value)
- 1 - enable JIT kallsyms export for privileged users only
+
+ - 0 - disable JIT kallsyms export (default value)
+ - 1 - enable JIT kallsyms export for privileged users only
bpf_jit_limit
-------------
@@ -102,7 +124,7 @@ been surpassed. bpf_jit_limit contains the value of the global limit
in bytes.
dev_weight
---------------
+----------
The maximum number of packets that kernel can handle on a NAPI interrupt,
it's a Per-CPU variable. For drivers that support LRO or GRO_HW, a hardware
@@ -111,7 +133,7 @@ aggregated packet is counted as one packet in this context.
Default: 64
dev_weight_rx_bias
---------------
+------------------
RPS (e.g. RFS, aRFS) processing is competing with the registered NAPI poll function
of the driver for the per softirq cycle netdev_budget. This parameter influences
@@ -120,19 +142,22 @@ processing during RX softirq cycles. It is further meant for making current
dev_weight adaptable for asymmetric CPU needs on RX/TX side of the network stack.
(see dev_weight_tx_bias) It is effective on a per CPU basis. Determination is based
on dev_weight and is calculated multiplicatively (dev_weight * dev_weight_rx_bias).
+
Default: 1
dev_weight_tx_bias
---------------
+------------------
Scales the maximum number of packets that can be processed during a TX softirq cycle.
Effective on a per CPU basis. Allows scaling of current dev_weight for asymmetric
net stack processing needs. Be careful to avoid making TX softirq processing a CPU hog.
+
Calculation is based on dev_weight (dev_weight * dev_weight_tx_bias).
+
Default: 1
default_qdisc
---------------
+-------------
The default queuing discipline to use for network devices. This allows
overriding the default of pfifo_fast with an alternative. Since the default
@@ -144,17 +169,21 @@ which require setting up classes and bandwidths. Note that physical multiqueue
interfaces still use mq as root qdisc, which in turn uses this default for its
leaves. Virtual devices (like e.g. lo or veth) ignore this setting and instead
default to noqueue.
+
Default: pfifo_fast
busy_read
-----------------
+---------
+
Low latency busy poll timeout for socket reads. (needs CONFIG_NET_RX_BUSY_POLL)
Approximate time in us to busy loop waiting for packets on the device queue.
This sets the default value of the SO_BUSY_POLL socket option.
Can be set or overridden per socket by setting socket option SO_BUSY_POLL,
which is the preferred method of enabling. If you need to enable the feature
globally via sysctl, a value of 50 is recommended.
+
Will increase power usage.
+
Default: 0 (off)
busy_poll
@@ -167,7 +196,9 @@ For more than that you probably want to use epoll.
Note that only sockets with SO_BUSY_POLL set will be busy polled,
so you want to either selectively set SO_BUSY_POLL on those sockets or set
sysctl.net.busy_read globally.
+
Will increase power usage.
+
Default: 0 (off)
rmem_default
@@ -185,6 +216,7 @@ tstamp_allow_data
Allow processes to receive tx timestamps looped together with the original
packet contents. If disabled, transmit timestamp requests from unprivileged
processes are dropped unless socket option SOF_TIMESTAMPING_OPT_TSONLY is set.
+
Default: 1 (on)
@@ -250,19 +282,24 @@ randomly generated.
Some user space might need to gather its content even if drivers do not
provide ethtool -x support yet.
-myhost:~# cat /proc/sys/net/core/netdev_rss_key
-84:50:f4:00:a8:15:d1:a7:e9:7f:1d:60:35:c7:47:25:42:97:74:ca:56:bb:b6:a1:d8: ... (52 bytes total)
+::
+
+ myhost:~# cat /proc/sys/net/core/netdev_rss_key
+ 84:50:f4:00:a8:15:d1:a7:e9:7f:1d:60:35:c7:47:25:42:97:74:ca:56:bb:b6:a1:d8: ... (52 bytes total)
File contains nul bytes if no driver ever called netdev_rss_key_fill() function.
+
Note:
-/proc/sys/net/core/netdev_rss_key contains 52 bytes of key,
-but most drivers only use 40 bytes of it.
+ /proc/sys/net/core/netdev_rss_key contains 52 bytes of key,
+ but most drivers only use 40 bytes of it.
+
+::
-myhost:~# ethtool -x eth0
-RX flow hash indirection table for eth0 with 8 RX ring(s):
- 0: 0 1 2 3 4 5 6 7
-RSS hash key:
-84:50:f4:00:a8:15:d1:a7:e9:7f:1d:60:35:c7:47:25:42:97:74:ca:56:bb:b6:a1:d8:43:e3:c9:0c:fd:17:55:c2:3a:4d:69:ed:f1:42:89
+ myhost:~# ethtool -x eth0
+ RX flow hash indirection table for eth0 with 8 RX ring(s):
+ 0: 0 1 2 3 4 5 6 7
+ RSS hash key:
+ 84:50:f4:00:a8:15:d1:a7:e9:7f:1d:60:35:c7:47:25:42:97:74:ca:56:bb:b6:a1:d8:43:e3:c9:0c:fd:17:55:c2:3a:4d:69:ed:f1:42:89
netdev_tstamp_prequeue
----------------------
@@ -293,7 +330,7 @@ user space is responsible for creating them if needed.
Default : 0 (for compatibility reasons)
devconf_inherit_init_net
-----------------------------
+------------------------
Controls if a new network namespace should inherit all current
settings under /proc/sys/net/{ipv4,ipv6}/conf/{all,default}/. By
@@ -307,7 +344,7 @@ forced to reset to their default values.
Default : 0 (for compatibility reasons)
2. /proc/sys/net/unix - Parameters for Unix domain sockets
--------------------------------------------------------
+----------------------------------------------------------
There is only one file in this directory.
unix_dgram_qlen limits the max number of datagrams queued in Unix domain
@@ -315,13 +352,13 @@ socket's buffer. It will not take effect unless PF_UNIX flag is specified.
3. /proc/sys/net/ipv4 - IPV4 settings
--------------------------------------------------------
+-------------------------------------
Please see: Documentation/networking/ip-sysctl.txt and ipvs-sysctl.txt for
descriptions of these entries.
4. Appletalk
--------------------------------------------------------
+------------
The /proc/sys/net/appletalk directory holds the Appletalk configuration data
when Appletalk is loaded. The configurable parameters are:
@@ -366,7 +403,7 @@ route flags, and the device the route is using.
5. IPX
--------------------------------------------------------
+------
The IPX protocol has no tunable values in /proc/sys/net.
@@ -391,14 +428,16 @@ gives the destination network, the router node (or Directly) and the network
address of the router (or Connected) for internal networks.
6. TIPC
--------------------------------------------------------
+-------
tipc_rmem
-----------
+---------
The TIPC protocol now has a tunable for the receive memory, similar to the
tcp_rmem - i.e. a vector of 3 INTEGERs: (min, default, max)
+::
+
# cat /proc/sys/net/tipc/tipc_rmem
4252725 34021800 68043600
#
@@ -409,7 +448,7 @@ is not at this point in time used in any meaningful way, but the triplet is
preserved in order to be consistent with things like tcp_rmem.
named_timeout
---------------
+-------------
TIPC name table updates are distributed asynchronously in a cluster, without
any form of transaction handling. This means that different race scenarios are
diff --git a/Documentation/sysctl/sunrpc.txt b/Documentation/admin-guide/sysctl/sunrpc.rst
index ae1ecac6f85a..09780a682afd 100644
--- a/Documentation/sysctl/sunrpc.txt
+++ b/Documentation/admin-guide/sysctl/sunrpc.rst
@@ -1,9 +1,14 @@
-Documentation for /proc/sys/sunrpc/* kernel version 2.2.10
- (c) 1998, 1999, Rik van Riel <riel@nl.linux.org>
+===================================
+Documentation for /proc/sys/sunrpc/
+===================================
-For general info and legal blurb, please look in README.
+kernel version 2.2.10
-==============================================================
+Copyright (c) 1998, 1999, Rik van Riel <riel@nl.linux.org>
+
+For general info and legal blurb, please look in index.rst.
+
+------------------------------------------------------------------------------
This file contains the documentation for the sysctl files in
/proc/sys/sunrpc and is valid for Linux kernel version 2.2.
diff --git a/Documentation/sysctl/user.txt b/Documentation/admin-guide/sysctl/user.rst
index a5882865836e..650eaa03f15e 100644
--- a/Documentation/sysctl/user.txt
+++ b/Documentation/admin-guide/sysctl/user.rst
@@ -1,7 +1,12 @@
-Documentation for /proc/sys/user/* kernel version 4.9.0
- (c) 2016 Eric Biederman <ebiederm@xmission.com>
+=================================
+Documentation for /proc/sys/user/
+=================================
-==============================================================
+kernel version 4.9.0
+
+Copyright (c) 2016 Eric Biederman <ebiederm@xmission.com>
+
+------------------------------------------------------------------------------
This file contains the documentation for the sysctl files in
/proc/sys/user.
@@ -30,37 +35,44 @@ user namespace does not allow a user to escape their current limits.
Currently, these files are in /proc/sys/user:
-- max_cgroup_namespaces
+max_cgroup_namespaces
+=====================
The maximum number of cgroup namespaces that any user in the current
user namespace may create.
-- max_ipc_namespaces
+max_ipc_namespaces
+==================
The maximum number of ipc namespaces that any user in the current
user namespace may create.
-- max_mnt_namespaces
+max_mnt_namespaces
+==================
The maximum number of mount namespaces that any user in the current
user namespace may create.
-- max_net_namespaces
+max_net_namespaces
+==================
The maximum number of network namespaces that any user in the
current user namespace may create.
-- max_pid_namespaces
+max_pid_namespaces
+==================
The maximum number of pid namespaces that any user in the current
user namespace may create.
-- max_user_namespaces
+max_user_namespaces
+===================
The maximum number of user namespaces that any user in the current
user namespace may create.
-- max_uts_namespaces
+max_uts_namespaces
+==================
The maximum number of uts namespaces that any user in the current
user namespace may create.
diff --git a/Documentation/sysctl/vm.txt b/Documentation/admin-guide/sysctl/vm.rst
index 749322060f10..64aeee1009ca 100644
--- a/Documentation/sysctl/vm.txt
+++ b/Documentation/admin-guide/sysctl/vm.rst
@@ -1,10 +1,16 @@
-Documentation for /proc/sys/vm/* kernel version 2.6.29
- (c) 1998, 1999, Rik van Riel <riel@nl.linux.org>
- (c) 2008 Peter W. Morreale <pmorreale@novell.com>
+===============================
+Documentation for /proc/sys/vm/
+===============================
-For general info and legal blurb, please look in README.
+kernel version 2.6.29
-==============================================================
+Copyright (c) 1998, 1999, Rik van Riel <riel@nl.linux.org>
+
+Copyright (c) 2008 Peter W. Morreale <pmorreale@novell.com>
+
+For general info and legal blurb, please look in index.rst.
+
+------------------------------------------------------------------------------
This file contains the documentation for the sysctl files in
/proc/sys/vm and is valid for Linux kernel version 2.6.29.
@@ -68,9 +74,9 @@ Currently, these files are in /proc/sys/vm:
- watermark_scale_factor
- zone_reclaim_mode
-==============================================================
admin_reserve_kbytes
+====================
The amount of free memory in the system that should be reserved for users
with the capability cap_sys_admin.
@@ -97,25 +103,25 @@ On x86_64 this is about 128MB.
Changing this takes effect whenever an application requests memory.
-==============================================================
block_dump
+==========
block_dump enables block I/O debugging when set to a nonzero value. More
-information on block I/O debugging is in Documentation/laptops/laptop-mode.txt.
+information on block I/O debugging is in Documentation/admin-guide/laptops/laptop-mode.rst.
-==============================================================
compact_memory
+==============
Available only when CONFIG_COMPACTION is set. When 1 is written to the file,
all zones are compacted such that free memory is available in contiguous
blocks where possible. This can be important for example in the allocation of
huge pages although processes will also directly compact memory as required.
-==============================================================
compact_unevictable_allowed
+===========================
Available only when CONFIG_COMPACTION is set. When set to 1, compaction is
allowed to examine the unevictable lru (mlocked pages) for pages to compact.
@@ -123,21 +129,22 @@ This should be used on systems where stalls for minor page faults are an
acceptable trade for large contiguous free memory. Set to 0 to prevent
compaction from moving pages that are unevictable. Default value is 1.
-==============================================================
dirty_background_bytes
+======================
Contains the amount of dirty memory at which the background kernel
flusher threads will start writeback.
-Note: dirty_background_bytes is the counterpart of dirty_background_ratio. Only
-one of them may be specified at a time. When one sysctl is written it is
-immediately taken into account to evaluate the dirty memory limits and the
-other appears as 0 when read.
+Note:
+ dirty_background_bytes is the counterpart of dirty_background_ratio. Only
+ one of them may be specified at a time. When one sysctl is written it is
+ immediately taken into account to evaluate the dirty memory limits and the
+ other appears as 0 when read.
-==============================================================
dirty_background_ratio
+======================
Contains, as a percentage of total available memory that contains free pages
and reclaimable pages, the number of pages at which the background kernel
@@ -145,9 +152,9 @@ flusher threads will start writing out dirty data.
The total available memory is not equal to total system memory.
-==============================================================
dirty_bytes
+===========
Contains the amount of dirty memory at which a process generating disk writes
will itself start writeback.
@@ -161,18 +168,18 @@ Note: the minimum value allowed for dirty_bytes is two pages (in bytes); any
value lower than this limit will be ignored and the old configuration will be
retained.
-==============================================================
dirty_expire_centisecs
+======================
This tunable is used to define when dirty data is old enough to be eligible
for writeout by the kernel flusher threads. It is expressed in 100'ths
of a second. Data which has been dirty in-memory for longer than this
interval will be written out next time a flusher thread wakes up.
-==============================================================
dirty_ratio
+===========
Contains, as a percentage of total available memory that contains free pages
and reclaimable pages, the number of pages at which a process which is
@@ -180,9 +187,9 @@ generating disk writes will itself start writing out dirty data.
The total available memory is not equal to total system memory.
-==============================================================
dirtytime_expire_seconds
+========================
When a lazytime inode is constantly having its pages dirtied, the inode with
an updated timestamp will never get a chance to be written out. And, if the
@@ -192,34 +199,39 @@ eventually gets pushed out to disk. This tunable is used to define when dirty
inode is old enough to be eligible for writeback by the kernel flusher threads.
And, it is also used as the interval to wakeup dirtytime_writeback thread.
-==============================================================
dirty_writeback_centisecs
+=========================
-The kernel flusher threads will periodically wake up and write `old' data
+The kernel flusher threads will periodically wake up and write `old` data
out to disk. This tunable expresses the interval between those wakeups, in
100'ths of a second.
Setting this to zero disables periodic writeback altogether.
-==============================================================
drop_caches
+===========
Writing to this will cause the kernel to drop clean caches, as well as
reclaimable slab objects like dentries and inodes. Once dropped, their
memory becomes free.
-To free pagecache:
+To free pagecache::
+
echo 1 > /proc/sys/vm/drop_caches
-To free reclaimable slab objects (includes dentries and inodes):
+
+To free reclaimable slab objects (includes dentries and inodes)::
+
echo 2 > /proc/sys/vm/drop_caches
-To free slab objects and pagecache:
+
+To free slab objects and pagecache::
+
echo 3 > /proc/sys/vm/drop_caches
This is a non-destructive operation and will not free any dirty objects.
To increase the number of objects freed by this operation, the user may run
-`sync' prior to writing to /proc/sys/vm/drop_caches. This will minimize the
+`sync` prior to writing to /proc/sys/vm/drop_caches. This will minimize the
number of dirty objects on the system and create more candidates to be
dropped.
@@ -233,16 +245,16 @@ dropped objects, especially if they were under heavy use. Because of this,
use outside of a testing or debugging environment is not recommended.
You may see informational messages in your kernel log when this file is
-used:
+used::
cat (1234): drop_caches: 3
These are informational only. They do not mean that anything is wrong
with your system. To disable them, echo 4 (bit 2) into drop_caches.
-==============================================================
extfrag_threshold
+=================
This parameter affects whether the kernel will compact memory or direct
reclaim to satisfy a high-order allocation. The extfrag/extfrag_index file in
@@ -254,9 +266,9 @@ implies that the allocation will succeed as long as watermarks are met.
The kernel will not compact memory in a zone if the
fragmentation index is <= extfrag_threshold. The default value is 500.
-==============================================================
highmem_is_dirtyable
+====================
Available only for systems with CONFIG_HIGHMEM enabled (32b systems).
@@ -274,30 +286,30 @@ OOM killer because some writers (e.g. direct block device writes) can
only use the low memory and they can fill it up with dirty data without
any throttling.
-==============================================================
hugetlb_shm_group
+=================
hugetlb_shm_group contains group id that is allowed to create SysV
shared memory segment using hugetlb page.
-==============================================================
laptop_mode
+===========
laptop_mode is a knob that controls "laptop mode". All the things that are
-controlled by this knob are discussed in Documentation/laptops/laptop-mode.txt.
+controlled by this knob are discussed in Documentation/admin-guide/laptops/laptop-mode.rst.
-==============================================================
legacy_va_layout
+================
If non-zero, this sysctl disables the new 32-bit mmap layout - the kernel
will use the legacy (2.4) layout for all processes.
-==============================================================
lowmem_reserve_ratio
+====================
For some specialised workloads on highmem machines it is dangerous for
the kernel to allow process memory to be allocated from the "lowmem"
@@ -308,7 +320,7 @@ And on large highmem machines this lack of reclaimable lowmem memory
can be fatal.
So the Linux page allocator has a mechanism which prevents allocations
-which _could_ use highmem from using too much lowmem. This means that
+which *could* use highmem from using too much lowmem. This means that
a certain amount of lowmem is defended from the possibility of being
captured into pinned user memory.
@@ -316,39 +328,37 @@ captured into pinned user memory.
mechanism will also defend that region from allocations which could use
highmem or lowmem).
-The `lowmem_reserve_ratio' tunable determines how aggressive the kernel is
+The `lowmem_reserve_ratio` tunable determines how aggressive the kernel is
in defending these lower zones.
If you have a machine which uses highmem or ISA DMA and your
applications are using mlock(), or if you are running with no swap then
you probably should change the lowmem_reserve_ratio setting.
-The lowmem_reserve_ratio is an array. You can see them by reading this file.
--
-% cat /proc/sys/vm/lowmem_reserve_ratio
-256 256 32
--
+The lowmem_reserve_ratio is an array. You can see them by reading this file::
+
+ % cat /proc/sys/vm/lowmem_reserve_ratio
+ 256 256 32
But, these values are not used directly. The kernel calculates the number of
protection pages for each zone from them. These are shown as an array of
protection pages in /proc/zoneinfo like the following (this is an example from an x86-64 box).
-Each zone has an array of protection pages like this.
-
--
-Node 0, zone DMA
- pages free 1355
- min 3
- low 3
- high 4
+Each zone has an array of protection pages like this::
+
+ Node 0, zone DMA
+ pages free 1355
+ min 3
+ low 3
+ high 4
:
:
- numa_other 0
- protection: (0, 2004, 2004, 2004)
+ numa_other 0
+ protection: (0, 2004, 2004, 2004)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- pagesets
- cpu: 0 pcp: 0
- :
--
+ pagesets
+ cpu: 0 pcp: 0
+ :
+
These protections are added to score to judge whether this zone should be used
for page allocation or should be reclaimed.
@@ -359,20 +369,24 @@ not be used because pages_free(1355) is smaller than watermark + protection[2]
normal page requirement. If requirement is DMA zone(index=0), protection[0]
(=0) is used.
-zone[i]'s protection[j] is calculated by following expression.
+zone[i]'s protection[j] is calculated by following expression::
-(i < j):
- zone[i]->protection[j]
- = (total sums of managed_pages from zone[i+1] to zone[j] on the node)
- / lowmem_reserve_ratio[i];
-(i = j):
- (should not be protected. = 0;
-(i > j):
- (not necessary, but looks 0)
+ (i < j):
+ zone[i]->protection[j]
+ = (total sums of managed_pages from zone[i+1] to zone[j] on the node)
+ / lowmem_reserve_ratio[i];
+ (i = j):
+ (should not be protected. = 0;
+ (i > j):
+ (not necessary, but looks 0)
The default values of lowmem_reserve_ratio[i] are
+
+ === ====================================
256 (if zone[i] means DMA or DMA32 zone)
- 32 (others).
+ 32 (others)
+ === ====================================
+
As the expression above shows, they are the reciprocals of the ratio.
256 means 1/256. The number of protection pages becomes about "0.39%" of the
total managed pages of higher zones on the node.
@@ -381,9 +395,9 @@ If you would like to protect more pages, smaller values are effective.
The minimum value is 1 (1/1 -> 100%). The value less than 1 completely
disables protection of the pages.
-==============================================================
max_map_count:
+==============
This file contains the maximum number of memory map areas a process
may have. Memory map areas are used as a side-effect of calling
@@ -396,9 +410,9 @@ e.g., up to one or two maps per allocation.
The default value is 65536.
-=============================================================
memory_failure_early_kill:
+==========================
Control how to kill processes when uncorrected memory error (typically
a 2bit error in a memory module) is detected in the background by hardware
@@ -424,9 +438,9 @@ check handling and depends on the hardware capabilities.
Applications can override this setting individually with the PR_MCE_KILL prctl
-==============================================================
memory_failure_recovery
+=======================
Enable memory failure recovery (when supported by the platform)
@@ -434,9 +448,9 @@ Enable memory failure recovery (when supported by the platform)
0: Always panic on a memory failure.
-==============================================================
-min_free_kbytes:
+min_free_kbytes
+===============
This is used to force the Linux VM to keep a minimum number
of kilobytes free. The VM uses this number to compute a
@@ -450,9 +464,9 @@ become subtly broken, and prone to deadlock under high loads.
Setting this too high will OOM your machine instantly.
-=============================================================
-min_slab_ratio:
+min_slab_ratio
+==============
This is available only on NUMA kernels.
@@ -468,9 +482,9 @@ Note that slab reclaim is triggered in a per zone / node fashion.
The process of reclaiming slab memory is currently not node specific
and may not be fast.
-=============================================================
-min_unmapped_ratio:
+min_unmapped_ratio
+==================
This is available only on NUMA kernels.
@@ -485,9 +499,9 @@ files and similar are considered.
The default is 1 percent.
-==============================================================
mmap_min_addr
+=============
This file indicates the amount of address space which a user process will
be restricted from mmapping. Since kernel null dereference bugs could
@@ -498,9 +512,9 @@ security module. Setting this value to something like 64k will allow the
vast majority of applications to work correctly and provide defense in depth
against future potential kernel bugs.
-==============================================================
-mmap_rnd_bits:
+mmap_rnd_bits
+=============
This value can be used to select the number of bits to use to
determine the random offset to the base address of vma regions
@@ -511,9 +525,9 @@ by the architecture's minimum and maximum supported values.
This value can be changed after boot using the
/proc/sys/vm/mmap_rnd_bits tunable
-==============================================================
-mmap_rnd_compat_bits:
+mmap_rnd_compat_bits
+====================
This value can be used to select the number of bits to use to
determine the random offset to the base address of vma regions
@@ -525,35 +539,35 @@ architecture's minimum and maximum supported values.
This value can be changed after boot using the
/proc/sys/vm/mmap_rnd_compat_bits tunable
-==============================================================
nr_hugepages
+============
Change the minimum size of the hugepage pool.
See Documentation/admin-guide/mm/hugetlbpage.rst
-==============================================================
nr_hugepages_mempolicy
+======================
Change the size of the hugepage pool at run-time on a specific
set of NUMA nodes.
See Documentation/admin-guide/mm/hugetlbpage.rst
-==============================================================
nr_overcommit_hugepages
+=======================
Change the maximum size of the hugepage pool. The maximum is
nr_hugepages + nr_overcommit_hugepages.
See Documentation/admin-guide/mm/hugetlbpage.rst
-==============================================================
nr_trim_pages
+=============
This is available only on NOMMU kernels.
@@ -568,16 +582,17 @@ The default value is 1.
See Documentation/nommu-mmap.txt for more information.
-==============================================================
numa_zonelist_order
+===================
This sysctl is only for NUMA and it is deprecated. Anything but
Node order will fail!
'where the memory is allocated from' is controlled by zonelists.
+
(This documentation ignores ZONE_HIGHMEM/ZONE_DMA32 for simple explanation.
- you may be able to read ZONE_DMA as ZONE_DMA32...)
+you may be able to read ZONE_DMA as ZONE_DMA32...)
In the non-NUMA case, a zonelist for GFP_KERNEL is ordered as follows.
ZONE_NORMAL -> ZONE_DMA
@@ -585,10 +600,10 @@ This means that a memory allocation request for GFP_KERNEL will
get memory from ZONE_DMA only when ZONE_NORMAL is not available.
In the NUMA case, you can think of the following 2 types of order.
-Assume 2 node NUMA and below is zonelist of Node(0)'s GFP_KERNEL
+Assume 2 node NUMA and below is zonelist of Node(0)'s GFP_KERNEL::
-(A) Node(0) ZONE_NORMAL -> Node(0) ZONE_DMA -> Node(1) ZONE_NORMAL
-(B) Node(0) ZONE_NORMAL -> Node(1) ZONE_NORMAL -> Node(0) ZONE_DMA.
+ (A) Node(0) ZONE_NORMAL -> Node(0) ZONE_DMA -> Node(1) ZONE_NORMAL
+ (B) Node(0) ZONE_NORMAL -> Node(1) ZONE_NORMAL -> Node(0) ZONE_DMA.
Type(A) offers the best locality for processes on Node(0), but ZONE_DMA
will be used before ZONE_NORMAL exhaustion. This increases possibility of
@@ -616,9 +631,9 @@ order will be selected.
Default order is recommended unless this is causing problems for your
system/application.
-==============================================================
oom_dump_tasks
+==============
Enables a system-wide task dump (excluding kernel threads) to be produced
when the kernel performs an OOM-killing and includes such information as
@@ -638,9 +653,9 @@ OOM killer actually kills a memory-hogging task.
The default value is 1 (enabled).
-==============================================================
oom_kill_allocating_task
+========================
This enables or disables killing the OOM-triggering task in
out-of-memory situations.
@@ -659,9 +674,9 @@ is used in oom_kill_allocating_task.
The default value is 0.
-==============================================================
-overcommit_kbytes:
+overcommit_kbytes
+=================
When overcommit_memory is set to 2, the committed address space is not
permitted to exceed swap plus this amount of physical RAM. See below.
@@ -670,9 +685,9 @@ Note: overcommit_kbytes is the counterpart of overcommit_ratio. Only one
of them may be specified at a time. Setting one disables the other (which
then appears as 0 when read).
-==============================================================
-overcommit_memory:
+overcommit_memory
+=================
This value contains a flag that enables memory overcommitment.
@@ -695,17 +710,17 @@ The default value is 0.
See Documentation/vm/overcommit-accounting.rst and
mm/util.c::__vm_enough_memory() for more information.
-==============================================================
-overcommit_ratio:
+overcommit_ratio
+================
When overcommit_memory is set to 2, the committed address
space is not permitted to exceed swap plus this percentage
of physical RAM. See above.
-==============================================================
page-cluster
+============
page-cluster controls the number of pages up to which consecutive pages
are read in from swap in a single attempt. This is the swap counterpart
@@ -725,9 +740,9 @@ Lower values mean lower latencies for initial faults, but at the same time
extra faults and I/O delays for following faults if they would have been part of
that consecutive pages readahead would have brought in.
-=============================================================
panic_on_oom
+============
This enables or disables panic on out-of-memory feature.
@@ -747,14 +762,16 @@ above-mentioned. Even oom happens under memory cgroup, the whole
system panics.
The default value is 0.
+
1 and 2 are for failover of clustering. Please select either
according to your policy of failover.
+
panic_on_oom=2+kdump gives you a very strong tool to investigate
why oom happens. You can get a snapshot.
-=============================================================
percpu_pagelist_fraction
+========================
This is the fraction of pages at most (high mark pcp->high) in each zone that
are allocated for each per cpu page list. The min value for this is 8. It
@@ -770,16 +787,16 @@ The initial value is zero. Kernel does not use this value at boot time to set
the high water marks for each per cpu page list. If the user writes '0' to this
sysctl, it will revert to this default behavior.
-==============================================================
stat_interval
+=============
The time interval between which vm statistics are updated. The default
is 1 second.
-==============================================================
stat_refresh
+============
Any read or write (by root only) flushes all the per-cpu vm statistics
into their global totals, for more accurate reports when testing
@@ -790,24 +807,26 @@ as 0) and "fails" with EINVAL if any are found, with a warning in dmesg.
(At time of writing, a few stats are known sometimes to be found negative,
with no ill effects: errors and warnings on these stats are suppressed.)
-==============================================================
numa_stat
+=========
This interface allows runtime configuration of numa statistics.
When page allocation performance becomes a bottleneck and you can tolerate
some possible tool breakage and decreased numa counter precision, you can
-do:
+do::
+
echo 0 > /proc/sys/vm/numa_stat
When page allocation performance is not a bottleneck and you want all
-tooling to work, you can do:
+tooling to work, you can do::
+
echo 1 > /proc/sys/vm/numa_stat
-==============================================================
swappiness
+==========
This control is used to define how aggressive the kernel will swap
memory pages. Higher values will increase aggressiveness, lower values
@@ -817,9 +836,9 @@ than the high water mark in a zone.
The default value is 60.
-==============================================================
unprivileged_userfaultfd
+========================
This flag controls whether unprivileged users can use the userfaultfd
system calls. Set this to 1 to allow unprivileged users to use the
@@ -828,9 +847,9 @@ privileged users (with SYS_CAP_PTRACE capability).
The default value is 1.
-==============================================================
-- user_reserve_kbytes
+user_reserve_kbytes
+===================
When overcommit_memory is set to 2, "never overcommit" mode, reserve
min(3% of current process size, user_reserve_kbytes) of free memory.
@@ -846,10 +865,9 @@ Any subsequent attempts to execute a command will result in
Changing this takes effect whenever an application requests memory.
-==============================================================
vfs_cache_pressure
-------------------
+==================
This percentage value controls the tendency of the kernel to reclaim
the memory which is used for caching of directory and inode objects.
@@ -867,9 +885,9 @@ performance impact. Reclaim code needs to take various locks to find freeable
directory and inode objects. With vfs_cache_pressure=1000, it will look for
ten times more freeable objects than there are.
-=============================================================
-watermark_boost_factor:
+watermark_boost_factor
+======================
This factor controls the level of reclaim when memory is being fragmented.
It defines the percentage of the high watermark of a zone that will be
@@ -887,9 +905,9 @@ fragmentation events that occurred in the recent past. If this value is
smaller than a pageblock then a pageblocks worth of pages will be reclaimed
(e.g. 2MB on 64-bit x86). A boost factor of 0 will disable the feature.
-=============================================================
-watermark_scale_factor:
+watermark_scale_factor
+======================
This factor controls the aggressiveness of kswapd. It defines the
amount of memory left in a node/system before kswapd is woken up and
@@ -905,20 +923,22 @@ that the number of free pages kswapd maintains for latency reasons is
too small for the allocation bursts occurring in the system. This knob
can then be used to tune kswapd aggressiveness accordingly.
-==============================================================
-zone_reclaim_mode:
+zone_reclaim_mode
+=================
Zone_reclaim_mode allows someone to set more or less aggressive approaches to
reclaim memory when a zone runs out of memory. If it is set to zero then no
zone reclaim occurs. Allocations will be satisfied from other zones / nodes
in the system.
-This is value ORed together of
+This value is an OR'ed combination of
-1 = Zone reclaim on
-2 = Zone reclaim writes dirty pages out
-4 = Zone reclaim swaps pages
+= ===================================
+1 Zone reclaim on
+2 Zone reclaim writes dirty pages out
+4 Zone reclaim swaps pages
+= ===================================
zone_reclaim_mode is disabled by default. For file servers or workloads
that benefit from having their data cached, zone_reclaim_mode should be
@@ -942,5 +962,3 @@ of other processes running on other nodes will not be affected.
Allowing regular swap effectively restricts allocations to the local
node unless explicitly overridden by memory policies or cpuset
configurations.
-
-============ End of Document =================================
diff --git a/Documentation/video-output.txt b/Documentation/admin-guide/video-output.rst
index 56d6fa2e2368..56d6fa2e2368 100644
--- a/Documentation/video-output.txt
+++ b/Documentation/admin-guide/video-output.rst
diff --git a/Documentation/arm/Marvell/README b/Documentation/arm/Marvell/README
deleted file mode 100644
index 56ada27c53be..000000000000
--- a/Documentation/arm/Marvell/README
+++ /dev/null
@@ -1,395 +0,0 @@
-ARM Marvell SoCs
-================
-
-This document lists all the ARM Marvell SoCs that are currently
-supported in mainline by the Linux kernel. As the Marvell families of
-SoCs are large and complex, it is hard to understand where the support
-for a particular SoC is available in the Linux kernel. This document
-tries to help in understanding where those SoCs are supported, and to
-match them with their corresponding public datasheet, when available.
-
-Orion family
-------------
-
- Flavors:
- 88F5082
- 88F5181
- 88F5181L
- 88F5182
- Datasheet : http://www.embeddedarm.com/documentation/third-party/MV88F5182-datasheet.pdf
- Programmer's User Guide : http://www.embeddedarm.com/documentation/third-party/MV88F5182-opensource-manual.pdf
- User Manual : http://www.embeddedarm.com/documentation/third-party/MV88F5182-usermanual.pdf
- 88F5281
- Datasheet : http://www.ocmodshop.com/images/reviews/networking/qnap_ts409u/marvel_88f5281_data_sheet.pdf
- 88F6183
- Core: Feroceon 88fr331 (88f51xx) or 88fr531-vd (88f52xx) ARMv5 compatible
- Linux kernel mach directory: arch/arm/mach-orion5x
- Linux kernel plat directory: arch/arm/plat-orion
-
-Kirkwood family
----------------
-
- Flavors:
- 88F6282 a.k.a Armada 300
- Product Brief : http://www.marvell.com/embedded-processors/armada-300/assets/armada_310.pdf
- 88F6283 a.k.a Armada 310
- Product Brief : http://www.marvell.com/embedded-processors/armada-300/assets/armada_310.pdf
- 88F6190
- Product Brief : http://www.marvell.com/embedded-processors/kirkwood/assets/88F6190-003_WEB.pdf
- Hardware Spec : http://www.marvell.com/embedded-processors/kirkwood/assets/HW_88F619x_OpenSource.pdf
- Functional Spec: http://www.marvell.com/embedded-processors/kirkwood/assets/FS_88F6180_9x_6281_OpenSource.pdf
- 88F6192
- Product Brief : http://www.marvell.com/embedded-processors/kirkwood/assets/88F6192-003_ver1.pdf
- Hardware Spec : http://www.marvell.com/embedded-processors/kirkwood/assets/HW_88F619x_OpenSource.pdf
- Functional Spec: http://www.marvell.com/embedded-processors/kirkwood/assets/FS_88F6180_9x_6281_OpenSource.pdf
- 88F6182
- 88F6180
- Product Brief : http://www.marvell.com/embedded-processors/kirkwood/assets/88F6180-003_ver1.pdf
- Hardware Spec : http://www.marvell.com/embedded-processors/kirkwood/assets/HW_88F6180_OpenSource.pdf
- Functional Spec: http://www.marvell.com/embedded-processors/kirkwood/assets/FS_88F6180_9x_6281_OpenSource.pdf
- 88F6281
- Product Brief : http://www.marvell.com/embedded-processors/kirkwood/assets/88F6281-004_ver1.pdf
- Hardware Spec : http://www.marvell.com/embedded-processors/kirkwood/assets/HW_88F6281_OpenSource.pdf
- Functional Spec: http://www.marvell.com/embedded-processors/kirkwood/assets/FS_88F6180_9x_6281_OpenSource.pdf
- Homepage: http://www.marvell.com/embedded-processors/kirkwood/
- Core: Feroceon 88fr131 ARMv5 compatible
- Linux kernel mach directory: arch/arm/mach-mvebu
- Linux kernel plat directory: none
-
-Discovery family
-----------------
-
- Flavors:
- MV78100
- Product Brief : http://www.marvell.com/embedded-processors/discovery-innovation/assets/MV78100-003_WEB.pdf
- Hardware Spec : http://www.marvell.com/embedded-processors/discovery-innovation/assets/HW_MV78100_OpenSource.pdf
- Functional Spec: http://www.marvell.com/embedded-processors/discovery-innovation/assets/FS_MV76100_78100_78200_OpenSource.pdf
- MV78200
- Product Brief : http://www.marvell.com/embedded-processors/discovery-innovation/assets/MV78200-002_WEB.pdf
- Hardware Spec : http://www.marvell.com/embedded-processors/discovery-innovation/assets/HW_MV78200_OpenSource.pdf
- Functional Spec: http://www.marvell.com/embedded-processors/discovery-innovation/assets/FS_MV76100_78100_78200_OpenSource.pdf
- MV76100
- Not supported by the Linux kernel.
-
- Core: Feroceon 88fr571-vd ARMv5 compatible
-
- Linux kernel mach directory: arch/arm/mach-mv78xx0
- Linux kernel plat directory: arch/arm/plat-orion
-
-EBU Armada family
------------------
-
- Armada 370 Flavors:
- 88F6710
- 88F6707
- 88F6W11
- Product Brief: http://www.marvell.com/embedded-processors/armada-300/assets/Marvell_ARMADA_370_SoC.pdf
- Hardware Spec: http://www.marvell.com/embedded-processors/armada-300/assets/ARMADA370-datasheet.pdf
- Functional Spec: http://www.marvell.com/embedded-processors/armada-300/assets/ARMADA370-FunctionalSpec-datasheet.pdf
- Core: Sheeva ARMv7 compatible PJ4B
-
- Armada 375 Flavors:
- 88F6720
- Product Brief: http://www.marvell.com/embedded-processors/armada-300/assets/ARMADA_375_SoC-01_product_brief.pdf
- Core: ARM Cortex-A9
-
- Armada 38x Flavors:
- 88F6810 Armada 380
- 88F6820 Armada 385
- 88F6828 Armada 388
- Product infos: http://www.marvell.com/embedded-processors/armada-38x/
- Functional Spec: https://marvellcorp.wufoo.com/forms/marvell-armada-38x-functional-specifications/
- Core: ARM Cortex-A9
-
- Armada 39x Flavors:
- 88F6920 Armada 390
- 88F6928 Armada 398
- Product infos: http://www.marvell.com/embedded-processors/armada-39x/
- Core: ARM Cortex-A9
-
- Armada XP Flavors:
- MV78230
- MV78260
- MV78460
- NOTE: not to be confused with the non-SMP 78xx0 SoCs
- Product Brief: http://www.marvell.com/embedded-processors/armada-xp/assets/Marvell-ArmadaXP-SoC-product%20brief.pdf
- Functional Spec: http://www.marvell.com/embedded-processors/armada-xp/assets/ARMADA-XP-Functional-SpecDatasheet.pdf
- Hardware Specs:
- http://www.marvell.com/embedded-processors/armada-xp/assets/HW_MV78230_OS.PDF
- http://www.marvell.com/embedded-processors/armada-xp/assets/HW_MV78260_OS.PDF
- http://www.marvell.com/embedded-processors/armada-xp/assets/HW_MV78460_OS.PDF
- Core: Sheeva ARMv7 compatible Dual-core or Quad-core PJ4B-MP
-
- Linux kernel mach directory: arch/arm/mach-mvebu
- Linux kernel plat directory: none
-
-EBU Armada family ARMv8
------------------------
-
- Armada 3710/3720 Flavors:
- 88F3710
- 88F3720
- Core: ARM Cortex A53 (ARMv8)
-
- Homepage: http://www.marvell.com/embedded-processors/armada-3700/
- Product Brief: http://www.marvell.com/embedded-processors/assets/PB-88F3700-FNL.pdf
- Device tree files: arch/arm64/boot/dts/marvell/armada-37*
-
- Armada 7K Flavors:
- 88F7020 (AP806 Dual + one CP110)
- 88F7040 (AP806 Quad + one CP110)
- Core: ARM Cortex A72
-
- Homepage: http://www.marvell.com/embedded-processors/armada-70xx/
- Product Brief: http://www.marvell.com/embedded-processors/assets/Armada7020PB-Jan2016.pdf
- http://www.marvell.com/embedded-processors/assets/Armada7040PB-Jan2016.pdf
- Device tree files: arch/arm64/boot/dts/marvell/armada-70*
-
- Armada 8K Flavors:
- 88F8020 (AP806 Dual + two CP110)
- 88F8040 (AP806 Quad + two CP110)
- Core: ARM Cortex A72
-
- Homepage: http://www.marvell.com/embedded-processors/armada-80xx/
- Product Brief: http://www.marvell.com/embedded-processors/assets/Armada8020PB-Jan2016.pdf
- http://www.marvell.com/embedded-processors/assets/Armada8040PB-Jan2016.pdf
- Device tree files: arch/arm64/boot/dts/marvell/armada-80*
-
-Avanta family
--------------
-
- Flavors:
- 88F6510
- 88F6530P
- 88F6550
- 88F6560
- Homepage : http://www.marvell.com/broadband/
- Product Brief: http://www.marvell.com/broadband/assets/Marvell_Avanta_88F6510_305_060-001_product_brief.pdf
- No public datasheet available.
-
- Core: ARMv5 compatible
-
- Linux kernel mach directory: no code in mainline yet, planned for the future
- Linux kernel plat directory: no code in mainline yet, planned for the future
-
-Storage family
---------------
-
- Armada SP:
- 88RC1580
- Product infos: http://www.marvell.com/storage/armada-sp/
- Core: Sheeva ARMv7 compatible Quad-core PJ4C
- (not supported in upstream Linux kernel)
-
-Dove family (application processor)
------------------------------------
-
- Flavors:
- 88AP510 a.k.a Armada 510
- Product Brief : http://www.marvell.com/application-processors/armada-500/assets/Marvell_Armada510_SoC.pdf
- Hardware Spec : http://www.marvell.com/application-processors/armada-500/assets/Armada-510-Hardware-Spec.pdf
- Functional Spec : http://www.marvell.com/application-processors/armada-500/assets/Armada-510-Functional-Spec.pdf
- Homepage: http://www.marvell.com/application-processors/armada-500/
- Core: ARMv7 compatible
-
- Directory: arch/arm/mach-mvebu (DT enabled platforms)
- arch/arm/mach-dove (non-DT enabled platforms)
-
-PXA 2xx/3xx/93x/95x family
---------------------------
-
- Flavors:
- PXA21x, PXA25x, PXA26x
- Application processor only
- Core: ARMv5 XScale1 core
- PXA270, PXA271, PXA272
- Product Brief : http://www.marvell.com/application-processors/pxa-family/assets/pxa_27x_pb.pdf
- Design guide : http://www.marvell.com/application-processors/pxa-family/assets/pxa_27x_design_guide.pdf
- Developers manual : http://www.marvell.com/application-processors/pxa-family/assets/pxa_27x_dev_man.pdf
- Specification : http://www.marvell.com/application-processors/pxa-family/assets/pxa_27x_emts.pdf
- Specification update : http://www.marvell.com/application-processors/pxa-family/assets/pxa_27x_spec_update.pdf
- Application processor only
- Core: ARMv5 XScale2 core
- PXA300, PXA310, PXA320
- PXA 300 Product Brief : http://www.marvell.com/application-processors/pxa-family/assets/PXA300_PB_R4.pdf
- PXA 310 Product Brief : http://www.marvell.com/application-processors/pxa-family/assets/PXA310_PB_R4.pdf
- PXA 320 Product Brief : http://www.marvell.com/application-processors/pxa-family/assets/PXA320_PB_R4.pdf
- Design guide : http://www.marvell.com/application-processors/pxa-family/assets/PXA3xx_Design_Guide.pdf
- Developers manual : http://www.marvell.com/application-processors/pxa-family/assets/PXA3xx_Developers_Manual.zip
- Specifications : http://www.marvell.com/application-processors/pxa-family/assets/PXA3xx_EMTS.pdf
- Specification Update : http://www.marvell.com/application-processors/pxa-family/assets/PXA3xx_Spec_Update.zip
- Reference Manual : http://www.marvell.com/application-processors/pxa-family/assets/PXA3xx_TavorP_BootROM_Ref_Manual.pdf
- Application processor only
- Core: ARMv5 XScale3 core
- PXA930, PXA935
- Application processor with Communication processor
- Core: ARMv5 XScale3 core
- PXA955
- Application processor with Communication processor
- Core: ARMv7 compatible Sheeva PJ4 core
-
- Comments:
-
- * This line of SoCs originates from the XScale family developed by
- Intel and acquired by Marvell in ~2006. The PXA21x, PXA25x,
- PXA26x, PXA27x, PXA3xx and PXA93x were developed by Intel, while
- the later PXA95x were developed by Marvell.
-
- * Due to their XScale origin, these SoCs have virtually nothing in
- common with the other (Kirkwood, Dove, etc.) families of Marvell
- SoCs, except with the MMP/MMP2 family of SoCs.
-
- Linux kernel mach directory: arch/arm/mach-pxa
- Linux kernel plat directory: arch/arm/plat-pxa
-
-MMP/MMP2/MMP3 family (communication processor)
------------------------------------------
-
- Flavors:
- PXA168, a.k.a Armada 168
- Homepage : http://www.marvell.com/application-processors/armada-100/armada-168.jsp
- Product brief : http://www.marvell.com/application-processors/armada-100/assets/pxa_168_pb.pdf
- Hardware manual : http://www.marvell.com/application-processors/armada-100/assets/armada_16x_datasheet.pdf
- Software manual : http://www.marvell.com/application-processors/armada-100/assets/armada_16x_software_manual.pdf
- Specification update : http://www.marvell.com/application-processors/armada-100/assets/ARMADA16x_Spec_update.pdf
- Boot ROM manual : http://www.marvell.com/application-processors/armada-100/assets/armada_16x_ref_manual.pdf
- App note package : http://www.marvell.com/application-processors/armada-100/assets/armada_16x_app_note_package.pdf
- Application processor only
- Core: ARMv5 compatible Marvell PJ1 88sv331 (Mohawk)
- PXA910/PXA920
- Homepage : http://www.marvell.com/communication-processors/pxa910/
- Product Brief : http://www.marvell.com/communication-processors/pxa910/assets/Marvell_PXA910_Platform-001_PB_final.pdf
- Application processor with Communication processor
- Core: ARMv5 compatible Marvell PJ1 88sv331 (Mohawk)
- PXA688, a.k.a. MMP2, a.k.a Armada 610
- Product Brief : http://www.marvell.com/application-processors/armada-600/assets/armada610_pb.pdf
- Application processor only
- Core: ARMv7 compatible Sheeva PJ4 88sv581x core
- PXA2128, a.k.a. MMP3 (OLPC XO4, Linux support not upstream)
- Product Brief : http://www.marvell.com/application-processors/armada/pxa2128/assets/Marvell-ARMADA-PXA2128-SoC-PB.pdf
- Application processor only
- Core: Dual-core ARMv7 compatible Sheeva PJ4C core
- PXA960/PXA968/PXA978 (Linux support not upstream)
- Application processor with Communication Processor
- Core: ARMv7 compatible Sheeva PJ4 core
- PXA986/PXA988 (Linux support not upstream)
- Application processor with Communication Processor
- Core: Dual-core ARMv7 compatible Sheeva PJ4B-MP core
- PXA1088/PXA1920 (Linux support not upstream)
- Application processor with Communication Processor
- Core: quad-core ARMv7 Cortex-A7
- PXA1908/PXA1928/PXA1936
- Application processor with Communication Processor
- Core: multi-core ARMv8 Cortex-A53
-
- Comments:
-
- * This line of SoCs originates from the XScale family developed by
- Intel and acquired by Marvell in ~2006. All the processors of
- this MMP/MMP2 family were developed by Marvell.
-
- * Due to their XScale origin, these SoCs have virtually nothing in
- common with the other (Kirkwood, Dove, etc.) families of Marvell
- SoCs, except with the PXA family of SoCs listed above.
-
- Linux kernel mach directory: arch/arm/mach-mmp
- Linux kernel plat directory: arch/arm/plat-pxa
-
-Berlin family (Multimedia Solutions)
--------------------------------------
-
- Flavors:
- 88DE3010, Armada 1000 (no Linux support)
- Core: Marvell PJ1 (ARMv5TE), Dual-core
- Product Brief: http://www.marvell.com.cn/digital-entertainment/assets/armada_1000_pb.pdf
- 88DE3005, Armada 1500 Mini
- Design name: BG2CD
- Core: ARM Cortex-A9, PL310 L2CC
- 88DE3006, Armada 1500 Mini Plus
- Design name: BG2CDP
- Core: Dual Core ARM Cortex-A7
- 88DE3100, Armada 1500
- Design name: BG2
- Core: Marvell PJ4B-MP (ARMv7), Tauros3 L2CC
- 88DE3114, Armada 1500 Pro
- Design name: BG2Q
- Core: Quad Core ARM Cortex-A9, PL310 L2CC
- 88DE3214, Armada 1500 Pro 4K
- Design name: BG3
- Core: ARM Cortex-A15, CA15 integrated L2CC
- 88DE3218, ARMADA 1500 Ultra
- Core: ARM Cortex-A53
-
- Homepage: https://www.synaptics.com/products/multimedia-solutions
- Directory: arch/arm/mach-berlin
-
- Comments:
-
- * This line of SoCs is based on Marvell Sheeva or ARM Cortex CPUs
- with Synopsys DesignWare (IRQ, GPIO, Timers, ...) and PXA IP (SDHCI, USB, ETH, ...).
-
- * The Berlin family was acquired by Synaptics from Marvell in 2017.
-
-CPU Cores
----------
-
-The XScale cores were designed by Intel, and shipped by Marvell in the older
-PXA processors. Feroceon is a Marvell-designed core that was developed in-house,
-and that evolved into Sheeva. The XScale and Feroceon cores were phased out
-over time and replaced with Sheeva cores in later products, which subsequently
-got replaced with licensed ARM Cortex-A cores.
-
- XScale 1
- CPUID 0x69052xxx
- ARMv5, iWMMXt
- XScale 2
- CPUID 0x69054xxx
- ARMv5, iWMMXt
- XScale 3
- CPUID 0x69056xxx or 0x69056xxx
- ARMv5, iWMMXt
- Feroceon-1850 88fr331 "Mohawk"
- CPUID 0x5615331x or 0x41xx926x
- ARMv5TE, single issue
- Feroceon-2850 88fr531-vd "Jolteon"
- CPUID 0x5605531x or 0x41xx926x
- ARMv5TE, VFP, dual-issue
- Feroceon 88fr571-vd "Jolteon"
- CPUID 0x5615571x
- ARMv5TE, VFP, dual-issue
- Feroceon 88fr131 "Mohawk-D"
- CPUID 0x5625131x
- ARMv5TE, single-issue in-order
- Sheeva PJ1 88sv331 "Mohawk"
- CPUID 0x561584xx
- ARMv5, single-issue iWMMXt v2
- Sheeva PJ4 88sv581x "Flareon"
- CPUID 0x560f581x
- ARMv7, idivt, optional iWMMXt v2
- Sheeva PJ4B 88sv581x
- CPUID 0x561f581x
- ARMv7, idivt, optional iWMMXt v2
- Sheeva PJ4B-MP / PJ4C
- CPUID 0x562f584x
- ARMv7, idivt/idiva, LPAE, optional iWMMXt v2 and/or NEON
-
-Long-term plans
----------------
-
- * Unify the mach-dove/, mach-mv78xx0/, mach-orion5x/ into the
- mach-mvebu/ to support all SoCs from the Marvell EBU (Engineering
- Business Unit) in a single mach-<foo> directory. The plat-orion/
- would therefore disappear.
-
- * Unify the mach-mmp/ and mach-pxa/ into the same mach-pxa
- directory. The plat-pxa/ would therefore disappear.
-
-Credits
--------
-
- Maen Suleiman <maen@marvell.com>
- Lior Amsalem <alior@marvell.com>
- Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
- Andrew Lunn <andrew@lunn.ch>
- Nicolas Pitre <nico@fluxnic.net>
- Eric Miao <eric.y.miao@gmail.com>
diff --git a/Documentation/arm/Netwinder b/Documentation/arm/Netwinder
deleted file mode 100644
index f1b457fbd3de..000000000000
--- a/Documentation/arm/Netwinder
+++ /dev/null
@@ -1,78 +0,0 @@
-NetWinder specific documentation
-================================
-
-The NetWinder is a small low-power computer, primarily designed
-to run Linux. It is based around the StrongARM RISC processor,
-DC21285 PCI bridge, with PC-type hardware glued around it.
-
-Port usage
-==========
-
-Min - Max Description
----------------------------
-0x0000 - 0x000f DMA1
-0x0020 - 0x0021 PIC1
-0x0060 - 0x006f Keyboard
-0x0070 - 0x007f RTC
-0x0080 - 0x0087 DMA1
-0x0088 - 0x008f DMA2
-0x00a0 - 0x00a3 PIC2
-0x00c0 - 0x00df DMA2
-0x0180 - 0x0187 IRDA
-0x01f0 - 0x01f6 ide0
-0x0201 Game port
-0x0203 RWA010 configuration read
-0x0220 - ? SoundBlaster
-0x0250 - ? WaveArtist
-0x0279 RWA010 configuration index
-0x02f8 - 0x02ff Serial ttyS1
-0x0300 - 0x031f Ether10
-0x0338 GPIO1
-0x033a GPIO2
-0x0370 - 0x0371 W83977F configuration registers
-0x0388 - ? AdLib
-0x03c0 - 0x03df VGA
-0x03f6 ide0
-0x03f8 - 0x03ff Serial ttyS0
-0x0400 - 0x0408 DC21143
-0x0480 - 0x0487 DMA1
-0x0488 - 0x048f DMA2
-0x0a79 RWA010 configuration write
-0xe800 - 0xe80f ide0/ide1 BM DMA
-
-
-Interrupt usage
-===============
-
-IRQ type Description
----------------------------
- 0 ISA 100Hz timer
- 1 ISA Keyboard
- 2 ISA cascade
- 3 ISA Serial ttyS1
- 4 ISA Serial ttyS0
- 5 ISA PS/2 mouse
- 6 ISA IRDA
- 7 ISA Printer
- 8 ISA RTC alarm
- 9 ISA
-10 ISA GP10 (Orange reset button)
-11 ISA
-12 ISA WaveArtist
-13 ISA
-14 ISA hda1
-15 ISA
-
-DMA usage
-=========
-
-DMA type Description
----------------------------
- 0 ISA IRDA
- 1 ISA
- 2 ISA cascade
- 3 ISA WaveArtist
- 4 ISA
- 5 ISA
- 6 ISA
- 7 ISA WaveArtist
diff --git a/Documentation/arm/SA1100/FreeBird b/Documentation/arm/SA1100/FreeBird
deleted file mode 100644
index ab9193663b2b..000000000000
--- a/Documentation/arm/SA1100/FreeBird
+++ /dev/null
@@ -1,21 +0,0 @@
-Freebird-1.1 is produced by Legend(C), Inc.
-http://web.archive.org/web/*/http://www.legend.com.cn
-and software/linux maintained by Coventive(C), Inc.
-(http://www.coventive.com)
-
-Based on the Nicolas's strongarm kernel tree.
-
-===============================================================
-Maintainer:
-
-Chester Kuo <chester@coventive.com>
- <chester@linux.org.tw>
-
-Author :
-Tim wu <timwu@coventive.com>
-CIH <cih@coventive.com>
-Eric Peng <ericpeng@coventive.com>
-Jeff Lee <jeff_lee@coventive.com>
-Allen Cheng
-Tony Liu <tonyliu@coventive.com>
-
diff --git a/Documentation/arm/SA1100/empeg b/Documentation/arm/SA1100/empeg
deleted file mode 100644
index 4ece4849a42c..000000000000
--- a/Documentation/arm/SA1100/empeg
+++ /dev/null
@@ -1,2 +0,0 @@
-See ../empeg/README
-
diff --git a/Documentation/arm/SA1100/serial_UART b/Documentation/arm/SA1100/serial_UART
deleted file mode 100644
index a63966f1d083..000000000000
--- a/Documentation/arm/SA1100/serial_UART
+++ /dev/null
@@ -1,47 +0,0 @@
-The SA1100 serial port had its major/minor numbers officially assigned:
-
-> Date: Sun, 24 Sep 2000 21:40:27 -0700
-> From: H. Peter Anvin <hpa@transmeta.com>
-> To: Nicolas Pitre <nico@CAM.ORG>
-> Cc: Device List Maintainer <device@lanana.org>
-> Subject: Re: device
->
-> Okay. Note that device numbers 204 and 205 are used for "low density
-> serial devices", so you will have a range of minors on those majors (the
-> tty device layer handles this just fine, so you don't have to worry about
-> doing anything special.)
->
-> So your assignments are:
->
-> 204 char Low-density serial ports
-> 5 = /dev/ttySA0 SA1100 builtin serial port 0
-> 6 = /dev/ttySA1 SA1100 builtin serial port 1
-> 7 = /dev/ttySA2 SA1100 builtin serial port 2
->
-> 205 char Low-density serial ports (alternate device)
-> 5 = /dev/cusa0 Callout device for ttySA0
-> 6 = /dev/cusa1 Callout device for ttySA1
-> 7 = /dev/cusa2 Callout device for ttySA2
->
-
-You must create those inodes in /dev on the root filesystem used
-by your SA1100-based device:
-
- mknod ttySA0 c 204 5
- mknod ttySA1 c 204 6
- mknod ttySA2 c 204 7
- mknod cusa0 c 205 5
- mknod cusa1 c 205 6
- mknod cusa2 c 205 7
-
-In addition to the creation of the appropriate device nodes above, you
-must ensure your user space applications make use of the correct device
-name. The classic example is the content of the /etc/inittab file where
-you might have a getty process started on ttyS0. In this case:
-
-- replace occurrences of ttyS0 with ttySA0, ttyS1 with ttySA1, etc.
-
-- don't forget to add 'ttySA0', 'console', or the appropriate tty name
- in /etc/securetty for root to be allowed to login as well.
-
-
diff --git a/Documentation/arm/README b/Documentation/arm/arm.rst
index 9d1e5b2c92e6..2edc509df92a 100644
--- a/Documentation/arm/README
+++ b/Documentation/arm/arm.rst
@@ -1,5 +1,6 @@
- ARM Linux 2.6
- =============
+=======================
+ARM Linux 2.6 and upper
+=======================
Please check <ftp://ftp.arm.linux.org.uk/pub/armlinux> for
updates.
@@ -18,22 +19,28 @@ Compilation of kernel
line as detailed below.
If you wish to cross-compile, then alter the following lines in the top
- level make file:
+ level make file::
ARCH = <whatever>
- with
+
+ with::
+
ARCH = arm
- and
+ and::
CROSS_COMPILE=
- to
+
+ to::
+
CROSS_COMPILE=<your-path-to-your-compiler-without-gcc>
- eg.
+
+ eg.::
+
CROSS_COMPILE=arm-linux-
- Do a 'make config', followed by 'make Image' to build the kernel
- (arch/arm/boot/Image). A compressed image can be built by doing a
+ Do a 'make config', followed by 'make Image' to build the kernel
+ (arch/arm/boot/Image). A compressed image can be built by doing a
'make zImage' instead of 'make Image'.
@@ -46,7 +53,7 @@ Bug reports etc
Bug reports should be sent to linux-arm-kernel@lists.arm.linux.org.uk,
or submitted through the web form at
- http://www.arm.linux.org.uk/developer/
+ http://www.arm.linux.org.uk/developer/
When sending bug reports, please ensure that they contain all relevant
information, eg. the kernel messages that were printed before/during
@@ -60,11 +67,13 @@ Include files
which are there to reduce the clutter in the top-level directory. These
directories, and their purpose is listed below:
- arch-* machine/platform specific header files
- hardware driver-internal ARM specific data structures/definitions
- mach descriptions of generic ARM to specific machine interfaces
- proc-* processor dependent header files (currently only two
+ ============= ==========================================================
+ `arch-*` machine/platform specific header files
+ `hardware` driver-internal ARM specific data structures/definitions
+ `mach` descriptions of generic ARM to specific machine interfaces
+ `proc-*` processor dependent header files (currently only two
categories)
+ ============= ==========================================================
Machine/Platform support
@@ -129,7 +138,7 @@ ST506 hard drives
HDC base to the source.
As of 31/3/96 it works with two drives (you should get the ADFS
- *configure harddrive set to 2). I've got an internal 20MB and a great
+ `*configure` harddrive set to 2). I've got an internal 20MB and a great
big external 5.25" FH 64MB drive (who could ever want more :-) ).
I've just got 240K/s off it (a dd with bs=128k); that's about half of what
@@ -149,13 +158,13 @@ ST506 hard drives
are welcome.
-CONFIG_MACH_ and CONFIG_ARCH_
------------------------------
+`CONFIG_MACH_` and `CONFIG_ARCH_`
+---------------------------------
A change was made in 2003 to the macro names for new machines.
- Historically, CONFIG_ARCH_ was used for the bonafide architecture,
+ Historically, `CONFIG_ARCH_` was used for the bonafide architecture,
e.g. SA1100, as well as implementations of the architecture,
e.g. Assabet. It was decided to change the implementation macros
- to read CONFIG_MACH_ for clarity. Moreover, a retroactive fixup has
+ to read `CONFIG_MACH_` for clarity. Moreover, a retroactive fixup has
not been made because it would complicate patching.
Previous registrations may be found online.
@@ -163,7 +172,7 @@ CONFIG_MACH_ and CONFIG_ARCH_
<http://www.arm.linux.org.uk/developer/machines/>
Kernel entry (head.S)
---------------------------
+---------------------
The initial entry into the kernel is via head.S, which uses machine
independent code. The machine is selected by the value of 'r1' on
entry, which must be kept unique.
@@ -201,4 +210,5 @@ Kernel entry (head.S)
platform is DT-only, you do not need a registered machine type.
---
+
Russell King (15/03/2004)
diff --git a/Documentation/arm/Booting b/Documentation/arm/booting.rst
index f1f965ce93d6..4babb6c6ae1e 100644
--- a/Documentation/arm/Booting
+++ b/Documentation/arm/booting.rst
@@ -1,7 +1,9 @@
- Booting ARM Linux
- =================
+=================
+Booting ARM Linux
+=================
Author: Russell King
+
Date : 18 May 2002
The following documentation is relevant to 2.4.18-rmk6 and beyond.
@@ -25,8 +27,10 @@ following:
1. Setup and initialise RAM
---------------------------
-Existing boot loaders: MANDATORY
-New boot loaders: MANDATORY
+Existing boot loaders:
+ MANDATORY
+New boot loaders:
+ MANDATORY
The boot loader is expected to find and initialise all RAM that the
kernel will use for volatile data storage in the system. It performs
@@ -39,8 +43,10 @@ sees fit.)
2. Initialise one serial port
-----------------------------
-Existing boot loaders: OPTIONAL, RECOMMENDED
-New boot loaders: OPTIONAL, RECOMMENDED
+Existing boot loaders:
+ OPTIONAL, RECOMMENDED
+New boot loaders:
+ OPTIONAL, RECOMMENDED
The boot loader should initialise and enable one serial port on the
target. This allows the kernel serial driver to automatically detect
@@ -57,8 +63,10 @@ serial format options as described in
3. Detect the machine type
--------------------------
-Existing boot loaders: OPTIONAL
-New boot loaders: MANDATORY except for DT-only platforms
+Existing boot loaders:
+ OPTIONAL
+New boot loaders:
+ MANDATORY except for DT-only platforms
The boot loader should detect the machine type it's running on by some
method. Whether this is a hard coded value or some algorithm that
@@ -74,8 +82,10 @@ necessary, but assures that it will not match any existing types.
4. Setup boot data
------------------
-Existing boot loaders: OPTIONAL, HIGHLY RECOMMENDED
-New boot loaders: MANDATORY
+Existing boot loaders:
+ OPTIONAL, HIGHLY RECOMMENDED
+New boot loaders:
+ MANDATORY
The boot loader must provide either a tagged list or a dtb image for
passing configuration data to the kernel. The physical address of the
@@ -97,15 +107,15 @@ entirety; some tags behave as the former, others the latter.
The boot loader must pass at a minimum the size and location of
the system memory, and root filesystem location. Therefore, the
-minimum tagged list should look:
+minimum tagged list should look::
- +-----------+
-base -> | ATAG_CORE | |
- +-----------+ |
- | ATAG_MEM | | increasing address
- +-----------+ |
- | ATAG_NONE | |
- +-----------+ v
+ +-----------+
+ base -> | ATAG_CORE | |
+ +-----------+ |
+ | ATAG_MEM | | increasing address
+ +-----------+ |
+ | ATAG_NONE | |
+ +-----------+ v
The tagged list should be stored in system RAM.
@@ -134,8 +144,10 @@ A safe location is just above the 128MiB boundary from start of RAM.
5. Load initramfs.
------------------
-Existing boot loaders: OPTIONAL
-New boot loaders: OPTIONAL
+Existing boot loaders:
+ OPTIONAL
+New boot loaders:
+ OPTIONAL
If an initramfs is in use then, as with the dtb, it must be placed in
a region of memory where the kernel decompressor will not overwrite it
@@ -149,8 +161,10 @@ recommended above.
6. Calling the kernel image
---------------------------
-Existing boot loaders: MANDATORY
-New boot loaders: MANDATORY
+Existing boot loaders:
+ MANDATORY
+New boot loaders:
+ MANDATORY
There are two options for calling the kernel zImage. If the zImage
is stored in flash, and is linked correctly to be run from flash,
@@ -174,12 +188,14 @@ In any case, the following conditions must be met:
you many hours of debug.
- CPU register settings
- r0 = 0,
- r1 = machine type number discovered in (3) above.
- r2 = physical address of tagged list in system RAM, or
- physical address of device tree block (dtb) in system RAM
+
+ - r0 = 0,
+ - r1 = machine type number discovered in (3) above.
+ - r2 = physical address of tagged list in system RAM, or
+ physical address of device tree block (dtb) in system RAM
- CPU mode
+
All forms of interrupts must be disabled (IRQs and FIQs)
For CPUs which do not include the ARM virtualization extensions, the
@@ -195,8 +211,11 @@ In any case, the following conditions must be met:
entered in SVC mode.
- Caches, MMUs
+
The MMU must be off.
+
Instruction cache may be on or off.
+
Data cache must be off.
If the kernel is entered in HYP mode, the above requirements apply to
diff --git a/Documentation/arm/cluster-pm-race-avoidance.txt b/Documentation/arm/cluster-pm-race-avoidance.rst
index 750b6fc24af9..aa58603d3f28 100644
--- a/Documentation/arm/cluster-pm-race-avoidance.txt
+++ b/Documentation/arm/cluster-pm-race-avoidance.rst
@@ -1,3 +1,4 @@
+=========================================================
Cluster-wide Power-up/power-down race avoidance algorithm
=========================================================
@@ -46,10 +47,12 @@ Basic model
Each cluster and CPU is assigned a state, as follows:
- DOWN
- COMING_UP
- UP
- GOING_DOWN
+ - DOWN
+ - COMING_UP
+ - UP
+ - GOING_DOWN
+
+::
+---------> UP ----------+
| v
@@ -60,18 +63,22 @@ Each cluster and CPU is assigned a state, as follows:
+--------- DOWN <--------+
-DOWN: The CPU or cluster is not coherent, and is either powered off or
+DOWN:
+ The CPU or cluster is not coherent, and is either powered off or
suspended, or is ready to be powered off or suspended.
-COMING_UP: The CPU or cluster has committed to moving to the UP state.
+COMING_UP:
+ The CPU or cluster has committed to moving to the UP state.
It may be part way through the process of initialisation and
enabling coherency.
-UP: The CPU or cluster is active and coherent at the hardware
+UP:
+ The CPU or cluster is active and coherent at the hardware
level. A CPU in this state is not necessarily being used
actively by the kernel.
-GOING_DOWN: The CPU or cluster has committed to moving to the DOWN
+GOING_DOWN:
+ The CPU or cluster has committed to moving to the DOWN
state. It may be part way through the process of teardown and
coherency exit.
@@ -86,8 +93,8 @@ CPUs in the cluster simultaneously modifying the state. The cluster-
level states are described in the "Cluster state" section.
To help distinguish the CPU states from cluster states in this
-discussion, the state names are given a CPU_ prefix for the CPU states,
-and a CLUSTER_ or INBOUND_ prefix for the cluster states.
+discussion, the state names are given a `CPU_` prefix for the CPU states,
+and a `CLUSTER_` or `INBOUND_` prefix for the cluster states.
CPU state
@@ -101,10 +108,12 @@ This means that CPUs fit the basic model closely.
The algorithm defines the following states for each CPU in the system:
- CPU_DOWN
- CPU_COMING_UP
- CPU_UP
- CPU_GOING_DOWN
+ - CPU_DOWN
+ - CPU_COMING_UP
+ - CPU_UP
+ - CPU_GOING_DOWN
+
+::
cluster setup and
CPU setup complete policy decision
@@ -130,17 +139,17 @@ requirement for any external event to happen.
CPU_DOWN:
-
A CPU reaches the CPU_DOWN state when it is ready for
power-down. On reaching this state, the CPU will typically
power itself down or suspend itself, via a WFI instruction or a
firmware call.
- Next state: CPU_COMING_UP
- Conditions: none
+ Next state:
+ CPU_COMING_UP
+ Conditions:
+ none
Trigger events:
-
a) an explicit hardware power-up operation, resulting
from a policy decision on another CPU;
@@ -148,15 +157,17 @@ CPU_DOWN:
CPU_COMING_UP:
-
A CPU cannot start participating in hardware coherency until the
cluster is set up and coherent. If the cluster is not ready,
then the CPU will wait in the CPU_COMING_UP state until the
cluster has been set up.
- Next state: CPU_UP
- Conditions: The CPU's parent cluster must be in CLUSTER_UP.
- Trigger events: Transition of the parent cluster to CLUSTER_UP.
+ Next state:
+ CPU_UP
+ Conditions:
+ The CPU's parent cluster must be in CLUSTER_UP.
+ Trigger events:
+ Transition of the parent cluster to CLUSTER_UP.
Refer to the "Cluster state" section for a description of the
CLUSTER_UP state.
@@ -178,20 +189,25 @@ CPU_UP:
The CPU remains in this state until an explicit policy decision
is made to shut down or suspend the CPU.
- Next state: CPU_GOING_DOWN
- Conditions: none
- Trigger events: explicit policy decision
+ Next state:
+ CPU_GOING_DOWN
+ Conditions:
+ none
+ Trigger events:
+ explicit policy decision
CPU_GOING_DOWN:
-
While in this state, the CPU exits coherency, including any
operations required to achieve this (such as cleaning data
caches).
- Next state: CPU_DOWN
- Conditions: local CPU teardown complete
- Trigger events: (spontaneous)
+ Next state:
+ CPU_DOWN
+ Conditions:
+ local CPU teardown complete
+ Trigger events:
+ (spontaneous)
Cluster state
@@ -212,20 +228,20 @@ independently of the CPU which is tearing down the cluster. For this
reason, the cluster state is split into two parts:
"cluster" state: The global state of the cluster; or the state
- on the outbound side:
+ on the outbound side:
- CLUSTER_DOWN
- CLUSTER_UP
- CLUSTER_GOING_DOWN
+ - CLUSTER_DOWN
+ - CLUSTER_UP
+ - CLUSTER_GOING_DOWN
"inbound" state: The state of the cluster on the inbound side.
- INBOUND_NOT_COMING_UP
- INBOUND_COMING_UP
+ - INBOUND_NOT_COMING_UP
+ - INBOUND_COMING_UP
The different pairings of these states results in six possible
- states for the cluster as a whole:
+ states for the cluster as a whole::
CLUSTER_UP
+==========> INBOUND_NOT_COMING_UP -------------+
@@ -284,11 +300,12 @@ reason, the cluster state is split into two parts:
CLUSTER_DOWN/INBOUND_NOT_COMING_UP:
+ Next state:
+ CLUSTER_DOWN/INBOUND_COMING_UP (inbound)
+ Conditions:
+ none
- Next state: CLUSTER_DOWN/INBOUND_COMING_UP (inbound)
- Conditions: none
Trigger events:
-
a) an explicit hardware power-up operation, resulting
from a policy decision on another CPU;
@@ -306,9 +323,12 @@ CLUSTER_DOWN/INBOUND_COMING_UP:
setup to enable other CPUs in the cluster to enter coherency
safely.
- Next state: CLUSTER_UP/INBOUND_COMING_UP (inbound)
- Conditions: cluster-level setup and hardware coherency complete
- Trigger events: (spontaneous)
+ Next state:
+ CLUSTER_UP/INBOUND_COMING_UP (inbound)
+ Conditions:
+ cluster-level setup and hardware coherency complete
+ Trigger events:
+ (spontaneous)
CLUSTER_UP/INBOUND_COMING_UP:
@@ -321,9 +341,12 @@ CLUSTER_UP/INBOUND_COMING_UP:
CLUSTER_UP/INBOUND_NOT_COMING_UP. All other CPUs on the cluster
should treat these two states as equivalent.
- Next state: CLUSTER_UP/INBOUND_NOT_COMING_UP (inbound)
- Conditions: none
- Trigger events: (spontaneous)
+ Next state:
+ CLUSTER_UP/INBOUND_NOT_COMING_UP (inbound)
+ Conditions:
+ none
+ Trigger events:
+ (spontaneous)
CLUSTER_UP/INBOUND_NOT_COMING_UP:
@@ -335,9 +358,12 @@ CLUSTER_UP/INBOUND_NOT_COMING_UP:
The cluster will remain in this state until a policy decision is
made to power the cluster down.
- Next state: CLUSTER_GOING_DOWN/INBOUND_NOT_COMING_UP (outbound)
- Conditions: none
- Trigger events: policy decision to power down the cluster
+ Next state:
+ CLUSTER_GOING_DOWN/INBOUND_NOT_COMING_UP (outbound)
+ Conditions:
+ none
+ Trigger events:
+ policy decision to power down the cluster
CLUSTER_GOING_DOWN/INBOUND_NOT_COMING_UP:
@@ -359,13 +385,16 @@ CLUSTER_GOING_DOWN/INBOUND_NOT_COMING_UP:
Next states:
CLUSTER_DOWN/INBOUND_NOT_COMING_UP (outbound)
- Conditions: cluster torn down and ready to power off
- Trigger events: (spontaneous)
+ Conditions:
+ cluster torn down and ready to power off
+ Trigger events:
+ (spontaneous)
CLUSTER_GOING_DOWN/INBOUND_COMING_UP (inbound)
- Conditions: none
- Trigger events:
+ Conditions:
+ none
+ Trigger events:
a) an explicit hardware power-up operation,
resulting from a policy decision on another
CPU;
@@ -396,13 +425,19 @@ CLUSTER_GOING_DOWN/INBOUND_COMING_UP:
Next states:
CLUSTER_UP/INBOUND_COMING_UP (outbound)
- Conditions: cluster-level setup and hardware
+ Conditions:
+ cluster-level setup and hardware
coherency complete
- Trigger events: (spontaneous)
+
+ Trigger events:
+ (spontaneous)
CLUSTER_DOWN/INBOUND_COMING_UP (outbound)
- Conditions: cluster torn down and ready to power off
- Trigger events: (spontaneous)
+ Conditions:
+ cluster torn down and ready to power off
+
+ Trigger events:
+ (spontaneous)
Last man and First man selection
@@ -452,30 +487,30 @@ Implementation:
arch/arm/common/mcpm_entry.c (everything else):
__mcpm_cpu_going_down() signals the transition of a CPU to the
- CPU_GOING_DOWN state.
+ CPU_GOING_DOWN state.
__mcpm_cpu_down() signals the transition of a CPU to the CPU_DOWN
- state.
+ state.
A CPU transitions to CPU_COMING_UP and then to CPU_UP via the
- low-level power-up code in mcpm_head.S. This could
- involve CPU-specific setup code, but in the current
- implementation it does not.
+ low-level power-up code in mcpm_head.S. This could
+ involve CPU-specific setup code, but in the current
+ implementation it does not.
__mcpm_outbound_enter_critical() and __mcpm_outbound_leave_critical()
- handle transitions from CLUSTER_UP to CLUSTER_GOING_DOWN
- and from there to CLUSTER_DOWN or back to CLUSTER_UP (in
- the case of an aborted cluster power-down).
+ handle transitions from CLUSTER_UP to CLUSTER_GOING_DOWN
+ and from there to CLUSTER_DOWN or back to CLUSTER_UP (in
+ the case of an aborted cluster power-down).
- These functions are more complex than the __mcpm_cpu_*()
- functions due to the extra inter-CPU coordination which
- is needed for safe transitions at the cluster level.
+ These functions are more complex than the __mcpm_cpu_*()
+ functions due to the extra inter-CPU coordination which
+ is needed for safe transitions at the cluster level.
A cluster transitions from CLUSTER_DOWN back to CLUSTER_UP via
- the low-level power-up code in mcpm_head.S. This
- typically involves platform-specific setup code,
- provided by the platform-specific power_up_setup
- function registered via mcpm_sync_init.
+ the low-level power-up code in mcpm_head.S. This
+ typically involves platform-specific setup code,
+ provided by the platform-specific power_up_setup
+ function registered via mcpm_sync_init.
Deep topologies:
diff --git a/Documentation/arm/firmware.txt b/Documentation/arm/firmware.rst
index 7f175dbb427e..efd844baec1d 100644
--- a/Documentation/arm/firmware.txt
+++ b/Documentation/arm/firmware.rst
@@ -1,5 +1,7 @@
-Interface for registering and calling firmware-specific operations for ARM.
-----
+==========================================================================
+Interface for registering and calling firmware-specific operations for ARM
+==========================================================================
+
Written by Tomasz Figa <t.figa@samsung.com>
Some boards are running with secure firmware running in TrustZone secure
@@ -9,7 +11,7 @@ operations and call them when needed.
Firmware operations can be specified by filling in a struct firmware_ops
with appropriate callbacks and then registering it with register_firmware_ops()
-function.
+function::
void register_firmware_ops(const struct firmware_ops *ops)
@@ -19,7 +21,7 @@ and its members can be found in arch/arm/include/asm/firmware.h header.
There is a default, empty set of operations provided, so there is no need to
set anything if platform does not require firmware operations.
-To call a firmware operation, a helper macro is provided
+To call a firmware operation, a helper macro is provided::
#define call_firmware_op(op, ...) \
((firmware_ops->op) ? firmware_ops->op(__VA_ARGS__) : (-ENOSYS))
@@ -28,7 +30,7 @@ the macro checks if the operation is provided and calls it or otherwise returns
-ENOSYS to signal that given operation is not available (for example, to allow
fallback to legacy operation).
-Example of registering firmware operations:
+Example of registering firmware operations::
/* board file */
@@ -56,7 +58,7 @@ Example of registering firmware operations:
register_firmware_ops(&platformX_firmware_ops);
}
-Example of using a firmware operation:
+Example of using a firmware operation::
/* some platform code, e.g. SMP initialization */
diff --git a/Documentation/arm/index.rst b/Documentation/arm/index.rst
new file mode 100644
index 000000000000..5fc072dd0c5e
--- /dev/null
+++ b/Documentation/arm/index.rst
@@ -0,0 +1,80 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+================
+ARM Architecture
+================
+
+.. toctree::
+ :maxdepth: 1
+
+ arm
+ booting
+ cluster-pm-race-avoidance
+ firmware
+ interrupts
+ kernel_mode_neon
+ kernel_user_helpers
+ memory
+ mem_alignment
+ tcm
+ setup
+ swp_emulation
+ uefi
+ vlocks
+ porting
+
+SoC-specific documents
+======================
+
+.. toctree::
+ :maxdepth: 1
+
+ ixp4xx
+
+ marvel
+ microchip
+
+ netwinder
+ nwfpe/index
+
+ keystone/overview
+ keystone/knav-qmss
+
+ omap/index
+
+ pxa/mfp
+
+
+ sa1100/index
+
+ stm32/stm32f746-overview
+ stm32/overview
+ stm32/stm32h743-overview
+ stm32/stm32f769-overview
+ stm32/stm32f429-overview
+ stm32/stm32mp157-overview
+
+ sunxi
+
+ samsung/index
+ samsung-s3c24xx/index
+
+ sunxi/clocks
+
+ spear/overview
+
+ sti/stih416-overview
+ sti/stih407-overview
+ sti/stih418-overview
+ sti/overview
+ sti/stih415-overview
+
+ vfp/release-notes
+
+
+.. only:: subproject and html
+
+ Indices
+ =======
+
+ * :ref:`genindex`
diff --git a/Documentation/arm/Interrupts b/Documentation/arm/interrupts.rst
index f09ab1b90ef1..2ae70e0e9732 100644
--- a/Documentation/arm/Interrupts
+++ b/Documentation/arm/interrupts.rst
@@ -1,8 +1,10 @@
-2.5.2-rmk5
-----------
+==========
+Interrupts
+==========
-This is the first kernel that contains a major shake up of some of the
-major architecture-specific subsystems.
+2.5.2-rmk5:
+ This is the first kernel that contains a major shake up of some of the
+ major architecture-specific subsystems.
Firstly, it contains some pretty major changes to the way we handle the
MMU TLB. Each MMU TLB variant is now handled completely separately -
@@ -18,7 +20,7 @@ Unfortunately, this means that machine types that touch the irq_desc[]
array (basically all machine types) will break, and this means every
machine type that we currently have.
-Lets take an example. On the Assabet with Neponset, we have:
+Let's take an example. On the Assabet with Neponset, we have::
GPIO25 IRR:2
SA1100 ------------> Neponset -----------> SA1111
@@ -48,42 +50,47 @@ the irqdesc array). This doesn't have to be a real "IC"; indeed the
SA11x0 IRQs are handled by two separate "chip" structures, one for
GPIO0-10, and another for all the rest. It is just a container for
the various operations (maybe this'll change to a better name).
-This structure has the following operations:
-
-struct irqchip {
- /*
- * Acknowledge the IRQ.
- * If this is a level-based IRQ, then it is expected to mask the IRQ
- * as well.
- */
- void (*ack)(unsigned int irq);
- /*
- * Mask the IRQ in hardware.
- */
- void (*mask)(unsigned int irq);
- /*
- * Unmask the IRQ in hardware.
- */
- void (*unmask)(unsigned int irq);
- /*
- * Re-run the IRQ
- */
- void (*rerun)(unsigned int irq);
- /*
- * Set the type of the IRQ.
- */
- int (*type)(unsigned int irq, unsigned int, type);
-};
-
-ack - required. May be the same function as mask for IRQs
+This structure has the following operations::
+
+ struct irqchip {
+ /*
+ * Acknowledge the IRQ.
+ * If this is a level-based IRQ, then it is expected to mask the IRQ
+ * as well.
+ */
+ void (*ack)(unsigned int irq);
+ /*
+ * Mask the IRQ in hardware.
+ */
+ void (*mask)(unsigned int irq);
+ /*
+ * Unmask the IRQ in hardware.
+ */
+ void (*unmask)(unsigned int irq);
+ /*
+ * Re-run the IRQ
+ */
+ void (*rerun)(unsigned int irq);
+ /*
+ * Set the type of the IRQ.
+ */
+ int (*type)(unsigned int irq, unsigned int type);
+ };
+
+ack
+ - required. May be the same function as mask for IRQs
handled by do_level_IRQ.
-mask - required.
-unmask - required.
-rerun - optional. Not required if you're using do_level_IRQ for all
+mask
+ - required.
+unmask
+ - required.
+rerun
+ - optional. Not required if you're using do_level_IRQ for all
IRQs that use this 'irqchip'. Generally expected to re-trigger
the hardware IRQ if possible. If not, may call the handler
directly.
-type - optional. If you don't support changing the type of an IRQ,
+type
+ - optional. If you don't support changing the type of an IRQ,
it should be null so people can detect if they are unable to
set the IRQ type.
@@ -109,6 +116,7 @@ manipulation, nor state tracking. This is useful for things like the
SMC9196 and USAR above.
So, what's changed?
+===================
1. Machine implementations must not write to the irqdesc array.
@@ -118,24 +126,19 @@ So, what's changed?
absolutely necessary.
set_irq_chip(irq,chip)
-
Set the mask/unmask methods for handling this IRQ
set_irq_handler(irq,handler)
-
Set the handler for this IRQ (level, edge, simple)
set_irq_chained_handler(irq,handler)
-
Set a "chained" handler for this IRQ - automatically
enables this IRQ (eg, Neponset and SA1111 handlers).
set_irq_flags(irq,flags)
-
Set the valid/probe/noautoenable flags.
set_irq_type(irq,type)
-
Set active the IRQ edge(s)/level. This replaces the
SA1111 INTPOL manipulation, and the set_GPIO_IRQ_edge()
function. Type should be one of IRQ_TYPE_xxx defined in
@@ -158,10 +161,9 @@ So, what's changed?
be re-checked for pending events. (see the Neponset IRQ handler for
details).
-7. fixup_irq() is gone, as is arch/arm/mach-*/include/mach/irq.h
+7. fixup_irq() is gone, as is `arch/arm/mach-*/include/mach/irq.h`
Please note that this will not solve all problems - some of them are
hardware based. Mixing level-based and edge-based IRQs on the same
parent signal (eg neponset) is one such area where a software based
solution can't provide the full answer to low IRQ latency.
-
diff --git a/Documentation/arm/IXP4xx b/Documentation/arm/ixp4xx.rst
index e48b74de6ac0..a57235616294 100644
--- a/Documentation/arm/IXP4xx
+++ b/Documentation/arm/ixp4xx.rst
@@ -1,6 +1,6 @@
-
--------------------------------------------------------------------------
+===========================================================
Release Notes for Linux on Intel's IXP4xx Network Processor
+===========================================================
Maintained by Deepak Saxena <dsaxena@plexity.net>
-------------------------------------------------------------------------
@@ -8,7 +8,7 @@ Maintained by Deepak Saxena <dsaxena@plexity.net>
1. Overview
Intel's IXP4xx network processor is a highly integrated SOC that
-is targeted for network applications, though it has become popular
+is targeted for network applications, though it has become popular
in industrial control and other areas due to low cost and power
consumption. The IXP4xx family currently consists of several processors
that support different network offload functions such as encryption,
@@ -20,7 +20,7 @@ For more information on the various versions of the CPU, see:
http://developer.intel.com/design/network/products/npfamily/ixp4xx.htm
-Intel also made the IXCP1100 CPU for sometime which is an IXP4xx
+Intel also made the IXCP1100 CPU for some time which is an IXP4xx
stripped of much of the network intelligence.
2. Linux Support
@@ -31,7 +31,7 @@ Linux currently supports the following features on the IXP4xx chips:
- PCI interface
- Flash access (MTD/JFFS)
- I2C through GPIO on IXP42x
-- GPIO for input/output/interrupts
+- GPIO for input/output/interrupts
See arch/arm/mach-ixp4xx/include/mach/platform.h for access functions.
- Timers (watchdog, OS)
@@ -45,7 +45,7 @@ require the use of Intel's proprietary CSR software:
If you need to use any of the above, you need to download Intel's
software from:
- http://developer.intel.com/design/network/products/npfamily/ixp425.htm
+ http://developer.intel.com/design/network/products/npfamily/ixp425.htm
DO NOT POST QUESTIONS TO THE LINUX MAILING LISTS REGARDING THE PROPRIETARY
SOFTWARE.
@@ -53,14 +53,14 @@ SOFTWARE.
There are several websites that provide directions/pointers on using
Intel's software:
- http://sourceforge.net/projects/ixp4xx-osdg/
- Open Source Developer's Guide for using uClinux and the Intel libraries
+ - http://sourceforge.net/projects/ixp4xx-osdg/
+ Open Source Developer's Guide for using uClinux and the Intel libraries
-http://gatewaymaker.sourceforge.net/
- Simple one page summary of building a gateway using an IXP425 and Linux
+ - http://gatewaymaker.sourceforge.net/
+ Simple one page summary of building a gateway using an IXP425 and Linux
-http://ixp425.sourceforge.net/
- ATM device driver for IXP425 that relies on Intel's libraries
+ - http://ixp425.sourceforge.net/
+ ATM device driver for IXP425 that relies on Intel's libraries
3. Known Issues/Limitations
@@ -70,7 +70,7 @@ The IXP4xx family allows for up to 256MB of memory but the PCI interface
can only expose 64MB of that memory to the PCI bus. This means that if
you are running with > 64MB, all PCI buffers outside of the accessible
range will be bounced using the routines in arch/arm/common/dmabounce.c.
-
+
3b. Limited outbound PCI window
IXP4xx provides two methods of accessing PCI memory space:
@@ -79,15 +79,15 @@ IXP4xx provides two methods of accessing PCI memory space:
To access PCI via this space, we simply ioremap() the BAR
into the kernel and we can use the standard read[bwl]/write[bwl]
macros. This is the preffered method due to speed but it
- limits the system to just 64MB of PCI memory. This can be
+ limits the system to just 64MB of PCI memory. This can be
problamatic if using video cards and other memory-heavy devices.
-
-2) If > 64MB of memory space is required, the IXP4xx can be
- configured to use indirect registers to access PCI This allows
- for up to 128MB (0x48000000 to 0x4fffffff) of memory on the bus.
- The disadvantage of this is that every PCI access requires
- three local register accesses plus a spinlock, but in some
- cases the performance hit is acceptable. In addition, you cannot
+
+2) If > 64MB of memory space is required, the IXP4xx can be
+ configured to use indirect registers to access PCI. This allows
+ for up to 128MB (0x48000000 to 0x4fffffff) of memory on the bus.
+ The disadvantage of this is that every PCI access requires
+ three local register accesses plus a spinlock, but in some
+ cases the performance hit is acceptable. In addition, you cannot
mmap() PCI devices in this case due to the indirect nature
of the PCI window.
@@ -96,14 +96,14 @@ you need more PCI memory, enable the IXP4XX_INDIRECT_PCI config option.
3c. GPIO as Interrupts
-Currently the code only handles level-sensitive GPIO interrupts
+Currently the code only handles level-sensitive GPIO interrupts
4. Supported platforms
ADI Engineering Coyote Gateway Reference Platform
http://www.adiengineering.com/productsCoyote.html
- The ADI Coyote platform is reference design for those building
+ The ADI Coyote platform is reference design for those building
small residential/office gateways. One NPE is connected to a 10/100
interface, one to 4-port 10/100 switch, and the third to and ADSL
interface. In addition, it also supports to POTs interfaces connected
@@ -119,9 +119,9 @@ http://www.gateworks.com/support/overview.php
the expansion bus.
Intel IXDP425 Development Platform
-http://www.intel.com/design/network/products/npfamily/ixdpg425.htm
+http://www.intel.com/design/network/products/npfamily/ixdpg425.htm
- This is Intel's standard reference platform for the IXDP425 and is
+ This is Intel's standard reference platform for the IXDP425 and is
also known as the Richfield board. It contains 4 PCI slots, 16MB
of flash, two 10/100 ports and one ADSL port.
@@ -161,11 +161,12 @@ The IXP4xx work has been funded by Intel Corp. and MontaVista Software, Inc.
The following people have contributed patches/comments/etc:
-Lennerty Buytenhek
-Lutz Jaenicke
-Justin Mayfield
-Robert E. Ranslam
-[I know I've forgotten others, please email me to be added]
+- Lennert Buytenhek
+- Lutz Jaenicke
+- Justin Mayfield
+- Robert E. Ranslam
+
+[I know I've forgotten others, please email me to be added]
-------------------------------------------------------------------------
diff --git a/Documentation/arm/kernel_mode_neon.txt b/Documentation/arm/kernel_mode_neon.rst
index b9e060c5b61e..9bfb71a2a9b9 100644
--- a/Documentation/arm/kernel_mode_neon.txt
+++ b/Documentation/arm/kernel_mode_neon.rst
@@ -1,3 +1,4 @@
+================
Kernel mode NEON
================
@@ -86,6 +87,7 @@ instructions appearing in unexpected places if no special care is taken.
Therefore, the recommended and only supported way of using NEON/VFP in the
kernel is by adhering to the following rules:
+
* isolate the NEON code in a separate compilation unit and compile it with
'-march=armv7-a -mfpu=neon -mfloat-abi=softfp';
* issue the calls to kernel_neon_begin(), kernel_neon_end() as well as the calls
@@ -115,6 +117,7 @@ NEON intrinsics
NEON intrinsics are also supported. However, as code using NEON intrinsics
relies on the GCC header <arm_neon.h>, (which #includes <stdint.h>), you should
observe the following in addition to the rules above:
+
* Compile the unit containing the NEON intrinsics with '-ffreestanding' so GCC
uses its builtin version of <stdint.h> (this is a C99 header which the kernel
does not supply);
diff --git a/Documentation/arm/kernel_user_helpers.txt b/Documentation/arm/kernel_user_helpers.rst
index 5673594717cf..eb6f3d916622 100644
--- a/Documentation/arm/kernel_user_helpers.txt
+++ b/Documentation/arm/kernel_user_helpers.rst
@@ -1,3 +1,4 @@
+============================
Kernel-provided User Helpers
============================
@@ -43,7 +44,7 @@ kuser_helper_version
Location: 0xffff0ffc
-Reference declaration:
+Reference declaration::
extern int32_t __kuser_helper_version;
@@ -53,17 +54,17 @@ Definition:
running kernel. User space may read this to determine the availability
of a particular helper.
-Usage example:
+Usage example::
-#define __kuser_helper_version (*(int32_t *)0xffff0ffc)
+ #define __kuser_helper_version (*(int32_t *)0xffff0ffc)
-void check_kuser_version(void)
-{
+ void check_kuser_version(void)
+ {
if (__kuser_helper_version < 2) {
fprintf(stderr, "can't do atomic operations, kernel too old\n");
abort();
}
-}
+ }
Notes:
@@ -77,7 +78,7 @@ kuser_get_tls
Location: 0xffff0fe0
-Reference prototype:
+Reference prototype::
void * __kuser_get_tls(void);
@@ -97,16 +98,16 @@ Definition:
Get the TLS value as previously set via the __ARM_NR_set_tls syscall.
-Usage example:
+Usage example::
-typedef void * (__kuser_get_tls_t)(void);
-#define __kuser_get_tls (*(__kuser_get_tls_t *)0xffff0fe0)
+ typedef void * (__kuser_get_tls_t)(void);
+ #define __kuser_get_tls (*(__kuser_get_tls_t *)0xffff0fe0)
-void foo()
-{
+ void foo()
+ {
void *tls = __kuser_get_tls();
printf("TLS = %p\n", tls);
-}
+ }
Notes:
@@ -117,7 +118,7 @@ kuser_cmpxchg
Location: 0xffff0fc0
-Reference prototype:
+Reference prototype::
int __kuser_cmpxchg(int32_t oldval, int32_t newval, volatile int32_t *ptr);
@@ -139,18 +140,18 @@ Clobbered registers:
Definition:
- Atomically store newval in *ptr only if *ptr is equal to oldval.
- Return zero if *ptr was changed or non-zero if no exchange happened.
- The C flag is also set if *ptr was changed to allow for assembly
+ Atomically store newval in `*ptr` only if `*ptr` is equal to oldval.
+ Return zero if `*ptr` was changed or non-zero if no exchange happened.
+ The C flag is also set if `*ptr` was changed to allow for assembly
optimization in the calling code.
-Usage example:
+Usage example::
-typedef int (__kuser_cmpxchg_t)(int oldval, int newval, volatile int *ptr);
-#define __kuser_cmpxchg (*(__kuser_cmpxchg_t *)0xffff0fc0)
+ typedef int (__kuser_cmpxchg_t)(int oldval, int newval, volatile int *ptr);
+ #define __kuser_cmpxchg (*(__kuser_cmpxchg_t *)0xffff0fc0)
-int atomic_add(volatile int *ptr, int val)
-{
+ int atomic_add(volatile int *ptr, int val)
+ {
int old, new;
do {
@@ -159,7 +160,7 @@ int atomic_add(volatile int *ptr, int val)
} while(__kuser_cmpxchg(old, new, ptr));
return new;
-}
+ }
Notes:
@@ -172,7 +173,7 @@ kuser_memory_barrier
Location: 0xffff0fa0
-Reference prototype:
+Reference prototype::
void __kuser_memory_barrier(void);
@@ -193,10 +194,10 @@ Definition:
Apply any needed memory barrier to preserve consistency with data modified
manually and __kuser_cmpxchg usage.
-Usage example:
+Usage example::
-typedef void (__kuser_dmb_t)(void);
-#define __kuser_dmb (*(__kuser_dmb_t *)0xffff0fa0)
+ typedef void (__kuser_dmb_t)(void);
+ #define __kuser_dmb (*(__kuser_dmb_t *)0xffff0fa0)
Notes:
@@ -207,7 +208,7 @@ kuser_cmpxchg64
Location: 0xffff0f60
-Reference prototype:
+Reference prototype::
int __kuser_cmpxchg64(const int64_t *oldval,
const int64_t *newval,
@@ -231,22 +232,22 @@ Clobbered registers:
Definition:
- Atomically store the 64-bit value pointed by *newval in *ptr only if *ptr
- is equal to the 64-bit value pointed by *oldval. Return zero if *ptr was
+ Atomically store the 64-bit value pointed by `*newval` in `*ptr` only if `*ptr`
+ is equal to the 64-bit value pointed by `*oldval`. Return zero if `*ptr` was
changed or non-zero if no exchange happened.
- The C flag is also set if *ptr was changed to allow for assembly
+ The C flag is also set if `*ptr` was changed to allow for assembly
optimization in the calling code.
-Usage example:
+Usage example::
-typedef int (__kuser_cmpxchg64_t)(const int64_t *oldval,
- const int64_t *newval,
- volatile int64_t *ptr);
-#define __kuser_cmpxchg64 (*(__kuser_cmpxchg64_t *)0xffff0f60)
+ typedef int (__kuser_cmpxchg64_t)(const int64_t *oldval,
+ const int64_t *newval,
+ volatile int64_t *ptr);
+ #define __kuser_cmpxchg64 (*(__kuser_cmpxchg64_t *)0xffff0f60)
-int64_t atomic_add64(volatile int64_t *ptr, int64_t val)
-{
+ int64_t atomic_add64(volatile int64_t *ptr, int64_t val)
+ {
int64_t old, new;
do {
@@ -255,7 +256,7 @@ int64_t atomic_add64(volatile int64_t *ptr, int64_t val)
} while(__kuser_cmpxchg64(&old, &new, ptr));
return new;
-}
+ }
Notes:
diff --git a/Documentation/arm/keystone/knav-qmss.txt b/Documentation/arm/keystone/knav-qmss.rst
index fcdb9fd5f53a..7f7638d80b42 100644
--- a/Documentation/arm/keystone/knav-qmss.txt
+++ b/Documentation/arm/keystone/knav-qmss.rst
@@ -1,4 +1,6 @@
-* Texas Instruments Keystone Navigator Queue Management SubSystem driver
+======================================================================
+Texas Instruments Keystone Navigator Queue Management SubSystem driver
+======================================================================
Driver source code path
drivers/soc/ti/knav_qmss.c
@@ -34,11 +36,13 @@ driver that interface with the accumulator PDSP. This configures
accumulator channels defined in DTS (example in DT documentation) to monitor
1 or 32 queues per channel. More description on the firmware is available in
CPPI/QMSS Low Level Driver document (docs/CPPI_QMSS_LLD_SDS.pdf) at
+
git://git.ti.com/keystone-rtos/qmss-lld.git
k2_qmss_pdsp_acc48_k2_le_1_0_0_9.bin firmware supports upto 48 accumulator
channels. This firmware is available under ti-keystone folder of
firmware.git at
+
git://git.kernel.org/pub/scm/linux/kernel/git/firmware/linux-firmware.git
To use copy the firmware image to lib/firmware folder of the initramfs or
diff --git a/Documentation/arm/keystone/Overview.txt b/Documentation/arm/keystone/overview.rst
index 400c0c270d2e..cd90298c493c 100644
--- a/Documentation/arm/keystone/Overview.txt
+++ b/Documentation/arm/keystone/overview.rst
@@ -1,5 +1,6 @@
- TI Keystone Linux Overview
- --------------------------
+==========================
+TI Keystone Linux Overview
+==========================
Introduction
------------
@@ -9,47 +10,65 @@ for users to run Linux on Keystone based EVMs from Texas Instruments.
Following SoCs & EVMs are currently supported:-
------------- K2HK SoC and EVM --------------------------------------------------
+K2HK SoC and EVM
+=================
a.k.a Keystone 2 Hawking/Kepler SoC
TCI6636K2H & TCI6636K2K: See documentation at
+
http://www.ti.com/product/tci6638k2k
http://www.ti.com/product/tci6638k2h
EVM:
-http://www.advantech.com/Support/TI-EVM/EVMK2HX_sd.aspx
+ http://www.advantech.com/Support/TI-EVM/EVMK2HX_sd.aspx
------------- K2E SoC and EVM ---------------------------------------------------
+K2E SoC and EVM
+===============
a.k.a Keystone 2 Edison SoC
-K2E - 66AK2E05: See documentation at
+
+K2E - 66AK2E05:
+
+See documentation at
+
http://www.ti.com/product/66AK2E05/technicaldocuments
EVM:
-https://www.einfochips.com/index.php/partnerships/texas-instruments/k2e-evm.html
+ https://www.einfochips.com/index.php/partnerships/texas-instruments/k2e-evm.html
------------- K2L SoC and EVM ---------------------------------------------------
+K2L SoC and EVM
+===============
a.k.a Keystone 2 Lamarr SoC
-K2L - TCI6630K2L: See documentation at
+
+K2L - TCI6630K2L:
+
+See documentation at
http://www.ti.com/product/TCI6630K2L/technicaldocuments
+
EVM:
-https://www.einfochips.com/index.php/partnerships/texas-instruments/k2l-evm.html
+ https://www.einfochips.com/index.php/partnerships/texas-instruments/k2l-evm.html
Configuration
-------------
All of the K2 SoCs/EVMs share a common defconfig, keystone_defconfig and same
image is used to boot on individual EVMs. The platform configuration is
-specified through DTS. Following are the DTS used:-
- K2HK EVM : k2hk-evm.dts
- K2E EVM : k2e-evm.dts
- K2L EVM : k2l-evm.dts
+specified through DTS. Following are the DTS used:
+
+ K2HK EVM:
+ k2hk-evm.dts
+ K2E EVM:
+ k2e-evm.dts
+ K2L EVM:
+ k2l-evm.dts
The device tree documentation for the keystone machines are located at
+
Documentation/devicetree/bindings/arm/keystone/keystone.txt
Document Author
---------------
Murali Karicheri <m-karicheri2@ti.com>
+
Copyright 2015 Texas Instruments
diff --git a/Documentation/arm/marvel.rst b/Documentation/arm/marvel.rst
new file mode 100644
index 000000000000..16ab2eb085b8
--- /dev/null
+++ b/Documentation/arm/marvel.rst
@@ -0,0 +1,488 @@
+================
+ARM Marvell SoCs
+================
+
+This document lists all the ARM Marvell SoCs that are currently
+supported in mainline by the Linux kernel. As the Marvell families of
+SoCs are large and complex, it is hard to understand where the support
+for a particular SoC is available in the Linux kernel. This document
+tries to help in understanding where those SoCs are supported, and to
+match them with their corresponding public datasheet, when available.
+
+Orion family
+------------
+
+ Flavors:
+ - 88F5082
+ - 88F5181
+ - 88F5181L
+ - 88F5182
+
+ - Datasheet: http://www.embeddedarm.com/documentation/third-party/MV88F5182-datasheet.pdf
+ - Programmer's User Guide: http://www.embeddedarm.com/documentation/third-party/MV88F5182-opensource-manual.pdf
+ - User Manual: http://www.embeddedarm.com/documentation/third-party/MV88F5182-usermanual.pdf
+ - 88F5281
+
+ - Datasheet: http://www.ocmodshop.com/images/reviews/networking/qnap_ts409u/marvel_88f5281_data_sheet.pdf
+ - 88F6183
+ Core:
+ Feroceon 88fr331 (88f51xx) or 88fr531-vd (88f52xx) ARMv5 compatible
+ Linux kernel mach directory:
+ arch/arm/mach-orion5x
+ Linux kernel plat directory:
+ arch/arm/plat-orion
+
+Kirkwood family
+---------------
+
+ Flavors:
+ - 88F6282 a.k.a Armada 300
+
+ - Product Brief : http://www.marvell.com/embedded-processors/armada-300/assets/armada_310.pdf
+ - 88F6283 a.k.a Armada 310
+
+ - Product Brief : http://www.marvell.com/embedded-processors/armada-300/assets/armada_310.pdf
+ - 88F6190
+
+ - Product Brief : http://www.marvell.com/embedded-processors/kirkwood/assets/88F6190-003_WEB.pdf
+ - Hardware Spec : http://www.marvell.com/embedded-processors/kirkwood/assets/HW_88F619x_OpenSource.pdf
+ - Functional Spec: http://www.marvell.com/embedded-processors/kirkwood/assets/FS_88F6180_9x_6281_OpenSource.pdf
+ - 88F6192
+
+ - Product Brief : http://www.marvell.com/embedded-processors/kirkwood/assets/88F6192-003_ver1.pdf
+ - Hardware Spec : http://www.marvell.com/embedded-processors/kirkwood/assets/HW_88F619x_OpenSource.pdf
+ - Functional Spec: http://www.marvell.com/embedded-processors/kirkwood/assets/FS_88F6180_9x_6281_OpenSource.pdf
+ - 88F6182
+ - 88F6180
+
+ - Product Brief : http://www.marvell.com/embedded-processors/kirkwood/assets/88F6180-003_ver1.pdf
+ - Hardware Spec : http://www.marvell.com/embedded-processors/kirkwood/assets/HW_88F6180_OpenSource.pdf
+ - Functional Spec: http://www.marvell.com/embedded-processors/kirkwood/assets/FS_88F6180_9x_6281_OpenSource.pdf
+ - 88F6281
+
+ - Product Brief : http://www.marvell.com/embedded-processors/kirkwood/assets/88F6281-004_ver1.pdf
+ - Hardware Spec : http://www.marvell.com/embedded-processors/kirkwood/assets/HW_88F6281_OpenSource.pdf
+ - Functional Spec: http://www.marvell.com/embedded-processors/kirkwood/assets/FS_88F6180_9x_6281_OpenSource.pdf
+ Homepage:
+ http://www.marvell.com/embedded-processors/kirkwood/
+ Core:
+ Feroceon 88fr131 ARMv5 compatible
+ Linux kernel mach directory:
+ arch/arm/mach-mvebu
+ Linux kernel plat directory:
+ none
+
+Discovery family
+----------------
+
+ Flavors:
+ - MV78100
+
+ - Product Brief : http://www.marvell.com/embedded-processors/discovery-innovation/assets/MV78100-003_WEB.pdf
+ - Hardware Spec : http://www.marvell.com/embedded-processors/discovery-innovation/assets/HW_MV78100_OpenSource.pdf
+ - Functional Spec: http://www.marvell.com/embedded-processors/discovery-innovation/assets/FS_MV76100_78100_78200_OpenSource.pdf
+ - MV78200
+
+ - Product Brief : http://www.marvell.com/embedded-processors/discovery-innovation/assets/MV78200-002_WEB.pdf
+ - Hardware Spec : http://www.marvell.com/embedded-processors/discovery-innovation/assets/HW_MV78200_OpenSource.pdf
+ - Functional Spec: http://www.marvell.com/embedded-processors/discovery-innovation/assets/FS_MV76100_78100_78200_OpenSource.pdf
+ - MV76100
+
+ Not supported by the Linux kernel.
+
+ Core:
+ Feroceon 88fr571-vd ARMv5 compatible
+
+ Linux kernel mach directory:
+ arch/arm/mach-mv78xx0
+ Linux kernel plat directory:
+ arch/arm/plat-orion
+
+EBU Armada family
+-----------------
+
+ Armada 370 Flavors:
+ - 88F6710
+ - 88F6707
+ - 88F6W11
+
+ - Product Brief: http://www.marvell.com/embedded-processors/armada-300/assets/Marvell_ARMADA_370_SoC.pdf
+ - Hardware Spec: http://www.marvell.com/embedded-processors/armada-300/assets/ARMADA370-datasheet.pdf
+ - Functional Spec: http://www.marvell.com/embedded-processors/armada-300/assets/ARMADA370-FunctionalSpec-datasheet.pdf
+
+ Core:
+ Sheeva ARMv7 compatible PJ4B
+
+ Armada 375 Flavors:
+ - 88F6720
+
+ - Product Brief: http://www.marvell.com/embedded-processors/armada-300/assets/ARMADA_375_SoC-01_product_brief.pdf
+
+ Core:
+ ARM Cortex-A9
+
+ Armada 38x Flavors:
+ - 88F6810 Armada 380
+ - 88F6820 Armada 385
+ - 88F6828 Armada 388
+
+ - Product infos: http://www.marvell.com/embedded-processors/armada-38x/
+ - Functional Spec: https://marvellcorp.wufoo.com/forms/marvell-armada-38x-functional-specifications/
+
+ Core:
+ ARM Cortex-A9
+
+ Armada 39x Flavors:
+ - 88F6920 Armada 390
+ - 88F6928 Armada 398
+
+ - Product infos: http://www.marvell.com/embedded-processors/armada-39x/
+
+ Core:
+ ARM Cortex-A9
+
+ Armada XP Flavors:
+ - MV78230
+ - MV78260
+ - MV78460
+
+ NOTE:
+ not to be confused with the non-SMP 78xx0 SoCs
+
+ Product Brief:
+ http://www.marvell.com/embedded-processors/armada-xp/assets/Marvell-ArmadaXP-SoC-product%20brief.pdf
+
+ Functional Spec:
+ http://www.marvell.com/embedded-processors/armada-xp/assets/ARMADA-XP-Functional-SpecDatasheet.pdf
+
+ - Hardware Specs:
+
+ - http://www.marvell.com/embedded-processors/armada-xp/assets/HW_MV78230_OS.PDF
+ - http://www.marvell.com/embedded-processors/armada-xp/assets/HW_MV78260_OS.PDF
+ - http://www.marvell.com/embedded-processors/armada-xp/assets/HW_MV78460_OS.PDF
+
+ Core:
+ Sheeva ARMv7 compatible Dual-core or Quad-core PJ4B-MP
+
+ Linux kernel mach directory:
+ arch/arm/mach-mvebu
+ Linux kernel plat directory:
+ none
+
+EBU Armada family ARMv8
+-----------------------
+
+ Armada 3710/3720 Flavors:
+ - 88F3710
+ - 88F3720
+
+ Core:
+ ARM Cortex A53 (ARMv8)
+
+ Homepage:
+ http://www.marvell.com/embedded-processors/armada-3700/
+
+ Product Brief:
+ http://www.marvell.com/embedded-processors/assets/PB-88F3700-FNL.pdf
+
+ Device tree files:
+ arch/arm64/boot/dts/marvell/armada-37*
+
+ Armada 7K Flavors:
+ - 88F7020 (AP806 Dual + one CP110)
+ - 88F7040 (AP806 Quad + one CP110)
+
+ Core: ARM Cortex A72
+
+ Homepage:
+ http://www.marvell.com/embedded-processors/armada-70xx/
+
+ Product Brief:
+ - http://www.marvell.com/embedded-processors/assets/Armada7020PB-Jan2016.pdf
+ - http://www.marvell.com/embedded-processors/assets/Armada7040PB-Jan2016.pdf
+
+ Device tree files:
+ arch/arm64/boot/dts/marvell/armada-70*
+
+ Armada 8K Flavors:
+ - 88F8020 (AP806 Dual + two CP110)
+ - 88F8040 (AP806 Quad + two CP110)
+ Core:
+ ARM Cortex A72
+
+ Homepage:
+ http://www.marvell.com/embedded-processors/armada-80xx/
+
+ Product Brief:
+ - http://www.marvell.com/embedded-processors/assets/Armada8020PB-Jan2016.pdf
+ - http://www.marvell.com/embedded-processors/assets/Armada8040PB-Jan2016.pdf
+
+ Device tree files:
+ arch/arm64/boot/dts/marvell/armada-80*
+
+Avanta family
+-------------
+
+ Flavors:
+ - 88F6510
+ - 88F6530P
+ - 88F6550
+ - 88F6560
+
+ Homepage:
+ http://www.marvell.com/broadband/
+
+ Product Brief:
+ http://www.marvell.com/broadband/assets/Marvell_Avanta_88F6510_305_060-001_product_brief.pdf
+
+ No public datasheet available.
+
+ Core:
+ ARMv5 compatible
+
+ Linux kernel mach directory:
+ no code in mainline yet, planned for the future
+ Linux kernel plat directory:
+ no code in mainline yet, planned for the future
+
+Storage family
+--------------
+
+ Armada SP:
+ - 88RC1580
+
+ Product infos:
+ http://www.marvell.com/storage/armada-sp/
+
+ Core:
+ Sheeva ARMv7 compatible Quad-core PJ4C
+
+ (not supported in upstream Linux kernel)
+
+Dove family (application processor)
+-----------------------------------
+
+ Flavors:
+ - 88AP510 a.k.a Armada 510
+
+ Product Brief:
+ http://www.marvell.com/application-processors/armada-500/assets/Marvell_Armada510_SoC.pdf
+
+ Hardware Spec:
+ http://www.marvell.com/application-processors/armada-500/assets/Armada-510-Hardware-Spec.pdf
+
+ Functional Spec:
+ http://www.marvell.com/application-processors/armada-500/assets/Armada-510-Functional-Spec.pdf
+
+ Homepage:
+ http://www.marvell.com/application-processors/armada-500/
+
+ Core:
+ ARMv7 compatible
+
+ Directory:
+ - arch/arm/mach-mvebu (DT enabled platforms)
+ - arch/arm/mach-dove (non-DT enabled platforms)
+
+PXA 2xx/3xx/93x/95x family
+--------------------------
+
+ Flavors:
+ - PXA21x, PXA25x, PXA26x
+ - Application processor only
+ - Core: ARMv5 XScale1 core
+ - PXA270, PXA271, PXA272
+ - Product Brief : http://www.marvell.com/application-processors/pxa-family/assets/pxa_27x_pb.pdf
+ - Design guide : http://www.marvell.com/application-processors/pxa-family/assets/pxa_27x_design_guide.pdf
+ - Developers manual : http://www.marvell.com/application-processors/pxa-family/assets/pxa_27x_dev_man.pdf
+ - Specification : http://www.marvell.com/application-processors/pxa-family/assets/pxa_27x_emts.pdf
+ - Specification update : http://www.marvell.com/application-processors/pxa-family/assets/pxa_27x_spec_update.pdf
+ - Application processor only
+ - Core: ARMv5 XScale2 core
+ - PXA300, PXA310, PXA320
+ - PXA 300 Product Brief : http://www.marvell.com/application-processors/pxa-family/assets/PXA300_PB_R4.pdf
+ - PXA 310 Product Brief : http://www.marvell.com/application-processors/pxa-family/assets/PXA310_PB_R4.pdf
+ - PXA 320 Product Brief : http://www.marvell.com/application-processors/pxa-family/assets/PXA320_PB_R4.pdf
+ - Design guide : http://www.marvell.com/application-processors/pxa-family/assets/PXA3xx_Design_Guide.pdf
+ - Developers manual : http://www.marvell.com/application-processors/pxa-family/assets/PXA3xx_Developers_Manual.zip
+ - Specifications : http://www.marvell.com/application-processors/pxa-family/assets/PXA3xx_EMTS.pdf
+ - Specification Update : http://www.marvell.com/application-processors/pxa-family/assets/PXA3xx_Spec_Update.zip
+ - Reference Manual : http://www.marvell.com/application-processors/pxa-family/assets/PXA3xx_TavorP_BootROM_Ref_Manual.pdf
+ - Application processor only
+ - Core: ARMv5 XScale3 core
+ - PXA930, PXA935
+ - Application processor with Communication processor
+ - Core: ARMv5 XScale3 core
+ - PXA955
+ - Application processor with Communication processor
+ - Core: ARMv7 compatible Sheeva PJ4 core
+
+ Comments:
+
+ * This line of SoCs originates from the XScale family developed by
+ Intel and acquired by Marvell in ~2006. The PXA21x, PXA25x,
+ PXA26x, PXA27x, PXA3xx and PXA93x were developed by Intel, while
+ the later PXA95x were developed by Marvell.
+
+ * Due to their XScale origin, these SoCs have virtually nothing in
+ common with the other (Kirkwood, Dove, etc.) families of Marvell
+ SoCs, except with the MMP/MMP2 family of SoCs.
+
+ Linux kernel mach directory:
+ arch/arm/mach-pxa
+ Linux kernel plat directory:
+ arch/arm/plat-pxa
+
+MMP/MMP2/MMP3 family (communication processor)
+----------------------------------------------
+
+ Flavors:
+ - PXA168, a.k.a Armada 168
+ - Homepage : http://www.marvell.com/application-processors/armada-100/armada-168.jsp
+ - Product brief : http://www.marvell.com/application-processors/armada-100/assets/pxa_168_pb.pdf
+ - Hardware manual : http://www.marvell.com/application-processors/armada-100/assets/armada_16x_datasheet.pdf
+ - Software manual : http://www.marvell.com/application-processors/armada-100/assets/armada_16x_software_manual.pdf
+ - Specification update : http://www.marvell.com/application-processors/armada-100/assets/ARMADA16x_Spec_update.pdf
+ - Boot ROM manual : http://www.marvell.com/application-processors/armada-100/assets/armada_16x_ref_manual.pdf
+ - App note package : http://www.marvell.com/application-processors/armada-100/assets/armada_16x_app_note_package.pdf
+ - Application processor only
+ - Core: ARMv5 compatible Marvell PJ1 88sv331 (Mohawk)
+ - PXA910/PXA920
+ - Homepage : http://www.marvell.com/communication-processors/pxa910/
+ - Product Brief : http://www.marvell.com/communication-processors/pxa910/assets/Marvell_PXA910_Platform-001_PB_final.pdf
+ - Application processor with Communication processor
+ - Core: ARMv5 compatible Marvell PJ1 88sv331 (Mohawk)
+ - PXA688, a.k.a. MMP2, a.k.a Armada 610
+ - Product Brief : http://www.marvell.com/application-processors/armada-600/assets/armada610_pb.pdf
+ - Application processor only
+ - Core: ARMv7 compatible Sheeva PJ4 88sv581x core
+ - PXA2128, a.k.a. MMP3 (OLPC XO4, Linux support not upstream)
+ - Product Brief : http://www.marvell.com/application-processors/armada/pxa2128/assets/Marvell-ARMADA-PXA2128-SoC-PB.pdf
+ - Application processor only
+ - Core: Dual-core ARMv7 compatible Sheeva PJ4C core
+ - PXA960/PXA968/PXA978 (Linux support not upstream)
+ - Application processor with Communication Processor
+ - Core: ARMv7 compatible Sheeva PJ4 core
+ - PXA986/PXA988 (Linux support not upstream)
+ - Application processor with Communication Processor
+ - Core: Dual-core ARMv7 compatible Sheeva PJ4B-MP core
+ - PXA1088/PXA1920 (Linux support not upstream)
+ - Application processor with Communication Processor
+ - Core: quad-core ARMv7 Cortex-A7
+ - PXA1908/PXA1928/PXA1936
+ - Application processor with Communication Processor
+ - Core: multi-core ARMv8 Cortex-A53
+
+ Comments:
+
+ * This line of SoCs originates from the XScale family developed by
+ Intel and acquired by Marvell in ~2006. All the processors of
+ this MMP/MMP2 family were developed by Marvell.
+
+ * Due to their XScale origin, these SoCs have virtually nothing in
+ common with the other (Kirkwood, Dove, etc.) families of Marvell
+ SoCs, except with the PXA family of SoCs listed above.
+
+ Linux kernel mach directory:
+ arch/arm/mach-mmp
+ Linux kernel plat directory:
+ arch/arm/plat-pxa
+
+Berlin family (Multimedia Solutions)
+-------------------------------------
+
+ - Flavors:
+ - 88DE3010, Armada 1000 (no Linux support)
+ - Core: Marvell PJ1 (ARMv5TE), Dual-core
+ - Product Brief: http://www.marvell.com.cn/digital-entertainment/assets/armada_1000_pb.pdf
+ - 88DE3005, Armada 1500 Mini
+ - Design name: BG2CD
+ - Core: ARM Cortex-A9, PL310 L2CC
+ - 88DE3006, Armada 1500 Mini Plus
+ - Design name: BG2CDP
+ - Core: Dual Core ARM Cortex-A7
+ - 88DE3100, Armada 1500
+ - Design name: BG2
+ - Core: Marvell PJ4B-MP (ARMv7), Tauros3 L2CC
+ - 88DE3114, Armada 1500 Pro
+ - Design name: BG2Q
+ - Core: Quad Core ARM Cortex-A9, PL310 L2CC
+ - 88DE3214, Armada 1500 Pro 4K
+ - Design name: BG3
+ - Core: ARM Cortex-A15, CA15 integrated L2CC
+ - 88DE3218, ARMADA 1500 Ultra
+ - Core: ARM Cortex-A53
+
+ Homepage: https://www.synaptics.com/products/multimedia-solutions
+ Directory: arch/arm/mach-berlin
+
+ Comments:
+
+ * This line of SoCs is based on Marvell Sheeva or ARM Cortex CPUs
+ with Synopsys DesignWare (IRQ, GPIO, Timers, ...) and PXA IP (SDHCI, USB, ETH, ...).
+
+ * The Berlin family was acquired by Synaptics from Marvell in 2017.
+
+CPU Cores
+---------
+
+The XScale cores were designed by Intel, and shipped by Marvell in the older
+PXA processors. Feroceon is a Marvell-designed core that was developed in-house,
+and that evolved into Sheeva. The XScale and Feroceon cores were phased out
+over time and replaced with Sheeva cores in later products, which subsequently
+got replaced with licensed ARM Cortex-A cores.
+
+ XScale 1
+ CPUID 0x69052xxx
+ ARMv5, iWMMXt
+ XScale 2
+ CPUID 0x69054xxx
+ ARMv5, iWMMXt
+ XScale 3
+ CPUID 0x69056xxx or 0x69056xxx
+ ARMv5, iWMMXt
+ Feroceon-1850 88fr331 "Mohawk"
+ CPUID 0x5615331x or 0x41xx926x
+ ARMv5TE, single issue
+ Feroceon-2850 88fr531-vd "Jolteon"
+ CPUID 0x5605531x or 0x41xx926x
+ ARMv5TE, VFP, dual-issue
+ Feroceon 88fr571-vd "Jolteon"
+ CPUID 0x5615571x
+ ARMv5TE, VFP, dual-issue
+ Feroceon 88fr131 "Mohawk-D"
+ CPUID 0x5625131x
+ ARMv5TE, single-issue in-order
+ Sheeva PJ1 88sv331 "Mohawk"
+ CPUID 0x561584xx
+ ARMv5, single-issue iWMMXt v2
+ Sheeva PJ4 88sv581x "Flareon"
+ CPUID 0x560f581x
+ ARMv7, idivt, optional iWMMXt v2
+ Sheeva PJ4B 88sv581x
+ CPUID 0x561f581x
+ ARMv7, idivt, optional iWMMXt v2
+ Sheeva PJ4B-MP / PJ4C
+ CPUID 0x562f584x
+ ARMv7, idivt/idiva, LPAE, optional iWMMXt v2 and/or NEON
+
+Long-term plans
+---------------
+
+ * Unify the mach-dove/, mach-mv78xx0/, mach-orion5x/ into the
+ mach-mvebu/ to support all SoCs from the Marvell EBU (Engineering
+ Business Unit) in a single mach-<foo> directory. The plat-orion/
+ would therefore disappear.
+
+ * Unify the mach-mmp/ and mach-pxa/ into the same mach-pxa
+ directory. The plat-pxa/ would therefore disappear.
+
+Credits
+-------
+
+- Maen Suleiman <maen@marvell.com>
+- Lior Amsalem <alior@marvell.com>
+- Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
+- Andrew Lunn <andrew@lunn.ch>
+- Nicolas Pitre <nico@fluxnic.net>
+- Eric Miao <eric.y.miao@gmail.com>
diff --git a/Documentation/arm/mem_alignment b/Documentation/arm/mem_alignment.rst
index e110e2781039..aa22893b62bc 100644
--- a/Documentation/arm/mem_alignment
+++ b/Documentation/arm/mem_alignment.rst
@@ -1,3 +1,7 @@
+================
+Memory alignment
+================
+
Too many problems popped up because of unnoticed misaligned memory access in
kernel code lately. Therefore the alignment fixup is now unconditionally
configured in for SA11x0 based targets. According to Alan Cox, this is a
@@ -26,9 +30,9 @@ space, and might cause programs to fail unexpectedly.
To change the alignment trap behavior, simply echo a number into
/proc/cpu/alignment. The number is made up from various bits:
+=== ========================================================
bit behavior when set
---- -----------------
-
+=== ========================================================
0 A user process performing an unaligned memory access
will cause the kernel to print a message indicating
process name, pid, pc, instruction, address, and the
@@ -41,12 +45,13 @@ bit behavior when set
2 The kernel will send a SIGBUS signal to the user process
performing the unaligned access.
+=== ========================================================
Note that not all combinations are supported - only values 0 through 5.
(6 and 7 don't make sense).
For example, the following will turn on the warnings, but without
-fixing up or sending SIGBUS signals:
+fixing up or sending SIGBUS signals::
echo 1 > /proc/cpu/alignment
diff --git a/Documentation/arm/memory.txt b/Documentation/arm/memory.rst
index 546a39048eb0..0521b4ce5c96 100644
--- a/Documentation/arm/memory.txt
+++ b/Documentation/arm/memory.rst
@@ -1,6 +1,9 @@
- Kernel Memory Layout on ARM Linux
+=================================
+Kernel Memory Layout on ARM Linux
+=================================
Russell King <rmk@arm.linux.org.uk>
+
November 17, 2005 (2.6.15)
This document describes the virtual memory layout which the Linux
@@ -15,8 +18,9 @@ As the ARM architecture matures, it becomes necessary to reserve
certain regions of VM space for use for new facilities; therefore
this document may reserve more VM space over time.
+=============== =============== ===============================================
Start End Use
---------------------------------------------------------------------------
+=============== =============== ===============================================
ffff8000 ffffffff copy_user_page / clear_user_page use.
For SA11xx and Xscale, this is used to
setup a minicache mapping.
@@ -77,6 +81,7 @@ MODULES_VADDR MODULES_END-1 Kernel module space
place their vector page here. NULL pointer
dereferences by both the kernel and user
space are also caught via this mapping.
+=============== =============== ===============================================
Please note that mappings which collide with the above areas may result
in a non-bootable kernel, or may cause the kernel to (eventually) panic
diff --git a/Documentation/arm/Microchip/README b/Documentation/arm/microchip.rst
index a366f37d38f1..c9a44c98e868 100644
--- a/Documentation/arm/Microchip/README
+++ b/Documentation/arm/microchip.rst
@@ -1,3 +1,4 @@
+=============================
ARM Microchip SoCs (aka AT91)
=============================
@@ -22,32 +23,46 @@ the Microchip website: http://www.microchip.com.
Flavors:
* ARM 920 based SoC
- at91rm9200
- + Datasheet
+
+ * Datasheet
+
http://ww1.microchip.com/downloads/en/DeviceDoc/Atmel-1768-32-bit-ARM920T-Embedded-Microprocessor-AT91RM9200_Datasheet.pdf
* ARM 926 based SoCs
- at91sam9260
- + Datasheet
+
+ * Datasheet
+
http://ww1.microchip.com/downloads/en/DeviceDoc/Atmel-6221-32-bit-ARM926EJ-S-Embedded-Microprocessor-SAM9260_Datasheet.pdf
- at91sam9xe
- + Datasheet
+
+ * Datasheet
+
http://ww1.microchip.com/downloads/en/DeviceDoc/Atmel-6254-32-bit-ARM926EJ-S-Embedded-Microprocessor-SAM9XE_Datasheet.pdf
- at91sam9261
- + Datasheet
+
+ * Datasheet
+
http://ww1.microchip.com/downloads/en/DeviceDoc/Atmel-6062-ARM926EJ-S-Microprocessor-SAM9261_Datasheet.pdf
- at91sam9263
- + Datasheet
+
+ * Datasheet
+
http://ww1.microchip.com/downloads/en/DeviceDoc/Atmel-6249-32-bit-ARM926EJ-S-Embedded-Microprocessor-SAM9263_Datasheet.pdf
- at91sam9rl
- + Datasheet
+
+ * Datasheet
+
http://ww1.microchip.com/downloads/en/DeviceDoc/doc6289.pdf
- at91sam9g20
- + Datasheet
+
+ * Datasheet
+
http://ww1.microchip.com/downloads/en/DeviceDoc/DS60001516A.pdf
- at91sam9g45 family
@@ -55,7 +70,9 @@ the Microchip website: http://www.microchip.com.
- at91sam9g46
- at91sam9m10
- at91sam9m11 (device superset)
- + Datasheet
+
+ * Datasheet
+
http://ww1.microchip.com/downloads/en/DeviceDoc/Atmel-6437-32-bit-ARM926-Embedded-Microprocessor-SAM9M11_Datasheet.pdf
- at91sam9x5 family (aka "The 5 series")
@@ -64,33 +81,44 @@ the Microchip website: http://www.microchip.com.
- at91sam9g35
- at91sam9x25
- at91sam9x35
- + Datasheet (can be considered as covering the whole family)
+
+ * Datasheet (can be considered as covering the whole family)
+
http://ww1.microchip.com/downloads/en/DeviceDoc/Atmel-11055-32-bit-ARM926EJ-S-Microcontroller-SAM9X35_Datasheet.pdf
- at91sam9n12
- + Datasheet
+
+ * Datasheet
+
http://ww1.microchip.com/downloads/en/DeviceDoc/DS60001517A.pdf
* ARM Cortex-A5 based SoCs
- sama5d3 family
+
- sama5d31
- sama5d33
- sama5d34
- sama5d35
- sama5d36 (device superset)
- + Datasheet
+
+ * Datasheet
+
http://ww1.microchip.com/downloads/en/DeviceDoc/Atmel-11121-32-bit-Cortex-A5-Microcontroller-SAMA5D3_Datasheet.pdf
* ARM Cortex-A5 + NEON based SoCs
- sama5d4 family
+
- sama5d41
- sama5d42
- sama5d43
- sama5d44 (device superset)
- + Datasheet
+
+ * Datasheet
+
http://ww1.microchip.com/downloads/en/DeviceDoc/60001525A.pdf
- sama5d2 family
+
- sama5d21
- sama5d22
- sama5d23
@@ -98,11 +126,14 @@ the Microchip website: http://www.microchip.com.
- sama5d26
- sama5d27 (device superset)
- sama5d28 (device superset + environmental monitors)
- + Datasheet
+
+ * Datasheet
+
http://ww1.microchip.com/downloads/en/DeviceDoc/DS60001476B.pdf
* ARM Cortex-M7 MCUs
- sams70 family
+
- sams70j19
- sams70j20
- sams70j21
@@ -114,6 +145,7 @@ the Microchip website: http://www.microchip.com.
- sams70q21
- samv70 family
+
- samv70j19
- samv70j20
- samv70n19
@@ -122,6 +154,7 @@ the Microchip website: http://www.microchip.com.
- samv70q20
- samv71 family
+
- samv71j19
- samv71j20
- samv71j21
@@ -132,7 +165,8 @@ the Microchip website: http://www.microchip.com.
- samv71q20
- samv71q21
- + Datasheet
+ * Datasheet
+
http://ww1.microchip.com/downloads/en/DeviceDoc/60001527A.pdf
@@ -157,6 +191,7 @@ definition of a "Stable" binding/ABI.
This statement will be removed by AT91 MAINTAINERS when appropriate.
Naming conventions and best practice:
+
- SoCs Device Tree Source Include files are named after the official name of
the product (at91sam9g20.dtsi or sama5d33.dtsi for instance).
- Device Tree Source Include files (.dtsi) are used to collect common nodes that can be
diff --git a/Documentation/arm/netwinder.rst b/Documentation/arm/netwinder.rst
new file mode 100644
index 000000000000..8eab66caa2ac
--- /dev/null
+++ b/Documentation/arm/netwinder.rst
@@ -0,0 +1,85 @@
+================================
+NetWinder specific documentation
+================================
+
+The NetWinder is a small low-power computer, primarily designed
+to run Linux. It is based around the StrongARM RISC processor,
+DC21285 PCI bridge, with PC-type hardware glued around it.
+
+Port usage
+==========
+
+======= ====== ===============================
+Min Max Description
+======= ====== ===============================
+0x0000 0x000f DMA1
+0x0020 0x0021 PIC1
+0x0060 0x006f Keyboard
+0x0070 0x007f RTC
+0x0080 0x0087 DMA1
+0x0088 0x008f DMA2
+0x00a0 0x00a3 PIC2
+0x00c0 0x00df DMA2
+0x0180 0x0187 IRDA
+0x01f0 0x01f6 ide0
+0x0201 Game port
+0x0203 RWA010 configuration read
+0x0220 ? SoundBlaster
+0x0250 ? WaveArtist
+0x0279 RWA010 configuration index
+0x02f8 0x02ff Serial ttyS1
+0x0300 0x031f Ether10
+0x0338 GPIO1
+0x033a GPIO2
+0x0370 0x0371 W83977F configuration registers
+0x0388 ? AdLib
+0x03c0 0x03df VGA
+0x03f6 ide0
+0x03f8 0x03ff Serial ttyS0
+0x0400 0x0408 DC21143
+0x0480 0x0487 DMA1
+0x0488 0x048f DMA2
+0x0a79 RWA010 configuration write
+0xe800 0xe80f ide0/ide1 BM DMA
+======= ====== ===============================
+
+
+Interrupt usage
+===============
+
+======= ======= ========================
+IRQ type Description
+======= ======= ========================
+ 0 ISA 100Hz timer
+ 1 ISA Keyboard
+ 2 ISA cascade
+ 3 ISA Serial ttyS1
+ 4 ISA Serial ttyS0
+ 5 ISA PS/2 mouse
+ 6 ISA IRDA
+ 7 ISA Printer
+ 8 ISA RTC alarm
+ 9 ISA
+10 ISA GP10 (Orange reset button)
+11 ISA
+12 ISA WaveArtist
+13 ISA
+14 ISA hda1
+15 ISA
+======= ======= ========================
+
+DMA usage
+=========
+
+======= ======= ===========
+DMA type Description
+======= ======= ===========
+ 0 ISA IRDA
+ 1 ISA
+ 2 ISA cascade
+ 3 ISA WaveArtist
+ 4 ISA
+ 5 ISA
+ 6 ISA
+ 7 ISA WaveArtist
+======= ======= ===========
diff --git a/Documentation/arm/nwfpe/index.rst b/Documentation/arm/nwfpe/index.rst
new file mode 100644
index 000000000000..3c4d2f9aa10e
--- /dev/null
+++ b/Documentation/arm/nwfpe/index.rst
@@ -0,0 +1,13 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===================================
+NetWinder's floating point emulator
+===================================
+
+.. toctree::
+ :maxdepth: 1
+
+ nwfpe
+ netwinder-fpe
+ notes
+ todo
diff --git a/Documentation/arm/nwfpe/README.FPE b/Documentation/arm/nwfpe/netwinder-fpe.rst
index 26f5d7bb9a41..cbb320960fc4 100644
--- a/Documentation/arm/nwfpe/README.FPE
+++ b/Documentation/arm/nwfpe/netwinder-fpe.rst
@@ -1,12 +1,18 @@
+=============
+Current State
+=============
+
The following describes the current state of the NetWinder's floating point
emulator.
The following nomenclature is used to describe the floating point
instructions. It follows the conventions in the ARM manual.
-<S|D|E> = <single|double|extended>, no default
-{P|M|Z} = {round to +infinity,round to -infinity,round to zero},
- default = round to nearest
+::
+
+ <S|D|E> = <single|double|extended>, no default
+ {P|M|Z} = {round to +infinity,round to -infinity,round to zero},
+ default = round to nearest
Note: items enclosed in {} are optional.
@@ -32,10 +38,10 @@ Form 2 syntax:
<LFM|SFM>{cond}<FD,EA> Fd, <count>, [Rn]{!}
These instructions are fully implemented. They store/load three words
-for each floating point register into the memory location given in the
+for each floating point register into the memory location given in the
instruction. The format in memory is unlikely to be compatible with
other implementations, in particular the actual hardware. Specific
-mention of this is made in the ARM manuals.
+mention of this is made in the ARM manuals.
Floating Point Coprocessor Register Transfer Instructions (CPRT)
----------------------------------------------------------------
@@ -123,7 +129,7 @@ RPW{cond}<S|D|E>{P,M,Z} Fd, Fn, <Fm,#value> - reverse power
POL{cond}<S|D|E>{P,M,Z} Fd, Fn, <Fm,#value> - polar angle (arctan2)
LOG{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - logarithm to base 10
-LGN{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - logarithm to base e
+LGN{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - logarithm to base e
EXP{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - exponent
SIN{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - sine
COS{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - cosine
@@ -134,7 +140,7 @@ ATN{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - arctangent
These are not implemented. They are not currently issued by the compiler,
and are handled by routines in libc. These are not implemented by the FPA11
-hardware, but are handled by the floating point support code. They should
+hardware, but are handled by the floating point support code. They should
be implemented in future versions.
Signalling:
@@ -147,10 +153,10 @@ current_set[0] correctly.
The kernel provided with this distribution (vmlinux-nwfpe-0.93) contains
a fix for this problem and also incorporates the current version of the
emulator directly. It is possible to run with no floating point module
-loaded with this kernel. It is provided as a demonstration of the
+loaded with this kernel. It is provided as a demonstration of the
technology and for those who want to do floating point work that depends
on signals. It is not strictly necessary to use the module.
-A module (either the one provided by Russell King, or the one in this
+A module (either the one provided by Russell King, or the one in this
distribution) can be loaded to replace the functionality of the emulator
built into the kernel.
diff --git a/Documentation/arm/nwfpe/NOTES b/Documentation/arm/nwfpe/notes.rst
index 40577b5a49d3..102e55af8439 100644
--- a/Documentation/arm/nwfpe/NOTES
+++ b/Documentation/arm/nwfpe/notes.rst
@@ -1,3 +1,6 @@
+Notes
+=====
+
There seems to be a problem with exp(double) and our emulator. I haven't
been able to track it down yet. This does not occur with the emulator
supplied by Russell King.
diff --git a/Documentation/arm/nwfpe/README b/Documentation/arm/nwfpe/nwfpe.rst
index 771871de0c8b..35cd90dacbff 100644
--- a/Documentation/arm/nwfpe/README
+++ b/Documentation/arm/nwfpe/nwfpe.rst
@@ -1,4 +1,7 @@
-This directory contains the version 0.92 test release of the NetWinder
+Introduction
+============
+
+This directory contains the version 0.92 test release of the NetWinder
Floating Point Emulator.
The majority of the code was written by me, Scott Bambrough. It is
@@ -31,7 +34,7 @@ SoftFloat to the ARM was done by Phil Blundell, based on an earlier
port of SoftFloat version 1 by Neil Carson for NetBSD/arm32.
The file README.FPE contains a description of what has been implemented
-so far in the emulator. The file TODO contains a information on what
+so far in the emulator. The file TODO contains information on what
remains to be done, and other ideas for the emulator.
Bug reports, comments, suggestions should be directed to me at
@@ -48,10 +51,11 @@ Legal Notices
The NetWinder Floating Point Emulator is free software. Everything Rebel.com
has written is provided under the GNU GPL. See the file COPYING for copying
-conditions. Excluded from the above is the SoftFloat code. John Hauser's
+conditions. Excluded from the above is the SoftFloat code. John Hauser's
legal notice for SoftFloat is included below.
-------------------------------------------------------------------------------
+
SoftFloat Legal Notice
SoftFloat was written by John R. Hauser. This work was made possible in
diff --git a/Documentation/arm/nwfpe/TODO b/Documentation/arm/nwfpe/todo.rst
index 8027061b60eb..393f11b14540 100644
--- a/Documentation/arm/nwfpe/TODO
+++ b/Documentation/arm/nwfpe/todo.rst
@@ -1,39 +1,42 @@
TODO LIST
----------
+=========
-POW{cond}<S|D|E>{P,M,Z} Fd, Fn, <Fm,#value> - power
-RPW{cond}<S|D|E>{P,M,Z} Fd, Fn, <Fm,#value> - reverse power
-POL{cond}<S|D|E>{P,M,Z} Fd, Fn, <Fm,#value> - polar angle (arctan2)
+::
-LOG{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - logarithm to base 10
-LGN{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - logarithm to base e
-EXP{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - exponent
-SIN{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - sine
-COS{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - cosine
-TAN{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - tangent
-ASN{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - arcsine
-ACS{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - arccosine
-ATN{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - arctangent
+ POW{cond}<S|D|E>{P,M,Z} Fd, Fn, <Fm,#value> - power
+ RPW{cond}<S|D|E>{P,M,Z} Fd, Fn, <Fm,#value> - reverse power
+ POL{cond}<S|D|E>{P,M,Z} Fd, Fn, <Fm,#value> - polar angle (arctan2)
+
+ LOG{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - logarithm to base 10
+ LGN{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - logarithm to base e
+ EXP{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - exponent
+ SIN{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - sine
+ COS{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - cosine
+ TAN{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - tangent
+ ASN{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - arcsine
+ ACS{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - arccosine
+ ATN{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - arctangent
These are not implemented. They are not currently issued by the compiler,
and are handled by routines in libc. These are not implemented by the FPA11
-hardware, but are handled by the floating point support code. They should
+hardware, but are handled by the floating point support code. They should
be implemented in future versions.
There are a couple of ways to approach the implementation of these. One
-method would be to use accurate table methods for these routines. I have
+method would be to use accurate table methods for these routines. I have
a couple of papers by S. Gal from IBM's research labs in Haifa, Israel that
seem to promise extreme accuracy (in the order of 99.8%) and reasonable speed.
These methods are used in GLIBC for some of the transcendental functions.
Another approach, which I know little about, is CORDIC. This stands for
-Coordinate Rotation Digital Computer, and is a method of computing
+Coordinate Rotation Digital Computer, and is a method of computing
transcendental functions using mostly shifts and adds and a few
multiplications and divisions. The ARM excels at shifts and adds,
-so such a method could be promising, but requires more research to
+so such a method could be promising, but requires more research to
determine if it is feasible.
Rounding Methods
+----------------
The IEEE standard defines 4 rounding modes. Round to nearest is the
default, but rounding to + or - infinity or round to zero are also allowed.
@@ -42,8 +45,8 @@ in a control register. Not so with the ARM FPA11 architecture. To change
the rounding mode one must specify it with each instruction.
This has made porting some benchmarks difficult. It is possible to
-introduce such a capability into the emulator. The FPCR contains
-bits describing the rounding mode. The emulator could be altered to
+introduce such a capability into the emulator. The FPCR contains
+bits describing the rounding mode. The emulator could be altered to
examine a flag, which if set forced it to ignore the rounding mode in
the instruction, and use the mode specified in the bits in the FPCR.
@@ -52,7 +55,8 @@ in the FPCR. This requires a kernel call in ArmLinux, as WFC/RFC are
supervisor only instructions. If anyone has any ideas or comments I
would like to hear them.
-[NOTE: pulled out from some docs on ARM floating point, specifically
+NOTE:
+ pulled out from some docs on ARM floating point, specifically
for the Acorn FPE, but not limited to it:
The floating point control register (FPCR) may only be present in some
@@ -64,4 +68,5 @@ would like to hear them.
Hence, the answer is yes, you could do this, but then you will run a high
risk of becoming isolated if and when hardware FP emulation comes out
- -- Russell].
+
+ -- Russell.
diff --git a/Documentation/arm/OMAP/DSS b/Documentation/arm/omap/dss.rst
index 4484e021290e..a40c4d9c717a 100644
--- a/Documentation/arm/OMAP/DSS
+++ b/Documentation/arm/omap/dss.rst
@@ -1,5 +1,6 @@
+=========================
OMAP2/3 Display Subsystem
--------------------------
+=========================
This is an almost total rewrite of the OMAP FB driver in drivers/video/omap
(let's call it DSS1). The main differences between DSS1 and DSS2 are DSI,
@@ -190,6 +191,8 @@ trans_key_value transparency color key (RGB24)
default_color default background color (RGB24)
/sys/devices/platform/omapdss/display? directory:
+
+=============== =============================================================
ctrl_name Controller name
mirror 0=off, 1=on
update_mode 0=off, 1=auto, 2=manual
@@ -202,6 +205,7 @@ timings Display timings (pixclock,xres/hfp/hbp/hsw,yres/vfp/vbp/vsw)
panel_name
tear_elim Tearing elimination 0=off, 1=on
output_type Output type (video encoder only): "composite" or "svideo"
+=============== =============================================================
There are also some debugfs files at <debugfs>/omapdss/ which show information
about clocks and registers.
@@ -209,22 +213,22 @@ about clocks and registers.
Examples
--------
-The following definitions have been made for the examples below:
+The following definitions have been made for the examples below::
-ovl0=/sys/devices/platform/omapdss/overlay0
-ovl1=/sys/devices/platform/omapdss/overlay1
-ovl2=/sys/devices/platform/omapdss/overlay2
+ ovl0=/sys/devices/platform/omapdss/overlay0
+ ovl1=/sys/devices/platform/omapdss/overlay1
+ ovl2=/sys/devices/platform/omapdss/overlay2
-mgr0=/sys/devices/platform/omapdss/manager0
-mgr1=/sys/devices/platform/omapdss/manager1
+ mgr0=/sys/devices/platform/omapdss/manager0
+ mgr1=/sys/devices/platform/omapdss/manager1
-lcd=/sys/devices/platform/omapdss/display0
-dvi=/sys/devices/platform/omapdss/display1
-tv=/sys/devices/platform/omapdss/display2
+ lcd=/sys/devices/platform/omapdss/display0
+ dvi=/sys/devices/platform/omapdss/display1
+ tv=/sys/devices/platform/omapdss/display2
-fb0=/sys/class/graphics/fb0
-fb1=/sys/class/graphics/fb1
-fb2=/sys/class/graphics/fb2
+ fb0=/sys/class/graphics/fb0
+ fb1=/sys/class/graphics/fb1
+ fb2=/sys/class/graphics/fb2
Default setup on OMAP3 SDP
--------------------------
@@ -232,55 +236,59 @@ Default setup on OMAP3 SDP
Here's the default setup on OMAP3 SDP board. All planes go to LCD. DVI
and TV-out are not in use. The columns from left to right are:
framebuffers, overlays, overlay managers, displays. Framebuffers are
-handled by omapfb, and the rest by the DSS.
+handled by omapfb, and the rest by the DSS::
-FB0 --- GFX -\ DVI
-FB1 --- VID1 --+- LCD ---- LCD
-FB2 --- VID2 -/ TV ----- TV
+ FB0 --- GFX -\ DVI
+ FB1 --- VID1 --+- LCD ---- LCD
+ FB2 --- VID2 -/ TV ----- TV
Example: Switch from LCD to DVI
-----------------------
+-------------------------------
+
+::
-w=`cat $dvi/timings | cut -d "," -f 2 | cut -d "/" -f 1`
-h=`cat $dvi/timings | cut -d "," -f 3 | cut -d "/" -f 1`
+ w=`cat $dvi/timings | cut -d "," -f 2 | cut -d "/" -f 1`
+ h=`cat $dvi/timings | cut -d "," -f 3 | cut -d "/" -f 1`
-echo "0" > $lcd/enabled
-echo "" > $mgr0/display
-fbset -fb /dev/fb0 -xres $w -yres $h -vxres $w -vyres $h
-# at this point you have to switch the dvi/lcd dip-switch from the omap board
-echo "dvi" > $mgr0/display
-echo "1" > $dvi/enabled
+ echo "0" > $lcd/enabled
+ echo "" > $mgr0/display
+ fbset -fb /dev/fb0 -xres $w -yres $h -vxres $w -vyres $h
+ # at this point you have to switch the dvi/lcd dip-switch from the omap board
+ echo "dvi" > $mgr0/display
+ echo "1" > $dvi/enabled
-After this the configuration looks like:
+After this the configuration looks like::
-FB0 --- GFX -\ -- DVI
-FB1 --- VID1 --+- LCD -/ LCD
-FB2 --- VID2 -/ TV ----- TV
+ FB0 --- GFX -\ -- DVI
+ FB1 --- VID1 --+- LCD -/ LCD
+ FB2 --- VID2 -/ TV ----- TV
Example: Clone GFX overlay to LCD and TV
--------------------------------
+----------------------------------------
+
+::
-w=`cat $tv/timings | cut -d "," -f 2 | cut -d "/" -f 1`
-h=`cat $tv/timings | cut -d "," -f 3 | cut -d "/" -f 1`
+ w=`cat $tv/timings | cut -d "," -f 2 | cut -d "/" -f 1`
+ h=`cat $tv/timings | cut -d "," -f 3 | cut -d "/" -f 1`
-echo "0" > $ovl0/enabled
-echo "0" > $ovl1/enabled
+ echo "0" > $ovl0/enabled
+ echo "0" > $ovl1/enabled
-echo "" > $fb1/overlays
-echo "0,1" > $fb0/overlays
+ echo "" > $fb1/overlays
+ echo "0,1" > $fb0/overlays
-echo "$w,$h" > $ovl1/output_size
-echo "tv" > $ovl1/manager
+ echo "$w,$h" > $ovl1/output_size
+ echo "tv" > $ovl1/manager
-echo "1" > $ovl0/enabled
-echo "1" > $ovl1/enabled
+ echo "1" > $ovl0/enabled
+ echo "1" > $ovl1/enabled
-echo "1" > $tv/enabled
+ echo "1" > $tv/enabled
-After this the configuration looks like (only relevant parts shown):
+After this the configuration looks like (only relevant parts shown)::
-FB0 +-- GFX ---- LCD ---- LCD
- \- VID1 ---- TV ---- TV
+ FB0 +-- GFX ---- LCD ---- LCD
+ \- VID1 ---- TV ---- TV
Misc notes
----------
@@ -351,12 +359,14 @@ TODO
DSS locking
Error checking
+
- Lots of checks are missing or implemented just as BUG()
System DMA update for DSI
+
- Can be used for RGB16 and RGB24P modes. Probably not for RGB24U (how
to skip the empty byte?)
OMAP1 support
-- Not sure if needed
+- Not sure if needed
diff --git a/Documentation/arm/omap/index.rst b/Documentation/arm/omap/index.rst
new file mode 100644
index 000000000000..8b365b212e49
--- /dev/null
+++ b/Documentation/arm/omap/index.rst
@@ -0,0 +1,12 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=======
+TI OMAP
+=======
+
+.. toctree::
+ :maxdepth: 1
+
+ omap
+ omap_pm
+ dss
diff --git a/Documentation/arm/OMAP/README b/Documentation/arm/omap/omap.rst
index 90c6c57d61e8..f440c0f4613f 100644
--- a/Documentation/arm/OMAP/README
+++ b/Documentation/arm/omap/omap.rst
@@ -1,7 +1,13 @@
+============
+OMAP history
+============
+
This file contains documentation for running mainline
kernel on omaps.
+====== ======================================================
KERNEL NEW DEPENDENCIES
+====== ======================================================
v4.3+ Update is needed for custom .config files to make sure
CONFIG_REGULATOR_PBIAS is enabled for MMC1 to work
properly.
@@ -9,3 +15,4 @@ v4.3+ Update is needed for custom .config files to make sure
v4.18+ Update is needed for custom .config files to make sure
CONFIG_MMC_SDHCI_OMAP is enabled for all MMC instances
to work in DRA7 and K2G based boards.
+====== ======================================================
diff --git a/Documentation/arm/OMAP/omap_pm b/Documentation/arm/omap/omap_pm.rst
index 4ae915a9f899..a335e4c8ce2c 100644
--- a/Documentation/arm/OMAP/omap_pm
+++ b/Documentation/arm/omap/omap_pm.rst
@@ -1,4 +1,4 @@
-
+=====================
The OMAP PM interface
=====================
@@ -31,19 +31,24 @@ Drivers need to express PM parameters which:
This document proposes the OMAP PM interface, including the following
five power management functions for driver code:
-1. Set the maximum MPU wakeup latency:
+1. Set the maximum MPU wakeup latency::
+
(*pdata->set_max_mpu_wakeup_lat)(struct device *dev, unsigned long t)
-2. Set the maximum device wakeup latency:
+2. Set the maximum device wakeup latency::
+
(*pdata->set_max_dev_wakeup_lat)(struct device *dev, unsigned long t)
-3. Set the maximum system DMA transfer start latency (CORE pwrdm):
+3. Set the maximum system DMA transfer start latency (CORE pwrdm)::
+
(*pdata->set_max_sdma_lat)(struct device *dev, long t)
-4. Set the minimum bus throughput needed by a device:
+4. Set the minimum bus throughput needed by a device::
+
(*pdata->set_min_bus_tput)(struct device *dev, u8 agent_id, unsigned long r)
-5. Return the number of times the device has lost context
+5. Return the number of times the device has lost context::
+
(*pdata->get_dev_context_loss_count)(struct device *dev)
@@ -65,12 +70,13 @@ Driver usage of the OMAP PM functions
As the 'pdata' in the above examples indicates, these functions are
exposed to drivers through function pointers in driver .platform_data
-structures. The function pointers are initialized by the board-*.c
+structures. The function pointers are initialized by the `board-*.c`
files to point to the corresponding OMAP PM functions:
-.set_max_dev_wakeup_lat will point to
-omap_pm_set_max_dev_wakeup_lat(), etc. Other architectures which do
-not support these functions should leave these function pointers set
-to NULL. Drivers should use the following idiom:
+
+- set_max_dev_wakeup_lat will point to
+ omap_pm_set_max_dev_wakeup_lat(), etc. Other architectures which do
+ not support these functions should leave these function pointers set
+ to NULL. Drivers should use the following idiom::
if (pdata->set_max_dev_wakeup_lat)
(*pdata->set_max_dev_wakeup_lat)(dev, t);
@@ -81,7 +87,7 @@ becomes accessible. To accomplish this, driver writers should use the
set_max_mpu_wakeup_lat() function to constrain the MPU wakeup
latency, and the set_max_dev_wakeup_lat() function to constrain the
device wakeup latency (from clk_enable() to accessibility). For
-example,
+example::
/* Limit MPU wakeup latency */
if (pdata->set_max_mpu_wakeup_lat)
@@ -116,17 +122,17 @@ specialized cases to convert that input information (OPPs/MPU
frequency) into the form that the underlying power management
implementation needs:
-6. (*pdata->dsp_get_opp_table)(void)
+6. `(*pdata->dsp_get_opp_table)(void)`
-7. (*pdata->dsp_set_min_opp)(u8 opp_id)
+7. `(*pdata->dsp_set_min_opp)(u8 opp_id)`
-8. (*pdata->dsp_get_opp)(void)
+8. `(*pdata->dsp_get_opp)(void)`
-9. (*pdata->cpu_get_freq_table)(void)
+9. `(*pdata->cpu_get_freq_table)(void)`
-10. (*pdata->cpu_set_freq)(unsigned long f)
+10. `(*pdata->cpu_set_freq)(unsigned long f)`
-11. (*pdata->cpu_get_freq)(void)
+11. `(*pdata->cpu_get_freq)(void)`
Customizing OPP for platform
============================
@@ -134,12 +140,15 @@ Defining CONFIG_PM should enable OPP layer for the silicon
and the registration of OPP table should take place automatically.
However, in special cases, the default OPP table may need to be
tweaked, for e.g.:
+
* enable default OPPs which are disabled by default, but which
could be enabled on a platform
* Disable an unsupported OPP on the platform
* Define and add a custom opp table entry
-in these cases, the board file needs to do additional steps as follows:
-arch/arm/mach-omapx/board-xyz.c
+ in these cases, the board file needs to do additional steps as follows:
+
+arch/arm/mach-omapx/board-xyz.c::
+
#include "pm.h"
....
static void __init omap_xyz_init_irq(void)
@@ -150,5 +159,7 @@ arch/arm/mach-omapx/board-xyz.c
/* Do customization to the defaults */
....
}
-NOTE: omapx_opp_init will be omap3_opp_init or as required
-based on the omap family.
+
+NOTE:
+ omapx_opp_init will be omap3_opp_init or as required
+ based on the omap family.
diff --git a/Documentation/arm/Porting b/Documentation/arm/porting.rst
index a492233931b9..bd21958bdb2d 100644
--- a/Documentation/arm/Porting
+++ b/Documentation/arm/porting.rst
@@ -1,3 +1,7 @@
+=======
+Porting
+=======
+
Taken from list archive at http://lists.arm.linux.org.uk/pipermail/linux-arm-kernel/2001-July/004064.html
Initial definitions
@@ -89,8 +93,7 @@ DATAADDR
Virtual address for the kernel data segment. Must not be defined
when using the decompressor.
-VMALLOC_START
-VMALLOC_END
+VMALLOC_START / VMALLOC_END
Virtual addresses bounding the vmalloc() area. There must not be
any static mappings in this area; vmalloc will overwrite them.
The addresses must also be in the kernel segment (see above).
@@ -107,13 +110,13 @@ Architecture Specific Macros
----------------------------
BOOT_MEM(pram,pio,vio)
- `pram' specifies the physical start address of RAM. Must always
+ `pram` specifies the physical start address of RAM. Must always
be present, and should be the same as PHYS_OFFSET.
- `pio' is the physical address of an 8MB region containing IO for
+ `pio` is the physical address of an 8MB region containing IO for
use with the debugging macros in arch/arm/kernel/debug-armv.S.
- `vio' is the virtual address of the 8MB debugging region.
+ `vio` is the virtual address of the 8MB debugging region.
It is expected that the debugging region will be re-initialised
by the architecture specific code later in the code (via the
@@ -132,4 +135,3 @@ MAPIO(func)
INITIRQ(func)
Machine specific function to initialise interrupts.
-
diff --git a/Documentation/arm/pxa/mfp.txt b/Documentation/arm/pxa/mfp.rst
index 0b7cab978c02..ac34e5d7ee44 100644
--- a/Documentation/arm/pxa/mfp.txt
+++ b/Documentation/arm/pxa/mfp.rst
@@ -1,4 +1,6 @@
- MFP Configuration for PXA2xx/PXA3xx Processors
+==============================================
+MFP Configuration for PXA2xx/PXA3xx Processors
+==============================================
Eric Miao <eric.miao@marvell.com>
@@ -6,15 +8,15 @@ MFP stands for Multi-Function Pin, which is the pin-mux logic on PXA3xx and
later PXA series processors. This document describes the existing MFP API,
and how board/platform driver authors could make use of it.
- Basic Concept
-===============
+Basic Concept
+=============
Unlike the GPIO alternate function settings on PXA25x and PXA27x, a new MFP
mechanism is introduced from PXA3xx to completely move the pin-mux functions
out of the GPIO controller. In addition to pin-mux configurations, the MFP
also controls the low power state, driving strength, pull-up/down and event
detection of each pin. Below is a diagram of internal connections between
-the MFP logic and the remaining SoC peripherals:
+the MFP logic and the remaining SoC peripherals::
+--------+
| |--(GPIO19)--+
@@ -69,8 +71,8 @@ NOTE: with such a clear separation of MFP and GPIO, by GPIO<xx> we normally
mean it is a GPIO signal, and by MFP<xxx> or pin xxx, we mean a physical
pad (or ball).
- MFP API Usage
-===============
+MFP API Usage
+=============
For board code writers, here are some guidelines:
@@ -94,9 +96,9 @@ For board code writers, here are some guidelines:
PXA310 supporting some additional ones), thus the difference is actually
covered in a single mfp-pxa300.h.
-2. prepare an array for the initial pin configurations, e.g.:
+2. prepare an array for the initial pin configurations, e.g.::
- static unsigned long mainstone_pin_config[] __initdata = {
+ static unsigned long mainstone_pin_config[] __initdata = {
/* Chip Select */
GPIO15_nCS_1,
@@ -116,7 +118,7 @@ For board code writers, here are some guidelines:
/* GPIO */
GPIO1_GPIO | WAKEUP_ON_EDGE_BOTH,
- };
+ };
a) once the pin configurations are passed to pxa{2xx,3xx}_mfp_config(),
and written to the actual registers, they are useless and may be discarded,
@@ -143,17 +145,17 @@ For board code writers, here are some guidelines:
d) although PXA3xx MFP supports edge detection on each pin, the
internal logic will only wakeup the system when those specific bits
in ADxER registers are set, which can be well mapped to the
- corresponding peripheral, thus set_irq_wake() can be called with
+ corresponding peripheral, thus set_irq_wake() can be called with
the peripheral IRQ to enable the wakeup.
- MFP on PXA3xx
-===============
+MFP on PXA3xx
+=============
Every external I/O pad on PXA3xx (excluding those for special purpose) has
one MFP logic associated, and is controlled by one MFP register (MFPR).
-The MFPR has the following bit definitions (for PXA300/PXA310/PXA320):
+The MFPR has the following bit definitions (for PXA300/PXA310/PXA320)::
31 16 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
+-------------------------+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
@@ -183,8 +185,8 @@ The MFPR has the following bit definitions (for PXA300/PXA310/PXA320):
0b006 - slow 10mA
0b007 - fast 10mA
- MFP Design for PXA2xx/PXA3xx
-==============================
+MFP Design for PXA2xx/PXA3xx
+============================
Due to the difference of pin-mux handling between PXA2xx and PXA3xx, a unified
MFP API is introduced to cover both series of processors.
@@ -194,11 +196,11 @@ configurations, these definitions are processor and platform independent, and
the actual API invoked to convert these definitions into register settings and
make them effective there-after.
- Files Involved
- --------------
+Files Involved
+--------------
- arch/arm/mach-pxa/include/mach/mfp.h
-
+
for
1. Unified pin definitions - enum constants for all configurable pins
2. processor-neutral bit definitions for a possible MFP configuration
@@ -226,42 +228,42 @@ make them effective there-after.
for implementation of the pin configuration to take effect for the actual
processor.
- Pin Configuration
- -----------------
+Pin Configuration
+-----------------
The following comments are copied from mfp.h (see the actual source code
- for most updated info)
-
- /*
- * a possible MFP configuration is represented by a 32-bit integer
- *
- * bit 0.. 9 - MFP Pin Number (1024 Pins Maximum)
- * bit 10..12 - Alternate Function Selection
- * bit 13..15 - Drive Strength
- * bit 16..18 - Low Power Mode State
- * bit 19..20 - Low Power Mode Edge Detection
- * bit 21..22 - Run Mode Pull State
- *
- * to facilitate the definition, the following macros are provided
- *
- * MFP_CFG_DEFAULT - default MFP configuration value, with
- * alternate function = 0,
- * drive strength = fast 3mA (MFP_DS03X)
- * low power mode = default
- * edge detection = none
- *
- * MFP_CFG - default MFPR value with alternate function
- * MFP_CFG_DRV - default MFPR value with alternate function and
- * pin drive strength
- * MFP_CFG_LPM - default MFPR value with alternate function and
- * low power mode
- * MFP_CFG_X - default MFPR value with alternate function,
- * pin drive strength and low power mode
- */
-
- Examples of pin configurations are:
-
- #define GPIO94_SSP3_RXD MFP_CFG_X(GPIO94, AF1, DS08X, FLOAT)
+ for most updated info)::
+
+ /*
+ * a possible MFP configuration is represented by a 32-bit integer
+ *
+ * bit 0.. 9 - MFP Pin Number (1024 Pins Maximum)
+ * bit 10..12 - Alternate Function Selection
+ * bit 13..15 - Drive Strength
+ * bit 16..18 - Low Power Mode State
+ * bit 19..20 - Low Power Mode Edge Detection
+ * bit 21..22 - Run Mode Pull State
+ *
+ * to facilitate the definition, the following macros are provided
+ *
+ * MFP_CFG_DEFAULT - default MFP configuration value, with
+ * alternate function = 0,
+ * drive strength = fast 3mA (MFP_DS03X)
+ * low power mode = default
+ * edge detection = none
+ *
+ * MFP_CFG - default MFPR value with alternate function
+ * MFP_CFG_DRV - default MFPR value with alternate function and
+ * pin drive strength
+ * MFP_CFG_LPM - default MFPR value with alternate function and
+ * low power mode
+ * MFP_CFG_X - default MFPR value with alternate function,
+ * pin drive strength and low power mode
+ */
+
+ Examples of pin configurations are::
+
+ #define GPIO94_SSP3_RXD MFP_CFG_X(GPIO94, AF1, DS08X, FLOAT)
which reads GPIO94 can be configured as SSP3_RXD, with alternate function
selection of 1, driving strength of 0b101, and a float state in low power
@@ -272,8 +274,8 @@ make them effective there-after.
do so, simply because this default setting is usually carefully encoded,
and is supposed to work in most cases.
- Register Settings
- -----------------
+Register Settings
+-----------------
Register settings on PXA3xx for a pin configuration is actually very
straight-forward, most bits can be converted directly into MFPR value
diff --git a/Documentation/arm/SA1100/ADSBitsy b/Documentation/arm/sa1100/adsbitsy.rst
index f9f62e8c0719..c179cb26b682 100644
--- a/Documentation/arm/SA1100/ADSBitsy
+++ b/Documentation/arm/sa1100/adsbitsy.rst
@@ -1,4 +1,7 @@
+===============================
ADS Bitsy Single Board Computer
+===============================
+
(It is different from Bitsy(iPAQ) of Compaq)
For more details, contact Applied Data Systems or see
@@ -15,7 +18,9 @@ The kernel zImage is linked to be loaded and executed at 0xc0400000.
Linux can be used with the ADS BootLoader that ships with the
newer rev boards. See their documentation on how to load Linux.
-Supported peripherals:
+Supported peripherals
+=====================
+
- SA1100 LCD frame buffer (8/16bpp...sort of)
- SA1111 USB Master
- SA1100 serial port
@@ -25,10 +30,13 @@ Supported peripherals:
- serial ports (ttyS[0-2])
- ttyS0 is default for serial console
-To do:
+To do
+=====
+
- everything else! :-)
-Notes:
+Notes
+=====
- The flash on board is divided into 3 partitions.
You should be careful to use flash on board.
diff --git a/Documentation/arm/SA1100/Assabet b/Documentation/arm/sa1100/assabet.rst
index e08a6739e72c..3e704831c311 100644
--- a/Documentation/arm/SA1100/Assabet
+++ b/Documentation/arm/sa1100/assabet.rst
@@ -1,3 +1,4 @@
+============================================
The Intel Assabet (SA-1110 evaluation) board
============================================
@@ -11,7 +12,7 @@ http://www.cs.cmu.edu/~wearable/software/assabet.html
Building the kernel
-------------------
-To build the kernel with current defaults:
+To build the kernel with current defaults::
make assabet_config
make oldconfig
@@ -51,9 +52,9 @@ Brief examples on how to boot Linux with RedBoot are shown below. But first
you need to have RedBoot installed in your flash memory. A known to work
precompiled RedBoot binary is available from the following location:
-ftp://ftp.netwinder.org/users/n/nico/
-ftp://ftp.arm.linux.org.uk/pub/linux/arm/people/nico/
-ftp://ftp.handhelds.org/pub/linux/arm/sa-1100-patches/
+- ftp://ftp.netwinder.org/users/n/nico/
+- ftp://ftp.arm.linux.org.uk/pub/linux/arm/people/nico/
+- ftp://ftp.handhelds.org/pub/linux/arm/sa-1100-patches/
Look for redboot-assabet*.tgz. Some installation information is provided in
redboot-assabet*.txt.
@@ -71,12 +72,12 @@ Socket Communications Inc.), you should strongly consider using it for TFTP
file transfers. You must insert it before RedBoot runs since it can't detect
it dynamically.
-To initialize the flash directory:
+To initialize the flash directory::
fis init -f
To initialize the non-volatile settings, like whether you want to use BOOTP or
-a static IP address, etc, use this command:
+a static IP address, etc, use this command::
fconfig -i
@@ -85,15 +86,15 @@ Writing a kernel image into flash
---------------------------------
First, the kernel image must be loaded into RAM. If you have the zImage file
-available on a TFTP server:
+available on a TFTP server::
load zImage -r -b 0x100000
-If you rather want to use Y-Modem upload over the serial port:
+If you rather want to use Y-Modem upload over the serial port::
load -m ymodem -r -b 0x100000
-To write it to flash:
+To write it to flash::
fis create "Linux kernel" -b 0x100000 -l 0xc0000
@@ -102,18 +103,18 @@ Booting the kernel
------------------
The kernel still requires a filesystem to boot. A ramdisk image can be loaded
-as follows:
+as follows::
load ramdisk_image.gz -r -b 0x800000
Again, Y-Modem upload can be used instead of TFTP by replacing the file name
by '-y ymodem'.
-Now the kernel can be retrieved from flash like this:
+Now the kernel can be retrieved from flash like this::
fis load "Linux kernel"
-or loaded as described previously. To boot the kernel:
+or loaded as described previously. To boot the kernel::
exec -b 0x100000 -l 0xc0000
@@ -134,35 +135,35 @@ creating JFFS/JFFS2 images is available from the same site.
For instance, a sample JFFS2 image can be retrieved from the same FTP sites
mentioned below for the precompiled RedBoot image.
-To load this file:
+To load this file::
load sample_img.jffs2 -r -b 0x100000
-The result should look like:
+The result should look like::
-RedBoot> load sample_img.jffs2 -r -b 0x100000
-Raw file loaded 0x00100000-0x00377424
+ RedBoot> load sample_img.jffs2 -r -b 0x100000
+ Raw file loaded 0x00100000-0x00377424
-Now we must know the size of the unallocated flash:
+Now we must know the size of the unallocated flash::
fis free
-Result:
+Result::
-RedBoot> fis free
- 0x500E0000 .. 0x503C0000
+ RedBoot> fis free
+ 0x500E0000 .. 0x503C0000
The values above may be different depending on the size of the filesystem and
the type of flash. See their usage below as an example and take care of
substituting yours appropriately.
-We must determine some values:
+We must determine some values::
-size of unallocated flash: 0x503c0000 - 0x500e0000 = 0x2e0000
-size of the filesystem image: 0x00377424 - 0x00100000 = 0x277424
+ size of unallocated flash: 0x503c0000 - 0x500e0000 = 0x2e0000
+ size of the filesystem image: 0x00377424 - 0x00100000 = 0x277424
We want to fit the filesystem image of course, but we also want to give it all
-the remaining flash space as well. To write it:
+the remaining flash space as well. To write it::
fis unlock -f 0x500E0000 -l 0x2e0000
fis erase -f 0x500E0000 -l 0x2e0000
@@ -171,32 +172,32 @@ the remaining flash space as well. To write it:
Now the filesystem is associated to a MTD "partition" once Linux has discovered
what they are in the boot process. From Redboot, the 'fis list' command
-displays them:
-
-RedBoot> fis list
-Name FLASH addr Mem addr Length Entry point
-RedBoot 0x50000000 0x50000000 0x00020000 0x00000000
-RedBoot config 0x503C0000 0x503C0000 0x00020000 0x00000000
-FIS directory 0x503E0000 0x503E0000 0x00020000 0x00000000
-Linux kernel 0x50020000 0x00100000 0x000C0000 0x00000000
-JFFS2 0x500E0000 0x500E0000 0x002E0000 0x00000000
-
-However Linux should display something like:
-
-SA1100 flash: probing 32-bit flash bus
-SA1100 flash: Found 2 x16 devices at 0x0 in 32-bit mode
-Using RedBoot partition definition
-Creating 5 MTD partitions on "SA1100 flash":
-0x00000000-0x00020000 : "RedBoot"
-0x00020000-0x000e0000 : "Linux kernel"
-0x000e0000-0x003c0000 : "JFFS2"
-0x003c0000-0x003e0000 : "RedBoot config"
-0x003e0000-0x00400000 : "FIS directory"
+displays them::
+
+ RedBoot> fis list
+ Name FLASH addr Mem addr Length Entry point
+ RedBoot 0x50000000 0x50000000 0x00020000 0x00000000
+ RedBoot config 0x503C0000 0x503C0000 0x00020000 0x00000000
+ FIS directory 0x503E0000 0x503E0000 0x00020000 0x00000000
+ Linux kernel 0x50020000 0x00100000 0x000C0000 0x00000000
+ JFFS2 0x500E0000 0x500E0000 0x002E0000 0x00000000
+
+However Linux should display something like::
+
+ SA1100 flash: probing 32-bit flash bus
+ SA1100 flash: Found 2 x16 devices at 0x0 in 32-bit mode
+ Using RedBoot partition definition
+ Creating 5 MTD partitions on "SA1100 flash":
+ 0x00000000-0x00020000 : "RedBoot"
+ 0x00020000-0x000e0000 : "Linux kernel"
+ 0x000e0000-0x003c0000 : "JFFS2"
+ 0x003c0000-0x003e0000 : "RedBoot config"
+ 0x003e0000-0x00400000 : "FIS directory"
What's important here is the position of the partition we are interested in,
which is the third one. Within Linux, this corresponds to /dev/mtdblock2.
Therefore to boot Linux with the kernel and its root filesystem in flash, we
-need this RedBoot command:
+need this RedBoot command::
fis load "Linux kernel"
exec -b 0x100000 -l 0xc0000 -c "root=/dev/mtdblock2"
@@ -218,21 +219,21 @@ time the Assabet is rebooted. Therefore it's possible to automate the boot
process using RedBoot's scripting capability.
For example, I use this to boot Linux with both the kernel and the ramdisk
-images retrieved from a TFTP server on the network:
-
-RedBoot> fconfig
-Run script at boot: false true
-Boot script:
-Enter script, terminate with empty line
->> load zImage -r -b 0x100000
->> load ramdisk_ks.gz -r -b 0x800000
->> exec -b 0x100000 -l 0xc0000
->>
-Boot script timeout (1000ms resolution): 3
-Use BOOTP for network configuration: true
-GDB connection port: 9000
-Network debug at boot time: false
-Update RedBoot non-volatile configuration - are you sure (y/n)? y
+images retrieved from a TFTP server on the network::
+
+ RedBoot> fconfig
+ Run script at boot: false true
+ Boot script:
+ Enter script, terminate with empty line
+ >> load zImage -r -b 0x100000
+ >> load ramdisk_ks.gz -r -b 0x800000
+ >> exec -b 0x100000 -l 0xc0000
+ >>
+ Boot script timeout (1000ms resolution): 3
+ Use BOOTP for network configuration: true
+ GDB connection port: 9000
+ Network debug at boot time: false
+ Update RedBoot non-volatile configuration - are you sure (y/n)? y
Then, rebooting the Assabet is just a matter of waiting for the login prompt.
@@ -240,6 +241,7 @@ Then, rebooting the Assabet is just a matter of waiting for the login prompt.
Nicolas Pitre
nico@fluxnic.net
+
June 12, 2001
@@ -249,52 +251,51 @@ Status of peripherals in -rmk tree (updated 14/10/2001)
Assabet:
Serial ports:
Radio: TX, RX, CTS, DSR, DCD, RI
- PM: Not tested.
- COM: TX, RX, CTS, DSR, DCD, RTS, DTR, PM
- PM: Not tested.
- I2C: Implemented, not fully tested.
- L3: Fully tested, pass.
- PM: Not tested.
+ - PM: Not tested.
+ - COM: TX, RX, CTS, DSR, DCD, RTS, DTR, PM
+ - PM: Not tested.
+ - I2C: Implemented, not fully tested.
+ - L3: Fully tested, pass.
+ - PM: Not tested.
Video:
- LCD: Fully tested. PM
- (LCD doesn't like being blanked with
- neponset connected)
- Video out: Not fully
+ - LCD: Fully tested. PM
+
+ (LCD doesn't like being blanked with neponset connected)
+
+ - Video out: Not fully
Audio:
UDA1341:
- Playback: Fully tested, pass.
- Record: Implemented, not tested.
- PM: Not tested.
+ - Playback: Fully tested, pass.
+ - Record: Implemented, not tested.
+ - PM: Not tested.
UCB1200:
- Audio play: Implemented, not heavily tested.
- Audio rec: Implemented, not heavily tested.
- Telco audio play: Implemented, not heavily tested.
- Telco audio rec: Implemented, not heavily tested.
- POTS control: No
- Touchscreen: Yes
- PM: Not tested.
+ - Audio play: Implemented, not heavily tested.
+ - Audio rec: Implemented, not heavily tested.
+ - Telco audio play: Implemented, not heavily tested.
+ - Telco audio rec: Implemented, not heavily tested.
+ - POTS control: No
+ - Touchscreen: Yes
+ - PM: Not tested.
Other:
- PCMCIA:
- LPE: Fully tested, pass.
- USB: No
- IRDA:
- SIR: Fully tested, pass.
- FIR: Fully tested, pass.
- PM: Not tested.
+ - PCMCIA:
+ - LPE: Fully tested, pass.
+ - USB: No
+ - IRDA:
+ - SIR: Fully tested, pass.
+ - FIR: Fully tested, pass.
+ - PM: Not tested.
Neponset:
Serial ports:
- COM1,2: TX, RX, CTS, DSR, DCD, RTS, DTR
- PM: Not tested.
- USB: Implemented, not heavily tested.
- PCMCIA: Implemented, not heavily tested.
- PM: Not tested.
- CF: Implemented, not heavily tested.
- PM: Not tested.
+ - COM1,2: TX, RX, CTS, DSR, DCD, RTS, DTR
+ - PM: Not tested.
+ - USB: Implemented, not heavily tested.
+ - PCMCIA: Implemented, not heavily tested.
+ - CF: Implemented, not heavily tested.
+ - PM: Not tested.
More stuff can be found in the -np (Nicolas Pitre's) tree.
-
diff --git a/Documentation/arm/SA1100/Brutus b/Documentation/arm/sa1100/brutus.rst
index 6a3aa95e9bfd..e1a23bee6d44 100644
--- a/Documentation/arm/SA1100/Brutus
+++ b/Documentation/arm/sa1100/brutus.rst
@@ -1,9 +1,13 @@
-Brutus is an evaluation platform for the SA1100 manufactured by Intel.
+======
+Brutus
+======
+
+Brutus is an evaluation platform for the SA1100 manufactured by Intel.
For more details, see:
http://developer.intel.com
-To compile for Brutus, you must issue the following commands:
+To compile for Brutus, you must issue the following commands::
make brutus_config
make config
@@ -16,25 +20,23 @@ must be loaded at 0xc0008000 in Brutus's memory and execution started at
entry.
But prior to executing the kernel, a ramdisk image must also be loaded in
-memory. Use memory address 0xd8000000 for this. Note that the file
+memory. Use memory address 0xd8000000 for this. Note that the file
containing the (compressed) ramdisk image must not exceed 4 MB.
Typically, you'll need angelboot to load the kernel.
-The following angelboot.opt file should be used:
-
------ begin angelboot.opt -----
-base 0xc0008000
-entry 0xc0008000
-r0 0x00000000
-r1 0x00000010
-device /dev/ttyS0
-options "9600 8N1"
-baud 115200
-otherfile ramdisk_img.gz
-otherbase 0xd8000000
------ end angelboot.opt -----
-
-Then load the kernel and ramdisk with:
+The following angelboot.opt file should be used::
+
+ base 0xc0008000
+ entry 0xc0008000
+ r0 0x00000000
+ r1 0x00000010
+ device /dev/ttyS0
+ options "9600 8N1"
+ baud 115200
+ otherfile ramdisk_img.gz
+ otherbase 0xd8000000
+
+Then load the kernel and ramdisk with::
angelboot -f angelboot.opt zImage
@@ -44,14 +46,16 @@ console is provided through the second Brutus serial port. To access it,
you may use minicom configured with /dev/ttyS1, 9600 baud, 8N1, no flow
control.
-Currently supported:
+Currently supported
+===================
+
- RS232 serial ports
- audio output
- LCD screen
- keyboard
-
-The actual Brutus support may not be complete without extra patches.
-If such patches exist, they should be found from
+
+The actual Brutus support may not be complete without extra patches.
+If such patches exist, they should be found from
ftp.netwinder.org/users/n/nico.
A full PCMCIA support is still missing, although it's possible to hack
@@ -63,4 +67,3 @@ Any contribution is welcome.
Please send patches to nico@fluxnic.net
Have Fun !
-
diff --git a/Documentation/arm/SA1100/CERF b/Documentation/arm/sa1100/cerf.rst
index b3d845301ef1..7fa71b609bf9 100644
--- a/Documentation/arm/SA1100/CERF
+++ b/Documentation/arm/sa1100/cerf.rst
@@ -1,3 +1,7 @@
+==============
+CerfBoard/Cube
+==============
+
*** The StrongARM version of the CerfBoard/Cube has been discontinued ***
The Intrinsyc CerfBoard is a StrongARM 1110-based computer on a board
@@ -9,7 +13,9 @@ Intrinsyc website, http://www.intrinsyc.com.
This document describes the support in the Linux kernel for the
Intrinsyc CerfBoard.
-Supported in this version:
+Supported in this version
+=========================
+
- CompactFlash+ slot (select PCMCIA in General Setup and any options
that may be required)
- Onboard Crystal CS8900 Ethernet controller (Cerf CS8900A support in
@@ -19,7 +25,7 @@ Supported in this version:
In order to get this kernel onto your Cerf, you need a server that runs
both BOOTP and TFTP. Detailed instructions should have come with your
evaluation kit on how to use the bootloader. This series of commands
-will suffice:
+will suffice::
make ARCH=arm CROSS_COMPILE=arm-linux- cerfcube_defconfig
make ARCH=arm CROSS_COMPILE=arm-linux- zImage
diff --git a/Documentation/arm/sa1100/freebird.rst b/Documentation/arm/sa1100/freebird.rst
new file mode 100644
index 000000000000..81043d0c6d64
--- /dev/null
+++ b/Documentation/arm/sa1100/freebird.rst
@@ -0,0 +1,25 @@
+========
+Freebird
+========
+
+Freebird-1.1 is produced by Legend(C), Inc.
+`http://web.archive.org/web/*/http://www.legend.com.cn`
+and software/linux maintained by Coventive(C), Inc.
+(http://www.coventive.com)
+
+Based on Nicolas's strongarm kernel tree.
+
+Maintainer:
+
+Chester Kuo
+ - <chester@coventive.com>
+ - <chester@linux.org.tw>
+
+Author:
+
+- Tim wu <timwu@coventive.com>
+- CIH <cih@coventive.com>
+- Eric Peng <ericpeng@coventive.com>
+- Jeff Lee <jeff_lee@coventive.com>
+- Allen Cheng
+- Tony Liu <tonyliu@coventive.com>
diff --git a/Documentation/arm/SA1100/GraphicsClient b/Documentation/arm/sa1100/graphicsclient.rst
index 867bb35943af..a73d61c3ce91 100644
--- a/Documentation/arm/SA1100/GraphicsClient
+++ b/Documentation/arm/sa1100/graphicsclient.rst
@@ -1,9 +1,11 @@
+=============================================
ADS GraphicsClient Plus Single Board Computer
+=============================================
For more details, contact Applied Data Systems or see
http://www.applieddata.net/products.html
-The original Linux support for this product has been provided by
+The original Linux support for this product has been provided by
Nicolas Pitre <nico@fluxnic.net>. Continued development work by
Woojung Huh <whuh@applieddata.net>
@@ -14,8 +16,8 @@ board supports MTD/JFFS, so you could also mount something on there.
Use 'make graphicsclient_config' before any 'make config'. This will set up
defaults for GraphicsClient Plus support.
-The kernel zImage is linked to be loaded and executed at 0xc0200000.
-Also the following registers should have the specified values upon entry:
+The kernel zImage is linked to be loaded and executed at 0xc0200000.
+Also the following registers should have the specified values upon entry::
r0 = 0
r1 = 29 (this is the GraphicsClient architecture number)
@@ -31,23 +33,21 @@ as outlined below. In any case, if you're planning on deploying
something en masse, you should probably get the newer board.
If using Angel on the older boards, here is a typical angel.opt option file
-if the kernel is loaded through the Angel Debug Monitor:
-
------ begin angelboot.opt -----
-base 0xc0200000
-entry 0xc0200000
-r0 0x00000000
-r1 0x0000001d
-device /dev/ttyS1
-options "38400 8N1"
-baud 115200
-#otherfile ramdisk.gz
-#otherbase 0xc0800000
-exec minicom
------ end angelboot.opt -----
+if the kernel is loaded through the Angel Debug Monitor::
+
+ base 0xc0200000
+ entry 0xc0200000
+ r0 0x00000000
+ r1 0x0000001d
+ device /dev/ttyS1
+ options "38400 8N1"
+ baud 115200
+ #otherfile ramdisk.gz
+ #otherbase 0xc0800000
+ exec minicom
Then the kernel (and ramdisk if otherfile/otherbase lines above are
-uncommented) would be loaded with:
+uncommented) would be loaded with::
angelboot -f angelboot.opt zImage
@@ -59,7 +59,9 @@ If any other bootloader is used, ensure it accomplish the same, especially
for r0/r1 register values before jumping into the kernel.
-Supported peripherals:
+Supported peripherals
+=====================
+
- SA1100 LCD frame buffer (8/16bpp...sort of)
- on-board SMC 92C96 ethernet NIC
- SA1100 serial port
@@ -74,11 +76,14 @@ Supported peripherals:
See http://www.eurotech-inc.com/linux-sbc.asp for IOCTL documentation
and example user space code. ps/2 keybd is multiplexed through this driver
-To do:
+To do
+=====
+
- UCB1200 audio with new ucb_generic layer
- everything else! :-)
-Notes:
+Notes
+=====
- The flash on board is divided into 3 partitions. mtd0 is where
the ADS boot ROM and zImage is stored. It's been marked as
@@ -95,4 +100,3 @@ Notes:
fixed soon.
Any contribution can be sent to nico@fluxnic.net and will be greatly welcome!
-
diff --git a/Documentation/arm/SA1100/GraphicsMaster b/Documentation/arm/sa1100/graphicsmaster.rst
index 9145088a0ba2..e39892514f0c 100644
--- a/Documentation/arm/SA1100/GraphicsMaster
+++ b/Documentation/arm/sa1100/graphicsmaster.rst
@@ -1,4 +1,6 @@
+========================================
ADS GraphicsMaster Single Board Computer
+========================================
For more details, contact Applied Data Systems or see
http://www.applieddata.net/products.html
@@ -15,7 +17,9 @@ The kernel zImage is linked to be loaded and executed at 0xc0400000.
Linux can be used with the ADS BootLoader that ships with the
newer rev boards. See their documentation on how to load Linux.
-Supported peripherals:
+Supported peripherals
+=====================
+
- SA1100 LCD frame buffer (8/16bpp...sort of)
- SA1111 USB Master
- on-board SMC 92C96 ethernet NIC
@@ -31,10 +35,13 @@ Supported peripherals:
See http://www.eurotech-inc.com/linux-sbc.asp for IOCTL documentation
and example user space code. ps/2 keybd is multiplexed through this driver
-To do:
+To do
+=====
+
- everything else! :-)
-Notes:
+Notes
+=====
- The flash on board is divided into 3 partitions. mtd0 is where
the zImage is stored. It's been marked as read-only to keep you
diff --git a/Documentation/arm/SA1100/HUW_WEBPANEL b/Documentation/arm/sa1100/huw_webpanel.rst
index fd56b48d4833..1dc7ccb165f0 100644
--- a/Documentation/arm/SA1100/HUW_WEBPANEL
+++ b/Documentation/arm/sa1100/huw_webpanel.rst
@@ -1,9 +1,14 @@
+=======================
+Hoeft & Wessel Webpanel
+=======================
+
The HUW_WEBPANEL is a product of the German company Hoeft & Wessel AG
If you want more information, please visit
http://www.hoeft-wessel.de
-To build the kernel:
+To build the kernel::
+
make huw_webpanel_config
make oldconfig
[accept all defaults]
@@ -14,4 +19,3 @@ Roman Jordan jor@hoeft-wessel.de
Christoph Schulz schu@hoeft-wessel.de
2000/12/18/
-
diff --git a/Documentation/arm/sa1100/index.rst b/Documentation/arm/sa1100/index.rst
new file mode 100644
index 000000000000..68c2a280a745
--- /dev/null
+++ b/Documentation/arm/sa1100/index.rst
@@ -0,0 +1,25 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+====================
+Intel StrongARM 1100
+====================
+
+.. toctree::
+ :maxdepth: 1
+
+ adsbitsy
+ assabet
+ brutus
+ cerf
+ freebird
+ graphicsclient
+ graphicsmaster
+ huw_webpanel
+ itsy
+ lart
+ nanoengine
+ pangolin
+ pleb
+ serial_uart
+ tifon
+ yopy
diff --git a/Documentation/arm/SA1100/Itsy b/Documentation/arm/sa1100/itsy.rst
index 44b94997fa0d..f49896ba3ef1 100644
--- a/Documentation/arm/SA1100/Itsy
+++ b/Documentation/arm/sa1100/itsy.rst
@@ -1,3 +1,7 @@
+====
+Itsy
+====
+
Itsy is a research project done by the Western Research Lab, and Systems
Research Center in Palo Alto, CA. The Itsy project is one of several
research projects at Compaq that are related to pocket computing.
@@ -7,6 +11,7 @@ For more information, see:
http://www.hpl.hp.com/downloads/crl/itsy/
Notes on initial 2.4 Itsy support (8/27/2000) :
+
The port was done on an Itsy version 1.5 machine with a daughtercard with
64 Meg of DRAM and 32 Meg of Flash. The initial work includes support for
serial console (to see what you're doing). No other devices have been
@@ -18,8 +23,10 @@ Finally, you will need to cd to arch/arm/boot/tools and execute a make there
to build the params-itsy program used to boot the kernel.
In order to install the port of 2.4 to the itsy, You will need to set the
-configuration parameters in the monitor as follows:
-Arg 1:0x08340000, Arg2: 0xC0000000, Arg3:18 (0x12), Arg4:0
+configuration parameters in the monitor as follows::
+
+ Arg 1:0x08340000, Arg2: 0xC0000000, Arg3:18 (0x12), Arg4:0
+
Make sure the start-routine address is set to 0x00060000.
Next, flash the params-itsy program to 0x00060000 ("p 1 0x00060000" in the
@@ -29,7 +36,8 @@ flash menu) Flash the kernel in arch/arm/boot/zImage into 0x08340000
handhelds.org.
The serial connection we established was at:
- 8-bit data, no parity, 1 stop bit(s), 115200.00 b/s. in the monitor, in the
+
+8-bit data, no parity, 1 stop bit(s), 115200.00 b/s. in the monitor, in the
params-itsy program, and in the kernel itself. This can be changed, but
not easily. The monitor parameters are easily changed, the params program
setup is assembly outl's, and the kernel is a configuration item specific to
diff --git a/Documentation/arm/SA1100/LART b/Documentation/arm/sa1100/lart.rst
index 6d412b685598..94c0568d1095 100644
--- a/Documentation/arm/SA1100/LART
+++ b/Documentation/arm/sa1100/lart.rst
@@ -1,5 +1,6 @@
+====================================
Linux Advanced Radio Terminal (LART)
-------------------------------------
+====================================
The LART is a small (7.5 x 10cm) SA-1100 board, designed for embedded
applications. It has 32 MB DRAM, 4MB Flash ROM, double RS232 and all
diff --git a/Documentation/arm/SA1100/nanoEngine b/Documentation/arm/sa1100/nanoengine.rst
index 48a7934f95f6..47f1a14cf98a 100644
--- a/Documentation/arm/SA1100/nanoEngine
+++ b/Documentation/arm/sa1100/nanoengine.rst
@@ -1,11 +1,11 @@
+==========
nanoEngine
-----------
+==========
-"nanoEngine" is a SA1110 based single board computer from
+"nanoEngine" is a SA1110 based single board computer from
Bright Star Engineering Inc. See www.brightstareng.com/arm
for more info.
(Ref: Stuart Adams <sja@brightstareng.com>)
Also visit Larry Doolittle's "Linux for the nanoEngine" site:
http://www.brightstareng.com/arm/nanoeng.htm
-
diff --git a/Documentation/arm/SA1100/Pangolin b/Documentation/arm/sa1100/pangolin.rst
index 077a6120e129..f0c5c1618553 100644
--- a/Documentation/arm/SA1100/Pangolin
+++ b/Documentation/arm/sa1100/pangolin.rst
@@ -1,16 +1,22 @@
+========
+Pangolin
+========
+
Pangolin is a StrongARM 1110-based evaluation platform produced
by Dialogue Technology (http://www.dialogue.com.tw/).
It has EISA slots for ease of configuration with SDRAM/Flash
memory card, USB/Serial/Audio card, Compact Flash card,
PCMCIA/IDE card and TFT-LCD card.
-To compile for Pangolin, you must issue the following commands:
+To compile for Pangolin, you must issue the following commands::
make pangolin_config
make oldconfig
make zImage
-Supported peripherals:
+Supported peripherals
+=====================
+
- SA1110 serial port (UART1/UART2/UART3)
- flash memory access
- compact flash driver
diff --git a/Documentation/arm/SA1100/PLEB b/Documentation/arm/sa1100/pleb.rst
index b9c8a631a351..d5b732967aa3 100644
--- a/Documentation/arm/SA1100/PLEB
+++ b/Documentation/arm/sa1100/pleb.rst
@@ -1,3 +1,7 @@
+====
+PLEB
+====
+
The PLEB project was started as a student initiative at the School of
Computer Science and Engineering, University of New South Wales to make a
pocket computer capable of running the Linux Kernel.
@@ -7,5 +11,3 @@ PLEB support has yet to be fully integrated.
For more information, see:
http://www.cse.unsw.edu.au
-
-
diff --git a/Documentation/arm/sa1100/serial_uart.rst b/Documentation/arm/sa1100/serial_uart.rst
new file mode 100644
index 000000000000..ea983642b9be
--- /dev/null
+++ b/Documentation/arm/sa1100/serial_uart.rst
@@ -0,0 +1,51 @@
+==================
+SA1100 serial port
+==================
+
+The SA1100 serial port had its major/minor numbers officially assigned::
+
+ > Date: Sun, 24 Sep 2000 21:40:27 -0700
+ > From: H. Peter Anvin <hpa@transmeta.com>
+ > To: Nicolas Pitre <nico@CAM.ORG>
+ > Cc: Device List Maintainer <device@lanana.org>
+ > Subject: Re: device
+ >
+ > Okay. Note that device numbers 204 and 205 are used for "low density
+ > serial devices", so you will have a range of minors on those majors (the
+ > tty device layer handles this just fine, so you don't have to worry about
+ > doing anything special.)
+ >
+ > So your assignments are:
+ >
+ > 204 char Low-density serial ports
+ > 5 = /dev/ttySA0 SA1100 builtin serial port 0
+ > 6 = /dev/ttySA1 SA1100 builtin serial port 1
+ > 7 = /dev/ttySA2 SA1100 builtin serial port 2
+ >
+ > 205 char Low-density serial ports (alternate device)
+ > 5 = /dev/cusa0 Callout device for ttySA0
+ > 6 = /dev/cusa1 Callout device for ttySA1
+ > 7 = /dev/cusa2 Callout device for ttySA2
+ >
+
+You must create those inodes in /dev on the root filesystem used
+by your SA1100-based device::
+
+ mknod ttySA0 c 204 5
+ mknod ttySA1 c 204 6
+ mknod ttySA2 c 204 7
+ mknod cusa0 c 205 5
+ mknod cusa1 c 205 6
+ mknod cusa2 c 205 7
+
+In addition to the creation of the appropriate device nodes above, you
+must ensure your user space applications make use of the correct device
+name. The classic example is the content of the /etc/inittab file where
+you might have a getty process started on ttyS0.
+
+In this case:
+
+- replace occurrences of ttyS0 with ttySA0, ttyS1 with ttySA1, etc.
+
+- don't forget to add 'ttySA0', 'console', or the appropriate tty name
+ in /etc/securetty for root to be allowed to login as well.
diff --git a/Documentation/arm/SA1100/Tifon b/Documentation/arm/sa1100/tifon.rst
index dd1934d9c851..c26e910b9ea7 100644
--- a/Documentation/arm/SA1100/Tifon
+++ b/Documentation/arm/sa1100/tifon.rst
@@ -1,7 +1,7 @@
+=====
Tifon
------
+=====
More info has to come...
Contact: Peter Danielsson <peter.danielsson@era-t.ericsson.se>
-
diff --git a/Documentation/arm/SA1100/Yopy b/Documentation/arm/sa1100/yopy.rst
index e14f16d836ac..5b35a5f61a44 100644
--- a/Documentation/arm/SA1100/Yopy
+++ b/Documentation/arm/sa1100/yopy.rst
@@ -1,2 +1,5 @@
-See http://www.yopydeveloper.org for more.
+====
+Yopy
+====
+See http://www.yopydeveloper.org for more.
diff --git a/Documentation/arm/Samsung-S3C24XX/CPUfreq.txt b/Documentation/arm/samsung-s3c24xx/cpufreq.rst
index fa968aa99d67..2ddc26c03b1f 100644
--- a/Documentation/arm/Samsung-S3C24XX/CPUfreq.txt
+++ b/Documentation/arm/samsung-s3c24xx/cpufreq.rst
@@ -1,5 +1,6 @@
- S3C24XX CPUfreq support
- =======================
+=======================
+S3C24XX CPUfreq support
+=======================
Introduction
------------
diff --git a/Documentation/arm/Samsung-S3C24XX/EB2410ITX.txt b/Documentation/arm/samsung-s3c24xx/eb2410itx.rst
index b87292e05f2f..7863c93652f8 100644
--- a/Documentation/arm/Samsung-S3C24XX/EB2410ITX.txt
+++ b/Documentation/arm/samsung-s3c24xx/eb2410itx.rst
@@ -1,5 +1,6 @@
- Simtec Electronics EB2410ITX (BAST)
- ===================================
+===================================
+Simtec Electronics EB2410ITX (BAST)
+===================================
http://www.simtec.co.uk/products/EB2410ITX/
diff --git a/Documentation/arm/Samsung-S3C24XX/GPIO.txt b/Documentation/arm/samsung-s3c24xx/gpio.rst
index e8f918b96123..f7c3d7d011a2 100644
--- a/Documentation/arm/Samsung-S3C24XX/GPIO.txt
+++ b/Documentation/arm/samsung-s3c24xx/gpio.rst
@@ -1,5 +1,6 @@
- S3C24XX GPIO Control
- ====================
+====================
+S3C24XX GPIO Control
+====================
Introduction
------------
@@ -12,7 +13,7 @@ Introduction
of the s3c2410 GPIO system, please read the Samsung provided
data-sheet/users manual to find out the complete list.
- See Documentation/arm/Samsung/GPIO.txt for the core implementation.
+ See Documentation/arm/samsung/gpio.rst for the core implementation.
GPIOLIB
@@ -26,16 +27,16 @@ GPIOLIB
listed below will be removed (they may be marked as __deprecated
in the near future).
- The following functions now either have a s3c_ specific variant
+ The following functions now either have a `s3c_` specific variant
or are merged into gpiolib. See the definitions in
arch/arm/plat-samsung/include/plat/gpio-cfg.h:
- s3c2410_gpio_setpin() gpio_set_value() or gpio_direction_output()
- s3c2410_gpio_getpin() gpio_get_value() or gpio_direction_input()
- s3c2410_gpio_getirq() gpio_to_irq()
- s3c2410_gpio_cfgpin() s3c_gpio_cfgpin()
- s3c2410_gpio_getcfg() s3c_gpio_getcfg()
- s3c2410_gpio_pullup() s3c_gpio_setpull()
+ - s3c2410_gpio_setpin() gpio_set_value() or gpio_direction_output()
+ - s3c2410_gpio_getpin() gpio_get_value() or gpio_direction_input()
+ - s3c2410_gpio_getirq() gpio_to_irq()
+ - s3c2410_gpio_cfgpin() s3c_gpio_cfgpin()
+ - s3c2410_gpio_getcfg() s3c_gpio_getcfg()
+ - s3c2410_gpio_pullup() s3c_gpio_setpull()
GPIOLIB conversion
@@ -77,7 +78,7 @@ out s3c2410 API, then here are some notes on the process.
6) s3c2410_gpio_getirq() should be directly replaceable with the
gpio_to_irq() call.
-The s3c2410_gpio and gpio_ calls have always operated on the same gpio
+The s3c2410_gpio and `gpio_` calls have always operated on the same gpio
numberspace, so there is no problem with converting the gpio numbering
between the calls.
diff --git a/Documentation/arm/Samsung-S3C24XX/H1940.txt b/Documentation/arm/samsung-s3c24xx/h1940.rst
index b738859b1fc0..62a562c178e3 100644
--- a/Documentation/arm/Samsung-S3C24XX/H1940.txt
+++ b/Documentation/arm/samsung-s3c24xx/h1940.rst
@@ -1,5 +1,6 @@
- HP IPAQ H1940
- =============
+=============
+HP IPAQ H1940
+=============
http://www.handhelds.org/projects/h1940.html
diff --git a/Documentation/arm/samsung-s3c24xx/index.rst b/Documentation/arm/samsung-s3c24xx/index.rst
new file mode 100644
index 000000000000..5b8a7f9398d8
--- /dev/null
+++ b/Documentation/arm/samsung-s3c24xx/index.rst
@@ -0,0 +1,20 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==========================
+Samsung S3C24XX SoC Family
+==========================
+
+.. toctree::
+ :maxdepth: 1
+
+ h1940
+ gpio
+ cpufreq
+ suspend
+ usb-host
+ s3c2412
+ eb2410itx
+ nand
+ smdk2440
+ s3c2413
+ overview
diff --git a/Documentation/arm/Samsung-S3C24XX/NAND.txt b/Documentation/arm/samsung-s3c24xx/nand.rst
index bc478a3409b8..938995694ee7 100644
--- a/Documentation/arm/Samsung-S3C24XX/NAND.txt
+++ b/Documentation/arm/samsung-s3c24xx/nand.rst
@@ -1,5 +1,6 @@
- S3C24XX NAND Support
- ====================
+====================
+S3C24XX NAND Support
+====================
Introduction
------------
@@ -27,4 +28,3 @@ Document Author
---------------
Ben Dooks, Copyright 2007 Simtec Electronics
-
diff --git a/Documentation/arm/Samsung-S3C24XX/Overview.txt b/Documentation/arm/samsung-s3c24xx/overview.rst
index 00d3c3141e21..e9a1dc7276b5 100644
--- a/Documentation/arm/Samsung-S3C24XX/Overview.txt
+++ b/Documentation/arm/samsung-s3c24xx/overview.rst
@@ -1,5 +1,6 @@
- S3C24XX ARM Linux Overview
- ==========================
+==========================
+S3C24XX ARM Linux Overview
+==========================
@@ -182,7 +183,7 @@ NAND
controller. If there are any problems the latest linux-mtd
code can be found from http://www.linux-mtd.infradead.org/
- For more information see Documentation/arm/Samsung-S3C24XX/NAND.txt
+ For more information see Documentation/arm/samsung-s3c24xx/nand.rst
SD/MMC
@@ -221,8 +222,8 @@ GPIO
As of v2.6.34, the move towards using gpiolib support is almost
complete, and very little of the old calls are left.
- See Documentation/arm/Samsung-S3C24XX/GPIO.txt for the S3C24XX specific
- support and Documentation/arm/Samsung/GPIO.txt for the core Samsung
+ See Documentation/arm/samsung-s3c24xx/gpio.rst for the S3C24XX specific
+ support and Documentation/arm/samsung/gpio.rst for the core Samsung
implementation.
@@ -276,18 +277,18 @@ Platform Data
kmalloc()s an area of memory, and copies the __initdata
and then sets the relevant device's platform data. Making
the function `__init` takes care of ensuring it is discarded
- with the rest of the initialisation code
+ with the rest of the initialisation code::
- static __init void s3c24xx_xxx_set_platdata(struct xxx_data *pd)
- {
- struct s3c2410_xxx_mach_info *npd;
+ static __init void s3c24xx_xxx_set_platdata(struct xxx_data *pd)
+ {
+ struct s3c2410_xxx_mach_info *npd;
npd = kmalloc(sizeof(struct s3c2410_xxx_mach_info), GFP_KERNEL);
if (npd) {
memcpy(npd, pd, sizeof(struct s3c2410_xxx_mach_info));
s3c_device_xxx.dev.platform_data = npd;
} else {
- printk(KERN_ERR "no memory for xxx platform data\n");
+ printk(KERN_ERR "no memory for xxx platform data\n");
}
}
diff --git a/Documentation/arm/Samsung-S3C24XX/S3C2412.txt b/Documentation/arm/samsung-s3c24xx/s3c2412.rst
index dc1fd362d3c1..68b985fc6bf4 100644
--- a/Documentation/arm/Samsung-S3C24XX/S3C2412.txt
+++ b/Documentation/arm/samsung-s3c24xx/s3c2412.rst
@@ -1,5 +1,6 @@
- S3C2412 ARM Linux Overview
- ==========================
+==========================
+S3C2412 ARM Linux Overview
+==========================
Introduction
------------
diff --git a/Documentation/arm/Samsung-S3C24XX/S3C2413.txt b/Documentation/arm/samsung-s3c24xx/s3c2413.rst
index 909bdc7dd7b5..1f51e207fc46 100644
--- a/Documentation/arm/Samsung-S3C24XX/S3C2413.txt
+++ b/Documentation/arm/samsung-s3c24xx/s3c2413.rst
@@ -1,5 +1,6 @@
- S3C2413 ARM Linux Overview
- ==========================
+==========================
+S3C2413 ARM Linux Overview
+==========================
Introduction
------------
@@ -10,7 +11,7 @@ Introduction
Camera Interface
----------------
+----------------
This block is currently not supported.
diff --git a/Documentation/arm/Samsung-S3C24XX/SMDK2440.txt b/Documentation/arm/samsung-s3c24xx/smdk2440.rst
index 429390bd4684..524fd0b4afaf 100644
--- a/Documentation/arm/Samsung-S3C24XX/SMDK2440.txt
+++ b/Documentation/arm/samsung-s3c24xx/smdk2440.rst
@@ -1,5 +1,6 @@
- Samsung/Meritech SMDK2440
- =========================
+=========================
+Samsung/Meritech SMDK2440
+=========================
Introduction
------------
diff --git a/Documentation/arm/Samsung-S3C24XX/Suspend.txt b/Documentation/arm/samsung-s3c24xx/suspend.rst
index cb4f0c0cdf9d..b4f3ae9fe76e 100644
--- a/Documentation/arm/Samsung-S3C24XX/Suspend.txt
+++ b/Documentation/arm/samsung-s3c24xx/suspend.rst
@@ -1,5 +1,6 @@
- S3C24XX Suspend Support
- =======================
+=======================
+S3C24XX Suspend Support
+=======================
Introduction
@@ -57,16 +58,16 @@ Machine Support
and will end up initialising all compiled machines' pm init!
The following is an example of code used for testing wakeup from
- an falling edge on IRQ_EINT0:
+ a falling edge on IRQ_EINT0::
-static irqreturn_t button_irq(int irq, void *pw)
-{
+ static irqreturn_t button_irq(int irq, void *pw)
+ {
return IRQ_HANDLED;
-}
+ }
-statuc void __init machine_init(void)
-{
+ static void __init machine_init(void)
+ {
...
request_irq(IRQ_EINT0, button_irq, IRQF_TRIGGER_FALLING,
@@ -75,7 +76,7 @@ statuc void __init machine_init(void)
enable_irq_wake(IRQ_EINT0);
s3c_pm_init();
-}
+ }
Debugging
@@ -134,4 +135,3 @@ Document Author
---------------
Ben Dooks, Copyright 2004 Simtec Electronics
-
diff --git a/Documentation/arm/Samsung-S3C24XX/USB-Host.txt b/Documentation/arm/samsung-s3c24xx/usb-host.rst
index f82b1faefad5..c84268bd1884 100644
--- a/Documentation/arm/Samsung-S3C24XX/USB-Host.txt
+++ b/Documentation/arm/samsung-s3c24xx/usb-host.rst
@@ -1,5 +1,6 @@
- S3C24XX USB Host support
- ========================
+========================
+S3C24XX USB Host support
+========================
@@ -13,7 +14,7 @@ Configuration
Enable at least the following kernel options:
- menuconfig:
+ menuconfig::
Device Drivers --->
USB support --->
@@ -22,8 +23,9 @@ Configuration
.config:
- CONFIG_USB
- CONFIG_USB_OHCI_HCD
+
+ - CONFIG_USB
+ - CONFIG_USB_OHCI_HCD
Once these options are configured, the standard set of USB device
@@ -60,17 +62,14 @@ Platform Data
The ports are numbered 0 and 1.
power_control:
-
Called to enable or disable the power on the port.
enable_oc:
-
Called to enable or disable the over-current monitoring.
This should claim or release the resources being used to
check the power condition on the port, such as an IRQ.
report_oc:
-
The OHCI driver fills this field in for the over-current code
to call when there is a change to the over-current state on
an port. The ports argument is a bitmask of 1 bit per port,
@@ -80,7 +79,6 @@ Platform Data
ensure this is called correctly.
port[x]:
-
This is struct describes each port, 0 or 1. The platform driver
should set the flags field of each port to S3C_HCDFLG_USED if
the port is enabled.
diff --git a/Documentation/arm/Samsung/Bootloader-interface.txt b/Documentation/arm/samsung/bootloader-interface.rst
index d17ed518a7ea..a56f325dae78 100644
--- a/Documentation/arm/Samsung/Bootloader-interface.txt
+++ b/Documentation/arm/samsung/bootloader-interface.rst
@@ -1,7 +1,9 @@
- Interface between kernel and boot loaders on Exynos boards
- ==========================================================
+==========================================================
+Interface between kernel and boot loaders on Exynos boards
+==========================================================
Author: Krzysztof Kozlowski
+
Date : 6 June 2015
The document tries to describe currently used interface between Linux kernel
@@ -17,8 +19,10 @@ executing kernel.
1. Non-Secure mode
Address: sysram_ns_base_addr
+
+============= ============================================ ==================
Offset Value Purpose
-=============================================================================
+============= ============================================ ==================
0x08 exynos_cpu_resume_ns, mcpm_entry_point System suspend
0x0c 0x00000bad (Magic cookie) System suspend
0x1c exynos4_secondary_startup Secondary CPU boot
@@ -27,22 +31,28 @@ Offset Value Purpose
0x24 exynos_cpu_resume_ns AFTR
0x28 + 4*cpu 0x8 (Magic cookie, Exynos3250) AFTR
0x28 0x0 or last value during resume (Exynos542x) System suspend
+============= ============================================ ==================
2. Secure mode
Address: sysram_base_addr
+
+============= ============================================ ==================
Offset Value Purpose
-=============================================================================
+============= ============================================ ==================
0x00 exynos4_secondary_startup Secondary CPU boot
0x04 exynos4_secondary_startup (Exynos542x) Secondary CPU boot
4*cpu exynos4_secondary_startup (Exynos4412) Secondary CPU boot
0x20 exynos_cpu_resume (Exynos4210 r1.0) AFTR
0x24 0xfcba0d10 (Magic cookie, Exynos4210 r1.0) AFTR
+============= ============================================ ==================
Address: pmu_base_addr
+
+============= ============================================ ==================
Offset Value Purpose
-=============================================================================
+============= ============================================ ==================
0x0800 exynos_cpu_resume AFTR, suspend
0x0800 mcpm_entry_point (Exynos542x with MCPM) AFTR, suspend
0x0804 0xfcba0d10 (Magic cookie) AFTR
@@ -50,15 +60,18 @@ Offset Value Purpose
0x0814 exynos4_secondary_startup (Exynos4210 r1.1) Secondary CPU boot
0x0818 0xfcba0d10 (Magic cookie, Exynos4210 r1.1) AFTR
0x081C exynos_cpu_resume (Exynos4210 r1.1) AFTR
-
+============= ============================================ ==================
3. Other (regardless of secure/non-secure mode)
Address: pmu_base_addr
+
+============= =============================== ===============================
Offset Value Purpose
-=============================================================================
+============= =============================== ===============================
0x0908 Non-zero Secondary CPU boot up indicator
on Exynos3250 and Exynos542x
+============= =============================== ===============================
4. Glossary
diff --git a/Documentation/arm/Samsung/clksrc-change-registers.awk b/Documentation/arm/samsung/clksrc-change-registers.awk
index 7be1b8aa7cd9..7be1b8aa7cd9 100755
--- a/Documentation/arm/Samsung/clksrc-change-registers.awk
+++ b/Documentation/arm/samsung/clksrc-change-registers.awk
diff --git a/Documentation/arm/Samsung/GPIO.txt b/Documentation/arm/samsung/gpio.rst
index 795adfd88081..5f7cadd7159e 100644
--- a/Documentation/arm/Samsung/GPIO.txt
+++ b/Documentation/arm/samsung/gpio.rst
@@ -1,5 +1,6 @@
- Samsung GPIO implementation
- ===========================
+===========================
+Samsung GPIO implementation
+===========================
Introduction
------------
@@ -11,7 +12,7 @@ specific calls provided alongside the drivers/gpio core.
S3C24XX (Legacy)
----------------
-See Documentation/arm/Samsung-S3C24XX/GPIO.txt for more information
+See Documentation/arm/samsung-s3c24xx/gpio.rst for more information
about these devices. Their implementation has been brought into line
with the core samsung implementation described in this document.
diff --git a/Documentation/arm/samsung/index.rst b/Documentation/arm/samsung/index.rst
new file mode 100644
index 000000000000..8142cce3d23e
--- /dev/null
+++ b/Documentation/arm/samsung/index.rst
@@ -0,0 +1,12 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===========
+Samsung SoC
+===========
+
+.. toctree::
+ :maxdepth: 1
+
+ gpio
+ bootloader-interface
+ overview
diff --git a/Documentation/arm/Samsung/Overview.txt b/Documentation/arm/samsung/overview.rst
index 8f7309bad460..e74307897416 100644
--- a/Documentation/arm/Samsung/Overview.txt
+++ b/Documentation/arm/samsung/overview.rst
@@ -1,5 +1,6 @@
- Samsung ARM Linux Overview
- ==========================
+==========================
+Samsung ARM Linux Overview
+==========================
Introduction
------------
@@ -11,7 +12,7 @@ Introduction
The currently supported SoCs are:
- - S3C24XX: See Documentation/arm/Samsung-S3C24XX/Overview.txt for full list
+ - S3C24XX: See Documentation/arm/samsung-s3c24xx/overview.rst for full list
- S3C64XX: S3C6400 and S3C6410
- S5PC110 / S5PV210
@@ -22,7 +23,7 @@ S3C24XX Systems
There is still documentation in Documnetation/arm/Samsung-S3C24XX/ which
deals with the architecture and drivers specific to these devices.
- See Documentation/arm/Samsung-S3C24XX/Overview.txt for more information
+ See Documentation/arm/samsung-s3c24xx/overview.rst for more information
on the implementation details and specific support.
@@ -32,8 +33,10 @@ Configuration
A number of configurations are supplied, as there is no current way of
unifying all the SoCs into one kernel.
- s5pc110_defconfig - S5PC110 specific default configuration
- s5pv210_defconfig - S5PV210 specific default configuration
+ s5pc110_defconfig
+ - S5PC110 specific default configuration
+ s5pv210_defconfig
+ - S5PV210 specific default configuration
Layout
diff --git a/Documentation/arm/Setup b/Documentation/arm/setup.rst
index 0cb1e64bde80..8e12ef3fb9a7 100644
--- a/Documentation/arm/Setup
+++ b/Documentation/arm/setup.rst
@@ -1,5 +1,6 @@
+=============================================
Kernel initialisation parameters on ARM Linux
----------------------------------------------
+=============================================
The following document describes the kernel initialisation parameter
structure, otherwise known as 'struct param_struct' which is used
@@ -14,12 +15,10 @@ There are a lot of parameters listed in there, and they are described
below:
page_size
-
This parameter must be set to the page size of the machine, and
will be checked by the kernel.
nr_pages
-
This is the total number of pages of memory in the system. If
the memory is banked, then this should contain the total number
of pages in the system.
@@ -28,24 +27,22 @@ below:
include this information.
ramdisk_size
-
This is now obsolete, and should not be used.
flags
-
Various kernel flags, including:
- bit 0 - 1 = mount root read only
- bit 1 - unused
- bit 2 - 0 = load ramdisk
- bit 3 - 0 = prompt for ramdisk
- rootdev
+ ===== ========================
+ bit 0 1 = mount root read only
+ bit 1 unused
+ bit 2 0 = load ramdisk
+ bit 3 0 = prompt for ramdisk
+ ===== ========================
+ rootdev
major/minor number pair of device to mount as the root filesystem.
- video_num_cols
- video_num_rows
-
+ video_num_cols / video_num_rows
These two together describe the character size of the dummy console,
or VGA console character size. They should not be used for any other
purpose.
@@ -54,66 +51,50 @@ below:
the equivalent character size of your fbcon display. This then allows
all the bootup messages to be displayed correctly.
- video_x
- video_y
-
+ video_x / video_y
This describes the character position of cursor on VGA console, and
is otherwise unused. (should not be used for other console types, and
should not be used for other purposes).
memc_control_reg
-
MEMC chip control register for Acorn Archimedes and Acorn A5000
based machines. May be used differently by different architectures.
sounddefault
-
Default sound setting on Acorn machines. May be used differently by
different architectures.
adfsdrives
-
Number of ADFS/MFM disks. May be used differently by different
architectures.
- bytes_per_char_h
- bytes_per_char_v
-
+ bytes_per_char_h / bytes_per_char_v
These are now obsolete, and should not be used.
pages_in_bank[4]
-
Number of pages in each bank of the systems memory (used for RiscPC).
This is intended to be used on systems where the physical memory
is non-contiguous from the processors point of view.
pages_in_vram
-
Number of pages in VRAM (used on Acorn RiscPC). This value may also
be used by loaders if the size of the video RAM can't be obtained
from the hardware.
- initrd_start
- initrd_size
-
+ initrd_start / initrd_size
This describes the kernel virtual start address and size of the
initial ramdisk.
rd_start
-
Start address in sectors of the ramdisk image on a floppy disk.
system_rev
-
system revision number.
- system_serial_low
- system_serial_high
-
+ system_serial_low / system_serial_high
system 64-bit serial number
mem_fclk_21285
-
The speed of the external oscillator to the 21285 (footbridge),
which control's the speed of the memory bus, timer & serial port.
Depending upon the speed of the cpu its value can be between
@@ -121,9 +102,7 @@ below:
then a value of 50 Mhz is the default on 21285 architectures.
paths[8][128]
-
These are now obsolete, and should not be used.
commandline
-
Kernel command line parameters. Details can be found elsewhere.
diff --git a/Documentation/arm/SH-Mobile/.gitignore b/Documentation/arm/sh-mobile/.gitignore
index c928dbf3cc88..c928dbf3cc88 100644
--- a/Documentation/arm/SH-Mobile/.gitignore
+++ b/Documentation/arm/sh-mobile/.gitignore
diff --git a/Documentation/arm/SPEAr/overview.txt b/Documentation/arm/spear/overview.rst
index 1b049be6c84f..1a77f6b213b6 100644
--- a/Documentation/arm/SPEAr/overview.txt
+++ b/Documentation/arm/spear/overview.rst
@@ -1,5 +1,6 @@
- SPEAr ARM Linux Overview
- ==========================
+========================
+SPEAr ARM Linux Overview
+========================
Introduction
------------
@@ -14,6 +15,7 @@ Introduction
Hierarchy in SPEAr is as follows:
SPEAr (Platform)
+
- SPEAr3XX (3XX SOC series, based on ARM9)
- SPEAr300 (SOC)
- SPEAr300 Evaluation Board
@@ -30,17 +32,18 @@ Introduction
- SPEAr1340 (SOC)
- SPEAr1340 Evaluation Board
- Configuration
- -------------
+Configuration
+-------------
A generic configuration is provided for each machine, and can be used as the
- default by
+ default by::
+
make spear13xx_defconfig
make spear3xx_defconfig
make spear6xx_defconfig
- Layout
- ------
+Layout
+------
The common files for multiple machine families (SPEAr3xx, SPEAr6xx and
SPEAr13xx) are located in the platform code contained in arch/arm/plat-spear
@@ -57,7 +60,7 @@ Introduction
support Flattened Device Tree.
- Document Author
- ---------------
+Document Author
+---------------
Viresh Kumar <vireshk@kernel.org>, (c) 2010-2012 ST Microelectronics
diff --git a/Documentation/arm/sti/overview.txt b/Documentation/arm/sti/overview.rst
index 1a4e93d6027f..70743617a74f 100644
--- a/Documentation/arm/sti/overview.txt
+++ b/Documentation/arm/sti/overview.rst
@@ -1,5 +1,6 @@
- STi ARM Linux Overview
- ==========================
+======================
+STi ARM Linux Overview
+======================
Introduction
------------
@@ -10,15 +11,17 @@ Introduction
B2000 and B2020 Reference boards.
- configuration
- -------------
+configuration
+-------------
A generic configuration is provided for both STiH415/416, and can be used as the
- default by
+ default by::
+
make stih41x_defconfig
- Layout
- ------
+Layout
+------
+
All the files for multiple machine families (STiH415, STiH416, and STiG125)
are located in the platform code contained in arch/arm/mach-sti
@@ -27,7 +30,7 @@ Introduction
Device Trees.
- Document Author
- ---------------
+Document Author
+---------------
Srinivas Kandagatla <srinivas.kandagatla@st.com>, (c) 2013 ST Microelectronics
diff --git a/Documentation/arm/sti/stih407-overview.txt b/Documentation/arm/sti/stih407-overview.rst
index 3343f32f58bc..027e75bc7b7c 100644
--- a/Documentation/arm/sti/stih407-overview.txt
+++ b/Documentation/arm/sti/stih407-overview.rst
@@ -1,5 +1,6 @@
- STiH407 Overview
- ================
+================
+STiH407 Overview
+================
Introduction
------------
@@ -12,7 +13,7 @@ Introduction
- ARM Cortex-A9 1.5 GHz dual core CPU (28nm)
- SATA2, USB 3.0, PCIe, Gbit Ethernet
- Document Author
- ---------------
+Document Author
+---------------
Maxime Coquelin <maxime.coquelin@st.com>, (c) 2014 ST Microelectronics
diff --git a/Documentation/arm/sti/stih415-overview.txt b/Documentation/arm/sti/stih415-overview.rst
index 1383e33f265d..b67452d610c4 100644
--- a/Documentation/arm/sti/stih415-overview.txt
+++ b/Documentation/arm/sti/stih415-overview.rst
@@ -1,5 +1,6 @@
- STiH415 Overview
- ================
+================
+STiH415 Overview
+================
Introduction
------------
@@ -7,6 +8,7 @@ Introduction
The STiH415 is the next generation of HD, AVC set-top box processors
for satellite, cable, terrestrial and IP-STB markets.
- Features
+ Features:
+
- ARM Cortex-A9 1.0 GHz, dual-core CPU
- SATA2x2,USB 2.0x3, PCIe, Gbit Ethernet MACx2
diff --git a/Documentation/arm/sti/stih416-overview.txt b/Documentation/arm/sti/stih416-overview.rst
index 558444c201c6..93f17d74d8db 100644
--- a/Documentation/arm/sti/stih416-overview.txt
+++ b/Documentation/arm/sti/stih416-overview.rst
@@ -1,5 +1,6 @@
- STiH416 Overview
- ================
+================
+STiH416 Overview
+================
Introduction
------------
diff --git a/Documentation/arm/sti/stih418-overview.txt b/Documentation/arm/sti/stih418-overview.rst
index 1cd8fc80646d..b563c1f4fe5a 100644
--- a/Documentation/arm/sti/stih418-overview.txt
+++ b/Documentation/arm/sti/stih418-overview.rst
@@ -1,5 +1,6 @@
- STiH418 Overview
- ================
+================
+STiH418 Overview
+================
Introduction
------------
@@ -14,7 +15,7 @@ Introduction
- HEVC L5.1 Main 10
- VP9
- Document Author
- ---------------
+Document Author
+---------------
Maxime Coquelin <maxime.coquelin@st.com>, (c) 2015 ST Microelectronics
diff --git a/Documentation/arm/stm32/overview.rst b/Documentation/arm/stm32/overview.rst
index f7e734153860..85cfc8410798 100644
--- a/Documentation/arm/stm32/overview.rst
+++ b/Documentation/arm/stm32/overview.rst
@@ -1,5 +1,3 @@
-:orphan:
-
========================
STM32 ARM Linux Overview
========================
diff --git a/Documentation/arm/stm32/stm32f429-overview.rst b/Documentation/arm/stm32/stm32f429-overview.rst
index 65bbb1c3b423..a7ebe8ea6697 100644
--- a/Documentation/arm/stm32/stm32f429-overview.rst
+++ b/Documentation/arm/stm32/stm32f429-overview.rst
@@ -1,5 +1,4 @@
-:orphan:
-
+==================
STM32F429 Overview
==================
@@ -23,6 +22,4 @@ Datasheet and reference manual are publicly available on ST website (STM32F429_)
.. _STM32F429: http://www.st.com/web/en/catalog/mmc/FM141/SC1169/SS1577/LN1806?ecmp=stm32f429-439_pron_pr-ces2014_nov2013
-:Authors:
-
-Maxime Coquelin <mcoquelin.stm32@gmail.com>
+:Authors: Maxime Coquelin <mcoquelin.stm32@gmail.com>
diff --git a/Documentation/arm/stm32/stm32f746-overview.rst b/Documentation/arm/stm32/stm32f746-overview.rst
index 42d593085015..78befddc7740 100644
--- a/Documentation/arm/stm32/stm32f746-overview.rst
+++ b/Documentation/arm/stm32/stm32f746-overview.rst
@@ -1,5 +1,4 @@
-:orphan:
-
+==================
STM32F746 Overview
==================
@@ -30,6 +29,4 @@ Datasheet and reference manual are publicly available on ST website (STM32F746_)
.. _STM32F746: http://www.st.com/content/st_com/en/products/microcontrollers/stm32-32-bit-arm-cortex-mcus/stm32f7-series/stm32f7x6/stm32f746ng.html
-:Authors:
-
-Alexandre Torgue <alexandre.torgue@st.com>
+:Authors: Alexandre Torgue <alexandre.torgue@st.com>
diff --git a/Documentation/arm/stm32/stm32f769-overview.rst b/Documentation/arm/stm32/stm32f769-overview.rst
index f6adac862b17..e482980ddf21 100644
--- a/Documentation/arm/stm32/stm32f769-overview.rst
+++ b/Documentation/arm/stm32/stm32f769-overview.rst
@@ -1,5 +1,4 @@
-:orphan:
-
+==================
STM32F769 Overview
==================
@@ -32,6 +31,4 @@ Datasheet and reference manual are publicly available on ST website (STM32F769_)
.. _STM32F769: http://www.st.com/content/st_com/en/products/microcontrollers/stm32-32-bit-arm-cortex-mcus/stm32-high-performance-mcus/stm32f7-series/stm32f7x9/stm32f769ni.html
-:Authors:
-
-Alexandre Torgue <alexandre.torgue@st.com>
+:Authors: Alexandre Torgue <alexandre.torgue@st.com>
diff --git a/Documentation/arm/stm32/stm32h743-overview.rst b/Documentation/arm/stm32/stm32h743-overview.rst
index c525835e7473..4e15f1a42730 100644
--- a/Documentation/arm/stm32/stm32h743-overview.rst
+++ b/Documentation/arm/stm32/stm32h743-overview.rst
@@ -1,5 +1,4 @@
-:orphan:
-
+==================
STM32H743 Overview
==================
@@ -31,6 +30,4 @@ Datasheet and reference manual are publicly available on ST website (STM32H743_)
.. _STM32H743: http://www.st.com/en/microcontrollers/stm32h7x3.html?querycriteria=productId=LN2033
-:Authors:
-
-Alexandre Torgue <alexandre.torgue@st.com>
+:Authors: Alexandre Torgue <alexandre.torgue@st.com>
diff --git a/Documentation/arm/stm32/stm32mp157-overview.rst b/Documentation/arm/stm32/stm32mp157-overview.rst
index 2c52cd020601..f62fdc8e7d8d 100644
--- a/Documentation/arm/stm32/stm32mp157-overview.rst
+++ b/Documentation/arm/stm32/stm32mp157-overview.rst
@@ -1,5 +1,4 @@
-:orphan:
-
+===================
STM32MP157 Overview
===================
diff --git a/Documentation/arm/sunxi/README b/Documentation/arm/sunxi.rst
index f8efc21998bf..b037428aee98 100644
--- a/Documentation/arm/sunxi/README
+++ b/Documentation/arm/sunxi.rst
@@ -1,3 +1,4 @@
+==================
ARM Allwinner SoCs
==================
@@ -10,93 +11,140 @@ SunXi family
Linux kernel mach directory: arch/arm/mach-sunxi
Flavors:
+
* ARM926 based SoCs
- Allwinner F20 (sun3i)
- + Not Supported
+
+ * Not Supported
* ARM Cortex-A8 based SoCs
- Allwinner A10 (sun4i)
- + Datasheet
+
+ * Datasheet
+
http://dl.linux-sunxi.org/A10/A10%20Datasheet%20-%20v1.21%20%282012-04-06%29.pdf
- + User Manual
+ * User Manual
+
http://dl.linux-sunxi.org/A10/A10%20User%20Manual%20-%20v1.20%20%282012-04-09%2c%20DECRYPTED%29.pdf
- Allwinner A10s (sun5i)
- + Datasheet
+
+ * Datasheet
+
http://dl.linux-sunxi.org/A10s/A10s%20Datasheet%20-%20v1.20%20%282012-03-27%29.pdf
- Allwinner A13 / R8 (sun5i)
- + Datasheet
+
+ * Datasheet
+
http://dl.linux-sunxi.org/A13/A13%20Datasheet%20-%20v1.12%20%282012-03-29%29.pdf
- + User Manual
+ * User Manual
+
http://dl.linux-sunxi.org/A13/A13%20User%20Manual%20-%20v1.2%20%282013-01-08%29.pdf
- Next Thing Co GR8 (sun5i)
* Single ARM Cortex-A7 based SoCs
- Allwinner V3s (sun8i)
- + Datasheet
+
+ * Datasheet
+
http://linux-sunxi.org/File:Allwinner_V3s_Datasheet_V1.0.pdf
* Dual ARM Cortex-A7 based SoCs
- Allwinner A20 (sun7i)
- + User Manual
+
+ * User Manual
+
http://dl.linux-sunxi.org/A20/A20%20User%20Manual%202013-03-22.pdf
- Allwinner A23 (sun8i)
- + Datasheet
+
+ * Datasheet
+
http://dl.linux-sunxi.org/A23/A23%20Datasheet%20V1.0%2020130830.pdf
- + User Manual
+
+ * User Manual
+
http://dl.linux-sunxi.org/A23/A23%20User%20Manual%20V1.0%2020130830.pdf
* Quad ARM Cortex-A7 based SoCs
- Allwinner A31 (sun6i)
- + Datasheet
+
+ * Datasheet
+
http://dl.linux-sunxi.org/A31/A3x_release_document/A31/IC/A31%20datasheet%20V1.3%2020131106.pdf
- + User Manual
+
+ * User Manual
+
http://dl.linux-sunxi.org/A31/A3x_release_document/A31/IC/A31%20user%20manual%20V1.1%2020130630.pdf
- Allwinner A31s (sun6i)
- + Datasheet
+
+ * Datasheet
+
http://dl.linux-sunxi.org/A31/A3x_release_document/A31s/IC/A31s%20datasheet%20V1.3%2020131106.pdf
- + User Manual
+
+ * User Manual
+
http://dl.linux-sunxi.org/A31/A3x_release_document/A31s/IC/A31s%20User%20Manual%20%20V1.0%2020130322.pdf
- Allwinner A33 (sun8i)
- + Datasheet
+
+ * Datasheet
+
http://dl.linux-sunxi.org/A33/A33%20Datasheet%20release%201.1.pdf
- + User Manual
+
+ * User Manual
+
http://dl.linux-sunxi.org/A33/A33%20user%20manual%20release%201.1.pdf
- Allwinner H2+ (sun8i)
- + No document available now, but is known to be working properly with
+
+ * No document available now, but is known to be working properly with
H3 drivers and memory map.
- Allwinner H3 (sun8i)
- + Datasheet
+
+ * Datasheet
+
http://dl.linux-sunxi.org/H3/Allwinner_H3_Datasheet_V1.0.pdf
- Allwinner R40 (sun8i)
- + Datasheet
+
+ * Datasheet
+
https://github.com/tinalinux/docs/raw/r40-v1.y/R40_Datasheet_V1.0.pdf
- + User Manual
+
+ * User Manual
+
https://github.com/tinalinux/docs/raw/r40-v1.y/Allwinner_R40_User_Manual_V1.0.pdf
* Quad ARM Cortex-A15, Quad ARM Cortex-A7 based SoCs
- Allwinner A80
- + Datasheet
+
+ * Datasheet
+
http://dl.linux-sunxi.org/A80/A80_Datasheet_Revision_1.0_0404.pdf
* Octa ARM Cortex-A7 based SoCs
- Allwinner A83T
- + Datasheet
+
+ * Datasheet
+
https://github.com/allwinner-zh/documents/raw/master/A83T/A83T_Datasheet_v1.3_20150510.pdf
- + User Manual
+
+ * User Manual
+
https://github.com/allwinner-zh/documents/raw/master/A83T/A83T_User_Manual_v1.5.1_20150513.pdf
* Quad ARM Cortex-A53 based SoCs
- Allwinner A64
- + Datasheet
+
+ * Datasheet
+
http://dl.linux-sunxi.org/A64/A64_Datasheet_V1.1.pdf
- + User Manual
+
+ * User Manual
+
http://dl.linux-sunxi.org/A64/Allwinner%20A64%20User%20Manual%20v1.0.pdf
diff --git a/Documentation/arm/sunxi/clocks.txt b/Documentation/arm/sunxi/clocks.rst
index e09a88aa3136..23bd03f3e21f 100644
--- a/Documentation/arm/sunxi/clocks.txt
+++ b/Documentation/arm/sunxi/clocks.rst
@@ -1,3 +1,4 @@
+=======================================================
Frequently asked questions about the sunxi clock system
=======================================================
@@ -12,7 +13,7 @@ A: The 24MHz oscillator allows gating to save power. Indeed, if gated
steps, one can gate it and keep the system running. Consider this
simplified suspend example:
- While the system is operational, you would see something like
+ While the system is operational, you would see something like::
24MHz 32kHz
|
@@ -23,7 +24,7 @@ A: The 24MHz oscillator allows gating to save power. Indeed, if gated
[CPU]
When you are about to suspend, you switch the CPU Mux to the 32kHz
- oscillator:
+ oscillator::
24Mhz 32kHz
| |
@@ -33,7 +34,7 @@ A: The 24MHz oscillator allows gating to save power. Indeed, if gated
|
[CPU]
- Finally you can gate the main oscillator
+ Finally you can gate the main oscillator::
32kHz
|
diff --git a/Documentation/arm/swp_emulation b/Documentation/arm/swp_emulation.rst
index af903d22fd93..6a608a9c3715 100644
--- a/Documentation/arm/swp_emulation
+++ b/Documentation/arm/swp_emulation.rst
@@ -11,17 +11,17 @@ sequence. If a memory access fault (an abort) occurs, a segmentation fault is
signalled to the triggering process.
/proc/cpu/swp_emulation holds some statistics/information, including the PID of
-the last process to trigger the emulation to be invocated. For example:
----
-Emulated SWP: 12
-Emulated SWPB: 0
-Aborted SWP{B}: 1
-Last process: 314
----
+the last process to trigger the emulation to be invoked. For example::
-NOTE: when accessing uncached shared regions, LDREX/STREX rely on an external
-transaction monitoring block called a global monitor to maintain update
-atomicity. If your system does not implement a global monitor, this option can
-cause programs that perform SWP operations to uncached memory to deadlock, as
-the STREX operation will always fail.
+ Emulated SWP: 12
+ Emulated SWPB: 0
+ Aborted SWP{B}: 1
+ Last process: 314
+
+NOTE:
+ when accessing uncached shared regions, LDREX/STREX rely on an external
+ transaction monitoring block called a global monitor to maintain update
+ atomicity. If your system does not implement a global monitor, this option can
+ cause programs that perform SWP operations to uncached memory to deadlock, as
+ the STREX operation will always fail.
diff --git a/Documentation/arm/tcm.txt b/Documentation/arm/tcm.rst
index 7c15871c1885..effd9c7bc968 100644
--- a/Documentation/arm/tcm.txt
+++ b/Documentation/arm/tcm.rst
@@ -1,5 +1,7 @@
+==================================================
ARM TCM (Tightly-Coupled Memory) handling in Linux
-----
+==================================================
+
Written by Linus Walleij <linus.walleij@stericsson.com>
Some ARM SoC:s have a so-called TCM (Tightly-Coupled Memory).
@@ -85,46 +87,50 @@ to have functions called locally inside the TCM without
wasting space, there is also the __tcmlocalfunc prefix that
will make the call relative.
-Variables to go into dtcm can be tagged like this:
-int __tcmdata foo;
+Variables to go into dtcm can be tagged like this::
+
+ int __tcmdata foo;
+
+Constants can be tagged like this::
-Constants can be tagged like this:
-int __tcmconst foo;
+ int __tcmconst foo;
+
+To put assembler into TCM just use::
+
+ .section ".tcm.text" or .section ".tcm.data"
-To put assembler into TCM just use
-.section ".tcm.text" or .section ".tcm.data"
respectively.
-Example code:
+Example code::
-#include <asm/tcm.h>
+ #include <asm/tcm.h>
-/* Uninitialized data */
-static u32 __tcmdata tcmvar;
-/* Initialized data */
-static u32 __tcmdata tcmassigned = 0x2BADBABEU;
-/* Constant */
-static const u32 __tcmconst tcmconst = 0xCAFEBABEU;
+ /* Uninitialized data */
+ static u32 __tcmdata tcmvar;
+ /* Initialized data */
+ static u32 __tcmdata tcmassigned = 0x2BADBABEU;
+ /* Constant */
+ static const u32 __tcmconst tcmconst = 0xCAFEBABEU;
-static void __tcmlocalfunc tcm_to_tcm(void)
-{
+ static void __tcmlocalfunc tcm_to_tcm(void)
+ {
int i;
for (i = 0; i < 100; i++)
tcmvar ++;
-}
+ }
-static void __tcmfunc hello_tcm(void)
-{
+ static void __tcmfunc hello_tcm(void)
+ {
/* Some abstract code that runs in ITCM */
int i;
for (i = 0; i < 100; i++) {
tcmvar ++;
}
tcm_to_tcm();
-}
+ }
-static void __init test_tcm(void)
-{
+ static void __init test_tcm(void)
+ {
u32 *tcmem;
int i;
@@ -152,4 +158,4 @@ static void __init test_tcm(void)
printk("TCM tcmem[%d] = %08x\n", i, tcmem[i]);
tcm_free(tcmem, 20);
}
-}
+ }
diff --git a/Documentation/arm/uefi.txt b/Documentation/arm/uefi.rst
index 6543a0adea8a..f868330df6be 100644
--- a/Documentation/arm/uefi.txt
+++ b/Documentation/arm/uefi.rst
@@ -1,3 +1,7 @@
+================================================
+The Unified Extensible Firmware Interface (UEFI)
+================================================
+
UEFI, the Unified Extensible Firmware Interface, is a specification
governing the behaviours of compatible firmware interfaces. It is
maintained by the UEFI Forum - http://www.uefi.org/.
@@ -11,11 +15,13 @@ UEFI support in Linux
=====================
Booting on a platform with firmware compliant with the UEFI specification
makes it possible for the kernel to support additional features:
+
- UEFI Runtime Services
- Retrieving various configuration information through the standardised
interface of UEFI configuration tables. (ACPI, SMBIOS, ...)
For actually enabling [U]EFI support, enable:
+
- CONFIG_EFI=y
- CONFIG_EFI_VARS=y or m
@@ -42,19 +48,20 @@ Instead, the kernel reads the UEFI memory map.
The stub populates the FDT /chosen node with (and the kernel scans for) the
following parameters:
-________________________________________________________________________________
-Name | Size | Description
-================================================================================
-linux,uefi-system-table | 64-bit | Physical address of the UEFI System Table.
---------------------------------------------------------------------------------
-linux,uefi-mmap-start | 64-bit | Physical address of the UEFI memory map,
- | | populated by the UEFI GetMemoryMap() call.
---------------------------------------------------------------------------------
-linux,uefi-mmap-size | 32-bit | Size in bytes of the UEFI memory map
- | | pointed to in previous entry.
---------------------------------------------------------------------------------
-linux,uefi-mmap-desc-size | 32-bit | Size in bytes of each entry in the UEFI
- | | memory map.
---------------------------------------------------------------------------------
-linux,uefi-mmap-desc-ver | 32-bit | Version of the mmap descriptor format.
---------------------------------------------------------------------------------
+
+========================== ====== ===========================================
+Name Size Description
+========================== ====== ===========================================
+linux,uefi-system-table 64-bit Physical address of the UEFI System Table.
+
+linux,uefi-mmap-start 64-bit Physical address of the UEFI memory map,
+ populated by the UEFI GetMemoryMap() call.
+
+linux,uefi-mmap-size 32-bit Size in bytes of the UEFI memory map
+ pointed to in previous entry.
+
+linux,uefi-mmap-desc-size 32-bit Size in bytes of each entry in the UEFI
+ memory map.
+
+linux,uefi-mmap-desc-ver 32-bit Version of the mmap descriptor format.
+========================== ====== ===========================================
diff --git a/Documentation/arm/VFP/release-notes.txt b/Documentation/arm/vfp/release-notes.rst
index 28a2795705ca..c6b04937cee3 100644
--- a/Documentation/arm/VFP/release-notes.txt
+++ b/Documentation/arm/vfp/release-notes.rst
@@ -1,7 +1,9 @@
+===============================================
Release notes for Linux Kernel VFP support code
------------------------------------------------
+===============================================
Date: 20 May 2004
+
Author: Russell King
This is the first release of the Linux Kernel VFP support code. It
diff --git a/Documentation/arm/vlocks.txt b/Documentation/arm/vlocks.rst
index 45731672c564..a40a1742110b 100644
--- a/Documentation/arm/vlocks.txt
+++ b/Documentation/arm/vlocks.rst
@@ -1,3 +1,4 @@
+======================================
vlocks for Bare-Metal Mutual Exclusion
======================================
@@ -26,7 +27,7 @@ started yet.
Algorithm
---------
-The easiest way to explain the vlocks algorithm is with some pseudo-code:
+The easiest way to explain the vlocks algorithm is with some pseudo-code::
int currently_voting[NR_CPUS] = { 0, };
@@ -93,7 +94,7 @@ Features and limitations
number of CPUs.
vlocks can be cascaded in a voting hierarchy to permit better scaling
- if necessary, as in the following hypothetical example for 4096 CPUs:
+ if necessary, as in the following hypothetical example for 4096 CPUs::
/* first level: local election */
my_town = towns[(this_cpu >> 4) & 0xf];
@@ -127,12 +128,12 @@ the basic algorithm:
reduces the number of round-trips required to external memory.
In the ARM implementation, this means that we can use a single load
- and comparison:
+ and comparison::
LDR Rt, [Rn]
CMP Rt, #0
- ...in place of code equivalent to:
+ ...in place of code equivalent to::
LDRB Rt, [Rn]
CMP Rt, #0
diff --git a/Documentation/arm64/index.rst b/Documentation/arm64/index.rst
index 018b7836ecb7..96b696ba4e6c 100644
--- a/Documentation/arm64/index.rst
+++ b/Documentation/arm64/index.rst
@@ -1,5 +1,3 @@
-:orphan:
-
==================
ARM64 Architecture
==================
diff --git a/Documentation/backlight/lp855x-driver.txt b/Documentation/backlight/lp855x-driver.txt
deleted file mode 100644
index 01bce243d3d7..000000000000
--- a/Documentation/backlight/lp855x-driver.txt
+++ /dev/null
@@ -1,66 +0,0 @@
-Kernel driver lp855x
-====================
-
-Backlight driver for LP855x ICs
-
-Supported chips:
- Texas Instruments LP8550, LP8551, LP8552, LP8553, LP8555, LP8556 and
- LP8557
-
-Author: Milo(Woogyom) Kim <milo.kim@ti.com>
-
-Description
------------
-
-* Brightness control
-
-Brightness can be controlled by the pwm input or the i2c command.
-The lp855x driver supports both cases.
-
-* Device attributes
-
-1) bl_ctl_mode
-Backlight control mode.
-Value : pwm based or register based
-
-2) chip_id
-The lp855x chip id.
-Value : lp8550/lp8551/lp8552/lp8553/lp8555/lp8556/lp8557
-
-Platform data for lp855x
-------------------------
-
-For supporting platform specific data, the lp855x platform data can be used.
-
-* name : Backlight driver name. If it is not defined, default name is set.
-* device_control : Value of DEVICE CONTROL register.
-* initial_brightness : Initial value of backlight brightness.
-* period_ns : Platform specific PWM period value. unit is nano.
- Only valid when brightness is pwm input mode.
-* size_program : Total size of lp855x_rom_data.
-* rom_data : List of new eeprom/eprom registers.
-
-example 1) lp8552 platform data : i2c register mode with new eeprom data
-
-#define EEPROM_A5_ADDR 0xA5
-#define EEPROM_A5_VAL 0x4f /* EN_VSYNC=0 */
-
-static struct lp855x_rom_data lp8552_eeprom_arr[] = {
- {EEPROM_A5_ADDR, EEPROM_A5_VAL},
-};
-
-static struct lp855x_platform_data lp8552_pdata = {
- .name = "lcd-bl",
- .device_control = I2C_CONFIG(LP8552),
- .initial_brightness = INITIAL_BRT,
- .size_program = ARRAY_SIZE(lp8552_eeprom_arr),
- .rom_data = lp8552_eeprom_arr,
-};
-
-example 2) lp8556 platform data : pwm input mode with default rom data
-
-static struct lp855x_platform_data lp8556_pdata = {
- .device_control = PWM_CONFIG(LP8556),
- .initial_brightness = INITIAL_BRT,
- .period_ns = 1000000,
-};
diff --git a/Documentation/block/bfq-iosched.txt b/Documentation/block/bfq-iosched.rst
index bbd6eb5bbb07..0d237d402860 100644
--- a/Documentation/block/bfq-iosched.txt
+++ b/Documentation/block/bfq-iosched.rst
@@ -1,9 +1,11 @@
+==========================
BFQ (Budget Fair Queueing)
==========================
BFQ is a proportional-share I/O scheduler, with some extra
low-latency capabilities. In addition to cgroups support (blkio or io
controllers), BFQ's main features are:
+
- BFQ guarantees a high system and application responsiveness, and a
low latency for time-sensitive applications, such as audio or video
players;
@@ -55,18 +57,18 @@ sustainable throughputs, on the same systems as above:
BFQ works for multi-queue devices too.
-The table of contents follow. Impatients can just jump to Section 3.
+.. The table of contents follows. Impatient readers can just jump to Section 3.
-CONTENTS
+.. CONTENTS
-1. When may BFQ be useful?
- 1-1 Personal systems
- 1-2 Server systems
-2. How does BFQ work?
-3. What are BFQ's tunables and how to properly configure BFQ?
-4. BFQ group scheduling
- 4-1 Service guarantees provided
- 4-2 Interface
+ 1. When may BFQ be useful?
+ 1-1 Personal systems
+ 1-2 Server systems
+ 2. How does BFQ work?
+ 3. What are BFQ's tunables and how to properly configure BFQ?
+ 4. BFQ group scheduling
+ 4-1 Service guarantees provided
+ 4-2 Interface
1. When may BFQ be useful?
==========================
@@ -77,17 +79,20 @@ BFQ provides the following benefits on personal and server systems.
--------------------
Low latency for interactive applications
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Regardless of the actual background workload, BFQ guarantees that, for
interactive tasks, the storage device is virtually as responsive as if
it was idle. For example, even if one or more of the following
background workloads are being executed:
+
- one or more large files are being read, written or copied,
- a tree of source files is being compiled,
- one or more virtual machines are performing I/O,
- a software update is in progress,
- indexing daemons are scanning filesystems and updating their
databases,
+
starting an application or loading a file from within an application
takes about the same time as if the storage device was idle. As a
comparison, with CFQ, NOOP or DEADLINE, and in the same conditions,
@@ -95,13 +100,14 @@ applications experience high latencies, or even become unresponsive
until the background workload terminates (also on SSDs).
Low latency for soft real-time applications
-
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Also soft real-time applications, such as audio and video
players/streamers, enjoy a low latency and a low drop rate, regardless
of the background I/O workload. As a consequence, these applications
do not suffer from almost any glitch due to the background workload.
Higher speed for code-development tasks
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
If some additional workload happens to be executed in parallel, then
BFQ executes the I/O-related components of typical code-development
@@ -109,6 +115,7 @@ tasks (compilation, checkout, merge, ...) much more quickly than CFQ,
NOOP or DEADLINE.
High throughput
+^^^^^^^^^^^^^^^
On hard disks, BFQ achieves up to 30% higher throughput than CFQ, and
up to 150% higher throughput than DEADLINE and NOOP, with all the
@@ -117,6 +124,7 @@ and with all the workloads on flash-based devices, BFQ achieves,
instead, about the same throughput as the other schedulers.
Strong fairness, bandwidth and delay guarantees
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
BFQ distributes the device throughput, and not just the device time,
among I/O-bound applications in proportion their weights, with any
@@ -133,15 +141,15 @@ Most benefits for server systems follow from the same service
properties as above. In particular, regardless of whether additional,
possibly heavy workloads are being served, BFQ guarantees:
-. audio and video-streaming with zero or very low jitter and drop
+* audio and video-streaming with zero or very low jitter and drop
rate;
-. fast retrieval of WEB pages and embedded objects;
+* fast retrieval of WEB pages and embedded objects;
-. real-time recording of data in live-dumping applications (e.g.,
+* real-time recording of data in live-dumping applications (e.g.,
packet logging);
-. responsiveness in local and remote access to a server.
+* responsiveness in local and remote access to a server.
2. How does BFQ work?
@@ -151,7 +159,7 @@ BFQ is a proportional-share I/O scheduler, whose general structure,
plus a lot of code, are borrowed from CFQ.
- Each process doing I/O on a device is associated with a weight and a
- (bfq_)queue.
+ `(bfq_)queue`.
- BFQ grants exclusive access to the device, for a while, to one queue
(process) at a time, and implements this service model by
@@ -539,12 +547,13 @@ As for cgroups-v1 (blkio controller), the exact set of stat files
created, and kept up-to-date by bfq, depends on whether
CONFIG_BFQ_CGROUP_DEBUG is set. If it is set, then bfq creates all
the stat files documented in
-Documentation/cgroup-v1/blkio-controller.rst. If, instead,
-CONFIG_BFQ_CGROUP_DEBUG is not set, then bfq creates only the files
-blkio.bfq.io_service_bytes
-blkio.bfq.io_service_bytes_recursive
-blkio.bfq.io_serviced
-blkio.bfq.io_serviced_recursive
+Documentation/admin-guide/cgroup-v1/blkio-controller.rst. If, instead,
+CONFIG_BFQ_CGROUP_DEBUG is not set, then bfq creates only the files::
+
+ blkio.bfq.io_service_bytes
+ blkio.bfq.io_service_bytes_recursive
+ blkio.bfq.io_serviced
+ blkio.bfq.io_serviced_recursive
The value of CONFIG_BFQ_CGROUP_DEBUG greatly influences the maximum
throughput sustainable with bfq, because updating the blkio.bfq.*
@@ -567,17 +576,22 @@ weight of the queues associated with interactive and soft real-time
applications. Unset this tunable if you need/want to control weights.
-[1] P. Valente, A. Avanzini, "Evolution of the BFQ Storage I/O
+[1]
+ P. Valente, A. Avanzini, "Evolution of the BFQ Storage I/O
Scheduler", Proceedings of the First Workshop on Mobile System
Technologies (MST-2015), May 2015.
+
http://algogroup.unimore.it/people/paolo/disk_sched/mst-2015.pdf
-[2] P. Valente and M. Andreolini, "Improving Application
+[2]
+ P. Valente and M. Andreolini, "Improving Application
Responsiveness with the BFQ Disk I/O Scheduler", Proceedings of
the 5th Annual International Systems and Storage Conference
(SYSTOR '12), June 2012.
+
Slightly extended version:
- http://algogroup.unimore.it/people/paolo/disk_sched/bfq-v1-suite-
- results.pdf
-[3] https://github.com/Algodev-github/S
+ http://algogroup.unimore.it/people/paolo/disk_sched/bfq-v1-suite-results.pdf
+
+[3]
+ https://github.com/Algodev-github/S
diff --git a/Documentation/block/biodoc.txt b/Documentation/block/biodoc.rst
index 5a4a799fe61b..b964796ec9c7 100644
--- a/Documentation/block/biodoc.txt
+++ b/Documentation/block/biodoc.rst
@@ -1,15 +1,25 @@
- Notes on the Generic Block Layer Rewrite in Linux 2.5
- =====================================================
+=====================================================
+Notes on the Generic Block Layer Rewrite in Linux 2.5
+=====================================================
+
+.. note::
+
+ It seems that there is a lot of outdated stuff here. This seems
+ to be written somewhat as a task list. Yet, eventually, something
+ here might still be useful.
Notes Written on Jan 15, 2002:
- Jens Axboe <jens.axboe@oracle.com>
- Suparna Bhattacharya <suparna@in.ibm.com>
+
+ - Jens Axboe <jens.axboe@oracle.com>
+ - Suparna Bhattacharya <suparna@in.ibm.com>
Last Updated May 2, 2002
+
September 2003: Updated I/O Scheduler portions
- Nick Piggin <npiggin@kernel.dk>
+ - Nick Piggin <npiggin@kernel.dk>
-Introduction:
+Introduction
+============
These are some notes describing some aspects of the 2.5 block layer in the
context of the bio rewrite. The idea is to bring out some of the key
@@ -17,11 +27,11 @@ changes and a glimpse of the rationale behind those changes.
Please mail corrections & suggestions to suparna@in.ibm.com.
-Credits:
----------
+Credits
+=======
2.5 bio rewrite:
- Jens Axboe <jens.axboe@oracle.com>
+ - Jens Axboe <jens.axboe@oracle.com>
Many aspects of the generic block layer redesign were driven by and evolved
over discussions, prior patches and the collective experience of several
@@ -29,62 +39,63 @@ people. See sections 8 and 9 for a list of some related references.
The following people helped with review comments and inputs for this
document:
- Christoph Hellwig <hch@infradead.org>
- Arjan van de Ven <arjanv@redhat.com>
- Randy Dunlap <rdunlap@xenotime.net>
- Andre Hedrick <andre@linux-ide.org>
+
+ - Christoph Hellwig <hch@infradead.org>
+ - Arjan van de Ven <arjanv@redhat.com>
+ - Randy Dunlap <rdunlap@xenotime.net>
+ - Andre Hedrick <andre@linux-ide.org>
The following people helped with fixes/contributions to the bio patches
while it was still work-in-progress:
- David S. Miller <davem@redhat.com>
+ - David S. Miller <davem@redhat.com>
-Description of Contents:
-------------------------
-1. Scope for tuning of logic to various needs
- 1.1 Tuning based on device or low level driver capabilities
+.. Description of Contents:
+
+ 1. Scope for tuning of logic to various needs
+ 1.1 Tuning based on device or low level driver capabilities
- Per-queue parameters
- Highmem I/O support
- I/O scheduler modularization
- 1.2 Tuning based on high level requirements/capabilities
+ 1.2 Tuning based on high level requirements/capabilities
1.2.1 Request Priority/Latency
- 1.3 Direct access/bypass to lower layers for diagnostics and special
- device operations
+ 1.3 Direct access/bypass to lower layers for diagnostics and special
+ device operations
1.3.1 Pre-built commands
-2. New flexible and generic but minimalist i/o structure or descriptor
- (instead of using buffer heads at the i/o layer)
- 2.1 Requirements/Goals addressed
- 2.2 The bio struct in detail (multi-page io unit)
- 2.3 Changes in the request structure
-3. Using bios
- 3.1 Setup/teardown (allocation, splitting)
- 3.2 Generic bio helper routines
- 3.2.1 Traversing segments and completion units in a request
- 3.2.2 Setting up DMA scatterlists
- 3.2.3 I/O completion
- 3.2.4 Implications for drivers that do not interpret bios (don't handle
- multiple segments)
- 3.3 I/O submission
-4. The I/O scheduler
-5. Scalability related changes
- 5.1 Granular locking: Removal of io_request_lock
- 5.2 Prepare for transition to 64 bit sector_t
-6. Other Changes/Implications
- 6.1 Partition re-mapping handled by the generic block layer
-7. A few tips on migration of older drivers
-8. A list of prior/related/impacted patches/ideas
-9. Other References/Discussion Threads
+ 2. New flexible and generic but minimalist i/o structure or descriptor
+ (instead of using buffer heads at the i/o layer)
+ 2.1 Requirements/Goals addressed
+ 2.2 The bio struct in detail (multi-page io unit)
+ 2.3 Changes in the request structure
+ 3. Using bios
+ 3.1 Setup/teardown (allocation, splitting)
+ 3.2 Generic bio helper routines
+ 3.2.1 Traversing segments and completion units in a request
+ 3.2.2 Setting up DMA scatterlists
+ 3.2.3 I/O completion
+ 3.2.4 Implications for drivers that do not interpret bios (don't handle
+ multiple segments)
+ 3.3 I/O submission
+ 4. The I/O scheduler
+ 5. Scalability related changes
+ 5.1 Granular locking: Removal of io_request_lock
+ 5.2 Prepare for transition to 64 bit sector_t
+ 6. Other Changes/Implications
+ 6.1 Partition re-mapping handled by the generic block layer
+ 7. A few tips on migration of older drivers
+ 8. A list of prior/related/impacted patches/ideas
+ 9. Other References/Discussion Threads
----------------------------------------------------------------------------
Bio Notes
---------
+=========
Let us discuss the changes in the context of how some overall goals for the
block layer are addressed.
1. Scope for tuning the generic logic to satisfy various requirements
+=====================================================================
The block layer design supports adaptable abstractions to handle common
processing with the ability to tune the logic to an appropriate extent
@@ -97,6 +108,7 @@ and application/middleware software designed to take advantage of these
capabilities.
1.1 Tuning based on low level device / driver capabilities
+----------------------------------------------------------
Sophisticated devices with large built-in caches, intelligent i/o scheduling
optimizations, high memory DMA support, etc may find some of the
@@ -133,12 +145,12 @@ Some new queue property settings:
Sets two variables that limit the size of the request.
- The request queue's max_sectors, which is a soft size in
- units of 512 byte sectors, and could be dynamically varied
- by the core kernel.
+ units of 512 byte sectors, and could be dynamically varied
+ by the core kernel.
- The request queue's max_hw_sectors, which is a hard limit
- and reflects the maximum size request a driver can handle
- in units of 512 byte sectors.
+ and reflects the maximum size request a driver can handle
+ in units of 512 byte sectors.
The default for both max_sectors and max_hw_sectors is
255. The upper limit of max_sectors is 1024.
@@ -161,8 +173,8 @@ Some new queue property settings:
New queue flags:
- QUEUE_FLAG_CLUSTER (see 3.2.2)
- QUEUE_FLAG_QUEUED (see 3.2.4)
+ - QUEUE_FLAG_CLUSTER (see 3.2.2)
+ - QUEUE_FLAG_QUEUED (see 3.2.4)
ii. High-mem i/o capabilities are now considered the default
@@ -234,6 +246,7 @@ I/O scheduler wrappers are to be used instead of accessing the queue directly.
See section 4. The I/O scheduler for details.
1.2 Tuning Based on High level code capabilities
+------------------------------------------------
i. Application capabilities for raw i/o
@@ -258,9 +271,11 @@ would need an additional mechanism either via open flags or ioctls, or some
other upper level mechanism to communicate such settings to block.
1.2.1 Request Priority/Latency
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-Todo/Under discussion:
-Arjan's proposed request priority scheme allows higher levels some broad
+Todo/Under discussion::
+
+ Arjan's proposed request priority scheme allows higher levels some broad
control (high/med/low) over the priority of an i/o request vs other pending
requests in the queue. For example it allows reads for bringing in an
executable page on demand to be given a higher priority over pending write
@@ -272,7 +287,9 @@ Arjan's proposed request priority scheme allows higher levels some broad
1.3 Direct Access to Low level Device/Driver Capabilities (Bypass mode)
- (e.g Diagnostics, Systems Management)
+-----------------------------------------------------------------------
+
+(e.g. Diagnostics, Systems Management)
There are situations where high-level code needs to have direct access to
the low level device capabilities or requires the ability to issue commands
@@ -308,28 +325,32 @@ involved. In the latter case, the driver would modify and manage the
request->buffer, request->sector and request->nr_sectors or
request->current_nr_sectors fields itself rather than using the block layer
end_request or end_that_request_first completion interfaces.
-(See 2.3 or Documentation/block/request.txt for a brief explanation of
+(See 2.3 or Documentation/block/request.rst for a brief explanation of
the request structure fields)
-[TBD: end_that_request_last should be usable even in this case;
-Perhaps an end_that_direct_request_first routine could be implemented to make
-handling direct requests easier for such drivers; Also for drivers that
-expect bios, a helper function could be provided for setting up a bio
-corresponding to a data buffer]
-
-<JENS: I dont understand the above, why is end_that_request_first() not
-usable? Or _last for that matter. I must be missing something>
-<SUP: What I meant here was that if the request doesn't have a bio, then
- end_that_request_first doesn't modify nr_sectors or current_nr_sectors,
- and hence can't be used for advancing request state settings on the
- completion of partial transfers. The driver has to modify these fields
- directly by hand.
- This is because end_that_request_first only iterates over the bio list,
- and always returns 0 if there are none associated with the request.
- _last works OK in this case, and is not a problem, as I mentioned earlier
->
+::
+
+ [TBD: end_that_request_last should be usable even in this case;
+ Perhaps an end_that_direct_request_first routine could be implemented to make
+ handling direct requests easier for such drivers; Also for drivers that
+ expect bios, a helper function could be provided for setting up a bio
+ corresponding to a data buffer]
+
+ <JENS: I dont understand the above, why is end_that_request_first() not
+ usable? Or _last for that matter. I must be missing something>
+
+ <SUP: What I meant here was that if the request doesn't have a bio, then
+ end_that_request_first doesn't modify nr_sectors or current_nr_sectors,
+ and hence can't be used for advancing request state settings on the
+ completion of partial transfers. The driver has to modify these fields
+ directly by hand.
+ This is because end_that_request_first only iterates over the bio list,
+ and always returns 0 if there are none associated with the request.
+ _last works OK in this case, and is not a problem, as I mentioned earlier
+ >
1.3.1 Pre-built Commands
+^^^^^^^^^^^^^^^^^^^^^^^^
A request can be created with a pre-built custom command to be sent directly
to the device. The cmd block in the request structure has room for filling
@@ -360,9 +381,11 @@ Aside:
the pre-builder hook can be invoked there.
-2. Flexible and generic but minimalist i/o structure/descriptor.
+2. Flexible and generic but minimalist i/o structure/descriptor
+===============================================================
2.1 Reason for a new structure and requirements addressed
+---------------------------------------------------------
Prior to 2.5, buffer heads were used as the unit of i/o at the generic block
layer, and the low level request structure was associated with a chain of
@@ -378,26 +401,26 @@ which were generated for each such chunk.
The following were some of the goals and expectations considered in the
redesign of the block i/o data structure in 2.5.
-i. Should be appropriate as a descriptor for both raw and buffered i/o -
+1. Should be appropriate as a descriptor for both raw and buffered i/o -
avoid cache related fields which are irrelevant in the direct/page i/o path,
or filesystem block size alignment restrictions which may not be relevant
for raw i/o.
-ii. Ability to represent high-memory buffers (which do not have a virtual
+2. Ability to represent high-memory buffers (which do not have a virtual
address mapping in kernel address space).
-iii.Ability to represent large i/os w/o unnecessarily breaking them up (i.e
+3. Ability to represent large i/os w/o unnecessarily breaking them up (i.e
greater than PAGE_SIZE chunks in one shot)
-iv. At the same time, ability to retain independent identity of i/os from
+4. At the same time, ability to retain independent identity of i/os from
different sources or i/o units requiring individual completion (e.g. for
latency reasons)
-v. Ability to represent an i/o involving multiple physical memory segments
+5. Ability to represent an i/o involving multiple physical memory segments
(including non-page aligned page fragments, as specified via readv/writev)
without unnecessarily breaking it up, if the underlying device is capable of
handling it.
-vi. Preferably should be based on a memory descriptor structure that can be
+6. Preferably should be based on a memory descriptor structure that can be
passed around different types of subsystems or layers, maybe even
networking, without duplication or extra copies of data/descriptor fields
themselves in the process
-vii.Ability to handle the possibility of splits/merges as the structure passes
+7. Ability to handle the possibility of splits/merges as the structure passes
through layered drivers (lvm, md, evms), with minimal overhead.
The solution was to define a new structure (bio) for the block layer,
@@ -408,6 +431,7 @@ bh structure for buffered i/o, and in the case of raw/direct i/o kiobufs are
mapped to bio structures.
2.2 The bio struct
+------------------
The bio structure uses a vector representation pointing to an array of tuples
of <page, offset, len> to describe the i/o buffer, and has various other
@@ -417,16 +441,18 @@ performing the i/o.
Notice that this representation means that a bio has no virtual address
mapping at all (unlike buffer heads).
-struct bio_vec {
+::
+
+ struct bio_vec {
struct page *bv_page;
unsigned short bv_len;
unsigned short bv_offset;
-};
+ };
-/*
- * main unit of I/O for the block layer and lower layers (ie drivers)
- */
-struct bio {
+ /*
+ * main unit of I/O for the block layer and lower layers (ie drivers)
+ */
+ struct bio {
struct bio *bi_next; /* request queue link */
struct block_device *bi_bdev; /* target device */
unsigned long bi_flags; /* status, command, etc */
@@ -443,7 +469,7 @@ struct bio {
bio_end_io_t *bi_end_io; /* bi_end_io (bio) */
atomic_t bi_cnt; /* pin count: free when it hits zero */
void *bi_private;
-};
+ };
With this multipage bio design:
@@ -453,7 +479,7 @@ With this multipage bio design:
- Splitting of an i/o request across multiple devices (as in the case of
lvm or raid) is achieved by cloning the bio (where the clone points to
the same bi_io_vec array, but with the index and size accordingly modified)
-- A linked list of bios is used as before for unrelated merges (*) - this
+- A linked list of bios is used as before for unrelated merges [#]_ - this
avoids reallocs and makes independent completions easier to handle.
- Code that traverses the req list can find all the segments of a bio
by using rq_for_each_segment. This handles the fact that a request
@@ -462,10 +488,12 @@ With this multipage bio design:
field to keep track of the next bio_vec entry to process.
(e.g a 1MB bio_vec needs to be handled in max 128kB chunks for IDE)
[TBD: Should preferably also have a bi_voffset and bi_vlen to avoid modifying
- bi_offset an len fields]
+ bi_offset and len fields]
+
+.. [#]
-(*) unrelated merges -- a request ends up containing two or more bios that
- didn't originate from the same place.
+ unrelated merges -- a request ends up containing two or more bios that
+ didn't originate from the same place.
bi_end_io() i/o callback gets called on i/o completion of the entire bio.
@@ -483,10 +511,11 @@ which in turn means that only raw I/O uses it (direct i/o may not work
right now). The intent however is to enable clustering of pages etc to
become possible. The pagebuf abstraction layer from SGI also uses multi-page
bios, but that is currently not included in the stock development kernels.
-The same is true of Andrew Morton's work-in-progress multipage bio writeout
+The same is true of Andrew Morton's work-in-progress multipage bio writeout
and readahead patches.
2.3 Changes in the Request Structure
+------------------------------------
The request structure is the structure that gets passed down to low level
drivers. The block layer make_request function builds up a request structure,
@@ -499,11 +528,11 @@ request structure.
Only some relevant fields (mainly those which changed or may be referred
to in some of the discussion here) are listed below, not necessarily in
the order in which they occur in the structure (see include/linux/blkdev.h)
-Refer to Documentation/block/request.txt for details about all the request
+Refer to Documentation/block/request.rst for details about all the request
structure fields and a quick reference about the layers which are
-supposed to use or modify those fields.
+supposed to use or modify those fields::
-struct request {
+ struct request {
struct list_head queuelist; /* Not meant to be directly accessed by
the driver.
Used by q->elv_next_request_fn
@@ -548,11 +577,11 @@ struct request {
.
struct bio *bio, *biotail; /* bio list instead of bh */
struct request_list *rl;
-}
-
+ }
+
See the req_ops and req_flag_bits definitions for an explanation of the various
flags available. Some bits are used by the block layer or i/o scheduler.
-
+
The behaviour of the various sector counts are almost the same as before,
except that since we have multi-segment bios, current_nr_sectors refers
to the numbers of sectors in the current segment being processed which could
@@ -578,8 +607,10 @@ a driver needs to be careful about interoperation with the block layer helper
functions which the driver uses. (Section 1.3)
3. Using bios
+=============
3.1 Setup/Teardown
+------------------
There are routines for managing the allocation, and reference counting, and
freeing of bios (bio_alloc, bio_get, bio_put).
@@ -606,10 +637,13 @@ case of bio, these routines make use of the standard slab allocator.
The caller of bio_alloc is expected to taken certain steps to avoid
deadlocks, e.g. avoid trying to allocate more memory from the pool while
already holding memory obtained from the pool.
-[TBD: This is a potential issue, though a rare possibility
- in the bounce bio allocation that happens in the current code, since
- it ends up allocating a second bio from the same pool while
- holding the original bio ]
+
+::
+
+ [TBD: This is a potential issue, though a rare possibility
+ in the bounce bio allocation that happens in the current code, since
+ it ends up allocating a second bio from the same pool while
+ holding the original bio ]
Memory allocated from the pool should be released back within a limited
amount of time (in the case of bio, that would be after the i/o is completed).
@@ -635,14 +669,18 @@ same bio_vec_list). This would typically be used for splitting i/o requests
in lvm or md.
3.2 Generic bio helper Routines
+-------------------------------
3.2.1 Traversing segments and completion units in a request
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
The macro rq_for_each_segment() should be used for traversing the bios
in the request list (drivers should avoid directly trying to do it
themselves). Using these helpers should also make it easier to cope
with block changes in the future.
+::
+
struct req_iterator iter;
rq_for_each_segment(bio_vec, rq, iter)
/* bio_vec is now current segment */
@@ -653,6 +691,7 @@ which don't make a distinction between segments and completion units would
need to be reorganized to support multi-segment bios.
3.2.2 Setting up DMA scatterlists
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
The blk_rq_map_sg() helper routine would be used for setting up scatter
gather lists from a request, so a driver need not do it on its own.
@@ -683,6 +722,7 @@ of physical data segments in a request (i.e. the largest sized scatter list
a driver could handle)
3.2.3 I/O completion
+^^^^^^^^^^^^^^^^^^^^
The existing generic block layer helper routines end_request,
end_that_request_first and end_that_request_last can be used for i/o
@@ -691,8 +731,10 @@ request can be kicked of) as before. With the introduction of multi-page
bio support, end_that_request_first requires an additional argument indicating
the number of sectors completed.
-3.2.4 Implications for drivers that do not interpret bios (don't handle
- multiple segments)
+3.2.4 Implications for drivers that do not interpret bios
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+(don't handle multiple segments)
Drivers that do not interpret bios e.g those which do not handle multiple
segments and do not support i/o into high memory addresses (require bounce
@@ -707,15 +749,18 @@ be used if only if the request has come down from block/bio path, not for
direct access requests which only specify rq->buffer without a valid rq->bio)
3.3 I/O Submission
+------------------
The routine submit_bio() is used to submit a single io. Higher level i/o
routines make use of this:
(a) Buffered i/o:
+
The routine submit_bh() invokes submit_bio() on a bio corresponding to the
bh, allocating the bio if required. ll_rw_block() uses submit_bh() as before.
(b) Kiobuf i/o (for raw/direct i/o):
+
The ll_rw_kio() routine breaks up the kiobuf into page sized chunks and
maps the array to one or more multi-page bios, issuing submit_bio() to
perform the i/o on each of these.
@@ -738,6 +783,7 @@ Todo/Observation:
(c) Page i/o:
+
Todo/Under discussion:
Andrew Morton's multi-page bio patches attempt to issue multi-page
@@ -753,6 +799,7 @@ Todo/Under discussion:
abstraction, but intended to be as lightweight as possible).
(d) Direct access i/o:
+
Direct access requests that do not contain bios would be submitted differently
as discussed earlier in section 1.3.
@@ -780,11 +827,13 @@ Aside:
4. The I/O scheduler
+====================
+
I/O scheduler, a.k.a. elevator, is implemented in two layers. Generic dispatch
queue and specific I/O schedulers. Unless stated otherwise, elevator is used
to refer to both parts and I/O scheduler to specific I/O schedulers.
-Block layer implements generic dispatch queue in block/*.c.
+Block layer implements generic dispatch queue in `block/*.c`.
The generic dispatch queue is responsible for requeueing, handling non-fs
requests and all other subtleties.
@@ -802,8 +851,11 @@ doesn't implement a function, the switch does nothing or some minimal house
keeping work.
4.1. I/O scheduler API
+----------------------
The functions an elevator may implement are: (* are mandatory)
+
+=============================== ================================================
elevator_merge_fn called to query requests for merge with a bio
elevator_merge_req_fn called when two requests get merged. the one
@@ -857,8 +909,11 @@ elevator_deactivate_req_fn Called when device driver decides to delay
elevator_init_fn*
elevator_exit_fn Allocate and free any elevator specific storage
for a queue.
+=============================== ================================================
4.2 Request flows seen by I/O schedulers
+----------------------------------------
+
All requests seen by I/O schedulers strictly follow one of the following three
flows.
@@ -872,9 +927,12 @@ flows.
-> put_req_fn
4.3 I/O scheduler implementation
+--------------------------------
+
The generic i/o scheduler algorithm attempts to sort/merge/batch requests for
optimal disk scan and request servicing performance (based on generic
principles and device capabilities), optimized for:
+
i. improved throughput
ii. improved latency
iii. better utilization of h/w & CPU time
@@ -928,15 +986,19 @@ Aside:
a big request from the broken up pieces coming by.
4.4 I/O contexts
+----------------
+
I/O contexts provide a dynamically allocated per process data area. They may
be used in I/O schedulers, and in the block layer (could be used for IO statis,
-priorities for example). See *io_context in block/ll_rw_blk.c, and as-iosched.c
+priorities for example). See `*io_context` in block/ll_rw_blk.c, and as-iosched.c
for an example of usage in an i/o scheduler.
5. Scalability related changes
+==============================
5.1 Granular Locking: io_request_lock replaced by a per-queue lock
+------------------------------------------------------------------
The global io_request_lock has been removed as of 2.5, to avoid
the scalability bottleneck it was causing, and has been replaced by more
@@ -951,20 +1013,23 @@ request_fn execution which it means that lots of older drivers
should still be SMP safe. Drivers are free to drop the queue
lock themselves, if required. Drivers that explicitly used the
io_request_lock for serialization need to be modified accordingly.
-Usually it's as easy as adding a global lock:
+Usually it's as easy as adding a global lock::
static DEFINE_SPINLOCK(my_driver_lock);
and passing the address to that lock to blk_init_queue().
5.2 64 bit sector numbers (sector_t prepares for 64 bit support)
+----------------------------------------------------------------
The sector number used in the bio structure has been changed to sector_t,
which could be defined as 64 bit in preparation for 64 bit sector support.
6. Other Changes/Implications
+=============================
6.1 Partition re-mapping handled by the generic block layer
+-----------------------------------------------------------
In 2.5 some of the gendisk/partition related code has been reorganized.
Now the generic block layer performs partition-remapping early and thus
@@ -979,6 +1044,7 @@ sent are offset from the beginning of the device.
7. A Few Tips on Migration of older drivers
+===========================================
Old-style drivers that just use CURRENT and ignores clustered requests,
may not need much change. The generic layer will automatically handle
@@ -1012,12 +1078,12 @@ blk_init_queue time.
Drivers no longer have to map a {partition, sector offset} into the
correct absolute location anymore, this is done by the block layer, so
-where a driver received a request ala this before:
+where a driver received a request ala this before::
rq->rq_dev = mk_kdev(3, 5); /* /dev/hda5 */
rq->sector = 0; /* first sector on hda5 */
- it will now see
+it will now see::
rq->rq_dev = mk_kdev(3, 0); /* /dev/hda */
rq->sector = 123128; /* offset from start of disk */
@@ -1034,38 +1100,65 @@ a bio into the virtual address space.
8. Prior/Related/Impacted patches
+=================================
8.1. Earlier kiobuf patches (sct/axboe/chait/hch/mkp)
+-----------------------------------------------------
+
- orig kiobuf & raw i/o patches (now in 2.4 tree)
- direct kiobuf based i/o to devices (no intermediate bh's)
- page i/o using kiobuf
- kiobuf splitting for lvm (mkp)
- elevator support for kiobuf request merging (axboe)
+
8.2. Zero-copy networking (Dave Miller)
+---------------------------------------
+
8.3. SGI XFS - pagebuf patches - use of kiobufs
+-----------------------------------------------
8.4. Multi-page pioent patch for bio (Christoph Hellwig)
+--------------------------------------------------------
8.5. Direct i/o implementation (Andrea Arcangeli) since 2.4.10-pre11
+--------------------------------------------------------------------
8.6. Async i/o implementation patch (Ben LaHaise)
+-------------------------------------------------
8.7. EVMS layering design (IBM EVMS team)
-8.8. Larger page cache size patch (Ben LaHaise) and
- Large page size (Daniel Phillips)
+-----------------------------------------
+8.8. Larger page cache size patch (Ben LaHaise) and Large page size (Daniel Phillips)
+-------------------------------------------------------------------------------------
+
=> larger contiguous physical memory buffers
+
8.9. VM reservations patch (Ben LaHaise)
+----------------------------------------
8.10. Write clustering patches ? (Marcelo/Quintela/Riel ?)
+----------------------------------------------------------
8.11. Block device in page cache patch (Andrea Archangeli) - now in 2.4.10+
-8.12. Multiple block-size transfers for faster raw i/o (Shailabh Nagar,
- Badari)
+---------------------------------------------------------------------------
+8.12. Multiple block-size transfers for faster raw i/o (Shailabh Nagar, Badari)
+-------------------------------------------------------------------------------
8.13 Priority based i/o scheduler - prepatches (Arjan van de Ven)
+------------------------------------------------------------------
8.14 IDE Taskfile i/o patch (Andre Hedrick)
+--------------------------------------------
8.15 Multi-page writeout and readahead patches (Andrew Morton)
+---------------------------------------------------------------
8.16 Direct i/o patches for 2.5 using kvec and bio (Badari Pulavarthy)
+-----------------------------------------------------------------------
-9. Other References:
+9. Other References
+===================
-9.1 The Splice I/O Model - Larry McVoy (and subsequent discussions on lkml,
-and Linus' comments - Jan 2001)
-9.2 Discussions about kiobuf and bh design on lkml between sct, linus, alan
-et al - Feb-March 2001 (many of the initial thoughts that led to bio were
-brought up in this discussion thread)
-9.3 Discussions on mempool on lkml - Dec 2001.
+9.1 The Splice I/O Model
+------------------------
+
+Larry McVoy (and subsequent discussions on lkml, and Linus' comments - Jan 2001)
+9.2 Discussions about kiobuf and bh design
+------------------------------------------
+
+On lkml between sct, linus, alan et al - Feb-March 2001 (many of the
+initial thoughts that led to bio were brought up in this discussion thread)
+
+9.3 Discussions on mempool on lkml - Dec 2001.
+----------------------------------------------
diff --git a/Documentation/block/biovecs.txt b/Documentation/block/biovecs.rst
index ce6eccaf5df7..86fa66c87172 100644
--- a/Documentation/block/biovecs.txt
+++ b/Documentation/block/biovecs.rst
@@ -1,6 +1,6 @@
-
-Immutable biovecs and biovec iterators:
-=======================================
+======================================
+Immutable biovecs and biovec iterators
+======================================
Kent Overstreet <kmo@daterainc.com>
@@ -121,10 +121,12 @@ Other implications:
Usage of helpers:
=================
-* The following helpers whose names have the suffix of "_all" can only be used
-on non-BIO_CLONED bio. They are usually used by filesystem code. Drivers
-shouldn't use them because the bio may have been split before it reached the
-driver.
+* The following helpers whose names have the suffix of `_all` can only be used
+ on non-BIO_CLONED bio. They are usually used by filesystem code. Drivers
+ shouldn't use them because the bio may have been split before it reached the
+ driver.
+
+::
bio_for_each_segment_all()
bio_first_bvec_all()
@@ -132,13 +134,13 @@ driver.
bio_last_bvec_all()
* The following helpers iterate over single-page segment. The passed 'struct
-bio_vec' will contain a single-page IO vector during the iteration
+ bio_vec' will contain a single-page IO vector during the iteration::
bio_for_each_segment()
bio_for_each_segment_all()
* The following helpers iterate over multi-page bvec. The passed 'struct
-bio_vec' will contain a multi-page IO vector during the iteration
+ bio_vec' will contain a multi-page IO vector during the iteration::
bio_for_each_bvec()
rq_for_each_bvec()
diff --git a/Documentation/block/capability.rst b/Documentation/block/capability.rst
new file mode 100644
index 000000000000..2cf258d64bbe
--- /dev/null
+++ b/Documentation/block/capability.rst
@@ -0,0 +1,18 @@
+===============================
+Generic Block Device Capability
+===============================
+
+This file documents the sysfs file block/<disk>/capability
+
+capability is a hex word indicating which capabilities a specific disk
+supports. For more information on bits not listed here, see
+include/linux/genhd.h
+
+GENHD_FL_MEDIA_CHANGE_NOTIFY
+----------------------------
+
+Value: 4
+
+When this bit is set, the disk supports Asynchronous Notification
+of media change events. These events will be broadcast to user
+space via kernel uevent.
diff --git a/Documentation/block/capability.txt b/Documentation/block/capability.txt
deleted file mode 100644
index 2f1729424ef4..000000000000
--- a/Documentation/block/capability.txt
+++ /dev/null
@@ -1,15 +0,0 @@
-Generic Block Device Capability
-===============================================================================
-This file documents the sysfs file block/<disk>/capability
-
-capability is a hex word indicating which capabilities a specific disk
-supports. For more information on bits not listed here, see
-include/linux/genhd.h
-
-Capability Value
--------------------------------------------------------------------------------
-GENHD_FL_MEDIA_CHANGE_NOTIFY 4
- When this bit is set, the disk supports Asynchronous Notification
- of media change events. These events will be broadcast to user
- space via kernel uevent.
-
diff --git a/Documentation/block/cmdline-partition.txt b/Documentation/block/cmdline-partition.rst
index 760a3f7c3ed4..530bedff548a 100644
--- a/Documentation/block/cmdline-partition.txt
+++ b/Documentation/block/cmdline-partition.rst
@@ -1,5 +1,6 @@
+==============================================
Embedded device command line partition parsing
-=====================================================================
+==============================================
The "blkdevparts" command line option adds support for reading the
block device partition table from the kernel command line.
@@ -22,12 +23,15 @@ blkdevparts=<blkdev-def>[;<blkdev-def>]
<size>
partition size, in bytes, such as: 512, 1m, 1G.
size may contain an optional suffix of (upper or lower case):
+
K, M, G, T, P, E.
+
"-" is used to denote all remaining space.
<offset>
partition start address, in bytes.
offset may contain an optional suffix of (upper or lower case):
+
K, M, G, T, P, E.
(part-name)
@@ -36,11 +40,14 @@ blkdevparts=<blkdev-def>[;<blkdev-def>]
User space application can access partition by partition name.
Example:
+
eMMC disk names are "mmcblk0" and "mmcblk0boot0".
- bootargs:
+ bootargs::
+
'blkdevparts=mmcblk0:1G(data0),1G(data1),-;mmcblk0boot0:1m(boot),-(kernel)'
- dmesg:
+ dmesg::
+
mmcblk0: p1(data0) p2(data1) p3()
mmcblk0boot0: p1(boot) p2(kernel)
diff --git a/Documentation/block/data-integrity.txt b/Documentation/block/data-integrity.rst
index 934c44ea0c57..4f2452a95c43 100644
--- a/Documentation/block/data-integrity.txt
+++ b/Documentation/block/data-integrity.rst
@@ -1,5 +1,9 @@
-----------------------------------------------------------------------
-1. INTRODUCTION
+==============
+Data Integrity
+==============
+
+1. Introduction
+===============
Modern filesystems feature checksumming of data and metadata to
protect against data corruption. However, the detection of the
@@ -28,8 +32,8 @@ integrity of the I/O and reject it if corruption is detected. This
allows not only corruption prevention but also isolation of the point
of failure.
-----------------------------------------------------------------------
-2. THE DATA INTEGRITY EXTENSIONS
+2. The Data Integrity Extensions
+================================
As written, the protocol extensions only protect the path between
controller and storage device. However, many controllers actually
@@ -75,8 +79,8 @@ Extensions. As these extensions are outside the scope of the protocol
bodies (T10, T13), Oracle and its partners are trying to standardize
them within the Storage Networking Industry Association.
-----------------------------------------------------------------------
-3. KERNEL CHANGES
+3. Kernel Changes
+=================
The data integrity framework in Linux enables protection information
to be pinned to I/Os and sent to/received from controllers that
@@ -123,10 +127,11 @@ access to manipulate the tags from user space. A passthrough
interface for this is being worked on.
-----------------------------------------------------------------------
-4. BLOCK LAYER IMPLEMENTATION DETAILS
+4. Block Layer Implementation Details
+=====================================
-4.1 BIO
+4.1 Bio
+-------
The data integrity patches add a new field to struct bio when
CONFIG_BLK_DEV_INTEGRITY is enabled. bio_integrity(bio) returns a
@@ -145,7 +150,8 @@ attached using bio_integrity_add_page().
bio_free() will automatically free the bip.
-4.2 BLOCK DEVICE
+4.2 Block Device
+----------------
Because the format of the protection data is tied to the physical
disk, each block device has been extended with a block integrity
@@ -163,10 +169,11 @@ and MD linear, RAID0 and RAID1 are currently supported. RAID4/5/6
will require extra work due to the application tag.
-----------------------------------------------------------------------
-5.0 BLOCK LAYER INTEGRITY API
+5.0 Block Layer Integrity API
+=============================
-5.1 NORMAL FILESYSTEM
+5.1 Normal Filesystem
+---------------------
The normal filesystem is unaware that the underlying block device
is capable of sending/receiving integrity metadata. The IMD will
@@ -174,25 +181,26 @@ will require extra work due to the application tag.
in case of a WRITE. A READ request will cause the I/O integrity
to be verified upon completion.
- IMD generation and verification can be toggled using the
+ IMD generation and verification can be toggled using the::
/sys/block/<bdev>/integrity/write_generate
- and
+ and::
/sys/block/<bdev>/integrity/read_verify
flags.
-5.2 INTEGRITY-AWARE FILESYSTEM
+5.2 Integrity-Aware Filesystem
+------------------------------
A filesystem that is integrity-aware can prepare I/Os with IMD
attached. It can also use the application tag space if this is
supported by the block device.
- bool bio_integrity_prep(bio);
+ `bool bio_integrity_prep(bio);`
To generate IMD for WRITE and to set up buffers for READ, the
filesystem must call bio_integrity_prep(bio).
@@ -204,14 +212,15 @@ will require extra work due to the application tag.
Complete bio with error if prepare failed for some reson.
-5.3 PASSING EXISTING INTEGRITY METADATA
+5.3 Passing Existing Integrity Metadata
+---------------------------------------
Filesystems that either generate their own integrity metadata or
are capable of transferring IMD from user space can use the
following calls:
- struct bip * bio_integrity_alloc(bio, gfp_mask, nr_pages);
+ `struct bip * bio_integrity_alloc(bio, gfp_mask, nr_pages);`
Allocates the bio integrity payload and hangs it off of the bio.
nr_pages indicate how many pages of protection data need to be
@@ -220,7 +229,7 @@ will require extra work due to the application tag.
The integrity payload will be freed at bio_free() time.
- int bio_integrity_add_page(bio, page, len, offset);
+ `int bio_integrity_add_page(bio, page, len, offset);`
Attaches a page containing integrity metadata to an existing
bio. The bio must have an existing bip,
@@ -241,21 +250,21 @@ will require extra work due to the application tag.
integrity upon completion.
-5.4 REGISTERING A BLOCK DEVICE AS CAPABLE OF EXCHANGING INTEGRITY
- METADATA
+5.4 Registering A Block Device As Capable Of Exchanging Integrity Metadata
+--------------------------------------------------------------------------
To enable integrity exchange on a block device the gendisk must be
registered as capable:
- int blk_integrity_register(gendisk, blk_integrity);
+ `int blk_integrity_register(gendisk, blk_integrity);`
The blk_integrity struct is a template and should contain the
- following:
+ following::
static struct blk_integrity my_profile = {
.name = "STANDARDSBODY-TYPE-VARIANT-CSUM",
.generate_fn = my_generate_fn,
- .verify_fn = my_verify_fn,
+ .verify_fn = my_verify_fn,
.tuple_size = sizeof(struct my_tuple_size),
.tag_size = <tag bytes per hw sector>,
};
@@ -278,4 +287,5 @@ will require extra work due to the application tag.
0 depending on the value of the Control Mode Page ATO bit.
----------------------------------------------------------------------
+
2007-12-24 Martin K. Petersen <martin.petersen@oracle.com>
diff --git a/Documentation/block/deadline-iosched.txt b/Documentation/block/deadline-iosched.rst
index 2d82c80322cb..9f5c5a4c370e 100644
--- a/Documentation/block/deadline-iosched.txt
+++ b/Documentation/block/deadline-iosched.rst
@@ -1,3 +1,4 @@
+==============================
Deadline IO scheduler tunables
==============================
@@ -7,15 +8,13 @@ of interest to power users.
Selecting IO schedulers
-----------------------
-Refer to Documentation/block/switching-sched.txt for information on
+Refer to Documentation/block/switching-sched.rst for information on
selecting an io scheduler on a per-device basis.
-
-********************************************************************************
-
+------------------------------------------------------------------------------
read_expire (in ms)
------------
+-----------------------
The goal of the deadline io scheduler is to attempt to guarantee a start
service time for a request. As we focus mainly on read latencies, this is
@@ -25,15 +24,15 @@ milliseconds.
write_expire (in ms)
------------
+-----------------------
Similar to read_expire mentioned above, but for writes.
fifo_batch (number of requests)
-----------
+------------------------------------
-Requests are grouped into ``batches'' of a particular data direction (read or
+Requests are grouped into ``batches`` of a particular data direction (read or
write) which are serviced in increasing sector order. To limit extra seeking,
deadline expiries are only checked between batches. fifo_batch controls the
maximum number of requests per batch.
@@ -45,7 +44,7 @@ generally improves throughput, at the cost of latency variation.
writes_starved (number of dispatches)
---------------
+--------------------------------------
When we have to move requests from the io scheduler queue to the block
device dispatch queue, we always give a preference to reads. However, we
@@ -56,7 +55,7 @@ same criteria as reads.
front_merges (bool)
-------------
+----------------------
Sometimes it happens that a request enters the io scheduler that is contiguous
with a request that is already on the queue. Either it fits in the back of that
@@ -71,5 +70,3 @@ rbtree front sector lookup when the io scheduler merge function is called.
Nov 11 2002, Jens Axboe <jens.axboe@oracle.com>
-
-
diff --git a/Documentation/block/index.rst b/Documentation/block/index.rst
new file mode 100644
index 000000000000..3fa7a52fafa4
--- /dev/null
+++ b/Documentation/block/index.rst
@@ -0,0 +1,25 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=====
+Block
+=====
+
+.. toctree::
+ :maxdepth: 1
+
+ bfq-iosched
+ biodoc
+ biovecs
+ capability
+ cmdline-partition
+ data-integrity
+ deadline-iosched
+ ioprio
+ kyber-iosched
+ null_blk
+ pr
+ queue-sysfs
+ request
+ stat
+ switching-sched
+ writeback_cache_control
diff --git a/Documentation/block/ioprio.txt b/Documentation/block/ioprio.rst
index 8ed8c59380b4..f72b0de65af7 100644
--- a/Documentation/block/ioprio.txt
+++ b/Documentation/block/ioprio.rst
@@ -1,3 +1,4 @@
+===================
Block io priorities
===================
@@ -40,81 +41,81 @@ class data, since it doesn't really apply here.
Tools
-----
-See below for a sample ionice tool. Usage:
+See below for a sample ionice tool. Usage::
-# ionice -c<class> -n<level> -p<pid>
+ # ionice -c<class> -n<level> -p<pid>
If pid isn't given, the current process is assumed. IO priority settings
are inherited on fork, so you can use ionice to start the process at a given
-level:
+level::
-# ionice -c2 -n0 /bin/ls
+ # ionice -c2 -n0 /bin/ls
will run ls at the best-effort scheduling class at the highest priority.
-For a running process, you can give the pid instead:
+For a running process, you can give the pid instead::
-# ionice -c1 -n2 -p100
+ # ionice -c1 -n2 -p100
will change pid 100 to run at the realtime scheduling class, at priority 2.
----> snip ionice.c tool <---
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <errno.h>
-#include <getopt.h>
-#include <unistd.h>
-#include <sys/ptrace.h>
-#include <asm/unistd.h>
-
-extern int sys_ioprio_set(int, int, int);
-extern int sys_ioprio_get(int, int);
-
-#if defined(__i386__)
-#define __NR_ioprio_set 289
-#define __NR_ioprio_get 290
-#elif defined(__ppc__)
-#define __NR_ioprio_set 273
-#define __NR_ioprio_get 274
-#elif defined(__x86_64__)
-#define __NR_ioprio_set 251
-#define __NR_ioprio_get 252
-#elif defined(__ia64__)
-#define __NR_ioprio_set 1274
-#define __NR_ioprio_get 1275
-#else
-#error "Unsupported arch"
-#endif
-
-static inline int ioprio_set(int which, int who, int ioprio)
-{
+ionice.c tool::
+
+ #include <stdio.h>
+ #include <stdlib.h>
+ #include <errno.h>
+ #include <getopt.h>
+ #include <unistd.h>
+ #include <sys/ptrace.h>
+ #include <asm/unistd.h>
+
+ extern int sys_ioprio_set(int, int, int);
+ extern int sys_ioprio_get(int, int);
+
+ #if defined(__i386__)
+ #define __NR_ioprio_set 289
+ #define __NR_ioprio_get 290
+ #elif defined(__ppc__)
+ #define __NR_ioprio_set 273
+ #define __NR_ioprio_get 274
+ #elif defined(__x86_64__)
+ #define __NR_ioprio_set 251
+ #define __NR_ioprio_get 252
+ #elif defined(__ia64__)
+ #define __NR_ioprio_set 1274
+ #define __NR_ioprio_get 1275
+ #else
+ #error "Unsupported arch"
+ #endif
+
+ static inline int ioprio_set(int which, int who, int ioprio)
+ {
return syscall(__NR_ioprio_set, which, who, ioprio);
-}
+ }
-static inline int ioprio_get(int which, int who)
-{
+ static inline int ioprio_get(int which, int who)
+ {
return syscall(__NR_ioprio_get, which, who);
-}
+ }
-enum {
+ enum {
IOPRIO_CLASS_NONE,
IOPRIO_CLASS_RT,
IOPRIO_CLASS_BE,
IOPRIO_CLASS_IDLE,
-};
+ };
-enum {
+ enum {
IOPRIO_WHO_PROCESS = 1,
IOPRIO_WHO_PGRP,
IOPRIO_WHO_USER,
-};
+ };
-#define IOPRIO_CLASS_SHIFT 13
+ #define IOPRIO_CLASS_SHIFT 13
-const char *to_prio[] = { "none", "realtime", "best-effort", "idle", };
+ const char *to_prio[] = { "none", "realtime", "best-effort", "idle", };
-int main(int argc, char *argv[])
-{
+ int main(int argc, char *argv[])
+ {
int ioprio = 4, set = 0, ioprio_class = IOPRIO_CLASS_BE;
int c, pid = 0;
@@ -175,9 +176,7 @@ int main(int argc, char *argv[])
}
return 0;
-}
-
----> snip ionice.c tool <---
+ }
March 11 2005, Jens Axboe <jens.axboe@oracle.com>
diff --git a/Documentation/block/kyber-iosched.txt b/Documentation/block/kyber-iosched.rst
index e94feacd7edc..3e164dd0617c 100644
--- a/Documentation/block/kyber-iosched.txt
+++ b/Documentation/block/kyber-iosched.rst
@@ -1,5 +1,6 @@
+============================
Kyber I/O scheduler tunables
-===========================
+============================
The only two tunables for the Kyber scheduler are the target latencies for
reads and synchronous writes. Kyber will throttle requests in order to meet
diff --git a/Documentation/block/null_blk.txt b/Documentation/block/null_blk.rst
index 41f0a3d33bbd..31451d80783c 100644
--- a/Documentation/block/null_blk.txt
+++ b/Documentation/block/null_blk.rst
@@ -1,33 +1,43 @@
+========================
Null block device driver
-================================================================================
+========================
-I. Overview
+1. Overview
+===========
The null block device (/dev/nullb*) is used for benchmarking the various
block-layer implementations. It emulates a block device of X gigabytes in size.
The following instances are possible:
Single-queue block-layer
+
- Request-based.
- Single submission queue per device.
- Implements IO scheduling algorithms (CFQ, Deadline, noop).
+
Multi-queue block-layer
+
- Request-based.
- Configurable submission queues per device.
+
No block-layer (Known as bio-based)
+
- Bio-based. IO requests are submitted directly to the device driver.
- Directly accepts bio data structure and returns them.
All of them have a completion queue for each core in the system.
-II. Module parameters applicable for all instances:
+2. Module parameters applicable for all instances
+=================================================
queue_mode=[0-2]: Default: 2-Multi-queue
Selects which block-layer the module should instantiate with.
- 0: Bio-based.
- 1: Single-queue.
- 2: Multi-queue.
+ = ============
+ 0 Bio-based
+ 1 Single-queue
+ 2 Multi-queue
+ = ============
home_node=[0--nr_nodes]: Default: NUMA_NO_NODE
Selects what CPU node the data structures are allocated from.
@@ -45,12 +55,14 @@ nr_devices=[Number of devices]: Default: 1
irqmode=[0-2]: Default: 1-Soft-irq
The completion mode used for completing IOs to the block-layer.
- 0: None.
- 1: Soft-irq. Uses IPI to complete IOs across CPU nodes. Simulates the overhead
+ = ===========================================================================
+ 0 None.
+ 1 Soft-irq. Uses IPI to complete IOs across CPU nodes. Simulates the overhead
when IOs are issued from another CPU node than the home the device is
connected to.
- 2: Timer: Waits a specific period (completion_nsec) for each IO before
+ 2 Timer: Waits a specific period (completion_nsec) for each IO before
completion.
+ = ===========================================================================
completion_nsec=[ns]: Default: 10,000ns
Combined with irqmode=2 (timer). The time each completion event must wait.
@@ -66,30 +78,45 @@ hw_queue_depth=[0..qdepth]: Default: 64
III: Multi-queue specific parameters
use_per_node_hctx=[0/1]: Default: 0
- 0: The number of submit queues are set to the value of the submit_queues
+
+ = =====================================================================
+ 0 The number of submit queues are set to the value of the submit_queues
parameter.
- 1: The multi-queue block layer is instantiated with a hardware dispatch
+ 1 The multi-queue block layer is instantiated with a hardware dispatch
queue for each CPU node in the system.
+ = =====================================================================
no_sched=[0/1]: Default: 0
- 0: nullb* use default blk-mq io scheduler.
- 1: nullb* doesn't use io scheduler.
+
+ = ======================================
+ 0 nullb* use default blk-mq io scheduler
+ 1 nullb* doesn't use io scheduler
+ = ======================================
blocking=[0/1]: Default: 0
- 0: Register as a non-blocking blk-mq driver device.
- 1: Register as a blocking blk-mq driver device, null_blk will set
+
+ = ===============================================================
+ 0 Register as a non-blocking blk-mq driver device.
+ 1 Register as a blocking blk-mq driver device, null_blk will set
the BLK_MQ_F_BLOCKING flag, indicating that it sometimes/always
needs to block in its ->queue_rq() function.
+ = ===============================================================
shared_tags=[0/1]: Default: 0
- 0: Tag set is not shared.
- 1: Tag set shared between devices for blk-mq. Only makes sense with
+
+ = ================================================================
+ 0 Tag set is not shared.
+ 1 Tag set shared between devices for blk-mq. Only makes sense with
nr_devices > 1, otherwise there's no tag set to share.
+ = ================================================================
zoned=[0/1]: Default: 0
- 0: Block device is exposed as a random-access block device.
- 1: Block device is exposed as a host-managed zoned block device. Requires
+
+ = ======================================================================
+ 0 Block device is exposed as a random-access block device.
+ 1 Block device is exposed as a host-managed zoned block device. Requires
CONFIG_BLK_DEV_ZONED.
+ = ======================================================================
zone_size=[MB]: Default: 256
Per zone size when exposed as a zoned block device. Must be a power of two.
diff --git a/Documentation/block/pr.txt b/Documentation/block/pr.rst
index ac9b8e70e64b..30ea1c2e39eb 100644
--- a/Documentation/block/pr.txt
+++ b/Documentation/block/pr.rst
@@ -1,4 +1,4 @@
-
+===============================================
Block layer support for Persistent Reservations
===============================================
@@ -23,22 +23,18 @@ The following types of reservations are supported:
--------------------------------------------------
- PR_WRITE_EXCLUSIVE
-
Only the initiator that owns the reservation can write to the
device. Any initiator can read from the device.
- PR_EXCLUSIVE_ACCESS
-
Only the initiator that owns the reservation can access the
device.
- PR_WRITE_EXCLUSIVE_REG_ONLY
-
Only initiators with a registered key can write to the device,
Any initiator can read from the device.
- PR_EXCLUSIVE_ACCESS_REG_ONLY
-
Only initiators with a registered key can access the device.
- PR_WRITE_EXCLUSIVE_ALL_REGS
@@ -48,21 +44,21 @@ The following types of reservations are supported:
All initiators with a registered key are considered reservation
holders.
Please reference the SPC spec on the meaning of a reservation
- holder if you want to use this type.
+ holder if you want to use this type.
- PR_EXCLUSIVE_ACCESS_ALL_REGS
-
Only initiators with a registered key can access the device.
All initiators with a registered key are considered reservation
holders.
Please reference the SPC spec on the meaning of a reservation
- holder if you want to use this type.
+ holder if you want to use this type.
The following ioctl are supported:
----------------------------------
1. IOC_PR_REGISTER
+^^^^^^^^^^^^^^^^^^
This ioctl command registers a new reservation if the new_key argument
is non-null. If no existing reservation exists old_key must be zero,
@@ -74,6 +70,7 @@ in old_key.
2. IOC_PR_RESERVE
+^^^^^^^^^^^^^^^^^
This ioctl command reserves the device and thus restricts access for other
devices based on the type argument. The key argument must be the existing
@@ -82,12 +79,14 @@ IOC_PR_REGISTER_IGNORE, IOC_PR_PREEMPT or IOC_PR_PREEMPT_ABORT commands.
3. IOC_PR_RELEASE
+^^^^^^^^^^^^^^^^^
This ioctl command releases the reservation specified by key and flags
and thus removes any access restriction implied by it.
4. IOC_PR_PREEMPT
+^^^^^^^^^^^^^^^^^
This ioctl command releases the existing reservation referred to by
old_key and replaces it with a new reservation of type for the
@@ -95,11 +94,13 @@ reservation key new_key.
5. IOC_PR_PREEMPT_ABORT
+^^^^^^^^^^^^^^^^^^^^^^^
This ioctl command works like IOC_PR_PREEMPT except that it also aborts
any outstanding command sent over a connection identified by old_key.
6. IOC_PR_CLEAR
+^^^^^^^^^^^^^^^
This ioctl command unregisters both key and any other reservation key
registered with the device and drops any existing reservation.
@@ -111,7 +112,6 @@ Flags
All the ioctls have a flag field. Currently only one flag is supported:
- PR_FL_IGNORE_KEY
-
Ignore the existing reservation key. This is commonly supported for
IOC_PR_REGISTER, and some implementation may support the flag for
IOC_PR_RESERVE.
diff --git a/Documentation/block/queue-sysfs.txt b/Documentation/block/queue-sysfs.rst
index b40b5b7cebd9..6a8513af9201 100644
--- a/Documentation/block/queue-sysfs.txt
+++ b/Documentation/block/queue-sysfs.rst
@@ -1,3 +1,4 @@
+=================
Queue sysfs files
=================
@@ -10,7 +11,7 @@ Files denoted with a RO postfix are readonly and the RW postfix means
read-write.
add_random (RW)
-----------------
+---------------
This file allows turning off the disk entropy contribution. Default
value of this file is '1'(on).
@@ -30,13 +31,13 @@ used by CPU-addressable storage to bypass the pagecache. It shows '1'
if true, '0' if not.
discard_granularity (RO)
------------------------
+------------------------
This shows the size of internal allocation of the device in bytes, if
reported by the device. A value of '0' means device does not support
the discard functionality.
discard_max_hw_bytes (RO)
-----------------------
+-------------------------
Devices that support discard functionality may have internal limits on
the number of bytes that can be trimmed or unmapped in a single operation.
The discard_max_bytes parameter is set by the device driver to the maximum
diff --git a/Documentation/block/request.txt b/Documentation/block/request.rst
index 754e104ed369..747021e1ffdb 100644
--- a/Documentation/block/request.txt
+++ b/Documentation/block/request.rst
@@ -1,26 +1,37 @@
-
+============================
struct request documentation
+============================
Jens Axboe <jens.axboe@oracle.com> 27/05/02
-1.0
-Index
-2.0 Struct request members classification
+.. FIXME:
+ No idea what this means - it seems to be just noise, so comment it out
+
+ 1.0
+ Index
+
+ 2.0 Struct request members classification
+
+ 2.1 struct request members explanation
- 2.1 struct request members explanation
+ 3.0
+
+
+ 2.0
-3.0
-2.0
Short explanation of request members
+====================================
Classification flags:
+ = ====================
D driver member
B block layer member
I I/O scheduler member
+ = ====================
Unless an entry contains a D classification, a device driver must not access
this member. Some members may contain D classifications, but should only be
@@ -28,14 +39,13 @@ access through certain macros or functions (eg ->flags).
<linux/blkdev.h>
-2.1
+=============================== ======= =======================================
Member Flag Comment
------- ---- -------
-
+=============================== ======= =======================================
struct list_head queuelist BI Organization on various internal
queues
-void *elevator_private I I/O scheduler private data
+``void *elevator_private`` I I/O scheduler private data
unsigned char cmd[16] D Driver can use this for setting up
a cdb before execution, see
@@ -71,18 +81,19 @@ unsigned int hard_cur_sectors B Used to keep current_nr_sectors sane
int tag DB TCQ tag, if assigned
-void *special D Free to be used by driver
+``void *special`` D Free to be used by driver
-char *buffer D Map of first segment, also see
+``char *buffer`` D Map of first segment, also see
section on bouncing SECTION
-struct completion *waiting D Can be used by driver to get signalled
+``struct completion *waiting`` D Can be used by driver to get signalled
on request completion
-struct bio *bio DBI First bio in request
+``struct bio *bio`` DBI First bio in request
-struct bio *biotail DBI Last bio in request
+``struct bio *biotail`` DBI Last bio in request
-struct request_queue *q DB Request queue this request belongs to
+``struct request_queue *q`` DB Request queue this request belongs to
-struct request_list *rl B Request list this request came from
+``struct request_list *rl`` B Request list this request came from
+=============================== ======= =======================================
diff --git a/Documentation/block/stat.txt b/Documentation/block/stat.rst
index 0aace9cc536c..9c07bc22b0bc 100644
--- a/Documentation/block/stat.txt
+++ b/Documentation/block/stat.rst
@@ -1,3 +1,4 @@
+===============================================
Block layer statistics in /sys/block/<dev>/stat
===============================================
@@ -6,9 +7,12 @@ This file documents the contents of the /sys/block/<dev>/stat file.
The stat file provides several statistics about the state of block
device <dev>.
-Q. Why are there multiple statistics in a single file? Doesn't sysfs
+Q.
+ Why are there multiple statistics in a single file? Doesn't sysfs
normally contain a single value per file?
-A. By having a single file, the kernel can guarantee that the statistics
+
+A.
+ By having a single file, the kernel can guarantee that the statistics
represent a consistent snapshot of the state of the device. If the
statistics were exported as multiple files containing one statistic
each, it would be impossible to guarantee that a set of readings
@@ -18,8 +22,10 @@ The stat file consists of a single line of text containing 11 decimal
values separated by whitespace. The fields are summarized in the
following table, and described in more detail below.
+
+=============== ============= =================================================
Name units description
----- ----- -----------
+=============== ============= =================================================
read I/Os requests number of read I/Os processed
read merges requests number of read I/Os merged with in-queue I/O
read sectors sectors number of sectors read
@@ -35,6 +41,7 @@ discard I/Os requests number of discard I/Os processed
discard merges requests number of discard I/Os merged with in-queue I/O
discard sectors sectors number of sectors discarded
discard ticks milliseconds total wait time for discard requests
+=============== ============= =================================================
read I/Os, write I/Os, discard I/Os
===================================
diff --git a/Documentation/block/switching-sched.txt b/Documentation/block/switching-sched.rst
index 7977f6fb8b20..42042417380e 100644
--- a/Documentation/block/switching-sched.txt
+++ b/Documentation/block/switching-sched.rst
@@ -1,35 +1,39 @@
+===================
+Switching Scheduler
+===================
+
To choose IO schedulers at boot time, use the argument 'elevator=deadline'.
'noop' and 'cfq' (the default) are also available. IO schedulers are assigned
globally at boot time only presently.
Each io queue has a set of io scheduler tunables associated with it. These
tunables control how the io scheduler works. You can find these entries
-in:
+in::
-/sys/block/<device>/queue/iosched
+ /sys/block/<device>/queue/iosched
assuming that you have sysfs mounted on /sys. If you don't have sysfs mounted,
-you can do so by typing:
+you can do so by typing::
-# mount none /sys -t sysfs
+ # mount none /sys -t sysfs
It is possible to change the IO scheduler for a given block device on
the fly to select one of mq-deadline, none, bfq, or kyber schedulers -
which can improve that device's throughput.
-To set a specific scheduler, simply do this:
+To set a specific scheduler, simply do this::
-echo SCHEDNAME > /sys/block/DEV/queue/scheduler
+ echo SCHEDNAME > /sys/block/DEV/queue/scheduler
where SCHEDNAME is the name of a defined IO scheduler, and DEV is the
device name (hda, hdb, sga, or whatever you happen to have).
The list of defined schedulers can be found by simply doing
a "cat /sys/block/DEV/queue/scheduler" - the list of valid names
-will be displayed, with the currently selected scheduler in brackets:
+will be displayed, with the currently selected scheduler in brackets::
-# cat /sys/block/sda/queue/scheduler
-[mq-deadline] kyber bfq none
-# echo none >/sys/block/sda/queue/scheduler
-# cat /sys/block/sda/queue/scheduler
-[none] mq-deadline kyber bfq
+ # cat /sys/block/sda/queue/scheduler
+ [mq-deadline] kyber bfq none
+ # echo none >/sys/block/sda/queue/scheduler
+ # cat /sys/block/sda/queue/scheduler
+ [none] mq-deadline kyber bfq
diff --git a/Documentation/block/writeback_cache_control.txt b/Documentation/block/writeback_cache_control.rst
index 8a6bdada5f6b..2c752c57c14c 100644
--- a/Documentation/block/writeback_cache_control.txt
+++ b/Documentation/block/writeback_cache_control.rst
@@ -1,6 +1,6 @@
-
+==========================================
Explicit volatile write back cache control
-=====================================
+==========================================
Introduction
------------
@@ -31,7 +31,7 @@ the blkdev_issue_flush() helper for a pure cache flush.
Forced Unit Access
------------------
+------------------
The REQ_FUA flag can be ORed into the r/w flags of a bio submitted from the
filesystem and will make sure that I/O completion for this request is only
@@ -62,14 +62,14 @@ flags themselves without any help from the block layer.
Implementation details for request_fn based block drivers
---------------------------------------------------------------
+---------------------------------------------------------
For devices that do not support volatile write caches there is no driver
support required, the block layer completes empty REQ_PREFLUSH requests before
entering the driver and strips off the REQ_PREFLUSH and REQ_FUA bits from
requests that have a payload. For devices with volatile write caches the
driver needs to tell the block layer that it supports flushing caches by
-doing:
+doing::
blk_queue_write_cache(sdkp->disk->queue, true, false);
@@ -77,7 +77,7 @@ and handle empty REQ_OP_FLUSH requests in its prep_fn/request_fn. Note that
REQ_PREFLUSH requests with a payload are automatically turned into a sequence
of an empty REQ_OP_FLUSH request followed by the actual write by the block
layer. For devices that also support the FUA bit the block layer needs
-to be told to pass through the REQ_FUA bit using:
+to be told to pass through the REQ_FUA bit using::
blk_queue_write_cache(sdkp->disk->queue, true, true);
diff --git a/Documentation/cdrom/index.rst b/Documentation/cdrom/index.rst
index efbd5d111825..338ad5f94e7c 100644
--- a/Documentation/cdrom/index.rst
+++ b/Documentation/cdrom/index.rst
@@ -1,4 +1,4 @@
-:orphan:
+.. SPDX-License-Identifier: GPL-2.0
=====
cdrom
diff --git a/Documentation/gcc-plugins.txt b/Documentation/core-api/gcc-plugins.rst
index 8502f24396fb..8502f24396fb 100644
--- a/Documentation/gcc-plugins.txt
+++ b/Documentation/core-api/gcc-plugins.rst
diff --git a/Documentation/core-api/index.rst b/Documentation/core-api/index.rst
index 322ac954b390..da0ed972d224 100644
--- a/Documentation/core-api/index.rst
+++ b/Documentation/core-api/index.rst
@@ -36,6 +36,7 @@ Core utilities
memory-hotplug
protection-keys
../RCU/index
+ gcc-plugins
Interfaces for kernel debugging
diff --git a/Documentation/core-api/printk-formats.rst b/Documentation/core-api/printk-formats.rst
index 75d2bbe9813f..c6224d039bcb 100644
--- a/Documentation/core-api/printk-formats.rst
+++ b/Documentation/core-api/printk-formats.rst
@@ -119,7 +119,7 @@ Kernel Pointers
For printing kernel pointers which should be hidden from unprivileged
users. The behaviour of %pK depends on the kptr_restrict sysctl - see
-Documentation/sysctl/kernel.txt for more details.
+Documentation/admin-guide/sysctl/kernel.rst for more details.
Unmodified Addresses
--------------------
diff --git a/Documentation/devicetree/bindings/arm/xen.txt b/Documentation/devicetree/bindings/arm/xen.txt
index c9b9321434ea..db5c56db30ec 100644
--- a/Documentation/devicetree/bindings/arm/xen.txt
+++ b/Documentation/devicetree/bindings/arm/xen.txt
@@ -54,7 +54,7 @@ hypervisor {
};
The format and meaning of the "xen,uefi-*" parameters are similar to those in
-Documentation/arm/uefi.txt, which are provided by the regular UEFI stub. However
+Documentation/arm/uefi.rst, which are provided by the regular UEFI stub. However
they differ because they are provided by the Xen hypervisor, together with a set
of UEFI runtime services implemented via hypercalls, see
http://xenbits.xen.org/docs/unstable/hypercall/x86_64/include,public,platform.h.html.
diff --git a/Documentation/devicetree/bindings/phy/phy-bindings.txt b/Documentation/devicetree/bindings/phy/phy-bindings.txt
index a403b81d0679..c4eb38902533 100644
--- a/Documentation/devicetree/bindings/phy/phy-bindings.txt
+++ b/Documentation/devicetree/bindings/phy/phy-bindings.txt
@@ -1,5 +1,5 @@
This document explains only the device tree data binding. For general
-information about PHY subsystem refer to Documentation/phy.txt
+information about PHY subsystem refer to Documentation/driver-api/phy/phy.rst
PHY device node
===============
diff --git a/Documentation/devicetree/bindings/phy/phy-pxa-usb.txt b/Documentation/devicetree/bindings/phy/phy-pxa-usb.txt
index 93fc09c12954..d80e36a77ec5 100644
--- a/Documentation/devicetree/bindings/phy/phy-pxa-usb.txt
+++ b/Documentation/devicetree/bindings/phy/phy-pxa-usb.txt
@@ -15,4 +15,4 @@ Example:
};
This document explains the device tree binding. For general
-information about PHY subsystem refer to Documentation/phy.txt
+information about PHY subsystem refer to Documentation/driver-api/phy/phy.rst
diff --git a/Documentation/devicetree/booting-without-of.txt b/Documentation/devicetree/booting-without-of.txt
index 60f8640f2b2f..4660ccee35a3 100644
--- a/Documentation/devicetree/booting-without-of.txt
+++ b/Documentation/devicetree/booting-without-of.txt
@@ -160,7 +160,7 @@ it with special cases.
of the kernel image. That entry point supports two calling
conventions. A summary of the interface is described here. A full
description of the boot requirements is documented in
- Documentation/arm/Booting
+ Documentation/arm/booting.rst
a) ATAGS interface. Minimal information is passed from firmware
to the kernel with a tagged list of predefined parameters.
@@ -174,7 +174,7 @@ it with special cases.
b) Entry with a flattened device-tree block. Firmware loads the
physical address of the flattened device tree block (dtb) into r2,
r1 is not used, but it is considered good practice to use a valid
- machine number as described in Documentation/arm/Booting.
+ machine number as described in Documentation/arm/booting.rst.
r0 : 0
diff --git a/Documentation/driver-api/backlight/lp855x-driver.rst b/Documentation/driver-api/backlight/lp855x-driver.rst
new file mode 100644
index 000000000000..1e0b224fc397
--- /dev/null
+++ b/Documentation/driver-api/backlight/lp855x-driver.rst
@@ -0,0 +1,81 @@
+====================
+Kernel driver lp855x
+====================
+
+Backlight driver for LP855x ICs
+
+Supported chips:
+
+ Texas Instruments LP8550, LP8551, LP8552, LP8553, LP8555, LP8556 and
+ LP8557
+
+Author: Milo(Woogyom) Kim <milo.kim@ti.com>
+
+Description
+-----------
+
+* Brightness control
+
+ Brightness can be controlled by the pwm input or the i2c command.
+ The lp855x driver supports both cases.
+
+* Device attributes
+
+ 1) bl_ctl_mode
+
+ Backlight control mode.
+
+ Value: pwm based or register based
+
+ 2) chip_id
+
+ The lp855x chip id.
+
+ Value: lp8550/lp8551/lp8552/lp8553/lp8555/lp8556/lp8557
+
+Platform data for lp855x
+------------------------
+
+For supporting platform specific data, the lp855x platform data can be used.
+
+* name:
+ Backlight driver name. If it is not defined, default name is set.
+* device_control:
+ Value of DEVICE CONTROL register.
+* initial_brightness:
+ Initial value of backlight brightness.
+* period_ns:
+ Platform specific PWM period value. Unit is nanoseconds.
+ Only valid when brightness is pwm input mode.
+* size_program:
+ Total size of lp855x_rom_data.
+* rom_data:
+ List of new eeprom/eprom registers.
+
+Examples
+========
+
+1) lp8552 platform data: i2c register mode with new eeprom data::
+
+ #define EEPROM_A5_ADDR 0xA5
+ #define EEPROM_A5_VAL 0x4f /* EN_VSYNC=0 */
+
+ static struct lp855x_rom_data lp8552_eeprom_arr[] = {
+ {EEPROM_A5_ADDR, EEPROM_A5_VAL},
+ };
+
+ static struct lp855x_platform_data lp8552_pdata = {
+ .name = "lcd-bl",
+ .device_control = I2C_CONFIG(LP8552),
+ .initial_brightness = INITIAL_BRT,
+ .size_program = ARRAY_SIZE(lp8552_eeprom_arr),
+ .rom_data = lp8552_eeprom_arr,
+ };
+
+2) lp8556 platform data: pwm input mode with default rom data::
+
+ static struct lp855x_platform_data lp8556_pdata = {
+ .device_control = PWM_CONFIG(LP8556),
+ .initial_brightness = INITIAL_BRT,
+ .period_ns = 1000000,
+ };
diff --git a/Documentation/bt8xxgpio.txt b/Documentation/driver-api/bt8xxgpio.rst
index a845feb074de..a845feb074de 100644
--- a/Documentation/bt8xxgpio.txt
+++ b/Documentation/driver-api/bt8xxgpio.rst
diff --git a/Documentation/connector/connector.txt b/Documentation/driver-api/connector.rst
index ab7ca897fab7..c100c7482289 100644
--- a/Documentation/connector/connector.txt
+++ b/Documentation/driver-api/connector.rst
@@ -1,6 +1,8 @@
-/*****************************************/
-Kernel Connector.
-/*****************************************/
+.. SPDX-License-Identifier: GPL-2.0
+
+================
+Kernel Connector
+================
Kernel connector - new netlink based userspace <-> kernel space easy
to use communication module.
@@ -12,94 +14,55 @@ identifier, the appropriate callback will be called.
From the userspace point of view it's quite straightforward:
- socket();
- bind();
- send();
- recv();
+ - socket();
+ - bind();
+ - send();
+ - recv();
But if kernelspace wants to use the full power of such connections, the
driver writer must create special sockets, must know about struct sk_buff
handling, etc... The Connector driver allows any kernelspace agents to use
netlink based networking for inter-process communication in a significantly
-easier way:
+easier way::
-int cn_add_callback(struct cb_id *id, char *name, void (*callback) (struct cn_msg *, struct netlink_skb_parms *));
-void cn_netlink_send_multi(struct cn_msg *msg, u16 len, u32 portid, u32 __group, int gfp_mask);
-void cn_netlink_send(struct cn_msg *msg, u32 portid, u32 __group, int gfp_mask);
+ int cn_add_callback(struct cb_id *id, char *name, void (*callback) (struct cn_msg *, struct netlink_skb_parms *));
+ void cn_netlink_send_multi(struct cn_msg *msg, u16 len, u32 portid, u32 __group, int gfp_mask);
+ void cn_netlink_send(struct cn_msg *msg, u32 portid, u32 __group, int gfp_mask);
-struct cb_id
-{
+ struct cb_id
+ {
__u32 idx;
__u32 val;
-};
+ };
idx and val are unique identifiers which must be registered in the
-connector.h header for in-kernel usage. void (*callback) (void *) is a
+connector.h header for in-kernel usage. `void (*callback) (void *)` is a
callback function which will be called when a message with above idx.val
is received by the connector core. The argument for that function must
-be dereferenced to struct cn_msg *.
+be dereferenced to `struct cn_msg *`::
-struct cn_msg
-{
+ struct cn_msg
+ {
struct cb_id id;
__u32 seq;
__u32 ack;
- __u32 len; /* Length of the following data */
+ __u32 len; /* Length of the following data */
__u8 data[0];
-};
-
-/*****************************************/
-Connector interfaces.
-/*****************************************/
-
-int cn_add_callback(struct cb_id *id, char *name, void (*callback) (struct cn_msg *, struct netlink_skb_parms *));
-
- Registers new callback with connector core.
-
- struct cb_id *id - unique connector's user identifier.
- It must be registered in connector.h for legal in-kernel users.
- char *name - connector's callback symbolic name.
- void (*callback) (struct cn..) - connector's callback.
- cn_msg and the sender's credentials
-
-
-void cn_del_callback(struct cb_id *id);
-
- Unregisters new callback with connector core.
-
- struct cb_id *id - unique connector's user identifier.
-
-
-int cn_netlink_send_multi(struct cn_msg *msg, u16 len, u32 portid, u32 __groups, int gfp_mask);
-int cn_netlink_send(struct cn_msg *msg, u32 portid, u32 __groups, int gfp_mask);
+ };
- Sends message to the specified groups. It can be safely called from
- softirq context, but may silently fail under strong memory pressure.
- If there are no listeners for given group -ESRCH can be returned.
+Connector interfaces
+====================
- struct cn_msg * - message header(with attached data).
- u16 len - for *_multi multiple cn_msg messages can be sent
- u32 port - destination port.
- If non-zero the message will be sent to the
- given port, which should be set to the
- original sender.
- u32 __group - destination group.
- If port and __group is zero, then appropriate group will
- be searched through all registered connector users,
- and message will be delivered to the group which was
- created for user with the same ID as in msg.
- If __group is not zero, then message will be delivered
- to the specified group.
- int gfp_mask - GFP mask.
+ .. kernel-doc:: include/linux/connector.h
- Note: When registering new callback user, connector core assigns
- netlink group to the user which is equal to its id.idx.
+ Note:
+ When registering new callback user, connector core assigns
+ netlink group to the user which is equal to its id.idx.
-/*****************************************/
-Protocol description.
-/*****************************************/
+Protocol description
+====================
The current framework offers a transport layer with fixed headers. The
recommended protocol which uses such a header is as follows:
@@ -132,9 +95,8 @@ driver (it also registers itself with id={-1, -1}).
An example of this usage can be found in the cn_test.c module which
uses the connector to request notification and to send messages.
-/*****************************************/
-Reliability.
-/*****************************************/
+Reliability
+===========
Netlink itself is not a reliable protocol. That means that messages can
be lost due to memory pressure or process' receiving queue overflowed,
@@ -142,32 +104,31 @@ so caller is warned that it must be prepared. That is why the struct
cn_msg [main connector's message header] contains u32 seq and u32 ack
fields.
-/*****************************************/
-Userspace usage.
-/*****************************************/
+Userspace usage
+===============
2.6.14 has a new netlink socket implementation, which by default does not
allow people to send data to netlink groups other than 1.
So, if you wish to use a netlink socket (for example using connector)
with a different group number, the userspace application must subscribe to
-that group first. It can be achieved by the following pseudocode:
+that group first. It can be achieved by the following pseudocode::
-s = socket(PF_NETLINK, SOCK_DGRAM, NETLINK_CONNECTOR);
+ s = socket(PF_NETLINK, SOCK_DGRAM, NETLINK_CONNECTOR);
-l_local.nl_family = AF_NETLINK;
-l_local.nl_groups = 12345;
-l_local.nl_pid = 0;
+ l_local.nl_family = AF_NETLINK;
+ l_local.nl_groups = 12345;
+ l_local.nl_pid = 0;
-if (bind(s, (struct sockaddr *)&l_local, sizeof(struct sockaddr_nl)) == -1) {
+ if (bind(s, (struct sockaddr *)&l_local, sizeof(struct sockaddr_nl)) == -1) {
perror("bind");
close(s);
return -1;
-}
+ }
-{
+ {
int on = l_local.nl_groups;
setsockopt(s, 270, 1, &on, sizeof(on));
-}
+ }
Where 270 above is SOL_NETLINK, and 1 is a NETLINK_ADD_MEMBERSHIP socket
option. To drop a multicast subscription, one should call the above socket
@@ -180,16 +141,15 @@ group number 12345, you must increment CN_NETLINK_USERS to that number.
Additional 0xf numbers are allocated to be used by non-in-kernel users.
Due to this limitation, group 0xffffffff does not work now, so one can
-not use add/remove connector's group notifications, but as far as I know,
+not use add/remove connector's group notifications, but as far as I know,
only cn_test.c test module used it.
Some work in netlink area is still being done, so things can be changed in
2.6.15 timeframe, if it will happen, documentation will be updated for that
kernel.
-/*****************************************/
Code samples
-/*****************************************/
+============
Sample code for a connector test module and user space can be found
in samples/connector/. To build this code, enable CONFIG_CONNECTOR
diff --git a/Documentation/console/console.txt b/Documentation/driver-api/console.rst
index d73c2ab4beda..8394ad7747ac 100644
--- a/Documentation/console/console.txt
+++ b/Documentation/driver-api/console.rst
@@ -1,3 +1,6 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===============
Console Drivers
===============
@@ -17,25 +20,26 @@ of driver occupying the consoles.) They can only take over the console that is
occupied by the system driver. In the same token, if the modular driver is
released by the console, the system driver will take over.
-Modular drivers, from the programmer's point of view, have to call:
+Modular drivers, from the programmer's point of view, have to call::
do_take_over_console() - load and bind driver to console layer
give_up_console() - unload driver; it will only work if driver
is fully unbound
-In newer kernels, the following are also available:
+In newer kernels, the following are also available::
do_register_con_driver()
do_unregister_con_driver()
If sysfs is enabled, the contents of /sys/class/vtconsole can be
examined. This shows the console backends currently registered by the
-system which are named vtcon<n> where <n> is an integer from 0 to 15. Thus:
+system which are named vtcon<n> where <n> is an integer from 0 to 15.
+Thus::
ls /sys/class/vtconsole
. .. vtcon0 vtcon1
-Each directory in /sys/class/vtconsole has 3 files:
+Each directory in /sys/class/vtconsole has 3 files::
ls /sys/class/vtconsole/vtcon0
. .. bind name uevent
@@ -46,27 +50,29 @@ What do these files signify?
read, or acts to bind or unbind the driver to the virtual consoles
when written to. The possible values are:
- 0 - means the driver is not bound and if echo'ed, commands the driver
+ 0
+ - means the driver is not bound and if echo'ed, commands the driver
to unbind
- 1 - means the driver is bound and if echo'ed, commands the driver to
+ 1
+ - means the driver is bound and if echo'ed, commands the driver to
bind
- 2. name - read-only file. Shows the name of the driver in this format:
+ 2. name - read-only file. Shows the name of the driver in this format::
- cat /sys/class/vtconsole/vtcon0/name
- (S) VGA+
+ cat /sys/class/vtconsole/vtcon0/name
+ (S) VGA+
- '(S)' stands for a (S)ystem driver, i.e., it cannot be directly
- commanded to bind or unbind
+ '(S)' stands for a (S)ystem driver, i.e., it cannot be directly
+ commanded to bind or unbind
- 'VGA+' is the name of the driver
+ 'VGA+' is the name of the driver
- cat /sys/class/vtconsole/vtcon1/name
- (M) frame buffer device
+ cat /sys/class/vtconsole/vtcon1/name
+ (M) frame buffer device
- In this case, '(M)' stands for a (M)odular driver, one that can be
- directly commanded to bind or unbind.
+ In this case, '(M)' stands for a (M)odular driver, one that can be
+ directly commanded to bind or unbind.
3. uevent - ignore this file
@@ -75,14 +81,17 @@ driver takes over the consoles vacated by the driver. Binding, on the other
hand, will bind the driver to the consoles that are currently occupied by a
system driver.
-NOTE1: Binding and unbinding must be selected in Kconfig. It's under:
+NOTE1:
+ Binding and unbinding must be selected in Kconfig. It's under::
-Device Drivers -> Character devices -> Support for binding and unbinding
-console drivers
+ Device Drivers ->
+ Character devices ->
+ Support for binding and unbinding console drivers
-NOTE2: If any of the virtual consoles are in KD_GRAPHICS mode, then binding or
-unbinding will not succeed. An example of an application that sets the console
-to KD_GRAPHICS is X.
+NOTE2:
+ If any of the virtual consoles are in KD_GRAPHICS mode, then binding or
+ unbinding will not succeed. An example of an application that sets the
+ console to KD_GRAPHICS is X.
How useful is this feature? This is very useful for console driver
developers. By unbinding the driver from the console layer, one can unload the
@@ -92,10 +101,10 @@ framebuffer console to VGA console and vice versa, this feature also makes
this possible. (NOTE NOTE NOTE: Please read fbcon.txt under Documentation/fb
for more details.)
-Notes for developers:
-=====================
+Notes for developers
+====================
-do_take_over_console() is now broken up into:
+do_take_over_console() is now broken up into::
do_register_con_driver()
do_bind_con_driver() - private function
@@ -104,7 +113,7 @@ give_up_console() is a wrapper to do_unregister_con_driver(), and a driver must
be fully unbound for this call to succeed. con_is_bound() will check if the
driver is bound or not.
-Guidelines for console driver writers:
+Guidelines for console driver writers
=====================================
In order for binding to and unbinding from the console to properly work,
@@ -140,6 +149,4 @@ The current crop of console drivers should still work correctly, but binding
and unbinding them may cause problems. With minimal fixes, these drivers can
be made to work correctly.
-==========================
Antonino Daplas <adaplas@pol.net>
-
diff --git a/Documentation/dcdbas.txt b/Documentation/driver-api/dcdbas.rst
index 309cc57a7c1c..309cc57a7c1c 100644
--- a/Documentation/dcdbas.txt
+++ b/Documentation/driver-api/dcdbas.rst
diff --git a/Documentation/dell_rbu.txt b/Documentation/driver-api/dell_rbu.rst
index 5d1ce7bcd04d..5d1ce7bcd04d 100644
--- a/Documentation/dell_rbu.txt
+++ b/Documentation/driver-api/dell_rbu.rst
diff --git a/Documentation/driver-model/binding.rst b/Documentation/driver-api/driver-model/binding.rst
index 7ea1d7a41e1d..7ea1d7a41e1d 100644
--- a/Documentation/driver-model/binding.rst
+++ b/Documentation/driver-api/driver-model/binding.rst
diff --git a/Documentation/driver-model/bus.rst b/Documentation/driver-api/driver-model/bus.rst
index 016b15a6e8ea..016b15a6e8ea 100644
--- a/Documentation/driver-model/bus.rst
+++ b/Documentation/driver-api/driver-model/bus.rst
diff --git a/Documentation/driver-model/class.rst b/Documentation/driver-api/driver-model/class.rst
index fff55b80e86a..fff55b80e86a 100644
--- a/Documentation/driver-model/class.rst
+++ b/Documentation/driver-api/driver-model/class.rst
diff --git a/Documentation/driver-model/design-patterns.rst b/Documentation/driver-api/driver-model/design-patterns.rst
index 41eb8f41f7dd..41eb8f41f7dd 100644
--- a/Documentation/driver-model/design-patterns.rst
+++ b/Documentation/driver-api/driver-model/design-patterns.rst
diff --git a/Documentation/driver-model/device.rst b/Documentation/driver-api/driver-model/device.rst
index 2b868d49d349..2b868d49d349 100644
--- a/Documentation/driver-model/device.rst
+++ b/Documentation/driver-api/driver-model/device.rst
diff --git a/Documentation/driver-model/devres.rst b/Documentation/driver-api/driver-model/devres.rst
index 4ac99122b5f1..4ac99122b5f1 100644
--- a/Documentation/driver-model/devres.rst
+++ b/Documentation/driver-api/driver-model/devres.rst
diff --git a/Documentation/driver-model/driver.rst b/Documentation/driver-api/driver-model/driver.rst
index 11d281506a04..11d281506a04 100644
--- a/Documentation/driver-model/driver.rst
+++ b/Documentation/driver-api/driver-model/driver.rst
diff --git a/Documentation/driver-model/index.rst b/Documentation/driver-api/driver-model/index.rst
index 9f85d579ce56..755016422269 100644
--- a/Documentation/driver-model/index.rst
+++ b/Documentation/driver-api/driver-model/index.rst
@@ -1,5 +1,3 @@
-:orphan:
-
============
Driver Model
============
diff --git a/Documentation/driver-model/overview.rst b/Documentation/driver-api/driver-model/overview.rst
index d4d1e9b40e0c..d4d1e9b40e0c 100644
--- a/Documentation/driver-model/overview.rst
+++ b/Documentation/driver-api/driver-model/overview.rst
diff --git a/Documentation/driver-model/platform.rst b/Documentation/driver-api/driver-model/platform.rst
index 334dd4071ae4..334dd4071ae4 100644
--- a/Documentation/driver-model/platform.rst
+++ b/Documentation/driver-api/driver-model/platform.rst
diff --git a/Documentation/driver-model/porting.rst b/Documentation/driver-api/driver-model/porting.rst
index ae4bf843c1d6..931ea879af3f 100644
--- a/Documentation/driver-model/porting.rst
+++ b/Documentation/driver-api/driver-model/porting.rst
@@ -9,7 +9,7 @@ Patrick Mochel
Overview
-Please refer to `Documentation/driver-model/*.rst` for definitions of
+Please refer to `Documentation/driver-api/driver-model/*.rst` for definitions of
various driver types and concepts.
Most of the work of porting devices drivers to the new model happens
diff --git a/Documentation/early-userspace/buffer-format.txt b/Documentation/driver-api/early-userspace/buffer-format.rst
index e1fd7f9dad16..7f74e301fdf3 100644
--- a/Documentation/early-userspace/buffer-format.txt
+++ b/Documentation/driver-api/early-userspace/buffer-format.rst
@@ -1,8 +1,10 @@
- initramfs buffer format
- -----------------------
+=======================
+initramfs buffer format
+=======================
- Al Viro, H. Peter Anvin
- Last revision: 2002-01-13
+Al Viro, H. Peter Anvin
+
+Last revision: 2002-01-13
Starting with kernel 2.5.x, the old "initial ramdisk" protocol is
getting {replaced/complemented} with the new "initial ramfs"
@@ -18,7 +20,8 @@ archive can be compressed using gzip(1). One valid version of an
initramfs buffer is thus a single .cpio.gz file.
The full format of the initramfs buffer is defined by the following
-grammar, where:
+grammar, where::
+
* is used to indicate "0 or more occurrences of"
(|) indicates alternatives
+ indicates concatenation
@@ -49,7 +52,9 @@ hexadecimal ASCII numbers fully padded with '0' on the left to the
full width of the field, for example, the integer 4780 is represented
by the ASCII string "000012ac"):
+============= ================== ==============================================
Field name Field size Meaning
+============= ================== ==============================================
c_magic 6 bytes The string "070701" or "070702"
c_ino 8 bytes File inode number
c_mode 8 bytes File mode and permissions
@@ -65,6 +70,7 @@ c_rmin 8 bytes Minor part of device node reference
c_namesize 8 bytes Length of filename, including final \0
c_chksum 8 bytes Checksum of data field if c_magic is 070702;
otherwise zero
+============= ================== ==============================================
The c_mode field matches the contents of st_mode returned by stat(2)
on Linux, and encodes the file type and file permissions.
@@ -82,7 +88,8 @@ If the filename is "TRAILER!!!" this is actually an end-of-archive
marker; the c_filesize for an end-of-archive marker must be zero.
-*** Handling of hard links
+Handling of hard links
+======================
When a nondirectory with c_nlink > 1 is seen, the (c_maj,c_min,c_ino)
tuple is looked up in a tuple buffer. If not found, it is entered in
diff --git a/Documentation/early-userspace/README b/Documentation/driver-api/early-userspace/early_userspace_support.rst
index 955d667dc87e..3deefb34046b 100644
--- a/Documentation/early-userspace/README
+++ b/Documentation/driver-api/early-userspace/early_userspace_support.rst
@@ -1,3 +1,4 @@
+=======================
Early userspace support
=======================
@@ -26,6 +27,7 @@ archive to be used as the image or have the kernel build process build
the image from specifications.
CPIO ARCHIVE method
+-------------------
You can create a cpio archive that contains the early userspace image.
Your cpio archive should be specified in CONFIG_INITRAMFS_SOURCE and it
@@ -34,6 +36,7 @@ CONFIG_INITRAMFS_SOURCE and directory and file names are not allowed in
combination with a cpio archive.
IMAGE BUILDING method
+---------------------
The kernel build process can also build an early userspace image from
source parts rather than supplying a cpio archive. This method provides
diff --git a/Documentation/driver-api/early-userspace/index.rst b/Documentation/driver-api/early-userspace/index.rst
new file mode 100644
index 000000000000..149c1822f06d
--- /dev/null
+++ b/Documentation/driver-api/early-userspace/index.rst
@@ -0,0 +1,18 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===============
+Early Userspace
+===============
+
+.. toctree::
+ :maxdepth: 1
+
+ early_userspace_support
+ buffer-format
+
+.. only:: subproject and html
+
+ Indices
+ =======
+
+ * :ref:`genindex`
diff --git a/Documentation/EDID/howto.rst b/Documentation/driver-api/edid.rst
index 725fd49a88ca..b1b5acd501ed 100644
--- a/Documentation/EDID/howto.rst
+++ b/Documentation/driver-api/edid.rst
@@ -1,4 +1,4 @@
-:orphan:
+.. SPDX-License-Identifier: GPL-2.0
====
EDID
diff --git a/Documentation/eisa.txt b/Documentation/driver-api/eisa.rst
index f388545a85a7..c07565ba57da 100644
--- a/Documentation/eisa.txt
+++ b/Documentation/driver-api/eisa.rst
@@ -103,7 +103,7 @@ id_table an array of NULL terminated EISA id strings,
(driver_data).
driver a generic driver, such as described in
- Documentation/driver-model/driver.rst. Only .name,
+ Documentation/driver-api/driver-model/driver.rst. Only .name,
.probe and .remove members are mandatory.
=============== ====================================================
@@ -152,7 +152,7 @@ state set of flags indicating the state of the device. Current
flags are EISA_CONFIG_ENABLED and EISA_CONFIG_FORCED.
res set of four 256 bytes I/O regions allocated to this device
dma_mask DMA mask set from the parent device.
-dev generic device (see Documentation/driver-model/device.rst)
+dev generic device (see Documentation/driver-api/driver-model/device.rst)
======== ============================================================
You can get the 'struct eisa_device' from 'struct device' using the
diff --git a/Documentation/driver-api/gpio/driver.rst b/Documentation/driver-api/gpio/driver.rst
index 349f2dc33029..921c71a3d683 100644
--- a/Documentation/driver-api/gpio/driver.rst
+++ b/Documentation/driver-api/gpio/driver.rst
@@ -399,7 +399,7 @@ symbol:
will pass the struct gpio_chip* for the chip to all IRQ callbacks, so the
callbacks need to embed the gpio_chip in its state container and obtain a
pointer to the container using container_of().
- (See Documentation/driver-model/design-patterns.rst)
+ (See Documentation/driver-api/driver-model/design-patterns.rst)
- gpiochip_irqchip_add_nested(): adds a nested cascaded irqchip to a gpiochip,
as discussed above regarding different types of cascaded irqchips. The
diff --git a/Documentation/driver-api/index.rst b/Documentation/driver-api/index.rst
index 6cd750a03ea0..d12a80f386a6 100644
--- a/Documentation/driver-api/index.rst
+++ b/Documentation/driver-api/index.rst
@@ -14,8 +14,10 @@ available subsections can be seen below.
.. toctree::
:maxdepth: 2
+ driver-model/index
basics
infrastructure
+ early-userspace/index
pm/index
clk
device-io
@@ -36,6 +38,7 @@ available subsections can be seen below.
i2c
ipmb
i3c/index
+ interconnect
hsi
edac
scsi
@@ -44,8 +47,11 @@ available subsections can be seen below.
mtdnand
miscellaneous
mei/index
+ mtd/index
+ mmc/index
+ nvdimm/index
w1
- rapidio
+ rapidio/index
s390-drivers
vme
80211/index
@@ -53,13 +59,48 @@ available subsections can be seen below.
firmware/index
pinctl
gpio/index
+ md/index
misc_devices
+ nfc/index
dmaengine/index
slimbus
soundwire/index
fpga/index
acpi/index
+ backlight/lp855x-driver.rst
+ bt8xxgpio
+ connector
+ console
+ dcdbas
+ dell_rbu
+ edid
+ eisa
+ isa
+ isapnp
generic-counter
+ lightnvm-pblk
+ memory-devices/index
+ men-chameleon-bus
+ ntb
+ nvmem
+ parport-lowlevel
+ pps
+ ptp
+ phy/index
+ pti_intel_mid
+ pwm
+ rfkill
+ serial/index
+ sgi-ioc4
+ sm501
+ smsc_ece1099
+ switchtec
+ sync_file
+ vfio-mediated-device
+ vfio
+ xilinx/index
+ xillybus
+ zorro
.. only:: subproject and html
diff --git a/Documentation/interconnect/interconnect.rst b/Documentation/driver-api/interconnect.rst
index 56e331dab70e..c3e004893796 100644
--- a/Documentation/interconnect/interconnect.rst
+++ b/Documentation/driver-api/interconnect.rst
@@ -1,7 +1,5 @@
.. SPDX-License-Identifier: GPL-2.0
-:orphan:
-
=====================================
GENERIC SYSTEM INTERCONNECT SUBSYSTEM
=====================================
diff --git a/Documentation/isa.txt b/Documentation/driver-api/isa.rst
index def4a7b690b5..def4a7b690b5 100644
--- a/Documentation/isa.txt
+++ b/Documentation/driver-api/isa.rst
diff --git a/Documentation/isapnp.txt b/Documentation/driver-api/isapnp.rst
index 8d0840ac847b..8d0840ac847b 100644
--- a/Documentation/isapnp.txt
+++ b/Documentation/driver-api/isapnp.rst
diff --git a/Documentation/lightnvm/pblk.txt b/Documentation/driver-api/lightnvm-pblk.rst
index 1040ed1cec81..1040ed1cec81 100644
--- a/Documentation/lightnvm/pblk.txt
+++ b/Documentation/driver-api/lightnvm-pblk.rst
diff --git a/Documentation/driver-api/md/index.rst b/Documentation/driver-api/md/index.rst
new file mode 100644
index 000000000000..18f54a7d7d6e
--- /dev/null
+++ b/Documentation/driver-api/md/index.rst
@@ -0,0 +1,12 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+====
+RAID
+====
+
+.. toctree::
+ :maxdepth: 1
+
+ md-cluster
+ raid5-cache
+ raid5-ppl
diff --git a/Documentation/md/md-cluster.txt b/Documentation/driver-api/md/md-cluster.rst
index e1055f105cf5..96eb52cec7eb 100644
--- a/Documentation/md/md-cluster.txt
+++ b/Documentation/driver-api/md/md-cluster.rst
@@ -1,19 +1,24 @@
+==========
+MD Cluster
+==========
+
The cluster MD is a shared-device RAID for a cluster, it supports
two levels: raid1 and raid10 (limited support).
1. On-disk format
+=================
Separate write-intent-bitmaps are used for each cluster node.
The bitmaps record all writes that may have been started on that node,
-and may not yet have finished. The on-disk layout is:
+and may not yet have finished. The on-disk layout is::
-0 4k 8k 12k
--------------------------------------------------------------------
-| idle | md super | bm super [0] + bits |
-| bm bits[0, contd] | bm super[1] + bits | bm bits[1, contd] |
-| bm super[2] + bits | bm bits [2, contd] | bm super[3] + bits |
-| bm bits [3, contd] | | |
+ 0 4k 8k 12k
+ -------------------------------------------------------------------
+ | idle | md super | bm super [0] + bits |
+ | bm bits[0, contd] | bm super[1] + bits | bm bits[1, contd] |
+ | bm super[2] + bits | bm bits [2, contd] | bm super[3] + bits |
+ | bm bits [3, contd] | | |
During "normal" functioning we assume the filesystem ensures that only
one node writes to any given block at a time, so a write request will
@@ -28,10 +33,12 @@ node) is writing.
2. DLM Locks for management
+===========================
There are three groups of locks for managing the device:
2.1 Bitmap lock resource (bm_lockres)
+-------------------------------------
The bm_lockres protects individual node bitmaps. They are named in
the form bitmap000 for node 1, bitmap001 for node 2 and so on. When a
@@ -48,6 +55,7 @@ There are three groups of locks for managing the device:
joins the cluster.
2.2 Message passing locks
+-------------------------
Each node has to communicate with other nodes when starting or ending
resync, and for metadata superblock updates. This communication is
@@ -55,116 +63,155 @@ There are three groups of locks for managing the device:
with the Lock Value Block (LVB) of one of the "message" lock.
2.3 new-device management
+-------------------------
A single lock: "no-new-dev" is used to co-ordinate the addition of
new devices - this must be synchronized across the array.
Normally all nodes hold a concurrent-read lock on this device.
3. Communication
+================
Messages can be broadcast to all nodes, and the sender waits for all
other nodes to acknowledge the message before proceeding. Only one
message can be processed at a time.
3.1 Message Types
+-----------------
There are six types of messages which are passed:
- 3.1.1 METADATA_UPDATED: informs other nodes that the metadata has
+3.1.1 METADATA_UPDATED
+^^^^^^^^^^^^^^^^^^^^^^
+
+ informs other nodes that the metadata has
been updated, and the node must re-read the md superblock. This is
performed synchronously. It is primarily used to signal device
failure.
- 3.1.2 RESYNCING: informs other nodes that a resync is initiated or
+3.1.2 RESYNCING
+^^^^^^^^^^^^^^^
+ informs other nodes that a resync is initiated or
ended so that each node may suspend or resume the region. Each
RESYNCING message identifies a range of the devices that the
sending node is about to resync. This overrides any previous
notification from that node: only one range can be resynced at a
time per-node.
- 3.1.3 NEWDISK: informs other nodes that a device is being added to
+3.1.3 NEWDISK
+^^^^^^^^^^^^^
+
+ informs other nodes that a device is being added to
the array. Message contains an identifier for that device. See
below for further details.
- 3.1.4 REMOVE: A failed or spare device is being removed from the
+3.1.4 REMOVE
+^^^^^^^^^^^^
+
+ A failed or spare device is being removed from the
array. The slot-number of the device is included in the message.
- 3.1.5 RE_ADD: A failed device is being re-activated - the assumption
+ 3.1.5 RE_ADD:
+
+ A failed device is being re-activated - the assumption
is that it has been determined to be working again.
- 3.1.6 BITMAP_NEEDS_SYNC: if a node is stopped locally but the bitmap
+ 3.1.6 BITMAP_NEEDS_SYNC:
+
+ If a node is stopped locally but the bitmap
isn't clean, then another node is informed to take the ownership of
resync.
3.2 Communication mechanism
+---------------------------
The DLM LVB is used to communicate within nodes of the cluster. There
are three resources used for the purpose:
- 3.2.1 token: The resource which protects the entire communication
+3.2.1 token
+^^^^^^^^^^^
+ The resource which protects the entire communication
system. The node having the token resource is allowed to
communicate.
- 3.2.2 message: The lock resource which carries the data to
- communicate.
+3.2.2 message
+^^^^^^^^^^^^^
+ The lock resource which carries the data to communicate.
- 3.2.3 ack: The resource, acquiring which means the message has been
+3.2.3 ack
+^^^^^^^^^
+
+ The resource, acquiring which means the message has been
acknowledged by all nodes in the cluster. The BAST of the resource
is used to inform the receiving node that a node wants to
communicate.
The algorithm is:
- 1. receive status - all nodes have concurrent-reader lock on "ack".
+ 1. receive status - all nodes have concurrent-reader lock on "ack"::
+
+ sender receiver receiver
+ "ack":CR "ack":CR "ack":CR
- sender receiver receiver
- "ack":CR "ack":CR "ack":CR
+ 2. sender get EX on "token",
+ sender get EX on "message"::
- 2. sender get EX on "token"
- sender get EX on "message"
- sender receiver receiver
- "token":EX "ack":CR "ack":CR
- "message":EX
- "ack":CR
+ sender receiver receiver
+ "token":EX "ack":CR "ack":CR
+ "message":EX
+ "ack":CR
Sender checks that it still needs to send a message. Messages
received or other events that happened while waiting for the
"token" may have made this message inappropriate or redundant.
- 3. sender writes LVB.
+ 3. sender writes LVB
+
sender down-convert "message" from EX to CW
+
sender try to get EX of "ack"
- [ wait until all receivers have *processed* the "message" ]
- [ triggered by bast of "ack" ]
- receiver get CR on "message"
- receiver read LVB
- receiver processes the message
- [ wait finish ]
- receiver releases "ack"
- receiver tries to get PR on "message"
+ ::
+
+ [ wait until all receivers have *processed* the "message" ]
- sender receiver receiver
- "token":EX "message":CR "message":CR
- "message":CW
- "ack":EX
+ [ triggered by bast of "ack" ]
+ receiver get CR on "message"
+ receiver read LVB
+ receiver processes the message
+ [ wait finish ]
+ receiver releases "ack"
+ receiver tries to get PR on "message"
+
+ sender receiver receiver
+ "token":EX "message":CR "message":CR
+ "message":CW
+ "ack":EX
4. triggered by grant of EX on "ack" (indicating all receivers
have processed message)
+
sender down-converts "ack" from EX to CR
+
sender releases "message"
+
sender releases "token"
- receiver upconvert to PR on "message"
- receiver get CR of "ack"
- receiver release "message"
- sender receiver receiver
- "ack":CR "ack":CR "ack":CR
+ ::
+
+ receiver upconvert to PR on "message"
+ receiver get CR of "ack"
+ receiver release "message"
+
+ sender receiver receiver
+ "ack":CR "ack":CR "ack":CR
4. Handling Failures
+====================
4.1 Node Failure
+----------------
When a node fails, the DLM informs the cluster with the slot
number. The node starts a cluster recovery thread. The cluster
@@ -177,11 +224,11 @@ The algorithm is:
- cleans the bitmap of the failed node
- releases bitmap<number> lock of the failed node
- initiates resync of the bitmap on the current node
- md_check_recovery is invoked within recover_bitmaps,
- then md_check_recovery -> metadata_update_start/finish,
- it will lock the communication by lock_comm.
- Which means when one node is resyncing it blocks all
- other nodes from writing anywhere on the array.
+ md_check_recovery is invoked within recover_bitmaps,
+ then md_check_recovery -> metadata_update_start/finish,
+ it will lock the communication by lock_comm.
+ Which means when one node is resyncing it blocks all
+ other nodes from writing anywhere on the array.
The resync process is the regular md resync. However, in a clustered
environment when a resync is performed, it needs to tell other nodes
@@ -198,6 +245,7 @@ The algorithm is:
particular I/O range should be suspended or not.
4.2 Device Failure
+------------------
Device failures are handled and communicated with the metadata update
routine. When a node detects a device failure it does not allow
@@ -205,38 +253,41 @@ The algorithm is:
acknowledged by all other nodes.
5. Adding a new Device
+======================
For adding a new device, it is necessary that all nodes "see" the new
device to be added. For this, the following algorithm is used:
- 1. Node 1 issues mdadm --manage /dev/mdX --add /dev/sdYY which issues
+ 1. Node 1 issues mdadm --manage /dev/mdX --add /dev/sdYY which issues
ioctl(ADD_NEW_DISK with disc.state set to MD_DISK_CLUSTER_ADD)
- 2. Node 1 sends a NEWDISK message with uuid and slot number
- 3. Other nodes issue kobject_uevent_env with uuid and slot number
+ 2. Node 1 sends a NEWDISK message with uuid and slot number
+ 3. Other nodes issue kobject_uevent_env with uuid and slot number
(Steps 4,5 could be a udev rule)
- 4. In userspace, the node searches for the disk, perhaps
+ 4. In userspace, the node searches for the disk, perhaps
using blkid -t SUB_UUID=""
- 5. Other nodes issue either of the following depending on whether
+ 5. Other nodes issue either of the following depending on whether
the disk was found:
ioctl(ADD_NEW_DISK with disc.state set to MD_DISK_CANDIDATE and
- disc.number set to slot number)
+ disc.number set to slot number)
ioctl(CLUSTERED_DISK_NACK)
- 6. Other nodes drop lock on "no-new-devs" (CR) if device is found
- 7. Node 1 attempts EX lock on "no-new-dev"
- 8. If node 1 gets the lock, it sends METADATA_UPDATED after
+ 6. Other nodes drop lock on "no-new-devs" (CR) if device is found
+ 7. Node 1 attempts EX lock on "no-new-dev"
+ 8. If node 1 gets the lock, it sends METADATA_UPDATED after
unmarking the disk as SpareLocal
- 9. If not (get "no-new-dev" lock), it fails the operation and sends
+ 9. If not (get "no-new-dev" lock), it fails the operation and sends
METADATA_UPDATED.
10. Other nodes get the information whether a disk is added or not
by the following METADATA_UPDATED.
-6. Module interface.
+6. Module interface
+===================
There are 17 call-backs which the md core can make to the cluster
module. Understanding these can give a good overview of the whole
process.
6.1 join(nodes) and leave()
+---------------------------
These are called when an array is started with a clustered bitmap,
and when the array is stopped. join() ensures the cluster is
@@ -244,11 +295,13 @@ The algorithm is:
Only the first 'nodes' nodes in the cluster can use the array.
6.2 slot_number()
+-----------------
Reports the slot number advised by the cluster infrastructure.
Range is from 0 to nodes-1.
6.3 resync_info_update()
+------------------------
This updates the resync range that is stored in the bitmap lock.
The starting point is updated as the resync progresses. The
@@ -256,6 +309,7 @@ The algorithm is:
It does *not* send a RESYNCING message.
6.4 resync_start(), resync_finish()
+-----------------------------------
These are called when resync/recovery/reshape starts or stops.
They update the resyncing range in the bitmap lock and also
@@ -265,8 +319,8 @@ The algorithm is:
resync_finish() also sends a BITMAP_NEEDS_SYNC message which
allows some other node to take over.
-6.5 metadata_update_start(), metadata_update_finish(),
- metadata_update_cancel().
+6.5 metadata_update_start(), metadata_update_finish(), metadata_update_cancel()
+-------------------------------------------------------------------------------
metadata_update_start is used to get exclusive access to
the metadata. If a change is still needed once that access is
@@ -275,6 +329,7 @@ The algorithm is:
can be used to release the lock.
6.6 area_resyncing()
+--------------------
This combines two elements of functionality.
@@ -289,6 +344,7 @@ The algorithm is:
a node failure.
6.7 add_new_disk_start(), add_new_disk_finish(), new_disk_ack()
+---------------------------------------------------------------
These are used to manage the new-disk protocol described above.
When a new device is added, add_new_disk_start() is called before
@@ -300,17 +356,20 @@ The algorithm is:
new_disk_ack() is called.
6.8 remove_disk()
+-----------------
This is called when a spare or failed device is removed from
the array. It causes a REMOVE message to be sent to other nodes.
6.9 gather_bitmaps()
+--------------------
This sends a RE_ADD message to all other nodes and then
gathers bitmap information from all bitmaps. This combined
bitmap is then used to recover the re-added device.
6.10 lock_all_bitmaps() and unlock_all_bitmaps()
+------------------------------------------------
These are called when changing the bitmap to none. If a node plans
to clear the cluster raid's bitmap, it needs to make sure no other
@@ -319,6 +378,7 @@ The algorithm is:
accordingly.
7. Unsupported features
+=======================
There are some things which are not supported by cluster MD yet.
diff --git a/Documentation/md/raid5-cache.txt b/Documentation/driver-api/md/raid5-cache.rst
index 2b210f295786..d7a15f44a7c3 100644
--- a/Documentation/md/raid5-cache.txt
+++ b/Documentation/driver-api/md/raid5-cache.rst
@@ -1,4 +1,6 @@
-RAID5 cache
+================
+RAID 4/5/6 cache
+================
Raid 4/5/6 could include an extra disk for data cache besides normal RAID
disks. The role of RAID disks isn't changed with the cache disk. The cache disk
@@ -6,19 +8,19 @@ caches data to the RAID disks. The cache can be in write-through (supported
since 4.4) or write-back mode (supported since 4.10). mdadm (supported since
3.4) has a new option '--write-journal' to create array with cache. Please
refer to mdadm manual for details. By default (RAID array starts), the cache is
-in write-through mode. A user can switch it to write-back mode by:
+in write-through mode. A user can switch it to write-back mode by::
-echo "write-back" > /sys/block/md0/md/journal_mode
+ echo "write-back" > /sys/block/md0/md/journal_mode
-And switch it back to write-through mode by:
+And switch it back to write-through mode by::
-echo "write-through" > /sys/block/md0/md/journal_mode
+ echo "write-through" > /sys/block/md0/md/journal_mode
In both modes, all writes to the array will hit cache disk first. This means
the cache disk must be fast and sustainable.
--------------------------------------
-write-through mode:
+write-through mode
+==================
This mode mainly fixes the 'write hole' issue. For RAID 4/5/6 array, an unclean
shutdown can cause data in some stripes to not be in consistent state, eg, data
@@ -42,8 +44,8 @@ exposed to 'write hole' again.
In write-through mode, the cache disk isn't required to be big. Several
hundred megabytes are enough.
---------------------------------------
-write-back mode:
+write-back mode
+===============
write-back mode fixes the 'write hole' issue too, since all write data is
cached on cache disk. But the main goal of 'write-back' cache is to speed up
@@ -64,16 +66,16 @@ data loss.
In write-back mode, MD also caches data in memory. The memory cache includes
the same data stored on cache disk, so a power loss doesn't cause data loss.
The memory cache size has performance impact for the array. It's recommended
-the size is big. A user can configure the size by:
+the size is big. A user can configure the size by::
-echo "2048" > /sys/block/md0/md/stripe_cache_size
+ echo "2048" > /sys/block/md0/md/stripe_cache_size
Too small cache disk will make the write aggregation less efficient in this
mode depending on the workloads. It's recommended to use a cache disk with at
least several gigabytes size in write-back mode.
---------------------------------------
-The implementation:
+The implementation
+==================
The write-through and write-back cache use the same disk format. The cache disk
is organized as a simple write log. The log consists of 'meta data' and 'data'
diff --git a/Documentation/md/raid5-ppl.txt b/Documentation/driver-api/md/raid5-ppl.rst
index bfa092589e00..357e5515bc55 100644
--- a/Documentation/md/raid5-ppl.txt
+++ b/Documentation/driver-api/md/raid5-ppl.rst
@@ -1,4 +1,6 @@
+==================
Partial Parity Log
+==================
Partial Parity Log (PPL) is a feature available for RAID5 arrays. The issue
addressed by PPL is that after a dirty shutdown, parity of a particular stripe
diff --git a/Documentation/driver-api/memory-devices/index.rst b/Documentation/driver-api/memory-devices/index.rst
new file mode 100644
index 000000000000..28101458cda5
--- /dev/null
+++ b/Documentation/driver-api/memory-devices/index.rst
@@ -0,0 +1,18 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=========================
+Memory Controller drivers
+=========================
+
+.. toctree::
+ :maxdepth: 1
+
+ ti-emif
+ ti-gpmc
+
+.. only:: subproject and html
+
+ Indices
+ =======
+
+ * :ref:`genindex`
diff --git a/Documentation/memory-devices/ti-emif.txt b/Documentation/driver-api/memory-devices/ti-emif.rst
index f4ad9a7d0f4b..dea2ad9bcd7e 100644
--- a/Documentation/memory-devices/ti-emif.txt
+++ b/Documentation/driver-api/memory-devices/ti-emif.rst
@@ -1,20 +1,24 @@
-TI EMIF SDRAM Controller Driver:
+.. SPDX-License-Identifier: GPL-2.0
+
+===============================
+TI EMIF SDRAM Controller Driver
+===============================
Author
-========
+======
Aneesh V <aneesh@ti.com>
Location
-============
+========
driver/memory/emif.c
Supported SoCs:
-===================
+===============
TI OMAP44xx
TI OMAP54xx
Menuconfig option:
-==========================
+==================
Device Drivers
Memory devices
Texas Instruments EMIF driver
@@ -29,10 +33,11 @@ functions of the driver includes re-configuring AC timing
parameters and other settings during frequency, voltage and
temperature changes
-Platform Data (see include/linux/platform_data/emif_plat.h):
-=====================================================================
+Platform Data (see include/linux/platform_data/emif_plat.h)
+===========================================================
DDR device details and other board dependent and SoC dependent
information can be passed through platform data (struct emif_platform_data)
+
- DDR device details: 'struct ddr_device_info'
- Device AC timings: 'struct lpddr2_timings' and 'struct lpddr2_min_tck'
- Custom configurations: customizable policy options through
@@ -40,17 +45,19 @@ information can be passed through platform data (struct emif_platform_data)
- IP revision
- PHY type
-Interface to the external world:
-================================
+Interface to the external world
+===============================
EMIF driver registers notifiers for voltage and frequency changes
affecting EMIF and takes appropriate actions when these are invoked.
+
- freq_pre_notify_handling()
- freq_post_notify_handling()
- volt_notify_handling()
Debugfs
-========
+=======
The driver creates two debugfs entries per device.
+
- regcache_dump : dump of register values calculated and saved for all
frequencies used so far.
- mr4 : last polled value of MR4 register in the LPDDR2 device. MR4
diff --git a/Documentation/bus-devices/ti-gpmc.txt b/Documentation/driver-api/memory-devices/ti-gpmc.rst
index cc9ce57e0a26..33efcb81f080 100644
--- a/Documentation/bus-devices/ti-gpmc.txt
+++ b/Documentation/driver-api/memory-devices/ti-gpmc.rst
@@ -1,8 +1,12 @@
-GPMC (General Purpose Memory Controller):
-=========================================
+.. SPDX-License-Identifier: GPL-2.0
+
+========================================
+GPMC (General Purpose Memory Controller)
+========================================
GPMC is a unified memory controller dedicated to interfacing external
memory devices like
+
* Asynchronous SRAM like memories and application specific integrated
circuit devices.
* Asynchronous, synchronous, and page mode burst NOR flash devices
@@ -48,75 +52,128 @@ most of the datasheets & hardware (to be exact none of those supported
in mainline having custom timing routine) and by simulation.
gpmc timing dependency on peripheral timings:
+
[<gpmc_timing>: <peripheral timing1>, <peripheral timing2> ...]
1. common
-cs_on: t_ceasu
-adv_on: t_avdasu, t_ceavd
+
+cs_on:
+ t_ceasu
+adv_on:
+ t_avdasu, t_ceavd
2. sync common
-sync_clk: clk
-page_burst_access: t_bacc
-clk_activation: t_ces, t_avds
+
+sync_clk:
+ clk
+page_burst_access:
+ t_bacc
+clk_activation:
+ t_ces, t_avds
3. read async muxed
-adv_rd_off: t_avdp_r
-oe_on: t_oeasu, t_aavdh
-access: t_iaa, t_oe, t_ce, t_aa
-rd_cycle: t_rd_cycle, t_cez_r, t_oez
+
+adv_rd_off:
+ t_avdp_r
+oe_on:
+ t_oeasu, t_aavdh
+access:
+ t_iaa, t_oe, t_ce, t_aa
+rd_cycle:
+ t_rd_cycle, t_cez_r, t_oez
4. read async non-muxed
-adv_rd_off: t_avdp_r
-oe_on: t_oeasu
-access: t_iaa, t_oe, t_ce, t_aa
-rd_cycle: t_rd_cycle, t_cez_r, t_oez
+
+adv_rd_off:
+ t_avdp_r
+oe_on:
+ t_oeasu
+access:
+ t_iaa, t_oe, t_ce, t_aa
+rd_cycle:
+ t_rd_cycle, t_cez_r, t_oez
5. read sync muxed
-adv_rd_off: t_avdp_r, t_avdh
-oe_on: t_oeasu, t_ach, cyc_aavdh_oe
-access: t_iaa, cyc_iaa, cyc_oe
-rd_cycle: t_cez_r, t_oez, t_ce_rdyz
+
+adv_rd_off:
+ t_avdp_r, t_avdh
+oe_on:
+ t_oeasu, t_ach, cyc_aavdh_oe
+access:
+ t_iaa, cyc_iaa, cyc_oe
+rd_cycle:
+ t_cez_r, t_oez, t_ce_rdyz
6. read sync non-muxed
-adv_rd_off: t_avdp_r
-oe_on: t_oeasu
-access: t_iaa, cyc_iaa, cyc_oe
-rd_cycle: t_cez_r, t_oez, t_ce_rdyz
+
+adv_rd_off:
+ t_avdp_r
+oe_on:
+ t_oeasu
+access:
+ t_iaa, cyc_iaa, cyc_oe
+rd_cycle:
+ t_cez_r, t_oez, t_ce_rdyz
7. write async muxed
-adv_wr_off: t_avdp_w
-we_on, wr_data_mux_bus: t_weasu, t_aavdh, cyc_aavhd_we
-we_off: t_wpl
-cs_wr_off: t_wph
-wr_cycle: t_cez_w, t_wr_cycle
+
+adv_wr_off:
+ t_avdp_w
+we_on, wr_data_mux_bus:
+ t_weasu, t_aavdh, cyc_aavhd_we
+we_off:
+ t_wpl
+cs_wr_off:
+ t_wph
+wr_cycle:
+ t_cez_w, t_wr_cycle
8. write async non-muxed
-adv_wr_off: t_avdp_w
-we_on, wr_data_mux_bus: t_weasu
-we_off: t_wpl
-cs_wr_off: t_wph
-wr_cycle: t_cez_w, t_wr_cycle
+
+adv_wr_off:
+ t_avdp_w
+we_on, wr_data_mux_bus:
+ t_weasu
+we_off:
+ t_wpl
+cs_wr_off:
+ t_wph
+wr_cycle:
+ t_cez_w, t_wr_cycle
9. write sync muxed
-adv_wr_off: t_avdp_w, t_avdh
-we_on, wr_data_mux_bus: t_weasu, t_rdyo, t_aavdh, cyc_aavhd_we
-we_off: t_wpl, cyc_wpl
-cs_wr_off: t_wph
-wr_cycle: t_cez_w, t_ce_rdyz
+
+adv_wr_off:
+ t_avdp_w, t_avdh
+we_on, wr_data_mux_bus:
+ t_weasu, t_rdyo, t_aavdh, cyc_aavhd_we
+we_off:
+ t_wpl, cyc_wpl
+cs_wr_off:
+ t_wph
+wr_cycle:
+ t_cez_w, t_ce_rdyz
10. write sync non-muxed
-adv_wr_off: t_avdp_w
-we_on, wr_data_mux_bus: t_weasu, t_rdyo
-we_off: t_wpl, cyc_wpl
-cs_wr_off: t_wph
-wr_cycle: t_cez_w, t_ce_rdyz
-
-
-Note: Many of gpmc timings are dependent on other gpmc timings (a few
-gpmc timings purely dependent on other gpmc timings, a reason that
-some of the gpmc timings are missing above), and it will result in
-indirect dependency of peripheral timings to gpmc timings other than
-mentioned above, refer timing routine for more details. To know what
-these peripheral timings correspond to, please see explanations in
-struct gpmc_device_timings definition. And for gpmc timings refer
-IP details (link above).
+
+adv_wr_off:
+ t_avdp_w
+we_on, wr_data_mux_bus:
+ t_weasu, t_rdyo
+we_off:
+ t_wpl, cyc_wpl
+cs_wr_off:
+ t_wph
+wr_cycle:
+ t_cez_w, t_ce_rdyz
+
+
+Note:
+ Many of gpmc timings are dependent on other gpmc timings (a few
+ gpmc timings purely dependent on other gpmc timings, a reason that
+ some of the gpmc timings are missing above), and it will result in
+ indirect dependency of peripheral timings to gpmc timings other than
+ mentioned above, refer timing routine for more details. To know what
+ these peripheral timings correspond to, please see explanations in
+ struct gpmc_device_timings definition. And for gpmc timings refer
+ IP details (link above).
diff --git a/Documentation/men-chameleon-bus.txt b/Documentation/driver-api/men-chameleon-bus.rst
index 1b1f048aa748..1b1f048aa748 100644
--- a/Documentation/men-chameleon-bus.txt
+++ b/Documentation/driver-api/men-chameleon-bus.rst
diff --git a/Documentation/driver-api/mmc/index.rst b/Documentation/driver-api/mmc/index.rst
new file mode 100644
index 000000000000..7339736ac774
--- /dev/null
+++ b/Documentation/driver-api/mmc/index.rst
@@ -0,0 +1,13 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+========================
+MMC/SD/SDIO card support
+========================
+
+.. toctree::
+ :maxdepth: 1
+
+ mmc-dev-attrs
+ mmc-dev-parts
+ mmc-async-req
+ mmc-tools
diff --git a/Documentation/mmc/mmc-async-req.txt b/Documentation/driver-api/mmc/mmc-async-req.rst
index ae1907b10e4a..0f7197c9c3b5 100644
--- a/Documentation/mmc/mmc-async-req.txt
+++ b/Documentation/driver-api/mmc/mmc-async-req.rst
@@ -1,13 +1,20 @@
+========================
+MMC Asynchronous Request
+========================
+
Rationale
=========
How significant is the cache maintenance overhead?
+
It depends. Fast eMMC and multiple cache levels with speculative cache
pre-fetch make the cache overhead relatively significant. If the DMA
preparations for the next request are done in parallel with the current
transfer, the DMA preparation overhead would not affect the MMC performance.
+
The intention of non-blocking (asynchronous) MMC requests is to minimize the
time between when an MMC request ends and another MMC request begins.
+
Using mmc_wait_for_req(), the MMC controller is idle while dma_map_sg and
dma_unmap_sg are processing. Using non-blocking MMC requests makes it
possible to prepare the caches for next job in parallel with an active
@@ -17,6 +24,7 @@ MMC block driver
================
The mmc_blk_issue_rw_rq() in the MMC block driver is made non-blocking.
+
The increase in throughput is proportional to the time it takes to
prepare (major part of preparations are dma_map_sg() and dma_unmap_sg())
a request and how fast the memory is. The faster the MMC/SD is the
@@ -35,6 +43,7 @@ MMC core API extension
======================
There is one new public function mmc_start_req().
+
It starts a new MMC command request for a host. The function isn't
truly non-blocking. If there is an ongoing async request it waits
for completion of that request and starts the new one and returns. It
@@ -47,6 +56,7 @@ MMC host extensions
There are two optional members in the mmc_host_ops -- pre_req() and
post_req() -- that the host driver may implement in order to move work
to before and after the actual mmc_host_ops.request() function is called.
+
In the DMA case pre_req() may do dma_map_sg() and prepare the DMA
descriptor, and post_req() runs the dma_unmap_sg().
@@ -55,33 +65,34 @@ Optimize for the first request
The first request in a series of requests can't be prepared in parallel
with the previous transfer, since there is no previous request.
+
The argument is_first_req in pre_req() indicates that there is no previous
request. The host driver may optimize for this scenario to minimize
the performance loss. A way to optimize for this is to split the current
request in two chunks, prepare the first chunk and start the request,
and finally prepare the second chunk and start the transfer.
-Pseudocode to handle is_first_req scenario with minimal prepare overhead:
-
-if (is_first_req && req->size > threshold)
- /* start MMC transfer for the complete transfer size */
- mmc_start_command(MMC_CMD_TRANSFER_FULL_SIZE);
-
- /*
- * Begin to prepare DMA while cmd is being processed by MMC.
- * The first chunk of the request should take the same time
- * to prepare as the "MMC process command time".
- * If prepare time exceeds MMC cmd time
- * the transfer is delayed, guesstimate max 4k as first chunk size.
- */
- prepare_1st_chunk_for_dma(req);
- /* flush pending desc to the DMAC (dmaengine.h) */
- dma_issue_pending(req->dma_desc);
-
- prepare_2nd_chunk_for_dma(req);
- /*
- * The second issue_pending should be called before MMC runs out
- * of the first chunk. If the MMC runs out of the first data chunk
- * before this call, the transfer is delayed.
- */
- dma_issue_pending(req->dma_desc);
+Pseudocode to handle is_first_req scenario with minimal prepare overhead::
+
+ if (is_first_req && req->size > threshold)
+ /* start MMC transfer for the complete transfer size */
+ mmc_start_command(MMC_CMD_TRANSFER_FULL_SIZE);
+
+ /*
+ * Begin to prepare DMA while cmd is being processed by MMC.
+ * The first chunk of the request should take the same time
+ * to prepare as the "MMC process command time".
+ * If prepare time exceeds MMC cmd time
+ * the transfer is delayed, guesstimate max 4k as first chunk size.
+ */
+ prepare_1st_chunk_for_dma(req);
+ /* flush pending desc to the DMAC (dmaengine.h) */
+ dma_issue_pending(req->dma_desc);
+
+ prepare_2nd_chunk_for_dma(req);
+ /*
+ * The second issue_pending should be called before MMC runs out
+ * of the first chunk. If the MMC runs out of the first data chunk
+ * before this call, the transfer is delayed.
+ */
+ dma_issue_pending(req->dma_desc);
diff --git a/Documentation/mmc/mmc-dev-attrs.txt b/Documentation/driver-api/mmc/mmc-dev-attrs.rst
index 4ad0bb17f343..4f44b1b730d6 100644
--- a/Documentation/mmc/mmc-dev-attrs.txt
+++ b/Documentation/driver-api/mmc/mmc-dev-attrs.rst
@@ -1,3 +1,4 @@
+==================================
SD and MMC Block Device Attributes
==================================
@@ -6,23 +7,29 @@ SD or MMC device.
The following attributes are read/write.
- force_ro Enforce read-only access even if write protect switch is off.
+ ======== ===============================================
+ force_ro Enforce read-only access even if write protect switch is off.
+ ======== ===============================================
SD and MMC Device Attributes
============================
All attributes are read-only.
+ ====================== ===============================================
cid Card Identification Register
csd Card Specific Data Register
scr SD Card Configuration Register (SD only)
date Manufacturing Date (from CID Register)
- fwrev Firmware/Product Revision (from CID Register) (SD and MMCv1 only)
- hwrev Hardware/Product Revision (from CID Register) (SD and MMCv1 only)
+ fwrev Firmware/Product Revision (from CID Register)
+ (SD and MMCv1 only)
+ hwrev Hardware/Product Revision (from CID Register)
+ (SD and MMCv1 only)
manfid Manufacturer ID (from CID Register)
name Product Name (from CID Register)
oemid OEM/Application ID (from CID Register)
- prv Product Revision (from CID Register) (SD and MMCv4 only)
+ prv Product Revision (from CID Register)
+ (SD and MMCv4 only)
serial Product Serial Number (from CID Register)
erase_size Erase group size
preferred_erase_size Preferred erase size
@@ -30,7 +37,10 @@ All attributes are read-only.
rel_sectors Reliable write sector count
ocr Operation Conditions Register
dsr Driver Stage Register
- cmdq_en Command Queue enabled: 1 => enabled, 0 => not enabled
+ cmdq_en Command Queue enabled:
+
+ 1 => enabled, 0 => not enabled
+ ====================== ===============================================
Note on Erase Size and Preferred Erase Size:
@@ -44,14 +54,15 @@ Note on Erase Size and Preferred Erase Size:
SD/MMC cards can erase an arbitrarily large area up to and
including the whole card. When erasing a large area it may
be desirable to do it in smaller chunks for three reasons:
- 1. A single erase command will make all other I/O on
+
+ 1. A single erase command will make all other I/O on
the card wait. This is not a problem if the whole card
is being erased, but erasing one partition will make
I/O for another partition on the same card wait for the
duration of the erase - which could be several
minutes.
- 2. To be able to inform the user of erase progress.
- 3. The erase timeout becomes too large to be very
+ 2. To be able to inform the user of erase progress.
+ 3. The erase timeout becomes too large to be very
useful. Because the erase timeout contains a margin
which is multiplied by the size of the erase area,
the value can end up being several minutes for large
@@ -72,6 +83,9 @@ Note on Erase Size and Preferred Erase Size:
"preferred_erase_size" is in bytes.
Note on raw_rpmb_size_mult:
+
"raw_rpmb_size_mult" is a multiple of 128kB block.
+
RPMB size in byte is calculated by using the following equation:
- RPMB partition size = 128kB x raw_rpmb_size_mult
+
+ RPMB partition size = 128kB x raw_rpmb_size_mult
diff --git a/Documentation/mmc/mmc-dev-parts.txt b/Documentation/driver-api/mmc/mmc-dev-parts.rst
index f08d078d43cf..995922f1f744 100644
--- a/Documentation/mmc/mmc-dev-parts.txt
+++ b/Documentation/driver-api/mmc/mmc-dev-parts.rst
@@ -1,3 +1,4 @@
+============================
SD and MMC Device Partitions
============================
@@ -18,18 +19,18 @@ platform, write access is disabled by default to reduce the chance of
accidental bricking.
To enable write access to /dev/mmcblkXbootY, disable the forced read-only
-access with:
+access with::
-echo 0 > /sys/block/mmcblkXbootY/force_ro
+ echo 0 > /sys/block/mmcblkXbootY/force_ro
-To re-enable read-only access:
+To re-enable read-only access::
-echo 1 > /sys/block/mmcblkXbootY/force_ro
+ echo 1 > /sys/block/mmcblkXbootY/force_ro
The boot partitions can also be locked read only until the next power on,
-with:
+with::
-echo 1 > /sys/block/mmcblkXbootY/ro_lock_until_next_power_on
+ echo 1 > /sys/block/mmcblkXbootY/ro_lock_until_next_power_on
This is a feature of the card and not of the kernel. If the card does
not support boot partition locking, the file will not exist. If the
diff --git a/Documentation/mmc/mmc-tools.txt b/Documentation/driver-api/mmc/mmc-tools.rst
index 735509c165d5..54406093768b 100644
--- a/Documentation/mmc/mmc-tools.txt
+++ b/Documentation/driver-api/mmc/mmc-tools.rst
@@ -1,14 +1,17 @@
+======================
MMC tools introduction
======================
There is one MMC test tool called mmc-utils, which is maintained by Chris Ball,
you can find it at the below public git repository:
-http://git.kernel.org/cgit/linux/kernel/git/cjb/mmc-utils.git/
+
+ http://git.kernel.org/cgit/linux/kernel/git/cjb/mmc-utils.git/
Functions
=========
The mmc-utils tools can do the following:
+
- Print and parse extcsd data.
- Determine the eMMC writeprotect status.
- Set the eMMC writeprotect status.
diff --git a/Documentation/driver-api/mtd/index.rst b/Documentation/driver-api/mtd/index.rst
new file mode 100644
index 000000000000..436ba5a851d7
--- /dev/null
+++ b/Documentation/driver-api/mtd/index.rst
@@ -0,0 +1,12 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==============================
+Memory Technology Device (MTD)
+==============================
+
+.. toctree::
+ :maxdepth: 1
+
+ intel-spi
+ nand_ecc
+ spi-nor
diff --git a/Documentation/mtd/intel-spi.txt b/Documentation/driver-api/mtd/intel-spi.rst
index bc357729c2cb..0e6d9cd5388d 100644
--- a/Documentation/mtd/intel-spi.txt
+++ b/Documentation/driver-api/mtd/intel-spi.rst
@@ -1,5 +1,6 @@
+==============================
Upgrading BIOS using intel-spi
-------------------------------
+==============================
Many Intel CPUs like Baytrail and Braswell include SPI serial flash host
controller which is used to hold BIOS and other platform specific data.
@@ -36,45 +37,45 @@ Linux.
module parameter to modprobe).
4) Once the board is up and running again, find the right MTD partition
- (it is named as "BIOS"):
+ (it is named as "BIOS")::
- # cat /proc/mtd
- dev: size erasesize name
- mtd0: 00800000 00001000 "BIOS"
+ # cat /proc/mtd
+ dev: size erasesize name
+ mtd0: 00800000 00001000 "BIOS"
So here it will be /dev/mtd0 but it may vary.
- 5) Make backup of the existing image first:
+ 5) Make backup of the existing image first::
- # dd if=/dev/mtd0ro of=bios.bak
- 16384+0 records in
- 16384+0 records out
- 8388608 bytes (8.4 MB) copied, 10.0269 s, 837 kB/s
+ # dd if=/dev/mtd0ro of=bios.bak
+ 16384+0 records in
+ 16384+0 records out
+ 8388608 bytes (8.4 MB) copied, 10.0269 s, 837 kB/s
- 6) Verify the backup
+ 6) Verify the backup::
- # sha1sum /dev/mtd0ro bios.bak
- fdbb011920572ca6c991377c4b418a0502668b73 /dev/mtd0ro
- fdbb011920572ca6c991377c4b418a0502668b73 bios.bak
+ # sha1sum /dev/mtd0ro bios.bak
+ fdbb011920572ca6c991377c4b418a0502668b73 /dev/mtd0ro
+ fdbb011920572ca6c991377c4b418a0502668b73 bios.bak
The SHA1 sums must match. Otherwise do not continue any further!
7) Erase the SPI serial flash. After this step, do not reboot the
- board! Otherwise it will not start anymore.
+ board! Otherwise it will not start anymore::
- # flash_erase /dev/mtd0 0 0
- Erasing 4 Kibyte @ 7ff000 -- 100 % complete
+ # flash_erase /dev/mtd0 0 0
+ Erasing 4 Kibyte @ 7ff000 -- 100 % complete
8) Once completed without errors you can write the new BIOS image:
# dd if=MNW2MAX1.X64.0092.R01.1605221712.bin of=/dev/mtd0
9) Verify that the new content of the SPI serial flash matches the new
- BIOS image:
+ BIOS image::
- # sha1sum /dev/mtd0ro MNW2MAX1.X64.0092.R01.1605221712.bin
- 9b4df9e4be2057fceec3a5529ec3d950836c87a2 /dev/mtd0ro
- 9b4df9e4be2057fceec3a5529ec3d950836c87a2 MNW2MAX1.X64.0092.R01.1605221712.bin
+ # sha1sum /dev/mtd0ro MNW2MAX1.X64.0092.R01.1605221712.bin
+ 9b4df9e4be2057fceec3a5529ec3d950836c87a2 /dev/mtd0ro
+ 9b4df9e4be2057fceec3a5529ec3d950836c87a2 MNW2MAX1.X64.0092.R01.1605221712.bin
The SHA1 sums should match.
@@ -84,5 +85,6 @@ Linux.
References
----------
-[1] https://firmware.intel.com/sites/default/files/MinnowBoard.MAX_.X64.92.R01.zip
+[1] https://firmware.intel.com/sites/default/files/MinnowBoard%2EMAX_%2EX64%2E92%2ER01%2Ezip
+
[2] http://www.linux-mtd.infradead.org/
diff --git a/Documentation/mtd/nand_ecc.txt b/Documentation/driver-api/mtd/nand_ecc.rst
index f8c3284bf6a7..e8d3c53a5056 100644
--- a/Documentation/mtd/nand_ecc.txt
+++ b/Documentation/driver-api/mtd/nand_ecc.rst
@@ -1,3 +1,7 @@
+==========================
+NAND Error-correction Code
+==========================
+
Introduction
============
@@ -37,63 +41,79 @@ sometimes also referred to as xor. In C the operator for xor is ^
Back to ecc.
Let's give a small figure:
+========= ==== ==== ==== ==== ==== ==== ==== ==== === === === === ====
byte 0: bit7 bit6 bit5 bit4 bit3 bit2 bit1 bit0 rp0 rp2 rp4 ... rp14
byte 1: bit7 bit6 bit5 bit4 bit3 bit2 bit1 bit0 rp1 rp2 rp4 ... rp14
byte 2: bit7 bit6 bit5 bit4 bit3 bit2 bit1 bit0 rp0 rp3 rp4 ... rp14
byte 3: bit7 bit6 bit5 bit4 bit3 bit2 bit1 bit0 rp1 rp3 rp4 ... rp14
byte 4: bit7 bit6 bit5 bit4 bit3 bit2 bit1 bit0 rp0 rp2 rp5 ... rp14
-....
+...
byte 254: bit7 bit6 bit5 bit4 bit3 bit2 bit1 bit0 rp0 rp3 rp5 ... rp15
byte 255: bit7 bit6 bit5 bit4 bit3 bit2 bit1 bit0 rp1 rp3 rp5 ... rp15
cp1 cp0 cp1 cp0 cp1 cp0 cp1 cp0
cp3 cp3 cp2 cp2 cp3 cp3 cp2 cp2
cp5 cp5 cp5 cp5 cp4 cp4 cp4 cp4
+========= ==== ==== ==== ==== ==== ==== ==== ==== === === === === ====
This figure represents a sector of 256 bytes.
cp is my abbreviation for column parity, rp for row parity.
Let's start to explain column parity.
-cp0 is the parity that belongs to all bit0, bit2, bit4, bit6.
-so the sum of all bit0, bit2, bit4 and bit6 values + cp0 itself is even.
+
+- cp0 is the parity that belongs to all bit0, bit2, bit4, bit6.
+
+ so the sum of all bit0, bit2, bit4 and bit6 values + cp0 itself is even.
+
Similarly cp1 is the sum of all bit1, bit3, bit5 and bit7.
-cp2 is the parity over bit0, bit1, bit4 and bit5
-cp3 is the parity over bit2, bit3, bit6 and bit7.
-cp4 is the parity over bit0, bit1, bit2 and bit3.
-cp5 is the parity over bit4, bit5, bit6 and bit7.
+
+- cp2 is the parity over bit0, bit1, bit4 and bit5
+- cp3 is the parity over bit2, bit3, bit6 and bit7.
+- cp4 is the parity over bit0, bit1, bit2 and bit3.
+- cp5 is the parity over bit4, bit5, bit6 and bit7.
+
Note that each of cp0 .. cp5 is exactly one bit.
Row parity actually works almost the same.
-rp0 is the parity of all even bytes (0, 2, 4, 6, ... 252, 254)
-rp1 is the parity of all odd bytes (1, 3, 5, 7, ..., 253, 255)
-rp2 is the parity of all bytes 0, 1, 4, 5, 8, 9, ...
-(so handle two bytes, then skip 2 bytes).
-rp3 is covers the half rp2 does not cover (bytes 2, 3, 6, 7, 10, 11, ...)
-for rp4 the rule is cover 4 bytes, skip 4 bytes, cover 4 bytes, skip 4 etc.
-so rp4 calculates parity over bytes 0, 1, 2, 3, 8, 9, 10, 11, 16, ...)
-and rp5 covers the other half, so bytes 4, 5, 6, 7, 12, 13, 14, 15, 20, ..
+
+- rp0 is the parity of all even bytes (0, 2, 4, 6, ... 252, 254)
+- rp1 is the parity of all odd bytes (1, 3, 5, 7, ..., 253, 255)
+- rp2 is the parity of all bytes 0, 1, 4, 5, 8, 9, ...
+ (so handle two bytes, then skip 2 bytes).
+- rp3 covers the half rp2 does not cover (bytes 2, 3, 6, 7, 10, 11, ...)
+- for rp4 the rule is cover 4 bytes, skip 4 bytes, cover 4 bytes, skip 4 etc.
+
+ so rp4 calculates parity over bytes 0, 1, 2, 3, 8, 9, 10, 11, 16, ...)
+- and rp5 covers the other half, so bytes 4, 5, 6, 7, 12, 13, 14, 15, 20, ..
+
The story now becomes quite boring. I guess you get the idea.
-rp6 covers 8 bytes then skips 8 etc
-rp7 skips 8 bytes then covers 8 etc
-rp8 covers 16 bytes then skips 16 etc
-rp9 skips 16 bytes then covers 16 etc
-rp10 covers 32 bytes then skips 32 etc
-rp11 skips 32 bytes then covers 32 etc
-rp12 covers 64 bytes then skips 64 etc
-rp13 skips 64 bytes then covers 64 etc
-rp14 covers 128 bytes then skips 128
-rp15 skips 128 bytes then covers 128
+
+- rp6 covers 8 bytes then skips 8 etc
+- rp7 skips 8 bytes then covers 8 etc
+- rp8 covers 16 bytes then skips 16 etc
+- rp9 skips 16 bytes then covers 16 etc
+- rp10 covers 32 bytes then skips 32 etc
+- rp11 skips 32 bytes then covers 32 etc
+- rp12 covers 64 bytes then skips 64 etc
+- rp13 skips 64 bytes then covers 64 etc
+- rp14 covers 128 bytes then skips 128
+- rp15 skips 128 bytes then covers 128
In the end the parity bits are grouped together in three bytes as
follows:
+
+===== ===== ===== ===== ===== ===== ===== ===== =====
ECC Bit 7 Bit 6 Bit 5 Bit 4 Bit 3 Bit 2 Bit 1 Bit 0
+===== ===== ===== ===== ===== ===== ===== ===== =====
ECC 0 rp07 rp06 rp05 rp04 rp03 rp02 rp01 rp00
ECC 1 rp15 rp14 rp13 rp12 rp11 rp10 rp09 rp08
ECC 2 cp5 cp4 cp3 cp2 cp1 cp0 1 1
+===== ===== ===== ===== ===== ===== ===== ===== =====
I detected after writing this that ST application note AN1823
(http://www.st.com/stonline/) gives a much
nicer picture.(but they use line parity as term where I use row parity)
Oh well, I'm graphically challenged, so suffer with me for a moment :-)
+
And I could not reuse the ST picture anyway for copyright reasons.
@@ -101,9 +121,10 @@ Attempt 0
=========
Implementing the parity calculation is pretty simple.
-In C pseudocode:
-for (i = 0; i < 256; i++)
-{
+In C pseudocode::
+
+ for (i = 0; i < 256; i++)
+ {
if (i & 0x01)
rp1 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp1;
else
@@ -142,7 +163,7 @@ for (i = 0; i < 256; i++)
cp3 = bit7 ^ bit6 ^ bit3 ^ bit2 ^ cp3
cp4 = bit3 ^ bit2 ^ bit1 ^ bit0 ^ cp4
cp5 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ cp5
-}
+ }
Analysis 0
@@ -167,82 +188,84 @@ This leads to:
Attempt 1
=========
-const char parity[256] = {
- 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
- 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
- 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
- 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
- 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
- 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
- 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
- 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
- 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
- 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
- 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
- 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
- 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
- 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
- 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
- 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0
-};
-
-void ecc1(const unsigned char *buf, unsigned char *code)
-{
- int i;
- const unsigned char *bp = buf;
- unsigned char cur;
- unsigned char rp0, rp1, rp2, rp3, rp4, rp5, rp6, rp7;
- unsigned char rp8, rp9, rp10, rp11, rp12, rp13, rp14, rp15;
- unsigned char par;
-
- par = 0;
- rp0 = 0; rp1 = 0; rp2 = 0; rp3 = 0;
- rp4 = 0; rp5 = 0; rp6 = 0; rp7 = 0;
- rp8 = 0; rp9 = 0; rp10 = 0; rp11 = 0;
- rp12 = 0; rp13 = 0; rp14 = 0; rp15 = 0;
-
- for (i = 0; i < 256; i++)
- {
- cur = *bp++;
- par ^= cur;
- if (i & 0x01) rp1 ^= cur; else rp0 ^= cur;
- if (i & 0x02) rp3 ^= cur; else rp2 ^= cur;
- if (i & 0x04) rp5 ^= cur; else rp4 ^= cur;
- if (i & 0x08) rp7 ^= cur; else rp6 ^= cur;
- if (i & 0x10) rp9 ^= cur; else rp8 ^= cur;
- if (i & 0x20) rp11 ^= cur; else rp10 ^= cur;
- if (i & 0x40) rp13 ^= cur; else rp12 ^= cur;
- if (i & 0x80) rp15 ^= cur; else rp14 ^= cur;
- }
- code[0] =
- (parity[rp7] << 7) |
- (parity[rp6] << 6) |
- (parity[rp5] << 5) |
- (parity[rp4] << 4) |
- (parity[rp3] << 3) |
- (parity[rp2] << 2) |
- (parity[rp1] << 1) |
- (parity[rp0]);
- code[1] =
- (parity[rp15] << 7) |
- (parity[rp14] << 6) |
- (parity[rp13] << 5) |
- (parity[rp12] << 4) |
- (parity[rp11] << 3) |
- (parity[rp10] << 2) |
- (parity[rp9] << 1) |
- (parity[rp8]);
- code[2] =
- (parity[par & 0xf0] << 7) |
- (parity[par & 0x0f] << 6) |
- (parity[par & 0xcc] << 5) |
- (parity[par & 0x33] << 4) |
- (parity[par & 0xaa] << 3) |
- (parity[par & 0x55] << 2);
- code[0] = ~code[0];
- code[1] = ~code[1];
- code[2] = ~code[2];
-}
+::
+
+ const char parity[256] = {
+ 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+ 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+ 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+ 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+ 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+ 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+ 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+ 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+ 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+ 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+ 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+ 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+ 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+ 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+ 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+ 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0
+ };
+
+ void ecc1(const unsigned char *buf, unsigned char *code)
+ {
+ int i;
+ const unsigned char *bp = buf;
+ unsigned char cur;
+ unsigned char rp0, rp1, rp2, rp3, rp4, rp5, rp6, rp7;
+ unsigned char rp8, rp9, rp10, rp11, rp12, rp13, rp14, rp15;
+ unsigned char par;
+
+ par = 0;
+ rp0 = 0; rp1 = 0; rp2 = 0; rp3 = 0;
+ rp4 = 0; rp5 = 0; rp6 = 0; rp7 = 0;
+ rp8 = 0; rp9 = 0; rp10 = 0; rp11 = 0;
+ rp12 = 0; rp13 = 0; rp14 = 0; rp15 = 0;
+
+ for (i = 0; i < 256; i++)
+ {
+ cur = *bp++;
+ par ^= cur;
+ if (i & 0x01) rp1 ^= cur; else rp0 ^= cur;
+ if (i & 0x02) rp3 ^= cur; else rp2 ^= cur;
+ if (i & 0x04) rp5 ^= cur; else rp4 ^= cur;
+ if (i & 0x08) rp7 ^= cur; else rp6 ^= cur;
+ if (i & 0x10) rp9 ^= cur; else rp8 ^= cur;
+ if (i & 0x20) rp11 ^= cur; else rp10 ^= cur;
+ if (i & 0x40) rp13 ^= cur; else rp12 ^= cur;
+ if (i & 0x80) rp15 ^= cur; else rp14 ^= cur;
+ }
+ code[0] =
+ (parity[rp7] << 7) |
+ (parity[rp6] << 6) |
+ (parity[rp5] << 5) |
+ (parity[rp4] << 4) |
+ (parity[rp3] << 3) |
+ (parity[rp2] << 2) |
+ (parity[rp1] << 1) |
+ (parity[rp0]);
+ code[1] =
+ (parity[rp15] << 7) |
+ (parity[rp14] << 6) |
+ (parity[rp13] << 5) |
+ (parity[rp12] << 4) |
+ (parity[rp11] << 3) |
+ (parity[rp10] << 2) |
+ (parity[rp9] << 1) |
+ (parity[rp8]);
+ code[2] =
+ (parity[par & 0xf0] << 7) |
+ (parity[par & 0x0f] << 6) |
+ (parity[par & 0xcc] << 5) |
+ (parity[par & 0x33] << 4) |
+ (parity[par & 0xaa] << 3) |
+ (parity[par & 0x55] << 2);
+ code[0] = ~code[0];
+ code[1] = ~code[1];
+ code[2] = ~code[2];
+ }
Still pretty straightforward. The last three invert statements are there to
give a checksum of 0xff 0xff 0xff for an empty flash. In an empty flash
@@ -293,88 +316,90 @@ Let's give it a try...
Attempt 2
=========
-extern const char parity[256];
-
-void ecc2(const unsigned char *buf, unsigned char *code)
-{
- int i;
- const unsigned long *bp = (unsigned long *)buf;
- unsigned long cur;
- unsigned long rp0, rp1, rp2, rp3, rp4, rp5, rp6, rp7;
- unsigned long rp8, rp9, rp10, rp11, rp12, rp13, rp14, rp15;
- unsigned long par;
-
- par = 0;
- rp0 = 0; rp1 = 0; rp2 = 0; rp3 = 0;
- rp4 = 0; rp5 = 0; rp6 = 0; rp7 = 0;
- rp8 = 0; rp9 = 0; rp10 = 0; rp11 = 0;
- rp12 = 0; rp13 = 0; rp14 = 0; rp15 = 0;
-
- for (i = 0; i < 64; i++)
- {
- cur = *bp++;
- par ^= cur;
- if (i & 0x01) rp5 ^= cur; else rp4 ^= cur;
- if (i & 0x02) rp7 ^= cur; else rp6 ^= cur;
- if (i & 0x04) rp9 ^= cur; else rp8 ^= cur;
- if (i & 0x08) rp11 ^= cur; else rp10 ^= cur;
- if (i & 0x10) rp13 ^= cur; else rp12 ^= cur;
- if (i & 0x20) rp15 ^= cur; else rp14 ^= cur;
- }
- /*
- we need to adapt the code generation for the fact that rp vars are now
- long; also the column parity calculation needs to be changed.
- we'll bring rp4 to 15 back to single byte entities by shifting and
- xoring
- */
- rp4 ^= (rp4 >> 16); rp4 ^= (rp4 >> 8); rp4 &= 0xff;
- rp5 ^= (rp5 >> 16); rp5 ^= (rp5 >> 8); rp5 &= 0xff;
- rp6 ^= (rp6 >> 16); rp6 ^= (rp6 >> 8); rp6 &= 0xff;
- rp7 ^= (rp7 >> 16); rp7 ^= (rp7 >> 8); rp7 &= 0xff;
- rp8 ^= (rp8 >> 16); rp8 ^= (rp8 >> 8); rp8 &= 0xff;
- rp9 ^= (rp9 >> 16); rp9 ^= (rp9 >> 8); rp9 &= 0xff;
- rp10 ^= (rp10 >> 16); rp10 ^= (rp10 >> 8); rp10 &= 0xff;
- rp11 ^= (rp11 >> 16); rp11 ^= (rp11 >> 8); rp11 &= 0xff;
- rp12 ^= (rp12 >> 16); rp12 ^= (rp12 >> 8); rp12 &= 0xff;
- rp13 ^= (rp13 >> 16); rp13 ^= (rp13 >> 8); rp13 &= 0xff;
- rp14 ^= (rp14 >> 16); rp14 ^= (rp14 >> 8); rp14 &= 0xff;
- rp15 ^= (rp15 >> 16); rp15 ^= (rp15 >> 8); rp15 &= 0xff;
- rp3 = (par >> 16); rp3 ^= (rp3 >> 8); rp3 &= 0xff;
- rp2 = par & 0xffff; rp2 ^= (rp2 >> 8); rp2 &= 0xff;
- par ^= (par >> 16);
- rp1 = (par >> 8); rp1 &= 0xff;
- rp0 = (par & 0xff);
- par ^= (par >> 8); par &= 0xff;
-
- code[0] =
- (parity[rp7] << 7) |
- (parity[rp6] << 6) |
- (parity[rp5] << 5) |
- (parity[rp4] << 4) |
- (parity[rp3] << 3) |
- (parity[rp2] << 2) |
- (parity[rp1] << 1) |
- (parity[rp0]);
- code[1] =
- (parity[rp15] << 7) |
- (parity[rp14] << 6) |
- (parity[rp13] << 5) |
- (parity[rp12] << 4) |
- (parity[rp11] << 3) |
- (parity[rp10] << 2) |
- (parity[rp9] << 1) |
- (parity[rp8]);
- code[2] =
- (parity[par & 0xf0] << 7) |
- (parity[par & 0x0f] << 6) |
- (parity[par & 0xcc] << 5) |
- (parity[par & 0x33] << 4) |
- (parity[par & 0xaa] << 3) |
- (parity[par & 0x55] << 2);
- code[0] = ~code[0];
- code[1] = ~code[1];
- code[2] = ~code[2];
-}
+::
+
+ extern const char parity[256];
+
+ void ecc2(const unsigned char *buf, unsigned char *code)
+ {
+ int i;
+ const unsigned long *bp = (unsigned long *)buf;
+ unsigned long cur;
+ unsigned long rp0, rp1, rp2, rp3, rp4, rp5, rp6, rp7;
+ unsigned long rp8, rp9, rp10, rp11, rp12, rp13, rp14, rp15;
+ unsigned long par;
+
+ par = 0;
+ rp0 = 0; rp1 = 0; rp2 = 0; rp3 = 0;
+ rp4 = 0; rp5 = 0; rp6 = 0; rp7 = 0;
+ rp8 = 0; rp9 = 0; rp10 = 0; rp11 = 0;
+ rp12 = 0; rp13 = 0; rp14 = 0; rp15 = 0;
+
+ for (i = 0; i < 64; i++)
+ {
+ cur = *bp++;
+ par ^= cur;
+ if (i & 0x01) rp5 ^= cur; else rp4 ^= cur;
+ if (i & 0x02) rp7 ^= cur; else rp6 ^= cur;
+ if (i & 0x04) rp9 ^= cur; else rp8 ^= cur;
+ if (i & 0x08) rp11 ^= cur; else rp10 ^= cur;
+ if (i & 0x10) rp13 ^= cur; else rp12 ^= cur;
+ if (i & 0x20) rp15 ^= cur; else rp14 ^= cur;
+ }
+ /*
+ we need to adapt the code generation for the fact that rp vars are now
+ long; also the column parity calculation needs to be changed.
+ we'll bring rp4 to 15 back to single byte entities by shifting and
+ xoring
+ */
+ rp4 ^= (rp4 >> 16); rp4 ^= (rp4 >> 8); rp4 &= 0xff;
+ rp5 ^= (rp5 >> 16); rp5 ^= (rp5 >> 8); rp5 &= 0xff;
+ rp6 ^= (rp6 >> 16); rp6 ^= (rp6 >> 8); rp6 &= 0xff;
+ rp7 ^= (rp7 >> 16); rp7 ^= (rp7 >> 8); rp7 &= 0xff;
+ rp8 ^= (rp8 >> 16); rp8 ^= (rp8 >> 8); rp8 &= 0xff;
+ rp9 ^= (rp9 >> 16); rp9 ^= (rp9 >> 8); rp9 &= 0xff;
+ rp10 ^= (rp10 >> 16); rp10 ^= (rp10 >> 8); rp10 &= 0xff;
+ rp11 ^= (rp11 >> 16); rp11 ^= (rp11 >> 8); rp11 &= 0xff;
+ rp12 ^= (rp12 >> 16); rp12 ^= (rp12 >> 8); rp12 &= 0xff;
+ rp13 ^= (rp13 >> 16); rp13 ^= (rp13 >> 8); rp13 &= 0xff;
+ rp14 ^= (rp14 >> 16); rp14 ^= (rp14 >> 8); rp14 &= 0xff;
+ rp15 ^= (rp15 >> 16); rp15 ^= (rp15 >> 8); rp15 &= 0xff;
+ rp3 = (par >> 16); rp3 ^= (rp3 >> 8); rp3 &= 0xff;
+ rp2 = par & 0xffff; rp2 ^= (rp2 >> 8); rp2 &= 0xff;
+ par ^= (par >> 16);
+ rp1 = (par >> 8); rp1 &= 0xff;
+ rp0 = (par & 0xff);
+ par ^= (par >> 8); par &= 0xff;
+
+ code[0] =
+ (parity[rp7] << 7) |
+ (parity[rp6] << 6) |
+ (parity[rp5] << 5) |
+ (parity[rp4] << 4) |
+ (parity[rp3] << 3) |
+ (parity[rp2] << 2) |
+ (parity[rp1] << 1) |
+ (parity[rp0]);
+ code[1] =
+ (parity[rp15] << 7) |
+ (parity[rp14] << 6) |
+ (parity[rp13] << 5) |
+ (parity[rp12] << 4) |
+ (parity[rp11] << 3) |
+ (parity[rp10] << 2) |
+ (parity[rp9] << 1) |
+ (parity[rp8]);
+ code[2] =
+ (parity[par & 0xf0] << 7) |
+ (parity[par & 0x0f] << 6) |
+ (parity[par & 0xcc] << 5) |
+ (parity[par & 0x33] << 4) |
+ (parity[par & 0xaa] << 3) |
+ (parity[par & 0x55] << 2);
+ code[0] = ~code[0];
+ code[1] = ~code[1];
+ code[2] = ~code[2];
+ }
The parity array is not shown any more. Note also that for these
examples I kinda deviated from my regular programming style by allowing
@@ -403,28 +428,32 @@ lookups
Attempt 3
=========
-Odd replaced:
- if (i & 0x01) rp5 ^= cur; else rp4 ^= cur;
- if (i & 0x02) rp7 ^= cur; else rp6 ^= cur;
- if (i & 0x04) rp9 ^= cur; else rp8 ^= cur;
- if (i & 0x08) rp11 ^= cur; else rp10 ^= cur;
- if (i & 0x10) rp13 ^= cur; else rp12 ^= cur;
- if (i & 0x20) rp15 ^= cur; else rp14 ^= cur;
-with
- if (i & 0x01) rp5 ^= cur;
- if (i & 0x02) rp7 ^= cur;
- if (i & 0x04) rp9 ^= cur;
- if (i & 0x08) rp11 ^= cur;
- if (i & 0x10) rp13 ^= cur;
- if (i & 0x20) rp15 ^= cur;
-
- and outside the loop added:
- rp4 = par ^ rp5;
- rp6 = par ^ rp7;
- rp8 = par ^ rp9;
- rp10 = par ^ rp11;
- rp12 = par ^ rp13;
- rp14 = par ^ rp15;
+Odd replaced::
+
+ if (i & 0x01) rp5 ^= cur; else rp4 ^= cur;
+ if (i & 0x02) rp7 ^= cur; else rp6 ^= cur;
+ if (i & 0x04) rp9 ^= cur; else rp8 ^= cur;
+ if (i & 0x08) rp11 ^= cur; else rp10 ^= cur;
+ if (i & 0x10) rp13 ^= cur; else rp12 ^= cur;
+ if (i & 0x20) rp15 ^= cur; else rp14 ^= cur;
+
+with::
+
+ if (i & 0x01) rp5 ^= cur;
+ if (i & 0x02) rp7 ^= cur;
+ if (i & 0x04) rp9 ^= cur;
+ if (i & 0x08) rp11 ^= cur;
+ if (i & 0x10) rp13 ^= cur;
+ if (i & 0x20) rp15 ^= cur;
+
+and outside the loop added::
+
+ rp4 = par ^ rp5;
+ rp6 = par ^ rp7;
+ rp8 = par ^ rp9;
+ rp10 = par ^ rp11;
+ rp12 = par ^ rp13;
+ rp14 = par ^ rp15;
And after that the code takes about 30% more time, although the number of
statements is reduced. This is also reflected in the assembly code.
@@ -448,7 +477,7 @@ Attempt 4
=========
Unrolled the loop 1, 2, 3 and 4 times.
-For 4 the code starts with:
+For 4 the code starts with::
for (i = 0; i < 4; i++)
{
@@ -471,8 +500,11 @@ Analysis 4
==========
Unrolling once gains about 15%
+
Unrolling twice keeps the gain at about 15%
+
Unrolling three times gives a gain of 30% compared to attempt 2.
+
Unrolling four times gives a marginal improvement compared to unrolling
three times.
@@ -492,8 +524,10 @@ Attempt 5
Effectively so all odd digit rp assignments in the loop were removed.
This included the else clause of the if statements.
-Of course after the loop we need to correct things by adding code like:
+Of course after the loop we need to correct things by adding code like::
+
rp5 = par ^ rp4;
+
Also the initial assignments (rp5 = 0; etc) could be removed.
Along the line I also removed the initialisation of rp0/1/2/3.
@@ -513,7 +547,7 @@ statement. Time for yet another version!
Attempt 6
=========
-THe code within the for loop was changed to:
+The code within the for loop was changed to::
for (i = 0; i < 4; i++)
{
@@ -564,13 +598,17 @@ million iterations in order not to lose too much accuracy. This one
definitely seemed to be the jackpot!
There is a little bit more room for improvement though. There are three
-places with statements:
-rp4 ^= cur; rp6 ^= cur;
+places with statements::
+
+ rp4 ^= cur; rp6 ^= cur;
+
It seems more efficient to also maintain a variable rp4_6 in the while
loop; This eliminates 3 statements per loop. Of course after the loop we
-need to correct by adding:
- rp4 ^= rp4_6;
- rp6 ^= rp4_6
+need to correct by adding::
+
+ rp4 ^= rp4_6;
+ rp6 ^= rp4_6;
+
Furthermore there are 4 sequential assignments to rp8. This can be
encoded slightly more efficiently by saving tmppar before those 4 lines
and later do rp8 = rp8 ^ tmppar ^ notrp8;
@@ -582,7 +620,7 @@ Time for a new test!
Attempt 7
=========
-The new code now looks like:
+The new code now looks like::
for (i = 0; i < 4; i++)
{
@@ -644,9 +682,12 @@ Although it seems that the code within the loop cannot be optimised
further there is still room to optimize the generation of the ecc codes.
We can simply calculate the total parity. If this is 0 then rp4 = rp5
etc. If the parity is 1, then rp4 = !rp5;
+
But if rp4 = rp5 we do not need rp5 etc. We can just write the even bits
-in the result byte and then do something like
+in the result byte and then do something like::
+
code[0] |= (code[0] << 1);
+
Let's test this.
@@ -657,11 +698,13 @@ Changed the code but again this slightly degrades performance. Tried all
kind of other things, like having dedicated parity arrays to avoid the
shift after parity[rp7] << 7; No gain.
Change the lookup using the parity array by using shift operators (e.g.
-replace parity[rp7] << 7 with:
-rp7 ^= (rp7 << 4);
-rp7 ^= (rp7 << 2);
-rp7 ^= (rp7 << 1);
-rp7 &= 0x80;
+replace parity[rp7] << 7 with::
+
+ rp7 ^= (rp7 << 4);
+ rp7 ^= (rp7 << 2);
+ rp7 ^= (rp7 << 1);
+ rp7 &= 0x80;
+
No gain.
The only marginal change was inverting the parity bits, so we can remove
@@ -683,13 +726,16 @@ Correcting errors
For correcting errors I again used the ST application note as a starter,
but I also peeked at the existing code.
+
The algorithm itself is pretty straightforward. Just xor the given and
the calculated ecc. If all bytes are 0 there is no problem. If 11 bits
are 1 we have one correctable bit error. If there is 1 bit 1, we have an
error in the given ecc code.
+
It proved to be fastest to do some table lookups. Performance gain
introduced by this is about a factor 2 on my system when a repair had to
be done, and 1% or so if no repair had to be done.
+
Code size increased from 330 bytes to 686 bytes for this function.
(gcc 4.2, -O3)
@@ -700,8 +746,10 @@ Conclusion
The gain when calculating the ecc is tremendous. On my development hardware
a speedup of a factor of 18 for ecc calculation was achieved. On a test on an
embedded system with a MIPS core a factor 7 was obtained.
+
On a test with a Linksys NSLU2 (ARMv5TE processor) the speedup was a factor
5 (big endian mode, gcc 4.1.2, -O3)
+
For correction not much gain could be obtained (as bitflips are rare). Then
again there are also far fewer cycles spent there.
@@ -711,4 +759,5 @@ out of it with an assembler program, but due to pipeline behaviour etc
this is very tricky (at least for intel hw).
Author: Frans Meulenbroeks
+
Copyright (C) 2008 Koninklijke Philips Electronics NV.
diff --git a/Documentation/mtd/spi-nor.txt b/Documentation/driver-api/mtd/spi-nor.rst
index da1fbff5a24c..f5333e3bf486 100644
--- a/Documentation/mtd/spi-nor.txt
+++ b/Documentation/driver-api/mtd/spi-nor.rst
@@ -1,5 +1,6 @@
- SPI NOR framework
- ============================================
+=================
+SPI NOR framework
+=================
Part I - Why do we need this framework?
---------------------------------------
@@ -23,7 +24,7 @@ This framework just adds a new layer between the MTD and the SPI bus driver.
With this new layer, the SPI NOR controller driver does not depend on the
m25p80 code anymore.
- Before this framework, the layer is like:
+Before this framework, the layer is like::
MTD
------------------------
diff --git a/Documentation/driver-api/nfc/index.rst b/Documentation/driver-api/nfc/index.rst
new file mode 100644
index 000000000000..b6e9eedbff29
--- /dev/null
+++ b/Documentation/driver-api/nfc/index.rst
@@ -0,0 +1,11 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+========================
+Near Field Communication
+========================
+
+.. toctree::
+ :maxdepth: 1
+
+ nfc-hci
+ nfc-pn544
diff --git a/Documentation/nfc/nfc-hci.txt b/Documentation/driver-api/nfc/nfc-hci.rst
index 0dc078cab972..eb8a1a14e919 100644
--- a/Documentation/nfc/nfc-hci.txt
+++ b/Documentation/driver-api/nfc/nfc-hci.rst
@@ -1,7 +1,9 @@
+========================
HCI backend for NFC Core
+========================
-Author: Eric Lapuyade, Samuel Ortiz
-Contact: eric.lapuyade@intel.com, samuel.ortiz@intel.com
+- Author: Eric Lapuyade, Samuel Ortiz
+- Contact: eric.lapuyade@intel.com, samuel.ortiz@intel.com
General
-------
@@ -24,12 +26,13 @@ HCI events can also be received from the host controller. They will be handled
and a translation will be forwarded to NFC Core as needed. There are hooks to
let the HCI driver handle proprietary events or override standard behavior.
HCI uses 2 execution contexts:
+
- one for executing commands : nfc_hci_msg_tx_work(). Only one command
-can be executing at any given moment.
+ can be executing at any given moment.
- one for dispatching received events and commands : nfc_hci_msg_rx_work().
-HCI Session initialization:
----------------------------
+HCI Session initialization
+--------------------------
The Session initialization is an HCI standard which must unfortunately
support proprietary gates. This is the reason why the driver will pass a list
@@ -58,9 +61,9 @@ HCI Management
--------------
A driver would normally register itself with HCI and provide the following
-entry points:
+entry points::
-struct nfc_hci_ops {
+ struct nfc_hci_ops {
int (*open)(struct nfc_hci_dev *hdev);
void (*close)(struct nfc_hci_dev *hdev);
int (*hci_ready) (struct nfc_hci_dev *hdev);
@@ -82,38 +85,38 @@ struct nfc_hci_ops {
struct nfc_target *target);
int (*event_received)(struct nfc_hci_dev *hdev, u8 gate, u8 event,
struct sk_buff *skb);
-};
+ };
- open() and close() shall turn the hardware on and off.
- hci_ready() is an optional entry point that is called right after the hci
-session has been set up. The driver can use it to do additional initialization
-that must be performed using HCI commands.
+ session has been set up. The driver can use it to do additional initialization
+ that must be performed using HCI commands.
- xmit() shall simply write a frame to the physical link.
- start_poll() is an optional entrypoint that shall set the hardware in polling
-mode. This must be implemented only if the hardware uses proprietary gates or a
-mechanism slightly different from the HCI standard.
+ mode. This must be implemented only if the hardware uses proprietary gates or a
+ mechanism slightly different from the HCI standard.
- dep_link_up() is called after a p2p target has been detected, to finish
-the p2p connection setup with hardware parameters that need to be passed back
-to nfc core.
+ the p2p connection setup with hardware parameters that need to be passed back
+ to nfc core.
- dep_link_down() is called to bring the p2p link down.
- target_from_gate() is an optional entrypoint to return the nfc protocols
-corresponding to a proprietary gate.
+ corresponding to a proprietary gate.
- complete_target_discovered() is an optional entry point to let the driver
-perform additional proprietary processing necessary to auto activate the
-discovered target.
+ perform additional proprietary processing necessary to auto activate the
+ discovered target.
- im_transceive() must be implemented by the driver if proprietary HCI commands
-are required to send data to the tag. Some tag types will require custom
-commands, others can be written to using the standard HCI commands. The driver
-can check the tag type and either do proprietary processing, or return 1 to ask
-for standard processing. The data exchange command itself must be sent
-asynchronously.
+ are required to send data to the tag. Some tag types will require custom
+ commands, others can be written to using the standard HCI commands. The driver
+ can check the tag type and either do proprietary processing, or return 1 to ask
+ for standard processing. The data exchange command itself must be sent
+ asynchronously.
- tm_send() is called to send data in the case of a p2p connection
- check_presence() is an optional entry point that will be called regularly
-by the core to check that an activated tag is still in the field. If this is
-not implemented, the core will not be able to push tag_lost events to the user
-space
+ by the core to check that an activated tag is still in the field. If this is
+ not implemented, the core will not be able to push tag_lost events to the user
+ space
- event_received() is called to handle an event coming from the chip. Driver
-can handle the event or return 1 to let HCI attempt standard processing.
+ can handle the event or return 1 to let HCI attempt standard processing.
On the rx path, the driver is responsible to push incoming HCP frames to HCI
using nfc_hci_recv_frame(). HCI will take care of re-aggregation and handling
@@ -122,20 +125,23 @@ This must be done from a context that can sleep.
PHY Management
--------------
-The physical link (i2c, ...) management is defined by the following structure:
+The physical link (i2c, ...) management is defined by the following structure::
-struct nfc_phy_ops {
+ struct nfc_phy_ops {
int (*write)(void *dev_id, struct sk_buff *skb);
int (*enable)(void *dev_id);
void (*disable)(void *dev_id);
-};
-
-enable(): turn the phy on (power on), make it ready to transfer data
-disable(): turn the phy off
-write(): Send a data frame to the chip. Note that to enable higher
-layers such as an llc to store the frame for re-emission, this function must
-not alter the skb. It must also not return a positive result (return 0 for
-success, negative for failure).
+ };
+
+enable():
+ turn the phy on (power on), make it ready to transfer data
+disable():
+ turn the phy off
+write():
+ Send a data frame to the chip. Note that to enable higher
+ layers such as an llc to store the frame for re-emission, this
+ function must not alter the skb. It must also not return a positive
+ result (return 0 for success, negative for failure).
Data coming from the chip shall be sent directly to nfc_hci_recv_frame().
@@ -145,9 +151,9 @@ LLC
Communication between the CPU and the chip often requires some link layer
protocol. Those are isolated as modules managed by the HCI layer. There are
currently two modules : nop (raw transfer) and shdlc.
-A new llc must implement the following functions:
+A new llc must implement the following functions::
-struct nfc_llc_ops {
+ struct nfc_llc_ops {
void *(*init) (struct nfc_hci_dev *hdev, xmit_to_drv_t xmit_to_drv,
rcv_to_hci_t rcv_to_hci, int tx_headroom,
int tx_tailroom, int *rx_headroom, int *rx_tailroom,
@@ -157,17 +163,25 @@ struct nfc_llc_ops {
int (*stop) (struct nfc_llc *llc);
void (*rcv_from_drv) (struct nfc_llc *llc, struct sk_buff *skb);
int (*xmit_from_hci) (struct nfc_llc *llc, struct sk_buff *skb);
-};
-
-- init() : allocate and init your private storage
-- deinit() : cleanup
-- start() : establish the logical connection
-- stop () : terminate the logical connection
-- rcv_from_drv() : handle data coming from the chip, going to HCI
-- xmit_from_hci() : handle data sent by HCI, going to the chip
+ };
+
+init():
+ allocate and init your private storage
+deinit():
+ cleanup
+start():
+ establish the logical connection
+stop ():
+ terminate the logical connection
+rcv_from_drv():
+ handle data coming from the chip, going to HCI
+xmit_from_hci():
+ handle data sent by HCI, going to the chip
The llc must be registered with nfc before it can be used. Do that by
-calling nfc_llc_register(const char *name, struct nfc_llc_ops *ops);
+calling::
+
+ nfc_llc_register(const char *name, struct nfc_llc_ops *ops);
Again, note that the llc does not handle the physical link. It is thus very
easy to mix any physical link with any llc for a given chip driver.
@@ -187,26 +201,32 @@ fast, cannot sleep. sends incoming frames to HCI where they are passed to
the current llc. In case of shdlc, the frame is queued in shdlc rx queue.
- SHDLC State Machine worker (SMW)
-Only when llc_shdlc is used: handles shdlc rx & tx queues.
-Dispatches HCI cmd responses.
+
+ Only when llc_shdlc is used: handles shdlc rx & tx queues.
+
+ Dispatches HCI cmd responses.
- HCI Tx Cmd worker (MSGTXWQ)
-Serializes execution of HCI commands. Completes execution in case of response
-timeout.
+
+ Serializes execution of HCI commands.
+
+ Completes execution in case of response timeout.
- HCI Rx worker (MSGRXWQ)
-Dispatches incoming HCI commands or events.
+
+ Dispatches incoming HCI commands or events.
- Syscall context from a userspace call (SYSCALL)
-Any entrypoint in HCI called from NFC Core
+
+ Any entrypoint in HCI called from NFC Core
Workflow executing an HCI command (using shdlc)
-----------------------------------------------
Executing an HCI command can easily be performed synchronously using the
-following API:
+following API::
-int nfc_hci_send_cmd (struct nfc_hci_dev *hdev, u8 gate, u8 cmd,
+ int nfc_hci_send_cmd (struct nfc_hci_dev *hdev, u8 gate, u8 cmd,
const u8 *param, size_t param_len, struct sk_buff **skb)
The API must be invoked from a context that can sleep. Most of the time, this
@@ -234,11 +254,11 @@ waiting command execution. Response processing involves invoking the completion
callback that was provided by nfc_hci_msg_tx_work() when it sent the command.
The completion callback will then wake the syscall context.
-It is also possible to execute the command asynchronously using this API:
+It is also possible to execute the command asynchronously using this API::
-static int nfc_hci_execute_cmd_async(struct nfc_hci_dev *hdev, u8 pipe, u8 cmd,
- const u8 *param, size_t param_len,
- data_exchange_cb_t cb, void *cb_context)
+ static int nfc_hci_execute_cmd_async(struct nfc_hci_dev *hdev, u8 pipe, u8 cmd,
+ const u8 *param, size_t param_len,
+ data_exchange_cb_t cb, void *cb_context)
The workflow is the same, except that the API call returns immediately, and
the callback will be called with the result from the SMW context.
@@ -268,23 +288,24 @@ went wrong below and know that expected events will probably never happen.
Handling of these errors is done as follows:
- driver (pn544) fails to deliver an incoming frame: it stores the error such
-that any subsequent call to the driver will result in this error. Then it calls
-the standard nfc_shdlc_recv_frame() with a NULL argument to report the problem
-above. shdlc stores a EREMOTEIO sticky status, which will trigger SMW to
-report above in turn.
+ that any subsequent call to the driver will result in this error. Then it
+ calls the standard nfc_shdlc_recv_frame() with a NULL argument to report the
+ problem above. shdlc stores a EREMOTEIO sticky status, which will trigger
+ SMW to report above in turn.
- SMW is basically a background thread to handle incoming and outgoing shdlc
-frames. This thread will also check the shdlc sticky status and report to HCI
-when it discovers it is not able to run anymore because of an unrecoverable
-error that happened within shdlc or below. If the problem occurs during shdlc
-connection, the error is reported through the connect completion.
+ frames. This thread will also check the shdlc sticky status and report to HCI
+ when it discovers it is not able to run anymore because of an unrecoverable
+ error that happened within shdlc or below. If the problem occurs during shdlc
+ connection, the error is reported through the connect completion.
- HCI: if an internal HCI error happens (frame is lost), or HCI is reported an
-error from a lower layer, HCI will either complete the currently executing
-command with that error, or notify NFC Core directly if no command is executing.
+ error from a lower layer, HCI will either complete the currently executing
+ command with that error, or notify NFC Core directly if no command is
+ executing.
- NFC Core: when NFC Core is notified of an error from below and polling is
-active, it will send a tag discovered event with an empty tag list to the user
-space to let it know that the poll operation will never be able to detect a tag.
-If polling is not active and the error was sticky, lower levels will return it
-at next invocation.
+ active, it will send a tag discovered event with an empty tag list to the user
+ space to let it know that the poll operation will never be able to detect a
+ tag. If polling is not active and the error was sticky, lower levels will
+ return it at next invocation.
diff --git a/Documentation/nfc/nfc-pn544.txt b/Documentation/driver-api/nfc/nfc-pn544.rst
index b36ca14ca2d6..6b2d8aae0c4e 100644
--- a/Documentation/nfc/nfc-pn544.txt
+++ b/Documentation/driver-api/nfc/nfc-pn544.rst
@@ -1,5 +1,7 @@
-Kernel driver for the NXP Semiconductors PN544 Near Field
-Communication chip
+============================================================================
+Kernel driver for the NXP Semiconductors PN544 Near Field Communication chip
+============================================================================
+
General
-------
diff --git a/Documentation/ntb.txt b/Documentation/driver-api/ntb.rst
index 074a423c853c..074a423c853c 100644
--- a/Documentation/ntb.txt
+++ b/Documentation/driver-api/ntb.rst
diff --git a/Documentation/nvdimm/btt.txt b/Documentation/driver-api/nvdimm/btt.rst
index e293fb664924..107395c042ae 100644
--- a/Documentation/nvdimm/btt.txt
+++ b/Documentation/driver-api/nvdimm/btt.rst
@@ -1,9 +1,10 @@
+=============================
BTT - Block Translation Table
=============================
1. Introduction
----------------
+===============
Persistent memory based storage is able to perform IO at byte (or more
accurately, cache line) granularity. However, we often want to expose such
@@ -25,7 +26,7 @@ provides atomic sector updates.
2. Static Layout
-----------------
+================
The underlying storage on which a BTT can be laid out is not limited in any way.
The BTT, however, splits the available space into chunks of up to 512 GiB,
@@ -33,43 +34,43 @@ called "Arenas".
Each arena follows the same layout for its metadata, and all references in an
arena are internal to it (with the exception of one field that points to the
-next arena). The following depicts the "On-disk" metadata layout:
-
-
- Backing Store +-------> Arena
-+---------------+ | +------------------+
-| | | | Arena info block |
-| Arena 0 +---+ | 4K |
-| 512G | +------------------+
-| | | |
-+---------------+ | |
-| | | |
-| Arena 1 | | Data Blocks |
-| 512G | | |
-| | | |
-+---------------+ | |
-| . | | |
-| . | | |
-| . | | |
-| | | |
-| | | |
-+---------------+ +------------------+
- | |
- | BTT Map |
- | |
- | |
- +------------------+
- | |
- | BTT Flog |
- | |
- +------------------+
- | Info block copy |
- | 4K |
- +------------------+
+next arena). The following depicts the "On-disk" metadata layout::
+
+
+ Backing Store +-------> Arena
+ +---------------+ | +------------------+
+ | | | | Arena info block |
+ | Arena 0 +---+ | 4K |
+ | 512G | +------------------+
+ | | | |
+ +---------------+ | |
+ | | | |
+ | Arena 1 | | Data Blocks |
+ | 512G | | |
+ | | | |
+ +---------------+ | |
+ | . | | |
+ | . | | |
+ | . | | |
+ | | | |
+ | | | |
+ +---------------+ +------------------+
+ | |
+ | BTT Map |
+ | |
+ | |
+ +------------------+
+ | |
+ | BTT Flog |
+ | |
+ +------------------+
+ | Info block copy |
+ | 4K |
+ +------------------+
3. Theory of Operation
-----------------------
+======================
a. The BTT Map
@@ -79,31 +80,37 @@ The map is a simple lookup/indirection table that maps an LBA to an internal
block. Each map entry is 32 bits. The two most significant bits are special
flags, and the remaining form the internal block number.
+======== =============================================================
Bit Description
-31 - 30 : Error and Zero flags - Used in the following way:
- Bit Description
- 31 30
- -----------------------------------------------------------------------
- 00 Initial state. Reads return zeroes; Premap = Postmap
- 01 Zero state: Reads return zeroes
- 10 Error state: Reads fail; Writes clear 'E' bit
- 11 Normal Block – has valid postmap
+======== =============================================================
+31 - 30 Error and Zero flags - Used in the following way::
+ == == ====================================================
+ 31 30 Description
+ == == ====================================================
+ 0 0 Initial state. Reads return zeroes; Premap = Postmap
+ 0 1 Zero state: Reads return zeroes
+ 1 0 Error state: Reads fail; Writes clear 'E' bit
+ 1 1 Normal Block – has valid postmap
+ == == ====================================================
-29 - 0 : Mappings to internal 'postmap' blocks
+29 - 0 Mappings to internal 'postmap' blocks
+======== =============================================================
Some of the terminology that will be subsequently used:
-External LBA : LBA as made visible to upper layers.
-ABA : Arena Block Address - Block offset/number within an arena
-Premap ABA : The block offset into an arena, which was decided upon by range
+============ ================================================================
+External LBA LBA as made visible to upper layers.
+ABA Arena Block Address - Block offset/number within an arena
+Premap ABA The block offset into an arena, which was decided upon by range
checking the External LBA
-Postmap ABA : The block number in the "Data Blocks" area obtained after
+Postmap ABA The block number in the "Data Blocks" area obtained after
indirection from the map
-nfree : The number of free blocks that are maintained at any given time.
+nfree The number of free blocks that are maintained at any given time.
This is the number of concurrent writes that can happen to the
arena.
+============ ================================================================
For example, after adding a BTT, we surface a disk of 1024G. We get a read for
@@ -121,19 +128,21 @@ i.e. Every write goes to a "free" block. A running list of free blocks is
maintained in the form of the BTT flog. 'Flog' is a combination of the words
"free list" and "log". The flog contains 'nfree' entries, and an entry contains:
-lba : The premap ABA that is being written to
-old_map : The old postmap ABA - after 'this' write completes, this will be a
+======== =====================================================================
+lba The premap ABA that is being written to
+old_map The old postmap ABA - after 'this' write completes, this will be a
free block.
-new_map : The new postmap ABA. The map will up updated to reflect this
+new_map The new postmap ABA. The map will be updated to reflect this
lba->postmap_aba mapping, but we log it here in case we have to
recover.
-seq : Sequence number to mark which of the 2 sections of this flog entry is
+seq Sequence number to mark which of the 2 sections of this flog entry is
valid/newest. It cycles between 01->10->11->01 (binary) under normal
operation, with 00 indicating an uninitialized state.
-lba' : alternate lba entry
-old_map': alternate old postmap entry
-new_map': alternate new postmap entry
-seq' : alternate sequence number.
+lba' alternate lba entry
+old_map' alternate old postmap entry
+new_map' alternate new postmap entry
+seq' alternate sequence number.
+======== =====================================================================
Each of the above fields is 32-bit, making one entry 32 bytes. Entries are also
padded to 64 bytes to avoid cache line sharing or aliasing. Flog updates are
@@ -147,8 +156,10 @@ c. The concept of lanes
While 'nfree' describes the number of concurrent IOs an arena can process
concurrently, 'nlanes' is the number of IOs the BTT device as a whole can
-process.
- nlanes = min(nfree, num_cpus)
+process::
+
+ nlanes = min(nfree, num_cpus)
+
A lane number is obtained at the start of any IO, and is used for indexing into
all the on-disk and in-memory data structures for the duration of the IO. If
there are more CPUs than the max number of available lanes, then lanes are
@@ -180,10 +191,10 @@ e. In-memory data structure: map locks
--------------------------------------
Consider a case where two writer threads are writing to the same LBA. There can
-be a race in the following sequence of steps:
+be a race in the following sequence of steps::
-free[lane] = map[premap_aba]
-map[premap_aba] = postmap_aba
+ free[lane] = map[premap_aba]
+ map[premap_aba] = postmap_aba
Both threads can update their respective free[lane] with the same old, freed
postmap_aba. This has made the layout inconsistent by losing a free entry, and
@@ -202,6 +213,7 @@ On startup, we analyze the BTT flog to create our list of free blocks. We walk
through all the entries, and for each lane, of the set of two possible
'sections', we always look at the most recent one only (based on the sequence
number). The reconstruction rules/steps are simple:
+
- Read map[log_entry.lba].
- If log_entry.new matches the map entry, then log_entry.old is free.
- If log_entry.new does not match the map entry, then log_entry.new is free.
@@ -228,7 +240,7 @@ Write:
1. Convert external LBA to Arena number + pre-map ABA
2. Get a lane (and take lane_lock)
3. Use lane to index into in-memory free list and obtain a new block, next flog
- index, next sequence number
+ index, next sequence number
4. Scan the RTT to check if free block is present, and spin/wait if it is.
5. Write data to this free block
6. Read map to get the existing post-map ABA entry for this pre-map ABA
@@ -245,6 +257,7 @@ Write:
An arena would be in an error state if any of the metadata is corrupted
irrecoverably, either due to a bug or a media error. The following conditions
indicate an error:
+
- Info block checksum does not match (and recovering from the copy also fails)
- All internal available blocks are not uniquely and entirely addressed by the
sum of mapped blocks and free blocks (from the BTT flog).
@@ -263,11 +276,10 @@ The BTT can be set up on any disk (namespace) exposed by the libnvdimm subsystem
(pmem, or blk mode). The easiest way to set up such a namespace is using the
'ndctl' utility [1]:
-For example, the ndctl command line to setup a btt with a 4k sector size is:
+For example, the ndctl command line to setup a btt with a 4k sector size is::
ndctl create-namespace -f -e namespace0.0 -m sector -l 4k
See ndctl create-namespace --help for more options.
[1]: https://github.com/pmem/ndctl
-
diff --git a/Documentation/driver-api/nvdimm/index.rst b/Documentation/driver-api/nvdimm/index.rst
new file mode 100644
index 000000000000..a4f8f98aeb94
--- /dev/null
+++ b/Documentation/driver-api/nvdimm/index.rst
@@ -0,0 +1,12 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===================================
+Non-Volatile Memory Device (NVDIMM)
+===================================
+
+.. toctree::
+ :maxdepth: 1
+
+ nvdimm
+ btt
+ security
diff --git a/Documentation/nvdimm/nvdimm.txt b/Documentation/driver-api/nvdimm/nvdimm.rst
index 1669f626b037..08f855cbb4e6 100644
--- a/Documentation/nvdimm/nvdimm.txt
+++ b/Documentation/driver-api/nvdimm/nvdimm.rst
@@ -1,8 +1,14 @@
- LIBNVDIMM: Non-Volatile Devices
- libnvdimm - kernel / libndctl - userspace helper library
- linux-nvdimm@lists.01.org
- v13
+===============================
+LIBNVDIMM: Non-Volatile Devices
+===============================
+libnvdimm - kernel / libndctl - userspace helper library
+
+linux-nvdimm@lists.01.org
+
+Version 13
+
+.. contents:
Glossary
Overview
@@ -40,49 +46,57 @@
Glossary
---------
-
-PMEM: A system-physical-address range where writes are persistent. A
-block device composed of PMEM is capable of DAX. A PMEM address range
-may span an interleave of several DIMMs.
-
-BLK: A set of one or more programmable memory mapped apertures provided
-by a DIMM to access its media. This indirection precludes the
-performance benefit of interleaving, but enables DIMM-bounded failure
-modes.
-
-DPA: DIMM Physical Address, is a DIMM-relative offset. With one DIMM in
-the system there would be a 1:1 system-physical-address:DPA association.
-Once more DIMMs are added a memory controller interleave must be
-decoded to determine the DPA associated with a given
-system-physical-address. BLK capacity always has a 1:1 relationship
-with a single-DIMM's DPA range.
-
-DAX: File system extensions to bypass the page cache and block layer to
-mmap persistent memory, from a PMEM block device, directly into a
-process address space.
-
-DSM: Device Specific Method: ACPI method to to control specific
-device - in this case the firmware.
-
-DCR: NVDIMM Control Region Structure defined in ACPI 6 Section 5.2.25.5.
-It defines a vendor-id, device-id, and interface format for a given DIMM.
-
-BTT: Block Translation Table: Persistent memory is byte addressable.
-Existing software may have an expectation that the power-fail-atomicity
-of writes is at least one sector, 512 bytes. The BTT is an indirection
-table with atomic update semantics to front a PMEM/BLK block device
-driver and present arbitrary atomic sector sizes.
-
-LABEL: Metadata stored on a DIMM device that partitions and identifies
-(persistently names) storage between PMEM and BLK. It also partitions
-BLK storage to host BTTs with different parameters per BLK-partition.
-Note that traditional partition tables, GPT/MBR, are layered on top of a
-BLK or PMEM device.
+========
+
+PMEM:
+ A system-physical-address range where writes are persistent. A
+ block device composed of PMEM is capable of DAX. A PMEM address range
+ may span an interleave of several DIMMs.
+
+BLK:
+ A set of one or more programmable memory mapped apertures provided
+ by a DIMM to access its media. This indirection precludes the
+ performance benefit of interleaving, but enables DIMM-bounded failure
+ modes.
+
+DPA:
+ DIMM Physical Address, is a DIMM-relative offset. With one DIMM in
+ the system there would be a 1:1 system-physical-address:DPA association.
+ Once more DIMMs are added a memory controller interleave must be
+ decoded to determine the DPA associated with a given
+ system-physical-address. BLK capacity always has a 1:1 relationship
+ with a single-DIMM's DPA range.
+
+DAX:
+ File system extensions to bypass the page cache and block layer to
+ mmap persistent memory, from a PMEM block device, directly into a
+ process address space.
+
+DSM:
+ Device Specific Method: ACPI method to control specific
+ device - in this case the firmware.
+
+DCR:
+ NVDIMM Control Region Structure defined in ACPI 6 Section 5.2.25.5.
+ It defines a vendor-id, device-id, and interface format for a given DIMM.
+
+BTT:
+ Block Translation Table: Persistent memory is byte addressable.
+ Existing software may have an expectation that the power-fail-atomicity
+ of writes is at least one sector, 512 bytes. The BTT is an indirection
+ table with atomic update semantics to front a PMEM/BLK block device
+ driver and present arbitrary atomic sector sizes.
+
+LABEL:
+ Metadata stored on a DIMM device that partitions and identifies
+ (persistently names) storage between PMEM and BLK. It also partitions
+ BLK storage to host BTTs with different parameters per BLK-partition.
+ Note that traditional partition tables, GPT/MBR, are layered on top of a
+ BLK or PMEM device.
Overview
---------
+========
The LIBNVDIMM subsystem provides support for three types of NVDIMMs, namely,
PMEM, BLK, and NVDIMM devices that can simultaneously support both PMEM
@@ -96,19 +110,30 @@ accessible via BLK. When that occurs a LABEL is needed to reserve DPA
for exclusive access via one mode at a time.
Supporting Documents
-ACPI 6: http://www.uefi.org/sites/default/files/resources/ACPI_6.0.pdf
-NVDIMM Namespace: http://pmem.io/documents/NVDIMM_Namespace_Spec.pdf
-DSM Interface Example: http://pmem.io/documents/NVDIMM_DSM_Interface_Example.pdf
-Driver Writer's Guide: http://pmem.io/documents/NVDIMM_Driver_Writers_Guide.pdf
+--------------------
+
+ACPI 6:
+ http://www.uefi.org/sites/default/files/resources/ACPI_6.0.pdf
+NVDIMM Namespace:
+ http://pmem.io/documents/NVDIMM_Namespace_Spec.pdf
+DSM Interface Example:
+ http://pmem.io/documents/NVDIMM_DSM_Interface_Example.pdf
+Driver Writer's Guide:
+ http://pmem.io/documents/NVDIMM_Driver_Writers_Guide.pdf
Git Trees
-LIBNVDIMM: https://git.kernel.org/cgit/linux/kernel/git/djbw/nvdimm.git
-LIBNDCTL: https://github.com/pmem/ndctl.git
-PMEM: https://github.com/01org/prd
+---------
+
+LIBNVDIMM:
+ https://git.kernel.org/cgit/linux/kernel/git/djbw/nvdimm.git
+LIBNDCTL:
+ https://github.com/pmem/ndctl.git
+PMEM:
+ https://github.com/01org/prd
LIBNVDIMM PMEM and BLK
-------------------
+======================
Prior to the arrival of the NFIT, non-volatile memory was described to a
system in various ad-hoc ways. Usually only the bare minimum was
@@ -122,38 +147,39 @@ For each NVDIMM access method (PMEM, BLK), LIBNVDIMM provides a block
device driver:
1. PMEM (nd_pmem.ko): Drives a system-physical-address range. This
- range is contiguous in system memory and may be interleaved (hardware
- memory controller striped) across multiple DIMMs. When interleaved the
- platform may optionally provide details of which DIMMs are participating
- in the interleave.
-
- Note that while LIBNVDIMM describes system-physical-address ranges that may
- alias with BLK access as ND_NAMESPACE_PMEM ranges and those without
- alias as ND_NAMESPACE_IO ranges, to the nd_pmem driver there is no
- distinction. The different device-types are an implementation detail
- that userspace can exploit to implement policies like "only interface
- with address ranges from certain DIMMs". It is worth noting that when
- aliasing is present and a DIMM lacks a label, then no block device can
- be created by default as userspace needs to do at least one allocation
- of DPA to the PMEM range. In contrast ND_NAMESPACE_IO ranges, once
- registered, can be immediately attached to nd_pmem.
+ range is contiguous in system memory and may be interleaved (hardware
+ memory controller striped) across multiple DIMMs. When interleaved the
+ platform may optionally provide details of which DIMMs are participating
+ in the interleave.
+
+ Note that while LIBNVDIMM describes system-physical-address ranges that may
+ alias with BLK access as ND_NAMESPACE_PMEM ranges and those without
+ alias as ND_NAMESPACE_IO ranges, to the nd_pmem driver there is no
+ distinction. The different device-types are an implementation detail
+ that userspace can exploit to implement policies like "only interface
+ with address ranges from certain DIMMs". It is worth noting that when
+ aliasing is present and a DIMM lacks a label, then no block device can
+ be created by default as userspace needs to do at least one allocation
+ of DPA to the PMEM range. In contrast ND_NAMESPACE_IO ranges, once
+ registered, can be immediately attached to nd_pmem.
2. BLK (nd_blk.ko): This driver performs I/O using a set of platform
- defined apertures. A set of apertures will access just one DIMM.
- Multiple windows (apertures) allow multiple concurrent accesses, much like
- tagged-command-queuing, and would likely be used by different threads or
- different CPUs.
+ defined apertures. A set of apertures will access just one DIMM.
+ Multiple windows (apertures) allow multiple concurrent accesses, much like
+ tagged-command-queuing, and would likely be used by different threads or
+ different CPUs.
+
+ The NFIT specification defines a standard format for a BLK-aperture, but
+ the spec also allows for vendor specific layouts, and non-NFIT BLK
+ implementations may have other designs for BLK I/O. For this reason
+ "nd_blk" calls back into platform-specific code to perform the I/O.
- The NFIT specification defines a standard format for a BLK-aperture, but
- the spec also allows for vendor specific layouts, and non-NFIT BLK
- implementations may have other designs for BLK I/O. For this reason
- "nd_blk" calls back into platform-specific code to perform the I/O.
- One such implementation is defined in the "Driver Writer's Guide" and "DSM
- Interface Example".
+ One such implementation is defined in the "Driver Writer's Guide" and "DSM
+ Interface Example".
Why BLK?
---------
+========
While PMEM provides direct byte-addressable CPU-load/store access to
NVDIMM storage, it does not provide the best system RAS (recovery,
@@ -162,12 +188,15 @@ system-physical-address address causes a CPU exception while an access
to a corrupted address through a BLK-aperture causes that block window
to raise an error status in a register. The latter is more aligned with
the standard error model that host-bus-adapter attached disks present.
+
Also, if an administrator ever wants to replace a memory it is easier to
service a system at DIMM module boundaries. Compare this to PMEM where
data could be interleaved in an opaque hardware specific manner across
several DIMMs.
PMEM vs BLK
+-----------
+
BLK-apertures solve these RAS problems, but their presence is also the
major contributing factor to the complexity of the ND subsystem. They
complicate the implementation because PMEM and BLK alias in DPA space.
@@ -185,13 +214,14 @@ carved into an arbitrary number of BLK devices with discontiguous
extents.
BLK-REGIONs, PMEM-REGIONs, Atomic Sectors, and DAX
---------------------------------------------------
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
One of the few
reasons to allow multiple BLK namespaces per REGION is so that each
BLK-namespace can be configured with a BTT with unique atomic sector
sizes. While a PMEM device can host a BTT the LABEL specification does
not provide for a sector size to be specified for a PMEM namespace.
+
This is due to the expectation that the primary usage model for PMEM is
via DAX, and the BTT is incompatible with DAX. However, for the cases
where an application or filesystem still needs atomic sector update
@@ -200,52 +230,52 @@ LIBNVDIMM/NDCTL: Block Translation Table "btt"
Example NVDIMM Platform
------------------------
+=======================
For the remainder of this document the following diagram will be
-referenced for any example sysfs layouts.
-
-
- (a) (b) DIMM BLK-REGION
- +-------------------+--------+--------+--------+
-+------+ | pm0.0 | blk2.0 | pm1.0 | blk2.1 | 0 region2
-| imc0 +--+- - - region0- - - +--------+ +--------+
-+--+---+ | pm0.0 | blk3.0 | pm1.0 | blk3.1 | 1 region3
- | +-------------------+--------v v--------+
-+--+---+ | |
-| cpu0 | region1
-+--+---+ | |
- | +----------------------------^ ^--------+
-+--+---+ | blk4.0 | pm1.0 | blk4.0 | 2 region4
-| imc1 +--+----------------------------| +--------+
-+------+ | blk5.0 | pm1.0 | blk5.0 | 3 region5
- +----------------------------+--------+--------+
+referenced for any example sysfs layouts::
+
+
+ (a) (b) DIMM BLK-REGION
+ +-------------------+--------+--------+--------+
+ +------+ | pm0.0 | blk2.0 | pm1.0 | blk2.1 | 0 region2
+ | imc0 +--+- - - region0- - - +--------+ +--------+
+ +--+---+ | pm0.0 | blk3.0 | pm1.0 | blk3.1 | 1 region3
+ | +-------------------+--------v v--------+
+ +--+---+ | |
+ | cpu0 | region1
+ +--+---+ | |
+ | +----------------------------^ ^--------+
+ +--+---+ | blk4.0 | pm1.0 | blk4.0 | 2 region4
+ | imc1 +--+----------------------------| +--------+
+ +------+ | blk5.0 | pm1.0 | blk5.0 | 3 region5
+ +----------------------------+--------+--------+
In this platform we have four DIMMs and two memory controllers in one
socket. Each unique interface (BLK or PMEM) to DPA space is identified
by a region device with a dynamically assigned id (REGION0 - REGION5).
1. The first portion of DIMM0 and DIMM1 are interleaved as REGION0. A
- single PMEM namespace is created in the REGION0-SPA-range that spans most
- of DIMM0 and DIMM1 with a user-specified name of "pm0.0". Some of that
- interleaved system-physical-address range is reclaimed as BLK-aperture
- accessed space starting at DPA-offset (a) into each DIMM. In that
- reclaimed space we create two BLK-aperture "namespaces" from REGION2 and
- REGION3 where "blk2.0" and "blk3.0" are just human readable names that
- could be set to any user-desired name in the LABEL.
+ single PMEM namespace is created in the REGION0-SPA-range that spans most
+ of DIMM0 and DIMM1 with a user-specified name of "pm0.0". Some of that
+ interleaved system-physical-address range is reclaimed as BLK-aperture
+ accessed space starting at DPA-offset (a) into each DIMM. In that
+ reclaimed space we create two BLK-aperture "namespaces" from REGION2 and
+ REGION3 where "blk2.0" and "blk3.0" are just human readable names that
+ could be set to any user-desired name in the LABEL.
2. In the last portion of DIMM0 and DIMM1 we have an interleaved
- system-physical-address range, REGION1, that spans those two DIMMs as
- well as DIMM2 and DIMM3. Some of REGION1 is allocated to a PMEM namespace
- named "pm1.0", the rest is reclaimed in 4 BLK-aperture namespaces (for
- each DIMM in the interleave set), "blk2.1", "blk3.1", "blk4.0", and
- "blk5.0".
+ system-physical-address range, REGION1, that spans those two DIMMs as
+ well as DIMM2 and DIMM3. Some of REGION1 is allocated to a PMEM namespace
+ named "pm1.0", the rest is reclaimed in 4 BLK-aperture namespaces (for
+ each DIMM in the interleave set), "blk2.1", "blk3.1", "blk4.0", and
+ "blk5.0".
3. The portion of DIMM2 and DIMM3 that do not participate in the REGION1
- interleaved system-physical-address range (i.e. the DPA address past
- offset (b) are also included in the "blk4.0" and "blk5.0" namespaces.
- Note, that this example shows that BLK-aperture namespaces don't need to
- be contiguous in DPA-space.
+ interleaved system-physical-address range (i.e. the DPA address past
+ offset (b)) are also included in the "blk4.0" and "blk5.0" namespaces.
+ Note, that this example shows that BLK-aperture namespaces don't need to
+ be contiguous in DPA-space.
This bus is provided by the kernel under the device
/sys/devices/platform/nfit_test.0 when CONFIG_NFIT_TEST is enabled and
@@ -254,7 +284,7 @@ by a region device with a dynamically assigned id (REGION0 - REGION5).
LIBNVDIMM Kernel Device Model and LIBNDCTL Userspace API
-----------------------------------------------------
+========================================================
What follows is a description of the LIBNVDIMM sysfs layout and a
corresponding object hierarchy diagram as viewed through the LIBNDCTL
@@ -263,12 +293,18 @@ NVDIMM Platform which is also the LIBNVDIMM bus used in the LIBNDCTL unit
test.
LIBNDCTL: Context
+-----------------
+
Every API call in the LIBNDCTL library requires a context that holds the
logging parameters and other library instance state. The library is
based on the libabc template:
-https://git.kernel.org/cgit/linux/kernel/git/kay/libabc.git
+
+ https://git.kernel.org/cgit/linux/kernel/git/kay/libabc.git
LIBNDCTL: instantiate a new library context example
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+::
struct ndctl_ctx *ctx;
@@ -278,7 +314,7 @@ LIBNDCTL: instantiate a new library context example
return NULL;
LIBNVDIMM/LIBNDCTL: Bus
--------------------
+-----------------------
A bus has a 1:1 relationship with an NFIT. The current expectation for
ACPI based systems is that there is only ever one platform-global NFIT.
@@ -288,9 +324,10 @@ we use this capability to test multiple NFIT configurations in the unit
test.
LIBNVDIMM: control class device in /sys/class
+---------------------------------------------
This character device accepts DSM messages to be passed to DIMM
-identified by its NFIT handle.
+identified by its NFIT handle::
/sys/class/nd/ndctl0
|-- dev
@@ -300,10 +337,15 @@ identified by its NFIT handle.
LIBNVDIMM: bus
+--------------
+
+::
struct nvdimm_bus *nvdimm_bus_register(struct device *parent,
struct nvdimm_bus_descriptor *nfit_desc);
+::
+
/sys/devices/platform/nfit_test.0/ndbus0
|-- commands
|-- nd
@@ -324,7 +366,9 @@ LIBNVDIMM: bus
`-- wait_probe
LIBNDCTL: bus enumeration example
-Find the bus handle that describes the bus from Example NVDIMM Platform
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Find the bus handle that describes the bus from Example NVDIMM Platform::
static struct ndctl_bus *get_bus_by_provider(struct ndctl_ctx *ctx,
const char *provider)
@@ -342,7 +386,7 @@ Find the bus handle that describes the bus from Example NVDIMM Platform
LIBNVDIMM/LIBNDCTL: DIMM (NMEM)
----------------------------
+-------------------------------
The DIMM device provides a character device for sending commands to
hardware, and it is a container for LABELs. If the DIMM is defined by
@@ -355,11 +399,16 @@ Range Mapping Structure", and there is no requirement that they actually
be physical DIMMs, so we use a more generic name.
LIBNVDIMM: DIMM (NMEM)
+^^^^^^^^^^^^^^^^^^^^^^
+
+::
struct nvdimm *nvdimm_create(struct nvdimm_bus *nvdimm_bus, void *provider_data,
const struct attribute_group **groups, unsigned long flags,
unsigned long *dsm_mask);
+::
+
/sys/devices/platform/nfit_test.0/ndbus0
|-- nmem0
| |-- available_slots
@@ -384,15 +433,20 @@ LIBNVDIMM: DIMM (NMEM)
LIBNDCTL: DIMM enumeration example
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Note, in this example we are assuming NFIT-defined DIMMs which are
identified by an "nfit_handle" a 32-bit value where:
-Bit 3:0 DIMM number within the memory channel
-Bit 7:4 memory channel number
-Bit 11:8 memory controller ID
-Bit 15:12 socket ID (within scope of a Node controller if node controller is present)
-Bit 27:16 Node Controller ID
-Bit 31:28 Reserved
+
+ - Bit 3:0 DIMM number within the memory channel
+ - Bit 7:4 memory channel number
+ - Bit 11:8 memory controller ID
+ - Bit 15:12 socket ID (within scope of a Node controller if node
+ controller is present)
+ - Bit 27:16 Node Controller ID
+ - Bit 31:28 Reserved
+
+::
static struct ndctl_dimm *get_dimm_by_handle(struct ndctl_bus *bus,
unsigned int handle)
@@ -413,7 +467,7 @@ Bit 31:28 Reserved
dimm = get_dimm_by_handle(bus, DIMM_HANDLE(0, 0, 0, 0, 0));
LIBNVDIMM/LIBNDCTL: Region
-----------------------
+--------------------------
A generic REGION device is registered for each PMEM range or BLK-aperture
set. Per the example there are 6 regions: 2 PMEM and 4 BLK-aperture
@@ -435,13 +489,15 @@ emits, "devtype" duplicates the DEVTYPE variable stored by udev at the
'add' event, and finally, the optional "spa_index" is provided in
the case where the region is defined by a SPA.
-LIBNVDIMM: region
+LIBNVDIMM: region::
struct nd_region *nvdimm_pmem_region_create(struct nvdimm_bus *nvdimm_bus,
struct nd_region_desc *ndr_desc);
struct nd_region *nvdimm_blk_region_create(struct nvdimm_bus *nvdimm_bus,
struct nd_region_desc *ndr_desc);
+::
+
/sys/devices/platform/nfit_test.0/ndbus0
|-- region0
| |-- available_size
@@ -468,10 +524,11 @@ LIBNVDIMM: region
[..]
LIBNDCTL: region enumeration example
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Sample region retrieval routines based on NFIT-unique data like
"spa_index" (interleave set id) for PMEM and "nfit_handle" (dimm id) for
-BLK.
+BLK::
static struct ndctl_region *get_pmem_region_by_spa_index(struct ndctl_bus *bus,
unsigned int spa_index)
@@ -518,33 +575,33 @@ REGION name generic and expects userspace to always consider the
region-attributes for four reasons:
1. There are already more than two REGION and "namespace" types. For
- PMEM there are two subtypes. As mentioned previously we have PMEM where
- the constituent DIMM devices are known and anonymous PMEM. For BLK
- regions the NFIT specification already anticipates vendor specific
- implementations. The exact distinction of what a region contains is in
- the region-attributes not the region-name or the region-devtype.
+ PMEM there are two subtypes. As mentioned previously we have PMEM where
+ the constituent DIMM devices are known and anonymous PMEM. For BLK
+ regions the NFIT specification already anticipates vendor specific
+ implementations. The exact distinction of what a region contains is in
+ the region-attributes not the region-name or the region-devtype.
2. A region with zero child-namespaces is a possible configuration. For
- example, the NFIT allows for a DCR to be published without a
- corresponding BLK-aperture. This equates to a DIMM that can only accept
- control/configuration messages, but no i/o through a descendant block
- device. Again, this "type" is advertised in the attributes ('mappings'
- == 0) and the name does not tell you much.
+ example, the NFIT allows for a DCR to be published without a
+ corresponding BLK-aperture. This equates to a DIMM that can only accept
+ control/configuration messages, but no i/o through a descendant block
+ device. Again, this "type" is advertised in the attributes ('mappings'
+ == 0) and the name does not tell you much.
3. What if a third major interface type arises in the future? Outside
- of vendor specific implementations, it's not difficult to envision a
- third class of interface type beyond BLK and PMEM. With a generic name
- for the REGION level of the device-hierarchy old userspace
- implementations can still make sense of new kernel advertised
- region-types. Userspace can always rely on the generic region
- attributes like "mappings", "size", etc and the expected child devices
- named "namespace". This generic format of the device-model hierarchy
- allows the LIBNVDIMM and LIBNDCTL implementations to be more uniform and
- future-proof.
+ of vendor specific implementations, it's not difficult to envision a
+ third class of interface type beyond BLK and PMEM. With a generic name
+ for the REGION level of the device-hierarchy old userspace
+ implementations can still make sense of new kernel advertised
+ region-types. Userspace can always rely on the generic region
+ attributes like "mappings", "size", etc and the expected child devices
+ named "namespace". This generic format of the device-model hierarchy
+ allows the LIBNVDIMM and LIBNDCTL implementations to be more uniform and
+ future-proof.
4. There are more robust mechanisms for determining the major type of a
- region than a device name. See the next section, How Do I Determine the
- Major Type of a Region?
+ region than a device name. See the next section, How Do I Determine the
+ Major Type of a Region?
How Do I Determine the Major Type of a Region?
----------------------------------------------
@@ -553,7 +610,8 @@ Outside of the blanket recommendation of "use libndctl", or simply
looking at the kernel header (/usr/include/linux/ndctl.h) to decode the
"nstype" integer attribute, here are some other options.
- 1. module alias lookup:
+1. module alias lookup
+^^^^^^^^^^^^^^^^^^^^^^
The whole point of region/namespace device type differentiation is to
decide which block-device driver will attach to a given LIBNVDIMM namespace.
@@ -569,28 +627,31 @@ looking at the kernel header (/usr/include/linux/ndctl.h) to decode the
the resulting namespaces. The output from module resolution is more
accurate than a region-name or region-devtype.
- 2. udev:
+2. udev
+^^^^^^^
+
+ The kernel "devtype" is registered in the udev database::
- The kernel "devtype" is registered in the udev database
- # udevadm info --path=/devices/platform/nfit_test.0/ndbus0/region0
- P: /devices/platform/nfit_test.0/ndbus0/region0
- E: DEVPATH=/devices/platform/nfit_test.0/ndbus0/region0
- E: DEVTYPE=nd_pmem
- E: MODALIAS=nd:t2
- E: SUBSYSTEM=nd
+ # udevadm info --path=/devices/platform/nfit_test.0/ndbus0/region0
+ P: /devices/platform/nfit_test.0/ndbus0/region0
+ E: DEVPATH=/devices/platform/nfit_test.0/ndbus0/region0
+ E: DEVTYPE=nd_pmem
+ E: MODALIAS=nd:t2
+ E: SUBSYSTEM=nd
- # udevadm info --path=/devices/platform/nfit_test.0/ndbus0/region4
- P: /devices/platform/nfit_test.0/ndbus0/region4
- E: DEVPATH=/devices/platform/nfit_test.0/ndbus0/region4
- E: DEVTYPE=nd_blk
- E: MODALIAS=nd:t3
- E: SUBSYSTEM=nd
+ # udevadm info --path=/devices/platform/nfit_test.0/ndbus0/region4
+ P: /devices/platform/nfit_test.0/ndbus0/region4
+ E: DEVPATH=/devices/platform/nfit_test.0/ndbus0/region4
+ E: DEVTYPE=nd_blk
+ E: MODALIAS=nd:t3
+ E: SUBSYSTEM=nd
...and is available as a region attribute, but keep in mind that the
"devtype" does not indicate sub-type variations and scripts should
really be understanding the other attributes.
- 3. type specific attributes:
+3. type specific attributes
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
As it currently stands a BLK-aperture region will never have a
"nfit/spa_index" attribute, but neither will a non-NFIT PMEM region. A
@@ -600,7 +661,7 @@ looking at the kernel header (/usr/include/linux/ndctl.h) to decode the
LIBNVDIMM/LIBNDCTL: Namespace
--------------------------
+-----------------------------
A REGION, after resolving DPA aliasing and LABEL specified boundaries,
surfaces one or more "namespace" devices. The arrival of a "namespace"
@@ -608,12 +669,14 @@ device currently triggers either the nd_blk or nd_pmem driver to load
and register a disk/block device.
LIBNVDIMM: namespace
+^^^^^^^^^^^^^^^^^^^^
+
Here is a sample layout from the three major types of NAMESPACE where
namespace0.0 represents DIMM-info-backed PMEM (note that it has a 'uuid'
attribute), namespace2.0 represents a BLK namespace (note it has a
'sector_size' attribute), and namespace6.0 represents an anonymous
PMEM namespace (note that it has no 'uuid' attribute due to not supporting a
-LABEL).
+LABEL)::
/sys/devices/platform/nfit_test.0/ndbus0/region0/namespace0.0
|-- alt_name
@@ -656,76 +719,84 @@ LABEL).
`-- uevent
LIBNDCTL: namespace enumeration example
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Namespaces are indexed relative to their parent region, example below.
These indexes are mostly static from boot to boot, but the subsystem makes
no guarantees in this regard. For a static namespace identifier use its
'uuid' attribute.
-static struct ndctl_namespace *get_namespace_by_id(struct ndctl_region *region,
- unsigned int id)
-{
- struct ndctl_namespace *ndns;
+::
- ndctl_namespace_foreach(region, ndns)
- if (ndctl_namespace_get_id(ndns) == id)
- return ndns;
+ static struct ndctl_namespace
+ *get_namespace_by_id(struct ndctl_region *region, unsigned int id)
+ {
+ struct ndctl_namespace *ndns;
- return NULL;
-}
+ ndctl_namespace_foreach(region, ndns)
+ if (ndctl_namespace_get_id(ndns) == id)
+ return ndns;
+
+ return NULL;
+ }
LIBNDCTL: namespace creation example
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
Idle namespaces are automatically created by the kernel if a given
region has enough available capacity to create a new namespace.
Namespace instantiation involves finding an idle namespace and
configuring it. For the most part the setting of namespace attributes
can occur in any order, the only constraint is that 'uuid' must be set
before 'size'. This enables the kernel to track DPA allocations
-internally with a static identifier.
+internally with a static identifier::
-static int configure_namespace(struct ndctl_region *region,
- struct ndctl_namespace *ndns,
- struct namespace_parameters *parameters)
-{
- char devname[50];
+ static int configure_namespace(struct ndctl_region *region,
+ struct ndctl_namespace *ndns,
+ struct namespace_parameters *parameters)
+ {
+ char devname[50];
- snprintf(devname, sizeof(devname), "namespace%d.%d",
- ndctl_region_get_id(region), paramaters->id);
+ snprintf(devname, sizeof(devname), "namespace%d.%d",
+ ndctl_region_get_id(region), paramaters->id);
- ndctl_namespace_set_alt_name(ndns, devname);
- /* 'uuid' must be set prior to setting size! */
- ndctl_namespace_set_uuid(ndns, paramaters->uuid);
- ndctl_namespace_set_size(ndns, paramaters->size);
- /* unlike pmem namespaces, blk namespaces have a sector size */
- if (parameters->lbasize)
- ndctl_namespace_set_sector_size(ndns, parameters->lbasize);
- ndctl_namespace_enable(ndns);
-}
+ ndctl_namespace_set_alt_name(ndns, devname);
+ /* 'uuid' must be set prior to setting size! */
+ ndctl_namespace_set_uuid(ndns, paramaters->uuid);
+ ndctl_namespace_set_size(ndns, paramaters->size);
+ /* unlike pmem namespaces, blk namespaces have a sector size */
+ if (parameters->lbasize)
+ ndctl_namespace_set_sector_size(ndns, parameters->lbasize);
+ ndctl_namespace_enable(ndns);
+ }
Why the Term "namespace"?
+^^^^^^^^^^^^^^^^^^^^^^^^^
1. Why not "volume" for instance? "volume" ran the risk of confusing
- ND (libnvdimm subsystem) to a volume manager like device-mapper.
+ ND (libnvdimm subsystem) with a volume manager like device-mapper.
2. The term originated to describe the sub-devices that can be created
- within a NVME controller (see the nvme specification:
- http://www.nvmexpress.org/specifications/), and NFIT namespaces are
- meant to parallel the capabilities and configurability of
- NVME-namespaces.
+ within a NVME controller (see the nvme specification:
+ http://www.nvmexpress.org/specifications/), and NFIT namespaces are
+ meant to parallel the capabilities and configurability of
+ NVME-namespaces.
LIBNVDIMM/LIBNDCTL: Block Translation Table "btt"
----------------------------------------------
+-------------------------------------------------
A BTT (design document: http://pmem.io/2014/09/23/btt.html) is a stacked
block device driver that fronts either the whole block device or a
partition of a block device emitted by either a PMEM or BLK NAMESPACE.
LIBNVDIMM: btt layout
+^^^^^^^^^^^^^^^^^^^^^
+
Every region will start out with at least one BTT device which is the
seed device. To activate it set the "namespace", "uuid", and
"sector_size" attributes and then bind the device to the nd_pmem or
-nd_blk driver depending on the region type.
+nd_blk driver depending on the region type::
/sys/devices/platform/nfit_test.1/ndbus0/region0/btt0/
|-- namespace
@@ -739,10 +810,12 @@ nd_blk driver depending on the region type.
`-- uuid
LIBNDCTL: btt creation example
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
Similar to namespaces an idle BTT device is automatically created per
region. Each time this "seed" btt device is configured and enabled a new
seed is created. Creating a BTT configuration involves two steps of
-finding and idle BTT and assigning it to consume a PMEM or BLK namespace.
+finding an idle BTT and assigning it to consume a PMEM or BLK namespace::
static struct ndctl_btt *get_idle_btt(struct ndctl_region *region)
{
@@ -787,29 +860,28 @@ Summary LIBNDCTL Diagram
------------------------
For the given example above, here is the view of the objects as seen by the
-LIBNDCTL API:
- +---+
- |CTX| +---------+ +--------------+ +---------------+
- +-+-+ +-> REGION0 +---> NAMESPACE0.0 +--> PMEM8 "pm0.0" |
- | | +---------+ +--------------+ +---------------+
-+-------+ | | +---------+ +--------------+ +---------------+
-| DIMM0 <-+ | +-> REGION1 +---> NAMESPACE1.0 +--> PMEM6 "pm1.0" |
-+-------+ | | | +---------+ +--------------+ +---------------+
-| DIMM1 <-+ +-v--+ | +---------+ +--------------+ +---------------+
-+-------+ +-+BUS0+---> REGION2 +-+-> NAMESPACE2.0 +--> ND6 "blk2.0" |
-| DIMM2 <-+ +----+ | +---------+ | +--------------+ +----------------------+
-+-------+ | | +-> NAMESPACE2.1 +--> ND5 "blk2.1" | BTT2 |
-| DIMM3 <-+ | +--------------+ +----------------------+
-+-------+ | +---------+ +--------------+ +---------------+
- +-> REGION3 +-+-> NAMESPACE3.0 +--> ND4 "blk3.0" |
- | +---------+ | +--------------+ +----------------------+
- | +-> NAMESPACE3.1 +--> ND3 "blk3.1" | BTT1 |
- | +--------------+ +----------------------+
- | +---------+ +--------------+ +---------------+
- +-> REGION4 +---> NAMESPACE4.0 +--> ND2 "blk4.0" |
- | +---------+ +--------------+ +---------------+
- | +---------+ +--------------+ +----------------------+
- +-> REGION5 +---> NAMESPACE5.0 +--> ND1 "blk5.0" | BTT0 |
- +---------+ +--------------+ +---------------+------+
-
-
+LIBNDCTL API::
+
+ +---+
+ |CTX| +---------+ +--------------+ +---------------+
+ +-+-+ +-> REGION0 +---> NAMESPACE0.0 +--> PMEM8 "pm0.0" |
+ | | +---------+ +--------------+ +---------------+
+ +-------+ | | +---------+ +--------------+ +---------------+
+ | DIMM0 <-+ | +-> REGION1 +---> NAMESPACE1.0 +--> PMEM6 "pm1.0" |
+ +-------+ | | | +---------+ +--------------+ +---------------+
+ | DIMM1 <-+ +-v--+ | +---------+ +--------------+ +---------------+
+ +-------+ +-+BUS0+---> REGION2 +-+-> NAMESPACE2.0 +--> ND6 "blk2.0" |
+ | DIMM2 <-+ +----+ | +---------+ | +--------------+ +----------------------+
+ +-------+ | | +-> NAMESPACE2.1 +--> ND5 "blk2.1" | BTT2 |
+ | DIMM3 <-+ | +--------------+ +----------------------+
+ +-------+ | +---------+ +--------------+ +---------------+
+ +-> REGION3 +-+-> NAMESPACE3.0 +--> ND4 "blk3.0" |
+ | +---------+ | +--------------+ +----------------------+
+ | +-> NAMESPACE3.1 +--> ND3 "blk3.1" | BTT1 |
+ | +--------------+ +----------------------+
+ | +---------+ +--------------+ +---------------+
+ +-> REGION4 +---> NAMESPACE4.0 +--> ND2 "blk4.0" |
+ | +---------+ +--------------+ +---------------+
+ | +---------+ +--------------+ +----------------------+
+ +-> REGION5 +---> NAMESPACE5.0 +--> ND1 "blk5.0" | BTT0 |
+ +---------+ +--------------+ +---------------+------+
diff --git a/Documentation/nvdimm/security.txt b/Documentation/driver-api/nvdimm/security.rst
index 4c36c05ca98e..ad9dea099b34 100644
--- a/Documentation/nvdimm/security.txt
+++ b/Documentation/driver-api/nvdimm/security.rst
@@ -1,4 +1,5 @@
-NVDIMM SECURITY
+===============
+NVDIMM Security
===============
1. Introduction
@@ -138,4 +139,5 @@ This command is only available when the master security is enabled, indicated
by the extended security status.
[1]: http://pmem.io/documents/NVDIMM_DSM_Interface-V1.8.pdf
+
[2]: http://www.t13.org/documents/UploadedDocuments/docs2006/e05179r4-ACS-SecurityClarifications.pdf
diff --git a/Documentation/nvmem/nvmem.txt b/Documentation/driver-api/nvmem.rst
index fc2fe4b18655..d9d958d5c824 100644
--- a/Documentation/nvmem/nvmem.txt
+++ b/Documentation/driver-api/nvmem.rst
@@ -1,5 +1,10 @@
- NVMEM SUBSYSTEM
- Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
+.. SPDX-License-Identifier: GPL-2.0
+
+===============
+NVMEM Subsystem
+===============
+
+ Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
This document explains the NVMEM Framework along with the APIs provided,
and how to use it.
@@ -40,54 +45,54 @@ nvmem_device pointer.
nvmem_unregister(nvmem) is used to unregister a previously registered provider.
-For example, a simple qfprom case:
+For example, a simple qfprom case::
-static struct nvmem_config econfig = {
+ static struct nvmem_config econfig = {
.name = "qfprom",
.owner = THIS_MODULE,
-};
+ };
-static int qfprom_probe(struct platform_device *pdev)
-{
+ static int qfprom_probe(struct platform_device *pdev)
+ {
...
econfig.dev = &pdev->dev;
nvmem = nvmem_register(&econfig);
...
-}
+ }
It is mandatory that the NVMEM provider has a regmap associated with its
struct device. Failure to do so would return an error code from nvmem_register().
Users of board files can define and register nvmem cells using the
-nvmem_cell_table struct:
+nvmem_cell_table struct::
-static struct nvmem_cell_info foo_nvmem_cells[] = {
+ static struct nvmem_cell_info foo_nvmem_cells[] = {
{
.name = "macaddr",
.offset = 0x7f00,
.bytes = ETH_ALEN,
}
-};
+ };
-static struct nvmem_cell_table foo_nvmem_cell_table = {
+ static struct nvmem_cell_table foo_nvmem_cell_table = {
.nvmem_name = "i2c-eeprom",
.cells = foo_nvmem_cells,
.ncells = ARRAY_SIZE(foo_nvmem_cells),
-};
+ };
-nvmem_add_cell_table(&foo_nvmem_cell_table);
+ nvmem_add_cell_table(&foo_nvmem_cell_table);
Additionally it is possible to create nvmem cell lookup entries and register
-them with the nvmem framework from machine code as shown in the example below:
+them with the nvmem framework from machine code as shown in the example below::
-static struct nvmem_cell_lookup foo_nvmem_lookup = {
+ static struct nvmem_cell_lookup foo_nvmem_lookup = {
.nvmem_name = "i2c-eeprom",
.cell_name = "macaddr",
.dev_id = "foo_mac.0",
.con_id = "mac-address",
-};
+ };
-nvmem_add_cell_lookups(&foo_nvmem_lookup, 1);
+ nvmem_add_cell_lookups(&foo_nvmem_lookup, 1);
NVMEM Consumers
+++++++++++++++
@@ -99,43 +104,43 @@ read from and to NVMEM.
=================================
NVMEM cells are the data entries/fields in the NVMEM.
-The NVMEM framework provides 3 APIs to read/write NVMEM cells.
+The NVMEM framework provides 3 APIs to read/write NVMEM cells::
-struct nvmem_cell *nvmem_cell_get(struct device *dev, const char *name);
-struct nvmem_cell *devm_nvmem_cell_get(struct device *dev, const char *name);
+ struct nvmem_cell *nvmem_cell_get(struct device *dev, const char *name);
+ struct nvmem_cell *devm_nvmem_cell_get(struct device *dev, const char *name);
-void nvmem_cell_put(struct nvmem_cell *cell);
-void devm_nvmem_cell_put(struct device *dev, struct nvmem_cell *cell);
+ void nvmem_cell_put(struct nvmem_cell *cell);
+ void devm_nvmem_cell_put(struct device *dev, struct nvmem_cell *cell);
-void *nvmem_cell_read(struct nvmem_cell *cell, ssize_t *len);
-int nvmem_cell_write(struct nvmem_cell *cell, void *buf, ssize_t len);
+ void *nvmem_cell_read(struct nvmem_cell *cell, ssize_t *len);
+ int nvmem_cell_write(struct nvmem_cell *cell, void *buf, ssize_t len);
-*nvmem_cell_get() apis will get a reference to nvmem cell for a given id,
+`*nvmem_cell_get()` apis will get a reference to nvmem cell for a given id,
and nvmem_cell_read/write() can then read or write to the cell.
-Once the usage of the cell is finished the consumer should call *nvmem_cell_put()
-to free all the allocation memory for the cell.
+Once the usage of the cell is finished the consumer should call
+`*nvmem_cell_put()` to free all the allocated memory for the cell.
4. Direct NVMEM device based consumer APIs
==========================================
In some instances it is necessary to directly read/write the NVMEM.
-To facilitate such consumers NVMEM framework provides below apis.
+To facilitate such consumers NVMEM framework provides below apis::
-struct nvmem_device *nvmem_device_get(struct device *dev, const char *name);
-struct nvmem_device *devm_nvmem_device_get(struct device *dev,
+ struct nvmem_device *nvmem_device_get(struct device *dev, const char *name);
+ struct nvmem_device *devm_nvmem_device_get(struct device *dev,
const char *name);
-void nvmem_device_put(struct nvmem_device *nvmem);
-int nvmem_device_read(struct nvmem_device *nvmem, unsigned int offset,
+ void nvmem_device_put(struct nvmem_device *nvmem);
+ int nvmem_device_read(struct nvmem_device *nvmem, unsigned int offset,
size_t bytes, void *buf);
-int nvmem_device_write(struct nvmem_device *nvmem, unsigned int offset,
+ int nvmem_device_write(struct nvmem_device *nvmem, unsigned int offset,
size_t bytes, void *buf);
-int nvmem_device_cell_read(struct nvmem_device *nvmem,
+ int nvmem_device_cell_read(struct nvmem_device *nvmem,
struct nvmem_cell_info *info, void *buf);
-int nvmem_device_cell_write(struct nvmem_device *nvmem,
+ int nvmem_device_cell_write(struct nvmem_device *nvmem,
struct nvmem_cell_info *info, void *buf);
Before the consumers can read/write NVMEM directly, it should get hold
-of nvmem_controller from one of the *nvmem_device_get() api.
+of nvmem_controller from one of the `*nvmem_device_get()` api.
The difference between these apis and cell based apis is that these apis always
take nvmem_device as parameter.
@@ -145,12 +150,12 @@ take nvmem_device as parameter.
When a consumer no longer needs the NVMEM, it has to release the reference
to the NVMEM it has obtained using the APIs mentioned in the above section.
-The NVMEM framework provides 2 APIs to release a reference to the NVMEM.
+The NVMEM framework provides 2 APIs to release a reference to the NVMEM::
-void nvmem_cell_put(struct nvmem_cell *cell);
-void devm_nvmem_cell_put(struct device *dev, struct nvmem_cell *cell);
-void nvmem_device_put(struct nvmem_device *nvmem);
-void devm_nvmem_device_put(struct device *dev, struct nvmem_device *nvmem);
+ void nvmem_cell_put(struct nvmem_cell *cell);
+ void devm_nvmem_cell_put(struct device *dev, struct nvmem_cell *cell);
+ void nvmem_device_put(struct nvmem_device *nvmem);
+ void devm_nvmem_device_put(struct device *dev, struct nvmem_device *nvmem);
Both these APIs are used to release a reference to the NVMEM and
devm_nvmem_cell_put and devm_nvmem_device_put destroys the devres associated
@@ -162,20 +167,21 @@ Userspace
6. Userspace binary interface
==============================
-Userspace can read/write the raw NVMEM file located at
-/sys/bus/nvmem/devices/*/nvmem
+Userspace can read/write the raw NVMEM file located at::
+
+ /sys/bus/nvmem/devices/*/nvmem
-ex:
+ex::
-hexdump /sys/bus/nvmem/devices/qfprom0/nvmem
+ hexdump /sys/bus/nvmem/devices/qfprom0/nvmem
-0000000 0000 0000 0000 0000 0000 0000 0000 0000
-*
-00000a0 db10 2240 0000 e000 0c00 0c00 0000 0c00
-0000000 0000 0000 0000 0000 0000 0000 0000 0000
-...
-*
-0001000
+ 0000000 0000 0000 0000 0000 0000 0000 0000 0000
+ *
+ 00000a0 db10 2240 0000 e000 0c00 0c00 0000 0c00
+ 0000000 0000 0000 0000 0000 0000 0000 0000 0000
+ ...
+ *
+ 0001000
7. DeviceTree Binding
=====================
diff --git a/Documentation/parport-lowlevel.txt b/Documentation/driver-api/parport-lowlevel.rst
index 0633d70ffda7..0633d70ffda7 100644
--- a/Documentation/parport-lowlevel.txt
+++ b/Documentation/driver-api/parport-lowlevel.rst
diff --git a/Documentation/driver-api/phy/index.rst b/Documentation/driver-api/phy/index.rst
new file mode 100644
index 000000000000..69ba1216de72
--- /dev/null
+++ b/Documentation/driver-api/phy/index.rst
@@ -0,0 +1,18 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=====================
+Generic PHY Framework
+=====================
+
+.. toctree::
+
+ phy
+ samsung-usb2
+
+.. only:: subproject and html
+
+ Indices
+ =======
+
+ * :ref:`genindex`
+
diff --git a/Documentation/phy.txt b/Documentation/driver-api/phy/phy.rst
index 457c3e0f86d6..457c3e0f86d6 100644
--- a/Documentation/phy.txt
+++ b/Documentation/driver-api/phy/phy.rst
diff --git a/Documentation/phy/samsung-usb2.txt b/Documentation/driver-api/phy/samsung-usb2.rst
index ed12d437189d..c48c8b9797b9 100644
--- a/Documentation/phy/samsung-usb2.txt
+++ b/Documentation/driver-api/phy/samsung-usb2.rst
@@ -1,9 +1,9 @@
-.------------------------------------------------------------------------------+
-| Samsung USB 2.0 PHY adaptation layer |
-+-----------------------------------------------------------------------------+'
+====================================
+Samsung USB 2.0 PHY adaptation layer
+====================================
-| 1. Description
-+----------------
+1. Description
+--------------
The architecture of the USB 2.0 PHY module in Samsung SoCs is similar
among many SoCs. In spite of the similarities it proved difficult to
@@ -14,8 +14,8 @@ the PHY powering up process had to be altered. This adaptation layer is
a compromise between having separate drivers and having a single driver
with added support for many special cases.
-| 2. Files description
-+----------------------
+2. Files description
+--------------------
- phy-samsung-usb2.c
This is the main file of the adaptation layer. This file contains
@@ -32,44 +32,45 @@ with added support for many special cases.
driver. In addition it should contain extern declarations for
structures that describe particular SoCs.
-| 3. Supporting SoCs
-+--------------------
+3. Supporting SoCs
+------------------
To support a new SoC a new file should be added to the drivers/phy
directory. Each SoC's configuration is stored in an instance of the
-struct samsung_usb2_phy_config.
+struct samsung_usb2_phy_config::
-struct samsung_usb2_phy_config {
+ struct samsung_usb2_phy_config {
const struct samsung_usb2_common_phy *phys;
int (*rate_to_clk)(unsigned long, u32 *);
unsigned int num_phys;
bool has_mode_switch;
-};
+ };
-The num_phys is the number of phys handled by the driver. *phys is an
+The num_phys is the number of phys handled by the driver. `*phys` is an
array that contains the configuration for each phy. The has_mode_switch
property is a boolean flag that determines whether the SoC has USB host
and device on a single pair of pins. If so, a special register has to
be modified to change the internal routing of these pins between a USB
device or host module.
-For example the configuration for Exynos 4210 is following:
+For example the configuration for Exynos 4210 is following::
-const struct samsung_usb2_phy_config exynos4210_usb2_phy_config = {
+ const struct samsung_usb2_phy_config exynos4210_usb2_phy_config = {
.has_mode_switch = 0,
.num_phys = EXYNOS4210_NUM_PHYS,
.phys = exynos4210_phys,
.rate_to_clk = exynos4210_rate_to_clk,
-}
+ }
+
+- `int (*rate_to_clk)(unsigned long, u32 *)`
-- int (*rate_to_clk)(unsigned long, u32 *)
The rate_to_clk callback is to convert the rate of the clock
used as the reference clock for the PHY module to the value
that should be written in the hardware register.
-The exynos4210_phys configuration array is as follows:
+The exynos4210_phys configuration array is as follows::
-static const struct samsung_usb2_common_phy exynos4210_phys[] = {
+ static const struct samsung_usb2_common_phy exynos4210_phys[] = {
{
.label = "device",
.id = EXYNOS4210_DEVICE,
@@ -95,29 +96,30 @@ static const struct samsung_usb2_common_phy exynos4210_phys[] = {
.power_off = exynos4210_power_off,
},
{},
-};
+ };
+
+- `int (*power_on)(struct samsung_usb2_phy_instance *);`
+ `int (*power_off)(struct samsung_usb2_phy_instance *);`
-- int (*power_on)(struct samsung_usb2_phy_instance *);
-- int (*power_off)(struct samsung_usb2_phy_instance *);
These two callbacks are used to power on and power off the phy
by modifying appropriate registers.
Final change to the driver is adding appropriate compatible value to the
phy-samsung-usb2.c file. In case of Exynos 4210 the following lines were
-added to the struct of_device_id samsung_usb2_phy_of_match[] array:
+added to the struct of_device_id samsung_usb2_phy_of_match[] array::
-#ifdef CONFIG_PHY_EXYNOS4210_USB2
+ #ifdef CONFIG_PHY_EXYNOS4210_USB2
{
.compatible = "samsung,exynos4210-usb2-phy",
.data = &exynos4210_usb2_phy_config,
},
-#endif
+ #endif
To add further flexibility to the driver the Kconfig file enables to
include support for selected SoCs in the compiled driver. The Kconfig
-entry for Exynos 4210 is following:
+entry for Exynos 4210 is following::
-config PHY_EXYNOS4210_USB2
+ config PHY_EXYNOS4210_USB2
bool "Support for Exynos 4210"
depends on PHY_SAMSUNG_USB2
depends on CPU_EXYNOS4210
@@ -128,8 +130,8 @@ config PHY_EXYNOS4210_USB2
phys are available - device, host, HSCI0 and HSCI1.
The newly created file that supports the new SoC has to be also added to the
-Makefile. In case of Exynos 4210 the added line is following:
+Makefile. In case of Exynos 4210 the added line is following::
-obj-$(CONFIG_PHY_EXYNOS4210_USB2) += phy-exynos4210-usb2.o
+ obj-$(CONFIG_PHY_EXYNOS4210_USB2) += phy-exynos4210-usb2.o
After completing these steps the support for the new SoC should be ready.
diff --git a/Documentation/driver-api/pps.rst b/Documentation/driver-api/pps.rst
index 1456d2c32ebd..2d6b99766ee8 100644
--- a/Documentation/driver-api/pps.rst
+++ b/Documentation/driver-api/pps.rst
@@ -1,4 +1,4 @@
-:orphan:
+.. SPDX-License-Identifier: GPL-2.0
======================
PPS - Pulse Per Second
diff --git a/Documentation/driver-api/pti_intel_mid.rst b/Documentation/driver-api/pti_intel_mid.rst
new file mode 100644
index 000000000000..20f1cff42d5f
--- /dev/null
+++ b/Documentation/driver-api/pti_intel_mid.rst
@@ -0,0 +1,106 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=============
+Intel MID PTI
+=============
+
+The Intel MID PTI project is HW implemented in Intel Atom
+system-on-a-chip designs based on the Parallel Trace
+Interface for MIPI P1149.7 cJTAG standard. The kernel solution
+for this platform involves the following files::
+
+ ./include/linux/pti.h
+ ./drivers/.../n_tracesink.h
+ ./drivers/.../n_tracerouter.c
+ ./drivers/.../n_tracesink.c
+ ./drivers/.../pti.c
+
+pti.c is the driver that enables various debugging features
+popular on platforms from certain mobile manufacturers.
+n_tracerouter.c and n_tracesink.c allow extra system information to
+be collected and routed to the pti driver, such as trace
+debugging data from a modem. Although n_tracerouter
+and n_tracesink are a part of the complete PTI solution,
+these two line disciplines can work separately from
+pti.c and route any data stream from one /dev/tty node
+to another /dev/tty node via kernel-space. This provides
+a stable, reliable connection that will not break unless
+the user-space application shuts down (plus avoids
+kernel->user->kernel context switch overheads of routing
+data).
+
+An example debugging usage for this driver system:
+
+ * Hook /dev/ttyPTI0 to syslogd. Opening this port will also start
+ a console device to further capture debugging messages to PTI.
+ * Hook /dev/ttyPTI1 to modem debugging data to write to PTI HW.
+ This is where n_tracerouter and n_tracesink are used.
+ * Hook /dev/pti to a user-level debugging application for writing
+ to PTI HW.
+ * Use ``mipi_*`` Kernel Driver API in other device drivers for
+ debugging to PTI by first requesting a PTI write address via
+ mipi_request_masterchannel(1).
+
+Below is example pseudo-code on how a 'privileged' application
+can hook up n_tracerouter and n_tracesink to any tty on
+a system. 'Privileged' means the application has enough
+privileges to successfully manipulate the ldisc drivers
+but is not just blindly executing as 'root'. Keep in mind
+the use of ioctl(,TIOCSETD,) is not specific to the n_tracerouter
+and n_tracesink line discipline drivers but is a generic
+operation for a program to use a line discipline driver
+on a tty port other than the default n_tty::
+
+ /////////// To hook up n_tracerouter and n_tracesink /////////
+
+ // Note that n_tracerouter depends on n_tracesink.
+ #include <errno.h>
+ #define ONE_TTY "/dev/ttyOne"
+ #define TWO_TTY "/dev/ttyTwo"
+
+ // needed global to hand onto ldisc connection
+ static int g_fd_source = -1;
+ static int g_fd_sink = -1;
+
+ // these two vars used to grab LDISC values from loaded ldisc drivers
+ // in OS. Look at /proc/tty/ldiscs to get the right numbers from
+ // the ldiscs loaded in the system.
+ int source_ldisc_num, sink_ldisc_num = -1;
+ int retval;
+
+ g_fd_source = open(ONE_TTY, O_RDWR); // must be R/W
+ g_fd_sink = open(TWO_TTY, O_RDWR); // must be R/W
+
+ if (g_fd_source <= 0) || (g_fd_sink <= 0) {
+ // doubt you'll want to use these exact error lines of code
+ printf("Error on open(). errno: %d\n",errno);
+ return errno;
+ }
+
+ retval = ioctl(g_fd_sink, TIOCSETD, &sink_ldisc_num);
+ if (retval < 0) {
+ printf("Error on ioctl(). errno: %d\n", errno);
+ return errno;
+ }
+
+ retval = ioctl(g_fd_source, TIOCSETD, &source_ldisc_num);
+ if (retval < 0) {
+ printf("Error on ioctl(). errno: %d\n", errno);
+ return errno;
+ }
+
+ /////////// To disconnect n_tracerouter and n_tracesink ////////
+
+ // First make sure data through the ldiscs has stopped.
+
+ // Second, disconnect ldiscs. This provides a
+ // little cleaner shutdown on tty stack.
+ sink_ldisc_num = 0;
+ source_ldisc_num = 0;
+ ioctl(g_fd_uart, TIOCSETD, &sink_ldisc_num);
+ ioctl(g_fd_gadget, TIOCSETD, &source_ldisc_num);
+
+ // Three, program closes connection, and cleanup:
+ close(g_fd_uart);
+ close(g_fd_gadget);
+ g_fd_uart = g_fd_gadget = NULL;
diff --git a/Documentation/driver-api/ptp.rst b/Documentation/driver-api/ptp.rst
index b6e65d66d37a..a15192e32347 100644
--- a/Documentation/driver-api/ptp.rst
+++ b/Documentation/driver-api/ptp.rst
@@ -1,4 +1,4 @@
-:orphan:
+.. SPDX-License-Identifier: GPL-2.0
===========================================
PTP hardware clock infrastructure for Linux
diff --git a/Documentation/pwm.txt b/Documentation/driver-api/pwm.rst
index ab62f1bb0366..ab62f1bb0366 100644
--- a/Documentation/pwm.txt
+++ b/Documentation/driver-api/pwm.rst
diff --git a/Documentation/driver-api/rapidio/index.rst b/Documentation/driver-api/rapidio/index.rst
new file mode 100644
index 000000000000..a41b4242d16f
--- /dev/null
+++ b/Documentation/driver-api/rapidio/index.rst
@@ -0,0 +1,15 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===========================
+The Linux RapidIO Subsystem
+===========================
+
+.. toctree::
+ :maxdepth: 1
+
+ rapidio
+ sysfs
+
+ tsi721
+ mport_cdev
+ rio_cm
diff --git a/Documentation/rapidio/mport_cdev.txt b/Documentation/driver-api/rapidio/mport_cdev.rst
index a53f786ee2e9..df77a7f7be7d 100644
--- a/Documentation/rapidio/mport_cdev.txt
+++ b/Documentation/driver-api/rapidio/mport_cdev.rst
@@ -1,13 +1,9 @@
-RapidIO subsystem mport character device driver (rio_mport_cdev.c)
==================================================================
-
-Version History:
-----------------
- 1.0.0 - Initial driver release.
-
+RapidIO subsystem mport character device driver (rio_mport_cdev.c)
==================================================================
-I. Overview
+1. Overview
+===========
This device driver is the result of collaboration within the RapidIO.org
Software Task Group (STG) between Texas Instruments, Freescale,
@@ -29,40 +25,41 @@ Using available set of ioctl commands user-space applications can perform
following RapidIO bus and subsystem operations:
- Reads and writes from/to configuration registers of mport devices
- (RIO_MPORT_MAINT_READ_LOCAL/RIO_MPORT_MAINT_WRITE_LOCAL)
+ (RIO_MPORT_MAINT_READ_LOCAL/RIO_MPORT_MAINT_WRITE_LOCAL)
- Reads and writes from/to configuration registers of remote RapidIO devices.
These operations are defined as RapidIO Maintenance reads/writes in RIO spec.
- (RIO_MPORT_MAINT_READ_REMOTE/RIO_MPORT_MAINT_WRITE_REMOTE)
+ (RIO_MPORT_MAINT_READ_REMOTE/RIO_MPORT_MAINT_WRITE_REMOTE)
- Set RapidIO Destination ID for mport devices (RIO_MPORT_MAINT_HDID_SET)
- Set RapidIO Component Tag for mport devices (RIO_MPORT_MAINT_COMPTAG_SET)
- Query logical index of mport devices (RIO_MPORT_MAINT_PORT_IDX_GET)
- Query capabilities and RapidIO link configuration of mport devices
- (RIO_MPORT_GET_PROPERTIES)
+ (RIO_MPORT_GET_PROPERTIES)
- Enable/Disable reporting of RapidIO doorbell events to user-space applications
- (RIO_ENABLE_DOORBELL_RANGE/RIO_DISABLE_DOORBELL_RANGE)
+ (RIO_ENABLE_DOORBELL_RANGE/RIO_DISABLE_DOORBELL_RANGE)
- Enable/Disable reporting of RIO port-write events to user-space applications
- (RIO_ENABLE_PORTWRITE_RANGE/RIO_DISABLE_PORTWRITE_RANGE)
+ (RIO_ENABLE_PORTWRITE_RANGE/RIO_DISABLE_PORTWRITE_RANGE)
- Query/Control type of events reported through this driver: doorbells,
port-writes or both (RIO_SET_EVENT_MASK/RIO_GET_EVENT_MASK)
- Configure/Map mport's outbound requests window(s) for specific size,
RapidIO destination ID, hopcount and request type
- (RIO_MAP_OUTBOUND/RIO_UNMAP_OUTBOUND)
+ (RIO_MAP_OUTBOUND/RIO_UNMAP_OUTBOUND)
- Configure/Map mport's inbound requests window(s) for specific size,
RapidIO base address and local memory base address
- (RIO_MAP_INBOUND/RIO_UNMAP_INBOUND)
+ (RIO_MAP_INBOUND/RIO_UNMAP_INBOUND)
- Allocate/Free contiguous DMA coherent memory buffer for DMA data transfers
to/from remote RapidIO devices (RIO_ALLOC_DMA/RIO_FREE_DMA)
- Initiate DMA data transfers to/from remote RapidIO devices (RIO_TRANSFER).
Supports blocking, asynchronous and posted (a.k.a 'fire-and-forget') data
transfer modes.
- Check/Wait for completion of asynchronous DMA data transfer
- (RIO_WAIT_FOR_ASYNC)
+ (RIO_WAIT_FOR_ASYNC)
- Manage device objects supported by RapidIO subsystem (RIO_DEV_ADD/RIO_DEV_DEL).
This allows implementation of various RapidIO fabric enumeration algorithms
as user-space applications while using remaining functionality provided by
kernel RapidIO subsystem.
-II. Hardware Compatibility
+2. Hardware Compatibility
+=========================
This device driver uses standard interfaces defined by kernel RapidIO subsystem
and therefore it can be used with any mport device driver registered by RapidIO
@@ -78,29 +75,35 @@ functionality of their platform when planning to use this driver:
specific DMA engine support and therefore DMA data transfers mport_cdev driver
are not available.
-III. Module parameters
+3. Module parameters
+====================
-- 'dma_timeout' - DMA transfer completion timeout (in msec, default value 3000).
+- 'dma_timeout'
+ - DMA transfer completion timeout (in msec, default value 3000).
This parameter sets a maximum completion wait time for SYNC mode DMA
transfer requests and for RIO_WAIT_FOR_ASYNC ioctl requests.
-- 'dbg_level' - This parameter allows to control amount of debug information
+- 'dbg_level'
+ - This parameter allows to control amount of debug information
generated by this device driver. This parameter is formed by set of
bit masks that correspond to the specific functional blocks.
For mask definitions see 'drivers/rapidio/devices/rio_mport_cdev.c'
This parameter can be changed dynamically.
Use CONFIG_RAPIDIO_DEBUG=y to enable debug output at the top level.
-IV. Known problems
+4. Known problems
+=================
None.
-V. User-space Applications and API
+5. User-space Applications and API
+==================================
API library and applications that use this device driver are available from
RapidIO.org.
-VI. TODO List
+6. TODO List
+============
- Add support for sending/receiving "raw" RapidIO messaging packets.
- Add memory mapped DMA data transfers as an option when RapidIO-specific DMA
diff --git a/Documentation/rapidio/rapidio.txt b/Documentation/driver-api/rapidio/rapidio.rst
index 28fbd877f85a..fb8942d3ba85 100644
--- a/Documentation/rapidio/rapidio.txt
+++ b/Documentation/driver-api/rapidio/rapidio.rst
@@ -1,6 +1,6 @@
- The Linux RapidIO Subsystem
-
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+============
+Introduction
+============
The RapidIO standard is a packet-based fabric interconnect standard designed for
use in embedded systems. Development of the RapidIO standard is directed by the
@@ -11,7 +11,7 @@ This document describes the basics of the Linux RapidIO subsystem and provides
information on its major components.
1 Overview
-----------
+==========
Because the RapidIO subsystem follows the Linux device model it is integrated
into the kernel similarly to other buses by defining RapidIO-specific device and
@@ -22,7 +22,7 @@ architecture-specific interfaces that provide support for common RapidIO
subsystem operations.
2. Core Components
-------------------
+==================
A typical RapidIO network is a combination of endpoints and switches.
Each of these components is represented in the subsystem by an associated data
@@ -30,6 +30,7 @@ structure. The core logical components of the RapidIO subsystem are defined
in include/linux/rio.h file.
2.1 Master Port
+---------------
A master port (or mport) is a RapidIO interface controller that is local to the
processor executing the Linux code. A master port generates and receives RapidIO
@@ -46,6 +47,7 @@ includes rio_ops data structure which contains pointers to hardware specific
implementations of RapidIO functions.
2.2 Device
+----------
A RapidIO device is any endpoint (other than mport) or switch in the network.
All devices are presented in the RapidIO subsystem by corresponding rio_dev data
@@ -53,6 +55,7 @@ structure. Devices form one global device list and per-network device lists
(depending on number of available mports and networks).
2.3 Switch
+----------
A RapidIO switch is a special class of device that routes packets between its
ports towards their final destination. The packet destination port within a
@@ -66,6 +69,7 @@ specific switch drivers that are designed to provide hardware-specific
implementation of common switch management routines.
2.4 Network
+-----------
A RapidIO network is a combination of interconnected endpoint and switch devices.
Each RapidIO network known to the system is represented by corresponding rio_net
@@ -74,11 +78,13 @@ ports that form the same network. It also contains a pointer to the default
master port that is used to communicate with devices within the network.
2.5 Device Drivers
+------------------
RapidIO device-specific drivers follow Linux Kernel Driver Model and are
intended to support specific RapidIO devices attached to the RapidIO network.
2.6 Subsystem Interfaces
+------------------------
RapidIO interconnect specification defines features that may be used to provide
one or more common service layers for all participating RapidIO devices. These
@@ -90,7 +96,7 @@ subsystem interfaces. This allows to have multiple common services attached to
the same device without blocking attachment of a device-specific driver.
3. Subsystem Initialization
----------------------------
+===========================
In order to initialize the RapidIO subsystem, a platform must initialize and
register at least one master port within the RapidIO network. To register mport
@@ -105,7 +111,7 @@ RapidIO subsystem can be configured to be built as a statically linked or
modular component of the kernel (see details below).
4. Enumeration and Discovery
-----------------------------
+============================
4.1 Overview
------------
@@ -168,14 +174,16 @@ on RapidIO subsystem build configuration:
(b) If the RapidIO subsystem core is built as a loadable module, in addition
to the method shown above, the host destination ID(s) can be specified using
traditional methods of passing module parameter "hdid=" during its loading:
+
- from command line: "modprobe rapidio hdid=-1,7", or
- from modprobe configuration file using configuration command "options",
like in this example: "options rapidio hdid=-1,7". An example of modprobe
configuration file is provided in the section below.
- NOTES:
+NOTES:
(i) if "hdid=" parameter is omitted all available mport will be assigned
destination ID = -1;
+
(ii) the "hdid=" parameter in systems with multiple mports can have
destination ID assignments omitted from the end of list (default = -1).
@@ -317,8 +325,7 @@ must ensure that they are loaded before the enumeration/discovery starts.
This process can be automated by specifying pre- or post- dependencies in the
RapidIO-specific modprobe configuration file as shown in the example below.
- File /etc/modprobe.d/rapidio.conf:
- ----------------------------------
+File /etc/modprobe.d/rapidio.conf::
# Configure RapidIO subsystem modules
@@ -335,17 +342,21 @@ RapidIO-specific modprobe configuration file as shown in the example below.
--------------------------
-NOTE: In the example above, one of "softdep" commands must be removed or
-commented out to keep required module loading sequence.
+NOTE:
+ In the example above, one of "softdep" commands must be removed or
+ commented out to keep required module loading sequence.
-A. References
--------------
+5. References
+=============
[1] RapidIO Trade Association. RapidIO Interconnect Specifications.
http://www.rapidio.org.
+
[2] Rapidio TA. Technology Comparisons.
http://www.rapidio.org/education/technology_comparisons/
+
[3] RapidIO support for Linux.
http://lwn.net/Articles/139118/
+
[4] Matt Porter. RapidIO for Linux. Ottawa Linux Symposium, 2005
http://www.kernel.org/doc/ols/2005/ols2005v2-pages-43-56.pdf
diff --git a/Documentation/rapidio/rio_cm.txt b/Documentation/driver-api/rapidio/rio_cm.rst
index 27aa401f1126..5294430a7a74 100644
--- a/Documentation/rapidio/rio_cm.txt
+++ b/Documentation/driver-api/rapidio/rio_cm.rst
@@ -1,13 +1,10 @@
+==========================================================================
RapidIO subsystem Channelized Messaging character device driver (rio_cm.c)
==========================================================================
-Version History:
-----------------
- 1.0.0 - Initial driver release.
-
-==========================================================================
-I. Overview
+1. Overview
+===========
This device driver is the result of collaboration within the RapidIO.org
Software Task Group (STG) between Texas Instruments, Prodrive Technologies,
@@ -41,79 +38,98 @@ in /dev directory common for all registered RapidIO mport devices.
Following ioctl commands are available to user-space applications:
-- RIO_CM_MPORT_GET_LIST : Returns to caller list of local mport devices that
+- RIO_CM_MPORT_GET_LIST:
+ Returns to caller list of local mport devices that
support messaging operations (number of entries up to RIO_MAX_MPORTS).
Each list entry is combination of mport's index in the system and RapidIO
destination ID assigned to the port.
-- RIO_CM_EP_GET_LIST_SIZE : Returns number of messaging capable remote endpoints
+- RIO_CM_EP_GET_LIST_SIZE:
+ Returns number of messaging capable remote endpoints
in a RapidIO network associated with the specified mport device.
-- RIO_CM_EP_GET_LIST : Returns list of RapidIO destination IDs for messaging
+- RIO_CM_EP_GET_LIST:
+ Returns list of RapidIO destination IDs for messaging
capable remote endpoints (peers) available in a RapidIO network associated
with the specified mport device.
-- RIO_CM_CHAN_CREATE : Creates RapidIO message exchange channel data structure
+- RIO_CM_CHAN_CREATE:
+ Creates RapidIO message exchange channel data structure
with channel ID assigned automatically or as requested by a caller.
-- RIO_CM_CHAN_BIND : Binds the specified channel data structure to the specified
+- RIO_CM_CHAN_BIND:
+ Binds the specified channel data structure to the specified
mport device.
-- RIO_CM_CHAN_LISTEN : Enables listening for connection requests on the specified
+- RIO_CM_CHAN_LISTEN:
+ Enables listening for connection requests on the specified
channel.
-- RIO_CM_CHAN_ACCEPT : Accepts a connection request from peer on the specified
+- RIO_CM_CHAN_ACCEPT:
+ Accepts a connection request from peer on the specified
channel. If wait timeout for this request is specified by a caller it is
a blocking call. If timeout set to 0 this is non-blocking call - ioctl
handler checks for a pending connection request and if one is not available
exits with -EAGAIN error status immediately.
-- RIO_CM_CHAN_CONNECT : Sends a connection request to a remote peer/channel.
-- RIO_CM_CHAN_SEND : Sends a data message through the specified channel.
+- RIO_CM_CHAN_CONNECT:
+ Sends a connection request to a remote peer/channel.
+- RIO_CM_CHAN_SEND:
+ Sends a data message through the specified channel.
The handler for this request assumes that message buffer specified by
a caller includes the reserved space for a packet header required by
this driver.
-- RIO_CM_CHAN_RECEIVE : Receives a data message through a connected channel.
+- RIO_CM_CHAN_RECEIVE:
+ Receives a data message through a connected channel.
If the channel does not have an incoming message ready to return this ioctl
handler will wait for new message until timeout specified by a caller
expires. If timeout value is set to 0, ioctl handler uses a default value
defined by MAX_SCHEDULE_TIMEOUT.
-- RIO_CM_CHAN_CLOSE : Closes a specified channel and frees associated buffers.
+- RIO_CM_CHAN_CLOSE:
+ Closes a specified channel and frees associated buffers.
If the specified channel is in the CONNECTED state, sends close notification
to the remote peer.
The ioctl command codes and corresponding data structures intended for use by
user-space applications are defined in 'include/uapi/linux/rio_cm_cdev.h'.
-II. Hardware Compatibility
+2. Hardware Compatibility
+=========================
This device driver uses standard interfaces defined by kernel RapidIO subsystem
and therefore it can be used with any mport device driver registered by RapidIO
subsystem with limitations set by available mport HW implementation of messaging
mailboxes.
-III. Module parameters
+3. Module parameters
+====================
-- 'dbg_level' - This parameter allows to control amount of debug information
+- 'dbg_level'
+ - This parameter allows to control amount of debug information
generated by this device driver. This parameter is formed by set of
bit masks that correspond to the specific functional block.
For mask definitions see 'drivers/rapidio/devices/rio_cm.c'
This parameter can be changed dynamically.
Use CONFIG_RAPIDIO_DEBUG=y to enable debug output at the top level.
-- 'cmbox' - Number of RapidIO mailbox to use (default value is 1).
+- 'cmbox'
+ - Number of RapidIO mailbox to use (default value is 1).
This parameter allows to set messaging mailbox number that will be used
within entire RapidIO network. It can be used when default mailbox is
used by other device drivers or is not supported by some nodes in the
RapidIO network.
-- 'chstart' - Start channel number for dynamic assignment. Default value - 256.
+- 'chstart'
+ - Start channel number for dynamic assignment. Default value - 256.
Allows to exclude channel numbers below this parameter from dynamic
allocation to avoid conflicts with software components that use
reserved predefined channel numbers.
-IV. Known problems
+4. Known problems
+=================
None.
-V. User-space Applications and API Library
+5. User-space Applications and API Library
+==========================================
Messaging API library and applications that use this device driver are available
from RapidIO.org.
-VI. TODO List
+6. TODO List
+============
- Add support for system notification messages (reserved channel 0).
diff --git a/Documentation/rapidio/sysfs.txt b/Documentation/driver-api/rapidio/sysfs.rst
index a1adac888e6e..540f72683496 100644
--- a/Documentation/rapidio/sysfs.txt
+++ b/Documentation/driver-api/rapidio/sysfs.rst
@@ -1,3 +1,7 @@
+=============
+Sysfs entries
+=============
+
The RapidIO sysfs files have moved to:
Documentation/ABI/testing/sysfs-bus-rapidio and
Documentation/ABI/testing/sysfs-class-rapidio
diff --git a/Documentation/rapidio/tsi721.txt b/Documentation/driver-api/rapidio/tsi721.rst
index cd2a2935d51d..42aea438cd20 100644
--- a/Documentation/rapidio/tsi721.txt
+++ b/Documentation/driver-api/rapidio/tsi721.rst
@@ -1,7 +1,9 @@
+=========================================================================
RapidIO subsystem mport driver for IDT Tsi721 PCI Express-to-SRIO bridge.
=========================================================================
-I. Overview
+1. Overview
+===========
This driver implements all currently defined RapidIO mport callback functions.
It supports maintenance read and write operations, inbound and outbound RapidIO
@@ -17,7 +19,9 @@ into the corresponding message queue. Messaging callbacks are implemented to be
fully compatible with RIONET driver (Ethernet over RapidIO messaging services).
1. Module parameters:
-- 'dbg_level' - This parameter allows to control amount of debug information
+
+- 'dbg_level'
+ - This parameter allows to control amount of debug information
generated by this device driver. This parameter is formed by set of
bit masks that correspond to the specific
functional block.
@@ -25,37 +29,44 @@ fully compatible with RIONET driver (Ethernet over RapidIO messaging services).
This parameter can be changed dynamically.
Use CONFIG_RAPIDIO_DEBUG=y to enable debug output at the top level.
-- 'dma_desc_per_channel' - This parameter defines number of hardware buffer
+- 'dma_desc_per_channel'
+ - This parameter defines number of hardware buffer
descriptors allocated for each registered Tsi721 DMA channel.
Its default value is 128.
-- 'dma_txqueue_sz' - DMA transactions queue size. Defines number of pending
+- 'dma_txqueue_sz'
+ - DMA transactions queue size. Defines number of pending
transaction requests that can be accepted by each DMA channel.
Default value is 16.
-- 'dma_sel' - DMA channel selection mask. Bitmask that defines which hardware
+- 'dma_sel'
+ - DMA channel selection mask. Bitmask that defines which hardware
DMA channels (0 ... 6) will be registered with DmaEngine core.
If bit is set to 1, the corresponding DMA channel will be registered.
DMA channels not selected by this mask will not be used by this device
driver. Default value is 0x7f (use all channels).
-- 'pcie_mrrs' - override value for PCIe Maximum Read Request Size (MRRS).
+- 'pcie_mrrs'
+ - override value for PCIe Maximum Read Request Size (MRRS).
This parameter gives an ability to override MRRS value set during PCIe
configuration process. Tsi721 supports read request sizes up to 4096B.
Value for this parameter must be set as defined by PCIe specification:
0 = 128B, 1 = 256B, 2 = 512B, 3 = 1024B, 4 = 2048B and 5 = 4096B.
Default value is '-1' (= keep platform setting).
-- 'mbox_sel' - RIO messaging MBOX selection mask. This is a bitmask that defines
+- 'mbox_sel'
+ - RIO messaging MBOX selection mask. This is a bitmask that defines
which messaging MBOXes are managed by this device driver. Mask bits 0 - 3
correspond to MBOX0 - MBOX3. MBOX is under driver's control if the
corresponding bit is set to '1'. Default value is 0x0f (= all).
-II. Known problems
+2. Known problems
+=================
None.
-III. DMA Engine Support
+3. DMA Engine Support
+=====================
Tsi721 mport driver supports DMA data transfers between local system memory and
remote RapidIO devices. This functionality is implemented according to SLAVE
@@ -68,17 +79,21 @@ One BDMA channel is reserved for generation of maintenance read/write requests.
If Tsi721 mport driver has been built with RAPIDIO_DMA_ENGINE support included,
this driver will accept DMA-specific module parameter:
- "dma_desc_per_channel" - defines number of hardware buffer descriptors used by
+
+ "dma_desc_per_channel"
+ - defines number of hardware buffer descriptors used by
each BDMA channel of Tsi721 (by default - 128).
-IV. Version History
+4. Version History
- 1.1.0 - DMA operations re-worked to support data scatter/gather lists larger
+ ===== ====================================================================
+ 1.1.0 DMA operations re-worked to support data scatter/gather lists larger
than hardware buffer descriptors ring.
- 1.0.0 - Initial driver release.
+ 1.0.0 Initial driver release.
+ ===== ====================================================================
-V. License
------------------------------------------------
+5. License
+===========
Copyright(c) 2011 Integrated Device Technology, Inc. All rights reserved.
diff --git a/Documentation/rfkill.txt b/Documentation/driver-api/rfkill.rst
index 7d3684e81df6..7d3684e81df6 100644
--- a/Documentation/rfkill.txt
+++ b/Documentation/driver-api/rfkill.rst
diff --git a/Documentation/serial/cyclades_z.rst b/Documentation/driver-api/serial/cyclades_z.rst
index 532ff67e2f1c..532ff67e2f1c 100644
--- a/Documentation/serial/cyclades_z.rst
+++ b/Documentation/driver-api/serial/cyclades_z.rst
diff --git a/Documentation/serial/driver.rst b/Documentation/driver-api/serial/driver.rst
index 4537119bf624..31bd4e16fb1f 100644
--- a/Documentation/serial/driver.rst
+++ b/Documentation/driver-api/serial/driver.rst
@@ -311,7 +311,7 @@ hardware.
This call must not sleep
set_ldisc(port,termios)
- Notifier for discipline change. See Documentation/serial/tty.rst.
+ Notifier for discipline change. See Documentation/driver-api/serial/tty.rst.
Locking: caller holds tty_port->mutex
diff --git a/Documentation/serial/index.rst b/Documentation/driver-api/serial/index.rst
index d0ba22ea23bf..33ad10d05b26 100644
--- a/Documentation/serial/index.rst
+++ b/Documentation/driver-api/serial/index.rst
@@ -1,4 +1,4 @@
-:orphan:
+.. SPDX-License-Identifier: GPL-2.0
==========================
Support for Serial devices
diff --git a/Documentation/serial/moxa-smartio.rst b/Documentation/driver-api/serial/moxa-smartio.rst
index 156100f17c3f..156100f17c3f 100644
--- a/Documentation/serial/moxa-smartio.rst
+++ b/Documentation/driver-api/serial/moxa-smartio.rst
diff --git a/Documentation/serial/n_gsm.rst b/Documentation/driver-api/serial/n_gsm.rst
index f3ad9fd26408..f3ad9fd26408 100644
--- a/Documentation/serial/n_gsm.rst
+++ b/Documentation/driver-api/serial/n_gsm.rst
diff --git a/Documentation/serial/rocket.rst b/Documentation/driver-api/serial/rocket.rst
index 23761eae4282..23761eae4282 100644
--- a/Documentation/serial/rocket.rst
+++ b/Documentation/driver-api/serial/rocket.rst
diff --git a/Documentation/serial/serial-iso7816.rst b/Documentation/driver-api/serial/serial-iso7816.rst
index d990143de0c6..d990143de0c6 100644
--- a/Documentation/serial/serial-iso7816.rst
+++ b/Documentation/driver-api/serial/serial-iso7816.rst
diff --git a/Documentation/serial/serial-rs485.rst b/Documentation/driver-api/serial/serial-rs485.rst
index 6bc824f948f9..6bc824f948f9 100644
--- a/Documentation/serial/serial-rs485.rst
+++ b/Documentation/driver-api/serial/serial-rs485.rst
diff --git a/Documentation/serial/tty.rst b/Documentation/driver-api/serial/tty.rst
index dd972caacf3e..dd972caacf3e 100644
--- a/Documentation/serial/tty.rst
+++ b/Documentation/driver-api/serial/tty.rst
diff --git a/Documentation/sgi-ioc4.txt b/Documentation/driver-api/sgi-ioc4.rst
index 72709222d3c0..72709222d3c0 100644
--- a/Documentation/sgi-ioc4.txt
+++ b/Documentation/driver-api/sgi-ioc4.rst
diff --git a/Documentation/SM501.txt b/Documentation/driver-api/sm501.rst
index 882507453ba4..882507453ba4 100644
--- a/Documentation/SM501.txt
+++ b/Documentation/driver-api/sm501.rst
diff --git a/Documentation/smsc_ece1099.txt b/Documentation/driver-api/smsc_ece1099.rst
index 079277421eaf..079277421eaf 100644
--- a/Documentation/smsc_ece1099.txt
+++ b/Documentation/driver-api/smsc_ece1099.rst
diff --git a/Documentation/switchtec.txt b/Documentation/driver-api/switchtec.rst
index 30d6a64e53f7..7611fdc53e19 100644
--- a/Documentation/switchtec.txt
+++ b/Documentation/driver-api/switchtec.rst
@@ -97,6 +97,6 @@ the following configuration settings:
NT EP BAR 2 will be dynamically configured as a Direct Window, and
the configuration file does not need to configure it explicitly.
-Please refer to Documentation/ntb.txt in Linux source tree for an overall
+Please refer to Documentation/driver-api/ntb.rst in Linux source tree for an overall
understanding of the Linux NTB stack. ntb_hw_switchtec works as an NTB
Hardware Driver in this stack.
diff --git a/Documentation/sync_file.txt b/Documentation/driver-api/sync_file.rst
index 496fb2c3b3e6..496fb2c3b3e6 100644
--- a/Documentation/sync_file.txt
+++ b/Documentation/driver-api/sync_file.rst
diff --git a/Documentation/vfio-mediated-device.txt b/Documentation/driver-api/vfio-mediated-device.rst
index c3f69bcaf96e..25eb7d5b834b 100644
--- a/Documentation/vfio-mediated-device.txt
+++ b/Documentation/driver-api/vfio-mediated-device.rst
@@ -408,7 +408,7 @@ card.
References
==========
-1. See Documentation/vfio.txt for more information on VFIO.
+1. See Documentation/driver-api/vfio.rst for more information on VFIO.
2. struct mdev_driver in include/linux/mdev.h
3. struct mdev_parent_ops in include/linux/mdev.h
4. struct vfio_iommu_driver_ops in include/linux/vfio.h
diff --git a/Documentation/vfio.txt b/Documentation/driver-api/vfio.rst
index f1a4d3c3ba0b..f1a4d3c3ba0b 100644
--- a/Documentation/vfio.txt
+++ b/Documentation/driver-api/vfio.rst
diff --git a/Documentation/xilinx/eemi.rst b/Documentation/driver-api/xilinx/eemi.rst
index 9dcbc6f18d75..9dcbc6f18d75 100644
--- a/Documentation/xilinx/eemi.rst
+++ b/Documentation/driver-api/xilinx/eemi.rst
diff --git a/Documentation/xilinx/index.rst b/Documentation/driver-api/xilinx/index.rst
index 01cc1a0714df..13f7589ed442 100644
--- a/Documentation/xilinx/index.rst
+++ b/Documentation/driver-api/xilinx/index.rst
@@ -1,4 +1,3 @@
-:orphan:
===========
Xilinx FPGA
diff --git a/Documentation/xillybus.txt b/Documentation/driver-api/xillybus.rst
index 2446ee303c09..2446ee303c09 100644
--- a/Documentation/xillybus.txt
+++ b/Documentation/driver-api/xillybus.rst
diff --git a/Documentation/zorro.txt b/Documentation/driver-api/zorro.rst
index 664072b017e3..664072b017e3 100644
--- a/Documentation/zorro.txt
+++ b/Documentation/driver-api/zorro.rst
diff --git a/Documentation/fault-injection/index.rst b/Documentation/fault-injection/index.rst
index 92b5639ed07a..8408a8a91b34 100644
--- a/Documentation/fault-injection/index.rst
+++ b/Documentation/fault-injection/index.rst
@@ -1,4 +1,4 @@
-:orphan:
+.. SPDX-License-Identifier: GPL-2.0
===============
fault-injection
diff --git a/Documentation/fb/fbcon.rst b/Documentation/fb/fbcon.rst
index 1da65b9000de..ebca41785abe 100644
--- a/Documentation/fb/fbcon.rst
+++ b/Documentation/fb/fbcon.rst
@@ -187,7 +187,7 @@ the hardware. Thus, in a VGA console::
Assuming the VGA driver can be unloaded, one must first unbind the VGA driver
from the console layer before unloading the driver. The VGA driver cannot be
unloaded if it is still bound to the console layer. (See
-Documentation/console/console.txt for more information).
+Documentation/driver-api/console.rst for more information).
This is more complicated in the case of the framebuffer console (fbcon),
because fbcon is an intermediate layer between the console and the drivers::
@@ -204,7 +204,7 @@ fbcon. Thus, there is no need to explicitly unbind the fbdev drivers from
fbcon.
So, how do we unbind fbcon from the console? Part of the answer is in
-Documentation/console/console.txt. To summarize:
+Documentation/driver-api/console.rst. To summarize:
Echo a value to the bind file that represents the framebuffer console
driver. So assuming vtcon1 represents fbcon, then::
diff --git a/Documentation/fb/index.rst b/Documentation/fb/index.rst
index d47313714635..baf02393d8ee 100644
--- a/Documentation/fb/index.rst
+++ b/Documentation/fb/index.rst
@@ -1,4 +1,4 @@
-:orphan:
+.. SPDX-License-Identifier: GPL-2.0
============
Frame Buffer
diff --git a/Documentation/fb/vesafb.rst b/Documentation/fb/vesafb.rst
index 2ed0dfb661cf..6821c87b7893 100644
--- a/Documentation/fb/vesafb.rst
+++ b/Documentation/fb/vesafb.rst
@@ -30,7 +30,7 @@ How to use it?
==============
Switching modes is done using the vga=... boot parameter. Read
-Documentation/svga.txt for details.
+Documentation/admin-guide/svga.rst for details.
You should compile in both vgacon (for text mode) and vesafb (for
graphics mode). Which of them takes over the console depends on
diff --git a/Documentation/filesystems/nfs/nfsroot.txt b/Documentation/filesystems/nfs/nfsroot.txt
index d2963123eb1c..ae4332464560 100644
--- a/Documentation/filesystems/nfs/nfsroot.txt
+++ b/Documentation/filesystems/nfs/nfsroot.txt
@@ -239,7 +239,7 @@ rdinit=<executable file>
A description of the process of mounting the root file system can be
found in:
- Documentation/early-userspace/README
+ Documentation/driver-api/early-userspace/early_userspace_support.rst
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index d750b6926899..fb4735fd73b0 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -1500,7 +1500,7 @@ review the kernel documentation in the directory /usr/src/linux/Documentation.
This chapter is heavily based on the documentation included in the pre 2.2
kernels, and became part of it in version 2.2.1 of the Linux kernel.
-Please see: Documentation/sysctl/ directory for descriptions of these
+Please see: Documentation/admin-guide/sysctl/ directory for descriptions of these
entries.
------------------------------------------------------------------------------
diff --git a/Documentation/filesystems/ramfs-rootfs-initramfs.txt b/Documentation/filesystems/ramfs-rootfs-initramfs.txt
index 79637d227e85..97d42ccaa92d 100644
--- a/Documentation/filesystems/ramfs-rootfs-initramfs.txt
+++ b/Documentation/filesystems/ramfs-rootfs-initramfs.txt
@@ -105,7 +105,7 @@ All this differs from the old initrd in several ways:
- The old initrd file was a gzipped filesystem image (in some file format,
such as ext2, that needed a driver built into the kernel), while the new
initramfs archive is a gzipped cpio archive (like tar only simpler,
- see cpio(1) and Documentation/early-userspace/buffer-format.txt). The
+ see cpio(1) and Documentation/driver-api/early-userspace/buffer-format.rst). The
kernel's cpio extraction code is not only extremely small, it's also
__init text and data that can be discarded during the boot process.
@@ -159,7 +159,7 @@ One advantage of the configuration file is that root access is not required to
set permissions or create device nodes in the new archive. (Note that those
two example "file" entries expect to find files named "init.sh" and "busybox" in
a directory called "initramfs", under the linux-2.6.* directory. See
-Documentation/early-userspace/README for more details.)
+Documentation/driver-api/early-userspace/early_userspace_support.rst for more details.)
The kernel does not depend on external cpio tools. If you specify a
directory instead of a configuration file, the kernel's build infrastructure
diff --git a/Documentation/filesystems/sysfs.txt b/Documentation/filesystems/sysfs.txt
index 5b5311f9358d..ddf15b1b0d5a 100644
--- a/Documentation/filesystems/sysfs.txt
+++ b/Documentation/filesystems/sysfs.txt
@@ -319,7 +319,7 @@ quick way to lookup the sysfs interface for a device from the result of
a stat(2) operation.
More information on driver-model specific features can be found in
-Documentation/driver-model/.
+Documentation/driver-api/driver-model/.
TODO: Finish this section.
diff --git a/Documentation/filesystems/tmpfs.txt b/Documentation/filesystems/tmpfs.txt
index cad797a8a39e..5ecbc03e6b2f 100644
--- a/Documentation/filesystems/tmpfs.txt
+++ b/Documentation/filesystems/tmpfs.txt
@@ -98,7 +98,7 @@ A memory policy with a valid NodeList will be saved, as specified, for
use at file creation time. When a task allocates a file in the file
system, the mount option memory policy will be applied with a NodeList,
if any, modified by the calling task's cpuset constraints
-[See Documentation/cgroup-v1/cpusets.rst] and any optional flags, listed
+[See Documentation/admin-guide/cgroup-v1/cpusets.rst] and any optional flags, listed
below. If the resulting NodeLists is the empty set, the effective memory
policy for the file will revert to "default" policy.
diff --git a/Documentation/firmware-guide/acpi/enumeration.rst b/Documentation/firmware-guide/acpi/enumeration.rst
index 1252617b520f..0a72b6321f5f 100644
--- a/Documentation/firmware-guide/acpi/enumeration.rst
+++ b/Documentation/firmware-guide/acpi/enumeration.rst
@@ -316,7 +316,7 @@ specifies the path to the controller. In order to use these GPIOs in Linux
we need to translate them to the corresponding Linux GPIO descriptors.
There is a standard GPIO API for that and is documented in
-Documentation/gpio/.
+Documentation/admin-guide/gpio/.
In the above example we can get the corresponding two GPIO descriptors with
a code like this::
diff --git a/Documentation/fpga/index.rst b/Documentation/fpga/index.rst
index 2c87d1ea084f..f80f95667ca2 100644
--- a/Documentation/fpga/index.rst
+++ b/Documentation/fpga/index.rst
@@ -1,4 +1,4 @@
-:orphan:
+.. SPDX-License-Identifier: GPL-2.0
====
fpga
diff --git a/Documentation/hid/index.rst b/Documentation/hid/index.rst
index af4324902622..737d66dc16a1 100644
--- a/Documentation/hid/index.rst
+++ b/Documentation/hid/index.rst
@@ -1,4 +1,4 @@
-:orphan:
+.. SPDX-License-Identifier: GPL-2.0
=============================
Human Interface Devices (HID)
diff --git a/Documentation/hwmon/submitting-patches.rst b/Documentation/hwmon/submitting-patches.rst
index d5b05d3e54ba..452fc28d8e0b 100644
--- a/Documentation/hwmon/submitting-patches.rst
+++ b/Documentation/hwmon/submitting-patches.rst
@@ -89,7 +89,7 @@ increase the chances of your change being accepted.
console. Excessive logging can seriously affect system performance.
* Use devres functions whenever possible to allocate resources. For rationale
- and supported functions, please see Documentation/driver-model/devres.rst.
+ and supported functions, please see Documentation/driver-api/driver-model/devres.rst.
If a function is not supported by devres, consider using devm_add_action().
* If the driver has a detect function, make sure it is silent. Debug messages
diff --git a/Documentation/ia64/aliasing.txt b/Documentation/ia64/aliasing.rst
index 5a4dea6abebd..a08b36aba015 100644
--- a/Documentation/ia64/aliasing.txt
+++ b/Documentation/ia64/aliasing.rst
@@ -1,20 +1,25 @@
- MEMORY ATTRIBUTE ALIASING ON IA-64
+==================================
+Memory Attribute Aliasing on IA-64
+==================================
- Bjorn Helgaas
- <bjorn.helgaas@hp.com>
- May 4, 2006
+Bjorn Helgaas <bjorn.helgaas@hp.com>
+May 4, 2006
-MEMORY ATTRIBUTES
+
+Memory Attributes
+=================
Itanium supports several attributes for virtual memory references.
The attribute is part of the virtual translation, i.e., it is
contained in the TLB entry. The ones of most interest to the Linux
kernel are:
- WB Write-back (cacheable)
+ == ======================
+ WB Write-back (cacheable)
UC Uncacheable
WC Write-coalescing
+ == ======================
System memory typically uses the WB attribute. The UC attribute is
used for memory-mapped I/O devices. The WC attribute is uncacheable
@@ -29,7 +34,8 @@ MEMORY ATTRIBUTES
support either WB or UC access to main memory, while others support
only WB access.
-MEMORY MAP
+Memory Map
+==========
Platform firmware describes the physical memory map and the
supported attributes for each region. At boot-time, the kernel uses
@@ -55,7 +61,8 @@ MEMORY MAP
The efi_memmap table is preserved unmodified because the original
boot-time information is required for kexec.
-KERNEL IDENTITY MAPPINGS
+Kernel Identity Mappings
+========================
Linux/ia64 identity mappings are done with large pages, currently
either 16MB or 64MB, referred to as "granules." Cacheable mappings
@@ -74,17 +81,20 @@ KERNEL IDENTITY MAPPINGS
are only partially populated, or populated with a combination of UC
and WB regions.
-USER MAPPINGS
+User Mappings
+=============
User mappings are typically done with 16K or 64K pages. The smaller
page size allows more flexibility because only 16K or 64K has to be
homogeneous with respect to memory attributes.
-POTENTIAL ATTRIBUTE ALIASING CASES
+Potential Attribute Aliasing Cases
+==================================
There are several ways the kernel creates new mappings:
- mmap of /dev/mem
+mmap of /dev/mem
+----------------
This uses remap_pfn_range(), which creates user mappings. These
mappings may be either WB or UC. If the region being mapped
@@ -98,7 +108,8 @@ POTENTIAL ATTRIBUTE ALIASING CASES
Since the EFI memory map does not describe MMIO on some
machines, this should use an uncacheable mapping as a fallback.
- mmap of /sys/class/pci_bus/.../legacy_mem
+mmap of /sys/class/pci_bus/.../legacy_mem
+-----------------------------------------
This is very similar to mmap of /dev/mem, except that legacy_mem
only allows mmap of the one megabyte "legacy MMIO" area for a
@@ -112,9 +123,10 @@ POTENTIAL ATTRIBUTE ALIASING CASES
The /dev/mem mmap constraints apply.
- mmap of /proc/bus/pci/.../??.?
+mmap of /proc/bus/pci/.../??.?
+------------------------------
- This is an MMIO mmap of PCI functions, which additionally may or
+ This is an MMIO mmap of PCI functions, which additionally may or
may not be requested as using the WC attribute.
If WC is requested, and the region in kern_memmap is either WC
@@ -124,7 +136,8 @@ POTENTIAL ATTRIBUTE ALIASING CASES
Otherwise, the user mapping must use the same attribute as the
kernel mapping.
- read/write of /dev/mem
+read/write of /dev/mem
+----------------------
This uses copy_from_user(), which implicitly uses a kernel
identity mapping. This is obviously safe for things in
@@ -138,7 +151,8 @@ POTENTIAL ATTRIBUTE ALIASING CASES
eight-byte accesses, and the copy_from_user() path doesn't allow
any control over the access size, so this would be dangerous.
- ioremap()
+ioremap()
+---------
This returns a mapping for use inside the kernel.
@@ -155,9 +169,11 @@ POTENTIAL ATTRIBUTE ALIASING CASES
Failing all of the above, we have to fall back to a UC mapping.
-PAST PROBLEM CASES
+Past Problem Cases
+==================
- mmap of various MMIO regions from /dev/mem by "X" on Intel platforms
+mmap of various MMIO regions from /dev/mem by "X" on Intel platforms
+--------------------------------------------------------------------
The EFI memory map may not report these MMIO regions.
@@ -166,12 +182,16 @@ PAST PROBLEM CASES
succeed. It may create either WB or UC user mappings, depending
on whether the region is in kern_memmap or the EFI memory map.
- mmap of 0x0-0x9FFFF /dev/mem by "hwinfo" on HP sx1000 with VGA enabled
+mmap of 0x0-0x9FFFF /dev/mem by "hwinfo" on HP sx1000 with VGA enabled
+----------------------------------------------------------------------
The EFI memory map reports the following attributes:
+
+ =============== ======= ==================
0x00000-0x9FFFF WB only
0xA0000-0xBFFFF UC only (VGA frame buffer)
0xC0000-0xFFFFF WB only
+ =============== ======= ==================
This mmap is done with user pages, not kernel identity mappings,
so it is safe to use WB mappings.
@@ -182,7 +202,8 @@ PAST PROBLEM CASES
never generate an uncacheable reference to the WB-only areas unless
the driver explicitly touches them.
- mmap of 0x0-0xFFFFF legacy_mem by "X"
+mmap of 0x0-0xFFFFF legacy_mem by "X"
+-------------------------------------
If the EFI memory map reports that the entire range supports the
same attributes, we can allow the mmap (and we will prefer WB if
@@ -197,15 +218,18 @@ PAST PROBLEM CASES
that doesn't report the VGA frame buffer at all), we should fail the
mmap and force the user to map just the specific region of interest.
- mmap of 0xA0000-0xBFFFF legacy_mem by "X" on HP sx1000 with VGA disabled
+mmap of 0xA0000-0xBFFFF legacy_mem by "X" on HP sx1000 with VGA disabled
+------------------------------------------------------------------------
+
+ The EFI memory map reports the following attributes::
- The EFI memory map reports the following attributes:
0x00000-0xFFFFF WB only (no VGA MMIO hole)
This is a special case of the previous case, and the mmap should
fail for the same reason as above.
- read of /sys/devices/.../rom
+read of /sys/devices/.../rom
+----------------------------
For VGA devices, this may cause an ioremap() of 0xC0000. This
used to be done with a UC mapping, because the VGA frame buffer
@@ -215,7 +239,8 @@ PAST PROBLEM CASES
We should use WB page table mappings to avoid covering the VGA
frame buffer.
-NOTES
+Notes
+=====
[1] SDM rev 2.2, vol 2, sec 4.4.1.
[2] SDM rev 2.2, vol 2, sec 4.4.6.
diff --git a/Documentation/ia64/efirtc.txt b/Documentation/ia64/efirtc.rst
index 057e6bebda8f..2f7ff5026308 100644
--- a/Documentation/ia64/efirtc.txt
+++ b/Documentation/ia64/efirtc.rst
@@ -1,12 +1,16 @@
+==========================
EFI Real Time Clock driver
--------------------------------
+==========================
+
S. Eranian <eranian@hpl.hp.com>
+
March 2000
-I/ Introduction
+1. Introduction
+===============
This document describes the efirtc.c driver has provided for
-the IA-64 platform.
+the IA-64 platform.
The purpose of this driver is to supply an API for kernel and user applications
to get access to the Time Service offered by EFI version 0.92.
@@ -16,112 +20,124 @@ SetTime(), GetWakeupTime(), SetWakeupTime() which are all supported by this
driver. We describe those calls as well the design of the driver in the
following sections.
-II/ Design Decisions
+2. Design Decisions
+===================
-The original ideas was to provide a very simple driver to get access to,
-at first, the time of day service. This is required in order to access, in a
-portable way, the CMOS clock. A program like /sbin/hwclock uses such a clock
+The original idea was to provide a very simple driver to get access to,
+at first, the time of day service. This is required in order to access, in a
+portable way, the CMOS clock. A program like /sbin/hwclock uses such a clock
to initialize the system view of the time during boot.
Because we wanted to minimize the impact on existing user-level apps using
the CMOS clock, we decided to expose an API that was very similar to the one
-used today with the legacy RTC driver (driver/char/rtc.c). However, because
+used today with the legacy RTC driver (driver/char/rtc.c). However, because
EFI provides a simpler services, not all ioctl() are available. Also
-new ioctl()s have been introduced for things that EFI provides but not the
+new ioctl()s have been introduced for things that EFI provides but not the
legacy.
EFI uses a slightly different way of representing the time, noticeably
the reference date is different. Year is the using the full 4-digit format.
The Epoch is January 1st 1998. For backward compatibility reasons we don't
-expose this new way of representing time. Instead we use something very
+expose this new way of representing time. Instead we use something very
similar to the struct tm, i.e. struct rtc_time, as used by hwclock.
One of the reasons for doing it this way is to allow for EFI to still evolve
without necessarily impacting any of the user applications. The decoupling
enables flexibility and permits writing wrapper code is ncase things change.
The driver exposes two interfaces, one via the device file and a set of
-ioctl()s. The other is read-only via the /proc filesystem.
+ioctl()s. The other is read-only via the /proc filesystem.
As of today we don't offer a /proc/sys interface.
To allow for a uniform interface between the legacy RTC and EFI time service,
-we have created the include/linux/rtc.h header file to contain only the
-"public" API of the two drivers. The specifics of the legacy RTC are still
+we have created the include/linux/rtc.h header file to contain only the
+"public" API of the two drivers. The specifics of the legacy RTC are still
in include/linux/mc146818rtc.h.
-
-III/ Time of day service
+
+3. Time of day service
+======================
The part of the driver gives access to the time of day service of EFI.
Two ioctl()s, compatible with the legacy RTC calls:
- Read the CMOS clock: ioctl(d, RTC_RD_TIME, &rtc);
+ Read the CMOS clock::
+
+ ioctl(d, RTC_RD_TIME, &rtc);
+
+ Write the CMOS clock::
- Write the CMOS clock: ioctl(d, RTC_SET_TIME, &rtc);
+ ioctl(d, RTC_SET_TIME, &rtc);
The rtc is a pointer to a data structure defined in rtc.h which is close
-to a struct tm:
-
-struct rtc_time {
- int tm_sec;
- int tm_min;
- int tm_hour;
- int tm_mday;
- int tm_mon;
- int tm_year;
- int tm_wday;
- int tm_yday;
- int tm_isdst;
-};
+to a struct tm::
+
+ struct rtc_time {
+ int tm_sec;
+ int tm_min;
+ int tm_hour;
+ int tm_mday;
+ int tm_mon;
+ int tm_year;
+ int tm_wday;
+ int tm_yday;
+ int tm_isdst;
+ };
The driver takes care of converting back an forth between the EFI time and
this format.
Those two ioctl()s can be exercised with the hwclock command:
-For reading:
-# /sbin/hwclock --show
-Mon Mar 6 15:32:32 2000 -0.910248 seconds
+For reading::
-For setting:
-# /sbin/hwclock --systohc
+ # /sbin/hwclock --show
+ Mon Mar 6 15:32:32 2000 -0.910248 seconds
+
+For setting::
+
+ # /sbin/hwclock --systohc
Root privileges are required to be able to set the time of day.
-IV/ Wakeup Alarm service
+4. Wakeup Alarm service
+=======================
EFI provides an API by which one can program when a machine should wakeup,
i.e. reboot. This is very different from the alarm provided by the legacy
RTC which is some kind of interval timer alarm. For this reason we don't use
the same ioctl()s to get access to the service. Instead we have
-introduced 2 news ioctl()s to the interface of an RTC.
+introduced 2 new ioctl()s to the interface of an RTC.
We have added 2 new ioctl()s that are specific to the EFI driver:
- Read the current state of the alarm
- ioctl(d, RTC_WKLAM_RD, &wkt)
+ Read the current state of the alarm::
+
+ ioctl(d, RTC_WKALM_RD, &wkt)
+
+ Set the alarm or change its status::
+
+ ioctl(d, RTC_WKALM_SET, &wkt)
- Set the alarm or change its status
- ioctl(d, RTC_WKALM_SET, &wkt)
+The wkt structure encapsulates a struct rtc_time + 2 extra fields to get
+status information::
-The wkt structure encapsulates a struct rtc_time + 2 extra fields to get
-status information:
-
-struct rtc_wkalrm {
+ struct rtc_wkalrm {
- unsigned char enabled; /* =1 if alarm is enabled */
- unsigned char pending; /* =1 if alarm is pending */
+ unsigned char enabled; /* =1 if alarm is enabled */
+ unsigned char pending; /* =1 if alarm is pending */
- struct rtc_time time;
-}
+ struct rtc_time time;
+ }
As of today, none of the existing user-level apps supports this feature.
-However writing such a program should be hard by simply using those two
-ioctl().
+However writing such a program should not be hard by simply using those two
+ioctl().
Root privileges are required to be able to set the alarm.
-V/ References.
+5. References
+=============
Checkout the following Web site for more information on EFI:
diff --git a/Documentation/ia64/err_inject.txt b/Documentation/ia64/err_inject.rst
index 9f651c181429..900f71e93a29 100644
--- a/Documentation/ia64/err_inject.txt
+++ b/Documentation/ia64/err_inject.rst
@@ -1,4 +1,4 @@
-
+========================================
IPF Machine Check (MC) error inject tool
========================================
@@ -32,94 +32,94 @@ Errata: Itanium 2 Processors Specification Update lists some errata against
the pal_mc_error_inject PAL procedure. The following err.conf has been tested
on latest Montecito PAL.
-err.conf:
+err.conf::
-#This is configuration file for err_inject_tool.
-#The format of the each line is:
-#cpu, loop, interval, err_type_info, err_struct_info, err_data_buffer
-#where
-# cpu: logical cpu number the error will be inject in.
-# loop: times the error will be injected.
-# interval: In second. every so often one error is injected.
-# err_type_info, err_struct_info: PAL parameters.
-#
-#Note: All values are hex w/o or w/ 0x prefix.
+ #This is configuration file for err_inject_tool.
+ #The format of the each line is:
+ #cpu, loop, interval, err_type_info, err_struct_info, err_data_buffer
+ #where
+ # cpu: logical cpu number the error will be inject in.
+ # loop: times the error will be injected.
+ # interval: In second. every so often one error is injected.
+ # err_type_info, err_struct_info: PAL parameters.
+ #
+ #Note: All values are hex w/o or w/ 0x prefix.
-#On cpu2, inject only total 0x10 errors, interval 5 seconds
-#corrected, data cache, hier-2, physical addr(assigned by tool code).
-#working on Montecito latest PAL.
-2, 10, 5, 4101, 95
+ #On cpu2, inject only total 0x10 errors, interval 5 seconds
+ #corrected, data cache, hier-2, physical addr(assigned by tool code).
+ #working on Montecito latest PAL.
+ 2, 10, 5, 4101, 95
-#On cpu4, inject and consume total 0x10 errors, interval 5 seconds
-#corrected, data cache, hier-2, physical addr(assigned by tool code).
-#working on Montecito latest PAL.
-4, 10, 5, 4109, 95
+ #On cpu4, inject and consume total 0x10 errors, interval 5 seconds
+ #corrected, data cache, hier-2, physical addr(assigned by tool code).
+ #working on Montecito latest PAL.
+ 4, 10, 5, 4109, 95
-#On cpu15, inject and consume total 0x10 errors, interval 5 seconds
-#recoverable, DTR0, hier-2.
-#working on Montecito latest PAL.
-0xf, 0x10, 5, 4249, 15
+ #On cpu15, inject and consume total 0x10 errors, interval 5 seconds
+ #recoverable, DTR0, hier-2.
+ #working on Montecito latest PAL.
+ 0xf, 0x10, 5, 4249, 15
The sample application source code:
-err_injection_tool.c:
-
-/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
- * NON INFRINGEMENT. See the GNU General Public License for more
- * details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- * Copyright (C) 2006 Intel Co
- * Fenghua Yu <fenghua.yu@intel.com>
- *
- */
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <stdio.h>
-#include <sched.h>
-#include <unistd.h>
-#include <stdlib.h>
-#include <stdarg.h>
-#include <string.h>
-#include <errno.h>
-#include <time.h>
-#include <sys/ipc.h>
-#include <sys/sem.h>
-#include <sys/wait.h>
-#include <sys/mman.h>
-#include <sys/shm.h>
-
-#define MAX_FN_SIZE 256
-#define MAX_BUF_SIZE 256
-#define DATA_BUF_SIZE 256
-#define NR_CPUS 512
-#define MAX_TASK_NUM 2048
-#define MIN_INTERVAL 5 // seconds
-#define ERR_DATA_BUFFER_SIZE 3 // Three 8-byte.
-#define PARA_FIELD_NUM 5
-#define MASK_SIZE (NR_CPUS/64)
-#define PATH_FORMAT "/sys/devices/system/cpu/cpu%d/err_inject/"
-
-int sched_setaffinity(pid_t pid, unsigned int len, unsigned long *mask);
-
-int verbose;
-#define vbprintf if (verbose) printf
-
-int log_info(int cpu, const char *fmt, ...)
-{
+err_injection_tool.c::
+
+ /*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Copyright (C) 2006 Intel Co
+ * Fenghua Yu <fenghua.yu@intel.com>
+ *
+ */
+ #include <sys/types.h>
+ #include <sys/stat.h>
+ #include <fcntl.h>
+ #include <stdio.h>
+ #include <sched.h>
+ #include <unistd.h>
+ #include <stdlib.h>
+ #include <stdarg.h>
+ #include <string.h>
+ #include <errno.h>
+ #include <time.h>
+ #include <sys/ipc.h>
+ #include <sys/sem.h>
+ #include <sys/wait.h>
+ #include <sys/mman.h>
+ #include <sys/shm.h>
+
+ #define MAX_FN_SIZE 256
+ #define MAX_BUF_SIZE 256
+ #define DATA_BUF_SIZE 256
+ #define NR_CPUS 512
+ #define MAX_TASK_NUM 2048
+ #define MIN_INTERVAL 5 // seconds
+ #define ERR_DATA_BUFFER_SIZE 3 // Three 8-byte.
+ #define PARA_FIELD_NUM 5
+ #define MASK_SIZE (NR_CPUS/64)
+ #define PATH_FORMAT "/sys/devices/system/cpu/cpu%d/err_inject/"
+
+ int sched_setaffinity(pid_t pid, unsigned int len, unsigned long *mask);
+
+ int verbose;
+ #define vbprintf if (verbose) printf
+
+ int log_info(int cpu, const char *fmt, ...)
+ {
FILE *log;
char fn[MAX_FN_SIZE];
char buf[MAX_BUF_SIZE];
@@ -142,12 +142,12 @@ int log_info(int cpu, const char *fmt, ...)
fclose(log);
return 0;
-}
+ }
-typedef unsigned long u64;
-typedef unsigned int u32;
+ typedef unsigned long u64;
+ typedef unsigned int u32;
-typedef union err_type_info_u {
+ typedef union err_type_info_u {
struct {
u64 mode : 3, /* 0-2 */
err_inj : 3, /* 3-5 */
@@ -157,9 +157,9 @@ typedef union err_type_info_u {
reserved : 48; /* 16-63 */
} err_type_info_u;
u64 err_type_info;
-} err_type_info_t;
+ } err_type_info_t;
-typedef union err_struct_info_u {
+ typedef union err_struct_info_u {
struct {
u64 siv : 1, /* 0 */
c_t : 2, /* 1-2 */
@@ -197,9 +197,9 @@ typedef union err_struct_info_u {
u64 reserved;
} err_struct_info_bus_processor_interconnect;
u64 err_struct_info;
-} err_struct_info_t;
+ } err_struct_info_t;
-typedef union err_data_buffer_u {
+ typedef union err_data_buffer_u {
struct {
u64 trigger_addr; /* 0-63 */
u64 inj_addr; /* 64-127 */
@@ -221,9 +221,9 @@ typedef union err_data_buffer_u {
u64 reserved; /* 0-63 */
} err_data_buffer_bus_processor_interconnect;
u64 err_data_buffer[ERR_DATA_BUFFER_SIZE];
-} err_data_buffer_t;
+ } err_data_buffer_t;
-typedef union capabilities_u {
+ typedef union capabilities_u {
struct {
u64 i : 1,
d : 1,
@@ -276,9 +276,9 @@ typedef union capabilities_u {
struct {
u64 reserved;
} capabilities_bus_processor_interconnect;
-} capabilities_t;
+ } capabilities_t;
-typedef struct resources_s {
+ typedef struct resources_s {
u64 ibr0 : 1,
ibr2 : 1,
ibr4 : 1,
@@ -288,24 +288,24 @@ typedef struct resources_s {
dbr4 : 1,
dbr6 : 1,
reserved : 48;
-} resources_t;
+ } resources_t;
-long get_page_size(void)
-{
+ long get_page_size(void)
+ {
long page_size=sysconf(_SC_PAGESIZE);
return page_size;
-}
+ }
-#define PAGE_SIZE (get_page_size()==-1?0x4000:get_page_size())
-#define SHM_SIZE (2*PAGE_SIZE*NR_CPUS)
-#define SHM_VA 0x2000000100000000
+ #define PAGE_SIZE (get_page_size()==-1?0x4000:get_page_size())
+ #define SHM_SIZE (2*PAGE_SIZE*NR_CPUS)
+ #define SHM_VA 0x2000000100000000
-int shmid;
-void *shmaddr;
+ int shmid;
+ void *shmaddr;
-int create_shm(void)
-{
+ int create_shm(void)
+ {
key_t key;
char fn[MAX_FN_SIZE];
@@ -343,34 +343,34 @@ int create_shm(void)
mlock(shmaddr, SHM_SIZE);
return 0;
-}
+ }
-int free_shm()
-{
+ int free_shm()
+ {
munlock(shmaddr, SHM_SIZE);
- shmdt(shmaddr);
+ shmdt(shmaddr);
semctl(shmid, 0, IPC_RMID);
return 0;
-}
+ }
-#ifdef _SEM_SEMUN_UNDEFINED
-union semun
-{
+ #ifdef _SEM_SEMUN_UNDEFINED
+ union semun
+ {
int val;
struct semid_ds *buf;
unsigned short int *array;
struct seminfo *__buf;
-};
-#endif
+ };
+ #endif
-u32 mode=1; /* 1: physical mode; 2: virtual mode. */
-int one_lock=1;
-key_t key[NR_CPUS];
-int semid[NR_CPUS];
+ u32 mode=1; /* 1: physical mode; 2: virtual mode. */
+ int one_lock=1;
+ key_t key[NR_CPUS];
+ int semid[NR_CPUS];
-int create_sem(int cpu)
-{
+ int create_sem(int cpu)
+ {
union semun arg;
char fn[MAX_FN_SIZE];
int sid;
@@ -407,37 +407,37 @@ int create_sem(int cpu)
}
return 0;
-}
+ }
-static int lock(int cpu)
-{
+ static int lock(int cpu)
+ {
struct sembuf lock;
lock.sem_num = cpu;
lock.sem_op = 1;
semop(semid[cpu], &lock, 1);
- return 0;
-}
+ return 0;
+ }
-static int unlock(int cpu)
-{
+ static int unlock(int cpu)
+ {
struct sembuf unlock;
unlock.sem_num = cpu;
unlock.sem_op = -1;
semop(semid[cpu], &unlock, 1);
- return 0;
-}
+ return 0;
+ }
-void free_sem(int cpu)
-{
+ void free_sem(int cpu)
+ {
semctl(semid[cpu], 0, IPC_RMID);
-}
+ }
-int wr_multi(char *fn, unsigned long *data, int size)
-{
+ int wr_multi(char *fn, unsigned long *data, int size)
+ {
int fd;
char buf[MAX_BUF_SIZE];
int ret;
@@ -459,15 +459,15 @@ int wr_multi(char *fn, unsigned long *data, int size)
ret=write(fd, buf, sizeof(buf));
close(fd);
return ret;
-}
+ }
-int wr(char *fn, unsigned long data)
-{
+ int wr(char *fn, unsigned long data)
+ {
return wr_multi(fn, &data, 1);
-}
+ }
-int rd(char *fn, unsigned long *data)
-{
+ int rd(char *fn, unsigned long *data)
+ {
int fd;
char buf[MAX_BUF_SIZE];
@@ -480,10 +480,10 @@ int rd(char *fn, unsigned long *data)
*data=strtoul(buf, NULL, 16);
close(fd);
return 0;
-}
+ }
-int rd_status(char *path, int *status)
-{
+ int rd_status(char *path, int *status)
+ {
char fn[MAX_FN_SIZE];
sprintf(fn, "%s/status", path);
if (rd(fn, (u64*)status)<0) {
@@ -492,10 +492,10 @@ int rd_status(char *path, int *status)
}
return 0;
-}
+ }
-int rd_capabilities(char *path, u64 *capabilities)
-{
+ int rd_capabilities(char *path, u64 *capabilities)
+ {
char fn[MAX_FN_SIZE];
sprintf(fn, "%s/capabilities", path);
if (rd(fn, capabilities)<0) {
@@ -504,10 +504,10 @@ int rd_capabilities(char *path, u64 *capabilities)
}
return 0;
-}
+ }
-int rd_all(char *path)
-{
+ int rd_all(char *path)
+ {
unsigned long err_type_info, err_struct_info, err_data_buffer;
int status;
unsigned long capabilities, resources;
@@ -556,11 +556,11 @@ int rd_all(char *path)
printf("resources=%lx\n", resources);
return 0;
-}
+ }
-int query_capabilities(char *path, err_type_info_t err_type_info,
+ int query_capabilities(char *path, err_type_info_t err_type_info,
u64 *capabilities)
-{
+ {
char fn[MAX_FN_SIZE];
err_struct_info_t err_struct_info;
err_data_buffer_t err_data_buffer;
@@ -583,10 +583,10 @@ int query_capabilities(char *path, err_type_info_t err_type_info,
return -1;
return 0;
-}
+ }
-int query_all_capabilities()
-{
+ int query_all_capabilities()
+ {
int status;
err_type_info_t err_type_info;
int err_sev, err_struct, struct_hier;
@@ -629,12 +629,12 @@ int query_all_capabilities()
}
return 0;
-}
+ }
-int err_inject(int cpu, char *path, err_type_info_t err_type_info,
+ int err_inject(int cpu, char *path, err_type_info_t err_type_info,
err_struct_info_t err_struct_info,
err_data_buffer_t err_data_buffer)
-{
+ {
int status;
char fn[MAX_FN_SIZE];
@@ -667,13 +667,13 @@ int err_inject(int cpu, char *path, err_type_info_t err_type_info,
}
return status;
-}
+ }
-static int construct_data_buf(char *path, err_type_info_t err_type_info,
+ static int construct_data_buf(char *path, err_type_info_t err_type_info,
err_struct_info_t err_struct_info,
err_data_buffer_t *err_data_buffer,
void *va1)
-{
+ {
char fn[MAX_FN_SIZE];
u64 virt_addr=0, phys_addr=0;
@@ -710,22 +710,22 @@ static int construct_data_buf(char *path, err_type_info_t err_type_info,
}
return 0;
-}
+ }
-typedef struct {
+ typedef struct {
u64 cpu;
u64 loop;
u64 interval;
u64 err_type_info;
u64 err_struct_info;
u64 err_data_buffer[ERR_DATA_BUFFER_SIZE];
-} parameters_t;
+ } parameters_t;
-parameters_t line_para;
-int para;
+ parameters_t line_para;
+ int para;
-static int empty_data_buffer(u64 *err_data_buffer)
-{
+ static int empty_data_buffer(u64 *err_data_buffer)
+ {
int empty=1;
int i;
@@ -734,10 +734,10 @@ static int empty_data_buffer(u64 *err_data_buffer)
empty=0;
return empty;
-}
+ }
-int err_inj()
-{
+ int err_inj()
+ {
err_type_info_t err_type_info;
err_struct_info_t err_struct_info;
err_data_buffer_t err_data_buffer;
@@ -951,10 +951,10 @@ int err_inj()
printf("All done.\n");
return 0;
-}
+ }
-void help()
-{
+ void help()
+ {
printf("err_inject_tool:\n");
printf("\t-q: query all capabilities. default: off\n");
printf("\t-m: procedure mode. 1: physical 2: virtual. default: 1\n");
@@ -977,10 +977,10 @@ void help()
printf("The tool will take err.conf file as ");
printf("input to inject single or multiple errors ");
printf("on one or multiple cpus in parallel.\n");
-}
+ }
-int main(int argc, char **argv)
-{
+ int main(int argc, char **argv)
+ {
char c;
int do_err_inj=0;
int do_query_all=0;
@@ -1031,7 +1031,7 @@ int main(int argc, char **argv)
if (count!=PARA_FIELD_NUM+3) {
line_para.err_data_buffer[0]=-1,
line_para.err_data_buffer[1]=-1,
- line_para.err_data_buffer[2]=-1;
+ line_para.err_data_buffer[2]=-1;
count=sscanf(optarg, "%lx, %lx, %lx, %lx, %lx\n",
&line_para.cpu,
&line_para.loop,
@@ -1064,5 +1064,4 @@ int main(int argc, char **argv)
help();
return 0;
-}
-
+ }
diff --git a/Documentation/ia64/fsys.txt b/Documentation/ia64/fsys.rst
index 59dd689d9b86..a702d2cc94b6 100644
--- a/Documentation/ia64/fsys.txt
+++ b/Documentation/ia64/fsys.rst
@@ -1,9 +1,9 @@
--*-Mode: outline-*-
-
- Light-weight System Calls for IA-64
- -----------------------------------
+===================================
+Light-weight System Calls for IA-64
+===================================
Started: 13-Jan-2003
+
Last update: 27-Sep-2003
David Mosberger-Tang
@@ -52,12 +52,13 @@ privilege level is at level 0, this means that fsys-mode requires some
care (see below).
-* How to tell fsys-mode
+How to tell fsys-mode
+=====================
Linux operates in fsys-mode when (a) the privilege level is 0 (most
privileged) and (b) the stacks have NOT been switched to kernel memory
yet. For convenience, the header file <asm-ia64/ptrace.h> provides
-three macros:
+three macros::
user_mode(regs)
user_stack(task,regs)
@@ -70,11 +71,12 @@ to by "regs" was executing in user mode (privilege level 3).
user_stack() returns TRUE if the state pointed to by "regs" was
executing on the user-level stack(s). Finally, fsys_mode() returns
TRUE if the CPU state pointed to by "regs" was executing in fsys-mode.
-The fsys_mode() macro is equivalent to the expression:
+The fsys_mode() macro is equivalent to the expression::
!user_mode(regs) && user_stack(task,regs)
-* How to write an fsyscall handler
+How to write an fsyscall handler
+================================
The file arch/ia64/kernel/fsys.S contains a table of fsyscall-handlers
(fsyscall_table). This table contains one entry for each system call.
@@ -87,66 +89,72 @@ of the getpid() system call.
The entry and exit-state of an fsyscall handler is as follows:
-** Machine state on entry to fsyscall handler:
-
- - r10 = 0
- - r11 = saved ar.pfs (a user-level value)
- - r15 = system call number
- - r16 = "current" task pointer (in normal kernel-mode, this is in r13)
- - r32-r39 = system call arguments
- - b6 = return address (a user-level value)
- - ar.pfs = previous frame-state (a user-level value)
- - PSR.be = cleared to zero (i.e., little-endian byte order is in effect)
- - all other registers may contain values passed in from user-mode
-
-** Required machine state on exit to fsyscall handler:
-
- - r11 = saved ar.pfs (as passed into the fsyscall handler)
- - r15 = system call number (as passed into the fsyscall handler)
- - r32-r39 = system call arguments (as passed into the fsyscall handler)
- - b6 = return address (as passed into the fsyscall handler)
- - ar.pfs = previous frame-state (as passed into the fsyscall handler)
+Machine state on entry to fsyscall handler
+------------------------------------------
+
+ ========= ===============================================================
+ r10 0
+ r11 saved ar.pfs (a user-level value)
+ r15 system call number
+ r16 "current" task pointer (in normal kernel-mode, this is in r13)
+ r32-r39 system call arguments
+ b6 return address (a user-level value)
+ ar.pfs previous frame-state (a user-level value)
+ PSR.be cleared to zero (i.e., little-endian byte order is in effect)
+ - all other registers may contain values passed in from user-mode
+ ========= ===============================================================
+
+Required machine state on exit to fsyscall handler
+--------------------------------------------------
+
+ ========= ===========================================================
+ r11 saved ar.pfs (as passed into the fsyscall handler)
+ r15 system call number (as passed into the fsyscall handler)
+ r32-r39 system call arguments (as passed into the fsyscall handler)
+ b6 return address (as passed into the fsyscall handler)
+ ar.pfs previous frame-state (as passed into the fsyscall handler)
+ ========= ===========================================================
Fsyscall handlers can execute with very little overhead, but with that
speed comes a set of restrictions:
- o Fsyscall-handlers MUST check for any pending work in the flags
+ * Fsyscall-handlers MUST check for any pending work in the flags
member of the thread-info structure and if any of the
TIF_ALLWORK_MASK flags are set, the handler needs to fall back on
doing a full system call (by calling fsys_fallback_syscall).
- o Fsyscall-handlers MUST preserve incoming arguments (r32-r39, r11,
+ * Fsyscall-handlers MUST preserve incoming arguments (r32-r39, r11,
r15, b6, and ar.pfs) because they will be needed in case of a
system call restart. Of course, all "preserved" registers also
must be preserved, in accordance to the normal calling conventions.
- o Fsyscall-handlers MUST check argument registers for containing a
+ * Fsyscall-handlers MUST check argument registers for containing a
NaT value before using them in any way that could trigger a
NaT-consumption fault. If a system call argument is found to
contain a NaT value, an fsyscall-handler may return immediately
with r8=EINVAL, r10=-1.
- o Fsyscall-handlers MUST NOT use the "alloc" instruction or perform
+ * Fsyscall-handlers MUST NOT use the "alloc" instruction or perform
any other operation that would trigger mandatory RSE
(register-stack engine) traffic.
- o Fsyscall-handlers MUST NOT write to any stacked registers because
+ * Fsyscall-handlers MUST NOT write to any stacked registers because
it is not safe to assume that user-level called a handler with the
proper number of arguments.
- o Fsyscall-handlers need to be careful when accessing per-CPU variables:
+ * Fsyscall-handlers need to be careful when accessing per-CPU variables:
unless proper safe-guards are taken (e.g., interruptions are avoided),
execution may be pre-empted and resumed on another CPU at any given
time.
- o Fsyscall-handlers must be careful not to leak sensitive kernel'
+ * Fsyscall-handlers must be careful not to leak sensitive kernel
information back to user-level. In particular, before returning to
user-level, care needs to be taken to clear any scratch registers
that could contain sensitive information (note that the current
task pointer is not considered sensitive: it's already exposed
through ar.k6).
- o Fsyscall-handlers MUST NOT access user-memory without first
+ * Fsyscall-handlers MUST NOT access user-memory without first
validating access-permission (this can be done typically via
probe.r.fault and/or probe.w.fault) and without guarding against
memory access exceptions (this can be done with the EX() macros
@@ -162,7 +170,8 @@ fast system call execution (while fully preserving system call
semantics), but there is also a lot of flexibility in handling more
complicated cases.
-* Signal handling
+Signal handling
+===============
The delivery of (asynchronous) signals must be delayed until fsys-mode
is exited. This is accomplished with the help of the lower-privilege
@@ -173,7 +182,8 @@ PSR.lp and returns immediately. When fsys-mode is exited via the
occur. The trap handler clears PSR.lp again and returns immediately.
The kernel exit path then checks for and delivers any pending signals.
-* PSR Handling
+PSR Handling
+============
The "epc" instruction doesn't change the contents of PSR at all. This
is in contrast to a regular interruption, which clears almost all
@@ -181,6 +191,7 @@ bits. Because of that, some care needs to be taken to ensure things
work as expected. The following discussion describes how each PSR bit
is handled.
+======= =======================================================================
PSR.be Cleared when entering fsys-mode. A srlz.d instruction is used
to ensure the CPU is in little-endian mode before the first
load/store instruction is executed. PSR.be is normally NOT
@@ -202,7 +213,8 @@ PSR.pp Unchanged.
PSR.di Unchanged.
PSR.si Unchanged.
PSR.db Unchanged. The kernel prevents user-level from setting a hardware
- breakpoint that triggers at any privilege level other than 3 (user-mode).
+ breakpoint that triggers at any privilege level other than
+ 3 (user-mode).
PSR.lp Unchanged.
PSR.tb Lazy redirect. If a taken-branch trap occurs while in
fsys-mode, the trap-handler modifies the saved machine state
@@ -235,47 +247,52 @@ PSR.ed Unchanged. Note: This bit could only have an effect if an fsys-mode
PSR.bn Unchanged. Note: fsys-mode handlers may clear the bit, if needed.
Doing so requires clearing PSR.i and PSR.ic as well.
PSR.ia Unchanged. Note: the ia64 linux kernel never sets this bit.
+======= =======================================================================
-* Using fast system calls
+Using fast system calls
+=======================
To use fast system calls, userspace applications need simply call
__kernel_syscall_via_epc(). For example
-- example fgettimeofday() call --
+
-- fgettimeofday.S --
-#include <asm/asmmacro.h>
+::
+
+ #include <asm/asmmacro.h>
-GLOBAL_ENTRY(fgettimeofday)
-.prologue
-.save ar.pfs, r11
-mov r11 = ar.pfs
-.body
+ GLOBAL_ENTRY(fgettimeofday)
+ .prologue
+ .save ar.pfs, r11
+ mov r11 = ar.pfs
+ .body
-mov r2 = 0xa000000000020660;; // gate address
- // found by inspection of System.map for the
+ mov r2 = 0xa000000000020660;; // gate address
+ // found by inspection of System.map for the
// __kernel_syscall_via_epc() function. See
// below for how to do this for real.
-mov b7 = r2
-mov r15 = 1087 // gettimeofday syscall
-;;
-br.call.sptk.many b6 = b7
-;;
+ mov b7 = r2
+ mov r15 = 1087 // gettimeofday syscall
+ ;;
+ br.call.sptk.many b6 = b7
+ ;;
-.restore sp
+ .restore sp
-mov ar.pfs = r11
-br.ret.sptk.many rp;; // return to caller
-END(fgettimeofday)
+ mov ar.pfs = r11
+ br.ret.sptk.many rp;; // return to caller
+ END(fgettimeofday)
-- end fgettimeofday.S --
In reality, getting the gate address is accomplished by two extra
values passed via the ELF auxiliary vector (include/asm-ia64/elf.h)
- o AT_SYSINFO : is the address of __kernel_syscall_via_epc()
- o AT_SYSINFO_EHDR : is the address of the kernel gate ELF DSO
+ * AT_SYSINFO : is the address of __kernel_syscall_via_epc()
+ * AT_SYSINFO_EHDR : is the address of the kernel gate ELF DSO
The ELF DSO is a pre-linked library that is mapped in by the kernel at
the gate page. It is a proper ELF shared object so, with a dynamic
diff --git a/Documentation/ia64/README b/Documentation/ia64/ia64.rst
index aa17f2154cba..b725019a9492 100644
--- a/Documentation/ia64/README
+++ b/Documentation/ia64/ia64.rst
@@ -1,43 +1,49 @@
- Linux kernel release 2.4.xx for the IA-64 Platform
+===========================================
+Linux kernel release for the IA-64 Platform
+===========================================
- These are the release notes for Linux version 2.4 for IA-64
+ These are the release notes for Linux since version 2.4 for IA-64
platform. This document provides information specific to IA-64
ONLY, to get additional information about the Linux kernel also
read the original Linux README provided with the kernel.
-INSTALLING the kernel:
+Installing the Kernel
+=====================
- IA-64 kernel installation is the same as the other platforms, see
original README for details.
-SOFTWARE REQUIREMENTS
+Software Requirements
+=====================
Compiling and running this kernel requires an IA-64 compliant GCC
compiler. And various software packages also compiled with an
IA-64 compliant GCC compiler.
-CONFIGURING the kernel:
+Configuring the Kernel
+======================
Configuration is the same, see original README for details.
-COMPILING the kernel:
+Compiling the Kernel:
- Compiling this kernel doesn't differ from other platform so read
the original README for details BUT make sure you have an IA-64
compliant GCC compiler.
-IA-64 SPECIFICS
+IA-64 Specifics
+===============
- General issues:
- o Hardly any performance tuning has been done. Obvious targets
+ * Hardly any performance tuning has been done. Obvious targets
include the library routines (IP checksum, etc.). Less
obvious targets include making sure we don't flush the TLB
needlessly, etc.
- o SMP locks cleanup/optimization
+ * SMP locks cleanup/optimization
- o IA32 support. Currently experimental. It mostly works.
+ * IA32 support. Currently experimental. It mostly works.
diff --git a/Documentation/ia64/index.rst b/Documentation/ia64/index.rst
new file mode 100644
index 000000000000..0436e1034115
--- /dev/null
+++ b/Documentation/ia64/index.rst
@@ -0,0 +1,18 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==================
+IA-64 Architecture
+==================
+
+.. toctree::
+ :maxdepth: 1
+
+ ia64
+ aliasing
+ efirtc
+ err_inject
+ fsys
+ irq-redir
+ mca
+ serial
+ xen
diff --git a/Documentation/ia64/IRQ-redir.txt b/Documentation/ia64/irq-redir.rst
index f7bd72261283..39bf94484a15 100644
--- a/Documentation/ia64/IRQ-redir.txt
+++ b/Documentation/ia64/irq-redir.rst
@@ -1,6 +1,8 @@
+==============================
IRQ affinity on IA64 platforms
-------------------------------
- 07.01.2002, Erich Focht <efocht@ess.nec.de>
+==============================
+
+07.01.2002, Erich Focht <efocht@ess.nec.de>
By writing to /proc/irq/IRQ#/smp_affinity the interrupt routing can be
@@ -12,22 +14,27 @@ IRQ target is one particular CPU and cannot be a mask of several
CPUs. Only the first non-zero bit is taken into account.
-Usage examples:
+Usage examples
+==============
The target CPU has to be specified as a hexadecimal CPU mask. The
first non-zero bit is the selected CPU. This format has been kept for
compatibility reasons with i386.
Set the delivery mode of interrupt 41 to fixed and route the
-interrupts to CPU #3 (logical CPU number) (2^3=0x08):
+interrupts to CPU #3 (logical CPU number) (2^3=0x08)::
+
echo "8" >/proc/irq/41/smp_affinity
Set the default route for IRQ number 41 to CPU 6 in lowest priority
-delivery mode (redirectable):
+delivery mode (redirectable)::
+
echo "r 40" >/proc/irq/41/smp_affinity
-The output of the command
+The output of the command::
+
cat /proc/irq/IRQ#/smp_affinity
+
gives the target CPU mask for the specified interrupt vector. If the CPU
mask is preceded by the character "r", the interrupt is redirectable
(i.e. lowest priority mode routing is used), otherwise its route is
@@ -35,7 +42,8 @@ fixed.
-Initialization and default behavior:
+Initialization and default behavior
+===================================
If the platform features IRQ redirection (info provided by SAL) all
IO-SAPIC interrupts are initialized with CPU#0 as their default target
@@ -43,9 +51,11 @@ and the routing is the so called "lowest priority mode" (actually
fixed SAPIC mode with hint). The XTP chipset registers are used as hints
for the IRQ routing. Currently in Linux XTP registers can have three
values:
+
- minimal for an idle task,
- normal if any other task runs,
- maximal if the CPU is going to be switched off.
+
The IRQ is routed to the CPU with lowest XTP register value, the
search begins at the default CPU. Therefore most of the interrupts
will be handled by CPU #0.
@@ -53,12 +63,14 @@ will be handled by CPU #0.
If the platform doesn't feature interrupt redirection IOSAPIC fixed
routing is used. The target CPUs are distributed in a round robin
manner. IRQs will be routed only to the selected target CPUs. Check
-with
+with::
+
cat /proc/interrupts
-Comments:
+Comments
+========
On large (multi-node) systems it is recommended to route the IRQs to
the node to which the corresponding device is connected.
@@ -66,4 +78,3 @@ For systems like the NEC AzusA we get IRQ node-affinity for free. This
is because usually the chipsets on each node redirect the interrupts
only to their own CPUs (as they cannot see the XTP registers on the
other nodes).
-
diff --git a/Documentation/ia64/mca.txt b/Documentation/ia64/mca.rst
index f097c60cba1b..08270bba44a4 100644
--- a/Documentation/ia64/mca.txt
+++ b/Documentation/ia64/mca.rst
@@ -1,5 +1,8 @@
-An ad-hoc collection of notes on IA64 MCA and INIT processing. Feel
-free to update it with notes about any area that is not clear.
+=============================================================
+An ad-hoc collection of notes on IA64 MCA and INIT processing
+=============================================================
+
+Feel free to update it with notes about any area that is not clear.
---
@@ -82,7 +85,8 @@ if we have a choice here.
own stack as running on that cpu. Then a recursive error gets a
trace of the failing handler's "task".
-[1] My (Keith Owens) original design called for ia64 to separate its
+[1]
+ My (Keith Owens) original design called for ia64 to separate its
struct task and the kernel stacks. Then the MCA/INIT data would be
chained stacks like i386 interrupt stacks. But that required
radical surgery on the rest of ia64, plus extra hard wired TLB
diff --git a/Documentation/ia64/serial.txt b/Documentation/ia64/serial.rst
index a63d2c54329b..1de70c305a79 100644
--- a/Documentation/ia64/serial.txt
+++ b/Documentation/ia64/serial.rst
@@ -1,4 +1,9 @@
-SERIAL DEVICE NAMING
+==============
+Serial Devices
+==============
+
+Serial Device Naming
+====================
As of 2.6.10, serial devices on ia64 are named based on the
order of ACPI and PCI enumeration. The first device in the
@@ -30,17 +35,21 @@ SERIAL DEVICE NAMING
(described in the ACPI namespace) plus an MP[2] (a PCI device) has
these ports:
- pre-2.6.10 pre-2.6.10
- MMIO (EFI console (EFI console
- address on builtin) on MP port) 2.6.10
- ========== ========== ========== ======
+ ========== ========== ============ ============ =======
+ Type MMIO pre-2.6.10 pre-2.6.10 2.6.10+
+ address
+ (EFI console (EFI console
+ on builtin) on MP port)
+ ========== ========== ============ ============ =======
builtin 0xff5e0000 ttyS0 ttyS1 ttyS0
MP UPS 0xf8031000 ttyS1 ttyS2 ttyS1
MP Console 0xf8030000 ttyS2 ttyS0 ttyS2
MP 2 0xf8030010 ttyS3 ttyS3 ttyS3
MP 3 0xf8030038 ttyS4 ttyS4 ttyS4
+ ========== ========== ============ ============ =======
-CONSOLE SELECTION
+Console Selection
+=================
EFI knows what your console devices are, but it doesn't tell the
kernel quite enough to actually locate them. The DIG64 HCDP
@@ -67,7 +76,8 @@ CONSOLE SELECTION
entries in /etc/inittab (for getty) and /etc/securetty (to allow
root login).
-EARLY SERIAL CONSOLE
+Early Serial Console
+====================
The kernel can't start using a serial console until it knows where
the device lives. Normally this happens when the driver enumerates
@@ -80,7 +90,8 @@ EARLY SERIAL CONSOLE
or if the EFI console path contains only a UART device and the
firmware supplies an HCDP.
-TROUBLESHOOTING SERIAL CONSOLE PROBLEMS
+Troubleshooting Serial Console Problems
+=======================================
No kernel output after elilo prints "Uncompressing Linux... done":
@@ -133,19 +144,22 @@ TROUBLESHOOTING SERIAL CONSOLE PROBLEMS
-[1] http://www.dig64.org/specifications/agreement
+[1]
+ http://www.dig64.org/specifications/agreement
The table was originally defined as the "HCDP" for "Headless
Console/Debug Port." The current version is the "PCDP" for
"Primary Console and Debug Port Devices."
-[2] The HP MP (management processor) is a PCI device that provides
+[2]
+ The HP MP (management processor) is a PCI device that provides
several UARTs. One of the UARTs is often used as a console; the
EFI Boot Manager identifies it as "Acpi(HWP0002,700)/Pci(...)/Uart".
The external connection is usually a 25-pin connector, and a
special dongle converts that to three 9-pin connectors, one of
which is labelled "Console."
-[3] EFI console devices are configured using the EFI Boot Manager
+[3]
+ EFI console devices are configured using the EFI Boot Manager
"Boot option maintenance" menu. You may have to interrupt the
boot sequence to use this menu, and you will have to reset the
box after changing console configuration.
diff --git a/Documentation/ia64/xen.rst b/Documentation/ia64/xen.rst
new file mode 100644
index 000000000000..831339c74441
--- /dev/null
+++ b/Documentation/ia64/xen.rst
@@ -0,0 +1,206 @@
+********************************************************
+Recipe for getting/building/running Xen/ia64 with pv_ops
+********************************************************
+This recipe describes how to get xen-ia64 source and build it,
+and run domU with pv_ops.
+
+Requirements
+============
+
+ - python
+ - mercurial
+ it (aka "hg") is an open-source source code
+ management software. See the below.
+ http://www.selenic.com/mercurial/wiki/
+ - git
+ - bridge-utils
+
+Getting and Building Xen and Dom0
+=================================
+
+ My environment is:
+
+ - Machine : Tiger4
+ - Domain0 OS : RHEL5
+ - DomainU OS : RHEL5
+
+ 1. Download source::
+
+ # hg clone http://xenbits.xensource.com/ext/ia64/xen-unstable.hg
+ # cd xen-unstable.hg
+ # hg clone http://xenbits.xensource.com/ext/ia64/linux-2.6.18-xen.hg
+
+ 2. # make world
+
+ 3. # make install-tools
+
+ 4. copy kernels and xen::
+
+ # cp xen/xen.gz /boot/efi/efi/redhat/
+ # cp build-linux-2.6.18-xen_ia64/vmlinux.gz \
+ /boot/efi/efi/redhat/vmlinuz-2.6.18.8-xen
+
+ 5. make initrd for Dom0/DomU::
+
+ # make -C linux-2.6.18-xen.hg ARCH=ia64 modules_install \
+ O=$(pwd)/build-linux-2.6.18-xen_ia64
+ # mkinitrd -f /boot/efi/efi/redhat/initrd-2.6.18.8-xen.img \
+ 2.6.18.8-xen --builtin mptspi --builtin mptbase \
+ --builtin mptscsih --builtin uhci-hcd --builtin ohci-hcd \
+ --builtin ehci-hcd
+
+Making a disk image for guest OS
+================================
+
+ 1. make file::
+
+ # dd if=/dev/zero of=/root/rhel5.img bs=1M seek=4096 count=0
+ # mke2fs -F -j /root/rhel5.img
+ # mount -o loop /root/rhel5.img /mnt
+ # cp -ax /{dev,var,etc,usr,bin,sbin,lib} /mnt
+ # mkdir /mnt/{root,proc,sys,home,tmp}
+
+ Note: You may miss some device files. If so, please create them
+ with mknod. Or you can use tar instead of cp.
+
+ 2. modify DomU's fstab::
+
+ # vi /mnt/etc/fstab
+ /dev/xvda1 / ext3 defaults 1 1
+ none /dev/pts devpts gid=5,mode=620 0 0
+ none /dev/shm tmpfs defaults 0 0
+ none /proc proc defaults 0 0
+ none /sys sysfs defaults 0 0
+
+ 3. modify inittab
+
+ set runlevel to 3 to avoid X trying to start::
+
+ # vi /mnt/etc/inittab
+ id:3:initdefault:
+
+ Start a getty on the hvc0 console::
+
+ X0:2345:respawn:/sbin/mingetty hvc0
+
+ tty1-6 mingetty can be commented out
+
+ 4. add hvc0 into /etc/securetty::
+
+ # vi /mnt/etc/securetty (add hvc0)
+
+ 5. umount::
+
+ # umount /mnt
+
+FYI, virt-manager can also make a disk image for guest OS.
+It is a GUI tool that makes this easy.
+
+Boot Xen & Domain0
+==================
+
+ 1. replace elilo
+ elilo of RHEL5 can boot Xen and Dom0.
+ If you use an old elilo (e.g. RHEL4), please download a newer one from
+ http://elilo.sourceforge.net/cgi-bin/blosxom
+ and copy into /boot/efi/efi/redhat/::
+
+ # cp elilo-3.6-ia64.efi /boot/efi/efi/redhat/elilo.efi
+
+ 2. modify elilo.conf (like the below)::
+
+ # vi /boot/efi/efi/redhat/elilo.conf
+ prompt
+ timeout=20
+ default=xen
+ relocatable
+
+ image=vmlinuz-2.6.18.8-xen
+ label=xen
+ vmm=xen.gz
+ initrd=initrd-2.6.18.8-xen.img
+ read-only
+ append=" -- rhgb root=/dev/sda2"
+
+The append options before "--" are for xen hypervisor,
+the options after "--" are for dom0.
+
+FYI, your machine may need console options like
+"com1=19200,8n1 console=vga,com1". For example,
+append="com1=19200,8n1 console=vga,com1 -- rhgb console=tty0 \
+console=ttyS0 root=/dev/sda2"
+
+Getting and Building domU with pv_ops
+=====================================
+
+ 1. get pv_ops tree::
+
+ # git clone http://people.valinux.co.jp/~yamahata/xen-ia64/linux-2.6-xen-ia64.git/
+
+ 2. git branch (if necessary)::
+
+ # cd linux-2.6-xen-ia64/
+ # git checkout -b your_branch origin/xen-ia64-domu-minimal-2008may19
+
+ Note:
+ The current branch is xen-ia64-domu-minimal-2008may19.
+ But you may find a newer branch. Run
+ "git branch -r" to list the available branches.
+
+ http://people.valinux.co.jp/~yamahata/xen-ia64/for_eagl/linux-2.6-ia64-pv-ops.git/
+
+ is also available.
+
+ The tree is based on
+
+ git://git.kernel.org/pub/scm/linux/kernel/git/aegl/linux-2.6 test
+
+ 3. copy .config for pv_ops of domU::
+
+ # cp arch/ia64/configs/xen_domu_wip_defconfig .config
+
+ 4. make kernel with pv_ops::
+
+ # make oldconfig
+ # make
+
+ 5. install the kernel and initrd::
+
+ # cp vmlinux.gz /boot/efi/efi/redhat/vmlinuz-2.6-pv_ops-xenU
+ # make modules_install
+ # mkinitrd -f /boot/efi/efi/redhat/initrd-2.6-pv_ops-xenU.img \
+ 2.6.26-rc3xen-ia64-08941-g1b12161 --builtin mptspi \
+ --builtin mptbase --builtin mptscsih --builtin uhci-hcd \
+ --builtin ohci-hcd --builtin ehci-hcd
+
+Boot DomainU with pv_ops
+========================
+
+ 1. make config of DomU::
+
+ # vi /etc/xen/rhel5
+ kernel = "/boot/efi/efi/redhat/vmlinuz-2.6-pv_ops-xenU"
+ ramdisk = "/boot/efi/efi/redhat/initrd-2.6-pv_ops-xenU.img"
+ vcpus = 1
+ memory = 512
+ name = "rhel5"
+ disk = [ 'file:/root/rhel5.img,xvda1,w' ]
+ root = "/dev/xvda1 ro"
+ extra= "rhgb console=hvc0"
+
+ 2. After booting Xen and dom0, start xend::
+
+ # /etc/init.d/xend start
+
+ ( In the debugging case, `# XEND_DEBUG=1 xend trace_start` )
+
+ 3. start domU::
+
+ # xm create -c rhel5
+
+Reference
+=========
+- Wiki of Xen/IA64 upstream merge
+ http://wiki.xensource.com/xenwiki/XenIA64/UpstreamMerge
+
+Written by Akio Takebe <takebe_akio@jp.fujitsu.com> on 28 May 2008
diff --git a/Documentation/ia64/xen.txt b/Documentation/ia64/xen.txt
deleted file mode 100644
index a12c74ce2773..000000000000
--- a/Documentation/ia64/xen.txt
+++ /dev/null
@@ -1,183 +0,0 @@
- Recipe for getting/building/running Xen/ia64 with pv_ops
- --------------------------------------------------------
-
-This recipe describes how to get xen-ia64 source and build it,
-and run domU with pv_ops.
-
-============
-Requirements
-============
-
- - python
- - mercurial
- it (aka "hg") is an open-source source code
- management software. See the below.
- http://www.selenic.com/mercurial/wiki/
- - git
- - bridge-utils
-
-=================================
-Getting and Building Xen and Dom0
-=================================
-
- My environment is;
- Machine : Tiger4
- Domain0 OS : RHEL5
- DomainU OS : RHEL5
-
- 1. Download source
- # hg clone http://xenbits.xensource.com/ext/ia64/xen-unstable.hg
- # cd xen-unstable.hg
- # hg clone http://xenbits.xensource.com/ext/ia64/linux-2.6.18-xen.hg
-
- 2. # make world
-
- 3. # make install-tools
-
- 4. copy kernels and xen
- # cp xen/xen.gz /boot/efi/efi/redhat/
- # cp build-linux-2.6.18-xen_ia64/vmlinux.gz \
- /boot/efi/efi/redhat/vmlinuz-2.6.18.8-xen
-
- 5. make initrd for Dom0/DomU
- # make -C linux-2.6.18-xen.hg ARCH=ia64 modules_install \
- O=$(pwd)/build-linux-2.6.18-xen_ia64
- # mkinitrd -f /boot/efi/efi/redhat/initrd-2.6.18.8-xen.img \
- 2.6.18.8-xen --builtin mptspi --builtin mptbase \
- --builtin mptscsih --builtin uhci-hcd --builtin ohci-hcd \
- --builtin ehci-hcd
-
-================================
-Making a disk image for guest OS
-================================
-
- 1. make file
- # dd if=/dev/zero of=/root/rhel5.img bs=1M seek=4096 count=0
- # mke2fs -F -j /root/rhel5.img
- # mount -o loop /root/rhel5.img /mnt
- # cp -ax /{dev,var,etc,usr,bin,sbin,lib} /mnt
- # mkdir /mnt/{root,proc,sys,home,tmp}
-
- Note: You may miss some device files. If so, please create them
- with mknod. Or you can use tar instead of cp.
-
- 2. modify DomU's fstab
- # vi /mnt/etc/fstab
- /dev/xvda1 / ext3 defaults 1 1
- none /dev/pts devpts gid=5,mode=620 0 0
- none /dev/shm tmpfs defaults 0 0
- none /proc proc defaults 0 0
- none /sys sysfs defaults 0 0
-
- 3. modify inittab
- set runlevel to 3 to avoid X trying to start
- # vi /mnt/etc/inittab
- id:3:initdefault:
- Start a getty on the hvc0 console
- X0:2345:respawn:/sbin/mingetty hvc0
- tty1-6 mingetty can be commented out
-
- 4. add hvc0 into /etc/securetty
- # vi /mnt/etc/securetty (add hvc0)
-
- 5. umount
- # umount /mnt
-
-FYI, virt-manager can also make a disk image for guest OS.
-It's GUI tools and easy to make it.
-
-==================
-Boot Xen & Domain0
-==================
-
- 1. replace elilo
- elilo of RHEL5 can boot Xen and Dom0.
- If you use old elilo (e.g RHEL4), please download from the below
- http://elilo.sourceforge.net/cgi-bin/blosxom
- and copy into /boot/efi/efi/redhat/
- # cp elilo-3.6-ia64.efi /boot/efi/efi/redhat/elilo.efi
-
- 2. modify elilo.conf (like the below)
- # vi /boot/efi/efi/redhat/elilo.conf
- prompt
- timeout=20
- default=xen
- relocatable
-
- image=vmlinuz-2.6.18.8-xen
- label=xen
- vmm=xen.gz
- initrd=initrd-2.6.18.8-xen.img
- read-only
- append=" -- rhgb root=/dev/sda2"
-
-The append options before "--" are for xen hypervisor,
-the options after "--" are for dom0.
-
-FYI, your machine may need console options like
-"com1=19200,8n1 console=vga,com1". For example,
-append="com1=19200,8n1 console=vga,com1 -- rhgb console=tty0 \
-console=ttyS0 root=/dev/sda2"
-
-=====================================
-Getting and Building domU with pv_ops
-=====================================
-
- 1. get pv_ops tree
- # git clone http://people.valinux.co.jp/~yamahata/xen-ia64/linux-2.6-xen-ia64.git/
-
- 2. git branch (if necessary)
- # cd linux-2.6-xen-ia64/
- # git checkout -b your_branch origin/xen-ia64-domu-minimal-2008may19
- (Note: The current branch is xen-ia64-domu-minimal-2008may19.
- But you would find the new branch. You can see with
- "git branch -r" to get the branch lists.
- http://people.valinux.co.jp/~yamahata/xen-ia64/for_eagl/linux-2.6-ia64-pv-ops.git/
- is also available. The tree is based on
- git://git.kernel.org/pub/scm/linux/kernel/git/aegl/linux-2.6 test)
-
-
- 3. copy .config for pv_ops of domU
- # cp arch/ia64/configs/xen_domu_wip_defconfig .config
-
- 4. make kernel with pv_ops
- # make oldconfig
- # make
-
- 5. install the kernel and initrd
- # cp vmlinux.gz /boot/efi/efi/redhat/vmlinuz-2.6-pv_ops-xenU
- # make modules_install
- # mkinitrd -f /boot/efi/efi/redhat/initrd-2.6-pv_ops-xenU.img \
- 2.6.26-rc3xen-ia64-08941-g1b12161 --builtin mptspi \
- --builtin mptbase --builtin mptscsih --builtin uhci-hcd \
- --builtin ohci-hcd --builtin ehci-hcd
-
-========================
-Boot DomainU with pv_ops
-========================
-
- 1. make config of DomU
- # vi /etc/xen/rhel5
- kernel = "/boot/efi/efi/redhat/vmlinuz-2.6-pv_ops-xenU"
- ramdisk = "/boot/efi/efi/redhat/initrd-2.6-pv_ops-xenU.img"
- vcpus = 1
- memory = 512
- name = "rhel5"
- disk = [ 'file:/root/rhel5.img,xvda1,w' ]
- root = "/dev/xvda1 ro"
- extra= "rhgb console=hvc0"
-
- 2. After boot xen and dom0, start xend
- # /etc/init.d/xend start
- ( In the debugging case, # XEND_DEBUG=1 xend trace_start )
-
- 3. start domU
- # xm create -c rhel5
-
-=========
-Reference
-=========
-- Wiki of Xen/IA64 upstream merge
- http://wiki.xensource.com/xenwiki/XenIA64/UpstreamMerge
-
-Written by Akio Takebe <takebe_akio@jp.fujitsu.com> on 28 May 2008
diff --git a/Documentation/ide/index.rst b/Documentation/ide/index.rst
index 45bc12d3957f..813dfe611a31 100644
--- a/Documentation/ide/index.rst
+++ b/Documentation/ide/index.rst
@@ -1,4 +1,4 @@
-:orphan:
+.. SPDX-License-Identifier: GPL-2.0
==================================
Integrated Drive Electronics (IDE)
diff --git a/Documentation/iio/index.rst b/Documentation/iio/index.rst
index 0593dca89a94..58b7a4ebac51 100644
--- a/Documentation/iio/index.rst
+++ b/Documentation/iio/index.rst
@@ -1,4 +1,4 @@
-:orphan:
+.. SPDX-License-Identifier: GPL-2.0
==============
Industrial I/O
diff --git a/Documentation/index.rst b/Documentation/index.rst
index 00a0fe4241a4..70ae148ec980 100644
--- a/Documentation/index.rst
+++ b/Documentation/index.rst
@@ -1,3 +1,4 @@
+
.. The Linux Kernel documentation master file, created by
sphinx-quickstart on Fri Feb 12 13:51:46 2016.
You can adapt this file completely to your liking, but it should at least
@@ -34,6 +35,7 @@ trying to get it to work optimally on a given system.
:maxdepth: 2
admin-guide/index
+ kbuild/index
Firmware-related documentation
------------------------------
@@ -55,6 +57,7 @@ the kernel interface as seen by application developers.
:maxdepth: 2
userspace-api/index
+ ioctl/index
Introduction to kernel development
@@ -75,6 +78,9 @@ merged much easier.
kernel-hacking/index
trace/index
maintainer/index
+ fault-injection/index
+ livepatch/index
+
Kernel API documentation
------------------------
@@ -90,9 +96,24 @@ needed).
driver-api/index
core-api/index
+ locking/index
+ accounting/index
+ block/index
+ cdrom/index
+ ide/index
+ fb/index
+ fpga/index
+ hid/index
+ iio/index
infiniband/index
+ leds/index
media/index
+ netlabel/index
networking/index
+ pcmcia/index
+ target/index
+ timers/index
+ watchdog/index
input/index
hwmon/index
gpu/index
@@ -105,6 +126,8 @@ needed).
usb/index
PCI/index
misc-devices/index
+ mic/index
+ scheduler/index
Architecture-specific documentation
-----------------------------------
@@ -116,7 +139,16 @@ implementation.
:maxdepth: 2
sh/index
+ arm/index
+ arm64/index
+ ia64/index
+ m68k/index
+ riscv/index
+ s390/index
+ sparc/index
x86/index
+ xtensa/index
Filesystem Documentation
------------------------
diff --git a/Documentation/ioctl/botching-up-ioctls.txt b/Documentation/ioctl/botching-up-ioctls.rst
index 883fb034bd04..ac697fef3545 100644
--- a/Documentation/ioctl/botching-up-ioctls.txt
+++ b/Documentation/ioctl/botching-up-ioctls.rst
@@ -1,3 +1,4 @@
+=================================
(How to avoid) Botching up ioctls
=================================
diff --git a/Documentation/ioctl/cdrom.rst b/Documentation/ioctl/cdrom.rst
new file mode 100644
index 000000000000..3b4c0506de46
--- /dev/null
+++ b/Documentation/ioctl/cdrom.rst
@@ -0,0 +1,1233 @@
+============================
+Summary of CDROM ioctl calls
+============================
+
+- Edward A. Falk <efalk@google.com>
+
+November, 2004
+
+This document attempts to describe the ioctl(2) calls supported by
+the CDROM layer. These are by-and-large implemented (as of Linux 2.6)
+in drivers/cdrom/cdrom.c and drivers/block/scsi_ioctl.c
+
+ioctl values are listed in <linux/cdrom.h>. As of this writing, they
+are as follows:
+
+ ====================== ===============================================
+ CDROMPAUSE Pause Audio Operation
+ CDROMRESUME Resume paused Audio Operation
+ CDROMPLAYMSF Play Audio MSF (struct cdrom_msf)
+ CDROMPLAYTRKIND Play Audio Track/index (struct cdrom_ti)
+ CDROMREADTOCHDR Read TOC header (struct cdrom_tochdr)
+ CDROMREADTOCENTRY Read TOC entry (struct cdrom_tocentry)
+ CDROMSTOP Stop the cdrom drive
+ CDROMSTART Start the cdrom drive
+ CDROMEJECT Ejects the cdrom media
+ CDROMVOLCTRL Control output volume (struct cdrom_volctrl)
+ CDROMSUBCHNL Read subchannel data (struct cdrom_subchnl)
+ CDROMREADMODE2 Read CDROM mode 2 data (2336 Bytes)
+ (struct cdrom_read)
+ CDROMREADMODE1 Read CDROM mode 1 data (2048 Bytes)
+ (struct cdrom_read)
+ CDROMREADAUDIO (struct cdrom_read_audio)
+ CDROMEJECT_SW enable(1)/disable(0) auto-ejecting
+ CDROMMULTISESSION Obtain the start-of-last-session
+ address of multi session disks
+ (struct cdrom_multisession)
+ CDROM_GET_MCN Obtain the "Universal Product Code"
+ if available (struct cdrom_mcn)
+ CDROM_GET_UPC Deprecated, use CDROM_GET_MCN instead.
+ CDROMRESET hard-reset the drive
+ CDROMVOLREAD Get the drive's volume setting
+ (struct cdrom_volctrl)
+ CDROMREADRAW read data in raw mode (2352 Bytes)
+ (struct cdrom_read)
+ CDROMREADCOOKED read data in cooked mode
+ CDROMSEEK seek msf address
+ CDROMPLAYBLK scsi-cd only, (struct cdrom_blk)
+ CDROMREADALL read all 2646 bytes
+ CDROMGETSPINDOWN return 4-bit spindown value
+ CDROMSETSPINDOWN set 4-bit spindown value
+ CDROMCLOSETRAY counterpart of CDROMEJECT
+ CDROM_SET_OPTIONS Set behavior options
+ CDROM_CLEAR_OPTIONS Clear behavior options
+ CDROM_SELECT_SPEED Set the CD-ROM speed
+ CDROM_SELECT_DISC Select disc (for juke-boxes)
+ CDROM_MEDIA_CHANGED Check if media changed
+ CDROM_DRIVE_STATUS Get tray position, etc.
+ CDROM_DISC_STATUS Get disc type, etc.
+ CDROM_CHANGER_NSLOTS Get number of slots
+ CDROM_LOCKDOOR lock or unlock door
+ CDROM_DEBUG Turn debug messages on/off
+ CDROM_GET_CAPABILITY get capabilities
+ CDROMAUDIOBUFSIZ set the audio buffer size
+ DVD_READ_STRUCT Read structure
+ DVD_WRITE_STRUCT Write structure
+ DVD_AUTH Authentication
+ CDROM_SEND_PACKET send a packet to the drive
+ CDROM_NEXT_WRITABLE get next writable block
+ CDROM_LAST_WRITTEN get last block written on disc
+ ====================== ===============================================
+
+
+The information that follows was determined from reading kernel source
+code. It is likely that some corrections will be made over time.
+
+------------------------------------------------------------------------------
+
+General:
+
+ Unless otherwise specified, all ioctl calls return 0 on success
+ and -1 with errno set to an appropriate value on error. (Some
+ ioctls return non-negative data values.)
+
+ Unless otherwise specified, all ioctl calls return -1 and set
+ errno to EFAULT on a failed attempt to copy data to or from user
+ address space.
+
+ Individual drivers may return error codes not listed here.
+
+ Unless otherwise specified, all data structures and constants
+ are defined in <linux/cdrom.h>
+
+------------------------------------------------------------------------------
+
+
+CDROMPAUSE
+ Pause Audio Operation
+
+
+ usage::
+
+ ioctl(fd, CDROMPAUSE, 0);
+
+
+ inputs:
+ none
+
+
+ outputs:
+ none
+
+
+ error return:
+ - ENOSYS cd drive not audio-capable.
+
+
+CDROMRESUME
+ Resume paused Audio Operation
+
+
+ usage::
+
+ ioctl(fd, CDROMRESUME, 0);
+
+
+ inputs:
+ none
+
+
+ outputs:
+ none
+
+
+ error return:
+ - ENOSYS cd drive not audio-capable.
+
+
+CDROMPLAYMSF
+ Play Audio MSF
+
+ (struct cdrom_msf)
+
+
+ usage::
+
+ struct cdrom_msf msf;
+
+ ioctl(fd, CDROMPLAYMSF, &msf);
+
+ inputs:
+ cdrom_msf structure, describing a segment of music to play
+
+
+ outputs:
+ none
+
+
+ error return:
+ - ENOSYS cd drive not audio-capable.
+
+ notes:
+ - MSF stands for minutes-seconds-frames
+ - LBA stands for logical block address
+ - Segment is described as start and end times, where each time
+ is described as minutes:seconds:frames.
+ A frame is 1/75 of a second.
+
+
+CDROMPLAYTRKIND
+ Play Audio Track/index
+
+ (struct cdrom_ti)
+
+
+ usage::
+
+ struct cdrom_ti ti;
+
+ ioctl(fd, CDROMPLAYTRKIND, &ti);
+
+ inputs:
+ cdrom_ti structure, describing a segment of music to play
+
+
+ outputs:
+ none
+
+
+ error return:
+ - ENOSYS cd drive not audio-capable.
+
+ notes:
+ - Segment is described as start and end times, where each time
+ is described as a track and an index.
+
+
+
+CDROMREADTOCHDR
+ Read TOC header
+
+ (struct cdrom_tochdr)
+
+
+ usage::
+
+ cdrom_tochdr header;
+
+ ioctl(fd, CDROMREADTOCHDR, &header);
+
+ inputs:
+ cdrom_tochdr structure
+
+
+ outputs:
+ cdrom_tochdr structure
+
+
+ error return:
+ - ENOSYS cd drive not audio-capable.
+
+
+
+CDROMREADTOCENTRY
+ Read TOC entry
+
+ (struct cdrom_tocentry)
+
+
+ usage::
+
+ struct cdrom_tocentry entry;
+
+ ioctl(fd, CDROMREADTOCENTRY, &entry);
+
+ inputs:
+ cdrom_tocentry structure
+
+
+ outputs:
+ cdrom_tocentry structure
+
+
+ error return:
+ - ENOSYS cd drive not audio-capable.
+ - EINVAL entry.cdte_format not CDROM_MSF or CDROM_LBA
+ - EINVAL requested track out of bounds
+ - EIO I/O error reading TOC
+
+ notes:
+ - TOC stands for Table Of Contents
+ - MSF stands for minutes-seconds-frames
+ - LBA stands for logical block address
+
+
+
+CDROMSTOP
+ Stop the cdrom drive
+
+
+ usage::
+
+ ioctl(fd, CDROMSTOP, 0);
+
+
+ inputs:
+ none
+
+
+ outputs:
+ none
+
+
+ error return:
+ - ENOSYS cd drive not audio-capable.
+
+ notes:
+ - Exact interpretation of this ioctl depends on the device,
+ but most seem to spin the drive down.
+
+
+CDROMSTART
+ Start the cdrom drive
+
+
+ usage::
+
+ ioctl(fd, CDROMSTART, 0);
+
+
+ inputs:
+ none
+
+
+ outputs:
+ none
+
+
+ error return:
+ - ENOSYS cd drive not audio-capable.
+
+ notes:
+ - Exact interpretation of this ioctl depends on the device,
+ but most seem to spin the drive up and/or close the tray.
+ Other devices ignore the ioctl completely.
+
+
+CDROMEJECT
+ Ejects the cdrom media
+
+
+ usage::
+
+ ioctl(fd, CDROMEJECT, 0);
+
+
+ inputs:
+ none
+
+
+ outputs:
+ none
+
+
+ error returns:
+ - ENOSYS cd drive not capable of ejecting
+ - EBUSY other processes are accessing drive, or door is locked
+
+ notes:
+ - See CDROM_LOCKDOOR, below.
+
+
+
+
+CDROMCLOSETRAY
+ counterpart of CDROMEJECT
+
+
+ usage::
+
+ ioctl(fd, CDROMCLOSETRAY, 0);
+
+
+ inputs:
+ none
+
+
+ outputs:
+ none
+
+
+ error returns:
+ - ENOSYS cd drive not capable of closing the tray
+ - EBUSY other processes are accessing drive, or door is locked
+
+ notes:
+ - See CDROM_LOCKDOOR, below.
+
+
+
+
+CDROMVOLCTRL
+ Control output volume (struct cdrom_volctrl)
+
+
+ usage::
+
+ struct cdrom_volctrl volume;
+
+ ioctl(fd, CDROMVOLCTRL, &volume);
+
+ inputs:
+ cdrom_volctrl structure containing volumes for up to 4
+ channels.
+
+ outputs:
+ none
+
+
+ error return:
+ - ENOSYS cd drive not audio-capable.
+
+
+
+CDROMVOLREAD
+ Get the drive's volume setting
+
+ (struct cdrom_volctrl)
+
+
+ usage::
+
+ struct cdrom_volctrl volume;
+
+ ioctl(fd, CDROMVOLREAD, &volume);
+
+ inputs:
+ none
+
+
+ outputs:
+ The current volume settings.
+
+
+ error return:
+ - ENOSYS cd drive not audio-capable.
+
+
+
+CDROMSUBCHNL
+ Read subchannel data
+
+ (struct cdrom_subchnl)
+
+
+ usage::
+
+ struct cdrom_subchnl q;
+
+ ioctl(fd, CDROMSUBCHNL, &q);
+
+ inputs:
+ cdrom_subchnl structure
+
+
+ outputs:
+ cdrom_subchnl structure
+
+
+ error return:
+ - ENOSYS cd drive not audio-capable.
+ - EINVAL format not CDROM_MSF or CDROM_LBA
+
+ notes:
+ - Format is converted to CDROM_MSF or CDROM_LBA
+ as per user request on return
+
+
+
+CDROMREADRAW
+ read data in raw mode (2352 Bytes)
+
+ (struct cdrom_read)
+
+ usage::
+
+ union {
+
+ struct cdrom_msf msf; /* input */
+ char buffer[CD_FRAMESIZE_RAW]; /* return */
+ } arg;
+ ioctl(fd, CDROMREADRAW, &arg);
+
+ inputs:
+ cdrom_msf structure indicating an address to read.
+
+ Only the start values are significant.
+
+ outputs:
+ Data written to address provided by user.
+
+
+ error return:
+ - EINVAL address less than 0, or msf less than 0:2:0
+ - ENOMEM out of memory
+
+ notes:
+ - As of 2.6.8.1, comments in <linux/cdrom.h> indicate that this
+ ioctl accepts a cdrom_read structure, but actual source code
+ reads a cdrom_msf structure and writes a buffer of data to
+ the same address.
+
+ - MSF values are converted to LBA values via this formula::
+
+ lba = (((m * CD_SECS) + s) * CD_FRAMES + f) - CD_MSF_OFFSET;
+
+
+
+
+CDROMREADMODE1
+ Read CDROM mode 1 data (2048 Bytes)
+
+ (struct cdrom_read)
+
+ notes:
+ Identical to CDROMREADRAW except that block size is
+ CD_FRAMESIZE (2048) bytes
+
+
+
+CDROMREADMODE2
+ Read CDROM mode 2 data (2336 Bytes)
+
+ (struct cdrom_read)
+
+ notes:
+ Identical to CDROMREADRAW except that block size is
+ CD_FRAMESIZE_RAW0 (2336) bytes
+
+
+
+CDROMREADAUDIO
+ (struct cdrom_read_audio)
+
+ usage::
+
+ struct cdrom_read_audio ra;
+
+ ioctl(fd, CDROMREADAUDIO, &ra);
+
+ inputs:
+ cdrom_read_audio structure containing read start
+ point and length
+
+ outputs:
+ audio data, returned to buffer indicated by ra
+
+
+ error return:
+ - EINVAL format not CDROM_MSF or CDROM_LBA
+ - EINVAL nframes not in range [1, 75]
+ - ENXIO drive has no queue (probably means invalid fd)
+ - ENOMEM out of memory
+
+
+CDROMEJECT_SW
+ enable(1)/disable(0) auto-ejecting
+
+
+ usage::
+
+ int val;
+
+ ioctl(fd, CDROMEJECT_SW, val);
+
+ inputs:
+ Flag specifying auto-eject flag.
+
+
+ outputs:
+ none
+
+
+ error return:
+ - ENOSYS Drive is not capable of ejecting.
+ - EBUSY Door is locked
+
+
+
+
+CDROMMULTISESSION
+ Obtain the start-of-last-session address of multi session disks
+
+ (struct cdrom_multisession)
+
+ usage::
+
+ struct cdrom_multisession ms_info;
+
+ ioctl(fd, CDROMMULTISESSION, &ms_info);
+
+ inputs:
+ cdrom_multisession structure containing desired
+ format.
+
+ outputs:
+ cdrom_multisession structure is filled with last_session
+ information.
+
+ error return:
+ - EINVAL format not CDROM_MSF or CDROM_LBA
+
+
+CDROM_GET_MCN
+ Obtain the "Universal Product Code"
+ if available
+
+ (struct cdrom_mcn)
+
+
+ usage::
+
+ struct cdrom_mcn mcn;
+
+ ioctl(fd, CDROM_GET_MCN, &mcn);
+
+ inputs:
+ none
+
+
+ outputs:
+ Universal Product Code
+
+
+ error return:
+ - ENOSYS Drive is not capable of reading MCN data.
+
+ notes:
+ - Source code comments state::
+
+ The following function is implemented, although very few
+ audio discs give Universal Product Code information, which
+ should just be the Medium Catalog Number on the box. Note,
+ that the way the code is written on the CD is /not/ uniform
+ across all discs!
+
+
+
+
+CDROM_GET_UPC
+ Deprecated; use CDROM_GET_MCN instead.
+
+
+ Not implemented, as of 2.6.8.1
+
+
+
+CDROMRESET
+ hard-reset the drive
+
+
+ usage::
+
+ ioctl(fd, CDROMRESET, 0);
+
+
+ inputs:
+ none
+
+
+ outputs:
+ none
+
+
+ error return:
+ - EACCES Access denied: requires CAP_SYS_ADMIN
+ - ENOSYS Drive is not capable of resetting.
+
+
+
+
+CDROMREADCOOKED
+ read data in cooked mode
+
+
+ usage::
+
+ u8 buffer[CD_FRAMESIZE];
+
+ ioctl(fd, CDROMREADCOOKED, buffer);
+
+ inputs:
+ none
+
+
+ outputs:
+ 2048 bytes of data, "cooked" mode.
+
+
+ notes:
+ Not implemented on all drives.
+
+
+
+
+
+CDROMREADALL
+ read all 2646 bytes
+
+
+ Same as CDROMREADCOOKED, but reads 2646 bytes.
+
+
+
+CDROMSEEK
+ seek msf address
+
+
+ usage::
+
+ struct cdrom_msf msf;
+
+ ioctl(fd, CDROMSEEK, &msf);
+
+ inputs:
+ MSF address to seek to.
+
+
+ outputs:
+ none
+
+
+
+
+CDROMPLAYBLK
+ scsi-cd only
+
+ (struct cdrom_blk)
+
+
+ usage::
+
+ struct cdrom_blk blk;
+
+ ioctl(fd, CDROMPLAYBLK, &blk);
+
+ inputs:
+ Region to play
+
+
+ outputs:
+ none
+
+
+
+
+CDROMGETSPINDOWN
+ usage::
+
+ char spindown;
+
+ ioctl(fd, CDROMGETSPINDOWN, &spindown);
+
+ inputs:
+ none
+
+
+ outputs:
+ The current 4-bit spindown value.
+
+
+
+
+
+CDROMSETSPINDOWN
+ usage::
+
+ char spindown;
+
+ ioctl(fd, CDROMSETSPINDOWN, &spindown);
+
+ inputs:
+ 4-bit value used to control spindown (TODO: more detail here)
+
+
+ outputs:
+ none
+
+
+
+
+
+
+CDROM_SET_OPTIONS
+ Set behavior options
+
+
+ usage::
+
+ int options;
+
+ ioctl(fd, CDROM_SET_OPTIONS, options);
+
+ inputs:
+ New values for drive options. The logical 'or' of:
+
+ ============== ==================================
+ CDO_AUTO_CLOSE close tray on first open(2)
+ CDO_AUTO_EJECT open tray on last release
+ CDO_USE_FFLAGS use O_NONBLOCK information on open
+ CDO_LOCK lock tray on open files
+ CDO_CHECK_TYPE check type on open for data
+ ============== ==================================
+
+ outputs:
+ Returns the resulting options settings in the
+ ioctl return value. Returns -1 on error.
+
+ error return:
+ - ENOSYS selected option(s) not supported by drive.
+
+
+
+
+CDROM_CLEAR_OPTIONS
+ Clear behavior options
+
+
+ Same as CDROM_SET_OPTIONS, except that selected options are
+ turned off.
+
+
+
+CDROM_SELECT_SPEED
+ Set the CD-ROM speed
+
+
+ usage::
+
+ int speed;
+
+ ioctl(fd, CDROM_SELECT_SPEED, speed);
+
+ inputs:
+ New drive speed.
+
+
+ outputs:
+ none
+
+
+ error return:
+ - ENOSYS speed selection not supported by drive.
+
+
+
+CDROM_SELECT_DISC
+ Select disc (for juke-boxes)
+
+
+ usage::
+
+ int disk;
+
+ ioctl(fd, CDROM_SELECT_DISC, disk);
+
+ inputs:
+ Disk to load into drive.
+
+
+ outputs:
+ none
+
+
+ error return:
+ - EINVAL Disk number beyond capacity of drive
+
+
+
+CDROM_MEDIA_CHANGED
+ Check if media has changed
+
+
+ usage::
+
+ int slot;
+
+ ioctl(fd, CDROM_MEDIA_CHANGED, slot);
+
+ inputs:
+ Slot number to be tested, always zero except for jukeboxes.
+
+ May also be special values CDSL_NONE or CDSL_CURRENT
+
+ outputs:
+ Ioctl return value is 0 or 1 depending on whether the media
+ has been changed, or -1 on error.
+
+ error returns:
+ - ENOSYS Drive can't detect media change
+ - EINVAL Slot number beyond capacity of drive
+ - ENOMEM Out of memory
+
+
+
+CDROM_DRIVE_STATUS
+ Get tray position, etc.
+
+
+ usage::
+
+ int slot;
+
+ ioctl(fd, CDROM_DRIVE_STATUS, slot);
+
+ inputs:
+ Slot number to be tested, always zero except for jukeboxes.
+
+ May also be special values CDSL_NONE or CDSL_CURRENT
+
+ outputs:
+ Ioctl return value will be one of the following values
+ from <linux/cdrom.h>:
+
+ =================== ==========================
+ CDS_NO_INFO Information not available.
+ CDS_NO_DISC
+ CDS_TRAY_OPEN
+ CDS_DRIVE_NOT_READY
+ CDS_DISC_OK
+ -1 error
+ =================== ==========================
+
+ error returns:
+ - ENOSYS Drive can't detect drive status
+ - EINVAL Slot number beyond capacity of drive
+ - ENOMEM Out of memory
+
+
+
+
+CDROM_DISC_STATUS
+ Get disc type, etc.
+
+
+ usage::
+
+ ioctl(fd, CDROM_DISC_STATUS, 0);
+
+
+ inputs:
+ none
+
+
+ outputs:
+ Ioctl return value will be one of the following values
+ from <linux/cdrom.h>:
+
+ - CDS_NO_INFO
+ - CDS_AUDIO
+ - CDS_MIXED
+ - CDS_XA_2_2
+ - CDS_XA_2_1
+ - CDS_DATA_1
+
+ error returns:
+ none at present
+
+ notes:
+ - Source code comments state::
+
+
+ Ok, this is where problems start. The current interface for
+ the CDROM_DISC_STATUS ioctl is flawed. It makes the false
+ assumption that CDs are all CDS_DATA_1 or all CDS_AUDIO, etc.
+ Unfortunately, while this is often the case, it is also
+ very common for CDs to have some tracks with data, and some
+ tracks with audio. Just because I feel like it, I declare
+ the following to be the best way to cope. If the CD has
+ ANY data tracks on it, it will be returned as a data CD.
+ If it has any XA tracks, I will return it as that. Now I
+ could simplify this interface by combining these returns with
+ the above, but this more clearly demonstrates the problem
+ with the current interface. Too bad this wasn't designed
+ to use bitmasks... -Erik
+
+ Well, now we have the option CDS_MIXED: a mixed-type CD.
+ User level programmers might feel the ioctl is not very
+ useful.
+ ---david
+
+
+
+
+CDROM_CHANGER_NSLOTS
+ Get number of slots
+
+
+ usage::
+
+ ioctl(fd, CDROM_CHANGER_NSLOTS, 0);
+
+
+ inputs:
+ none
+
+
+ outputs:
+ The ioctl return value will be the number of slots in a
+ CD changer. Typically 1 for non-multi-disk devices.
+
+ error returns:
+ none
+
+
+
+CDROM_LOCKDOOR
+ lock or unlock door
+
+
+ usage::
+
+ int lock;
+
+ ioctl(fd, CDROM_LOCKDOOR, lock);
+
+ inputs:
+ Door lock flag, 1=lock, 0=unlock
+
+
+ outputs:
+ none
+
+
+ error returns:
+ - EDRIVE_CANT_DO_THIS
+
+ Door lock function not supported.
+ - EBUSY
+
+ Attempt to unlock when multiple users
+ have the drive open and not CAP_SYS_ADMIN
+
+ notes:
+ As of 2.6.8.1, the lock flag is a global lock, meaning that
+ all CD drives will be locked or unlocked together. This is
+ probably a bug.
+
+ The EDRIVE_CANT_DO_THIS value is defined in <linux/cdrom.h>
+ and is currently (2.6.8.1) the same as EOPNOTSUPP
+
+
+
+CDROM_DEBUG
+ Turn debug messages on/off
+
+
+ usage::
+
+ int debug;
+
+ ioctl(fd, CDROM_DEBUG, debug);
+
+ inputs:
+ Cdrom debug flag, 0=disable, 1=enable
+
+
+ outputs:
+ The ioctl return value will be the new debug flag.
+
+
+ error return:
+ - EACCES Access denied: requires CAP_SYS_ADMIN
+
+
+
+CDROM_GET_CAPABILITY
+ get capabilities
+
+
+ usage::
+
+ ioctl(fd, CDROM_GET_CAPABILITY, 0);
+
+
+ inputs:
+ none
+
+
+ outputs:
+ The ioctl return value is the current device capability
+ flags. See CDC_CLOSE_TRAY, CDC_OPEN_TRAY, etc.
+
+
+
+CDROMAUDIOBUFSIZ
+ set the audio buffer size
+
+
+ usage::
+
+ int val;
+
+ ioctl(fd, CDROMAUDIOBUFSIZ, val);
+
+ inputs:
+ New audio buffer size
+
+
+ outputs:
+ The ioctl return value is the new audio buffer size, or -1
+ on error.
+
+ error return:
+ - ENOSYS Not supported by this driver.
+
+ notes:
+ Not supported by all drivers.
+
+
+
+
+DVD_READ_STRUCT Read structure
+
+ usage::
+
+ dvd_struct s;
+
+ ioctl(fd, DVD_READ_STRUCT, &s);
+
+ inputs:
+ dvd_struct structure, containing:
+
+ =================== ==========================================
+ type specifies the information desired, one of
+ DVD_STRUCT_PHYSICAL, DVD_STRUCT_COPYRIGHT,
+ DVD_STRUCT_DISCKEY, DVD_STRUCT_BCA,
+ DVD_STRUCT_MANUFACT
+ physical.layer_num desired layer, indexed from 0
+ copyright.layer_num desired layer, indexed from 0
+ disckey.agid
+ =================== ==========================================
+
+ outputs:
+ dvd_struct structure, containing:
+
+ =================== ================================
+ physical for type == DVD_STRUCT_PHYSICAL
+ copyright for type == DVD_STRUCT_COPYRIGHT
+ disckey.value for type == DVD_STRUCT_DISCKEY
+ bca.{len,value} for type == DVD_STRUCT_BCA
+ manufact.{len,valu} for type == DVD_STRUCT_MANUFACT
+ =================== ================================
+
+ error returns:
+ - EINVAL physical.layer_num exceeds number of layers
+ - EIO Received invalid response from drive
+
+
+
+DVD_WRITE_STRUCT Write structure
+
+ Not implemented, as of 2.6.8.1
+
+
+
+DVD_AUTH Authentication
+
+ usage::
+
+ dvd_authinfo ai;
+
+ ioctl(fd, DVD_AUTH, &ai);
+
+ inputs:
+ dvd_authinfo structure. See <linux/cdrom.h>
+
+
+ outputs:
+ dvd_authinfo structure.
+
+
+ error return:
+ - ENOTTY ai.type not recognized.
+
+
+
+CDROM_SEND_PACKET
+ send a packet to the drive
+
+
+ usage::
+
+ struct cdrom_generic_command cgc;
+
+ ioctl(fd, CDROM_SEND_PACKET, &cgc);
+
+ inputs:
+ cdrom_generic_command structure containing the packet to send.
+
+
+ outputs:
+ cdrom_generic_command structure containing results.
+
+ error return:
+ - EIO
+
+ command failed.
+ - EPERM
+
+ Operation not permitted, either because a
+ write command was attempted on a drive which
+ is opened read-only, or because the command
+ requires CAP_SYS_RAWIO
+ - EINVAL
+
+ cgc.data_direction not set
+
+
+
+CDROM_NEXT_WRITABLE
+ get next writable block
+
+
+ usage::
+
+ long next;
+
+ ioctl(fd, CDROM_NEXT_WRITABLE, &next);
+
+ inputs:
+ none
+
+
+ outputs:
+ The next writable block.
+
+
+ notes:
+ If the device does not support this ioctl directly, the
+ ioctl will return CDROM_LAST_WRITTEN + 7.
+
+
+
+CDROM_LAST_WRITTEN
+ get last block written on disc
+
+
+ usage::
+
+ long last;
+
+ ioctl(fd, CDROM_LAST_WRITTEN, &last);
+
+ inputs:
+ none
+
+
+ outputs:
+ The last block written on disc
+
+
+ notes:
+ If the device does not support this ioctl directly, the
+ result is derived from the disc's table of contents. If the
+ table of contents can't be read, this ioctl returns an
+ error.
diff --git a/Documentation/ioctl/cdrom.txt b/Documentation/ioctl/cdrom.txt
deleted file mode 100644
index a4d62a9d6771..000000000000
--- a/Documentation/ioctl/cdrom.txt
+++ /dev/null
@@ -1,967 +0,0 @@
- Summary of CDROM ioctl calls.
- ============================
-
- Edward A. Falk <efalk@google.com>
-
- November, 2004
-
-This document attempts to describe the ioctl(2) calls supported by
-the CDROM layer. These are by-and-large implemented (as of Linux 2.6)
-in drivers/cdrom/cdrom.c and drivers/block/scsi_ioctl.c
-
-ioctl values are listed in <linux/cdrom.h>. As of this writing, they
-are as follows:
-
- CDROMPAUSE Pause Audio Operation
- CDROMRESUME Resume paused Audio Operation
- CDROMPLAYMSF Play Audio MSF (struct cdrom_msf)
- CDROMPLAYTRKIND Play Audio Track/index (struct cdrom_ti)
- CDROMREADTOCHDR Read TOC header (struct cdrom_tochdr)
- CDROMREADTOCENTRY Read TOC entry (struct cdrom_tocentry)
- CDROMSTOP Stop the cdrom drive
- CDROMSTART Start the cdrom drive
- CDROMEJECT Ejects the cdrom media
- CDROMVOLCTRL Control output volume (struct cdrom_volctrl)
- CDROMSUBCHNL Read subchannel data (struct cdrom_subchnl)
- CDROMREADMODE2 Read CDROM mode 2 data (2336 Bytes)
- (struct cdrom_read)
- CDROMREADMODE1 Read CDROM mode 1 data (2048 Bytes)
- (struct cdrom_read)
- CDROMREADAUDIO (struct cdrom_read_audio)
- CDROMEJECT_SW enable(1)/disable(0) auto-ejecting
- CDROMMULTISESSION Obtain the start-of-last-session
- address of multi session disks
- (struct cdrom_multisession)
- CDROM_GET_MCN Obtain the "Universal Product Code"
- if available (struct cdrom_mcn)
- CDROM_GET_UPC Deprecated, use CDROM_GET_MCN instead.
- CDROMRESET hard-reset the drive
- CDROMVOLREAD Get the drive's volume setting
- (struct cdrom_volctrl)
- CDROMREADRAW read data in raw mode (2352 Bytes)
- (struct cdrom_read)
- CDROMREADCOOKED read data in cooked mode
- CDROMSEEK seek msf address
- CDROMPLAYBLK scsi-cd only, (struct cdrom_blk)
- CDROMREADALL read all 2646 bytes
- CDROMGETSPINDOWN return 4-bit spindown value
- CDROMSETSPINDOWN set 4-bit spindown value
- CDROMCLOSETRAY pendant of CDROMEJECT
- CDROM_SET_OPTIONS Set behavior options
- CDROM_CLEAR_OPTIONS Clear behavior options
- CDROM_SELECT_SPEED Set the CD-ROM speed
- CDROM_SELECT_DISC Select disc (for juke-boxes)
- CDROM_MEDIA_CHANGED Check is media changed
- CDROM_DRIVE_STATUS Get tray position, etc.
- CDROM_DISC_STATUS Get disc type, etc.
- CDROM_CHANGER_NSLOTS Get number of slots
- CDROM_LOCKDOOR lock or unlock door
- CDROM_DEBUG Turn debug messages on/off
- CDROM_GET_CAPABILITY get capabilities
- CDROMAUDIOBUFSIZ set the audio buffer size
- DVD_READ_STRUCT Read structure
- DVD_WRITE_STRUCT Write structure
- DVD_AUTH Authentication
- CDROM_SEND_PACKET send a packet to the drive
- CDROM_NEXT_WRITABLE get next writable block
- CDROM_LAST_WRITTEN get last block written on disc
-
-
-The information that follows was determined from reading kernel source
-code. It is likely that some corrections will be made over time.
-
-
-
-
-
-
-
-General:
-
- Unless otherwise specified, all ioctl calls return 0 on success
- and -1 with errno set to an appropriate value on error. (Some
- ioctls return non-negative data values.)
-
- Unless otherwise specified, all ioctl calls return -1 and set
- errno to EFAULT on a failed attempt to copy data to or from user
- address space.
-
- Individual drivers may return error codes not listed here.
-
- Unless otherwise specified, all data structures and constants
- are defined in <linux/cdrom.h>
-
-
-
-
-CDROMPAUSE Pause Audio Operation
-
- usage:
-
- ioctl(fd, CDROMPAUSE, 0);
-
- inputs: none
-
- outputs: none
-
- error return:
- ENOSYS cd drive not audio-capable.
-
-
-CDROMRESUME Resume paused Audio Operation
-
- usage:
-
- ioctl(fd, CDROMRESUME, 0);
-
- inputs: none
-
- outputs: none
-
- error return:
- ENOSYS cd drive not audio-capable.
-
-
-CDROMPLAYMSF Play Audio MSF (struct cdrom_msf)
-
- usage:
-
- struct cdrom_msf msf;
- ioctl(fd, CDROMPLAYMSF, &msf);
-
- inputs:
- cdrom_msf structure, describing a segment of music to play
-
- outputs: none
-
- error return:
- ENOSYS cd drive not audio-capable.
-
- notes:
- MSF stands for minutes-seconds-frames
- LBA stands for logical block address
-
- Segment is described as start and end times, where each time
- is described as minutes:seconds:frames. A frame is 1/75 of
- a second.
-
-
-CDROMPLAYTRKIND Play Audio Track/index (struct cdrom_ti)
-
- usage:
-
- struct cdrom_ti ti;
- ioctl(fd, CDROMPLAYTRKIND, &ti);
-
- inputs:
- cdrom_ti structure, describing a segment of music to play
-
- outputs: none
-
- error return:
- ENOSYS cd drive not audio-capable.
-
- notes:
- Segment is described as start and end times, where each time
- is described as a track and an index.
-
-
-
-CDROMREADTOCHDR Read TOC header (struct cdrom_tochdr)
-
- usage:
-
- cdrom_tochdr header;
- ioctl(fd, CDROMREADTOCHDR, &header);
-
- inputs:
- cdrom_tochdr structure
-
- outputs:
- cdrom_tochdr structure
-
- error return:
- ENOSYS cd drive not audio-capable.
-
-
-
-CDROMREADTOCENTRY Read TOC entry (struct cdrom_tocentry)
-
- usage:
-
- struct cdrom_tocentry entry;
- ioctl(fd, CDROMREADTOCENTRY, &entry);
-
- inputs:
- cdrom_tocentry structure
-
- outputs:
- cdrom_tocentry structure
-
- error return:
- ENOSYS cd drive not audio-capable.
- EINVAL entry.cdte_format not CDROM_MSF or CDROM_LBA
- EINVAL requested track out of bounds
- EIO I/O error reading TOC
-
- notes:
- TOC stands for Table Of Contents
- MSF stands for minutes-seconds-frames
- LBA stands for logical block address
-
-
-
-CDROMSTOP Stop the cdrom drive
-
- usage:
-
- ioctl(fd, CDROMSTOP, 0);
-
- inputs: none
-
- outputs: none
-
- error return:
- ENOSYS cd drive not audio-capable.
-
- notes:
- Exact interpretation of this ioctl depends on the device,
- but most seem to spin the drive down.
-
-
-CDROMSTART Start the cdrom drive
-
- usage:
-
- ioctl(fd, CDROMSTART, 0);
-
- inputs: none
-
- outputs: none
-
- error return:
- ENOSYS cd drive not audio-capable.
-
- notes:
- Exact interpretation of this ioctl depends on the device,
- but most seem to spin the drive up and/or close the tray.
- Other devices ignore the ioctl completely.
-
-
-CDROMEJECT Ejects the cdrom media
-
- usage:
-
- ioctl(fd, CDROMEJECT, 0);
-
- inputs: none
-
- outputs: none
-
- error returns:
- ENOSYS cd drive not capable of ejecting
- EBUSY other processes are accessing drive, or door is locked
-
- notes:
- See CDROM_LOCKDOOR, below.
-
-
-
-CDROMCLOSETRAY pendant of CDROMEJECT
-
- usage:
-
- ioctl(fd, CDROMCLOSETRAY, 0);
-
- inputs: none
-
- outputs: none
-
- error returns:
- ENOSYS cd drive not capable of closing the tray
- EBUSY other processes are accessing drive, or door is locked
-
- notes:
- See CDROM_LOCKDOOR, below.
-
-
-
-CDROMVOLCTRL Control output volume (struct cdrom_volctrl)
-
- usage:
-
- struct cdrom_volctrl volume;
- ioctl(fd, CDROMVOLCTRL, &volume);
-
- inputs:
- cdrom_volctrl structure containing volumes for up to 4
- channels.
-
- outputs: none
-
- error return:
- ENOSYS cd drive not audio-capable.
-
-
-
-CDROMVOLREAD Get the drive's volume setting
- (struct cdrom_volctrl)
-
- usage:
-
- struct cdrom_volctrl volume;
- ioctl(fd, CDROMVOLREAD, &volume);
-
- inputs: none
-
- outputs:
- The current volume settings.
-
- error return:
- ENOSYS cd drive not audio-capable.
-
-
-
-CDROMSUBCHNL Read subchannel data (struct cdrom_subchnl)
-
- usage:
-
- struct cdrom_subchnl q;
- ioctl(fd, CDROMSUBCHNL, &q);
-
- inputs:
- cdrom_subchnl structure
-
- outputs:
- cdrom_subchnl structure
-
- error return:
- ENOSYS cd drive not audio-capable.
- EINVAL format not CDROM_MSF or CDROM_LBA
-
- notes:
- Format is converted to CDROM_MSF or CDROM_LBA
- as per user request on return
-
-
-
-CDROMREADRAW read data in raw mode (2352 Bytes)
- (struct cdrom_read)
-
- usage:
-
- union {
- struct cdrom_msf msf; /* input */
- char buffer[CD_FRAMESIZE_RAW]; /* return */
- } arg;
- ioctl(fd, CDROMREADRAW, &arg);
-
- inputs:
- cdrom_msf structure indicating an address to read.
- Only the start values are significant.
-
- outputs:
- Data written to address provided by user.
-
- error return:
- EINVAL address less than 0, or msf less than 0:2:0
- ENOMEM out of memory
-
- notes:
- As of 2.6.8.1, comments in <linux/cdrom.h> indicate that this
- ioctl accepts a cdrom_read structure, but actual source code
- reads a cdrom_msf structure and writes a buffer of data to
- the same address.
-
- MSF values are converted to LBA values via this formula:
-
- lba = (((m * CD_SECS) + s) * CD_FRAMES + f) - CD_MSF_OFFSET;
-
-
-
-
-CDROMREADMODE1 Read CDROM mode 1 data (2048 Bytes)
- (struct cdrom_read)
-
- notes:
- Identical to CDROMREADRAW except that block size is
- CD_FRAMESIZE (2048) bytes
-
-
-
-CDROMREADMODE2 Read CDROM mode 2 data (2336 Bytes)
- (struct cdrom_read)
-
- notes:
- Identical to CDROMREADRAW except that block size is
- CD_FRAMESIZE_RAW0 (2336) bytes
-
-
-
-CDROMREADAUDIO (struct cdrom_read_audio)
-
- usage:
-
- struct cdrom_read_audio ra;
- ioctl(fd, CDROMREADAUDIO, &ra);
-
- inputs:
- cdrom_read_audio structure containing read start
- point and length
-
- outputs:
- audio data, returned to buffer indicated by ra
-
- error return:
- EINVAL format not CDROM_MSF or CDROM_LBA
- EINVAL nframes not in range [1 75]
- ENXIO drive has no queue (probably means invalid fd)
- ENOMEM out of memory
-
-
-CDROMEJECT_SW enable(1)/disable(0) auto-ejecting
-
- usage:
-
- int val;
- ioctl(fd, CDROMEJECT_SW, val);
-
- inputs:
- Flag specifying auto-eject flag.
-
- outputs: none
-
- error return:
- ENOSYS Drive is not capable of ejecting.
- EBUSY Door is locked
-
-
-
-
-CDROMMULTISESSION Obtain the start-of-last-session
- address of multi session disks
- (struct cdrom_multisession)
- usage:
-
- struct cdrom_multisession ms_info;
- ioctl(fd, CDROMMULTISESSION, &ms_info);
-
- inputs:
- cdrom_multisession structure containing desired
- format.
-
- outputs:
- cdrom_multisession structure is filled with last_session
- information.
-
- error return:
- EINVAL format not CDROM_MSF or CDROM_LBA
-
-
-CDROM_GET_MCN Obtain the "Universal Product Code"
- if available (struct cdrom_mcn)
-
- usage:
-
- struct cdrom_mcn mcn;
- ioctl(fd, CDROM_GET_MCN, &mcn);
-
- inputs: none
-
- outputs:
- Universal Product Code
-
- error return:
- ENOSYS Drive is not capable of reading MCN data.
-
- notes:
- Source code comments state:
-
- The following function is implemented, although very few
- audio discs give Universal Product Code information, which
- should just be the Medium Catalog Number on the box. Note,
- that the way the code is written on the CD is /not/ uniform
- across all discs!
-
-
-
-
-CDROM_GET_UPC CDROM_GET_MCN (deprecated)
-
- Not implemented, as of 2.6.8.1
-
-
-
-CDROMRESET hard-reset the drive
-
- usage:
-
- ioctl(fd, CDROMRESET, 0);
-
- inputs: none
-
- outputs: none
-
- error return:
- EACCES Access denied: requires CAP_SYS_ADMIN
- ENOSYS Drive is not capable of resetting.
-
-
-
-
-CDROMREADCOOKED read data in cooked mode
-
- usage:
-
- u8 buffer[CD_FRAMESIZE]
- ioctl(fd, CDROMREADCOOKED, buffer);
-
- inputs: none
-
- outputs:
- 2048 bytes of data, "cooked" mode.
-
- notes:
- Not implemented on all drives.
-
-
-
-
-CDROMREADALL read all 2646 bytes
-
- Same as CDROMREADCOOKED, but reads 2646 bytes.
-
-
-
-CDROMSEEK seek msf address
-
- usage:
-
- struct cdrom_msf msf;
- ioctl(fd, CDROMSEEK, &msf);
-
- inputs:
- MSF address to seek to.
-
- outputs: none
-
-
-
-CDROMPLAYBLK scsi-cd only, (struct cdrom_blk)
-
- usage:
-
- struct cdrom_blk blk;
- ioctl(fd, CDROMPLAYBLK, &blk);
-
- inputs:
- Region to play
-
- outputs: none
-
-
-
-CDROMGETSPINDOWN
-
- usage:
-
- char spindown;
- ioctl(fd, CDROMGETSPINDOWN, &spindown);
-
- inputs: none
-
- outputs:
- The value of the current 4-bit spindown value.
-
-
-
-
-CDROMSETSPINDOWN
-
- usage:
-
- char spindown
- ioctl(fd, CDROMSETSPINDOWN, &spindown);
-
- inputs:
- 4-bit value used to control spindown (TODO: more detail here)
-
- outputs: none
-
-
-
-
-
-CDROM_SET_OPTIONS Set behavior options
-
- usage:
-
- int options;
- ioctl(fd, CDROM_SET_OPTIONS, options);
-
- inputs:
- New values for drive options. The logical 'or' of:
- CDO_AUTO_CLOSE close tray on first open(2)
- CDO_AUTO_EJECT open tray on last release
- CDO_USE_FFLAGS use O_NONBLOCK information on open
- CDO_LOCK lock tray on open files
- CDO_CHECK_TYPE check type on open for data
-
- outputs:
- Returns the resulting options settings in the
- ioctl return value. Returns -1 on error.
-
- error return:
- ENOSYS selected option(s) not supported by drive.
-
-
-
-
-CDROM_CLEAR_OPTIONS Clear behavior options
-
- Same as CDROM_SET_OPTIONS, except that selected options are
- turned off.
-
-
-
-CDROM_SELECT_SPEED Set the CD-ROM speed
-
- usage:
-
- int speed;
- ioctl(fd, CDROM_SELECT_SPEED, speed);
-
- inputs:
- New drive speed.
-
- outputs: none
-
- error return:
- ENOSYS speed selection not supported by drive.
-
-
-
-CDROM_SELECT_DISC Select disc (for juke-boxes)
-
- usage:
-
- int disk;
- ioctl(fd, CDROM_SELECT_DISC, disk);
-
- inputs:
- Disk to load into drive.
-
- outputs: none
-
- error return:
- EINVAL Disk number beyond capacity of drive
-
-
-
-CDROM_MEDIA_CHANGED Check is media changed
-
- usage:
-
- int slot;
- ioctl(fd, CDROM_MEDIA_CHANGED, slot);
-
- inputs:
- Slot number to be tested, always zero except for jukeboxes.
- May also be special values CDSL_NONE or CDSL_CURRENT
-
- outputs:
- Ioctl return value is 0 or 1 depending on whether the media
- has been changed, or -1 on error.
-
- error returns:
- ENOSYS Drive can't detect media change
- EINVAL Slot number beyond capacity of drive
- ENOMEM Out of memory
-
-
-
-CDROM_DRIVE_STATUS Get tray position, etc.
-
- usage:
-
- int slot;
- ioctl(fd, CDROM_DRIVE_STATUS, slot);
-
- inputs:
- Slot number to be tested, always zero except for jukeboxes.
- May also be special values CDSL_NONE or CDSL_CURRENT
-
- outputs:
- Ioctl return value will be one of the following values
- from <linux/cdrom.h>:
-
- CDS_NO_INFO Information not available.
- CDS_NO_DISC
- CDS_TRAY_OPEN
- CDS_DRIVE_NOT_READY
- CDS_DISC_OK
- -1 error
-
- error returns:
- ENOSYS Drive can't detect drive status
- EINVAL Slot number beyond capacity of drive
- ENOMEM Out of memory
-
-
-
-
-CDROM_DISC_STATUS Get disc type, etc.
-
- usage:
-
- ioctl(fd, CDROM_DISC_STATUS, 0);
-
- inputs: none
-
- outputs:
- Ioctl return value will be one of the following values
- from <linux/cdrom.h>:
- CDS_NO_INFO
- CDS_AUDIO
- CDS_MIXED
- CDS_XA_2_2
- CDS_XA_2_1
- CDS_DATA_1
-
- error returns: none at present
-
- notes:
- Source code comments state:
-
- Ok, this is where problems start. The current interface for
- the CDROM_DISC_STATUS ioctl is flawed. It makes the false
- assumption that CDs are all CDS_DATA_1 or all CDS_AUDIO, etc.
- Unfortunately, while this is often the case, it is also
- very common for CDs to have some tracks with data, and some
- tracks with audio. Just because I feel like it, I declare
- the following to be the best way to cope. If the CD has
- ANY data tracks on it, it will be returned as a data CD.
- If it has any XA tracks, I will return it as that. Now I
- could simplify this interface by combining these returns with
- the above, but this more clearly demonstrates the problem
- with the current interface. Too bad this wasn't designed
- to use bitmasks... -Erik
-
- Well, now we have the option CDS_MIXED: a mixed-type CD.
- User level programmers might feel the ioctl is not very
- useful.
- ---david
-
-
-
-
-CDROM_CHANGER_NSLOTS Get number of slots
-
- usage:
-
- ioctl(fd, CDROM_CHANGER_NSLOTS, 0);
-
- inputs: none
-
- outputs:
- The ioctl return value will be the number of slots in a
- CD changer. Typically 1 for non-multi-disk devices.
-
- error returns: none
-
-
-
-CDROM_LOCKDOOR lock or unlock door
-
- usage:
-
- int lock;
- ioctl(fd, CDROM_LOCKDOOR, lock);
-
- inputs:
- Door lock flag, 1=lock, 0=unlock
-
- outputs: none
-
- error returns:
- EDRIVE_CANT_DO_THIS Door lock function not supported.
- EBUSY Attempt to unlock when multiple users
- have the drive open and not CAP_SYS_ADMIN
-
- notes:
- As of 2.6.8.1, the lock flag is a global lock, meaning that
- all CD drives will be locked or unlocked together. This is
- probably a bug.
-
- The EDRIVE_CANT_DO_THIS value is defined in <linux/cdrom.h>
- and is currently (2.6.8.1) the same as EOPNOTSUPP
-
-
-
-CDROM_DEBUG Turn debug messages on/off
-
- usage:
-
- int debug;
- ioctl(fd, CDROM_DEBUG, debug);
-
- inputs:
- Cdrom debug flag, 0=disable, 1=enable
-
- outputs:
- The ioctl return value will be the new debug flag.
-
- error return:
- EACCES Access denied: requires CAP_SYS_ADMIN
-
-
-
-CDROM_GET_CAPABILITY get capabilities
-
- usage:
-
- ioctl(fd, CDROM_GET_CAPABILITY, 0);
-
- inputs: none
-
- outputs:
- The ioctl return value is the current device capability
- flags. See CDC_CLOSE_TRAY, CDC_OPEN_TRAY, etc.
-
-
-
-CDROMAUDIOBUFSIZ set the audio buffer size
-
- usage:
-
- int arg;
- ioctl(fd, CDROMAUDIOBUFSIZ, val);
-
- inputs:
- New audio buffer size
-
- outputs:
- The ioctl return value is the new audio buffer size, or -1
- on error.
-
- error return:
- ENOSYS Not supported by this driver.
-
- notes:
- Not supported by all drivers.
-
-
-
-DVD_READ_STRUCT Read structure
-
- usage:
-
- dvd_struct s;
- ioctl(fd, DVD_READ_STRUCT, &s);
-
- inputs:
- dvd_struct structure, containing:
- type specifies the information desired, one of
- DVD_STRUCT_PHYSICAL, DVD_STRUCT_COPYRIGHT,
- DVD_STRUCT_DISCKEY, DVD_STRUCT_BCA,
- DVD_STRUCT_MANUFACT
- physical.layer_num desired layer, indexed from 0
- copyright.layer_num desired layer, indexed from 0
- disckey.agid
-
- outputs:
- dvd_struct structure, containing:
- physical for type == DVD_STRUCT_PHYSICAL
- copyright for type == DVD_STRUCT_COPYRIGHT
- disckey.value for type == DVD_STRUCT_DISCKEY
- bca.{len,value} for type == DVD_STRUCT_BCA
- manufact.{len,valu} for type == DVD_STRUCT_MANUFACT
-
- error returns:
- EINVAL physical.layer_num exceeds number of layers
- EIO Received invalid response from drive
-
-
-
-DVD_WRITE_STRUCT Write structure
-
- Not implemented, as of 2.6.8.1
-
-
-
-DVD_AUTH Authentication
-
- usage:
-
- dvd_authinfo ai;
- ioctl(fd, DVD_AUTH, &ai);
-
- inputs:
- dvd_authinfo structure. See <linux/cdrom.h>
-
- outputs:
- dvd_authinfo structure.
-
- error return:
- ENOTTY ai.type not recognized.
-
-
-
-CDROM_SEND_PACKET send a packet to the drive
-
- usage:
-
- struct cdrom_generic_command cgc;
- ioctl(fd, CDROM_SEND_PACKET, &cgc);
-
- inputs:
- cdrom_generic_command structure containing the packet to send.
-
- outputs: none
- cdrom_generic_command structure containing results.
-
- error return:
- EIO command failed.
- EPERM Operation not permitted, either because a
- write command was attempted on a drive which
- is opened read-only, or because the command
- requires CAP_SYS_RAWIO
- EINVAL cgc.data_direction not set
-
-
-
-CDROM_NEXT_WRITABLE get next writable block
-
- usage:
-
- long next;
- ioctl(fd, CDROM_NEXT_WRITABLE, &next);
-
- inputs: none
-
- outputs:
- The next writable block.
-
- notes:
- If the device does not support this ioctl directly, the
- ioctl will return CDROM_LAST_WRITTEN + 7.
-
-
-
-CDROM_LAST_WRITTEN get last block written on disc
-
- usage:
-
- long last;
- ioctl(fd, CDROM_LAST_WRITTEN, &last);
-
- inputs: none
-
- outputs:
- The last block written on disc
-
- notes:
- If the device does not support this ioctl directly, the
- result is derived from the disc's table of contents. If the
- table of contents can't be read, this ioctl returns an
- error.
diff --git a/Documentation/ioctl/hdio.txt b/Documentation/ioctl/hdio.rst
index 18eb98c44ffe..e822e3dff176 100644
--- a/Documentation/ioctl/hdio.txt
+++ b/Documentation/ioctl/hdio.rst
@@ -1,9 +1,10 @@
- Summary of HDIO_ ioctl calls.
- ============================
+==============================
+Summary of `HDIO_` ioctl calls
+==============================
- Edward A. Falk <efalk@google.com>
+- Edward A. Falk <efalk@google.com>
- November, 2004
+November, 2004
This document attempts to describe the ioctl(2) calls supported by
the HD/IDE layer. These are by-and-large implemented (as of Linux 2.6)
@@ -14,6 +15,7 @@ are as follows:
ioctls that pass argument pointers to user space:
+ ======================= =======================================
HDIO_GETGEO get device geometry
HDIO_GET_UNMASKINTR get current unmask setting
HDIO_GET_MULTCOUNT get current IDE blockmode setting
@@ -36,9 +38,11 @@ are as follows:
HDIO_DRIVE_TASK execute task and special drive command
HDIO_DRIVE_CMD execute a special drive command
HDIO_DRIVE_CMD_AEB HDIO_DRIVE_TASK
+ ======================= =======================================
ioctls that pass non-pointer values:
+ ======================= =======================================
HDIO_SET_MULTCOUNT change IDE blockmode
HDIO_SET_UNMASKINTR permit other irqs during I/O
HDIO_SET_KEEPSETTINGS keep ioctl settings on reset
@@ -57,16 +61,13 @@ are as follows:
HDIO_SET_IDE_SCSI Set scsi emulation mode on/off
HDIO_SET_SCSI_IDE not implemented yet
+ ======================= =======================================
The information that follows was determined from reading kernel source
code. It is likely that some corrections will be made over time.
-
-
-
-
-
+------------------------------------------------------------------------------
General:
@@ -80,459 +81,610 @@ General:
Unless otherwise specified, all data structures and constants
are defined in <linux/hdreg.h>
+------------------------------------------------------------------------------
+HDIO_GETGEO
+ get device geometry
-HDIO_GETGEO get device geometry
- usage:
+ usage::
struct hd_geometry geom;
+
ioctl(fd, HDIO_GETGEO, &geom);
- inputs: none
+ inputs:
+ none
+
+
outputs:
+ hd_geometry structure containing:
- hd_geometry structure containing:
+ ========= ==================================
heads number of heads
sectors number of sectors/track
cylinders number of cylinders, mod 65536
start starting sector of this partition.
+ ========= ==================================
error returns:
- EINVAL if the device is not a disk drive or floppy drive,
- or if the user passes a null pointer
+ - EINVAL
+
+ if the device is not a disk drive or floppy drive,
+ or if the user passes a null pointer
notes:
+ Not particularly useful with modern disk drives, whose geometry
+ is a polite fiction anyway. Modern drives are addressed
+ purely by sector number nowadays (lba addressing), and the
+ drive geometry is an abstraction which is actually subject
+ to change. Currently (as of Nov 2004), the geometry values
+ are the "bios" values -- presumably the values the drive had
+ when Linux first booted.
- Not particularly useful with modern disk drives, whose geometry
- is a polite fiction anyway. Modern drives are addressed
- purely by sector number nowadays (lba addressing), and the
- drive geometry is an abstraction which is actually subject
- to change. Currently (as of Nov 2004), the geometry values
- are the "bios" values -- presumably the values the drive had
- when Linux first booted.
+ In addition, the cylinders field of the hd_geometry is an
+ unsigned short, meaning that on most architectures, this
+ ioctl will not return a meaningful value on drives with more
+ than 65535 tracks.
- In addition, the cylinders field of the hd_geometry is an
- unsigned short, meaning that on most architectures, this
- ioctl will not return a meaningful value on drives with more
- than 65535 tracks.
+ The start field is unsigned long, meaning that it will not
+ contain a meaningful value for disks over 219 Gb in size.
- The start field is unsigned long, meaning that it will not
- contain a meaningful value for disks over 219 Gb in size.
+HDIO_GET_UNMASKINTR
+ get current unmask setting
-HDIO_GET_UNMASKINTR get current unmask setting
- usage:
+ usage::
long val;
+
ioctl(fd, HDIO_GET_UNMASKINTR, &val);
- inputs: none
+ inputs:
+ none
+
+
outputs:
- The value of the drive's current unmask setting
+ The value of the drive's current unmask setting
-HDIO_SET_UNMASKINTR permit other irqs during I/O
- usage:
+
+HDIO_SET_UNMASKINTR
+ permit other irqs during I/O
+
+
+ usage::
unsigned long val;
+
ioctl(fd, HDIO_SET_UNMASKINTR, val);
inputs:
- New value for unmask flag
+ New value for unmask flag
+
+
+
+ outputs:
+ none
+
- outputs: none
error return:
- EINVAL (bdev != bdev->bd_contains) (not sure what this means)
- EACCES Access denied: requires CAP_SYS_ADMIN
- EINVAL value out of range [0 1]
- EBUSY Controller busy
+ - EINVAL (bdev != bdev->bd_contains) (not sure what this means)
+ - EACCES Access denied: requires CAP_SYS_ADMIN
+ - EINVAL value out of range [0 1]
+ - EBUSY Controller busy
+
+HDIO_GET_MULTCOUNT
+ get current IDE blockmode setting
-HDIO_GET_MULTCOUNT get current IDE blockmode setting
- usage:
+ usage::
long val;
+
ioctl(fd, HDIO_GET_MULTCOUNT, &val);
- inputs: none
+ inputs:
+ none
+
+
outputs:
- The value of the current IDE block mode setting. This
- controls how many sectors the drive will transfer per
- interrupt.
+ The value of the current IDE block mode setting. This
+ controls how many sectors the drive will transfer per
+ interrupt.
+
+HDIO_SET_MULTCOUNT
+ change IDE blockmode
-HDIO_SET_MULTCOUNT change IDE blockmode
- usage:
+ usage::
int val;
+
ioctl(fd, HDIO_SET_MULTCOUNT, val);
inputs:
- New value for IDE block mode setting. This controls how many
- sectors the drive will transfer per interrupt.
+ New value for IDE block mode setting. This controls how many
+ sectors the drive will transfer per interrupt.
+
+ outputs:
+ none
+
- outputs: none
error return:
- EINVAL (bdev != bdev->bd_contains) (not sure what this means)
- EACCES Access denied: requires CAP_SYS_ADMIN
- EINVAL value out of range supported by disk.
- EBUSY Controller busy or blockmode already set.
- EIO Drive did not accept new block mode.
+ - EINVAL (bdev != bdev->bd_contains) (not sure what this means)
+ - EACCES Access denied: requires CAP_SYS_ADMIN
+ - EINVAL value out of range supported by disk.
+ - EBUSY Controller busy or blockmode already set.
+ - EIO Drive did not accept new block mode.
notes:
-
- Source code comments read:
+ Source code comments read::
This is tightly woven into the driver->do_special cannot
touch. DON'T do it again until a total personality rewrite
is committed.
If blockmode has already been set, this ioctl will fail with
- EBUSY
+ -EBUSY
-HDIO_GET_QDMA get use-qdma flag
+HDIO_GET_QDMA
+ get use-qdma flag
+
Not implemented, as of 2.6.8.1
-HDIO_SET_XFER set transfer rate via proc
+HDIO_SET_XFER
+ set transfer rate via proc
+
Not implemented, as of 2.6.8.1
-HDIO_OBSOLETE_IDENTITY OBSOLETE, DO NOT USE
+HDIO_OBSOLETE_IDENTITY
+ OBSOLETE, DO NOT USE
+
Same as HDIO_GET_IDENTITY (see below), except that it only
returns the first 142 bytes of drive identity information.
-HDIO_GET_IDENTITY get IDE identification info
+HDIO_GET_IDENTITY
+ get IDE identification info
+
- usage:
+ usage::
unsigned char identity[512];
+
ioctl(fd, HDIO_GET_IDENTITY, identity);
- inputs: none
+ inputs:
+ none
- outputs:
- ATA drive identity information. For full description, see
- the IDENTIFY DEVICE and IDENTIFY PACKET DEVICE commands in
- the ATA specification.
+
+ outputs:
+ ATA drive identity information. For full description, see
+ the IDENTIFY DEVICE and IDENTIFY PACKET DEVICE commands in
+ the ATA specification.
error returns:
- EINVAL (bdev != bdev->bd_contains) (not sure what this means)
- ENOMSG IDENTIFY DEVICE information not available
+ - EINVAL (bdev != bdev->bd_contains) (not sure what this means)
+ - ENOMSG IDENTIFY DEVICE information not available
notes:
+ Returns information that was obtained when the drive was
+ probed. Some of this information is subject to change, and
+ this ioctl does not re-probe the drive to update the
+ information.
- Returns information that was obtained when the drive was
- probed. Some of this information is subject to change, and
- this ioctl does not re-probe the drive to update the
- information.
+ This information is also available from /proc/ide/hdX/identify
- This information is also available from /proc/ide/hdX/identify
+HDIO_GET_KEEPSETTINGS
+ get keep-settings-on-reset flag
-HDIO_GET_KEEPSETTINGS get keep-settings-on-reset flag
- usage:
+ usage::
long val;
+
ioctl(fd, HDIO_GET_KEEPSETTINGS, &val);
- inputs: none
+ inputs:
+ none
+
+
outputs:
- The value of the current "keep settings" flag
+ The value of the current "keep settings" flag
+
+
notes:
+ When set, indicates that kernel should restore settings
+ after a drive reset.
- When set, indicates that kernel should restore settings
- after a drive reset.
+HDIO_SET_KEEPSETTINGS
+ keep ioctl settings on reset
-HDIO_SET_KEEPSETTINGS keep ioctl settings on reset
- usage:
+ usage::
long val;
+
ioctl(fd, HDIO_SET_KEEPSETTINGS, val);
inputs:
- New value for keep_settings flag
+ New value for keep_settings flag
+
+
+
+ outputs:
+ none
+
- outputs: none
error return:
- EINVAL (bdev != bdev->bd_contains) (not sure what this means)
- EACCES Access denied: requires CAP_SYS_ADMIN
- EINVAL value out of range [0 1]
- EBUSY Controller busy
+ - EINVAL (bdev != bdev->bd_contains) (not sure what this means)
+ - EACCES Access denied: requires CAP_SYS_ADMIN
+ - EINVAL value out of range [0 1]
+ - EBUSY Controller busy
+
+HDIO_GET_32BIT
+ get current io_32bit setting
-HDIO_GET_32BIT get current io_32bit setting
- usage:
+ usage::
long val;
+
ioctl(fd, HDIO_GET_32BIT, &val);
- inputs: none
+ inputs:
+ none
+
+
outputs:
- The value of the current io_32bit setting
+ The value of the current io_32bit setting
+
+
notes:
+ 0=16-bit, 1=32-bit, 2,3 = 32bit+sync
+
- 0=16-bit, 1=32-bit, 2,3 = 32bit+sync
-HDIO_GET_NOWERR get ignore-write-error flag
+HDIO_GET_NOWERR
+ get ignore-write-error flag
- usage:
+
+ usage::
long val;
+
ioctl(fd, HDIO_GET_NOWERR, &val);
- inputs: none
+ inputs:
+ none
+
+
outputs:
- The value of the current ignore-write-error flag
+ The value of the current ignore-write-error flag
-HDIO_GET_DMA get use-dma flag
- usage:
+
+HDIO_GET_DMA
+ get use-dma flag
+
+
+ usage::
long val;
+
ioctl(fd, HDIO_GET_DMA, &val);
- inputs: none
+ inputs:
+ none
+
+
outputs:
- The value of the current use-dma flag
+ The value of the current use-dma flag
-HDIO_GET_NICE get nice flags
- usage:
+
+HDIO_GET_NICE
+ get nice flags
+
+
+ usage::
long nice;
+
ioctl(fd, HDIO_GET_NICE, &nice);
- inputs: none
+ inputs:
+ none
+
+
outputs:
+ The drive's "nice" values.
+
- The drive's "nice" values.
notes:
+ Per-drive flags which determine when the system will give more
+ bandwidth to other devices sharing the same IDE bus.
- Per-drive flags which determine when the system will give more
- bandwidth to other devices sharing the same IDE bus.
- See <linux/hdreg.h>, near symbol IDE_NICE_DSC_OVERLAP.
+ See <linux/hdreg.h>, near symbol IDE_NICE_DSC_OVERLAP.
-HDIO_SET_NICE set nice flags
+HDIO_SET_NICE
+ set nice flags
- usage:
+
+ usage::
unsigned long nice;
+
...
ioctl(fd, HDIO_SET_NICE, nice);
inputs:
- bitmask of nice flags.
+ bitmask of nice flags.
+
+
+
+ outputs:
+ none
+
- outputs: none
error returns:
- EACCES Access denied: requires CAP_SYS_ADMIN
- EPERM Flags other than DSC_OVERLAP and NICE_1 set.
- EPERM DSC_OVERLAP specified but not supported by drive
+ - EACCES Access denied: requires CAP_SYS_ADMIN
+ - EPERM Flags other than DSC_OVERLAP and NICE_1 set.
+ - EPERM DSC_OVERLAP specified but not supported by drive
notes:
+ This ioctl sets the DSC_OVERLAP and NICE_1 flags from values
+ provided by the user.
- This ioctl sets the DSC_OVERLAP and NICE_1 flags from values
- provided by the user.
+ Nice flags are listed in <linux/hdreg.h>, starting with
+ IDE_NICE_DSC_OVERLAP. These values represent shifts.
- Nice flags are listed in <linux/hdreg.h>, starting with
- IDE_NICE_DSC_OVERLAP. These values represent shifts.
+HDIO_GET_WCACHE
+ get write cache mode on|off
-HDIO_GET_WCACHE get write cache mode on|off
- usage:
+ usage::
long val;
+
ioctl(fd, HDIO_GET_WCACHE, &val);
- inputs: none
+ inputs:
+ none
+
+
outputs:
- The value of the current write cache mode
+ The value of the current write cache mode
-HDIO_GET_ACOUSTIC get acoustic value
- usage:
+
+HDIO_GET_ACOUSTIC
+ get acoustic value
+
+
+ usage::
long val;
+
ioctl(fd, HDIO_GET_ACOUSTIC, &val);
- inputs: none
+ inputs:
+ none
+
+
outputs:
- The value of the current acoustic settings
+ The value of the current acoustic settings
+
+
notes:
+ See HDIO_SET_ACOUSTIC
+
- See HDIO_SET_ACOUSTIC
HDIO_GET_ADDRESS
+ usage::
- usage:
long val;
+
ioctl(fd, HDIO_GET_ADDRESS, &val);
- inputs: none
+ inputs:
+ none
+
+
outputs:
- The value of the current addressing mode:
- 0 = 28-bit
- 1 = 48-bit
- 2 = 48-bit doing 28-bit
- 3 = 64-bit
+ The value of the current addressing mode:
+
+ = ===================
+ 0 28-bit
+ 1 48-bit
+ 2 48-bit doing 28-bit
+ 3 64-bit
+ = ===================
-HDIO_GET_BUSSTATE get the bus state of the hwif
+HDIO_GET_BUSSTATE
+ get the bus state of the hwif
- usage:
+
+ usage::
long state;
+
ioctl(fd, HDIO_GET_BUSSTATE, &state);
- inputs: none
+ inputs:
+ none
+
+
outputs:
- Current power state of the IDE bus. One of BUSSTATE_OFF,
- BUSSTATE_ON, or BUSSTATE_TRISTATE
+ Current power state of the IDE bus. One of BUSSTATE_OFF,
+ BUSSTATE_ON, or BUSSTATE_TRISTATE
error returns:
- EACCES Access denied: requires CAP_SYS_ADMIN
+ - EACCES Access denied: requires CAP_SYS_ADMIN
-HDIO_SET_BUSSTATE set the bus state of the hwif
+HDIO_SET_BUSSTATE
+ set the bus state of the hwif
- usage:
+
+ usage::
int state;
+
...
ioctl(fd, HDIO_SET_BUSSTATE, state);
inputs:
- Desired IDE power state. One of BUSSTATE_OFF, BUSSTATE_ON,
- or BUSSTATE_TRISTATE
+ Desired IDE power state. One of BUSSTATE_OFF, BUSSTATE_ON,
+ or BUSSTATE_TRISTATE
+
+ outputs:
+ none
+
- outputs: none
error returns:
- EACCES Access denied: requires CAP_SYS_RAWIO
- EOPNOTSUPP Hardware interface does not support bus power control
+ - EACCES Access denied: requires CAP_SYS_RAWIO
+ - EOPNOTSUPP Hardware interface does not support bus power control
+
+HDIO_TRISTATE_HWIF
+ execute a channel tristate
-HDIO_TRISTATE_HWIF execute a channel tristate
Not implemented, as of 2.6.8.1. See HDIO_SET_BUSSTATE
-HDIO_DRIVE_RESET execute a device reset
+HDIO_DRIVE_RESET
+ execute a device reset
+
- usage:
+ usage::
int args[3]
+
...
ioctl(fd, HDIO_DRIVE_RESET, args);
- inputs: none
+ inputs:
+ none
+
+
+
+ outputs:
+ none
+
- outputs: none
error returns:
- EACCES Access denied: requires CAP_SYS_ADMIN
- ENXIO No such device: phy dead or ctl_addr == 0
- EIO I/O error: reset timed out or hardware error
+ - EACCES Access denied: requires CAP_SYS_ADMIN
+ - ENXIO No such device: phy dead or ctl_addr == 0
+ - EIO I/O error: reset timed out or hardware error
notes:
- Execute a reset on the device as soon as the current IO
- operation has completed.
+ - Execute a reset on the device as soon as the current IO
+ operation has completed.
- Executes an ATAPI soft reset if applicable, otherwise
- executes an ATA soft reset on the controller.
+ - Executes an ATAPI soft reset if applicable, otherwise
+ executes an ATA soft reset on the controller.
-HDIO_DRIVE_TASKFILE execute raw taskfile
+HDIO_DRIVE_TASKFILE
+ execute raw taskfile
- Note: If you don't have a copy of the ANSI ATA specification
- handy, you should probably ignore this ioctl.
- Execute an ATA disk command directly by writing the "taskfile"
- registers of the drive. Requires ADMIN and RAWIO access
- privileges.
+ Note:
+ If you don't have a copy of the ANSI ATA specification
+ handy, you should probably ignore this ioctl.
+
+ - Execute an ATA disk command directly by writing the "taskfile"
+ registers of the drive. Requires ADMIN and RAWIO access
+ privileges.
- usage:
+ usage::
struct {
+
ide_task_request_t req_task;
u8 outbuf[OUTPUT_SIZE];
u8 inbuf[INPUT_SIZE];
@@ -548,6 +700,7 @@ HDIO_DRIVE_TASKFILE execute raw taskfile
(See below for details on memory area passed to ioctl.)
+ ============ ===================================================
io_ports[8] values to be written to taskfile registers
hob_ports[8] high-order bytes, for extended commands.
out_flags flags indicating which registers are valid
@@ -557,24 +710,29 @@ HDIO_DRIVE_TASKFILE execute raw taskfile
out_size size of output buffer
outbuf buffer of data to be transmitted to disk
inbuf buffer of data to be received from disk (see [1])
+ ============ ===================================================
outputs:
+ =========== ====================================================
io_ports[] values returned in the taskfile registers
hob_ports[] high-order bytes, for extended commands.
out_flags flags indicating which registers are valid (see [2])
in_flags flags indicating which registers should be returned
outbuf buffer of data to be transmitted to disk (see [1])
inbuf buffer of data to be received from disk
+ =========== ====================================================
error returns:
- EACCES CAP_SYS_ADMIN or CAP_SYS_RAWIO privilege not set.
- ENOMSG Device is not a disk drive.
- ENOMEM Unable to allocate memory for task
- EFAULT req_cmd == TASKFILE_IN_OUT (not implemented as of 2.6.8)
- EPERM req_cmd == TASKFILE_MULTI_OUT and drive
- multi-count not yet set.
- EIO Drive failed the command.
+ - EACCES CAP_SYS_ADMIN or CAP_SYS_RAWIO privilege not set.
+ - ENOMSG Device is not a disk drive.
+ - ENOMEM Unable to allocate memory for task
+ - EFAULT req_cmd == TASKFILE_IN_OUT (not implemented as of 2.6.8)
+ - EPERM
+
+ req_cmd == TASKFILE_MULTI_OUT and drive
+ multi-count not yet set.
+ - EIO Drive failed the command.
notes:
@@ -615,22 +773,25 @@ HDIO_DRIVE_TASKFILE execute raw taskfile
Command is passed to the disk drive via the ide_task_request_t
structure, which contains these fields:
+ ============ ===============================================
io_ports[8] values for the taskfile registers
hob_ports[8] high-order bytes, for extended commands
out_flags flags indicating which entries in the
- io_ports[] and hob_ports[] arrays
+ io_ports[] and hob_ports[] arrays
contain valid values. Type ide_reg_valid_t.
in_flags flags indicating which entries in the
- io_ports[] and hob_ports[] arrays
+ io_ports[] and hob_ports[] arrays
are expected to contain valid values
on return.
data_phase See below
req_cmd Command type, see below
out_size output (user->drive) buffer size, bytes
in_size input (drive->user) buffer size, bytes
+ ============ ===============================================
When out_flags is zero, the following registers are loaded.
+ ============ ===============================================
HOB_FEATURE If the drive supports LBA48
HOB_NSECTOR If the drive supports LBA48
HOB_SECTOR If the drive supports LBA48
@@ -644,9 +805,11 @@ HDIO_DRIVE_TASKFILE execute raw taskfile
SELECT First, masked with 0xE0 if LBA48, 0xEF
otherwise; then, or'ed with the default
value of SELECT.
+ ============ ===============================================
If any bit in out_flags is set, the following registers are loaded.
+ ============ ===============================================
HOB_DATA If out_flags.b.data is set. HOB_DATA will
travel on DD8-DD15 on little endian machines
and on DD0-DD7 on big endian machines.
@@ -664,6 +827,7 @@ HDIO_DRIVE_TASKFILE execute raw taskfile
HCYL If out_flags.b.hcyl is set
SELECT Or'ed with the default value of SELECT and
loaded regardless of out_flags.b.select.
+ ============ ===============================================
Taskfile registers are read back from the drive into
{io|hob}_ports[] after the command completes iff one of the
@@ -674,6 +838,7 @@ HDIO_DRIVE_TASKFILE execute raw taskfile
2. One or more than one bits are set in out_flags.
3. The requested data_phase is TASKFILE_NO_DATA.
+ ============ ===============================================
HOB_DATA If in_flags.b.data is set. It will contain
DD8-DD15 on little endian machines and
DD0-DD7 on big endian machines.
@@ -689,10 +854,12 @@ HDIO_DRIVE_TASKFILE execute raw taskfile
SECTOR
LCYL
HCYL
+ ============ ===============================================
The data_phase field describes the data transfer to be
performed. Value is one of:
+ =================== ========================================
TASKFILE_IN
TASKFILE_MULTI_IN
TASKFILE_OUT
@@ -708,15 +875,18 @@ HDIO_DRIVE_TASKFILE execute raw taskfile
TASKFILE_P_OUT unimplemented
TASKFILE_P_OUT_DMA unimplemented
TASKFILE_P_OUT_DMAQ unimplemented
+ =================== ========================================
The req_cmd field classifies the command type. It may be
one of:
+ ======================== =======================================
IDE_DRIVE_TASK_NO_DATA
IDE_DRIVE_TASK_SET_XFER unimplemented
IDE_DRIVE_TASK_IN
IDE_DRIVE_TASK_OUT unimplemented
IDE_DRIVE_TASK_RAW_WRITE
+ ======================== =======================================
[6] Do not access {in|out}_flags->all except for resetting
all the bits. Always access individual bit fields. ->all
@@ -726,45 +896,57 @@ HDIO_DRIVE_TASKFILE execute raw taskfile
-HDIO_DRIVE_CMD execute a special drive command
+HDIO_DRIVE_CMD
+ execute a special drive command
+
Note: If you don't have a copy of the ANSI ATA specification
handy, you should probably ignore this ioctl.
- usage:
+ usage::
u8 args[4+XFER_SIZE];
+
...
ioctl(fd, HDIO_DRIVE_CMD, args);
inputs:
+ Commands other than WIN_SMART:
- Commands other than WIN_SMART
+ ======= =======
args[0] COMMAND
args[1] NSECTOR
args[2] FEATURE
args[3] NSECTOR
+ ======= =======
+
+ WIN_SMART:
- WIN_SMART
+ ======= =======
args[0] COMMAND
args[1] SECTOR
args[2] FEATURE
args[3] NSECTOR
+ ======= =======
outputs:
+ args[] buffer is filled with register values followed by any
+
- args[] buffer is filled with register values followed by any
data returned by the disk.
+
+ ======== ====================================================
args[0] status
args[1] error
args[2] NSECTOR
args[3] undefined
args[4+] NSECTOR * 512 bytes of data returned by the command.
+ ======== ====================================================
error returns:
- EACCES Access denied: requires CAP_SYS_RAWIO
- ENOMEM Unable to allocate memory for task
- EIO Drive reports error
+ - EACCES Access denied: requires CAP_SYS_RAWIO
+ - ENOMEM Unable to allocate memory for task
+ - EIO Drive reports error
notes:
@@ -789,20 +971,24 @@ HDIO_DRIVE_CMD execute a special drive command
-HDIO_DRIVE_TASK execute task and special drive command
+HDIO_DRIVE_TASK
+ execute task and special drive command
+
Note: If you don't have a copy of the ANSI ATA specification
handy, you should probably ignore this ioctl.
- usage:
+ usage::
u8 args[7];
+
...
ioctl(fd, HDIO_DRIVE_TASK, args);
inputs:
+ Taskfile register values:
- Taskfile register values:
+ ======= =======
args[0] COMMAND
args[1] FEATURE
args[2] NSECTOR
@@ -810,10 +996,13 @@ HDIO_DRIVE_TASK execute task and special drive command
args[4] LCYL
args[5] HCYL
args[6] SELECT
+ ======= =======
outputs:
+ Taskfile register values:
+
- Taskfile register values:
+ ======= =======
args[0] status
args[1] error
args[2] NSECTOR
@@ -821,12 +1010,13 @@ HDIO_DRIVE_TASK execute task and special drive command
args[4] LCYL
args[5] HCYL
args[6] SELECT
+ ======= =======
error returns:
- EACCES Access denied: requires CAP_SYS_RAWIO
- ENOMEM Unable to allocate memory for task
- ENOMSG Device is not a disk drive.
- EIO Drive failed the command.
+ - EACCES Access denied: requires CAP_SYS_RAWIO
+ - ENOMEM Unable to allocate memory for task
+ - ENOMSG Device is not a disk drive.
+ - EIO Drive failed the command.
notes:
@@ -836,236 +1026,317 @@ HDIO_DRIVE_TASK execute task and special drive command
-HDIO_DRIVE_CMD_AEB HDIO_DRIVE_TASK
+HDIO_DRIVE_CMD_AEB
+ HDIO_DRIVE_TASK
+
Not implemented, as of 2.6.8.1
-HDIO_SET_32BIT change io_32bit flags
+HDIO_SET_32BIT
+ change io_32bit flags
+
- usage:
+ usage::
int val;
+
ioctl(fd, HDIO_SET_32BIT, val);
inputs:
- New value for io_32bit flag
+ New value for io_32bit flag
+
+
+
+ outputs:
+ none
+
- outputs: none
error return:
- EINVAL (bdev != bdev->bd_contains) (not sure what this means)
- EACCES Access denied: requires CAP_SYS_ADMIN
- EINVAL value out of range [0 3]
- EBUSY Controller busy
+ - EINVAL (bdev != bdev->bd_contains) (not sure what this means)
+ - EACCES Access denied: requires CAP_SYS_ADMIN
+ - EINVAL value out of range [0 3]
+ - EBUSY Controller busy
-HDIO_SET_NOWERR change ignore-write-error flag
+HDIO_SET_NOWERR
+ change ignore-write-error flag
- usage:
+
+ usage::
int val;
+
ioctl(fd, HDIO_SET_NOWERR, val);
inputs:
- New value for ignore-write-error flag. Used for ignoring
+ New value for ignore-write-error flag. Used for ignoring
+
+
WRERR_STAT
- outputs: none
+ outputs:
+ none
+
+
error return:
- EINVAL (bdev != bdev->bd_contains) (not sure what this means)
- EACCES Access denied: requires CAP_SYS_ADMIN
- EINVAL value out of range [0 1]
- EBUSY Controller busy
+ - EINVAL (bdev != bdev->bd_contains) (not sure what this means)
+ - EACCES Access denied: requires CAP_SYS_ADMIN
+ - EINVAL value out of range [0 1]
+ - EBUSY Controller busy
+
+HDIO_SET_DMA
+ change use-dma flag
-HDIO_SET_DMA change use-dma flag
- usage:
+ usage::
long val;
+
ioctl(fd, HDIO_SET_DMA, val);
inputs:
- New value for use-dma flag
+ New value for use-dma flag
+
+
+
+ outputs:
+ none
+
- outputs: none
error return:
- EINVAL (bdev != bdev->bd_contains) (not sure what this means)
- EACCES Access denied: requires CAP_SYS_ADMIN
- EINVAL value out of range [0 1]
- EBUSY Controller busy
+ - EINVAL (bdev != bdev->bd_contains) (not sure what this means)
+ - EACCES Access denied: requires CAP_SYS_ADMIN
+ - EINVAL value out of range [0 1]
+ - EBUSY Controller busy
+
+HDIO_SET_PIO_MODE
+ reconfig interface to new speed
-HDIO_SET_PIO_MODE reconfig interface to new speed
- usage:
+ usage::
long val;
+
ioctl(fd, HDIO_SET_PIO_MODE, val);
inputs:
- New interface speed.
+ New interface speed.
+
+
+
+ outputs:
+ none
+
- outputs: none
error return:
- EINVAL (bdev != bdev->bd_contains) (not sure what this means)
- EACCES Access denied: requires CAP_SYS_ADMIN
- EINVAL value out of range [0 255]
- EBUSY Controller busy
+ - EINVAL (bdev != bdev->bd_contains) (not sure what this means)
+ - EACCES Access denied: requires CAP_SYS_ADMIN
+ - EINVAL value out of range [0 255]
+ - EBUSY Controller busy
+
+HDIO_SCAN_HWIF
+ register and (re)scan interface
-HDIO_SCAN_HWIF register and (re)scan interface
- usage:
+ usage::
int args[3]
+
...
ioctl(fd, HDIO_SCAN_HWIF, args);
inputs:
+
+ ======= =========================
args[0] io address to probe
+
+
args[1] control address to probe
args[2] irq number
+ ======= =========================
+
+ outputs:
+ none
+
- outputs: none
error returns:
- EACCES Access denied: requires CAP_SYS_RAWIO
- EIO Probe failed.
+ - EACCES Access denied: requires CAP_SYS_RAWIO
+ - EIO Probe failed.
notes:
+ This ioctl initializes the addresses and irq for a disk
+ controller, probes for drives, and creates /proc/ide
+ interfaces as appropriate.
- This ioctl initializes the addresses and irq for a disk
- controller, probes for drives, and creates /proc/ide
- interfaces as appropriate.
+HDIO_UNREGISTER_HWIF
+ unregister interface
-HDIO_UNREGISTER_HWIF unregister interface
- usage:
+ usage::
int index;
+
ioctl(fd, HDIO_UNREGISTER_HWIF, index);
inputs:
- index index of hardware interface to unregister
+ index index of hardware interface to unregister
+
+
+
+ outputs:
+ none
+
- outputs: none
error returns:
- EACCES Access denied: requires CAP_SYS_RAWIO
+ - EACCES Access denied: requires CAP_SYS_RAWIO
notes:
+ This ioctl removes a hardware interface from the kernel.
- This ioctl removes a hardware interface from the kernel.
+ Currently (2.6.8) this ioctl silently fails if any drive on
+ the interface is busy.
- Currently (2.6.8) this ioctl silently fails if any drive on
- the interface is busy.
+HDIO_SET_WCACHE
+ change write cache enable-disable
-HDIO_SET_WCACHE change write cache enable-disable
- usage:
+ usage::
int val;
+
ioctl(fd, HDIO_SET_WCACHE, val);
inputs:
- New value for write cache enable
+ New value for write cache enable
+
+
+
+ outputs:
+ none
+
- outputs: none
error return:
- EINVAL (bdev != bdev->bd_contains) (not sure what this means)
- EACCES Access denied: requires CAP_SYS_ADMIN
- EINVAL value out of range [0 1]
- EBUSY Controller busy
+ - EINVAL (bdev != bdev->bd_contains) (not sure what this means)
+ - EACCES Access denied: requires CAP_SYS_ADMIN
+ - EINVAL value out of range [0 1]
+ - EBUSY Controller busy
+
+HDIO_SET_ACOUSTIC
+ change acoustic behavior
-HDIO_SET_ACOUSTIC change acoustic behavior
- usage:
+ usage::
int val;
+
ioctl(fd, HDIO_SET_ACOUSTIC, val);
inputs:
- New value for drive acoustic settings
+ New value for drive acoustic settings
+
+
+
+ outputs:
+ none
+
- outputs: none
error return:
- EINVAL (bdev != bdev->bd_contains) (not sure what this means)
- EACCES Access denied: requires CAP_SYS_ADMIN
- EINVAL value out of range [0 254]
- EBUSY Controller busy
+ - EINVAL (bdev != bdev->bd_contains) (not sure what this means)
+ - EACCES Access denied: requires CAP_SYS_ADMIN
+ - EINVAL value out of range [0 254]
+ - EBUSY Controller busy
-HDIO_SET_QDMA change use-qdma flag
+HDIO_SET_QDMA
+ change use-qdma flag
+
Not implemented, as of 2.6.8.1
-HDIO_SET_ADDRESS change lba addressing modes
+HDIO_SET_ADDRESS
+ change lba addressing modes
+
- usage:
+ usage::
int val;
+
ioctl(fd, HDIO_SET_ADDRESS, val);
inputs:
- New value for addressing mode
- 0 = 28-bit
- 1 = 48-bit
- 2 = 48-bit doing 28-bit
+ New value for addressing mode
+
+ = ===================
+ 0 28-bit
+ 1 48-bit
+ 2 48-bit doing 28-bit
+ = ===================
+
+ outputs:
+ none
+
- outputs: none
error return:
- EINVAL (bdev != bdev->bd_contains) (not sure what this means)
- EACCES Access denied: requires CAP_SYS_ADMIN
- EINVAL value out of range [0 2]
- EBUSY Controller busy
- EIO Drive does not support lba48 mode.
+ - EINVAL (bdev != bdev->bd_contains) (not sure what this means)
+ - EACCES Access denied: requires CAP_SYS_ADMIN
+ - EINVAL value out of range [0 2]
+ - EBUSY Controller busy
+ - EIO Drive does not support lba48 mode.
HDIO_SET_IDE_SCSI
+ usage::
- usage:
long val;
+
ioctl(fd, HDIO_SET_IDE_SCSI, val);
inputs:
- New value for scsi emulation mode (?)
+ New value for scsi emulation mode (?)
- outputs: none
- error return:
- EINVAL (bdev != bdev->bd_contains) (not sure what this means)
- EACCES Access denied: requires CAP_SYS_ADMIN
- EINVAL value out of range [0 1]
- EBUSY Controller busy
+ outputs:
+ none
-HDIO_SET_SCSI_IDE
- Not implemented, as of 2.6.8.1
+ error return:
+ - EINVAL (bdev != bdev->bd_contains) (not sure what this means)
+ - EACCES Access denied: requires CAP_SYS_ADMIN
+ - EINVAL value out of range [0 1]
+ - EBUSY Controller busy
+
+HDIO_SET_SCSI_IDE
+ Not implemented, as of 2.6.8.1
diff --git a/Documentation/ioctl/index.rst b/Documentation/ioctl/index.rst
new file mode 100644
index 000000000000..0f0a857f6615
--- /dev/null
+++ b/Documentation/ioctl/index.rst
@@ -0,0 +1,16 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+======
+IOCTLs
+======
+
+.. toctree::
+ :maxdepth: 1
+
+ ioctl-number
+
+ botching-up-ioctls
+ ioctl-decoding
+
+ cdrom
+ hdio
diff --git a/Documentation/ioctl/ioctl-decoding.txt b/Documentation/ioctl/ioctl-decoding.rst
index e35efb0cec2e..380d6bb3e3ea 100644
--- a/Documentation/ioctl/ioctl-decoding.txt
+++ b/Documentation/ioctl/ioctl-decoding.rst
@@ -1,10 +1,16 @@
+==============================
+Decoding an IOCTL Magic Number
+==============================
+
To decode a hex IOCTL code:
Most architectures use this generic format, but check
include/ARCH/ioctl.h for specifics, e.g. powerpc
uses 3 bits to encode read/write and 13 bits for size.
- bits meaning
+ ====== ==================================
+ bits meaning
+ ====== ==================================
31-30 00 - no parameters: uses _IO macro
10 - read: _IOR
01 - write: _IOW
@@ -16,9 +22,10 @@ uses 3 bits to encode read/write and 13 bits for size.
unique to each driver
7-0 function #
+ ====== ==================================
So for example 0x82187201 is a read with arg length of 0x218,
-character 'r' function 1. Grepping the source reveals this is:
+character 'r' function 1. Grepping the source reveals this is::
-#define VFAT_IOCTL_READDIR_BOTH _IOR('r', 1, struct dirent [2])
+ #define VFAT_IOCTL_READDIR_BOTH _IOR('r', 1, struct dirent [2])
diff --git a/Documentation/ioctl/ioctl-number.rst b/Documentation/ioctl/ioctl-number.rst
new file mode 100644
index 000000000000..7f8dcae7a230
--- /dev/null
+++ b/Documentation/ioctl/ioctl-number.rst
@@ -0,0 +1,361 @@
+=============
+Ioctl Numbers
+=============
+
+19 October 1999
+
+Michael Elizabeth Chastain
+<mec@shout.net>
+
+If you are adding new ioctl's to the kernel, you should use the _IO
+macros defined in <linux/ioctl.h>:
+
+ ====== == ============================================
+ _IO an ioctl with no parameters
+ _IOW an ioctl with write parameters (copy_from_user)
+ _IOR an ioctl with read parameters (copy_to_user)
+ _IOWR an ioctl with both write and read parameters.
+ ====== == ============================================
+
+'Write' and 'read' are from the user's point of view, just like the
+system calls 'write' and 'read'. For example, a SET_FOO ioctl would
+be _IOW, although the kernel would actually read data from user space;
+a GET_FOO ioctl would be _IOR, although the kernel would actually write
+data to user space.
+
+The first argument to _IO, _IOW, _IOR, or _IOWR is an identifying letter
+or number from the table below. Because of the large number of drivers,
+many drivers share a partial letter with other drivers.
+
+If you are writing a driver for a new device and need a letter, pick an
+unused block with enough room for expansion: 32 to 256 ioctl commands.
+You can register the block by patching this file and submitting the
+patch to Linus Torvalds. Or you can e-mail me at <mec@shout.net> and
+I'll register one for you.
+
+The second argument to _IO, _IOW, _IOR, or _IOWR is a sequence number
+to distinguish ioctls from each other. The third argument to _IOW,
+_IOR, or _IOWR is the type of the data going into the kernel or coming
+out of the kernel (e.g. 'int' or 'struct foo'). NOTE! Do NOT use
+sizeof(arg) as the third argument as this results in your ioctl thinking
+it passes an argument of type size_t.
+
+Some devices use their major number as the identifier; this is OK, as
+long as it is unique. Some devices are irregular and don't follow any
+convention at all.
+
+Following this convention is good because:
+
+(1) Keeping the ioctl's globally unique helps error checking:
+ if a program calls an ioctl on the wrong device, it will get an
+ error rather than some unexpected behaviour.
+
+(2) The 'strace' build procedure automatically finds ioctl numbers
+ defined with _IO, _IOW, _IOR, or _IOWR.
+
+(3) 'strace' can decode numbers back into useful names when the
+ numbers are unique.
+
+(4) People looking for ioctls can grep for them more easily when
+ this convention is used to define the ioctl numbers.
+
+(5) When following the convention, the driver code can use generic
+ code to copy the parameters between user and kernel space.
+
+This table lists ioctls visible from user land for Linux/x86. It contains
+most drivers up to 2.6.31, but I know I am missing some. There has been
+no attempt to list non-X86 architectures or ioctls from drivers/staging/.
+
+==== ===== ======================================================= ================================================================
+Code Seq# Include File Comments
+ (hex)
+==== ===== ======================================================= ================================================================
+0x00 00-1F linux/fs.h conflict!
+0x00 00-1F scsi/scsi_ioctl.h conflict!
+0x00 00-1F linux/fb.h conflict!
+0x00 00-1F linux/wavefront.h conflict!
+0x02 all linux/fd.h
+0x03 all linux/hdreg.h
+0x04 D2-DC linux/umsdos_fs.h Dead since 2.6.11, but don't reuse these.
+0x06 all linux/lp.h
+0x09 all linux/raid/md_u.h
+0x10 00-0F drivers/char/s390/vmcp.h
+0x10 10-1F arch/s390/include/uapi/sclp_ctl.h
+0x10 20-2F arch/s390/include/uapi/asm/hypfs.h
+0x12 all linux/fs.h
+ linux/blkpg.h
+0x1b all InfiniBand Subsystem
+ <http://infiniband.sourceforge.net/>
+0x20 all drivers/cdrom/cm206.h
+0x22 all scsi/sg.h
+'!' 00-1F uapi/linux/seccomp.h
+'#' 00-3F IEEE 1394 Subsystem
+ Block for the entire subsystem
+'$' 00-0F linux/perf_counter.h, linux/perf_event.h
+'%' 00-0F include/uapi/linux/stm.h System Trace Module subsystem
+ <mailto:alexander.shishkin@linux.intel.com>
+'&' 00-07 drivers/firewire/nosy-user.h
+'1' 00-1F linux/timepps.h PPS kit from Ulrich Windl
+ <ftp://ftp.de.kernel.org/pub/linux/daemons/ntp/PPS/>
+'2' 01-04 linux/i2o.h
+'3' 00-0F drivers/s390/char/raw3270.h conflict!
+'3' 00-1F linux/suspend_ioctls.h, conflict!
+ kernel/power/user.c
+'8' all SNP8023 advanced NIC card
+ <mailto:mcr@solidum.com>
+';' 64-7F linux/vfio.h
+'@' 00-0F linux/radeonfb.h conflict!
+'@' 00-0F drivers/video/aty/aty128fb.c conflict!
+'A' 00-1F linux/apm_bios.h conflict!
+'A' 00-0F linux/agpgart.h, conflict!
+ drivers/char/agp/compat_ioctl.h
+'A' 00-7F sound/asound.h conflict!
+'B' 00-1F linux/cciss_ioctl.h conflict!
+'B' 00-0F include/linux/pmu.h conflict!
+'B' C0-FF advanced bbus <mailto:maassen@uni-freiburg.de>
+'C' all linux/soundcard.h conflict!
+'C' 01-2F linux/capi.h conflict!
+'C' F0-FF drivers/net/wan/cosa.h conflict!
+'D' all arch/s390/include/asm/dasd.h
+'D' 40-5F drivers/scsi/dpt/dpti_ioctl.h
+'D' 05 drivers/scsi/pmcraid.h
+'E' all linux/input.h conflict!
+'E' 00-0F xen/evtchn.h conflict!
+'F' all linux/fb.h conflict!
+'F' 01-02 drivers/scsi/pmcraid.h conflict!
+'F' 20 drivers/video/fsl-diu-fb.h conflict!
+'F' 20 drivers/video/intelfb/intelfb.h conflict!
+'F' 20 linux/ivtvfb.h conflict!
+'F' 20 linux/matroxfb.h conflict!
+'F' 20 drivers/video/aty/atyfb_base.c conflict!
+'F' 00-0F video/da8xx-fb.h conflict!
+'F' 80-8F linux/arcfb.h conflict!
+'F' DD video/sstfb.h conflict!
+'G' 00-3F drivers/misc/sgi-gru/grulib.h conflict!
+'G' 00-0F linux/gigaset_dev.h conflict!
+'H' 00-7F linux/hiddev.h conflict!
+'H' 00-0F linux/hidraw.h conflict!
+'H' 01 linux/mei.h conflict!
+'H' 02 linux/mei.h conflict!
+'H' 03 linux/mei.h conflict!
+'H' 00-0F sound/asound.h conflict!
+'H' 20-40 sound/asound_fm.h conflict!
+'H' 80-8F sound/sfnt_info.h conflict!
+'H' 10-8F sound/emu10k1.h conflict!
+'H' 10-1F sound/sb16_csp.h conflict!
+'H' 10-1F sound/hda_hwdep.h conflict!
+'H' 40-4F sound/hdspm.h conflict!
+'H' 40-4F sound/hdsp.h conflict!
+'H' 90 sound/usb/usx2y/usb_stream.h
+'H' A0 uapi/linux/usb/cdc-wdm.h
+'H' C0-F0 net/bluetooth/hci.h conflict!
+'H' C0-DF net/bluetooth/hidp/hidp.h conflict!
+'H' C0-DF net/bluetooth/cmtp/cmtp.h conflict!
+'H' C0-DF net/bluetooth/bnep/bnep.h conflict!
+'H' F1 linux/hid-roccat.h <mailto:erazor_de@users.sourceforge.net>
+'H' F8-FA sound/firewire.h
+'I' all linux/isdn.h conflict!
+'I' 00-0F drivers/isdn/divert/isdn_divert.h conflict!
+'I' 40-4F linux/mISDNif.h conflict!
+'J' 00-1F drivers/scsi/gdth_ioctl.h
+'K' all linux/kd.h
+'L' 00-1F linux/loop.h conflict!
+'L' 10-1F drivers/scsi/mpt3sas/mpt3sas_ctl.h conflict!
+'L' 20-2F linux/lightnvm.h
+'L' E0-FF linux/ppdd.h encrypted disk device driver
+ <http://linux01.gwdg.de/~alatham/ppdd.html>
+'M' all linux/soundcard.h conflict!
+'M' 01-16 mtd/mtd-abi.h conflict!
+ and drivers/mtd/mtdchar.c
+'M' 01-03 drivers/scsi/megaraid/megaraid_sas.h
+'M' 00-0F drivers/video/fsl-diu-fb.h conflict!
+'N' 00-1F drivers/usb/scanner.h
+'N' 40-7F drivers/block/nvme.c
+'O' 00-06 mtd/ubi-user.h UBI
+'P' all linux/soundcard.h conflict!
+'P' 60-6F sound/sscape_ioctl.h conflict!
+'P' 00-0F drivers/usb/class/usblp.c conflict!
+'P' 01-09 drivers/misc/pci_endpoint_test.c conflict!
+'Q' all linux/soundcard.h
+'R' 00-1F linux/random.h conflict!
+'R' 01 linux/rfkill.h conflict!
+'R' C0-DF net/bluetooth/rfcomm.h
+'S' all linux/cdrom.h conflict!
+'S' 80-81 scsi/scsi_ioctl.h conflict!
+'S' 82-FF scsi/scsi.h conflict!
+'S' 00-7F sound/asequencer.h conflict!
+'T' all linux/soundcard.h conflict!
+'T' 00-AF sound/asound.h conflict!
+'T' all arch/x86/include/asm/ioctls.h conflict!
+'T' C0-DF linux/if_tun.h conflict!
+'U' all sound/asound.h conflict!
+'U' 00-CF linux/uinput.h conflict!
+'U' 00-EF linux/usbdevice_fs.h
+'U' C0-CF drivers/bluetooth/hci_uart.h
+'V' all linux/vt.h conflict!
+'V' all linux/videodev2.h conflict!
+'V' C0 linux/ivtvfb.h conflict!
+'V' C0 linux/ivtv.h conflict!
+'V' C0 media/davinci/vpfe_capture.h conflict!
+'V' C0 media/si4713.h conflict!
+'W' 00-1F linux/watchdog.h conflict!
+'W' 00-1F linux/wanrouter.h conflict! (pre 3.9)
+'W' 00-3F sound/asound.h conflict!
+'W' 40-5F drivers/pci/switch/switchtec.c
+'X' all fs/xfs/xfs_fs.h, conflict!
+ fs/xfs/linux-2.6/xfs_ioctl32.h,
+ include/linux/falloc.h,
+ linux/fs.h,
+'X' all fs/ocfs2/ocfs2_fs.h conflict!
+'X' 01 linux/pktcdvd.h conflict!
+'Y' all linux/cyclades.h
+'Z' 14-15 drivers/message/fusion/mptctl.h
+'[' 00-3F linux/usb/tmc.h USB Test and Measurement Devices
+ <mailto:gregkh@linuxfoundation.org>
+'a' all linux/atm*.h, linux/sonet.h ATM on linux
+ <http://lrcwww.epfl.ch/>
+'a' 00-0F drivers/crypto/qat/qat_common/adf_cfg_common.h conflict! qat driver
+'b' 00-FF conflict! bit3 vme host bridge
+ <mailto:natalia@nikhefk.nikhef.nl>
+'c' all linux/cm4000_cs.h conflict!
+'c' 00-7F linux/comstats.h conflict!
+'c' 00-7F linux/coda.h conflict!
+'c' 00-1F linux/chio.h conflict!
+'c' 80-9F arch/s390/include/asm/chsc.h conflict!
+'c' A0-AF arch/x86/include/asm/msr.h conflict!
+'d' 00-FF linux/char/drm/drm.h conflict!
+'d' 02-40 pcmcia/ds.h conflict!
+'d' F0-FF linux/digi1.h
+'e' all linux/digi1.h conflict!
+'f' 00-1F linux/ext2_fs.h conflict!
+'f' 00-1F linux/ext3_fs.h conflict!
+'f' 00-0F fs/jfs/jfs_dinode.h conflict!
+'f' 00-0F fs/ext4/ext4.h conflict!
+'f' 00-0F linux/fs.h conflict!
+'f' 00-0F fs/ocfs2/ocfs2_fs.h conflict!
+'g' 00-0F linux/usb/gadgetfs.h
+'g' 20-2F linux/usb/g_printer.h
+'h' 00-7F conflict! Charon filesystem
+ <mailto:zapman@interlan.net>
+'h' 00-1F linux/hpet.h conflict!
+'h' 80-8F fs/hfsplus/ioctl.c
+'i' 00-3F linux/i2o-dev.h conflict!
+'i' 0B-1F linux/ipmi.h conflict!
+'i' 80-8F linux/i8k.h
+'j' 00-3F linux/joystick.h
+'k' 00-0F linux/spi/spidev.h conflict!
+'k' 00-05 video/kyro.h conflict!
+'k' 10-17 linux/hsi/hsi_char.h HSI character device
+'l' 00-3F linux/tcfs_fs.h transparent cryptographic file system
+ <http://web.archive.org/web/%2A/http://mikonos.dia.unisa.it/tcfs>
+'l' 40-7F linux/udf_fs_i.h in development:
+ <http://sourceforge.net/projects/linux-udf/>
+'m' 00-09 linux/mmtimer.h conflict!
+'m' all linux/mtio.h conflict!
+'m' all linux/soundcard.h conflict!
+'m' all linux/synclink.h conflict!
+'m' 00-19 drivers/message/fusion/mptctl.h conflict!
+'m' 00 drivers/scsi/megaraid/megaraid_ioctl.h conflict!
+'n' 00-7F linux/ncp_fs.h and fs/ncpfs/ioctl.c
+'n' 80-8F uapi/linux/nilfs2_api.h NILFS2
+'n' E0-FF linux/matroxfb.h matroxfb
+'o' 00-1F fs/ocfs2/ocfs2_fs.h OCFS2
+'o' 00-03 mtd/ubi-user.h conflict! (OCFS2 and UBI overlap)
+'o' 40-41 mtd/ubi-user.h UBI
+'o' 01-A1 `linux/dvb/*.h` DVB
+'p' 00-0F linux/phantom.h conflict! (OpenHaptics needs this)
+'p' 00-1F linux/rtc.h conflict!
+'p' 00-3F linux/mc146818rtc.h conflict!
+'p' 40-7F linux/nvram.h
+'p' 80-9F linux/ppdev.h user-space parport
+ <mailto:tim@cyberelk.net>
+'p' A1-A5 linux/pps.h LinuxPPS
+ <mailto:giometti@linux.it>
+'q' 00-1F linux/serio.h
+'q' 80-FF linux/telephony.h Internet PhoneJACK, Internet LineJACK
+ linux/ixjuser.h <http://web.archive.org/web/%2A/http://www.quicknet.net>
+'r' 00-1F linux/msdos_fs.h and fs/fat/dir.c
+'s' all linux/cdk.h
+'t' 00-7F linux/ppp-ioctl.h
+'t' 80-8F linux/isdn_ppp.h
+'t' 90-91 linux/toshiba.h toshiba and toshiba_acpi SMM
+'u' 00-1F linux/smb_fs.h gone
+'u' 20-3F linux/uvcvideo.h USB video class host driver
+'u' 40-4F linux/udmabuf.h userspace dma-buf misc device
+'v' 00-1F linux/ext2_fs.h conflict!
+'v' 00-1F linux/fs.h conflict!
+'v' 00-0F linux/sonypi.h conflict!
+'v' 00-0F media/v4l2-subdev.h conflict!
+'v' C0-FF linux/meye.h conflict!
+'w' all CERN SCI driver
+'y' 00-1F packet based user level communications
+ <mailto:zapman@interlan.net>
+'z' 00-3F CAN bus card conflict!
+ <mailto:hdstich@connectu.ulm.circular.de>
+'z' 40-7F CAN bus card conflict!
+ <mailto:oe@port.de>
+'z' 10-4F drivers/s390/crypto/zcrypt_api.h conflict!
+'|' 00-7F linux/media.h
+0x80 00-1F linux/fb.h
+0x89 00-06 arch/x86/include/asm/sockios.h
+0x89 0B-DF linux/sockios.h
+0x89 E0-EF linux/sockios.h SIOCPROTOPRIVATE range
+0x89 E0-EF linux/dn.h PROTOPRIVATE range
+0x89 F0-FF linux/sockios.h SIOCDEVPRIVATE range
+0x8B all linux/wireless.h
+0x8C 00-3F WiNRADiO driver
+ <http://www.winradio.com.au/>
+0x90 00 drivers/cdrom/sbpcd.h
+0x92 00-0F drivers/usb/mon/mon_bin.c
+0x93 60-7F linux/auto_fs.h
+0x94 all fs/btrfs/ioctl.h Btrfs filesystem
+ and linux/fs.h some lifted to vfs/generic
+0x97 00-7F fs/ceph/ioctl.h Ceph file system
+0x99 00-0F 537-Addinboard driver
+ <mailto:buk@buks.ipn.de>
+0xA0 all linux/sdp/sdp.h Industrial Device Project
+ <mailto:kenji@bitgate.com>
+0xA1 00 linux/vtpm_proxy.h TPM Emulator Proxy Driver
+0xA3 80-8F Port ACL in development:
+ <mailto:tlewis@mindspring.com>
+0xA3 90-9F linux/dtlk.h
+0xA4 00-1F uapi/linux/tee.h Generic TEE subsystem
+0xAA 00-3F uapi/linux/userfaultfd.h
+0xAB 00-1F linux/nbd.h
+0xAC 00-1F linux/raw.h
+0xAD 00 Netfilter device in development:
+ <mailto:rusty@rustcorp.com.au>
+0xAE all linux/kvm.h Kernel-based Virtual Machine
+ <mailto:kvm@vger.kernel.org>
+0xAF 00-1F linux/fsl_hypervisor.h Freescale hypervisor
+0xB0 all RATIO devices in development:
+ <mailto:vgo@ratio.de>
+0xB1 00-1F PPPoX
+ <mailto:mostrows@styx.uwaterloo.ca>
+0xB3 00 linux/mmc/ioctl.h
+0xB4 00-0F linux/gpio.h <mailto:linux-gpio@vger.kernel.org>
+0xB5 00-0F uapi/linux/rpmsg.h <mailto:linux-remoteproc@vger.kernel.org>
+0xB6 all linux/fpga-dfl.h
+0xC0 00-0F linux/usb/iowarrior.h
+0xCA 00-0F uapi/misc/cxl.h
+0xCA 10-2F uapi/misc/ocxl.h
+0xCA 80-BF uapi/scsi/cxlflash_ioctl.h
+0xCB 00-1F CBM serial IEC bus in development:
+ <mailto:michael.klein@puffin.lb.shuttle.de>
+0xCC 00-0F drivers/misc/ibmvmc.h pseries VMC driver
+0xCD 01 linux/reiserfs_fs.h
+0xCF 02 fs/cifs/ioctl.c
+0xDB 00-0F drivers/char/mwave/mwavepub.h
+0xDD 00-3F ZFCP device driver see drivers/s390/scsi/
+ <mailto:aherrman@de.ibm.com>
+0xE5 00-3F linux/fuse.h
+0xEC 00-01 drivers/platform/chrome/cros_ec_dev.h ChromeOS EC driver
+0xF3 00-3F drivers/usb/misc/sisusbvga/sisusb.h sisfb (in development)
+ <mailto:thomas@winischhofer.net>
+0xF4 00-1F video/mbxfb.h mbxfb
+ <mailto:raph@8d.com>
+0xF6 all LTTng Linux Trace Toolkit Next Generation
+ <mailto:mathieu.desnoyers@efficios.com>
+0xFD all linux/dm-ioctl.h
+0xFE all linux/isst_if.h
+==== ===== ======================================================= ================================================================
diff --git a/Documentation/ioctl/ioctl-number.txt b/Documentation/ioctl/ioctl-number.txt
deleted file mode 100644
index ab0b3f686454..000000000000
--- a/Documentation/ioctl/ioctl-number.txt
+++ /dev/null
@@ -1,351 +0,0 @@
-Ioctl Numbers
-19 October 1999
-Michael Elizabeth Chastain
-<mec@shout.net>
-
-If you are adding new ioctl's to the kernel, you should use the _IO
-macros defined in <linux/ioctl.h>:
-
- _IO an ioctl with no parameters
- _IOW an ioctl with write parameters (copy_from_user)
- _IOR an ioctl with read parameters (copy_to_user)
- _IOWR an ioctl with both write and read parameters.
-
-'Write' and 'read' are from the user's point of view, just like the
-system calls 'write' and 'read'. For example, a SET_FOO ioctl would
-be _IOW, although the kernel would actually read data from user space;
-a GET_FOO ioctl would be _IOR, although the kernel would actually write
-data to user space.
-
-The first argument to _IO, _IOW, _IOR, or _IOWR is an identifying letter
-or number from the table below. Because of the large number of drivers,
-many drivers share a partial letter with other drivers.
-
-If you are writing a driver for a new device and need a letter, pick an
-unused block with enough room for expansion: 32 to 256 ioctl commands.
-You can register the block by patching this file and submitting the
-patch to Linus Torvalds. Or you can e-mail me at <mec@shout.net> and
-I'll register one for you.
-
-The second argument to _IO, _IOW, _IOR, or _IOWR is a sequence number
-to distinguish ioctls from each other. The third argument to _IOW,
-_IOR, or _IOWR is the type of the data going into the kernel or coming
-out of the kernel (e.g. 'int' or 'struct foo'). NOTE! Do NOT use
-sizeof(arg) as the third argument as this results in your ioctl thinking
-it passes an argument of type size_t.
-
-Some devices use their major number as the identifier; this is OK, as
-long as it is unique. Some devices are irregular and don't follow any
-convention at all.
-
-Following this convention is good because:
-
-(1) Keeping the ioctl's globally unique helps error checking:
- if a program calls an ioctl on the wrong device, it will get an
- error rather than some unexpected behaviour.
-
-(2) The 'strace' build procedure automatically finds ioctl numbers
- defined with _IO, _IOW, _IOR, or _IOWR.
-
-(3) 'strace' can decode numbers back into useful names when the
- numbers are unique.
-
-(4) People looking for ioctls can grep for them more easily when
- this convention is used to define the ioctl numbers.
-
-(5) When following the convention, the driver code can use generic
- code to copy the parameters between user and kernel space.
-
-This table lists ioctls visible from user land for Linux/x86. It contains
-most drivers up to 2.6.31, but I know I am missing some. There has been
-no attempt to list non-X86 architectures or ioctls from drivers/staging/.
-
-Code Seq#(hex) Include File Comments
-========================================================
-0x00 00-1F linux/fs.h conflict!
-0x00 00-1F scsi/scsi_ioctl.h conflict!
-0x00 00-1F linux/fb.h conflict!
-0x00 00-1F linux/wavefront.h conflict!
-0x02 all linux/fd.h
-0x03 all linux/hdreg.h
-0x04 D2-DC linux/umsdos_fs.h Dead since 2.6.11, but don't reuse these.
-0x06 all linux/lp.h
-0x09 all linux/raid/md_u.h
-0x10 00-0F drivers/char/s390/vmcp.h
-0x10 10-1F arch/s390/include/uapi/sclp_ctl.h
-0x10 20-2F arch/s390/include/uapi/asm/hypfs.h
-0x12 all linux/fs.h
- linux/blkpg.h
-0x1b all InfiniBand Subsystem <http://infiniband.sourceforge.net/>
-0x20 all drivers/cdrom/cm206.h
-0x22 all scsi/sg.h
-'!' 00-1F uapi/linux/seccomp.h
-'#' 00-3F IEEE 1394 Subsystem Block for the entire subsystem
-'$' 00-0F linux/perf_counter.h, linux/perf_event.h
-'%' 00-0F include/uapi/linux/stm.h
- System Trace Module subsystem
- <mailto:alexander.shishkin@linux.intel.com>
-'&' 00-07 drivers/firewire/nosy-user.h
-'1' 00-1F <linux/timepps.h> PPS kit from Ulrich Windl
- <ftp://ftp.de.kernel.org/pub/linux/daemons/ntp/PPS/>
-'2' 01-04 linux/i2o.h
-'3' 00-0F drivers/s390/char/raw3270.h conflict!
-'3' 00-1F linux/suspend_ioctls.h conflict!
- and kernel/power/user.c
-'8' all SNP8023 advanced NIC card
- <mailto:mcr@solidum.com>
-';' 64-7F linux/vfio.h
-'@' 00-0F linux/radeonfb.h conflict!
-'@' 00-0F drivers/video/aty/aty128fb.c conflict!
-'A' 00-1F linux/apm_bios.h conflict!
-'A' 00-0F linux/agpgart.h conflict!
- and drivers/char/agp/compat_ioctl.h
-'A' 00-7F sound/asound.h conflict!
-'B' 00-1F linux/cciss_ioctl.h conflict!
-'B' 00-0F include/linux/pmu.h conflict!
-'B' C0-FF advanced bbus
- <mailto:maassen@uni-freiburg.de>
-'C' all linux/soundcard.h conflict!
-'C' 01-2F linux/capi.h conflict!
-'C' F0-FF drivers/net/wan/cosa.h conflict!
-'D' all arch/s390/include/asm/dasd.h
-'D' 40-5F drivers/scsi/dpt/dtpi_ioctl.h
-'D' 05 drivers/scsi/pmcraid.h
-'E' all linux/input.h conflict!
-'E' 00-0F xen/evtchn.h conflict!
-'F' all linux/fb.h conflict!
-'F' 01-02 drivers/scsi/pmcraid.h conflict!
-'F' 20 drivers/video/fsl-diu-fb.h conflict!
-'F' 20 drivers/video/intelfb/intelfb.h conflict!
-'F' 20 linux/ivtvfb.h conflict!
-'F' 20 linux/matroxfb.h conflict!
-'F' 20 drivers/video/aty/atyfb_base.c conflict!
-'F' 00-0F video/da8xx-fb.h conflict!
-'F' 80-8F linux/arcfb.h conflict!
-'F' DD video/sstfb.h conflict!
-'G' 00-3F drivers/misc/sgi-gru/grulib.h conflict!
-'G' 00-0F linux/gigaset_dev.h conflict!
-'H' 00-7F linux/hiddev.h conflict!
-'H' 00-0F linux/hidraw.h conflict!
-'H' 01 linux/mei.h conflict!
-'H' 02 linux/mei.h conflict!
-'H' 03 linux/mei.h conflict!
-'H' 00-0F sound/asound.h conflict!
-'H' 20-40 sound/asound_fm.h conflict!
-'H' 80-8F sound/sfnt_info.h conflict!
-'H' 10-8F sound/emu10k1.h conflict!
-'H' 10-1F sound/sb16_csp.h conflict!
-'H' 10-1F sound/hda_hwdep.h conflict!
-'H' 40-4F sound/hdspm.h conflict!
-'H' 40-4F sound/hdsp.h conflict!
-'H' 90 sound/usb/usx2y/usb_stream.h
-'H' A0 uapi/linux/usb/cdc-wdm.h
-'H' C0-F0 net/bluetooth/hci.h conflict!
-'H' C0-DF net/bluetooth/hidp/hidp.h conflict!
-'H' C0-DF net/bluetooth/cmtp/cmtp.h conflict!
-'H' C0-DF net/bluetooth/bnep/bnep.h conflict!
-'H' F1 linux/hid-roccat.h <mailto:erazor_de@users.sourceforge.net>
-'H' F8-FA sound/firewire.h
-'I' all linux/isdn.h conflict!
-'I' 00-0F drivers/isdn/divert/isdn_divert.h conflict!
-'I' 40-4F linux/mISDNif.h conflict!
-'J' 00-1F drivers/scsi/gdth_ioctl.h
-'K' all linux/kd.h
-'L' 00-1F linux/loop.h conflict!
-'L' 10-1F drivers/scsi/mpt3sas/mpt3sas_ctl.h conflict!
-'L' 20-2F linux/lightnvm.h
-'L' E0-FF linux/ppdd.h encrypted disk device driver
- <http://linux01.gwdg.de/~alatham/ppdd.html>
-'M' all linux/soundcard.h conflict!
-'M' 01-16 mtd/mtd-abi.h conflict!
- and drivers/mtd/mtdchar.c
-'M' 01-03 drivers/scsi/megaraid/megaraid_sas.h
-'M' 00-0F drivers/video/fsl-diu-fb.h conflict!
-'N' 00-1F drivers/usb/scanner.h
-'N' 40-7F drivers/block/nvme.c
-'O' 00-06 mtd/ubi-user.h UBI
-'P' all linux/soundcard.h conflict!
-'P' 60-6F sound/sscape_ioctl.h conflict!
-'P' 00-0F drivers/usb/class/usblp.c conflict!
-'P' 01-09 drivers/misc/pci_endpoint_test.c conflict!
-'Q' all linux/soundcard.h
-'R' 00-1F linux/random.h conflict!
-'R' 01 linux/rfkill.h conflict!
-'R' C0-DF net/bluetooth/rfcomm.h
-'S' all linux/cdrom.h conflict!
-'S' 80-81 scsi/scsi_ioctl.h conflict!
-'S' 82-FF scsi/scsi.h conflict!
-'S' 00-7F sound/asequencer.h conflict!
-'T' all linux/soundcard.h conflict!
-'T' 00-AF sound/asound.h conflict!
-'T' all arch/x86/include/asm/ioctls.h conflict!
-'T' C0-DF linux/if_tun.h conflict!
-'U' all sound/asound.h conflict!
-'U' 00-CF linux/uinput.h conflict!
-'U' 00-EF linux/usbdevice_fs.h
-'U' C0-CF drivers/bluetooth/hci_uart.h
-'V' all linux/vt.h conflict!
-'V' all linux/videodev2.h conflict!
-'V' C0 linux/ivtvfb.h conflict!
-'V' C0 linux/ivtv.h conflict!
-'V' C0 media/davinci/vpfe_capture.h conflict!
-'V' C0 media/si4713.h conflict!
-'W' 00-1F linux/watchdog.h conflict!
-'W' 00-1F linux/wanrouter.h conflict! (pre 3.9)
-'W' 00-3F sound/asound.h conflict!
-'W' 40-5F drivers/pci/switch/switchtec.c
-'X' all fs/xfs/xfs_fs.h conflict!
- and fs/xfs/linux-2.6/xfs_ioctl32.h
- and include/linux/falloc.h
- and linux/fs.h
-'X' all fs/ocfs2/ocfs_fs.h conflict!
-'X' 01 linux/pktcdvd.h conflict!
-'Y' all linux/cyclades.h
-'Z' 14-15 drivers/message/fusion/mptctl.h
-'[' 00-3F linux/usb/tmc.h USB Test and Measurement Devices
- <mailto:gregkh@linuxfoundation.org>
-'a' all linux/atm*.h, linux/sonet.h ATM on linux
- <http://lrcwww.epfl.ch/>
-'a' 00-0F drivers/crypto/qat/qat_common/adf_cfg_common.h conflict! qat driver
-'b' 00-FF conflict! bit3 vme host bridge
- <mailto:natalia@nikhefk.nikhef.nl>
-'c' all linux/cm4000_cs.h conflict!
-'c' 00-7F linux/comstats.h conflict!
-'c' 00-7F linux/coda.h conflict!
-'c' 00-1F linux/chio.h conflict!
-'c' 80-9F arch/s390/include/asm/chsc.h conflict!
-'c' A0-AF arch/x86/include/asm/msr.h conflict!
-'d' 00-FF linux/char/drm/drm.h conflict!
-'d' 02-40 pcmcia/ds.h conflict!
-'d' F0-FF linux/digi1.h
-'e' all linux/digi1.h conflict!
-'f' 00-1F linux/ext2_fs.h conflict!
-'f' 00-1F linux/ext3_fs.h conflict!
-'f' 00-0F fs/jfs/jfs_dinode.h conflict!
-'f' 00-0F fs/ext4/ext4.h conflict!
-'f' 00-0F linux/fs.h conflict!
-'f' 00-0F fs/ocfs2/ocfs2_fs.h conflict!
-'g' 00-0F linux/usb/gadgetfs.h
-'g' 20-2F linux/usb/g_printer.h
-'h' 00-7F conflict! Charon filesystem
- <mailto:zapman@interlan.net>
-'h' 00-1F linux/hpet.h conflict!
-'h' 80-8F fs/hfsplus/ioctl.c
-'i' 00-3F linux/i2o-dev.h conflict!
-'i' 0B-1F linux/ipmi.h conflict!
-'i' 80-8F linux/i8k.h
-'j' 00-3F linux/joystick.h
-'k' 00-0F linux/spi/spidev.h conflict!
-'k' 00-05 video/kyro.h conflict!
-'k' 10-17 linux/hsi/hsi_char.h HSI character device
-'l' 00-3F linux/tcfs_fs.h transparent cryptographic file system
- <http://web.archive.org/web/*/http://mikonos.dia.unisa.it/tcfs>
-'l' 40-7F linux/udf_fs_i.h in development:
- <http://sourceforge.net/projects/linux-udf/>
-'m' 00-09 linux/mmtimer.h conflict!
-'m' all linux/mtio.h conflict!
-'m' all linux/soundcard.h conflict!
-'m' all linux/synclink.h conflict!
-'m' 00-19 drivers/message/fusion/mptctl.h conflict!
-'m' 00 drivers/scsi/megaraid/megaraid_ioctl.h conflict!
-'n' 00-7F linux/ncp_fs.h and fs/ncpfs/ioctl.c
-'n' 80-8F uapi/linux/nilfs2_api.h NILFS2
-'n' E0-FF linux/matroxfb.h matroxfb
-'o' 00-1F fs/ocfs2/ocfs2_fs.h OCFS2
-'o' 00-03 mtd/ubi-user.h conflict! (OCFS2 and UBI overlaps)
-'o' 40-41 mtd/ubi-user.h UBI
-'o' 01-A1 linux/dvb/*.h DVB
-'p' 00-0F linux/phantom.h conflict! (OpenHaptics needs this)
-'p' 00-1F linux/rtc.h conflict!
-'p' 00-3F linux/mc146818rtc.h conflict!
-'p' 40-7F linux/nvram.h
-'p' 80-9F linux/ppdev.h user-space parport
- <mailto:tim@cyberelk.net>
-'p' A1-A5 linux/pps.h LinuxPPS
- <mailto:giometti@linux.it>
-'q' 00-1F linux/serio.h
-'q' 80-FF linux/telephony.h Internet PhoneJACK, Internet LineJACK
- linux/ixjuser.h <http://web.archive.org/web/*/http://www.quicknet.net>
-'r' 00-1F linux/msdos_fs.h and fs/fat/dir.c
-'s' all linux/cdk.h
-'t' 00-7F linux/ppp-ioctl.h
-'t' 80-8F linux/isdn_ppp.h
-'t' 90-91 linux/toshiba.h toshiba and toshiba_acpi SMM
-'u' 00-1F linux/smb_fs.h gone
-'u' 20-3F linux/uvcvideo.h USB video class host driver
-'u' 40-4f linux/udmabuf.h userspace dma-buf misc device
-'v' 00-1F linux/ext2_fs.h conflict!
-'v' 00-1F linux/fs.h conflict!
-'v' 00-0F linux/sonypi.h conflict!
-'v' 00-0F media/v4l2-subdev.h conflict!
-'v' C0-FF linux/meye.h conflict!
-'w' all CERN SCI driver
-'y' 00-1F packet based user level communications
- <mailto:zapman@interlan.net>
-'z' 00-3F CAN bus card conflict!
- <mailto:hdstich@connectu.ulm.circular.de>
-'z' 40-7F CAN bus card conflict!
- <mailto:oe@port.de>
-'z' 10-4F drivers/s390/crypto/zcrypt_api.h conflict!
-'|' 00-7F linux/media.h
-0x80 00-1F linux/fb.h
-0x89 00-06 arch/x86/include/asm/sockios.h
-0x89 0B-DF linux/sockios.h
-0x89 E0-EF linux/sockios.h SIOCPROTOPRIVATE range
-0x89 E0-EF linux/dn.h PROTOPRIVATE range
-0x89 F0-FF linux/sockios.h SIOCDEVPRIVATE range
-0x8B all linux/wireless.h
-0x8C 00-3F WiNRADiO driver
- <http://www.winradio.com.au/>
-0x90 00 drivers/cdrom/sbpcd.h
-0x92 00-0F drivers/usb/mon/mon_bin.c
-0x93 60-7F linux/auto_fs.h
-0x94 all fs/btrfs/ioctl.h Btrfs filesystem
- and linux/fs.h some lifted to vfs/generic
-0x97 00-7F fs/ceph/ioctl.h Ceph file system
-0x99 00-0F 537-Addinboard driver
- <mailto:buk@buks.ipn.de>
-0xA0 all linux/sdp/sdp.h Industrial Device Project
- <mailto:kenji@bitgate.com>
-0xA1 0 linux/vtpm_proxy.h TPM Emulator Proxy Driver
-0xA3 80-8F Port ACL in development:
- <mailto:tlewis@mindspring.com>
-0xA3 90-9F linux/dtlk.h
-0xA4 00-1F uapi/linux/tee.h Generic TEE subsystem
-0xAA 00-3F linux/uapi/linux/userfaultfd.h
-0xAB 00-1F linux/nbd.h
-0xAC 00-1F linux/raw.h
-0xAD 00 Netfilter device in development:
- <mailto:rusty@rustcorp.com.au>
-0xAE all linux/kvm.h Kernel-based Virtual Machine
- <mailto:kvm@vger.kernel.org>
-0xAF 00-1F linux/fsl_hypervisor.h Freescale hypervisor
-0xB0 all RATIO devices in development:
- <mailto:vgo@ratio.de>
-0xB1 00-1F PPPoX <mailto:mostrows@styx.uwaterloo.ca>
-0xB3 00 linux/mmc/ioctl.h
-0xB4 00-0F linux/gpio.h <mailto:linux-gpio@vger.kernel.org>
-0xB5 00-0F uapi/linux/rpmsg.h <mailto:linux-remoteproc@vger.kernel.org>
-0xB6 all linux/fpga-dfl.h
-0xC0 00-0F linux/usb/iowarrior.h
-0xCA 00-0F uapi/misc/cxl.h
-0xCA 10-2F uapi/misc/ocxl.h
-0xCA 80-BF uapi/scsi/cxlflash_ioctl.h
-0xCB 00-1F CBM serial IEC bus in development:
- <mailto:michael.klein@puffin.lb.shuttle.de>
-0xCC 00-0F drivers/misc/ibmvmc.h pseries VMC driver
-0xCD 01 linux/reiserfs_fs.h
-0xCF 02 fs/cifs/ioctl.c
-0xDB 00-0F drivers/char/mwave/mwavepub.h
-0xDD 00-3F ZFCP device driver see drivers/s390/scsi/
- <mailto:aherrman@de.ibm.com>
-0xE5 00-3F linux/fuse.h
-0xEC 00-01 drivers/platform/chrome/cros_ec_dev.h ChromeOS EC driver
-0xF3 00-3F drivers/usb/misc/sisusbvga/sisusb.h sisfb (in development)
- <mailto:thomas@winischhofer.net>
-0xF4 00-1F video/mbxfb.h mbxfb
- <mailto:raph@8d.com>
-0xF6 all LTTng Linux Trace Toolkit Next Generation
- <mailto:mathieu.desnoyers@efficios.com>
-0xFD all linux/dm-ioctl.h
-0xFE all linux/isst_if.h
diff --git a/Documentation/kbuild/index.rst b/Documentation/kbuild/index.rst
index 42d4cbe4460c..e323a3f2cc81 100644
--- a/Documentation/kbuild/index.rst
+++ b/Documentation/kbuild/index.rst
@@ -1,4 +1,4 @@
-:orphan:
+.. SPDX-License-Identifier: GPL-2.0
===================
Kernel Build System
diff --git a/Documentation/kbuild/issues.rst b/Documentation/kbuild/issues.rst
index 9fdded4b681c..bdab01f733f6 100644
--- a/Documentation/kbuild/issues.rst
+++ b/Documentation/kbuild/issues.rst
@@ -1,11 +1,15 @@
-Recursion issue #1
-------------------
+================
+Recursion issues
+================
- .. include:: Kconfig.recursion-issue-01
- :literal:
+issue #1
+--------
-Recursion issue #2
-------------------
+.. literalinclude:: Kconfig.recursion-issue-01
+ :language: kconfig
- .. include:: Kconfig.recursion-issue-02
- :literal:
+issue #2
+--------
+
+.. literalinclude:: Kconfig.recursion-issue-02
+ :language: kconfig
diff --git a/Documentation/kbuild/kbuild.rst b/Documentation/kbuild/kbuild.rst
index b25548963d70..ce9b99c004ae 100644
--- a/Documentation/kbuild/kbuild.rst
+++ b/Documentation/kbuild/kbuild.rst
@@ -18,7 +18,7 @@ This file lists all modules that are built into the kernel. This is used
by modprobe to not fail when trying to load something builtin.
modules.builtin.modinfo
---------------------------------------------------
+-----------------------
This file contains modinfo from all modules that are built into the kernel.
Unlike modinfo of a separate module, all fields are prefixed with module name.
@@ -153,6 +153,7 @@ Install script called when using "make install".
The default name is "installkernel".
The script will be called with the following arguments:
+
- $1 - kernel version
- $2 - kernel image file
- $3 - kernel map file
diff --git a/Documentation/kbuild/kconfig-language.rst b/Documentation/kbuild/kconfig-language.rst
index 2bc8a7803365..74bef19f69f0 100644
--- a/Documentation/kbuild/kconfig-language.rst
+++ b/Documentation/kbuild/kconfig-language.rst
@@ -53,6 +53,7 @@ A menu entry can have a number of attributes. Not all of them are
applicable everywhere (see syntax).
- type definition: "bool"/"tristate"/"string"/"hex"/"int"
+
Every config option must have a type. There are only two basic types:
tristate and string; the other types are based on these two. The type
definition optionally accepts an input prompt, so these two examples
@@ -66,11 +67,13 @@ applicable everywhere (see syntax).
prompt "Networking support"
- input prompt: "prompt" <prompt> ["if" <expr>]
+
Every menu entry can have at most one prompt, which is used to display
to the user. Optionally dependencies only for this prompt can be added
with "if".
- default value: "default" <expr> ["if" <expr>]
+
A config option can have any number of default values. If multiple
default values are visible, only the first defined one is active.
Default values are not limited to the menu entry where they are
@@ -112,6 +115,7 @@ applicable everywhere (see syntax).
Optionally dependencies for this default value can be added with "if".
- dependencies: "depends on" <expr>
+
This defines a dependency for this menu entry. If multiple
dependencies are defined, they are connected with '&&'. Dependencies
are applied to all other options within this menu entry (which also
@@ -127,6 +131,7 @@ applicable everywhere (see syntax).
default y
- reverse dependencies: "select" <symbol> ["if" <expr>]
+
While normal dependencies reduce the upper limit of a symbol (see
below), reverse dependencies can be used to force a lower limit of
another symbol. The value of the current menu symbol is used as the
@@ -146,6 +151,7 @@ applicable everywhere (see syntax).
the illegal configurations all over.
- weak reverse dependencies: "imply" <symbol> ["if" <expr>]
+
This is similar to "select" as it enforces a lower limit on another
symbol except that the "implied" symbol's value may still be set to n
from a direct dependency or with a visible prompt.
@@ -176,6 +182,7 @@ applicable everywhere (see syntax).
configure that subsystem out without also having to unset these drivers.
- limiting menu display: "visible if" <expr>
+
This attribute is only applicable to menu blocks, if the condition is
false, the menu block is not displayed to the user (the symbols
contained there can still be selected by other symbols, though). It is
@@ -183,12 +190,14 @@ applicable everywhere (see syntax).
entries. Default value of "visible" is true.
- numerical ranges: "range" <symbol> <symbol> ["if" <expr>]
+
This allows to limit the range of possible input values for int
and hex symbols. The user can only input a value which is larger than
or equal to the first symbol and smaller than or equal to the second
symbol.
- help text: "help" or "---help---"
+
This defines a help text. The end of the help text is determined by
the indentation level, this means it ends at the first line which has
a smaller indentation than the first line of the help text.
@@ -197,6 +206,7 @@ applicable everywhere (see syntax).
the file as an aid to developers.
- misc options: "option" <symbol>[=<value>]
+
Various less common options can be defined via this option syntax,
which can modify the behaviour of the menu entry and its config
symbol. These options are currently possible:
@@ -325,6 +335,7 @@ end a menu entry:
The first five also start the definition of a menu entry.
config::
+
"config" <symbol>
<config options>
@@ -332,6 +343,7 @@ This defines a config symbol <symbol> and accepts any of above
attributes as options.
menuconfig::
+
"menuconfig" <symbol>
<config options>
diff --git a/Documentation/kbuild/kconfig.rst b/Documentation/kbuild/kconfig.rst
index 88129af7e539..a9a855f894b3 100644
--- a/Documentation/kbuild/kconfig.rst
+++ b/Documentation/kbuild/kconfig.rst
@@ -264,6 +264,7 @@ NCONFIG_MODE
This mode shows all sub-menus in one large tree.
Example::
+
make NCONFIG_MODE=single_menu nconfig
----------------------------------------------------------------------
@@ -277,9 +278,12 @@ Searching in xconfig:
names, so you have to know something close to what you are
looking for.
- Example:
+ Example::
+
Ctrl-F hotplug
- or
+
+ or::
+
Menu: File, Search, hotplug
lists all config symbol entries that contain "hotplug" in
diff --git a/Documentation/kbuild/makefiles.rst b/Documentation/kbuild/makefiles.rst
index 093f2d79ab95..f31158457753 100644
--- a/Documentation/kbuild/makefiles.rst
+++ b/Documentation/kbuild/makefiles.rst
@@ -384,6 +384,7 @@ more details, with real examples.
-----------------------
Kbuild tracks dependencies on the following:
+
1) All prerequisite files (both `*.c` and `*.h`)
2) `CONFIG_` options used in all prerequisite files
3) Command-line used to compile target
diff --git a/Documentation/kernel-hacking/locking.rst b/Documentation/kernel-hacking/locking.rst
index dc698ea456e0..a8518ac0d31d 100644
--- a/Documentation/kernel-hacking/locking.rst
+++ b/Documentation/kernel-hacking/locking.rst
@@ -1364,7 +1364,7 @@ Futex API reference
Further reading
===============
-- ``Documentation/locking/spinlocks.txt``: Linus Torvalds' spinlocking
+- ``Documentation/locking/spinlocks.rst``: Linus Torvalds' spinlocking
tutorial in the kernel sources.
- Unix Systems for Modern Architectures: Symmetric Multiprocessing and
diff --git a/Documentation/leds/index.rst b/Documentation/leds/index.rst
index 9885f7c1b75d..060f4e485897 100644
--- a/Documentation/leds/index.rst
+++ b/Documentation/leds/index.rst
@@ -1,4 +1,4 @@
-:orphan:
+.. SPDX-License-Identifier: GPL-2.0
====
LEDs
diff --git a/Documentation/livepatch/index.rst b/Documentation/livepatch/index.rst
index edd291d51847..17674a9e21b2 100644
--- a/Documentation/livepatch/index.rst
+++ b/Documentation/livepatch/index.rst
@@ -1,4 +1,4 @@
-:orphan:
+.. SPDX-License-Identifier: GPL-2.0
===================
Kernel Livepatching
diff --git a/Documentation/locking/index.rst b/Documentation/locking/index.rst
new file mode 100644
index 000000000000..626a463f7e42
--- /dev/null
+++ b/Documentation/locking/index.rst
@@ -0,0 +1,24 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=======
+locking
+=======
+
+.. toctree::
+ :maxdepth: 1
+
+ lockdep-design
+ lockstat
+ locktorture
+ mutex-design
+ rt-mutex-design
+ rt-mutex
+ spinlocks
+ ww-mutex-design
+
+.. only:: subproject and html
+
+ Indices
+ =======
+
+ * :ref:`genindex`
diff --git a/Documentation/locking/lockdep-design.txt b/Documentation/locking/lockdep-design.rst
index f189d130e543..23fcbc4d3fc0 100644
--- a/Documentation/locking/lockdep-design.txt
+++ b/Documentation/locking/lockdep-design.rst
@@ -2,6 +2,7 @@ Runtime locking correctness validator
=====================================
started by Ingo Molnar <mingo@redhat.com>
+
additions by Arjan van de Ven <arjan@linux.intel.com>
Lock-class
@@ -56,7 +57,7 @@ where the last 1 category is:
When locking rules are violated, these usage bits are presented in the
locking error messages, inside curlies, with a total of 2 * n STATEs bits.
-A contrived example:
+A contrived example::
modprobe/2287 is trying to acquire lock:
(&sio_locks[i].lock){-.-.}, at: [<c02867fd>] mutex_lock+0x21/0x24
@@ -70,12 +71,14 @@ of the lock and readlock (if exists), for each of the n STATEs listed
above respectively, and the character displayed at each bit position
indicates:
+ === ===================================================
'.' acquired while irqs disabled and not in irq context
'-' acquired in irq context
'+' acquired with irqs enabled
'?' acquired in irq context with irqs enabled.
+ === ===================================================
-The bits are illustrated with an example:
+The bits are illustrated with an example::
(&sio_locks[i].lock){-.-.}, at: [<c02867fd>] mutex_lock+0x21/0x24
||||
@@ -90,13 +93,13 @@ context and whether that STATE is enabled yields four possible cases as
shown in the table below. The bit character is able to indicate which
exact case is for the lock as of the reporting time.
- -------------------------------------------
+ +--------------+-------------+--------------+
| | irq enabled | irq disabled |
- |-------------------------------------------|
+ +--------------+-------------+--------------+
| ever in irq | ? | - |
- |-------------------------------------------|
+ +--------------+-------------+--------------+
| never in irq | + | . |
- -------------------------------------------
+ +--------------+-------------+--------------+
The character '-' suggests irq is disabled because if otherwise the
character '?' would have been shown instead. Similar deduction can be
@@ -113,7 +116,7 @@ is irq-unsafe means it was ever acquired with irq enabled.
A softirq-unsafe lock-class is automatically hardirq-unsafe as well. The
following states must be exclusive: only one of them is allowed to be set
-for any lock-class based on its usage:
+for any lock-class based on its usage::
<hardirq-safe> or <hardirq-unsafe>
<softirq-safe> or <softirq-unsafe>
@@ -134,7 +137,7 @@ Multi-lock dependency rules:
The same lock-class must not be acquired twice, because this could lead
to lock recursion deadlocks.
-Furthermore, two locks can not be taken in inverse order:
+Furthermore, two locks can not be taken in inverse order::
<L1> -> <L2>
<L2> -> <L1>
@@ -148,7 +151,7 @@ operations; the validator will still find whether these locks can be
acquired in a circular fashion.
Furthermore, the following usage based lock dependencies are not allowed
-between any two lock-classes:
+between any two lock-classes::
<hardirq-safe> -> <hardirq-unsafe>
<softirq-safe> -> <softirq-unsafe>
@@ -204,16 +207,16 @@ the ordering is not static.
In order to teach the validator about this correct usage model, new
versions of the various locking primitives were added that allow you to
specify a "nesting level". An example call, for the block device mutex,
-looks like this:
+looks like this::
-enum bdev_bd_mutex_lock_class
-{
+ enum bdev_bd_mutex_lock_class
+ {
BD_MUTEX_NORMAL,
BD_MUTEX_WHOLE,
BD_MUTEX_PARTITION
-};
+ };
- mutex_lock_nested(&bdev->bd_contains->bd_mutex, BD_MUTEX_PARTITION);
+mutex_lock_nested(&bdev->bd_contains->bd_mutex, BD_MUTEX_PARTITION);
In this case the locking is done on a bdev object that is known to be a
partition.
@@ -234,7 +237,7 @@ must be held: lockdep_assert_held*(&lock) and lockdep_*pin_lock(&lock).
As the name suggests, lockdep_assert_held* family of macros assert that a
particular lock is held at a certain time (and generate a WARN() otherwise).
This annotation is largely used all over the kernel, e.g. kernel/sched/
-core.c
+core.c::
void update_rq_clock(struct rq *rq)
{
@@ -253,7 +256,7 @@ out to be especially helpful to debug code with callbacks, where an upper
layer assumes a lock remains taken, but a lower layer thinks it can maybe drop
and reacquire the lock ("unwittingly" introducing races). lockdep_pin_lock()
returns a 'struct pin_cookie' that is then used by lockdep_unpin_lock() to check
-that nobody tampered with the lock, e.g. kernel/sched/sched.h
+that nobody tampered with the lock, e.g. kernel/sched/sched.h::
static inline void rq_pin_lock(struct rq *rq, struct rq_flags *rf)
{
@@ -280,7 +283,7 @@ correctness) in the sense that for every simple, standalone single-task
locking sequence that occurred at least once during the lifetime of the
kernel, the validator proves it with a 100% certainty that no
combination and timing of these locking sequences can cause any class of
-lock related deadlock. [*]
+lock related deadlock. [1]_
I.e. complex multi-CPU and multi-task locking scenarios do not have to
occur in practice to prove a deadlock: only the simple 'component'
@@ -299,7 +302,9 @@ possible combination of locking interaction between CPUs, combined with
every possible hardirq and softirq nesting scenario (which is impossible
to do in practice).
-[*] assuming that the validator itself is 100% correct, and no other
+.. [1]
+
+ assuming that the validator itself is 100% correct, and no other
part of the system corrupts the state of the validator in any way.
We also assume that all NMI/SMM paths [which could interrupt
even hardirq-disabled codepaths] are correct and do not interfere
@@ -310,7 +315,7 @@ to do in practice).
Performance:
------------
-The above rules require _massive_ amounts of runtime checking. If we did
+The above rules require **massive** amounts of runtime checking. If we did
that for every lock taken and for every irqs-enable event, it would
render the system practically unusably slow. The complexity of checking
is O(N^2), so even with just a few hundred lock-classes we'd have to do
@@ -369,17 +374,17 @@ be harder to do than to say.
Of course, if you do run out of lock classes, the next thing to do is
to find the offending lock classes. First, the following command gives
-you the number of lock classes currently in use along with the maximum:
+you the number of lock classes currently in use along with the maximum::
grep "lock-classes" /proc/lockdep_stats
-This command produces the following output on a modest system:
+This command produces the following output on a modest system::
- lock-classes: 748 [max: 8191]
+ lock-classes: 748 [max: 8191]
If the number allocated (748 above) increases continually over time,
then there is likely a leak. The following command can be used to
-identify the leaking lock classes:
+identify the leaking lock classes::
grep "BD" /proc/lockdep
diff --git a/Documentation/locking/lockstat.rst b/Documentation/locking/lockstat.rst
new file mode 100644
index 000000000000..536eab8dbd99
--- /dev/null
+++ b/Documentation/locking/lockstat.rst
@@ -0,0 +1,204 @@
+===============
+Lock Statistics
+===============
+
+What
+====
+
+As the name suggests, it provides statistics on locks.
+
+
+Why
+===
+
+Because things like lock contention can severely impact performance.
+
+How
+===
+
+Lockdep already has hooks in the lock functions and maps lock instances to
+lock classes. We build on that (see Documentation/locking/lockdep-design.rst).
+The graph below shows the relation between the lock functions and the various
+hooks therein::
+
+ __acquire
+ |
+ lock _____
+ | \
+ | __contended
+ | |
+ | <wait>
+ | _______/
+ |/
+ |
+ __acquired
+ |
+ .
+ <hold>
+ .
+ |
+ __release
+ |
+ unlock
+
+ lock, unlock - the regular lock functions
+ __* - the hooks
+ <> - states
+
+With these hooks we provide the following statistics:
+
+ con-bounces
+ - number of lock contentions that involved x-cpu data
+ contentions
+ - number of lock acquisitions that had to wait
+ wait time
+ min
+ - shortest (non-0) time we ever had to wait for a lock
+ max
+ - longest time we ever had to wait for a lock
+ total
+ - total time we spent waiting on this lock
+ avg
+ - average time spent waiting on this lock
+ acq-bounces
+ - number of lock acquisitions that involved x-cpu data
+ acquisitions
+ - number of times we took the lock
+ hold time
+ min
+ - shortest (non-0) time we ever held the lock
+ max
+ - longest time we ever held the lock
+ total
+ - total time this lock was held
+ avg
+ - average time this lock was held
+
+These numbers are gathered per lock class, per read/write state (when
+applicable).
+
+It also tracks 4 contention points per class. A contention point is a call site
+that had to wait on lock acquisition.
+
+Configuration
+-------------
+
+Lock statistics are enabled via CONFIG_LOCK_STAT.
+
+Usage
+-----
+
+Enable collection of statistics::
+
+ # echo 1 >/proc/sys/kernel/lock_stat
+
+Disable collection of statistics::
+
+ # echo 0 >/proc/sys/kernel/lock_stat
+
+Look at the current lock statistics::
+
+ ( line numbers not part of actual output, done for clarity in the explanation
+ below )
+
+ # less /proc/lock_stat
+
+ 01 lock_stat version 0.4
+ 02-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ 03 class name con-bounces contentions waittime-min waittime-max waittime-total waittime-avg acq-bounces acquisitions holdtime-min holdtime-max holdtime-total holdtime-avg
+ 04-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ 05
+ 06 &mm->mmap_sem-W: 46 84 0.26 939.10 16371.53 194.90 47291 2922365 0.16 2220301.69 17464026916.32 5975.99
+ 07 &mm->mmap_sem-R: 37 100 1.31 299502.61 325629.52 3256.30 212344 34316685 0.10 7744.91 95016910.20 2.77
+ 08 ---------------
+ 09 &mm->mmap_sem 1 [<ffffffff811502a7>] khugepaged_scan_mm_slot+0x57/0x280
+ 10 &mm->mmap_sem 96 [<ffffffff815351c4>] __do_page_fault+0x1d4/0x510
+ 11 &mm->mmap_sem 34 [<ffffffff81113d77>] vm_mmap_pgoff+0x87/0xd0
+ 12 &mm->mmap_sem 17 [<ffffffff81127e71>] vm_munmap+0x41/0x80
+ 13 ---------------
+ 14 &mm->mmap_sem 1 [<ffffffff81046fda>] dup_mmap+0x2a/0x3f0
+ 15 &mm->mmap_sem 60 [<ffffffff81129e29>] SyS_mprotect+0xe9/0x250
+ 16 &mm->mmap_sem 41 [<ffffffff815351c4>] __do_page_fault+0x1d4/0x510
+ 17 &mm->mmap_sem 68 [<ffffffff81113d77>] vm_mmap_pgoff+0x87/0xd0
+ 18
+ 19.............................................................................................................................................................................................................................
+ 20
+ 21 unix_table_lock: 110 112 0.21 49.24 163.91 1.46 21094 66312 0.12 624.42 31589.81 0.48
+ 22 ---------------
+ 23 unix_table_lock 45 [<ffffffff8150ad8e>] unix_create1+0x16e/0x1b0
+ 24 unix_table_lock 47 [<ffffffff8150b111>] unix_release_sock+0x31/0x250
+ 25 unix_table_lock 15 [<ffffffff8150ca37>] unix_find_other+0x117/0x230
+ 26 unix_table_lock 5 [<ffffffff8150a09f>] unix_autobind+0x11f/0x1b0
+ 27 ---------------
+ 28 unix_table_lock 39 [<ffffffff8150b111>] unix_release_sock+0x31/0x250
+ 29 unix_table_lock 49 [<ffffffff8150ad8e>] unix_create1+0x16e/0x1b0
+ 30 unix_table_lock 20 [<ffffffff8150ca37>] unix_find_other+0x117/0x230
+ 31 unix_table_lock 4 [<ffffffff8150a09f>] unix_autobind+0x11f/0x1b0
+
+
+This excerpt shows the first two lock class statistics. Line 01 shows the
+output version - each time the format changes this will be updated. Lines 02-04
+show the header with column descriptions. Lines 05-18 and 20-31 show the actual
+statistics. These statistics come in two parts; the actual stats separated by a
+short separator (lines 08 and 13) from the contention points.
+
+Lines 09-12 show the first 4 recorded contention points (the code
+which tries to get the lock) and lines 14-17 show the first 4 recorded
+contended points (the lock holder). It is possible that the max
+con-bounces point is missing in the statistics.
+
+The first lock (05-18) is a read/write lock, and shows two lines above the
+short separator. The contention points don't match the column descriptors,
+they have two: contentions and [<IP>] symbol. The second set of contention
+points are the points we're contending with.
+
+The integer part of the time values is in us.
+
+Dealing with nested locks, subclasses may appear::
+
+ 32...........................................................................................................................................................................................................................
+ 33
+ 34 &rq->lock: 13128 13128 0.43 190.53 103881.26 7.91 97454 3453404 0.00 401.11 13224683.11 3.82
+ 35 ---------
+ 36 &rq->lock 645 [<ffffffff8103bfc4>] task_rq_lock+0x43/0x75
+ 37 &rq->lock 297 [<ffffffff8104ba65>] try_to_wake_up+0x127/0x25a
+ 38 &rq->lock 360 [<ffffffff8103c4c5>] select_task_rq_fair+0x1f0/0x74a
+ 39 &rq->lock 428 [<ffffffff81045f98>] scheduler_tick+0x46/0x1fb
+ 40 ---------
+ 41 &rq->lock 77 [<ffffffff8103bfc4>] task_rq_lock+0x43/0x75
+ 42 &rq->lock 174 [<ffffffff8104ba65>] try_to_wake_up+0x127/0x25a
+ 43 &rq->lock 4715 [<ffffffff8103ed4b>] double_rq_lock+0x42/0x54
+ 44 &rq->lock 893 [<ffffffff81340524>] schedule+0x157/0x7b8
+ 45
+ 46...........................................................................................................................................................................................................................
+ 47
+ 48 &rq->lock/1: 1526 11488 0.33 388.73 136294.31 11.86 21461 38404 0.00 37.93 109388.53 2.84
+ 49 -----------
+ 50 &rq->lock/1 11526 [<ffffffff8103ed58>] double_rq_lock+0x4f/0x54
+ 51 -----------
+ 52 &rq->lock/1 5645 [<ffffffff8103ed4b>] double_rq_lock+0x42/0x54
+ 53 &rq->lock/1 1224 [<ffffffff81340524>] schedule+0x157/0x7b8
+ 54 &rq->lock/1 4336 [<ffffffff8103ed58>] double_rq_lock+0x4f/0x54
+ 55 &rq->lock/1 181 [<ffffffff8104ba65>] try_to_wake_up+0x127/0x25a
+
+Line 48 shows statistics for the second subclass (/1) of &rq->lock class
+(subclass starts from 0), since in this case, as line 50 suggests,
+double_rq_lock actually acquires a nested lock of two spinlocks.
+
+View the top contending locks::
+
+ # grep : /proc/lock_stat | head
+ clockevents_lock: 2926159 2947636 0.15 46882.81 1784540466.34 605.41 3381345 3879161 0.00 2260.97 53178395.68 13.71
+ tick_broadcast_lock: 346460 346717 0.18 2257.43 39364622.71 113.54 3642919 4242696 0.00 2263.79 49173646.60 11.59
+ &mapping->i_mmap_mutex: 203896 203899 3.36 645530.05 31767507988.39 155800.21 3361776 8893984 0.17 2254.15 14110121.02 1.59
+ &rq->lock: 135014 136909 0.18 606.09 842160.68 6.15 1540728 10436146 0.00 728.72 17606683.41 1.69
+ &(&zone->lru_lock)->rlock: 93000 94934 0.16 59.18 188253.78 1.98 1199912 3809894 0.15 391.40 3559518.81 0.93
+ tasklist_lock-W: 40667 41130 0.23 1189.42 428980.51 10.43 270278 510106 0.16 653.51 3939674.91 7.72
+ tasklist_lock-R: 21298 21305 0.20 1310.05 215511.12 10.12 186204 241258 0.14 1162.33 1179779.23 4.89
+ rcu_node_1: 47656 49022 0.16 635.41 193616.41 3.95 844888 1865423 0.00 764.26 1656226.96 0.89
+ &(&dentry->d_lockref.lock)->rlock: 39791 40179 0.15 1302.08 88851.96 2.21 2790851 12527025 0.10 1910.75 3379714.27 0.27
+ rcu_node_0: 29203 30064 0.16 786.55 1555573.00 51.74 88963 244254 0.00 398.87 428872.51 1.76
+
+Clear the statistics::
+
+ # echo 0 > /proc/lock_stat
diff --git a/Documentation/locking/lockstat.txt b/Documentation/locking/lockstat.txt
deleted file mode 100644
index fdbeb0c45ef3..000000000000
--- a/Documentation/locking/lockstat.txt
+++ /dev/null
@@ -1,183 +0,0 @@
-
-LOCK STATISTICS
-
-- WHAT
-
-As the name suggests, it provides statistics on locks.
-
-- WHY
-
-Because things like lock contention can severely impact performance.
-
-- HOW
-
-Lockdep already has hooks in the lock functions and maps lock instances to
-lock classes. We build on that (see Documentation/locking/lockdep-design.txt).
-The graph below shows the relation between the lock functions and the various
-hooks therein.
-
- __acquire
- |
- lock _____
- | \
- | __contended
- | |
- | <wait>
- | _______/
- |/
- |
- __acquired
- |
- .
- <hold>
- .
- |
- __release
- |
- unlock
-
-lock, unlock - the regular lock functions
-__* - the hooks
-<> - states
-
-With these hooks we provide the following statistics:
-
- con-bounces - number of lock contention that involved x-cpu data
- contentions - number of lock acquisitions that had to wait
- wait time min - shortest (non-0) time we ever had to wait for a lock
- max - longest time we ever had to wait for a lock
- total - total time we spend waiting on this lock
- avg - average time spent waiting on this lock
- acq-bounces - number of lock acquisitions that involved x-cpu data
- acquisitions - number of times we took the lock
- hold time min - shortest (non-0) time we ever held the lock
- max - longest time we ever held the lock
- total - total time this lock was held
- avg - average time this lock was held
-
-These numbers are gathered per lock class, per read/write state (when
-applicable).
-
-It also tracks 4 contention points per class. A contention point is a call site
-that had to wait on lock acquisition.
-
- - CONFIGURATION
-
-Lock statistics are enabled via CONFIG_LOCK_STAT.
-
- - USAGE
-
-Enable collection of statistics:
-
-# echo 1 >/proc/sys/kernel/lock_stat
-
-Disable collection of statistics:
-
-# echo 0 >/proc/sys/kernel/lock_stat
-
-Look at the current lock statistics:
-
-( line numbers not part of actual output, done for clarity in the explanation
- below )
-
-# less /proc/lock_stat
-
-01 lock_stat version 0.4
-02-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-03 class name con-bounces contentions waittime-min waittime-max waittime-total waittime-avg acq-bounces acquisitions holdtime-min holdtime-max holdtime-total holdtime-avg
-04-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-05
-06 &mm->mmap_sem-W: 46 84 0.26 939.10 16371.53 194.90 47291 2922365 0.16 2220301.69 17464026916.32 5975.99
-07 &mm->mmap_sem-R: 37 100 1.31 299502.61 325629.52 3256.30 212344 34316685 0.10 7744.91 95016910.20 2.77
-08 ---------------
-09 &mm->mmap_sem 1 [<ffffffff811502a7>] khugepaged_scan_mm_slot+0x57/0x280
-10 &mm->mmap_sem 96 [<ffffffff815351c4>] __do_page_fault+0x1d4/0x510
-11 &mm->mmap_sem 34 [<ffffffff81113d77>] vm_mmap_pgoff+0x87/0xd0
-12 &mm->mmap_sem 17 [<ffffffff81127e71>] vm_munmap+0x41/0x80
-13 ---------------
-14 &mm->mmap_sem 1 [<ffffffff81046fda>] dup_mmap+0x2a/0x3f0
-15 &mm->mmap_sem 60 [<ffffffff81129e29>] SyS_mprotect+0xe9/0x250
-16 &mm->mmap_sem 41 [<ffffffff815351c4>] __do_page_fault+0x1d4/0x510
-17 &mm->mmap_sem 68 [<ffffffff81113d77>] vm_mmap_pgoff+0x87/0xd0
-18
-19.............................................................................................................................................................................................................................
-20
-21 unix_table_lock: 110 112 0.21 49.24 163.91 1.46 21094 66312 0.12 624.42 31589.81 0.48
-22 ---------------
-23 unix_table_lock 45 [<ffffffff8150ad8e>] unix_create1+0x16e/0x1b0
-24 unix_table_lock 47 [<ffffffff8150b111>] unix_release_sock+0x31/0x250
-25 unix_table_lock 15 [<ffffffff8150ca37>] unix_find_other+0x117/0x230
-26 unix_table_lock 5 [<ffffffff8150a09f>] unix_autobind+0x11f/0x1b0
-27 ---------------
-28 unix_table_lock 39 [<ffffffff8150b111>] unix_release_sock+0x31/0x250
-29 unix_table_lock 49 [<ffffffff8150ad8e>] unix_create1+0x16e/0x1b0
-30 unix_table_lock 20 [<ffffffff8150ca37>] unix_find_other+0x117/0x230
-31 unix_table_lock 4 [<ffffffff8150a09f>] unix_autobind+0x11f/0x1b0
-
-
-This excerpt shows the first two lock class statistics. Line 01 shows the
-output version - each time the format changes this will be updated. Line 02-04
-show the header with column descriptions. Lines 05-18 and 20-31 show the actual
-statistics. These statistics come in two parts; the actual stats separated by a
-short separator (line 08, 13) from the contention points.
-
-Lines 09-12 show the first 4 recorded contention points (the code
-which tries to get the lock) and lines 14-17 show the first 4 recorded
-contended points (the lock holder). It is possible that the max
-con-bounces point is missing in the statistics.
-
-The first lock (05-18) is a read/write lock, and shows two lines above the
-short separator. The contention points don't match the column descriptors,
-they have two: contentions and [<IP>] symbol. The second set of contention
-points are the points we're contending with.
-
-The integer part of the time values is in us.
-
-Dealing with nested locks, subclasses may appear:
-
-32...........................................................................................................................................................................................................................
-33
-34 &rq->lock: 13128 13128 0.43 190.53 103881.26 7.91 97454 3453404 0.00 401.11 13224683.11 3.82
-35 ---------
-36 &rq->lock 645 [<ffffffff8103bfc4>] task_rq_lock+0x43/0x75
-37 &rq->lock 297 [<ffffffff8104ba65>] try_to_wake_up+0x127/0x25a
-38 &rq->lock 360 [<ffffffff8103c4c5>] select_task_rq_fair+0x1f0/0x74a
-39 &rq->lock 428 [<ffffffff81045f98>] scheduler_tick+0x46/0x1fb
-40 ---------
-41 &rq->lock 77 [<ffffffff8103bfc4>] task_rq_lock+0x43/0x75
-42 &rq->lock 174 [<ffffffff8104ba65>] try_to_wake_up+0x127/0x25a
-43 &rq->lock 4715 [<ffffffff8103ed4b>] double_rq_lock+0x42/0x54
-44 &rq->lock 893 [<ffffffff81340524>] schedule+0x157/0x7b8
-45
-46...........................................................................................................................................................................................................................
-47
-48 &rq->lock/1: 1526 11488 0.33 388.73 136294.31 11.86 21461 38404 0.00 37.93 109388.53 2.84
-49 -----------
-50 &rq->lock/1 11526 [<ffffffff8103ed58>] double_rq_lock+0x4f/0x54
-51 -----------
-52 &rq->lock/1 5645 [<ffffffff8103ed4b>] double_rq_lock+0x42/0x54
-53 &rq->lock/1 1224 [<ffffffff81340524>] schedule+0x157/0x7b8
-54 &rq->lock/1 4336 [<ffffffff8103ed58>] double_rq_lock+0x4f/0x54
-55 &rq->lock/1 181 [<ffffffff8104ba65>] try_to_wake_up+0x127/0x25a
-
-Line 48 shows statistics for the second subclass (/1) of &rq->lock class
-(subclass starts from 0), since in this case, as line 50 suggests,
-double_rq_lock actually acquires a nested lock of two spinlocks.
-
-View the top contending locks:
-
-# grep : /proc/lock_stat | head
- clockevents_lock: 2926159 2947636 0.15 46882.81 1784540466.34 605.41 3381345 3879161 0.00 2260.97 53178395.68 13.71
- tick_broadcast_lock: 346460 346717 0.18 2257.43 39364622.71 113.54 3642919 4242696 0.00 2263.79 49173646.60 11.59
- &mapping->i_mmap_mutex: 203896 203899 3.36 645530.05 31767507988.39 155800.21 3361776 8893984 0.17 2254.15 14110121.02 1.59
- &rq->lock: 135014 136909 0.18 606.09 842160.68 6.15 1540728 10436146 0.00 728.72 17606683.41 1.69
- &(&zone->lru_lock)->rlock: 93000 94934 0.16 59.18 188253.78 1.98 1199912 3809894 0.15 391.40 3559518.81 0.93
- tasklist_lock-W: 40667 41130 0.23 1189.42 428980.51 10.43 270278 510106 0.16 653.51 3939674.91 7.72
- tasklist_lock-R: 21298 21305 0.20 1310.05 215511.12 10.12 186204 241258 0.14 1162.33 1179779.23 4.89
- rcu_node_1: 47656 49022 0.16 635.41 193616.41 3.95 844888 1865423 0.00 764.26 1656226.96 0.89
- &(&dentry->d_lockref.lock)->rlock: 39791 40179 0.15 1302.08 88851.96 2.21 2790851 12527025 0.10 1910.75 3379714.27 0.27
- rcu_node_0: 29203 30064 0.16 786.55 1555573.00 51.74 88963 244254 0.00 398.87 428872.51 1.76
-
-Clear the statistics:
-
-# echo 0 > /proc/lock_stat
diff --git a/Documentation/locking/locktorture.txt b/Documentation/locking/locktorture.rst
index 6a8df4cd19bf..e79eeeca3ac6 100644
--- a/Documentation/locking/locktorture.txt
+++ b/Documentation/locking/locktorture.rst
@@ -1,6 +1,9 @@
+==================================
Kernel Lock Torture Test Operation
+==================================
CONFIG_LOCK_TORTURE_TEST
+========================
The CONFIG_LOCK_TORTURE_TEST config option provides a kernel module
that runs torture tests on core kernel locking primitives. The kernel
@@ -18,61 +21,77 @@ can be simulated by either enlarging this critical region hold time and/or
creating more kthreads.
-MODULE PARAMETERS
+Module Parameters
+=================
This module has the following parameters:
- ** Locktorture-specific **
+Locktorture-specific
+--------------------
-nwriters_stress Number of kernel threads that will stress exclusive lock
+nwriters_stress
+ Number of kernel threads that will stress exclusive lock
ownership (writers). The default value is twice the number
of online CPUs.
-nreaders_stress Number of kernel threads that will stress shared lock
+nreaders_stress
+ Number of kernel threads that will stress shared lock
ownership (readers). The default is the same amount of writer
locks. If the user did not specify nwriters_stress, then
both readers and writers will be the number of online CPUs.
-torture_type Type of lock to torture. By default, only spinlocks will
+torture_type
+ Type of lock to torture. By default, only spinlocks will
be tortured. This module can torture the following locks,
with string values as follows:
- o "lock_busted": Simulates a buggy lock implementation.
+ - "lock_busted":
+ Simulates a buggy lock implementation.
- o "spin_lock": spin_lock() and spin_unlock() pairs.
+ - "spin_lock":
+ spin_lock() and spin_unlock() pairs.
- o "spin_lock_irq": spin_lock_irq() and spin_unlock_irq()
- pairs.
+ - "spin_lock_irq":
+ spin_lock_irq() and spin_unlock_irq() pairs.
- o "rw_lock": read/write lock() and unlock() rwlock pairs.
+ - "rw_lock":
+ read/write lock() and unlock() rwlock pairs.
- o "rw_lock_irq": read/write lock_irq() and unlock_irq()
- rwlock pairs.
+ - "rw_lock_irq":
+ read/write lock_irq() and unlock_irq()
+ rwlock pairs.
- o "mutex_lock": mutex_lock() and mutex_unlock() pairs.
+ - "mutex_lock":
+ mutex_lock() and mutex_unlock() pairs.
- o "rtmutex_lock": rtmutex_lock() and rtmutex_unlock()
- pairs. Kernel must have CONFIG_RT_MUTEX=y.
+ - "rtmutex_lock":
+ rtmutex_lock() and rtmutex_unlock() pairs.
+ Kernel must have CONFIG_RT_MUTEX=y.
- o "rwsem_lock": read/write down() and up() semaphore pairs.
+ - "rwsem_lock":
+ read/write down() and up() semaphore pairs.
- ** Torture-framework (RCU + locking) **
+Torture-framework (RCU + locking)
+---------------------------------
-shutdown_secs The number of seconds to run the test before terminating
+shutdown_secs
+ The number of seconds to run the test before terminating
the test and powering off the system. The default is
zero, which disables test termination and system shutdown.
This capability is useful for automated testing.
-onoff_interval The number of seconds between each attempt to execute a
+onoff_interval
+ The number of seconds between each attempt to execute a
randomly selected CPU-hotplug operation. Defaults
to zero, which disables CPU hotplugging. In
CONFIG_HOTPLUG_CPU=n kernels, locktorture will silently
refuse to do any CPU-hotplug operations regardless of
what value is specified for onoff_interval.
-onoff_holdoff The number of seconds to wait until starting CPU-hotplug
+onoff_holdoff
+ The number of seconds to wait until starting CPU-hotplug
operations. This would normally only be used when
locktorture was built into the kernel and started
automatically at boot time, in which case it is useful
@@ -80,53 +99,59 @@ onoff_holdoff The number of seconds to wait until starting CPU-hotplug
coming and going. This parameter is only useful if
CONFIG_HOTPLUG_CPU is enabled.
-stat_interval Number of seconds between statistics-related printk()s.
+stat_interval
+ Number of seconds between statistics-related printk()s.
By default, locktorture will report stats every 60 seconds.
Setting the interval to zero causes the statistics to
be printed -only- when the module is unloaded.
-stutter The length of time to run the test before pausing for this
+stutter
+ The length of time to run the test before pausing for this
same period of time. Defaults to "stutter=5", so as
to run and pause for (roughly) five-second intervals.
Specifying "stutter=0" causes the test to run continuously
without pausing, which is the old default behavior.
-shuffle_interval The number of seconds to keep the test threads affinitied
+shuffle_interval
+ The number of seconds to keep the test threads affinitied
to a particular subset of the CPUs, defaults to 3 seconds.
Used in conjunction with test_no_idle_hz.
-verbose Enable verbose debugging printing, via printk(). Enabled
+verbose
+ Enable verbose debugging printing, via printk(). Enabled
by default. This extra information is mostly related to
high-level errors and reports from the main 'torture'
framework.
-STATISTICS
+Statistics
+==========
-Statistics are printed in the following format:
+Statistics are printed in the following format::
-spin_lock-torture: Writes: Total: 93746064 Max/Min: 0/0 Fail: 0
- (A) (B) (C) (D) (E)
+ spin_lock-torture: Writes: Total: 93746064 Max/Min: 0/0 Fail: 0
+ (A) (B) (C) (D) (E)
-(A): Lock type that is being tortured -- torture_type parameter.
+ (A): Lock type that is being tortured -- torture_type parameter.
-(B): Number of writer lock acquisitions. If dealing with a read/write primitive
- a second "Reads" statistics line is printed.
+ (B): Number of writer lock acquisitions. If dealing with a read/write
+ primitive a second "Reads" statistics line is printed.
-(C): Number of times the lock was acquired.
+ (C): Number of times the lock was acquired.
-(D): Min and max number of times threads failed to acquire the lock.
+ (D): Min and max number of times threads failed to acquire the lock.
-(E): true/false values if there were errors acquiring the lock. This should
- -only- be positive if there is a bug in the locking primitive's
- implementation. Otherwise a lock should never fail (i.e., spin_lock()).
- Of course, the same applies for (C), above. A dummy example of this is
- the "lock_busted" type.
+ (E): true/false values if there were errors acquiring the lock. This should
+ -only- be positive if there is a bug in the locking primitive's
+ implementation. Otherwise a lock should never fail (i.e., spin_lock()).
+ Of course, the same applies for (C), above. A dummy example of this is
+ the "lock_busted" type.
-USAGE
+Usage
+=====
-The following script may be used to torture locks:
+The following script may be used to torture locks::
#!/bin/sh
diff --git a/Documentation/locking/mutex-design.txt b/Documentation/locking/mutex-design.rst
index 818aca19612f..4d8236b81fa5 100644
--- a/Documentation/locking/mutex-design.txt
+++ b/Documentation/locking/mutex-design.rst
@@ -1,6 +1,9 @@
+=======================
Generic Mutex Subsystem
+=======================
started by Ingo Molnar <mingo@redhat.com>
+
updated by Davidlohr Bueso <davidlohr@hp.com>
What are mutexes?
@@ -23,7 +26,7 @@ Implementation
Mutexes are represented by 'struct mutex', defined in include/linux/mutex.h
and implemented in kernel/locking/mutex.c. These locks use an atomic variable
(->owner) to keep track of the lock state during its lifetime. Field owner
-actually contains 'struct task_struct *' to the current lock owner and it is
+actually contains `struct task_struct *` to the current lock owner and it is
therefore NULL if not currently owned. Since task_struct pointers are aligned
to at least L1_CACHE_BYTES, low bits (3) are used to store extra state (e.g.,
if waiter list is non-empty). In its most basic form it also includes a
@@ -101,29 +104,36 @@ features that make lock debugging easier and faster:
Interfaces
----------
-Statically define the mutex:
+Statically define the mutex::
+
DEFINE_MUTEX(name);
-Dynamically initialize the mutex:
+Dynamically initialize the mutex::
+
mutex_init(mutex);
-Acquire the mutex, uninterruptible:
+Acquire the mutex, uninterruptible::
+
void mutex_lock(struct mutex *lock);
void mutex_lock_nested(struct mutex *lock, unsigned int subclass);
int mutex_trylock(struct mutex *lock);
-Acquire the mutex, interruptible:
+Acquire the mutex, interruptible::
+
int mutex_lock_interruptible_nested(struct mutex *lock,
unsigned int subclass);
int mutex_lock_interruptible(struct mutex *lock);
-Acquire the mutex, interruptible, if dec to 0:
+Acquire the mutex, interruptible, if dec to 0::
+
int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock);
-Unlock the mutex:
+Unlock the mutex::
+
void mutex_unlock(struct mutex *lock);
-Test if the mutex is taken:
+Test if the mutex is taken::
+
int mutex_is_locked(struct mutex *lock);
Disadvantages
diff --git a/Documentation/locking/rt-mutex-design.txt b/Documentation/locking/rt-mutex-design.rst
index 3d7b865539cc..59c2a64efb21 100644
--- a/Documentation/locking/rt-mutex-design.txt
+++ b/Documentation/locking/rt-mutex-design.rst
@@ -1,14 +1,15 @@
-#
-# Copyright (c) 2006 Steven Rostedt
-# Licensed under the GNU Free Documentation License, Version 1.2
-#
-
+==============================
RT-mutex implementation design
-------------------------------
+==============================
+
+Copyright (c) 2006 Steven Rostedt
+
+Licensed under the GNU Free Documentation License, Version 1.2
+
This document tries to describe the design of the rtmutex.c implementation.
It doesn't describe the reasons why rtmutex.c exists. For that please see
-Documentation/locking/rt-mutex.txt. Although this document does explain problems
+Documentation/locking/rt-mutex.rst. Although this document does explain problems
that happen without this code, that is only to provide the concepts needed to
understand what the code actually is doing.
@@ -41,17 +42,17 @@ to release the lock, because for all we know, B is a CPU hog and will
never give C a chance to release the lock. This is called unbounded priority
inversion.
-Here's a little ASCII art to show the problem.
+Here's a little ASCII art to show the problem::
- grab lock L1 (owned by C)
- |
-A ---+
- C preempted by B
- |
-C +----+
+ grab lock L1 (owned by C)
+ |
+ A ---+
+ C preempted by B
+ |
+ C +----+
-B +-------->
- B now keeps A from running.
+ B +-------->
+ B now keeps A from running.
Priority Inheritance (PI)
@@ -75,24 +76,29 @@ Terminology
Here I explain some terminology that is used in this document to help describe
the design that is used to implement PI.
-PI chain - The PI chain is an ordered series of locks and processes that cause
+PI chain
+ - The PI chain is an ordered series of locks and processes that cause
processes to inherit priorities from a previous process that is
blocked on one of its locks. This is described in more detail
later in this document.
-mutex - In this document, to differentiate from locks that implement
+mutex
+ - In this document, to differentiate from locks that implement
PI and spin locks that are used in the PI code, from now on
the PI locks will be called a mutex.
-lock - In this document from now on, I will use the term lock when
+lock
+ - In this document from now on, I will use the term lock when
referring to spin locks that are used to protect parts of the PI
algorithm. These locks disable preemption for UP (when
CONFIG_PREEMPT is enabled) and on SMP prevent multiple CPUs from
entering critical sections simultaneously.
-spin lock - Same as lock above.
+spin lock
+ - Same as lock above.
-waiter - A waiter is a struct that is stored on the stack of a blocked
+waiter
+ - A waiter is a struct that is stored on the stack of a blocked
process. Since the scope of the waiter is within the code for
a process being blocked on the mutex, it is fine to allocate
the waiter on the process's stack (local variable). This
@@ -104,14 +110,18 @@ waiter - A waiter is a struct that is stored on the stack of a blocked
waiter is sometimes used in reference to the task that is waiting
on a mutex. This is the same as waiter->task.
-waiters - A list of processes that are blocked on a mutex.
+waiters
+ - A list of processes that are blocked on a mutex.
-top waiter - The highest priority process waiting on a specific mutex.
+top waiter
+ - The highest priority process waiting on a specific mutex.
-top pi waiter - The highest priority process waiting on one of the mutexes
+top pi waiter
+ - The highest priority process waiting on one of the mutexes
that a specific process owns.
-Note: task and process are used interchangeably in this document, mostly to
+Note:
+ task and process are used interchangeably in this document, mostly to
differentiate between two processes that are being described together.
@@ -123,7 +133,7 @@ inheritance to take place. Multiple chains may converge, but a chain
would never diverge, since a process can't be blocked on more than one
mutex at a time.
-Example:
+Example::
Process: A, B, C, D, E
Mutexes: L1, L2, L3, L4
@@ -137,21 +147,21 @@ Example:
D owns L4
E blocked on L4
-The chain would be:
+The chain would be::
E->L4->D->L3->C->L2->B->L1->A
To show where two chains merge, we could add another process F and
another mutex L5 where B owns L5 and F is blocked on mutex L5.
-The chain for F would be:
+The chain for F would be::
F->L5->B->L1->A
Since a process may own more than one mutex, but never be blocked on more than
one, the chains merge.
-Here we show both chains:
+Here we show both chains::
E->L4->D->L3->C->L2-+
|
@@ -165,12 +175,12 @@ than the processes to the left or below in the chain.
Also since a mutex may have more than one process blocked on it, we can
have multiple chains merge at mutexes. If we add another process G that is
-blocked on mutex L2:
+blocked on mutex L2::
G->L2->B->L1->A
And once again, to show how this can grow I will show the merging chains
-again.
+again::
E->L4->D->L3->C-+
+->L2-+
@@ -184,7 +194,7 @@ the chain (A and B in this example), must have their priorities increased
to that of G.
Mutex Waiters Tree
------------------
+------------------
Every mutex keeps track of all the waiters that are blocked on itself. The
mutex has a rbtree to store these waiters by priority. This tree is protected
@@ -219,19 +229,19 @@ defined. But is very complex to figure it out, since it depends on all
the nesting of mutexes. Let's look at the example where we have 3 mutexes,
L1, L2, and L3, and four separate functions func1, func2, func3 and func4.
The following shows a locking order of L1->L2->L3, but may not actually
-be directly nested that way.
+be directly nested that way::
-void func1(void)
-{
+ void func1(void)
+ {
mutex_lock(L1);
/* do anything */
mutex_unlock(L1);
-}
+ }
-void func2(void)
-{
+ void func2(void)
+ {
mutex_lock(L1);
mutex_lock(L2);
@@ -239,10 +249,10 @@ void func2(void)
mutex_unlock(L2);
mutex_unlock(L1);
-}
+ }
-void func3(void)
-{
+ void func3(void)
+ {
mutex_lock(L2);
mutex_lock(L3);
@@ -250,30 +260,30 @@ void func3(void)
mutex_unlock(L3);
mutex_unlock(L2);
-}
+ }
-void func4(void)
-{
+ void func4(void)
+ {
mutex_lock(L3);
/* do something again */
mutex_unlock(L3);
-}
+ }
Now we add 4 processes that run each of these functions separately.
Processes A, B, C, and D which run functions func1, func2, func3 and func4
respectively, and such that D runs first and A last. With D being preempted
-in func4 in the "do something again" area, we have a locking that follows:
+in func4 in the "do something again" area, we have a locking that follows::
-D owns L3
- C blocked on L3
- C owns L2
- B blocked on L2
- B owns L1
- A blocked on L1
+ D owns L3
+ C blocked on L3
+ C owns L2
+ B blocked on L2
+ B owns L1
+ A blocked on L1
-And thus we have the chain A->L1->B->L2->C->L3->D.
+ And thus we have the chain A->L1->B->L2->C->L3->D.
This gives us a PI depth of 4 (four processes), but looking at any of the
functions individually, it seems as though they only have at most a locking
@@ -298,7 +308,7 @@ not true, the rtmutex.c code will be broken!), this allows for the least
significant bit to be used as a flag. Bit 0 is used as the "Has Waiters"
flag. It's set whenever there are waiters on a mutex.
-See Documentation/locking/rt-mutex.txt for further details.
+See Documentation/locking/rt-mutex.rst for further details.
cmpxchg Tricks
--------------
@@ -307,17 +317,17 @@ Some architectures implement an atomic cmpxchg (Compare and Exchange). This
is used (when applicable) to keep the fast path of grabbing and releasing
mutexes short.
-cmpxchg is basically the following function performed atomically:
+cmpxchg is basically the following function performed atomically::
-unsigned long _cmpxchg(unsigned long *A, unsigned long *B, unsigned long *C)
-{
+ unsigned long _cmpxchg(unsigned long *A, unsigned long *B, unsigned long *C)
+ {
unsigned long T = *A;
if (*A == *B) {
*A = *C;
}
return T;
-}
-#define cmpxchg(a,b,c) _cmpxchg(&a,&b,&c)
+ }
+ #define cmpxchg(a,b,c) _cmpxchg(&a,&b,&c)
This is really nice to have, since it allows you to only update a variable
if the variable is what you expect it to be. You know if it succeeded if
@@ -352,9 +362,10 @@ Then rt_mutex_setprio is called to adjust the priority of the task to the
new priority. Note that rt_mutex_setprio is defined in kernel/sched/core.c
to implement the actual change in priority.
-(Note: For the "prio" field in task_struct, the lower the number, the
+Note:
+ For the "prio" field in task_struct, the lower the number, the
higher the priority. A "prio" of 5 is of higher priority than a
- "prio" of 10.)
+ "prio" of 10.
It is interesting to note that rt_mutex_adjust_prio can either increase
or decrease the priority of the task. In the case that a higher priority
@@ -439,6 +450,7 @@ wait_lock, which this code currently holds. So setting the "Has Waiters" flag
forces the current owner to synchronize with this code.
The lock is taken if the following are true:
+
1) The lock has no owner
2) The current task is the highest priority against all other
waiters of the lock
@@ -546,10 +558,13 @@ Credits
-------
Author: Steven Rostedt <rostedt@goodmis.org>
+
Updated: Alex Shi <alex.shi@linaro.org> - 7/6/2017
-Original Reviewers: Ingo Molnar, Thomas Gleixner, Thomas Duetsch, and
+Original Reviewers:
+ Ingo Molnar, Thomas Gleixner, Thomas Duetsch, and
Randy Dunlap
+
Update (7/6/2017) Reviewers: Steven Rostedt and Sebastian Siewior
Updates
diff --git a/Documentation/locking/rt-mutex.txt b/Documentation/locking/rt-mutex.rst
index 35793e003041..c365dc302081 100644
--- a/Documentation/locking/rt-mutex.txt
+++ b/Documentation/locking/rt-mutex.rst
@@ -1,5 +1,6 @@
+==================================
RT-mutex subsystem with PI support
-----------------------------------
+==================================
RT-mutexes with priority inheritance are used to support PI-futexes,
which enable pthread_mutex_t priority inheritance attributes
@@ -46,27 +47,30 @@ The state of the rt-mutex is tracked via the owner field of the rt-mutex
structure:
lock->owner holds the task_struct pointer of the owner. Bit 0 is used to
-keep track of the "lock has waiters" state.
+keep track of the "lock has waiters" state:
- owner bit0
+ ============ ======= ================================================
+ owner bit0 Notes
+ ============ ======= ================================================
NULL 0 lock is free (fast acquire possible)
NULL 1 lock is free and has waiters and the top waiter
- is going to take the lock*
+ is going to take the lock [1]_
taskpointer 0 lock is held (fast release possible)
- taskpointer 1 lock is held and has waiters**
+ taskpointer 1 lock is held and has waiters [2]_
+ ============ ======= ================================================
The fast atomic compare exchange based acquire and release is only
possible when bit 0 of lock->owner is 0.
-(*) It also can be a transitional state when grabbing the lock
-with ->wait_lock is held. To prevent any fast path cmpxchg to the lock,
-we need to set the bit0 before looking at the lock, and the owner may be
-NULL in this small time, hence this can be a transitional state.
+.. [1] It also can be a transitional state when grabbing the lock
+ with ->wait_lock is held. To prevent any fast path cmpxchg to the lock,
+ we need to set the bit0 before looking at the lock, and the owner may
+ be NULL in this small time, hence this can be a transitional state.
-(**) There is a small time when bit 0 is set but there are no
-waiters. This can happen when grabbing the lock in the slow path.
-To prevent a cmpxchg of the owner releasing the lock, we need to
-set this bit before looking at the lock.
+.. [2] There is a small time when bit 0 is set but there are no
+ waiters. This can happen when grabbing the lock in the slow path.
+ To prevent a cmpxchg of the owner releasing the lock, we need to
+ set this bit before looking at the lock.
BTW, there is still technically a "Pending Owner", it's just not called
that anymore. The pending owner happens to be the top_waiter of a lock
diff --git a/Documentation/locking/spinlocks.txt b/Documentation/locking/spinlocks.rst
index ff35e40bdf5b..098107fb7d86 100644
--- a/Documentation/locking/spinlocks.txt
+++ b/Documentation/locking/spinlocks.rst
@@ -1,8 +1,13 @@
+===============
+Locking lessons
+===============
+
Lesson 1: Spin locks
+====================
-The most basic primitive for locking is spinlock.
+The most basic primitive for locking is spinlock::
-static DEFINE_SPINLOCK(xxx_lock);
+ static DEFINE_SPINLOCK(xxx_lock);
unsigned long flags;
@@ -19,23 +24,25 @@ worry about UP vs SMP issues: the spinlocks work correctly under both.
NOTE! Implications of spin_locks for memory are further described in:
Documentation/memory-barriers.txt
+
(5) LOCK operations.
+
(6) UNLOCK operations.
The above is usually pretty simple (you usually need and want only one
spinlock for most things - using more than one spinlock can make things a
lot more complex and even slower and is usually worth it only for
-sequences that you _know_ need to be split up: avoid it at all cost if you
+sequences that you **know** need to be split up: avoid it at all cost if you
aren't sure).
This is really the only really hard part about spinlocks: once you start
using spinlocks they tend to expand to areas you might not have noticed
before, because you have to make sure the spinlocks correctly protect the
-shared data structures _everywhere_ they are used. The spinlocks are most
+shared data structures **everywhere** they are used. The spinlocks are most
easily added to places that are completely independent of other code (for
example, internal driver data structures that nobody else ever touches).
- NOTE! The spin-lock is safe only when you _also_ use the lock itself
+ NOTE! The spin-lock is safe only when you **also** use the lock itself
to do locking across CPU's, which implies that EVERYTHING that
touches a shared variable has to agree about the spinlock they want
to use.
@@ -43,6 +50,7 @@ example, internal driver data structures that nobody else ever touches).
----
Lesson 2: reader-writer spinlocks.
+==================================
If your data accesses have a very natural pattern where you usually tend
to mostly read from the shared variables, the reader-writer locks
@@ -54,7 +62,7 @@ to change the variables it has to get an exclusive write lock.
simple spinlocks. Unless the reader critical section is long, you
are better off just using spinlocks.
-The routines look the same as above:
+The routines look the same as above::
rwlock_t xxx_lock = __RW_LOCK_UNLOCKED(xxx_lock);
@@ -71,7 +79,7 @@ The routines look the same as above:
The above kind of lock may be useful for complex data structures like
linked lists, especially searching for entries without changing the list
itself. The read lock allows many concurrent readers. Anything that
-_changes_ the list will have to get the write lock.
+**changes** the list will have to get the write lock.
NOTE! RCU is better for list traversal, but requires careful
attention to design detail (see Documentation/RCU/listRCU.txt).
@@ -87,10 +95,11 @@ to get the write-lock at the very beginning.
----
Lesson 3: spinlocks revisited.
+==============================
The single spin-lock primitives above are by no means the only ones. They
are the most safe ones, and the ones that work under all circumstances,
-but partly _because_ they are safe they are also fairly slow. They are slower
+but partly **because** they are safe they are also fairly slow. They are slower
than they'd need to be, because they do have to disable interrupts
(which is just a single instruction on an x86, but it's an expensive one -
and on other architectures it can be worse).
@@ -98,7 +107,7 @@ and on other architectures it can be worse).
If you have a case where you have to protect a data structure across
several CPU's and you want to use spinlocks you can potentially use
cheaper versions of the spinlocks. IFF you know that the spinlocks are
-never used in interrupt handlers, you can use the non-irq versions:
+never used in interrupt handlers, you can use the non-irq versions::
spin_lock(&lock);
...
@@ -110,7 +119,7 @@ This is useful if you know that the data in question is only ever
manipulated from a "process context", ie no interrupts involved.
The reasons you mustn't use these versions if you have interrupts that
-play with the spinlock is that you can get deadlocks:
+play with the spinlock is that you can get deadlocks::
spin_lock(&lock);
...
@@ -147,9 +156,10 @@ indeed), while write-locks need to protect themselves against interrupts.
----
Reference information:
+======================
For dynamic initialization, use spin_lock_init() or rwlock_init() as
-appropriate:
+appropriate::
spinlock_t xxx_lock;
rwlock_t xxx_rw_lock;
diff --git a/Documentation/locking/ww-mutex-design.txt b/Documentation/locking/ww-mutex-design.rst
index f0ed7c30e695..1846c199da23 100644
--- a/Documentation/locking/ww-mutex-design.txt
+++ b/Documentation/locking/ww-mutex-design.rst
@@ -1,3 +1,4 @@
+======================================
Wound/Wait Deadlock-Proof Mutex Design
======================================
@@ -85,6 +86,7 @@ Furthermore there are three different class of w/w lock acquire functions:
no deadlock potential and hence the ww_mutex_lock call will block and not
prematurely return -EDEADLK. The advantage of the _slow functions is in
interface safety:
+
- ww_mutex_lock has a __must_check int return type, whereas ww_mutex_lock_slow
has a void return type. Note that since ww mutex code needs loops/retries
anyway the __must_check doesn't result in spurious warnings, even though the
@@ -115,36 +117,36 @@ expect the number of simultaneous competing transactions to be typically small,
and you want to reduce the number of rollbacks.
Three different ways to acquire locks within the same w/w class. Common
-definitions for methods #1 and #2:
+definitions for methods #1 and #2::
-static DEFINE_WW_CLASS(ww_class);
+ static DEFINE_WW_CLASS(ww_class);
-struct obj {
+ struct obj {
struct ww_mutex lock;
/* obj data */
-};
+ };
-struct obj_entry {
+ struct obj_entry {
struct list_head head;
struct obj *obj;
-};
+ };
Method 1, using a list in execbuf->buffers that's not allowed to be reordered.
This is useful if a list of required objects is already tracked somewhere.
Furthermore the lock helper can propagate the -EALREADY return code back to
the caller as a signal that an object is twice on the list. This is useful if
the list is constructed from userspace input and the ABI requires userspace to
-not have duplicate entries (e.g. for a gpu commandbuffer submission ioctl).
+not have duplicate entries (e.g. for a gpu commandbuffer submission ioctl)::
-int lock_objs(struct list_head *list, struct ww_acquire_ctx *ctx)
-{
+ int lock_objs(struct list_head *list, struct ww_acquire_ctx *ctx)
+ {
struct obj *res_obj = NULL;
struct obj_entry *contended_entry = NULL;
struct obj_entry *entry;
ww_acquire_init(ctx, &ww_class);
-retry:
+ retry:
list_for_each_entry (entry, list, head) {
if (entry->obj == res_obj) {
res_obj = NULL;
@@ -160,7 +162,7 @@ retry:
ww_acquire_done(ctx);
return 0;
-err:
+ err:
list_for_each_entry_continue_reverse (entry, list, head)
ww_mutex_unlock(&entry->obj->lock);
@@ -176,14 +178,14 @@ err:
ww_acquire_fini(ctx);
return ret;
-}
+ }
Method 2, using a list in execbuf->buffers that can be reordered. Same semantics
of duplicate entry detection using -EALREADY as method 1 above. But the
-list-reordering allows for a bit more idiomatic code.
+list-reordering allows for a bit more idiomatic code::
-int lock_objs(struct list_head *list, struct ww_acquire_ctx *ctx)
-{
+ int lock_objs(struct list_head *list, struct ww_acquire_ctx *ctx)
+ {
struct obj_entry *entry, *entry2;
ww_acquire_init(ctx, &ww_class);
@@ -216,24 +218,25 @@ int lock_objs(struct list_head *list, struct ww_acquire_ctx *ctx)
ww_acquire_done(ctx);
return 0;
-}
+ }
-Unlocking works the same way for both methods #1 and #2:
+Unlocking works the same way for both methods #1 and #2::
-void unlock_objs(struct list_head *list, struct ww_acquire_ctx *ctx)
-{
+ void unlock_objs(struct list_head *list, struct ww_acquire_ctx *ctx)
+ {
struct obj_entry *entry;
list_for_each_entry (entry, list, head)
ww_mutex_unlock(&entry->obj->lock);
ww_acquire_fini(ctx);
-}
+ }
Method 3 is useful if the list of objects is constructed ad-hoc and not upfront,
e.g. when adjusting edges in a graph where each node has its own ww_mutex lock,
and edges can only be changed when holding the locks of all involved nodes. w/w
mutexes are a natural fit for such a case for two reasons:
+
- They can handle lock-acquisition in any order which allows us to start walking
a graph from a starting point and then iteratively discovering new edges and
locking down the nodes those edges connect to.
@@ -243,6 +246,7 @@ mutexes are a natural fit for such a case for two reasons:
as a starting point).
Note that this approach differs in two important ways from the above methods:
+
- Since the list of objects is dynamically constructed (and might very well be
different when retrying due to hitting the -EDEADLK die condition) there's
no need to keep any object on a persistent list when it's not locked. We can
@@ -260,17 +264,17 @@ any interface misuse for these cases.
Also, method 3 can't fail the lock acquisition step since it doesn't return
-EALREADY. Of course this would be different when using the _interruptible
-variants, but that's outside of the scope of these examples here.
+variants, but that's outside of the scope of these examples here::
-struct obj {
+ struct obj {
struct ww_mutex ww_mutex;
struct list_head locked_list;
-};
+ };
-static DEFINE_WW_CLASS(ww_class);
+ static DEFINE_WW_CLASS(ww_class);
-void __unlock_objs(struct list_head *list)
-{
+ void __unlock_objs(struct list_head *list)
+ {
struct obj *entry, *temp;
list_for_each_entry_safe (entry, temp, list, locked_list) {
@@ -279,15 +283,15 @@ void __unlock_objs(struct list_head *list)
list_del(&entry->locked_list);
ww_mutex_unlock(entry->ww_mutex)
}
-}
+ }
-void lock_objs(struct list_head *list, struct ww_acquire_ctx *ctx)
-{
+ void lock_objs(struct list_head *list, struct ww_acquire_ctx *ctx)
+ {
struct obj *obj;
ww_acquire_init(ctx, &ww_class);
-retry:
+ retry:
/* re-init loop start state */
loop {
/* magic code which walks over a graph and decides which objects
@@ -312,13 +316,13 @@ retry:
ww_acquire_done(ctx);
return 0;
-}
+ }
-void unlock_objs(struct list_head *list, struct ww_acquire_ctx *ctx)
-{
+ void unlock_objs(struct list_head *list, struct ww_acquire_ctx *ctx)
+ {
__unlock_objs(list);
ww_acquire_fini(ctx);
-}
+ }
Method 4: Only lock one single object. In that case deadlock detection and
prevention is obviously overkill, since with grabbing just one lock you can't
@@ -329,11 +333,14 @@ Implementation Details
----------------------
Design:
+^^^^^^^
+
ww_mutex currently encapsulates a struct mutex, this means no extra overhead for
normal mutex locks, which are far more common. As such there is only a small
increase in code size if wait/wound mutexes are not used.
We maintain the following invariants for the wait list:
+
(1) Waiters with an acquire context are sorted by stamp order; waiters
without an acquire context are interspersed in FIFO order.
(2) For Wait-Die, among waiters with contexts, only the first one can have
@@ -355,6 +362,8 @@ Design:
therefore be directed towards the uncontended cases.
Lockdep:
+^^^^^^^^
+
Special care has been taken to warn for as many cases of api abuse
as possible. Some common api abuses will be caught with
CONFIG_DEBUG_MUTEXES, but CONFIG_PROVE_LOCKING is recommended.
@@ -379,5 +388,6 @@ Lockdep:
having called ww_acquire_fini on the first.
- 'normal' deadlocks that can occur.
-FIXME: Update this section once we have the TASK_DEADLOCK task state flag magic
-implemented.
+FIXME:
+ Update this section once we have the TASK_DEADLOCK task state flag magic
+ implemented.
diff --git a/Documentation/m68k/index.rst b/Documentation/m68k/index.rst
new file mode 100644
index 000000000000..3a5ba7fe1703
--- /dev/null
+++ b/Documentation/m68k/index.rst
@@ -0,0 +1,17 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=================
+m68k Architecture
+=================
+
+.. toctree::
+ :maxdepth: 2
+
+ kernel-options
+
+.. only:: subproject and html
+
+ Indices
+ =======
+
+ * :ref:`genindex`
diff --git a/Documentation/m68k/kernel-options.txt b/Documentation/m68k/kernel-options.rst
index 79d21246c75a..cabd9419740d 100644
--- a/Documentation/m68k/kernel-options.txt
+++ b/Documentation/m68k/kernel-options.rst
@@ -1,22 +1,24 @@
-
-
- Command Line Options for Linux/m68k
- ===================================
+===================================
+Command Line Options for Linux/m68k
+===================================
Last Update: 2 May 1999
+
Linux/m68k version: 2.2.6
+
Author: Roman.Hodek@informatik.uni-erlangen.de (Roman Hodek)
+
Update: jds@kom.auc.dk (Jes Sorensen) and faq@linux-m68k.org (Chris Lawrence)
0) Introduction
===============
- Often I've been asked which command line options the Linux/m68k
+Often I've been asked which command line options the Linux/m68k
kernel understands, or how the exact syntax for the ... option is, or
... about the option ... . I hope, this document supplies all the
answers...
- Note that some options might be outdated, their descriptions being
+Note that some options might be outdated, their descriptions being
incomplete or missing. Please update the information and send in the
patches.
@@ -38,11 +40,11 @@ argument contains an '=', it is of class 2, and the definition is put
into init's environment. All other arguments are passed to init as
command line options.
- This document describes the valid kernel options for Linux/m68k in
+This document describes the valid kernel options for Linux/m68k in
the version mentioned at the start of this file. Later revisions may
add new such options, and some may be missing in older versions.
- In general, the value (the part after the '=') of an option is a
+In general, the value (the part after the '=') of an option is a
list of values separated by commas. The interpretation of these values
is up to the driver that "owns" the option. This association of
options with drivers is also the reason that some are further
@@ -55,21 +57,21 @@ subdivided.
2.1) root=
----------
-Syntax: root=/dev/<device>
- or: root=<hex_number>
+:Syntax: root=/dev/<device>
+:or: root=<hex_number>
This tells the kernel which device it should mount as the root
filesystem. The device must be a block device with a valid filesystem
on it.
- The first syntax gives the device by name. These names are converted
+The first syntax gives the device by name. These names are converted
into a major/minor number internally in the kernel in an unusual way.
Normally, this "conversion" is done by the device files in /dev, but
this isn't possible here, because the root filesystem (with /dev)
isn't mounted yet... So the kernel parses the name itself, with some
hardcoded name to number mappings. The name must always be a
combination of two or three letters, followed by a decimal number.
-Valid names are:
+Valid names are::
/dev/ram: -> 0x0100 (initial ramdisk)
/dev/hda: -> 0x0300 (first IDE disk)
@@ -81,7 +83,7 @@ Valid names are:
/dev/sde: -> 0x0840 (fifth SCSI disk)
/dev/fd : -> 0x0200 (floppy disk)
- The name must be followed by a decimal number, that stands for the
+The name must be followed by a decimal number, that stands for the
partition number. Internally, the value of the number is just
added to the device number mentioned in the table above. The
exceptions are /dev/ram and /dev/fd, where /dev/ram refers to an
@@ -100,12 +102,12 @@ the kernel command line.
[Strange and maybe uninteresting stuff ON]
- This unusual translation of device names has some strange
+This unusual translation of device names has some strange
consequences: If, for example, you have a symbolic link from /dev/fd
to /dev/fd0D720 as an abbreviation for floppy driver #0 in DD format,
you cannot use this name for specifying the root device, because the
kernel cannot see this symlink before mounting the root FS and it
-isn't in the table above. If you use it, the root device will not be
+isn't in the table above. If you use it, the root device will not be
set at all, without an error message. Another example: You cannot use a
partition on e.g. the sixth SCSI disk as the root filesystem, if you
want to specify it by name. This is, because only the devices up to
@@ -118,7 +120,7 @@ knowledge that each disk uses 16 minors, and write "root=/dev/sde17"
[Strange and maybe uninteresting stuff OFF]
- If the device containing your root partition isn't in the table
+If the device containing your root partition isn't in the table
above, you can also specify it by major and minor numbers. These are
written in hex, with no prefix and no separator between. E.g., if you
have a CD with contents appropriate as a root filesystem in the first
@@ -136,6 +138,7 @@ known partition UUID as the starting point. For example,
if partition 5 of the device has the UUID of
00112233-4455-6677-8899-AABBCCDDEEFF then partition 3 may be found as
follows:
+
PARTUUID=00112233-4455-6677-8899-AABBCCDDEEFF/PARTNROFF=-2
Authoritative information can be found in
@@ -145,8 +148,8 @@ Authoritative information can be found in
2.2) ro, rw
-----------
-Syntax: ro
- or: rw
+:Syntax: ro
+:or: rw
These two options tell the kernel whether it should mount the root
filesystem read-only or read-write. The default is read-only, except
@@ -156,7 +159,7 @@ for ramdisks, which default to read-write.
2.3) debug
----------
-Syntax: debug
+:Syntax: debug
This raises the kernel log level to 10 (the default is 7). This is the
same level as set by the "dmesg" command, just that the maximum level
@@ -166,7 +169,7 @@ selectable by dmesg is 8.
2.4) debug=
-----------
-Syntax: debug=<device>
+:Syntax: debug=<device>
This option causes certain kernel messages to be printed to the selected
debugging device. This can aid debugging the kernel, since the
@@ -175,7 +178,7 @@ devices are possible depends on the machine type. There are no checks
for the validity of the device name. If the device isn't implemented,
nothing happens.
- Messages logged this way are in general stack dumps after kernel
+Messages logged this way are in general stack dumps after kernel
memory faults or bad kernel traps, and kernel panics. To be exact: all
messages of level 0 (panic messages) and all messages printed while
the log level is 8 or more (their level doesn't matter). Before stack
@@ -185,19 +188,27 @@ at least 8 can also be set by the "debug" command line option (see
Devices possible for Amiga:
- - "ser": built-in serial port; parameters: 9600bps, 8N1
- - "mem": Save the messages to a reserved area in chip mem. After
+ - "ser":
+ built-in serial port; parameters: 9600bps, 8N1
+ - "mem":
+ Save the messages to a reserved area in chip mem. After
rebooting, they can be read under AmigaOS with the tool
'dmesg'.
Devices possible for Atari:
- - "ser1": ST-MFP serial port ("Modem1"); parameters: 9600bps, 8N1
- - "ser2": SCC channel B serial port ("Modem2"); parameters: 9600bps, 8N1
- - "ser" : default serial port
+ - "ser1":
+ ST-MFP serial port ("Modem1"); parameters: 9600bps, 8N1
+ - "ser2":
+ SCC channel B serial port ("Modem2"); parameters: 9600bps, 8N1
+ - "ser" :
+ default serial port
This is "ser2" for a Falcon, and "ser1" for any other machine
- - "midi": The MIDI port; parameters: 31250bps, 8N1
- - "par" : parallel port
+ - "midi":
+ The MIDI port; parameters: 31250bps, 8N1
+ - "par" :
+ parallel port
+
The printing routine for this implements a timeout for the
case there's no printer connected (else the kernel would
lock up). The timeout is not exact, but usually a few
@@ -205,26 +216,29 @@ Devices possible for Atari:
2.6) ramdisk_size=
--------------
+------------------
-Syntax: ramdisk_size=<size>
+:Syntax: ramdisk_size=<size>
- This option instructs the kernel to set up a ramdisk of the given
+This option instructs the kernel to set up a ramdisk of the given
size in KBytes. Do not use this option if the ramdisk contents are
passed by bootstrap! In this case, the size is selected automatically
and should not be overwritten.
- The only application is for root filesystems on floppy disks, that
+The only application is for root filesystems on floppy disks, that
should be loaded into memory. To do that, select the corresponding
size of the disk as ramdisk size, and set the root device to the disk
drive (with "root=").
2.7) swap=
+
+ I can't find any sign of this option in 2.2.6.
+
2.8) buff=
-----------
- I can't find any sign of these options in 2.2.6.
+ I can't find any sign of this option in 2.2.6.
3) General Device Options (Amiga and Atari)
@@ -233,13 +247,13 @@ drive (with "root=").
3.1) ether=
-----------
-Syntax: ether=[<irq>[,<base_addr>[,<mem_start>[,<mem_end>]]]],<dev-name>
+:Syntax: ether=[<irq>[,<base_addr>[,<mem_start>[,<mem_end>]]]],<dev-name>
- <dev-name> is the name of a net driver, as specified in
+<dev-name> is the name of a net driver, as specified in
drivers/net/Space.c in the Linux source. Most prominent are eth0, ...
eth3, sl0, ... sl3, ppp0, ..., ppp3, dummy, and lo.
- The non-ethernet drivers (sl, ppp, dummy, lo) obviously ignore the
+The non-ethernet drivers (sl, ppp, dummy, lo) obviously ignore the
settings by this option. Also, the existing ethernet drivers for
Linux/m68k (ariadne, a2065, hydra) don't use them because Zorro boards
are really Plug-'n-Play, so the "ether=" option is useless altogether
@@ -249,9 +263,9 @@ for Linux/m68k.
3.2) hd=
--------
-Syntax: hd=<cylinders>,<heads>,<sectors>
+:Syntax: hd=<cylinders>,<heads>,<sectors>
- This option sets the disk geometry of an IDE disk. The first hd=
+This option sets the disk geometry of an IDE disk. The first hd=
option is for the first IDE disk, the second for the second one.
(I.e., you can give this option twice.) In most cases, you won't have
to use this option, since the kernel can obtain the geometry data
@@ -262,9 +276,9 @@ disks.
3.3) max_scsi_luns=
-------------------
-Syntax: max_scsi_luns=<n>
+:Syntax: max_scsi_luns=<n>
- Sets the maximum number of LUNs (logical units) of SCSI devices to
+Sets the maximum number of LUNs (logical units) of SCSI devices to
be scanned. Valid values for <n> are between 1 and 8. Default is 8 if
"Probe all LUNs on each SCSI device" was selected during the kernel
configuration, else 1.
@@ -273,9 +287,9 @@ configuration, else 1.
3.4) st=
--------
-Syntax: st=<buffer_size>,[<write_thres>,[<max_buffers>]]
+:Syntax: st=<buffer_size>,[<write_thres>,[<max_buffers>]]
- Sets several parameters of the SCSI tape driver. <buffer_size> is
+Sets several parameters of the SCSI tape driver. <buffer_size> is
the number of 512-byte buffers reserved for tape operations for each
device. <write_thres> sets the number of blocks which must be filled
to start an actual write operation to the tape. Maximum value is the
@@ -286,9 +300,9 @@ buffers allocated for all tape devices.
3.5) dmasound=
--------------
-Syntax: dmasound=[<buffers>,<buffer-size>[,<catch-radius>]]
+:Syntax: dmasound=[<buffers>,<buffer-size>[,<catch-radius>]]
- This option controls some configurations of the Linux/m68k DMA sound
+This option controls some configurations of the Linux/m68k DMA sound
driver (Amiga and Atari): <buffers> is the number of buffers you want
to use (minimum 4, default 4), <buffer-size> is the size of each
buffer in kilobytes (minimum 4, default 32) and <catch-radius> says
@@ -305,20 +319,22 @@ don't need to expand the sound.
4.1) video=
-----------
-Syntax: video=<fbname>:<sub-options...>
+:Syntax: video=<fbname>:<sub-options...>
The <fbname> parameter specifies the name of the frame buffer,
-eg. most atari users will want to specify `atafb' here. The
+eg. most atari users will want to specify `atafb` here. The
<sub-options> is a comma-separated list of the sub-options listed
below.
-NB: Please notice that this option was renamed from `atavideo' to
- `video' during the development of the 1.3.x kernels, thus you
+NB:
+ Please notice that this option was renamed from `atavideo` to
+ `video` during the development of the 1.3.x kernels, thus you
might need to update your boot-scripts if upgrading to 2.x from
an 1.2.x kernel.
-NBB: The behavior of video= was changed in 2.1.57 so the recommended
-option is to specify the name of the frame buffer.
+NBB:
+ The behavior of video= was changed in 2.1.57 so the recommended
+ option is to specify the name of the frame buffer.
4.1.1) Video Mode
-----------------
@@ -341,11 +357,11 @@ mode, if the hardware allows. Currently defined names are:
- falh2 : 896x608x1, Falcon only
- falh16 : 896x608x4, Falcon only
- If no video mode is given on the command line, the kernel tries the
+If no video mode is given on the command line, the kernel tries the
modes named "default<n>" in turn, until one is possible with the
hardware in use.
- A video mode setting doesn't make sense, if the external driver is
+A video mode setting doesn't make sense, if the external driver is
activated by a "external:" sub-option.
4.1.2) inverse
@@ -358,17 +374,17 @@ option, you can make the background white.
4.1.3) font
-----------
-Syntax: font:<fontname>
+:Syntax: font:<fontname>
Specify the font to use in text modes. Currently you can choose only
-between `VGA8x8', `VGA8x16' and `PEARL8x8'. `VGA8x8' is default, if the
+between `VGA8x8`, `VGA8x16` and `PEARL8x8`. `VGA8x8` is default, if the
vertical size of the display is less than 400 pixel rows. Otherwise, the
-`VGA8x16' font is the default.
+`VGA8x16` font is the default.
-4.1.4) hwscroll_
-----------------
+4.1.4) `hwscroll_`
+------------------
-Syntax: hwscroll_<n>
+:Syntax: `hwscroll_<n>`
The number of additional lines of video memory to reserve for
speeding up the scrolling ("hardware scrolling"). Hardware scrolling
@@ -378,7 +394,7 @@ possible with plain STs and graphics cards (The former because the
base address must be on a 256 byte boundary there, the latter because
the kernel doesn't know how to set the base address at all.)
- By default, <n> is set to the number of visible text lines on the
+By default, <n> is set to the number of visible text lines on the
display. Thus, the amount of video memory is doubled, compared to no
hardware scrolling. You can turn off the hardware scrolling altogether
by setting <n> to 0.
@@ -386,31 +402,31 @@ by setting <n> to 0.
4.1.5) internal:
----------------
-Syntax: internal:<xres>;<yres>[;<xres_max>;<yres_max>;<offset>]
+:Syntax: internal:<xres>;<yres>[;<xres_max>;<yres_max>;<offset>]
This option specifies the capabilities of some extended internal video
hardware, like e.g. OverScan. <xres> and <yres> give the (extended)
dimensions of the screen.
- If your OverScan needs a black border, you have to write the last
+If your OverScan needs a black border, you have to write the last
three arguments of the "internal:". <xres_max> is the maximum line
length the hardware allows, <yres_max> the maximum number of lines.
<offset> is the offset of the visible part of the screen memory to its
physical start, in bytes.
- Often, extended interval video hardware has to be activated somehow.
+Often, extended internal video hardware has to be activated somehow.
For this, see the "sw_*" options below.
4.1.6) external:
----------------
-Syntax:
- external:<xres>;<yres>;<depth>;<org>;<scrmem>[;<scrlen>[;<vgabase>\
- [;<colw>[;<coltype>[;<xres_virtual>]]]]]
+:Syntax:
+ external:<xres>;<yres>;<depth>;<org>;<scrmem>[;<scrlen>[;<vgabase>
+ [;<colw>[;<coltype>[;<xres_virtual>]]]]]
-[I had to break this line...]
+.. I had to break this line...
- This is probably the most complicated parameter... It specifies that
+This is probably the most complicated parameter... It specifies that
you have some external video hardware (a graphics board), and how to
use it under Linux/m68k. The kernel cannot know more about the hardware
than you tell it here! The kernel also is unable to set or change any
@@ -418,38 +434,44 @@ video modes, since it doesn't know about any board internal. So, you
have to switch to that video mode before you start Linux, and cannot
switch to another mode once Linux has started.
- The first 3 parameters of this sub-option should be obvious: <xres>,
+The first 3 parameters of this sub-option should be obvious: <xres>,
<yres> and <depth> give the dimensions of the screen and the number of
planes (depth). The depth is the logarithm to base 2 of the number
of colors possible. (Or, the other way round: The number of colors is
2^depth).
- You have to tell the kernel furthermore how the video memory is
+You have to tell the kernel furthermore how the video memory is
organized. This is done by a letter as <org> parameter:
- 'n': "normal planes", i.e. one whole plane after another
- 'i': "interleaved planes", i.e. 16 bit of the first plane, than 16 bit
+ 'n':
+ "normal planes", i.e. one whole plane after another
+ 'i':
+ "interleaved planes", i.e. 16 bit of the first plane, then 16 bit
of the next, and so on... This mode is used only with the
- built-in Atari video modes, I think there is no card that
- supports this mode.
- 'p': "packed pixels", i.e. <depth> consecutive bits stand for all
- planes of one pixel; this is the most common mode for 8 planes
- (256 colors) on graphic cards
- 't': "true color" (more or less packed pixels, but without a color
- lookup table); usually depth is 24
+ built-in Atari video modes, I think there is no card that
+ supports this mode.
+ 'p':
+ "packed pixels", i.e. <depth> consecutive bits stand for all
+ planes of one pixel; this is the most common mode for 8 planes
+ (256 colors) on graphic cards
+ 't':
+ "true color" (more or less packed pixels, but without a color
+ lookup table); usually depth is 24
For monochrome modes (i.e., <depth> is 1), the <org> letter has a
different meaning:
- 'n': normal colors, i.e. 0=white, 1=black
- 'i': inverted colors, i.e. 0=black, 1=white
+ 'n':
+ normal colors, i.e. 0=white, 1=black
+ 'i':
+ inverted colors, i.e. 0=black, 1=white
- The next important information about the video hardware is the base
+The next important information about the video hardware is the base
address of the video memory. That is given in the <scrmem> parameter,
as a hexadecimal number with a "0x" prefix. You have to find out this
address in the documentation of your hardware.
- The next parameter, <scrlen>, tells the kernel about the size of the
+The next parameter, <scrlen>, tells the kernel about the size of the
video memory. If it's missing, the size is calculated from <xres>,
<yres>, and <depth>. For now, it is not useful to write a value here.
It would be used only for hardware scrolling (which isn't possible
@@ -460,7 +482,7 @@ empty, either by ending the "external:" after the video address or by
writing two consecutive semicolons, if you want to give a <vgabase>
(it is allowed to leave this parameter empty).
- The <vgabase> parameter is optional. If it is not given, the kernel
+The <vgabase> parameter is optional. If it is not given, the kernel
cannot read or write any color registers of the video hardware, and
thus you have to set appropriate colors before you start Linux. But if
your card is somehow VGA compatible, you can tell the kernel the base
@@ -472,18 +494,18 @@ uses the addresses vgabase+0x3c7...vgabase+0x3c9. The <vgabase>
parameter is written in hexadecimal with a "0x" prefix, just as
<scrmem>.
- <colw> is meaningful only if <vgabase> is specified. It tells the
+<colw> is meaningful only if <vgabase> is specified. It tells the
kernel how wide each of the color register is, i.e. the number of bits
per single color (red/green/blue). Default is 6, another quite usual
value is 8.
- Also <coltype> is used together with <vgabase>. It tells the kernel
+Also <coltype> is used together with <vgabase>. It tells the kernel
about the color register model of your gfx board. Currently, the types
"vga" (which is also the default) and "mv300" (SANG MV300) are
implemented.
- Parameter <xres_virtual> is required for ProMST or ET4000 cards where
-the physical linelength differs from the visible length. With ProMST,
+Parameter <xres_virtual> is required for ProMST or ET4000 cards where
+the physical linelength differs from the visible length. With ProMST,
xres_virtual must be set to 2048. For ET4000, xres_virtual depends on the
initialisation of the video-card.
If you're missing a corresponding yres_virtual: the external part is legacy,
@@ -499,13 +521,13 @@ currently works only with the ScreenWonder!
4.1.8) monitorcap:
-------------------
-Syntax: monitorcap:<vmin>;<vmax>;<hmin>;<hmax>
+:Syntax: monitorcap:<vmin>;<vmax>;<hmin>;<hmax>
This describes the capabilities of a multisync monitor. Don't use it
with a fixed-frequency monitor! For now, only the Falcon frame buffer
uses the settings of "monitorcap:".
- <vmin> and <vmax> are the minimum and maximum, resp., vertical frequencies
+<vmin> and <vmax> are the minimum and maximum, resp., vertical frequencies
your monitor can work with, in Hz. <hmin> and <hmax> are the same for
the horizontal frequency, in kHz.
@@ -520,28 +542,28 @@ If this option is given, the framebuffer device doesn't do any video
mode calculations and settings on its own. The only Atari fb device
that does this currently is the Falcon.
- What you reach with this: Settings for unknown video extensions
+What you reach with this: Settings for unknown video extensions
aren't overridden by the driver, so you can still use the mode found
when booting, when the driver doesn't know to set this mode itself.
But this also means, that you can't switch video modes anymore...
- An example where you may want to use "keep" is the ScreenBlaster for
+An example where you may want to use "keep" is the ScreenBlaster for
the Falcon.
4.2) atamouse=
--------------
-Syntax: atamouse=<x-threshold>,[<y-threshold>]
+:Syntax: atamouse=<x-threshold>,[<y-threshold>]
- With this option, you can set the mouse movement reporting threshold.
+With this option, you can set the mouse movement reporting threshold.
This is the number of pixels of mouse movement that have to accumulate
before the IKBD sends a new mouse packet to the kernel. Higher values
reduce the mouse interrupt load and thus reduce the chance of keyboard
overruns. Lower values give slightly faster mouse responses and
slightly better mouse tracking.
- You can set the threshold in x and y separately, but usually this is
+You can set the threshold in x and y separately, but usually this is
of little practical use. If there's just one number in the option, it
is used for both dimensions. The default value is 2 for both
thresholds.
@@ -550,7 +572,7 @@ thresholds.
4.3) ataflop=
-------------
-Syntax: ataflop=<drive type>[,<trackbuffering>[,<steprateA>[,<steprateB>]]]
+:Syntax: ataflop=<drive type>[,<trackbuffering>[,<steprateA>[,<steprateB>]]]
The drive type may be 0, 1, or 2, for DD, HD, and ED, resp. This
setting affects how many buffers are reserved and which formats are
@@ -563,15 +585,15 @@ Syntax: ataflop=<drive type>[,<trackbuffering>[,<steprateA>[,<steprateB>]]]
no for the Medusa and yes for all others.
With the two following parameters, you can change the default
- steprate used for drive A and B, resp.
+ steprate used for drive A and B, resp.
4.4) atascsi=
-------------
-Syntax: atascsi=<can_queue>[,<cmd_per_lun>[,<scat-gat>[,<host-id>[,<tagged>]]]]
+:Syntax: atascsi=<can_queue>[,<cmd_per_lun>[,<scat-gat>[,<host-id>[,<tagged>]]]]
- This option sets some parameters for the Atari native SCSI driver.
+This option sets some parameters for the Atari native SCSI driver.
Generally, any number of arguments can be omitted from the end. And
for each of the numbers, a negative value means "use default". The
defaults depend on whether TT-style or Falcon-style SCSI is used.
@@ -597,11 +619,14 @@ ignored (others aren't affected).
32). Default: 8/1. (Note: Values > 1 seem to cause problems on a
Falcon, cause not yet known.)
- The <cmd_per_lun> value at a great part determines the amount of
+ The <cmd_per_lun> value at a great part determines the amount of
memory SCSI reserves for itself. The formula is rather
complicated, but I can give you some hints:
- no scatter-gather : cmd_per_lun * 232 bytes
- full scatter-gather: cmd_per_lun * approx. 17 Kbytes
+
+ no scatter-gather:
+ cmd_per_lun * 232 bytes
+ full scatter-gather:
+ cmd_per_lun * approx. 17 Kbytes
<scat-gat>:
Size of the scatter-gather table, i.e. the number of requests
@@ -634,19 +659,23 @@ ignored (others aren't affected).
4.5 switches=
-------------
-Syntax: switches=<list of switches>
+:Syntax: switches=<list of switches>
- With this option you can switch some hardware lines that are often
+With this option you can switch some hardware lines that are often
used to enable/disable certain hardware extensions. Examples are
OverScan, overclocking, ...
- The <list of switches> is a comma-separated list of the following
+The <list of switches> is a comma-separated list of the following
items:
- ikbd: set RTS of the keyboard ACIA high
- midi: set RTS of the MIDI ACIA high
- snd6: set bit 6 of the PSG port A
- snd7: set bit 6 of the PSG port A
+ ikbd:
+ set RTS of the keyboard ACIA high
+ midi:
+ set RTS of the MIDI ACIA high
+ snd6:
+ set bit 6 of the PSG port A
+ snd7:
+ set bit 7 of the PSG port A
It doesn't make sense to mention a switch more than once (no
difference to only once), but you can give as many switches as you
@@ -654,16 +683,16 @@ want to enable different features. The switch lines are set as early
as possible during kernel initialization (even before determining the
present hardware.)
- All of the items can also be prefixed with "ov_", i.e. "ov_ikbd",
-"ov_midi", ... These options are meant for switching on an OverScan
+All of the items can also be prefixed with `ov_`, i.e. `ov_ikbd`,
+`ov_midi`, ... These options are meant for switching on an OverScan
video extension. The difference to the bare option is that the
switch-on is done after video initialization, and somehow synchronized
to the HBLANK. A speciality is that ov_ikbd and ov_midi are switched
off before rebooting, so that OverScan is disabled and TOS boots
correctly.
- If you give an option both, with and without the "ov_" prefix, the
-earlier initialization ("ov_"-less) takes precedence. But the
+If you give an option both, with and without the `ov_` prefix, the
+earlier initialization (`ov_`-less) takes precedence. But the
switching-off on reset still happens in this case.
5) Options for Amiga Only:
@@ -672,10 +701,10 @@ switching-off on reset still happens in this case.
5.1) video=
-----------
-Syntax: video=<fbname>:<sub-options...>
+:Syntax: video=<fbname>:<sub-options...>
The <fbname> parameter specifies the name of the frame buffer, valid
-options are `amifb', `cyber', 'virge', `retz3' and `clgen', provided
+options are `amifb`, `cyber`, `virge`, `retz3` and `clgen`, provided
that the respective frame buffer devices have been compiled into the
kernel (or compiled as loadable modules). The behavior of the <fbname>
option was changed in 2.1.57 so it is now recommended to specify this
@@ -697,9 +726,11 @@ predefined video modes are available:
NTSC modes:
- ntsc : 640x200, 15 kHz, 60 Hz
- ntsc-lace : 640x400, 15 kHz, 60 Hz interlaced
+
PAL modes:
- pal : 640x256, 15 kHz, 50 Hz
- pal-lace : 640x512, 15 kHz, 50 Hz interlaced
+
ECS modes:
- multiscan : 640x480, 29 kHz, 57 Hz
- multiscan-lace : 640x960, 29 kHz, 57 Hz interlaced
@@ -715,6 +746,7 @@ ECS modes:
- dblpal-lace : 640x1024, 27 kHz, 47 Hz interlaced
- dblntsc : 640x200, 27 kHz, 57 Hz doublescan
- dblpal : 640x256, 27 kHz, 47 Hz doublescan
+
VGA modes:
- vga : 640x480, 31 kHz, 60 Hz
- vga70 : 640x400, 31 kHz, 70 Hz
@@ -726,7 +758,7 @@ chipset and 8-bit color for the AGA chipset.
5.1.2) depth
------------
-Syntax: depth:<nr. of bit-planes>
+:Syntax: depth:<nr. of bit-planes>
Specify the number of bit-planes for the selected video-mode.
@@ -739,32 +771,32 @@ Use inverted display (black on white). Functionally the same as the
5.1.4) font
-----------
-Syntax: font:<fontname>
+:Syntax: font:<fontname>
Specify the font to use in text modes. Functionally the same as the
-"font" sub-option for the Atari, except that `PEARL8x8' is used instead
-of `VGA8x8' if the vertical size of the display is less than 400 pixel
+"font" sub-option for the Atari, except that `PEARL8x8` is used instead
+of `VGA8x8` if the vertical size of the display is less than 400 pixel
rows.
5.1.5) monitorcap:
-------------------
-Syntax: monitorcap:<vmin>;<vmax>;<hmin>;<hmax>
+:Syntax: monitorcap:<vmin>;<vmax>;<hmin>;<hmax>
This describes the capabilities of a multisync monitor. For now, only
the color frame buffer uses the settings of "monitorcap:".
- <vmin> and <vmax> are the minimum and maximum, resp., vertical frequencies
+<vmin> and <vmax> are the minimum and maximum, resp., vertical frequencies
your monitor can work with, in Hz. <hmin> and <hmax> are the same for
the horizontal frequency, in kHz.
- The defaults are 50;90;15;38 (Generic Amiga multisync monitor).
+The defaults are 50;90;15;38 (Generic Amiga multisync monitor).
5.2) fd_def_df0=
----------------
-Syntax: fd_def_df0=<value>
+:Syntax: fd_def_df0=<value>
Sets the df0 value for "silent" floppy drives. The value should be in
hexadecimal with "0x" prefix.
@@ -773,7 +805,7 @@ hexadecimal with "0x" prefix.
5.3) wd33c93=
-------------
-Syntax: wd33c93=<sub-options...>
+:Syntax: wd33c93=<sub-options...>
These options affect the A590/A2091, A3000 and GVP Series II SCSI
controllers.
@@ -784,9 +816,9 @@ below.
5.3.1) nosync
-------------
-Syntax: nosync:bitmask
+:Syntax: nosync:bitmask
- bitmask is a byte where the 1st 7 bits correspond with the 7
+bitmask is a byte where the 1st 7 bits correspond with the 7
possible SCSI devices. Set a bit to prevent sync negotiation on that
device. To maintain backwards compatibility, a command-line such as
"wd33c93=255" will be automatically translated to
@@ -796,35 +828,35 @@ all devices, eg. nosync:0xff.
5.3.2) period
-------------
-Syntax: period:ns
+:Syntax: period:ns
- `ns' is the minimum # of nanoseconds in a SCSI data transfer
+`ns` is the minimum # of nanoseconds in a SCSI data transfer
period. Default is 500; acceptable values are 250 - 1000.
5.3.3) disconnect
-----------------
-Syntax: disconnect:x
+:Syntax: disconnect:x
- Specify x = 0 to never allow disconnects, 2 to always allow them.
+Specify x = 0 to never allow disconnects, 2 to always allow them.
x = 1 does 'adaptive' disconnects, which is the default and generally
the best choice.
5.3.4) debug
------------
-Syntax: debug:x
+:Syntax: debug:x
- If `DEBUGGING_ON' is defined, x is a bit mask that causes various
+If `DEBUGGING_ON` is defined, x is a bit mask that causes various
types of debug output to be printed - see the DB_xxx defines in
wd33c93.h.
5.3.5) clock
------------
-Syntax: clock:x
+:Syntax: clock:x
- x = clock input in MHz for WD33c93 chip. Normal values would be from
+x = clock input in MHz for WD33c93 chip. Normal values would be from
8 through 20. The default value depends on your hostadapter(s),
default for the A3000 internal controller is 14, for the A2091 it's 8
and for the GVP hostadapters it's either 8 or 14, depending on the
@@ -834,15 +866,15 @@ hostadapters.
5.3.6) next
-----------
- No argument. Used to separate blocks of keywords when there's more
+No argument. Used to separate blocks of keywords when there's more
than one wd33c93-based host adapter in the system.
5.3.7) nodma
------------
-Syntax: nodma:x
+:Syntax: nodma:x
- If x is 1 (or if the option is just written as "nodma"), the WD33c93
+If x is 1 (or if the option is just written as "nodma"), the WD33c93
controller will not use DMA (= direct memory access) to access the
Amiga's memory. This is useful for some systems (like A3000's and
A4000's with the A3640 accelerator, revision 3.0) that have problems
@@ -853,32 +885,27 @@ possible.
5.4) gvp11=
-----------
-Syntax: gvp11=<addr-mask>
+:Syntax: gvp11=<addr-mask>
- The earlier versions of the GVP driver did not handle DMA
+The earlier versions of the GVP driver did not handle DMA
address-mask settings correctly which made it necessary for some
people to use this option, in order to get their GVP controller
running under Linux. These problems have hopefully been solved and the
use of this option is now highly unrecommended!
- Incorrect use can lead to unpredictable behavior, so please only use
+Incorrect use can lead to unpredictable behavior, so please only use
this option if you *know* what you are doing and have a reason to do
so. In any case if you experience problems and need to use this
option, please inform us about it by mailing to the Linux/68k kernel
mailing list.
- The address mask set by this option specifies which addresses are
+The address mask set by this option specifies which addresses are
valid for DMA with the GVP Series II SCSI controller. An address is
valid, if no bits are set except the bits that are set in the mask,
too.
- Some versions of the GVP can only DMA into a 24 bit address range,
+Some versions of the GVP can only DMA into a 24 bit address range,
some can address a 25 bit address range while others can use the whole
32 bit address range for DMA. The correct setting depends on your
controller and should be autodetected by the driver. An example is the
24 bit region which is specified by a mask of 0x00fffffe.
-
-
-/* Local Variables: */
-/* mode: text */
-/* End: */
diff --git a/Documentation/mic/index.rst b/Documentation/mic/index.rst
index 082fa8f6a260..3a8d06367ef1 100644
--- a/Documentation/mic/index.rst
+++ b/Documentation/mic/index.rst
@@ -1,5 +1,3 @@
-:orphan:
-
=============================================
Intel Many Integrated Core (MIC) architecture
=============================================
diff --git a/Documentation/netlabel/index.rst b/Documentation/netlabel/index.rst
index 47f1e0e5acd1..984e1b191b12 100644
--- a/Documentation/netlabel/index.rst
+++ b/Documentation/netlabel/index.rst
@@ -1,4 +1,4 @@
-:orphan:
+.. SPDX-License-Identifier: GPL-2.0
========
NetLabel
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index 48c79e78817b..df33674799b5 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -2287,7 +2287,7 @@ addr_scope_policy - INTEGER
/proc/sys/net/core/*
- Please see: Documentation/sysctl/net.txt for descriptions of these entries.
+ Please see: Documentation/admin-guide/sysctl/net.rst for descriptions of these entries.
/proc/sys/net/unix/*
diff --git a/Documentation/pcmcia/index.rst b/Documentation/pcmcia/index.rst
index 779c8527109e..7ae1f62fca14 100644
--- a/Documentation/pcmcia/index.rst
+++ b/Documentation/pcmcia/index.rst
@@ -1,4 +1,4 @@
-:orphan:
+.. SPDX-License-Identifier: GPL-2.0
======
pcmcia
diff --git a/Documentation/pi-futex.txt b/Documentation/pi-futex.txt
index b154f6c0c36e..c33ba2befbf8 100644
--- a/Documentation/pi-futex.txt
+++ b/Documentation/pi-futex.txt
@@ -119,4 +119,4 @@ properties of futexes, and all four combinations are possible: futex,
robust-futex, PI-futex, robust+PI-futex.
More details about priority inheritance can be found in
-Documentation/locking/rt-mutex.txt.
+Documentation/locking/rt-mutex.rst.
diff --git a/Documentation/powerpc/firmware-assisted-dump.txt b/Documentation/powerpc/firmware-assisted-dump.txt
index 0c41d6d463f3..10e7f4d16c14 100644
--- a/Documentation/powerpc/firmware-assisted-dump.txt
+++ b/Documentation/powerpc/firmware-assisted-dump.txt
@@ -59,7 +59,7 @@ as follows:
the default calculated size. Use this option if default
boot memory size is not sufficient for second kernel to
boot successfully. For syntax of crashkernel= parameter,
- refer to Documentation/kdump/kdump.rst. If any offset is
+ refer to Documentation/admin-guide/kdump/kdump.rst. If any offset is
provided in crashkernel= parameter, it will be ignored
as fadump uses a predefined offset to reserve memory
for boot memory dump preservation in case of a crash.
diff --git a/Documentation/process/submit-checklist.rst b/Documentation/process/submit-checklist.rst
index 365efc9e4aa8..8e56337d422d 100644
--- a/Documentation/process/submit-checklist.rst
+++ b/Documentation/process/submit-checklist.rst
@@ -107,7 +107,7 @@ and elsewhere regarding submitting Linux kernel patches.
and why.
26) If any ioctl's are added by the patch, then also update
- ``Documentation/ioctl/ioctl-number.txt``.
+ ``Documentation/ioctl/ioctl-number.rst``.
27) If your modified source code depends on or uses any of the kernel
APIs or features that are related to the following ``Kconfig`` symbols,
diff --git a/Documentation/pti/pti_intel_mid.txt b/Documentation/pti/pti_intel_mid.txt
deleted file mode 100644
index e7a5b6d1f7a9..000000000000
--- a/Documentation/pti/pti_intel_mid.txt
+++ /dev/null
@@ -1,99 +0,0 @@
-The Intel MID PTI project is HW implemented in Intel Atom
-system-on-a-chip designs based on the Parallel Trace
-Interface for MIPI P1149.7 cJTAG standard. The kernel solution
-for this platform involves the following files:
-
-./include/linux/pti.h
-./drivers/.../n_tracesink.h
-./drivers/.../n_tracerouter.c
-./drivers/.../n_tracesink.c
-./drivers/.../pti.c
-
-pti.c is the driver that enables various debugging features
-popular on platforms from certain mobile manufacturers.
-n_tracerouter.c and n_tracesink.c allow extra system information to
-be collected and routed to the pti driver, such as trace
-debugging data from a modem. Although n_tracerouter
-and n_tracesink are a part of the complete PTI solution,
-these two line disciplines can work separately from
-pti.c and route any data stream from one /dev/tty node
-to another /dev/tty node via kernel-space. This provides
-a stable, reliable connection that will not break unless
-the user-space application shuts down (plus avoids
-kernel->user->kernel context switch overheads of routing
-data).
-
-An example debugging usage for this driver system:
- *Hook /dev/ttyPTI0 to syslogd. Opening this port will also start
- a console device to further capture debugging messages to PTI.
- *Hook /dev/ttyPTI1 to modem debugging data to write to PTI HW.
- This is where n_tracerouter and n_tracesink are used.
- *Hook /dev/pti to a user-level debugging application for writing
- to PTI HW.
- *Use mipi_* Kernel Driver API in other device drivers for
- debugging to PTI by first requesting a PTI write address via
- mipi_request_masterchannel(1).
-
-Below is example pseudo-code on how a 'privileged' application
-can hook up n_tracerouter and n_tracesink to any tty on
-a system. 'Privileged' means the application has enough
-privileges to successfully manipulate the ldisc drivers
-but is not just blindly executing as 'root'. Keep in mind
-the use of ioctl(,TIOCSETD,) is not specific to the n_tracerouter
-and n_tracesink line discpline drivers but is a generic
-operation for a program to use a line discpline driver
-on a tty port other than the default n_tty.
-
-/////////// To hook up n_tracerouter and n_tracesink /////////
-
-// Note that n_tracerouter depends on n_tracesink.
-#include <errno.h>
-#define ONE_TTY "/dev/ttyOne"
-#define TWO_TTY "/dev/ttyTwo"
-
-// needed global to hand onto ldisc connection
-static int g_fd_source = -1;
-static int g_fd_sink = -1;
-
-// these two vars used to grab LDISC values from loaded ldisc drivers
-// in OS. Look at /proc/tty/ldiscs to get the right numbers from
-// the ldiscs loaded in the system.
-int source_ldisc_num, sink_ldisc_num = -1;
-int retval;
-
-g_fd_source = open(ONE_TTY, O_RDWR); // must be R/W
-g_fd_sink = open(TWO_TTY, O_RDWR); // must be R/W
-
-if (g_fd_source <= 0) || (g_fd_sink <= 0) {
- // doubt you'll want to use these exact error lines of code
- printf("Error on open(). errno: %d\n",errno);
- return errno;
-}
-
-retval = ioctl(g_fd_sink, TIOCSETD, &sink_ldisc_num);
-if (retval < 0) {
- printf("Error on ioctl(). errno: %d\n", errno);
- return errno;
-}
-
-retval = ioctl(g_fd_source, TIOCSETD, &source_ldisc_num);
-if (retval < 0) {
- printf("Error on ioctl(). errno: %d\n", errno);
- return errno;
-}
-
-/////////// To disconnect n_tracerouter and n_tracesink ////////
-
-// First make sure data through the ldiscs has stopped.
-
-// Second, disconnect ldiscs. This provides a
-// little cleaner shutdown on tty stack.
-sink_ldisc_num = 0;
-source_ldisc_num = 0;
-ioctl(g_fd_uart, TIOCSETD, &sink_ldisc_num);
-ioctl(g_fd_gadget, TIOCSETD, &source_ldisc_num);
-
-// Three, program closes connection, and cleanup:
-close(g_fd_uart);
-close(g_fd_gadget);
-g_fd_uart = g_fd_gadget = NULL;
diff --git a/Documentation/rbtree.txt b/Documentation/rbtree.txt
index c42a21b99046..523d54b60087 100644
--- a/Documentation/rbtree.txt
+++ b/Documentation/rbtree.txt
@@ -204,21 +204,21 @@ potentially expensive tree iterations. This is done at negligible runtime
overhead for maintanence; albeit larger memory footprint.
Similar to the rb_root structure, cached rbtrees are initialized to be
-empty via:
+empty via::
struct rb_root_cached mytree = RB_ROOT_CACHED;
Cached rbtree is simply a regular rb_root with an extra pointer to cache the
leftmost node. This allows rb_root_cached to exist wherever rb_root does,
which permits augmented trees to be supported as well as only a few extra
-interfaces:
+interfaces::
struct rb_node *rb_first_cached(struct rb_root_cached *tree);
void rb_insert_color_cached(struct rb_node *, struct rb_root_cached *, bool);
void rb_erase_cached(struct rb_node *node, struct rb_root_cached *);
Both insert and erase calls have their respective counterpart of augmented
-trees:
+trees::
void rb_insert_augmented_cached(struct rb_node *node, struct rb_root_cached *,
bool, struct rb_augment_callbacks *);
diff --git a/Documentation/riscv/index.rst b/Documentation/riscv/index.rst
index c4b906d9b5a7..e3ca0922a8c2 100644
--- a/Documentation/riscv/index.rst
+++ b/Documentation/riscv/index.rst
@@ -1,5 +1,3 @@
-:orphan:
-
===================
RISC-V architecture
===================
diff --git a/Documentation/s390/debugging390.rst b/Documentation/s390/debugging390.rst
index d49305fd5e1a..73ad0b06c666 100644
--- a/Documentation/s390/debugging390.rst
+++ b/Documentation/s390/debugging390.rst
@@ -170,7 +170,7 @@ currently running at.
| +----------------+-------------------------------------------------+
| | 32 | Basic Addressing Mode |
| | | |
-| | | Used to set addressing mode |
+| | | Used to set addressing mode:: |
| | | |
| | | +---------+----------+----------+ |
| | | | PSW 31 | PSW 32 | | |
diff --git a/Documentation/s390/index.rst b/Documentation/s390/index.rst
index 1a914da2a07b..4602312909d3 100644
--- a/Documentation/s390/index.rst
+++ b/Documentation/s390/index.rst
@@ -1,5 +1,3 @@
-:orphan:
-
=================
s390 Architecture
=================
diff --git a/Documentation/s390/vfio-ccw.rst b/Documentation/s390/vfio-ccw.rst
index 1f6d0b56d53e..1e210c6afa88 100644
--- a/Documentation/s390/vfio-ccw.rst
+++ b/Documentation/s390/vfio-ccw.rst
@@ -38,7 +38,7 @@ every detail. More information/reference could be found here:
qemu/hw/s390x/css.c
For vfio mediated device framework:
-- Documentation/vfio-mediated-device.txt
+- Documentation/driver-api/vfio-mediated-device.rst
Motivation of vfio-ccw
----------------------
@@ -322,5 +322,5 @@ Reference
2. ESA/390 Common I/O Device Commands manual (IBM Form. No. SA22-7204)
3. https://en.wikipedia.org/wiki/Channel_I/O
4. Documentation/s390/cds.rst
-5. Documentation/vfio.txt
-6. Documentation/vfio-mediated-device.txt
+5. Documentation/driver-api/vfio.rst
+6. Documentation/driver-api/vfio-mediated-device.rst
diff --git a/Documentation/scheduler/index.rst b/Documentation/scheduler/index.rst
index 058be77a4c34..69074e5de9c4 100644
--- a/Documentation/scheduler/index.rst
+++ b/Documentation/scheduler/index.rst
@@ -1,5 +1,3 @@
-:orphan:
-
===============
Linux Scheduler
===============
diff --git a/Documentation/scheduler/sched-deadline.rst b/Documentation/scheduler/sched-deadline.rst
index 3391e86d810c..14a2f7bf63fe 100644
--- a/Documentation/scheduler/sched-deadline.rst
+++ b/Documentation/scheduler/sched-deadline.rst
@@ -669,7 +669,7 @@ Deadline Task Scheduling
-deadline tasks cannot have an affinity mask smaller that the entire
root_domain they are created on. However, affinities can be specified
- through the cpuset facility (Documentation/cgroup-v1/cpusets.rst).
+ through the cpuset facility (Documentation/admin-guide/cgroup-v1/cpusets.rst).
5.1 SCHED_DEADLINE and cpusets HOWTO
------------------------------------
diff --git a/Documentation/scheduler/sched-design-CFS.rst b/Documentation/scheduler/sched-design-CFS.rst
index 53b30d1967cf..a96c72651877 100644
--- a/Documentation/scheduler/sched-design-CFS.rst
+++ b/Documentation/scheduler/sched-design-CFS.rst
@@ -222,7 +222,7 @@ SCHED_BATCH) tasks.
These options need CONFIG_CGROUPS to be defined, and let the administrator
create arbitrary groups of tasks, using the "cgroup" pseudo filesystem. See
- Documentation/cgroup-v1/cgroups.rst for more information about this filesystem.
+ Documentation/admin-guide/cgroup-v1/cgroups.rst for more information about this filesystem.
When CONFIG_FAIR_GROUP_SCHED is defined, a "cpu.shares" file is created for each
group created using the pseudo filesystem. See example steps below to create
diff --git a/Documentation/scheduler/sched-rt-group.rst b/Documentation/scheduler/sched-rt-group.rst
index d27d3f3712fd..655a096ec8fb 100644
--- a/Documentation/scheduler/sched-rt-group.rst
+++ b/Documentation/scheduler/sched-rt-group.rst
@@ -133,7 +133,7 @@ This uses the cgroup virtual file system and "<cgroup>/cpu.rt_runtime_us"
to control the CPU time reserved for each control group.
For more information on working with control groups, you should read
-Documentation/cgroup-v1/cgroups.rst as well.
+Documentation/admin-guide/cgroup-v1/cgroups.rst as well.
Group settings are checked against the following limits in order to keep the
configuration schedulable:
diff --git a/Documentation/security/index.rst b/Documentation/security/index.rst
index aad6d92ffe31..fc503dd689a7 100644
--- a/Documentation/security/index.rst
+++ b/Documentation/security/index.rst
@@ -8,7 +8,10 @@ Security Documentation
credentials
IMA-templates
keys/index
- LSM
+ lsm
+ lsm-development
+ sak
SCTP
self-protection
+ siphash
tpm/index
diff --git a/Documentation/security/LSM.rst b/Documentation/security/lsm-development.rst
index 31d92bc5fdd2..31d92bc5fdd2 100644
--- a/Documentation/security/LSM.rst
+++ b/Documentation/security/lsm-development.rst
diff --git a/Documentation/lsm.txt b/Documentation/security/lsm.rst
index ad4dfd020e0d..ad4dfd020e0d 100644
--- a/Documentation/lsm.txt
+++ b/Documentation/security/lsm.rst
diff --git a/Documentation/SAK.txt b/Documentation/security/sak.rst
index 260e1d3687bd..260e1d3687bd 100644
--- a/Documentation/SAK.txt
+++ b/Documentation/security/sak.rst
diff --git a/Documentation/siphash.txt b/Documentation/security/siphash.rst
index 9965821ab333..9965821ab333 100644
--- a/Documentation/siphash.txt
+++ b/Documentation/security/siphash.rst
diff --git a/Documentation/security/tpm/index.rst b/Documentation/security/tpm/index.rst
index af77a7bbb070..3296533e54cf 100644
--- a/Documentation/security/tpm/index.rst
+++ b/Documentation/security/tpm/index.rst
@@ -5,3 +5,4 @@ Trusted Platform Module documentation
.. toctree::
tpm_vtpm_proxy
+ xen-tpmfront
diff --git a/Documentation/security/tpm/xen-tpmfront.txt b/Documentation/security/tpm/xen-tpmfront.rst
index 69346de87ff3..00d5b1db227d 100644
--- a/Documentation/security/tpm/xen-tpmfront.txt
+++ b/Documentation/security/tpm/xen-tpmfront.rst
@@ -1,4 +1,6 @@
+=============================
Virtual TPM interface for Xen
+=============================
Authors: Matthew Fioravante (JHUAPL), Daniel De Graaf (NSA)
@@ -6,7 +8,8 @@ This document describes the virtual Trusted Platform Module (vTPM) subsystem for
Xen. The reader is assumed to have familiarity with building and installing Xen,
Linux, and a basic understanding of the TPM and vTPM concepts.
-INTRODUCTION
+Introduction
+------------
The goal of this work is to provide a TPM functionality to a virtual guest
operating system (in Xen terms, a DomU). This allows programs to interact with
@@ -24,81 +27,89 @@ This mini-os vTPM subsystem was built on top of the previous vTPM work done by
IBM and Intel corporation.
-DESIGN OVERVIEW
+Design Overview
---------------
-The architecture of vTPM is described below:
-
-+------------------+
-| Linux DomU | ...
-| | ^ |
-| v | |
-| xen-tpmfront |
-+------------------+
- | ^
- v |
-+------------------+
-| mini-os/tpmback |
-| | ^ |
-| v | |
-| vtpm-stubdom | ...
-| | ^ |
-| v | |
-| mini-os/tpmfront |
-+------------------+
- | ^
- v |
-+------------------+
-| mini-os/tpmback |
-| | ^ |
-| v | |
-| vtpmmgr-stubdom |
-| | ^ |
-| v | |
-| mini-os/tpm_tis |
-+------------------+
- | ^
- v |
-+------------------+
-| Hardware TPM |
-+------------------+
-
- * Linux DomU: The Linux based guest that wants to use a vTPM. There may be
+The architecture of vTPM is described below::
+
+ +------------------+
+ | Linux DomU | ...
+ | | ^ |
+ | v | |
+ | xen-tpmfront |
+ +------------------+
+ | ^
+ v |
+ +------------------+
+ | mini-os/tpmback |
+ | | ^ |
+ | v | |
+ | vtpm-stubdom | ...
+ | | ^ |
+ | v | |
+ | mini-os/tpmfront |
+ +------------------+
+ | ^
+ v |
+ +------------------+
+ | mini-os/tpmback |
+ | | ^ |
+ | v | |
+ | vtpmmgr-stubdom |
+ | | ^ |
+ | v | |
+ | mini-os/tpm_tis |
+ +------------------+
+ | ^
+ v |
+ +------------------+
+ | Hardware TPM |
+ +------------------+
+
+* Linux DomU:
+ The Linux based guest that wants to use a vTPM. There may be
more than one of these.
- * xen-tpmfront.ko: Linux kernel virtual TPM frontend driver. This driver
+* xen-tpmfront.ko:
+ Linux kernel virtual TPM frontend driver. This driver
provides vTPM access to a Linux-based DomU.
- * mini-os/tpmback: Mini-os TPM backend driver. The Linux frontend driver
+* mini-os/tpmback:
+ Mini-os TPM backend driver. The Linux frontend driver
connects to this backend driver to facilitate communications
between the Linux DomU and its vTPM. This driver is also
used by vtpmmgr-stubdom to communicate with vtpm-stubdom.
- * vtpm-stubdom: A mini-os stub domain that implements a vTPM. There is a
+* vtpm-stubdom:
+ A mini-os stub domain that implements a vTPM. There is a
one to one mapping between running vtpm-stubdom instances and
logical vtpms on the system. The vTPM Platform Configuration
Registers (PCRs) are normally all initialized to zero.
- * mini-os/tpmfront: Mini-os TPM frontend driver. The vTPM mini-os domain
+* mini-os/tpmfront:
+ Mini-os TPM frontend driver. The vTPM mini-os domain
vtpm-stubdom uses this driver to communicate with
vtpmmgr-stubdom. This driver is also used in mini-os
domains such as pv-grub that talk to the vTPM domain.
- * vtpmmgr-stubdom: A mini-os domain that implements the vTPM manager. There is
+* vtpmmgr-stubdom:
+ A mini-os domain that implements the vTPM manager. There is
only one vTPM manager and it should be running during the
entire lifetime of the machine. This domain regulates
access to the physical TPM on the system and secures the
persistent state of each vTPM.
- * mini-os/tpm_tis: Mini-os TPM version 1.2 TPM Interface Specification (TIS)
+* mini-os/tpm_tis:
+ Mini-os TPM version 1.2 TPM Interface Specification (TIS)
driver. This driver used by vtpmmgr-stubdom to talk directly to
the hardware TPM. Communication is facilitated by mapping
hardware memory pages into vtpmmgr-stubdom.
- * Hardware TPM: The physical TPM that is soldered onto the motherboard.
+* Hardware TPM:
+ The physical TPM that is soldered onto the motherboard.
-INTEGRATION WITH XEN
+Integration With Xen
--------------------
Support for the vTPM driver was added in Xen using the libxl toolstack in Xen
diff --git a/Documentation/sparc/index.rst b/Documentation/sparc/index.rst
index 91f7d6643dd5..71cff621f243 100644
--- a/Documentation/sparc/index.rst
+++ b/Documentation/sparc/index.rst
@@ -1,5 +1,3 @@
-:orphan:
-
==================
Sparc Architecture
==================
diff --git a/Documentation/sysctl/abi.txt b/Documentation/sysctl/abi.txt
deleted file mode 100644
index 63f4ebcf652c..000000000000
--- a/Documentation/sysctl/abi.txt
+++ /dev/null
@@ -1,54 +0,0 @@
-Documentation for /proc/sys/abi/* kernel version 2.6.0.test2
- (c) 2003, Fabian Frederick <ffrederick@users.sourceforge.net>
-
-For general info : README.
-
-==============================================================
-
-This path is binary emulation relevant aka personality types aka abi.
-When a process is executed, it's linked to an exec_domain whose
-personality is defined using values available from /proc/sys/abi.
-You can find further details about abi in include/linux/personality.h.
-
-Here are the files featuring in 2.6 kernel :
-
-- defhandler_coff
-- defhandler_elf
-- defhandler_lcall7
-- defhandler_libcso
-- fake_utsname
-- trace
-
-===========================================================
-defhandler_coff:
-defined value :
-PER_SCOSVR3
-0x0003 | STICKY_TIMEOUTS | WHOLE_SECONDS | SHORT_INODE
-
-===========================================================
-defhandler_elf:
-defined value :
-PER_LINUX
-0
-
-===========================================================
-defhandler_lcall7:
-defined value :
-PER_SVR4
-0x0001 | STICKY_TIMEOUTS | MMAP_PAGE_ZERO,
-
-===========================================================
-defhandler_libsco:
-defined value:
-PER_SVR4
-0x0001 | STICKY_TIMEOUTS | MMAP_PAGE_ZERO,
-
-===========================================================
-fake_utsname:
-Unused
-
-===========================================================
-trace:
-Unused
-
-===========================================================
diff --git a/Documentation/target/index.rst b/Documentation/target/index.rst
index b68f48982392..4b24f81f747e 100644
--- a/Documentation/target/index.rst
+++ b/Documentation/target/index.rst
@@ -1,4 +1,4 @@
-:orphan:
+.. SPDX-License-Identifier: GPL-2.0
==================
TCM Virtual Device
diff --git a/Documentation/timers/index.rst b/Documentation/timers/index.rst
index 91f6f8263c48..df510ad0c989 100644
--- a/Documentation/timers/index.rst
+++ b/Documentation/timers/index.rst
@@ -1,4 +1,4 @@
-:orphan:
+.. SPDX-License-Identifier: GPL-2.0
======
timers
diff --git a/Documentation/translations/it_IT/kernel-hacking/locking.rst b/Documentation/translations/it_IT/kernel-hacking/locking.rst
index 5fd8a1abd2be..b9a6be4b8499 100644
--- a/Documentation/translations/it_IT/kernel-hacking/locking.rst
+++ b/Documentation/translations/it_IT/kernel-hacking/locking.rst
@@ -1404,7 +1404,7 @@ Riferimento per l'API dei Futex
Approfondimenti
===============
-- ``Documentation/locking/spinlocks.txt``: la guida di Linus Torvalds agli
+- ``Documentation/locking/spinlocks.rst``: la guida di Linus Torvalds agli
spinlock del kernel.
- Unix Systems for Modern Architectures: Symmetric Multiprocessing and
diff --git a/Documentation/translations/it_IT/process/submit-checklist.rst b/Documentation/translations/it_IT/process/submit-checklist.rst
index ea74cae958d7..995ee69fab11 100644
--- a/Documentation/translations/it_IT/process/submit-checklist.rst
+++ b/Documentation/translations/it_IT/process/submit-checklist.rst
@@ -117,7 +117,7 @@ sottomissione delle patch, in particolare
sorgenti che ne spieghi la logica: cosa fanno e perché.
25) Se la patch aggiunge nuove chiamate ioctl, allora aggiornate
- ``Documentation/ioctl/ioctl-number.txt``.
+ ``Documentation/ioctl/ioctl-number.rst``.
26) Se il codice che avete modificato dipende o usa una qualsiasi interfaccia o
funzionalità del kernel che è associata a uno dei seguenti simboli
diff --git a/Documentation/translations/zh_CN/arm/Booting b/Documentation/translations/zh_CN/arm/Booting
index 1fe866f8218f..562e9a2957e6 100644
--- a/Documentation/translations/zh_CN/arm/Booting
+++ b/Documentation/translations/zh_CN/arm/Booting
@@ -1,4 +1,4 @@
-Chinese translated version of Documentation/arm/Booting
+Chinese translated version of Documentation/arm/booting.rst
If you have any comment or update to the content, please contact the
original document maintainer directly. However, if you have a problem
@@ -9,7 +9,7 @@ or if there is a problem with the translation.
Maintainer: Russell King <linux@arm.linux.org.uk>
Chinese maintainer: Fu Wei <tekkamanninja@gmail.com>
---------------------------------------------------------------------
-Documentation/arm/Booting 的中文翻译
+Documentation/arm/booting.rst 的中文翻译
如果想评论或更新本文的内容,请直接联系原文档的维护者。如果你使用英文
交流有困难的话,也可以向中文版维护者求助。如果本翻译更新不及时或者翻
diff --git a/Documentation/translations/zh_CN/arm/kernel_user_helpers.txt b/Documentation/translations/zh_CN/arm/kernel_user_helpers.txt
index cd7fc8f34cf9..99af4363984d 100644
--- a/Documentation/translations/zh_CN/arm/kernel_user_helpers.txt
+++ b/Documentation/translations/zh_CN/arm/kernel_user_helpers.txt
@@ -1,4 +1,4 @@
-Chinese translated version of Documentation/arm/kernel_user_helpers.txt
+Chinese translated version of Documentation/arm/kernel_user_helpers.rst
If you have any comment or update to the content, please contact the
original document maintainer directly. However, if you have a problem
@@ -10,7 +10,7 @@ Maintainer: Nicolas Pitre <nicolas.pitre@linaro.org>
Dave Martin <dave.martin@linaro.org>
Chinese maintainer: Fu Wei <tekkamanninja@gmail.com>
---------------------------------------------------------------------
-Documentation/arm/kernel_user_helpers.txt 的中文翻译
+Documentation/arm/kernel_user_helpers.rst 的中文翻译
如果想评论或更新本文的内容,请直接联系原文档的维护者。如果你使用英文
交流有困难的话,也可以向中文版维护者求助。如果本翻译更新不及时或者翻
diff --git a/Documentation/translations/zh_CN/filesystems/sysfs.txt b/Documentation/translations/zh_CN/filesystems/sysfs.txt
index 452271dda141..ee1f37da5b23 100644
--- a/Documentation/translations/zh_CN/filesystems/sysfs.txt
+++ b/Documentation/translations/zh_CN/filesystems/sysfs.txt
@@ -288,7 +288,7 @@ dev/ 包含两个子目录: char/ 和 block/。在这两个子目录中,有以
中相应的设备。/sys/dev 提供一个通过一个 stat(2) 操作结果,查找
设备 sysfs 接口快捷的方法。
-更多有关 driver-model 的特性信息可以在 Documentation/driver-model/
+更多有关 driver-model 的特性信息可以在 Documentation/driver-api/driver-model/
中找到。
diff --git a/Documentation/translations/zh_CN/gpio.txt b/Documentation/translations/zh_CN/gpio.txt
index 4cb1ba8b8fed..a23ee14fc927 100644
--- a/Documentation/translations/zh_CN/gpio.txt
+++ b/Documentation/translations/zh_CN/gpio.txt
@@ -1,4 +1,4 @@
-Chinese translated version of Documentation/gpio
+Chinese translated version of Documentation/admin-guide/gpio
If you have any comment or update to the content, please contact the
original document maintainer directly. However, if you have a problem
@@ -10,7 +10,7 @@ Maintainer: Grant Likely <grant.likely@secretlab.ca>
Linus Walleij <linus.walleij@linaro.org>
Chinese maintainer: Fu Wei <tekkamanninja@gmail.com>
---------------------------------------------------------------------
-Documentation/gpio 的中文翻译
+Documentation/admin-guide/gpio 的中文翻译
如果想评论或更新本文的内容,请直接联系原文档的维护者。如果你使用英文
交流有困难的话,也可以向中文版维护者求助。如果本翻译更新不及时或者翻
diff --git a/Documentation/translations/zh_CN/oops-tracing.txt b/Documentation/translations/zh_CN/oops-tracing.txt
index 368ddd05b304..c5f3bda7abcb 100644
--- a/Documentation/translations/zh_CN/oops-tracing.txt
+++ b/Documentation/translations/zh_CN/oops-tracing.txt
@@ -53,8 +53,8 @@ cat /proc/kmsg > file, 然而你必须介入中止传输, kmsg是一个“永
(2)用串口终端启动(请参看Documentation/admin-guide/serial-console.rst),运行一个null
modem到另一台机器并用你喜欢的通讯工具获取输出。Minicom工作地很好。
-(3)使用Kdump(请参看Documentation/kdump/kdump.rst),
-使用在Documentation/kdump/gdbmacros.txt中定义的dmesg gdb宏,从旧的内存中提取内核
+(3)使用Kdump(请参看Documentation/admin-guide/kdump/kdump.rst),
+使用在Documentation/admin-guide/kdump/gdbmacros.txt中定义的dmesg gdb宏,从旧的内存中提取内核
环形缓冲区。
完整信息
diff --git a/Documentation/translations/zh_CN/process/submit-checklist.rst b/Documentation/translations/zh_CN/process/submit-checklist.rst
index f4785d2b0491..8738c55e42a2 100644
--- a/Documentation/translations/zh_CN/process/submit-checklist.rst
+++ b/Documentation/translations/zh_CN/process/submit-checklist.rst
@@ -97,7 +97,7 @@ Linux内核补丁提交清单
24) 所有内存屏障例如 ``barrier()``, ``rmb()``, ``wmb()`` 都需要源代码中的注
释来解释它们正在执行的操作及其原因的逻辑。
-25) 如果补丁添加了任何ioctl,那么也要更新 ``Documentation/ioctl/ioctl-number.txt``
+25) 如果补丁添加了任何ioctl,那么也要更新 ``Documentation/ioctl/ioctl-number.rst``
26) 如果修改后的源代码依赖或使用与以下 ``Kconfig`` 符号相关的任何内核API或
功能,则在禁用相关 ``Kconfig`` 符号和/或 ``=m`` (如果该选项可用)的情况
diff --git a/Documentation/accelerators/ocxl.rst b/Documentation/userspace-api/accelerators/ocxl.rst
index b1cea19a90f5..14cefc020e2d 100644
--- a/Documentation/accelerators/ocxl.rst
+++ b/Documentation/userspace-api/accelerators/ocxl.rst
@@ -1,5 +1,3 @@
-:orphan:
-
========================================================
OpenCAPI (Open Coherent Accelerator Processor Interface)
========================================================
diff --git a/Documentation/userspace-api/index.rst b/Documentation/userspace-api/index.rst
index a3233da7fa88..ad494da40009 100644
--- a/Documentation/userspace-api/index.rst
+++ b/Documentation/userspace-api/index.rst
@@ -20,6 +20,7 @@ place where this information is gathered.
seccomp_filter
unshare
spec_ctrl
+ accelerators/ocxl
.. only:: subproject and html
diff --git a/Documentation/vm/numa.rst b/Documentation/vm/numa.rst
index 130f3cfa1c19..99fdeca917ca 100644
--- a/Documentation/vm/numa.rst
+++ b/Documentation/vm/numa.rst
@@ -67,7 +67,7 @@ nodes. Each emulated node will manage a fraction of the underlying cells'
physical memory. NUMA emluation is useful for testing NUMA kernel and
application features on non-NUMA platforms, and as a sort of memory resource
management mechanism when used together with cpusets.
-[see Documentation/cgroup-v1/cpusets.rst]
+[see Documentation/admin-guide/cgroup-v1/cpusets.rst]
For each node with memory, Linux constructs an independent memory management
subsystem, complete with its own free page lists, in-use page lists, usage
@@ -114,7 +114,7 @@ allocation behavior using Linux NUMA memory policy. [see
System administrators can restrict the CPUs and nodes' memories that a non-
privileged user can specify in the scheduling or NUMA commands and functions
-using control groups and CPUsets. [see Documentation/cgroup-v1/cpusets.rst]
+using control groups and CPUsets. [see Documentation/admin-guide/cgroup-v1/cpusets.rst]
On architectures that do not hide memoryless nodes, Linux will include only
zones [nodes] with memory in the zonelists. This means that for a memoryless
diff --git a/Documentation/vm/page_migration.rst b/Documentation/vm/page_migration.rst
index 35bba27d5fff..1d6cd7db4e43 100644
--- a/Documentation/vm/page_migration.rst
+++ b/Documentation/vm/page_migration.rst
@@ -41,7 +41,7 @@ locations.
Larger installations usually partition the system using cpusets into
sections of nodes. Paul Jackson has equipped cpusets with the ability to
move pages when a task is moved to another cpuset (See
-Documentation/cgroup-v1/cpusets.rst).
+Documentation/admin-guide/cgroup-v1/cpusets.rst).
Cpusets allows the automation of process locality. If a task is moved to
a new cpuset then also all its pages are moved with it so that the
performance of the process does not sink dramatically. Also the pages
diff --git a/Documentation/vm/unevictable-lru.rst b/Documentation/vm/unevictable-lru.rst
index c6d94118fbcc..17d0861b0f1d 100644
--- a/Documentation/vm/unevictable-lru.rst
+++ b/Documentation/vm/unevictable-lru.rst
@@ -98,7 +98,7 @@ Memory Control Group Interaction
--------------------------------
The unevictable LRU facility interacts with the memory control group [aka
-memory controller; see Documentation/cgroup-v1/memory.rst] by extending the
+memory controller; see Documentation/admin-guide/cgroup-v1/memory.rst] by extending the
lru_list enum.
The memory controller data structure automatically gets a per-zone unevictable
@@ -439,7 +439,7 @@ Compacting MLOCKED Pages
The unevictable LRU can be scanned for compactable regions and the default
behavior is to do so. /proc/sys/vm/compact_unevictable_allowed controls
-this behavior (see Documentation/sysctl/vm.txt). Once scanning of the
+this behavior (see Documentation/admin-guide/sysctl/vm.rst). Once scanning of the
unevictable LRU is enabled, the work of compaction is mostly handled by
the page migration code and the same work flow as described in MIGRATING
MLOCKED PAGES will apply.
diff --git a/Documentation/w1/w1.netlink b/Documentation/w1/w1.netlink
index ef2727192d69..94ad4c420828 100644
--- a/Documentation/w1/w1.netlink
+++ b/Documentation/w1/w1.netlink
@@ -183,7 +183,7 @@ acknowledge number is set to seq+1.
Additional documantion, source code examples.
============================================
-1. Documentation/connector
+1. Documentation/driver-api/connector.rst
2. http://www.ioremap.net/archive/w1
This archive includes userspace application w1d.c which uses
read/write/search commands for all master/slave devices found on the bus.
diff --git a/Documentation/watchdog/index.rst b/Documentation/watchdog/index.rst
index 33a0de631e84..c177645081d8 100644
--- a/Documentation/watchdog/index.rst
+++ b/Documentation/watchdog/index.rst
@@ -1,4 +1,4 @@
-:orphan:
+.. SPDX-License-Identifier: GPL-2.0
======================
Linux Watchdog Support
diff --git a/Documentation/x86/index.rst b/Documentation/x86/index.rst
index f2de1b2d3ac7..af64c4bb4447 100644
--- a/Documentation/x86/index.rst
+++ b/Documentation/x86/index.rst
@@ -20,6 +20,8 @@ x86-specific Documentation
mtrr
pat
intel_mpx
+ intel-iommu
+ intel_txt
amd-memory-encryption
pti
mds
diff --git a/Documentation/Intel-IOMMU.txt b/Documentation/x86/intel-iommu.rst
index 9dae6b47e398..9dae6b47e398 100644
--- a/Documentation/Intel-IOMMU.txt
+++ b/Documentation/x86/intel-iommu.rst
diff --git a/Documentation/intel_txt.txt b/Documentation/x86/intel_txt.rst
index d83c1a2122c9..d83c1a2122c9 100644
--- a/Documentation/intel_txt.txt
+++ b/Documentation/x86/intel_txt.rst
diff --git a/Documentation/x86/topology.rst b/Documentation/x86/topology.rst
index 8e9704f61017..e29739904e37 100644
--- a/Documentation/x86/topology.rst
+++ b/Documentation/x86/topology.rst
@@ -9,7 +9,7 @@ representation in the kernel. Update/change when doing changes to the
respective code.
The architecture-agnostic topology definitions are in
-Documentation/cputopology.txt. This file holds x86-specific
+Documentation/admin-guide/cputopology.rst. This file holds x86-specific
differences/specialities which must not necessarily apply to the generic
definitions. Thus, the way to read up on Linux topology on x86 is to start
with the generic one and look at this one in parallel for the x86 specifics.
diff --git a/Documentation/x86/x86_64/fake-numa-for-cpusets.rst b/Documentation/x86/x86_64/fake-numa-for-cpusets.rst
index 30108684ae87..ff9bcfd2cc14 100644
--- a/Documentation/x86/x86_64/fake-numa-for-cpusets.rst
+++ b/Documentation/x86/x86_64/fake-numa-for-cpusets.rst
@@ -15,7 +15,7 @@ assign them to cpusets and their attached tasks. This is a way of limiting the
amount of system memory that is available to a certain class of tasks.
For more information on the features of cpusets, see
-Documentation/cgroup-v1/cpusets.rst.
+Documentation/admin-guide/cgroup-v1/cpusets.rst.
There are a number of different configurations you can use for your needs. For
more information on the numa=fake command line option and its various ways of
configuring fake nodes, see Documentation/x86/x86_64/boot-options.rst.
@@ -40,7 +40,7 @@ A machine may be split as follows with "numa=fake=4*512," as reported by dmesg::
On node 3 totalpages: 131072
Now following the instructions for mounting the cpusets filesystem from
-Documentation/cgroup-v1/cpusets.rst, you can assign fake nodes (i.e. contiguous memory
+Documentation/admin-guide/cgroup-v1/cpusets.rst, you can assign fake nodes (i.e. contiguous memory
address spaces) to individual cpusets::
[root@xroads /]# mkdir exampleset
diff --git a/Documentation/xtensa/atomctl.txt b/Documentation/xtensa/atomctl.rst
index 1da783ac200c..1ecbd0ba9a2e 100644
--- a/Documentation/xtensa/atomctl.txt
+++ b/Documentation/xtensa/atomctl.rst
@@ -1,3 +1,7 @@
+===========================================
+Atomic Operation Control (ATOMCTL) Register
+===========================================
+
We Have Atomic Operation Control (ATOMCTL) Register.
This register determines the effect of using a S32C1I instruction
with various combinations of:
@@ -8,7 +12,7 @@ with various combinations of:
2. With and without An Intelligent Memory Controller which
can do Atomic Transactions itself.
-The Core comes up with a default value of for the three types of cache ops:
+The Core comes up with a default value for the three types of cache ops::
0x28: (WB: Internal, WT: Internal, BY:Exception)
@@ -30,15 +34,18 @@ CUSTOMER-WARNING:
Developers might find using RCW in Bypass mode convenient when testing
with the cache being bypassed; for example studying cache alias problems.
-See Section 4.3.12.4 of ISA; Bits:
+See Section 4.3.12.4 of ISA; Bits::
WB WT BY
5 4 | 3 2 | 1 0
+
+========= ================== ================== ===============
2 Bit
Field
Values WB - Write Back WT - Write Thru BY - Bypass
---------- --------------- ----------------- ----------------
+========= ================== ================== ===============
0 Exception Exception Exception
1 RCW Transaction RCW Transaction RCW Transaction
2 Internal Operation Internal Operation Reserved
3 Reserved Reserved Reserved
+========= ================== ================== ===============
diff --git a/Documentation/xtensa/booting.txt b/Documentation/xtensa/booting.rst
index 402b33a2619f..e1b83707e5b6 100644
--- a/Documentation/xtensa/booting.txt
+++ b/Documentation/xtensa/booting.rst
@@ -1,10 +1,13 @@
-Passing boot parameters to the kernel.
+=====================================
+Passing boot parameters to the kernel
+=====================================
Boot parameters are represented as a TLV list in the memory. Please see
arch/xtensa/include/asm/bootparam.h for definition of the bp_tag structure and
tag value constants. First entry in the list must have type BP_TAG_FIRST, last
entry must have type BP_TAG_LAST. The address of the first list entry is
passed to the kernel in the register a2. The address type depends on MMU type:
+
- For configurations without MMU, with region protection or with MPU the
address must be the physical address.
- For configurations with region translation MMU or with MMUv3 and CONFIG_MMU=n
diff --git a/Documentation/xtensa/index.rst b/Documentation/xtensa/index.rst
new file mode 100644
index 000000000000..52fa04eb39a3
--- /dev/null
+++ b/Documentation/xtensa/index.rst
@@ -0,0 +1,12 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===================
+Xtensa Architecture
+===================
+
+.. toctree::
+ :maxdepth: 1
+
+ atomctl
+ booting
+ mmu
diff --git a/Documentation/xtensa/mmu.rst b/Documentation/xtensa/mmu.rst
new file mode 100644
index 000000000000..e52a12960fdc
--- /dev/null
+++ b/Documentation/xtensa/mmu.rst
@@ -0,0 +1,195 @@
+=============================
+MMUv3 initialization sequence
+=============================
+
+The code in the initialize_mmu macro sets up MMUv3 memory mapping
+identically to MMUv2 fixed memory mapping. Depending on
+CONFIG_INITIALIZE_XTENSA_MMU_INSIDE_VMLINUX symbol this code is
+located in addresses it was linked for (symbol undefined), or not
+(symbol defined), so it needs to be position-independent.
+
+The code has the following assumptions:
+
+ - This code fragment is run only on an MMU v3.
+ - TLBs are in their reset state.
+ - ITLBCFG and DTLBCFG are zero (reset state).
+ - RASID is 0x04030201 (reset state).
+ - PS.RING is zero (reset state).
+ - LITBASE is zero (reset state, PC-relative literals); required to be PIC.
+
+TLB setup proceeds along the following steps.
+
+ Legend:
+
+ - VA = virtual address (two upper nibbles of it);
+ - PA = physical address (two upper nibbles of it);
+ - pc = physical range that contains this code;
+
+After step 2, we jump to virtual address in the range 0x40000000..0x5fffffff
+or 0x00000000..0x1fffffff, depending on whether the kernel was loaded below
+0x40000000 or above. That address corresponds to next instruction to execute
+in this code. After step 4, we jump to intended (linked) address of this code.
+The scheme below assumes that the kernel is loaded below 0x40000000.
+
+ ====== ===== ===== ===== ===== ====== ===== =====
+ - Step0 Step1 Step2 Step3 Step4 Step5
+
+ VA PA PA PA PA VA PA PA
+ ====== ===== ===== ===== ===== ====== ===== =====
+ E0..FF -> E0 -> E0 -> E0 F0..FF -> F0 -> F0
+ C0..DF -> C0 -> C0 -> C0 E0..EF -> F0 -> F0
+ A0..BF -> A0 -> A0 -> A0 D8..DF -> 00 -> 00
+ 80..9F -> 80 -> 80 -> 80 D0..D7 -> 00 -> 00
+ 60..7F -> 60 -> 60 -> 60
+ 40..5F -> 40 -> pc -> pc 40..5F -> pc
+ 20..3F -> 20 -> 20 -> 20
+ 00..1F -> 00 -> 00 -> 00
+ ====== ===== ===== ===== ===== ====== ===== =====
+
+The default location of IO peripherals is above 0xf0000000. This may be changed
+using a "ranges" property in a device tree simple-bus node. See the Devicetree
+Specification, section 4.5 for details on the syntax and semantics of
+simple-bus nodes. The following limitations apply:
+
+1. Only top level simple-bus nodes are considered
+
+2. Only one (first) simple-bus node is considered
+
+3. Empty "ranges" properties are not supported
+
+4. Only the first triplet in the "ranges" property is considered
+
+5. The parent-bus-address value is rounded down to the nearest 256MB boundary
+
+6. The IO area covers the entire 256MB segment of parent-bus-address; the
+ "ranges" triplet length field is ignored
+
+
+MMUv3 address space layouts.
+============================
+
+Default MMUv2-compatible layout::
+
+ Symbol VADDR Size
+ +------------------+
+ | Userspace | 0x00000000 TASK_SIZE
+ +------------------+ 0x40000000
+ +------------------+
+ | Page table | XCHAL_PAGE_TABLE_VADDR 0x80000000 XCHAL_PAGE_TABLE_SIZE
+ +------------------+
+ | KASAN shadow map | KASAN_SHADOW_START 0x80400000 KASAN_SHADOW_SIZE
+ +------------------+ 0x8e400000
+ +------------------+
+ | VMALLOC area | VMALLOC_START 0xc0000000 128MB - 64KB
+ +------------------+ VMALLOC_END
+ | Cache aliasing | TLBTEMP_BASE_1 0xc7ff0000 DCACHE_WAY_SIZE
+ | remap area 1 |
+ +------------------+
+ | Cache aliasing | TLBTEMP_BASE_2 DCACHE_WAY_SIZE
+ | remap area 2 |
+ +------------------+
+ +------------------+
+ | KMAP area | PKMAP_BASE PTRS_PER_PTE *
+ | | DCACHE_N_COLORS *
+ | | PAGE_SIZE
+ | | (4MB * DCACHE_N_COLORS)
+ +------------------+
+ | Atomic KMAP area | FIXADDR_START KM_TYPE_NR *
+ | | NR_CPUS *
+ | | DCACHE_N_COLORS *
+ | | PAGE_SIZE
+ +------------------+ FIXADDR_TOP 0xcffff000
+ +------------------+
+ | Cached KSEG | XCHAL_KSEG_CACHED_VADDR 0xd0000000 128MB
+ +------------------+
+ | Uncached KSEG | XCHAL_KSEG_BYPASS_VADDR 0xd8000000 128MB
+ +------------------+
+ | Cached KIO | XCHAL_KIO_CACHED_VADDR 0xe0000000 256MB
+ +------------------+
+ | Uncached KIO | XCHAL_KIO_BYPASS_VADDR 0xf0000000 256MB
+ +------------------+
+
+
+256MB cached + 256MB uncached layout::
+
+ Symbol VADDR Size
+ +------------------+
+ | Userspace | 0x00000000 TASK_SIZE
+ +------------------+ 0x40000000
+ +------------------+
+ | Page table | XCHAL_PAGE_TABLE_VADDR 0x80000000 XCHAL_PAGE_TABLE_SIZE
+ +------------------+
+ | KASAN shadow map | KASAN_SHADOW_START 0x80400000 KASAN_SHADOW_SIZE
+ +------------------+ 0x8e400000
+ +------------------+
+ | VMALLOC area | VMALLOC_START 0xa0000000 128MB - 64KB
+ +------------------+ VMALLOC_END
+ | Cache aliasing | TLBTEMP_BASE_1 0xa7ff0000 DCACHE_WAY_SIZE
+ | remap area 1 |
+ +------------------+
+ | Cache aliasing | TLBTEMP_BASE_2 DCACHE_WAY_SIZE
+ | remap area 2 |
+ +------------------+
+ +------------------+
+ | KMAP area | PKMAP_BASE PTRS_PER_PTE *
+ | | DCACHE_N_COLORS *
+ | | PAGE_SIZE
+ | | (4MB * DCACHE_N_COLORS)
+ +------------------+
+ | Atomic KMAP area | FIXADDR_START KM_TYPE_NR *
+ | | NR_CPUS *
+ | | DCACHE_N_COLORS *
+ | | PAGE_SIZE
+ +------------------+ FIXADDR_TOP 0xaffff000
+ +------------------+
+ | Cached KSEG | XCHAL_KSEG_CACHED_VADDR 0xb0000000 256MB
+ +------------------+
+ | Uncached KSEG | XCHAL_KSEG_BYPASS_VADDR 0xc0000000 256MB
+ +------------------+
+ +------------------+
+ | Cached KIO | XCHAL_KIO_CACHED_VADDR 0xe0000000 256MB
+ +------------------+
+ | Uncached KIO | XCHAL_KIO_BYPASS_VADDR 0xf0000000 256MB
+ +------------------+
+
+
+512MB cached + 512MB uncached layout::
+
+ Symbol VADDR Size
+ +------------------+
+ | Userspace | 0x00000000 TASK_SIZE
+ +------------------+ 0x40000000
+ +------------------+
+ | Page table | XCHAL_PAGE_TABLE_VADDR 0x80000000 XCHAL_PAGE_TABLE_SIZE
+ +------------------+
+ | KASAN shadow map | KASAN_SHADOW_START 0x80400000 KASAN_SHADOW_SIZE
+ +------------------+ 0x8e400000
+ +------------------+
+ | VMALLOC area | VMALLOC_START 0x90000000 128MB - 64KB
+ +------------------+ VMALLOC_END
+ | Cache aliasing | TLBTEMP_BASE_1 0x97ff0000 DCACHE_WAY_SIZE
+ | remap area 1 |
+ +------------------+
+ | Cache aliasing | TLBTEMP_BASE_2 DCACHE_WAY_SIZE
+ | remap area 2 |
+ +------------------+
+ +------------------+
+ | KMAP area | PKMAP_BASE PTRS_PER_PTE *
+ | | DCACHE_N_COLORS *
+ | | PAGE_SIZE
+ | | (4MB * DCACHE_N_COLORS)
+ +------------------+
+ | Atomic KMAP area | FIXADDR_START KM_TYPE_NR *
+ | | NR_CPUS *
+ | | DCACHE_N_COLORS *
+ | | PAGE_SIZE
+ +------------------+ FIXADDR_TOP 0x9ffff000
+ +------------------+
+ | Cached KSEG | XCHAL_KSEG_CACHED_VADDR 0xa0000000 512MB
+ +------------------+
+ | Uncached KSEG | XCHAL_KSEG_BYPASS_VADDR 0xc0000000 512MB
+ +------------------+
+ | Cached KIO | XCHAL_KIO_CACHED_VADDR 0xe0000000 256MB
+ +------------------+
+ | Uncached KIO | XCHAL_KIO_BYPASS_VADDR 0xf0000000 256MB
+ +------------------+
diff --git a/Documentation/xtensa/mmu.txt b/Documentation/xtensa/mmu.txt
deleted file mode 100644
index 318114de63f3..000000000000
--- a/Documentation/xtensa/mmu.txt
+++ /dev/null
@@ -1,189 +0,0 @@
-MMUv3 initialization sequence.
-
-The code in the initialize_mmu macro sets up MMUv3 memory mapping
-identically to MMUv2 fixed memory mapping. Depending on
-CONFIG_INITIALIZE_XTENSA_MMU_INSIDE_VMLINUX symbol this code is
-located in addresses it was linked for (symbol undefined), or not
-(symbol defined), so it needs to be position-independent.
-
-The code has the following assumptions:
- This code fragment is run only on an MMU v3.
- TLBs are in their reset state.
- ITLBCFG and DTLBCFG are zero (reset state).
- RASID is 0x04030201 (reset state).
- PS.RING is zero (reset state).
- LITBASE is zero (reset state, PC-relative literals); required to be PIC.
-
-TLB setup proceeds along the following steps.
-
- Legend:
- VA = virtual address (two upper nibbles of it);
- PA = physical address (two upper nibbles of it);
- pc = physical range that contains this code;
-
-After step 2, we jump to virtual address in the range 0x40000000..0x5fffffff
-or 0x00000000..0x1fffffff, depending on whether the kernel was loaded below
-0x40000000 or above. That address corresponds to next instruction to execute
-in this code. After step 4, we jump to intended (linked) address of this code.
-The scheme below assumes that the kernel is loaded below 0x40000000.
-
- Step0 Step1 Step2 Step3 Step4 Step5
- ===== ===== ===== ===== ===== =====
- VA PA PA PA PA VA PA PA
- ------ -- -- -- -- ------ -- --
- E0..FF -> E0 -> E0 -> E0 F0..FF -> F0 -> F0
- C0..DF -> C0 -> C0 -> C0 E0..EF -> F0 -> F0
- A0..BF -> A0 -> A0 -> A0 D8..DF -> 00 -> 00
- 80..9F -> 80 -> 80 -> 80 D0..D7 -> 00 -> 00
- 60..7F -> 60 -> 60 -> 60
- 40..5F -> 40 -> pc -> pc 40..5F -> pc
- 20..3F -> 20 -> 20 -> 20
- 00..1F -> 00 -> 00 -> 00
-
-The default location of IO peripherals is above 0xf0000000. This may be changed
-using a "ranges" property in a device tree simple-bus node. See the Devicetree
-Specification, section 4.5 for details on the syntax and semantics of
-simple-bus nodes. The following limitations apply:
-
-1. Only top level simple-bus nodes are considered
-
-2. Only one (first) simple-bus node is considered
-
-3. Empty "ranges" properties are not supported
-
-4. Only the first triplet in the "ranges" property is considered
-
-5. The parent-bus-address value is rounded down to the nearest 256MB boundary
-
-6. The IO area covers the entire 256MB segment of parent-bus-address; the
- "ranges" triplet length field is ignored
-
-
-MMUv3 address space layouts.
-============================
-
-Default MMUv2-compatible layout.
-
- Symbol VADDR Size
-+------------------+
-| Userspace | 0x00000000 TASK_SIZE
-+------------------+ 0x40000000
-+------------------+
-| Page table | XCHAL_PAGE_TABLE_VADDR 0x80000000 XCHAL_PAGE_TABLE_SIZE
-+------------------+
-| KASAN shadow map | KASAN_SHADOW_START 0x80400000 KASAN_SHADOW_SIZE
-+------------------+ 0x8e400000
-+------------------+
-| VMALLOC area | VMALLOC_START 0xc0000000 128MB - 64KB
-+------------------+ VMALLOC_END
-| Cache aliasing | TLBTEMP_BASE_1 0xc7ff0000 DCACHE_WAY_SIZE
-| remap area 1 |
-+------------------+
-| Cache aliasing | TLBTEMP_BASE_2 DCACHE_WAY_SIZE
-| remap area 2 |
-+------------------+
-+------------------+
-| KMAP area | PKMAP_BASE PTRS_PER_PTE *
-| | DCACHE_N_COLORS *
-| | PAGE_SIZE
-| | (4MB * DCACHE_N_COLORS)
-+------------------+
-| Atomic KMAP area | FIXADDR_START KM_TYPE_NR *
-| | NR_CPUS *
-| | DCACHE_N_COLORS *
-| | PAGE_SIZE
-+------------------+ FIXADDR_TOP 0xcffff000
-+------------------+
-| Cached KSEG | XCHAL_KSEG_CACHED_VADDR 0xd0000000 128MB
-+------------------+
-| Uncached KSEG | XCHAL_KSEG_BYPASS_VADDR 0xd8000000 128MB
-+------------------+
-| Cached KIO | XCHAL_KIO_CACHED_VADDR 0xe0000000 256MB
-+------------------+
-| Uncached KIO | XCHAL_KIO_BYPASS_VADDR 0xf0000000 256MB
-+------------------+
-
-
-256MB cached + 256MB uncached layout.
-
- Symbol VADDR Size
-+------------------+
-| Userspace | 0x00000000 TASK_SIZE
-+------------------+ 0x40000000
-+------------------+
-| Page table | XCHAL_PAGE_TABLE_VADDR 0x80000000 XCHAL_PAGE_TABLE_SIZE
-+------------------+
-| KASAN shadow map | KASAN_SHADOW_START 0x80400000 KASAN_SHADOW_SIZE
-+------------------+ 0x8e400000
-+------------------+
-| VMALLOC area | VMALLOC_START 0xa0000000 128MB - 64KB
-+------------------+ VMALLOC_END
-| Cache aliasing | TLBTEMP_BASE_1 0xa7ff0000 DCACHE_WAY_SIZE
-| remap area 1 |
-+------------------+
-| Cache aliasing | TLBTEMP_BASE_2 DCACHE_WAY_SIZE
-| remap area 2 |
-+------------------+
-+------------------+
-| KMAP area | PKMAP_BASE PTRS_PER_PTE *
-| | DCACHE_N_COLORS *
-| | PAGE_SIZE
-| | (4MB * DCACHE_N_COLORS)
-+------------------+
-| Atomic KMAP area | FIXADDR_START KM_TYPE_NR *
-| | NR_CPUS *
-| | DCACHE_N_COLORS *
-| | PAGE_SIZE
-+------------------+ FIXADDR_TOP 0xaffff000
-+------------------+
-| Cached KSEG | XCHAL_KSEG_CACHED_VADDR 0xb0000000 256MB
-+------------------+
-| Uncached KSEG | XCHAL_KSEG_BYPASS_VADDR 0xc0000000 256MB
-+------------------+
-+------------------+
-| Cached KIO | XCHAL_KIO_CACHED_VADDR 0xe0000000 256MB
-+------------------+
-| Uncached KIO | XCHAL_KIO_BYPASS_VADDR 0xf0000000 256MB
-+------------------+
-
-
-512MB cached + 512MB uncached layout.
-
- Symbol VADDR Size
-+------------------+
-| Userspace | 0x00000000 TASK_SIZE
-+------------------+ 0x40000000
-+------------------+
-| Page table | XCHAL_PAGE_TABLE_VADDR 0x80000000 XCHAL_PAGE_TABLE_SIZE
-+------------------+
-| KASAN shadow map | KASAN_SHADOW_START 0x80400000 KASAN_SHADOW_SIZE
-+------------------+ 0x8e400000
-+------------------+
-| VMALLOC area | VMALLOC_START 0x90000000 128MB - 64KB
-+------------------+ VMALLOC_END
-| Cache aliasing | TLBTEMP_BASE_1 0x97ff0000 DCACHE_WAY_SIZE
-| remap area 1 |
-+------------------+
-| Cache aliasing | TLBTEMP_BASE_2 DCACHE_WAY_SIZE
-| remap area 2 |
-+------------------+
-+------------------+
-| KMAP area | PKMAP_BASE PTRS_PER_PTE *
-| | DCACHE_N_COLORS *
-| | PAGE_SIZE
-| | (4MB * DCACHE_N_COLORS)
-+------------------+
-| Atomic KMAP area | FIXADDR_START KM_TYPE_NR *
-| | NR_CPUS *
-| | DCACHE_N_COLORS *
-| | PAGE_SIZE
-+------------------+ FIXADDR_TOP 0x9ffff000
-+------------------+
-| Cached KSEG | XCHAL_KSEG_CACHED_VADDR 0xa0000000 512MB
-+------------------+
-| Uncached KSEG | XCHAL_KSEG_BYPASS_VADDR 0xc0000000 512MB
-+------------------+
-| Cached KIO | XCHAL_KIO_CACHED_VADDR 0xe0000000 256MB
-+------------------+
-| Uncached KIO | XCHAL_KIO_BYPASS_VADDR 0xf0000000 256MB
-+------------------+
diff --git a/MAINTAINERS b/MAINTAINERS
index 6debe6829716..c144bd6a432e 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1155,7 +1155,7 @@ APPLIED MICRO (APM) X-GENE SOC PMU
M: Khuong Dinh <khuong@os.amperecomputing.com>
S: Supported
F: drivers/perf/xgene_pmu.c
-F: Documentation/perf/xgene-pmu.txt
+F: Documentation/admin-guide/perf/xgene-pmu.rst
F: Documentation/devicetree/bindings/perf/apm-xgene-pmu.txt
APTINA CAMERA SENSOR PLL
@@ -2218,7 +2218,7 @@ F: drivers/*/*s3c64xx*
F: drivers/*/*s5pv210*
F: drivers/memory/samsung/*
F: drivers/soc/samsung/*
-F: Documentation/arm/Samsung/
+F: Documentation/arm/samsung/
F: Documentation/devicetree/bindings/arm/samsung/
F: Documentation/devicetree/bindings/sram/samsung-sram.txt
F: Documentation/devicetree/bindings/power/pd-samsung.txt
@@ -2689,7 +2689,7 @@ ATA OVER ETHERNET (AOE) DRIVER
M: "Justin Sanders" <justin@coraid.com>
W: http://www.openaoe.org/
S: Supported
-F: Documentation/aoe/
+F: Documentation/admin-guide/aoe/
F: drivers/block/aoe/
ATHEROS 71XX/9XXX GPIO DRIVER
@@ -2968,7 +2968,7 @@ M: Jens Axboe <axboe@kernel.dk>
L: linux-block@vger.kernel.org
S: Maintained
F: block/bfq-*
-F: Documentation/block/bfq-iosched.txt
+F: Documentation/block/bfq-iosched.rst
BFS FILE SYSTEM
M: "Tigran A. Aivazian" <aivazian.tigran@gmail.com>
@@ -4158,7 +4158,7 @@ L: cgroups@vger.kernel.org
T: git git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup.git
S: Maintained
F: Documentation/admin-guide/cgroup-v2.rst
-F: Documentation/cgroup-v1/
+F: Documentation/admin-guide/cgroup-v1/
F: include/linux/cgroup*
F: kernel/cgroup/
@@ -4169,7 +4169,7 @@ W: http://www.bullopensource.org/cpuset/
W: http://oss.sgi.com/projects/cpusets/
T: git git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup.git
S: Maintained
-F: Documentation/cgroup-v1/cpusets.rst
+F: Documentation/admin-guide/cgroup-v1/cpusets.rst
F: include/linux/cpuset.h
F: kernel/cgroup/cpuset.c
@@ -4655,7 +4655,7 @@ DELL SYSTEMS MANAGEMENT BASE DRIVER (dcdbas)
M: Stuart Hayes <stuart.w.hayes@gmail.com>
L: platform-driver-x86@vger.kernel.org
S: Maintained
-F: Documentation/dcdbas.txt
+F: Documentation/driver-api/dcdbas.rst
F: drivers/platform/x86/dcdbas.*
DELL WMI NOTIFICATIONS DRIVER
@@ -4748,7 +4748,7 @@ Q: http://patchwork.kernel.org/project/dm-devel/list/
T: git git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm.git
T: quilt http://people.redhat.com/agk/patches/linux/editing/
S: Maintained
-F: Documentation/device-mapper/
+F: Documentation/admin-guide/device-mapper/
F: drivers/md/Makefile
F: drivers/md/Kconfig
F: drivers/md/dm*
@@ -5019,7 +5019,7 @@ T: git git://git.linbit.com/drbd-8.4.git
S: Supported
F: drivers/block/drbd/
F: lib/lru_cache.c
-F: Documentation/blockdev/drbd/
+F: Documentation/admin-guide/blockdev/drbd/
DRIVER CORE, KOBJECTS, DEBUGFS AND SYSFS
M: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
@@ -6100,7 +6100,7 @@ M: Ard Biesheuvel <ard.biesheuvel@linaro.org>
L: linux-efi@vger.kernel.org
T: git git://git.kernel.org/pub/scm/linux/kernel/git/efi/efi.git
S: Maintained
-F: Documentation/efi-stub.txt
+F: Documentation/admin-guide/efi-stub.rst
F: arch/*/kernel/efi.c
F: arch/x86/boot/compressed/eboot.[ch]
F: arch/*/include/asm/efi.h
@@ -6675,7 +6675,7 @@ S: Maintained
F: scripts/gcc-plugins/
F: scripts/gcc-plugin.sh
F: scripts/Makefile.gcc-plugins
-F: Documentation/gcc-plugins.txt
+F: Documentation/core-api/gcc-plugins.rst
GASKET DRIVER FRAMEWORK
M: Rob Springer <rspringer@google.com>
@@ -6887,7 +6887,7 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-gpio.git
S: Maintained
F: Documentation/devicetree/bindings/gpio/
F: Documentation/driver-api/gpio/
-F: Documentation/gpio/
+F: Documentation/admin-guide/gpio/
F: Documentation/ABI/testing/gpio-cdev
F: Documentation/ABI/obsolete/sysfs-gpio
F: drivers/gpio/
@@ -7108,7 +7108,7 @@ M: Herbert Xu <herbert@gondor.apana.org.au>
L: linux-crypto@vger.kernel.org
S: Odd fixes
F: Documentation/devicetree/bindings/rng/
-F: Documentation/hw_random.txt
+F: Documentation/admin-guide/hw_random.rst
F: drivers/char/hw_random/
F: include/linux/hw_random.h
@@ -7282,7 +7282,7 @@ M: Shaokun Zhang <zhangshaokun@hisilicon.com>
W: http://www.hisilicon.com
S: Supported
F: drivers/perf/hisilicon
-F: Documentation/perf/hisi-pmu.txt
+F: Documentation/admin-guide/perf/hisi-pmu.rst
HISILICON ROCE DRIVER
M: Lijun Ou <oulijun@huawei.com>
@@ -8332,7 +8332,7 @@ L: tboot-devel@lists.sourceforge.net
W: http://tboot.sourceforge.net
T: hg http://tboot.hg.sourceforge.net:8000/hgroot/tboot/tboot
S: Supported
-F: Documentation/intel_txt.txt
+F: Documentation/x86/intel_txt.rst
F: include/linux/tboot.h
F: arch/x86/kernel/tboot.c
@@ -8346,7 +8346,7 @@ INTERCONNECT API
M: Georgi Djakov <georgi.djakov@linaro.org>
L: linux-pm@vger.kernel.org
S: Maintained
-F: Documentation/interconnect/
+F: Documentation/driver-api/interconnect.rst
F: Documentation/devicetree/bindings/interconnect/
F: drivers/interconnect/
F: include/dt-bindings/interconnect/
@@ -8482,7 +8482,7 @@ F: drivers/irqchip/
ISA
M: William Breathitt Gray <vilhelm.gray@gmail.com>
S: Maintained
-F: Documentation/isa.txt
+F: Documentation/driver-api/isa.rst
F: drivers/base/isa.c
F: include/linux/isa.h
@@ -8497,7 +8497,7 @@ F: drivers/media/radio/radio-isa*
ISAPNP
M: Jaroslav Kysela <perex@perex.cz>
S: Maintained
-F: Documentation/isapnp.txt
+F: Documentation/driver-api/isapnp.rst
F: drivers/pnp/isapnp/
F: include/linux/isapnp.h
@@ -8695,7 +8695,7 @@ R: Vivek Goyal <vgoyal@redhat.com>
L: kexec@lists.infradead.org
W: http://lse.sourceforge.net/kdump/
S: Maintained
-F: Documentation/kdump/
+F: Documentation/admin-guide/kdump/
KEENE FM RADIO TRANSMITTER DRIVER
M: Hans Verkuil <hverkuil@xs4all.nl>
@@ -9049,7 +9049,7 @@ M: Matan Ziv-Av <matan@svgalib.org>
L: platform-driver-x86@vger.kernel.org
S: Maintained
F: Documentation/ABI/testing/sysfs-platform-lg-laptop
-F: Documentation/laptops/lg-laptop.rst
+F: Documentation/admin-guide/laptops/lg-laptop.rst
F: drivers/platform/x86/lg-laptop.c
LG2160 MEDIA DRIVER
@@ -9418,7 +9418,7 @@ M: "Richard Russon (FlatCap)" <ldm@flatcap.org>
L: linux-ntfs-dev@lists.sourceforge.net
W: http://www.linux-ntfs.org/content/view/19/37/
S: Maintained
-F: Documentation/ldm.txt
+F: Documentation/admin-guide/ldm.rst
F: block/partitions/ldm.*
LSILOGIC MPT FUSION DRIVERS (FC/SAS/SPI)
@@ -10380,7 +10380,7 @@ M: Johannes Thumshirn <morbidrsa@gmail.com>
S: Maintained
F: drivers/mcb/
F: include/linux/mcb.h
-F: Documentation/men-chameleon-bus.txt
+F: Documentation/driver-api/men-chameleon-bus.rst
MEN F21BMC (Board Management Controller)
M: Andreas Werner <andreas.werner@men.de>
@@ -10794,7 +10794,7 @@ F: include/uapi/linux/meye.h
MOXA SMARTIO/INDUSTIO/INTELLIO SERIAL CARD
M: Jiri Slaby <jirislaby@gmail.com>
S: Maintained
-F: Documentation/serial/moxa-smartio.rst
+F: Documentation/driver-api/serial/moxa-smartio.rst
F: drivers/tty/mxser.*
MR800 AVERMEDIA USB FM RADIO DRIVER
@@ -11095,7 +11095,7 @@ M: Josef Bacik <josef@toxicpanda.com>
S: Maintained
L: linux-block@vger.kernel.org
L: nbd@other.debian.org
-F: Documentation/blockdev/nbd.txt
+F: Documentation/admin-guide/blockdev/nbd.rst
F: drivers/block/nbd.c
F: include/trace/events/nbd.h
F: include/uapi/linux/nbd.h
@@ -11554,7 +11554,7 @@ F: arch/powerpc/include/asm/pnv-ocxl.h
F: drivers/misc/ocxl/
F: include/misc/ocxl*
F: include/uapi/misc/ocxl.h
-F: Documentation/accelerators/ocxl.rst
+F: Documentation/userspace-api/accelerators/ocxl.rst
OMAP AUDIO SUPPORT
M: Peter Ujfalusi <peter.ujfalusi@ti.com>
@@ -11590,7 +11590,7 @@ L: linux-omap@vger.kernel.org
L: linux-fbdev@vger.kernel.org
S: Orphan
F: drivers/video/fbdev/omap2/
-F: Documentation/arm/OMAP/DSS
+F: Documentation/arm/omap/dss.rst
OMAP FRAMEBUFFER SUPPORT
L: linux-fbdev@vger.kernel.org
@@ -12077,7 +12077,7 @@ PARALLEL LCD/KEYPAD PANEL DRIVER
M: Willy Tarreau <willy@haproxy.com>
M: Ksenija Stanojevic <ksenija.stanojevic@gmail.com>
S: Odd Fixes
-F: Documentation/auxdisplay/lcd-panel-cgram.txt
+F: Documentation/admin-guide/lcd-panel-cgram.rst
F: drivers/auxdisplay/panel.c
PARALLEL PORT SUBSYSTEM
@@ -12089,7 +12089,7 @@ F: drivers/parport/
F: include/linux/parport*.h
F: drivers/char/ppdev.c
F: include/uapi/linux/ppdev.h
-F: Documentation/parport*.txt
+F: Documentation/driver-api/parport*.rst
PARAVIRT_OPS INTERFACE
M: Juergen Gross <jgross@suse.com>
@@ -12105,7 +12105,7 @@ PARIDE DRIVERS FOR PARALLEL PORT IDE DEVICES
M: Tim Waugh <tim@cyberelk.net>
L: linux-parport@lists.infradead.org (subscribers-only)
S: Maintained
-F: Documentation/blockdev/paride.txt
+F: Documentation/admin-guide/blockdev/paride.rst
F: drivers/block/paride/
PARISC ARCHITECTURE
@@ -12264,7 +12264,7 @@ M: Kurt Schwemmer <kurt.schwemmer@microsemi.com>
M: Logan Gunthorpe <logang@deltatee.com>
L: linux-pci@vger.kernel.org
S: Maintained
-F: Documentation/switchtec.txt
+F: Documentation/driver-api/switchtec.rst
F: Documentation/ABI/testing/sysfs-class-switchtec
F: drivers/pci/switch/switchtec*
F: include/uapi/linux/switchtec_ioctl.h
@@ -12611,6 +12611,17 @@ F: arch/arm/boot/dts/picoxcell*
F: arch/arm/mach-picoxcell/
F: drivers/crypto/picoxcell*
+PIDFD API
+M: Christian Brauner <christian@brauner.io>
+L: linux-kernel@vger.kernel.org
+S: Maintained
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/brauner/linux.git
+F: samples/pidfd/
+F: tools/testing/selftests/pidfd/
+K: (?i)pidfd
+K: (?i)clone3
+K: \b(clone_args|kernel_clone_args)\b
+
PIN CONTROL SUBSYSTEM
M: Linus Walleij <linus.walleij@linaro.org>
L: linux-gpio@vger.kernel.org
@@ -13025,7 +13036,7 @@ M: Thierry Reding <thierry.reding@gmail.com>
L: linux-pwm@vger.kernel.org
S: Maintained
T: git git://git.kernel.org/pub/scm/linux/kernel/git/thierry.reding/linux-pwm.git
-F: Documentation/pwm.txt
+F: Documentation/driver-api/pwm.rst
F: Documentation/devicetree/bindings/pwm/
F: include/linux/pwm.h
F: drivers/pwm/
@@ -13386,7 +13397,7 @@ F: drivers/net/wireless/ralink/rt2x00/
RAMDISK RAM BLOCK DEVICE DRIVER
M: Jens Axboe <axboe@kernel.dk>
S: Maintained
-F: Documentation/blockdev/ramdisk.txt
+F: Documentation/admin-guide/blockdev/ramdisk.rst
F: drivers/block/brd.c
RANCHU VIRTUAL BOARD FOR MIPS
@@ -13495,7 +13506,7 @@ Q: http://patchwork.ozlabs.org/project/rtc-linux/list/
T: git git://git.kernel.org/pub/scm/linux/kernel/git/abelloni/linux.git
S: Maintained
F: Documentation/devicetree/bindings/rtc/
-F: Documentation/rtc.txt
+F: Documentation/admin-guide/rtc.rst
F: drivers/rtc/
F: include/linux/rtc.h
F: include/uapi/linux/rtc.h
@@ -13639,7 +13650,7 @@ W: http://wireless.kernel.org/
T: git git://git.kernel.org/pub/scm/linux/kernel/git/jberg/mac80211.git
T: git git://git.kernel.org/pub/scm/linux/kernel/git/jberg/mac80211-next.git
S: Maintained
-F: Documentation/rfkill.txt
+F: Documentation/driver-api/rfkill.rst
F: Documentation/ABI/stable/sysfs-class-rfkill
F: net/rfkill/
F: include/linux/rfkill.h
@@ -13708,7 +13719,7 @@ ROCKETPORT DRIVER
P: Comtrol Corp.
W: http://www.comtrol.com
S: Maintained
-F: Documentation/serial/rocket.rst
+F: Documentation/driver-api/serial/rocket.rst
F: drivers/tty/rocket*
ROCKETPORT EXPRESS/INFINITY DRIVER
@@ -14102,7 +14113,7 @@ M: Sylwester Nawrocki <s.nawrocki@samsung.com>
L: linux-kernel@vger.kernel.org
S: Supported
F: Documentation/devicetree/bindings/phy/samsung-phy.txt
-F: Documentation/phy/samsung-usb2.txt
+F: Documentation/driver-api/phy/samsung-usb2.rst
F: drivers/phy/samsung/phy-exynos4210-usb2.c
F: drivers/phy/samsung/phy-exynos4x12-usb2.c
F: drivers/phy/samsung/phy-exynos5250-usb2.c
@@ -14408,7 +14419,7 @@ SGI SN-IA64 (Altix) SERIAL CONSOLE DRIVER
M: Pat Gefre <pfg@sgi.com>
L: linux-ia64@vger.kernel.org
S: Supported
-F: Documentation/ia64/serial.txt
+F: Documentation/ia64/serial.rst
F: drivers/tty/serial/ioc?_serial.c
F: include/linux/ioc?.h
@@ -14914,7 +14925,7 @@ M: Mattia Dongili <malattia@linux.it>
L: platform-driver-x86@vger.kernel.org
W: http://www.linux.it/~malattia/wiki/index.php/Sony_drivers
S: Maintained
-F: Documentation/laptops/sony-laptop.txt
+F: Documentation/admin-guide/laptops/sony-laptop.rst
F: drivers/char/sonypi.c
F: drivers/platform/x86/sony-laptop.c
F: include/linux/sony-laptop.h
@@ -15332,7 +15343,7 @@ SVGA HANDLING
M: Martin Mares <mj@ucw.cz>
L: linux-video@atrey.karlin.mff.cuni.cz
S: Maintained
-F: Documentation/svga.txt
+F: Documentation/admin-guide/svga.rst
F: arch/x86/boot/video*
SWIOTLB SUBSYSTEM
@@ -15369,7 +15380,7 @@ F: drivers/dma-buf/dma-fence*
F: drivers/dma-buf/sw_sync.c
F: include/linux/sync_file.h
F: include/uapi/linux/sync_file.h
-F: Documentation/sync_file.txt
+F: Documentation/driver-api/sync_file.rst
T: git git://anongit.freedesktop.org/drm/drm-misc
SYNOPSYS ARC ARCHITECTURE
@@ -15990,7 +16001,7 @@ F: sound/soc/codecs/isabelle*
TI LP855x BACKLIGHT DRIVER
M: Milo Kim <milo.kim@ti.com>
S: Maintained
-F: Documentation/backlight/lp855x-driver.txt
+F: Documentation/driver-api/backlight/lp855x-driver.rst
F: drivers/video/backlight/lp855x_bl.c
F: include/linux/platform_data/lp855x.h
@@ -16254,7 +16265,7 @@ M: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
M: Jiri Slaby <jslaby@suse.com>
S: Supported
T: git git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/tty.git
-F: Documentation/serial/
+F: Documentation/driver-api/serial/
F: drivers/tty/
F: drivers/tty/serial/serial_core.c
F: include/linux/serial_core.h
@@ -16865,7 +16876,7 @@ R: Cornelia Huck <cohuck@redhat.com>
L: kvm@vger.kernel.org
T: git git://github.com/awilliam/linux-vfio.git
S: Maintained
-F: Documentation/vfio.txt
+F: Documentation/driver-api/vfio.rst
F: drivers/vfio/
F: include/linux/vfio.h
F: include/uapi/linux/vfio.h
@@ -16874,7 +16885,7 @@ VFIO MEDIATED DEVICE DRIVERS
M: Kirti Wankhede <kwankhede@nvidia.com>
L: kvm@vger.kernel.org
S: Maintained
-F: Documentation/vfio-mediated-device.txt
+F: Documentation/driver-api/vfio-mediated-device.rst
F: drivers/vfio/mdev/
F: include/linux/mdev.h
F: samples/vfio-mdev/
@@ -17749,7 +17760,7 @@ R: Sergey Senozhatsky <sergey.senozhatsky.work@gmail.com>
L: linux-kernel@vger.kernel.org
S: Maintained
F: drivers/block/zram/
-F: Documentation/blockdev/zram.txt
+F: Documentation/admin-guide/blockdev/zram.rst
ZS DECSTATION Z85C30 SERIAL DRIVER
M: "Maciej W. Rozycki" <macro@linux-mips.org>
diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl
index 1db9bbcfb84e..728fe028c02c 100644
--- a/arch/alpha/kernel/syscalls/syscall.tbl
+++ b/arch/alpha/kernel/syscalls/syscall.tbl
@@ -474,3 +474,4 @@
542 common fsmount sys_fsmount
543 common fspick sys_fspick
544 common pidfd_open sys_pidfd_open
+# 545 reserved for clone3
diff --git a/arch/arc/boot/dts/haps_hs.dts b/arch/arc/boot/dts/haps_hs.dts
index 1ebfa046492b..44bc522fdec8 100644
--- a/arch/arc/boot/dts/haps_hs.dts
+++ b/arch/arc/boot/dts/haps_hs.dts
@@ -62,5 +62,35 @@
#interrupt-cells = <1>;
interrupts = <20>;
};
+
+ virtio0: virtio@f0100000 {
+ compatible = "virtio,mmio";
+ reg = <0xf0100000 0x2000>;
+ interrupts = <31>;
+ };
+
+ virtio1: virtio@f0102000 {
+ compatible = "virtio,mmio";
+ reg = <0xf0102000 0x2000>;
+ interrupts = <32>;
+ };
+
+ virtio2: virtio@f0104000 {
+ compatible = "virtio,mmio";
+ reg = <0xf0104000 0x2000>;
+ interrupts = <33>;
+ };
+
+ virtio3: virtio@f0106000 {
+ compatible = "virtio,mmio";
+ reg = <0xf0106000 0x2000>;
+ interrupts = <34>;
+ };
+
+ virtio4: virtio@f0108000 {
+ compatible = "virtio,mmio";
+ reg = <0xf0108000 0x2000>;
+ interrupts = <35>;
+ };
};
};
diff --git a/arch/arc/boot/dts/hsdk.dts b/arch/arc/boot/dts/hsdk.dts
index 9a45cb093096..bfc7f5f5d6f2 100644
--- a/arch/arc/boot/dts/hsdk.dts
+++ b/arch/arc/boot/dts/hsdk.dts
@@ -8,6 +8,7 @@
*/
/dts-v1/;
+#include <dt-bindings/gpio/gpio.h>
#include <dt-bindings/reset/snps,hsdk-reset.h>
/ {
@@ -252,6 +253,19 @@
dma-coherent;
};
+ spi0: spi@20000 {
+ compatible = "snps,dw-apb-ssi";
+ reg = <0x20000 0x100>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ interrupts = <16>;
+ num-cs = <2>;
+ reg-io-width = <4>;
+ clocks = <&input_clk>;
+ cs-gpios = <&creg_gpio 0 GPIO_ACTIVE_LOW>,
+ <&creg_gpio 1 GPIO_ACTIVE_LOW>;
+ };
+
creg_gpio: gpio@14b0 {
compatible = "snps,creg-gpio-hsdk";
reg = <0x14b0 0x4>;
diff --git a/arch/arc/configs/haps_hs_defconfig b/arch/arc/configs/haps_hs_defconfig
index b117e6c16d41..436f2135bdc1 100644
--- a/arch/arc/configs/haps_hs_defconfig
+++ b/arch/arc/configs/haps_hs_defconfig
@@ -35,10 +35,12 @@ CONFIG_INET=y
# CONFIG_IPV6 is not set
# CONFIG_WIRELESS is not set
CONFIG_DEVTMPFS=y
+CONFIG_DEVTMPFS_MOUNT=y
# CONFIG_STANDALONE is not set
# CONFIG_PREVENT_FIRMWARE_BUILD is not set
-# CONFIG_BLK_DEV is not set
+CONFIG_VIRTIO_BLK=y
CONFIG_NETDEVICES=y
+CONFIG_VIRTIO_NET=y
# CONFIG_NET_VENDOR_ARC is not set
# CONFIG_NET_VENDOR_BROADCOM is not set
# CONFIG_NET_VENDOR_INTEL is not set
@@ -68,6 +70,7 @@ CONFIG_FRAMEBUFFER_CONSOLE=y
CONFIG_LOGO=y
# CONFIG_HID is not set
# CONFIG_USB_SUPPORT is not set
+CONFIG_VIRTIO_MMIO=y
# CONFIG_IOMMU_SUPPORT is not set
CONFIG_EXT2_FS=y
CONFIG_EXT2_FS_XATTR=y
diff --git a/arch/arc/configs/hsdk_defconfig b/arch/arc/configs/hsdk_defconfig
index c8fb5d60c53f..403125d9c9a3 100644
--- a/arch/arc/configs/hsdk_defconfig
+++ b/arch/arc/configs/hsdk_defconfig
@@ -46,6 +46,9 @@ CONFIG_SERIAL_8250_CONSOLE=y
CONFIG_SERIAL_8250_DW=y
CONFIG_SERIAL_OF_PLATFORM=y
# CONFIG_HW_RANDOM is not set
+CONFIG_SPI=y
+CONFIG_SPI_DESIGNWARE=y
+CONFIG_SPI_DW_MMIO=y
CONFIG_GPIOLIB=y
CONFIG_GPIO_SYSFS=y
CONFIG_GPIO_DWAPB=y
@@ -66,6 +69,8 @@ CONFIG_MMC=y
CONFIG_MMC_SDHCI=y
CONFIG_MMC_SDHCI_PLTFM=y
CONFIG_MMC_DW=y
+CONFIG_DMADEVICES=y
+CONFIG_DW_AXI_DMAC=y
CONFIG_EXT3_FS=y
CONFIG_VFAT_FS=y
CONFIG_TMPFS=y
diff --git a/arch/arc/include/asm/entry-arcv2.h b/arch/arc/include/asm/entry-arcv2.h
index 225e7df2d8ed..f5ae394ebe06 100644
--- a/arch/arc/include/asm/entry-arcv2.h
+++ b/arch/arc/include/asm/entry-arcv2.h
@@ -7,232 +7,251 @@
#include <asm/irqflags-arcv2.h>
#include <asm/thread_info.h> /* For THREAD_SIZE */
+/*
+ * Interrupt/Exception stack layout (pt_regs) for ARCv2
+ * (End of struct aligned to end of page [unless nested])
+ *
+ * INTERRUPT EXCEPTION
+ *
+ * manual --------------------- manual
+ * | orig_r0 |
+ * | event/ECR |
+ * | bta |
+ * | user_r25 |
+ * | gp |
+ * | fp |
+ * | sp |
+ * | r12 |
+ * | r30 |
+ * | r58 |
+ * | r59 |
+ * hw autosave ---------------------
+ * optional | r0 |
+ * | r1 |
+ * ~ ~
+ * | r9 |
+ * | r10 |
+ * | r11 |
+ * | blink |
+ * | lpe |
+ * | lps |
+ * | lpc |
+ * | ei base |
+ * | ldi base |
+ * | jli base |
+ * ---------------------
+ * hw autosave | pc / eret |
+ * mandatory | stat32 / erstatus |
+ * ---------------------
+ */
+
/*------------------------------------------------------------------------*/
-.macro INTERRUPT_PROLOGUE called_from
+.macro INTERRUPT_PROLOGUE
- ; Before jumping to Interrupt Vector, hardware micro-ops did following:
+ ; (A) Before jumping to Interrupt Vector, hardware micro-ops did following:
; 1. SP auto-switched to kernel mode stack
- ; 2. STATUS32.Z flag set to U mode at time of interrupt (U:1, K:0)
- ; 3. Auto saved: r0-r11, blink, LPE,LPS,LPC, JLI,LDI,EI, PC, STAT32
+ ; 2. STATUS32.Z flag set if in U mode at time of interrupt (U:1,K:0)
+ ; 3. Auto save: (mandatory) Push PC and STAT32 on stack
+ ; hardware does even if CONFIG_ARC_IRQ_NO_AUTOSAVE
+ ; 4. Auto save: (optional) r0-r11, blink, LPE,LPS,LPC, JLI,LDI,EI
;
- ; Now manually save: r12, sp, fp, gp, r25
+ ; (B) Manually saved some regs: r12,r25,r30, sp,fp,gp, ACCL pair
#ifdef CONFIG_ARC_IRQ_NO_AUTOSAVE
-.ifnc \called_from, exception
- st.as r9, [sp, -10] ; save r9 in it's final stack slot
- sub sp, sp, 12 ; skip JLI, LDI, EI
-
- PUSH lp_count
- PUSHAX lp_start
- PUSHAX lp_end
- PUSH blink
-
- PUSH r11
- PUSH r10
-
- sub sp, sp, 4 ; skip r9
-
- PUSH r8
- PUSH r7
- PUSH r6
- PUSH r5
- PUSH r4
- PUSH r3
- PUSH r2
- PUSH r1
- PUSH r0
-.endif
-#endif
+ ; carve pt_regs on stack (case #3), PC/STAT32 already on stack
+ sub sp, sp, SZ_PT_REGS - 8
-#ifdef CONFIG_ARC_HAS_ACCL_REGS
- PUSH r59
- PUSH r58
+ __SAVE_REGFILE_HARD
+#else
+ ; carve pt_regs on stack (case #4), which grew partially already
+ sub sp, sp, PT_r0
#endif
- PUSH r30
- PUSH r12
+ __SAVE_REGFILE_SOFT
+.endm
+
+/*------------------------------------------------------------------------*/
+.macro EXCEPTION_PROLOGUE
+
+ ; (A) Before jumping to Exception Vector, hardware micro-ops did following:
+ ; 1. SP auto-switched to kernel mode stack
+ ; 2. STATUS32.Z flag set if in U mode at time of exception (U:1,K:0)
+ ;
+ ; (B) Manually save the complete reg file below
+
+ sub sp, sp, SZ_PT_REGS ; carve pt_regs
+
+ ; _HARD saves r10, which _SOFT then clobbers as scratch, hence _HARD comes first
+
+ __SAVE_REGFILE_HARD
+ __SAVE_REGFILE_SOFT
+
+ st r0, [sp] ; orig_r0
+
+ lr r10, [eret]
+ lr r11, [erstatus]
+ ST2 r10, r11, PT_ret
+
+ lr r10, [ecr]
+ lr r11, [erbta]
+ ST2 r10, r11, PT_event
+
+ ; OUTPUT: r10 has ECR expected by EV_Trap
+.endm
+
+/*------------------------------------------------------------------------
+ * This macro saves the registers manually which would normally be autosaved
+ * by hardware on taken interrupts. It is used by
+ * - exception handlers (which don't have autosave)
+ * - interrupt autosave disabled due to CONFIG_ARC_IRQ_NO_AUTOSAVE
+ */
+.macro __SAVE_REGFILE_HARD
+
+ ST2 r0, r1, PT_r0
+ ST2 r2, r3, PT_r2
+ ST2 r4, r5, PT_r4
+ ST2 r6, r7, PT_r6
+ ST2 r8, r9, PT_r8
+ ST2 r10, r11, PT_r10
+
+ st blink, [sp, PT_blink]
+
+ lr r10, [lp_end]
+ lr r11, [lp_start]
+ ST2 r10, r11, PT_lpe
+
+ st lp_count, [sp, PT_lpc]
+
+ ; skip JLI, LDI, EI for now
+.endm
+
+/*------------------------------------------------------------------------
+ * This macro saves a bunch of other registers which can't be autosaved for
+ * various reasons:
+ * - r12: the last caller-saved scratch reg; hardware autosaves in pairs, so only r0-r11
+ * - r30: free reg, used by gcc as scratch
+ * - ACCL/ACCH pair when they exist
+ */
+.macro __SAVE_REGFILE_SOFT
+
+ ST2 gp, fp, PT_r26 ; gp (r26), fp (r27)
+
+ st r12, [sp, PT_sp + 4]
+ st r30, [sp, PT_sp + 8]
; Saving pt_regs->sp correctly requires some extra work due to the way
; Auto stack switch works
; - U mode: retrieve it from AUX_USER_SP
; - K mode: add the offset from current SP where H/w starts auto push
;
- ; Utilize the fact that Z bit is set if Intr taken in U mode
- mov.nz r9, sp
- add.nz r9, r9, SZ_PT_REGS - PT_sp - 4
- bnz 1f
+ ; 1. Utilize the fact that Z bit is set if Intr taken in U mode
+ ; 2. Upon entry SP is always saved (for any inspection, unwinding etc),
+ ; but on return, restored only if U mode
- lr r9, [AUX_USER_SP]
-1:
- PUSH r9 ; SP
+ lr r10, [AUX_USER_SP] ; U mode SP
+
+ ; ISA requires ADD.nz to have same dest and src reg operands
+ mov.nz r10, sp
+ add.nz r10, r10, SZ_PT_REGS ; K mode SP
- PUSH fp
- PUSH gp
+ st r10, [sp, PT_sp] ; SP (pt_regs->sp)
#ifdef CONFIG_ARC_CURR_IN_REG
- PUSH r25 ; user_r25
+ st r25, [sp, PT_user_r25]
GET_CURR_TASK_ON_CPU r25
-#else
- sub sp, sp, 4
#endif
-.ifnc \called_from, exception
- sub sp, sp, 12 ; BTA/ECR/orig_r0 placeholder per pt_regs
-.endif
+#ifdef CONFIG_ARC_HAS_ACCL_REGS
+ ST2 r58, r59, PT_sp + 12
+#endif
.endm
/*------------------------------------------------------------------------*/
-.macro INTERRUPT_EPILOGUE called_from
+.macro __RESTORE_REGFILE_SOFT
-.ifnc \called_from, exception
- add sp, sp, 12 ; skip BTA/ECR/orig_r0 placeholderss
-.endif
+ LD2 gp, fp, PT_r26 ; gp (r26), fp (r27)
-#ifdef CONFIG_ARC_CURR_IN_REG
- POP r25
-#else
- add sp, sp, 4
-#endif
+ ld r12, [sp, PT_sp + 4]
+ ld r30, [sp, PT_sp + 8]
- POP gp
- POP fp
-
- ; Don't touch AUX_USER_SP if returning to K mode (Z bit set)
- ; (Z bit set on K mode is inverse of INTERRUPT_PROLOGUE)
- add.z sp, sp, 4
+ ; Restore SP (into AUX_USER_SP) only if returning to U mode
+ ; - for K mode, it will be implicitly restored as stack is unwound
+ ; - Z flag set on K is inverse of what hardware does on interrupt entry
+ ; but that doesn't really matter
bz 1f
- POPAX AUX_USER_SP
+ ld r10, [sp, PT_sp] ; SP (pt_regs->sp)
+ sr r10, [AUX_USER_SP]
1:
- POP r12
- POP r30
-#ifdef CONFIG_ARC_HAS_ACCL_REGS
- POP r58
- POP r59
+#ifdef CONFIG_ARC_CURR_IN_REG
+ ld r25, [sp, PT_user_r25]
#endif
-#ifdef CONFIG_ARC_IRQ_NO_AUTOSAVE
-.ifnc \called_from, exception
- POP r0
- POP r1
- POP r2
- POP r3
- POP r4
- POP r5
- POP r6
- POP r7
- POP r8
- POP r9
- POP r10
- POP r11
-
- POP blink
- POPAX lp_end
- POPAX lp_start
-
- POP r9
- mov lp_count, r9
-
- add sp, sp, 12 ; skip JLI, LDI, EI
- ld.as r9, [sp, -10] ; reload r9 which got clobbered
-.endif
+#ifdef CONFIG_ARC_HAS_ACCL_REGS
+ LD2 r58, r59, PT_sp + 12
#endif
+.endm
+/*------------------------------------------------------------------------*/
+.macro __RESTORE_REGFILE_HARD
+
+ ld blink, [sp, PT_blink]
+
+ LD2 r10, r11, PT_lpe
+ sr r10, [lp_end]
+ sr r11, [lp_start]
+
+ ld r10, [sp, PT_lpc] ; lp_count can't be target of LD
+ mov lp_count, r10
+
+ LD2 r0, r1, PT_r0
+ LD2 r2, r3, PT_r2
+ LD2 r4, r5, PT_r4
+ LD2 r6, r7, PT_r6
+ LD2 r8, r9, PT_r8
+ LD2 r10, r11, PT_r10
.endm
+
/*------------------------------------------------------------------------*/
-.macro EXCEPTION_PROLOGUE
+.macro INTERRUPT_EPILOGUE
- ; Before jumping to Exception Vector, hardware micro-ops did following:
- ; 1. SP auto-switched to kernel mode stack
- ; 2. STATUS32.Z flag set to U mode at time of interrupt (U:1,K:0)
- ;
- ; Now manually save the complete reg file
-
- PUSH r9 ; freeup a register: slot of erstatus
-
- PUSHAX eret
- sub sp, sp, 12 ; skip JLI, LDI, EI
- PUSH lp_count
- PUSHAX lp_start
- PUSHAX lp_end
- PUSH blink
-
- PUSH r11
- PUSH r10
-
- ld.as r9, [sp, 10] ; load stashed r9 (status32 stack slot)
- lr r10, [erstatus]
- st.as r10, [sp, 10] ; save status32 at it's right stack slot
-
- PUSH r9
- PUSH r8
- PUSH r7
- PUSH r6
- PUSH r5
- PUSH r4
- PUSH r3
- PUSH r2
- PUSH r1
- PUSH r0
-
- ; -- for interrupts, regs above are auto-saved by h/w in that order --
- ; Now do what ISR prologue does (manually save r12, sp, fp, gp, r25)
- ;
- ; Set Z flag if this was from U mode (expected by INTERRUPT_PROLOGUE)
- ; Although H/w exception micro-ops do set Z flag for U mode (just like
- ; for interrupts), it could get clobbered in case we soft land here from
- ; a TLB Miss exception handler (tlbex.S)
+ ; INPUT: r0 has STAT32 of calling context
+ ; INPUT: Z flag set if returning to K mode
- and r10, r10, STATUS_U_MASK
- xor.f 0, r10, STATUS_U_MASK
+ ; _SOFT clobbers r10 restored by _HARD hence the order
- INTERRUPT_PROLOGUE exception
+ __RESTORE_REGFILE_SOFT
- PUSHAX erbta
- PUSHAX ecr ; r9 contains ECR, expected by EV_Trap
+#ifdef CONFIG_ARC_IRQ_NO_AUTOSAVE
+ __RESTORE_REGFILE_HARD
+ add sp, sp, SZ_PT_REGS - 8
+#else
+ add sp, sp, PT_r0
+#endif
- PUSH r0 ; orig_r0
.endm
/*------------------------------------------------------------------------*/
.macro EXCEPTION_EPILOGUE
- ; Assumes r0 has PT_status32
- btst r0, STATUS_U_BIT ; Z flag set if K, used in INTERRUPT_EPILOGUE
-
- add sp, sp, 8 ; orig_r0/ECR don't need restoring
- POPAX erbta
-
- INTERRUPT_EPILOGUE exception
+ ; INPUT: r0 has STAT32 of calling context
- POP r0
- POP r1
- POP r2
- POP r3
- POP r4
- POP r5
- POP r6
- POP r7
- POP r8
- POP r9
- POP r10
- POP r11
+ btst r0, STATUS_U_BIT ; Z flag set if K, used in restoring SP
- POP blink
- POPAX lp_end
- POPAX lp_start
+ ld r10, [sp, PT_event + 4]
+ sr r10, [erbta]
- POP r9
- mov lp_count, r9
+ LD2 r10, r11, PT_ret
+ sr r10, [eret]
+ sr r11, [erstatus]
- add sp, sp, 12 ; skip JLI, LDI, EI
- POPAX eret
- POPAX erstatus
+ __RESTORE_REGFILE_SOFT
+ __RESTORE_REGFILE_HARD
- ld.as r9, [sp, -12] ; reload r9 which got clobbered
+ add sp, sp, SZ_PT_REGS
.endm
.macro FAKE_RET_FROM_EXCPN
diff --git a/arch/arc/include/asm/entry-compact.h b/arch/arc/include/asm/entry-compact.h
index 66ba1bf21d28..66a292335ee6 100644
--- a/arch/arc/include/asm/entry-compact.h
+++ b/arch/arc/include/asm/entry-compact.h
@@ -195,8 +195,8 @@
PUSHAX CTOP_AUX_EFLAGS
#endif
- lr r9, [ecr]
- st r9, [sp, PT_event] /* EV_Trap expects r9 to have ECR */
+ lr r10, [ecr]
+ st r10, [sp, PT_event] /* EV_Trap expects r10 to have ECR */
.endm
/*--------------------------------------------------------------
diff --git a/arch/arc/include/asm/linkage.h b/arch/arc/include/asm/linkage.h
index 54f5ec5c1759..a0eeb9f8f0a9 100644
--- a/arch/arc/include/asm/linkage.h
+++ b/arch/arc/include/asm/linkage.h
@@ -10,6 +10,24 @@
#ifdef __ASSEMBLY__
+.macro ST2 e, o, off
+#ifdef CONFIG_ARC_HAS_LL64
+ std \e, [sp, \off]
+#else
+ st \e, [sp, \off]
+ st \o, [sp, \off+4]
+#endif
+.endm
+
+.macro LD2 e, o, off
+#ifdef CONFIG_ARC_HAS_LL64
+ ldd \e, [sp, \off]
+#else
+ ld \e, [sp, \off]
+ ld \o, [sp, \off+4]
+#endif
+.endm
+
#define ASM_NL ` /* use '`' to mark new line in macro */
/* annotation for data we want in DCCM - if enabled in .config */
diff --git a/arch/arc/kernel/asm-offsets.c b/arch/arc/kernel/asm-offsets.c
index dba116535005..1f621e416521 100644
--- a/arch/arc/kernel/asm-offsets.c
+++ b/arch/arc/kernel/asm-offsets.c
@@ -55,7 +55,14 @@ int main(void)
DEFINE(PT_r5, offsetof(struct pt_regs, r5));
DEFINE(PT_r6, offsetof(struct pt_regs, r6));
DEFINE(PT_r7, offsetof(struct pt_regs, r7));
+ DEFINE(PT_r8, offsetof(struct pt_regs, r8));
+ DEFINE(PT_r10, offsetof(struct pt_regs, r10));
+ DEFINE(PT_r26, offsetof(struct pt_regs, r26));
DEFINE(PT_ret, offsetof(struct pt_regs, ret));
+ DEFINE(PT_blink, offsetof(struct pt_regs, blink));
+ DEFINE(PT_lpe, offsetof(struct pt_regs, lp_end));
+ DEFINE(PT_lpc, offsetof(struct pt_regs, lp_count));
+ DEFINE(PT_user_r25, offsetof(struct pt_regs, user_r25));
DEFINE(SZ_CALLEE_REGS, sizeof(struct callee_regs));
DEFINE(SZ_PT_REGS, sizeof(struct pt_regs));
diff --git a/arch/arc/kernel/entry-arcv2.S b/arch/arc/kernel/entry-arcv2.S
index 14254b866fdc..12d5f12d10d2 100644
--- a/arch/arc/kernel/entry-arcv2.S
+++ b/arch/arc/kernel/entry-arcv2.S
@@ -67,7 +67,7 @@ reserved:
ENTRY(handle_interrupt)
- INTERRUPT_PROLOGUE irq
+ INTERRUPT_PROLOGUE
# irq control APIs local_irq_save/restore/disable/enable fiddle with
# global interrupt enable bits in STATUS32 (.IE for 1 prio, .E[] for 2 prio)
@@ -79,7 +79,7 @@ ENTRY(handle_interrupt)
#
# Note this disable is only for consistent book-keeping as further interrupts
# will be disabled anyways even w/o this. Hardware tracks active interrupts
- # seperately in AUX_IRQ_ACTIVE.active and will not take new interrupts
+ # separately in AUX_IRQ_ACT.active and will not take new interrupts
# unless this one returns (or higher prio becomes pending in 2-prio scheme)
IRQ_DISABLE
@@ -200,17 +200,18 @@ restore_regs:
ld r0, [sp, PT_status32] ; U/K mode at time of entry
lr r10, [AUX_IRQ_ACT]
- bmsk r11, r10, 15 ; AUX_IRQ_ACT.ACTIVE
+ bmsk r11, r10, 15 ; extract AUX_IRQ_ACT.active
breq r11, 0, .Lexcept_ret ; No intr active, ret from Exception
;####### Return from Intr #######
+.Lisr_ret:
+
debug_marker_l1:
; bbit1.nt r0, STATUS_DE_BIT, .Lintr_ret_to_delay_slot
btst r0, STATUS_DE_BIT ; Z flag set if bit clear
bnz .Lintr_ret_to_delay_slot ; branch if STATUS_DE_BIT set
-.Lisr_ret_fast_path:
; Handle special case #1: (Entry via Exception, Return via IRQ)
;
; Exception in U mode, preempted in kernel, Intr taken (K mode), orig
@@ -223,7 +224,7 @@ debug_marker_l1:
bset.nz r11, r11, AUX_IRQ_ACT_BIT_U ; NZ means U
sr r11, [AUX_IRQ_ACT]
- INTERRUPT_EPILOGUE irq
+ INTERRUPT_EPILOGUE
rtie
;####### Return from Exception / pure kernel mode #######
@@ -244,8 +245,8 @@ debug_marker_syscall:
;
; IRQ RTIE won't reliably restore DE bit and/or BTA, needs workaround
;
-; Solution is return from Intr w/o any delay slot quirks into a kernel trampoline
-; and from pure kernel mode return to delay slot which handles DS bit/BTA correctly
+; Solution is to drop out of interrupt context into pure kernel mode
+; and return from pure kernel mode which does right things for delay slot
.Lintr_ret_to_delay_slot:
debug_marker_ds:
@@ -254,48 +255,9 @@ debug_marker_ds:
add r2, r2, 1
st r2, [@intr_to_DE_cnt]
- ld r2, [sp, PT_ret]
- ld r3, [sp, PT_status32]
-
- ; STAT32 for Int return created from scratch
- ; (No delay dlot, disable Further intr in trampoline)
-
- bic r0, r3, STATUS_U_MASK|STATUS_DE_MASK|STATUS_IE_MASK|STATUS_L_MASK
- st r0, [sp, PT_status32]
-
- mov r1, .Lintr_ret_to_delay_slot_2
- st r1, [sp, PT_ret]
-
- ; Orig exception PC/STAT32 safekept @orig_r0 and @event stack slots
- st r2, [sp, 0]
- st r3, [sp, 4]
-
- b .Lisr_ret_fast_path
-
-.Lintr_ret_to_delay_slot_2:
- ; Trampoline to restore orig exception PC/STAT32/BTA/AUX_USER_SP
- sub sp, sp, SZ_PT_REGS
- st r9, [sp, -4]
-
- ld r9, [sp, 0]
- sr r9, [eret]
-
- ld r9, [sp, 4]
- sr r9, [erstatus]
-
- ; restore AUX_USER_SP if returning to U mode
- bbit0 r9, STATUS_U_BIT, 1f
- ld r9, [sp, PT_sp]
- sr r9, [AUX_USER_SP]
-
-1:
- ld r9, [sp, 8]
- sr r9, [erbta]
-
- ld r9, [sp, -4]
- add sp, sp, SZ_PT_REGS
-
- ; return from pure kernel mode to delay slot
- rtie
+ ; drop out of interrupt context (clear AUX_IRQ_ACT.active)
+ bmskn r11, r10, 15
+ sr r11, [AUX_IRQ_ACT]
+ b .Lexcept_ret
END(ret_from_exception)
diff --git a/arch/arc/kernel/entry-compact.S b/arch/arc/kernel/entry-compact.S
index 7fe59880c16b..5cb0cd7e4eab 100644
--- a/arch/arc/kernel/entry-compact.S
+++ b/arch/arc/kernel/entry-compact.S
@@ -256,7 +256,7 @@ ENTRY(EV_TLBProtV)
EXCEPTION_PROLOGUE
- mov r2, r9 ; ECR set into r9 already
+ mov r2, r10 ; ECR set into r10 already
lr r0, [efa] ; Faulting Data address (not part of pt_regs saved above)
; Exception auto-disables further Intr/exceptions.
diff --git a/arch/arc/kernel/entry.S b/arch/arc/kernel/entry.S
index a2bfacbcfce1..72be01270e24 100644
--- a/arch/arc/kernel/entry.S
+++ b/arch/arc/kernel/entry.S
@@ -232,8 +232,8 @@ ENTRY(EV_Trap)
EXCEPTION_PROLOGUE
;============ TRAP 1 :breakpoints
- ; Check ECR for trap with arg (PROLOGUE ensures r9 has ECR)
- bmsk.f 0, r9, 7
+ ; Check ECR for trap with arg (PROLOGUE ensures r10 has ECR)
+ bmsk.f 0, r10, 7
bnz trap_with_param
;============ TRAP (no param): syscall top level
diff --git a/arch/arc/kernel/unwind.c b/arch/arc/kernel/unwind.c
index 182ce67dfe10..c2663fce7f6c 100644
--- a/arch/arc/kernel/unwind.c
+++ b/arch/arc/kernel/unwind.c
@@ -181,11 +181,6 @@ static void *__init unw_hdr_alloc_early(unsigned long sz)
return memblock_alloc_from(sz, sizeof(unsigned int), MAX_DMA_ADDRESS);
}
-static void *unw_hdr_alloc(unsigned long sz)
-{
- return kmalloc(sz, GFP_KERNEL);
-}
-
static void init_unwind_table(struct unwind_table *table, const char *name,
const void *core_start, unsigned long core_size,
const void *init_start, unsigned long init_size,
@@ -366,6 +361,10 @@ ret_err:
}
#ifdef CONFIG_MODULES
+static void *unw_hdr_alloc(unsigned long sz)
+{
+ return kmalloc(sz, GFP_KERNEL);
+}
static struct unwind_table *last_table;
diff --git a/arch/arc/mm/fault.c b/arch/arc/mm/fault.c
index 81e84426fe21..3861543b66a0 100644
--- a/arch/arc/mm/fault.c
+++ b/arch/arc/mm/fault.c
@@ -63,24 +63,19 @@ void do_page_fault(unsigned long address, struct pt_regs *regs)
struct vm_area_struct *vma = NULL;
struct task_struct *tsk = current;
struct mm_struct *mm = tsk->mm;
- int si_code = SEGV_MAPERR;
- int ret;
- vm_fault_t fault;
- int write = regs->ecr_cause & ECR_C_PROTV_STORE; /* ST/EX */
- unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
+ int sig, si_code = SEGV_MAPERR;
+ unsigned int write = 0, exec = 0, mask;
+ vm_fault_t fault = VM_FAULT_SIGSEGV; /* handle_mm_fault() output */
+ unsigned int flags; /* handle_mm_fault() input */
/*
- * We fault-in kernel-space virtual memory on-demand. The
- * 'reference' page table is init_mm.pgd.
- *
* NOTE! We MUST NOT take any locks for this case. We may
* be in an interrupt or a critical region, and should
* only copy the information from the master page table,
* nothing more.
*/
if (address >= VMALLOC_START && !user_mode(regs)) {
- ret = handle_kernel_vaddr_fault(address);
- if (unlikely(ret))
+ if (unlikely(handle_kernel_vaddr_fault(address)))
goto no_context;
else
return;
@@ -93,143 +88,117 @@ void do_page_fault(unsigned long address, struct pt_regs *regs)
if (faulthandler_disabled() || !mm)
goto no_context;
+ if (regs->ecr_cause & ECR_C_PROTV_STORE) /* ST/EX */
+ write = 1;
+ else if ((regs->ecr_vec == ECR_V_PROTV) &&
+ (regs->ecr_cause == ECR_C_PROTV_INST_FETCH))
+ exec = 1;
+
+ flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
if (user_mode(regs))
flags |= FAULT_FLAG_USER;
+ if (write)
+ flags |= FAULT_FLAG_WRITE;
+
retry:
down_read(&mm->mmap_sem);
+
vma = find_vma(mm, address);
if (!vma)
goto bad_area;
- if (vma->vm_start <= address)
- goto good_area;
- if (!(vma->vm_flags & VM_GROWSDOWN))
- goto bad_area;
- if (expand_stack(vma, address))
- goto bad_area;
+ if (unlikely(address < vma->vm_start)) {
+ if (!(vma->vm_flags & VM_GROWSDOWN) || expand_stack(vma, address))
+ goto bad_area;
+ }
/*
- * Ok, we have a good vm_area for this memory access, so
- * we can handle it..
+ * vm_area is good, now check permissions for this memory access
*/
-good_area:
- si_code = SEGV_ACCERR;
-
- /* Handle protection violation, execute on heap or stack */
-
- if ((regs->ecr_vec == ECR_V_PROTV) &&
- (regs->ecr_cause == ECR_C_PROTV_INST_FETCH))
+ mask = VM_READ;
+ if (write)
+ mask = VM_WRITE;
+ if (exec)
+ mask = VM_EXEC;
+
+ if (!(vma->vm_flags & mask)) {
+ si_code = SEGV_ACCERR;
goto bad_area;
-
- if (write) {
- if (!(vma->vm_flags & VM_WRITE))
- goto bad_area;
- flags |= FAULT_FLAG_WRITE;
- } else {
- if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
- goto bad_area;
}
- /*
- * If for any reason at all we couldn't handle the fault,
- * make sure we exit gracefully rather than endlessly redo
- * the fault.
- */
fault = handle_mm_fault(vma, address, flags);
- if (fatal_signal_pending(current)) {
+ /*
+ * Fault retry nuances
+ */
+ if (unlikely(fault & VM_FAULT_RETRY)) {
/*
- * if fault retry, mmap_sem already relinquished by core mm
- * so OK to return to user mode (with signal handled first)
+ * If fault needs to be retried, handle any pending signals
+ * first (by returning to user mode).
+ * mmap_sem already relinquished by core mm for RETRY case
*/
- if (fault & VM_FAULT_RETRY) {
+ if (fatal_signal_pending(current)) {
if (!user_mode(regs))
goto no_context;
return;
}
- }
-
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
-
- if (likely(!(fault & VM_FAULT_ERROR))) {
+ /*
+ * retry state machine
+ */
if (flags & FAULT_FLAG_ALLOW_RETRY) {
- /* To avoid updating stats twice for retry case */
- if (fault & VM_FAULT_MAJOR) {
- tsk->maj_flt++;
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1,
- regs, address);
- } else {
- tsk->min_flt++;
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1,
- regs, address);
- }
-
- if (fault & VM_FAULT_RETRY) {
- flags &= ~FAULT_FLAG_ALLOW_RETRY;
- flags |= FAULT_FLAG_TRIED;
- goto retry;
- }
+ flags &= ~FAULT_FLAG_ALLOW_RETRY;
+ flags |= FAULT_FLAG_TRIED;
+ goto retry;
}
-
- /* Fault Handled Gracefully */
- up_read(&mm->mmap_sem);
- return;
}
- if (fault & VM_FAULT_OOM)
- goto out_of_memory;
- else if (fault & VM_FAULT_SIGSEGV)
- goto bad_area;
- else if (fault & VM_FAULT_SIGBUS)
- goto do_sigbus;
-
- /* no man's land */
- BUG();
+bad_area:
+ up_read(&mm->mmap_sem);
/*
- * Something tried to access memory that isn't in our memory map..
- * Fix it, but check if it's kernel or user first..
+ * Major/minor page fault accounting
+ * (in case of retry we only land here once)
*/
-bad_area:
- up_read(&mm->mmap_sem);
+ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
- /* User mode accesses just cause a SIGSEGV */
- if (user_mode(regs)) {
- tsk->thread.fault_address = address;
- force_sig_fault(SIGSEGV, si_code, (void __user *)address);
- return;
- }
+ if (likely(!(fault & VM_FAULT_ERROR))) {
+ if (fault & VM_FAULT_MAJOR) {
+ tsk->maj_flt++;
+ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1,
+ regs, address);
+ } else {
+ tsk->min_flt++;
+ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1,
+ regs, address);
+ }
-no_context:
- /* Are we prepared to handle this kernel fault?
- *
- * (The kernel has valid exception-points in the source
- * when it accesses user-memory. When it fails in one
- * of those points, we find it in a table and do a jump
- * to some fixup code that loads an appropriate error
- * code)
- */
- if (fixup_exception(regs))
+ /* Normal return path: fault Handled Gracefully */
return;
+ }
- die("Oops", regs, address);
-
-out_of_memory:
- up_read(&mm->mmap_sem);
+ if (!user_mode(regs))
+ goto no_context;
- if (user_mode(regs)) {
+ if (fault & VM_FAULT_OOM) {
pagefault_out_of_memory();
return;
}
- goto no_context;
+ if (fault & VM_FAULT_SIGBUS) {
+ sig = SIGBUS;
+ si_code = BUS_ADRERR;
+ }
+ else {
+ sig = SIGSEGV;
+ }
-do_sigbus:
- up_read(&mm->mmap_sem);
+ tsk->thread.fault_address = address;
+ force_sig_fault(sig, si_code, (void __user *)address);
+ return;
- if (!user_mode(regs))
- goto no_context;
+no_context:
+ if (fixup_exception(regs))
+ return;
- tsk->thread.fault_address = address;
- force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address);
+ die("Oops", regs, address);
}
diff --git a/arch/arc/mm/tlbex.S b/arch/arc/mm/tlbex.S
index 471a97bf492d..c55d95dd2f39 100644
--- a/arch/arc/mm/tlbex.S
+++ b/arch/arc/mm/tlbex.S
@@ -393,6 +393,17 @@ EV_TLBMissD_fast_ret: ; additional label for VDK OS-kit instrumentation
;-------- Common routine to call Linux Page Fault Handler -----------
do_slow_path_pf:
+#ifdef CONFIG_ISA_ARCV2
+ ; Set Z flag if exception in U mode. Hardware micro-ops do this on any
+ ; taken interrupt/exception, and thus is already the case at the entry
+ ; above, but the ensuing code would have already clobbered it.
+ ; EXCEPTION_PROLOGUE called in slow path, relies on correct Z flag set
+
+ lr r2, [erstatus]
+ and r2, r2, STATUS_U_MASK
+ bxor.f 0, r2, STATUS_U_BIT
+#endif
+
; Restore the 4-scratch regs saved by fast path miss handler
TLBMISS_RESTORE_REGS
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 2bf1ce39a96d..600c5ba1af41 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -1297,7 +1297,7 @@ config SMP
will run faster if you say N here.
See also <file:Documentation/x86/i386/IO-APIC.rst>,
- <file:Documentation/lockup-watchdogs.txt> and the SMP-HOWTO available at
+ <file:Documentation/admin-guide/lockup-watchdogs.rst> and the SMP-HOWTO available at
<http://tldp.org/HOWTO/SMP-HOWTO.html>.
If you don't know what to do here, say N.
@@ -2036,7 +2036,7 @@ config CRASH_DUMP
kdump/kexec. The crash dump kernel must be compiled to a
memory address not used by the main kernel
- For more details see Documentation/kdump/kdump.rst
+ For more details see Documentation/admin-guide/kdump/kdump.rst
config AUTO_ZRELADDR
bool "Auto calculation of the decompressed kernel image address"
@@ -2142,7 +2142,7 @@ config VFP
Say Y to include VFP support code in the kernel. This is needed
if your hardware includes a VFP unit.
- Please see <file:Documentation/arm/VFP/release-notes.txt> for
+ Please see <file:Documentation/arm/vfp/release-notes.rst> for
release notes and additional status information.
Say N if your target does not have VFP hardware.
diff --git a/arch/arm/common/mcpm_entry.c b/arch/arm/common/mcpm_entry.c
index e24ad60891b2..8a9aeeb504dd 100644
--- a/arch/arm/common/mcpm_entry.c
+++ b/arch/arm/common/mcpm_entry.c
@@ -21,7 +21,7 @@
/*
* The public API for this code is documented in arch/arm/include/asm/mcpm.h.
* For a comprehensive description of the main algorithm used here, please
- * see Documentation/arm/cluster-pm-race-avoidance.txt.
+ * see Documentation/arm/cluster-pm-race-avoidance.rst.
*/
struct sync_struct mcpm_sync;
diff --git a/arch/arm/common/mcpm_head.S b/arch/arm/common/mcpm_head.S
index d5bd75dd576d..291d969bc719 100644
--- a/arch/arm/common/mcpm_head.S
+++ b/arch/arm/common/mcpm_head.S
@@ -5,7 +5,7 @@
* Created by: Nicolas Pitre, March 2012
* Copyright: (C) 2012-2013 Linaro Limited
*
- * Refer to Documentation/arm/cluster-pm-race-avoidance.txt
+ * Refer to Documentation/arm/cluster-pm-race-avoidance.rst
* for details of the synchronisation algorithms used here.
*/
diff --git a/arch/arm/common/vlock.S b/arch/arm/common/vlock.S
index 9675cc15d0c4..f1c7fd44f1b1 100644
--- a/arch/arm/common/vlock.S
+++ b/arch/arm/common/vlock.S
@@ -6,7 +6,7 @@
* Copyright: (C) 2012-2013 Linaro Limited
*
* This algorithm is described in more detail in
- * Documentation/arm/vlocks.txt.
+ * Documentation/arm/vlocks.rst.
*/
#include <linux/linkage.h>
diff --git a/arch/arm/include/asm/setup.h b/arch/arm/include/asm/setup.h
index 77e5582c2259..67d20712cb48 100644
--- a/arch/arm/include/asm/setup.h
+++ b/arch/arm/include/asm/setup.h
@@ -5,7 +5,7 @@
* Copyright (C) 1997-1999 Russell King
*
* Structure passed to kernel to tell it about the
- * hardware it's running on. See Documentation/arm/Setup
+ * hardware it's running on. See Documentation/arm/setup.rst
* for more info.
*/
#ifndef __ASMARM_SETUP_H
diff --git a/arch/arm/include/uapi/asm/setup.h b/arch/arm/include/uapi/asm/setup.h
index 6b335a9ff8c8..25ceda63b284 100644
--- a/arch/arm/include/uapi/asm/setup.h
+++ b/arch/arm/include/uapi/asm/setup.h
@@ -9,7 +9,7 @@
* published by the Free Software Foundation.
*
* Structure passed to kernel to tell it about the
- * hardware it's running on. See Documentation/arm/Setup
+ * hardware it's running on. See Documentation/arm/setup.rst
* for more info.
*/
#ifndef _UAPI__ASMARM_SETUP_H
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index 0b8cfdd60b90..858d4e541532 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -826,7 +826,7 @@ ENDPROC(__switch_to)
* existing ones. This mechanism should be used only for things that are
* really small and justified, and not be abused freely.
*
- * See Documentation/arm/kernel_user_helpers.txt for formal definitions.
+ * See Documentation/arm/kernel_user_helpers.rst for formal definitions.
*/
THUMB( .arm )
diff --git a/arch/arm/mach-exynos/common.h b/arch/arm/mach-exynos/common.h
index c93356a8d662..56411bb63d45 100644
--- a/arch/arm/mach-exynos/common.h
+++ b/arch/arm/mach-exynos/common.h
@@ -106,7 +106,7 @@ void exynos_firmware_init(void);
#define C2_STATE (1 << 3)
/*
* Magic values for bootloader indicating chosen low power mode.
- * See also Documentation/arm/Samsung/Bootloader-interface.txt
+ * See also Documentation/arm/samsung/bootloader-interface.rst
*/
#define EXYNOS_SLEEP_MAGIC 0x00000bad
#define EXYNOS_AFTR_MAGIC 0xfcba0d10
diff --git a/arch/arm/mach-ixp4xx/Kconfig b/arch/arm/mach-ixp4xx/Kconfig
index fc5378b00f3d..f7211b57b1e7 100644
--- a/arch/arm/mach-ixp4xx/Kconfig
+++ b/arch/arm/mach-ixp4xx/Kconfig
@@ -33,7 +33,7 @@ config MACH_AVILA
help
Say 'Y' here if you want your kernel to support the Gateworks
Avila Network Platform. For more information on this platform,
- see <file:Documentation/arm/IXP4xx>.
+ see <file:Documentation/arm/ixp4xx.rst>.
config MACH_LOFT
bool "Loft"
@@ -49,7 +49,7 @@ config ARCH_ADI_COYOTE
help
Say 'Y' here if you want your kernel to support the ADI
Engineering Coyote Gateway Reference Platform. For more
- information on this platform, see <file:Documentation/arm/IXP4xx>.
+ information on this platform, see <file:Documentation/arm/ixp4xx.rst>.
config MACH_GATEWAY7001
bool "Gateway 7001"
@@ -72,21 +72,21 @@ config ARCH_IXDP425
help
Say 'Y' here if you want your kernel to support Intel's
IXDP425 Development Platform (Also known as Richfield).
- For more information on this platform, see <file:Documentation/arm/IXP4xx>.
+ For more information on this platform, see <file:Documentation/arm/ixp4xx.rst>.
config MACH_IXDPG425
bool "IXDPG425"
help
Say 'Y' here if you want your kernel to support Intel's
IXDPG425 Development Platform (Also known as Montajade).
- For more information on this platform, see <file:Documentation/arm/IXP4xx>.
+ For more information on this platform, see <file:Documentation/arm/ixp4xx.rst>.
config MACH_IXDP465
bool "IXDP465"
help
Say 'Y' here if you want your kernel to support Intel's
IXDP465 Development Platform (Also known as BMP).
- For more information on this platform, see <file:Documentation/arm/IXP4xx>.
+ For more information on this platform, see <file:Documentation/arm/ixp4xx.rst>.
config MACH_GORAMO_MLR
bool "GORAMO Multi Link Router"
@@ -99,7 +99,7 @@ config MACH_KIXRP435
help
Say 'Y' here if you want your kernel to support Intel's
KIXRP435 Reference Platform.
- For more information on this platform, see <file:Documentation/arm/IXP4xx>.
+ For more information on this platform, see <file:Documentation/arm/ixp4xx.rst>.
#
# IXCDP1100 is the exact same HW as IXDP425, but with a different machine
@@ -116,7 +116,7 @@ config ARCH_PRPMC1100
help
Say 'Y' here if you want your kernel to support the Motorola
PrPCM1100 Processor Mezanine Module. For more information on
- this platform, see <file:Documentation/arm/IXP4xx>.
+ this platform, see <file:Documentation/arm/ixp4xx.rst>.
config MACH_NAS100D
bool
diff --git a/arch/arm/mach-s3c24xx/pm.c b/arch/arm/mach-s3c24xx/pm.c
index adcb90645460..c64988c609ad 100644
--- a/arch/arm/mach-s3c24xx/pm.c
+++ b/arch/arm/mach-s3c24xx/pm.c
@@ -5,7 +5,7 @@
//
// S3C24XX Power Manager (Suspend-To-RAM) support
//
-// See Documentation/arm/Samsung-S3C24XX/Suspend.txt for more information
+// See Documentation/arm/samsung-s3c24xx/suspend.rst for more information
//
// Parts based on arch/arm/mach-pxa/pm.c
//
diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig
index cc798115aa9b..820b60a50125 100644
--- a/arch/arm/mm/Kconfig
+++ b/arch/arm/mm/Kconfig
@@ -709,7 +709,7 @@ config ARM_VIRT_EXT
assistance.
A compliant bootloader is required in order to make maximum
- use of this feature. Refer to Documentation/arm/Booting for
+ use of this feature. Refer to Documentation/arm/booting.rst for
details.
config SWP_EMULATE
@@ -875,7 +875,7 @@ config KUSER_HELPERS
the CPU type fitted to the system. This permits binaries to be
run on ARMv4 through to ARMv7 without modification.
- See Documentation/arm/kernel_user_helpers.txt for details.
+ See Documentation/arm/kernel_user_helpers.rst for details.
However, the fixed address nature of these helpers can be used
by ROP (return orientated programming) authors when creating
diff --git a/arch/arm/plat-samsung/Kconfig b/arch/arm/plat-samsung/Kconfig
index 53da57fba39c..301e572651c0 100644
--- a/arch/arm/plat-samsung/Kconfig
+++ b/arch/arm/plat-samsung/Kconfig
@@ -243,7 +243,7 @@ config SAMSUNG_PM_DEBUG
depends on DEBUG_EXYNOS_UART || DEBUG_S3C24XX_UART || DEBUG_S3C2410_UART
help
Say Y here if you want verbose debugging from the PM Suspend and
- Resume code. See <file:Documentation/arm/Samsung-S3C24XX/Suspend.txt>
+ Resume code. See <file:Documentation/arm/samsung-s3c24xx/suspend.rst>
for more information.
config S3C_PM_DEBUG_LED_SMDK
@@ -268,7 +268,7 @@ config SAMSUNG_PM_CHECK
Note, this can take several seconds depending on memory size
and CPU speed.
- See <file:Documentation/arm/Samsung-S3C24XX/Suspend.txt>
+ See <file:Documentation/arm/samsung-s3c24xx/suspend.rst>
config SAMSUNG_PM_CHECK_CHUNKSIZE
int "S3C2410 PM Suspend CRC Chunksize (KiB)"
@@ -280,7 +280,7 @@ config SAMSUNG_PM_CHECK_CHUNKSIZE
the CRC data block will take more memory, but will identify any
faults with better precision.
- See <file:Documentation/arm/Samsung-S3C24XX/Suspend.txt>
+ See <file:Documentation/arm/samsung-s3c24xx/suspend.rst>
config SAMSUNG_WAKEMASK
bool
diff --git a/arch/arm/tools/mach-types b/arch/arm/tools/mach-types
index 4eac94c1eb6f..9e74c7ff6b04 100644
--- a/arch/arm/tools/mach-types
+++ b/arch/arm/tools/mach-types
@@ -7,7 +7,7 @@
# http://www.arm.linux.org.uk/developer/machines/download.php
#
# Please do not send patches to this file; it is automatically generated!
-# To add an entry into this database, please see Documentation/arm/README,
+# To add an entry into this database, please see Documentation/arm/arm.rst,
# or visit:
#
# http://www.arm.linux.org.uk/developer/machines/?action=new
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 0758d89524d0..e1ea69994e0f 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -997,7 +997,7 @@ config CRASH_DUMP
reserved region and then later executed after a crash by
kdump/kexec.
- For more details see Documentation/kdump/kdump.rst
+ For more details see Documentation/admin-guide/kdump/kdump.rst
config XEN_DOM0
def_bool y
@@ -1143,7 +1143,7 @@ config KUSER_HELPERS
the system. This permits binaries to be run on ARMv4 through
to ARMv8 without modification.
- See Documentation/arm/kernel_user_helpers.txt for details.
+ See Documentation/arm/kernel_user_helpers.rst for details.
However, the fixed address nature of these helpers can be used
by ROP (return orientated programming) authors when creating
diff --git a/arch/arm64/kernel/kuser32.S b/arch/arm64/kernel/kuser32.S
index 49825e9e421e..42bd8c0c60e0 100644
--- a/arch/arm64/kernel/kuser32.S
+++ b/arch/arm64/kernel/kuser32.S
@@ -10,7 +10,7 @@
* aarch32_setup_additional_pages() and are provided for compatibility
* reasons with 32 bit (aarch32) applications that need them.
*
- * See Documentation/arm/kernel_user_helpers.txt for formal definitions.
+ * See Documentation/arm/kernel_user_helpers.rst for formal definitions.
*/
#include <asm/unistd.h>
diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c
index 8f106638913c..3795d18276c4 100644
--- a/arch/ia64/kernel/efi.c
+++ b/arch/ia64/kernel/efi.c
@@ -852,7 +852,7 @@ valid_phys_addr_range (phys_addr_t phys_addr, unsigned long size)
* /dev/mem reads and writes use copy_to_user(), which implicitly
* uses a granule-sized kernel identity mapping. It's really
* only safe to do this for regions in kern_memmap. For more
- * details, see Documentation/ia64/aliasing.txt.
+ * details, see Documentation/ia64/aliasing.rst.
*/
attr = kern_mem_attribute(phys_addr, size);
if (attr & EFI_MEMORY_WB || attr & EFI_MEMORY_UC)
diff --git a/arch/ia64/kernel/fsys.S b/arch/ia64/kernel/fsys.S
index d80c99a5f55d..0750a716adc7 100644
--- a/arch/ia64/kernel/fsys.S
+++ b/arch/ia64/kernel/fsys.S
@@ -28,7 +28,7 @@
#include <asm/native/inst.h>
/*
- * See Documentation/ia64/fsys.txt for details on fsyscalls.
+ * See Documentation/ia64/fsys.rst for details on fsyscalls.
*
* On entry to an fsyscall handler:
* r10 = 0 (i.e., defaults to "successful syscall return")
diff --git a/arch/ia64/kernel/syscalls/syscall.tbl b/arch/ia64/kernel/syscalls/syscall.tbl
index ecc44926737b..36d5faf4c86c 100644
--- a/arch/ia64/kernel/syscalls/syscall.tbl
+++ b/arch/ia64/kernel/syscalls/syscall.tbl
@@ -355,3 +355,4 @@
432 common fsmount sys_fsmount
433 common fspick sys_fspick
434 common pidfd_open sys_pidfd_open
+# 435 reserved for clone3
diff --git a/arch/ia64/mm/ioremap.c b/arch/ia64/mm/ioremap.c
index 5e3e7b1fdac5..0c0de2c4ec69 100644
--- a/arch/ia64/mm/ioremap.c
+++ b/arch/ia64/mm/ioremap.c
@@ -42,7 +42,7 @@ ioremap (unsigned long phys_addr, unsigned long size)
/*
* For things in kern_memmap, we must use the same attribute
* as the rest of the kernel. For more details, see
- * Documentation/ia64/aliasing.txt.
+ * Documentation/ia64/aliasing.rst.
*/
attr = kern_mem_attribute(phys_addr, size);
if (attr & EFI_MEMORY_WB)
diff --git a/arch/ia64/pci/pci.c b/arch/ia64/pci/pci.c
index e308196c2229..165e561dc81a 100644
--- a/arch/ia64/pci/pci.c
+++ b/arch/ia64/pci/pci.c
@@ -450,7 +450,7 @@ pci_mmap_legacy_page_range(struct pci_bus *bus, struct vm_area_struct *vma,
return -ENOSYS;
/*
- * Avoid attribute aliasing. See Documentation/ia64/aliasing.txt
+ * Avoid attribute aliasing. See Documentation/ia64/aliasing.rst
* for more details.
*/
if (!valid_mmap_phys_addr_range(vma->vm_pgoff, size))
diff --git a/arch/m68k/kernel/syscalls/syscall.tbl b/arch/m68k/kernel/syscalls/syscall.tbl
index 9a3eb2558568..a88a285a0e5f 100644
--- a/arch/m68k/kernel/syscalls/syscall.tbl
+++ b/arch/m68k/kernel/syscalls/syscall.tbl
@@ -434,3 +434,4 @@
432 common fsmount sys_fsmount
433 common fspick sys_fspick
434 common pidfd_open sys_pidfd_open
+# 435 reserved for clone3
diff --git a/arch/mips/bmips/setup.c b/arch/mips/bmips/setup.c
index 1738a06396f9..2f81a94c71a6 100644
--- a/arch/mips/bmips/setup.c
+++ b/arch/mips/bmips/setup.c
@@ -162,7 +162,7 @@ void __init plat_mem_setup(void)
ioport_resource.start = 0;
ioport_resource.end = ~0;
- /* intended to somewhat resemble ARM; see Documentation/arm/Booting */
+ /* intended to somewhat resemble ARM; see Documentation/arm/booting.rst */
if (fw_arg0 == 0 && fw_arg1 == 0xffffffff)
dtb = phys_to_virt(fw_arg2);
else if (fw_passed_dtb) /* UHI interface or appended dtb */
diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl
index 97035e19ad03..c9c879ec9b6d 100644
--- a/arch/mips/kernel/syscalls/syscall_n32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_n32.tbl
@@ -373,3 +373,4 @@
432 n32 fsmount sys_fsmount
433 n32 fspick sys_fspick
434 n32 pidfd_open sys_pidfd_open
+# 435 reserved for clone3
diff --git a/arch/mips/kernel/syscalls/syscall_n64.tbl b/arch/mips/kernel/syscalls/syscall_n64.tbl
index d7292722d3b0..bbce9159caa1 100644
--- a/arch/mips/kernel/syscalls/syscall_n64.tbl
+++ b/arch/mips/kernel/syscalls/syscall_n64.tbl
@@ -349,3 +349,4 @@
432 n64 fsmount sys_fsmount
433 n64 fspick sys_fspick
434 n64 pidfd_open sys_pidfd_open
+# 435 reserved for clone3
diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl
index dba084c92f14..9653591428ec 100644
--- a/arch/mips/kernel/syscalls/syscall_o32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_o32.tbl
@@ -422,3 +422,4 @@
432 o32 fsmount sys_fsmount
433 o32 fspick sys_fspick
434 o32 pidfd_open sys_pidfd_open
+# 435 reserved for clone3
diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index 42875ff15671..6d732e451071 100644
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -277,7 +277,7 @@ config SMP
machines, but will use only one CPU of a multiprocessor machine.
On a uniprocessor machine, the kernel will run faster if you say N.
- See also <file:Documentation/lockup-watchdogs.txt> and the SMP-HOWTO
+ See also <file:Documentation/admin-guide/lockup-watchdogs.rst> and the SMP-HOWTO
available at <http://www.tldp.org/docs.html#howto>.
If you don't know what to do here, say N.
diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl
index 5022b9e179c2..c7aadfef5386 100644
--- a/arch/parisc/kernel/syscalls/syscall.tbl
+++ b/arch/parisc/kernel/syscalls/syscall.tbl
@@ -431,3 +431,4 @@
432 common fsmount sys_fsmount
433 common fspick sys_fspick
434 common pidfd_open sys_pidfd_open
+# 435 reserved for clone3
diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl
index f2c3bda2d39f..3331749aab20 100644
--- a/arch/powerpc/kernel/syscalls/syscall.tbl
+++ b/arch/powerpc/kernel/syscalls/syscall.tbl
@@ -516,3 +516,4 @@
432 common fsmount sys_fsmount
433 common fspick sys_fspick
434 common pidfd_open sys_pidfd_open
+# 435 reserved for clone3
diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl
index 6ebacfeaf853..a90d3e945445 100644
--- a/arch/s390/kernel/syscalls/syscall.tbl
+++ b/arch/s390/kernel/syscalls/syscall.tbl
@@ -437,3 +437,4 @@
432 common fsmount sys_fsmount sys_fsmount
433 common fspick sys_fspick sys_fspick
434 common pidfd_open sys_pidfd_open sys_pidfd_open
+# 435 reserved for clone3
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index 31a7d12db705..6b1b5941b618 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -626,7 +626,7 @@ config CRASH_DUMP
to a memory address not used by the main kernel using
PHYSICAL_START.
- For more details see Documentation/kdump/kdump.rst
+ For more details see Documentation/admin-guide/kdump/kdump.rst
config KEXEC_JUMP
bool "kexec jump (EXPERIMENTAL)"
@@ -679,7 +679,7 @@ config SMP
People using multiprocessor machines who say Y here should also say
Y to "Enhanced Real Time Clock Support", below.
- See also <file:Documentation/lockup-watchdogs.txt> and the SMP-HOWTO
+ See also <file:Documentation/admin-guide/lockup-watchdogs.rst> and the SMP-HOWTO
available at <http://www.tldp.org/docs.html#howto>.
If you don't know what to do here, say N.
diff --git a/arch/sh/kernel/syscalls/syscall.tbl b/arch/sh/kernel/syscalls/syscall.tbl
index 834c9c7d79fa..b5ed26c4c005 100644
--- a/arch/sh/kernel/syscalls/syscall.tbl
+++ b/arch/sh/kernel/syscalls/syscall.tbl
@@ -437,3 +437,4 @@
432 common fsmount sys_fsmount
433 common fspick sys_fspick
434 common pidfd_open sys_pidfd_open
+# 435 reserved for clone3
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index e9f5d62e9817..7926a2e11bdc 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -180,7 +180,7 @@ config SMP
Y to "Enhanced Real Time Clock Support", below. The "Advanced Power
Management" code will be disabled if you say Y here.
- See also <file:Documentation/lockup-watchdogs.txt> and the SMP-HOWTO
+ See also <file:Documentation/admin-guide/lockup-watchdogs.rst> and the SMP-HOWTO
available at <http://www.tldp.org/docs.html#howto>.
If you don't know what to do here, say N.
diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl
index c58e71f21129..8c8cc7537fb2 100644
--- a/arch/sparc/kernel/syscalls/syscall.tbl
+++ b/arch/sparc/kernel/syscalls/syscall.tbl
@@ -480,3 +480,4 @@
432 common fsmount sys_fsmount
433 common fspick sys_fspick
434 common pidfd_open sys_pidfd_open
+# 435 reserved for clone3
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 4a55bd01e918..1342654e8057 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -402,7 +402,7 @@ config SMP
Management" code will be disabled if you say Y here.
See also <file:Documentation/x86/i386/IO-APIC.rst>,
- <file:Documentation/lockup-watchdogs.txt> and the SMP-HOWTO available at
+ <file:Documentation/admin-guide/lockup-watchdogs.rst> and the SMP-HOWTO available at
<http://www.tldp.org/docs.html#howto>.
If you don't know what to do here, say N.
@@ -1959,7 +1959,7 @@ config EFI_STUB
This kernel feature allows a bzImage to be loaded directly
by EFI firmware without the use of a bootloader.
- See Documentation/efi-stub.txt for more information.
+ See Documentation/admin-guide/efi-stub.rst for more information.
config EFI_MIXED
bool "EFI mixed-mode support"
@@ -2057,7 +2057,7 @@ config CRASH_DUMP
to a memory address not used by the main kernel or BIOS using
PHYSICAL_START, or it must be built as a relocatable image
(CONFIG_RELOCATABLE=y).
- For more details see Documentation/kdump/kdump.rst
+ For more details see Documentation/admin-guide/kdump/kdump.rst
config KEXEC_JUMP
bool "kexec jump"
@@ -2094,7 +2094,7 @@ config PHYSICAL_START
the reserved region. In other words, it can be set based on
the "X" value as specified in the "crashkernel=YM@XM"
command line boot parameter passed to the panic-ed
- kernel. Please take a look at Documentation/kdump/kdump.rst
+ kernel. Please take a look at Documentation/admin-guide/kdump/kdump.rst
for more details about crash dumps.
Usage of bzImage for capturing the crash dump is recommended as
diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c
index 64a6c952091e..21790307121e 100644
--- a/arch/x86/ia32/sys_ia32.c
+++ b/arch/x86/ia32/sys_ia32.c
@@ -239,6 +239,7 @@ COMPAT_SYSCALL_DEFINE5(x86_clone, unsigned long, clone_flags,
{
struct kernel_clone_args args = {
.flags = (clone_flags & ~CSIGNAL),
+ .pidfd = parent_tidptr,
.child_tid = child_tidptr,
.parent_tid = parent_tidptr,
.exit_signal = (clone_flags & CSIGNAL),
@@ -246,5 +247,8 @@ COMPAT_SYSCALL_DEFINE5(x86_clone, unsigned long, clone_flags,
.tls = tls_val,
};
+ if (!legacy_clone_args_valid(&args))
+ return -EINVAL;
+
return _do_fork(&args);
}
diff --git a/arch/xtensa/boot/dts/virt.dts b/arch/xtensa/boot/dts/virt.dts
new file mode 100644
index 000000000000..6aecbc0f3549
--- /dev/null
+++ b/arch/xtensa/boot/dts/virt.dts
@@ -0,0 +1,72 @@
+// SPDX-License-Identifier: GPL-2.0
+/dts-v1/;
+
+/ {
+ compatible = "cdns,xtensa-iss";
+ #address-cells = <1>;
+ #size-cells = <1>;
+ interrupt-parent = <&pic>;
+
+ chosen {
+ bootargs = "console=ttyS0,115200n8 debug";
+ };
+
+ memory@0 {
+ device_type = "memory";
+ reg = <0x00000000 0x80000000>;
+ };
+
+ cpus {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ cpu@0 {
+ compatible = "cdns,xtensa-cpu";
+ reg = <0>;
+ clocks = <&osc>;
+ };
+ };
+
+ clocks {
+ osc: osc {
+ #clock-cells = <0>;
+ compatible = "fixed-clock";
+ clock-frequency = <40000000>;
+ };
+ };
+
+ pic: pic {
+ compatible = "cdns,xtensa-pic";
+ /* one cell: internal irq number,
+ * two cells: second cell == 0: internal irq number
+ * second cell == 1: external irq number
+ */
+ #address-cells = <0>;
+ #interrupt-cells = <2>;
+ interrupt-controller;
+ };
+
+ pci {
+ compatible = "pci-host-ecam-generic";
+ device_type = "pci";
+ #address-cells = <3>;
+ #size-cells = <2>;
+ #interrupt-cells = <0x1>;
+
+ bus-range = <0x0 0x3f>;
+ reg = <0xc0000000 0x04000000>;
+
+ // BUS_ADDRESS(3) CPU_PHYSICAL(1) SIZE(2)
+ ranges = <0x01000000 0x0 0xc4000000 0xc4000000 0x0 0x04000000>,
+ <0x02000000 0x0 0xc8000000 0xc8000000 0x0 0x18000000>;
+
+ // PCI_DEVICE(3) INT#(1) CONTROLLER(PHANDLE) CONTROLLER_DATA(2)
+ interrupt-map = <
+ 0x0000 0x0 0x0 0x1 &pic 0x0 0x1
+ 0x0800 0x0 0x0 0x1 &pic 0x1 0x1
+ 0x1000 0x0 0x0 0x1 &pic 0x2 0x1
+ 0x1800 0x0 0x0 0x1 &pic 0x3 0x1
+ >;
+
+ interrupt-map-mask = <0x1800 0x0 0x0 0x7>;
+ };
+};
diff --git a/arch/xtensa/configs/virt_defconfig b/arch/xtensa/configs/virt_defconfig
new file mode 100644
index 000000000000..bfc45a138e72
--- /dev/null
+++ b/arch/xtensa/configs/virt_defconfig
@@ -0,0 +1,113 @@
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_NO_HZ_IDLE=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_PREEMPT=y
+CONFIG_IRQ_TIME_ACCOUNTING=y
+CONFIG_BSD_PROCESS_ACCT=y
+CONFIG_MEMCG=y
+CONFIG_CGROUP_FREEZER=y
+CONFIG_CGROUP_DEVICE=y
+CONFIG_CGROUP_CPUACCT=y
+CONFIG_CGROUP_DEBUG=y
+CONFIG_NAMESPACES=y
+CONFIG_SCHED_AUTOGROUP=y
+CONFIG_RELAY=y
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_EXPERT=y
+CONFIG_SYSCTL_SYSCALL=y
+CONFIG_KALLSYMS_ALL=y
+CONFIG_PERF_EVENTS=y
+CONFIG_XTENSA_VARIANT_DC233C=y
+CONFIG_XTENSA_UNALIGNED_USER=y
+CONFIG_VECTORS_OFFSET=0x00002000
+CONFIG_XTENSA_KSEG_512M=y
+CONFIG_HIGHMEM=y
+CONFIG_CMDLINE_BOOL=y
+CONFIG_CMDLINE="console=ttyS0,115200n8 ip=dhcp root=/dev/nfs rw debug memmap=0x80000000@0"
+CONFIG_USE_OF=y
+CONFIG_BUILTIN_DTB_SOURCE="virt"
+# CONFIG_PARSE_BOOTPARAM is not set
+CONFIG_JUMP_LABEL=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
+# CONFIG_COMPACTION is not set
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+CONFIG_IP_PNP_RARP=y
+# CONFIG_IPV6 is not set
+CONFIG_NETFILTER=y
+# CONFIG_WIRELESS is not set
+CONFIG_PCI=y
+CONFIG_PCI_HOST_GENERIC=y
+CONFIG_UEVENT_HELPER=y
+CONFIG_DEVTMPFS=y
+CONFIG_DEVTMPFS_MOUNT=y
+# CONFIG_STANDALONE is not set
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_VIRTIO_BLK=y
+CONFIG_SCSI=y
+CONFIG_BLK_DEV_SD=y
+CONFIG_NETDEVICES=y
+CONFIG_VIRTIO_NET=y
+# CONFIG_ETHERNET is not set
+# CONFIG_WLAN is not set
+CONFIG_INPUT_MOUSEDEV=y
+CONFIG_INPUT_EVDEV=y
+# CONFIG_INPUT_KEYBOARD is not set
+# CONFIG_LEGACY_PTYS is not set
+CONFIG_HW_RANDOM=y
+CONFIG_HW_RANDOM_VIRTIO=y
+# CONFIG_HWMON is not set
+CONFIG_DRM=y
+CONFIG_DRM_VGEM=y
+CONFIG_DRM_VIRTIO_GPU=y
+CONFIG_FB_MODE_HELPERS=y
+# CONFIG_VGA_CONSOLE is not set
+CONFIG_FRAMEBUFFER_CONSOLE=y
+CONFIG_LOGO=y
+# CONFIG_USB_SUPPORT is not set
+CONFIG_VIRTIO_PCI=y
+CONFIG_VIRTIO_INPUT=y
+# CONFIG_IOMMU_SUPPORT is not set
+CONFIG_EXT3_FS=y
+CONFIG_FANOTIFY=y
+CONFIG_VFAT_FS=y
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_TMPFS_POSIX_ACL=y
+CONFIG_NFS_FS=y
+CONFIG_NFS_V4=y
+CONFIG_NFS_SWAP=y
+CONFIG_ROOT_NFS=y
+CONFIG_SUNRPC_DEBUG=y
+CONFIG_NLS_CODEPAGE_437=y
+CONFIG_NLS_ISO8859_1=y
+CONFIG_CRYPTO_ECHAINIV=y
+CONFIG_CRYPTO_DEFLATE=y
+CONFIG_CRYPTO_LZO=y
+CONFIG_CRYPTO_ANSI_CPRNG=y
+CONFIG_CRYPTO_DEV_VIRTIO=y
+CONFIG_FONTS=y
+CONFIG_PRINTK_TIME=y
+CONFIG_DYNAMIC_DEBUG=y
+CONFIG_DEBUG_INFO=y
+CONFIG_MAGIC_SYSRQ=y
+# CONFIG_SCHED_DEBUG is not set
+CONFIG_SCHEDSTATS=y
+CONFIG_DEBUG_RT_MUTEXES=y
+CONFIG_DEBUG_SPINLOCK=y
+CONFIG_DEBUG_MUTEXES=y
+CONFIG_DEBUG_ATOMIC_SLEEP=y
+CONFIG_STACKTRACE=y
+CONFIG_RCU_TRACE=y
+# CONFIG_FTRACE is not set
+# CONFIG_S32C1I_SELFTEST is not set
diff --git a/arch/xtensa/include/asm/asmmacro.h b/arch/xtensa/include/asm/asmmacro.h
index 8308a9c3abb2..71a7e846bc1f 100644
--- a/arch/xtensa/include/asm/asmmacro.h
+++ b/arch/xtensa/include/asm/asmmacro.h
@@ -191,4 +191,50 @@
#endif
.endm
+#define XTENSA_STACK_ALIGNMENT 16
+
+#if defined(__XTENSA_WINDOWED_ABI__)
+#define XTENSA_FRAME_SIZE_RESERVE 16
+#define XTENSA_SPILL_STACK_RESERVE 32
+
+#define abi_entry(frame_size) \
+ entry sp, (XTENSA_FRAME_SIZE_RESERVE + \
+ (((frame_size) + XTENSA_STACK_ALIGNMENT - 1) & \
+ -XTENSA_STACK_ALIGNMENT))
+#define abi_entry_default abi_entry(0)
+
+#define abi_ret(frame_size) retw
+#define abi_ret_default retw
+
+#elif defined(__XTENSA_CALL0_ABI__)
+
+#define XTENSA_SPILL_STACK_RESERVE 0
+
+#define abi_entry(frame_size) __abi_entry (frame_size)
+
+ .macro __abi_entry frame_size
+ .ifgt \frame_size
+ addi sp, sp, -(((\frame_size) + XTENSA_STACK_ALIGNMENT - 1) & \
+ -XTENSA_STACK_ALIGNMENT)
+ .endif
+ .endm
+
+#define abi_entry_default
+
+#define abi_ret(frame_size) __abi_ret (frame_size)
+
+ .macro __abi_ret frame_size
+ .ifgt \frame_size
+ addi sp, sp, (((\frame_size) + XTENSA_STACK_ALIGNMENT - 1) & \
+ -XTENSA_STACK_ALIGNMENT)
+ .endif
+ ret
+ .endm
+
+#define abi_ret_default ret
+
+#else
+#error Unsupported Xtensa ABI
+#endif
+
#endif /* _XTENSA_ASMMACRO_H */
diff --git a/arch/xtensa/include/asm/initialize_mmu.h b/arch/xtensa/include/asm/initialize_mmu.h
index 323d05789159..3b054d2bede0 100644
--- a/arch/xtensa/include/asm/initialize_mmu.h
+++ b/arch/xtensa/include/asm/initialize_mmu.h
@@ -42,7 +42,7 @@
#if XCHAL_HAVE_S32C1I && (XCHAL_HW_MIN_VERSION >= XTENSA_HWVERSION_RC_2009_0)
/*
* We Have Atomic Operation Control (ATOMCTL) Register; Initialize it.
- * For details see Documentation/xtensa/atomctl.txt
+ * For details see Documentation/xtensa/atomctl.rst
*/
#if XCHAL_DCACHE_IS_COHERENT
movi a3, 0x25 /* For SMP/MX -- internal for writeback,
diff --git a/arch/xtensa/include/asm/platform.h b/arch/xtensa/include/asm/platform.h
index 560483356a06..913826dfa838 100644
--- a/arch/xtensa/include/asm/platform.h
+++ b/arch/xtensa/include/asm/platform.h
@@ -55,16 +55,6 @@ extern void platform_idle (void);
extern void platform_heartbeat (void);
/*
- * platform_pcibios_init is called to allow the platform to setup the pci bus.
- */
-extern void platform_pcibios_init (void);
-
-/*
- * platform_pcibios_fixup allows to modify the PCI configuration.
- */
-extern int platform_pcibios_fixup (void);
-
-/*
* platform_calibrate_ccount calibrates cpu clock freq (CONFIG_XTENSA_CALIBRATE)
*/
extern void platform_calibrate_ccount (void);
diff --git a/arch/xtensa/include/asm/types.h b/arch/xtensa/include/asm/types.h
deleted file mode 100644
index 2b410b8c7f79..000000000000
--- a/arch/xtensa/include/asm/types.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * include/asm-xtensa/types.h
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License. See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 2001 - 2005 Tensilica Inc.
- */
-#ifndef _XTENSA_TYPES_H
-#define _XTENSA_TYPES_H
-
-#include <uapi/asm/types.h>
-
-#ifndef __ASSEMBLY__
-/*
- * These aren't exported outside the kernel to avoid name space clashes
- */
-
-#define BITS_PER_LONG 32
-
-#endif
-#endif /* _XTENSA_TYPES_H */
diff --git a/arch/xtensa/kernel/coprocessor.S b/arch/xtensa/kernel/coprocessor.S
index 92bf24a9da92..60c220020054 100644
--- a/arch/xtensa/kernel/coprocessor.S
+++ b/arch/xtensa/kernel/coprocessor.S
@@ -121,7 +121,9 @@
ENTRY(coprocessor_flush)
- entry a1, 32
+ /* reserve 4 bytes on stack to save a0 */
+ abi_entry(4)
+
s32i a0, a1, 0
movi a0, .Lsave_cp_regs_jump_table
addx8 a3, a3, a0
@@ -131,7 +133,8 @@ ENTRY(coprocessor_flush)
beqz a3, 1f
callx0 a3
1: l32i a0, a1, 0
- retw
+
+ abi_ret(4)
ENDPROC(coprocessor_flush)
diff --git a/arch/xtensa/kernel/entry.S b/arch/xtensa/kernel/entry.S
index e54af8b7e0f8..183fa8e0bb5b 100644
--- a/arch/xtensa/kernel/entry.S
+++ b/arch/xtensa/kernel/entry.S
@@ -1842,7 +1842,8 @@ ENDPROC(fast_store_prohibited)
ENTRY(system_call)
- entry a1, 32
+ /* reserve 4 bytes on stack for function parameter */
+ abi_entry(4)
/* regs->syscall = regs->areg[2] */
@@ -1892,7 +1893,7 @@ ENTRY(system_call)
s32i a6, a2, PT_AREG2
bnez a3, 1f
- retw
+ abi_ret(4)
1:
l32i a4, a1, 4
@@ -1901,7 +1902,7 @@ ENTRY(system_call)
mov a6, a2
call4 do_syscall_trace_leave
s32i a3, a2, PT_SYSCALL
- retw
+ abi_ret(4)
ENDPROC(system_call)
@@ -1952,7 +1953,7 @@ ENDPROC(system_call)
ENTRY(_switch_to)
- entry a1, 48
+ abi_entry(XTENSA_SPILL_STACK_RESERVE)
mov a11, a3 # and 'next' (a3)
@@ -2013,7 +2014,7 @@ ENTRY(_switch_to)
wsr a14, ps
rsync
- retw
+ abi_ret(XTENSA_SPILL_STACK_RESERVE)
ENDPROC(_switch_to)
diff --git a/arch/xtensa/kernel/mcount.S b/arch/xtensa/kernel/mcount.S
index 0eeda2e4a25e..5e4619f52858 100644
--- a/arch/xtensa/kernel/mcount.S
+++ b/arch/xtensa/kernel/mcount.S
@@ -11,6 +11,7 @@
*/
#include <linux/linkage.h>
+#include <asm/asmmacro.h>
#include <asm/ftrace.h>
/*
@@ -21,13 +22,13 @@
ENTRY(_mcount)
- entry a1, 16
+ abi_entry_default
movi a4, ftrace_trace_function
l32i a4, a4, 0
movi a3, ftrace_stub
bne a3, a4, 1f
- retw
+ abi_ret_default
1: xor a7, a2, a1
movi a3, 0x3fffffff
@@ -40,11 +41,11 @@ ENTRY(_mcount)
addi a6, a6, -MCOUNT_INSN_SIZE
callx4 a4
- retw
+ abi_ret_default
ENDPROC(_mcount)
ENTRY(ftrace_stub)
- entry a1, 16
- retw
+ abi_entry_default
+ abi_ret_default
ENDPROC(ftrace_stub)
diff --git a/arch/xtensa/kernel/pci.c b/arch/xtensa/kernel/pci.c
index 8b823f94e568..3f32e275997a 100644
--- a/arch/xtensa/kernel/pci.c
+++ b/arch/xtensa/kernel/pci.c
@@ -24,23 +24,6 @@
#include <asm/pci-bridge.h>
#include <asm/platform.h>
-/* PCI Controller */
-
-
-/*
- * pcibios_alloc_controller
- * pcibios_enable_device
- * pcibios_fixups
- * pcibios_align_resource
- * pcibios_fixup_bus
- * pci_bus_add_device
- */
-
-static struct pci_controller *pci_ctrl_head;
-static struct pci_controller **pci_ctrl_tail = &pci_ctrl_head;
-
-static int pci_bus_count;
-
/*
* We need to avoid collisions with `mirrored' VGA ports
* and other strange ISA hardware, so we always want the
@@ -75,81 +58,6 @@ pcibios_align_resource(void *data, const struct resource *res,
return start;
}
-static void __init pci_controller_apertures(struct pci_controller *pci_ctrl,
- struct list_head *resources)
-{
- struct resource *res;
- unsigned long io_offset;
- int i;
-
- io_offset = (unsigned long)pci_ctrl->io_space.base;
- res = &pci_ctrl->io_resource;
- if (!res->flags) {
- if (io_offset)
- pr_err("I/O resource not set for host bridge %d\n",
- pci_ctrl->index);
- res->start = 0;
- res->end = IO_SPACE_LIMIT;
- res->flags = IORESOURCE_IO;
- }
- res->start += io_offset;
- res->end += io_offset;
- pci_add_resource_offset(resources, res, io_offset);
-
- for (i = 0; i < 3; i++) {
- res = &pci_ctrl->mem_resources[i];
- if (!res->flags) {
- if (i > 0)
- continue;
- pr_err("Memory resource not set for host bridge %d\n",
- pci_ctrl->index);
- res->start = 0;
- res->end = ~0U;
- res->flags = IORESOURCE_MEM;
- }
- pci_add_resource(resources, res);
- }
-}
-
-static int __init pcibios_init(void)
-{
- struct pci_controller *pci_ctrl;
- struct list_head resources;
- struct pci_bus *bus;
- int next_busno = 0, ret;
-
- pr_info("PCI: Probing PCI hardware\n");
-
- /* Scan all of the recorded PCI controllers. */
- for (pci_ctrl = pci_ctrl_head; pci_ctrl; pci_ctrl = pci_ctrl->next) {
- pci_ctrl->last_busno = 0xff;
- INIT_LIST_HEAD(&resources);
- pci_controller_apertures(pci_ctrl, &resources);
- bus = pci_scan_root_bus(NULL, pci_ctrl->first_busno,
- pci_ctrl->ops, pci_ctrl, &resources);
- if (!bus)
- continue;
-
- pci_ctrl->bus = bus;
- pci_ctrl->last_busno = bus->busn_res.end;
- if (next_busno <= pci_ctrl->last_busno)
- next_busno = pci_ctrl->last_busno+1;
- }
- pci_bus_count = next_busno;
- ret = platform_pcibios_fixup();
- if (ret)
- return ret;
-
- for (pci_ctrl = pci_ctrl_head; pci_ctrl; pci_ctrl = pci_ctrl->next) {
- if (pci_ctrl->bus)
- pci_bus_add_devices(pci_ctrl->bus);
- }
-
- return 0;
-}
-
-subsys_initcall(pcibios_init);
-
void pcibios_fixup_bus(struct pci_bus *bus)
{
if (bus->parent) {
@@ -158,38 +66,6 @@ void pcibios_fixup_bus(struct pci_bus *bus)
}
}
-void pcibios_set_master(struct pci_dev *dev)
-{
- /* No special bus mastering setup handling */
-}
-
-int pcibios_enable_device(struct pci_dev *dev, int mask)
-{
- u16 cmd, old_cmd;
- int idx;
- struct resource *r;
-
- pci_read_config_word(dev, PCI_COMMAND, &cmd);
- old_cmd = cmd;
- for (idx=0; idx<6; idx++) {
- r = &dev->resource[idx];
- if (!r->start && r->end) {
- pci_err(dev, "can't enable device: resource collisions\n");
- return -EINVAL;
- }
- if (r->flags & IORESOURCE_IO)
- cmd |= PCI_COMMAND_IO;
- if (r->flags & IORESOURCE_MEM)
- cmd |= PCI_COMMAND_MEMORY;
- }
- if (cmd != old_cmd) {
- pci_info(dev, "enabling device (%04x -> %04x)\n", old_cmd, cmd);
- pci_write_config_word(dev, PCI_COMMAND, cmd);
- }
-
- return 0;
-}
-
/*
* Platform support for /proc/bus/pci/X/Y mmap()s.
* -- paulus.
diff --git a/arch/xtensa/kernel/platform.c b/arch/xtensa/kernel/platform.c
index 1cf008284dd2..a95ba05b0abe 100644
--- a/arch/xtensa/kernel/platform.c
+++ b/arch/xtensa/kernel/platform.c
@@ -34,8 +34,6 @@ _F(void, halt, (void), { while(1); });
_F(void, power_off, (void), { while(1); });
_F(void, idle, (void), { __asm__ __volatile__ ("waiti 0" ::: "memory"); });
_F(void, heartbeat, (void), { });
-_F(int, pcibios_fixup, (void), { return 0; });
-_F(void, pcibios_init, (void), { });
#ifdef CONFIG_XTENSA_CALIBRATE_CCOUNT
_F(void, calibrate_ccount, (void),
diff --git a/arch/xtensa/kernel/setup.c b/arch/xtensa/kernel/setup.c
index 176cb46bcf12..5cb8a62e091c 100644
--- a/arch/xtensa/kernel/setup.c
+++ b/arch/xtensa/kernel/setup.c
@@ -405,10 +405,6 @@ void __init setup_arch(char **cmdline_p)
conswitchp = &dummy_con;
# endif
#endif
-
-#ifdef CONFIG_PCI
- platform_pcibios_init();
-#endif
}
static DEFINE_PER_CPU(struct cpu, cpu_data);
diff --git a/arch/xtensa/lib/checksum.S b/arch/xtensa/lib/checksum.S
index c6e73b12e519..4cb9ca58d9ad 100644
--- a/arch/xtensa/lib/checksum.S
+++ b/arch/xtensa/lib/checksum.S
@@ -43,7 +43,7 @@ ENTRY(csum_partial)
* Experiments with Ethernet and SLIP connections show that buf
* is aligned on either a 2-byte or 4-byte boundary.
*/
- entry sp, 32
+ abi_entry_default
extui a5, a2, 0, 2
bnez a5, 8f /* branch if 2-byte aligned */
/* Fall-through on common case, 4-byte alignment */
@@ -107,7 +107,7 @@ ENTRY(csum_partial)
ONES_ADD(a4, a6)
7:
mov a2, a4
- retw
+ abi_ret_default
/* uncommon case, buf is 2-byte aligned */
8:
@@ -195,7 +195,7 @@ unsigned int csum_partial_copy_generic (const char *src, char *dst, int len,
ENTRY(csum_partial_copy_generic)
- entry sp, 32
+ abi_entry_default
mov a12, a3
mov a11, a4
or a10, a2, a3
@@ -316,7 +316,7 @@ EX(11f) s8i a9, a3, 0
ONES_ADD(a5, a9)
8:
mov a2, a5
- retw
+ abi_ret_default
5:
/* Control branch to here when either src or dst is odd. We
@@ -383,12 +383,12 @@ ENDPROC(csum_partial_copy_generic)
blt a12, a11, .Leloop
#endif
2:
- retw
+ abi_ret_default
11:
movi a2, -EFAULT
s32i a2, a7, 0 /* dst_err_ptr */
movi a2, 0
- retw
+ abi_ret_default
.previous
diff --git a/arch/xtensa/lib/memcopy.S b/arch/xtensa/lib/memcopy.S
index efecfd7ed8cc..582d817979ed 100644
--- a/arch/xtensa/lib/memcopy.S
+++ b/arch/xtensa/lib/memcopy.S
@@ -79,7 +79,7 @@
bne a3, a7, .Lnextbyte # continue loop if $a3:src != $a7:src_end
#endif /* !XCHAL_HAVE_LOOPS */
.Lbytecopydone:
- retw
+ abi_ret_default
/*
* Destination is unaligned
@@ -112,7 +112,7 @@
ENTRY(__memcpy)
WEAK(memcpy)
- entry sp, 16 # minimal stack frame
+ abi_entry_default
# a2/ dst, a3/ src, a4/ len
mov a5, a2 # copy dst so that a2 is return value
.Lcommon:
@@ -161,7 +161,7 @@ WEAK(memcpy)
bbsi.l a4, 2, .L3
bbsi.l a4, 1, .L4
bbsi.l a4, 0, .L5
- retw
+ abi_ret_default
.L3:
# copy 4 bytes
l32i a6, a3, 0
@@ -170,7 +170,7 @@ WEAK(memcpy)
addi a5, a5, 4
bbsi.l a4, 1, .L4
bbsi.l a4, 0, .L5
- retw
+ abi_ret_default
.L4:
# copy 2 bytes
l16ui a6, a3, 0
@@ -178,12 +178,12 @@ WEAK(memcpy)
s16i a6, a5, 0
addi a5, a5, 2
bbsi.l a4, 0, .L5
- retw
+ abi_ret_default
.L5:
# copy 1 byte
l8ui a6, a3, 0
s8i a6, a5, 0
- retw
+ abi_ret_default
/*
* Destination is aligned, Source is unaligned
@@ -255,7 +255,7 @@ WEAK(memcpy)
#endif
bbsi.l a4, 1, .L14
bbsi.l a4, 0, .L15
-.Ldone: retw
+.Ldone: abi_ret_default
.L14:
# copy 2 bytes
l8ui a6, a3, 0
@@ -265,12 +265,12 @@ WEAK(memcpy)
s8i a7, a5, 1
addi a5, a5, 2
bbsi.l a4, 0, .L15
- retw
+ abi_ret_default
.L15:
# copy 1 byte
l8ui a6, a3, 0
s8i a6, a5, 0
- retw
+ abi_ret_default
ENDPROC(__memcpy)
@@ -280,7 +280,7 @@ ENDPROC(__memcpy)
ENTRY(bcopy)
- entry sp, 16 # minimal stack frame
+ abi_entry_default
# a2=src, a3=dst, a4=len
mov a5, a3
mov a3, a2
@@ -346,7 +346,7 @@ ENDPROC(bcopy)
# $a3:src != $a7:src_start
#endif /* !XCHAL_HAVE_LOOPS */
.Lbackbytecopydone:
- retw
+ abi_ret_default
/*
* Destination is unaligned
@@ -380,7 +380,7 @@ ENDPROC(bcopy)
ENTRY(__memmove)
WEAK(memmove)
- entry sp, 16 # minimal stack frame
+ abi_entry_default
# a2/ dst, a3/ src, a4/ len
mov a5, a2 # copy dst so that a2 is return value
.Lmovecommon:
@@ -435,7 +435,7 @@ WEAK(memmove)
bbsi.l a4, 2, .Lback3
bbsi.l a4, 1, .Lback4
bbsi.l a4, 0, .Lback5
- retw
+ abi_ret_default
.Lback3:
# copy 4 bytes
addi a3, a3, -4
@@ -444,7 +444,7 @@ WEAK(memmove)
s32i a6, a5, 0
bbsi.l a4, 1, .Lback4
bbsi.l a4, 0, .Lback5
- retw
+ abi_ret_default
.Lback4:
# copy 2 bytes
addi a3, a3, -2
@@ -452,14 +452,14 @@ WEAK(memmove)
addi a5, a5, -2
s16i a6, a5, 0
bbsi.l a4, 0, .Lback5
- retw
+ abi_ret_default
.Lback5:
# copy 1 byte
addi a3, a3, -1
l8ui a6, a3, 0
addi a5, a5, -1
s8i a6, a5, 0
- retw
+ abi_ret_default
/*
* Destination is aligned, Source is unaligned
@@ -531,7 +531,7 @@ WEAK(memmove)
bbsi.l a4, 1, .Lback14
bbsi.l a4, 0, .Lback15
.Lbackdone:
- retw
+ abi_ret_default
.Lback14:
# copy 2 bytes
addi a3, a3, -2
@@ -541,13 +541,13 @@ WEAK(memmove)
s8i a6, a5, 0
s8i a7, a5, 1
bbsi.l a4, 0, .Lback15
- retw
+ abi_ret_default
.Lback15:
# copy 1 byte
addi a3, a3, -1
addi a5, a5, -1
l8ui a6, a3, 0
s8i a6, a5, 0
- retw
+ abi_ret_default
ENDPROC(__memmove)
diff --git a/arch/xtensa/lib/memset.S b/arch/xtensa/lib/memset.S
index 8632eacbdc80..59b1524fd601 100644
--- a/arch/xtensa/lib/memset.S
+++ b/arch/xtensa/lib/memset.S
@@ -34,7 +34,7 @@
ENTRY(__memset)
WEAK(memset)
- entry sp, 16 # minimal stack frame
+ abi_entry_default
# a2/ dst, a3/ c, a4/ length
extui a3, a3, 0, 8 # mask to just 8 bits
slli a7, a3, 8 # duplicate character in all bytes of word
@@ -48,7 +48,7 @@ WEAK(memset)
srli a7, a4, 4 # number of loop iterations with 16B
# per iteration
bnez a4, .Laligned
- retw
+ abi_ret_default
/*
* Destination is word-aligned.
@@ -95,7 +95,7 @@ EX(10f) s16i a3, a5, 0
EX(10f) s8i a3, a5, 0
.L5:
.Lret1:
- retw
+ abi_ret_default
/*
* Destination is unaligned
@@ -139,7 +139,7 @@ EX(10f) s8i a3, a5, 0
blt a5, a6, .Lbyteloop
#endif /* !XCHAL_HAVE_LOOPS */
.Lbytesetdone:
- retw
+ abi_ret_default
ENDPROC(__memset)
@@ -150,4 +150,4 @@ ENDPROC(__memset)
10:
movi a2, 0
- retw
+ abi_ret_default
diff --git a/arch/xtensa/lib/strncpy_user.S b/arch/xtensa/lib/strncpy_user.S
index c4c6c8578d59..4faf46fe3f38 100644
--- a/arch/xtensa/lib/strncpy_user.S
+++ b/arch/xtensa/lib/strncpy_user.S
@@ -50,7 +50,7 @@
.text
ENTRY(__strncpy_user)
- entry sp, 16 # minimal stack frame
+ abi_entry_default
# a2/ dst, a3/ src, a4/ len
mov a11, a2 # leave dst in return value register
beqz a4, .Lret # if len is zero
@@ -93,7 +93,7 @@ EX(10f) s8i a9, a11, 0 # store byte 0
bnez a4, .Lsrcaligned # if len is nonzero
.Lret:
sub a2, a11, a2 # compute strlen
- retw
+ abi_ret_default
/*
* dst is word-aligned, src is word-aligned
@@ -148,14 +148,14 @@ EX(10f) s8i a9, a11, 0
.Lz3: # byte 3 is zero
addi a11, a11, 3 # advance dst pointer
sub a2, a11, a2 # compute strlen
- retw
+ abi_ret_default
.Lz0: # byte 0 is zero
#ifdef __XTENSA_EB__
movi a9, 0
#endif /* __XTENSA_EB__ */
EX(10f) s8i a9, a11, 0
sub a2, a11, a2 # compute strlen
- retw
+ abi_ret_default
.Lz1: # byte 1 is zero
#ifdef __XTENSA_EB__
extui a9, a9, 16, 16
@@ -163,7 +163,7 @@ EX(10f) s8i a9, a11, 0
EX(10f) s16i a9, a11, 0
addi a11, a11, 1 # advance dst pointer
sub a2, a11, a2 # compute strlen
- retw
+ abi_ret_default
.Lz2: # byte 2 is zero
#ifdef __XTENSA_EB__
extui a9, a9, 16, 16
@@ -173,7 +173,7 @@ EX(10f) s16i a9, a11, 0
EX(10f) s8i a9, a11, 2
addi a11, a11, 2 # advance dst pointer
sub a2, a11, a2 # compute strlen
- retw
+ abi_ret_default
.align 4 # 1 mod 4 alignment for LOOPNEZ
.byte 0 # (0 mod 4 alignment for LBEG)
@@ -199,7 +199,7 @@ EX(10f) s8i a9, a11, 0
.Lunalignedend:
sub a2, a11, a2 # compute strlen
- retw
+ abi_ret_default
ENDPROC(__strncpy_user)
@@ -214,4 +214,4 @@ ENDPROC(__strncpy_user)
10:
11:
movi a2, -EFAULT
- retw
+ abi_ret_default
diff --git a/arch/xtensa/lib/strnlen_user.S b/arch/xtensa/lib/strnlen_user.S
index 1f2ca2bb2ab3..3d391dca3efb 100644
--- a/arch/xtensa/lib/strnlen_user.S
+++ b/arch/xtensa/lib/strnlen_user.S
@@ -45,7 +45,7 @@
.text
ENTRY(__strnlen_user)
- entry sp, 16 # minimal stack frame
+ abi_entry_default
# a2/ s, a3/ len
addi a4, a2, -4 # because we overincrement at the end;
# we compensate with load offsets of 4
@@ -96,7 +96,7 @@ EX(10f) l32i a9, a4, 4 # load 4 bytes for remaining checks
addi a4, a4, 1 # advance string pointer
.L101:
sub a2, a4, a2 # compute length
- retw
+ abi_ret_default
# NOTE that in several places below, we point to the byte just after
# the zero byte in order to include the NULL terminator in the count.
@@ -106,15 +106,15 @@ EX(10f) l32i a9, a4, 4 # load 4 bytes for remaining checks
.Lz0: # byte 0 is zero
addi a4, a4, 1 # point just beyond zero byte
sub a2, a4, a2 # subtract to get length
- retw
+ abi_ret_default
.Lz1: # byte 1 is zero
addi a4, a4, 1+1 # point just beyond zero byte
sub a2, a4, a2 # subtract to get length
- retw
+ abi_ret_default
.Lz2: # byte 2 is zero
addi a4, a4, 2+1 # point just beyond zero byte
sub a2, a4, a2 # subtract to get length
- retw
+ abi_ret_default
.L1mod2: # address is odd
EX(10f) l8ui a9, a4, 4 # get byte 0
@@ -130,7 +130,7 @@ EX(10f) l32i a9, a4, 0 # get word with first two bytes of string
# byte 3 is zero
addi a4, a4, 3+1 # point just beyond zero byte
sub a2, a4, a2 # subtract to get length
- retw
+ abi_ret_default
ENDPROC(__strnlen_user)
@@ -138,4 +138,4 @@ ENDPROC(__strnlen_user)
.align 4
10:
movi a2, 0
- retw
+ abi_ret_default
diff --git a/arch/xtensa/lib/usercopy.S b/arch/xtensa/lib/usercopy.S
index 228607e30bc2..a0aa4047f94a 100644
--- a/arch/xtensa/lib/usercopy.S
+++ b/arch/xtensa/lib/usercopy.S
@@ -60,7 +60,7 @@
.text
ENTRY(__xtensa_copy_user)
- entry sp, 16 # minimal stack frame
+ abi_entry_default
# a2/ dst, a3/ src, a4/ len
mov a5, a2 # copy dst so that a2 is return value
mov a11, a4 # preserve original len for error case
@@ -75,7 +75,7 @@ ENTRY(__xtensa_copy_user)
__ssa8 a3 # set shift amount from byte offset
bnez a4, .Lsrcunaligned
movi a2, 0 # return success for len==0
- retw
+ abi_ret_default
/*
* Destination is unaligned
@@ -127,7 +127,7 @@ EX(10f) s8i a6, a5, 0
#endif /* !XCHAL_HAVE_LOOPS */
.Lbytecopydone:
movi a2, 0 # return success for len bytes copied
- retw
+ abi_ret_default
/*
* Destination and source are word-aligned.
@@ -187,7 +187,7 @@ EX(10f) l8ui a6, a3, 0
EX(10f) s8i a6, a5, 0
.L5:
movi a2, 0 # return success for len bytes copied
- retw
+ abi_ret_default
/*
* Destination is aligned, Source is unaligned
@@ -264,7 +264,7 @@ EX(10f) l8ui a6, a3, 0
EX(10f) s8i a6, a5, 0
.L15:
movi a2, 0 # return success for len bytes copied
- retw
+ abi_ret_default
ENDPROC(__xtensa_copy_user)
@@ -281,4 +281,4 @@ ENDPROC(__xtensa_copy_user)
10:
sub a2, a5, a2 /* a2 <-- bytes copied */
sub a2, a11, a2 /* a2 <-- bytes not copied */
- retw
+ abi_ret_default
diff --git a/arch/xtensa/mm/init.c b/arch/xtensa/mm/init.c
index b51746f2b80b..79467c749416 100644
--- a/arch/xtensa/mm/init.c
+++ b/arch/xtensa/mm/init.c
@@ -45,10 +45,7 @@ void __init bootmem_init(void)
* If PHYS_OFFSET is zero reserve page at address 0:
* successfull allocations should never return NULL.
*/
- if (PHYS_OFFSET)
- memblock_reserve(0, PHYS_OFFSET);
- else
- memblock_reserve(0, 1);
+ memblock_reserve(0, PHYS_OFFSET ? PHYS_OFFSET : 1);
early_init_fdt_scan_reserved_mem();
diff --git a/arch/xtensa/mm/misc.S b/arch/xtensa/mm/misc.S
index 11a01c3e9cea..6aa036c427c3 100644
--- a/arch/xtensa/mm/misc.S
+++ b/arch/xtensa/mm/misc.S
@@ -30,7 +30,7 @@
ENTRY(clear_page)
- entry a1, 16
+ abi_entry_default
movi a3, 0
__loopi a2, a7, PAGE_SIZE, 32
@@ -44,7 +44,7 @@ ENTRY(clear_page)
s32i a3, a2, 28
__endla a2, a7, 32
- retw
+ abi_ret_default
ENDPROC(clear_page)
@@ -57,7 +57,7 @@ ENDPROC(clear_page)
ENTRY(copy_page)
- entry a1, 16
+ abi_entry_default
__loopi a2, a4, PAGE_SIZE, 32
@@ -86,7 +86,7 @@ ENTRY(copy_page)
__endl a2, a4
- retw
+ abi_ret_default
ENDPROC(copy_page)
@@ -116,7 +116,7 @@ ENTRY(__tlbtemp_mapping_start)
ENTRY(clear_page_alias)
- entry a1, 32
+ abi_entry_default
/* Skip setting up a temporary DTLB if not aliased low page. */
@@ -144,14 +144,14 @@ ENTRY(clear_page_alias)
__endla a2, a7, 32
bnez a6, 1f
- retw
+ abi_ret_default
/* We need to invalidate the temporary idtlb entry, if any. */
1: idtlb a4
dsync
- retw
+ abi_ret_default
ENDPROC(clear_page_alias)
@@ -164,7 +164,7 @@ ENDPROC(clear_page_alias)
ENTRY(copy_page_alias)
- entry a1, 32
+ abi_entry_default
/* Skip setting up a temporary DTLB for destination if not aliased. */
@@ -221,19 +221,19 @@ ENTRY(copy_page_alias)
bnez a6, 1f
bnez a7, 2f
- retw
+ abi_ret_default
1: addi a2, a2, -PAGE_SIZE
idtlb a2
dsync
bnez a7, 2f
- retw
+ abi_ret_default
2: addi a3, a3, -PAGE_SIZE+1
idtlb a3
dsync
- retw
+ abi_ret_default
ENDPROC(copy_page_alias)
@@ -248,7 +248,7 @@ ENDPROC(copy_page_alias)
ENTRY(__flush_invalidate_dcache_page_alias)
- entry sp, 16
+ abi_entry_default
movi a7, 0 # required for exception handler
addi a6, a3, (PAGE_KERNEL | _PAGE_HW_WRITE)
@@ -261,7 +261,7 @@ ENTRY(__flush_invalidate_dcache_page_alias)
idtlb a4
dsync
- retw
+ abi_ret_default
ENDPROC(__flush_invalidate_dcache_page_alias)
@@ -272,7 +272,7 @@ ENDPROC(__flush_invalidate_dcache_page_alias)
ENTRY(__invalidate_dcache_page_alias)
- entry sp, 16
+ abi_entry_default
movi a7, 0 # required for exception handler
addi a6, a3, (PAGE_KERNEL | _PAGE_HW_WRITE)
@@ -285,7 +285,7 @@ ENTRY(__invalidate_dcache_page_alias)
idtlb a4
dsync
- retw
+ abi_ret_default
ENDPROC(__invalidate_dcache_page_alias)
#endif
@@ -296,7 +296,7 @@ ENTRY(__tlbtemp_mapping_itlb)
ENTRY(__invalidate_icache_page_alias)
- entry sp, 16
+ abi_entry_default
addi a6, a3, (PAGE_KERNEL_EXEC | _PAGE_HW_WRITE)
mov a4, a2
@@ -307,7 +307,7 @@ ENTRY(__invalidate_icache_page_alias)
iitlb a4
isync
- retw
+ abi_ret_default
ENDPROC(__invalidate_icache_page_alias)
@@ -325,12 +325,12 @@ ENTRY(__tlbtemp_mapping_end)
ENTRY(__invalidate_icache_page)
- entry sp, 16
+ abi_entry_default
___invalidate_icache_page a2 a3
isync
- retw
+ abi_ret_default
ENDPROC(__invalidate_icache_page)
@@ -340,12 +340,12 @@ ENDPROC(__invalidate_icache_page)
ENTRY(__invalidate_dcache_page)
- entry sp, 16
+ abi_entry_default
___invalidate_dcache_page a2 a3
dsync
- retw
+ abi_ret_default
ENDPROC(__invalidate_dcache_page)
@@ -355,12 +355,12 @@ ENDPROC(__invalidate_dcache_page)
ENTRY(__flush_invalidate_dcache_page)
- entry sp, 16
+ abi_entry_default
___flush_invalidate_dcache_page a2 a3
dsync
- retw
+ abi_ret_default
ENDPROC(__flush_invalidate_dcache_page)
@@ -370,12 +370,12 @@ ENDPROC(__flush_invalidate_dcache_page)
ENTRY(__flush_dcache_page)
- entry sp, 16
+ abi_entry_default
___flush_dcache_page a2 a3
dsync
- retw
+ abi_ret_default
ENDPROC(__flush_dcache_page)
@@ -385,12 +385,12 @@ ENDPROC(__flush_dcache_page)
ENTRY(__invalidate_icache_range)
- entry sp, 16
+ abi_entry_default
___invalidate_icache_range a2 a3 a4
isync
- retw
+ abi_ret_default
ENDPROC(__invalidate_icache_range)
@@ -400,12 +400,12 @@ ENDPROC(__invalidate_icache_range)
ENTRY(__flush_invalidate_dcache_range)
- entry sp, 16
+ abi_entry_default
___flush_invalidate_dcache_range a2 a3 a4
dsync
- retw
+ abi_ret_default
ENDPROC(__flush_invalidate_dcache_range)
@@ -415,12 +415,12 @@ ENDPROC(__flush_invalidate_dcache_range)
ENTRY(__flush_dcache_range)
- entry sp, 16
+ abi_entry_default
___flush_dcache_range a2 a3 a4
dsync
- retw
+ abi_ret_default
ENDPROC(__flush_dcache_range)
@@ -430,11 +430,11 @@ ENDPROC(__flush_dcache_range)
ENTRY(__invalidate_dcache_range)
- entry sp, 16
+ abi_entry_default
___invalidate_dcache_range a2 a3 a4
- retw
+ abi_ret_default
ENDPROC(__invalidate_dcache_range)
@@ -444,12 +444,12 @@ ENDPROC(__invalidate_dcache_range)
ENTRY(__invalidate_icache_all)
- entry sp, 16
+ abi_entry_default
___invalidate_icache_all a2 a3
isync
- retw
+ abi_ret_default
ENDPROC(__invalidate_icache_all)
@@ -459,12 +459,12 @@ ENDPROC(__invalidate_icache_all)
ENTRY(__flush_invalidate_dcache_all)
- entry sp, 16
+ abi_entry_default
___flush_invalidate_dcache_all a2 a3
dsync
- retw
+ abi_ret_default
ENDPROC(__flush_invalidate_dcache_all)
@@ -474,11 +474,11 @@ ENDPROC(__flush_invalidate_dcache_all)
ENTRY(__invalidate_dcache_all)
- entry sp, 16
+ abi_entry_default
___invalidate_dcache_all a2 a3
dsync
- retw
+ abi_ret_default
ENDPROC(__invalidate_dcache_all)
diff --git a/block/Kconfig b/block/Kconfig
index 56cb1695cd87..8b5f8e560eb4 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -89,7 +89,7 @@ config BLK_DEV_THROTTLING
one needs to mount and use blkio cgroup controller for creating
cgroups and specifying per device IO rate policies.
- See Documentation/cgroup-v1/blkio-controller.rst for more information.
+ See Documentation/admin-guide/cgroup-v1/blkio-controller.rst for more information.
config BLK_DEV_THROTTLING_LOW
bool "Block throttling .low limit interface support (EXPERIMENTAL)"
@@ -110,7 +110,7 @@ config BLK_CMDLINE_PARSER
which don't otherwise have any standardized method for listing the
partitions on a block device.
- See Documentation/block/cmdline-partition.txt for more information.
+ See Documentation/block/cmdline-partition.rst for more information.
config BLK_WBT
bool "Enable support for block device writeback throttling"
diff --git a/block/Kconfig.iosched b/block/Kconfig.iosched
index 7a6b2f29a582..b89310a022ad 100644
--- a/block/Kconfig.iosched
+++ b/block/Kconfig.iosched
@@ -26,7 +26,7 @@ config IOSCHED_BFQ
regardless of the device parameters and with any workload. It
also guarantees a low latency to interactive and soft
real-time applications. Details in
- Documentation/block/bfq-iosched.txt
+ Documentation/block/bfq-iosched.rst
config BFQ_GROUP_IOSCHED
bool "BFQ hierarchical scheduling support"
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 50c9d2598500..72860325245a 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -17,7 +17,7 @@
* low-latency capabilities. BFQ also supports full hierarchical
* scheduling through cgroups. Next paragraphs provide an introduction
* on BFQ inner workings. Details on BFQ benefits, usage and
- * limitations can be found in Documentation/block/bfq-iosched.txt.
+ * limitations can be found in Documentation/block/bfq-iosched.rst.
*
* BFQ is a proportional-share storage-I/O scheduling algorithm based
* on the slice-by-slice service scheme of CFQ. But BFQ assigns
diff --git a/block/blk-integrity.c b/block/blk-integrity.c
index 825c9c070458..ca39b4624cf8 100644
--- a/block/blk-integrity.c
+++ b/block/blk-integrity.c
@@ -383,7 +383,7 @@ static const struct blk_integrity_profile nop_profile = {
* send/receive integrity metadata it must use this function to register
* the capability with the block layer. The template is a blk_integrity
* struct with values appropriate for the underlying hardware. See
- * Documentation/block/data-integrity.txt.
+ * Documentation/block/data-integrity.rst.
*/
void blk_integrity_register(struct gendisk *disk, struct blk_integrity *template)
{
diff --git a/block/ioprio.c b/block/ioprio.c
index 2e0559f157c8..77bcab11dce5 100644
--- a/block/ioprio.c
+++ b/block/ioprio.c
@@ -17,7 +17,7 @@
*
* ioprio_set(PRIO_PROCESS, pid, prio);
*
- * See also Documentation/block/ioprio.txt
+ * See also Documentation/block/ioprio.rst
*
*/
#include <linux/gfp.h>
diff --git a/block/mq-deadline.c b/block/mq-deadline.c
index b8a682b5a1bb..2a2a2e82832e 100644
--- a/block/mq-deadline.c
+++ b/block/mq-deadline.c
@@ -25,7 +25,7 @@
#include "blk-mq-sched.h"
/*
- * See Documentation/block/deadline-iosched.txt
+ * See Documentation/block/deadline-iosched.rst
*/
static const int read_expire = HZ / 2; /* max time before a read is submitted. */
static const int write_expire = 5 * HZ; /* ditto for writes, these limits are SOFT! */
diff --git a/block/partitions/Kconfig b/block/partitions/Kconfig
index 37b9710cc80a..702689a628f0 100644
--- a/block/partitions/Kconfig
+++ b/block/partitions/Kconfig
@@ -194,7 +194,7 @@ config LDM_PARTITION
Normal partitions are now called Basic Disks under Windows 2000, XP,
and Vista.
- For a fuller description read <file:Documentation/ldm.txt>.
+ For a fuller description read <file:Documentation/admin-guide/ldm.rst>.
If unsure, say N.
diff --git a/block/partitions/cmdline.c b/block/partitions/cmdline.c
index 60fb3df9897c..f1edd5452249 100644
--- a/block/partitions/cmdline.c
+++ b/block/partitions/cmdline.c
@@ -11,7 +11,7 @@
*
* The format for the command line is just like mtdparts.
*
- * For further information, see "Documentation/block/cmdline-partition.txt"
+ * For further information, see "Documentation/block/cmdline-partition.rst"
*
*/
diff --git a/drivers/base/platform.c b/drivers/base/platform.c
index 713903290385..506a0175a5a7 100644
--- a/drivers/base/platform.c
+++ b/drivers/base/platform.c
@@ -5,7 +5,7 @@
* Copyright (c) 2002-3 Patrick Mochel
* Copyright (c) 2002-3 Open Source Development Labs
*
- * Please see Documentation/driver-model/platform.rst for more
+ * Please see Documentation/driver-api/driver-model/platform.rst for more
* information.
*/
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig
index 96ec7e0fc1ea..1bb8ec575352 100644
--- a/drivers/block/Kconfig
+++ b/drivers/block/Kconfig
@@ -31,7 +31,7 @@ config BLK_DEV_FD
If you want to use the floppy disk drive(s) of your PC under Linux,
say Y. Information about this driver, especially important for IBM
Thinkpad users, is contained in
- <file:Documentation/blockdev/floppy.txt>.
+ <file:Documentation/admin-guide/blockdev/floppy.rst>.
That file also contains the location of the Floppy driver FAQ as
well as location of the fdutils package used to configure additional
parameters of the driver at run time.
@@ -96,7 +96,7 @@ config PARIDE
your computer's parallel port. Most of them are actually IDE devices
using a parallel port IDE adapter. This option enables the PARIDE
subsystem which contains drivers for many of these external drives.
- Read <file:Documentation/blockdev/paride.txt> for more information.
+ Read <file:Documentation/admin-guide/blockdev/paride.rst> for more information.
If you have said Y to the "Parallel-port support" configuration
option, you may share a single port between your printer and other
@@ -261,7 +261,7 @@ config BLK_DEV_NBD
userland (making server and client physically the same computer,
communicating using the loopback network device).
- Read <file:Documentation/blockdev/nbd.txt> for more information,
+ Read <file:Documentation/admin-guide/blockdev/nbd.rst> for more information,
especially about where to find the server code, which runs in user
space and does not need special kernel support.
@@ -303,7 +303,7 @@ config BLK_DEV_RAM
during the initial install of Linux.
Note that the kernel command line option "ramdisk=XX" is now obsolete.
- For details, read <file:Documentation/blockdev/ramdisk.txt>.
+ For details, read <file:Documentation/admin-guide/blockdev/ramdisk.rst>.
To compile this driver as a module, choose M here: the
module will be called brd. An alias "rd" has been defined
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index b933a7eea52b..f652c1ac3ae9 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -4424,7 +4424,7 @@ static int __init floppy_setup(char *str)
pr_cont("\n");
} else
DPRINT("botched floppy option\n");
- DPRINT("Read Documentation/blockdev/floppy.txt\n");
+ DPRINT("Read Documentation/admin-guide/blockdev/floppy.rst\n");
return 0;
}
diff --git a/drivers/block/zram/Kconfig b/drivers/block/zram/Kconfig
index 1ffc64770643..fe7a4b7d30cf 100644
--- a/drivers/block/zram/Kconfig
+++ b/drivers/block/zram/Kconfig
@@ -12,7 +12,7 @@ config ZRAM
It has several use cases, for example: /tmp storage, use as swap
disks and maybe many more.
- See Documentation/blockdev/zram.txt for more information.
+ See Documentation/admin-guide/blockdev/zram.rst for more information.
config ZRAM_WRITEBACK
bool "Write back incompressible or idle page to backing device"
@@ -26,7 +26,7 @@ config ZRAM_WRITEBACK
With /sys/block/zramX/{idle,writeback}, application could ask
idle page's writeback to the backing device to save in memory.
- See Documentation/blockdev/zram.txt for more information.
+ See Documentation/admin-guide/blockdev/zram.rst for more information.
config ZRAM_MEMORY_TRACKING
bool "Track zRam block status"
@@ -36,4 +36,4 @@ config ZRAM_MEMORY_TRACKING
of zRAM. Admin could see the information via
/sys/kernel/debug/zram/zramX/block_state.
- See Documentation/blockdev/zram.txt for more information.
+ See Documentation/admin-guide/blockdev/zram.rst for more information.
diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig
index 466ebd84ad17..3e866885a405 100644
--- a/drivers/char/Kconfig
+++ b/drivers/char/Kconfig
@@ -291,7 +291,7 @@ config RTC
and set the RTC in an SMP compatible fashion.
If you think you have a use for such a device (such as periodic data
- sampling), then say Y here, and read <file:Documentation/rtc.txt>
+ sampling), then say Y here, and read <file:Documentation/admin-guide/rtc.rst>
for details.
To compile this driver as a module, choose M here: the
@@ -313,7 +313,7 @@ config JS_RTC
/dev/rtc.
If you think you have a use for such a device (such as periodic data
- sampling), then say Y here, and read <file:Documentation/rtc.txt>
+ sampling), then say Y here, and read <file:Documentation/admin-guide/rtc.rst>
for details.
To compile this driver as a module, choose M here: the
@@ -382,7 +382,7 @@ config SONYPI
Device which can be found in many (all ?) Sony Vaio laptops.
If you have one of those laptops, read
- <file:Documentation/laptops/sonypi.txt>, and say Y or M here.
+ <file:Documentation/admin-guide/laptops/sonypi.rst>, and say Y or M here.
To compile this driver as a module, choose M here: the
module will be called sonypi.
diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c
index 95be7228f327..9044d31ab1a1 100644
--- a/drivers/char/hw_random/core.c
+++ b/drivers/char/hw_random/core.c
@@ -4,7 +4,7 @@
* Copyright 2006 Michael Buesch <m@bues.ch>
* Copyright 2005 (c) MontaVista Software, Inc.
*
- * Please read Documentation/hw_random.txt for details on use.
+ * Please read Documentation/admin-guide/hw_random.rst for details on use.
*
* This software may be used and distributed according to the terms
* of the GNU General Public License, incorporated herein by reference.
diff --git a/drivers/crypto/sunxi-ss/sun4i-ss-cipher.c b/drivers/crypto/sunxi-ss/sun4i-ss-cipher.c
index 4ab14d58e85b..6f7cbf6c2b55 100644
--- a/drivers/crypto/sunxi-ss/sun4i-ss-cipher.c
+++ b/drivers/crypto/sunxi-ss/sun4i-ss-cipher.c
@@ -8,7 +8,7 @@
* keysize in CBC and ECB mode.
* Add support also for DES and 3DES in CBC and ECB mode.
*
- * You could find the datasheet in Documentation/arm/sunxi/README
+ * You could find the datasheet in Documentation/arm/sunxi.rst
*/
#include "sun4i-ss.h"
diff --git a/drivers/crypto/sunxi-ss/sun4i-ss-core.c b/drivers/crypto/sunxi-ss/sun4i-ss-core.c
index cdcda7f059c8..2e8704271f45 100644
--- a/drivers/crypto/sunxi-ss/sun4i-ss-core.c
+++ b/drivers/crypto/sunxi-ss/sun4i-ss-core.c
@@ -6,7 +6,7 @@
*
* Core file which registers crypto algorithms supported by the SS.
*
- * You could find a link for the datasheet in Documentation/arm/sunxi/README
+ * You could find a link for the datasheet in Documentation/arm/sunxi.rst
*/
#include <linux/clk.h>
#include <linux/crypto.h>
diff --git a/drivers/crypto/sunxi-ss/sun4i-ss-hash.c b/drivers/crypto/sunxi-ss/sun4i-ss-hash.c
index d2b6d89aad28..fcffba5ef927 100644
--- a/drivers/crypto/sunxi-ss/sun4i-ss-hash.c
+++ b/drivers/crypto/sunxi-ss/sun4i-ss-hash.c
@@ -6,7 +6,7 @@
*
* This file add support for MD5 and SHA1.
*
- * You could find the datasheet in Documentation/arm/sunxi/README
+ * You could find the datasheet in Documentation/arm/sunxi.rst
*/
#include "sun4i-ss.h"
#include <linux/scatterlist.h>
diff --git a/drivers/crypto/sunxi-ss/sun4i-ss.h b/drivers/crypto/sunxi-ss/sun4i-ss.h
index 68b82d1a6303..8654d48aedc0 100644
--- a/drivers/crypto/sunxi-ss/sun4i-ss.h
+++ b/drivers/crypto/sunxi-ss/sun4i-ss.h
@@ -8,7 +8,7 @@
* Support MD5 and SHA1 hash algorithms.
* Support DES and 3DES
*
- * You could find the datasheet in Documentation/arm/sunxi/README
+ * You could find the datasheet in Documentation/arm/sunxi.rst
*/
#include <linux/clk.h>
diff --git a/drivers/dma-buf/Kconfig b/drivers/dma-buf/Kconfig
index d5f915830b68..b6a9c2f1bc41 100644
--- a/drivers/dma-buf/Kconfig
+++ b/drivers/dma-buf/Kconfig
@@ -15,7 +15,7 @@ config SYNC_FILE
associated with a buffer. When a job is submitted to the GPU a fence
is attached to the buffer and is transferred via userspace, using Sync
Files fds, to the DRM driver for example. More details at
- Documentation/sync_file.txt.
+ Documentation/driver-api/sync_file.rst.
config SW_SYNC
bool "Sync File Validation Framework"
diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig
index 43d7d6a9d9ab..bb13c266c329 100644
--- a/drivers/gpio/Kconfig
+++ b/drivers/gpio/Kconfig
@@ -1312,7 +1312,7 @@ config GPIO_BT8XX
The card needs to be physically altered for using it as a
GPIO card. For more information on how to build a GPIO card
from a BT8xx TV card, see the documentation file at
- Documentation/bt8xxgpio.txt
+ Documentation/driver-api/bt8xxgpio.rst
If unsure, say N.
diff --git a/drivers/gpio/gpio-cs5535.c b/drivers/gpio/gpio-cs5535.c
index 3611a0571667..53b24e3ae7de 100644
--- a/drivers/gpio/gpio-cs5535.c
+++ b/drivers/gpio/gpio-cs5535.c
@@ -41,7 +41,7 @@ MODULE_PARM_DESC(mask, "GPIO channel mask.");
/*
* FIXME: convert this singleton driver to use the state container
- * design pattern, see Documentation/driver-model/design-patterns.rst
+ * design pattern, see Documentation/driver-api/driver-model/design-patterns.rst
*/
static struct cs5535_gpio_chip {
struct gpio_chip chip;
diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index 3313378c743b..1d80222587ad 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -141,7 +141,7 @@ config DRM_LOAD_EDID_FIRMWARE
monitor are unable to provide appropriate EDID data. Since this
feature is provided as a workaround for broken hardware, the
default case is N. Details and instructions how to build your own
- EDID data are given in Documentation/EDID/howto.rst.
+ EDID data are given in Documentation/driver-api/edid.rst.
config DRM_DP_CEC
bool "Enable DisplayPort CEC-Tunneling-over-AUX HDMI support"
diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c
index 9441a36a2469..bd810454d239 100644
--- a/drivers/gpu/drm/drm_ioctl.c
+++ b/drivers/gpu/drm/drm_ioctl.c
@@ -736,7 +736,7 @@ static const struct drm_ioctl_desc drm_ioctls[] = {
* };
*
* Please make sure that you follow all the best practices from
- * ``Documentation/ioctl/botching-up-ioctls.txt``. Note that drm_ioctl()
+ * ``Documentation/ioctl/botching-up-ioctls.rst``. Note that drm_ioctl()
* automatically zero-extends structures, hence make sure you can add more stuff
* at the end, i.e. don't put a variable sized array there.
*
diff --git a/drivers/gpu/drm/drm_modeset_lock.c b/drivers/gpu/drm/drm_modeset_lock.c
index 53187821df01..fcfe1a03c4a1 100644
--- a/drivers/gpu/drm/drm_modeset_lock.c
+++ b/drivers/gpu/drm/drm_modeset_lock.c
@@ -36,7 +36,7 @@
* of extra utility/tracking out of our acquire-ctx. This is provided
* by &struct drm_modeset_lock and &struct drm_modeset_acquire_ctx.
*
- * For basic principles of &ww_mutex, see: Documentation/locking/ww-mutex-design.txt
+ * For basic principles of &ww_mutex, see: Documentation/locking/ww-mutex-design.rst
*
* The basic usage pattern is to::
*
diff --git a/drivers/input/touchscreen/sun4i-ts.c b/drivers/input/touchscreen/sun4i-ts.c
index 92f6e1ae23a2..f11ba7f2dca7 100644
--- a/drivers/input/touchscreen/sun4i-ts.c
+++ b/drivers/input/touchscreen/sun4i-ts.c
@@ -22,7 +22,7 @@
* in the kernel). So this driver offers straight forward, reliable single
* touch functionality only.
*
- * s.a. A20 User Manual "1.15 TP" (Documentation/arm/sunxi/README)
+ * s.a. A20 User Manual "1.15 TP" (Documentation/arm/sunxi.rst)
* (looks like the description in the A20 User Manual v1.3 is better
* than the one in the A10 User Manual v.1.5)
*/
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index 5ccac0b77f17..3834332f4963 100644
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -453,7 +453,7 @@ config DM_INIT
Enable "dm-mod.create=" parameter to create mapped devices at init time.
This option is useful to allow mounting rootfs without requiring an
initramfs.
- See Documentation/device-mapper/dm-init.rst for dm-mod.create="..."
+ See Documentation/admin-guide/device-mapper/dm-init.rst for dm-mod.create="..."
format.
If unsure, say N.
diff --git a/drivers/md/dm-init.c b/drivers/md/dm-init.c
index b65faef2c4b5..b869316d3722 100644
--- a/drivers/md/dm-init.c
+++ b/drivers/md/dm-init.c
@@ -25,7 +25,7 @@ static char *create;
* Format: dm-mod.create=<name>,<uuid>,<minor>,<flags>,<table>[,<table>+][;<name>,<uuid>,<minor>,<flags>,<table>[,<table>+]+]
* Table format: <start_sector> <num_sectors> <target_type> <target_args>
*
- * See Documentation/device-mapper/dm-init.rst for dm-mod.create="..." format
+ * See Documentation/admin-guide/device-mapper/dm-init.rst for dm-mod.create="..." format
* details.
*/
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index 7a87a640f8ba..8a60a4a070ac 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -3558,7 +3558,7 @@ static void raid_status(struct dm_target *ti, status_type_t type,
* v1.5.0+:
*
* Sync action:
- * See Documentation/device-mapper/dm-raid.rst for
+ * See Documentation/admin-guide/device-mapper/dm-raid.rst for
* information on each of these states.
*/
DMEMIT(" %s", sync_action);
diff --git a/drivers/mtd/nand/raw/nand_ecc.c b/drivers/mtd/nand/raw/nand_ecc.c
index 223fbd8052b3..09fdced659f5 100644
--- a/drivers/mtd/nand/raw/nand_ecc.c
+++ b/drivers/mtd/nand/raw/nand_ecc.c
@@ -11,7 +11,7 @@
* Thomas Gleixner (tglx@linutronix.de)
*
* Information on how this algorithm works and how it was developed
- * can be found in Documentation/mtd/nand_ecc.txt
+ * can be found in Documentation/driver-api/mtd/nand_ecc.rst
*/
#include <linux/types.h>
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 41c90f2ddb31..63db08d9bafa 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -2286,7 +2286,7 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent)
struct ice_hw *hw;
int err;
- /* this driver uses devres, see Documentation/driver-model/devres.rst */
+ /* this driver uses devres, see Documentation/driver-api/driver-model/devres.rst */
err = pcim_enable_device(pdev);
if (err)
return err;
diff --git a/drivers/nvdimm/Kconfig b/drivers/nvdimm/Kconfig
index 54500798f23a..a5fde15e91d3 100644
--- a/drivers/nvdimm/Kconfig
+++ b/drivers/nvdimm/Kconfig
@@ -33,7 +33,7 @@ config BLK_DEV_PMEM
Documentation/admin-guide/kernel-parameters.rst). This driver converts
these persistent memory ranges into block devices that are
capable of DAX (direct-access) file system mappings. See
- Documentation/nvdimm/nvdimm.txt for more details.
+ Documentation/driver-api/nvdimm/nvdimm.rst for more details.
Say Y if you want to use an NVDIMM
diff --git a/drivers/pci/switch/Kconfig b/drivers/pci/switch/Kconfig
index aee28a5bb98f..d370f4ce0492 100644
--- a/drivers/pci/switch/Kconfig
+++ b/drivers/pci/switch/Kconfig
@@ -9,7 +9,7 @@ config PCI_SW_SWITCHTEC
Enables support for the management interface for the MicroSemi
Switchtec series of PCIe switches. Supports userspace access
to submit MRPC commands to the switch via /dev/switchtecX
- devices. See <file:Documentation/switchtec.txt> for more
+ devices. See <file:Documentation/driver-api/switchtec.rst> for more
information.
endmenu
diff --git a/drivers/perf/qcom_l3_pmu.c b/drivers/perf/qcom_l3_pmu.c
index 15b8c10c2b2b..656e830798d9 100644
--- a/drivers/perf/qcom_l3_pmu.c
+++ b/drivers/perf/qcom_l3_pmu.c
@@ -8,7 +8,7 @@
* the slices. User space needs to aggregate to individual counts to provide
* a global picture.
*
- * See Documentation/perf/qcom_l3_pmu.txt for more details.
+ * See Documentation/admin-guide/perf/qcom_l3_pmu.rst for more details.
*
* Copyright (c) 2015-2017, The Linux Foundation. All rights reserved.
*/
diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig
index cc29fe79c283..1b67bb578f9f 100644
--- a/drivers/platform/x86/Kconfig
+++ b/drivers/platform/x86/Kconfig
@@ -118,7 +118,7 @@ config DCDBAS
Interrupts (SMIs) and Host Control Actions (system power cycle or
power off after OS shutdown) on certain Dell systems.
- See <file:Documentation/dcdbas.txt> for more details on the driver
+ See <file:Documentation/driver-api/dcdbas.rst> for more details on the driver
and the Dell systems on which Dell systems management software makes
use of this driver.
@@ -259,7 +259,7 @@ config DELL_RBU
DELL system. Note you need a Dell OpenManage or Dell Update package (DUP)
supporting application to communicate with the BIOS regarding the new
image for the image update to take effect.
- See <file:Documentation/dell_rbu.txt> for more details on the driver.
+ See <file:Documentation/driver-api/dell_rbu.rst> for more details on the driver.
config FUJITSU_LAPTOP
@@ -448,7 +448,7 @@ config SONY_LAPTOP
screen brightness control, Fn keys and allows powering on/off some
devices.
- Read <file:Documentation/laptops/sony-laptop.txt> for more information.
+ Read <file:Documentation/admin-guide/laptops/sony-laptop.rst> for more information.
config SONYPI_COMPAT
bool "Sonypi compatibility"
@@ -500,7 +500,7 @@ config THINKPAD_ACPI
support for Fn-Fx key combinations, Bluetooth control, video
output switching, ThinkLight control, UltraBay eject and more.
For more information about this driver see
- <file:Documentation/laptops/thinkpad-acpi.txt> and
+ <file:Documentation/admin-guide/laptops/thinkpad-acpi.rst> and
<http://ibm-acpi.sf.net/> .
This driver was formerly known as ibm-acpi.
diff --git a/drivers/platform/x86/dcdbas.c b/drivers/platform/x86/dcdbas.c
index 12cf9475ac85..84f4cc839cc3 100644
--- a/drivers/platform/x86/dcdbas.c
+++ b/drivers/platform/x86/dcdbas.c
@@ -7,7 +7,7 @@
* and Host Control Actions (power cycle or power off after OS shutdown) on
* Dell systems.
*
- * See Documentation/dcdbas.txt for more information.
+ * See Documentation/driver-api/dcdbas.rst for more information.
*
* Copyright (C) 1995-2006 Dell Inc.
*/
diff --git a/drivers/platform/x86/dell_rbu.c b/drivers/platform/x86/dell_rbu.c
index a58fc10293ee..3691391fea6b 100644
--- a/drivers/platform/x86/dell_rbu.c
+++ b/drivers/platform/x86/dell_rbu.c
@@ -24,7 +24,7 @@
* on every time the packet data is written. This driver requires an
* application to break the BIOS image in to fixed sized packet chunks.
*
- * See Documentation/dell_rbu.txt for more info.
+ * See Documentation/driver-api/dell_rbu.rst for more info.
*/
#include <linux/init.h>
#include <linux/module.h>
diff --git a/drivers/pnp/isapnp/Kconfig b/drivers/pnp/isapnp/Kconfig
index 4b58a3dcb52b..d0479a563123 100644
--- a/drivers/pnp/isapnp/Kconfig
+++ b/drivers/pnp/isapnp/Kconfig
@@ -7,6 +7,6 @@ config ISAPNP
depends on ISA || COMPILE_TEST
help
Say Y here if you would like support for ISA Plug and Play devices.
- Some information is in <file:Documentation/isapnp.txt>.
+ Some information is in <file:Documentation/driver-api/isapnp.rst>.
If unsure, say Y.
diff --git a/drivers/rapidio/Kconfig b/drivers/rapidio/Kconfig
index fadafc64705f..677d1aff61b7 100644
--- a/drivers/rapidio/Kconfig
+++ b/drivers/rapidio/Kconfig
@@ -86,7 +86,7 @@ config RAPIDIO_CHMAN
This option includes RapidIO channelized messaging driver which
provides socket-like interface to allow sharing of single RapidIO
messaging mailbox between multiple user-space applications.
- See "Documentation/rapidio/rio_cm.txt" for driver description.
+ See "Documentation/driver-api/rapidio/rio_cm.rst" for driver description.
config RAPIDIO_MPORT_CDEV
tristate "RapidIO /dev mport device driver"
diff --git a/drivers/staging/unisys/Documentation/overview.txt b/drivers/staging/unisys/Documentation/overview.txt
index 9ab30af265a5..f8a4144b239c 100644
--- a/drivers/staging/unisys/Documentation/overview.txt
+++ b/drivers/staging/unisys/Documentation/overview.txt
@@ -15,7 +15,7 @@ normally be unsharable, specifically:
* visorinput - keyboard and mouse
These drivers conform to the standard Linux bus/device model described
-within Documentation/driver-model/, and utilize a driver named visorbus to
+within Documentation/driver-api/driver-model/, and utilize a driver named visorbus to
present the virtual busses involved. Drivers in the 'visor*' driver set are
commonly referred to as "guest drivers" or "client drivers". All drivers
except visorbus expose a device of a specific usable class to the Linux guest
@@ -141,7 +141,7 @@ called automatically by the visorbus driver at appropriate times:
-----------------------------------
Because visorbus is a standard Linux bus driver in the model described in
-Documentation/driver-model/, the hierarchy of s-Par virtual devices is
+Documentation/driver-api/driver-model/, the hierarchy of s-Par virtual devices is
published in the sysfs tree beneath /bus/visorbus/, e.g.,
/sys/bus/visorbus/devices/ might look like:
diff --git a/drivers/tty/Kconfig b/drivers/tty/Kconfig
index 0e3e4dacbc12..c7623f99ac0f 100644
--- a/drivers/tty/Kconfig
+++ b/drivers/tty/Kconfig
@@ -93,7 +93,7 @@ config VT_HW_CONSOLE_BINDING
select the console driver that will serve as the backend for the
virtual terminals.
- See <file:Documentation/console/console.txt> for more
+ See <file:Documentation/driver-api/console.rst> for more
information. For framebuffer console users, please refer to
<file:Documentation/fb/fbcon.rst>.
@@ -175,7 +175,7 @@ config ROCKETPORT
This driver supports Comtrol RocketPort and RocketModem PCI boards.
These boards provide 2, 4, 8, 16, or 32 high-speed serial ports or
modems. For information about the RocketPort/RocketModem boards
- and this driver read <file:Documentation/serial/rocket.rst>.
+ and this driver read <file:Documentation/driver-api/serial/rocket.rst>.
To compile this driver as a module, choose M here: the
module will be called rocket.
@@ -193,7 +193,7 @@ config CYCLADES
your Linux box, for instance in order to become a dial-in server.
For information about the Cyclades-Z card, read
- <file:Documentation/serial/cyclades_z.rst>.
+ <file:Documentation/driver-api/serial/cyclades_z.rst>.
To compile this driver as a module, choose M here: the
module will be called cyclades.
diff --git a/drivers/tty/serial/Kconfig b/drivers/tty/serial/Kconfig
index b416c7b33f49..04c23951b831 100644
--- a/drivers/tty/serial/Kconfig
+++ b/drivers/tty/serial/Kconfig
@@ -500,7 +500,7 @@ config SERIAL_SA1100
help
If you have a machine based on a SA1100/SA1110 StrongARM(R) CPU you
can enable its onboard serial port by enabling this option.
- Please read <file:Documentation/arm/SA1100/serial_UART> for further
+ Please read <file:Documentation/arm/sa1100/serial_uart.rst> for further
info.
config SERIAL_SA1100_CONSOLE
diff --git a/drivers/tty/serial/ucc_uart.c b/drivers/tty/serial/ucc_uart.c
index 6e3c66ab0e62..a0555ae2b1ef 100644
--- a/drivers/tty/serial/ucc_uart.c
+++ b/drivers/tty/serial/ucc_uart.c
@@ -1081,7 +1081,7 @@ static int qe_uart_verify_port(struct uart_port *port,
}
/* UART operations
*
- * Details on these functions can be found in Documentation/serial/driver.rst
+ * Details on these functions can be found in Documentation/driver-api/serial/driver.rst
*/
static const struct uart_ops qe_uart_pops = {
.tx_empty = qe_uart_tx_empty,
diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig
index e5a7a454fe17..fd17db9b432f 100644
--- a/drivers/vfio/Kconfig
+++ b/drivers/vfio/Kconfig
@@ -25,7 +25,7 @@ menuconfig VFIO
select VFIO_IOMMU_TYPE1 if (X86 || S390 || ARM || ARM64)
help
VFIO provides a framework for secure userspace device drivers.
- See Documentation/vfio.txt for more details.
+ See Documentation/driver-api/vfio.rst for more details.
If you don't know what to do here, say N.
diff --git a/drivers/vfio/mdev/Kconfig b/drivers/vfio/mdev/Kconfig
index ba94a076887f..5da27f2100f9 100644
--- a/drivers/vfio/mdev/Kconfig
+++ b/drivers/vfio/mdev/Kconfig
@@ -6,7 +6,7 @@ config VFIO_MDEV
default n
help
Provides a framework to virtualize devices.
- See Documentation/vfio-mediated-device.txt for more details.
+ See Documentation/driver-api/vfio-mediated-device.rst for more details.
If you don't know what do here, say N.
diff --git a/drivers/w1/Kconfig b/drivers/w1/Kconfig
index 03dd57581df7..3e7ad7b232fe 100644
--- a/drivers/w1/Kconfig
+++ b/drivers/w1/Kconfig
@@ -19,7 +19,7 @@ config W1_CON
default y
---help---
This allows to communicate with userspace using connector. For more
- information see <file:Documentation/connector/connector.txt>.
+ information see <file:Documentation/driver-api/connector.rst>.
There are three types of messages between w1 core and userspace:
1. Events. They are generated each time new master or slave device found
either due to automatic or requested search.
diff --git a/fs/btrfs/Kconfig b/fs/btrfs/Kconfig
index 23537bc8c827..212b4a854f2c 100644
--- a/fs/btrfs/Kconfig
+++ b/fs/btrfs/Kconfig
@@ -2,7 +2,8 @@
config BTRFS_FS
tristate "Btrfs filesystem support"
- select LIBCRC32C
+ select CRYPTO
+ select CRYPTO_CRC32C
select ZLIB_INFLATE
select ZLIB_DEFLATE
select LZO_COMPRESS
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile
index ca693dd554e9..76a843198bcb 100644
--- a/fs/btrfs/Makefile
+++ b/fs/btrfs/Makefile
@@ -10,7 +10,8 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
export.o tree-log.o free-space-cache.o zlib.o lzo.o zstd.o \
compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o \
reada.o backref.o ulist.o qgroup.o send.o dev-replace.o raid56.o \
- uuid-tree.o props.o free-space-tree.o tree-checker.o
+ uuid-tree.o props.o free-space-tree.o tree-checker.o space-info.o \
+ block-rsv.o delalloc-space.o
btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o
btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index 982152d3f920..89116afda7a2 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -1465,12 +1465,11 @@ int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
*
* Return: 0 if extent is not shared, 1 if it is shared, < 0 on error.
*/
-int btrfs_check_shared(struct btrfs_root *root, u64 inum, u64 bytenr)
+int btrfs_check_shared(struct btrfs_root *root, u64 inum, u64 bytenr,
+ struct ulist *roots, struct ulist *tmp)
{
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_trans_handle *trans;
- struct ulist *tmp = NULL;
- struct ulist *roots = NULL;
struct ulist_iterator uiter;
struct ulist_node *node;
struct seq_list elem = SEQ_LIST_INIT(elem);
@@ -1481,12 +1480,8 @@ int btrfs_check_shared(struct btrfs_root *root, u64 inum, u64 bytenr)
.share_count = 0,
};
- tmp = ulist_alloc(GFP_NOFS);
- roots = ulist_alloc(GFP_NOFS);
- if (!tmp || !roots) {
- ret = -ENOMEM;
- goto out;
- }
+ ulist_init(roots);
+ ulist_init(tmp);
trans = btrfs_attach_transaction(root);
if (IS_ERR(trans)) {
@@ -1527,8 +1522,8 @@ int btrfs_check_shared(struct btrfs_root *root, u64 inum, u64 bytenr)
up_read(&fs_info->commit_root_sem);
}
out:
- ulist_free(tmp);
- ulist_free(roots);
+ ulist_release(roots);
+ ulist_release(tmp);
return ret;
}
diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h
index 54d58988483a..777f61dc081e 100644
--- a/fs/btrfs/backref.h
+++ b/fs/btrfs/backref.h
@@ -57,7 +57,8 @@ int btrfs_find_one_extref(struct btrfs_root *root, u64 inode_objectid,
u64 start_off, struct btrfs_path *path,
struct btrfs_inode_extref **ret_extref,
u64 *found_off);
-int btrfs_check_shared(struct btrfs_root *root, u64 inum, u64 bytenr);
+int btrfs_check_shared(struct btrfs_root *root, u64 inum, u64 bytenr,
+ struct ulist *roots, struct ulist *tmp_ulist);
int __init btrfs_prelim_ref_init(void);
void __cold btrfs_prelim_ref_exit(void);
diff --git a/fs/btrfs/block-rsv.c b/fs/btrfs/block-rsv.c
new file mode 100644
index 000000000000..698470b9f32d
--- /dev/null
+++ b/fs/btrfs/block-rsv.c
@@ -0,0 +1,425 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "ctree.h"
+#include "block-rsv.h"
+#include "space-info.h"
+#include "math.h"
+#include "transaction.h"
+
+static u64 block_rsv_release_bytes(struct btrfs_fs_info *fs_info,
+ struct btrfs_block_rsv *block_rsv,
+ struct btrfs_block_rsv *dest, u64 num_bytes,
+ u64 *qgroup_to_release_ret)
+{
+ struct btrfs_space_info *space_info = block_rsv->space_info;
+ u64 qgroup_to_release = 0;
+ u64 ret;
+
+ spin_lock(&block_rsv->lock);
+ if (num_bytes == (u64)-1) {
+ num_bytes = block_rsv->size;
+ qgroup_to_release = block_rsv->qgroup_rsv_size;
+ }
+ block_rsv->size -= num_bytes;
+ if (block_rsv->reserved >= block_rsv->size) {
+ num_bytes = block_rsv->reserved - block_rsv->size;
+ block_rsv->reserved = block_rsv->size;
+ block_rsv->full = 1;
+ } else {
+ num_bytes = 0;
+ }
+ if (block_rsv->qgroup_rsv_reserved >= block_rsv->qgroup_rsv_size) {
+ qgroup_to_release = block_rsv->qgroup_rsv_reserved -
+ block_rsv->qgroup_rsv_size;
+ block_rsv->qgroup_rsv_reserved = block_rsv->qgroup_rsv_size;
+ } else {
+ qgroup_to_release = 0;
+ }
+ spin_unlock(&block_rsv->lock);
+
+ ret = num_bytes;
+ if (num_bytes > 0) {
+ if (dest) {
+ spin_lock(&dest->lock);
+ if (!dest->full) {
+ u64 bytes_to_add;
+
+ bytes_to_add = dest->size - dest->reserved;
+ bytes_to_add = min(num_bytes, bytes_to_add);
+ dest->reserved += bytes_to_add;
+ if (dest->reserved >= dest->size)
+ dest->full = 1;
+ num_bytes -= bytes_to_add;
+ }
+ spin_unlock(&dest->lock);
+ }
+ if (num_bytes)
+ btrfs_space_info_add_old_bytes(fs_info, space_info,
+ num_bytes);
+ }
+ if (qgroup_to_release_ret)
+ *qgroup_to_release_ret = qgroup_to_release;
+ return ret;
+}
+
+int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src,
+ struct btrfs_block_rsv *dst, u64 num_bytes,
+ bool update_size)
+{
+ int ret;
+
+ ret = btrfs_block_rsv_use_bytes(src, num_bytes);
+ if (ret)
+ return ret;
+
+ btrfs_block_rsv_add_bytes(dst, num_bytes, update_size);
+ return 0;
+}
+
+void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv, unsigned short type)
+{
+ memset(rsv, 0, sizeof(*rsv));
+ spin_lock_init(&rsv->lock);
+ rsv->type = type;
+}
+
+void btrfs_init_metadata_block_rsv(struct btrfs_fs_info *fs_info,
+ struct btrfs_block_rsv *rsv,
+ unsigned short type)
+{
+ btrfs_init_block_rsv(rsv, type);
+ rsv->space_info = btrfs_find_space_info(fs_info,
+ BTRFS_BLOCK_GROUP_METADATA);
+}
+
+struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_fs_info *fs_info,
+ unsigned short type)
+{
+ struct btrfs_block_rsv *block_rsv;
+
+ block_rsv = kmalloc(sizeof(*block_rsv), GFP_NOFS);
+ if (!block_rsv)
+ return NULL;
+
+ btrfs_init_metadata_block_rsv(fs_info, block_rsv, type);
+ return block_rsv;
+}
+
+void btrfs_free_block_rsv(struct btrfs_fs_info *fs_info,
+ struct btrfs_block_rsv *rsv)
+{
+ if (!rsv)
+ return;
+ btrfs_block_rsv_release(fs_info, rsv, (u64)-1);
+ kfree(rsv);
+}
+
+int btrfs_block_rsv_add(struct btrfs_root *root,
+ struct btrfs_block_rsv *block_rsv, u64 num_bytes,
+ enum btrfs_reserve_flush_enum flush)
+{
+ int ret;
+
+ if (num_bytes == 0)
+ return 0;
+
+ ret = btrfs_reserve_metadata_bytes(root, block_rsv, num_bytes, flush);
+ if (!ret)
+ btrfs_block_rsv_add_bytes(block_rsv, num_bytes, true);
+
+ return ret;
+}
+
+int btrfs_block_rsv_check(struct btrfs_block_rsv *block_rsv, int min_factor)
+{
+ u64 num_bytes = 0;
+ int ret = -ENOSPC;
+
+ if (!block_rsv)
+ return 0;
+
+ spin_lock(&block_rsv->lock);
+ num_bytes = div_factor(block_rsv->size, min_factor);
+ if (block_rsv->reserved >= num_bytes)
+ ret = 0;
+ spin_unlock(&block_rsv->lock);
+
+ return ret;
+}
+
+int btrfs_block_rsv_refill(struct btrfs_root *root,
+ struct btrfs_block_rsv *block_rsv, u64 min_reserved,
+ enum btrfs_reserve_flush_enum flush)
+{
+ u64 num_bytes = 0;
+ int ret = -ENOSPC;
+
+ if (!block_rsv)
+ return 0;
+
+ spin_lock(&block_rsv->lock);
+ num_bytes = min_reserved;
+ if (block_rsv->reserved >= num_bytes)
+ ret = 0;
+ else
+ num_bytes -= block_rsv->reserved;
+ spin_unlock(&block_rsv->lock);
+
+ if (!ret)
+ return 0;
+
+ ret = btrfs_reserve_metadata_bytes(root, block_rsv, num_bytes, flush);
+ if (!ret) {
+ btrfs_block_rsv_add_bytes(block_rsv, num_bytes, false);
+ return 0;
+ }
+
+ return ret;
+}
+
+u64 __btrfs_block_rsv_release(struct btrfs_fs_info *fs_info,
+ struct btrfs_block_rsv *block_rsv,
+ u64 num_bytes, u64 *qgroup_to_release)
+{
+ struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
+ struct btrfs_block_rsv *delayed_rsv = &fs_info->delayed_refs_rsv;
+ struct btrfs_block_rsv *target = NULL;
+
+ /*
+ * If we are the delayed_rsv then push to the global rsv, otherwise dump
+ * into the delayed rsv if it is not full.
+ */
+ if (block_rsv == delayed_rsv)
+ target = global_rsv;
+ else if (block_rsv != global_rsv && !delayed_rsv->full)
+ target = delayed_rsv;
+
+ if (target && block_rsv->space_info != target->space_info)
+ target = NULL;
+
+ return block_rsv_release_bytes(fs_info, block_rsv, target, num_bytes,
+ qgroup_to_release);
+}
+
+int btrfs_block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv, u64 num_bytes)
+{
+ int ret = -ENOSPC;
+
+ spin_lock(&block_rsv->lock);
+ if (block_rsv->reserved >= num_bytes) {
+ block_rsv->reserved -= num_bytes;
+ if (block_rsv->reserved < block_rsv->size)
+ block_rsv->full = 0;
+ ret = 0;
+ }
+ spin_unlock(&block_rsv->lock);
+ return ret;
+}
+
+void btrfs_block_rsv_add_bytes(struct btrfs_block_rsv *block_rsv,
+ u64 num_bytes, bool update_size)
+{
+ spin_lock(&block_rsv->lock);
+ block_rsv->reserved += num_bytes;
+ if (update_size)
+ block_rsv->size += num_bytes;
+ else if (block_rsv->reserved >= block_rsv->size)
+ block_rsv->full = 1;
+ spin_unlock(&block_rsv->lock);
+}
+
+int btrfs_cond_migrate_bytes(struct btrfs_fs_info *fs_info,
+ struct btrfs_block_rsv *dest, u64 num_bytes,
+ int min_factor)
+{
+ struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
+ u64 min_bytes;
+
+ if (global_rsv->space_info != dest->space_info)
+ return -ENOSPC;
+
+ spin_lock(&global_rsv->lock);
+ min_bytes = div_factor(global_rsv->size, min_factor);
+ if (global_rsv->reserved < min_bytes + num_bytes) {
+ spin_unlock(&global_rsv->lock);
+ return -ENOSPC;
+ }
+ global_rsv->reserved -= num_bytes;
+ if (global_rsv->reserved < global_rsv->size)
+ global_rsv->full = 0;
+ spin_unlock(&global_rsv->lock);
+
+ btrfs_block_rsv_add_bytes(dest, num_bytes, true);
+ return 0;
+}
+
+void btrfs_update_global_block_rsv(struct btrfs_fs_info *fs_info)
+{
+ struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv;
+ struct btrfs_space_info *sinfo = block_rsv->space_info;
+ u64 num_bytes;
+
+ /*
+ * The global block rsv is based on the size of the extent tree, the
+ * checksum tree and the root tree. If the fs is empty we want to set
+ * it to a minimal amount for safety.
+ */
+ num_bytes = btrfs_root_used(&fs_info->extent_root->root_item) +
+ btrfs_root_used(&fs_info->csum_root->root_item) +
+ btrfs_root_used(&fs_info->tree_root->root_item);
+ num_bytes = max_t(u64, num_bytes, SZ_16M);
+
+ spin_lock(&sinfo->lock);
+ spin_lock(&block_rsv->lock);
+
+ block_rsv->size = min_t(u64, num_bytes, SZ_512M);
+
+ if (block_rsv->reserved < block_rsv->size) {
+ num_bytes = btrfs_space_info_used(sinfo, true);
+ if (sinfo->total_bytes > num_bytes) {
+ num_bytes = sinfo->total_bytes - num_bytes;
+ num_bytes = min(num_bytes,
+ block_rsv->size - block_rsv->reserved);
+ block_rsv->reserved += num_bytes;
+ btrfs_space_info_update_bytes_may_use(fs_info, sinfo,
+ num_bytes);
+ trace_btrfs_space_reservation(fs_info, "space_info",
+ sinfo->flags, num_bytes,
+ 1);
+ }
+ } else if (block_rsv->reserved > block_rsv->size) {
+ num_bytes = block_rsv->reserved - block_rsv->size;
+ btrfs_space_info_update_bytes_may_use(fs_info, sinfo,
+ -num_bytes);
+ trace_btrfs_space_reservation(fs_info, "space_info",
+ sinfo->flags, num_bytes, 0);
+ block_rsv->reserved = block_rsv->size;
+ }
+
+ if (block_rsv->reserved == block_rsv->size)
+ block_rsv->full = 1;
+ else
+ block_rsv->full = 0;
+
+ spin_unlock(&block_rsv->lock);
+ spin_unlock(&sinfo->lock);
+}
+
+void btrfs_init_global_block_rsv(struct btrfs_fs_info *fs_info)
+{
+ struct btrfs_space_info *space_info;
+
+ space_info = btrfs_find_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM);
+ fs_info->chunk_block_rsv.space_info = space_info;
+
+ space_info = btrfs_find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
+ fs_info->global_block_rsv.space_info = space_info;
+ fs_info->trans_block_rsv.space_info = space_info;
+ fs_info->empty_block_rsv.space_info = space_info;
+ fs_info->delayed_block_rsv.space_info = space_info;
+ fs_info->delayed_refs_rsv.space_info = space_info;
+
+ fs_info->extent_root->block_rsv = &fs_info->delayed_refs_rsv;
+ fs_info->csum_root->block_rsv = &fs_info->delayed_refs_rsv;
+ fs_info->dev_root->block_rsv = &fs_info->global_block_rsv;
+ fs_info->tree_root->block_rsv = &fs_info->global_block_rsv;
+ if (fs_info->quota_root)
+ fs_info->quota_root->block_rsv = &fs_info->global_block_rsv;
+ fs_info->chunk_root->block_rsv = &fs_info->chunk_block_rsv;
+
+ btrfs_update_global_block_rsv(fs_info);
+}
+
+void btrfs_release_global_block_rsv(struct btrfs_fs_info *fs_info)
+{
+ btrfs_block_rsv_release(fs_info, &fs_info->global_block_rsv, (u64)-1);
+ WARN_ON(fs_info->trans_block_rsv.size > 0);
+ WARN_ON(fs_info->trans_block_rsv.reserved > 0);
+ WARN_ON(fs_info->chunk_block_rsv.size > 0);
+ WARN_ON(fs_info->chunk_block_rsv.reserved > 0);
+ WARN_ON(fs_info->delayed_block_rsv.size > 0);
+ WARN_ON(fs_info->delayed_block_rsv.reserved > 0);
+ WARN_ON(fs_info->delayed_refs_rsv.reserved > 0);
+ WARN_ON(fs_info->delayed_refs_rsv.size > 0);
+}
+
+static struct btrfs_block_rsv *get_block_rsv(
+ const struct btrfs_trans_handle *trans,
+ const struct btrfs_root *root)
+{
+ struct btrfs_fs_info *fs_info = root->fs_info;
+ struct btrfs_block_rsv *block_rsv = NULL;
+
+ if (test_bit(BTRFS_ROOT_REF_COWS, &root->state) ||
+ (root == fs_info->csum_root && trans->adding_csums) ||
+ (root == fs_info->uuid_root))
+ block_rsv = trans->block_rsv;
+
+ if (!block_rsv)
+ block_rsv = root->block_rsv;
+
+ if (!block_rsv)
+ block_rsv = &fs_info->empty_block_rsv;
+
+ return block_rsv;
+}
+
+struct btrfs_block_rsv *btrfs_use_block_rsv(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ u32 blocksize)
+{
+ struct btrfs_fs_info *fs_info = root->fs_info;
+ struct btrfs_block_rsv *block_rsv;
+ struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
+ int ret;
+ bool global_updated = false;
+
+ block_rsv = get_block_rsv(trans, root);
+
+ if (unlikely(block_rsv->size == 0))
+ goto try_reserve;
+again:
+ ret = btrfs_block_rsv_use_bytes(block_rsv, blocksize);
+ if (!ret)
+ return block_rsv;
+
+ if (block_rsv->failfast)
+ return ERR_PTR(ret);
+
+ if (block_rsv->type == BTRFS_BLOCK_RSV_GLOBAL && !global_updated) {
+ global_updated = true;
+ btrfs_update_global_block_rsv(fs_info);
+ goto again;
+ }
+
+ /*
+ * The global reserve still exists to save us from ourselves, so don't
+ * warn_on if we are short on our delayed refs reserve.
+ */
+ if (block_rsv->type != BTRFS_BLOCK_RSV_DELREFS &&
+ btrfs_test_opt(fs_info, ENOSPC_DEBUG)) {
+ static DEFINE_RATELIMIT_STATE(_rs,
+ DEFAULT_RATELIMIT_INTERVAL * 10,
+ /*DEFAULT_RATELIMIT_BURST*/ 1);
+ if (__ratelimit(&_rs))
+ WARN(1, KERN_DEBUG
+ "BTRFS: block rsv returned %d\n", ret);
+ }
+try_reserve:
+ ret = btrfs_reserve_metadata_bytes(root, block_rsv, blocksize,
+ BTRFS_RESERVE_NO_FLUSH);
+ if (!ret)
+ return block_rsv;
+ /*
+ * If we couldn't reserve metadata bytes try and use some from
+ * the global reserve if its space type is the same as the global
+ * reservation.
+ */
+ if (block_rsv->type != BTRFS_BLOCK_RSV_GLOBAL &&
+ block_rsv->space_info == global_rsv->space_info) {
+ ret = btrfs_block_rsv_use_bytes(global_rsv, blocksize);
+ if (!ret)
+ return global_rsv;
+ }
+ return ERR_PTR(ret);
+}
diff --git a/fs/btrfs/block-rsv.h b/fs/btrfs/block-rsv.h
new file mode 100644
index 000000000000..d1428bb73fc5
--- /dev/null
+++ b/fs/btrfs/block-rsv.h
@@ -0,0 +1,101 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef BTRFS_BLOCK_RSV_H
+#define BTRFS_BLOCK_RSV_H
+
+struct btrfs_trans_handle;
+enum btrfs_reserve_flush_enum;
+
+/*
+ * Types of block reserves
+ */
+enum {
+ BTRFS_BLOCK_RSV_GLOBAL,
+ BTRFS_BLOCK_RSV_DELALLOC,
+ BTRFS_BLOCK_RSV_TRANS,
+ BTRFS_BLOCK_RSV_CHUNK,
+ BTRFS_BLOCK_RSV_DELOPS,
+ BTRFS_BLOCK_RSV_DELREFS,
+ BTRFS_BLOCK_RSV_EMPTY,
+ BTRFS_BLOCK_RSV_TEMP,
+};
+
+struct btrfs_block_rsv {
+ u64 size;
+ u64 reserved;
+ struct btrfs_space_info *space_info;
+ spinlock_t lock;
+ unsigned short full;
+ unsigned short type;
+ unsigned short failfast;
+
+ /*
+ * Qgroup equivalent for @size @reserved
+ *
+ * Unlike normal @size/@reserved for inode rsv, qgroup doesn't care
+ * about things like csum size nor how many tree blocks it will need to
+ * reserve.
+ *
+ * Qgroup cares more about net change of the extent usage.
+ *
+ * So for one newly inserted file extent, in worst case it will cause
+ * leaf split and level increase, nodesize for each file extent is
+ * already too much.
+ *
+ * In short, qgroup_size/reserved is the upper limit of possible needed
+ * qgroup metadata reservation.
+ */
+ u64 qgroup_rsv_size;
+ u64 qgroup_rsv_reserved;
+};
+
+void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv, unsigned short type);
+struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_fs_info *fs_info,
+ unsigned short type);
+void btrfs_init_metadata_block_rsv(struct btrfs_fs_info *fs_info,
+ struct btrfs_block_rsv *rsv,
+ unsigned short type);
+void btrfs_free_block_rsv(struct btrfs_fs_info *fs_info,
+ struct btrfs_block_rsv *rsv);
+int btrfs_block_rsv_add(struct btrfs_root *root,
+ struct btrfs_block_rsv *block_rsv, u64 num_bytes,
+ enum btrfs_reserve_flush_enum flush);
+int btrfs_block_rsv_check(struct btrfs_block_rsv *block_rsv, int min_factor);
+int btrfs_block_rsv_refill(struct btrfs_root *root,
+ struct btrfs_block_rsv *block_rsv, u64 min_reserved,
+ enum btrfs_reserve_flush_enum flush);
+int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv,
+ struct btrfs_block_rsv *dst_rsv, u64 num_bytes,
+ bool update_size);
+int btrfs_block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv, u64 num_bytes);
+int btrfs_cond_migrate_bytes(struct btrfs_fs_info *fs_info,
+ struct btrfs_block_rsv *dest, u64 num_bytes,
+ int min_factor);
+void btrfs_block_rsv_add_bytes(struct btrfs_block_rsv *block_rsv,
+ u64 num_bytes, bool update_size);
+u64 __btrfs_block_rsv_release(struct btrfs_fs_info *fs_info,
+ struct btrfs_block_rsv *block_rsv,
+ u64 num_bytes, u64 *qgroup_to_release);
+void btrfs_update_global_block_rsv(struct btrfs_fs_info *fs_info);
+void btrfs_init_global_block_rsv(struct btrfs_fs_info *fs_info);
+void btrfs_release_global_block_rsv(struct btrfs_fs_info *fs_info);
+struct btrfs_block_rsv *btrfs_use_block_rsv(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ u32 blocksize);
+
+static inline void btrfs_block_rsv_release(struct btrfs_fs_info *fs_info,
+ struct btrfs_block_rsv *block_rsv,
+ u64 num_bytes)
+{
+ __btrfs_block_rsv_release(fs_info, block_rsv, num_bytes, NULL);
+}
+
+static inline void btrfs_unuse_block_rsv(struct btrfs_fs_info *fs_info,
+ struct btrfs_block_rsv *block_rsv,
+ u32 blocksize)
+{
+ btrfs_block_rsv_add_bytes(block_rsv, blocksize, false);
+ btrfs_block_rsv_release(fs_info, block_rsv, 0);
+}
+
+#endif /* BTRFS_BLOCK_RSV_H */
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index d5b438706b77..f853835c409c 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -337,22 +337,34 @@ static inline void btrfs_inode_resume_unlocked_dio(struct btrfs_inode *inode)
clear_bit(BTRFS_INODE_READDIO_NEED_LOCK, &inode->runtime_flags);
}
+/* Array of bytes with variable length, hexadecimal format 0x1234 */
+#define CSUM_FMT "0x%*phN"
+#define CSUM_FMT_VALUE(size, bytes) size, bytes
+
static inline void btrfs_print_data_csum_error(struct btrfs_inode *inode,
- u64 logical_start, u32 csum, u32 csum_expected, int mirror_num)
+ u64 logical_start, u8 *csum, u8 *csum_expected, int mirror_num)
{
struct btrfs_root *root = inode->root;
+ struct btrfs_super_block *sb = root->fs_info->super_copy;
+ const u16 csum_size = btrfs_super_csum_size(sb);
/* Output minus objectid, which is more meaningful */
if (root->root_key.objectid >= BTRFS_LAST_FREE_OBJECTID)
btrfs_warn_rl(root->fs_info,
- "csum failed root %lld ino %lld off %llu csum 0x%08x expected csum 0x%08x mirror %d",
+"csum failed root %lld ino %lld off %llu csum " CSUM_FMT " expected csum " CSUM_FMT " mirror %d",
root->root_key.objectid, btrfs_ino(inode),
- logical_start, csum, csum_expected, mirror_num);
+ logical_start,
+ CSUM_FMT_VALUE(csum_size, csum),
+ CSUM_FMT_VALUE(csum_size, csum_expected),
+ mirror_num);
else
btrfs_warn_rl(root->fs_info,
- "csum failed root %llu ino %llu off %llu csum 0x%08x expected csum 0x%08x mirror %d",
+"csum failed root %llu ino %llu off %llu csum " CSUM_FMT " expected csum " CSUM_FMT " mirror %d",
root->root_key.objectid, btrfs_ino(inode),
- logical_start, csum, csum_expected, mirror_num);
+ logical_start,
+ CSUM_FMT_VALUE(csum_size, csum),
+ CSUM_FMT_VALUE(csum_size, csum_expected),
+ mirror_num);
}
#endif
diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c
index b0c8094528d1..81a9731959a9 100644
--- a/fs/btrfs/check-integrity.c
+++ b/fs/btrfs/check-integrity.c
@@ -83,7 +83,7 @@
#include <linux/blkdev.h>
#include <linux/mm.h>
#include <linux/string.h>
-#include <linux/crc32c.h>
+#include <crypto/hash.h>
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
@@ -1710,9 +1710,9 @@ static int btrfsic_test_for_metadata(struct btrfsic_state *state,
char **datav, unsigned int num_pages)
{
struct btrfs_fs_info *fs_info = state->fs_info;
+ SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
struct btrfs_header *h;
u8 csum[BTRFS_CSUM_SIZE];
- u32 crc = ~(u32)0;
unsigned int i;
if (num_pages * PAGE_SIZE < state->metablock_size)
@@ -1723,14 +1723,17 @@ static int btrfsic_test_for_metadata(struct btrfsic_state *state,
if (memcmp(h->fsid, fs_info->fs_devices->fsid, BTRFS_FSID_SIZE))
return 1;
+ shash->tfm = fs_info->csum_shash;
+ crypto_shash_init(shash);
+
for (i = 0; i < num_pages; i++) {
u8 *data = i ? datav[i] : (datav[i] + BTRFS_CSUM_SIZE);
size_t sublen = i ? PAGE_SIZE :
(PAGE_SIZE - BTRFS_CSUM_SIZE);
- crc = crc32c(crc, data, sublen);
+ crypto_shash_update(shash, data, sublen);
}
- btrfs_csum_final(crc, csum);
+ crypto_shash_final(shash, csum);
if (memcmp(csum, h->csum, state->csum_size))
return 1;
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 84dd4a8980c5..60c47b417a4b 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -17,6 +17,7 @@
#include <linux/slab.h>
#include <linux/sched/mm.h>
#include <linux/log2.h>
+#include <crypto/hash.h>
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
@@ -42,6 +43,22 @@ const char* btrfs_compress_type2str(enum btrfs_compression_type type)
return NULL;
}
+bool btrfs_compress_is_valid_type(const char *str, size_t len)
+{
+ int i;
+
+ for (i = 1; i < ARRAY_SIZE(btrfs_compress_types); i++) {
+ size_t comp_len = strlen(btrfs_compress_types[i]);
+
+ if (len < comp_len)
+ continue;
+
+ if (!strncmp(btrfs_compress_types[i], str, comp_len))
+ return true;
+ }
+ return false;
+}
+
static int btrfs_decompress_bio(struct compressed_bio *cb);
static inline int compressed_bio_size(struct btrfs_fs_info *fs_info,
@@ -57,32 +74,37 @@ static int check_compressed_csum(struct btrfs_inode *inode,
struct compressed_bio *cb,
u64 disk_start)
{
+ struct btrfs_fs_info *fs_info = inode->root->fs_info;
+ SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
+ const u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
int ret;
struct page *page;
unsigned long i;
char *kaddr;
- u32 csum;
- u32 *cb_sum = &cb->sums;
+ u8 csum[BTRFS_CSUM_SIZE];
+ u8 *cb_sum = cb->sums;
if (inode->flags & BTRFS_INODE_NODATASUM)
return 0;
+ shash->tfm = fs_info->csum_shash;
+
for (i = 0; i < cb->nr_pages; i++) {
page = cb->compressed_pages[i];
- csum = ~(u32)0;
+ crypto_shash_init(shash);
kaddr = kmap_atomic(page);
- csum = btrfs_csum_data(kaddr, csum, PAGE_SIZE);
- btrfs_csum_final(csum, (u8 *)&csum);
+ crypto_shash_update(shash, kaddr, PAGE_SIZE);
kunmap_atomic(kaddr);
+ crypto_shash_final(shash, (u8 *)&csum);
- if (csum != *cb_sum) {
- btrfs_print_data_csum_error(inode, disk_start, csum,
- *cb_sum, cb->mirror_num);
+ if (memcmp(&csum, cb_sum, csum_size)) {
+ btrfs_print_data_csum_error(inode, disk_start,
+ csum, cb_sum, cb->mirror_num);
ret = -EIO;
goto fail;
}
- cb_sum++;
+ cb_sum += csum_size;
}
ret = 0;
@@ -318,7 +340,8 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
bdev = fs_info->fs_devices->latest_bdev;
- bio = btrfs_bio_alloc(bdev, first_byte);
+ bio = btrfs_bio_alloc(first_byte);
+ bio_set_dev(bio, bdev);
bio->bi_opf = REQ_OP_WRITE | write_flags;
bio->bi_private = cb;
bio->bi_end_io = end_compressed_bio_write;
@@ -360,7 +383,8 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
bio_endio(bio);
}
- bio = btrfs_bio_alloc(bdev, first_byte);
+ bio = btrfs_bio_alloc(first_byte);
+ bio_set_dev(bio, bdev);
bio->bi_opf = REQ_OP_WRITE | write_flags;
bio->bi_private = cb;
bio->bi_end_io = end_compressed_bio_write;
@@ -536,7 +560,8 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
struct extent_map *em;
blk_status_t ret = BLK_STS_RESOURCE;
int faili = 0;
- u32 *sums;
+ const u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
+ u8 *sums;
em_tree = &BTRFS_I(inode)->extent_tree;
@@ -558,7 +583,7 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
cb->errors = 0;
cb->inode = inode;
cb->mirror_num = mirror_num;
- sums = &cb->sums;
+ sums = cb->sums;
cb->start = em->orig_start;
em_len = em->len;
@@ -597,7 +622,8 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
/* include any pages we added in add_ra-bio_pages */
cb->len = bio->bi_iter.bi_size;
- comp_bio = btrfs_bio_alloc(bdev, cur_disk_byte);
+ comp_bio = btrfs_bio_alloc(cur_disk_byte);
+ bio_set_dev(comp_bio, bdev);
comp_bio->bi_opf = REQ_OP_READ;
comp_bio->bi_private = cb;
comp_bio->bi_end_io = end_compressed_bio_read;
@@ -617,6 +643,8 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
page->mapping = NULL;
if (submit || bio_add_page(comp_bio, page, PAGE_SIZE, 0) <
PAGE_SIZE) {
+ unsigned int nr_sectors;
+
ret = btrfs_bio_wq_end_io(fs_info, comp_bio,
BTRFS_WQ_ENDIO_DATA);
BUG_ON(ret); /* -ENOMEM */
@@ -634,8 +662,10 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
sums);
BUG_ON(ret); /* -ENOMEM */
}
- sums += DIV_ROUND_UP(comp_bio->bi_iter.bi_size,
- fs_info->sectorsize);
+
+ nr_sectors = DIV_ROUND_UP(comp_bio->bi_iter.bi_size,
+ fs_info->sectorsize);
+ sums += csum_size * nr_sectors;
ret = btrfs_map_bio(fs_info, comp_bio, mirror_num, 0);
if (ret) {
@@ -643,7 +673,8 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
bio_endio(comp_bio);
}
- comp_bio = btrfs_bio_alloc(bdev, cur_disk_byte);
+ comp_bio = btrfs_bio_alloc(cur_disk_byte);
+ bio_set_dev(comp_bio, bdev);
comp_bio->bi_opf = REQ_OP_READ;
comp_bio->bi_private = cb;
comp_bio->bi_end_io = end_compressed_bio_read;
diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h
index 9976fe0f7526..2035b8eb1290 100644
--- a/fs/btrfs/compression.h
+++ b/fs/btrfs/compression.h
@@ -61,7 +61,7 @@ struct compressed_bio {
* the start of a variable length array of checksums only
* used by reads
*/
- u32 sums;
+ u8 sums[];
};
static inline unsigned int btrfs_compress_type(unsigned int type_level)
@@ -173,6 +173,7 @@ extern const struct btrfs_compress_op btrfs_lzo_compress;
extern const struct btrfs_compress_op btrfs_zstd_compress;
const char* btrfs_compress_type2str(enum btrfs_compression_type type);
+bool btrfs_compress_is_valid_type(const char *str, size_t len);
int btrfs_compress_heuristic(struct inode *inode, u64 start, u64 end);
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 0a61dff27f57..299e11e6c554 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -19,6 +19,7 @@
#include <linux/kobject.h>
#include <trace/events/btrfs.h>
#include <asm/kmap_types.h>
+#include <asm/unaligned.h>
#include <linux/pagemap.h>
#include <linux/btrfs.h>
#include <linux/btrfs_tree.h>
@@ -31,11 +32,13 @@
#include "extent_io.h"
#include "extent_map.h"
#include "async-thread.h"
+#include "block-rsv.h"
struct btrfs_trans_handle;
struct btrfs_transaction;
struct btrfs_pending_snapshot;
struct btrfs_delayed_ref_root;
+struct btrfs_space_info;
extern struct kmem_cache *btrfs_trans_handle_cachep;
extern struct kmem_cache *btrfs_bit_radix_cachep;
extern struct kmem_cache *btrfs_path_cachep;
@@ -45,7 +48,16 @@ struct btrfs_ref;
#define BTRFS_MAGIC 0x4D5F53665248425FULL /* ascii _BHRfS_M, no null */
-#define BTRFS_MAX_MIRRORS 3
+/*
+ * Maximum number of mirrors that can be available for all profiles counting
+ * the target device of dev-replace as one. During an active device replace
+ * procedure, the target device of the copy operation is a mirror for the
+ * filesystem data as well that can be used to read data in order to repair
+ * read errors on other disks.
+ *
+ * Current value is derived from RAID1 with 2 copies.
+ */
+#define BTRFS_MAX_MIRRORS (2 + 1)
#define BTRFS_MAX_LEVEL 8
@@ -72,6 +84,7 @@ struct btrfs_ref;
/* four bytes for CRC32 */
static const int btrfs_csum_sizes[] = { 4 };
+static const char *btrfs_csum_names[] = { "crc32c" };
#define BTRFS_EMPTY_DIR_SIZE 0
@@ -99,10 +112,6 @@ static inline u32 count_max_extents(u64 size)
return div_u64(size + BTRFS_MAX_EXTENT_SIZE - 1, BTRFS_MAX_EXTENT_SIZE);
}
-struct btrfs_mapping_tree {
- struct extent_map_tree map_tree;
-};
-
static inline unsigned long btrfs_chunk_item_size(int num_stripes)
{
BUG_ON(num_stripes == 0);
@@ -395,115 +404,6 @@ struct raid_kobject {
struct list_head list;
};
-struct btrfs_space_info {
- spinlock_t lock;
-
- u64 total_bytes; /* total bytes in the space,
- this doesn't take mirrors into account */
- u64 bytes_used; /* total bytes used,
- this doesn't take mirrors into account */
- u64 bytes_pinned; /* total bytes pinned, will be freed when the
- transaction finishes */
- u64 bytes_reserved; /* total bytes the allocator has reserved for
- current allocations */
- u64 bytes_may_use; /* number of bytes that may be used for
- delalloc/allocations */
- u64 bytes_readonly; /* total bytes that are read only */
-
- u64 max_extent_size; /* This will hold the maximum extent size of
- the space info if we had an ENOSPC in the
- allocator. */
-
- unsigned int full:1; /* indicates that we cannot allocate any more
- chunks for this space */
- unsigned int chunk_alloc:1; /* set if we are allocating a chunk */
-
- unsigned int flush:1; /* set if we are trying to make space */
-
- unsigned int force_alloc; /* set if we need to force a chunk
- alloc for this space */
-
- u64 disk_used; /* total bytes used on disk */
- u64 disk_total; /* total bytes on disk, takes mirrors into
- account */
-
- u64 flags;
-
- /*
- * bytes_pinned is kept in line with what is actually pinned, as in
- * we've called update_block_group and dropped the bytes_used counter
- * and increased the bytes_pinned counter. However this means that
- * bytes_pinned does not reflect the bytes that will be pinned once the
- * delayed refs are flushed, so this counter is inc'ed every time we
- * call btrfs_free_extent so it is a realtime count of what will be
- * freed once the transaction is committed. It will be zeroed every
- * time the transaction commits.
- */
- struct percpu_counter total_bytes_pinned;
-
- struct list_head list;
- /* Protected by the spinlock 'lock'. */
- struct list_head ro_bgs;
- struct list_head priority_tickets;
- struct list_head tickets;
- /*
- * tickets_id just indicates the next ticket will be handled, so note
- * it's not stored per ticket.
- */
- u64 tickets_id;
-
- struct rw_semaphore groups_sem;
- /* for block groups in our same type */
- struct list_head block_groups[BTRFS_NR_RAID_TYPES];
- wait_queue_head_t wait;
-
- struct kobject kobj;
- struct kobject *block_group_kobjs[BTRFS_NR_RAID_TYPES];
-};
-
-/*
- * Types of block reserves
- */
-enum {
- BTRFS_BLOCK_RSV_GLOBAL,
- BTRFS_BLOCK_RSV_DELALLOC,
- BTRFS_BLOCK_RSV_TRANS,
- BTRFS_BLOCK_RSV_CHUNK,
- BTRFS_BLOCK_RSV_DELOPS,
- BTRFS_BLOCK_RSV_DELREFS,
- BTRFS_BLOCK_RSV_EMPTY,
- BTRFS_BLOCK_RSV_TEMP,
-};
-
-struct btrfs_block_rsv {
- u64 size;
- u64 reserved;
- struct btrfs_space_info *space_info;
- spinlock_t lock;
- unsigned short full;
- unsigned short type;
- unsigned short failfast;
-
- /*
- * Qgroup equivalent for @size @reserved
- *
- * Unlike normal @size/@reserved for inode rsv, qgroup doesn't care
- * about things like csum size nor how many tree blocks it will need to
- * reserve.
- *
- * Qgroup cares more about net change of the extent usage.
- *
- * So for one newly inserted file extent, in worst case it will cause
- * leaf split and level increase, nodesize for each file extent is
- * already too much.
- *
- * In short, qgroup_size/reserved is the upper limit of possible needed
- * qgroup metadata reservation.
- */
- u64 qgroup_rsv_size;
- u64 qgroup_rsv_reserved;
-};
-
/*
* free clusters are used to claim free space in relatively large chunks,
* allowing us to do less seeky writes. They are used for all metadata
@@ -786,11 +686,18 @@ enum {
/*
* Indicate that balance has been set up from the ioctl and is in the
* main phase. The fs_info::balance_ctl is initialized.
+ * Set and cleared while holding fs_info::balance_mutex.
*/
BTRFS_FS_BALANCE_RUNNING,
/* Indicate that the cleaner thread is awake and doing something. */
BTRFS_FS_CLEANER_RUNNING,
+
+ /*
+ * The checksumming has an optimized version and is considered fast,
+ * so we don't need to offload checksums to workqueues.
+ */
+ BTRFS_FS_CSUM_IMPL_FAST,
};
struct btrfs_fs_info {
@@ -824,7 +731,7 @@ struct btrfs_fs_info {
struct extent_io_tree *pinned_extents;
/* logical->physical extent mapping */
- struct btrfs_mapping_tree mapping_tree;
+ struct extent_map_tree mapping_tree;
/*
* block reservation for extent, checksum, root tree and
@@ -1160,6 +1067,14 @@ struct btrfs_fs_info {
spinlock_t swapfile_pins_lock;
struct rb_root swapfile_pins;
+ struct crypto_shash *csum_shash;
+
+ /*
+ * Number of send operations in progress.
+ * Updated while holding fs_info::balance_mutex.
+ */
+ int send_in_progress;
+
#ifdef CONFIG_BTRFS_FS_REF_VERIFY
spinlock_t ref_verify_lock;
struct rb_root block_tree;
@@ -2451,6 +2366,11 @@ static inline int btrfs_super_csum_size(const struct btrfs_super_block *s)
return btrfs_csum_sizes[t];
}
+static inline const char *btrfs_super_csum_name(u16 csum_type)
+{
+ /* csum type is validated at mount time */
+ return btrfs_csum_names[csum_type];
+}
/*
* The leaf data grows from end-to-front in the node.
@@ -2642,6 +2562,16 @@ BTRFS_SETGET_STACK_FUNCS(stack_dev_replace_cursor_right,
((unsigned long)(BTRFS_LEAF_DATA_OFFSET + \
btrfs_item_offset_nr(leaf, slot)))
+static inline u32 btrfs_crc32c(u32 crc, const void *address, unsigned length)
+{
+ return crc32c(crc, address, length);
+}
+
+static inline void btrfs_crc32c_final(u32 crc, u8 *result)
+{
+ put_unaligned_le32(~crc, result);
+}
+
static inline u64 btrfs_name_hash(const char *name, int len)
{
return crc32c((u32)~1, name, len);
@@ -2656,12 +2586,6 @@ static inline u64 btrfs_extref_hash(u64 parent_objectid, const char *name,
return (u64) crc32c(parent_objectid, name, len);
}
-static inline bool btrfs_mixed_space_info(struct btrfs_space_info *space_info)
-{
- return ((space_info->flags & BTRFS_BLOCK_GROUP_METADATA) &&
- (space_info->flags & BTRFS_BLOCK_GROUP_DATA));
-}
-
static inline gfp_t btrfs_alloc_write_mask(struct address_space *mapping)
{
return mapping_gfp_constraint(mapping, ~__GFP_FS);
@@ -2698,8 +2622,6 @@ static inline u64 btrfs_calc_trunc_metadata_size(struct btrfs_fs_info *fs_info,
return (u64)fs_info->nodesize * BTRFS_MAX_LEVEL * num_items;
}
-int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans);
-bool btrfs_check_space_for_delayed_refs(struct btrfs_fs_info *fs_info);
void btrfs_dec_block_group_reservations(struct btrfs_fs_info *fs_info,
const u64 start);
void btrfs_wait_block_group_reservations(struct btrfs_block_group_cache *bg);
@@ -2814,17 +2736,28 @@ enum btrfs_flush_state {
COMMIT_TRANS = 9,
};
-int btrfs_alloc_data_chunk_ondemand(struct btrfs_inode *inode, u64 bytes);
-int btrfs_check_data_free_space(struct inode *inode,
- struct extent_changeset **reserved, u64 start, u64 len);
-void btrfs_free_reserved_data_space(struct inode *inode,
- struct extent_changeset *reserved, u64 start, u64 len);
-void btrfs_delalloc_release_space(struct inode *inode,
- struct extent_changeset *reserved,
- u64 start, u64 len, bool qgroup_free);
-void btrfs_free_reserved_data_space_noquota(struct inode *inode, u64 start,
- u64 len);
-void btrfs_trans_release_chunk_metadata(struct btrfs_trans_handle *trans);
+/*
+ * Control flags for the force argument of btrfs_chunk_alloc().
+ * CHUNK_ALLOC_NO_FORCE means to only allocate a chunk
+ * if we really need one.
+ *
+ * CHUNK_ALLOC_LIMITED means to only try to allocate one
+ * if we have very few chunks already allocated. This is
+ * used as part of the clustering code to help make sure
+ * we have a good pool of storage to cluster in, without
+ * filling the FS with empty chunks.
+ *
+ * CHUNK_ALLOC_FORCE means it must try to allocate one.
+ *
+ */
+enum btrfs_chunk_alloc_enum {
+ CHUNK_ALLOC_NO_FORCE,
+ CHUNK_ALLOC_LIMITED,
+ CHUNK_ALLOC_FORCE,
+};
+
+int btrfs_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags,
+ enum btrfs_chunk_alloc_enum force);
int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
struct btrfs_block_rsv *rsv,
int nitems, bool use_global_rsv);
@@ -2834,41 +2767,6 @@ void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes,
bool qgroup_free);
int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes);
-void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes,
- bool qgroup_free);
-int btrfs_delalloc_reserve_space(struct inode *inode,
- struct extent_changeset **reserved, u64 start, u64 len);
-void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv, unsigned short type);
-struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_fs_info *fs_info,
- unsigned short type);
-void btrfs_init_metadata_block_rsv(struct btrfs_fs_info *fs_info,
- struct btrfs_block_rsv *rsv,
- unsigned short type);
-void btrfs_free_block_rsv(struct btrfs_fs_info *fs_info,
- struct btrfs_block_rsv *rsv);
-int btrfs_block_rsv_add(struct btrfs_root *root,
- struct btrfs_block_rsv *block_rsv, u64 num_bytes,
- enum btrfs_reserve_flush_enum flush);
-int btrfs_block_rsv_check(struct btrfs_block_rsv *block_rsv, int min_factor);
-int btrfs_block_rsv_refill(struct btrfs_root *root,
- struct btrfs_block_rsv *block_rsv, u64 min_reserved,
- enum btrfs_reserve_flush_enum flush);
-int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv,
- struct btrfs_block_rsv *dst_rsv, u64 num_bytes,
- bool update_size);
-int btrfs_cond_migrate_bytes(struct btrfs_fs_info *fs_info,
- struct btrfs_block_rsv *dest, u64 num_bytes,
- int min_factor);
-void btrfs_block_rsv_release(struct btrfs_fs_info *fs_info,
- struct btrfs_block_rsv *block_rsv,
- u64 num_bytes);
-void btrfs_delayed_refs_rsv_release(struct btrfs_fs_info *fs_info, int nr);
-void btrfs_update_delayed_refs_rsv(struct btrfs_trans_handle *trans);
-int btrfs_delayed_refs_rsv_refill(struct btrfs_fs_info *fs_info,
- enum btrfs_reserve_flush_enum flush);
-void btrfs_migrate_to_delayed_refs_rsv(struct btrfs_fs_info *fs_info,
- struct btrfs_block_rsv *src,
- u64 num_bytes);
int btrfs_inc_block_group_ro(struct btrfs_block_group_cache *cache);
void btrfs_dec_block_group_ro(struct btrfs_block_group_cache *cache);
void btrfs_put_block_group_cache(struct btrfs_fs_info *info);
@@ -3186,7 +3084,8 @@ int btrfs_find_name_in_ext_backref(struct extent_buffer *leaf, int slot,
struct btrfs_dio_private;
int btrfs_del_csums(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 bytenr, u64 len);
-blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u32 *dst);
+blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio,
+ u8 *dst);
blk_status_t btrfs_lookup_bio_sums_dio(struct inode *inode, struct bio *bio,
u64 logical_offset);
int btrfs_insert_file_extent(struct btrfs_trans_handle *trans,
@@ -3514,8 +3413,7 @@ __cold
static inline void assfail(const char *expr, const char *file, int line)
{
if (IS_ENABLED(CONFIG_BTRFS_ASSERT)) {
- pr_err("assertion failed: %s, file: %s, line: %d\n",
- expr, file, line);
+ pr_err("assertion failed: %s, in %s:%d\n", expr, file, line);
BUG();
}
}
@@ -3599,10 +3497,11 @@ do { \
/* compatibility and incompatibility defines */
#define btrfs_set_fs_incompat(__fs_info, opt) \
- __btrfs_set_fs_incompat((__fs_info), BTRFS_FEATURE_INCOMPAT_##opt)
+ __btrfs_set_fs_incompat((__fs_info), BTRFS_FEATURE_INCOMPAT_##opt, \
+ #opt)
static inline void __btrfs_set_fs_incompat(struct btrfs_fs_info *fs_info,
- u64 flag)
+ u64 flag, const char* name)
{
struct btrfs_super_block *disk_super;
u64 features;
@@ -3615,18 +3514,20 @@ static inline void __btrfs_set_fs_incompat(struct btrfs_fs_info *fs_info,
if (!(features & flag)) {
features |= flag;
btrfs_set_super_incompat_flags(disk_super, features);
- btrfs_info(fs_info, "setting %llu feature flag",
- flag);
+ btrfs_info(fs_info,
+ "setting incompat feature flag for %s (0x%llx)",
+ name, flag);
}
spin_unlock(&fs_info->super_lock);
}
}
#define btrfs_clear_fs_incompat(__fs_info, opt) \
- __btrfs_clear_fs_incompat((__fs_info), BTRFS_FEATURE_INCOMPAT_##opt)
+ __btrfs_clear_fs_incompat((__fs_info), BTRFS_FEATURE_INCOMPAT_##opt, \
+ #opt)
static inline void __btrfs_clear_fs_incompat(struct btrfs_fs_info *fs_info,
- u64 flag)
+ u64 flag, const char* name)
{
struct btrfs_super_block *disk_super;
u64 features;
@@ -3639,8 +3540,9 @@ static inline void __btrfs_clear_fs_incompat(struct btrfs_fs_info *fs_info,
if (features & flag) {
features &= ~flag;
btrfs_set_super_incompat_flags(disk_super, features);
- btrfs_info(fs_info, "clearing %llu feature flag",
- flag);
+ btrfs_info(fs_info,
+ "clearing incompat feature flag for %s (0x%llx)",
+ name, flag);
}
spin_unlock(&fs_info->super_lock);
}
@@ -3657,10 +3559,11 @@ static inline bool __btrfs_fs_incompat(struct btrfs_fs_info *fs_info, u64 flag)
}
#define btrfs_set_fs_compat_ro(__fs_info, opt) \
- __btrfs_set_fs_compat_ro((__fs_info), BTRFS_FEATURE_COMPAT_RO_##opt)
+ __btrfs_set_fs_compat_ro((__fs_info), BTRFS_FEATURE_COMPAT_RO_##opt, \
+ #opt)
static inline void __btrfs_set_fs_compat_ro(struct btrfs_fs_info *fs_info,
- u64 flag)
+ u64 flag, const char *name)
{
struct btrfs_super_block *disk_super;
u64 features;
@@ -3673,18 +3576,20 @@ static inline void __btrfs_set_fs_compat_ro(struct btrfs_fs_info *fs_info,
if (!(features & flag)) {
features |= flag;
btrfs_set_super_compat_ro_flags(disk_super, features);
- btrfs_info(fs_info, "setting %llu ro feature flag",
- flag);
+ btrfs_info(fs_info,
+ "setting compat-ro feature flag for %s (0x%llx)",
+ name, flag);
}
spin_unlock(&fs_info->super_lock);
}
}
#define btrfs_clear_fs_compat_ro(__fs_info, opt) \
- __btrfs_clear_fs_compat_ro((__fs_info), BTRFS_FEATURE_COMPAT_RO_##opt)
+ __btrfs_clear_fs_compat_ro((__fs_info), BTRFS_FEATURE_COMPAT_RO_##opt, \
+ #opt)
static inline void __btrfs_clear_fs_compat_ro(struct btrfs_fs_info *fs_info,
- u64 flag)
+ u64 flag, const char *name)
{
struct btrfs_super_block *disk_super;
u64 features;
@@ -3697,8 +3602,9 @@ static inline void __btrfs_clear_fs_compat_ro(struct btrfs_fs_info *fs_info,
if (features & flag) {
features &= ~flag;
btrfs_set_super_compat_ro_flags(disk_super, features);
- btrfs_info(fs_info, "clearing %llu ro feature flag",
- flag);
+ btrfs_info(fs_info,
+ "clearing compat-ro feature flag for %s (0x%llx)",
+ name, flag);
}
spin_unlock(&fs_info->super_lock);
}
diff --git a/fs/btrfs/delalloc-space.c b/fs/btrfs/delalloc-space.c
new file mode 100644
index 000000000000..17f7c0d38768
--- /dev/null
+++ b/fs/btrfs/delalloc-space.c
@@ -0,0 +1,494 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "ctree.h"
+#include "delalloc-space.h"
+#include "block-rsv.h"
+#include "btrfs_inode.h"
+#include "space-info.h"
+#include "transaction.h"
+#include "qgroup.h"
+
+int btrfs_alloc_data_chunk_ondemand(struct btrfs_inode *inode, u64 bytes)
+{
+ struct btrfs_root *root = inode->root;
+ struct btrfs_fs_info *fs_info = root->fs_info;
+ struct btrfs_space_info *data_sinfo = fs_info->data_sinfo;
+ u64 used;
+ int ret = 0;
+ int need_commit = 2;
+ int have_pinned_space;
+
+ /* Make sure bytes are sectorsize aligned */
+ bytes = ALIGN(bytes, fs_info->sectorsize);
+
+ if (btrfs_is_free_space_inode(inode)) {
+ need_commit = 0;
+ ASSERT(current->journal_info);
+ }
+
+again:
+ /* Make sure we have enough space to handle the data first */
+ spin_lock(&data_sinfo->lock);
+ used = btrfs_space_info_used(data_sinfo, true);
+
+ if (used + bytes > data_sinfo->total_bytes) {
+ struct btrfs_trans_handle *trans;
+
+ /*
+ * If we don't have enough free bytes in this space then we need
+ * to alloc a new chunk.
+ */
+ if (!data_sinfo->full) {
+ u64 alloc_target;
+
+ data_sinfo->force_alloc = CHUNK_ALLOC_FORCE;
+ spin_unlock(&data_sinfo->lock);
+
+ alloc_target = btrfs_data_alloc_profile(fs_info);
+ /*
+ * It is ugly that we don't call the nolock join
+ * transaction for the free space inode case here.
+ * But it is safe because we only do the data space
+ * reservation for the free space cache in the
+ * transaction context; the common join transaction
+ * just increases the counter of the current transaction
+ * handle and doesn't try to acquire the trans_lock of
+ * the fs.
+ */
+ trans = btrfs_join_transaction(root);
+ if (IS_ERR(trans))
+ return PTR_ERR(trans);
+
+ ret = btrfs_chunk_alloc(trans, alloc_target,
+ CHUNK_ALLOC_NO_FORCE);
+ btrfs_end_transaction(trans);
+ if (ret < 0) {
+ if (ret != -ENOSPC)
+ return ret;
+ else {
+ have_pinned_space = 1;
+ goto commit_trans;
+ }
+ }
+
+ goto again;
+ }
+
+ /*
+ * If we don't have enough pinned space to deal with this
+ * allocation, and no removed chunk in current transaction,
+ * don't bother committing the transaction.
+ */
+ have_pinned_space = __percpu_counter_compare(
+ &data_sinfo->total_bytes_pinned,
+ used + bytes - data_sinfo->total_bytes,
+ BTRFS_TOTAL_BYTES_PINNED_BATCH);
+ spin_unlock(&data_sinfo->lock);
+
+ /* Commit the current transaction and try again */
+commit_trans:
+ if (need_commit) {
+ need_commit--;
+
+ if (need_commit > 0) {
+ btrfs_start_delalloc_roots(fs_info, -1);
+ btrfs_wait_ordered_roots(fs_info, U64_MAX, 0,
+ (u64)-1);
+ }
+
+ trans = btrfs_join_transaction(root);
+ if (IS_ERR(trans))
+ return PTR_ERR(trans);
+ if (have_pinned_space >= 0 ||
+ test_bit(BTRFS_TRANS_HAVE_FREE_BGS,
+ &trans->transaction->flags) ||
+ need_commit > 0) {
+ ret = btrfs_commit_transaction(trans);
+ if (ret)
+ return ret;
+ /*
+ * The cleaner kthread might still be doing iput
+ * operations. Wait for it to finish so that
+ * more space is released. We don't need to
+ * explicitly run the delayed iputs here because
+ * the commit_transaction would have woken up
+ * the cleaner.
+ */
+ ret = btrfs_wait_on_delayed_iputs(fs_info);
+ if (ret)
+ return ret;
+ goto again;
+ } else {
+ btrfs_end_transaction(trans);
+ }
+ }
+
+ trace_btrfs_space_reservation(fs_info,
+ "space_info:enospc",
+ data_sinfo->flags, bytes, 1);
+ return -ENOSPC;
+ }
+ btrfs_space_info_update_bytes_may_use(fs_info, data_sinfo, bytes);
+ trace_btrfs_space_reservation(fs_info, "space_info",
+ data_sinfo->flags, bytes, 1);
+ spin_unlock(&data_sinfo->lock);
+
+ return 0;
+}
+
+int btrfs_check_data_free_space(struct inode *inode,
+ struct extent_changeset **reserved, u64 start, u64 len)
+{
+ struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ int ret;
+
+ /* align the range */
+ len = round_up(start + len, fs_info->sectorsize) -
+ round_down(start, fs_info->sectorsize);
+ start = round_down(start, fs_info->sectorsize);
+
+ ret = btrfs_alloc_data_chunk_ondemand(BTRFS_I(inode), len);
+ if (ret < 0)
+ return ret;
+
+ /* Use new btrfs_qgroup_reserve_data to reserve precious data space. */
+ ret = btrfs_qgroup_reserve_data(inode, reserved, start, len);
+ if (ret < 0)
+ btrfs_free_reserved_data_space_noquota(inode, start, len);
+ else
+ ret = 0;
+ return ret;
+}
+
+/*
+ * Called if we need to clear a data reservation for this inode,
+ * normally in an error case.
+ *
+ * This one will *NOT* use the accurate qgroup reserved space API. It is only
+ * for cases where we can't sleep and are sure it won't affect qgroup reserved
+ * space, like clear_bit_hook().
+ */
+void btrfs_free_reserved_data_space_noquota(struct inode *inode, u64 start,
+ u64 len)
+{
+ struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_space_info *data_sinfo;
+
+ /* Make sure the range is aligned to sectorsize */
+ len = round_up(start + len, fs_info->sectorsize) -
+ round_down(start, fs_info->sectorsize);
+ start = round_down(start, fs_info->sectorsize);
+
+ data_sinfo = fs_info->data_sinfo;
+ spin_lock(&data_sinfo->lock);
+ btrfs_space_info_update_bytes_may_use(fs_info, data_sinfo, -len);
+ trace_btrfs_space_reservation(fs_info, "space_info",
+ data_sinfo->flags, len, 0);
+ spin_unlock(&data_sinfo->lock);
+}
+
+/*
+ * Called if we need to clear a data reservation for this inode,
+ * normally in an error case.
+ *
+ * This one will handle the per-inode data rsv map for the accurate reserved
+ * space framework.
+ */
+void btrfs_free_reserved_data_space(struct inode *inode,
+ struct extent_changeset *reserved, u64 start, u64 len)
+{
+ struct btrfs_root *root = BTRFS_I(inode)->root;
+
+ /* Make sure the range is aligned to sectorsize */
+ len = round_up(start + len, root->fs_info->sectorsize) -
+ round_down(start, root->fs_info->sectorsize);
+ start = round_down(start, root->fs_info->sectorsize);
+
+ btrfs_free_reserved_data_space_noquota(inode, start, len);
+ btrfs_qgroup_free_data(inode, reserved, start, len);
+}
+
+/**
+ * btrfs_inode_rsv_release - release any excessive reservation.
+ * @inode: the inode we need to release from.
+ * @qgroup_free: free or convert qgroup meta.
+ * Unlike normal operation, qgroup meta reservation needs to know if we are
+ * freeing qgroup reservation or just converting it into per-trans. Normally
+ * @qgroup_free is true for error handling, and false for normal release.
+ *
+ * This is the same as btrfs_block_rsv_release, except that it handles the
+ * tracepoint for the reservation.
+ */
+static void btrfs_inode_rsv_release(struct btrfs_inode *inode, bool qgroup_free)
+{
+ struct btrfs_fs_info *fs_info = inode->root->fs_info;
+ struct btrfs_block_rsv *block_rsv = &inode->block_rsv;
+ u64 released = 0;
+ u64 qgroup_to_release = 0;
+
+ /*
+ * Since we statically set the block_rsv->size we just want to say we
+ * are releasing 0 bytes, and then we'll just get the reservation over
+ * the size free'd.
+ */
+ released = __btrfs_block_rsv_release(fs_info, block_rsv, 0,
+ &qgroup_to_release);
+ if (released > 0)
+ trace_btrfs_space_reservation(fs_info, "delalloc",
+ btrfs_ino(inode), released, 0);
+ if (qgroup_free)
+ btrfs_qgroup_free_meta_prealloc(inode->root, qgroup_to_release);
+ else
+ btrfs_qgroup_convert_reserved_meta(inode->root,
+ qgroup_to_release);
+}
+
+static void btrfs_calculate_inode_block_rsv_size(struct btrfs_fs_info *fs_info,
+ struct btrfs_inode *inode)
+{
+ struct btrfs_block_rsv *block_rsv = &inode->block_rsv;
+ u64 reserve_size = 0;
+ u64 qgroup_rsv_size = 0;
+ u64 csum_leaves;
+ unsigned outstanding_extents;
+
+ lockdep_assert_held(&inode->lock);
+ outstanding_extents = inode->outstanding_extents;
+ if (outstanding_extents)
+ reserve_size = btrfs_calc_trans_metadata_size(fs_info,
+ outstanding_extents + 1);
+ csum_leaves = btrfs_csum_bytes_to_leaves(fs_info,
+ inode->csum_bytes);
+ reserve_size += btrfs_calc_trans_metadata_size(fs_info,
+ csum_leaves);
+ /*
+ * For qgroup rsv, the calculation is very simple:
+ * account one nodesize for each outstanding extent
+ *
+ * This is overestimating in most cases.
+ */
+ qgroup_rsv_size = (u64)outstanding_extents * fs_info->nodesize;
+
+ spin_lock(&block_rsv->lock);
+ block_rsv->size = reserve_size;
+ block_rsv->qgroup_rsv_size = qgroup_rsv_size;
+ spin_unlock(&block_rsv->lock);
+}
+
+static void calc_inode_reservations(struct btrfs_fs_info *fs_info,
+ u64 num_bytes, u64 *meta_reserve,
+ u64 *qgroup_reserve)
+{
+ u64 nr_extents = count_max_extents(num_bytes);
+ u64 csum_leaves = btrfs_csum_bytes_to_leaves(fs_info, num_bytes);
+
+ /* We add one for the inode update at finish ordered time */
+ *meta_reserve = btrfs_calc_trans_metadata_size(fs_info,
+ nr_extents + csum_leaves + 1);
+ *qgroup_reserve = nr_extents * fs_info->nodesize;
+}
+
+int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes)
+{
+ struct btrfs_root *root = inode->root;
+ struct btrfs_fs_info *fs_info = root->fs_info;
+ struct btrfs_block_rsv *block_rsv = &inode->block_rsv;
+ u64 meta_reserve, qgroup_reserve;
+ unsigned nr_extents;
+ enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_FLUSH_ALL;
+ int ret = 0;
+ bool delalloc_lock = true;
+
+ /*
+ * Reserve qgroup and metadata space for num_bytes of delalloc on this
+ * inode. Returns 0 on success, otherwise the error of whichever
+ * reservation call failed.
+ *
+ * If we are a free space inode we need to not flush since we will be in
+ * the middle of a transaction commit. We also don't need the delalloc
+ * mutex since we won't race with anybody. We need this mostly to make
+ * lockdep shut its filthy mouth.
+ *
+ * If we have a transaction open (can happen if we call truncate_block
+ * from truncate), then we need FLUSH_LIMIT so we don't deadlock.
+ *
+ * NOTE(review): schedule_timeout(1) below is called without any task
+ * state change visible in this function; confirm that the intended
+ * one-jiffy back-off really happens while a commit is in progress.
+ */
+ if (btrfs_is_free_space_inode(inode)) {
+ flush = BTRFS_RESERVE_NO_FLUSH;
+ delalloc_lock = false;
+ } else {
+ if (current->journal_info)
+ flush = BTRFS_RESERVE_FLUSH_LIMIT;
+
+ if (btrfs_transaction_in_commit(fs_info))
+ schedule_timeout(1);
+ }
+
+ if (delalloc_lock)
+ mutex_lock(&inode->delalloc_mutex);
+
+ num_bytes = ALIGN(num_bytes, fs_info->sectorsize);
+
+ /*
+ * We always want to do it this way, every other way is wrong and ends
+ * in tears. Pre-reserving the amount we are going to add will always
+ * be the right way, because otherwise if we have enough parallelism we
+ * could end up with thousands of inodes all holding little bits of
+ * reservations they were able to make previously and the only way to
+ * reclaim that space is to ENOSPC out the operations and clear
+ * everything out and try again, which is bad. This way we just
+ * over-reserve slightly, and clean up the mess when we are done.
+ *
+ * The qgroup reservation is taken first, then the metadata bytes; the
+ * error labels at the bottom unwind in the opposite order.
+ */
+ calc_inode_reservations(fs_info, num_bytes, &meta_reserve,
+ &qgroup_reserve);
+ ret = btrfs_qgroup_reserve_meta_prealloc(root, qgroup_reserve, true);
+ if (ret)
+ goto out_fail;
+ ret = btrfs_reserve_metadata_bytes(root, block_rsv, meta_reserve, flush);
+ if (ret)
+ goto out_qgroup;
+
+ /*
+ * Now we need to update our outstanding extents and csum bytes _first_
+ * and then add the reservation to the block_rsv. This keeps us from
+ * racing with an ordered completion or some such that would think it
+ * needs to free the reservation we just made.
+ */
+ spin_lock(&inode->lock);
+ nr_extents = count_max_extents(num_bytes);
+ btrfs_mod_outstanding_extents(inode, nr_extents);
+ inode->csum_bytes += num_bytes;
+ btrfs_calculate_inode_block_rsv_size(fs_info, inode);
+ spin_unlock(&inode->lock);
+
+ /* Now we can safely add our space to our block rsv */
+ btrfs_block_rsv_add_bytes(block_rsv, meta_reserve, false);
+ trace_btrfs_space_reservation(root->fs_info, "delalloc",
+ btrfs_ino(inode), meta_reserve, 1);
+
+ spin_lock(&block_rsv->lock);
+ block_rsv->qgroup_rsv_reserved += qgroup_reserve;
+ spin_unlock(&block_rsv->lock);
+
+ if (delalloc_lock)
+ mutex_unlock(&inode->delalloc_mutex);
+ return 0;
+out_qgroup: /* metadata reservation failed: give back the qgroup prealloc taken above */
+ btrfs_qgroup_free_meta_prealloc(root, qgroup_reserve);
+out_fail: /* common failure exit; also reached directly when the qgroup reservation fails */
+ btrfs_inode_rsv_release(inode, true);
+ if (delalloc_lock)
+ mutex_unlock(&inode->delalloc_mutex);
+ return ret;
+}
+
+/**
+ * btrfs_delalloc_release_metadata - release a metadata reservation for an inode
+ * @inode: the inode to release the reservation for.
+ * @num_bytes: the number of bytes we are releasing.
+ * @qgroup_free: free qgroup reservation or convert it to per-trans reservation
+ *
+ * This will release the metadata reservation for an inode. This can be called
+ * once we complete IO for a given set of bytes to release their metadata
+ * reservations, or on error for the same reason.
+ *
+ * @num_bytes is rounded up to the sector size and subtracted from the inode's
+ * csum_bytes under inode->lock, after which the inode block rsv size is
+ * recalculated. For a testing fs_info the function stops there; otherwise the
+ * inode rsv is released with @qgroup_free passed through.
+ */
+void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes,
+ bool qgroup_free)
+{
+ struct btrfs_fs_info *fs_info = inode->root->fs_info;
+
+ num_bytes = ALIGN(num_bytes, fs_info->sectorsize);
+ spin_lock(&inode->lock);
+ inode->csum_bytes -= num_bytes;
+ btrfs_calculate_inode_block_rsv_size(fs_info, inode);
+ spin_unlock(&inode->lock);
+
+ if (btrfs_is_testing(fs_info))
+ return;
+
+ btrfs_inode_rsv_release(inode, qgroup_free);
+}
+
+/**
+ * btrfs_delalloc_release_extents - release our outstanding_extents
+ * @inode: the inode to balance the reservation for.
+ * @num_bytes: the number of bytes we originally reserved with
+ * @qgroup_free: do we need to free qgroup meta reservation or convert them.
+ *
+ * When we reserve space we increase outstanding_extents for the extents we may
+ * add. Once we've set the range as delalloc or created our ordered extents we
+ * have outstanding_extents to track the real usage, so we use this to free our
+ * temporarily tracked outstanding_extents. This _must_ be used in conjunction
+ * with btrfs_delalloc_reserve_metadata.
+ *
+ * The extent count is derived from @num_bytes with count_max_extents(), the
+ * same helper the reserve side uses, and is dropped under inode->lock before
+ * the inode block rsv size is recalculated. For a testing fs_info the function
+ * stops there; otherwise the inode rsv is released with @qgroup_free passed
+ * through.
+ */
+void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes,
+ bool qgroup_free)
+{
+ struct btrfs_fs_info *fs_info = inode->root->fs_info;
+ unsigned num_extents;
+
+ spin_lock(&inode->lock);
+ num_extents = count_max_extents(num_bytes);
+ btrfs_mod_outstanding_extents(inode, -num_extents);
+ btrfs_calculate_inode_block_rsv_size(fs_info, inode);
+ spin_unlock(&inode->lock);
+
+ if (btrfs_is_testing(fs_info))
+ return;
+
+ btrfs_inode_rsv_release(inode, qgroup_free);
+}
+
+/**
+ * btrfs_delalloc_reserve_space - reserve data and metadata space for
+ * delalloc
+ * @inode: inode we're writing to
+ * @reserved: mandatory parameter, record actually reserved qgroup ranges of
+ * current reservation.
+ * @start: start range we are writing to
+ * @len: how long the range we are writing to
+ *
+ * This will do the following things
+ *
+ * - reserve space in data space info for num bytes
+ * and reserve the corresponding qgroup space
+ * (Done in check_data_free_space)
+ *
+ * - reserve space for metadata space, based on the number of outstanding
+ * extents and how much csums will be needed
+ * also reserve metadata space in a per root over-reserve method.
+ * - add to the inodes->delalloc_bytes
+ * - add it to the fs_info's delalloc inodes list.
+ * (Above 3 all done in delalloc_reserve_metadata)
+ *
+ * If the metadata reservation fails, the data reservation made by the first
+ * step is released again before the error is returned.
+ *
+ * Return 0 for success
+ * Return <0 for error(-ENOSPC or -EQUOT)
+ */
+int btrfs_delalloc_reserve_space(struct inode *inode,
+ struct extent_changeset **reserved, u64 start, u64 len)
+{
+ int ret;
+
+ ret = btrfs_check_data_free_space(inode, reserved, start, len);
+ if (ret < 0)
+ return ret;
+ ret = btrfs_delalloc_reserve_metadata(BTRFS_I(inode), len);
+ if (ret < 0)
+ btrfs_free_reserved_data_space(inode, *reserved, start, len);
+ return ret;
+}
+
+/**
+ * btrfs_delalloc_release_space - release data and metadata space for delalloc
+ * @inode: inode we're releasing space for
+ * @reserved: changeset handed on to btrfs_free_reserved_data_space()
+ * @start: start position of the space already reserved
+ * @len: the len of the space already reserved
+ * @qgroup_free: handed on to btrfs_delalloc_release_metadata(); free the qgroup
+ * meta reservation or convert it
+ *
+ * This function will release the metadata space that was not used and will
+ * decrement ->delalloc_bytes and remove it from the fs_info delalloc_inodes
+ * list if there are no delalloc bytes left.
+ * Also it will handle the qgroup reserved space.
+ *
+ * The metadata reservation is released first, then the data reservation for
+ * the same @len.
+ */
+void btrfs_delalloc_release_space(struct inode *inode,
+ struct extent_changeset *reserved,
+ u64 start, u64 len, bool qgroup_free)
+{
+ btrfs_delalloc_release_metadata(BTRFS_I(inode), len, qgroup_free);
+ btrfs_free_reserved_data_space(inode, reserved, start, len);
+}
diff --git a/fs/btrfs/delalloc-space.h b/fs/btrfs/delalloc-space.h
new file mode 100644
index 000000000000..54466fbd7075
--- /dev/null
+++ b/fs/btrfs/delalloc-space.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef BTRFS_DELALLOC_SPACE_H
+#define BTRFS_DELALLOC_SPACE_H
+
+struct extent_changeset;
+
+int btrfs_alloc_data_chunk_ondemand(struct btrfs_inode *inode, u64 bytes);
+int btrfs_check_data_free_space(struct inode *inode,
+ struct extent_changeset **reserved, u64 start, u64 len);
+void btrfs_free_reserved_data_space(struct inode *inode,
+ struct extent_changeset *reserved, u64 start, u64 len);
+void btrfs_delalloc_release_space(struct inode *inode,
+ struct extent_changeset *reserved,
+ u64 start, u64 len, bool qgroup_free);
+void btrfs_free_reserved_data_space_noquota(struct inode *inode, u64 start,
+ u64 len);
+void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes,
+ bool qgroup_free);
+int btrfs_delalloc_reserve_space(struct inode *inode,
+ struct extent_changeset **reserved, u64 start, u64 len);
+
+#endif /* BTRFS_DELALLOC_SPACE_H */
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index a73fc23e2961..9a91d1eb0af4 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -10,6 +10,7 @@
#include "delayed-ref.h"
#include "transaction.h"
#include "qgroup.h"
+#include "space-info.h"
struct kmem_cache *btrfs_delayed_ref_head_cachep;
struct kmem_cache *btrfs_delayed_tree_ref_cachep;
@@ -24,6 +25,179 @@ struct kmem_cache *btrfs_delayed_extent_op_cachep;
* of hammering updates on the extent allocation tree.
*/
+bool btrfs_check_space_for_delayed_refs(struct btrfs_fs_info *fs_info)
+{
+ struct btrfs_block_rsv *delayed_refs_rsv = &fs_info->delayed_refs_rsv;
+ struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
+ bool ret = false;
+ u64 reserved;
+
+ spin_lock(&global_rsv->lock);
+ reserved = global_rsv->reserved;
+ spin_unlock(&global_rsv->lock);
+
+ /*
+ * Returns true when the delayed refs rsv size has reached or passed
+ * what is reserved in the global rsv and the delayed refs rsv together.
+ * The two rsvs are sampled one after the other, each under its own
+ * lock, so the sum is not an atomic snapshot of both.
+ *
+ * Since the global reserve is just kind of magic we don't really want
+ * to rely on it to save our bacon, so if our size is more than the
+ * delayed_refs_rsv and the global rsv then it's time to think about
+ * bailing.
+ */
+ spin_lock(&delayed_refs_rsv->lock);
+ reserved += delayed_refs_rsv->reserved;
+ if (delayed_refs_rsv->size >= reserved)
+ ret = true;
+ spin_unlock(&delayed_refs_rsv->lock);
+ return ret;
+}
+/*
+ * Decide whether the caller should throttle on delayed refs.
+ *
+ * The number of queued delayed ref entries is multiplied by the average
+ * delayed ref runtime. Returns 1 when that estimate reaches NSEC_PER_SEC,
+ * 2 when it reaches half of NSEC_PER_SEC, and otherwise the result of
+ * btrfs_check_space_for_delayed_refs() (0 or 1).
+ */
+int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans)
+{
+ u64 num_entries =
+ atomic_read(&trans->transaction->delayed_refs.num_entries);
+ u64 avg_runtime;
+ u64 val;
+
+ smp_mb();
+ avg_runtime = trans->fs_info->avg_delayed_ref_runtime;
+ val = num_entries * avg_runtime;
+ if (val >= NSEC_PER_SEC)
+ return 1;
+ if (val >= NSEC_PER_SEC / 2)
+ return 2;
+
+ return btrfs_check_space_for_delayed_refs(trans->fs_info);
+}
+
+/**
+ * btrfs_delayed_refs_rsv_release - release a ref head's reservation.
+ * @fs_info: the fs_info for our fs.
+ * @nr: the number of items to drop.
+ *
+ * This drops the delayed ref head's count from the delayed refs rsv and frees
+ * any excess reservation we had.
+ *
+ * @nr is converted to bytes with btrfs_calc_trans_metadata_size(); a
+ * tracepoint is emitted only when the release call reports a non-zero amount.
+ */
+void btrfs_delayed_refs_rsv_release(struct btrfs_fs_info *fs_info, int nr)
+{
+ struct btrfs_block_rsv *block_rsv = &fs_info->delayed_refs_rsv;
+ u64 num_bytes = btrfs_calc_trans_metadata_size(fs_info, nr);
+ u64 released = 0;
+
+ released = __btrfs_block_rsv_release(fs_info, block_rsv, num_bytes,
+ NULL);
+ if (released)
+ trace_btrfs_space_reservation(fs_info, "delayed_refs_rsv",
+ 0, released, 0);
+}
+
+/**
+ * btrfs_update_delayed_refs_rsv - adjust the size of the delayed refs rsv
+ * @trans: the trans that may have generated delayed refs
+ *
+ * This is to be called anytime we may have adjusted trans->delayed_ref_updates,
+ * it'll calculate the additional size and add it to the delayed_refs_rsv.
+ *
+ * Only the rsv's size grows here and its full flag is cleared; nothing is
+ * added to its reserved bytes. trans->delayed_ref_updates is reset to 0
+ * afterwards, and the call is a no-op when it is already 0.
+ */
+void btrfs_update_delayed_refs_rsv(struct btrfs_trans_handle *trans)
+{
+ struct btrfs_fs_info *fs_info = trans->fs_info;
+ struct btrfs_block_rsv *delayed_rsv = &fs_info->delayed_refs_rsv;
+ u64 num_bytes;
+
+ if (!trans->delayed_ref_updates)
+ return;
+
+ num_bytes = btrfs_calc_trans_metadata_size(fs_info,
+ trans->delayed_ref_updates);
+ spin_lock(&delayed_rsv->lock);
+ delayed_rsv->size += num_bytes;
+ delayed_rsv->full = 0;
+ spin_unlock(&delayed_rsv->lock);
+ trans->delayed_ref_updates = 0;
+}
+
+/**
+ * btrfs_migrate_to_delayed_refs_rsv - transfer bytes to our delayed refs rsv.
+ * @fs_info: the fs info for our fs.
+ * @src: the source block rsv to transfer from.
+ * @num_bytes: the number of bytes to transfer.
+ *
+ * This transfers up to the num_bytes amount from the src rsv to the
+ * delayed_refs_rsv. Any extra bytes are returned to the space info.
+ *
+ * The delayed refs rsv only takes as much as it is short of its size; it is
+ * marked full once reserved has reached size.
+ *
+ * NOTE(review): @num_bytes is subtracted from src->reserved and src->size
+ * without a bounds check in this function; presumably callers guarantee that
+ * @src holds at least that much - confirm against the callers.
+ */
+void btrfs_migrate_to_delayed_refs_rsv(struct btrfs_fs_info *fs_info,
+ struct btrfs_block_rsv *src,
+ u64 num_bytes)
+{
+ struct btrfs_block_rsv *delayed_refs_rsv = &fs_info->delayed_refs_rsv;
+ u64 to_free = 0;
+
+ spin_lock(&src->lock);
+ src->reserved -= num_bytes;
+ src->size -= num_bytes;
+ spin_unlock(&src->lock);
+
+ spin_lock(&delayed_refs_rsv->lock);
+ if (delayed_refs_rsv->size > delayed_refs_rsv->reserved) {
+ u64 delta = delayed_refs_rsv->size -
+ delayed_refs_rsv->reserved;
+ if (num_bytes > delta) {
+ to_free = num_bytes - delta;
+ num_bytes = delta;
+ }
+ } else {
+ to_free = num_bytes;
+ num_bytes = 0;
+ }
+
+ if (num_bytes)
+ delayed_refs_rsv->reserved += num_bytes;
+ if (delayed_refs_rsv->reserved >= delayed_refs_rsv->size)
+ delayed_refs_rsv->full = 1;
+ spin_unlock(&delayed_refs_rsv->lock);
+
+ if (num_bytes)
+ trace_btrfs_space_reservation(fs_info, "delayed_refs_rsv",
+ 0, num_bytes, 1);
+ if (to_free)
+ btrfs_space_info_add_old_bytes(fs_info,
+ delayed_refs_rsv->space_info, to_free);
+}
+
+/**
+ * btrfs_delayed_refs_rsv_refill - refill based on our delayed refs usage.
+ * @fs_info: the fs_info for our fs.
+ * @flush: control how we can flush for this reservation.
+ *
+ * This will refill the delayed block_rsv by at most one item's worth of space
+ * per call. Returns 0 when the rsv is not short of its size or the
+ * reservation succeeded, otherwise the error from
+ * btrfs_reserve_metadata_bytes() (presumably -ENOSPC when the reservation
+ * cannot be made).
+ */
+int btrfs_delayed_refs_rsv_refill(struct btrfs_fs_info *fs_info,
+ enum btrfs_reserve_flush_enum flush)
+{
+ struct btrfs_block_rsv *block_rsv = &fs_info->delayed_refs_rsv;
+ u64 limit = btrfs_calc_trans_metadata_size(fs_info, 1);
+ u64 num_bytes = 0;
+ int ret = -ENOSPC; /* NOTE(review): overwritten below before it is ever read */
+
+ spin_lock(&block_rsv->lock);
+ if (block_rsv->reserved < block_rsv->size) {
+ num_bytes = block_rsv->size - block_rsv->reserved;
+ num_bytes = min(num_bytes, limit);
+ }
+ spin_unlock(&block_rsv->lock);
+
+ if (!num_bytes)
+ return 0;
+
+ ret = btrfs_reserve_metadata_bytes(fs_info->extent_root, block_rsv,
+ num_bytes, flush);
+ if (ret)
+ return ret;
+ btrfs_block_rsv_add_bytes(block_rsv, num_bytes, 0);
+ trace_btrfs_space_reservation(fs_info, "delayed_refs_rsv",
+ 0, num_bytes, 1);
+ return 0;
+}
+
/*
* compare two delayed tree backrefs with same bytenr and type
*/
@@ -957,13 +1131,14 @@ int btrfs_add_delayed_extent_op(struct btrfs_trans_handle *trans,
}
/*
- * this does a simple search for the head node for a given extent.
- * It must be called with the delayed ref spinlock held, and it returns
- * the head node if any where found, or NULL if not.
+ * This does a simple search for the head node for a given extent. Returns the
+ * head node if found, or NULL if not.
*/
struct btrfs_delayed_ref_head *
btrfs_find_delayed_ref_head(struct btrfs_delayed_ref_root *delayed_refs, u64 bytenr)
{
+ lockdep_assert_held(&delayed_refs->lock);
+
return find_ref_head(delayed_refs, bytenr, false);
}
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h
index c18f93ea88ed..1c977e6d45dc 100644
--- a/fs/btrfs/delayed-ref.h
+++ b/fs/btrfs/delayed-ref.h
@@ -364,6 +364,16 @@ struct btrfs_delayed_ref_head *btrfs_select_ref_head(
int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info, u64 seq);
+void btrfs_delayed_refs_rsv_release(struct btrfs_fs_info *fs_info, int nr);
+void btrfs_update_delayed_refs_rsv(struct btrfs_trans_handle *trans);
+int btrfs_delayed_refs_rsv_refill(struct btrfs_fs_info *fs_info,
+ enum btrfs_reserve_flush_enum flush);
+void btrfs_migrate_to_delayed_refs_rsv(struct btrfs_fs_info *fs_info,
+ struct btrfs_block_rsv *src,
+ u64 num_bytes);
+int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans);
+bool btrfs_check_space_for_delayed_refs(struct btrfs_fs_info *fs_info);
+
/*
* helper functions to cast a node into its container
*/
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index ee0989c7e3a9..6b2e9aa83ffa 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -201,7 +201,7 @@ static int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
return PTR_ERR(bdev);
}
- filemap_write_and_wait(bdev->bd_inode->i_mapping);
+ sync_blockdev(bdev);
devices = &fs_info->fs_devices->devices;
list_for_each_entry(device, devices, dev_list) {
@@ -237,7 +237,6 @@ static int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
}
rcu_assign_pointer(device->name, name);
- mutex_lock(&fs_info->fs_devices->device_list_mutex);
set_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
device->generation = 0;
device->io_width = fs_info->sectorsize;
@@ -256,6 +255,8 @@ static int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
device->dev_stats_valid = 1;
set_blocksize(device->bdev, BTRFS_BDEV_BLOCKSIZE);
device->fs_devices = fs_info->fs_devices;
+
+ mutex_lock(&fs_info->fs_devices->device_list_mutex);
list_add(&device->dev_list, &fs_info->fs_devices->devices);
fs_info->fs_devices->num_devices++;
fs_info->fs_devices->open_devices++;
@@ -399,7 +400,6 @@ static int btrfs_dev_replace_start(struct btrfs_fs_info *fs_info,
int ret;
struct btrfs_device *tgt_device = NULL;
struct btrfs_device *src_device = NULL;
- bool need_unlock;
src_device = btrfs_find_device_by_devspec(fs_info, srcdevid,
srcdev_name);
@@ -413,11 +413,6 @@ static int btrfs_dev_replace_start(struct btrfs_fs_info *fs_info,
return -ETXTBSY;
}
- ret = btrfs_init_dev_replace_tgtdev(fs_info, tgtdev_name,
- src_device, &tgt_device);
- if (ret)
- return ret;
-
/*
* Here we commit the transaction to make sure commit_total_bytes
* of all the devices are updated.
@@ -431,7 +426,11 @@ static int btrfs_dev_replace_start(struct btrfs_fs_info *fs_info,
return PTR_ERR(trans);
}
- need_unlock = true;
+ ret = btrfs_init_dev_replace_tgtdev(fs_info, tgtdev_name,
+ src_device, &tgt_device);
+ if (ret)
+ return ret;
+
down_write(&dev_replace->rwsem);
switch (dev_replace->replace_state) {
case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED:
@@ -442,11 +441,11 @@ static int btrfs_dev_replace_start(struct btrfs_fs_info *fs_info,
case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED:
ASSERT(0);
ret = BTRFS_IOCTL_DEV_REPLACE_RESULT_ALREADY_STARTED;
+ up_write(&dev_replace->rwsem);
goto leave;
}
dev_replace->cont_reading_from_srcdev_mode = read_src;
- WARN_ON(!src_device);
dev_replace->srcdev = src_device;
dev_replace->tgtdev = tgt_device;
@@ -471,7 +470,6 @@ static int btrfs_dev_replace_start(struct btrfs_fs_info *fs_info,
atomic64_set(&dev_replace->num_write_errors, 0);
atomic64_set(&dev_replace->num_uncorrectable_read_errors, 0);
up_write(&dev_replace->rwsem);
- need_unlock = false;
ret = btrfs_sysfs_add_device_link(tgt_device->fs_devices, tgt_device);
if (ret)
@@ -479,16 +477,16 @@ static int btrfs_dev_replace_start(struct btrfs_fs_info *fs_info,
btrfs_wait_ordered_roots(fs_info, U64_MAX, 0, (u64)-1);
- /* force writing the updated state information to disk */
- trans = btrfs_start_transaction(root, 0);
+ /* Commit dev_replace state and reserve 1 item for it. */
+ trans = btrfs_start_transaction(root, 1);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
- need_unlock = true;
down_write(&dev_replace->rwsem);
dev_replace->replace_state =
BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED;
dev_replace->srcdev = NULL;
dev_replace->tgtdev = NULL;
+ up_write(&dev_replace->rwsem);
goto leave;
}
@@ -510,8 +508,6 @@ static int btrfs_dev_replace_start(struct btrfs_fs_info *fs_info,
return ret;
leave:
- if (need_unlock)
- up_write(&dev_replace->rwsem);
btrfs_destroy_dev_replace_tgtdev(tgt_device);
return ret;
}
@@ -678,7 +674,6 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
btrfs_device_set_disk_total_bytes(tgt_device,
src_device->disk_total_bytes);
btrfs_device_set_bytes_used(tgt_device, src_device->bytes_used);
- tgt_device->commit_total_bytes = src_device->commit_total_bytes;
tgt_device->commit_bytes_used = src_device->bytes_used;
btrfs_assign_next_active_device(src_device, tgt_device);
@@ -728,7 +723,7 @@ static void btrfs_dev_replace_update_device_in_mapping_tree(
struct btrfs_device *srcdev,
struct btrfs_device *tgtdev)
{
- struct extent_map_tree *em_tree = &fs_info->mapping_tree.map_tree;
+ struct extent_map_tree *em_tree = &fs_info->mapping_tree;
struct extent_map *em;
struct map_lookup *map;
u64 start = 0;
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index deb74a8c191a..41a2bd2e0c56 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -19,6 +19,7 @@
#include <linux/crc32c.h>
#include <linux/sched/mm.h>
#include <asm/unaligned.h>
+#include <crypto/hash.h>
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
@@ -40,10 +41,6 @@
#include "tree-checker.h"
#include "ref-verify.h"
-#ifdef CONFIG_X86
-#include <asm/cpufeature.h>
-#endif
-
#define BTRFS_SUPER_FLAG_SUPP (BTRFS_HEADER_FLAG_WRITTEN |\
BTRFS_HEADER_FLAG_RELOC |\
BTRFS_SUPER_FLAG_ERROR |\
@@ -249,16 +246,6 @@ out:
return em;
}
-u32 btrfs_csum_data(const char *data, u32 seed, size_t len)
-{
- return crc32c(seed, data, len);
-}
-
-void btrfs_csum_final(u32 crc, u8 *result)
-{
- put_unaligned_le32(~crc, result);
-}
-
/*
* Compute the csum of a btree block and store the result to provided buffer.
*
@@ -266,6 +253,8 @@ void btrfs_csum_final(u32 crc, u8 *result)
*/
static int csum_tree_block(struct extent_buffer *buf, u8 *result)
{
+ struct btrfs_fs_info *fs_info = buf->fs_info;
+ SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
unsigned long len;
unsigned long cur_len;
unsigned long offset = BTRFS_CSUM_SIZE;
@@ -273,9 +262,12 @@ static int csum_tree_block(struct extent_buffer *buf, u8 *result)
unsigned long map_start;
unsigned long map_len;
int err;
- u32 crc = ~(u32)0;
+
+ shash->tfm = fs_info->csum_shash;
+ crypto_shash_init(shash);
len = buf->len - offset;
+
while (len > 0) {
/*
* Note: we don't need to check for the err == 1 case here, as
@@ -288,14 +280,13 @@ static int csum_tree_block(struct extent_buffer *buf, u8 *result)
if (WARN_ON(err))
return err;
cur_len = min(len, map_len - (offset - map_start));
- crc = btrfs_csum_data(kaddr + offset - map_start,
- crc, cur_len);
+ crypto_shash_update(shash, kaddr + offset - map_start, cur_len);
len -= cur_len;
offset += cur_len;
}
memset(result, 0, BTRFS_CSUM_SIZE);
- btrfs_csum_final(crc, result);
+ crypto_shash_final(shash, result);
return 0;
}
@@ -356,6 +347,16 @@ out:
return ret;
}
+static bool btrfs_supported_super_csum(u16 csum_type)
+{
+ switch (csum_type) { /* tells callers whether csum_type is an accepted checksum type */
+ case BTRFS_CSUM_TYPE_CRC32: /* the only type accepted here */
+ return true;
+ default: /* every other value is reported as unsupported */
+ return false;
+ }
+}
+
/*
* Return 0 if the superblock checksum type matches the checksum value of that
* algorithm. Pass the raw disk superblock data.
@@ -365,33 +366,25 @@ static int btrfs_check_super_csum(struct btrfs_fs_info *fs_info,
{
struct btrfs_super_block *disk_sb =
(struct btrfs_super_block *)raw_disk_sb;
- u16 csum_type = btrfs_super_csum_type(disk_sb);
- int ret = 0;
+ char result[BTRFS_CSUM_SIZE];
+ SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
- if (csum_type == BTRFS_CSUM_TYPE_CRC32) {
- u32 crc = ~(u32)0;
- char result[sizeof(crc)];
+ shash->tfm = fs_info->csum_shash;
+ crypto_shash_init(shash);
- /*
- * The super_block structure does not span the whole
- * BTRFS_SUPER_INFO_SIZE range, we expect that the unused space
- * is filled with zeros and is included in the checksum.
- */
- crc = btrfs_csum_data(raw_disk_sb + BTRFS_CSUM_SIZE,
- crc, BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE);
- btrfs_csum_final(crc, result);
+ /*
+ * The super_block structure does not span the whole
+ * BTRFS_SUPER_INFO_SIZE range, we expect that the unused space is
+ * filled with zeros and is included in the checksum.
+ */
+ crypto_shash_update(shash, raw_disk_sb + BTRFS_CSUM_SIZE,
+ BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE);
+ crypto_shash_final(shash, result);
- if (memcmp(raw_disk_sb, result, sizeof(result)))
- ret = 1;
- }
+ if (memcmp(disk_sb->csum, result, btrfs_super_csum_size(disk_sb)))
+ return 1;
- if (csum_type >= ARRAY_SIZE(btrfs_csum_sizes)) {
- btrfs_err(fs_info, "unsupported checksum algorithm %u",
- csum_type);
- ret = 1;
- }
-
- return ret;
+ return 0;
}
int btrfs_verify_level_key(struct extent_buffer *eb, int level,
@@ -873,14 +866,13 @@ static blk_status_t btree_submit_bio_start(void *private_data, struct bio *bio,
return btree_csum_one_bio(bio);
}
-static int check_async_write(struct btrfs_inode *bi)
+static int check_async_write(struct btrfs_fs_info *fs_info,
+ struct btrfs_inode *bi)
{
if (atomic_read(&bi->sync_writers))
return 0;
-#ifdef CONFIG_X86
- if (static_cpu_has(X86_FEATURE_XMM4_2))
+ if (test_bit(BTRFS_FS_CSUM_IMPL_FAST, &fs_info->flags))
return 0;
-#endif
return 1;
}
@@ -889,7 +881,7 @@ static blk_status_t btree_submit_bio_hook(struct inode *inode, struct bio *bio,
unsigned long bio_flags)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
- int async = check_async_write(BTRFS_I(inode));
+ int async = check_async_write(fs_info, BTRFS_I(inode));
blk_status_t ret;
if (bio_op(bio) != REQ_OP_WRITE) {
@@ -2262,6 +2254,29 @@ static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info,
return 0;
}
+static int btrfs_init_csum_hash(struct btrfs_fs_info *fs_info, u16 csum_type)
+{
+ struct crypto_shash *csum_shash;
+ const char *csum_name = btrfs_super_csum_name(csum_type); /* algorithm name looked up from csum_type */
+
+ csum_shash = crypto_alloc_shash(csum_name, 0, 0); /* allocate the shash transform of that name */
+
+ if (IS_ERR(csum_shash)) {
+ btrfs_err(fs_info, "error allocating %s hash for checksum",
+ csum_name);
+ return PTR_ERR(csum_shash); /* hand the allocation error back; fs_info is left untouched */
+ }
+
+ fs_info->csum_shash = csum_shash; /* stored for the checksum code; released by btrfs_free_csum_hash() */
+
+ return 0;
+}
+/*
+ * Release the shash transform stored in fs_info->csum_shash by
+ * btrfs_init_csum_hash(). The pointer in fs_info is not cleared here.
+ */
+static void btrfs_free_csum_hash(struct btrfs_fs_info *fs_info)
+{
+ crypto_free_shash(fs_info->csum_shash);
+}
+
static int btrfs_replay_log(struct btrfs_fs_info *fs_info,
struct btrfs_fs_devices *fs_devices)
{
@@ -2577,7 +2592,7 @@ static int btrfs_validate_write_super(struct btrfs_fs_info *fs_info,
ret = validate_super(fs_info, sb, -1);
if (ret < 0)
goto out;
- if (btrfs_super_csum_type(sb) != BTRFS_CSUM_TYPE_CRC32) {
+ if (!btrfs_supported_super_csum(btrfs_super_csum_type(sb))) {
ret = -EUCLEAN;
btrfs_err(fs_info, "invalid csum type, has %u want %u",
btrfs_super_csum_type(sb), BTRFS_CSUM_TYPE_CRC32);
@@ -2607,6 +2622,7 @@ int open_ctree(struct super_block *sb,
u32 stripesize;
u64 generation;
u64 features;
+ u16 csum_type;
struct btrfs_key location;
struct buffer_head *bh;
struct btrfs_super_block *disk_super;
@@ -2689,7 +2705,7 @@ int open_ctree(struct super_block *sb,
INIT_LIST_HEAD(&fs_info->space_info);
INIT_LIST_HEAD(&fs_info->tree_mod_seq_list);
INIT_LIST_HEAD(&fs_info->unused_bgs);
- btrfs_mapping_init(&fs_info->mapping_tree);
+ extent_map_tree_init(&fs_info->mapping_tree);
btrfs_init_block_rsv(&fs_info->global_block_rsv,
BTRFS_BLOCK_RSV_GLOBAL);
btrfs_init_block_rsv(&fs_info->trans_block_rsv, BTRFS_BLOCK_RSV_TRANS);
@@ -2793,6 +2809,8 @@ int open_ctree(struct super_block *sb,
spin_lock_init(&fs_info->swapfile_pins_lock);
fs_info->swapfile_pins = RB_ROOT;
+ fs_info->send_in_progress = 0;
+
ret = btrfs_alloc_stripe_hash_table(fs_info);
if (ret) {
err = ret;
@@ -2813,6 +2831,25 @@ int open_ctree(struct super_block *sb,
}
/*
+ * Verify the type first, if that or the checksum value are
+ * corrupted, we'll find out
+ */
+ csum_type = btrfs_super_csum_type((struct btrfs_super_block *)bh->b_data);
+ if (!btrfs_supported_super_csum(csum_type)) {
+ btrfs_err(fs_info, "unsupported checksum algorithm: %u",
+ csum_type);
+ err = -EINVAL;
+ brelse(bh);
+ goto fail_alloc;
+ }
+
+ ret = btrfs_init_csum_hash(fs_info, csum_type);
+ if (ret) {
+ err = ret;
+ goto fail_alloc;
+ }
+
+ /*
* We want to check superblock checksum, the type is stored inside.
* Pass the whole disk block of size BTRFS_SUPER_INFO_SIZE (4k).
*/
@@ -2820,7 +2857,7 @@ int open_ctree(struct super_block *sb,
btrfs_err(fs_info, "superblock checksum mismatch");
err = -EINVAL;
brelse(bh);
- goto fail_alloc;
+ goto fail_csum;
}
/*
@@ -2857,11 +2894,11 @@ int open_ctree(struct super_block *sb,
if (ret) {
btrfs_err(fs_info, "superblock contains fatal errors");
err = -EINVAL;
- goto fail_alloc;
+ goto fail_csum;
}
if (!btrfs_super_root(disk_super))
- goto fail_alloc;
+ goto fail_csum;
/* check FS state, whether FS is broken. */
if (btrfs_super_flags(disk_super) & BTRFS_SUPER_FLAG_ERROR)
@@ -2883,7 +2920,7 @@ int open_ctree(struct super_block *sb,
ret = btrfs_parse_options(fs_info, options, sb->s_flags);
if (ret) {
err = ret;
- goto fail_alloc;
+ goto fail_csum;
}
features = btrfs_super_incompat_flags(disk_super) &
@@ -2893,7 +2930,7 @@ int open_ctree(struct super_block *sb,
"cannot mount because of unsupported optional features (%llx)",
features);
err = -EINVAL;
- goto fail_alloc;
+ goto fail_csum;
}
features = btrfs_super_incompat_flags(disk_super);
@@ -2937,7 +2974,7 @@ int open_ctree(struct super_block *sb,
btrfs_err(fs_info,
"unequal nodesize/sectorsize (%u != %u) are not allowed for mixed block groups",
nodesize, sectorsize);
- goto fail_alloc;
+ goto fail_csum;
}
/*
@@ -2953,7 +2990,7 @@ int open_ctree(struct super_block *sb,
"cannot mount read-write because of unsupported optional features (%llx)",
features);
err = -EINVAL;
- goto fail_alloc;
+ goto fail_csum;
}
ret = btrfs_init_workqueues(fs_info, fs_devices);
@@ -3331,6 +3368,8 @@ fail_tree_roots:
fail_sb_buffer:
btrfs_stop_all_workers(fs_info);
btrfs_free_block_groups(fs_info);
+fail_csum:
+ btrfs_free_csum_hash(fs_info);
fail_alloc:
fail_iput:
btrfs_mapping_tree_free(&fs_info->mapping_tree);
@@ -3472,17 +3511,20 @@ struct buffer_head *btrfs_read_dev_super(struct block_device *bdev)
static int write_dev_supers(struct btrfs_device *device,
struct btrfs_super_block *sb, int max_mirrors)
{
+ struct btrfs_fs_info *fs_info = device->fs_info;
+ SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
struct buffer_head *bh;
int i;
int ret;
int errors = 0;
- u32 crc;
u64 bytenr;
int op_flags;
if (max_mirrors == 0)
max_mirrors = BTRFS_SUPER_MIRROR_MAX;
+ shash->tfm = fs_info->csum_shash;
+
for (i = 0; i < max_mirrors; i++) {
bytenr = btrfs_sb_offset(i);
if (bytenr + BTRFS_SUPER_INFO_SIZE >=
@@ -3491,10 +3533,10 @@ static int write_dev_supers(struct btrfs_device *device,
btrfs_set_super_bytenr(sb, bytenr);
- crc = ~(u32)0;
- crc = btrfs_csum_data((const char *)sb + BTRFS_CSUM_SIZE, crc,
- BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE);
- btrfs_csum_final(crc, sb->csum);
+ crypto_shash_init(shash);
+ crypto_shash_update(shash, (const char *)sb + BTRFS_CSUM_SIZE,
+ BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE);
+ crypto_shash_final(shash, sb->csum);
/* One reference for us, and we leave it for the caller */
bh = __getblk(device->bdev, bytenr / BTRFS_BDEV_BLOCKSIZE,
@@ -3709,7 +3751,7 @@ int btrfs_get_num_tolerated_disk_barrier_failures(u64 flags)
if ((flags & BTRFS_BLOCK_GROUP_PROFILE_MASK) == 0 ||
(flags & BTRFS_AVAIL_ALLOC_BIT_SINGLE))
- min_tolerated = min(min_tolerated,
+ min_tolerated = min_t(int, min_tolerated,
btrfs_raid_array[BTRFS_RAID_SINGLE].
tolerated_failures);
@@ -3718,7 +3760,7 @@ int btrfs_get_num_tolerated_disk_barrier_failures(u64 flags)
continue;
if (!(flags & btrfs_raid_array[raid_type].bg_flag))
continue;
- min_tolerated = min(min_tolerated,
+ min_tolerated = min_t(int, min_tolerated,
btrfs_raid_array[raid_type].
tolerated_failures);
}
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index a0161aa1ea0b..e80f7c45a307 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -115,8 +115,6 @@ int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid,
int atomic);
int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid, int level,
struct btrfs_key *first_key);
-u32 btrfs_csum_data(const char *data, u32 seed, size_t len);
-void btrfs_csum_final(u32 crc, u8 *result);
blk_status_t btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
enum btrfs_wq_endio_type metadata);
blk_status_t btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 5faf057f6f37..d3b58e388535 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -28,46 +28,12 @@
#include "sysfs.h"
#include "qgroup.h"
#include "ref-verify.h"
+#include "space-info.h"
+#include "block-rsv.h"
+#include "delalloc-space.h"
#undef SCRAMBLE_DELAYED_REFS
-/*
- * control flags for do_chunk_alloc's force field
- * CHUNK_ALLOC_NO_FORCE means to only allocate a chunk
- * if we really need one.
- *
- * CHUNK_ALLOC_LIMITED means to only try and allocate one
- * if we have very few chunks already allocated. This is
- * used as part of the clustering code to help make sure
- * we have a good pool of storage to cluster in, without
- * filling the FS with empty chunks
- *
- * CHUNK_ALLOC_FORCE means it must try to allocate one
- *
- */
-enum {
- CHUNK_ALLOC_NO_FORCE = 0,
- CHUNK_ALLOC_LIMITED = 1,
- CHUNK_ALLOC_FORCE = 2,
-};
-
-/*
- * Declare a helper function to detect underflow of various space info members
- */
-#define DECLARE_SPACE_INFO_UPDATE(name) \
-static inline void update_##name(struct btrfs_space_info *sinfo, \
- s64 bytes) \
-{ \
- if (bytes < 0 && sinfo->name < -bytes) { \
- WARN_ON(1); \
- sinfo->name = 0; \
- return; \
- } \
- sinfo->name += bytes; \
-}
-
-DECLARE_SPACE_INFO_UPDATE(bytes_may_use);
-DECLARE_SPACE_INFO_UPDATE(bytes_pinned);
static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_node *node, u64 parent,
@@ -84,21 +50,8 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_node *node,
struct btrfs_delayed_extent_op *extent_op);
-static int do_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags,
- int force);
static int find_next_key(struct btrfs_path *path, int level,
struct btrfs_key *key);
-static void dump_space_info(struct btrfs_fs_info *fs_info,
- struct btrfs_space_info *info, u64 bytes,
- int dump_block_groups);
-static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv,
- u64 num_bytes);
-static void space_info_add_new_bytes(struct btrfs_fs_info *fs_info,
- struct btrfs_space_info *space_info,
- u64 num_bytes);
-static void space_info_add_old_bytes(struct btrfs_fs_info *fs_info,
- struct btrfs_space_info *space_info,
- u64 num_bytes);
static noinline int
block_group_cache_done(struct btrfs_block_group_cache *cache)
@@ -737,62 +690,39 @@ struct btrfs_block_group_cache *btrfs_lookup_block_group(
return block_group_cache_tree_search(info, bytenr, 1);
}
-static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
- u64 flags)
+static u64 generic_ref_to_space_flags(struct btrfs_ref *ref)
{
- struct list_head *head = &info->space_info;
- struct btrfs_space_info *found;
-
- flags &= BTRFS_BLOCK_GROUP_TYPE_MASK;
-
- rcu_read_lock();
- list_for_each_entry_rcu(found, head, list) {
- if (found->flags & flags) {
- rcu_read_unlock();
- return found;
- }
+ if (ref->type == BTRFS_REF_METADATA) {
+ if (ref->tree_ref.root == BTRFS_CHUNK_TREE_OBJECTID)
+ return BTRFS_BLOCK_GROUP_SYSTEM;
+ else
+ return BTRFS_BLOCK_GROUP_METADATA;
}
- rcu_read_unlock();
- return NULL;
+ return BTRFS_BLOCK_GROUP_DATA;
}
static void add_pinned_bytes(struct btrfs_fs_info *fs_info,
- struct btrfs_ref *ref, int sign)
+ struct btrfs_ref *ref)
{
struct btrfs_space_info *space_info;
- s64 num_bytes;
- u64 flags;
-
- ASSERT(sign == 1 || sign == -1);
- num_bytes = sign * ref->len;
- if (ref->type == BTRFS_REF_METADATA) {
- if (ref->tree_ref.root == BTRFS_CHUNK_TREE_OBJECTID)
- flags = BTRFS_BLOCK_GROUP_SYSTEM;
- else
- flags = BTRFS_BLOCK_GROUP_METADATA;
- } else {
- flags = BTRFS_BLOCK_GROUP_DATA;
- }
+ u64 flags = generic_ref_to_space_flags(ref);
- space_info = __find_space_info(fs_info, flags);
+ space_info = btrfs_find_space_info(fs_info, flags);
ASSERT(space_info);
- percpu_counter_add_batch(&space_info->total_bytes_pinned, num_bytes,
+ percpu_counter_add_batch(&space_info->total_bytes_pinned, ref->len,
BTRFS_TOTAL_BYTES_PINNED_BATCH);
}
-/*
- * after adding space to the filesystem, we need to clear the full flags
- * on all the space infos.
- */
-void btrfs_clear_space_info_full(struct btrfs_fs_info *info)
+static void sub_pinned_bytes(struct btrfs_fs_info *fs_info,
+ struct btrfs_ref *ref)
{
- struct list_head *head = &info->space_info;
- struct btrfs_space_info *found;
+ struct btrfs_space_info *space_info;
+ u64 flags = generic_ref_to_space_flags(ref);
- rcu_read_lock();
- list_for_each_entry_rcu(found, head, list)
- found->full = 0;
- rcu_read_unlock();
+ space_info = btrfs_find_space_info(fs_info, flags);
+ ASSERT(space_info);
+ percpu_counter_add_batch(&space_info->total_bytes_pinned, -ref->len,
+ BTRFS_TOTAL_BYTES_PINNED_BATCH);
}
/* simple helper to search for an existing data extent at a given offset */
@@ -1121,11 +1051,11 @@ static u64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset)
__le64 lenum;
lenum = cpu_to_le64(root_objectid);
- high_crc = crc32c(high_crc, &lenum, sizeof(lenum));
+ high_crc = btrfs_crc32c(high_crc, &lenum, sizeof(lenum));
lenum = cpu_to_le64(owner);
- low_crc = crc32c(low_crc, &lenum, sizeof(lenum));
+ low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
lenum = cpu_to_le64(offset);
- low_crc = crc32c(low_crc, &lenum, sizeof(lenum));
+ low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
return ((u64)high_crc << 31) ^ (u64)low_crc;
}
@@ -2065,7 +1995,7 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
btrfs_ref_tree_mod(fs_info, generic_ref);
if (ret == 0 && old_ref_mod < 0 && new_ref_mod >= 0)
- add_pinned_bytes(fs_info, generic_ref, -1);
+ sub_pinned_bytes(fs_info, generic_ref);
return ret;
}
@@ -2462,7 +2392,7 @@ void btrfs_cleanup_ref_head_accounting(struct btrfs_fs_info *fs_info,
flags = BTRFS_BLOCK_GROUP_SYSTEM;
else
flags = BTRFS_BLOCK_GROUP_METADATA;
- space_info = __find_space_info(fs_info, flags);
+ space_info = btrfs_find_space_info(fs_info, flags);
ASSERT(space_info);
percpu_counter_add_batch(&space_info->total_bytes_pinned,
-head->num_bytes,
@@ -2824,49 +2754,6 @@ u64 btrfs_csum_bytes_to_leaves(struct btrfs_fs_info *fs_info, u64 csum_bytes)
return num_csums;
}
-bool btrfs_check_space_for_delayed_refs(struct btrfs_fs_info *fs_info)
-{
- struct btrfs_block_rsv *delayed_refs_rsv = &fs_info->delayed_refs_rsv;
- struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
- bool ret = false;
- u64 reserved;
-
- spin_lock(&global_rsv->lock);
- reserved = global_rsv->reserved;
- spin_unlock(&global_rsv->lock);
-
- /*
- * Since the global reserve is just kind of magic we don't really want
- * to rely on it to save our bacon, so if our size is more than the
- * delayed_refs_rsv and the global rsv then it's time to think about
- * bailing.
- */
- spin_lock(&delayed_refs_rsv->lock);
- reserved += delayed_refs_rsv->reserved;
- if (delayed_refs_rsv->size >= reserved)
- ret = true;
- spin_unlock(&delayed_refs_rsv->lock);
- return ret;
-}
-
-int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans)
-{
- u64 num_entries =
- atomic_read(&trans->transaction->delayed_refs.num_entries);
- u64 avg_runtime;
- u64 val;
-
- smp_mb();
- avg_runtime = trans->fs_info->avg_delayed_ref_runtime;
- val = num_entries * avg_runtime;
- if (val >= NSEC_PER_SEC)
- return 1;
- if (val >= NSEC_PER_SEC / 2)
- return 2;
-
- return btrfs_check_space_for_delayed_refs(trans->fs_info);
-}
-
/*
* this starts processing the delayed reference count updates and
* extent insertions we have queued up so far. count can be
@@ -3834,93 +3721,6 @@ void btrfs_wait_nocow_writers(struct btrfs_block_group_cache *bg)
wait_var_event(&bg->nocow_writers, !atomic_read(&bg->nocow_writers));
}
-static const char *alloc_name(u64 flags)
-{
- switch (flags) {
- case BTRFS_BLOCK_GROUP_METADATA|BTRFS_BLOCK_GROUP_DATA:
- return "mixed";
- case BTRFS_BLOCK_GROUP_METADATA:
- return "metadata";
- case BTRFS_BLOCK_GROUP_DATA:
- return "data";
- case BTRFS_BLOCK_GROUP_SYSTEM:
- return "system";
- default:
- WARN_ON(1);
- return "invalid-combination";
- };
-}
-
-static int create_space_info(struct btrfs_fs_info *info, u64 flags)
-{
-
- struct btrfs_space_info *space_info;
- int i;
- int ret;
-
- space_info = kzalloc(sizeof(*space_info), GFP_NOFS);
- if (!space_info)
- return -ENOMEM;
-
- ret = percpu_counter_init(&space_info->total_bytes_pinned, 0,
- GFP_KERNEL);
- if (ret) {
- kfree(space_info);
- return ret;
- }
-
- for (i = 0; i < BTRFS_NR_RAID_TYPES; i++)
- INIT_LIST_HEAD(&space_info->block_groups[i]);
- init_rwsem(&space_info->groups_sem);
- spin_lock_init(&space_info->lock);
- space_info->flags = flags & BTRFS_BLOCK_GROUP_TYPE_MASK;
- space_info->force_alloc = CHUNK_ALLOC_NO_FORCE;
- init_waitqueue_head(&space_info->wait);
- INIT_LIST_HEAD(&space_info->ro_bgs);
- INIT_LIST_HEAD(&space_info->tickets);
- INIT_LIST_HEAD(&space_info->priority_tickets);
-
- ret = kobject_init_and_add(&space_info->kobj, &space_info_ktype,
- info->space_info_kobj, "%s",
- alloc_name(space_info->flags));
- if (ret) {
- kobject_put(&space_info->kobj);
- return ret;
- }
-
- list_add_rcu(&space_info->list, &info->space_info);
- if (flags & BTRFS_BLOCK_GROUP_DATA)
- info->data_sinfo = space_info;
-
- return ret;
-}
-
-static void update_space_info(struct btrfs_fs_info *info, u64 flags,
- u64 total_bytes, u64 bytes_used,
- u64 bytes_readonly,
- struct btrfs_space_info **space_info)
-{
- struct btrfs_space_info *found;
- int factor;
-
- factor = btrfs_bg_type_to_factor(flags);
-
- found = __find_space_info(info, flags);
- ASSERT(found);
- spin_lock(&found->lock);
- found->total_bytes += total_bytes;
- found->disk_total += total_bytes * factor;
- found->bytes_used += bytes_used;
- found->disk_used += bytes_used * factor;
- found->bytes_readonly += bytes_readonly;
- if (total_bytes > 0)
- found->full = 0;
- space_info_add_new_bytes(info, found, total_bytes -
- bytes_used - bytes_readonly);
- spin_unlock(&found->lock);
- *space_info = found;
-}
-
static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
{
u64 extra_flags = chunk_to_extended(flags) &
@@ -4068,215 +3868,6 @@ u64 btrfs_system_alloc_profile(struct btrfs_fs_info *fs_info)
return get_alloc_profile(fs_info, BTRFS_BLOCK_GROUP_SYSTEM);
}
-static u64 btrfs_space_info_used(struct btrfs_space_info *s_info,
- bool may_use_included)
-{
- ASSERT(s_info);
- return s_info->bytes_used + s_info->bytes_reserved +
- s_info->bytes_pinned + s_info->bytes_readonly +
- (may_use_included ? s_info->bytes_may_use : 0);
-}
-
-int btrfs_alloc_data_chunk_ondemand(struct btrfs_inode *inode, u64 bytes)
-{
- struct btrfs_root *root = inode->root;
- struct btrfs_fs_info *fs_info = root->fs_info;
- struct btrfs_space_info *data_sinfo = fs_info->data_sinfo;
- u64 used;
- int ret = 0;
- int need_commit = 2;
- int have_pinned_space;
-
- /* make sure bytes are sectorsize aligned */
- bytes = ALIGN(bytes, fs_info->sectorsize);
-
- if (btrfs_is_free_space_inode(inode)) {
- need_commit = 0;
- ASSERT(current->journal_info);
- }
-
-again:
- /* make sure we have enough space to handle the data first */
- spin_lock(&data_sinfo->lock);
- used = btrfs_space_info_used(data_sinfo, true);
-
- if (used + bytes > data_sinfo->total_bytes) {
- struct btrfs_trans_handle *trans;
-
- /*
- * if we don't have enough free bytes in this space then we need
- * to alloc a new chunk.
- */
- if (!data_sinfo->full) {
- u64 alloc_target;
-
- data_sinfo->force_alloc = CHUNK_ALLOC_FORCE;
- spin_unlock(&data_sinfo->lock);
-
- alloc_target = btrfs_data_alloc_profile(fs_info);
- /*
- * It is ugly that we don't call nolock join
- * transaction for the free space inode case here.
- * But it is safe because we only do the data space
- * reservation for the free space cache in the
- * transaction context, the common join transaction
- * just increase the counter of the current transaction
- * handler, doesn't try to acquire the trans_lock of
- * the fs.
- */
- trans = btrfs_join_transaction(root);
- if (IS_ERR(trans))
- return PTR_ERR(trans);
-
- ret = do_chunk_alloc(trans, alloc_target,
- CHUNK_ALLOC_NO_FORCE);
- btrfs_end_transaction(trans);
- if (ret < 0) {
- if (ret != -ENOSPC)
- return ret;
- else {
- have_pinned_space = 1;
- goto commit_trans;
- }
- }
-
- goto again;
- }
-
- /*
- * If we don't have enough pinned space to deal with this
- * allocation, and no removed chunk in current transaction,
- * don't bother committing the transaction.
- */
- have_pinned_space = __percpu_counter_compare(
- &data_sinfo->total_bytes_pinned,
- used + bytes - data_sinfo->total_bytes,
- BTRFS_TOTAL_BYTES_PINNED_BATCH);
- spin_unlock(&data_sinfo->lock);
-
- /* commit the current transaction and try again */
-commit_trans:
- if (need_commit) {
- need_commit--;
-
- if (need_commit > 0) {
- btrfs_start_delalloc_roots(fs_info, -1);
- btrfs_wait_ordered_roots(fs_info, U64_MAX, 0,
- (u64)-1);
- }
-
- trans = btrfs_join_transaction(root);
- if (IS_ERR(trans))
- return PTR_ERR(trans);
- if (have_pinned_space >= 0 ||
- test_bit(BTRFS_TRANS_HAVE_FREE_BGS,
- &trans->transaction->flags) ||
- need_commit > 0) {
- ret = btrfs_commit_transaction(trans);
- if (ret)
- return ret;
- /*
- * The cleaner kthread might still be doing iput
- * operations. Wait for it to finish so that
- * more space is released. We don't need to
- * explicitly run the delayed iputs here because
- * the commit_transaction would have woken up
- * the cleaner.
- */
- ret = btrfs_wait_on_delayed_iputs(fs_info);
- if (ret)
- return ret;
- goto again;
- } else {
- btrfs_end_transaction(trans);
- }
- }
-
- trace_btrfs_space_reservation(fs_info,
- "space_info:enospc",
- data_sinfo->flags, bytes, 1);
- return -ENOSPC;
- }
- update_bytes_may_use(data_sinfo, bytes);
- trace_btrfs_space_reservation(fs_info, "space_info",
- data_sinfo->flags, bytes, 1);
- spin_unlock(&data_sinfo->lock);
-
- return 0;
-}
-
-int btrfs_check_data_free_space(struct inode *inode,
- struct extent_changeset **reserved, u64 start, u64 len)
-{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
- int ret;
-
- /* align the range */
- len = round_up(start + len, fs_info->sectorsize) -
- round_down(start, fs_info->sectorsize);
- start = round_down(start, fs_info->sectorsize);
-
- ret = btrfs_alloc_data_chunk_ondemand(BTRFS_I(inode), len);
- if (ret < 0)
- return ret;
-
- /* Use new btrfs_qgroup_reserve_data to reserve precious data space. */
- ret = btrfs_qgroup_reserve_data(inode, reserved, start, len);
- if (ret < 0)
- btrfs_free_reserved_data_space_noquota(inode, start, len);
- else
- ret = 0;
- return ret;
-}
-
-/*
- * Called if we need to clear a data reservation for this inode
- * Normally in a error case.
- *
- * This one will *NOT* use accurate qgroup reserved space API, just for case
- * which we can't sleep and is sure it won't affect qgroup reserved space.
- * Like clear_bit_hook().
- */
-void btrfs_free_reserved_data_space_noquota(struct inode *inode, u64 start,
- u64 len)
-{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
- struct btrfs_space_info *data_sinfo;
-
- /* Make sure the range is aligned to sectorsize */
- len = round_up(start + len, fs_info->sectorsize) -
- round_down(start, fs_info->sectorsize);
- start = round_down(start, fs_info->sectorsize);
-
- data_sinfo = fs_info->data_sinfo;
- spin_lock(&data_sinfo->lock);
- update_bytes_may_use(data_sinfo, -len);
- trace_btrfs_space_reservation(fs_info, "space_info",
- data_sinfo->flags, len, 0);
- spin_unlock(&data_sinfo->lock);
-}
-
-/*
- * Called if we need to clear a data reservation for this inode
- * Normally in a error case.
- *
- * This one will handle the per-inode data rsv map for accurate reserved
- * space framework.
- */
-void btrfs_free_reserved_data_space(struct inode *inode,
- struct extent_changeset *reserved, u64 start, u64 len)
-{
- struct btrfs_root *root = BTRFS_I(inode)->root;
-
- /* Make sure the range is aligned to sectorsize */
- len = round_up(start + len, root->fs_info->sectorsize) -
- round_down(start, root->fs_info->sectorsize);
- start = round_down(start, root->fs_info->sectorsize);
-
- btrfs_free_reserved_data_space_noquota(inode, start, len);
- btrfs_qgroup_free_data(inode, reserved, start, len);
-}
-
static void force_metadata_allocation(struct btrfs_fs_info *info)
{
struct list_head *head = &info->space_info;
@@ -4290,11 +3881,6 @@ static void force_metadata_allocation(struct btrfs_fs_info *info)
rcu_read_unlock();
}
-static inline u64 calc_global_rsv_need_space(struct btrfs_block_rsv *global)
-{
- return (global->size << 1);
-}
-
static int should_alloc_chunk(struct btrfs_fs_info *fs_info,
struct btrfs_space_info *sinfo, int force)
{
@@ -4325,15 +3911,9 @@ static u64 get_profile_num_devs(struct btrfs_fs_info *fs_info, u64 type)
{
u64 num_dev;
- if (type & (BTRFS_BLOCK_GROUP_RAID10 |
- BTRFS_BLOCK_GROUP_RAID0 |
- BTRFS_BLOCK_GROUP_RAID5 |
- BTRFS_BLOCK_GROUP_RAID6))
+ num_dev = btrfs_raid_array[btrfs_bg_flags_to_raid_index(type)].devs_max;
+ if (!num_dev)
num_dev = fs_info->fs_devices->rw_devices;
- else if (type & BTRFS_BLOCK_GROUP_RAID1)
- num_dev = 2;
- else
- num_dev = 1; /* DUP or single */
return num_dev;
}
@@ -4358,7 +3938,7 @@ void check_system_chunk(struct btrfs_trans_handle *trans, u64 type)
*/
lockdep_assert_held(&fs_info->chunk_mutex);
- info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM);
+ info = btrfs_find_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM);
spin_lock(&info->lock);
left = info->total_bytes - btrfs_space_info_used(info, true);
spin_unlock(&info->lock);
@@ -4372,7 +3952,7 @@ void check_system_chunk(struct btrfs_trans_handle *trans, u64 type)
if (left < thresh && btrfs_test_opt(fs_info, ENOSPC_DEBUG)) {
btrfs_info(fs_info, "left=%llu, need=%llu, flags=%llu",
left, thresh, type);
- dump_space_info(fs_info, info, 0, 0);
+ btrfs_dump_space_info(fs_info, info, 0, 0);
}
if (left < thresh) {
@@ -4405,8 +3985,8 @@ void check_system_chunk(struct btrfs_trans_handle *trans, u64 type)
* - return 1 if it successfully allocates a chunk,
* - return errors including -ENOSPC otherwise.
*/
-static int do_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags,
- int force)
+int btrfs_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags,
+ enum btrfs_chunk_alloc_enum force)
{
struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_space_info *space_info;
@@ -4418,7 +3998,7 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags,
if (trans->allocating_chunk)
return -ENOSPC;
- space_info = __find_space_info(fs_info, flags);
+ space_info = btrfs_find_space_info(fs_info, flags);
ASSERT(space_info);
do {
@@ -4525,1714 +4105,6 @@ out:
return ret;
}
-static int can_overcommit(struct btrfs_fs_info *fs_info,
- struct btrfs_space_info *space_info, u64 bytes,
- enum btrfs_reserve_flush_enum flush,
- bool system_chunk)
-{
- struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
- u64 profile;
- u64 space_size;
- u64 avail;
- u64 used;
- int factor;
-
- /* Don't overcommit when in mixed mode. */
- if (space_info->flags & BTRFS_BLOCK_GROUP_DATA)
- return 0;
-
- if (system_chunk)
- profile = btrfs_system_alloc_profile(fs_info);
- else
- profile = btrfs_metadata_alloc_profile(fs_info);
-
- used = btrfs_space_info_used(space_info, false);
-
- /*
- * We only want to allow over committing if we have lots of actual space
- * free, but if we don't have enough space to handle the global reserve
- * space then we could end up having a real enospc problem when trying
- * to allocate a chunk or some other such important allocation.
- */
- spin_lock(&global_rsv->lock);
- space_size = calc_global_rsv_need_space(global_rsv);
- spin_unlock(&global_rsv->lock);
- if (used + space_size >= space_info->total_bytes)
- return 0;
-
- used += space_info->bytes_may_use;
-
- avail = atomic64_read(&fs_info->free_chunk_space);
-
- /*
- * If we have dup, raid1 or raid10 then only half of the free
- * space is actually usable. For raid56, the space info used
- * doesn't include the parity drive, so we don't have to
- * change the math
- */
- factor = btrfs_bg_type_to_factor(profile);
- avail = div_u64(avail, factor);
-
- /*
- * If we aren't flushing all things, let us overcommit up to
- * 1/2th of the space. If we can flush, don't let us overcommit
- * too much, let it overcommit up to 1/8 of the space.
- */
- if (flush == BTRFS_RESERVE_FLUSH_ALL)
- avail >>= 3;
- else
- avail >>= 1;
-
- if (used + bytes < space_info->total_bytes + avail)
- return 1;
- return 0;
-}
-
-static void btrfs_writeback_inodes_sb_nr(struct btrfs_fs_info *fs_info,
- unsigned long nr_pages, int nr_items)
-{
- struct super_block *sb = fs_info->sb;
-
- if (down_read_trylock(&sb->s_umount)) {
- writeback_inodes_sb_nr(sb, nr_pages, WB_REASON_FS_FREE_SPACE);
- up_read(&sb->s_umount);
- } else {
- /*
- * We needn't worry the filesystem going from r/w to r/o though
- * we don't acquire ->s_umount mutex, because the filesystem
- * should guarantee the delalloc inodes list be empty after
- * the filesystem is readonly(all dirty pages are written to
- * the disk).
- */
- btrfs_start_delalloc_roots(fs_info, nr_items);
- if (!current->journal_info)
- btrfs_wait_ordered_roots(fs_info, nr_items, 0, (u64)-1);
- }
-}
-
-static inline u64 calc_reclaim_items_nr(struct btrfs_fs_info *fs_info,
- u64 to_reclaim)
-{
- u64 bytes;
- u64 nr;
-
- bytes = btrfs_calc_trans_metadata_size(fs_info, 1);
- nr = div64_u64(to_reclaim, bytes);
- if (!nr)
- nr = 1;
- return nr;
-}
-
-#define EXTENT_SIZE_PER_ITEM SZ_256K
-
-/*
- * shrink metadata reservation for delalloc
- */
-static void shrink_delalloc(struct btrfs_fs_info *fs_info, u64 to_reclaim,
- u64 orig, bool wait_ordered)
-{
- struct btrfs_space_info *space_info;
- struct btrfs_trans_handle *trans;
- u64 delalloc_bytes;
- u64 dio_bytes;
- u64 async_pages;
- u64 items;
- long time_left;
- unsigned long nr_pages;
- int loops;
-
- /* Calc the number of the pages we need flush for space reservation */
- items = calc_reclaim_items_nr(fs_info, to_reclaim);
- to_reclaim = items * EXTENT_SIZE_PER_ITEM;
-
- trans = (struct btrfs_trans_handle *)current->journal_info;
- space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
-
- delalloc_bytes = percpu_counter_sum_positive(
- &fs_info->delalloc_bytes);
- dio_bytes = percpu_counter_sum_positive(&fs_info->dio_bytes);
- if (delalloc_bytes == 0 && dio_bytes == 0) {
- if (trans)
- return;
- if (wait_ordered)
- btrfs_wait_ordered_roots(fs_info, items, 0, (u64)-1);
- return;
- }
-
- /*
- * If we are doing more ordered than delalloc we need to just wait on
- * ordered extents, otherwise we'll waste time trying to flush delalloc
- * that likely won't give us the space back we need.
- */
- if (dio_bytes > delalloc_bytes)
- wait_ordered = true;
-
- loops = 0;
- while ((delalloc_bytes || dio_bytes) && loops < 3) {
- nr_pages = min(delalloc_bytes, to_reclaim) >> PAGE_SHIFT;
-
- /*
- * Triggers inode writeback for up to nr_pages. This will invoke
- * ->writepages callback and trigger delalloc filling
- * (btrfs_run_delalloc_range()).
- */
- btrfs_writeback_inodes_sb_nr(fs_info, nr_pages, items);
-
- /*
- * We need to wait for the compressed pages to start before
- * we continue.
- */
- async_pages = atomic_read(&fs_info->async_delalloc_pages);
- if (!async_pages)
- goto skip_async;
-
- /*
- * Calculate how many compressed pages we want to be written
- * before we continue. I.e if there are more async pages than we
- * require wait_event will wait until nr_pages are written.
- */
- if (async_pages <= nr_pages)
- async_pages = 0;
- else
- async_pages -= nr_pages;
-
- wait_event(fs_info->async_submit_wait,
- atomic_read(&fs_info->async_delalloc_pages) <=
- (int)async_pages);
-skip_async:
- spin_lock(&space_info->lock);
- if (list_empty(&space_info->tickets) &&
- list_empty(&space_info->priority_tickets)) {
- spin_unlock(&space_info->lock);
- break;
- }
- spin_unlock(&space_info->lock);
-
- loops++;
- if (wait_ordered && !trans) {
- btrfs_wait_ordered_roots(fs_info, items, 0, (u64)-1);
- } else {
- time_left = schedule_timeout_killable(1);
- if (time_left)
- break;
- }
- delalloc_bytes = percpu_counter_sum_positive(
- &fs_info->delalloc_bytes);
- dio_bytes = percpu_counter_sum_positive(&fs_info->dio_bytes);
- }
-}
-
-struct reserve_ticket {
- u64 orig_bytes;
- u64 bytes;
- int error;
- struct list_head list;
- wait_queue_head_t wait;
-};
-
-/**
- * maybe_commit_transaction - possibly commit the transaction if its ok to
- * @root - the root we're allocating for
- * @bytes - the number of bytes we want to reserve
- * @force - force the commit
- *
- * This will check to make sure that committing the transaction will actually
- * get us somewhere and then commit the transaction if it does. Otherwise it
- * will return -ENOSPC.
- */
-static int may_commit_transaction(struct btrfs_fs_info *fs_info,
- struct btrfs_space_info *space_info)
-{
- struct reserve_ticket *ticket = NULL;
- struct btrfs_block_rsv *delayed_rsv = &fs_info->delayed_block_rsv;
- struct btrfs_block_rsv *delayed_refs_rsv = &fs_info->delayed_refs_rsv;
- struct btrfs_trans_handle *trans;
- u64 bytes_needed;
- u64 reclaim_bytes = 0;
-
- trans = (struct btrfs_trans_handle *)current->journal_info;
- if (trans)
- return -EAGAIN;
-
- spin_lock(&space_info->lock);
- if (!list_empty(&space_info->priority_tickets))
- ticket = list_first_entry(&space_info->priority_tickets,
- struct reserve_ticket, list);
- else if (!list_empty(&space_info->tickets))
- ticket = list_first_entry(&space_info->tickets,
- struct reserve_ticket, list);
- bytes_needed = (ticket) ? ticket->bytes : 0;
- spin_unlock(&space_info->lock);
-
- if (!bytes_needed)
- return 0;
-
- trans = btrfs_join_transaction(fs_info->extent_root);
- if (IS_ERR(trans))
- return PTR_ERR(trans);
-
- /*
- * See if there is enough pinned space to make this reservation, or if
- * we have block groups that are going to be freed, allowing us to
- * possibly do a chunk allocation the next loop through.
- */
- if (test_bit(BTRFS_TRANS_HAVE_FREE_BGS, &trans->transaction->flags) ||
- __percpu_counter_compare(&space_info->total_bytes_pinned,
- bytes_needed,
- BTRFS_TOTAL_BYTES_PINNED_BATCH) >= 0)
- goto commit;
-
- /*
- * See if there is some space in the delayed insertion reservation for
- * this reservation.
- */
- if (space_info != delayed_rsv->space_info)
- goto enospc;
-
- spin_lock(&delayed_rsv->lock);
- reclaim_bytes += delayed_rsv->reserved;
- spin_unlock(&delayed_rsv->lock);
-
- spin_lock(&delayed_refs_rsv->lock);
- reclaim_bytes += delayed_refs_rsv->reserved;
- spin_unlock(&delayed_refs_rsv->lock);
- if (reclaim_bytes >= bytes_needed)
- goto commit;
- bytes_needed -= reclaim_bytes;
-
- if (__percpu_counter_compare(&space_info->total_bytes_pinned,
- bytes_needed,
- BTRFS_TOTAL_BYTES_PINNED_BATCH) < 0)
- goto enospc;
-
-commit:
- return btrfs_commit_transaction(trans);
-enospc:
- btrfs_end_transaction(trans);
- return -ENOSPC;
-}
-
-/*
- * Try to flush some data based on policy set by @state. This is only advisory
- * and may fail for various reasons. The caller is supposed to examine the
- * state of @space_info to detect the outcome.
- */
-static void flush_space(struct btrfs_fs_info *fs_info,
- struct btrfs_space_info *space_info, u64 num_bytes,
- int state)
-{
- struct btrfs_root *root = fs_info->extent_root;
- struct btrfs_trans_handle *trans;
- int nr;
- int ret = 0;
-
- switch (state) {
- case FLUSH_DELAYED_ITEMS_NR:
- case FLUSH_DELAYED_ITEMS:
- if (state == FLUSH_DELAYED_ITEMS_NR)
- nr = calc_reclaim_items_nr(fs_info, num_bytes) * 2;
- else
- nr = -1;
-
- trans = btrfs_join_transaction(root);
- if (IS_ERR(trans)) {
- ret = PTR_ERR(trans);
- break;
- }
- ret = btrfs_run_delayed_items_nr(trans, nr);
- btrfs_end_transaction(trans);
- break;
- case FLUSH_DELALLOC:
- case FLUSH_DELALLOC_WAIT:
- shrink_delalloc(fs_info, num_bytes * 2, num_bytes,
- state == FLUSH_DELALLOC_WAIT);
- break;
- case FLUSH_DELAYED_REFS_NR:
- case FLUSH_DELAYED_REFS:
- trans = btrfs_join_transaction(root);
- if (IS_ERR(trans)) {
- ret = PTR_ERR(trans);
- break;
- }
- if (state == FLUSH_DELAYED_REFS_NR)
- nr = calc_reclaim_items_nr(fs_info, num_bytes);
- else
- nr = 0;
- btrfs_run_delayed_refs(trans, nr);
- btrfs_end_transaction(trans);
- break;
- case ALLOC_CHUNK:
- case ALLOC_CHUNK_FORCE:
- trans = btrfs_join_transaction(root);
- if (IS_ERR(trans)) {
- ret = PTR_ERR(trans);
- break;
- }
- ret = do_chunk_alloc(trans,
- btrfs_metadata_alloc_profile(fs_info),
- (state == ALLOC_CHUNK) ?
- CHUNK_ALLOC_NO_FORCE : CHUNK_ALLOC_FORCE);
- btrfs_end_transaction(trans);
- if (ret > 0 || ret == -ENOSPC)
- ret = 0;
- break;
- case COMMIT_TRANS:
- /*
- * If we have pending delayed iputs then we could free up a
- * bunch of pinned space, so make sure we run the iputs before
- * we do our pinned bytes check below.
- */
- btrfs_run_delayed_iputs(fs_info);
- btrfs_wait_on_delayed_iputs(fs_info);
-
- ret = may_commit_transaction(fs_info, space_info);
- break;
- default:
- ret = -ENOSPC;
- break;
- }
-
- trace_btrfs_flush_space(fs_info, space_info->flags, num_bytes, state,
- ret);
- return;
-}
-
-static inline u64
-btrfs_calc_reclaim_metadata_size(struct btrfs_fs_info *fs_info,
- struct btrfs_space_info *space_info,
- bool system_chunk)
-{
- struct reserve_ticket *ticket;
- u64 used;
- u64 expected;
- u64 to_reclaim = 0;
-
- list_for_each_entry(ticket, &space_info->tickets, list)
- to_reclaim += ticket->bytes;
- list_for_each_entry(ticket, &space_info->priority_tickets, list)
- to_reclaim += ticket->bytes;
- if (to_reclaim)
- return to_reclaim;
-
- to_reclaim = min_t(u64, num_online_cpus() * SZ_1M, SZ_16M);
- if (can_overcommit(fs_info, space_info, to_reclaim,
- BTRFS_RESERVE_FLUSH_ALL, system_chunk))
- return 0;
-
- used = btrfs_space_info_used(space_info, true);
-
- if (can_overcommit(fs_info, space_info, SZ_1M,
- BTRFS_RESERVE_FLUSH_ALL, system_chunk))
- expected = div_factor_fine(space_info->total_bytes, 95);
- else
- expected = div_factor_fine(space_info->total_bytes, 90);
-
- if (used > expected)
- to_reclaim = used - expected;
- else
- to_reclaim = 0;
- to_reclaim = min(to_reclaim, space_info->bytes_may_use +
- space_info->bytes_reserved);
- return to_reclaim;
-}
-
-static inline int need_do_async_reclaim(struct btrfs_fs_info *fs_info,
- struct btrfs_space_info *space_info,
- u64 used, bool system_chunk)
-{
- u64 thresh = div_factor_fine(space_info->total_bytes, 98);
-
- /* If we're just plain full then async reclaim just slows us down. */
- if ((space_info->bytes_used + space_info->bytes_reserved) >= thresh)
- return 0;
-
- if (!btrfs_calc_reclaim_metadata_size(fs_info, space_info,
- system_chunk))
- return 0;
-
- return (used >= thresh && !btrfs_fs_closing(fs_info) &&
- !test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state));
-}
-
-static bool wake_all_tickets(struct list_head *head)
-{
- struct reserve_ticket *ticket;
-
- while (!list_empty(head)) {
- ticket = list_first_entry(head, struct reserve_ticket, list);
- list_del_init(&ticket->list);
- ticket->error = -ENOSPC;
- wake_up(&ticket->wait);
- if (ticket->bytes != ticket->orig_bytes)
- return true;
- }
- return false;
-}
-
-/*
- * This is for normal flushers, we can wait all goddamned day if we want to. We
- * will loop and continuously try to flush as long as we are making progress.
- * We count progress as clearing off tickets each time we have to loop.
- */
-static void btrfs_async_reclaim_metadata_space(struct work_struct *work)
-{
- struct btrfs_fs_info *fs_info;
- struct btrfs_space_info *space_info;
- u64 to_reclaim;
- int flush_state;
- int commit_cycles = 0;
- u64 last_tickets_id;
-
- fs_info = container_of(work, struct btrfs_fs_info, async_reclaim_work);
- space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
-
- spin_lock(&space_info->lock);
- to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info, space_info,
- false);
- if (!to_reclaim) {
- space_info->flush = 0;
- spin_unlock(&space_info->lock);
- return;
- }
- last_tickets_id = space_info->tickets_id;
- spin_unlock(&space_info->lock);
-
- flush_state = FLUSH_DELAYED_ITEMS_NR;
- do {
- flush_space(fs_info, space_info, to_reclaim, flush_state);
- spin_lock(&space_info->lock);
- if (list_empty(&space_info->tickets)) {
- space_info->flush = 0;
- spin_unlock(&space_info->lock);
- return;
- }
- to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info,
- space_info,
- false);
- if (last_tickets_id == space_info->tickets_id) {
- flush_state++;
- } else {
- last_tickets_id = space_info->tickets_id;
- flush_state = FLUSH_DELAYED_ITEMS_NR;
- if (commit_cycles)
- commit_cycles--;
- }
-
- /*
- * We don't want to force a chunk allocation until we've tried
- * pretty hard to reclaim space. Think of the case where we
- * freed up a bunch of space and so have a lot of pinned space
- * to reclaim. We would rather use that than possibly create a
- * underutilized metadata chunk. So if this is our first run
- * through the flushing state machine skip ALLOC_CHUNK_FORCE and
- * commit the transaction. If nothing has changed the next go
- * around then we can force a chunk allocation.
- */
- if (flush_state == ALLOC_CHUNK_FORCE && !commit_cycles)
- flush_state++;
-
- if (flush_state > COMMIT_TRANS) {
- commit_cycles++;
- if (commit_cycles > 2) {
- if (wake_all_tickets(&space_info->tickets)) {
- flush_state = FLUSH_DELAYED_ITEMS_NR;
- commit_cycles--;
- } else {
- space_info->flush = 0;
- }
- } else {
- flush_state = FLUSH_DELAYED_ITEMS_NR;
- }
- }
- spin_unlock(&space_info->lock);
- } while (flush_state <= COMMIT_TRANS);
-}
-
-void btrfs_init_async_reclaim_work(struct work_struct *work)
-{
- INIT_WORK(work, btrfs_async_reclaim_metadata_space);
-}
-
-static const enum btrfs_flush_state priority_flush_states[] = {
- FLUSH_DELAYED_ITEMS_NR,
- FLUSH_DELAYED_ITEMS,
- ALLOC_CHUNK,
-};
-
-static void priority_reclaim_metadata_space(struct btrfs_fs_info *fs_info,
- struct btrfs_space_info *space_info,
- struct reserve_ticket *ticket)
-{
- u64 to_reclaim;
- int flush_state;
-
- spin_lock(&space_info->lock);
- to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info, space_info,
- false);
- if (!to_reclaim) {
- spin_unlock(&space_info->lock);
- return;
- }
- spin_unlock(&space_info->lock);
-
- flush_state = 0;
- do {
- flush_space(fs_info, space_info, to_reclaim,
- priority_flush_states[flush_state]);
- flush_state++;
- spin_lock(&space_info->lock);
- if (ticket->bytes == 0) {
- spin_unlock(&space_info->lock);
- return;
- }
- spin_unlock(&space_info->lock);
- } while (flush_state < ARRAY_SIZE(priority_flush_states));
-}
-
-static int wait_reserve_ticket(struct btrfs_fs_info *fs_info,
- struct btrfs_space_info *space_info,
- struct reserve_ticket *ticket)
-
-{
- DEFINE_WAIT(wait);
- u64 reclaim_bytes = 0;
- int ret = 0;
-
- spin_lock(&space_info->lock);
- while (ticket->bytes > 0 && ticket->error == 0) {
- ret = prepare_to_wait_event(&ticket->wait, &wait, TASK_KILLABLE);
- if (ret) {
- ret = -EINTR;
- break;
- }
- spin_unlock(&space_info->lock);
-
- schedule();
-
- finish_wait(&ticket->wait, &wait);
- spin_lock(&space_info->lock);
- }
- if (!ret)
- ret = ticket->error;
- if (!list_empty(&ticket->list))
- list_del_init(&ticket->list);
- if (ticket->bytes && ticket->bytes < ticket->orig_bytes)
- reclaim_bytes = ticket->orig_bytes - ticket->bytes;
- spin_unlock(&space_info->lock);
-
- if (reclaim_bytes)
- space_info_add_old_bytes(fs_info, space_info, reclaim_bytes);
- return ret;
-}
-
-/**
- * reserve_metadata_bytes - try to reserve bytes from the block_rsv's space
- * @root - the root we're allocating for
- * @space_info - the space info we want to allocate from
- * @orig_bytes - the number of bytes we want
- * @flush - whether or not we can flush to make our reservation
- *
- * This will reserve orig_bytes number of bytes from the space info associated
- * with the block_rsv. If there is not enough space it will make an attempt to
- * flush out space to make room. It will do this by flushing delalloc if
- * possible or committing the transaction. If flush is 0 then no attempts to
- * regain reservations will be made and this will fail if there is not enough
- * space already.
- */
-static int __reserve_metadata_bytes(struct btrfs_fs_info *fs_info,
- struct btrfs_space_info *space_info,
- u64 orig_bytes,
- enum btrfs_reserve_flush_enum flush,
- bool system_chunk)
-{
- struct reserve_ticket ticket;
- u64 used;
- u64 reclaim_bytes = 0;
- int ret = 0;
-
- ASSERT(orig_bytes);
- ASSERT(!current->journal_info || flush != BTRFS_RESERVE_FLUSH_ALL);
-
- spin_lock(&space_info->lock);
- ret = -ENOSPC;
- used = btrfs_space_info_used(space_info, true);
-
- /*
- * If we have enough space then hooray, make our reservation and carry
- * on. If not see if we can overcommit, and if we can, hooray carry on.
- * If not things get more complicated.
- */
- if (used + orig_bytes <= space_info->total_bytes) {
- update_bytes_may_use(space_info, orig_bytes);
- trace_btrfs_space_reservation(fs_info, "space_info",
- space_info->flags, orig_bytes, 1);
- ret = 0;
- } else if (can_overcommit(fs_info, space_info, orig_bytes, flush,
- system_chunk)) {
- update_bytes_may_use(space_info, orig_bytes);
- trace_btrfs_space_reservation(fs_info, "space_info",
- space_info->flags, orig_bytes, 1);
- ret = 0;
- }
-
- /*
- * If we couldn't make a reservation then setup our reservation ticket
- * and kick the async worker if it's not already running.
- *
- * If we are a priority flusher then we just need to add our ticket to
- * the list and we will do our own flushing further down.
- */
- if (ret && flush != BTRFS_RESERVE_NO_FLUSH) {
- ticket.orig_bytes = orig_bytes;
- ticket.bytes = orig_bytes;
- ticket.error = 0;
- init_waitqueue_head(&ticket.wait);
- if (flush == BTRFS_RESERVE_FLUSH_ALL) {
- list_add_tail(&ticket.list, &space_info->tickets);
- if (!space_info->flush) {
- space_info->flush = 1;
- trace_btrfs_trigger_flush(fs_info,
- space_info->flags,
- orig_bytes, flush,
- "enospc");
- queue_work(system_unbound_wq,
- &fs_info->async_reclaim_work);
- }
- } else {
- list_add_tail(&ticket.list,
- &space_info->priority_tickets);
- }
- } else if (!ret && space_info->flags & BTRFS_BLOCK_GROUP_METADATA) {
- used += orig_bytes;
- /*
- * We will do the space reservation dance during log replay,
- * which means we won't have fs_info->fs_root set, so don't do
- * the async reclaim as we will panic.
- */
- if (!test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags) &&
- need_do_async_reclaim(fs_info, space_info,
- used, system_chunk) &&
- !work_busy(&fs_info->async_reclaim_work)) {
- trace_btrfs_trigger_flush(fs_info, space_info->flags,
- orig_bytes, flush, "preempt");
- queue_work(system_unbound_wq,
- &fs_info->async_reclaim_work);
- }
- }
- spin_unlock(&space_info->lock);
- if (!ret || flush == BTRFS_RESERVE_NO_FLUSH)
- return ret;
-
- if (flush == BTRFS_RESERVE_FLUSH_ALL)
- return wait_reserve_ticket(fs_info, space_info, &ticket);
-
- ret = 0;
- priority_reclaim_metadata_space(fs_info, space_info, &ticket);
- spin_lock(&space_info->lock);
- if (ticket.bytes) {
- if (ticket.bytes < orig_bytes)
- reclaim_bytes = orig_bytes - ticket.bytes;
- list_del_init(&ticket.list);
- ret = -ENOSPC;
- }
- spin_unlock(&space_info->lock);
-
- if (reclaim_bytes)
- space_info_add_old_bytes(fs_info, space_info, reclaim_bytes);
- ASSERT(list_empty(&ticket.list));
- return ret;
-}
-
-/**
- * reserve_metadata_bytes - try to reserve bytes from the block_rsv's space
- * @root - the root we're allocating for
- * @block_rsv - the block_rsv we're allocating for
- * @orig_bytes - the number of bytes we want
- * @flush - whether or not we can flush to make our reservation
- *
- * This will reserve orig_bytes number of bytes from the space info associated
- * with the block_rsv. If there is not enough space it will make an attempt to
- * flush out space to make room. It will do this by flushing delalloc if
- * possible or committing the transaction. If flush is 0 then no attempts to
- * regain reservations will be made and this will fail if there is not enough
- * space already.
- */
-static int reserve_metadata_bytes(struct btrfs_root *root,
- struct btrfs_block_rsv *block_rsv,
- u64 orig_bytes,
- enum btrfs_reserve_flush_enum flush)
-{
- struct btrfs_fs_info *fs_info = root->fs_info;
- struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
- int ret;
- bool system_chunk = (root == fs_info->chunk_root);
-
- ret = __reserve_metadata_bytes(fs_info, block_rsv->space_info,
- orig_bytes, flush, system_chunk);
- if (ret == -ENOSPC &&
- unlikely(root->orphan_cleanup_state == ORPHAN_CLEANUP_STARTED)) {
- if (block_rsv != global_rsv &&
- !block_rsv_use_bytes(global_rsv, orig_bytes))
- ret = 0;
- }
- if (ret == -ENOSPC) {
- trace_btrfs_space_reservation(fs_info, "space_info:enospc",
- block_rsv->space_info->flags,
- orig_bytes, 1);
-
- if (btrfs_test_opt(fs_info, ENOSPC_DEBUG))
- dump_space_info(fs_info, block_rsv->space_info,
- orig_bytes, 0);
- }
- return ret;
-}
-
-static struct btrfs_block_rsv *get_block_rsv(
- const struct btrfs_trans_handle *trans,
- const struct btrfs_root *root)
-{
- struct btrfs_fs_info *fs_info = root->fs_info;
- struct btrfs_block_rsv *block_rsv = NULL;
-
- if (test_bit(BTRFS_ROOT_REF_COWS, &root->state) ||
- (root == fs_info->csum_root && trans->adding_csums) ||
- (root == fs_info->uuid_root))
- block_rsv = trans->block_rsv;
-
- if (!block_rsv)
- block_rsv = root->block_rsv;
-
- if (!block_rsv)
- block_rsv = &fs_info->empty_block_rsv;
-
- return block_rsv;
-}
-
-static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv,
- u64 num_bytes)
-{
- int ret = -ENOSPC;
- spin_lock(&block_rsv->lock);
- if (block_rsv->reserved >= num_bytes) {
- block_rsv->reserved -= num_bytes;
- if (block_rsv->reserved < block_rsv->size)
- block_rsv->full = 0;
- ret = 0;
- }
- spin_unlock(&block_rsv->lock);
- return ret;
-}
-
-static void block_rsv_add_bytes(struct btrfs_block_rsv *block_rsv,
- u64 num_bytes, bool update_size)
-{
- spin_lock(&block_rsv->lock);
- block_rsv->reserved += num_bytes;
- if (update_size)
- block_rsv->size += num_bytes;
- else if (block_rsv->reserved >= block_rsv->size)
- block_rsv->full = 1;
- spin_unlock(&block_rsv->lock);
-}
-
-int btrfs_cond_migrate_bytes(struct btrfs_fs_info *fs_info,
- struct btrfs_block_rsv *dest, u64 num_bytes,
- int min_factor)
-{
- struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
- u64 min_bytes;
-
- if (global_rsv->space_info != dest->space_info)
- return -ENOSPC;
-
- spin_lock(&global_rsv->lock);
- min_bytes = div_factor(global_rsv->size, min_factor);
- if (global_rsv->reserved < min_bytes + num_bytes) {
- spin_unlock(&global_rsv->lock);
- return -ENOSPC;
- }
- global_rsv->reserved -= num_bytes;
- if (global_rsv->reserved < global_rsv->size)
- global_rsv->full = 0;
- spin_unlock(&global_rsv->lock);
-
- block_rsv_add_bytes(dest, num_bytes, true);
- return 0;
-}
-
-/**
- * btrfs_migrate_to_delayed_refs_rsv - transfer bytes to our delayed refs rsv.
- * @fs_info - the fs info for our fs.
- * @src - the source block rsv to transfer from.
- * @num_bytes - the number of bytes to transfer.
- *
- * This transfers up to the num_bytes amount from the src rsv to the
- * delayed_refs_rsv. Any extra bytes are returned to the space info.
- */
-void btrfs_migrate_to_delayed_refs_rsv(struct btrfs_fs_info *fs_info,
- struct btrfs_block_rsv *src,
- u64 num_bytes)
-{
- struct btrfs_block_rsv *delayed_refs_rsv = &fs_info->delayed_refs_rsv;
- u64 to_free = 0;
-
- spin_lock(&src->lock);
- src->reserved -= num_bytes;
- src->size -= num_bytes;
- spin_unlock(&src->lock);
-
- spin_lock(&delayed_refs_rsv->lock);
- if (delayed_refs_rsv->size > delayed_refs_rsv->reserved) {
- u64 delta = delayed_refs_rsv->size -
- delayed_refs_rsv->reserved;
- if (num_bytes > delta) {
- to_free = num_bytes - delta;
- num_bytes = delta;
- }
- } else {
- to_free = num_bytes;
- num_bytes = 0;
- }
-
- if (num_bytes)
- delayed_refs_rsv->reserved += num_bytes;
- if (delayed_refs_rsv->reserved >= delayed_refs_rsv->size)
- delayed_refs_rsv->full = 1;
- spin_unlock(&delayed_refs_rsv->lock);
-
- if (num_bytes)
- trace_btrfs_space_reservation(fs_info, "delayed_refs_rsv",
- 0, num_bytes, 1);
- if (to_free)
- space_info_add_old_bytes(fs_info, delayed_refs_rsv->space_info,
- to_free);
-}
-
-/**
- * btrfs_delayed_refs_rsv_refill - refill based on our delayed refs usage.
- * @fs_info - the fs_info for our fs.
- * @flush - control how we can flush for this reservation.
- *
- * This will refill the delayed block_rsv up to 1 items size worth of space and
- * will return -ENOSPC if we can't make the reservation.
- */
-int btrfs_delayed_refs_rsv_refill(struct btrfs_fs_info *fs_info,
- enum btrfs_reserve_flush_enum flush)
-{
- struct btrfs_block_rsv *block_rsv = &fs_info->delayed_refs_rsv;
- u64 limit = btrfs_calc_trans_metadata_size(fs_info, 1);
- u64 num_bytes = 0;
- int ret = -ENOSPC;
-
- spin_lock(&block_rsv->lock);
- if (block_rsv->reserved < block_rsv->size) {
- num_bytes = block_rsv->size - block_rsv->reserved;
- num_bytes = min(num_bytes, limit);
- }
- spin_unlock(&block_rsv->lock);
-
- if (!num_bytes)
- return 0;
-
- ret = reserve_metadata_bytes(fs_info->extent_root, block_rsv,
- num_bytes, flush);
- if (ret)
- return ret;
- block_rsv_add_bytes(block_rsv, num_bytes, 0);
- trace_btrfs_space_reservation(fs_info, "delayed_refs_rsv",
- 0, num_bytes, 1);
- return 0;
-}
-
-/*
- * This is for space we already have accounted in space_info->bytes_may_use, so
- * basically when we're returning space from block_rsv's.
- */
-static void space_info_add_old_bytes(struct btrfs_fs_info *fs_info,
- struct btrfs_space_info *space_info,
- u64 num_bytes)
-{
- struct reserve_ticket *ticket;
- struct list_head *head;
- u64 used;
- enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_NO_FLUSH;
- bool check_overcommit = false;
-
- spin_lock(&space_info->lock);
- head = &space_info->priority_tickets;
-
- /*
- * If we are over our limit then we need to check and see if we can
- * overcommit, and if we can't then we just need to free up our space
- * and not satisfy any requests.
- */
- used = btrfs_space_info_used(space_info, true);
- if (used - num_bytes >= space_info->total_bytes)
- check_overcommit = true;
-again:
- while (!list_empty(head) && num_bytes) {
- ticket = list_first_entry(head, struct reserve_ticket,
- list);
- /*
- * We use 0 bytes because this space is already reserved, so
- * adding the ticket space would be a double count.
- */
- if (check_overcommit &&
- !can_overcommit(fs_info, space_info, 0, flush, false))
- break;
- if (num_bytes >= ticket->bytes) {
- list_del_init(&ticket->list);
- num_bytes -= ticket->bytes;
- ticket->bytes = 0;
- space_info->tickets_id++;
- wake_up(&ticket->wait);
- } else {
- ticket->bytes -= num_bytes;
- num_bytes = 0;
- }
- }
-
- if (num_bytes && head == &space_info->priority_tickets) {
- head = &space_info->tickets;
- flush = BTRFS_RESERVE_FLUSH_ALL;
- goto again;
- }
- update_bytes_may_use(space_info, -num_bytes);
- trace_btrfs_space_reservation(fs_info, "space_info",
- space_info->flags, num_bytes, 0);
- spin_unlock(&space_info->lock);
-}
-
-/*
- * This is for newly allocated space that isn't accounted in
- * space_info->bytes_may_use yet. So if we allocate a chunk or unpin an extent
- * we use this helper.
- */
-static void space_info_add_new_bytes(struct btrfs_fs_info *fs_info,
- struct btrfs_space_info *space_info,
- u64 num_bytes)
-{
- struct reserve_ticket *ticket;
- struct list_head *head = &space_info->priority_tickets;
-
-again:
- while (!list_empty(head) && num_bytes) {
- ticket = list_first_entry(head, struct reserve_ticket,
- list);
- if (num_bytes >= ticket->bytes) {
- trace_btrfs_space_reservation(fs_info, "space_info",
- space_info->flags,
- ticket->bytes, 1);
- list_del_init(&ticket->list);
- num_bytes -= ticket->bytes;
- update_bytes_may_use(space_info, ticket->bytes);
- ticket->bytes = 0;
- space_info->tickets_id++;
- wake_up(&ticket->wait);
- } else {
- trace_btrfs_space_reservation(fs_info, "space_info",
- space_info->flags,
- num_bytes, 1);
- update_bytes_may_use(space_info, num_bytes);
- ticket->bytes -= num_bytes;
- num_bytes = 0;
- }
- }
-
- if (num_bytes && head == &space_info->priority_tickets) {
- head = &space_info->tickets;
- goto again;
- }
-}
-
-static u64 block_rsv_release_bytes(struct btrfs_fs_info *fs_info,
- struct btrfs_block_rsv *block_rsv,
- struct btrfs_block_rsv *dest, u64 num_bytes,
- u64 *qgroup_to_release_ret)
-{
- struct btrfs_space_info *space_info = block_rsv->space_info;
- u64 qgroup_to_release = 0;
- u64 ret;
-
- spin_lock(&block_rsv->lock);
- if (num_bytes == (u64)-1) {
- num_bytes = block_rsv->size;
- qgroup_to_release = block_rsv->qgroup_rsv_size;
- }
- block_rsv->size -= num_bytes;
- if (block_rsv->reserved >= block_rsv->size) {
- num_bytes = block_rsv->reserved - block_rsv->size;
- block_rsv->reserved = block_rsv->size;
- block_rsv->full = 1;
- } else {
- num_bytes = 0;
- }
- if (block_rsv->qgroup_rsv_reserved >= block_rsv->qgroup_rsv_size) {
- qgroup_to_release = block_rsv->qgroup_rsv_reserved -
- block_rsv->qgroup_rsv_size;
- block_rsv->qgroup_rsv_reserved = block_rsv->qgroup_rsv_size;
- } else {
- qgroup_to_release = 0;
- }
- spin_unlock(&block_rsv->lock);
-
- ret = num_bytes;
- if (num_bytes > 0) {
- if (dest) {
- spin_lock(&dest->lock);
- if (!dest->full) {
- u64 bytes_to_add;
-
- bytes_to_add = dest->size - dest->reserved;
- bytes_to_add = min(num_bytes, bytes_to_add);
- dest->reserved += bytes_to_add;
- if (dest->reserved >= dest->size)
- dest->full = 1;
- num_bytes -= bytes_to_add;
- }
- spin_unlock(&dest->lock);
- }
- if (num_bytes)
- space_info_add_old_bytes(fs_info, space_info,
- num_bytes);
- }
- if (qgroup_to_release_ret)
- *qgroup_to_release_ret = qgroup_to_release;
- return ret;
-}
-
-int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src,
- struct btrfs_block_rsv *dst, u64 num_bytes,
- bool update_size)
-{
- int ret;
-
- ret = block_rsv_use_bytes(src, num_bytes);
- if (ret)
- return ret;
-
- block_rsv_add_bytes(dst, num_bytes, update_size);
- return 0;
-}
-
-void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv, unsigned short type)
-{
- memset(rsv, 0, sizeof(*rsv));
- spin_lock_init(&rsv->lock);
- rsv->type = type;
-}
-
-void btrfs_init_metadata_block_rsv(struct btrfs_fs_info *fs_info,
- struct btrfs_block_rsv *rsv,
- unsigned short type)
-{
- btrfs_init_block_rsv(rsv, type);
- rsv->space_info = __find_space_info(fs_info,
- BTRFS_BLOCK_GROUP_METADATA);
-}
-
-struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_fs_info *fs_info,
- unsigned short type)
-{
- struct btrfs_block_rsv *block_rsv;
-
- block_rsv = kmalloc(sizeof(*block_rsv), GFP_NOFS);
- if (!block_rsv)
- return NULL;
-
- btrfs_init_metadata_block_rsv(fs_info, block_rsv, type);
- return block_rsv;
-}
-
-void btrfs_free_block_rsv(struct btrfs_fs_info *fs_info,
- struct btrfs_block_rsv *rsv)
-{
- if (!rsv)
- return;
- btrfs_block_rsv_release(fs_info, rsv, (u64)-1);
- kfree(rsv);
-}
-
-int btrfs_block_rsv_add(struct btrfs_root *root,
- struct btrfs_block_rsv *block_rsv, u64 num_bytes,
- enum btrfs_reserve_flush_enum flush)
-{
- int ret;
-
- if (num_bytes == 0)
- return 0;
-
- ret = reserve_metadata_bytes(root, block_rsv, num_bytes, flush);
- if (!ret)
- block_rsv_add_bytes(block_rsv, num_bytes, true);
-
- return ret;
-}
-
-int btrfs_block_rsv_check(struct btrfs_block_rsv *block_rsv, int min_factor)
-{
- u64 num_bytes = 0;
- int ret = -ENOSPC;
-
- if (!block_rsv)
- return 0;
-
- spin_lock(&block_rsv->lock);
- num_bytes = div_factor(block_rsv->size, min_factor);
- if (block_rsv->reserved >= num_bytes)
- ret = 0;
- spin_unlock(&block_rsv->lock);
-
- return ret;
-}
-
-int btrfs_block_rsv_refill(struct btrfs_root *root,
- struct btrfs_block_rsv *block_rsv, u64 min_reserved,
- enum btrfs_reserve_flush_enum flush)
-{
- u64 num_bytes = 0;
- int ret = -ENOSPC;
-
- if (!block_rsv)
- return 0;
-
- spin_lock(&block_rsv->lock);
- num_bytes = min_reserved;
- if (block_rsv->reserved >= num_bytes)
- ret = 0;
- else
- num_bytes -= block_rsv->reserved;
- spin_unlock(&block_rsv->lock);
-
- if (!ret)
- return 0;
-
- ret = reserve_metadata_bytes(root, block_rsv, num_bytes, flush);
- if (!ret) {
- block_rsv_add_bytes(block_rsv, num_bytes, false);
- return 0;
- }
-
- return ret;
-}
-
-static u64 __btrfs_block_rsv_release(struct btrfs_fs_info *fs_info,
- struct btrfs_block_rsv *block_rsv,
- u64 num_bytes, u64 *qgroup_to_release)
-{
- struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
- struct btrfs_block_rsv *delayed_rsv = &fs_info->delayed_refs_rsv;
- struct btrfs_block_rsv *target = delayed_rsv;
-
- if (target->full || target == block_rsv)
- target = global_rsv;
-
- if (block_rsv->space_info != target->space_info)
- target = NULL;
-
- return block_rsv_release_bytes(fs_info, block_rsv, target, num_bytes,
- qgroup_to_release);
-}
-
-void btrfs_block_rsv_release(struct btrfs_fs_info *fs_info,
- struct btrfs_block_rsv *block_rsv,
- u64 num_bytes)
-{
- __btrfs_block_rsv_release(fs_info, block_rsv, num_bytes, NULL);
-}
-
-/**
- * btrfs_inode_rsv_release - release any excessive reservation.
- * @inode - the inode we need to release from.
- * @qgroup_free - free or convert qgroup meta.
- * Unlike normal operation, qgroup meta reservation needs to know if we are
- * freeing qgroup reservation or just converting it into per-trans. Normally
- * @qgroup_free is true for error handling, and false for normal release.
- *
- * This is the same as btrfs_block_rsv_release, except that it handles the
- * tracepoint for the reservation.
- */
-static void btrfs_inode_rsv_release(struct btrfs_inode *inode, bool qgroup_free)
-{
- struct btrfs_fs_info *fs_info = inode->root->fs_info;
- struct btrfs_block_rsv *block_rsv = &inode->block_rsv;
- u64 released = 0;
- u64 qgroup_to_release = 0;
-
- /*
- * Since we statically set the block_rsv->size we just want to say we
- * are releasing 0 bytes, and then we'll just get the reservation over
- * the size free'd.
- */
- released = __btrfs_block_rsv_release(fs_info, block_rsv, 0,
- &qgroup_to_release);
- if (released > 0)
- trace_btrfs_space_reservation(fs_info, "delalloc",
- btrfs_ino(inode), released, 0);
- if (qgroup_free)
- btrfs_qgroup_free_meta_prealloc(inode->root, qgroup_to_release);
- else
- btrfs_qgroup_convert_reserved_meta(inode->root,
- qgroup_to_release);
-}
-
-/**
- * btrfs_delayed_refs_rsv_release - release a ref head's reservation.
- * @fs_info - the fs_info for our fs.
- * @nr - the number of items to drop.
- *
- * This drops the delayed ref head's count from the delayed refs rsv and frees
- * any excess reservation we had.
- */
-void btrfs_delayed_refs_rsv_release(struct btrfs_fs_info *fs_info, int nr)
-{
- struct btrfs_block_rsv *block_rsv = &fs_info->delayed_refs_rsv;
- struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
- u64 num_bytes = btrfs_calc_trans_metadata_size(fs_info, nr);
- u64 released = 0;
-
- released = block_rsv_release_bytes(fs_info, block_rsv, global_rsv,
- num_bytes, NULL);
- if (released)
- trace_btrfs_space_reservation(fs_info, "delayed_refs_rsv",
- 0, released, 0);
-}
-
-static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
-{
- struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv;
- struct btrfs_space_info *sinfo = block_rsv->space_info;
- u64 num_bytes;
-
- /*
- * The global block rsv is based on the size of the extent tree, the
- * checksum tree and the root tree. If the fs is empty we want to set
- * it to a minimal amount for safety.
- */
- num_bytes = btrfs_root_used(&fs_info->extent_root->root_item) +
- btrfs_root_used(&fs_info->csum_root->root_item) +
- btrfs_root_used(&fs_info->tree_root->root_item);
- num_bytes = max_t(u64, num_bytes, SZ_16M);
-
- spin_lock(&sinfo->lock);
- spin_lock(&block_rsv->lock);
-
- block_rsv->size = min_t(u64, num_bytes, SZ_512M);
-
- if (block_rsv->reserved < block_rsv->size) {
- num_bytes = btrfs_space_info_used(sinfo, true);
- if (sinfo->total_bytes > num_bytes) {
- num_bytes = sinfo->total_bytes - num_bytes;
- num_bytes = min(num_bytes,
- block_rsv->size - block_rsv->reserved);
- block_rsv->reserved += num_bytes;
- update_bytes_may_use(sinfo, num_bytes);
- trace_btrfs_space_reservation(fs_info, "space_info",
- sinfo->flags, num_bytes,
- 1);
- }
- } else if (block_rsv->reserved > block_rsv->size) {
- num_bytes = block_rsv->reserved - block_rsv->size;
- update_bytes_may_use(sinfo, -num_bytes);
- trace_btrfs_space_reservation(fs_info, "space_info",
- sinfo->flags, num_bytes, 0);
- block_rsv->reserved = block_rsv->size;
- }
-
- if (block_rsv->reserved == block_rsv->size)
- block_rsv->full = 1;
- else
- block_rsv->full = 0;
-
- spin_unlock(&block_rsv->lock);
- spin_unlock(&sinfo->lock);
-}
-
-static void init_global_block_rsv(struct btrfs_fs_info *fs_info)
-{
- struct btrfs_space_info *space_info;
-
- space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM);
- fs_info->chunk_block_rsv.space_info = space_info;
-
- space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
- fs_info->global_block_rsv.space_info = space_info;
- fs_info->trans_block_rsv.space_info = space_info;
- fs_info->empty_block_rsv.space_info = space_info;
- fs_info->delayed_block_rsv.space_info = space_info;
- fs_info->delayed_refs_rsv.space_info = space_info;
-
- fs_info->extent_root->block_rsv = &fs_info->delayed_refs_rsv;
- fs_info->csum_root->block_rsv = &fs_info->delayed_refs_rsv;
- fs_info->dev_root->block_rsv = &fs_info->global_block_rsv;
- fs_info->tree_root->block_rsv = &fs_info->global_block_rsv;
- if (fs_info->quota_root)
- fs_info->quota_root->block_rsv = &fs_info->global_block_rsv;
- fs_info->chunk_root->block_rsv = &fs_info->chunk_block_rsv;
-
- update_global_block_rsv(fs_info);
-}
-
-static void release_global_block_rsv(struct btrfs_fs_info *fs_info)
-{
- block_rsv_release_bytes(fs_info, &fs_info->global_block_rsv, NULL,
- (u64)-1, NULL);
- WARN_ON(fs_info->trans_block_rsv.size > 0);
- WARN_ON(fs_info->trans_block_rsv.reserved > 0);
- WARN_ON(fs_info->chunk_block_rsv.size > 0);
- WARN_ON(fs_info->chunk_block_rsv.reserved > 0);
- WARN_ON(fs_info->delayed_block_rsv.size > 0);
- WARN_ON(fs_info->delayed_block_rsv.reserved > 0);
- WARN_ON(fs_info->delayed_refs_rsv.reserved > 0);
- WARN_ON(fs_info->delayed_refs_rsv.size > 0);
-}
-
-/*
- * btrfs_update_delayed_refs_rsv - adjust the size of the delayed refs rsv
- * @trans - the trans that may have generated delayed refs
- *
- * This is to be called anytime we may have adjusted trans->delayed_ref_updates,
- * it'll calculate the additional size and add it to the delayed_refs_rsv.
- */
-void btrfs_update_delayed_refs_rsv(struct btrfs_trans_handle *trans)
-{
- struct btrfs_fs_info *fs_info = trans->fs_info;
- struct btrfs_block_rsv *delayed_rsv = &fs_info->delayed_refs_rsv;
- u64 num_bytes;
-
- if (!trans->delayed_ref_updates)
- return;
-
- num_bytes = btrfs_calc_trans_metadata_size(fs_info,
- trans->delayed_ref_updates);
- spin_lock(&delayed_rsv->lock);
- delayed_rsv->size += num_bytes;
- delayed_rsv->full = 0;
- spin_unlock(&delayed_rsv->lock);
- trans->delayed_ref_updates = 0;
-}
-
-/*
- * To be called after all the new block groups attached to the transaction
- * handle have been created (btrfs_create_pending_block_groups()).
- */
-void btrfs_trans_release_chunk_metadata(struct btrfs_trans_handle *trans)
-{
- struct btrfs_fs_info *fs_info = trans->fs_info;
-
- if (!trans->chunk_bytes_reserved)
- return;
-
- WARN_ON_ONCE(!list_empty(&trans->new_bgs));
-
- block_rsv_release_bytes(fs_info, &fs_info->chunk_block_rsv, NULL,
- trans->chunk_bytes_reserved, NULL);
- trans->chunk_bytes_reserved = 0;
-}
-
-/*
- * btrfs_subvolume_reserve_metadata() - reserve space for subvolume operation
- * root: the root of the parent directory
- * rsv: block reservation
- * items: the number of items that we need do reservation
- * use_global_rsv: allow fallback to the global block reservation
- *
- * This function is used to reserve the space for snapshot/subvolume
- * creation and deletion. Those operations are different with the
- * common file/directory operations, they change two fs/file trees
- * and root tree, the number of items that the qgroup reserves is
- * different with the free space reservation. So we can not use
- * the space reservation mechanism in start_transaction().
- */
-int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
- struct btrfs_block_rsv *rsv, int items,
- bool use_global_rsv)
-{
- u64 qgroup_num_bytes = 0;
- u64 num_bytes;
- int ret;
- struct btrfs_fs_info *fs_info = root->fs_info;
- struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
-
- if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) {
- /* One for parent inode, two for dir entries */
- qgroup_num_bytes = 3 * fs_info->nodesize;
- ret = btrfs_qgroup_reserve_meta_prealloc(root,
- qgroup_num_bytes, true);
- if (ret)
- return ret;
- }
-
- num_bytes = btrfs_calc_trans_metadata_size(fs_info, items);
- rsv->space_info = __find_space_info(fs_info,
- BTRFS_BLOCK_GROUP_METADATA);
- ret = btrfs_block_rsv_add(root, rsv, num_bytes,
- BTRFS_RESERVE_FLUSH_ALL);
-
- if (ret == -ENOSPC && use_global_rsv)
- ret = btrfs_block_rsv_migrate(global_rsv, rsv, num_bytes, true);
-
- if (ret && qgroup_num_bytes)
- btrfs_qgroup_free_meta_prealloc(root, qgroup_num_bytes);
-
- return ret;
-}
-
-void btrfs_subvolume_release_metadata(struct btrfs_fs_info *fs_info,
- struct btrfs_block_rsv *rsv)
-{
- btrfs_block_rsv_release(fs_info, rsv, (u64)-1);
-}
-
-static void btrfs_calculate_inode_block_rsv_size(struct btrfs_fs_info *fs_info,
- struct btrfs_inode *inode)
-{
- struct btrfs_block_rsv *block_rsv = &inode->block_rsv;
- u64 reserve_size = 0;
- u64 qgroup_rsv_size = 0;
- u64 csum_leaves;
- unsigned outstanding_extents;
-
- lockdep_assert_held(&inode->lock);
- outstanding_extents = inode->outstanding_extents;
- if (outstanding_extents)
- reserve_size = btrfs_calc_trans_metadata_size(fs_info,
- outstanding_extents + 1);
- csum_leaves = btrfs_csum_bytes_to_leaves(fs_info,
- inode->csum_bytes);
- reserve_size += btrfs_calc_trans_metadata_size(fs_info,
- csum_leaves);
- /*
- * For qgroup rsv, the calculation is very simple:
- * account one nodesize for each outstanding extent
- *
- * This is overestimating in most cases.
- */
- qgroup_rsv_size = (u64)outstanding_extents * fs_info->nodesize;
-
- spin_lock(&block_rsv->lock);
- block_rsv->size = reserve_size;
- block_rsv->qgroup_rsv_size = qgroup_rsv_size;
- spin_unlock(&block_rsv->lock);
-}
-
-static void calc_inode_reservations(struct btrfs_fs_info *fs_info,
- u64 num_bytes, u64 *meta_reserve,
- u64 *qgroup_reserve)
-{
- u64 nr_extents = count_max_extents(num_bytes);
- u64 csum_leaves = btrfs_csum_bytes_to_leaves(fs_info, num_bytes);
-
- /* We add one for the inode update at finish ordered time */
- *meta_reserve = btrfs_calc_trans_metadata_size(fs_info,
- nr_extents + csum_leaves + 1);
- *qgroup_reserve = nr_extents * fs_info->nodesize;
-}
-
-int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes)
-{
- struct btrfs_root *root = inode->root;
- struct btrfs_fs_info *fs_info = root->fs_info;
- struct btrfs_block_rsv *block_rsv = &inode->block_rsv;
- u64 meta_reserve, qgroup_reserve;
- unsigned nr_extents;
- enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_FLUSH_ALL;
- int ret = 0;
- bool delalloc_lock = true;
-
- /* If we are a free space inode we need to not flush since we will be in
- * the middle of a transaction commit. We also don't need the delalloc
- * mutex since we won't race with anybody. We need this mostly to make
- * lockdep shut its filthy mouth.
- *
- * If we have a transaction open (can happen if we call truncate_block
- * from truncate), then we need FLUSH_LIMIT so we don't deadlock.
- */
- if (btrfs_is_free_space_inode(inode)) {
- flush = BTRFS_RESERVE_NO_FLUSH;
- delalloc_lock = false;
- } else {
- if (current->journal_info)
- flush = BTRFS_RESERVE_FLUSH_LIMIT;
-
- if (btrfs_transaction_in_commit(fs_info))
- schedule_timeout(1);
- }
-
- if (delalloc_lock)
- mutex_lock(&inode->delalloc_mutex);
-
- num_bytes = ALIGN(num_bytes, fs_info->sectorsize);
-
- /*
- * We always want to do it this way, every other way is wrong and ends
- * in tears. Pre-reserving the amount we are going to add will always
- * be the right way, because otherwise if we have enough parallelism we
- * could end up with thousands of inodes all holding little bits of
- * reservations they were able to make previously and the only way to
- * reclaim that space is to ENOSPC out the operations and clear
- * everything out and try again, which is bad. This way we just
- * over-reserve slightly, and clean up the mess when we are done.
- */
- calc_inode_reservations(fs_info, num_bytes, &meta_reserve,
- &qgroup_reserve);
- ret = btrfs_qgroup_reserve_meta_prealloc(root, qgroup_reserve, true);
- if (ret)
- goto out_fail;
- ret = reserve_metadata_bytes(root, block_rsv, meta_reserve, flush);
- if (ret)
- goto out_qgroup;
-
- /*
- * Now we need to update our outstanding extents and csum bytes _first_
- * and then add the reservation to the block_rsv. This keeps us from
- * racing with an ordered completion or some such that would think it
- * needs to free the reservation we just made.
- */
- spin_lock(&inode->lock);
- nr_extents = count_max_extents(num_bytes);
- btrfs_mod_outstanding_extents(inode, nr_extents);
- inode->csum_bytes += num_bytes;
- btrfs_calculate_inode_block_rsv_size(fs_info, inode);
- spin_unlock(&inode->lock);
-
- /* Now we can safely add our space to our block rsv */
- block_rsv_add_bytes(block_rsv, meta_reserve, false);
- trace_btrfs_space_reservation(root->fs_info, "delalloc",
- btrfs_ino(inode), meta_reserve, 1);
-
- spin_lock(&block_rsv->lock);
- block_rsv->qgroup_rsv_reserved += qgroup_reserve;
- spin_unlock(&block_rsv->lock);
-
- if (delalloc_lock)
- mutex_unlock(&inode->delalloc_mutex);
- return 0;
-out_qgroup:
- btrfs_qgroup_free_meta_prealloc(root, qgroup_reserve);
-out_fail:
- btrfs_inode_rsv_release(inode, true);
- if (delalloc_lock)
- mutex_unlock(&inode->delalloc_mutex);
- return ret;
-}
-
-/**
- * btrfs_delalloc_release_metadata - release a metadata reservation for an inode
- * @inode: the inode to release the reservation for.
- * @num_bytes: the number of bytes we are releasing.
- * @qgroup_free: free qgroup reservation or convert it to per-trans reservation
- *
- * This will release the metadata reservation for an inode. This can be called
- * once we complete IO for a given set of bytes to release their metadata
- * reservations, or on error for the same reason.
- */
-void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes,
- bool qgroup_free)
-{
- struct btrfs_fs_info *fs_info = inode->root->fs_info;
-
- num_bytes = ALIGN(num_bytes, fs_info->sectorsize);
- spin_lock(&inode->lock);
- inode->csum_bytes -= num_bytes;
- btrfs_calculate_inode_block_rsv_size(fs_info, inode);
- spin_unlock(&inode->lock);
-
- if (btrfs_is_testing(fs_info))
- return;
-
- btrfs_inode_rsv_release(inode, qgroup_free);
-}
-
-/**
- * btrfs_delalloc_release_extents - release our outstanding_extents
- * @inode: the inode to balance the reservation for.
- * @num_bytes: the number of bytes we originally reserved with
- * @qgroup_free: do we need to free qgroup meta reservation or convert them.
- *
- * When we reserve space we increase outstanding_extents for the extents we may
- * add. Once we've set the range as delalloc or created our ordered extents we
- * have outstanding_extents to track the real usage, so we use this to free our
- * temporarily tracked outstanding_extents. This _must_ be used in conjunction
- * with btrfs_delalloc_reserve_metadata.
- */
-void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes,
- bool qgroup_free)
-{
- struct btrfs_fs_info *fs_info = inode->root->fs_info;
- unsigned num_extents;
-
- spin_lock(&inode->lock);
- num_extents = count_max_extents(num_bytes);
- btrfs_mod_outstanding_extents(inode, -num_extents);
- btrfs_calculate_inode_block_rsv_size(fs_info, inode);
- spin_unlock(&inode->lock);
-
- if (btrfs_is_testing(fs_info))
- return;
-
- btrfs_inode_rsv_release(inode, qgroup_free);
-}
-
-/**
- * btrfs_delalloc_reserve_space - reserve data and metadata space for
- * delalloc
- * @inode: inode we're writing to
- * @start: start range we are writing to
- * @len: how long the range we are writing to
- * @reserved: mandatory parameter, record actually reserved qgroup ranges of
- * current reservation.
- *
- * This will do the following things
- *
- * o reserve space in data space info for num bytes
- * and reserve precious corresponding qgroup space
- * (Done in check_data_free_space)
- *
- * o reserve space for metadata space, based on the number of outstanding
- * extents and how much csums will be needed
- * also reserve metadata space in a per root over-reserve method.
- * o add to the inodes->delalloc_bytes
- * o add it to the fs_info's delalloc inodes list.
- * (Above 3 all done in delalloc_reserve_metadata)
- *
- * Return 0 for success
- * Return <0 for error(-ENOSPC or -EQUOT)
- */
-int btrfs_delalloc_reserve_space(struct inode *inode,
- struct extent_changeset **reserved, u64 start, u64 len)
-{
- int ret;
-
- ret = btrfs_check_data_free_space(inode, reserved, start, len);
- if (ret < 0)
- return ret;
- ret = btrfs_delalloc_reserve_metadata(BTRFS_I(inode), len);
- if (ret < 0)
- btrfs_free_reserved_data_space(inode, *reserved, start, len);
- return ret;
-}
-
-/**
- * btrfs_delalloc_release_space - release data and metadata space for delalloc
- * @inode: inode we're releasing space for
- * @start: start position of the space already reserved
- * @len: the len of the space already reserved
- * @release_bytes: the len of the space we consumed or didn't use
- *
- * This function will release the metadata space that was not used and will
- * decrement ->delalloc_bytes and remove it from the fs_info delalloc_inodes
- * list if there are no delalloc bytes left.
- * Also it will handle the qgroup reserved space.
- */
-void btrfs_delalloc_release_space(struct inode *inode,
- struct extent_changeset *reserved,
- u64 start, u64 len, bool qgroup_free)
-{
- btrfs_delalloc_release_metadata(BTRFS_I(inode), len, qgroup_free);
- btrfs_free_reserved_data_space(inode, reserved, start, len);
-}
-
static int update_block_group(struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes, int alloc)
{
@@ -6296,7 +4168,8 @@ static int update_block_group(struct btrfs_trans_handle *trans,
old_val -= num_bytes;
btrfs_set_block_group_used(&cache->item, old_val);
cache->pinned += num_bytes;
- update_bytes_pinned(cache->space_info, num_bytes);
+ btrfs_space_info_update_bytes_pinned(info,
+ cache->space_info, num_bytes);
cache->space_info->bytes_used -= num_bytes;
cache->space_info->disk_used -= num_bytes * factor;
spin_unlock(&cache->lock);
@@ -6371,7 +4244,8 @@ static int pin_down_extent(struct btrfs_block_group_cache *cache,
spin_lock(&cache->space_info->lock);
spin_lock(&cache->lock);
cache->pinned += num_bytes;
- update_bytes_pinned(cache->space_info, num_bytes);
+ btrfs_space_info_update_bytes_pinned(fs_info, cache->space_info,
+ num_bytes);
if (reserved) {
cache->reserved -= num_bytes;
cache->space_info->bytes_reserved -= num_bytes;
@@ -6580,7 +4454,8 @@ static int btrfs_add_reserved_bytes(struct btrfs_block_group_cache *cache,
} else {
cache->reserved += num_bytes;
space_info->bytes_reserved += num_bytes;
- update_bytes_may_use(space_info, -ram_bytes);
+ btrfs_space_info_update_bytes_may_use(cache->fs_info,
+ space_info, -ram_bytes);
if (delalloc)
cache->delalloc_bytes += num_bytes;
}
@@ -6646,7 +4521,7 @@ void btrfs_prepare_extent_commit(struct btrfs_fs_info *fs_info)
up_write(&fs_info->commit_root_sem);
- update_global_block_rsv(fs_info);
+ btrfs_update_global_block_rsv(fs_info);
}
/*
@@ -6736,7 +4611,7 @@ static int unpin_extent_range(struct btrfs_fs_info *fs_info,
spin_lock(&space_info->lock);
spin_lock(&cache->lock);
cache->pinned -= len;
- update_bytes_pinned(space_info, -len);
+ btrfs_space_info_update_bytes_pinned(fs_info, space_info, -len);
trace_btrfs_space_reservation(fs_info, "pinned",
space_info->flags, len, 0);
@@ -6757,7 +4632,8 @@ static int unpin_extent_range(struct btrfs_fs_info *fs_info,
to_add = min(len, global_rsv->size -
global_rsv->reserved);
global_rsv->reserved += to_add;
- update_bytes_may_use(space_info, to_add);
+ btrfs_space_info_update_bytes_may_use(fs_info,
+ space_info, to_add);
if (global_rsv->reserved >= global_rsv->size)
global_rsv->full = 1;
trace_btrfs_space_reservation(fs_info,
@@ -6769,8 +4645,8 @@ static int unpin_extent_range(struct btrfs_fs_info *fs_info,
spin_unlock(&global_rsv->lock);
/* Add to any tickets we may have */
if (len)
- space_info_add_new_bytes(fs_info, space_info,
- len);
+ btrfs_space_info_add_new_bytes(fs_info,
+ space_info, len);
}
spin_unlock(&space_info->lock);
}
@@ -7191,7 +5067,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
}
out:
if (pin)
- add_pinned_bytes(fs_info, &generic_ref, 1);
+ add_pinned_bytes(fs_info, &generic_ref);
if (last_ref) {
/*
@@ -7239,7 +5115,7 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_ref *ref)
btrfs_ref_tree_mod(fs_info, ref);
if (ret == 0 && old_ref_mod >= 0 && new_ref_mod < 0)
- add_pinned_bytes(fs_info, ref, 1);
+ add_pinned_bytes(fs_info, ref);
return ret;
}
@@ -7292,10 +5168,10 @@ wait_block_group_cache_done(struct btrfs_block_group_cache *cache)
}
enum btrfs_loop_type {
- LOOP_CACHING_NOWAIT = 0,
- LOOP_CACHING_WAIT = 1,
- LOOP_ALLOC_CHUNK = 2,
- LOOP_NO_EMPTY_SIZE = 3,
+ LOOP_CACHING_NOWAIT,
+ LOOP_CACHING_WAIT,
+ LOOP_ALLOC_CHUNK,
+ LOOP_NO_EMPTY_SIZE,
};
static inline void
@@ -7661,8 +5537,8 @@ static int find_free_extent_update_loop(struct btrfs_fs_info *fs_info,
return ret;
}
- ret = do_chunk_alloc(trans, ffe_ctl->flags,
- CHUNK_ALLOC_FORCE);
+ ret = btrfs_chunk_alloc(trans, ffe_ctl->flags,
+ CHUNK_ALLOC_FORCE);
/*
* If we can't allocate a new chunk we've already looped
@@ -7758,7 +5634,7 @@ static noinline int find_free_extent(struct btrfs_fs_info *fs_info,
trace_find_free_extent(fs_info, num_bytes, empty_size, flags);
- space_info = __find_space_info(fs_info, flags);
+ space_info = btrfs_find_space_info(fs_info, flags);
if (!space_info) {
btrfs_err(fs_info, "No space info for %llu", flags);
return -ENOSPC;
@@ -7863,9 +5739,8 @@ search:
*/
if (!block_group_bits(block_group, flags)) {
u64 extra = BTRFS_BLOCK_GROUP_DUP |
- BTRFS_BLOCK_GROUP_RAID1 |
- BTRFS_BLOCK_GROUP_RAID5 |
- BTRFS_BLOCK_GROUP_RAID6 |
+ BTRFS_BLOCK_GROUP_RAID1_MASK |
+ BTRFS_BLOCK_GROUP_RAID56_MASK |
BTRFS_BLOCK_GROUP_RAID10;
/*
@@ -7984,60 +5859,6 @@ loop:
return ret;
}
-#define DUMP_BLOCK_RSV(fs_info, rsv_name) \
-do { \
- struct btrfs_block_rsv *__rsv = &(fs_info)->rsv_name; \
- spin_lock(&__rsv->lock); \
- btrfs_info(fs_info, #rsv_name ": size %llu reserved %llu", \
- __rsv->size, __rsv->reserved); \
- spin_unlock(&__rsv->lock); \
-} while (0)
-
-static void dump_space_info(struct btrfs_fs_info *fs_info,
- struct btrfs_space_info *info, u64 bytes,
- int dump_block_groups)
-{
- struct btrfs_block_group_cache *cache;
- int index = 0;
-
- spin_lock(&info->lock);
- btrfs_info(fs_info, "space_info %llu has %llu free, is %sfull",
- info->flags,
- info->total_bytes - btrfs_space_info_used(info, true),
- info->full ? "" : "not ");
- btrfs_info(fs_info,
- "space_info total=%llu, used=%llu, pinned=%llu, reserved=%llu, may_use=%llu, readonly=%llu",
- info->total_bytes, info->bytes_used, info->bytes_pinned,
- info->bytes_reserved, info->bytes_may_use,
- info->bytes_readonly);
- spin_unlock(&info->lock);
-
- DUMP_BLOCK_RSV(fs_info, global_block_rsv);
- DUMP_BLOCK_RSV(fs_info, trans_block_rsv);
- DUMP_BLOCK_RSV(fs_info, chunk_block_rsv);
- DUMP_BLOCK_RSV(fs_info, delayed_block_rsv);
- DUMP_BLOCK_RSV(fs_info, delayed_refs_rsv);
-
- if (!dump_block_groups)
- return;
-
- down_read(&info->groups_sem);
-again:
- list_for_each_entry(cache, &info->block_groups[index], list) {
- spin_lock(&cache->lock);
- btrfs_info(fs_info,
- "block group %llu has %llu bytes, %llu used %llu pinned %llu reserved %s",
- cache->key.objectid, cache->key.offset,
- btrfs_block_group_used(&cache->item), cache->pinned,
- cache->reserved, cache->ro ? "[readonly]" : "");
- btrfs_dump_free_space(cache, bytes);
- spin_unlock(&cache->lock);
- }
- if (++index < BTRFS_NR_RAID_TYPES)
- goto again;
- up_read(&info->groups_sem);
-}
-
/*
* btrfs_reserve_extent - entry point to the extent allocator. Tries to find a
* hole that is at least as big as @num_bytes.
@@ -8113,12 +5934,13 @@ again:
} else if (btrfs_test_opt(fs_info, ENOSPC_DEBUG)) {
struct btrfs_space_info *sinfo;
- sinfo = __find_space_info(fs_info, flags);
+ sinfo = btrfs_find_space_info(fs_info, flags);
btrfs_err(fs_info,
"allocation failed flags %llu, wanted %llu",
flags, num_bytes);
if (sinfo)
- dump_space_info(fs_info, sinfo, num_bytes, 1);
+ btrfs_dump_space_info(fs_info, sinfo,
+ num_bytes, 1);
}
}
@@ -8456,73 +6278,6 @@ btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root,
return buf;
}
-static struct btrfs_block_rsv *
-use_block_rsv(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, u32 blocksize)
-{
- struct btrfs_fs_info *fs_info = root->fs_info;
- struct btrfs_block_rsv *block_rsv;
- struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
- int ret;
- bool global_updated = false;
-
- block_rsv = get_block_rsv(trans, root);
-
- if (unlikely(block_rsv->size == 0))
- goto try_reserve;
-again:
- ret = block_rsv_use_bytes(block_rsv, blocksize);
- if (!ret)
- return block_rsv;
-
- if (block_rsv->failfast)
- return ERR_PTR(ret);
-
- if (block_rsv->type == BTRFS_BLOCK_RSV_GLOBAL && !global_updated) {
- global_updated = true;
- update_global_block_rsv(fs_info);
- goto again;
- }
-
- /*
- * The global reserve still exists to save us from ourselves, so don't
- * warn_on if we are short on our delayed refs reserve.
- */
- if (block_rsv->type != BTRFS_BLOCK_RSV_DELREFS &&
- btrfs_test_opt(fs_info, ENOSPC_DEBUG)) {
- static DEFINE_RATELIMIT_STATE(_rs,
- DEFAULT_RATELIMIT_INTERVAL * 10,
- /*DEFAULT_RATELIMIT_BURST*/ 1);
- if (__ratelimit(&_rs))
- WARN(1, KERN_DEBUG
- "BTRFS: block rsv returned %d\n", ret);
- }
-try_reserve:
- ret = reserve_metadata_bytes(root, block_rsv, blocksize,
- BTRFS_RESERVE_NO_FLUSH);
- if (!ret)
- return block_rsv;
- /*
- * If we couldn't reserve metadata bytes try and use some from
- * the global reserve if its space type is the same as the global
- * reservation.
- */
- if (block_rsv->type != BTRFS_BLOCK_RSV_GLOBAL &&
- block_rsv->space_info == global_rsv->space_info) {
- ret = block_rsv_use_bytes(global_rsv, blocksize);
- if (!ret)
- return global_rsv;
- }
- return ERR_PTR(ret);
-}
-
-static void unuse_block_rsv(struct btrfs_fs_info *fs_info,
- struct btrfs_block_rsv *block_rsv, u32 blocksize)
-{
- block_rsv_add_bytes(block_rsv, blocksize, false);
- block_rsv_release_bytes(fs_info, block_rsv, NULL, 0, NULL);
-}
-
/*
* finds a free extent and does all the dirty work required for allocation
* returns the tree buffer or an ERR_PTR on error.
@@ -8555,7 +6310,7 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
}
#endif
- block_rsv = use_block_rsv(trans, root, blocksize);
+ block_rsv = btrfs_use_block_rsv(trans, root, blocksize);
if (IS_ERR(block_rsv))
return ERR_CAST(block_rsv);
@@ -8613,7 +6368,7 @@ out_free_buf:
out_free_reserved:
btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, 0);
out_unuse:
- unuse_block_rsv(fs_info, block_rsv, blocksize);
+ btrfs_unuse_block_rsv(fs_info, block_rsv, blocksize);
return ERR_PTR(ret);
}
@@ -9552,9 +7307,8 @@ static u64 update_block_group_flags(struct btrfs_fs_info *fs_info, u64 flags)
num_devices = fs_info->fs_devices->rw_devices;
- stripped = BTRFS_BLOCK_GROUP_RAID0 |
- BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6 |
- BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10;
+ stripped = BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID56_MASK |
+ BTRFS_BLOCK_GROUP_RAID1_MASK | BTRFS_BLOCK_GROUP_RAID10;
if (num_devices == 1) {
stripped |= BTRFS_BLOCK_GROUP_DUP;
@@ -9565,7 +7319,7 @@ static u64 update_block_group_flags(struct btrfs_fs_info *fs_info, u64 flags)
return stripped;
/* turn mirroring into duplication */
- if (flags & (BTRFS_BLOCK_GROUP_RAID1 |
+ if (flags & (BTRFS_BLOCK_GROUP_RAID1_MASK |
BTRFS_BLOCK_GROUP_RAID10))
return stripped | BTRFS_BLOCK_GROUP_DUP;
} else {
@@ -9636,7 +7390,7 @@ out:
btrfs_info(cache->fs_info,
"sinfo_used=%llu bg_num_bytes=%llu min_allocable=%llu",
sinfo_used, num_bytes, min_allocable_bytes);
- dump_space_info(cache->fs_info, cache->space_info, 0, 0);
+ btrfs_dump_space_info(cache->fs_info, cache->space_info, 0, 0);
}
return ret;
}
@@ -9678,8 +7432,7 @@ again:
*/
alloc_flags = update_block_group_flags(fs_info, cache->flags);
if (alloc_flags != cache->flags) {
- ret = do_chunk_alloc(trans, alloc_flags,
- CHUNK_ALLOC_FORCE);
+ ret = btrfs_chunk_alloc(trans, alloc_flags, CHUNK_ALLOC_FORCE);
/*
* ENOSPC is allowed here, we may have enough space
* already allocated at the new raid level to
@@ -9695,7 +7448,7 @@ again:
if (!ret)
goto out;
alloc_flags = get_alloc_profile(fs_info, cache->space_info->flags);
- ret = do_chunk_alloc(trans, alloc_flags, CHUNK_ALLOC_FORCE);
+ ret = btrfs_chunk_alloc(trans, alloc_flags, CHUNK_ALLOC_FORCE);
if (ret < 0)
goto out;
ret = inc_block_group_ro(cache, 0);
@@ -9716,7 +7469,7 @@ int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans, u64 type)
{
u64 alloc_flags = get_alloc_profile(trans->fs_info, type);
- return do_chunk_alloc(trans, alloc_flags, CHUNK_ALLOC_FORCE);
+ return btrfs_chunk_alloc(trans, alloc_flags, CHUNK_ALLOC_FORCE);
}
/*
@@ -9949,7 +7702,7 @@ static int find_first_block_group(struct btrfs_fs_info *fs_info,
struct extent_map_tree *em_tree;
struct extent_map *em;
- em_tree = &root->fs_info->mapping_tree.map_tree;
+ em_tree = &root->fs_info->mapping_tree;
read_lock(&em_tree->lock);
em = lookup_extent_mapping(em_tree, found_key.objectid,
found_key.offset);
@@ -10102,7 +7855,7 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
*/
synchronize_rcu();
- release_global_block_rsv(info);
+ btrfs_release_global_block_rsv(info);
while (!list_empty(&info->space_info)) {
int i;
@@ -10118,7 +7871,7 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
if (WARN_ON(space_info->bytes_pinned > 0 ||
space_info->bytes_reserved > 0 ||
space_info->bytes_may_use > 0))
- dump_space_info(info, space_info, 0, 0);
+ btrfs_dump_space_info(info, space_info, 0, 0);
list_del(&space_info->list);
for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) {
struct kobject *kobj;
@@ -10141,7 +7894,6 @@ void btrfs_add_raid_kobjects(struct btrfs_fs_info *fs_info)
struct btrfs_space_info *space_info;
struct raid_kobject *rkobj;
LIST_HEAD(list);
- int index;
int ret = 0;
spin_lock(&fs_info->pending_raid_kobjs_lock);
@@ -10149,11 +7901,10 @@ void btrfs_add_raid_kobjects(struct btrfs_fs_info *fs_info)
spin_unlock(&fs_info->pending_raid_kobjs_lock);
list_for_each_entry(rkobj, &list, list) {
- space_info = __find_space_info(fs_info, rkobj->flags);
- index = btrfs_bg_flags_to_raid_index(rkobj->flags);
+ space_info = btrfs_find_space_info(fs_info, rkobj->flags);
ret = kobject_add(&rkobj->kobj, &space_info->kobj,
- "%s", get_raid_name(index));
+ "%s", btrfs_bg_type_to_raid_name(rkobj->flags));
if (ret) {
kobject_put(&rkobj->kobj);
break;
@@ -10243,21 +7994,21 @@ btrfs_create_block_group_cache(struct btrfs_fs_info *fs_info,
*/
static int check_chunk_block_group_mappings(struct btrfs_fs_info *fs_info)
{
- struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
+ struct extent_map_tree *map_tree = &fs_info->mapping_tree;
struct extent_map *em;
struct btrfs_block_group_cache *bg;
u64 start = 0;
int ret = 0;
while (1) {
- read_lock(&map_tree->map_tree.lock);
+ read_lock(&map_tree->lock);
/*
* lookup_extent_mapping will return the first extent map
* intersecting the range, so setting @len to 1 is enough to
* get the first chunk.
*/
- em = lookup_extent_mapping(&map_tree->map_tree, start, 1);
- read_unlock(&map_tree->map_tree.lock);
+ em = lookup_extent_mapping(map_tree, start, 1);
+ read_unlock(&map_tree->lock);
if (!em)
break;
@@ -10417,9 +8168,9 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info)
}
trace_btrfs_add_block_group(info, cache, 0);
- update_space_info(info, cache->flags, found_key.offset,
- btrfs_block_group_used(&cache->item),
- cache->bytes_super, &space_info);
+ btrfs_update_space_info(info, cache->flags, found_key.offset,
+ btrfs_block_group_used(&cache->item),
+ cache->bytes_super, &space_info);
cache->space_info = space_info;
@@ -10437,9 +8188,8 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info)
list_for_each_entry_rcu(space_info, &info->space_info, list) {
if (!(get_alloc_profile(info, space_info->flags) &
(BTRFS_BLOCK_GROUP_RAID10 |
- BTRFS_BLOCK_GROUP_RAID1 |
- BTRFS_BLOCK_GROUP_RAID5 |
- BTRFS_BLOCK_GROUP_RAID6 |
+ BTRFS_BLOCK_GROUP_RAID1_MASK |
+ BTRFS_BLOCK_GROUP_RAID56_MASK |
BTRFS_BLOCK_GROUP_DUP)))
continue;
/*
@@ -10457,7 +8207,7 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info)
}
btrfs_add_raid_kobjects(info);
- init_global_block_rsv(info);
+ btrfs_init_global_block_rsv(info);
ret = check_chunk_block_group_mappings(info);
error:
btrfs_free_path(path);
@@ -10554,7 +8304,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, u64 bytes_used,
* assigned to our block group. We want our bg to be added to the rbtree
* with its ->space_info set.
*/
- cache->space_info = __find_space_info(fs_info, cache->flags);
+ cache->space_info = btrfs_find_space_info(fs_info, cache->flags);
ASSERT(cache->space_info);
ret = btrfs_add_block_group_cache(fs_info, cache);
@@ -10569,9 +8319,9 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, u64 bytes_used,
* the rbtree, update the space info's counters.
*/
trace_btrfs_add_block_group(fs_info, cache, 1);
- update_space_info(fs_info, cache->flags, size, bytes_used,
+ btrfs_update_space_info(fs_info, cache->flags, size, bytes_used,
cache->bytes_super, &cache->space_info);
- update_global_block_rsv(fs_info);
+ btrfs_update_global_block_rsv(fs_info);
link_block_group(cache);
@@ -10598,6 +8348,35 @@ static void clear_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
write_sequnlock(&fs_info->profiles_lock);
}
+/*
+ * Clear incompat bits for the following feature(s):
+ *
+ * - RAID56 - in case there's neither RAID5 nor RAID6 profile block group
+ * in the whole filesystem
+ */
+static void clear_incompat_bg_bits(struct btrfs_fs_info *fs_info, u64 flags)
+{
+ if (flags & BTRFS_BLOCK_GROUP_RAID56_MASK) {
+ struct list_head *head = &fs_info->space_info;
+ struct btrfs_space_info *sinfo;
+
+ list_for_each_entry_rcu(sinfo, head, list) {
+ bool found = false;
+
+ down_read(&sinfo->groups_sem);
+ if (!list_empty(&sinfo->block_groups[BTRFS_RAID_RAID5]))
+ found = true;
+ if (!list_empty(&sinfo->block_groups[BTRFS_RAID_RAID6]))
+ found = true;
+ up_read(&sinfo->groups_sem);
+
+ if (found)
+ return;
+ }
+ btrfs_clear_fs_incompat(fs_info, RAID56);
+ }
+}
+
int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
u64 group_start, struct extent_map *em)
{
@@ -10744,6 +8523,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
clear_avail_alloc_bits(fs_info, block_group->flags);
}
up_write(&block_group->space_info->groups_sem);
+ clear_incompat_bg_bits(fs_info, block_group->flags);
if (kobj) {
kobject_del(kobj);
kobject_put(kobj);
@@ -10853,7 +8633,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
if (remove_em) {
struct extent_map_tree *em_tree;
- em_tree = &fs_info->mapping_tree.map_tree;
+ em_tree = &fs_info->mapping_tree;
write_lock(&em_tree->lock);
remove_extent_mapping(em_tree, em);
write_unlock(&em_tree->lock);
@@ -10871,7 +8651,7 @@ struct btrfs_trans_handle *
btrfs_start_trans_remove_block_group(struct btrfs_fs_info *fs_info,
const u64 chunk_offset)
{
- struct extent_map_tree *em_tree = &fs_info->mapping_tree.map_tree;
+ struct extent_map_tree *em_tree = &fs_info->mapping_tree;
struct extent_map *em;
struct map_lookup *map;
unsigned int num_items;
@@ -11020,7 +8800,8 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
spin_lock(&space_info->lock);
spin_lock(&block_group->lock);
- update_bytes_pinned(space_info, -block_group->pinned);
+ btrfs_space_info_update_bytes_pinned(fs_info, space_info,
+ -block_group->pinned);
space_info->bytes_readonly += block_group->pinned;
percpu_counter_add_batch(&space_info->total_bytes_pinned,
-block_group->pinned,
@@ -11076,43 +8857,6 @@ next:
spin_unlock(&fs_info->unused_bgs_lock);
}
-int btrfs_init_space_info(struct btrfs_fs_info *fs_info)
-{
- struct btrfs_super_block *disk_super;
- u64 features;
- u64 flags;
- int mixed = 0;
- int ret;
-
- disk_super = fs_info->super_copy;
- if (!btrfs_super_root(disk_super))
- return -EINVAL;
-
- features = btrfs_super_incompat_flags(disk_super);
- if (features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)
- mixed = 1;
-
- flags = BTRFS_BLOCK_GROUP_SYSTEM;
- ret = create_space_info(fs_info, flags);
- if (ret)
- goto out;
-
- if (mixed) {
- flags = BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA;
- ret = create_space_info(fs_info, flags);
- } else {
- flags = BTRFS_BLOCK_GROUP_METADATA;
- ret = create_space_info(fs_info, flags);
- if (ret)
- goto out;
-
- flags = BTRFS_BLOCK_GROUP_DATA;
- ret = create_space_info(fs_info, flags);
- }
-out:
- return ret;
-}
-
int btrfs_error_unpin_extent_range(struct btrfs_fs_info *fs_info,
u64 start, u64 end)
{
@@ -11171,12 +8915,17 @@ static int btrfs_trim_free_extents(struct btrfs_device *device, u64 *trimmed)
find_first_clear_extent_bit(&device->alloc_state, start,
&start, &end,
CHUNK_TRIMMED | CHUNK_ALLOCATED);
+
+ /* Ensure we skip the reserved area in the first 1M */
+ start = max_t(u64, start, SZ_1M);
+
/*
* If find_first_clear_extent_bit find a range that spans the
* end of the device it will set end to -1, in this case it's up
* to the caller to trim the value to the size of the device.
*/
end = min(end, device->total_bytes - 1);
+
len = end - start + 1;
/* We didn't find any extents */
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 5106008f5e28..1ff438fd5bc2 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -359,6 +359,24 @@ do_insert:
return NULL;
}
+/**
+ * __etree_search - search @tree for an entry that contains @offset. Such
+ * entry would have entry->start <= offset && entry->end >= offset.
+ *
+ * @tree - the tree to search
+ * @offset - offset that should fall within an entry in @tree
+ * @next_ret - pointer to the first entry whose range ends after @offset
+ * @prev_ret - pointer to the first entry whose range begins before @offset
+ * @p_ret - pointer where new node should be anchored (used when inserting an
+ * entry in the tree)
+ * @parent_ret - points to entry which would have been the parent of the entry,
+ * containing @offset
+ *
+ * This function returns a pointer to the entry that contains @offset byte
+ * address. If no such entry exists, then NULL is returned and the other
+ * pointer arguments to the function are filled, otherwise the found entry is
+ * returned and other pointers are left untouched.
+ */
static struct rb_node *__etree_search(struct extent_io_tree *tree, u64 offset,
struct rb_node **next_ret,
struct rb_node **prev_ret,
@@ -504,9 +522,11 @@ static int insert_state(struct extent_io_tree *tree,
{
struct rb_node *node;
- if (end < start)
- WARN(1, KERN_ERR "BTRFS: end < start %llu %llu\n",
- end, start);
+ if (end < start) {
+ btrfs_err(tree->fs_info,
+ "insert state: end < start %llu %llu", end, start);
+ WARN_ON(1);
+ }
state->start = start;
state->end = end;
@@ -516,7 +536,8 @@ static int insert_state(struct extent_io_tree *tree,
if (node) {
struct extent_state *found;
found = rb_entry(node, struct extent_state, rb_node);
- pr_err("BTRFS: found node %llu %llu on insert of %llu %llu\n",
+ btrfs_err(tree->fs_info,
+ "found node %llu %llu on insert of %llu %llu",
found->start, found->end, start, end);
return -EEXIST;
}
@@ -1537,8 +1558,8 @@ out:
}
/**
- * find_first_clear_extent_bit - finds the first range that has @bits not set
- * and that starts after @start
+ * find_first_clear_extent_bit - find the first range that has @bits not set.
+ * This range could start before @start.
*
* @tree - the tree to search
* @start - the offset at/after which the found extent should start
@@ -1578,12 +1599,52 @@ void find_first_clear_extent_bit(struct extent_io_tree *tree, u64 start,
goto out;
}
}
+ /*
+ * At this point 'node' either contains 'start' or start is
+ * before 'node'
+ */
state = rb_entry(node, struct extent_state, rb_node);
- if (in_range(start, state->start, state->end - state->start + 1) &&
- (state->state & bits)) {
- start = state->end + 1;
+
+ if (in_range(start, state->start, state->end - state->start + 1)) {
+ if (state->state & bits) {
+ /*
+ * |--range with bits set---|
+ * |
+ * start
+ */
+ start = state->end + 1;
+ } else {
+ /*
+ * 'start' falls within a range that doesn't
+ * have the bits set, so take its start as
+ * the beginning of the desired range
+ *
+ * |--range with bits cleared----|
+ * |
+ * start
+ */
+ *start_ret = state->start;
+ break;
+ }
} else {
- *start_ret = start;
+ /*
+ * |---prev range---|---hole/unset---|---node range---|
+ * |
+ * start
+ *
+ * or
+ *
+ * |---hole/unset--||--first node--|
+ * 0 |
+ * start
+ */
+ if (prev) {
+ state = rb_entry(prev, struct extent_state,
+ rb_node);
+ *start_ret = state->end + 1;
+ } else {
+ *start_ret = 0;
+ }
break;
}
}
@@ -1719,10 +1780,10 @@ static noinline int lock_delalloc_pages(struct inode *inode,
*/
EXPORT_FOR_TESTS
noinline_for_stack bool find_lock_delalloc_range(struct inode *inode,
- struct extent_io_tree *tree,
struct page *locked_page, u64 *start,
u64 *end)
{
+ struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
u64 max_bytes = BTRFS_MAX_EXTENT_SIZE;
u64 delalloc_start;
u64 delalloc_end;
@@ -2800,12 +2861,11 @@ static inline void btrfs_io_bio_init(struct btrfs_io_bio *btrfs_bio)
* never fail. We're returning a bio right now but you can call btrfs_io_bio
* for the appropriate container_of magic
*/
-struct bio *btrfs_bio_alloc(struct block_device *bdev, u64 first_byte)
+struct bio *btrfs_bio_alloc(u64 first_byte)
{
struct bio *bio;
bio = bio_alloc_bioset(GFP_NOFS, BIO_MAX_PAGES, &btrfs_bioset);
- bio_set_dev(bio, bdev);
bio->bi_iter.bi_sector = first_byte >> 9;
btrfs_io_bio_init(btrfs_io_bio(bio));
return bio;
@@ -2916,7 +2976,8 @@ static int submit_extent_page(unsigned int opf, struct extent_io_tree *tree,
}
}
- bio = btrfs_bio_alloc(bdev, offset);
+ bio = btrfs_bio_alloc(offset);
+ bio_set_dev(bio, bdev);
bio_add_page(bio, page, page_size, pg_offset);
bio->bi_end_io = end_io_func;
bio->bi_private = tree;
@@ -3204,21 +3265,10 @@ static inline void contiguous_readpages(struct extent_io_tree *tree,
unsigned long *bio_flags,
u64 *prev_em_start)
{
- struct inode *inode;
- struct btrfs_ordered_extent *ordered;
+ struct btrfs_inode *inode = BTRFS_I(pages[0]->mapping->host);
int index;
- inode = pages[0]->mapping->host;
- while (1) {
- lock_extent(tree, start, end);
- ordered = btrfs_lookup_ordered_range(BTRFS_I(inode), start,
- end - start + 1);
- if (!ordered)
- break;
- unlock_extent(tree, start, end);
- btrfs_start_ordered_extent(inode, ordered, 1);
- btrfs_put_ordered_extent(ordered);
- }
+ btrfs_lock_and_flush_ordered_range(tree, inode, start, end, NULL);
for (index = 0; index < nr_pages; index++) {
__do_readpage(tree, pages[index], btrfs_get_extent, em_cached,
@@ -3234,22 +3284,12 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
unsigned long *bio_flags,
unsigned int read_flags)
{
- struct inode *inode = page->mapping->host;
- struct btrfs_ordered_extent *ordered;
+ struct btrfs_inode *inode = BTRFS_I(page->mapping->host);
u64 start = page_offset(page);
u64 end = start + PAGE_SIZE - 1;
int ret;
- while (1) {
- lock_extent(tree, start, end);
- ordered = btrfs_lookup_ordered_range(BTRFS_I(inode), start,
- PAGE_SIZE);
- if (!ordered)
- break;
- unlock_extent(tree, start, end);
- btrfs_start_ordered_extent(inode, ordered, 1);
- btrfs_put_ordered_extent(ordered);
- }
+ btrfs_lock_and_flush_ordered_range(tree, inode, start, end, NULL);
ret = __do_readpage(tree, page, get_extent, NULL, bio, mirror_num,
bio_flags, read_flags, NULL);
@@ -3290,7 +3330,6 @@ static noinline_for_stack int writepage_delalloc(struct inode *inode,
struct page *page, struct writeback_control *wbc,
u64 delalloc_start, unsigned long *nr_written)
{
- struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
u64 page_end = delalloc_start + PAGE_SIZE - 1;
bool found;
u64 delalloc_to_write = 0;
@@ -3300,8 +3339,7 @@ static noinline_for_stack int writepage_delalloc(struct inode *inode,
while (delalloc_end < page_end) {
- found = find_lock_delalloc_range(inode, tree,
- page,
+ found = find_lock_delalloc_range(inode, page,
&delalloc_start,
&delalloc_end);
if (!found) {
@@ -3310,7 +3348,6 @@ static noinline_for_stack int writepage_delalloc(struct inode *inode,
}
ret = btrfs_run_delalloc_range(inode, page, delalloc_start,
delalloc_end, &page_started, nr_written, wbc);
- /* File system has been set read-only */
if (ret) {
SetPageError(page);
/*
@@ -4542,6 +4579,8 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
struct btrfs_path *path;
struct btrfs_root *root = BTRFS_I(inode)->root;
struct fiemap_cache cache = { 0 };
+ struct ulist *roots;
+ struct ulist *tmp_ulist;
int end = 0;
u64 em_start = 0;
u64 em_len = 0;
@@ -4555,6 +4594,13 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
return -ENOMEM;
path->leave_spinning = 1;
+ roots = ulist_alloc(GFP_KERNEL);
+ tmp_ulist = ulist_alloc(GFP_KERNEL);
+ if (!roots || !tmp_ulist) {
+ ret = -ENOMEM;
+ goto out_free_ulist;
+ }
+
start = round_down(start, btrfs_inode_sectorsize(inode));
len = round_up(max, btrfs_inode_sectorsize(inode)) - start;
@@ -4565,8 +4611,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
ret = btrfs_lookup_file_extent(NULL, root, path,
btrfs_ino(BTRFS_I(inode)), -1, 0);
if (ret < 0) {
- btrfs_free_path(path);
- return ret;
+ goto out_free_ulist;
} else {
WARN_ON(!ret);
if (ret == 1)
@@ -4675,7 +4720,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
*/
ret = btrfs_check_shared(root,
btrfs_ino(BTRFS_I(inode)),
- bytenr);
+ bytenr, roots, tmp_ulist);
if (ret < 0)
goto out_free;
if (ret)
@@ -4718,9 +4763,13 @@ out_free:
ret = emit_last_fiemap_cache(fieinfo, &cache);
free_extent_map(em);
out:
- btrfs_free_path(path);
unlock_extent_cached(&BTRFS_I(inode)->io_tree, start, start + len - 1,
&cached_state);
+
+out_free_ulist:
+ btrfs_free_path(path);
+ ulist_free(roots);
+ ulist_free(tmp_ulist);
return ret;
}
@@ -4808,7 +4857,7 @@ __alloc_extent_buffer(struct btrfs_fs_info *fs_info, u64 start,
eb->bflags = 0;
rwlock_init(&eb->lock);
atomic_set(&eb->blocking_readers, 0);
- atomic_set(&eb->blocking_writers, 0);
+ eb->blocking_writers = 0;
eb->lock_nested = false;
init_waitqueue_head(&eb->write_lock_wq);
init_waitqueue_head(&eb->read_lock_wq);
@@ -4827,10 +4876,10 @@ __alloc_extent_buffer(struct btrfs_fs_info *fs_info, u64 start,
BUG_ON(len > MAX_INLINE_EXTENT_BUFFER_SIZE);
#ifdef CONFIG_BTRFS_DEBUG
- atomic_set(&eb->spinning_writers, 0);
+ eb->spinning_writers = 0;
atomic_set(&eb->spinning_readers, 0);
atomic_set(&eb->read_locks, 0);
- atomic_set(&eb->write_locks, 0);
+ eb->write_locks = 0;
#endif
return eb;
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index aa18a16a6ed7..401423b16976 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -167,7 +167,7 @@ struct extent_buffer {
struct rcu_head rcu_head;
pid_t lock_owner;
- atomic_t blocking_writers;
+ int blocking_writers;
atomic_t blocking_readers;
bool lock_nested;
/* >= 0 if eb belongs to a log tree, -1 otherwise */
@@ -187,10 +187,10 @@ struct extent_buffer {
wait_queue_head_t read_lock_wq;
struct page *pages[INLINE_EXTENT_BUFFER_PAGES];
#ifdef CONFIG_BTRFS_DEBUG
- atomic_t spinning_writers;
+ int spinning_writers;
atomic_t spinning_readers;
atomic_t read_locks;
- atomic_t write_locks;
+ int write_locks;
struct list_head leak_list;
#endif
};
@@ -497,7 +497,7 @@ void extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end,
u64 delalloc_end, struct page *locked_page,
unsigned bits_to_clear,
unsigned long page_ops);
-struct bio *btrfs_bio_alloc(struct block_device *bdev, u64 first_byte);
+struct bio *btrfs_bio_alloc(u64 first_byte);
struct bio *btrfs_io_bio_alloc(unsigned int nr_iovecs);
struct bio *btrfs_bio_clone(struct bio *bio);
struct bio *btrfs_bio_clone_partial(struct bio *orig, int offset, int size);
@@ -549,7 +549,7 @@ int free_io_failure(struct extent_io_tree *failure_tree,
struct extent_io_tree *io_tree,
struct io_failure_record *rec);
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
-bool find_lock_delalloc_range(struct inode *inode, struct extent_io_tree *tree,
+bool find_lock_delalloc_range(struct inode *inode,
struct page *locked_page, u64 *start,
u64 *end);
#endif
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index d431ea8198e4..1a599f50837b 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -8,6 +8,7 @@
#include <linux/pagemap.h>
#include <linux/highmem.h>
#include <linux/sched/mm.h>
+#include <crypto/hash.h>
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
@@ -22,9 +23,13 @@
#define MAX_CSUM_ITEMS(r, size) (min_t(u32, __MAX_CSUM_ITEMS(r, size), \
PAGE_SIZE))
-#define MAX_ORDERED_SUM_BYTES(fs_info) ((PAGE_SIZE - \
- sizeof(struct btrfs_ordered_sum)) / \
- sizeof(u32) * (fs_info)->sectorsize)
+/*
+ * Return the number of csum_size-sized entries that fit in PAGE_SIZE once the
+ * struct btrfs_ordered_sum header is subtracted, multiplied by the filesystem
+ * sector size.
+ */
+static inline u32 max_ordered_sum_bytes(struct btrfs_fs_info *fs_info,
+					u16 csum_size)
+{
+	const unsigned long payload = PAGE_SIZE - sizeof(struct btrfs_ordered_sum);
+	const u32 max_csums = payload / csum_size;
+
+	return max_csums * fs_info->sectorsize;
+}
int btrfs_insert_file_extent(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
@@ -144,7 +149,7 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans,
}
static blk_status_t __btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio,
- u64 logical_offset, u32 *dst, int dio)
+ u64 logical_offset, u8 *dst, int dio)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct bio_vec bvec;
@@ -182,7 +187,7 @@ static blk_status_t __btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio
}
csum = btrfs_bio->csum;
} else {
- csum = (u8 *)dst;
+ csum = dst;
}
if (bio->bi_iter.bi_size > PAGE_SIZE * 8)
@@ -211,7 +216,7 @@ static blk_status_t __btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio
if (!dio)
offset = page_offset(bvec.bv_page) + bvec.bv_offset;
count = btrfs_find_ordered_sum(inode, offset, disk_bytenr,
- (u32 *)csum, nblocks);
+ csum, nblocks);
if (count)
goto found;
@@ -283,7 +288,8 @@ next:
return 0;
}
-blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u32 *dst)
+blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio,
+ u8 *dst)
{
return __btrfs_lookup_bio_sums(inode, bio, 0, dst, 0);
}
@@ -374,7 +380,7 @@ int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
struct btrfs_csum_item);
while (start < csum_end) {
size = min_t(size_t, csum_end - start,
- MAX_ORDERED_SUM_BYTES(fs_info));
+ max_ordered_sum_bytes(fs_info, csum_size));
sums = kzalloc(btrfs_ordered_sum_size(fs_info, size),
GFP_NOFS);
if (!sums) {
@@ -427,6 +433,7 @@ blk_status_t btrfs_csum_one_bio(struct inode *inode, struct bio *bio,
u64 file_start, int contig)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
struct btrfs_ordered_sum *sums;
struct btrfs_ordered_extent *ordered = NULL;
char *data;
@@ -439,6 +446,7 @@ blk_status_t btrfs_csum_one_bio(struct inode *inode, struct bio *bio,
int i;
u64 offset;
unsigned nofs_flag;
+ const u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
nofs_flag = memalloc_nofs_save();
sums = kvzalloc(btrfs_ordered_sum_size(fs_info, bio->bi_iter.bi_size),
@@ -459,6 +467,8 @@ blk_status_t btrfs_csum_one_bio(struct inode *inode, struct bio *bio,
sums->bytenr = (u64)bio->bi_iter.bi_sector << 9;
index = 0;
+ shash->tfm = fs_info->csum_shash;
+
bio_for_each_segment(bvec, bio, iter) {
if (!contig)
offset = page_offset(bvec.bv_page) + bvec.bv_offset;
@@ -498,17 +508,14 @@ blk_status_t btrfs_csum_one_bio(struct inode *inode, struct bio *bio,
index = 0;
}
- sums->sums[index] = ~(u32)0;
+ crypto_shash_init(shash);
data = kmap_atomic(bvec.bv_page);
- sums->sums[index]
- = btrfs_csum_data(data + bvec.bv_offset
- + (i * fs_info->sectorsize),
- sums->sums[index],
- fs_info->sectorsize);
+ crypto_shash_update(shash, data + bvec.bv_offset
+ + (i * fs_info->sectorsize),
+ fs_info->sectorsize);
kunmap_atomic(data);
- btrfs_csum_final(sums->sums[index],
- (char *)(sums->sums + index));
- index++;
+ crypto_shash_final(shash, (char *)(sums->sums + index));
+ index += csum_size;
offset += fs_info->sectorsize;
this_sum_bytes += fs_info->sectorsize;
total_bytes += fs_info->sectorsize;
@@ -904,9 +911,9 @@ found:
write_extent_buffer(leaf, sums->sums + index, (unsigned long)item,
ins_size);
+ index += ins_size;
ins_size /= csum_size;
total_bytes += ins_size * fs_info->sectorsize;
- index += ins_size;
btrfs_mark_buffer_dirty(path->nodes[0]);
if (total_bytes < sums->len) {
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 89f5be2bfb43..58a18ed11546 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -26,6 +26,7 @@
#include "volumes.h"
#include "qgroup.h"
#include "compression.h"
+#include "delalloc-space.h"
static struct kmem_cache *btrfs_inode_defrag_cachep;
/*
@@ -1550,30 +1551,20 @@ static noinline int check_can_nocow(struct btrfs_inode *inode, loff_t pos,
{
struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct btrfs_root *root = inode->root;
- struct btrfs_ordered_extent *ordered;
u64 lockstart, lockend;
u64 num_bytes;
int ret;
ret = btrfs_start_write_no_snapshotting(root);
if (!ret)
- return -ENOSPC;
+ return -EAGAIN;
lockstart = round_down(pos, fs_info->sectorsize);
lockend = round_up(pos + *write_bytes,
fs_info->sectorsize) - 1;
- while (1) {
- lock_extent(&inode->io_tree, lockstart, lockend);
- ordered = btrfs_lookup_ordered_range(inode, lockstart,
- lockend - lockstart + 1);
- if (!ordered) {
- break;
- }
- unlock_extent(&inode->io_tree, lockstart, lockend);
- btrfs_start_ordered_extent(&inode->vfs_inode, ordered, 1);
- btrfs_put_ordered_extent(ordered);
- }
+ btrfs_lock_and_flush_ordered_range(&inode->io_tree, inode, lockstart,
+ lockend, NULL);
num_bytes = lockend - lockstart + 1;
ret = can_nocow_extent(&inode->vfs_inode, lockstart, &num_bytes,
@@ -2721,6 +2712,11 @@ out_only_mutex:
* for detecting, at fsync time, if the inode isn't yet in the
* log tree or it's there but not up to date.
*/
+ struct timespec64 now = current_time(inode);
+
+ inode_inc_iversion(inode);
+ inode->i_mtime = now;
+ inode->i_ctime = now;
trans = btrfs_start_transaction(root, 1);
if (IS_ERR(trans)) {
err = PTR_ERR(trans);
@@ -2801,9 +2797,9 @@ static int btrfs_fallocate_update_isize(struct inode *inode,
}
enum {
- RANGE_BOUNDARY_WRITTEN_EXTENT = 0,
- RANGE_BOUNDARY_PREALLOC_EXTENT = 1,
- RANGE_BOUNDARY_HOLE = 2,
+ RANGE_BOUNDARY_WRITTEN_EXTENT,
+ RANGE_BOUNDARY_PREALLOC_EXTENT,
+ RANGE_BOUNDARY_HOLE,
};
static int btrfs_zero_range_check_range_boundary(struct inode *inode,
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index f74dc259307b..062be9dde4c6 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -18,6 +18,8 @@
#include "extent_io.h"
#include "inode-map.h"
#include "volumes.h"
+#include "space-info.h"
+#include "delalloc-space.h"
#define BITS_PER_BITMAP (PAGE_SIZE * 8UL)
#define MAX_CACHE_BYTES_PER_GIG SZ_32K
@@ -465,9 +467,8 @@ static void io_ctl_set_crc(struct btrfs_io_ctl *io_ctl, int index)
if (index == 0)
offset = sizeof(u32) * io_ctl->num_pages;
- crc = btrfs_csum_data(io_ctl->orig + offset, crc,
- PAGE_SIZE - offset);
- btrfs_csum_final(crc, (u8 *)&crc);
+ crc = btrfs_crc32c(crc, io_ctl->orig + offset, PAGE_SIZE - offset);
+ btrfs_crc32c_final(crc, (u8 *)&crc);
io_ctl_unmap_page(io_ctl);
tmp = page_address(io_ctl->pages[0]);
tmp += index;
@@ -493,9 +494,8 @@ static int io_ctl_check_crc(struct btrfs_io_ctl *io_ctl, int index)
val = *tmp;
io_ctl_map_page(io_ctl, 0);
- crc = btrfs_csum_data(io_ctl->orig + offset, crc,
- PAGE_SIZE - offset);
- btrfs_csum_final(crc, (u8 *)&crc);
+ crc = btrfs_crc32c(crc, io_ctl->orig + offset, PAGE_SIZE - offset);
+ btrfs_crc32c_final(crc, (u8 *)&crc);
if (val != crc) {
btrfs_err_rl(io_ctl->fs_info,
"csum mismatch on free space cache");
@@ -3166,8 +3166,8 @@ static int do_trimming(struct btrfs_block_group_cache *block_group,
space_info->bytes_readonly += reserved_bytes;
block_group->reserved -= reserved_bytes;
space_info->bytes_reserved -= reserved_bytes;
- spin_unlock(&space_info->lock);
spin_unlock(&block_group->lock);
+ spin_unlock(&space_info->lock);
}
return ret;
@@ -3358,7 +3358,7 @@ void btrfs_put_block_group_trimming(struct btrfs_block_group_cache *block_group)
if (cleanup) {
mutex_lock(&fs_info->chunk_mutex);
- em_tree = &fs_info->mapping_tree.map_tree;
+ em_tree = &fs_info->mapping_tree;
write_lock(&em_tree->lock);
em = lookup_extent_mapping(em_tree, block_group->key.objectid,
1);
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c
index ffca2abf13d0..2e8bb402050b 100644
--- a/fs/btrfs/inode-map.c
+++ b/fs/btrfs/inode-map.c
@@ -11,6 +11,7 @@
#include "free-space-cache.h"
#include "inode-map.h"
#include "transaction.h"
+#include "delalloc-space.h"
static int caching_kthread(void *data)
{
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index a2aabdb85226..1af069a9a0c7 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -47,6 +47,7 @@
#include "props.h"
#include "qgroup.h"
#include "dedupe.h"
+#include "delalloc-space.h"
struct btrfs_iget_args {
struct btrfs_key *location;
@@ -1932,17 +1933,19 @@ int btrfs_bio_fits_in_stripe(struct page *page, size_t size, struct bio *bio,
u64 length = 0;
u64 map_length;
int ret;
+ struct btrfs_io_geometry geom;
if (bio_flags & EXTENT_BIO_COMPRESSED)
return 0;
length = bio->bi_iter.bi_size;
map_length = length;
- ret = btrfs_map_block(fs_info, btrfs_op(bio), logical, &map_length,
- NULL, 0);
+ ret = btrfs_get_io_geometry(fs_info, btrfs_op(bio), logical, map_length,
+ &geom);
if (ret < 0)
return ret;
- if (map_length < length + size)
+
+ if (geom.len < length + size)
return 1;
return 0;
}
@@ -3203,16 +3206,23 @@ static int __readpage_endio_check(struct inode *inode,
int icsum, struct page *page,
int pgoff, u64 start, size_t len)
{
+ struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
char *kaddr;
- u32 csum_expected;
- u32 csum = ~(u32)0;
+ u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
+ u8 *csum_expected;
+ u8 csum[BTRFS_CSUM_SIZE];
- csum_expected = *(((u32 *)io_bio->csum) + icsum);
+ csum_expected = ((u8 *)io_bio->csum) + icsum * csum_size;
kaddr = kmap_atomic(page);
- csum = btrfs_csum_data(kaddr + pgoff, csum, len);
- btrfs_csum_final(csum, (u8 *)&csum);
- if (csum != csum_expected)
+ shash->tfm = fs_info->csum_shash;
+
+ crypto_shash_init(shash);
+ crypto_shash_update(shash, kaddr + pgoff, len);
+ crypto_shash_final(shash, csum);
+
+ if (memcmp(csum, csum_expected, csum_size))
goto zeroit;
kunmap_atomic(kaddr);
@@ -3286,6 +3296,28 @@ void btrfs_add_delayed_iput(struct inode *inode)
wake_up_process(fs_info->cleaner_kthread);
}
+/*
+ * Run a single delayed iput for @inode.
+ *
+ * The body releases fs_info->delayed_iput_lock before calling iput() and
+ * takes it again before returning, so callers enter and leave with that lock
+ * held. Waiters on delayed_iputs_wait are woken when nr_delayed_iputs drops to
+ * zero.
+ */
+static void run_delayed_iput_locked(struct btrfs_fs_info *fs_info,
+				    struct btrfs_inode *inode)
+{
+	struct inode *vfs_inode = &inode->vfs_inode;
+	bool was_last;
+
+	/* Unlink from the delayed list while the lock is still held. */
+	list_del_init(&inode->delayed_iput);
+	spin_unlock(&fs_info->delayed_iput_lock);
+
+	iput(vfs_inode);
+	was_last = atomic_dec_and_test(&fs_info->nr_delayed_iputs);
+	if (was_last)
+		wake_up(&fs_info->delayed_iputs_wait);
+
+	spin_lock(&fs_info->delayed_iput_lock);
+}
+
+/*
+ * Run the delayed iput of @inode right away if it has one queued.
+ *
+ * The list is checked once without the lock as a cheap early exit and then
+ * again under fs_info->delayed_iput_lock before the iput is actually run.
+ */
+static void btrfs_run_delayed_iput(struct btrfs_fs_info *fs_info,
+				   struct btrfs_inode *inode)
+{
+	/* Nothing queued for this inode: avoid taking the lock at all. */
+	if (list_empty(&inode->delayed_iput))
+		return;
+
+	spin_lock(&fs_info->delayed_iput_lock);
+	/* Re-check now that the list cannot change under us. */
+	if (!list_empty(&inode->delayed_iput))
+		run_delayed_iput_locked(fs_info, inode);
+	spin_unlock(&fs_info->delayed_iput_lock);
+}
+
void btrfs_run_delayed_iputs(struct btrfs_fs_info *fs_info)
{
@@ -3295,12 +3327,7 @@ void btrfs_run_delayed_iputs(struct btrfs_fs_info *fs_info)
inode = list_first_entry(&fs_info->delayed_iputs,
struct btrfs_inode, delayed_iput);
- list_del_init(&inode->delayed_iput);
- spin_unlock(&fs_info->delayed_iput_lock);
- iput(&inode->vfs_inode);
- if (atomic_dec_and_test(&fs_info->nr_delayed_iputs))
- wake_up(&fs_info->delayed_iputs_wait);
- spin_lock(&fs_info->delayed_iput_lock);
+ run_delayed_iput_locked(fs_info, inode);
}
spin_unlock(&fs_info->delayed_iput_lock);
}
@@ -3935,9 +3962,7 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_path *path;
int ret = 0;
- struct extent_buffer *leaf;
struct btrfs_dir_item *di;
- struct btrfs_key key;
u64 index;
u64 ino = btrfs_ino(inode);
u64 dir_ino = btrfs_ino(dir);
@@ -3955,8 +3980,6 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans,
ret = di ? PTR_ERR(di) : -ENOENT;
goto err;
}
- leaf = path->nodes[0];
- btrfs_dir_item_key_to_cpu(leaf, di, &key);
ret = btrfs_delete_one_dir_name(trans, root, path, di);
if (ret)
goto err;
@@ -4009,6 +4032,17 @@ skip_backref:
ret = 0;
else if (ret)
btrfs_abort_transaction(trans, ret);
+
+ /*
+ * If we have a pending delayed iput we could end up with the final iput
+ * being run in btrfs-cleaner context. If we have enough of these built
+ * up we can end up burning a lot of time in btrfs-cleaner without any
+ * way to throttle the unlinks. Since we're currently holding a ref on
+ * the inode we can run the delayed iput here without any issues as the
+ * final iput won't be done until after we drop the ref we're currently
+ * holding.
+ */
+ btrfs_run_delayed_iput(fs_info, inode);
err:
btrfs_free_path(path);
if (ret)
@@ -5008,21 +5042,8 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
if (size <= hole_start)
return 0;
- while (1) {
- struct btrfs_ordered_extent *ordered;
-
- lock_extent_bits(io_tree, hole_start, block_end - 1,
- &cached_state);
- ordered = btrfs_lookup_ordered_range(BTRFS_I(inode), hole_start,
- block_end - hole_start);
- if (!ordered)
- break;
- unlock_extent_cached(io_tree, hole_start, block_end - 1,
- &cached_state);
- btrfs_start_ordered_extent(inode, ordered, 1);
- btrfs_put_ordered_extent(ordered);
- }
-
+ btrfs_lock_and_flush_ordered_range(io_tree, BTRFS_I(inode), hole_start,
+ block_end - 1, &cached_state);
cur_offset = hole_start;
while (1) {
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, cur_offset,
@@ -8318,22 +8339,21 @@ static int btrfs_submit_direct_hook(struct btrfs_dio_private *dip)
struct bio *orig_bio = dip->orig_bio;
u64 start_sector = orig_bio->bi_iter.bi_sector;
u64 file_offset = dip->logical_offset;
- u64 map_length;
int async_submit = 0;
u64 submit_len;
int clone_offset = 0;
int clone_len;
int ret;
blk_status_t status;
+ struct btrfs_io_geometry geom;
- map_length = orig_bio->bi_iter.bi_size;
- submit_len = map_length;
- ret = btrfs_map_block(fs_info, btrfs_op(orig_bio), start_sector << 9,
- &map_length, NULL, 0);
+ submit_len = orig_bio->bi_iter.bi_size;
+ ret = btrfs_get_io_geometry(fs_info, btrfs_op(orig_bio),
+ start_sector << 9, submit_len, &geom);
if (ret)
return -EIO;
- if (map_length >= submit_len) {
+ if (geom.len >= submit_len) {
bio = orig_bio;
dip->flags |= BTRFS_DIO_ORIG_BIO_SUBMITTED;
goto submit;
@@ -8346,10 +8366,10 @@ static int btrfs_submit_direct_hook(struct btrfs_dio_private *dip)
async_submit = 1;
/* bio split */
- ASSERT(map_length <= INT_MAX);
+ ASSERT(geom.len <= INT_MAX);
atomic_inc(&dip->pending_bios);
do {
- clone_len = min_t(int, submit_len, map_length);
+ clone_len = min_t(int, submit_len, geom.len);
/*
* This will never fail as it's passing GFP_NOFS and
@@ -8386,9 +8406,8 @@ static int btrfs_submit_direct_hook(struct btrfs_dio_private *dip)
start_sector += clone_len >> 9;
file_offset += clone_len;
- map_length = submit_len;
- ret = btrfs_map_block(fs_info, btrfs_op(orig_bio),
- start_sector << 9, &map_length, NULL, 0);
+ ret = btrfs_get_io_geometry(fs_info, btrfs_op(orig_bio),
+ start_sector << 9, submit_len, &geom);
if (ret)
goto out_err;
} while (submit_len > 0);
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index cfeff1b8dce0..818f7ec8bb0e 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -43,6 +43,8 @@
#include "qgroup.h"
#include "tree-log.h"
#include "compression.h"
+#include "space-info.h"
+#include "delalloc-space.h"
#ifdef CONFIG_64BIT
/* If we have a 32-bit userspace and 64-bit kernel, then the UAPI
@@ -3993,6 +3995,27 @@ static int btrfs_remap_file_range_prep(struct file *file_in, loff_t pos_in,
if (!same_inode)
inode_dio_wait(inode_out);
+ /*
+ * Workaround to make sure NOCOW buffered write reach disk as NOCOW.
+ *
+ * Btrfs' back references do not have a block level granularity, they
+ * work at the whole extent level.
+ * NOCOW buffered write without data space reserved may not be able
+ * to fall back to CoW due to lack of data space, thus could cause
+ * data loss.
+ *
+ * Here we take a shortcut by flushing the whole inode, so that all
+ * nocow write should reach disk as nocow before we increase the
+ * reference of the extent. We could do better by only flushing NOCOW
+ * data, but that needs extra accounting.
+ *
+ * Also we don't need to check ASYNC_EXTENT, as async extent will be
+ * CoWed anyway, not affecting nocow part.
+ */
+ ret = filemap_flush(inode_in->i_mapping);
+ if (ret < 0)
+ return ret;
+
ret = btrfs_wait_ordered_range(inode_in, ALIGN_DOWN(pos_in, bs),
wb_len);
if (ret < 0)
diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c
index 2f6c3c7851ed..98fccce4208c 100644
--- a/fs/btrfs/locking.c
+++ b/fs/btrfs/locking.c
@@ -15,19 +15,19 @@
#ifdef CONFIG_BTRFS_DEBUG
static void btrfs_assert_spinning_writers_get(struct extent_buffer *eb)
{
- WARN_ON(atomic_read(&eb->spinning_writers));
- atomic_inc(&eb->spinning_writers);
+ WARN_ON(eb->spinning_writers);
+ eb->spinning_writers++;
}
static void btrfs_assert_spinning_writers_put(struct extent_buffer *eb)
{
- WARN_ON(atomic_read(&eb->spinning_writers) != 1);
- atomic_dec(&eb->spinning_writers);
+ WARN_ON(eb->spinning_writers != 1);
+ eb->spinning_writers--;
}
static void btrfs_assert_no_spinning_writers(struct extent_buffer *eb)
{
- WARN_ON(atomic_read(&eb->spinning_writers));
+ WARN_ON(eb->spinning_writers);
}
static void btrfs_assert_spinning_readers_get(struct extent_buffer *eb)
@@ -58,17 +58,17 @@ static void btrfs_assert_tree_read_locked(struct extent_buffer *eb)
static void btrfs_assert_tree_write_locks_get(struct extent_buffer *eb)
{
- atomic_inc(&eb->write_locks);
+ eb->write_locks++;
}
static void btrfs_assert_tree_write_locks_put(struct extent_buffer *eb)
{
- atomic_dec(&eb->write_locks);
+ eb->write_locks--;
}
void btrfs_assert_tree_locked(struct extent_buffer *eb)
{
- BUG_ON(!atomic_read(&eb->write_locks));
+ BUG_ON(!eb->write_locks);
}
#else
@@ -111,10 +111,10 @@ void btrfs_set_lock_blocking_write(struct extent_buffer *eb)
*/
if (eb->lock_nested && current->pid == eb->lock_owner)
return;
- if (atomic_read(&eb->blocking_writers) == 0) {
+ if (eb->blocking_writers == 0) {
btrfs_assert_spinning_writers_put(eb);
btrfs_assert_tree_locked(eb);
- atomic_inc(&eb->blocking_writers);
+ eb->blocking_writers++;
write_unlock(&eb->lock);
}
}
@@ -148,12 +148,11 @@ void btrfs_clear_lock_blocking_write(struct extent_buffer *eb)
*/
if (eb->lock_nested && current->pid == eb->lock_owner)
return;
- BUG_ON(atomic_read(&eb->blocking_writers) != 1);
write_lock(&eb->lock);
+ BUG_ON(eb->blocking_writers != 1);
btrfs_assert_spinning_writers_get(eb);
- /* atomic_dec_and_test implies a barrier */
- if (atomic_dec_and_test(&eb->blocking_writers))
- cond_wake_up_nomb(&eb->write_lock_wq);
+ if (--eb->blocking_writers == 0)
+ cond_wake_up(&eb->write_lock_wq);
}
/*
@@ -167,12 +166,10 @@ void btrfs_tree_read_lock(struct extent_buffer *eb)
if (trace_btrfs_tree_read_lock_enabled())
start_ns = ktime_get_ns();
again:
- BUG_ON(!atomic_read(&eb->blocking_writers) &&
- current->pid == eb->lock_owner);
-
read_lock(&eb->lock);
- if (atomic_read(&eb->blocking_writers) &&
- current->pid == eb->lock_owner) {
+ BUG_ON(eb->blocking_writers == 0 &&
+ current->pid == eb->lock_owner);
+ if (eb->blocking_writers && current->pid == eb->lock_owner) {
/*
* This extent is already write-locked by our thread. We allow
* an additional read lock to be added because it's for the same
@@ -185,10 +182,10 @@ again:
trace_btrfs_tree_read_lock(eb, start_ns);
return;
}
- if (atomic_read(&eb->blocking_writers)) {
+ if (eb->blocking_writers) {
read_unlock(&eb->lock);
wait_event(eb->write_lock_wq,
- atomic_read(&eb->blocking_writers) == 0);
+ eb->blocking_writers == 0);
goto again;
}
btrfs_assert_tree_read_locks_get(eb);
@@ -203,11 +200,11 @@ again:
*/
int btrfs_tree_read_lock_atomic(struct extent_buffer *eb)
{
- if (atomic_read(&eb->blocking_writers))
+ if (eb->blocking_writers)
return 0;
read_lock(&eb->lock);
- if (atomic_read(&eb->blocking_writers)) {
+ if (eb->blocking_writers) {
read_unlock(&eb->lock);
return 0;
}
@@ -223,13 +220,13 @@ int btrfs_tree_read_lock_atomic(struct extent_buffer *eb)
*/
int btrfs_try_tree_read_lock(struct extent_buffer *eb)
{
- if (atomic_read(&eb->blocking_writers))
+ if (eb->blocking_writers)
return 0;
if (!read_trylock(&eb->lock))
return 0;
- if (atomic_read(&eb->blocking_writers)) {
+ if (eb->blocking_writers) {
read_unlock(&eb->lock);
return 0;
}
@@ -245,13 +242,11 @@ int btrfs_try_tree_read_lock(struct extent_buffer *eb)
*/
int btrfs_try_tree_write_lock(struct extent_buffer *eb)
{
- if (atomic_read(&eb->blocking_writers) ||
- atomic_read(&eb->blocking_readers))
+ if (eb->blocking_writers || atomic_read(&eb->blocking_readers))
return 0;
write_lock(&eb->lock);
- if (atomic_read(&eb->blocking_writers) ||
- atomic_read(&eb->blocking_readers)) {
+ if (eb->blocking_writers || atomic_read(&eb->blocking_readers)) {
write_unlock(&eb->lock);
return 0;
}
@@ -322,10 +317,9 @@ void btrfs_tree_lock(struct extent_buffer *eb)
WARN_ON(eb->lock_owner == current->pid);
again:
wait_event(eb->read_lock_wq, atomic_read(&eb->blocking_readers) == 0);
- wait_event(eb->write_lock_wq, atomic_read(&eb->blocking_writers) == 0);
+ wait_event(eb->write_lock_wq, eb->blocking_writers == 0);
write_lock(&eb->lock);
- if (atomic_read(&eb->blocking_readers) ||
- atomic_read(&eb->blocking_writers)) {
+ if (atomic_read(&eb->blocking_readers) || eb->blocking_writers) {
write_unlock(&eb->lock);
goto again;
}
@@ -340,7 +334,7 @@ again:
*/
void btrfs_tree_unlock(struct extent_buffer *eb)
{
- int blockers = atomic_read(&eb->blocking_writers);
+ int blockers = eb->blocking_writers;
BUG_ON(blockers > 1);
@@ -351,7 +345,7 @@ void btrfs_tree_unlock(struct extent_buffer *eb)
if (blockers) {
btrfs_assert_no_spinning_writers(eb);
- atomic_dec(&eb->blocking_writers);
+ eb->blocking_writers--;
/* Use the lighter barrier after atomic */
smp_mb__after_atomic();
cond_wake_up_nomb(&eb->write_lock_wq);
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 52889da69113..1744ba8b2754 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -13,6 +13,7 @@
#include "extent_io.h"
#include "disk-io.h"
#include "compression.h"
+#include "delalloc-space.h"
static struct kmem_cache *btrfs_ordered_extent_cache;
@@ -924,14 +925,16 @@ out:
* be reclaimed before their checksum is actually put into the btree
*/
int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr,
- u32 *sum, int len)
+ u8 *sum, int len)
{
+ struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_ordered_sum *ordered_sum;
struct btrfs_ordered_extent *ordered;
struct btrfs_ordered_inode_tree *tree = &BTRFS_I(inode)->ordered_tree;
unsigned long num_sectors;
unsigned long i;
u32 sectorsize = btrfs_inode_sectorsize(inode);
+ const u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
int index = 0;
ordered = btrfs_lookup_ordered_extent(inode, offset);
@@ -947,10 +950,10 @@ int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr,
num_sectors = ordered_sum->len >>
inode->i_sb->s_blocksize_bits;
num_sectors = min_t(int, len - index, num_sectors - i);
- memcpy(sum + index, ordered_sum->sums + i,
- num_sectors);
+ memcpy(sum + index, ordered_sum->sums + i * csum_size,
+ num_sectors * csum_size);
- index += (int)num_sectors;
+ index += (int)num_sectors * csum_size;
if (index == len)
goto out;
disk_bytenr += num_sectors * sectorsize;
@@ -962,6 +965,51 @@ out:
return index;
}
+/*
+ * btrfs_lock_and_flush_ordered_range - Lock the passed range and ensure all
+ * pending ordered extents in it are run to completion.
+ *
+ * @tree:         IO tree used for locking out other users of the range
+ * @inode:        Inode whose ordered tree is to be searched
+ * @start:        Beginning of range to flush
+ * @end:          Last byte of range to lock
+ * @cached_state: If passed, will return the extent state responsible for the
+ *                locked range. It's the caller's responsibility to free the
+ *                cached state.
+ *
+ * This function always returns with the given range locked, ensuring after it's
+ * called no ordered extent can be pending.
+ */
+void btrfs_lock_and_flush_ordered_range(struct extent_io_tree *tree,
+					struct btrfs_inode *inode, u64 start,
+					u64 end,
+					struct extent_state **cached_state)
+{
+	struct btrfs_ordered_extent *ordered;
+	struct extent_state *cache = NULL;
+	struct extent_state **cachedp = &cache;
+
+	/*
+	 * Lock and unlock through the caller's slot when one is supplied.
+	 * Working on a local copy of *cached_state would never hand the state
+	 * cached by lock_extent_bits() back to the caller, leaving its pointer
+	 * stale and the extra reference leaked.
+	 */
+	if (cached_state)
+		cachedp = cached_state;
+
+	while (1) {
+		lock_extent_bits(tree, start, end, cachedp);
+		ordered = btrfs_lookup_ordered_range(inode, start,
+						     end - start + 1);
+		if (!ordered) {
+			/*
+			 * If no external cached_state has been passed then
+			 * decrement the extra ref taken for the local cache
+			 * since we aren't exposing it outside of this function
+			 */
+			if (!cached_state)
+				refcount_dec(&cache->refs);
+			break;
+		}
+		/* Drop the range lock while the ordered extent completes. */
+		unlock_extent_cached(tree, start, end, cachedp);
+		btrfs_start_ordered_extent(&inode->vfs_inode, ordered, 1);
+		btrfs_put_ordered_extent(ordered);
+	}
+}
+
int __init ordered_data_init(void)
{
btrfs_ordered_extent_cache = kmem_cache_create("btrfs_ordered_extent",
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
index 4c5991c3de14..5204171ea962 100644
--- a/fs/btrfs/ordered-data.h
+++ b/fs/btrfs/ordered-data.h
@@ -23,7 +23,7 @@ struct btrfs_ordered_sum {
int len;
struct list_head list;
/* last field is a variable length array of csums */
- u32 sums[];
+ u8 sums[];
};
/*
@@ -183,11 +183,15 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_range(
int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
struct btrfs_ordered_extent *ordered);
int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr,
- u32 *sum, int len);
+ u8 *sum, int len);
u64 btrfs_wait_ordered_extents(struct btrfs_root *root, u64 nr,
const u64 range_start, const u64 range_len);
u64 btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, u64 nr,
const u64 range_start, const u64 range_len);
+void btrfs_lock_and_flush_ordered_range(struct extent_io_tree *tree,
+ struct btrfs_inode *inode, u64 start,
+ u64 end,
+ struct extent_state **cached_state);
int __init ordered_data_init(void);
void __cold ordered_data_exit(void);
diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c
index 1141ca5fae6a..9cb50577d982 100644
--- a/fs/btrfs/print-tree.c
+++ b/fs/btrfs/print-tree.c
@@ -153,11 +153,11 @@ static void print_eb_refs_lock(struct extent_buffer *eb)
#ifdef CONFIG_BTRFS_DEBUG
btrfs_info(eb->fs_info,
"refs %u lock (w:%d r:%d bw:%d br:%d sw:%d sr:%d) lock_owner %u current %u",
- atomic_read(&eb->refs), atomic_read(&eb->write_locks),
+ atomic_read(&eb->refs), eb->write_locks,
atomic_read(&eb->read_locks),
- atomic_read(&eb->blocking_writers),
+ eb->blocking_writers,
atomic_read(&eb->blocking_readers),
- atomic_read(&eb->spinning_writers),
+ eb->spinning_writers,
atomic_read(&eb->spinning_readers),
eb->lock_owner, current->pid);
#endif
diff --git a/fs/btrfs/props.c b/fs/btrfs/props.c
index a9e2e66152ee..e0469816c678 100644
--- a/fs/btrfs/props.c
+++ b/fs/btrfs/props.c
@@ -257,11 +257,7 @@ static int prop_compression_validate(const char *value, size_t len)
if (!value)
return 0;
- if (!strncmp("lzo", value, 3))
- return 0;
- else if (!strncmp("zlib", value, 4))
- return 0;
- else if (!strncmp("zstd", value, 4))
+ if (btrfs_compress_is_valid_type(value, len))
return 0;
return -EINVAL;
@@ -341,7 +337,7 @@ static int inherit_props(struct btrfs_trans_handle *trans,
for (i = 0; i < ARRAY_SIZE(prop_handlers); i++) {
const struct prop_handler *h = &prop_handlers[i];
const char *value;
- u64 num_bytes;
+ u64 num_bytes = 0;
if (!h->inheritable)
continue;
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 3e6ffbbd8b0a..f8a3c1b0a15a 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -2614,6 +2614,7 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid,
int ret = 0;
int i;
u64 *i_qgroups;
+ bool committing = false;
struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_root *quota_root;
struct btrfs_qgroup *srcgroup;
@@ -2621,7 +2622,25 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid,
u32 level_size = 0;
u64 nums;
- mutex_lock(&fs_info->qgroup_ioctl_lock);
+ /*
+ * There are only two callers of this function.
+ *
+ * One in create_subvol() in the ioctl context, which needs to hold
+ * the qgroup_ioctl_lock.
+ *
+ * The other one in create_pending_snapshot() where no other qgroup
+ * code can modify the fs as they all need to either start a new trans
+ * or hold a trans handle, thus we don't need to hold
+ * qgroup_ioctl_lock.
+ * This avoids a long and complex lock chain and makes lockdep happy.
+ */
+ spin_lock(&fs_info->trans_lock);
+ if (trans->transaction->state == TRANS_STATE_COMMIT_DOING)
+ committing = true;
+ spin_unlock(&fs_info->trans_lock);
+
+ if (!committing)
+ mutex_lock(&fs_info->qgroup_ioctl_lock);
if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
goto out;
@@ -2785,7 +2804,8 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid,
unlock:
spin_unlock(&fs_info->qgroup_lock);
out:
- mutex_unlock(&fs_info->qgroup_ioctl_lock);
+ if (!committing)
+ mutex_unlock(&fs_info->qgroup_ioctl_lock);
return ret;
}
diff --git a/fs/btrfs/raid56.h b/fs/btrfs/raid56.h
index f5d4c13a8dbc..2503485db859 100644
--- a/fs/btrfs/raid56.h
+++ b/fs/btrfs/raid56.h
@@ -7,7 +7,7 @@
#ifndef BTRFS_RAID56_H
#define BTRFS_RAID56_H
-static inline int nr_parity_stripes(struct map_lookup *map)
+static inline int nr_parity_stripes(const struct map_lookup *map)
{
if (map->type & BTRFS_BLOCK_GROUP_RAID5)
return 1;
@@ -17,7 +17,7 @@ static inline int nr_parity_stripes(struct map_lookup *map)
return 0;
}
-static inline int nr_data_stripes(struct map_lookup *map)
+static inline int nr_data_stripes(const struct map_lookup *map)
{
return map->num_stripes - nr_parity_stripes(map);
}
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 22a3c69864fa..7f219851fa23 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -20,6 +20,7 @@
#include "inode-map.h"
#include "qgroup.h"
#include "print-tree.h"
+#include "delalloc-space.h"
/*
* backref_node, mapping_node and tree_block start with this
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
index 22124122728c..47733fb55df7 100644
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c
@@ -9,6 +9,8 @@
#include "transaction.h"
#include "disk-io.h"
#include "print-tree.h"
+#include "qgroup.h"
+#include "space-info.h"
/*
* Read a root item from the tree. In case we detect a root item smaller then
@@ -497,3 +499,57 @@ void btrfs_update_root_times(struct btrfs_trans_handle *trans,
btrfs_set_stack_timespec_nsec(&item->ctime, ct.tv_nsec);
spin_unlock(&root->root_item_lock);
}
+
+/*
+ * btrfs_subvolume_reserve_metadata() - reserve space for subvolume operation
+ * root: the root of the parent directory
+ * rsv: block reservation
+ * items: the number of items that we need to reserve space for
+ * use_global_rsv: allow fallback to the global block reservation
+ *
+ * This function is used to reserve the space for snapshot/subvolume
+ * creation and deletion. Those operations are different from the
+ * common file/directory operations: they change two fs/file trees
+ * and the root tree, and the number of items that the qgroup reserves
+ * is different from the free space reservation. So we cannot use
+ * the space reservation mechanism in start_transaction().
+ */
+int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
+ struct btrfs_block_rsv *rsv, int items,
+ bool use_global_rsv)
+{
+ u64 qgroup_num_bytes = 0;
+ u64 num_bytes;
+ int ret;
+ struct btrfs_fs_info *fs_info = root->fs_info;
+ struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
+
+ if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) {
+ /* One for parent inode, two for dir entries */
+ qgroup_num_bytes = 3 * fs_info->nodesize;
+ ret = btrfs_qgroup_reserve_meta_prealloc(root,
+ qgroup_num_bytes, true);
+ if (ret)
+ return ret;
+ }
+
+ num_bytes = btrfs_calc_trans_metadata_size(fs_info, items);
+ rsv->space_info = btrfs_find_space_info(fs_info,
+ BTRFS_BLOCK_GROUP_METADATA);
+ ret = btrfs_block_rsv_add(root, rsv, num_bytes,
+ BTRFS_RESERVE_FLUSH_ALL);
+
+ if (ret == -ENOSPC && use_global_rsv)
+ ret = btrfs_block_rsv_migrate(global_rsv, rsv, num_bytes, true);
+
+ if (ret && qgroup_num_bytes)
+ btrfs_qgroup_free_meta_prealloc(root, qgroup_num_bytes);
+
+ return ret;
+}
+
+void btrfs_subvolume_release_metadata(struct btrfs_fs_info *fs_info,
+ struct btrfs_block_rsv *rsv)
+{
+ btrfs_block_rsv_release(fs_info, rsv, (u64)-1);
+}
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index f7b29f9db5e2..0c99cf9fb595 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -6,6 +6,7 @@
#include <linux/blkdev.h>
#include <linux/ratelimit.h>
#include <linux/sched/mm.h>
+#include <crypto/hash.h>
#include "ctree.h"
#include "volumes.h"
#include "disk-io.h"
@@ -1787,11 +1788,12 @@ static int scrub_checksum(struct scrub_block *sblock)
static int scrub_checksum_data(struct scrub_block *sblock)
{
struct scrub_ctx *sctx = sblock->sctx;
+ struct btrfs_fs_info *fs_info = sctx->fs_info;
+ SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
u8 csum[BTRFS_CSUM_SIZE];
u8 *on_disk_csum;
struct page *page;
void *buffer;
- u32 crc = ~(u32)0;
u64 len;
int index;
@@ -1799,6 +1801,9 @@ static int scrub_checksum_data(struct scrub_block *sblock)
if (!sblock->pagev[0]->have_csum)
return 0;
+ shash->tfm = fs_info->csum_shash;
+ crypto_shash_init(shash);
+
on_disk_csum = sblock->pagev[0]->csum;
page = sblock->pagev[0]->page;
buffer = kmap_atomic(page);
@@ -1808,7 +1813,7 @@ static int scrub_checksum_data(struct scrub_block *sblock)
for (;;) {
u64 l = min_t(u64, len, PAGE_SIZE);
- crc = btrfs_csum_data(buffer, crc, l);
+ crypto_shash_update(shash, buffer, l);
kunmap_atomic(buffer);
len -= l;
if (len == 0)
@@ -1820,7 +1825,7 @@ static int scrub_checksum_data(struct scrub_block *sblock)
buffer = kmap_atomic(page);
}
- btrfs_csum_final(crc, csum);
+ crypto_shash_final(shash, csum);
if (memcmp(csum, on_disk_csum, sctx->csum_size))
sblock->checksum_error = 1;
@@ -1832,16 +1837,19 @@ static int scrub_checksum_tree_block(struct scrub_block *sblock)
struct scrub_ctx *sctx = sblock->sctx;
struct btrfs_header *h;
struct btrfs_fs_info *fs_info = sctx->fs_info;
+ SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
u8 calculated_csum[BTRFS_CSUM_SIZE];
u8 on_disk_csum[BTRFS_CSUM_SIZE];
struct page *page;
void *mapped_buffer;
u64 mapped_size;
void *p;
- u32 crc = ~(u32)0;
u64 len;
int index;
+ shash->tfm = fs_info->csum_shash;
+ crypto_shash_init(shash);
+
BUG_ON(sblock->page_count < 1);
page = sblock->pagev[0]->page;
mapped_buffer = kmap_atomic(page);
@@ -1875,7 +1883,7 @@ static int scrub_checksum_tree_block(struct scrub_block *sblock)
for (;;) {
u64 l = min_t(u64, len, mapped_size);
- crc = btrfs_csum_data(p, crc, l);
+ crypto_shash_update(shash, p, l);
kunmap_atomic(mapped_buffer);
len -= l;
if (len == 0)
@@ -1889,7 +1897,7 @@ static int scrub_checksum_tree_block(struct scrub_block *sblock)
p = mapped_buffer;
}
- btrfs_csum_final(crc, calculated_csum);
+ crypto_shash_final(shash, calculated_csum);
if (memcmp(calculated_csum, on_disk_csum, sctx->csum_size))
sblock->checksum_error = 1;
@@ -1900,18 +1908,22 @@ static int scrub_checksum_super(struct scrub_block *sblock)
{
struct btrfs_super_block *s;
struct scrub_ctx *sctx = sblock->sctx;
+ struct btrfs_fs_info *fs_info = sctx->fs_info;
+ SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
u8 calculated_csum[BTRFS_CSUM_SIZE];
u8 on_disk_csum[BTRFS_CSUM_SIZE];
struct page *page;
void *mapped_buffer;
u64 mapped_size;
void *p;
- u32 crc = ~(u32)0;
int fail_gen = 0;
int fail_cor = 0;
u64 len;
int index;
+ shash->tfm = fs_info->csum_shash;
+ crypto_shash_init(shash);
+
BUG_ON(sblock->page_count < 1);
page = sblock->pagev[0]->page;
mapped_buffer = kmap_atomic(page);
@@ -1934,7 +1946,7 @@ static int scrub_checksum_super(struct scrub_block *sblock)
for (;;) {
u64 l = min_t(u64, len, mapped_size);
- crc = btrfs_csum_data(p, crc, l);
+ crypto_shash_update(shash, p, l);
kunmap_atomic(mapped_buffer);
len -= l;
if (len == 0)
@@ -1948,7 +1960,7 @@ static int scrub_checksum_super(struct scrub_block *sblock)
p = mapped_buffer;
}
- btrfs_csum_final(crc, calculated_csum);
+ crypto_shash_final(shash, calculated_csum);
if (memcmp(calculated_csum, on_disk_csum, sctx->csum_size))
++fail_cor;
@@ -2448,7 +2460,7 @@ static int scrub_find_csum(struct scrub_ctx *sctx, u64 logical, u8 *csum)
ASSERT(index < UINT_MAX);
num_sectors = sum->len / sctx->fs_info->sectorsize;
- memcpy(csum, sum->sums + index, sctx->csum_size);
+ memcpy(csum, sum->sums + index * sctx->csum_size, sctx->csum_size);
if (index == num_sectors - 1) {
list_del(&sum->list);
kfree(sum);
@@ -2660,18 +2672,18 @@ static int get_raid56_logic_offset(u64 physical, int num,
u64 last_offset;
u32 stripe_index;
u32 rot;
+ const int data_stripes = nr_data_stripes(map);
- last_offset = (physical - map->stripes[num].physical) *
- nr_data_stripes(map);
+ last_offset = (physical - map->stripes[num].physical) * data_stripes;
if (stripe_start)
*stripe_start = last_offset;
*offset = last_offset;
- for (i = 0; i < nr_data_stripes(map); i++) {
+ for (i = 0; i < data_stripes; i++) {
*offset = last_offset + i * map->stripe_len;
stripe_nr = div64_u64(*offset, map->stripe_len);
- stripe_nr = div_u64(stripe_nr, nr_data_stripes(map));
+ stripe_nr = div_u64(stripe_nr, data_stripes);
/* Work out the disk rotation on this stripe-set */
stripe_nr = div_u64_rem(stripe_nr, map->num_stripes, &rot);
@@ -3079,7 +3091,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
offset = map->stripe_len * (num / map->sub_stripes);
increment = map->stripe_len * factor;
mirror_num = num % map->sub_stripes + 1;
- } else if (map->type & BTRFS_BLOCK_GROUP_RAID1) {
+ } else if (map->type & BTRFS_BLOCK_GROUP_RAID1_MASK) {
increment = map->stripe_len;
mirror_num = num % map->num_stripes + 1;
} else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
@@ -3410,15 +3422,15 @@ static noinline_for_stack int scrub_chunk(struct scrub_ctx *sctx,
struct btrfs_block_group_cache *cache)
{
struct btrfs_fs_info *fs_info = sctx->fs_info;
- struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
+ struct extent_map_tree *map_tree = &fs_info->mapping_tree;
struct map_lookup *map;
struct extent_map *em;
int i;
int ret = 0;
- read_lock(&map_tree->map_tree.lock);
- em = lookup_extent_mapping(&map_tree->map_tree, chunk_offset, 1);
- read_unlock(&map_tree->map_tree.lock);
+ read_lock(&map_tree->lock);
+ em = lookup_extent_mapping(map_tree, chunk_offset, 1);
+ read_unlock(&map_tree->lock);
if (!em) {
/*
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index f7fe4770f0e5..69b59bf75882 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -686,7 +686,7 @@ static int send_cmd(struct send_ctx *sctx)
hdr->len = cpu_to_le32(sctx->send_size - sizeof(*hdr));
hdr->crc = 0;
- crc = crc32c(0, (unsigned char *)sctx->send_buf, sctx->send_size);
+ crc = btrfs_crc32c(0, (unsigned char *)sctx->send_buf, sctx->send_size);
hdr->crc = cpu_to_le32(crc);
ret = write_buf(sctx->send_filp, sctx->send_buf, sctx->send_size,
@@ -6929,9 +6929,23 @@ long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg)
if (ret)
goto out;
+ mutex_lock(&fs_info->balance_mutex);
+ if (test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags)) {
+ mutex_unlock(&fs_info->balance_mutex);
+ btrfs_warn_rl(fs_info,
+ "cannot run send because a balance operation is in progress");
+ ret = -EAGAIN;
+ goto out;
+ }
+ fs_info->send_in_progress++;
+ mutex_unlock(&fs_info->balance_mutex);
+
current->journal_info = BTRFS_SEND_TRANS_STUB;
ret = send_subvol(sctx);
current->journal_info = NULL;
+ mutex_lock(&fs_info->balance_mutex);
+ fs_info->send_in_progress--;
+ mutex_unlock(&fs_info->balance_mutex);
if (ret < 0)
goto out;
diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c
new file mode 100644
index 000000000000..ab7b9ec4c240
--- /dev/null
+++ b/fs/btrfs/space-info.c
@@ -0,0 +1,1094 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "ctree.h"
+#include "space-info.h"
+#include "sysfs.h"
+#include "volumes.h"
+#include "free-space-cache.h"
+#include "ordered-data.h"
+#include "transaction.h"
+#include "math.h"
+
+u64 btrfs_space_info_used(struct btrfs_space_info *s_info,
+ bool may_use_included)
+{
+ ASSERT(s_info);
+ return s_info->bytes_used + s_info->bytes_reserved +
+ s_info->bytes_pinned + s_info->bytes_readonly +
+ (may_use_included ? s_info->bytes_may_use : 0);
+}
+
+/*
+ * after adding space to the filesystem, we need to clear the full flags
+ * on all the space infos.
+ */
+void btrfs_clear_space_info_full(struct btrfs_fs_info *info)
+{
+ struct list_head *head = &info->space_info;
+ struct btrfs_space_info *found;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(found, head, list)
+ found->full = 0;
+ rcu_read_unlock();
+}
+
+static const char *alloc_name(u64 flags)
+{
+ switch (flags) {
+ case BTRFS_BLOCK_GROUP_METADATA|BTRFS_BLOCK_GROUP_DATA:
+ return "mixed";
+ case BTRFS_BLOCK_GROUP_METADATA:
+ return "metadata";
+ case BTRFS_BLOCK_GROUP_DATA:
+ return "data";
+ case BTRFS_BLOCK_GROUP_SYSTEM:
+ return "system";
+ default:
+ WARN_ON(1);
+ return "invalid-combination";
+ };
+}
+
+static int create_space_info(struct btrfs_fs_info *info, u64 flags)
+{
+
+ struct btrfs_space_info *space_info;
+ int i;
+ int ret;
+
+ space_info = kzalloc(sizeof(*space_info), GFP_NOFS);
+ if (!space_info)
+ return -ENOMEM;
+
+ ret = percpu_counter_init(&space_info->total_bytes_pinned, 0,
+ GFP_KERNEL);
+ if (ret) {
+ kfree(space_info);
+ return ret;
+ }
+
+ for (i = 0; i < BTRFS_NR_RAID_TYPES; i++)
+ INIT_LIST_HEAD(&space_info->block_groups[i]);
+ init_rwsem(&space_info->groups_sem);
+ spin_lock_init(&space_info->lock);
+ space_info->flags = flags & BTRFS_BLOCK_GROUP_TYPE_MASK;
+ space_info->force_alloc = CHUNK_ALLOC_NO_FORCE;
+ init_waitqueue_head(&space_info->wait);
+ INIT_LIST_HEAD(&space_info->ro_bgs);
+ INIT_LIST_HEAD(&space_info->tickets);
+ INIT_LIST_HEAD(&space_info->priority_tickets);
+
+ ret = kobject_init_and_add(&space_info->kobj, &space_info_ktype,
+ info->space_info_kobj, "%s",
+ alloc_name(space_info->flags));
+ if (ret) {
+ kobject_put(&space_info->kobj);
+ return ret;
+ }
+
+ list_add_rcu(&space_info->list, &info->space_info);
+ if (flags & BTRFS_BLOCK_GROUP_DATA)
+ info->data_sinfo = space_info;
+
+ return ret;
+}
+
+int btrfs_init_space_info(struct btrfs_fs_info *fs_info)
+{
+ struct btrfs_super_block *disk_super;
+ u64 features;
+ u64 flags;
+ int mixed = 0;
+ int ret;
+
+ disk_super = fs_info->super_copy;
+ if (!btrfs_super_root(disk_super))
+ return -EINVAL;
+
+ features = btrfs_super_incompat_flags(disk_super);
+ if (features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)
+ mixed = 1;
+
+ flags = BTRFS_BLOCK_GROUP_SYSTEM;
+ ret = create_space_info(fs_info, flags);
+ if (ret)
+ goto out;
+
+ if (mixed) {
+ flags = BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA;
+ ret = create_space_info(fs_info, flags);
+ } else {
+ flags = BTRFS_BLOCK_GROUP_METADATA;
+ ret = create_space_info(fs_info, flags);
+ if (ret)
+ goto out;
+
+ flags = BTRFS_BLOCK_GROUP_DATA;
+ ret = create_space_info(fs_info, flags);
+ }
+out:
+ return ret;
+}
+
+void btrfs_update_space_info(struct btrfs_fs_info *info, u64 flags,
+ u64 total_bytes, u64 bytes_used,
+ u64 bytes_readonly,
+ struct btrfs_space_info **space_info)
+{
+ struct btrfs_space_info *found;
+ int factor;
+
+ factor = btrfs_bg_type_to_factor(flags);
+
+ found = btrfs_find_space_info(info, flags);
+ ASSERT(found);
+ spin_lock(&found->lock);
+ found->total_bytes += total_bytes;
+ found->disk_total += total_bytes * factor;
+ found->bytes_used += bytes_used;
+ found->disk_used += bytes_used * factor;
+ found->bytes_readonly += bytes_readonly;
+ if (total_bytes > 0)
+ found->full = 0;
+ btrfs_space_info_add_new_bytes(info, found,
+ total_bytes - bytes_used -
+ bytes_readonly);
+ spin_unlock(&found->lock);
+ *space_info = found;
+}
+
+struct btrfs_space_info *btrfs_find_space_info(struct btrfs_fs_info *info,
+ u64 flags)
+{
+ struct list_head *head = &info->space_info;
+ struct btrfs_space_info *found;
+
+ flags &= BTRFS_BLOCK_GROUP_TYPE_MASK;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(found, head, list) {
+ if (found->flags & flags) {
+ rcu_read_unlock();
+ return found;
+ }
+ }
+ rcu_read_unlock();
+ return NULL;
+}
+
+static inline u64 calc_global_rsv_need_space(struct btrfs_block_rsv *global)
+{
+ return (global->size << 1);
+}
+
+static int can_overcommit(struct btrfs_fs_info *fs_info,
+ struct btrfs_space_info *space_info, u64 bytes,
+ enum btrfs_reserve_flush_enum flush,
+ bool system_chunk)
+{
+ struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
+ u64 profile;
+ u64 space_size;
+ u64 avail;
+ u64 used;
+ int factor;
+
+ /* Don't overcommit when in mixed mode. */
+ if (space_info->flags & BTRFS_BLOCK_GROUP_DATA)
+ return 0;
+
+ if (system_chunk)
+ profile = btrfs_system_alloc_profile(fs_info);
+ else
+ profile = btrfs_metadata_alloc_profile(fs_info);
+
+ used = btrfs_space_info_used(space_info, false);
+
+ /*
+ * We only want to allow over committing if we have lots of actual space
+ * free, but if we don't have enough space to handle the global reserve
+ * space then we could end up having a real enospc problem when trying
+ * to allocate a chunk or some other such important allocation.
+ */
+ spin_lock(&global_rsv->lock);
+ space_size = calc_global_rsv_need_space(global_rsv);
+ spin_unlock(&global_rsv->lock);
+ if (used + space_size >= space_info->total_bytes)
+ return 0;
+
+ used += space_info->bytes_may_use;
+
+ avail = atomic64_read(&fs_info->free_chunk_space);
+
+ /*
+ * If we have dup, raid1 or raid10 then only half of the free
+ * space is actually usable. For raid56, the space info used
+ * doesn't include the parity drive, so we don't have to
+ * change the math
+ */
+ factor = btrfs_bg_type_to_factor(profile);
+ avail = div_u64(avail, factor);
+
+ /*
+ * If we aren't flushing all things, let us overcommit up to
+ * 1/2th of the space. If we can flush, don't let us overcommit
+ * too much, let it overcommit up to 1/8 of the space.
+ */
+ if (flush == BTRFS_RESERVE_FLUSH_ALL)
+ avail >>= 3;
+ else
+ avail >>= 1;
+
+ if (used + bytes < space_info->total_bytes + avail)
+ return 1;
+ return 0;
+}
+
+/*
+ * This is for space we already have accounted in space_info->bytes_may_use, so
+ * basically when we're returning space from block_rsv's.
+ */
+void btrfs_space_info_add_old_bytes(struct btrfs_fs_info *fs_info,
+ struct btrfs_space_info *space_info,
+ u64 num_bytes)
+{
+ struct reserve_ticket *ticket;
+ struct list_head *head;
+ u64 used;
+ enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_NO_FLUSH;
+ bool check_overcommit = false;
+
+ spin_lock(&space_info->lock);
+ head = &space_info->priority_tickets;
+
+ /*
+ * If we are over our limit then we need to check and see if we can
+ * overcommit, and if we can't then we just need to free up our space
+ * and not satisfy any requests.
+ */
+ used = btrfs_space_info_used(space_info, true);
+ if (used - num_bytes >= space_info->total_bytes)
+ check_overcommit = true;
+again:
+ while (!list_empty(head) && num_bytes) {
+ ticket = list_first_entry(head, struct reserve_ticket,
+ list);
+ /*
+ * We use 0 bytes because this space is already reserved, so
+ * adding the ticket space would be a double count.
+ */
+ if (check_overcommit &&
+ !can_overcommit(fs_info, space_info, 0, flush, false))
+ break;
+ if (num_bytes >= ticket->bytes) {
+ list_del_init(&ticket->list);
+ num_bytes -= ticket->bytes;
+ ticket->bytes = 0;
+ space_info->tickets_id++;
+ wake_up(&ticket->wait);
+ } else {
+ ticket->bytes -= num_bytes;
+ num_bytes = 0;
+ }
+ }
+
+ if (num_bytes && head == &space_info->priority_tickets) {
+ head = &space_info->tickets;
+ flush = BTRFS_RESERVE_FLUSH_ALL;
+ goto again;
+ }
+ btrfs_space_info_update_bytes_may_use(fs_info, space_info, -num_bytes);
+ trace_btrfs_space_reservation(fs_info, "space_info",
+ space_info->flags, num_bytes, 0);
+ spin_unlock(&space_info->lock);
+}
+
+/*
+ * This is for newly allocated space that isn't accounted in
+ * space_info->bytes_may_use yet. So if we allocate a chunk or unpin an extent
+ * we use this helper.
+ */
+void btrfs_space_info_add_new_bytes(struct btrfs_fs_info *fs_info,
+ struct btrfs_space_info *space_info,
+ u64 num_bytes)
+{
+ struct reserve_ticket *ticket;
+ struct list_head *head = &space_info->priority_tickets;
+
+again:
+ while (!list_empty(head) && num_bytes) {
+ ticket = list_first_entry(head, struct reserve_ticket,
+ list);
+ if (num_bytes >= ticket->bytes) {
+ trace_btrfs_space_reservation(fs_info, "space_info",
+ space_info->flags,
+ ticket->bytes, 1);
+ list_del_init(&ticket->list);
+ num_bytes -= ticket->bytes;
+ btrfs_space_info_update_bytes_may_use(fs_info,
+ space_info,
+ ticket->bytes);
+ ticket->bytes = 0;
+ space_info->tickets_id++;
+ wake_up(&ticket->wait);
+ } else {
+ trace_btrfs_space_reservation(fs_info, "space_info",
+ space_info->flags,
+ num_bytes, 1);
+ btrfs_space_info_update_bytes_may_use(fs_info,
+ space_info,
+ num_bytes);
+ ticket->bytes -= num_bytes;
+ num_bytes = 0;
+ }
+ }
+
+ if (num_bytes && head == &space_info->priority_tickets) {
+ head = &space_info->tickets;
+ goto again;
+ }
+}
+
+#define DUMP_BLOCK_RSV(fs_info, rsv_name) \
+do { \
+ struct btrfs_block_rsv *__rsv = &(fs_info)->rsv_name; \
+ spin_lock(&__rsv->lock); \
+ btrfs_info(fs_info, #rsv_name ": size %llu reserved %llu", \
+ __rsv->size, __rsv->reserved); \
+ spin_unlock(&__rsv->lock); \
+} while (0)
+
+void btrfs_dump_space_info(struct btrfs_fs_info *fs_info,
+ struct btrfs_space_info *info, u64 bytes,
+ int dump_block_groups)
+{
+ struct btrfs_block_group_cache *cache;
+ int index = 0;
+
+ spin_lock(&info->lock);
+ btrfs_info(fs_info, "space_info %llu has %llu free, is %sfull",
+ info->flags,
+ info->total_bytes - btrfs_space_info_used(info, true),
+ info->full ? "" : "not ");
+ btrfs_info(fs_info,
+ "space_info total=%llu, used=%llu, pinned=%llu, reserved=%llu, may_use=%llu, readonly=%llu",
+ info->total_bytes, info->bytes_used, info->bytes_pinned,
+ info->bytes_reserved, info->bytes_may_use,
+ info->bytes_readonly);
+ spin_unlock(&info->lock);
+
+ DUMP_BLOCK_RSV(fs_info, global_block_rsv);
+ DUMP_BLOCK_RSV(fs_info, trans_block_rsv);
+ DUMP_BLOCK_RSV(fs_info, chunk_block_rsv);
+ DUMP_BLOCK_RSV(fs_info, delayed_block_rsv);
+ DUMP_BLOCK_RSV(fs_info, delayed_refs_rsv);
+
+ if (!dump_block_groups)
+ return;
+
+ down_read(&info->groups_sem);
+again:
+ list_for_each_entry(cache, &info->block_groups[index], list) {
+ spin_lock(&cache->lock);
+ btrfs_info(fs_info,
+ "block group %llu has %llu bytes, %llu used %llu pinned %llu reserved %s",
+ cache->key.objectid, cache->key.offset,
+ btrfs_block_group_used(&cache->item), cache->pinned,
+ cache->reserved, cache->ro ? "[readonly]" : "");
+ btrfs_dump_free_space(cache, bytes);
+ spin_unlock(&cache->lock);
+ }
+ if (++index < BTRFS_NR_RAID_TYPES)
+ goto again;
+ up_read(&info->groups_sem);
+}
+
+static void btrfs_writeback_inodes_sb_nr(struct btrfs_fs_info *fs_info,
+ unsigned long nr_pages, int nr_items)
+{
+ struct super_block *sb = fs_info->sb;
+
+ if (down_read_trylock(&sb->s_umount)) {
+ writeback_inodes_sb_nr(sb, nr_pages, WB_REASON_FS_FREE_SPACE);
+ up_read(&sb->s_umount);
+ } else {
+ /*
+ * We needn't worry about the filesystem going from r/w to r/o even
+ * though we failed to acquire ->s_umount for reading, because the
+ * filesystem should guarantee that the delalloc inodes list is empty
+ * once the filesystem is readonly (all dirty pages are written to
+ * the disk).
+ */
+ btrfs_start_delalloc_roots(fs_info, nr_items);
+ if (!current->journal_info)
+ btrfs_wait_ordered_roots(fs_info, nr_items, 0, (u64)-1);
+ }
+}
+
+static inline u64 calc_reclaim_items_nr(struct btrfs_fs_info *fs_info,
+ u64 to_reclaim)
+{
+ u64 bytes;
+ u64 nr;
+
+ bytes = btrfs_calc_trans_metadata_size(fs_info, 1);
+ nr = div64_u64(to_reclaim, bytes);
+ if (!nr)
+ nr = 1;
+ return nr;
+}
+
+#define EXTENT_SIZE_PER_ITEM SZ_256K
+
+/*
+ * shrink metadata reservation for delalloc
+ */
+static void shrink_delalloc(struct btrfs_fs_info *fs_info, u64 to_reclaim,
+ u64 orig, bool wait_ordered)
+{
+ struct btrfs_space_info *space_info;
+ struct btrfs_trans_handle *trans;
+ u64 delalloc_bytes;
+ u64 dio_bytes;
+ u64 async_pages;
+ u64 items;
+ long time_left;
+ unsigned long nr_pages;
+ int loops;
+
+ /* Calc the number of the pages we need flush for space reservation */
+ items = calc_reclaim_items_nr(fs_info, to_reclaim);
+ to_reclaim = items * EXTENT_SIZE_PER_ITEM;
+
+ trans = (struct btrfs_trans_handle *)current->journal_info;
+ space_info = btrfs_find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
+
+ delalloc_bytes = percpu_counter_sum_positive(
+ &fs_info->delalloc_bytes);
+ dio_bytes = percpu_counter_sum_positive(&fs_info->dio_bytes);
+ if (delalloc_bytes == 0 && dio_bytes == 0) {
+ if (trans)
+ return;
+ if (wait_ordered)
+ btrfs_wait_ordered_roots(fs_info, items, 0, (u64)-1);
+ return;
+ }
+
+ /*
+ * If we are doing more ordered than delalloc we need to just wait on
+ * ordered extents, otherwise we'll waste time trying to flush delalloc
+ * that likely won't give us the space back we need.
+ */
+ if (dio_bytes > delalloc_bytes)
+ wait_ordered = true;
+
+ loops = 0;
+ while ((delalloc_bytes || dio_bytes) && loops < 3) {
+ nr_pages = min(delalloc_bytes, to_reclaim) >> PAGE_SHIFT;
+
+ /*
+ * Triggers inode writeback for up to nr_pages. This will invoke
+ * ->writepages callback and trigger delalloc filling
+ * (btrfs_run_delalloc_range()).
+ */
+ btrfs_writeback_inodes_sb_nr(fs_info, nr_pages, items);
+
+ /*
+ * We need to wait for the compressed pages to start before
+ * we continue.
+ */
+ async_pages = atomic_read(&fs_info->async_delalloc_pages);
+ if (!async_pages)
+ goto skip_async;
+
+ /*
+ * Calculate how many compressed pages we want to be written
+ * before we continue. I.e if there are more async pages than we
+ * require wait_event will wait until nr_pages are written.
+ */
+ if (async_pages <= nr_pages)
+ async_pages = 0;
+ else
+ async_pages -= nr_pages;
+
+ wait_event(fs_info->async_submit_wait,
+ atomic_read(&fs_info->async_delalloc_pages) <=
+ (int)async_pages);
+skip_async:
+ spin_lock(&space_info->lock);
+ if (list_empty(&space_info->tickets) &&
+ list_empty(&space_info->priority_tickets)) {
+ spin_unlock(&space_info->lock);
+ break;
+ }
+ spin_unlock(&space_info->lock);
+
+ loops++;
+ if (wait_ordered && !trans) {
+ btrfs_wait_ordered_roots(fs_info, items, 0, (u64)-1);
+ } else {
+ time_left = schedule_timeout_killable(1);
+ if (time_left)
+ break;
+ }
+ delalloc_bytes = percpu_counter_sum_positive(
+ &fs_info->delalloc_bytes);
+ dio_bytes = percpu_counter_sum_positive(&fs_info->dio_bytes);
+ }
+}
+
+/**
+ * may_commit_transaction - possibly commit the transaction if it's ok to
+ * @fs_info - the filesystem whose transaction may be committed
+ * @space_info - the space_info whose first pending ticket we try to satisfy
+ *
+ * This will check to make sure that committing the transaction will actually
+ * get us somewhere and then commit the transaction if it does. Otherwise it
+ * will return -ENOSPC. Returns -EAGAIN if current->journal_info is already
+ * set, and 0 if there is no ticket that still needs bytes.
+ */
+static int may_commit_transaction(struct btrfs_fs_info *fs_info,
+ struct btrfs_space_info *space_info)
+{
+ struct reserve_ticket *ticket = NULL;
+ struct btrfs_block_rsv *delayed_rsv = &fs_info->delayed_block_rsv;
+ struct btrfs_block_rsv *delayed_refs_rsv = &fs_info->delayed_refs_rsv;
+ struct btrfs_trans_handle *trans;
+ u64 bytes_needed;
+ u64 reclaim_bytes = 0;
+
+ trans = (struct btrfs_trans_handle *)current->journal_info;
+ if (trans)
+ return -EAGAIN;
+
+ spin_lock(&space_info->lock);
+ if (!list_empty(&space_info->priority_tickets))
+ ticket = list_first_entry(&space_info->priority_tickets,
+ struct reserve_ticket, list);
+ else if (!list_empty(&space_info->tickets))
+ ticket = list_first_entry(&space_info->tickets,
+ struct reserve_ticket, list);
+ bytes_needed = (ticket) ? ticket->bytes : 0;
+ spin_unlock(&space_info->lock);
+
+ if (!bytes_needed)
+ return 0;
+
+ trans = btrfs_join_transaction(fs_info->extent_root);
+ if (IS_ERR(trans))
+ return PTR_ERR(trans);
+
+ /*
+ * See if there is enough pinned space to make this reservation, or if
+ * we have block groups that are going to be freed, allowing us to
+ * possibly do a chunk allocation the next loop through.
+ */
+ if (test_bit(BTRFS_TRANS_HAVE_FREE_BGS, &trans->transaction->flags) ||
+ __percpu_counter_compare(&space_info->total_bytes_pinned,
+ bytes_needed,
+ BTRFS_TOTAL_BYTES_PINNED_BATCH) >= 0)
+ goto commit;
+
+ /*
+ * See if there is some space in the delayed insertion reservation for
+ * this reservation.
+ */
+ if (space_info != delayed_rsv->space_info)
+ goto enospc;
+
+ spin_lock(&delayed_rsv->lock);
+ reclaim_bytes += delayed_rsv->reserved;
+ spin_unlock(&delayed_rsv->lock);
+
+ spin_lock(&delayed_refs_rsv->lock);
+ reclaim_bytes += delayed_refs_rsv->reserved;
+ spin_unlock(&delayed_refs_rsv->lock);
+ if (reclaim_bytes >= bytes_needed)
+ goto commit;
+ bytes_needed -= reclaim_bytes;
+
+ if (__percpu_counter_compare(&space_info->total_bytes_pinned,
+ bytes_needed,
+ BTRFS_TOTAL_BYTES_PINNED_BATCH) < 0)
+ goto enospc;
+
+commit:
+ return btrfs_commit_transaction(trans);
+enospc:
+ btrfs_end_transaction(trans);
+ return -ENOSPC;
+}
+
+/*
+ * Try to flush some data based on policy set by @state. This is only advisory
+ * and may fail for various reasons. The caller is supposed to examine the
+ * state of @space_info to detect the outcome.
+ */
+static void flush_space(struct btrfs_fs_info *fs_info,
+ struct btrfs_space_info *space_info, u64 num_bytes,
+ int state)
+{
+ struct btrfs_root *root = fs_info->extent_root;
+ struct btrfs_trans_handle *trans;
+ int nr;
+ int ret = 0;
+
+ switch (state) {
+ case FLUSH_DELAYED_ITEMS_NR:
+ case FLUSH_DELAYED_ITEMS:
+ if (state == FLUSH_DELAYED_ITEMS_NR)
+ nr = calc_reclaim_items_nr(fs_info, num_bytes) * 2;
+ else
+ nr = -1;
+
+ trans = btrfs_join_transaction(root);
+ if (IS_ERR(trans)) {
+ ret = PTR_ERR(trans);
+ break;
+ }
+ ret = btrfs_run_delayed_items_nr(trans, nr);
+ btrfs_end_transaction(trans);
+ break;
+ case FLUSH_DELALLOC:
+ case FLUSH_DELALLOC_WAIT:
+ shrink_delalloc(fs_info, num_bytes * 2, num_bytes,
+ state == FLUSH_DELALLOC_WAIT);
+ break;
+ case FLUSH_DELAYED_REFS_NR:
+ case FLUSH_DELAYED_REFS:
+ trans = btrfs_join_transaction(root);
+ if (IS_ERR(trans)) {
+ ret = PTR_ERR(trans);
+ break;
+ }
+ if (state == FLUSH_DELAYED_REFS_NR)
+ nr = calc_reclaim_items_nr(fs_info, num_bytes);
+ else
+ nr = 0;
+ btrfs_run_delayed_refs(trans, nr);
+ btrfs_end_transaction(trans);
+ break;
+ case ALLOC_CHUNK:
+ case ALLOC_CHUNK_FORCE:
+ trans = btrfs_join_transaction(root);
+ if (IS_ERR(trans)) {
+ ret = PTR_ERR(trans);
+ break;
+ }
+ ret = btrfs_chunk_alloc(trans,
+ btrfs_metadata_alloc_profile(fs_info),
+ (state == ALLOC_CHUNK) ? CHUNK_ALLOC_NO_FORCE :
+ CHUNK_ALLOC_FORCE);
+ btrfs_end_transaction(trans);
+ if (ret > 0 || ret == -ENOSPC)
+ ret = 0;
+ break;
+ case COMMIT_TRANS:
+ /*
+ * If we have pending delayed iputs then we could free up a
+ * bunch of pinned space, so make sure we run the iputs before
+ * we do our pinned bytes check below.
+ */
+ btrfs_run_delayed_iputs(fs_info);
+ btrfs_wait_on_delayed_iputs(fs_info);
+
+ ret = may_commit_transaction(fs_info, space_info);
+ break;
+ default:
+ ret = -ENOSPC;
+ break;
+ }
+
+ trace_btrfs_flush_space(fs_info, space_info->flags, num_bytes, state,
+ ret);
+ return;
+}
+
+/*
+ * Work out how many metadata bytes the flushers should try to reclaim.
+ *
+ * If any tickets are queued (normal or priority), the answer is simply the
+ * sum of their outstanding bytes.  Otherwise nothing is reclaimed when a
+ * per-CPU sized reservation (1M per online CPU, capped at 16M) could still be
+ * overcommitted; failing that, the result is the amount by which the used
+ * space exceeds 95% (if 1M can still be overcommitted) or 90% of total_bytes,
+ * capped at bytes_may_use + bytes_reserved.
+ */
+static inline u64
+btrfs_calc_reclaim_metadata_size(struct btrfs_fs_info *fs_info,
+				 struct btrfs_space_info *space_info,
+				 bool system_chunk)
+{
+	struct reserve_ticket *ticket;
+	u64 pending = 0;
+	u64 used;
+	u64 target;
+	u64 excess;
+	int pct;
+
+	/* Outstanding tickets dictate the amount directly. */
+	list_for_each_entry(ticket, &space_info->tickets, list)
+		pending += ticket->bytes;
+	list_for_each_entry(ticket, &space_info->priority_tickets, list)
+		pending += ticket->bytes;
+	if (pending)
+		return pending;
+
+	/* No tickets: nothing to do while we can still overcommit. */
+	if (can_overcommit(fs_info, space_info,
+			   min_t(u64, num_online_cpus() * SZ_1M, SZ_16M),
+			   BTRFS_RESERVE_FLUSH_ALL, system_chunk))
+		return 0;
+
+	used = btrfs_space_info_used(space_info, true);
+
+	pct = can_overcommit(fs_info, space_info, SZ_1M,
+			     BTRFS_RESERVE_FLUSH_ALL, system_chunk) ? 95 : 90;
+	target = div_factor_fine(space_info->total_bytes, pct);
+
+	excess = (used > target) ? used - target : 0;
+
+	return min(excess, space_info->bytes_may_use +
+			   space_info->bytes_reserved);
+}
+
+/*
+ * Decide whether a preemptive async reclaim should be kicked off.
+ *
+ * Returns 1 only when all of the following hold: bytes_used + bytes_reserved
+ * is below 98% of total_bytes, there is something to reclaim, @used has
+ * reached that 98% threshold, and the filesystem is neither closing nor being
+ * remounted.  Returns 0 otherwise.
+ */
+static inline int need_do_async_reclaim(struct btrfs_fs_info *fs_info,
+					struct btrfs_space_info *space_info,
+					u64 used, bool system_chunk)
+{
+	const u64 thresh = div_factor_fine(space_info->total_bytes, 98);
+	const u64 committed = space_info->bytes_used +
+			      space_info->bytes_reserved;
+
+	/* If we're just plain full then async reclaim just slows us down. */
+	if (committed >= thresh)
+		return 0;
+
+	if (!btrfs_calc_reclaim_metadata_size(fs_info, space_info,
+					      system_chunk))
+		return 0;
+
+	if (used < thresh)
+		return 0;
+
+	if (btrfs_fs_closing(fs_info))
+		return 0;
+
+	return !test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state);
+}
+
+/*
+ * Fail the tickets queued on @head with -ENOSPC, waking each waiter.
+ *
+ * Tickets are taken from the front of the list one at a time.  The walk stops
+ * early and returns true as soon as a ticket is found whose remaining bytes
+ * differ from its original request; the caller uses that as a cue to restart
+ * flushing.  Returns false once the list has been emptied without hitting
+ * such a ticket.
+ */
+static bool wake_all_tickets(struct list_head *head)
+{
+	struct reserve_ticket *ticket;
+
+	for (;;) {
+		if (list_empty(head))
+			return false;
+
+		ticket = list_first_entry(head, struct reserve_ticket, list);
+		list_del_init(&ticket->list);
+		ticket->error = -ENOSPC;
+		wake_up(&ticket->wait);
+
+		if (ticket->bytes != ticket->orig_bytes)
+			return true;
+	}
+}
+
+/*
+ * Work handler for normal (non-priority) flushers; it may take as long as it
+ * needs.  Starting at FLUSH_DELAYED_ITEMS_NR it steps through the flush states
+ * and keeps looping as long as progress is being made, where progress means
+ * that space_info->tickets_id changed since the previous pass.
+ *
+ * The handler stops, clearing space_info->flush, when there is nothing to
+ * reclaim, when the ticket list is empty after a flush, or when commit_cycles
+ * exceeds 2 and wake_all_tickets() did not ask for another round.
+ */
+static void btrfs_async_reclaim_metadata_space(struct work_struct *work)
+{
+ struct btrfs_fs_info *fs_info;
+ struct btrfs_space_info *space_info;
+ u64 to_reclaim;
+ int flush_state;
+ int commit_cycles = 0;
+ u64 last_tickets_id;
+
+ fs_info = container_of(work, struct btrfs_fs_info, async_reclaim_work);
+ space_info = btrfs_find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
+
+ spin_lock(&space_info->lock);
+ to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info, space_info,
+ false);
+ if (!to_reclaim) {
+ space_info->flush = 0;
+ spin_unlock(&space_info->lock);
+ return;
+ }
+ /* Sample the ticket counter so progress can be detected after a flush. */
+ last_tickets_id = space_info->tickets_id;
+ spin_unlock(&space_info->lock);
+
+ flush_state = FLUSH_DELAYED_ITEMS_NR;
+ do {
+ flush_space(fs_info, space_info, to_reclaim, flush_state);
+ spin_lock(&space_info->lock);
+ /* Nobody is waiting any more, so there is nothing left to flush for. */
+ if (list_empty(&space_info->tickets)) {
+ space_info->flush = 0;
+ spin_unlock(&space_info->lock);
+ return;
+ }
+ to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info,
+ space_info,
+ false);
+ /*
+ * Unchanged tickets_id: escalate to the next flush state.
+ * Changed tickets_id: restart from the first state and give back
+ * one commit cycle.
+ */
+ if (last_tickets_id == space_info->tickets_id) {
+ flush_state++;
+ } else {
+ last_tickets_id = space_info->tickets_id;
+ flush_state = FLUSH_DELAYED_ITEMS_NR;
+ if (commit_cycles)
+ commit_cycles--;
+ }
+
+ /*
+ * We don't want to force a chunk allocation until we've tried
+ * pretty hard to reclaim space. Think of the case where we
+ * freed up a bunch of space and so have a lot of pinned space
+ * to reclaim. We would rather use that than possibly create an
+ * underutilized metadata chunk. So if this is our first run
+ * through the flushing state machine skip ALLOC_CHUNK_FORCE and
+ * commit the transaction. If nothing has changed the next go
+ * around then we can force a chunk allocation.
+ */
+ if (flush_state == ALLOC_CHUNK_FORCE && !commit_cycles)
+ flush_state++;
+
+ /*
+ * We ran past the last state.  Count the completed cycle; after
+ * more than two of them fail the waiting tickets, otherwise start
+ * over from the first state.  When wake_all_tickets() returns
+ * false flush_state is left past COMMIT_TRANS, which ends the loop.
+ */
+ if (flush_state > COMMIT_TRANS) {
+ commit_cycles++;
+ if (commit_cycles > 2) {
+ if (wake_all_tickets(&space_info->tickets)) {
+ flush_state = FLUSH_DELAYED_ITEMS_NR;
+ commit_cycles--;
+ } else {
+ space_info->flush = 0;
+ }
+ } else {
+ flush_state = FLUSH_DELAYED_ITEMS_NR;
+ }
+ }
+ spin_unlock(&space_info->lock);
+ } while (flush_state <= COMMIT_TRANS);
+}
+
+/*
+ * Initialize @work so that it runs btrfs_async_reclaim_metadata_space() when
+ * queued.
+ */
+void btrfs_init_async_reclaim_work(struct work_struct *work)
+{
+ INIT_WORK(work, btrfs_async_reclaim_metadata_space);
+}
+
+/*
+ * Flush states tried, in this order, by priority_reclaim_metadata_space().
+ */
+static const enum btrfs_flush_state priority_flush_states[] = {
+ FLUSH_DELAYED_ITEMS_NR,
+ FLUSH_DELAYED_ITEMS,
+ ALLOC_CHUNK,
+};
+
+/*
+ * Flush on behalf of a priority ticket, in the caller's own context.
+ *
+ * The amount to reclaim is computed once up front.  Each entry of
+ * priority_flush_states is then tried in order, stopping as soon as @ticket
+ * has no outstanding bytes left.  The caller inspects @ticket afterwards to
+ * find out whether the reservation succeeded.
+ */
+static void priority_reclaim_metadata_space(struct btrfs_fs_info *fs_info,
+					    struct btrfs_space_info *space_info,
+					    struct reserve_ticket *ticket)
+{
+	u64 to_reclaim;
+	bool satisfied;
+	int i;
+
+	spin_lock(&space_info->lock);
+	to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info, space_info,
+						      false);
+	spin_unlock(&space_info->lock);
+	if (!to_reclaim)
+		return;
+
+	for (i = 0; i < ARRAY_SIZE(priority_flush_states); i++) {
+		flush_space(fs_info, space_info, to_reclaim,
+			    priority_flush_states[i]);
+
+		/* ticket->bytes is only read under the space_info lock. */
+		spin_lock(&space_info->lock);
+		satisfied = (ticket->bytes == 0);
+		spin_unlock(&space_info->lock);
+		if (satisfied)
+			return;
+	}
+}
+
+/*
+ * Sleep (TASK_KILLABLE) until @ticket has no outstanding bytes left or has
+ * been failed through ticket->error.
+ *
+ * Returns ticket->error (0 when the ticket was satisfied), or -EINTR when
+ * prepare_to_wait_event() returned non-zero.  On the way out the ticket is
+ * removed from whatever list it is still on, and if it was only partially
+ * filled the filled portion is handed back through
+ * btrfs_space_info_add_old_bytes().
+ */
+static int wait_reserve_ticket(struct btrfs_fs_info *fs_info,
+ struct btrfs_space_info *space_info,
+ struct reserve_ticket *ticket)
+
+{
+ DEFINE_WAIT(wait);
+ u64 reclaim_bytes = 0;
+ int ret = 0;
+
+ spin_lock(&space_info->lock);
+ while (ticket->bytes > 0 && ticket->error == 0) {
+ ret = prepare_to_wait_event(&ticket->wait, &wait, TASK_KILLABLE);
+ if (ret) {
+ ret = -EINTR;
+ break;
+ }
+ /* Sleep without the lock; it is re-taken before re-checking the ticket. */
+ spin_unlock(&space_info->lock);
+
+ schedule();
+
+ finish_wait(&ticket->wait, &wait);
+ spin_lock(&space_info->lock);
+ }
+ if (!ret)
+ ret = ticket->error;
+ /* Make sure the ticket is off its list before the caller's copy goes away. */
+ if (!list_empty(&ticket->list))
+ list_del_init(&ticket->list);
+ /* Partially filled: remember how much was granted so it can be returned. */
+ if (ticket->bytes && ticket->bytes < ticket->orig_bytes)
+ reclaim_bytes = ticket->orig_bytes - ticket->bytes;
+ spin_unlock(&space_info->lock);
+
+ if (reclaim_bytes)
+ btrfs_space_info_add_old_bytes(fs_info, space_info,
+ reclaim_bytes);
+ return ret;
+}
+
+/**
+ * __reserve_metadata_bytes - try to reserve bytes from a space info
+ * @fs_info - the filesystem the space info belongs to
+ * @space_info - the space info we want to allocate from
+ * @orig_bytes - the number of bytes we want
+ * @flush - whether or not we can flush to make our reservation
+ * @system_chunk - passed through to can_overcommit() and
+ *                 need_do_async_reclaim()
+ *
+ * This will reserve orig_bytes number of bytes from @space_info. If there is
+ * not enough space it will make an attempt to flush out space to make room:
+ * BTRFS_RESERVE_FLUSH_ALL callers queue a ticket and wait for the async
+ * reclaim worker, any other flushing caller queues a priority ticket and
+ * flushes on its own via priority_reclaim_metadata_space(). If flush is
+ * BTRFS_RESERVE_NO_FLUSH then no attempts to regain reservations will be made
+ * and this will fail with -ENOSPC if there is not enough space already.
+ *
+ * Returns 0 on success, -ENOSPC if the reservation could not be made, or the
+ * result of wait_reserve_ticket() for BTRFS_RESERVE_FLUSH_ALL callers.
+ */
+static int __reserve_metadata_bytes(struct btrfs_fs_info *fs_info,
+ struct btrfs_space_info *space_info,
+ u64 orig_bytes,
+ enum btrfs_reserve_flush_enum flush,
+ bool system_chunk)
+{
+ struct reserve_ticket ticket;
+ u64 used;
+ u64 reclaim_bytes = 0;
+ int ret = 0;
+
+ ASSERT(orig_bytes);
+ ASSERT(!current->journal_info || flush != BTRFS_RESERVE_FLUSH_ALL);
+
+ spin_lock(&space_info->lock);
+ ret = -ENOSPC;
+ used = btrfs_space_info_used(space_info, true);
+
+ /*
+ * Carry on if we have enough space (short-circuit) OR call
+ * can_overcommit() to ensure we can overcommit to continue.
+ */
+ if ((used + orig_bytes <= space_info->total_bytes) ||
+ can_overcommit(fs_info, space_info, orig_bytes, flush,
+ system_chunk)) {
+ btrfs_space_info_update_bytes_may_use(fs_info, space_info,
+ orig_bytes);
+ trace_btrfs_space_reservation(fs_info, "space_info",
+ space_info->flags, orig_bytes, 1);
+ ret = 0;
+ }
+
+ /*
+ * If we couldn't make a reservation then setup our reservation ticket
+ * and kick the async worker if it's not already running.
+ *
+ * If we are a priority flusher then we just need to add our ticket to
+ * the list and we will do our own flushing further down.
+ */
+ if (ret && flush != BTRFS_RESERVE_NO_FLUSH) {
+ ticket.orig_bytes = orig_bytes;
+ ticket.bytes = orig_bytes;
+ ticket.error = 0;
+ init_waitqueue_head(&ticket.wait);
+ if (flush == BTRFS_RESERVE_FLUSH_ALL) {
+ list_add_tail(&ticket.list, &space_info->tickets);
+ if (!space_info->flush) {
+ space_info->flush = 1;
+ trace_btrfs_trigger_flush(fs_info,
+ space_info->flags,
+ orig_bytes, flush,
+ "enospc");
+ queue_work(system_unbound_wq,
+ &fs_info->async_reclaim_work);
+ }
+ } else {
+ list_add_tail(&ticket.list,
+ &space_info->priority_tickets);
+ }
+ } else if (!ret && space_info->flags & BTRFS_BLOCK_GROUP_METADATA) {
+ used += orig_bytes;
+ /*
+ * We will do the space reservation dance during log replay,
+ * which means we won't have fs_info->fs_root set, so don't do
+ * the async reclaim as we will panic.
+ */
+ if (!test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags) &&
+ need_do_async_reclaim(fs_info, space_info,
+ used, system_chunk) &&
+ !work_busy(&fs_info->async_reclaim_work)) {
+ trace_btrfs_trigger_flush(fs_info, space_info->flags,
+ orig_bytes, flush, "preempt");
+ queue_work(system_unbound_wq,
+ &fs_info->async_reclaim_work);
+ }
+ }
+ spin_unlock(&space_info->lock);
+ /* Done if we got the space, or if the caller does not allow flushing. */
+ if (!ret || flush == BTRFS_RESERVE_NO_FLUSH)
+ return ret;
+
+ if (flush == BTRFS_RESERVE_FLUSH_ALL)
+ return wait_reserve_ticket(fs_info, space_info, &ticket);
+
+ /* Priority flusher: flush ourselves, then check whether the ticket was filled. */
+ ret = 0;
+ priority_reclaim_metadata_space(fs_info, space_info, &ticket);
+ spin_lock(&space_info->lock);
+ if (ticket.bytes) {
+ /* Partially filled: hand the granted part back below. */
+ if (ticket.bytes < orig_bytes)
+ reclaim_bytes = orig_bytes - ticket.bytes;
+ list_del_init(&ticket.list);
+ ret = -ENOSPC;
+ }
+ spin_unlock(&space_info->lock);
+
+ if (reclaim_bytes)
+ btrfs_space_info_add_old_bytes(fs_info, space_info,
+ reclaim_bytes);
+ ASSERT(list_empty(&ticket.list));
+ return ret;
+}
+
+/**
+ * btrfs_reserve_metadata_bytes - try to reserve bytes from the block_rsv's space
+ * @root - the root we're allocating for
+ * @block_rsv - the block_rsv we're allocating for
+ * @orig_bytes - the number of bytes we want
+ * @flush - whether or not we can flush to make our reservation
+ *
+ * Reserves @orig_bytes from the space info backing @block_rsv by way of
+ * __reserve_metadata_bytes().  If that fails with -ENOSPC while @root is in
+ * the ORPHAN_CLEANUP_STARTED state and @block_rsv is not the global reserve,
+ * the bytes are taken from the global block reserve instead, provided
+ * btrfs_block_rsv_use_bytes() returns 0.  A final -ENOSPC is traced and, with
+ * ENOSPC_DEBUG set, the space info is dumped.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+int btrfs_reserve_metadata_bytes(struct btrfs_root *root,
+				 struct btrfs_block_rsv *block_rsv,
+				 u64 orig_bytes,
+				 enum btrfs_reserve_flush_enum flush)
+{
+	struct btrfs_fs_info *fs_info = root->fs_info;
+	struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
+	bool system_chunk = (root == fs_info->chunk_root);
+	int ret;
+
+	ret = __reserve_metadata_bytes(fs_info, block_rsv->space_info,
+				       orig_bytes, flush, system_chunk);
+	if (ret != -ENOSPC)
+		return ret;
+
+	/* During orphan cleanup, fall back to the global reserve. */
+	if (unlikely(root->orphan_cleanup_state == ORPHAN_CLEANUP_STARTED) &&
+	    block_rsv != global_rsv &&
+	    !btrfs_block_rsv_use_bytes(global_rsv, orig_bytes))
+		return 0;
+
+	trace_btrfs_space_reservation(fs_info, "space_info:enospc",
+				      block_rsv->space_info->flags,
+				      orig_bytes, 1);
+
+	if (btrfs_test_opt(fs_info, ENOSPC_DEBUG))
+		btrfs_dump_space_info(fs_info, block_rsv->space_info,
+				      orig_bytes, 0);
+
+	return ret;
+}
diff --git a/fs/btrfs/space-info.h b/fs/btrfs/space-info.h
new file mode 100644
index 000000000000..c2b54b8e1a14
--- /dev/null
+++ b/fs/btrfs/space-info.h
@@ -0,0 +1,133 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef BTRFS_SPACE_INFO_H
+#define BTRFS_SPACE_INFO_H
+
+/*
+ * Accounting for one class of space, identified by the block group type bits
+ * in 'flags', together with the reservation tickets waiting on that space.
+ */
+struct btrfs_space_info {
+ spinlock_t lock;
+
+ u64 total_bytes; /* total bytes in the space,
+ this doesn't take mirrors into account */
+ u64 bytes_used; /* total bytes used,
+ this doesn't take mirrors into account */
+ u64 bytes_pinned; /* total bytes pinned, will be freed when the
+ transaction finishes */
+ u64 bytes_reserved; /* total bytes the allocator has reserved for
+ current allocations */
+ u64 bytes_may_use; /* number of bytes that may be used for
+ delalloc/allocations */
+ u64 bytes_readonly; /* total bytes that are read only */
+
+ u64 max_extent_size; /* This will hold the maximum extent size of
+ the space info if we had an ENOSPC in the
+ allocator. */
+
+ unsigned int full:1; /* indicates that we cannot allocate any more
+ chunks for this space */
+ unsigned int chunk_alloc:1; /* set if we are allocating a chunk */
+
+ unsigned int flush:1; /* set if we are trying to make space */
+
+ unsigned int force_alloc; /* set if we need to force a chunk
+ alloc for this space */
+
+ u64 disk_used; /* total bytes used on disk */
+ u64 disk_total; /* total bytes on disk, takes mirrors into
+ account */
+
+ u64 flags;
+
+ /*
+ * bytes_pinned is kept in line with what is actually pinned, as in
+ * we've called update_block_group and dropped the bytes_used counter
+ * and increased the bytes_pinned counter. However this means that
+ * bytes_pinned does not reflect the bytes that will be pinned once the
+ * delayed refs are flushed, so this counter is inc'ed every time we
+ * call btrfs_free_extent so it is a realtime count of what will be
+ * freed once the transaction is committed. It will be zeroed every
+ * time the transaction commits.
+ */
+ struct percpu_counter total_bytes_pinned;
+
+ struct list_head list;
+ /* Protected by the spinlock 'lock'. */
+ struct list_head ro_bgs;
+ /* Tickets of reservers that do their own flushing. */
+ struct list_head priority_tickets;
+ /* Tickets of reservers waiting for the async reclaim worker. */
+ struct list_head tickets;
+ /*
+ * tickets_id is a single counter for the whole space info; it is not
+ * stored per ticket. The async reclaim worker compares it against a
+ * previously sampled value to tell whether any ticket was handled in
+ * between.
+ */
+ u64 tickets_id;
+
+ struct rw_semaphore groups_sem;
+ /* for block groups in our same type */
+ struct list_head block_groups[BTRFS_NR_RAID_TYPES];
+ wait_queue_head_t wait;
+
+ struct kobject kobj;
+ struct kobject *block_group_kobjs[BTRFS_NR_RAID_TYPES];
+};
+
+/*
+ * A metadata reservation that could not be satisfied immediately and is
+ * queued on a space info until space is found or the request is failed.
+ */
+struct reserve_ticket {
+ /* Size of the reservation as originally requested. */
+ u64 orig_bytes;
+ /* Bytes still outstanding; waiters treat 0 as "fully satisfied". */
+ u64 bytes;
+ /* 0 while pending; set to a negative errno (-ENOSPC) to fail the ticket. */
+ int error;
+ /* Entry on space_info->tickets or space_info->priority_tickets. */
+ struct list_head list;
+ /* Waitqueue the reserving task sleeps on. */
+ wait_queue_head_t wait;
+};
+
+/*
+ * Return true when @space_info carries both the METADATA and the DATA block
+ * group flags.
+ */
+static inline bool btrfs_mixed_space_info(struct btrfs_space_info *space_info)
+{
+	if (!(space_info->flags & BTRFS_BLOCK_GROUP_METADATA))
+		return false;
+
+	return (space_info->flags & BTRFS_BLOCK_GROUP_DATA) != 0;
+}
+
+/*
+ * Declare btrfs_space_info_update_<name>(), a helper that adds a signed
+ * delta to the space info member <name> and detects underflow.
+ *
+ * The generated helper must be called with sinfo->lock held. It emits the
+ * update_<name> trace event, and if a negative delta is larger than the
+ * current value it warns and clamps the member to 0 instead of wrapping.
+ */
+#define DECLARE_SPACE_INFO_UPDATE(name) \
+static inline void \
+btrfs_space_info_update_##name(struct btrfs_fs_info *fs_info, \
+ struct btrfs_space_info *sinfo, \
+ s64 bytes) \
+{ \
+ lockdep_assert_held(&sinfo->lock); \
+ trace_update_##name(fs_info, sinfo, sinfo->name, bytes); \
+ if (bytes < 0 && sinfo->name < -bytes) { \
+ WARN_ON(1); \
+ sinfo->name = 0; \
+ return; \
+ } \
+ sinfo->name += bytes; \
+}
+
+DECLARE_SPACE_INFO_UPDATE(bytes_may_use);
+DECLARE_SPACE_INFO_UPDATE(bytes_pinned);
+
+void btrfs_space_info_add_new_bytes(struct btrfs_fs_info *fs_info,
+ struct btrfs_space_info *space_info,
+ u64 num_bytes);
+void btrfs_space_info_add_old_bytes(struct btrfs_fs_info *fs_info,
+ struct btrfs_space_info *space_info,
+ u64 num_bytes);
+int btrfs_init_space_info(struct btrfs_fs_info *fs_info);
+void btrfs_update_space_info(struct btrfs_fs_info *info, u64 flags,
+ u64 total_bytes, u64 bytes_used,
+ u64 bytes_readonly,
+ struct btrfs_space_info **space_info);
+struct btrfs_space_info *btrfs_find_space_info(struct btrfs_fs_info *info,
+ u64 flags);
+u64 btrfs_space_info_used(struct btrfs_space_info *s_info,
+ bool may_use_included);
+void btrfs_clear_space_info_full(struct btrfs_fs_info *info);
+void btrfs_dump_space_info(struct btrfs_fs_info *fs_info,
+ struct btrfs_space_info *info, u64 bytes,
+ int dump_block_groups);
+int btrfs_reserve_metadata_bytes(struct btrfs_root *root,
+ struct btrfs_block_rsv *block_rsv,
+ u64 orig_bytes,
+ enum btrfs_reserve_flush_enum flush);
+
+#endif /* BTRFS_SPACE_INFO_H */
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 0645ec428b4f..78de9d5d80c6 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -42,6 +42,7 @@
#include "dev-replace.h"
#include "free-space-cache.h"
#include "backref.h"
+#include "space-info.h"
#include "tests/btrfs-tests.h"
#include "qgroup.h"
@@ -1553,6 +1554,8 @@ static struct dentry *btrfs_mount_root(struct file_system_type *fs_type,
} else {
snprintf(s->s_id, sizeof(s->s_id), "%pg", bdev);
btrfs_sb(s)->bdev_holder = fs_type;
+ if (!strstr(crc32c_impl(), "generic"))
+ set_bit(BTRFS_FS_CSUM_IMPL_FAST, &fs_info->flags);
error = btrfs_fill_super(s, fs_devices, data);
}
if (!error)
@@ -1601,14 +1604,10 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
{
struct vfsmount *mnt_root;
struct dentry *root;
- fmode_t mode = FMODE_READ;
char *subvol_name = NULL;
u64 subvol_objectid = 0;
int error = 0;
- if (!(flags & SB_RDONLY))
- mode |= FMODE_WRITE;
-
error = btrfs_parse_subvol_options(data, &subvol_name,
&subvol_objectid);
if (error) {
@@ -1904,8 +1903,9 @@ static inline int btrfs_calc_avail_data_space(struct btrfs_fs_info *fs_info,
u64 type;
u64 avail_space;
u64 min_stripe_size;
- int min_stripes = 1, num_stripes = 1;
+ int min_stripes, num_stripes = 1;
int i = 0, nr_devices;
+ const struct btrfs_raid_attr *rattr;
/*
* We aren't under the device list lock, so this is racy-ish, but good
@@ -1929,21 +1929,18 @@ static inline int btrfs_calc_avail_data_space(struct btrfs_fs_info *fs_info,
/* calc min stripe number for data space allocation */
type = btrfs_data_alloc_profile(fs_info);
- if (type & BTRFS_BLOCK_GROUP_RAID0) {
- min_stripes = 2;
+ rattr = &btrfs_raid_array[btrfs_bg_flags_to_raid_index(type)];
+ min_stripes = rattr->devs_min;
+
+ if (type & BTRFS_BLOCK_GROUP_RAID0)
num_stripes = nr_devices;
- } else if (type & BTRFS_BLOCK_GROUP_RAID1) {
- min_stripes = 2;
+ else if (type & BTRFS_BLOCK_GROUP_RAID1)
num_stripes = 2;
- } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
- min_stripes = 4;
+ else if (type & BTRFS_BLOCK_GROUP_RAID10)
num_stripes = 4;
- }
- if (type & BTRFS_BLOCK_GROUP_DUP)
- min_stripe_size = 2 * BTRFS_STRIPE_LEN;
- else
- min_stripe_size = BTRFS_STRIPE_LEN;
+ /* Adjust for more than 1 stripe per device */
+ min_stripe_size = rattr->dev_stripes * BTRFS_STRIPE_LEN;
rcu_read_lock();
list_for_each_entry_rcu(device, &fs_devices->devices, dev_list) {
@@ -2466,3 +2463,4 @@ late_initcall(init_btrfs_fs);
module_exit(exit_btrfs_fs)
MODULE_LICENSE("GPL");
+MODULE_SOFTDEP("pre: crc32c");
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index c1dfc97893ba..9539f8143b7a 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -16,6 +16,7 @@
#include "transaction.h"
#include "sysfs.h"
#include "volumes.h"
+#include "space-info.h"
static inline struct btrfs_fs_info *to_fs_info(struct kobject *kobj);
static inline struct btrfs_fs_devices *to_fs_devs(struct kobject *kobj);
diff --git a/fs/btrfs/tests/extent-io-tests.c b/fs/btrfs/tests/extent-io-tests.c
index 7bf4d5734dbe..1bf6b5a79191 100644
--- a/fs/btrfs/tests/extent-io-tests.c
+++ b/fs/btrfs/tests/extent-io-tests.c
@@ -10,6 +10,7 @@
#include "btrfs-tests.h"
#include "../ctree.h"
#include "../extent_io.h"
+#include "../btrfs_inode.h"
#define PROCESS_UNLOCK (1 << 0)
#define PROCESS_RELEASE (1 << 1)
@@ -58,7 +59,7 @@ static noinline int process_page_range(struct inode *inode, u64 start, u64 end,
static int test_find_delalloc(u32 sectorsize)
{
struct inode *inode;
- struct extent_io_tree tmp;
+ struct extent_io_tree *tmp;
struct page *page;
struct page *locked_page = NULL;
unsigned long index = 0;
@@ -76,12 +77,13 @@ static int test_find_delalloc(u32 sectorsize)
test_std_err(TEST_ALLOC_INODE);
return -ENOMEM;
}
+ tmp = &BTRFS_I(inode)->io_tree;
/*
* Passing NULL as we don't have fs_info but tracepoints are not used
* at this point
*/
- extent_io_tree_init(NULL, &tmp, IO_TREE_SELFTEST, NULL);
+ extent_io_tree_init(NULL, tmp, IO_TREE_SELFTEST, NULL);
/*
* First go through and create and mark all of our pages dirty, we pin
@@ -108,10 +110,10 @@ static int test_find_delalloc(u32 sectorsize)
* |--- delalloc ---|
* |--- search ---|
*/
- set_extent_delalloc(&tmp, 0, sectorsize - 1, 0, NULL);
+ set_extent_delalloc(tmp, 0, sectorsize - 1, 0, NULL);
start = 0;
end = 0;
- found = find_lock_delalloc_range(inode, &tmp, locked_page, &start,
+ found = find_lock_delalloc_range(inode, locked_page, &start,
&end);
if (!found) {
test_err("should have found at least one delalloc");
@@ -122,7 +124,7 @@ static int test_find_delalloc(u32 sectorsize)
sectorsize - 1, start, end);
goto out_bits;
}
- unlock_extent(&tmp, start, end);
+ unlock_extent(tmp, start, end);
unlock_page(locked_page);
put_page(locked_page);
@@ -139,10 +141,10 @@ static int test_find_delalloc(u32 sectorsize)
test_err("couldn't find the locked page");
goto out_bits;
}
- set_extent_delalloc(&tmp, sectorsize, max_bytes - 1, 0, NULL);
+ set_extent_delalloc(tmp, sectorsize, max_bytes - 1, 0, NULL);
start = test_start;
end = 0;
- found = find_lock_delalloc_range(inode, &tmp, locked_page, &start,
+ found = find_lock_delalloc_range(inode, locked_page, &start,
&end);
if (!found) {
test_err("couldn't find delalloc in our range");
@@ -158,7 +160,7 @@ static int test_find_delalloc(u32 sectorsize)
test_err("there were unlocked pages in the range");
goto out_bits;
}
- unlock_extent(&tmp, start, end);
+ unlock_extent(tmp, start, end);
/* locked_page was unlocked above */
put_page(locked_page);
@@ -176,7 +178,7 @@ static int test_find_delalloc(u32 sectorsize)
}
start = test_start;
end = 0;
- found = find_lock_delalloc_range(inode, &tmp, locked_page, &start,
+ found = find_lock_delalloc_range(inode, locked_page, &start,
&end);
if (found) {
test_err("found range when we shouldn't have");
@@ -194,10 +196,10 @@ static int test_find_delalloc(u32 sectorsize)
*
* We are re-using our test_start from above since it works out well.
*/
- set_extent_delalloc(&tmp, max_bytes, total_dirty - 1, 0, NULL);
+ set_extent_delalloc(tmp, max_bytes, total_dirty - 1, 0, NULL);
start = test_start;
end = 0;
- found = find_lock_delalloc_range(inode, &tmp, locked_page, &start,
+ found = find_lock_delalloc_range(inode, locked_page, &start,
&end);
if (!found) {
test_err("didn't find our range");
@@ -213,7 +215,7 @@ static int test_find_delalloc(u32 sectorsize)
test_err("pages in range were not all locked");
goto out_bits;
}
- unlock_extent(&tmp, start, end);
+ unlock_extent(tmp, start, end);
/*
* Now to test where we run into a page that is no longer dirty in the
@@ -238,7 +240,7 @@ static int test_find_delalloc(u32 sectorsize)
* this changes at any point in the future we will need to fix this
* tests expected behavior.
*/
- found = find_lock_delalloc_range(inode, &tmp, locked_page, &start,
+ found = find_lock_delalloc_range(inode, locked_page, &start,
&end);
if (!found) {
test_err("didn't find our range");
@@ -256,7 +258,7 @@ static int test_find_delalloc(u32 sectorsize)
}
ret = 0;
out_bits:
- clear_extent_bits(&tmp, 0, total_dirty - 1, (unsigned)-1);
+ clear_extent_bits(tmp, 0, total_dirty - 1, (unsigned)-1);
out:
if (locked_page)
put_page(locked_page);
@@ -432,6 +434,89 @@ out:
return ret;
}
+/*
+ * Self-test for find_first_clear_extent_bit().
+ *
+ * Builds a small tree of CHUNK_TRIMMED/CHUNK_ALLOCATED ranges and checks the
+ * range reported for lookups before, inside, between and beyond them.
+ *
+ * Returns 0 when every lookup reports the expected range, or -EINVAL at the
+ * first mismatch so that the failure reaches the caller.  Every range set by
+ * the test is cleared again before returning, on both paths.
+ */
+static int test_find_first_clear_extent_bit(void)
+{
+	struct extent_io_tree tree;
+	u64 start, end;
+	int ret = -EINVAL;
+
+	test_msg("running find_first_clear_extent_bit test");
+	extent_io_tree_init(NULL, &tree, IO_TREE_SELFTEST, NULL);
+
+	/*
+	 * Set 1M-4M alloc/discard.  Nothing below 1M is set, so a search
+	 * starting at 512K must report the clear range 0-1M.
+	 */
+	set_extent_bits(&tree, SZ_1M, SZ_4M - 1,
+			CHUNK_TRIMMED | CHUNK_ALLOCATED);
+
+	find_first_clear_extent_bit(&tree, SZ_512K, &start, &end,
+				    CHUNK_TRIMMED | CHUNK_ALLOCATED);
+
+	if (start != 0 || end != SZ_1M - 1) {
+		test_err("error finding beginning range: start %llu end %llu",
+			 start, end);
+		goto out;
+	}
+
+	/* Now add 32M-64M so that we have a hole between 4M-32M */
+	set_extent_bits(&tree, SZ_32M, SZ_64M - 1,
+			CHUNK_TRIMMED | CHUNK_ALLOCATED);
+
+	/*
+	 * Request first hole starting at 12M, we should get 4M-32M
+	 */
+	find_first_clear_extent_bit(&tree, 12 * SZ_1M, &start, &end,
+				    CHUNK_TRIMMED | CHUNK_ALLOCATED);
+
+	if (start != SZ_4M || end != SZ_32M - 1) {
+		test_err("error finding trimmed range: start %llu end %llu",
+			 start, end);
+		goto out;
+	}
+
+	/*
+	 * Search in the middle of allocated range, should get the next one
+	 * available, which happens to be unallocated -> 4M-32M
+	 */
+	find_first_clear_extent_bit(&tree, SZ_2M, &start, &end,
+				    CHUNK_TRIMMED | CHUNK_ALLOCATED);
+
+	if (start != SZ_4M || end != SZ_32M - 1) {
+		test_err("error finding next unalloc range: start %llu end %llu",
+			 start, end);
+		goto out;
+	}
+
+	/*
+	 * Set 64M-72M with CHUNK_ALLOC flag, then search for CHUNK_TRIMMED flag
+	 * being unset in this range, we should get the entry in range 64M-72M
+	 */
+	set_extent_bits(&tree, SZ_64M, SZ_64M + SZ_8M - 1, CHUNK_ALLOCATED);
+	find_first_clear_extent_bit(&tree, SZ_64M + SZ_1M, &start, &end,
+				    CHUNK_TRIMMED);
+
+	if (start != SZ_64M || end != SZ_64M + SZ_8M - 1) {
+		test_err("error finding exact range: start %llu end %llu",
+			 start, end);
+		goto out;
+	}
+
+	/*
+	 * Search in the middle of set range whose immediate neighbour doesn't
+	 * have the bits set so it must be returned
+	 */
+	find_first_clear_extent_bit(&tree, SZ_64M - SZ_8M, &start, &end,
+				    CHUNK_TRIMMED);
+
+	if (start != SZ_64M || end != SZ_64M + SZ_8M - 1) {
+		test_err("error finding next alloc range: start %llu end %llu",
+			 start, end);
+		goto out;
+	}
+
+	/*
+	 * Search beyond any known range, shall return after last known range
+	 * and end should be -1
+	 */
+	find_first_clear_extent_bit(&tree, (u64)-1, &start, &end,
+				    CHUNK_TRIMMED);
+	if (start != SZ_64M + SZ_8M || end != (u64)-1) {
+		test_err(
+		"error handling beyond end of range search: start %llu end %llu",
+			 start, end);
+		goto out;
+	}
+
+	ret = 0;
+out:
+	/* Drop every range set above so the on-stack tree is left empty. */
+	clear_extent_bits(&tree, 0, (u64)-1, CHUNK_TRIMMED | CHUNK_ALLOCATED);
+
+	return ret;
+}
+
int btrfs_test_extent_io(u32 sectorsize, u32 nodesize)
{
int ret;
@@ -442,6 +527,10 @@ int btrfs_test_extent_io(u32 sectorsize, u32 nodesize)
if (ret)
goto out;
+ ret = test_find_first_clear_extent_bit();
+ if (ret)
+ goto out;
+
ret = test_eb_bitmaps(sectorsize, nodesize);
out:
return ret;
diff --git a/fs/btrfs/tests/extent-map-tests.c b/fs/btrfs/tests/extent-map-tests.c
index 87aeabe9d610..4a7f796c9900 100644
--- a/fs/btrfs/tests/extent-map-tests.c
+++ b/fs/btrfs/tests/extent-map-tests.c
@@ -66,7 +66,9 @@ static int test_case_1(struct btrfs_fs_info *fs_info,
em->len = SZ_16K;
em->block_start = 0;
em->block_len = SZ_16K;
+ write_lock(&em_tree->lock);
ret = add_extent_mapping(em_tree, em, 0);
+ write_unlock(&em_tree->lock);
if (ret < 0) {
test_err("cannot add extent range [0, 16K)");
goto out;
@@ -85,7 +87,9 @@ static int test_case_1(struct btrfs_fs_info *fs_info,
em->len = SZ_4K;
em->block_start = SZ_32K; /* avoid merging */
em->block_len = SZ_4K;
+ write_lock(&em_tree->lock);
ret = add_extent_mapping(em_tree, em, 0);
+ write_unlock(&em_tree->lock);
if (ret < 0) {
test_err("cannot add extent range [16K, 20K)");
goto out;
@@ -104,7 +108,9 @@ static int test_case_1(struct btrfs_fs_info *fs_info,
em->len = len;
em->block_start = start;
em->block_len = len;
+ write_lock(&em_tree->lock);
ret = btrfs_add_extent_mapping(fs_info, em_tree, &em, em->start, em->len);
+ write_unlock(&em_tree->lock);
if (ret) {
test_err("case1 [%llu %llu]: ret %d", start, start + len, ret);
goto out;
@@ -148,7 +154,9 @@ static int test_case_2(struct btrfs_fs_info *fs_info,
em->len = SZ_1K;
em->block_start = EXTENT_MAP_INLINE;
em->block_len = (u64)-1;
+ write_lock(&em_tree->lock);
ret = add_extent_mapping(em_tree, em, 0);
+ write_unlock(&em_tree->lock);
if (ret < 0) {
test_err("cannot add extent range [0, 1K)");
goto out;
@@ -167,7 +175,9 @@ static int test_case_2(struct btrfs_fs_info *fs_info,
em->len = SZ_4K;
em->block_start = SZ_4K;
em->block_len = SZ_4K;
+ write_lock(&em_tree->lock);
ret = add_extent_mapping(em_tree, em, 0);
+ write_unlock(&em_tree->lock);
if (ret < 0) {
test_err("cannot add extent range [4K, 8K)");
goto out;
@@ -186,7 +196,9 @@ static int test_case_2(struct btrfs_fs_info *fs_info,
em->len = SZ_1K;
em->block_start = EXTENT_MAP_INLINE;
em->block_len = (u64)-1;
+ write_lock(&em_tree->lock);
ret = btrfs_add_extent_mapping(fs_info, em_tree, &em, em->start, em->len);
+ write_unlock(&em_tree->lock);
if (ret) {
test_err("case2 [0 1K]: ret %d", ret);
goto out;
@@ -225,7 +237,9 @@ static int __test_case_3(struct btrfs_fs_info *fs_info,
em->len = SZ_4K;
em->block_start = SZ_4K;
em->block_len = SZ_4K;
+ write_lock(&em_tree->lock);
ret = add_extent_mapping(em_tree, em, 0);
+ write_unlock(&em_tree->lock);
if (ret < 0) {
test_err("cannot add extent range [4K, 8K)");
goto out;
@@ -244,7 +258,9 @@ static int __test_case_3(struct btrfs_fs_info *fs_info,
em->len = SZ_16K;
em->block_start = 0;
em->block_len = SZ_16K;
+ write_lock(&em_tree->lock);
ret = btrfs_add_extent_mapping(fs_info, em_tree, &em, start, len);
+ write_unlock(&em_tree->lock);
if (ret) {
test_err("case3 [0x%llx 0x%llx): ret %d",
start, start + len, ret);
@@ -320,7 +336,9 @@ static int __test_case_4(struct btrfs_fs_info *fs_info,
em->len = SZ_8K;
em->block_start = 0;
em->block_len = SZ_8K;
+ write_lock(&em_tree->lock);
ret = add_extent_mapping(em_tree, em, 0);
+ write_unlock(&em_tree->lock);
if (ret < 0) {
test_err("cannot add extent range [0, 8K)");
goto out;
@@ -339,7 +357,9 @@ static int __test_case_4(struct btrfs_fs_info *fs_info,
em->len = 24 * SZ_1K;
em->block_start = SZ_16K; /* avoid merging */
em->block_len = 24 * SZ_1K;
+ write_lock(&em_tree->lock);
ret = add_extent_mapping(em_tree, em, 0);
+ write_unlock(&em_tree->lock);
if (ret < 0) {
test_err("cannot add extent range [8K, 32K)");
goto out;
@@ -357,7 +377,9 @@ static int __test_case_4(struct btrfs_fs_info *fs_info,
em->len = SZ_32K;
em->block_start = 0;
em->block_len = SZ_32K;
+ write_lock(&em_tree->lock);
ret = btrfs_add_extent_mapping(fs_info, em_tree, &em, start, len);
+ write_unlock(&em_tree->lock);
if (ret) {
test_err("case4 [0x%llx 0x%llx): ret %d",
start, len, ret);
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 3f6811cdf803..3b8ae1a8f02d 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -129,6 +129,24 @@ static inline int extwriter_counter_read(struct btrfs_transaction *trans)
}
/*
+ * To be called after all the new block groups attached to the transaction
+ * handle have been created (btrfs_create_pending_block_groups()).
+ */
+void btrfs_trans_release_chunk_metadata(struct btrfs_trans_handle *trans)
+{
+	struct btrfs_fs_info *fs_info = trans->fs_info;
+
+	/* Only act when this handle actually holds a chunk reservation. */
+	if (trans->chunk_bytes_reserved) {
+		/* Warn once if new block groups are still attached. */
+		WARN_ON_ONCE(!list_empty(&trans->new_bgs));
+
+		/* Give the reservation back to the chunk block reserve. */
+		btrfs_block_rsv_release(fs_info, &fs_info->chunk_block_rsv,
+					trans->chunk_bytes_reserved);
+		trans->chunk_bytes_reserved = 0;
+	}
+}
+
+/*
* either allocate a new transaction or hop into the existing one
*/
static noinline int join_transaction(struct btrfs_fs_info *fs_info,
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index 78c446c222b7..527ea94b57d9 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -224,5 +224,6 @@ void btrfs_put_transaction(struct btrfs_transaction *transaction);
void btrfs_apply_pending_changes(struct btrfs_fs_info *fs_info);
void btrfs_add_dropped_root(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
+void btrfs_trans_release_chunk_metadata(struct btrfs_trans_handle *trans);
#endif
diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c
index 96fce4bef4e7..ccd5706199d7 100644
--- a/fs/btrfs/tree-checker.c
+++ b/fs/btrfs/tree-checker.c
@@ -132,6 +132,7 @@ static int check_extent_data_item(struct extent_buffer *leaf,
struct btrfs_file_extent_item *fi;
u32 sectorsize = fs_info->sectorsize;
u32 item_size = btrfs_item_size_nr(leaf, slot);
+ u64 extent_end;
if (!IS_ALIGNED(key->offset, sectorsize)) {
file_extent_err(leaf, slot,
@@ -207,6 +208,16 @@ static int check_extent_data_item(struct extent_buffer *leaf,
CHECK_FE_ALIGNED(leaf, slot, fi, num_bytes, sectorsize))
return -EUCLEAN;
+ /* Catch extent end overflow */
+ if (check_add_overflow(btrfs_file_extent_num_bytes(leaf, fi),
+ key->offset, &extent_end)) {
+ file_extent_err(leaf, slot,
+ "extent end overflow, have file offset %llu extent num bytes %llu",
+ key->offset,
+ btrfs_file_extent_num_bytes(leaf, fi));
+ return -EUCLEAN;
+ }
+
/*
* Check that no two consecutive file extent items, in the same leaf,
* present ranges that overlap each other.
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 3fc8d854d7fb..6c8297bcfeb7 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -3323,6 +3323,30 @@ int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans,
}
/*
+ * Check if an inode was logged in the current transaction. We can't always rely
+ * on an inode's logged_trans value, because it's an in-memory only field and
+ * therefore not persisted. This means that its value is lost if the inode gets
+ * evicted and loaded again from disk (in which case it has a value of 0, and
+ * certainly it is smaller than any possible transaction ID). When that happens,
+ * the full_sync flag is set in the inode's runtime flags, so in that case we
+ * assume eviction happened and ignore the logged_trans value, assuming the
+ * worst case, that the inode was logged before in the current transaction.
+ */
+static bool inode_logged(struct btrfs_trans_handle *trans,
+ struct btrfs_inode *inode)
+{
+ if (inode->logged_trans == trans->transid)
+ return true;
+
+ if (inode->last_trans == trans->transid &&
+ test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags) &&
+ !test_bit(BTRFS_FS_LOG_RECOVERING, &trans->fs_info->flags))
+ return true;
+
+ return false;
+}
+
+/*
* If both a file and directory are logged, and unlinks or renames are
* mixed in, we have a few interesting corners:
*
@@ -3356,7 +3380,7 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
int bytes_del = 0;
u64 dir_ino = btrfs_ino(dir);
- if (dir->logged_trans < trans->transid)
+ if (!inode_logged(trans, dir))
return 0;
ret = join_running_log_trans(root);
@@ -3460,7 +3484,7 @@ int btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans,
u64 index;
int ret;
- if (inode->logged_trans < trans->transid)
+ if (!inode_logged(trans, inode))
return 0;
ret = join_running_log_trans(root);
@@ -5420,9 +5444,19 @@ log_extents:
}
}
+ /*
+ * Don't update last_log_commit if we logged that an inode exists after
+ * it was loaded to memory (full_sync bit set).
+ * This is to prevent data loss when we do a write to the inode, then
+ * the inode gets evicted after all delalloc was flushed, then we log
+ * it exists (due to a rename for example) and then fsync it. This last
+ * fsync would do nothing (not logging the extents previously written).
+ */
spin_lock(&inode->lock);
inode->logged_trans = trans->transid;
- inode->last_log_commit = inode->last_sub_trans;
+ if (inode_only != LOG_INODE_EXISTS ||
+ !test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags))
+ inode->last_log_commit = inode->last_sub_trans;
spin_unlock(&inode->lock);
out_unlock:
mutex_unlock(&inode->log_mutex);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 1c2a6e4b39da..a13ddba1ebc3 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -28,6 +28,7 @@
#include "dev-replace.h"
#include "sysfs.h"
#include "tree-checker.h"
+#include "space-info.h"
const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
[BTRFS_RAID_RAID10] = {
@@ -123,12 +124,14 @@ const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
},
};
-const char *get_raid_name(enum btrfs_raid_types type)
+const char *btrfs_bg_type_to_raid_name(u64 flags)
{
- if (type >= BTRFS_NR_RAID_TYPES)
+ const int index = btrfs_bg_flags_to_raid_index(flags);
+
+ if (index >= BTRFS_NR_RAID_TYPES)
return NULL;
- return btrfs_raid_array[type].raid_name;
+ return btrfs_raid_array[index].raid_name;
}
/*
@@ -237,7 +240,9 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
* chunk_mutex
* -----------
* protects chunks, adding or removing during allocation, trim or when a new
- * device is added/removed
+ * device is added/removed. Additionally it also protects post_commit_list of
+ * individual devices, since they can be added to the transaction's
+ * post_commit_list only with chunk_mutex held.
*
* cleaner_mutex
* -------------
@@ -1818,7 +1823,7 @@ static u64 find_next_chunk(struct btrfs_fs_info *fs_info)
struct rb_node *n;
u64 ret = 0;
- em_tree = &fs_info->mapping_tree.map_tree;
+ em_tree = &fs_info->mapping_tree;
read_lock(&em_tree->lock);
n = rb_last(&em_tree->map.rb_root);
if (n) {
@@ -2941,7 +2946,7 @@ struct extent_map *btrfs_get_chunk_map(struct btrfs_fs_info *fs_info,
struct extent_map_tree *em_tree;
struct extent_map *em;
- em_tree = &fs_info->mapping_tree.map_tree;
+ em_tree = &fs_info->mapping_tree;
read_lock(&em_tree->lock);
em = lookup_extent_mapping(em_tree, logical, length);
read_unlock(&em_tree->lock);
@@ -3474,6 +3479,18 @@ static int chunk_devid_filter(struct extent_buffer *leaf,
return 1;
}
+static u64 calc_data_stripes(u64 type, int num_stripes)
+{
+ const int index = btrfs_bg_flags_to_raid_index(type);
+ const int ncopies = btrfs_raid_array[index].ncopies;
+ const int nparity = btrfs_raid_array[index].nparity;
+
+ if (nparity)
+ return num_stripes - nparity;
+ else
+ return num_stripes / ncopies;
+}
+
/* [pstart, pend) */
static int chunk_drange_filter(struct extent_buffer *leaf,
struct btrfs_chunk *chunk,
@@ -3483,22 +3500,15 @@ static int chunk_drange_filter(struct extent_buffer *leaf,
int num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
u64 stripe_offset;
u64 stripe_length;
+ u64 type;
int factor;
int i;
if (!(bargs->flags & BTRFS_BALANCE_ARGS_DEVID))
return 0;
- if (btrfs_chunk_type(leaf, chunk) & (BTRFS_BLOCK_GROUP_DUP |
- BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10)) {
- factor = num_stripes / 2;
- } else if (btrfs_chunk_type(leaf, chunk) & BTRFS_BLOCK_GROUP_RAID5) {
- factor = num_stripes - 1;
- } else if (btrfs_chunk_type(leaf, chunk) & BTRFS_BLOCK_GROUP_RAID6) {
- factor = num_stripes - 2;
- } else {
- factor = num_stripes;
- }
+ type = btrfs_chunk_type(leaf, chunk);
+ factor = calc_data_stripes(type, num_stripes);
for (i = 0; i < num_stripes; i++) {
stripe = btrfs_stripe_nr(chunk, i);
@@ -3921,11 +3931,9 @@ static void describe_balance_args(struct btrfs_balance_args *bargs, char *buf,
bp += ret; \
} while (0)
- if (flags & BTRFS_BALANCE_ARGS_CONVERT) {
- int index = btrfs_bg_flags_to_raid_index(bargs->target);
-
- CHECK_APPEND_1ARG("convert=%s,", get_raid_name(index));
- }
+ if (flags & BTRFS_BALANCE_ARGS_CONVERT)
+ CHECK_APPEND_1ARG("convert=%s,",
+ btrfs_bg_type_to_raid_name(bargs->target));
if (flags & BTRFS_BALANCE_ARGS_SOFT)
CHECK_APPEND_NOARG("soft,");
@@ -4047,6 +4055,7 @@ int btrfs_balance(struct btrfs_fs_info *fs_info,
u64 num_devices;
unsigned seq;
bool reducing_integrity;
+ int i;
if (btrfs_fs_closing(fs_info) ||
atomic_read(&fs_info->balance_pause_req) ||
@@ -4076,48 +4085,43 @@ int btrfs_balance(struct btrfs_fs_info *fs_info,
}
num_devices = btrfs_num_devices(fs_info);
+ allowed = 0;
+ for (i = 0; i < ARRAY_SIZE(btrfs_raid_array); i++)
+ if (num_devices >= btrfs_raid_array[i].devs_min)
+ allowed |= btrfs_raid_array[i].bg_flag;
- allowed = BTRFS_AVAIL_ALLOC_BIT_SINGLE | BTRFS_BLOCK_GROUP_DUP;
- if (num_devices > 1)
- allowed |= (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1);
- if (num_devices > 2)
- allowed |= BTRFS_BLOCK_GROUP_RAID5;
- if (num_devices > 3)
- allowed |= (BTRFS_BLOCK_GROUP_RAID10 |
- BTRFS_BLOCK_GROUP_RAID6);
if (validate_convert_profile(&bctl->data, allowed)) {
- int index = btrfs_bg_flags_to_raid_index(bctl->data.target);
-
btrfs_err(fs_info,
"balance: invalid convert data profile %s",
- get_raid_name(index));
+ btrfs_bg_type_to_raid_name(bctl->data.target));
ret = -EINVAL;
goto out;
}
if (validate_convert_profile(&bctl->meta, allowed)) {
- int index = btrfs_bg_flags_to_raid_index(bctl->meta.target);
-
btrfs_err(fs_info,
"balance: invalid convert metadata profile %s",
- get_raid_name(index));
+ btrfs_bg_type_to_raid_name(bctl->meta.target));
ret = -EINVAL;
goto out;
}
if (validate_convert_profile(&bctl->sys, allowed)) {
- int index = btrfs_bg_flags_to_raid_index(bctl->sys.target);
-
btrfs_err(fs_info,
"balance: invalid convert system profile %s",
- get_raid_name(index));
+ btrfs_bg_type_to_raid_name(bctl->sys.target));
ret = -EINVAL;
goto out;
}
- /* allow to reduce meta or sys integrity only if force set */
- allowed = BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1 |
- BTRFS_BLOCK_GROUP_RAID10 |
- BTRFS_BLOCK_GROUP_RAID5 |
- BTRFS_BLOCK_GROUP_RAID6;
+ /*
+ * Allow to reduce metadata or system integrity only if force set for
+ * profiles with redundancy (copies, parity)
+ */
+ allowed = 0;
+ for (i = 0; i < ARRAY_SIZE(btrfs_raid_array); i++) {
+ if (btrfs_raid_array[i].ncopies >= 2 ||
+ btrfs_raid_array[i].tolerated_failures >= 1)
+ allowed |= btrfs_raid_array[i].bg_flag;
+ }
do {
seq = read_seqbegin(&fs_info->profiles_lock);
@@ -4152,12 +4156,18 @@ int btrfs_balance(struct btrfs_fs_info *fs_info,
if (btrfs_get_num_tolerated_disk_barrier_failures(meta_target) <
btrfs_get_num_tolerated_disk_barrier_failures(data_target)) {
- int meta_index = btrfs_bg_flags_to_raid_index(meta_target);
- int data_index = btrfs_bg_flags_to_raid_index(data_target);
-
btrfs_warn(fs_info,
"balance: metadata profile %s has lower redundancy than data profile %s",
- get_raid_name(meta_index), get_raid_name(data_index));
+ btrfs_bg_type_to_raid_name(meta_target),
+ btrfs_bg_type_to_raid_name(data_target));
+ }
+
+ if (fs_info->send_in_progress) {
+ btrfs_warn_rl(fs_info,
+"cannot run balance while send operations are in progress (%d in progress)",
+ fs_info->send_in_progress);
+ ret = -EAGAIN;
+ goto out;
}
ret = insert_balance_item(fs_info, bctl);
@@ -4949,6 +4959,8 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
sub_stripes = btrfs_raid_array[index].sub_stripes;
dev_stripes = btrfs_raid_array[index].dev_stripes;
devs_max = btrfs_raid_array[index].devs_max;
+ if (!devs_max)
+ devs_max = BTRFS_MAX_DEVS(info);
devs_min = btrfs_raid_array[index].devs_min;
devs_increment = btrfs_raid_array[index].devs_increment;
ncopies = btrfs_raid_array[index].ncopies;
@@ -4957,8 +4969,6 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
if (type & BTRFS_BLOCK_GROUP_DATA) {
max_stripe_size = SZ_1G;
max_chunk_size = BTRFS_MAX_DATA_CHUNK_SIZE;
- if (!devs_max)
- devs_max = BTRFS_MAX_DEVS(info);
} else if (type & BTRFS_BLOCK_GROUP_METADATA) {
/* for larger filesystems, use larger metadata chunks */
if (fs_devices->total_rw_bytes > 50ULL * SZ_1G)
@@ -4966,13 +4976,9 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
else
max_stripe_size = SZ_256M;
max_chunk_size = max_stripe_size;
- if (!devs_max)
- devs_max = BTRFS_MAX_DEVS(info);
} else if (type & BTRFS_BLOCK_GROUP_SYSTEM) {
max_stripe_size = SZ_32M;
max_chunk_size = 2 * max_stripe_size;
- if (!devs_max)
- devs_max = BTRFS_MAX_DEVS_SYS_CHUNK;
} else {
btrfs_err(info, "invalid chunk type 0x%llx requested",
type);
@@ -5143,7 +5149,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
em->block_len = em->len;
em->orig_block_len = stripe_size;
- em_tree = &info->mapping_tree.map_tree;
+ em_tree = &info->mapping_tree;
write_lock(&em_tree->lock);
ret = add_extent_mapping(em_tree, em, 0);
if (ret) {
@@ -5324,20 +5330,9 @@ static noinline int init_first_rw_device(struct btrfs_trans_handle *trans)
static inline int btrfs_chunk_max_errors(struct map_lookup *map)
{
- int max_errors;
+ const int index = btrfs_bg_flags_to_raid_index(map->type);
- if (map->type & (BTRFS_BLOCK_GROUP_RAID1 |
- BTRFS_BLOCK_GROUP_RAID10 |
- BTRFS_BLOCK_GROUP_RAID5 |
- BTRFS_BLOCK_GROUP_DUP)) {
- max_errors = 1;
- } else if (map->type & BTRFS_BLOCK_GROUP_RAID6) {
- max_errors = 2;
- } else {
- max_errors = 0;
- }
-
- return max_errors;
+ return btrfs_raid_array[index].tolerated_failures;
}
int btrfs_chunk_readonly(struct btrfs_fs_info *fs_info, u64 chunk_offset)
@@ -5378,21 +5373,16 @@ end:
return readonly;
}
-void btrfs_mapping_init(struct btrfs_mapping_tree *tree)
-{
- extent_map_tree_init(&tree->map_tree);
-}
-
-void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree)
+void btrfs_mapping_tree_free(struct extent_map_tree *tree)
{
struct extent_map *em;
while (1) {
- write_lock(&tree->map_tree.lock);
- em = lookup_extent_mapping(&tree->map_tree, 0, (u64)-1);
+ write_lock(&tree->lock);
+ em = lookup_extent_mapping(tree, 0, (u64)-1);
if (em)
- remove_extent_mapping(&tree->map_tree, em);
- write_unlock(&tree->map_tree.lock);
+ remove_extent_mapping(tree, em);
+ write_unlock(&tree->lock);
if (!em)
break;
/* once for us */
@@ -5419,7 +5409,7 @@ int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len)
return 1;
map = em->map_lookup;
- if (map->type & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1))
+ if (map->type & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1_MASK))
ret = map->num_stripes;
else if (map->type & BTRFS_BLOCK_GROUP_RAID10)
ret = map->sub_stripes;
@@ -5493,7 +5483,7 @@ static int find_live_mirror(struct btrfs_fs_info *fs_info,
struct btrfs_device *srcdev;
ASSERT((map->type &
- (BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10)));
+ (BTRFS_BLOCK_GROUP_RAID1_MASK | BTRFS_BLOCK_GROUP_RAID10)));
if (map->type & BTRFS_BLOCK_GROUP_RAID10)
num_stripes = map->sub_stripes;
@@ -5682,7 +5672,7 @@ static int __btrfs_map_block_for_discard(struct btrfs_fs_info *fs_info,
&remaining_stripes);
div_u64_rem(stripe_nr_end - 1, factor, &last_stripe);
last_stripe *= sub_stripes;
- } else if (map->type & (BTRFS_BLOCK_GROUP_RAID1 |
+ } else if (map->type & (BTRFS_BLOCK_GROUP_RAID1_MASK |
BTRFS_BLOCK_GROUP_DUP)) {
num_stripes = map->num_stripes;
} else {
@@ -5926,6 +5916,102 @@ static bool need_full_stripe(enum btrfs_map_op op)
return (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_GET_READ_MIRRORS);
}
+/*
+ * btrfs_get_io_geometry - calculates the geometry of a particular (address, len)
+ * tuple. This information is used to calculate how big a
+ * particular bio can get before it straddles a stripe.
+ *
+ * @fs_info - the filesystem
+ * @logical - address that we want to figure out the geometry of
+ * @len - the length of IO we are going to perform, starting at @logical
+ * @op - type of operation - write or read
+ * @io_geom - pointer used to return values
+ *
+ * Returns < 0 in case a chunk for the given logical address cannot be found,
+ * usually shouldn't happen unless @logical is corrupted, 0 otherwise.
+ */
+int btrfs_get_io_geometry(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
+ u64 logical, u64 len, struct btrfs_io_geometry *io_geom)
+{
+ struct extent_map *em;
+ struct map_lookup *map;
+ u64 offset;
+ u64 stripe_offset;
+ u64 stripe_nr;
+ u64 stripe_len;
+ u64 raid56_full_stripe_start = (u64)-1;
+ int data_stripes;
+
+ ASSERT(op != BTRFS_MAP_DISCARD);
+
+ em = btrfs_get_chunk_map(fs_info, logical, len);
+ if (IS_ERR(em))
+ return PTR_ERR(em);
+
+ map = em->map_lookup;
+ /* Offset of this logical address in the chunk */
+ offset = logical - em->start;
+ /* Len of a stripe in a chunk */
+ stripe_len = map->stripe_len;
+ /* Stripe where this block falls in */
+ stripe_nr = div64_u64(offset, stripe_len);
+ /* Offset of stripe in the chunk */
+ stripe_offset = stripe_nr * stripe_len;
+ if (offset < stripe_offset) {
+ btrfs_crit(fs_info,
+"stripe math has gone wrong, stripe_offset=%llu offset=%llu start=%llu logical=%llu stripe_len=%llu",
+ stripe_offset, offset, em->start, logical, stripe_len);
+ free_extent_map(em);
+ return -EINVAL;
+ }
+
+ /* stripe_offset is the offset of this block in its stripe */
+ stripe_offset = offset - stripe_offset;
+ data_stripes = nr_data_stripes(map);
+
+ if (map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) {
+ u64 max_len = stripe_len - stripe_offset;
+
+ /*
+ * In case of raid56, we need to know the stripe aligned start
+ */
+ if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
+ unsigned long full_stripe_len = stripe_len * data_stripes;
+ raid56_full_stripe_start = offset;
+
+ /*
+ * Allow a write of a full stripe, but make sure we
+ * don't allow straddling of stripes
+ */
+ raid56_full_stripe_start = div64_u64(raid56_full_stripe_start,
+ full_stripe_len);
+ raid56_full_stripe_start *= full_stripe_len;
+
+ /*
+ * For writes to RAID[56], allow a full stripeset across
+ * all disks. For other RAID types and for RAID[56]
+ * reads, just allow a single stripe (on a single disk).
+ */
+ if (op == BTRFS_MAP_WRITE) {
+ max_len = stripe_len * data_stripes -
+ (offset - raid56_full_stripe_start);
+ }
+ }
+ len = min_t(u64, em->len - offset, max_len);
+ } else {
+ len = em->len - offset;
+ }
+
+ io_geom->len = len;
+ io_geom->offset = offset;
+ io_geom->stripe_len = stripe_len;
+ io_geom->stripe_nr = stripe_nr;
+ io_geom->stripe_offset = stripe_offset;
+ io_geom->raid56_stripe_offset = raid56_full_stripe_start;
+
+ return 0;
+}
+
static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
enum btrfs_map_op op,
u64 logical, u64 *length,
@@ -5939,6 +6025,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
u64 stripe_nr;
u64 stripe_len;
u32 stripe_index;
+ int data_stripes;
int i;
int ret = 0;
int num_stripes;
@@ -5951,76 +6038,29 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
int patch_the_first_stripe_for_dev_replace = 0;
u64 physical_to_patch_in_first_stripe = 0;
u64 raid56_full_stripe_start = (u64)-1;
+ struct btrfs_io_geometry geom;
+
+ ASSERT(bbio_ret);
if (op == BTRFS_MAP_DISCARD)
return __btrfs_map_block_for_discard(fs_info, logical,
*length, bbio_ret);
- em = btrfs_get_chunk_map(fs_info, logical, *length);
- if (IS_ERR(em))
- return PTR_ERR(em);
+ ret = btrfs_get_io_geometry(fs_info, op, logical, *length, &geom);
+ if (ret < 0)
+ return ret;
+ em = btrfs_get_chunk_map(fs_info, logical, *length);
+ ASSERT(em);
map = em->map_lookup;
- offset = logical - em->start;
-
- stripe_len = map->stripe_len;
- stripe_nr = offset;
- /*
- * stripe_nr counts the total number of stripes we have to stride
- * to get to this block
- */
- stripe_nr = div64_u64(stripe_nr, stripe_len);
-
- stripe_offset = stripe_nr * stripe_len;
- if (offset < stripe_offset) {
- btrfs_crit(fs_info,
- "stripe math has gone wrong, stripe_offset=%llu, offset=%llu, start=%llu, logical=%llu, stripe_len=%llu",
- stripe_offset, offset, em->start, logical,
- stripe_len);
- free_extent_map(em);
- return -EINVAL;
- }
-
- /* stripe_offset is the offset of this block in its stripe*/
- stripe_offset = offset - stripe_offset;
-
- /* if we're here for raid56, we need to know the stripe aligned start */
- if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
- unsigned long full_stripe_len = stripe_len * nr_data_stripes(map);
- raid56_full_stripe_start = offset;
- /* allow a write of a full stripe, but make sure we don't
- * allow straddling of stripes
- */
- raid56_full_stripe_start = div64_u64(raid56_full_stripe_start,
- full_stripe_len);
- raid56_full_stripe_start *= full_stripe_len;
- }
-
- if (map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) {
- u64 max_len;
- /* For writes to RAID[56], allow a full stripeset across all disks.
- For other RAID types and for RAID[56] reads, just allow a single
- stripe (on a single disk). */
- if ((map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) &&
- (op == BTRFS_MAP_WRITE)) {
- max_len = stripe_len * nr_data_stripes(map) -
- (offset - raid56_full_stripe_start);
- } else {
- /* we limit the length of each bio to what fits in a stripe */
- max_len = stripe_len - stripe_offset;
- }
- *length = min_t(u64, em->len - offset, max_len);
- } else {
- *length = em->len - offset;
- }
-
- /*
- * This is for when we're called from btrfs_bio_fits_in_stripe and all
- * it cares about is the length
- */
- if (!bbio_ret)
- goto out;
+ *length = geom.len;
+ offset = geom.offset;
+ stripe_len = geom.stripe_len;
+ stripe_nr = geom.stripe_nr;
+ stripe_offset = geom.stripe_offset;
+ raid56_full_stripe_start = geom.raid56_stripe_offset;
+ data_stripes = nr_data_stripes(map);
down_read(&dev_replace->rwsem);
dev_replace_is_ongoing = btrfs_dev_replace_is_ongoing(dev_replace);
@@ -6052,7 +6092,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
&stripe_index);
if (!need_full_stripe(op))
mirror_num = 1;
- } else if (map->type & BTRFS_BLOCK_GROUP_RAID1) {
+ } else if (map->type & BTRFS_BLOCK_GROUP_RAID1_MASK) {
if (need_full_stripe(op))
num_stripes = map->num_stripes;
else if (mirror_num)
@@ -6094,7 +6134,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
if (need_raid_map && (need_full_stripe(op) || mirror_num > 1)) {
/* push stripe_nr back to the start of the full stripe */
stripe_nr = div64_u64(raid56_full_stripe_start,
- stripe_len * nr_data_stripes(map));
+ stripe_len * data_stripes);
/* RAID[56] write or recovery. Return all stripes */
num_stripes = map->num_stripes;
@@ -6110,10 +6150,9 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
* Mirror #3 is RAID6 Q block.
*/
stripe_nr = div_u64_rem(stripe_nr,
- nr_data_stripes(map), &stripe_index);
+ data_stripes, &stripe_index);
if (mirror_num > 1)
- stripe_index = nr_data_stripes(map) +
- mirror_num - 2;
+ stripe_index = data_stripes + mirror_num - 2;
/* We distribute the parity blocks across stripes */
div_u64_rem(stripe_nr + stripe_index, map->num_stripes,
@@ -6171,8 +6210,8 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
div_u64_rem(stripe_nr, num_stripes, &rot);
/* Fill in the logical address of each stripe */
- tmp = stripe_nr * nr_data_stripes(map);
- for (i = 0; i < nr_data_stripes(map); i++)
+ tmp = stripe_nr * data_stripes;
+ for (i = 0; i < data_stripes; i++)
bbio->raid_map[(i+rot) % num_stripes] =
em->start + (tmp + i) * map->stripe_len;
@@ -6687,7 +6726,7 @@ static int read_one_chunk(struct btrfs_key *key, struct extent_buffer *leaf,
struct btrfs_chunk *chunk)
{
struct btrfs_fs_info *fs_info = leaf->fs_info;
- struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
+ struct extent_map_tree *map_tree = &fs_info->mapping_tree;
struct map_lookup *map;
struct extent_map *em;
u64 logical;
@@ -6712,9 +6751,9 @@ static int read_one_chunk(struct btrfs_key *key, struct extent_buffer *leaf,
return ret;
}
- read_lock(&map_tree->map_tree.lock);
- em = lookup_extent_mapping(&map_tree->map_tree, logical, 1);
- read_unlock(&map_tree->map_tree.lock);
+ read_lock(&map_tree->lock);
+ em = lookup_extent_mapping(map_tree, logical, 1);
+ read_unlock(&map_tree->lock);
/* already mapped? */
if (em && em->start <= logical && em->start + em->len > logical) {
@@ -6783,9 +6822,9 @@ static int read_one_chunk(struct btrfs_key *key, struct extent_buffer *leaf,
}
- write_lock(&map_tree->map_tree.lock);
- ret = add_extent_mapping(&map_tree->map_tree, em, 0);
- write_unlock(&map_tree->map_tree.lock);
+ write_lock(&map_tree->lock);
+ ret = add_extent_mapping(map_tree, em, 0);
+ write_unlock(&map_tree->lock);
if (ret < 0) {
btrfs_err(fs_info,
"failed to add chunk map, start=%llu len=%llu: %d",
@@ -7103,14 +7142,14 @@ out_short_read:
bool btrfs_check_rw_degradable(struct btrfs_fs_info *fs_info,
struct btrfs_device *failing_dev)
{
- struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
+ struct extent_map_tree *map_tree = &fs_info->mapping_tree;
struct extent_map *em;
u64 next_start = 0;
bool ret = true;
- read_lock(&map_tree->map_tree.lock);
- em = lookup_extent_mapping(&map_tree->map_tree, 0, (u64)-1);
- read_unlock(&map_tree->map_tree.lock);
+ read_lock(&map_tree->lock);
+ em = lookup_extent_mapping(map_tree, 0, (u64)-1);
+ read_unlock(&map_tree->lock);
/* No chunk at all? Return false anyway */
if (!em) {
ret = false;
@@ -7148,10 +7187,10 @@ bool btrfs_check_rw_degradable(struct btrfs_fs_info *fs_info,
next_start = extent_map_end(em);
free_extent_map(em);
- read_lock(&map_tree->map_tree.lock);
- em = lookup_extent_mapping(&map_tree->map_tree, next_start,
+ read_lock(&map_tree->lock);
+ em = lookup_extent_mapping(map_tree, next_start,
(u64)(-1) - next_start);
- read_unlock(&map_tree->map_tree.lock);
+ read_unlock(&map_tree->lock);
}
out:
return ret;
@@ -7600,10 +7639,9 @@ void btrfs_reset_fs_info_ptr(struct btrfs_fs_info *fs_info)
*/
int btrfs_bg_type_to_factor(u64 flags)
{
- if (flags & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1 |
- BTRFS_BLOCK_GROUP_RAID10))
- return 2;
- return 1;
+ const int index = btrfs_bg_flags_to_raid_index(flags);
+
+ return btrfs_raid_array[index].ncopies;
}
@@ -7612,7 +7650,7 @@ static int verify_one_dev_extent(struct btrfs_fs_info *fs_info,
u64 chunk_offset, u64 devid,
u64 physical_offset, u64 physical_len)
{
- struct extent_map_tree *em_tree = &fs_info->mapping_tree.map_tree;
+ struct extent_map_tree *em_tree = &fs_info->mapping_tree;
struct extent_map *em;
struct map_lookup *map;
struct btrfs_device *dev;
@@ -7701,7 +7739,7 @@ out:
static int verify_chunk_dev_extent_mapping(struct btrfs_fs_info *fs_info)
{
- struct extent_map_tree *em_tree = &fs_info->mapping_tree.map_tree;
+ struct extent_map_tree *em_tree = &fs_info->mapping_tree;
struct extent_map *em;
struct rb_node *node;
int ret = 0;
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 136a3eb64604..7f6aa1816409 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -23,6 +23,21 @@ struct btrfs_pending_bios {
struct bio *tail;
};
+struct btrfs_io_geometry {
+ /* remaining bytes before crossing a stripe */
+ u64 len;
+ /* offset of logical address in chunk */
+ u64 offset;
+ /* length of single IO stripe */
+ u64 stripe_len;
+ /* number of stripe where address falls */
+ u64 stripe_nr;
+ /* offset of address in stripe */
+ u64 stripe_offset;
+ /* offset of raid56 stripe into the chunk */
+ u64 raid56_stripe_offset;
+};
+
/*
* Use sequence counter to get consistent device stat data on
* 32-bit processors.
@@ -43,8 +58,8 @@ struct btrfs_pending_bios {
#define BTRFS_DEV_STATE_FLUSH_SENT (4)
struct btrfs_device {
- struct list_head dev_list;
- struct list_head dev_alloc_list;
+ struct list_head dev_list; /* device_list_mutex */
+ struct list_head dev_alloc_list; /* chunk mutex */
struct list_head post_commit_list; /* chunk mutex */
struct btrfs_fs_devices *fs_devices;
struct btrfs_fs_info *fs_info;
@@ -229,9 +244,14 @@ struct btrfs_fs_devices {
* this mutex lock.
*/
struct mutex device_list_mutex;
+
+ /* List of all devices, protected by device_list_mutex */
struct list_head devices;
- /* devices not currently being allocated */
+ /*
+ * Devices which can satisfy space allocation. Protected by
+ * chunk_mutex
+ */
struct list_head alloc_list;
struct btrfs_fs_devices *seed;
@@ -336,16 +356,16 @@ struct btrfs_device_info {
};
struct btrfs_raid_attr {
- int sub_stripes; /* sub_stripes info for map */
- int dev_stripes; /* stripes per dev */
- int devs_max; /* max devs to use */
- int devs_min; /* min devs needed */
- int tolerated_failures; /* max tolerated fail devs */
- int devs_increment; /* ndevs has to be a multiple of this */
- int ncopies; /* how many copies to data has */
- int nparity; /* number of stripes worth of bytes to store
+ u8 sub_stripes; /* sub_stripes info for map */
+ u8 dev_stripes; /* stripes per dev */
+ u8 devs_max; /* max devs to use */
+ u8 devs_min; /* min devs needed */
+ u8 tolerated_failures; /* max tolerated fail devs */
+ u8 devs_increment; /* ndevs has to be a multiple of this */
+ u8 ncopies; /* how many copies to data has */
+ u8 nparity; /* number of stripes worth of bytes to store
* parity information */
- int mindev_error; /* error code if min devs requisite is unmet */
+ u8 mindev_error; /* error code if min devs requisite is unmet */
const char raid_name[8]; /* name of the raid */
u64 bg_flag; /* block group flag of the raid */
};
@@ -408,13 +428,14 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
int btrfs_map_sblock(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
u64 logical, u64 *length,
struct btrfs_bio **bbio_ret);
+int btrfs_get_io_geometry(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
+ u64 logical, u64 len, struct btrfs_io_geometry *io_geom);
int btrfs_rmap_block(struct btrfs_fs_info *fs_info, u64 chunk_start,
u64 physical, u64 **logical, int *naddrs, int *stripe_len);
int btrfs_read_sys_array(struct btrfs_fs_info *fs_info);
int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info);
int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, u64 type);
-void btrfs_mapping_init(struct btrfs_mapping_tree *tree);
-void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree);
+void btrfs_mapping_tree_free(struct extent_map_tree *tree);
blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
int mirror_num, int async_submit);
int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
@@ -557,8 +578,6 @@ static inline enum btrfs_raid_types btrfs_bg_flags_to_raid_index(u64 flags)
return BTRFS_RAID_SINGLE; /* BTRFS_BLOCK_GROUP_SINGLE */
}
-const char *get_raid_name(enum btrfs_raid_types type);
-
void btrfs_commit_device_sizes(struct btrfs_transaction *trans);
struct list_head *btrfs_get_fs_uuids(void);
@@ -568,6 +587,7 @@ bool btrfs_check_rw_degradable(struct btrfs_fs_info *fs_info,
struct btrfs_device *failing_dev);
int btrfs_bg_type_to_factor(u64 flags);
+const char *btrfs_bg_type_to_raid_name(u64 flags);
int btrfs_verify_dev_extents(struct btrfs_fs_info *fs_info);
#endif
diff --git a/fs/orangefs/file.c b/fs/orangefs/file.c
index 679a3c8e4fb3..960f9a3c012d 100644
--- a/fs/orangefs/file.c
+++ b/fs/orangefs/file.c
@@ -52,7 +52,7 @@ ssize_t wait_for_direct_io(enum ORANGEFS_io_type type, struct inode *inode,
struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
struct orangefs_khandle *handle = &orangefs_inode->refn.khandle;
struct orangefs_kernel_op_s *new_op = NULL;
- int buffer_index = -1;
+ int buffer_index;
ssize_t ret;
size_t copy_amount;
@@ -134,7 +134,6 @@ populate_shared_memory:
*/
if (ret == -EAGAIN && op_state_purged(new_op)) {
orangefs_bufmap_put(buffer_index);
- buffer_index = -1;
if (type == ORANGEFS_IO_WRITE)
iov_iter_revert(iter, total_size);
gossip_debug(GOSSIP_FILE_DEBUG,
@@ -262,7 +261,6 @@ out:
"%s(%pU): PUT buffer_index %d\n",
__func__, handle, buffer_index);
}
- buffer_index = -1;
}
op_release(new_op);
return ret;
diff --git a/fs/proc/Kconfig b/fs/proc/Kconfig
index cba429db95d9..cb5629bd5fff 100644
--- a/fs/proc/Kconfig
+++ b/fs/proc/Kconfig
@@ -73,7 +73,7 @@ config PROC_SYSCTL
interface is through /proc/sys. If you say Y here a tree of
modifiable sysctl entries will be generated beneath the
/proc/sys directory. They are explained in the files
- in <file:Documentation/sysctl/>. Note that enabling this
+ in <file:Documentation/admin-guide/sysctl/>. Note that enabling this
option will enlarge the kernel by at least 8 KB.
As it is generally a good thing, you should say Y here unless
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 77eb628ecc7f..ebea9501afb8 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -209,12 +209,53 @@ static int proc_root_link(struct dentry *dentry, struct path *path)
return result;
}
+/*
+ * If the user used setproctitle(), we just get the string from
+ * user space at arg_start, and limit it to a maximum of one page.
+ */
+static ssize_t get_mm_proctitle(struct mm_struct *mm, char __user *buf,
+ size_t count, unsigned long pos,
+ unsigned long arg_start)
+{
+ char *page;
+ int ret, got;
+
+ if (pos >= PAGE_SIZE)
+ return 0;
+
+ page = (char *)__get_free_page(GFP_KERNEL);
+ if (!page)
+ return -ENOMEM;
+
+ ret = 0;
+ got = access_remote_vm(mm, arg_start, page, PAGE_SIZE, FOLL_ANON);
+ if (got > 0) {
+ int len = strnlen(page, got);
+
+ /* Include the NUL character if it was found */
+ if (len < got)
+ len++;
+
+ if (len > pos) {
+ len -= pos;
+ if (len > count)
+ len = count;
+ len -= copy_to_user(buf, page+pos, len);
+ if (!len)
+ len = -EFAULT;
+ ret = len;
+ }
+ }
+ free_page((unsigned long)page);
+ return ret;
+}
+
static ssize_t get_mm_cmdline(struct mm_struct *mm, char __user *buf,
size_t count, loff_t *ppos)
{
unsigned long arg_start, arg_end, env_start, env_end;
unsigned long pos, len;
- char *page;
+ char *page, c;
/* Check if process spawned far enough to have cmdline. */
if (!mm->env_end)
@@ -231,28 +272,42 @@ static ssize_t get_mm_cmdline(struct mm_struct *mm, char __user *buf,
return 0;
/*
- * We have traditionally allowed the user to re-write
- * the argument strings and overflow the end result
- * into the environment section. But only do that if
- * the environment area is contiguous to the arguments.
+ * We allow setproctitle() to overwrite the argument
+ * strings, and overflow past the original end. But
+ * only when it overflows into the environment area.
*/
- if (env_start != arg_end || env_start >= env_end)
+ if (env_start != arg_end || env_end < env_start)
env_start = env_end = arg_end;
-
- /* .. and limit it to a maximum of one page of slop */
- if (env_end >= arg_end + PAGE_SIZE)
- env_end = arg_end + PAGE_SIZE - 1;
+ len = env_end - arg_start;
/* We're not going to care if "*ppos" has high bits set */
- pos = arg_start + *ppos;
-
- /* .. but we do check the result is in the proper range */
- if (pos < arg_start || pos >= env_end)
+ pos = *ppos;
+ if (pos >= len)
return 0;
+ if (count > len - pos)
+ count = len - pos;
+ if (!count)
+ return 0;
+
+ /*
+ * Magical special case: if the argv[] end byte is not
+ * zero, the user has overwritten it with setproctitle(3).
+ *
+ * Possible future enhancement: do this only once when
+ * pos is 0, and set a flag in the 'struct file'.
+ */
+ if (access_remote_vm(mm, arg_end-1, &c, 1, FOLL_ANON) == 1 && c)
+ return get_mm_proctitle(mm, buf, count, pos, arg_start);
- /* .. and we never go past env_end */
- if (env_end - pos < count)
- count = env_end - pos;
+ /*
+ * For the non-setproctitle() case we limit things strictly
+ * to the [arg_start, arg_end[ range.
+ */
+ pos += arg_start;
+ if (pos < arg_start || pos >= arg_end)
+ return 0;
+ if (count > arg_end - pos)
+ count = arg_end - pos;
page = (char *)__get_free_page(GFP_KERNEL);
if (!page)
@@ -262,48 +317,11 @@ static ssize_t get_mm_cmdline(struct mm_struct *mm, char __user *buf,
while (count) {
int got;
size_t size = min_t(size_t, PAGE_SIZE, count);
- long offset;
- /*
- * Are we already starting past the official end?
- * We always include the last byte that is *supposed*
- * to be NUL
- */
- offset = (pos >= arg_end) ? pos - arg_end + 1 : 0;
-
- got = access_remote_vm(mm, pos - offset, page, size + offset, FOLL_ANON);
- if (got <= offset)
+ got = access_remote_vm(mm, pos, page, size, FOLL_ANON);
+ if (got <= 0)
break;
- got -= offset;
-
- /* Don't walk past a NUL character once you hit arg_end */
- if (pos + got >= arg_end) {
- int n = 0;
-
- /*
- * If we started before 'arg_end' but ended up
- * at or after it, we start the NUL character
- * check at arg_end-1 (where we expect the normal
- * EOF to be).
- *
- * NOTE! This is smaller than 'got', because
- * pos + got >= arg_end
- */
- if (pos < arg_end)
- n = arg_end - pos - 1;
-
- /* Cut off at first NUL after 'n' */
- got = n + strnlen(page+n, offset+got-n);
- if (got < offset)
- break;
- got -= offset;
-
- /* Include the NUL if it existed */
- if (got < size)
- got++;
- }
-
- got -= copy_to_user(buf, page+offset, got);
+ got -= copy_to_user(buf, page, got);
if (unlikely(!got)) {
if (!len)
len = -EFAULT;
diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index c5311935239d..430e219e3aba 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -624,7 +624,7 @@ struct cftype {
/*
* Control Group subsystem type.
- * See Documentation/cgroup-v1/cgroups.rst for details
+ * See Documentation/admin-guide/cgroup-v1/cgroups.rst for details
*/
struct cgroup_subsys {
struct cgroup_subsys_state *(*css_alloc)(struct cgroup_subsys_state *parent_css);
diff --git a/include/linux/connector.h b/include/linux/connector.h
index 1d72ef76f24f..6b6c7396a584 100644
--- a/include/linux/connector.h
+++ b/include/linux/connector.h
@@ -55,10 +55,71 @@ struct cn_dev {
struct cn_queue_dev *cbdev;
};
+/**
+ * cn_add_callback() - Registers new callback with connector core.
+ *
+ * @id: unique connector's user identifier.
+ * It must be registered in connector.h for legal
+ * in-kernel users.
+ * @name: connector's callback symbolic name.
+ * @callback: connector's callback.
+ * parameters are %cn_msg and the sender's credentials
+ */
int cn_add_callback(struct cb_id *id, const char *name,
void (*callback)(struct cn_msg *, struct netlink_skb_parms *));
-void cn_del_callback(struct cb_id *);
+/**
+ * cn_del_callback() - Unregisters a callback from connector core.
+ *
+ * @id: unique connector's user identifier.
+ */
+void cn_del_callback(struct cb_id *id);
+
+
+/**
+ * cn_netlink_send_mult - Sends message to the specified groups.
+ *
+ * @msg: message header(with attached data).
+ * @len: Number of @msg to be sent.
+ * @portid: destination port.
+ * If non-zero the message will be sent to the given port,
+ * which should be set to the original sender.
+ * @group: destination group.
+ * If @portid and @group are zero, then appropriate group will
+ * be searched through all registered connector users, and
+ * message will be delivered to the group which was created
+ * for user with the same ID as in @msg.
+ * If @group is not zero, then message will be delivered
+ * to the specified group.
+ * @gfp_mask: GFP mask.
+ *
+ * It can be safely called from softirq context, but may silently
+ * fail under strong memory pressure.
+ *
+ * If there are no listeners for given group %-ESRCH can be returned.
+ */
int cn_netlink_send_mult(struct cn_msg *msg, u16 len, u32 portid, u32 group, gfp_t gfp_mask);
+
+/**
+ * cn_netlink_send - Sends message to the specified groups.
+ *
+ * @msg: message header(with attached data).
+ * @portid: destination port.
+ * If non-zero the message will be sent to the given port,
+ * which should be set to the original sender.
+ * @group: destination group.
+ * If @portid and @group are zero, then appropriate group will
+ * be searched through all registered connector users, and
+ * message will be delivered to the group which was created
+ * for user with the same ID as in @msg.
+ * If @group is not zero, then message will be delivered
+ * to the specified group.
+ * @gfp_mask: GFP mask.
+ *
+ * It can be safely called from softirq context, but may silently
+ * fail under strong memory pressure.
+ *
+ * If there are no listeners for given group %-ESRCH can be returned.
+ */
int cn_netlink_send(struct cn_msg *msg, u32 portid, u32 group, gfp_t gfp_mask);
int cn_queue_add_callback(struct cn_queue_dev *dev, const char *name,
diff --git a/include/linux/device.h b/include/linux/device.h
index 5eabfa0c4dee..c330b75c6c57 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -6,7 +6,7 @@
* Copyright (c) 2004-2009 Greg Kroah-Hartman <gregkh@suse.de>
* Copyright (c) 2008-2009 Novell Inc.
*
- * See Documentation/driver-model/ for more information.
+ * See Documentation/driver-api/driver-model/ for more information.
*/
#ifndef _DEVICE_H_
diff --git a/include/linux/hw_random.h b/include/linux/hw_random.h
index c0b93e0ff0c0..8e6dd908da21 100644
--- a/include/linux/hw_random.h
+++ b/include/linux/hw_random.h
@@ -1,7 +1,7 @@
/*
Hardware Random Number Generator
- Please read Documentation/hw_random.txt for details on use.
+ Please read Documentation/admin-guide/hw_random.rst for details on use.
----------------------------------------------------------
This software may be used and distributed according to the terms
diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 57baa27f238c..0b0d7259276d 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -5,7 +5,7 @@
* Copyright (C) 2006,2007 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
* Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra
*
- * see Documentation/locking/lockdep-design.txt for more details.
+ * see Documentation/locking/lockdep-design.rst for more details.
*/
#ifndef __LINUX_LOCKDEP_H
#define __LINUX_LOCKDEP_H
diff --git a/include/linux/mutex.h b/include/linux/mutex.h
index 3093dd162424..dcd03fee6e01 100644
--- a/include/linux/mutex.h
+++ b/include/linux/mutex.h
@@ -151,7 +151,7 @@ static inline bool mutex_is_locked(struct mutex *lock)
/*
* See kernel/locking/mutex.c for detailed documentation of these APIs.
- * Also see Documentation/locking/mutex-design.txt.
+ * Also see Documentation/locking/mutex-design.rst.
*/
#ifdef CONFIG_DEBUG_LOCK_ALLOC
extern void mutex_lock_nested(struct mutex *lock, unsigned int subclass);
diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h
index beb25f277889..9bc36b589827 100644
--- a/include/linux/platform_device.h
+++ b/include/linux/platform_device.h
@@ -4,7 +4,7 @@
*
* Copyright (c) 2001-2003 Patrick Mochel <mochel@osdl.org>
*
- * See Documentation/driver-model/ for more information.
+ * See Documentation/driver-api/driver-model/ for more information.
*/
#ifndef _PLATFORM_DEVICE_H_
diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h
index e401358c4e7e..9d9c663987d8 100644
--- a/include/linux/rwsem.h
+++ b/include/linux/rwsem.h
@@ -160,7 +160,7 @@ extern void downgrade_write(struct rw_semaphore *sem);
* static then another method for expressing nested locking is
* the explicit definition of lock class keys and the use of
* lockdep_set_class() at lock initialization time.
- * See Documentation/locking/lockdep-design.txt for more details.)
+ * See Documentation/locking/lockdep-design.rst for more details.)
*/
extern void down_read_nested(struct rw_semaphore *sem, int subclass);
extern void down_write_nested(struct rw_semaphore *sem, int subclass);
diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h
index 109a0df5af39..0497091e40c1 100644
--- a/include/linux/sched/task.h
+++ b/include/linux/sched/task.h
@@ -89,6 +89,7 @@ extern void exit_files(struct task_struct *);
extern void exit_itimers(struct signal_struct *);
extern long _do_fork(struct kernel_clone_args *kargs);
+extern bool legacy_clone_args_valid(const struct kernel_clone_args *kargs);
extern long do_fork(unsigned long, unsigned long, unsigned long, int __user *, int __user *);
struct task_struct *fork_idle(int);
struct mm_struct *copy_init_mm(void);
diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
index 05b179015d6c..2b78cc734719 100644
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -32,7 +32,7 @@ struct device;
/*
* This structure describes all the operations that can be done on the
- * physical hardware. See Documentation/serial/driver.rst for details.
+ * physical hardware. See Documentation/driver-api/serial/driver.rst for details.
*/
struct uart_ops {
unsigned int (*tx_empty)(struct uart_port *);
diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h
index f9eff010fc7e..2f6a669408bb 100644
--- a/include/trace/events/btrfs.h
+++ b/include/trace/events/btrfs.h
@@ -29,6 +29,7 @@ struct btrfs_qgroup_extent_record;
struct btrfs_qgroup;
struct extent_io_tree;
struct prelim_ref;
+struct btrfs_space_info;
TRACE_DEFINE_ENUM(FLUSH_DELAYED_ITEMS_NR);
TRACE_DEFINE_ENUM(FLUSH_DELAYED_ITEMS);
@@ -2091,6 +2092,45 @@ DEFINE_BTRFS_LOCK_EVENT(btrfs_try_tree_read_lock);
DEFINE_BTRFS_LOCK_EVENT(btrfs_try_tree_write_lock);
DEFINE_BTRFS_LOCK_EVENT(btrfs_tree_read_lock_atomic);
+DECLARE_EVENT_CLASS(btrfs__space_info_update,
+
+ TP_PROTO(struct btrfs_fs_info *fs_info,
+ struct btrfs_space_info *sinfo, u64 old, s64 diff),
+
+ TP_ARGS(fs_info, sinfo, old, diff),
+
+ TP_STRUCT__entry_btrfs(
+ __field( u64, type )
+ __field( u64, old )
+ __field( s64, diff )
+ ),
+
+ TP_fast_assign_btrfs(fs_info,
+ __entry->type = sinfo->flags;
+ __entry->old = old;
+ __entry->diff = diff;
+ ),
+ TP_printk_btrfs("type=%s old=%llu diff=%lld",
+ __print_flags(__entry->type, "|", BTRFS_GROUP_FLAGS),
+ __entry->old, __entry->diff)
+);
+
+DEFINE_EVENT(btrfs__space_info_update, update_bytes_may_use,
+
+ TP_PROTO(struct btrfs_fs_info *fs_info,
+ struct btrfs_space_info *sinfo, u64 old, s64 diff),
+
+ TP_ARGS(fs_info, sinfo, old, diff)
+);
+
+DEFINE_EVENT(btrfs__space_info_update, update_bytes_pinned,
+
+ TP_PROTO(struct btrfs_fs_info *fs_info,
+ struct btrfs_space_info *sinfo, u64 old, s64 diff),
+
+ TP_ARGS(fs_info, sinfo, old, diff)
+);
+
#endif /* _TRACE_BTRFS_H */
/* This part must be outside protection */
diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index 9acfff0cd153..1be0e798e362 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -846,8 +846,10 @@ __SYSCALL(__NR_fsmount, sys_fsmount)
__SYSCALL(__NR_fspick, sys_fspick)
#define __NR_pidfd_open 434
__SYSCALL(__NR_pidfd_open, sys_pidfd_open)
+#ifdef __ARCH_WANT_SYS_CLONE3
#define __NR_clone3 435
__SYSCALL(__NR_clone3, sys_clone3)
+#endif
#undef __NR_syscalls
#define __NR_syscalls 436
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 6f68438aa4ed..82699845ef79 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -806,7 +806,7 @@ union bpf_attr {
* based on a user-provided identifier for all traffic coming from
* the tasks belonging to the related cgroup. See also the related
* kernel documentation, available from the Linux sources in file
- * *Documentation/cgroup-v1/net_cls.rst*.
+ * *Documentation/admin-guide/cgroup-v1/net_cls.rst*.
*
* The Linux kernel has two versions for cgroups: there are
* cgroups v1 and cgroups v2. Both are available to users, who can
diff --git a/include/uapi/linux/btrfs_tree.h b/include/uapi/linux/btrfs_tree.h
index 421239b98db2..34d5b34286fa 100644
--- a/include/uapi/linux/btrfs_tree.h
+++ b/include/uapi/linux/btrfs_tree.h
@@ -866,6 +866,8 @@ enum btrfs_raid_types {
#define BTRFS_BLOCK_GROUP_RAID56_MASK (BTRFS_BLOCK_GROUP_RAID5 | \
BTRFS_BLOCK_GROUP_RAID6)
+#define BTRFS_BLOCK_GROUP_RAID1_MASK (BTRFS_BLOCK_GROUP_RAID1)
+
/*
* We need a bit for restriper to be able to tell when chunks of type
* SINGLE are available. This "extended" profile format is used in
diff --git a/include/uapi/rdma/rdma_user_ioctl_cmds.h b/include/uapi/rdma/rdma_user_ioctl_cmds.h
index 64c14cb0022f..b8bb285f6b2a 100644
--- a/include/uapi/rdma/rdma_user_ioctl_cmds.h
+++ b/include/uapi/rdma/rdma_user_ioctl_cmds.h
@@ -36,7 +36,7 @@
#include <linux/types.h>
#include <linux/ioctl.h>
-/* Documentation/ioctl/ioctl-number.txt */
+/* Documentation/ioctl/ioctl-number.rst */
#define RDMA_IOCTL_MAGIC 0x1b
#define RDMA_VERBS_IOCTL \
_IOWR(RDMA_IOCTL_MAGIC, 1, struct ib_uverbs_ioctl_hdr)
diff --git a/init/Kconfig b/init/Kconfig
index 99da78db0440..bd7d650d4a99 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -550,7 +550,7 @@ config PSI
have cpu.pressure, memory.pressure, and io.pressure files,
which aggregate pressure stalls for the grouped tasks only.
- For more details see Documentation/accounting/psi.txt.
+ For more details see Documentation/accounting/psi.rst.
Say N if unsure.
@@ -821,7 +821,7 @@ menuconfig CGROUPS
controls or device isolation.
See
- Documentation/scheduler/sched-design-CFS.rst (CFS)
- - Documentation/cgroup-v1/ (features for grouping, isolation
+ - Documentation/admin-guide/cgroup-v1/ (features for grouping, isolation
and resource control)
Say N if unsure.
@@ -883,7 +883,7 @@ config BLK_CGROUP
CONFIG_CFQ_GROUP_IOSCHED=y; for enabling throttling policy, set
CONFIG_BLK_DEV_THROTTLING=y.
- See Documentation/cgroup-v1/blkio-controller.rst for more information.
+ See Documentation/admin-guide/cgroup-v1/blkio-controller.rst for more information.
config CGROUP_WRITEBACK
bool
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index b3b02b9c4405..863e434a6020 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -729,7 +729,7 @@ static inline int nr_cpusets(void)
* load balancing domains (sched domains) as specified by that partial
* partition.
*
- * See "What is sched_load_balance" in Documentation/cgroup-v1/cpusets.rst
+ * See "What is sched_load_balance" in Documentation/admin-guide/cgroup-v1/cpusets.rst
* for a background explanation of this.
*
* Does not return errors, on the theory that the callers of this
diff --git a/kernel/fork.c b/kernel/fork.c
index dfa78985a6fd..d8ae0f1b4148 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -2405,6 +2405,16 @@ long _do_fork(struct kernel_clone_args *args)
return nr;
}
+bool legacy_clone_args_valid(const struct kernel_clone_args *kargs)
+{
+ /* clone(CLONE_PIDFD) uses parent_tidptr to return a pidfd */
+ if ((kargs->flags & CLONE_PIDFD) &&
+ (kargs->flags & CLONE_PARENT_SETTID))
+ return false;
+
+ return true;
+}
+
#ifndef CONFIG_HAVE_COPY_THREAD_TLS
/* For compatibility with architectures that call do_fork directly rather than
* using the syscall entry points below. */
@@ -2416,6 +2426,7 @@ long do_fork(unsigned long clone_flags,
{
struct kernel_clone_args args = {
.flags = (clone_flags & ~CSIGNAL),
+ .pidfd = parent_tidptr,
.child_tid = child_tidptr,
.parent_tid = parent_tidptr,
.exit_signal = (clone_flags & CSIGNAL),
@@ -2423,6 +2434,9 @@ long do_fork(unsigned long clone_flags,
.stack_size = stack_size,
};
+ if (!legacy_clone_args_valid(&args))
+ return -EINVAL;
+
return _do_fork(&args);
}
#endif
@@ -2504,8 +2518,7 @@ SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp,
.tls = tls,
};
- /* clone(CLONE_PIDFD) uses parent_tidptr to return a pidfd */
- if ((clone_flags & CLONE_PIDFD) && (clone_flags & CLONE_PARENT_SETTID))
+ if (!legacy_clone_args_valid(&args))
return -EINVAL;
return _do_fork(&args);
diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
index 0c601ae072b3..edd1c082dbf5 100644
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -16,7 +16,7 @@
* by Steven Rostedt, based on work by Gregory Haskins, Peter Morreale
* and Sven Dietrich.
*
- * Also see Documentation/locking/mutex-design.txt.
+ * Also see Documentation/locking/mutex-design.rst.
*/
#include <linux/mutex.h>
#include <linux/ww_mutex.h>
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index 38fbf9fa7f1b..fa83d36e30c6 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -9,7 +9,7 @@
* Copyright (C) 2005 Kihon Technologies Inc., Steven Rostedt
* Copyright (C) 2006 Esben Nielsen
*
- * See Documentation/locking/rt-mutex-design.txt for details.
+ * See Documentation/locking/rt-mutex-design.rst for details.
*/
#include <linux/spinlock.h>
#include <linux/export.h>
diff --git a/kernel/panic.c b/kernel/panic.c
index 4d9f55bf7d38..057540b6eee9 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -372,7 +372,7 @@ const struct taint_flag taint_flags[TAINT_FLAGS_COUNT] = {
/**
* print_tainted - return a string to represent the kernel taint state.
*
- * For individual taint flag meanings, see Documentation/sysctl/kernel.txt
+ * For individual taint flag meanings, see Documentation/admin-guide/sysctl/kernel.rst
*
* The string is overwritten by the next call to print_tainted(),
* but is always NULL terminated.
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index c6ee805202bd..bc6673ab3a08 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1139,7 +1139,7 @@ config PROVE_LOCKING
the proof of observed correctness is also maintained for an
arbitrary combination of these separate locking variants.
- For more details, see Documentation/locking/lockdep-design.txt.
+ For more details, see Documentation/locking/lockdep-design.rst.
config LOCK_STAT
bool "Lock usage statistics"
@@ -1153,7 +1153,7 @@ config LOCK_STAT
help
This feature enables tracking lock contention points
- For more details, see Documentation/locking/lockstat.txt
+ For more details, see Documentation/locking/lockstat.rst
This also enables lock events required by "perf lock",
subcommand of perf.
diff --git a/mm/swap.c b/mm/swap.c
index 607c48229a1d..ae300397dfda 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -8,7 +8,7 @@
/*
* This file contains the default values for the operation of the
* Linux VM subsystem. Fine-tuning documentation can be found in
- * Documentation/sysctl/vm.txt.
+ * Documentation/admin-guide/sysctl/vm.rst.
* Started 18.12.91
* Swap aging added 23.2.95, Stephen Tweedie.
* Buffermem limits added 12.3.98, Rik van Riel.
diff --git a/samples/Kconfig b/samples/Kconfig
index 71b5e833dd9e..c8dacb4dda80 100644
--- a/samples/Kconfig
+++ b/samples/Kconfig
@@ -99,7 +99,7 @@ config SAMPLE_CONNECTOR
When enabled, this builds both a sample kernel module for
the connector interface and a user space tool to communicate
with it.
- See also Documentation/connector/connector.txt
+ See also Documentation/driver-api/connector.rst
config SAMPLE_HIDRAW
bool "hidraw sample"
diff --git a/scripts/coccinelle/free/devm_free.cocci b/scripts/coccinelle/free/devm_free.cocci
index fefd0331a2de..441799b5359b 100644
--- a/scripts/coccinelle/free/devm_free.cocci
+++ b/scripts/coccinelle/free/devm_free.cocci
@@ -3,7 +3,7 @@
/// functions. Values allocated using the devm_functions are freed when
/// the device is detached, and thus the use of the standard freeing
/// function would cause a double free.
-/// See Documentation/driver-model/devres.rst for more information.
+/// See Documentation/driver-api/driver-model/devres.rst for more information.
///
/// A difficulty of detecting this problem is that the standard freeing
/// function might be called from a different function than the one
diff --git a/scripts/gcc-plugins/Kconfig b/scripts/gcc-plugins/Kconfig
index e9c677a53c74..d33de0b9f4f5 100644
--- a/scripts/gcc-plugins/Kconfig
+++ b/scripts/gcc-plugins/Kconfig
@@ -23,7 +23,7 @@ config GCC_PLUGINS
GCC plugins are loadable modules that provide extra features to the
compiler. They are useful for runtime instrumentation and static analysis.
- See Documentation/gcc-plugins.txt for details.
+ See Documentation/core-api/gcc-plugins.rst for details.
menu "GCC plugins"
depends on GCC_PLUGINS
diff --git a/security/Kconfig b/security/Kconfig
index 06a30851511a..0d65594b5196 100644
--- a/security/Kconfig
+++ b/security/Kconfig
@@ -121,7 +121,7 @@ config INTEL_TXT
See <http://www.intel.com/technology/security/> for more information
about Intel(R) TXT.
See <http://tboot.sourceforge.net> for more information about tboot.
- See Documentation/intel_txt.txt for a description of how to enable
+ See Documentation/x86/intel_txt.rst for a description of how to enable
Intel TXT support in a kernel boot.
If you are unsure as to whether this is required, answer N.
diff --git a/security/device_cgroup.c b/security/device_cgroup.c
index c07196502577..725674f3276d 100644
--- a/security/device_cgroup.c
+++ b/security/device_cgroup.c
@@ -509,7 +509,7 @@ static inline int may_allow_all(struct dev_cgroup *parent)
* This is one of the three key functions for hierarchy implementation.
* This function is responsible for re-evaluating all the cgroup's active
* exceptions due to a parent's exception change.
- * Refer to Documentation/cgroup-v1/devices.rst for more details.
+ * Refer to Documentation/admin-guide/cgroup-v1/devices.rst for more details.
*/
static void revalidate_active_exceptions(struct dev_cgroup *devcg)
{
diff --git a/security/safesetid/lsm.c b/security/safesetid/lsm.c
index 06d4259f9ab1..7760019ad35d 100644
--- a/security/safesetid/lsm.c
+++ b/security/safesetid/lsm.c
@@ -14,67 +14,50 @@
#define pr_fmt(fmt) "SafeSetID: " fmt
-#include <linux/hashtable.h>
#include <linux/lsm_hooks.h>
#include <linux/module.h>
#include <linux/ptrace.h>
#include <linux/sched/task_stack.h>
#include <linux/security.h>
+#include "lsm.h"
/* Flag indicating whether initialization completed */
int safesetid_initialized;
-#define NUM_BITS 8 /* 128 buckets in hash table */
+struct setuid_ruleset __rcu *safesetid_setuid_rules;
-static DEFINE_HASHTABLE(safesetid_whitelist_hashtable, NUM_BITS);
-
-/*
- * Hash table entry to store safesetid policy signifying that 'parent' user
- * can setid to 'child' user.
- */
-struct entry {
- struct hlist_node next;
- struct hlist_node dlist; /* for deletion cleanup */
- uint64_t parent_kuid;
- uint64_t child_kuid;
-};
-
-static DEFINE_SPINLOCK(safesetid_whitelist_hashtable_spinlock);
-
-static bool check_setuid_policy_hashtable_key(kuid_t parent)
+/* Compute a decision for a transition from @src to @dst under @policy. */
+enum sid_policy_type _setuid_policy_lookup(struct setuid_ruleset *policy,
+ kuid_t src, kuid_t dst)
{
- struct entry *entry;
-
- rcu_read_lock();
- hash_for_each_possible_rcu(safesetid_whitelist_hashtable,
- entry, next, __kuid_val(parent)) {
- if (entry->parent_kuid == __kuid_val(parent)) {
- rcu_read_unlock();
- return true;
- }
+ struct setuid_rule *rule;
+ enum sid_policy_type result = SIDPOL_DEFAULT;
+
+ hash_for_each_possible(policy->rules, rule, next, __kuid_val(src)) {
+ if (!uid_eq(rule->src_uid, src))
+ continue;
+ if (uid_eq(rule->dst_uid, dst))
+ return SIDPOL_ALLOWED;
+ result = SIDPOL_CONSTRAINED;
}
- rcu_read_unlock();
-
- return false;
+ return result;
}
-static bool check_setuid_policy_hashtable_key_value(kuid_t parent,
- kuid_t child)
+/*
+ * Compute a decision for a transition from @src to @dst under the active
+ * policy.
+ */
+static enum sid_policy_type setuid_policy_lookup(kuid_t src, kuid_t dst)
{
- struct entry *entry;
+ enum sid_policy_type result = SIDPOL_DEFAULT;
+ struct setuid_ruleset *pol;
rcu_read_lock();
- hash_for_each_possible_rcu(safesetid_whitelist_hashtable,
- entry, next, __kuid_val(parent)) {
- if (entry->parent_kuid == __kuid_val(parent) &&
- entry->child_kuid == __kuid_val(child)) {
- rcu_read_unlock();
- return true;
- }
- }
+ pol = rcu_dereference(safesetid_setuid_rules);
+ if (pol)
+ result = _setuid_policy_lookup(pol, src, dst);
rcu_read_unlock();
-
- return false;
+ return result;
}
static int safesetid_security_capable(const struct cred *cred,
@@ -82,37 +65,59 @@ static int safesetid_security_capable(const struct cred *cred,
int cap,
unsigned int opts)
{
- if (cap == CAP_SETUID &&
- check_setuid_policy_hashtable_key(cred->uid)) {
- if (!(opts & CAP_OPT_INSETID)) {
- /*
- * Deny if we're not in a set*uid() syscall to avoid
- * giving powers gated by CAP_SETUID that are related
- * to functionality other than calling set*uid() (e.g.
- * allowing user to set up userns uid mappings).
- */
- pr_warn("Operation requires CAP_SETUID, which is not available to UID %u for operations besides approved set*uid transitions",
- __kuid_val(cred->uid));
- return -1;
- }
- }
- return 0;
+ /* We're only interested in CAP_SETUID. */
+ if (cap != CAP_SETUID)
+ return 0;
+
+ /*
+ * If CAP_SETUID is currently used for a set*uid() syscall, we want to
+ * let it go through here; the real security check happens later, in the
+ * task_fix_setuid hook.
+ */
+ if ((opts & CAP_OPT_INSETID) != 0)
+ return 0;
+
+ /*
+ * If no policy applies to this task, allow the use of CAP_SETUID for
+ * other purposes.
+ */
+ if (setuid_policy_lookup(cred->uid, INVALID_UID) == SIDPOL_DEFAULT)
+ return 0;
+
+ /*
+ * Reject use of CAP_SETUID for functionality other than calling
+ * set*uid() (e.g. setting up userns uid mappings).
+ */
+ pr_warn("Operation requires CAP_SETUID, which is not available to UID %u for operations besides approved set*uid transitions\n",
+ __kuid_val(cred->uid));
+ return -EPERM;
}
-static int check_uid_transition(kuid_t parent, kuid_t child)
+/*
+ * Check whether a caller with old credentials @old is allowed to switch to
+ * credentials that contain @new_uid.
+ */
+static bool uid_permitted_for_cred(const struct cred *old, kuid_t new_uid)
{
- if (check_setuid_policy_hashtable_key_value(parent, child))
- return 0;
- pr_warn("UID transition (%d -> %d) blocked",
- __kuid_val(parent),
- __kuid_val(child));
+ bool permitted;
+
+ /* If our old creds already had this UID in it, it's fine. */
+ if (uid_eq(new_uid, old->uid) || uid_eq(new_uid, old->euid) ||
+ uid_eq(new_uid, old->suid))
+ return true;
+
/*
- * Kill this process to avoid potential security vulnerabilities
- * that could arise from a missing whitelist entry preventing a
- * privileged process from dropping to a lesser-privileged one.
+ * Transitions to new UIDs require a check against the policy of the old
+ * RUID.
*/
- force_sig(SIGKILL);
- return -EACCES;
+ permitted =
+ setuid_policy_lookup(old->uid, new_uid) != SIDPOL_CONSTRAINED;
+ if (!permitted) {
+ pr_warn("UID transition ((%d,%d,%d) -> %d) blocked\n",
+ __kuid_val(old->uid), __kuid_val(old->euid),
+ __kuid_val(old->suid), __kuid_val(new_uid));
+ }
+ return permitted;
}
/*
@@ -125,134 +130,23 @@ static int safesetid_task_fix_setuid(struct cred *new,
int flags)
{
- /* Do nothing if there are no setuid restrictions for this UID. */
- if (!check_setuid_policy_hashtable_key(old->uid))
+ /* Do nothing if there are no setuid restrictions for our old RUID. */
+ if (setuid_policy_lookup(old->uid, INVALID_UID) == SIDPOL_DEFAULT)
return 0;
- switch (flags) {
- case LSM_SETID_RE:
- /*
- * Users for which setuid restrictions exist can only set the
- * real UID to the real UID or the effective UID, unless an
- * explicit whitelist policy allows the transition.
- */
- if (!uid_eq(old->uid, new->uid) &&
- !uid_eq(old->euid, new->uid)) {
- return check_uid_transition(old->uid, new->uid);
- }
- /*
- * Users for which setuid restrictions exist can only set the
- * effective UID to the real UID, the effective UID, or the
- * saved set-UID, unless an explicit whitelist policy allows
- * the transition.
- */
- if (!uid_eq(old->uid, new->euid) &&
- !uid_eq(old->euid, new->euid) &&
- !uid_eq(old->suid, new->euid)) {
- return check_uid_transition(old->euid, new->euid);
- }
- break;
- case LSM_SETID_ID:
- /*
- * Users for which setuid restrictions exist cannot change the
- * real UID or saved set-UID unless an explicit whitelist
- * policy allows the transition.
- */
- if (!uid_eq(old->uid, new->uid))
- return check_uid_transition(old->uid, new->uid);
- if (!uid_eq(old->suid, new->suid))
- return check_uid_transition(old->suid, new->suid);
- break;
- case LSM_SETID_RES:
- /*
- * Users for which setuid restrictions exist cannot change the
- * real UID, effective UID, or saved set-UID to anything but
- * one of: the current real UID, the current effective UID or
- * the current saved set-user-ID unless an explicit whitelist
- * policy allows the transition.
- */
- if (!uid_eq(new->uid, old->uid) &&
- !uid_eq(new->uid, old->euid) &&
- !uid_eq(new->uid, old->suid)) {
- return check_uid_transition(old->uid, new->uid);
- }
- if (!uid_eq(new->euid, old->uid) &&
- !uid_eq(new->euid, old->euid) &&
- !uid_eq(new->euid, old->suid)) {
- return check_uid_transition(old->euid, new->euid);
- }
- if (!uid_eq(new->suid, old->uid) &&
- !uid_eq(new->suid, old->euid) &&
- !uid_eq(new->suid, old->suid)) {
- return check_uid_transition(old->suid, new->suid);
- }
- break;
- case LSM_SETID_FS:
- /*
- * Users for which setuid restrictions exist cannot change the
- * filesystem UID to anything but one of: the current real UID,
- * the current effective UID or the current saved set-UID
- * unless an explicit whitelist policy allows the transition.
- */
- if (!uid_eq(new->fsuid, old->uid) &&
- !uid_eq(new->fsuid, old->euid) &&
- !uid_eq(new->fsuid, old->suid) &&
- !uid_eq(new->fsuid, old->fsuid)) {
- return check_uid_transition(old->fsuid, new->fsuid);
- }
- break;
- default:
- pr_warn("Unknown setid state %d\n", flags);
- force_sig(SIGKILL);
- return -EINVAL;
- }
- return 0;
-}
-
-int add_safesetid_whitelist_entry(kuid_t parent, kuid_t child)
-{
- struct entry *new;
-
- /* Return if entry already exists */
- if (check_setuid_policy_hashtable_key_value(parent, child))
+ if (uid_permitted_for_cred(old, new->uid) &&
+ uid_permitted_for_cred(old, new->euid) &&
+ uid_permitted_for_cred(old, new->suid) &&
+ uid_permitted_for_cred(old, new->fsuid))
return 0;
- new = kzalloc(sizeof(struct entry), GFP_KERNEL);
- if (!new)
- return -ENOMEM;
- new->parent_kuid = __kuid_val(parent);
- new->child_kuid = __kuid_val(child);
- spin_lock(&safesetid_whitelist_hashtable_spinlock);
- hash_add_rcu(safesetid_whitelist_hashtable,
- &new->next,
- __kuid_val(parent));
- spin_unlock(&safesetid_whitelist_hashtable_spinlock);
- return 0;
-}
-
-void flush_safesetid_whitelist_entries(void)
-{
- struct entry *entry;
- struct hlist_node *hlist_node;
- unsigned int bkt_loop_cursor;
- HLIST_HEAD(free_list);
-
/*
- * Could probably use hash_for_each_rcu here instead, but this should
- * be fine as well.
+ * Kill this process to avoid potential security vulnerabilities
+ * that could arise from a missing whitelist entry preventing a
+ * privileged process from dropping to a lesser-privileged one.
*/
- spin_lock(&safesetid_whitelist_hashtable_spinlock);
- hash_for_each_safe(safesetid_whitelist_hashtable, bkt_loop_cursor,
- hlist_node, entry, next) {
- hash_del_rcu(&entry->next);
- hlist_add_head(&entry->dlist, &free_list);
- }
- spin_unlock(&safesetid_whitelist_hashtable_spinlock);
- synchronize_rcu();
- hlist_for_each_entry_safe(entry, hlist_node, &free_list, dlist) {
- hlist_del(&entry->dlist);
- kfree(entry);
- }
+ force_sig(SIGKILL);
+ return -EACCES;
}
static struct security_hook_list safesetid_security_hooks[] = {
diff --git a/security/safesetid/lsm.h b/security/safesetid/lsm.h
index c1ea3c265fcf..db6d16e6bbc3 100644
--- a/security/safesetid/lsm.h
+++ b/security/safesetid/lsm.h
@@ -15,19 +15,39 @@
#define _SAFESETID_H
#include <linux/types.h>
+#include <linux/uidgid.h>
+#include <linux/hashtable.h>
/* Flag indicating whether initialization completed */
extern int safesetid_initialized;
-/* Function type. */
-enum safesetid_whitelist_file_write_type {
- SAFESETID_WHITELIST_ADD, /* Add whitelist policy. */
- SAFESETID_WHITELIST_FLUSH, /* Flush whitelist policies. */
+enum sid_policy_type {
+ SIDPOL_DEFAULT, /* source ID is unaffected by policy */
+ SIDPOL_CONSTRAINED, /* source ID is affected by policy */
+ SIDPOL_ALLOWED /* target ID explicitly allowed */
};
-/* Add entry to safesetid whitelist to allow 'parent' to setid to 'child'. */
-int add_safesetid_whitelist_entry(kuid_t parent, kuid_t child);
+/*
+ * Hash table entry to store safesetid policy signifying that 'src_uid'
+ * can setuid to 'dst_uid'.
+ */
+struct setuid_rule {
+ struct hlist_node next;
+ kuid_t src_uid;
+ kuid_t dst_uid;
+};
+
+#define SETID_HASH_BITS 8 /* 256 buckets in hash table */
+
+struct setuid_ruleset {
+ DECLARE_HASHTABLE(rules, SETID_HASH_BITS);
+ char *policy_str;
+ struct rcu_head rcu;
+};
+
+enum sid_policy_type _setuid_policy_lookup(struct setuid_ruleset *policy,
+ kuid_t src, kuid_t dst);
-void flush_safesetid_whitelist_entries(void);
+extern struct setuid_ruleset __rcu *safesetid_setuid_rules;
#endif /* _SAFESETID_H */
diff --git a/security/safesetid/securityfs.c b/security/safesetid/securityfs.c
index 2c6c829be044..d568e17dd773 100644
--- a/security/safesetid/securityfs.c
+++ b/security/safesetid/securityfs.c
@@ -11,92 +11,184 @@
* published by the Free Software Foundation.
*
*/
+
+#define pr_fmt(fmt) "SafeSetID: " fmt
+
#include <linux/security.h>
#include <linux/cred.h>
#include "lsm.h"
-static struct dentry *safesetid_policy_dir;
-
-struct safesetid_file_entry {
- const char *name;
- enum safesetid_whitelist_file_write_type type;
- struct dentry *dentry;
-};
-
-static struct safesetid_file_entry safesetid_files[] = {
- {.name = "add_whitelist_policy",
- .type = SAFESETID_WHITELIST_ADD},
- {.name = "flush_whitelist_policies",
- .type = SAFESETID_WHITELIST_FLUSH},
-};
+static DEFINE_MUTEX(policy_update_lock);
/*
* In the case the input buffer contains one or more invalid UIDs, the kuid_t
- * variables pointed to by 'parent' and 'child' will get updated but this
+ * fields of @rule (src_uid and dst_uid) will get updated but this
* function will return an error.
+ * Contents of @buf may be modified.
*/
-static int parse_safesetid_whitelist_policy(const char __user *buf,
- size_t len,
- kuid_t *parent,
- kuid_t *child)
+static int parse_policy_line(struct file *file, char *buf,
+ struct setuid_rule *rule)
{
- char *kern_buf;
- char *parent_buf;
- char *child_buf;
- const char separator[] = ":";
+ char *child_str;
int ret;
- size_t first_substring_length;
- long parsed_parent;
- long parsed_child;
+ u32 parsed_parent, parsed_child;
- /* Duplicate string from user memory and NULL-terminate */
- kern_buf = memdup_user_nul(buf, len);
- if (IS_ERR(kern_buf))
- return PTR_ERR(kern_buf);
+ /* Format of |buf| string should be <UID>:<UID>. */
+ child_str = strchr(buf, ':');
+ if (child_str == NULL)
+ return -EINVAL;
+ *child_str = '\0';
+ child_str++;
- /*
- * Format of |buf| string should be <UID>:<UID>.
- * Find location of ":" in kern_buf (copied from |buf|).
- */
- first_substring_length = strcspn(kern_buf, separator);
- if (first_substring_length == 0 || first_substring_length == len) {
- ret = -EINVAL;
- goto free_kern;
- }
+ ret = kstrtou32(buf, 0, &parsed_parent);
+ if (ret)
+ return ret;
+
+ ret = kstrtou32(child_str, 0, &parsed_child);
+ if (ret)
+ return ret;
- parent_buf = kmemdup_nul(kern_buf, first_substring_length, GFP_KERNEL);
- if (!parent_buf) {
- ret = -ENOMEM;
- goto free_kern;
+ rule->src_uid = make_kuid(file->f_cred->user_ns, parsed_parent);
+ rule->dst_uid = make_kuid(file->f_cred->user_ns, parsed_child);
+ if (!uid_valid(rule->src_uid) || !uid_valid(rule->dst_uid))
+ return -EINVAL;
+
+ return 0;
+}
+
+static void __release_ruleset(struct rcu_head *rcu)
+{
+ struct setuid_ruleset *pol =
+ container_of(rcu, struct setuid_ruleset, rcu);
+ int bucket;
+ struct setuid_rule *rule;
+ struct hlist_node *tmp;
+
+ hash_for_each_safe(pol->rules, bucket, tmp, rule, next)
+ kfree(rule);
+ kfree(pol->policy_str);
+ kfree(pol);
+}
+
+static void release_ruleset(struct setuid_ruleset *pol)
+{
+ call_rcu(&pol->rcu, __release_ruleset);
+}
+
+static void insert_rule(struct setuid_ruleset *pol, struct setuid_rule *rule)
+{
+ hash_add(pol->rules, &rule->next, __kuid_val(rule->src_uid));
+}
+
+static int verify_ruleset(struct setuid_ruleset *pol)
+{
+ int bucket;
+ struct setuid_rule *rule, *nrule;
+ int res = 0;
+
+ hash_for_each(pol->rules, bucket, rule, next) {
+ if (_setuid_policy_lookup(pol, rule->dst_uid, INVALID_UID) ==
+ SIDPOL_DEFAULT) {
+ pr_warn("insecure policy detected: uid %d is constrained but transitively unconstrained through uid %d\n",
+ __kuid_val(rule->src_uid),
+ __kuid_val(rule->dst_uid));
+ res = -EINVAL;
+
+ /* fix it up */
+ nrule = kmalloc(sizeof(struct setuid_rule), GFP_KERNEL);
+ if (!nrule)
+ return -ENOMEM;
+ nrule->src_uid = rule->dst_uid;
+ nrule->dst_uid = rule->dst_uid;
+ insert_rule(pol, nrule);
+ }
}
+ return res;
+}
- ret = kstrtol(parent_buf, 0, &parsed_parent);
- if (ret)
- goto free_both;
+static ssize_t handle_policy_update(struct file *file,
+ const char __user *ubuf, size_t len)
+{
+ struct setuid_ruleset *pol;
+ char *buf, *p, *end;
+ int err;
- child_buf = kern_buf + first_substring_length + 1;
- ret = kstrtol(child_buf, 0, &parsed_child);
- if (ret)
- goto free_both;
+ pol = kmalloc(sizeof(struct setuid_ruleset), GFP_KERNEL);
+ if (!pol)
+ return -ENOMEM;
+ pol->policy_str = NULL;
+ hash_init(pol->rules);
- *parent = make_kuid(current_user_ns(), parsed_parent);
- if (!uid_valid(*parent)) {
- ret = -EINVAL;
- goto free_both;
+ p = buf = memdup_user_nul(ubuf, len);
+ if (IS_ERR(buf)) {
+ err = PTR_ERR(buf);
+ goto out_free_pol;
}
+ pol->policy_str = kstrdup(buf, GFP_KERNEL);
+ if (pol->policy_str == NULL) {
+ err = -ENOMEM;
+ goto out_free_buf;
+ }
+
+ /* policy lines, including the last one, end with \n */
+ while (*p != '\0') {
+ struct setuid_rule *rule;
+
+ end = strchr(p, '\n');
+ if (end == NULL) {
+ err = -EINVAL;
+ goto out_free_buf;
+ }
+ *end = '\0';
+
+ rule = kmalloc(sizeof(struct setuid_rule), GFP_KERNEL);
+ if (!rule) {
+ err = -ENOMEM;
+ goto out_free_buf;
+ }
- *child = make_kuid(current_user_ns(), parsed_child);
- if (!uid_valid(*child)) {
- ret = -EINVAL;
- goto free_both;
+ err = parse_policy_line(file, p, rule);
+ if (err)
+ goto out_free_rule;
+
+ if (_setuid_policy_lookup(pol, rule->src_uid, rule->dst_uid) ==
+ SIDPOL_ALLOWED) {
+ pr_warn("bad policy: duplicate entry\n");
+ err = -EEXIST;
+ goto out_free_rule;
+ }
+
+ insert_rule(pol, rule);
+ p = end + 1;
+ continue;
+
+out_free_rule:
+ kfree(rule);
+ goto out_free_buf;
}
-free_both:
- kfree(parent_buf);
-free_kern:
- kfree(kern_buf);
- return ret;
+ err = verify_ruleset(pol);
+ /* bogus policy falls through after fixing it up */
+ if (err && err != -EINVAL)
+ goto out_free_buf;
+
+ /*
+ * Everything looks good, apply the policy and release the old one.
+ * What we really want here is an xchg() wrapper for RCU, but since that
+ * doesn't currently exist, just use a mutex for now.
+ */
+ mutex_lock(&policy_update_lock);
+ rcu_swap_protected(safesetid_setuid_rules, pol,
+ lockdep_is_held(&policy_update_lock));
+ mutex_unlock(&policy_update_lock);
+ err = len;
+
+out_free_buf:
+ kfree(buf);
+out_free_pol:
+ release_ruleset(pol);
+ return err;
}
static ssize_t safesetid_file_write(struct file *file,
@@ -104,90 +196,65 @@ static ssize_t safesetid_file_write(struct file *file,
size_t len,
loff_t *ppos)
{
- struct safesetid_file_entry *file_entry =
- file->f_inode->i_private;
- kuid_t parent;
- kuid_t child;
- int ret;
-
- if (!ns_capable(current_user_ns(), CAP_MAC_ADMIN))
+ if (!file_ns_capable(file, &init_user_ns, CAP_MAC_ADMIN))
return -EPERM;
if (*ppos != 0)
return -EINVAL;
- switch (file_entry->type) {
- case SAFESETID_WHITELIST_FLUSH:
- flush_safesetid_whitelist_entries();
- break;
- case SAFESETID_WHITELIST_ADD:
- ret = parse_safesetid_whitelist_policy(buf, len, &parent,
- &child);
- if (ret)
- return ret;
-
- ret = add_safesetid_whitelist_entry(parent, child);
- if (ret)
- return ret;
- break;
- default:
- pr_warn("Unknown securityfs file %d\n", file_entry->type);
- break;
- }
-
- /* Return len on success so caller won't keep trying to write */
- return len;
+ return handle_policy_update(file, buf, len);
}
-static const struct file_operations safesetid_file_fops = {
- .write = safesetid_file_write,
-};
-
-static void safesetid_shutdown_securityfs(void)
+static ssize_t safesetid_file_read(struct file *file, char __user *buf,
+ size_t len, loff_t *ppos)
{
- int i;
+ ssize_t res = 0;
+ struct setuid_ruleset *pol;
+ const char *kbuf;
- for (i = 0; i < ARRAY_SIZE(safesetid_files); ++i) {
- struct safesetid_file_entry *entry =
- &safesetid_files[i];
- securityfs_remove(entry->dentry);
- entry->dentry = NULL;
+ mutex_lock(&policy_update_lock);
+ pol = rcu_dereference_protected(safesetid_setuid_rules,
+ lockdep_is_held(&policy_update_lock));
+ if (pol) {
+ kbuf = pol->policy_str;
+ res = simple_read_from_buffer(buf, len, ppos,
+ kbuf, strlen(kbuf));
}
-
- securityfs_remove(safesetid_policy_dir);
- safesetid_policy_dir = NULL;
+ mutex_unlock(&policy_update_lock);
+ return res;
}
+static const struct file_operations safesetid_file_fops = {
+ .read = safesetid_file_read,
+ .write = safesetid_file_write,
+};
+
static int __init safesetid_init_securityfs(void)
{
- int i;
int ret;
+ struct dentry *policy_dir;
+ struct dentry *policy_file;
if (!safesetid_initialized)
return 0;
- safesetid_policy_dir = securityfs_create_dir("safesetid", NULL);
- if (IS_ERR(safesetid_policy_dir)) {
- ret = PTR_ERR(safesetid_policy_dir);
+ policy_dir = securityfs_create_dir("safesetid", NULL);
+ if (IS_ERR(policy_dir)) {
+ ret = PTR_ERR(policy_dir);
goto error;
}
- for (i = 0; i < ARRAY_SIZE(safesetid_files); ++i) {
- struct safesetid_file_entry *entry =
- &safesetid_files[i];
- entry->dentry = securityfs_create_file(
- entry->name, 0200, safesetid_policy_dir,
- entry, &safesetid_file_fops);
- if (IS_ERR(entry->dentry)) {
- ret = PTR_ERR(entry->dentry);
- goto error;
- }
+ policy_file = securityfs_create_file("whitelist_policy", 0600,
+ policy_dir, NULL, &safesetid_file_fops);
+ if (IS_ERR(policy_file)) {
+ ret = PTR_ERR(policy_file);
+ goto error;
}
return 0;
error:
- safesetid_shutdown_securityfs();
+ securityfs_remove(policy_dir);
return ret;
}
fs_initcall(safesetid_init_securityfs);
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index f506c68b2612..17e2b1713702 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -806,7 +806,7 @@ union bpf_attr {
* based on a user-provided identifier for all traffic coming from
* the tasks belonging to the related cgroup. See also the related
* kernel documentation, available from the Linux sources in file
- * *Documentation/cgroup-v1/net_cls.rst*.
+ * *Documentation/admin-guide/cgroup-v1/net_cls.rst*.
*
* The Linux kernel has two versions for cgroups: there are
* cgroups v1 and cgroups v2. Both are available to users, who can
diff --git a/tools/testing/selftests/safesetid/safesetid-test.c b/tools/testing/selftests/safesetid/safesetid-test.c
index 892c8e8b1b8b..8f40c6ecdad1 100644
--- a/tools/testing/selftests/safesetid/safesetid-test.c
+++ b/tools/testing/selftests/safesetid/safesetid-test.c
@@ -142,23 +142,19 @@ static void ensure_securityfs_mounted(void)
static void write_policies(void)
{
+ static char *policy_str =
+ "1:2\n"
+ "1:3\n"
+ "2:2\n"
+ "3:3\n";
ssize_t written;
int fd;
fd = open(add_whitelist_policy_file, O_WRONLY);
if (fd < 0)
die("cant open add_whitelist_policy file\n");
- written = write(fd, "1:2", strlen("1:2"));
- if (written != strlen("1:2")) {
- if (written >= 0) {
- die("short write to %s\n", add_whitelist_policy_file);
- } else {
- die("write to %s failed: %s\n",
- add_whitelist_policy_file, strerror(errno));
- }
- }
- written = write(fd, "1:3", strlen("1:3"));
- if (written != strlen("1:3")) {
+ written = write(fd, policy_str, strlen(policy_str));
+ if (written != strlen(policy_str)) {
if (written >= 0) {
die("short write to %s\n", add_whitelist_policy_file);
} else {
diff --git a/tools/testing/selftests/zram/README b/tools/testing/selftests/zram/README
index 7972cc512408..110b34834a6f 100644
--- a/tools/testing/selftests/zram/README
+++ b/tools/testing/selftests/zram/README
@@ -37,4 +37,4 @@ Commands required for testing:
- mkfs/ mkfs.ext4
For more information please refer:
-kernel-source-tree/Documentation/blockdev/zram.txt
+kernel-source-tree/Documentation/admin-guide/blockdev/zram.rst
diff --git a/usr/Kconfig b/usr/Kconfig
index 43658b8a975e..a6b68503d177 100644
--- a/usr/Kconfig
+++ b/usr/Kconfig
@@ -18,7 +18,7 @@ config INITRAMFS_SOURCE
When multiple directories and files are specified then the
initramfs image will be the aggregate of all of them.
- See <file:Documentation/early-userspace/README> for more details.
+ See <file:Documentation/driver-api/early-userspace/early_userspace_support.rst> for more details.
If you are not sure, leave it blank.