diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2015-08-31 15:40:05 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2015-08-31 15:40:05 -0700 |
commit | e2701603f72cd38e99c6b1da13c8e99bc27b2f34 (patch) | |
tree | af55a86e0fbc26d4b19d1a2eabb41db26e7687ef /Documentation | |
parent | 22629b6d9072c4e86e900306d7020ad722ae6536 (diff) | |
parent | ce14c5831364118324b10c0355dead062b9ddd40 (diff) | |
download | linux-e2701603f72cd38e99c6b1da13c8e99bc27b2f34.tar.bz2 |
Merge tag 'docs-for-linus' of git://git.lwn.net/linux-2.6
Pull documentation updates from Jonathan Corbet:
"There's been a fair amount going on in the docs tree this time around,
including:
- Support for reproducible document builds, from Ben Hutchings and
company.
- The ability to automatically generate cross-reference links within
a single DocBook book and embedded descriptions for large
structures. From Danilo Cesar Lemes de Paula.
- A new document on how to add a system call from David Drysdale.
- Chameleon bus documentation from Johannes Thumshirn.
...plus the usual collection of improvements, typo fixes, and more"
* tag 'docs-for-linus' of git://git.lwn.net/linux-2.6: (39 commits)
Documentation, add kernel-parameters.txt entry for dis_ucode_ldr
Documentation/x86: Rename IRQSTACKSIZE to IRQ_STACK_SIZE
Documentation/Intel-IOMMU.txt: Modify definition of DRHD
docs: update HOWTO for 3.x -> 4.x versioning
kernel-doc: ignore unneeded attribute information
scripts/kernel-doc: Adding cross-reference links to html documentation.
DocBook: Fix non-determinstic installation of duplicate man pages
Documentation: minor typo fix in mailbox.txt
Documentation: describe how to add a system call
doc: Add more workqueue functions to the documentation
ARM: keystone: add documentation for SoCs and EVMs
scripts/kernel-doc Allow struct arguments documentation in struct body
SubmittingPatches: remove stray quote character
Revert "DocBook: Avoid building man pages repeatedly and inconsistently"
Documentation: Minor changes to men-chameleon-bus.txt
Doc: fix trivial typo in SubmittingPatches
MAINTAINERS: Direct Documentation/DocBook/media properly
Documentation: installed man pages don't need to be executable
fix Evolution submenu name in email-clients.txt
Documentation: Add MCB documentation
...
Diffstat (limited to 'Documentation')
24 files changed, 904 insertions, 69 deletions
diff --git a/Documentation/CodingStyle b/Documentation/CodingStyle index b713c35f8543..c06f817b3091 100644 --- a/Documentation/CodingStyle +++ b/Documentation/CodingStyle @@ -929,13 +929,11 @@ The C Programming Language, Second Edition by Brian W. Kernighan and Dennis M. Ritchie. Prentice Hall, Inc., 1988. ISBN 0-13-110362-8 (paperback), 0-13-110370-9 (hardback). -URL: http://cm.bell-labs.com/cm/cs/cbook/ The Practice of Programming by Brian W. Kernighan and Rob Pike. Addison-Wesley, Inc., 1999. ISBN 0-201-61586-X. -URL: http://cm.bell-labs.com/cm/cs/tpop/ GNU manuals - where in compliance with K&R and this text - for cpp, gcc, gcc internals and indent, all available from http://www.gnu.org/manual/ diff --git a/Documentation/DocBook/Makefile b/Documentation/DocBook/Makefile index 9e086067b4ae..93eff64387cd 100644 --- a/Documentation/DocBook/Makefile +++ b/Documentation/DocBook/Makefile @@ -56,16 +56,19 @@ htmldocs: $(HTML) MAN := $(patsubst %.xml, %.9, $(BOOKS)) mandocs: $(MAN) - find $(obj)/man -name '*.9' | xargs gzip -f + find $(obj)/man -name '*.9' | xargs gzip -nf installmandocs: mandocs mkdir -p /usr/local/man/man9/ - install $(obj)/man/*.9.gz /usr/local/man/man9/ + find $(obj)/man -name '*.9.gz' -printf '%h %f\n' | \ + sort -k 2 -k 1 | uniq -f 1 | sed -e 's: :/:' | \ + xargs install -m 644 -t /usr/local/man/man9/ ### #External programs used -KERNELDOC = $(srctree)/scripts/kernel-doc -DOCPROC = $(objtree)/scripts/docproc +KERNELDOCXMLREF = $(srctree)/scripts/kernel-doc-xml-ref +KERNELDOC = $(srctree)/scripts/kernel-doc +DOCPROC = $(objtree)/scripts/docproc XMLTOFLAGS = -m $(srctree)/$(src)/stylesheet.xsl XMLTOFLAGS += --skip-validation @@ -89,7 +92,7 @@ define rule_docproc ) > $(dir $@).$(notdir $@).cmd endef -%.xml: %.tmpl $(KERNELDOC) $(DOCPROC) FORCE +%.xml: %.tmpl $(KERNELDOC) $(DOCPROC) $(KERNELDOCXMLREF) FORCE $(call if_changed_rule,docproc) # Tell kbuild to always build the programs @@ -140,7 +143,20 @@ quiet_cmd_db2html = HTML $@ echo '<a HREF="$(patsubst %.html,%,$(notdir $@))/index.html"> \ $(patsubst %.html,%,$(notdir $@))</a><p>' > $@ -%.html: %.xml +### +# Rules to create an aux XML and .db, and use them to re-process the DocBook XML +# to fill internal hyperlinks + gen_aux_xml = : + quiet_gen_aux_xml = echo ' XMLREF $@' +silent_gen_aux_xml = : +%.aux.xml: %.xml + @$($(quiet)gen_aux_xml) + @rm -rf $@ + @(cat $< | egrep "^<refentry id" | egrep -o "\".*\"" | cut -f 2 -d \" > $<.db) + @$(KERNELDOCXMLREF) -db $<.db $< > $@ +.PRECIOUS: %.aux.xml + +%.html: %.aux.xml @(which xmlto > /dev/null 2>&1) || \ (echo "*** You need to install xmlto ***"; \ exit 1) @@ -150,12 +166,12 @@ quiet_cmd_db2html = HTML $@ cp $(PNG-$(basename $(notdir $@))) $(patsubst %.html,%,$@); fi quiet_cmd_db2man = MAN $@ - cmd_db2man = if grep -q refentry $<; then xmlto man $(XMLTOFLAGS) -o $(obj)/man $< ; fi + cmd_db2man = if grep -q refentry $<; then xmlto man $(XMLTOFLAGS) -o $(obj)/man/$(*F) $< ; fi %.9 : %.xml @(which xmlto > /dev/null 2>&1) || \ (echo "*** You need to install xmlto ***"; \ exit 1) - $(Q)mkdir -p $(obj)/man + $(Q)mkdir -p $(obj)/man/$(*F) $(call cmd,db2man) @touch $@ @@ -209,15 +225,18 @@ dochelp: ### # Temporary files left by various tools clean-files := $(DOCBOOKS) \ - $(patsubst %.xml, %.dvi, $(DOCBOOKS)) \ - $(patsubst %.xml, %.aux, $(DOCBOOKS)) \ - $(patsubst %.xml, %.tex, $(DOCBOOKS)) \ - $(patsubst %.xml, %.log, $(DOCBOOKS)) \ - $(patsubst %.xml, %.out, $(DOCBOOKS)) \ - $(patsubst %.xml, %.ps, $(DOCBOOKS)) \ - $(patsubst %.xml, %.pdf, $(DOCBOOKS)) \ - $(patsubst %.xml, %.html, $(DOCBOOKS)) \ - $(patsubst %.xml, %.9, $(DOCBOOKS)) \ + $(patsubst %.xml, %.dvi, $(DOCBOOKS)) \ + $(patsubst %.xml, %.aux, $(DOCBOOKS)) \ + $(patsubst %.xml, %.tex, $(DOCBOOKS)) \ + $(patsubst %.xml, %.log, $(DOCBOOKS)) \ + $(patsubst %.xml, %.out, $(DOCBOOKS)) \ + $(patsubst %.xml, %.ps, $(DOCBOOKS)) \ + $(patsubst %.xml, %.pdf, $(DOCBOOKS)) \ + $(patsubst %.xml, %.html, $(DOCBOOKS)) \ + $(patsubst %.xml, %.9, $(DOCBOOKS)) \ + $(patsubst %.xml, %.aux.xml, $(DOCBOOKS)) \ + $(patsubst %.xml, %.xml.db, $(DOCBOOKS)) \ + $(patsubst %.xml, %.xml, $(DOCBOOKS)) \ $(index) clean-dirs := $(patsubst %.xml,%,$(DOCBOOKS)) man diff --git a/Documentation/DocBook/device-drivers.tmpl b/Documentation/DocBook/device-drivers.tmpl index faf09d4a0ea8..bbc1d7ee9c76 100644 --- a/Documentation/DocBook/device-drivers.tmpl +++ b/Documentation/DocBook/device-drivers.tmpl @@ -66,6 +66,7 @@ !Ekernel/time/hrtimer.c </sect1> <sect1><title>Workqueues and Kevents</title> +!Iinclude/linux/workqueue.h !Ekernel/workqueue.c </sect1> <sect1><title>Internal Functions</title> diff --git a/Documentation/DocBook/stylesheet.xsl b/Documentation/DocBook/stylesheet.xsl index 85b25275196f..3bf4ecf3d760 100644 --- a/Documentation/DocBook/stylesheet.xsl +++ b/Documentation/DocBook/stylesheet.xsl @@ -5,6 +5,7 @@ <param name="funcsynopsis.tabular.threshold">80</param> <param name="callout.graphics">0</param> <!-- <param name="paper.type">A4</param> --> +<param name="generate.consistent.ids">1</param> <param name="generate.section.toc.level">2</param> <param name="use.id.as.filename">1</param> </stylesheet> diff --git a/Documentation/HOWTO b/Documentation/HOWTO index 93aa8604630e..21152d397b88 100644 --- a/Documentation/HOWTO +++ b/Documentation/HOWTO @@ -218,16 +218,16 @@ The development process Linux kernel development process currently consists of a few different main kernel "branches" and lots of different subsystem-specific kernel branches. These different branches are: - - main 3.x kernel tree - - 3.x.y -stable kernel tree - - 3.x -git kernel patches + - main 4.x kernel tree + - 4.x.y -stable kernel tree + - 4.x -git kernel patches - subsystem specific kernel trees and patches - - the 3.x -next kernel tree for integration tests + - the 4.x -next kernel tree for integration tests -3.x kernel tree +4.x kernel tree ----------------- -3.x kernels are maintained by Linus Torvalds, and can be found on -kernel.org in the pub/linux/kernel/v3.x/ directory. Its development +4.x kernels are maintained by Linus Torvalds, and can be found on +kernel.org in the pub/linux/kernel/v4.x/ directory. Its development process is as follows: - As soon as a new kernel is released a two weeks window is open, during this period of time maintainers can submit big diffs to @@ -262,20 +262,20 @@ mailing list about kernel releases: released according to perceived bug status, not according to a preconceived timeline." -3.x.y -stable kernel tree +4.x.y -stable kernel tree --------------------------- Kernels with 3-part versions are -stable kernels. They contain relatively small and critical fixes for security problems or significant -regressions discovered in a given 3.x kernel. +regressions discovered in a given 4.x kernel. This is the recommended branch for users who want the most recent stable kernel and are not interested in helping test development/experimental versions. -If no 3.x.y kernel is available, then the highest numbered 3.x +If no 4.x.y kernel is available, then the highest numbered 4.x kernel is the current stable kernel. -3.x.y are maintained by the "stable" team <stable@vger.kernel.org>, and +4.x.y are maintained by the "stable" team <stable@vger.kernel.org>, and are released as needs dictate. The normal release period is approximately two weeks, but it can be longer if there are no pressing problems. A security-related problem, instead, can cause a release to happen almost @@ -285,7 +285,7 @@ The file Documentation/stable_kernel_rules.txt in the kernel tree documents what kinds of changes are acceptable for the -stable tree, and how the release process works. -3.x -git patches +4.x -git patches ------------------ These are daily snapshots of Linus' kernel tree which are managed in a git repository (hence the name.) These patches are usually released @@ -317,9 +317,9 @@ revisions to it, and maintainers can mark patches as under review, accepted, or rejected. Most of these patchwork sites are listed at http://patchwork.kernel.org/. -3.x -next kernel tree for integration tests +4.x -next kernel tree for integration tests --------------------------------------------- -Before updates from subsystem trees are merged into the mainline 3.x +Before updates from subsystem trees are merged into the mainline 4.x tree, they need to be integration-tested. For this purpose, a special testing repository exists into which virtually all subsystem trees are pulled on an almost daily basis: diff --git a/Documentation/Intel-IOMMU.txt b/Documentation/Intel-IOMMU.txt index cf9431db8731..7b57fc087088 100644 --- a/Documentation/Intel-IOMMU.txt +++ b/Documentation/Intel-IOMMU.txt @@ -10,7 +10,7 @@ This guide gives a quick cheat sheet for some basic understanding. Some Keywords DMAR - DMA remapping -DRHD - DMA Engine Reporting Structure +DRHD - DMA Remapping Hardware Unit Definition RMRR - Reserved memory Region Reporting Structure ZLR - Zero length reads from PCI devices IOVA - IO Virtual address. diff --git a/Documentation/SubmittingPatches b/Documentation/SubmittingPatches index 27e7e5edeca8..fa596d8a3ab0 100644 --- a/Documentation/SubmittingPatches +++ b/Documentation/SubmittingPatches @@ -90,11 +90,11 @@ patch. Make sure your patch does not include any extra files which do not belong in a patch submission. Make sure to review your patch -after- -generated it with diff(1), to ensure accuracy. +generating it with diff(1), to ensure accuracy. If your changes produce a lot of deltas, you need to split them into individual patches which modify things in logical stages; see section -#3. This will facilitate easier reviewing by other kernel developers, +#3. This will facilitate review by other kernel developers, very important if you want your patch accepted. If you're using git, "git rebase -i" can help you with this process. If @@ -267,7 +267,7 @@ You should always copy the appropriate subsystem maintainer(s) on any patch to code that they maintain; look through the MAINTAINERS file and the source code revision history to see who those maintainers are. The script scripts/get_maintainer.pl can be very useful at this step. If you -cannot find a maintainer for the subsystem your are working on, Andrew +cannot find a maintainer for the subsystem you are working on, Andrew Morton (akpm@linux-foundation.org) serves as a maintainer of last resort. You should also normally choose at least one mailing list to receive a copy @@ -340,7 +340,7 @@ on the changes you are submitting. It is important for a kernel developer to be able to "quote" your changes, using standard e-mail tools, so that they may comment on specific portions of your code. -For this reason, all patches should be submitting e-mail "inline". +For this reason, all patches should be submitted by e-mail "inline". WARNING: Be wary of your editor's word-wrap corrupting your patch, if you choose to cut-n-paste your patch. @@ -739,7 +739,7 @@ interest on a single line; it should look something like: git://jdelvare.pck.nerim.net/jdelvare-2.6 i2c-for-linus - to get these changes:" + to get these changes: A pull request should also include an overall message saying what will be included in the request, a "git shortlog" listing of the patches @@ -796,7 +796,7 @@ NO!!!! No more huge patch bombs to linux-kernel@vger.kernel.org people! <https://lkml.org/lkml/2005/7/11/336> Kernel Documentation/CodingStyle: - <http://users.sosdg.org/~qiyong/lxr/source/Documentation/CodingStyle> + <Documentation/CodingStyle> Linus Torvalds's mail on the canonical patch format: <http://lkml.org/lkml/2005/4/7/183> diff --git a/Documentation/adding-syscalls.txt b/Documentation/adding-syscalls.txt new file mode 100644 index 000000000000..cc2d4ac4f404 --- /dev/null +++ b/Documentation/adding-syscalls.txt @@ -0,0 +1,527 @@ +Adding a New System Call +======================== + +This document describes what's involved in adding a new system call to the +Linux kernel, over and above the normal submission advice in +Documentation/SubmittingPatches. + + +System Call Alternatives +------------------------ + +The first thing to consider when adding a new system call is whether one of +the alternatives might be suitable instead. Although system calls are the +most traditional and most obvious interaction points between userspace and the +kernel, there are other possibilities -- choose what fits best for your +interface. + + - If the operations involved can be made to look like a filesystem-like + object, it may make more sense to create a new filesystem or device. This + also makes it easier to encapsulate the new functionality in a kernel module + rather than requiring it to be built into the main kernel. + - If the new functionality involves operations where the kernel notifies + userspace that something has happened, then returning a new file + descriptor for the relevant object allows userspace to use + poll/select/epoll to receive that notification. + - However, operations that don't map to read(2)/write(2)-like operations + have to be implemented as ioctl(2) requests, which can lead to a + somewhat opaque API. + - If you're just exposing runtime system information, a new node in sysfs + (see Documentation/filesystems/sysfs.txt) or the /proc filesystem may be + more appropriate. However, access to these mechanisms requires that the + relevant filesystem is mounted, which might not always be the case (e.g. + in a namespaced/sandboxed/chrooted environment). Avoid adding any API to + debugfs, as this is not considered a 'production' interface to userspace. + - If the operation is specific to a particular file or file descriptor, then + an additional fcntl(2) command option may be more appropriate. However, + fcntl(2) is a multiplexing system call that hides a lot of complexity, so + this option is best for when the new function is closely analogous to + existing fcntl(2) functionality, or the new functionality is very simple + (for example, getting/setting a simple flag related to a file descriptor). + - If the operation is specific to a particular task or process, then an + additional prctl(2) command option may be more appropriate. As with + fcntl(2), this system call is a complicated multiplexor so is best reserved + for near-analogs of existing prctl() commands or getting/setting a simple + flag related to a process. + + +Designing the API: Planning for Extension +----------------------------------------- + +A new system call forms part of the API of the kernel, and has to be supported +indefinitely. As such, it's a very good idea to explicitly discuss the +interface on the kernel mailing list, and it's important to plan for future +extensions of the interface. + +(The syscall table is littered with historical examples where this wasn't done, +together with the corresponding follow-up system calls -- eventfd/eventfd2, +dup2/dup3, inotify_init/inotify_init1, pipe/pipe2, renameat/renameat2 -- so +learn from the history of the kernel and plan for extensions from the start.) + +For simpler system calls that only take a couple of arguments, the preferred +way to allow for future extensibility is to include a flags argument to the +system call. To make sure that userspace programs can safely use flags +between kernel versions, check whether the flags value holds any unknown +flags, and reject the system call (with EINVAL) if it does: + + if (flags & ~(THING_FLAG1 | THING_FLAG2 | THING_FLAG3)) + return -EINVAL; + +(If no flags values are used yet, check that the flags argument is zero.) + +For more sophisticated system calls that involve a larger number of arguments, +it's preferred to encapsulate the majority of the arguments into a structure +that is passed in by pointer. Such a structure can cope with future extension +by including a size argument in the structure: + + struct xyzzy_params { + u32 size; /* userspace sets p->size = sizeof(struct xyzzy_params) */ + u32 param_1; + u64 param_2; + u64 param_3; + }; + +As long as any subsequently added field, say param_4, is designed so that a +zero value gives the previous behaviour, then this allows both directions of +version mismatch: + + - To cope with a later userspace program calling an older kernel, the kernel + code should check that any memory beyond the size of the structure that it + expects is zero (effectively checking that param_4 == 0). + - To cope with an older userspace program calling a newer kernel, the kernel + code can zero-extend a smaller instance of the structure (effectively + setting param_4 = 0). + +See perf_event_open(2) and the perf_copy_attr() function (in +kernel/events/core.c) for an example of this approach. + + +Designing the API: Other Considerations +--------------------------------------- + +If your new system call allows userspace to refer to a kernel object, it +should use a file descriptor as the handle for that object -- don't invent a +new type of userspace object handle when the kernel already has mechanisms and +well-defined semantics for using file descriptors. + +If your new xyzzy(2) system call does return a new file descriptor, then the +flags argument should include a value that is equivalent to setting O_CLOEXEC +on the new FD. This makes it possible for userspace to close the timing +window between xyzzy() and calling fcntl(fd, F_SETFD, FD_CLOEXEC), where an +unexpected fork() and execve() in another thread could leak a descriptor to +the exec'ed program. (However, resist the temptation to re-use the actual value +of the O_CLOEXEC constant, as it is architecture-specific and is part of a +numbering space of O_* flags that is fairly full.) + +If your system call returns a new file descriptor, you should also consider +what it means to use the poll(2) family of system calls on that file +descriptor. Making a file descriptor ready for reading or writing is the +normal way for the kernel to indicate to userspace that an event has +occurred on the corresponding kernel object. + +If your new xyzzy(2) system call involves a filename argument: + + int sys_xyzzy(const char __user *path, ..., unsigned int flags); + +you should also consider whether an xyzzyat(2) version is more appropriate: + + int sys_xyzzyat(int dfd, const char __user *path, ..., unsigned int flags); + +This allows more flexibility for how userspace specifies the file in question; +in particular it allows userspace to request the functionality for an +already-opened file descriptor using the AT_EMPTY_PATH flag, effectively giving +an fxyzzy(3) operation for free: + + - xyzzyat(AT_FDCWD, path, ..., 0) is equivalent to xyzzy(path,...) + - xyzzyat(fd, "", ..., AT_EMPTY_PATH) is equivalent to fxyzzy(fd, ...) + +(For more details on the rationale of the *at() calls, see the openat(2) man +page; for an example of AT_EMPTY_PATH, see the statat(2) man page.) + +If your new xyzzy(2) system call involves a parameter describing an offset +within a file, make its type loff_t so that 64-bit offsets can be supported +even on 32-bit architectures. + +If your new xyzzy(2) system call involves privileged functionality, it needs +to be governed by the appropriate Linux capability bit (checked with a call to +capable()), as described in the capabilities(7) man page. Choose an existing +capability bit that governs related functionality, but try to avoid combining +lots of only vaguely related functions together under the same bit, as this +goes against capabilities' purpose of splitting the power of root. In +particular, avoid adding new uses of the already overly-general CAP_SYS_ADMIN +capability. + +If your new xyzzy(2) system call manipulates a process other than the calling +process, it should be restricted (using a call to ptrace_may_access()) so that +only a calling process with the same permissions as the target process, or +with the necessary capabilities, can manipulate the target process. + +Finally, be aware that some non-x86 architectures have an easier time if +system call parameters that are explicitly 64-bit fall on odd-numbered +arguments (i.e. parameter 1, 3, 5), to allow use of contiguous pairs of 32-bit +registers. (This concern does not apply if the arguments are part of a +structure that's passed in by pointer.) + + +Proposing the API +----------------- + +To make new system calls easy to review, it's best to divide up the patchset +into separate chunks. These should include at least the following items as +distinct commits (each of which is described further below): + + - The core implementation of the system call, together with prototypes, + generic numbering, Kconfig changes and fallback stub implementation. + - Wiring up of the new system call for one particular architecture, usually + x86 (including all of x86_64, x86_32 and x32). + - A demonstration of the use of the new system call in userspace via a + selftest in tools/testing/selftests/. + - A draft man-page for the new system call, either as plain text in the + cover letter, or as a patch to the (separate) man-pages repository. + +New system call proposals, like any change to the kernel's API, should always +be cc'ed to linux-api@vger.kernel.org. + + +Generic System Call Implementation +---------------------------------- + +The main entry point for your new xyzzy(2) system call will be called +sys_xyzzy(), but you add this entry point with the appropriate +SYSCALL_DEFINEn() macro rather than explicitly. The 'n' indicates the number +of arguments to the system call, and the macro takes the system call name +followed by the (type, name) pairs for the parameters as arguments. Using +this macro allows metadata about the new system call to be made available for +other tools. + +The new entry point also needs a corresponding function prototype, in +include/linux/syscalls.h, marked as asmlinkage to match the way that system +calls are invoked: + + asmlinkage long sys_xyzzy(...); + +Some architectures (e.g. x86) have their own architecture-specific syscall +tables, but several other architectures share a generic syscall table. Add your +new system call to the generic list by adding an entry to the list in +include/uapi/asm-generic/unistd.h: + + #define __NR_xyzzy 292 + __SYSCALL(__NR_xyzzy, sys_xyzzy) + +Also update the __NR_syscalls count to reflect the additional system call, and +note that if multiple new system calls are added in the same merge window, +your new syscall number may get adjusted to resolve conflicts. + +The file kernel/sys_ni.c provides a fallback stub implementation of each system +call, returning -ENOSYS. Add your new system call here too: + + cond_syscall(sys_xyzzy); + +Your new kernel functionality, and the system call that controls it, should +normally be optional, so add a CONFIG option (typically to init/Kconfig) for +it. As usual for new CONFIG options: + + - Include a description of the new functionality and system call controlled + by the option. + - Make the option depend on EXPERT if it should be hidden from normal users. + - Make any new source files implementing the function dependent on the CONFIG + option in the Makefile (e.g. "obj-$(CONFIG_XYZZY_SYSCALL) += xyzzy.c"). + - Double check that the kernel still builds with the new CONFIG option turned + off. + +To summarize, you need a commit that includes: + + - CONFIG option for the new function, normally in init/Kconfig + - SYSCALL_DEFINEn(xyzzy, ...) for the entry point + - corresponding prototype in include/linux/syscalls.h + - generic table entry in include/uapi/asm-generic/unistd.h + - fallback stub in kernel/sys_ni.c + + +x86 System Call Implementation +------------------------------ + +To wire up your new system call for x86 platforms, you need to update the +master syscall tables. Assuming your new system call isn't special in some +way (see below), this involves a "common" entry (for x86_64 and x32) in +arch/x86/entry/syscalls/syscall_64.tbl: + + 333 common xyzzy sys_xyzzy + +and an "i386" entry in arch/x86/entry/syscalls/syscall_32.tbl: + + 380 i386 xyzzy sys_xyzzy + +Again, these numbers are liable to be changed if there are conflicts in the +relevant merge window. + + +Compatibility System Calls (Generic) +------------------------------------ + +For most system calls the same 64-bit implementation can be invoked even when +the userspace program is itself 32-bit; even if the system call's parameters +include an explicit pointer, this is handled transparently. + +However, there are a couple of situations where a compatibility layer is +needed to cope with size differences between 32-bit and 64-bit. + +The first is if the 64-bit kernel also supports 32-bit userspace programs, and +so needs to parse areas of (__user) memory that could hold either 32-bit or +64-bit values. In particular, this is needed whenever a system call argument +is: + + - a pointer to a pointer + - a pointer to a struct containing a pointer (e.g. struct iovec __user *) + - a pointer to a varying sized integral type (time_t, off_t, long, ...) + - a pointer to a struct containing a varying sized integral type. + +The second situation that requires a compatibility layer is if one of the +system call's arguments has a type that is explicitly 64-bit even on a 32-bit +architecture, for example loff_t or __u64. In this case, a value that arrives +at a 64-bit kernel from a 32-bit application will be split into two 32-bit +values, which then need to be re-assembled in the compatibility layer. + +(Note that a system call argument that's a pointer to an explicit 64-bit type +does *not* need a compatibility layer; for example, splice(2)'s arguments of +type loff_t __user * do not trigger the need for a compat_ system call.) + +The compatibility version of the system call is called compat_sys_xyzzy(), and +is added with the COMPAT_SYSCALL_DEFINEn() macro, analogously to +SYSCALL_DEFINEn. This version of the implementation runs as part of a 64-bit +kernel, but expects to receive 32-bit parameter values and does whatever is +needed to deal with them. (Typically, the compat_sys_ version converts the +values to 64-bit versions and either calls on to the sys_ version, or both of +them call a common inner implementation function.) + +The compat entry point also needs a corresponding function prototype, in +include/linux/compat.h, marked as asmlinkage to match the way that system +calls are invoked: + + asmlinkage long compat_sys_xyzzy(...); + +If the system call involves a structure that is laid out differently on 32-bit +and 64-bit systems, say struct xyzzy_args, then the include/linux/compat.h +header file should also include a compat version of the structure (struct +compat_xyzzy_args) where each variable-size field has the appropriate compat_ +type that corresponds to the type in struct xyzzy_args. The +compat_sys_xyzzy() routine can then use this compat_ structure to parse the +arguments from a 32-bit invocation. + +For example, if there are fields: + + struct xyzzy_args { + const char __user *ptr; + __kernel_long_t varying_val; + u64 fixed_val; + /* ... */ + }; + +in struct xyzzy_args, then struct compat_xyzzy_args would have: + + struct compat_xyzzy_args { + compat_uptr_t ptr; + compat_long_t varying_val; + u64 fixed_val; + /* ... */ + }; + +The generic system call list also needs adjusting to allow for the compat +version; the entry in include/uapi/asm-generic/unistd.h should use +__SC_COMP rather than __SYSCALL: + + #define __NR_xyzzy 292 + __SC_COMP(__NR_xyzzy, sys_xyzzy, compat_sys_xyzzy) + +To summarize, you need: + + - a COMPAT_SYSCALL_DEFINEn(xyzzy, ...) for the compat entry point + - corresponding prototype in include/linux/compat.h + - (if needed) 32-bit mapping struct in include/linux/compat.h + - instance of __SC_COMP not __SYSCALL in include/uapi/asm-generic/unistd.h + + +Compatibility System Calls (x86) +-------------------------------- + +To wire up the x86 architecture of a system call with a compatibility version, +the entries in the syscall tables need to be adjusted. + +First, the entry in arch/x86/entry/syscalls/syscall_32.tbl gets an extra +column to indicate that a 32-bit userspace program running on a 64-bit kernel +should hit the compat entry point: + + 380 i386 xyzzy sys_xyzzy compat_sys_xyzzy + +Second, you need to figure out what should happen for the x32 ABI version of +the new system call. There's a choice here: the layout of the arguments +should either match the 64-bit version or the 32-bit version. + +If there's a pointer-to-a-pointer involved, the decision is easy: x32 is +ILP32, so the layout should match the 32-bit version, and the entry in +arch/x86/entry/syscalls/syscall_64.tbl is split so that x32 programs hit the +compatibility wrapper: + + 333 64 xyzzy sys_xyzzy + ... + 555 x32 xyzzy compat_sys_xyzzy + +If no pointers are involved, then it is preferable to re-use the 64-bit system +call for the x32 ABI (and consequently the entry in +arch/x86/entry/syscalls/syscall_64.tbl is unchanged). + +In either case, you should check that the types involved in your argument +layout do indeed map exactly from x32 (-mx32) to either the 32-bit (-m32) or +64-bit (-m64) equivalents. + + +System Calls Returning Elsewhere +-------------------------------- + +For most system calls, once the system call is complete the user program +continues exactly where it left off -- at the next instruction, with the +stack the same and most of the registers the same as before the system call, +and with the same virtual memory space. + +However, a few system calls do things differently. They might return to a +different location (rt_sigreturn) or change the memory space (fork/vfork/clone) +or even architecture (execve/execveat) of the program. + +To allow for this, the kernel implementation of the system call may need to +save and restore additional registers to the kernel stack, allowing complete +control of where and how execution continues after the system call. + +This is arch-specific, but typically involves defining assembly entry points +that save/restore additional registers and invoke the real system call entry +point. + +For x86_64, this is implemented as a stub_xyzzy entry point in +arch/x86/entry/entry_64.S, and the entry in the syscall table +(arch/x86/entry/syscalls/syscall_64.tbl) is adjusted to match: + + 333 common xyzzy stub_xyzzy + +The equivalent for 32-bit programs running on a 64-bit kernel is normally +called stub32_xyzzy and implemented in arch/x86/entry/entry_64_compat.S, +with the corresponding syscall table adjustment in +arch/x86/entry/syscalls/syscall_32.tbl: + + 380 i386 xyzzy sys_xyzzy stub32_xyzzy + +If the system call needs a compatibility layer (as in the previous section) +then the stub32_ version needs to call on to the compat_sys_ version of the +system call rather than the native 64-bit version. Also, if the x32 ABI +implementation is not common with the x86_64 version, then its syscall +table will also need to invoke a stub that calls on to the compat_sys_ +version. + +For completeness, it's also nice to set up a mapping so that user-mode Linux +still works -- its syscall table will reference stub_xyzzy, but the UML build +doesn't include arch/x86/entry/entry_64.S implementation (because UML +simulates registers etc). Fixing this is as simple as adding a #define to +arch/x86/um/sys_call_table_64.c: + + #define stub_xyzzy sys_xyzzy + + +Other Details +------------- + +Most of the kernel treats system calls in a generic way, but there is the +occasional exception that may need updating for your particular system call. + +The audit subsystem is one such special case; it includes (arch-specific) +functions that classify some special types of system call -- specifically +file open (open/openat), program execution (execve/exeveat) or socket +multiplexor (socketcall) operations. If your new system call is analogous to +one of these, then the audit system should be updated. + +More generally, if there is an existing system call that is analogous to your +new system call, it's worth doing a kernel-wide grep for the existing system +call to check there are no other special cases. + + +Testing +------- + +A new system call should obviously be tested; it is also useful to provide +reviewers with a demonstration of how user space programs will use the system +call. A good way to combine these aims is to include a simple self-test +program in a new directory under tools/testing/selftests/. + +For a new system call, there will obviously be no libc wrapper function and so +the test will need to invoke it using syscall(); also, if the system call +involves a new userspace-visible structure, the corresponding header will need +to be installed to compile the test. + +Make sure the selftest runs successfully on all supported architectures. For +example, check that it works when compiled as an x86_64 (-m64), x86_32 (-m32) +and x32 (-mx32) ABI program. + +For more extensive and thorough testing of new functionality, you should also +consider adding tests to the Linux Test Project, or to the xfstests project +for filesystem-related changes. + - https://linux-test-project.github.io/ + - git://git.kernel.org/pub/scm/fs/xfs/xfstests-dev.git + + +Man Page +-------- + +All new system calls should come with a complete man page, ideally using groff +markup, but plain text will do. If groff is used, it's helpful to include a +pre-rendered ASCII version of the man page in the cover email for the +patchset, for the convenience of reviewers. + +The man page should be cc'ed to linux-man@vger.kernel.org +For more details, see https://www.kernel.org/doc/man-pages/patches.html + +References and Sources +---------------------- + + - LWN article from Michael Kerrisk on use of flags argument in system calls: + https://lwn.net/Articles/585415/ + - LWN article from Michael Kerrisk on how to handle unknown flags in a system + call: https://lwn.net/Articles/588444/ + - LWN article from Jake Edge describing constraints on 64-bit system call + arguments: https://lwn.net/Articles/311630/ + - Pair of LWN articles from David Drysdale that describe the system call + implementation paths in detail for v3.14: + - https://lwn.net/Articles/604287/ + - https://lwn.net/Articles/604515/ + - Architecture-specific requirements for system calls are discussed in the + syscall(2) man-page: + http://man7.org/linux/man-pages/man2/syscall.2.html#NOTES + - Collated emails from Linus Torvalds discussing the problems with ioctl(): + http://yarchive.net/comp/linux/ioctl.html + - "How to not invent kernel interfaces", Arnd Bergmann, + http://www.ukuug.org/events/linux2007/2007/papers/Bergmann.pdf + - LWN article from Michael Kerrisk on avoiding new uses of CAP_SYS_ADMIN: + https://lwn.net/Articles/486306/ + - Recommendation from Andrew Morton that all related information for a new + system call should come in the same email thread: + https://lkml.org/lkml/2014/7/24/641 + - Recommendation from Michael Kerrisk that a new system call should come with + a man page: https://lkml.org/lkml/2014/6/13/309 + - Suggestion from Thomas Gleixner that x86 wire-up should be in a separate + commit: https://lkml.org/lkml/2014/11/19/254 + - Suggestion from Greg Kroah-Hartman that it's good for new system calls to + come with a man-page & selftest: https://lkml.org/lkml/2014/3/19/710 + - Discussion from Michael Kerrisk of new system call vs. prctl(2) extension: + https://lkml.org/lkml/2014/6/3/411 + - Suggestion from Ingo Molnar that system calls that involve multiple + arguments should encapsulate those arguments in a struct, which includes a + size field for future extensibility: https://lkml.org/lkml/2015/7/30/117 + - Numbering oddities arising from (re-)use of O_* numbering space flags: + - commit 75069f2b5bfb ("vfs: renumber FMODE_NONOTIFY and add to uniqueness + check") + - commit 12ed2e36c98a ("fanotify: FMODE_NONOTIFY and __O_SYNC in sparc + conflict") + - commit bb458c644a59 ("Safer ABI for O_TMPFILE") + - Discussion from Matthew Wilcox about restrictions on 64-bit arguments: + https://lkml.org/lkml/2008/12/12/187 + - Recommendation from Greg Kroah-Hartman that unknown flags should be + policed: https://lkml.org/lkml/2014/7/17/577 + - Recommendation from Linus Torvalds that x32 system calls should prefer + compatibility with 64-bit versions rather than 32-bit versions: + https://lkml.org/lkml/2011/8/31/244 diff --git a/Documentation/arm/Samsung/Bootloader-interface.txt b/Documentation/arm/Samsung/Bootloader-interface.txt index b96ead9a6919..df8d4fb85939 100644 --- a/Documentation/arm/Samsung/Bootloader-interface.txt +++ b/Documentation/arm/Samsung/Bootloader-interface.txt @@ -15,6 +15,7 @@ executing kernel. 1. Non-Secure mode + Address: sysram_ns_base_addr Offset Value Purpose ============================================================================= @@ -28,6 +29,7 @@ Offset Value Purpose 2. Secure mode + Address: sysram_base_addr Offset Value Purpose ============================================================================= @@ -40,14 +42,25 @@ Offset Value Purpose Address: pmu_base_addr Offset Value Purpose ============================================================================= -0x0800 exynos_cpu_resume AFTR +0x0800 exynos_cpu_resume AFTR, suspend +0x0800 mcpm_entry_point (Exynos542x with MCPM) AFTR, suspend +0x0804 0xfcba0d10 (Magic cookie) AFTR +0x0804 0x00000bad (Magic cookie) System suspend 0x0814 exynos4_secondary_startup (Exynos4210 r1.1) Secondary CPU boot 0x0818 0xfcba0d10 (Magic cookie, Exynos4210 r1.1) AFTR 0x081C exynos_cpu_resume (Exynos4210 r1.1) AFTR 3. Other (regardless of secure/non-secure mode) + Address: pmu_base_addr Offset Value Purpose ============================================================================= 0x0908 Non-zero (only Exynos3250) Secondary CPU boot up indicator + + +4. Glossary + +AFTR - ARM Off Top Running, a low power mode, Cortex cores and many other +modules are power gated, except the TOP modules +MCPM - Multi-Cluster Power Management diff --git a/Documentation/arm/keystone/Overview.txt b/Documentation/arm/keystone/Overview.txt new file mode 100644 index 000000000000..f17bc4c9dff9 --- /dev/null +++ b/Documentation/arm/keystone/Overview.txt @@ -0,0 +1,73 @@ + TI Keystone Linux Overview + -------------------------- + +Introduction +------------ +Keystone range of SoCs are based on ARM Cortex-A15 MPCore Processors +and c66x DSP cores. This document describes essential information required +for users to run Linux on Keystone based EVMs from Texas Instruments. + +Following SoCs & EVMs are currently supported:- + +------------ K2HK SoC and EVM -------------------------------------------------- + +a.k.a Keystone 2 Hawking/Kepler SoC +TCI6636K2H & TCI6636K2K: See documentation at + http://www.ti.com/product/tci6638k2k + http://www.ti.com/product/tci6638k2h + +EVM: +http://www.advantech.com/Support/TI-EVM/EVMK2HX_sd.aspx + +------------ K2E SoC and EVM --------------------------------------------------- + +a.k.a Keystone 2 Edison SoC +K2E - 66AK2E05: See documentation at + http://www.ti.com/product/66AK2E05/technicaldocuments + +EVM: +https://www.einfochips.com/index.php/partnerships/texas-instruments/k2e-evm.html + +------------ K2L SoC and EVM --------------------------------------------------- + +a.k.a Keystone 2 Lamarr SoC +K2L - TCI6630K2L: See documentation at + http://www.ti.com/product/TCI6630K2L/technicaldocuments +EVM: +https://www.einfochips.com/index.php/partnerships/texas-instruments/k2l-evm.html + +Configuration +------------- + +All of the K2 SoCs/EVMs share a common defconfig, keystone_defconfig and same +image is used to boot on individual EVMs. The platform configuration is +specified through DTS. Following are the DTS used:- + K2HK EVM : k2hk-evm.dts + K2E EVM : k2e-evm.dts + K2L EVM : k2l-evm.dts + +The device tree documentation for the keystone machines are located at + Documentation/devicetree/bindings/arm/keystone/keystone.txt + +Known issues & workaround +------------------------- + +Some of the device drivers used on keystone are re-used from that from +DaVinci and other TI SoCs. These device drivers may use clock APIs directly. +Some of the keystone specific drivers such as netcp uses run time power +management API instead to enable clock. As this API has limitations on +keystone, following workaround is needed to boot Linux. + + Add 'clk_ignore_unused' to the bootargs env variable in u-boot. Otherwise + clock frameworks will try to disable clocks that are unused and disable + the hardware. This is because netcp related power domain and clock + domains are enabled in u-boot as run time power management API currently + doesn't enable clocks for netcp due to a limitation. This workaround is + expected to be removed in the future when proper API support becomes + available. Until then, this work around is needed. + + +Document Author +--------------- +Murali Karicheri <m-karicheri2@ti.com> +Copyright 2015 Texas Instruments diff --git a/Documentation/email-clients.txt b/Documentation/email-clients.txt index c7d49b885559..3fa450881ecb 100644 --- a/Documentation/email-clients.txt +++ b/Documentation/email-clients.txt @@ -93,7 +93,7 @@ Evolution (GUI) Some people use this successfully for patches. When composing mail select: Preformat - from Format->Heading->Preformatted (Ctrl-7) + from Format->Paragraph Style->Preformatted (Ctrl-7) or the toolbar Then use: diff --git a/Documentation/filesystems/btrfs.txt b/Documentation/filesystems/btrfs.txt index d11cc2f8077b..c772b47e7ef0 100644 --- a/Documentation/filesystems/btrfs.txt +++ b/Documentation/filesystems/btrfs.txt @@ -61,7 +61,7 @@ Options with (*) are default options and will not show in the mount options. check_int enables the integrity checker module, which examines all block write requests to ensure on-disk consistency, at a large - memory and CPU cost. + memory and CPU cost. check_int_data includes extent data in the integrity checks, and implies the check_int option. @@ -113,7 +113,7 @@ Options with (*) are default options and will not show in the mount options. Disable/enable debugging option to be more verbose in some ENOSPC conditions. fatal_errors=<action> - Action to take when encountering a fatal error: + Action to take when encountering a fatal error: "bug" - BUG() on a fatal error. This is the default. "panic" - panic() on a fatal error. @@ -132,10 +132,10 @@ Options with (*) are default options and will not show in the mount options. max_inline=<bytes> Specify the maximum amount of space, in bytes, that can be inlined in - a metadata B-tree leaf. The value is specified in bytes, optionally + a metadata B-tree leaf. The value is specified in bytes, optionally with a K, M, or G suffix, case insensitive. In practice, this value is limited by the root sector size, with some space unavailable due - to leaf headers. For a 4k sectorsize, max inline data is ~3900 bytes. + to leaf headers. For a 4k sector size, max inline data is ~3900 bytes. metadata_ratio=<value> Specify that 1 metadata chunk should be allocated after every <value> @@ -170,7 +170,7 @@ Options with (*) are default options and will not show in the mount options. recovery Enable autorecovery attempts if a bad tree root is found at mount time. - Currently this scans a list of several previous tree roots and tries to + Currently this scans a list of several previous tree roots and tries to use the first readable. rescan_uuid_tree @@ -194,7 +194,7 @@ Options with (*) are default options and will not show in the mount options. ssd_spread Options to control ssd allocation schemes. By default, BTRFS will enable or disable ssd allocation heuristics depending on whether a - rotational or nonrotational disk is in use. The ssd and nossd options + rotational or non-rotational disk is in use. The ssd and nossd options can override this autodetection. The ssd_spread mount option attempts to allocate into big chunks @@ -216,13 +216,13 @@ Options with (*) are default options and will not show in the mount options. This allows mounting of subvolumes which are not in the root of the mounted filesystem. You can use "btrfs subvolume show " to see the object ID for a subvolume. - + thread_pool=<number> The number of worker threads to allocate. The default number is equal to the number of CPUs + 2, or 8, whichever is smaller. user_subvol_rm_allowed - Allow subvolumes to be deleted by a non-root user. Use with caution. + Allow subvolumes to be deleted by a non-root user. Use with caution. MAILING LIST ============ diff --git a/Documentation/filesystems/debugfs.txt b/Documentation/filesystems/debugfs.txt index 88ab81c79109..463f595733e8 100644 --- a/Documentation/filesystems/debugfs.txt +++ b/Documentation/filesystems/debugfs.txt @@ -51,6 +51,17 @@ operations should be provided; others can be included as needed. Again, the return value will be a dentry pointer to the created file, NULL for error, or ERR_PTR(-ENODEV) if debugfs support is missing. +Create a file with an initial size, the following function can be used +instead: + + struct dentry *debugfs_create_file_size(const char *name, umode_t mode, + struct dentry *parent, void *data, + const struct file_operations *fops, + loff_t file_size); + +file_size is the initial file size. The other parameters are the same +as the function debugfs_create_file. + In a number of cases, the creation of a set of file operations is not actually necessary; the debugfs code provides a number of helper functions for simple situations. Files containing a single integer value can be @@ -100,6 +111,14 @@ A read on the resulting file will yield either Y (for non-zero values) or N, followed by a newline. If written to, it will accept either upper- or lower-case values, or 1 or 0. Any other input will be silently ignored. +Also, atomic_t values can be placed in debugfs with: + + struct dentry *debugfs_create_atomic_t(const char *name, umode_t mode, + struct dentry *parent, atomic_t *value) + +A read of this file will get atomic_t values, and a write of this file +will set atomic_t values. + Another option is exporting a block of arbitrary binary data, with this structure and function: @@ -147,6 +166,27 @@ The "base" argument may be 0, but you may want to build the reg32 array using __stringify, and a number of register names (macros) are actually byte offsets over a base for the register block. +If you want to dump an u32 array in debugfs, you can create file with: + + struct dentry *debugfs_create_u32_array(const char *name, umode_t mode, + struct dentry *parent, + u32 *array, u32 elements); + +The "array" argument provides data, and the "elements" argument is +the number of elements in the array. Note: Once array is created its +size can not be changed. + +There is a helper function to create device related seq_file: + + struct dentry *debugfs_create_devm_seqfile(struct device *dev, + const char *name, + struct dentry *parent, + int (*read_fn)(struct seq_file *s, + void *data)); + +The "dev" argument is the device related to this debugfs file, and +the "read_fn" is a function pointer which to be called to print the +seq_file content. There are a couple of other directory-oriented helper functions: diff --git a/Documentation/kernel-doc-nano-HOWTO.txt b/Documentation/kernel-doc-nano-HOWTO.txt index acbc1a3d0d91..78f69cdc9b3f 100644 --- a/Documentation/kernel-doc-nano-HOWTO.txt +++ b/Documentation/kernel-doc-nano-HOWTO.txt @@ -128,7 +128,7 @@ are: special place-holders for where the extracted documentation should go. -- scripts/basic/docproc.c +- scripts/docproc.c This is a program for converting SGML template files into SGML files. When a file is referenced it is searched for symbols diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 1d6f0459cd7b..34f2afbf7c15 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -910,6 +910,8 @@ bytes respectively. Such letter suffixes can also be entirely omitted. Disable PIN 1 of APIC timer Can be useful to work around chipset bugs. + dis_ucode_ldr [X86] Disable the microcode loader. + dma_debug=off If the kernel is compiled with DMA_API_DEBUG support, this option disables the debugging code at boot. diff --git a/Documentation/mailbox.txt b/Documentation/mailbox.txt index 1092ad9578da..7ed371c85204 100644 --- a/Documentation/mailbox.txt +++ b/Documentation/mailbox.txt @@ -51,8 +51,7 @@ struct demo_client { */ static void message_from_remote(struct mbox_client *cl, void *mssg) { - struct demo_client *dc = container_of(mbox_client, - struct demo_client, cl); + struct demo_client *dc = container_of(cl, struct demo_client, cl); if (dc->async) { if (is_an_ack(mssg)) { /* An ACK to our last sample sent */ @@ -68,8 +67,7 @@ static void message_from_remote(struct mbox_client *cl, void *mssg) static void sample_sent(struct mbox_client *cl, void *mssg, int r) { - struct demo_client *dc = container_of(mbox_client, - struct demo_client, cl); + struct demo_client *dc = container_of(cl, struct demo_client, cl); complete(&dc->c); } diff --git a/Documentation/men-chameleon-bus.txt b/Documentation/men-chameleon-bus.txt new file mode 100644 index 000000000000..30ded732027e --- /dev/null +++ b/Documentation/men-chameleon-bus.txt @@ -0,0 +1,163 @@ + MEN Chameleon Bus + ================= + +Table of Contents +================= +1 Introduction + 1.1 Scope of this Document + 1.2 Limitations of the current implementation +2 Architecture + 2.1 MEN Chameleon Bus + 2.2 Carrier Devices + 2.3 Parser +3 Resource handling + 3.1 Memory Resources + 3.2 IRQs +4 Writing an MCB driver + 4.1 The driver structure + 4.2 Probing and attaching + 4.3 Initializing the driver + + +1 Introduction +=============== + This document describes the architecture and implementation of the MEN + Chameleon Bus (called MCB throughout this document). + +1.1 Scope of this Document +--------------------------- + This document is intended to be a short overview of the current + implementation and does by no means describe the complete possibilities of MCB + based devices. + +1.2 Limitations of the current implementation +---------------------------------------------- + The current implementation is limited to PCI and PCIe based carrier devices + that only use a single memory resource and share the PCI legacy IRQ. Not + implemented are: + - Multi-resource MCB devices like the VME Controller or M-Module carrier. + - MCB devices that need another MCB device, like SRAM for a DMA Controller's + buffer descriptors or a video controller's video memory. + - A per-carrier IRQ domain for carrier devices that have one (or more) IRQs + per MCB device like PCIe based carriers with MSI or MSI-X support. + +2 Architecture +=============== + MCB is divided into 3 functional blocks: + - The MEN Chameleon Bus itself, + - drivers for MCB Carrier Devices and + - the parser for the Chameleon table. + +2.1 MEN Chameleon Bus +---------------------- + The MEN Chameleon Bus is an artificial bus system that attaches to a so + called Chameleon FPGA device found on some hardware produced my MEN Mikro + Elektronik GmbH. These devices are multi-function devices implemented in a + single FPGA and usually attached via some sort of PCI or PCIe link. Each + FPGA contains a header section describing the content of the FPGA. The + header lists the device id, PCI BAR, offset from the beginning of the PCI + BAR, size in the FPGA, interrupt number and some other properties currently + not handled by the MCB implementation. + +2.2 Carrier Devices +-------------------- + A carrier device is just an abstraction for the real world physical bus the + Chameleon FPGA is attached to. Some IP Core drivers may need to interact with + properties of the carrier device (like querying the IRQ number of a PCI + device). To provide abstraction from the real hardware bus, an MCB carrier + device provides callback methods to translate the driver's MCB function calls + to hardware related function calls. For example a carrier device may + implement the get_irq() method which can be translated into a hardware bus + query for the IRQ number the device should use. + +2.3 Parser +----------- + The parser reads the first 512 bytes of a Chameleon device and parses the + Chameleon table. Currently the parser only supports the Chameleon v2 variant + of the Chameleon table but can easily be adopted to support an older or + possible future variant. While parsing the table's entries new MCB devices + are allocated and their resources are assigned according to the resource + assignment in the Chameleon table. After resource assignment is finished, the + MCB devices are registered at the MCB and thus at the driver core of the + Linux kernel. + +3 Resource handling +==================== + The current implementation assigns exactly one memory and one IRQ resource + per MCB device. But this is likely going to change in the future. + +3.1 Memory Resources +--------------------- + Each MCB device has exactly one memory resource, which can be requested from + the MCB bus. This memory resource is the physical address of the MCB device + inside the carrier and is intended to be passed to ioremap() and friends. It + is already requested from the kernel by calling request_mem_region(). + +3.2 IRQs +--------- + Each MCB device has exactly one IRQ resource, which can be requested from the + MCB bus. If a carrier device driver implements the ->get_irq() callback + method, the IRQ number assigned by the carrier device will be returned, + otherwise the IRQ number inside the Chameleon table will be returned. This + number is suitable to be passed to request_irq(). + +4 Writing an MCB driver +======================= + +4.1 The driver structure +------------------------- + Each MCB driver has a structure to identify the device driver as well as + device ids which identify the IP Core inside the FPGA. The driver structure + also contains callback methods which get executed on driver probe and + removal from the system. + + + static const struct mcb_device_id foo_ids[] = { + { .device = 0x123 }, + { } + }; + MODULE_DEVICE_TABLE(mcb, foo_ids); + + static struct mcb_driver foo_driver = { + driver = { + .name = "foo-bar", + .owner = THIS_MODULE, + }, + .probe = foo_probe, + .remove = foo_remove, + .id_table = foo_ids, + }; + +4.2 Probing and attaching +-------------------------- + When a driver is loaded and the MCB devices it services are found, the MCB + core will call the driver's probe callback method. When the driver is removed + from the system, the MCB core will call the driver's remove callback method. + + + static init foo_probe(struct mcb_device *mdev, const struct mcb_device_id *id); + static void foo_remove(struct mcb_device *mdev); + +4.3 Initializing the driver +---------------------------- + When the kernel is booted or your foo driver module is inserted, you have to + perform driver initialization. Usually it is enough to register your driver + module at the MCB core. + + + static int __init foo_init(void) + { + return mcb_register_driver(&foo_driver); + } + module_init(foo_init); + + static void __exit foo_exit(void) + { + mcb_unregister_driver(&foo_driver); + } + module_exit(foo_exit); + + The module_mcb_driver() macro can be used to reduce the above code. + + + module_mcb_driver(foo_driver); diff --git a/Documentation/networking/can.txt b/Documentation/networking/can.txt index b48d4a149411..fd1a1aad49a9 100644 --- a/Documentation/networking/can.txt +++ b/Documentation/networking/can.txt @@ -510,7 +510,7 @@ solution for a couple of reasons: 4.1.2 RAW socket option CAN_RAW_ERR_FILTER - As described in chapter 3.4 the CAN interface driver can generate so + As described in chapter 3.3 the CAN interface driver can generate so called Error Message Frames that can optionally be passed to the user application in the same way as other CAN frames. The possible errors are divided into different error classes that may be filtered @@ -1152,7 +1152,7 @@ solution for a couple of reasons: $ ip link set canX type can restart Note that a restart will also create a CAN error message frame (see - also chapter 3.4). + also chapter 3.3). 6.6 CAN FD (flexible data rate) driver support diff --git a/Documentation/powerpc/cxl.txt b/Documentation/powerpc/cxl.txt index 2a230d01cd8c..205c1b81625c 100644 --- a/Documentation/powerpc/cxl.txt +++ b/Documentation/powerpc/cxl.txt @@ -133,7 +133,7 @@ User API The following file operations are supported on both slave and master devices. - A userspace library libcxl is avaliable here: + A userspace library libcxl is available here: https://github.com/ibm-capi/libcxl This provides a C interface to this kernel API. diff --git a/Documentation/powerpc/dscr.txt b/Documentation/powerpc/dscr.txt index 1ff4400c57b3..ece300c64f76 100644 --- a/Documentation/powerpc/dscr.txt +++ b/Documentation/powerpc/dscr.txt @@ -4,7 +4,7 @@ DSCR register in powerpc allows user to have some control of prefetch of data stream in the processor. Please refer to the ISA documents or related manual for more detailed information regarding how to use this DSCR to attain this -control of the pefetches . This document here provides an overview of kernel +control of the prefetches . This document here provides an overview of kernel support for DSCR, related kernel objects, it's functionalities and exported user interface. @@ -44,7 +44,7 @@ user interface. value into every CPU's DSCR register right away and updates the current thread's DSCR value as well. - Changing the CPU specif DSCR default value in the sysfs does exactly + Changing the CPU specific DSCR default value in the sysfs does exactly the same thing as above but unlike the global one above, it just changes stuff for that particular CPU instead for all the CPUs on the system. @@ -62,7 +62,7 @@ user interface. Accessing DSCR through user level SPR (0x03) from user space will first create a facility unavailable exception. Inside this exception handler - all mfspr isntruction based read attempts will get emulated and returned + all mfspr instruction based read attempts will get emulated and returned where as the first mtspr instruction based write attempts will enable the DSCR facility for the next time around (both for read and write) by setting DSCR facility in the FSCR register. diff --git a/Documentation/powerpc/qe_firmware.txt b/Documentation/powerpc/qe_firmware.txt index 2031ddb33d09..e7ac24aec4ff 100644 --- a/Documentation/powerpc/qe_firmware.txt +++ b/Documentation/powerpc/qe_firmware.txt @@ -117,7 +117,7 @@ specific been defined. This table describes the structure. Extended Modes This is a double word bit array (64 bits) that defines special functionality -which has an impact on the softwarew drivers. Each bit has its own impact +which has an impact on the software drivers. Each bit has its own impact and has special instructions for the s/w associated with it. This structure is described in this table: diff --git a/Documentation/pps/pps.txt b/Documentation/pps/pps.txt index c508cceeee7d..7cb7264ad598 100644 --- a/Documentation/pps/pps.txt +++ b/Documentation/pps/pps.txt @@ -125,7 +125,7 @@ The same function may also run the defined echo function (pps_ktimer_echo(), passing to it the "ptr" pointer) if the user asked for that... etc.. -Please see the file drivers/pps/clients/ktimer.c for example code. +Please see the file drivers/pps/clients/pps-ktimer.c for example code. SYSFS support diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt index 9832ec52f859..9c3f2f8054b5 100644 --- a/Documentation/sysctl/vm.txt +++ b/Documentation/sysctl/vm.txt @@ -225,11 +225,11 @@ with your system. To disable them, echo 4 (bit 3) into drop_caches. extfrag_threshold This parameter affects whether the kernel will compact memory or direct -reclaim to satisfy a high-order allocation. /proc/extfrag_index shows what -the fragmentation index for each order is in each zone in the system. Values -tending towards 0 imply allocations would fail due to lack of memory, -values towards 1000 imply failures are due to fragmentation and -1 implies -that the allocation will succeed as long as watermarks are met. +reclaim to satisfy a high-order allocation. The extfrag/extfrag_index file in +debugfs shows what the fragmentation index for each order is in each zone in +the system. Values tending towards 0 imply allocations would fail due to lack +of memory, values towards 1000 imply failures are due to fragmentation and -1 +implies that the allocation will succeed as long as watermarks are met. The kernel will not compact memory in a zone if the fragmentation index is <= extfrag_threshold. The default value is 500. diff --git a/Documentation/x86/kernel-stacks b/Documentation/x86/kernel-stacks index 0f3a6c201943..9a0aa4d3a866 100644 --- a/Documentation/x86/kernel-stacks +++ b/Documentation/x86/kernel-stacks @@ -16,7 +16,7 @@ associated with each CPU. These stacks are only used while the kernel is in control on that CPU; when a CPU returns to user space the specialized stacks contain no useful data. The main CPU stacks are: -* Interrupt stack. IRQSTACKSIZE +* Interrupt stack. IRQ_STACK_SIZE Used for external hardware interrupts. If this is the first external hardware interrupt (i.e. not a nested hardware interrupt) then the |